diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..04be9f36b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,23 @@ +.git +__pycache__ +*.py[oc] +.venv +.env +.envrc +.ruff_cache +.mypy_cache +.pytest_cache +.claude +.coverage +.DS_Store +build +dist +wheels +*.egg-info +docs +site +.github +.qdrant_code_embeddings +CLAUDE.md +AGENTS.md +PROJECT.md diff --git a/.env.example b/.env.example index dc518b501..9e449e031 100644 --- a/.env.example +++ b/.env.example @@ -60,6 +60,17 @@ # CYPHER_MODEL=gemini-2.5-flash # CYPHER_API_KEY=your-google-api-key +# Example 6: LiteLLM with custom provider +# ORCHESTRATOR_PROVIDER=litellm_proxy +# ORCHESTRATOR_MODEL=gpt-oss:120b +# ORCHESTRATOR_ENDPOINT=http://litellm:4000/v1 +# ORCHESTRATOR_API_KEY=sk-your-litellm-key + +# CYPHER_PROVIDER=litellm_proxy +# CYPHER_MODEL=openrouter/gpt-oss:120b +# CYPHER_ENDPOINT=http://litellm:4000/v1 +# CYPHER_API_KEY=sk-your-litellm-key + # Thinking budget for reasoning models (optional) # ORCHESTRATOR_THINKING_BUDGET=10000 # CYPHER_THINKING_BUDGET=5000 @@ -68,9 +79,20 @@ MEMGRAPH_HOST=localhost MEMGRAPH_PORT=7687 MEMGRAPH_HTTP_PORT=7444 +# Memgraph authentication credentials +# Leave MEMGRAPH_USERNAME empty (or omit it) if your Memgraph instance doesn't require authentication +# If authentication is enabled, provide both username and password +# Common defaults: username=neo4j, password=password (or your custom credentials) +MEMGRAPH_USERNAME= +MEMGRAPH_PASSWORD= LAB_PORT=3000 MEMGRAPH_BATCH_SIZE=1000 +# Qdrant settings +# Leave QDRANT_URL unset to use local file mode (only suitable below ~20k embeddings) +# For larger codebases, run the bundled docker-compose service and point at it: +# QDRANT_URL=http://localhost:6333 + # Repository settings TARGET_REPO_PATH=. 
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..49ff9c712 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @vitali87 diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..9b47f9561 --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying 
and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +eheva87@gmail.com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. 
No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. 
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index d5f29c336..163b5ae21 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -3,5 +3,4 @@ github: vitali87 buy_me_a_coffee: vitali87 -# Uncomment and add username when you set up Patreon: -# patreon: YOUR_USERNAME +patreon: vitali87 diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 4b6f8f59b..70c1f1023 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,10 +1,7 @@ -blank_issues_enabled: false +blank_issues_enabled: true contact_links: - - name: 💬 Discussions - url: https://github.com/vitali87/code-graph-rag/discussions - about: Ask questions and discuss ideas with the community - name: 📚 Documentation - url: https://github.com/vitali87/code-graph-rag#readme + url: https://github.com/vitali87/code-graph-rag about: Read the documentation and setup guides - name: 🎓 MCP Server Setup url: https://github.com/vitali87/code-graph-rag/blob/main/docs/claude-code-setup.md diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml index 47d83bcd9..154945398 100644 --- a/.github/ISSUE_TEMPLATE/question.yml +++ b/.github/ISSUE_TEMPLATE/question.yml @@ -6,7 +6,7 @@ body: - type: markdown attributes: value: | - Thank you for your question! For general discussions or open-ended questions, consider using [GitHub Discussions](https://github.com/vitali87/code-graph-rag/discussions). + Thank you for your question! Please keep questions concrete; for broader topics, prefer opening an [issue](https://github.com/vitali87/code-graph-rag/issues) with the `question` label. 
- type: textarea id: question diff --git a/.github/SECURITY.md b/.github/SECURITY.md new file mode 100644 index 000000000..77c1a62b4 --- /dev/null +++ b/.github/SECURITY.md @@ -0,0 +1,46 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +| ------- | ------------------ | +| 0.0.x | :white_check_mark: | + +As the project is in early development (pre-1.0), only the latest release receives security updates. Please ensure you are running the most recent version before reporting a vulnerability. + +## Reporting a Vulnerability + +**Please do not report security vulnerabilities through public issues, pull requests, or any other public channels.** + +Instead, please use GitHub's private vulnerability reporting: go to the [Security tab](https://github.com/vitali87/code-graph-rag/security/advisories/new) and click **Report a vulnerability**. This keeps the details confidential between you and the maintainers until a fix is available. + +When reporting, please include: + +- A description of the vulnerability and its potential impact +- Steps to reproduce or a proof of concept +- The version(s) affected +- Any suggested fix, if available + +## What to Expect + +- **Acknowledgement** within 72 hours of your report +- **Status update** within 7 days with an initial assessment +- **Resolution target** of 30 days for confirmed vulnerabilities, though critical issues will be prioritized for faster turnaround + +If the vulnerability is accepted, we will work on a fix, coordinate disclosure with you, and credit you in the release notes (unless you prefer to remain anonymous). + +If the vulnerability is declined, we will provide a clear explanation of why. + +## Scope + +This policy applies to the `code-graph-rag` Python package and its official repository. Third-party dependencies are outside the direct scope of this policy, though we use Dependabot to monitor and update them. 
+ +## Security Measures in This Project + +- **Dependency scanning**: Dependabot is enabled for automated dependency updates +- **Secret scanning**: GitHub secret scanning is active on this repository +- **Branch protection**: The `main` branch requires pull request reviews before merging + +## Preferred Languages + +We accept security reports in English. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..a075b29ee --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + + - package-ecosystem: "docker" + directory: "/" + schedule: + interval: "weekly" + + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..8dc054f6c --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,38 @@ +## Summary + + + +- + +## Type of Change + + + +- [ ] Bug fix +- [ ] New feature +- [ ] Performance improvement +- [ ] Refactoring (no functional changes) +- [ ] Documentation +- [ ] CI/CD or tooling +- [ ] Dependencies + +## Related Issues + + + +## Test Plan + + + +- [ ] Unit tests pass (`make test-parallel` or `uv run pytest -n auto -m "not integration"`) +- [ ] New tests added +- [ ] Integration tests pass (`make test-integration`, requires Docker) +- [ ] Manual testing (describe below) + +## Checklist + +- [ ] PR title follows [Conventional Commits](https://www.conventionalcommits.org/) format +- [ ] All pre-commit checks pass (`make pre-commit`) +- [ ] No hardcoded strings in non-config/non-constants files +- [ ] No `# type: ignore`, `cast()`, `Any`, or `object` type hints +- [ ] No new comments or docstrings (code should be self-documenting) diff --git a/.github/workflows/build-binaries.yml b/.github/workflows/build-binaries.yml index c548d82ea..315cfa45a 100644 --- 
a/.github/workflows/build-binaries.yml +++ b/.github/workflows/build-binaries.yml @@ -8,10 +8,14 @@ on: release: types: [created] +permissions: read-all + jobs: build: name: Build ${{ matrix.platform }}-${{ matrix.arch }} runs-on: ${{ matrix.os }} + permissions: + contents: write timeout-minutes: 30 strategy: fail-fast: false @@ -32,18 +36,18 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 submodules: recursive - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.12" - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -66,7 +70,7 @@ jobs: fi - name: Upload binary artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: code-graph-rag-${{ matrix.platform }}-${{ matrix.arch }} path: dist/code-graph-rag-* @@ -75,7 +79,39 @@ jobs: - name: Upload to release if: startsWith(github.ref, 'refs/tags/v') - uses: softprops/action-gh-release@v2 + uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2 with: files: dist/code-graph-rag-* fail_on_unmatched_files: true + + sign-release: + name: Sign Release Artifacts + if: startsWith(github.ref, 'refs/tags/v') + needs: build + runs-on: ubuntu-latest + permissions: + contents: write + id-token: write + steps: + - name: Install cosign + uses: sigstore/cosign-installer@ba7bc0a3fef59531c69a25acd34668d6d3fe6f22 # v4.1.0 + + - name: Download all artifacts + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 + with: + path: artifacts + merge-multiple: true + + - name: Sign artifacts + shell: bash + run: | + for f in artifacts/*; do + [ -f "$f" ] || continue + cosign 
sign-blob --yes --bundle "${f}.sigstore.json" "$f" + done + + - name: Upload signatures to release + uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2 + with: + files: artifacts/*.sigstore.json + fail_on_unmatched_files: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 43b0cc8db..a7742b439 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,8 @@ on: branches: [main, master, develop] workflow_dispatch: +permissions: read-all + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -19,16 +21,16 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 with: enable-cache: true cache-dependency-glob: "uv.lock" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.12" @@ -51,16 +53,16 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 with: enable-cache: true cache-dependency-glob: "uv.lock" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.12" @@ -75,7 +77,7 @@ jobs: test-unit: name: Unit Tests (${{ matrix.os }}) runs-on: ${{ matrix.os }} - timeout-minutes: 15 + timeout-minutes: 20 strategy: fail-fast: false matrix: @@ -83,19 +85,19 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: recursive fetch-depth: 0 - name: Install uv - 
uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 with: enable-cache: true cache-dependency-glob: "uv.lock" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.12" @@ -103,13 +105,19 @@ jobs: run: | uv sync --extra treesitter-full --extra test --extra semantic --group dev - - name: Run unit tests (parallel) + - name: Run unit tests (parallel, with coverage) + if: matrix.os == 'macos-latest' run: | uv run pytest -n auto -m "not integration" --tb=short --cov=codebase_rag --cov-report=xml --cov-report=term + - name: Run unit tests (parallel, no coverage) + if: matrix.os != 'macos-latest' + run: | + uv run pytest -n auto -m "not integration" --tb=short + - name: Upload coverage to Codecov - if: always() && secrets.CODECOV_TOKEN != '' - uses: codecov/codecov-action@v4 + if: always() && matrix.os == 'macos-latest' + uses: codecov/codecov-action@1af58845a975a7985b0beb0cbe6fbbb71a41dbad # v5.5.3 with: files: ./coverage.xml flags: unit-${{ matrix.os }} @@ -123,7 +131,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: recursive fetch-depth: 0 @@ -133,7 +141,7 @@ jobs: docker run -d --name memgraph -p 7687:7687 memgraph/memgraph-platform:latest echo "Waiting for Memgraph to start..." for i in {1..30}; do - if docker exec memgraph echo "SELECT 1;" 2>/dev/null; then + if docker exec memgraph mgconsole --no-history -c "RETURN 1;" 2>/dev/null; then echo "Memgraph is ready!" 
break fi @@ -142,13 +150,13 @@ jobs: done - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 with: enable-cache: true cache-dependency-glob: "uv.lock" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.12" @@ -164,8 +172,8 @@ jobs: uv run pytest -m "integration" -v --tb=short --cov=codebase_rag --cov-report=xml --cov-report=term - name: Upload coverage to Codecov - if: always() && secrets.CODECOV_TOKEN != '' - uses: codecov/codecov-action@v4 + if: always() + uses: codecov/codecov-action@1af58845a975a7985b0beb0cbe6fbbb71a41dbad # v5.5.3 with: files: ./coverage.xml flags: integration-ubuntu-latest @@ -187,7 +195,7 @@ jobs: steps: - name: Check PR title format - uses: amannn/action-semantic-pull-request@v5 + uses: amannn/action-semantic-pull-request@48f256284bd46cdaab1048c3721360e808335d50 # v6.1.1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml index ecd3732f3..6c0c48ebf 100644 --- a/.github/workflows/claude-code-review.yml +++ b/.github/workflows/claude-code-review.yml @@ -10,6 +10,8 @@ on: - "*.py" - "pyproject.toml" +permissions: read-all + jobs: claude-review: name: AI Code Review @@ -26,13 +28,13 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 1 - name: Run Claude Code Review id: claude-review - uses: anthropics/claude-code-action@beta + uses: anthropics/claude-code-action@28f83620103c48a57093dcc2837eec89e036bb9f # beta with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 000000000..853e4df66 --- /dev/null +++ 
b/.github/workflows/docker-publish.yml @@ -0,0 +1,62 @@ +name: Docker Publish + +on: + push: + tags: + - 'v*' + workflow_dispatch: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +permissions: read-all + +jobs: + build-and-push: + runs-on: ubuntu-latest + timeout-minutes: 60 + permissions: + contents: read + packages: write + attestations: write + id-token: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3 + + - uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3 + + - uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # v5 + id: meta + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha + + - uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0 + id: push + with: + context: . 
+ platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + + - uses: actions/attest-build-provenance@96b4a1ef7235a096b17240c259729fdd70c83d45 # v2 + with: + subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + subject-digest: ${{ steps.push.outputs.digest }} + push-to-registry: true diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000..912c8eb02 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,58 @@ +name: Deploy Documentation + +on: + push: + branches: + - main + paths: + - "docs/**" + - "mkdocs.yml" + # (H) Rebuilds periodically so the GitHub repo widget (version, stars, forks) + # stays current; MkDocs Material fetches these stats at build time. + schedule: + - cron: "0 */6 * * *" + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: pages + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.12" + + - name: Install dependencies + run: uv sync --group docs + + - name: Build site + run: uv run mkdocs build --strict + + - uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4.0.0 + with: + path: site + + deploy: + needs: build + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - id: deployment + uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4 diff --git a/.github/workflows/label-sync.yml b/.github/workflows/label-sync.yml index ec787447e..40cc0e2c0 
100644 --- a/.github/workflows/label-sync.yml +++ b/.github/workflows/label-sync.yml @@ -9,9 +9,10 @@ on: - ".github/workflows/label-sync.yml" workflow_dispatch: schedule: - # Run weekly on Mondays at 00:00 UTC to ensure labels stay in sync - cron: "0 0 * * 1" +permissions: read-all + jobs: sync-labels: name: Sync Repository Labels @@ -22,10 +23,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Sync labels - uses: micnncim/action-label-syncer@v1 + uses: micnncim/action-label-syncer@3abd5ab72fda571e69fffd97bd4e0033dd5f495c # v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/.github/workflows/osv-scanner.yml b/.github/workflows/osv-scanner.yml new file mode 100644 index 000000000..5ac2a0a24 --- /dev/null +++ b/.github/workflows/osv-scanner.yml @@ -0,0 +1,50 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# A sample workflow which sets up periodic OSV-Scanner scanning for vulnerabilities, +# in addition to a PR check which fails if new vulnerabilities are introduced. 
+# +# For more examples and options, including how to ignore specific vulnerabilities, +# see https://google.github.io/osv-scanner/github-action/ + +name: OSV-Scanner + +on: + pull_request: + branches: [ "main" ] + merge_group: + branches: [ "main" ] + schedule: + - cron: '29 2 * * 4' + push: + branches: [ "main" ] + +permissions: read-all + +jobs: + scan-scheduled: + if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }} + uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@c5996e0193a3df57d695c1b8a1dec2a4c62e8730 # v2.3.3 + permissions: + actions: read + security-events: write + contents: read + with: + scan-args: |- + -r + --skip-git + ./ + scan-pr: + if: ${{ github.event_name == 'pull_request' || github.event_name == 'merge_group' }} + uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable-pr.yml@c5996e0193a3df57d695c1b8a1dec2a4c62e8730 # v2.3.3 + permissions: + actions: read + security-events: write + contents: read + with: + scan-args: |- + -r + --skip-git + ./ diff --git a/.github/workflows/poor-quality-management.yml b/.github/workflows/poor-quality-management.yml index df73ada89..657a86dae 100644 --- a/.github/workflows/poor-quality-management.yml +++ b/.github/workflows/poor-quality-management.yml @@ -4,9 +4,11 @@ on: pull_request_target: types: [labeled] schedule: - - cron: "0 9 * * *" # Daily at 9 AM UTC + - cron: "0 9 * * *" workflow_dispatch: +permissions: read-all + jobs: notify-poor-quality: name: Notify Poor Quality PR @@ -19,7 +21,7 @@ jobs: steps: - name: Add warning comment - uses: actions/github-script@v7 + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: script: | const message = `⚠️ **This PR has been marked as poor-quality.** @@ -73,7 +75,7 @@ jobs: steps: - name: Close PRs with poor-quality label older than 7 days - uses: actions/github-script@v7 + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: script: | const 
LABEL_NAME = 'poor-quality'; diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 000000000..1201a3a14 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,38 @@ +name: Publish to PyPI + +on: + release: + types: [published] + +permissions: read-all + +jobs: + publish: + name: Publish to PyPI + runs-on: ubuntu-latest + timeout-minutes: 10 + environment: pypi + permissions: + id-token: write + contents: read + + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Install uv + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.12" + + - name: Build package + run: uv build + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # release/v1 diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 000000000..08b117574 --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,78 @@ +# This workflow uses actions that are not certified by GitHub. They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecard supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '32 23 * * 2' + push: + branches: [ "main" ] + +# Declare default permissions as read only. 
+permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + # `publish_results: true` only works when run from the default branch. conditional can be removed if disabled. + if: github.event.repository.default_branch == github.ref_name || github.event_name == 'pull_request' + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # (Optional) Uncomment file_mode if you have a .gitattributes with files marked export-ignore + # file_mode: git + + # Upload the results as artifacts (optional). 
Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. + - name: "Upload artifact" + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: SARIF file + path: results.sarif + retention-days: 5 + + # Upload the results to GitHub's code scanning dashboard (optional). + # Commenting out will disable upload of results to your repo's Code Scanning dashboard + - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@38697555549f1db7851b81482ff19f1fa5c4fedc # v3 + with: + sarif_file: results.sarif diff --git a/.github/workflows/sonarcloud.yml b/.github/workflows/sonarcloud.yml new file mode 100644 index 000000000..123b16f0a --- /dev/null +++ b/.github/workflows/sonarcloud.yml @@ -0,0 +1,45 @@ +name: SonarCloud + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + sonarcloud: + name: SonarCloud Analysis + if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push' + runs-on: ubuntu-latest + timeout-minutes: 15 + + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + + - name: Install uv + uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.12" + + - name: Install dependencies + run: uv sync --extra treesitter-full --extra test --extra semantic --group dev + + - name: Run tests with coverage + run: uv run pytest -n auto -m "not integration" --tb=short --cov=codebase_rag --cov-report=xml + + - name: SonarCloud Scan + uses: SonarSource/sonarqube-scan-action@fd88b7d7ccbaefd23d8f36f73b59db7a3d246602 # v6 + env: + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} diff --git 
a/.github/workflows/split-score.yml b/.github/workflows/split-score.yml new file mode 100644 index 000000000..7c65ac2e2 --- /dev/null +++ b/.github/workflows/split-score.yml @@ -0,0 +1,22 @@ +name: PR Split Score + +on: + pull_request: + branches: [main] + +permissions: + contents: read + pull-requests: write + +jobs: + score: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + + - name: pr-split score + uses: vitali87/pr-split@v1.0.0 + with: + max-loc: "400" diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml index 0940adcad..596a01ccd 100644 --- a/.github/workflows/version-bump.yml +++ b/.github/workflows/version-bump.yml @@ -16,6 +16,8 @@ on: - minor - major +permissions: read-all + jobs: bump-version: name: Auto Version Bump @@ -26,7 +28,7 @@ jobs: contents: write steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 2 token: ${{ secrets.GITHUB_TOKEN }} @@ -90,12 +92,17 @@ jobs: run: | sed -i 's/^version = ".*"/version = "${{ steps.bump_version.outputs.new }}"/' pyproject.toml + - name: Update server.json + if: steps.check_manual.outputs.skip == 'false' + run: | + sed -i 's/"version": "[^"]*"/"version": "${{ steps.bump_version.outputs.new }}"/g' server.json + - name: Commit version bump if: steps.check_manual.outputs.skip == 'false' run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git add pyproject.toml + git add pyproject.toml server.json git commit -m "chore: bump version to ${{ steps.bump_version.outputs.new }}" git push diff --git a/.gitignore b/.gitignore index 4b6211856..c44ce990d 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,19 @@ PROJECT.md .DS_Store .pypi_cache.json .omc +site/ + +# Eval harness scratch workspace (regenerated each run); result files are committed +evals/results/l3_workspace/ +# Rust
oracle build artifacts (the source + Cargo.lock are committed) +evals/oracles/rs_oracle/target/ +# TypeScript oracle deps (the source + package-lock.json are committed) +evals/oracles/ts_oracle/node_modules/ +# Java oracle compiled classes (the source is committed) +evals/oracles/java_oracle/*.class +# Lua oracle deps (the source + package-lock.json are committed) +evals/oracles/lua_oracle/node_modules/ +# PHP oracle deps (the source + package-lock.json are committed) +evals/oracles/php_oracle/node_modules/ +.cgr-hash-cache.json +.cgr-dir-mtimes.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 92a09727a..12a7db5f0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,23 +5,24 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml + args: [--unsafe] - id: check-toml - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.12.2 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - exclude: ^codec/schema_pb2\.(py|pyi)$ + exclude: ^(codec/schema_pb2\.(py|pyi)$|benchmarks/|optimize/) - id: ruff-format - exclude: ^codec/schema_pb2\.(py|pyi)$ + exclude: ^(codec/schema_pb2\.(py|pyi)$|benchmarks/|optimize/) - repo: local hooks: - id: ty name: ty check - entry: uv run ty check --exclude codebase_rag/tests/ + entry: uv run ty check --exclude codebase_rag/tests/ --exclude benchmarks/ --exclude optimize/ --exclude codec/ --exclude grammars/ --exclude query_modules/ language: system types: [python] - exclude: ^codec/.*_pb2\.py$ + exclude: ^(codec/.*_pb2\.py$|benchmarks/|optimize/|grammars/|query_modules/) pass_filenames: false - repo: local hooks: @@ -30,7 +31,7 @@ repos: entry: uv run python scripts/check_no_docs.py language: system types: [python] - exclude: ^codec/schema_pb2\.py$ + exclude: ^(codec/schema_pb2\.py|benchmarks/|optimize/) - repo: local hooks: - id: generate-readme @@ -45,7 +46,7 @@ repos: - id: bandit args: ["-c", "pyproject.toml", "--severity-level", "high"] additional_dependencies:
["bandit[toml]"] - exclude: ^(codebase_rag/tests/|scripts/) + exclude: ^(codebase_rag/tests/|scripts/|benchmarks/|optimize/) - repo: https://github.com/compilerla/conventional-pre-commit rev: v4.2.0 hooks: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cfc7c6d05..5fd788a9c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,7 @@ Thank you for your interest in contributing to Code Graph RAG! We welcome contri ## Getting Started -1. **Browse Issues**: Check out our [GitHub Issues](https://github.com/vitali87/code-graph-rag/issues) to find tasks that need work +1. **Browse Issues**: Check out our [issue tracker](https://github.com/vitali87/code-graph-rag/issues) to find tasks that need work - Look for issues labeled `good first issue` for beginner-friendly tasks - Issues labeled `help wanted` are open for community contributions 2. **Pick an Issue**: Choose an issue that interests you and matches your skill level diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..e965de91d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,51 @@ +FROM ghcr.io/astral-sh/uv:0.10@sha256:72ab0aeb448090480ccabb99fb5f52b0dc3c71923bffb5e2e26517a1c27b7fec AS uv + +FROM python:3.14-slim@sha256:fb83750094b46fd6b8adaa80f66e2302ecbe45d513f6cece637a841e1025b4ca AS builder + +COPY --from=uv /uv /uvx /bin/ + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + cmake build-essential libssl-dev zlib1g-dev libzstd-dev && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY pyproject.toml uv.lock ./ +RUN uv sync --frozen --no-dev --extra treesitter-full --no-install-project --no-binary-package pymgclient + +COPY . . 
+RUN uv sync --frozen --no-dev --extra treesitter-full --no-binary-package pymgclient + +FROM python:3.14-slim@sha256:fb83750094b46fd6b8adaa80f66e2302ecbe45d513f6cece637a841e1025b4ca + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ripgrep libssl3 zlib1g libzstd1 && \ + rm -rf /var/lib/apt/lists/* + +RUN useradd --create-home appuser +USER appuser +WORKDIR /app + +COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv +COPY --from=builder --chown=appuser:appuser /app/codebase_rag /app/codebase_rag +COPY --from=builder --chown=appuser:appuser /app/codec /app/codec +COPY --from=builder --chown=appuser:appuser /app/cgr /app/cgr +COPY --from=builder --chown=appuser:appuser /app/pyproject.toml /app/pyproject.toml + +ENV PATH="/app/.venv/bin:$PATH" + +COPY --chmod=755 <<'EOF' /app/entrypoint.sh +#!/bin/sh +ARCH=$(uname -m) +case "$ARCH" in + x86_64) LIBDIR="/lib/x86_64-linux-gnu" ;; + aarch64) LIBDIR="/lib/aarch64-linux-gnu" ;; + *) LIBDIR="/lib" ;; +esac +export LD_PRELOAD="$LIBDIR/libz.so.1:$LIBDIR/libzstd.so.1" +exec code-graph-rag "$@" +EOF + +ENTRYPOINT ["/app/entrypoint.sh"] +CMD ["mcp-server"] diff --git a/LICENSE b/LICENSE index fd189113e..4765780e7 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) [2025] [Vitali Avagyan] +Copyright (c) 2025 Vitali Avagyan Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index 10c757dac..d8fa492d8 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help all install dev test test-parallel test-integration test-all test-parallel-all clean python build-grammars watch readme lint format typecheck check pre-commit +.PHONY: help all install dev test test-parallel test-integration test-all test-parallel-all clean python build-grammars watch readme lint format typecheck check pre-commit release PYTHON := uv run @@ -77,6 +77,9 @@ 
typecheck: ## Run type checking with ty check: lint typecheck test ## Run all checks: lint, typecheck, test +release: ## Build, verify, and publish the current pyproject version to PyPI, then tag and create a GitHub Release + ./scripts/release.sh + pre-commit: ## Run all pre-commit checks locally (comprehensive test before commit) @echo "Running pre-commit checks..." @echo "1. Formatting code..." diff --git a/PYPI_README.md b/PYPI_README.md new file mode 100644 index 000000000..93c74db31 --- /dev/null +++ b/PYPI_README.md @@ -0,0 +1,160 @@ +# Code-Graph-RAG + +A graph-based RAG system that parses multi-language codebases with Tree-sitter, builds knowledge graphs in Memgraph, and enables natural language querying, editing, and optimization. + +## Install + +```bash +pip install code-graph-rag +``` + +With all Tree-sitter grammars (Python, JS, TS, Rust, Go, Java, Scala, C++, Lua): + +```bash +pip install 'code-graph-rag[treesitter-full]' +``` + +With semantic code search (UniXcoder embeddings): + +```bash +pip install 'code-graph-rag[semantic]' +``` + +### Prerequisites + +- Python 3.12+ +- Docker (for Memgraph) +- `cmake` (for building pymgclient) +- `ripgrep` (`rg`) (for shell command text searching) + +## CLI Quick Start + +The package installs a `cgr` command. + +**Start Memgraph, parse a repo, and query it:** + +```bash +cgr daemon up # start Memgraph + Qdrant +cgr start --repo-path ./my-project \ + --update-graph --clean # parse & launch interactive chat +``` + +**Index to protobuf for offline use:** + +```bash +cgr index -o ./index-output --repo-path ./my-project +``` + +**Export knowledge graph to JSON:** + +```bash +cgr export -o graph.json +``` + +**AI-guided optimization:** + +```bash +cgr optimize python --repo-path ./my-project +``` + +**Run as an MCP server (for Claude Code):** + +```bash +cgr mcp-server +``` + +**Check your setup:** + +```bash +cgr doctor +``` + +## Python SDK + +The `cgr` package provides short imports for programmatic use. 
+ +### Load and query an exported graph + +```python +from cgr import load_graph + +graph = load_graph("graph.json") +print(graph.summary()) + +functions = graph.find_nodes_by_label("Function") +for fn in functions[:5]: + rels = graph.get_relationships_for_node(fn.node_id) + print(f"{fn.properties['name']}: {len(rels)} relationships") +``` + +### Query Memgraph with Cypher + +```python +from cgr import MemgraphIngestor + +with MemgraphIngestor(host="localhost", port=7687) as db: + rows = db.fetch_all("MATCH (f:Function) RETURN f.name LIMIT 10") + for row in rows: + print(row) +``` + +### Generate Cypher from natural language + +```python +import asyncio +from cgr import CypherGenerator + +async def main(): + gen = CypherGenerator() + cypher = await gen.generate("Find all classes that inherit from BaseModel") + print(cypher) + +asyncio.run(main()) +``` + +### Semantic code search + +Requires the `semantic` extra. + +```python +from cgr import embed_code + +embedding = embed_code("def authenticate(user, password): ...") +print(f"Embedding dimension: {len(embedding)}") +``` + +### Configuration + +```python +from cgr import settings + +settings.set_orchestrator("openai", "gpt-4o", api_key="sk-...") +settings.set_cypher("google", "gemini-2.5-flash", api_key="your-key") +``` + +## Environment Variables + +Configure via `.env` or environment variables: + +| Variable | Default | Description | +|----------|---------|-------------| +| `MEMGRAPH_HOST` | `localhost` | Memgraph hostname | +| `MEMGRAPH_PORT` | `7687` | Memgraph port | +| `ORCHESTRATOR_PROVIDER` | | Provider: `google`, `openai`, `ollama` | +| `ORCHESTRATOR_MODEL` | | Model ID (e.g. `gpt-4o`, `gemini-2.5-pro`) | +| `ORCHESTRATOR_API_KEY` | | API key for the provider (not needed for `ollama`) | +| `CYPHER_PROVIDER` | | Provider for Cypher generation | +| `CYPHER_MODEL` | | Model ID for Cypher generation (e.g. 
`codellama`, `gpt-4o-mini`) | +| `CYPHER_API_KEY` | | API key for Cypher provider (not needed for `ollama`) | +| `TARGET_REPO_PATH` | `.` | Default repository path | + +## Documentation + +Full documentation, architecture details, and contribution guide: +[docs.code-graph-rag.com](https://docs.code-graph-rag.com) + +## License + +MIT + + diff --git a/README.md b/README.md index 5ef87d4e0..fa36b17fa 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,54 @@
+ + Code-Graph-RAG Logo

+ + + Enterprise Support + + PyPI Downloads + + +

@@ -35,8 +63,9 @@ An accurate Retrieval-Augmented Generation (RAG) system that analyzes multi-lang ## Latest News 🔥 -- **[NEW]** **MCP Server Integration**: Code-Graph-RAG now works as an MCP server with Claude Code! Query and edit your codebase using natural language directly from Claude Code. [Setup Guide](docs/claude-code-setup.md) -- [2025/10/21] **Semantic Code Search**: Added intent-based code search using UniXcoder embeddings. Find functions by describing what they do (e.g., "error handling functions", "authentication code") rather than by exact names. +- **PHP Language Support**: Full PHP language support added — classes, interfaces, traits, enums, namespaces, PHP 8 attributes, and call graph analysis. Contributed by [@rs-ipps](https://github.com/rs-ipps). +- **C Language Support**: Full C language support added — functions, structs, unions, enums, preprocessor includes, and call graph analysis. Contributed by [@dj0nes](https://github.com/dj0nes). +- **Visualise any GitHub repo instantly!** Just change `github.com` to `gitcgr.com` in any repo URL — that's it, only 3 letters! Get an interactive graph of the entire codebase structure. 
Try it now: [gitcgr.com](https://gitcgr.com) ## 🚀 Features @@ -45,16 +74,16 @@ An accurate Retrieval-Augmented Generation (RAG) system that analyzes multi-lang | Language | Status | Extensions | Functions | Classes/Structs | Modules | Package Detection | Additional Features | |--------|------|----------|---------|---------------|-------|-----------------|-------------------| +| C | Fully Supported | .c | ✓ | ✓ | ✓ | ✓ | Functions, structs, unions, enums, preprocessor includes | | C++ | Fully Supported | .cpp, .h, .hpp, .cc, .cxx, .hxx, .hh, .ixx, .cppm, .ccm | ✓ | ✓ | ✓ | ✓ | Constructors, destructors, operator overloading, templates, lambdas, C++20 modules, namespaces | | Java | Fully Supported | .java | ✓ | ✓ | ✓ | - | Generics, annotations, modern features (records/sealed classes), concurrency, reflection | | JavaScript | Fully Supported | .js, .jsx | ✓ | ✓ | ✓ | - | ES6 modules, CommonJS, prototype methods, object methods, arrow functions | | Lua | Fully Supported | .lua | ✓ | - | ✓ | - | Local/global functions, metatables, closures, coroutines | +| PHP | Fully Supported | .php | ✓ | ✓ | ✓ | - | Classes, interfaces, traits, enums, namespaces, PHP 8 attributes | | Python | Fully Supported | .py | ✓ | ✓ | ✓ | ✓ | Type inference, decorators, nested functions | | Rust | Fully Supported | .rs | ✓ | ✓ | ✓ | ✓ | impl blocks, associated functions | | TypeScript | Fully Supported | .ts, .tsx | ✓ | ✓ | ✓ | - | Interfaces, type aliases, enums, namespaces, ES6/CommonJS modules | -| C# | In Development | .cs | ✓ | ✓ | ✓ | - | Classes, interfaces, generics (planned) | | Go | In Development | .go | ✓ | ✓ | ✓ | - | Methods, type declarations | -| PHP | In Development | .php | ✓ | ✓ | ✓ | - | Classes, functions, namespaces | | Scala | In Development | .scala, .sc | ✓ | ✓ | ✓ | - | Case classes, objects | - **🌳 Tree-sitter Parsing**: Uses Tree-sitter for robust, language-agnostic AST parsing @@ -111,9 +140,54 @@ sudo dnf install ripgrep ## 🛠️ Installation +### System-wide 
install (recommended for end users) + +`cgr` is published to PyPI and can be installed system-wide so it works from any +target repo without activating a project virtualenv. Install with the +`treesitter-full` (all languages) and `semantic` (vector search) extras: + ```bash -git clone https://github.com/vitali87/code-graph-rag.git +# with uv (recommended) +uv tool install "code-graph-rag[treesitter-full,semantic]" + +# or with pipx +pipx install "code-graph-rag[treesitter-full,semantic]" +``` + +For a Python-only install, omit the extras. For local development from a clone, +use `uv tool install --editable "/path/to/code-graph-rag[treesitter-full,semantic]"`. + +After install, `cgr` is on PATH. From any repository, run: + +```bash +cd ~/path/to/some-target-repo +cgr daemon up # one-time: start the shared memgraph + qdrant stack +cgr start # auto-sync the current repo and drop into the agent +``` + +`cgr start` defaults `--repo-path` to the current directory and auto-syncs the +graph incrementally on entry. Pass `--no-sync` to skip the sync, or +`--no-start-stack` if memgraph/qdrant already run elsewhere. + +Useful subcommands: + +| Command | Purpose | +|---|---| +| `cgr daemon up/down/status/restart/logs` | Manage the shared docker stack | +| `cgr stop` | Alias for `cgr daemon down` | +| `cgr status` | Show stack state + per-project last-sync timestamp | +| `cgr workspace create/list/show/delete` | Manage named bundles of repos | +| `cgr workspace add-repo / remove-repo` | Edit a workspace's repo set | +| `cgr start --workspace mono` | Open the agent over every project in the workspace | +| `cgr start --projects a,b,c` | Scope agent queries to the listed projects | +Indexed data persists across `cgr daemon down` thanks to named memgraph + qdrant +volumes (`memgraph_data`, `memgraph_log`, `qdrant_storage`). 
+ +### Local development install + +```bash +git clone https://github.com/vitali87/code-graph-rag.git cd code-graph-rag ``` @@ -218,9 +292,20 @@ ollama pull llama3.2 4. **Start Memgraph database**: ```bash -docker-compose up -d +cgr daemon up ``` +5. **Verify installation**: +```bash +# If installed from PyPI: +cgr --help + +# If running from source: +uv run cgr --help +``` + +> **Note**: When running from source (cloned repo), prefix all `cgr` commands below with `uv run`, e.g., `uv run cgr start ...` + ## 🛠️ Makefile Commands Use the Makefile for common development tasks: @@ -246,6 +331,7 @@ Use the Makefile for common development tasks: | `make format` | Run ruff format | | `make typecheck` | Run type checking with ty | | `make check` | Run all checks: lint, typecheck, test | +| `make release` | Build, verify, and publish the current pyproject version to PyPI, then tag and create a GitHub Release | | `make pre-commit` | Run all pre-commit checks locally (comprehensive test before commit) | @@ -284,12 +370,23 @@ The system automatically detects and processes files for all supported languages ### Step 2: Query the Codebase +**Interactive mode:** + Start the interactive RAG CLI: ```bash cgr start --repo-path /path/to/your/repo ``` +**Non-interactive mode (single query):** + +Run a single query and exit, with output sent to stdout (useful for scripting): + +```bash +python -m codebase_rag.main start --repo-path /path/to/your/repo \ + --ask-agent "What functions call UserService.create_user?" +``` + ### Step 2.5: Real-Time Graph Updates (Optional) For active development, you can keep your knowledge graph automatically synchronized with code changes using the realtime updater. This is particularly useful when you're actively modifying code and want the AI assistant to always work with the latest codebase structure. 
@@ -454,7 +551,7 @@ cgr optimize javascript --repo-path /path/to/frontend \ ``` **Supported Languages for Optimization:** -All supported languages: `python`, `javascript`, `typescript`, `rust`, `go`, `java`, `scala`, `cpp` +All supported languages: `python`, `javascript`, `typescript`, `rust`, `go`, `java`, `scala`, `c`, `cpp` **How It Works:** 1. **Analysis Phase**: The agent analyzes your codebase structure using the knowledge graph @@ -532,13 +629,16 @@ claude mcp add --transport stdio code-graph-rag \ | `list_projects` | List all indexed projects in the knowledge graph database. Returns a list of project names that have been indexed. | | `delete_project` | Delete a specific project from the knowledge graph database. This removes all nodes associated with the project while preserving other projects. Use list_projects first to see available projects. | | `wipe_database` | WARNING: Completely wipe the entire database, removing ALL indexed projects. This cannot be undone. Use delete_project for removing individual projects. | -| `index_repository` | Parse and ingest the repository into the Memgraph knowledge graph. This builds a comprehensive graph of functions, classes, dependencies, and relationships. Note: This preserves other projects - only the current project is re-indexed. | -| `query_code_graph` | Query the codebase knowledge graph using natural language. Ask questions like 'What functions call UserService.create_user?' or 'Show me all classes that implement the Repository interface'. | +| `index_repository` | WARNING: Clears all data for the current project including its embeddings. Parse and ingest the repository into the Memgraph knowledge graph. Use update_repository for incremental updates. Only use when explicitly requested. | +| `update_repository` | Update the repository in the Memgraph knowledge graph without clearing existing data. Use this for incremental updates. 
| +| `query_code_graph` | Query the codebase knowledge graph using natural language. Use semantic_search unless you know the exact names of classes/functions you are searching for. Ask questions like 'What functions call UserService.create_user?' or 'Show me all classes that implement the Repository interface'. | | `get_code_snippet` | Retrieve source code for a function, class, or method by its qualified name. Returns the source code, file path, line numbers, and docstring. | | `surgical_replace_code` | Surgically replace an exact code block in a file using diff-match-patch. Only modifies the exact target block, leaving the rest unchanged. | | `read_file` | Read the contents of a file from the project. Supports pagination for large files. | | `write_file` | Write content to a file, creating it if it doesn't exist. | | `list_directory` | List contents of a directory in the project. | +| `semantic_search` | Performs a semantic search for functions based on a natural language query describing their purpose, returning a list of potential matches with similarity scores. Requires the 'semantic' extra to be installed. | +| `ask_agent` | Ask the Code Graph RAG agent a question about the codebase. Uses the full RAG pipeline to analyze the code graph and provide a detailed answer. Use this for general questions about architecture, functionality, and code relationships. 
| ### Example Usage @@ -561,35 +661,35 @@ The knowledge graph uses the following node types and relationships: | Label | Properties | |-----|----------| | Project | `{name: string}` | -| Package | `{qualified_name: string, name: string, path: string}` | -| Folder | `{path: string, name: string}` | -| File | `{path: string, name: string, extension: string}` | -| Module | `{qualified_name: string, name: string, path: string}` | -| Class | `{qualified_name: string, name: string, decorators: list[string]}` | -| Function | `{qualified_name: string, name: string, decorators: list[string]}` | -| Method | `{qualified_name: string, name: string, decorators: list[string]}` | -| Interface | `{qualified_name: string, name: string}` | -| Enum | `{qualified_name: string, name: string}` | +| Package | `{qualified_name: string, name: string, path: string, absolute_path: string}` | +| Folder | `{path: string, name: string, absolute_path: string}` | +| File | `{path: string, name: string, extension: string, absolute_path: string}` | +| Module | `{qualified_name: string, name: string, path: string, absolute_path: string}` | +| Class | `{qualified_name: string, name: string, decorators: list[string], path: string, absolute_path: string}` | +| Function | `{qualified_name: string, name: string, decorators: list[string], path: string, absolute_path: string}` | +| Method | `{qualified_name: string, name: string, decorators: list[string], path: string, absolute_path: string}` | +| Interface | `{qualified_name: string, name: string, path: string, absolute_path: string}` | +| Enum | `{qualified_name: string, name: string, path: string, absolute_path: string}` | | Type | `{qualified_name: string, name: string}` | | Union | `{qualified_name: string, name: string}` | -| ModuleInterface | `{qualified_name: string, name: string, path: string}` | -| ModuleImplementation | `{qualified_name: string, name: string, path: string, implements_module: string}` | +| ModuleInterface | `{qualified_name: 
string, name: string, path: string, absolute_path: string}` | +| ModuleImplementation | `{qualified_name: string, name: string, path: string, absolute_path: string, implements_module: string}` | | ExternalPackage | `{name: string, version_spec: string}` | ### Language-Specific Mappings +- **C**: `enum_specifier`, `function_definition`, `struct_specifier`, `union_specifier` - **C++**: `class_specifier`, `declaration`, `enum_specifier`, `field_declaration`, `function_definition`, `lambda_expression`, `struct_specifier`, `template_declaration`, `union_specifier` - **Java**: `annotation_type_declaration`, `class_declaration`, `constructor_declaration`, `enum_declaration`, `interface_declaration`, `method_declaration`, `record_declaration` - **JavaScript**: `arrow_function`, `class`, `class_declaration`, `function_declaration`, `function_expression`, `generator_function_declaration`, `method_definition` - **Lua**: `function_declaration`, `function_definition` +- **PHP**: `anonymous_function`, `arrow_function`, `class_declaration`, `enum_declaration`, `function_definition`, `interface_declaration`, `method_declaration`, `trait_declaration` - **Python**: `class_definition`, `function_definition` - **Rust**: `closure_expression`, `enum_item`, `function_item`, `function_signature_item`, `impl_item`, `struct_item`, `trait_item`, `type_item`, `union_item` - **TypeScript**: `abstract_class_declaration`, `arrow_function`, `class`, `class_declaration`, `enum_declaration`, `function_declaration`, `function_expression`, `function_signature`, `generator_function_declaration`, `interface_declaration`, `internal_module`, `method_definition`, `type_alias_declaration` -- **C#**: `anonymous_method_expression`, `class_declaration`, `constructor_declaration`, `destructor_declaration`, `enum_declaration`, `function_pointer_type`, `interface_declaration`, `lambda_expression`, `local_function_statement`, `method_declaration`, `struct_declaration` -- **Go**: `function_declaration`, 
`method_declaration`, `type_declaration` -- **PHP**: `anonymous_function`, `arrow_function`, `class_declaration`, `enum_declaration`, `function_definition`, `function_static_declaration`, `interface_declaration`, `trait_declaration` +- **Go**: `function_declaration`, `method_declaration`, `type_alias`, `type_spec` - **Scala**: `class_definition`, `function_declaration`, `function_definition`, `object_definition`, `trait_definition` @@ -614,6 +714,7 @@ The knowledge graph uses the following node types and relationships: | ModuleImplementation | IMPLEMENTS | ModuleInterface | | Project | DEPENDS_ON_EXTERNAL | ExternalPackage | | Function, Method | CALLS | Function, Method | +| Module, Function, Method | INSTANTIATES | Class | ## 🔧 Configuration @@ -679,6 +780,7 @@ my_build_output - **pydantic-settings**: Settings management using Pydantic - **pymgclient**: Memgraph database adapter for Python language - **python-dotenv**: Read key-value pairs from a .env file and set them as environment variables +- **tiktoken**: tiktoken is a fast BPE tokeniser for use with OpenAI's models - **toml**: Python Library for Tom's Obvious, Minimal Language - **tree-sitter-python**: Python grammar for tree-sitter - **tree-sitter**: Python bindings to the Tree-sitter parsing library @@ -691,6 +793,7 @@ my_build_output - **protobuf** - **defusedxml**: XML bomb protection for Python stdlib modules - **huggingface-hub**: Client library to download and publish models, datasets and other repos on the huggingface.co hub +- **griffe**: Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API. ## 🤖 Agentic Workflow & Tools @@ -705,11 +808,10 @@ The agent has access to a suite of tools to understand and interact with the cod | Tool | Description | |----|-----------| | `query_graph` | Query the codebase knowledge graph using natural language questions. 
Ask in plain English about classes, functions, methods, dependencies, or code structure. Examples: 'Find all functions that call each other', 'What classes are in the user module', 'Show me functions with the longest call chains'. | -| `read_file` | Reads the content of text-based files. For documents like PDFs or images, use the 'analyze_document' tool instead. | +| `read_file` | Reads the content of text-based files. Images and PDFs the user references are attached inline; read them directly. | | `create_file` | Creates a new file with content. IMPORTANT: Check file existence first! Overwrites completely WITHOUT showing diff. Use only for new files, not existing file modifications. | | `replace_code` | Surgically replaces specific code blocks in files. Requires exact target code and replacement. Only modifies the specified block, leaving rest of file unchanged. True surgical patching. | | `list_directory` | Lists the contents of a directory to explore the codebase. | -| `analyze_document` | Analyzes documents (PDFs, images) to answer questions about their content. | | `execute_shell` | Executes shell commands from allowlist. Read-only commands run without approval; write operations require user confirmation. | | `semantic_search` | Performs a semantic search for functions based on a natural language query describing their purpose, returning a list of potential matches with similarity scores. | | `get_function_source` | Retrieves the source code for a specific function or method using its internal node ID, typically obtained from a semantic search result. 
| @@ -887,3 +989,7 @@ We also offer custom development, integration consulting, technical support cont ## Star History [![Star History Chart](https://api.star-history.com/svg?repos=vitali87/code-graph-rag&type=Date)](https://www.star-history.com/#vitali87/code-graph-rag&Date) + +## Fork History + +[![Fork History Chart](https://fork-history.site/svg?repos=vitali87/code-graph-rag)](https://fork-history.site/#vitali87/code-graph-rag) diff --git a/benchmarks/bench_ast_cache.py b/benchmarks/bench_ast_cache.py new file mode 100644 index 000000000..b1e3e65d9 --- /dev/null +++ b/benchmarks/bench_ast_cache.py @@ -0,0 +1,134 @@ +import statistics +import sys +import time +from collections import OrderedDict +from pathlib import Path + +WARMUP_RUNS = 3 +BENCH_RUNS = 50 + + +class MockNode: + __slots__ = ("data",) + + def __init__(self, size: int) -> None: + self.data = b"\x00" * size + + +def bench_ordered_dict_insert(count: int, item_size: int) -> float: + start = time.perf_counter() + cache: OrderedDict[Path, tuple[MockNode, str]] = OrderedDict() + for i in range(count): + key = Path(f"/fake/path/module_{i}.py") + cache[key] = (MockNode(item_size), "python") + return time.perf_counter() - start + + +def bench_ordered_dict_lookup(cache: OrderedDict, keys: list[Path]) -> float: + start = time.perf_counter() + for key in keys: + _ = key in cache + return time.perf_counter() - start + + +def bench_ordered_dict_access_lru(cache: OrderedDict, keys: list[Path]) -> float: + start = time.perf_counter() + for key in keys: + if key in cache: + cache.move_to_end(key) + _ = cache[key] + return time.perf_counter() - start + + +def bench_ordered_dict_eviction(count: int, max_size: int, item_size: int) -> float: + start = time.perf_counter() + cache: OrderedDict[Path, tuple[MockNode, str]] = OrderedDict() + for i in range(count): + key = Path(f"/fake/path/module_{i}.py") + cache[key] = (MockNode(item_size), "python") + while len(cache) > max_size: + cache.popitem(last=False) + return 
time.perf_counter() - start + + +def bench_getsizeof_overhead(cache: OrderedDict) -> float: + start = time.perf_counter() + _ = sum(sys.getsizeof(v) for v in cache.values()) + return time.perf_counter() - start + + +def run_benchmark(name: str, func, *args) -> dict[str, float]: + for _ in range(WARMUP_RUNS): + func(*args) + + times = [] + for _ in range(BENCH_RUNS): + times.append(func(*args)) + + return { + "name": name, + "median_ms": statistics.median(times) * 1000, + "mean_ms": statistics.mean(times) * 1000, + "stddev_ms": statistics.stdev(times) * 1000 if len(times) > 1 else 0, + "min_ms": min(times) * 1000, + "max_ms": max(times) * 1000, + "p95_ms": sorted(times)[int(len(times) * 0.95)] * 1000, + } + + +def print_results(results: list[dict[str, float]]) -> None: + print(f"\n{'Benchmark':<45} {'Median':>10} {'Mean':>10} {'StdDev':>10} {'Min':>10} {'Max':>10} {'P95':>10}") + print("-" * 115) + for r in results: + print( + f"{r['name']:<45} {r['median_ms']:>9.3f}ms {r['mean_ms']:>9.3f}ms " + f"{r['stddev_ms']:>9.3f}ms {r['min_ms']:>9.3f}ms {r['max_ms']:>9.3f}ms " + f"{r['p95_ms']:>9.3f}ms" + ) + + +def main() -> None: + configs = [ + (500, 1024), + (2000, 4096), + (5000, 8192), + ] + + for count, item_size in configs: + print(f"\n{'='*115}") + print(f"BoundedASTCache Benchmark (entries={count}, item_size={item_size}B)") + print(f"{'='*115}") + + results = [] + + r = run_benchmark(f"insert ({count})", bench_ordered_dict_insert, count, item_size) + results.append(r) + + cache: OrderedDict[Path, tuple[MockNode, str]] = OrderedDict() + keys: list[Path] = [] + for i in range(count): + key = Path(f"/fake/path/module_{i}.py") + keys.append(key) + cache[key] = (MockNode(item_size), "python") + + r = run_benchmark(f"lookup ({count})", bench_ordered_dict_lookup, cache, keys) + results.append(r) + + r = run_benchmark(f"access+LRU ({count})", bench_ordered_dict_access_lru, cache, keys) + results.append(r) + + max_size = count // 2 + r = run_benchmark( + f"insert+evict 
(max={max_size})", + bench_ordered_dict_eviction, count, max_size, item_size, + ) + results.append(r) + + r = run_benchmark(f"getsizeof scan ({count})", bench_getsizeof_overhead, cache) + results.append(r) + + print_results(results) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/bench_dropin_replacements.py b/benchmarks/bench_dropin_replacements.py new file mode 100644 index 000000000..ee4eb0b0a --- /dev/null +++ b/benchmarks/bench_dropin_replacements.py @@ -0,0 +1,267 @@ +import hashlib +import json +import os +import statistics +import tempfile +import time +from pathlib import Path + +try: + import blake3 + import orjson +except ImportError as e: + print(f"SKIP bench_dropin_replacements: {e}") + print("Install with: uv pip install blake3 orjson") + raise SystemExit(0) + +WARMUP_RUNS = 3 +BENCH_RUNS = 30 + + +def generate_graph_data(num_nodes: int, num_rels: int) -> dict: + nodes = [] + for i in range(num_nodes): + nodes.append({ + "node_id": i, + "labels": ["Function" if i % 3 == 0 else "Class" if i % 3 == 1 else "Module"], + "properties": { + "qualified_name": f"project.module{i // 100}.Class{i // 10}.method{i}", + "name": f"method{i}", + "start_line": i * 10, + "end_line": i * 10 + 9, + "docstring": f"Method {i} documentation string with some content" if i % 5 == 0 else None, + "decorators": ["staticmethod"] if i % 7 == 0 else [], + "is_exported": i % 4 == 0, + }, + }) + + rels = [] + for i in range(num_rels): + rels.append({ + "from_id": i % num_nodes, + "to_id": (i * 7 + 3) % num_nodes, + "type": "CALLS" if i % 3 == 0 else "DEFINES" if i % 3 == 1 else "IMPORTS", + "properties": {"weight": i % 10} if i % 5 == 0 else {}, + }) + + return { + "nodes": nodes, + "relationships": rels, + "metadata": { + "total_nodes": num_nodes, + "total_relationships": num_rels, + "exported_at": "2026-03-14T10:00:00+00:00", + }, + } + + +def generate_snippets(count: int, avg_length: int = 200) -> list[str]: + import random + import string + random.seed(42) + 
snippets = [] + for _ in range(count): + length = avg_length + random.randint(-50, 50) + snippet = "".join(random.choices(string.ascii_letters + string.digits + " \n\t", k=length)) + snippets.append(snippet) + return snippets + + +def create_test_files(directory: str, count: int, avg_size_kb: int) -> list[Path]: + paths = [] + for i in range(count): + path = Path(directory) / f"file_{i}.py" + content = os.urandom(avg_size_kb * 1024) + path.write_bytes(content) + paths.append(path) + return paths + + +def bench_json_dumps(data: dict) -> float: + start = time.perf_counter() + _ = json.dumps(data) + return time.perf_counter() - start + + +def bench_orjson_dumps(data: dict) -> float: + start = time.perf_counter() + _ = orjson.dumps(data) + return time.perf_counter() - start + + +def bench_json_dumps_indent(data: dict) -> float: + start = time.perf_counter() + _ = json.dumps(data, indent=2, ensure_ascii=False) + return time.perf_counter() - start + + +def bench_orjson_dumps_indent(data: dict) -> float: + start = time.perf_counter() + _ = orjson.dumps(data, option=orjson.OPT_INDENT_2) + return time.perf_counter() - start + + +def bench_json_loads(json_bytes: bytes) -> float: + start = time.perf_counter() + _ = json.loads(json_bytes) + return time.perf_counter() - start + + +def bench_orjson_loads(json_bytes: bytes) -> float: + start = time.perf_counter() + _ = orjson.loads(json_bytes) + return time.perf_counter() - start + + +def bench_sha256_hashing(snippets: list[str]) -> float: + start = time.perf_counter() + for s in snippets: + _ = hashlib.sha256(s.encode()).hexdigest() + return time.perf_counter() - start + + +def bench_blake3_hashing(snippets: list[str]) -> float: + start = time.perf_counter() + for s in snippets: + _ = blake3.blake3(s.encode()).hexdigest() + return time.perf_counter() - start + + +def bench_sha256_file(files: list[Path]) -> float: + start = time.perf_counter() + for f in files: + hasher = hashlib.sha256() + with f.open("rb") as fh: + while chunk 
:= fh.read(8192): + hasher.update(chunk) + _ = hasher.hexdigest() + return time.perf_counter() - start + + +def bench_blake3_file(files: list[Path]) -> float: + start = time.perf_counter() + for f in files: + hasher = blake3.blake3() + with f.open("rb") as fh: + while chunk := fh.read(8192): + hasher.update(chunk) + _ = hasher.hexdigest() + return time.perf_counter() - start + + +def run_benchmark(name: str, func, *args) -> dict[str, float]: + for _ in range(WARMUP_RUNS): + func(*args) + + times = [] + for _ in range(BENCH_RUNS): + times.append(func(*args)) + + return { + "name": name, + "median_ms": statistics.median(times) * 1000, + "mean_ms": statistics.mean(times) * 1000, + "stddev_ms": statistics.stdev(times) * 1000 if len(times) > 1 else 0, + "min_ms": min(times) * 1000, + "max_ms": max(times) * 1000, + "p95_ms": sorted(times)[int(len(times) * 0.95)] * 1000, + } + + +def print_results(results: list[dict[str, float]]) -> None: + print(f"\n{'Benchmark':<50} {'Median':>10} {'Mean':>10} {'StdDev':>10} {'Min':>10} {'Max':>10} {'P95':>10}") + print("-" * 120) + for r in results: + print( + f"{r['name']:<50} {r['median_ms']:>9.3f}ms {r['mean_ms']:>9.3f}ms " + f"{r['stddev_ms']:>9.3f}ms {r['min_ms']:>9.3f}ms {r['max_ms']:>9.3f}ms " + f"{r['p95_ms']:>9.3f}ms" + ) + + +def print_comparison(baseline: dict[str, float], optimized: dict[str, float]) -> None: + speedup = baseline["median_ms"] / optimized["median_ms"] if optimized["median_ms"] > 0 else float("inf") + print(f" -> Speedup: {speedup:.1f}x (median)") + + +def main() -> None: + print("=" * 120) + print("DROP-IN REPLACEMENT BENCHMARKS: Python stdlib vs Rust-backed alternatives") + print("=" * 120) + + # --- JSON Serialization --- + for num_nodes, num_rels in [(1000, 2000), (5000, 10000), (20000, 50000)]: + print(f"\n{'='*120}") + print(f"JSON Serialization: stdlib json vs orjson (nodes={num_nodes}, rels={num_rels})") + print(f"{'='*120}") + + data = generate_graph_data(num_nodes, num_rels) + json_bytes = 
json.dumps(data).encode() + orjson_bytes = orjson.dumps(data) + print(f"Data size: {len(json_bytes) / 1024:.1f} KB") + + results = [] + + r1 = run_benchmark(f"json.dumps compact ({num_nodes}n)", bench_json_dumps, data) + results.append(r1) + r2 = run_benchmark(f"orjson.dumps compact ({num_nodes}n)", bench_orjson_dumps, data) + results.append(r2) + + r3 = run_benchmark(f"json.dumps indented ({num_nodes}n)", bench_json_dumps_indent, data) + results.append(r3) + r4 = run_benchmark(f"orjson.dumps indented ({num_nodes}n)", bench_orjson_dumps_indent, data) + results.append(r4) + + r5 = run_benchmark(f"json.loads ({num_nodes}n)", bench_json_loads, json_bytes) + results.append(r5) + r6 = run_benchmark(f"orjson.loads ({num_nodes}n)", bench_orjson_loads, orjson_bytes) + results.append(r6) + + print_results(results) + + print("\nSpeedups:") + print(f" dumps compact: {r1['median_ms'] / r2['median_ms']:.1f}x") + print(f" dumps indented: {r3['median_ms'] / r4['median_ms']:.1f}x") + print(f" loads: {r5['median_ms'] / r6['median_ms']:.1f}x") + + # --- Hashing: SHA256 vs BLAKE3 --- + print(f"\n\n{'='*120}") + print("Hashing: hashlib.sha256 vs blake3 (snippet hashing for EmbeddingCache)") + print(f"{'='*120}") + + for size in [500, 2000, 10000]: + snippets = generate_snippets(size) + print(f"\n--- Snippet count: {size} ---") + + results = [] + r1 = run_benchmark(f"hashlib.sha256 ({size} snippets)", bench_sha256_hashing, snippets) + results.append(r1) + r2 = run_benchmark(f"blake3 ({size} snippets)", bench_blake3_hashing, snippets) + results.append(r2) + + print_results(results) + print(f" Speedup: {r1['median_ms'] / r2['median_ms']:.1f}x") + + # --- File Hashing --- + print(f"\n\n{'='*120}") + print("File Hashing: SHA256 vs BLAKE3 (incremental build file change detection)") + print(f"{'='*120}") + + for file_count, avg_size_kb in [(50, 5), (200, 10), (500, 20)]: + with tempfile.TemporaryDirectory() as tmpdir: + files = create_test_files(tmpdir, file_count, avg_size_kb) + total_mb = 
sum(f.stat().st_size for f in files) / (1024 * 1024) + print(f"\n--- Files: {file_count}, Total: {total_mb:.1f} MB ---") + + results = [] + r1 = run_benchmark(f"sha256 ({file_count}f, {avg_size_kb}KB avg)", bench_sha256_file, files) + results.append(r1) + r2 = run_benchmark(f"blake3 ({file_count}f, {avg_size_kb}KB avg)", bench_blake3_file, files) + results.append(r2) + + print_results(results) + print(f" Speedup: {r1['median_ms'] / r2['median_ms']:.1f}x") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/bench_embedding_cache.py b/benchmarks/bench_embedding_cache.py new file mode 100644 index 000000000..b63e93338 --- /dev/null +++ b/benchmarks/bench_embedding_cache.py @@ -0,0 +1,130 @@ +import hashlib +import random +import statistics +import string +import time + +from codebase_rag.embedder import EmbeddingCache + +WARMUP_RUNS = 3 +BENCH_RUNS = 50 +EMBEDDING_DIM = 768 + + +def generate_snippets(count: int, avg_length: int = 200) -> list[str]: + snippets = [] + for i in range(count): + length = avg_length + random.randint(-50, 50) + snippet = "".join(random.choices(string.ascii_letters + string.digits + " \n\t", k=length)) + snippets.append(snippet) + return snippets + + +def generate_embedding() -> list[float]: + return [random.random() for _ in range(EMBEDDING_DIM)] + + +def bench_sha256_hashing(snippets: list[str]) -> float: + start = time.perf_counter() + for s in snippets: + _ = hashlib.sha256(s.encode()).hexdigest() + return time.perf_counter() - start + + +def bench_cache_put(cache: EmbeddingCache, snippets: list[str], embeddings: list[list[float]]) -> float: + start = time.perf_counter() + for s, e in zip(snippets, embeddings): + cache.put(s, e) + return time.perf_counter() - start + + +def bench_cache_get_hit(cache: EmbeddingCache, snippets: list[str]) -> float: + start = time.perf_counter() + for s in snippets: + _ = cache.get(s) + return time.perf_counter() - start + + +def bench_cache_get_miss(cache: EmbeddingCache, miss_snippets: 
list[str]) -> float: + start = time.perf_counter() + for s in miss_snippets: + _ = cache.get(s) + return time.perf_counter() - start + + +def bench_cache_get_many(cache: EmbeddingCache, snippets: list[str]) -> float: + start = time.perf_counter() + _ = cache.get_many(snippets) + return time.perf_counter() - start + + +def run_benchmark(name: str, func, *args) -> dict[str, float]: + for _ in range(WARMUP_RUNS): + func(*args) + + times = [] + for _ in range(BENCH_RUNS): + times.append(func(*args)) + + return { + "name": name, + "median_ms": statistics.median(times) * 1000, + "mean_ms": statistics.mean(times) * 1000, + "stddev_ms": statistics.stdev(times) * 1000 if len(times) > 1 else 0, + "min_ms": min(times) * 1000, + "max_ms": max(times) * 1000, + "p95_ms": sorted(times)[int(len(times) * 0.95)] * 1000, + } + + +def print_results(results: list[dict[str, float]]) -> None: + print(f"\n{'Benchmark':<40} {'Median':>10} {'Mean':>10} {'StdDev':>10} {'Min':>10} {'Max':>10} {'P95':>10}") + print("-" * 110) + for r in results: + print( + f"{r['name']:<40} {r['median_ms']:>9.3f}ms {r['mean_ms']:>9.3f}ms " + f"{r['stddev_ms']:>9.3f}ms {r['min_ms']:>9.3f}ms {r['max_ms']:>9.3f}ms " + f"{r['p95_ms']:>9.3f}ms" + ) + + +def main() -> None: + random.seed(42) + + sizes = [500, 2000, 10000] + + for size in sizes: + print(f"\n{'='*110}") + print(f"EmbeddingCache Benchmark (n={size})") + print(f"{'='*110}") + + snippets = generate_snippets(size) + embeddings = [generate_embedding() for _ in range(size)] + miss_snippets = generate_snippets(size, avg_length=300) + + results = [] + + r = run_benchmark(f"sha256 hashing ({size})", bench_sha256_hashing, snippets) + results.append(r) + + cache = EmbeddingCache() + r = run_benchmark(f"cache.put ({size})", bench_cache_put, cache, snippets, embeddings) + results.append(r) + + cache = EmbeddingCache() + cache.put_many(snippets, embeddings) + + r = run_benchmark(f"cache.get hit ({size})", bench_cache_get_hit, cache, snippets) + results.append(r) + 
+ r = run_benchmark(f"cache.get miss ({size})", bench_cache_get_miss, cache, miss_snippets) + results.append(r) + + r = run_benchmark(f"cache.get_many ({size})", bench_cache_get_many, cache, snippets) + results.append(r) + + print_results(results) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/bench_file_hashing.py b/benchmarks/bench_file_hashing.py new file mode 100644 index 000000000..3be76059b --- /dev/null +++ b/benchmarks/bench_file_hashing.py @@ -0,0 +1,138 @@ +import hashlib +import os +import statistics +import tempfile +import time +from pathlib import Path + +WARMUP_RUNS = 3 +BENCH_RUNS = 30 + + +def create_test_files(directory: str, count: int, avg_size_kb: int) -> list[Path]: + paths = [] + for i in range(count): + path = Path(directory) / f"file_{i}.py" + content = os.urandom(avg_size_kb * 1024) + path.write_bytes(content) + paths.append(path) + return paths + + +def hash_file_sha256(filepath: Path) -> str: + hasher = hashlib.sha256() + with filepath.open("rb") as f: + while chunk := f.read(8192): + hasher.update(chunk) + return hasher.hexdigest() + + +def hash_file_sha256_large_buffer(filepath: Path) -> str: + hasher = hashlib.sha256() + with filepath.open("rb") as f: + while chunk := f.read(65536): + hasher.update(chunk) + return hasher.hexdigest() + + +def hash_file_sha256_mmap(filepath: Path) -> str: + import mmap + hasher = hashlib.sha256() + with filepath.open("rb") as f: + with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm: + hasher.update(mm) + return hasher.hexdigest() + + +def hash_file_md5(filepath: Path) -> str: + hasher = hashlib.md5() + with filepath.open("rb") as f: + while chunk := f.read(8192): + hasher.update(chunk) + return hasher.hexdigest() + + +def hash_file_blake2b(filepath: Path) -> str: + hasher = hashlib.blake2b() + with filepath.open("rb") as f: + while chunk := f.read(8192): + hasher.update(chunk) + return hasher.hexdigest() + + +def bench_hash_files(files: list[Path], hash_func) -> float: + start 
= time.perf_counter() + for f in files: + _ = hash_func(f) + return time.perf_counter() - start + + +def run_benchmark(name: str, func, *args) -> dict[str, float]: + for _ in range(WARMUP_RUNS): + func(*args) + + times = [] + for _ in range(BENCH_RUNS): + times.append(func(*args)) + + return { + "name": name, + "median_ms": statistics.median(times) * 1000, + "mean_ms": statistics.mean(times) * 1000, + "stddev_ms": statistics.stdev(times) * 1000 if len(times) > 1 else 0, + "min_ms": min(times) * 1000, + "max_ms": max(times) * 1000, + "p95_ms": sorted(times)[int(len(times) * 0.95)] * 1000, + } + + +def print_results(results: list[dict[str, float]]) -> None: + print(f"\n{'Benchmark':<45} {'Median':>10} {'Mean':>10} {'StdDev':>10} {'Min':>10} {'Max':>10} {'P95':>10}") + print("-" * 115) + for r in results: + print( + f"{r['name']:<45} {r['median_ms']:>9.3f}ms {r['mean_ms']:>9.3f}ms " + f"{r['stddev_ms']:>9.3f}ms {r['min_ms']:>9.3f}ms {r['max_ms']:>9.3f}ms " + f"{r['p95_ms']:>9.3f}ms" + ) + + +def main() -> None: + configs = [ + (50, 5), + (200, 10), + (500, 20), + ] + + for file_count, avg_size_kb in configs: + print(f"\n{'='*115}") + print(f"File Hashing Benchmark (files={file_count}, avg_size={avg_size_kb}KB)") + print(f"{'='*115}") + + with tempfile.TemporaryDirectory() as tmpdir: + files = create_test_files(tmpdir, file_count, avg_size_kb) + total_mb = sum(f.stat().st_size for f in files) / (1024 * 1024) + print(f"Total data: {total_mb:.1f} MB") + + results = [] + + r = run_benchmark(f"sha256 8KB buf ({file_count}f)", bench_hash_files, files, hash_file_sha256) + results.append(r) + + r = run_benchmark(f"sha256 64KB buf ({file_count}f)", bench_hash_files, files, hash_file_sha256_large_buffer) + results.append(r) + + r = run_benchmark(f"sha256 mmap ({file_count}f)", bench_hash_files, files, hash_file_sha256_mmap) + results.append(r) + + r = run_benchmark(f"md5 ({file_count}f)", bench_hash_files, files, hash_file_md5) + results.append(r) + + r = 
run_benchmark(f"blake2b ({file_count}f)", bench_hash_files, files, hash_file_blake2b) + results.append(r) + + print_results(results) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/bench_find_ending_with_fix.py b/benchmarks/bench_find_ending_with_fix.py new file mode 100644 index 000000000..c9ef01cae --- /dev/null +++ b/benchmarks/bench_find_ending_with_fix.py @@ -0,0 +1,218 @@ +import statistics +import time +from collections import defaultdict + +from codebase_rag.graph_updater import FunctionRegistryTrie +from codebase_rag.types_defs import NodeType, SimpleNameLookup + +WARMUP_RUNS = 3 +BENCH_RUNS = 30 + + +def generate_realistic_registry(count: int) -> tuple[list[str], list[str]]: + modules = ["codebase_rag", "utils", "parsers", "services", "tools", "models"] + submodules = ["core", "api", "handlers", "helpers", "base", "factory"] + classes = ["Handler", "Manager", "Factory", "Builder", "Processor", "Resolver", + "Analyzer", "Extractor", "Generator", "Validator"] + methods = ["process", "handle", "create", "build", "resolve", "validate", + "execute", "parse", "extract", "transform", "analyze", "generate", + "find", "get", "set", "update", "delete", "check"] + + qualified_names = [] + for i in range(count): + mod = modules[i % len(modules)] + sub = submodules[(i // len(modules)) % len(submodules)] + cls = classes[(i // (len(modules) * len(submodules))) % len(classes)] + meth = methods[(i // (len(modules) * len(submodules) * len(classes))) % len(methods)] + qualified_names.append(f"{mod}.{sub}.{cls}.method_{i}.{meth}") + + lookup_suffixes = methods + [f"method_{i}" for i in range(0, count, count // 20)] + return qualified_names, lookup_suffixes + + +def bench_linear_scan_endswith(entries: dict[str, NodeType], suffix: str) -> float: + start = time.perf_counter() + _ = [qn for qn in entries.keys() if qn.endswith(f".{suffix}")] + return time.perf_counter() - start + + +def bench_indexed_lookup(lookup: SimpleNameLookup, suffix: str) -> float: + start 
= time.perf_counter() + _ = list(lookup.get(suffix, set())) + return time.perf_counter() - start + + +def bench_trie_find_ending_with_index_hit( + trie: FunctionRegistryTrie, suffixes: list[str], indexed_suffixes: set[str] +) -> float: + start = time.perf_counter() + for suffix in suffixes: + if suffix in indexed_suffixes: + _ = trie.find_ending_with(suffix) + return time.perf_counter() - start + + +def bench_trie_find_ending_with_index_miss( + trie: FunctionRegistryTrie, suffixes: list[str], indexed_suffixes: set[str] +) -> float: + start = time.perf_counter() + for suffix in suffixes: + if suffix not in indexed_suffixes: + _ = trie.find_ending_with(suffix) + return time.perf_counter() - start + + +def bench_trie_find_ending_with_all( + trie: FunctionRegistryTrie, suffixes: list[str] +) -> float: + start = time.perf_counter() + for suffix in suffixes: + _ = trie.find_ending_with(suffix) + return time.perf_counter() - start + + +def bench_linear_scan_batch(entries: dict[str, NodeType], suffixes: list[str]) -> float: + start = time.perf_counter() + for suffix in suffixes: + _ = [qn for qn in entries.keys() if qn.endswith(f".{suffix}")] + return time.perf_counter() - start + + +def bench_indexed_lookup_batch(lookup: SimpleNameLookup, suffixes: list[str]) -> float: + start = time.perf_counter() + for suffix in suffixes: + _ = list(lookup.get(suffix, set())) + return time.perf_counter() - start + + +def bench_full_suffix_index_batch( + suffix_index: dict[str, set[str]], suffixes: list[str] +) -> float: + start = time.perf_counter() + for suffix in suffixes: + _ = list(suffix_index.get(suffix, set())) + return time.perf_counter() - start + + +def build_full_suffix_index(qualified_names: list[str]) -> dict[str, set[str]]: + index: dict[str, set[str]] = defaultdict(set) + for qn in qualified_names: + simple_name = qn.rsplit(".", 1)[-1] + index[simple_name].add(qn) + return dict(index) + + +def run_benchmark(name: str, func, *args) -> dict[str, float]: + for _ in 
range(WARMUP_RUNS): + func(*args) + + times = [] + for _ in range(BENCH_RUNS): + times.append(func(*args)) + + return { + "name": name, + "median_ms": statistics.median(times) * 1000, + "mean_ms": statistics.mean(times) * 1000, + "stddev_ms": statistics.stdev(times) * 1000 if len(times) > 1 else 0, + "min_ms": min(times) * 1000, + "max_ms": max(times) * 1000, + "p95_ms": sorted(times)[int(len(times) * 0.95)] * 1000, + } + + +def print_results(results: list[dict[str, float]]) -> None: + print(f"\n{'Benchmark':<55} {'Median':>10} {'Mean':>10} {'StdDev':>10} {'Min':>10} {'Max':>10} {'P95':>10}") + print("-" * 125) + for r in results: + print( + f"{r['name']:<55} {r['median_ms']:>9.3f}ms {r['mean_ms']:>9.3f}ms " + f"{r['stddev_ms']:>9.3f}ms {r['min_ms']:>9.3f}ms {r['max_ms']:>9.3f}ms " + f"{r['p95_ms']:>9.3f}ms" + ) + + +def main() -> None: + print("=" * 125) + print("find_ending_with FIX BENCHMARK: Linear Scan vs Indexed Lookup") + print("This benchmarks the #1 CPU hotspot (48.3% of total runtime)") + print("=" * 125) + + sizes = [1000, 4500, 10000] + + for size in sizes: + print(f"\n{'='*125}") + print(f"Registry size: {size} entries") + print(f"{'='*125}") + + qualified_names, lookup_suffixes = generate_realistic_registry(size) + + simple_lookup: SimpleNameLookup = defaultdict(set) + trie = FunctionRegistryTrie(simple_name_lookup=simple_lookup) + for qn in qualified_names: + trie.insert(qn, NodeType.FUNCTION) + simple_name = qn.rsplit(".", 1)[-1] + simple_lookup[simple_name].add(qn) + + full_suffix_index = build_full_suffix_index(qualified_names) + + partially_indexed_suffixes = set(list(simple_lookup.keys())[:len(simple_lookup) // 5]) + miss_suffixes = [s for s in lookup_suffixes if s not in partially_indexed_suffixes] + + results = [] + + print(f"\nSingle-suffix operations (on '{lookup_suffixes[0]}'):") + r = run_benchmark( + f"LINEAR SCAN endswith ({size} entries)", + bench_linear_scan_endswith, dict(trie.items()), lookup_suffixes[0], + ) + results.append(r) + + 
r = run_benchmark( + f"INDEXED lookup (hit) ({size} entries)", + bench_indexed_lookup, simple_lookup, lookup_suffixes[0], + ) + results.append(r) + + print_results(results) + if results[1]["median_ms"] > 0: + speedup = results[0]["median_ms"] / results[1]["median_ms"] + print(f"\n -> Index hit speedup: {speedup:.0f}x") + + results = [] + num_queries = len(lookup_suffixes) + print(f"\nBatch operations ({num_queries} queries, simulating call resolution):") + + r = run_benchmark( + f"LINEAR SCAN batch ({num_queries}q, {size} entries)", + bench_linear_scan_batch, dict(trie.items()), lookup_suffixes, + ) + results.append(r) + + r = run_benchmark( + f"PARTIAL INDEX batch ({num_queries}q, {size} entries)", + bench_trie_find_ending_with_all, trie, lookup_suffixes, + ) + results.append(r) + + r = run_benchmark( + f"FULL SUFFIX INDEX batch ({num_queries}q, {size} entries)", + bench_full_suffix_index_batch, full_suffix_index, lookup_suffixes, + ) + results.append(r) + + print_results(results) + + if results[2]["median_ms"] > 0: + print(f"\n -> Linear scan vs full index: {results[0]['median_ms'] / results[2]['median_ms']:.0f}x speedup") + print(f" -> Partial index vs full index: {results[1]['median_ms'] / results[2]['median_ms']:.1f}x speedup") + + print(f"\n\n{'='*125}") + print("CONCLUSION: The 48.3% CPU hotspot is caused by linear scans on index misses.") + print("Building a complete suffix index eliminates the bottleneck entirely.") + print("This is a pure Python fix requiring zero FFI, zero new dependencies.") + print(f"{'='*125}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/bench_graph_loader.py b/benchmarks/bench_graph_loader.py new file mode 100644 index 000000000..f93ccd7a4 --- /dev/null +++ b/benchmarks/bench_graph_loader.py @@ -0,0 +1,169 @@ +import json +import statistics +import tempfile +import time +from pathlib import Path + +from codebase_rag.graph_loader import GraphLoader + +WARMUP_RUNS = 2 +BENCH_RUNS = 20 + + +def 
generate_graph_json(num_nodes: int, num_rels: int) -> str: + nodes = [] + for i in range(num_nodes): + nodes.append({ + "node_id": i, + "labels": ["Function" if i % 3 == 0 else "Class" if i % 3 == 1 else "Module"], + "properties": { + "qualified_name": f"project.module{i // 100}.Class{i // 10}.method{i}", + "name": f"method{i}", + "start_line": i * 10, + "end_line": i * 10 + 9, + }, + }) + + rels = [] + for i in range(num_rels): + rels.append({ + "from_id": i % num_nodes, + "to_id": (i * 7 + 3) % num_nodes, + "type": "CALLS" if i % 2 == 0 else "DEFINES", + "properties": {}, + }) + + graph = { + "nodes": nodes, + "relationships": rels, + "metadata": { + "total_nodes": num_nodes, + "total_relationships": num_rels, + }, + } + return json.dumps(graph) + + +def bench_json_parse(json_str: str) -> float: + start = time.perf_counter() + _ = json.loads(json_str) + return time.perf_counter() - start + + +def bench_graph_load(file_path: str) -> float: + start = time.perf_counter() + loader = GraphLoader(file_path) + loader.load() + return time.perf_counter() - start + + +def bench_find_nodes_by_label(loader: GraphLoader) -> float: + labels = ["Function", "Class", "Module"] + start = time.perf_counter() + for label in labels: + _ = loader.find_nodes_by_label(label) + return time.perf_counter() - start + + +def bench_find_node_by_property(loader: GraphLoader) -> float: + start = time.perf_counter() + for i in range(100): + qn = f"project.module{i}.Class{i * 10 // 10}.method{i * 10}" + _ = loader.find_node_by_property("qualified_name", qn) + return time.perf_counter() - start + + +def bench_get_relationships(loader: GraphLoader, num_nodes: int) -> float: + start = time.perf_counter() + for i in range(min(500, num_nodes)): + _ = loader.get_relationships_for_node(i) + return time.perf_counter() - start + + +def bench_summary(loader: GraphLoader) -> float: + start = time.perf_counter() + _ = loader.summary() + return time.perf_counter() - start + + +def run_benchmark(name: str, 
func, *args) -> dict[str, float]: + for _ in range(WARMUP_RUNS): + func(*args) + + times = [] + for _ in range(BENCH_RUNS): + times.append(func(*args)) + + return { + "name": name, + "median_ms": statistics.median(times) * 1000, + "mean_ms": statistics.mean(times) * 1000, + "stddev_ms": statistics.stdev(times) * 1000 if len(times) > 1 else 0, + "min_ms": min(times) * 1000, + "max_ms": max(times) * 1000, + "p95_ms": sorted(times)[int(len(times) * 0.95)] * 1000, + } + + +def print_results(results: list[dict[str, float]]) -> None: + print(f"\n{'Benchmark':<40} {'Median':>10} {'Mean':>10} {'StdDev':>10} {'Min':>10} {'Max':>10} {'P95':>10}") + print("-" * 110) + for r in results: + print( + f"{r['name']:<40} {r['median_ms']:>9.3f}ms {r['mean_ms']:>9.3f}ms " + f"{r['stddev_ms']:>9.3f}ms {r['min_ms']:>9.3f}ms {r['max_ms']:>9.3f}ms " + f"{r['p95_ms']:>9.3f}ms" + ) + + +def main() -> None: + configs = [ + (1000, 2000), + (5000, 10000), + (20000, 50000), + ] + + for num_nodes, num_rels in configs: + print(f"\n{'='*110}") + print(f"GraphLoader Benchmark (nodes={num_nodes}, rels={num_rels})") + print(f"{'='*110}") + + json_str = generate_graph_json(num_nodes, num_rels) + print(f"JSON size: {len(json_str) / 1024:.1f} KB") + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".json", delete=False + ) as tmp: + tmp.write(json_str) + tmp_path = tmp.name + + results = [] + + r = run_benchmark(f"json.loads ({num_nodes}n)", bench_json_parse, json_str) + results.append(r) + + r = run_benchmark(f"GraphLoader.load ({num_nodes}n)", bench_graph_load, tmp_path) + results.append(r) + + loader = GraphLoader(tmp_path) + loader.load() + + r = run_benchmark(f"find_nodes_by_label ({num_nodes}n)", bench_find_nodes_by_label, loader) + results.append(r) + + r = run_benchmark(f"find_node_by_property ({num_nodes}n)", bench_find_node_by_property, loader) + results.append(r) + + r = run_benchmark(f"get_relationships ({num_nodes}n)", bench_get_relationships, loader, num_nodes) + results.append(r) + 
+ r = run_benchmark(f"summary ({num_nodes}n)", bench_summary, loader) + results.append(r) + + print_results(results) + + Path(tmp_path).unlink(missing_ok=True) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/bench_json_serialization.py b/benchmarks/bench_json_serialization.py new file mode 100644 index 000000000..98fc477f7 --- /dev/null +++ b/benchmarks/bench_json_serialization.py @@ -0,0 +1,159 @@ +import json +import statistics +import tempfile +import time +from pathlib import Path + +WARMUP_RUNS = 3 +BENCH_RUNS = 20 + + +def generate_graph_data(num_nodes: int, num_rels: int) -> dict: + nodes = [] + for i in range(num_nodes): + nodes.append({ + "id": i, + "labels": ["Function" if i % 3 == 0 else "Class" if i % 3 == 1 else "Module"], + "properties": { + "qualified_name": f"project.module{i // 100}.Class{i // 10}.method{i}", + "name": f"method{i}", + "start_line": i * 10, + "end_line": i * 10 + 9, + "docstring": f"Method {i} documentation string with some content" if i % 5 == 0 else None, + "decorators": ["staticmethod"] if i % 7 == 0 else [], + "is_exported": i % 4 == 0, + }, + }) + + rels = [] + for i in range(num_rels): + rels.append({ + "from_id": i % num_nodes, + "to_id": (i * 7 + 3) % num_nodes, + "type": "CALLS" if i % 3 == 0 else "DEFINES" if i % 3 == 1 else "IMPORTS", + "properties": {"weight": i % 10} if i % 5 == 0 else {}, + }) + + return { + "nodes": nodes, + "relationships": rels, + "metadata": { + "total_nodes": num_nodes, + "total_relationships": num_rels, + "exported_at": "2026-03-14T10:00:00+00:00", + }, + } + + +def bench_json_dumps(data: dict) -> float: + start = time.perf_counter() + _ = json.dumps(data) + return time.perf_counter() - start + + +def bench_json_dumps_indent(data: dict) -> float: + start = time.perf_counter() + _ = json.dumps(data, indent=2, ensure_ascii=False) + return time.perf_counter() - start + + +def bench_json_loads(json_str: str) -> float: + start = time.perf_counter() + _ = json.loads(json_str) + return 
time.perf_counter() - start + + +def bench_json_dump_file(data: dict, path: str) -> float: + start = time.perf_counter() + with open(path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + return time.perf_counter() - start + + +def bench_json_load_file(path: str) -> float: + start = time.perf_counter() + with open(path, encoding="utf-8") as f: + _ = json.load(f) + return time.perf_counter() - start + + +def run_benchmark(name: str, func, *args) -> dict[str, float]: + for _ in range(WARMUP_RUNS): + func(*args) + + times = [] + for _ in range(BENCH_RUNS): + times.append(func(*args)) + + return { + "name": name, + "median_ms": statistics.median(times) * 1000, + "mean_ms": statistics.mean(times) * 1000, + "stddev_ms": statistics.stdev(times) * 1000 if len(times) > 1 else 0, + "min_ms": min(times) * 1000, + "max_ms": max(times) * 1000, + "p95_ms": sorted(times)[int(len(times) * 0.95)] * 1000, + } + + +def print_results(results: list[dict[str, float]]) -> None: + print(f"\n{'Benchmark':<45} {'Median':>10} {'Mean':>10} {'StdDev':>10} {'Min':>10} {'Max':>10} {'P95':>10}") + print("-" * 115) + for r in results: + print( + f"{r['name']:<45} {r['median_ms']:>9.3f}ms {r['mean_ms']:>9.3f}ms " + f"{r['stddev_ms']:>9.3f}ms {r['min_ms']:>9.3f}ms {r['max_ms']:>9.3f}ms " + f"{r['p95_ms']:>9.3f}ms" + ) + + +def main() -> None: + configs = [ + (1000, 2000), + (5000, 10000), + (20000, 50000), + ] + + for num_nodes, num_rels in configs: + print(f"\n{'='*115}") + print(f"JSON Serialization Benchmark (nodes={num_nodes}, rels={num_rels})") + print(f"{'='*115}") + + data = generate_graph_data(num_nodes, num_rels) + json_str = json.dumps(data) + json_str_indented = json.dumps(data, indent=2, ensure_ascii=False) + print(f"Compact JSON: {len(json_str) / 1024:.1f} KB, Indented: {len(json_str_indented) / 1024:.1f} KB") + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".json", delete=False + ) as tmp: + json.dump(data, tmp, indent=2, ensure_ascii=False) + 
tmp_path = tmp.name + + results = [] + + r = run_benchmark(f"json.dumps compact ({num_nodes}n)", bench_json_dumps, data) + results.append(r) + + r = run_benchmark(f"json.dumps indented ({num_nodes}n)", bench_json_dumps_indent, data) + results.append(r) + + r = run_benchmark(f"json.loads compact ({num_nodes}n)", bench_json_loads, json_str) + results.append(r) + + r = run_benchmark(f"json.loads indented ({num_nodes}n)", bench_json_loads, json_str_indented) + results.append(r) + + r = run_benchmark(f"json.dump to file ({num_nodes}n)", bench_json_dump_file, data, tmp_path) + results.append(r) + + r = run_benchmark(f"json.load from file ({num_nodes}n)", bench_json_load_file, tmp_path) + results.append(r) + + print_results(results) + + Path(tmp_path).unlink(missing_ok=True) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/bench_pathlib_vs_string.py b/benchmarks/bench_pathlib_vs_string.py new file mode 100644 index 000000000..1794b2cef --- /dev/null +++ b/benchmarks/bench_pathlib_vs_string.py @@ -0,0 +1,214 @@ +import os +import statistics +import time +from pathlib import Path, PurePosixPath + +WARMUP_RUNS = 3 +BENCH_RUNS = 50 + + +def generate_file_paths(repo_root: str, count: int) -> list[str]: + dirs = ["src", "lib", "utils", "core", "parsers", "services", "tools", "tests"] + subdirs = ["base", "handlers", "helpers", "models", "schemas", "config"] + extensions = [".py", ".js", ".ts", ".rs", ".go", ".java", ".cpp"] + + paths = [] + for i in range(count): + d = dirs[i % len(dirs)] + sd = subdirs[(i // len(dirs)) % len(subdirs)] + ext = extensions[(i // (len(dirs) * len(subdirs))) % len(extensions)] + paths.append(f"{repo_root}/{d}/{sd}/module_{i}{ext}") + return paths + + +def generate_skip_patterns() -> list[str]: + return [ + "node_modules", ".git", "__pycache__", ".venv", "dist", "build", + ".mypy_cache", ".pytest_cache", ".tox", "egg-info", + ] + + +def bench_pathlib_relative_to(paths: list[str], repo_root: str) -> float: + repo_path = 
Path(repo_root) + start = time.perf_counter() + for p in paths: + path = Path(p) + _ = path.relative_to(repo_path) + return time.perf_counter() - start + + +def bench_string_removeprefix(paths: list[str], repo_root: str) -> float: + prefix = repo_root + "/" + start = time.perf_counter() + for p in paths: + _ = p.removeprefix(prefix) + return time.perf_counter() - start + + +def bench_os_path_relpath(paths: list[str], repo_root: str) -> float: + start = time.perf_counter() + for p in paths: + _ = os.path.relpath(p, repo_root) + return time.perf_counter() - start + + +def bench_pathlib_should_skip(paths: list[str], repo_root: str, skip_patterns: list[str]) -> float: + repo_path = Path(repo_root) + skip_set = set(skip_patterns) + start = time.perf_counter() + for p in paths: + path = Path(p) + try: + relative = path.relative_to(repo_path) + parts = relative.parts + _ = any(part in skip_set for part in parts) + except ValueError: + pass + return time.perf_counter() - start + + +def bench_string_should_skip(paths: list[str], repo_root: str, skip_patterns: list[str]) -> float: + prefix = repo_root + "/" + skip_set = set(skip_patterns) + start = time.perf_counter() + for p in paths: + relative = p.removeprefix(prefix) + parts = relative.split("/") + _ = any(part in skip_set for part in parts) + return time.perf_counter() - start + + +def bench_pathlib_suffix_check(paths: list[str]) -> float: + start = time.perf_counter() + for p in paths: + path = Path(p) + _ = path.suffix + return time.perf_counter() - start + + +def bench_string_suffix_check(paths: list[str]) -> float: + start = time.perf_counter() + for p in paths: + dot_idx = p.rfind(".") + _ = p[dot_idx:] if dot_idx >= 0 else "" + return time.perf_counter() - start + + +def bench_os_path_splitext(paths: list[str]) -> float: + start = time.perf_counter() + for p in paths: + _, _ = os.path.splitext(p) + return time.perf_counter() - start + + +def bench_pathlib_name(paths: list[str]) -> float: + start = 
time.perf_counter() + for p in paths: + path = Path(p) + _ = path.name + return time.perf_counter() - start + + +def bench_string_name(paths: list[str]) -> float: + start = time.perf_counter() + for p in paths: + slash_idx = p.rfind("/") + _ = p[slash_idx + 1:] if slash_idx >= 0 else p + return time.perf_counter() - start + + +def run_benchmark(name: str, func, *args) -> dict[str, float]: + for _ in range(WARMUP_RUNS): + func(*args) + + times = [] + for _ in range(BENCH_RUNS): + times.append(func(*args)) + + return { + "name": name, + "median_ms": statistics.median(times) * 1000, + "mean_ms": statistics.mean(times) * 1000, + "stddev_ms": statistics.stdev(times) * 1000 if len(times) > 1 else 0, + "min_ms": min(times) * 1000, + "max_ms": max(times) * 1000, + "p95_ms": sorted(times)[int(len(times) * 0.95)] * 1000, + } + + +def print_results(results: list[dict[str, float]]) -> None: + print(f"\n{'Benchmark':<55} {'Median':>10} {'Mean':>10} {'StdDev':>10} {'Min':>10} {'Max':>10} {'P95':>10}") + print("-" * 125) + for r in results: + print( + f"{r['name']:<55} {r['median_ms']:>9.3f}ms {r['mean_ms']:>9.3f}ms " + f"{r['stddev_ms']:>9.3f}ms {r['min_ms']:>9.3f}ms {r['max_ms']:>9.3f}ms " + f"{r['p95_ms']:>9.3f}ms" + ) + + +def main() -> None: + print("=" * 125) + print("pathlib vs String Operations Benchmark") + print("This benchmarks the #2 CPU hotspot (13.7% of total runtime)") + print("=" * 125) + + repo_root = "/Users/developer/projects/large-repo" + skip_patterns = generate_skip_patterns() + + for count in [1000, 5000, 20000, 59012]: + print(f"\n{'='*125}") + print(f"Path count: {count} (59012 = actual profiled call count)") + print(f"{'='*125}") + + paths = generate_file_paths(repo_root, count) + + results = [] + + print("\n--- relative_to vs removeprefix ---") + r1 = run_benchmark(f"pathlib.relative_to ({count}p)", bench_pathlib_relative_to, paths, repo_root) + results.append(r1) + r2 = run_benchmark(f"str.removeprefix ({count}p)", bench_string_removeprefix, paths, 
repo_root) + results.append(r2) + r3 = run_benchmark(f"os.path.relpath ({count}p)", bench_os_path_relpath, paths, repo_root) + results.append(r3) + + print_results(results) + print(f"\n -> pathlib vs str.removeprefix: {r1['median_ms'] / r2['median_ms']:.0f}x slower") + print(f" -> pathlib vs os.path.relpath: {r1['median_ms'] / r3['median_ms']:.1f}x slower") + + results = [] + print("\n--- should_skip_path (full function) ---") + r1 = run_benchmark(f"pathlib should_skip ({count}p)", bench_pathlib_should_skip, paths, repo_root, skip_patterns) + results.append(r1) + r2 = run_benchmark(f"string should_skip ({count}p)", bench_string_should_skip, paths, repo_root, skip_patterns) + results.append(r2) + + print_results(results) + print(f"\n -> pathlib vs string: {r1['median_ms'] / r2['median_ms']:.1f}x slower") + + results = [] + print("\n--- Suffix/extension extraction ---") + r1 = run_benchmark(f"Path.suffix ({count}p)", bench_pathlib_suffix_check, paths) + results.append(r1) + r2 = run_benchmark(f"str.rfind ({count}p)", bench_string_suffix_check, paths) + results.append(r2) + r3 = run_benchmark(f"os.path.splitext ({count}p)", bench_os_path_splitext, paths) + results.append(r3) + + print_results(results) + print(f"\n -> Path.suffix vs str.rfind: {r1['median_ms'] / r2['median_ms']:.1f}x slower") + + results = [] + print("\n--- Filename extraction ---") + r1 = run_benchmark(f"Path.name ({count}p)", bench_pathlib_name, paths) + results.append(r1) + r2 = run_benchmark(f"str.rfind+slice ({count}p)", bench_string_name, paths) + results.append(r2) + + print_results(results) + print(f"\n -> Path.name vs str: {r1['median_ms'] / r2['median_ms']:.1f}x slower") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/bench_string_ops.py b/benchmarks/bench_string_ops.py new file mode 100644 index 000000000..cc10e91f8 --- /dev/null +++ b/benchmarks/bench_string_ops.py @@ -0,0 +1,148 @@ +import re +import statistics +import time + +WARMUP_RUNS = 3 +BENCH_RUNS = 100 + 
+SEPARATOR_PATTERN = re.compile(r"[.:]|::") + + +def generate_qualified_names(count: int) -> list[str]: + names = [] + modules = ["project", "utils", "core", "api", "services", "models"] + classes = ["Handler", "Manager", "Factory", "Builder", "Processor", "Resolver"] + methods = ["process", "handle", "create", "build", "resolve", "validate"] + for i in range(count): + mod = modules[i % len(modules)] + cls = classes[(i // len(modules)) % len(classes)] + meth = methods[(i // (len(modules) * len(classes))) % len(methods)] + names.append(f"{mod}.{cls}.sub{i}.{meth}") + return names + + +def bench_str_split(names: list[str]) -> float: + start = time.perf_counter() + for name in names: + _ = name.split(".") + return time.perf_counter() - start + + +def bench_str_endswith(names: list[str]) -> float: + suffixes = [".process", ".handle", ".create", ".build", ".resolve"] + start = time.perf_counter() + for name in names: + for suffix in suffixes: + _ = name.endswith(suffix) + return time.perf_counter() - start + + +def bench_str_startswith(names: list[str]) -> float: + prefixes = ["project.", "utils.", "core.", "api."] + start = time.perf_counter() + for name in names: + for prefix in prefixes: + _ = name.startswith(prefix) + return time.perf_counter() - start + + +def bench_str_join(names: list[str]) -> float: + split_names = [name.split(".") for name in names] + start = time.perf_counter() + for parts in split_names: + _ = ".".join(parts) + return time.perf_counter() - start + + +def bench_str_replace(names: list[str]) -> float: + start = time.perf_counter() + for name in names: + _ = name.replace("/", ".") + return time.perf_counter() - start + + +def bench_regex_split(names: list[str]) -> float: + start = time.perf_counter() + for name in names: + _ = SEPARATOR_PATTERN.split(name) + return time.perf_counter() - start + + +def bench_str_format(names: list[str]) -> float: + start = time.perf_counter() + for name in names: + _ = f"module.{name}.method" + return 
time.perf_counter() - start + + +def bench_import_distance(names: list[str]) -> float: + start = time.perf_counter() + for i in range(0, len(names) - 1, 2): + caller_parts = names[i].split(".") + candidate_parts = names[i + 1].split(".") + common = 0 + for j in range(min(len(caller_parts), len(candidate_parts))): + if caller_parts[j] == candidate_parts[j]: + common += 1 + else: + break + _ = max(len(caller_parts), len(candidate_parts)) - common + return time.perf_counter() - start + + +def run_benchmark(name: str, func, *args) -> dict[str, float]: + for _ in range(WARMUP_RUNS): + func(*args) + + times = [] + for _ in range(BENCH_RUNS): + times.append(func(*args)) + + return { + "name": name, + "median_ms": statistics.median(times) * 1000, + "mean_ms": statistics.mean(times) * 1000, + "stddev_ms": statistics.stdev(times) * 1000 if len(times) > 1 else 0, + "min_ms": min(times) * 1000, + "max_ms": max(times) * 1000, + "p95_ms": sorted(times)[int(len(times) * 0.95)] * 1000, + } + + +def print_results(results: list[dict[str, float]]) -> None: + print(f"\n{'Benchmark':<40} {'Median':>10} {'Mean':>10} {'StdDev':>10} {'Min':>10} {'Max':>10} {'P95':>10}") + print("-" * 110) + for r in results: + print( + f"{r['name']:<40} {r['median_ms']:>9.3f}ms {r['mean_ms']:>9.3f}ms " + f"{r['stddev_ms']:>9.3f}ms {r['min_ms']:>9.3f}ms {r['max_ms']:>9.3f}ms " + f"{r['p95_ms']:>9.3f}ms" + ) + + +def main() -> None: + sizes = [1000, 5000, 20000] + + for size in sizes: + print(f"\n{'='*110}") + print(f"String Operations Benchmark (n={size})") + print(f"{'='*110}") + + names = generate_qualified_names(size) + + results = [ + run_benchmark(f"str.split ({size})", bench_str_split, names), + run_benchmark(f"str.endswith ({size})", bench_str_endswith, names), + run_benchmark(f"str.startswith ({size})", bench_str_startswith, names), + run_benchmark(f"str.join ({size})", bench_str_join, names), + run_benchmark(f"str.replace ({size})", bench_str_replace, names), + run_benchmark(f"regex split 
({size})", bench_regex_split, names), + run_benchmark(f"f-string format ({size})", bench_str_format, names), + run_benchmark(f"import_distance ({size})", bench_import_distance, names), + ] + + print_results(results) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/bench_trie.py b/benchmarks/bench_trie.py new file mode 100644 index 000000000..dba339100 --- /dev/null +++ b/benchmarks/bench_trie.py @@ -0,0 +1,138 @@ +import statistics +import time +from collections import defaultdict + +from codebase_rag.graph_updater import FunctionRegistryTrie +from codebase_rag.types_defs import NodeType, SimpleNameLookup + +WARMUP_RUNS = 3 +BENCH_RUNS = 50 + + +def generate_qualified_names(count: int) -> list[str]: + names = [] + modules = ["project", "utils", "core", "api", "services", "models"] + classes = ["Handler", "Manager", "Factory", "Builder", "Processor", "Resolver"] + methods = ["process", "handle", "create", "build", "resolve", "validate", "execute"] + for i in range(count): + mod = modules[i % len(modules)] + cls = classes[(i // len(modules)) % len(classes)] + meth = methods[(i // (len(modules) * len(classes))) % len(methods)] + sub = f"sub{i}" + names.append(f"{mod}.{cls}.{sub}.{meth}") + return names + + +def bench_insert(trie: FunctionRegistryTrie, names: list[str]) -> float: + start = time.perf_counter() + for name in names: + trie.insert(name, NodeType.FUNCTION) + return time.perf_counter() - start + + +def bench_lookup(trie: FunctionRegistryTrie, names: list[str]) -> float: + start = time.perf_counter() + for name in names: + _ = name in trie + return time.perf_counter() - start + + +def bench_find_ending_with(trie: FunctionRegistryTrie) -> float: + suffixes = ["process", "handle", "create", "build", "resolve", "validate", "execute"] + start = time.perf_counter() + for suffix in suffixes: + _ = trie.find_ending_with(suffix) + return time.perf_counter() - start + + +def bench_find_with_prefix(trie: FunctionRegistryTrie) -> float: + prefixes = 
["project", "utils", "core", "api", "services", "models"] + start = time.perf_counter() + for prefix in prefixes: + _ = trie.find_with_prefix(prefix) + return time.perf_counter() - start + + +def bench_delete(names: list[str]) -> float: + simple_lookup: SimpleNameLookup = defaultdict(set) + trie = FunctionRegistryTrie(simple_name_lookup=simple_lookup) + for name in names: + trie.insert(name, NodeType.FUNCTION) + simple_name = name.split(".")[-1] + simple_lookup[simple_name].add(name) + + start = time.perf_counter() + for name in names[:len(names) // 4]: + del trie[name] + return time.perf_counter() - start + + +def run_benchmark(name: str, func, *args) -> dict[str, float]: + for _ in range(WARMUP_RUNS): + func(*args) + + times = [] + for _ in range(BENCH_RUNS): + times.append(func(*args)) + + return { + "name": name, + "median_ms": statistics.median(times) * 1000, + "mean_ms": statistics.mean(times) * 1000, + "stddev_ms": statistics.stdev(times) * 1000 if len(times) > 1 else 0, + "min_ms": min(times) * 1000, + "max_ms": max(times) * 1000, + "p95_ms": sorted(times)[int(len(times) * 0.95)] * 1000, + } + + +def print_results(results: list[dict[str, float]]) -> None: + print(f"\n{'Benchmark':<35} {'Median':>10} {'Mean':>10} {'StdDev':>10} {'Min':>10} {'Max':>10} {'P95':>10}") + print("-" * 105) + for r in results: + print( + f"{r['name']:<35} {r['median_ms']:>9.3f}ms {r['mean_ms']:>9.3f}ms " + f"{r['stddev_ms']:>9.3f}ms {r['min_ms']:>9.3f}ms {r['max_ms']:>9.3f}ms " + f"{r['p95_ms']:>9.3f}ms" + ) + + +def main() -> None: + sizes = [1000, 5000, 10000, 50000] + + for size in sizes: + print(f"\n{'='*105}") + print(f"FunctionRegistryTrie Benchmark (n={size})") + print(f"{'='*105}") + + names = generate_qualified_names(size) + + simple_lookup: SimpleNameLookup = defaultdict(set) + trie = FunctionRegistryTrie(simple_name_lookup=simple_lookup) + + results = [] + + r = run_benchmark(f"insert ({size})", bench_insert, trie, names) + results.append(r) + + for name in names: + 
simple_name = name.split(".")[-1] + simple_lookup[simple_name].add(name) + + r = run_benchmark(f"lookup ({size})", bench_lookup, trie, names) + results.append(r) + + r = run_benchmark(f"find_ending_with ({size})", bench_find_ending_with, trie) + results.append(r) + + r = run_benchmark(f"find_with_prefix ({size})", bench_find_with_prefix, trie) + results.append(r) + + r = run_benchmark(f"delete 25% ({size})", bench_delete, names) + results.append(r) + + print_results(results) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/results/bench_ast_cache_20260315_000043.txt b/benchmarks/results/bench_ast_cache_20260315_000043.txt new file mode 100644 index 000000000..5084d79ef --- /dev/null +++ b/benchmarks/results/bench_ast_cache_20260315_000043.txt @@ -0,0 +1,42 @@ +Benchmark: bench_ast_cache.py +Timestamp: 20260315_000043 +Exit code: 0 +Duration: 2.2s +Python: 3.12.2 (main, Feb 25 2024, 03:55:42) [Clang 17.0.6 ] +================================================================================ + +=================================================================================================================== +BoundedASTCache Benchmark (entries=500, item_size=1024B) +=================================================================================================================== + +Benchmark Median Mean StdDev Min Max P95 +------------------------------------------------------------------------------------------------------------------- +insert (500) 1.119ms 1.128ms 0.020ms 1.113ms 1.229ms 1.158ms +lookup (500) 0.019ms 0.019ms 0.000ms 0.018ms 0.019ms 0.019ms +access+LRU (500) 0.053ms 0.053ms 0.000ms 0.053ms 0.056ms 0.053ms +insert+evict (max=250) 1.141ms 1.155ms 0.092ms 1.133ms 1.792ms 1.158ms +getsizeof scan (500) 0.062ms 0.062ms 0.001ms 0.061ms 0.067ms 0.062ms + +=================================================================================================================== +BoundedASTCache Benchmark (entries=2000, item_size=4096B) 
+=================================================================================================================== + +Benchmark Median Mean StdDev Min Max P95 +------------------------------------------------------------------------------------------------------------------- +insert (2000) 4.717ms 4.798ms 0.248ms 4.591ms 5.567ms 5.558ms +lookup (2000) 0.077ms 0.077ms 0.000ms 0.076ms 0.078ms 0.077ms +access+LRU (2000) 0.214ms 0.214ms 0.001ms 0.213ms 0.217ms 0.216ms +insert+evict (max=1000) 4.768ms 4.814ms 0.221ms 4.614ms 5.870ms 5.103ms +getsizeof scan (2000) 0.257ms 0.259ms 0.005ms 0.254ms 0.279ms 0.269ms + +=================================================================================================================== +BoundedASTCache Benchmark (entries=5000, item_size=8192B) +=================================================================================================================== + +Benchmark Median Mean StdDev Min Max P95 +------------------------------------------------------------------------------------------------------------------- +insert (5000) 12.829ms 13.137ms 0.611ms 12.561ms 14.340ms 14.280ms +lookup (5000) 0.206ms 0.206ms 0.002ms 0.203ms 0.210ms 0.209ms +access+LRU (5000) 0.551ms 0.552ms 0.005ms 0.544ms 0.565ms 0.563ms +insert+evict (max=2500) 12.558ms 12.992ms 0.936ms 12.246ms 16.534ms 14.787ms +getsizeof scan (5000) 0.681ms 0.686ms 0.027ms 0.651ms 0.812ms 0.740ms diff --git a/benchmarks/results/bench_embedding_cache_20260315_000043.txt b/benchmarks/results/bench_embedding_cache_20260315_000043.txt new file mode 100644 index 000000000..807a58402 --- /dev/null +++ b/benchmarks/results/bench_embedding_cache_20260315_000043.txt @@ -0,0 +1,42 @@ +Benchmark: bench_embedding_cache.py +Timestamp: 20260315_000043 +Exit code: 0 +Duration: 3.4s +Python: 3.12.2 (main, Feb 25 2024, 03:55:42) [Clang 17.0.6 ] +================================================================================ + 
+============================================================================================================== +EmbeddingCache Benchmark (n=500) +============================================================================================================== + +Benchmark Median Mean StdDev Min Max P95 +-------------------------------------------------------------------------------------------------------------- +sha256 hashing (500) 0.155ms 0.151ms 0.006ms 0.143ms 0.161ms 0.159ms +cache.put (500) 0.182ms 0.182ms 0.002ms 0.179ms 0.187ms 0.185ms +cache.get hit (500) 0.177ms 0.177ms 0.001ms 0.176ms 0.180ms 0.179ms +cache.get miss (500) 0.190ms 0.192ms 0.003ms 0.189ms 0.207ms 0.195ms +cache.get_many (500) 0.190ms 0.190ms 0.001ms 0.189ms 0.193ms 0.191ms + +============================================================================================================== +EmbeddingCache Benchmark (n=2000) +============================================================================================================== + +Benchmark Median Mean StdDev Min Max P95 +-------------------------------------------------------------------------------------------------------------- +sha256 hashing (2000) 0.562ms 0.564ms 0.006ms 0.557ms 0.581ms 0.576ms +cache.put (2000) 0.751ms 0.760ms 0.027ms 0.738ms 0.918ms 0.794ms +cache.get hit (2000) 0.729ms 0.732ms 0.009ms 0.719ms 0.765ms 0.748ms +cache.get miss (2000) 0.797ms 0.801ms 0.026ms 0.771ms 0.866ms 0.839ms +cache.get_many (2000) 0.798ms 0.808ms 0.028ms 0.777ms 0.888ms 0.856ms + +============================================================================================================== +EmbeddingCache Benchmark (n=10000) +============================================================================================================== + +Benchmark Median Mean StdDev Min Max P95 +-------------------------------------------------------------------------------------------------------------- +sha256 hashing (10000) 2.884ms 2.875ms 0.034ms 2.815ms 
2.950ms 2.921ms +cache.put (10000) 3.790ms 3.786ms 0.024ms 3.729ms 3.827ms 3.821ms +cache.get hit (10000) 3.690ms 3.697ms 0.029ms 3.653ms 3.775ms 3.750ms +cache.get miss (10000) 3.939ms 3.943ms 0.041ms 3.878ms 4.079ms 4.018ms +cache.get_many (10000) 3.987ms 3.989ms 0.023ms 3.948ms 4.051ms 4.041ms diff --git a/benchmarks/results/bench_file_hashing_20260315_000043.txt b/benchmarks/results/bench_file_hashing_20260315_000043.txt new file mode 100644 index 000000000..6346ad2f7 --- /dev/null +++ b/benchmarks/results/bench_file_hashing_20260315_000043.txt @@ -0,0 +1,45 @@ +Benchmark: bench_file_hashing.py +Timestamp: 20260315_000043 +Exit code: 0 +Duration: 4.4s +Python: 3.12.2 (main, Feb 25 2024, 03:55:42) [Clang 17.0.6 ] +================================================================================ + +=================================================================================================================== +File Hashing Benchmark (files=50, avg_size=5KB) +=================================================================================================================== +Total data: 0.2 MB + +Benchmark Median Mean StdDev Min Max P95 +------------------------------------------------------------------------------------------------------------------- +sha256 8KB buf (50f) 1.006ms 1.016ms 0.043ms 0.977ms 1.186ms 1.146ms +sha256 64KB buf (50f) 1.075ms 1.070ms 0.016ms 1.036ms 1.106ms 1.090ms +sha256 mmap (50f) 1.356ms 1.355ms 0.033ms 1.299ms 1.453ms 1.395ms +md5 (50f) 1.310ms 1.374ms 0.171ms 1.191ms 1.878ms 1.727ms +blake2b (50f) 1.201ms 1.253ms 0.147ms 1.106ms 1.718ms 1.632ms + +=================================================================================================================== +File Hashing Benchmark (files=200, avg_size=10KB) +=================================================================================================================== +Total data: 2.0 MB + +Benchmark Median Mean StdDev Min Max P95 
+------------------------------------------------------------------------------------------------------------------- +sha256 8KB buf (200f) 4.587ms 4.777ms 0.512ms 4.377ms 6.201ms 6.185ms +sha256 64KB buf (200f) 4.729ms 4.819ms 0.285ms 4.557ms 5.794ms 5.706ms +sha256 mmap (200f) 5.984ms 8.714ms 11.275ms 5.650ms 63.888ms 29.536ms +md5 (200f) 6.532ms 6.547ms 0.143ms 6.367ms 6.993ms 6.804ms +blake2b (200f) 5.217ms 5.289ms 0.272ms 5.068ms 6.416ms 6.003ms + +=================================================================================================================== +File Hashing Benchmark (files=500, avg_size=20KB) +=================================================================================================================== +Total data: 9.8 MB + +Benchmark Median Mean StdDev Min Max P95 +------------------------------------------------------------------------------------------------------------------- +sha256 8KB buf (500f) 13.926ms 14.170ms 0.910ms 13.581ms 18.406ms 15.773ms +sha256 64KB buf (500f) 14.268ms 14.312ms 0.253ms 13.957ms 15.319ms 14.640ms +sha256 mmap (500f) 16.699ms 20.110ms 15.978ms 16.299ms 104.163ms 25.618ms +md5 (500f) 23.512ms 23.670ms 0.567ms 23.157ms 25.836ms 25.075ms +blake2b (500f) 17.669ms 17.783ms 0.496ms 17.229ms 19.433ms 18.815ms diff --git a/benchmarks/results/bench_graph_loader_20260315_000043.txt b/benchmarks/results/bench_graph_loader_20260315_000043.txt new file mode 100644 index 000000000..d9cd28a0b --- /dev/null +++ b/benchmarks/results/bench_graph_loader_20260315_000043.txt @@ -0,0 +1,48 @@ +Benchmark: bench_graph_loader.py +Timestamp: 20260315_000043 +Exit code: 0 +Duration: 2.9s +Python: 3.12.2 (main, Feb 25 2024, 03:55:42) [Clang 17.0.6 ] +================================================================================ + +============================================================================================================== +GraphLoader Benchmark (nodes=1000, rels=2000) 
+============================================================================================================== +JSON size: 298.2 KB + +Benchmark Median Mean StdDev Min Max P95 +-------------------------------------------------------------------------------------------------------------- +json.loads (1000n) 1.001ms 1.011ms 0.029ms 0.974ms 1.071ms 1.071ms +GraphLoader.load (1000n) 2.040ms 2.143ms 0.583ms 1.865ms 4.581ms 4.581ms +find_nodes_by_label (1000n) 0.001ms 0.001ms 0.000ms 0.000ms 0.001ms 0.001ms +find_node_by_property (1000n) 0.030ms 0.030ms 0.000ms 0.029ms 0.030ms 0.030ms +get_relationships (1000n) 0.148ms 0.148ms 0.001ms 0.146ms 0.151ms 0.151ms +summary (1000n) 0.069ms 0.070ms 0.001ms 0.068ms 0.073ms 0.073ms + +============================================================================================================== +GraphLoader Benchmark (nodes=5000, rels=10000) +============================================================================================================== +JSON size: 1537.8 KB + +Benchmark Median Mean StdDev Min Max P95 +-------------------------------------------------------------------------------------------------------------- +json.loads (5000n) 5.032ms 5.002ms 0.112ms 4.843ms 5.180ms 5.180ms +GraphLoader.load (5000n) 10.106ms 11.137ms 2.030ms 9.396ms 14.997ms 14.997ms +find_nodes_by_label (5000n) 0.000ms 0.000ms 0.000ms 0.000ms 0.001ms 0.001ms +find_node_by_property (5000n) 0.030ms 0.030ms 0.000ms 0.030ms 0.030ms 0.030ms +get_relationships (5000n) 0.150ms 0.152ms 0.005ms 0.148ms 0.170ms 0.170ms +summary (5000n) 0.350ms 0.356ms 0.018ms 0.341ms 0.420ms 0.420ms + +============================================================================================================== +GraphLoader Benchmark (nodes=20000, rels=50000) +============================================================================================================== +JSON size: 6979.7 KB + +Benchmark Median Mean StdDev Min Max P95 
+-------------------------------------------------------------------------------------------------------------- +json.loads (20000n) 24.136ms 24.783ms 2.550ms 23.565ms 35.321ms 35.321ms +GraphLoader.load (20000n) 61.008ms 62.676ms 5.050ms 57.534ms 75.337ms 75.337ms +find_nodes_by_label (20000n) 0.000ms 0.000ms 0.000ms 0.000ms 0.001ms 0.001ms +find_node_by_property (20000n) 0.030ms 0.030ms 0.000ms 0.030ms 0.030ms 0.030ms +get_relationships (20000n) 0.152ms 0.153ms 0.001ms 0.151ms 0.155ms 0.155ms +summary (20000n) 1.738ms 1.745ms 0.023ms 1.714ms 1.819ms 1.819ms diff --git a/benchmarks/results/bench_json_serialization_20260315_000043.txt b/benchmarks/results/bench_json_serialization_20260315_000043.txt new file mode 100644 index 000000000..aab002921 --- /dev/null +++ b/benchmarks/results/bench_json_serialization_20260315_000043.txt @@ -0,0 +1,48 @@ +Benchmark: bench_json_serialization.py +Timestamp: 20260315_000043 +Exit code: 0 +Duration: 18.8s +Python: 3.12.2 (main, Feb 25 2024, 03:55:42) [Clang 17.0.6 ] +================================================================================ + +=================================================================================================================== +JSON Serialization Benchmark (nodes=1000, rels=2000) +=================================================================================================================== +Compact JSON: 366.8 KB, Indented: 547.7 KB + +Benchmark Median Mean StdDev Min Max P95 +------------------------------------------------------------------------------------------------------------------- +json.dumps compact (1000n) 1.089ms 1.094ms 0.010ms 1.084ms 1.117ms 1.117ms +json.dumps indented (1000n) 9.612ms 9.703ms 0.220ms 9.560ms 10.479ms 10.479ms +json.loads compact (1000n) 1.202ms 1.202ms 0.015ms 1.185ms 1.260ms 1.260ms +json.loads indented (1000n) 1.286ms 1.281ms 0.023ms 1.253ms 1.325ms 1.325ms +json.dump to file (1000n) 12.239ms 12.241ms 0.071ms 12.145ms 12.398ms 12.398ms +json.load from 
file (1000n) 1.345ms 1.350ms 0.036ms 1.309ms 1.429ms 1.429ms + +=================================================================================================================== +JSON Serialization Benchmark (nodes=5000, rels=10000) +=================================================================================================================== +Compact JSON: 1881.4 KB, Indented: 2786.1 KB + +Benchmark Median Mean StdDev Min Max P95 +------------------------------------------------------------------------------------------------------------------- +json.dumps compact (5000n) 5.701ms 5.718ms 0.158ms 5.464ms 6.000ms 6.000ms +json.dumps indented (5000n) 47.875ms 47.950ms 0.285ms 47.618ms 48.611ms 48.611ms +json.loads compact (5000n) 6.291ms 6.327ms 0.244ms 5.999ms 6.754ms 6.754ms +json.loads indented (5000n) 6.686ms 6.666ms 0.263ms 6.346ms 7.152ms 7.152ms +json.dump to file (5000n) 60.552ms 60.895ms 1.262ms 60.082ms 64.565ms 64.565ms +json.load from file (5000n) 6.573ms 6.590ms 0.049ms 6.528ms 6.717ms 6.717ms + +=================================================================================================================== +JSON Serialization Benchmark (nodes=20000, rels=50000) +=================================================================================================================== +Compact JSON: 8381.6 KB, Indented: 12363.2 KB + +Benchmark Median Mean StdDev Min Max P95 +------------------------------------------------------------------------------------------------------------------- +json.dumps compact (20000n) 25.446ms 25.483ms 0.156ms 25.314ms 25.797ms 25.797ms +json.dumps indented (20000n) 215.190ms 215.593ms 1.383ms 214.183ms 219.350ms 219.350ms +json.loads compact (20000n) 28.713ms 28.731ms 0.480ms 28.049ms 30.253ms 30.253ms +json.loads indented (20000n) 30.416ms 30.558ms 0.813ms 29.707ms 32.258ms 32.258ms +json.dump to file (20000n) 271.376ms 271.918ms 3.051ms 266.710ms 278.494ms 278.494ms +json.load from file (20000n) 32.144ms 33.111ms 
3.488ms 31.594ms 47.762ms 47.762ms diff --git a/benchmarks/results/bench_string_ops_20260315_000043.txt b/benchmarks/results/bench_string_ops_20260315_000043.txt new file mode 100644 index 000000000..66c1bcd8b --- /dev/null +++ b/benchmarks/results/bench_string_ops_20260315_000043.txt @@ -0,0 +1,51 @@ +Benchmark: bench_string_ops.py +Timestamp: 20260315_000043 +Exit code: 0 +Duration: 3.2s +Python: 3.12.2 (main, Feb 25 2024, 03:55:42) [Clang 17.0.6 ] +================================================================================ + +============================================================================================================== +String Operations Benchmark (n=1000) +============================================================================================================== + +Benchmark Median Mean StdDev Min Max P95 +-------------------------------------------------------------------------------------------------------------- +str.split (1000) 0.079ms 0.079ms 0.001ms 0.077ms 0.083ms 0.082ms +str.endswith (1000) 0.179ms 0.181ms 0.006ms 0.174ms 0.219ms 0.188ms +str.startswith (1000) 0.146ms 0.147ms 0.003ms 0.144ms 0.165ms 0.150ms +str.join (1000) 0.036ms 0.036ms 0.001ms 0.035ms 0.047ms 0.039ms +str.replace (1000) 0.014ms 0.014ms 0.000ms 0.014ms 0.016ms 0.014ms +regex split (1000) 0.418ms 0.420ms 0.006ms 0.414ms 0.437ms 0.431ms +f-string format (1000) 0.029ms 0.029ms 0.000ms 0.029ms 0.032ms 0.029ms +import_distance (1000) 0.164ms 0.165ms 0.004ms 0.162ms 0.185ms 0.171ms + +============================================================================================================== +String Operations Benchmark (n=5000) +============================================================================================================== + +Benchmark Median Mean StdDev Min Max P95 +-------------------------------------------------------------------------------------------------------------- +str.split (5000) 0.380ms 0.380ms 0.003ms 0.371ms 0.395ms 0.387ms 
+str.endswith (5000) 0.897ms 0.899ms 0.004ms 0.892ms 0.919ms 0.909ms +str.startswith (5000) 0.722ms 0.723ms 0.003ms 0.715ms 0.733ms 0.728ms +str.join (5000) 0.185ms 0.187ms 0.005ms 0.184ms 0.234ms 0.191ms +str.replace (5000) 0.071ms 0.071ms 0.001ms 0.070ms 0.074ms 0.071ms +regex split (5000) 2.033ms 2.037ms 0.023ms 1.984ms 2.103ms 2.076ms +f-string format (5000) 0.146ms 0.147ms 0.002ms 0.145ms 0.154ms 0.150ms +import_distance (5000) 0.781ms 0.773ms 0.014ms 0.752ms 0.797ms 0.790ms + +============================================================================================================== +String Operations Benchmark (n=20000) +============================================================================================================== + +Benchmark Median Mean StdDev Min Max P95 +-------------------------------------------------------------------------------------------------------------- +str.split (20000) 1.588ms 1.590ms 0.014ms 1.559ms 1.626ms 1.612ms +str.endswith (20000) 3.582ms 3.619ms 0.147ms 3.497ms 4.883ms 3.803ms +str.startswith (20000) 2.920ms 2.926ms 0.031ms 2.876ms 3.064ms 3.005ms +str.join (20000) 0.733ms 0.735ms 0.015ms 0.719ms 0.850ms 0.752ms +str.replace (20000) 0.287ms 0.288ms 0.009ms 0.282ms 0.374ms 0.293ms +regex split (20000) 8.051ms 8.047ms 0.068ms 7.924ms 8.195ms 8.174ms +f-string format (20000) 0.593ms 0.594ms 0.006ms 0.582ms 0.624ms 0.603ms +import_distance (20000) 3.183ms 3.184ms 0.039ms 3.129ms 3.315ms 3.262ms diff --git a/benchmarks/results/bench_trie_20260315_000043.txt b/benchmarks/results/bench_trie_20260315_000043.txt new file mode 100644 index 000000000..10ad3978e --- /dev/null +++ b/benchmarks/results/bench_trie_20260315_000043.txt @@ -0,0 +1,54 @@ +Benchmark: bench_trie.py +Timestamp: 20260315_000043 +Exit code: 0 +Duration: 9.3s +Python: 3.12.2 (main, Feb 25 2024, 03:55:42) [Clang 17.0.6 ] +================================================================================ + 
+========================================================================================================= +FunctionRegistryTrie Benchmark (n=1000) +========================================================================================================= + +Benchmark Median Mean StdDev Min Max P95 +--------------------------------------------------------------------------------------------------------- +insert (1000) 0.340ms 0.341ms 0.012ms 0.327ms 0.385ms 0.378ms +lookup (1000) 0.036ms 0.036ms 0.000ms 0.035ms 0.037ms 0.036ms +find_ending_with (1000) 0.004ms 0.005ms 0.004ms 0.004ms 0.031ms 0.004ms +find_with_prefix (1000) 0.390ms 0.425ms 0.059ms 0.369ms 0.589ms 0.528ms +delete 25% (1000) 0.407ms 0.418ms 0.021ms 0.394ms 0.457ms 0.449ms + +========================================================================================================= +FunctionRegistryTrie Benchmark (n=5000) +========================================================================================================= + +Benchmark Median Mean StdDev Min Max P95 +--------------------------------------------------------------------------------------------------------- +insert (5000) 1.795ms 1.797ms 0.037ms 1.721ms 1.911ms 1.876ms +lookup (5000) 0.195ms 0.196ms 0.002ms 0.193ms 0.201ms 0.200ms +find_ending_with (5000) 0.019ms 0.019ms 0.000ms 0.018ms 0.021ms 0.019ms +find_with_prefix (5000) 2.104ms 2.299ms 1.047ms 2.024ms 9.499ms 2.416ms +delete 25% (5000) 2.116ms 2.122ms 0.048ms 2.043ms 2.260ms 2.214ms + +========================================================================================================= +FunctionRegistryTrie Benchmark (n=10000) +========================================================================================================= + +Benchmark Median Mean StdDev Min Max P95 +--------------------------------------------------------------------------------------------------------- +insert (10000) 3.709ms 3.735ms 0.106ms 3.627ms 4.244ms 3.912ms +lookup (10000) 0.402ms 0.403ms 
0.003ms 0.398ms 0.412ms 0.407ms +find_ending_with (10000) 0.046ms 0.046ms 0.002ms 0.045ms 0.056ms 0.050ms +find_with_prefix (10000) 4.244ms 4.630ms 1.843ms 3.904ms 13.674ms 5.386ms +delete 25% (10000) 4.204ms 4.207ms 0.066ms 3.959ms 4.349ms 4.312ms + +========================================================================================================= +FunctionRegistryTrie Benchmark (n=50000) +========================================================================================================= + +Benchmark Median Mean StdDev Min Max P95 +--------------------------------------------------------------------------------------------------------- +insert (50000) 18.036ms 18.128ms 0.306ms 17.831ms 18.972ms 18.820ms +lookup (50000) 2.058ms 2.061ms 0.013ms 2.036ms 2.091ms 2.085ms +find_ending_with (50000) 0.420ms 0.426ms 0.014ms 0.412ms 0.477ms 0.458ms +find_with_prefix (50000) 38.507ms 38.096ms 10.219ms 22.462ms 56.890ms 52.739ms +delete 25% (50000) 21.744ms 21.830ms 0.410ms 21.277ms 23.496ms 22.524ms diff --git a/benchmarks/run_all.py b/benchmarks/run_all.py new file mode 100644 index 000000000..a79c339ab --- /dev/null +++ b/benchmarks/run_all.py @@ -0,0 +1,74 @@ +import subprocess +import sys +import time +from pathlib import Path + +BENCHMARKS = [ + "bench_string_ops.py", + "bench_trie.py", + "bench_find_ending_with_fix.py", + "bench_dropin_replacements.py", + "bench_graph_loader.py", + "bench_file_hashing.py", + "bench_embedding_cache.py", + "bench_json_serialization.py", + "bench_ast_cache.py", + "bench_pathlib_vs_string.py", +] + + +def main() -> None: + bench_dir = Path(__file__).parent + results_dir = bench_dir / "results" + results_dir.mkdir(exist_ok=True) + + timestamp = time.strftime("%Y%m%d_%H%M%S") + overall_start = time.perf_counter() + + print(f"Running {len(BENCHMARKS)} benchmark suites") + print(f"Results will be saved to: {results_dir}") + print(f"Timestamp: {timestamp}") + print("=" * 80) + + for bench_file in BENCHMARKS: + bench_path = 
bench_dir / bench_file + if not bench_path.exists(): + print(f"SKIP: {bench_file} (not found)") + continue + + result_file = results_dir / f"{bench_path.stem}_{timestamp}.txt" + print(f"\nRunning: {bench_file}") + + start = time.perf_counter() + result = subprocess.run( + [sys.executable, str(bench_path)], + capture_output=True, + text=True, + timeout=600, + ) + elapsed = time.perf_counter() - start + + output = result.stdout + if result.returncode != 0: + output += f"\nSTDERR:\n{result.stderr}" + print(f" FAILED (exit code {result.returncode}, {elapsed:.1f}s)") + else: + print(f" OK ({elapsed:.1f}s)") + + with result_file.open("w") as f: + f.write(f"Benchmark: {bench_file}\n") + f.write(f"Timestamp: {timestamp}\n") + f.write(f"Exit code: {result.returncode}\n") + f.write(f"Duration: {elapsed:.1f}s\n") + f.write(f"Python: {sys.version}\n") + f.write("=" * 80 + "\n") + f.write(output) + + total = time.perf_counter() - overall_start + print(f"\n{'='*80}") + print(f"All benchmarks completed in {total:.1f}s") + print(f"Results saved in: {results_dir}") + + +if __name__ == "__main__": + main() diff --git a/build_binary.py b/build_binary.py index b82c48c6e..fd1884a0c 100644 --- a/build_binary.py +++ b/build_binary.py @@ -70,6 +70,9 @@ def build_binary() -> bool: for pkg in cs.PYINSTALLER_PACKAGES: cmd.extend(_build_package_args(pkg)) + for mod in cs.PYINSTALLER_EXCLUDED_MODULES: + cmd.extend([cs.PYINSTALLER_ARG_EXCLUDE_MODULE, mod]) + cmd.append(cs.PYINSTALLER_ENTRY_POINT) logger.info(logs.BUILD_BINARY.format(name=binary_name)) diff --git a/cgr/__init__.py b/cgr/__init__.py new file mode 100644 index 000000000..3d76ac771 --- /dev/null +++ b/cgr/__init__.py @@ -0,0 +1,14 @@ +from codebase_rag.config import settings +from codebase_rag.embedder import embed_code +from codebase_rag.graph_loader import GraphLoader, load_graph +from codebase_rag.services.graph_service import MemgraphIngestor +from codebase_rag.services.llm import CypherGenerator + +__all__ = [ + 
"CypherGenerator", + "GraphLoader", + "MemgraphIngestor", + "embed_code", + "load_graph", + "settings", +] diff --git a/codebase_rag/cgr_state.py b/codebase_rag/cgr_state.py new file mode 100644 index 000000000..703672a64 --- /dev/null +++ b/codebase_rag/cgr_state.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +import json +from datetime import UTC, datetime +from pathlib import Path +from typing import TypedDict + +from loguru import logger + +from .config import settings + +STATE_FILENAME = "state.json" + + +class _StateShape(TypedDict, total=False): + last_sync: dict[str, str] + + +def state_path(home: Path | None = None) -> Path: + base = (home or settings.CGR_HOME).expanduser() + return base / STATE_FILENAME + + +def _load(path: Path) -> _StateShape: + if not path.exists(): + return _StateShape() + try: + with path.open(encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, dict): + return _StateShape(last_sync=data.get("last_sync", {})) + except (OSError, json.JSONDecodeError) as e: + logger.warning(f"Failed to load cgr state from {path}: {e}") + return _StateShape() + + +def _save(path: Path, data: _StateShape) -> None: + try: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + except OSError as e: + logger.warning(f"Failed to save cgr state to {path}: {e}") + + +def record_sync(project_name: str, home: Path | None = None) -> None: + path = state_path(home) + state = _load(path) + last_sync = state.get("last_sync", {}) + last_sync[project_name] = datetime.now(UTC).isoformat() + state["last_sync"] = last_sync + _save(path, state) + + +def read_sync_timestamps(home: Path | None = None) -> dict[str, str]: + state = _load(state_path(home)) + return dict(state.get("last_sync", {})) diff --git a/codebase_rag/cli.py b/codebase_rag/cli.py index 87f9a5379..07b72ad88 100644 --- a/codebase_rag/cli.py +++ b/codebase_rag/cli.py @@ -1,39 +1,69 @@ import asyncio +import json 
+import time +from collections.abc import Callable +from functools import partial +from importlib.metadata import version as get_version from pathlib import Path import typer from loguru import logger +from rich.console import Console from rich.panel import Panel from rich.table import Table +from . import cgr_state from . import cli_help as ch from . import constants as cs from . import logs as ls from .config import load_cgrignore_patterns, settings from .graph_updater import GraphUpdater from .main import ( + _create_configuration_table, app_context, connect_memgraph, export_graph_to_file, main_async, main_optimize_async, + main_single_query, prompt_for_unignored_directories, style, update_model_settings, ) from .parser_loader import load_parsers +from .services.graph_service import MemgraphIngestor from .services.protobuf_service import ProtobufFileIngestor +from .stack import StackManager +from .stack.cli import cli as daemon_cli +from .stack.constants import StackState +from .stack.manager import StackError from .tools.health_checker import HealthChecker from .tools.language import cli as language_cli +from .types_defs import DeadCodeRow, PropertyValue, ResultRow +from .utils.path_utils import derive_project_name, resolve_repo_path +from .vector_store import delete_project_embeddings +from .workspaces import WorkspaceConfig, WorkspaceError, load_workspace +from .workspaces.cli import cli as workspace_cli app = typer.Typer( - name="code-graph-rag", + name=cs.PACKAGE_NAME, help=ch.APP_DESCRIPTION, no_args_is_help=True, add_completion=False, ) +def _version_callback(value: bool) -> None: + if value: + app_context.console.print( + cs.CLI_MSG_VERSION.format( + package=cs.PACKAGE_NAME, version=get_version(cs.PACKAGE_NAME) + ), + highlight=False, + ) + raise typer.Exit() + + def validate_models_early() -> None: try: orchestrator_config = settings.active_orchestrator_config @@ -58,6 +88,14 @@ def _update_and_validate_models(orchestrator: str | None, cypher: str | 
None) -> @app.callback() def _global_options( + version: bool | None = typer.Option( + None, + "--version", + "-v", + help=ch.HELP_VERSION, + callback=_version_callback, + is_eager=True, + ), quiet: bool = typer.Option( False, "--quiet", @@ -77,6 +115,184 @@ def _info(msg: str) -> None: app_context.console.print(msg) +def _load_workspace_or_exit(workspace: str | None) -> WorkspaceConfig | None: + if workspace is None: + return None + try: + return load_workspace(workspace) + except WorkspaceError as e: + app_context.console.print(style(str(e), cs.Color.RED)) + raise typer.Exit(1) from e + + +def _sync_workspace( + config: WorkspaceConfig, + batch_size: int, + exclude: list[str] | None, +) -> None: + total = len(config.repos) + if total == 0: + _info( + style(cs.CLI_MSG_WORKSPACE_EMPTY.format(name=config.name), cs.Color.YELLOW) + ) + return + _info( + style( + cs.CLI_MSG_WORKSPACE_SYNCING.format(name=config.name, count=total), + cs.Color.CYAN, + ) + ) + for idx, repo in enumerate(config.repos, start=1): + repo_path = repo.repo_path() + _info( + style( + cs.CLI_MSG_WORKSPACE_SYNC_REPO.format( + idx=idx, + total=total, + path=repo_path, + project_name=repo.project_name, + ), + cs.Color.CYAN, + ) + ) + _run_graph_sync( + repo=repo_path, + project_name=repo.project_name, + batch_size=batch_size, + exclude=exclude, + interactive_setup=False, + ) + + +def _resolve_active_projects(projects: str | None, default_project: str) -> list[str]: + if projects: + parsed = [p.strip() for p in projects.split(",") if p.strip()] + if parsed: + return parsed + return [default_project] + + +def _maybe_start_stack() -> None: + mgr = StackManager() + if mgr.status().state == StackState.RUNNING: + return + try: + mgr.ensure_running() + except StackError as e: + app_context.console.print(style(str(e), cs.Color.RED)) + raise typer.Exit(1) from e + + +def _run_graph_sync( + repo: Path, + project_name: str, + batch_size: int, + exclude: list[str] | None, + interactive_setup: bool, + clean: bool 
def _delete_hash_cache(repo_path: Path) -> None:
    # (H) Remove the cs.HASH_CACHE_FILENAME and cs.DIR_MTIMES_FILENAME files
    # (H) from repo_path. Only the hash-cache removal is announced, and only
    # (H) when that file exists; a missing file is never an error.
    hash_cache = repo_path / cs.HASH_CACHE_FILENAME
    if hash_cache.exists():
        notice = cs.CLI_MSG_CLEANING_HASH_CACHE.format(path=hash_cache)
        _info(style(notice, cs.Color.YELLOW))
        hash_cache.unlink(missing_ok=True)
    (repo_path / cs.DIR_MTIMES_FILENAME).unlink(missing_ok=True)
def _cleanup_project_embeddings(ingestor: MemgraphIngestor, project_name: str) -> None:
    # (H) Run cs.CYPHER_QUERY_PROJECT_NODE_IDS for project_name, keep the int
    # (H) values found under cs.KEY_NODE_ID, and hand them to
    # (H) delete_project_embeddings. Rows with a non-int id are ignored.
    query_params = {cs.KEY_PROJECT_NAME: project_name}
    records = ingestor.fetch_all(cs.CYPHER_QUERY_PROJECT_NODE_IDS, query_params)
    node_ids: list[int] = [
        candidate
        for record in records
        if isinstance(candidate := record.get(cs.KEY_NODE_ID), int)
    ]
    delete_project_embeddings(project_name, node_ids)
+ "--projects", + help=ch.HELP_PROJECTS, + ), + workspace: str | None = typer.Option( + None, + "--workspace", + help=ch.HELP_WORKSPACE, + ), ) -> None: app_context.session.confirm_edits = not no_confirm + app_context.session.load_cgr_instructions = not no_instructions + + if output_format == cs.QueryFormat.JSON and not ask_agent: + app_context.console.print( + style(cs.CLI_ERR_JSON_REQUIRES_ASK_AGENT, cs.Color.RED) + ) + raise typer.Exit(1) - target_repo_path = repo_path or settings.TARGET_REPO_PATH + resolved_repo = _resolve_and_validate_repo(repo_path) + target_repo_path = str(resolved_repo) + resolved_project_name = project_name or derive_project_name(resolved_repo) if output and not update_graph: app_context.console.print( @@ -140,54 +406,95 @@ def start( ) raise typer.Exit(1) - _update_and_validate_models(orchestrator, cypher) + if not no_start_stack: + _maybe_start_stack() effective_batch_size = settings.resolve_batch_size(batch_size) + if clean and not update_graph: + repo_to_clean = Path(target_repo_path) + with connect_memgraph(effective_batch_size) as ingestor: + _info(style(cs.CLI_MSG_CLEANING_DB, cs.Color.YELLOW)) + ingestor.clean_database() + + _delete_hash_cache(repo_to_clean) + _info(style(cs.CLI_MSG_CLEAN_DONE, cs.Color.GREEN)) + return + + _update_and_validate_models(orchestrator, cypher) + + if not ask_agent and not update_graph: + app_context.console.print(_create_configuration_table(target_repo_path)) + if update_graph: - repo_to_update = Path(target_repo_path) _info( - style(cs.CLI_MSG_UPDATING_GRAPH.format(path=repo_to_update), cs.Color.GREEN) + style(cs.CLI_MSG_UPDATING_GRAPH.format(path=resolved_repo), cs.Color.GREEN) ) - - cgrignore = load_cgrignore_patterns(repo_to_update) - cli_excludes = frozenset(exclude) if exclude else frozenset() - exclude_paths = cli_excludes | cgrignore.exclude or None - unignore_paths: frozenset[str] | None = None - if interactive_setup: - unignore_paths = prompt_for_unignored_directories(repo_to_update, exclude) 
- else: + if not interactive_setup: _info(style(cs.CLI_MSG_AUTO_EXCLUDE, cs.Color.YELLOW)) - unignore_paths = cgrignore.unignore or None + _run_graph_sync( + repo=resolved_repo, + project_name=resolved_project_name, + batch_size=effective_batch_size, + exclude=exclude, + interactive_setup=interactive_setup, + clean=clean, + output=output, + ) + _info(style(cs.CLI_MSG_GRAPH_UPDATED, cs.Color.GREEN)) + return - with connect_memgraph(effective_batch_size) as ingestor: - if clean: - _info(style(cs.CLI_MSG_CLEANING_DB, cs.Color.YELLOW)) - ingestor.clean_database() - ingestor.ensure_constraints() - - parsers, queries = load_parsers() - - updater = GraphUpdater( - ingestor, - repo_to_update, - parsers, - queries, - unignore_paths, - exclude_paths, - ) - updater.run() + workspace_config = _load_workspace_or_exit(workspace) - if output: - _info(style(cs.CLI_MSG_EXPORTING_TO.format(path=output), cs.Color.CYAN)) - if not export_graph_to_file(ingestor, output): - raise typer.Exit(1) + sync_task: Callable[[], None] | None = None + sync_message = cs.MSG_SYNCING_KNOWLEDGE_GRAPH + if not no_sync: + if workspace_config is not None: + sync_task = partial( + _sync_workspace, workspace_config, effective_batch_size, exclude + ) + sync_message = cs.MSG_SYNCING_WORKSPACE.format( + name=workspace_config.name, count=len(workspace_config.repos) + ) + else: + sync_task = partial( + _run_graph_sync, + repo=resolved_repo, + project_name=resolved_project_name, + batch_size=effective_batch_size, + exclude=exclude, + interactive_setup=interactive_setup, + ) - _info(style(cs.CLI_MSG_GRAPH_UPDATED, cs.Color.GREEN)) - return + if workspace_config is not None: + active_projects = workspace_config.project_names() + if projects: + active_projects = _resolve_active_projects(projects, active_projects[0]) + else: + active_projects = _resolve_active_projects(projects, resolved_project_name) try: - asyncio.run(main_async(target_repo_path, effective_batch_size)) + if ask_agent: + if sync_task is not None: + 
sync_task() + main_single_query( + target_repo_path, + effective_batch_size, + ask_agent, + active_projects=active_projects, + output_format=output_format, + ) + else: + asyncio.run( + main_async( + target_repo_path, + effective_batch_size, + active_projects=active_projects, + show_config_table=False, + pre_chat_sync=sync_task, + pre_chat_sync_message=sync_message, + ) + ) except KeyboardInterrupt: app_context.console.print(style(cs.CLI_MSG_APP_TERMINATED, cs.Color.RED)) except ValueError as e: @@ -223,8 +530,7 @@ def index( help=ch.HELP_INTERACTIVE_SETUP, ), ) -> None: - target_repo_path = repo_path or settings.TARGET_REPO_PATH - repo_to_index = Path(target_repo_path) + repo_to_index = _resolve_and_validate_repo(repo_path) _info(style(cs.CLI_MSG_INDEXING_AT.format(path=repo_to_index), cs.Color.GREEN)) _info(style(cs.CLI_MSG_OUTPUT_TO.format(path=output_proto_dir), cs.Color.CYAN)) @@ -245,7 +551,12 @@ def index( ) parsers, queries = load_parsers() updater = GraphUpdater( - ingestor, repo_to_index, parsers, queries, unignore_paths, exclude_paths + ingestor=ingestor, + repo_path=repo_to_index, + parsers=parsers, + queries=queries, + unignore_paths=unignore_paths, + exclude_paths=exclude_paths, ) updater.run() @@ -324,6 +635,11 @@ def optimize( "--no-confirm", help=ch.HELP_NO_CONFIRM, ), + no_instructions: bool = typer.Option( + False, + "--no-instructions", + help=ch.HELP_NO_INSTRUCTIONS, + ), batch_size: int | None = typer.Option( None, "--batch-size", @@ -332,8 +648,9 @@ def optimize( ), ) -> None: app_context.session.confirm_edits = not no_confirm + app_context.session.load_cgr_instructions = not no_instructions - target_repo_path = repo_path or settings.TARGET_REPO_PATH + target_repo_path = str(_resolve_and_validate_repo(repo_path)) _update_and_validate_models(orchestrator, cypher) @@ -357,11 +674,24 @@ def optimize( @app.command(name=ch.CLICommandName.MCP_SERVER, help=ch.CMD_MCP_SERVER) -def mcp_server() -> None: +def mcp_server( + transport: cs.MCPTransport = 
typer.Option( + cs.MCPTransport.STDIO, help=ch.HELP_MCP_TRANSPORT + ), + host: str = typer.Option(None, help=ch.HELP_MCP_HTTP_HOST), + port: int = typer.Option(None, help=ch.HELP_MCP_HTTP_PORT), +) -> None: try: - from codebase_rag.mcp import main as mcp_main + if transport == cs.MCPTransport.HTTP: + from codebase_rag.mcp import serve_http + + resolved_host = host or settings.MCP_HTTP_HOST + resolved_port = port or settings.MCP_HTTP_PORT + asyncio.run(serve_http(host=resolved_host, port=resolved_port)) + else: + from codebase_rag.mcp import serve_stdio - asyncio.run(mcp_main()) + asyncio.run(serve_stdio()) except KeyboardInterrupt: app_context.console.print(style(cs.CLI_MSG_APP_TERMINATED, cs.Color.RED)) except ValueError as e: @@ -369,7 +699,6 @@ def mcp_server() -> None: style(cs.CLI_ERR_CONFIG.format(error=e), cs.Color.RED) ) _info(style(cs.CLI_MSG_HINT_TARGET_REPO, cs.Color.YELLOW)) - except Exception as e: app_context.console.print( style(cs.CLI_ERR_MCP_SERVER.format(error=e), cs.Color.RED) @@ -417,6 +746,53 @@ def language_command(ctx: typer.Context) -> None: language_cli(ctx.args, standalone_mode=False) +@app.command( + name=ch.CLICommandName.DAEMON, + help=ch.CMD_DAEMON, + context_settings={"allow_extra_args": True, "allow_interspersed_args": False}, +) +def daemon_command(ctx: typer.Context) -> None: + daemon_cli(ctx.args, standalone_mode=False) + + +@app.command( + name=ch.CLICommandName.WORKSPACE, + help=ch.CMD_WORKSPACE, + context_settings={"allow_extra_args": True, "allow_interspersed_args": False}, +) +def workspace_command(ctx: typer.Context) -> None: + workspace_cli(ctx.args, standalone_mode=False) + + +@app.command(name=ch.CLICommandName.STOP, help=ch.CMD_STOP) +def stop_command() -> None: + mgr = StackManager() + try: + mgr.down() + except StackError as e: + app_context.console.print(style(str(e), cs.Color.RED)) + raise typer.Exit(1) from e + _info(style("stack stopped", cs.Color.GREEN)) + + +@app.command(name=ch.CLICommandName.STATUS, 
def _build_stats_table(
    title: str,
    col_label: str,
    rows: list[ResultRow],
    get_label: Callable[[ResultRow], str],
    total_label: str,
) -> Table:
    # (H) Build a two-column table (label, count) with one line per row and a
    # (H) final section holding the summed total. A row whose "count" value is
    # (H) not an int/float contributes 0.
    header_style = f"{cs.StyleModifier.BOLD} {cs.Color.MAGENTA}"
    table = Table(
        title=style(title, cs.Color.GREEN),
        show_header=True,
        header_style=header_style,
    )
    table.add_column(col_label, style=cs.Color.CYAN)
    table.add_column(cs.CLI_STATS_COL_COUNT, style=cs.Color.YELLOW, justify="right")

    running_total = 0
    for entry in rows:
        value = entry.get("count", 0)
        if isinstance(value, int | float):
            amount = int(value)
        else:
            amount = 0
        running_total += amount
        table.add_row(get_label(entry), f"{amount:,}")

    table.add_section()
    total_cell = style(f"{running_total:,}", cs.Color.GREEN)
    table.add_row(style(total_label, cs.Color.GREEN), total_cell)
    return table
ingestor.fetch_all(CYPHER_STATS_NODE_COUNTS) + rel_results = ingestor.fetch_all(CYPHER_STATS_RELATIONSHIP_COUNTS) + + app_context.console.print( + _build_stats_table( + cs.CLI_STATS_NODE_TITLE, + cs.CLI_STATS_COL_NODE_TYPE, + node_results, + lambda r: ":".join(r.get("labels", [])) or cs.CLI_STATS_UNKNOWN, + cs.CLI_STATS_TOTAL_NODES, + ) + ) + app_context.console.print() + app_context.console.print( + _build_stats_table( + cs.CLI_STATS_REL_TITLE, + cs.CLI_STATS_COL_REL_TYPE, + rel_results, + lambda r: str(r.get("type", cs.CLI_STATS_UNKNOWN)), + cs.CLI_STATS_TOTAL_RELS, + ) + ) + + except Exception as e: + app_context.console.print( + style(cs.CLI_ERR_STATS_FAILED.format(error=e), cs.Color.RED) + ) + logger.exception(ls.STATS_ERROR.format(error=e)) + raise typer.Exit(1) from e + + +def _resolve_dead_code_project( + project_name: str | None, projects: list[str] +) -> str | None: + if project_name: + return project_name.strip() + if len(projects) == 1: + return projects[0] + return None + + +def _dead_code_params( + project_name: str, + entry_points: list[str], + decorator_roots: list[str], +) -> dict[str, PropertyValue]: + root_decorators = sorted( + {d.lower() for d in cs.DEFAULT_ROOT_DECORATORS} + | {d.lower() for d in decorator_roots} + ) + # (H) test_patterns is always passed: with tests included it makes test + # (H) functions roots; with tests excluded it filters test modules out of the + # (H) module-load root clause so test-only code is not kept alive. 
def _to_dead_code_row(row: ResultRow) -> DeadCodeRow:
    # (H) Convert one raw query row into a DeadCodeRow. Text fields are passed
    # (H) through str() and default to ""; line numbers that are not int/float
    # (H) become 0.
    start_line, end_line = (
        int(raw) if isinstance(raw, int | float) else 0
        for raw in (row.get(cs.KEY_START_LINE, 0), row.get(cs.KEY_END_LINE, 0))
    )
    label = str(row.get(cs.KEY_LABEL, ""))
    name = str(row.get(cs.KEY_NAME, ""))
    qualified_name = str(row.get(cs.KEY_QUALIFIED_NAME, ""))
    return DeadCodeRow(
        label=label,
        name=name,
        qualified_name=qualified_name,
        start_line=start_line,
        end_line=end_line,
    )
Console(file=fh).print(table) + app_context.console.print( + style( + cs.CLI_DEADCODE_WRITTEN.format(count=len(candidates), path=output), + cs.Color.GREEN, + ) + ) + return + + if not candidates: + app_context.console.print(style(cs.CLI_DEADCODE_NONE, cs.Color.GREEN)) + return + app_context.console.print(table) + app_context.console.print( + style(cs.CLI_DEADCODE_SUMMARY.format(count=len(candidates)), cs.Color.GREEN) + ) + + +@app.command(name=ch.CLICommandName.DEAD_CODE, help=ch.CMD_DEAD_CODE) +def dead_code( + project_name: str | None = typer.Option( + None, "--project-name", "-n", help=ch.HELP_DEADCODE_PROJECT_NAME + ), + entry_point: list[str] = typer.Option( + [], "--entry-point", "-e", help=ch.HELP_DEADCODE_ENTRY_POINT + ), + decorator_root: list[str] = typer.Option( + [], "--decorator-root", help=ch.HELP_DEADCODE_DECORATOR_ROOT + ), + include_tests: bool = typer.Option( + True, + "--include-tests/--no-include-tests", + help=ch.HELP_DEADCODE_INCLUDE_TESTS, + ), + include_classes: bool = typer.Option( + False, + "--classes/--no-classes", + help=ch.HELP_DEADCODE_CLASSES, + ), + output_format: cs.DeadCodeFormat = typer.Option( + cs.DeadCodeFormat.TABLE, "--format", help=ch.HELP_DEADCODE_FORMAT + ), + output: Path | None = typer.Option( + None, "--output", "-o", help=ch.HELP_DEADCODE_OUTPUT + ), + fail_on_found: bool = typer.Option( + False, "--fail-on-found", help=ch.HELP_DEADCODE_FAIL_ON_FOUND + ), +) -> None: + from .cypher_queries import build_dead_code_query + + show_progress = output_format == cs.DeadCodeFormat.TABLE and output is None + if show_progress: + app_context.console.print(style(cs.CLI_DEADCODE_CONNECTING, cs.Color.CYAN)) + + projects: list[str] = [] + resolved: str | None = None + rows: list[ResultRow] = [] + try: + with connect_memgraph(batch_size=1) as ingestor: + projects = ingestor.list_projects() + resolved = _resolve_dead_code_project(project_name, projects) + if resolved is not None: + 
logger.info(ls.DEADCODE_SCANNING.format(project_name=resolved)) + rows = ingestor.fetch_all( + build_dead_code_query(include_tests, include_classes), + _dead_code_params(resolved, entry_point, decorator_root), + ) + except Exception as e: + app_context.console.print( + style(cs.CLI_ERR_DEADCODE_FAILED.format(error=e), cs.Color.RED) + ) + logger.exception(ls.DEADCODE_ERROR.format(error=e)) + raise typer.Exit(1) from e + + if resolved is None: + message = ( + cs.CLI_ERR_DEADCODE_NO_PROJECTS + if not projects + else cs.CLI_ERR_DEADCODE_AMBIGUOUS_PROJECT.format(projects=projects) + ) + app_context.console.print(style(message, cs.Color.RED)) + raise typer.Exit(1) + + candidates = [_to_dead_code_row(row) for row in rows] + _emit_dead_code(candidates, output_format, output, resolved) + + if fail_on_found and candidates: + raise typer.Exit(1) + + +@app.command(name=ch.CLICommandName.DELETE_PROJECT, help=ch.CMD_DELETE_PROJECT) +def delete_project( + name: str = typer.Option( + ..., + "--name", + "-n", + help=ch.HELP_DELETE_PROJECT_NAME, + ), + repo_path: str | None = typer.Option( + None, + "--repo-path", + help=ch.HELP_DELETE_PROJECT_REPO_PATH, + ), +) -> None: + project_name = name.strip() + if not project_name: + app_context.console.print(style(cs.CLI_ERR_PROJECT_NAME_REQUIRED, cs.Color.RED)) + raise typer.Exit(1) + + effective_batch_size = settings.resolve_batch_size(None) + + try: + with connect_memgraph(effective_batch_size) as ingestor: + projects = ingestor.list_projects() + if project_name not in projects: + app_context.console.print( + style( + cs.CLI_ERR_PROJECT_NOT_FOUND.format( + project_name=project_name, projects=projects + ), + cs.Color.RED, + ) + ) + raise typer.Exit(1) + + _info( + style( + cs.CLI_MSG_DELETING_PROJECT.format(project_name=project_name), + cs.Color.YELLOW, + ) + ) + _cleanup_project_embeddings(ingestor, project_name) + ingestor.delete_project(project_name) + except typer.Exit: + raise + except Exception as e: + app_context.console.print( + 
style( + cs.CLI_ERR_DELETE_PROJECT_FAILED.format( + project_name=project_name, error=e + ), + cs.Color.RED, + ) + ) + logger.exception( + cs.CLI_ERR_DELETE_PROJECT_FAILED.format(project_name=project_name, error=e) + ) + raise typer.Exit(1) from e + + if repo_path: + _delete_hash_cache(Path(repo_path)) + + _info( + style( + cs.CLI_MSG_PROJECT_DELETED.format(project_name=project_name), + cs.Color.GREEN, + ) + ) + + if __name__ == "__main__": app() diff --git a/codebase_rag/cli_help.py b/codebase_rag/cli_help.py index 96e816d9a..5d6114e27 100644 --- a/codebase_rag/cli_help.py +++ b/codebase_rag/cli_help.py @@ -10,6 +10,13 @@ class CLICommandName(StrEnum): GRAPH_LOADER = "graph-loader" LANGUAGE = "language" DOCTOR = "doctor" + STATS = "stats" + DEAD_CODE = "dead-code" + DELETE_PROJECT = "delete-project" + DAEMON = "daemon" + WORKSPACE = "workspace" + STOP = "stop" + STATUS = "status" APP_DESCRIPTION = ( @@ -26,6 +33,12 @@ class CLICommandName(StrEnum): CMD_GRAPH_LOADER = "Load and display summary of exported graph JSON" CMD_LANGUAGE = "Manage language grammars (add, remove, list)" CMD_DOCTOR = "Verify that all dependencies and configurations are properly set up" +CMD_STATS = "Display node and relationship statistics for the indexed graph" +CMD_DEAD_CODE = ( + "Report functions/methods that are unreachable from any entry point " + "(candidates for review, not a guaranteed delete list)" +) +CMD_DELETE_PROJECT = "Delete a single project from the shared graph database (keeps other projects intact)" CMD_LANGUAGE_GROUP = "CLI for managing language grammars" CMD_LANGUAGE_ADD = "Add a new language grammar to the project." @@ -33,23 +46,86 @@ class CLICommandName(StrEnum): CMD_LANGUAGE_REMOVE = "Remove a language from the project." CMD_LANGUAGE_CLEANUP = "Clean up orphaned git modules that weren't properly removed." 
+CMD_DAEMON = "Manage the shared cgr docker stack (memgraph + qdrant)" +CMD_DAEMON_GROUP = "Manage the shared cgr docker stack (memgraph + qdrant)" +CMD_DAEMON_UP = "Start the docker stack and wait until healthy." +CMD_DAEMON_DOWN = "Stop the docker stack (preserves data volumes)." +CMD_DAEMON_STATUS = "Show whether memgraph and qdrant are reachable." +CMD_DAEMON_LOGS = "Tail docker compose logs for the stack." +CMD_DAEMON_RESTART = "Restart the docker stack." + +CMD_WORKSPACE = "Manage cgr workspaces (named bundles of repos)" +CMD_WORKSPACE_GROUP = "Manage cgr workspaces (named bundles of repos)" +CMD_WORKSPACE_LIST = "List all workspaces." +CMD_WORKSPACE_CREATE = "Create a new empty workspace." +CMD_WORKSPACE_DELETE = "Delete a workspace TOML (does not touch indexed graph data)." +CMD_WORKSPACE_SHOW = "Show a workspace's repos and project names." +CMD_WORKSPACE_ADD_REPO = "Add a repo to a workspace." +CMD_WORKSPACE_REMOVE_REPO = "Remove a repo from a workspace by path." + +HELP_WORKSPACE_DESCRIPTION = "Optional human-readable description." +HELP_WORKSPACE_FORCE = "Overwrite an existing workspace with the same name." +HELP_WORKSPACE_REPO_PROJECT_NAME = ( + "Project name to associate with this repo (defaults to derive_project_name(repo))." +) + +MSG_NO_WORKSPACES = "(no workspaces; create one with 'cgr workspace create ')" + +CMD_STOP = "Alias for `cgr daemon down`: stop the shared docker stack." +CMD_STATUS = "Show daemon stack state plus last-sync timestamp per project." + +HELP_DAEMON_LOGS_FOLLOW = "Stream logs continuously (Ctrl+C to stop)." +HELP_DAEMON_LOGS_SERVICE = ( + "Limit logs to a specific service (memgraph, qdrant, lab). Default: all." +) +HELP_NO_START_STACK = ( + "Skip auto-starting the docker stack. Useful when memgraph/qdrant run elsewhere." +) +HELP_NO_SYNC = ( + "Skip the automatic incremental graph sync that runs before the agent starts." +) +HELP_PROJECTS = ( + "Comma-separated list of project names to scope agent queries to. 
" + "Overrides --project-name. If omitted, defaults to the current repo's project." +) +HELP_WORKSPACE = ( + "Open the agent over all projects defined in a cgr workspace TOML " + "(stored under ~/.cgr/workspaces/.toml)." +) + HELP_BATCH_SIZE = "Number of buffered nodes/relationships before flushing to Memgraph" HELP_MEMGRAPH_HOST = "Memgraph host" HELP_MEMGRAPH_PORT = "Memgraph port" HELP_ORCHESTRATOR = ( "Specify orchestrator as provider:model " - "(e.g., ollama:llama3.2, openai:gpt-4, google:gemini-2.5-pro)" + "(e.g., ollama:llama3.2, openai:gpt-4, google:gemini-3.1-pro-preview)" ) HELP_CYPHER_MODEL = ( "Specify cypher model as provider:model " - "(e.g., ollama:codellama, google:gemini-2.5-flash)" + "(e.g., ollama:codellama, google:gemini-3-flash-preview)" ) HELP_NO_CONFIRM = "Disable confirmation prompts for edit operations (YOLO mode)" +HELP_NO_INSTRUCTIONS = ( + "Skip loading project instructions from ~/.cgr.md and /.cgr.md " + "(useful when the consolidated memories are bloating the system prompt)" +) -HELP_REPO_PATH_RETRIEVAL = "Path to the target repository for code retrieval" -HELP_REPO_PATH_INDEX = "Path to the target repository to index." -HELP_REPO_PATH_OPTIMIZE = "Path to the repository to optimize" +HELP_REPO_PATH_RETRIEVAL = ( + "Path to the target repository for code retrieval (defaults to current directory)" +) +HELP_REPO_PATH_INDEX = ( + "Path to the target repository to index (defaults to current directory)." +) +HELP_REPO_PATH_OPTIMIZE = ( + "Path to the repository to optimize (defaults to current directory)" +) HELP_REPO_PATH_WATCH = "Path to the repository to watch." +HELP_VERSION = "Show the version and exit." + +HELP_DEBOUNCE = "Debounce delay in seconds. Set to 0 to disable debouncing." +HELP_MAX_WAIT = ( + "Maximum wait time in seconds before forcing an update during continuous edits." 
+) HELP_UPDATE_GRAPH = "Update the knowledge graph by parsing the repository" HELP_CLEAN_DB = "Clean the database before updating (use when adding first repo)" @@ -73,6 +149,10 @@ class CLICommandName(StrEnum): ) HELP_KEEP_SUBMODULE = "Keep the git submodule (default: remove it)" +HELP_PROJECT_NAME = ( + "Override the project name used as qualified-name prefix for all nodes. " + "Defaults to the repo directory name." +) HELP_EXCLUDE_PATTERNS = ( "Additional directories to exclude from indexing. Can be specified multiple times." ) @@ -81,6 +161,60 @@ class CLICommandName(StrEnum): "Without this flag, all directories matching ignore patterns are automatically excluded." ) +HELP_ASK_AGENT = ( + "Run a single query in non-interactive mode and exit. " + "Output is sent to stdout, useful for scripting." +) + +HELP_QUERY_OUTPUT_FORMAT = ( + "Output format for --ask-agent: 'table' (default) prints the plain answer; " + '\'json\' wraps it as {"query": ..., "response": ...} for scripting.' +) + +HELP_MCP_TRANSPORT = "Transport mode: 'stdio' (default) or 'http'" +HELP_MCP_HTTP_HOST = ( + "Host to bind the HTTP server — only used when --transport http (default: 0.0.0.0)" +) +HELP_MCP_HTTP_PORT = ( + "Port to bind the HTTP server — only used when --transport http (default: 8080)" +) + +HELP_DEADCODE_PROJECT_NAME = ( + "Project to scan (matches the Project node name). " + "If omitted, the sole indexed project is used." +) +HELP_DEADCODE_ENTRY_POINT = ( + "Treat functions/methods whose qualified name ends with this value as " + "reachable roots. Repeatable." +) +HELP_DEADCODE_DECORATOR_ROOT = ( + "Treat functions/methods carrying this decorator as reachable roots. " + "Extends the built-in set (route, task, fixture, command, ...). Repeatable." +) +HELP_DEADCODE_INCLUDE_TESTS = ( + "Treat test code as reachable roots so production code it exercises is " + "not reported. On by default." +) +HELP_DEADCODE_CLASSES = ( + "Also report unreachable classes. 
A class counts as used when it is " + "instantiated or subclassed by a reachable class, so a base whose only " + "subclass is itself unreachable is reported as part of the dead cluster. " + "Off by default: classes referenced only via type annotations, isinstance, " + "or dynamic lookups are not tracked and may be false positives." +) +HELP_DEADCODE_FORMAT = "Output format: 'table' (default) or 'json'." +HELP_DEADCODE_OUTPUT = "Write the report to this file instead of stdout." +HELP_DEADCODE_FAIL_ON_FOUND = ( + "Exit with code 1 when any candidate is found (useful in CI)." +) + +HELP_DELETE_PROJECT_NAME = ( + "Name of the project to delete (matches the Project node name in the graph)." +) +HELP_DELETE_PROJECT_REPO_PATH = ( + "Optional path to the project's repo. If supplied, its hash cache is removed too." +) + CLI_COMMANDS: dict[CLICommandName, str] = { CLICommandName.START: CMD_START, CLICommandName.INDEX: CMD_INDEX, @@ -90,4 +224,11 @@ class CLICommandName(StrEnum): CLICommandName.GRAPH_LOADER: CMD_GRAPH_LOADER, CLICommandName.LANGUAGE: CMD_LANGUAGE, CLICommandName.DOCTOR: CMD_DOCTOR, + CLICommandName.STATS: CMD_STATS, + CLICommandName.DEAD_CODE: CMD_DEAD_CODE, + CLICommandName.DELETE_PROJECT: CMD_DELETE_PROJECT, + CLICommandName.DAEMON: CMD_DAEMON, + CLICommandName.WORKSPACE: CMD_WORKSPACE, + CLICommandName.STOP: CMD_STOP, + CLICommandName.STATUS: CMD_STATUS, } diff --git a/codebase_rag/config.py b/codebase_rag/config.py index 31848e4d1..4c4a95857 100644 --- a/codebase_rag/config.py +++ b/codebase_rag/config.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os from dataclasses import asdict, dataclass from pathlib import Path from typing import TypedDict, Unpack @@ -44,11 +45,6 @@ class ApiKeyInfoEntry(TypedDict): "url": "https://portal.azure.com/", "name": "Azure OpenAI", }, - cs.Provider.COHERE: { - "env_var": "COHERE_API_KEY", - "url": "https://dashboard.cohere.com/api-keys", - "name": "Cohere", - }, } @@ -94,6 +90,9 @@ def 
format_missing_api_key_errors( return error_msg +LOCAL_PROVIDERS = frozenset({cs.Provider.OLLAMA}) + + @dataclass class ModelConfig: provider: str @@ -113,8 +112,20 @@ def to_update_kwargs(self) -> ModelConfigKwargs: return ModelConfigKwargs(**result) def validate_api_key(self, role: str = cs.DEFAULT_MODEL_ROLE) -> None: - local_providers = {cs.Provider.OLLAMA, cs.Provider.LOCAL, cs.Provider.VLLM} - if self.provider.lower() in local_providers: + provider_lower = self.provider.lower() + provider_env_keys = { + cs.Provider.ANTHROPIC: cs.ENV_ANTHROPIC_API_KEY, + cs.Provider.AZURE: cs.ENV_AZURE_API_KEY, + } + env_key = provider_env_keys.get(provider_lower) + if ( + provider_lower in LOCAL_PROVIDERS + or ( + provider_lower == cs.Provider.GOOGLE + and self.provider_type == cs.GoogleProviderType.VERTEX + ) + or (env_key and os.environ.get(env_key)) + ): return if ( not self.api_key @@ -139,6 +150,8 @@ class AppConfig(BaseSettings): MEMGRAPH_HOST: str = "localhost" MEMGRAPH_PORT: int = 7687 MEMGRAPH_HTTP_PORT: int = 7444 + MEMGRAPH_USERNAME: str | None = None + MEMGRAPH_PASSWORD: str | None = None LAB_PORT: int = 3000 MEMGRAPH_BATCH_SIZE: int = 1000 AGENT_RETRIES: int = 3 @@ -150,7 +163,7 @@ class AppConfig(BaseSettings): ORCHESTRATOR_ENDPOINT: str | None = None ORCHESTRATOR_PROJECT_ID: str | None = None ORCHESTRATOR_REGION: str = cs.DEFAULT_REGION - ORCHESTRATOR_PROVIDER_TYPE: str | None = None + ORCHESTRATOR_PROVIDER_TYPE: cs.GoogleProviderType | None = None ORCHESTRATOR_THINKING_BUDGET: int | None = None ORCHESTRATOR_SERVICE_ACCOUNT_FILE: str | None = None @@ -160,7 +173,7 @@ class AppConfig(BaseSettings): CYPHER_ENDPOINT: str | None = None CYPHER_PROJECT_ID: str | None = None CYPHER_REGION: str = cs.DEFAULT_REGION - CYPHER_PROVIDER_TYPE: str | None = None + CYPHER_PROVIDER_TYPE: cs.GoogleProviderType | None = None CYPHER_THINKING_BUDGET: int | None = None CYPHER_SERVICE_ACCOUNT_FILE: str | None = None @@ -171,6 +184,11 @@ def ollama_endpoint(self) -> str: return 
f"{self.OLLAMA_BASE_URL.rstrip('/')}/v1" TARGET_REPO_PATH: str = "." + CPP_FRONTEND: cs.CppFrontend = cs.CppFrontend.TREESITTER + CAPTURE_FUNCTION_LOCAL_DEFINITIONS: bool = Field( + True, validation_alias="CGR_CAPTURE_LOCAL_DEFINITIONS" + ) + CGR_HOME: Path = Field(default_factory=lambda: Path.home() / ".cgr") SHELL_COMMAND_TIMEOUT: int = 30 SHELL_COMMAND_ALLOWLIST: frozenset[str] = frozenset( { @@ -235,24 +253,41 @@ def ollama_endpoint(self) -> str: ) QDRANT_DB_PATH: str = "./.qdrant_code_embeddings" + QDRANT_URL: str | None = None QDRANT_COLLECTION_NAME: str = "code_embeddings" QDRANT_VECTOR_DIM: int = 768 QDRANT_TOP_K: int = 5 + QDRANT_UPSERT_RETRIES: int = Field(default=3, gt=0) + QDRANT_RETRY_BASE_DELAY: float = Field(default=0.5, gt=0) + QDRANT_BATCH_SIZE: int = Field(default=50, gt=0) EMBEDDING_MAX_LENGTH: int = 512 EMBEDDING_PROGRESS_INTERVAL: int = 10 + FLUSH_THREAD_POOL_SIZE: int = Field(default=4, gt=0) + FILE_FLUSH_INTERVAL: int = Field(default=500, gt=0) + CACHE_MAX_ENTRIES: int = 1000 CACHE_MAX_MEMORY_MB: int = 500 CACHE_EVICTION_DIVISOR: int = 10 CACHE_MEMORY_THRESHOLD_RATIO: float = 0.8 + QUERY_RESULT_MAX_TOKENS: int = Field(default=16000, gt=0) + QUERY_RESULT_ROW_CAP: int = Field(default=500, gt=0) + QUERY_MEMORY_LIMIT_MB: int = Field(default=4096, gt=0) + QUERY_TIMEOUT_S: float = Field(default=60.0, gt=0) + OLLAMA_HEALTH_TIMEOUT: float = 5.0 + LITELLM_HEALTH_TIMEOUT: float = 5.0 _active_orchestrator: ModelConfig | None = None _active_cypher: ModelConfig | None = None QUIET: bool = Field(False, validation_alias="CGR_QUIET") + MCP_HTTP_HOST: str = "0.0.0.0" + MCP_HTTP_PORT: int = 8080 + MCP_HTTP_ENDPOINT_PATH: str = "/mcp" + def _get_default_config(self, role: str) -> ModelConfig: role_upper = role.upper() @@ -362,3 +397,34 @@ def load_cgrignore_patterns(repo_path: Path) -> CgrignorePatterns: except OSError as e: logger.warning(logs.CGRIGNORE_READ_FAILED.format(path=ignore_file, error=e)) return EMPTY_CGRIGNORE + + +CGR_INSTRUCTIONS_FILENAME = 
".cgr.md" +GLOBAL_CGR_INSTRUCTIONS_PATH = Path.home() / CGR_INSTRUCTIONS_FILENAME + + +def _read_cgr_instructions_file(path: Path) -> str | None: + if not path.is_file(): + return None + try: + with path.open(encoding="utf-8") as f: + body = f.read().strip() + except OSError as e: + logger.warning(logs.CGR_INSTRUCTIONS_READ_FAILED.format(path=path, error=e)) + return None + if not body: + return None + logger.info(logs.CGR_INSTRUCTIONS_LOADED.format(path=path, chars=len(body))) + return body + + +def load_cgr_instructions(repo_path: Path | None) -> str | None: + global_body = _read_cgr_instructions_file(GLOBAL_CGR_INSTRUCTIONS_PATH) + repo_body = ( + _read_cgr_instructions_file(repo_path / CGR_INSTRUCTIONS_FILENAME) + if repo_path is not None + else None + ) + if global_body and repo_body: + return f"{global_body}\n\n---\n\n{repo_body}" + return global_body or repo_body diff --git a/codebase_rag/constants.py b/codebase_rag/constants.py index 4ef971d8a..64e477f2b 100644 --- a/codebase_rag/constants.py +++ b/codebase_rag/constants.py @@ -20,9 +20,7 @@ class Provider(StrEnum): OPENAI = "openai" GOOGLE = "google" AZURE = "azure" - COHERE = "cohere" - LOCAL = "local" - VLLM = "vllm" + LITELLM_PROXY = "litellm_proxy" class Color(StrEnum): @@ -36,8 +34,15 @@ class Color(StrEnum): class KeyBinding(StrEnum): CTRL_J = "c-j" + CTRL_E = "c-e" ENTER = "enter" CTRL_C = "c-c" + SHIFT_TAB = "s-tab" + + +class PermissionMode(StrEnum): + NORMAL = "normal" + YOLO = "yolo" class StyleModifier(StrEnum): @@ -89,7 +94,7 @@ class FileAction(StrEnum): EXT_IXX = ".ixx" EXT_CPPM = ".cppm" EXT_CCM = ".ccm" -EXT_CS = ".cs" +EXT_C = ".c" EXT_PHP = ".php" EXT_LUA = ".lua" @@ -101,6 +106,7 @@ class FileAction(StrEnum): GO_EXTENSIONS = (EXT_GO,) SCALA_EXTENSIONS = (EXT_SCALA, EXT_SC) JAVA_EXTENSIONS = (EXT_JAVA,) +C_EXTENSIONS = (EXT_C,) CPP_EXTENSIONS = ( EXT_CPP, EXT_H, @@ -113,7 +119,6 @@ class FileAction(StrEnum): EXT_CPPM, EXT_CCM, ) -CS_EXTENSIONS = (EXT_CS,) PHP_EXTENSIONS = (EXT_PHP,) 
LUA_EXTENSIONS = (EXT_LUA,) @@ -131,6 +136,10 @@ class FileAction(StrEnum): ENV_OPENAI_API_KEY = "OPENAI_API_KEY" ENV_GOOGLE_API_KEY = "GOOGLE_API_KEY" +ENV_ANTHROPIC_API_KEY = "ANTHROPIC_API_KEY" +ENV_AZURE_API_KEY = "AZURE_API_KEY" +ENV_AZURE_ENDPOINT = "AZURE_OPENAI_ENDPOINT" +ENV_AZURE_API_VERSION = "AZURE_API_VERSION" HELP_ARG = "help" @@ -140,6 +149,11 @@ class GoogleProviderType(StrEnum): VERTEX = "vertex" +class CppFrontend(StrEnum): + TREESITTER = "treesitter" + LIBCLANG = "libclang" + + # (H) Provider endpoints OPENAI_DEFAULT_ENDPOINT = "https://api.openai.com/v1" OLLAMA_HEALTH_PATH = "/api/tags" @@ -150,11 +164,14 @@ class GoogleProviderType(StrEnum): HTTP_OK = 200 UNIXCODER_MODEL = "microsoft/unixcoder-base" +EMBEDDING_DEFAULT_BATCH_SIZE = 64 +EMBEDDING_CACHE_FILENAME = ".embedding_cache.json" KEY_NODES = "nodes" KEY_RELATIONSHIPS = "relationships" KEY_NODE_ID = "node_id" KEY_LABELS = "labels" +KEY_LABEL = "label" KEY_PROPERTIES = "properties" KEY_FROM_ID = "from_id" KEY_TO_ID = "to_id" @@ -168,9 +185,12 @@ class GoogleProviderType(StrEnum): KEY_PARSER = "parser" KEY_NAME = "name" KEY_QUALIFIED_NAME = "qualified_name" +KEY_QUERY = "query" +KEY_RESPONSE = "response" KEY_START_LINE = "start_line" KEY_END_LINE = "end_line" KEY_PATH = "path" +KEY_ABSOLUTE_PATH = "absolute_path" KEY_EXTENSION = "extension" KEY_MODULE_TYPE = "module_type" KEY_IMPLEMENTS_MODULE = "implements_module" @@ -209,12 +229,23 @@ class GoogleProviderType(StrEnum): ONEOF_EXTERNAL_PACKAGE = "external_package" ONEOF_MODULE_IMPLEMENTATION = "module_implementation" ONEOF_MODULE_INTERFACE = "module_interface" +ONEOF_INTERFACE = "interface_node" +ONEOF_ENUM = "enum_node" +ONEOF_TYPE = "type_node" +ONEOF_UNION = "union_node" # (H) CLI error and info messages CLI_ERR_OUTPUT_REQUIRES_UPDATE = ( "Error: --output/-o option requires --update-graph to be specified." ) CLI_ERR_ONLY_JSON = "Error: Currently only JSON format is supported." 
+CLI_ERR_JSON_REQUIRES_ASK_AGENT = ( + "Error: --output-format json requires --ask-agent/-a; " + "it only applies to single-query output." +) +CLI_ERR_PATH_NOT_EXISTS = "Error: --repo-path does not exist: {path}" +CLI_ERR_PATH_NOT_DIR = "Error: --repo-path is not a directory: {path}" +CLI_WARN_NOT_GIT_REPO = "Warning: --repo-path is not a Git repository: {path}" CLI_ERR_STARTUP = "Startup Error: {error}" CLI_ERR_CONFIG = "Configuration Error: {error}" CLI_ERR_INDEXING = "An error occurred during indexing: {error}" @@ -223,7 +254,34 @@ class GoogleProviderType(StrEnum): CLI_ERR_MCP_SERVER = "MCP Server Error: {error}" CLI_MSG_UPDATING_GRAPH = "Updating knowledge graph for: {path}" +CLI_MSG_SYNCING_GRAPH = "Syncing knowledge graph for: {path} (use --no-sync to skip)" +CLI_MSG_WORKSPACE_SYNCING = "Syncing workspace '{name}' ({count} repos)..." +CLI_MSG_WORKSPACE_SYNC_REPO = ( + "[{idx}/{total}] Syncing {path} as project '{project_name}'" +) +CLI_MSG_WORKSPACE_EMPTY = ( + "Workspace '{name}' has no repos (use cgr workspace add-repo)." +) +MSG_SYNCING_KNOWLEDGE_GRAPH = ( + "[bold cyan]Syncing knowledge graph[/bold cyan] (incremental, --no-sync to skip)" +) +MSG_SYNCING_WORKSPACE = ( + "[bold cyan]Syncing workspace '{name}'[/bold cyan] ({count} repos)" +) +CLI_MSG_SYNC_SKIPPED = "Knowledge graph already in sync for '{project}' ({elapsed:.2f}s, no changes detected)." +CLI_MSG_SYNC_DONE = "Knowledge graph sync done for '{project}' in {elapsed:.2f}s." CLI_MSG_CLEANING_DB = "Cleaning database..." +CLI_MSG_CLEANING_HASH_CACHE = "Removing hash cache: {path}" +CLI_MSG_CLEAN_DONE = "Clean completed successfully!" +CLI_MSG_DELETING_PROJECT = "Deleting project '{project_name}' from the graph..." +CLI_MSG_PROJECT_DELETED = "Project '{project_name}' deleted successfully." +CLI_ERR_PROJECT_NOT_FOUND = ( + "Project '{project_name}' not found. Available projects: {projects}" +) +CLI_ERR_PROJECT_NAME_REQUIRED = ( + "Error: --name is required and must be a non-empty project name." 
+) +CLI_ERR_DELETE_PROJECT_FAILED = "Failed to delete project '{project_name}': {error}" CLI_MSG_EXPORTING_TO = "Exporting graph to: {path}" CLI_MSG_GRAPH_UPDATED = "Graph update completed!" CLI_MSG_APP_TERMINATED = "\nApplication terminated by user." @@ -234,10 +292,39 @@ class GoogleProviderType(StrEnum): CLI_MSG_EXPORTING_DATA = "Exporting graph data..." CLI_MSG_OPTIMIZATION_TERMINATED = "\nOptimization session terminated by user." CLI_MSG_MCP_TERMINATED = "\nMCP server terminated by user." +PACKAGE_NAME = "code-graph-rag" +CLI_MSG_VERSION = "{package} version {version}" CLI_MSG_HINT_TARGET_REPO = ( "\nHint: Make sure TARGET_REPO_PATH environment variable is set." ) CLI_MSG_GRAPH_SUMMARY = "Graph Summary:" +CLI_MSG_CONNECTING_STATS = "Fetching graph statistics..." +CLI_STATS_NODE_TITLE = "Node Statistics" +CLI_STATS_REL_TITLE = "Relationship Statistics" +CLI_STATS_COL_NODE_TYPE = "Node Type" +CLI_STATS_COL_REL_TYPE = "Relationship Type" +CLI_STATS_COL_COUNT = "Count" +CLI_STATS_TOTAL_NODES = "Total Nodes" +CLI_STATS_TOTAL_RELS = "Total Relationships" +CLI_STATS_UNKNOWN = "Unknown" +CLI_ERR_STATS_FAILED = "Failed to get graph statistics: {error}" + +CLI_DEADCODE_CONNECTING = "Scanning for unreachable functions and methods..." +CLI_DEADCODE_TABLE_TITLE = "Dead Code Candidates ({project_name})" +CLI_DEADCODE_COL_KIND = "Kind" +CLI_DEADCODE_COL_QUALIFIED_NAME = "Qualified Name" +CLI_DEADCODE_COL_LINES = "Lines" +CLI_DEADCODE_LINE_RANGE = "{start}-{end}" +CLI_DEADCODE_SUMMARY = "{count} candidate(s) for review." +CLI_DEADCODE_NONE = "No unreachable functions or methods found." +CLI_DEADCODE_WRITTEN = "Wrote {count} candidate(s) to {path}" +CLI_ERR_DEADCODE_FAILED = "Failed to scan for dead code: {error}" +CLI_ERR_DEADCODE_NO_PROJECTS = ( + "No projects found in the graph. Index a repository first with 'cgr start'." +) +CLI_ERR_DEADCODE_AMBIGUOUS_PROJECT = ( + "Multiple projects found: {projects}. Specify which one with --project-name/-n." 
+) CLI_MSG_AUTO_EXCLUDE = ( "Auto-excluding common directories (venv, node_modules, .git, etc.). " "Use --interactive-setup to customize." @@ -247,9 +334,7 @@ class GoogleProviderType(StrEnum): UI_NEW_FILE_HEADER = "[bold cyan]New file: {path}[/bold cyan]" UI_SHELL_COMMAND_HEADER = "[bold cyan]Shell command:[/bold cyan]" UI_TOOL_APPROVAL = "[bold yellow]⚠️ Tool '{tool_name}' requires approval:[/bold yellow]" -UI_FEEDBACK_PROMPT = ( - "[bold yellow]Feedback (why rejected, or press Enter to skip)[/bold yellow]" -) +UI_FEEDBACK_PROMPT = "Feedback (why rejected, or press Enter to skip)" UI_OPTIMIZATION_START = ( "[bold green]Starting {language} optimization session...[/bold green]" ) @@ -268,7 +353,7 @@ class GoogleProviderType(StrEnum): UI_MODEL_SWITCHED = "[bold green]Model switched to: {model}[/bold green]" UI_MODEL_CURRENT = "[bold cyan]Current model: {model}[/bold cyan]" UI_MODEL_SWITCH_ERROR = "[bold red]Failed to switch model: {error}[/bold red]" -UI_MODEL_USAGE = "[bold yellow]Usage: /model (e.g., /model google:gemini-2.0-flash)[/bold yellow]" +UI_MODEL_USAGE = "[bold yellow]Usage: /model (e.g., /model google:gemini-3.1-pro-preview)[/bold yellow]" UI_HELP_COMMANDS = """[bold cyan]Available commands:[/bold cyan] /model - Switch to a different model /model - Show current model @@ -296,6 +381,9 @@ class GoogleProviderType(StrEnum): # (H) Qualified name separators SEPARATOR_DOT = "." SEPARATOR_SLASH = "/" +# (H) Disambiguates definitions that share one qualified name (if/else import +# (H) fallbacks, typing.overload, try/except fallbacks): "@". +DUP_QN_MARKER = "@" # (H) Path navigation PATH_CURRENT_DIR = "." 
@@ -318,6 +406,42 @@ class UniqueKeyType(StrEnum): QUALIFIED_NAME = KEY_QUALIFIED_NAME +class DeadCodeFormat(StrEnum): + TABLE = "table" + JSON = "json" + + +class QueryFormat(StrEnum): + TABLE = "table" + JSON = "json" + + +# (H) Decorators whose presence marks a function/method as an implicit entry point +# (H) (web routes, task/flow handlers, fixtures, CLI commands, event listeners). +DEFAULT_ROOT_DECORATORS: frozenset[str] = frozenset( + { + "route", + "get", + "post", + "put", + "delete", + "patch", + "websocket", + "task", + "flow", + "fixture", + "command", + "cli", + "app", + "on_event", + "listener", + } +) + +# (H) Substrings in a node's file path that mark it as test code. +TEST_PATH_PATTERNS: tuple[str, ...] = ("test_", "_test", "conftest", "/tests/") + + class NodeLabel(StrEnum): PROJECT = "Project" PACKAGE = "Package" @@ -377,6 +501,7 @@ class RelationshipType(StrEnum): IMPLEMENTS = "IMPLEMENTS" OVERRIDES = "OVERRIDES" CALLS = "CALLS" + INSTANTIATES = "INSTANTIATES" DEPENDS_ON_EXTERNAL = "DEPENDS_ON_EXTERNAL" @@ -417,14 +542,21 @@ class RelationshipType(StrEnum): # (H) Cypher queries CYPHER_DEFAULT_LIMIT = 50 -CYPHER_QUERY_EMBEDDINGS = """ +_CYPHER_EMBEDDING_BASE = """ MATCH (m:Module)-[:DEFINES]->(n) WHERE (n:Function OR n:Method) - AND m.qualified_name STARTS WITH $project_name + '.' 
-RETURN id(n) AS node_id, n.qualified_name AS qualified_name, + AND m.qualified_name STARTS WITH ($project_name + '.') +""" + +CYPHER_QUERY_EMBEDDINGS = ( + _CYPHER_EMBEDDING_BASE + + """RETURN id(n) AS node_id, n.qualified_name AS qualified_name, n.start_line AS start_line, n.end_line AS end_line, m.path AS path """ +) + +CYPHER_QUERY_PROJECT_NODE_IDS = _CYPHER_EMBEDDING_BASE + "RETURN id(n) AS node_id\n" class SupportedLanguage(StrEnum): @@ -435,8 +567,8 @@ class SupportedLanguage(StrEnum): GO = "go" SCALA = "scala" JAVA = "java" + C = "c" CPP = "cpp" - CSHARP = "c-sharp" PHP = "php" LUA = "lua" @@ -468,6 +600,11 @@ class LanguageMetadata(NamedTuple): "Interfaces, type aliases, enums, namespaces, ES6/CommonJS modules", "TypeScript", ), + SupportedLanguage.C: LanguageMetadata( + LanguageStatus.FULL, + "Functions, structs, unions, enums, preprocessor includes", + "C", + ), SupportedLanguage.CPP: LanguageMetadata( LanguageStatus.FULL, "Constructors, destructors, operator overloading, templates, lambdas, C++20 modules, namespaces", @@ -498,14 +635,9 @@ class LanguageMetadata(NamedTuple): "Case classes, objects", "Scala", ), - SupportedLanguage.CSHARP: LanguageMetadata( - LanguageStatus.DEV, - "Classes, interfaces, generics (planned)", - "C#", - ), SupportedLanguage.PHP: LanguageMetadata( - LanguageStatus.DEV, - "Classes, functions, namespaces", + LanguageStatus.FULL, + "Classes, interfaces, traits, enums, namespaces, PHP 8 attributes", "PHP", ), } @@ -551,7 +683,6 @@ class LanguageMetadata(NamedTuple): IMPORT_NODES_FROM = ("import_from_statement",) IMPORT_NODES_MODULE = ("lexical_declaration", "export_statement") IMPORT_NODES_INCLUDE = ("preproc_include",) -IMPORT_NODES_USING = ("using_directive",) # (H) JS/TS specific node types JS_TS_FUNCTION_NODES = ( @@ -584,10 +715,16 @@ class LanguageMetadata(NamedTuple): FIELD_MODULE_NAME = "module_name" FIELD_ARGUMENTS = "arguments" FIELD_BODY = "body" +FIELD_RETURN_TYPE = "return_type" FIELD_CONSTRUCTOR = "constructor" 
FIELD_DECLARATOR = "declarator" FIELD_PARAMETERS = "parameters" +FIELD_RECEIVER = "receiver" FIELD_TYPE = "type" +# (H) Rust impl `trait`/`type` fields and a trait's supertrait `bounds`. +FIELD_TRAIT = "trait" +FIELD_BOUNDS = "bounds" +TS_RS_TRAIT_BOUNDS = "trait_bounds" FIELD_VALUE = "value" FIELD_LEFT = "left" FIELD_RIGHT = "right" @@ -601,7 +738,35 @@ class LanguageMetadata(NamedTuple): METHOD_ITEMS = "items" # (H) Image file extensions for chat image handling -IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".gif") +MULTIMODAL_EXTENSIONS = (".png", ".jpg", ".jpeg", ".gif", ".webp", ".pdf") +MIME_TYPE_PDF = "application/pdf" +MIME_TYPE_FALLBACK = "application/octet-stream" +YES_ANSWER = "y" +YES_ANSWERS = frozenset({"y", "yes", ""}) +NO_ANSWERS = frozenset({"n", "no"}) +SHIFT_TAB_ESCAPE = b"\x1b[Z" +DIFF_GIT_HEADER = "diff --git " +MARKDOWN_FENCE = "```" +MARKDOWN_FENCE_DIFF = "```diff" +DIFF_CONTINUATION_PREFIXES = ( + "diff --git ", + "index ", + "--- ", + "+++ ", + "@@ ", + "+", + "-", + " ", + "\\ ", + "new file mode", + "deleted file mode", + "old mode", + "new mode", + "rename from ", + "rename to ", + "similarity index ", + "Binary files ", +) # (H) CLI exit commands EXIT_COMMANDS = frozenset({"exit", "quit"}) @@ -670,6 +835,7 @@ class DiffMarker: MSG_CONNECTED_MEMGRAPH = "Successfully connected to Memgraph." MSG_THINKING_CANCELLED = "Thinking cancelled." MSG_TIMEOUT_FORMAT = "Operation timed out after {timeout} seconds." +MSG_TOOL_CALL_CANCELLED = "Tool call cancelled by user." MSG_CHAT_INSTRUCTIONS = ( "Ask questions about your codebase graph. Type 'exit' or 'quit' to end." 
) @@ -679,7 +845,65 @@ class DiffMarker: OPTIMIZATION_TABLE_TITLE = "Optimization Session Configuration" PROMPT_ASK_QUESTION = "Ask a question" PROMPT_YOUR_RESPONSE = "Your response" -MULTILINE_INPUT_HINT = "(Press Ctrl+J to submit, Enter for new line)" +MULTILINE_INPUT_HINT = ( + "(Press Ctrl+J or Ctrl+E to submit, Enter for new line, Shift+Tab to toggle mode)" +) +PERMISSION_MODE_NORMAL_LABEL = "● Normal mode (asks before destructive)" +PERMISSION_MODE_YOLO_LABEL = "● YOLO mode (auto-approve, allowlist off)" +PERMISSION_MODE_TOGGLED = "Permission mode: {label}" +STATUS_BAR_BRANCH_CLEAN_HTML = ( + '' +) +STATUS_BAR_BRANCH_DIRTY_HTML = ( + '' +) +STATUS_BAR_BRANCH_CLEAN_PLAIN = " ⎇ {branch} " +STATUS_BAR_BRANCH_DIRTY_PLAIN = " ⎇ {branch} ± " +STATUS_BAR_BRANCH_RICH_TEXT = " ⎇ {branch}{marker} " +STATUS_BAR_CLEAN_STYLE = "black on green" +STATUS_BAR_DIRTY_STYLE = "black on yellow" +STATUS_BAR_DIRTY_MARKER = " ±" +STATUS_BAR_SPINNER = "dots" +STATUS_BAR_SEPARATOR_CHAR = "─" +STATUS_BAR_SEPARATOR_COLOR = "#666666" +STATUS_BAR_TOKEN_HTML = ' ' +STATUS_BAR_CONFIG_COLOR = "#888888" +STATUS_BAR_CONFIG_LABEL_COLOR = "#5fafd7" +STATUS_BAR_CONFIG_SEPARATOR = " │ " +STATUS_BAR_CONFIG_LABEL_O = "O" +STATUS_BAR_CONFIG_LABEL_C = "C" +STATUS_BAR_CONFIG_LABEL_EDIT = "edit" +STATUS_BAR_CONFIG_LABEL_INSTRUCTIONS = "instructions" +STATUS_BAR_CONFIG_LABEL_REPO = "repo" +STATUS_BAR_EDIT_ON = "on" +STATUS_BAR_EDIT_OFF = "off" +TOKEN_THRESHOLD_WARNING = 50 +TOKEN_THRESHOLD_CRITICAL = 80 +TOKEN_COLOR_OK = "green" +TOKEN_COLOR_WARNING = "yellow" +TOKEN_COLOR_CRITICAL = "red" + +ANTHROPIC_COUNT_TOKENS_URL = "https://api.anthropic.com/v1/messages/count_tokens" +ANTHROPIC_API_VERSION = "2023-06-01" +ANTHROPIC_HEADER_API_KEY = "x-api-key" +ANTHROPIC_HEADER_VERSION = "anthropic-version" +HEADER_CONTENT_TYPE = "content-type" +CONTENT_TYPE_JSON = "application/json" +ANTHROPIC_COUNT_TIMEOUT_S = 10.0 + +DEFAULT_CONTEXT_WINDOW = 200_000 +MODEL_CONTEXT_WINDOWS: dict[str, int] = { + "claude-opus-4-7": 
1_000_000, + "claude-opus-4-6": 200_000, + "claude-opus-4-5": 200_000, + "claude-opus-4-1": 200_000, + "claude-opus-4-0": 200_000, + "claude-sonnet-4-6": 200_000, + "claude-sonnet-4-5": 200_000, + "claude-sonnet-4-0": 200_000, + "claude-haiku-4-5": 200_000, + "claude-haiku-4-0": 200_000, +} # (H) Interactive setup prompt - grouped view INTERACTIVE_TITLE_GROUPED = "Detected Directories (will be excluded unless kept)" @@ -723,6 +947,7 @@ class DiffMarker: INPLACE_FLAG = "--inplace" LANG_ATTR_PREFIX = "language_" LANG_ATTR_TYPESCRIPT = "language_typescript" +LANG_ATTR_PHP = "language_php" class TreeSitterModule(StrEnum): @@ -733,8 +958,10 @@ class TreeSitterModule(StrEnum): GO = "tree_sitter_go" SCALA = "tree_sitter_scala" JAVA = "tree_sitter_java" + C = "tree_sitter_c" CPP = "tree_sitter_cpp" LUA = "tree_sitter_lua" + PHP = "tree_sitter_php" # (H) Query dict keys @@ -839,10 +1066,64 @@ class TreeSitterModule(StrEnum): class EventType(StrEnum): MODIFIED = "modified" CREATED = "created" + DELETED = "deleted" -CYPHER_DELETE_MODULE = "MATCH (m:Module {path: $path})-[*0..]->(c) DETACH DELETE m, c" +CYPHER_DELETE_MODULE = ( + "MATCH (m:Module {path: $path}) " + "OPTIONAL MATCH (m)-[:DEFINES|DEFINES_METHOD*0..]->(c) " + "DETACH DELETE m, c" +) +CYPHER_DELETE_FILE = "MATCH (f:File {path: $path}) DETACH DELETE f" +CYPHER_DELETE_FOLDER = "MATCH (f:Folder {path: $path}) DETACH DELETE f" CYPHER_DELETE_CALLS = "MATCH ()-[r:CALLS]->() DELETE r" +# (H) Removes external import-target Module nodes that no module imports anymore +# (H) (e.g. an imported name that was renamed/removed on an incremental rebuild). 
+CYPHER_DELETE_ORPHAN_EXTERNAL_MODULES = ( + "MATCH (m:Module) WHERE m.is_external = true AND NOT (m)<--() DETACH DELETE m" +) + +# (H) Queries for orphan pruning — returns all paths stored in the graph +CYPHER_ALL_FILE_PATHS = ( + "MATCH (f:File) RETURN f.path AS path, f.absolute_path AS absolute_path" +) +CYPHER_ALL_MODULE_PATHS_INTERNAL = ( + "MATCH (m:Module) WHERE m.is_external IS NULL OR m.is_external = false " + "RETURN m.path AS path, m.qualified_name AS qualified_name" +) +CYPHER_ALL_FOLDER_PATHS = ( + "MATCH (f:Folder) RETURN f.path AS path, f.absolute_path AS absolute_path" +) + +# (H) Rehydrate the in-memory function registry on an incremental run: returns +# (H) every definition node's qualified name and label so call/instantiation +# (H) resolution can see symbols defined in files that were not re-parsed. +CYPHER_ALL_DEFINITION_QNS = ( + "MATCH (n) WHERE n:Function OR n:Method OR n:Class OR n:Interface " + "OR n:Enum OR n:Type OR n:Union " + "RETURN n.qualified_name AS qualified_name, head(labels(n)) AS label" +) + +# (H) Inbound reference edges (from unchanged files) into symbols defined in one +# (H) of $paths. Captured BEFORE a changed file's subtree is deleted so the exact +# (H) edges can be restored verbatim afterwards (issue #532, inbound half). +# (H) Re-resolving the callers instead would diverge from a clean index, because +# (H) cgr's call resolution is context-sensitive (protocol vs concrete receiver, +# (H) import granularity); the original edges already match a clean re-index. 
+CYPHER_INBOUND_EDGES = ( + "MATCH (caller)-[r:CALLS|INSTANTIATES|IMPORTS|INHERITS|OVERRIDES]->(target) " + "WHERE target.path IN $paths AND caller.qualified_name IS NOT NULL " + "AND (caller.path IS NULL OR NOT caller.path IN $paths) " + "RETURN head(labels(caller)) AS caller_label, " + "caller.qualified_name AS caller_qn, type(r) AS rel, " + "head(labels(target)) AS target_label, target.qualified_name AS target_qn" +) +CYPHER_PARAM_PATHS = "paths" +KEY_CALLER_LABEL = "caller_label" +KEY_CALLER_QN = "caller_qn" +KEY_REL = "rel" +KEY_TARGET_LABEL = "target_label" +KEY_TARGET_QN = "target_qn" REALTIME_LOGGER_FORMAT = ( "{time:YYYY-MM-DD HH:mm:ss.SSS} | " @@ -853,6 +1134,11 @@ class EventType(StrEnum): WATCHER_SLEEP_INTERVAL = 1 LOG_LEVEL_INFO = "INFO" +LOG_LEVEL_ERROR = "ERROR" + +# (H) Debounce settings for realtime watcher +DEFAULT_DEBOUNCE_SECONDS = 5 +DEFAULT_MAX_WAIT_SECONDS = 30 class Architecture(StrEnum): @@ -880,8 +1166,11 @@ class Architecture(StrEnum): PYINSTALLER_ARG_COLLECT_ALL = "--collect-all" PYINSTALLER_ARG_COLLECT_DATA = "--collect-data" PYINSTALLER_ARG_HIDDEN_IMPORT = "--hidden-import" +PYINSTALLER_ARG_EXCLUDE_MODULE = "--exclude-module" PYINSTALLER_ENTRY_POINT = "main.py" +PYINSTALLER_EXCLUDED_MODULES = ["logfire"] + # (H) TOML parsing constants TOML_KEY_PROJECT = "project" TOML_KEY_OPTIONAL_DEPS = "optional-dependencies" @@ -905,6 +1194,7 @@ class Architecture(StrEnum): PyInstallerPackage(name="loguru", collect_all=True), PyInstallerPackage(name="toml", collect_all=True), PyInstallerPackage(name="protobuf", collect_all=True), + PyInstallerPackage(name="genai_prices", collect_all=True), ] ALLOWED_COMMENT_MARKERS = frozenset( @@ -961,6 +1251,46 @@ class UniXcoderMode(StrEnum): CYPHER_SEMICOLON = ";" CYPHER_BACKTICK = "`" CYPHER_MATCH_KEYWORD = "MATCH" +CYPHER_DANGEROUS_KEYWORDS: frozenset[str] = frozenset( + { + "DELETE", + "DETACH", + "DROP", + "CREATE INDEX", + "CREATE CONSTRAINT", + "REMOVE", + "SET", + "MERGE", + "CREATE", + "LOAD CSV", + 
"FOREACH", + } +) + +CYPHER_ALLOWED_PROCEDURE_PREFIXES: frozenset[str] = frozenset( + { + "algo.", + "betweenness_centrality.", + "biconnected_components.", + "bridges.", + "community_detection.", + "cycles.", + "degree_centrality.", + "graph_analyzer.", + "graph_util.", + "igraphalg.", + "katz_centrality.", + "leiden_community_detection.", + "neighbors.", + "node_similarity.", + "nxalg.", + "pagerank.", + "path.", + "schema.", + "weakly_connected_components.", + "wcc.", + } +) # (H) Tool success messages MSG_SURGICAL_SUCCESS = "Successfully applied surgical code replacement in: {path}" @@ -1105,12 +1435,23 @@ class UniXcoderMode(StrEnum): # (H) Query tool messages QUERY_NOT_AVAILABLE = "N/A" DICT_KEY_RESULTS = "results" +TIKTOKEN_ENCODING = "cl100k_base" QUERY_SUMMARY_SUCCESS = "Successfully retrieved {count} item(s) from the graph." +QUERY_SUMMARY_TRUNCATED = ( + "Results truncated: showing {kept} of {total} items (~{tokens} tokens, limit {max_tokens}). " + "Refine your query for more specific results." +) QUERY_SUMMARY_TRANSLATION_FAILED = ( "I couldn't translate your request into a database query. Error: {error}" ) QUERY_SUMMARY_DB_ERROR = "There was an error querying the database: {error}" +QUERY_SUMMARY_TIMEOUT = ( + "Query exceeded the {timeout:.1f}s timeout and was cancelled. " + "Avoid unbounded traversals; add depth bounds or use a graph-algorithm procedure." 
+) QUERY_RESULTS_PANEL_TITLE = "[bold blue]Cypher Query Results[/bold blue]" +CYPHER_MEMORY_LIMIT_SUFFIX = " QUERY MEMORY LIMIT {mb} MB" +CYPHER_MEMORY_LIMIT_TOKEN = "QUERY MEMORY LIMIT" # (H) File editor constants TMP_EXTENSION = ".tmp" @@ -1453,6 +1794,15 @@ class CppNodeType(StrEnum): CONSTRUCTOR_OR_DESTRUCTOR_DECLARATION = "constructor_or_destructor_declaration" INLINE_METHOD_DEFINITION = "inline_method_definition" OPERATOR_CAST_DEFINITION = "operator_cast_definition" + TYPE_IDENTIFIER = "type_identifier" + PARAMETER_LIST = "parameter_list" + PARAMETER_DECLARATION = "parameter_declaration" + OPTIONAL_PARAMETER_DECLARATION = "optional_parameter_declaration" + INIT_DECLARATOR = "init_declarator" + TEMPLATE_TYPE = "template_type" + FIELD_EXPRESSION = "field_expression" + COMPOUND_STATEMENT = "compound_statement" + THIS = "this" CPP_MODULE_EXTENSIONS = (".ixx", ".cppm", ".ccm", ".mxx") @@ -1485,6 +1835,13 @@ class CppNodeType(StrEnum): CPP_KEYWORD_STRUCT = "struct" CPP_EXPORTED_CLASS_KEYWORDS = frozenset({CPP_KEYWORD_CLASS, CPP_KEYWORD_STRUCT}) +# (H) A C/C++ class/struct/union tag with no body is a forward declaration +# (H) (`class Widget;`); it must not become its own node, or it collides with the +# (H) real definition's qn and fragments one class into several same-named nodes. +CPP_TYPE_SPECIFIER_NODE_TYPES = frozenset( + {"class_specifier", "struct_specifier", "union_specifier"} +) + CPP_FALLBACK_OPERATOR = "operator_unknown" CPP_FALLBACK_DESTRUCTOR = "~destructor" CPP_OPERATOR_TEXT_PREFIX = "operator" @@ -1569,6 +1926,13 @@ class CppNodeType(StrEnum): # (H) Gemfile parsing patterns GEMFILE_GEM_PREFIX = "gem " +# (H) Incremental update hash cache +HASH_CACHE_FILENAME = ".cgr-hash-cache.json" +DIR_MTIMES_FILENAME = ".cgr-dir-mtimes.json" +GIT_DIR_NAME = ".git" +ROOT_DIR_KEY = "." 
+JSON_EMPTY_OBJECT = "{}" + # (H) Import processor cache config IMPORT_CACHE_TTL = 3600 IMPORT_CACHE_DIR = ".cache/codebase_rag" @@ -1666,11 +2030,25 @@ class CppNodeType(StrEnum): # (H) Tree-sitter Go node types TS_GO_TYPE_DECLARATION = "type_declaration" +TS_GO_TYPE_SPEC = "type_spec" +TS_GO_TYPE_ALIAS = "type_alias" +TS_GO_STRUCT_TYPE = "struct_type" +TS_GO_INTERFACE_TYPE = "interface_type" +TS_GO_PARAMETER_DECLARATION = "parameter_declaration" TS_GO_SOURCE_FILE = "source_file" TS_GO_FUNCTION_DECLARATION = "function_declaration" TS_GO_METHOD_DECLARATION = "method_declaration" TS_GO_CALL_EXPRESSION = "call_expression" TS_GO_IMPORT_DECLARATION = "import_declaration" +TS_GO_PARAMETER_LIST = "parameter_list" +TS_GO_VAR_DECLARATION = "var_declaration" +TS_GO_VAR_SPEC = "var_spec" +TS_GO_SHORT_VAR_DECLARATION = "short_var_declaration" +TS_GO_EXPRESSION_LIST = "expression_list" +TS_GO_COMPOSITE_LITERAL = "composite_literal" +TS_GO_UNARY_EXPRESSION = "unary_expression" +TS_GO_POINTER_TYPE = "pointer_type" +FIELD_OPERAND = "operand" # (H) Tree-sitter Scala node types TS_SCALA_CLASS_DEFINITION = "class_definition" @@ -1680,23 +2058,28 @@ class CppNodeType(StrEnum): TS_SCALA_FUNCTION_DEFINITION = "function_definition" TS_SCALA_FUNCTION_DECLARATION = "function_declaration" TS_SCALA_CALL_EXPRESSION = "call_expression" -TS_SCALA_GENERIC_FUNCTION = "generic_function" +# (H) Shared tree-sitter node type: a call with explicit type args, e.g. Rust +# (H) turbofish `f::<T>()` and Scala `f[T]()`. Its `function` field holds the +# (H) actual callee (identifier or scoped_identifier).
+TS_GENERIC_FUNCTION = "generic_function" +TS_SCALA_GENERIC_FUNCTION = TS_GENERIC_FUNCTION TS_SCALA_FIELD_EXPRESSION = "field_expression" TS_SCALA_INFIX_EXPRESSION = "infix_expression" TS_SCALA_IMPORT_DECLARATION = "import_declaration" -# (H) Tree-sitter C# node types -TS_CS_STRUCT_DECLARATION = "struct_declaration" -TS_CS_COMPILATION_UNIT = "compilation_unit" -TS_CS_DESTRUCTOR_DECLARATION = "destructor_declaration" -TS_CS_LOCAL_FUNCTION_STATEMENT = "local_function_statement" -TS_CS_FUNCTION_POINTER_TYPE = "function_pointer_type" -TS_CS_ANONYMOUS_METHOD_EXPRESSION = "anonymous_method_expression" -TS_CS_LAMBDA_EXPRESSION = "lambda_expression" -TS_CS_INVOCATION_EXPRESSION = "invocation_expression" - # (H) Tree-sitter PHP node types +TS_PHP_FUNCTION_DEFINITION = "function_definition" +TS_PHP_METHOD_DECLARATION = "method_declaration" TS_PHP_TRAIT_DECLARATION = "trait_declaration" +# (H) PHP inheritance clauses: `extends ...` (base_clause, for class AND +# (H) interface) and `implements ...` (class_interface_clause); each lists `name` +# (H) nodes naming the base types. +TS_PHP_BASE_CLAUSE = "base_clause" +TS_PHP_CLASS_INTERFACE_CLAUSE = "class_interface_clause" +TS_PHP_NAME = "name" +# (H) PHP fully-qualified base (`\Exception`, `\App\Base`); its trailing `name` +# (H) child is the simple name cgr resolves against. 
# (H) PHP fully-qualified base (`\Exception`, `\App\Base`); its trailing `name`
# (H) child is the simple name cgr resolves against. Defined exactly once: a
# (H) second identical assignment further down this run was redundant.
TS_PHP_QUALIFIED_NAME = "qualified_name"
TS_PHP_FUNCTION_STATIC_DECLARATION = "function_static_declaration"
TS_PHP_ANONYMOUS_FUNCTION = "anonymous_function"
TS_PHP_ARROW_FUNCTION = "arrow_function"
TS_PHP_SCOPED_CALL_EXPRESSION = "scoped_call_expression"
TS_PHP_FUNCTION_CALL_EXPRESSION = "function_call_expression"
TS_PHP_NULLSAFE_MEMBER_CALL_EXPRESSION = "nullsafe_member_call_expression"
TS_PHP_OBJECT_CREATION_EXPRESSION = "object_creation_expression"
TS_PHP_NAMESPACE_DEFINITION = "namespace_definition"
TS_PHP_NAMESPACE_USE_DECLARATION = "namespace_use_declaration"
TS_PHP_NAMESPACE_USE_CLAUSE = "namespace_use_clause"
TS_PHP_INCLUDE_EXPRESSION = "include_expression"
TS_PHP_INCLUDE_ONCE_EXPRESSION = "include_once_expression"
TS_PHP_REQUIRE_EXPRESSION = "require_expression"
TS_PHP_REQUIRE_ONCE_EXPRESSION = "require_once_expression"
TS_PHP_ATTRIBUTE_LIST = "attribute_list"
TS_PHP_ATTRIBUTE = "attribute"
TS_PHP_ATTRIBUTE_GROUP = "attribute_group"
TS_PHP_VISIBILITY_MODIFIER = "visibility_modifier"
TS_PHP_USE_DECLARATION = "use_declaration"

# (H) Tree-sitter Lua node types for language_spec
TS_LUA_CHUNK = "chunk"

TS_VIRTUAL = "virtual"
TS_TYPE_LIST = "type_list"
TS_CLASS_HERITAGE = "class_heritage"
# (H) TS class `implements I, J` clause (a child of class_heritage).
TS_IMPLEMENTS_CLAUSE = "implements_clause"
TS_EXTENDS_CLAUSE = "extends_clause"
TS_MEMBER_EXPRESSION = "member_expression"
TS_SELECTOR_EXPRESSION = "selector_expression"
TS_EXTENDS = "extends"
TS_ARGUMENTS = "arguments"
TS_EXTENDS_TYPE_CLAUSE = "extends_type_clause"
# (H) Java interface `extends A, B` clause (tree-sitter-java); holds a type_list.
+TS_JAVA_EXTENDS_INTERFACES = "extends_interfaces" TS_METHOD_DEFINITION = "method_definition" TS_DECORATOR = "decorator" TS_ERROR = "ERROR" @@ -1757,6 +2159,16 @@ class CppNodeType(StrEnum): # (H) Derived node type tuples for class ingestion CPP_CLASS_TYPES = (CppNodeType.CLASS_SPECIFIER, TS_STRUCT_SPECIFIER) CPP_COMPOUND_TYPES = (*CPP_CLASS_TYPES, TS_UNION_SPECIFIER, TS_ENUM_SPECIFIER) +# (H) Node types that open their own variable scope; C++ local-variable inference must +# (H) not descend into them, or a name declared inside a lambda / nested function / +# (H) local class body would be attributed to the enclosing function's scope. +CPP_NESTED_SCOPE_NODE_TYPES = frozenset( + ( + TS_CPP_FUNCTION_DEFINITION, + TS_CPP_LAMBDA_EXPRESSION, + *CPP_COMPOUND_TYPES, + ) +) JS_TS_PARENT_REF_TYPES = (TS_IDENTIFIER, TS_MEMBER_EXPRESSION) # (H) Import processor function names @@ -1827,6 +2239,20 @@ class CppNodeType(StrEnum): } ) +# (H) Java stdlib package prefixes for static stdlib detection +JAVA_STDLIB_PREFIXES = ( + "java.", + "javax.", + "jdk.", + "com.sun.", + "sun.", + "org.w3c.", + "org.xml.", + "org.ietf.", + "org.omg.", + "netscape.", +) + # (H) Java common class names for heuristic detection JAVA_STDLIB_CLASSES = frozenset( { @@ -1906,6 +2332,7 @@ class CppNodeType(StrEnum): # (H) Tree-sitter field names for child_by_field_name TS_FIELD_NAME = "name" TS_FIELD_TYPE = "type" +TS_SCOPED_TYPE_IDENTIFIER = "scoped_type_identifier" TS_FIELD_SUPERCLASS = "superclass" TS_FIELD_INTERFACES = "interfaces" TS_FIELD_TYPE_PARAMETERS = "type_parameters" @@ -2095,6 +2522,8 @@ class CppNodeType(StrEnum): TS_PY_FOR_STATEMENT = "for_statement" TS_PY_FOR_IN_CLAUSE = "for_in_clause" TS_PY_ASSIGNMENT = "assignment" +PY_ASSIGNMENT_QUERY = "(assignment) @assignment" +PY_RETURN_QUERY = "(return_statement) @return_stmt" TS_PY_CLASS_DEFINITION = "class_definition" TS_PY_BLOCK = "block" TS_PY_FUNCTION_DEFINITION = "function_definition" @@ -2109,11 +2538,57 @@ class CppNodeType(StrEnum): 
TS_PY_STRING = "string" TS_PY_DECORATED_DEFINITION = "decorated_definition" TS_PY_DECORATOR = "decorator" +TS_PY_KEYWORD_ARGUMENT = "keyword_argument" +TS_PY_DEFAULT_PARAMETER = "default_parameter" +TS_PY_LIST_SPLAT_PATTERN = "list_splat_pattern" +TS_PY_DICTIONARY_SPLAT_PATTERN = "dictionary_splat_pattern" +TS_PY_SUBSCRIPT = "subscript" +TS_PY_COMPARISON_OPERATOR = "comparison_operator" +TS_FIELD_OPERATORS = "operators" +TS_PY_IF_STATEMENT = "if_statement" +TS_PY_WHILE_STATEMENT = "while_statement" +TS_PY_ELIF_CLAUSE = "elif_clause" +TS_PY_CONDITIONAL_EXPRESSION = "conditional_expression" +TS_PY_BOOLEAN_OPERATOR = "boolean_operator" +TS_PY_NOT_OPERATOR = "not_operator" +TS_FIELD_CONDITION = "condition" +TS_FIELD_ARGUMENT = "argument" + +# (H) Python operator syntax dispatches to dunder methods at runtime; these names +# (H) let the call extractor synthesize the implied .__dunder__ call. +PY_OP_IN = "in" +PY_BUILTIN_LEN = "len" +PY_BUILTIN_GETATTR = "getattr" +TS_PY_STRING_CONTENT = "string_content" +PY_DUNDER_GETITEM = "__getitem__" +PY_DUNDER_SETITEM = "__setitem__" +PY_DUNDER_CONTAINS = "__contains__" +PY_DUNDER_LEN = "__len__" +PY_DUNDER_BOOL = "__bool__" +# (H) Operands with these characters are not simple attribute/name chains (calls, +# (H) nested subscripts, whitespace), so the operator-dispatch synthesizer skips them. +PY_OPERAND_REJECT_CHARS = "()[]{}\n\t " +# (H) Optional annotation handling: X | None names a single concrete class. +PY_UNION_SEPARATOR = "|" +PY_NONE = "None" # (H) Python keyword identifiers PY_KEYWORD_SELF = "self" PY_KEYWORD_CLS = "cls" +# (H) typing.Protocol base name and the conventional XxxProtocol class suffix +# (H) used to map a Protocol to its concrete implementer. 
+PY_PROTOCOL = "Protocol" PY_METHOD_INIT = "__init__" +DECORATOR_AT = "@" +PROPERTY_DECORATORS: frozenset[str] = frozenset({"property", "cached_property"}) +ABSTRACT_DECORATORS: frozenset[str] = frozenset({"abstractmethod", "abstractproperty"}) + +# (H) Eager builtins that invoke a callable argument synchronously within the +# (H) caller's own stack frame; a function passed to one is invoked there, so the +# (H) trace attributes the call to the enclosing function (no Python frame exists +# (H) for the builtin). Lazy higher-order builtins (map/filter) are excluded: +# (H) they defer invocation until the result is consumed, which may be elsewhere. +HIGHER_ORDER_BUILTINS: frozenset[str] = frozenset({"sorted", "min", "max", "reduce"}) # (H) Python attribute prefixes PY_SELF_PREFIX = "self." @@ -2134,8 +2609,9 @@ class CppNodeType(StrEnum): TYPE_INFERENCE_LIST = "list" TYPE_INFERENCE_BASE_MODEL = "BaseModel" -# (H) Type inference guard attribute +# (H) Recursion guard attributes ATTR_TYPE_INFERENCE_IN_PROGRESS = "_type_inference_in_progress" +GUARD_INHERITED_METHOD = "_inherited_method_guard" # (H) JS/TS ingest node types TS_PAIR = "pair" @@ -2307,6 +2783,7 @@ class CppNodeType(StrEnum): # (H) Tree-sitter Rust node types TS_RS_SCOPED_TYPE_IDENTIFIER = "scoped_type_identifier" +TS_RS_PRIMITIVE_TYPE = "primitive_type" TS_RS_USE_AS_CLAUSE = "use_as_clause" TS_RS_USE_WILDCARD = "use_wildcard" TS_RS_USE_LIST = "use_list" @@ -2355,12 +2832,21 @@ class MCPToolName(StrEnum): DELETE_PROJECT = "delete_project" WIPE_DATABASE = "wipe_database" INDEX_REPOSITORY = "index_repository" + UPDATE_REPOSITORY = "update_repository" QUERY_CODE_GRAPH = "query_code_graph" GET_CODE_SNIPPET = "get_code_snippet" SURGICAL_REPLACE_CODE = "surgical_replace_code" READ_FILE = "read_file" WRITE_FILE = "write_file" LIST_DIRECTORY = "list_directory" + SEMANTIC_SEARCH = "semantic_search" + ASK_AGENT = "ask_agent" + + +# (H) MCP transport selection +class MCPTransport(StrEnum): + STDIO = "stdio" + HTTP = 
"http" # (H) MCP environment variables @@ -2400,6 +2886,8 @@ class MCPParamName(StrEnum): LIMIT = "limit" CONTENT = "content" DIRECTORY_PATH = "directory_path" + TOP_K = "top_k" + QUESTION = "question" # (H) MCP server constants @@ -2418,6 +2906,12 @@ class MCPParamName(StrEnum): MCP_WRITE_SUCCESS = "Successfully wrote file: {path}" MCP_UNKNOWN_TOOL_ERROR = "Unknown tool: {name}" MCP_TOOL_EXEC_ERROR = "Error executing tool '{name}': {error}" +MCP_UPDATE_SUCCESS = "Successfully updated repository at {path} (no database wipe)." +MCP_UPDATE_ERROR = "Error updating repository: {error}" +MCP_SEMANTIC_NOT_AVAILABLE_RESPONSE = ( + "Semantic search is not available. Install with: uv sync --extra semantic" +) +MCP_ASK_AGENT_ERROR = "Error running ask_agent: {error}" MCP_PROJECT_DELETED = "Successfully deleted project '{project_name}'." MCP_WIPE_CANCELLED = "Database wipe cancelled. Set confirm=true to proceed." MCP_WIPE_SUCCESS = "Database completely wiped. All projects have been removed." @@ -2460,11 +2954,14 @@ class MCPParamName(StrEnum): TS_FUNCTION_EXPRESSION, ) -# (H) FQN node type tuples for TS +# (H) FQN node type tuples for TS. The grammar emits `internal_module` for a +# (H) `namespace`/`module` block; without it a class declared inside a namespace +# (H) loses the namespace from its qn and collides with a top-level same name. 
FQN_TS_SCOPE_TYPES = ( TS_CLASS_DECLARATION, TS_INTERFACE_DECLARATION, TS_NAMESPACE_DEFINITION, + TS_INTERNAL_MODULE, TS_PROGRAM, TS_FUNCTION_DECLARATION, TS_FUNCTION_EXPRESSION, @@ -2550,35 +3047,19 @@ class MCPParamName(StrEnum): TS_SCALA_FUNCTION_DECLARATION, ) -# (H) FQN node type tuples for C# -FQN_CS_SCOPE_TYPES = ( - TS_CLASS_DECLARATION, - TS_CS_STRUCT_DECLARATION, - TS_INTERFACE_DECLARATION, - TS_CS_COMPILATION_UNIT, -) -FQN_CS_FUNCTION_TYPES = ( - TS_CS_DESTRUCTOR_DECLARATION, - TS_CS_LOCAL_FUNCTION_STATEMENT, - TS_CS_FUNCTION_POINTER_TYPE, - TS_CONSTRUCTOR_DECLARATION, - TS_CS_ANONYMOUS_METHOD_EXPRESSION, - TS_CS_LAMBDA_EXPRESSION, - TS_METHOD_DECLARATION, -) - # (H) FQN node type tuples for PHP FQN_PHP_SCOPE_TYPES = ( TS_CLASS_DECLARATION, TS_INTERFACE_DECLARATION, TS_PHP_TRAIT_DECLARATION, + TS_PHP_NAMESPACE_DEFINITION, TS_PROGRAM, ) FQN_PHP_FUNCTION_TYPES = ( - TS_PY_FUNCTION_DEFINITION, + TS_PHP_FUNCTION_DEFINITION, + TS_PHP_METHOD_DECLARATION, TS_PHP_ANONYMOUS_FUNCTION, TS_PHP_ARROW_FUNCTION, - TS_PHP_FUNCTION_STATIC_DECLARATION, ) # (H) LANGUAGE_SPECS node type tuples for Python @@ -2599,6 +3080,8 @@ class MCPParamName(StrEnum): TS_FUNCTION_DECLARATION, TS_CLASS_DECLARATION, TS_METHOD_DEFINITION, + # (H) TS `namespace`/`module` block; its `name` field scopes nested classes. 
+ TS_INTERNAL_MODULE, ) # (H) Derived node types for _rust_get_name @@ -2617,6 +3100,13 @@ class MCPParamName(StrEnum): TS_ENUM_SPECIFIER, ) +# (H) Derived node types for _c_get_name +C_NAME_NODE_TYPES = ( + TS_STRUCT_SPECIFIER, + TS_UNION_SPECIFIER, + TS_ENUM_SPECIFIER, +) + # (H) LANGUAGE_SPECS node type tuples for Rust SPEC_RS_FUNCTION_TYPES = ( TS_RS_FUNCTION_ITEM, @@ -2639,7 +3129,7 @@ class MCPParamName(StrEnum): # (H) LANGUAGE_SPECS node type tuples for Go SPEC_GO_FUNCTION_TYPES = (TS_GO_FUNCTION_DECLARATION, TS_GO_METHOD_DECLARATION) -SPEC_GO_CLASS_TYPES = (TS_GO_TYPE_DECLARATION,) +SPEC_GO_CLASS_TYPES = (TS_GO_TYPE_SPEC, TS_GO_TYPE_ALIAS) SPEC_GO_MODULE_TYPES = (TS_GO_SOURCE_FILE,) SPEC_GO_CALL_TYPES = (TS_GO_CALL_EXPRESSION,) SPEC_GO_IMPORT_TYPES = (TS_GO_IMPORT_DECLARATION,) @@ -2713,44 +3203,53 @@ class MCPParamName(StrEnum): PKG_CONANFILE, ) -# (H) LANGUAGE_SPECS node type tuples for C# -SPEC_CS_FUNCTION_TYPES = ( - TS_CS_DESTRUCTOR_DECLARATION, - TS_CS_LOCAL_FUNCTION_STATEMENT, - TS_CS_FUNCTION_POINTER_TYPE, - TS_CONSTRUCTOR_DECLARATION, - TS_CS_ANONYMOUS_METHOD_EXPRESSION, - TS_CS_LAMBDA_EXPRESSION, - TS_METHOD_DECLARATION, +# (H) FQN node type tuples for C +FQN_C_SCOPE_TYPES = ( + TS_CPP_TRANSLATION_UNIT, + TS_STRUCT_SPECIFIER, + TS_UNION_SPECIFIER, + TS_ENUM_SPECIFIER, ) -SPEC_CS_CLASS_TYPES = ( - TS_CLASS_DECLARATION, - TS_CS_STRUCT_DECLARATION, - TS_ENUM_DECLARATION, - TS_INTERFACE_DECLARATION, +FQN_C_FUNCTION_TYPES = (TS_CPP_FUNCTION_DEFINITION,) + +# (H) LANGUAGE_SPECS node type tuples for C +SPEC_C_FUNCTION_TYPES = (TS_CPP_FUNCTION_DEFINITION,) +SPEC_C_CLASS_TYPES = ( + TS_STRUCT_SPECIFIER, + TS_UNION_SPECIFIER, + TS_ENUM_SPECIFIER, ) -SPEC_CS_MODULE_TYPES = (TS_CS_COMPILATION_UNIT,) -SPEC_CS_CALL_TYPES = (TS_CS_INVOCATION_EXPRESSION,) +SPEC_C_MODULE_TYPES = (TS_CPP_TRANSLATION_UNIT,) +SPEC_C_CALL_TYPES = (TS_CPP_CALL_EXPRESSION,) +SPEC_C_PACKAGE_INDICATORS = (PKG_CMAKE_LISTS, PKG_MAKEFILE) # (H) LANGUAGE_SPECS node type tuples for PHP 
SPEC_PHP_FUNCTION_TYPES = ( - TS_PHP_FUNCTION_STATIC_DECLARATION, + TS_PHP_FUNCTION_DEFINITION, + TS_PHP_METHOD_DECLARATION, TS_PHP_ANONYMOUS_FUNCTION, - TS_PY_FUNCTION_DEFINITION, TS_PHP_ARROW_FUNCTION, ) SPEC_PHP_CLASS_TYPES = ( + TS_CLASS_DECLARATION, + TS_INTERFACE_DECLARATION, TS_PHP_TRAIT_DECLARATION, TS_ENUM_DECLARATION, - TS_INTERFACE_DECLARATION, - TS_CLASS_DECLARATION, ) SPEC_PHP_MODULE_TYPES = (TS_PROGRAM,) SPEC_PHP_CALL_TYPES = ( + TS_PHP_FUNCTION_CALL_EXPRESSION, TS_PHP_MEMBER_CALL_EXPRESSION, TS_PHP_SCOPED_CALL_EXPRESSION, - TS_PHP_FUNCTION_CALL_EXPRESSION, TS_PHP_NULLSAFE_MEMBER_CALL_EXPRESSION, + TS_PHP_OBJECT_CREATION_EXPRESSION, +) +SPEC_PHP_IMPORT_TYPES = (TS_PHP_NAMESPACE_USE_DECLARATION,) +SPEC_PHP_IMPORT_FROM_TYPES = ( + TS_PHP_INCLUDE_EXPRESSION, + TS_PHP_INCLUDE_ONCE_EXPRESSION, + TS_PHP_REQUIRE_EXPRESSION, + TS_PHP_REQUIRE_ONCE_EXPRESSION, ) # (H) LANGUAGE_SPECS node type tuples for Lua diff --git a/codebase_rag/cypher_queries.py b/codebase_rag/cypher_queries.py index 8d70bae4e..cf06641d7 100644 --- a/codebase_rag/cypher_queries.py +++ b/codebase_rag/cypher_queries.py @@ -52,8 +52,8 @@ CYPHER_EXAMPLE_LIMIT_ONE = """MATCH (f:File) RETURN f.path as path, f.name as name, labels(f) as type LIMIT 1""" CYPHER_EXAMPLE_CLASS_METHODS = f"""MATCH (c:Class)-[:DEFINES_METHOD]->(m:Method) -WHERE c.qualified_name ENDS WITH '.UserService' -RETURN m.name AS name, m.qualified_name AS qualified_name, labels(m) AS type +WHERE c.name = 'UserService' +RETURN c.name AS className, m.name AS methodName, m.qualified_name AS qualified_name, labels(m) AS type LIMIT {CYPHER_DEFAULT_LIMIT}""" CYPHER_EXPORT_NODES = """ @@ -84,6 +84,90 @@ """ +CYPHER_STATS_NODE_COUNTS = """ +MATCH (n) +RETURN labels(n) AS labels, count(*) AS count +ORDER BY count DESC +""" + +CYPHER_STATS_RELATIONSHIP_COUNTS = """ +MATCH ()-[r]->() +RETURN type(r) AS type, count(*) AS count +ORDER BY count DESC +""" + + +_DEAD_CODE_TEST_ROOT_CLAUSE = ( + "\n OR ANY(p IN $test_patterns WHERE n.path 
CONTAINS p)" +) + +# (H) A node reached by a Module node runs at import (top-level statement, +# (H) `if __name__ == "__main__"`, a bare decorator, or a module-scope +# (H) construction), so it is a root. `size([...])` avoids the non-standard +# (H) `exists(pattern)`. When tests are excluded, an edge from a test module must +# (H) NOT keep project code alive, so the test-module variant filters by path. +# (H) `{module_rels}` is the relationship set walked from the module (CALLS, plus +# (H) INSTANTIATES when classes are included so module-scope construction roots a +# (H) class). +_DEAD_CODE_MODULE_ROOT_ANY = "size([(n)<-[:{module_rels}]-(:Module) | 1]) > 0" +_DEAD_CODE_MODULE_ROOT_NON_TEST = ( + "size([(n)<-[:{module_rels}]-(m:Module)" + " WHERE NOT ANY(p IN $test_patterns WHERE m.path CONTAINS p) | 1]) > 0" +) + +# (H) Reachability walks CALLS only by default. With classes included it also +# (H) walks INSTANTIATES (construction keeps a class live) and INHERITS forward +# (H) from subclass to base, so a base is kept live only by a REACHABLE subclass. +# (H) A base whose sole subclass is itself unreachable is therefore reported as +# (H) part of the dead cluster (the subclass is reported too). Classes referenced +# (H) solely via type annotations / isinstance / dynamic lookups are not modelled +# (H) as edges, so class candidates are review hints, not a delete list. 
# (H) Assemble the dead-code Cypher query. `include_classes` widens the node
# (H) labels and the traversal to class-level edges; `include_tests` decides
# (H) whether any Module edge roots a node (and test-path nodes are roots
# (H) themselves) or only edges from non-test modules do.
def build_dead_code_query(include_tests: bool, include_classes: bool = False) -> str:
    # (H) Module-scope rooting never follows INHERITS; only the reachability
    # (H) traversal does, and only when classes are part of the report.
    module_rels = "CALLS|INSTANTIATES" if include_classes else "CALLS"
    root_template, test_clause = (
        (_DEAD_CODE_MODULE_ROOT_ANY, _DEAD_CODE_TEST_ROOT_CLAUSE)
        if include_tests
        else (_DEAD_CODE_MODULE_ROOT_NON_TEST, "")
    )
    return _DEAD_CODE_QUERY_TEMPLATE.format(
        labels="Function|Method|Class" if include_classes else "Function|Method",
        traversal="CALLS|INSTANTIATES|INHERITS" if include_classes else "CALLS",
        module_clause=root_template.format(module_rels=module_rels),
        test_clause=test_clause,
    )
f"MATCH (a:{from_label} {{{from_key}: row.from_val}}), " + f"(b:{to_label} {{{to_key}: row.to_val}})\n" + f"CREATE (a)-[r:{rel_type}]->(b)\n" + ) + query += CYPHER_SET_PROPS_RETURN_COUNT if has_props else CYPHER_RETURN_COUNT + return query diff --git a/codebase_rag/docker-compose.yaml b/codebase_rag/docker-compose.yaml new file mode 100644 index 000000000..1b394c873 --- /dev/null +++ b/codebase_rag/docker-compose.yaml @@ -0,0 +1,27 @@ +services: + memgraph: + image: memgraph/memgraph-mage + ports: + - "${MEMGRAPH_PORT:-7687}:7687" + - "${MEMGRAPH_HTTP_PORT:-7444}:7444" + volumes: + - memgraph_data:/var/lib/memgraph + - memgraph_log:/var/log/memgraph + lab: + image: memgraph/lab + ports: + - "${LAB_PORT:-3000}:3000" + environment: + QUICK_CONNECT_MG_HOST: memgraph + qdrant: + image: qdrant/qdrant + ports: + - "${QDRANT_HTTP_PORT:-6333}:6333" + - "${QDRANT_GRPC_PORT:-6334}:6334" + volumes: + - qdrant_storage:/qdrant/storage + +volumes: + qdrant_storage: + memgraph_data: + memgraph_log: diff --git a/codebase_rag/embedder.py b/codebase_rag/embedder.py index 0928cae97..89b3b466f 100644 --- a/codebase_rag/embedder.py +++ b/codebase_rag/embedder.py @@ -1,19 +1,96 @@ -# ┌────────────────────────────────────────────────────────────────────────┐ -# │ UniXcoder Model Singleton via LRU Cache │ -# ├────────────────────────────────────────────────────────────────────────┤ -# │ get_model() provides: │ -# │ - Singleton behavior without global variables │ -# │ - Thread-safe lazy initialization │ -# │ - Easy testability with cache_clear() method │ -# │ - Memory efficient with maxsize=1 │ -# └────────────────────────────────────────────────────────────────────────┘ +from __future__ import annotations + +import hashlib +import json from functools import lru_cache +from pathlib import Path + +from loguru import logger +from . import constants as cs from . import exceptions as ex +from . 
import logs as ls from .config import settings -from .constants import UNIXCODER_MODEL from .utils.dependencies import has_torch, has_transformers + +class EmbeddingCache: + __slots__ = ("_cache", "_path") + + def __init__(self, path: Path | None = None) -> None: + self._cache: dict[str, list[float]] = {} + self._path = path + + @staticmethod + def _content_hash(content: str) -> str: + return hashlib.sha256(content.encode()).hexdigest() + + def get(self, content: str) -> list[float] | None: + return self._cache.get(self._content_hash(content)) + + def put(self, content: str, embedding: list[float]) -> None: + self._cache[self._content_hash(content)] = embedding + + def get_many(self, snippets: list[str]) -> dict[int, list[float]]: + results: dict[int, list[float]] = {} + for i, snippet in enumerate(snippets): + if (cached := self.get(snippet)) is not None: + results[i] = cached + return results + + def put_many(self, snippets: list[str], embeddings: list[list[float]]) -> None: + for snippet, embedding in zip(snippets, embeddings): + self.put(snippet, embedding) + + def save(self) -> None: + if self._path is None: + return + try: + self._path.parent.mkdir(parents=True, exist_ok=True) + with self._path.open("w", encoding="utf-8") as f: + json.dump(self._cache, f) + except Exception as e: + logger.warning(ls.EMBEDDING_CACHE_SAVE_FAILED, path=self._path, error=e) + + def load(self) -> None: + if self._path is None or not self._path.exists(): + return + try: + with self._path.open("r", encoding="utf-8") as f: + self._cache = json.load(f) + logger.debug( + ls.EMBEDDING_CACHE_LOADED, count=len(self._cache), path=self._path + ) + except Exception as e: + logger.warning(ls.EMBEDDING_CACHE_LOAD_FAILED, path=self._path, error=e) + self._cache = {} + + def clear(self) -> None: + self._cache.clear() + + def __len__(self) -> int: + return len(self._cache) + + +_embedding_cache: EmbeddingCache | None = None + + +def get_embedding_cache() -> EmbeddingCache: + global 
# (H) Embed many snippets, reusing cached vectors and running the model only on
# (H) cache misses, `batch_size` snippets per forward pass. The result has one
# (H) embedding per input snippet, in input order. `max_length` falls back to
# (H) settings.EMBEDDING_MAX_LENGTH when None. Raises ValueError when the model
# (H) has to run and `batch_size` is not positive.
def embed_code_batch(
    snippets: list[str],
    max_length: int | None = None,
    batch_size: int = cs.EMBEDDING_DEFAULT_BATCH_SIZE,
) -> list[list[float]]:
    if not snippets:
        return []

    if max_length is None:
        max_length = settings.EMBEDDING_MAX_LENGTH

    cache = get_embedding_cache()
    cached_results = cache.get_many(snippets)

    if len(cached_results) == len(snippets):
        logger.debug(ls.EMBEDDING_CACHE_HIT, count=len(snippets))
        return [cached_results[i] for i in range(len(snippets))]

    # (H) Checked only once batching is unavoidable, so fully cached calls keep
    # (H) working as before. A negative step used to skip the loop entirely and
    # (H) surface later as an IndexError while assembling the results.
    if batch_size <= 0:
        raise ValueError(ex.BATCH_SIZE)

    # (H) Group the positions of cache misses by snippet text so each distinct
    # (H) snippet is embedded once, however often it repeats in the input.
    positions_by_snippet: dict[str, list[int]] = {}
    for i, snippet in enumerate(snippets):
        if i not in cached_results:
            positions_by_snippet.setdefault(snippet, []).append(i)
    unique_snippets = list(positions_by_snippet)

    model = get_model()
    device = next(model.parameters()).device

    new_embeddings: list[list[float]] = []
    for start in range(0, len(unique_snippets), batch_size):
        batch = unique_snippets[start : start + batch_size]
        tokens_list = model.tokenize(batch, max_length=max_length, padding=True)
        tokens_tensor = torch.tensor(tokens_list).to(device)
        with torch.no_grad():
            _, sentence_embeddings = model(tokens_tensor)
        batch_np: NDArray[np.float32] = sentence_embeddings.cpu().numpy()
        new_embeddings.extend(batch_np.tolist())

    cache.put_many(unique_snippets, new_embeddings)

    results: list[list[float]] = [[] for _ in snippets]
    for i, emb in cached_results.items():
        results[i] = emb
    for snippet, emb in zip(unique_snippets, new_embeddings):
        for i in positions_by_snippet[snippet]:
            results[i] = emb

    return results
+) OLLAMA_NOT_RUNNING = ( "Ollama server not responding at {endpoint}. " "Make sure Ollama is running: ollama serve" ) +LITELLM_NO_ENDPOINT = ( + "LiteLLM provider requires endpoint. " + "Set ORCHESTRATOR_ENDPOINT or CYPHER_ENDPOINT in .env file." +) +LITELLM_NOT_RUNNING = ( + "LiteLLM proxy server not responding at {endpoint}. " + "Make sure LiteLLM proxy is running and API key is valid." +) UNKNOWN_PROVIDER = "Unknown provider '{provider}'. Available providers: {available}" # (H) Dependency errors @@ -42,16 +58,29 @@ # (H) LLM errors LLM_INIT_CYPHER = "Failed to initialize CypherGenerator: {error}" LLM_INVALID_QUERY = "LLM did not generate a valid query. Output: {output}" +LLM_DANGEROUS_QUERY = "LLM generated a destructive Cypher query (found '{keyword}'). Query rejected: {query}" +LLM_UNBOUNDED_PATH = ( + "LLM generated an unbounded variable-length path pattern " + "(e.g. [:TYPE*] or [:TYPE*N..]) which causes memory exhaustion on cyclic graphs. " + "Add an upper bound such as [:TYPE*1..6]. Query rejected: {query}" +) +LLM_DISALLOWED_PROCEDURE = ( + "LLM generated a CALL to procedure '{name}' which is outside the read-only " + "MAGE allowlist. Query rejected: {query}" +) LLM_GENERATION_FAILED = "Cypher generation failed: {error}" LLM_INIT_ORCHESTRATOR = "Failed to initialize RAG Orchestrator: {error}" # (H) Graph service errors BATCH_SIZE = "batch_size must be a positive integer" CONN = "Not connected to Memgraph." +AUTH_INCOMPLETE = ( + "Both username and password are required for authentication. " + "Either provide both or neither." +) # (H) Access control errors (used with raise) ACCESS_DENIED = "Access denied: Cannot access files outside the project root." -DOC_UNSUPPORTED_PROVIDER = "DocumentAnalyzer does not support the 'local' LLM provider." 
# (H) Exception classes diff --git a/codebase_rag/graph_loader.py b/codebase_rag/graph_loader.py index b69635755..6a210c6d5 100644 --- a/codebase_rag/graph_loader.py +++ b/codebase_rag/graph_loader.py @@ -13,6 +13,18 @@ class GraphLoader: + __slots__ = ( + "file_path", + "_data", + "_nodes", + "_relationships", + "_nodes_by_id", + "_nodes_by_label", + "_outgoing_rels", + "_incoming_rels", + "_property_indexes", + ) + def __init__(self, file_path: str): self.file_path = Path(file_path) self._data: GraphData | None = None diff --git a/codebase_rag/graph_updater.py b/codebase_rag/graph_updater.py index 2620d2bcb..08b44ff9d 100644 --- a/codebase_rag/graph_updater.py +++ b/codebase_rag/graph_updater.py @@ -1,16 +1,27 @@ +import hashlib +import json +import os import sys from collections import OrderedDict, defaultdict from collections.abc import Callable, ItemsView, KeysView from pathlib import Path from loguru import logger -from tree_sitter import Node, Parser +from rich.progress import Progress, SpinnerColumn, TextColumn +from tree_sitter import Node, Parser, QueryCursor from . import constants as cs from . 
import logs as ls from .config import settings from .language_spec import LANGUAGE_FQN_SPECS, get_language_spec +from .parser_loader import COMBINED_FUNC_CLASS_IMPORT_QUERIES +from .parsers.cpp_frontend import ( + cpp_frontend_available, + find_compile_commands, + run_cpp_frontend, +) from .parsers.factory import ProcessorFactory +from .parsers.utils import sorted_captures from .services import IngestorProtocol, QueryProtocol from .types_defs import ( EmbeddingQueryResult, @@ -24,19 +35,90 @@ ) from .utils.dependencies import has_semantic_dependencies from .utils.fqn_resolver import find_function_source_by_fqn -from .utils.path_utils import should_skip_path +from .utils.path_utils import ( + cached_relative_path, + should_skip_path, + should_skip_rel_file, +) from .utils.source_extraction import extract_source_with_fallback +type FileHashCache = dict[str, str] +type DirMtimesCache = dict[str, float] + class FunctionRegistryTrie: + __slots__ = ( + "root", + "_entries", + "_simple_name_lookup", + "_ending_with_cache", + "_duplicates", + "_properties", + "_property_names", + "_abstracts", + "_callable_params", + ) + def __init__(self, simple_name_lookup: SimpleNameLookup | None = None) -> None: self.root: TrieNode = {} self._entries: FunctionRegistry = {} self._simple_name_lookup = simple_name_lookup + self._ending_with_cache: dict[str, list[QualifiedName]] = {} + self._duplicates: dict[QualifiedName, list[QualifiedName]] = {} + self._properties: set[QualifiedName] = set() + self._property_names: set[str] = set() + self._abstracts: set[QualifiedName] = set() + self._callable_params: dict[QualifiedName, dict[str, int]] = {} + + def mark_callable_params( + self, qualified_name: QualifiedName, params: dict[str, int] + ) -> None: + if params: + self._callable_params[qualified_name] = params + + def callable_params(self, qualified_name: QualifiedName) -> dict[str, int] | None: + return self._callable_params.get(qualified_name) + + def mark_property(self, qualified_name: 
QualifiedName) -> None: + self._properties.add(qualified_name) + self._property_names.add(qualified_name.rsplit(cs.SEPARATOR_DOT, 1)[-1]) + + def is_property(self, qualified_name: QualifiedName) -> bool: + return qualified_name in self._properties + + def property_names(self) -> set[str]: + return self._property_names + + def mark_abstract(self, qualified_name: QualifiedName) -> None: + self._abstracts.add(qualified_name) + + def is_abstract(self, qualified_name: QualifiedName) -> bool: + return qualified_name in self._abstracts + + def register_unique_qn( + self, natural_qn: QualifiedName, start_line: int + ) -> QualifiedName: + if natural_qn not in self._entries: + return natural_qn + variant = f"{natural_qn}{cs.DUP_QN_MARKER}{start_line}" + bucket = self._duplicates.setdefault(natural_qn, [natural_qn]) + if variant not in bucket: + bucket.append(variant) + return variant + + def variants(self, qualified_name: QualifiedName) -> list[QualifiedName]: + return self._duplicates.get(qualified_name, [qualified_name]) def insert(self, qualified_name: QualifiedName, func_type: NodeType) -> None: + qualified_name = sys.intern(qualified_name) self._entries[qualified_name] = func_type + simple_name = qualified_name.rsplit(cs.SEPARATOR_DOT, 1)[-1] + if self._simple_name_lookup is not None: + self._simple_name_lookup[simple_name].add(qualified_name) + if self._ending_with_cache: + self._ending_with_cache.pop(simple_name, None) + parts = qualified_name.split(cs.SEPARATOR_DOT) current: TrieNode = self.root @@ -69,6 +151,30 @@ def __delitem__(self, qualified_name: QualifiedName) -> None: return del self._entries[qualified_name] + self._duplicates.pop(qualified_name, None) + for natural, bucket in list(self._duplicates.items()): + if qualified_name in bucket: + bucket.remove(qualified_name) + if len(bucket) <= 1: + self._duplicates.pop(natural, None) + simple_name = qualified_name.rsplit(cs.SEPARATOR_DOT, 1)[-1] + + if qualified_name in self._properties: + 
self._properties.discard(qualified_name) + if not any( + p.rsplit(cs.SEPARATOR_DOT, 1)[-1] == simple_name + for p in self._properties + ): + self._property_names.discard(simple_name) + self._abstracts.discard(qualified_name) + self._callable_params.pop(qualified_name, None) + + if self._ending_with_cache: + self._ending_with_cache.pop(simple_name, None) + + if self._simple_name_lookup is not None: + if simple_name in self._simple_name_lookup: + self._simple_name_lookup[simple_name].discard(qualified_name) parts = qualified_name.split(cs.SEPARATOR_DOT) self._cleanup_trie_path(parts, self.root) @@ -148,11 +254,20 @@ def find_with_prefix_and_suffix( return [qn for qn, _ in matches] def find_ending_with(self, suffix: str) -> list[QualifiedName]: - if self._simple_name_lookup is not None and suffix in self._simple_name_lookup: - # (H) O(1) lookup using the simple_name_lookup index - return list(self._simple_name_lookup[suffix]) - # (H) Fallback to linear scan if no index available - return [qn for qn in self._entries.keys() if qn.endswith(f".{suffix}")] + cached = self._ending_with_cache.get(suffix) + if cached is not None: + return cached + if self._simple_name_lookup is not None: + if suffix in self._simple_name_lookup: + result = sorted(self._simple_name_lookup[suffix]) + else: + result = [] + else: + result = sorted( + qn for qn in self._entries.keys() if qn.endswith(f".{suffix}") + ) + self._ending_with_cache[suffix] = result + return result def find_with_prefix(self, prefix: str) -> list[tuple[QualifiedName, NodeType]]: node = self._navigate_to_prefix(prefix) @@ -160,6 +275,8 @@ def find_with_prefix(self, prefix: str) -> list[tuple[QualifiedName, NodeType]]: class BoundedASTCache: + __slots__ = ("cache", "max_entries", "max_memory_bytes") + def __init__( self, max_entries: int | None = None, @@ -220,6 +337,78 @@ def _should_evict_for_memory(self) -> bool: ) +def _hash_file(filepath: Path) -> str: + data = filepath.read_bytes() + return hashlib.md5(data, 
usedforsecurity=False).hexdigest() + + +def _hash_file_with_bytes(filepath: Path) -> tuple[str, bytes] | None: + try: + with open(filepath, "rb") as f: + data = f.read() + except OSError as e: + logger.warning(ls.FILE_UNREADABLE, path=filepath, error=e) + return None + return hashlib.md5(data, usedforsecurity=False).hexdigest(), data + + +def _load_hash_cache(cache_path: Path) -> FileHashCache: + if not cache_path.is_file(): + return {} + try: + with cache_path.open(encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, dict): + logger.info(ls.HASH_CACHE_LOADED, count=len(data), path=cache_path) + return data + except (json.JSONDecodeError, OSError) as e: + logger.warning(ls.HASH_CACHE_LOAD_FAILED, path=cache_path, error=e) + return {} + + +def _save_hash_cache(cache_path: Path, hashes: FileHashCache) -> None: + try: + cache_path.parent.mkdir(parents=True, exist_ok=True) + with cache_path.open("w", encoding="utf-8") as f: + json.dump(hashes, f, indent=2) + logger.info(ls.HASH_CACHE_SAVED, count=len(hashes), path=cache_path) + except OSError as e: + logger.warning(ls.HASH_CACHE_SAVE_FAILED, path=cache_path, error=e) + + +def _load_dir_mtimes(cache_path: Path) -> DirMtimesCache: + if not cache_path.is_file(): + return {} + try: + with cache_path.open(encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, dict): + return {k: float(v) for k, v in data.items() if isinstance(v, int | float)} + except (json.JSONDecodeError, OSError, ValueError): + pass + return {} + + +def _save_dir_mtimes(cache_path: Path, mtimes: DirMtimesCache) -> None: + try: + cache_path.parent.mkdir(parents=True, exist_ok=True) + with cache_path.open("w", encoding="utf-8") as f: + json.dump(mtimes, f) + except OSError: + pass + + +def _touch_empty_json(cache_path: Path) -> None: + if cache_path.exists(): + return + try: + cache_path.parent.mkdir(parents=True, exist_ok=True) + with cache_path.open("w", encoding="utf-8") as f: + f.write(cs.JSON_EMPTY_OBJECT) + except OSError: 
+ pass + + class GraphUpdater: def __init__( self, @@ -229,12 +418,20 @@ def __init__( queries: dict[cs.SupportedLanguage, LanguageQueries], unignore_paths: frozenset[str] | None = None, exclude_paths: frozenset[str] | None = None, + project_name: str | None = None, ): self.ingestor = ingestor + self._single_file: Path | None = None + if repo_path.is_file(): + resolved = repo_path.resolve() + self._single_file = resolved + repo_path = resolved.parent self.repo_path = repo_path self.parsers = parsers self.queries = queries - self.project_name = repo_path.resolve().name + self.project_name = ( + project_name and project_name.strip() + ) or repo_path.resolve().name self.simple_name_lookup: SimpleNameLookup = defaultdict(set) self.function_registry = FunctionRegistryTrie( simple_name_lookup=self.simple_name_lookup @@ -242,6 +439,9 @@ def __init__( self.ast_cache = BoundedASTCache() self.unignore_paths = unignore_paths self.exclude_paths = exclude_paths + self.skipped_because_in_sync = False + self._collected_dir_mtimes: DirMtimesCache = {} + self._cpp_frontend_covered: frozenset[str] = frozenset() self.factory = ProcessorFactory( ingestor=self.ingestor, @@ -255,25 +455,93 @@ def __init__( exclude_paths=self.exclude_paths, ) + def _run_cpp_frontend(self) -> None: + # (H) Optional libclang C++ pre-pass: when CPP_FRONTEND=libclang and a + # (H) compile_commands.json is discoverable, emit macro-accurate C/C++ + # (H) nodes/edges directly (tree-sitter cannot expand macros). Covered + # (H) files are then skipped by the tree-sitter definition pass. Missing + # (H) either condition falls back to tree-sitter with no change. 
+ self._cpp_frontend_covered = frozenset() + if settings.CPP_FRONTEND != cs.CppFrontend.LIBCLANG: + return + if not cpp_frontend_available(): + logger.warning(ls.CPP_FRONTEND_UNAVAILABLE) + return + compdb_dir = find_compile_commands(self.repo_path) + if compdb_dir is None: + logger.warning(ls.CPP_FRONTEND_NO_COMPDB) + return + logger.info(ls.CPP_FRONTEND_RUNNING.format(path=compdb_dir)) + self._cpp_frontend_covered = run_cpp_frontend( + self.ingestor, + self.repo_path, + self.project_name, + compdb_dir, + function_registry=self.function_registry, + simple_name_lookup=self.simple_name_lookup, + structural_elements=self.factory.structure_processor.structural_elements, + ) + logger.info( + ls.CPP_FRONTEND_COVERED.format(count=len(self._cpp_frontend_covered)) + ) + def _is_dependency_file(self, file_name: str, filepath: Path) -> bool: return ( file_name.lower() in cs.DEPENDENCY_FILES or filepath.suffix.lower() == cs.CSPROJ_SUFFIX ) - def run(self) -> None: + def run(self, force: bool = False) -> None: + py_engine = self.factory.type_inference._python_type_inference + if py_engine is not None: + py_engine._available_classes_cache.clear() + py_engine._return_stmt_cache.clear() + py_engine._method_return_type_cache.clear() + py_engine._self_assignment_cache.clear() self.ingestor.ensure_node_batch( cs.NODE_PROJECT, {cs.KEY_NAME: self.project_name} ) - logger.info(ls.ENSURING_PROJECT.format(name=self.project_name)) + logger.info(ls.ENSURING_PROJECT, name=self.project_name) + + if not force and self._is_already_in_sync(): + logger.info(ls.GRAPH_ALREADY_IN_SYNC) + self.skipped_because_in_sync = True + self.ingestor.flush_all() + return logger.info(ls.PASS_1_STRUCTURE) self.factory.structure_processor.identify_structure() + self._run_cpp_frontend() + logger.info(ls.PASS_2_FILES) - self._process_files() + self._process_files(force=force) - logger.info(ls.FOUND_FUNCTIONS.format(count=len(self.function_registry))) + corrected = 
self.factory.definition_processor.resolve_deferred_cpp_methods() + if corrected: + logger.info("Resolved {} deferred C++ out-of-class methods", corrected) + + go_methods = self.factory.definition_processor.resolve_deferred_go_methods() + if go_methods: + logger.info("Resolved {} Go receiver methods", go_methods) + + if not force: + self._rehydrate_registry_from_graph() + + # (H) After rehydration so the "does a real definition exist?" check sees + # (H) definitions in files an incremental run did not re-parse; otherwise a + # (H) forward declaration whose definition lives in an unchanged file would be + # (H) kept as a phantom and re-fragment the class. + kept_forwards = ( + self.factory.definition_processor.resolve_deferred_forward_declarations() + ) + if kept_forwards: + logger.info( + "Registered {} forward-declared C/C++ types with no definition", + kept_forwards, + ) + + logger.info(ls.FOUND_FUNCTIONS, count=len(self.function_registry)) logger.info(ls.PASS_3_CALLS) self._process_function_calls() @@ -282,16 +550,93 @@ def run(self) -> None: logger.info(ls.ANALYSIS_COMPLETE) self.ingestor.flush_all() + self._prune_orphan_nodes() + self._generate_semantic_embeddings() + def _rehydrate_registry_from_graph(self) -> None: + # (H) Incremental runs populate the function registry only from re-parsed + # (H) files. Read every definition's qualified name back from the graph and + # (H) re-register the ones missing locally, so calls and instantiations + # (H) into files that were not re-parsed still resolve and their edges are + # (H) re-emitted. Without this, editing one file drops cross-file CALLS / + # (H) INSTANTIATES into any unchanged file (issue #532, outbound half). 
+ if not isinstance(self.ingestor, QueryProtocol): + return + added = 0 + for row in self.ingestor.fetch_all(cs.CYPHER_ALL_DEFINITION_QNS): + qn = row.get(cs.KEY_QUALIFIED_NAME) + label = row.get(cs.KEY_LABEL) + if not isinstance(qn, str) or not isinstance(label, str): + continue + if qn in self.function_registry: + continue + try: + node_type = NodeType(label) + except ValueError: + continue + self.function_registry[qn] = node_type + added += 1 + if added: + logger.info(ls.REGISTRY_REHYDRATED, count=added) + + def _capture_inbound_edges(self, reindexed_keys: list[str]) -> list[ResultRow]: + # (H) Record the reference edges that unchanged files point at the + # (H) re-indexed files, BEFORE those files' subtrees (and thus the inbound + # (H) edges) are deleted. Capturing and restoring the exact edges avoids + # (H) re-resolving the callers, whose resolution would diverge from a clean + # (H) index (cgr resolution is context-sensitive). + if not reindexed_keys or not isinstance(self.ingestor, QueryProtocol): + return [] + return self.ingestor.fetch_all( + cs.CYPHER_INBOUND_EDGES, {cs.CYPHER_PARAM_PATHS: reindexed_keys} + ) + + def _restore_inbound_edges(self, captured: list[ResultRow]) -> None: + # (H) Re-emit each captured inbound edge whose target still exists after the + # (H) re-index. A target that was renamed or removed is correctly left + # (H) without its stale inbound edge, matching a clean re-index. 
+ if not captured: + return + module_label = cs.NodeLabel.MODULE.value + restored = 0 + for row in captured: + caller_label = row.get(cs.KEY_CALLER_LABEL) + caller_qn = row.get(cs.KEY_CALLER_QN) + rel = row.get(cs.KEY_REL) + target_label = row.get(cs.KEY_TARGET_LABEL) + target_qn = row.get(cs.KEY_TARGET_QN) + if not ( + isinstance(caller_label, str) + and isinstance(caller_qn, str) + and isinstance(rel, str) + and isinstance(target_label, str) + and isinstance(target_qn, str) + ): + continue + if target_label != module_label and target_qn not in self.function_registry: + continue + caller_key = cs.NODE_UNIQUE_CONSTRAINTS.get(caller_label) + target_key = cs.NODE_UNIQUE_CONSTRAINTS.get(target_label) + if caller_key is None or target_key is None: + continue + self.ingestor.ensure_relationship_batch( + (caller_label, caller_key, caller_qn), + rel, + (target_label, target_key, target_qn), + ) + restored += 1 + if restored: + logger.info(ls.INCREMENTAL_REBUILD_INBOUND, count=restored) + def remove_file_from_state(self, file_path: Path) -> None: - logger.debug(ls.REMOVING_STATE.format(path=file_path)) + logger.debug(ls.REMOVING_STATE, path=file_path) if file_path in self.ast_cache: del self.ast_cache[file_path] logger.debug(ls.REMOVED_FROM_CACHE) - relative_path = file_path.relative_to(self.repo_path) + relative_path = cached_relative_path(file_path, self.repo_path) path_parts = ( relative_path.parent.parts if file_path.name == cs.INIT_PY @@ -307,51 +652,492 @@ def remove_file_from_state(self, file_path: Path) -> None: del self.function_registry[qn] if qns_to_remove: - logger.debug(ls.REMOVING_QNS.format(count=len(qns_to_remove))) + logger.debug(ls.REMOVING_QNS, count=len(qns_to_remove)) for simple_name, qn_set in self.simple_name_lookup.items(): original_count = len(qn_set) new_qn_set = qn_set - qns_to_remove if len(new_qn_set) < original_count: self.simple_name_lookup[simple_name] = new_qn_set - logger.debug(ls.CLEANED_SIMPLE_NAME.format(name=simple_name)) + 
logger.debug(ls.CLEANED_SIMPLE_NAME, name=simple_name) + + def _delete_module_entities(self, file_key: str) -> None: + """Remove a changed/deleted file's Module subtree from the graph. + + The incremental path re-parses a changed file and re-adds its + entities, but the entities the previous parse contributed (the + Module and everything it DEFINES, plus their IMPORTS/CALLS edges via + DETACH) must be removed first; otherwise renamed-away Function/Class/ + Method nodes and their edges linger alongside the new ones. + """ + if isinstance(self.ingestor, QueryProtocol): + self.ingestor.execute_write( + cs.CYPHER_DELETE_MODULE, {cs.KEY_PATH: file_key} + ) - def _process_files(self) -> None: - for filepath in self.repo_path.rglob("*"): - if filepath.is_file() and not should_skip_path( - filepath, + def _diff_dir_against_cache( + self, + dir_path_str: str, + dir_key: str, + old_hashes: FileHashCache, + old_dir_mtimes: DirMtimesCache, + ) -> tuple[str | None, str | None]: + prefix = "" if dir_key == cs.ROOT_DIR_KEY else f"{dir_key}/" + expected_files: set[str] = set() + expected_dirs: set[str] = set() + for fk in old_hashes: + if fk.startswith(prefix): + rest = fk[len(prefix) :] + if "/" not in rest: + expected_files.add(rest) + for dk in old_dir_mtimes: + if dk == cs.ROOT_DIR_KEY or not dk.startswith(prefix): + continue + rest = dk[len(prefix) :] + if "/" not in rest: + expected_dirs.add(rest) + + actual_files: set[str] = set() + actual_dirs: set[str] = set() + try: + with os.scandir(dir_path_str) as it: + for entry in it: + name = entry.name + if name in (cs.HASH_CACHE_FILENAME, cs.DIR_MTIMES_FILENAME): + continue + try: + is_symlink = entry.is_symlink() + except OSError: + is_symlink = False + try: + is_dir_following = entry.is_dir() + except OSError: + is_dir_following = False + if is_symlink and is_dir_following: + continue + if is_dir_following: + actual_dirs.add(name) + else: + actual_files.add(name) + except OSError: + return None, dir_key + + dir_parts: 
tuple[str, ...] = ( + () if dir_key == cs.ROOT_DIR_KEY else tuple(dir_key.split("/")) + ) + dir_prefix_for_keep = "" if dir_key == cs.ROOT_DIR_KEY else f"{dir_key}/" + + for name in actual_dirs - expected_dirs: + if not self._should_keep_dir(name, dir_prefix_for_keep): + continue + return f"{prefix}{name}", None + for name in actual_files - expected_files: + dot = name.rfind(".") + suffix = name[dot:] if dot != -1 else "" + if should_skip_rel_file( + f"{prefix}{name}", + dir_parts, + suffix, + exclude_paths=self.exclude_paths, + unignore_paths=self.unignore_paths, + ): + continue + return f"{prefix}{name}", None + + for name in expected_files - actual_files: + return None, f"{prefix}{name}" + for name in expected_dirs - actual_dirs: + return None, f"{prefix}{name}" + + return None, None + + def _should_keep_dir(self, dirname: str, dir_prefix: str) -> bool: + if dirname not in cs.IGNORE_PATTERNS and ( + not self.exclude_paths or dirname not in self.exclude_paths + ): + return True + return bool( + self.unignore_paths + and any( + u.startswith(f"{dir_prefix}{dirname}/") or u == f"{dir_prefix}{dirname}" + for u in self.unignore_paths + ) + ) + + def _is_already_in_sync(self) -> bool: + if self._single_file is not None: + return False + cache_path = self.repo_path / cs.HASH_CACHE_FILENAME + if not cache_path.is_file(): + return False + cache_mtime = cache_path.stat().st_mtime + dir_mtimes_path = self.repo_path / cs.DIR_MTIMES_FILENAME + old_hashes = _load_hash_cache(cache_path) + old_dir_mtimes = _load_dir_mtimes(dir_mtimes_path) + if not old_hashes or not old_dir_mtimes: + return False + + repo_str = str(self.repo_path) + for dir_key, cached_mtime in old_dir_mtimes.items(): + dir_path_str = ( + repo_str if dir_key == cs.ROOT_DIR_KEY else f"{repo_str}/{dir_key}" + ) + try: + current_mtime = os.stat(dir_path_str).st_mtime + except OSError: + return False + if current_mtime != cached_mtime: + addition, removal = self._diff_dir_against_cache( + dir_path_str, dir_key, 
old_hashes, old_dir_mtimes + ) + if addition is not None or removal is not None: + return False + + for file_key, old_hash in old_hashes.items(): + file_path_str = f"{repo_str}/{file_key}" + try: + stat = os.stat(file_path_str) + except OSError: + return False + if stat.st_mtime <= cache_mtime: + continue + if _hash_file(Path(file_path_str)) != old_hash: + return False + return True + + def _collect_eligible_files(self) -> list[tuple[Path, str]]: + if self._single_file is not None: + if not should_skip_path( + self._single_file, self.repo_path, exclude_paths=self.exclude_paths, unignore_paths=self.unignore_paths, ): - lang_config = get_language_spec(filepath.suffix) - if ( - lang_config - and isinstance(lang_config.language, cs.SupportedLanguage) - and lang_config.language in self.parsers + file_key = cached_relative_path( + self._single_file, self.repo_path + ).as_posix() + return [(self._single_file, file_key)] + return [] + + eligible: list[tuple[Path, str]] = [] + hash_name = cs.HASH_CACHE_FILENAME + dir_mtimes_name = cs.DIR_MTIMES_FILENAME + repo_str = str(self.repo_path) + repo_prefix_len = len(repo_str) + 1 + exclude_paths = self.exclude_paths + unignore_paths = self.unignore_paths + self._collected_dir_mtimes = {} + for dirpath, dirnames, filenames in os.walk(repo_str): + if len(dirpath) < repo_prefix_len: + rel_dir = "" + dir_parts: tuple[str, ...] 
= () + dir_key = cs.ROOT_DIR_KEY + else: + rel_dir = dirpath[repo_prefix_len:].replace(os.sep, "/") + dir_parts = tuple(rel_dir.split("/")) if rel_dir else () + dir_key = rel_dir or cs.ROOT_DIR_KEY + dir_prefix = f"{rel_dir}/" if rel_dir else "" + try: + self._collected_dir_mtimes[dir_key] = os.stat(dirpath).st_mtime + except OSError: + pass + dirnames[:] = sorted( + d for d in dirnames if self._should_keep_dir(d, dir_prefix) + ) + for fname in sorted(filenames): + if fname in (hash_name, dir_mtimes_name): + continue + dot = fname.rfind(".") + suffix = fname[dot:] if dot != -1 else "" + rel_path_str = f"{dir_prefix}{fname}" + if not should_skip_rel_file( + rel_path_str, + dir_parts, + suffix, + exclude_paths=exclude_paths, + unignore_paths=unignore_paths, ): - result = self.factory.definition_processor.process_file( - filepath, - lang_config.language, - self.queries, - self.factory.structure_processor.structural_elements, + eligible.append((Path(f"{dirpath}/{fname}"), rel_path_str)) + return eligible + + def _process_files(self, force: bool = False) -> None: + cache_path = self.repo_path / cs.HASH_CACHE_FILENAME + dir_mtimes_path = self.repo_path / cs.DIR_MTIMES_FILENAME + old_hashes = _load_hash_cache(cache_path) if not force else {} + cache_mtime = cache_path.stat().st_mtime if cache_path.is_file() else 0.0 + if force: + logger.info(ls.INCREMENTAL_FORCE) + + _touch_empty_json(cache_path) + _touch_empty_json(dir_mtimes_path) + + eligible_files = self._collect_eligible_files() + new_hashes: FileHashCache = {} + skipped_count = 0 + changed_count = 0 + unreadable_count = 0 + + current_file_keys: set[str] = set() + + processed_since_flush = 0 + + changed_entries: list[tuple[Path, str, bool, bytes]] = [] + for filepath, file_key in eligible_files: + if not force and file_key in old_hashes: + try: + file_mtime = filepath.stat().st_mtime + except OSError: + unreadable_count += 1 + continue + if file_mtime <= cache_mtime: + new_hashes[file_key] = old_hashes[file_key] + 
current_file_keys.add(file_key) + skipped_count += 1 + continue + + hashed = _hash_file_with_bytes(filepath) + if hashed is None: + unreadable_count += 1 + continue + current_hash, file_bytes = hashed + + current_file_keys.add(file_key) + new_hashes[file_key] = current_hash + + if ( + not force + and file_key in old_hashes + and old_hashes[file_key] == current_hash + ): + logger.debug(ls.FILE_HASH_UNCHANGED, path=file_key) + skipped_count += 1 + continue + + is_new = file_key not in old_hashes + if not is_new: + logger.debug(ls.FILE_HASH_CHANGED, path=file_key) + else: + logger.debug(ls.FILE_HASH_NEW, path=file_key) + changed_entries.append((filepath, file_key, is_new, file_bytes)) + + # (H) Before deleting any changed file's subtree (which removes the inbound + # (H) CALLS/IMPORTS/INSTANTIATES edges incident on it), capture those edges + # (H) so they can be restored verbatim afterwards (issue #532, inbound + # (H) half). New files have no prior inbound edges, so only re-indexed + # (H) (changed, non-new) files matter. 
+ reindexed_keys = sorted( + file_key for _fp, file_key, is_new, _b in changed_entries if not is_new + ) + captured_inbound = self._capture_inbound_edges(reindexed_keys) + + pre_parsed = self._pre_parse_changed_files(changed_entries) + + with Progress( + SpinnerColumn(), + TextColumn(ls.PROGRESS_INDEXING_LABEL), + TextColumn("[progress.description]{task.description}"), + transient=True, + disable=not sys.stderr.isatty(), + ) as progress: + task = progress.add_task("", total=len(eligible_files)) + if skipped_count or unreadable_count: + progress.advance(task, skipped_count + unreadable_count) + + for filepath, file_key, is_new, file_bytes in changed_entries: + if not is_new: + self.remove_file_from_state(filepath) + self._delete_module_entities(file_key) + + changed_count += 1 + self._process_single_file( + filepath, + file_bytes=file_bytes, + pre_parsed=pre_parsed.get(filepath), + ) + + processed_since_flush += 1 + if processed_since_flush >= settings.FILE_FLUSH_INTERVAL: + logger.info(ls.PERIODIC_FLUSH.format(count=processed_since_flush)) + self.ingestor.flush_all() + processed_since_flush = 0 + + progress.update( + task, + advance=1, + description=ls.PROGRESS_FILES_PROCESSED.format(count=changed_count), + ) + + deleted_keys = set(old_hashes.keys()) - current_file_keys + if deleted_keys: + logger.info(ls.INCREMENTAL_DELETED, count=len(deleted_keys)) + for deleted_key in deleted_keys: + deleted_path = self.repo_path / deleted_key + self.remove_file_from_state(deleted_path) + self._delete_module_entities(deleted_key) + if isinstance(self.ingestor, QueryProtocol): + self.ingestor.execute_write( + cs.CYPHER_DELETE_FILE, {cs.KEY_PATH: deleted_key} ) - if result: - root_node, language = result - self.ast_cache[filepath] = (root_node, language) - elif self._is_dependency_file(filepath.name, filepath): - self.factory.definition_processor.process_dependencies(filepath) + self._restore_inbound_edges(captured_inbound) + + if skipped_count > 0: + 
logger.info(ls.INCREMENTAL_SKIPPED, count=skipped_count) + if changed_count > 0: + logger.info(ls.INCREMENTAL_CHANGED, count=changed_count) + if unreadable_count > 0: + logger.info(ls.INCREMENTAL_UNREADABLE, count=unreadable_count) + + _save_hash_cache(cache_path, new_hashes) + _save_dir_mtimes(dir_mtimes_path, self._collected_dir_mtimes) + + def _pre_parse_changed_files( + self, + changed_entries: list[tuple[Path, str, bool, bytes]], + ) -> dict[Path, tuple[Node, dict[str, list] | None]]: + result: dict[Path, tuple[Node, dict[str, list] | None]] = {} + for filepath, _file_key, _is_new, file_bytes in changed_entries: + lang_config = get_language_spec(filepath.suffix) + if not ( + lang_config + and isinstance(lang_config.language, cs.SupportedLanguage) + and lang_config.language in self.parsers + ): + continue + language = lang_config.language + parser = self.queries[language].get(cs.KEY_PARSER) + if not parser: + continue + tree = parser.parse(file_bytes) + root_node = tree.root_node + combined_query = COMBINED_FUNC_CLASS_IMPORT_QUERIES.get(language) + combined_captures: dict[str, list] | None = None + if combined_query: + cursor = QueryCursor(combined_query) + combined_captures = sorted_captures(cursor, root_node) + result[filepath] = (root_node, combined_captures) + return result + + def _process_single_file( + self, + filepath: Path, + file_bytes: bytes | None = None, + pre_parsed: tuple[Node, dict[str, list] | None] | None = None, + ) -> None: + if self._cpp_frontend_covered: + rel = cached_relative_path(filepath, self.repo_path).as_posix() + if rel in self._cpp_frontend_covered: + # (H) The libclang frontend already emitted this file's + # (H) definitions; keep only the generic File node. 
self.factory.structure_processor.process_generic_file( filepath, filepath.name ) + return + + lang_config = get_language_spec(filepath.suffix) + if ( + lang_config + and isinstance(lang_config.language, cs.SupportedLanguage) + and lang_config.language in self.parsers + ): + result = self.factory.definition_processor.process_file( + filepath, + lang_config.language, + self.queries, + self.factory.structure_processor.structural_elements, + source_bytes=file_bytes, + pre_parsed=pre_parsed, + ) + if result: + root_node, language = result + self.ast_cache[filepath] = (root_node, language) + elif self._is_dependency_file(filepath.name, filepath): + self.factory.definition_processor.process_dependencies(filepath) + + self.factory.structure_processor.process_generic_file(filepath, filepath.name) def _process_function_calls(self) -> None: + captures_cache = self.factory._func_class_captures_cache ast_cache_items = list(self.ast_cache.items()) for file_path, (root_node, language) in ast_cache_items: + self.factory.call_processor.collect_callable_field_bindings( + file_path, + root_node, + language, + self.queries, + func_class_captures_cache=captures_cache, + ) + for file_path, (root_node, language) in ast_cache_items: + if captures_cache is not None and file_path in captures_cache: + cached = captures_cache[file_path] + if not cached.get(cs.CAPTURE_CALL) and not cached.get( + cs.CAPTURE_FUNCTION + ): + continue self.factory.call_processor.process_calls_in_file( - file_path, root_node, language, self.queries + file_path, + root_node, + language, + self.queries, + func_class_captures_cache=captures_cache, ) + self.factory.call_processor.finalize_callable_param_flow() + + def _prune_orphan_nodes(self) -> None: + """Remove graph nodes whose files/folders no longer exist on disk.""" + if not isinstance(self.ingestor, QueryProtocol): + return + + logger.info(ls.PRUNE_START) + total_pruned = 0 + + project_prefix = self.project_name + "." 
+ repo_abs = self.repo_path.resolve().as_posix() + prune_specs: list[tuple[str, str, str]] = [ + (cs.CYPHER_ALL_FILE_PATHS, cs.CYPHER_DELETE_FILE, "File"), + ( + cs.CYPHER_ALL_MODULE_PATHS_INTERNAL, + cs.CYPHER_DELETE_MODULE, + "Module", + ), + (cs.CYPHER_ALL_FOLDER_PATHS, cs.CYPHER_DELETE_FOLDER, "Folder"), + ] + + for query_all, delete_query, label in prune_specs: + rows = self.ingestor.fetch_all(query_all) + orphans = [] + for r in rows: + path = r.get("path") + if not isinstance(path, str) or not path: + continue + if path.startswith(cs.INLINE_MODULE_PATH_PREFIX): + continue + abs_path = r.get("absolute_path") + qn = r.get("qualified_name", "") + if isinstance(abs_path, str) and not abs_path.startswith(repo_abs): + continue + if isinstance(qn, str) and qn and not qn.startswith(project_prefix): + continue + if not (self.repo_path / path).exists(): + orphans.append(path) + + if orphans: + logger.info(ls.PRUNE_FOUND, count=len(orphans), label=label) + for orphan_path in orphans: + logger.debug(ls.PRUNE_DELETING, label=label, path=orphan_path) + self.ingestor.execute_write( + delete_query, {cs.KEY_PATH: orphan_path} + ) + total_pruned += len(orphans) + + # (H) Drop external import-target modules that no module imports anymore, + # (H) e.g. an imported name renamed/removed on an incremental rebuild. 
+ self.ingestor.execute_write(cs.CYPHER_DELETE_ORPHAN_EXTERNAL_MODULES) + + if total_pruned: + logger.info(ls.PRUNE_COMPLETE, count=total_pruned) + else: + logger.info(ls.PRUNE_SKIP) def _generate_semantic_embeddings(self) -> None: if not has_semantic_dependencies(): @@ -363,22 +1149,55 @@ def _generate_semantic_embeddings(self) -> None: return try: - from .embedder import embed_code - from .vector_store import store_embedding + from .embedder import embed_code_batch, get_embedding_cache + from .vector_store import ( + close_qdrant_client, + store_embedding_batch, + verify_stored_ids, + ) logger.info(ls.PASS_4_EMBEDDINGS) results = self.ingestor.fetch_all( - cs.CYPHER_QUERY_EMBEDDINGS, {"project_name": self.project_name + "."} + cs.CYPHER_QUERY_EMBEDDINGS, {"project_name": self.project_name} ) if not results: logger.info(ls.NO_FUNCTIONS_FOR_EMBEDDING) return - logger.info(ls.GENERATING_EMBEDDINGS.format(count=len(results))) + logger.info(ls.GENERATING_EMBEDDINGS, count=len(results)) embedded_count = 0 + expected_ids: set[int] = set() + pending: list[tuple[int, str, str]] = [] + flush_at = settings.QDRANT_BATCH_SIZE + + def flush() -> int: + nonlocal pending + if not pending: + return 0 + snippets = [item[2] for item in pending] + try: + embeddings = embed_code_batch(snippets) + except Exception as e: + logger.warning( + ls.EMBEDDING_BATCH_COMPUTE_FAILED, + count=len(pending), + error=e, + ) + pending = [] + return 0 + points: list[tuple[int, list[float], str]] = [ + (node_id, emb, qname) + for (node_id, qname, _), emb in zip(pending, embeddings) + ] + for node_id, _qname, _src in pending: + expected_ids.add(node_id) + stored = store_embedding_batch(points) + pending = [] + return stored + for row in results: parsed = self._parse_embedding_result(row) if parsed is None: @@ -391,33 +1210,62 @@ def _generate_semantic_embeddings(self) -> None: file_path = parsed.get(cs.KEY_PATH) if start_line is None or end_line is None or file_path is None: - 
logger.debug(ls.NO_SOURCE_FOR.format(name=qualified_name)) + logger.debug(ls.NO_SOURCE_FOR, name=qualified_name) + continue - elif source_code := self._extract_source_code( + if source_code := self._extract_source_code( qualified_name, file_path, start_line, end_line ): - try: - embedding = embed_code(source_code) - store_embedding(node_id, embedding, qualified_name) - embedded_count += 1 - - if embedded_count % settings.EMBEDDING_PROGRESS_INTERVAL == 0: + pending.append((node_id, qualified_name, source_code)) + if len(pending) >= flush_at: + embedded_count += flush() + if ( + embedded_count % settings.EMBEDDING_PROGRESS_INTERVAL == 0 + and embedded_count > 0 + ): logger.debug( - ls.EMBEDDING_PROGRESS.format( - done=embedded_count, total=len(results) - ) + ls.EMBEDDING_PROGRESS, + done=embedded_count, + total=len(results), ) - - except Exception as e: - logger.warning( - ls.EMBEDDING_FAILED.format(name=qualified_name, error=e) - ) else: - logger.debug(ls.NO_SOURCE_FOR.format(name=qualified_name)) - logger.info(ls.EMBEDDINGS_COMPLETE.format(count=embedded_count)) + logger.debug(ls.NO_SOURCE_FOR, name=qualified_name) + + embedded_count += flush() + + logger.info(ls.EMBEDDINGS_COMPLETE, count=embedded_count) + + self._reconcile_embeddings(expected_ids, verify_stored_ids) + + get_embedding_cache().save() + close_qdrant_client() except Exception as e: - logger.warning(ls.EMBEDDING_GENERATION_FAILED.format(error=e)) + logger.warning(ls.EMBEDDING_GENERATION_FAILED, error=e) + + def _reconcile_embeddings( + self, + expected_ids: set[int], + verify_fn: Callable[[set[int]], set[int]], + ) -> None: + if not expected_ids: + return + try: + stored_ids = verify_fn(expected_ids) + missing = expected_ids - stored_ids + if missing: + sample = sorted(missing)[:10] + logger.warning( + ls.EMBEDDING_RECONCILE_MISSING.format( + missing=len(missing), + expected=len(expected_ids), + sample_ids=sample, + ) + ) + else: + logger.info(ls.EMBEDDING_RECONCILE_OK.format(count=len(expected_ids))) 
+ except Exception as e: + logger.warning(ls.EMBEDDING_RECONCILE_FAILED.format(error=e)) def _extract_source_code( self, qualified_name: str, file_path: str, start_line: int, end_line: int diff --git a/codebase_rag/language_spec.py b/codebase_rag/language_spec.py index cf550ab08..a48b6442b 100644 --- a/codebase_rag/language_spec.py +++ b/codebase_rag/language_spec.py @@ -82,6 +82,14 @@ def _rust_get_name(node: Node) -> str | None: name_node = node.child_by_field_name(cs.FIELD_NAME) if name_node and name_node.type == cs.TS_IDENTIFIER and name_node.text: return name_node.text.decode(cs.ENCODING_UTF8) + elif node.type == cs.TS_IMPL_ITEM: + # (H) An `impl Foo` block is an FQN scope, but it has no `name` field; its + # (H) target type is the segment that anchors its methods' qns + # (H) (owner_module.Foo.method). Without this the scope walk drops `Foo`, so + # (H) a closure/nested fn in an impl method binds to a phantom parent qn. + from .parsers.rs import utils as rs_utils + + return rs_utils.extract_impl_target(node) return _generic_get_name(node) @@ -97,6 +105,38 @@ def _rust_file_to_module(file_path: Path, repo_root: Path) -> list[str]: return [] +def _php_file_to_module(file_path: Path, repo_root: Path) -> list[str]: + try: + rel = file_path.relative_to(repo_root) + parts = list(rel.with_suffix("").parts) + if parts and parts[0] in ("src", "app", "lib"): + parts = parts[1:] + return parts + except ValueError: + return [] + + +def _c_unwrap_declarator(declarator: Node | None) -> Node | None: + while declarator and declarator.type == cs.CppNodeType.POINTER_DECLARATOR: + declarator = declarator.child_by_field_name(cs.FIELD_DECLARATOR) + return declarator + + +def _c_get_name(node: Node) -> str | None: + if node.type in cs.C_NAME_NODE_TYPES: + name_node = node.child_by_field_name(cs.FIELD_NAME) + if name_node and name_node.text: + return name_node.text.decode(cs.ENCODING_UTF8) + elif node.type == cs.TS_CPP_FUNCTION_DEFINITION: + declarator = 
node.child_by_field_name(cs.FIELD_DECLARATOR) + declarator = _c_unwrap_declarator(declarator) + if declarator and declarator.type == cs.TS_CPP_FUNCTION_DECLARATOR: + name_node = declarator.child_by_field_name(cs.FIELD_DECLARATOR) + if name_node and name_node.type == cs.TS_IDENTIFIER and name_node.text: + return name_node.text.decode(cs.ENCODING_UTF8) + return _generic_get_name(node) + + def _cpp_get_name(node: Node) -> str | None: if node.type in cs.CPP_NAME_NODE_TYPES: name_node = node.child_by_field_name(cs.FIELD_NAME) @@ -154,6 +194,13 @@ def _cpp_get_name(node: Node) -> str | None: file_to_module_parts=_generic_file_to_module, ) +C_FQN_SPEC = FQNSpec( + scope_node_types=frozenset(cs.FQN_C_SCOPE_TYPES), + function_node_types=frozenset(cs.FQN_C_FUNCTION_TYPES), + get_name=_c_get_name, + file_to_module_parts=_generic_file_to_module, +) + LUA_FQN_SPEC = FQNSpec( scope_node_types=frozenset(cs.FQN_LUA_SCOPE_TYPES), function_node_types=frozenset(cs.FQN_LUA_FUNCTION_TYPES), @@ -175,18 +222,11 @@ def _cpp_get_name(node: Node) -> str | None: file_to_module_parts=_generic_file_to_module, ) -CSHARP_FQN_SPEC = FQNSpec( - scope_node_types=frozenset(cs.FQN_CS_SCOPE_TYPES), - function_node_types=frozenset(cs.FQN_CS_FUNCTION_TYPES), - get_name=_generic_get_name, - file_to_module_parts=_generic_file_to_module, -) - PHP_FQN_SPEC = FQNSpec( scope_node_types=frozenset(cs.FQN_PHP_SCOPE_TYPES), function_node_types=frozenset(cs.FQN_PHP_FUNCTION_TYPES), get_name=_generic_get_name, - file_to_module_parts=_generic_file_to_module, + file_to_module_parts=_php_file_to_module, ) LANGUAGE_FQN_SPECS: dict[cs.SupportedLanguage, FQNSpec] = { @@ -195,11 +235,11 @@ def _cpp_get_name(node: Node) -> str | None: cs.SupportedLanguage.TS: TS_FQN_SPEC, cs.SupportedLanguage.RUST: RUST_FQN_SPEC, cs.SupportedLanguage.JAVA: JAVA_FQN_SPEC, + cs.SupportedLanguage.C: C_FQN_SPEC, cs.SupportedLanguage.CPP: CPP_FQN_SPEC, cs.SupportedLanguage.LUA: LUA_FQN_SPEC, cs.SupportedLanguage.GO: GO_FQN_SPEC, 
cs.SupportedLanguage.SCALA: SCALA_FQN_SPEC, - cs.SupportedLanguage.CSHARP: CSHARP_FQN_SPEC, cs.SupportedLanguage.PHP: PHP_FQN_SPEC, } @@ -285,8 +325,14 @@ def _cpp_get_name(node: Node) -> str | None: function: (scoped_identifier "::" name: (identifier) @name)) @call + (call_expression + function: (generic_function) @name) @call (macro_invocation macro: (identifier) @name) @call + (token_tree + (identifier) @name @call + . + (token_tree . "(")) """, ), cs.SupportedLanguage.GO: LanguageSpec( @@ -343,6 +389,28 @@ def _cpp_get_name(node: Node) -> str | None: type: (type_identifier) @name) @call """, ), + cs.SupportedLanguage.C: LanguageSpec( + language=cs.SupportedLanguage.C, + file_extensions=cs.C_EXTENSIONS, + function_node_types=cs.SPEC_C_FUNCTION_TYPES, + class_node_types=cs.SPEC_C_CLASS_TYPES, + module_node_types=cs.SPEC_C_MODULE_TYPES, + call_node_types=cs.SPEC_C_CALL_TYPES, + import_node_types=cs.IMPORT_NODES_INCLUDE, + import_from_node_types=cs.IMPORT_NODES_INCLUDE, + package_indicators=cs.SPEC_C_PACKAGE_INDICATORS, + function_query=""" + (function_definition) @function + """, + class_query=""" + (struct_specifier) @class + (union_specifier) @class + (enum_specifier) @class + """, + call_query=""" + (call_expression) @call + """, + ), cs.SupportedLanguage.CPP: LanguageSpec( language=cs.SupportedLanguage.CPP, file_extensions=cs.CPP_EXTENSIONS, @@ -381,16 +449,6 @@ def _cpp_get_name(node: Node) -> str | None: (delete_expression) @call """, ), - cs.SupportedLanguage.CSHARP: LanguageSpec( - language=cs.SupportedLanguage.CSHARP, - file_extensions=cs.CS_EXTENSIONS, - function_node_types=cs.SPEC_CS_FUNCTION_TYPES, - class_node_types=cs.SPEC_CS_CLASS_TYPES, - module_node_types=cs.SPEC_CS_MODULE_TYPES, - call_node_types=cs.SPEC_CS_CALL_TYPES, - import_node_types=cs.IMPORT_NODES_USING, - import_from_node_types=cs.IMPORT_NODES_USING, - ), cs.SupportedLanguage.PHP: LanguageSpec( language=cs.SupportedLanguage.PHP, file_extensions=cs.PHP_EXTENSIONS, @@ -398,6 +456,42 @@ def 
_cpp_get_name(node: Node) -> str | None: class_node_types=cs.SPEC_PHP_CLASS_TYPES, module_node_types=cs.SPEC_PHP_MODULE_TYPES, call_node_types=cs.SPEC_PHP_CALL_TYPES, + import_node_types=cs.SPEC_PHP_IMPORT_TYPES, + import_from_node_types=cs.SPEC_PHP_IMPORT_FROM_TYPES, + function_query=""" + (function_definition + name: (name) @name) @function + (method_declaration + name: (name) @name) @function + (anonymous_function) @function + (arrow_function) @function + """, + class_query=""" + (class_declaration + name: (name) @name) @class + (interface_declaration + name: (name) @name) @class + (trait_declaration + name: (name) @name) @class + (enum_declaration + name: (name) @name) @class + """, + call_query=""" + (function_call_expression + function: (name) @name) @call + (function_call_expression + function: (qualified_name) @name) @call + (member_call_expression + name: (name) @name) @call + (scoped_call_expression + name: (name) @name) @call + (nullsafe_member_call_expression + name: (name) @name) @call + (object_creation_expression + (name) @name) @call + (object_creation_expression + (qualified_name) @name) @call + """, ), cs.SupportedLanguage.LUA: LanguageSpec( language=cs.SupportedLanguage.LUA, diff --git a/codebase_rag/logs.py b/codebase_rag/logs.py index 3e075c877..b32e73548 100644 --- a/codebase_rag/logs.py +++ b/codebase_rag/logs.py @@ -13,9 +13,22 @@ ) PASS_3_CALLS = "--- Pass 3: Processing Function Calls from AST Cache ---" PASS_4_EMBEDDINGS = "--- Pass 4: Generating semantic embeddings ---" +CPP_FRONTEND_RUNNING = "--- C/C++ libclang frontend: {path} ---" +CPP_FRONTEND_UNAVAILABLE = ( + "CPP_FRONTEND=libclang but libclang is unavailable; using tree-sitter" +) +CPP_FRONTEND_NO_COMPDB = ( + "CPP_FRONTEND=libclang but no compile_commands.json found; using tree-sitter" +) +CPP_FRONTEND_COVERED = "C/C++ libclang frontend covered {count} file(s)" +GRAPH_ALREADY_IN_SYNC = ( + "Knowledge graph already in sync (hash cache matches every file). Skipping passes." 
+) # (H) Analysis logs FOUND_FUNCTIONS = "\n--- Found {count} functions/methods in codebase ---" +REGISTRY_REHYDRATED = "Rehydrated {count} definitions from the graph for resolution" +INCREMENTAL_REBUILD_INBOUND = "Rebuilding inbound edges from {count} dependent files" ANALYSIS_COMPLETE = "\n--- Analysis complete. Flushing all data to database... ---" REMOVING_STATE = "Removing in-memory state for: {path}" REMOVED_FROM_CACHE = " - Removed from ast_cache" @@ -45,14 +58,40 @@ GENERATING_EMBEDDINGS = "Generating embeddings for {count} functions/methods" EMBEDDING_PROGRESS = "Generated {done}/{total} embeddings" EMBEDDING_FAILED = "Failed to embed {name}: {error}" +EMBEDDING_BATCH_COMPUTE_FAILED = "Failed to embed batch of {count}: {error}" +CONTEXT_TOKEN_COUNT_FAILED = "Context token count failed: {error}" NO_SOURCE_FOR = "No source code found for {name}" EMBEDDINGS_COMPLETE = "Successfully generated {count} semantic embeddings" EMBEDDING_GENERATION_FAILED = "Failed to generate semantic embeddings: {error}" EMBEDDING_STORE_FAILED = "Failed to store embedding for {name}: {error}" +EMBEDDING_STORE_RETRY = "Qdrant upsert failed (attempt {attempt}/{max_attempts}), retrying in {delay:.1f}s: {error}" +EMBEDDING_BATCH_STORED = "Stored batch of {count} embeddings in Qdrant" +EMBEDDING_BATCH_FAILED = "Failed to store embedding batch: {error}" EMBEDDING_SEARCH_FAILED = "Failed to search embeddings: {error}" - -# (H) Image logs -IMAGE_COPIED = "Copied image to temporary path: {path}" +EMBEDDING_RECONCILE_OK = "Qdrant reconciliation: all {count} expected embeddings found" +EMBEDDING_RECONCILE_MISSING = "Qdrant reconciliation: {missing} of {expected} embeddings missing (IDs: {sample_ids})" +EMBEDDING_RECONCILE_FAILED = "Qdrant reconciliation check failed: {error}" +QDRANT_DELETE_PROJECT = "Deleting {count} Qdrant vectors for project '{project}'" +QDRANT_DELETE_PROJECT_DONE = "Deleted Qdrant vectors for project '{project}'" +QDRANT_DELETE_PROJECT_FAILED = ( + "Failed to delete 
Qdrant vectors for project '{project}': {error}" +) +QDRANT_LOCK_ERROR = ( + "Failed to open embedded Qdrant at '{path}': {error}. The storage folder is " + "locked by another process; look for the '.lock' sentinel inside it. Embedded " + "Qdrant allows only one process at a time, so a running MCP server and a CLI " + "indexing run cannot share it. Set QDRANT_URL to point at a shared Qdrant " + "server for concurrent access." +) +EMBEDDING_CACHE_HIT = "Embedding cache hit for {count} snippets" +EMBEDDING_CACHE_LOADED = "Loaded embedding cache with {count} entries from {path}" +EMBEDDING_CACHE_SAVE_FAILED = "Failed to save embedding cache to {path}: {error}" +EMBEDDING_CACHE_LOAD_FAILED = "Failed to load embedding cache from {path}: {error}" + +# (H) Multimodal attachment logs +MULTIMODAL_ATTACHED = "Attached multimodal content: {path}" +MULTIMODAL_NOT_FOUND = "Multimodal path referenced but not found: {path}" +MULTIMODAL_READ_FAILED = "Failed to read multimodal file '{path}': {error}" # (H) Protobuf service logs PROTOBUF_INIT = "ProtobufFileIngestor initialized to write to: {path}" @@ -95,10 +134,30 @@ ) CGRIGNORE_READ_FAILED = "Failed to read {path}: {error}" +CGR_INSTRUCTIONS_LOADED = "Loaded project instructions from {path} ({chars} chars)" +CGR_INSTRUCTIONS_READ_FAILED = "Failed to read project instructions {path}: {error}" + # (H) File watcher logs WATCHER_ACTIVE = "File watcher is now active." +WATCHER_DEBOUNCE_ACTIVE = ( + "File watcher active with debouncing (debounce={debounce}s, max_wait={max_wait}s)" +) WATCHER_SKIP_NO_QUERY = "Ingestor does not support querying, skipping real-time update." CHANGE_DETECTED = "Change detected: {event_type} on {path}. Updating graph." 
+CHANGE_DEBOUNCING = ( + "Change detected: {event_type} on {name} (debouncing for {debounce}s)" +) +DEBOUNCE_RESET = "Reset debounce timer for {path}" +DEBOUNCE_MAX_WAIT = "Max wait ({max_wait}s) exceeded for {path}, processing now" +DEBOUNCE_SCHEDULED = ( + "Scheduled update for {path} in {debounce}s (max wait: {remaining}s remaining)" +) +DEBOUNCE_PROCESSING = "Processing debounced change: {path}" +DEBOUNCE_NO_EVENT = "No pending event for {path}, skipping" +DEBOUNCE_MAX_WAIT_ADJUSTED = ( + "max_wait ({max_wait}s) is less than debounce ({debounce}s). " + "Setting max_wait to debounce value." +) DELETION_QUERY = "Ran deletion query for path: {path}" RECALC_CALLS = "Recalculating all function call relationships for consistency..." GRAPH_UPDATED = "Graph updated successfully for change in: {name}" @@ -155,7 +214,8 @@ # (H) Memgraph logs MG_CONNECTING = "Connecting to Memgraph at {host}:{port}..." MG_CONNECTED = "Successfully connected to Memgraph." -MG_EXCEPTION = "An exception occurred: {error}. Flushing remaining items..." +MG_EXCEPTION = "An exception occurred: {error}. Attempting best-effort flush..." +MG_FLUSH_ERROR = "Failed to flush during cleanup: {error}" MG_DISCONNECTED = "\nDisconnected from Memgraph." MG_CYPHER_ERROR = "!!! Cypher Error: {error}" MG_CYPHER_QUERY = " Query: {query}" @@ -177,7 +237,9 @@ "Relationship buffer reached batch size ({size}). Performing incremental flush." ) MG_NO_CONSTRAINT = "No unique constraint defined for label '{label}'. Skipping flush." -MG_MISSING_PROP = "Skipping {label} node missing required '{key}' property: {props}" +MG_MISSING_PROP = ( + "Skipping {label} node missing required '{key}' property (keys: {prop_keys})" +) MG_NODES_FLUSHED = "Flushed {flushed} of {total} buffered nodes." MG_NODES_SKIPPED = ( "Skipped {count} buffered nodes due to missing identifiers or constraints." @@ -189,6 +251,18 @@ ) MG_FLUSH_START = "--- Flushing all pending writes to database... ---" MG_FLUSH_COMPLETE = "--- Flushing complete. 
---" +MG_PARALLEL_FLUSH_NODES = ( + "Parallel flushing {count} label groups with {workers} workers" +) +MG_PARALLEL_FLUSH_RELS = ( + "Parallel flushing {count} relationship groups with {workers} workers" +) +MG_LABEL_FLUSH_ERROR = "Error flushing label group '{label}': {error}" +MG_REL_FLUSH_ERROR = "Error flushing relationship group '{pattern}': {error}" +MG_NO_CONN_NODES = "No database connection for label '{label}', skipping flush." +MG_NO_CONN_RELS = ( + "No database connection for relationship group '{pattern}', skipping flush." +) MG_FETCH_QUERY = "Executing fetch query: {query} with params: {params}" MG_WRITE_QUERY = "Executing write query: {query} with params: {params}" MG_EXPORTING = "Exporting graph data..." @@ -215,6 +289,13 @@ ) TOOL_QUERY_RECEIVED = "[Tool:QueryGraph] Received NL query: '{query}'" TOOL_QUERY_ERROR = "[Tool:QueryGraph] Error during query execution: {error}" +TOOL_QUERY_TIMEOUT = ( + "[Tool:QueryGraph] Query exceeded {timeout:.1f}s and was cancelled: {query}" +) +QUERY_RESULTS_TRUNCATED = ( + "[Tool:QueryGraph] Results truncated: showing {kept} of {total} rows " + "({tokens} tokens, limit {max_tokens})" +) TOOL_SHELL_EXEC = "Executing shell command: {cmd}" TOOL_SHELL_RETURN = "Return code: {code}" TOOL_SHELL_STDOUT = "Stdout: {stdout}" @@ -224,7 +305,6 @@ "Process already terminated when timeout kill was attempted." ) TOOL_SHELL_ERROR = "An error occurred while executing command: {error}" -TOOL_DOC_ANALYZE = "[DocumentAnalyzer] Analyzing '{path}' with question: '{question}'" # (H) Shell timing log SHELL_TIMING = "'{func}' executed in {time:.2f}ms" @@ -276,15 +356,6 @@ SEMANTIC_TOOL_SEARCH = "[Tool:SemanticSearch] Searching for: '{query}'" SEMANTIC_TOOL_SOURCE = "[Tool:GetFunctionSource] Retrieving source for node ID: {id}" -# (H) Document analyzer logs -DOC_COPIED = "Copied external file to: {path}" -DOC_SUCCESS = "Successfully received analysis for '{path}'." 
-DOC_NO_TEXT = "No text found in response: {response}" -DOC_API_ERROR = "Google GenAI API error for '{path}': {error}" -DOC_FAILED = "Failed to analyze document '{path}': {error}" -DOC_RESULT = "[analyze_document] Result type: {type}, content: {preview}..." -DOC_EXCEPTION = "[analyze_document] Exception during analysis: {error}" - # (H) Code retrieval logs CODE_RETRIEVER_INIT = "CodeRetriever initialized with root: {root}" CODE_RETRIEVER_SEARCH = "[CodeRetriever] Searching for: {name}" @@ -295,14 +366,12 @@ FILE_EDITOR_INIT = "FileEditor initialized with root: {root}" FILE_READER_INIT = "FileReader initialized with root: {root}" SHELL_COMMANDER_INIT = "ShellCommander initialized with root: {root}" -DOC_ANALYZER_INIT = "DocumentAnalyzer initialized with root: {root}" # (H) Tool error logs FILE_EDITOR_WARN = "[FileEditor] {msg}" FILE_EDITOR_ERR = "[FileEditor] {msg}" FILE_EDITOR_ERR_EDIT = "[FileEditor] Error editing file {path}: {error}" FILE_READER_ERR = "Error reading file {path}: {error}" -DOC_ANALYZER_API_ERR = "[DocumentAnalyzer] API validation error: {error}" # (H) File writer logs FILE_WRITER_INIT = "FileWriter initialized with root: {root}" @@ -312,18 +381,20 @@ # (H) Error logs (used with logger.error/warning) UNEXPECTED = "An unexpected error occurred: {error}" EXPORT_ERROR = "Export error: {error}" +STATS_ERROR = "Stats error: {error}" +DEADCODE_SCANNING = "Scanning project '{project_name}' for dead code" +DEADCODE_ERROR = "Dead code scan error: {error}" INDEXING_FAILED = "Indexing failed" PATH_NOT_IN_QUESTION = ( - "Could not find original path in question for replacement: {path}" + "Could not locate path token in user message for attachment: {path}" ) -IMAGE_NOT_FOUND = "Image path found, but does not exist: {path}" -IMAGE_COPY_FAILED = "Failed to copy image to temporary directory: {error}" FILE_OUTSIDE_ROOT = "Security risk: Attempted to {action} file outside of project root." 
# (H) Call processor logs CALL_PROCESSING_FILE = "Processing calls in cached AST for: {path}" CALL_PROCESSING_FAILED = "Failed to process calls in {path}: {error}" CALL_FOUND_NODES = "Found {count} call nodes in {language} for {caller}" +CALL_SKIP_CLASS = "Skipping CALLS edge from {caller} to {call_name} (callee is Class node: {callee_qn})" CALL_FOUND = ( "Found call from {caller} to {call_name} (resolved as {callee_type}:{callee_qn})" ) @@ -593,6 +664,14 @@ MCP_ERROR_WRITE = "[MCP] Error writing file: {error}" MCP_LIST_DIR = "[MCP] list_directory: {path}" MCP_ERROR_LIST_DIR = "[MCP] Error listing directory: {error}" +MCP_SEMANTIC_NOT_AVAILABLE = ( + "[MCP] Semantic search not available. Install with: uv sync --extra semantic" +) +MCP_UPDATING_REPO = "[MCP] Updating repository at: {path}" +MCP_ERROR_UPDATING = "[MCP] Error updating repository: {error}" +MCP_SEMANTIC_SEARCH = "[MCP] semantic_search: {query}" +MCP_ASK_AGENT = "[MCP] ask_agent: {question}" +MCP_ASK_AGENT_ERROR = "[MCP] Error running ask_agent: {error}" # (H) MCP server logs MCP_SERVER_INFERRED_ROOT = "[GraphCode MCP] Using inferred project root: {path}" @@ -612,6 +691,35 @@ MCP_SERVER_CONNECTED = "[GraphCode MCP] Connected to Memgraph at {host}:{port}" MCP_SERVER_FATAL_ERROR = "[GraphCode MCP] Fatal error: {error}" MCP_SERVER_SHUTDOWN = "[GraphCode MCP] Shutting down server..." +MCP_HTTP_SERVER_STARTING = "[GraphCode MCP] Starting HTTP server on {host}:{port}..." +MCP_HTTP_SERVER_READY = ( + "[GraphCode MCP] HTTP server ready. 
MCP endpoint: http://{host}:{port}/mcp" +) + +# (H) Incremental update logs +HASH_CACHE_LOADED = "Loaded hash cache with {count} entries from {path}" +HASH_CACHE_LOAD_FAILED = "Failed to load hash cache from {path}: {error}" +HASH_CACHE_SAVED = "Saved hash cache with {count} entries to {path}" +HASH_CACHE_SAVE_FAILED = "Failed to save hash cache to {path}: {error}" +PERIODIC_FLUSH = "Periodic flush after {count} files processed" +INCREMENTAL_SKIPPED = "Skipped {count} unchanged files" +INCREMENTAL_CHANGED = "Re-indexing {count} changed files" +INCREMENTAL_DELETED = "Removed state for {count} deleted files" +INCREMENTAL_FORCE = "Force mode enabled, bypassing hash cache" + +# (H) Orphan pruning logs +PRUNE_START = "--- Pruning orphan nodes from graph ---" +PRUNE_FOUND = "Found {count} orphan {label} nodes to remove" +PRUNE_DELETING = "Pruning orphan {label}: {path}" +PRUNE_COMPLETE = "Pruning complete. Removed {count} orphan nodes." +PRUNE_SKIP = "No orphan nodes found. Graph is clean." +FILE_HASH_UNCHANGED = "File unchanged (hash match): {path}" +FILE_HASH_CHANGED = "File changed (hash mismatch): {path}" +FILE_HASH_NEW = "New file detected: {path}" +FILE_UNREADABLE = ( + "Skipping unreadable file (broken symlink or removed): {path} ({error})" +) +INCREMENTAL_UNREADABLE = "Skipped {count} unreadable files (broken symlinks or removed)" # (H) Exclude prompt logs EXCLUDE_INVALID_INDEX = "Invalid index: {index} (out of range)" @@ -621,3 +729,7 @@ MODEL_SWITCHED = "Model switched to: {model}" MODEL_SWITCH_FAILED = "Failed to switch model: {error}" MODEL_CURRENT = "Current model: {model}" + +# (H) Progress bar logs +PROGRESS_INDEXING_LABEL = "[bold blue]Indexing files..." 
+PROGRESS_FILES_PROCESSED = "{count} processed" diff --git a/codebase_rag/main.py b/codebase_rag/main.py index af58a84a4..756ec43c3 100644 --- a/codebase_rag/main.py +++ b/codebase_rag/main.py @@ -3,26 +3,44 @@ import asyncio import difflib import json +import mimetypes import os import shlex import shutil +import subprocess import sys import uuid from collections import deque -from collections.abc import Coroutine +from collections.abc import Callable, Coroutine +from contextlib import contextmanager from dataclasses import replace +from html import escape as html_escape from pathlib import Path from typing import TYPE_CHECKING from loguru import logger -from prompt_toolkit import prompt +from prompt_toolkit import PromptSession, prompt from prompt_toolkit.formatted_text import HTML from prompt_toolkit.key_binding import KeyBindings from prompt_toolkit.shortcuts import print_formatted_text -from pydantic_ai import DeferredToolRequests, DeferredToolResults, ToolDenied -from rich.markdown import Markdown +from pydantic_ai import ( + BinaryContent, + DeferredToolRequests, + DeferredToolResults, + ToolDenied, +) +from pydantic_ai.messages import ( + ModelRequest, + ModelResponse, + ToolCallPart, + ToolReturnPart, + UserContent, +) +from rich.console import Group +from rich.live import Live from rich.panel import Panel -from rich.prompt import Confirm, Prompt +from rich.prompt import Prompt +from rich.spinner import Spinner from rich.table import Table from rich.text import Text @@ -39,7 +57,6 @@ from .tools.code_retrieval import CodeRetriever, create_code_retrieval_tool from .tools.codebase_query import create_query_tool from .tools.directory_lister import DirectoryLister, create_directory_lister_tool -from .tools.document_analyzer import DocumentAnalyzer, create_document_analyzer_tool from .tools.file_editor import FileEditor, create_file_editor_tool from .tools.file_reader import FileReader, create_file_reader_tool from .tools.file_writer import FileWriter, 
create_file_writer_tool @@ -57,11 +74,13 @@ ConfirmationToolNames, CreateFileArgs, GraphData, + QueryJsonOutput, RawToolArgs, ReplaceCodeArgs, ShellCommandArgs, ToolArgs, ) +from .utils.rich_markdown import LeftAlignedMarkdown if TYPE_CHECKING: from prompt_toolkit.key_binding import KeyPressEvent @@ -109,6 +128,50 @@ def get_session_context() -> str: return "" +def _autowrap_diff_blocks(text: str) -> str: + if cs.DIFF_GIT_HEADER not in text: + return text + lines = text.split("\n") + out: list[str] = [] + in_fence = False + in_diff = False + + def is_diff_continuation(line: str) -> bool: + if line == "": + return True + return line.startswith(cs.DIFF_CONTINUATION_PREFIXES) + + for line in lines: + if line.startswith(cs.MARKDOWN_FENCE): + if in_diff: + out.append(cs.MARKDOWN_FENCE) + in_diff = False + in_fence = not in_fence + out.append(line) + continue + if in_fence: + out.append(line) + continue + if not in_diff and line.startswith(cs.DIFF_GIT_HEADER): + out.append(cs.MARKDOWN_FENCE_DIFF) + in_diff = True + out.append(line) + continue + if in_diff: + if is_diff_continuation(line): + out.append(line) + else: + out.append(cs.MARKDOWN_FENCE) + in_diff = False + out.append(line) + continue + out.append(line) + + if in_diff: + out.append(cs.MARKDOWN_FENCE) + return "\n".join(out) + + def _print_unified_diff(target: str, replacement: str, path: str) -> None: separator = dim(cs.HORIZONTAL_SEPARATOR) app_context.console.print(f"\n{cs.UI_DIFF_FILE_HEADER.format(path=path)}") @@ -216,7 +279,7 @@ def _display_tool_call_diff( ) -def _process_tool_approvals( +async def _process_tool_approvals( requests: DeferredToolRequests, approval_prompt: str, denial_default: str, @@ -228,30 +291,102 @@ def _process_tool_approvals( tool_args = _to_tool_args( call.tool_name, RawToolArgs(**call.args_as_dict()), tool_names ) - app_context.console.print( - f"\n{cs.UI_TOOL_APPROVAL.format(tool_name=call.tool_name)}" + will_prompt = ( + app_context.session.confirm_edits and not 
app_context.session.is_yolo() ) + + if will_prompt: + app_context.console.print( + f"\n{cs.UI_TOOL_APPROVAL.format(tool_name=call.tool_name)}" + ) _display_tool_call_diff(call.tool_name, tool_args, tool_names) - if app_context.session.confirm_edits: - if Confirm.ask(style(approval_prompt, cs.Color.CYAN)): - deferred_results.approvals[call.tool_call_id] = True - else: - feedback = Prompt.ask( - cs.UI_FEEDBACK_PROMPT, - default="", - ) - denial_msg = feedback.strip() or denial_default - deferred_results.approvals[call.tool_call_id] = ToolDenied(denial_msg) - else: + if not will_prompt: + deferred_results.approvals[call.tool_call_id] = True + continue + + if await _confirm_with_toggle(approval_prompt): + deferred_results.approvals[call.tool_call_id] = True + elif app_context.session.is_yolo(): deferred_results.approvals[call.tool_call_id] = True + else: + feedback = await _prompt_with_toggle(cs.UI_FEEDBACK_PROMPT) + denial_msg = feedback.strip() or denial_default + deferred_results.approvals[call.tool_call_id] = ToolDenied(denial_msg) return deferred_results +def _approval_keybindings() -> KeyBindings: + bindings = KeyBindings() + + @bindings.add(cs.KeyBinding.SHIFT_TAB) + def _toggle(event: KeyPressEvent) -> None: + app_context.session.cycle_permission_mode() + if app_context.session.is_yolo(): + event.app.exit(result=cs.YES_ANSWER) + else: + event.app.invalidate() + + @bindings.add(cs.KeyBinding.CTRL_C) + def _interrupt(event: KeyPressEvent) -> None: + event.app.exit(exception=KeyboardInterrupt) + + return bindings + + +async def _confirm_with_toggle(question: str) -> bool: + bindings = _approval_keybindings() + prompt_text = HTML( + f' [y/n] (Y): ' + ) + session: PromptSession[str] = PromptSession() + while True: + try: + answer = await session.prompt_async( + prompt_text, + key_bindings=bindings, + style=ORANGE_STYLE, + bottom_toolbar=lambda: _status_bar_label(), + refresh_interval=0.5, + ) + except (KeyboardInterrupt, EOFError): + return False + if 
app_context.session.is_yolo(): + return True + normalized = (answer or "").strip().lower() + if normalized in cs.YES_ANSWERS: + return True + if normalized in cs.NO_ANSWERS: + return False + + +async def _prompt_with_toggle(question: str) -> str: + bindings = _approval_keybindings() + prompt_text = HTML( + f': ' + ) + session: PromptSession[str] = PromptSession() + try: + answer = await session.prompt_async( + prompt_text, + key_bindings=bindings, + style=ORANGE_STYLE, + bottom_toolbar=lambda: _status_bar_label(), + refresh_interval=0.5, + ) + except (KeyboardInterrupt, EOFError): + return "" + return answer or "" + + +def _rich_log_sink(message: object) -> None: + app_context.console.print(str(message), end="", markup=False, highlight=False) + + def _setup_common_initialization(repo_path: str) -> Path: logger.remove() - logger.add(sys.stdout, format=cs.LOG_FORMAT) + logger.add(_rich_log_sink, format=cs.LOG_FORMAT, colorize=False) project_root = Path(repo_path).resolve() tmp_dir = project_root / cs.TMP_DIR @@ -262,6 +397,7 @@ def _setup_common_initialization(repo_path: str) -> Path: tmp_dir.unlink() tmp_dir.mkdir() + app_context.session.target_repo = project_root return project_root @@ -385,46 +521,75 @@ async def run_with_cancellation[T]( return CancelledResult(cancelled=True) +def _cancel_orphaned_tool_calls(message_history: list[ModelMessage]) -> None: + if not message_history: + return + last = message_history[-1] + if not isinstance(last, ModelResponse): + return + tool_calls = [p for p in last.parts if isinstance(p, ToolCallPart)] + if not tool_calls: + return + message_history.append( + ModelRequest( + parts=[ + ToolReturnPart( + tool_name=p.tool_name, + content=cs.MSG_TOOL_CALL_CANCELLED, + tool_call_id=p.tool_call_id, + ) + for p in tool_calls + ] + ) + ) + + async def _run_agent_response_loop( rag_agent: Agent[None, str | DeferredToolRequests], message_history: list[ModelMessage], - question_with_context: str, + question_with_context: str | 
list[UserContent], config: AgentLoopUI, tool_names: ConfirmationToolNames, model_override: Model | None = None, ) -> None: deferred_results: DeferredToolResults | None = None + pending_prompt: str | list[UserContent] | None = question_with_context while True: - with app_context.console.status(config.status_message): + with _thinking_with_status_bar(config.status_message): response = await run_with_cancellation( rag_agent.run( - question_with_context, + pending_prompt, message_history=message_history, deferred_tool_results=deferred_results, model=model_override, ), ) + pending_prompt = None if isinstance(response, CancelledResult): log_session_event(config.cancelled_log) app_context.session.cancelled = True + _cancel_orphaned_tool_calls(message_history) break + message_history.extend(response.new_messages()) + if isinstance(response.output, DeferredToolRequests): - deferred_results = _process_tool_approvals( + deferred_results = await _process_tool_approvals( response.output, config.approval_prompt, config.denial_default, tool_names, ) - message_history.extend(response.new_messages()) continue + asyncio.create_task(_refresh_context_tokens(list(message_history))) + output_text = response.output if not isinstance(output_text, str): continue - markdown_response = Markdown(output_text) + markdown_response = LeftAlignedMarkdown(_autowrap_diff_blocks(output_text)) app_context.console.print( Panel( markdown_response, @@ -434,35 +599,29 @@ async def _run_agent_response_loop( ) log_session_event(f"{cs.SESSION_PREFIX_ASSISTANT}{output_text}") - message_history.extend(response.new_messages()) break -def _find_image_paths(question: str) -> list[Path]: +def _find_multimodal_paths(question: str) -> list[Path]: try: if os.name == "nt": - # (H) On Windows, shlex.split with posix=False to preserve backslashes tokens = shlex.split(question, posix=False) else: tokens = shlex.split(question) except ValueError: tokens = question.split() - image_paths: list[Path] = [] + paths: list[Path] 
= [] for token in tokens: - # (H) Strip quotes if they remain (shlex with posix=False might keep some) token = token.strip("'\"") - # (H) Check if it looks like an image path - if token.lower().endswith(cs.IMAGE_EXTENSIONS): - # (H) On Windows, could be C:\... or \... - # (H) On POSIX, starts with / + if token.lower().endswith(cs.MULTIMODAL_EXTENSIONS): p = Path(token) if p.is_absolute() or token.startswith("/") or token.startswith("\\"): - image_paths.append(p) - return image_paths + paths.append(p) + return paths -def _get_path_variants(path_str: str) -> tuple[str, ...]: +def _path_variants(path_str: str) -> tuple[str, ...]: return ( path_str.replace(" ", r"\ "), f"'{path_str}'", @@ -471,40 +630,417 @@ def _get_path_variants(path_str: str) -> tuple[str, ...]: ) -def _replace_path_in_question(question: str, old_path: str, new_path: str) -> str: - for variant in _get_path_variants(old_path): - if variant in question: - return question.replace(variant, new_path) - logger.warning(ls.PATH_NOT_IN_QUESTION.format(path=old_path)) - return question +def _guess_media_type(path: Path) -> str: + mime, _ = mimetypes.guess_type(str(path)) + return mime or cs.MIME_TYPE_FALLBACK -def _handle_chat_images(question: str, project_root: Path) -> str: - image_files = _find_image_paths(question) - if not image_files: +def _build_user_prompt(question: str) -> str | list[UserContent]: + paths = _find_multimodal_paths(question) + if not paths: return question - tmp_dir = project_root / cs.TMP_DIR - tmp_dir.mkdir(exist_ok=True) - updated_question = question - - for original_path in image_files: - if not original_path.exists() or not original_path.is_file(): - logger.warning(ls.IMAGE_NOT_FOUND.format(path=original_path)) + content: list[UserContent] = [] + remaining = question + for path in paths: + if not path.exists() or not path.is_file(): + logger.warning(ls.MULTIMODAL_NOT_FOUND.format(path=path)) continue - + match_token = next( + (v for v in _path_variants(str(path)) if v in 
remaining), None + ) + if match_token is None: + logger.warning(ls.PATH_NOT_IN_QUESTION.format(path=path)) + continue + before, _, after = remaining.partition(match_token) + if before.strip(): + content.append(before.rstrip()) try: - new_path = tmp_dir / f"{uuid.uuid4()}-{original_path.name}" - shutil.copy(original_path, new_path) - new_relative = str(new_path.relative_to(project_root)) - updated_question = _replace_path_in_question( - updated_question, str(original_path), new_relative + content.append( + BinaryContent( + data=path.read_bytes(), media_type=_guess_media_type(path) + ) ) - logger.info(ls.IMAGE_COPIED.format(path=new_relative)) + logger.info(ls.MULTIMODAL_ATTACHED.format(path=path)) except Exception as e: - logger.error(ls.IMAGE_COPY_FAILED.format(error=e)) + logger.error(ls.MULTIMODAL_READ_FAILED.format(path=path, error=e)) + content.append(match_token) + remaining = after + + if remaining.strip(): + content.append(remaining.lstrip()) + + return content or question + + +def _permission_mode_label() -> str: + return ( + cs.PERMISSION_MODE_YOLO_LABEL + if app_context.session.is_yolo() + else cs.PERMISSION_MODE_NORMAL_LABEL + ) + + +def _git_state() -> tuple[str, bool] | None: + repo = app_context.session.target_repo + if repo is None or not repo.exists(): + return None + try: + result = subprocess.run( + ["git", "status", "--porcelain", "--branch"], + capture_output=True, + text=True, + timeout=1.0, + check=True, + cwd=repo, + ) + except (subprocess.SubprocessError, FileNotFoundError): + return None + lines = result.stdout.splitlines() + if not lines or not lines[0].startswith("## "): + return None + header = lines[0][3:].split("...", 1)[0].split(" ", 1)[0] + if header in ("HEAD", "No"): + return None + is_dirty = any(line for line in lines[1:]) + return header, is_dirty + + +def _terminal_columns() -> int: + return shutil.get_terminal_size((80, 24)).columns + + +def _format_tokens(n: int) -> str: + if n >= 1_000_000: + return f"{n / 1_000_000:.1f}M" + 
if n >= 1_000: + return f"{n / 1_000:.1f}k" + return str(n) + + +def _token_color(pct: float) -> str: + if pct >= cs.TOKEN_THRESHOLD_CRITICAL: + return cs.TOKEN_COLOR_CRITICAL + if pct >= cs.TOKEN_THRESHOLD_WARNING: + return cs.TOKEN_COLOR_WARNING + return cs.TOKEN_COLOR_OK + - return updated_question +def _token_usage() -> tuple[int, int, float]: + try: + used = int(app_context.session.context_tokens) + except (TypeError, ValueError): + used = 0 + try: + model_id = settings.active_orchestrator_config.model_id or "" + except Exception: + model_id = "" + bare = model_id.split(":", 1)[-1] + max_ctx = cs.MODEL_CONTEXT_WINDOWS.get(bare, cs.DEFAULT_CONTEXT_WINDOW) + pct = (used / max_ctx * 100) if max_ctx > 0 else 0.0 + return used, max_ctx, pct + + +async def _refresh_context_tokens(messages: list[ModelMessage]) -> None: + try: + config = settings.active_orchestrator_config + except Exception: + return + if config.provider != cs.Provider.ANTHROPIC or not config.api_key: + return + try: + from .services.anthropic_token_counter import count_anthropic_context + + count = await count_anthropic_context(config.api_key, config.model_id, messages) + app_context.session.context_tokens = count + except Exception as e: + logger.debug(ls.CONTEXT_TOKEN_COUNT_FAILED.format(error=e)) + + +def _prime_context_token_counter(system_prompt: str) -> None: + if not system_prompt: + return + from pydantic_ai.messages import ModelRequest, SystemPromptPart + + baseline_messages: list[ModelMessage] = [ + ModelRequest(parts=[SystemPromptPart(content=system_prompt)]) + ] + asyncio.create_task(_refresh_context_tokens(baseline_messages)) + + +def _short_model_id() -> tuple[str, str]: + try: + orch = settings.active_orchestrator_config.model_id or "" + except Exception: + orch = "" + try: + cyph = settings.active_cypher_config.model_id or "" + except Exception: + cyph = "" + return orch.split(":", 1)[-1], cyph.split(":", 1)[-1] + + +def _abbreviated_repo(p: Path | None) -> str: + if p is None: + 
return "" + try: + home = Path.home() + return f"~/{p.relative_to(home)}" if p.is_relative_to(home) else str(p) + except (ValueError, OSError): + return str(p) + + +def _config_segments() -> list[tuple[str, str]]: + orch, cyph = _short_model_id() + segments: list[tuple[str, str]] = [] + if orch: + segments.append((cs.STATUS_BAR_CONFIG_LABEL_O, orch)) + if cyph: + segments.append((cs.STATUS_BAR_CONFIG_LABEL_C, cyph)) + segments.append( + ( + cs.STATUS_BAR_CONFIG_LABEL_EDIT, + cs.STATUS_BAR_EDIT_ON + if app_context.session.confirm_edits + else cs.STATUS_BAR_EDIT_OFF, + ) + ) + segments.append( + ( + cs.STATUS_BAR_CONFIG_LABEL_INSTRUCTIONS, + cs.STATUS_BAR_EDIT_ON + if app_context.session.load_cgr_instructions + else cs.STATUS_BAR_EDIT_OFF, + ) + ) + repo = _abbreviated_repo(app_context.session.target_repo) + if repo: + segments.append((cs.STATUS_BAR_CONFIG_LABEL_REPO, repo)) + return segments + + +def _config_status_html() -> str: + parts = [ + f'' + f'' + for label, value in _config_segments() + ] + return cs.STATUS_BAR_CONFIG_SEPARATOR.join(parts) + + +def _config_status_plain() -> str: + parts = [f"{label}:{value}" for label, value in _config_segments()] + return cs.STATUS_BAR_CONFIG_SEPARATOR.join(parts) + + +def _config_status_rich() -> Text: + line = Text() + segments = _config_segments() + for i, (label, value) in enumerate(segments): + if i > 0: + line.append(cs.STATUS_BAR_CONFIG_SEPARATOR, style="dim") + line.append(f"{label}:", style=f"bold {cs.STATUS_BAR_CONFIG_LABEL_COLOR}") + line.append(value, style=cs.STATUS_BAR_CONFIG_COLOR) + return line + + +def _branch_chip_html_and_plain(state: tuple[str, bool] | None) -> tuple[str, str]: + if state is None: + return "", "" + branch, is_dirty = state + html_template = ( + cs.STATUS_BAR_BRANCH_DIRTY_HTML if is_dirty else cs.STATUS_BAR_BRANCH_CLEAN_HTML + ) + plain_template = ( + cs.STATUS_BAR_BRANCH_DIRTY_PLAIN + if is_dirty + else cs.STATUS_BAR_BRANCH_CLEAN_PLAIN + ) + return ( + 
html_template.format(branch=html_escape(branch)), + plain_template.format(branch=branch), + ) + + +def _branch_chip_rich(state: tuple[str, bool] | None) -> Text: + if state is None: + return Text() + branch, is_dirty = state + marker = cs.STATUS_BAR_DIRTY_MARKER if is_dirty else "" + chip_style = cs.STATUS_BAR_DIRTY_STYLE if is_dirty else cs.STATUS_BAR_CLEAN_STYLE + chip = Text() + chip.append( + cs.STATUS_BAR_BRANCH_RICH_TEXT.format(branch=branch, marker=marker), + style=chip_style, + ) + return chip + + +def _status_bar_label() -> HTML | str: + mode = _permission_mode_label() + state = _git_state() + columns = _terminal_columns() + sep_html = ( + f'" + ) + + used, max_ctx, pct = _token_usage() + used_str = _format_tokens(used) + max_str = _format_tokens(max_ctx) + pct_str = f"{pct:.1f}%" + token_html = cs.STATUS_BAR_TOKEN_HTML.format( + color=_token_color(pct), + used=used_str, + max_ctx=max_str, + pct=pct_str, + ) + token_plain = f" {used_str} / {max_str} ({pct_str})" + body_html = html_escape(mode) + token_html + body_plain = mode + token_plain + + config_html = _config_status_html() + config_plain = _config_status_plain() + branch_html, branch_plain = _branch_chip_html_and_plain(state) + + config_with_branch_html = config_html + config_with_branch_plain = config_plain + if branch_html: + if config_html: + config_with_branch_html = f"{config_html} {branch_html}" + config_with_branch_plain = f"{config_plain} {branch_plain}" + else: + config_with_branch_html = branch_html + config_with_branch_plain = branch_plain + + if not config_with_branch_plain: + return HTML(f"{sep_html}\n{body_html}") + inline_sep = " " + if len(body_plain) + len(inline_sep) + len(config_with_branch_plain) <= columns: + return HTML(f"{sep_html}\n{body_html}{inline_sep}{config_with_branch_html}") + return HTML(f"{sep_html}\n{config_with_branch_html}\n{body_html}") + + +def _rich_status_bar() -> Text: + body = Text() + body.append(_permission_mode_label(), style="dim") + used, max_ctx, pct = 
_token_usage() + body.append(" ") + body.append( + f"{_format_tokens(used)} / {_format_tokens(max_ctx)} ({pct:.1f}%)", + style=_token_color(pct), + ) + + config_line = _config_status_rich() + branch_chip = _branch_chip_rich(_git_state()) + if config_line.plain and branch_chip.plain: + config_line.append(" ") + config_line.append_text(branch_chip) + elif branch_chip.plain: + config_line = branch_chip + + if not config_line.plain: + return body + + inline_sep = " " + if ( + len(body.plain) + len(inline_sep) + len(config_line.plain) + <= _terminal_columns() + ): + body.append(inline_sep) + body.append_text(config_line) + return body + return Text("\n").join([config_line, body]) + + +@contextmanager +def _shift_tab_listener(): + if sys.platform == "win32" or not sys.stdin.isatty(): + yield + return + try: + import termios + except ImportError: + yield + return + fd = sys.stdin.fileno() + try: + original = termios.tcgetattr(fd) + except (termios.error, OSError): + yield + return + try: + new_attrs = termios.tcgetattr(fd) + new_attrs[3] &= ~(termios.ICANON | termios.ECHO) + new_attrs[6][termios.VMIN] = 0 + new_attrs[6][termios.VTIME] = 0 + termios.tcsetattr(fd, termios.TCSANOW, new_attrs) + loop = asyncio.get_running_loop() + buffer = bytearray() + + def on_input() -> None: + try: + data = os.read(fd, 1024) + except OSError: + return + if not data: + return + buffer.extend(data) + while cs.SHIFT_TAB_ESCAPE in buffer: + idx = buffer.index(cs.SHIFT_TAB_ESCAPE) + del buffer[idx : idx + len(cs.SHIFT_TAB_ESCAPE)] + app_context.session.cycle_permission_mode() + + loop.add_reader(fd, on_input) + try: + yield + finally: + try: + loop.remove_reader(fd) + except Exception: + pass + finally: + try: + termios.tcsetattr(fd, termios.TCSADRAIN, original) + except (termios.error, OSError): + pass + + +@contextmanager +def _thinking_with_status_bar(message: str): + spinner = Spinner(cs.STATUS_BAR_SPINNER, text=Text.from_markup(message)) + separator = Text( + cs.STATUS_BAR_SEPARATOR_CHAR 
* _terminal_columns(), + style=cs.STATUS_BAR_SEPARATOR_COLOR, + ) + + def render() -> Group: + return Group(separator, spinner, _rich_status_bar()) + + with ( + Live( + render(), + console=app_context.console, + refresh_per_second=4, + transient=True, + ) as live, + _shift_tab_listener(), + ): + + async def _refresh_bar() -> None: + while True: + try: + live.update(render()) + await asyncio.sleep(0.25) + except asyncio.CancelledError: + return + + refresh_task = asyncio.get_running_loop().create_task(_refresh_bar()) + try: + yield live + finally: + refresh_task.cancel() def get_multiline_input(prompt_text: str = cs.PROMPT_ASK_QUESTION) -> str: @@ -514,6 +1050,10 @@ def get_multiline_input(prompt_text: str = cs.PROMPT_ASK_QUESTION) -> str: def submit(event: KeyPressEvent) -> None: event.app.exit(result=event.app.current_buffer.text) + @bindings.add(cs.KeyBinding.CTRL_E) + def submit_ctrl_e(event: KeyPressEvent) -> None: + event.app.exit(result=event.app.current_buffer.text) + @bindings.add(cs.KeyBinding.ENTER) def new_line(event: KeyPressEvent) -> None: event.current_buffer.insert_text("\n") @@ -522,6 +1062,11 @@ def new_line(event: KeyPressEvent) -> None: def keyboard_interrupt(event: KeyPressEvent) -> None: event.app.exit(exception=KeyboardInterrupt) + @bindings.add(cs.KeyBinding.SHIFT_TAB) + def toggle_permission_mode(event: KeyPressEvent) -> None: + app_context.session.cycle_permission_mode() + event.app.invalidate() + clean_prompt = Text.from_markup(prompt_text).plain print_formatted_text( @@ -538,6 +1083,8 @@ def keyboard_interrupt(event: KeyPressEvent) -> None: key_bindings=bindings, wrap_lines=True, style=ORANGE_STYLE, + bottom_toolbar=lambda: _status_bar_label(), + refresh_interval=0.5, ) if result is None: raise EOFError @@ -664,19 +1211,17 @@ async def _run_interactive_loop( log_session_event(f"{cs.SESSION_PREFIX_USER}{question}") if app_context.session.cancelled: - question_with_context = question + get_session_context() + question_text = question + 
get_session_context() app_context.session.reset_cancelled() else: - question_with_context = question + question_text = question - question_with_context = _handle_chat_images( - question_with_context, project_root - ) + user_prompt: str | list[UserContent] = _build_user_prompt(question_text) await _run_agent_response_loop( rag_agent, message_history, - question_with_context, + user_prompt, config, tool_names, model_override, @@ -752,6 +1297,8 @@ def connect_memgraph(batch_size: int) -> MemgraphIngestor: host=settings.MEMGRAPH_HOST, port=settings.MEMGRAPH_PORT, batch_size=batch_size, + username=settings.MEMGRAPH_USERNAME, + password=settings.MEMGRAPH_PASSWORD, ) @@ -969,8 +1516,10 @@ def _validate_provider_config(role: cs.ModelRole, config: ModelConfig) -> None: def _initialize_services_and_agent( - repo_path: str, ingestor: QueryProtocol -) -> tuple[Agent[None, str | DeferredToolRequests], ConfirmationToolNames]: + repo_path: str, + ingestor: QueryProtocol, + active_projects: list[str] | None = None, +) -> tuple[Agent[None, str | DeferredToolRequests], ConfirmationToolNames, str]: _validate_provider_config( cs.ModelRole.ORCHESTRATOR, settings.active_orchestrator_config ) @@ -982,10 +1531,11 @@ def _initialize_services_and_agent( file_writer = FileWriter(project_root=repo_path) file_editor = FileEditor(project_root=repo_path) shell_commander = ShellCommander( - project_root=repo_path, timeout=settings.SHELL_COMMAND_TIMEOUT + project_root=repo_path, + timeout=settings.SHELL_COMMAND_TIMEOUT, + is_yolo=app_context.session.is_yolo, ) directory_lister = DirectoryLister(project_root=repo_path) - document_analyzer = DocumentAnalyzer(project_root=repo_path) query_tool = create_query_tool(ingestor, cypher_generator, app_context.console) code_tool = create_code_retrieval_tool(code_retriever) @@ -994,9 +1544,8 @@ def _initialize_services_and_agent( file_editor_tool = create_file_editor_tool(file_editor) shell_command_tool = create_shell_command_tool(shell_commander) 
directory_lister_tool = create_directory_lister_tool(directory_lister) - document_analyzer_tool = create_document_analyzer_tool(document_analyzer) - semantic_search_tool = create_semantic_search_tool() - function_source_tool = create_get_function_source_tool() + semantic_search_tool = create_semantic_search_tool(ingestor) + function_source_tool = create_get_function_source_tool(ingestor) confirmation_tool_names = ConfirmationToolNames( replace_code=file_editor_tool.name, @@ -1004,7 +1553,7 @@ def _initialize_services_and_agent( shell_command=shell_command_tool.name, ) - rag_agent = create_rag_orchestrator( + rag_agent, system_prompt = create_rag_orchestrator( tools=[ query_tool, code_tool, @@ -1013,21 +1562,55 @@ def _initialize_services_and_agent( file_editor_tool, shell_command_tool, directory_lister_tool, - document_analyzer_tool, semantic_search_tool, function_source_tool, - ] + ], + project_root=Path(repo_path), + load_instructions=app_context.session.load_cgr_instructions, + active_projects=active_projects, ) - return rag_agent, confirmation_tool_names + return rag_agent, confirmation_tool_names, system_prompt -async def main_async(repo_path: str, batch_size: int) -> None: +def main_single_query( + repo_path: str, + batch_size: int, + question: str, + active_projects: list[str] | None = None, + output_format: cs.QueryFormat = cs.QueryFormat.TABLE, +) -> None: + _setup_common_initialization(repo_path) + # (H) Override logger to stderr so stdout is clean for scripted output + logger.remove() + logger.add(sys.stderr, level=cs.LOG_LEVEL_ERROR, format=cs.LOG_FORMAT) + + with connect_memgraph(batch_size) as ingestor: + rag_agent, _, _ = _initialize_services_and_agent( + repo_path, ingestor, active_projects=active_projects + ) + response = asyncio.run(rag_agent.run(question, message_history=[])) + if output_format == cs.QueryFormat.JSON: + payload = QueryJsonOutput(query=question, response=str(response.output)) + print(json.dumps(payload, ensure_ascii=False)) # 
noqa: T201 + else: + print(response.output) # noqa: T201 + + +async def main_async( + repo_path: str, + batch_size: int, + active_projects: list[str] | None = None, + show_config_table: bool = True, + pre_chat_sync: Callable[[], None] | None = None, + pre_chat_sync_message: str = cs.MSG_SYNCING_KNOWLEDGE_GRAPH, +) -> None: project_root = _setup_common_initialization(repo_path) - table = _create_configuration_table(repo_path) - app_context.console.print(table) + if show_config_table: + table = _create_configuration_table(repo_path) + app_context.console.print(table) - with connect_memgraph(batch_size) as ingestor: + async with connect_memgraph(batch_size) as ingestor: app_context.console.print(style(cs.MSG_CONNECTED_MEMGRAPH, cs.Color.GREEN)) app_context.console.print( Panel( @@ -1036,10 +1619,26 @@ async def main_async(repo_path: str, batch_size: int) -> None: ) ) - rag_agent, tool_names = _initialize_services_and_agent(repo_path, ingestor) + rag_agent, tool_names, system_prompt = _initialize_services_and_agent( + repo_path, ingestor, active_projects=active_projects + ) + _prime_context_token_counter(system_prompt) + + if pre_chat_sync is not None: + await _run_pre_chat_sync(pre_chat_sync, pre_chat_sync_message) + await run_chat_loop(rag_agent, [], project_root, tool_names) +async def _run_pre_chat_sync(task: Callable[[], None], message: str) -> None: + logger.disable("codebase_rag") + try: + with _thinking_with_status_bar(message): + await asyncio.to_thread(task) + finally: + logger.enable("codebase_rag") + + async def main_optimize_async( language: str, target_repo_path: str, @@ -1063,12 +1662,13 @@ async def main_optimize_async( effective_batch_size = settings.resolve_batch_size(batch_size) - with connect_memgraph(effective_batch_size) as ingestor: + async with connect_memgraph(effective_batch_size) as ingestor: app_context.console.print(style(cs.MSG_CONNECTED_MEMGRAPH, cs.Color.GREEN)) - rag_agent, tool_names = _initialize_services_and_agent( + rag_agent, 
tool_names, system_prompt = _initialize_services_and_agent( target_repo_path, ingestor ) + _prime_context_token_counter(system_prompt) await run_optimization_loop( rag_agent, [], project_root, language, tool_names, reference_document ) diff --git a/codebase_rag/mcp/__init__.py b/codebase_rag/mcp/__init__.py index 77c80d78a..f3a26b0b7 100644 --- a/codebase_rag/mcp/__init__.py +++ b/codebase_rag/mcp/__init__.py @@ -1 +1,2 @@ -from codebase_rag.mcp.server import main as main +from codebase_rag.mcp.server import serve_http as serve_http +from codebase_rag.mcp.server import serve_stdio as serve_stdio diff --git a/codebase_rag/mcp/client.py b/codebase_rag/mcp/client.py new file mode 100644 index 000000000..b6abb205d --- /dev/null +++ b/codebase_rag/mcp/client.py @@ -0,0 +1,65 @@ +import asyncio +import io +import json +import os +import sys + +import typer +from mcp import ClientSession +from mcp.client.stdio import StdioServerParameters, stdio_client + +from codebase_rag import constants as cs + +app = typer.Typer() + + +async def _query_with_errlog(question: str, errlog: io.TextIOWrapper) -> dict[str, str]: + server_params = StdioServerParameters( + command=sys.executable, + args=["-m", "codebase_rag.cli", "mcp-server"], + ) + + async with stdio_client(server=server_params, errlog=errlog) as (read, write): + async with ClientSession(read, write) as session: + await session.initialize() + result = await session.call_tool( + cs.MCPToolName.ASK_AGENT, + {cs.MCPParamName.QUESTION: question}, + ) + + if result.content: + response_text = result.content[0].text + try: + parsed = json.loads(response_text) + if isinstance(parsed, dict): + return parsed + return {"output": str(parsed)} + except json.JSONDecodeError: + return {"output": response_text} + return {"output": "No response from server"} + + +def query_mcp_server(question: str) -> dict[str, str]: + with open(os.devnull, "w") as devnull: # noqa: SIM115 + return asyncio.run(_query_with_errlog(question, devnull)) + + 
+@app.command() +def main( + question: str = typer.Option( + ..., "--ask-agent", "-a", help="Question to ask about the codebase" + ), +) -> None: + try: + result = query_mcp_server(question) + if isinstance(result, dict) and "output" in result: + print(result["output"]) # noqa: T201 + else: + print(json.dumps(result)) # noqa: T201 + except Exception as e: + print(f"Error: {e}", file=sys.stderr) # noqa: T201 + sys.exit(1) + + +if __name__ == "__main__": + app() diff --git a/codebase_rag/mcp/server.py b/codebase_rag/mcp/server.py index 9218a2d93..6f59e4b67 100644 --- a/codebase_rag/mcp/server.py +++ b/codebase_rag/mcp/server.py @@ -1,6 +1,8 @@ +import contextlib import json import os import sys +from collections.abc import Iterator from pathlib import Path from loguru import logger @@ -16,6 +18,7 @@ from codebase_rag.services.graph_service import MemgraphIngestor from codebase_rag.services.llm import CypherGenerator from codebase_rag.types_defs import MCPToolArguments +from codebase_rag.vector_store import close_qdrant_client def setup_logging() -> None: @@ -71,6 +74,8 @@ def create_server() -> tuple[Server, MemgraphIngestor]: host=settings.MEMGRAPH_HOST, port=settings.MEMGRAPH_PORT, batch_size=settings.MEMGRAPH_BATCH_SIZE, + username=settings.MEMGRAPH_USERNAME, + password=settings.MEMGRAPH_PASSWORD, ) cypher_generator = CypherGenerator() @@ -135,18 +140,33 @@ async def call_tool(name: str, arguments: MCPToolArguments) -> list[TextContent] return server, ingestor -async def main() -> None: +@contextlib.contextmanager +def _service_lifecycle(ingestor: MemgraphIngestor) -> Iterator[None]: + """Manage shared service lifetimes for the MCP server. + + Opens the Memgraph ingestor connection and guarantees the embedded Qdrant + client lock is released on shutdown, so a CLI indexing run can reuse the + storage folder once the server stops. 
+ """ + try: + with ingestor: + logger.info( + lg.MCP_SERVER_CONNECTED.format( + host=settings.MEMGRAPH_HOST, port=settings.MEMGRAPH_PORT + ) + ) + yield + finally: + close_qdrant_client() + + +async def serve_stdio() -> None: logger.info(lg.MCP_SERVER_STARTING) server, ingestor = create_server() logger.info(lg.MCP_SERVER_CREATED) - with ingestor: - logger.info( - lg.MCP_SERVER_CONNECTED.format( - host=settings.MEMGRAPH_HOST, port=settings.MEMGRAPH_PORT - ) - ) + with _service_lifecycle(ingestor): try: async with stdio_server() as (read_stream, write_stream): await server.run( @@ -159,7 +179,45 @@ async def main() -> None: logger.info(lg.MCP_SERVER_SHUTDOWN) +async def serve_http( + host: str = settings.MCP_HTTP_HOST, + port: int = settings.MCP_HTTP_PORT, +) -> None: + import uvicorn + from mcp.server.streamable_http_manager import StreamableHTTPSessionManager + from starlette.applications import Starlette + from starlette.routing import Mount + + logger.info(lg.MCP_HTTP_SERVER_STARTING.format(host=host, port=port)) + + server, ingestor = create_server() + + session_manager = StreamableHTTPSessionManager( + app=server, + json_response=False, + stateless=False, + ) + + @contextlib.asynccontextmanager + async def lifespan(app: Starlette): + with _service_lifecycle(ingestor): + async with session_manager.run(): + logger.info(lg.MCP_HTTP_SERVER_READY.format(host=host, port=port)) + yield + + starlette_app = Starlette( + routes=[ + Mount(settings.MCP_HTTP_ENDPOINT_PATH, app=session_manager.handle_request), + ], + lifespan=lifespan, + ) + + config = uvicorn.Config(starlette_app, host=host, port=port, log_level="info") + uvicorn_server = uvicorn.Server(config) + await uvicorn_server.serve() + + if __name__ == "__main__": import asyncio - asyncio.run(main()) + asyncio.run(serve_stdio()) diff --git a/codebase_rag/mcp/tools.py b/codebase_rag/mcp/tools.py index 5d1d2f7f5..99daf6223 100644 --- a/codebase_rag/mcp/tools.py +++ b/codebase_rag/mcp/tools.py @@ -1,7 +1,11 @@ +import 
asyncio import itertools +import sys from pathlib import Path from loguru import logger +from pydantic_ai import Agent +from rich.console import Console from codebase_rag import constants as cs from codebase_rag import logs as lg @@ -10,9 +14,12 @@ from codebase_rag.models import ToolMetadata from codebase_rag.parser_loader import load_parsers from codebase_rag.services.graph_service import MemgraphIngestor -from codebase_rag.services.llm import CypherGenerator +from codebase_rag.services.llm import CypherGenerator, create_rag_orchestrator from codebase_rag.tools import tool_descriptions as td -from codebase_rag.tools.code_retrieval import CodeRetriever, create_code_retrieval_tool +from codebase_rag.tools.code_retrieval import ( + CodeRetriever, + create_code_retrieval_tool, +) from codebase_rag.tools.codebase_query import create_query_tool from codebase_rag.tools.directory_lister import ( DirectoryLister, @@ -21,6 +28,7 @@ from codebase_rag.tools.file_editor import FileEditor, create_file_editor_tool from codebase_rag.tools.file_reader import FileReader, create_file_reader_tool from codebase_rag.tools.file_writer import FileWriter, create_file_writer_tool +from codebase_rag.tools.shell_command import ShellCommander, create_shell_command_tool from codebase_rag.types_defs import ( CodeSnippetResultDict, DeleteProjectErrorResult, @@ -35,6 +43,8 @@ MCPToolSchema, QueryResultDict, ) +from codebase_rag.utils.dependencies import has_semantic_dependencies +from codebase_rag.vector_store import delete_project_embeddings class MCPToolsRegistry: @@ -47,6 +57,7 @@ def __init__( self.project_root = project_root self.ingestor = ingestor self.cypher_gen = cypher_gen + self._ingestor_lock = asyncio.Lock() self.parsers, self.queries = load_parsers() @@ -55,9 +66,11 @@ def __init__( self.file_reader = FileReader(project_root=project_root) self.file_writer = FileWriter(project_root=project_root) self.directory_lister = DirectoryLister(project_root=project_root) + 
self.shell_commander = ShellCommander(project_root=project_root) + stderr_console = Console(file=sys.stderr, width=None, force_terminal=True) self._query_tool = create_query_tool( - ingestor=ingestor, cypher_gen=cypher_gen, console=None + ingestor=ingestor, cypher_gen=cypher_gen, console=stderr_console ) self._code_tool = create_code_retrieval_tool(code_retriever=self.code_retriever) self._file_editor_tool = create_file_editor_tool(file_editor=self.file_editor) @@ -66,6 +79,24 @@ def __init__( self._directory_lister_tool = create_directory_lister_tool( directory_lister=self.directory_lister ) + self._shell_command_tool = create_shell_command_tool( + shell_commander=self.shell_commander + ) + + self._rag_agent: Agent | None = None + + self._semantic_search_tool = None + self._semantic_search_available = False + + if has_semantic_dependencies(): + from codebase_rag.tools.semantic_search import ( + create_semantic_search_tool, + ) + + self._semantic_search_tool = create_semantic_search_tool(self.ingestor) + self._semantic_search_available = True + else: + logger.info(lg.MCP_SEMANTIC_NOT_AVAILABLE) self._tools: dict[str, ToolMetadata] = { cs.MCPToolName.LIST_PROJECTS: ToolMetadata( @@ -122,6 +153,17 @@ def __init__( handler=self.index_repository, returns_json=False, ), + cs.MCPToolName.UPDATE_REPOSITORY: ToolMetadata( + name=cs.MCPToolName.UPDATE_REPOSITORY, + description=td.MCP_TOOLS[cs.MCPToolName.UPDATE_REPOSITORY], + input_schema=MCPInputSchema( + type=cs.MCPSchemaType.OBJECT, + properties={}, + required=[], + ), + handler=self.update_repository, + returns_json=False, + ), cs.MCPToolName.QUERY_CODE_GRAPH: ToolMetadata( name=cs.MCPToolName.QUERY_CODE_GRAPH, description=td.MCP_TOOLS[cs.MCPToolName.QUERY_CODE_GRAPH], @@ -247,33 +289,122 @@ def __init__( returns_json=False, ), } + if self._semantic_search_available: + self._tools[cs.MCPToolName.SEMANTIC_SEARCH] = ToolMetadata( + name=cs.MCPToolName.SEMANTIC_SEARCH, + 
description=td.MCP_TOOLS[cs.MCPToolName.SEMANTIC_SEARCH], + input_schema=MCPInputSchema( + type=cs.MCPSchemaType.OBJECT, + properties={ + cs.MCPParamName.NATURAL_LANGUAGE_QUERY: MCPInputSchemaProperty( + type=cs.MCPSchemaType.STRING, + description=td.MCP_PARAM_NATURAL_LANGUAGE_QUERY, + ), + cs.MCPParamName.TOP_K: MCPInputSchemaProperty( + type=cs.MCPSchemaType.INTEGER, + description=td.MCP_PARAM_TOP_K, + default=5, + ), + }, + required=[cs.MCPParamName.NATURAL_LANGUAGE_QUERY], + ), + handler=self.semantic_search, + returns_json=False, + ) + + self._tools[cs.MCPToolName.ASK_AGENT] = ToolMetadata( + name=cs.MCPToolName.ASK_AGENT, + description=td.MCP_TOOLS[cs.MCPToolName.ASK_AGENT], + input_schema=MCPInputSchema( + type=cs.MCPSchemaType.OBJECT, + properties={ + cs.MCPParamName.QUESTION: MCPInputSchemaProperty( + type=cs.MCPSchemaType.STRING, + description=td.MCP_PARAM_QUESTION, + ) + }, + required=[cs.MCPParamName.QUESTION], + ), + handler=self.ask_agent, + returns_json=True, + ) + + @property + def rag_agent(self) -> Agent: + if self._rag_agent is None: + from codebase_rag.tools.semantic_search import ( + create_get_function_source_tool, + ) + + tools = [ + self._query_tool, + self._code_tool, + self._file_reader_tool, + self._file_writer_tool, + self._file_editor_tool, + self._shell_command_tool, + self._directory_lister_tool, + create_get_function_source_tool(self.ingestor), + ] + if self._semantic_search_tool is not None: + tools.append(self._semantic_search_tool) + self._rag_agent, _ = create_rag_orchestrator( + tools=tools, project_root=Path(self.project_root) + ) + return self._rag_agent + + # (H) Setter allows tests to inject a mock agent without triggering LLM init + @rag_agent.setter + def rag_agent(self, value: Agent) -> None: + self._rag_agent = value async def list_projects(self) -> ListProjectsResult: logger.info(lg.MCP_LISTING_PROJECTS) try: - projects = self.ingestor.list_projects() + projects = await asyncio.to_thread(self.ingestor.list_projects) 
return ListProjectsSuccessResult(projects=projects, count=len(projects)) except Exception as e: logger.error(lg.MCP_ERROR_LIST_PROJECTS.format(error=e)) return ListProjectsErrorResult(error=str(e), projects=[], count=0) + def _get_project_node_ids(self, project_name: str) -> list[int]: + rows = self.ingestor.fetch_all( + cs.CYPHER_QUERY_PROJECT_NODE_IDS, + {cs.KEY_PROJECT_NAME: project_name}, + ) + result: list[int] = [] + for row in rows: + node_id = row.get(cs.KEY_NODE_ID) + if isinstance(node_id, int): + result.append(node_id) + return result + + def _cleanup_project_embeddings(self, project_name: str) -> None: + node_ids = self._get_project_node_ids(project_name) + delete_project_embeddings(project_name, node_ids) + + def _delete_project_sync(self, project_name: str) -> DeleteProjectResult: + projects = self.ingestor.list_projects() + if project_name not in projects: + return DeleteProjectErrorResult( + success=False, + error=te.MCP_PROJECT_NOT_FOUND.format( + project_name=project_name, projects=projects + ), + ) + self._cleanup_project_embeddings(project_name) + self.ingestor.delete_project(project_name) + return DeleteProjectSuccessResult( + success=True, + project=project_name, + message=cs.MCP_PROJECT_DELETED.format(project_name=project_name), + ) + async def delete_project(self, project_name: str) -> DeleteProjectResult: logger.info(lg.MCP_DELETING_PROJECT.format(project_name=project_name)) try: - projects = self.ingestor.list_projects() - if project_name not in projects: - return DeleteProjectErrorResult( - success=False, - error=te.MCP_PROJECT_NOT_FOUND.format( - project_name=project_name, projects=projects - ), - ) - self.ingestor.delete_project(project_name) - return DeleteProjectSuccessResult( - success=True, - project=project_name, - message=cs.MCP_PROJECT_DELETED.format(project_name=project_name), - ) + async with self._ingestor_lock: + return await asyncio.to_thread(self._delete_project_sync, project_name) except Exception as e: 
logger.error(lg.MCP_ERROR_DELETE_PROJECT.format(error=e)) return DeleteProjectErrorResult(success=False, error=str(e)) @@ -283,34 +414,88 @@ async def wipe_database(self, confirm: bool) -> str: return cs.MCP_WIPE_CANCELLED logger.warning(lg.MCP_WIPING_DATABASE) try: - self.ingestor.clean_database() + async with self._ingestor_lock: + await asyncio.to_thread(self.ingestor.clean_database) return cs.MCP_WIPE_SUCCESS except Exception as e: logger.error(lg.MCP_ERROR_WIPE.format(error=e)) return cs.MCP_WIPE_ERROR.format(error=e) + def _index_repository_sync(self) -> str: + project_name = Path(self.project_root).resolve().name + logger.info(lg.MCP_CLEARING_PROJECT.format(project_name=project_name)) + self._cleanup_project_embeddings(project_name) + self.ingestor.delete_project(project_name) + + self.ingestor.ensure_constraints() + self.ingestor.flush_all() + + updater = GraphUpdater( + ingestor=self.ingestor, + repo_path=Path(self.project_root), + parsers=self.parsers, + queries=self.queries, + project_name=project_name, + ) + updater.run() + self.ingestor.flush_all() + + return cs.MCP_INDEX_SUCCESS_PROJECT.format( + path=self.project_root, project_name=project_name + ) + async def index_repository(self) -> str: logger.info(lg.MCP_INDEXING_REPO.format(path=self.project_root)) - project_name = Path(self.project_root).resolve().name try: - logger.info(lg.MCP_CLEARING_PROJECT.format(project_name=project_name)) - self.ingestor.delete_project(project_name) - - updater = GraphUpdater( - ingestor=self.ingestor, - repo_path=Path(self.project_root), - parsers=self.parsers, - queries=self.queries, - ) - updater.run() - - return cs.MCP_INDEX_SUCCESS_PROJECT.format( - path=self.project_root, project_name=project_name - ) + async with self._ingestor_lock: + return await asyncio.to_thread(self._index_repository_sync) except Exception as e: logger.error(lg.MCP_ERROR_INDEXING.format(error=e)) return cs.MCP_INDEX_ERROR.format(error=e) + def _update_repository_sync(self) -> str: + 
project_name = Path(self.project_root).resolve().name + + self.ingestor.ensure_constraints() + self.ingestor.flush_all() + + updater = GraphUpdater( + ingestor=self.ingestor, + repo_path=Path(self.project_root), + parsers=self.parsers, + queries=self.queries, + project_name=project_name, + ) + updater.run() + self.ingestor.flush_all() + return cs.MCP_UPDATE_SUCCESS.format(path=self.project_root) + + async def update_repository(self) -> str: + logger.info(lg.MCP_UPDATING_REPO.format(path=self.project_root)) + try: + async with self._ingestor_lock: + return await asyncio.to_thread(self._update_repository_sync) + except Exception as e: + logger.error(lg.MCP_ERROR_UPDATING.format(error=e)) + return cs.MCP_UPDATE_ERROR.format(error=e) + + async def semantic_search(self, natural_language_query: str, top_k: int = 5) -> str: + assert self._semantic_search_tool is not None + logger.info(lg.MCP_SEMANTIC_SEARCH.format(query=natural_language_query)) + result = await self._semantic_search_tool.function( + query=natural_language_query, top_k=top_k + ) + return str(result) + + async def ask_agent(self, question: str) -> dict[str, str]: + logger.info(lg.MCP_ASK_AGENT.format(question=question)) + try: + response = await self.rag_agent.run(question, message_history=[]) + return {"output": str(response.output)} + except Exception as e: + logger.error(lg.MCP_ASK_AGENT_ERROR.format(error=e)) + return {"error": cs.MCP_ASK_AGENT_ERROR.format(error=e)} + async def query_code_graph(self, natural_language_query: str) -> QueryResultDict: logger.info(lg.MCP_QUERY_CODE_GRAPH.format(query=natural_language_query)) try: diff --git a/codebase_rag/models.py b/codebase_rag/models.py index e189dbde0..763371a16 100644 --- a/codebase_rag/models.py +++ b/codebase_rag/models.py @@ -5,7 +5,7 @@ from rich.console import Console -from .constants import SupportedLanguage +from .constants import PermissionMode, SupportedLanguage from .types_defs import MCPHandlerType, MCPInputSchema, PropertyValue if 
TYPE_CHECKING: @@ -15,12 +15,27 @@ @dataclass class SessionState: confirm_edits: bool = True + load_cgr_instructions: bool = True log_file: Path | None = None cancelled: bool = False + permission_mode: PermissionMode = PermissionMode.NORMAL + context_tokens: int = 0 + target_repo: Path | None = None def reset_cancelled(self) -> None: self.cancelled = False + def is_yolo(self) -> bool: + return self.permission_mode == PermissionMode.YOLO + + def cycle_permission_mode(self) -> PermissionMode: + self.permission_mode = ( + PermissionMode.YOLO + if self.permission_mode == PermissionMode.NORMAL + else PermissionMode.NORMAL + ) + return self.permission_mode + def _default_console() -> Console: return Console(width=None, force_terminal=True) diff --git a/codebase_rag/parser_loader.py b/codebase_rag/parser_loader.py index 69ddabda3..6e79353ce 100644 --- a/codebase_rag/parser_loader.py +++ b/codebase_rag/parser_loader.py @@ -33,7 +33,7 @@ def _try_load_from_submodule(lang_name: cs.SupportedLanguage) -> LanguageLoader: setup_py_path = submodule_path / cs.SETUP_PY if setup_py_path.exists(): - logger.debug(ls.BUILDING_BINDINGS.format(lang=lang_name)) + logger.debug(ls.BUILDING_BINDINGS, lang=lang_name) result = subprocess.run( [sys.executable, cs.SETUP_PY, cs.BUILD_EXT_CMD, cs.INPLACE_FLAG], check=False, @@ -44,14 +44,15 @@ def _try_load_from_submodule(lang_name: cs.SupportedLanguage) -> LanguageLoader: if result.returncode != 0: logger.debug( - ls.BUILD_FAILED.format( - lang=lang_name, stdout=result.stdout, stderr=result.stderr - ) + ls.BUILD_FAILED, + lang=lang_name, + stdout=result.stdout, + stderr=result.stderr, ) return None - logger.debug(ls.BUILD_SUCCESS.format(lang=lang_name)) + logger.debug(ls.BUILD_SUCCESS, lang=lang_name) - logger.debug(ls.IMPORTING_MODULE.format(module=module_name)) + logger.debug(ls.IMPORTING_MODULE, module=module_name) module = importlib.import_module(module_name) language_attrs: list[str] = [ @@ -63,21 +64,19 @@ def 
_try_load_from_submodule(lang_name: cs.SupportedLanguage) -> LanguageLoader: for attr_name in language_attrs: if hasattr(module, attr_name): logger.debug( - ls.LOADED_FROM_SUBMODULE.format(lang=lang_name, attr=attr_name) + ls.LOADED_FROM_SUBMODULE, lang=lang_name, attr=attr_name ) loader: LanguageLoader = getattr(module, attr_name) return loader - logger.debug( - ls.NO_LANG_ATTR.format(module=module_name, available=dir(module)) - ) + logger.debug(ls.NO_LANG_ATTR, module=module_name, available=dir(module)) finally: if python_bindings_str in sys.path: sys.path.remove(python_bindings_str) except Exception as e: - logger.debug(ls.SUBMODULE_LOAD_FAILED.format(lang=lang_name, error=e)) + logger.debug(ls.SUBMODULE_LOAD_FAILED, lang=lang_name, error=e) return None @@ -137,6 +136,12 @@ def _import_language_loaders() -> dict[cs.SupportedLanguage, LanguageLoader]: cs.QUERY_LANGUAGE, cs.SupportedLanguage.JAVA, ), + LanguageImport( + cs.SupportedLanguage.C, + cs.TreeSitterModule.C, + cs.QUERY_LANGUAGE, + cs.SupportedLanguage.C, + ), LanguageImport( cs.SupportedLanguage.CPP, cs.TreeSitterModule.CPP, @@ -149,6 +154,12 @@ def _import_language_loaders() -> dict[cs.SupportedLanguage, LanguageLoader]: cs.QUERY_LANGUAGE, cs.SupportedLanguage.LUA, ), + LanguageImport( + cs.SupportedLanguage.PHP, + cs.TreeSitterModule.PHP, + cs.LANG_ATTR_PHP, + cs.SupportedLanguage.PHP, + ), ] loaders: dict[cs.SupportedLanguage, LanguageLoader] = { @@ -215,10 +226,14 @@ def _create_locals_query( try: return Query(language, locals_pattern) except Exception as e: - logger.debug(ls.LOCALS_QUERY_FAILED.format(lang=lang_name, error=e)) + logger.debug(ls.LOCALS_QUERY_FAILED, lang=lang_name, error=e) return None +COMBINED_FUNC_CLASS_QUERIES: dict[cs.SupportedLanguage, Query | None] = {} +COMBINED_FUNC_CLASS_IMPORT_QUERIES: dict[cs.SupportedLanguage, Query | None] = {} + + def _create_language_queries( language: Language, parser: Parser, @@ -236,6 +251,22 @@ def _create_language_queries( ) 
combined_import_patterns = _build_combined_import_pattern(lang_config) + combined_fc_pattern = f"{function_patterns} {class_patterns}".strip() + try: + COMBINED_FUNC_CLASS_QUERIES[lang_name] = ( + Query(language, combined_fc_pattern) if combined_fc_pattern else None + ) + except Exception: + COMBINED_FUNC_CLASS_QUERIES[lang_name] = None + + combined_fci_pattern = f"{function_patterns} {class_patterns} {combined_import_patterns} {call_patterns}".strip() + try: + COMBINED_FUNC_CLASS_IMPORT_QUERIES[lang_name] = ( + Query(language, combined_fci_pattern) if combined_fci_pattern else None + ) + except Exception: + COMBINED_FUNC_CLASS_IMPORT_QUERIES[lang_name] = None + return LanguageQueries( functions=_create_optional_query(language, function_patterns), classes=_create_optional_query(language, class_patterns), @@ -256,7 +287,7 @@ def _process_language( ) -> bool: lang_lib = LANGUAGE_LIBRARIES.get(lang_name) if not lang_lib: - logger.debug(ls.LIB_NOT_AVAILABLE.format(lang=lang_name)) + logger.debug(ls.LIB_NOT_AVAILABLE, lang=lang_name) return False try: diff --git a/codebase_rag/parsers/call_processor.py b/codebase_rag/parsers/call_processor.py index 0e53cbe73..5a30df19b 100644 --- a/codebase_rag/parsers/call_processor.py +++ b/codebase_rag/parsers/call_processor.py @@ -1,6 +1,9 @@ from __future__ import annotations +from bisect import bisect_left, bisect_right +from collections import defaultdict from pathlib import Path +from typing import NamedTuple from loguru import logger from tree_sitter import Node, QueryCursor @@ -8,16 +11,68 @@ from .. import constants as cs from .. 
import logs as ls from ..language_spec import LanguageSpec +from ..parser_loader import COMBINED_FUNC_CLASS_QUERIES from ..services import IngestorProtocol from ..types_defs import FunctionRegistryTrieProtocol, LanguageQueries +from ..utils.path_utils import cached_relative_path from .call_resolver import CallResolver from .cpp import utils as cpp_utils +from .go import utils as go_utils from .import_processor import ImportProcessor +from .java import utils as java_utils +from .lua import utils as lua_utils +from .rs import utils as rs_utils from .type_inference import TypeInferenceEngine -from .utils import get_function_captures, is_method_node +from .utils import ( + get_function_captures, + is_method_node, + python_parameter_names, + safe_decode_text, + sorted_captures, +) + + +class _CallableFlowArg(NamedTuple): + # (H) One call-site argument that may carry a callable: bound either to a concrete + # (H) function (source_concrete) or to a parameter of the caller (source_caller + + # (H) source_param), keyed to the callee parameter by position or keyword. + callee_qn: str + position: int + keyword: str + source_concrete: str + source_caller: str + source_param: str + + +_TYPED_LANGUAGES = frozenset( + { + cs.SupportedLanguage.PYTHON, + cs.SupportedLanguage.JS, + cs.SupportedLanguage.TS, + cs.SupportedLanguage.JAVA, + cs.SupportedLanguage.LUA, + cs.SupportedLanguage.GO, + cs.SupportedLanguage.CPP, + } +) + +# (H) C and C++ share the function_definition/declarator shape, so the callee +# (H) name lives in a nested declarator (no `name` field). Both need the libclang +# (H) declarator-aware extractor rather than a plain child_by_field_name("name"). 
+_C_FAMILY_LANGUAGES = frozenset({cs.SupportedLanguage.C, cs.SupportedLanguage.CPP}) +_JS_TS_LANGUAGES = frozenset({cs.SupportedLanguage.JS, cs.SupportedLanguage.TS}) class CallProcessor: + __slots__ = ( + "ingestor", + "repo_path", + "project_name", + "_resolver", + "_flow_param_names", + "_flow_args", + ) + def __init__( self, ingestor: IngestorProtocol, @@ -38,6 +93,10 @@ def __init__( type_inference=type_inference, class_inheritance=class_inheritance, ) + # (H) Inter-procedural callable-parameter flow: ordered params per function and + # (H) the per-call-site argument bindings, resolved to a fixpoint in finalize. + self._flow_param_names: dict[str, list[str]] = {} + self._flow_args: list[_CallableFlowArg] = [] def _get_node_name(self, node: Node, field: str = cs.FIELD_NAME) -> str | None: name_node = node.child_by_field_name(field) @@ -46,31 +105,299 @@ def _get_node_name(self, node: Node, field: str = cs.FIELD_NAME) -> str | None: text = name_node.text return None if text is None else text.decode(cs.ENCODING_UTF8) + def _collect_all_call_nodes( + self, + root_node: Node, + language: cs.SupportedLanguage, + queries: dict[cs.SupportedLanguage, LanguageQueries], + ) -> tuple[list[Node], list[int]]: + calls_query = queries[language].get(cs.QUERY_CALLS) + if not calls_query: + return [], [] + cursor = QueryCursor(calls_query) + captures = sorted_captures(cursor, root_node) + call_nodes = captures.get(cs.CAPTURE_CALL, []) + call_starts = [n.start_byte for n in call_nodes] + return call_nodes, call_starts + + def _filter_calls_in_node( + self, + all_call_nodes: list[Node], + call_starts: list[int], + container: Node, + ) -> list[Node]: + start = container.start_byte + end = container.end_byte + lo = bisect_left(call_starts, start) + hi = bisect_right(call_starts, end) + return [n for n in all_call_nodes[lo:hi] if n.end_byte <= end] + + def _filter_top_level_calls( + self, + all_call_nodes: list[Node], + call_starts: list[int], + func_nodes: list[Node], + ) -> 
list[Node]: + # (H) Calls inside a function's BODY belong to that function, not the + # (H) module; only genuine top-level calls are module-attributed. The body + # (H) (not the whole node) is the boundary so def-time calls in the + # (H) signature -- default args like `def f(x=make_default())` and + # (H) decorators -- run at module load and stay module-attributed. A node + # (H) with no body is not a real function scope (e.g. a file-scope + # (H) declaration `int x = top();` that the grammar captures as a + # (H) function); its calls run at load time, so it excludes nothing. + nested_starts: set[int] = set() + for func_node in func_nodes: + body = func_node.child_by_field_name(cs.FIELD_BODY) + if body is None: + continue + for call in self._filter_calls_in_node(all_call_nodes, call_starts, body): + nested_starts.add(call.start_byte) + return [c for c in all_call_nodes if c.start_byte not in nested_starts] + + def _bare_decorator_name(self, decorator_node: Node) -> str | None: + # (H) A bare decorator `@task` / `@pkg.deco` (no call parens) is not a + # (H) `call` node, so the normal call pass misses it even though applying + # (H) it runs `task(func)` at module load. A call decorator `@deco(...)` + # (H) IS a call node and is already captured, so skip it here. + named = decorator_node.named_children + if not named: + return None + expr = named[0] + if expr.type in (cs.TS_IDENTIFIER, cs.TS_ATTRIBUTE) and expr.text is not None: + return expr.text.decode(cs.ENCODING_UTF8) + return None + + def _runs_at_module_load(self, node: Node) -> bool: + # (H) A definition runs at module load only when it is at module or + # (H) class-body scope; nested inside a function body it runs at that + # (H) function's call time, so its decorator is not a module-load call. 
+ ancestor = node.parent + while ancestor is not None: + if ancestor.type == cs.TS_PY_FUNCTION_DEFINITION: + return False + ancestor = ancestor.parent + return True + + def _ingest_decorator_calls( + self, + nodes: list[Node], + module_qn: str, + root_node: Node, + lang_config: LanguageSpec, + ) -> None: + # (H) Emit `(Module)->decorator` CALLS for bare decorators on functions, + # (H) methods, AND classes: the decoration executes at module-load time, + # (H) so the module is the caller. Only first-party callables get an edge. + resolver = self._resolver + ensure_rel = self.ingestor.ensure_relationship_batch + qn_key = cs.KEY_QUALIFIED_NAME + module_spec = (cs.NodeLabel.MODULE, qn_key, module_qn) + callable_labels = (cs.NodeLabel.FUNCTION, cs.NodeLabel.METHOD) + alias_map: dict[str, str] | None = None + for node in nodes: + parent = node.parent + if parent is None or parent.type != cs.TS_PY_DECORATED_DEFINITION: + continue + if not self._runs_at_module_load(parent): + continue + for child in parent.children: + if child.type != cs.TS_PY_DECORATOR: + continue + name = self._bare_decorator_name(child) + if not name: + continue + callee = resolver.resolve_function_call(name, module_qn) + if not callee and cs.SEPARATOR_DOT not in name: + # (H) `@alias` where `alias = task` still calls task at load; + # (H) reuse the local-alias fallback the call pass uses. 
+ if alias_map is None: + alias_map = self._build_local_alias_map( + root_node, lang_config, module_qn + ) + if (rhs := alias_map.get(name)) is not None: + callee = resolver.resolve_function_call(rhs, module_qn) + if callee and callee[0] in callable_labels: + ensure_rel( + module_spec, + cs.RelationshipType.CALLS, + (callee[0], qn_key, callee[1]), + ) + + def _module_qn(self, relative_path: Path, file_name: str) -> str: + if file_name in (cs.INIT_PY, cs.MOD_RS): + return cs.SEPARATOR_DOT.join( + [self.project_name] + list(relative_path.parent.parts) + ) + return cs.SEPARATOR_DOT.join( + [self.project_name] + list(relative_path.with_suffix("").parts) + ) + + def collect_callable_field_bindings( + self, + file_path: Path, + root_node: Node, + language: cs.SupportedLanguage, + queries: dict[cs.SupportedLanguage, LanguageQueries], + func_class_captures_cache: dict[Path, dict] | None = None, + ) -> None: + # (H) Pre-pass: record which functions are bound to a class's callable + # (H) fields (FQNSpec(get_name=_python_get_name, ...)). Runs before call + # (H) resolution so a field invocation can resolve regardless of which + # (H) file the construction site lives in. Keyword bindings only; + # (H) positional callable args would need declared field order. 
+ if language != cs.SupportedLanguage.PYTHON: + return + try: + module_qn = self._module_qn( + cached_relative_path(file_path, self.repo_path), file_path.name + ) + if ( + func_class_captures_cache is not None + and file_path in func_class_captures_cache + ): + call_nodes = func_class_captures_cache[file_path].get(cs.CAPTURE_CALL) + else: + call_nodes = None + if call_nodes is None: + call_nodes, _ = self._collect_all_call_nodes( + root_node, language, queries + ) + resolver = self._resolver + registry = resolver.function_registry + callable_labels = (cs.NodeLabel.FUNCTION, cs.NodeLabel.METHOD) + for call_node in call_nodes: + _positional, keyword = self._parse_call_arguments(call_node) + if not keyword: + continue + name = self._get_call_target_name(call_node) + if not name: + continue + callee = resolver.resolve_function_call(name, module_qn) + if not callee or callee[0] != cs.NodeLabel.CLASS: + continue + for field, value_node in keyword.items(): + if not (value_text := safe_decode_text(value_node)): + continue + bound = resolver.resolve_function_call(value_text, module_qn) + if bound and bound[0] in callable_labels and bound[1] in registry: + resolver.record_callable_field_binding( + callee[1], field, bound[1] + ) + except Exception as e: + logger.error(ls.CALL_PROCESSING_FAILED, path=file_path, error=e) + def process_calls_in_file( self, file_path: Path, root_node: Node, language: cs.SupportedLanguage, queries: dict[cs.SupportedLanguage, LanguageQueries], + func_class_captures_cache: dict[Path, dict] | None = None, ) -> None: - relative_path = file_path.relative_to(self.repo_path) - logger.debug(ls.CALL_PROCESSING_FILE.format(path=relative_path)) + relative_path = cached_relative_path(file_path, self.repo_path) + logger.debug(ls.CALL_PROCESSING_FILE, path=relative_path) try: - module_qn = cs.SEPARATOR_DOT.join( - [self.project_name] + list(relative_path.with_suffix("").parts) - ) - if file_path.name in (cs.INIT_PY, cs.MOD_RS): - module_qn = 
cs.SEPARATOR_DOT.join( - [self.project_name] + list(relative_path.parent.parts) + module_qn = self._module_qn(relative_path, file_path.name) + + call_name_cache: dict[int, str | None] = {} + + if ( + func_class_captures_cache is not None + and file_path in func_class_captures_cache + ): + combined_captures = func_class_captures_cache[file_path] + else: + combined_query = COMBINED_FUNC_CLASS_QUERIES.get(language) + if combined_query: + cursor = QueryCursor(combined_query) + combined_captures = sorted_captures(cursor, root_node) + else: + combined_captures = {} + + cached_calls = combined_captures.get(cs.CAPTURE_CALL) + if cached_calls is not None: + all_call_nodes = cached_calls + call_starts: list[int] | None = None + else: + all_call_nodes, call_starts = self._collect_all_call_nodes( + root_node, language, queries ) - self._process_calls_in_functions(root_node, module_qn, language, queries) - self._process_calls_in_classes(root_node, module_qn, language, queries) - self._process_module_level_calls(root_node, module_qn, language, queries) + sorted_func_nodes = combined_captures.get(cs.CAPTURE_FUNCTION) + if sorted_func_nodes or combined_captures.get(cs.CAPTURE_CLASS): + if cached_calls is not None: + call_starts = [n.start_byte for n in all_call_nodes] + func_node_starts = ( + [n.start_byte for n in sorted_func_nodes] + if sorted_func_nodes + else None + ) + else: + call_starts = None + func_node_starts = None + + self._process_calls_in_functions( + root_node, + module_qn, + language, + queries, + all_call_nodes, + call_starts, + call_name_cache=call_name_cache, + combined_captures=combined_captures or None, + ) + # (H) Bare decorators (`@task`) are not call nodes; emit their + # (H) module-load CALLS before the empty-`all_call_nodes` early return, + # (H) since a file may have decorators but no other calls. Classes can + # (H) be decorated too, so include captured class nodes. 
+ if language == cs.SupportedLanguage.PYTHON: + decorator_targets = list(sorted_func_nodes or []) + if combined_captures and ( + class_nodes := combined_captures.get(cs.CAPTURE_CLASS) + ): + decorator_targets.extend(class_nodes) + if decorator_targets: + self._ingest_decorator_calls( + decorator_targets, + module_qn, + root_node, + queries[language][cs.QUERY_CONFIG], + ) + if not all_call_nodes: + return + self._process_calls_in_classes( + root_node, + module_qn, + language, + queries, + all_call_nodes, + call_starts, + call_name_cache=call_name_cache, + combined_captures=combined_captures, + sorted_func_nodes=sorted_func_nodes, + func_node_starts=func_node_starts, + ) + if sorted_func_nodes and call_starts is not None: + module_calls = self._filter_top_level_calls( + all_call_nodes, call_starts, sorted_func_nodes + ) + else: + module_calls = all_call_nodes + self._ingest_function_calls( + root_node, + module_qn, + cs.NodeLabel.MODULE, + module_qn, + language, + queries, + call_nodes=module_calls, + call_name_cache=call_name_cache, + ) except Exception as e: - logger.error(ls.CALL_PROCESSING_FAILED.format(path=file_path, error=e)) + logger.error(ls.CALL_PROCESSING_FAILED, path=file_path, error=e) def _process_calls_in_functions( self, @@ -78,28 +405,121 @@ def _process_calls_in_functions( module_qn: str, language: cs.SupportedLanguage, queries: dict[cs.SupportedLanguage, LanguageQueries], + all_call_nodes: list[Node] | None = None, + call_starts: list[int] | None = None, + call_name_cache: dict[int, str | None] | None = None, + combined_captures: dict[str, list[Node]] | None = None, ) -> None: - result = get_function_captures(root_node, language, queries) - if not result: - return - - lang_config, captures = result - func_nodes = captures.get(cs.CAPTURE_FUNCTION, []) + if combined_captures is not None: + lang_config = queries[language][cs.QUERY_CONFIG] + func_nodes = combined_captures.get(cs.CAPTURE_FUNCTION, []) + has_classes = 
bool(combined_captures.get(cs.CAPTURE_CLASS)) + else: + result = get_function_captures(root_node, language, queries) + if not result: + return + lang_config, captures = result + func_nodes = captures.get(cs.CAPTURE_FUNCTION, []) + has_classes = bool(captures.get(cs.CAPTURE_CLASS)) for func_node in func_nodes: - if not isinstance(func_node, Node): - continue - if self._is_method(func_node, lang_config): + if has_classes and self._is_method(func_node, lang_config): continue - if language == cs.SupportedLanguage.CPP: + if language in _C_FAMILY_LANGUAGES: func_name = cpp_utils.extract_function_name(func_node) else: func_name = self._get_node_name(func_node) + if not func_name and language in _JS_TS_LANGUAGES: + func_name = self._js_ts_arrow_binding_name(func_node) + if ( + not func_name + and language == cs.SupportedLanguage.LUA + and func_node.type == cs.TS_LUA_FUNCTION_DEFINITION + ): + # (H) A function expression bound to a variable or table field + # (H) (`local f = function()`, `M.f = function()`) has no name field; + # (H) the definition pass names it after its assignment target, so + # (H) recover the same name here or the whole body would be skipped. + func_name = lua_utils.extract_assigned_name( + func_node, + accepted_var_types=(cs.TS_DOT_INDEX_EXPRESSION, cs.TS_IDENTIFIER), + ) if not func_name: continue - if func_qn := self._build_nested_qualified_name( - func_node, module_qn, func_name, lang_config + # (H) An out-of-line C++ method definition (`Ret Class::method() {...}` + # (H) at namespace/file scope) is bound by the definition pass to its + # (H) class node (qn `class_qn.method`). Attribute its body's calls to + # (H) that method node, not a phantom module-rooted free-function qn, + # (H) so the CALLS edges join to a real node. 
+ if language == cs.SupportedLanguage.CPP and ( + bound := self._cpp_out_of_class_method_caller( + func_node, func_name, module_qn + ) + ): + caller_qn, class_qn = bound + filtered = ( + self._filter_calls_in_node(all_call_nodes, call_starts, func_node) + if all_call_nodes is not None and call_starts is not None + else None + ) + self._ingest_function_calls( + func_node, + caller_qn, + cs.NodeLabel.METHOD, + module_qn, + language, + queries, + class_qn, + call_nodes=filtered, + call_name_cache=call_name_cache, + ) + continue + # (H) A Go receiver method (`func (t T) m()`) is declared at file scope + # (H) but the definition pass binds it to its receiver type's node + # (H) (qn `module.T.m`). Attribute its body's calls to that method node, + # (H) not the receiver-dropping `module.m` that _build_nested_qualified_name + # (H) would produce, so the CALLS edges join to a real node. + if language == cs.SupportedLanguage.GO and ( + bound := self._go_receiver_method_caller( + func_node, func_name, module_qn + ) ): + caller_qn, container_qn = bound + filtered = ( + self._filter_calls_in_node(all_call_nodes, call_starts, func_node) + if all_call_nodes is not None and call_starts is not None + else None + ) + self._ingest_function_calls( + func_node, + caller_qn, + cs.NodeLabel.METHOD, + module_qn, + language, + queries, + container_qn, + call_nodes=filtered, + call_name_cache=call_name_cache, + ) + continue + # (H) A C++ free function inside a namespace is bound by the definition + # (H) pass via build_qualified_name (qn `module.ns.fn`); _build_nested... + # (H) ignores namespace_definition ancestors and would drop the namespace + # (H) (`module.fn`), dangling the CALLS source. Use the same builder so + # (H) caller and node qns agree. 
+ func_qn = ( + cpp_utils.build_qualified_name(func_node, module_qn, func_name) + if language == cs.SupportedLanguage.CPP + else self._build_nested_qualified_name( + func_node, module_qn, func_name, lang_config + ) + ) + if func_qn: + filtered = ( + self._filter_calls_in_node(all_call_nodes, call_starts, func_node) + if all_call_nodes is not None and call_starts is not None + else None + ) self._ingest_function_calls( func_node, func_qn, @@ -107,20 +527,63 @@ def _process_calls_in_functions( module_qn, language, queries, + call_nodes=filtered, + call_name_cache=call_name_cache, ) - def _get_rust_impl_class_name(self, class_node: Node) -> str | None: - class_name = self._get_node_name(class_node, cs.FIELD_TYPE) - if class_name: - return class_name - return next( - ( - child.text.decode(cs.ENCODING_UTF8) - for child in class_node.children - if child.type == cs.TS_TYPE_IDENTIFIER and child.is_named and child.text - ), - None, + def _go_receiver_method_caller( + self, func_node: Node, method_name: str, module_qn: str + ) -> tuple[str, str] | None: + # (H) Resolve a Go receiver method to its (method_qn, container_qn), + # (H) mirroring the definition pass's receiver-type binding. The receiver + # (H) type resolves to its node qn (same-file or sibling-file in the + # (H) package), and the registry check ensures the method node exists + # (H) before overriding the default attribution. 
+ if not go_utils.is_receiver_method(func_node): + return None + receiver_type = go_utils.extract_receiver_type_name(func_node) + if not receiver_type: + return None + container_qn = self._resolver._resolve_class_name(receiver_type, module_qn) or ( + f"{module_qn}{cs.SEPARATOR_DOT}{receiver_type}" ) + caller_qn = f"{container_qn}{cs.SEPARATOR_DOT}{method_name}" + if caller_qn in self._resolver.function_registry: + return caller_qn, container_qn + return None + + def _cpp_out_of_class_method_caller( + self, func_node: Node, method_name: str, module_qn: str + ) -> tuple[str, str] | None: + # (H) Resolve an out-of-line C++ method definition to its (method_qn, + # (H) class_qn), mirroring the definition pass's class binding. The leaf + # (H) class name resolves the class across files (header-declared classes); + # (H) `endswith(normalized)` guards against a leaf collision binding to the + # (H) wrong class, and the registry membership check ensures the method node + # (H) actually exists before overriding the default attribution. + if not cpp_utils.is_out_of_class_method_definition(func_node): + return None + class_name = cpp_utils.extract_class_name_from_out_of_class_method(func_node) + if not class_name: + return None + normalized = class_name.replace(cs.SEPARATOR_DOUBLE_COLON, cs.SEPARATOR_DOT) + leaf = normalized.rsplit(cs.SEPARATOR_DOT, 1)[-1] + class_qn = self._resolver._resolve_class_name(leaf, module_qn) + if not class_qn or not class_qn.endswith(normalized): + return None + caller_qn = f"{class_qn}{cs.SEPARATOR_DOT}{method_name}" + if caller_qn in self._resolver.function_registry: + return caller_qn, class_qn + return None + + def _get_rust_impl_class_name(self, class_node: Node) -> str | None: + # (H) Use the same bare-type extraction as the definition pass + # (H) (rs_utils.extract_impl_target), which strips generic arguments + # (H) (`Chars<'a>` -> `Chars`). 
_get_node_name returns the full generic + # (H) text, so a call inside a generic impl block was attributed to a + # (H) caller qn bearing the generics (crate.lib.Chars<'a>.go) that matches + # (H) no registered node, silently dropping the CALLS edge. + return rs_utils.extract_impl_target(class_node) def _get_class_name_for_node( self, class_node: Node, language: cs.SupportedLanguage @@ -136,68 +599,190 @@ def _process_methods_in_class( module_qn: str, language: cs.SupportedLanguage, queries: dict[cs.SupportedLanguage, LanguageQueries], + all_call_nodes: list[Node] | None = None, + call_starts: list[int] | None = None, + call_name_cache: dict[int, str | None] | None = None, + sorted_func_nodes: list[Node] | None = None, + func_node_starts: list[int] | None = None, ) -> None: - method_query = queries[language][cs.QUERY_FUNCTIONS] - if not method_query: - return - method_cursor = QueryCursor(method_query) - method_captures = method_cursor.captures(body_node) - method_nodes = method_captures.get(cs.CAPTURE_FUNCTION, []) + if sorted_func_nodes is not None and func_node_starts is not None: + body_start = body_node.start_byte + body_end = body_node.end_byte + lo = bisect_left(func_node_starts, body_start) + hi = bisect_right(func_node_starts, body_end) + method_nodes = [ + n for n in sorted_func_nodes[lo:hi] if n.end_byte <= body_end + ] + else: + method_query = queries[language][cs.QUERY_FUNCTIONS] + if not method_query: + return + method_cursor = QueryCursor(method_query) + method_captures = sorted_captures(method_cursor, body_node) + method_nodes = method_captures.get(cs.CAPTURE_FUNCTION, []) + lang_config = queries[language][cs.QUERY_CONFIG] for method_node in method_nodes: - if not isinstance(method_node, Node): - continue - method_name = self._get_node_name(method_node) + if language in _C_FAMILY_LANGUAGES: + method_name = cpp_utils.extract_function_name(method_node) + else: + method_name = self._get_node_name(method_node) + if not method_name and language in 
_JS_TS_LANGUAGES: + method_name = self._js_ts_arrow_binding_name(method_node) if not method_name: continue - method_qn = f"{class_qn}{cs.SEPARATOR_DOT}{method_name}" + # (H) method_nodes includes functions nested inside methods. Build the + # (H) qn through the enclosing-function chain (Class.method.nested, not + # (H) the method-dropping Class.nested) and label a nested function + # (H) FUNCTION, so the CALLS edge joins the real node. + caller_qn, caller_label = self._class_member_qn_and_label( + method_node, class_qn, method_name, lang_config, language + ) + filtered = ( + self._calls_owned_by( + method_node, method_nodes, all_call_nodes, call_starts + ) + if all_call_nodes is not None and call_starts is not None + else None + ) self._ingest_function_calls( method_node, - method_qn, - cs.NodeLabel.METHOD, + caller_qn, + caller_label, module_qn, language, queries, class_qn, + call_nodes=filtered, + call_name_cache=call_name_cache, ) + def _class_member_qn_and_label( + self, + func_node: Node, + class_qn: str, + func_name: str, + lang_config: LanguageSpec, + language: str, + ) -> tuple[str, str]: + # (H) Build a class-body function's qn through the chain of enclosing + # (H) functions up to the class: a direct method is Class.method (METHOD); + # (H) a function nested in a method is Class.method.nested (FUNCTION). 
+ path_parts: list[str] = [] + current = func_node.parent + while current and current.type not in lang_config.class_node_types: + if current.type in lang_config.function_node_types: + if (name_node := current.child_by_field_name(cs.FIELD_NAME)) and ( + name_node.text is not None + ): + path_parts.append(name_node.text.decode(cs.ENCODING_UTF8)) + current = current.parent + path_parts.reverse() + if path_parts: + joined = cs.SEPARATOR_DOT.join([*path_parts, func_name]) + return f"{class_qn}{cs.SEPARATOR_DOT}{joined}", cs.NodeLabel.FUNCTION + member = self._java_method_member(func_node, func_name, language) + return f"{class_qn}{cs.SEPARATOR_DOT}{member}", cs.NodeLabel.METHOD + + def _java_method_member( + self, func_node: Node, func_name: str, language: str + ) -> str: + # (H) A Java Method node is registered with its parameter signature + # (H) (definition pass: class_qn.name(params)), so the caller endpoint of a + # (H) CALLS edge must carry the same signature to join that node. Mirrors + # (H) class_ingest.mixin's method-qn build exactly. + if language != cs.SupportedLanguage.JAVA: + return func_name + info = java_utils.extract_method_info(func_node) + name = info.get(cs.KEY_NAME) or func_name + parameters = info.get(cs.KEY_PARAMETERS, []) + param_sig = f"({','.join(parameters)})" if parameters else cs.EMPTY_PARENS + return f"{name}{param_sig}" + + def _calls_owned_by( + self, + func_node: Node, + sibling_func_nodes: list[Node], + all_call_nodes: list[Node], + call_starts: list[int], + ) -> list[Node]: + # (H) Calls inside func_node MINUS calls owned by functions nested within + # (H) it, so a call in a nested function is attributed only to the nested + # (H) function, never also to the enclosing one. 
+ own = self._filter_calls_in_node(all_call_nodes, call_starts, func_node) + descendant_bodies = [ + body + for n in sibling_func_nodes + if n is not func_node + and n.start_byte >= func_node.start_byte + and n.end_byte <= func_node.end_byte + and (body := n.child_by_field_name(cs.FIELD_BODY)) is not None + ] + if not descendant_bodies: + return own + return [ + call + for call in own + if not any( + body.start_byte <= call.start_byte and call.end_byte <= body.end_byte + for body in descendant_bodies + ) + ] + def _process_calls_in_classes( self, root_node: Node, module_qn: str, language: cs.SupportedLanguage, queries: dict[cs.SupportedLanguage, LanguageQueries], + all_call_nodes: list[Node] | None = None, + call_starts: list[int] | None = None, + call_name_cache: dict[int, str | None] | None = None, + combined_captures: dict[str, list] | None = None, + sorted_func_nodes: list[Node] | None = None, + func_node_starts: list[int] | None = None, ) -> None: - query = queries[language][cs.QUERY_CLASSES] - if not query: - return - cursor = QueryCursor(query) - captures = cursor.captures(root_node) - class_nodes = captures.get(cs.CAPTURE_CLASS, []) + if combined_captures is not None: + class_nodes = combined_captures.get(cs.CAPTURE_CLASS, []) + else: + query = queries[language][cs.QUERY_CLASSES] + if not query: + return + cursor = QueryCursor(query) + captures = sorted_captures(cursor, root_node) + class_nodes = captures.get(cs.CAPTURE_CLASS, []) for class_node in class_nodes: - if not isinstance(class_node, Node): - continue class_name = self._get_class_name_for_node(class_node, language) if not class_name: continue - class_qn = f"{module_qn}{cs.SEPARATOR_DOT}{class_name}" + # (H) A C++ class inside a namespace is bound by the definition pass via + # (H) build_qualified_name (qn `module.ns.Class`); the bare join would drop + # (H) the namespace, dangling every inline method's CALLS source. Use the + # (H) same builder so the class qn (and thus method caller qns) agree. 
+ class_qn = ( + cpp_utils.build_qualified_name(class_node, module_qn, class_name) + if language == cs.SupportedLanguage.CPP + else f"{module_qn}{cs.SEPARATOR_DOT}{class_name}" + ) if body_node := class_node.child_by_field_name(cs.FIELD_BODY): self._process_methods_in_class( - body_node, class_qn, module_qn, language, queries + body_node, + class_qn, + module_qn, + language, + queries, + all_call_nodes, + call_starts, + call_name_cache=call_name_cache, + sorted_func_nodes=sorted_func_nodes, + func_node_starts=func_node_starts, ) - def _process_module_level_calls( - self, - root_node: Node, - module_qn: str, - language: cs.SupportedLanguage, - queries: dict[cs.SupportedLanguage, LanguageQueries], - ) -> None: - self._ingest_function_calls( - root_node, module_qn, cs.NodeLabel.MODULE, module_qn, language, queries - ) - def _get_call_target_name(self, call_node: Node) -> str | None: + # (H) A macro-internal call (Rust `name(args)` inside a token_tree) is + # (H) captured as the bare identifier node; its text is the callee name. 
+ if call_node.type == cs.TS_IDENTIFIER and call_node.text is not None: + return call_node.text.decode(cs.ENCODING_UTF8) if func_child := call_node.child_by_field_name(cs.TS_FIELD_FUNCTION): match func_child.type: case ( @@ -206,13 +791,35 @@ def _get_call_target_name(self, call_node: Node) -> str | None: | cs.TS_MEMBER_EXPRESSION | cs.CppNodeType.QUALIFIED_IDENTIFIER | cs.TS_SCOPED_IDENTIFIER + | cs.TS_SELECTOR_EXPRESSION + | cs.TS_PHP_NAME ): if func_child.text is not None: - return str(func_child.text.decode(cs.ENCODING_UTF8)) + return func_child.text.decode(cs.ENCODING_UTF8) + case cs.TS_GENERIC_FUNCTION: + # (H) turbofish: unwrap to the underlying callee identifier + inner = func_child.child_by_field_name(cs.TS_FIELD_FUNCTION) + if inner and inner.text: + return inner.text.decode(cs.ENCODING_UTF8) case cs.TS_CPP_FIELD_EXPRESSION: field_node = func_child.child_by_field_name(cs.FIELD_FIELD) if field_node and field_node.text: - return str(field_node.text.decode(cs.ENCODING_UTF8)) + method = field_node.text.decode(cs.ENCODING_UTF8) + # (H) Prepend a simple-identifier receiver (`obj->m`/`obj.m` + # (H) -> `obj.m`) so the resolver can map obj to its type and + # (H) bind the correct class method; a `.`-joined two-part name + # (H) still falls back to the bare method-name trie when the + # (H) receiver type is unknown. Complex receivers (chains, + # (H) calls, `this`) keep the bare method name, as before. 
+ arg = func_child.child_by_field_name(cs.TS_FIELD_ARGUMENT) + if ( + arg is not None + and arg.type == cs.TS_IDENTIFIER + and arg.text + ): + receiver = arg.text.decode(cs.ENCODING_UTF8) + return f"{receiver}{cs.SEPARATOR_DOT}{method}" + return method case cs.TS_PARENTHESIZED_EXPRESSION: return self._get_iife_target_name(func_child) @@ -230,15 +837,15 @@ def _get_call_target_name(self, call_node: Node) -> str | None: object_node = call_node.child_by_field_name(cs.FIELD_OBJECT) name_node = call_node.child_by_field_name(cs.FIELD_NAME) if name_node and name_node.text: - method_name = str(name_node.text.decode(cs.ENCODING_UTF8)) + method_name = name_node.text.decode(cs.ENCODING_UTF8) if not object_node or not object_node.text: return method_name - object_text = str(object_node.text.decode(cs.ENCODING_UTF8)) + object_text = object_node.text.decode(cs.ENCODING_UTF8) return f"{object_text}{cs.SEPARATOR_DOT}{method_name}" if name_node := call_node.child_by_field_name(cs.FIELD_NAME): if name_node.text is not None: - return str(name_node.text.decode(cs.ENCODING_UTF8)) + return name_node.text.decode(cs.ENCODING_UTF8) return None @@ -260,70 +867,774 @@ def _ingest_function_calls( language: cs.SupportedLanguage, queries: dict[cs.SupportedLanguage, LanguageQueries], class_context: str | None = None, + call_nodes: list[Node] | None = None, + call_name_cache: dict[int, str | None] | None = None, ) -> None: - calls_query = queries[language].get(cs.QUERY_CALLS) - if not calls_query: - return + if language in _TYPED_LANGUAGES: + local_var_types = ( + self._resolver.type_inference.build_local_variable_type_map( + caller_node, module_qn, language, class_context + ) + ) + else: + local_var_types = None - local_var_types = self._resolver.type_inference.build_local_variable_type_map( - caller_node, module_qn, language - ) + caller_spec = (caller_type, cs.KEY_QUALIFIED_NAME, caller_qn) - cursor = QueryCursor(calls_query) - captures = cursor.captures(caller_node) - call_nodes = 
captures.get(cs.CAPTURE_CALL, []) + caller_params: frozenset[str] = frozenset() + if language == cs.SupportedLanguage.PYTHON: + ordered_params = python_parameter_names(caller_node) + self._flow_param_names[caller_qn] = ordered_params + caller_params = frozenset(ordered_params) - logger.debug( - ls.CALL_FOUND_NODES.format( - count=len(call_nodes), language=language, caller=caller_qn + # (H) Runs independently of call_nodes: a getter access is an attribute, not + # (H) a call, so callers that read a property but make no other call must + # (H) still reach this pass before the early return below. + if language == cs.SupportedLanguage.PYTHON and ( + prop_names := self._resolver.function_registry.property_names() + ): + self._ingest_property_accesses( + caller_node, + caller_spec, + caller_qn, + module_qn, + local_var_types, + class_context, + queries[language][cs.QUERY_CONFIG], + prop_names, ) - ) - for call_node in call_nodes: - if not isinstance(call_node, Node): - continue + # (H) Operator syntax (k in r, r[k], r[k]=v, len(r)) dispatches to dunder + # (H) methods; emit those edges when the operand is a first-party type. 
+ if language == cs.SupportedLanguage.PYTHON: + self._ingest_operator_dispatch_calls( + caller_node, caller_spec, module_qn, local_var_types + ) + + if call_nodes is None: + calls_query = queries[language].get(cs.QUERY_CALLS) + if not calls_query: + return + cursor = QueryCursor(calls_query) + captures = sorted_captures(cursor, caller_node) + call_nodes = captures.get(cs.CAPTURE_CALL, []) - # (H) tree-sitter finds ALL call nodes including nested; no recursive processing needed + if not call_nodes: + return - call_name = self._get_call_target_name(call_node) + is_java = language == cs.SupportedLanguage.JAVA + is_js_ts = language in (cs.SupportedLanguage.JS, cs.SupportedLanguage.TS) + is_cpp = language == cs.SupportedLanguage.CPP + method_invocation_type = cs.TS_METHOD_INVOCATION + resolver = self._resolver + resolve_func = resolver.resolve_function_call + resolve_builtin = resolver.resolve_builtin_call if is_js_ts else None + resolve_cpp_op = resolver.resolve_cpp_operator_call if is_cpp else None + get_target = self._get_call_target_name + class_label = cs.NodeLabel.CLASS + ensure_rel = self.ingestor.ensure_relationship_batch + calls_rel = cs.RelationshipType.CALLS + qn_key = cs.KEY_QUALIFIED_NAME + _id = id + is_python = language == cs.SupportedLanguage.PYTHON + alias_map: dict[str, str] | None = None + + for call_node in call_nodes: + node_id = _id(call_node) + if call_name_cache is not None and node_id in call_name_cache: + call_name = call_name_cache[node_id] + else: + call_name = get_target(call_node) + if call_name_cache is not None: + call_name_cache[node_id] = call_name if not call_name: continue - if ( - language == cs.SupportedLanguage.JAVA - and call_node.type == cs.TS_METHOD_INVOCATION - ): - callee_info = self._resolver.resolve_java_method_call( + if is_java and call_node.type == method_invocation_type: + callee_info = resolver.resolve_java_method_call( call_node, module_qn, local_var_types ) else: - callee_info = self._resolver.resolve_function_call( + 
callee_info = resolve_func( call_name, module_qn, local_var_types, class_context ) - if callee_info: - callee_type, callee_qn = callee_info - elif builtin_info := self._resolver.resolve_builtin_call(call_name): - callee_type, callee_qn = builtin_info - elif operator_info := self._resolver.resolve_cpp_operator_call( - call_name, module_qn + if not callee_info and resolve_builtin is not None: + callee_info = resolve_builtin(call_name) + if not callee_info and resolve_cpp_op is not None: + callee_info = resolve_cpp_op(call_name, module_qn) + if not callee_info and is_python and cs.SEPARATOR_DOT not in call_name: + # (H) A bare name that resolves to nothing may be a local alias of a + # (H) callable (do = self._start; do()). Resolve the assignment's + # (H) right-hand side and treat the alias call as a call to it. + if alias_map is None: + alias_map = self._build_local_alias_map( + caller_node, queries[language][cs.QUERY_CONFIG], module_qn + ) + if (rhs := alias_map.get(call_name)) is not None: + callee_info = resolve_func( + rhs, module_qn, local_var_types, class_context + ) + + if not callee_info and is_python and cs.SEPARATOR_DOT in call_name: + # (H) recv.field(...) where field is a callable struct field: + # (H) resolve to the functions bound to it at construction sites. + self._ingest_callable_field_calls( + call_name, caller_spec, local_var_types, ensure_rel + ) + + if is_python and call_name.rsplit(cs.SEPARATOR_DOT, 1)[-1] in ( + cs.HIGHER_ORDER_BUILTINS ): - callee_type, callee_qn = operator_info + # (H) sorted(xs, key=f) and friends invoke f synchronously in this + # (H) frame, so the trace attributes the call to the enclosing fn. 
+ self._ingest_higher_order_builtin_calls( + call_node, + caller_spec, + module_qn, + local_var_types, + class_context, + resolve_func, + ensure_rel, + ) + + if not callee_info: + continue + + callee_type, callee_qn = callee_info + + if is_python: + self._collect_callable_flow( + call_node, + callee_qn, + caller_qn, + caller_params, + module_qn, + local_var_types, + class_context, + ) + + if is_python and ( + dispatch_targets := resolver.protocol_dispatch_targets(callee_qn) + ): + # (H) The call resolved to a Protocol stub; the stub never runs, so emit + # (H) edges to the method on every conformer instead of the stub. + for conformer_type, conformer_qn in dispatch_targets: + for target_qn in resolver.function_registry.variants(conformer_qn): + ensure_rel( + caller_spec, + calls_rel, + (conformer_type, qn_key, target_qn), + ) + continue + + if is_python: + # (H) f(...) invoked through a parameter: the edge runs from the + # (H) callee to whatever each call site binds to that parameter. + self._ingest_callable_param_calls( + call_node, + callee_type, + callee_qn, + module_qn, + local_var_types, + class_context, + resolve_func, + ensure_rel, + ) + + if callee_type == class_label: + # (H) Record construction as INSTANTIATES -> the class node (keeps + # (H) CALLS function/method-only). When the class defines __init__, + # (H) ALSO redirect a CALLS edge to it (the constructor runs); when + # (H) it does not (dataclass/NamedTuple/pydantic), INSTANTIATES is + # (H) the only edge. 
+ for class_variant in resolver.function_registry.variants(callee_qn): + ensure_rel( + caller_spec, + cs.RelationshipType.INSTANTIATES, + (class_label, qn_key, class_variant), + ) + init_qn = f"{callee_qn}{cs.SEPARATOR_DOT}{cs.PY_METHOD_INIT}" + if init_qn not in resolver.function_registry: + continue + callee_type = cs.NodeLabel.METHOD + callee_qn = init_qn + + for target_qn in resolver.function_registry.variants(callee_qn): + ensure_rel( + caller_spec, + calls_rel, + (callee_type, qn_key, target_qn), + ) + + def _ingest_operator_dispatch_calls( + self, + caller_node: Node, + caller_spec: tuple[str, str, str], + module_qn: str, + local_var_types: dict[str, str] | None, + ) -> None: + boundary = (cs.TS_PY_FUNCTION_DEFINITION, cs.TS_PY_CLASS_DEFINITION) + stack: list[Node] = list(caller_node.children) + while stack: + node = stack.pop() + if node.type in boundary: + continue + match node.type: + case cs.TS_PY_SUBSCRIPT: + parent = node.parent + left = ( + parent.child_by_field_name(cs.TS_FIELD_LEFT) + if parent is not None and parent.type == cs.TS_PY_ASSIGNMENT + else None + ) + is_write = left is not None and left.id == node.id + self._emit_operator_dunder( + node.child_by_field_name(cs.FIELD_VALUE), + cs.PY_DUNDER_SETITEM if is_write else cs.PY_DUNDER_GETITEM, + caller_spec, + module_qn, + local_var_types, + ) + case cs.TS_PY_COMPARISON_OPERATOR: + operators = node.child_by_field_name(cs.TS_FIELD_OPERATORS) + if ( + operators is not None + and (op_text := safe_decode_text(operators)) + and cs.PY_OP_IN in op_text.split() + and node.named_children + ): + self._emit_operator_dunder( + node.named_children[-1], + cs.PY_DUNDER_CONTAINS, + caller_spec, + module_qn, + local_var_types, + ) + case cs.TS_PY_CALL: + func = node.child_by_field_name(cs.TS_FIELD_FUNCTION) + args = node.child_by_field_name(cs.FIELD_ARGUMENTS) + if ( + func is not None + and safe_decode_text(func) == cs.PY_BUILTIN_LEN + and args is not None + and len(args.named_children) == 1 + ): + 
self._emit_operator_dunder( + args.named_children[0], + cs.PY_DUNDER_LEN, + caller_spec, + module_qn, + local_var_types, + ) + case cs.TS_PY_BOOLEAN_OPERATOR: + self._emit_truthiness( + node.child_by_field_name(cs.TS_FIELD_LEFT), + caller_spec, + module_qn, + local_var_types, + ) + self._emit_truthiness( + node.child_by_field_name(cs.TS_FIELD_RIGHT), + caller_spec, + module_qn, + local_var_types, + ) + case cs.TS_PY_NOT_OPERATOR: + self._emit_truthiness( + node.child_by_field_name(cs.TS_FIELD_ARGUMENT), + caller_spec, + module_qn, + local_var_types, + ) + case ( + cs.TS_PY_IF_STATEMENT + | cs.TS_PY_WHILE_STATEMENT + | cs.TS_PY_ELIF_CLAUSE + | cs.TS_PY_CONDITIONAL_EXPRESSION + ): + # (H) A bare object as a condition is tested for truthiness; nested + # (H) boolean/not operators are handled when the walk reaches them. + self._emit_truthiness( + node.child_by_field_name(cs.TS_FIELD_CONDITION), + caller_spec, + module_qn, + local_var_types, + ) + stack.extend(node.children) + + def _emit_truthiness( + self, + operand: Node | None, + caller_spec: tuple[str, str, str], + module_qn: str, + local_var_types: dict[str, str] | None, + ) -> None: + # (H) Truthiness of an object calls __bool__ if defined, else __len__. Only a + # (H) bare name/attribute operand names an object (a comparison/call is already + # (H) a bool and is handled elsewhere); try __bool__ first, then __len__. 
+ if operand is None or operand.type not in ( + cs.TS_PY_IDENTIFIER, + cs.TS_PY_ATTRIBUTE, + ): + return + for dunder in (cs.PY_DUNDER_BOOL, cs.PY_DUNDER_LEN): + if self._emit_operator_dunder( + operand, dunder, caller_spec, module_qn, local_var_types + ): + return + + def _emit_operator_dunder( + self, + operand: Node | None, + dunder: str, + caller_spec: tuple[str, str, str], + module_qn: str, + local_var_types: dict[str, str] | None, + ) -> bool: + # (H) Resolve the implied .__dunder__ call; resolution only succeeds + # (H) for a first-party class that defines the dunder, so builtin containers + # (H) (dict/list) yield no edge. Restrict to simple attribute/name operands. + # (H) Returns whether an edge was emitted (truthiness tries __bool__ then __len__). + if operand is None or not (operand_text := safe_decode_text(operand)): + return False + if any(ch in operand_text for ch in cs.PY_OPERAND_REJECT_CHARS): + return False + targets = self._resolver.operator_dunder_targets( + operand_text, dunder, module_qn, local_var_types + ) + if not targets: + return False + for callee_type, callee_qn in targets: + for target_qn in self._resolver.function_registry.variants(callee_qn): + self.ingestor.ensure_relationship_batch( + caller_spec, + cs.RelationshipType.CALLS, + (callee_type, cs.KEY_QUALIFIED_NAME, target_qn), + ) + return True + + def _parse_call_arguments( + self, call_node: Node + ) -> tuple[list[Node], dict[str, Node]]: + positional: list[Node] = [] + keyword: dict[str, Node] = {} + args_node = call_node.child_by_field_name(cs.FIELD_ARGUMENTS) + if args_node is None: + return positional, keyword + for child in args_node.named_children: + if child.type == cs.TS_PY_KEYWORD_ARGUMENT: + name_node = child.child_by_field_name(cs.FIELD_NAME) + value_node = child.child_by_field_name(cs.FIELD_VALUE) + if ( + name_node is not None + and value_node is not None + and (name := safe_decode_text(name_node)) is not None + ): + keyword[name] = value_node else: + 
positional.append(child) + return positional, keyword + + def _emit_callback_edge( + self, + source_spec: tuple[str, str, str], + arg_node: Node, + module_qn: str, + local_var_types: dict[str, str] | None, + class_context: str | None, + resolve_func, + ensure_rel, + ) -> None: + if not (arg_text := safe_decode_text(arg_node)): + return + if not ( + resolved := resolve_func( + arg_text, module_qn, local_var_types, class_context + ) + ): + return + res_type, res_qn = resolved + registry = self._resolver.function_registry + if res_type == cs.NodeLabel.CLASS: + init_qn = f"{res_qn}{cs.SEPARATOR_DOT}{cs.PY_METHOD_INIT}" + if init_qn not in registry: + return + res_type = cs.NodeLabel.METHOD + res_qn = init_qn + for target_qn in registry.variants(res_qn): + ensure_rel( + source_spec, + cs.RelationshipType.CALLS, + (res_type, cs.KEY_QUALIFIED_NAME, target_qn), + ) + + def _ingest_callable_param_calls( + self, + call_node: Node, + callee_type: str, + callee_qn: str, + module_qn: str, + local_var_types: dict[str, str] | None, + class_context: str | None, + resolve_func, + ensure_rel, + ) -> None: + if not (params := self._resolver.function_registry.callable_params(callee_qn)): + return + positional, keyword = self._parse_call_arguments(call_node) + source_spec = (callee_type, cs.KEY_QUALIFIED_NAME, callee_qn) + for param_name, index in params.items(): + arg_node = keyword.get(param_name) + if arg_node is None and index < len(positional): + arg_node = positional[index] + if arg_node is not None: + self._emit_callback_edge( + source_spec, + arg_node, + module_qn, + local_var_types, + class_context, + resolve_func, + ensure_rel, + ) + + def _collect_callable_flow( + self, + call_node: Node, + callee_qn: str, + caller_qn: str, + caller_params: frozenset[str], + module_qn: str, + local_var_types: dict[str, str] | None, + class_context: str | None, + ) -> None: + # (H) Record, for each call-site argument that names a callable, whether it is a + # (H) concrete function or a 
parameter of the caller (a pass-through). The + # (H) fixpoint in finalize propagates concretes through pass-through params to + # (H) the functions that actually invoke them. + positional, keyword = self._parse_call_arguments(call_node) + items: list[tuple[int, str, Node]] = [ + (index, "", node) for index, node in enumerate(positional) + ] + items.extend((-1, name, node) for name, node in keyword.items()) + callable_labels = ( + cs.NodeLabel.FUNCTION, + cs.NodeLabel.METHOD, + cs.NodeLabel.CLASS, + ) + for position, keyword_name, arg_node in items: + if arg_node.type not in (cs.TS_PY_IDENTIFIER, cs.TS_PY_ATTRIBUTE): continue - logger.debug( - ls.CALL_FOUND.format( - caller=caller_qn, - call_name=call_name, - callee_type=callee_type, - callee_qn=callee_qn, + arg_text = safe_decode_text(arg_node) + if not arg_text: + continue + if arg_node.type == cs.TS_PY_IDENTIFIER and arg_text in caller_params: + self._flow_args.append( + _CallableFlowArg( + callee_qn, position, keyword_name, "", caller_qn, arg_text + ) ) + continue + resolved = self._resolver.resolve_function_call( + arg_text, module_qn, local_var_types, class_context ) + if resolved is not None and resolved[0] in callable_labels: + self._flow_args.append( + _CallableFlowArg( + callee_qn, position, keyword_name, resolved[1], "", "" + ) + ) - self.ingestor.ensure_relationship_batch( - (caller_type, cs.KEY_QUALIFIED_NAME, caller_qn), - cs.RelationshipType.CALLS, - (callee_type, cs.KEY_QUALIFIED_NAME, callee_qn), + def finalize_callable_param_flow(self) -> None: + # (H) Resolve the recorded call-site argument bindings to a fixpoint and emit a + # (H) CALLS edge from every function that invokes a callable parameter to each + # (H) concrete function that can reach it (directly or via pass-through params). 
+ registry = self._resolver.function_registry + seeds: dict[tuple[str, str], set[str]] = defaultdict(set) + edges: dict[tuple[str, str], set[tuple[str, str]]] = defaultdict(set) + for arg in self._flow_args: + if arg.keyword: + param_name = arg.keyword + else: + callee_params = self._flow_param_names.get(arg.callee_qn) + if callee_params is None or not ( + 0 <= arg.position < len(callee_params) + ): + continue + param_name = callee_params[arg.position] + slot = (arg.callee_qn, param_name) + if arg.source_concrete: + seeds[slot].add(arg.source_concrete) + else: + edges[slot].add((arg.source_caller, arg.source_param)) + + bindings: dict[tuple[str, str], set[str]] = { + k: set(v) for k, v in seeds.items() + } + for slot in edges: + bindings.setdefault(slot, set()) + changed = True + while changed: + changed = False + for slot, sources in edges.items(): + for source in sources: + if (reachable := bindings.get(source)) and not reachable.issubset( + bindings[slot] + ): + bindings[slot] |= reachable + changed = True + + ensure_rel = self.ingestor.ensure_relationship_batch + for func_qn, invoked in ( + (qn, registry.callable_params(qn)) for qn in self._flow_param_names + ): + if not invoked or (func_type := registry.get(func_qn)) is None: + continue + source_spec = (func_type, cs.KEY_QUALIFIED_NAME, func_qn) + for param_name in invoked: + for target_qn in bindings.get((func_qn, param_name), ()): + target_type = registry.get(target_qn) + if target_type is None: + continue + for variant in registry.variants(target_qn): + ensure_rel( + source_spec, + cs.RelationshipType.CALLS, + (target_type, cs.KEY_QUALIFIED_NAME, variant), + ) + + def _ingest_callable_field_calls( + self, + call_name: str, + caller_spec: tuple[str, str, str], + local_var_types: dict[str, str] | None, + ensure_rel, + ) -> None: + recv, sep, field = call_name.rpartition(cs.SEPARATOR_DOT) + if not sep: + return + recv_type = local_var_types.get(recv) if local_var_types else None + targets = 
self._resolver.callable_field_targets(field, recv_type) + if not targets: + return + registry = self._resolver.function_registry + for target_qn in targets: + if target_qn in registry: + ensure_rel( + caller_spec, + cs.RelationshipType.CALLS, + (registry[target_qn], cs.KEY_QUALIFIED_NAME, target_qn), + ) + + def _ingest_higher_order_builtin_calls( + self, + call_node: Node, + caller_spec: tuple[str, str, str], + module_qn: str, + local_var_types: dict[str, str] | None, + class_context: str | None, + resolve_func, + ensure_rel, + ) -> None: + positional, keyword = self._parse_call_arguments(call_node) + for arg_node in (*positional, *keyword.values()): + self._emit_callback_edge( + caller_spec, + arg_node, + module_qn, + local_var_types, + class_context, + resolve_func, + ensure_rel, + ) + + def _build_local_alias_map( + self, caller_node: Node, lang_config: LanguageSpec, module_qn: str + ) -> dict[str, str]: + identifier = cs.TS_PY_IDENTIFIER + attribute = cs.TS_PY_ATTRIBUTE + assignment = cs.TS_PY_ASSIGNMENT + left_field = cs.TS_FIELD_LEFT + right_field = cs.TS_FIELD_RIGHT + function_types = lang_config.function_node_types + class_types = lang_config.class_node_types + aliases: dict[str, str] = {} + stack = list(caller_node.children) + while stack: + node = stack.pop() + node_type = node.type + if node_type in function_types or node_type in class_types: + continue + if node_type == assignment: + left = node.child_by_field_name(left_field) + right = node.child_by_field_name(right_field) + if ( + left is not None + and left.type == identifier + and (left_text := left.text) is not None + and right is not None + and ( + target := self._alias_reference_text( + right, identifier, attribute, module_qn + ) + ) + is not None + ): + aliases.setdefault(left_text.decode(cs.ENCODING_UTF8), target) + stack.extend(node.children) + return aliases + + def _alias_reference_text( + self, right: Node, identifier: str, attribute: str, module_qn: str + ) -> str | None: + # (H) An alias 
rhs is a plain name/attribute, a conditional that picks one + # (H) (resolve_builtin_call if is_js_ts else None), or getattr(recv, name) + # (H) dynamic dispatch. Take the name/attribute branch (consequence or + # (H) alternative, never the condition) or build recv. for getattr. + if right.type in (identifier, attribute): + return right.text.decode(cs.ENCODING_UTF8) if right.text else None + if right.type == cs.TS_PY_CONDITIONAL_EXPRESSION and right.named_children: + for branch in (right.named_children[0], right.named_children[-1]): + if branch.type in (identifier, attribute) and branch.text: + return branch.text.decode(cs.ENCODING_UTF8) + if right.type == cs.TS_PY_CALL: + return self._getattr_reference_text(right, identifier, attribute, module_qn) + return None + + def _getattr_reference_text( + self, call: Node, identifier: str, attribute: str, module_qn: str + ) -> str | None: + func = call.child_by_field_name(cs.TS_FIELD_FUNCTION) + args = call.child_by_field_name(cs.FIELD_ARGUMENTS) + if ( + func is None + or safe_decode_text(func) != cs.PY_BUILTIN_GETATTR + or args is None + or len(args.named_children) < 2 + ): + return None + receiver, name_node = args.named_children[0], args.named_children[1] + if receiver.type not in (identifier, attribute): + return None + if (name := self._resolve_str_const(name_node, module_qn)) is None: + return None + return f"{safe_decode_text(receiver)}{cs.SEPARATOR_DOT}{name}" + + def _resolve_str_const(self, node: Node, module_qn: str) -> str | None: + # (H) Resolve a getattr name argument to its string value: a string literal + # (H) directly, or a module-level constant (cs.METHOD_X / METHOD_X) read from + # (H) the defining module's AST. 
+ if node.type == cs.TS_PY_STRING: + content = next( + (c for c in node.children if c.type == cs.TS_PY_STRING_CONTENT), None + ) + return safe_decode_text(content) if content is not None else None + if node.type not in (cs.TS_PY_IDENTIFIER, cs.TS_PY_ATTRIBUTE): + return None + name_text = safe_decode_text(node) + if not name_text: + return None + import_map = self._resolver.import_processor.import_mapping.get(module_qn, {}) + prefix, _, const_name = name_text.rpartition(cs.SEPARATOR_DOT) + if not prefix: + mapped = import_map.get(const_name) + const_module_qn = ( + mapped.rsplit(cs.SEPARATOR_DOT, 1)[0] if mapped else module_qn ) + elif (mapped_module := import_map.get(prefix)) is not None: + const_module_qn = mapped_module + else: + const_module_qn = prefix + return self._module_string_constant(const_module_qn, const_name) + + def _module_string_constant(self, module_qn: str, const_name: str) -> str | None: + type_inference = self._resolver.type_inference + file_path = type_inference.module_qn_to_file_path.get(module_qn) + if file_path is None or file_path not in type_inference.ast_cache: + return None + root_node, _ = type_inference.ast_cache[file_path] + for child in root_node.children: + if child.type != cs.TS_PY_EXPRESSION_STATEMENT or not child.children: + continue + assignment = child.children[0] + if assignment.type != cs.TS_PY_ASSIGNMENT: + continue + left = assignment.child_by_field_name(cs.TS_FIELD_LEFT) + right = assignment.child_by_field_name(cs.TS_FIELD_RIGHT) + if ( + left is not None + and left.type == cs.TS_PY_IDENTIFIER + and safe_decode_text(left) == const_name + and right is not None + and right.type == cs.TS_PY_STRING + ): + return self._resolve_str_const(right, module_qn) + return None + + def _ingest_property_accesses( + self, + caller_node: Node, + caller_spec: tuple[str, str, str], + caller_qn: str, + module_qn: str, + local_var_types: dict[str, str] | None, + class_context: str | None, + lang_config: LanguageSpec, + prop_names: set[str], + 
) -> None: + # (H) Accessing an @property getter invokes the getter method at runtime, but + # (H) tree-sitter sees a plain attribute, not a call. Resolve attribute + # (H) accesses whose tail names a known property and emit a CALLS edge to the + # (H) getter (skipping the attribute that is itself a call's function, which + # (H) the call path above already resolves). + resolver = self._resolver + resolve_func = resolver.resolve_function_call + registry = resolver.function_registry + ensure_rel = self.ingestor.ensure_relationship_batch + calls_rel = cs.RelationshipType.CALLS + qn_key = cs.KEY_QUALIFIED_NAME + method_label = cs.NodeLabel.METHOD + attr_type = cs.TS_PY_ATTRIBUTE + call_type = cs.TS_PY_CALL + func_field = cs.TS_FIELD_FUNCTION + function_types = lang_config.function_node_types + class_types = lang_config.class_node_types + seen: set[str] = set() + + stack = list(caller_node.children) + while stack: + node = stack.pop() + node_type = node.type + if node_type in function_types or node_type in class_types: + continue + if node_type == attr_type and (text := node.text) is not None: + attr_text = text.decode(cs.ENCODING_UTF8) + if attr_text.rsplit(cs.SEPARATOR_DOT, 1)[-1] in prop_names: + parent = node.parent + is_call_target = ( + parent is not None + and parent.type == call_type + and parent.child_by_field_name(func_field) is node + ) + if not is_call_target and ( + callee_info := resolve_func( + attr_text, module_qn, local_var_types, class_context + ) + ): + callee_qn = callee_info[1] + if ( + registry.is_property(callee_qn) + and callee_qn != caller_qn + and callee_qn not in seen + ): + seen.add(callee_qn) + for target_qn in registry.variants(callee_qn): + ensure_rel( + caller_spec, + calls_rel, + (method_label, qn_key, target_qn), + ) + stack.extend(node.children) def _build_nested_qualified_name( self, @@ -337,9 +1648,7 @@ def _build_nested_qualified_name( if not isinstance(current, Node): logger.warning( - ls.CALL_UNEXPECTED_PARENT.format( - 
node=func_node, parent_type=type(current) - ) + ls.CALL_UNEXPECTED_PARENT, node=func_node, parent_type=type(current) ) return None @@ -359,5 +1668,32 @@ def _build_nested_qualified_name( return f"{module_qn}{cs.SEPARATOR_DOT}{cs.SEPARATOR_DOT.join(path_parts)}{cs.SEPARATOR_DOT}{func_name}" return f"{module_qn}{cs.SEPARATOR_DOT}{func_name}" + def _js_ts_arrow_binding_name(self, func_node: Node) -> str | None: + # (H) An arrow / function expression has no `name` field, so the call pass + # (H) skipped it and never processed its body's calls. Recover the binding + # (H) name for the two named forms whose value IS the arrow: a module/local + # (H) `const f = () => ...` (variable_declarator) and a class field + # (H) `helper = () => ...` (public_field_definition). The body's calls then + # (H) attribute to the same qn the definition pass registered. Anonymous / + # (H) destructured arrows stay unnamed (skipped), as before. + if func_node.type not in (cs.TS_ARROW_FUNCTION, cs.TS_FUNCTION_EXPRESSION): + return None + parent = func_node.parent + if parent is None: + return None + # (H) func_node must be the parent's value/initializer for both forms + # (H) (variable_declarator and public_field_definition), so one value check + # (H) covers both. `==` not `is`: py-tree-sitter returns a fresh Node wrapper + # (H) per access, so identity comparison always fails (Node `==` compares id). 
+ if parent.child_by_field_name(cs.FIELD_VALUE) != func_node: + return None + name_node = parent.child_by_field_name(cs.FIELD_NAME) + if name_node is None or name_node.type not in ( + cs.TS_IDENTIFIER, + cs.TS_PROPERTY_IDENTIFIER, + ): + return None + return safe_decode_text(name_node) + def _is_method(self, func_node: Node, lang_config: LanguageSpec) -> bool: return is_method_node(func_node, lang_config) diff --git a/codebase_rag/parsers/call_resolver.py b/codebase_rag/parsers/call_resolver.py index 322a583a3..881f90640 100644 --- a/codebase_rag/parsers/call_resolver.py +++ b/codebase_rag/parsers/call_resolver.py @@ -1,6 +1,7 @@ from __future__ import annotations import re +from collections import defaultdict, deque from loguru import logger from tree_sitter import Node @@ -12,8 +13,28 @@ from .py import resolve_class_name from .type_inference import TypeInferenceEngine +_SEPARATOR_PATTERN = re.compile(r"[.:]|::") +_SEARCH_NAME_CACHE: dict[str, str] = {} +_CHAINED_METHOD_PATTERN = re.compile(r"\.([^.()]+)$") +_QN_SPLIT_CACHE: dict[str, tuple[list[str], int]] = {} + class CallResolver: + __slots__ = ( + "function_registry", + "import_processor", + "type_inference", + "class_inheritance", + "_simple_resolution_cache", + "_wildcard_cache", + "_protocol_impl_cache", + "_field_bindings", + "_field_to_classes", + "_subclass_map_cache", + "_protocol_classes_cache", + "_struct_impl_cache", + ) + def __init__( self, function_registry: FunctionRegistryTrieProtocol, @@ -25,16 +46,95 @@ def __init__( self.import_processor = import_processor self.type_inference = type_inference self.class_inheritance = class_inheritance + self._simple_resolution_cache: dict[ + tuple[str, str], tuple[str, str] | None + ] = {} + self._wildcard_cache: dict[int, list[tuple[str, str]]] = {} + self._protocol_impl_cache: dict[str, str] | None = None + self._field_bindings: dict[tuple[str, str], set[str]] = {} + self._field_to_classes: dict[str, set[str]] = {} + self._subclass_map_cache: dict[str, 
set[str]] | None = None + self._protocol_classes_cache: set[str] | None = None + self._struct_impl_cache: dict[str, set[str]] = {} + + def record_callable_field_binding( + self, class_qn: str, field: str, func_qn: str + ) -> None: + # (H) A NamedTuple/dataclass field holding a function reference: every + # (H) function bound to it at any construction site is a possible callee + # (H) when the field is invoked. Recording all of them is a sound call + # (H) graph (each runs for its own configuration), so recall is complete. + self._field_bindings.setdefault((class_qn, field), set()).add(func_qn) + self._field_to_classes.setdefault(field, set()).add(class_qn) + + def callable_field_targets( + self, field: str, recv_type: str | None = None + ) -> set[str]: + classes = self._field_to_classes.get(field) + if not classes: + return set() + if recv_type: + simple = recv_type.rsplit(cs.SEPARATOR_DOT, 1)[-1] + matched = [ + qn + for qn in classes + if qn == recv_type or qn.rsplit(cs.SEPARATOR_DOT, 1)[-1] == simple + ] + if len(matched) == 1: + return self._field_bindings.get((matched[0], field), set()) + # (H) Receiver type unknown or ambiguous: only resolve when exactly one + # (H) class declares this callable field, so the targets are unambiguous. + if len(classes) == 1: + return self._field_bindings.get((next(iter(classes)), field), set()) + return set() def _resolve_class_qn_from_type( self, var_type: str, import_map: dict[str, str], module_qn: str ) -> str: + var_type = self._strip_optional(var_type) if cs.SEPARATOR_DOT in var_type: - return var_type + return self._follow_reexports(var_type) if var_type in import_map: - return import_map[var_type] + return self._follow_reexports(import_map[var_type]) return self._resolve_class_name(var_type, module_qn) or "" + def _strip_optional(self, var_type: str) -> str: + # (H) An Optional annotation (X | None) names a single concrete class; reduce it + # (H) so attribute/operator resolution can find that class. 
Genuine multi-type + # (H) unions stay unresolved (ambiguous). + if cs.PY_UNION_SEPARATOR not in var_type: + return var_type + non_none = [ + member + for part in var_type.split(cs.PY_UNION_SEPARATOR) + if (member := part.strip()) and member != cs.PY_NONE + ] + return non_none[0] if len(non_none) == 1 else var_type + + def _follow_reexports(self, class_qn: str) -> str: + # (H) `from .pkg import Cls` records the importer's name against the re-export + # (H) module (pkg.Cls), not the class's real definition (pkg.mod.Cls), so a + # (H) class_qn that is not itself registered may be a re-export. Follow the + # (H) module's own import map one hop at a time until a registered class is + # (H) reached, guarding against cycles. + seen: set[str] = set() + current = class_qn + while ( + current + and current not in seen + and current not in self.function_registry + and cs.SEPARATOR_DOT in current + ): + seen.add(current) + module_qn, _, name = current.rpartition(cs.SEPARATOR_DOT) + following = self.import_processor.import_mapping.get(module_qn, {}).get( + name + ) + if not following or following == current: + break + current = following + return current + def _try_resolve_method( self, class_qn: str, method_name: str, separator: str = cs.SEPARATOR_DOT ) -> tuple[str, str] | None: @@ -50,6 +150,95 @@ def resolve_function_call( local_var_types: dict[str, str] | None = None, class_context: str | None = None, ) -> tuple[str, str] | None: + return self._redirect_protocol_method( + self._resolve_function_call( + call_name, module_qn, local_var_types, class_context + ) + ) + + def _protocol_impl_map(self) -> dict[str, str]: + # (H) A Protocol stub never runs; the concrete implementer does. Map each + # (H) XxxProtocol to a unique non-Protocol class named Xxx (the suffix + # (H) convention disambiguates the real impl from test mocks or other + # (H) structural conformers, which structural matching alone cannot). 
+ if self._protocol_impl_cache is not None: + return self._protocol_impl_cache + sep = cs.SEPARATOR_DOT + protocols: set[str] = set() + classes_by_simple: dict[str, list[str]] = defaultdict(list) + for qn, bases in self.class_inheritance.items(): + classes_by_simple[qn.rsplit(sep, 1)[-1]].append(qn) + if any(base.rsplit(sep, 1)[-1] == cs.PY_PROTOCOL for base in bases): + protocols.add(qn) + impl: dict[str, str] = {} + for protocol_qn in protocols: + simple = protocol_qn.rsplit(sep, 1)[-1] + if simple == cs.PY_PROTOCOL or not simple.endswith(cs.PY_PROTOCOL): + continue + base_name = simple[: -len(cs.PY_PROTOCOL)] + candidates = [ + qn for qn in classes_by_simple.get(base_name, []) if qn not in protocols + ] + if len(candidates) == 1: + impl[protocol_qn] = candidates[0] + self._protocol_impl_cache = impl + return impl + + def _protocol_classes(self) -> set[str]: + if self._protocol_classes_cache is None: + sep = cs.SEPARATOR_DOT + self._protocol_classes_cache = { + qn + for qn, bases in self.class_inheritance.items() + if any(base.rsplit(sep, 1)[-1] == cs.PY_PROTOCOL for base in bases) + } + return self._protocol_classes_cache + + def protocol_dispatch_targets(self, callee_qn: str) -> set[tuple[str, str]]: + # (H) A call resolved to a Protocol stub method (P.M) never runs the stub: the + # (H) runtime receiver is some conformer, so the sound call graph emits an edge + # (H) to M on every non-Protocol class that defines it. Gating on the resolved + # (H) target being a Protocol method keeps this from firing on ordinary calls. 
+ class_qn, sep, method_name = callee_qn.rpartition(cs.SEPARATOR_DOT) + if not sep or class_qn not in self._protocol_classes(): + return set() + protocols = self._protocol_classes() + targets: set[tuple[str, str]] = set() + for qn in self.function_registry.find_ending_with(method_name): + definer, dot, name = qn.rpartition(cs.SEPARATOR_DOT) + if dot and name == method_name and definer not in protocols: + targets.add((self.function_registry[qn], qn)) + return targets + + def _redirect_protocol_method( + self, result: tuple[str, str] | None + ) -> tuple[str, str] | None: + if result is None: + return result + class_qn, sep, method_name = result[1].rpartition(cs.SEPARATOR_DOT) + if not sep: + return result + impl_qn = self._protocol_impl_map().get(class_qn) + if impl_qn is None: + return result + redirected = f"{impl_qn}{cs.SEPARATOR_DOT}{method_name}" + if redirected in self.function_registry: + return self.function_registry[redirected], redirected + return result + + def _resolve_function_call( + self, + call_name: str, + module_qn: str, + local_var_types: dict[str, str] | None = None, + class_context: str | None = None, + ) -> tuple[str, str] | None: + use_cache = not local_var_types + if use_cache: + cache_key = (call_name, module_qn) + if cache_key in self._simple_resolution_cache: + return self._simple_resolution_cache[cache_key] + if result := self._try_resolve_iife(call_name, module_qn): return result @@ -62,12 +251,115 @@ def resolve_function_call( if result := self._try_resolve_via_imports( call_name, module_qn, local_var_types ): + if use_cache: + self._simple_resolution_cache[cache_key] = result return result if result := self._try_resolve_same_module(call_name, module_qn): + if use_cache: + self._simple_resolution_cache[cache_key] = result + return result + + if class_context and ( + result := self._resolve_self_sibling_method(call_name, class_context) + ): return result - return self._try_resolve_via_trie(call_name, module_qn) + # (H) A bare name 
explicitly imported from outside the project binds to that + # (H) external symbol. Since precise import / same-module resolution above + # (H) already failed, the symbol is unindexed; do NOT let the simple-name + # (H) trie fallback rebind it to an unrelated first-party symbol of the same + # (H) name. (The instantiation eval caught `from evals import GraphData; + # (H) GraphData()` being resolved to codebase_rag's own GraphData class.) + if cs.SEPARATOR_DOT not in call_name and self._is_external_import( + call_name, module_qn + ): + if use_cache: + self._simple_resolution_cache[cache_key] = None + return None + + # (H) A member call `obj.method` whose receiver has a KNOWN inferred type that is + # (H) not a first-party class is a call on an external object (e.g. a + # (H) `std::string`). Precise local-type resolution above already failed, so the + # (H) method lives on the external type; do NOT let the simple-name trie fallback + # (H) rebind it to an unrelated first-party method of the same name. Untyped + # (H) receivers keep the fallback (their type is unknown, not known-external). + if self._receiver_type_is_external(call_name, module_qn, local_var_types): + if use_cache: + self._simple_resolution_cache[cache_key] = None + return None + + result = self._try_resolve_via_trie(call_name, module_qn) + if use_cache: + self._simple_resolution_cache[cache_key] = result + return result + + def _is_external_import(self, call_name: str, module_qn: str) -> bool: + # (H) True when call_name is imported in module_qn from a module outside the + # (H) project. First-party imports are written either project-prefixed + # (H) (`from proj.w import X`) or bare (`from utils.helpers import X`, where + # (H) the registered node is `proj.utils.helpers.X`); both are first-party + # (H) and left to the trie fallback. 
Only a target that is neither rooted at + # (H) the project nor registered under the project prefix is external, so + # (H) this suppresses cross-project fuzzy rebinds without dropping real + # (H) first-party calls. + import_map = self.import_processor.import_mapping.get(module_qn) + if not import_map: + return False + target = import_map.get(call_name) + if not target: + return False + # (H) Only dotted absolute-path imports (Python/Java `pkg.mod.Name`) are + # (H) judged here. Rust/C++ record relative or `::`-separated targets + # (H) (`super::b::helper`) that never carry the project prefix and rely on + # (H) the trie fallback to resolve, so they must not be mistaken external. + if cs.SEPARATOR_DOT not in target or cs.SEPARATOR_DOUBLE_COLON in target: + return False + project_root = module_qn.split(cs.SEPARATOR_DOT, 1)[0] + if target.split(cs.SEPARATOR_DOT, 1)[0] == project_root: + return False + return f"{project_root}{cs.SEPARATOR_DOT}{target}" not in self.function_registry + + def _receiver_type_is_external( + self, + call_name: str, + module_qn: str, + local_var_types: dict[str, str] | None, + ) -> bool: + # (H) True only for a two-part dotted member call `obj.method` whose `obj` has an + # (H) inferred local type that is known to be external. The receiver type is + # (H) external when it resolves to nothing, or to a qn that is neither registered + # (H) nor rooted at the project (a `std::string` -> `std.string`). In that case + # (H) the method lives on the external type, so the simple-name trie fallback must + # (H) not rebind it to a same-named first-party method. An untyped receiver (obj + # (H) absent from the map) or a project-rooted type is left alone: its method may + # (H) still be resolved by the fallback (e.g. a cross-file imported-class call the + # (H) precise path missed), so only a provably external type is suppressed. 
+ if not local_var_types or cs.SEPARATOR_DOT not in call_name: + return False + parts = call_name.split(cs.SEPARATOR_DOT) + if len(parts) != 2: + return False + var_type = local_var_types.get(parts[0]) + if var_type is None: + return False + import_map = self.import_processor.import_mapping.get(module_qn, {}) + class_qn = self._resolve_class_qn_from_type(var_type, import_map, module_qn) + if not class_qn: + return True + # (H) First-party class qns may be written without the project prefix (a bare + # (H) `from models.user import User` resolves to `models.user.User` while the + # (H) registry stores `proj.models.user.User`), so check both the qn as-is and + # (H) the project-prefixed form before judging a type external -- mirrors + # (H) _is_external_import. A project-rooted qn is always treated as first-party. + project_root = module_qn.split(cs.SEPARATOR_DOT, 1)[0] + if class_qn.split(cs.SEPARATOR_DOT, 1)[0] == project_root: + return False + return ( + class_qn not in self.function_registry + and f"{project_root}{cs.SEPARATOR_DOT}{class_qn}" + not in self.function_registry + ) def _try_resolve_iife( self, call_name: str, module_qn: str @@ -119,9 +411,7 @@ def _try_resolve_direct_import( return None imported_qn = import_map[call_name] if imported_qn in self.function_registry: - logger.debug( - ls.CALL_DIRECT_IMPORT.format(call_name=call_name, qn=imported_qn) - ) + logger.debug(ls.CALL_DIRECT_IMPORT, call_name=call_name, qn=imported_qn) return self.function_registry[imported_qn], imported_qn return None @@ -132,10 +422,15 @@ def _try_resolve_qualified_call( module_qn: str, local_var_types: dict[str, str] | None, ) -> tuple[str, str] | None: - if not self._has_separator(call_name): + if cs.SEPARATOR_DOUBLE_COLON in call_name: + separator = cs.SEPARATOR_DOUBLE_COLON + elif cs.SEPARATOR_COLON in call_name: + separator = cs.SEPARATOR_COLON + elif cs.SEPARATOR_DOT in call_name: + separator = cs.SEPARATOR_DOT + else: return None - separator = 
self._get_separator(call_name) parts = call_name.split(separator) if len(parts) == 2: @@ -170,9 +465,17 @@ def _get_separator(self, call_name: str) -> str: def _try_resolve_wildcard_imports( self, call_name: str, import_map: dict[str, str] ) -> tuple[str, str] | None: - for local_name, imported_qn in import_map.items(): - if not local_name.startswith("*"): - continue + map_id = id(import_map) + if map_id not in self._wildcard_cache: + self._wildcard_cache[map_id] = ( + [(k, v) for k, v in import_map.items() if k[0] == "*"] + if import_map + else [] + ) + wildcards = self._wildcard_cache[map_id] + if not wildcards: + return None + for _, imported_qn in wildcards: if result := self._try_wildcard_qns(call_name, imported_qn): return result return None @@ -187,9 +490,7 @@ def _try_wildcard_qns( for wildcard_qn in potential_qns: if wildcard_qn in self.function_registry: - logger.debug( - ls.CALL_WILDCARD.format(call_name=call_name, qn=wildcard_qn) - ) + logger.debug(ls.CALL_WILDCARD, call_name=call_name, qn=wildcard_qn) return self.function_registry[wildcard_qn], wildcard_qn return None @@ -199,7 +500,7 @@ def _try_resolve_same_module( same_module_func_qn = f"{module_qn}.{call_name}" if same_module_func_qn in self.function_registry: logger.debug( - ls.CALL_SAME_MODULE.format(call_name=call_name, qn=same_module_func_qn) + ls.CALL_SAME_MODULE, call_name=call_name, qn=same_module_func_qn ) return self.function_registry[same_module_func_qn], same_module_func_qn return None @@ -207,19 +508,39 @@ def _try_resolve_same_module( def _try_resolve_via_trie( self, call_name: str, module_qn: str ) -> tuple[str, str] | None: - search_name = re.split(r"[.:]|::", call_name)[-1] + search_name = _SEARCH_NAME_CACHE.get(call_name) + if search_name is None: + search_name = _SEPARATOR_PATTERN.split(call_name)[-1] + _SEARCH_NAME_CACHE[call_name] = search_name possible_matches = self.function_registry.find_ending_with(search_name) if not possible_matches: - 
logger.debug(ls.CALL_UNRESOLVED.format(call_name=call_name)) + logger.debug(ls.CALL_UNRESOLVED, call_name=call_name) return None - possible_matches.sort( - key=lambda qn: self._calculate_import_distance(qn, module_qn) - ) - best_candidate_qn = possible_matches[0] - logger.debug( - ls.CALL_TRIE_FALLBACK.format(call_name=call_name, qn=best_candidate_qn) - ) + if len(possible_matches) == 1: + best_candidate_qn = possible_matches[0] + else: + caller_parts = module_qn.split(cs.SEPARATOR_DOT) + caller_len = len(caller_parts) + caller_parent_prefix = ( + cs.SEPARATOR_DOT.join(caller_parts[:-1]) + cs.SEPARATOR_DOT + if caller_len > 1 + else "" + ) + best_candidate_qn = min( + possible_matches, + key=lambda qn: ( + # (H) An @abstractmethod stub never runs when a concrete override + # (H) exists, so prefer concrete candidates over abstract ones + # (H) even when the abstract stub is closer by import distance. + self.function_registry.is_abstract(qn), + self._import_distance_fast( + qn, caller_parts, caller_len, caller_parent_prefix + ), + qn, + ), + ) + logger.debug(ls.CALL_TRIE_FALLBACK, call_name=call_name, qn=best_candidate_qn) return self.function_registry[best_candidate_qn], best_candidate_qn def _resolve_two_part_call( @@ -293,23 +614,21 @@ def _try_method_on_class( method_qn = f"{class_qn}{separator}{method_name}" if method_qn in self.function_registry: logger.debug( - ls.CALL_TYPE_INFERRED.format( - call_name=call_name, - method_qn=method_qn, - obj=object_name, - var_type=var_type, - ) + ls.CALL_TYPE_INFERRED, + call_name=call_name, + method_qn=method_qn, + obj=object_name, + var_type=var_type, ) return self.function_registry[method_qn], method_qn if inherited := self._resolve_inherited_method(class_qn, method_name): logger.debug( - ls.CALL_TYPE_INFERRED_INHERITED.format( - call_name=call_name, - method_qn=inherited[1], - obj=object_name, - var_type=var_type, - ) + ls.CALL_TYPE_INFERRED_INHERITED, + call_name=call_name, + method_qn=inherited[1], + obj=object_name, + 
var_type=var_type, ) return inherited return None @@ -336,7 +655,7 @@ def _try_resolve_via_import( if method_qn in self.function_registry: logger.debug( - ls.CALL_IMPORT_STATIC.format(call_name=call_name, method_qn=method_qn) + ls.CALL_IMPORT_STATIC, call_name=call_name, method_qn=method_qn ) return self.function_registry[method_qn], method_qn return None @@ -377,7 +696,7 @@ def _try_resolve_module_method( method_qn = f"{module_qn}.{method_name}" if method_qn in self.function_registry: logger.debug( - ls.CALL_OBJECT_METHOD.format(call_name=call_name, method_qn=method_qn) + ls.CALL_OBJECT_METHOD, call_name=call_name, method_qn=method_qn ) return self.function_registry[method_qn], method_qn return None @@ -401,12 +720,11 @@ def _resolve_self_attribute_call( method_qn = f"{class_qn}.{method_name}" if method_qn in self.function_registry: logger.debug( - ls.CALL_INSTANCE_ATTR.format( - call_name=call_name, - method_qn=method_qn, - attr_ref=attribute_ref, - var_type=var_type, - ) + ls.CALL_INSTANCE_ATTR, + call_name=call_name, + method_qn=method_qn, + attr_ref=attribute_ref, + var_type=var_type, ) return self.function_registry[method_qn], method_qn @@ -414,12 +732,11 @@ def _resolve_self_attribute_call( class_qn, method_name ): logger.debug( - ls.CALL_INSTANCE_ATTR_INHERITED.format( - call_name=call_name, - method_qn=inherited_method[1], - attr_ref=attribute_ref, - var_type=var_type, - ) + ls.CALL_INSTANCE_ATTR_INHERITED, + call_name=call_name, + method_qn=inherited_method[1], + attr_ref=attribute_ref, + var_type=var_type, ) return inherited_method @@ -441,9 +758,9 @@ def _resolve_multi_part_call( method_qn = f"{class_qn}.{method_name}" if method_qn in self.function_registry: logger.debug( - ls.CALL_IMPORT_QUALIFIED.format( - call_name=call_name, method_qn=method_qn - ) + ls.CALL_IMPORT_QUALIFIED, + call_name=call_name, + method_qn=method_qn, ) return self.function_registry[method_qn], method_qn @@ -455,12 +772,11 @@ def _resolve_multi_part_call( method_qn = 
f"{class_qn}.{method_name}" if method_qn in self.function_registry: logger.debug( - ls.CALL_INSTANCE_QUALIFIED.format( - call_name=call_name, - method_qn=method_qn, - class_name=class_name, - var_type=var_type, - ) + ls.CALL_INSTANCE_QUALIFIED, + call_name=call_name, + method_qn=method_qn, + class_name=class_name, + var_type=var_type, ) return self.function_registry[method_qn], method_qn @@ -468,17 +784,75 @@ def _resolve_multi_part_call( class_qn, method_name ): logger.debug( - ls.CALL_INSTANCE_INHERITED.format( - call_name=call_name, - method_qn=inherited_method[1], - class_name=class_name, - var_type=var_type, - ) + ls.CALL_INSTANCE_INHERITED, + call_name=call_name, + method_qn=inherited_method[1], + class_name=class_name, + var_type=var_type, ) return inherited_method return None + def operator_dunder_targets( + self, + operand_text: str, + dunder: str, + module_qn: str, + local_var_types: dict[str, str] | None, + ) -> set[tuple[str, str]]: + # (H) Operator syntax dispatches to a dunder on the operand's type. Resolve only + # (H) when the operand type is known; never via the name-only trie fallback, so a + # (H) builtin container does not borrow a first-party dunder. A Protocol-typed + # (H) operand dispatches to the dunder on each structural implementer (which may + # (H) define the dunder even when the Protocol stub does not, e.g. __len__). + if not local_var_types or not (var_type := local_var_types.get(operand_text)): + return set() + import_map = self.import_processor.import_mapping.get(module_qn, {}) + class_qn = self._resolve_class_qn_from_type(var_type, import_map, module_qn) + if not class_qn: + return set() + if class_qn in self._protocol_classes(): + # (H) Naming convention (XxxProtocol -> Xxx) is robust when it applies; + # (H) structural conformance covers protocols whose implementer is named + # (H) differently. Union both so neither gap drops a concrete target. 
+ classes = set(self._protocol_structural_implementers(class_qn)) + if named_impl := self._protocol_impl_map().get(class_qn): + classes.add(named_impl) + else: + classes = {class_qn} + targets: set[tuple[str, str]] = set() + for candidate in classes: + if resolved := self._try_resolve_method(candidate, dunder): + targets.add(resolved) + return targets + + def _protocol_structural_implementers(self, protocol_qn: str) -> set[str]: + # (H) Classes that define every method declared on the Protocol (own or + # (H) inherited). Used to dispatch operator dunders to the concrete type when the + # (H) Protocol/implementer names don't follow the XxxProtocol convention. + if protocol_qn in self._struct_impl_cache: + return self._struct_impl_cache[protocol_qn] + sep = cs.SEPARATOR_DOT + protocol_methods = { + qn.rsplit(sep, 1)[-1] + for qn, node_type in self.function_registry.find_with_prefix(protocol_qn) + if node_type == NodeType.METHOD and qn.rsplit(sep, 1)[0] == protocol_qn + } + result: set[str] = set() + if protocol_methods: + protocols = self._protocol_classes() + for candidate in self.class_inheritance: + if candidate in protocols: + continue + if all( + self._try_resolve_method(candidate, method) + for method in protocol_methods + ): + result.add(candidate) + self._struct_impl_cache[protocol_qn] = result + return result + def resolve_builtin_call(self, call_name: str) -> tuple[str, str] | None: if call_name in cs.JS_BUILTIN_PATTERNS: return (cs.NodeLabel.FUNCTION, f"{cs.BUILTIN_PREFIX}.{call_name}") @@ -536,7 +910,7 @@ def _resolve_chained_call( module_qn: str, local_var_types: dict[str, str] | None = None, ) -> tuple[str, str] | None: - match = re.search(r"\.([^.()]+)$", call_name) + match = _CHAINED_METHOD_PATTERN.search(call_name) if not match: return None @@ -559,12 +933,11 @@ def _resolve_chained_call( if method_qn in self.function_registry: logger.debug( - ls.CALL_CHAINED.format( - call_name=call_name, - method_qn=method_qn, - obj_expr=object_expr, - 
obj_type=object_type, - ) + ls.CALL_CHAINED, + call_name=call_name, + method_qn=method_qn, + obj_expr=object_expr, + obj_type=object_type, ) return self.function_registry[method_qn], method_qn @@ -572,12 +945,11 @@ def _resolve_chained_call( full_object_type, final_method ): logger.debug( - ls.CALL_CHAINED_INHERITED.format( - call_name=call_name, - method_qn=inherited_method[1], - obj_expr=object_expr, - obj_type=object_type, - ) + ls.CALL_CHAINED_INHERITED, + call_name=call_name, + method_qn=inherited_method[1], + obj_expr=object_expr, + obj_type=object_type, ) return inherited_method @@ -596,45 +968,118 @@ def _resolve_super_call( current_class_qn = class_context if not current_class_qn: - logger.debug(ls.CALL_SUPER_NO_CONTEXT.format(call_name=call_name)) + logger.debug(ls.CALL_SUPER_NO_CONTEXT, call_name=call_name) return None if current_class_qn not in self.class_inheritance: - logger.debug(ls.CALL_SUPER_NO_INHERITANCE.format(class_qn=current_class_qn)) + logger.debug(ls.CALL_SUPER_NO_INHERITANCE, class_qn=current_class_qn) return None parent_classes = self.class_inheritance[current_class_qn] if not parent_classes: - logger.debug(ls.CALL_SUPER_NO_PARENTS.format(class_qn=current_class_qn)) + logger.debug(ls.CALL_SUPER_NO_PARENTS, class_qn=current_class_qn) return None if result := self._resolve_inherited_method(current_class_qn, method_name): callee_type, parent_method_qn = result logger.debug( - ls.CALL_SUPER_RESOLVED.format( - call_name=call_name, method_qn=parent_method_qn - ) + ls.CALL_SUPER_RESOLVED, + call_name=call_name, + method_qn=parent_method_qn, ) return callee_type, parent_method_qn logger.debug( - ls.CALL_SUPER_UNRESOLVED.format( - call_name=call_name, class_qn=current_class_qn - ) + ls.CALL_SUPER_UNRESOLVED, + call_name=call_name, + class_qn=current_class_qn, ) return None + def _resolve_self_sibling_method( + self, call_name: str, class_context: str + ) -> tuple[str, str] | None: + # (H) self.method() in a mixin may call a method defined on a 
SIBLING mixin + # (H) (neither is the other's base); both are combined into a concrete class. + # (H) Resolve through the concrete subclasses' MRO and accept the target only + # (H) when it is unambiguous, so an unrelated same-named method cannot win. + parts = call_name.split(cs.SEPARATOR_DOT) + if len(parts) != 2 or parts[0] != cs.KEYWORD_SELF: + return None + method_name = parts[1] + candidates: set[str] = set() + for subclass_qn in self._concrete_subclasses(class_context): + candidates |= self._mro_method_qns(subclass_qn, method_name) + if not candidates: + return None + # (H) An @abstractmethod stub never runs when a concrete sibling implements the + # (H) method, so prefer concrete candidates; resolve only when unambiguous. + chosen = { + qn for qn in candidates if not self.function_registry.is_abstract(qn) + } or candidates + if len(chosen) != 1: + return None + method_qn = next(iter(chosen)) + logger.debug( + ls.CALL_INSTANCE_ATTR_INHERITED, + call_name=call_name, + method_qn=method_qn, + attr_ref=cs.KEYWORD_SELF, + var_type=class_context, + ) + return self.function_registry[method_qn], method_qn + + def _mro_method_qns(self, class_qn: str, method_name: str) -> set[str]: + results: set[str] = set() + visited: set[str] = set() + queue: deque[str] = deque([class_qn]) + while queue: + current = self._follow_reexports(queue.popleft()) + if current in visited: + continue + visited.add(current) + method_qn = f"{current}.{method_name}" + if method_qn in self.function_registry: + results.add(method_qn) + queue.extend(self.class_inheritance.get(current, ())) + return results + + def _subclass_map(self) -> dict[str, set[str]]: + if self._subclass_map_cache is None: + mapping: dict[str, set[str]] = defaultdict(set) + for subclass_qn, bases in self.class_inheritance.items(): + for base in bases: + mapping[self._follow_reexports(base)].add(subclass_qn) + self._subclass_map_cache = mapping + return self._subclass_map_cache + + def _concrete_subclasses(self, class_qn: 
str) -> set[str]: + subclass_map = self._subclass_map() + found: set[str] = set() + stack = list(subclass_map.get(class_qn, ())) + while stack: + current = stack.pop() + if current in found: + continue + found.add(current) + stack.extend(subclass_map.get(current, ())) + return found + def _resolve_inherited_method( self, class_qn: str, method_name: str ) -> tuple[str, str] | None: if class_qn not in self.class_inheritance: return None - queue = list(self.class_inheritance.get(class_qn, [])) - visited = set(queue) + bfs_queue = deque(self.class_inheritance.get(class_qn, [])) + visited = set(bfs_queue) - while queue: - parent_class_qn = queue.pop(0) + while bfs_queue: + # (H) Base classes are recorded by the name the subclass imported, which + # (H) may be a package re-export (class_ingest.ClassIngestMixin) rather than + # (H) the real definition (class_ingest.mixin.ClassIngestMixin); follow the + # (H) re-export so the inherited method qn matches the registry. + parent_class_qn = self._follow_reexports(bfs_queue.popleft()) parent_method_qn = f"{parent_class_qn}.{method_name}" if parent_method_qn in self.function_registry: @@ -647,7 +1092,7 @@ def _resolve_inherited_method( for grandparent_qn in self.class_inheritance[parent_class_qn]: if grandparent_qn not in visited: visited.add(grandparent_qn) - queue.append(grandparent_qn) + bfs_queue.append(grandparent_qn) return None @@ -673,6 +1118,30 @@ def _calculate_import_distance( return base_distance + def _import_distance_fast( + self, + candidate_qn: str, + caller_parts: list[str], + caller_len: int, + caller_parent_prefix: str, + ) -> int: + if candidate_qn in _QN_SPLIT_CACHE: + candidate_parts, candidate_len = _QN_SPLIT_CACHE[candidate_qn] + else: + candidate_parts = candidate_qn.split(cs.SEPARATOR_DOT) + candidate_len = len(candidate_parts) + _QN_SPLIT_CACHE[candidate_qn] = (candidate_parts, candidate_len) + common_prefix = 0 + for i in range(min(caller_len, candidate_len)): + if caller_parts[i] == 
candidate_parts[i]: + common_prefix += 1 + else: + break + base_distance = max(caller_len, candidate_len) - common_prefix + if caller_parent_prefix and candidate_qn.startswith(caller_parent_prefix): + base_distance -= 1 + return base_distance + def _resolve_class_name(self, class_name: str, module_qn: str) -> str | None: return resolve_class_name( class_name, module_qn, self.import_processor, self.function_registry @@ -682,7 +1151,7 @@ def resolve_java_method_call( self, call_node: Node, module_qn: str, - local_var_types: dict[str, str], + local_var_types: dict[str, str] | None, ) -> tuple[str, str] | None: java_engine = self.type_inference.java_type_inference @@ -697,7 +1166,7 @@ def resolve_java_method_call( else cs.TEXT_UNKNOWN ) logger.debug( - ls.CALL_JAVA_RESOLVED.format(call_text=call_text, method_qn=result[1]) + ls.CALL_JAVA_RESOLVED, call_text=call_text, method_qn=result[1] ) return result diff --git a/codebase_rag/parsers/class_ingest/cpp_modules.py b/codebase_rag/parsers/class_ingest/cpp_modules.py index a5db9bc47..afae6d901 100644 --- a/codebase_rag/parsers/class_ingest/cpp_modules.py +++ b/codebase_rag/parsers/class_ingest/cpp_modules.py @@ -8,6 +8,7 @@ from ... import constants as cs from ... 
import logs +from ...utils.path_utils import cached_relative_path, cached_resolve_posix from ..utils import safe_decode_text, safe_decode_with_fallback from .utils import decode_node_stripped @@ -41,7 +42,7 @@ def ingest_cpp_module_declarations( def _find_module_declarations(root_node: Node) -> list[tuple[Node, str]]: module_declarations: list[tuple[Node, str]] = [] - def find_declarations(node: Node) -> None: + for node in root_node.children: if node.type == cs.TS_MODULE_DECLARATION: module_declarations.append((node, decode_node_stripped(node))) elif node.type == cs.CppNodeType.DECLARATION: @@ -56,10 +57,6 @@ def find_declarations(node: Node) -> None: if has_module: module_declarations.append((node, decode_node_stripped(node))) - for child in node.children: - find_declarations(child) - - find_declarations(root_node) return module_declarations @@ -83,7 +80,8 @@ def _process_export_module( { cs.KEY_QUALIFIED_NAME: interface_qn, cs.KEY_NAME: module_name, - cs.KEY_PATH: str(file_path.relative_to(repo_path)), + cs.KEY_PATH: cached_relative_path(file_path, repo_path).as_posix(), + cs.KEY_ABSOLUTE_PATH: cached_resolve_posix(file_path), cs.KEY_MODULE_TYPE: cs.CPP_MODULE_TYPE_INTERFACE, }, ) @@ -117,7 +115,8 @@ def _process_module_implementation( { cs.KEY_QUALIFIED_NAME: impl_qn, cs.KEY_NAME: f"{module_name}{cs.CPP_IMPL_SUFFIX}", - cs.KEY_PATH: str(file_path.relative_to(repo_path)), + cs.KEY_PATH: cached_relative_path(file_path, repo_path).as_posix(), + cs.KEY_ABSOLUTE_PATH: cached_resolve_posix(file_path), cs.KEY_IMPLEMENTS_MODULE: module_name, cs.KEY_MODULE_TYPE: cs.CPP_MODULE_TYPE_IMPLEMENTATION, }, @@ -141,27 +140,27 @@ def _process_module_implementation( def find_cpp_exported_classes(root_node: Node) -> list[Node]: exported_class_nodes: list[Node] = [] + stack = list(root_node.children) - def traverse(node: Node) -> None: + while stack: + node = stack.pop() if node.type == cs.CppNodeType.FUNCTION_DEFINITION: node_text = decode_node_stripped(node) if 
node_text.startswith(cs.CPP_EXPORT_PREFIXES): + found = False for child in node.children: if child.type == cs.TS_ERROR and child.text: error_text = safe_decode_text(child) if error_text in cs.CPP_EXPORTED_CLASS_KEYWORDS: exported_class_nodes.append(node) + found = True break - else: - if ( - cs.CPP_EXPORT_CLASS_PREFIX in node_text - or cs.CPP_EXPORT_STRUCT_PREFIX in node_text - ): - exported_class_nodes.append(node) - - for child in node.children: - traverse(child) + if not found and ( + cs.CPP_EXPORT_CLASS_PREFIX in node_text + or cs.CPP_EXPORT_STRUCT_PREFIX in node_text + ): + exported_class_nodes.append(node) + stack.extend(node.children) - traverse(root_node) return exported_class_nodes diff --git a/codebase_rag/parsers/class_ingest/identity.py b/codebase_rag/parsers/class_ingest/identity.py index 85f670444..fc5ba13c6 100644 --- a/codebase_rag/parsers/class_ingest/identity.py +++ b/codebase_rag/parsers/class_ingest/identity.py @@ -7,7 +7,6 @@ from ... import constants as cs from ...language_spec import LANGUAGE_FQN_SPECS -from ...utils.fqn_resolver import resolve_fqn_from_ast from ..cpp import utils as cpp_utils from ..rs import utils as rs_utils from ..utils import safe_decode_text @@ -22,18 +21,23 @@ def resolve_class_identity( language: cs.SupportedLanguage, lang_config: LanguageSpec, file_path: Path | None, - repo_path: Path, - project_name: str, ) -> tuple[str, str, bool] | None: if (fqn_config := LANGUAGE_FQN_SPECS.get(language)) and file_path: - if class_qn := resolve_fqn_from_ast( - class_node, - file_path, - repo_path, - project_name, - fqn_config, - ): - class_name = class_qn.split(cs.SEPARATOR_DOT)[-1] + class_name = fqn_config.get_name(class_node) + if class_name: + parts = [class_name] + current = class_node.parent + while current: + if current.type in fqn_config.scope_node_types: + if scope_name := fqn_config.get_name(current): + parts.append(scope_name) + current = current.parent + parts.reverse() + + # (H) Use the module's already-resolved (and 
collision-disambiguated) + # (H) qualified name as the prefix rather than recomputing from the path, + # (H) so same-stem cross-language siblings get distinct class/method qns. + class_qn = module_qn + cs.SEPARATOR_DOT + cs.SEPARATOR_DOT.join(parts) is_exported = language == cs.SupportedLanguage.CPP and ( class_node.type == cs.CppNodeType.FUNCTION_DEFINITION or cpp_utils.is_exported(class_node) diff --git a/codebase_rag/parsers/class_ingest/method_override.py b/codebase_rag/parsers/class_ingest/method_override.py index 686ff26e6..9dfc8bedf 100644 --- a/codebase_rag/parsers/class_ingest/method_override.py +++ b/codebase_rag/parsers/class_ingest/method_override.py @@ -66,9 +66,9 @@ def check_method_overrides( (cs.NodeLabel.METHOD, cs.KEY_QUALIFIED_NAME, parent_method_qn), ) logger.debug( - logs.CLASS_METHOD_OVERRIDE.format( - method_qn=method_qn, parent_method_qn=parent_method_qn - ) + logs.CLASS_METHOD_OVERRIDE, + method_qn=method_qn, + parent_method_qn=parent_method_qn, ) return diff --git a/codebase_rag/parsers/class_ingest/mixin.py b/codebase_rag/parsers/class_ingest/mixin.py index 2ba3f8f8c..a8e23ad3d 100644 --- a/codebase_rag/parsers/class_ingest/mixin.py +++ b/codebase_rag/parsers/class_ingest/mixin.py @@ -1,19 +1,24 @@ from __future__ import annotations from abc import abstractmethod +from bisect import bisect_left, bisect_right from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, NamedTuple from loguru import logger from tree_sitter import Node, QueryCursor from ... import constants as cs from ... 
import logs +from ...config import settings +from ...language_spec import LanguageSpec from ...types_defs import ASTNode, PropertyDict +from ...utils.path_utils import cached_relative_path, cached_resolve_posix +from ..cpp import CppTypeInferenceEngine from ..java import utils as java_utils from ..py import resolve_class_name from ..rs import utils as rs_utils -from ..utils import ingest_method, safe_decode_text +from ..utils import ingest_method, safe_decode_text, sorted_captures from . import cpp_modules from . import identity as id_ from . import method_override as mo @@ -21,7 +26,6 @@ from . import relationships as rel if TYPE_CHECKING: - from ...language_spec import LanguageSpec from ...services import IngestorProtocol from ...types_defs import ( FunctionRegistryTrieProtocol, @@ -31,7 +35,66 @@ from ..import_processor import ImportProcessor +def _is_nested_inside_function( + node: Node, class_body: Node, lang_config: LanguageSpec +) -> bool: + current = node.parent + while current and current is not class_body: + if ( + current.type in lang_config.function_node_types + and current.child_by_field_name(cs.FIELD_BODY) is not None + ): + return True + current = current.parent + return False + + +def _method_belongs_directly( + method_node: Node, class_node: Node, lang_config: LanguageSpec +) -> bool: + current = method_node.parent + while current is not None: + if current == class_node: + return True + if current.type in lang_config.class_node_types or ( + current.type in lang_config.function_node_types + and current.child_by_field_name(cs.FIELD_BODY) is not None + ): + return False + current = current.parent + return False + + +def _skip_method( + method_node: Node, class_node: Node, class_body: Node, lang_config: LanguageSpec +) -> bool: + if settings.CAPTURE_FUNCTION_LOCAL_DEFINITIONS: + return not _method_belongs_directly(method_node, class_node, lang_config) + return _is_nested_inside_function(method_node, class_body, lang_config) + + +class 
_DeferredForwardDecl(NamedTuple): + # (H) A C/C++ forward declaration held back until every file's real definitions + # (H) are registered, so we can tell an only-forward-declared type (keep it) from + # (H) one that also has a bodied definition elsewhere (drop the phantom). + class_node: Node + class_name: str + # (H) The namespace-qualified name (module-file prefix stripped, so `A::Foo` is + # (H) `A.Foo` regardless of which header declares it). Comparing on this — not the + # (H) bare simple name — keeps a forward-declared `B::Foo` when only `A::Foo` is + # (H) defined, while still matching a cross-file forward/definition of one type. + ns_qn: str + module_qn: str + language: cs.SupportedLanguage + lang_queries: LanguageQueries + lang_config: LanguageSpec + file_path: Path | None + sorted_func_nodes: list[Node] | None + func_node_starts: list[int] | None + + class ClassIngestMixin: + __slots__ = () ingestor: IngestorProtocol repo_path: Path project_name: str @@ -40,6 +103,30 @@ class ClassIngestMixin: module_qn_to_file_path: dict[str, Path] import_processor: ImportProcessor class_inheritance: dict[str, list[str]] + class_field_types: dict[str, dict[str, str]] + _deferred_forward_decls: list[_DeferredForwardDecl] + + def _namespace_qn(self, class_qn: str, module_qn: str) -> str: + # (H) Strip the module-file prefix so two nodes for the same C++ type in + # (H) different headers share one key (`leveldb.db.x.h.leveldb.VersionSet` and + # (H) `...y.h.leveldb.VersionSet` both -> `leveldb.VersionSet`), while types in + # (H) different namespaces stay distinct. + prefix = f"{module_qn}{cs.SEPARATOR_DOT}" + return class_qn[len(prefix) :] if class_qn.startswith(prefix) else class_qn + + def _namespace_qn_has_definition(self, ns_qn: str) -> bool: + # (H) A real definition of this namespace-qualified type is registered iff some + # (H) class qn ends with it (`....leveldb.VersionSet`). 
find_ending_with is + # (H) indexed by simple name, and because it is queried AFTER the registry is + # (H) rehydrated from the graph, it also covers definitions in files an + # (H) incremental run did not re-parse (issue: a forward decl must still drop + # (H) when its definition lives in an unchanged file). + simple = ns_qn.rsplit(cs.SEPARATOR_DOT, 1)[-1] + suffix = f"{cs.SEPARATOR_DOT}{ns_qn}" + return any( + qn.endswith(suffix) + for qn in self.function_registry.find_ending_with(simple) + ) @abstractmethod def _get_docstring(self, node: ASTNode) -> str | None: ... @@ -47,6 +134,16 @@ def _get_docstring(self, node: ASTNode) -> str | None: ... @abstractmethod def _extract_decorators(self, node: ASTNode) -> list[str]: ... + @abstractmethod + def _determine_function_parent( + self, + func_node: Node, + func_qn: str, + module_qn: str, + lang_config: LanguageSpec, + language: cs.SupportedLanguage | None = None, + ) -> tuple[str, str]: ... + def _resolve_to_qn(self, name: str, module_qn: str) -> str: return self._resolve_class_name(name, module_qn) or f"{module_qn}.{name}" @@ -74,35 +171,79 @@ def _ingest_classes_and_methods( module_qn: str, language: cs.SupportedLanguage, queries: dict[cs.SupportedLanguage, LanguageQueries], + combined_captures: dict[str, list] | None = None, ) -> None: lang_queries = queries[language] - if not (query := lang_queries[cs.QUERY_CLASSES]): - return - lang_config: LanguageSpec = lang_queries[cs.QUERY_CONFIG] - cursor = QueryCursor(query) - captures = cursor.captures(root_node) - class_nodes = captures.get(cs.CAPTURE_CLASS, []) - module_nodes = captures.get(cs.ONEOF_MODULE, []) + + if combined_captures is not None: + class_nodes = list(combined_captures.get(cs.CAPTURE_CLASS, [])) + module_nodes = combined_captures.get(cs.ONEOF_MODULE, []) + else: + if not (query := lang_queries[cs.QUERY_CLASSES]): + return + cursor = QueryCursor(query) + captures = sorted_captures(cursor, root_node) + class_nodes = captures.get(cs.CAPTURE_CLASS, []) + 
module_nodes = captures.get(cs.ONEOF_MODULE, []) if language == cs.SupportedLanguage.CPP: class_nodes.extend(self._find_cpp_exported_classes(root_node)) file_path = self.module_qn_to_file_path.get(module_qn) + sorted_func_nodes: list[Node] | None = None + func_node_starts: list[int] | None = None + if combined_captures is not None and cs.CAPTURE_FUNCTION in combined_captures: + sorted_func_nodes = combined_captures[cs.CAPTURE_FUNCTION] + func_node_starts = [n.start_byte for n in sorted_func_nodes] + for class_node in class_nodes: - if isinstance(class_node, Node): - self._process_class_node( - class_node, - module_qn, - language, - lang_queries, - lang_config, - file_path, - ) + self._process_class_node( + class_node, + module_qn, + language, + lang_queries, + lang_config, + file_path, + sorted_func_nodes=sorted_func_nodes, + func_node_starts=func_node_starts, + ) self._process_inline_modules(module_nodes, module_qn, lang_config) + def resolve_deferred_forward_declarations(self) -> int: + # (H) Run after every file's definitions are registered. A deferred forward + # (H) declaration whose class name already produced a real node is a phantom + # (H) (the bodied definition exists) -> drop it. Otherwise it is the only + # (H) representation of the type -> register it now. Deterministic: the + # (H) deferred list is in file (sorted) order, and the first surviving forward + # (H) declaration of an only-declared type claims the name for the rest. + deferred = getattr(self, "_deferred_forward_decls", None) + if not deferred: + return 0 + self._deferred_forward_decls = [] + registered = 0 + for entry in deferred: + # (H) Drop the forward declaration only when a real definition of the SAME + # (H) namespace-qualified type exists (not merely the same simple name in + # (H) another namespace). Otherwise it is the type's only node -> keep it. 
+ if self._namespace_qn_has_definition(entry.ns_qn): + continue + self._process_class_node( + entry.class_node, + entry.module_qn, + entry.language, + entry.lang_queries, + entry.lang_config, + entry.file_path, + sorted_func_nodes=entry.sorted_func_nodes, + func_node_starts=entry.func_node_starts, + allow_defer=False, + ) + registered += 1 + return registered + def _process_class_node( self, class_node: Node, @@ -111,26 +252,89 @@ def _process_class_node( lang_queries: LanguageQueries, lang_config: LanguageSpec, file_path: Path | None, + sorted_func_nodes: list[Node] | None = None, + func_node_starts: list[int] | None = None, + allow_defer: bool = True, ) -> None: if language == cs.SupportedLanguage.RUST and class_node.type == cs.TS_IMPL_ITEM: self._ingest_rust_impl_methods( - class_node, module_qn, language, lang_queries + class_node, + module_qn, + language, + lang_queries, + sorted_func_nodes=sorted_func_nodes, + func_node_starts=func_node_starts, ) return + # (H) A C/C++ forward declaration (`class Widget;`, or `template class + # (H) Widget;`) is a bodyless type specifier. Registering it collides with the + # (H) real definition's qn (suffixing it `@line`) and fragments one class into + # (H) several same-named nodes, which makes member-call resolution pick among + # (H) duplicates nondeterministically. But a type that is ONLY ever + # (H) forward-declared (an opaque handle, or a metaprogramming primary defined + # (H) solely via specializations) has no other node, so it must be kept. We + # (H) cannot tell which until every file's definitions are in, so defer the + # (H) forward declaration and decide in resolve_deferred_forward_declarations. 
+ type_spec = class_node + if class_node.type == cs.CppNodeType.TEMPLATE_DECLARATION: + type_spec = next( + ( + child + for child in class_node.children + if child.type in cs.CPP_TYPE_SPECIFIER_NODE_TYPES + ), + None, + ) + if ( + type_spec is not None + and type_spec.type in cs.CPP_TYPE_SPECIFIER_NODE_TYPES + and type_spec.child_by_field_name(cs.FIELD_BODY) is None + ): + # (H) The inner bodyless specifier of a template forward decl is redundant + # (H) with its template_declaration wrapper (the canonical template node), + # (H) which is deferred separately; drop the inner one outright. + if ( + class_node.type in cs.CPP_TYPE_SPECIFIER_NODE_TYPES + and class_node.parent is not None + and class_node.parent.type == cs.CppNodeType.TEMPLATE_DECLARATION + ): + return + if allow_defer: + deferred_identity = id_.resolve_class_identity( + class_node, module_qn, language, lang_config, file_path + ) + if deferred_identity: + self._deferred_forward_decls.append( + _DeferredForwardDecl( + class_node, + deferred_identity[1], + self._namespace_qn(deferred_identity[0], module_qn), + module_qn, + language, + lang_queries, + lang_config, + file_path, + sorted_func_nodes, + func_node_starts, + ) + ) + return + identity = id_.resolve_class_identity( class_node, module_qn, language, lang_config, file_path, - self.repo_path, - self.project_name, ) if not identity: return class_qn, class_name, is_exported = identity + class_qn = self.function_registry.register_unique_qn( + class_qn, class_node.start_point[0] + 1 + ) node_type = nt.determine_node_type(class_node, class_name, class_qn, language) class_props: PropertyDict = { @@ -142,15 +346,25 @@ def _process_class_node( cs.KEY_DOCSTRING: self._get_docstring(class_node), cs.KEY_IS_EXPORTED: is_exported, } + if file_path is not None: + class_props[cs.KEY_PATH] = cached_relative_path( + file_path, self.repo_path + ).as_posix() + class_props[cs.KEY_ABSOLUTE_PATH] = cached_resolve_posix(file_path) self.ingestor.ensure_node_batch(node_type, 
class_props) self.function_registry[class_qn] = node_type if class_name: self.simple_name_lookup[class_name].add(class_qn) + parent_label, parent_qn = self._determine_function_parent( + class_node, class_qn, module_qn, lang_config, language + ) rel.create_class_relationships( class_node, class_qn, module_qn, + parent_label, + parent_qn, node_type, is_exported, language, @@ -160,7 +374,21 @@ def _process_class_node( self._resolve_to_qn, self.function_registry, ) - self._ingest_class_methods(class_node, class_qn, language, lang_queries) + if language == cs.SupportedLanguage.CPP: + # (H) Record this class's member-field types now (from the class body, + # (H) usually a header) so out-of-line method bodies in other files can + # (H) resolve `field_.method()` via the field's type at call resolution. + if field_types := CppTypeInferenceEngine().build_field_type_map(class_node): + self.class_field_types[class_qn] = field_types + self._ingest_class_methods( + class_node, + class_qn, + language, + lang_queries, + file_path, + sorted_func_nodes=sorted_func_nodes, + func_node_starts=func_node_starts, + ) def _ingest_rust_impl_methods( self, @@ -168,31 +396,83 @@ def _ingest_rust_impl_methods( module_qn: str, language: cs.SupportedLanguage, lang_queries: LanguageQueries, + sorted_func_nodes: list[Node] | None = None, + func_node_starts: list[int] | None = None, ) -> None: if not (impl_target := rs_utils.extract_impl_target(class_node)): return - class_qn = f"{module_qn}.{impl_target}" + # (H) An impl block inside `mod inner` targets a type whose node lives + # (H) under the module path (proj...inner.Widget). Resolve the impl target + # (H) against its enclosing module so the method binds to the real type + # (H) node instead of a phantom under the file module. 
+ mod_parts = rs_utils.build_module_path(class_node) + owner_module_qn = ( + f"{module_qn}{cs.SEPARATOR_DOT}{cs.SEPARATOR_DOT.join(mod_parts)}" + if mod_parts + else module_qn + ) + class_qn = f"{owner_module_qn}.{impl_target}" + + # (H) `impl Trait for Type` means Type IMPLEMENTS Trait. The target type's + # (H) node label may be Class/Enum/Type, so match the relationship source + # (H) to its registered label (else the IMPLEMENTS edge never resolves). + if trait_name := rs_utils.extract_impl_trait(class_node): + owner_type = self.function_registry.get(class_qn) + owner_label = ( + cs.NodeLabel(owner_type.value) + if owner_type is not None + else cs.NodeLabel.CLASS + ) + self.ingestor.ensure_relationship_batch( + (owner_label, cs.KEY_QUALIFIED_NAME, class_qn), + cs.RelationshipType.IMPLEMENTS, + ( + cs.NodeLabel.INTERFACE, + cs.KEY_QUALIFIED_NAME, + self._resolve_to_qn(trait_name, owner_module_qn), + ), + ) + body_node = class_node.child_by_field_name("body") - method_query = lang_queries[cs.QUERY_FUNCTIONS] - if not body_node or not method_query: + if not body_node: return - method_cursor = QueryCursor(method_query) - method_captures = method_cursor.captures(body_node) - for method_node in method_captures.get(cs.CAPTURE_FUNCTION, []): - if isinstance(method_node, Node): - ingest_method( - method_node, - class_qn, - cs.NodeLabel.CLASS, - self.ingestor, - self.function_registry, - self.simple_name_lookup, - self._get_docstring, - language, - ) + file_path = self.module_qn_to_file_path.get(module_qn) + lang_config: LanguageSpec = lang_queries[cs.QUERY_CONFIG] + + if sorted_func_nodes is not None and func_node_starts is not None: + body_start = body_node.start_byte + body_end = body_node.end_byte + lo = bisect_left(func_node_starts, body_start) + hi = bisect_right(func_node_starts, body_end) + method_nodes = [ + n for n in sorted_func_nodes[lo:hi] if n.end_byte <= body_end + ] + else: + method_query = lang_queries[cs.QUERY_FUNCTIONS] + if not method_query: + return + 
method_cursor = QueryCursor(method_query) + method_captures = sorted_captures(method_cursor, body_node) + method_nodes = method_captures.get(cs.CAPTURE_FUNCTION, []) + + for method_node in method_nodes: + if _skip_method(method_node, class_node, body_node, lang_config): + continue + ingest_method( + method_node, + class_qn, + cs.NodeLabel.CLASS, + self.ingestor, + self.function_registry, + self.simple_name_lookup, + self._get_docstring, + language, + file_path=file_path, + repo_path=self.repo_path, + ) def _ingest_class_methods( self, @@ -200,16 +480,34 @@ def _ingest_class_methods( class_qn: str, language: cs.SupportedLanguage, lang_queries: LanguageQueries, + file_path: Path | None = None, + sorted_func_nodes: list[Node] | None = None, + func_node_starts: list[int] | None = None, ) -> None: body_node = class_node.child_by_field_name("body") - method_query = lang_queries[cs.QUERY_FUNCTIONS] - if not body_node or not method_query: + if not body_node: return - method_cursor = QueryCursor(method_query) - method_captures = method_cursor.captures(body_node) - for method_node in method_captures.get(cs.CAPTURE_FUNCTION, []): - if not isinstance(method_node, Node): + lang_config: LanguageSpec = lang_queries[cs.QUERY_CONFIG] + + if sorted_func_nodes is not None and func_node_starts is not None: + body_start = body_node.start_byte + body_end = body_node.end_byte + lo = bisect_left(func_node_starts, body_start) + hi = bisect_right(func_node_starts, body_end) + method_nodes = [ + n for n in sorted_func_nodes[lo:hi] if n.end_byte <= body_end + ] + else: + method_query = lang_queries[cs.QUERY_FUNCTIONS] + if not method_query: + return + method_cursor = QueryCursor(method_query) + method_captures = sorted_captures(method_cursor, body_node) + method_nodes = method_captures.get(cs.CAPTURE_FUNCTION, []) + + for method_node in method_nodes: + if _skip_method(method_node, class_node, body_node, lang_config): continue method_qualified_name = None @@ -233,6 +531,8 @@ def 
_ingest_class_methods( language, self._extract_decorators, method_qualified_name, + file_path=file_path, + repo_path=self.repo_path, ) def _process_inline_modules( @@ -249,6 +549,13 @@ def _process_inline_modules( if not module_name_node.text: continue + # (H) A bodyless `mod foo;` only declares that the file module foo.rs + # (H) belongs here; foo.rs already yields its own real-path Module node + # (H) with the same qn. Emitting a second synthetic-path node collides + # (H) on that qn and clobbers the file's real path, so skip it. + if module_node.child_by_field_name(cs.FIELD_BODY) is None: + continue + module_name = safe_decode_text(module_name_node) nested_qn = id_.build_nested_qualified_name_for_class( module_node, module_qn, module_name or "", lang_config @@ -259,7 +566,17 @@ def _process_inline_modules( cs.KEY_QUALIFIED_NAME: inline_module_qn, cs.KEY_NAME: module_name, cs.KEY_PATH: f"{cs.INLINE_MODULE_PATH_PREFIX}{module_name}", + cs.KEY_START_LINE: module_node.start_point[0] + 1, + cs.KEY_END_LINE: module_node.end_point[0] + 1, } + # (H) A bodied inline module is physically located in this file; give + # (H) it the real path so it joins containment on (file, line). + file_path = self.module_qn_to_file_path.get(module_qn) + if file_path is not None: + module_props[cs.KEY_PATH] = cached_relative_path( + file_path, self.repo_path + ).as_posix() + module_props[cs.KEY_ABSOLUTE_PATH] = cached_resolve_posix(file_path) logger.info( logs.CLASS_FOUND_INLINE_MODULE.format( name=module_name, qn=inline_module_qn @@ -267,6 +584,17 @@ def _process_inline_modules( ) self.ingestor.ensure_node_batch(cs.NodeLabel.MODULE, module_props) + # (H) Link the inline module into the containment tree: its enclosing + # (H) module (file module, or an outer mod) DEFINES it. Without this the + # (H) inline Module node is an orphan defining nothing. 
+ parent_module_qn = inline_module_qn.rsplit(cs.SEPARATOR_DOT, 1)[0] + if parent_module_qn and parent_module_qn != inline_module_qn: + self.ingestor.ensure_relationship_batch( + (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, parent_module_qn), + cs.RelationshipType.DEFINES, + (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, inline_module_qn), + ) + def process_all_method_overrides(self) -> None: mo.process_all_method_overrides( self.function_registry, diff --git a/codebase_rag/parsers/class_ingest/node_type.py b/codebase_rag/parsers/class_ingest/node_type.py index 8cdf66d78..7485ab66b 100644 --- a/codebase_rag/parsers/class_ingest/node_type.py +++ b/codebase_rag/parsers/class_ingest/node_type.py @@ -16,19 +16,28 @@ def determine_node_type( language: cs.SupportedLanguage, ) -> NodeType: match class_node.type: - case cs.TS_INTERFACE_DECLARATION: + case cs.TS_GO_TYPE_SPEC | cs.TS_GO_TYPE_ALIAS if ( + language == cs.SupportedLanguage.GO + ): + return _go_type_node_type(class_node, class_name, class_qn) + case cs.TS_INTERFACE_DECLARATION | cs.TS_RS_TRAIT_ITEM: logger.info(logs.CLASS_FOUND_INTERFACE.format(name=class_name, qn=class_qn)) return NodeType.INTERFACE - case cs.TS_ENUM_DECLARATION | cs.TS_ENUM_SPECIFIER | cs.TS_ENUM_CLASS_SPECIFIER: + case ( + cs.TS_ENUM_DECLARATION + | cs.TS_ENUM_SPECIFIER + | cs.TS_ENUM_CLASS_SPECIFIER + | cs.TS_RS_ENUM_ITEM + ): logger.info(logs.CLASS_FOUND_ENUM.format(name=class_name, qn=class_qn)) return NodeType.ENUM - case cs.TS_TYPE_ALIAS_DECLARATION: + case cs.TS_TYPE_ALIAS_DECLARATION | cs.TS_RS_TYPE_ITEM: logger.info(logs.CLASS_FOUND_TYPE.format(name=class_name, qn=class_qn)) return NodeType.TYPE - case cs.TS_STRUCT_SPECIFIER: + case cs.TS_STRUCT_SPECIFIER | cs.TS_RS_STRUCT_ITEM: logger.info(logs.CLASS_FOUND_STRUCT.format(name=class_name, qn=class_qn)) return NodeType.CLASS - case cs.TS_UNION_SPECIFIER: + case cs.TS_UNION_SPECIFIER | cs.TS_RS_UNION_ITEM: logger.info(logs.CLASS_FOUND_UNION.format(name=class_name, qn=class_qn)) return 
NodeType.UNION case cs.CppNodeType.TEMPLATE_DECLARATION: @@ -47,6 +56,22 @@ def determine_node_type( return NodeType.CLASS +def _go_type_node_type( + class_node: Node, class_name: str | None, class_qn: str +) -> NodeType: + underlying = class_node.child_by_field_name(cs.FIELD_TYPE) + match underlying.type if underlying else None: + case cs.TS_GO_STRUCT_TYPE: + logger.info(logs.CLASS_FOUND_STRUCT.format(name=class_name, qn=class_qn)) + return NodeType.CLASS + case cs.TS_GO_INTERFACE_TYPE: + logger.info(logs.CLASS_FOUND_INTERFACE.format(name=class_name, qn=class_qn)) + return NodeType.INTERFACE + case _: + logger.info(logs.CLASS_FOUND_TYPE.format(name=class_name, qn=class_qn)) + return NodeType.TYPE + + def log_exported_class_type( class_node: Node, class_name: str | None, class_qn: str ) -> None: diff --git a/codebase_rag/parsers/class_ingest/parent_extraction.py b/codebase_rag/parsers/class_ingest/parent_extraction.py index 289e82c35..fd8673748 100644 --- a/codebase_rag/parsers/class_ingest/parent_extraction.py +++ b/codebase_rag/parsers/class_ingest/parent_extraction.py @@ -16,6 +16,21 @@ from ..import_processor import ImportProcessor +def php_base_simple_name(node: Node) -> str | None: + # (H) A PHP base type is a plain `name` (`Base`) or a `qualified_name` + # (H) (`\Exception`, `\App\Base`) whose trailing `name` child is the simple + # (H) name; cgr resolves bases by simple name. + if node.type == cs.TS_PHP_NAME and node.text: + return safe_decode_text(node) + if node.type == cs.TS_PHP_QUALIFIED_NAME: + last: Node | None = None + for child in node.children: + if child.type == cs.TS_PHP_NAME: + last = child + return safe_decode_text(last) if last and last.text else None + return None + + def extract_parent_classes( class_node: Node, module_qn: str, @@ -52,6 +67,22 @@ def extract_parent_classes( ) ) + # (H) PHP `extends` (a class's superclass or an interface's superinterfaces) + # (H) is a base_clause listing `name` nodes; both are inheritance. 
+ if base_clause := find_child_by_type(class_node, cs.TS_PHP_BASE_CLAUSE): + for child in base_clause.children: + if parent_name := php_base_simple_name(child): + parent_classes.append(resolve_to_qn(parent_name, module_qn)) + + # (H) Rust supertrait bound (`trait Sub: Super`) is inheritance between traits. + if class_node.type == cs.TS_RS_TRAIT_ITEM: + if bounds := class_node.child_by_field_name(cs.FIELD_BOUNDS): + for child in bounds.children: + base = java_base_type_identifier(child) + if base is not None and base.text: + if name := safe_decode_text(base): + parent_classes.append(resolve_to_qn(name, module_qn)) + return parent_classes @@ -90,9 +121,9 @@ def parse_cpp_base_classes( ) parent_classes.append(parent_qn) logger.debug( - logs.CLASS_CPP_INHERITANCE.format( - parent_name=parent_name, parent_qn=parent_qn - ) + logs.CLASS_CPP_INHERITANCE, + parent_name=parent_name, + parent_qn=parent_qn, ) return parent_classes @@ -108,13 +139,38 @@ def extract_cpp_base_class_name(parent_text: str) -> str: return parent_text +def java_base_type_identifier(type_node: Node) -> Node | None: + # (H) The base type in a Java extends/implements clause may be plain + # (H) (`Base`), generic (`Base` -> generic_type), or qualified + # (H) (`pkg.Base` -> scoped_type_identifier). Unwrap to the base type's + # (H) type_identifier so generic/qualified bases are captured, not dropped. + if type_node.type == cs.TS_TYPE_IDENTIFIER: + return type_node + if type_node.type == cs.TS_GENERIC_TYPE: + for child in type_node.children: + if child.type in ( + cs.TS_TYPE_IDENTIFIER, + cs.TS_RS_SCOPED_TYPE_IDENTIFIER, + ): + return java_base_type_identifier(child) + if type_node.type == cs.TS_RS_SCOPED_TYPE_IDENTIFIER: + # (H) `a.b.Base` -> the trailing type_identifier is the simple name. 
+ last: Node | None = None + for child in type_node.children: + if child.type == cs.TS_TYPE_IDENTIFIER: + last = child + return last + return None + + def resolve_superclass_from_type_identifier( type_identifier_node: Node, module_qn: str, resolve_to_qn: Callable[[str, str], str], ) -> str | None: - if type_identifier_node.text: - if parent_name := safe_decode_text(type_identifier_node): + base = java_base_type_identifier(type_identifier_node) + if base is not None and base.text: + if parent_name := safe_decode_text(base): return resolve_to_qn(parent_name, module_qn) return None @@ -128,7 +184,12 @@ def extract_java_superclass( if not superclass_node: return [] - if superclass_node.type == cs.TS_TYPE_IDENTIFIER: + _JAVA_BASE_TYPES = ( + cs.TS_TYPE_IDENTIFIER, + cs.TS_GENERIC_TYPE, + cs.TS_RS_SCOPED_TYPE_IDENTIFIER, + ) + if superclass_node.type in _JAVA_BASE_TYPES: if resolved := resolve_superclass_from_type_identifier( superclass_node, module_qn, resolve_to_qn ): @@ -136,7 +197,7 @@ def extract_java_superclass( return [] for child in superclass_node.children: - if child.type == cs.TS_TYPE_IDENTIFIER: + if child.type in _JAVA_BASE_TYPES: if resolved := resolve_superclass_from_type_identifier( child, module_qn, resolve_to_qn ): @@ -158,17 +219,19 @@ def extract_python_superclasses( import_map = import_processor.import_mapping.get(module_qn) for child in superclasses_node.children: - if child.type != cs.TS_IDENTIFIER or not child.text: + if child.type not in (cs.TS_IDENTIFIER, cs.TS_PY_ATTRIBUTE) or not child.text: continue if not (parent_name := safe_decode_text(child)): continue - if import_map and parent_name in import_map: - parent_classes.append(import_map[parent_name]) + head, sep, tail = parent_name.partition(cs.SEPARATOR_DOT) + if import_map and head in import_map: + resolved_head = import_map[head] elif import_map: - parent_classes.append(resolve_to_qn(parent_name, module_qn)) + resolved_head = resolve_to_qn(head, module_qn) else: - 
parent_classes.append(f"{module_qn}.{parent_name}") + resolved_head = f"{module_qn}.{head}" + parent_classes.append(f"{resolved_head}{sep}{tail}") return parent_classes @@ -238,6 +301,13 @@ def extract_interface_parents( import_processor: ImportProcessor, resolve_to_qn: Callable[[str, str], str], ) -> list[str]: + # (H) Java interface `extends A, B` is an `extends_interfaces` clause holding a + # (H) type_list; superinterfaces are inheritance, so emit them as INHERITS. + if java_extends := find_child_by_type(class_node, cs.TS_JAVA_EXTENDS_INTERFACES): + parents: list[str] = [] + extract_java_interface_names(java_extends, parents, module_qn, resolve_to_qn) + return parents + extends_clause = find_child_by_type(class_node, cs.TS_EXTENDS_TYPE_CLAUSE) if not extends_clause: return [] @@ -310,6 +380,23 @@ def extract_implemented_interfaces( interfaces_node, implemented_interfaces, module_qn, resolve_to_qn ) + # (H) TypeScript `class C implements I, J` lives in class_heritage > + # (H) implements_clause (no `interfaces` field), holding type_identifiers. + if class_heritage := find_child_by_type(class_node, cs.TS_CLASS_HERITAGE): + if implements_clause := find_child_by_type( + class_heritage, cs.TS_IMPLEMENTS_CLAUSE + ): + for child in implements_clause.children: + if child.type == cs.TS_TYPE_IDENTIFIER and child.text: + if name := safe_decode_text(child): + implemented_interfaces.append(resolve_to_qn(name, module_qn)) + + # (H) PHP `class C implements I, J` is a class_interface_clause of `name` nodes. 
+ if php_impl := find_child_by_type(class_node, cs.TS_PHP_CLASS_INTERFACE_CLAUSE): + for child in php_impl.children: + if name := php_base_simple_name(child): + implemented_interfaces.append(resolve_to_qn(name, module_qn)) + return implemented_interfaces @@ -322,6 +409,10 @@ def extract_java_interface_names( for child in interfaces_node.children: if child.type == cs.TS_TYPE_LIST: for type_child in child.children: - if type_child.type == cs.TS_TYPE_IDENTIFIER and type_child.text: - if interface_name := safe_decode_text(type_child): + # (H) Unwrap generic/qualified bases (`TBase`, `pkg.IScheme`) to + # (H) the base type_identifier; plain identifiers pass straight + # (H) through. Skips list punctuation (commas). + base = java_base_type_identifier(type_child) + if base is not None and base.text: + if interface_name := safe_decode_text(base): interface_list.append(resolve_to_qn(interface_name, module_qn)) diff --git a/codebase_rag/parsers/class_ingest/relationships.py b/codebase_rag/parsers/class_ingest/relationships.py index 6af794fac..07daccf37 100644 --- a/codebase_rag/parsers/class_ingest/relationships.py +++ b/codebase_rag/parsers/class_ingest/relationships.py @@ -19,6 +19,8 @@ def create_class_relationships( class_node: Node, class_qn: str, module_qn: str, + parent_label: str, + parent_qn: str, node_type: NodeType, is_exported: bool, language: cs.SupportedLanguage, @@ -34,7 +36,7 @@ def create_class_relationships( class_inheritance[class_qn] = parent_classes ingestor.ensure_relationship_batch( - (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, module_qn), + (parent_label, cs.KEY_QUALIFIED_NAME, parent_qn), cs.RelationshipType.DEFINES, (node_type, cs.KEY_QUALIFIED_NAME, class_qn), ) @@ -51,7 +53,9 @@ def create_class_relationships( node_type, class_qn, parent_class_qn, function_registry, ingestor ) - if class_node.type == cs.TS_CLASS_DECLARATION: + # (H) A class OR an enum can `implements` interfaces; both expose them via the + # (H) `interfaces` field (a 
super_interfaces clause), so handle both. + if class_node.type in (cs.TS_CLASS_DECLARATION, cs.TS_ENUM_DECLARATION): for interface_qn in pe.extract_implemented_interfaces( class_node, module_qn, resolve_to_qn ): diff --git a/codebase_rag/parsers/cpp/__init__.py b/codebase_rag/parsers/cpp/__init__.py index e69de29bb..1900172cc 100644 --- a/codebase_rag/parsers/cpp/__init__.py +++ b/codebase_rag/parsers/cpp/__init__.py @@ -0,0 +1,5 @@ +from .type_inference import CppTypeInferenceEngine + +__all__ = [ + "CppTypeInferenceEngine", +] diff --git a/codebase_rag/parsers/cpp/type_inference.py b/codebase_rag/parsers/cpp/type_inference.py new file mode 100644 index 000000000..c2f99035a --- /dev/null +++ b/codebase_rag/parsers/cpp/type_inference.py @@ -0,0 +1,189 @@ +from __future__ import annotations + +from tree_sitter import Node + +from ... import constants as cs +from ..utils import safe_decode_text + + +class CppTypeInferenceEngine: + # (H) Maps local variable / parameter names to their bare C++ type name within a + # (H) function or method body, so the resolver can bind a member-dispatch call + # (H) (`obj->method()` / `obj.method()`) to the method node on the receiver's + # (H) class instead of guessing by the bare method name. Bare names only: the + # (H) resolver turns a name into a class qn via the same _resolve_class_name path + # (H) the definition pass uses, so pointer/reference/const/template wrappers are + # (H) stripped here down to the underlying type identifier. 
+ __slots__ = () + + def build_local_variable_type_map( + self, caller_node: Node, module_qn: str + ) -> dict[str, str]: + decls: list[tuple[str, str]] = [] + if declarator := self._function_declarator(caller_node): + self._collect_parameters(declarator, decls) + if body := caller_node.child_by_field_name(cs.FIELD_BODY): + self._collect_body_declarations(body, decls) + # (H) The map is keyed by name only, with no knowledge of a call's lexical + # (H) position, so it cannot tell an outer `Zeta z` from an inner-block + # (H) `Alpha z` that shadows it. Rather than pick a write order that is wrong + # (H) for one of the two scopes, decline to infer any name declared with more + # (H) than one type: such a call falls back to name-only resolution instead of + # (H) getting a confidently wrong typed edge. (Same flat-map limitation the Go + # (H) engine carries; true scoping would need positional call resolution.) + var_types: dict[str, str] = {} + conflicting: set[str] = set() + for name, type_name in decls: + if name in conflicting: + continue + existing = var_types.get(name) + if existing is not None and existing != type_name: + del var_types[name] + conflicting.add(name) + continue + var_types[name] = type_name + return var_types + + def build_field_type_map(self, class_node: Node) -> dict[str, str]: + # (H) Map each data member of a C++ class to its bare type name, so a member + # (H) call `field_.method()` inside the class's methods can resolve via the + # (H) field's type. Fields live in the class body (often a header) separate from + # (H) out-of-line method bodies, so this is captured once at class ingestion and + # (H) looked up by the enclosing class qn at call resolution. 
+ field_types: dict[str, str] = {} + if body := class_node.child_by_field_name(cs.FIELD_BODY): + self._collect_fields(body, field_types) + return field_types + + def _collect_fields(self, node: Node, field_types: dict[str, str]) -> None: + for child in node.children: + # (H) A nested type / function / lambda opens its own member scope; its + # (H) declarations are not this class's fields. Preprocessor blocks + # (H) (`#ifdef ... #endif`) are transparent, so recurse through them to + # (H) reach fields declared conditionally. + if child.type in cs.CPP_NESTED_SCOPE_NODE_TYPES: + continue + if child.type == cs.CppNodeType.FIELD_DECLARATION: + self._record_field(child, field_types) + continue + self._collect_fields(child, field_types) + + def _record_field(self, node: Node, field_types: dict[str, str]) -> None: + type_node = node.child_by_field_name(cs.FIELD_TYPE) + if type_node is None or not (type_name := self._bare_type_name(type_node)): + return + for declarator in node.children_by_field_name(cs.FIELD_DECLARATOR): + # (H) A member function declaration (`void Lock();`) is also a + # (H) field_declaration, but its declarator is a function_declarator; + # (H) only data members are fields. + if declarator.type == cs.CppNodeType.FUNCTION_DECLARATOR: + continue + if (name := self._declarator_name(declarator)) is not None: + field_types.setdefault(name, type_name) + + def _function_declarator(self, caller_node: Node) -> Node | None: + # (H) The parameter_list hangs off the (possibly pointer/reference-wrapped) + # (H) function_declarator in the definition's declarator chain. 
+ declarator = caller_node.child_by_field_name(cs.FIELD_DECLARATOR) + while declarator is not None: + if declarator.type == cs.CppNodeType.FUNCTION_DECLARATOR: + return declarator + declarator = declarator.child_by_field_name(cs.FIELD_DECLARATOR) + return None + + def _collect_parameters( + self, declarator: Node, decls: list[tuple[str, str]] + ) -> None: + params = declarator.child_by_field_name(cs.KEY_PARAMETERS) + if params is None: + return + for param in params.children: + if param.type not in ( + cs.CppNodeType.PARAMETER_DECLARATION, + cs.CppNodeType.OPTIONAL_PARAMETER_DECLARATION, + ): + continue + self._record_declaration(param, decls) + + def _collect_body_declarations( + self, node: Node, decls: list[tuple[str, str]] + ) -> None: + for child in node.children: + # (H) A lambda / nested function / local class body opens its own scope; + # (H) its declarations are not locals of the enclosing function, so descend + # (H) no further or an inner `x` would be attributed to the outer `x`. + if child.type in cs.CPP_NESTED_SCOPE_NODE_TYPES: + continue + if child.type == cs.CppNodeType.DECLARATION: + self._record_declaration(child, decls) + # (H) Recurse into ordinary nested blocks (if/for/while/try bodies) so a + # (H) variable declared only in an inner block still resolves; conflicting + # (H) redecls across scopes are reconciled by the caller (drop-on-conflict). + self._collect_body_declarations(child, decls) + + def _record_declaration(self, node: Node, decls: list[tuple[str, str]]) -> None: + type_node = node.child_by_field_name(cs.FIELD_TYPE) + if type_node is None or not (type_name := self._bare_type_name(type_node)): + return + # (H) One statement may declare several variables sharing the leading type + # (H) (`Zeta a, b;`), each its own `declarator` field child; record them all. 
+ for declarator in node.children_by_field_name(cs.FIELD_DECLARATOR): + if (name := self._declarator_name(declarator)) is not None: + decls.append((name, type_name)) + + def _bare_type_name(self, type_node: Node) -> str | None: + match type_node.type: + case cs.CppNodeType.TYPE_IDENTIFIER: + return safe_decode_text(type_node) + case cs.CppNodeType.QUALIFIED_IDENTIFIER: + # (H) `ns::Foo` -> `Foo`: the resolver maps the bare class name to its + # (H) namespaced node qn via find_ending_with. + return self._rightmost_name(type_node) + case cs.CppNodeType.TEMPLATE_TYPE: + inner = type_node.child_by_field_name(cs.KEY_NAME) + return self._bare_type_name(inner) if inner is not None else None + case _: + return None + + def _rightmost_name(self, node: Node) -> str | None: + name_node = node.child_by_field_name(cs.KEY_NAME) + if name_node is not None and name_node.type in ( + cs.CppNodeType.TYPE_IDENTIFIER, + cs.CppNodeType.IDENTIFIER, + ): + return safe_decode_text(name_node) + text = safe_decode_text(node) + if not text: + return None + return text.rsplit(cs.SEPARATOR_DOUBLE_COLON, 1)[-1] or None + + def _declarator_name(self, declarator: Node | None) -> str | None: + # (H) Unwrap pointer/reference/init declarators down to the bound identifier. + current = declarator + while current is not None: + if current.type in ( + cs.CppNodeType.IDENTIFIER, + cs.CppNodeType.FIELD_IDENTIFIER, + ): + return safe_decode_text(current) + if inner := current.child_by_field_name(cs.FIELD_DECLARATOR): + current = inner + continue + # (H) `reference_declarator` (`T& x`) holds its identifier as a positional + # (H) child, not under the `declarator` field that pointer/init declarators + # (H) expose, so the field-based unwrap stalls; descend into the first + # (H) named declarator-bearing child instead. 
+ current = self._first_declarator_child(current) + return None + + def _first_declarator_child(self, node: Node) -> Node | None: + for child in node.children: + if child.type in ( + cs.CppNodeType.IDENTIFIER, + cs.CppNodeType.FIELD_IDENTIFIER, + cs.CppNodeType.REFERENCE_DECLARATOR, + cs.CppNodeType.POINTER_DECLARATOR, + cs.CppNodeType.INIT_DECLARATOR, + ): + return child + return None diff --git a/codebase_rag/parsers/cpp/utils.py b/codebase_rag/parsers/cpp/utils.py index de9669a33..c5b813d45 100644 --- a/codebase_rag/parsers/cpp/utils.py +++ b/codebase_rag/parsers/cpp/utils.py @@ -57,35 +57,43 @@ def build_qualified_name(node: Node, module_qn: str, name: str) -> str: return cs.SEPARATOR_DOT.join([module_qn, name]) +_EXPORT_CANDIDATE_TYPES = frozenset( + { + cs.CppNodeType.EXPORT, + cs.CppNodeType.EXPORT_KEYWORD, + cs.CppNodeType.IDENTIFIER, + cs.CppNodeType.PRIMITIVE_TYPE, + } +) + +_EXPORT_STOP_TYPES = frozenset( + { + cs.CppNodeType.DECLARATION, + cs.CppNodeType.FUNCTION_DEFINITION, + cs.CppNodeType.TEMPLATE_DECLARATION, + cs.CppNodeType.CLASS_SPECIFIER, + cs.CppNodeType.TRANSLATION_UNIT, + } +) + + def is_exported(node: Node) -> bool: current = node + export_text = cs.CppNodeType.EXPORT while current and current.parent: parent = current.parent - found_export = False for child in parent.children: if child == current: break - if child.text: - child_text = safe_decode_text(child) - if child_text == cs.CppNodeType.EXPORT and child.type in ( - cs.CppNodeType.EXPORT, - cs.CppNodeType.EXPORT_KEYWORD, - cs.CppNodeType.IDENTIFIER, - cs.CppNodeType.PRIMITIVE_TYPE, - ): - found_export = True - - if found_export: - return True + if ( + child.type in _EXPORT_CANDIDATE_TYPES + and child.text + and safe_decode_text(child) == export_text + ): + return True - if current.type in ( - cs.CppNodeType.DECLARATION, - cs.CppNodeType.FUNCTION_DEFINITION, - cs.CppNodeType.TEMPLATE_DECLARATION, - cs.CppNodeType.CLASS_SPECIFIER, - cs.CppNodeType.TRANSLATION_UNIT, - ): + if current.type 
in _EXPORT_STOP_TYPES: break current = current.parent diff --git a/codebase_rag/parsers/cpp_frontend/__init__.py b/codebase_rag/parsers/cpp_frontend/__init__.py new file mode 100644 index 000000000..eb67d2372 --- /dev/null +++ b/codebase_rag/parsers/cpp_frontend/__init__.py @@ -0,0 +1,14 @@ +from .frontend import ( + cpp_frontend_available, + find_compile_commands, + run_cpp_frontend, +) +from .qn import CppQnResolver, build_module_qn_map + +__all__ = [ + "CppQnResolver", + "build_module_qn_map", + "cpp_frontend_available", + "find_compile_commands", + "run_cpp_frontend", +] diff --git a/codebase_rag/parsers/cpp_frontend/constants.py b/codebase_rag/parsers/cpp_frontend/constants.py new file mode 100644 index 000000000..06c2f7735 --- /dev/null +++ b/codebase_rag/parsers/cpp_frontend/constants.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from ... import constants as cs + +# (H) libclang CursorKind members are registered dynamically (not static class +# (H) attributes), so they are matched by the stable NAME string that +# (H) `cursor.kind.name` yields at runtime, never via `ci.CursorKind.CLASS_DECL` +# (H) (which trips ty's unresolved-attribute). Same approach as the eval oracle +# (H) (evals/oracles/cpp_oracle.py). + +KIND_NAMESPACE = "NAMESPACE" +KIND_DESTRUCTOR = "DESTRUCTOR" +KIND_BASE_SPECIFIER = "CXX_BASE_SPECIFIER" +KIND_TRANSLATION_UNIT = "TRANSLATION_UNIT" +KIND_CALL_EXPR = "CALL_EXPR" + +# (H) class/struct/union and their templated forms -> a Class node (cgr collapses +# (H) struct/class to Class, matching parsers/cpp + the oracle). +CLASS_KIND_NAMES: frozenset[str] = frozenset( + {"CLASS_DECL", "STRUCT_DECL", "CLASS_TEMPLATE"} +) +# (H) free functions and function templates -> a Function node, UNLESS their +# (H) semantic parent is a class (a templated method is a FUNCTION_TEMPLATE whose +# (H) parent is the class), in which case they are Methods. 
+FUNCTION_KIND_NAMES: frozenset[str] = frozenset({"FUNCTION_DECL", "FUNCTION_TEMPLATE"}) +# (H) members -> a Method node. +METHOD_KIND_NAMES: frozenset[str] = frozenset( + {"CXX_METHOD", "CONSTRUCTOR", "DESTRUCTOR", "CONVERSION_FUNCTION"} +) +# (H) `using Alias = T;` (TYPE_ALIAS_DECL) and `typedef T Alias;` (TYPEDEF_DECL) +# (H) -> a Type node, matching how the tree-sitter path maps C++ alias/typedef +# (H) declarations (TS_TYPE_ALIAS_DECLARATION) and Go/Rust type decls. +TYPE_KIND_NAMES: frozenset[str] = frozenset({"TYPE_ALIAS_DECL", "TYPEDEF_DECL"}) + +LABEL_MODULE = cs.NodeLabel.MODULE.value +LABEL_CLASS = cs.NodeLabel.CLASS.value +LABEL_FUNCTION = cs.NodeLabel.FUNCTION.value +LABEL_METHOD = cs.NodeLabel.METHOD.value +LABEL_TYPE = cs.NodeLabel.TYPE.value diff --git a/codebase_rag/parsers/cpp_frontend/frontend.py b/codebase_rag/parsers/cpp_frontend/frontend.py new file mode 100644 index 000000000..bf48fac7f --- /dev/null +++ b/codebase_rag/parsers/cpp_frontend/frontend.py @@ -0,0 +1,381 @@ +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +from ... import constants as cs +from ...services import IngestorProtocol +from ...types_defs import ( + FunctionRegistryTrieProtocol, + NodeType, + PropertyDict, + SimpleNameLookup, +) +from . import constants as fc +from .qn import CppQnResolver + +if TYPE_CHECKING: + from clang.cindex import Cursor + +_NodeKey = tuple[str, str] +_EdgeKey = tuple[str, str, str, str, str] +_Scope = tuple[str, str] | None + +_COMPILE_COMMANDS = "compile_commands.json" +_BUILD_DIR = "build" + + +def cpp_frontend_available() -> bool: + try: + import clang.cindex as ci + + ci.Index.create() + except Exception: + return False + return True + + +def find_compile_commands(start: Path) -> Path | None: + # (H) Discover the directory holding a compile_commands.json: the indexed + # (H) target, a conventional build/ subdir, then walking up to the repo root. 
+ start = start.resolve() + seen: set[Path] = set() + for candidate in (start, start / _BUILD_DIR, *start.parents): + if candidate in seen: + continue + seen.add(candidate) + if (candidate / _COMPILE_COMMANDS).is_file(): + return candidate + return None + + +def _base_simple_name(spelling: str) -> str: + flat = spelling.replace(cs.SEPARATOR_DOUBLE_COLON, cs.SEPARATOR_DOT) + return flat.rsplit(cs.SEPARATOR_DOT, 1)[-1] + + +def _classify(cursor: Cursor) -> str | None: + kind = cursor.kind.name + if kind in fc.CLASS_KIND_NAMES: + return fc.LABEL_CLASS + if kind in fc.TYPE_KIND_NAMES: + return fc.LABEL_TYPE + if kind in fc.METHOD_KIND_NAMES: + return fc.LABEL_METHOD + if kind in fc.FUNCTION_KIND_NAMES: + parent = cursor.semantic_parent + if parent is not None and parent.kind.name in fc.CLASS_KIND_NAMES: + return fc.LABEL_METHOD + return fc.LABEL_FUNCTION + return None + + +class _Collector: + def __init__( + self, + resolver: CppQnResolver, + function_registry: FunctionRegistryTrieProtocol | None = None, + simple_name_lookup: SimpleNameLookup | None = None, + structural_elements: dict[Path, str | None] | None = None, + ) -> None: + self.resolver = resolver + self.function_registry = function_registry + self.simple_name_lookup = simple_name_lookup + self.structural_elements = structural_elements + self.nodes: dict[_NodeKey, tuple[str, PropertyDict, bool]] = {} + self.modules: dict[str, PropertyDict] = {} + self.edges: set[_EdgeKey] = set() + self.covered: set[str] = set() + + def _node_props(self, cursor: Cursor, qn: str, name: str, rel: str) -> PropertyDict: + return { + cs.KEY_QUALIFIED_NAME: qn, + cs.KEY_NAME: name, + cs.KEY_DECORATORS: [], + cs.KEY_START_LINE: cursor.location.line, + cs.KEY_END_LINE: cursor.extent.end.line, + cs.KEY_DOCSTRING: None, + cs.KEY_IS_EXPORTED: False, + cs.KEY_PATH: rel, + cs.KEY_ABSOLUTE_PATH: Path(cursor.location.file.name).resolve().as_posix(), + } + + def _add_node(self, label: str, qn: str, props: PropertyDict, is_def: bool) -> None: 
+ key: _NodeKey = (label, qn) + existing = self.nodes.get(key) + # (H) Prefer the definition cursor's properties (its span is the accurate + # (H) one) over a mere declaration's, matching cgr where the deferred + # (H) out-of-line definition is ingested last and wins the MERGE. + if existing is None or (is_def and not existing[2]): + self.nodes[key] = (label, props, is_def) + + def _add_module(self, module_qn: str, rel: str, absolute_file: str) -> None: + if module_qn in self.modules: + return + self.modules[module_qn] = { + cs.KEY_QUALIFIED_NAME: module_qn, + cs.KEY_NAME: Path(rel).name, + cs.KEY_PATH: rel, + cs.KEY_ABSOLUTE_PATH: Path(absolute_file).resolve().as_posix(), + } + + def _add_edge( + self, rel_type: str, from_label: str, from_qn: str, to_label: str, to_qn: str + ) -> None: + self.edges.add((rel_type, from_label, from_qn, to_label, to_qn)) + + def process(self, cursor: Cursor, enclosing: _Scope) -> _Scope: + # (H) Returns the scope its subtree should attribute calls to: the node's + # (H) own (label, qn) when it is a function/method, else the unchanged + # (H) enclosing scope. + if cursor.kind.name == fc.KIND_CALL_EXPR: + self._process_call(cursor, enclosing) + return None + label = _classify(cursor) + if label is None or cursor.location.file is None: + return None + if label == fc.LABEL_CLASS and not cursor.is_definition(): + return None # (H) forward declarations are not nodes + rel = self.resolver.rel_path(cursor.location.file.name) + module_qn = self.resolver.module_qn(cursor.location.file.name) + if rel is None or module_qn is None: + return None # (H) outside the indexed repo (system headers, etc.) 
+ + if label == fc.LABEL_METHOD: + return self._process_method(cursor, rel) + if label == fc.LABEL_TYPE: + self._process_type(cursor, rel, module_qn) + return None + + qn = ( + self.resolver.class_qn(cursor) + if label == fc.LABEL_CLASS + else self.resolver.function_qn(cursor) + ) + if qn is None: + return None + self.covered.add(rel) + self._add_module(module_qn, rel, cursor.location.file.name) + self._add_node( + label, + qn, + self._node_props(cursor, qn, cursor.spelling, rel), + cursor.is_definition(), + ) + self._add_edge( + cs.RelationshipType.DEFINES, fc.LABEL_MODULE, module_qn, label, qn + ) + if label == fc.LABEL_CLASS: + self._emit_inheritance(cursor, qn) + return None + return (label, qn) + + def _process_method(self, cursor: Cursor, rel: str) -> _Scope: + qn = self.resolver.method_qn(cursor) + parent = cursor.semantic_parent + if qn is None or parent is None: + return None + class_qn = self.resolver.class_qn(parent) + if class_qn is None: + return None + self.covered.add(rel) + name = self.resolver.member_name(cursor) + self._add_node( + fc.LABEL_METHOD, + qn, + self._node_props(cursor, qn, name, rel), + cursor.is_definition(), + ) + self._add_edge( + cs.RelationshipType.DEFINES_METHOD, + fc.LABEL_CLASS, + class_qn, + fc.LABEL_METHOD, + qn, + ) + return (fc.LABEL_METHOD, qn) + + def _process_type(self, cursor: Cursor, rel: str, module_qn: str) -> None: + # (H) A `using`/`typedef` alias becomes a Type node, DEFINED by its + # (H) enclosing Class (member alias) or its Module (namespace/file scope), + # (H) matching the tree-sitter alias path and Go/Rust type decls. 
+ qn = self.resolver.type_qn(cursor) + if qn is None: + return + self.covered.add(rel) + self._add_module(module_qn, rel, cursor.location.file.name) + self._add_node( + fc.LABEL_TYPE, + qn, + self._node_props(cursor, qn, cursor.spelling, rel), + cursor.is_definition(), + ) + parent = cursor.semantic_parent + if parent is not None and parent.kind.name in fc.CLASS_KIND_NAMES: + class_qn = self.resolver.class_qn(parent) + if class_qn is not None: + self._add_edge( + cs.RelationshipType.DEFINES, + fc.LABEL_CLASS, + class_qn, + fc.LABEL_TYPE, + qn, + ) + return + self._add_edge( + cs.RelationshipType.DEFINES, fc.LABEL_MODULE, module_qn, fc.LABEL_TYPE, qn + ) + + def _process_call(self, cursor: Cursor, enclosing: _Scope) -> None: + # (H) Resolve the callee semantically via cursor.referenced (libclang did + # (H) the overload/name resolution already), preferring its definition so + # (H) the edge targets the node the frontend emitted for the body. + referenced = cursor.referenced + if referenced is None: + return + callee = referenced.get_definition() or referenced + callee_label = _classify(callee) + if callee_label is None or callee_label == fc.LABEL_CLASS: + return + callee_qn = ( + self.resolver.method_qn(callee) + if callee_label == fc.LABEL_METHOD + else self.resolver.function_qn(callee) + ) + if callee_qn is None: + return # (H) callee outside the indexed repo (stdlib, etc.) + caller = enclosing or self._module_caller(cursor) + if caller is None: + return + caller_label, caller_qn = caller + self._add_edge( + cs.RelationshipType.CALLS, caller_label, caller_qn, callee_label, callee_qn + ) + + def _module_caller(self, cursor: Cursor) -> _Scope: + # (H) A call with no enclosing function/method runs at module load time + # (H) (a default member initializer or a file/namespace-scope global + # (H) initializer); the tree-sitter path attributes these to the Module, + # (H) so mirror that instead of dropping the edge. 
The call site must be + # (H) inside the indexed repo (module_qn is None for system headers). + if cursor.location.file is None: + return None + file_name = cursor.location.file.name + module_qn = self.resolver.module_qn(file_name) + rel = self.resolver.rel_path(file_name) + if module_qn is None or rel is None: + return None + self._add_module(module_qn, rel, file_name) + return (fc.LABEL_MODULE, module_qn) + + def _emit_inheritance(self, cursor: Cursor, derived_qn: str) -> None: + for child in cursor.get_children(): + if child.kind.name != fc.KIND_BASE_SPECIFIER: + continue + base_decl = child.type.get_declaration() + base_qn = self.resolver.class_qn(base_decl) if base_decl else None + if base_qn is None: + base_qn = _base_simple_name(child.type.spelling) + self._add_edge( + cs.RelationshipType.INHERITS, + fc.LABEL_CLASS, + derived_qn, + fc.LABEL_CLASS, + base_qn, + ) + + def _contains_module_parent(self, rel: str) -> tuple[str, str, str]: + # (H) Mirror DefinitionProcessor's module-parent choice: a Package if the + # (H) directory is one, else a Folder, else the Project at the root. 
+ parent_rel = Path(rel).parent + package_qn = ( + self.structural_elements.get(parent_rel) + if self.structural_elements is not None + else None + ) + if package_qn: + return (cs.NodeLabel.PACKAGE, cs.KEY_QUALIFIED_NAME, package_qn) + if parent_rel != Path(cs.SEPARATOR_DOT): + return (cs.NodeLabel.FOLDER, cs.KEY_PATH, parent_rel.as_posix()) + return (cs.NodeLabel.PROJECT, cs.KEY_NAME, self.resolver.project_name) + + def _register(self, label: str, props: PropertyDict) -> None: + if self.function_registry is None: + return + qn = props[cs.KEY_QUALIFIED_NAME] + if not isinstance(qn, str): + return + self.function_registry[qn] = NodeType(label) + name = props[cs.KEY_NAME] + if self.simple_name_lookup is not None and isinstance(name, str): + self.simple_name_lookup[name].add(qn) + + def flush(self, ingestor: IngestorProtocol) -> None: + for module_qn, props in self.modules.items(): + ingestor.ensure_node_batch(fc.LABEL_MODULE, props) + path = props[cs.KEY_PATH] + if self.structural_elements is not None and isinstance(path, str): + ingestor.ensure_relationship_batch( + self._contains_module_parent(path), + cs.RelationshipType.CONTAINS_MODULE, + (fc.LABEL_MODULE, cs.KEY_QUALIFIED_NAME, module_qn), + ) + for label, props, _ in self.nodes.values(): + ingestor.ensure_node_batch(label, props) + self._register(label, props) + for rel_type, from_label, from_qn, to_label, to_qn in self.edges: + ingestor.ensure_relationship_batch( + (from_label, cs.KEY_QUALIFIED_NAME, from_qn), + rel_type, + (to_label, cs.KEY_QUALIFIED_NAME, to_qn), + ) + + +def _walk(cursor: Cursor, collector: _Collector, enclosing: _Scope = None) -> None: + for child in cursor.get_children(): + produced = collector.process(child, enclosing) + _walk(child, collector, produced or enclosing) + + +def run_cpp_frontend( + ingestor: IngestorProtocol, + repo_path: Path, + project_name: str, + compdb_dir: Path, + function_registry: FunctionRegistryTrieProtocol | None = None, + simple_name_lookup: SimpleNameLookup | 
None = None, + structural_elements: dict[Path, str | None] | None = None, +) -> frozenset[str]: + """Index C/C++ via libclang + a compile_commands.json (macro-accurate). + + Parses every translation unit in the compilation database, walks the cursor + tree, and emits Module/Class/Function/Method nodes plus DEFINES / + DEFINES_METHOD / INHERITS edges and exact spans straight to the ingestor, + synthesizing the same qualified names the tree-sitter path would. Returns the + set of repo-relative files it covered (so callers can skip them in the + tree-sitter pass). + + When ``function_registry`` / ``simple_name_lookup`` are supplied, emitted + definitions are registered for cross-file resolution; when + ``structural_elements`` is supplied, each Module is linked to its parent via + CONTAINS_MODULE (the full-replace path used by GraphUpdater). + """ + import clang.cindex as ci + + resolver = CppQnResolver(repo_path, project_name) + collector = _Collector( + resolver, function_registry, simple_name_lookup, structural_elements + ) + + db = ci.CompilationDatabase.fromDirectory(str(Path(compdb_dir).resolve())) + index = ci.Index.create() + for command in db.getAllCompileCommands(): + args = list(command.arguments)[1:] + try: + tu = index.parse(None, args=args) + except ci.TranslationUnitLoadError: + continue + _walk(tu.cursor, collector) + + collector.flush(ingestor) + return frozenset(collector.covered) diff --git a/codebase_rag/parsers/cpp_frontend/qn.py b/codebase_rag/parsers/cpp_frontend/qn.py new file mode 100644 index 000000000..b427aa232 --- /dev/null +++ b/codebase_rag/parsers/cpp_frontend/qn.py @@ -0,0 +1,171 @@ +from __future__ import annotations + +import os +from pathlib import Path +from typing import TYPE_CHECKING + +from ... import constants as cs +from ...utils.path_utils import should_skip_rel_file +from ..cpp.utils import convert_operator_symbol_to_name +from . 
import constants as fc + +if TYPE_CHECKING: + from clang.cindex import Cursor + + +def _eligible_rel_files(repo_path: Path) -> list[str]: + # (H) Reproduce GraphUpdater._collect_eligible_files' ordering exactly: an + # (H) os.walk with dirnames AND filenames sorted, top-down. The module-qn + # (H) disambiguation below depends on this order (the file processed LATER in + # (H) a basename collision is the one that gets its extension appended), so it + # (H) must match cgr's tree-sitter pass to produce identical qualified names. + repo_str = str(repo_path) + repo_prefix_len = len(repo_str) + 1 + rels: list[str] = [] + for dirpath, dirnames, filenames in os.walk(repo_str): + rel_dir = "" if len(dirpath) < repo_prefix_len else dirpath[repo_prefix_len:] + rel_dir = rel_dir.replace(os.sep, "/") + dir_parts = tuple(rel_dir.split("/")) if rel_dir else () + dir_prefix = f"{rel_dir}/" if rel_dir else "" + dirnames[:] = sorted(dirnames) + for fname in sorted(filenames): + dot = fname.rfind(".") + suffix = fname[dot:] if dot != -1 else "" + rel_path_str = f"{dir_prefix}{fname}" + if not should_skip_rel_file(rel_path_str, dir_parts, suffix): + rels.append(rel_path_str) + return rels + + +def _base_module_qn(rel: str, project_name: str) -> str: + rel_path = Path(rel) + if rel_path.name in (cs.INIT_PY, cs.MOD_RS): + parts = rel_path.parent.parts + else: + parts = rel_path.with_suffix("").parts + return cs.SEPARATOR_DOT.join([project_name, *parts]) + + +def build_module_qn_map(repo_path: Path, project_name: str) -> dict[str, str]: + # (H) Mirror DefinitionProcessor._disambiguate_module_qn: a base qn is claimed + # (H) by the first file (in walk order); a later file colliding on that base qn + # (H) gets its extension appended (foo.cpp -> proj.foo, foo.h -> proj.foo.h). 
+ claimed: dict[str, str] = {} + result: dict[str, str] = {} + for rel in _eligible_rel_files(repo_path): + base = _base_module_qn(rel, project_name) + existing = claimed.get(base) + if existing is None or existing == rel: + final = base + else: + suffix = Path(rel).suffix.lstrip(cs.SEPARATOR_DOT) + final = f"{base}{cs.SEPARATOR_DOT}{suffix}" + claimed.setdefault(final, rel) + result[rel] = final + return result + + +class CppQnResolver: + """Synthesizes cgr-correct qualified names for libclang cursors. + + The qns must be byte-identical to what the tree-sitter C++ path produces + (parsers/cpp/utils.build_qualified_name + the deferred out-of-class method + resolver), because the whole graph keys on them. + """ + + def __init__(self, repo_path: Path, project_name: str) -> None: + self.repo_path = repo_path.resolve() + self.project_name = project_name + self._module_qn = build_module_qn_map(self.repo_path, project_name) + + def rel_path(self, absolute_file: str) -> str | None: + try: + return Path(absolute_file).resolve().relative_to(self.repo_path).as_posix() + except ValueError: + return None + + def module_qn(self, absolute_file: str) -> str | None: + rel = self.rel_path(absolute_file) + if rel is None: + return None + return self._module_qn.get(rel) + + def _namespace_chain(self, cursor: Cursor) -> list[str]: + parts: list[str] = [] + parent = cursor.semantic_parent + while parent is not None and parent.kind.name == fc.KIND_NAMESPACE: + if parent.spelling: # (H) skip anonymous namespaces (no name segment) + parts.append(parent.spelling) + parent = parent.semantic_parent + parts.reverse() + return parts + + def member_name(self, cursor: Cursor) -> str: + # (H) Mirror cpp.utils.extract_operator_name / extract_destructor_name: + # (H) destructors keep their `~Name` spelling, operators map their symbol + # (H) through CPP_OPERATOR_SYMBOL_MAP; everything else is its plain name. 
+ spelling = cursor.spelling + if cursor.kind.name == fc.KIND_DESTRUCTOR: + return spelling + if self._is_operator_spelling(spelling): + symbol = spelling[len(cs.CPP_OPERATOR_TEXT_PREFIX) :].strip() + return convert_operator_symbol_to_name(symbol) + return spelling + + @staticmethod + def _is_operator_spelling(spelling: str) -> bool: + prefix = cs.CPP_OPERATOR_TEXT_PREFIX + if not spelling.startswith(prefix): + return False + rest = spelling[len(prefix) :] + # (H) `operator+`, `operator[]`, `operator int` are operators/conversions; + # (H) an identifier like `operatorState` is not (next char is alnum/_). + return not rest or not (rest[0].isalnum() or rest[0] == cs.CHAR_UNDERSCORE) + + def class_qn(self, cursor: Cursor) -> str | None: + if cursor.location.file is None: + return None + module_qn = self.module_qn(cursor.location.file.name) + if module_qn is None: + return None + parts = [module_qn, *self._namespace_chain(cursor), cursor.spelling] + return cs.SEPARATOR_DOT.join(parts) + + def function_qn(self, cursor: Cursor) -> str | None: + if cursor.location.file is None: + return None + module_qn = self.module_qn(cursor.location.file.name) + if module_qn is None: + return None + parts = [module_qn, *self._namespace_chain(cursor), self.member_name(cursor)] + return cs.SEPARATOR_DOT.join(parts) + + def type_qn(self, cursor: Cursor) -> str | None: + # (H) A class-scoped `using`/`typedef` is anchored to its enclosing class + # (H) (e.g. proj.Box.Handle); a namespace/file-scoped one mirrors a free + # (H) function's qn (module + namespace chain + name). 
+ parent = cursor.semantic_parent + if parent is not None and parent.kind.name in fc.CLASS_KIND_NAMES: + class_qn = self.class_qn(parent) + if class_qn is None: + return None + return cs.SEPARATOR_DOT.join([class_qn, cursor.spelling]) + if cursor.location.file is None: + return None + module_qn = self.module_qn(cursor.location.file.name) + if module_qn is None: + return None + parts = [module_qn, *self._namespace_chain(cursor), cursor.spelling] + return cs.SEPARATOR_DOT.join(parts) + + def method_qn(self, cursor: Cursor) -> str | None: + # (H) A method's qn is anchored to its CLASS's declaring file (the header), + # (H) via semantic_parent, NOT the out-of-line definition file. This mirrors + # (H) cgr's deferred out-of-class method resolver. + parent = cursor.semantic_parent + if parent is None: + return None + class_qn = self.class_qn(parent) + if class_qn is None: + return None + return cs.SEPARATOR_DOT.join([class_qn, self.member_name(cursor)]) diff --git a/codebase_rag/parsers/definition_processor.py b/codebase_rag/parsers/definition_processor.py index 8110140f8..bfd7d01e0 100644 --- a/codebase_rag/parsers/definition_processor.py +++ b/codebase_rag/parsers/definition_processor.py @@ -4,16 +4,19 @@ from typing import TYPE_CHECKING from loguru import logger +from tree_sitter import QueryCursor from .. import constants as cs from .. 
import logs as ls +from ..parser_loader import COMBINED_FUNC_CLASS_IMPORT_QUERIES from ..types_defs import ASTNode, FunctionRegistryTrieProtocol, SimpleNameLookup +from ..utils.path_utils import cached_relative_path, cached_resolve_posix from .class_ingest import ClassIngestMixin from .dependency_parser import parse_dependencies from .function_ingest import FunctionIngestMixin from .handlers import get_handler from .js_ts.ingest import JsTsIngestMixin -from .utils import safe_decode_with_fallback +from .utils import safe_decode_with_fallback, sorted_captures if TYPE_CHECKING: from ..services import IngestorProtocol @@ -38,6 +41,7 @@ def __init__( simple_name_lookup: SimpleNameLookup, import_processor: ImportProcessor, module_qn_to_file_path: dict[str, Path], + func_class_captures_cache: dict[Path, dict] | None = None, ): super().__init__() self.ingestor = ingestor @@ -48,7 +52,28 @@ def __init__( self.import_processor = import_processor self.module_qn_to_file_path = module_qn_to_file_path self.class_inheritance: dict[str, list[str]] = {} + # (H) {class_qn: {field_name: bare_type_name}} for C++ member fields, so a + # (H) member call `field_.method()` in a (possibly out-of-line, cross-file) + # (H) method resolves via the field's declared type. Populated at class + # (H) ingestion, read by the type-inference engine at call resolution. + self.class_field_types: dict[str, dict[str, str]] = {} + self._deferred_cpp_methods: list = [] + self._deferred_go_methods: list = [] + self._deferred_forward_decls: list = [] self._handler = get_handler(cs.SupportedLanguage.PYTHON) + self._func_class_captures_cache = func_class_captures_cache + + def _disambiguate_module_qn(self, module_qn: str, file_path: Path) -> str: + # (H) Two files that share a basename but differ by extension (foo.py / + # (H) foo.cpp) strip to the same module qn. 
Append the extension to the + # (H) later one so their module nodes and all derived class/method qns stay + # (H) distinct instead of colliding under the qualified_name constraint. + existing = self.module_qn_to_file_path.get(module_qn) + if existing is None or existing == file_path: + return module_qn + return ( + f"{module_qn}{cs.SEPARATOR_DOT}{file_path.suffix.lstrip(cs.SEPARATOR_DOT)}" + ) def process_file( self, @@ -56,10 +81,12 @@ def process_file( language: cs.SupportedLanguage, queries: dict[cs.SupportedLanguage, LanguageQueries], structural_elements: dict[Path, str | None], + source_bytes: bytes | None = None, + pre_parsed: tuple[ASTNode, dict[str, list] | None] | None = None, ) -> tuple[ASTNode, cs.SupportedLanguage] | None: if isinstance(file_path, str): file_path = Path(file_path) - relative_path = file_path.relative_to(self.repo_path) + relative_path = cached_relative_path(file_path, self.repo_path) relative_path_str = str(relative_path) logger.info( ls.DEF_PARSING_AST.format(language=language, path=relative_path_str) @@ -75,15 +102,19 @@ def process_file( return None self._handler = get_handler(language) - source_bytes = file_path.read_bytes() - lang_queries = queries[language] - parser = lang_queries.get(cs.KEY_PARSER) - if not parser: - logger.warning(ls.DEF_NO_PARSER.format(language=language)) - return None - - tree = parser.parse(source_bytes) - root_node = tree.root_node + if pre_parsed is not None: + root_node, pre_combined_captures = pre_parsed + else: + if source_bytes is None: + source_bytes = file_path.read_bytes() + lang_queries = queries[language] + parser = lang_queries.get(cs.KEY_PARSER) + if not parser: + logger.warning(ls.DEF_NO_PARSER.format(language=language)) + return None + tree = parser.parse(source_bytes) + root_node = tree.root_node + pre_combined_captures = None module_qn = cs.SEPARATOR_DOT.join( [self.project_name] + list(relative_path.with_suffix("").parts) @@ -92,6 +123,7 @@ def process_file( module_qn = 
cs.SEPARATOR_DOT.join( [self.project_name] + list(relative_path.parent.parts) ) + module_qn = self._disambiguate_module_qn(module_qn, file_path) self.module_qn_to_file_path[module_qn] = file_path self.ingestor.ensure_node_batch( @@ -100,6 +132,7 @@ def process_file( cs.KEY_QUALIFIED_NAME: module_qn, cs.KEY_NAME: file_path.name, cs.KEY_PATH: relative_path_str, + cs.KEY_ABSOLUTE_PATH: cached_resolve_posix(file_path), }, ) @@ -120,22 +153,61 @@ def process_file( (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, module_qn), ) - self.import_processor.parse_imports(root_node, module_qn, language, queries) - self._ingest_missing_import_patterns( - root_node, module_qn, language, queries + if pre_combined_captures is not None: + combined_captures = pre_combined_captures + else: + combined_captures = None + combined_query = COMBINED_FUNC_CLASS_IMPORT_QUERIES.get(language) + if combined_query: + cursor = QueryCursor(combined_query) + combined_captures = sorted_captures(cursor, root_node) + if self._func_class_captures_cache is not None and combined_captures: + cache_entry: dict[str, list] = {} + for key in (cs.CAPTURE_FUNCTION, cs.CAPTURE_CLASS, cs.CAPTURE_CALL): + if key in combined_captures: + cache_entry[key] = combined_captures[key] + if cache_entry: + self._func_class_captures_cache[file_path] = cache_entry + + self.import_processor.parse_imports( + root_node, + module_qn, + language, + queries, + pre_captures=combined_captures, ) + if language in (cs.SupportedLanguage.JS, cs.SupportedLanguage.TS): + self._ingest_missing_import_patterns( + root_node, module_qn, language, queries + ) if language == cs.SupportedLanguage.CPP: self._ingest_cpp_module_declarations(root_node, module_qn, file_path) - self._ingest_all_functions(root_node, module_qn, language, queries) - self._ingest_classes_and_methods(root_node, module_qn, language, queries) - self._ingest_object_literal_methods(root_node, module_qn, language, queries) - self._ingest_commonjs_exports(root_node, module_qn, language, 
queries) - if language in {cs.SupportedLanguage.JS, cs.SupportedLanguage.TS}: - self._ingest_es6_exports(root_node, module_qn, language, queries) - self._ingest_assignment_arrow_functions( - root_node, module_qn, language, queries + self._ingest_all_functions( + root_node, + module_qn, + language, + queries, + combined_captures=combined_captures, + ) + self._ingest_classes_and_methods( + root_node, + module_qn, + language, + queries, + combined_captures=combined_captures, ) - self._ingest_prototype_inheritance(root_node, module_qn, language, queries) + if language in (cs.SupportedLanguage.JS, cs.SupportedLanguage.TS): + self._ingest_object_literal_methods( + root_node, module_qn, language, queries + ) + self._ingest_commonjs_exports(root_node, module_qn, language, queries) + self._ingest_es6_exports(root_node, module_qn, language, queries) + self._ingest_assignment_arrow_functions( + root_node, module_qn, language, queries + ) + self._ingest_prototype_inheritance( + root_node, module_qn, language, queries + ) return (root_node, language) diff --git a/codebase_rag/parsers/dependency_parser.py b/codebase_rag/parsers/dependency_parser.py index 61f7d4b92..94a66ad87 100644 --- a/codebase_rag/parsers/dependency_parser.py +++ b/codebase_rag/parsers/dependency_parser.py @@ -26,11 +26,15 @@ def _extract_pep508_package_name(dep_string: str) -> tuple[str, str]: class DependencyParser: + __slots__ = () + def parse(self, file_path: Path) -> list[Dependency]: raise NotImplementedError class PyProjectTomlParser(DependencyParser): + __slots__ = () + def parse(self, file_path: Path) -> list[Dependency]: dependencies: list[Dependency] = [] try: @@ -72,6 +76,8 @@ def parse(self, file_path: Path) -> list[Dependency]: class RequirementsTxtParser(DependencyParser): + __slots__ = () + def parse(self, file_path: Path) -> list[Dependency]: dependencies: list[Dependency] = [] try: @@ -92,6 +98,8 @@ def parse(self, file_path: Path) -> list[Dependency]: class 
PackageJsonParser(DependencyParser): + __slots__ = () + def parse(self, file_path: Path) -> list[Dependency]: dependencies: list[Dependency] = [] try: @@ -120,6 +128,8 @@ def _load_and_collect_deps( class CargoTomlParser(DependencyParser): + __slots__ = () + def parse(self, file_path: Path) -> list[Dependency]: dependencies: list[Dependency] = [] try: @@ -148,6 +158,8 @@ def parse(self, file_path: Path) -> list[Dependency]: class GoModParser(DependencyParser): + __slots__ = () + def parse(self, file_path: Path) -> list[Dependency]: dependencies: list[Dependency] = [] try: @@ -186,6 +198,8 @@ def parse(self, file_path: Path) -> list[Dependency]: class GemfileParser(DependencyParser): + __slots__ = () + def parse(self, file_path: Path) -> list[Dependency]: dependencies: list[Dependency] = [] try: @@ -206,6 +220,8 @@ def parse(self, file_path: Path) -> list[Dependency]: class ComposerJsonParser(DependencyParser): + __slots__ = () + def parse(self, file_path: Path) -> list[Dependency]: dependencies: list[Dependency] = [] try: @@ -229,6 +245,8 @@ def parse(self, file_path: Path) -> list[Dependency]: class CsprojParser(DependencyParser): + __slots__ = () + def parse(self, file_path: Path) -> list[Dependency]: dependencies: list[Dependency] = [] try: diff --git a/codebase_rag/parsers/factory.py b/codebase_rag/parsers/factory.py index a6b8a244c..ee55551ba 100644 --- a/codebase_rag/parsers/factory.py +++ b/codebase_rag/parsers/factory.py @@ -16,6 +16,25 @@ class ProcessorFactory: + __slots__ = ( + "ingestor", + "repo_path", + "project_name", + "queries", + "function_registry", + "simple_name_lookup", + "ast_cache", + "unignore_paths", + "exclude_paths", + "module_qn_to_file_path", + "_import_processor", + "_structure_processor", + "_definition_processor", + "_type_inference", + "_call_processor", + "_func_class_captures_cache", + ) + def __init__( self, ingestor: IngestorProtocol, @@ -39,6 +58,7 @@ def __init__( self.exclude_paths = exclude_paths 
self.module_qn_to_file_path: dict[str, Path] = {} + self._func_class_captures_cache: dict[Path, dict] = {} self._import_processor: ImportProcessor | None = None self._structure_processor: StructureProcessor | None = None @@ -81,6 +101,7 @@ def definition_processor(self) -> DefinitionProcessor: simple_name_lookup=self.simple_name_lookup, import_processor=self.import_processor, module_qn_to_file_path=self.module_qn_to_file_path, + func_class_captures_cache=self._func_class_captures_cache, ) return self._definition_processor @@ -97,6 +118,7 @@ def type_inference(self) -> TypeInferenceEngine: module_qn_to_file_path=self.module_qn_to_file_path, class_inheritance=self.definition_processor.class_inheritance, simple_name_lookup=self.simple_name_lookup, + class_field_types=self.definition_processor.class_field_types, ) return self._type_inference diff --git a/codebase_rag/parsers/function_ingest.py b/codebase_rag/parsers/function_ingest.py index 1d32186e0..d87240c63 100644 --- a/codebase_rag/parsers/function_ingest.py +++ b/codebase_rag/parsers/function_ingest.py @@ -17,11 +17,13 @@ PropertyDict, SimpleNameLookup, ) -from ..utils.fqn_resolver import resolve_fqn_from_ast +from ..utils.path_utils import cached_relative_path, cached_resolve_posix from .cpp import utils as cpp_utils +from .go import utils as go_utils from .lua import utils as lua_utils from .rs import utils as rs_utils from .utils import ( + callable_parameter_indices, get_function_captures, ingest_method, is_method_node, @@ -40,7 +42,32 @@ class FunctionResolution(NamedTuple): is_exported: bool +class _DeferredMethod(NamedTuple): + """Out-of-class C++ method whose class hasn't been parsed yet.""" + + method_name: str + class_name: str + fallback_class_qn: str + method_props: PropertyDict + + +class _DeferredGoMethod(NamedTuple): + """Go receiver method, linked to its receiver type once all types are known.""" + + method_node: Node + module_qn: str + receiver_type: str + file_path: Path | None + + +# (H) Go 
node labels a receiver type can resolve to (struct -> Class, defined +# (H) type/alias -> Type, interface -> Interface); used to pick the declaring +# (H) type out of same-named candidates when binding a cross-file method. +_GO_TYPE_NODE_TYPES = frozenset({NodeType.CLASS, NodeType.TYPE, NodeType.INTERFACE}) + + class FunctionIngestMixin: + __slots__ = () ingestor: IngestorProtocol repo_path: Path project_name: str @@ -48,6 +75,8 @@ class FunctionIngestMixin: simple_name_lookup: SimpleNameLookup module_qn_to_file_path: dict[str, Path] _handler: LanguageHandler + _deferred_cpp_methods: list[_DeferredMethod] + _deferred_go_methods: list[_DeferredGoMethod] @abstractmethod def _get_docstring(self, node: ASTNode) -> str | None: ... @@ -61,29 +90,33 @@ def _ingest_all_functions( module_qn: str, language: cs.SupportedLanguage, queries: dict[cs.SupportedLanguage, LanguageQueries], + combined_captures: dict[str, list] | None = None, ) -> None: - result = get_function_captures(root_node, language, queries) - if not result: - return - - lang_config, captures = result + if combined_captures is not None: + lang_queries = queries[language] + lang_config: LanguageSpec = lang_queries[cs.QUERY_CONFIG] + captures = combined_captures + else: + result = get_function_captures(root_node, language, queries) + if not result: + return + lang_config, captures = result file_path = self.module_qn_to_file_path.get(module_qn) + has_classes = bool(captures.get(cs.CAPTURE_CLASS)) for func_node in captures.get(cs.CAPTURE_FUNCTION, []): - if not isinstance(func_node, Node): - logger.warning( - ls.FUNC_EXPECTED_NODE.format( - actual_type=type(func_node), value=func_node - ) - ) - continue - if self._is_method(func_node, lang_config): + if has_classes and self._is_method(func_node, lang_config): continue if language == cs.SupportedLanguage.CPP: if self._handle_cpp_out_of_class_method(func_node, module_qn): continue + if language == cs.SupportedLanguage.GO and self._defer_go_receiver_method( + 
func_node, module_qn + ): + continue + resolution = self._resolve_function_identity( func_node, module_qn, language, lang_config, file_path ) @@ -102,7 +135,9 @@ def _resolve_function_identity( lang_config: LanguageSpec, file_path: Path | None, ) -> FunctionResolution | None: - resolution = self._try_unified_fqn_resolution(func_node, language, file_path) + resolution = self._try_unified_fqn_resolution( + func_node, module_qn, language, file_path + ) if resolution: return resolution @@ -113,6 +148,7 @@ def _resolve_function_identity( def _try_unified_fqn_resolution( self, func_node: Node, + module_qn: str, language: cs.SupportedLanguage, file_path: Path | None, ) -> FunctionResolution | None: @@ -120,19 +156,31 @@ def _try_unified_fqn_resolution( if not fqn_config or not file_path: return None - func_qn = resolve_fqn_from_ast( - func_node, file_path, self.repo_path, self.project_name, fqn_config - ) - if not func_qn: + func_name = fqn_config.get_name(func_node) + if not func_name: return None - func_name = func_qn.split(cs.SEPARATOR_DOT)[-1] + parts = [func_name] + current = func_node.parent + while current: + if current.type in fqn_config.scope_node_types: + if scope_name := fqn_config.get_name(current): + parts.append(scope_name) + current = current.parent + parts.reverse() + + # (H) Prefix with the module's resolved (collision-disambiguated) qn rather + # (H) than recomputing from the path, so same-stem cross-language siblings + # (H) stay distinct. 
+ func_qn = module_qn + cs.SEPARATOR_DOT + cs.SEPARATOR_DOT.join(parts) + simple_name = func_qn.rsplit(cs.SEPARATOR_DOT, 1)[-1] + is_exported = ( cpp_utils.is_exported(func_node) if language == cs.SupportedLanguage.CPP else False ) - return FunctionResolution(func_qn, func_name, is_exported) + return FunctionResolution(func_qn, simple_name, is_exported) def _fallback_function_resolution( self, @@ -147,6 +195,45 @@ def _fallback_function_resolution( func_node, module_qn, language, lang_config ) + def _resolve_cpp_class_qn( + self, class_name: str, module_qn: str + ) -> tuple[str, bool]: + """Look up an existing Class node for *class_name* across all parsed files. + + Returns ``(class_qn, resolved)`` where *resolved* is True when the + qualified name was obtained from the function registry (i.e. the + class has already been parsed, typically from a header file). + """ + class_name_normalized = class_name.replace( + cs.SEPARATOR_DOUBLE_COLON, cs.SEPARATOR_DOT + ) + leaf_name = class_name_normalized.rsplit(cs.SEPARATOR_DOT, 1)[-1] + + if leaf_name in self.simple_name_lookup: + for candidate_qn in self.simple_name_lookup[leaf_name]: + node_type = self.function_registry.get(candidate_qn) + if node_type in {NodeType.CLASS, NodeType.TYPE}: + if candidate_qn.endswith( + f".{class_name_normalized}" + ) and self._is_cpp_defined(candidate_qn): + return candidate_qn, True + + return f"{module_qn}.{class_name_normalized}", False + + def _is_cpp_defined(self, qn: str) -> bool: + # (H) A C++ out-of-class method may only bind to a class defined in a + # (H) C/C++ source file; matching a same-named class in another language + # (H) would collide their qualified names. Resolve qn -> defining file by + # (H) the longest module-qn prefix and check its extension. 
+ parts = qn.split(cs.SEPARATOR_DOT) + while parts: + if path := self.module_qn_to_file_path.get(cs.SEPARATOR_DOT.join(parts)): + return ( + path.suffix in cs.CPP_EXTENSIONS or path.suffix in cs.C_EXTENSIONS + ) + parts = parts[:-1] + return False + def _handle_cpp_out_of_class_method(self, func_node: Node, module_qn: str) -> bool: if not cpp_utils.is_out_of_class_method_definition(func_node): return False @@ -155,25 +242,163 @@ def _handle_cpp_out_of_class_method(self, func_node: Node, module_qn: str) -> bo if not class_name: return False - class_name_normalized = class_name.replace( - cs.SEPARATOR_DOUBLE_COLON, cs.SEPARATOR_DOT - ) - class_qn = f"{module_qn}.{class_name_normalized}" - - ingest_method( - method_node=func_node, - container_qn=class_qn, - container_type=cs.NodeLabel.CLASS, - ingestor=self.ingestor, - function_registry=self.function_registry, - simple_name_lookup=self.simple_name_lookup, - get_docstring_func=self._get_docstring, - language=cs.SupportedLanguage.CPP, - extract_decorators_func=self._extract_decorators, - ) + class_qn, resolved = self._resolve_cpp_class_qn(class_name, module_qn) + file_path = self.module_qn_to_file_path.get(module_qn) + + if resolved: + ingest_method( + method_node=func_node, + container_qn=class_qn, + container_type=cs.NodeLabel.CLASS, + ingestor=self.ingestor, + function_registry=self.function_registry, + simple_name_lookup=self.simple_name_lookup, + get_docstring_func=self._get_docstring, + language=cs.SupportedLanguage.CPP, + extract_decorators_func=self._extract_decorators, + file_path=file_path, + repo_path=self.repo_path, + ) + else: + method_name = cpp_utils.extract_function_name(func_node) + if not method_name: + return True + decorators = self._extract_decorators(func_node) + props: PropertyDict = { + cs.KEY_NAME: method_name, + cs.KEY_DECORATORS: decorators, + cs.KEY_START_LINE: func_node.start_point[0] + 1, + cs.KEY_END_LINE: func_node.end_point[0] + 1, + cs.KEY_DOCSTRING: self._get_docstring(func_node), + } 
+ if file_path is not None and self.repo_path is not None: + props[cs.KEY_PATH] = cached_relative_path( + file_path, self.repo_path + ).as_posix() + props[cs.KEY_ABSOLUTE_PATH] = cached_resolve_posix(file_path) + if not hasattr(self, "_deferred_cpp_methods"): + self._deferred_cpp_methods = [] + self._deferred_cpp_methods.append( + _DeferredMethod( + method_name=method_name, + class_name=class_name, + fallback_class_qn=class_qn, + method_props=props, + ) + ) return True + def resolve_deferred_cpp_methods(self) -> int: + """Ingest deferred out-of-class C++ methods now that all classes are known. + + Called after all files have been parsed so that every Class node + is guaranteed to be in the registry. Returns the number of + methods that were ingested. + """ + deferred = getattr(self, "_deferred_cpp_methods", None) + if not deferred: + return 0 + + ingested = 0 + for entry in deferred: + real_class_qn, resolved = self._resolve_cpp_class_qn(entry.class_name, "") + class_qn = real_class_qn if resolved else entry.fallback_class_qn + method_qn = f"{class_qn}.{entry.method_name}" + + props = dict(entry.method_props) + props[cs.KEY_QUALIFIED_NAME] = method_qn + + logger.info(ls.METHOD_FOUND.format(name=entry.method_name, qn=method_qn)) + self.ingestor.ensure_node_batch(cs.NodeLabel.METHOD, props) + self.function_registry[method_qn] = NodeType.METHOD + self.simple_name_lookup[entry.method_name].add(method_qn) + + self.ingestor.ensure_relationship_batch( + (cs.NodeLabel.CLASS, cs.KEY_QUALIFIED_NAME, class_qn), + cs.RelationshipType.DEFINES_METHOD, + (cs.NodeLabel.METHOD, cs.KEY_QUALIFIED_NAME, method_qn), + ) + ingested += 1 + + self._deferred_cpp_methods = [] + return ingested + + def _defer_go_receiver_method(self, func_node: Node, module_qn: str) -> bool: + if not go_utils.is_receiver_method(func_node): + return False + receiver_type = go_utils.extract_receiver_type_name(func_node) + if not receiver_type: + return False + if not hasattr(self, "_deferred_go_methods"): + 
self._deferred_go_methods = [] + self._deferred_go_methods.append( + _DeferredGoMethod( + method_node=func_node, + module_qn=module_qn, + receiver_type=receiver_type, + file_path=self.module_qn_to_file_path.get(module_qn), + ) + ) + return True + + def _resolve_go_container_qn(self, module_qn: str, receiver_type: str) -> str: + # (H) A method binds to its receiver type. Prefer the same-file type, but + # (H) a Go package spans every file in its directory, so fall back to a + # (H) sibling-file type with the same name in the same package. This keeps + # (H) the method's qn and DEFINES_METHOD parent anchored to the real type + # (H) node instead of a phantom under the method's own module. + same_file = f"{module_qn}{cs.SEPARATOR_DOT}{receiver_type}" + if self.function_registry.get(same_file) is not None: + return same_file + package = module_qn.rsplit(cs.SEPARATOR_DOT, 1)[0] + for qn in self.simple_name_lookup.get(receiver_type, set()): + if self.function_registry.get(qn) not in _GO_TYPE_NODE_TYPES: + continue + type_module = qn.rsplit(cs.SEPARATOR_DOT, 1)[0] + if type_module.rsplit(cs.SEPARATOR_DOT, 1)[0] == package: + return qn + return same_file + + def resolve_deferred_go_methods(self) -> int: + """Ingest Go receiver methods now that every receiver type is registered. + + A Go method (``func (p Point) Area()``) is declared at file scope, not + inside its receiver type, so the receiver's node may not exist yet when + the method is first seen. Deferring to after Pass 2 lets the method bind + to the actual node label (``Class`` for structs, ``Type`` for defined + types, ``Interface`` for interfaces). Returns the number ingested. 
+ """ + deferred = getattr(self, "_deferred_go_methods", None) + if not deferred: + return 0 + + for entry in deferred: + container_qn = self._resolve_go_container_qn( + entry.module_qn, entry.receiver_type + ) + container_type = self.function_registry.get(container_qn) + container_label = ( + cs.NodeLabel(container_type.value) + if container_type is not None + else cs.NodeLabel.CLASS + ) + ingest_method( + method_node=entry.method_node, + container_qn=container_qn, + container_type=container_label, + ingestor=self.ingestor, + function_registry=self.function_registry, + simple_name_lookup=self.simple_name_lookup, + get_docstring_func=self._get_docstring, + language=cs.SupportedLanguage.GO, + file_path=entry.file_path, + repo_path=self.repo_path, + ) + ingested = len(deferred) + self._deferred_go_methods = [] + return ingested + def _resolve_cpp_function( self, func_node: Node, module_qn: str ) -> FunctionResolution | None: @@ -238,13 +463,23 @@ def _register_function( language: cs.SupportedLanguage, lang_config: LanguageSpec, ) -> None: - func_props = self._build_function_props(func_node, resolution) + unique_qn = self.function_registry.register_unique_qn( + resolution.qualified_name, func_node.start_point[0] + 1 + ) + if unique_qn != resolution.qualified_name: + resolution = resolution._replace(qualified_name=unique_qn) + + func_props = self._build_function_props(func_node, resolution, module_qn) logger.info( ls.FUNC_FOUND.format(name=resolution.name, qn=resolution.qualified_name) ) self.ingestor.ensure_node_batch(cs.NodeLabel.FUNCTION, func_props) self.function_registry[resolution.qualified_name] = NodeType.FUNCTION + self.function_registry.mark_callable_params( + resolution.qualified_name, + callable_parameter_indices(func_node, language), + ) if resolution.name: self.simple_name_lookup[resolution.name].add(resolution.qualified_name) @@ -253,9 +488,10 @@ def _register_function( ) def _build_function_props( - self, func_node: Node, resolution: FunctionResolution 
+ self, func_node: Node, resolution: FunctionResolution, module_qn: str ) -> PropertyDict: - return { + file_path = self.module_qn_to_file_path.get(module_qn) + props: PropertyDict = { cs.KEY_QUALIFIED_NAME: resolution.qualified_name, cs.KEY_NAME: resolution.name, cs.KEY_DECORATORS: self._extract_decorators(func_node), @@ -264,6 +500,12 @@ def _build_function_props( cs.KEY_DOCSTRING: self._get_docstring(func_node), cs.KEY_IS_EXPORTED: resolution.is_exported, } + if file_path is not None: + props[cs.KEY_PATH] = cached_relative_path( + file_path, self.repo_path + ).as_posix() + props[cs.KEY_ABSOLUTE_PATH] = cached_resolve_posix(file_path) + return props def _create_function_relationships( self, @@ -274,7 +516,7 @@ def _create_function_relationships( lang_config: LanguageSpec, ) -> None: parent_type, parent_qn = self._determine_function_parent( - func_node, module_qn, lang_config + func_node, resolution.qualified_name, module_qn, lang_config, language ) self.ingestor.ensure_relationship_batch( (parent_type, cs.KEY_QUALIFIED_NAME, parent_qn), @@ -444,25 +686,55 @@ def _is_method(self, func_node: Node, lang_config: LanguageSpec) -> bool: return is_method_node(func_node, lang_config) def _determine_function_parent( - self, func_node: Node, module_qn: str, lang_config: LanguageSpec + self, + func_node: Node, + func_qn: str, + module_qn: str, + lang_config: LanguageSpec, + language: cs.SupportedLanguage | None = None, ) -> tuple[str, str]: current = func_node.parent if not isinstance(current, Node): return cs.NodeLabel.MODULE, module_qn + file_path = self.module_qn_to_file_path.get(module_qn) while current and current.type not in lang_config.module_node_types: if current.type in lang_config.function_node_types: - if name_node := current.child_by_field_name(cs.FIELD_NAME): - parent_text = name_node.text - if parent_text is None: - continue - if parent_func_name := safe_decode_text(name_node): - if parent_func_qn := self._build_nested_qualified_name( - current, module_qn, 
parent_func_name, lang_config - ): - return cs.NodeLabel.FUNCTION, parent_func_qn - break + parent_label = ( + cs.NodeLabel.METHOD + if self._is_method(current, lang_config) + else cs.NodeLabel.FUNCTION + ) + # (H) Bind to the enclosing function's OWN qn, recomputed from its + # (H) node. A function nested in an anonymous callback otherwise + # (H) loses that callback: anonymous scopes contribute no segment to + # (H) the child qn, so trimming the child qn would skip the callback + # (H) and hoist the child to the nearest named ancestor. + resolution = ( + self._resolve_function_identity( + current, module_qn, language, lang_config, file_path + ) + if language is not None + else None + ) + parent_qn = ( + resolution.qualified_name + if resolution + else func_qn.rsplit(cs.SEPARATOR_DOT, 1)[0] + ) + if not parent_qn or parent_qn == func_qn: + break + return parent_label, parent_qn current = current.parent + # (H) A Rust item inside `mod inner` is contained by that inline module, + # (H) not the file module. Its enclosing module qn is the file module plus + # (H) the mod path; the inline Module node carries that exact qn. 
+ if language == cs.SupportedLanguage.RUST and ( + mod_parts := rs_utils.build_module_path(func_node) + ): + nested = module_qn + cs.SEPARATOR_DOT + cs.SEPARATOR_DOT.join(mod_parts) + return cs.NodeLabel.MODULE, nested + return cs.NodeLabel.MODULE, module_qn diff --git a/codebase_rag/parsers/go/__init__.py b/codebase_rag/parsers/go/__init__.py new file mode 100644 index 000000000..a96f8ef02 --- /dev/null +++ b/codebase_rag/parsers/go/__init__.py @@ -0,0 +1,8 @@ +from .type_inference import GoTypeInferenceEngine +from .utils import extract_receiver_type_name, is_receiver_method + +__all__ = [ + "GoTypeInferenceEngine", + "extract_receiver_type_name", + "is_receiver_method", +] diff --git a/codebase_rag/parsers/go/type_inference.py b/codebase_rag/parsers/go/type_inference.py new file mode 100644 index 000000000..a14935104 --- /dev/null +++ b/codebase_rag/parsers/go/type_inference.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +from tree_sitter import Node + +from ... import constants as cs +from ..utils import safe_decode_text +from .utils import type_identifier_text + + +class GoTypeInferenceEngine: + # (H) Maps local variable / parameter / receiver names to their bare Go type + # (H) name within a function or method body, so the resolver can bind a + # (H) receiver-dispatch call (`d.method()`) to the method node on the type. + # (H) Bare names only: the resolver turns a name into a class qn via the same + # (H) _resolve_class_name path the definition pass uses, so pointer/generic + # (H) wrappers are stripped here down to the underlying type identifier. 
+ __slots__ = () + + def build_local_variable_type_map( + self, caller_node: Node, module_qn: str + ) -> dict[str, str]: + var_types: dict[str, str] = {} + self._collect_receiver(caller_node, var_types) + self._collect_parameters(caller_node, var_types) + if body := caller_node.child_by_field_name(cs.FIELD_BODY): + self._collect_body_declarations(body, var_types) + return var_types + + def _collect_receiver(self, caller_node: Node, var_types: dict[str, str]) -> None: + receiver = caller_node.child_by_field_name(cs.FIELD_RECEIVER) + if receiver is not None: + self._collect_parameter_list(receiver, var_types) + + def _collect_parameters(self, caller_node: Node, var_types: dict[str, str]) -> None: + params = caller_node.child_by_field_name(cs.FIELD_PARAMETERS) + if params is not None: + self._collect_parameter_list(params, var_types) + + def _collect_parameter_list( + self, list_node: Node, var_types: dict[str, str] + ) -> None: + for param in list_node.children: + if param.type != cs.TS_GO_PARAMETER_DECLARATION: + continue + type_node = param.child_by_field_name(cs.FIELD_TYPE) + if type_node is None or not (type_name := type_identifier_text(type_node)): + continue + for child in param.children: + if child.type == cs.TS_IDENTIFIER and (name := safe_decode_text(child)): + var_types[name] = type_name + + def _collect_body_declarations(self, node: Node, var_types: dict[str, str]) -> None: + match node.type: + case cs.TS_GO_VAR_DECLARATION: + self._collect_var_declaration(node, var_types) + case cs.TS_GO_SHORT_VAR_DECLARATION: + self._collect_short_var_declaration(node, var_types) + case _: + pass + for child in node.children: + self._collect_body_declarations(child, var_types) + + def _collect_var_declaration(self, node: Node, var_types: dict[str, str]) -> None: + # (H) `var a, b T` binds every name in the spec to the declared type. 
+ for spec in node.children: + if spec.type != cs.TS_GO_VAR_SPEC: + continue + type_node = spec.child_by_field_name(cs.FIELD_TYPE) + if type_node is None or not (type_name := type_identifier_text(type_node)): + continue + for child in spec.children: + if child.type == cs.TS_IDENTIFIER and (name := safe_decode_text(child)): + var_types[name] = type_name + + def _collect_short_var_declaration( + self, node: Node, var_types: dict[str, str] + ) -> None: + # (H) `x := T{}` / `x := &T{}`: pair each left name with the type inferred + # (H) from the value at the same position; non-literal initializers (calls) + # (H) are left unresolved rather than guessed. + left = node.child_by_field_name(cs.FIELD_LEFT) + right = node.child_by_field_name(cs.FIELD_RIGHT) + if left is None or right is None: + return + names = [ + safe_decode_text(c) for c in left.children if c.type == cs.TS_IDENTIFIER + ] + values = [c for c in right.children if c.is_named] + for name, value in zip(names, values, strict=False): + if name and (type_name := self._infer_value_type(value)): + var_types[name] = type_name + + def _infer_value_type(self, value: Node) -> str | None: + if value.type == cs.TS_GO_COMPOSITE_LITERAL: + type_node = value.child_by_field_name(cs.FIELD_TYPE) + return type_identifier_text(type_node) if type_node else None + if value.type == cs.TS_GO_UNARY_EXPRESSION: + # (H) `&T{}` wraps the composite literal in its operand. + operand = value.child_by_field_name(cs.FIELD_OPERAND) + return self._infer_value_type(operand) if operand else None + return None diff --git a/codebase_rag/parsers/go/utils.py b/codebase_rag/parsers/go/utils.py new file mode 100644 index 000000000..cf4f3a954 --- /dev/null +++ b/codebase_rag/parsers/go/utils.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from tree_sitter import Node + +from ... 
import constants as cs +from ..utils import safe_decode_text + + +def is_receiver_method(node: Node) -> bool: + return ( + node.type == cs.TS_GO_METHOD_DECLARATION + and node.child_by_field_name(cs.FIELD_RECEIVER) is not None + ) + + +def extract_receiver_type_name(node: Node) -> str | None: + receiver = node.child_by_field_name(cs.FIELD_RECEIVER) + if receiver is None: + return None + for param in receiver.children: + if param.type != cs.TS_GO_PARAMETER_DECLARATION: + continue + type_node = param.child_by_field_name(cs.FIELD_TYPE) + if type_node is not None: + return type_identifier_text(type_node) + return None + + +def type_identifier_text(type_node: Node) -> str | None: + if type_node.type == cs.TS_TYPE_IDENTIFIER and type_node.text: + return safe_decode_text(type_node) + # (H) Unwrap pointer (*T) and generic (T[P]) receivers down to the base name. + for child in type_node.children: + if name := type_identifier_text(child): + return name + return None diff --git a/codebase_rag/parsers/handlers/base.py b/codebase_rag/parsers/handlers/base.py index 14fa8cec9..7f264c1e1 100644 --- a/codebase_rag/parsers/handlers/base.py +++ b/codebase_rag/parsers/handlers/base.py @@ -13,6 +13,8 @@ class BaseLanguageHandler: + __slots__ = () + def is_inside_method_with_object_literals(self, node: ASTNode) -> bool: return False diff --git a/codebase_rag/parsers/handlers/cpp.py b/codebase_rag/parsers/handlers/cpp.py index d7c9dea04..854bcc4ac 100644 --- a/codebase_rag/parsers/handlers/cpp.py +++ b/codebase_rag/parsers/handlers/cpp.py @@ -17,6 +17,8 @@ class CppHandler(BaseLanguageHandler): + __slots__ = () + def extract_function_name(self, node: ASTNode) -> str | None: if func_name := cpp_utils.extract_function_name(node): return func_name diff --git a/codebase_rag/parsers/handlers/java.py b/codebase_rag/parsers/handlers/java.py index 4bd576beb..882fae0da 100644 --- a/codebase_rag/parsers/handlers/java.py +++ b/codebase_rag/parsers/handlers/java.py @@ -11,6 +11,8 @@ class 
JavaHandler(BaseLanguageHandler): + __slots__ = () + def extract_decorators(self, node: ASTNode) -> list[str]: return java_utils.extract_from_modifiers_node(node, frozenset()).annotations diff --git a/codebase_rag/parsers/handlers/js_ts.py b/codebase_rag/parsers/handlers/js_ts.py index 7a2ed6684..75c561209 100644 --- a/codebase_rag/parsers/handlers/js_ts.py +++ b/codebase_rag/parsers/handlers/js_ts.py @@ -12,6 +12,8 @@ class JsTsHandler(BaseLanguageHandler): + __slots__ = () + def extract_decorators(self, node: ASTNode) -> list[str]: return [ decorator_text diff --git a/codebase_rag/parsers/handlers/lua.py b/codebase_rag/parsers/handlers/lua.py index 9db185904..6b2d6177f 100644 --- a/codebase_rag/parsers/handlers/lua.py +++ b/codebase_rag/parsers/handlers/lua.py @@ -11,6 +11,8 @@ class LuaHandler(BaseLanguageHandler): + __slots__ = () + def extract_function_name(self, node: ASTNode) -> str | None: if (name_node := node.child_by_field_name(cs.TS_FIELD_NAME)) and name_node.text: from ..utils import safe_decode_text diff --git a/codebase_rag/parsers/handlers/php.py b/codebase_rag/parsers/handlers/php.py new file mode 100644 index 000000000..e529ab7dd --- /dev/null +++ b/codebase_rag/parsers/handlers/php.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ... 
import constants as cs +from ..utils import safe_decode_text +from .base import BaseLanguageHandler + +if TYPE_CHECKING: + from ...types_defs import ASTNode + + +class PhpHandler(BaseLanguageHandler): + __slots__ = () + + _CLASS_LIKE_TYPES = frozenset( + { + cs.TS_CLASS_DECLARATION, + cs.TS_INTERFACE_DECLARATION, + cs.TS_PHP_TRAIT_DECLARATION, + cs.TS_ENUM_DECLARATION, + } + ) + + def is_class_method(self, node: ASTNode) -> bool: + parent = node.parent + while parent: + if parent.type in self._CLASS_LIKE_TYPES: + return True + parent = parent.parent + return False + + def extract_function_name(self, node: ASTNode) -> str | None: + if node.type == cs.TS_PHP_ANONYMOUS_FUNCTION: + return f"anonymous_{node.start_point[0]}_{node.start_point[1]}" + if node.type == cs.TS_PHP_ARROW_FUNCTION: + return f"arrow_{node.start_point[0]}_{node.start_point[1]}" + name_node = node.child_by_field_name(cs.TS_FIELD_NAME) + if name_node and name_node.text: + return safe_decode_text(name_node) + return None + + def is_function_exported(self, node: ASTNode) -> bool: + if node.type != cs.TS_PHP_METHOD_DECLARATION: + return True + for child in node.children: + if child.type == cs.TS_PHP_VISIBILITY_MODIFIER: + text = safe_decode_text(child) + return text == "public" + return True + + def extract_decorators(self, node: ASTNode) -> list[str]: + decorators: list[str] = [] + for child in node.children: + if child.type == cs.TS_PHP_ATTRIBUTE_LIST: + for group in child.children: + if group.type == cs.TS_PHP_ATTRIBUTE_GROUP: + for attr in group.children: + if attr.type == cs.TS_PHP_ATTRIBUTE: + if text := safe_decode_text(attr): + decorators.append(text) + return decorators diff --git a/codebase_rag/parsers/handlers/protocol.py b/codebase_rag/parsers/handlers/protocol.py index 9bdbe72b6..893888d78 100644 --- a/codebase_rag/parsers/handlers/protocol.py +++ b/codebase_rag/parsers/handlers/protocol.py @@ -10,6 +10,8 @@ class LanguageHandler(Protocol): + __slots__ = () + def 
is_inside_method_with_object_literals(self, node: ASTNode) -> bool: ... def is_class_method(self, node: ASTNode) -> bool: ... diff --git a/codebase_rag/parsers/handlers/python.py b/codebase_rag/parsers/handlers/python.py index ae96501a5..1c424fdd8 100644 --- a/codebase_rag/parsers/handlers/python.py +++ b/codebase_rag/parsers/handlers/python.py @@ -11,6 +11,8 @@ class PythonHandler(BaseLanguageHandler): + __slots__ = () + def extract_decorators(self, node: ASTNode) -> list[str]: if not node.parent or node.parent.type != cs.TS_PY_DECORATED_DEFINITION: return [] diff --git a/codebase_rag/parsers/handlers/registry.py b/codebase_rag/parsers/handlers/registry.py index a886d7f9e..6f490700e 100644 --- a/codebase_rag/parsers/handlers/registry.py +++ b/codebase_rag/parsers/handlers/registry.py @@ -8,6 +8,7 @@ from .java import JavaHandler from .js_ts import JsTsHandler from .lua import LuaHandler +from .php import PhpHandler from .protocol import LanguageHandler from .python import PythonHandler from .rust import RustHandler @@ -20,6 +21,7 @@ SupportedLanguage.RUST: RustHandler, SupportedLanguage.JAVA: JavaHandler, SupportedLanguage.LUA: LuaHandler, + SupportedLanguage.PHP: PhpHandler, } _DEFAULT_HANDLER = BaseLanguageHandler diff --git a/codebase_rag/parsers/handlers/rust.py b/codebase_rag/parsers/handlers/rust.py index 650bec974..186704ab2 100644 --- a/codebase_rag/parsers/handlers/rust.py +++ b/codebase_rag/parsers/handlers/rust.py @@ -17,6 +17,8 @@ class RustHandler(BaseLanguageHandler): + __slots__ = () + def extract_decorators(self, node: ASTNode) -> list[str]: outer_decorators: list[str] = [] sibling = node.prev_named_sibling @@ -31,13 +33,12 @@ def extract_decorators(self, node: ASTNode) -> list[str]: if body_node := node.child_by_field_name(cs.FIELD_BODY): nodes_to_search.append(body_node) + inner_attr_type = cs.TS_RS_INNER_ATTRIBUTE_ITEM for search_node in nodes_to_search: - decorators.extend( - attr_text - for child in search_node.children - if child.type == 
cs.TS_RS_INNER_ATTRIBUTE_ITEM - if (attr_text := safe_decode_text(child)) - ) + for child in search_node.children: + if child.type == inner_attr_type: + if attr_text := safe_decode_text(child): + decorators.append(attr_text) return decorators diff --git a/codebase_rag/parsers/import_processor.py b/codebase_rag/parsers/import_processor.py index 99c3a8526..28f04cac3 100644 --- a/codebase_rag/parsers/import_processor.py +++ b/codebase_rag/parsers/import_processor.py @@ -1,3 +1,4 @@ +from functools import lru_cache from pathlib import Path from loguru import logger @@ -19,10 +20,26 @@ load_persistent_cache, save_persistent_cache, ) -from .utils import get_query_cursor, safe_decode_text, safe_decode_with_fallback +from .utils import ( + get_query_cursor, + safe_decode_text, + safe_decode_with_fallback, + sorted_captures, +) class ImportProcessor: + __slots__ = ( + "repo_path", + "project_name", + "ingestor", + "function_registry", + "import_mapping", + "stdlib_extractor", + "_is_local_module_cached", + "_is_local_java_import_cached", + ) + def __init__( self, repo_path: Path, @@ -39,6 +56,29 @@ def __init__( function_registry, repo_path, project_name ) + repo_is_package = (repo_path / cs.INIT_PY).is_file() + + @lru_cache(maxsize=4096) + def _is_local_module_cached(module_name: str) -> bool: + # (H) When the repo root is itself a package, its children are importable + # (H) only under the package name (project_name.child), never as bare + # (H) top-level names, so a bare top-level import resolves externally. 
+ if repo_is_package: + return module_name == project_name + return ( + (repo_path / module_name).is_dir() + or (repo_path / f"{module_name}{cs.EXT_PY}").is_file() + or (repo_path / module_name / cs.INIT_PY).is_file() + ) + + @lru_cache(maxsize=4096) + def _is_local_java_import_cached(import_path: str) -> bool: + top_level = import_path.split(cs.SEPARATOR_DOT)[0] + return (repo_path / top_level).is_dir() + + self._is_local_module_cached = _is_local_module_cached + self._is_local_java_import_cached = _is_local_java_import_cached + load_persistent_cache() def __del__(self) -> None: @@ -65,6 +105,7 @@ def parse_imports( module_qn: str, language: cs.SupportedLanguage, queries: dict[cs.SupportedLanguage, LanguageQueries], + pre_captures: dict | None = None, ) -> None: if language not in queries: return @@ -77,8 +118,11 @@ def parse_imports( self.import_mapping[module_qn] = {} try: - cursor = get_query_cursor(imports_query) - captures = cursor.captures(root_node) + if pre_captures is not None: + captures = pre_captures + else: + cursor = get_query_cursor(imports_query) + captures = sorted_captures(cursor, root_node) match language: case cs.SupportedLanguage.PYTHON: @@ -95,13 +139,15 @@ def parse_imports( self._parse_cpp_imports(captures, module_qn) case cs.SupportedLanguage.LUA: self._parse_lua_imports(captures, module_qn) + case cs.SupportedLanguage.PHP: + self._parse_php_imports(captures, module_qn) case _: self._parse_generic_imports(captures, module_qn, lang_config) logger.debug( - ls.IMP_PARSED_COUNT.format( - count=len(self.import_mapping[module_qn]), module=module_qn - ) + ls.IMP_PARSED_COUNT, + count=len(self.import_mapping[module_qn]), + module=module_qn, ) if self.ingestor: @@ -124,15 +170,14 @@ def parse_imports( ), ) logger.debug( - ls.IMP_CREATED_RELATIONSHIP.format( - from_module=module_qn, - to_module=module_path, - full_name=full_name, - ) + ls.IMP_CREATED_RELATIONSHIP, + from_module=module_qn, + to_module=module_path, + full_name=full_name, ) except 
Exception as e: - logger.warning(ls.IMP_PARSE_FAILED.format(module=module_qn, error=e)) + logger.warning(ls.IMP_PARSE_FAILED, module=module_qn, error=e) def _parse_python_imports(self, captures: dict, module_qn: str) -> None: all_imports = captures.get(cs.CAPTURE_IMPORT, []) + captures.get( @@ -159,7 +204,7 @@ def _handle_dotted_name_import(self, child: Node, module_qn: str) -> None: local_name = module_name.split(cs.SEPARATOR_DOT)[0] full_name = self._resolve_import_full_name(module_name, local_name) self.import_mapping[module_qn][local_name] = full_name - logger.debug(ls.IMP_IMPORT.format(local=local_name, full=full_name)) + logger.debug(ls.IMP_IMPORT, local=local_name, full=full_name) def _handle_aliased_import(self, child: Node, module_qn: str) -> None: module_name_node = child.child_by_field_name(cs.FIELD_NAME) @@ -175,23 +220,22 @@ def _handle_aliased_import(self, child: Node, module_qn: str) -> None: top_level = module_name.split(cs.SEPARATOR_DOT)[0] full_name = self._resolve_import_full_name(module_name, top_level) self.import_mapping[module_qn][alias] = full_name - logger.debug(ls.IMP_ALIASED_IMPORT.format(alias=alias, full=full_name)) + logger.debug(ls.IMP_ALIASED_IMPORT, alias=alias, full=full_name) def _resolve_import_full_name(self, module_name: str, top_level: str) -> str: + if module_name == self.project_name or module_name.startswith( + self.project_name + cs.SEPARATOR_DOT + ): + return module_name if self._is_local_module(top_level): return f"{self.project_name}{cs.SEPARATOR_DOT}{module_name}" return module_name def _is_local_module(self, module_name: str) -> bool: - return ( - (self.repo_path / module_name).is_dir() - or (self.repo_path / f"{module_name}{cs.EXT_PY}").is_file() - or (self.repo_path / module_name / cs.INIT_PY).is_file() - ) + return self._is_local_module_cached(module_name) def _is_local_java_import(self, import_path: str) -> bool: - top_level = import_path.split(cs.SEPARATOR_DOT)[0] - return (self.repo_path / top_level).is_dir() + 
return self._is_local_java_import_cached(import_path) def _resolve_java_import_path(self, import_path: str) -> str: if self._is_local_java_import(import_path): @@ -364,16 +408,26 @@ def _register_python_from_imports( if is_wildcard: wildcard_key = f"*{base_module}" self.import_mapping[module_qn][wildcard_key] = base_module - logger.debug(ls.IMP_WILDCARD_IMPORT.format(module=base_module)) + logger.debug(ls.IMP_WILDCARD_IMPORT, module=base_module) return for local_name, original_name in imported_items: full_name = f"{base_module}{cs.SEPARATOR_DOT}{original_name}" self.import_mapping[module_qn][local_name] = full_name - logger.debug(ls.IMP_FROM_IMPORT.format(local=local_name, full=full_name)) + logger.debug(ls.IMP_FROM_IMPORT, local=local_name, full=full_name) + + def _is_package_qn(self, module_qn: str) -> bool: + prefix = self.project_name + cs.SEPARATOR_DOT + if not module_qn.startswith(prefix): + return False + rel = module_qn[len(prefix) :].replace(cs.SEPARATOR_DOT, cs.SEPARATOR_SLASH) + return (self.repo_path / rel / cs.INIT_PY).is_file() def _resolve_relative_import(self, relative_node: Node, module_qn: str) -> str: - module_parts = module_qn.split(cs.SEPARATOR_DOT)[1:] + # (H) Relative imports are always internal; resolve to the full project- + # (H) prefixed qualified name so resolution does not depend on bare-name + # (H) locality checks (which treat package children as external). + module_parts = module_qn.split(cs.SEPARATOR_DOT) dots = 0 module_name = "" @@ -386,11 +440,21 @@ def _resolve_relative_import(self, relative_node: Node, module_qn: str) -> str: if decoded_name := safe_decode_text(child): module_name = decoded_name - target_parts = module_parts[:-dots] if dots > 0 else module_parts + # (H) A package's qualified name already IS the package, so `from .` inside + # (H) an __init__.py drops one fewer level than inside a regular module. 
+ drop = dots - 1 if self._is_package_qn(module_qn) else dots + keep = max(len(module_parts) - drop, 0) + target_parts = module_parts[:keep] if module_name: target_parts.extend(module_name.split(cs.SEPARATOR_DOT)) + # (H) A relative climb that lands at the project root (e.g. `from . import x` + # (H) in a top-level module) leaves no parts; resolve it to the project root + # (H) so the import is not silently dropped. + if not target_parts: + return self.project_name + return cs.SEPARATOR_DOT.join(target_parts) def _parse_js_ts_imports(self, captures: dict, module_qn: str) -> None: @@ -446,7 +510,7 @@ def _parse_js_import_clause( f"{source_module}{cs.IMPORT_DEFAULT_SUFFIX}" ) logger.debug( - ls.IMP_JS_DEFAULT.format(name=imported_name, module=source_module) + ls.IMP_JS_DEFAULT, name=imported_name, module=source_module ) elif child.type == cs.TS_NAMED_IMPORTS: @@ -465,11 +529,10 @@ def _parse_js_import_clause( f"{source_module}{cs.SEPARATOR_DOT}{imported_name}" ) logger.debug( - ls.IMP_JS_NAMED.format( - local=local_name, - module=source_module, - name=imported_name, - ) + ls.IMP_JS_NAMED, + local=local_name, + module=source_module, + name=imported_name, ) elif child.type == cs.TS_NAMESPACE_IMPORT: @@ -480,9 +543,9 @@ def _parse_js_import_clause( source_module ) logger.debug( - ls.IMP_JS_NAMESPACE.format( - name=namespace_name, module=source_module - ) + ls.IMP_JS_NAMESPACE, + name=namespace_name, + module=source_module, ) break @@ -521,9 +584,9 @@ def _parse_js_require(self, decl_node: Node, current_module: str) -> None: resolved_module ) logger.debug( - ls.IMP_JS_REQUIRE.format( - var=var_name, module=resolved_module - ) + ls.IMP_JS_REQUIRE, + var=var_name, + module=resolved_module, ) break @@ -544,7 +607,7 @@ def _parse_js_reexport(self, export_node: Node, current_module: str) -> None: if child.type == cs.TS_ASTERISK: wildcard_key = f"*{source_module}" self.import_mapping[current_module][wildcard_key] = source_module - 
logger.debug(ls.IMP_JS_NAMESPACE_REEXPORT.format(module=source_module)) + logger.debug(ls.IMP_JS_NAMESPACE_REEXPORT, module=source_module) elif child.type == cs.TS_EXPORT_CLAUSE: for grandchild in child.children: if grandchild.type == cs.TS_EXPORT_SPECIFIER: @@ -561,11 +624,10 @@ def _parse_js_reexport(self, export_node: Node, current_module: str) -> None: f"{source_module}{cs.SEPARATOR_DOT}{original_name}" ) logger.debug( - ls.IMP_JS_REEXPORT.format( - exported=exported_name, - module=source_module, - original=original_name, - ) + ls.IMP_JS_REEXPORT, + exported=exported_name, + module=source_module, + original=original_name, ) def _parse_java_imports(self, captures: dict, module_qn: str) -> None: @@ -589,22 +651,22 @@ def _parse_java_imports(self, captures: dict, module_qn: str) -> None: resolved_path = self._resolve_java_import_path(imported_path) if is_wildcard: - logger.debug(ls.IMP_JAVA_WILDCARD.format(path=resolved_path)) + logger.debug(ls.IMP_JAVA_WILDCARD, path=resolved_path) self.import_mapping[module_qn][f"*{resolved_path}"] = resolved_path elif parts := resolved_path.split(cs.SEPARATOR_DOT): imported_name = parts[-1] self.import_mapping[module_qn][imported_name] = resolved_path if is_static: logger.debug( - ls.IMP_JAVA_STATIC.format( - name=imported_name, path=resolved_path - ) + ls.IMP_JAVA_STATIC, + name=imported_name, + path=resolved_path, ) else: logger.debug( - ls.IMP_JAVA_IMPORT.format( - name=imported_name, path=resolved_path - ) + ls.IMP_JAVA_IMPORT, + name=imported_name, + path=resolved_path, ) def _parse_rust_imports(self, captures: dict, module_qn: str) -> None: @@ -617,7 +679,7 @@ def _parse_rust_use_declaration(self, use_node: Node, module_qn: str) -> None: for imported_name, full_path in imports.items(): self.import_mapping[module_qn][imported_name] = full_path - logger.debug(ls.IMP_RUST.format(name=imported_name, path=full_path)) + logger.debug(ls.IMP_RUST, name=imported_name, path=full_path) def _parse_go_imports(self, captures: dict, 
module_qn: str) -> None: for import_node in captures.get(cs.CAPTURE_IMPORT, []): @@ -646,7 +708,7 @@ def _parse_go_import_spec(self, spec_node: Node, module_qn: str) -> None: if import_path: package_name = alias_name or import_path.split(cs.SEPARATOR_SLASH)[-1] self.import_mapping[module_qn][package_name] = import_path - logger.debug(ls.IMP_GO.format(package=package_name, path=import_path)) + logger.debug(ls.IMP_GO, package=package_name, path=import_path) def _parse_cpp_imports(self, captures: dict, module_qn: str) -> None: for import_node in captures.get(cs.CAPTURE_IMPORT, []): @@ -692,9 +754,10 @@ def _parse_cpp_include(self, include_node: Node, module_qn: str) -> None: self.import_mapping[module_qn][local_name] = full_name logger.debug( - ls.IMP_CPP_INCLUDE.format( - local=local_name, full=full_name, system=is_system_include - ) + ls.IMP_CPP_INCLUDE, + local=local_name, + full=full_name, + system=is_system_include, ) def _parse_cpp_module_import(self, import_node: Node, module_qn: str) -> None: @@ -727,7 +790,7 @@ def _parse_cpp_module_import(self, import_node: Node, module_qn: str) -> None: full_name = f"{cs.IMPORT_STD_PREFIX}{module_name}" self.import_mapping[module_qn][local_name] = full_name - logger.debug(ls.IMP_CPP_MODULE.format(local=local_name, full=full_name)) + logger.debug(ls.IMP_CPP_MODULE, local=local_name, full=full_name) def _parse_cpp_module_declaration(self, decl_node: Node, module_qn: str) -> None: decoded_text = safe_decode_text(decl_node) @@ -757,9 +820,9 @@ def _parse_cpp_module_declaration(self, decl_node: Node, module_qn: str) -> None full_name = f"{self.project_name}{cs.SEPARATOR_DOT}{partition_part}" self.import_mapping[module_qn][partition_name] = full_name logger.debug( - ls.IMP_CPP_PARTITION.format( - partition=partition_name, full=full_name - ) + ls.IMP_CPP_PARTITION, + partition=partition_name, + full=full_name, ) def _register_cpp_module_mapping( @@ -769,16 +832,74 @@ def _register_cpp_module_mapping( 
self.import_mapping[module_qn][module_name] = ( f"{self.project_name}{cs.SEPARATOR_DOT}{module_name}" ) - logger.debug(log_template.format(name=module_name)) + logger.debug(log_template, name=module_name) + + _PHP_INCLUDE_REQUIRE_TYPES = frozenset( + { + cs.TS_PHP_INCLUDE_EXPRESSION, + cs.TS_PHP_INCLUDE_ONCE_EXPRESSION, + cs.TS_PHP_REQUIRE_EXPRESSION, + cs.TS_PHP_REQUIRE_ONCE_EXPRESSION, + } + ) + + def _parse_php_imports(self, captures: dict, module_qn: str) -> None: + all_imports = captures.get(cs.CAPTURE_IMPORT, []) + captures.get( + cs.CAPTURE_IMPORT_FROM, [] + ) + for import_node in all_imports: + if import_node.type == cs.TS_PHP_NAMESPACE_USE_DECLARATION: + self._handle_php_use_declaration(import_node, module_qn) + elif import_node.type in self._PHP_INCLUDE_REQUIRE_TYPES: + self._handle_php_include_require(import_node, module_qn) + + def _handle_php_use_declaration(self, use_node: Node, module_qn: str) -> None: + for child in use_node.named_children: + if child.type != cs.TS_PHP_NAMESPACE_USE_CLAUSE: + continue + qn_node = next( + (c for c in child.named_children if c.type == cs.TS_PHP_QUALIFIED_NAME), + None, + ) + if not qn_node: + continue + imported_path = safe_decode_with_fallback(qn_node) + if not imported_path: + continue + imported_path = imported_path.replace("\\", cs.SEPARATOR_DOT) + alias_node = child.child_by_field_name("alias") + if alias_node and alias_node.text: + local_name = safe_decode_with_fallback(alias_node) + else: + parts = imported_path.split(cs.SEPARATOR_DOT) + local_name = parts[-1] if parts else imported_path + self.import_mapping[module_qn][local_name] = imported_path + + def _handle_php_include_require(self, node: Node, module_qn: str) -> None: + for child in node.children: + if child.type in {"string", "encapsed_string"}: + raw = safe_decode_with_fallback(child) + if not raw: + continue + path_str = raw.strip("'\"") + path_str = path_str.replace("/", cs.SEPARATOR_DOT).replace( + "\\", cs.SEPARATOR_DOT + ) + if 
path_str.endswith(".php"): + path_str = path_str[:-4] + parts = path_str.split(cs.SEPARATOR_DOT) + local_name = parts[-1] if parts else path_str + self.import_mapping[module_qn][local_name] = path_str + return def _parse_generic_imports( self, captures: dict, module_qn: str, lang_config: LanguageSpec ) -> None: for import_node in captures.get(cs.CAPTURE_IMPORT, []): logger.debug( - ls.IMP_GENERIC.format( - language=lang_config.language, node_type=import_node.type - ) + ls.IMP_GENERIC, + language=lang_config.language, + node_type=import_node.type, ) def _parse_lua_imports(self, captures: dict, module_qn: str) -> None: diff --git a/codebase_rag/parsers/java/method_resolver.py b/codebase_rag/parsers/java/method_resolver.py index 01bd25cae..a268131f5 100644 --- a/codebase_rag/parsers/java/method_resolver.py +++ b/codebase_rag/parsers/java/method_resolver.py @@ -8,9 +8,14 @@ from ... import constants as cs from ... import logs as ls +from ...decorators import recursion_guard from ...types_defs import ASTNode, NodeType from ..utils import safe_decode_text -from .utils import extract_method_call_info, get_class_context_from_qn +from .utils import ( + extract_class_info, + extract_method_call_info, + get_class_context_from_qn, +) if TYPE_CHECKING: from pathlib import Path @@ -20,6 +25,7 @@ class JavaMethodResolverMixin: + __slots__ = () import_processor: ImportProcessor function_registry: FunctionRegistryTrieProtocol project_name: str @@ -53,14 +59,29 @@ def _get_current_class_name(self, module_qn: str) -> str | None: ... @abstractmethod def _lookup_variable_type(self, var_name: str, module_qn: str) -> str | None: ... + @abstractmethod + def _lookup_java_field_type( + self, class_type: str, field_name: str, module_qn: str + ) -> str | None: ... + + @abstractmethod + def _find_containing_java_class(self, node: ASTNode) -> ASTNode | None: ... 
+ def _resolve_java_object_type( - self, object_ref: str, local_var_types: dict[str, str], module_qn: str + self, + object_ref: str, + local_var_types: dict[str, str], + module_qn: str, + context_node: ASTNode | None = None, ) -> str | None: if object_ref in local_var_types: return local_var_types[object_ref] - # (H) Check for 'this' reference - find the containing class (using trie for O(k) lookup) + # (H) Check for 'this' reference - prefer the lexical containing class (precise in + # (H) multi-class files); fall back to the first class under the module otherwise. if object_ref == cs.JAVA_KEYWORD_THIS: + if lexical := self._lexical_class_qn(context_node, module_qn): + return lexical return next( ( str(qn) @@ -72,8 +93,13 @@ def _resolve_java_object_type( None, ) - # (H) Check for 'super' reference - for super calls, look at parent classes (using trie for O(k) lookup) + # (H) Check for 'super' reference - resolve the lexical class then its parent when + # (H) available; otherwise fall back to the first class under the module with a parent. if object_ref == cs.JAVA_KEYWORD_SUPER: + if (lexical := self._lexical_class_qn(context_node, module_qn)) and ( + parent_qn := self._find_parent_class(lexical) + ): + return parent_qn for qn, entity_type in self.function_registry.find_with_prefix(module_qn): if entity_type == NodeType.CLASS: if parent_qn := self._find_parent_class(qn): @@ -92,12 +118,87 @@ def _resolve_java_object_type( ): return simple_class_qn + # (H) An unqualified class-name receiver for a static call (`T.make()`) + # (H) defined in a sibling file: imports and the current module were checked + # (H) above, so the remaining unqualified case is a same-package class. 
+ if sibling_class_qn := self._resolve_sibling_class_qn(object_ref, module_qn): + return sibling_class_qn + + # (H) A receiver like `obj.engine` (field access on a typed variable) is not a + # (H) single name: resolve the base, then walk each field's declared type across + # (H) classes so `obj.engine.start()` and deeper chains resolve to a method. + if cs.SEPARATOR_DOT in object_ref: + return self._resolve_field_access_chain_type( + object_ref, local_var_types, module_qn, context_node + ) + return None + def _lexical_class_qn( + self, context_node: ASTNode | None, module_qn: str + ) -> str | None: + if context_node is None: + return None + if not (class_node := self._find_containing_java_class(context_node)): + return None + if not (class_name := extract_class_info(class_node).get(cs.FIELD_NAME)): + return None + return self._resolve_java_type_name(class_name, module_qn) + + def _resolve_field_access_chain_type( + self, + object_ref: str, + local_var_types: dict[str, str], + module_qn: str, + context_node: ASTNode | None = None, + ) -> str | None: + parts = object_ref.split(cs.SEPARATOR_DOT) + if len(parts) < 2: + return None + + current_type = self._resolve_java_object_type( + parts[0], local_var_types, module_qn, context_node + ) + if not current_type: + return None + + for field_name in parts[1:]: + next_type = self._lookup_java_field_type( + current_type, field_name, module_qn + ) + if not next_type: + return None + current_type = next_type + + return current_type + def _find_parent_class(self, class_qn: str) -> str | None: parent_classes = self.class_inheritance.get(class_qn, []) return parent_classes[0] if parent_classes else None + def _resolve_sibling_class_qn(self, class_name: str, module_qn: str) -> str | None: + # (H) Resolve a bare class name to a registered Class/Interface in a SIBLING + # (H) file of the same package (directory), so an unqualified same-package + # (H) reference resolves without an import. 
A bare receiver with no import + # (H) is only valid for the current package in Java, so a class in another + # (H) package is NOT a match -- linking it would be a wrong cross-package + # (H) edge; leave the receiver unresolved instead. + if not (candidate_modules := self._fqn_to_module_qn.get(class_name)): + return None + if not (current_file := self.module_qn_to_file_path.get(module_qn)): + return None + current_dir = current_file.parent + for candidate_module in candidate_modules: + candidate_qn = f"{candidate_module}{cs.SEPARATOR_DOT}{class_name}" + if candidate_qn not in self.function_registry or self.function_registry[ + candidate_qn + ] not in (NodeType.CLASS, NodeType.INTERFACE): + continue + candidate_file = self.module_qn_to_file_path.get(candidate_module) + if candidate_file and candidate_file.parent == current_dir: + return candidate_qn + return None + def _resolve_static_or_local_method( self, method_name: str, module_qn: str ) -> tuple[str, str] | None: @@ -202,6 +303,10 @@ def _is_matching_method(self, member: str, method_name: str) -> bool: or member == f"{method_name}{cs.EMPTY_PARENS}" ) + @recursion_guard( + key_func=lambda self, class_qn, *_, **__: class_qn, + guard_name=cs.GUARD_INHERITED_METHOD, + ) def _find_inherited_method( self, class_qn: str, method_name: str, module_qn: str ) -> tuple[str, str] | None: @@ -235,8 +340,10 @@ def _resolve_java_method_return_type( parts = method_call.split(cs.SEPARATOR_DOT) if len(parts) < 2: method_name = method_call - if current_class_qn := self._get_current_class_name(module_qn): - return self._find_method_return_type(current_class_qn, method_name) + if (current_class_qn := self._get_current_class_name(module_qn)) and ( + result := self._find_method_return_type(current_class_qn, method_name) + ): + return result else: object_part = cs.SEPARATOR_DOT.join(parts[:-1]) method_name = parts[-1] @@ -348,34 +455,32 @@ def _do_resolve_java_method_call( logger.debug(ls.JAVA_NO_METHOD_NAME) return None - 
logger.debug( - ls.JAVA_RESOLVING_CALL.format(method=method_name, object=object_ref) - ) + logger.debug(ls.JAVA_RESOLVING_CALL, method=method_name, object=object_ref) if not object_ref: - logger.debug(ls.JAVA_RESOLVING_STATIC.format(method=method_name)) + logger.debug(ls.JAVA_RESOLVING_STATIC, method=method_name) result = self._resolve_static_or_local_method(str(method_name), module_qn) if result: - logger.debug(ls.JAVA_FOUND_STATIC.format(result=result)) + logger.debug(ls.JAVA_FOUND_STATIC, result=result) else: - logger.debug(ls.JAVA_STATIC_NOT_FOUND.format(method=method_name)) + logger.debug(ls.JAVA_STATIC_NOT_FOUND, method=method_name) return result - logger.debug(ls.JAVA_RESOLVING_OBJ_TYPE.format(object=object_ref)) + logger.debug(ls.JAVA_RESOLVING_OBJ_TYPE, object=object_ref) if not ( object_type := self._resolve_java_object_type( - str(object_ref), local_var_types, module_qn + str(object_ref), local_var_types, module_qn, call_node ) ): - logger.debug(ls.JAVA_OBJ_TYPE_UNKNOWN.format(object=object_ref)) + logger.debug(ls.JAVA_OBJ_TYPE_UNKNOWN, object=object_ref) return None - logger.debug(ls.JAVA_OBJ_TYPE_RESOLVED.format(type=object_type)) + logger.debug(ls.JAVA_OBJ_TYPE_RESOLVED, type=object_type) result = self._resolve_instance_method(object_type, str(method_name), module_qn) if result: - logger.debug(ls.JAVA_FOUND_INSTANCE.format(result=result)) + logger.debug(ls.JAVA_FOUND_INSTANCE, result=result) else: logger.debug( - ls.JAVA_INSTANCE_NOT_FOUND.format(type=object_type, method=method_name) + ls.JAVA_INSTANCE_NOT_FOUND, type=object_type, method=method_name ) return result diff --git a/codebase_rag/parsers/java/type_inference.py b/codebase_rag/parsers/java/type_inference.py index 8fd86a7d2..16a0a7047 100644 --- a/codebase_rag/parsers/java/type_inference.py +++ b/codebase_rag/parsers/java/type_inference.py @@ -26,6 +26,21 @@ class JavaTypeInferenceEngine( JavaVariableAnalyzerMixin, JavaMethodResolverMixin, ): + __slots__ = ( + "import_processor", + 
"function_registry", + "repo_path", + "project_name", + "ast_cache", + "queries", + "module_qn_to_file_path", + "class_inheritance", + "simple_name_lookup", + "_lookup_cache", + "_lookup_in_progress", + "_fqn_to_module_qn", + ) + def __init__( self, import_processor: ImportProcessor, @@ -63,16 +78,20 @@ def _add_mapping(key: str, value: str) -> None: for module_qn in self.module_qn_to_file_path.keys(): parts = module_qn.split(cs.SEPARATOR_DOT) - if package_start_idx := find_package_start_index(parts): - if simple_class_name := cs.SEPARATOR_DOT.join( - parts[package_start_idx:] - ): - _add_mapping(simple_class_name, module_qn) - - class_parts = simple_class_name.split(cs.SEPARATOR_DOT) - for j in range(1, len(class_parts)): - suffix = cs.SEPARATOR_DOT.join(class_parts[j:]) - _add_mapping(suffix, module_qn) + # (H) Without a recognized src/main/java layout find_package_start_index + # (H) returns None, leaving the whole map empty so cross-file Java + # (H) resolution (static calls, instance dispatch in sibling files) + # (H) silently fails. Fall back to the segment after the project root + # (H) (index 1) so flat / non-standard layouts still register their + # (H) simple class names. find_package_start_index never returns 0. 
+ package_start_idx = find_package_start_index(parts) or 1 + if simple_class_name := cs.SEPARATOR_DOT.join(parts[package_start_idx:]): + _add_mapping(simple_class_name, module_qn) + + class_parts = simple_class_name.split(cs.SEPARATOR_DOT) + for j in range(1, len(class_parts)): + suffix = cs.SEPARATOR_DOT.join(class_parts[j:]) + _add_mapping(suffix, module_qn) return fqn_map @@ -83,17 +102,19 @@ def build_variable_type_map( try: self._collect_all_variable_types(scope_node, local_var_types, module_qn) - logger.debug(ls.JAVA_VAR_TYPE_MAP_BUILT.format(count=len(local_var_types))) + logger.debug(ls.JAVA_VAR_TYPE_MAP_BUILT, count=len(local_var_types)) except Exception as e: - logger.error(ls.JAVA_VAR_TYPE_MAP_FAILED.format(error=e)) + logger.error(ls.JAVA_VAR_TYPE_MAP_FAILED, error=e) return local_var_types def resolve_java_method_call( - self, call_node: ASTNode, local_var_types: dict[str, str], module_qn: str + self, call_node: ASTNode, local_var_types: dict[str, str] | None, module_qn: str ) -> tuple[str, str] | None: - return self._do_resolve_java_method_call(call_node, local_var_types, module_qn) + return self._do_resolve_java_method_call( + call_node, local_var_types or {}, module_qn + ) def _find_containing_java_class(self, node: ASTNode) -> ASTNode | None: current = node.parent diff --git a/codebase_rag/parsers/java/type_resolver.py b/codebase_rag/parsers/java/type_resolver.py index cbb69fcf7..f1827e6e5 100644 --- a/codebase_rag/parsers/java/type_resolver.py +++ b/codebase_rag/parsers/java/type_resolver.py @@ -20,6 +20,7 @@ class JavaTypeResolverMixin: + __slots__ = () import_processor: ImportProcessor function_registry: FunctionRegistryTrieProtocol module_qn_to_file_path: dict[str, Path] diff --git a/codebase_rag/parsers/java/utils.py b/codebase_rag/parsers/java/utils.py index f267afe47..77784a746 100644 --- a/codebase_rag/parsers/java/utils.py +++ b/codebase_rag/parsers/java/utils.py @@ -114,15 +114,36 @@ def _extract_superclass(class_node: ASTNode) -> str | 
None: superclass_node = class_node.child_by_field_name(cs.TS_FIELD_SUPERCLASS) if not superclass_node: return None + return _extract_type_identifier_name(superclass_node) - match superclass_node.type: + +def _extract_type_identifier_name(node: ASTNode) -> str | None: + match node.type: case cs.TS_TYPE_IDENTIFIER: - return safe_decode_text(superclass_node) + return safe_decode_text(node) + case cs.TS_SCOPED_TYPE_IDENTIFIER: + # (H) `Outer.Base`/`pkg.Base`: keep the full scoped name rather than + # (H) descending to the first segment (the outer/package), which would + # (H) point resolution at the wrong class. + return safe_decode_text(node) case cs.TS_GENERIC_TYPE: - for child in superclass_node.children: - if child.type == cs.TS_TYPE_IDENTIFIER: + # (H) The base of a generic type is its first type_identifier/scoped child + # (H) (e.g. `Box` -> Box, `Outer.Base` -> Outer.Base); ignore the + # (H) type_arguments that follow. + for child in node.children: + if child.type in ( + cs.TS_TYPE_IDENTIFIER, + cs.TS_SCOPED_TYPE_IDENTIFIER, + ): return safe_decode_text(child) - return None + return None + case _: + # (H) `extends X` exposes a `superclass` wrapper node, not the type itself; + # (H) descend into it to reach the type_identifier/generic_type. 
+ for child in node.children: + if name := _extract_type_identifier_name(child): + return name + return None def _extract_interface_name(type_child: ASTNode) -> str | None: diff --git a/codebase_rag/parsers/java/variable_analyzer.py b/codebase_rag/parsers/java/variable_analyzer.py index 65003d9bb..022ddf18d 100644 --- a/codebase_rag/parsers/java/variable_analyzer.py +++ b/codebase_rag/parsers/java/variable_analyzer.py @@ -23,8 +23,10 @@ class JavaVariableAnalyzerMixin: + __slots__ = () ast_cache: ASTCacheProtocol module_qn_to_file_path: dict[str, Path] + class_inheritance: dict[str, list[str]] _lookup_cache: dict[str, str | None] _lookup_in_progress: set[str] @@ -84,7 +86,7 @@ def _process_formal_parameter( if param_name and param_type: resolved_type = self._resolve_java_type_name(param_type, module_qn) local_var_types[param_name] = resolved_type - logger.debug(ls.JAVA_PARAM.format(name=param_name, type=resolved_type)) + logger.debug(ls.JAVA_PARAM, name=param_name, type=resolved_type) def _process_spread_parameter( self, param_node: ASTNode, local_var_types: dict[str, str], module_qn: str @@ -103,9 +105,7 @@ def _process_spread_parameter( if param_name and param_type: resolved_type = self._resolve_java_type_name(param_type, module_qn) local_var_types[param_name] = resolved_type - logger.debug( - ls.JAVA_VARARGS_PARAM.format(name=param_name, type=resolved_type) - ) + logger.debug(ls.JAVA_VARARGS_PARAM, name=param_name, type=resolved_type) def _analyze_java_local_variables( self, scope_node: ASTNode, local_var_types: dict[str, str], module_qn: str @@ -164,15 +164,13 @@ def _process_variable_declarator( resolved_type = self._resolve_java_type_name(inferred_type, module_qn) local_var_types[var_name] = resolved_type logger.debug( - ls.JAVA_LOCAL_VAR_INFERRED.format(name=var_name, type=resolved_type) + ls.JAVA_LOCAL_VAR_INFERRED, name=var_name, type=resolved_type ) return resolved_type = self._resolve_java_type_name(declared_type, module_qn) local_var_types[var_name] = 
resolved_type - logger.debug( - ls.JAVA_LOCAL_VAR_DECLARED.format(name=var_name, type=resolved_type) - ) + logger.debug(ls.JAVA_LOCAL_VAR_DECLARED, name=var_name, type=resolved_type) def _analyze_java_class_fields( self, scope_node: ASTNode, local_var_types: dict[str, str], module_qn: str @@ -201,7 +199,7 @@ def _analyze_java_class_fields( if str(field_name) not in local_var_types: local_var_types[str(field_name)] = resolved_type logger.debug( - ls.JAVA_CLASS_FIELD.format(name=field_name, type=resolved_type) + ls.JAVA_CLASS_FIELD, name=field_name, type=resolved_type ) def _analyze_java_constructor_assignments( @@ -235,7 +233,7 @@ def _process_java_assignment( ): resolved_type = self._resolve_java_type_name(inferred_type, module_qn) local_var_types[var_name] = resolved_type - logger.debug(ls.JAVA_ASSIGNMENT.format(name=var_name, type=resolved_type)) + logger.debug(ls.JAVA_ASSIGNMENT, name=var_name, type=resolved_type) def _extract_java_variable_reference(self, node: ASTNode) -> str | None: match node.type: @@ -297,9 +295,7 @@ def _register_for_loop_variable( ): resolved_type = self._resolve_java_type_name(var_type, module_qn) local_var_types[var_name] = resolved_type - logger.debug( - ls.JAVA_ENHANCED_FOR_VAR.format(name=var_name, type=resolved_type) - ) + logger.debug(ls.JAVA_ENHANCED_FOR_VAR, name=var_name, type=resolved_type) def _extract_for_loop_variable_from_children( self, for_node: ASTNode, local_var_types: dict[str, str], module_qn: str @@ -325,9 +321,9 @@ def _extract_for_loop_variable_from_children( ) local_var_types[var_name] = resolved_type logger.debug( - ls.JAVA_ENHANCED_FOR_VAR_ALT.format( - name=var_name, type=resolved_type - ) + ls.JAVA_ENHANCED_FOR_VAR_ALT, + name=var_name, + type=resolved_type, ) break @@ -399,16 +395,51 @@ def _infer_java_field_access_type( if not object_node or not field_node: return None - object_name = safe_decode_text(object_node) field_name = safe_decode_text(field_node) - - if not object_name or not field_name: + if not 
field_name: return None - if object_type := self._lookup_variable_type(object_name, module_qn): + # (H) A nested receiver (`obj.address.zipCode`) has a field_access as its object; + # (H) recurse to infer that inner type before looking up the outer field, so + # (H) multi-level field access resolves rather than failing on a non-variable name. + if object_node.type == cs.TS_FIELD_ACCESS: + object_type = self._infer_java_field_access_type(object_node, module_qn) + elif object_name := safe_decode_text(object_node): + object_type = self._resolve_field_access_base_type( + object_name, field_access_node, module_qn + ) + else: + object_type = None + + if object_type: return self._lookup_java_field_type(object_type, field_name, module_qn) return None + def _resolve_field_access_base_type( + self, object_name: str, field_access_node: ASTNode, module_qn: str + ) -> str | None: + # (H) `this`/`super` are receiver keywords, not variables: resolve them to the + # (H) containing class (or its superclass) so nested chains rooted at them + # (H) (e.g. `var c = this.address.city`) infer a type instead of failing. + if object_name in (cs.JAVA_KEYWORD_THIS, cs.JAVA_KEYWORD_SUPER): + if not (class_node := self._find_containing_java_class(field_access_node)): + return None + class_info = extract_class_info(class_node) + class_name = class_info.get(cs.FIELD_NAME) + if object_name == cs.JAVA_KEYWORD_THIS: + return class_name + # (H) `super`: return the fully-qualified parent from class_inheritance so a + # (H) nested superclass (`Outer.Base`) resolves; the relative name from the + # (H) AST would be treated as an absolute class key by the field lookup. 
+ if class_name: + own_qn = self._resolve_java_type_name(class_name, module_qn) + if cs.SEPARATOR_DOT not in own_qn: + own_qn = f"{module_qn}{cs.SEPARATOR_DOT}{own_qn}" + if parents := self.class_inheritance.get(own_qn): + return parents[0] + return class_info.get(cs.FIELD_SUPERCLASS) + return self._lookup_variable_type(object_name, module_qn) + def _lookup_variable_type(self, var_name: str, module_qn: str) -> str | None: if not var_name or not module_qn: return None @@ -448,45 +479,82 @@ def _lookup_java_field_type( if not class_type or not field_name: return None - resolved_class_type = self._resolve_java_type_name(class_type, module_qn) - - class_qn = ( - resolved_class_type - if cs.SEPARATOR_DOT in resolved_class_type - else f"{module_qn}{cs.SEPARATOR_DOT}{resolved_class_type}" + resolved = self._resolve_java_type_name(class_type, module_qn) + class_qn: str | None = ( + resolved + if cs.SEPARATOR_DOT in resolved + else f"{module_qn}{cs.SEPARATOR_DOT}{resolved}" ) - parts = class_qn.split(cs.SEPARATOR_DOT) - if len(parts) < 2: - return None - - target_module_qn = cs.SEPARATOR_DOT.join(parts[:-1]) - target_class_name = parts[-1] - - file_path = self.module_qn_to_file_path.get(target_module_qn) - if file_path is None or file_path not in self.ast_cache: - return None + # (H) Walk the inheritance chain using authoritative qualified parents from + # (H) class_inheritance: a field accessed on a subclass may be declared on a + # (H) superclass, including a nested one like `Outer.Base`. Seen-guarded. 
+ seen: set[str] = set() + while class_qn and class_qn not in seen: + seen.add(class_qn) + if located := self._locate_class(class_qn): + root_node, class_path, target_module_qn = located + if field_type := self._find_field_type_in_nested_class( + root_node, class_path, field_name, target_module_qn + ): + return field_type + parents = self.class_inheritance.get(class_qn) + class_qn = parents[0] if parents else None - root_node, _ = self.ast_cache[file_path] + return None - return self._find_field_type_in_class( - root_node, target_class_name, field_name, target_module_qn - ) + def _locate_class(self, class_qn: str) -> tuple[ASTNode, list[str], str] | None: + # (H) The file module is the longest registered prefix of the class qn; the + # (H) remaining segments are the (possibly nested) class path within that file, + # (H) so `proj.pkg.Outer.Base` resolves to file `proj.pkg` + path [Outer, Base]. + parts = class_qn.split(cs.SEPARATOR_DOT) + for split in range(len(parts) - 1, 0, -1): + module_candidate = cs.SEPARATOR_DOT.join(parts[:split]) + file_path = self.module_qn_to_file_path.get(module_candidate) + if file_path is not None and file_path in self.ast_cache: + root_node, _ = self.ast_cache[file_path] + return root_node, parts[split:], module_candidate + return None def _find_field_type_in_class( self, root_node: ASTNode, class_name: str, field_name: str, module_qn: str ) -> str | None: - for child in root_node.children: - if child.type == cs.TS_CLASS_DECLARATION: - class_info = extract_class_info(child) - if class_info.get(cs.FIELD_NAME) == class_name: - if class_body := child.child_by_field_name(cs.FIELD_BODY): - for field_child in class_body.children: - if field_child.type == cs.TS_FIELD_DECLARATION: - field_info = extract_field_info(field_child) - if field_info.get(cs.FIELD_NAME) == field_name: - if field_type := field_info.get(cs.FIELD_TYPE): - return self._resolve_java_type_name( - str(field_type), module_qn - ) + return self._find_field_type_in_nested_class( 
+ root_node, [class_name], field_name, module_qn + ) + + def _find_field_type_in_nested_class( + self, + root_node: ASTNode, + class_path: list[str], + field_name: str, + module_qn: str, + ) -> str | None: + children = root_node.children + body: ASTNode | None = None + for class_name in class_path: + class_node = next( + ( + child + for child in children + if child.type == cs.TS_CLASS_DECLARATION + and extract_class_info(child).get(cs.FIELD_NAME) == class_name + ), + None, + ) + if class_node is None or not ( + body := class_node.child_by_field_name(cs.FIELD_BODY) + ): + return None + children = body.children + + if body is None: + return None + + for field_child in body.children: + if field_child.type == cs.TS_FIELD_DECLARATION: + field_info = extract_field_info(field_child) + if field_info.get(cs.FIELD_NAME) == field_name: + if field_type := field_info.get(cs.FIELD_TYPE): + return self._resolve_java_type_name(str(field_type), module_qn) return None diff --git a/codebase_rag/parsers/js_ts/ingest.py b/codebase_rag/parsers/js_ts/ingest.py index 30580e184..2641ae367 100644 --- a/codebase_rag/parsers/js_ts/ingest.py +++ b/codebase_rag/parsers/js_ts/ingest.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING from loguru import logger -from tree_sitter import Query, QueryCursor +from tree_sitter import QueryCursor from ... import constants as cs from ... 
import logs as lg @@ -16,7 +16,12 @@ PropertyDict, SimpleNameLookup, ) -from ..utils import safe_decode_text, safe_decode_with_fallback +from ..utils import ( + get_cached_query, + safe_decode_text, + safe_decode_with_fallback, + sorted_captures, +) from .module_system import JsTsModuleSystemMixin from .utils import get_js_ts_language_obj @@ -29,6 +34,7 @@ class JsTsIngestMixin(JsTsModuleSystemMixin): + __slots__ = () ingestor: IngestorProtocol repo_path: Path project_name: str @@ -88,14 +94,14 @@ def _ingest_prototype_inheritance_links( language_obj, root_node, module_qn ) except Exception as e: - logger.debug(lg.JS_PROTOTYPE_INHERITANCE_FAILED.format(error=e)) + logger.debug(lg.JS_PROTOTYPE_INHERITANCE_FAILED, error=e) def _process_prototype_inheritance_captures( self, language_obj, root_node, module_qn ): - query = Query(language_obj, cs.JS_PROTOTYPE_INHERITANCE_QUERY) + query = get_cached_query(language_obj, cs.JS_PROTOTYPE_INHERITANCE_QUERY) cursor = QueryCursor(query) - captures = cursor.captures(root_node) + captures = sorted_captures(cursor, root_node) child_classes = captures.get(cs.CAPTURE_CHILD_CLASS, []) parent_classes = captures.get(cs.CAPTURE_PARENT_CLASS, []) @@ -122,9 +128,7 @@ def _process_prototype_inheritance_captures( ) logger.debug( - lg.JS_PROTOTYPE_INHERITANCE.format( - child_qn=child_qn, parent_qn=parent_qn - ) + lg.JS_PROTOTYPE_INHERITANCE, child_qn=child_qn, parent_qn=parent_qn ) def _ingest_prototype_method_assignments( @@ -143,12 +147,12 @@ def _ingest_prototype_method_assignments( try: self._process_prototype_method_captures(language_obj, root_node, module_qn) except Exception as e: - logger.debug(lg.JS_PROTOTYPE_METHODS_FAILED.format(error=e)) + logger.debug(lg.JS_PROTOTYPE_METHODS_FAILED, error=e) def _process_prototype_method_captures(self, language_obj, root_node, module_qn): - method_query = Query(language_obj, cs.JS_PROTOTYPE_METHOD_QUERY) + method_query = get_cached_query(language_obj, cs.JS_PROTOTYPE_METHOD_QUERY) method_cursor 
= QueryCursor(method_query) - method_captures = method_cursor.captures(root_node) + method_captures = sorted_captures(method_cursor, root_node) constructor_names = method_captures.get(cs.CAPTURE_CONSTRUCTOR_NAME, []) method_names = method_captures.get(cs.CAPTURE_METHOD_NAME, []) @@ -165,6 +169,9 @@ def _process_prototype_method_captures(self, language_obj, root_node, module_qn) if constructor_name and method_name: constructor_qn = f"{module_qn}{cs.SEPARATOR_DOT}{constructor_name}" method_qn = f"{constructor_qn}{cs.SEPARATOR_DOT}{method_name}" + method_qn = self.function_registry.register_unique_qn( + method_qn, func_node.start_point[0] + 1 + ) method_props: PropertyDict = { cs.KEY_QUALIFIED_NAME: method_qn, @@ -174,9 +181,9 @@ def _process_prototype_method_captures(self, language_obj, root_node, module_qn) cs.KEY_DOCSTRING: self._get_docstring(func_node), } logger.info( - lg.JS_PROTOTYPE_METHOD_FOUND.format( - method_name=method_name, method_qn=method_qn - ) + lg.JS_PROTOTYPE_METHOD_FOUND, + method_name=method_name, + method_qn=method_qn, ) self.ingestor.ensure_node_batch(cs.NodeLabel.FUNCTION, method_props) @@ -190,9 +197,9 @@ def _process_prototype_method_captures(self, language_obj, root_node, module_qn) ) logger.debug( - lg.JS_PROTOTYPE_METHOD_DEFINES.format( - constructor_qn=constructor_qn, method_qn=method_qn - ) + lg.JS_PROTOTYPE_METHOD_DEFINES, + constructor_qn=constructor_qn, + method_qn=method_qn, ) def _ingest_object_literal_methods( @@ -213,7 +220,7 @@ def _ingest_object_literal_methods( language_obj, query_text, root_node, module_qn, lang_config ) except Exception as e: - logger.debug(lg.JS_OBJECT_METHODS_DETECT_FAILED.format(error=e)) + logger.debug(lg.JS_OBJECT_METHODS_DETECT_FAILED, error=e) def _process_object_method_query( self, @@ -224,9 +231,9 @@ def _process_object_method_query( lang_config, ) -> None: try: - query = Query(language_obj, query_text) + query = get_cached_query(language_obj, query_text) cursor = QueryCursor(query) - captures = 
cursor.captures(root_node) + captures = sorted_captures(cursor, root_node) method_names = captures.get(cs.CAPTURE_METHOD_NAME, []) method_functions = captures.get(cs.CAPTURE_METHOD_FUNCTION, []) @@ -250,7 +257,7 @@ def _process_object_method_query( method_name_node, method_func_node, module_qn, lang_config ) except Exception as e: - logger.debug(lg.JS_OBJECT_METHODS_PROCESS_FAILED.format(error=e)) + logger.debug(lg.JS_OBJECT_METHODS_PROCESS_FAILED, error=e) def _process_single_object_method( self, @@ -306,6 +313,9 @@ def _register_object_method( method_func_node: ASTNode, module_qn: str, ) -> None: + method_qn = self.function_registry.register_unique_qn( + method_qn, method_func_node.start_point[0] + 1 + ) method_props: PropertyDict = { cs.KEY_QUALIFIED_NAME: method_qn, cs.KEY_NAME: method_name, @@ -314,9 +324,7 @@ def _register_object_method( cs.KEY_DOCSTRING: self._get_docstring(method_func_node), } logger.info( - lg.JS_OBJECT_METHOD_FOUND.format( - method_name=method_name, method_qn=method_qn - ) + lg.JS_OBJECT_METHOD_FOUND, method_name=method_name, method_qn=method_qn ) self.ingestor.ensure_node_batch(cs.NodeLabel.FUNCTION, method_props) @@ -352,7 +360,7 @@ def _ingest_assignment_arrow_functions( lang_query, query_text, root_node, module_qn, lang_config ) except Exception as e: - logger.debug(lg.JS_ASSIGNMENT_ARROW_DETECT_FAILED.format(error=e)) + logger.debug(lg.JS_ASSIGNMENT_ARROW_DETECT_FAILED, error=e) def _process_arrow_query( self, @@ -363,9 +371,9 @@ def _process_arrow_query( lang_config, ) -> None: try: - query = Query(lang_query, query_text) + query = get_cached_query(lang_query, query_text) cursor = QueryCursor(query) - captures = cursor.captures(root_node) + captures = sorted_captures(cursor, root_node) method_names = captures.get(cs.CAPTURE_METHOD_NAME, []) member_exprs = captures.get(cs.CAPTURE_MEMBER_EXPR, []) @@ -390,7 +398,7 @@ def _process_arrow_query( lg.JS_ASSIGNMENT_FUNC_EXPR_FOUND, ) except Exception as e: - 
logger.debug(lg.JS_ASSIGNMENT_ARROW_QUERY_FAILED.format(error=e)) + logger.debug(lg.JS_ASSIGNMENT_ARROW_QUERY_FAILED, error=e) def _process_direct_arrow_functions( self, @@ -498,6 +506,9 @@ def _register_arrow_function( function_node: ASTNode, log_message: str, ) -> None: + function_qn = self.function_registry.register_unique_qn( + function_qn, function_node.start_point[0] + 1 + ) function_props: PropertyDict = { cs.KEY_QUALIFIED_NAME: function_qn, cs.KEY_NAME: function_name, @@ -506,9 +517,7 @@ def _register_arrow_function( cs.KEY_DOCSTRING: self._get_docstring(function_node), } - logger.debug( - log_message.format(function_name=function_name, function_qn=function_qn) - ) + logger.debug(log_message, function_name=function_name, function_qn=function_qn) self.ingestor.ensure_node_batch(cs.NodeLabel.FUNCTION, function_props) self.function_registry[function_qn] = NodeType.FUNCTION self.simple_name_lookup[function_name].add(function_qn) diff --git a/codebase_rag/parsers/js_ts/module_system.py b/codebase_rag/parsers/js_ts/module_system.py index 436603575..c41296502 100644 --- a/codebase_rag/parsers/js_ts/module_system.py +++ b/codebase_rag/parsers/js_ts/module_system.py @@ -6,15 +6,17 @@ from typing import TYPE_CHECKING from loguru import logger -from tree_sitter import Query, QueryCursor +from tree_sitter import QueryCursor from ... import constants as cs from ... 
import logs as ls from ...types_defs import ASTNode from ..utils import ( + get_cached_query, ingest_exported_function, safe_decode_text, safe_decode_with_fallback, + sorted_captures, ) from .utils import get_js_ts_language_obj @@ -29,6 +31,7 @@ class JsTsModuleSystemMixin: + __slots__ = ("_processed_imports",) ingestor: IngestorProtocol repo_path: Path project_name: str @@ -59,9 +62,9 @@ def _ingest_missing_import_patterns( try: try: - query = Query(language_obj, cs.JS_COMMONJS_DESTRUCTURE_QUERY) + query = get_cached_query(language_obj, cs.JS_COMMONJS_DESTRUCTURE_QUERY) cursor = QueryCursor(query) - captures = cursor.captures(root_node) + captures = sorted_captures(cursor, root_node) variable_declarators = captures.get(cs.CAPTURE_VARIABLE_DECLARATOR, []) @@ -71,10 +74,10 @@ def _ingest_missing_import_patterns( ) except Exception as e: - logger.debug(ls.JS_COMMONJS_DESTRUCTURE_FAILED.format(error=e)) + logger.debug(ls.JS_COMMONJS_DESTRUCTURE_FAILED, error=e) except Exception as e: - logger.debug(ls.JS_MISSING_IMPORT_PATTERNS_FAILED.format(error=e)) + logger.debug(ls.JS_MISSING_IMPORT_PATTERNS_FAILED, error=e) def _extract_require_module_name(self, declarator: ASTNode) -> str | None: name_node = declarator.child_by_field_name(cs.FIELD_NAME) @@ -148,7 +151,7 @@ def _process_variable_declarator_for_commonjs( self._process_destructured_child(child, module_name, module_qn) except Exception as e: - logger.debug(ls.JS_COMMONJS_VAR_DECLARATOR_FAILED.format(error=e)) + logger.debug(ls.JS_COMMONJS_VAR_DECLARATOR_FAILED, error=e) def _process_commonjs_import( self, imported_name: str, module_name: str, module_qn: str @@ -179,20 +182,17 @@ def _process_commonjs_import( ) logger.debug( - ls.JS_MISSING_IMPORT_PATTERN.format( - module_qn=module_qn, - imported_name=imported_name, - resolved_source_module=resolved_source_module, - ) + ls.JS_MISSING_IMPORT_PATTERN, + module_qn=module_qn, + imported_name=imported_name, + resolved_source_module=resolved_source_module, ) 
self._processed_imports.add(import_key) except Exception as e: logger.debug( - ls.JS_COMMONJS_IMPORT_FAILED.format( - imported_name=imported_name, error=e - ) + ls.JS_COMMONJS_IMPORT_FAILED, imported_name=imported_name, error=e ) def _ingest_export_function( @@ -282,9 +282,8 @@ def _ingest_commonjs_exports( for query_text in query_texts: try: - captures = QueryCursor(Query(language_obj, query_text)).captures( - root_node - ) + cursor = QueryCursor(get_cached_query(language_obj, query_text)) + captures = sorted_captures(cursor, root_node) self._process_exports_pattern( captures.get(cs.CAPTURE_EXPORTS_OBJ, []), @@ -302,7 +301,7 @@ def _ingest_commonjs_exports( ) except Exception as e: - logger.debug(ls.JS_COMMONJS_EXPORTS_QUERY_FAILED.format(error=e)) + logger.debug(ls.JS_COMMONJS_EXPORTS_QUERY_FAILED, error=e) def _ingest_es6_exports( self, @@ -320,9 +319,9 @@ def _ingest_es6_exports( ]: try: cleaned_query = textwrap.dedent(query_text).strip() - query = Query(lang_query, cleaned_query) + query = get_cached_query(lang_query, cleaned_query) cursor = QueryCursor(query) - captures = cursor.captures(root_node) + captures = sorted_captures(cursor, root_node) export_names = captures.get(cs.CAPTURE_EXPORT_NAME, []) export_functions = captures.get(cs.CAPTURE_EXPORT_FUNCTION, []) @@ -365,7 +364,7 @@ def _ingest_es6_exports( ) except Exception as e: - logger.debug(ls.JS_ES6_EXPORTS_QUERY_FAILED.format(error=e)) + logger.debug(ls.JS_ES6_EXPORTS_QUERY_FAILED, error=e) except Exception as e: - logger.debug(ls.JS_ES6_EXPORTS_DETECT_FAILED.format(error=e)) + logger.debug(ls.JS_ES6_EXPORTS_DETECT_FAILED, error=e) diff --git a/codebase_rag/parsers/js_ts/type_inference.py b/codebase_rag/parsers/js_ts/type_inference.py index e4930e365..590beb44e 100644 --- a/codebase_rag/parsers/js_ts/type_inference.py +++ b/codebase_rag/parsers/js_ts/type_inference.py @@ -1,83 +1,149 @@ +from __future__ import annotations + from collections.abc import Callable +from typing import TYPE_CHECKING from 
loguru import logger +from tree_sitter import Node, QueryCursor from ... import constants as cs from ... import logs as ls from ...types_defs import ASTNode, FunctionRegistryTrieProtocol, NodeType from ..import_processor import ImportProcessor -from ..utils import safe_decode_text +from ..utils import get_cached_query, safe_decode_text from . import utils as ut +if TYPE_CHECKING: + from ...types_defs import LanguageQueries + +_JS_DECLARATOR_QUERY = "(variable_declarator) @declarator" + class JsTypeInferenceEngine: + __slots__ = ( + "import_processor", + "function_registry", + "project_name", + "_find_method_ast_node", + "_queries", + ) + def __init__( self, import_processor: ImportProcessor, function_registry: FunctionRegistryTrieProtocol, project_name: str, find_method_ast_node_func: Callable[[str], ASTNode | None], + queries: dict[cs.SupportedLanguage, LanguageQueries] | None = None, ): self.import_processor = import_processor self.function_registry = function_registry self.project_name = project_name self._find_method_ast_node = find_method_ast_node_func + self._queries = queries + + def _get_declarators_via_query( + self, caller_node: ASTNode, language: cs.SupportedLanguage | None = None + ) -> list[Node] | None: + if self._queries is None: + return None + langs = ( + [language] + if language is not None + else [cs.SupportedLanguage.JS, cs.SupportedLanguage.TS] + ) + for lang in langs: + lang_queries = self._queries.get(lang) + if lang_queries and "language" in lang_queries: + try: + q = get_cached_query(lang_queries["language"], _JS_DECLARATOR_QUERY) + cursor = QueryCursor(q) + captures = cursor.captures(caller_node) + return captures.get("declarator", []) + except Exception: + continue + return None def build_local_variable_type_map( - self, caller_node: ASTNode, module_qn: str + self, + caller_node: ASTNode, + module_qn: str, + language: cs.SupportedLanguage | None = None, ) -> dict[str, str]: local_var_types: dict[str, str] = {} - - stack: list[ASTNode] = 
[caller_node] - declarator_count = 0 - while stack: - current = stack.pop() - - if current.type == cs.TS_VARIABLE_DECLARATOR: + declarator_nodes = self._get_declarators_via_query(caller_node, language) + if declarator_nodes is not None: + for current in declarator_nodes: declarator_count += 1 name_node = current.child_by_field_name("name") value_node = current.child_by_field_name("value") - if name_node and value_node: var_name_text = name_node.text if var_name_text: var_name = safe_decode_text(name_node) if var_name is not None: logger.debug( - ls.JS_VAR_DECLARATOR_FOUND.format( - var_name=var_name, module_qn=module_qn - ) + ls.JS_VAR_DECLARATOR_FOUND, + var_name=var_name, + module_qn=module_qn, ) - if var_type := self._infer_js_variable_type_from_value( value_node, module_qn ): local_var_types[var_name] = var_type logger.debug( - ls.JS_VAR_INFERRED.format( - var_name=var_name, var_type=var_type - ) + ls.JS_VAR_INFERRED, + var_name=var_name, + var_type=var_type, ) else: + logger.debug(ls.JS_VAR_INFER_FAILED, var_name=var_name) + else: + stack: list[ASTNode] = [caller_node] + while stack: + current = stack.pop() + if current.type == cs.TS_VARIABLE_DECLARATOR: + declarator_count += 1 + name_node = current.child_by_field_name("name") + value_node = current.child_by_field_name("value") + if name_node and value_node: + var_name_text = name_node.text + if var_name_text: + var_name = safe_decode_text(name_node) + if var_name is not None: logger.debug( - ls.JS_VAR_INFER_FAILED.format(var_name=var_name) + ls.JS_VAR_DECLARATOR_FOUND, + var_name=var_name, + module_qn=module_qn, ) - - stack.extend(reversed(current.children)) + if var_type := self._infer_js_variable_type_from_value( + value_node, module_qn + ): + local_var_types[var_name] = var_type + logger.debug( + ls.JS_VAR_INFERRED, + var_name=var_name, + var_type=var_type, + ) + else: + logger.debug( + ls.JS_VAR_INFER_FAILED, var_name=var_name + ) + stack.extend(reversed(current.children)) logger.debug( - 
ls.JS_VAR_TYPE_MAP_BUILT.format( - count=len(local_var_types), declarator_count=declarator_count - ) + ls.JS_VAR_TYPE_MAP_BUILT, + count=len(local_var_types), + declarator_count=declarator_count, ) return local_var_types def _infer_js_variable_type_from_value( self, value_node: ASTNode, module_qn: str ) -> str | None: - logger.debug(ls.JS_INFER_VALUE_NODE.format(node_type=value_node.type)) + logger.debug(ls.JS_INFER_VALUE_NODE, node_type=value_node.type) if value_node.type == cs.TS_NEW_EXPRESSION: if class_name := ut.extract_constructor_name(value_node): @@ -87,28 +153,23 @@ def _infer_js_variable_type_from_value( elif value_node.type == cs.TS_CALL_EXPRESSION: func_node = value_node.child_by_field_name("function") func_type = func_node.type if func_node else cs.STR_NONE - logger.debug(ls.JS_CALL_EXPR_FUNC_NODE.format(func_type=func_type)) + logger.debug(ls.JS_CALL_EXPR_FUNC_NODE, func_type=func_type) if func_node and func_node.type == cs.TS_MEMBER_EXPRESSION: method_call_text = ut.extract_method_call(func_node) - logger.debug( - ls.JS_EXTRACTED_METHOD_CALL.format(method_call=method_call_text) - ) + logger.debug(ls.JS_EXTRACTED_METHOD_CALL, method_call=method_call_text) if method_call_text: if inferred_type := self._infer_js_method_return_type( method_call_text, module_qn ): logger.debug( - ls.JS_TYPE_INFERRED.format( - method_call=method_call_text, - inferred_type=inferred_type, - ) + ls.JS_TYPE_INFERRED, + method_call=method_call_text, + inferred_type=inferred_type, ) return inferred_type logger.debug( - ls.JS_RETURN_TYPE_INFER_FAILED.format( - method_call=method_call_text - ) + ls.JS_RETURN_TYPE_INFER_FAILED, method_call=method_call_text ) elif func_node and func_node.type == cs.TS_IDENTIFIER: @@ -116,7 +177,7 @@ def _infer_js_variable_type_from_value( if func_name: return safe_decode_text(func_node) - logger.debug(ls.JS_NO_PATTERN_MATCHED.format(node_type=value_node.type)) + logger.debug(ls.JS_NO_PATTERN_MATCHED, node_type=value_node.type) return None def 
_infer_js_method_return_type( @@ -124,7 +185,7 @@ def _infer_js_method_return_type( ) -> str | None: parts = method_call.split(cs.SEPARATOR_DOT) if len(parts) != 2: - logger.debug(ls.JS_METHOD_CALL_INVALID.format(method_call=method_call)) + logger.debug(ls.JS_METHOD_CALL_INVALID, method_call=method_call) return None class_name, method_name = parts @@ -132,27 +193,23 @@ def _infer_js_method_return_type( class_qn = self._resolve_js_class_name(class_name, module_qn) if not class_qn: logger.debug( - ls.JS_CLASS_RESOLVE_FAILED.format( - class_name=class_name, module_qn=module_qn - ) + ls.JS_CLASS_RESOLVE_FAILED, class_name=class_name, module_qn=module_qn ) return None - logger.debug( - ls.JS_CLASS_RESOLVED.format(class_name=class_name, class_qn=class_qn) - ) + logger.debug(ls.JS_CLASS_RESOLVED, class_name=class_name, class_qn=class_qn) method_qn = f"{class_qn}{cs.SEPARATOR_DOT}{method_name}" - logger.debug(ls.JS_LOOKING_FOR_METHOD.format(method_qn=method_qn)) + logger.debug(ls.JS_LOOKING_FOR_METHOD, method_qn=method_qn) method_node = self._find_method_ast_node(method_qn) if not method_node: - logger.debug(ls.JS_METHOD_AST_NOT_FOUND.format(method_qn=method_qn)) + logger.debug(ls.JS_METHOD_AST_NOT_FOUND, method_qn=method_qn) return None return_type = self._analyze_return_statements(method_node, method_qn) logger.debug( - ls.JS_RETURN_ANALYZED.format(method_qn=method_qn, return_type=return_type) + ls.JS_RETURN_ANALYZED, method_qn=method_qn, return_type=return_type ) return return_type @@ -180,11 +237,20 @@ def _resolve_js_class_name(self, class_name: str, module_qn: str) -> str | None: return None + def _get_language_obj(self) -> object | None: + if self._queries is None: + return None + for lang in (cs.SupportedLanguage.JS, cs.SupportedLanguage.TS): + lang_queries = self._queries.get(lang) + if lang_queries and "language" in lang_queries: + return lang_queries["language"] + return None + def _analyze_return_statements( self, method_node: ASTNode, method_qn: str ) -> str | 
None: return_nodes: list[ASTNode] = [] - ut.find_return_statements(method_node, return_nodes) + ut.find_return_statements(method_node, return_nodes, self._get_language_obj()) for return_node in return_nodes: for child in return_node.children: diff --git a/codebase_rag/parsers/js_ts/utils.py b/codebase_rag/parsers/js_ts/utils.py index 5049afb0c..752660db7 100644 --- a/codebase_rag/parsers/js_ts/utils.py +++ b/codebase_rag/parsers/js_ts/utils.py @@ -1,9 +1,9 @@ from typing import TYPE_CHECKING -from tree_sitter import Language, Node +from tree_sitter import Language, Node, QueryCursor from ... import constants as cs -from ..utils import safe_decode_text +from ..utils import get_cached_query, safe_decode_text if TYPE_CHECKING: from ...types_defs import LanguageQueries @@ -53,11 +53,26 @@ def find_method_in_class_body(class_body_node: Node, method_name: str) -> Node | return None +_CLASS_BODY_CACHE: dict[tuple[int, str], Node | None] = {} +_CLASS_BODY_CACHE_OWNER: int | None = None + + def find_method_in_ast( root_node: Node, class_name: str, method_name: str ) -> Node | None: - stack: list[Node] = [root_node] + global _CLASS_BODY_CACHE_OWNER + root_id = id(root_node) + if _CLASS_BODY_CACHE_OWNER != root_id: + _CLASS_BODY_CACHE.clear() + _CLASS_BODY_CACHE_OWNER = root_id + cache_key = (root_id, class_name) + if cache_key in _CLASS_BODY_CACHE: + body_node = _CLASS_BODY_CACHE[cache_key] + if body_node is not None: + return find_method_in_class_body(body_node, method_name) + return None + stack: list[Node] = [root_node] while stack: current = stack.pop() @@ -66,23 +81,38 @@ def find_method_in_ast( if name_node and name_node.text: found_class_name = safe_decode_text(name_node) if found_class_name == class_name: - if body_node := current.child_by_field_name(cs.FIELD_BODY): + body_node = current.child_by_field_name(cs.FIELD_BODY) + _CLASS_BODY_CACHE[cache_key] = body_node + if body_node: return find_method_in_class_body(body_node, method_name) + return None 
stack.extend(reversed(current.children)) + _CLASS_BODY_CACHE[cache_key] = None return None -def find_return_statements(node: Node, return_nodes: list[Node]) -> None: - stack: list[Node] = [node] +_JS_RETURN_QUERY = "(return_statement) @return_stmt" + +def find_return_statements( + node: Node, return_nodes: list[Node], language_obj=None +) -> None: + if language_obj is not None: + try: + q = get_cached_query(language_obj, _JS_RETURN_QUERY) + cursor = QueryCursor(q) + captures = cursor.captures(node) + return_nodes.extend(captures.get("return_stmt", [])) + return + except Exception: + pass + stack: list[Node] = [node] while stack: current = stack.pop() - if current.type == cs.TS_RETURN_STATEMENT: return_nodes.append(current) - stack.extend(reversed(current.children)) diff --git a/codebase_rag/parsers/lua/type_inference.py b/codebase_rag/parsers/lua/type_inference.py index 99a5515ba..92b910881 100644 --- a/codebase_rag/parsers/lua/type_inference.py +++ b/codebase_rag/parsers/lua/type_inference.py @@ -14,6 +14,12 @@ class LuaTypeInferenceEngine: + __slots__ = ( + "import_processor", + "function_registry", + "project_name", + ) + def __init__( self, import_processor: ImportProcessor, @@ -36,7 +42,7 @@ def build_local_variable_type_map( self._process_variable_declaration(current, module_qn, local_var_types) stack.extend(reversed(current.children)) - logger.debug(ls.LUA_VAR_TYPE_MAP_BUILT.format(count=len(local_var_types))) + logger.debug(ls.LUA_VAR_TYPE_MAP_BUILT, count=len(local_var_types)) return local_var_types def _process_variable_declaration( @@ -62,9 +68,7 @@ def _process_variable_declaration( func_calls[i], module_qn ): local_var_types[var_name] = var_type - logger.debug( - ls.LUA_VAR_INFERRED.format(var_name=var_name, var_type=var_type) - ) + logger.debug(ls.LUA_VAR_INFERRED, var_name=var_name, var_type=var_type) def _extract_var_names(self, assignment: TreeSitterNodeProtocol) -> list[str]: names: list[str] = [] @@ -110,11 +114,10 @@ def 
_infer_lua_variable_type_from_value( class_name, module_qn ): logger.debug( - ls.LUA_TYPE_INFERENCE_RETURN.format( - class_name=class_name, - method_name=method_name, - class_qn=class_qn, - ) + ls.LUA_TYPE_INFERENCE_RETURN, + class_name=class_name, + method_name=method_name, + class_qn=class_qn, ) return class_qn diff --git a/codebase_rag/parsers/py/ast_analyzer.py b/codebase_rag/parsers/py/ast_analyzer.py index ec663db4f..9aea42fc2 100644 --- a/codebase_rag/parsers/py/ast_analyzer.py +++ b/codebase_rag/parsers/py/ast_analyzer.py @@ -10,7 +10,14 @@ from ... import logs as lg from ...types_defs import LanguageQueries from ..js_ts.utils import find_method_in_ast as find_js_method_in_ast -from ..utils import safe_decode_text +from ..utils import get_cached_query, safe_decode_text, sorted_captures + +_PY_TRAVERSE_QUERY = ( + f"({cs.TS_PY_ASSIGNMENT}) @assignment " + f"({cs.TS_PY_LIST_COMPREHENSION}) @comprehension " + f"({cs.TS_PY_FOR_STATEMENT}) @for_stmt " + f"({cs.TS_PY_RETURN_STATEMENT}) @return_stmt" +) if TYPE_CHECKING: from collections.abc import Callable @@ -45,6 +52,7 @@ def _infer_instance_variable_types_from_assignments( class PythonAstAnalyzerMixin(_AstBase): + __slots__ = () queries: dict[cs.SupportedLanguage, LanguageQueries] module_qn_to_file_path: dict[str, Path] ast_cache: ASTCacheProtocol @@ -72,6 +80,8 @@ def _infer_method_call_return_type( @abstractmethod def _find_class_in_scope(self, class_name: str, module_qn: str) -> str | None: ... 
+ _return_stmt_cache: dict[int, list[Node]] + def _traverse_single_pass( self, node: Node, local_var_types: dict[str, str], module_qn: str ) -> None: @@ -79,19 +89,35 @@ def _traverse_single_pass( comprehensions: list[Node] = [] for_statements: list[Node] = [] - stack: list[Node] = [node] - while stack: - current = stack.pop() - node_type = current.type - - if node_type == cs.TS_PY_ASSIGNMENT: - assignments.append(current) - elif node_type == cs.TS_PY_LIST_COMPREHENSION: - comprehensions.append(current) - elif node_type == cs.TS_PY_FOR_STATEMENT: - for_statements.append(current) - - stack.extend(reversed(current.children)) + py_lang_queries = self.queries.get(cs.SupportedLanguage.PYTHON) + py_lang_obj = py_lang_queries["language"] if py_lang_queries else None + if py_lang_obj is not None: + try: + q = get_cached_query(py_lang_obj, _PY_TRAVERSE_QUERY) + cursor = QueryCursor(q) + captures = cursor.captures(node) + assignments = captures.get("assignment", []) + comprehensions = captures.get("comprehension", []) + for_statements = captures.get("for_stmt", []) + if return_stmts := captures.get("return_stmt"): + self._return_stmt_cache[id(node)] = return_stmts + except Exception: + py_lang_obj = None + + if py_lang_obj is None: + stack: list[Node] = [node] + while stack: + current = stack.pop() + node_type = current.type + + if node_type == cs.TS_PY_ASSIGNMENT: + assignments.append(current) + elif node_type == cs.TS_PY_LIST_COMPREHENSION: + comprehensions.append(current) + elif node_type == cs.TS_PY_FOR_STATEMENT: + for_statements.append(current) + + stack.extend(reversed(current.children)) for assignment in assignments: self._process_assignment_simple(assignment, local_var_types, module_qn) @@ -140,7 +166,7 @@ def _process_assignment_simple( right_node, module_qn ): local_var_types[var_name] = inferred_type - logger.debug(lg.PY_TYPE_SIMPLE.format(var=var_name, type=inferred_type)) + logger.debug(lg.PY_TYPE_SIMPLE, var=var_name, type=inferred_type) def 
_process_assignment_complex( self, assignment_node: Node, local_var_types: dict[str, str], module_qn: str @@ -162,7 +188,7 @@ def _process_assignment_complex( right_node, module_qn, local_var_types ): local_var_types[var_name] = inferred_type - logger.debug(lg.PY_TYPE_COMPLEX.format(var=var_name, type=inferred_type)) + logger.debug(lg.PY_TYPE_COMPLEX, var=var_name, type=inferred_type) def _extract_assignment_variable_name(self, node: Node) -> str | None: if node.type != cs.TS_PY_IDENTIFIER or node.text is None: @@ -202,6 +228,32 @@ def _find_method_in_ast( case _: return None + def _find_class_node(self, class_qn: str) -> Node | None: + # (H) Locate a class definition node from its qualified name so cross-class + # (H) attribute/property types can be read when resolving chained calls. + module_qn, _, class_name = class_qn.rpartition(cs.SEPARATOR_DOT) + if not module_qn: + return None + file_path = self.module_qn_to_file_path.get(module_qn) + if not file_path or file_path not in self.ast_cache: + return None + root_node, language = self.ast_cache[file_path] + if language != cs.SupportedLanguage.PYTHON: + return None + lang_queries = self.queries[cs.SupportedLanguage.PYTHON] + class_query = lang_queries[cs.QUERY_KEY_CLASSES] + if not class_query: + return None + cursor = QueryCursor(class_query) + captures = sorted_captures(cursor, root_node) + for class_node in captures.get(cs.QUERY_CAPTURE_CLASS, []): + if not isinstance(class_node, Node): + continue + name_node = class_node.child_by_field_name(cs.TS_FIELD_NAME) + if name_node and safe_decode_text(name_node) == class_name: + return class_node + return None + def _find_python_method_in_ast( self, root_node: Node, class_name: str, method_name: str ) -> Node | None: @@ -210,7 +262,7 @@ def _find_python_method_in_ast( if not class_query: return None cursor = QueryCursor(class_query) - captures = cursor.captures(root_node) + captures = sorted_captures(cursor, root_node) method_query = 
lang_queries[cs.QUERY_KEY_FUNCTIONS] if not method_query: @@ -232,7 +284,7 @@ def _find_python_method_in_ast( continue method_cursor = QueryCursor(method_query) - method_captures = method_cursor.captures(body_node) + method_captures = sorted_captures(method_cursor, body_node) for method_node in method_captures.get(cs.QUERY_CAPTURE_FUNCTION, []): if not isinstance(method_node, Node): @@ -272,13 +324,26 @@ def _analyze_method_return_statements( return None def _find_return_statements(self, node: Node, return_nodes: list[Node]) -> None: + cached = self._return_stmt_cache.get(id(node)) + if cached is not None: + return_nodes.extend(cached) + return + py_lang_queries = self.queries.get(cs.SupportedLanguage.PYTHON) + py_lang_obj = py_lang_queries["language"] if py_lang_queries else None + if py_lang_obj is not None: + try: + q = get_cached_query(py_lang_obj, cs.PY_RETURN_QUERY) + cursor = QueryCursor(q) + captures = cursor.captures(node) + return_nodes.extend(captures.get("return_stmt", [])) + return + except Exception: + pass stack: list[Node] = [node] - while stack: current = stack.pop() if current.type == cs.TS_PY_RETURN_STATEMENT: return_nodes.append(current) - stack.extend(reversed(current.children)) def _analyze_return_expression(self, expr_node: Node, method_qn: str) -> str | None: @@ -344,13 +409,11 @@ def _analyze_identifier_return(self, expr_node: Node, method_qn: str) -> str | N local_vars = self.build_local_variable_type_map(method_node, module_qn) if identifier in local_vars: logger.debug( - lg.PY_VAR_FROM_CONTEXT.format( - var=identifier, type=local_vars[identifier] - ) + lg.PY_VAR_FROM_CONTEXT, var=identifier, type=local_vars[identifier] ) return local_vars[identifier] - logger.debug(lg.PY_VAR_CANNOT_INFER.format(var=identifier)) + logger.debug(lg.PY_VAR_CANNOT_INFER, var=identifier) return None def _analyze_attribute_return(self, expr_node: Node, method_qn: str) -> str | None: diff --git a/codebase_rag/parsers/py/expression_analyzer.py 
b/codebase_rag/parsers/py/expression_analyzer.py index 81e0c28a2..73c159159 100644 --- a/codebase_rag/parsers/py/expression_analyzer.py +++ b/codebase_rag/parsers/py/expression_analyzer.py @@ -40,6 +40,7 @@ def _analyze_method_return_statements( class PythonExpressionAnalyzerMixin(_ExprBase): + __slots__ = () import_processor: ImportProcessor function_registry: FunctionRegistryTrieProtocol simple_name_lookup: SimpleNameLookup @@ -47,6 +48,7 @@ class PythonExpressionAnalyzerMixin(_ExprBase): ast_cache: ASTCacheProtocol _method_return_type_cache: dict[str, str | None] + _self_assignment_cache: dict[tuple[int, str], dict[str, str] | None] def _infer_type_from_expression(self, node: Node, module_qn: str) -> str | None: if node.type == cs.TS_PY_CALL: @@ -243,7 +245,7 @@ def _infer_method_return_type( return self._analyze_method_return_statements(method_node, method_qn) return None except Exception as e: - logger.debug(lg.PY_INFER_RETURN_FAILED.format(method=method_call, error=e)) + logger.debug(lg.PY_INFER_RETURN_FAILED, method=method_call, error=e) return None def _resolve_method_qualified_name( @@ -305,11 +307,10 @@ def _resolve_class_method( for qn in self.simple_name_lookup.get(class_name, []): if result := self._try_resolve_method(qn, method_name): logger.debug( - lg.PY_RESOLVED_METHOD.format( - class_name=class_name, - method_name=method_name, - method_qn=result, - ) + lg.PY_RESOLVED_METHOD, + class_name=class_name, + method_name=method_name, + method_qn=result, ) return result @@ -348,14 +349,22 @@ def _try_infer_from_self_assignments( if language != cs.SupportedLanguage.PYTHON: return None - instance_vars: dict[str, str] = {} - self._analyze_self_assignments(root_node, instance_vars, module_qn) + cache_key = (id(root_node), module_qn) + if cache_key in self._self_assignment_cache: + instance_vars = self._self_assignment_cache[cache_key] + else: + instance_vars = {} + self._analyze_self_assignments(root_node, instance_vars, module_qn) + 
self._self_assignment_cache[cache_key] = instance_vars or None + + if not instance_vars: + return None full_attr_name = f"{cs.PY_SELF_PREFIX}{attribute_name}" return instance_vars.get(full_attr_name) except Exception as e: - logger.debug(lg.PY_INFER_ATTR_FAILED.format(attr=attribute_name, error=e)) + logger.debug(lg.PY_INFER_ATTR_FAILED, attr=attribute_name, error=e) return None def _find_class_in_scope(self, class_name: str, module_qn: str) -> str | None: diff --git a/codebase_rag/parsers/py/type_inference.py b/codebase_rag/parsers/py/type_inference.py index 5908ee76a..ca9b9601a 100644 --- a/codebase_rag/parsers/py/type_inference.py +++ b/codebase_rag/parsers/py/type_inference.py @@ -30,6 +30,25 @@ class PythonTypeInferenceEngine( PythonAstAnalyzerMixin, PythonVariableAnalyzerMixin, ): + __slots__ = ( + "import_processor", + "function_registry", + "repo_path", + "project_name", + "ast_cache", + "queries", + "module_qn_to_file_path", + "class_inheritance", + "simple_name_lookup", + "_js_type_inference_getter", + "_method_return_type_cache", + "_type_inference_in_progress", + "_available_classes_cache", + "_return_stmt_cache", + "_self_assignment_cache", + "_class_member_type_cache", + ) + def __init__( self, import_processor: ImportProcessor, @@ -56,6 +75,10 @@ def __init__( self._method_return_type_cache: dict[str, str | None] = {} self._type_inference_in_progress: set[str] = set() + self._available_classes_cache: dict[str, list[str]] = {} + self._return_stmt_cache: dict[int, list] = {} + self._self_assignment_cache: dict[tuple[int, str], dict[str, str] | None] = {} + self._class_member_type_cache: dict[str, dict[str, str]] = {} def build_local_variable_type_map( self, caller_node: Node, module_qn: str @@ -66,8 +89,15 @@ def build_local_variable_type_map( self._infer_parameter_types(caller_node, local_var_types, module_qn) # (H) Single-pass traversal avoids O(5*N) multiple traversals for type inference. 
self._traverse_single_pass(caller_node, local_var_types, module_qn) + self._infer_instance_attributes_from_init( + caller_node, local_var_types, module_qn + ) + self._infer_property_return_types(caller_node, local_var_types, module_qn) + self._infer_class_annotation_types(caller_node, local_var_types, module_qn) + aliases = self._collect_local_aliases(caller_node) + self._expand_chained_attribute_types(local_var_types, module_qn, aliases) except Exception as e: - logger.debug(lg.PY_BUILD_VAR_MAP_FAILED.format(error=e)) + logger.debug(lg.PY_BUILD_VAR_MAP_FAILED, error=e) return local_var_types diff --git a/codebase_rag/parsers/py/variable_analyzer.py b/codebase_rag/parsers/py/variable_analyzer.py index 9a49f9a27..d0fe47220 100644 --- a/codebase_rag/parsers/py/variable_analyzer.py +++ b/codebase_rag/parsers/py/variable_analyzer.py @@ -3,12 +3,14 @@ from typing import TYPE_CHECKING, Protocol from loguru import logger +from tree_sitter import QueryCursor from ... import constants as cs from ... import logs as lg from ...types_defs import ASTNode, FunctionRegistryTrieProtocol, NodeType from ..import_processor import ImportProcessor -from ..utils import safe_decode_text +from ..utils import get_cached_query, safe_decode_text +from .utils import resolve_class_name if TYPE_CHECKING: @@ -17,14 +19,20 @@ def _infer_type_from_expression( self, node: ASTNode, module_qn: str ) -> str | None: ... + def _find_class_node(self, class_qn: str) -> ASTNode | None: ... 
+ _VarBase: type = _VariableAnalyzerDeps else: _VarBase = object class PythonVariableAnalyzerMixin(_VarBase): + __slots__ = () import_processor: ImportProcessor function_registry: FunctionRegistryTrieProtocol + queries: dict[cs.SupportedLanguage, object] + _available_classes_cache: dict[str, list[str]] + _class_member_type_cache: dict[str, dict[str, str]] def _infer_parameter_types( self, caller_node: ASTNode, local_var_types: dict[str, str], module_qn: str @@ -61,9 +69,7 @@ def _process_untyped_parameter( ): return local_var_types[param_name] = inferred_type - logger.debug( - lg.PY_PARAM_TYPE_INFERRED.format(param=param_name, type=inferred_type) - ) + logger.debug(lg.PY_PARAM_TYPE_INFERRED, param=param_name, type=inferred_type) def _process_typed_parameter( self, param: ASTNode, local_var_types: dict[str, str] @@ -102,14 +108,14 @@ def _process_typed_default_parameter( def _infer_type_from_parameter_name( self, param_name: str, module_qn: str ) -> str | None: - logger.debug( - lg.PY_TYPE_INFER_ATTEMPT.format(param=param_name, module=module_qn) - ) + logger.debug(lg.PY_TYPE_INFER_ATTEMPT, param=param_name, module=module_qn) available_class_names = self._collect_available_classes(module_qn) - logger.debug(lg.PY_AVAILABLE_CLASSES.format(classes=available_class_names)) + logger.debug(lg.PY_AVAILABLE_CLASSES, classes=available_class_names) return self._find_best_class_match(param_name, available_class_names) def _collect_available_classes(self, module_qn: str) -> list[str]: + if module_qn in self._available_classes_cache: + return self._available_classes_cache[module_qn] available_class_names: list[str] = [] for qn, node_type in self.function_registry.find_with_prefix(module_qn): if node_type != NodeType.CLASS: @@ -118,6 +124,7 @@ def _collect_available_classes(self, module_qn: str) -> list[str]: available_class_names.append(qn.split(cs.SEPARATOR_DOT)[-1]) if module_qn not in self.import_processor.import_mapping: + self._available_classes_cache[module_qn] = 
available_class_names return available_class_names for local_name, imported_qn in self.import_processor.import_mapping[ @@ -126,6 +133,7 @@ def _collect_available_classes(self, module_qn: str) -> list[str]: if self.function_registry.get(imported_qn) == NodeType.CLASS: available_class_names.append(local_name) + self._available_classes_cache[module_qn] = available_class_names return available_class_names def _find_best_class_match( @@ -142,9 +150,7 @@ def _find_best_class_match( best_match = class_name logger.debug( - lg.PY_BEST_MATCH.format( - param=param_name, match=best_match, score=highest_score - ) + lg.PY_BEST_MATCH, param=param_name, match=best_match, score=highest_score ) return best_match @@ -195,9 +201,7 @@ def _infer_loop_var_from_iterable( right_node, local_var_types, module_qn ): local_var_types[loop_var] = element_type - logger.debug( - lg.PY_LOOP_VAR_INFERRED.format(var=loop_var, type=element_type) - ) + logger.debug(lg.PY_LOOP_VAR_INFERRED, var=loop_var, type=element_type) def _infer_iterable_element_type( self, iterable_node: ASTNode, local_var_types: dict[str, str], module_qn: str @@ -250,27 +254,286 @@ def _process_self_assignment( and (attr_name := left_text.decode(cs.ENCODING_UTF8)).startswith( cs.PY_SELF_PREFIX ) - and ( - assigned_type := self._infer_type_from_expression(right_node, module_qn) - ) ): return + assigned_type = self._infer_type_from_expression(right_node, module_qn) + if not assigned_type and right_node.type == cs.TS_PY_IDENTIFIER: + # (H) self.x = param: a bare identifier carries the type of the matching + # (H) (already-seeded) parameter or local, so flow it onto the attribute. 
+ ident = safe_decode_text(right_node) + assigned_type = local_var_types.get(ident) if ident else None + if not assigned_type: + return local_var_types[attr_name] = assigned_type - logger.debug( - lg.PY_INSTANCE_VAR_INFERRED.format(attr=attr_name, type=assigned_type) - ) + logger.debug(lg.PY_INSTANCE_VAR_INFERRED, attr=attr_name, type=assigned_type) def _analyze_self_assignments( self, node: ASTNode, local_var_types: dict[str, str], module_qn: str ) -> None: + py_lang_queries = self.queries.get(cs.SupportedLanguage.PYTHON) + py_lang_obj = py_lang_queries["language"] if py_lang_queries else None + if py_lang_obj is not None: + try: + q = get_cached_query(py_lang_obj, cs.PY_ASSIGNMENT_QUERY) + cursor = QueryCursor(q) + captures = cursor.captures(node) + for assign_node in captures.get("assignment", []): + self._process_self_assignment( + assign_node, local_var_types, module_qn + ) + return + except Exception: + pass stack: list[ASTNode] = [node] - while stack: current = stack.pop() if current.type == cs.TS_PY_ASSIGNMENT: self._process_self_assignment(current, local_var_types, module_qn) stack.extend(reversed(current.children)) + def _enclosing_class_node(self, node: ASTNode) -> ASTNode | None: + current = node.parent + while current is not None: + if current.type == cs.TS_PY_CLASS_DEFINITION: + return current + current = current.parent + return None + + def _find_init_method_node(self, class_node: ASTNode) -> ASTNode | None: + body = class_node.child_by_field_name(cs.FIELD_BODY) + if body is None: + return None + for child in body.children: + if child.type == cs.TS_PY_DECORATED_DEFINITION: + func = next( + ( + c + for c in child.children + if c.type == cs.TS_PY_FUNCTION_DEFINITION + ), + None, + ) + elif child.type == cs.TS_PY_FUNCTION_DEFINITION: + func = child + else: + continue + if func is None: + continue + name_node = func.child_by_field_name(cs.FIELD_NAME) + if ( + name_node + and (text := name_node.text) + and text.decode(cs.ENCODING_UTF8) == 
cs.PY_METHOD_INIT + ): + return func + return None + + def _infer_instance_attributes_from_init( + self, caller_node: ASTNode, local_var_types: dict[str, str], module_qn: str + ) -> None: + # (H) Instance attributes are assigned in __init__ (self.x = T()), so a method + # (H) that only reads self.x has no local assignment to infer from. Scan the + # (H) enclosing class's __init__ and seed the attribute types, letting any + # (H) reassignment in the calling method itself take precedence (setdefault). + if (class_node := self._enclosing_class_node(caller_node)) is None: + return + init_node = self._find_init_method_node(class_node) + if init_node is None or init_node is caller_node: + return + init_types: dict[str, str] = {} + # (H) Seed __init__ parameter types first so self.x = param flows the + # (H) parameter annotation onto the attribute. + self._infer_parameter_types(init_node, init_types, module_qn) + self._analyze_self_assignments(init_node, init_types, module_qn) + for attr, attr_type in init_types.items(): + if attr.startswith(cs.PY_SELF_PREFIX): + local_var_types.setdefault(attr, attr_type) + + def _has_property_decorator(self, decorated_node: ASTNode) -> bool: + for child in decorated_node.children: + if child.type == cs.TS_PY_DECORATOR and (text := child.text): + tail = ( + text.decode(cs.ENCODING_UTF8) + .lstrip(cs.DECORATOR_AT) + .split(cs.SEPARATOR_DOT)[-1] + ) + if tail in cs.PROPERTY_DECORATORS: + return True + return False + + def _infer_property_return_types( + self, caller_node: ASTNode, local_var_types: dict[str, str], module_qn: str + ) -> None: + # (H) self.prop where prop is an @property has the property's declared return + # (H) type, so a chained call self.prop.method() can resolve against the + # (H) returned class rather than an ambiguous same-named method elsewhere. 
+ if (class_node := self._enclosing_class_node(caller_node)) is None: + return + self._collect_property_return_types(class_node, local_var_types) + + def _collect_property_return_types( + self, class_node: ASTNode, out: dict[str, str] + ) -> None: + body = class_node.child_by_field_name(cs.FIELD_BODY) + if body is None: + return + for child in body.children: + if child.type != cs.TS_PY_DECORATED_DEFINITION: + continue + if not self._has_property_decorator(child): + continue + func = next( + (c for c in child.children if c.type == cs.TS_PY_FUNCTION_DEFINITION), + None, + ) + if func is None: + continue + name_node = func.child_by_field_name(cs.FIELD_NAME) + return_node = func.child_by_field_name(cs.FIELD_RETURN_TYPE) + if not ( + name_node + and (name_text := name_node.text) + and return_node + and (return_text := return_node.text) + ): + continue + # (H) The return_type field wraps a type node; only a bare class name (not + # (H) a union, subscripted generic, or string forward ref) seeds a type. + return_type = return_text.decode(cs.ENCODING_UTF8) + if return_type.isidentifier(): + out.setdefault( + f"{cs.PY_SELF_PREFIX}{name_text.decode(cs.ENCODING_UTF8)}", + return_type, + ) + + def _infer_class_annotation_types( + self, caller_node: ASTNode, local_var_types: dict[str, str], module_qn: str + ) -> None: + # (H) A class-level annotation (_handler: LanguageHandler) declares the type of + # (H) an instance attribute even when it is assigned from a factory call whose + # (H) return type cannot be inferred, so seed self. from the annotation. 
+ if (class_node := self._enclosing_class_node(caller_node)) is None: + return + self._collect_class_annotation_types(class_node, local_var_types) + + def _collect_class_annotation_types( + self, class_node: ASTNode, out: dict[str, str] + ) -> None: + body = class_node.child_by_field_name(cs.FIELD_BODY) + if body is None: + return + for child in body.children: + if child.type != cs.TS_PY_EXPRESSION_STATEMENT: + continue + assignment = child.children[0] if child.children else None + if assignment is None or assignment.type != cs.TS_PY_ASSIGNMENT: + continue + left_node = assignment.child_by_field_name(cs.TS_FIELD_LEFT) + type_node = assignment.child_by_field_name(cs.TS_FIELD_TYPE) + if not ( + left_node + and left_node.type == cs.TS_PY_IDENTIFIER + and type_node + and (name := safe_decode_text(left_node)) + and (type_text := safe_decode_text(type_node)) + and type_text.isidentifier() + ): + continue + out.setdefault(f"{cs.PY_SELF_PREFIX}{name}", type_text) + + def _expand_chained_attribute_types( + self, + local_var_types: dict[str, str], + module_qn: str, + aliases: dict[str, str] | None = None, + max_depth: int = 4, + ) -> None: + # (H) A chained reference a.b.c needs the type of a.b (member b on a's class). + # (H) Each pass: (1) propagate local aliases (x = ref) from the referent's type, + # (H) then (2) for every typed ref, seed ref.member -> member type (full QN), so + # (H) deeper chains and aliases resolve on the next pass until a fixpoint. 
+ aliases = aliases or {} + for _ in range(max_depth): + added = False + for local, referent in aliases.items(): + if local not in local_var_types and ( + referent_type := local_var_types.get(referent) + ): + local_var_types[local] = referent_type + added = True + for ref, type_name in list(local_var_types.items()): + class_qn = self._class_qn_of_type(type_name, module_qn) + if not class_qn: + continue + for member, member_type in self._class_member_types_by_qn( + class_qn + ).items(): + key = f"{ref}{cs.SEPARATOR_DOT}{member}" + if key not in local_var_types: + local_var_types[key] = member_type + added = True + if not added: + break + + def _collect_local_aliases(self, caller_node: ASTNode) -> dict[str, str]: + # (H) Record local-variable aliases (resolver = self._resolver) where the rhs is + # (H) a plain name/attribute reference, so its type can be propagated. Skip + # (H) nested scopes and any rhs that is a call/subscript/other expression. + aliases: dict[str, str] = {} + boundary = (cs.TS_PY_FUNCTION_DEFINITION, cs.TS_PY_CLASS_DEFINITION) + stack: list[ASTNode] = list(caller_node.children) + while stack: + node = stack.pop() + if node.type in boundary: + continue + if node.type == cs.TS_PY_ASSIGNMENT: + left = node.child_by_field_name(cs.TS_FIELD_LEFT) + right = node.child_by_field_name(cs.TS_FIELD_RIGHT) + if ( + left is not None + and left.type == cs.TS_PY_IDENTIFIER + and right is not None + and right.type in (cs.TS_PY_IDENTIFIER, cs.TS_PY_ATTRIBUTE) + and (local := safe_decode_text(left)) + and (referent := safe_decode_text(right)) + and local not in aliases + ): + aliases[local] = referent + stack.extend(node.children) + return aliases + + def _class_qn_of_type(self, type_name: str, module_qn: str) -> str | None: + if cs.SEPARATOR_DOT in type_name: + return type_name + return resolve_class_name( + type_name, module_qn, self.import_processor, self.function_registry + ) + + def _class_member_types_by_qn(self, class_qn: str) -> dict[str, str]: + if class_qn 
in self._class_member_type_cache: + return self._class_member_type_cache[class_qn] + members: dict[str, str] = {} + class_node = self._find_class_node(class_qn) + if class_node is not None: + class_module_qn = class_qn.rpartition(cs.SEPARATOR_DOT)[0] + raw: dict[str, str] = {} + self._collect_property_return_types(class_node, raw) + self._collect_class_annotation_types(class_node, raw) + if (init_node := self._find_init_method_node(class_node)) is not None: + init_types: dict[str, str] = {} + self._infer_parameter_types(init_node, init_types, class_module_qn) + self._analyze_self_assignments(init_node, init_types, class_module_qn) + for attr, attr_type in init_types.items(): + raw.setdefault(attr, attr_type) + for attr, attr_type in raw.items(): + if not attr.startswith(cs.PY_SELF_PREFIX): + continue + member = attr[len(cs.PY_SELF_PREFIX) :] + resolved = self._class_qn_of_type(attr_type, class_module_qn) + members[member] = resolved or attr_type + self._class_member_type_cache[class_qn] = members + return members + def _infer_variable_element_type( self, var_name: str, local_var_types: dict[str, str], module_qn: str ) -> str | None: diff --git a/codebase_rag/parsers/rs/utils.py b/codebase_rag/parsers/rs/utils.py index 64cc84cf6..99743e758 100644 --- a/codebase_rag/parsers/rs/utils.py +++ b/codebase_rag/parsers/rs/utils.py @@ -137,12 +137,9 @@ def _process_scoped_use_list( _process_use_tree(child, final_base, imports) -def extract_impl_target(impl_node: Node) -> str | None: - if impl_node.type != cs.TS_IMPL_ITEM: - return None - +def _impl_field_type_name(impl_node: Node, field: str) -> str | None: for i in range(impl_node.child_count): - if impl_node.field_name_for_child(i) == cs.FIELD_TYPE: + if impl_node.field_name_for_child(i) == field: type_node = impl_node.child(i) if type_node is None: continue @@ -151,7 +148,7 @@ def extract_impl_target(impl_node: Node) -> str | None: for child in type_node.children: if child.type == cs.TS_TYPE_IDENTIFIER: return 
safe_decode_text(child) - case cs.TS_TYPE_IDENTIFIER: + case cs.TS_TYPE_IDENTIFIER | cs.TS_RS_PRIMITIVE_TYPE: return safe_decode_text(type_node) case cs.TS_RS_SCOPED_TYPE_IDENTIFIER: for child in type_node.children: @@ -162,6 +159,20 @@ def extract_impl_target(impl_node: Node) -> str | None: return None +def extract_impl_target(impl_node: Node) -> str | None: + if impl_node.type != cs.TS_IMPL_ITEM: + return None + return _impl_field_type_name(impl_node, cs.FIELD_TYPE) + + +def extract_impl_trait(impl_node: Node) -> str | None: + # (H) The `trait` field of `impl Trait for Type` -> the implemented trait's + # (H) simple name (a trait impl means Type IMPLEMENTS Trait). + if impl_node.type != cs.TS_IMPL_ITEM: + return None + return _impl_field_type_name(impl_node, cs.FIELD_TRAIT) + + def extract_use_imports(use_node: Node) -> dict[str, str]: if use_node.type != cs.TS_USE_DECLARATION: return {} diff --git a/codebase_rag/parsers/stdlib_extractor.py b/codebase_rag/parsers/stdlib_extractor.py index fbcbddd4c..52fc5d219 100644 --- a/codebase_rag/parsers/stdlib_extractor.py +++ b/codebase_rag/parsers/stdlib_extractor.py @@ -42,7 +42,7 @@ def _is_tool_available(tool_name: str) -> bool: subprocess.CalledProcessError, ): _EXTERNAL_TOOLS[tool_name] = False - logger.debug(ls.IMP_TOOL_NOT_AVAILABLE.format(tool=tool_name)) + logger.debug(ls.IMP_TOOL_NOT_AVAILABLE, tool=tool_name) return False @@ -77,9 +77,9 @@ def load_persistent_cache() -> None: data = json.load(f) _STDLIB_CACHE.update(data.get(cs.IMPORT_CACHE_KEY, {})) _CACHE_TIMESTAMPS.update(data.get(cs.IMPORT_TIMESTAMPS_KEY, {})) - logger.debug(ls.IMP_CACHE_LOADED.format(path=cache_file)) + logger.debug(ls.IMP_CACHE_LOADED, path=cache_file) except (json.JSONDecodeError, OSError) as e: - logger.debug(ls.IMP_CACHE_LOAD_ERROR.format(error=e)) + logger.debug(ls.IMP_CACHE_LOAD_ERROR, error=e) def save_persistent_cache() -> None: @@ -97,9 +97,9 @@ def save_persistent_cache() -> None: f, indent=2, ) - 
logger.debug(ls.IMP_CACHE_SAVED.format(path=cache_file)) + logger.debug(ls.IMP_CACHE_SAVED, path=cache_file) except OSError as e: - logger.debug(ls.IMP_CACHE_SAVE_ERROR.format(error=e)) + logger.debug(ls.IMP_CACHE_SAVE_ERROR, error=e) def flush_stdlib_cache() -> None: @@ -115,7 +115,7 @@ def clear_stdlib_cache() -> None: cache_file.unlink() logger.debug(ls.IMP_CACHE_CLEARED) except OSError as e: - logger.debug(ls.IMP_CACHE_CLEAR_ERROR.format(error=e)) + logger.debug(ls.IMP_CACHE_CLEAR_ERROR, error=e) def get_stdlib_cache_stats() -> StdlibCacheStats: @@ -130,6 +130,8 @@ def get_stdlib_cache_stats() -> StdlibCacheStats: class StdlibExtractor: + __slots__ = ("function_registry", "repo_path", "project_name") + def __init__( self, function_registry: FunctionRegistryTrieProtocol | None = None, @@ -248,7 +250,7 @@ def _resolve_python_entity_module_path( result = ( cs.SEPARATOR_DOT.join(parts[:-1]) - if entity_name[0].isupper() + if entity_name[:1].isupper() else full_qualified_name ) _cache_stdlib_result(cs.SupportedLanguage.PYTHON, full_qualified_name, result) @@ -330,15 +332,16 @@ def _resolve_js_entity_module_path( ): pass - result = ( - cs.SEPARATOR_DOT.join(parts[:-1]) - if entity_name[0].isupper() - else full_qualified_name - ) + result = cs.SEPARATOR_DOT.join(parts[:-1]) _cache_stdlib_result(cs.SupportedLanguage.JS, full_qualified_name, result) return result def _extract_go_stdlib_path(self, full_qualified_name: str) -> str: + if cached := _get_cached_stdlib_result( + cs.SupportedLanguage.GO, full_qualified_name + ): + return cached + parts = full_qualified_name.split(cs.SEPARATOR_SLASH) if len(parts) >= 2: try: @@ -453,6 +456,11 @@ def _extract_go_stdlib_path(self, full_qualified_name: str) -> str: if proc.returncode == 0: data = json.loads(stdout.strip()) if data[cs.JSON_KEY_HAS_ENTITY]: + _cache_stdlib_result( + cs.SupportedLanguage.GO, + full_qualified_name, + package_path, + ) return package_path except ( @@ -464,219 +472,112 @@ def 
_extract_go_stdlib_path(self, full_qualified_name: str) -> str: pass entity_name = parts[-1] - if entity_name[0].isupper(): - return cs.SEPARATOR_SLASH.join(parts[:-1]) + if entity_name[:1].isupper(): + result = cs.SEPARATOR_SLASH.join(parts[:-1]) + _cache_stdlib_result( + cs.SupportedLanguage.GO, full_qualified_name, result + ) + return result + _cache_stdlib_result( + cs.SupportedLanguage.GO, full_qualified_name, full_qualified_name + ) return full_qualified_name def _extract_rust_stdlib_path(self, full_qualified_name: str) -> str: + if cached := _get_cached_stdlib_result( + cs.SupportedLanguage.RUST, full_qualified_name + ): + return cached + parts = full_qualified_name.split(cs.SEPARATOR_DOUBLE_COLON) if len(parts) >= 2: entity_name = parts[-1] if ( - entity_name[0].isupper() + entity_name[:1].isupper() or entity_name.isupper() or (cs.CHAR_UNDERSCORE not in entity_name and entity_name.islower()) ): - return cs.SEPARATOR_DOUBLE_COLON.join(parts[:-1]) + result = cs.SEPARATOR_DOUBLE_COLON.join(parts[:-1]) + _cache_stdlib_result( + cs.SupportedLanguage.RUST, full_qualified_name, result + ) + return result + _cache_stdlib_result( + cs.SupportedLanguage.RUST, full_qualified_name, full_qualified_name + ) return full_qualified_name def _extract_cpp_stdlib_path(self, full_qualified_name: str) -> str: + if cached := _get_cached_stdlib_result( + cs.SupportedLanguage.CPP, full_qualified_name + ): + return cached + parts = full_qualified_name.split(cs.SEPARATOR_DOUBLE_COLON) if len(parts) >= 2: namespace = parts[0] if namespace == cs.CPP_STD_NAMESPACE: entity_name = parts[-1] - - try: - import os - import subprocess - import tempfile - - with tempfile.NamedTemporaryFile( - mode="w", suffix=".txt", delete=False - ) as f: - f.write(entity_name) - entity_file = f.name - - try: - cpp_template_program = f""" -#include -#include -#include - -int main() {{ - std::ifstream file("{entity_file}"); - std::string entity_name; - std::getline(file, entity_name); - file.close(); - - // 
This is a compile-time check strategy - we can't dynamically construct templates - // Fall back to heuristic approach for safety - std::cout << "heuristic_check" << std::endl; - return 0; -}} - """ - - subprocess.run( - ["g++", "-std=c++17", "-x", "c++", "-", "-o", "/dev/null"], - check=False, - input=cpp_template_program, - capture_output=True, - text=True, - timeout=5, - ) - - finally: - os.unlink(entity_file) - - except ( - subprocess.TimeoutExpired, - subprocess.CalledProcessError, - OSError, - ): - pass - - entity_name = parts[-1] if ( - entity_name[0].isupper() + entity_name[:1].isupper() or entity_name.startswith(cs.CPP_PREFIX_IS) or entity_name.startswith(cs.CPP_PREFIX_HAS) or entity_name in cs.CPP_STDLIB_ENTITIES ): - return cs.SEPARATOR_DOUBLE_COLON.join(parts[:-1]) + result = cs.SEPARATOR_DOUBLE_COLON.join(parts[:-1]) + _cache_stdlib_result( + cs.SupportedLanguage.CPP, full_qualified_name, result + ) + return result + _cache_stdlib_result( + cs.SupportedLanguage.CPP, full_qualified_name, full_qualified_name + ) return full_qualified_name def _extract_java_stdlib_path(self, full_qualified_name: str) -> str: + cached_result = _get_cached_stdlib_result( + cs.SupportedLanguage.JAVA, full_qualified_name + ) + if cached_result is not None: + return cached_result + parts = full_qualified_name.split(cs.SEPARATOR_DOT) if len(parts) >= 2: - try: - import os - import subprocess - import tempfile - - package_name = cs.SEPARATOR_DOT.join(parts[:-1]) - entity_name = parts[-1] - - java_program = """ -import java.lang.reflect.*; - -public class StdlibCheck { - public static void main(String[] args) { - if (args.length < 2) { - System.out.println("{\\"hasEntity\\": false}"); - return; - } - - String packageName = args[0]; - String entityName = args[1]; - - try { - Class clazz = Class.forName(packageName + "." 
+ entityName); - System.out.println("{\\"hasEntity\\": true, \\"entityType\\": \\"class\\"}"); - } catch (ClassNotFoundException e) { - // Try as method or field in parent package - try { - Class packageClass = Class.forName(packageName); - Method[] methods = packageClass.getMethods(); - Field[] fields = packageClass.getFields(); - - boolean foundMethod = false; - for (Method method : methods) { - if (method.getName().equals(entityName)) { - foundMethod = true; - break; - } - } - - boolean foundField = false; - for (Field field : fields) { - if (field.getName().equals(entityName)) { - foundField = true; - break; - } - } - - if (foundMethod || foundField) { - System.out.println("{\\"hasEntity\\": true, \\"entityType\\": \\"member\\"}"); - } else { - System.out.println("{\\"hasEntity\\": false}"); - } - } catch (Exception ex) { - System.out.println("{\\"hasEntity\\": false}"); - } - } - } -} - """ - - with tempfile.NamedTemporaryFile( - mode="w", suffix=".java", delete=False - ) as f: - f.write(java_program) - java_file = f.name - - try: - compile_result = subprocess.run( - ["javac", java_file], - check=False, - capture_output=True, - text=True, - timeout=10, - ) - - if compile_result.returncode == 0: - class_name = os.path.splitext(os.path.basename(java_file))[0] - run_result = subprocess.run( - [ - "java", - "-cp", - os.path.dirname(java_file), - class_name, - package_name, - entity_name, - ], - check=False, - capture_output=True, - text=True, - timeout=10, - ) - - if run_result.returncode == 0: - data = json.loads(run_result.stdout.strip()) - if data.get(cs.JSON_KEY_HAS_ENTITY): - return cs.SEPARATOR_DOT.join(parts[:-1]) - - finally: - for ext in (cs.EXT_JAVA, cs.EXT_CLASS): - temp_file = os.path.splitext(java_file)[0] + ext - try: - os.unlink(temp_file) - except OSError: - pass - - except ( - subprocess.TimeoutExpired, - subprocess.CalledProcessError, - json.JSONDecodeError, - OSError, - ): - pass - entity_name = parts[-1] - if ( - entity_name[0].isupper() + 
is_class_entity = ( + entity_name[:1].isupper() or entity_name.endswith(cs.JAVA_SUFFIX_EXCEPTION) or entity_name.endswith(cs.JAVA_SUFFIX_ERROR) or entity_name.endswith(cs.JAVA_SUFFIX_INTERFACE) or entity_name.endswith(cs.JAVA_SUFFIX_BUILDER) or entity_name in cs.JAVA_STDLIB_CLASSES - ): - return cs.SEPARATOR_DOT.join(parts[:-1]) + ) + + if full_qualified_name.startswith(cs.JAVA_STDLIB_PREFIXES): + result = ( + cs.SEPARATOR_DOT.join(parts[:-1]) + if is_class_entity + else full_qualified_name + ) + _cache_stdlib_result( + cs.SupportedLanguage.JAVA, full_qualified_name, result + ) + return result + if is_class_entity: + result = cs.SEPARATOR_DOT.join(parts[:-1]) + _cache_stdlib_result( + cs.SupportedLanguage.JAVA, full_qualified_name, result + ) + return result + + _cache_stdlib_result( + cs.SupportedLanguage.JAVA, full_qualified_name, full_qualified_name + ) return full_qualified_name def _extract_lua_stdlib_path(self, full_qualified_name: str) -> str: @@ -750,7 +651,7 @@ def _extract_lua_stdlib_path(self, full_qualified_name: str) -> str: pass entity_name = parts[-1] - if entity_name[0].isupper() or entity_name in cs.LUA_STDLIB_MODULES: + if entity_name[:1].isupper() or entity_name in cs.LUA_STDLIB_MODULES: return cs.SEPARATOR_DOT.join(parts[:-1]) return full_qualified_name @@ -759,7 +660,7 @@ def _extract_generic_stdlib_path(self, full_qualified_name: str) -> str: parts = full_qualified_name.split(cs.SEPARATOR_DOT) if len(parts) >= 2: entity_name = parts[-1] - if entity_name[0].isupper(): + if entity_name[:1].isupper(): return cs.SEPARATOR_DOT.join(parts[:-1]) return full_qualified_name diff --git a/codebase_rag/parsers/structure_processor.py b/codebase_rag/parsers/structure_processor.py index 9b4065bd3..78b853773 100644 --- a/codebase_rag/parsers/structure_processor.py +++ b/codebase_rag/parsers/structure_processor.py @@ -6,10 +6,24 @@ from .. 
import logs from ..services import IngestorProtocol from ..types_defs import LanguageQueries, NodeIdentifier -from ..utils.path_utils import should_skip_path +from ..utils.path_utils import ( + cached_relative_path, + cached_resolve_posix, + should_skip_path, +) class StructureProcessor: + __slots__ = ( + "ingestor", + "repo_path", + "project_name", + "queries", + "structural_elements", + "unignore_paths", + "exclude_paths", + ) + def __init__( self, ingestor: IngestorProtocol, @@ -47,19 +61,18 @@ def identify_structure(self) -> None: ): directories.add(path) + package_indicators: set[str] = set() + for lang_queries in self.queries.values(): + lang_config = lang_queries[cs.QUERY_CONFIG] + package_indicators.update(lang_config.package_indicators) + for root in sorted(directories): - relative_root = root.relative_to(self.repo_path) + relative_root = cached_relative_path(root, self.repo_path) parent_rel_path = relative_root.parent parent_container_qn = self.structural_elements.get(parent_rel_path) is_package = False - package_indicators: set[str] = set() - - for lang_queries in self.queries.values(): - lang_config = lang_queries[cs.QUERY_CONFIG] - package_indicators.update(lang_config.package_indicators) - for indicator in package_indicators: if (root / indicator).exists(): is_package = True @@ -79,6 +92,7 @@ def identify_structure(self) -> None: cs.KEY_QUALIFIED_NAME: package_qn, cs.KEY_NAME: root.name, cs.KEY_PATH: relative_root.as_posix(), + cs.KEY_ABSOLUTE_PATH: cached_resolve_posix(root), }, ) parent_identifier = self._get_parent_identifier( @@ -96,7 +110,11 @@ def identify_structure(self) -> None: ) self.ingestor.ensure_node_batch( cs.NodeLabel.FOLDER, - {cs.KEY_PATH: relative_root.as_posix(), cs.KEY_NAME: root.name}, + { + cs.KEY_PATH: relative_root.as_posix(), + cs.KEY_NAME: root.name, + cs.KEY_ABSOLUTE_PATH: cached_resolve_posix(root), + }, ) parent_identifier = self._get_parent_identifier( parent_rel_path, parent_container_qn @@ -108,8 +126,8 @@ def 
identify_structure(self) -> None: ) def process_generic_file(self, file_path: Path, file_name: str) -> None: - relative_filepath = file_path.relative_to(self.repo_path).as_posix() - relative_root = file_path.parent.relative_to(self.repo_path) + relative_filepath = cached_relative_path(file_path, self.repo_path).as_posix() + relative_root = cached_relative_path(file_path.parent, self.repo_path) parent_container_qn = self.structural_elements.get(relative_root) parent_identifier = self._get_parent_identifier( @@ -122,6 +140,7 @@ def process_generic_file(self, file_path: Path, file_name: str) -> None: cs.KEY_PATH: relative_filepath, cs.KEY_NAME: file_name, cs.KEY_EXTENSION: file_path.suffix, + cs.KEY_ABSOLUTE_PATH: cached_resolve_posix(file_path), }, ) diff --git a/codebase_rag/parsers/type_inference.py b/codebase_rag/parsers/type_inference.py index 815e4af81..3c21c6cd3 100644 --- a/codebase_rag/parsers/type_inference.py +++ b/codebase_rag/parsers/type_inference.py @@ -8,6 +8,8 @@ LanguageQueries, SimpleNameLookup, ) +from .cpp import CppTypeInferenceEngine +from .go import GoTypeInferenceEngine from .import_processor import ImportProcessor from .java import JavaTypeInferenceEngine from .js_ts import JsTypeInferenceEngine @@ -19,6 +21,25 @@ class TypeInferenceEngine: + __slots__ = ( + "import_processor", + "function_registry", + "repo_path", + "project_name", + "ast_cache", + "queries", + "module_qn_to_file_path", + "class_inheritance", + "simple_name_lookup", + "class_field_types", + "_java_type_inference", + "_lua_type_inference", + "_js_type_inference", + "_python_type_inference", + "_go_type_inference", + "_cpp_type_inference", + ) + def __init__( self, import_processor: ImportProcessor, @@ -30,6 +51,7 @@ def __init__( module_qn_to_file_path: dict[str, Path], class_inheritance: dict[str, list[str]], simple_name_lookup: SimpleNameLookup, + class_field_types: dict[str, dict[str, str]] | None = None, ): self.import_processor = import_processor self.function_registry = 
function_registry @@ -40,11 +62,32 @@ def __init__( self.module_qn_to_file_path = module_qn_to_file_path self.class_inheritance = class_inheritance self.simple_name_lookup = simple_name_lookup + # (H) Must preserve the shared dict reference: the factory passes the + # (H) DefinitionProcessor's map, which is empty at construction and populated + # (H) later during ingestion. `or {}` would swap an empty dict for a new one and + # (H) silently lose every field type written afterward. + self.class_field_types = ( + class_field_types if class_field_types is not None else {} + ) self._java_type_inference: JavaTypeInferenceEngine | None = None self._lua_type_inference: LuaTypeInferenceEngine | None = None self._js_type_inference: JsTypeInferenceEngine | None = None self._python_type_inference: PythonTypeInferenceEngine | None = None + self._go_type_inference: GoTypeInferenceEngine | None = None + self._cpp_type_inference: CppTypeInferenceEngine | None = None + + @property + def go_type_inference(self) -> GoTypeInferenceEngine: + if self._go_type_inference is None: + self._go_type_inference = GoTypeInferenceEngine() + return self._go_type_inference + + @property + def cpp_type_inference(self) -> CppTypeInferenceEngine: + if self._cpp_type_inference is None: + self._cpp_type_inference = CppTypeInferenceEngine() + return self._cpp_type_inference @property def java_type_inference(self) -> JavaTypeInferenceEngine: @@ -80,6 +123,7 @@ def js_type_inference(self) -> JsTypeInferenceEngine: function_registry=self.function_registry, project_name=self.project_name, find_method_ast_node_func=self.python_type_inference._find_method_ast_node, + queries=self.queries, ) return self._js_type_inference @@ -101,6 +145,41 @@ def python_type_inference(self) -> PythonTypeInferenceEngine: return self._python_type_inference def build_local_variable_type_map( + self, + caller_node: ASTNode, + module_qn: str, + language: cs.SupportedLanguage, + class_context: str | None = None, + ) -> dict[str, 
str]: + local = self._build_local_variable_type_map(caller_node, module_qn, language) + # (H) When the caller is a method, overlay its class's member-field types as a + # (H) base so a bare `field_.method()` receiver resolves; parameters and locals + # (H) with the same name shadow a field, so the local map wins on conflict. + if class_context and (fields := self._collect_field_types(class_context)): + return {**fields, **local} + return local + + def _collect_field_types(self, class_qn: str) -> dict[str, str]: + # (H) Collect member-field types along the inheritance chain so a derived class + # (H) method can resolve a field inherited from a base. Bases are visited first + # (H) and the class's own fields applied last, so a derived field shadows a + # (H) base field of the same name. Guards against inheritance cycles. + fields: dict[str, str] = {} + seen: set[str] = set() + + def collect(qn: str) -> None: + if qn in seen: + return + seen.add(qn) + for base in self.class_inheritance.get(qn, []): + collect(base) + if own := self.class_field_types.get(qn): + fields.update(own) + + collect(class_qn) + return fields + + def _build_local_variable_type_map( self, caller_node: ASTNode, module_qn: str, language: cs.SupportedLanguage ) -> dict[str, str]: match language: @@ -110,7 +189,7 @@ def build_local_variable_type_map( ) case cs.SupportedLanguage.JS | cs.SupportedLanguage.TS: return self.js_type_inference.build_local_variable_type_map( - caller_node, module_qn + caller_node, module_qn, language ) case cs.SupportedLanguage.JAVA: return self.java_type_inference.build_variable_type_map( @@ -120,6 +199,14 @@ def build_local_variable_type_map( return self.lua_type_inference.build_local_variable_type_map( caller_node, module_qn ) + case cs.SupportedLanguage.GO: + return self.go_type_inference.build_local_variable_type_map( + caller_node, module_qn + ) + case cs.SupportedLanguage.CPP: + return self.cpp_type_inference.build_local_variable_type_map( + caller_node, module_qn + ) 
case _: return {} diff --git a/codebase_rag/parsers/utils.py b/codebase_rag/parsers/utils.py index b164a5022..470baefd3 100644 --- a/codebase_rag/parsers/utils.py +++ b/codebase_rag/parsers/utils.py @@ -2,6 +2,7 @@ from collections.abc import Callable from functools import lru_cache +from pathlib import Path from typing import TYPE_CHECKING, NamedTuple from loguru import logger @@ -17,18 +18,59 @@ SimpleNameLookup, TreeSitterNodeProtocol, ) +from ..utils.path_utils import cached_relative_path, cached_resolve_posix if TYPE_CHECKING: from ..language_spec import LanguageSpec from ..services import IngestorProtocol from ..types_defs import FunctionRegistryTrieProtocol +_QUERY_CACHE: dict[tuple[int, str], Query] = {} +_QUERY_LAST: tuple[tuple[int, str], Query] | None = None + + +def get_cached_query(language_obj, query_text: str) -> Query: + global _QUERY_LAST + key = (id(language_obj), query_text) + if _QUERY_LAST is not None and _QUERY_LAST[0] == key: + return _QUERY_LAST[1] + if key not in _QUERY_CACHE: + _QUERY_CACHE[key] = Query(language_obj, query_text) + result = _QUERY_CACHE[key] + _QUERY_LAST = (key, result) + return result + class FunctionCapturesResult(NamedTuple): lang_config: LanguageSpec captures: dict[str, list[ASTNode]] +def sorted_captures(cursor: QueryCursor, node: ASTNode) -> dict[str, list[ASTNode]]: + # (H) tree-sitter v0.25 captures() returns nodes in non-deterministic order + # (H) across process invocations; sort by start_byte for reproducibility + raw = cursor.captures(node) + result: dict[str, list[ASTNode]] = {} + for name, nodes in raw.items(): + if len(nodes) <= 1: + result[name] = nodes + else: + is_sorted = True + prev_byte = nodes[0].start_byte + for i in range(1, len(nodes)): + cur_byte = nodes[i].start_byte + if cur_byte < prev_byte: + is_sorted = False + break + prev_byte = cur_byte + result[name] = nodes if is_sorted else sorted(nodes, key=_start_byte_key) + return result + + +def _start_byte_key(n: ASTNode) -> int: + return 
n.start_byte + + def get_function_captures( root_node: ASTNode, language: cs.SupportedLanguage, @@ -41,11 +83,11 @@ def get_function_captures( return None cursor = QueryCursor(query) - captures = cursor.captures(root_node) + captures = sorted_captures(cursor, root_node) return FunctionCapturesResult(lang_config, captures) -@lru_cache(maxsize=10000) +@lru_cache(maxsize=50000) def _cached_decode_bytes(text_bytes: bytes) -> str: return text_bytes.decode(cs.ENCODING_UTF8) @@ -72,6 +114,126 @@ def contains_node(parent: ASTNode, target: ASTNode) -> bool: ) +def _decorator_tail_names(decorators: list[str]) -> set[str]: + return { + decorator.lstrip(cs.DECORATOR_AT).split(cs.SEPARATOR_DOT)[-1] + for decorator in decorators + } + + +def _is_property_decorator(decorators: list[str]) -> bool: + return bool(_decorator_tail_names(decorators) & cs.PROPERTY_DECORATORS) + + +def _is_abstract_decorator(decorators: list[str]) -> bool: + return bool(_decorator_tail_names(decorators) & cs.ABSTRACT_DECORATORS) + + +_PY_NAMED_PARAMETERS = frozenset( + {cs.TS_PY_DEFAULT_PARAMETER, cs.TS_PY_TYPED_DEFAULT_PARAMETER} +) +_PY_SCOPE_BOUNDARIES = frozenset( + { + cs.TS_PY_FUNCTION_DEFINITION, + cs.TS_PY_CLASS_DEFINITION, + cs.TS_PY_DECORATED_DEFINITION, + } +) + + +def _python_parameter_name(param_node: Node) -> str | None: + if param_node.type == cs.TS_PY_IDENTIFIER: + return safe_decode_text(param_node) + if param_node.type in _PY_NAMED_PARAMETERS: + name_node = param_node.child_by_field_name(cs.FIELD_NAME) + if name_node is not None and name_node.type == cs.TS_PY_IDENTIFIER: + return safe_decode_text(name_node) + return None + if param_node.type == cs.TS_PY_TYPED_PARAMETER: + for child in param_node.children: + if child.type == cs.TS_PY_IDENTIFIER: + return safe_decode_text(child) + return None + + +def _python_invoked_parameter_names(body_node: Node, candidates: set[str]) -> set[str]: + invoked: set[str] = set() + stack = [body_node] + while stack: + node = stack.pop() + if node.type == 
cs.TS_PY_CALL: + fn = node.child_by_field_name(cs.FIELD_FUNCTION) + if ( + fn is not None + and fn.type == cs.TS_PY_IDENTIFIER + and (name := safe_decode_text(fn)) in candidates + ): + invoked.add(name) + for child in node.children: + # (H) Nested def/class bodies rebind the param name, so do not let an + # (H) inner call to a same-named local masquerade as the outer param. + if child.type not in _PY_SCOPE_BOUNDARIES: + stack.append(child) + return invoked + + +def python_parameter_names(func_node: Node) -> list[str]: + # (H) Ordered parameter names with a leading self/cls dropped, so positions line + # (H) up with how call-site arguments map to parameters for bound methods. + params_node = func_node.child_by_field_name(cs.FIELD_PARAMETERS) + if params_node is None: + return [] + names: list[str] = [] + for child in params_node.named_children: + if (name := _python_parameter_name(child)) is not None: + names.append(name) + if names and names[0] in (cs.PY_KEYWORD_SELF, cs.PY_KEYWORD_CLS): + names = names[1:] + return names + + +def callable_parameter_indices( + func_node: Node, language: cs.SupportedLanguage | None +) -> dict[str, int]: + # (H) Maps each parameter that is invoked as a call inside the function body + # (H) to its positional index in the call-site argument list (self/cls + # (H) dropped so the index lines up with how bound methods are invoked). 
+ if language != cs.SupportedLanguage.PYTHON: + return {} + body_node = func_node.child_by_field_name(cs.FIELD_BODY) + if body_node is None or not (names := python_parameter_names(func_node)): + return {} + + invoked = _python_invoked_parameter_names(body_node, set(names)) + if not invoked: + return {} + return {name: index for index, name in enumerate(names) if name in invoked} + + +def _js_ts_field_member_name( + node: ASTNode, language: cs.SupportedLanguage | None +) -> str | None: + # (H) The binding name of a JS/TS class-field arrow / fn-expr whose enclosing + # (H) field definition holds it as its `value` (`helper = () => ...`), so the + # (H) member is modelled as class_qn.helper. None for other languages/shapes. + if language not in (cs.SupportedLanguage.JS, cs.SupportedLanguage.TS): + return None + if node.type not in (cs.TS_ARROW_FUNCTION, cs.TS_FUNCTION_EXPRESSION): + return None + parent = node.parent + # (H) `==` not `is`: py-tree-sitter returns a fresh Node wrapper on each access, + # (H) so identity comparison always fails; Node equality compares the node id. 
+ if parent is None or parent.child_by_field_name(cs.FIELD_VALUE) != node: + return None + name_node = parent.child_by_field_name(cs.FIELD_NAME) + if name_node is None or name_node.type not in ( + cs.TS_IDENTIFIER, + cs.TS_PROPERTY_IDENTIFIER, + ): + return None + return safe_decode_text(name_node) + + def ingest_method( method_node: ASTNode, container_qn: str, @@ -83,6 +245,8 @@ def ingest_method( language: cs.SupportedLanguage | None = None, extract_decorators_func: Callable[[ASTNode], list[str]] | None = None, method_qualified_name: str | None = None, + file_path: Path | None = None, + repo_path: Path | None = None, ) -> None: if language == cs.SupportedLanguage.CPP: from .cpp import utils as cpp_utils @@ -90,14 +254,22 @@ def ingest_method( method_name = cpp_utils.extract_function_name(method_node) if not method_name: return - elif not (method_name_node := method_node.child_by_field_name(cs.FIELD_NAME)): - return + elif (method_name_node := method_node.child_by_field_name(cs.FIELD_NAME)) is None: + # (H) A JS/TS class-field arrow / fn-expr (`helper = () => ...`) has no name + # (H) field on the function node; take the binding name from the enclosing + # (H) field definition so it is modelled as a member instead of dropped. 
+ if not (method_name := _js_ts_field_member_name(method_node, language)): + return elif (text := method_name_node.text) is None: return else: method_name = text.decode(cs.ENCODING_UTF8) method_qn = method_qualified_name or f"{container_qn}.{method_name}" + if language != cs.SupportedLanguage.CPP: + method_qn = function_registry.register_unique_qn( + method_qn, method_node.start_point[0] + 1 + ) decorators = extract_decorators_func(method_node) if extract_decorators_func else [] @@ -109,14 +281,36 @@ def ingest_method( cs.KEY_END_LINE: method_node.end_point[0] + 1, cs.KEY_DOCSTRING: get_docstring_func(method_node), } + if file_path is not None and repo_path is not None: + method_props[cs.KEY_PATH] = cached_relative_path( + file_path, repo_path + ).as_posix() + method_props[cs.KEY_ABSOLUTE_PATH] = cached_resolve_posix(file_path) logger.info(logs.METHOD_FOUND.format(name=method_name, qn=method_qn)) ingestor.ensure_node_batch(cs.NodeLabel.METHOD, method_props) function_registry[method_qn] = NodeType.METHOD + if _is_property_decorator(decorators): + function_registry.mark_property(method_qn) + if _is_abstract_decorator(decorators): + function_registry.mark_abstract(method_qn) + function_registry.mark_callable_params( + method_qn, callable_parameter_indices(method_node, language) + ) simple_name_lookup[method_name].add(method_qn) + # (H) The DEFINES_METHOD parent is matched in the graph by LABEL + + # (H) qualified_name, so it must carry the container's real node label. Callers + # (H) pass Class by default, but a trait/interface (Interface) or enum (Enum) + # (H) container would then never match, dropping the containment edge. Prefer + # (H) the label the container was actually registered with. 
+ container_label = container_type + registered = function_registry.get(container_qn) + if registered is not None and registered != NodeType.METHOD: + container_label = cs.NodeLabel(registered.value) + ingestor.ensure_relationship_batch( - (container_type, cs.KEY_QUALIFIED_NAME, container_qn), + (container_label, cs.KEY_QUALIFIED_NAME, container_qn), cs.RelationshipType.DEFINES_METHOD, (cs.NodeLabel.METHOD, cs.KEY_QUALIFIED_NAME, method_qn), ) @@ -137,6 +331,15 @@ def ingest_exported_function( return function_qn = f"{module_qn}.{function_name}" + # (H) The definition pass already ingests an exported function / const-arrow at + # (H) its natural qn. Re-registering here would collide and mint a spurious + # (H) `qn@line` duplicate node, onto which call resolution then binds (mangling + # (H) the callee qn). If the natural qn already exists, the node is done. + if function_qn in function_registry: + return + function_qn = function_registry.register_unique_qn( + function_qn, function_node.start_point[0] + 1 + ) function_props = { cs.KEY_QUALIFIED_NAME: function_qn, @@ -161,8 +364,21 @@ def is_method_node(func_node: ASTNode, lang_config: LanguageSpec) -> bool: if not isinstance(current, Node): return False - while current and current.type not in lang_config.module_node_types: - if current.type in lang_config.class_node_types: + class_types = lang_config.class_node_types + func_types = lang_config.function_node_types + module_types = lang_config.module_node_types + body_field = cs.FIELD_BODY + + while current is not None: + current_type = current.type + if current_type in module_types: + return False + if current_type in class_types: return True + if ( + current_type in func_types + and current.child_by_field_name(body_field) is not None + ): + return False current = current.parent return False diff --git a/codebase_rag/prompts.py b/codebase_rag/prompts.py index de5cce132..9eaae75b1 100644 --- a/codebase_rag/prompts.py +++ b/codebase_rag/prompts.py @@ -26,7 +26,6 @@ def 
extract_tool_names(tools: list["Tool"]) -> ToolNames: "query_codebase_knowledge_graph", "query_codebase_knowledge_graph" ), read_file=tool_map.get("read_file_content", "read_file_content"), - analyze_document=tool_map.get("analyze_document", "analyze_document"), semantic_search=tool_map.get("semantic_code_search", "semantic_code_search"), create_file=tool_map.get("create_new_file", "create_new_file"), edit_file=tool_map.get("replace_code_surgically", "replace_code_surgically"), @@ -40,7 +39,37 @@ def extract_tool_names(tools: list["Tool"]) -> ToolNames: - **Use `STARTS WITH` for Paths**: When matching paths, always use `STARTS WITH` for robustness (e.g., `WHERE n.path STARTS WITH 'workflows/src'`). Do not use `=`. - **Use `ENDS WITH` for qualified_name**: The `qualified_name` property contains full paths like `'Project.folder.subfolder.ClassName'`. When users mention a class, function, or method by its short name (e.g., "VatManager"), use `ENDS WITH` to match: `WHERE c.qualified_name ENDS WITH '.VatManager'`. Do NOT use `{name: 'VatManager'}` equality matching. - **Use `toLower()` for Searches**: For case-insensitive searching on string properties, use `toLower()`. -- **Querying Lists**: To check if a list property (like `decorators`) contains an item, use the `ANY` or `IN` clause (e.g., `WHERE 'flow' IN n.decorators`).""" +- **Querying Lists**: To check if a list property (like `decorators`) contains an item, use the `ANY` or `IN` clause (e.g., `WHERE 'flow' IN n.decorators`). +- **NEVER use unbounded variable-length paths**: Patterns like `[:CALLS*]`, `[*]`, `[:CALLS*1..]` enumerate every path in the graph and exhaust memory. Always cap with an upper bound, e.g. `[:CALLS*1..6]`. If you genuinely need unbounded reachability, use a MAGE procedure (see Section 2b) instead of variable-length Cypher. + +**2b. 
Graph Algorithm Procedures (MAGE)** + +For algorithmic questions (longest/shortest paths, cycles, recursion clusters, centrality, communities, reachability), prefer calling a MAGE procedure over writing variable-length Cypher. Cypher path patterns enumerate all matches with no memoization, so they OOM on cyclic graphs; MAGE procedures run real graph algorithms in bounded memory. + +Use these read-only procedures (call them with `CALL (...) YIELD ... RETURN ...`): + +- **Strongly connected components / recursion clusters**: `CALL nxalg.strongly_connected_components() YIELD components` +- **Weakly connected components**: `CALL weakly_connected_components.get() YIELD node, component_id` or `CALL wcc.get_components(nodes, edges)` +- **Cycles**: `CALL nxalg.simple_cycles() YIELD cycles` (all cycles), `CALL nxalg.find_cycle() YIELD cycle` (one cycle) +- **All simple paths between two nodes (bounded)**: `CALL nxalg.all_simple_paths(source, target, cutoff)` or `CALL algo.all_simple_paths(source, target, [:CALLS], maxHops)` +- **Shortest path**: `CALL nxalg.shortest_path(source, target)` or `CALL algo.astar(source, target, config)` +- **Reachability**: `CALL graph_util.ancestors(node)`, `CALL graph_util.descendants(node)` +- **Topological order (DAGs only)**: `CALL nxalg.topological_sort() YIELD nodes` or `CALL graph_util.topological_sort()` +- **PageRank**: `CALL pagerank.get() YIELD node, rank` or `CALL nxalg.pagerank() YIELD node, rank` +- **Betweenness centrality**: `CALL betweenness_centrality.get() YIELD node, betweenness_centrality` +- **Degree centrality**: `CALL degree_centrality.get() YIELD node, degree` +- **Communities**: `CALL community_detection.get() YIELD node, community_id`, `CALL leiden_community_detection.get() YIELD node, community_id` +- **Articulation / bridges**: `CALL bridges.get() YIELD ...`, `CALL nxalg.biconnected_components() YIELD nodes` +- **Dominators**: `CALL nxalg.immediate_dominators(start) YIELD node, dominator` +- **Path expansion 
(bounded BFS over filtered edges)**: `CALL path.expand(start, relationships, labels, minHops, maxHops) YIELD path` + +Important: MAGE procedures named `nxalg.*` and several others operate on the **entire graph**, ignoring edge-type filters. To restrict to a specific edge type (e.g., only `CALLS`), follow the procedure call with a `WHERE` clause that checks `EXISTS((a)-[:CALLS]->(b))` or use `path.expand` which accepts a relationship-type filter. + +**2c. When Cypher Can't Answer** + +If a question cannot be expressed as a bounded Cypher pattern or as a single MAGE procedure call (e.g., "longest call chain in a graph with cycles"), return your best bounded approximation rather than an unbounded path query. Examples: +- "longest call chain" → `CALL nxalg.strongly_connected_components() YIELD components RETURN components` (let the orchestrator post-process), or use `CALL path.expand` with a generous but finite `maxHops`. +- "find a deeply-nested call site" → use a bounded depth such as `[:CALLS*1..10]` with `ORDER BY ... LIMIT 1`.""" def build_graph_schema_and_rules() -> str: @@ -58,9 +87,41 @@ def build_graph_schema_and_rules() -> str: GRAPH_SCHEMA_AND_RULES = build_graph_schema_and_rules() -def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str: +def _format_active_projects_block(active_projects: list[str] | None) -> str: + if not active_projects: + return ( + "\n**Project Scope**: This Memgraph database may contain multiple " + "indexed projects. Call `list_projects` early to enumerate them, then " + "scope graph queries by filtering on the `qualified_name` prefix " + "(e.g., `WHERE n.qualified_name STARTS WITH 'projectName.'`).\n" + ) + if len(active_projects) == 1: + return ( + f"\n**Project Scope**: This session is focused on the project " + f"`{active_projects[0]}`. 
Scope Cypher queries by filtering on " + f"`WHERE n.qualified_name STARTS WITH '{active_projects[0]}.'` " + "unless the user explicitly asks about other projects.\n" + ) + project_list = ", ".join(f"`{p}`" for p in active_projects) + starts_with_examples = " OR ".join( + f"n.qualified_name STARTS WITH '{p}.'" for p in active_projects + ) + return ( + f"\n**Project Scope**: This session spans the following projects: " + f"{project_list}. When users ask cross-project questions, query across " + "all of them. To restrict to one project, filter " + f"`n.qualified_name STARTS WITH '.'`. To restrict to the " + f"active set, filter with `{starts_with_examples}`.\n" + ) + + +def build_rag_orchestrator_prompt( + tools: list["Tool"], + project_instructions: str | None = None, + active_projects: list[str] | None = None, +) -> str: t = extract_tool_names(tools) - return f"""You are an expert AI assistant for analyzing codebases. Your answers are based **EXCLUSIVELY** on information retrieved using your tools. + base = f"""You are an expert AI assistant for analyzing codebases. Your answers are based **EXCLUSIVELY** on information retrieved using your tools. **CRITICAL RULES:** 1. **TOOL-ONLY ANSWERS**: You must ONLY use information from the tools provided. Do not use external knowledge. @@ -68,10 +129,10 @@ def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str: 3. **HONESTY**: If a tool fails or returns no results, you MUST state that clearly and report any error messages. Do not invent answers. 4. **CHOOSE THE RIGHT TOOL FOR THE FILE TYPE**: - For source code files (.py, .ts, etc.), use `{t.read_file}`. - - For documents like PDFs, use the `{t.analyze_document}` tool. This is more effective than trying to read them as plain text. + - Images and PDFs the user references are attached inline to the message; read them directly from your own multimodal input. **Your General Approach:** -1. 
**Analyze Documents**: If the user asks a question about a document (like a PDF), you **MUST** use the `{t.analyze_document}` tool. Provide both the `file_path` and the user's `question` to the tool. +1. **Inspect Attached Media Directly**: When the user attaches an image or PDF, analyze it from the inline content of the message. Do not call a tool for it. 2. **Deep Dive into Code**: When you identify a relevant component (e.g., a folder), you must go beyond documentation. a. First, check if documentation files like `README.md` exist and read them for context. For configuration, look for files appropriate to the language (e.g., `pyproject.toml` for Python, `package.json` for Node.js). b. **Then, you MUST dive into the source code.** Explore the `src` directory (or equivalent). Identify and read key files (e.g., `main.py`, `index.ts`, `app.ts`) to understand the implementation details, logic, and functionality. @@ -128,6 +189,18 @@ def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str: d. Prioritize most relevant findings over comprehensive coverage 8. **Synthesize Answer**: Analyze and explain the retrieved content. Cite your sources (file paths or qualified names). Report any errors gracefully. """ + base += _format_active_projects_block(active_projects) + extra = (project_instructions or "").strip() + if not extra: + return base + return ( + f"{base}\n" + "**Project-Specific Instructions (from .cgr.md):**\n" + "These instructions come from the repository being analyzed. Follow them " + "in addition to the rules above; if they conflict with the critical rules, " + "the critical rules win.\n\n" + f"{extra}\n" + ) CYPHER_SYSTEM_PROMPT = f""" @@ -196,6 +269,14 @@ def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str: - CORRECT: `MATCH (c:Class) RETURN count(c) AS total` - WRONG: `MATCH (c:Class) RETURN c.name, count(c) AS total` (returns all items!) 
+**VALUE PATTERN RULES (CRITICAL FOR NAME MATCHING):** +- The `qualified_name` property contains FULL paths like: `'Project.folder.subfolder.ClassName'` +- When users mention a class or function by SHORT NAME (e.g., "VatManager", "UserService"), you MUST match using the `name` property, NOT `qualified_name`. +- CORRECT: `WHERE c.name = 'VatManager'` +- WRONG: `WHERE c.qualified_name = 'VatManager'` (will never match!) +- Use `DEFINES_METHOD` relationship to find methods of a class. +- Use `DEFINES` relationship to find functions/classes defined in a module. + **Examples:** * **Natural Language:** "How many classes are there?" @@ -235,7 +316,7 @@ def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str: ``` * **Natural Language:** "What methods does UserService have?" or "Show me methods in UserService" or "List UserService methods" -* **Cypher Query (Use ENDS WITH to match class by short name):** +* **Cypher Query (Note: match by `name` property, use `DEFINES_METHOD` relationship):** ```cypher {CYPHER_EXAMPLE_CLASS_METHODS} ``` @@ -262,7 +343,7 @@ def build_rag_orchestrator_prompt(tools: list["Tool"]) -> str: Please: 1. Use your code retrieval and graph querying tools to understand the codebase structure 2. Read relevant source files to identify optimization opportunities -3. Use the analyze_document tool to reference best practices from {reference_document} +3. Reference best practices from {reference_document} (attached inline) 4. Reference established patterns and best practices for {language} 5. Propose specific, actionable optimizations with file references 6. 
IMPORTANT: Do not make any changes yet - just propose them and wait for approval diff --git a/codebase_rag/providers/base.py b/codebase_rag/providers/base.py index 37f5cb462..0716b5f38 100644 --- a/codebase_rag/providers/base.py +++ b/codebase_rag/providers/base.py @@ -6,8 +6,13 @@ import httpx from loguru import logger +from pydantic_ai.models.anthropic import AnthropicModel, AnthropicModelSettings from pydantic_ai.models.google import GoogleModel, GoogleModelSettings from pydantic_ai.models.openai import OpenAIChatModel, OpenAIResponsesModel +from pydantic_ai.providers.anthropic import ( + AnthropicProvider as PydanticAnthropicProvider, +) +from pydantic_ai.providers.azure import AzureProvider as PydanticAzureProvider from pydantic_ai.providers.google import GoogleProvider as PydanticGoogleProvider from pydantic_ai.providers.openai import OpenAIProvider as PydanticOpenAIProvider @@ -18,13 +23,15 @@ class ModelProvider(ABC): + __slots__ = ("config",) + def __init__(self, **config: str | int | None) -> None: self.config = config @abstractmethod def create_model( self, model_id: str, **kwargs: str | int | None - ) -> GoogleModel | OpenAIResponsesModel | OpenAIChatModel: + ) -> GoogleModel | OpenAIResponsesModel | OpenAIChatModel | AnthropicModel: pass @abstractmethod @@ -37,7 +44,25 @@ def provider_name(self) -> cs.Provider: pass +def _resolve_api_key(api_key: str | None, env_var: str) -> str | None: + env_key = os.environ.get(env_var) + if env_key: + return env_key + if api_key and api_key != cs.DEFAULT_API_KEY: + return api_key + return None + + class GoogleProvider(ModelProvider): + __slots__ = ( + "api_key", + "provider_type", + "project_id", + "region", + "service_account_file", + "thinking_budget", + ) + def __init__( self, api_key: str | None = None, @@ -49,7 +74,7 @@ def __init__( **kwargs: str | int | None, ) -> None: super().__init__(**kwargs) - self.api_key = api_key or os.environ.get(cs.ENV_GOOGLE_API_KEY) + self.api_key = _resolve_api_key(api_key, 
cs.ENV_GOOGLE_API_KEY) self.provider_type = provider_type self.project_id = project_id self.region = region @@ -98,6 +123,8 @@ def create_model(self, model_id: str, **kwargs: str | int | None) -> GoogleModel class OpenAIProvider(ModelProvider): + __slots__ = ("api_key", "endpoint") + def __init__( self, api_key: str | None = None, @@ -105,7 +132,7 @@ def __init__( **kwargs: str | int | None, ) -> None: super().__init__(**kwargs) - self.api_key = api_key or os.environ.get(cs.ENV_OPENAI_API_KEY) + self.api_key = _resolve_api_key(api_key, cs.ENV_OPENAI_API_KEY) self.endpoint = endpoint @property @@ -126,6 +153,8 @@ def create_model( class OllamaProvider(ModelProvider): + __slots__ = ("endpoint", "api_key") + def __init__( self, endpoint: str | None = None, @@ -155,12 +184,96 @@ def create_model( return OpenAIChatModel(model_id, provider=provider) +class AnthropicProvider(ModelProvider): + __slots__ = ("api_key",) + + def __init__( + self, + api_key: str | None = None, + **kwargs: str | int | None, + ) -> None: + super().__init__(**kwargs) + self.api_key = _resolve_api_key(api_key, cs.ENV_ANTHROPIC_API_KEY) + + @property + def provider_name(self) -> cs.Provider: + return cs.Provider.ANTHROPIC + + def validate_config(self) -> None: + if not self.api_key: + raise ValueError(ex.ANTHROPIC_NO_KEY) + + def create_model(self, model_id: str, **kwargs: str | int | None) -> AnthropicModel: + self.validate_config() + # (H) api_key is guaranteed to be set by validate_config + assert self.api_key is not None + provider = PydanticAnthropicProvider(api_key=self.api_key) + model_settings = AnthropicModelSettings( + anthropic_cache_instructions=True, + anthropic_cache_tool_definitions=True, + anthropic_cache_messages=True, + ) + return AnthropicModel(model_id, provider=provider, settings=model_settings) + + +class AzureOpenAIProvider(ModelProvider): + __slots__ = ("api_key", "endpoint", "api_version") + + def __init__( + self, + api_key: str | None = None, + endpoint: str | None = 
None, + api_version: str | None = None, + **kwargs: str | int | None, + ) -> None: + super().__init__(**kwargs) + self.api_key = _resolve_api_key(api_key, cs.ENV_AZURE_API_KEY) + self.endpoint = endpoint or os.environ.get(cs.ENV_AZURE_ENDPOINT) + self.api_version = api_version or os.environ.get(cs.ENV_AZURE_API_VERSION) + + @property + def provider_name(self) -> cs.Provider: + return cs.Provider.AZURE + + def validate_config(self) -> None: + if not self.api_key: + raise ValueError(ex.AZURE_NO_KEY) + if not self.endpoint: + raise ValueError(ex.AZURE_NO_ENDPOINT) + + def create_model( + self, model_id: str, **kwargs: str | int | None + ) -> OpenAIChatModel: + self.validate_config() + # (H) api_key and endpoint are guaranteed to be set by validate_config + assert self.api_key is not None + assert self.endpoint is not None + provider = PydanticAzureProvider( + api_key=self.api_key, + azure_endpoint=self.endpoint, + api_version=self.api_version, + ) + return OpenAIChatModel(model_id, provider=provider) + + PROVIDER_REGISTRY: dict[str, type[ModelProvider]] = { cs.Provider.GOOGLE: GoogleProvider, cs.Provider.OPENAI: OpenAIProvider, cs.Provider.OLLAMA: OllamaProvider, + cs.Provider.ANTHROPIC: AnthropicProvider, + cs.Provider.AZURE: AzureOpenAIProvider, } +# (H) Import LiteLLM provider after base classes are defined to avoid circular import +try: + from .litellm import LiteLLMProvider + + PROVIDER_REGISTRY[cs.Provider.LITELLM_PROXY] = LiteLLMProvider + _litellm_available = True +except ImportError as e: + logger.debug(f"LiteLLM provider not available: {e}") + _litellm_available = False + def get_provider( provider_name: str | cs.Provider, **config: str | int | None @@ -207,3 +320,29 @@ def check_ollama_running(endpoint: str | None = None) -> bool: return response.status_code == cs.HTTP_OK except (httpx.RequestError, httpx.TimeoutException): return False + + +def check_litellm_proxy_running( + endpoint: str = "http://localhost:4000", api_key: str | None = None +) -> bool: + 
try: + base_url = endpoint.rstrip("/v1").rstrip("/") + health_url = urljoin(base_url, "/health") + headers: dict[str, str] = {} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + + with httpx.Client(timeout=settings.LITELLM_HEALTH_TIMEOUT) as client: + response = client.get(health_url, headers=headers) + if response.status_code == cs.HTTP_OK: + return True + + # (H) Fallback to models endpoint for authenticated proxies + if api_key: + models_url = urljoin(base_url, "/v1/models") + response = client.get(models_url, headers=headers) + return response.status_code == cs.HTTP_OK + + return False + except (httpx.RequestError, httpx.TimeoutException): + return False diff --git a/codebase_rag/providers/litellm.py b/codebase_rag/providers/litellm.py new file mode 100644 index 000000000..7fc0360c3 --- /dev/null +++ b/codebase_rag/providers/litellm.py @@ -0,0 +1,50 @@ +"""LiteLLM provider using pydantic-ai's native LiteLLMProvider.""" + +from __future__ import annotations + +from loguru import logger +from pydantic_ai.models.openai import OpenAIChatModel +from pydantic_ai.providers.litellm import LiteLLMProvider as PydanticLiteLLMProvider + +from codebase_rag import constants as cs +from codebase_rag import exceptions as ex + +from .base import ModelProvider + + +class LiteLLMProvider(ModelProvider): + __slots__ = ("api_key", "endpoint") + + def __init__( + self, + api_key: str | None = None, + endpoint: str = "http://localhost:4000/v1", + **kwargs: str | int | None, + ) -> None: + super().__init__(**kwargs) + self.api_key = api_key + self.endpoint = endpoint + + @property + def provider_name(self) -> cs.Provider: + return cs.Provider.LITELLM_PROXY + + def validate_config(self) -> None: + if not self.endpoint: + raise ValueError(ex.LITELLM_NO_ENDPOINT) + + from .base import check_litellm_proxy_running + + base_url = self.endpoint.rstrip("/v1").rstrip("/") + if not check_litellm_proxy_running(base_url, api_key=self.api_key): + raise 
ValueError(ex.LITELLM_NOT_RUNNING.format(endpoint=base_url)) + + def create_model( + self, model_id: str, **kwargs: str | int | None + ) -> OpenAIChatModel: + self.validate_config() + + logger.info(f"Creating LiteLLM proxy model: {model_id} at {self.endpoint}") + + provider = PydanticLiteLLMProvider(api_key=self.api_key, api_base=self.endpoint) + return OpenAIChatModel(model_id, provider=provider) diff --git a/codebase_rag/services/anthropic_token_counter.py b/codebase_rag/services/anthropic_token_counter.py new file mode 100644 index 000000000..a207d8af8 --- /dev/null +++ b/codebase_rag/services/anthropic_token_counter.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +import base64 +from typing import Any + +import httpx +from pydantic_ai import BinaryContent +from pydantic_ai.messages import ( + ModelMessage, + ModelRequest, + ModelResponse, + RetryPromptPart, + SystemPromptPart, + TextPart, + ToolCallPart, + ToolReturnPart, + UserPromptPart, +) + +from .. import constants as cs + + +def _binary_block(item: BinaryContent) -> dict[str, Any]: + media = item.media_type or cs.MIME_TYPE_FALLBACK + block_type = "image" if media.startswith("image/") else "document" + return { + "type": block_type, + "source": { + "type": "base64", + "media_type": media, + "data": base64.b64encode(item.data).decode(), + }, + } + + +def _user_part_to_blocks(part: UserPromptPart) -> list[dict[str, Any]]: + content = part.content + if isinstance(content, str): + return [{"type": "text", "text": content}] + blocks: list[dict[str, Any]] = [] + for item in content: + if isinstance(item, str): + blocks.append({"type": "text", "text": item}) + elif isinstance(item, BinaryContent): + blocks.append(_binary_block(item)) + return blocks + + +def _tool_return_content(value: object) -> str | list[dict[str, Any]]: + if isinstance(value, str): + return value + if isinstance(value, list): + out: list[dict[str, Any]] = [] + for item in value: + if isinstance(item, str): + out.append({"type": 
"text", "text": item}) + elif isinstance(item, BinaryContent): + out.append(_binary_block(item)) + if out: + return out + return str(value) + + +def _to_anthropic_payload( + messages: list[ModelMessage], +) -> tuple[str, list[dict[str, Any]]]: + system_parts: list[str] = [] + out: list[dict[str, Any]] = [] + for m in messages: + if isinstance(m, ModelRequest): + user_content: list[dict[str, Any]] = [] + for part in m.parts: + if isinstance(part, SystemPromptPart): + system_parts.append(part.content) + elif isinstance(part, UserPromptPart): + user_content.extend(_user_part_to_blocks(part)) + elif isinstance(part, ToolReturnPart): + user_content.append( + { + "type": "tool_result", + "tool_use_id": part.tool_call_id, + "content": _tool_return_content(part.content), + } + ) + elif isinstance(part, RetryPromptPart): + if part.tool_name is None: + user_content.append( + {"type": "text", "text": part.model_response()} + ) + else: + user_content.append( + { + "type": "tool_result", + "tool_use_id": part.tool_call_id, + "content": part.model_response(), + "is_error": True, + } + ) + if user_content: + out.append({"role": "user", "content": user_content}) + elif isinstance(m, ModelResponse): + assistant_content: list[dict[str, Any]] = [] + for part in m.parts: + if isinstance(part, TextPart): + if part.content: + assistant_content.append({"type": "text", "text": part.content}) + elif isinstance(part, ToolCallPart): + assistant_content.append( + { + "type": "tool_use", + "id": part.tool_call_id, + "name": part.tool_name, + "input": part.args_as_dict() or {}, + } + ) + if assistant_content: + out.append({"role": "assistant", "content": assistant_content}) + return "\n".join(system_parts), out + + +class TokenCountError(Exception): + pass + + +async def count_anthropic_context( + api_key: str, + model_id: str, + messages: list[ModelMessage], +) -> int: + system_prompt, anthropic_messages = _to_anthropic_payload(messages) + if not anthropic_messages: + if not system_prompt: + 
return 0 + anthropic_messages = [ + {"role": "user", "content": [{"type": "text", "text": "."}]} + ] + payload: dict[str, Any] = { + "model": model_id, + "messages": anthropic_messages, + } + if system_prompt: + payload["system"] = system_prompt + headers = { + cs.ANTHROPIC_HEADER_API_KEY: api_key, + cs.ANTHROPIC_HEADER_VERSION: cs.ANTHROPIC_API_VERSION, + cs.HEADER_CONTENT_TYPE: cs.CONTENT_TYPE_JSON, + } + async with httpx.AsyncClient(timeout=cs.ANTHROPIC_COUNT_TIMEOUT_S) as client: + resp = await client.post( + cs.ANTHROPIC_COUNT_TOKENS_URL, json=payload, headers=headers + ) + if resp.status_code >= 400: + raise TokenCountError(f"{resp.status_code}: {resp.text}") + return int(resp.json().get("input_tokens", 0)) diff --git a/codebase_rag/services/graph_service.py b/codebase_rag/services/graph_service.py index 7a8d95e02..adee02449 100644 --- a/codebase_rag/services/graph_service.py +++ b/codebase_rag/services/graph_service.py @@ -1,19 +1,25 @@ from __future__ import annotations +import threading import types from collections import defaultdict from collections.abc import Generator, Sequence -from contextlib import contextmanager +from concurrent.futures import ThreadPoolExecutor, as_completed +from contextlib import contextmanager, nullcontext from datetime import UTC, datetime import mgclient # ty: ignore[unresolved-import] from loguru import logger +from codebase_rag.config import settings from codebase_rag.types_defs import CursorProtocol, ResultValue from .. import exceptions as ex from .. 
import logs as ls from ..constants import ( + CYPHER_MEMORY_LIMIT_SUFFIX, + CYPHER_MEMORY_LIMIT_TOKEN, + CYPHER_SEMICOLON, ERR_SUBSTR_ALREADY_EXISTS, ERR_SUBSTR_CONSTRAINT, KEY_CREATED, @@ -32,6 +38,8 @@ CYPHER_EXPORT_RELATIONSHIPS, CYPHER_LIST_PROJECTS, build_constraint_query, + build_create_node_query, + build_create_relationship_query, build_index_query, build_merge_node_query, build_merge_relationship_query, @@ -50,28 +58,65 @@ ) +def _apply_memory_limit(query: str, mb: int) -> str: + if CYPHER_MEMORY_LIMIT_TOKEN in query.upper(): + return query + stripped = query.rstrip() + had_semicolon = stripped.endswith(CYPHER_SEMICOLON) + if had_semicolon: + stripped = stripped[: -len(CYPHER_SEMICOLON)].rstrip() + suffix = CYPHER_MEMORY_LIMIT_SUFFIX.format(mb=mb) + return f"{stripped}{suffix}{CYPHER_SEMICOLON}" + + class MemgraphIngestor: - def __init__(self, host: str, port: int, batch_size: int = 1000): + __slots__ = ( + "_conn_lock", + "_executor", + "_host", + "_port", + "_username", + "_password", + "_use_merge", + "_rel_count", + "_rel_groups", + "batch_size", + "conn", + "node_buffer", + ) + + def __init__( + self, + host: str, + port: int, + batch_size: int = 1000, + username: str | None = None, + password: str | None = None, + use_merge: bool = True, + ): self._host = host self._port = port + self._username = username.strip() if username and username.strip() else None + self._password = password.strip() if password and password.strip() else None + if (self._username is None) != (self._password is None): + raise ValueError(ex.AUTH_INCOMPLETE) if batch_size < 1: raise ValueError(ex.BATCH_SIZE) self.batch_size = batch_size + self._use_merge = use_merge + self._conn_lock = threading.Lock() + self._executor: ThreadPoolExecutor | None = None self.conn: mgclient.Connection | None = None self.node_buffer: list[tuple[str, dict[str, PropertyValue]]] = [] - self.relationship_buffer: list[ - tuple[ - tuple[str, str, PropertyValue], - str, - tuple[str, str, PropertyValue], - 
dict[str, PropertyValue] | None, - ] - ] = [] + self._rel_count = 0 + self._rel_groups: defaultdict[ + tuple[str, str, str, str, str], list[RelBatchRow] + ] = defaultdict(list) def __enter__(self) -> MemgraphIngestor: logger.info(ls.MG_CONNECTING.format(host=self._host, port=self._port)) - self.conn = mgclient.connect(host=self._host, port=self._port) - self.conn.autocommit = True + self.conn = self._create_connection() + self._executor = ThreadPoolExecutor(max_workers=settings.FLUSH_THREAD_POOL_SIZE) logger.info(ls.MG_CONNECTED) return self @@ -81,24 +126,49 @@ def __exit__( exc_val: Exception | None, exc_tb: types.TracebackType | None, ) -> None: - if exc_type: - logger.exception(ls.MG_EXCEPTION.format(error=exc_val)) - self.flush_all() - if self.conn: - self.conn.close() - logger.info(ls.MG_DISCONNECTED) + try: + if exc_type: + logger.exception(ls.MG_EXCEPTION.format(error=exc_val)) + # (H) Best-effort flush: attempt to persist buffered nodes/relationships + # (H) even when an exception occurred. Catching broad Exception so a + # (H) secondary flush failure never masks the original exception. 
+ try: + self.flush_all() + except Exception as flush_err: + logger.error(ls.MG_FLUSH_ERROR.format(error=flush_err)) + else: + self.flush_all() + finally: + if self._executor: + self._executor.shutdown(wait=True) + self._executor = None + if self.conn: + self.conn.close() + logger.info(ls.MG_DISCONNECTED) + + async def __aenter__(self) -> MemgraphIngestor: + return self.__enter__() + + async def __aexit__( + self, + exc_type: type | None, + exc_val: Exception | None, + exc_tb: types.TracebackType | None, + ) -> None: + self.__exit__(exc_type, exc_val, exc_tb) @contextmanager def _get_cursor(self) -> Generator[CursorProtocol, None, None]: if not self.conn: raise ConnectionError(ex.CONN) - cursor: CursorProtocol | None = None - try: - cursor = self.conn.cursor() - yield cursor - finally: - if cursor: - cursor.close() + with self._conn_lock: + cursor: CursorProtocol | None = None + try: + cursor = self.conn.cursor() + yield cursor + finally: + if cursor: + cursor.close() def _cursor_to_results(self, cursor: CursorProtocol) -> list[ResultRow]: if not cursor.description: @@ -128,12 +198,30 @@ def _execute_query( logger.error(ls.MG_CYPHER_PARAMS.format(params=params)) raise - def _execute_batch(self, query: str, params_list: Sequence[BatchParams]) -> None: - if not self.conn or not params_list: + def _create_connection(self) -> mgclient.Connection: + if self._username is not None: + conn = mgclient.connect( + host=self._host, + port=self._port, + username=self._username, + password=self._password, + ) + else: + conn = mgclient.connect(host=self._host, port=self._port) + conn.autocommit = True + return conn + + def _execute_batch_on( + self, + conn: mgclient.Connection, + query: str, + params_list: Sequence[BatchParams], + ) -> None: + if not params_list: return cursor = None try: - cursor = self.conn.cursor() + cursor = conn.cursor() cursor.execute(wrap_with_unwind(query), BatchWrapper(batch=params_list)) except Exception as e: if ERR_SUBSTR_ALREADY_EXISTS not in 
str(e).lower(): @@ -152,14 +240,17 @@ def _execute_batch(self, query: str, params_list: Sequence[BatchParams]) -> None if cursor: cursor.close() - def _execute_batch_with_return( - self, query: str, params_list: Sequence[BatchParams] + def _execute_batch_with_return_on( + self, + conn: mgclient.Connection, + query: str, + params_list: Sequence[BatchParams], ) -> list[ResultRow]: - if not self.conn or not params_list: + if not params_list: return [] cursor = None try: - cursor = self.conn.cursor() + cursor = conn.cursor() cursor.execute(wrap_with_unwind(query), BatchWrapper(batch=params_list)) return self._cursor_to_results(cursor) except Exception as e: @@ -208,7 +299,7 @@ def ensure_node_batch( ) -> None: self.node_buffer.append((label, properties)) if len(self.node_buffer) >= self.batch_size: - logger.debug(ls.MG_NODE_BUFFER_FLUSH.format(size=self.batch_size)) + logger.debug(ls.MG_NODE_BUFFER_FLUSH, size=self.batch_size) self.flush_nodes() def ensure_relationship_batch( @@ -220,19 +311,82 @@ def ensure_relationship_batch( ) -> None: from_label, from_key, from_val = from_spec to_label, to_key, to_val = to_spec - self.relationship_buffer.append( - ( - (from_label, from_key, from_val), - rel_type, - (to_label, to_key, to_val), - properties, - ) + pattern = (from_label, from_key, rel_type, to_label, to_key) + self._rel_groups[pattern].append( + RelBatchRow(from_val=from_val, to_val=to_val, props=properties or {}) ) - if len(self.relationship_buffer) >= self.batch_size: - logger.debug(ls.MG_REL_BUFFER_FLUSH.format(size=self.batch_size)) + self._rel_count += 1 + if self._rel_count >= self.batch_size: + logger.debug(ls.MG_REL_BUFFER_FLUSH, size=self.batch_size) self.flush_nodes() self.flush_relationships() + def _flush_node_label_group( + self, + label: str, + props_list: list[dict[str, PropertyValue]], + conn: mgclient.Connection | None = None, + ) -> tuple[int, int]: + if not props_list: + return 0, 0 + + id_key = NODE_UNIQUE_CONSTRAINTS.get(label) + if not id_key: + 
logger.warning(ls.MG_NO_CONSTRAINT.format(label=label)) + return 0, len(props_list) + + batch_rows: list[NodeBatchRow] = [] + skipped = 0 + for props in props_list: + if id_key not in props: + logger.warning( + ls.MG_MISSING_PROP.format( + label=label, key=id_key, prop_keys=list(props.keys()) + ) + ) + skipped += 1 + continue + row_props: PropertyDict = {k: v for k, v in props.items() if k != id_key} + batch_rows.append(NodeBatchRow(id=props[id_key], props=row_props)) + + if not batch_rows: + return 0, skipped + + build_query = ( + build_merge_node_query if self._use_merge else build_create_node_query + ) + query = build_query(label, id_key) + target_conn = conn or self.conn + if not target_conn: + logger.warning(ls.MG_NO_CONN_NODES.format(label=label)) + return 0, skipped + len(batch_rows) + lock = self._conn_lock if conn is None else nullcontext() + with lock: + self._execute_batch_on(target_conn, query, batch_rows) + return len(batch_rows), skipped + + def _flush_node_group_with_own_conn( + self, + label: str, + props_list: list[dict[str, PropertyValue]], + ) -> tuple[int, int]: + conn = self._create_connection() + try: + return self._flush_node_label_group(label, props_list, conn=conn) + finally: + conn.close() + + def _flush_rel_group_with_own_conn( + self, + pattern: tuple[str, str, str, str, str], + params_list: list[RelBatchRow], + ) -> tuple[int, int]: + conn = self._create_connection() + try: + return self._flush_rel_pattern_group(pattern, params_list, conn=conn) + finally: + conn.close() + def flush_nodes(self) -> None: if not self.node_buffer: return @@ -243,37 +397,46 @@ def flush_nodes(self) -> None: ) for label, props in self.node_buffer: nodes_by_label[label].append(props) + flushed_total = 0 skipped_total = 0 - for label, props_list in nodes_by_label.items(): - if not props_list: - continue - id_key = NODE_UNIQUE_CONSTRAINTS.get(label) - if not id_key: - logger.warning(ls.MG_NO_CONSTRAINT.format(label=label)) - skipped_total += len(props_list) - 
continue - batch_rows: list[NodeBatchRow] = [] - for props in props_list: - if id_key not in props: - logger.warning( - ls.MG_MISSING_PROP.format(label=label, key=id_key, props=props) - ) - skipped_total += 1 - continue - row_props: PropertyDict = { - k: v for k, v in props.items() if k != id_key - } - batch_rows.append(NodeBatchRow(id=props[id_key], props=row_props)) - - if not batch_rows: - continue + first_error: Exception | None = None - flushed_total += len(batch_rows) + if self._executor and len(nodes_by_label) > 1: + logger.info( + ls.MG_PARALLEL_FLUSH_NODES.format( + count=len(nodes_by_label), + workers=settings.FLUSH_THREAD_POOL_SIZE, + ) + ) + futures = { + self._executor.submit( + self._flush_node_group_with_own_conn, label, props_list + ): label + for label, props_list in nodes_by_label.items() + } + for future in as_completed(futures): + label = futures[future] + try: + flushed, skipped = future.result() + flushed_total += flushed + skipped_total += skipped + except Exception as e: + logger.error(ls.MG_LABEL_FLUSH_ERROR.format(label=label, error=e)) + if first_error is None: + first_error = e + else: + for label, props_list in nodes_by_label.items(): + try: + flushed, skipped = self._flush_node_label_group(label, props_list) + flushed_total += flushed + skipped_total += skipped + except Exception as e: + logger.error(ls.MG_LABEL_FLUSH_ERROR.format(label=label, error=e)) + if first_error is None: + first_error = e - query = build_merge_node_query(label, id_key) - self._execute_batch(query, batch_rows) logger.info( ls.MG_NODES_FLUSHED.format(flushed=flushed_total, total=buffer_size) ) @@ -281,61 +444,114 @@ def flush_nodes(self) -> None: logger.info(ls.MG_NODES_SKIPPED.format(count=skipped_total)) self.node_buffer.clear() - def flush_relationships(self) -> None: - if not self.relationship_buffer: - return + if first_error is not None: + raise first_error - rels_by_pattern: defaultdict[ - tuple[str, str, str, str, str], list[RelBatchRow] - ] = 
defaultdict(list) - for from_node, rel_type, to_node, props in self.relationship_buffer: - pattern = (from_node[0], from_node[1], rel_type, to_node[0], to_node[1]) - rels_by_pattern[pattern].append( - RelBatchRow(from_val=from_node[2], to_val=to_node[2], props=props or {}) + def _flush_rel_pattern_group( + self, + pattern: tuple[str, str, str, str, str], + params_list: list[RelBatchRow], + conn: mgclient.Connection | None = None, + ) -> tuple[int, int]: + from_label, from_key, rel_type, to_label, to_key = pattern + build_rel_query = ( + build_merge_relationship_query + if self._use_merge + else build_create_relationship_query + ) + has_props = any(p[KEY_PROPS] for p in params_list) + query = build_rel_query( + from_label, from_key, rel_type, to_label, to_key, has_props + ) + + target_conn = conn or self.conn + if not target_conn: + logger.warning(ls.MG_NO_CONN_RELS.format(pattern=pattern)) + return len(params_list), 0 + lock = self._conn_lock if conn is None else nullcontext() + with lock: + results = self._execute_batch_with_return_on( + target_conn, query, params_list ) + batch_successful = 0 + for r in results: + created = r.get(KEY_CREATED, 0) + if isinstance(created, int): + batch_successful += created + + if rel_type == REL_TYPE_CALLS: + failed = len(params_list) - batch_successful + if failed > 0: + logger.warning(ls.MG_CALLS_FAILED.format(count=failed)) + for i, sample in enumerate(params_list[:3]): + logger.warning( + ls.MG_CALLS_SAMPLE.format( + index=i + 1, + from_label=from_label, + from_val=sample[KEY_FROM_VAL], + to_label=to_label, + to_val=sample[KEY_TO_VAL], + ) + ) + + return len(params_list), batch_successful + + def flush_relationships(self) -> None: + if not self._rel_count: + return total_attempted = 0 total_successful = 0 - - for pattern, params_list in rels_by_pattern.items(): - from_label, from_key, rel_type, to_label, to_key = pattern - has_props = any(p[KEY_PROPS] for p in params_list) - query = build_merge_relationship_query( - 
from_label, from_key, rel_type, to_label, to_key, has_props + first_error: Exception | None = None + + if self._executor and len(self._rel_groups) > 1: + logger.info( + ls.MG_PARALLEL_FLUSH_RELS.format( + count=len(self._rel_groups), + workers=settings.FLUSH_THREAD_POOL_SIZE, + ) ) - - total_attempted += len(params_list) - results = self._execute_batch_with_return(query, params_list) - batch_successful = 0 - for r in results: - created = r.get(KEY_CREATED, 0) - if isinstance(created, int): - batch_successful += created - total_successful += batch_successful - - if rel_type == REL_TYPE_CALLS: - failed = len(params_list) - batch_successful - if failed > 0: - logger.warning(ls.MG_CALLS_FAILED.format(count=failed)) - for i, sample in enumerate(params_list[:3]): - logger.warning( - ls.MG_CALLS_SAMPLE.format( - index=i + 1, - from_label=from_label, - from_val=sample[KEY_FROM_VAL], - to_label=to_label, - to_val=sample[KEY_TO_VAL], - ) - ) + futures = { + self._executor.submit( + self._flush_rel_group_with_own_conn, pattern, params_list + ): pattern + for pattern, params_list in self._rel_groups.items() + } + for future in as_completed(futures): + pattern = futures[future] + try: + attempted, successful = future.result() + total_attempted += attempted + total_successful += successful + except Exception as e: + logger.error(ls.MG_REL_FLUSH_ERROR.format(pattern=pattern, error=e)) + if first_error is None: + first_error = e + else: + for pattern, params_list in self._rel_groups.items(): + try: + attempted, successful = self._flush_rel_pattern_group( + pattern, params_list + ) + total_attempted += attempted + total_successful += successful + except Exception as e: + logger.error(ls.MG_REL_FLUSH_ERROR.format(pattern=pattern, error=e)) + if first_error is None: + first_error = e logger.info( ls.MG_RELS_FLUSHED.format( - total=len(self.relationship_buffer), + total=self._rel_count, success=total_successful, failed=total_attempted - total_successful, ) ) - 
self.relationship_buffer.clear() + self._rel_count = 0 + self._rel_groups.clear() + + if first_error is not None: + raise first_error def flush_all(self) -> None: logger.info(ls.MG_FLUSH_START) @@ -346,13 +562,14 @@ def flush_all(self) -> None: def fetch_all( self, query: str, params: dict[str, PropertyValue] | None = None ) -> list[ResultRow]: - logger.debug(ls.MG_FETCH_QUERY.format(query=query, params=params)) - return self._execute_query(query, params) + bounded_query = _apply_memory_limit(query, settings.QUERY_MEMORY_LIMIT_MB) + logger.debug(ls.MG_FETCH_QUERY, query=bounded_query, params=params) + return self._execute_query(bounded_query, params) def execute_write( self, query: str, params: dict[str, PropertyValue] | None = None ) -> None: - logger.debug(ls.MG_WRITE_QUERY.format(query=query, params=params)) + logger.debug(ls.MG_WRITE_QUERY, query=query, params=params) self._execute_query(query, params) def export_graph_to_dict(self) -> GraphData: diff --git a/codebase_rag/services/llm.py b/codebase_rag/services/llm.py index 018ccc1af..970331f2f 100644 --- a/codebase_rag/services/llm.py +++ b/codebase_rag/services/llm.py @@ -1,14 +1,17 @@ from __future__ import annotations +import re +from pathlib import Path from typing import TYPE_CHECKING from loguru import logger from pydantic_ai import Agent, DeferredToolRequests, Tool +from pydantic_ai.agent import AgentRetries from .. import constants as cs from .. import exceptions as ex from .. 
import logs as ls -from ..config import ModelConfig, settings +from ..config import ModelConfig, load_cgr_instructions, settings from ..prompts import ( CYPHER_SYSTEM_PROMPT, LOCAL_CYPHER_SYSTEM_PROMPT, @@ -26,15 +29,88 @@ def _create_provider_model(config: ModelConfig) -> Model: def _clean_cypher_response(response_text: str) -> str: - query = response_text.strip().replace(cs.CYPHER_BACKTICK, "") - if query.startswith(cs.CYPHER_PREFIX): - query = query[len(cs.CYPHER_PREFIX) :].strip() + query = response_text.strip() + + if "```" in query: + parts = query.split("```") + if len(parts) >= 3: + block = parts[1] + if block.lower().startswith("cypher"): + block = block[len("cypher") :] + query = block.strip() + else: + while "**" in query: + start = query.index("**") + end = query.find("**", start + 2) + if end == -1: + break + after = end + 2 + if after < len(query) and query[after] == ":": + after += 1 + query = query[:start] + query[after:].lstrip() + query = query.replace(cs.CYPHER_BACKTICK, "") + if query.lower().startswith(cs.CYPHER_PREFIX): + query = query[len(cs.CYPHER_PREFIX) :].strip() + if not query.endswith(cs.CYPHER_SEMICOLON): query += cs.CYPHER_SEMICOLON return query +_COMMENT_OR_WS = r"(?:\s|//[^\n]*|/\*.*?\*/)+" + + +def _build_keyword_pattern(keyword: str) -> re.Pattern[str]: + parts = keyword.split() + if len(parts) == 1: + return re.compile(rf"\b{re.escape(parts[0])}\b") + joined = _COMMENT_OR_WS.join(re.escape(p) for p in parts) + return re.compile(rf"\b{joined}\b", re.DOTALL) + + +_CYPHER_DANGEROUS_PATTERNS: list[tuple[str, re.Pattern[str]]] = [ + (kw, _build_keyword_pattern(kw)) for kw in cs.CYPHER_DANGEROUS_KEYWORDS +] + + +_VARLEN_PATTERN = re.compile(r"\[[^\]]*?\*([^\]]*)\]") +_PROCEDURE_CALL_PATTERN = re.compile(r"\bCALL\s+([\w\.]+)", re.IGNORECASE) + + +def _validate_cypher_read_only(query: str) -> None: + upper_query = query.upper() + for keyword, pattern in _CYPHER_DANGEROUS_PATTERNS: + if pattern.search(upper_query): + raise 
ex.LLMGenerationError( + ex.LLM_DANGEROUS_QUERY.format(keyword=keyword, query=query) + ) + + +def _validate_no_unbounded_paths(query: str) -> None: + for match in _VARLEN_PATTERN.finditer(query): + spec = match.group(1).strip() + if not spec: + raise ex.LLMGenerationError(ex.LLM_UNBOUNDED_PATH.format(query=query)) + if ".." in spec: + upper = spec.split("..", 1)[1].lstrip() + if not upper or not upper[0].isdigit(): + raise ex.LLMGenerationError(ex.LLM_UNBOUNDED_PATH.format(query=query)) + + +def _validate_call_procedures(query: str) -> None: + for match in _PROCEDURE_CALL_PATTERN.finditer(query): + name = match.group(1) + if not any( + name.startswith(prefix) for prefix in cs.CYPHER_ALLOWED_PROCEDURE_PREFIXES + ): + raise ex.LLMGenerationError( + ex.LLM_DISALLOWED_PROCEDURE.format(name=name, query=query) + ) + + class CypherGenerator: + __slots__ = ("agent",) + def __init__(self) -> None: try: config = settings.active_cypher_config @@ -68,6 +144,9 @@ async def generate(self, natural_language_query: str) -> str: ) query = _clean_cypher_response(result.output) + _validate_cypher_read_only(query) + _validate_no_unbounded_paths(query) + _validate_call_procedures(query) logger.info(ls.CYPHER_GENERATED.format(query=query)) return query except Exception as e: @@ -75,18 +154,35 @@ async def generate(self, natural_language_query: str) -> str: raise ex.LLMGenerationError(ex.LLM_GENERATION_FAILED.format(error=e)) from e -def create_rag_orchestrator(tools: list[Tool]) -> Agent: +def create_rag_orchestrator( + tools: list[Tool], + project_root: Path | None = None, + load_instructions: bool = True, + active_projects: list[str] | None = None, +) -> tuple[Agent, str]: try: config = settings.active_orchestrator_config llm = _create_provider_model(config) - return Agent( + project_instructions = ( + load_cgr_instructions(project_root) if load_instructions else None + ) + system_prompt = build_rag_orchestrator_prompt( + tools, + project_instructions=project_instructions, + 
active_projects=active_projects, + ) + + agent = Agent( model=llm, - system_prompt=build_rag_orchestrator_prompt(tools), + system_prompt=system_prompt, tools=tools, - retries=settings.AGENT_RETRIES, - output_retries=settings.ORCHESTRATOR_OUTPUT_RETRIES, + retries=AgentRetries( + tools=settings.AGENT_RETRIES, + output=settings.ORCHESTRATOR_OUTPUT_RETRIES, + ), output_type=[str, DeferredToolRequests], ) + return agent, system_prompt except Exception as e: raise ex.LLMGenerationError(ex.LLM_INIT_ORCHESTRATOR.format(error=e)) from e diff --git a/codebase_rag/services/protobuf_service.py b/codebase_rag/services/protobuf_service.py index 7c5138c12..50de78eb9 100644 --- a/codebase_rag/services/protobuf_service.py +++ b/codebase_rag/services/protobuf_service.py @@ -22,6 +22,10 @@ cs.NodeLabel.EXTERNAL_PACKAGE: cs.ONEOF_EXTERNAL_PACKAGE, cs.NodeLabel.MODULE_IMPLEMENTATION: cs.ONEOF_MODULE_IMPLEMENTATION, cs.NodeLabel.MODULE_INTERFACE: cs.ONEOF_MODULE_INTERFACE, + cs.NodeLabel.INTERFACE: cs.ONEOF_INTERFACE, + cs.NodeLabel.ENUM: cs.ONEOF_ENUM, + cs.NodeLabel.TYPE: cs.ONEOF_TYPE, + cs.NodeLabel.UNION: cs.ONEOF_UNION, } ONEOF_FIELD_TO_LABEL: dict[str, cs.NodeLabel] = { @@ -32,7 +36,13 @@ NAME_BASED_LABELS = frozenset({cs.NodeLabel.EXTERNAL_PACKAGE, cs.NodeLabel.PROJECT}) +_REL_TYPE_CACHE: dict = {} +_MSG_CLASS_CACHE: dict[str, type | None] = {} + + class ProtobufFileIngestor: + __slots__ = ("output_dir", "_nodes", "_relationships", "split_index") + def __init__(self, output_path: str, split_index: bool = False): self.output_dir = Path(output_path) self._nodes: dict[str, pb.Node] = {} @@ -53,7 +63,11 @@ def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: if not node_id or node_id in self._nodes: return - payload_message_class = getattr(pb, label, None) + if label in _MSG_CLASS_CACHE: + payload_message_class = _MSG_CLASS_CACHE[label] + else: + payload_message_class = getattr(pb, label, None) + _MSG_CLASS_CACHE[label] = payload_message_class if not 
payload_message_class: logger.warning(ls.PROTOBUF_NO_MESSAGE_CLASS.format(label=label)) return @@ -88,42 +102,45 @@ def ensure_relationship_batch( to_spec: tuple[str, str, PropertyValue], properties: PropertyDict | None = None, ) -> None: - rel = pb.Relationship() + if rel_type in _REL_TYPE_CACHE: + rel_type_enum = _REL_TYPE_CACHE[rel_type] + else: + resolved = getattr(pb.Relationship.RelationshipType, rel_type, None) + if resolved is None: + logger.warning(ls.PROTOBUF_UNKNOWN_REL_TYPE.format(rel_type=rel_type)) + resolved = ( + pb.Relationship.RelationshipType.RELATIONSHIP_TYPE_UNSPECIFIED + ) + rel_type_enum = resolved + _REL_TYPE_CACHE[rel_type] = rel_type_enum - rel_type_enum = getattr(pb.Relationship.RelationshipType, rel_type, None) - if rel_type_enum is None: - logger.warning(ls.PROTOBUF_UNKNOWN_REL_TYPE.format(rel_type=rel_type)) - rel_type_enum = ( - pb.Relationship.RelationshipType.RELATIONSHIP_TYPE_UNSPECIFIED - ) - rel.type = rel_type_enum + from_label, _, from_val_raw = from_spec + to_label, _, to_val_raw = to_spec - from_label, _, from_val = from_spec - to_label, _, to_val = to_spec + from_val = str(from_val_raw) if from_val_raw is not None else "" + to_val = str(to_val_raw) if to_val_raw is not None else "" - rel.source_id = str(from_val) - rel.source_label = str(from_label) - rel.target_id = str(to_val) - rel.target_label = str(to_label) + unique_key = (from_val, rel_type_enum, to_val) + if unique_key in self._relationships: + if properties: + self._relationships[unique_key].properties.update(properties) + return - if not rel.source_id.strip() or not rel.target_id.strip(): + if not from_val.strip() or not to_val.strip(): logger.warning( - ls.PROTOBUF_INVALID_REL.format( - source_id=rel.source_id, target_id=rel.target_id - ) + ls.PROTOBUF_INVALID_REL.format(source_id=from_val, target_id=to_val) ) return + rel = pb.Relationship() + rel.type = rel_type_enum + rel.source_id = from_val + rel.source_label = str(from_label) + rel.target_id = to_val + 
rel.target_label = str(to_label) if properties: rel.properties.update(properties) - - unique_key = (rel.source_id, rel.type, rel.target_id) - if unique_key in self._relationships: - if properties: - existing_rel = self._relationships[unique_key] - existing_rel.properties.update(properties) - else: - self._relationships[unique_key] = rel + self._relationships[unique_key] = rel def _flush_joint(self) -> None: index = pb.GraphCodeIndex() diff --git a/codebase_rag/stack/__init__.py b/codebase_rag/stack/__init__.py new file mode 100644 index 000000000..277a85f8a --- /dev/null +++ b/codebase_rag/stack/__init__.py @@ -0,0 +1,21 @@ +from .manager import ( + StackManager, + StackStatus, + daemon_down, + daemon_logs, + daemon_restart, + daemon_status, + daemon_up, + ensure_running, +) + +__all__ = [ + "StackManager", + "StackStatus", + "daemon_down", + "daemon_logs", + "daemon_restart", + "daemon_status", + "daemon_up", + "ensure_running", +] diff --git a/codebase_rag/stack/cli.py b/codebase_rag/stack/cli.py new file mode 100644 index 000000000..5677ae2f0 --- /dev/null +++ b/codebase_rag/stack/cli.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import sys + +import click +from loguru import logger + +from .. 
import cli_help as ch +from .manager import StackError, StackManager + + +@click.group(help=ch.CMD_DAEMON_GROUP) +def cli() -> None: + pass + + +def _print_status(mgr: StackManager) -> None: + status = mgr.status() + click.echo(f"state: {status.state.value}") + click.echo( + f"memgraph: {status.memgraph_endpoint} (reachable={status.memgraph_reachable})" + ) + click.echo( + f"qdrant: {status.qdrant_endpoint} (reachable={status.qdrant_reachable})" + ) + click.echo(f"compose: {status.compose_file}") + + +@cli.command("up", help=ch.CMD_DAEMON_UP) +def up_cmd() -> None: + mgr = StackManager() + try: + mgr.ensure_running() + _print_status(mgr) + except StackError as e: + logger.error(str(e)) + click.secho(str(e), fg="red", err=True) + sys.exit(1) + + +@cli.command("down", help=ch.CMD_DAEMON_DOWN) +def down_cmd() -> None: + mgr = StackManager() + try: + mgr.down() + click.echo("stopped") + except StackError as e: + logger.error(str(e)) + click.secho(str(e), fg="red", err=True) + sys.exit(1) + + +@cli.command("status", help=ch.CMD_DAEMON_STATUS) +def status_cmd() -> None: + _print_status(StackManager()) + + +@cli.command("restart", help=ch.CMD_DAEMON_RESTART) +def restart_cmd() -> None: + mgr = StackManager() + try: + mgr.restart() + mgr.wait_healthy() + _print_status(mgr) + except StackError as e: + logger.error(str(e)) + click.secho(str(e), fg="red", err=True) + sys.exit(1) + + +@cli.command("logs", help=ch.CMD_DAEMON_LOGS) +@click.option("--follow", "-f", is_flag=True, help=ch.HELP_DAEMON_LOGS_FOLLOW) +@click.option("--service", "-s", default=None, help=ch.HELP_DAEMON_LOGS_SERVICE) +def logs_cmd(follow: bool, service: str | None) -> None: + mgr = StackManager() + try: + rc = mgr.logs(service=service, follow=follow) + if rc != 0: + sys.exit(rc) + except StackError as e: + logger.error(str(e)) + click.secho(str(e), fg="red", err=True) + sys.exit(1) diff --git a/codebase_rag/stack/constants.py b/codebase_rag/stack/constants.py new file mode 100644 index 
000000000..bb5d7b0ff --- /dev/null +++ b/codebase_rag/stack/constants.py @@ -0,0 +1,51 @@ +from enum import StrEnum + +COMPOSE_PROJECT_NAME = "cgr" +COMPOSE_FILENAME = "docker-compose.yaml" +STATE_FILENAME = "state.json" + +DOCKER_BIN = "docker" +DOCKER_COMPOSE_SUBCOMMAND = "compose" + +DEFAULT_HEALTH_TIMEOUT_S = 60.0 +DEFAULT_HEALTH_INTERVAL_S = 1.0 +DEFAULT_DOCKER_TIMEOUT_S = 120.0 +DEFAULT_STATUS_TIMEOUT_S = 10.0 + +SERVICE_MEMGRAPH = "memgraph" +SERVICE_QDRANT = "qdrant" +SERVICE_LAB = "lab" + + +class StackState(StrEnum): + RUNNING = "running" + PARTIAL = "partial" + STOPPED = "stopped" + UNKNOWN = "unknown" + + +ERR_DOCKER_NOT_INSTALLED = ( + "docker not found on PATH. Install Docker Desktop or the docker CLI." +) +ERR_DOCKER_DAEMON_DOWN = ( + "docker is installed but the daemon is not responding. Start Docker and retry." +) +ERR_COMPOSE_NOT_AVAILABLE = "`docker compose` plugin not available. Install Docker Desktop v2+ or the compose plugin." +ERR_STACK_START_FAILED = "Failed to bring stack up: {detail}" +ERR_STACK_STOP_FAILED = "Failed to bring stack down: {detail}" +ERR_STACK_NOT_HEALTHY = ( + "Stack started but {service} did not become healthy within {timeout}s." +) +ERR_COMPOSE_FILE_MISSING = "Compose file not found at {path}." + +MSG_USING_COMPOSE_FILE = "Using compose file at {path}" +MSG_STARTING_STACK = "Starting cgr stack..." +MSG_STACK_HEALTHY = "Stack is healthy ({memgraph}, {qdrant})." +MSG_STACK_ALREADY_RUNNING = "Stack already running." +MSG_STOPPING_STACK = "Stopping cgr stack..." +MSG_STACK_STOPPED = "Stack stopped." +MSG_RESTARTING_STACK = "Restarting cgr stack..." +MSG_RENDERING_COMPOSE = "Rendering compose file to {path}" +MSG_WAITING_FOR_HEALTH = "Waiting for {service} on {host}:{port}..." 
+ +PACKAGE_COMPOSE_RELATIVE = "../docker-compose.yaml" diff --git a/codebase_rag/stack/health.py b/codebase_rag/stack/health.py new file mode 100644 index 000000000..b5353374a --- /dev/null +++ b/codebase_rag/stack/health.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +import time +import urllib.error +import urllib.request + +import mgclient # ty: ignore[unresolved-import] + +from . import constants as cs + + +def _bolt_reachable(host: str, port: int) -> bool: + try: + conn = mgclient.connect(host=host, port=port) + try: + cursor = conn.cursor() + cursor.execute("RETURN 1") + cursor.fetchall() + finally: + conn.close() + return True + except (mgclient.Error, OSError): + return False + + +def _http_reachable(url: str, timeout: float = 1.5) -> bool: + try: + with urllib.request.urlopen(url, timeout=timeout) as resp: # noqa: S310 + return 200 <= resp.status < 500 + except (urllib.error.URLError, TimeoutError, OSError): + return False + + +def wait_for_memgraph( + host: str, + port: int, + timeout: float = cs.DEFAULT_HEALTH_TIMEOUT_S, + interval: float = cs.DEFAULT_HEALTH_INTERVAL_S, +) -> bool: + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + if _bolt_reachable(host, port): + return True + time.sleep(interval) + return False + + +def wait_for_qdrant( + host: str, + port: int, + timeout: float = cs.DEFAULT_HEALTH_TIMEOUT_S, + interval: float = cs.DEFAULT_HEALTH_INTERVAL_S, +) -> bool: + url = f"http://{host}:{port}/readyz" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + if _http_reachable(url): + return True + time.sleep(interval) + return False diff --git a/codebase_rag/stack/manager.py b/codebase_rag/stack/manager.py new file mode 100644 index 000000000..95ffc155b --- /dev/null +++ b/codebase_rag/stack/manager.py @@ -0,0 +1,262 @@ +from __future__ import annotations + +import shutil +import subprocess +from dataclasses import dataclass +from pathlib import Path + +from loguru import logger + 
+from ..config import settings +from . import constants as cs +from .health import wait_for_memgraph, wait_for_qdrant + + +class StackError(RuntimeError): + pass + + +@dataclass +class StackStatus: + state: cs.StackState + memgraph_reachable: bool + qdrant_reachable: bool + compose_file: Path + memgraph_endpoint: str + qdrant_endpoint: str + + +class StackManager: + def __init__( + self, + home: Path | None = None, + package_compose: Path | None = None, + memgraph_host: str | None = None, + memgraph_port: int | None = None, + qdrant_host: str = "localhost", + qdrant_port: int = 6333, + project_name: str = cs.COMPOSE_PROJECT_NAME, + ) -> None: + self.home = (home or settings.CGR_HOME).expanduser() + self.package_compose = ( + package_compose + or (Path(__file__).resolve().parent / cs.PACKAGE_COMPOSE_RELATIVE).resolve() + ) + self.memgraph_host = memgraph_host or settings.MEMGRAPH_HOST + self.memgraph_port = memgraph_port or settings.MEMGRAPH_PORT + self.qdrant_host = qdrant_host + self.qdrant_port = qdrant_port + self.project_name = project_name + + @property + def compose_file(self) -> Path: + return self.home / cs.COMPOSE_FILENAME + + def ensure_home(self) -> None: + self.home.mkdir(parents=True, exist_ok=True) + + def ensure_compose_file(self) -> Path: + self.ensure_home() + target = self.compose_file + if not target.exists(): + if not self.package_compose.exists(): + raise StackError( + cs.ERR_COMPOSE_FILE_MISSING.format(path=self.package_compose) + ) + logger.info(cs.MSG_RENDERING_COMPOSE.format(path=target)) + shutil.copyfile(self.package_compose, target) + return target + + def check_docker(self) -> None: + if shutil.which(cs.DOCKER_BIN) is None: + raise StackError(cs.ERR_DOCKER_NOT_INSTALLED) + info = subprocess.run( + [cs.DOCKER_BIN, "info"], + capture_output=True, + text=True, + timeout=cs.DEFAULT_STATUS_TIMEOUT_S, + check=False, + ) + if info.returncode != 0: + raise StackError(cs.ERR_DOCKER_DAEMON_DOWN) + compose = subprocess.run( + [cs.DOCKER_BIN, 
cs.DOCKER_COMPOSE_SUBCOMMAND, "version"], + capture_output=True, + text=True, + timeout=cs.DEFAULT_STATUS_TIMEOUT_S, + check=False, + ) + if compose.returncode != 0: + raise StackError(cs.ERR_COMPOSE_NOT_AVAILABLE) + + def _compose_cmd(self, *args: str) -> list[str]: + return [ + cs.DOCKER_BIN, + cs.DOCKER_COMPOSE_SUBCOMMAND, + "-p", + self.project_name, + "-f", + str(self.compose_file), + *args, + ] + + def up(self, timeout: float = cs.DEFAULT_DOCKER_TIMEOUT_S) -> None: + self.check_docker() + self.ensure_compose_file() + logger.info(cs.MSG_STARTING_STACK) + result = subprocess.run( + self._compose_cmd("up", "-d"), + capture_output=True, + text=True, + timeout=timeout, + check=False, + ) + if result.returncode != 0: + raise StackError( + cs.ERR_STACK_START_FAILED.format( + detail=result.stderr.strip() or result.stdout.strip() + ) + ) + + def down(self, timeout: float = cs.DEFAULT_DOCKER_TIMEOUT_S) -> None: + if not self.compose_file.exists(): + return + if shutil.which(cs.DOCKER_BIN) is None: + raise StackError(cs.ERR_DOCKER_NOT_INSTALLED) + logger.info(cs.MSG_STOPPING_STACK) + result = subprocess.run( + self._compose_cmd("down"), + capture_output=True, + text=True, + timeout=timeout, + check=False, + ) + if result.returncode != 0: + raise StackError( + cs.ERR_STACK_STOP_FAILED.format( + detail=result.stderr.strip() or result.stdout.strip() + ) + ) + + def logs( + self, + service: str | None = None, + follow: bool = False, + tail: int | None = 200, + ) -> int: + if not self.compose_file.exists(): + raise StackError(cs.ERR_COMPOSE_FILE_MISSING.format(path=self.compose_file)) + args: list[str] = ["logs"] + if follow: + args.append("-f") + if tail is not None: + args.extend(["--tail", str(tail)]) + if service: + args.append(service) + completed = subprocess.run(self._compose_cmd(*args), check=False) + return completed.returncode + + def restart(self) -> None: + logger.info(cs.MSG_RESTARTING_STACK) + self.down() + self.up() + + def wait_healthy( + self, + timeout: 
float = cs.DEFAULT_HEALTH_TIMEOUT_S, + ) -> None: + logger.info( + cs.MSG_WAITING_FOR_HEALTH.format( + service=cs.SERVICE_MEMGRAPH, + host=self.memgraph_host, + port=self.memgraph_port, + ) + ) + if not wait_for_memgraph(self.memgraph_host, self.memgraph_port, timeout): + raise StackError( + cs.ERR_STACK_NOT_HEALTHY.format( + service=cs.SERVICE_MEMGRAPH, timeout=timeout + ) + ) + logger.info( + cs.MSG_WAITING_FOR_HEALTH.format( + service=cs.SERVICE_QDRANT, + host=self.qdrant_host, + port=self.qdrant_port, + ) + ) + if not wait_for_qdrant(self.qdrant_host, self.qdrant_port, timeout): + raise StackError( + cs.ERR_STACK_NOT_HEALTHY.format( + service=cs.SERVICE_QDRANT, timeout=timeout + ) + ) + + def status(self) -> StackStatus: + memgraph_ok = wait_for_memgraph( + self.memgraph_host, self.memgraph_port, timeout=0.1, interval=0.0 + ) + qdrant_ok = wait_for_qdrant( + self.qdrant_host, self.qdrant_port, timeout=0.1, interval=0.0 + ) + match (memgraph_ok, qdrant_ok): + case (True, True): + state = cs.StackState.RUNNING + case (False, False): + state = cs.StackState.STOPPED + case _: + state = cs.StackState.PARTIAL + return StackStatus( + state=state, + memgraph_reachable=memgraph_ok, + qdrant_reachable=qdrant_ok, + compose_file=self.compose_file, + memgraph_endpoint=f"{self.memgraph_host}:{self.memgraph_port}", + qdrant_endpoint=f"{self.qdrant_host}:{self.qdrant_port}", + ) + + def ensure_running(self) -> StackStatus: + current = self.status() + if current.state == cs.StackState.RUNNING: + logger.info(cs.MSG_STACK_ALREADY_RUNNING) + return current + self.up() + self.wait_healthy() + final = self.status() + logger.info( + cs.MSG_STACK_HEALTHY.format( + memgraph=final.memgraph_endpoint, + qdrant=final.qdrant_endpoint, + ) + ) + return final + + +def ensure_running() -> StackStatus: + return StackManager().ensure_running() + + +def daemon_up() -> StackStatus: + mgr = StackManager() + mgr.up() + mgr.wait_healthy() + return mgr.status() + + +def daemon_down() -> None: + 
StackManager().down() + + +def daemon_status() -> StackStatus: + return StackManager().status() + + +def daemon_logs(service: str | None = None, follow: bool = False) -> int: + return StackManager().logs(service=service, follow=follow) + + +def daemon_restart() -> StackStatus: + mgr = StackManager() + mgr.restart() + mgr.wait_healthy() + return mgr.status() diff --git a/codebase_rag/tests/conftest.py b/codebase_rag/tests/conftest.py index a22c1ede0..e3a4a19c1 100644 --- a/codebase_rag/tests/conftest.py +++ b/codebase_rag/tests/conftest.py @@ -8,14 +8,13 @@ from dataclasses import dataclass, field from pathlib import Path from typing import TYPE_CHECKING, Protocol, Self -from unittest.mock import MagicMock +from unittest.mock import MagicMock, call import pytest from loguru import logger from codebase_rag.graph_updater import GraphUpdater from codebase_rag.parser_loader import load_parsers -from codebase_rag.services.graph_service import MemgraphIngestor if TYPE_CHECKING: pass # ty: ignore[unresolved-import] @@ -89,6 +88,25 @@ def create_mock_node( logger.remove() +@pytest.fixture(autouse=True) +def _disable_stack_autostart() -> Generator[None, None, None]: + from unittest.mock import patch + + with patch("codebase_rag.cli._maybe_start_stack"): + yield + + +@pytest.fixture(autouse=True) +def _isolate_cgr_home( + tmp_path_factory: pytest.TempPathFactory, monkeypatch: pytest.MonkeyPatch +) -> Generator[Path, None, None]: + from codebase_rag.config import settings + + home = tmp_path_factory.mktemp("cgr-home-iso") + monkeypatch.setattr(settings, "CGR_HOME", home) + yield home + + @pytest.fixture def temp_repo() -> Generator[Path, None, None]: """Creates a temporary repository path for a test and cleans up afterward.""" @@ -97,10 +115,44 @@ def temp_repo() -> Generator[Path, None, None]: shutil.rmtree(temp_dir) +class _MockIngestor: + _TRACKED = ( + "fetch_all", + "execute_write", + "ensure_node_batch", + "ensure_relationship_batch", + "flush_all", + ) + + def 
__init__(self) -> None: + self.fetch_all = MagicMock() + self.execute_write = MagicMock() + self.ensure_node_batch = MagicMock() + self.ensure_relationship_batch = MagicMock() + self.flush_all = MagicMock() + self._fallback = MagicMock() + + def reset_mock(self) -> None: + for name in (*self._TRACKED, "_fallback"): + getattr(self, name).reset_mock() + + @property + def method_calls(self) -> list: + result = [] + for name in self._TRACKED: + mock_attr = self.__dict__[name] + for c in mock_attr.call_args_list: + result.append(getattr(call, name)(*c.args, **c.kwargs)) + result.extend(self._fallback.method_calls) + return result + + def __getattr__(self, name: str) -> MagicMock: + return getattr(self._fallback, name) + + @pytest.fixture -def mock_ingestor() -> MagicMock: - """Provides a mocked MemgraphIngestor instance.""" - return MagicMock(spec=MemgraphIngestor) +def mock_ingestor() -> _MockIngestor: + return _MockIngestor() def run_updater( diff --git a/codebase_rag/tests/fuzz_test_parsers.py b/codebase_rag/tests/fuzz_test_parsers.py new file mode 100644 index 000000000..d9a608887 --- /dev/null +++ b/codebase_rag/tests/fuzz_test_parsers.py @@ -0,0 +1,20 @@ +import sys + +import atheris + +from codebase_rag.language_spec import ( + get_language_for_extension, + get_language_spec, +) + + +def fuzz_language_spec(data): + fdp = atheris.FuzzedDataProvider(data) + extension = fdp.ConsumeUnicodeNoSurrogates(64) + get_language_spec(extension) + get_language_for_extension(extension) + + +if __name__ == "__main__": + atheris.Setup(sys.argv, fuzz_language_spec) + atheris.Fuzz() diff --git a/codebase_rag/tests/integration/test_cypher_queries.py b/codebase_rag/tests/integration/test_cypher_queries.py index e01415daf..4e5ee30ba 100644 --- a/codebase_rag/tests/integration/test_cypher_queries.py +++ b/codebase_rag/tests/integration/test_cypher_queries.py @@ -11,11 +11,13 @@ CYPHER_FIND_BY_QUALIFIED_NAME, CYPHER_GET_FUNCTION_SOURCE_LOCATION, build_constraint_query, + 
build_dead_code_query, build_merge_node_query, build_merge_relationship_query, build_nodes_by_ids_query, wrap_with_unwind, ) +from codebase_rag.types_defs import PropertyValue if TYPE_CHECKING: from codebase_rag.services.graph_service import MemgraphIngestor @@ -343,6 +345,292 @@ def test_creates_calls_relationship_with_properties( assert verify[0]["line"] == 42 +class TestBuildDeadCodeQueryUnit: + def test_include_tests_references_test_patterns(self) -> None: + query = build_dead_code_query(include_tests=True) + + assert "$test_patterns" in query + assert "$project_prefix" in query + assert "$root_decorators" in query + assert "$entry_points" in query + assert "is_exported" in query + assert "CALLS*0.." in query + # (H) test functions are roots when tests are included + assert "n.path CONTAINS" in query + + def test_exclude_tests_omits_test_function_roots(self) -> None: + query = build_dead_code_query(include_tests=False) + + # (H) test functions are NOT roots when excluding tests ... + assert "n.path CONTAINS" not in query + # (H) ... but test_patterns still filters test modules out of the + # (H) module-load root clause so test-only code is not kept alive. 
+ assert "$test_patterns" in query + assert "m.path CONTAINS" in query + assert "$project_prefix" in query + + def test_module_load_callees_are_roots(self) -> None: + query = build_dead_code_query(include_tests=False) + + # (H) a function called by a Module node runs at import, so it is a root + assert "Module" in query + assert "[:CALLS]-(" in query + + def test_include_classes_adds_class_candidates(self) -> None: + with_classes = build_dead_code_query(include_tests=False, include_classes=True) + assert "Function|Method|Class" in with_classes + assert "INHERITS" in with_classes + + without_classes = build_dead_code_query( + include_tests=False, include_classes=False + ) + assert "Function|Method|Class" not in without_classes + assert "INHERITS" not in without_classes + + +@pytest.mark.integration +class TestBuildDeadCodeQueryIntegration: + def _seed(self, ingestor: MemgraphIngestor) -> None: + # (H) called -> live; orphan -> dead; handler is a @task root; + # (H) routed is a @app.route root calling routed_callee (decorators are + # (H) stored @-prefixed and dotted, exactly as the parser emits them); + # (H) test_runs is a test root that calls helper (so helper is live) + ingestor._execute_query( + "CREATE " + "(m:Module {qualified_name: 'proj.mod', path: 'proj/mod.py'}), " + "(entry:Function {qualified_name: 'proj.mod.main', name: 'main', " + " start_line: 1, end_line: 3, decorators: [], path: 'proj/mod.py'}), " + "(called:Function {qualified_name: 'proj.mod.called', name: 'called', " + " start_line: 5, end_line: 7, decorators: [], path: 'proj/mod.py'}), " + "(orphan:Function {qualified_name: 'proj.mod.orphan', name: 'orphan', " + " start_line: 9, end_line: 11, decorators: [], path: 'proj/mod.py'}), " + "(handler:Function {qualified_name: 'proj.mod.handler', name: 'handler', " + " start_line: 13, end_line: 15, decorators: ['@task'], path: 'proj/mod.py'}), " + "(routed:Function {qualified_name: 'proj.mod.routed', name: 'routed', " + " start_line: 21, end_line: 23, 
decorators: ['@app.route'], " + " path: 'proj/mod.py'}), " + "(routed_callee:Function {qualified_name: 'proj.mod.routed_callee', " + " name: 'routed_callee', start_line: 25, end_line: 27, decorators: [], " + " path: 'proj/mod.py'}), " + "(helper:Function {qualified_name: 'proj.mod.helper', name: 'helper', " + " start_line: 17, end_line: 19, decorators: [], path: 'proj/mod.py'}), " + "(testfn:Function {qualified_name: 'proj.tests.test_runs', " + " name: 'test_runs', start_line: 1, end_line: 4, decorators: [], " + " path: 'proj/tests/test_mod.py'}), " + "(entry)-[:CALLS]->(called), " + "(routed)-[:CALLS]->(routed_callee), " + "(testfn)-[:CALLS]->(helper)" + ) + + def _params(self, include_tests: bool) -> dict[str, PropertyValue]: # noqa: ARG002 + # (H) test_patterns is always supplied; the query (built per include_tests) + # (H) decides whether it gates test-function roots or test-module filtering. + return { + "project_prefix": "proj.", + "root_decorators": ["task", "route"], + "entry_points": ["proj.mod.main"], + "test_patterns": ["test_", "_test", "conftest", "/tests/"], + } + + def test_reports_only_the_orphan_with_tests_included( + self, memgraph_ingestor: MemgraphIngestor + ) -> None: + self._seed(memgraph_ingestor) + + results = memgraph_ingestor._execute_query( + build_dead_code_query(include_tests=True), self._params(True) + ) + + names = {r["qualified_name"] for r in results} + assert names == {"proj.mod.orphan"} + + def test_excluding_tests_reports_orphan_and_test_only_code( + self, memgraph_ingestor: MemgraphIngestor + ) -> None: + self._seed(memgraph_ingestor) + + results = memgraph_ingestor._execute_query( + build_dead_code_query(include_tests=False), self._params(False) + ) + + names = {r["qualified_name"] for r in results} + # (H) without test roots, the test fn and its helper are no longer reachable + assert names == { + "proj.mod.orphan", + "proj.tests.test_runs", + "proj.mod.helper", + } + + def test_returns_row_shape(self, memgraph_ingestor: 
MemgraphIngestor) -> None: + self._seed(memgraph_ingestor) + + results = memgraph_ingestor._execute_query( + build_dead_code_query(include_tests=True), self._params(True) + ) + + assert len(results) == 1 + row = results[0] + assert row["label"] == "Function" + assert row["name"] == "orphan" + assert row["start_line"] == 9 + assert row["end_line"] == 11 + + def test_test_module_call_is_not_a_root_when_excluding_tests( + self, memgraph_ingestor: MemgraphIngestor + ) -> None: + # (H) a function reached only from a TEST module's top-level call must NOT + # (H) be kept alive when --no-include-tests, else test-only code hides as + # (H) live. The same call DOES keep it live when tests are included. + memgraph_ingestor._execute_query( + "CREATE " + "(tm:Module {qualified_name: 'proj.tests.test_x', " + " path: 'proj/tests/test_x.py'}), " + "(tool:Function {qualified_name: 'proj.mod.tool_only', " + " name: 'tool_only', start_line: 1, end_line: 2, decorators: [], " + " path: 'proj/mod.py'}), " + "(tm)-[:CALLS]->(tool)" + ) + params: dict[str, PropertyValue] = { + "project_prefix": "proj.", + "root_decorators": [], + "entry_points": [], + "test_patterns": ["test_", "_test", "conftest", "/tests/"], + } + + excluded = memgraph_ingestor._execute_query( + build_dead_code_query(include_tests=False), params + ) + assert {r["qualified_name"] for r in excluded} == {"proj.mod.tool_only"} + + included = memgraph_ingestor._execute_query( + build_dead_code_query(include_tests=True), params + ) + assert {r["qualified_name"] for r in included} == set() + + def test_class_candidates_when_classes_included( + self, memgraph_ingestor: MemgraphIngestor + ) -> None: + # (H) used is a module-load root that instantiates WithInit (INSTANTIATES + # (H) the class plus CALLS its __init__), NoInit (INSTANTIATES only, no + # (H) __init__) and Derived (INSTANTIATES; Derived INHERITS Base, so Base + # (H) is live too). Only DeadClass (and the orphan function) is unreachable. 
+ memgraph_ingestor._execute_query( + "CREATE " + "(m:Module {qualified_name: 'proj.mod', path: 'proj/mod.py'}), " + "(used:Function {qualified_name: 'proj.mod.used', name: 'used', " + " start_line: 1, end_line: 2, decorators: [], path: 'proj/mod.py'}), " + "(orphan_fn:Function {qualified_name: 'proj.mod.orphan_fn', " + " name: 'orphan_fn', start_line: 4, end_line: 5, decorators: [], " + " path: 'proj/mod.py'}), " + "(wi:Class {qualified_name: 'proj.mod.WithInit', name: 'WithInit', " + " start_line: 7, end_line: 9, decorators: [], path: 'proj/mod.py'}), " + "(wii:Method {qualified_name: 'proj.mod.WithInit.__init__', " + " name: '__init__', start_line: 8, end_line: 9, decorators: [], " + " path: 'proj/mod.py'}), " + "(ni:Class {qualified_name: 'proj.mod.NoInit', name: 'NoInit', " + " start_line: 11, end_line: 12, decorators: [], path: 'proj/mod.py'}), " + "(base:Class {qualified_name: 'proj.mod.Base', name: 'Base', " + " start_line: 14, end_line: 15, decorators: [], path: 'proj/mod.py'}), " + "(der:Class {qualified_name: 'proj.mod.Derived', name: 'Derived', " + " start_line: 17, end_line: 18, decorators: [], path: 'proj/mod.py'}), " + "(dead:Class {qualified_name: 'proj.mod.DeadClass', name: 'DeadClass', " + " start_line: 20, end_line: 21, decorators: [], path: 'proj/mod.py'}), " + "(wi)-[:DEFINES_METHOD]->(wii), " + "(der)-[:INHERITS]->(base), " + "(m)-[:CALLS]->(used), " + "(used)-[:INSTANTIATES]->(wi), " + "(used)-[:CALLS]->(wii), " + "(used)-[:INSTANTIATES]->(ni), " + "(used)-[:INSTANTIATES]->(der)" + ) + params: dict[str, PropertyValue] = { + "project_prefix": "proj.", + "root_decorators": [], + "entry_points": [], + "test_patterns": ["test_", "_test", "conftest", "/tests/"], + } + + without_classes = memgraph_ingestor._execute_query( + build_dead_code_query(include_tests=False, include_classes=False), params + ) + assert {r["qualified_name"] for r in without_classes} == {"proj.mod.orphan_fn"} + + with_classes = memgraph_ingestor._execute_query( + 
build_dead_code_query(include_tests=False, include_classes=True), params + ) + assert {r["qualified_name"] for r in with_classes} == { + "proj.mod.orphan_fn", + "proj.mod.DeadClass", + } + + def test_subclass_only_base_is_reported_when_subclass_is_unreachable( + self, memgraph_ingestor: MemgraphIngestor + ) -> None: + # (H) Base is subclassed by Derived, but nothing instantiates Derived, so + # (H) the traversal never reaches Derived and therefore never reaches Base + # (H) via INHERITS. The whole dead cluster (both classes) is reported: a + # (H) base kept alive only by an unreachable subclass is itself dead. + # (H) Live is present purely so the query has a reachable root to anchor. + memgraph_ingestor._execute_query( + "CREATE " + "(m:Module {qualified_name: 'proj.mod', path: 'proj/mod.py'}), " + "(live:Class {qualified_name: 'proj.mod.Live', name: 'Live', " + " start_line: 1, end_line: 2, decorators: [], path: 'proj/mod.py'}), " + "(base:Class {qualified_name: 'proj.mod.Base', name: 'Base', " + " start_line: 4, end_line: 5, decorators: [], path: 'proj/mod.py'}), " + "(der:Class {qualified_name: 'proj.mod.Derived', name: 'Derived', " + " start_line: 7, end_line: 8, decorators: [], path: 'proj/mod.py'}), " + "(der)-[:INHERITS]->(base), " + "(m)-[:INSTANTIATES]->(live)" + ) + params: dict[str, PropertyValue] = { + "project_prefix": "proj.", + "root_decorators": [], + "entry_points": [], + "test_patterns": ["test_", "_test", "conftest", "/tests/"], + } + + with_classes = memgraph_ingestor._execute_query( + build_dead_code_query(include_tests=False, include_classes=True), params + ) + assert {r["qualified_name"] for r in with_classes} == { + "proj.mod.Base", + "proj.mod.Derived", + } + + def test_module_load_callee_is_a_root( + self, memgraph_ingestor: MemgraphIngestor + ) -> None: + # (H) a function called by a Module (e.g. 
`if __name__ == "__main__": main()` + # (H) or a bare decorator) runs at import, so it and its callees are live even + # (H) with no entry-point/decorator/export root. + memgraph_ingestor._execute_query( + "CREATE " + "(m:Module {qualified_name: 'proj.mod', path: 'proj/mod.py'}), " + "(main:Function {qualified_name: 'proj.mod.main', name: 'main', " + " start_line: 1, end_line: 2, decorators: [], path: 'proj/mod.py'}), " + "(used:Function {qualified_name: 'proj.mod.used', name: 'used', " + " start_line: 4, end_line: 5, decorators: [], path: 'proj/mod.py'}), " + "(orphan:Function {qualified_name: 'proj.mod.orphan', name: 'orphan', " + " start_line: 7, end_line: 8, decorators: [], path: 'proj/mod.py'}), " + "(m)-[:CALLS]->(main), " + "(main)-[:CALLS]->(used)" + ) + params: dict[str, PropertyValue] = { + "project_prefix": "proj.", + "root_decorators": [], + "entry_points": [], + "test_patterns": ["test_", "_test", "conftest", "/tests/"], + } + + results = memgraph_ingestor._execute_query( + build_dead_code_query(include_tests=False), params + ) + names = {r["qualified_name"] for r in results} + + assert names == {"proj.mod.orphan"} + + @pytest.mark.integration class TestBuildNodesByIdsQueryIntegration: def test_fetches_nodes_by_ids(self, memgraph_ingestor: MemgraphIngestor) -> None: diff --git a/codebase_rag/tests/integration/test_document_analyzer_integration.py b/codebase_rag/tests/integration/test_document_analyzer_integration.py deleted file mode 100644 index b1cc7f9fb..000000000 --- a/codebase_rag/tests/integration/test_document_analyzer_integration.py +++ /dev/null @@ -1,219 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -from codebase_rag.constants import Provider -from codebase_rag.tools.document_analyzer import ( - DocumentAnalyzer, - create_document_analyzer_tool, -) - -pytestmark = [pytest.mark.integration] - - -@pytest.fixture -def temp_test_repo(tmp_path: Path) -> Path: 
- (tmp_path / "readme.txt").write_text( - "This is a README file.\nIt contains important information.", - encoding="utf-8", - ) - (tmp_path / "code.py").write_text( - "def hello():\n return 'Hello, World!'", - encoding="utf-8", - ) - (tmp_path / "data.json").write_text( - '{"name": "test", "value": 42}', - encoding="utf-8", - ) - subdir = tmp_path / "docs" - subdir.mkdir() - (subdir / "manual.txt").write_text( - "User Manual\n\n1. Getting Started\n2. Configuration", - encoding="utf-8", - ) - return tmp_path - - -@pytest.fixture -def mock_settings() -> MagicMock: - settings = MagicMock() - settings.active_orchestrator_config.provider = Provider.GOOGLE - settings.active_orchestrator_config.provider_type = "api" - settings.active_orchestrator_config.api_key = "test-api-key" - settings.active_orchestrator_config.model_id = "gemini-1.5-flash" - return settings - - -@pytest.fixture -def mock_genai_client() -> MagicMock: - client = MagicMock() - response = MagicMock() - response.text = "This is an analysis of the document." 
- client.models.generate_content.return_value = response - return client - - -@pytest.fixture -def analyzer_with_mock( - temp_test_repo: Path, - mock_settings: MagicMock, - mock_genai_client: MagicMock, -) -> DocumentAnalyzer: - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_genai_client, - ): - return DocumentAnalyzer(str(temp_test_repo)) - - -class TestDocumentAnalyzerIntegration: - def test_analyze_text_file( - self, - analyzer_with_mock: DocumentAnalyzer, - mock_genai_client: MagicMock, - ) -> None: - result = analyzer_with_mock.analyze("readme.txt", "What is this file about?") - assert "analysis" in result.lower() - mock_genai_client.models.generate_content.assert_called_once() - - def test_analyze_code_file( - self, - analyzer_with_mock: DocumentAnalyzer, - mock_genai_client: MagicMock, - ) -> None: - result = analyzer_with_mock.analyze("code.py", "What does this code do?") - assert "analysis" in result.lower() - - def test_analyze_json_file( - self, - analyzer_with_mock: DocumentAnalyzer, - mock_genai_client: MagicMock, - ) -> None: - result = analyzer_with_mock.analyze("data.json", "What data is in this file?") - assert "analysis" in result.lower() - - def test_analyze_nested_file( - self, - analyzer_with_mock: DocumentAnalyzer, - mock_genai_client: MagicMock, - ) -> None: - result = analyzer_with_mock.analyze("docs/manual.txt", "Summarize this manual") - assert "analysis" in result.lower() - - def test_analyze_nonexistent_file( - self, - analyzer_with_mock: DocumentAnalyzer, - ) -> None: - result = analyzer_with_mock.analyze("nonexistent.txt", "What is this?") - assert "error" in result.lower() - assert "not found" in result.lower() - - def test_analyze_path_traversal_blocked( - self, - analyzer_with_mock: DocumentAnalyzer, - ) -> None: - result = analyzer_with_mock.analyze("../../../etc/passwd", "What is this?") - assert "security" in 
result.lower() - - -class TestDocumentAnalyzerToolIntegration: - def test_tool_analyzes_file( - self, - temp_test_repo: Path, - mock_settings: MagicMock, - mock_genai_client: MagicMock, - ) -> None: - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_genai_client, - ): - analyzer = DocumentAnalyzer(str(temp_test_repo)) - tool = create_document_analyzer_tool(analyzer) - result = tool.function( - file_path="readme.txt", - question="What is in this file?", - ) - assert "analysis" in result.lower() - - def test_tool_handles_error( - self, - temp_test_repo: Path, - mock_settings: MagicMock, - mock_genai_client: MagicMock, - ) -> None: - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_genai_client, - ): - analyzer = DocumentAnalyzer(str(temp_test_repo)) - tool = create_document_analyzer_tool(analyzer) - result = tool.function( - file_path="missing.txt", - question="What is this?", - ) - assert "error" in result.lower() - - -class TestDocumentAnalyzerWithDifferentProviders: - def test_unsupported_provider_returns_error( - self, - temp_test_repo: Path, - ) -> None: - mock_settings = MagicMock() - mock_settings.active_orchestrator_config.provider = "anthropic" - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - analyzer = DocumentAnalyzer(str(temp_test_repo)) - result = analyzer.analyze("readme.txt", "What is this?") - assert "not supported" in result.lower() - - -class TestDocumentAnalyzerResponseHandling: - def test_handles_response_with_candidates( - self, - temp_test_repo: Path, - mock_settings: MagicMock, - ) -> None: - mock_client = MagicMock() - response = MagicMock() - response.text = None - candidate = MagicMock() - part = MagicMock() - part.text = "Analysis from candidate" - candidate.content.parts = [part] - 
response.candidates = [candidate] - mock_client.models.generate_content.return_value = response - - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_client, - ): - analyzer = DocumentAnalyzer(str(temp_test_repo)) - result = analyzer.analyze("readme.txt", "What is this?") - assert result == "Analysis from candidate" - - def test_handles_empty_response( - self, - temp_test_repo: Path, - mock_settings: MagicMock, - ) -> None: - mock_client = MagicMock() - response = MagicMock() - response.text = None - response.candidates = None - mock_client.models.generate_content.return_value = response - - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_client, - ): - analyzer = DocumentAnalyzer(str(temp_test_repo)) - result = analyzer.analyze("readme.txt", "What is this?") - assert "no" in result.lower() and "content" in result.lower() diff --git a/codebase_rag/tests/integration/test_incremental_external_prune_e2e.py b/codebase_rag/tests/integration/test_incremental_external_prune_e2e.py new file mode 100644 index 000000000..2a392d98c --- /dev/null +++ b/codebase_rag/tests/integration/test_incremental_external_prune_e2e.py @@ -0,0 +1,57 @@ +# (H) End-to-end (real Memgraph) verification that an incremental rebuild prunes +# (H) external import-target Module nodes that are no longer imported by anyone, +# (H) e.g. an imported name renamed on a subsequent index. 
+from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +import pytest + +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers + +if TYPE_CHECKING: + from codebase_rag.services.graph_service import MemgraphIngestor + +pytestmark = [pytest.mark.integration] + + +def _index(ingestor: MemgraphIngestor, project_path: Path, force: bool) -> None: + parsers, queries = load_parsers() + GraphUpdater( + ingestor=ingestor, + repo_path=project_path, + parsers=parsers, + queries=queries, + project_name="proj", + ).run(force=force) + + +def _external_module_qns(ingestor: MemgraphIngestor) -> set[str]: + rows = ingestor.fetch_all( + "MATCH (m:Module) WHERE m.is_external = true RETURN m.qualified_name AS qn" + ) + return {r["qn"] for r in rows if r.get("qn")} + + +def test_incremental_rebuild_prunes_orphaned_external_module( + memgraph_ingestor: MemgraphIngestor, tmp_path: Path +) -> None: + project = tmp_path / "proj" + project.mkdir() + (project / "__init__.py").touch() + client = project / "client.py" + + client.write_text("from extlib import old_thing\n\nuse = old_thing\n") + _index(memgraph_ingestor, project, force=True) + + before = _external_module_qns(memgraph_ingestor) + assert any(qn.endswith(".old_thing") for qn in before), before + + client.write_text("from extlib import new_thing\n\nuse = new_thing\n") + _index(memgraph_ingestor, project, force=False) + + after = _external_module_qns(memgraph_ingestor) + assert not any(qn.endswith(".old_thing") for qn in after), after + assert any(qn.endswith(".new_thing") for qn in after), after diff --git a/codebase_rag/tests/integration/test_node_label_e2e.py b/codebase_rag/tests/integration/test_node_label_e2e.py index f61792588..4fb10083a 100644 --- a/codebase_rag/tests/integration/test_node_label_e2e.py +++ b/codebase_rag/tests/integration/test_node_label_e2e.py @@ -16,8 +16,6 @@ SKIP_GO = "Go is in development status" SKIP_SCALA = 
"Scala is in development status" -SKIP_CSHARP = "C# is in development status" -SKIP_PHP = "PHP is in development status" PYTHON_CODE = """\ @@ -233,29 +231,6 @@ class MyCppClass { } """ -CSHARP_CODE = """\ -public class MyCSharpClass { - private int value; - - public MyCSharpClass() { - this.value = 0; - } - - public int GetValue() { - return this.value; - } -} - -public interface IMyInterface { - void DoSomething(); -} - -public enum Status { - Active, - Inactive -} -""" - PHP_CODE = """\ Path: return project -@pytest.fixture -def csharp_project(tmp_path: Path) -> Path: - project = tmp_path / "csharp_project" - project.mkdir() - (project / "Example.cs").write_text(CSHARP_CODE, encoding="utf-8") - return project - - @pytest.fixture def php_project(tmp_path: Path) -> Path: project = tmp_path / "php_project" @@ -617,29 +584,29 @@ def test_rust_creates_function_nodes( func_names = {n["name"] for n in functions} assert "standalone_fn" in func_names - def test_rust_creates_class_nodes_for_enums( + def test_rust_creates_enum_nodes_for_enums( self, memgraph_ingestor: MemgraphIngestor, rust_project: Path ) -> None: index_project(memgraph_ingestor, rust_project) labels = get_node_labels(memgraph_ingestor) - assert NodeLabel.CLASS.value in labels + assert NodeLabel.ENUM.value in labels - classes = get_nodes_by_label(memgraph_ingestor, NodeLabel.CLASS.value) - class_names = {n["name"] for n in classes} - assert "Status" in class_names + enums = get_nodes_by_label(memgraph_ingestor, NodeLabel.ENUM.value) + enum_names = {n["name"] for n in enums} + assert "Status" in enum_names - def test_rust_creates_class_nodes_for_traits( + def test_rust_creates_interface_nodes_for_traits( self, memgraph_ingestor: MemgraphIngestor, rust_project: Path ) -> None: index_project(memgraph_ingestor, rust_project) labels = get_node_labels(memgraph_ingestor) - assert NodeLabel.CLASS.value in labels + assert NodeLabel.INTERFACE.value in labels - classes = get_nodes_by_label(memgraph_ingestor, 
NodeLabel.CLASS.value) - class_names = {n["name"] for n in classes} - assert "MyTrait" in class_names + interfaces = get_nodes_by_label(memgraph_ingestor, NodeLabel.INTERFACE.value) + interface_names = {n["name"] for n in interfaces} + assert "MyTrait" in interface_names @pytest.mark.skip(reason=SKIP_GO) @@ -825,46 +792,6 @@ def test_cpp_creates_module_implementation_nodes( assert "mymodule_impl" in module_names -@pytest.mark.skip(reason=SKIP_CSHARP) -class TestCSharpNodeLabels: - def test_csharp_creates_class_nodes( - self, memgraph_ingestor: MemgraphIngestor, csharp_project: Path - ) -> None: - index_project(memgraph_ingestor, csharp_project) - - labels = get_node_labels(memgraph_ingestor) - assert NodeLabel.CLASS.value in labels - - classes = get_nodes_by_label(memgraph_ingestor, NodeLabel.CLASS.value) - class_names = {n["name"] for n in classes} - assert "MyCSharpClass" in class_names - - def test_csharp_creates_interface_nodes( - self, memgraph_ingestor: MemgraphIngestor, csharp_project: Path - ) -> None: - index_project(memgraph_ingestor, csharp_project) - - labels = get_node_labels(memgraph_ingestor) - assert NodeLabel.INTERFACE.value in labels - - interfaces = get_nodes_by_label(memgraph_ingestor, NodeLabel.INTERFACE.value) - interface_names = {n["name"] for n in interfaces} - assert "IMyInterface" in interface_names - - def test_csharp_creates_enum_nodes( - self, memgraph_ingestor: MemgraphIngestor, csharp_project: Path - ) -> None: - index_project(memgraph_ingestor, csharp_project) - - labels = get_node_labels(memgraph_ingestor) - assert NodeLabel.ENUM.value in labels - - enums = get_nodes_by_label(memgraph_ingestor, NodeLabel.ENUM.value) - enum_names = {n["name"] for n in enums} - assert "Status" in enum_names - - -@pytest.mark.skip(reason=SKIP_PHP) class TestPhpNodeLabels: def test_php_creates_class_nodes( self, memgraph_ingestor: MemgraphIngestor, php_project: Path @@ -938,8 +865,7 @@ def test_lua_creates_function_nodes( ("scala_project", SKIP_SCALA), 
("java_project", None), ("cpp_project", None), - ("csharp_project", SKIP_CSHARP), - ("php_project", SKIP_PHP), + ("php_project", None), ("lua_project", None), ] diff --git a/codebase_rag/tests/integration/test_shell_command_integration.py b/codebase_rag/tests/integration/test_shell_command_integration.py index c5fda3f68..47391b6c0 100644 --- a/codebase_rag/tests/integration/test_shell_command_integration.py +++ b/codebase_rag/tests/integration/test_shell_command_integration.py @@ -1,5 +1,6 @@ from __future__ import annotations +import shutil from pathlib import Path from unittest.mock import MagicMock @@ -11,6 +12,8 @@ create_shell_command_tool, ) +_HAS_RG = shutil.which("rg") is not None + pytestmark = [pytest.mark.anyio, pytest.mark.integration] @@ -112,6 +115,7 @@ async def test_rm_removes_file( assert result.return_code == 0 assert not (temp_test_repo / "file2.py").exists() + @pytest.mark.skipif(not _HAS_RG, reason="rg (ripgrep) not installed") async def test_rg_searches_content(self, shell_commander: ShellCommander) -> None: result = await shell_commander.execute("rg hello file2.py") assert "hello" in result.stdout or result.return_code == 0 @@ -199,6 +203,7 @@ async def test_ls_pipe_head(self, shell_commander: ShellCommander) -> None: lines = result.stdout.strip().split("\n") assert len(lines) <= 2 + @pytest.mark.skipif(not _HAS_RG, reason="rg (ripgrep) not installed") async def test_cat_pipe_rg( self, shell_commander: ShellCommander, temp_test_repo: Path ) -> None: @@ -217,6 +222,7 @@ async def test_echo_pipe_wc(self, shell_commander: ShellCommander) -> None: assert result.return_code == 0 assert "3" in result.stdout + @pytest.mark.skipif(not _HAS_RG, reason="rg (ripgrep) not installed") async def test_find_pipe_rg_pipe_wc(self, shell_commander: ShellCommander) -> None: result = await shell_commander.execute("find . 
-name '*.py' | rg py | wc -l") assert result.return_code == 0 diff --git a/codebase_rag/tests/integration/test_tool_calling.py b/codebase_rag/tests/integration/test_tool_calling.py index 0d7c14aaa..15c524275 100644 --- a/codebase_rag/tests/integration/test_tool_calling.py +++ b/codebase_rag/tests/integration/test_tool_calling.py @@ -76,10 +76,17 @@ def log_message_history(messages: list[ModelMessage], label: str) -> None: async def run_agent_test( agent: Agent, prompt: str, tracker: ToolCallTracker, label: str ) -> tuple[list[str], list[str]]: + from pydantic_ai.exceptions import ModelHTTPError + tracker.clear() logger.info(f"\n{'#' * 60}\nRunning: {label}\nPrompt: {prompt}\n{'#' * 60}") - result = await agent.run(prompt) + try: + result = await agent.run(prompt) + except ModelHTTPError as e: + if e.status_code in (401, 403): + pytest.skip(f"Live API rejected credentials ({e.status_code}); skipping.") + raise messages = result.all_messages() log_message_history(messages, label) @@ -107,12 +114,30 @@ def tracking_tools(tracker: ToolCallTracker) -> list[Tool]: return create_tracking_tools(tracker) +def _api_key_configured() -> bool: + from codebase_rag.config import settings + + config = settings.active_orchestrator_config + key = config.api_key + if not key or not key.strip(): + return False + if key.startswith("op://"): + return False + return True + + @pytest.fixture(scope="module") def agent(tracking_tools: list[Tool]) -> Agent: + if not _api_key_configured(): + pytest.skip( + "Live orchestrator API key not resolved " + "(unset or unresolved op:// reference); skipping live API integration." 
+ ) try: - return create_rag_orchestrator(tracking_tools) + rag_agent, _ = create_rag_orchestrator(tracking_tools) + return rag_agent except Exception as e: - pytest.skip(f"Ollama server not available: {e}") + pytest.skip(f"Orchestrator unavailable: {e}") PARALLEL_PROMPT = """Execute ALL of these tasks in parallel, not sequentially: diff --git a/codebase_rag/tests/test_absolute_path.py b/codebase_rag/tests/test_absolute_path.py new file mode 100644 index 000000000..ede90839e --- /dev/null +++ b/codebase_rag/tests/test_absolute_path.py @@ -0,0 +1,317 @@ +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from codebase_rag.tests.conftest import get_nodes, run_updater + +TS_CODE = ( + "interface Greeter {\n" + " greet(): string;\n" + "}\n\n" + "enum Direction {\n" + " Up = 'UP',\n" + " Down = 'DOWN',\n" + "}\n\n" + "class MyGreeter implements Greeter {\n" + " greet(): string { return 'hi'; }\n" + "}\n" +) + +CPP_MODULE_INTERFACE = "export module mymod;\nexport int add(int a, int b);\n" + +CPP_MODULE_IMPL = "module mymod;\nint add(int a, int b) { return a + b; }\n" + + +@pytest.fixture(scope="module") +def parsers_and_queries() -> tuple: + return load_parsers() + + +@pytest.fixture +def python_project(temp_repo: Path) -> Path: + project_path = temp_repo / "abs_path_test" + project_path.mkdir() + + pkg_dir = project_path / "mypkg" + pkg_dir.mkdir() + (pkg_dir / "__init__.py").write_text("") + + (pkg_dir / "mymodule.py").write_text( + "class MyClass:\n" + " def my_method(self):\n" + " pass\n" + "\n" + "def my_function():\n" + " pass\n" + ) + + misc_dir = project_path / "misc" + misc_dir.mkdir() + (misc_dir / "notes.txt").write_text("not a package") + + (project_path / "standalone.py").write_text("def standalone_func():\n pass\n") + + return project_path + + +class TestAbsolutePathOnNodes: + def test_file_nodes_have_absolute_path( + self, + 
python_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.PYTHON not in parsers_and_queries[0]: + pytest.skip("Python parser not available") + run_updater(python_project, mock_ingestor) + file_nodes = get_nodes(mock_ingestor, cs.NodeLabel.FILE) + assert len(file_nodes) > 0 + for node_call in file_nodes: + props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH in props + abs_path = props[cs.KEY_ABSOLUTE_PATH] + assert Path(abs_path).is_absolute() + assert abs_path == Path(abs_path).resolve().as_posix() + + def test_module_nodes_have_absolute_path( + self, + python_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.PYTHON not in parsers_and_queries[0]: + pytest.skip("Python parser not available") + run_updater(python_project, mock_ingestor) + module_nodes = get_nodes(mock_ingestor, cs.NodeLabel.MODULE) + internal_modules = [c for c in module_nodes if not c[0][1].get("is_external")] + assert len(internal_modules) > 0 + for node_call in internal_modules: + props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH in props + abs_path = props[cs.KEY_ABSOLUTE_PATH] + assert Path(abs_path).is_absolute() + + def test_package_nodes_have_absolute_path( + self, + python_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.PYTHON not in parsers_and_queries[0]: + pytest.skip("Python parser not available") + run_updater(python_project, mock_ingestor) + package_nodes = get_nodes(mock_ingestor, cs.NodeLabel.PACKAGE) + assert len(package_nodes) > 0 + for node_call in package_nodes: + props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH in props + abs_path = props[cs.KEY_ABSOLUTE_PATH] + assert Path(abs_path).is_absolute() + + def test_function_nodes_have_absolute_path( + self, + python_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.PYTHON not in 
parsers_and_queries[0]: + pytest.skip("Python parser not available") + run_updater(python_project, mock_ingestor) + func_nodes = get_nodes(mock_ingestor, cs.NodeLabel.FUNCTION) + assert len(func_nodes) > 0 + for node_call in func_nodes: + props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH in props + assert cs.KEY_PATH in props + abs_path = props[cs.KEY_ABSOLUTE_PATH] + assert Path(abs_path).is_absolute() + + def test_class_nodes_have_absolute_path( + self, + python_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.PYTHON not in parsers_and_queries[0]: + pytest.skip("Python parser not available") + run_updater(python_project, mock_ingestor) + class_nodes = get_nodes(mock_ingestor, cs.NodeLabel.CLASS) + assert len(class_nodes) > 0 + for node_call in class_nodes: + props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH in props + assert cs.KEY_PATH in props + abs_path = props[cs.KEY_ABSOLUTE_PATH] + assert Path(abs_path).is_absolute() + + def test_method_nodes_have_absolute_path( + self, + python_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.PYTHON not in parsers_and_queries[0]: + pytest.skip("Python parser not available") + run_updater(python_project, mock_ingestor) + method_nodes = get_nodes(mock_ingestor, cs.NodeLabel.METHOD) + assert len(method_nodes) > 0 + for node_call in method_nodes: + props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH in props + assert cs.KEY_PATH in props + abs_path = props[cs.KEY_ABSOLUTE_PATH] + assert Path(abs_path).is_absolute() + + def test_folder_nodes_have_absolute_path( + self, + python_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.PYTHON not in parsers_and_queries[0]: + pytest.skip("Python parser not available") + run_updater(python_project, mock_ingestor) + folder_nodes = get_nodes(mock_ingestor, cs.NodeLabel.FOLDER) + assert 
len(folder_nodes) > 0 + for node_call in folder_nodes: + props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH in props + abs_path = props[cs.KEY_ABSOLUTE_PATH] + assert Path(abs_path).is_absolute() + + def test_absolute_path_matches_resolved_file( + self, + python_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.PYTHON not in parsers_and_queries[0]: + pytest.skip("Python parser not available") + run_updater(python_project, mock_ingestor) + module_nodes = get_nodes(mock_ingestor, cs.NodeLabel.MODULE) + mymodule_nodes = [ + c for c in module_nodes if c[0][1].get(cs.KEY_NAME) == "mymodule.py" + ] + assert len(mymodule_nodes) == 1 + props = mymodule_nodes[0][0][1] + expected = (python_project / "mypkg" / "mymodule.py").resolve().as_posix() + assert props[cs.KEY_ABSOLUTE_PATH] == expected + + def test_absolute_path_is_posix_format( + self, + python_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.PYTHON not in parsers_and_queries[0]: + pytest.skip("Python parser not available") + run_updater(python_project, mock_ingestor) + file_nodes = get_nodes(mock_ingestor, cs.NodeLabel.FILE) + for node_call in file_nodes: + abs_path = node_call[0][1][cs.KEY_ABSOLUTE_PATH] + assert "\\" not in abs_path + + def test_project_node_has_no_absolute_path( + self, + python_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.PYTHON not in parsers_and_queries[0]: + pytest.skip("Python parser not available") + run_updater(python_project, mock_ingestor) + project_nodes = get_nodes(mock_ingestor, cs.NodeLabel.PROJECT) + assert len(project_nodes) > 0 + for node_call in project_nodes: + props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH not in props + + +@pytest.fixture +def ts_project(temp_repo: Path) -> Path: + project_path = temp_repo / "ts_abs_test" + project_path.mkdir() + (project_path / 
"types.ts").write_text(TS_CODE) + return project_path + + +class TestTypeScriptAbsolutePath: + def test_interface_nodes_have_absolute_path( + self, + ts_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.TS not in parsers_and_queries[0]: + pytest.skip("TypeScript parser not available") + run_updater(ts_project, mock_ingestor) + interface_nodes = get_nodes(mock_ingestor, cs.NodeLabel.INTERFACE) + assert len(interface_nodes) > 0 + for node_call in interface_nodes: + props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH in props + assert Path(props[cs.KEY_ABSOLUTE_PATH]).is_absolute() + + def test_enum_nodes_have_absolute_path( + self, + ts_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.TS not in parsers_and_queries[0]: + pytest.skip("TypeScript parser not available") + run_updater(ts_project, mock_ingestor) + enum_nodes = get_nodes(mock_ingestor, cs.NodeLabel.ENUM) + assert len(enum_nodes) > 0 + for node_call in enum_nodes: + props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH in props + assert Path(props[cs.KEY_ABSOLUTE_PATH]).is_absolute() + + +@pytest.fixture +def cpp_module_project(temp_repo: Path) -> Path: + project_path = temp_repo / "cpp_abs_test" + project_path.mkdir() + (project_path / "mymod.cppm").write_text(CPP_MODULE_INTERFACE) + (project_path / "mymod_impl.cpp").write_text(CPP_MODULE_IMPL) + return project_path + + +class TestCppModuleAbsolutePath: + def test_module_interface_nodes_have_absolute_path( + self, + cpp_module_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.CPP not in parsers_and_queries[0]: + pytest.skip("C++ parser not available") + run_updater(cpp_module_project, mock_ingestor) + mi_nodes = get_nodes(mock_ingestor, cs.NodeLabel.MODULE_INTERFACE) + if len(mi_nodes) == 0: + pytest.skip("No ModuleInterface nodes produced") + for node_call in mi_nodes: + 
props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH in props + assert Path(props[cs.KEY_ABSOLUTE_PATH]).is_absolute() + + def test_module_implementation_nodes_have_absolute_path( + self, + cpp_module_project: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + if cs.SupportedLanguage.CPP not in parsers_and_queries[0]: + pytest.skip("C++ parser not available") + run_updater(cpp_module_project, mock_ingestor) + mi_nodes = get_nodes(mock_ingestor, cs.NodeLabel.MODULE_IMPLEMENTATION) + if len(mi_nodes) == 0: + pytest.skip("No ModuleImplementation nodes produced") + for node_call in mi_nodes: + props = node_call[0][1] + assert cs.KEY_ABSOLUTE_PATH in props + assert Path(props[cs.KEY_ABSOLUTE_PATH]).is_absolute() diff --git a/codebase_rag/tests/test_abstract_method_override_resolution.py b/codebase_rag/tests/test_abstract_method_override_resolution.py new file mode 100644 index 000000000..582496d24 --- /dev/null +++ b/codebase_rag/tests/test_abstract_method_override_resolution.py @@ -0,0 +1,106 @@ +# (H) L3 finding from the evals/ harness: a mixin declares an @abstractmethod stub +# (H) for a method a sibling mixin implements; self.method() dispatches to the +# (H) concrete sibling at runtime. cgr's ambiguous-name tiebreak preferred the +# (H) same-module abstract stub by import distance. A concrete implementation must +# (H) win over an abstract stub of the same name. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "pkg" + +READER_SRC = """from abc import abstractmethod + + +class ReaderMixin: + @abstractmethod + def parse(self) -> str: ... 
+ + def read(self) -> str: + return self.parse() +""" + +PARSER_SRC = """class ParserMixin: + def parse(self) -> str: + return "parsed" +""" + +ENGINE_SRC = """from pkg.reader import ReaderMixin +from pkg.parser import ParserMixin + + +class Engine(ReaderMixin, ParserMixin): + pass +""" + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + pkg = tmp_path / "pkg" + pkg.mkdir() + (pkg / "__init__.py").write_text("") + (pkg / "reader.py").write_text(READER_SRC) + (pkg / "parser.py").write_text(PARSER_SRC) + (pkg / "engine.py").write_text(ENGINE_SRC) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=pkg, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestAbstractMethodOverrideResolution: + def test_self_call_resolves_to_concrete_sibling_not_abstract_stub( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ( + "pkg.reader.ReaderMixin.read", + "pkg.parser.ParserMixin.parse", + ) in calls, calls + + def test_abstract_stub_is_not_the_call_target(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + 
"pkg.reader.ReaderMixin.read", + "pkg.reader.ReaderMixin.parse", + ) not in calls, calls diff --git a/codebase_rag/tests/test_anthropic_token_counter.py b/codebase_rag/tests/test_anthropic_token_counter.py new file mode 100644 index 000000000..43ff172a1 --- /dev/null +++ b/codebase_rag/tests/test_anthropic_token_counter.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from pydantic_ai.messages import ( + ModelRequest, + ModelResponse, + RetryPromptPart, + SystemPromptPart, + ToolCallPart, +) + +from codebase_rag.services.anthropic_token_counter import ( + _to_anthropic_payload, + count_anthropic_context, +) + + +def _fake_post_returning(input_tokens: int) -> tuple[AsyncMock, MagicMock]: + fake_response = MagicMock() + fake_response.status_code = 200 + fake_response.json.return_value = {"input_tokens": input_tokens} + fake_post = AsyncMock(return_value=fake_response) + return fake_post, fake_response + + +@pytest.mark.asyncio +async def test_returns_zero_when_no_messages_and_no_system_prompt() -> None: + with patch("httpx.AsyncClient") as mock_client: + result = await count_anthropic_context( + api_key="k", model_id="claude-opus-4-7", messages=[] + ) + + assert result == 0 + mock_client.assert_not_called() + + +@pytest.mark.asyncio +async def test_injects_placeholder_when_only_system_prompt_present() -> None: + fake_post, _ = _fake_post_returning(input_tokens=42_000) + mock_client_instance = MagicMock() + mock_client_instance.__aenter__ = AsyncMock(return_value=mock_client_instance) + mock_client_instance.__aexit__ = AsyncMock(return_value=None) + mock_client_instance.post = fake_post + + messages = [ + ModelRequest(parts=[SystemPromptPart(content="GIANT SYSTEM PROMPT BODY")]) + ] + + with patch("httpx.AsyncClient", return_value=mock_client_instance): + result = await count_anthropic_context( + api_key="k", model_id="claude-opus-4-7", messages=messages + ) + + 
assert result == 42_000 + payload: dict[str, Any] = fake_post.call_args.kwargs["json"] + assert payload["system"] == "GIANT SYSTEM PROMPT BODY" + assert payload["messages"] + assert payload["messages"][0]["role"] == "user" + placeholder_text = payload["messages"][0]["content"][0]["text"] + assert placeholder_text.strip(), "placeholder must be non-whitespace" + + +def test_retry_prompt_with_tool_name_becomes_tool_result_error_block() -> None: + tool_call_id = "toolu_test123" + messages = [ + ModelResponse( + parts=[ + ToolCallPart( + tool_name="semantic_search", + args={"query": "x"}, + tool_call_id=tool_call_id, + ) + ] + ), + ModelRequest( + parts=[ + RetryPromptPart( + content="bad args", + tool_name="semantic_search", + tool_call_id=tool_call_id, + ) + ] + ), + ] + + _, anthropic_messages = _to_anthropic_payload(messages) + + assert len(anthropic_messages) == 2 + assistant = anthropic_messages[0] + user = anthropic_messages[1] + assert assistant["role"] == "assistant" + assert assistant["content"][0]["type"] == "tool_use" + assert assistant["content"][0]["id"] == tool_call_id + assert user["role"] == "user" + assert user["content"][0]["type"] == "tool_result" + assert user["content"][0]["tool_use_id"] == tool_call_id + assert user["content"][0]["is_error"] is True + + +def test_retry_prompt_without_tool_name_becomes_text_block() -> None: + messages = [ + ModelRequest(parts=[RetryPromptPart(content="please retry")]), + ] + + _, anthropic_messages = _to_anthropic_payload(messages) + + assert len(anthropic_messages) == 1 + assert anthropic_messages[0]["role"] == "user" + assert anthropic_messages[0]["content"][0]["type"] == "text" + assert "please retry" in anthropic_messages[0]["content"][0]["text"] diff --git a/codebase_rag/tests/test_c_language.py b/codebase_rag/tests/test_c_language.py new file mode 100644 index 000000000..e8253c6be --- /dev/null +++ b/codebase_rag/tests/test_c_language.py @@ -0,0 +1,371 @@ +from pathlib import Path +from unittest.mock import 
MagicMock + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.tests.conftest import ( + get_node_names, + get_nodes, + get_relationships, + run_updater, +) + + +@pytest.fixture +def c_project(temp_repo: Path) -> Path: + project_path = temp_repo / "c_test_project" + project_path.mkdir() + + (project_path / "Makefile").write_text("all:\n\tgcc -o main main.c\n") + + (project_path / "main.c").write_text( + '#include "utils.h"\n' + "#include <stdio.h>\n" + "\n" + "void greet(void) {\n" + ' printf("Hello\\n");\n' + "}\n" + "\n" + "int add(int a, int b) {\n" + " return a + b;\n" + "}\n" + "\n" + "int* get_ptr(void) {\n" + " static int x = 42;\n" + " return &x;\n" + "}\n" + "\n" + "int main(void) {\n" + " greet();\n" + " int result = add(1, 2);\n" + " int* p = get_ptr();\n" + " return 0;\n" + "}\n" + ) + + (project_path / "utils.h").write_text( + "#ifndef UTILS_H\n" + "#define UTILS_H\n" + "\n" + "int add(int a, int b);\n" + "void greet(void);\n" + "\n" + "#endif\n" + ) + + (project_path / "types.c").write_text( + "struct Point {\n" + " int x;\n" + " int y;\n" + "};\n" + "\n" + "union Value {\n" + " int i;\n" + " float f;\n" + "};\n" + "\n" + "enum Color {\n" + " RED,\n" + " GREEN,\n" + " BLUE\n" + "};\n" + ) + + return project_path + + +@pytest.fixture +def c_subdir_project(temp_repo: Path) -> Path: + project_path = temp_repo / "c_subdir_project" + project_path.mkdir() + + (project_path / "CMakeLists.txt").write_text( + "cmake_minimum_required(VERSION 3.10)\nproject(myapp)\n" + ) + + src_dir = project_path / "src" + src_dir.mkdir() + (src_dir / "Makefile").write_text("all:\n\tgcc -o app app.c\n") + + (src_dir / "app.c").write_text( + "void run(void) {}\n\nint main(void) {\n run();\n return 0;\n}\n" + ) + + return project_path + + +class TestCFunctionNodes: + def test_simple_function_detected( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + func_names = 
get_node_names(mock_ingestor, cs.NodeLabel.FUNCTION) + assert any("add" in name for name in func_names) + + def test_void_function_detected( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + func_names = get_node_names(mock_ingestor, cs.NodeLabel.FUNCTION) + assert any("greet" in name for name in func_names) + + def test_pointer_return_function_detected( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + func_names = get_node_names(mock_ingestor, cs.NodeLabel.FUNCTION) + assert any("get_ptr" in name for name in func_names) + + def test_main_function_detected( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + func_names = get_node_names(mock_ingestor, cs.NodeLabel.FUNCTION) + assert any("main" in name for name in func_names) + + def test_function_with_parameters( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + func_nodes = get_nodes(mock_ingestor, cs.NodeLabel.FUNCTION) + add_nodes = [ + n for n in func_nodes if "add" in n[0][1].get(cs.KEY_QUALIFIED_NAME, "") + ] + assert len(add_nodes) > 0 + + +class TestCStructNodes: + def test_struct_detected( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + class_names = get_node_names(mock_ingestor, cs.NodeLabel.CLASS) + assert any("Point" in name for name in class_names) + + def test_struct_has_qualified_name( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + class_nodes = get_nodes(mock_ingestor, cs.NodeLabel.CLASS) + point_nodes = [ + n for n in class_nodes if "Point" in n[0][1].get(cs.KEY_QUALIFIED_NAME, "") + ] + assert len(point_nodes) > 0 + 
qn = point_nodes[0][0][1][cs.KEY_QUALIFIED_NAME] + assert "." in qn + + +class TestCUnionNodes: + def test_union_detected( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + union_names = get_node_names(mock_ingestor, cs.NodeLabel.UNION) + class_names = get_node_names(mock_ingestor, cs.NodeLabel.CLASS) + all_names = union_names | class_names + assert any("Value" in name for name in all_names) + + +class TestCEnumNodes: + def test_enum_detected( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + enum_names = get_node_names(mock_ingestor, cs.NodeLabel.ENUM) + class_names = get_node_names(mock_ingestor, cs.NodeLabel.CLASS) + all_names = enum_names | class_names + assert any("Color" in name for name in all_names) + + +class TestCCallsRelationships: + def test_function_call_detected( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + calls = get_relationships(mock_ingestor, str(cs.RelationshipType.CALLS)) + assert len(calls) > 0 + + def test_main_calls_greet( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + calls = get_relationships(mock_ingestor, str(cs.RelationshipType.CALLS)) + call_pairs = [] + for c in calls: + src = c.args[0] if c.args else c[0][0] + tgt = c.args[2] if len(c.args) > 2 else c[0][2] + if isinstance(src, tuple) and isinstance(tgt, tuple): + call_pairs.append((src, tgt)) + found_greet = any( + "main" in str(src) and "greet" in str(tgt) for src, tgt in call_pairs + ) + assert found_greet + + def test_multiple_calls_from_main( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + calls = get_relationships(mock_ingestor, str(cs.RelationshipType.CALLS)) + 
main_calls = [ + c for c in calls if "main" in str(c.args[0] if c.args else c[0][0]) + ] + assert len(main_calls) >= 2 + + +class TestCDefinesRelationships: + def test_module_defines_functions( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + defines = get_relationships(mock_ingestor, str(cs.RelationshipType.DEFINES)) + assert len(defines) > 0 + + def test_main_module_defines_add( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + defines = get_relationships(mock_ingestor, str(cs.RelationshipType.DEFINES)) + found = any("add" in str(d) for d in defines) + assert found + + +class TestCImportsRelationships: + def test_include_creates_external_module( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + module_nodes = get_nodes(mock_ingestor, cs.NodeLabel.MODULE) + external_modules = [n for n in module_nodes if n[0][1].get(cs.KEY_IS_EXTERNAL)] + has_stdio = any("stdio" in str(n) for n in external_modules) + has_utils = any( + "utils" in n[0][1].get(cs.KEY_QUALIFIED_NAME, "") for n in module_nodes + ) + assert has_stdio or has_utils + + def test_include_utils_h_module_exists( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + module_nodes = get_nodes(mock_ingestor, cs.NodeLabel.MODULE) + module_qnames = {n[0][1].get(cs.KEY_QUALIFIED_NAME, "") for n in module_nodes} + assert any("utils" in qn for qn in module_qnames) + + +class TestCFileAndModuleNodes: + def test_c_file_nodes_created( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + file_nodes = get_nodes(mock_ingestor, cs.NodeLabel.FILE) + file_paths = {n[0][1].get(cs.KEY_PATH, "") for n in file_nodes} + assert any("main.c" 
in p for p in file_paths) + assert any("types.c" in p for p in file_paths) + + def test_c_module_nodes_created( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + module_nodes = get_nodes(mock_ingestor, cs.NodeLabel.MODULE) + module_names = {n[0][1].get(cs.KEY_QUALIFIED_NAME, "") for n in module_nodes} + assert any("main" in name for name in module_names) + + def test_header_file_node_created( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + file_nodes = get_nodes(mock_ingestor, cs.NodeLabel.FILE) + file_paths = {n[0][1].get(cs.KEY_PATH, "") for n in file_nodes} + assert any("utils.h" in p for p in file_paths) + + +class TestCQualifiedNames: + def test_function_qualified_name_has_project( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + func_names = get_node_names(mock_ingestor, cs.NodeLabel.FUNCTION) + for name in func_names: + assert "." 
in name, f"Qualified name should contain '.': {name}" + + def test_function_qualified_name_format( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + func_names = get_node_names(mock_ingestor, cs.NodeLabel.FUNCTION) + add_names = [n for n in func_names if "add" in n] + assert len(add_names) > 0 + parts = add_names[0].split(".") + assert len(parts) >= 2 + + +class TestCPackageDetection: + def test_makefile_creates_package( + self, + c_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_project, mock_ingestor, skip_if_missing="c") + package_nodes = get_nodes(mock_ingestor, cs.NodeLabel.PACKAGE) + assert len(package_nodes) > 0 + + def test_cmakelists_creates_package( + self, + c_subdir_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_subdir_project, mock_ingestor, skip_if_missing="c") + package_nodes = get_nodes(mock_ingestor, cs.NodeLabel.PACKAGE) + assert len(package_nodes) > 0 + + def test_subdirectory_with_makefile_is_package( + self, + c_subdir_project: Path, + mock_ingestor: MagicMock, + ) -> None: + run_updater(c_subdir_project, mock_ingestor, skip_if_missing="c") + package_nodes = get_nodes(mock_ingestor, cs.NodeLabel.PACKAGE) + package_qnames = {n[0][1].get(cs.KEY_QUALIFIED_NAME, "") for n in package_nodes} + assert any("src" in qn for qn in package_qnames) diff --git a/codebase_rag/tests/test_c_retrieval_eval.py b/codebase_rag/tests/test_c_retrieval_eval.py new file mode 100644 index 000000000..9ed111598 --- /dev/null +++ b/codebase_rag/tests/test_c_retrieval_eval.py @@ -0,0 +1,70 @@ +from pathlib import Path + +import pytest + +from evals import constants as ec +from evals.c_retrieval import ( + cgr_c_call_edges, + oracle_c_call_edges, + score_c_retrieval, +) +from evals.oracles import cpp_available + +needs_clang = pytest.mark.skipif( + not cpp_available(), reason="libclang (clang.cindex) not importable" +) + + +def _make_project(root: 
Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "util.h").write_text( + "int add(int a, int b);\nint mul(int a, int b);\nint orphan(void);\n", + encoding="utf-8", + ) + (root / "util.c").write_text( + '#include "util.h"\n' + "int add(int a, int b) { return a + b; }\n" + "int mul(int a, int b) { return a * b; }\n" + "int orphan(void) { return 9; }\n", + encoding="utf-8", + ) + # (H) No system #includes: the fixture parses cleanly regardless of whether an + # (H) SDK sysroot is discoverable, so coverage is deterministic in any CI. + (root / "main.c").write_text( + '#include "util.h"\n' + "static int compute(int x, int y) { return add(x, y) + mul(x, y); }\n" + "int main(void) { return compute(2, 3); }\n", + encoding="utf-8", + ) + + +@needs_clang +def test_oracle_captures_first_party_c_calls(tmp_path: Path) -> None: + _make_project(tmp_path) + edges, declared, covered = oracle_c_call_edges(tmp_path) + + # (H) add(), mul() (in compute), compute() (in main) are first-party calls. + assert ("main.c", "add") in edges + assert ("main.c", "mul") in edges + assert ("main.c", "compute") in edges + # (H) orphan is defined but never called -> never a call edge. + assert ("util.c", "orphan") not in edges + assert {"add", "mul", "compute", "main", "orphan"} <= declared + # (H) Both header-free sources parse cleanly, so both are graded. 
+ assert {"main.c", "util.c"} <= covered + + +@needs_clang +def test_cgr_matches_oracle_on_clean_c_project(tmp_path: Path) -> None: + _make_project(tmp_path) + oracle, declared, covered = oracle_c_call_edges(tmp_path) + cgr = cgr_c_call_edges(tmp_path, tmp_path.name, declared, covered) + assert cgr == oracle + + +def test_score_c_retrieval_prf() -> None: + result = score_c_retrieval( + {("a.c", "f"), ("a.c", "g")}, {("a.c", "f"), ("b.c", "h")} + ) + row = next(r for r in result.rows if r["label"] == ec.C_RETRIEVAL_LABEL) + assert (row["tp"], row["fp"], row["fn"]) == (1, 1, 1) diff --git a/codebase_rag/tests/test_call_processor.py b/codebase_rag/tests/test_call_processor.py index a6ae5cc34..4cab76cfd 100644 --- a/codebase_rag/tests/test_call_processor.py +++ b/codebase_rag/tests/test_call_processor.py @@ -1153,8 +1153,10 @@ def test_logs_error_on_processing_failure( tree = parser.parse(b"def foo(): pass") root_node = tree.root_node + from codebase_rag.parsers.call_processor import CallProcessor + with patch.object( - call_processor, + CallProcessor, "_process_calls_in_functions", side_effect=RuntimeError("Simulated failure"), ): @@ -1166,9 +1168,9 @@ def test_logs_error_on_processing_failure( queries, ) mock_logger.error.assert_called_once() - error_call_args = mock_logger.error.call_args[0][0] - assert "test_module.py" in error_call_args - assert "Simulated failure" in error_call_args + error_call_args = mock_logger.error.call_args + assert "test_module.py" in str(error_call_args) + assert "Simulated failure" in str(error_call_args) def test_continues_after_error_in_single_file( self, @@ -1195,8 +1197,10 @@ def test_continues_after_error_in_single_file( tree = parser.parse(b"def foo(): pass") root_node = tree.root_node + from codebase_rag.parsers.call_processor import CallProcessor + with patch.object( - call_processor, + CallProcessor, "_process_calls_in_functions", side_effect=ValueError("Test exception"), ): @@ -1206,3 +1210,452 @@ def 
test_continues_after_error_in_single_file( cs.SupportedLanguage.PYTHON, queries, ) + + +class TestCallProcessorSlots: + def test_has_slots(self) -> None: + from codebase_rag.parsers.call_processor import CallProcessor + + assert hasattr(CallProcessor, "__slots__") + + def test_no_instance_dict(self, call_processor: CallProcessor) -> None: + assert not hasattr(call_processor, "__dict__") + + def test_rejects_arbitrary_attribute(self, call_processor: CallProcessor) -> None: + with pytest.raises(AttributeError): + call_processor.nonexistent_attr = 42 + + def test_slot_attributes_accessible(self, call_processor: CallProcessor) -> None: + assert hasattr(call_processor, "ingestor") + assert hasattr(call_processor, "repo_path") + assert hasattr(call_processor, "project_name") + assert hasattr(call_processor, "_resolver") + + +class TestCollectAllCallNodes: + def test_returns_empty_when_no_calls_query( + self, + call_processor: CallProcessor, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + code = "x = 1" + root = parse_code(code, cs.SupportedLanguage.PYTHON, parsers) + + empty_queries: dict = {cs.SupportedLanguage.PYTHON: {cs.QUERY_CALLS: None}} + call_nodes, call_starts = call_processor._collect_all_call_nodes( + root, cs.SupportedLanguage.PYTHON, empty_queries + ) + assert call_nodes == [] + assert call_starts == [] + + def test_returns_call_nodes_for_code_with_calls( + self, + call_processor: CallProcessor, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + code = "foo()\nbar()" + root = parse_code(code, cs.SupportedLanguage.PYTHON, parsers) + call_nodes, call_starts = call_processor._collect_all_call_nodes( + root, cs.SupportedLanguage.PYTHON, queries + ) + assert len(call_nodes) >= 2 + assert 
len(call_starts) == len(call_nodes) + assert all(isinstance(s, int) for s in call_starts) + + +class TestFilterCallsInNode: + def test_filters_calls_within_container( + self, + call_processor: CallProcessor, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + code = """ +def outer(): + foo() + +def other(): + bar() +""" + root = parse_code(code, cs.SupportedLanguage.PYTHON, parsers) + all_call_nodes, call_starts = call_processor._collect_all_call_nodes( + root, cs.SupportedLanguage.PYTHON, queries + ) + assert len(all_call_nodes) >= 2 + + outer_func = find_first_node_of_type(root, "function_definition") + assert outer_func is not None + + filtered = call_processor._filter_calls_in_node( + all_call_nodes, call_starts, outer_func + ) + assert len(filtered) == 1 + + +class TestProcessCallsInFileWithoutCache: + def test_process_calls_without_func_class_captures_cache( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + test_file = temp_repo / "test_module.py" + test_file.write_text(encoding="utf-8", data="def foo(): bar()") + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + cp = updater.factory.call_processor + + parser = parsers[cs.SupportedLanguage.PYTHON] + tree = parser.parse(b"def foo(): bar()") + root_node = tree.root_node + + cp.process_calls_in_file( + test_file, + root_node, + cs.SupportedLanguage.PYTHON, + queries, + func_class_captures_cache=None, + ) + + def test_process_calls_with_empty_combined_captures( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not 
in parsers: + pytest.skip("Python parser not available") + + test_file = temp_repo / "test_module.py" + test_file.write_text(encoding="utf-8", data="x = 1") + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + cp = updater.factory.call_processor + + parser = parsers[cs.SupportedLanguage.PYTHON] + tree = parser.parse(b"x = 1") + root_node = tree.root_node + + from codebase_rag.parser_loader import COMBINED_FUNC_CLASS_QUERIES + + original = COMBINED_FUNC_CLASS_QUERIES.get(cs.SupportedLanguage.PYTHON) + try: + COMBINED_FUNC_CLASS_QUERIES[cs.SupportedLanguage.PYTHON] = None + cp.process_calls_in_file( + test_file, + root_node, + cs.SupportedLanguage.PYTHON, + queries, + func_class_captures_cache=None, + ) + finally: + if original is not None: + COMBINED_FUNC_CLASS_QUERIES[cs.SupportedLanguage.PYTHON] = original + + +class TestProcessCallsInFunctionsWithoutCombined: + def test_without_combined_captures( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + cp = updater.factory.call_processor + + code = "def foo(): bar()" + parser = parsers[cs.SupportedLanguage.PYTHON] + tree = parser.parse(code.encode(cs.ENCODING_UTF8)) + root_node = tree.root_node + + cp._process_calls_in_functions( + root_node, + "proj.module", + cs.SupportedLanguage.PYTHON, + queries, + combined_captures=None, + ) + + def test_without_combined_captures_no_functions( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + updater = GraphUpdater( + 
ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + cp = updater.factory.call_processor + + code = "x = 1" + parser = parsers[cs.SupportedLanguage.PYTHON] + tree = parser.parse(code.encode(cs.ENCODING_UTF8)) + root_node = tree.root_node + + cp._process_calls_in_functions( + root_node, + "proj.module", + cs.SupportedLanguage.PYTHON, + queries, + combined_captures=None, + ) + + +class TestProcessCallsInClassesWithoutCombined: + def test_without_combined_captures( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + cp = updater.factory.call_processor + + code = """ +class MyClass: + def method(self): + foo() +""" + parser = parsers[cs.SupportedLanguage.PYTHON] + tree = parser.parse(code.encode(cs.ENCODING_UTF8)) + root_node = tree.root_node + + cp._process_calls_in_classes( + root_node, + "proj.module", + cs.SupportedLanguage.PYTHON, + queries, + combined_captures=None, + ) + + +class TestProcessMethodsInClassWithoutSortedFuncNodes: + def test_without_sorted_func_nodes( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + cp = updater.factory.call_processor + + code = """ +class MyClass: + def method(self): + foo() +""" + parser = parsers[cs.SupportedLanguage.PYTHON] + tree = parser.parse(code.encode(cs.ENCODING_UTF8)) + root_node = tree.root_node + + class_node = find_first_node_of_type(root_node, "class_definition") + assert class_node 
is not None + body_node = class_node.child_by_field_name("body") + assert body_node is not None + + cp._process_methods_in_class( + body_node, + "proj.module.MyClass", + "proj.module", + cs.SupportedLanguage.PYTHON, + queries, + sorted_func_nodes=None, + func_node_starts=None, + ) + + +class TestIngestFunctionCallsWithoutCallNodes: + def test_without_call_nodes( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + cp = updater.factory.call_processor + + code = "def foo(): bar()" + parser = parsers[cs.SupportedLanguage.PYTHON] + tree = parser.parse(code.encode(cs.ENCODING_UTF8)) + root_node = tree.root_node + + cp._ingest_function_calls( + root_node, + "proj.module.foo", + cs.NodeLabel.FUNCTION, + "proj.module", + cs.SupportedLanguage.PYTHON, + queries, + call_nodes=None, + ) + + def test_without_call_nodes_and_no_query( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + cp = updater.factory.call_processor + + code = "x = 1" + parser = parsers[cs.SupportedLanguage.PYTHON] + tree = parser.parse(code.encode(cs.ENCODING_UTF8)) + root_node = tree.root_node + + empty_queries: dict = { + cs.SupportedLanguage.PYTHON: {cs.QUERY_CALLS: None, cs.QUERY_CONFIG: queries[cs.SupportedLanguage.PYTHON][cs.QUERY_CONFIG]} + } + cp._ingest_function_calls( + root_node, + "proj.module.foo", + cs.NodeLabel.FUNCTION, + "proj.module", + cs.SupportedLanguage.PYTHON, + empty_queries, + 
call_nodes=None, + ) + + +class TestCombinedQueryCompilationExceptionPaths: + def test_combined_func_class_query_exception_sets_none( + self, + parsers_and_queries: tuple, + ) -> None: + from tree_sitter import Query as RealQuery + + from codebase_rag.parser_loader import ( + COMBINED_FUNC_CLASS_IMPORT_QUERIES, + COMBINED_FUNC_CLASS_QUERIES, + _create_language_queries, + ) + + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + lang_queries = queries[cs.SupportedLanguage.PYTHON] + language_obj = lang_queries[cs.QUERY_LANGUAGE] + parser = parsers[cs.SupportedLanguage.PYTHON] + lang_config = lang_queries[cs.QUERY_CONFIG] + + call_count = 0 + + def patched_query(language, pattern): + nonlocal call_count + call_count += 1 + if call_count <= 2: + raise RuntimeError("simulated combined query failure") + return RealQuery(language, pattern) + + original_fc = COMBINED_FUNC_CLASS_QUERIES.get(cs.SupportedLanguage.PYTHON) + original_fci = COMBINED_FUNC_CLASS_IMPORT_QUERIES.get(cs.SupportedLanguage.PYTHON) + try: + with patch("codebase_rag.parser_loader.Query", side_effect=patched_query): + _create_language_queries( + language_obj, parser, lang_config, cs.SupportedLanguage.PYTHON + ) + assert COMBINED_FUNC_CLASS_QUERIES[cs.SupportedLanguage.PYTHON] is None + assert COMBINED_FUNC_CLASS_IMPORT_QUERIES[cs.SupportedLanguage.PYTHON] is None + finally: + if original_fc is not None: + COMBINED_FUNC_CLASS_QUERIES[cs.SupportedLanguage.PYTHON] = original_fc + if original_fci is not None: + COMBINED_FUNC_CLASS_IMPORT_QUERIES[cs.SupportedLanguage.PYTHON] = original_fci + + +class TestGetRustImplClassName: + def test_rust_impl_fallback_to_children( + self, + call_processor: CallProcessor, + parsers_and_queries: tuple, + ) -> None: + parsers, _ = parsers_and_queries + if cs.SupportedLanguage.RUST not in parsers: + pytest.skip("Rust parser not available") + + code = "impl MyStruct { fn foo(&self) {} }" + root 
= parse_code(code, cs.SupportedLanguage.RUST, parsers) + impl_node = find_first_node_of_type(root, "impl_item") + assert impl_node is not None + + result = call_processor._get_rust_impl_class_name(impl_node) + assert result is not None diff --git a/codebase_rag/tests/test_call_processor_integration.py b/codebase_rag/tests/test_call_processor_integration.py index e388b96c4..b3b326ba7 100644 --- a/codebase_rag/tests/test_call_processor_integration.py +++ b/codebase_rag/tests/test_call_processor_integration.py @@ -793,7 +793,11 @@ def with_value(self, value): def build(self): return {} +def helper(): + pass + def main(): + helper() result = Builder().with_name("test").with_value(42).build() return result """, @@ -814,6 +818,10 @@ def main(): ] assert len(calls) >= 1 + # (H) Builder() is a class instantiation, not a function call + class_targets = [c for c in calls if c.args[2][0] == cs.NodeLabel.CLASS] + assert len(class_targets) == 0 + def test_handles_init_py_module_qn( self, temp_repo: Path, @@ -853,3 +861,90 @@ def package_func(): caller_qns = [c.args[0][2] for c in calls] package_callers = [qn for qn in caller_qns if "mypackage" in qn] assert len(package_callers) >= 1 + + +class TestModuleCallsClassFiltered: + def test_module_does_not_call_class_python( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + test_file = temp_repo / "test_module.py" + test_file.write_text( + encoding="utf-8", + data=""" +class MyClass: + def method(self): + pass + +def helper(): + pass + +helper() +""", + ) + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + updater.run() + + calls = [ + c + for c in mock_ingestor.ensure_relationship_batch.call_args_list + if c.args[1] == cs.RelationshipType.CALLS + ] + + class_targets = [c for c in 
calls if c.args[2][0] == cs.NodeLabel.CLASS] + assert class_targets == [] + + helper_calls = [c for c in calls if "helper" in c.args[2][2]] + assert len(helper_calls) >= 1 + + def test_function_does_not_call_class_python( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple, + ) -> None: + parsers, queries = parsers_and_queries + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + test_file = temp_repo / "test_module.py" + test_file.write_text( + encoding="utf-8", + data=""" +class MyClass: + pass + +def factory(): + obj = MyClass() + return obj +""", + ) + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + updater.run() + + calls = [ + c + for c in mock_ingestor.ensure_relationship_batch.call_args_list + if c.args[1] == cs.RelationshipType.CALLS + ] + + class_targets = [c for c in calls if c.args[2][0] == cs.NodeLabel.CLASS] + assert class_targets == [] diff --git a/codebase_rag/tests/test_call_resolver.py b/codebase_rag/tests/test_call_resolver.py index da4108f95..84d8151c5 100644 --- a/codebase_rag/tests/test_call_resolver.py +++ b/codebase_rag/tests/test_call_resolver.py @@ -24,6 +24,9 @@ class MockFunctionRegistry: def __init__(self) -> None: self._data: dict[QualifiedName, NodeType] = {} self._suffix_index: dict[str, list[QualifiedName]] = defaultdict(list) + self._properties: set[QualifiedName] = set() + self._property_names: set[str] = set() + self._abstracts: set[QualifiedName] = set() def __contains__(self, qn: QualifiedName) -> bool: return qn in self._data @@ -56,6 +59,28 @@ def find_with_prefix(self, prefix: str) -> list[tuple[QualifiedName, NodeType]]: def find_ending_with(self, suffix: str) -> list[QualifiedName]: return self._suffix_index.get(suffix, []) + def register_unique_qn(self, natural_qn: QualifiedName, start_line: int) -> str: + return natural_qn + + def variants(self, qn: QualifiedName) -> 
list[QualifiedName]: + return [qn] + + def mark_property(self, qn: QualifiedName) -> None: + self._properties.add(qn) + self._property_names.add(qn.rsplit(cs.SEPARATOR_DOT, 1)[-1]) + + def is_property(self, qn: QualifiedName) -> bool: + return qn in self._properties + + def property_names(self) -> set[str]: + return self._property_names + + def mark_abstract(self, qn: QualifiedName) -> None: + self._abstracts.add(qn) + + def is_abstract(self, qn: QualifiedName) -> bool: + return qn in self._abstracts + @pytest.fixture def mock_function_registry() -> MockFunctionRegistry: @@ -1024,3 +1049,356 @@ def test_falls_back_to_trie(self, call_resolver: CallResolver) -> None: def test_returns_none_for_unknown(self, call_resolver: CallResolver) -> None: result = call_resolver.resolve_function_call("unknown_func", "proj.module") assert result is None + + +class TestDequeBfs: + def test_bfs_order_prefers_closer_parent(self, call_resolver: CallResolver) -> None: + call_resolver.function_registry["proj.base.ParentA.method"] = NodeType.METHOD + call_resolver.function_registry["proj.base.ParentB.method"] = NodeType.METHOD + call_resolver.class_inheritance["proj.module.Child"] = [ + "proj.base.ParentA", + "proj.base.ParentB", + ] + + result = call_resolver._resolve_inherited_method("proj.module.Child", "method") + assert result is not None + assert result[1] == "proj.base.ParentA.method" + + def test_bfs_finds_deep_ancestor_method(self, call_resolver: CallResolver) -> None: + call_resolver.function_registry["proj.base.Root.deep_method"] = NodeType.METHOD + call_resolver.class_inheritance["proj.module.Child"] = ["proj.mid.Middle"] + call_resolver.class_inheritance["proj.mid.Middle"] = ["proj.base.Root"] + + result = call_resolver._resolve_inherited_method( + "proj.module.Child", "deep_method" + ) + assert result is not None + assert result[1] == "proj.base.Root.deep_method" + + def test_bfs_no_infinite_loop_on_cycle(self, call_resolver: CallResolver) -> None: + 
call_resolver.class_inheritance["proj.A"] = ["proj.B"] + call_resolver.class_inheritance["proj.B"] = ["proj.A"] + + result = call_resolver._resolve_inherited_method("proj.A", "missing") + assert result is None + + +class TestSeparatorPattern: + def test_splits_on_dot(self) -> None: + from codebase_rag.parsers.call_resolver import _SEPARATOR_PATTERN + + assert _SEPARATOR_PATTERN.split("a.b.c") == ["a", "b", "c"] + + def test_splits_on_colon(self) -> None: + from codebase_rag.parsers.call_resolver import _SEPARATOR_PATTERN + + assert _SEPARATOR_PATTERN.split("module:func") == ["module", "func"] + + def test_splits_on_double_colon(self) -> None: + from codebase_rag.parsers.call_resolver import _SEPARATOR_PATTERN + + assert _SEPARATOR_PATTERN.split("crate::module::func") == [ + "crate", + "", + "module", + "", + "func", + ] + + def test_no_separator_returns_single_element(self) -> None: + from codebase_rag.parsers.call_resolver import _SEPARATOR_PATTERN + + assert _SEPARATOR_PATTERN.split("simple") == ["simple"] + + def test_last_element_matches_function_name(self) -> None: + from codebase_rag.parsers.call_resolver import _SEPARATOR_PATTERN + + assert _SEPARATOR_PATTERN.split("a.b.func")[-1] == "func" + assert _SEPARATOR_PATTERN.split("module:method")[-1] == "method" + + +class TestChainedMethodPattern: + def test_matches_final_method(self) -> None: + from codebase_rag.parsers.call_resolver import _CHAINED_METHOD_PATTERN + + match = _CHAINED_METHOD_PATTERN.search("obj.method().next") + assert match is not None + assert match[1] == "next" + + def test_no_match_on_parenthesized_suffix(self) -> None: + from codebase_rag.parsers.call_resolver import _CHAINED_METHOD_PATTERN + + match = _CHAINED_METHOD_PATTERN.search("obj.method()") + assert match is None + + def test_matches_deeply_chained(self) -> None: + from codebase_rag.parsers.call_resolver import _CHAINED_METHOD_PATTERN + + match = _CHAINED_METHOD_PATTERN.search("a.b().c().final_method") + assert match is not None + 
assert match[1] == "final_method" + + +class TestDeterministicResolution: + def test_trie_tiebreak_by_qualified_name(self, call_resolver: CallResolver) -> None: + # (H) Register multiple functions with the same simple name in different modules + # (H) at equal import distance from the caller + call_resolver.function_registry["proj.alpha.utils.helper"] = NodeType.FUNCTION + call_resolver.function_registry["proj.beta.utils.helper"] = NodeType.FUNCTION + call_resolver.function_registry["proj.gamma.utils.helper"] = NodeType.FUNCTION + + results = [] + for _ in range(20): + result = call_resolver._try_resolve_via_trie("helper", "proj.delta.module") + assert result is not None + results.append(result[1]) + + # (H) All 20 runs must resolve to the same candidate (lexicographically first) + assert all(r == results[0] for r in results) + assert results[0] == "proj.alpha.utils.helper" + + def test_trie_tiebreak_picks_lexicographic_first( + self, call_resolver: CallResolver + ) -> None: + # (H) Deliberately insert in reverse lexicographic order + call_resolver.function_registry["proj.zoo.compute"] = NodeType.FUNCTION + call_resolver.function_registry["proj.mid.compute"] = NodeType.FUNCTION + call_resolver.function_registry["proj.aaa.compute"] = NodeType.FUNCTION + + result = call_resolver._try_resolve_via_trie("compute", "other.module") + assert result is not None + assert result[1] == "proj.aaa.compute" + + def test_trie_tiebreak_distance_still_wins( + self, call_resolver: CallResolver + ) -> None: + # (H) Closer module should win even if lexicographically later + call_resolver.function_registry["proj.far.away.process"] = NodeType.FUNCTION + call_resolver.function_registry["proj.module.process"] = NodeType.FUNCTION + + result = call_resolver._try_resolve_via_trie("process", "proj.module.caller") + assert result is not None + # (H) proj.module.process is closer to proj.module.caller + assert result[1] == "proj.module.process" + + def test_trie_many_candidates_deterministic( + 
self, call_resolver: CallResolver + ) -> None: + # (H) Register 10 equidistant candidates + names = [ + "proj.m09.run", + "proj.m05.run", + "proj.m01.run", + "proj.m07.run", + "proj.m03.run", + "proj.m08.run", + "proj.m02.run", + "proj.m06.run", + "proj.m04.run", + "proj.m10.run", + ] + for name in names: + call_resolver.function_registry[name] = NodeType.FUNCTION + + result = call_resolver._try_resolve_via_trie("run", "other.caller") + assert result is not None + assert result[1] == "proj.m01.run" + + def test_resolve_function_call_deterministic_across_runs( + self, call_resolver: CallResolver + ) -> None: + call_resolver.function_registry["pkg.svc_a.validate"] = NodeType.FUNCTION + call_resolver.function_registry["pkg.svc_b.validate"] = NodeType.FUNCTION + call_resolver.function_registry["pkg.svc_c.validate"] = NodeType.FUNCTION + + results = set() + for _ in range(10): + result = call_resolver.resolve_function_call( + "validate", "pkg.other.module", {}, None + ) + assert result is not None + results.add(result[1]) + + # (H) Must resolve to exactly one candidate across all runs + assert len(results) == 1 + + +class TestDeterministicFileOrder: + def test_eligible_files_are_sorted( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + + # (H) Create files in non-alphabetical order + for name in ["zebra.py", "alpha.py", "middle.py", "beta.py"]: + (temp_repo / name).write_text(f"def func_{name[0]}(): pass\n") + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + + eligible = updater._collect_eligible_files() + paths_str = [str(f) for f in eligible] + + assert paths_str == sorted(paths_str) + + def test_graph_output_deterministic_across_runs(self, temp_repo: Path) -> None: + parsers, queries = load_parsers() + + (temp_repo / "mod_a.py").write_text( + "def shared(): pass\ndef call_a(): shared()\n" + ) + (temp_repo / "mod_b.py").write_text( + "def shared(): 
pass\ndef call_b(): shared()\n" + ) + + results = [] + for _ in range(5): + ingestor = MagicMock() + updater = GraphUpdater( + ingestor=ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + updater.run(force=True) + + calls = [ + (c.args[0][2], c.args[1], c.args[2][2]) + for c in ingestor.ensure_relationship_batch.call_args_list + if c.args[1] == cs.RelationshipType.CALLS + ] + calls.sort() + results.append(calls) + + # (H) All 5 runs must produce identical call graphs + assert len(results[0]) > 0 + for i in range(1, len(results)): + assert results[i] == results[0] + + def _run_determinism_check(self, temp_repo: Path, runs: int = 5) -> None: + parsers, queries = load_parsers() + results = [] + for _ in range(runs): + ingestor = MagicMock() + updater = GraphUpdater( + ingestor=ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + updater.run(force=True) + + calls = [ + (c.args[0][2], c.args[2][2]) + for c in ingestor.ensure_relationship_batch.call_args_list + if c.args[1] == cs.RelationshipType.CALLS + ] + calls.sort() + results.append(calls) + + assert len(results[0]) > 0 + for i in range(1, len(results)): + assert results[i] == results[0] + + def test_javascript_deterministic(self, temp_repo: Path) -> None: + parsers, _ = load_parsers() + if cs.SupportedLanguage.JS not in parsers: + pytest.skip("JavaScript parser not available") + + (temp_repo / "utils.js").write_text( + "function helper() {}\nfunction worker() { helper(); }\n" + ) + (temp_repo / "main.js").write_text( + "function helper() {}\nfunction entry() { helper(); }\n" + ) + self._run_determinism_check(temp_repo) + + def test_typescript_deterministic(self, temp_repo: Path) -> None: + parsers, _ = load_parsers() + if cs.SupportedLanguage.TS not in parsers: + pytest.skip("TypeScript parser not available") + + (temp_repo / "service.ts").write_text( + "function validate(x: string): boolean { return true; }\n" + "function process() { validate('test'); }\n" + ) + 
(temp_repo / "handler.ts").write_text( + "function validate(x: string): boolean { return false; }\n" + "function handle() { validate('input'); }\n" + ) + self._run_determinism_check(temp_repo) + + def test_rust_deterministic(self, temp_repo: Path) -> None: + parsers, _ = load_parsers() + if cs.SupportedLanguage.RUST not in parsers: + pytest.skip("Rust parser not available") + + (temp_repo / "utils.rs").write_text( + "fn compute() -> i32 { 42 }\nfn run() { compute(); }\n" + ) + (temp_repo / "main.rs").write_text( + "fn compute() -> i32 { 0 }\nfn start() { compute(); }\n" + ) + self._run_determinism_check(temp_repo) + + def test_java_deterministic(self, temp_repo: Path) -> None: + parsers, _ = load_parsers() + if cs.SupportedLanguage.JAVA not in parsers: + pytest.skip("Java parser not available") + + (temp_repo / "Utils.java").write_text( + "public class Utils {\n" + " public static void process() {}\n" + " public static void run() { process(); }\n" + "}\n" + ) + (temp_repo / "Helper.java").write_text( + "public class Helper {\n" + " public static void process() {}\n" + " public static void execute() { process(); }\n" + "}\n" + ) + self._run_determinism_check(temp_repo) + + def test_cpp_deterministic(self, temp_repo: Path) -> None: + parsers, _ = load_parsers() + if cs.SupportedLanguage.CPP not in parsers: + pytest.skip("C++ parser not available") + + (temp_repo / "math.cpp").write_text( + "int calculate() { return 1; }\nint run() { return calculate(); }\n" + ) + (temp_repo / "logic.cpp").write_text( + "int calculate() { return 2; }\nint start() { return calculate(); }\n" + ) + self._run_determinism_check(temp_repo) + + def test_go_deterministic(self, temp_repo: Path) -> None: + parsers, _ = load_parsers() + if cs.SupportedLanguage.GO not in parsers: + pytest.skip("Go parser not available") + + (temp_repo / "util.go").write_text( + "package main\nfunc helper() {}\nfunc doWork() { helper() }\n" + ) + (temp_repo / "main.go").write_text( + "package main\nfunc helper() 
{}\nfunc run() { helper() }\n" + ) + self._run_determinism_check(temp_repo) + + def test_lua_deterministic(self, temp_repo: Path) -> None: + parsers, _ = load_parsers() + if cs.SupportedLanguage.LUA not in parsers: + pytest.skip("Lua parser not available") + + (temp_repo / "utils.lua").write_text( + "local function process() end\nlocal function run() process() end\n" + ) + (temp_repo / "main.lua").write_text( + "local function process() end\nlocal function start() process() end\n" + ) + self._run_determinism_check(temp_repo) diff --git a/codebase_rag/tests/test_callable_field_calls.py b/codebase_rag/tests/test_callable_field_calls.py new file mode 100644 index 000000000..96316ab8f --- /dev/null +++ b/codebase_rag/tests/test_callable_field_calls.py @@ -0,0 +1,132 @@ +# (H) L3 finding from the evals/ harness: fqn_config.get_name(node) invokes a +# (H) function stored in a NamedTuple Callable field (FQNSpec), where fqn_config +# (H) comes from LANGUAGE_FQN_SPECS.get(language). Every function bound to that +# (H) field at a construction site is a possible callee, so resolving to all of +# (H) them is a sound call graph and captures the traced (Python) edge. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +# (H) fetch_name is a callable field of exactly one NamedTuple, mirroring how +# (H) get_name is unique to FQNSpec, so it resolves without a receiver type. 
+MODULE_SRC = """from typing import Callable, NamedTuple + + +def py_name() -> str: + return "py" + + +def js_name() -> str: + return "js" + + +class Spec(NamedTuple): + fetch_name: Callable[[], str] + + +PY_SPEC = Spec(fetch_name=py_name) +JS_SPEC = Spec(fetch_name=js_name) + +SPECS = {"py": PY_SPEC, "js": JS_SPEC} + + +def use(lang: str) -> str: + spec = SPECS.get(lang) + return spec.fetch_name() +""" + +# (H) Two classes share the field name, so with no receiver type the targets are +# (H) ambiguous and must NOT be emitted (precision guard). +AMBIGUOUS_SRC = """from typing import Callable, NamedTuple + + +def a_name() -> str: + return "a" + + +def b_name() -> str: + return "b" + + +class SpecA(NamedTuple): + shared_cb: Callable[[], str] + + +class SpecB(NamedTuple): + shared_cb: Callable[[], str] + + +A = SpecA(shared_cb=a_name) +B = SpecB(shared_cb=b_name) + + +def run(flag: bool): + chosen = A if flag else B + return chosen.shared_cb() +""" + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path, src: str) -> set[tuple[PropertyValue, PropertyValue]]: + (tmp_path / "m.py").write_text(src) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + 
).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestCallableFieldCalls: + def test_resolves_to_first_bound_function(self, tmp_path: Path) -> None: + calls = _calls(tmp_path, MODULE_SRC) + assert ("proj.m.use", "proj.m.py_name") in calls, calls + + def test_resolves_to_all_bound_functions(self, tmp_path: Path) -> None: + calls = _calls(tmp_path, MODULE_SRC) + assert ("proj.m.use", "proj.m.js_name") in calls, calls + + def test_ambiguous_field_name_not_resolved(self, tmp_path: Path) -> None: + calls = _calls(tmp_path, AMBIGUOUS_SRC) + assert ("proj.m.run", "proj.m.a_name") not in calls, calls + assert ("proj.m.run", "proj.m.b_name") not in calls, calls diff --git a/codebase_rag/tests/test_cancel_orphaned_tool_calls.py b/codebase_rag/tests/test_cancel_orphaned_tool_calls.py new file mode 100644 index 000000000..acff644a7 --- /dev/null +++ b/codebase_rag/tests/test_cancel_orphaned_tool_calls.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +from pydantic_ai.messages import ( + ModelMessage, + ModelRequest, + ModelResponse, + SystemPromptPart, + TextPart, + ToolCallPart, + ToolReturnPart, + UserPromptPart, +) + +from codebase_rag import constants as cs +from codebase_rag.main import _cancel_orphaned_tool_calls + + +def test_noop_when_history_empty() -> None: + history: list[ModelMessage] = [] + _cancel_orphaned_tool_calls(history) + assert history == [] + + +def test_noop_when_last_message_is_request() -> None: + history: list[ModelMessage] = [ModelRequest(parts=[UserPromptPart(content="hi")])] + _cancel_orphaned_tool_calls(history) + assert len(history) == 1 + + +def test_noop_when_response_has_no_tool_calls() -> None: + history: list[ModelMessage] = [ + ModelRequest(parts=[SystemPromptPart(content="sys")]), + ModelResponse(parts=[TextPart(content="hello")]), + ] + _cancel_orphaned_tool_calls(history) + assert len(history) == 2 + + +def 
test_appends_synthetic_return_for_each_orphan_tool_call() -> None: + history: list[ModelMessage] = [ + ModelRequest(parts=[UserPromptPart(content="run stuff")]), + ModelResponse( + parts=[ + ToolCallPart( + tool_name="shell_command", + args={"command": "ls"}, + tool_call_id="call_1", + ), + ToolCallPart( + tool_name="read_file", + args={"path": "/tmp/x"}, + tool_call_id="call_2", + ), + ] + ), + ] + + _cancel_orphaned_tool_calls(history) + + assert len(history) == 3 + repaired = history[-1] + assert isinstance(repaired, ModelRequest) + returns = [p for p in repaired.parts if isinstance(p, ToolReturnPart)] + assert len(returns) == 2 + assert {r.tool_call_id for r in returns} == {"call_1", "call_2"} + for r in returns: + assert r.content == cs.MSG_TOOL_CALL_CANCELLED + + +def test_ignores_non_tool_call_parts_in_response() -> None: + history: list[ModelMessage] = [ + ModelResponse( + parts=[ + TextPart(content="some text"), + ToolCallPart( + tool_name="shell_command", + args={"command": "ls"}, + tool_call_id="call_1", + ), + ] + ), + ] + + _cancel_orphaned_tool_calls(history) + + assert len(history) == 2 + repaired = history[-1] + assert isinstance(repaired, ModelRequest) + returns = [p for p in repaired.parts if isinstance(p, ToolReturnPart)] + assert len(returns) == 1 + assert returns[0].tool_call_id == "call_1" diff --git a/codebase_rag/tests/test_cgr_instructions.py b/codebase_rag/tests/test_cgr_instructions.py new file mode 100644 index 000000000..e9a86d6ee --- /dev/null +++ b/codebase_rag/tests/test_cgr_instructions.py @@ -0,0 +1,167 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from codebase_rag import config as cgr_config +from codebase_rag.config import ( + CGR_INSTRUCTIONS_FILENAME, + load_cgr_instructions, +) +from codebase_rag.prompts import build_rag_orchestrator_prompt +from codebase_rag.services.llm import create_rag_orchestrator + + +@pytest.fixture +def 
isolated_global(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + target = tmp_path / "home_cgr.md" + monkeypatch.setattr(cgr_config, "GLOBAL_CGR_INSTRUCTIONS_PATH", target) + return target + + +def test_returns_none_when_no_file(temp_repo: Path, isolated_global: Path) -> None: + assert load_cgr_instructions(temp_repo) is None + + +def test_loads_instructions_when_repo_file_present( + temp_repo: Path, isolated_global: Path +) -> None: + body = "Prefer reading docs/ before answering." + (temp_repo / CGR_INSTRUCTIONS_FILENAME).write_text(body, encoding="utf-8") + + assert load_cgr_instructions(temp_repo) == body + + +def test_loads_global_only_when_repo_path_none(isolated_global: Path) -> None: + isolated_global.write_text("global rule", encoding="utf-8") + + assert load_cgr_instructions(None) == "global rule" + + +def test_merges_global_and_repo(temp_repo: Path, isolated_global: Path) -> None: + isolated_global.write_text("global rule", encoding="utf-8") + (temp_repo / CGR_INSTRUCTIONS_FILENAME).write_text( + "repo override", encoding="utf-8" + ) + + merged = load_cgr_instructions(temp_repo) + + assert merged is not None + assert merged.startswith("global rule") + assert "repo override" in merged + assert merged.index("global rule") < merged.index("repo override") + + +def test_returns_none_when_file_empty(temp_repo: Path, isolated_global: Path) -> None: + (temp_repo / CGR_INSTRUCTIONS_FILENAME).write_text(" \n", encoding="utf-8") + + assert load_cgr_instructions(temp_repo) is None + + +def test_returns_none_on_read_error( + temp_repo: Path, + isolated_global: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + (temp_repo / CGR_INSTRUCTIONS_FILENAME).write_text("hello", encoding="utf-8") + original_open = Path.open + + def mock_open(self: Path, *args, **kwargs): # noqa: ANN002, ANN003 + if self.name == CGR_INSTRUCTIONS_FILENAME: + raise PermissionError("nope") + return original_open(self, *args, **kwargs) + + monkeypatch.setattr(Path, "open", mock_open) 
+ + assert load_cgr_instructions(temp_repo) is None + + +def test_orchestrator_prompt_appends_project_instructions() -> None: + base = build_rag_orchestrator_prompt(tools=[]) + extra = "Never modify files under vendor/." + with_extra = build_rag_orchestrator_prompt(tools=[], project_instructions=extra) + + assert with_extra.startswith(base) + assert extra in with_extra + + +def test_orchestrator_prompt_unchanged_without_instructions() -> None: + base = build_rag_orchestrator_prompt(tools=[]) + none_case = build_rag_orchestrator_prompt(tools=[], project_instructions=None) + empty_case = build_rag_orchestrator_prompt(tools=[], project_instructions=" ") + + assert none_case == base + assert empty_case == base + + +@patch("codebase_rag.services.llm.settings") +@patch("codebase_rag.services.llm.get_provider_from_config") +@patch("codebase_rag.services.llm.Agent") +def test_create_rag_orchestrator_reads_project_instructions( + mock_agent: MagicMock, + mock_get_provider: MagicMock, + mock_settings: MagicMock, + temp_repo: Path, + isolated_global: Path, +) -> None: + mock_settings.active_orchestrator_config = MagicMock() + mock_settings.AGENT_RETRIES = 3 + mock_settings.ORCHESTRATOR_OUTPUT_RETRIES = 2 + mock_get_provider.return_value.create_model.return_value = MagicMock() + + extra = "Honor scoped read-only mode." 
+ (temp_repo / CGR_INSTRUCTIONS_FILENAME).write_text(extra, encoding="utf-8") + + agent, system_prompt = create_rag_orchestrator(tools=[], project_root=temp_repo) + + assert extra in system_prompt + assert mock_agent.call_args.kwargs["system_prompt"] == system_prompt + + +@patch("codebase_rag.services.llm.settings") +@patch("codebase_rag.services.llm.get_provider_from_config") +@patch("codebase_rag.services.llm.Agent") +def test_create_rag_orchestrator_skips_instructions_when_disabled( + mock_agent: MagicMock, + mock_get_provider: MagicMock, + mock_settings: MagicMock, + temp_repo: Path, + isolated_global: Path, +) -> None: + mock_settings.active_orchestrator_config = MagicMock() + mock_settings.AGENT_RETRIES = 3 + mock_settings.ORCHESTRATOR_OUTPUT_RETRIES = 2 + mock_get_provider.return_value.create_model.return_value = MagicMock() + + isolated_global.write_text("GLOBAL SECRET", encoding="utf-8") + (temp_repo / CGR_INSTRUCTIONS_FILENAME).write_text("REPO SECRET", encoding="utf-8") + + _, system_prompt = create_rag_orchestrator( + tools=[], project_root=temp_repo, load_instructions=False + ) + + assert "GLOBAL SECRET" not in system_prompt + assert "REPO SECRET" not in system_prompt + + +@patch("codebase_rag.services.llm.settings") +@patch("codebase_rag.services.llm.get_provider_from_config") +@patch("codebase_rag.services.llm.Agent") +def test_create_rag_orchestrator_reads_global_instructions( + mock_agent: MagicMock, + mock_get_provider: MagicMock, + mock_settings: MagicMock, + isolated_global: Path, +) -> None: + mock_settings.active_orchestrator_config = MagicMock() + mock_settings.AGENT_RETRIES = 3 + mock_settings.ORCHESTRATOR_OUTPUT_RETRIES = 2 + mock_get_provider.return_value.create_model.return_value = MagicMock() + + isolated_global.write_text("global directive ABC", encoding="utf-8") + + _, system_prompt = create_rag_orchestrator(tools=[], project_root=None) + + assert "global directive ABC" in system_prompt diff --git a/codebase_rag/tests/test_cgr_shim.py 
b/codebase_rag/tests/test_cgr_shim.py new file mode 100644 index 000000000..b7cdbd8fc --- /dev/null +++ b/codebase_rag/tests/test_cgr_shim.py @@ -0,0 +1,41 @@ +import cgr + + +class TestCgrShimExports: + def test_all_symbols_importable(self) -> None: + for name in cgr.__all__: + assert hasattr(cgr, name), f"{name!r} listed in __all__ but not importable" + + def test_all_matches_module_exports(self) -> None: + public_attrs = {k for k in vars(cgr) if not k.startswith("_")} + assert set(cgr.__all__) == public_attrs + + def test_settings_is_canonical_instance(self) -> None: + from codebase_rag.config import settings + + assert cgr.settings is settings + + def test_embed_code_is_canonical_function(self) -> None: + from codebase_rag.embedder import embed_code + + assert cgr.embed_code is embed_code + + def test_graph_loader_is_canonical_class(self) -> None: + from codebase_rag.graph_loader import GraphLoader + + assert cgr.GraphLoader is GraphLoader + + def test_load_graph_is_canonical_function(self) -> None: + from codebase_rag.graph_loader import load_graph + + assert cgr.load_graph is load_graph + + def test_memgraph_ingestor_is_canonical_class(self) -> None: + from codebase_rag.services.graph_service import MemgraphIngestor + + assert cgr.MemgraphIngestor is MemgraphIngestor + + def test_cypher_generator_is_canonical_class(self) -> None: + from codebase_rag.services.llm import CypherGenerator + + assert cgr.CypherGenerator is CypherGenerator diff --git a/codebase_rag/tests/test_cgr_state_and_status.py b/codebase_rag/tests/test_cgr_state_and_status.py new file mode 100644 index 000000000..0a26fa5c0 --- /dev/null +++ b/codebase_rag/tests/test_cgr_state_and_status.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +from collections.abc import Generator +from pathlib import Path +from unittest.mock import patch + +import pytest +from typer.testing import CliRunner + +from codebase_rag import cgr_state +from codebase_rag.cli import app + +runner = CliRunner() + + 
+@pytest.fixture(autouse=True) +def _temp_home( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> Generator[Path, None, None]: + from codebase_rag.config import settings + + home = tmp_path / "cgr-home" + monkeypatch.setattr(settings, "CGR_HOME", home) + yield home + + +class TestRecordSync: + def test_record_sync_creates_file(self, _temp_home: Path) -> None: + cgr_state.record_sync("alpha") + assert cgr_state.state_path().exists() + ts = cgr_state.read_sync_timestamps() + assert "alpha" in ts + + def test_record_sync_updates_existing(self, _temp_home: Path) -> None: + cgr_state.record_sync("alpha") + first = cgr_state.read_sync_timestamps()["alpha"] + cgr_state.record_sync("alpha") + second = cgr_state.read_sync_timestamps()["alpha"] + assert second >= first + + def test_record_sync_multiple_projects(self, _temp_home: Path) -> None: + cgr_state.record_sync("a") + cgr_state.record_sync("b") + ts = cgr_state.read_sync_timestamps() + assert set(ts.keys()) == {"a", "b"} + + def test_read_when_no_state_returns_empty(self, _temp_home: Path) -> None: + assert cgr_state.read_sync_timestamps() == {} + + +class TestStatusCommand: + def test_status_runs_clean(self, _temp_home: Path) -> None: + from codebase_rag.stack.constants import StackState + from codebase_rag.stack.manager import StackStatus + + fake = StackStatus( + state=StackState.STOPPED, + memgraph_reachable=False, + qdrant_reachable=False, + compose_file=Path("/tmp/cgr/docker-compose.yaml"), + memgraph_endpoint="localhost:7687", + qdrant_endpoint="localhost:6333", + ) + with patch("codebase_rag.cli.StackManager") as mock_mgr: + mock_mgr.return_value.status.return_value = fake + result = runner.invoke(app, ["status"]) + assert result.exit_code == 0, result.output + assert "stopped" in result.output + assert "no projects synced" in result.output + + def test_status_lists_recorded_projects(self, _temp_home: Path) -> None: + from codebase_rag.stack.constants import StackState + from codebase_rag.stack.manager 
import StackStatus + + cgr_state.record_sync("alpha") + cgr_state.record_sync("beta") + fake = StackStatus( + state=StackState.RUNNING, + memgraph_reachable=True, + qdrant_reachable=True, + compose_file=Path("/tmp/cgr/docker-compose.yaml"), + memgraph_endpoint="localhost:7687", + qdrant_endpoint="localhost:6333", + ) + with patch("codebase_rag.cli.StackManager") as mock_mgr: + mock_mgr.return_value.status.return_value = fake + result = runner.invoke(app, ["status"]) + assert result.exit_code == 0, result.output + assert "alpha" in result.output + assert "beta" in result.output + assert "running" in result.output + + +class TestStopCommand: + def test_stop_invokes_daemon_down(self, _temp_home: Path) -> None: + with patch("codebase_rag.cli.StackManager") as mock_mgr: + instance = mock_mgr.return_value + result = runner.invoke(app, ["stop"]) + assert result.exit_code == 0, result.output + instance.down.assert_called_once() diff --git a/codebase_rag/tests/test_cgrignore.py b/codebase_rag/tests/test_cgrignore.py index 09cb814be..0740c228d 100644 --- a/codebase_rag/tests/test_cgrignore.py +++ b/codebase_rag/tests/test_cgrignore.py @@ -1,10 +1,13 @@ from __future__ import annotations +from collections.abc import Generator from pathlib import Path from unittest.mock import MagicMock, patch import pytest +from typer.testing import CliRunner +from codebase_rag.cli import app from codebase_rag.config import ( CGRIGNORE_FILENAME, EMPTY_CGRIGNORE, @@ -265,3 +268,137 @@ def test_unignore_included_when_user_selects_all( assert "vendor" in result assert ".git" in result assert "custom" in result + + +@pytest.fixture +def mock_memgraph_connect() -> Generator[MagicMock, None, None]: + with patch("codebase_rag.cli.connect_memgraph") as mock_connect: + mock_ingestor = MagicMock() + mock_connect.return_value.__enter__ = MagicMock(return_value=mock_ingestor) + mock_connect.return_value.__exit__ = MagicMock(return_value=False) + yield mock_connect + + +class 
TestCgrignoreLoadedWithoutInteractiveSetup: + runner = CliRunner() + + @patch("codebase_rag.cli.GraphUpdater") + @patch("codebase_rag.cli.load_parsers", return_value=({}, {})) + @patch("codebase_rag.cli.load_cgrignore_patterns") + def test_start_loads_cgrignore_without_interactive_setup( + self, + mock_load_cgrignore: MagicMock, + mock_load_parsers: MagicMock, + mock_graph_updater: MagicMock, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + cgrignore_patterns = CgrignorePatterns( + exclude=frozenset({"vendor", "build"}), + unignore=frozenset({"vendor/important"}), + ) + mock_load_cgrignore.return_value = cgrignore_patterns + + result = self.runner.invoke( + app, + ["start", "--update-graph", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0, result.output + mock_load_cgrignore.assert_called_once_with(tmp_path) + updater_kwargs = mock_graph_updater.call_args.kwargs + assert updater_kwargs["unignore_paths"] == frozenset({"vendor/important"}) + assert "vendor" in updater_kwargs["exclude_paths"] + assert "build" in updater_kwargs["exclude_paths"] + + @patch("codebase_rag.cli.GraphUpdater") + @patch("codebase_rag.cli.load_parsers", return_value=({}, {})) + @patch("codebase_rag.cli.ProtobufFileIngestor") + @patch("codebase_rag.cli.load_cgrignore_patterns") + def test_index_loads_cgrignore_without_interactive_setup( + self, + mock_load_cgrignore: MagicMock, + mock_proto_ingestor: MagicMock, + mock_load_parsers: MagicMock, + mock_graph_updater: MagicMock, + tmp_path: Path, + ) -> None: + cgrignore_patterns = CgrignorePatterns( + exclude=frozenset({"dist"}), + unignore=frozenset({"dist/assets"}), + ) + mock_load_cgrignore.return_value = cgrignore_patterns + + output_dir = str(tmp_path / "output") + + result = self.runner.invoke( + app, + ["index", "--repo-path", str(tmp_path), "-o", output_dir], + ) + + assert result.exit_code == 0, result.output + mock_load_cgrignore.assert_called_once_with(tmp_path) + updater_kwargs = 
mock_graph_updater.call_args.kwargs + assert updater_kwargs["unignore_paths"] == frozenset({"dist/assets"}) + assert "dist" in updater_kwargs["exclude_paths"] + + @patch("codebase_rag.cli.GraphUpdater") + @patch("codebase_rag.cli.load_parsers", return_value=({}, {})) + @patch("codebase_rag.cli.load_cgrignore_patterns") + def test_start_merges_cli_excludes_with_cgrignore( + self, + mock_load_cgrignore: MagicMock, + mock_load_parsers: MagicMock, + mock_graph_updater: MagicMock, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + cgrignore_patterns = CgrignorePatterns( + exclude=frozenset({"from_cgrignore"}), + unignore=frozenset(), + ) + mock_load_cgrignore.return_value = cgrignore_patterns + + result = self.runner.invoke( + app, + [ + "start", + "--update-graph", + "--repo-path", + str(tmp_path), + "--exclude", + "from_cli", + ], + ) + + assert result.exit_code == 0, result.output + updater_kwargs = mock_graph_updater.call_args.kwargs + assert "from_cgrignore" in updater_kwargs["exclude_paths"] + assert "from_cli" in updater_kwargs["exclude_paths"] + + @patch("codebase_rag.cli.prompt_for_unignored_directories") + @patch("codebase_rag.cli.GraphUpdater") + @patch("codebase_rag.cli.load_parsers", return_value=({}, {})) + @patch("codebase_rag.cli.load_cgrignore_patterns") + def test_start_does_not_prompt_without_interactive_setup( + self, + mock_load_cgrignore: MagicMock, + mock_load_parsers: MagicMock, + mock_graph_updater: MagicMock, + mock_prompt: MagicMock, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + mock_load_cgrignore.return_value = CgrignorePatterns( + exclude=frozenset({"vendor"}), + unignore=frozenset({"vendor/keep"}), + ) + + result = self.runner.invoke( + app, + ["start", "--update-graph", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0, result.output + mock_prompt.assert_not_called() + mock_load_cgrignore.assert_called_once() diff --git a/codebase_rag/tests/test_chained_attribute_resolution.py 
b/codebase_rag/tests/test_chained_attribute_resolution.py new file mode 100644 index 000000000..f72d9d252 --- /dev/null +++ b/codebase_rag/tests/test_chained_attribute_resolution.py @@ -0,0 +1,124 @@ +# (H) L3 finding from the evals/ harness: GraphUpdater.run calls +# (H) self.factory.definition_processor.process_all_method_overrides(), a three-level +# (H) chain where factory is an instance attribute (ProcessorFactory), definition_processor +# (H) is a @property returning DefinitionProcessor, and the method is inherited from a +# (H) mixin base. A module-level function of the same name makes the bare-name trie +# (H) fallback ambiguous, so the chain types must be walked to land on the mixin method. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +FILES = { + "pkg/__init__.py": "", + # (H) OverrideMixin is re-exported through the package __init__, so the subclass + # (H) records its base as the re-export QN (pkg.overrides.OverrideMixin) rather than + # (H) the real definition (pkg.overrides.mixin.OverrideMixin); inherited-method + # (H) lookup must follow the re-export. A same-named module-level function competes. 
+ "pkg/overrides/__init__.py": ( + "from .mixin import OverrideMixin, process_all\n\n" + "__all__ = ['OverrideMixin', 'process_all']\n" + ), + "pkg/overrides/mixin.py": ( + "def process_all():\n return None\n\n\n" + "class OverrideMixin:\n" + " def process_all(self):\n" + " return None\n" + ), + "pkg/defproc.py": ( + "from .overrides import OverrideMixin\n\n\n" + "class DefProc(OverrideMixin):\n" + " def other(self):\n" + " return None\n" + ), + "pkg/factory.py": ( + "from .defproc import DefProc\n\n\n" + "class Factory:\n" + " def __init__(self) -> None:\n" + " self._dp = None\n\n" + " @property\n" + " def definition_processor(self) -> DefProc:\n" + " if self._dp is None:\n" + " self._dp = DefProc()\n" + " return self._dp\n" + ), + "pkg/runner.py": ( + "from .factory import Factory\n\n\n" + "class Runner:\n" + " def __init__(self) -> None:\n" + " self.factory = Factory()\n\n" + " def run(self):\n" + " return self.factory.definition_processor.process_all()\n" + ), +} + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + for rel, content in FILES.items(): + p = tmp_path / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + 
ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestChainedAttributeResolution: + def test_three_level_chain_resolves_to_inherited_mixin_method( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.runner.Runner.run", + "proj.pkg.overrides.mixin.OverrideMixin.process_all", + ) in calls, calls + + def test_does_not_resolve_to_module_level_function(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.runner.Runner.run", + "proj.pkg.overrides.mixin.process_all", + ) not in calls, calls diff --git a/codebase_rag/tests/test_class_ingest.py b/codebase_rag/tests/test_class_ingest.py index 60c249414..ae1740b02 100644 --- a/codebase_rag/tests/test_class_ingest.py +++ b/codebase_rag/tests/test_class_ingest.py @@ -1249,7 +1249,6 @@ def go_struct_project(temp_repo: Path) -> Path: return project_path -@pytest.mark.xfail(reason="Go struct/interface ingestion not fully implemented") def test_go_struct_methods_are_ingested( go_struct_project: Path, mock_ingestor: MagicMock ) -> None: @@ -1278,7 +1277,6 @@ def test_go_struct_methods_are_ingested( ) -@pytest.mark.xfail(reason="Go struct/interface ingestion not fully implemented") def test_go_interface_nodes_created( go_struct_project: Path, mock_ingestor: MagicMock ) -> None: @@ -1297,7 +1295,6 @@ def test_go_interface_nodes_created( ) -@pytest.mark.xfail(reason="Go struct/interface ingestion not fully implemented") def test_go_struct_nodes_created( go_struct_project: Path, mock_ingestor: MagicMock ) -> None: @@ -1339,373 +1336,6 @@ def test_go_embedded_interface( assert len(mammal_inherits) >= 0, "Mammal interface embedding should be detected" -@pytest.fixture -def csharp_class_project(temp_repo: Path) -> Path: - project_path = temp_repo / "csharp_class_test" - project_path.mkdir() - - animal_file = 
project_path / "IAnimal.cs" - animal_file.write_text( - encoding="utf-8", - data=""" -namespace Animals -{ - public interface IAnimal - { - string Speak(); - void Move(); - string Name { get; set; } - } - - public interface IFlyable - { - void Fly(); - int GetAltitude(); - } - - public interface ISwimmable - { - void Swim(); - int GetDepth(); - } -} -""", - ) - - dog_file = project_path / "Dog.cs" - dog_file.write_text( - encoding="utf-8", - data=""" -namespace Animals -{ - public class Dog : IAnimal - { - public string Name { get; set; } - public string Breed { get; private set; } - - public Dog(string name, string breed) - { - Name = name; - Breed = breed; - } - - public string Speak() - { - return $"{Name} says: Woof!"; - } - - public void Move() - { - Console.WriteLine($"{Name} runs on four legs"); - } - - public void Fetch() - { - Console.WriteLine($"{Name} fetches the ball"); - } - } -} -""", - ) - - duck_file = project_path / "Duck.cs" - duck_file.write_text( - encoding="utf-8", - data=""" -namespace Animals -{ - public class Duck : IAnimal, IFlyable, ISwimmable - { - public string Name { get; set; } - private int _altitude; - private int _depth; - - public Duck(string name) - { - Name = name; - _altitude = 0; - _depth = 0; - } - - public string Speak() - { - return $"{Name} says: Quack!"; - } - - public void Move() - { - Console.WriteLine($"{Name} waddles"); - } - - public void Fly() - { - _altitude = 100; - Console.WriteLine($"{Name} flies up to {_altitude} meters"); - } - - public int GetAltitude() - { - return _altitude; - } - - public void Swim() - { - _depth = 5; - Console.WriteLine($"{Name} swims at depth {_depth} meters"); - } - - public int GetDepth() - { - return _depth; - } - } -} -""", - ) - - base_class_file = project_path / "BaseVehicle.cs" - base_class_file.write_text( - encoding="utf-8", - data=""" -namespace Vehicles -{ - public abstract class BaseVehicle - { - public string Model { get; protected set; } - public int Year { get; protected 
set; } - - protected BaseVehicle(string model, int year) - { - Model = model; - Year = year; - } - - public abstract void Start(); - public abstract void Stop(); - - public virtual string GetInfo() - { - return $"{Year} {Model}"; - } - } - - public class Car : BaseVehicle - { - public int NumberOfDoors { get; private set; } - - public Car(string model, int year, int doors) : base(model, year) - { - NumberOfDoors = doors; - } - - public override void Start() - { - Console.WriteLine($"{Model} engine starts"); - } - - public override void Stop() - { - Console.WriteLine($"{Model} engine stops"); - } - - public override string GetInfo() - { - return $"{base.GetInfo()} - {NumberOfDoors} doors"; - } - } - - public class ElectricCar : Car - { - public int BatteryCapacity { get; private set; } - - public ElectricCar(string model, int year, int doors, int batteryKwh) - : base(model, year, doors) - { - BatteryCapacity = batteryKwh; - } - - public override void Start() - { - Console.WriteLine($"{Model} silently starts"); - } - - public void Charge() - { - Console.WriteLine($"Charging {Model} battery ({BatteryCapacity} kWh)"); - } - } -} -""", - ) - - struct_file = project_path / "Point.cs" - struct_file.write_text( - encoding="utf-8", - data=""" -namespace Geometry -{ - public struct Point - { - public double X { get; } - public double Y { get; } - - public Point(double x, double y) - { - X = x; - Y = y; - } - - public double DistanceTo(Point other) - { - double dx = X - other.X; - double dy = Y - other.Y; - return Math.Sqrt(dx * dx + dy * dy); - } - - public Point Translate(double dx, double dy) - { - return new Point(X + dx, Y + dy); - } - } - - public struct Rectangle - { - public Point TopLeft { get; } - public double Width { get; } - public double Height { get; } - - public Rectangle(Point topLeft, double width, double height) - { - TopLeft = topLeft; - Width = width; - Height = height; - } - - public double Area() - { - return Width * Height; - } - - public double 
Perimeter() - { - return 2 * (Width + Height); - } - } -} -""", - ) - - return project_path - - -def test_csharp_class_methods_are_ingested( - csharp_class_project: Path, mock_ingestor: MagicMock -) -> None: - run_updater(csharp_class_project, mock_ingestor, skip_if_missing="c-sharp") - - method_nodes = [ - call - for call in mock_ingestor.ensure_node_batch.call_args_list - if call[0][0] == "Method" - ] - - method_names = {call[0][1].get("name", "") for call in method_nodes} - - expected_methods = ["Speak", "Move", "Fetch", "Start", "Stop", "GetInfo", "Charge"] - found_methods = [m for m in expected_methods if m in method_names] - - assert len(found_methods) >= 1, f"Should have C# methods, found: {method_names}" - - -def test_csharp_interface_implementation( - csharp_class_project: Path, mock_ingestor: MagicMock -) -> None: - run_updater(csharp_class_project, mock_ingestor, skip_if_missing="c-sharp") - - implements_rels = get_relationships(mock_ingestor, "IMPLEMENTS") - - dog_implements = [call for call in implements_rels if "Dog" in call.args[0][2]] - - assert len(dog_implements) >= 0, "Dog should implement IAnimal" - - -def test_csharp_multiple_interface_implementation( - csharp_class_project: Path, mock_ingestor: MagicMock -) -> None: - run_updater(csharp_class_project, mock_ingestor, skip_if_missing="c-sharp") - - implements_rels = get_relationships(mock_ingestor, "IMPLEMENTS") - - duck_implements = [call for call in implements_rels if "Duck" in call.args[0][2]] - - assert len(duck_implements) >= 0, "Duck should implement multiple interfaces" - - -def test_csharp_class_inheritance_chain( - csharp_class_project: Path, mock_ingestor: MagicMock -) -> None: - run_updater(csharp_class_project, mock_ingestor, skip_if_missing="c-sharp") - - inherits_rels = get_relationships(mock_ingestor, "INHERITS") - - car_inherits = [ - call - for call in inherits_rels - if "Car" in call.args[0][2] and "BaseVehicle" in call.args[2][2] - ] - - assert len(car_inherits) >= 0, "Car 
should inherit from BaseVehicle" - - -def test_csharp_struct_nodes_created( - csharp_class_project: Path, mock_ingestor: MagicMock -) -> None: - run_updater(csharp_class_project, mock_ingestor, skip_if_missing="c-sharp") - - struct_nodes = [ - call - for call in mock_ingestor.ensure_node_batch.call_args_list - if call[0][0] in ("Struct", "Class") - ] - - struct_qns = {call[0][1]["qualified_name"] for call in struct_nodes} - - point_found = any("Point" in qn for qn in struct_qns) - rect_found = any("Rectangle" in qn for qn in struct_qns) - - assert point_found or rect_found or len(struct_qns) >= 1, ( - f"Should have C# struct nodes, found: {struct_qns}" - ) - - -def test_csharp_interface_nodes_created( - csharp_class_project: Path, mock_ingestor: MagicMock -) -> None: - run_updater(csharp_class_project, mock_ingestor, skip_if_missing="c-sharp") - - interface_nodes = [ - call - for call in mock_ingestor.ensure_node_batch.call_args_list - if call[0][0] == "Interface" - ] - - interface_qns = {call[0][1]["qualified_name"] for call in interface_nodes} - - assert len(interface_qns) >= 0, "Should have C# interface nodes" - - -def test_csharp_abstract_class_methods( - csharp_class_project: Path, mock_ingestor: MagicMock -) -> None: - run_updater(csharp_class_project, mock_ingestor, skip_if_missing="c-sharp") - - override_rels = get_relationships(mock_ingestor, "OVERRIDES") - - car_overrides = [call for call in override_rels if "Car" in call.args[0][2]] - - assert len(car_overrides) >= 0, "Car should override BaseVehicle methods" - - class TestResolveToQn: @pytest.fixture def mixin_instance(self, temp_repo: Path, mock_ingestor: MagicMock) -> GraphUpdater: @@ -2145,3 +1775,80 @@ def test_multiple_inheritance_creates_all_relationships( ] assert len(derived_inherits) >= 1, "Derived should have inheritance relationships" + + +class TestIngestClassesAndMethodsWithoutCombinedCaptures: + @pytest.fixture + def python_class_project(self, temp_repo: Path) -> Path: + project_path = 
temp_repo / "py_class_test" + project_path.mkdir() + + main_file = project_path / "main.py" + main_file.write_text( + encoding="utf-8", + data=""" +class MyService: + def handle(self): + pass + + def process(self): + pass +""", + ) + + return project_path + + def test_classes_ingested_without_combined_captures( + self, python_class_project: Path, mock_ingestor: MagicMock + ) -> None: + run_updater(python_class_project, mock_ingestor, skip_if_missing="python") + + project_name = python_class_project.name + from codebase_rag.tests.conftest import get_node_names + + classes = get_node_names(mock_ingestor, "Class") + assert f"{project_name}.main.MyService" in classes + + methods = get_node_names(mock_ingestor, "Method") + assert f"{project_name}.main.MyService.handle" in methods + assert f"{project_name}.main.MyService.process" in methods + + +class TestIngestRustImplMethodsWithoutSortedFuncNodes: + @pytest.fixture + def rust_impl_project(self, temp_repo: Path) -> Path: + project_path = temp_repo / "rust_impl_test" + project_path.mkdir() + + main_file = project_path / "main.rs" + main_file.write_text( + encoding="utf-8", + data=""" +struct Calculator { + value: i32, +} + +impl Calculator { + fn new() -> Calculator { + Calculator { value: 0 } + } + + fn add(&mut self, x: i32) { + self.value += x; + } +} +""", + ) + + return project_path + + def test_rust_impl_methods_ingested( + self, rust_impl_project: Path, mock_ingestor: MagicMock + ) -> None: + run_updater(rust_impl_project, mock_ingestor, skip_if_missing="rust") + + from codebase_rag.tests.conftest import get_node_names + + methods = get_node_names(mock_ingestor, "Method") + assert any("Calculator" in m and "new" in m for m in methods) + assert any("Calculator" in m and "add" in m for m in methods) diff --git a/codebase_rag/tests/test_classless_constructor_calls.py b/codebase_rag/tests/test_classless_constructor_calls.py new file mode 100644 index 000000000..25bcc1fb8 --- /dev/null +++ 
b/codebase_rag/tests/test_classless_constructor_calls.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag import constants as cs +from codebase_rag.tests.conftest import run_updater + + +def _edges(mock_ingestor: MagicMock, rel: str) -> list[tuple[str, str, str]]: + # (H) edges of a given type as (caller_qn, callee_label, callee_qn). + out: list[tuple[str, str, str]] = [] + for c in mock_ingestor.ensure_relationship_batch.call_args_list: + if c.args[1] == rel: + out.append((c.args[0][2], c.args[2][0], c.args[2][2])) + return out + + +class TestConstructionEdges: + def test_dataclass_construction_emits_instantiates_not_calls( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + # (H) a class with no explicit __init__ is represented by INSTANTIATES to + # (H) the class node; CALLS stays function/method-only (never a class). + (temp_repo / "app.py").write_text( + "from dataclasses import dataclass\n" + "\n" + "\n" + "@dataclass\n" + "class Config:\n" + " n: int\n" + "\n" + "\n" + "def use():\n" + " return Config(1)\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + instantiates = _edges(mock_ingestor, cs.RelationshipType.INSTANTIATES) + calls = _edges(mock_ingestor, cs.RelationshipType.CALLS) + + assert any( + caller.endswith(".use") + and to_label == cs.NodeLabel.CLASS + and to_qn.endswith(".Config") + for caller, to_label, to_qn in instantiates + ), f"no INSTANTIATES->Config edge; instantiates={sorted(instantiates)}" + assert not any( + to_label == cs.NodeLabel.CLASS for _caller, to_label, _to_qn in calls + ), f"CALLS must never target a class; calls={sorted(calls)}" + + def test_class_with_init_emits_both_instantiates_and_init_call( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + # (H) a class WITH __init__ records INSTANTIATES -> class AND CALLS -> the + # (H) __init__ method (the constructor runs); 
still no CALLS -> class. + (temp_repo / "app.py").write_text( + "class Widget:\n" + " def __init__(self, n):\n" + " self.n = n\n" + "\n" + "\n" + "def use():\n" + " return Widget(1)\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + instantiates = _edges(mock_ingestor, cs.RelationshipType.INSTANTIATES) + calls = _edges(mock_ingestor, cs.RelationshipType.CALLS) + + assert any( + caller.endswith(".use") + and to_label == cs.NodeLabel.CLASS + and to_qn.endswith(".Widget") + for caller, to_label, to_qn in instantiates + ) + assert any( + caller.endswith(".use") + and to_label == cs.NodeLabel.METHOD + and to_qn.endswith(".Widget.__init__") + for caller, to_label, to_qn in calls + ) + assert not any( + to_label == cs.NodeLabel.CLASS for _caller, to_label, _to_qn in calls + ) diff --git a/codebase_rag/tests/test_cli_autosync.py b/codebase_rag/tests/test_cli_autosync.py new file mode 100644 index 000000000..63cea7d2e --- /dev/null +++ b/codebase_rag/tests/test_cli_autosync.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +from collections.abc import Generator +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from typer.testing import CliRunner + +from codebase_rag.cli import app + +runner = CliRunner() + + +@pytest.fixture +def mock_memgraph_connect() -> Generator[MagicMock, None, None]: + with patch("codebase_rag.cli.connect_memgraph") as mock_connect: + mock_ingestor = MagicMock() + mock_connect.return_value.__enter__ = MagicMock(return_value=mock_ingestor) + mock_connect.return_value.__exit__ = MagicMock(return_value=False) + yield mock_connect + + +@pytest.fixture +def mock_agent_loops() -> Generator[None, None, None]: + with ( + patch("codebase_rag.cli.main_async") as mock_async, + patch("codebase_rag.cli.main_single_query") as mock_single, + patch("codebase_rag.cli.asyncio.run"), + ): + mock_async.return_value = None + mock_single.return_value = None + yield + + 
+@pytest.fixture +def mock_sync_path() -> Generator[MagicMock, None, None]: + with patch("codebase_rag.cli._run_graph_sync") as mock_sync: + yield mock_sync + + +@pytest.fixture +def mock_validate_models() -> Generator[None, None, None]: + with patch("codebase_rag.cli._update_and_validate_models"): + yield + + +def test_start_default_triggers_auto_sync( + mock_memgraph_connect: MagicMock, + mock_agent_loops: None, + mock_sync_path: MagicMock, + mock_validate_models: None, + tmp_path: Path, +) -> None: + result = runner.invoke( + app, + ["start", "--repo-path", str(tmp_path), "--ask-agent", "hello"], + ) + assert result.exit_code == 0, result.output + mock_sync_path.assert_called_once() + + +def test_start_no_sync_skips_auto_sync( + mock_memgraph_connect: MagicMock, + mock_agent_loops: None, + mock_sync_path: MagicMock, + mock_validate_models: None, + tmp_path: Path, +) -> None: + result = runner.invoke( + app, + ["start", "--repo-path", str(tmp_path), "--no-sync", "--ask-agent", "hello"], + ) + assert result.exit_code == 0, result.output + mock_sync_path.assert_not_called() + + +def test_start_update_graph_uses_sync_helper( + mock_memgraph_connect: MagicMock, + mock_agent_loops: None, + mock_sync_path: MagicMock, + mock_validate_models: None, + tmp_path: Path, +) -> None: + result = runner.invoke( + app, + ["start", "--repo-path", str(tmp_path), "--update-graph"], + ) + assert result.exit_code == 0, result.output + mock_sync_path.assert_called_once() + call = mock_sync_path.call_args + assert call.kwargs["repo"] == tmp_path.resolve() + assert call.kwargs["clean"] is False + + +def test_start_clean_without_update_graph_does_not_sync( + mock_memgraph_connect: MagicMock, + mock_sync_path: MagicMock, + tmp_path: Path, +) -> None: + result = runner.invoke( + app, + ["start", "--repo-path", str(tmp_path), "--clean"], + ) + assert result.exit_code == 0, result.output + mock_sync_path.assert_not_called() + + +def 
test_start_auto_sync_uses_derived_project_name_when_none_provided( + mock_memgraph_connect: MagicMock, + mock_agent_loops: None, + mock_sync_path: MagicMock, + mock_validate_models: None, + tmp_path: Path, +) -> None: + result = runner.invoke( + app, + ["start", "--repo-path", str(tmp_path), "--ask-agent", "hi"], + ) + assert result.exit_code == 0, result.output + call = mock_sync_path.call_args + project_name = call.kwargs["project_name"] + assert "__" in project_name + assert len(project_name.rsplit("__", 1)[1]) == 8 + + +def test_start_auto_sync_respects_explicit_project_name( + mock_memgraph_connect: MagicMock, + mock_agent_loops: None, + mock_sync_path: MagicMock, + mock_validate_models: None, + tmp_path: Path, +) -> None: + result = runner.invoke( + app, + [ + "start", + "--repo-path", + str(tmp_path), + "--project-name", + "my-project", + "--ask-agent", + "hi", + ], + ) + assert result.exit_code == 0, result.output + call = mock_sync_path.call_args + assert call.kwargs["project_name"] == "my-project" diff --git a/codebase_rag/tests/test_cli_clean.py b/codebase_rag/tests/test_cli_clean.py new file mode 100644 index 000000000..eb58c8458 --- /dev/null +++ b/codebase_rag/tests/test_cli_clean.py @@ -0,0 +1,196 @@ +from __future__ import annotations + +import json +from collections.abc import Generator +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from typer.testing import CliRunner + +from codebase_rag import constants as cs +from codebase_rag.cli import app +from codebase_rag.config import CgrignorePatterns + +runner = CliRunner() + + +@pytest.fixture +def mock_memgraph_connect() -> Generator[MagicMock, None, None]: + with patch("codebase_rag.cli.connect_memgraph") as mock_connect: + mock_ingestor = MagicMock() + mock_connect.return_value.__enter__ = MagicMock(return_value=mock_ingestor) + mock_connect.return_value.__exit__ = MagicMock(return_value=False) + yield mock_connect + + +def _get_ingestor(mock_connect: 
MagicMock) -> MagicMock: + return mock_connect.return_value.__enter__.return_value + + +class TestCleanWithoutUpdateGraph: + def test_clean_alone_wipes_database( + self, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + result = runner.invoke( + app, + ["start", "--clean", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0, result.output + ingestor = _get_ingestor(mock_memgraph_connect) + ingestor.clean_database.assert_called_once() + + def test_clean_alone_deletes_hash_cache( + self, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + cache_path = tmp_path / cs.HASH_CACHE_FILENAME + cache_path.write_text(json.dumps({"file.py": "abc123"})) + + result = runner.invoke( + app, + ["start", "--clean", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0, result.output + assert not cache_path.exists() + + def test_clean_alone_no_cache_file_still_succeeds( + self, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + cache_path = tmp_path / cs.HASH_CACHE_FILENAME + assert not cache_path.exists() + + result = runner.invoke( + app, + ["start", "--clean", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0, result.output + + def test_clean_alone_does_not_invoke_graph_updater( + self, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + with patch("codebase_rag.cli.GraphUpdater") as mock_updater: + result = runner.invoke( + app, + ["start", "--clean", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0, result.output + mock_updater.assert_not_called() + + def test_clean_alone_skips_model_validation( + self, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + with patch("codebase_rag.cli._update_and_validate_models") as mock_validate: + result = runner.invoke( + app, + ["start", "--clean", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0, result.output + mock_validate.assert_not_called() + + def 
test_clean_alone_shows_clean_done_message( + self, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + result = runner.invoke( + app, + ["start", "--clean", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0 + assert cs.CLI_MSG_CLEAN_DONE in result.output + + +class TestCleanWithUpdateGraph: + @patch("codebase_rag.cli.GraphUpdater") + @patch("codebase_rag.cli.load_parsers", return_value=({}, {})) + @patch("codebase_rag.cli.load_cgrignore_patterns") + def test_clean_with_update_deletes_hash_cache( + self, + mock_cgrignore: MagicMock, + mock_load_parsers: MagicMock, + mock_graph_updater: MagicMock, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + mock_cgrignore.return_value = CgrignorePatterns( + exclude=frozenset(), unignore=frozenset() + ) + + cache_path = tmp_path / cs.HASH_CACHE_FILENAME + cache_path.write_text(json.dumps({"file.py": "abc123"})) + + result = runner.invoke( + app, + ["start", "--clean", "--update-graph", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0, result.output + assert not cache_path.exists() + + @patch("codebase_rag.cli.GraphUpdater") + @patch("codebase_rag.cli.load_parsers", return_value=({}, {})) + @patch("codebase_rag.cli.load_cgrignore_patterns") + def test_clean_with_update_calls_clean_database( + self, + mock_cgrignore: MagicMock, + mock_load_parsers: MagicMock, + mock_graph_updater: MagicMock, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + mock_cgrignore.return_value = CgrignorePatterns( + exclude=frozenset(), unignore=frozenset() + ) + + result = runner.invoke( + app, + ["start", "--clean", "--update-graph", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0, result.output + ingestor = _get_ingestor(mock_memgraph_connect) + ingestor.clean_database.assert_called_once() + + @patch("codebase_rag.cli.GraphUpdater") + @patch("codebase_rag.cli.load_parsers", return_value=({}, {})) + 
@patch("codebase_rag.cli.load_cgrignore_patterns") + def test_update_without_clean_preserves_hash_cache( + self, + mock_cgrignore: MagicMock, + mock_load_parsers: MagicMock, + mock_graph_updater: MagicMock, + mock_memgraph_connect: MagicMock, + tmp_path: Path, + ) -> None: + mock_cgrignore.return_value = CgrignorePatterns( + exclude=frozenset(), unignore=frozenset() + ) + + cache_path = tmp_path / cs.HASH_CACHE_FILENAME + cache_data = {"file.py": "abc123"} + cache_path.write_text(json.dumps(cache_data)) + + result = runner.invoke( + app, + ["start", "--update-graph", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0, result.output + assert cache_path.exists() + assert json.loads(cache_path.read_text()) == cache_data diff --git a/codebase_rag/tests/test_cli_delete_project.py b/codebase_rag/tests/test_cli_delete_project.py new file mode 100644 index 000000000..92d0a70d4 --- /dev/null +++ b/codebase_rag/tests/test_cli_delete_project.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +import json +import re +from collections.abc import Generator +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from typer.testing import CliRunner + +from codebase_rag import constants as cs +from codebase_rag.cli import app + +_ANSI_RE = re.compile(r"\x1b\[[0-9;]*m") + + +def _strip_ansi(text: str) -> str: + return _ANSI_RE.sub("", text) + + +runner = CliRunner() + + +@pytest.fixture +def mock_memgraph_connect() -> Generator[MagicMock, None, None]: + with patch("codebase_rag.cli.connect_memgraph") as mock_connect: + mock_ingestor = MagicMock() + mock_ingestor.list_projects.return_value = ["platform", "other"] + mock_ingestor.fetch_all.return_value = [ + {cs.KEY_NODE_ID: 1}, + {cs.KEY_NODE_ID: 2}, + ] + mock_connect.return_value.__enter__ = MagicMock(return_value=mock_ingestor) + mock_connect.return_value.__exit__ = MagicMock(return_value=False) + yield mock_connect + + +def _get_ingestor(mock_connect: MagicMock) -> 
MagicMock: + return mock_connect.return_value.__enter__.return_value + + +@patch("codebase_rag.cli.delete_project_embeddings") +def test_delete_project_calls_ingestor_delete_project( + mock_delete_embeddings: MagicMock, + mock_memgraph_connect: MagicMock, +) -> None: + result = runner.invoke(app, ["delete-project", "--name", "platform"]) + + assert result.exit_code == 0, result.output + ingestor = _get_ingestor(mock_memgraph_connect) + ingestor.delete_project.assert_called_once_with("platform") + + +@patch("codebase_rag.cli.delete_project_embeddings") +def test_delete_project_cleans_embeddings_with_node_ids( + mock_delete_embeddings: MagicMock, + mock_memgraph_connect: MagicMock, +) -> None: + result = runner.invoke(app, ["delete-project", "--name", "platform"]) + + assert result.exit_code == 0, result.output + mock_delete_embeddings.assert_called_once_with("platform", [1, 2]) + + +@patch("codebase_rag.cli.delete_project_embeddings") +def test_delete_project_fails_when_project_missing( + mock_delete_embeddings: MagicMock, + mock_memgraph_connect: MagicMock, +) -> None: + result = runner.invoke(app, ["delete-project", "--name", "ghost"]) + + assert result.exit_code == 1 + assert "ghost" in result.output + ingestor = _get_ingestor(mock_memgraph_connect) + ingestor.delete_project.assert_not_called() + mock_delete_embeddings.assert_not_called() + + +@patch("codebase_rag.cli.delete_project_embeddings") +def test_delete_project_rejects_blank_name( + mock_delete_embeddings: MagicMock, + mock_memgraph_connect: MagicMock, +) -> None: + result = runner.invoke(app, ["delete-project", "--name", " "]) + + assert result.exit_code == 1 + assert cs.CLI_ERR_PROJECT_NAME_REQUIRED in result.output + mock_memgraph_connect.assert_not_called() + mock_delete_embeddings.assert_not_called() + + +@patch("codebase_rag.cli.delete_project_embeddings") +def test_delete_project_removes_hash_cache_when_repo_path_given( + mock_delete_embeddings: MagicMock, + mock_memgraph_connect: MagicMock, + 
tmp_path: Path, +) -> None: + cache_path = tmp_path / cs.HASH_CACHE_FILENAME + cache_path.write_text(json.dumps({"file.py": "abc123"})) + + result = runner.invoke( + app, + ["delete-project", "--name", "platform", "--repo-path", str(tmp_path)], + ) + + assert result.exit_code == 0, result.output + assert not cache_path.exists() + + +@patch("codebase_rag.cli.delete_project_embeddings") +def test_delete_project_without_repo_path_leaves_unrelated_hash_caches( + mock_delete_embeddings: MagicMock, + mock_memgraph_connect: MagicMock, + tmp_path: Path, +) -> None: + cache_path = tmp_path / cs.HASH_CACHE_FILENAME + cache_path.write_text(json.dumps({"file.py": "abc123"})) + + result = runner.invoke(app, ["delete-project", "--name", "platform"]) + + assert result.exit_code == 0, result.output + assert cache_path.exists() + + +@patch("codebase_rag.cli.delete_project_embeddings") +def test_delete_project_does_not_wipe_other_projects( + mock_delete_embeddings: MagicMock, + mock_memgraph_connect: MagicMock, +) -> None: + result = runner.invoke(app, ["delete-project", "--name", "platform"]) + + assert result.exit_code == 0, result.output + ingestor = _get_ingestor(mock_memgraph_connect) + ingestor.clean_database.assert_not_called() + + +@patch("codebase_rag.cli.delete_project_embeddings") +def test_delete_project_shows_success_message( + mock_delete_embeddings: MagicMock, + mock_memgraph_connect: MagicMock, +) -> None: + result = runner.invoke(app, ["delete-project", "--name", "platform"]) + + assert result.exit_code == 0, result.output + stripped = _strip_ansi(result.output) + assert cs.CLI_MSG_PROJECT_DELETED.format(project_name="platform") in stripped diff --git a/codebase_rag/tests/test_cli_repo_path_validation.py b/codebase_rag/tests/test_cli_repo_path_validation.py new file mode 100644 index 000000000..f91a6ffa6 --- /dev/null +++ b/codebase_rag/tests/test_cli_repo_path_validation.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import re +from collections.abc 
import Generator +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from typer.testing import CliRunner + +from codebase_rag import constants as cs +from codebase_rag.cli import app + +runner = CliRunner() + +_ANSI = re.compile(r"\x1b\[[0-9;]*m") + + +def _plain(output: str) -> str: + # (H) ANSI-stripped output with Rich soft-wrap newlines rejoined + return _ANSI.sub("", output).replace("\n", "") + + +@pytest.fixture +def mock_memgraph_connect() -> Generator[MagicMock, None, None]: + with ( + patch("codebase_rag.cli.connect_memgraph") as mock_connect, + patch("codebase_rag.cli._maybe_start_stack"), + ): + mock_ingestor = MagicMock() + mock_connect.return_value.__enter__ = MagicMock(return_value=mock_ingestor) + mock_connect.return_value.__exit__ = MagicMock(return_value=False) + yield mock_connect + + +class TestStartRepoPathValidation: + def test_nonexistent_path_exits_with_error( + self, mock_memgraph_connect: MagicMock, tmp_path: Path + ) -> None: + missing = tmp_path / "does_not_exist" + result = runner.invoke(app, ["start", "--clean", "--repo-path", str(missing)]) + + assert result.exit_code == 1, result.output + plain = _plain(result.output) + assert str(missing) in plain + assert "does not exist" in plain + + def test_file_path_exits_with_error( + self, mock_memgraph_connect: MagicMock, tmp_path: Path + ) -> None: + file_path = tmp_path / "a_file.txt" + file_path.write_text("not a directory") + result = runner.invoke(app, ["start", "--clean", "--repo-path", str(file_path)]) + + assert result.exit_code == 1, result.output + plain = _plain(result.output) + assert str(file_path) in plain + assert "not a directory" in plain + + def test_valid_non_git_dir_warns_but_proceeds( + self, mock_memgraph_connect: MagicMock, tmp_path: Path + ) -> None: + result = runner.invoke(app, ["start", "--clean", "--repo-path", str(tmp_path)]) + + assert result.exit_code == 0, result.output + plain = _plain(result.output) + assert "not a Git 
repository" in plain + assert str(tmp_path) in plain + + def test_git_dir_does_not_warn( + self, mock_memgraph_connect: MagicMock, tmp_path: Path + ) -> None: + (tmp_path / cs.GIT_DIR_NAME).mkdir() + result = runner.invoke(app, ["start", "--clean", "--repo-path", str(tmp_path)]) + + assert result.exit_code == 0, result.output + assert "not a Git repository" not in result.output + + def test_git_file_worktree_does_not_warn( + self, mock_memgraph_connect: MagicMock, tmp_path: Path + ) -> None: + # (H) worktrees and submodules use a .git file, not a directory + (tmp_path / cs.GIT_DIR_NAME).write_text("gitdir: /repo/.git/worktrees/wt\n") + result = runner.invoke(app, ["start", "--clean", "--repo-path", str(tmp_path)]) + + assert result.exit_code == 0, result.output + assert "not a Git repository" not in result.output + + +class TestIndexRepoPathValidation: + def test_index_nonexistent_path_exits_with_error(self, tmp_path: Path) -> None: + missing = tmp_path / "nope" + result = runner.invoke( + app, + [ + "index", + "--repo-path", + str(missing), + "-o", + str(tmp_path / "out"), + ], + ) + + assert result.exit_code == 1, result.output + assert "does not exist" in _plain(result.output) diff --git a/codebase_rag/tests/test_cli_smoke.py b/codebase_rag/tests/test_cli_smoke.py index 88b420e07..06a254bda 100644 --- a/codebase_rag/tests/test_cli_smoke.py +++ b/codebase_rag/tests/test_cli_smoke.py @@ -1,9 +1,15 @@ +import re import subprocess import sys +from importlib.metadata import version as get_version from pathlib import Path import pytest +from codebase_rag import constants as cs + +_ANSI_RE = re.compile(r"\x1b\[[0-9;]*m") + def test_help_command_works() -> None: repo_root = Path(__file__).parent.parent.parent @@ -15,14 +21,14 @@ def test_help_command_works() -> None: capture_output=True, text=True, timeout=30, + env={**__import__("os").environ, "NO_COLOR": "1"}, ) assert result.returncode == 0, f"Help command failed with: {result.stderr}" - assert "Usage:" in 
result.stdout or "usage:" in result.stdout.lower() - assert "--help" in result.stdout - - assert result.stderr == "", f"Unexpected stderr: {result.stderr}" + plain_stdout = _ANSI_RE.sub("", result.stdout) + assert "Usage:" in plain_stdout or "usage:" in plain_stdout.lower() + assert "--help" in plain_stdout def test_import_cli_module() -> None: @@ -32,3 +38,28 @@ def test_import_cli_module() -> None: assert hasattr(cli, "app"), "CLI module missing app attribute" except ImportError as e: pytest.fail(f"Failed to import cli module: {e}") + + +def test_version_flag() -> None: + repo_root = Path(__file__).parent.parent.parent + + for flag in ["--version", "-v"]: + result = subprocess.run( + [sys.executable, "-m", "codebase_rag.cli", flag], + check=False, + cwd=repo_root, + capture_output=True, + text=True, + timeout=30, + ) + + assert result.returncode == 0, ( + f"{flag} exited with code {result.returncode}: {result.stderr}" + ) + expected = cs.CLI_MSG_VERSION.format( + package=cs.PACKAGE_NAME, version=get_version(cs.PACKAGE_NAME) + ) + assert result.stdout.strip() == expected, ( + f"{flag} output did not match expected format: {repr(result.stdout)}" + ) + assert result.stderr == "", f"Unexpected stderr for {flag}: {result.stderr}" diff --git a/codebase_rag/tests/test_codebase_query.py b/codebase_rag/tests/test_codebase_query.py index 3be753570..6c7f5a5bf 100644 --- a/codebase_rag/tests/test_codebase_query.py +++ b/codebase_rag/tests/test_codebase_query.py @@ -69,6 +69,22 @@ def test_uses_provided_console( tool = create_query_tool(mock_ingestor, mock_cypher_gen, console=mock_console) assert tool is not None + async def test_default_console_writes_to_stderr( + self, + mock_ingestor: MagicMock, + mock_cypher_gen: MagicMock, + capsys: pytest.CaptureFixture[str], + ) -> None: + mock_cypher_gen.generate = AsyncMock(return_value="MATCH (n) RETURN n") + mock_ingestor.fetch_all.return_value = [{"name": "example"}] + + tool = create_query_tool(mock_ingestor, mock_cypher_gen, 
console=None) + await tool.function(natural_language_query="Find all functions") + + captured = capsys.readouterr() + assert captured.out == "" + assert captured.err != "" + class TestQueryCodebaseKnowledgeGraph: async def test_successful_query_returns_results( @@ -145,6 +161,25 @@ async def test_database_error_handled( assert result.results == [] assert "error" in result.summary.lower() + async def test_query_timeout_handled( + self, + mock_ingestor: MagicMock, + mock_cypher_gen: MagicMock, + mock_console: Console, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + import time + + from codebase_rag.config import settings + + monkeypatch.setattr(settings, "QUERY_TIMEOUT_S", 0.05) + mock_ingestor.fetch_all.side_effect = lambda *a, **k: time.sleep(1.0) + tool = create_query_tool(mock_ingestor, mock_cypher_gen, console=mock_console) + result = await tool.function(natural_language_query="long running query") + assert result.results == [] + assert "timeout" in result.summary.lower() + assert result.query_used == "MATCH (n) RETURN n" + class TestQueryResultFormatting: async def test_result_contains_query_used( diff --git a/codebase_rag/tests/test_conditional_alias_call.py b/codebase_rag/tests/test_conditional_alias_call.py new file mode 100644 index 000000000..901d395c5 --- /dev/null +++ b/codebase_rag/tests/test_conditional_alias_call.py @@ -0,0 +1,87 @@ +# (H) L3 finding from the evals/ harness: CallProcessor._ingest_function_calls binds a +# (H) local to a conditionally-selected bound method (resolve_builtin = +# (H) resolver.resolve_builtin_call if is_js_ts else None) then calls it. The alias must +# (H) be resolved through the non-None branch of the conditional to its real method. 
+from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +FILES = { + "pkg/__init__.py": "", + "pkg/helper.py": ( + "class Helper:\n def do(self, value):\n return value\n" + ), + "pkg/worker.py": ( + "from .helper import Helper\n\n\n" + "class Worker:\n" + " def __init__(self) -> None:\n" + " self._helper = Helper()\n\n" + " def run(self, value, flag):\n" + " helper = self._helper\n" + " fn = helper.do if flag else None\n" + " return fn(value)\n" + ), +} + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + for rel, content in FILES.items(): + p = tmp_path / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestConditionalAliasCall: + def 
test_conditional_bound_method_alias_resolves(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.worker.Worker.run", + "proj.pkg.helper.Helper.do", + ) in calls, calls diff --git a/codebase_rag/tests/test_config_validation.py b/codebase_rag/tests/test_config_validation.py new file mode 100644 index 000000000..c17c51a26 --- /dev/null +++ b/codebase_rag/tests/test_config_validation.py @@ -0,0 +1,85 @@ +import pytest + +from codebase_rag import constants as cs +from codebase_rag.config import ModelConfig, format_missing_api_key_errors + + +class TestValidateApiKey: + def test_local_providers_skip_validation(self) -> None: + cfg = ModelConfig(provider=cs.Provider.OLLAMA, model_id="llama3") + cfg.validate_api_key() + + def test_google_vertex_skips_validation(self) -> None: + cfg = ModelConfig( + provider=cs.Provider.GOOGLE, + model_id="gemini-pro", + provider_type=cs.GoogleProviderType.VERTEX, + ) + cfg.validate_api_key() + + def test_google_gla_requires_api_key(self) -> None: + cfg = ModelConfig( + provider=cs.Provider.GOOGLE, + model_id="gemini-pro", + provider_type=cs.GoogleProviderType.GLA, + ) + with pytest.raises(ValueError, match="API Key Missing"): + cfg.validate_api_key() + + @pytest.mark.parametrize( + "api_key_kwargs", + [ + {}, + {"api_key": ""}, + {"api_key": " "}, + {"api_key": cs.DEFAULT_API_KEY}, + ], + ) + def test_invalid_api_key_raises(self, api_key_kwargs: dict[str, str]) -> None: + cfg = ModelConfig( + provider=cs.Provider.OPENAI, model_id="gpt-4", **api_key_kwargs + ) + with pytest.raises(ValueError, match="API Key Missing"): + cfg.validate_api_key() + + def test_valid_api_key_passes(self) -> None: + cfg = ModelConfig( + provider=cs.Provider.OPENAI, model_id="gpt-4", api_key="sk-real-key-123" + ) + cfg.validate_api_key() + + def test_role_forwarded_to_error_message(self) -> None: + cfg = ModelConfig(provider=cs.Provider.OPENAI, model_id="gpt-4") + with pytest.raises(ValueError, match="cypher"): + 
cfg.validate_api_key(role="cypher") + + +class TestFormatMissingApiKeyErrors: + def test_known_provider_openai(self) -> None: + msg = format_missing_api_key_errors(cs.Provider.OPENAI) + assert "OPENAI_API_KEY" in msg + assert "https://platform.openai.com/api-keys" in msg + assert "OpenAI" in msg + + def test_known_provider_anthropic(self) -> None: + msg = format_missing_api_key_errors(cs.Provider.ANTHROPIC) + assert "ANTHROPIC_API_KEY" in msg + assert "Anthropic" in msg + + def test_unknown_provider_generic_message(self) -> None: + msg = format_missing_api_key_errors("deepseek") + assert "DEEPSEEK_API_KEY" in msg + assert "Deepseek" in msg + + def test_role_appears_in_message(self) -> None: + msg = format_missing_api_key_errors(cs.Provider.OPENAI, role="cypher") + assert "for cypher" in msg + + def test_default_role_omits_role_from_message(self) -> None: + msg = format_missing_api_key_errors(cs.Provider.OPENAI) + assert "for model" not in msg + + def test_case_insensitive_lookup(self) -> None: + msg = format_missing_api_key_errors("OpenAI") + assert "OPENAI_API_KEY" in msg + assert "OpenAI" in msg diff --git a/codebase_rag/tests/test_constructor_call_resolution.py b/codebase_rag/tests/test_constructor_call_resolution.py new file mode 100644 index 000000000..5fed79020 --- /dev/null +++ b/codebase_rag/tests/test_constructor_call_resolution.py @@ -0,0 +1,87 @@ +# (H) L3 finding from the evals/ harness: instantiating a class (X()) is a call to +# (H) X.__init__ at runtime, but cgr resolved the call to the class and dropped it. +# (H) A constructor call must produce a CALLS edge to the class's __init__ method. 
+from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +MODULE_SRC = """class Widget: + def __init__(self) -> None: + self.x = 1 + + +class Plain: + pass + + +def build() -> Widget: + return Widget() + + +def build_plain() -> Plain: + return Plain() +""" + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + (tmp_path / "m.py").write_text(MODULE_SRC) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestConstructorCallResolution: + def test_instantiation_calls_init(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ("proj.m.build", "proj.m.Widget.__init__") in calls, calls + + def test_instantiation_without_init_is_not_dropped_to_class( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + # (H) Plain has 
no __init__; cgr must not emit a CALLS edge to the class node. + assert ("proj.m.build_plain", "proj.m.Plain") not in calls, calls diff --git a/codebase_rag/tests/test_cpp_cross_file_methods.py b/codebase_rag/tests/test_cpp_cross_file_methods.py new file mode 100644 index 000000000..dbc2662de --- /dev/null +++ b/codebase_rag/tests/test_cpp_cross_file_methods.py @@ -0,0 +1,462 @@ +"""Tests for C++ cross-file out-of-class method resolution (issue #496). + +When a class is declared in a header (.h) and methods are implemented +out-of-class in a source file (.cpp) using ``ClassName::method`` syntax, +the Method nodes must link back to the correct Class node via +DEFINES_METHOD edges -- not to a phantom class constructed from the +.cpp module's qualified name. +""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from codebase_rag.constants import SEPARATOR_DOT +from codebase_rag.tests.conftest import ( + get_nodes, + get_relationships, + run_updater, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _get_method_qns(mock_ingestor: MagicMock) -> set[str]: + """Return all Method qualified names recorded in the ingestor.""" + return {call[0][1]["qualified_name"] for call in get_nodes(mock_ingestor, "Method")} + + +def _get_class_qns(mock_ingestor: MagicMock) -> set[str]: + """Return all Class qualified names recorded in the ingestor.""" + return {call[0][1]["qualified_name"] for call in get_nodes(mock_ingestor, "Class")} + + +def _get_defines_method_edges( + mock_ingestor: MagicMock, +) -> list[tuple[str, str]]: + """Return ``(class_qn, method_qn)`` pairs from DEFINES_METHOD rels.""" + edges: list[tuple[str, str]] = [] + for rel in get_relationships(mock_ingestor, "DEFINES_METHOD"): + class_qn = rel.args[0][2] + method_qn = rel.args[2][2] + edges.append((class_qn, 
method_qn)) + return edges + + +def _method_names_for_class(mock_ingestor: MagicMock, class_name: str) -> set[str]: + """Method simple-names linked via DEFINES_METHOD to *class_name*.""" + names: set[str] = set() + for class_qn, method_qn in _get_defines_method_edges(mock_ingestor): + parts = class_qn.split(SEPARATOR_DOT) + if class_name in parts: + names.add(method_qn.split(SEPARATOR_DOT)[-1]) + return names + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def cpp_cross_file_project(temp_repo: Path) -> Path: + project = temp_repo / "cpp_cross_file" + project.mkdir() + return project + + +# --------------------------------------------------------------------------- +# Test: basic header + source cross-file methods +# --------------------------------------------------------------------------- + + +def test_header_source_method_resolution( + cpp_cross_file_project: Path, + mock_ingestor: MagicMock, +) -> None: + """Class in .h, implementations in .cpp -- methods must link to .h class.""" + include = cpp_cross_file_project / "include" + include.mkdir() + src = cpp_cross_file_project / "src" + src.mkdir() + + (include / "Calculator.h").write_text( + encoding="utf-8", + data="""\ +#pragma once + +class Calculator { +public: + int add(int a, int b); + int subtract(int a, int b); + double divide(int a, int b); +}; +""", + ) + + (src / "Calculator.cpp").write_text( + encoding="utf-8", + data="""\ +#include "Calculator.h" + +int Calculator::add(int a, int b) { + return a + b; +} + +int Calculator::subtract(int a, int b) { + return a - b; +} + +double Calculator::divide(int a, int b) { + if (b == 0) return 0; + return static_cast(a) / b; +} +""", + ) + + run_updater(cpp_cross_file_project, mock_ingestor) + + # The class should exist in the header module. 
+ class_qns = _get_class_qns(mock_ingestor) + header_class = [qn for qn in class_qns if "include" in qn and "Calculator" in qn] + assert header_class, ( + f"Expected a Calculator class in include/, got classes: {class_qns}" + ) + + # All three out-of-class methods should have DEFINES_METHOD edges + # pointing to the *header* class, not to a phantom class in src/. + edges = _get_defines_method_edges(mock_ingestor) + header_class_qn = header_class[0] + methods_linked_to_header = { + mq.split(SEPARATOR_DOT)[-1] for cq, mq in edges if cq == header_class_qn + } + + assert "add" in methods_linked_to_header, ( + f"'add' not linked to header class. Edges: {edges}" + ) + assert "subtract" in methods_linked_to_header, ( + f"'subtract' not linked to header class. Edges: {edges}" + ) + assert "divide" in methods_linked_to_header, ( + f"'divide' not linked to header class. Edges: {edges}" + ) + + # There should be NO orphan Method nodes (methods whose container_qn + # uses the .cpp module instead of the .h module). 
+ method_qns = _get_method_qns(mock_ingestor) + orphan_methods = { + qn + for qn in method_qns + if "src.Calculator" in qn and "Calculator.Calculator" in qn + } + assert not orphan_methods, ( + f"Found orphan methods with .cpp module QN: {orphan_methods}" + ) + + +# --------------------------------------------------------------------------- +# Test: multiple source files implementing one header class +# --------------------------------------------------------------------------- + + +def test_multiple_source_files_one_class( + cpp_cross_file_project: Path, + mock_ingestor: MagicMock, +) -> None: + """Two .cpp files implement methods of one class declared in .h.""" + include = cpp_cross_file_project / "include" + include.mkdir() + src = cpp_cross_file_project / "src" + src.mkdir() + + (include / "Engine.h").write_text( + encoding="utf-8", + data="""\ +#pragma once + +class Engine { +public: + void start(); + void stop(); + void accelerate(int speed); + void brake(); +}; +""", + ) + + (src / "engine_control.cpp").write_text( + encoding="utf-8", + data="""\ +#include "Engine.h" + +void Engine::start() { /* ... */ } +void Engine::stop() { /* ... */ } +""", + ) + + (src / "engine_movement.cpp").write_text( + encoding="utf-8", + data="""\ +#include "Engine.h" + +void Engine::accelerate(int speed) { /* ... */ } +void Engine::brake() { /* ... */ } +""", + ) + + run_updater(cpp_cross_file_project, mock_ingestor) + + class_qns = _get_class_qns(mock_ingestor) + header_classes = [qn for qn in class_qns if "include" in qn and "Engine" in qn] + assert header_classes, f"Expected Engine class in include/, got: {class_qns}" + header_class_qn = header_classes[0] + + edges = _get_defines_method_edges(mock_ingestor) + methods_linked = { + mq.split(SEPARATOR_DOT)[-1] for cq, mq in edges if cq == header_class_qn + } + + for method_name in ("start", "stop", "accelerate", "brake"): + assert method_name in methods_linked, ( + f"'{method_name}' not linked to header Engine class. 
" + f"Linked methods: {methods_linked}" + ) + + +# --------------------------------------------------------------------------- +# Test: constructor and destructor out-of-class across files +# --------------------------------------------------------------------------- + + +def test_cross_file_constructor_destructor( + cpp_cross_file_project: Path, + mock_ingestor: MagicMock, +) -> None: + """Constructors and destructors implemented in .cpp link to .h class.""" + include = cpp_cross_file_project / "include" + include.mkdir() + src = cpp_cross_file_project / "src" + src.mkdir() + + (include / "Resource.h").write_text( + encoding="utf-8", + data="""\ +#pragma once + +class Resource { +public: + Resource(); + Resource(int size); + ~Resource(); + void reset(); +private: + int* data_; +}; +""", + ) + + (src / "Resource.cpp").write_text( + encoding="utf-8", + data="""\ +#include "Resource.h" + +Resource::Resource() : data_(nullptr) {} + +Resource::Resource(int size) { + data_ = new int[size]; +} + +Resource::~Resource() { + delete[] data_; +} + +void Resource::reset() { + delete[] data_; + data_ = nullptr; +} +""", + ) + + run_updater(cpp_cross_file_project, mock_ingestor) + + class_qns = _get_class_qns(mock_ingestor) + header_classes = [qn for qn in class_qns if "include" in qn and "Resource" in qn] + assert header_classes, f"Expected Resource class in include/, got: {class_qns}" + header_class_qn = header_classes[0] + + edges = _get_defines_method_edges(mock_ingestor) + methods_linked = { + mq.split(SEPARATOR_DOT)[-1] for cq, mq in edges if cq == header_class_qn + } + + assert "Resource" in methods_linked, ( + f"Constructor not linked to header class. Methods: {methods_linked}" + ) + assert "~Resource" in methods_linked, ( + f"Destructor not linked to header class. Methods: {methods_linked}" + ) + assert "reset" in methods_linked, ( + f"'reset' not linked to header class. 
Methods: {methods_linked}" + ) + + +# --------------------------------------------------------------------------- +# Test: nested namespace cross-file methods +# --------------------------------------------------------------------------- + + +def test_nested_namespace_cross_file( + cpp_cross_file_project: Path, + mock_ingestor: MagicMock, +) -> None: + """Class inside nested namespaces, methods implemented in separate .cpp.""" + include = cpp_cross_file_project / "include" + include.mkdir() + src = cpp_cross_file_project / "src" + src.mkdir() + + (include / "Logger.h").write_text( + encoding="utf-8", + data="""\ +#pragma once + +namespace app { +namespace logging { + +class Logger { +public: + void info(const char* msg); + void error(const char* msg); +}; + +} // namespace logging +} // namespace app +""", + ) + + (src / "Logger.cpp").write_text( + encoding="utf-8", + data="""\ +#include "Logger.h" + +namespace app { +namespace logging { + +void Logger::info(const char* msg) { /* ... */ } +void Logger::error(const char* msg) { /* ... */ } + +} // namespace logging +} // namespace app +""", + ) + + run_updater(cpp_cross_file_project, mock_ingestor) + + class_qns = _get_class_qns(mock_ingestor) + header_classes = [qn for qn in class_qns if "include" in qn and "Logger" in qn] + assert header_classes, f"Expected Logger class in include/, got: {class_qns}" + header_class_qn = header_classes[0] + + edges = _get_defines_method_edges(mock_ingestor) + methods_linked = { + mq.split(SEPARATOR_DOT)[-1] for cq, mq in edges if cq == header_class_qn + } + + assert "info" in methods_linked, ( + f"'info' not linked to header Logger. Methods: {methods_linked}" + ) + assert "error" in methods_linked, ( + f"'error' not linked to header Logger. 
Methods: {methods_linked}" + ) + + +# --------------------------------------------------------------------------- +# Test: no orphan methods remain (aggregate check) +# --------------------------------------------------------------------------- + + +def test_no_orphan_methods_across_files( + cpp_cross_file_project: Path, + mock_ingestor: MagicMock, +) -> None: + """Every Method node must have at least one incoming DEFINES_METHOD edge.""" + include = cpp_cross_file_project / "include" + include.mkdir() + src = cpp_cross_file_project / "src" + src.mkdir() + + (include / "Widget.h").write_text( + encoding="utf-8", + data="""\ +#pragma once + +class Widget { +public: + void draw(); + void resize(int w, int h); + void hide(); +}; +""", + ) + + (src / "Widget.cpp").write_text( + encoding="utf-8", + data="""\ +#include "Widget.h" + +void Widget::draw() { /* ... */ } +void Widget::resize(int w, int h) { /* ... */ } +void Widget::hide() { /* ... */ } +""", + ) + + run_updater(cpp_cross_file_project, mock_ingestor) + + method_qns = _get_method_qns(mock_ingestor) + edges = _get_defines_method_edges(mock_ingestor) + methods_with_edges = {mq for _, mq in edges} + + orphans = method_qns - methods_with_edges + # Filter to only methods belonging to Widget (other methods from inline + # definitions always have edges). 
+ widget_orphans = {qn for qn in orphans if "Widget" in qn} + assert not widget_orphans, ( + f"Found orphan Widget Method nodes with no DEFINES_METHOD edge: " + f"{widget_orphans}" + ) + + +# --------------------------------------------------------------------------- +# Test: same-file out-of-class still works (regression) +# --------------------------------------------------------------------------- + + +def test_same_file_out_of_class_still_works( + cpp_cross_file_project: Path, + mock_ingestor: MagicMock, +) -> None: + """When class and implementations are in the same .cpp, nothing breaks.""" + (cpp_cross_file_project / "single.cpp").write_text( + encoding="utf-8", + data="""\ +class Foo { +public: + void bar(); + int baz(int x); +}; + +void Foo::bar() { /* ... */ } +int Foo::baz(int x) { return x; } +""", + ) + + run_updater(cpp_cross_file_project, mock_ingestor) + + method_names = _method_names_for_class(mock_ingestor, "Foo") + assert "bar" in method_names, f"Expected 'bar', got: {method_names}" + assert "baz" in method_names, f"Expected 'baz', got: {method_names}" diff --git a/codebase_rag/tests/test_cpp_cross_file_singleton.py b/codebase_rag/tests/test_cpp_cross_file_singleton.py index 403d16c4b..023d82226 100644 --- a/codebase_rag/tests/test_cpp_cross_file_singleton.py +++ b/codebase_rag/tests/test_cpp_cross_file_singleton.py @@ -147,15 +147,21 @@ def test_cpp_singleton_pattern_cross_file_calls( found_calls.add((caller_short, callee_short)) + # (H) Calls are attributed to the enclosing method/function, not the file: + # the singleton calls live inside SceneController's methods and + # Application.start(), so those are the callers (not the module nodes). 
+ sc = "controllers.SceneController.SceneController" expected_calls = [ - ("controllers.SceneController", "storage.Storage.Storage.getInstance"), - ("controllers.SceneController", "storage.Storage.Storage.clearAll"), - ("controllers.SceneController", "storage.Storage.Storage.save"), - ("controllers.SceneController", "storage.Storage.Storage.load"), - ("main", "controllers.SceneController.SceneController.loadMenuScene"), - ("main", "controllers.SceneController.SceneController.loadGameScene"), - ("main", "storage.Storage.Storage.getInstance"), - ("main", "storage.Storage.Storage.load"), + (f"{sc}.loadMenuScene", "storage.Storage.Storage.getInstance"), + (f"{sc}.loadMenuScene", "storage.Storage.Storage.clearAll"), + (f"{sc}.loadMenuScene", "storage.Storage.Storage.save"), + (f"{sc}.loadMenuScene", "storage.Storage.Storage.load"), + (f"{sc}.loadGameScene", "storage.Storage.Storage.getInstance"), + (f"{sc}.loadGameScene", "storage.Storage.Storage.save"), + ("main.Application.start", f"{sc}.loadMenuScene"), + ("main.Application.start", f"{sc}.loadGameScene"), + ("main.Application.start", "storage.Storage.Storage.getInstance"), + ("main.Application.start", "storage.Storage.Storage.load"), ("main.main", "main.Application.start"), ] diff --git a/codebase_rag/tests/test_cpp_crosslang_qn_collision.py b/codebase_rag/tests/test_cpp_crosslang_qn_collision.py new file mode 100644 index 000000000..6935b5ece --- /dev/null +++ b/codebase_rag/tests/test_cpp_crosslang_qn_collision.py @@ -0,0 +1,59 @@ +# (H) Regression: a C++ out-of-class method (Widget::render) must not bind to a +# (H) same-named class in another language (Python's Widget), which would give the +# (H) two methods an identical qualified_name and collapse them under the graph's +# (H) qualified_name unique constraint (silently dropping the Python method). 
+from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.constants import KEY_PATH, KEY_QUALIFIED_NAME, NodeLabel +from codebase_rag.tests.conftest import create_and_run_updater, get_nodes + + +def _make_project(temp_repo: Path) -> Path: + project_path = temp_repo / "crosslang" + (project_path / "app").mkdir(parents=True) + (project_path / "lib").mkdir(parents=True) + (project_path / "app" / "widget.py").write_text( + encoding="utf-8", + data="class Widget:\n def render(self):\n return 1\n", + ) + # (H) Out-of-class C++ method with no C++ Widget class anywhere in the repo: + # (H) the only Widget class cgr knows is the Python one. + (project_path / "lib" / "widget.cpp").write_text( + encoding="utf-8", + data="int Widget::render() {\n return 2;\n}\n", + ) + return project_path + + +def _methods_named(mock_ingestor: MagicMock, name: str) -> list[tuple[str, str]]: + out: list[tuple[str, str]] = [] + for node in get_nodes(mock_ingestor, NodeLabel.METHOD): + props = node[0][1] + qn = str(props.get(KEY_QUALIFIED_NAME)) + if qn.rsplit(".", 1)[-1] == name: + out.append((qn, str(props.get(KEY_PATH)))) + return out + + +def test_cpp_method_does_not_steal_python_method_qn( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = _make_project(temp_repo) + create_and_run_updater(project, mock_ingestor, skip_if_missing="cpp") + + renders = _methods_named(mock_ingestor, "render") + qns = [qn for qn, _ in renders] + # (H) The Python and C++ render methods must each have a distinct qn; no two + # (H) render method nodes may collide on the same qualified_name. 
+ assert len(qns) == len(set(qns)), f"colliding render qns: {renders}" + + py_qns = {qn for qn, path in renders if path.endswith("widget.py")} + cpp_qns = {qn for qn, path in renders if path.endswith("widget.cpp")} + assert py_qns, f"python Widget.render missing: {renders}" + assert cpp_qns, f"cpp Widget::render missing: {renders}" + assert py_qns.isdisjoint(cpp_qns), ( + f"cpp method bound to python class qn: py={py_qns} cpp={cpp_qns}" + ) diff --git a/codebase_rag/tests/test_cpp_forward_declaration.py b/codebase_rag/tests/test_cpp_forward_declaration.py new file mode 100644 index 000000000..cd38791d3 --- /dev/null +++ b/codebase_rag/tests/test_cpp_forward_declaration.py @@ -0,0 +1,162 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.tests.conftest import ( + get_nodes, + get_qualified_names, + run_updater, +) + +# (H) A C++ forward declaration (`class Widget;`) is a bodyless class_specifier. The +# (H) definition pass registered it as its own Class node (zero methods), so the real +# (H) definition that followed collided on the qn and was suffixed (`Widget@`), +# (H) fragmenting one class into several same-named nodes across files. That made +# (H) member-call resolution pick among duplicate candidates (a correctness bug) and, +# (H) via hash-ordered candidate selection, produced non-reproducible graphs. A +# (H) forward declaration must NOT create a Class node; only the real definition does. 
CPP_SOURCE = """
namespace ns {

class Widget;

class Widget {
 public:
  int run() { return 1; }
};

int use(Widget* w) { return w->run(); }

}  // namespace ns
"""


# (H) Writes CPP_SOURCE (forward declaration followed by the real definition) to
# (H) w.cpp, runs the updater, and checks that exactly one Widget Class node and
# (H) the ns.Widget.run Method node were recorded.
def test_forward_declaration_does_not_create_phantom_class(
    temp_repo: Path,
    mock_ingestor: MagicMock,
) -> None:
    project = temp_repo / "cpp_fwd"
    project.mkdir()
    (project / "w.cpp").write_text(CPP_SOURCE, encoding="utf-8")

    run_updater(project, mock_ingestor)

    # (H) Prefix match on the last qn segment, so a suffixed duplicate of Widget
    # (H) is counted as well and makes the length check fail.
    class_qns = get_qualified_names(get_nodes(mock_ingestor, "Class"))
    widgets = [q for q in class_qns if q.rsplit(".", 1)[-1].startswith("Widget")]
    assert len(widgets) == 1, f"expected exactly one Widget Class node, got {widgets}"

    # (H) The single surviving node is the real definition, so its qn carries no
    # (H) collision suffix and its method registers cleanly under it.
    method_qns = get_qualified_names(get_nodes(mock_ingestor, "Method"))
    assert any(q.endswith(".ns.Widget.run") for q in method_qns), (
        f"expected ns.Widget.run method node, got {sorted(method_qns)}"
    )


# (H) A template class forward declaration (`template <typename T> class Box;`)
# (H) is a template_declaration wrapping a bodyless class_specifier, so the plain
# (H) guard on class_specifier node type missed it and it still fragmented the
# (H) class. It must be dropped the same way -- BUT only when a real definition
# (H) exists, because a primary template forward-declared and defined solely via
# (H) specializations is the canonical node and must be kept. The invariant: a
# (H) template forward declaration that is followed by a real definition adds no
# (H) Box node beyond the definition's.
# (H) Baseline fixture: the class template is defined once, with no forward
# (H) declaration in front of it.
_TEMPLATE_DEF_ONLY = """
namespace ns {
template <typename T>
class Box {
 public:
  T get() { return value_; }
  T value_;
};
}  // namespace ns
"""

# (H) Same definition as the baseline, preceded by a bodyless forward declaration
# (H) of the same template.
_TEMPLATE_FORWARD_PLUS_DEF = """
namespace ns {
template <typename T>
class Box;
template <typename T>
class Box {
 public:
  T get() { return value_; }
  T value_;
};
}  // namespace ns
"""


# (H) Number of recorded Class qualified names whose last dotted segment starts
# (H) with "Box" (prefix match, so suffixed variants of the name count too).
def _box_class_count(mock_ingestor: MagicMock) -> int:
    class_qns = get_qualified_names(get_nodes(mock_ingestor, "Class"))
    return sum(1 for qn in class_qns if qn.rsplit(".", 1)[-1].startswith("Box"))


# (H) Runs the updater on the definition-only fixture and on the forward+definition
# (H) fixture, each in its own repo with its own MagicMock ingestor, and requires
# (H) both runs to record the same number of Box Class nodes.
def test_template_forward_declaration_adds_no_node(temp_repo: Path) -> None:
    baseline_repo = temp_repo / "def_only"
    baseline_repo.mkdir()
    (baseline_repo / "d.cpp").write_text(_TEMPLATE_DEF_ONLY, encoding="utf-8")
    baseline_ingestor = MagicMock()
    run_updater(baseline_repo, baseline_ingestor)
    baseline = _box_class_count(baseline_ingestor)
    # (H) Guard against a vacuous pass: 0 == 0 would satisfy the equality below.
    assert baseline >= 1, "definition-only template produced no Box node"

    with_forward_repo = temp_repo / "fwd_and_def"
    with_forward_repo.mkdir()
    (with_forward_repo / "f.cpp").write_text(
        _TEMPLATE_FORWARD_PLUS_DEF, encoding="utf-8"
    )
    with_forward_ingestor = MagicMock()
    run_updater(with_forward_repo, with_forward_ingestor)
    with_forward = _box_class_count(with_forward_ingestor)

    assert with_forward == baseline, (
        f"template forward declaration added {with_forward - baseline} phantom "
        f"Box node(s) (baseline {baseline}, with forward {with_forward})"
    )


# (H) A forward declaration must be dropped only when a definition of the SAME
# (H) namespace-qualified type exists. Here `A::Thing` is defined but `B::Thing` is
# (H) only forward-declared (a distinct type). Dropping the forward on a bare
# (H) simple-name match would erase `B::Thing` entirely; the namespace-qualified
# (H) comparison must keep it.
+_CROSS_NAMESPACE_SOURCE = """ +namespace A { +class Thing { + public: + int a() { return 1; } +}; +} // namespace A + +namespace B { +class Thing; +} // namespace B +""" + + +def test_forward_decl_in_other_namespace_is_kept( + temp_repo: Path, + mock_ingestor: MagicMock, +) -> None: + project = temp_repo / "cpp_cross_ns" + project.mkdir() + (project / "t.cpp").write_text(_CROSS_NAMESPACE_SOURCE, encoding="utf-8") + + run_updater(project, mock_ingestor) + + thing_qns = { + q + for q in get_qualified_names(get_nodes(mock_ingestor, "Class")) + if q.rsplit(".", 1)[-1] == "Thing" + } + assert any(q.endswith(".A.Thing") for q in thing_qns), ( + f"defined A::Thing missing: {sorted(thing_qns)}" + ) + assert any(q.endswith(".B.Thing") for q in thing_qns), ( + f"forward-only B::Thing was wrongly dropped: {sorted(thing_qns)}" + ) diff --git a/codebase_rag/tests/test_cpp_frontend_calls.py b/codebase_rag/tests/test_cpp_frontend_calls.py new file mode 100644 index 000000000..160f72450 --- /dev/null +++ b/codebase_rag/tests/test_cpp_frontend_calls.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from codebase_rag.parsers.cpp_frontend import cpp_frontend_available, run_cpp_frontend + +pytestmark = pytest.mark.skipif( + not cpp_frontend_available(), + reason="libclang not available", +) + +# (H) An out-of-line method calling a free function. tree-sitter's cgr path +# (H) historically dangled the caller qn (PR #47); libclang resolves the call +# (H) target via cursor.referenced with no name heuristic, and the frontend +# (H) anchors the caller to the method node itself. 
_HEADER = """
namespace m {

class Calc {
public:
    int add(int a, int b);
};

int helper(int x);

}  // namespace m
"""

_SRC = """
#include "calc.h"
namespace m {
int helper(int x) { return x + 1; }
int Calc::add(int a, int b) { return helper(a) + b; }
}
"""


# (H) Creates `root` and writes calc.h, calc.cpp and a one-entry
# (H) compile_commands.json that compiles calc.cpp with -std=c++17.
def _write(root: Path) -> None:
    root.mkdir()
    source = root / "calc.cpp"
    (root / "calc.h").write_text(_HEADER, encoding="utf-8")
    source.write_text(_SRC, encoding="utf-8")
    (root / "compile_commands.json").write_text(
        json.dumps(
            [
                {
                    "directory": str(root),
                    "arguments": ["c++", "-std=c++17", str(source)],
                    "file": str(source),
                }
            ]
        ),
        encoding="utf-8",
    )


# (H) Collects (from_label, from_qn, to_label, to_qn) for every recorded
# (H) ensure_relationship_batch call whose relationship type is "CALLS". Each
# (H) endpoint spec is unpacked as a 3-tuple; its middle element is ignored.
def _calls(ingestor: MagicMock) -> list[tuple[str, str, str, str]]:
    out: list[tuple[str, str, str, str]] = []
    for recorded in ingestor.ensure_relationship_batch.call_args_list:
        if recorded.args[1] != "CALLS":
            continue
        from_label, _, from_qn = recorded.args[0]
        to_label, _, to_qn = recorded.args[2]
        out.append((from_label, from_qn, to_label, to_qn))
    return out


# (H) Runs the libclang frontend over the Calc project and requires a CALLS edge
# (H) from the Method Calc.add to the Function helper.
def test_method_calls_free_function(temp_repo: Path) -> None:
    root = temp_repo / "callsproj"
    _write(root)

    ingestor = MagicMock()
    run_cpp_frontend(ingestor, root, root.name, root)

    calls = _calls(ingestor)
    # (H) The caller is the METHOD node (not a dangling free-function/module qn).
    assert any(
        from_label == "Method"
        and from_qn.endswith(".m.Calc.add")
        and to_label == "Function"
        and to_qn.endswith(".m.helper")
        for from_label, from_qn, to_label, to_qn in calls
    ), f"expected Calc.add CALLS helper, got {calls}"


# (H) A default member initializer and a namespace-scope global initializer both
# (H) call compute() with no enclosing function or method. The tree-sitter path
# (H) attributes such module-load-time calls to the Module node; the libclang
# (H) frontend previously dropped them (its walk had no enclosing scope to attach
# (H) the call to), so they must instead fall back to the enclosing Module.
+_INIT_SRC = """ +namespace m { + +int compute(); + +struct S { + int x_ = compute(); + int y_; +}; + +int g_val = compute(); + +int compute() { return 7; } + +} // namespace m +""" + + +def _write_single(root: Path, source: str) -> None: + root.mkdir() + (root / "s.cpp").write_text(source, encoding="utf-8") + (root / "compile_commands.json").write_text( + json.dumps( + [ + { + "directory": str(root), + "arguments": ["c++", "-std=c++17", str(root / "s.cpp")], + "file": str(root / "s.cpp"), + } + ] + ), + encoding="utf-8", + ) + + +def test_module_scope_initializer_calls_attributed_to_module(temp_repo: Path) -> None: + root = temp_repo / "initproj" + _write_single(root, _INIT_SRC) + + ingestor = MagicMock() + run_cpp_frontend(ingestor, root, root.name, root) + + calls = _calls(ingestor) + # (H) The two initializer calls collapse to a single Module -> compute edge (the + # (H) edge set dedups), matching the caller the tree-sitter path uses. + assert any( + from_label == "Module" + and to_label == "Function" + and to_qn.endswith(".m.compute") + for from_label, from_qn, to_label, to_qn in calls + ), f"expected Module CALLS compute for initializer calls, got {calls}" diff --git a/codebase_rag/tests/test_cpp_frontend_qn_parity.py b/codebase_rag/tests/test_cpp_frontend_qn_parity.py new file mode 100644 index 000000000..1b0d301e9 --- /dev/null +++ b/codebase_rag/tests/test_cpp_frontend_qn_parity.py @@ -0,0 +1,218 @@ +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from codebase_rag.parsers.cpp_frontend import cpp_frontend_available, run_cpp_frontend +from codebase_rag.tests.conftest import get_nodes, get_qualified_names, run_updater + +pytestmark = pytest.mark.skipif( + not cpp_frontend_available(), + reason="libclang not available", +) + +# (H) A macro-free C++ corpus: a namespaced class declared in a header with +# (H) in-class declarations + one inline method, its out-of-line 
definitions in +# (H) the .cpp, a free-function prototype in the header, and free-function +# (H) definitions in the .cpp. Macro-free so the tree-sitter path parses it +# (H) correctly and its qualified names are the ground truth the libclang +# (H) frontend must reproduce exactly (the issue #46 acceptance test). +HEADER = """ +namespace geo { + +class Shape { +public: + Shape(double x); + virtual ~Shape(); + double area() const; + virtual void describe(); + int inline_helper() { return 7; } +}; + +int free_proto(int n); + +} // namespace geo +""" + +SRC = """ +#include "geometry.h" + +namespace geo { + +Shape::Shape(double x) {} +Shape::~Shape() {} +double Shape::area() const { return 1.0; } +void Shape::describe() {} + +int free_proto(int n) { return n + 1; } + +int only_in_cpp(int a) { return a; } + +} // namespace geo +""" + +_LABELS = ("Class", "Function", "Method") + + +def _write_project(root: Path) -> None: + root.mkdir() + (root / "geometry.h").write_text(HEADER, encoding="utf-8") + (root / "geometry.cpp").write_text(SRC, encoding="utf-8") + compile_commands = [ + { + "directory": str(root), + "arguments": ["c++", "-std=c++17", str(root / "geometry.cpp")], + "file": str(root / "geometry.cpp"), + } + ] + (root / "compile_commands.json").write_text( + json.dumps(compile_commands), encoding="utf-8" + ) + + +def _qns_by_label(ingestor: MagicMock) -> dict[str, set[str]]: + return {label: get_qualified_names(get_nodes(ingestor, label)) for label in _LABELS} + + +def test_frontend_qns_match_tree_sitter(temp_repo: Path) -> None: + root = temp_repo / "geomproj" + _write_project(root) + + ts_ingestor = MagicMock() + run_updater(root, ts_ingestor) + ts_qns = _qns_by_label(ts_ingestor) + + fe_ingestor = MagicMock() + run_cpp_frontend(fe_ingestor, root, root.name, root) + fe_qns = _qns_by_label(fe_ingestor) + + assert fe_qns == ts_qns, ( + f"frontend/tree-sitter qn mismatch:\n" + f" frontend only: { {k: fe_qns[k] - ts_qns[k] for k in _LABELS} }\n" + f" tree-sitter 
only: { {k: ts_qns[k] - fe_qns[k] for k in _LABELS} }" + ) + + +def _write_cpp_project(root: Path, header_name: str, header: str, src: str) -> None: + root.mkdir() + cpp_name = f"{Path(header_name).stem}.cpp" + (root / header_name).write_text(header, encoding="utf-8") + (root / cpp_name).write_text(src, encoding="utf-8") + compile_commands = [ + { + "directory": str(root), + "arguments": ["c++", "-std=c++17", str(root / cpp_name)], + "file": str(root / cpp_name), + } + ] + (root / "compile_commands.json").write_text( + json.dumps(compile_commands), encoding="utf-8" + ) + + +# (H) A macro that tree-sitter cannot expand: `struct WIDGET_API Widget` is +# (H) mis-parsed (WIDGET_API is read as the type), so cgr loses the `Widget` +# (H) class entirely. libclang expands the macro and recovers it with its true +# (H) multi-line span. This is the whole reason the frontend exists. +_MACRO_HEADER = """ +#define WIDGET_API + +namespace ui { + +struct WIDGET_API Widget { + int handle; + void show(); + void hide(); +}; + +} // namespace ui +""" + +_MACRO_SRC = """ +#include "widget.h" +namespace ui { +void Widget::show() {} +void Widget::hide() {} +} +""" + + +def test_frontend_recovers_macro_mangled_class(temp_repo: Path) -> None: + root = temp_repo / "macroproj" + _write_cpp_project(root, "widget.h", _MACRO_HEADER, _MACRO_SRC) + + ts_ingestor = MagicMock() + run_updater(root, ts_ingestor) + ts_classes = get_qualified_names(get_nodes(ts_ingestor, "Class")) + + fe_ingestor = MagicMock() + run_cpp_frontend(fe_ingestor, root, root.name, root) + fe_class_nodes = get_nodes(fe_ingestor, "Class") + fe_classes = get_qualified_names(fe_class_nodes) + + # (H) tree-sitter loses Widget to the macro; the frontend recovers it. 
+ assert not any(q.endswith(".ui.Widget") for q in ts_classes), ( + f"expected tree-sitter to mis-parse Widget, got {ts_classes}" + ) + assert any(q.endswith(".ui.Widget") for q in fe_classes), ( + f"frontend did not recover Widget: {fe_classes}" + ) + + widget = next( + c[0][1] for c in fe_class_nodes if c[0][1]["qualified_name"].endswith(".Widget") + ) + assert widget["end_line"] > widget["start_line"], ( + f"expected a real multi-line span for Widget, got {widget}" + ) + + +_INHERIT_HEADER = """ +namespace geo { + +class Base { +public: + virtual void run(); +}; + +class Derived : public Base { +public: + void run(); + Derived operator+(const Derived& o) const; +}; + +} // namespace geo +""" + +_INHERIT_SRC = """ +#include "shapes.h" +namespace geo { +void Base::run() {} +void Derived::run() {} +Derived Derived::operator+(const Derived& o) const { return *this; } +} +""" + + +def test_frontend_emits_inheritance_and_operator(temp_repo: Path) -> None: + root = temp_repo / "shapesproj" + _write_cpp_project(root, "shapes.h", _INHERIT_HEADER, _INHERIT_SRC) + + fe_ingestor = MagicMock() + run_cpp_frontend(fe_ingestor, root, root.name, root) + + methods = get_qualified_names(get_nodes(fe_ingestor, "Method")) + assert any(q.endswith(".geo.Derived.operator_plus") for q in methods), ( + f"operator+ not converted: {sorted(methods)}" + ) + + inherits = [ + (c.args[0][2], c.args[2][2]) + for c in fe_ingestor.ensure_relationship_batch.call_args_list + if c.args[1] == "INHERITS" + ] + assert any( + src.endswith(".geo.Derived") and dst.endswith(".Base") for src, dst in inherits + ), f"expected Derived INHERITS Base, got {inherits}" diff --git a/codebase_rag/tests/test_cpp_frontend_types.py b/codebase_rag/tests/test_cpp_frontend_types.py new file mode 100644 index 000000000..803448ef4 --- /dev/null +++ b/codebase_rag/tests/test_cpp_frontend_types.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import MagicMock 
+ +import pytest + +from codebase_rag.parsers.cpp_frontend import cpp_frontend_available, run_cpp_frontend +from codebase_rag.tests.conftest import get_nodes, get_qualified_names + +pytestmark = pytest.mark.skipif( + not cpp_frontend_available(), + reason="libclang not available", +) + +# (H) C++ type aliases: namespace-scoped `using`/`typedef` and a class-scoped +# (H) member alias. The tree-sitter path emits no Type nodes for these, so the +# (H) frontend adds them (mirroring how Go/Rust type decls become Type nodes). +_HEADER = """ +namespace n { + +using Meters = double; +typedef int Count; + +class Box { +public: + using Handle = int; +}; + +} // namespace n +""" + +_SRC = '#include "types.h"\n' + + +def _write(root: Path) -> None: + root.mkdir() + (root / "types.h").write_text(_HEADER, encoding="utf-8") + (root / "types.cpp").write_text(_SRC, encoding="utf-8") + (root / "compile_commands.json").write_text( + json.dumps( + [ + { + "directory": str(root), + "arguments": ["c++", "-std=c++17", str(root / "types.cpp")], + "file": str(root / "types.cpp"), + } + ] + ), + encoding="utf-8", + ) + + +def test_frontend_emits_type_aliases(temp_repo: Path) -> None: + root = temp_repo / "typesproj" + _write(root) + + ingestor = MagicMock() + run_cpp_frontend(ingestor, root, root.name, root) + + types = get_qualified_names(get_nodes(ingestor, "Type")) + assert any(q.endswith(".n.Meters") for q in types), f"missing using alias: {types}" + assert any(q.endswith(".n.Count") for q in types), f"missing typedef: {types}" + assert any(q.endswith(".n.Box.Handle") for q in types), ( + f"missing class-scoped alias: {types}" + ) + + defines = [ + (c.args[0][0], c.args[0][2], c.args[2][2]) + for c in ingestor.ensure_relationship_batch.call_args_list + if c.args[1] == "DEFINES" + ] + # (H) namespace-scoped alias defined by its Module; member alias by its Class. 
+ assert any( + src_label == "Module" and child.endswith(".n.Meters") + for src_label, _, child in defines + ), f"Module should DEFINE Meters: {defines}" + assert any( + src_label == "Class" + and src_qn.endswith(".n.Box") + and child.endswith(".n.Box.Handle") + for src_label, src_qn, child in defines + ), f"Box should DEFINE Handle: {defines}" diff --git a/codebase_rag/tests/test_cpp_frontend_wiring.py b/codebase_rag/tests/test_cpp_frontend_wiring.py new file mode 100644 index 000000000..f2e167dbe --- /dev/null +++ b/codebase_rag/tests/test_cpp_frontend_wiring.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from codebase_rag import constants as cs +from codebase_rag import graph_updater as gu +from codebase_rag.parsers.cpp_frontend import cpp_frontend_available +from codebase_rag.tests.conftest import get_nodes, get_qualified_names, run_updater + +pytestmark = pytest.mark.skipif( + not cpp_frontend_available(), + reason="libclang not available", +) + +# (H) `struct WIDGET_API Widget` is a macro tree-sitter cannot expand: it loses +# (H) the Widget class. The libclang frontend recovers it. The wiring decides +# (H) which path runs, gated on CPP_FRONTEND + a discoverable compile_commands. 
+_HEADER = """ +#define WIDGET_API + +namespace ui { + +struct WIDGET_API Widget { + int handle; + void show(); +}; + +} // namespace ui +""" + +_SRC = """ +#include "widget.h" +namespace ui { +void Widget::show() {} +} +""" + + +def _write_project(root: Path) -> None: + root.mkdir() + (root / "widget.h").write_text(_HEADER, encoding="utf-8") + (root / "widget.cpp").write_text(_SRC, encoding="utf-8") + (root / "compile_commands.json").write_text( + json.dumps( + [ + { + "directory": str(root), + "arguments": ["c++", "-std=c++17", str(root / "widget.cpp")], + "file": str(root / "widget.cpp"), + } + ] + ), + encoding="utf-8", + ) + + +def test_default_treesitter_does_not_recover_macro_class(temp_repo: Path) -> None: + root = temp_repo / "defaultproj" + _write_project(root) + + ingestor = MagicMock() + run_updater(root, ingestor) + classes = get_qualified_names(get_nodes(ingestor, "Class")) + + # (H) No regression: with the default flag, indexing is the tree-sitter path, + # (H) which mis-parses the macro and never produces ui.Widget. + assert not any(q.endswith(".ui.Widget") for q in classes), ( + f"default path should not engage the frontend: {classes}" + ) + + +def test_libclang_frontend_recovers_macro_class( + temp_repo: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + root = temp_repo / "libclangproj" + _write_project(root) + + monkeypatch.setattr(gu.settings, "CPP_FRONTEND", cs.CppFrontend.LIBCLANG) + + ingestor = MagicMock() + run_updater(root, ingestor) + + classes = get_qualified_names(get_nodes(ingestor, "Class")) + methods = get_qualified_names(get_nodes(ingestor, "Method")) + + # (H) The frontend recovers the real class and binds the out-of-line method. 
+ assert any(q.endswith(".ui.Widget") for q in classes), ( + f"frontend did not recover Widget: {classes}" + ) + assert any(q.endswith(".ui.Widget.show") for q in methods), ( + f"frontend did not bind Widget::show: {methods}" + ) + # (H) The covered file was NOT also processed by tree-sitter (no double-parse + # (H) producing the macro-mangled class). + assert not any(q.endswith(".ui.WIDGET_API") for q in classes), ( + f"tree-sitter should have skipped the covered file: {classes}" + ) diff --git a/codebase_rag/tests/test_cpp_member_field_receiver.py b/codebase_rag/tests/test_cpp_member_field_receiver.py new file mode 100644 index 000000000..03dcc545c --- /dev/null +++ b/codebase_rag/tests/test_cpp_member_field_receiver.py @@ -0,0 +1,250 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.parser_loader import load_parsers +from codebase_rag.parsers.cpp import CppTypeInferenceEngine +from codebase_rag.tests.conftest import get_relationships, run_updater + + +def _first_class_node(source: str): + parsers, _ = load_parsers() + tree = parsers["cpp"].parse(source.encode("utf-8")) + + def walk(node): + if node.type in ("class_specifier", "struct_specifier"): + return node + for child in node.children: + if (found := walk(child)) is not None: + return found + return None + + node = walk(tree.root_node) + assert node is not None + return node + + +def _calls_from(mock_ingestor: MagicMock, caller_suffix: str) -> set[str]: + return { + str(c.args[2][2]) + for c in get_relationships(mock_ingestor, "CALLS") + if str(c.args[0][2]).endswith(caller_suffix) + } + + +# (H) Member data declarations use `field_identifier`, not `identifier`, and a member +# (H) FUNCTION declaration (`void Lock();`) is also a field_declaration but with a +# (H) function_declarator -- only data members are fields. Pointer/qualified/template +# (H) types reduce to a bare type name the resolver can map to a class. 
def test_cpp_build_field_type_map_captures_data_members_only() -> None:
    # (H) The fixture mixes member-function declarations (`Lock`, `Count`) with data
    # (H) members of qualified, pointer, and primitive types.
    class_source = """
class DBImpl {
 public:
  void Lock();
  int Count(int x);
 private:
  port::Mutex mutex_;
  std::string buffer_;
  Foo* ptr_;
  int counter_, extra_;
};
"""
    engine = CppTypeInferenceEngine()
    fields = engine.build_field_type_map(_first_class_node(class_source))
    # (H) Only class-typed data members are expected, each reduced to its bare type
    # (H) name; the primitive `int counter_, extra_;` pair can never be a method-call
    # (H) receiver, so it must be absent.
    expected = {
        "mutex_": "Mutex",
        "buffer_": "string",
        "ptr_": "Foo",
    }
    assert fields == expected, fields
    # (H) Member function declarations must never be recorded as fields.
    assert all(name not in fields for name in ("Lock", "Count"))


# (H) A first-party member field receiver: `mutex_.Lock()` must resolve to the field's
# (H) class method, not fall to a name-only guess. `Alpha` also defines `Lock` and sorts
# (H) before `Mutex`, so the name-only trie fallback deterministically picks the WRONG
# (H) `Alpha.Lock` -- only the field's type disambiguates. The method is defined INLINE
# (H) here, so field decls and the call are in the same AST.
_INLINE_SOURCE = """
namespace ns {

class Alpha {
 public:
  void Lock() {}
};

class Mutex {
 public:
  void Lock() {}
};

class DB {
 public:
  void Run() { mutex_.Lock(); }
 private:
  Mutex mutex_;
};

} // namespace ns
"""


def test_cpp_inline_member_field_call_resolves_to_field_type(
    temp_repo: Path, mock_ingestor: MagicMock
) -> None:
    # (H) Single-file project: the class, its field, and the inline method body all
    # (H) live in s.cpp.
    project = temp_repo / "cpp_field_inline"
    project.mkdir()
    source_file = project / "s.cpp"
    source_file.write_text(_INLINE_SOURCE, encoding="utf-8")

    run_updater(project, mock_ingestor)

    callees = _calls_from(mock_ingestor, ".DB.Run")
    # (H) Partition the callees of DB.Run by which `Lock` they landed on.
    bound_to_mutex = [c for c in callees if c.endswith(".Mutex.Lock")]
    bound_to_alpha = [c for c in callees if c.endswith(".Alpha.Lock")]
    assert bound_to_mutex, (
        f"mutex_.Lock() should resolve to Mutex.Lock via the field type, got {callees}"
    )
    assert not bound_to_alpha, (
        f"mutex_.Lock() must not resolve to the same-named Alpha.Lock, got {callees}"
    )


# (H) The real leveldb shape: the class (with its fields) is declared in a header and
# (H) the method is defined OUT-OF-LINE in a .cc, so the field declarations live in a
# (H) different translation unit than the method body. Field types must therefore be
# (H) captured at class ingestion and looked up by the enclosing class qn.
# (H) `Alpha.Lock` (sorts before `Mutex.Lock`) and `Buf.clear` are same-named
# (H) first-party competitors: without field-type inference the name-only trie binds
# (H) mutex_.Lock() to Alpha.Lock and buffer_.clear() to Buf.clear.
_HEADER = """
namespace ns {

class Alpha {
 public:
  void Lock();
};

class Mutex {
 public:
  void Lock();
};

class Buf {
 public:
  void clear();
};

class DB {
 public:
  void Run();
 private:
  Mutex mutex_;
  std::string buffer_;
};

} // namespace ns
"""

_IMPL = """
#include "db.h"

namespace ns {

void Mutex::Lock() {}

void DB::Run() {
  mutex_.Lock();
  buffer_.clear();
}

} // namespace ns
"""


def test_cpp_out_of_line_member_field_call_resolves_cross_file(
    temp_repo: Path, mock_ingestor: MagicMock
) -> None:
    # (H) Two-file project: declarations in db.h, out-of-line definitions in db.cc.
    project = temp_repo / "cpp_field_xfile"
    project.mkdir()
    for file_name, content in (("db.h", _HEADER), ("db.cc", _IMPL)):
        (project / file_name).write_text(content, encoding="utf-8")

    run_updater(project, mock_ingestor)

    callees = _calls_from(mock_ingestor, ".DB.Run")
    # (H) FN side: the first-party Mutex field's method must be found across files.
    reaches_mutex_lock = any(c.endswith(".Mutex.Lock") for c in callees)
    assert reaches_mutex_lock, (
        f"mutex_.Lock() should resolve to Mutex.Lock across files, got {callees}"
    )
    # (H) FP side: buffer_ is a std::string (external), so buffer_.clear() must NOT
    # (H) be rebound to any first-party clear method.
    reaches_any_clear = any(c.endswith(".clear") for c in callees)
    assert not reaches_any_clear, (
        f"buffer_.clear() on a std::string field must not resolve first-party, "
        f"got {callees}"
    )


# (H) Member fields are frequently declared inside preprocessor conditionals
# (H) (`#ifdef`), which wrap the field_declaration in a preproc_ifdef node. Iterating
# (H) only the class body's direct children misses them; the collector must recurse
# (H) through preprocessor blocks while still skipping nested type/function scopes.
+def test_cpp_build_field_type_map_captures_fields_in_preproc_blocks() -> None: + src = """ +class C { + private: + Mutex mutex_; +#ifdef LEVELDB_FOO + Slice cond_; +#endif +}; +""" + fields = CppTypeInferenceEngine().build_field_type_map(_first_class_node(src)) + assert fields == {"mutex_": "Mutex", "cond_": "Slice"}, fields + + +# (H) A derived class accesses fields inherited from its base. The receiver map must +# (H) collect fields along the inheritance chain (derived shadows base). `Alpha.Lock` +# (H) is a same-named competitor that sorts first, so name-only resolution picks it; +# (H) only the inherited field type resolves `mutex_.Lock()` to Mutex.Lock. +_INHERITED_SOURCE = """ +namespace ns { + +class Alpha { + public: + void Lock() {} +}; + +class Mutex { + public: + void Lock() {} +}; + +class Base { + protected: + Mutex mutex_; +}; + +class Derived : public Base { + public: + void Run() { mutex_.Lock(); } +}; + +} // namespace ns +""" + + +def test_cpp_inherited_member_field_resolves( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "cpp_field_inherit" + project.mkdir() + (project / "s.cpp").write_text(_INHERITED_SOURCE, encoding="utf-8") + + run_updater(project, mock_ingestor) + + callees = _calls_from(mock_ingestor, ".Derived.Run") + assert any(c.endswith(".Mutex.Lock") for c in callees), ( + f"inherited field mutex_.Lock() should resolve to Mutex.Lock, got {callees}" + ) + assert not any(c.endswith(".Alpha.Lock") for c in callees), ( + f"inherited mutex_.Lock() must not resolve to Alpha.Lock, got {callees}" + ) diff --git a/codebase_rag/tests/test_cpp_namespace_call_caller_qn.py b/codebase_rag/tests/test_cpp_namespace_call_caller_qn.py new file mode 100644 index 000000000..2eda813e5 --- /dev/null +++ b/codebase_rag/tests/test_cpp_namespace_call_caller_qn.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.tests.conftest import ( + 
get_nodes, + get_qualified_names, + get_relationships, + run_updater, +) + +# (H) A free function and an inline class method, both inside a namespace, each +# (H) calling a namespaced free function. The definition pass binds their nodes +# (H) WITH the enclosing namespace (qn `...ns.free_caller`, `...ns.K.method`), but +# (H) the call pass built the caller qn WITHOUT the namespace (`...free_caller`, +# (H) `...K.method`), so every such CALLS edge's source dangled (matched no node) +# (H) and the call was lost. On real namespaced C++ (e.g. all of leveldb, in +# (H) `namespace leveldb`) this silently dropped the bulk of cross-file calls. The +# (H) caller qn must include the enclosing namespace, matching the node. +CPP_SOURCE = """ +namespace acme { + +int callee(int x) { return x + 1; } + +int free_caller(int a) { return callee(a); } + +class K { +public: + int method(int b) { return callee(b); } +}; + +} // namespace acme +""" + + +def test_namespaced_callers_attribute_calls_to_namespaced_qn( + temp_repo: Path, + mock_ingestor: MagicMock, +) -> None: + project = temp_repo / "cpp_ns_calls" + project.mkdir() + (project / "sample.cpp").write_text(CPP_SOURCE, encoding="utf-8") + + run_updater(project, mock_ingestor) + + free_qn = next( + ( + q + for q in get_qualified_names(get_nodes(mock_ingestor, "Function")) + if q.endswith(".acme.free_caller") + ), + None, + ) + method_qn = next( + ( + q + for q in get_qualified_names(get_nodes(mock_ingestor, "Method")) + if q.endswith(".acme.K.method") + ), + None, + ) + assert free_qn is not None, "no ns.free_caller Function node" + assert method_qn is not None, "no ns.K.method Method node" + + calls = get_relationships(mock_ingestor, "CALLS") + # (H) ensure_relationship_batch(from_spec, rel_type, to_spec): from_spec[2] is + # (H) the caller qn, to_spec[2] the callee qn. 
+ callers_of_callee = { + c.args[0][2] for c in calls if str(c.args[2][2]).endswith(".callee") + } + assert free_qn in callers_of_callee, ( + f"expected CALLS from {free_qn} to callee; got {sorted(callers_of_callee)}" + ) + assert method_qn in callers_of_callee, ( + f"expected CALLS from {method_qn} to callee; got {sorted(callers_of_callee)}" + ) diff --git a/codebase_rag/tests/test_cpp_oracle.py b/codebase_rag/tests/test_cpp_oracle.py new file mode 100644 index 000000000..bfed9aac5 --- /dev/null +++ b/codebase_rag/tests/test_cpp_oracle.py @@ -0,0 +1,215 @@ +# (H) Covers the C++ structure oracle (evals/oracles/cpp_oracle.py): a libclang +# (H) oracle driven by a compile_commands.json resolves #includes and expands +# (H) macros to the true translation-unit AST, which tree-sitter cannot do. cgr's +# (H) C++ nodes, containment edges, and spans are graded against it on +# (H) (kind, file, start_line). The sample exercises a header-declared class +# (H) (resolved via an -I include path), a macro-typed method, out-of-class method +# (H) definitions, a constructor, an inline method, a struct, and a free function. 
+from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_cpp_graph, restrict_to_files +from evals.oracles import cpp_available, run_cpp_oracle +from evals.score import ( + score_edge_types, + score_name_edge_types, + score_node_kinds, + score_span, +) +from evals.types_defs import ( + DefNode, + EdgeKey, + GraphData, + NameEdge, + NodeKey, + ScoreRow, +) + +SHAPE_H = """\ +#pragma once +#define AREA_T double + +struct Point { + int x; + int y; +}; + +class Shape { +public: + Shape(int id); + AREA_T area() const; + void scale( + double factor + ); + int inline_id() const { return id_; } +private: + int id_; +}; +""" + +SHAPE_CPP = """\ +#include "shape.h" + +Shape::Shape(int id) : id_(id) { +} + +AREA_T Shape::area() const { + return 1.0; +} + +void Shape::scale(double factor) { + id_ = static_cast<int>(factor); +} + +int helper(int n) { + return n * 2; +} +""" + + +def _require_cpp() -> None: + if not cpp_available(): + pytest.skip("libclang not available") + if cs.SupportedLanguage.CPP not in load_parsers()[0]: + pytest.skip("cpp parser not available") + + +def _aggregate(rows: list[ScoreRow]) -> ScoreRow | None: + return next((r for r in rows if r["label"] == ec.AGGREGATE_LABEL), None) + + +def test_cgr_matches_libclang_oracle_on_cpp_structure(tmp_path: Path) -> None: + _require_cpp() + project = tmp_path / "cpp_proj" + (project / "include").mkdir(parents=True) + (project / "src").mkdir(parents=True) + (project / "include" / "shape.h").write_text(SHAPE_H, encoding="utf-8") + (project / "src" / "shape.cpp").write_text(SHAPE_CPP, encoding="utf-8") + + src = (project / "src" / "shape.cpp").resolve() + include = (project / "include").resolve() + compdb = [ + { + "directory": str(project.resolve()), + "file": str(src), + "command": f"clang++ -std=c++17 -I{include} -c 
{src}", + } + ] + (project / ec.CPP_COMPDB_FILENAME).write_text(json.dumps(compdb), encoding="utf-8") + + cgr = extract_cgr_cpp_graph(project, project.name) + oracle = run_cpp_oracle(project) + + for label, result in ( + ("nodes", score_node_kinds(cgr, oracle, ec.CPP_SCORED_NODE_KINDS)), + ("edges", score_edge_types(cgr, oracle, ec.SCORED_EDGE_TYPES)), + ("spans", score_span(cgr, oracle, ec.CPP_SCORED_NODE_KINDS)), + ): + aggregate = _aggregate(result.rows) + assert aggregate is not None, (label, result.rows, result.diff) + assert aggregate["precision"] == 1.0 and aggregate["recall"] == 1.0, ( + label, + aggregate, + result.diff, + ) + # (H) Guard the sample is non-trivial (class + struct + 4 methods + function). + node_aggregate = _aggregate( + score_node_kinds(cgr, oracle, ec.CPP_SCORED_NODE_KINDS).rows + ) + assert node_aggregate is not None and node_aggregate["tp"] >= 7, node_aggregate + + +INHERIT_H = """\ +#pragma once +struct Base { int v; }; +struct Derived : public Base { + int w; +}; +""" + +INHERIT_CPP = """\ +#include "shapes.h" + +int use(Derived d) { + return d.v + d.w; +} +""" + + +def test_libclang_oracle_emits_inherits_edges(tmp_path: Path) -> None: + # (H) The oracle must emit a base-class (CXX_BASE_SPECIFIER) edge as an INHERITS + # (H) name edge keyed by the base's simple name, matching cgr; otherwise cgr's + # (H) real inheritance edges are graded against an empty oracle set (all fp). 
+ _require_cpp() + project = tmp_path / "inh_proj" + (project / "include").mkdir(parents=True) + (project / "src").mkdir(parents=True) + (project / "include" / "shapes.h").write_text(INHERIT_H, encoding="utf-8") + (project / "src" / "use.cpp").write_text(INHERIT_CPP, encoding="utf-8") + + src = (project / "src" / "use.cpp").resolve() + include = (project / "include").resolve() + compdb = [ + { + "directory": str(project.resolve()), + "file": str(src), + "command": f"clang++ -std=c++17 -I{include} -c {src}", + } + ] + (project / ec.CPP_COMPDB_FILENAME).write_text(json.dumps(compdb), encoding="utf-8") + + cgr = extract_cgr_cpp_graph(project, project.name) + oracle = run_cpp_oracle(project) + + result = score_name_edge_types(cgr, oracle, ec.INHERITANCE_NAME_EDGE_TYPES) + aggregate = _aggregate(result.rows) + assert aggregate is not None, (result.rows, result.diff) + assert aggregate["tp"] >= 1, (aggregate, result.diff) + assert aggregate["precision"] == 1.0 and aggregate["recall"] == 1.0, ( + aggregate, + result.diff, + ) + + +def test_restrict_to_files_scopes_graph_to_universe() -> None: + # (H) Scale grading over a compile_commands.json must score cgr only on the + # (H) files the oracle actually compiled; restrict_to_files drops cgr nodes, + # (H) edges, and name edges that touch any out-of-universe file. 
+ keep = "include/a.h" + drop = "test/gtest.h" + mod_keep = NodeKey(cs.NodeLabel.MODULE.value, keep, ec.MODULE_START_LINE) + cls_keep = NodeKey(cs.NodeLabel.CLASS.value, keep, 3) + cls_drop = NodeKey(cs.NodeLabel.CLASS.value, drop, 5) + graph = GraphData( + nodes={ + cls_keep: DefNode(cls_keep, "Keep", 9), + cls_drop: DefNode(cls_drop, "Drop", 11), + }, + edges={ + EdgeKey(cs.RelationshipType.DEFINES.value, mod_keep, cls_keep), + EdgeKey( + cs.RelationshipType.DEFINES.value, + NodeKey(cs.NodeLabel.MODULE.value, drop, ec.MODULE_START_LINE), + cls_drop, + ), + }, + name_edges={ + NameEdge(cs.RelationshipType.INHERITS.value, cls_keep, "Other"), + NameEdge(cs.RelationshipType.INHERITS.value, cls_drop, "Other"), + }, + ) + + scoped = restrict_to_files(graph, {keep}) + + assert set(scoped.nodes) == {cls_keep} + assert all(e.parent.file == keep and e.child.file == keep for e in scoped.edges) + assert len(scoped.edges) == 1 + assert {n.source.file for n in scoped.name_edges} == {keep} + assert len(scoped.name_edges) == 1 diff --git a/codebase_rag/tests/test_cpp_out_of_class_method_calls.py b/codebase_rag/tests/test_cpp_out_of_class_method_calls.py new file mode 100644 index 000000000..27173dc76 --- /dev/null +++ b/codebase_rag/tests/test_cpp_out_of_class_method_calls.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.tests.conftest import ( + get_nodes, + get_qualified_names, + get_relationships, + run_updater, +) + +# (H) An out-of-line C++ method definition (`int Calculator::add(...) {...}` at +# (H) namespace/file scope) calling a free function. cgr's definition pass binds +# (H) the METHOD node to the class (qn `...Calculator.add`), but the call pass +# (H) computed the caller qn as a module-rooted free function (`...calc.add`), +# (H) so the CALLS edge's source dangled (matched no node). 
The caller of a call +# (H) inside an out-of-line method body must be the method's own node qn. +CPP_SOURCE = """ +class Calculator { +public: + int add(int a, int b); +}; + +int helper_fn(int x) { return x + 1; } + +int Calculator::add(int a, int b) { + return helper_fn(a) + b; +} +""" + + +def test_out_of_class_method_call_attributed_to_method_qn( + temp_repo: Path, + mock_ingestor: MagicMock, +) -> None: + project = temp_repo / "cpp_ooc_calls" + project.mkdir() + (project / "calc.cpp").write_text(CPP_SOURCE, encoding="utf-8") + + run_updater(project, mock_ingestor) + + method_qns = get_qualified_names(get_nodes(mock_ingestor, "Method")) + add_qn = next((q for q in method_qns if q.endswith(".Calculator.add")), None) + assert add_qn is not None, f"no Calculator.add Method node: {method_qns}" + + calls = get_relationships(mock_ingestor, "CALLS") + # (H) ensure_relationship_batch(from_spec, rel_type, to_spec): from_spec[2] is + # (H) the caller qn, to_spec[2] the callee qn. + callers_of_helper = { + c.args[0][2] for c in calls if "helper_fn" in str(c.args[2][2]) + } + assert add_qn in callers_of_helper, ( + f"expected CALLS from {add_qn} to helper_fn; " + f"got callers {sorted(callers_of_helper)}" + ) diff --git a/codebase_rag/tests/test_cpp_receiver_type_dispatch.py b/codebase_rag/tests/test_cpp_receiver_type_dispatch.py new file mode 100644 index 000000000..2b724a5ff --- /dev/null +++ b/codebase_rag/tests/test_cpp_receiver_type_dispatch.py @@ -0,0 +1,275 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.parser_loader import load_parsers +from codebase_rag.parsers.cpp import CppTypeInferenceEngine +from codebase_rag.tests.conftest import ( + get_relationships, + run_updater, +) + +# (H) Two classes define a method of the same name; only the receiver's type tells +# (H) which one a call `z->run()` / `z.run()` targets. 
# (H) cgr resolved C++ member calls
# (H) by the bare method name (the field_expression yielded only `run`), so the
# (H) name-only trie fallback bound every `run()` call to whichever `run` sorted
# (H) first (`Alpha.run`), regardless of the receiver. With receiver type inference
# (H) (parameter/local var -> bare class name), `z` is a `Zeta`, so the call must
# (H) resolve to `Zeta.run`. `Alpha` sorts before `Zeta`, so the wrong (old) answer
# (H) is deterministic and this test is a real RED.
CPP_SOURCE = """
namespace ns {

class Alpha {
 public:
  int run() { return 1; }
};

class Zeta {
 public:
  int run() { return 2; }
};

int use_ptr(Zeta* z) { return z->run(); }

int use_val(Zeta z) { return z.run(); }

} // namespace ns
"""


def _calls_to_run(mock_ingestor: MagicMock) -> dict[str, str]:
    # (H) caller-qn -> callee-qn for each CALLS edge whose callee's last dotted
    # (H) segment is `run`; a later edge from the same caller replaces an earlier one.
    return {
        str(call.args[0][2]): callee_qn
        for call in get_relationships(mock_ingestor, "CALLS")
        if (callee_qn := str(call.args[2][2])).rpartition(".")[2] == "run"
    }


def test_cpp_member_call_resolves_via_receiver_type(
    temp_repo: Path,
    mock_ingestor: MagicMock,
) -> None:
    project = temp_repo / "cpp_recv"
    project.mkdir()
    source_file = project / "s.cpp"
    source_file.write_text(CPP_SOURCE, encoding="utf-8")

    run_updater(project, mock_ingestor)

    calls = _calls_to_run(mock_ingestor)
    # (H) Locate the two free-function callers by the tail of their qualified name.
    ptr_caller = next(q for q in calls if q.endswith(".use_ptr"))
    val_caller = next(q for q in calls if q.endswith(".use_val"))

    ptr_resolved_to_zeta = calls[ptr_caller].endswith(".Zeta.run")
    assert ptr_resolved_to_zeta, (
        f"z->run() should resolve to Zeta.run, got {calls[ptr_caller]}"
    )
    val_resolved_to_zeta = calls[val_caller].endswith(".Zeta.run")
    assert val_resolved_to_zeta, (
        f"z.run() should resolve to Zeta.run, got {calls[val_caller]}"
    )


def _first_function_node(source: str):
    # (H) Parse `source` with the cpp parser and return the first
    # (H) `function_definition` node in pre-order (parent before children, children
    # (H) left to right). Asserts that one exists.
    parsers, _ = load_parsers()
    tree = parsers["cpp"].parse(source.encode("utf-8"))

    found = None
    pending = [tree.root_node]
    while pending:
        candidate = pending.pop()
        if candidate.type == "function_definition":
            found = candidate
            break
        # (H) Push children reversed so the leftmost child is popped first.
        pending.extend(reversed(candidate.children))

    assert found is not None
    return found


# (H) A C++ reference parameter (`Alpha& ar`) parses as a `reference_declarator` that
# (H) holds its identifier as a POSITIONAL child, not under the `declarator` field the
# (H) way `pointer_declarator` does. The field-only unwrap in `_declarator_name` stalled
# (H) on it, so reference parameters/locals never entered the type map and their member
# (H) calls fell back to bare-name resolution. References are pervasive in C++, so this
# (H) is a real coverage hole.
def test_cpp_reference_parameter_maps_to_type() -> None:
    node = _first_function_node("void f(Alpha& ar, Zeta* zp) { }")
    engine = CppTypeInferenceEngine()
    var_types = engine.build_local_variable_type_map(node, "m")
    ar_type = var_types.get("ar")
    assert ar_type == "Alpha", (
        f"reference parameter ar should map to Alpha, got {var_types}"
    )
    zp_type = var_types.get("zp")
    assert zp_type == "Zeta", (
        f"pointer parameter zp should map to Zeta, got {var_types}"
    )


_LOCAL_REF_SOURCE = """
namespace ns {

class Alpha {
 public:
  int run() { return 1; }
};

class Zeta {
 public:
  int run() { return 2; }
};

int use_local_ref(Zeta& zr) { Zeta& z = zr; return z.run(); }

} // namespace ns
"""


def test_cpp_local_reference_receiver_resolves(
    temp_repo: Path,
    mock_ingestor: MagicMock,
) -> None:
    project = temp_repo / "cpp_local_ref"
    project.mkdir()
    source_file = project / "s.cpp"
    source_file.write_text(_LOCAL_REF_SOURCE, encoding="utf-8")

    run_updater(project, mock_ingestor)

    calls = _calls_to_run(mock_ingestor)
    caller = next(q for q in calls if q.endswith(".use_local_ref"))
    resolved_to_zeta = calls[caller].endswith(".Zeta.run")
    assert resolved_to_zeta, (
        f"z.run() on a `Zeta& z` should resolve to Zeta.run, got {calls[caller]}"
    )


# (H) The type map is keyed by name with no call-position context, so it cannot model
# (H) true lexical scope: it cannot tell an outer `Zeta z` from an
# (H) inner-block `Alpha z`
# (H) that shadows it. Picking either write order emits a confidently wrong typed edge
# (H) for one of the two scopes. Instead a name declared with conflicting types is NOT
# (H) inferred at all -- the call falls back to name-only resolution rather than getting
# (H) a wrong edge. An inner-block local whose name does NOT collide is still recorded,
# (H) so recall for the common case is preserved.
def test_cpp_conflicting_shadow_type_is_not_inferred() -> None:
    # (H) Outer `Zeta z` is shadowed by an inner-block `Alpha z`.
    node = _first_function_node("void f() { Zeta z; { Alpha z; } }")
    engine = CppTypeInferenceEngine()
    var_types = engine.build_local_variable_type_map(node, "m")
    z_was_inferred = "z" in var_types
    assert not z_was_inferred, (
        f"a name shadowed by a different type must not be inferred, got {var_types}"
    )


def test_cpp_non_conflicting_inner_block_local_is_recorded() -> None:
    # (H) `x` is declared once, inside an `if` block, so nothing collides with it.
    node = _first_function_node("void f() { if (c) { Foo x; } }")
    engine = CppTypeInferenceEngine()
    var_types = engine.build_local_variable_type_map(node, "m")
    x_type = var_types.get("x")
    assert x_type == "Foo", (
        f"an inner-block local with no name collision should resolve, got {var_types}"
    )


# (H) One C++ declaration statement can declare several variables (`Zeta a, b;`), each
# (H) exposed as its own `declarator` field child. Recording only the first left `b`
# (H) unmapped, so `b.run()` fell back to bare-name resolution. Every declarator in the
# (H) statement shares the leading type and must be recorded, including mixed
# (H) pointer/plain forms (`Foo* p, q;`).
def test_cpp_multi_declarator_declaration_maps_all_names() -> None:
    # (H) Two multi-declarator statements: a plain pair and a mixed pointer/plain pair.
    node = _first_function_node("void f() { Zeta a, b; Foo* p, q; }")
    engine = CppTypeInferenceEngine()
    var_types = engine.build_local_variable_type_map(node, "m")
    zeta_pair_mapped = all(var_types.get(name) == "Zeta" for name in ("a", "b"))
    assert zeta_pair_mapped, (
        f"both a and b should map to Zeta, got {var_types}"
    )
    foo_pair_mapped = all(var_types.get(name) == "Foo" for name in ("p", "q"))
    assert foo_pair_mapped, (
        f"both p and q should map to Foo, got {var_types}"
    )


_MULTI_DECL_SOURCE = """
namespace ns {

class Alpha {
 public:
  int run() { return 1; }
};

class Zeta {
 public:
  int run() { return 2; }
};

int use_second(Zeta& zr) { Zeta a = zr, b = zr; return b.run(); }

} // namespace ns
"""


def test_cpp_second_declarator_receiver_resolves(
    temp_repo: Path,
    mock_ingestor: MagicMock,
) -> None:
    project = temp_repo / "cpp_multi_decl"
    project.mkdir()
    source_file = project / "s.cpp"
    source_file.write_text(_MULTI_DECL_SOURCE, encoding="utf-8")

    run_updater(project, mock_ingestor)

    calls = _calls_to_run(mock_ingestor)
    caller = next(q for q in calls if q.endswith(".use_second"))
    # (H) The receiver `b` is the SECOND declarator of `Zeta a = zr, b = zr;`.
    resolved_to_zeta = calls[caller].endswith(".Zeta.run")
    assert resolved_to_zeta, (
        f"b.run() on the second declarator `Zeta b` should resolve to Zeta.run, "
        f"got {calls[caller]}"
    )


# (H) A lambda body opens its own scope. A same-named variable declared inside it must
# (H) not leak into the enclosing function's map: without the scope guard the inner
# (H) `Alpha z` conflicts with the outer `Zeta z`, so drop-on-conflict would discard
# (H) `z` entirely and the outer `z.run()` would fall back to name-only (Alpha.run).
def test_cpp_lambda_local_does_not_leak_into_enclosing_scope() -> None:
    # (H) Outer `Zeta z` plus a lambda whose body declares its own `Alpha z`.
    node = _first_function_node("void f() { Zeta z; auto g = [](){ Alpha z; }; }")
    var_types = CppTypeInferenceEngine().build_local_variable_type_map(node, "m")
    assert var_types.get("z") == "Zeta", (
        f"outer z should stay Zeta despite a lambda-local Alpha z, got {var_types}"
    )


# (H) When a receiver's inferred type is NOT a first-party class (a `std::string`, any
# (H) external/STL type), a member call on it must not fall through to the bare-method
# (H) trie fallback and rebind to a same-named first-party method. Here `s` is a
# (H) `std::string`, so `s.size()` is an external call; it must NOT resolve to the
# (H) first-party `ns.Widget.size`. Before the guard, the trie fallback bound the bare
# (H) `size` to `Widget.size` (the only first-party `size`), a precision bug the C++
# (H) retrieval eval flagged on leveldb.
# (H) The fixture includes <string> so the `std::string` parameter is declared and the
# (H) `#include` directive is well-formed.
_EXTERNAL_RECEIVER_SOURCE = """
#include <string>

namespace ns {

class Widget {
 public:
  int size() { return 1; }
};

int use(std::string s) { return s.size(); }

} // namespace ns
"""


def test_cpp_external_receiver_call_is_not_rebound_to_first_party(
    temp_repo: Path,
    mock_ingestor: MagicMock,
) -> None:
    project = temp_repo / "cpp_ext"
    project.mkdir()
    (project / "s.cpp").write_text(_EXTERNAL_RECEIVER_SOURCE, encoding="utf-8")

    run_updater(project, mock_ingestor)

    # (H) `use` calls std::string::size, which is external; no CALLS edge from it to
    # (H) the first-party Widget.size may be emitted.
+ bad = [ + (str(c.args[0][2]), str(c.args[2][2])) + for c in get_relationships(mock_ingestor, "CALLS") + if str(c.args[0][2]).endswith(".use") + and str(c.args[2][2]).endswith(".Widget.size") + ] + assert not bad, ( + f"std::string.size() must not resolve to first-party Widget.size, got {bad}" + ) diff --git a/codebase_rag/tests/test_cpp_retrieval_eval.py b/codebase_rag/tests/test_cpp_retrieval_eval.py new file mode 100644 index 000000000..7f8eb5607 --- /dev/null +++ b/codebase_rag/tests/test_cpp_retrieval_eval.py @@ -0,0 +1,75 @@ +from pathlib import Path + +import pytest + +from evals import constants as ec +from evals.cpp_retrieval import ( + cgr_cpp_call_edges, + oracle_cpp_call_edges, + score_cpp_retrieval, +) +from evals.oracles import cpp_available + +needs_clang = pytest.mark.skipif( + not cpp_available(), reason="libclang (clang.cindex) not importable" +) + + +def _make_project(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + # (H) No #includes: the fixture parses cleanly regardless of whether an SDK + # (H) libc++ is discoverable, so coverage is deterministic in any CI. All decls + # (H) live inside a namespace, exercising the namespaced caller-qn path (free + # (H) functions and an inline method) that the libclang oracle grades cgr against. 
+ (root / "lib.cc").write_text( + "namespace demo {\n" + "int add(int a, int b) { return a + b; }\n" + "int mul(int a, int b) { return a * b; }\n" + "int orphan(int a) { return a; }\n" + "}\n", + encoding="utf-8", + ) + (root / "main.cc").write_text( + "namespace demo {\n" + "int add(int a, int b);\n" + "int mul(int a, int b);\n" + "int compute(int x) { return add(x, x) + mul(x, x); }\n" + "class Runner {\n" + " public:\n" + " int run(int x) { return compute(x); }\n" + "};\n" + "}\n", + encoding="utf-8", + ) + + +@needs_clang +def test_oracle_captures_first_party_cpp_calls(tmp_path: Path) -> None: + _make_project(tmp_path) + edges, declared, covered = oracle_cpp_call_edges(tmp_path) + + # (H) add(), mul() (in compute), compute() (in Runner::run) are first-party. + assert ("main.cc", "add") in edges + assert ("main.cc", "mul") in edges + assert ("main.cc", "compute") in edges + # (H) orphan is defined but never called -> never a call edge. + assert ("lib.cc", "orphan") not in edges + assert {"add", "mul", "compute", "run", "orphan"} <= declared + # (H) Both header-free sources parse cleanly, so both are graded. 
+ assert {"main.cc", "lib.cc"} <= covered + + +@needs_clang +def test_cgr_matches_oracle_on_clean_cpp_project(tmp_path: Path) -> None: + _make_project(tmp_path) + oracle, declared, covered = oracle_cpp_call_edges(tmp_path) + cgr = cgr_cpp_call_edges(tmp_path, tmp_path.name, declared, covered) + assert cgr == oracle + + +def test_score_cpp_retrieval_prf() -> None: + result = score_cpp_retrieval( + {("a.cc", "f"), ("a.cc", "g")}, {("a.cc", "f"), ("b.cc", "h")} + ) + row = next(r for r in result.rows if r["label"] == ec.CPP_RETRIEVAL_LABEL) + assert (row["tp"], row["fp"], row["fn"]) == (1, 1, 1) diff --git a/codebase_rag/tests/test_cross_project_eval.py b/codebase_rag/tests/test_cross_project_eval.py new file mode 100644 index 000000000..e88238b2a --- /dev/null +++ b/codebase_rag/tests/test_cross_project_eval.py @@ -0,0 +1,61 @@ +from pathlib import Path + +from evals import constants as ec +from evals.cross_project import cgr_cross_package, score_cross_project + + +def _make_monorepo(root: Path) -> None: + # (H) Two sibling top-level packages plus a third; pkg_b and pkg_c both reach + # (H) into pkg_a, which no single-top-level-package corpus exercises. + for pkg in ("pkg_a", "pkg_b", "pkg_c"): + (root / pkg).mkdir(parents=True) + (root / pkg / "__init__.py").write_text("", encoding="utf-8") + (root / "pkg_a" / "core.py").write_text( + "def shared():\n return 1\n", encoding="utf-8" + ) + (root / "pkg_b" / "use.py").write_text( + "from pkg_a.core import shared\n\n\ndef run():\n return shared()\n", + encoding="utf-8", + ) + (root / "pkg_c" / "only_import.py").write_text( + "from pkg_a import core\n", encoding="utf-8" + ) + + +def test_cgr_resolves_cross_package_calls_and_imports(tmp_path: Path) -> None: + _make_monorepo(tmp_path) + calls, imports = cgr_cross_package(tmp_path, tmp_path.name) + project = tmp_path.name + + # (H) pkg_b.use.run() calls pkg_a.core.shared() across the package boundary. 
+ assert (f"{project}.pkg_b.use.run", f"{project}.pkg_a.core.shared") in calls + # (H) both pkg_b and pkg_c import a pkg_a module. + assert (f"{project}.pkg_b.use", f"{project}.pkg_a.core") in imports + assert any(src == f"{project}.pkg_c.only_import" for src, _t in imports) + + +def test_intra_package_edges_are_not_cross_package(tmp_path: Path) -> None: + root = tmp_path / "mono" + (root / "pkg_a").mkdir(parents=True) + (root / "pkg_a" / "__init__.py").write_text("", encoding="utf-8") + (root / "pkg_a" / "a.py").write_text( + "def helper():\n return 1\n", encoding="utf-8" + ) + (root / "pkg_a" / "b.py").write_text( + "from pkg_a.a import helper\n\n\ndef run():\n return helper()\n", + encoding="utf-8", + ) + calls, imports = cgr_cross_package(root, "mono") + # (H) Everything is within pkg_a, so there are no cross-package edges. + assert calls == set() + assert imports == set() + + +def test_score_cross_project_prf() -> None: + cgr = ({("a", "b")}, {("m", "n")}) + oracle = ({("a", "b")}, {("m", "x")}) + result = score_cross_project(cgr, oracle) + calls_row = next(r for r in result.rows if r["label"] == ec.CROSS_CALLS_LABEL) + imports_row = next(r for r in result.rows if r["label"] == ec.CROSS_IMPORTS_LABEL) + assert (calls_row["tp"], calls_row["fp"], calls_row["fn"]) == (1, 0, 0) + assert (imports_row["tp"], imports_row["fp"], imports_row["fn"]) == (0, 1, 1) diff --git a/codebase_rag/tests/test_cypher_validation.py b/codebase_rag/tests/test_cypher_validation.py new file mode 100644 index 000000000..8a1c9017e --- /dev/null +++ b/codebase_rag/tests/test_cypher_validation.py @@ -0,0 +1,258 @@ +import re + +import pytest + +from codebase_rag import constants as cs +from codebase_rag import exceptions as ex +from codebase_rag.services.llm import ( + _build_keyword_pattern, + _validate_call_procedures, + _validate_cypher_read_only, + _validate_no_unbounded_paths, +) + + +class TestBuildKeywordPattern: + def test_single_word_uses_word_boundaries(self) -> None: + pattern = 
_build_keyword_pattern("DELETE") + assert pattern.search("DELETE n") is not None + assert pattern.search("XDELETE") is None + assert pattern.search("DELETEX") is None + + def test_multi_word_allows_whitespace_between_parts(self) -> None: + pattern = _build_keyword_pattern("LOAD CSV") + assert pattern.search("LOAD CSV") is not None + assert pattern.search("LOAD CSV") is not None + assert pattern.search("LOAD\nCSV") is not None + assert pattern.search("LOAD\t CSV") is not None + + def test_multi_word_allows_block_comment_between_parts(self) -> None: + pattern = _build_keyword_pattern("LOAD CSV") + assert pattern.search("LOAD/*bypass*/CSV") is not None + assert pattern.search("LOAD /* comment */ CSV") is not None + + def test_multi_word_allows_single_line_comment_between_parts(self) -> None: + pattern = _build_keyword_pattern("LOAD CSV") + assert pattern.search("LOAD //comment\nCSV") is not None + assert pattern.search("LOAD //\nCSV") is not None + + def test_multi_word_respects_word_boundaries(self) -> None: + pattern = _build_keyword_pattern("LOAD CSV") + assert pattern.search("PRELOAD CSV") is None + assert pattern.search("LOAD CSVX") is None + + def test_single_word_is_case_sensitive_on_input(self) -> None: + pattern = _build_keyword_pattern("DELETE") + assert pattern.search("DELETE") is not None + assert pattern.search("delete") is None + + def test_returns_compiled_pattern(self) -> None: + pattern = _build_keyword_pattern("SET") + assert isinstance(pattern, re.Pattern) + + def test_multi_word_has_dotall_flag(self) -> None: + pattern = _build_keyword_pattern("CREATE INDEX") + assert pattern.flags & re.DOTALL + + def test_all_dangerous_keywords_produce_valid_patterns(self) -> None: + for kw in cs.CYPHER_DANGEROUS_KEYWORDS: + pattern = _build_keyword_pattern(kw) + assert pattern.search(kw) is not None + + +class TestValidateCypherReadOnly: + def test_safe_match_query_passes(self) -> None: + _validate_cypher_read_only("MATCH (n) RETURN n;") + + def 
test_safe_match_with_where_passes(self) -> None: + _validate_cypher_read_only("MATCH (n:Function) WHERE n.name = 'foo' RETURN n;") + + def test_safe_optional_match_passes(self) -> None: + _validate_cypher_read_only( + "MATCH (a)-[:CALLS]->(b) OPTIONAL MATCH (b)-[:DEFINES]->(c) RETURN a, b, c;" + ) + + @pytest.mark.parametrize( + "keyword", + sorted(cs.CYPHER_DANGEROUS_KEYWORDS), + ) + def test_rejects_all_dangerous_keywords(self, keyword: str) -> None: + query = f"MATCH (n) {keyword} n;" + with pytest.raises(ex.LLMGenerationError): + _validate_cypher_read_only(query) + + def test_rejects_delete(self) -> None: + with pytest.raises(ex.LLMGenerationError, match="DELETE"): + _validate_cypher_read_only("MATCH (n) DELETE n;") + + def test_rejects_detach_delete(self) -> None: + with pytest.raises(ex.LLMGenerationError): + _validate_cypher_read_only("MATCH (n) DETACH DELETE n;") + + def test_rejects_drop(self) -> None: + with pytest.raises(ex.LLMGenerationError, match="DROP"): + _validate_cypher_read_only("MATCH (n) DROP INDEX idx;") + + def test_rejects_set(self) -> None: + with pytest.raises(ex.LLMGenerationError, match="SET"): + _validate_cypher_read_only("MATCH (n) SET n.name = 'x';") + + def test_rejects_merge(self) -> None: + with pytest.raises(ex.LLMGenerationError, match="MERGE"): + _validate_cypher_read_only("MERGE (n:Node {id: 1});") + + def test_rejects_create(self) -> None: + with pytest.raises(ex.LLMGenerationError, match="CREATE"): + _validate_cypher_read_only("CREATE (n:Node {name: 'test'});") + + def test_rejects_load_csv(self) -> None: + with pytest.raises(ex.LLMGenerationError, match="LOAD CSV"): + _validate_cypher_read_only( + "LOAD CSV FROM 'http://evil.com/data.csv' AS row;" + ) + + def test_rejects_create_index(self) -> None: + with pytest.raises(ex.LLMGenerationError, match="CREATE INDEX"): + _validate_cypher_read_only("CREATE INDEX ON :Node(name);") + + def test_case_insensitive(self) -> None: + with pytest.raises(ex.LLMGenerationError): + 
_validate_cypher_read_only("match (n) delete n;") + + def test_rejects_block_comment_bypass(self) -> None: + with pytest.raises(ex.LLMGenerationError): + _validate_cypher_read_only("LOAD/*bypass*/CSV FROM 'http://evil.com';") + + def test_rejects_single_line_comment_bypass(self) -> None: + with pytest.raises(ex.LLMGenerationError): + _validate_cypher_read_only("LOAD //bypass\nCSV FROM 'http://evil.com';") + + def test_does_not_flag_substring_matches(self) -> None: + _validate_cypher_read_only("MATCH (n) WHERE n.name = 'DATASET' RETURN n;") + + def test_does_not_flag_reset(self) -> None: + _validate_cypher_read_only("MATCH (n) WHERE n.name = 'RESET' RETURN n;") + + def test_does_not_flag_created_at(self) -> None: + _validate_cypher_read_only("MATCH (n) WHERE n.created_at > 0 RETURN n;") + + def test_error_includes_keyword_and_query(self) -> None: + query = "MATCH (n) DELETE n;" + with pytest.raises(ex.LLMGenerationError, match="DELETE") as exc_info: + _validate_cypher_read_only(query) + assert query in str(exc_info.value) + + def test_rejects_foreach(self) -> None: + with pytest.raises(ex.LLMGenerationError, match="FOREACH"): + _validate_cypher_read_only( + "MATCH p=(a)-[*]->(b) FOREACH (n IN nodes(p) | SET n.marked = true);" + ) + + def test_rejects_remove(self) -> None: + with pytest.raises(ex.LLMGenerationError, match="REMOVE"): + _validate_cypher_read_only("MATCH (n) REMOVE n.prop;") + + def test_call_no_longer_in_keyword_blocklist(self) -> None: + _validate_cypher_read_only("CALL nxalg.strongly_connected_components();") + + def test_rejects_create_constraint(self) -> None: + with pytest.raises(ex.LLMGenerationError, match="CREATE CONSTRAINT"): + _validate_cypher_read_only( + "CREATE CONSTRAINT ON (n:Node) ASSERT n.id IS UNIQUE;" + ) + + def test_rejects_multiline_block_comment_bypass(self) -> None: + with pytest.raises(ex.LLMGenerationError): + _validate_cypher_read_only("LOAD/*\nbypass\n*/CSV FROM 'http://evil.com';") + + +class TestValidateNoUnboundedPaths: + 
@pytest.mark.parametrize( + "query", + [ + "MATCH (n) RETURN n;", + "MATCH (a)-[:CALLS]->(b) RETURN a, b;", + "MATCH (a)-[:CALLS*5]->(b) RETURN a, b;", + "MATCH (a)-[:CALLS*1..6]->(b) RETURN a, b;", + "MATCH (a)-[:CALLS*..6]->(b) RETURN a, b;", + "MATCH (a)-[r:CALLS*1..6]->(b) RETURN r;", + "MATCH (a)-[*1..3]->(b) RETURN a, b;", + "MATCH (a)-[:CALLS*2..2]->(b) RETURN a, b;", + "MATCH (a)-[:CALLS*1..6 {weight: 1}]->(b) RETURN a, b;", + ], + ) + def test_bounded_or_no_varlen_passes(self, query: str) -> None: + _validate_no_unbounded_paths(query) + + @pytest.mark.parametrize( + "query", + [ + "MATCH path = (a)-[:CALLS*]->(b) RETURN path;", + "MATCH (a)-[:CALLS*1..]->(b) RETURN a, b;", + "MATCH (a)-[:CALLS*..]->(b) RETURN a, b;", + "MATCH (a)-[*]->(b) RETURN a, b;", + "MATCH (a)-[r:CALLS*]->(b) RETURN r;", + "MATCH (a)-[:CALLS*10..]->(b) RETURN a, b;", + ], + ) + def test_unbounded_varlen_rejected(self, query: str) -> None: + with pytest.raises(ex.LLMGenerationError, match="unbounded"): + _validate_no_unbounded_paths(query) + + def test_error_includes_query(self) -> None: + query = "MATCH (a)-[:CALLS*]->(b) RETURN a;" + with pytest.raises(ex.LLMGenerationError) as exc_info: + _validate_no_unbounded_paths(query) + assert query in str(exc_info.value) + + +class TestValidateCallProcedures: + @pytest.mark.parametrize( + "query", + [ + "MATCH (n) RETURN n;", + "CALL nxalg.strongly_connected_components() YIELD components RETURN components;", + "CALL nxalg.simple_cycles() YIELD cycles RETURN cycles LIMIT 10;", + "CALL nxalg.topological_sort() YIELD nodes RETURN nodes;", + "CALL pagerank.get() YIELD node, rank RETURN node, rank ORDER BY rank DESC LIMIT 10;", + "CALL betweenness_centrality.get() YIELD node, betweenness_centrality RETURN node;", + "CALL community_detection.get() YIELD node, community_id RETURN node, community_id;", + "CALL leiden_community_detection.get() YIELD node, community_id RETURN node;", + "CALL weakly_connected_components.get() YIELD node, component_id 
RETURN node;", + "CALL graph_util.ancestors(node) YIELD ancestors RETURN ancestors;", + "CALL path.expand(start, ['CALLS>'], ['Function'], 1, 6) YIELD path RETURN path;", + "CALL algo.all_simple_paths(src, tgt, ['CALLS'], 10) YIELD paths RETURN paths;", + "CALL bridges.get() YIELD bridges RETURN bridges;", + "CALL biconnected_components.get() YIELD components RETURN components;", + ], + ) + def test_allowed_procedure_passes(self, query: str) -> None: + _validate_call_procedures(query) + + @pytest.mark.parametrize( + "query", + [ + "CALL db.schema.visualization();", + "CALL refactor.merge_nodes([a, b]) YIELD node RETURN node;", + "CALL create.node(['Foo'], {x: 1}) YIELD node RETURN node;", + "CALL export_util.json('out.json');", + "CALL migrate.postgresql('...') YIELD row RETURN row;", + "CALL mg.load('mod');", + "CALL csv_utils.create_csv_file('a','b');", + "CALL link_prediction.train();", + ], + ) + def test_disallowed_procedure_rejected(self, query: str) -> None: + with pytest.raises(ex.LLMGenerationError, match="outside the read-only"): + _validate_call_procedures(query) + + def test_call_is_case_insensitive(self) -> None: + _validate_call_procedures( + "call nxalg.strongly_connected_components() YIELD components RETURN components;" + ) + + def test_error_includes_procedure_name(self) -> None: + with pytest.raises(ex.LLMGenerationError, match="refactor.merge_nodes"): + _validate_call_procedures( + "CALL refactor.merge_nodes([a, b]) YIELD n RETURN n;" + ) diff --git a/codebase_rag/tests/test_dead_code_command.py b/codebase_rag/tests/test_dead_code_command.py new file mode 100644 index 000000000..aad627ee3 --- /dev/null +++ b/codebase_rag/tests/test_dead_code_command.py @@ -0,0 +1,226 @@ +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from typer.testing import CliRunner + +from codebase_rag.cli import app +from codebase_rag.types_defs import ResultRow + + +@pytest.fixture 
+def runner() -> CliRunner: + return CliRunner() + + +@pytest.fixture +def dead_rows() -> list[ResultRow]: + return [ + { + "label": "Function", + "name": "orphan_one", + "qualified_name": "myproj.mod.orphan_one", + "start_line": 5, + "end_line": 9, + }, + { + "label": "Method", + "name": "orphan_two", + "qualified_name": "myproj.mod.Thing.orphan_two", + "start_line": 20, + "end_line": 25, + }, + ] + + +def _make_mock_ingestor( + *, projects: list[str], fetch_result: list[ResultRow] +) -> MagicMock: + mock = MagicMock() + mock.list_projects.return_value = projects + mock.fetch_all.return_value = fetch_result + mock.__enter__ = MagicMock(return_value=mock) + mock.__exit__ = MagicMock(return_value=False) + return mock + + +class TestDeadCodeCommand: + def test_lists_orphans_in_table( + self, runner: CliRunner, dead_rows: list[ResultRow] + ) -> None: + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=dead_rows) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code"]) + + assert result.exit_code == 0 + assert "orphan_one" in result.output + assert "orphan_two" in result.output + + def test_json_format_emits_qualified_names( + self, runner: CliRunner, dead_rows: list[ResultRow] + ) -> None: + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=dead_rows) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code", "--format", "json"]) + + assert result.exit_code == 0 + payload = json.loads(result.output) + names = {row["qualified_name"] for row in payload} + assert names == { + "myproj.mod.orphan_one", + "myproj.mod.Thing.orphan_two", + } + + def test_fail_on_found_exits_one_when_dead_code( + self, runner: CliRunner, dead_rows: list[ResultRow] + ) -> None: + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=dead_rows) + with patch("codebase_rag.cli.connect_memgraph", 
return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code", "--fail-on-found"]) + + assert result.exit_code == 1 + + def test_fail_on_found_exits_zero_when_clean(self, runner: CliRunner) -> None: + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=[]) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code", "--fail-on-found"]) + + assert result.exit_code == 0 + + def test_explicit_project_name_used( + self, runner: CliRunner, dead_rows: list[ResultRow] + ) -> None: + mock_ingestor = _make_mock_ingestor( + projects=["myproj", "other"], fetch_result=dead_rows + ) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code", "--project-name", "myproj"]) + + assert result.exit_code == 0 + _query, params = mock_ingestor.fetch_all.call_args.args + assert params["project_prefix"] == "myproj." + + def test_errors_when_project_ambiguous(self, runner: CliRunner) -> None: + mock_ingestor = _make_mock_ingestor(projects=["a", "b"], fetch_result=[]) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code"]) + + assert result.exit_code == 1 + mock_ingestor.fetch_all.assert_not_called() + + def test_errors_when_no_projects(self, runner: CliRunner) -> None: + mock_ingestor = _make_mock_ingestor(projects=[], fetch_result=[]) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code"]) + + assert result.exit_code == 1 + + def test_entry_point_forwarded_to_query( + self, runner: CliRunner, dead_rows: list[ResultRow] + ) -> None: + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=dead_rows) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code", "-e", "main", "-e", "run"]) + + assert result.exit_code == 0 + _query, 
params = mock_ingestor.fetch_all.call_args.args + assert params["entry_points"] == ["main", "run"] + + def test_decorator_root_extends_defaults( + self, runner: CliRunner, dead_rows: list[ResultRow] + ) -> None: + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=dead_rows) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code", "--decorator-root", "myhandler"]) + + assert result.exit_code == 0 + _query, params = mock_ingestor.fetch_all.call_args.args + assert "myhandler" in params["root_decorators"] + assert "task" in params["root_decorators"] + + def test_writes_json_to_output_file( + self, runner: CliRunner, dead_rows: list[ResultRow], tmp_path: Path + ) -> None: + out = tmp_path / "dead.json" + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=dead_rows) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke( + app, + ["dead-code", "--format", "json", "--output", str(out)], + ) + + assert result.exit_code == 0 + payload = json.loads(out.read_text()) + assert len(payload) == 2 + + def test_writes_table_to_output_file( + self, runner: CliRunner, dead_rows: list[ResultRow], tmp_path: Path + ) -> None: + out = tmp_path / "dead.txt" + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=dead_rows) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code", "--output", str(out)]) + + assert result.exit_code == 0 + written = out.read_text() + assert "orphan_one" in written + + def test_handles_connection_error(self, runner: CliRunner) -> None: + with patch( + "codebase_rag.cli.connect_memgraph", + side_effect=ConnectionError("Cannot connect"), + ): + result = runner.invoke(app, ["dead-code"]) + + assert result.exit_code == 1 + + def test_include_tests_default_passes_test_patterns( + self, runner: CliRunner, dead_rows: list[ResultRow] + 
) -> None: + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=dead_rows) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code"]) + + assert result.exit_code == 0 + query, params = mock_ingestor.fetch_all.call_args.args + assert "test_patterns" in params + assert "$test_patterns" in query + + def test_no_include_tests_omits_test_patterns( + self, runner: CliRunner, dead_rows: list[ResultRow] + ) -> None: + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=dead_rows) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code", "--no-include-tests"]) + + assert result.exit_code == 0 + query, params = mock_ingestor.fetch_all.call_args.args + # (H) test_patterns is still passed (it filters test modules out of the + # (H) module-load roots), but test functions themselves are not roots. + assert "test_patterns" in params + assert "n.path CONTAINS" not in query + + def test_classes_flag_includes_class_candidates( + self, runner: CliRunner, dead_rows: list[ResultRow] + ) -> None: + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=dead_rows) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code", "--classes"]) + + assert result.exit_code == 0 + query, _params = mock_ingestor.fetch_all.call_args.args + assert "Function|Method|Class" in query + + def test_classes_off_by_default( + self, runner: CliRunner, dead_rows: list[ResultRow] + ) -> None: + mock_ingestor = _make_mock_ingestor(projects=["myproj"], fetch_result=dead_rows) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["dead-code"]) + + assert result.exit_code == 0 + query, _params = mock_ingestor.fetch_all.call_args.args + assert "Function|Method|Class" not in query diff --git 
a/codebase_rag/tests/test_dead_code_eval.py b/codebase_rag/tests/test_dead_code_eval.py new file mode 100644 index 000000000..04c368f6e --- /dev/null +++ b/codebase_rag/tests/test_dead_code_eval.py @@ -0,0 +1,140 @@ +from pathlib import Path + +from codebase_rag import constants as cs +from evals.dead_code import ( + DeadCodeConfig, + cgr_dead_code, + dead_code_from_graph, + default_dead_code_config, + score_dead_code, +) + +_MODULE = cs.NodeLabel.MODULE.value +_FUNCTION = cs.NodeLabel.FUNCTION.value +_CALLS = cs.RelationshipType.CALLS.value +_PREFIX = "proj." +_CONFIG = DeadCodeConfig( + include_tests=False, + include_classes=False, + root_decorators=frozenset(), + entry_points=(), + test_patterns=cs.TEST_PATH_PATTERNS, +) + + +def _fn(uid: str, path: str = "m.py", decorators: list[str] | None = None) -> tuple: + return ( + (_FUNCTION, uid), + { + cs.KEY_QUALIFIED_NAME: uid, + cs.KEY_PATH: path, + cs.KEY_DECORATORS: decorators or [], + cs.KEY_IS_EXPORTED: False, + }, + ) + + +def test_dead_code_flags_uncalled_function() -> None: + # (H) Module calls main(); main() calls helper(); orphan() is never called. + nodes = dict( + [ + ( + (_MODULE, "proj.m"), + {cs.KEY_QUALIFIED_NAME: "proj.m", cs.KEY_PATH: "m.py"}, + ), + _fn("proj.m.main"), + _fn("proj.m.helper"), + _fn("proj.m.orphan"), + ] + ) + rels = [ + (_MODULE, "proj.m", _CALLS, _FUNCTION, "proj.m.main"), + (_FUNCTION, "proj.m.main", _CALLS, _FUNCTION, "proj.m.helper"), + ] + dead = dead_code_from_graph(nodes, rels, _PREFIX, _CONFIG) + assert dead == {"proj.m.orphan"} + + +def test_dead_code_flags_orphan_chain() -> None: + # (H) orphan() calls buried(), but orphan() itself is never reached, so both + # (H) are dead (a callee kept alive only by dead code is dead too). 
+ nodes = dict( + [ + ( + (_MODULE, "proj.m"), + {cs.KEY_QUALIFIED_NAME: "proj.m", cs.KEY_PATH: "m.py"}, + ), + _fn("proj.m.main"), + _fn("proj.m.orphan"), + _fn("proj.m.buried"), + ] + ) + rels = [ + (_MODULE, "proj.m", _CALLS, _FUNCTION, "proj.m.main"), + (_FUNCTION, "proj.m.orphan", _CALLS, _FUNCTION, "proj.m.buried"), + ] + dead = dead_code_from_graph(nodes, rels, _PREFIX, _CONFIG) + assert dead == {"proj.m.orphan", "proj.m.buried"} + + +def test_decorated_function_is_a_root() -> None: + # (H) A function with a recognised entry-point decorator (e.g. @app.route) is + # (H) live even if nothing calls it. + config = _CONFIG._replace(root_decorators=frozenset({"route"})) + nodes = dict( + [ + ( + (_MODULE, "proj.m"), + {cs.KEY_QUALIFIED_NAME: "proj.m", cs.KEY_PATH: "m.py"}, + ), + _fn("proj.m.handler", decorators=["@app.route('/x')"]), + ] + ) + dead = dead_code_from_graph(nodes, [], _PREFIX, config) + assert dead == set() + + +def test_non_test_module_does_not_keep_code_alive_when_tests_excluded() -> None: + # (H) With tests excluded, a call from a test module must not root project code. 
+ nodes = dict( + [ + ( + (_MODULE, "proj.tests.test_m"), + { + cs.KEY_QUALIFIED_NAME: "proj.tests.test_m", + cs.KEY_PATH: "tests/test_m.py", + }, + ), + _fn("proj.m.only_tested"), + ] + ) + rels = [(_MODULE, "proj.tests.test_m", _CALLS, _FUNCTION, "proj.m.only_tested")] + dead = dead_code_from_graph(nodes, rels, _PREFIX, _CONFIG) + assert dead == {"proj.m.only_tested"} + + +def _make_repo(root: Path) -> None: + root.mkdir(parents=True) + (root / "__init__.py").write_text("", encoding="utf-8") + (root / "m.py").write_text( + "def helper():\n return 1\n\n\n" + "def main():\n return helper()\n\n\n" + "def orphan():\n return 2\n\n\n" + "main()\n", + encoding="utf-8", + ) + + +def test_cgr_dead_code_matches_known_dead_set(tmp_path: Path) -> None: + src = tmp_path / "proj" + _make_repo(src) + dead = cgr_dead_code(src, "proj", default_dead_code_config(False, False)) + assert "proj.m.orphan" in dead + assert "proj.m.main" not in dead + assert "proj.m.helper" not in dead + + +def test_score_dead_code_prf() -> None: + result = score_dead_code({"a", "b"}, {"a", "c"}) + row = result.rows[0] + assert (row["tp"], row["fp"], row["fn"]) == (1, 1, 1) diff --git a/codebase_rag/tests/test_decorator_call_edges.py b/codebase_rag/tests/test_decorator_call_edges.py new file mode 100644 index 000000000..5778b5efa --- /dev/null +++ b/codebase_rag/tests/test_decorator_call_edges.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag import constants as cs +from codebase_rag.tests.conftest import run_updater + + +def _calls(mock_ingestor: MagicMock) -> list[tuple[str, str, str]]: + # (H) CALLS edges as (caller_label, caller_qn, callee_qn). 
+ out: list[tuple[str, str, str]] = [] + for c in mock_ingestor.ensure_relationship_batch.call_args_list: + if c.args[1] == cs.RelationshipType.CALLS: + out.append((c.args[0][0], c.args[0][2], c.args[2][2])) + return out + + +class TestDecoratorCallEdges: + def test_bare_decorator_emits_module_call( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + # (H) `@task` applies task(handler) at module load -> a module-level call. + (temp_repo / "app.py").write_text( + "def task(fn):\n return fn\n\n\n@task\ndef handler():\n return 1\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + calls = _calls(mock_ingestor) + + assert any( + label == cs.NodeLabel.MODULE + and caller.endswith(".app") + and callee.endswith(".task") + for label, caller, callee in calls + ), f"no module->task decorator edge; calls={sorted(calls)}" + + def test_call_decorator_emits_module_call( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + # (H) `@register(...)` also runs at module load. + (temp_repo / "app.py").write_text( + "def register(name):\n" + " def wrap(fn):\n" + " return fn\n" + " return wrap\n" + "\n" + "\n" + '@register("x")\n' + "def handler():\n" + " return 1\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + calls = _calls(mock_ingestor) + + assert any( + label == cs.NodeLabel.MODULE + and caller.endswith(".app") + and callee.endswith(".register") + for label, caller, callee in calls + ), f"no module->register decorator edge; calls={sorted(calls)}" + + def test_class_decorator_emits_module_call( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + # (H) a bare decorator on a class also runs at module load. 
+ (temp_repo / "app.py").write_text( + "def deco(cls):\n return cls\n\n\n@deco\nclass MyClass:\n pass\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + calls = _calls(mock_ingestor) + + assert any( + label == cs.NodeLabel.MODULE + and caller.endswith(".app") + and callee.endswith(".deco") + for label, caller, callee in calls + ), f"no module->deco class decorator edge; calls={sorted(calls)}" + + def test_alias_decorator_resolves_to_first_party( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + # (H) `@alias` where `alias = task` still calls task at module load. + (temp_repo / "app.py").write_text( + "def task(fn):\n" + " return fn\n" + "\n" + "\n" + "alias = task\n" + "\n" + "\n" + "@alias\n" + "def handler():\n" + " return 1\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + calls = _calls(mock_ingestor) + + assert any( + label == cs.NodeLabel.MODULE + and caller.endswith(".app") + and callee.endswith(".task") + for label, caller, callee in calls + ), f"alias decorator not resolved; calls={sorted(calls)}" + + def test_decorator_on_nested_function_not_module_attributed( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + # (H) a decorator on a function nested in another function runs when the + # (H) outer function is called, not at module load -> no module edge. 
+ (temp_repo / "app.py").write_text( + "def deco(fn):\n" + " return fn\n" + "\n" + "\n" + "def outer():\n" + " @deco\n" + " def inner():\n" + " return 1\n" + "\n" + " return inner\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + module_callees = { + callee.rsplit(cs.SEPARATOR_DOT, 1)[-1] + for label, _caller, callee in _calls(mock_ingestor) + if label == cs.NodeLabel.MODULE + } + + assert "deco" not in module_callees + + def test_undecorated_function_has_no_decorator_edge( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "app.py").write_text( + "def plain():\n return 1\n\n\ndef other():\n return 2\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + module_callees = { + callee.rsplit(cs.SEPARATOR_DOT, 1)[-1] + for label, _caller, callee in _calls(mock_ingestor) + if label == cs.NodeLabel.MODULE + } + + assert "plain" not in module_callees + assert "other" not in module_callees diff --git a/codebase_rag/tests/test_diff_autowrap.py b/codebase_rag/tests/test_diff_autowrap.py new file mode 100644 index 000000000..d5c9c6eb1 --- /dev/null +++ b/codebase_rag/tests/test_diff_autowrap.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +from codebase_rag.main import _autowrap_diff_blocks + + +class TestNoDiff: + def test_plain_text_unchanged(self) -> None: + text = "Here is some explanation without any diff." 
+ assert _autowrap_diff_blocks(text) == text + + def test_text_without_diff_marker_unchanged(self) -> None: + text = "Lines starting with - or + but no diff --git header\n- not a diff\n+ also not" + assert _autowrap_diff_blocks(text) == text + + +class TestWrappingUnfencedDiff: + def test_full_git_diff_gets_fenced_as_diff(self) -> None: + text = ( + "diff --git a/file.py b/file.py\n" + "index abc..def 100644\n" + "--- a/file.py\n" + "+++ b/file.py\n" + "@@ -1,3 +1,3 @@\n" + " context\n" + "-old\n" + "+new\n" + ) + out = _autowrap_diff_blocks(text) + assert out.startswith("```diff\n") + assert out.rstrip().endswith("```") + assert "diff --git a/file.py b/file.py" in out + assert "+new" in out + + def test_diff_followed_by_explanation_text(self) -> None: + text = ( + "diff --git a/x b/x\n" + "--- a/x\n" + "+++ b/x\n" + "@@ -1 +1 @@\n" + "-a\n" + "+b\n" + "\n" + "This adds the new feature.\n" + ) + out = _autowrap_diff_blocks(text) + assert "```diff\n" in out + explanation_pos = out.index("This adds the new feature.") + fence_close_pos = out.rindex("```", 0, explanation_pos) + assert fence_close_pos < explanation_pos, ( + "explanation text must appear after the closing fence" + ) + assert "diff --git" in out[:fence_close_pos] + + def test_preamble_before_diff_preserved(self) -> None: + text = ( + "Here are the changes I made:\n" + "diff --git a/foo.py b/foo.py\n" + "--- a/foo.py\n" + "+++ b/foo.py\n" + "@@ -1 +1 @@\n" + "-x\n" + "+y\n" + ) + out = _autowrap_diff_blocks(text) + assert "Here are the changes I made:" in out + assert "```diff" in out + + +class TestAlreadyFenced: + def test_already_fenced_diff_not_double_wrapped(self) -> None: + text = ( + "Here is a diff:\n" + "```diff\n" + "diff --git a/x b/x\n" + "--- a/x\n" + "+++ b/x\n" + "@@ -1 +1 @@\n" + "-a\n" + "+b\n" + "```\n" + ) + out = _autowrap_diff_blocks(text) + assert out.count("```diff") == 1 + assert out.count("```") == 2 + + def test_fenced_with_other_language_not_rewrapped(self) -> None: + text = 
"```bash\ngit diff\ndiff --git a/x b/x\n```\n" + out = _autowrap_diff_blocks(text) + assert "```bash" in out + assert "```diff" not in out diff --git a/codebase_rag/tests/test_directory_lister.py b/codebase_rag/tests/test_directory_lister.py index 9a7f480bc..40759be36 100644 --- a/codebase_rag/tests/test_directory_lister.py +++ b/codebase_rag/tests/test_directory_lister.py @@ -5,6 +5,7 @@ import pytest from pydantic_ai import Tool +from codebase_rag import tool_errors as te from codebase_rag.tools.directory_lister import ( DirectoryLister, create_directory_lister_tool, @@ -113,6 +114,24 @@ def test_list_with_hidden_files( assert ".hidden_file" in result assert "visible_file" in result + def test_list_directory_returns_error_for_path_outside_root( + self, directory_lister: DirectoryLister + ) -> None: + result = directory_lister.list_directory_contents("../../../etc") + expected = te.DIRECTORY_PATH_OUTSIDE_ROOT.format( + path="../../../etc", root=directory_lister.project_root + ) + assert result == expected + + def test_list_directory_returns_error_for_absolute_path_outside_root( + self, directory_lister: DirectoryLister + ) -> None: + result = directory_lister.list_directory_contents("/etc/passwd") + expected = te.DIRECTORY_PATH_OUTSIDE_ROOT.format( + path="/etc/passwd", root=directory_lister.project_root + ) + assert result == expected + class TestGetSafePath: def test_safe_path_with_relative_path( diff --git a/codebase_rag/tests/test_document_analyzer.py b/codebase_rag/tests/test_document_analyzer.py deleted file mode 100644 index 1d88dfe2f..000000000 --- a/codebase_rag/tests/test_document_analyzer.py +++ /dev/null @@ -1,259 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest -from pydantic_ai import Tool - -from codebase_rag.constants import Provider -from codebase_rag.tools.document_analyzer import ( - DocumentAnalyzer, - _NotSupportedClient, - create_document_analyzer_tool, -) - - 
-@pytest.fixture -def temp_project_root(tmp_path: Path) -> Path: - return tmp_path - - -@pytest.fixture -def mock_settings() -> MagicMock: - settings = MagicMock() - settings.active_orchestrator_config.provider = Provider.GOOGLE - settings.active_orchestrator_config.provider_type = "api" - settings.active_orchestrator_config.api_key = "test-api-key" - settings.active_orchestrator_config.model_id = "gemini-1.5-flash" - return settings - - -@pytest.fixture -def mock_genai_client() -> MagicMock: - client = MagicMock() - response = MagicMock() - response.text = "Analysis result" - client.models.generate_content.return_value = response - return client - - -class TestNotSupportedClient: - def test_raises_not_implemented_error(self) -> None: - client = _NotSupportedClient() - with pytest.raises(NotImplementedError): - client.generate_content() - - def test_any_attribute_raises_error(self) -> None: - client = _NotSupportedClient() - with pytest.raises(NotImplementedError): - client.any_method() - - -class TestDocumentAnalyzerInit: - def test_init_resolves_project_root( - self, temp_project_root: Path, mock_settings: MagicMock - ) -> None: - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch("codebase_rag.tools.document_analyzer.genai.Client"): - analyzer = DocumentAnalyzer(str(temp_project_root)) - assert analyzer.project_root == temp_project_root.resolve() - - def test_init_with_google_api_provider( - self, temp_project_root: Path, mock_settings: MagicMock - ) -> None: - mock_settings.active_orchestrator_config.provider = Provider.GOOGLE - mock_settings.active_orchestrator_config.provider_type = "api" - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client" - ) as mock_client: - DocumentAnalyzer(str(temp_project_root)) - mock_client.assert_called_once_with(api_key="test-api-key") - - def test_init_with_non_google_provider( - self, 
temp_project_root: Path, mock_settings: MagicMock - ) -> None: - mock_settings.active_orchestrator_config.provider = "anthropic" - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - analyzer = DocumentAnalyzer(str(temp_project_root)) - assert isinstance(analyzer.client, _NotSupportedClient) - - -class TestDocumentAnalyzerAnalyze: - def test_analyze_returns_error_for_unsupported_provider( - self, temp_project_root: Path, mock_settings: MagicMock - ) -> None: - mock_settings.active_orchestrator_config.provider = "anthropic" - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - analyzer = DocumentAnalyzer(str(temp_project_root)) - result = analyzer.analyze("test.pdf", "What is this?") - assert "Error:" in result - assert "not supported" in result.lower() - - def test_analyze_file_not_found( - self, - temp_project_root: Path, - mock_settings: MagicMock, - mock_genai_client: MagicMock, - ) -> None: - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_genai_client, - ): - analyzer = DocumentAnalyzer(str(temp_project_root)) - result = analyzer.analyze("nonexistent.pdf", "What is this?") - assert "Error:" in result - assert "not found" in result.lower() - - def test_analyze_security_path_traversal( - self, - temp_project_root: Path, - mock_settings: MagicMock, - mock_genai_client: MagicMock, - ) -> None: - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_genai_client, - ): - analyzer = DocumentAnalyzer(str(temp_project_root)) - result = analyzer.analyze("../../../etc/passwd", "What is this?") - assert "security" in result.lower() - - def test_analyze_existing_file_returns_response( - self, - temp_project_root: Path, - mock_settings: MagicMock, - mock_genai_client: MagicMock, - ) -> None: - 
test_file = temp_project_root / "test.txt" - test_file.write_text("Test content", encoding="utf-8") - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_genai_client, - ): - analyzer = DocumentAnalyzer(str(temp_project_root)) - result = analyzer.analyze("test.txt", "What is this?") - assert result == "Analysis result" - - def test_analyze_with_absolute_path( - self, - temp_project_root: Path, - mock_settings: MagicMock, - mock_genai_client: MagicMock, - ) -> None: - test_file = temp_project_root / "test.txt" - test_file.write_text("Test content", encoding="utf-8") - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_genai_client, - ): - analyzer = DocumentAnalyzer(str(temp_project_root)) - result = analyzer.analyze(str(test_file), "What is this?") - assert result == "Analysis result" - - def test_analyze_handles_no_text_response( - self, - temp_project_root: Path, - mock_settings: MagicMock, - ) -> None: - mock_client = MagicMock() - response = MagicMock() - response.text = None - response.candidates = None - mock_client.models.generate_content.return_value = response - - test_file = temp_project_root / "test.txt" - test_file.write_text("Test content", encoding="utf-8") - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_client, - ): - analyzer = DocumentAnalyzer(str(temp_project_root)) - result = analyzer.analyze("test.txt", "What is this?") - assert "no" in result.lower() and "content" in result.lower() - - def test_analyze_extracts_from_candidates( - self, - temp_project_root: Path, - mock_settings: MagicMock, - ) -> None: - mock_client = MagicMock() - response = MagicMock() - response.text = None - - candidate = MagicMock() 
- part = MagicMock() - part.text = "Candidate text" - candidate.content.parts = [part] - response.candidates = [candidate] - mock_client.models.generate_content.return_value = response - - test_file = temp_project_root / "test.txt" - test_file.write_text("Test content", encoding="utf-8") - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_client, - ): - analyzer = DocumentAnalyzer(str(temp_project_root)) - result = analyzer.analyze("test.txt", "What is this?") - assert result == "Candidate text" - - -class TestCreateDocumentAnalyzerTool: - def test_creates_tool_instance( - self, - temp_project_root: Path, - mock_settings: MagicMock, - mock_genai_client: MagicMock, - ) -> None: - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_genai_client, - ): - analyzer = DocumentAnalyzer(str(temp_project_root)) - tool = create_document_analyzer_tool(analyzer) - assert isinstance(tool, Tool) - - def test_tool_has_description( - self, - temp_project_root: Path, - mock_settings: MagicMock, - mock_genai_client: MagicMock, - ) -> None: - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - return_value=mock_genai_client, - ): - analyzer = DocumentAnalyzer(str(temp_project_root)) - tool = create_document_analyzer_tool(analyzer) - assert tool.description is not None - assert ( - "document" in tool.description.lower() - or "pdf" in tool.description.lower() - ) - - def test_tool_has_correct_name( - self, - temp_project_root: Path, - mock_settings: MagicMock, - mock_genai_client: MagicMock, - ) -> None: - with patch("codebase_rag.tools.document_analyzer.settings", mock_settings): - with patch( - "codebase_rag.tools.document_analyzer.genai.Client", - 
return_value=mock_genai_client, - ): - from codebase_rag.tools.tool_descriptions import AgenticToolName - - analyzer = DocumentAnalyzer(str(temp_project_root)) - tool = create_document_analyzer_tool(analyzer) - assert tool.name == AgenticToolName.ANALYZE_DOCUMENT diff --git a/codebase_rag/tests/test_duplicate_qn_definitions.py b/codebase_rag/tests/test_duplicate_qn_definitions.py new file mode 100644 index 000000000..d3670086c --- /dev/null +++ b/codebase_rag/tests/test_duplicate_qn_definitions.py @@ -0,0 +1,184 @@ +# (H) Regression tests for the duplicate-qualified-name finding surfaced by the +# (H) evals/ harness: the `if has_x(): else: ` import-fallback +# (H) idiom defines one qualified name twice. cgr used to collapse the two into a +# (H) single node (last-writer-wins kept the else-branch stub). Both definitions +# (H) must survive as distinct nodes, and a call must link to BOTH. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "dupproj" + +MODULE_SRC = """import os + + +if os.environ.get("FLAG"): + + def impl() -> str: + return "real" + +else: + + def impl() -> str: + return "stub" + + +def caller() -> str: + return impl() +""" + +_RelTuple = tuple[str, PropertyValue, str, str, PropertyValue] + + +class _Capture: + def __init__(self) -> None: + self.nodes: dict[tuple[str, PropertyValue], PropertyDict] = {} + self.rels: list[_RelTuple] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + uid = properties[cs.NODE_UNIQUE_CONSTRAINTS[label]] + self.nodes[(str(label), uid)] = dict(properties) + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + 
) -> None: + self.rels.append( + ( + str(from_spec[0]), + from_spec[2], + str(rel_type), + str(to_spec[0]), + to_spec[2], + ) + ) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _build(tmp_path: Path, src: str = MODULE_SRC) -> _Capture: + (tmp_path / "m.py").write_text(src) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return cap + + +class TestDuplicateQualifiedNameDefinitions: + def test_both_branch_definitions_become_distinct_nodes( + self, tmp_path: Path + ) -> None: + cap = _build(tmp_path) + impl_start_lines = sorted( + int(props[cs.KEY_START_LINE]) + for (label, _uid), props in cap.nodes.items() + if label == cs.NodeLabel.FUNCTION + and props.get(cs.KEY_NAME) == "impl" + and props.get(cs.KEY_START_LINE) is not None + ) + assert impl_start_lines == [6, 11], impl_start_lines + + def test_call_links_to_both_duplicate_definitions(self, tmp_path: Path) -> None: + cap = _build(tmp_path) + calls_to_impl = [ + target + for (_fl, from_val, rel_type, _tl, target) in cap.rels + if rel_type == cs.RelationshipType.CALLS + and str(from_val).endswith(".caller") + and ".impl" in str(target) + ] + assert len(calls_to_impl) == 2, calls_to_impl + + +CLASS_SRC = """import os + + +if os.environ.get("FLAG"): + + class Widget: + def render(self) -> str: + return "real" + +else: + + class Widget: + def render(self) -> str: + return "stub" +""" + + +class TestDuplicateQualifiedNameClasses: + def test_both_branch_classes_become_distinct_nodes(self, tmp_path: Path) -> None: + cap = _build(tmp_path, CLASS_SRC) + widget_start_lines = sorted( + int(props[cs.KEY_START_LINE]) + for (label, _uid), props in cap.nodes.items() + if 
label == cs.NodeLabel.CLASS + and props.get(cs.KEY_NAME) == "Widget" + and props.get(cs.KEY_START_LINE) is not None + ) + assert widget_start_lines == [6, 12], widget_start_lines + + def test_methods_of_both_branch_classes_survive(self, tmp_path: Path) -> None: + cap = _build(tmp_path, CLASS_SRC) + render_start_lines = sorted( + int(props[cs.KEY_START_LINE]) + for (label, _uid), props in cap.nodes.items() + if label == cs.NodeLabel.METHOD + and props.get(cs.KEY_NAME) == "render" + and props.get(cs.KEY_START_LINE) is not None + ) + assert render_start_lines == [7, 13], render_start_lines + + +METHOD_DUP_SRC = """import os + + +class Service: + + if os.environ.get("FLAG"): + + def run(self) -> str: + return "real" + + else: + + def run(self) -> str: + return "stub" +""" + + +class TestDuplicateQualifiedNameMethodsInOneClass: + def test_both_branch_methods_in_one_class_survive(self, tmp_path: Path) -> None: + cap = _build(tmp_path, METHOD_DUP_SRC) + run_start_lines = sorted( + int(props[cs.KEY_START_LINE]) + for (label, _uid), props in cap.nodes.items() + if label == cs.NodeLabel.METHOD + and props.get(cs.KEY_NAME) == "run" + and props.get(cs.KEY_START_LINE) is not None + ) + assert run_start_lines == [8, 13], run_start_lines diff --git a/codebase_rag/tests/test_embedder.py b/codebase_rag/tests/test_embedder.py index 401044582..6eb009f3e 100644 --- a/codebase_rag/tests/test_embedder.py +++ b/codebase_rag/tests/test_embedder.py @@ -1,10 +1,13 @@ from __future__ import annotations +import tempfile from collections.abc import Generator +from pathlib import Path from unittest.mock import MagicMock, patch import pytest +from codebase_rag.embedder import EmbeddingCache, clear_embedding_cache from codebase_rag.utils.dependencies import has_torch, has_transformers @@ -44,6 +47,13 @@ def reset_model_cache() -> Generator[None, None, None]: get_model.cache_clear() +@pytest.fixture(autouse=True) +def reset_cache() -> Generator[None, None, None]: + clear_embedding_cache() + yield 
+ clear_embedding_cache() + + @pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") def test_embed_code_returns_768_dimensional_vector( mock_unixcoder: MagicMock, reset_model_cache: None @@ -146,6 +156,65 @@ def test_get_model_does_not_use_cuda_when_unavailable(reset_model_cache: None) - mock_instance.cuda.assert_not_called() +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_select_device_prefers_cuda() -> None: + from codebase_rag.embedder import ( + _select_device, # ty: ignore[possibly-missing-import] + ) + + with patch("codebase_rag.embedder.torch.cuda.is_available", return_value=True): + with patch( + "codebase_rag.embedder.torch.backends.mps.is_available", return_value=True + ): + assert _select_device() == "cuda" + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_select_device_uses_mps_when_cuda_unavailable() -> None: + from codebase_rag.embedder import ( + _select_device, # ty: ignore[possibly-missing-import] + ) + + with patch("codebase_rag.embedder.torch.cuda.is_available", return_value=False): + with patch( + "codebase_rag.embedder.torch.backends.mps.is_available", return_value=True + ): + assert _select_device() == "mps" + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_select_device_falls_back_to_cpu() -> None: + from codebase_rag.embedder import ( + _select_device, # ty: ignore[possibly-missing-import] + ) + + with patch("codebase_rag.embedder.torch.cuda.is_available", return_value=False): + with patch( + "codebase_rag.embedder.torch.backends.mps.is_available", return_value=False + ): + assert _select_device() == "cpu" + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_get_model_moves_to_mps_when_available(reset_model_cache: None) -> None: + from codebase_rag.embedder import get_model # ty: 
ignore[possibly-missing-import] + + with patch("codebase_rag.embedder.UniXcoder") as mock_unixcoder_class: + mock_instance = MagicMock() + mock_instance.eval.return_value = mock_instance + mock_instance.to.return_value = mock_instance + mock_unixcoder_class.return_value = mock_instance + + with patch("codebase_rag.embedder.torch.cuda.is_available", return_value=False): + with patch( + "codebase_rag.embedder.torch.backends.mps.is_available", + return_value=True, + ): + get_model() + + mock_instance.to.assert_called_once_with("mps") + + @pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") @pytest.mark.slow def test_embed_code_integration(reset_model_cache: None) -> None: @@ -192,3 +261,311 @@ def test_embed_code_raises_without_dependencies() -> None: with pytest.raises(RuntimeError, match="Semantic search requires"): embed_code("x = 1") + + +def test_embedding_cache_put_and_get() -> None: + cache = EmbeddingCache() + embedding = [0.1, 0.2, 0.3] + cache.put("def foo(): pass", embedding) + assert cache.get("def foo(): pass") == embedding + + +def test_embedding_cache_miss_returns_none() -> None: + cache = EmbeddingCache() + assert cache.get("unknown code") is None + + +def test_embedding_cache_different_content_different_key() -> None: + cache = EmbeddingCache() + cache.put("code_a", [1.0]) + cache.put("code_b", [2.0]) + assert cache.get("code_a") == [1.0] + assert cache.get("code_b") == [2.0] + + +def test_embedding_cache_overwrite() -> None: + cache = EmbeddingCache() + cache.put("code_a", [1.0]) + cache.put("code_a", [9.9]) + assert cache.get("code_a") == [9.9] + + +def test_embedding_cache_len() -> None: + cache = EmbeddingCache() + assert len(cache) == 0 + cache.put("a", [1.0]) + assert len(cache) == 1 + cache.put("b", [2.0]) + assert len(cache) == 2 + + +def test_embedding_cache_clear() -> None: + cache = EmbeddingCache() + cache.put("a", [1.0]) + cache.put("b", [2.0]) + cache.clear() + assert len(cache) == 0 + assert 
cache.get("a") is None + + +def test_embedding_cache_get_many() -> None: + cache = EmbeddingCache() + cache.put("a", [1.0]) + cache.put("b", [2.0]) + results = cache.get_many(["a", "c", "b"]) + assert results == {0: [1.0], 2: [2.0]} + + +def test_embedding_cache_put_many() -> None: + cache = EmbeddingCache() + cache.put_many(["x", "y"], [[1.0], [2.0]]) + assert cache.get("x") == [1.0] + assert cache.get("y") == [2.0] + + +def test_embedding_cache_save_and_load() -> None: + with tempfile.TemporaryDirectory() as tmpdir: + cache_path = Path(tmpdir) / "test_cache.json" + cache = EmbeddingCache(path=cache_path) + cache.put("hello", [0.5, 0.6]) + cache.save() + + assert cache_path.exists() + + cache2 = EmbeddingCache(path=cache_path) + cache2.load() + assert cache2.get("hello") == [0.5, 0.6] + + +def test_embedding_cache_load_nonexistent_path() -> None: + cache = EmbeddingCache(path=Path("/nonexistent/path/cache.json")) + cache.load() + assert len(cache) == 0 + + +def test_embedding_cache_load_corrupt_file() -> None: + with tempfile.TemporaryDirectory() as tmpdir: + cache_path = Path(tmpdir) / "corrupt.json" + cache_path.write_text("not valid json data", encoding="utf-8") + cache = EmbeddingCache(path=cache_path) + cache.load() + assert len(cache) == 0 + + +def test_embedding_cache_save_no_path() -> None: + cache = EmbeddingCache(path=None) + cache.put("a", [1.0]) + cache.save() + + +def test_embedding_cache_load_no_path() -> None: + cache = EmbeddingCache(path=None) + cache.load() + assert len(cache) == 0 + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_embed_code_uses_cache( + mock_unixcoder: MagicMock, reset_model_cache: None +) -> None: + import torch + + from codebase_rag.embedder import embed_code, get_embedding_cache + + mock_embedding = torch.zeros(1, 768) + mock_unixcoder.return_value = (torch.zeros(1, 5, 768), mock_embedding) + + cache = get_embedding_cache() + cache.put("cached_code", [0.42] * 768) + + 
with patch("codebase_rag.embedder.get_model", return_value=mock_unixcoder): + result = embed_code("cached_code") + + assert result == [0.42] * 768 + mock_unixcoder.tokenize.assert_not_called() + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_embed_code_populates_cache( + mock_unixcoder: MagicMock, reset_model_cache: None +) -> None: + import torch + + from codebase_rag.embedder import embed_code, get_embedding_cache + + mock_embedding = torch.ones(1, 768) + mock_unixcoder.return_value = (torch.zeros(1, 5, 768), mock_embedding) + + with patch("codebase_rag.embedder.get_model", return_value=mock_unixcoder): + embed_code("new_code") + + cache = get_embedding_cache() + assert cache.get("new_code") is not None + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_embed_code_batch_empty_list(reset_model_cache: None) -> None: + from codebase_rag.embedder import embed_code_batch + + assert embed_code_batch([]) == [] + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_embed_code_batch_returns_correct_count( + mock_unixcoder: MagicMock, reset_model_cache: None +) -> None: + import torch + + from codebase_rag.embedder import embed_code_batch + + snippets = ["def a(): pass", "def b(): pass", "def c(): pass"] + mock_unixcoder.tokenize.return_value = [[1, 2, 3]] * 3 + mock_embedding = torch.zeros(3, 768) + mock_unixcoder.return_value = (torch.zeros(3, 5, 768), mock_embedding) + + with patch("codebase_rag.embedder.get_model", return_value=mock_unixcoder): + results = embed_code_batch(snippets) + + assert len(results) == 3 + assert all(len(emb) == 768 for emb in results) + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_embed_code_batch_uses_padding( + mock_unixcoder: MagicMock, reset_model_cache: None +) -> None: + import torch + + from codebase_rag.embedder import 
embed_code_batch + + snippets = ["short", "longer code here"] + mock_unixcoder.tokenize.return_value = [[1, 2, 3, 0, 0], [1, 2, 3, 4, 5]] + mock_embedding = torch.zeros(2, 768) + mock_unixcoder.return_value = (torch.zeros(2, 5, 768), mock_embedding) + + with patch("codebase_rag.embedder.get_model", return_value=mock_unixcoder): + embed_code_batch(snippets) + + mock_unixcoder.tokenize.assert_called_once_with( + snippets, max_length=512, padding=True + ) + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_embed_code_batch_cache_hit( + mock_unixcoder: MagicMock, reset_model_cache: None +) -> None: + from codebase_rag.embedder import embed_code_batch, get_embedding_cache + + cache = get_embedding_cache() + cache.put("a", [1.0] * 768) + cache.put("b", [2.0] * 768) + + with patch("codebase_rag.embedder.get_model", return_value=mock_unixcoder): + results = embed_code_batch(["a", "b"]) + + mock_unixcoder.tokenize.assert_not_called() + assert results == [[1.0] * 768, [2.0] * 768] + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_embed_code_batch_partial_cache( + mock_unixcoder: MagicMock, reset_model_cache: None +) -> None: + import torch + + from codebase_rag.embedder import embed_code_batch, get_embedding_cache + + cache = get_embedding_cache() + cache.put("a", [1.0] * 768) + + mock_unixcoder.tokenize.return_value = [[1, 2, 3]] + mock_embedding = torch.full((1, 768), 3.0) + mock_unixcoder.return_value = (torch.zeros(1, 5, 768), mock_embedding) + + with patch("codebase_rag.embedder.get_model", return_value=mock_unixcoder): + results = embed_code_batch(["a", "b"]) + + assert results[0] == [1.0] * 768 + assert results[1] == [3.0] * 768 + mock_unixcoder.tokenize.assert_called_once_with(["b"], max_length=512, padding=True) + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_embed_code_batch_populates_cache( + 
mock_unixcoder: MagicMock, reset_model_cache: None +) -> None: + import torch + + from codebase_rag.embedder import embed_code_batch, get_embedding_cache + + mock_unixcoder.tokenize.return_value = [[1, 2, 3]] + mock_embedding = torch.ones(1, 768) + mock_unixcoder.return_value = (torch.zeros(1, 5, 768), mock_embedding) + + with patch("codebase_rag.embedder.get_model", return_value=mock_unixcoder): + embed_code_batch(["new_snippet"]) + + cache = get_embedding_cache() + assert cache.get("new_snippet") is not None + + +@pytest.mark.skipif(not _has_semantic_deps(), reason="torch/transformers not installed") +def test_embed_code_batch_respects_batch_size( + mock_unixcoder: MagicMock, reset_model_cache: None +) -> None: + import torch + + from codebase_rag.embedder import embed_code_batch + + snippets = [f"def f{i}(): pass" for i in range(5)] + + def side_effect_tokenize(batch: list[str], **kwargs: int | bool) -> list[list[int]]: + return [[1, 2, 3]] * len(batch) + + mock_unixcoder.tokenize.side_effect = side_effect_tokenize + + def side_effect_forward(tensor: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + n = tensor.shape[0] + return torch.zeros(n, 5, 768), torch.zeros(n, 768) + + mock_unixcoder.side_effect = side_effect_forward + + with patch("codebase_rag.embedder.get_model", return_value=mock_unixcoder): + results = embed_code_batch(snippets, batch_size=2) + + assert len(results) == 5 + assert mock_unixcoder.tokenize.call_count == 3 + + +def test_embed_code_batch_raises_without_dependencies() -> None: + if _has_semantic_deps(): + pytest.skip("Dependencies are installed") + + from codebase_rag.embedder import embed_code_batch + + with pytest.raises(RuntimeError, match="Semantic search requires"): + embed_code_batch(["x = 1"]) + + +def test_embedding_default_batch_size_at_least_64() -> None: + from codebase_rag import constants as cs + + assert cs.EMBEDDING_DEFAULT_BATCH_SIZE >= 64 + + +def test_embedding_cache_persistence_roundtrip() -> None: + with 
tempfile.TemporaryDirectory() as tmpdir: + cache_path = Path(tmpdir) / "subdir" / "cache.json" + + cache1 = EmbeddingCache(path=cache_path) + cache1.put("fn_a", [0.1, 0.2]) + cache1.put("fn_b", [0.3, 0.4]) + cache1.save() + + cache2 = EmbeddingCache(path=cache_path) + cache2.load() + assert cache2.get("fn_a") == [0.1, 0.2] + assert cache2.get("fn_b") == [0.3, 0.4] + assert cache2.get("fn_c") is None + assert len(cache2) == 2 diff --git a/codebase_rag/tests/test_eval_imports_internal_modules.py b/codebase_rag/tests/test_eval_imports_internal_modules.py new file mode 100644 index 000000000..4a30a707b --- /dev/null +++ b/codebase_rag/tests/test_eval_imports_internal_modules.py @@ -0,0 +1,38 @@ +# (H) Covers the L1 eval (evals/cgr_graph.py): cgr emits placeholder MODULE nodes +# (H) for unresolved imports whose path is the dotted import name (e.g. +# (H) "thrift.TTornado"). Those must not be treated as internal import targets when +# (H) scoring IMPORTS, or every "from .x import ..." collapses onto them as a +# (H) false positive. Only real in-repo .py modules count as internal. +from __future__ import annotations + +from codebase_rag import constants as cs +from evals.cgr_graph import _CapturingIngestor, _to_graph_data + +_MODULE = cs.NodeLabel.MODULE.value +_IMPORTS = cs.RelationshipType.IMPORTS.value + + +def _module(ingestor: _CapturingIngestor, qn: str, path: str) -> None: + ingestor.ensure_node_batch( + _MODULE, + {cs.KEY_QUALIFIED_NAME: qn, cs.KEY_NAME: qn, cs.KEY_PATH: path}, + ) + + +def test_import_placeholder_module_not_scored_as_internal() -> None: + ingestor = _CapturingIngestor() + _module(ingestor, "proj.src", "src.py") + _module(ingestor, "proj.real", "pkg/real.py") + # (H) Placeholder for an unresolved import: path is the dotted name, not a file. 
+ _module(ingestor, "proj.placeholder", "proj.placeholder") + + for target in ("proj.real", "proj.placeholder"): + ingestor.ensure_relationship_batch( + (_MODULE, cs.KEY_QUALIFIED_NAME, "proj.src"), + _IMPORTS, + (_MODULE, cs.KEY_QUALIFIED_NAME, target), + ) + + graph = _to_graph_data(ingestor, "proj") + import_targets = {e.target_name for e in graph.name_edges if e.rel_type == _IMPORTS} + assert import_targets == {"pkg/real.py"}, import_targets diff --git a/codebase_rag/tests/test_eval_module_calls.py b/codebase_rag/tests/test_eval_module_calls.py new file mode 100644 index 000000000..b63938676 --- /dev/null +++ b/codebase_rag/tests/test_eval_module_calls.py @@ -0,0 +1,160 @@ +from __future__ import annotations + +from pathlib import Path + +from evals.module_calls import ( + cgr_module_calls, + oracle_module_calls, + score_module_calls, +) + +_FIXTURE = """def make_default(): + return 1 + + +def helper(): + return 2 + + +def main(): + helper() + + +def with_default(x=make_default()): + return x + + +CONFIG = make_default() + + +if __name__ == "__main__": + main() +""" + + +def _names(edges: set[tuple[str, ...]]) -> set[str]: + return {e.target_name for e in edges} + + +class TestModuleCallEval: + def _write(self, tmp_path: Path) -> Path: + proj = tmp_path / "proj" + proj.mkdir() + (proj / "app.py").write_text(_FIXTURE, encoding="utf-8") + return proj + + def test_oracle_counts_only_definition_time_calls(self, tmp_path: Path) -> None: + proj = self._write(tmp_path) + oracle = oracle_module_calls(proj, "proj") + + # (H) make_default runs at module load (CONFIG = ... and the default arg); + # (H) main runs from the `if __name__` block; helper only runs inside main's + # (H) body, so it is NOT a module-level call. 
+ assert _names(oracle) == {"make_default", "main"} + + def test_cgr_matches_oracle_module_calls(self, tmp_path: Path) -> None: + proj = self._write(tmp_path) + cgr = cgr_module_calls(proj, "proj") + oracle = oracle_module_calls(proj, "proj") + + _tp, fp, fn, precision, recall = score_module_calls(cgr, oracle) + + assert fp == 0, f"spurious module calls: {sorted(_names(cgr - oracle))}" + assert fn == 0, f"missed module calls: {sorted(_names(oracle - cgr))}" + assert precision == 1.0 + assert recall == 1.0 + + def test_nested_call_is_not_module_attributed(self, tmp_path: Path) -> None: + proj = self._write(tmp_path) + cgr = cgr_module_calls(proj, "proj") + + assert "helper" not in _names(cgr) + + def _oracle_for(self, tmp_path: Path, source: str) -> set[str]: + proj = tmp_path / "proj" + proj.mkdir() + (proj / "app.py").write_text(source, encoding="utf-8") + return _names(oracle_module_calls(proj, "proj")) + + def test_lambda_body_call_is_deferred(self, tmp_path: Path) -> None: + # (H) `helper` runs only when `work()` is called, not at import. + names = self._oracle_for( + tmp_path, + "def helper():\n return 1\n\n\nwork = lambda: helper()\n", + ) + assert "helper" not in names + + def test_generator_expression_call_is_deferred(self, tmp_path: Path) -> None: + # (H) a generator is lazy: `helper` runs only when the generator is consumed. + names = self._oracle_for( + tmp_path, + "def helper():\n return 1\n\n\ngen = (helper() for _ in range(2))\n", + ) + assert "helper" not in names + + def test_generator_outermost_iterable_is_eager(self, tmp_path: Path) -> None: + # (H) the first iterable of a generator is evaluated when the generator is + # (H) created (at import), so `load_items` is a module call but the lazy + # (H) body call `helper` is not. 
+ names = self._oracle_for( + tmp_path, + "def helper():\n return 1\n\n\n" + "def load_items():\n return [1]\n\n\n" + "gen = (helper(x) for x in load_items())\n", + ) + assert "load_items" in names + assert "helper" not in names + + def test_list_comprehension_call_is_module_attributed(self, tmp_path: Path) -> None: + # (H) a list comprehension runs eagerly at import, so its call counts. + names = self._oracle_for( + tmp_path, + "def helper():\n return 1\n\n\nout = [helper() for _ in range(2)]\n", + ) + assert "helper" in names + + def test_class_decorator_is_module_attributed(self, tmp_path: Path) -> None: + # (H) a bare class decorator runs at module load -> a module call. + names = self._oracle_for( + tmp_path, + "def deco(cls):\n return cls\n\n\n@deco\nclass Widget:\n pass\n", + ) + assert "deco" in names + + def _cgr_for(self, tmp_path: Path, source: str) -> set[str]: + proj = tmp_path / "proj" + proj.mkdir() + (proj / "app.py").write_text(source, encoding="utf-8") + return _names(cgr_module_calls(proj, "proj")) + + def test_classless_module_construction_credited_via_instantiates( + self, tmp_path: Path + ) -> None: + # (H) a dataclass has no explicit __init__, so cgr emits no CALLS for its + # (H) construction, only INSTANTIATES -> the class. The eval must still + # (H) credit the module-scope `Config(1)` so L2 recall stays 1.0. + source = ( + "from dataclasses import dataclass\n\n\n" + "@dataclass\nclass Config:\n n: int\n\n\n" + "CONFIG = Config(1)\n" + ) + assert "Config" in self._cgr_for(tmp_path, source) + + def test_return_annotation_counted_without_future_import( + self, tmp_path: Path + ) -> None: + # (H) without postponed annotations, `Result()` runs at import. 
+ names = self._oracle_for( + tmp_path, + "def Result():\n return 1\n\n\ndef route() -> Result():\n return 1\n", + ) + assert "Result" in names + + def test_annotation_not_counted_with_future_import(self, tmp_path: Path) -> None: + # (H) with postponed annotations, the annotation is a string and never runs. + names = self._oracle_for( + tmp_path, + "from __future__ import annotations\n\n\n" + "def Result():\n return 1\n\n\ndef route() -> Result():\n return 1\n", + ) + assert "Result" not in names diff --git a/codebase_rag/tests/test_eval_score_span.py b/codebase_rag/tests/test_eval_score_span.py new file mode 100644 index 000000000..2b1031915 --- /dev/null +++ b/codebase_rag/tests/test_eval_score_span.py @@ -0,0 +1,54 @@ +# (H) Covers the L1 eval span grading (evals/score.score_span): among nodes both +# (H) cgr and the oracle identify by (kind, file, start), it grades how often cgr's +# (H) end_line agrees with the oracle's. A disagreement must surface as fp+fn (not +# (H) be masked by node identity already being 1.0), and nodes only one side has +# (H) must not be graded at all. +from __future__ import annotations + +from codebase_rag import constants as cs +from evals import constants as ec +from evals.score import score_span +from evals.types_defs import DefNode, GraphData, NodeKey + +_FUNC = cs.NodeLabel.FUNCTION.value +_KINDS = (cs.NodeLabel.FUNCTION,) + + +def _graph(*nodes: tuple[str, int, int]) -> GraphData: + # (H) Each node is (file, start, end) for a Function. 
+ mapping: dict[NodeKey, DefNode] = {} + for file, start, end in nodes: + key = NodeKey(_FUNC, file, start) + mapping[key] = DefNode(key, "f", end) + return GraphData(nodes=mapping, edges=set(), name_edges=set()) + + +def test_span_exact_match_scores_perfect() -> None: + cgr = _graph(("a.rs", 1, 5), ("a.rs", 10, 20)) + oracle = _graph(("a.rs", 1, 5), ("a.rs", 10, 20)) + by_label = {row["label"]: row for row in score_span(cgr, oracle, _KINDS).rows} + row = by_label[_FUNC] + assert row["precision"] == 1.0 and row["recall"] == 1.0 + assert row["tp"] == 2 and row["fp"] == 0 and row["fn"] == 0 + + +def test_span_end_line_mismatch_is_penalized_and_surfaced() -> None: + cgr = _graph(("a.rs", 1, 5), ("a.rs", 10, 99)) + oracle = _graph(("a.rs", 1, 5), ("a.rs", 10, 20)) + result = score_span(cgr, oracle, _KINDS) + by_label = {row["label"]: row for row in result.rows} + row = by_label[_FUNC] + assert row["tp"] == 1 and row["fp"] == 1 and row["fn"] == 1 + assert row["precision"] == 0.5 and row["recall"] == 0.5 + bucket = result.diff[ec.DIFF_SPAN_PREFIX + _FUNC] + assert any("10-20" in line for line in bucket["missing"]), bucket + assert any("10-99" in line for line in bucket["extra"]), bucket + + +def test_span_only_grades_co_identified_nodes() -> None: + # (H) cgr has an extra node (start 30) the oracle lacks; it must not be graded. 
+ cgr = _graph(("a.rs", 1, 5), ("a.rs", 30, 40)) + oracle = _graph(("a.rs", 1, 5)) + by_label = {row["label"]: row for row in score_span(cgr, oracle, _KINDS).rows} + row = by_label[_FUNC] + assert row["tp"] == 1 and row["fp"] == 0 and row["fn"] == 0 diff --git a/codebase_rag/tests/test_external_package_name_collision.py b/codebase_rag/tests/test_external_package_name_collision.py new file mode 100644 index 000000000..f5c6d51d7 --- /dev/null +++ b/codebase_rag/tests/test_external_package_name_collision.py @@ -0,0 +1,87 @@ +# (H) L2 residual from the evals/ harness: when cgr is pointed at a directory that +# (H) is itself a package (has __init__.py), a bare absolute import like +# (H) `from mcp.server import X` is the EXTERNAL top-level package, not the internal +# (H) sibling subpackage `<pkg>.mcp` (which is reachable only as that dotted name +# (H) or relatively). cgr used to mis-resolve it to the internal package. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _build(tmp_path: Path, 
importer: str, src: str) -> _Capture: + (tmp_path / "__init__.py").touch() + mcp = tmp_path / "mcp" + mcp.mkdir() + mcp.joinpath("__init__.py").touch() + mcp.joinpath("server.py").write_text("Thing = 1\n") + (tmp_path / importer).write_text(src) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return cap + + +def _imports(cap: _Capture) -> set[tuple[PropertyValue, PropertyValue]]: + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.IMPORTS + } + + +class TestExternalPackageNameCollision: + def test_bare_absolute_import_is_external_not_internal( + self, tmp_path: Path + ) -> None: + cap = _build( + tmp_path, "client.py", "from mcp.server import Thing\n\nx = Thing\n" + ) + edges = _imports(cap) + assert ("proj.client", "proj.mcp.server") not in edges, edges + assert ("proj.client", "proj.mcp") not in edges, edges + + def test_relative_import_to_subpackage_still_internal(self, tmp_path: Path) -> None: + cap = _build( + tmp_path, "client.py", "from .mcp.server import Thing\n\nx = Thing\n" + ) + edges = _imports(cap) + assert ("proj.client", "proj.mcp.server") in edges, edges diff --git a/codebase_rag/tests/test_function_ingest.py b/codebase_rag/tests/test_function_ingest.py index 814380ce4..1d7b6e8a6 100644 --- a/codebase_rag/tests/test_function_ingest.py +++ b/codebase_rag/tests/test_function_ingest.py @@ -234,7 +234,7 @@ def inner_func(): lang_config = queries[cs.SupportedLanguage.PYTHON]["config"] result = definition_processor._is_method(inner_func, lang_config) - assert result is True + assert result is False class TestFormatNestedQn: @@ -317,7 +317,7 @@ def test_top_level_function( lang_config = queries[cs.SupportedLanguage.PYTHON]["config"] parent_type, parent_qn = definition_processor._determine_function_parent( - func_node, "proj.module", lang_config + func_node, 
"proj.module.my_function", "proj.module", lang_config ) assert parent_type == "Module" assert parent_qn == "proj.module" @@ -342,7 +342,7 @@ def inner(): lang_config = queries[cs.SupportedLanguage.PYTHON]["config"] parent_type, parent_qn = definition_processor._determine_function_parent( - inner_func, "proj.module", lang_config + inner_func, "proj.module.outer.inner", "proj.module", lang_config ) assert parent_type == "Function" assert parent_qn == "proj.module.outer" @@ -466,7 +466,9 @@ def test_basic_function_props( is_exported=False, ) - result = definition_processor._build_function_props(func_node, resolution) + result = definition_processor._build_function_props( + func_node, resolution, "proj.module" + ) assert result["qualified_name"] == "proj.module.my_function" assert result["name"] == "my_function" @@ -497,7 +499,9 @@ def test_exported_function_props( is_exported=True, ) - result = definition_processor._build_function_props(func_node, resolution) + result = definition_processor._build_function_props( + func_node, resolution, "proj.module" + ) assert result["is_exported"] is True diff --git a/codebase_rag/tests/test_function_local_definitions.py b/codebase_rag/tests/test_function_local_definitions.py new file mode 100644 index 000000000..2bd844626 --- /dev/null +++ b/codebase_rag/tests/test_function_local_definitions.py @@ -0,0 +1,111 @@ +# (H) Finding #3 from the evals/ harness: methods of a class defined inside a +# (H) function body (function-local class) were dropped. They are now captured by +# (H) default (CAPTURE_FUNCTION_LOCAL_DEFINITIONS=True); explicitly disabling the +# (H) flag restores the historical behaviour of skipping them. 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.config import settings +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "localproj" + +MODULE_SRC = """class Holder: + def make(self) -> object: + class Local: + def helper(self) -> str: + return "x" + + return Local() +""" + +_RelTuple = tuple[str, PropertyValue, str, str, PropertyValue] + + +class _Capture: + def __init__(self) -> None: + self.nodes: dict[tuple[str, PropertyValue], PropertyDict] = {} + self.rels: list[_RelTuple] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + uid = properties[cs.NODE_UNIQUE_CONSTRAINTS[label]] + self.nodes[(str(label), uid)] = dict(properties) + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append( + ( + str(from_spec[0]), + from_spec[2], + str(rel_type), + str(to_spec[0]), + to_spec[2], + ) + ) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _build(tmp_path: Path) -> _Capture: + (tmp_path / "m.py").write_text(MODULE_SRC) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return cap + + +def _local_method_lines(cap: _Capture) -> list[int]: + return sorted( + int(props[cs.KEY_START_LINE]) + for (label, _uid), props in cap.nodes.items() + if label == cs.NodeLabel.METHOD + and 
props.get(cs.KEY_NAME) == "helper" + and props.get(cs.KEY_START_LINE) is not None + ) + + +class TestFunctionLocalDefinitions: + def test_default_captures_local_class_methods(self, tmp_path: Path) -> None: + cap = _build(tmp_path) + assert _local_method_lines(cap) == [4] + + defines_method_to_helper = [ + target + for (_fl, _fv, rel_type, _tl, target) in cap.rels + if rel_type == cs.RelationshipType.DEFINES_METHOD + and str(target).endswith(".Local.helper") + ] + assert len(defines_method_to_helper) == 1, defines_method_to_helper + + def test_flag_off_skips_local_class_methods( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.setattr(settings, "CAPTURE_FUNCTION_LOCAL_DEFINITIONS", False) + cap = _build(tmp_path) + assert _local_method_lines(cap) == [] diff --git a/codebase_rag/tests/test_getattr_dispatch.py b/codebase_rag/tests/test_getattr_dispatch.py new file mode 100644 index 000000000..eab8f8e39 --- /dev/null +++ b/codebase_rag/tests/test_getattr_dispatch.py @@ -0,0 +1,101 @@ +# (H) L3 finding from the evals/ harness: JavaTypeResolverMixin._find_registry_entries_under +# (H) does `finder = getattr(self.function_registry, cs.METHOD_FIND_WITH_PREFIX, None)` then +# (H) calls finder(...). The call dispatches to FunctionRegistryTrie.find_with_prefix at +# (H) runtime. Resolving it needs getattr(recv, name) modelled as recv.<name>, where the +# (H) name argument is a string literal or a module constant resolved to its string value. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +FILES = { + "pkg/__init__.py": "", + "pkg/names.py": 'METHOD_DO = "do"\n', + "pkg/helper.py": ( + "class Helper:\n def do(self, value):\n return value\n" + ), + "pkg/worker.py": ( + "from . 
import names\n" + "from .helper import Helper\n\n\n" + "class Worker:\n" + " def __init__(self) -> None:\n" + " self._helper = Helper()\n\n" + " def via_constant(self, value):\n" + " fn = getattr(self._helper, names.METHOD_DO, None)\n" + " if callable(fn):\n" + " return fn(value)\n" + " return None\n\n" + " def via_literal(self, value):\n" + ' fn = getattr(self._helper, "do", None)\n' + " return fn(value)\n" + ), +} + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + for rel, content in FILES.items(): + p = tmp_path / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestGetattrDispatch: + def test_getattr_with_constant_name_resolves(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.worker.Worker.via_constant", + "proj.pkg.helper.Helper.do", + ) in calls, calls + + def test_getattr_with_string_literal_resolves(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( 
+ "proj.pkg.worker.Worker.via_literal", + "proj.pkg.helper.Helper.do", + ) in calls, calls diff --git a/codebase_rag/tests/test_github_issues_integration.py b/codebase_rag/tests/test_github_issues_integration.py index 2b6bc081f..423945657 100644 --- a/codebase_rag/tests/test_github_issues_integration.py +++ b/codebase_rag/tests/test_github_issues_integration.py @@ -1,7 +1,10 @@ import os from unittest.mock import patch +import pytest + from codebase_rag.config import AppConfig +from codebase_rag.constants import GoogleProviderType class TestGitHubIssuesIntegration: @@ -142,9 +145,6 @@ def test_openai_compatible_endpoints(self) -> None: assert orchestrator.endpoint == "https://api.together.xyz/v1" def test_vertex_ai_enterprise_scenario(self) -> None: - """ - Test enterprise Vertex AI configuration scenario. - """ env_content = { "ORCHESTRATOR_PROVIDER": "google", "ORCHESTRATOR_MODEL": "gemini-2.5-pro", @@ -162,9 +162,63 @@ def test_vertex_ai_enterprise_scenario(self) -> None: assert orchestrator.model_id == "gemini-2.5-pro" assert orchestrator.project_id == "my-enterprise-project" assert orchestrator.region == "us-central1" - assert orchestrator.provider_type == "vertex" + assert orchestrator.provider_type == GoogleProviderType.VERTEX assert orchestrator.service_account_file == "/path/to/service-account.json" + def test_vertex_ai_skips_api_key_validation(self) -> None: + env_content = { + "ORCHESTRATOR_PROVIDER": "google", + "ORCHESTRATOR_MODEL": "gemini-2.5-pro", + "ORCHESTRATOR_PROJECT_ID": "my-project", + "ORCHESTRATOR_REGION": "us-central1", + "ORCHESTRATOR_PROVIDER_TYPE": "vertex", + "ORCHESTRATOR_SERVICE_ACCOUNT_FILE": "/path/to/sa.json", + "CYPHER_PROVIDER": "google", + "CYPHER_MODEL": "gemini-2.5-flash", + "CYPHER_PROJECT_ID": "my-project", + "CYPHER_REGION": "us-central1", + "CYPHER_PROVIDER_TYPE": "vertex", + "CYPHER_SERVICE_ACCOUNT_FILE": "/path/to/sa.json", + } + + with patch.dict(os.environ, env_content): + config = AppConfig() + + orchestrator = 
config.active_orchestrator_config + orchestrator.validate_api_key("orchestrator") + + cypher = config.active_cypher_config + cypher.validate_api_key("cypher") + + def test_vertex_ai_with_google_api_key_env_does_not_error(self) -> None: + env_content = { + "ORCHESTRATOR_PROVIDER": "google", + "ORCHESTRATOR_MODEL": "gemini-2.5-pro", + "ORCHESTRATOR_PROJECT_ID": "my-project", + "ORCHESTRATOR_PROVIDER_TYPE": "vertex", + "ORCHESTRATOR_SERVICE_ACCOUNT_FILE": "/path/to/sa.json", + "GOOGLE_API_KEY": "stray-key-from-env", + } + + with patch.dict(os.environ, env_content): + config = AppConfig() + orchestrator = config.active_orchestrator_config + orchestrator.validate_api_key("orchestrator") + + def test_google_gla_without_api_key_raises(self) -> None: + env_content = { + "ORCHESTRATOR_PROVIDER": "google", + "ORCHESTRATOR_MODEL": "gemini-2.5-pro", + "ORCHESTRATOR_PROVIDER_TYPE": "gla", + "ORCHESTRATOR_API_KEY": "", + } + + with patch.dict(os.environ, env_content): + config = AppConfig() + orchestrator = config.active_orchestrator_config + with pytest.raises(ValueError, match="API Key Missing"): + orchestrator.validate_api_key("orchestrator") + def test_reasoning_model_thinking_budget(self) -> None: """ Test configuration for reasoning models with thinking budget. diff --git a/codebase_rag/tests/test_go_containment_oracle.py b/codebase_rag/tests/test_go_containment_oracle.py new file mode 100644 index 000000000..f801132ed --- /dev/null +++ b/codebase_rag/tests/test_go_containment_oracle.py @@ -0,0 +1,67 @@ +# (H) Covers Go containment-edge validation: cgr's DEFINES (Module->top-level +# (H) func/type) and DEFINES_METHOD (struct Class->receiver method) edges are +# (H) graded against the independent go/ast oracle (evals/oracles/go_ast.go), +# (H) joined on (kind, file, line) endpoints. The sample exercises a same-file +# (H) method and a cross-file method (receiver type declared in another file). 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_go_graph +from evals.oracles import go_available, run_go_oracle +from evals.score import score_edge_types + +GO_TYPES = """\ +package demo + +type Shape interface { Area() float64 } + +type Point struct{ X int } + +func (p Point) Area() float64 { return 1.0 } +""" + +GO_MORE = """\ +package demo + +func Free(a int) int { return a + 1 } + +func (p Point) Scale(k int) int { return p.X * k } +""" + + +def _require_go() -> None: + if not go_available(): + pytest.skip("go toolchain not available") + if cs.SupportedLanguage.GO not in load_parsers()[0]: + pytest.skip("go parser not available") + + +def test_cgr_matches_go_oracle_on_containment_edges(tmp_path: Path) -> None: + _require_go() + project = tmp_path / "go_edge_test" + project.mkdir() + (project / "types.go").write_text(GO_TYPES, encoding="utf-8") + (project / "more.go").write_text(GO_MORE, encoding="utf-8") + + cgr = extract_cgr_go_graph(project, project.name) + oracle = run_go_oracle(project) + + result = score_edge_types(cgr, oracle, ec.SCORED_EDGE_TYPES) + by_label = {row["label"]: row for row in result.rows} + for label in ( + cs.RelationshipType.DEFINES.value, + cs.RelationshipType.DEFINES_METHOD.value, + ): + row = by_label.get(label) + assert row is not None, (label, by_label, result.diff) + assert row["precision"] == 1.0 and row["recall"] == 1.0, ( + label, + row, + result.diff, + ) diff --git a/codebase_rag/tests/test_go_method_call_qn.py b/codebase_rag/tests/test_go_method_call_qn.py new file mode 100644 index 000000000..40a0240ec --- /dev/null +++ b/codebase_rag/tests/test_go_method_call_qn.py @@ -0,0 +1,75 @@ +from pathlib import Path + +import pytest + +from evals.cgr_graph import _capture +from evals.oracles import go_available + +needs_go = 
pytest.mark.skipif(not go_available(), reason="go toolchain not installed") + + +def _make_repo(root: Path) -> None: + pkg = root / "p" + pkg.mkdir(parents=True) + (pkg / "m.go").write_text( + "package p\n\n" + "type T struct{}\n\n" + "func free() int { return 1 }\n\n" + "func (t T) callsFree() int { return free() }\n", + encoding="utf-8", + ) + + +@needs_go +def test_go_method_call_caller_qn_includes_receiver(tmp_path: Path) -> None: + # (H) A call inside a Go receiver method must be attributed to the method's + # (H) real node qn (p.m.T.callsFree), which binds to the receiver type, not a + # (H) receiver-dropping qn (p.m.callsFree) that matches no node. + _make_repo(tmp_path) + ingestor = _capture(tmp_path / "p", "p") + calls = { + (str(from_val), str(to_val)) + for _fl, from_val, rel, _tl, to_val in ingestor.rels + if rel == "CALLS" + } + node_qns = {str(uid) for (_label, uid) in ingestor.nodes} + + assert "p.m.T.callsFree" in node_qns + assert ("p.m.T.callsFree", "p.m.free") in calls + assert ("p.m.callsFree", "p.m.free") not in calls + + +def _make_dispatch_repo(root: Path) -> None: + pkg = root / "p" + pkg.mkdir(parents=True) + (pkg / "m.go").write_text( + "package p\n\n" + "type T struct{}\n\n" + "func (t T) helper() int { return 1 }\n\n" + "func (t T) caller() int { return t.helper() }\n\n" + "func use(v T) int { return v.helper() }\n\n" + "func make_local() int {\n" + "\tx := T{}\n" + "\treturn x.helper()\n" + "}\n", + encoding="utf-8", + ) + + +@needs_go +def test_go_receiver_method_dispatch_resolves(tmp_path: Path) -> None: + # (H) A method call on a Go receiver (`t.helper()`), a typed parameter + # (H) (`v.helper()`), and a composite-literal local (`x := T{}; x.helper()`) + # (H) must each resolve to the method node `p.m.T.helper` via local-variable + # (H) type inference, not be dropped for lack of a typed receiver. 
+ _make_dispatch_repo(tmp_path) + ingestor = _capture(tmp_path / "p", "p") + calls = { + (str(from_val), str(to_val)) + for _fl, from_val, rel, _tl, to_val in ingestor.rels + if rel == "CALLS" + } + + assert ("p.m.T.caller", "p.m.T.helper") in calls + assert ("p.m.use", "p.m.T.helper") in calls + assert ("p.m.make_local", "p.m.T.helper") in calls diff --git a/codebase_rag/tests/test_go_receiver_methods.py b/codebase_rag/tests/test_go_receiver_methods.py new file mode 100644 index 000000000..d5ebd4cc3 --- /dev/null +++ b/codebase_rag/tests/test_go_receiver_methods.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from codebase_rag.constants import ( + KEY_QUALIFIED_NAME, + NodeLabel, + RelationshipType, +) +from codebase_rag.tests.conftest import ( + create_and_run_updater, + get_nodes, + get_relationships, +) + + +@pytest.fixture +def go_methods_project(temp_repo: Path) -> Path: + project_path = temp_repo / "go_methods_test" + project_path.mkdir() + (project_path / "go.mod").write_text( + encoding="utf-8", data="module go_methods_test\n\ngo 1.22\n" + ) + (project_path / "shapes.go").write_text( + encoding="utf-8", + data="""package shapes + +type Point struct { +\tX int +\tY int +} + +type Celsius float64 + +func (p Point) Area() float64 { +\treturn 0.0 +} + +func (p *Point) Scale(f float64) { +\tp.X = p.X * int(f) +} + +func (c Celsius) ToFahrenheit() float64 { +\treturn float64(c)*9/5 + 32 +} + +func NewPoint(x int, y int) Point { +\treturn Point{X: x, Y: y} +} +""", + ) + return project_path + + +def _method_qns(mock_ingestor: MagicMock) -> set[str]: + return { + str(node[0][1].get(KEY_QUALIFIED_NAME)) + for node in get_nodes(mock_ingestor, NodeLabel.METHOD) + } + + +def test_go_value_receiver_method_is_method_node( + go_methods_project: Path, mock_ingestor: MagicMock +) -> None: + create_and_run_updater(go_methods_project, mock_ingestor, skip_if_missing="go") + project = 
go_methods_project.name + assert f"{project}.shapes.Point.Area" in _method_qns(mock_ingestor) + + +def test_go_pointer_receiver_method_is_method_node( + go_methods_project: Path, mock_ingestor: MagicMock +) -> None: + create_and_run_updater(go_methods_project, mock_ingestor, skip_if_missing="go") + project = go_methods_project.name + assert f"{project}.shapes.Point.Scale" in _method_qns(mock_ingestor) + + +def test_go_defined_type_receiver_method_is_method_node( + go_methods_project: Path, mock_ingestor: MagicMock +) -> None: + create_and_run_updater(go_methods_project, mock_ingestor, skip_if_missing="go") + project = go_methods_project.name + assert f"{project}.shapes.Celsius.ToFahrenheit" in _method_qns(mock_ingestor) + + +def test_go_free_function_not_a_method( + go_methods_project: Path, mock_ingestor: MagicMock +) -> None: + create_and_run_updater(go_methods_project, mock_ingestor, skip_if_missing="go") + project = go_methods_project.name + function_qns = { + str(node[0][1].get(KEY_QUALIFIED_NAME)) + for node in get_nodes(mock_ingestor, NodeLabel.FUNCTION) + } + assert f"{project}.shapes.NewPoint" in function_qns + # (H) A receiver method must not also be emitted as a plain Function. 
+ assert f"{project}.shapes.Area" not in function_qns + assert f"{project}.shapes.Point.Area" not in function_qns + + +def test_go_method_defined_by_receiver_type( + go_methods_project: Path, mock_ingestor: MagicMock +) -> None: + create_and_run_updater(go_methods_project, mock_ingestor, skip_if_missing="go") + project = go_methods_project.name + defines_method = get_relationships( + mock_ingestor, RelationshipType.DEFINES_METHOD.value + ) + pairs = {(call[0][0][2], call[0][2][2]) for call in defines_method} + assert (f"{project}.shapes.Point", f"{project}.shapes.Point.Area") in pairs + assert ( + f"{project}.shapes.Celsius", + f"{project}.shapes.Celsius.ToFahrenheit", + ) in pairs + + +@pytest.fixture +def go_crossfile_project(temp_repo: Path) -> Path: + # (H) Same Go package split across two files: the receiver type lives in + # (H) types.go, a method on it lives in ops.go. A Go package spans every + # (H) file in its directory, so the method must bind to the type's node. + project_path = temp_repo / "go_xfile_test" + project_path.mkdir() + (project_path / "go.mod").write_text( + encoding="utf-8", data="module go_xfile_test\n\ngo 1.22\n" + ) + (project_path / "types.go").write_text( + encoding="utf-8", + data="package shapes\n\ntype Point struct {\n\tX int\n}\n", + ) + (project_path / "ops.go").write_text( + encoding="utf-8", + data="package shapes\n\nfunc (p Point) Scale(k int) int {\n\treturn p.X * k\n}\n", + ) + return project_path + + +def test_go_crossfile_method_binds_to_declaring_type( + go_crossfile_project: Path, mock_ingestor: MagicMock +) -> None: + create_and_run_updater(go_crossfile_project, mock_ingestor, skip_if_missing="go") + project = go_crossfile_project.name + # (H) Point is declared in types.go, so its Class node and the method's qn + # (H) are anchored to the types module, not the ops module that holds Scale. 
+ assert f"{project}.types.Point.Scale" in _method_qns(mock_ingestor) + defines_method = get_relationships( + mock_ingestor, RelationshipType.DEFINES_METHOD.value + ) + pairs = {(call[0][0][2], call[0][2][2]) for call in defines_method} + assert (f"{project}.types.Point", f"{project}.types.Point.Scale") in pairs diff --git a/codebase_rag/tests/test_go_retrieval_eval.py b/codebase_rag/tests/test_go_retrieval_eval.py new file mode 100644 index 000000000..abee7fb4a --- /dev/null +++ b/codebase_rag/tests/test_go_retrieval_eval.py @@ -0,0 +1,53 @@ +from pathlib import Path + +import pytest + +from evals import constants as ec +from evals.go_retrieval import ( + cgr_go_call_edges, + oracle_go_call_edges, + score_go_retrieval, +) +from evals.oracles import go_available + +needs_go = pytest.mark.skipif(not go_available(), reason="go toolchain not installed") + + +def _make_repo(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "main.go").write_text( + "package main\n\n" + "func helper() int { return 1 }\n\n" + "func run() int { return helper() }\n\n" + "func orphan() int { return 2 }\n\n" + "func main() { run() }\n", + encoding="utf-8", + ) + + +@needs_go +def test_oracle_captures_first_party_go_calls(tmp_path: Path) -> None: + _make_repo(tmp_path) + edges, declared = oracle_go_call_edges(tmp_path) + + assert ("main.go", "helper") in edges + assert ("main.go", "run") in edges + # (H) orphan is declared but never called -> never a call edge. 
+ assert ("main.go", "orphan") not in edges + assert {"helper", "run", "orphan", "main"} <= declared + + +@needs_go +def test_cgr_matches_oracle_on_clean_go_repo(tmp_path: Path) -> None: + _make_repo(tmp_path) + oracle, declared = oracle_go_call_edges(tmp_path) + cgr = cgr_go_call_edges(tmp_path, tmp_path.name, declared) + assert cgr == oracle + + +def test_score_go_retrieval_prf() -> None: + result = score_go_retrieval( + {("a.go", "f"), ("a.go", "g")}, {("a.go", "f"), ("b.go", "h")} + ) + row = next(r for r in result.rows if r["label"] == ec.GO_RETRIEVAL_LABEL) + assert (row["tp"], row["fp"], row["fn"]) == (1, 1, 1) diff --git a/codebase_rag/tests/test_go_span_oracle.py b/codebase_rag/tests/test_go_span_oracle.py new file mode 100644 index 000000000..aafd3a334 --- /dev/null +++ b/codebase_rag/tests/test_go_span_oracle.py @@ -0,0 +1,72 @@ +# (H) Covers Go node SPAN (end_line) validation: cgr's end_line for each node is +# (H) graded against the go/ast oracle (which emits each declaration's last-token +# (H) line), joined on (kind, file, start). Exercises a multi-line struct, a +# (H) grouped `type (...)` block, an interface, and a multi-line method body. 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_go_graph +from evals.oracles import go_available, run_go_oracle +from evals.score import score_span + +GO_SRC = """\ +package demo + +type Shape interface { + Area() float64 + Name() string +} + +type Point struct { + X int + Y int +} + +type ( + Meters int + Label string +) + +func (p Point) Area( + scale float64, +) float64 { + return float64(p.X) * scale +} + +func Free(a int) int { + return a + 1 +} +""" + + +def _require_go() -> None: + if not go_available(): + pytest.skip("go toolchain not available") + if cs.SupportedLanguage.GO not in load_parsers()[0]: + pytest.skip("go parser not available") + + +def test_cgr_matches_go_oracle_on_node_spans(tmp_path: Path) -> None: + _require_go() + project = tmp_path / "go_span_test" + project.mkdir() + (project / "demo.go").write_text(GO_SRC, encoding="utf-8") + + cgr = extract_cgr_go_graph(project, project.name) + oracle = run_go_oracle(project) + + result = score_span(cgr, oracle, ec.GO_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + aggregate = by_label.get(ec.AGGREGATE_LABEL) + assert aggregate is not None, (by_label, result.diff) + assert aggregate["precision"] == 1.0 and aggregate["recall"] == 1.0, ( + aggregate, + result.diff, + ) + assert aggregate["tp"] >= 5, aggregate diff --git a/codebase_rag/tests/test_go_structure_oracle.py b/codebase_rag/tests/test_go_structure_oracle.py new file mode 100644 index 000000000..1035cb497 --- /dev/null +++ b/codebase_rag/tests/test_go_structure_oracle.py @@ -0,0 +1,90 @@ +# (H) Covers the Go structure oracle harness (evals/oracles/go_ast.go + +# (H) evals/go_l1.py): the go/ast oracle is authoritative ground truth, and cgr's +# (H) captured Go nodes are graded against it on (kind, file, start_line). 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals.cgr_graph import extract_cgr_go_nodes +from evals.oracles import go_available, run_go_oracle +from evals.score import score_node_kinds +from evals.types_defs import GraphData + +GO_SRC = """package shapes + +type Point struct { +\tX int +\tY int +} + +type Shape interface { +\tArea() float64 +} + +type Celsius float64 + +func NewPoint(x int, y int) Point { +\treturn Point{X: x, Y: y} +} + +func (p Point) Area() float64 { +\treturn 0.0 +} +""" + + +def _require_go() -> None: + if not go_available(): + pytest.skip("go toolchain not available") + if cs.SupportedLanguage.GO not in load_parsers()[0]: + pytest.skip("go parser not available") + + +def _go_project(tmp_path: Path) -> Path: + project = tmp_path / "shapes_mod" + project.mkdir() + (project / "go.mod").write_text("module shapes_mod\n\ngo 1.22\n", encoding="utf-8") + (project / "shapes.go").write_text(GO_SRC, encoding="utf-8") + return project + + +def _names(nodes: dict, kind: cs.NodeLabel) -> set[str]: + return {node.name for key, node in nodes.items() if key.kind == kind.value} + + +def test_oracle_labels_go_declarations(tmp_path: Path) -> None: + _require_go() + oracle = run_go_oracle(_go_project(tmp_path)).nodes + assert _names(oracle, cs.NodeLabel.CLASS) == {"Point"} + assert _names(oracle, cs.NodeLabel.INTERFACE) == {"Shape"} + assert _names(oracle, cs.NodeLabel.TYPE) == {"Celsius"} + assert _names(oracle, cs.NodeLabel.FUNCTION) == {"NewPoint"} + # (H) go/ast knows Area has a receiver, so it is a Method, not a Function. 
+ assert _names(oracle, cs.NodeLabel.METHOD) == {"Area"} + + +def test_cgr_matches_oracle_on_type_declarations(tmp_path: Path) -> None: + _require_go() + project = _go_project(tmp_path) + cgr = GraphData( + nodes=extract_cgr_go_nodes(project, project.name), edges=set(), name_edges=set() + ) + oracle = run_go_oracle(project) + + result = score_node_kinds( + cgr, + oracle, + (cs.NodeLabel.CLASS, cs.NodeLabel.INTERFACE, cs.NodeLabel.TYPE), + ) + by_label = {row["label"]: row for row in result.rows} + for label in ( + cs.NodeLabel.CLASS.value, + cs.NodeLabel.INTERFACE.value, + cs.NodeLabel.TYPE.value, + ): + assert by_label[label]["recall"] == 1.0, (label, by_label[label]) + assert by_label[label]["precision"] == 1.0, (label, by_label[label]) diff --git a/codebase_rag/tests/test_go_type_declarations.py b/codebase_rag/tests/test_go_type_declarations.py new file mode 100644 index 000000000..ee6894df3 --- /dev/null +++ b/codebase_rag/tests/test_go_type_declarations.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from codebase_rag.constants import KEY_NAME, NodeLabel +from codebase_rag.tests.conftest import create_and_run_updater, get_nodes + + +@pytest.fixture +def go_types_project(temp_repo: Path) -> Path: + project_path = temp_repo / "go_types_test" + project_path.mkdir() + (project_path / "go.mod").write_text( + encoding="utf-8", data="module go_types_test\n\ngo 1.22\n" + ) + (project_path / "shapes.go").write_text( + encoding="utf-8", + data="""package shapes + +type Point struct { +\tX int +\tY int +} + +type Shape interface { +\tArea() float64 +} + +type Celsius float64 + +type ( +\tWidget struct { +\t\tID int +\t} +\tDrawable interface { +\t\tDraw() string +\t} +\tFahrenheit float64 +) + +func NewPoint(x int, y int) Point { +\treturn Point{X: x, Y: y} +} +""", + ) + return project_path + + +def _names(mock_ingestor: MagicMock, label: NodeLabel) -> set[str]: + return { + 
str(node[0][1].get(KEY_NAME)) + for node in get_nodes(mock_ingestor, label) + if str(node[0][1].get(KEY_NAME)) + } + + +def test_go_struct_captured_as_class( + go_types_project: Path, mock_ingestor: MagicMock +) -> None: + create_and_run_updater(go_types_project, mock_ingestor, skip_if_missing="go") + classes = _names(mock_ingestor, NodeLabel.CLASS) + assert "Point" in classes, f"Go struct Point missing from Class nodes: {classes}" + assert "Widget" in classes, ( + f"Grouped Go struct Widget missing from Class nodes: {classes}" + ) + + +def test_go_interface_captured_as_interface( + go_types_project: Path, mock_ingestor: MagicMock +) -> None: + create_and_run_updater(go_types_project, mock_ingestor, skip_if_missing="go") + interfaces = _names(mock_ingestor, NodeLabel.INTERFACE) + assert "Shape" in interfaces, ( + f"Go interface Shape missing from Interface nodes: {interfaces}" + ) + assert "Drawable" in interfaces, ( + f"Grouped Go interface Drawable missing from Interface nodes: {interfaces}" + ) + + +def test_go_type_alias_captured_as_type( + go_types_project: Path, mock_ingestor: MagicMock +) -> None: + create_and_run_updater(go_types_project, mock_ingestor, skip_if_missing="go") + types = _names(mock_ingestor, NodeLabel.TYPE) + assert "Celsius" in types, ( + f"Go defined type Celsius missing from Type nodes: {types}" + ) + assert "Fahrenheit" in types, ( + f"Grouped Go defined type Fahrenheit missing from Type nodes: {types}" + ) diff --git a/codebase_rag/tests/test_graph_service.py b/codebase_rag/tests/test_graph_service.py index c31b30741..76e5a6ed2 100644 --- a/codebase_rag/tests/test_graph_service.py +++ b/codebase_rag/tests/test_graph_service.py @@ -5,7 +5,13 @@ import pytest from codebase_rag.constants import NODE_UNIQUE_CONSTRAINTS -from codebase_rag.cypher_queries import wrap_with_unwind +from codebase_rag.cypher_queries import ( + build_create_node_query, + build_create_relationship_query, + build_merge_node_query, + build_merge_relationship_query, + 
wrap_with_unwind, +) from codebase_rag.services.graph_service import MemgraphIngestor @@ -38,13 +44,63 @@ def test_init_creates_empty_buffers(self) -> None: ingestor = MemgraphIngestor(host="localhost", port=7687) assert ingestor.node_buffer == [] - assert ingestor.relationship_buffer == [] + assert ingestor._rel_count == 0 def test_init_conn_is_none(self) -> None: ingestor = MemgraphIngestor(host="localhost", port=7687) assert ingestor.conn is None + def test_init_stores_auth_credentials(self) -> None: + ingestor = MemgraphIngestor( + host="localhost", port=7687, username="user", password="pass" + ) + + assert ingestor._username == "user" + assert ingestor._password == "pass" + + def test_init_defaults_auth_to_none(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687) + + assert ingestor._username is None + assert ingestor._password is None + + def test_init_raises_for_username_without_password(self) -> None: + with pytest.raises(ValueError, match="Both username and password"): + MemgraphIngestor(host="localhost", port=7687, username="user") + + def test_init_raises_for_password_without_username(self) -> None: + with pytest.raises(ValueError, match="Both username and password"): + MemgraphIngestor(host="localhost", port=7687, password="pass") + + def test_init_normalizes_empty_strings_to_none(self) -> None: + ingestor = MemgraphIngestor( + host="localhost", port=7687, username="", password="" + ) + + assert ingestor._username is None + assert ingestor._password is None + + def test_init_normalizes_whitespace_only_to_none(self) -> None: + ingestor = MemgraphIngestor( + host="localhost", port=7687, username=" ", password=" " + ) + + assert ingestor._username is None + assert ingestor._password is None + + def test_init_strips_whitespace_from_credentials(self) -> None: + ingestor = MemgraphIngestor( + host="localhost", port=7687, username=" user ", password=" pass " + ) + + assert ingestor._username == "user" + assert ingestor._password == "pass" 
+ + def test_init_raises_for_empty_password_with_valid_username(self) -> None: + with pytest.raises(ValueError, match="Both username and password"): + MemgraphIngestor(host="localhost", port=7687, username="user", password="") + class TestContextManager: def test_enter_connects_to_memgraph(self) -> None: @@ -60,12 +116,36 @@ def test_enter_connects_to_memgraph(self) -> None: assert mock_conn.autocommit is True assert result is ingestor + def test_enter_passes_auth_when_provided(self) -> None: + with patch("codebase_rag.services.graph_service.mgclient") as mock_mgclient: + mock_conn = MagicMock() + mock_mgclient.connect.return_value = mock_conn + + ingestor = MemgraphIngestor( + host="testhost", port=1234, username="user", password="pass" + ) + ingestor.__enter__() + + mock_mgclient.connect.assert_called_once_with( + host="testhost", port=1234, username="user", password="pass" + ) + + def test_enter_omits_auth_when_not_provided(self) -> None: + with patch("codebase_rag.services.graph_service.mgclient") as mock_mgclient: + mock_conn = MagicMock() + mock_mgclient.connect.return_value = mock_conn + + ingestor = MemgraphIngestor(host="testhost", port=1234) + ingestor.__enter__() + + mock_mgclient.connect.assert_called_once_with(host="testhost", port=1234) + def test_exit_flushes_and_closes_connection(self) -> None: ingestor = MemgraphIngestor(host="localhost", port=7687) mock_conn = MagicMock() ingestor.conn = mock_conn - with patch.object(ingestor, "flush_all") as mock_flush: + with patch.object(MemgraphIngestor, "flush_all") as mock_flush: ingestor.__exit__(None, None, None) mock_flush.assert_called_once() @@ -76,7 +156,7 @@ def test_exit_logs_error_on_exception(self) -> None: mock_conn = MagicMock() ingestor.conn = mock_conn - with patch.object(ingestor, "flush_all"): + with patch.object(MemgraphIngestor, "flush_all"): ingestor.__exit__(ValueError, ValueError("test error"), None) mock_conn.close.assert_called_once() @@ -85,7 +165,7 @@ def 
test_exit_handles_none_connection(self) -> None: ingestor = MemgraphIngestor(host="localhost", port=7687) ingestor.conn = None - with patch.object(ingestor, "flush_all"): + with patch.object(MemgraphIngestor, "flush_all"): ingestor.__exit__(None, None, None) @@ -206,19 +286,13 @@ def test_suppresses_already_exists_errors_in_logs(self) -> None: ingestor._execute_query("CREATE CONSTRAINT") -class TestExecuteBatch: - def test_returns_early_when_not_connected(self) -> None: - ingestor = MemgraphIngestor(host="localhost", port=7687) - ingestor.conn = None - - ingestor._execute_batch("MERGE (n:Test)", [{"id": 1}]) - +class TestExecuteBatchOn: def test_returns_early_when_params_empty(self) -> None: ingestor = MemgraphIngestor(host="localhost", port=7687) mock_conn = MagicMock() ingestor.conn = mock_conn - ingestor._execute_batch("MERGE (n:Test)", []) + ingestor._execute_batch_on(mock_conn, "MERGE (n:Test)", []) mock_conn.cursor.assert_not_called() @@ -229,7 +303,9 @@ def test_wraps_query_with_unwind(self) -> None: mock_conn.cursor.return_value = mock_cursor ingestor.conn = mock_conn - ingestor._execute_batch("MERGE (n:Test {id: row.id})", [{"id": 1}, {"id": 2}]) + ingestor._execute_batch_on( + mock_conn, "MERGE (n:Test {id: row.id})", [{"id": 1}, {"id": 2}] + ) call_args = mock_cursor.execute.call_args[0] assert call_args[0] == wrap_with_unwind("MERGE (n:Test {id: row.id})") @@ -242,7 +318,7 @@ def test_closes_cursor_on_success(self) -> None: mock_conn.cursor.return_value = mock_cursor ingestor.conn = mock_conn - ingestor._execute_batch("MERGE (n:Test)", [{"id": 1}]) + ingestor._execute_batch_on(mock_conn, "MERGE (n:Test)", [{"id": 1}]) mock_cursor.close.assert_called_once() @@ -251,7 +327,7 @@ class TestCleanDatabase: def test_executes_delete_query(self) -> None: ingestor = MemgraphIngestor(host="localhost", port=7687) - with patch.object(ingestor, "_execute_query") as mock_execute: + with patch.object(MemgraphIngestor, "_execute_query") as mock_execute: 
ingestor.clean_database() mock_execute.assert_called_once_with("MATCH (n) DETACH DELETE n;") @@ -265,7 +341,9 @@ def test_creates_constraint_for_each_node_type(self) -> None: def capture_query(query: str) -> None: executed_queries.append(query) - with patch.object(ingestor, "_execute_query", side_effect=capture_query): + with patch.object( + MemgraphIngestor, "_execute_query", side_effect=capture_query + ): ingestor.ensure_constraints() for label, prop in NODE_UNIQUE_CONSTRAINTS.items(): @@ -282,7 +360,9 @@ def fail_then_succeed(query: str) -> None: if call_count == 1: raise RuntimeError("Constraint already exists") - with patch.object(ingestor, "_execute_query", side_effect=fail_then_succeed): + with patch.object( + MemgraphIngestor, "_execute_query", side_effect=fail_then_succeed + ): ingestor.ensure_constraints() expected_queries = len(NODE_UNIQUE_CONSTRAINTS) * 2 @@ -384,7 +464,7 @@ def mock_fetch_all(query: str, params: dict | None = None) -> list[dict]: return [{"node_id": 1}, {"node_id": 2}, {"node_id": 3}] return [{"from_id": 1, "to_id": 2}] - with patch.object(ingestor, "fetch_all", side_effect=mock_fetch_all): + with patch.object(MemgraphIngestor, "fetch_all", side_effect=mock_fetch_all): result = ingestor.export_graph_to_dict() assert result["metadata"]["total_nodes"] == 3 @@ -396,8 +476,8 @@ def test_calls_flush_nodes_and_flush_relationships(self) -> None: ingestor = MemgraphIngestor(host="localhost", port=7687) with ( - patch.object(ingestor, "flush_nodes") as mock_nodes, - patch.object(ingestor, "flush_relationships") as mock_rels, + patch.object(MemgraphIngestor, "flush_nodes") as mock_nodes, + patch.object(MemgraphIngestor, "flush_relationships") as mock_rels, ): ingestor.flush_all() @@ -407,20 +487,36 @@ def test_calls_flush_nodes_and_flush_relationships(self) -> None: class TestFetchAllAndExecuteWrite: def test_fetch_all_delegates_to_execute_query(self) -> None: + from codebase_rag.config import settings + ingestor = 
MemgraphIngestor(host="localhost", port=7687) with patch.object( - ingestor, "_execute_query", return_value=[{"n": "result"}] + MemgraphIngestor, "_execute_query", return_value=[{"n": "result"}] ) as mock_exec: result = ingestor.fetch_all("MATCH (n) RETURN n", {"limit": 10}) - mock_exec.assert_called_once_with("MATCH (n) RETURN n", {"limit": 10}) + expected_query = ( + f"MATCH (n) RETURN n QUERY MEMORY LIMIT " + f"{settings.QUERY_MEMORY_LIMIT_MB} MB;" + ) + mock_exec.assert_called_once_with(expected_query, {"limit": 10}) assert result == [{"n": "result"}] + def test_fetch_all_preserves_existing_memory_limit(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687) + query_with_hint = "MATCH (n) RETURN n QUERY MEMORY LIMIT 512 MB;" + + with patch.object( + MemgraphIngestor, "_execute_query", return_value=[] + ) as mock_exec: + ingestor.fetch_all(query_with_hint) + mock_exec.assert_called_once_with(query_with_hint, None) + def test_execute_write_delegates_to_execute_query(self) -> None: ingestor = MemgraphIngestor(host="localhost", port=7687) - with patch.object(ingestor, "_execute_query") as mock_exec: + with patch.object(MemgraphIngestor, "_execute_query") as mock_exec: ingestor.execute_write("CREATE (n:Test)", {"name": "test"}) mock_exec.assert_called_once_with("CREATE (n:Test)", {"name": "test"}) @@ -434,3 +530,187 @@ def test_returns_iso_format_timestamp(self) -> None: assert "T" in result assert len(result) > 10 + + +class TestCreateMode: + def test_default_use_merge_is_true(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687) + assert ingestor._use_merge is True + + def test_use_merge_false(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687, use_merge=False) + assert ingestor._use_merge is False + + def test_flush_nodes_uses_merge_query_by_default(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687, batch_size=10) + mock_conn = MagicMock() + mock_cursor = MagicMock() + 
mock_conn.cursor.return_value = mock_cursor + ingestor.conn = mock_conn + + ingestor.node_buffer.append(("File", {"path": "/test.py", "name": "test"})) + ingestor.flush_nodes() + + call_args = mock_cursor.execute.call_args[0][0] + assert "MERGE" in call_args + assert "CREATE" not in call_args.split("MERGE")[0] + + def test_flush_nodes_uses_create_query_when_merge_disabled(self) -> None: + ingestor = MemgraphIngestor( + host="localhost", port=7687, batch_size=10, use_merge=False + ) + mock_conn = MagicMock() + mock_cursor = MagicMock() + mock_conn.cursor.return_value = mock_cursor + ingestor.conn = mock_conn + + ingestor.node_buffer.append(("File", {"path": "/test.py", "name": "test"})) + ingestor.flush_nodes() + + call_args = mock_cursor.execute.call_args[0][0] + assert "CREATE" in call_args + assert "MERGE" not in call_args + + def test_flush_relationships_uses_merge_query_by_default(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687, batch_size=10) + mock_conn = MagicMock() + mock_cursor = MagicMock() + mock_conn.cursor.return_value = mock_cursor + mock_cursor.description = [MagicMock(name="created")] + mock_cursor.description[0].name = "created" + mock_cursor.fetchall.return_value = [(1,)] + ingestor.conn = mock_conn + + ingestor.ensure_relationship_batch( + ("File", "path", "/a.py"), "IMPORTS", ("File", "path", "/b.py") + ) + ingestor.flush_relationships() + + call_args = mock_cursor.execute.call_args[0][0] + assert "MERGE" in call_args + + def test_flush_relationships_uses_create_query_when_merge_disabled(self) -> None: + ingestor = MemgraphIngestor( + host="localhost", port=7687, batch_size=10, use_merge=False + ) + mock_conn = MagicMock() + mock_cursor = MagicMock() + mock_conn.cursor.return_value = mock_cursor + mock_cursor.description = [MagicMock(name="created")] + mock_cursor.description[0].name = "created" + mock_cursor.fetchall.return_value = [(1,)] + ingestor.conn = mock_conn + + ingestor.ensure_relationship_batch( + ("File", 
"path", "/a.py"), "IMPORTS", ("File", "path", "/b.py") + ) + ingestor.flush_relationships() + + call_args = mock_cursor.execute.call_args[0][0] + assert "CREATE" in call_args + assert "MERGE" not in call_args + + +class TestPreGroupedRelBuffer: + def test_rel_groups_populated_on_ensure(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687) + ingestor.ensure_relationship_batch( + ("File", "path", "/a.py"), "IMPORTS", ("File", "path", "/b.py") + ) + assert len(ingestor._rel_groups) == 1 + + def test_rel_groups_groups_by_pattern(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687) + ingestor.ensure_relationship_batch( + ("File", "path", "/a.py"), "IMPORTS", ("File", "path", "/b.py") + ) + ingestor.ensure_relationship_batch( + ("File", "path", "/a.py"), "IMPORTS", ("File", "path", "/c.py") + ) + ingestor.ensure_relationship_batch( + ("Module", "qualified_name", "mod_a"), + "DEFINES", + ("Function", "qualified_name", "func_b"), + ) + assert len(ingestor._rel_groups) == 2 + pattern = ("File", "path", "IMPORTS", "File", "path") + assert len(ingestor._rel_groups[pattern]) == 2 + + def test_rel_groups_cleared_after_flush(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687) + mock_conn = MagicMock() + mock_cursor = MagicMock() + mock_conn.cursor.return_value = mock_cursor + mock_cursor.description = [MagicMock(name="created")] + mock_cursor.description[0].name = "created" + mock_cursor.fetchall.return_value = [(1,)] + ingestor.conn = mock_conn + + ingestor.ensure_relationship_batch( + ("File", "path", "/a.py"), "IMPORTS", ("File", "path", "/b.py") + ) + ingestor.flush_relationships() + + assert len(ingestor._rel_groups) == 0 + + def test_rel_groups_empty_on_init(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687) + assert len(ingestor._rel_groups) == 0 + + def test_rel_groups_correct_batch_row_values(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687) + 
ingestor.ensure_relationship_batch( + ("File", "path", "/a.py"), + "IMPORTS", + ("File", "path", "/b.py"), + {"weight": 1}, + ) + pattern = ("File", "path", "IMPORTS", "File", "path") + rows = ingestor._rel_groups[pattern] + assert len(rows) == 1 + assert rows[0]["from_val"] == "/a.py" + assert rows[0]["to_val"] == "/b.py" + assert rows[0]["props"] == {"weight": 1} + + +class TestSlots: + def test_has_slots(self) -> None: + assert hasattr(MemgraphIngestor, "__slots__") + + def test_no_dict(self) -> None: + ingestor = MemgraphIngestor(host="localhost", port=7687) + assert not hasattr(ingestor, "__dict__") + + +class TestCypherCreateQueries: + def test_build_create_node_query(self) -> None: + query = build_create_node_query("File", "path") + assert "CREATE" in query + assert "MERGE" not in query + assert "path: row.id" in query + + def test_build_create_relationship_query(self) -> None: + query = build_create_relationship_query( + "File", "path", "IMPORTS", "File", "path" + ) + assert "CREATE (a)-[r:IMPORTS]->(b)" in query + assert "MERGE" not in query + + def test_build_create_relationship_query_with_props(self) -> None: + query = build_create_relationship_query( + "File", "path", "IMPORTS", "File", "path", has_props=True + ) + assert "SET r += row.props" in query + assert "CREATE (a)-[r:IMPORTS]->(b)" in query + + def test_build_merge_node_query_unchanged(self) -> None: + query = build_merge_node_query("File", "path") + assert "MERGE" in query + assert "CREATE" not in query + + def test_build_merge_relationship_query_unchanged(self) -> None: + query = build_merge_relationship_query( + "File", "path", "IMPORTS", "File", "path" + ) + assert "MERGE" in query + assert "CREATE" not in query.replace("MERGE", "") diff --git a/codebase_rag/tests/test_graph_service_calls_failure_logging.py b/codebase_rag/tests/test_graph_service_calls_failure_logging.py index 2af717f06..6bb8f2e99 100644 --- a/codebase_rag/tests/test_graph_service_calls_failure_logging.py +++ 
b/codebase_rag/tests/test_graph_service_calls_failure_logging.py @@ -56,8 +56,8 @@ def test_calls_failure_logging_single_batch( ) with patch.object( - graph_service, - "_execute_batch_with_return", + MemgraphIngestor, + "_execute_batch_with_return_on", return_value=[{"created": 1}, {"created": 0}, {"created": 0}], ): graph_service.flush_relationships() @@ -72,13 +72,6 @@ def test_calls_failure_logging_single_batch( def test_calls_failure_logging_multiple_batches( graph_service: MemgraphIngestor, log_messages: list[str] ) -> None: - """Test that CALLS failures are logged correctly across multiple batches. - - This is the critical test case that validates the bug fix: - - Previously, the code used cumulative totals (total_attempted - total_successful) - - This would incorrectly report failures for batches after the first one - - Now it correctly uses batch-specific counts (len(params_list) - batch_successful) - """ graph_service.ensure_relationship_batch( ("Method", "qualified_name", "project.module.ClassA.methodA()"), "CALLS", @@ -104,14 +97,16 @@ def test_calls_failure_logging_multiple_batches( call_count = 0 def mock_execute_batch( - query: str, params_list: list[dict[str, Any]] + conn: Any, query: str, params_list: list[dict[str, Any]] ) -> list[dict[str, int]]: nonlocal call_count call_count += 1 return [{"created": 1}, {"created": 0}] with patch.object( - graph_service, "_execute_batch_with_return", side_effect=mock_execute_batch + MemgraphIngestor, + "_execute_batch_with_return_on", + side_effect=mock_execute_batch, ): graph_service.flush_relationships() @@ -127,7 +122,6 @@ def mock_execute_batch( def test_calls_success_no_failure_logging( graph_service: MemgraphIngestor, log_messages: list[str] ) -> None: - """Test that successful CALLS don't trigger failure warnings.""" graph_service.ensure_relationship_batch( ("Method", "qualified_name", "project.module.ClassA.methodA()"), "CALLS", @@ -140,8 +134,8 @@ def test_calls_success_no_failure_logging( ) with 
patch.object( - graph_service, - "_execute_batch_with_return", + MemgraphIngestor, + "_execute_batch_with_return_on", return_value=[{"created": 1}, {"created": 1}], ): graph_service.flush_relationships() @@ -154,7 +148,6 @@ def test_calls_success_no_failure_logging( def test_non_calls_relationships_no_failure_logging( graph_service: MemgraphIngestor, log_messages: list[str] ) -> None: - """Test that failures in non-CALLS relationships don't trigger CALLS-specific logging.""" graph_service.ensure_relationship_batch( ("Module", "qualified_name", "project.moduleA"), "IMPORTS", @@ -167,8 +160,8 @@ def test_non_calls_relationships_no_failure_logging( ) with patch.object( - graph_service, - "_execute_batch_with_return", + MemgraphIngestor, + "_execute_batch_with_return_on", return_value=[{"created": 1}, {"created": 0}], ): graph_service.flush_relationships() diff --git a/codebase_rag/tests/test_graph_updater_embeddings.py b/codebase_rag/tests/test_graph_updater_embeddings.py new file mode 100644 index 000000000..c88fc54ac --- /dev/null +++ b/codebase_rag/tests/test_graph_updater_embeddings.py @@ -0,0 +1,335 @@ +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.services.graph_service import MemgraphIngestor +from codebase_rag.types_defs import ResultRow + +MOCK_EMBEDDING = [0.1] * 768 + + +def _fake_embed_batch(snippets: list[str], **_kwargs: object) -> list[list[float]]: + return [MOCK_EMBEDDING for _ in snippets] + + +_PATCH_DEPS = patch( + "codebase_rag.graph_updater.has_semantic_dependencies", return_value=True +) +_PATCH_EMBED_BATCH = patch( + "codebase_rag.embedder.embed_code_batch", side_effect=_fake_embed_batch +) +_PATCH_STORE_BATCH = patch( + "codebase_rag.vector_store.store_embedding_batch", side_effect=lambda pts: len(pts) +) +_PATCH_RECONCILE = patch( + 
"codebase_rag.vector_store.verify_stored_ids", side_effect=lambda ids: ids +) + + +@pytest.fixture +def query_ingestor() -> MagicMock: + mock = MagicMock(spec=MemgraphIngestor) + mock.fetch_all = MagicMock(return_value=[]) + mock.execute_write = MagicMock() + return mock + + +@pytest.fixture +def updater_with_query(temp_repo: Path, query_ingestor: MagicMock) -> GraphUpdater: + parsers, queries = load_parsers() + return GraphUpdater( + ingestor=query_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + + +class TestCypherQueryEmbeddingsStructure: + def test_contains_starts_with_project_name(self) -> None: + assert "STARTS WITH" in cs.CYPHER_QUERY_EMBEDDINGS + assert "$project_name" in cs.CYPHER_QUERY_EMBEDDINGS + + def test_returns_required_columns(self) -> None: + query = cs.CYPHER_QUERY_EMBEDDINGS.upper() + for col in ["NODE_ID", "QUALIFIED_NAME", "START_LINE", "END_LINE", "PATH"]: + assert col in query + + def test_dot_concatenation_is_parenthesized(self) -> None: + assert "($project_name + '.')" in cs.CYPHER_QUERY_EMBEDDINGS + + def test_no_bare_starts_with_plus(self) -> None: + for line in cs.CYPHER_QUERY_EMBEDDINGS.splitlines(): + stripped = line.strip() + if "STARTS WITH" in stripped and "$project_name" in stripped: + assert "($project_name" in stripped, ( + f"$project_name + '.' 
must be parenthesized in: {stripped!r}" + ) + + +class TestGenerateSemanticEmbeddings: + @_PATCH_DEPS + @_PATCH_EMBED_BATCH + @_PATCH_STORE_BATCH + @_PATCH_RECONCILE + def test_passes_project_name_without_trailing_dot( + self, + _mock_reconcile: MagicMock, + _mock_store_batch: MagicMock, + _mock_embed_batch: MagicMock, + _mock_deps: MagicMock, + updater_with_query: GraphUpdater, + query_ingestor: MagicMock, + ) -> None: + query_ingestor.fetch_all.return_value = [] + updater_with_query._generate_semantic_embeddings() + + params = query_ingestor.fetch_all.call_args[0][1] + project_name_param = params["project_name"] + assert not project_name_param.endswith("."), ( + f"project_name should not have trailing dot, got: {project_name_param!r}" + ) + + @_PATCH_DEPS + @_PATCH_EMBED_BATCH + @_PATCH_STORE_BATCH + @_PATCH_RECONCILE + def test_uses_cypher_query_embeddings_constant( + self, + _mock_reconcile: MagicMock, + _mock_store_batch: MagicMock, + _mock_embed_batch: MagicMock, + _mock_deps: MagicMock, + updater_with_query: GraphUpdater, + query_ingestor: MagicMock, + ) -> None: + query_ingestor.fetch_all.return_value = [] + updater_with_query._generate_semantic_embeddings() + + query_arg = query_ingestor.fetch_all.call_args[0][0] + assert query_arg == cs.CYPHER_QUERY_EMBEDDINGS + + @patch("codebase_rag.graph_updater.has_semantic_dependencies", return_value=False) + def test_skips_when_no_semantic_dependencies( + self, + _mock_deps: MagicMock, + updater_with_query: GraphUpdater, + query_ingestor: MagicMock, + ) -> None: + updater_with_query._generate_semantic_embeddings() + query_ingestor.fetch_all.assert_not_called() + + @_PATCH_DEPS + @_PATCH_EMBED_BATCH + @_PATCH_STORE_BATCH + @_PATCH_RECONCILE + def test_returns_early_on_empty_results( + self, + _mock_reconcile: MagicMock, + mock_store_batch: MagicMock, + _mock_embed_batch: MagicMock, + _mock_deps: MagicMock, + updater_with_query: GraphUpdater, + query_ingestor: MagicMock, + ) -> None: + 
query_ingestor.fetch_all.return_value = [] + updater_with_query._generate_semantic_embeddings() + mock_store_batch.assert_not_called() + + @_PATCH_DEPS + @_PATCH_EMBED_BATCH + @_PATCH_STORE_BATCH + @_PATCH_RECONCILE + def test_embeds_valid_function_with_source( + self, + _mock_reconcile: MagicMock, + mock_store_batch: MagicMock, + mock_embed_batch: MagicMock, + _mock_deps: MagicMock, + updater_with_query: GraphUpdater, + query_ingestor: MagicMock, + temp_repo: Path, + ) -> None: + (temp_repo / "module.py").write_text("def hello():\n return 42\n") + row: ResultRow = { + cs.KEY_NODE_ID: 1, + cs.KEY_QUALIFIED_NAME: "myproject.module.hello", + cs.KEY_START_LINE: 1, + cs.KEY_END_LINE: 2, + cs.KEY_PATH: "module.py", + } + query_ingestor.fetch_all.return_value = [row] + + updater_with_query._generate_semantic_embeddings() + + mock_embed_batch.assert_called_once() + snippets_arg = mock_embed_batch.call_args[0][0] + assert len(snippets_arg) == 1 + assert "def hello()" in snippets_arg[0] + mock_store_batch.assert_called_once() + batch_arg = mock_store_batch.call_args[0][0] + assert len(batch_arg) == 1 + assert batch_arg[0] == (1, MOCK_EMBEDDING, "myproject.module.hello") + + @_PATCH_DEPS + @_PATCH_EMBED_BATCH + @_PATCH_STORE_BATCH + @_PATCH_RECONCILE + def test_skips_row_with_missing_source_info( + self, + _mock_reconcile: MagicMock, + mock_store_batch: MagicMock, + mock_embed_batch: MagicMock, + _mock_deps: MagicMock, + updater_with_query: GraphUpdater, + query_ingestor: MagicMock, + ) -> None: + row: ResultRow = { + cs.KEY_NODE_ID: 1, + cs.KEY_QUALIFIED_NAME: "myproject.module.hello", + } + query_ingestor.fetch_all.return_value = [row] + + updater_with_query._generate_semantic_embeddings() + + mock_embed_batch.assert_not_called() + mock_store_batch.assert_not_called() + + @patch("codebase_rag.graph_updater.has_semantic_dependencies", return_value=True) + @patch( + "codebase_rag.embedder.embed_code_batch", + side_effect=RuntimeError("model error"), + ) + @_PATCH_STORE_BATCH 
+ @_PATCH_RECONCILE + def test_handles_embed_failure_gracefully( + self, + _mock_reconcile: MagicMock, + mock_store_batch: MagicMock, + _mock_embed_batch: MagicMock, + _mock_deps: MagicMock, + updater_with_query: GraphUpdater, + query_ingestor: MagicMock, + temp_repo: Path, + ) -> None: + (temp_repo / "module.py").write_text("def hello():\n return 42\n") + row: ResultRow = { + cs.KEY_NODE_ID: 1, + cs.KEY_QUALIFIED_NAME: "myproject.module.hello", + cs.KEY_START_LINE: 1, + cs.KEY_END_LINE: 2, + cs.KEY_PATH: "module.py", + } + query_ingestor.fetch_all.return_value = [row] + + updater_with_query._generate_semantic_embeddings() + + mock_store_batch.assert_not_called() + + @_PATCH_DEPS + @_PATCH_EMBED_BATCH + @_PATCH_STORE_BATCH + @_PATCH_RECONCILE + def test_skips_unparseable_rows( + self, + _mock_reconcile: MagicMock, + mock_store_batch: MagicMock, + mock_embed_batch: MagicMock, + _mock_deps: MagicMock, + updater_with_query: GraphUpdater, + query_ingestor: MagicMock, + ) -> None: + bad_row: ResultRow = { + cs.KEY_NODE_ID: "not_an_int", + cs.KEY_QUALIFIED_NAME: "pkg.func", + } + query_ingestor.fetch_all.return_value = [bad_row] + + updater_with_query._generate_semantic_embeddings() + + mock_embed_batch.assert_not_called() + mock_store_batch.assert_not_called() + + @_PATCH_DEPS + @_PATCH_EMBED_BATCH + @_PATCH_STORE_BATCH + @_PATCH_RECONCILE + def test_counts_embedded_functions( + self, + _mock_reconcile: MagicMock, + mock_store_batch: MagicMock, + mock_embed_batch: MagicMock, + _mock_deps: MagicMock, + updater_with_query: GraphUpdater, + query_ingestor: MagicMock, + temp_repo: Path, + ) -> None: + (temp_repo / "a.py").write_text("def f1():\n pass\n") + (temp_repo / "b.py").write_text("def f2():\n pass\n") + rows: list[ResultRow] = [ + { + cs.KEY_NODE_ID: 1, + cs.KEY_QUALIFIED_NAME: "proj.a.f1", + cs.KEY_START_LINE: 1, + cs.KEY_END_LINE: 2, + cs.KEY_PATH: "a.py", + }, + { + cs.KEY_NODE_ID: 2, + cs.KEY_QUALIFIED_NAME: "proj.b.f2", + cs.KEY_START_LINE: 1, + cs.KEY_END_LINE: 
2, + cs.KEY_PATH: "b.py", + }, + ] + query_ingestor.fetch_all.return_value = rows + + updater_with_query._generate_semantic_embeddings() + + mock_embed_batch.assert_called_once() + snippets_arg = mock_embed_batch.call_args[0][0] + assert len(snippets_arg) == 2 + mock_store_batch.assert_called_once() + batch_arg = mock_store_batch.call_args[0][0] + assert len(batch_arg) == 2 + + +class TestBatchedEmbeddingDispatch: + @_PATCH_DEPS + @_PATCH_EMBED_BATCH + @_PATCH_STORE_BATCH + @_PATCH_RECONCILE + def test_dispatches_single_batch_call_for_multiple_snippets( + self, + _mock_reconcile: MagicMock, + _mock_store_batch: MagicMock, + mock_embed_batch: MagicMock, + _mock_deps: MagicMock, + updater_with_query: GraphUpdater, + query_ingestor: MagicMock, + temp_repo: Path, + ) -> None: + (temp_repo / "a.py").write_text("def f1():\n return 1\n") + (temp_repo / "b.py").write_text("def f2():\n return 2\n") + (temp_repo / "c.py").write_text("def f3():\n return 3\n") + rows: list[ResultRow] = [ + { + cs.KEY_NODE_ID: i + 1, + cs.KEY_QUALIFIED_NAME: f"proj.{name}.f{i + 1}", + cs.KEY_START_LINE: 1, + cs.KEY_END_LINE: 2, + cs.KEY_PATH: f"{name}.py", + } + for i, name in enumerate(("a", "b", "c")) + ] + query_ingestor.fetch_all.return_value = rows + + updater_with_query._generate_semantic_embeddings() + + assert mock_embed_batch.call_count == 1 + snippets_arg = mock_embed_batch.call_args[0][0] + assert len(snippets_arg) == 3 diff --git a/codebase_rag/tests/test_graph_updater_incremental.py b/codebase_rag/tests/test_graph_updater_incremental.py new file mode 100644 index 000000000..788e15358 --- /dev/null +++ b/codebase_rag/tests/test_graph_updater_incremental.py @@ -0,0 +1,457 @@ +import json +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import ( + BoundedASTCache, + FunctionRegistryTrie, + GraphUpdater, + _hash_file, + _hash_file_with_bytes, + _load_hash_cache, + 
_save_hash_cache, +) +from codebase_rag.parser_loader import load_parsers + + +@pytest.fixture +def updater(temp_repo: Path, mock_ingestor: MagicMock) -> GraphUpdater: + parsers, queries = load_parsers() + return GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + + +@pytest.fixture +def py_project(temp_repo: Path) -> Path: + (temp_repo / "__init__.py").touch() + (temp_repo / "module_a.py").write_text("def func_a():\n pass\n") + (temp_repo / "module_b.py").write_text("def func_b():\n pass\n") + return temp_repo + + +class TestHashFile: + def test_hash_returns_hex_string(self, temp_repo: Path) -> None: + f = temp_repo / "test.py" + f.write_text("hello") + result = _hash_file(f) + assert isinstance(result, str) + assert len(result) == 32 + + def test_same_content_same_hash(self, temp_repo: Path) -> None: + f1 = temp_repo / "a.py" + f2 = temp_repo / "b.py" + f1.write_text("same content") + f2.write_text("same content") + assert _hash_file(f1) == _hash_file(f2) + + def test_different_content_different_hash(self, temp_repo: Path) -> None: + f1 = temp_repo / "a.py" + f2 = temp_repo / "b.py" + f1.write_text("content one") + f2.write_text("content two") + assert _hash_file(f1) != _hash_file(f2) + + def test_hash_with_bytes_returns_none_for_broken_symlink( + self, temp_repo: Path + ) -> None: + link = temp_repo / "result" + link.symlink_to(temp_repo / "missing-target") + assert _hash_file_with_bytes(link) is None + + def test_hash_with_bytes_returns_none_for_missing_file( + self, temp_repo: Path + ) -> None: + assert _hash_file_with_bytes(temp_repo / "does-not-exist") is None + + +class TestHashCacheIO: + def test_save_and_load_cache(self, temp_repo: Path) -> None: + cache_path = temp_repo / cs.HASH_CACHE_FILENAME + data = {"module_a.py": "abc123", "module_b.py": "def456"} + _save_hash_cache(cache_path, data) + + assert cache_path.is_file() + loaded = _load_hash_cache(cache_path) + assert loaded == data + + def 
test_load_nonexistent_returns_empty(self, temp_repo: Path) -> None: + cache_path = temp_repo / cs.HASH_CACHE_FILENAME + assert _load_hash_cache(cache_path) == {} + + def test_load_corrupted_returns_empty(self, temp_repo: Path) -> None: + cache_path = temp_repo / cs.HASH_CACHE_FILENAME + cache_path.write_text("not valid json {{{") + assert _load_hash_cache(cache_path) == {} + + def test_save_creates_parent_dirs(self, temp_repo: Path) -> None: + cache_path = temp_repo / "subdir" / "nested" / cs.HASH_CACHE_FILENAME + _save_hash_cache(cache_path, {"a.py": "hash1"}) + assert cache_path.is_file() + + def test_cache_file_is_valid_json(self, temp_repo: Path) -> None: + cache_path = temp_repo / cs.HASH_CACHE_FILENAME + data = {"file.py": "sha256hash"} + _save_hash_cache(cache_path, data) + with cache_path.open() as f: + parsed = json.load(f) + assert parsed == data + + +class TestIncrementalUpdates: + def test_unchanged_file_is_skipped( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater.run() + + mock_ingestor.reset_mock() + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + with patch.object( + updater2, "_process_single_file", wraps=updater2._process_single_file + ) as spy: + updater2.run() + assert spy.call_count == 0 + + def test_changed_file_is_reparsed( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater.run() + + (py_project / "module_a.py").write_text("def func_a_updated():\n pass\n") + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + with patch.object( + updater2, 
"_process_single_file", wraps=updater2._process_single_file + ) as spy: + updater2.run() + processed_paths = [call.args[0] for call in spy.call_args_list] + assert py_project / "module_a.py" in processed_paths + + def test_deleted_file_removed_from_state( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater.run() + + (py_project / "module_b.py").unlink() + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + with patch.object( + updater2, "remove_file_from_state", wraps=updater2.remove_file_from_state + ) as spy: + updater2.run() + removed_paths = [call.args[0] for call in spy.call_args_list] + assert py_project / "module_b.py" in removed_paths + + def test_force_bypasses_cache( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater.run() + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + with patch.object( + updater2, "_process_single_file", wraps=updater2._process_single_file + ) as spy: + updater2.run(force=True) + assert spy.call_count > 0 + + def test_new_file_is_processed( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater.run() + + (py_project / "module_c.py").write_text("def func_c():\n pass\n") + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + with patch.object( + updater2, "_process_single_file", wraps=updater2._process_single_file 
+ ) as spy: + updater2.run() + processed_paths = [call.args[0] for call in spy.call_args_list] + assert py_project / "module_c.py" in processed_paths + + def test_hash_cache_file_created_after_run( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + cache_path = py_project / cs.HASH_CACHE_FILENAME + assert not cache_path.exists() + + updater.run() + + assert cache_path.is_file() + with cache_path.open() as f: + data = json.load(f) + assert isinstance(data, dict) + assert len(data) > 0 + + def test_broken_symlink_does_not_crash_indexing( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + broken = py_project / "result" + broken.symlink_to(py_project / "missing-nix-store-path") + + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + updater.run() + + cache_path = py_project / cs.HASH_CACHE_FILENAME + assert cache_path.is_file() + with cache_path.open() as f: + data = json.load(f) + assert "result" not in data + assert "module_a.py" in data + + def test_deleted_file_removed_from_hash_cache( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater.run() + + cache_path = py_project / cs.HASH_CACHE_FILENAME + with cache_path.open() as f: + old_data = json.load(f) + assert "module_b.py" in old_data + + (py_project / "module_b.py").unlink() + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater2.run() + + with cache_path.open() as f: + new_data = json.load(f) + assert "module_b.py" not in new_data + + +class TestFastPathInSync: + def 
test_second_run_skips_all_passes( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater.run() + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + assert updater2._is_already_in_sync() is True + with ( + patch.object( + updater2, "_process_single_file", wraps=updater2._process_single_file + ) as spy_files, + patch.object(updater2, "_process_function_calls") as spy_calls, + ): + updater2.run() + assert spy_files.call_count == 0 + assert spy_calls.call_count == 0 + + def test_changed_file_disables_fast_path( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater.run() + + (py_project / "module_a.py").write_text("def func_a():\n return 1\n") + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + assert updater2._is_already_in_sync() is False + + def test_new_file_disables_fast_path( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater.run() + + (py_project / "module_c.py").write_text("def func_c():\n pass\n") + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + assert updater2._is_already_in_sync() is False + + def test_deleted_file_disables_fast_path( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + 
queries=queries, + ) + updater.run() + + (py_project / "module_a.py").unlink() + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + assert updater2._is_already_in_sync() is False + + def test_no_hash_cache_disables_fast_path( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + assert updater._is_already_in_sync() is False + + def test_force_bypasses_fast_path( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + updater.run() + + updater2 = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + with patch.object(updater2, "_process_function_calls") as spy_calls: + updater2.run(force=True) + spy_calls.assert_called_once() + + +class TestSlots: + def test_function_registry_trie_has_slots(self) -> None: + assert hasattr(FunctionRegistryTrie, "__slots__") + trie = FunctionRegistryTrie() + with pytest.raises(AttributeError): + trie.nonexistent_attr = "value" # type: ignore[attr-defined] + + def test_bounded_ast_cache_has_slots(self) -> None: + assert hasattr(BoundedASTCache, "__slots__") + cache = BoundedASTCache() + with pytest.raises(AttributeError): + cache.nonexistent_attr = "value" # type: ignore[attr-defined] diff --git a/codebase_rag/tests/test_graph_updater_incremental_rename.py b/codebase_rag/tests/test_graph_updater_incremental_rename.py new file mode 100644 index 000000000..6080a4750 --- /dev/null +++ b/codebase_rag/tests/test_graph_updater_incremental_rename.py @@ -0,0 +1,190 @@ +# (H) Regression tests for issue #1: incremental rebuild used to leave +# (H) stale Function/DEFINES/IMPORTS/CALLS entities when a 
symbol was renamed +# (H) across files, because the incremental path was additive-only. After the +# (H) fix, an incremental rebuild after a rename must yield exactly the same +# (H) graph as a fresh full rebuild of the renamed tree. +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT_NAME = "testproj" + +NodeId = tuple[str, PropertyValue] +RelTuple = tuple[str, str, PropertyValue, str, str, str, PropertyValue] + +_DEFINES_EDGES = (cs.RelationshipType.DEFINES, cs.RelationshipType.DEFINES_METHOD) + + +class InMemoryGraph: + """Minimal in-memory ingestor that applies the exact node/relationship + writes and the DETACH-DELETE queries the updater issues, so final graph + state can be compared between incremental and full rebuilds.""" + + def __init__(self) -> None: + self.nodes: dict[NodeId, PropertyDict] = {} + self.rels: set[RelTuple] = set() + + # (H) IngestorProtocol + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + uid = properties[NODE_UNIQUE_KEYS[label]] + self.nodes[(str(label), uid)] = dict(properties) + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + fl, fk, fv = from_spec + tl, tk, tv = to_spec + self.rels.add((str(fl), str(fk), fv, str(rel_type), str(tl), str(tk), tv)) + + def flush_all(self) -> None: + return None + + # (H) QueryProtocol + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + params = params or {} + path = params.get(cs.KEY_PATH) + match query: + case 
cs.CYPHER_DELETE_MODULE: + self._delete_module_subtree(path) + case cs.CYPHER_DELETE_FILE: + self._delete_node_by_path(cs.NodeLabel.FILE, path) + case cs.CYPHER_DELETE_FOLDER: + self._delete_node_by_path(cs.NodeLabel.FOLDER, path) + case _: + return None + + # (H) delete helpers + def _find_nodes(self, label: str, key: str, val: PropertyValue) -> list[NodeId]: + return [ + nid + for nid, props in self.nodes.items() + if nid[0] == label and props.get(key) == val + ] + + def _delete_module_subtree(self, path: PropertyValue) -> None: + seeds = [ + nid + for nid, props in self.nodes.items() + if nid[0] == cs.NodeLabel.MODULE and props.get(cs.KEY_PATH) == path + ] + to_delete: set[NodeId] = set() + stack = list(seeds) + while stack: + nid = stack.pop() + if nid in to_delete: + continue + to_delete.add(nid) + props = self.nodes[nid] + for fl, fk, fv, rt, tl, tk, tv in self.rels: + if rt in _DEFINES_EDGES and fl == nid[0] and props.get(fk) == fv: + for child in self._find_nodes(tl, tk, tv): + if child not in to_delete: + stack.append(child) + self._purge_nodes(to_delete) + + def _delete_node_by_path(self, label: str, path: PropertyValue) -> None: + self._purge_nodes(set(self._find_nodes(label, cs.KEY_PATH, path))) + + def _purge_nodes(self, to_delete: set[NodeId]) -> None: + deleted_props = {nid: self.nodes[nid] for nid in to_delete} + for nid in to_delete: + self.nodes.pop(nid, None) + + def touches(label: str, key: str, val: PropertyValue) -> bool: + return any( + nid[0] == label and props.get(key) == val + for nid, props in deleted_props.items() + ) + + self.rels = { + (fl, fk, fv, rt, tl, tk, tv) + for (fl, fk, fv, rt, tl, tk, tv) in self.rels + if not touches(fl, fk, fv) and not touches(tl, tk, tv) + } + + # (H) comparison + def snapshot(self) -> tuple[frozenset[NodeId], frozenset[RelTuple]]: + return frozenset(self.nodes.keys()), frozenset(self.rels) + + +NODE_UNIQUE_KEYS = cs.NODE_UNIQUE_CONSTRAINTS + + +def _write_tree(root: Path, new_name: str) -> None: + (root / 
"__init__.py").touch() + (root / "a.py").write_text(f"def {new_name}():\n return 1\n") + (root / "b.py").write_text( + f"from .a import {new_name}\n\n\ndef caller():\n return {new_name}()\n" + ) + + +def _make_updater(root: Path, ingestor: InMemoryGraph) -> GraphUpdater: + parsers, queries = load_parsers() + return GraphUpdater( + ingestor=ingestor, + repo_path=root, + parsers=parsers, + queries=queries, + project_name=PROJECT_NAME, + ) + + +class TestIncrementalRenameStaleEntities: + def test_incremental_rename_matches_full_rebuild(self, tmp_path: Path) -> None: + # (H) Golden: a fresh full rebuild of the already-renamed tree. + golden_root = tmp_path / "golden" + golden_root.mkdir() + _write_tree(golden_root, "new_name") + golden_graph = InMemoryGraph() + _make_updater(golden_root, golden_graph).run(force=True) + + # (H) Sanity: golden truly contains the renamed symbol and not the old one. + golden_funcs = { + uid for (label, uid) in golden_graph.nodes if label == cs.NodeLabel.FUNCTION + } + assert any(str(qn).endswith(".new_name") for qn in golden_funcs) + assert not any(str(qn).endswith(".old_name") for qn in golden_funcs) + + # (H) Incremental: build original tree, then rename across both files + # (H) and rebuild incrementally (force=False). + incr_root = tmp_path / "incr" + incr_root.mkdir() + _write_tree(incr_root, "old_name") + incr_graph = InMemoryGraph() + _make_updater(incr_root, incr_graph).run(force=True) + + _write_tree(incr_root, "new_name") + _make_updater(incr_root, incr_graph).run(force=False) + + # (H) The stale old_name Function and its edges must be gone. 
+ incr_nodes, incr_rels = incr_graph.snapshot() + golden_nodes, golden_rels = golden_graph.snapshot() + + assert incr_nodes == golden_nodes, { + "stale_extra_nodes": sorted(map(str, incr_nodes - golden_nodes)), + "missing_nodes": sorted(map(str, golden_nodes - incr_nodes)), + } + assert incr_rels == golden_rels, { + "stale_extra_rels": sorted(map(str, incr_rels - golden_rels)), + "missing_rels": sorted(map(str, golden_rels - incr_rels)), + } + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/codebase_rag/tests/test_graph_updater_pruning.py b/codebase_rag/tests/test_graph_updater_pruning.py new file mode 100644 index 000000000..a8d5419cc --- /dev/null +++ b/codebase_rag/tests/test_graph_updater_pruning.py @@ -0,0 +1,369 @@ +# (H) Tests for orphan node pruning in GraphUpdater._prune_orphan_nodes +# (H) and Cypher deletion in _process_files for hash-cache-detected deletions. +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers + + +@pytest.fixture +def updater(temp_repo: Path, mock_ingestor: MagicMock) -> GraphUpdater: + parsers, queries = load_parsers() + return GraphUpdater( + ingestor=mock_ingestor, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + + +@pytest.fixture +def py_project(temp_repo: Path) -> Path: + (temp_repo / "__init__.py").touch() + (temp_repo / "module_a.py").write_text("def func_a():\n pass\n") + (temp_repo / "module_b.py").write_text("def func_b():\n pass\n") + sub = temp_repo / "subpkg" + sub.mkdir() + (sub / "__init__.py").touch() + (sub / "inner.py").write_text("def inner_func():\n pass\n") + return temp_repo + + +class TestPruneOrphanNodes: + def test_prune_removes_orphan_module_nodes( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + 
ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + project_name = py_project.resolve().name + + mock_ingestor.fetch_all.side_effect = [ + [], + [ + { + "path": "old_project/main.py", + "qualified_name": f"{project_name}.old_project.main", + }, + { + "path": "module_a.py", + "qualified_name": f"{project_name}.module_a", + }, + ], + [], + ] + updater._prune_orphan_nodes() + + delete_calls = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_MODULE + ] + assert len(delete_calls) == 1 + assert delete_calls[0].args[1] == {cs.KEY_PATH: "old_project/main.py"} + + def test_prune_removes_orphan_external_module_nodes( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + mock_ingestor.fetch_all.side_effect = [[], [], []] + updater._prune_orphan_nodes() + + external_calls = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_ORPHAN_EXTERNAL_MODULES + ] + assert len(external_calls) == 1 + + def test_prune_skips_other_projects( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + mock_ingestor.fetch_all.side_effect = [ + [{"path": "app.py", "absolute_path": "/other/project/app.py"}], + [{"path": "app.py", "qualified_name": "other_project.app"}], + [{"path": "data", "absolute_path": "/other/project/data"}], + ] + updater._prune_orphan_nodes() + + path_deletes = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] + in (cs.CYPHER_DELETE_FILE, cs.CYPHER_DELETE_MODULE, cs.CYPHER_DELETE_FOLDER) + ] + assert path_deletes == [] + + def test_prune_no_orphans_skips_deletes( + self, py_project: Path, 
mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + project_name = py_project.resolve().name + repo_abs = py_project.resolve().as_posix() + mock_ingestor.fetch_all.side_effect = [ + [{"path": "module_a.py", "absolute_path": f"{repo_abs}/module_a.py"}], + [{"path": "module_a.py", "qualified_name": f"{project_name}.module_a"}], + [{"path": "subpkg", "absolute_path": f"{repo_abs}/subpkg"}], + ] + updater._prune_orphan_nodes() + + path_deletes = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] + in (cs.CYPHER_DELETE_FILE, cs.CYPHER_DELETE_MODULE, cs.CYPHER_DELETE_FOLDER) + ] + assert path_deletes == [] + + def test_prune_handles_empty_graph( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + mock_ingestor.fetch_all.side_effect = [[], [], []] + updater._prune_orphan_nodes() + + path_deletes = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] + in (cs.CYPHER_DELETE_FILE, cs.CYPHER_DELETE_MODULE, cs.CYPHER_DELETE_FOLDER) + ] + assert path_deletes == [] + + def test_prune_handles_none_path_gracefully( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + project_name = py_project.resolve().name + mock_ingestor.fetch_all.side_effect = [ + [{"path": None, "absolute_path": None}], + [ + {"path": None, "qualified_name": f"{project_name}.something"}, + {"path": "module_a.py", "qualified_name": f"{project_name}.module_a"}, + ], + [], + ] + updater._prune_orphan_nodes() + + path_deletes = [ + c + for c in mock_ingestor.execute_write.call_args_list + if 
c.args[0] + in (cs.CYPHER_DELETE_FILE, cs.CYPHER_DELETE_MODULE, cs.CYPHER_DELETE_FOLDER) + ] + assert path_deletes == [] + + def test_prune_multiple_orphans_across_types( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + project_name = py_project.resolve().name + repo_abs = py_project.resolve().as_posix() + mock_ingestor.fetch_all.side_effect = [ + [ + {"path": "gone.py", "absolute_path": f"{repo_abs}/gone.py"}, + {"path": "module_a.py", "absolute_path": f"{repo_abs}/module_a.py"}, + ], + [ + { + "path": "deleted.py", + "qualified_name": f"{project_name}.deleted", + }, + { + "path": "module_a.py", + "qualified_name": f"{project_name}.module_a", + }, + ], + [ + {"path": "old_dir", "absolute_path": f"{repo_abs}/old_dir"}, + {"path": "subpkg", "absolute_path": f"{repo_abs}/subpkg"}, + ], + ] + updater._prune_orphan_nodes() + + path_deletes = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] + in (cs.CYPHER_DELETE_FILE, cs.CYPHER_DELETE_MODULE, cs.CYPHER_DELETE_FOLDER) + ] + assert len(path_deletes) == 3 + + def test_prune_skips_inline_module_synthetic_paths( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + project_name = py_project.resolve().name + inline_path_tests = f"{cs.INLINE_MODULE_PATH_PREFIX}tests" + inline_path_macos = f"{cs.INLINE_MODULE_PATH_PREFIX}macos" + mock_ingestor.fetch_all.side_effect = [ + [], + [ + { + "path": inline_path_tests, + "qualified_name": f"{project_name}.src.app.tests", + }, + { + "path": inline_path_tests, + "qualified_name": f"{project_name}.src.cli.tests", + }, + { + "path": inline_path_macos, + "qualified_name": f"{project_name}.src.clipboard.macos", + }, + ], + [], + ] 
+ updater._prune_orphan_nodes() + + delete_module_calls = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_MODULE + ] + assert delete_module_calls == [] + + +class TestCypherDeleteModuleQuery: + def test_query_does_not_traverse_calls_edges(self) -> None: + query = cs.CYPHER_DELETE_MODULE + assert "-[*0..]->" not in query + assert "-[*]->" not in query + + def test_query_constrains_traversal_to_containment_edges(self) -> None: + query = cs.CYPHER_DELETE_MODULE + assert "DEFINES" in query + assert "CALLS" not in query + assert "IMPORTS" not in query + assert "INHERITS" not in query + + +class TestDeletedFileInProcessFiles: + def test_deleted_file_triggers_cypher_delete( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + updater.run(force=True) + mock_ingestor.execute_write.reset_mock() + + (py_project / "module_b.py").unlink() + updater.run(force=False) + + delete_module_calls = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_MODULE + ] + delete_file_calls = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_FILE + ] + assert len(delete_module_calls) >= 1 + assert len(delete_file_calls) >= 1 + + def test_no_deletes_when_no_files_removed( + self, py_project: Path, mock_ingestor: MagicMock + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + updater.run(force=True) + mock_ingestor.execute_write.reset_mock() + + updater.run(force=False) + + delete_calls = [ + c + for c in mock_ingestor.execute_write.call_args_list + if c.args[0] in (cs.CYPHER_DELETE_MODULE, cs.CYPHER_DELETE_FILE) + ] + assert len(delete_calls) == 0 + + 
@patch("codebase_rag.graph_updater.GraphUpdater._prune_orphan_nodes") + def test_run_calls_prune( + self, + mock_prune: MagicMock, + py_project: Path, + mock_ingestor: MagicMock, + ) -> None: + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=py_project, + parsers=parsers, + queries=queries, + ) + + updater.run(force=True) + mock_prune.assert_called_once() diff --git a/codebase_rag/tests/test_handler_registry.py b/codebase_rag/tests/test_handler_registry.py index 2a9215755..6b7259f18 100644 --- a/codebase_rag/tests/test_handler_registry.py +++ b/codebase_rag/tests/test_handler_registry.py @@ -9,6 +9,7 @@ from codebase_rag.parsers.handlers.java import JavaHandler from codebase_rag.parsers.handlers.js_ts import JsTsHandler from codebase_rag.parsers.handlers.lua import LuaHandler +from codebase_rag.parsers.handlers.php import PhpHandler from codebase_rag.parsers.handlers.python import PythonHandler from codebase_rag.parsers.handlers.rust import RustHandler @@ -47,8 +48,12 @@ def test_returns_base_handler_for_go(self) -> None: assert isinstance(handler, BaseLanguageHandler) assert type(handler) is BaseLanguageHandler - def test_returns_base_handler_for_php(self) -> None: + def test_returns_php_handler_for_php(self) -> None: handler = get_handler(SupportedLanguage.PHP) + assert isinstance(handler, PhpHandler) + + def test_returns_base_handler_for_c(self) -> None: + handler = get_handler(SupportedLanguage.C) assert isinstance(handler, BaseLanguageHandler) assert type(handler) is BaseLanguageHandler @@ -84,6 +89,7 @@ class TestHandlerProtocol: SupportedLanguage.PYTHON, SupportedLanguage.GO, SupportedLanguage.PHP, + SupportedLanguage.C, ], ) def test_handler_has_all_protocol_methods( @@ -114,6 +120,8 @@ def test_handler_has_all_protocol_methods( SupportedLanguage.JAVA, SupportedLanguage.LUA, SupportedLanguage.PYTHON, + SupportedLanguage.PHP, + SupportedLanguage.C, ], ) def test_handler_methods_are_callable(self, language: 
SupportedLanguage) -> None: @@ -151,3 +159,6 @@ def test_lua_handler_extends_base(self) -> None: def test_python_handler_extends_base(self) -> None: assert issubclass(PythonHandler, BaseLanguageHandler) + + def test_php_handler_extends_base(self) -> None: + assert issubclass(PhpHandler, BaseLanguageHandler) diff --git a/codebase_rag/tests/test_handlers_unit.py b/codebase_rag/tests/test_handlers_unit.py index a9391ecde..f34d42d86 100644 --- a/codebase_rag/tests/test_handlers_unit.py +++ b/codebase_rag/tests/test_handlers_unit.py @@ -13,6 +13,7 @@ from codebase_rag.parsers.handlers.java import JavaHandler from codebase_rag.parsers.handlers.js_ts import JsTsHandler from codebase_rag.parsers.handlers.lua import LuaHandler +from codebase_rag.parsers.handlers.php import PhpHandler from codebase_rag.parsers.handlers.python import PythonHandler from codebase_rag.parsers.handlers.rust import RustHandler from codebase_rag.tests.conftest import create_mock_node @@ -62,6 +63,13 @@ except ImportError: LUA_AVAILABLE = False +try: + import tree_sitter_php as tsphp + + PHP_AVAILABLE = True +except ImportError: + PHP_AVAILABLE = False + @pytest.fixture def js_parser() -> Parser | None: @@ -111,6 +119,14 @@ def lua_parser() -> Parser | None: return Parser(language) +@pytest.fixture +def php_parser() -> Parser | None: + if not PHP_AVAILABLE: + return None + language = Language(tsphp.language_php()) + return Parser(language) + + class TestBaseLanguageHandler: def test_is_inside_method_with_object_literals_returns_false(self) -> None: handler = BaseLanguageHandler() @@ -1105,3 +1121,168 @@ def test_extract_decorators_dataclass_with_options( result = handler.extract_decorators(class_node) assert result == ["@dataclass(frozen=True, slots=True)"] + + +def _find_php_node(root: ASTNode, node_type: str) -> ASTNode | None: + if root.type == node_type: + return root + for child in root.children: + if result := _find_php_node(child, node_type): + return result + return None + + 
+@pytest.mark.skipif(not PHP_AVAILABLE, reason="tree-sitter-php not available") +class TestPhpHandler: + def test_extract_function_name_from_function_definition( + self, php_parser: Parser + ) -> None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b" 2;" + tree = php_parser.parse(code) + arrow_node = _find_php_node(tree.root_node, cs.TS_PHP_ARROW_FUNCTION) + assert arrow_node is not None + + result = handler.extract_function_name(arrow_node) + assert result is not None + assert result.startswith("arrow_") + + def test_is_class_method_inside_class(self, php_parser: Parser) -> None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b' None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b" None: + handler = PhpHandler() + code = b' None: + def raise_operational_error(**_: object) -> object: + raise mgclient.OperationalError("connection refused") + + monkeypatch.setattr(mgclient, "connect", raise_operational_error) + + result = HealthChecker().check_memgraph_connection() + + assert result.passed is False diff --git a/codebase_rag/tests/test_higher_order_calls.py b/codebase_rag/tests/test_higher_order_calls.py new file mode 100644 index 000000000..0382842b4 --- /dev/null +++ b/codebase_rag/tests/test_higher_order_calls.py @@ -0,0 +1,119 @@ +# (H) L3 finding from the evals/ harness: a function passed as an argument and +# (H) invoked via a parameter name (extract_decorators_func(node) inside +# (H) ingest_method) or handed to an eager higher-order builtin (sorted(..., +# (H) key=_start_byte_key)). 
The traced CALLS edge points from the function that +# (H) actually invokes the callable: the callee for a parameter it calls, the +# (H) enclosing function for a synchronous builtin. Sibling-class methods of the +# (H) same name make the callback targets ambiguous so trie uniqueness cannot +# (H) accidentally mask a real miss. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +MODULE_SRC = """def helper(node): + return node + + +def keyfn(n): + return n.start + + +def apply_cb(cb, value): + return cb(value) + + +def driver(items): + return apply_cb(helper, items) + + +def do_sort(items): + return sorted(items, key=keyfn) + + +class Other: + def helper(self) -> int: + return 1 + + def keyfn(self) -> int: + return 2 +""" + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + (tmp_path / "m.py").write_text(MODULE_SRC) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + 
project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestHigherOrderCalls: + def test_callable_parameter_resolves_to_argument_at_call_site( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ("proj.m.apply_cb", "proj.m.helper") in calls, calls + + def test_callback_attributed_to_invoking_callee_not_caller( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + # (H) driver passes helper but never invokes it; apply_cb does. + assert ("proj.m.driver", "proj.m.helper") not in calls, calls + + def test_callable_parameter_prefers_module_function_over_sibling_method( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ("proj.m.apply_cb", "proj.m.Other.helper") not in calls, calls + + def test_sorted_key_attributed_to_enclosing_function(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ("proj.m.do_sort", "proj.m.keyfn") in calls, calls + + def test_normal_call_edge_to_callee_still_present(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ("proj.m.driver", "proj.m.apply_cb") in calls, calls diff --git a/codebase_rag/tests/test_image_paths.py b/codebase_rag/tests/test_image_paths.py deleted file mode 100644 index 8daeba0db..000000000 --- a/codebase_rag/tests/test_image_paths.py +++ /dev/null @@ -1,160 +0,0 @@ -from pathlib import Path - -import pytest - -from codebase_rag.main import ( - _find_image_paths, - _get_path_variants, - _handle_chat_images, - _replace_path_in_question, -) - - -class TestFindImagePaths: - def test_finds_png_path(self) -> None: - question = "What is in this image /home/user/screenshot.png please analyze" - result = _find_image_paths(question) - assert result == [Path("/home/user/screenshot.png")] - - def test_finds_jpg_path(self) -> None: - question = "Look at /tmp/photo.jpg" - result = _find_image_paths(question) - assert result == [Path("/tmp/photo.jpg")] - - def 
test_finds_jpeg_path(self) -> None: - question = "Check /var/images/pic.jpeg" - result = _find_image_paths(question) - assert result == [Path("/var/images/pic.jpeg")] - - def test_finds_gif_path(self) -> None: - question = "Analyze /home/user/animation.gif" - result = _find_image_paths(question) - assert result == [Path("/home/user/animation.gif")] - - def test_finds_multiple_images(self) -> None: - question = "Compare /img/a.png and /img/b.jpg" - result = _find_image_paths(question) - assert result == [Path("/img/a.png"), Path("/img/b.jpg")] - - def test_case_insensitive_extension(self) -> None: - question = "Look at /path/IMAGE.PNG and /path/photo.JPG" - result = _find_image_paths(question) - assert len(result) == 2 - assert Path("/path/IMAGE.PNG") in result - assert Path("/path/photo.JPG") in result - - def test_ignores_relative_paths(self) -> None: - question = "Check images/photo.png and ./local/pic.jpg" - result = _find_image_paths(question) - assert result == [] - - def test_ignores_non_image_extensions(self) -> None: - question = "Look at /path/document.pdf and /path/code.py" - result = _find_image_paths(question) - assert result == [] - - def test_empty_question(self) -> None: - result = _find_image_paths("") - assert result == [] - - def test_no_paths(self) -> None: - question = "What is the meaning of life?" 
- result = _find_image_paths(question) - assert result == [] - - def test_handles_quoted_paths(self) -> None: - question = 'Look at "/path/with spaces/image.png"' - result = _find_image_paths(question) - assert result == [Path("/path/with spaces/image.png")] - - -class TestGetPathVariants: - def test_returns_four_variants(self) -> None: - result = _get_path_variants("/path/to/file.png") - assert len(result) == 4 - - def test_includes_escaped_spaces(self) -> None: - result = _get_path_variants("/path/with spaces/file.png") - assert r"/path/with\ spaces/file.png" in result - - def test_includes_single_quoted(self) -> None: - result = _get_path_variants("/path/to/file.png") - assert "'/path/to/file.png'" in result - - def test_includes_double_quoted(self) -> None: - result = _get_path_variants("/path/to/file.png") - assert '"/path/to/file.png"' in result - - def test_includes_original(self) -> None: - path = "/path/to/file.png" - result = _get_path_variants(path) - assert path in result - - -class TestReplacePathInQuestion: - def test_replaces_simple_path(self) -> None: - question = "Look at /old/path.png please" - result = _replace_path_in_question(question, "/old/path.png", "/new/path.png") - assert result == "Look at /new/path.png please" - - def test_replaces_quoted_path(self) -> None: - question = "Look at '/old/path.png' please" - result = _replace_path_in_question(question, "/old/path.png", "/new/path.png") - assert result == "Look at '/new/path.png' please" - - def test_replaces_double_quoted_path(self) -> None: - question = 'Look at "/old/path.png" please' - result = _replace_path_in_question(question, "/old/path.png", "/new/path.png") - assert result == 'Look at "/new/path.png" please' - - def test_returns_original_if_not_found(self) -> None: - question = "No path here" - result = _replace_path_in_question(question, "/missing.png", "/new.png") - assert result == question - - -class TestHandleChatImages: - @pytest.fixture - def temp_project(self, tmp_path: 
Path) -> Path: - return tmp_path - - @pytest.fixture - def temp_image(self, tmp_path: Path) -> Path: - img_path = tmp_path / "test_image.png" - img_path.write_bytes(b"fake png content") - return img_path - - def test_no_images_returns_unchanged(self, temp_project: Path) -> None: - question = "What is 2 + 2?" - result = _handle_chat_images(question, temp_project) - assert result == question - - def test_copies_image_to_tmp(self, temp_project: Path, temp_image: Path) -> None: - question = f"Look at {temp_image}" - result = _handle_chat_images(question, temp_project) - - assert ".tmp" in result - assert "test_image.png" in result - - tmp_dir = temp_project / ".tmp" - assert tmp_dir.exists() - copied_files = list(tmp_dir.glob("*test_image.png")) - assert len(copied_files) == 1 - - def test_handles_nonexistent_image(self, temp_project: Path) -> None: - question = "Look at /nonexistent/image.png" - result = _handle_chat_images(question, temp_project) - assert result == question - - def test_handles_multiple_images(self, temp_project: Path) -> None: - img1 = temp_project / "img1.png" - img2 = temp_project / "img2.jpg" - img1.write_bytes(b"png1") - img2.write_bytes(b"jpg2") - - question = f"Compare {img1} and {img2}" - result = _handle_chat_images(question, temp_project) - - assert ".tmp" in result - assert "img1.png" in result - assert "img2.jpg" in result diff --git a/codebase_rag/tests/test_import_parsing.py b/codebase_rag/tests/test_import_parsing.py index 318b146e3..2091d4195 100644 --- a/codebase_rag/tests/test_import_parsing.py +++ b/codebase_rag/tests/test_import_parsing.py @@ -475,3 +475,103 @@ def test_internal_import_matched_with_dot_separator( assert result == "myapp.utils.Helper" assert len(mock_ingestor.nodes_created) == 0 + + +class TestIsLocalModuleCache: + def test_is_local_module_cache_returns_correct_result(self, tmp_path: Path) -> None: + (tmp_path / "utils").mkdir() + (tmp_path / "utils" / "__init__.py").touch() + + processor = ImportProcessor( + 
repo_path=tmp_path, + project_name="myproject", + ingestor=None, + function_registry=None, + ) + + assert processor._is_local_module("utils") is True + assert processor._is_local_module("nonexistent") is False + + def test_is_local_module_cache_hits_on_repeated_calls(self, tmp_path: Path) -> None: + (tmp_path / "models").mkdir() + (tmp_path / "models" / "__init__.py").touch() + + processor = ImportProcessor( + repo_path=tmp_path, + project_name="myproject", + ingestor=None, + function_registry=None, + ) + + processor._is_local_module("models") + processor._is_local_module("models") + processor._is_local_module("models") + + info = processor._is_local_module_cached.cache_info() + assert info.hits >= 2 + assert info.misses == 1 + + def test_is_local_module_detects_py_file(self, tmp_path: Path) -> None: + (tmp_path / "helpers.py").touch() + + processor = ImportProcessor( + repo_path=tmp_path, + project_name="myproject", + ingestor=None, + function_registry=None, + ) + + assert processor._is_local_module("helpers") is True + + def test_is_local_module_detects_directory(self, tmp_path: Path) -> None: + (tmp_path / "services").mkdir() + + processor = ImportProcessor( + repo_path=tmp_path, + project_name="myproject", + ingestor=None, + function_registry=None, + ) + + assert processor._is_local_module("services") is True + + def test_is_local_java_import_cache_hits(self, tmp_path: Path) -> None: + (tmp_path / "com").mkdir() + + processor = ImportProcessor( + repo_path=tmp_path, + project_name="myproject", + ingestor=None, + function_registry=None, + ) + + processor._is_local_java_import("com.example.Service") + processor._is_local_java_import("com.example.Service") + processor._is_local_java_import("com.example.Service") + + info = processor._is_local_java_import_cached.cache_info() + assert info.hits >= 2 + assert info.misses == 1 + + def test_separate_instances_have_independent_caches(self, tmp_path: Path) -> None: + (tmp_path / "shared").mkdir() + + p1 = 
ImportProcessor( + repo_path=tmp_path, + project_name="project1", + ingestor=None, + function_registry=None, + ) + p2 = ImportProcessor( + repo_path=tmp_path, + project_name="project2", + ingestor=None, + function_registry=None, + ) + + p1._is_local_module("shared") + p1._is_local_module("shared") + + info2 = p2._is_local_module_cached.cache_info() + assert info2.hits == 0 + assert info2.misses == 0 diff --git a/codebase_rag/tests/test_import_resolution_eval.py b/codebase_rag/tests/test_import_resolution_eval.py new file mode 100644 index 000000000..12b6b0227 --- /dev/null +++ b/codebase_rag/tests/test_import_resolution_eval.py @@ -0,0 +1,68 @@ +from pathlib import Path + +from evals import constants as ec +from evals.import_resolution import ( + cgr_import_deps, + oracle_import_deps, + score_import_deps, +) + + +def _make_repo(root: Path) -> None: + root.mkdir(parents=True) + (root / "__init__.py").write_text("", encoding="utf-8") + (root / "helper.py").write_text("def thing():\n return 1\n", encoding="utf-8") + (root / "sibling.py").write_text("x = 1\n", encoding="utf-8") + (root / "m.py").write_text( + "import os\n" + "import numpy.linalg\n" + "from collections import OrderedDict\n" + "from proj.helper import thing\n" + "from . import sibling\n", + encoding="utf-8", + ) + + +def test_oracle_classifies_internal_and_external(tmp_path: Path) -> None: + src = tmp_path / "proj" + _make_repo(src) + deps = oracle_import_deps(src, "proj") + + # (H) stdlib and third-party are external, keyed by top-level package. + assert ("m.py", "os", True) in deps + assert ("m.py", "numpy", True) in deps + assert ("m.py", "collections", True) in deps + # (H) absolute and relative first-party imports are internal (top == project). + assert ("m.py", "proj", False) in deps + # (H) a first-party import is never marked external. 
+ assert ("m.py", "proj", True) not in deps + + +def test_oracle_excludes_future_pseudo_import(tmp_path: Path) -> None: + # (H) `from __future__ import ...` is a compiler directive, not a dependency; + # (H) cgr rightly ignores it, so the oracle must too or it reports false misses. + src = tmp_path / "proj" + src.mkdir() + (src / "__init__.py").write_text("", encoding="utf-8") + (src / "f.py").write_text("from __future__ import annotations\n", encoding="utf-8") + deps = oracle_import_deps(src, "proj") + assert all(top != "__future__" for (_f, top, _e) in deps) + + +def test_cgr_matches_oracle_on_clean_repo(tmp_path: Path) -> None: + # (H) On an unambiguous repo cgr's import classification should equal the + # (H) oracle: every stdlib/third-party import external, every project import + # (H) internal. + src = tmp_path / "proj" + _make_repo(src) + assert cgr_import_deps(src, "proj") == oracle_import_deps(src, "proj") + + +def test_score_flags_misclassified_internal_as_external() -> None: + oracle = {("m.py", "proj", False), ("m.py", "os", True)} + # (H) cgr wrongly marks the first-party import external (issue #498 shape). 
+ cgr = {("m.py", "proj", True), ("m.py", "os", True)} + result = score_import_deps(cgr, oracle) + internal = next(r for r in result.rows if r["label"] == ec.IMPORTS_INTERNAL_LABEL) + assert internal["fn"] == 1 + assert internal["recall"] == 0.0 diff --git a/codebase_rag/tests/test_incremental_eval.py b/codebase_rag/tests/test_incremental_eval.py new file mode 100644 index 000000000..10e699c45 --- /dev/null +++ b/codebase_rag/tests/test_incremental_eval.py @@ -0,0 +1,230 @@ +import json +import os +import shutil +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals.cgr_graph import _StatefulIngestor +from evals.incremental import ( + compare_states, + run_neutral_edit_scenario, + snapshot, +) + +_MODULE = cs.NodeLabel.MODULE.value +_FILE = cs.NodeLabel.FILE.value +_FUNCTION = cs.NodeLabel.FUNCTION.value +_QN = cs.KEY_QUALIFIED_NAME +_DEFINES = cs.RelationshipType.DEFINES.value +_CALLS = cs.RelationshipType.CALLS.value +_IMPORTS = cs.RelationshipType.IMPORTS.value +_CONTAINS_FILE = cs.RelationshipType.CONTAINS_FILE.value + +# (H) The inbound call edge issue #532 drops: caller.use() calls callee.target(). +_INBOUND_CALL = (_FUNCTION, "proj.caller.use", _CALLS, _FUNCTION, "proj.callee.target") + + +def _node(store: _StatefulIngestor, label: str, **props: object) -> None: + store.ensure_node_batch(label, props) + + +def _module_subtree() -> _StatefulIngestor: + # (H) Two modules: callee.py defines target(); caller.py defines use() which + # (H) CALLS target(). Mirrors the real graph shape captured from cgr. 
+ s = _StatefulIngestor() + _node(s, _MODULE, qualified_name="proj.callee", path="callee.py") + _node(s, _FUNCTION, qualified_name="proj.callee.target", path="callee.py") + _node(s, _MODULE, qualified_name="proj.caller", path="caller.py") + _node(s, _FUNCTION, qualified_name="proj.caller.use", path="caller.py") + s.ensure_relationship_batch( + (_MODULE, _QN, "proj.callee"), _DEFINES, (_FUNCTION, _QN, "proj.callee.target") + ) + s.ensure_relationship_batch( + (_MODULE, _QN, "proj.caller"), _DEFINES, (_FUNCTION, _QN, "proj.caller.use") + ) + s.ensure_relationship_batch( + (_FUNCTION, _QN, "proj.caller.use"), + _CALLS, + (_FUNCTION, _QN, "proj.callee.target"), + ) + return s + + +class TestStatefulStore: + def test_detach_delete_module_removes_subtree_and_incident_edges(self) -> None: + s = _module_subtree() + s.execute_write(cs.CYPHER_DELETE_MODULE, {cs.KEY_PATH: "callee.py"}) + + assert (_MODULE, "proj.callee") not in s.nodes + assert (_FUNCTION, "proj.callee.target") not in s.nodes + # (H) The caller subtree is untouched. + assert (_FUNCTION, "proj.caller.use") in s.nodes + # (H) DETACH removes the inbound CALLS edge incident on the deleted target. + assert not any(e[2] == _CALLS for e in s.edges) + # (H) The caller's own DEFINES edge survives. 
+ assert any(e[2] == _DEFINES and e[1] == "proj.caller" for e in s.edges) + + def test_delete_file_detaches(self) -> None: + s = _StatefulIngestor() + _node(s, _FILE, path="callee.py") + _node(s, _MODULE, qualified_name="proj", path="x") + s.ensure_relationship_batch( + (_MODULE, _QN, "proj"), _CONTAINS_FILE, (_FILE, cs.KEY_PATH, "callee.py") + ) + s.execute_write(cs.CYPHER_DELETE_FILE, {cs.KEY_PATH: "callee.py"}) + + assert (_FILE, "callee.py") not in s.nodes + assert all(e[4] != "callee.py" for e in s.edges) + + def test_fetch_all_excludes_external_modules(self) -> None: + s = _StatefulIngestor() + _node(s, _FILE, path="a.py", absolute_path="/x/a.py") + _node(s, _MODULE, qualified_name="proj.a", path="a.py") + _node(s, _MODULE, qualified_name="ext", path="ext", is_external=True) + + files = s.fetch_all(cs.CYPHER_ALL_FILE_PATHS) + assert {r[cs.KEY_PATH] for r in files} == {"a.py"} + mods = s.fetch_all(cs.CYPHER_ALL_MODULE_PATHS_INTERNAL) + assert {r[cs.KEY_PATH] for r in mods} == {"a.py"} + + def test_delete_orphan_external_modules(self) -> None: + s = _StatefulIngestor() + _node(s, _MODULE, qualified_name="ext.orphan", path="ext", is_external=True) + _node(s, _MODULE, qualified_name="ext.used", path="ext2", is_external=True) + _node(s, _MODULE, qualified_name="proj.m", path="m.py") + s.ensure_relationship_batch( + (_MODULE, _QN, "proj.m"), _IMPORTS, (_MODULE, _QN, "ext.used") + ) + s.execute_write(cs.CYPHER_DELETE_ORPHAN_EXTERNAL_MODULES) + + assert (_MODULE, "ext.orphan") not in s.nodes + assert (_MODULE, "ext.used") in s.nodes + + def test_edges_are_deduped(self) -> None: + s = _StatefulIngestor() + spec = (_MODULE, _QN, "proj") + s.ensure_relationship_batch(spec, _DEFINES, spec) + s.ensure_relationship_batch(spec, _DEFINES, spec) + assert len(s.edges) == 1 + + +@pytest.fixture(scope="module") +def parsers_queries() -> tuple[object, object]: + return load_parsers() + + +def _make_repo(root: Path) -> None: + root.mkdir(parents=True) + (root / 
"__init__.py").write_text("", encoding="utf-8") + (root / "callee.py").write_text("def target():\n return 1\n", encoding="utf-8") + (root / "caller.py").write_text( + "from proj.callee import target\n\n\ndef use():\n return target()\n", + encoding="utf-8", + ) + + +class TestIncrementalScenario: + def test_clean_reindex_sees_inbound_call( + self, tmp_path: Path, parsers_queries: tuple[object, object] + ) -> None: + # (H) Baseline: a clean forced index resolves caller.use -> callee.target. + src = tmp_path / "proj" + _make_repo(src) + parsers, queries = parsers_queries + work = tmp_path / "work" + shutil.copytree(src, work) + store = _StatefulIngestor() + from codebase_rag.graph_updater import GraphUpdater + + GraphUpdater( + ingestor=store, + repo_path=work, + parsers=parsers, + queries=queries, + project_name="proj", + ).run(force=True) + assert _INBOUND_CALL in snapshot(store).edges + + def test_incremental_preserves_inbound_call_editing_callee( + self, tmp_path: Path, parsers_queries: tuple[object, object] + ) -> None: + # (H) Issue #532: editing the callee deletes its module subtree (and the + # (H) inbound CALLS incident on it). The fix must rebuild that inbound edge + # (H) from the unchanged caller, so the incremental graph equals a clean + # (H) re-index. + src = tmp_path / "proj" + _make_repo(src) + parsers, queries = parsers_queries + incr, clean = run_neutral_edit_scenario( + src, "proj", "callee.py", parsers, queries, tmp_path / "scn" + ) + assert _INBOUND_CALL in clean.edges + assert _INBOUND_CALL in incr.edges + assert incr == clean + + def test_incremental_preserves_cross_file_call_editing_caller( + self, tmp_path: Path, parsers_queries: tuple[object, object] + ) -> None: + # (H) Editing the caller must rebuild its outbound call to the unchanged + # (H) callee. This requires the function registry to know definitions in + # (H) unchanged files (rehydrated from the persisted graph), not just the + # (H) changed file. 
+ src = tmp_path / "proj" + _make_repo(src) + parsers, queries = parsers_queries + incr, clean = run_neutral_edit_scenario( + src, "proj", "caller.py", parsers, queries, tmp_path / "scn" + ) + assert _INBOUND_CALL in clean.edges + assert _INBOUND_CALL in incr.edges + assert incr == clean + + def test_baseline_index_ignores_preexisting_cache( + self, tmp_path: Path, parsers_queries: tuple[object, object] + ) -> None: + # (H) The real cgr source carries its own .cgr-hash-cache.json from prior + # (H) indexing. If the scenario copies it, a future-dated cache makes every + # (H) file look unchanged and the baseline index skips them, so the diff + # (H) against a clean re-index becomes meaningless. The runner must purge + # (H) any copied cache so the baseline is a true full index. + src = tmp_path / "proj" + _make_repo(src) + (src / "other.py").write_text("def helper():\n return 2\n", encoding="utf-8") + cache = src / cs.HASH_CACHE_FILENAME + cache.write_text( + json.dumps( + { + "callee.py": "x", + "caller.py": "x", + "other.py": "x", + "__init__.py": "x", + } + ), + encoding="utf-8", + ) + (src / cs.DIR_MTIMES_FILENAME).write_text( + json.dumps({cs.ROOT_DIR_KEY: 0.0}), encoding="utf-8" + ) + future = max(p.stat().st_mtime for p in src.glob("*.py")) + 1000 + os.utime(cache, (future, future)) + + parsers, queries = parsers_queries + incr, clean = run_neutral_edit_scenario( + src, "proj", "callee.py", parsers, queries, tmp_path / "scn" + ) + # (H) other.py was never edited; it must still be indexed in the baseline. 
+ assert (_FUNCTION, "proj.other.helper") in clean.nodes + assert (_FUNCTION, "proj.other.helper") in incr.nodes + + def test_compare_states_flags_missing_edge(self) -> None: + from evals.types_defs import GraphState + + clean = GraphState(frozenset({("Module", "proj")}), frozenset({_INBOUND_CALL})) + incr = GraphState(frozenset({("Module", "proj")}), frozenset()) + result = compare_states(incr, clean) + calls_row = next(r for r in result.rows if r["label"] == _CALLS) + assert calls_row["fn"] == 1 + assert calls_row["recall"] == 0.0 diff --git a/codebase_rag/tests/test_inheritance_eval.py b/codebase_rag/tests/test_inheritance_eval.py new file mode 100644 index 000000000..716b27270 --- /dev/null +++ b/codebase_rag/tests/test_inheritance_eval.py @@ -0,0 +1,74 @@ +from pathlib import Path + +from evals import constants as ec +from evals.inheritance import ( + CgrResult, + OracleResult, + cgr_inheritance, + oracle_inheritance, + score_inheritance, +) + + +def _make_repo(root: Path) -> None: + root.mkdir(parents=True) + (root / "__init__.py").write_text("", encoding="utf-8") + (root / "base.py").write_text( + "class Animal:\n" + " def speak(self):\n" + " return 1\n" + " def move(self):\n" + " return 2\n", + encoding="utf-8", + ) + (root / "derived.py").write_text( + "from proj.base import Animal\n\n\n" + "class Dog(Animal):\n" + " def speak(self):\n" + " return 3\n" + " def fetch(self):\n" + " return 4\n", + encoding="utf-8", + ) + + +def test_oracle_resolves_inherits_and_overrides(tmp_path: Path) -> None: + src = tmp_path / "proj" + _make_repo(src) + oracle = oracle_inheritance(src, "proj") + + assert ("proj.derived.Dog", "proj.base.Animal") in oracle.inherits + # (H) speak is redefined in Dog and exists in the base -> an override. + assert ("proj.derived.Dog", "proj.base.Animal", "speak") in oracle.overrides + # (H) fetch is new (not in base); move is inherited (not redefined). Neither + # (H) is an override. 
+ assert ("proj.derived.Dog", "proj.base.Animal", "fetch") not in oracle.overrides + assert all(m != "move" for (_c, _b, m) in oracle.overrides) + # (H) Dog is single-base and top-level, so it is eligible for override grading. + assert "proj.derived.Dog" in oracle.override_scope + + +def test_cgr_matches_oracle_on_clean_hierarchy(tmp_path: Path) -> None: + src = tmp_path / "proj" + _make_repo(src) + result = score_inheritance( + cgr_inheritance(src, "proj"), oracle_inheritance(src, "proj") + ) + assert all(row["fp"] == 0 and row["fn"] == 0 for row in result.rows) + + +def test_score_flags_missing_override() -> None: + oracle = OracleResult( + inherits={("proj.derived.Dog", "proj.base.Animal")}, + overrides={("proj.derived.Dog", "proj.base.Animal", "speak")}, + top_classes=frozenset({"proj.derived.Dog", "proj.base.Animal"}), + override_scope=frozenset({"proj.derived.Dog"}), + ) + cgr = CgrResult( + inherits={("proj.derived.Dog", "proj.base.Animal")}, + overrides=set(), + ) + result = score_inheritance(cgr, oracle) + overrides = next(r for r in result.rows if r["label"] == ec.OVERRIDES_LABEL) + assert overrides["fn"] == 1 + assert overrides["recall"] == 0.0 diff --git a/codebase_rag/tests/test_inherits_attribute_base.py b/codebase_rag/tests/test_inherits_attribute_base.py new file mode 100644 index 000000000..f057758cf --- /dev/null +++ b/codebase_rag/tests/test_inherits_attribute_base.py @@ -0,0 +1,85 @@ +# (H) L2 finding from the evals/ harness: cgr captured INHERITS for direct-name +# (H) bases (class C(Base)) but dropped attribute-style bases (class C(mod.Base), +# (H) e.g. class UniXcoder(nn.Module)). Those inheritance edges must be captured. 
+from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "inhproj" + +MODULE_SRC = """from collections import abc + + +class C(abc.Mapping): + pass +""" + +_RelTuple = tuple[str, PropertyValue, str, str, PropertyValue] + + +class _Capture: + def __init__(self) -> None: + self.nodes: dict[tuple[str, PropertyValue], PropertyDict] = {} + self.rels: list[_RelTuple] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + uid = properties[cs.NODE_UNIQUE_CONSTRAINTS[label]] + self.nodes[(str(label), uid)] = dict(properties) + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append( + ( + str(from_spec[0]), + from_spec[2], + str(rel_type), + str(to_spec[0]), + to_spec[2], + ) + ) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _build(tmp_path: Path) -> _Capture: + (tmp_path / "m.py").write_text(MODULE_SRC) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return cap + + +class TestInheritsAttributeBase: + def test_attribute_base_class_creates_inherits_edge(self, tmp_path: Path) -> None: + cap = _build(tmp_path) + targets = [ + str(target).rsplit(cs.SEPARATOR_DOT, 1)[-1] + for (_fl, from_val, rel_type, _tl, target) in cap.rels + if rel_type == cs.RelationshipType.INHERITS and 
str(from_val).endswith(".C") + ] + assert targets == ["Mapping"], targets diff --git a/codebase_rag/tests/test_instance_attr_type_inference.py b/codebase_rag/tests/test_instance_attr_type_inference.py new file mode 100644 index 000000000..d28e30319 --- /dev/null +++ b/codebase_rag/tests/test_instance_attr_type_inference.py @@ -0,0 +1,111 @@ +# (H) L3 finding from the evals/ harness: a method calls self.attr.method(), but the +# (H) type of self.attr is only knowable from the __init__ assignment in the same +# (H) class. cgr scanned only the calling method for self-assignments, so the type +# (H) was unknown and an ambiguous bare name resolved to the wrong global. Instance +# (H) attributes assigned in __init__ must be visible to every method of the class. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +MODULE_SRC = """def run() -> str: + return "global" + + +def status() -> str: + return "globalprop" + + +class Helper: + def run(self) -> str: + return "real" + + @property + def status(self) -> str: + return "ok" + + +class App: + def __init__(self) -> None: + self.helper = Helper() + + def go(self) -> str: + return self.helper.run() + + def check(self) -> str: + return self.helper.status +""" + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + 
+ def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + (tmp_path / "m.py").write_text(MODULE_SRC) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestInstanceAttrTypeInference: + def test_method_call_resolves_via_init_attribute_type(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ("proj.m.App.go", "proj.m.Helper.run") in calls, calls + + def test_ambiguous_method_does_not_resolve_to_module_function( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ("proj.m.App.go", "proj.m.run") not in calls, calls + + def test_property_access_resolves_via_init_attribute_type( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ("proj.m.App.check", "proj.m.Helper.status") in calls, calls + + def test_property_access_not_resolved_to_module_function( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ("proj.m.App.check", "proj.m.status") not in calls, calls diff --git a/codebase_rag/tests/test_instantiation_eval.py b/codebase_rag/tests/test_instantiation_eval.py new file mode 100644 index 000000000..b8cef70e5 --- /dev/null +++ b/codebase_rag/tests/test_instantiation_eval.py @@ -0,0 +1,72 @@ +from pathlib import Path + +from evals import constants as ec +from evals.instantiation import ( + cgr_instantiations, + oracle_instantiations, + score_instantiations, +) + + +def _make_repo(root: Path) -> None: + root.mkdir(parents=True) + (root / "__init__.py").write_text("", encoding="utf-8") + (root / "w.py").write_text( + "class Widget:\n 
def __init__(self):\n pass\n\n\nclass Unused:\n pass\n", + encoding="utf-8", + ) + (root / "u.py").write_text( + "from proj.w import Widget\n\n\ndef build():\n return Widget()\n", + encoding="utf-8", + ) + (root / "n.py").write_text( + "from proj.w import Widget\n\nALIAS = Widget\n", + encoding="utf-8", + ) + + +def test_oracle_captures_constructor_calls(tmp_path: Path) -> None: + src = tmp_path / "proj" + _make_repo(src) + deps = oracle_instantiations(src, "proj") + + # (H) build() constructs Widget(). + assert ("u.py", "Widget") in deps + # (H) n.py only aliases Widget, never calls it -> not an instantiation. + assert ("n.py", "Widget") not in deps + # (H) Unused is never constructed anywhere. + assert all(name != "Unused" for (_f, name) in deps) + + +def test_oracle_excludes_externally_shadowed_constructor(tmp_path: Path) -> None: + src = tmp_path / "proj" + _make_repo(src) + # (H) s.py imports a third-party Widget that shadows the first-party class of + # (H) the same name and constructs it. cgr resolves the import and emits no + # (H) first-party INSTANTIATES, so the oracle must not record one either, or + # (H) it reports a false missing edge and unfairly lowers cgr recall. + (src / "s.py").write_text( + "from external_lib import Widget\n\n\ndef build():\n return Widget()\n", + encoding="utf-8", + ) + deps = oracle_instantiations(src, "proj") + + assert ("s.py", "Widget") not in deps + # (H) The first-party import + construct in u.py still counts. + assert ("u.py", "Widget") in deps + # (H) cgr agrees: no first-party instantiation recorded for the shadowed call. 
+ assert cgr_instantiations(src, "proj") == deps + + +def test_cgr_matches_oracle_on_clean_repo(tmp_path: Path) -> None: + src = tmp_path / "proj" + _make_repo(src) + assert cgr_instantiations(src, "proj") == oracle_instantiations(src, "proj") + + +def test_score_computes_prf() -> None: + oracle = {("u.py", "Widget"), ("x.py", "Thing")} + cgr = {("u.py", "Widget")} + result = score_instantiations(cgr, oracle) + row = next(r for r in result.rows if r["label"] == ec.INSTANTIATES_LABEL) + assert (row["tp"], row["fp"], row["fn"]) == (1, 0, 1) diff --git a/codebase_rag/tests/test_interprocedural_callback_flow.py b/codebase_rag/tests/test_interprocedural_callback_flow.py new file mode 100644 index 000000000..6369ad22a --- /dev/null +++ b/codebase_rag/tests/test_interprocedural_callback_flow.py @@ -0,0 +1,94 @@ +# (H) L3 finding from the evals/ harness: extract_java_interface_names invokes a +# (H) resolve_to_qn callback that is threaded through extract_implemented_interfaces from +# (H) a caller that passes self._resolve_to_qn. The concrete callable is bound at the +# (H) outer call site and flows through pass-through parameters to where it is finally +# (H) invoked, so resolving the edge needs inter-procedural callback propagation. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +FILES = { + "pkg/__init__.py": "", + # (H) extract_names invokes the callback; extract_interfaces only passes it through. 
+ "pkg/extract.py": ( + "def extract_names(node, out, scope, resolve_to_qn):\n" + ' out.append(resolve_to_qn("x", scope))\n\n\n' + "def extract_interfaces(node, scope, resolve_to_qn):\n" + " out = []\n" + " extract_names(node, out, scope, resolve_to_qn)\n" + " return out\n" + ), + "pkg/driver.py": ( + "from .extract import extract_interfaces\n\n\n" + "class Driver:\n" + " def resolve(self, name, scope):\n" + " return name\n\n" + " def run(self, node):\n" + ' return extract_interfaces(node, "s", self.resolve)\n' + ), +} + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + for rel, content in FILES.items(): + p = tmp_path / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestInterproceduralCallbackFlow: + def test_callback_propagates_through_passthrough_param( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.extract.extract_names", + "proj.pkg.driver.Driver.resolve", + ) in 
calls, calls diff --git a/codebase_rag/tests/test_java_call_caller_qn.py b/codebase_rag/tests/test_java_call_caller_qn.py new file mode 100644 index 000000000..319680e6d --- /dev/null +++ b/codebase_rag/tests/test_java_call_caller_qn.py @@ -0,0 +1,65 @@ +from pathlib import Path + +from evals.cgr_graph import _capture + + +def _make_file(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "T.java").write_text( + "class T {\n" + " int helper() { return 1; }\n" + " int caller() { return this.helper(); }\n" + "}\n", + encoding="utf-8", + ) + + +def test_java_method_caller_qn_carries_signature(tmp_path: Path) -> None: + # (H) The definition pass registers a Java method node with its parameter + # (H) signature (demo.T.T.caller()), but the call pass built the caller qn + # (H) without it (demo.T.T.caller) -> the CALLS from-endpoint matched no node + # (H) and the edge would not attach in Memgraph. The caller qn must carry the + # (H) same signature as the registered Method node. + _make_file(tmp_path) + ingestor = _capture(tmp_path, "demo") + node_qns = {str(uid) for (_label, uid) in ingestor.nodes} + calls = { + (str(from_val), str(to_val)) + for _fl, from_val, rel, _tl, to_val in ingestor.rels + if rel == "CALLS" + } + + assert "demo.T.T.caller()" in node_qns + assert ("demo.T.T.caller()", "demo.T.T.helper()") in calls + assert ("demo.T.T.caller", "demo.T.T.helper()") not in calls + + +def _make_cross_package(root: Path) -> None: + (root / "pkga").mkdir(parents=True, exist_ok=True) + (root / "pkgb").mkdir(parents=True, exist_ok=True) + (root / "pkgb" / "T.java").write_text( + "package pkgb;\npublic class T {\n public static int make() { return 1; }\n}\n", + encoding="utf-8", + ) + # (H) Use references bare `T.make()` with NO import; in Java this only compiles + # (H) for a same-package or imported T, never a class in another package. 
+ (root / "pkga" / "Use.java").write_text( + "package pkga;\nclass Use {\n int run() { return T.make(); }\n}\n", + encoding="utf-8", + ) + + +def test_java_unimported_cross_package_static_call_does_not_resolve( + tmp_path: Path, +) -> None: + # (H) A bare class-name receiver with no import must not resolve to a class in + # (H) a different package (directory): the same-package fallback is exhausted, + # (H) so leave the receiver unlinked rather than emit a wrong cross-package edge. + _make_cross_package(tmp_path) + ingestor = _capture(tmp_path, "demo") + calls = { + (str(from_val), str(to_val)) + for _fl, from_val, rel, _tl, to_val in ingestor.rels + if rel == "CALLS" + } + assert not any(to_val.endswith("T.make()") for _f, to_val in calls) diff --git a/codebase_rag/tests/test_java_containment_oracle.py b/codebase_rag/tests/test_java_containment_oracle.py new file mode 100644 index 000000000..297e7ffea --- /dev/null +++ b/codebase_rag/tests/test_java_containment_oracle.py @@ -0,0 +1,70 @@ +# (H) Covers Java containment-edge validation: cgr's DEFINES (file module -> +# (H) every named type, including nested) and DEFINES_METHOD (class/interface/ +# (H) enum -> method) edges are graded against the independent JDK Compiler Tree +# (H) API oracle, joined on (kind, file, line). Exercises an interface method, an +# (H) enum method, and a nested class (cgr keeps type containment flat). 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_java_graph +from evals.oracles import java_available, run_java_oracle +from evals.score import score_edge_types + +JAVA_SRC = """\ +package demo; + +public interface Shape { + double area(); +} + +public enum Color { + RED, GREEN; + public int rank() { return 1; } +} + +public class Point implements Shape { + private int x; + public double area() { return 1.0; } + + public static class Inner { + public void helper() {} + } +} +""" + + +def _require_java() -> None: + if not java_available(): + pytest.skip("java toolchain not available") + if cs.SupportedLanguage.JAVA not in load_parsers()[0]: + pytest.skip("java parser not available") + + +def test_cgr_matches_jdk_oracle_on_containment_edges(tmp_path: Path) -> None: + _require_java() + project = tmp_path / "java_edge_test" + project.mkdir() + (project / "Demo.java").write_text(JAVA_SRC, encoding="utf-8") + + cgr = extract_cgr_java_graph(project, project.name) + oracle = run_java_oracle(project) + + result = score_edge_types(cgr, oracle, ec.SCORED_EDGE_TYPES) + by_label = {row["label"]: row for row in result.rows} + for label in ( + cs.RelationshipType.DEFINES.value, + cs.RelationshipType.DEFINES_METHOD.value, + ): + row = by_label.get(label) + assert row is not None, (label, by_label, result.diff) + assert row["precision"] == 1.0 and row["recall"] == 1.0, ( + label, + row, + result.diff, + ) diff --git a/codebase_rag/tests/test_java_field_access_chains.py b/codebase_rag/tests/test_java_field_access_chains.py new file mode 100644 index 000000000..55a1d0791 --- /dev/null +++ b/codebase_rag/tests/test_java_field_access_chains.py @@ -0,0 +1,389 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +import 
tree_sitter_java as tsjava +from tree_sitter import Language, Node, Parser + +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.parsers.java.utils import extract_class_info +from codebase_rag.tests.conftest import get_relationships + + +def _call_targets(mock_ingestor: MagicMock) -> set[str]: + return {c.args[2][2] for c in get_relationships(mock_ingestor, "CALLS")} + + +def _class_node(java_source: str) -> Node: + tree = Parser(Language(tsjava.language())).parse(java_source.encode()) + + def walk(node: Node) -> Node | None: + if node.type == "class_declaration": + return node + for child in node.children: + if found := walk(child): + return found + return None + + found = walk(tree.root_node) + assert found is not None + return found + + +def _run(project_path: Path, mock_ingestor: MagicMock) -> None: + parsers, queries = load_parsers() + GraphUpdater( + ingestor=mock_ingestor, + repo_path=project_path, + parsers=parsers, + queries=queries, + ).run() + + +def test_mixed_field_access_then_method_resolves( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "proj" + (project / "src").mkdir(parents=True) + (project / "src" / "Main.java").write_text( + """ +class Engine { public void start() { System.out.println("started"); } } +class Car { public Engine engine = new Engine(); } +public class Main { + public static void main(String[] args) { + Car obj = new Car(); + obj.engine.start(); + } +} +""", + encoding="utf-8", + ) + _run(project, mock_ingestor) + + targets = _call_targets(mock_ingestor) + assert any(t.endswith(".Engine.start()") for t in targets), ( + f"obj.engine.start() should resolve to Engine.start(); got {sorted(targets)}" + ) + + +def test_multilevel_field_access_then_method_resolves( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "proj" + (project / "src").mkdir(parents=True) + (project / "src" / "Main.java").write_text( 
+ """ +class City { public void ping() { System.out.println("ping"); } } +class Address { public City city = new City(); } +class User { public Address address = new Address(); } +public class Main { + public static void main(String[] args) { + User obj = new User(); + obj.address.city.ping(); + } +} +""", + encoding="utf-8", + ) + _run(project, mock_ingestor) + + targets = _call_targets(mock_ingestor) + assert any(t.endswith(".City.ping()") for t in targets), ( + f"obj.address.city.ping() should resolve to City.ping(); got {sorted(targets)}" + ) + + +def test_nested_field_access_type_inference_via_var( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "proj" + (project / "src").mkdir(parents=True) + (project / "src" / "Main.java").write_text( + """ +class City { public void ping() { System.out.println("ping"); } } +class Address { public City city = new City(); } +class User { public Address address = new Address(); } +public class Main { + public static void main(String[] args) { + User obj = new User(); + var c = obj.address.city; + c.ping(); + } +} +""", + encoding="utf-8", + ) + _run(project, mock_ingestor) + + targets = _call_targets(mock_ingestor) + assert any(t.endswith(".City.ping()") for t in targets), ( + f"var c = obj.address.city; c.ping() should resolve to City.ping(); " + f"got {sorted(targets)}" + ) + + +def test_this_rooted_nested_field_access_via_var( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "proj" + (project / "src").mkdir(parents=True) + (project / "src" / "Main.java").write_text( + """ +class City { public void ping() { System.out.println("ping"); } } +class Address { public City city = new City(); } +public class Container { + public Address address = new Address(); + public void run() { + var c = this.address.city; + c.ping(); + } +} +""", + encoding="utf-8", + ) + _run(project, mock_ingestor) + + targets = _call_targets(mock_ingestor) + assert any(t.endswith(".City.ping()") 
for t in targets), ( + f"var c = this.address.city; c.ping() should resolve to City.ping(); " + f"got {sorted(targets)}" + ) + + +def test_super_rooted_nested_field_access_via_var( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "proj" + (project / "src").mkdir(parents=True) + (project / "src" / "Main.java").write_text( + """ +class City { public void ping() { System.out.println("ping"); } } +class Address { public City city = new City(); } +class Base { public Address address = new Address(); } +public class Derived extends Base { + public void run() { + var c = super.address.city; + c.ping(); + } +} +""", + encoding="utf-8", + ) + _run(project, mock_ingestor) + + targets = _call_targets(mock_ingestor) + assert any(t.endswith(".City.ping()") for t in targets), ( + f"var c = super.address.city; c.ping() should resolve to City.ping(); " + f"got {sorted(targets)}" + ) + + +def test_inherited_field_chain_via_this( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "proj" + (project / "src").mkdir(parents=True) + (project / "src" / "Main.java").write_text( + """ +class City { public void ping() { System.out.println("ping"); } } +class Address { public City city = new City(); } +class Base { public Address address = new Address(); } +public class Derived extends Base { + public void run() { + var c = this.address.city; + c.ping(); + } +} +""", + encoding="utf-8", + ) + _run(project, mock_ingestor) + + targets = _call_targets(mock_ingestor) + assert any(t.endswith(".City.ping()") for t in targets), ( + f"this.address.city (address inherited from Base) should resolve to " + f"City.ping(); got {sorted(targets)}" + ) + + +def test_inherited_field_chain_via_object( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "proj" + (project / "src").mkdir(parents=True) + (project / "src" / "Main.java").write_text( + """ +class City { public void ping() { System.out.println("ping"); } } +class 
Address { public City city = new City(); } +class Base { public Address address = new Address(); } +class Derived extends Base {} +public class Main { + public static void main(String[] args) { + Derived obj = new Derived(); + obj.address.city.ping(); + } +} +""", + encoding="utf-8", + ) + _run(project, mock_ingestor) + + targets = _call_targets(mock_ingestor) + assert any(t.endswith(".City.ping()") for t in targets), ( + f"obj.address.city (address inherited from Base) should resolve to " + f"City.ping(); got {sorted(targets)}" + ) + + +def test_direct_this_field_chain_method_call_multiclass( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "proj" + (project / "src").mkdir(parents=True) + (project / "src" / "Main.java").write_text( + """ +class Aardvark { public void unused() {} } +class City { public void ping() { System.out.println("ping"); } } +class Address { public City city = new City(); } +public class Container { + public Address address = new Address(); + public void run() { + this.address.city.ping(); + } +} +""", + encoding="utf-8", + ) + _run(project, mock_ingestor) + + targets = _call_targets(mock_ingestor) + assert any(t.endswith(".City.ping()") for t in targets), ( + f"direct this.address.city.ping() in a multi-class file should resolve to " + f"City.ping(); got {sorted(targets)}" + ) + + +def test_direct_super_field_chain_method_call_multiclass( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "proj" + (project / "src").mkdir(parents=True) + (project / "src" / "Main.java").write_text( + """ +class Aardvark { public void unused() {} } +class Other {} +class Wrong extends Other { public void unused() {} } +class City { public void ping() { System.out.println("ping"); } } +class Address { public City city = new City(); } +class Base { public Address address = new Address(); } +public class Derived extends Base { + public void run() { + super.address.city.ping(); + } +} +""", + 
encoding="utf-8", + ) + _run(project, mock_ingestor) + + targets = _call_targets(mock_ingestor) + assert any(t.endswith(".City.ping()") for t in targets), ( + f"direct super.address.city.ping() in a multi-class file should resolve to " + f"City.ping(); got {sorted(targets)}" + ) + + +def test_scoped_superclass_extraction_keeps_actual_class() -> None: + nested = extract_class_info(_class_node("class Child extends Outer.Base {}")) + assert nested.get("superclass") == "Outer.Base", ( + f"scoped superclass should keep the full name, not the outer/package " + f"segment; got {nested.get('superclass')!r}" + ) + + qualified = extract_class_info(_class_node("class Child extends pkg.Base {}")) + assert qualified.get("superclass") == "pkg.Base", ( + f"package-qualified superclass should keep the full name; " + f"got {qualified.get('superclass')!r}" + ) + + simple = extract_class_info(_class_node("class Child extends Base {}")) + assert simple.get("superclass") == "Base" + + +def test_inherited_field_chain_via_nested_superclass( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "proj" + (project / "src").mkdir(parents=True) + (project / "src" / "Main.java").write_text( + """ +class City { public void ping() { System.out.println("ping"); } } +class Address { public City city = new City(); } +class Outer { + static class Base { public Address address = new Address(); } +} +public class Child extends Outer.Base { + public void run() { + this.address.city.ping(); + } +} +""", + encoding="utf-8", + ) + _run(project, mock_ingestor) + + targets = _call_targets(mock_ingestor) + assert any(t.endswith(".City.ping()") for t in targets), ( + f"this.address.city with a same-file nested superclass (Outer.Base) should " + f"resolve to City.ping(); got {sorted(targets)}" + ) + + +def test_super_rooted_chain_with_nested_superclass( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "proj" + (project / "src").mkdir(parents=True) + 
(project / "src" / "Main.java").write_text( + """ +class City { public void ping() { System.out.println("ping"); } } +class Address { public City city = new City(); } +class Outer { + static class Base { public Address address = new Address(); } +} +public class Child extends Outer.Base { + public void run() { + var c = super.address.city; + c.ping(); + } +} +""", + encoding="utf-8", + ) + _run(project, mock_ingestor) + + targets = _call_targets(mock_ingestor) + assert any(t.endswith(".City.ping()") for t in targets), ( + f"super.address.city with a nested superclass (Outer.Base) should resolve to " + f"City.ping(); got {sorted(targets)}" + ) + + +def test_generic_scoped_superclass_extraction() -> None: + generic_scoped = extract_class_info( + _class_node("class Child extends Outer.Base {}") + ) + assert generic_scoped.get("superclass") == "Outer.Base", ( + f"generic scoped superclass should extract the base name; " + f"got {generic_scoped.get('superclass')!r}" + ) + + generic_simple = extract_class_info( + _class_node("class Child extends Box {}") + ) + assert generic_simple.get("superclass") == "Box", ( + f"generic superclass should extract the base name; " + f"got {generic_simple.get('superclass')!r}" + ) diff --git a/codebase_rag/tests/test_java_inheritance_edges.py b/codebase_rag/tests/test_java_inheritance_edges.py new file mode 100644 index 000000000..9c293833f --- /dev/null +++ b/codebase_rag/tests/test_java_inheritance_edges.py @@ -0,0 +1,59 @@ +# (H) Java inheritance edges. cgr captured a class's `extends`/`implements` but +# (H) missed two cases: an interface's `extends` superinterfaces (-> INHERITS) +# (H) and an enum's `implements` interfaces (-> IMPLEMENTS). Both clauses carry a +# (H) type_list of interface names that were never extracted. 
+from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.constants import RelationshipType +from codebase_rag.tests.conftest import create_and_run_updater, get_relationships + +_JAVA = """\ +package demo; + +public interface A {} +public interface B {} +public interface Big extends A, B {} + +abstract class Base {} +enum Color implements A { RED } + +class Circle extends Base implements A, B {} + +class Holder extends Box implements Comparable {} +""" + + +def _pairs(mock_ingestor: MagicMock, rel: str) -> set[tuple[str, str]]: + # (H) (source_qn, target_qn) for the given relationship. + return { + (call[0][0][2], call[0][2][2]) for call in get_relationships(mock_ingestor, rel) + } + + +def test_java_inheritance_and_implements_edges( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "java_inh" + project.mkdir() + (project / "Demo.java").write_text(_JAVA, encoding="utf-8") + create_and_run_updater(project, mock_ingestor, skip_if_missing="java") + + inherits = _pairs(mock_ingestor, RelationshipType.INHERITS.value) + implements = _pairs(mock_ingestor, RelationshipType.IMPLEMENTS.value) + base = "java_inh.Demo" + + # (H) Interface extends -> INHERITS to each superinterface. + assert (f"{base}.Big", f"{base}.A") in inherits, inherits + assert (f"{base}.Big", f"{base}.B") in inherits, inherits + # (H) Enum implements -> IMPLEMENTS. + assert (f"{base}.Color", f"{base}.A") in implements, implements + # (H) Class extends/implements (already worked) stay intact. + assert (f"{base}.Circle", f"{base}.Base") in inherits, inherits + assert (f"{base}.Circle", f"{base}.A") in implements, implements + assert (f"{base}.Circle", f"{base}.B") in implements, implements + # (H) Generic (parameterized) bases must be captured by their base type. 
+ assert (f"{base}.Holder", f"{base}.Box") in inherits, inherits + assert (f"{base}.Holder", f"{base}.Comparable") in implements, implements diff --git a/codebase_rag/tests/test_java_inheritance_oracle.py b/codebase_rag/tests/test_java_inheritance_oracle.py new file mode 100644 index 000000000..65b8c2f42 --- /dev/null +++ b/codebase_rag/tests/test_java_inheritance_oracle.py @@ -0,0 +1,59 @@ +# (H) Covers Java inheritance-edge validation: cgr's INHERITS (class/interface +# (H) extends) and IMPLEMENTS (class/enum implements) edges are graded against the +# (H) JDK Compiler Tree API oracle, by (source node, base SIMPLE NAME). +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_java_graph +from evals.oracles import java_available, run_java_oracle +from evals.score import score_name_edge_types + +JAVA_SRC = """\ +package demo; + +public interface A {} +public interface B {} +public interface Big extends A, B {} + +abstract class Base {} +enum Color implements A { RED } + +class Circle extends Base implements A, B {} +""" + + +def _require_java() -> None: + if not java_available(): + pytest.skip("java toolchain not available") + if cs.SupportedLanguage.JAVA not in load_parsers()[0]: + pytest.skip("java parser not available") + + +def test_cgr_matches_jdk_oracle_on_inheritance_edges(tmp_path: Path) -> None: + _require_java() + project = tmp_path / "java_inh_edge" + project.mkdir() + (project / "Demo.java").write_text(JAVA_SRC, encoding="utf-8") + + cgr = extract_cgr_java_graph(project, project.name) + oracle = run_java_oracle(project) + + result = score_name_edge_types(cgr, oracle, ec.INHERITANCE_NAME_EDGE_TYPES) + by_label = {row["label"]: row for row in result.rows} + for label in ( + cs.RelationshipType.INHERITS.value, + cs.RelationshipType.IMPLEMENTS.value, + ): + row = 
by_label.get(label) + assert row is not None, (label, by_label, result.diff) + assert row["precision"] == 1.0 and row["recall"] == 1.0, ( + label, + row, + result.diff, + ) diff --git a/codebase_rag/tests/test_java_label_name_collision.py b/codebase_rag/tests/test_java_label_name_collision.py new file mode 100644 index 000000000..c43702119 --- /dev/null +++ b/codebase_rag/tests/test_java_label_name_collision.py @@ -0,0 +1,314 @@ +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from codebase_rag.constants import NODE_UNIQUE_CONSTRAINTS, NodeLabel +from codebase_rag.tests.conftest import ( + get_node_names, + get_nodes, + get_qualified_names, + get_relationships, + run_updater, +) +from codebase_rag.types_defs import NodeType + + +@pytest.fixture +def java_label_collision_project(temp_repo: Path) -> Path: + project_path = temp_repo / "java_label_collision" + project_path.mkdir() + src = project_path / "src" / "main" / "java" / "com" / "example" + src.mkdir(parents=True) + return project_path + + +def _src_dir(project: Path) -> Path: + return project / "src" / "main" / "java" / "com" / "example" + + +def _has_qn_ending(qns: set[str], suffix: str) -> bool: + return any(qn.endswith(suffix) for qn in qns) + + +def test_interface_named_interface_ingested_as_interface_node( + java_label_collision_project: Path, + mock_ingestor: MagicMock, +) -> None: + src = _src_dir(java_label_collision_project) + (src / "Interface.java").write_text( + encoding="utf-8", + data="""\ +package com.example; + +public interface Interface { + void doSomething(); +} +""", + ) + run_updater(java_label_collision_project, mock_ingestor, skip_if_missing="java") + + interface_nodes = get_nodes(mock_ingestor, NodeType.INTERFACE) + interface_qns = get_qualified_names(interface_nodes) + + assert _has_qn_ending(interface_qns, ".Interface"), ( + f"Interface named 'Interface' not found in Interface nodes. 
Got: {interface_qns}" + ) + + class_qns = get_node_names(mock_ingestor, NodeType.CLASS) + interface_in_class = [qn for qn in class_qns if qn.endswith(".Interface")] + assert not interface_in_class, ( + f"Interface named 'Interface' should not appear as a Class node. Got: {interface_in_class}" + ) + + +def test_enum_named_enum_ingested_as_enum_node( + java_label_collision_project: Path, + mock_ingestor: MagicMock, +) -> None: + src = _src_dir(java_label_collision_project) + (src / "Enum.java").write_text( + encoding="utf-8", + data="""\ +package com.example; + +public enum Enum { + VALUE_A, + VALUE_B, + VALUE_C +} +""", + ) + run_updater(java_label_collision_project, mock_ingestor, skip_if_missing="java") + + enum_nodes = get_nodes(mock_ingestor, NodeType.ENUM) + enum_qns = get_qualified_names(enum_nodes) + + assert _has_qn_ending(enum_qns, ".Enum"), ( + f"Enum named 'Enum' not found in Enum nodes. Got: {enum_qns}" + ) + + class_qns = get_node_names(mock_ingestor, NodeType.CLASS) + enum_in_class = [qn for qn in class_qns if qn.endswith(".Enum")] + assert not enum_in_class, ( + f"Enum named 'Enum' should not appear as a Class node. Got: {enum_in_class}" + ) + + +def test_class_named_class_ingested_as_class_node( + java_label_collision_project: Path, + mock_ingestor: MagicMock, +) -> None: + src = _src_dir(java_label_collision_project) + (src / "Class.java").write_text( + encoding="utf-8", + data="""\ +package com.example; + +public class Class { + public void run() {} +} +""", + ) + run_updater(java_label_collision_project, mock_ingestor, skip_if_missing="java") + + class_nodes = get_nodes(mock_ingestor, NodeType.CLASS) + class_qns = get_qualified_names(class_nodes) + + assert _has_qn_ending(class_qns, ".Class"), ( + f"Class named 'Class' not found in Class nodes. 
Got: {class_qns}" + ) + + +def test_interface_and_enum_labels_have_constraints() -> None: + assert NodeLabel.INTERFACE in NODE_UNIQUE_CONSTRAINTS, ( + "Interface label missing from NODE_UNIQUE_CONSTRAINTS" + ) + assert NodeLabel.ENUM in NODE_UNIQUE_CONSTRAINTS, ( + "Enum label missing from NODE_UNIQUE_CONSTRAINTS" + ) + assert NODE_UNIQUE_CONSTRAINTS[NodeLabel.INTERFACE] == "qualified_name" + assert NODE_UNIQUE_CONSTRAINTS[NodeLabel.ENUM] == "qualified_name" + + +def test_all_node_labels_have_constraints() -> None: + for label in NodeLabel: + assert label.value in NODE_UNIQUE_CONSTRAINTS, ( + f"NodeLabel.{label.name} ('{label.value}') missing from NODE_UNIQUE_CONSTRAINTS" + ) + + +def test_interface_named_interface_has_defines_relationship( + java_label_collision_project: Path, + mock_ingestor: MagicMock, +) -> None: + src = _src_dir(java_label_collision_project) + (src / "Interface.java").write_text( + encoding="utf-8", + data="""\ +package com.example; + +public interface Interface { + void doSomething(); +} +""", + ) + run_updater(java_label_collision_project, mock_ingestor, skip_if_missing="java") + + defines_rels = get_relationships(mock_ingestor, "DEFINES") + found_defines = False + for rel in defines_rels: + if len(rel.args) >= 3: + to_spec = rel.args[2] + if isinstance(to_spec, tuple) and len(to_spec) >= 3: + to_label = to_spec[0] + to_qn = str(to_spec[2]) + if to_qn.endswith(".Interface"): + assert to_label == NodeType.INTERFACE, ( + f"DEFINES target label should be 'Interface', got '{to_label}'" + ) + found_defines = True + + assert found_defines, ( + "No DEFINES relationship found for Interface named 'Interface'" + ) + + +def test_enum_named_enum_has_defines_relationship( + java_label_collision_project: Path, + mock_ingestor: MagicMock, +) -> None: + src = _src_dir(java_label_collision_project) + (src / "Enum.java").write_text( + encoding="utf-8", + data="""\ +package com.example; + +public enum Enum { + VALUE_A, + VALUE_B +} +""", + ) + 
run_updater(java_label_collision_project, mock_ingestor, skip_if_missing="java") + + defines_rels = get_relationships(mock_ingestor, "DEFINES") + found_defines = False + for rel in defines_rels: + if len(rel.args) >= 3: + to_spec = rel.args[2] + if isinstance(to_spec, tuple) and len(to_spec) >= 3: + to_label = to_spec[0] + to_qn = str(to_spec[2]) + if to_qn.endswith(".Enum"): + assert to_label == NodeType.ENUM, ( + f"DEFINES target label should be 'Enum', got '{to_label}'" + ) + found_defines = True + + assert found_defines, "No DEFINES relationship found for Enum named 'Enum'" + + +def test_class_implementing_interface_named_interface( + java_label_collision_project: Path, + mock_ingestor: MagicMock, +) -> None: + src = _src_dir(java_label_collision_project) + (src / "Interface.java").write_text( + encoding="utf-8", + data="""\ +package com.example; + +public interface Interface { + void doSomething(); +} +""", + ) + (src / "Implementor.java").write_text( + encoding="utf-8", + data="""\ +package com.example; + +public class Implementor implements Interface { + public void doSomething() { + System.out.println("done"); + } +} +""", + ) + run_updater(java_label_collision_project, mock_ingestor, skip_if_missing="java") + + interface_qns = get_node_names(mock_ingestor, NodeType.INTERFACE) + assert _has_qn_ending(interface_qns, ".Interface") + + class_qns = get_node_names(mock_ingestor, NodeType.CLASS) + assert _has_qn_ending(class_qns, ".Implementor") + + implements_rels = get_relationships(mock_ingestor, "IMPLEMENTS") + found_implements = False + for rel in implements_rels: + if len(rel.args) >= 3: + from_spec = rel.args[0] + if isinstance(from_spec, tuple) and len(from_spec) >= 3: + from_qn = str(from_spec[2]) + if from_qn.endswith(".Implementor"): + found_implements = True + + assert found_implements, ( + "No IMPLEMENTS relationship found for Implementor -> Interface" + ) + + +def test_multiple_label_colliding_names( + java_label_collision_project: Path, + 
mock_ingestor: MagicMock, +) -> None: + src = _src_dir(java_label_collision_project) + (src / "Function.java").write_text( + encoding="utf-8", + data="""\ +package com.example; + +public class Function { + public void execute() {} +} +""", + ) + (src / "Method.java").write_text( + encoding="utf-8", + data="""\ +package com.example; + +public class Method { + public void invoke() {} +} +""", + ) + (src / "Module.java").write_text( + encoding="utf-8", + data="""\ +package com.example; + +public class Module { + public void load() {} +} +""", + ) + run_updater(java_label_collision_project, mock_ingestor, skip_if_missing="java") + + class_qns = get_node_names(mock_ingestor, NodeType.CLASS) + assert _has_qn_ending(class_qns, ".Function") + assert _has_qn_ending(class_qns, ".Method") + assert _has_qn_ending(class_qns, ".Module") + + function_qns = get_node_names(mock_ingestor, NodeType.FUNCTION) + method_qns = get_node_names(mock_ingestor, NodeType.METHOD) + non_class_qns = function_qns | method_qns + collisions = [ + qn + for qn in non_class_qns + if qn.endswith(".Function") or qn.endswith(".Method") or qn.endswith(".Module") + ] + assert not collisions, ( + f"Class names colliding with node labels should not appear as wrong node types: {collisions}" + ) diff --git a/codebase_rag/tests/test_java_retrieval_eval.py b/codebase_rag/tests/test_java_retrieval_eval.py new file mode 100644 index 000000000..d54594c28 --- /dev/null +++ b/codebase_rag/tests/test_java_retrieval_eval.py @@ -0,0 +1,72 @@ +from pathlib import Path + +import pytest + +from evals import constants as ec +from evals.java_retrieval import ( + cgr_java_call_edges, + oracle_java_call_edges, + score_java_retrieval, +) +from evals.oracles import java_available + +needs_java = pytest.mark.skipif( + not java_available(), reason="java toolchain not installed" +) + + +def _make_project(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "Util.java").write_text( + "class Util {\n static int 
free() { return 2; }\n}\n", + encoding="utf-8", + ) + (root / "T.java").write_text( + "class T {\n" + " int helper() { return 1; }\n" + " int caller() { return this.helper(); }\n" + " static T make() { return new T(); }\n" + " int orphan() { return 9; }\n" + "}\n", + encoding="utf-8", + ) + (root / "Use.java").write_text( + "class Use {\n" + " int useIt() {\n" + " T t = T.make();\n" + " return Util.free() + t.caller();\n" + " }\n" + "}\n", + encoding="utf-8", + ) + + +@needs_java +def test_oracle_captures_first_party_java_calls(tmp_path: Path) -> None: + _make_project(tmp_path) + edges, declared = oracle_java_call_edges(tmp_path) + + # (H) this.helper(), T.make(), Util.free(), t.caller() are first-party calls. + assert ("T.java", "helper") in edges + assert ("Use.java", "make") in edges + assert ("Use.java", "free") in edges + assert ("Use.java", "caller") in edges + # (H) orphan is declared but never called -> never a call edge. + assert ("T.java", "orphan") not in edges + assert {"helper", "caller", "make", "free", "orphan", "useIt"} <= declared + + +@needs_java +def test_cgr_matches_oracle_on_clean_java_project(tmp_path: Path) -> None: + _make_project(tmp_path) + oracle, declared = oracle_java_call_edges(tmp_path) + cgr = cgr_java_call_edges(tmp_path, tmp_path.name, declared) + assert cgr == oracle + + +def test_score_java_retrieval_prf() -> None: + result = score_java_retrieval( + {("A.java", "f"), ("A.java", "g")}, {("A.java", "f"), ("B.java", "h")} + ) + row = next(r for r in result.rows if r["label"] == ec.JAVA_RETRIEVAL_LABEL) + assert (row["tp"], row["fp"], row["fn"]) == (1, 1, 1) diff --git a/codebase_rag/tests/test_java_span_oracle.py b/codebase_rag/tests/test_java_span_oracle.py new file mode 100644 index 000000000..8ecff7bbb --- /dev/null +++ b/codebase_rag/tests/test_java_span_oracle.py @@ -0,0 +1,74 @@ +# (H) Covers Java node SPAN (end_line) validation: cgr's end_line for each node is +# (H) graded against the JDK Compiler Tree API oracle (which 
emits each node's +# (H) source end position), joined on (kind, file, start). Exercises a class with a +# (H) multi-line method signature, an interface, an enum, and a nested class so +# (H) spans are not trivially single line. +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_java_graph +from evals.oracles import java_available, run_java_oracle +from evals.score import score_span + +JAVA_SRC = """\ +package demo; + +public class Widget implements Shape { + private int size; + + public int area( + int scale + ) { + return this.size * scale; + } + + static class Inner { + int value() { + return 1; + } + } +} + +interface Shape { + int area(int scale); +} + +enum Color { + RED, + GREEN, + BLUE +} +""" + + +def _require_java() -> None: + if not java_available(): + pytest.skip("jdk (javac/java) not available") + if cs.SupportedLanguage.JAVA not in load_parsers()[0]: + pytest.skip("java parser not available") + + +def test_cgr_matches_jdk_oracle_on_node_spans(tmp_path: Path) -> None: + _require_java() + project = tmp_path / "java_span_test" + (project / "demo").mkdir(parents=True) + (project / "demo" / "Widget.java").write_text(JAVA_SRC, encoding="utf-8") + + cgr = extract_cgr_java_graph(project, project.name) + oracle = run_java_oracle(project) + + result = score_span(cgr, oracle, ec.JAVA_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + aggregate = by_label.get(ec.AGGREGATE_LABEL) + assert aggregate is not None, (by_label, result.diff) + assert aggregate["precision"] == 1.0 and aggregate["recall"] == 1.0, ( + aggregate, + result.diff, + ) + assert aggregate["tp"] >= 5, aggregate diff --git a/codebase_rag/tests/test_java_structure_oracle.py b/codebase_rag/tests/test_java_structure_oracle.py new file mode 100644 index 000000000..2c1db2004 --- 
/dev/null +++ b/codebase_rag/tests/test_java_structure_oracle.py @@ -0,0 +1,72 @@ +# (H) Covers the Java structure oracle harness (evals/oracles/java_oracle + +# (H) evals/java_l1.py): the JDK Compiler Tree API oracle is authoritative ground +# (H) truth, and cgr's captured Java nodes are graded against it on +# (H) (kind, file, start_line). Includes an anonymous class, whose methods cgr +# (H) models as standalone Functions (like JS object-literal methods). +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_java_nodes +from evals.oracles import java_available, run_java_oracle +from evals.score import score_node_kinds +from evals.types_defs import GraphData + +JAVA_SRC = """\ +package demo; + +public class Sample { + private int x; + public Sample(int x) { this.x = x; } + public int area() { return x; } + public static Sample make(int x) { return new Sample(x); } + + interface Shape { double area(); } + enum Color { RED, GREEN } + static class Inner { void helper() {} } + + Runnable callback() { + return new Runnable() { + public void run() { helper2(); } + void helper2() {} + }; + } +} + +interface Drawable { void draw(); } + +enum Direction { NORTH, SOUTH } +""" + + +def _require_java() -> None: + if not java_available(): + pytest.skip("javac/java toolchain not available") + if cs.SupportedLanguage.JAVA not in load_parsers()[0]: + pytest.skip("java parser not available") + + +def test_cgr_matches_jdk_oracle_on_java_structure(tmp_path: Path) -> None: + _require_java() + project = tmp_path / "java_oracle_test" + project.mkdir() + (project / "Sample.java").write_text(JAVA_SRC, encoding="utf-8") + + cgr = GraphData( + nodes=extract_cgr_java_nodes(project, project.name), + edges=set(), + name_edges=set(), + ) + oracle = run_java_oracle(project) + + result = 
score_node_kinds(cgr, oracle, ec.JAVA_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + for label in ("Class", "Interface", "Enum", "Method", "Function"): + row = by_label.get(label) + assert row is not None, (label, by_label) + assert row["precision"] == 1.0 and row["recall"] == 1.0, (label, row) diff --git a/codebase_rag/tests/test_javascript_containment_oracle.py b/codebase_rag/tests/test_javascript_containment_oracle.py new file mode 100644 index 000000000..bc197d92b --- /dev/null +++ b/codebase_rag/tests/test_javascript_containment_oracle.py @@ -0,0 +1,56 @@ +# (H) Covers JavaScript containment-edge validation: cgr's DEFINES (file module +# (H) -> class / top-level function) and DEFINES_METHOD (class -> method) edges +# (H) are graded against the TypeScript-compiler-API oracle run over .js, joined +# (H) on (kind, file, line). +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_js_graph +from evals.oracles import run_javascript_oracle, typescript_available +from evals.score import score_edge_types + +JS_SRC = """\ +export class Point { + constructor() { this.x = 0; } + area() { return 1.0; } +} + +export function free() { return 1; } +""" + + +def _require_js() -> None: + if not typescript_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.JS not in load_parsers()[0]: + pytest.skip("javascript parser not available") + + +def test_cgr_matches_tsc_oracle_on_js_containment_edges(tmp_path: Path) -> None: + _require_js() + project = tmp_path / "js_edge" + project.mkdir() + (project / "lib.js").write_text(JS_SRC, encoding="utf-8") + + cgr = extract_cgr_js_graph(project, project.name) + oracle = run_javascript_oracle(project) + + result = score_edge_types(cgr, oracle, ec.SCORED_EDGE_TYPES) + by_label = 
{row["label"]: row for row in result.rows} + for label in ( + cs.RelationshipType.DEFINES.value, + cs.RelationshipType.DEFINES_METHOD.value, + ): + row = by_label.get(label) + assert row is not None, (label, by_label, result.diff) + assert row["precision"] == 1.0 and row["recall"] == 1.0, ( + label, + row, + result.diff, + ) diff --git a/codebase_rag/tests/test_javascript_span_oracle.py b/codebase_rag/tests/test_javascript_span_oracle.py new file mode 100644 index 000000000..49a64333b --- /dev/null +++ b/codebase_rag/tests/test_javascript_span_oracle.py @@ -0,0 +1,65 @@ +# (H) Covers JavaScript node SPAN (end_line) validation: cgr's end_line for each +# (H) node is graded against the TS-compiler-API oracle run over .js (which emits +# (H) each node's full-span end line), joined on (kind, file, start). Exercises a +# (H) class with a multi-line method signature, a multi-line arrow assigned to a +# (H) const, and a nested arrow so spans are not trivially single line. +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_js_graph +from evals.oracles import run_javascript_oracle, typescript_available +from evals.score import score_span + +JS_SRC = """\ +class Widget { + area( + scale, + ) { + return scale; + } +} + +function standalone() { + const cb = (v) => { + return v + 1; + }; + return cb(2); +} + +const arrow = (x) => { + return x * 2; +}; +""" + + +def _require_js() -> None: + if not typescript_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.JS not in load_parsers()[0]: + pytest.skip("javascript parser not available") + + +def test_cgr_matches_tsc_oracle_on_javascript_node_spans(tmp_path: Path) -> None: + _require_js() + project = tmp_path / "js_span_test" + project.mkdir() + (project / "main.js").write_text(JS_SRC, 
encoding="utf-8") + + cgr = extract_cgr_js_graph(project, project.name) + oracle = run_javascript_oracle(project) + + result = score_span(cgr, oracle, ec.JS_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + aggregate = by_label.get(ec.AGGREGATE_LABEL) + assert aggregate is not None, (by_label, result.diff) + assert aggregate["precision"] == 1.0 and aggregate["recall"] == 1.0, ( + aggregate, + result.diff, + ) + assert aggregate["tp"] >= 4, aggregate diff --git a/codebase_rag/tests/test_javascript_structure_oracle.py b/codebase_rag/tests/test_javascript_structure_oracle.py new file mode 100644 index 000000000..508326d0d --- /dev/null +++ b/codebase_rag/tests/test_javascript_structure_oracle.py @@ -0,0 +1,57 @@ +# (H) Covers the JavaScript structure oracle harness (evals/oracles/ts_oracle run +# (H) over .js/.jsx + evals/js_l1.py): the TS-compiler-API oracle is authoritative +# (H) ground truth, and cgr's captured JavaScript nodes are graded against it on +# (H) (kind, file, start_line). 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_js_nodes +from evals.oracles import run_javascript_oracle, typescript_available +from evals.score import score_node_kinds +from evals.types_defs import GraphData + +JS_SRC = """\ +class Point { + constructor(x) { this.x = x; } + area() { return this.x; } +} + +function freeFn(a) { return a + 1; } +const arrow = (b) => b * 2; +const obj = { method() { return 1; } }; +[1, 2].forEach((n) => freeFn(n)); +""" + + +def _require_js() -> None: + if not typescript_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.JS not in load_parsers()[0]: + pytest.skip("javascript parser not available") + + +def test_cgr_matches_tsc_oracle_on_javascript_structure(tmp_path: Path) -> None: + _require_js() + project = tmp_path / "js_oracle_test" + project.mkdir() + (project / "app.js").write_text(JS_SRC, encoding="utf-8") + + cgr = GraphData( + nodes=extract_cgr_js_nodes(project, project.name), + edges=set(), + name_edges=set(), + ) + oracle = run_javascript_oracle(project) + + result = score_node_kinds(cgr, oracle, ec.JS_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + for label in ("Class", "Function", "Method"): + row = by_label.get(label) + assert row is not None, (label, by_label) + assert row["precision"] == 1.0 and row["recall"] == 1.0, (label, row) diff --git a/codebase_rag/tests/test_js_ts_utils_integration.py b/codebase_rag/tests/test_js_ts_utils_integration.py index d83ccf4ae..bc50fb53b 100644 --- a/codebase_rag/tests/test_js_ts_utils_integration.py +++ b/codebase_rag/tests/test_js_ts_utils_integration.py @@ -647,6 +647,78 @@ def test_deeply_nested_qn(self) -> None: assert result == "a.b.c.d.e" +@pytest.mark.skipif(not JS_AVAILABLE, reason="tree-sitter-javascript 
not available") +class TestFindMethodInAstCacheOwnerTracking: + def test_cache_invalidates_on_new_root_node( + self, js_parser: Parser, sample_js_project: Path + ) -> None: + from codebase_rag.parsers.js_ts import utils as js_utils + + tree1 = parse_file(js_parser, sample_js_project / "singleton.js") + root1 = tree1.root_node + result1 = find_method_in_ast(root1, "DatabaseConnection", "getInstance") + assert result1 is not None + owner_after_first = js_utils._CLASS_BODY_CACHE_OWNER + + tree2 = parse_file(js_parser, sample_js_project / "factory.js") + root2 = tree2.root_node + result2 = find_method_in_ast(root2, "Dog", "speak") + assert result2 is not None + owner_after_second = js_utils._CLASS_BODY_CACHE_OWNER + + assert owner_after_first != owner_after_second + + def test_cache_hit_returns_correct_result( + self, js_parser: Parser, sample_js_project: Path + ) -> None: + tree = parse_file(js_parser, sample_js_project / "factory.js") + root = tree.root_node + + result1 = find_method_in_ast(root, "Dog", "speak") + assert result1 is not None + + result2 = find_method_in_ast(root, "Dog", "fetch") + assert result2 is not None + + def test_cache_miss_returns_none( + self, js_parser: Parser, sample_js_project: Path + ) -> None: + tree = parse_file(js_parser, sample_js_project / "factory.js") + root = tree.root_node + + result = find_method_in_ast(root, "NonExistent", "method") + assert result is None + + result2 = find_method_in_ast(root, "NonExistent", "other") + assert result2 is None + + +@pytest.mark.skipif(not JS_AVAILABLE, reason="tree-sitter-javascript not available") +class TestFindReturnStatementsWithLanguageObj: + def test_with_language_obj( + self, js_parser: Parser, sample_js_project: Path + ) -> None: + tree = parse_file(js_parser, sample_js_project / "complex_returns.js") + set_name = find_method_in_ast(tree.root_node, "Builder", "setName") + assert set_name is not None + + language = Language(tsjs.language()) + return_nodes: list = [] + 
find_return_statements(set_name, return_nodes, language) + assert len(return_nodes) == 1 + + def test_fallback_without_language_obj( + self, js_parser: Parser, sample_js_project: Path + ) -> None: + tree = parse_file(js_parser, sample_js_project / "complex_returns.js") + set_name = find_method_in_ast(tree.root_node, "Builder", "setName") + assert set_name is not None + + return_nodes: list = [] + find_return_statements(set_name, return_nodes, None) + assert len(return_nodes) == 1 + + @pytest.mark.skipif(not TS_AVAILABLE, reason="tree-sitter-typescript not available") class TestTypeScriptIntegration: def test_find_generic_class_methods( diff --git a/codebase_rag/tests/test_js_type_inference_unit.py b/codebase_rag/tests/test_js_type_inference_unit.py index 21e008522..279ac8b7f 100644 --- a/codebase_rag/tests/test_js_type_inference_unit.py +++ b/codebase_rag/tests/test_js_type_inference_unit.py @@ -428,3 +428,89 @@ def test_variable_with_uninferrable_value_is_skipped( ) assert result == {} + + +class TestGetDeclaratorsViaQueryException: + def test_returns_none_when_queries_is_none( + self, + mock_import_processor: MagicMock, + mock_function_registry: MagicMock, + mock_find_method_ast_node: MagicMock, + ) -> None: + engine = JsTypeInferenceEngine( + import_processor=mock_import_processor, + function_registry=mock_function_registry, + project_name="test_project", + find_method_ast_node_func=mock_find_method_ast_node, + queries=None, + ) + root_node = create_mock_node("program", children=[]) + result = engine._get_declarators_via_query( + root_node, # ty: ignore[invalid-argument-type] # (H) MockNode not Node + ) + assert result is None + + def test_exception_in_query_continues_to_next_language( + self, + mock_import_processor: MagicMock, + mock_function_registry: MagicMock, + mock_find_method_ast_node: MagicMock, + ) -> None: + bad_language_obj = MagicMock() + bad_language_obj.side_effect = Exception("bad query") + + queries = { + cs.SupportedLanguage.JS: {"language": 
bad_language_obj}, + cs.SupportedLanguage.TS: {"language": bad_language_obj}, + } + + engine = JsTypeInferenceEngine( + import_processor=mock_import_processor, + function_registry=mock_function_registry, + project_name="test_project", + find_method_ast_node_func=mock_find_method_ast_node, + queries=queries, + ) + root_node = create_mock_node("program", children=[]) + result = engine._get_declarators_via_query( + root_node, # ty: ignore[invalid-argument-type] # (H) MockNode not Node + ) + assert result is None + + +class TestGetLanguageObj: + def test_returns_none_when_queries_is_none( + self, + mock_import_processor: MagicMock, + mock_function_registry: MagicMock, + mock_find_method_ast_node: MagicMock, + ) -> None: + engine = JsTypeInferenceEngine( + import_processor=mock_import_processor, + function_registry=mock_function_registry, + project_name="test_project", + find_method_ast_node_func=mock_find_method_ast_node, + queries=None, + ) + result = engine._get_language_obj() + assert result is None + + def test_returns_language_when_available( + self, + mock_import_processor: MagicMock, + mock_function_registry: MagicMock, + mock_find_method_ast_node: MagicMock, + ) -> None: + lang_obj = MagicMock() + queries = { + cs.SupportedLanguage.JS: {"language": lang_obj}, + } + engine = JsTypeInferenceEngine( + import_processor=mock_import_processor, + function_registry=mock_function_registry, + project_name="test_project", + find_method_ast_node_func=mock_find_method_ast_node, + queries=queries, + ) + result = engine._get_language_obj() + assert result is lang_obj diff --git a/codebase_rag/tests/test_l3_decorator_normalization.py b/codebase_rag/tests/test_l3_decorator_normalization.py new file mode 100644 index 000000000..a2e105398 --- /dev/null +++ b/codebase_rag/tests/test_l3_decorator_normalization.py @@ -0,0 +1,77 @@ +# (H) Covers the L3 eval harness (evals/calls_trace.py): a call to a functools.wraps +# (H) decorated function dispatches through the decorator's generic 
wrapper at runtime, +# (H) but cgr's static graph resolves the call to the function itself. The trace must +# (H) attribute the wrapper frame to the wrapped function so the two agree. +from __future__ import annotations + +import importlib.util +import textwrap +from pathlib import Path + +from evals.calls_trace import trace_calls + +MOD_SRC = textwrap.dedent( + """ + from functools import wraps + + + def guard(fn): + @wraps(fn) + def wrapper(*args, **kwargs): + return fn(*args, **kwargs) + + return wrapper + + + def helper(): + return 1 + + + @guard + def target_fn(): + return helper() + + + def caller(): + return target_fn() + """ +) + + +def _load_module(mod_path: Path): + spec = importlib.util.spec_from_file_location("evaltest_decorator_mod", mod_path) + assert spec is not None and spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def _trace(tmp_path: Path) -> set[tuple[str, str]]: + pkg = tmp_path / "pkgx" + pkg.mkdir() + (pkg / "__init__.py").write_text("") + mod_path = pkg / "mod.py" + mod_path.write_text(MOD_SRC) + module = _load_module(mod_path) + return trace_calls(module.caller, pkg, "pkgx") + + +class TestDecoratorWrapperNormalization: + def test_call_attributed_to_wrapped_function_not_wrapper( + self, tmp_path: Path + ) -> None: + edges = _trace(tmp_path) + assert ("pkgx.mod.caller", "pkgx.mod.target_fn") in edges, edges + + def test_no_generic_wrapper_node_appears(self, tmp_path: Path) -> None: + edges = _trace(tmp_path) + wrapper_edges = [ + (frm, to) + for frm, to in edges + if frm.endswith("wrapper") or to.endswith("wrapper") + ] + assert wrapper_edges == [], wrapper_edges + + def test_wrapped_function_body_calls_are_preserved(self, tmp_path: Path) -> None: + edges = _trace(tmp_path) + assert ("pkgx.mod.target_fn", "pkgx.mod.helper") in edges, edges diff --git a/codebase_rag/tests/test_language_node_coverage.py b/codebase_rag/tests/test_language_node_coverage.py index 
74648125f..7ee255693 100644 --- a/codebase_rag/tests/test_language_node_coverage.py +++ b/codebase_rag/tests/test_language_node_coverage.py @@ -3,8 +3,8 @@ import pytest from codebase_rag.constants import ( + C_EXTENSIONS, CPP_EXTENSIONS, - CS_EXTENSIONS, GO_EXTENSIONS, JAVA_EXTENSIONS, JS_EXTENSIONS, @@ -60,8 +60,8 @@ def test_each_language_has_file_extensions(self, lang: SupportedLanguage) -> Non (SupportedLanguage.GO, GO_EXTENSIONS), (SupportedLanguage.SCALA, SCALA_EXTENSIONS), (SupportedLanguage.JAVA, JAVA_EXTENSIONS), + (SupportedLanguage.C, C_EXTENSIONS), (SupportedLanguage.CPP, CPP_EXTENSIONS), - (SupportedLanguage.CSHARP, CS_EXTENSIONS), (SupportedLanguage.PHP, PHP_EXTENSIONS), (SupportedLanguage.LUA, LUA_EXTENSIONS), ] @@ -87,11 +87,11 @@ def test_language_spec_has_correct_extensions( (".go", SupportedLanguage.GO), (".scala", SupportedLanguage.SCALA), (".java", SupportedLanguage.JAVA), + (".c", SupportedLanguage.C), (".cpp", SupportedLanguage.CPP), (".h", SupportedLanguage.CPP), (".hpp", SupportedLanguage.CPP), (".cc", SupportedLanguage.CPP), - (".cs", SupportedLanguage.CSHARP), (".php", SupportedLanguage.PHP), (".lua", SupportedLanguage.LUA), ] diff --git a/codebase_rag/tests/test_llm_service_unit.py b/codebase_rag/tests/test_llm_service_unit.py index 74127c7f5..4fc69287d 100644 --- a/codebase_rag/tests/test_llm_service_unit.py +++ b/codebase_rag/tests/test_llm_service_unit.py @@ -231,12 +231,13 @@ def test_creates_agent_with_tools( mock_agent.return_value = MagicMock() tools = [MagicMock(), MagicMock()] - result = create_rag_orchestrator(tools) + agent, system_prompt = create_rag_orchestrator(tools) mock_agent.assert_called_once() call_kwargs = mock_agent.call_args.kwargs assert call_kwargs["tools"] == tools - assert result is not None + assert agent is not None + assert system_prompt == "System prompt" @patch("codebase_rag.services.llm.settings") @patch("codebase_rag.services.llm.get_provider_from_config") diff --git 
a/codebase_rag/tests/test_local_alias_calls.py b/codebase_rag/tests/test_local_alias_calls.py new file mode 100644 index 000000000..017638524 --- /dev/null +++ b/codebase_rag/tests/test_local_alias_calls.py @@ -0,0 +1,90 @@ +# (H) L3 finding from the evals/ harness: a function bound to a local variable and +# (H) then called through that alias (g = self._method; g()) runs the aliased +# (H) callable at runtime, but cgr saw a bare-name call that resolved to nothing. +# (H) A call through a local alias must produce a CALLS edge to the aliased target. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +MODULE_SRC = """class Engine: + def run(self) -> str: + do = self._start + return do() + + def _start(self) -> str: + return helper() + + +def helper() -> str: + return "x" + + +def top() -> str: + fn = helper + return fn() +""" + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + (tmp_path / "m.py").write_text(MODULE_SRC) + parsers, queries = load_parsers() + cap = 
_Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestLocalAliasCalls: + def test_alias_to_self_method_is_a_call(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ("proj.m.Engine.run", "proj.m.Engine._start") in calls, calls + + def test_alias_to_module_function_is_a_call(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ("proj.m.top", "proj.m.helper") in calls, calls + + def test_direct_call_unaffected(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ("proj.m.Engine._start", "proj.m.helper") in calls, calls diff --git a/codebase_rag/tests/test_local_alias_chain_resolution.py b/codebase_rag/tests/test_local_alias_chain_resolution.py new file mode 100644 index 000000000..a2c964507 --- /dev/null +++ b/codebase_rag/tests/test_local_alias_chain_resolution.py @@ -0,0 +1,96 @@ +# (H) L3 finding from the evals/ harness: CallProcessor._ingest_function_calls does +# (H) `registry = resolver.function_registry` (resolver = self._resolver) then +# (H) `qn in registry`, dispatching to FunctionRegistryTrie.__contains__. Resolving it +# (H) needs local-variable aliasing (local = self.attr) plus cross-class attribute-chain +# (H) typing (local2 = local.attr) so the operand's concrete type is known. 
+from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +FILES = { + "pkg/__init__.py": "", + "pkg/registry.py": ( + "class Registry:\n def __contains__(self, key):\n return True\n" + ), + "pkg/resolver.py": ( + "from .registry import Registry\n\n\n" + "class Resolver:\n" + " def __init__(self) -> None:\n" + " self.registry = Registry()\n" + ), + "pkg/proc.py": ( + "from .resolver import Resolver\n\n\n" + "class Proc:\n" + " def __init__(self) -> None:\n" + " self._resolver = Resolver()\n\n" + " def run(self, qn):\n" + " resolver = self._resolver\n" + " registry = resolver.registry\n" + " return qn in registry\n" + ), +} + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + for rel, content in FILES.items(): + p = tmp_path / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + 
).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestLocalAliasChainResolution: + def test_local_alias_attribute_chain_dispatches_to_dunder( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.proc.Proc.run", + "proj.pkg.registry.Registry.__contains__", + ) in calls, calls diff --git a/codebase_rag/tests/test_lua_containment_oracle.py b/codebase_rag/tests/test_lua_containment_oracle.py new file mode 100644 index 000000000..1d517b8ba --- /dev/null +++ b/codebase_rag/tests/test_lua_containment_oracle.py @@ -0,0 +1,56 @@ +# (H) Covers Lua containment-edge validation. Lua has no classes/methods, so the +# (H) only containment edge is DEFINES: the file module DEFINES top-level +# (H) functions, and a function DEFINES the functions nested in its body. Graded +# (H) against the independent luaparse oracle, joined on (kind, file, line). +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_lua_graph +from evals.oracles import lua_oracle_available, run_lua_oracle +from evals.score import score_edge_types + +LUA_SRC = """\ +local function freeFn(a) + return a + 1 +end + +function globalFn() + local function nested() + return 1 + end + return nested +end + +local cb = function(x) return x end +""" + + +def _require_lua() -> None: + if not lua_oracle_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.LUA not in load_parsers()[0]: + pytest.skip("lua parser not available") + + +def test_cgr_matches_luaparse_oracle_on_containment_edges(tmp_path: Path) -> None: + _require_lua() + project = tmp_path / "lua_edge" + project.mkdir() + (project / "lib.lua").write_text(LUA_SRC, encoding="utf-8") + + cgr = extract_cgr_lua_graph(project, 
project.name) + oracle = run_lua_oracle(project) + + result = score_edge_types(cgr, oracle, ec.SCORED_EDGE_TYPES) + by_label = {row["label"]: row for row in result.rows} + # (H) Lua only has DEFINES (no methods, so no DEFINES_METHOD row at all). + row = by_label.get(cs.RelationshipType.DEFINES.value) + assert row is not None, (by_label, result.diff) + assert row["precision"] == 1.0 and row["recall"] == 1.0, (row, result.diff) + assert cs.RelationshipType.DEFINES_METHOD.value not in by_label, by_label diff --git a/codebase_rag/tests/test_lua_modern_features.py b/codebase_rag/tests/test_lua_modern_features.py index 0cf6003a4..a9e84265f 100644 --- a/codebase_rag/tests/test_lua_modern_features.py +++ b/codebase_rag/tests/test_lua_modern_features.py @@ -621,9 +621,23 @@ def test_lua_54_enhanced_stdlib(temp_repo: Path, mock_ingestor: MagicMock) -> No assert expected_fn in fn_qns, f"Missing function: {expected_fn}" calls_rels = get_relationships(mock_ingestor, "CALLS") - - assert len(calls_rels) >= 10, ( - f"Expected at least 10 CALLS, got {len(calls_rels)}" + call_edges = {(c.args[0][2], c.args[2][2]) for c in calls_rels} + + # (H) stdlib calls (math.*, string.*, table.*, io.*, os.*) are not + # (H) first-party, so the only CALLS edges are between StdLib methods: + # (H) run_all_tests fans out to the six test_* methods, and the + # (H) top-level `StdLib.run_all_tests()` in main.lua is attributed to + # (H) the main module (not duplicated onto every nested call site). 
+ run_all = f"{stdlib_qn}.StdLib.run_all_tests" + main_qn = f"{project.name}.main" + for method in expected_functions: + if method == run_all: + continue + assert (run_all, method) in call_edges, ( + f"Missing CALLS edge {run_all} -> {method}" + ) + assert (main_qn, run_all) in call_edges, ( + f"Missing module-level CALLS edge {main_qn} -> {run_all}" ) print("✅ Lua 5.4 enhanced standard library test PASSED") diff --git a/codebase_rag/tests/test_lua_retrieval_eval.py b/codebase_rag/tests/test_lua_retrieval_eval.py new file mode 100644 index 000000000..f99bb6c25 --- /dev/null +++ b/codebase_rag/tests/test_lua_retrieval_eval.py @@ -0,0 +1,87 @@ +from pathlib import Path + +import pytest + +from evals import constants as ec +from evals.lua_retrieval import ( + cgr_lua_call_edges, + oracle_lua_call_edges, + score_lua_retrieval, +) +from evals.oracles import lua_oracle_available + +needs_node = pytest.mark.skipif( + not lua_oracle_available(), reason="node toolchain not installed" +) + + +def _make_project(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "mod.lua").write_text( + "local M = {}\n" + "local function local_add(a, b) return a + b end\n" + "function M.mul(a, b) return a * b end\n" + "function M.use(x, y) return local_add(x, y) + M.mul(x, y) end\n" + "function M.orphan() return 9 end\n" + "return M\n", + encoding="utf-8", + ) + (root / "main.lua").write_text( + "local mod = require('mod')\n" + "local function compute(x, y) return mod.use(x, y) end\n" + "local r = compute(4, 2)\n", + encoding="utf-8", + ) + + +@needs_node +def test_oracle_captures_first_party_lua_calls(tmp_path: Path) -> None: + _make_project(tmp_path) + edges, declared = oracle_lua_call_edges(tmp_path) + + # (H) local_add(), M.mul() (in M.use), mod.use() (in compute), compute() + # (H) (top level) are first-party calls reduced to their simple names. 
+ assert ("mod.lua", "local_add") in edges + assert ("mod.lua", "mul") in edges + assert ("main.lua", "use") in edges + assert ("main.lua", "compute") in edges + # (H) orphan is declared but never called -> never a call edge. + assert ("mod.lua", "orphan") not in edges + assert {"local_add", "mul", "use", "orphan", "compute"} <= declared + + +@needs_node +def test_cgr_matches_oracle_on_clean_lua_project(tmp_path: Path) -> None: + _make_project(tmp_path) + oracle, declared = oracle_lua_call_edges(tmp_path) + cgr = cgr_lua_call_edges(tmp_path, tmp_path.name, declared) + assert cgr == oracle + + +@needs_node +def test_cgr_resolves_function_expression_body_calls(tmp_path: Path) -> None: + # (H) A function expression bound to a table field (`M.runner = function()...`) + # (H) is named by cgr's definition pass (qn M.runner), so the calls in its body + # (H) must attribute to that node. cgr previously skipped the whole body because + # (H) the call pass could not derive the caller name from the nameless function + # (H) expression (same family as the JS/TS arrow-caller gap). 
+ tmp_path.mkdir(parents=True, exist_ok=True) + (tmp_path / "fnexpr.lua").write_text( + "local M = {}\n" + "local function target() return 1 end\n" + "M.runner = function() return target() end\n" + "return M\n", + encoding="utf-8", + ) + oracle, declared = oracle_lua_call_edges(tmp_path) + assert ("fnexpr.lua", "target") in oracle + cgr = cgr_lua_call_edges(tmp_path, tmp_path.name, declared) + assert ("fnexpr.lua", "target") in cgr + + +def test_score_lua_retrieval_prf() -> None: + result = score_lua_retrieval( + {("a.lua", "f"), ("a.lua", "g")}, {("a.lua", "f"), ("b.lua", "h")} + ) + row = next(r for r in result.rows if r["label"] == ec.LUA_RETRIEVAL_LABEL) + assert (row["tp"], row["fp"], row["fn"]) == (1, 1, 1) diff --git a/codebase_rag/tests/test_lua_span_oracle.py b/codebase_rag/tests/test_lua_span_oracle.py new file mode 100644 index 000000000..9f70f4641 --- /dev/null +++ b/codebase_rag/tests/test_lua_span_oracle.py @@ -0,0 +1,58 @@ +# (H) Covers Lua node SPAN (end_line) validation: cgr's end_line for each Function +# (H) is graded against the luaparse oracle (which emits node.loc.end.line), joined +# (H) on (kind, file, start). Exercises a global function, a nested function, and a +# (H) multi-line anonymous function expression so spans are not single line. 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_lua_graph +from evals.oracles import lua_oracle_available, run_lua_oracle +from evals.score import score_span + +LUA_SRC = """\ +function outer(a, b) + local function inner(x) + return x + 1 + end + return inner(a) + b +end + +local handler = function(v) + return v * 2 +end + +return outer(handler(1), 2) +""" + + +def _require_lua() -> None: + if not lua_oracle_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.LUA not in load_parsers()[0]: + pytest.skip("lua parser not available") + + +def test_cgr_matches_luaparse_oracle_on_node_spans(tmp_path: Path) -> None: + _require_lua() + project = tmp_path / "lua_span_test" + project.mkdir() + (project / "lib.lua").write_text(LUA_SRC, encoding="utf-8") + + cgr = extract_cgr_lua_graph(project, project.name) + oracle = run_lua_oracle(project) + + result = score_span(cgr, oracle, ec.LUA_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + aggregate = by_label.get(ec.AGGREGATE_LABEL) + assert aggregate is not None, (by_label, result.diff) + assert aggregate["precision"] == 1.0 and aggregate["recall"] == 1.0, ( + aggregate, + result.diff, + ) + assert aggregate["tp"] >= 3, aggregate diff --git a/codebase_rag/tests/test_lua_structure_oracle.py b/codebase_rag/tests/test_lua_structure_oracle.py new file mode 100644 index 000000000..c30b49f9e --- /dev/null +++ b/codebase_rag/tests/test_lua_structure_oracle.py @@ -0,0 +1,54 @@ +# (H) Covers the Lua structure oracle harness (evals/oracles/lua_oracle + +# (H) evals/lua_l1.py): the luaparse oracle is authoritative ground truth, and +# (H) cgr's captured Lua nodes are graded against it on (kind, file, start_line). +# (H) Lua has no classes, so every function is a Function. 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_lua_nodes +from evals.oracles import lua_oracle_available, run_lua_oracle +from evals.score import score_node_kinds +from evals.types_defs import GraphData + +LUA_SRC = """\ +local M = {} +function freeFn(a) return a + 1 end +local function localFn(b) return b end +function M.tableFn(c) return c end +function M:methodFn(d) return d end +local arrow = function(e) return e end +return M +""" + + +def _require_lua() -> None: + if not lua_oracle_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.LUA not in load_parsers()[0]: + pytest.skip("lua parser not available") + + +def test_cgr_matches_luaparse_oracle_on_lua_structure(tmp_path: Path) -> None: + _require_lua() + project = tmp_path / "lua_oracle_test" + project.mkdir() + (project / "m.lua").write_text(LUA_SRC, encoding="utf-8") + + cgr = GraphData( + nodes=extract_cgr_lua_nodes(project, project.name), + edges=set(), + name_edges=set(), + ) + oracle = run_lua_oracle(project) + + result = score_node_kinds(cgr, oracle, ec.LUA_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + row = by_label.get(cs.NodeLabel.FUNCTION.value) + assert row is not None, by_label + assert row["precision"] == 1.0 and row["recall"] == 1.0, row diff --git a/codebase_rag/tests/test_mcp_query_and_index.py b/codebase_rag/tests/test_mcp_query_and_index.py index ce9a5ffcd..89cbdc267 100644 --- a/codebase_rag/tests/test_mcp_query_and_index.py +++ b/codebase_rag/tests/test_mcp_query_and_index.py @@ -364,6 +364,77 @@ async def test_sequential_index_only_clears_own_project_data( assert mock_ingestor.delete_project.call_count == 2 +class TestIndexRepositoryConstraintsAndFlush: + """Regression tests for issue #2: MCP indexing produced an 
incomplete graph. + + The MCP path diverged from the CLI path: it never called + ``ensure_constraints()`` and never defensively flushed the long-lived + ingestor before/after ``GraphUpdater.run()``, so stale buffered state could + leak across calls and missing constraints/indexes corrupted node creation. + + NOTE: A full assertion that ``Class`` and ``Method`` nodes are persisted + requires a live Memgraph backend (the in-repo ``_MockIngestor`` does not + persist a real graph, and ``GraphUpdater`` emits those node batches + regardless of the orchestration bug). These tests instead pin the + orchestration that the CLI path performs and the MCP path was missing. + """ + + @staticmethod + def _ordered_calls(manager: MagicMock) -> list[str]: + tracked = { + "ingestor.ensure_constraints", + "ingestor.flush_all", + "updater.run", + } + return [name for name, _, _ in manager.mock_calls if name in tracked] + + async def test_index_ensures_constraints_and_flushes_around_run( + self, temp_project_root: Path + ) -> None: + manager = MagicMock() + registry = MCPToolsRegistry( + project_root=str(temp_project_root), + ingestor=manager.ingestor, + cypher_gen=MagicMock(), + ) + + with patch("codebase_rag.mcp.tools.GraphUpdater") as mock_updater_class: + mock_updater_class.return_value = manager.updater + manager.updater.run.return_value = None + + await registry.index_repository() + + assert self._ordered_calls(manager) == [ + "ingestor.ensure_constraints", + "ingestor.flush_all", + "updater.run", + "ingestor.flush_all", + ] + + async def test_update_ensures_constraints_and_flushes_around_run( + self, temp_project_root: Path + ) -> None: + manager = MagicMock() + registry = MCPToolsRegistry( + project_root=str(temp_project_root), + ingestor=manager.ingestor, + cypher_gen=MagicMock(), + ) + + with patch("codebase_rag.mcp.tools.GraphUpdater") as mock_updater_class: + mock_updater_class.return_value = manager.updater + manager.updater.run.return_value = None + + await 
registry.update_repository() + + assert self._ordered_calls(manager) == [ + "ingestor.ensure_constraints", + "ingestor.flush_all", + "updater.run", + "ingestor.flush_all", + ] + + class TestQueryAndIndexIntegration: """Test integration between querying and indexing.""" diff --git a/codebase_rag/tests/test_mcp_server.py b/codebase_rag/tests/test_mcp_server.py index 6d621e76d..c84901bf6 100644 --- a/codebase_rag/tests/test_mcp_server.py +++ b/codebase_rag/tests/test_mcp_server.py @@ -1,10 +1,13 @@ +import contextlib import os +from collections.abc import AsyncIterator from pathlib import Path from typing import Any -from unittest.mock import patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest +from codebase_rag.mcp import server as srv from codebase_rag.mcp.server import get_project_root @@ -173,3 +176,51 @@ def test_works_with_actual_cwd(self) -> None: assert result == actual_cwd.resolve() assert result.exists() assert result.is_dir() + + +class TestServiceLifecycle: + """Tests that the MCP server lifecycle releases the Qdrant client.""" + + def test_service_lifecycle_closes_qdrant_on_exit(self) -> None: + mock_ingestor = MagicMock() + + with patch.object(srv, "close_qdrant_client") as mock_close: + with srv._service_lifecycle(mock_ingestor): + mock_ingestor.__enter__.assert_called_once() + mock_close.assert_not_called() + mock_close.assert_called_once_with() + mock_ingestor.__exit__.assert_called_once() + + def test_service_lifecycle_closes_qdrant_on_exception(self) -> None: + mock_ingestor = MagicMock() + + with patch.object(srv, "close_qdrant_client") as mock_close: + with pytest.raises(RuntimeError): + with srv._service_lifecycle(mock_ingestor): + raise RuntimeError("boom") + mock_close.assert_called_once_with() + mock_ingestor.__exit__.assert_called_once() + + +class TestServeStdioShutdown: + """Tests that serve_stdio releases the Qdrant lock on shutdown.""" + + async def test_serve_stdio_closes_qdrant_client_on_shutdown(self) -> None: + 
mock_ingestor = MagicMock() + mock_server = MagicMock() + mock_server.run = AsyncMock() + mock_server.create_initialization_options = MagicMock(return_value=MagicMock()) + + @contextlib.asynccontextmanager + async def fake_stdio() -> AsyncIterator[tuple[MagicMock, MagicMock]]: + yield (MagicMock(), MagicMock()) + + with patch.object( + srv, "create_server", return_value=(mock_server, mock_ingestor) + ): + with patch.object(srv, "stdio_server", fake_stdio): + with patch.object(srv, "close_qdrant_client") as mock_close: + await srv.serve_stdio() + + mock_close.assert_called_once_with() + mock_server.run.assert_awaited_once() diff --git a/codebase_rag/tests/test_mcp_tools_helpers.py b/codebase_rag/tests/test_mcp_tools_helpers.py new file mode 100644 index 000000000..7804c9fa0 --- /dev/null +++ b/codebase_rag/tests/test_mcp_tools_helpers.py @@ -0,0 +1,98 @@ +from unittest.mock import MagicMock, patch + +from codebase_rag import constants as cs + +_PATCH_DELETE = "codebase_rag.mcp.tools.delete_project_embeddings" + + +def _make_registry(mock_ingestor: MagicMock) -> MagicMock: + from codebase_rag.mcp.tools import MCPToolsRegistry + + registry = MagicMock(spec=MCPToolsRegistry) + registry.ingestor = mock_ingestor + registry._get_project_node_ids = MCPToolsRegistry._get_project_node_ids.__get__( + registry + ) + registry._cleanup_project_embeddings = ( + MCPToolsRegistry._cleanup_project_embeddings.__get__(registry) + ) + return registry + + +class TestGetProjectNodeIds: + def test_returns_integer_ids(self) -> None: + mock_ingestor = MagicMock() + mock_ingestor.fetch_all.return_value = [ + {cs.KEY_NODE_ID: 1}, + {cs.KEY_NODE_ID: 2}, + {cs.KEY_NODE_ID: 3}, + ] + registry = _make_registry(mock_ingestor) + + result = registry._get_project_node_ids("myproject") + + assert result == [1, 2, 3] + mock_ingestor.fetch_all.assert_called_once_with( + cs.CYPHER_QUERY_PROJECT_NODE_IDS, + {cs.KEY_PROJECT_NAME: "myproject"}, + ) + + def test_filters_non_integer_ids(self) -> None: + 
mock_ingestor = MagicMock() + mock_ingestor.fetch_all.return_value = [ + {cs.KEY_NODE_ID: 1}, + {cs.KEY_NODE_ID: "not_an_int"}, + {cs.KEY_NODE_ID: None}, + {cs.KEY_NODE_ID: 4}, + ] + registry = _make_registry(mock_ingestor) + + result = registry._get_project_node_ids("proj") + + assert result == [1, 4] + + def test_returns_empty_when_no_rows(self) -> None: + mock_ingestor = MagicMock() + mock_ingestor.fetch_all.return_value = [] + registry = _make_registry(mock_ingestor) + + result = registry._get_project_node_ids("empty") + + assert result == [] + + def test_skips_rows_missing_key(self) -> None: + mock_ingestor = MagicMock() + mock_ingestor.fetch_all.return_value = [ + {"other_key": 99}, + {cs.KEY_NODE_ID: 5}, + ] + registry = _make_registry(mock_ingestor) + + result = registry._get_project_node_ids("proj") + + assert result == [5] + + +class TestCleanupProjectEmbeddings: + def test_calls_delete_with_node_ids(self) -> None: + mock_ingestor = MagicMock() + mock_ingestor.fetch_all.return_value = [ + {cs.KEY_NODE_ID: 10}, + {cs.KEY_NODE_ID: 20}, + ] + registry = _make_registry(mock_ingestor) + + with patch(_PATCH_DELETE) as mock_delete: + registry._cleanup_project_embeddings("myproject") + + mock_delete.assert_called_once_with("myproject", [10, 20]) + + def test_calls_delete_with_empty_list_when_no_nodes(self) -> None: + mock_ingestor = MagicMock() + mock_ingestor.fetch_all.return_value = [] + registry = _make_registry(mock_ingestor) + + with patch(_PATCH_DELETE) as mock_delete: + registry._cleanup_project_embeddings("empty_proj") + + mock_delete.assert_called_once_with("empty_proj", []) diff --git a/codebase_rag/tests/test_mcp_update_and_search.py b/codebase_rag/tests/test_mcp_update_and_search.py new file mode 100644 index 000000000..b01128931 --- /dev/null +++ b/codebase_rag/tests/test_mcp_update_and_search.py @@ -0,0 +1,496 @@ +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from codebase_rag import constants as 
cs +from codebase_rag.mcp.client import query_mcp_server +from codebase_rag.mcp.tools import MCPToolsRegistry + +pytestmark = [pytest.mark.anyio] + + +@pytest.fixture(params=["asyncio"]) +def anyio_backend(request: pytest.FixtureRequest) -> str: + return str(request.param) + + +@pytest.fixture +def temp_project_root(tmp_path: Path) -> Path: + sample_file = tmp_path / "app.py" + sample_file.write_text("def main(): pass\n", encoding="utf-8") + return tmp_path + + +@pytest.fixture +def mcp_registry(temp_project_root: Path) -> MCPToolsRegistry: + mock_ingestor = MagicMock() + mock_cypher_gen = MagicMock() + + registry = MCPToolsRegistry( + project_root=str(temp_project_root), + ingestor=mock_ingestor, + cypher_gen=mock_cypher_gen, + ) + return registry + + +class TestUpdateRepository: + async def test_update_repository_success( + self, mcp_registry: MCPToolsRegistry + ) -> None: + with patch("codebase_rag.mcp.tools.GraphUpdater") as mock_updater_cls: + mock_updater = MagicMock() + mock_updater_cls.return_value = mock_updater + + result = await mcp_registry.update_repository() + + mock_updater_cls.assert_called_once() + mock_updater.run.assert_called_once() + assert mcp_registry.project_root in result + + async def test_update_repository_error( + self, mcp_registry: MCPToolsRegistry + ) -> None: + with patch("codebase_rag.mcp.tools.GraphUpdater") as mock_updater_cls: + mock_updater_cls.side_effect = RuntimeError("parse error") + + result = await mcp_registry.update_repository() + + assert "Error" in result + + async def test_update_repository_registered( + self, mcp_registry: MCPToolsRegistry + ) -> None: + assert cs.MCPToolName.UPDATE_REPOSITORY in mcp_registry._tools + + async def test_update_repository_no_wipe( + self, mcp_registry: MCPToolsRegistry + ) -> None: + with patch("codebase_rag.mcp.tools.GraphUpdater") as mock_updater_cls: + mock_updater = MagicMock() + mock_updater_cls.return_value = mock_updater + + await mcp_registry.update_repository() + + 
mcp_registry.ingestor.delete_project.assert_not_called() + mcp_registry.ingestor.clean_database.assert_not_called() + + +class TestSemanticSearchRegistration: + def test_semantic_search_not_registered_without_deps( + self, temp_project_root: Path + ) -> None: + mock_ingestor = MagicMock() + mock_cypher_gen = MagicMock() + + with patch( + "codebase_rag.mcp.tools.has_semantic_dependencies", + return_value=False, + ): + registry = MCPToolsRegistry( + project_root=str(temp_project_root), + ingestor=mock_ingestor, + cypher_gen=mock_cypher_gen, + ) + + assert cs.MCPToolName.SEMANTIC_SEARCH not in registry._tools + assert registry._semantic_search_available is False + + def test_semantic_search_registered_with_deps( + self, temp_project_root: Path + ) -> None: + mock_ingestor = MagicMock() + mock_cypher_gen = MagicMock() + + with ( + patch( + "codebase_rag.mcp.tools.has_semantic_dependencies", + return_value=True, + ), + patch( + "codebase_rag.tools.semantic_search.create_semantic_search_tool" + ) as mock_create, + ): + mock_tool = MagicMock() + mock_create.return_value = mock_tool + + registry = MCPToolsRegistry( + project_root=str(temp_project_root), + ingestor=mock_ingestor, + cypher_gen=mock_cypher_gen, + ) + + assert cs.MCPToolName.SEMANTIC_SEARCH in registry._tools + assert registry._semantic_search_available is True + + async def test_semantic_search_calls_tool(self, temp_project_root: Path) -> None: + mock_ingestor = MagicMock() + mock_cypher_gen = MagicMock() + + with ( + patch( + "codebase_rag.mcp.tools.has_semantic_dependencies", + return_value=True, + ), + patch( + "codebase_rag.tools.semantic_search.create_semantic_search_tool" + ) as mock_create, + ): + mock_tool = MagicMock() + mock_tool.function = AsyncMock(return_value="result1, result2") + mock_create.return_value = mock_tool + + registry = MCPToolsRegistry( + project_root=str(temp_project_root), + ingestor=mock_ingestor, + cypher_gen=mock_cypher_gen, + ) + + result = await registry.semantic_search("find 
auth functions", top_k=3) + + mock_tool.function.assert_called_once_with( + query="find auth functions", top_k=3 + ) + assert "result1" in result + + +class TestAskAgent: + async def test_ask_agent_registered(self, mcp_registry: MCPToolsRegistry) -> None: + assert cs.MCPToolName.ASK_AGENT in mcp_registry._tools + + async def test_ask_agent_success(self, mcp_registry: MCPToolsRegistry) -> None: + mock_agent = MagicMock() + mock_response = MagicMock() + mock_response.output = "The auth module uses JWT tokens." + mock_agent.run = AsyncMock(return_value=mock_response) + mcp_registry.rag_agent = mock_agent + + result = await mcp_registry.ask_agent("How is auth implemented?") + + assert result["output"] == "The auth module uses JWT tokens." + mock_agent.run.assert_called_once_with( + "How is auth implemented?", message_history=[] + ) + + async def test_ask_agent_error(self, mcp_registry: MCPToolsRegistry) -> None: + mock_agent = MagicMock() + mock_agent.run = AsyncMock(side_effect=RuntimeError("LLM unavailable")) + mcp_registry.rag_agent = mock_agent + + result = await mcp_registry.ask_agent("What does main do?") + + assert "error" in result + + +class TestToolDescriptions: + def test_update_repository_in_tool_map(self) -> None: + from codebase_rag.tools.tool_descriptions import MCP_TOOLS + + assert cs.MCPToolName.UPDATE_REPOSITORY in MCP_TOOLS + + def test_semantic_search_in_tool_map(self) -> None: + from codebase_rag.tools.tool_descriptions import MCP_TOOLS + + assert cs.MCPToolName.SEMANTIC_SEARCH in MCP_TOOLS + + def test_ask_agent_in_tool_map(self) -> None: + from codebase_rag.tools.tool_descriptions import MCP_TOOLS + + assert cs.MCPToolName.ASK_AGENT in MCP_TOOLS + + def test_index_repository_warns_about_project_clear(self) -> None: + from codebase_rag.tools.tool_descriptions import MCP_INDEX_REPOSITORY + + assert "current project" in MCP_INDEX_REPOSITORY + assert "entire database" not in MCP_INDEX_REPOSITORY + + +class TestRagAgentProperty: + def 
test_rag_agent_setter_allows_mock(self, mcp_registry: MCPToolsRegistry) -> None: + mock_agent = MagicMock() + mcp_registry.rag_agent = mock_agent + assert mcp_registry.rag_agent is mock_agent + + def test_rag_agent_lazy_init(self, temp_project_root: Path) -> None: + mock_ingestor = MagicMock() + mock_cypher_gen = MagicMock() + + with patch( + "codebase_rag.mcp.tools.has_semantic_dependencies", + return_value=False, + ): + registry = MCPToolsRegistry( + project_root=str(temp_project_root), + ingestor=mock_ingestor, + cypher_gen=mock_cypher_gen, + ) + + assert registry._rag_agent is None + + with patch("codebase_rag.mcp.tools.create_rag_orchestrator") as mock_create: + mock_agent = MagicMock() + mock_create.return_value = (mock_agent, "system prompt") + + agent = registry.rag_agent + + mock_create.assert_called_once() + assert agent is mock_agent + + def test_rag_agent_includes_function_source_tool( + self, temp_project_root: Path + ) -> None: + mock_ingestor = MagicMock() + mock_cypher_gen = MagicMock() + + with patch( + "codebase_rag.mcp.tools.has_semantic_dependencies", + return_value=False, + ): + registry = MCPToolsRegistry( + project_root=str(temp_project_root), + ingestor=mock_ingestor, + cypher_gen=mock_cypher_gen, + ) + + with ( + patch("codebase_rag.mcp.tools.create_rag_orchestrator") as mock_create, + patch( + "codebase_rag.tools.semantic_search.create_get_function_source_tool" + ) as mock_fst, + ): + mock_tool = MagicMock() + mock_fst.return_value = mock_tool + mock_create.return_value = (MagicMock(), "system prompt") + + registry.rag_agent + + tools_arg = mock_create.call_args[1]["tools"] + assert mock_tool in tools_arg + + def test_rag_agent_includes_semantic_search_when_available( + self, temp_project_root: Path + ) -> None: + mock_ingestor = MagicMock() + mock_cypher_gen = MagicMock() + + with ( + patch( + "codebase_rag.mcp.tools.has_semantic_dependencies", + return_value=True, + ), + patch( + 
"codebase_rag.tools.semantic_search.create_semantic_search_tool" + ) as mock_ss, + ): + mock_ss_tool = MagicMock() + mock_ss.return_value = mock_ss_tool + + registry = MCPToolsRegistry( + project_root=str(temp_project_root), + ingestor=mock_ingestor, + cypher_gen=mock_cypher_gen, + ) + + with ( + patch("codebase_rag.mcp.tools.create_rag_orchestrator") as mock_create, + patch("codebase_rag.tools.semantic_search.create_get_function_source_tool"), + ): + mock_create.return_value = (MagicMock(), "system prompt") + registry.rag_agent + + tools_arg = mock_create.call_args[1]["tools"] + assert mock_ss_tool in tools_arg + + def test_rag_agent_caches_after_first_access(self, temp_project_root: Path) -> None: + mock_ingestor = MagicMock() + mock_cypher_gen = MagicMock() + + with patch( + "codebase_rag.mcp.tools.has_semantic_dependencies", + return_value=False, + ): + registry = MCPToolsRegistry( + project_root=str(temp_project_root), + ingestor=mock_ingestor, + cypher_gen=mock_cypher_gen, + ) + + with ( + patch("codebase_rag.mcp.tools.create_rag_orchestrator") as mock_create, + patch("codebase_rag.tools.semantic_search.create_get_function_source_tool"), + ): + mock_create.return_value = (MagicMock(), "system prompt") + + agent1 = registry.rag_agent + agent2 = registry.rag_agent + + mock_create.assert_called_once() + assert agent1 is agent2 + + +class TestMainSingleQuery: + def test_main_single_query_prints_output( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + from codebase_rag.main import main_single_query + + mock_response = MagicMock() + mock_response.output = "The answer is 42." 
+ + with ( + patch("codebase_rag.main.connect_memgraph") as mock_conn, + patch("codebase_rag.main._initialize_services_and_agent") as mock_init, + patch("codebase_rag.main.asyncio") as mock_asyncio, + patch("codebase_rag.main._setup_common_initialization"), + ): + mock_agent = MagicMock() + mock_init.return_value = (mock_agent, [], "system prompt") + mock_asyncio.run.return_value = mock_response + mock_conn.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_conn.return_value.__exit__ = MagicMock(return_value=False) + + main_single_query(str(tmp_path), 1000, "What is the answer?") + + captured = capsys.readouterr() + assert "The answer is 42." in captured.out + + def test_main_single_query_routes_logs_to_stderr(self, tmp_path: Path) -> None: + from codebase_rag.main import main_single_query + + mock_response = MagicMock() + mock_response.output = "result" + + with ( + patch("codebase_rag.main.connect_memgraph") as mock_conn, + patch("codebase_rag.main._initialize_services_and_agent") as mock_init, + patch("codebase_rag.main.asyncio") as mock_asyncio, + patch("codebase_rag.main._setup_common_initialization"), + patch("codebase_rag.main.logger") as mock_logger, + ): + mock_agent = MagicMock() + mock_init.return_value = (mock_agent, [], "system prompt") + mock_asyncio.run.return_value = mock_response + mock_conn.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_conn.return_value.__exit__ = MagicMock(return_value=False) + + main_single_query(str(tmp_path), 1000, "test") + + mock_logger.remove.assert_called_once() + mock_logger.add.assert_called_once() + add_args = mock_logger.add.call_args + import sys + + assert add_args[0][0] is sys.stderr + + +class TestMCPClient: + def test_query_mcp_server_is_callable(self) -> None: + assert callable(query_mcp_server) + + def test_client_uses_constants(self) -> None: + import inspect + + from codebase_rag.mcp import client + + source = inspect.getsource(client) + assert "MCPToolName.ASK_AGENT" in 
source + assert "MCPParamName.QUESTION" in source + + def test_query_with_errlog_is_async(self) -> None: + import asyncio + + from codebase_rag.mcp.client import _query_with_errlog + + assert asyncio.iscoroutinefunction(_query_with_errlog) + + async def test_query_with_errlog_json_response(self) -> None: + import io + + from codebase_rag.mcp.client import _query_with_errlog + + mock_content = MagicMock() + mock_content.text = '{"output": "test answer"}' + mock_result = MagicMock() + mock_result.content = [mock_content] + + mock_session = AsyncMock() + mock_session.initialize = AsyncMock() + mock_session.call_tool = AsyncMock(return_value=mock_result) + mock_session.__aenter__ = AsyncMock(return_value=mock_session) + mock_session.__aexit__ = AsyncMock(return_value=False) + + mock_transport = AsyncMock() + mock_transport.__aenter__ = AsyncMock(return_value=(MagicMock(), MagicMock())) + mock_transport.__aexit__ = AsyncMock(return_value=False) + + with ( + patch("codebase_rag.mcp.client.stdio_client", return_value=mock_transport), + patch("codebase_rag.mcp.client.ClientSession", return_value=mock_session), + ): + result = await _query_with_errlog("test question", io.StringIO()) + + assert result == {"output": "test answer"} + + async def test_query_with_errlog_non_json_response(self) -> None: + import io + + from codebase_rag.mcp.client import _query_with_errlog + + mock_content = MagicMock() + mock_content.text = "plain text response" + mock_result = MagicMock() + mock_result.content = [mock_content] + + mock_session = AsyncMock() + mock_session.initialize = AsyncMock() + mock_session.call_tool = AsyncMock(return_value=mock_result) + mock_session.__aenter__ = AsyncMock(return_value=mock_session) + mock_session.__aexit__ = AsyncMock(return_value=False) + + mock_transport = AsyncMock() + mock_transport.__aenter__ = AsyncMock(return_value=(MagicMock(), MagicMock())) + mock_transport.__aexit__ = AsyncMock(return_value=False) + + with ( + 
patch("codebase_rag.mcp.client.stdio_client", return_value=mock_transport), + patch("codebase_rag.mcp.client.ClientSession", return_value=mock_session), + ): + result = await _query_with_errlog("test", io.StringIO()) + + assert result == {"output": "plain text response"} + + async def test_query_with_errlog_empty_response(self) -> None: + import io + + from codebase_rag.mcp.client import _query_with_errlog + + mock_result = MagicMock() + mock_result.content = [] + + mock_session = AsyncMock() + mock_session.initialize = AsyncMock() + mock_session.call_tool = AsyncMock(return_value=mock_result) + mock_session.__aenter__ = AsyncMock(return_value=mock_session) + mock_session.__aexit__ = AsyncMock(return_value=False) + + mock_transport = AsyncMock() + mock_transport.__aenter__ = AsyncMock(return_value=(MagicMock(), MagicMock())) + mock_transport.__aexit__ = AsyncMock(return_value=False) + + with ( + patch("codebase_rag.mcp.client.stdio_client", return_value=mock_transport), + patch("codebase_rag.mcp.client.ClientSession", return_value=mock_session), + ): + result = await _query_with_errlog("test", io.StringIO()) + + assert result == {"output": "No response from server"} + + def test_query_mcp_server_opens_devnull(self) -> None: + with ( + patch("codebase_rag.mcp.client.asyncio") as mock_asyncio, + patch("builtins.open", MagicMock()) as mock_open, + ): + mock_asyncio.run.return_value = {"output": "result"} + query_mcp_server("test") + mock_open.assert_called_once() diff --git a/codebase_rag/tests/test_mcp_write_file.py b/codebase_rag/tests/test_mcp_write_file.py index 6c214c12a..dd222e9c6 100644 --- a/codebase_rag/tests/test_mcp_write_file.py +++ b/codebase_rag/tests/test_mcp_write_file.py @@ -199,6 +199,10 @@ class TestWriteFileErrorHandling: @pytest.mark.skipif( os.name == "nt", reason="chmod 0o444 does not prevent file creation on Windows" ) + @pytest.mark.skipif( + hasattr(os, "getuid") and os.getuid() == 0, + reason="root bypasses filesystem permissions", + ) async 
def test_write_to_readonly_directory( self, mcp_registry: MCPToolsRegistry, temp_project_root: Path ) -> None: diff --git a/codebase_rag/tests/test_memgraph_batching.py b/codebase_rag/tests/test_memgraph_batching.py index a3297e819..81c068b66 100644 --- a/codebase_rag/tests/test_memgraph_batching.py +++ b/codebase_rag/tests/test_memgraph_batching.py @@ -64,15 +64,20 @@ def test_node_batch_preserves_per_row_properties() -> None: def test_relationship_batch_flushes_after_threshold_and_respects_node_flush() -> None: ingestor, cursor_mock = _create_ingestor_with_mocked_connection() + col = MagicMock() + col.name = "created" + cursor_mock.description = [col] + cursor_mock.fetchall.return_value = [(1,), (1,)] + with patch.object( - ingestor, "flush_nodes", wraps=ingestor.flush_nodes + MemgraphIngestor, "flush_nodes", wraps=ingestor.flush_nodes ) as flush_nodes_spy: ingestor.ensure_relationship_batch( ("Module", "qualified_name", "proj.module1"), "CONTAINS_FILE", ("File", "path", "file1"), ) - assert len(ingestor.relationship_buffer) == 1 + assert ingestor._rel_count == 1 cursor_mock.execute.assert_not_called() ingestor.ensure_relationship_batch( @@ -83,7 +88,7 @@ def test_relationship_batch_flushes_after_threshold_and_respects_node_flush() -> assert flush_nodes_spy.call_count == 1 - assert len(ingestor.relationship_buffer) == 0 + assert ingestor._rel_count == 0 cursor_mock.execute.assert_called_once() executed_query = cursor_mock.execute.call_args[0][0] assert "UNWIND $batch" in executed_query diff --git a/codebase_rag/tests/test_memory_limit.py b/codebase_rag/tests/test_memory_limit.py new file mode 100644 index 000000000..8fed07bad --- /dev/null +++ b/codebase_rag/tests/test_memory_limit.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import pytest + +from codebase_rag.services.graph_service import _apply_memory_limit + + +class TestApplyMemoryLimit: + def test_appends_hint_to_simple_query(self) -> None: + result = _apply_memory_limit("MATCH (n) RETURN n;", 
4096) + assert result == "MATCH (n) RETURN n QUERY MEMORY LIMIT 4096 MB;" + + def test_appends_hint_when_no_trailing_semicolon(self) -> None: + result = _apply_memory_limit("MATCH (n) RETURN n", 256) + assert result == "MATCH (n) RETURN n QUERY MEMORY LIMIT 256 MB;" + + def test_preserves_existing_hint(self) -> None: + query = "MATCH (n) RETURN n QUERY MEMORY LIMIT 1024 MB;" + assert _apply_memory_limit(query, 4096) == query + + def test_preserves_existing_hint_case_insensitive(self) -> None: + query = "MATCH (n) RETURN n query memory limit 1024 mb;" + assert _apply_memory_limit(query, 4096) == query + + def test_handles_trailing_whitespace(self) -> None: + result = _apply_memory_limit("MATCH (n) RETURN n;\n ", 4096) + assert result == "MATCH (n) RETURN n QUERY MEMORY LIMIT 4096 MB;" + + def test_handles_whitespace_before_semicolon(self) -> None: + result = _apply_memory_limit("MATCH (n) RETURN n ;", 4096) + assert result == "MATCH (n) RETURN n QUERY MEMORY LIMIT 4096 MB;" + + def test_handles_multiline_query(self) -> None: + query = "MATCH (a)-[:CALLS*1..6]->(b)\nRETURN a, b;" + result = _apply_memory_limit(query, 2048) + assert result == ( + "MATCH (a)-[:CALLS*1..6]->(b)\nRETURN a, b QUERY MEMORY LIMIT 2048 MB;" + ) + + @pytest.mark.parametrize("mb", [128, 256, 1024, 4096, 16384]) + def test_uses_configured_megabytes(self, mb: int) -> None: + result = _apply_memory_limit("MATCH (n) RETURN n;", mb) + assert f"QUERY MEMORY LIMIT {mb} MB" in result diff --git a/codebase_rag/tests/test_method_calls_caller_attribution.py b/codebase_rag/tests/test_method_calls_caller_attribution.py new file mode 100644 index 000000000..6c4cd2a01 --- /dev/null +++ b/codebase_rag/tests/test_method_calls_caller_attribution.py @@ -0,0 +1,679 @@ +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING +from unittest.mock import MagicMock + +from codebase_rag import constants as cs +from codebase_rag.tests.conftest import get_relationships, 
run_updater + +if TYPE_CHECKING: + pass + + +def _get_method_caller_calls(mock_ingestor: MagicMock) -> list: + return [ + c + for c in get_relationships(mock_ingestor, cs.RelationshipType.CALLS) + if c.args[0][0] == cs.NodeLabel.METHOD + ] + + +def _get_function_caller_calls(mock_ingestor: MagicMock) -> list: + return [ + c + for c in get_relationships(mock_ingestor, cs.RelationshipType.CALLS) + if c.args[0][0] == cs.NodeLabel.FUNCTION + ] + + +def _get_module_caller_calls(mock_ingestor: MagicMock) -> list: + return [ + c + for c in get_relationships(mock_ingestor, cs.RelationshipType.CALLS) + if c.args[0][0] == cs.NodeLabel.MODULE + ] + + +def _caller_qn(call: MagicMock) -> str: + return call.args[0][2] + + +def _callee_qn(call: MagicMock) -> str: + return call.args[2][2] + + +class TestCppMethodCallerAttribution: + def test_simple_class_method_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "player.cpp").write_text( + encoding="utf-8", + data=""" +class Player { +public: + void handleArtifact() {} + + void handleArtifactWatcherCb() { + handleArtifact(); + } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + callees = [_callee_qn(c) for c in method_calls] + + watcher_callers = [qn for qn in callers if "handleArtifactWatcherCb" in qn] + assert len(watcher_callers) >= 1 + + artifact_callees = [qn for qn in callees if "handleArtifact" in qn] + assert len(artifact_callees) >= 1 + + def test_struct_method_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "sensor.cpp").write_text( + encoding="utf-8", + data=""" +struct Sensor { + int readRaw() { return 42; } + + int readCalibrated() { + return readRaw() * 2; + } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) 
+ callers = [_caller_qn(c) for c in method_calls] + callees = [_callee_qn(c) for c in method_calls] + + assert any("readCalibrated" in qn for qn in callers) + assert any("readRaw" in qn for qn in callees) + + def test_multiple_methods_calling_each_other( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "calc.cpp").write_text( + encoding="utf-8", + data=""" +class Calculator { +public: + int add(int a, int b) { return a + b; } + int multiply(int a, int b) { return a * b; } + + int compute(int x) { + int sum = add(x, 1); + return multiply(sum, 2); + } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) + compute_calls = [c for c in method_calls if "compute" in _caller_qn(c)] + compute_callees = {_callee_qn(c) for c in compute_calls} + + assert any("add" in qn for qn in compute_callees) + assert any("multiply" in qn for qn in compute_callees) + + def test_constructor_body_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "init.cpp").write_text( + encoding="utf-8", + data=""" +class Engine { +public: + void initialize() {} + + Engine() { + initialize(); + } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) + callees = [_callee_qn(c) for c in method_calls] + assert any("initialize" in qn for qn in callees) + + def test_method_calling_free_function_has_method_caller( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "mixed.cpp").write_text( + encoding="utf-8", + data=""" +void freeHelper() {} + +class Service { +public: + void process() { + freeHelper(); + } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) + process_calls = [c for c in method_calls if "process" in _caller_qn(c)] + assert len(process_calls) >= 1 + 
+ def test_multiple_classes_in_one_file( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "multi.cpp").write_text( + encoding="utf-8", + data=""" +class Alpha { +public: + void step1() {} + void run() { step1(); } +}; + +class Beta { +public: + void step2() {} + void execute() { step2(); } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = {_caller_qn(c) for c in method_calls} + callees = {_callee_qn(c) for c in method_calls} + + assert any("run" in qn for qn in callers) + assert any("execute" in qn for qn in callers) + assert any("step1" in qn for qn in callees) + assert any("step2" in qn for qn in callees) + + def test_method_with_parameters( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "params.cpp").write_text( + encoding="utf-8", + data=""" +class Parser { +public: + int parse(const char* input, int length) { return 0; } + + int parseFile(const char* path) { + return parse(path, 100); + } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + assert any("parseFile" in qn for qn in callers) + + def test_virtual_method_calls( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "virtual.cpp").write_text( + encoding="utf-8", + data=""" +class Base { +public: + virtual void onEvent() {} + + void dispatch() { + onEvent(); + } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) + dispatch_calls = [c for c in method_calls if "dispatch" in _caller_qn(c)] + assert len(dispatch_calls) >= 1 + assert any("onEvent" in _callee_qn(c) for c in dispatch_calls) + + def test_method_calling_another_via_this_pointer( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: 
+ (temp_repo / "this_ptr.cpp").write_text( + encoding="utf-8", + data=""" +class Widget { +public: + void repaint() {} + + void resize(int w, int h) { + this->repaint(); + } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + assert any("resize" in qn for qn in callers) + + def test_deeply_nested_call_chain( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "chain.cpp").write_text( + encoding="utf-8", + data=""" +class Pipeline { +public: + int validate() { return 1; } + int transform(int x) { return x * 2; } + int output(int x) { return x; } + + int run() { + int v = validate(); + int t = transform(v); + return output(t); + } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) + run_calls = [c for c in method_calls if "run" in _caller_qn(c)] + run_callees = {_callee_qn(c) for c in run_calls} + + assert any("validate" in qn for qn in run_callees) + assert any("transform" in qn for qn in run_callees) + assert any("output" in qn for qn in run_callees) + + def test_static_method_calls( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "static.cpp").write_text( + encoding="utf-8", + data=""" +class Factory { +public: + static int create() { return 0; } + + static int build() { + return create(); + } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + assert any("build" in qn for qn in callers) + + def test_const_method_calls( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "const.cpp").write_text( + encoding="utf-8", + data=""" +class Container { +public: + int size() const { return 10; } + + bool empty() const { + return 
size() == 0; + } +}; +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.CPP) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + assert any("empty" in qn for qn in callers) + + +class TestPythonMethodCallerAttribution: + def test_method_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "service.py").write_text( + encoding="utf-8", + data=""" +class Service: + def validate(self): + pass + + def process(self): + self.validate() +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.PYTHON) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + assert any("process" in qn for qn in callers) + + def test_multiple_methods_calling_each_other( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "pipeline.py").write_text( + encoding="utf-8", + data=""" +class Pipeline: + def step1(self): + pass + + def step2(self): + self.step1() + + def run(self): + self.step2() +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.PYTHON) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = {_caller_qn(c) for c in method_calls} + assert any("step2" in qn for qn in callers) + assert any("run" in qn for qn in callers) + + def test_dunder_init_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "init.py").write_text( + encoding="utf-8", + data=""" +class Config: + def _load(self): + pass + + def __init__(self): + self._load() +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.PYTHON) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + assert any("__init__" in qn for qn in callers) + + +class TestJavaScriptMethodCallerAttribution: + def test_class_method_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> 
None: + (temp_repo / "service.js").write_text( + encoding="utf-8", + data=""" +class Service { + validate() { + return true; + } + + process() { + return this.validate(); + } +} +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.JS) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + assert any("process" in qn for qn in callers) + + def test_constructor_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "widget.js").write_text( + encoding="utf-8", + data=""" +class Widget { + setup() {} + + constructor() { + this.setup(); + } +} +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.JS) + + method_calls = _get_method_caller_calls(mock_ingestor) + callees = [_callee_qn(c) for c in method_calls] + assert any("setup" in qn for qn in callees) + + +class TestTypeScriptMethodCallerAttribution: + def test_class_method_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "handler.ts").write_text( + encoding="utf-8", + data=""" +class Handler { + private validate(): boolean { + return true; + } + + public handle(): void { + this.validate(); + } +} +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.TS) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + assert any("handle" in qn for qn in callers) + + def test_multiple_methods_with_types( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "repo.ts").write_text( + encoding="utf-8", + data=""" +class Repository { + find(id: number): string { return ""; } + validate(data: string): boolean { return true; } + + save(id: number): boolean { + const item = this.find(id); + return this.validate(item); + } +} +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.TS) + + method_calls = _get_method_caller_calls(mock_ingestor) + save_calls = [c 
for c in method_calls if "save" in _caller_qn(c)] + save_callees = {_callee_qn(c) for c in save_calls} + assert any("find" in qn for qn in save_callees) + assert any("validate" in qn for qn in save_callees) + + +class TestJavaMethodCallerAttribution: + def test_method_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "Service.java").write_text( + encoding="utf-8", + data=""" +public class Service { + private boolean validate() { + return true; + } + + public void process() { + validate(); + } +} +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.JAVA) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + assert any("process" in qn for qn in callers) + + def test_constructor_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "Config.java").write_text( + encoding="utf-8", + data=""" +public class Config { + private void loadDefaults() {} + + public Config() { + loadDefaults(); + } +} +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.JAVA) + + method_calls = _get_method_caller_calls(mock_ingestor) + callees = [_callee_qn(c) for c in method_calls] + assert any("loadDefaults" in qn for qn in callees) + + def test_multiple_methods_calling_each_other( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "Calculator.java").write_text( + encoding="utf-8", + data=""" +public class Calculator { + public int add(int a, int b) { return a + b; } + public int multiply(int a, int b) { return a * b; } + + public int compute(int x) { + int sum = add(x, 1); + return multiply(sum, 2); + } +} +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.JAVA) + + method_calls = _get_method_caller_calls(mock_ingestor) + compute_calls = [c for c in method_calls if "compute" in _caller_qn(c)] + compute_callees = {_callee_qn(c) for c in compute_calls} + assert any("add" in qn 
for qn in compute_callees) + assert any("multiply" in qn for qn in compute_callees) + + +class TestRustMethodCallerAttribution: + def test_impl_method_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "lib.rs").write_text( + encoding="utf-8", + data=""" +struct Player { + health: i32, +} + +impl Player { + fn heal(&mut self) { + self.health += 10; + } + + fn take_damage(&mut self, amount: i32) { + self.health -= amount; + self.heal(); + } +} +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.RUST) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + assert any("take_damage" in qn for qn in callers) + + def test_multiple_impl_methods( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "lib.rs").write_text( + encoding="utf-8", + data=""" +struct Pipeline; + +impl Pipeline { + fn validate(&self) -> bool { true } + fn transform(&self, x: i32) -> i32 { x * 2 } + + fn run(&self, input: i32) -> i32 { + if self.validate() { + self.transform(input) + } else { + 0 + } + } +} +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.RUST) + + method_calls = _get_method_caller_calls(mock_ingestor) + run_calls = [c for c in method_calls if "run" in _caller_qn(c)] + assert len(run_calls) >= 1 + + +class TestPhpMethodCallerAttribution: + def test_method_calls_method( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "service.php").write_text( + encoding="utf-8", + data="""validate(); + } +} +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.PHP) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = [_caller_qn(c) for c in method_calls] + assert any("process" in qn for qn in callers) + + def test_multiple_methods_calling_each_other( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "pipeline.php").write_text( + encoding="utf-8", + 
data="""step1(); + } + + public function run() { + $this->step2(); + } +} +""", + ) + run_updater(temp_repo, mock_ingestor, cs.SupportedLanguage.PHP) + + method_calls = _get_method_caller_calls(mock_ingestor) + callers = {_caller_qn(c) for c in method_calls} + assert any("step2" in qn for qn in callers) + assert any("run" in qn for qn in callers) diff --git a/codebase_rag/tests/test_model_switching.py b/codebase_rag/tests/test_model_switching.py index 52fb1e632..14217f0d1 100644 --- a/codebase_rag/tests/test_model_switching.py +++ b/codebase_rag/tests/test_model_switching.py @@ -235,6 +235,201 @@ async def test_model_override_none_by_default(self) -> None: assert kwargs.get("model") is None +class TestAgentLoopUserPromptOnResume: + @staticmethod + def _make_response(output: object) -> MagicMock: + response = MagicMock() + response.output = output + response.new_messages.return_value = [] + return response + + @staticmethod + def _patches(): + from pydantic_ai import DeferredToolResults + + return ( + patch("codebase_rag.main.app_context"), + patch("codebase_rag.main.log_session_event"), + patch( + "codebase_rag.main._process_tool_approvals", + new=AsyncMock(return_value=DeferredToolResults()), + ), + patch("codebase_rag.main._refresh_context_tokens", new=AsyncMock()), + patch("codebase_rag.main._thinking_with_status_bar"), + ) + + @pytest.mark.asyncio + async def test_user_prompt_not_resent_after_deferred_tool_approval(self) -> None: + from pydantic_ai import DeferredToolRequests + + from codebase_rag.main import _run_agent_response_loop + from codebase_rag.types_defs import CHAT_LOOP_UI, ConfirmationToolNames + + mock_agent = MagicMock() + mock_agent.run = AsyncMock( + side_effect=[ + self._make_response(DeferredToolRequests(approvals=[])), + self._make_response("Done"), + ] + ) + tool_names = ConfirmationToolNames( + replace_code="replace", create_file="create", shell_command="shell" + ) + ctx, log_evt, approvals, refresh, status = self._patches() + + with ctx as 
mock_ctx, log_evt, approvals, refresh, status: + mock_ctx.console.print = MagicMock() + mock_ctx.session.cancelled = False + + await _run_agent_response_loop( + mock_agent, + [], + "delete first and add two", + CHAT_LOOP_UI, + tool_names, + ) + + assert mock_agent.run.call_count == 2 + assert mock_agent.run.call_args_list[0][0][0] == "delete first and add two" + assert mock_agent.run.call_args_list[1][0][0] is None + + @pytest.mark.asyncio + async def test_user_prompt_not_resent_across_multiple_deferred_rounds( + self, + ) -> None: + from pydantic_ai import DeferredToolRequests + + from codebase_rag.main import _run_agent_response_loop + from codebase_rag.types_defs import CHAT_LOOP_UI, ConfirmationToolNames + + mock_agent = MagicMock() + mock_agent.run = AsyncMock( + side_effect=[ + self._make_response(DeferredToolRequests(approvals=[])), + self._make_response(DeferredToolRequests(approvals=[])), + self._make_response(DeferredToolRequests(approvals=[])), + self._make_response("All done"), + ] + ) + tool_names = ConfirmationToolNames( + replace_code="replace", create_file="create", shell_command="shell" + ) + ctx, log_evt, approvals, refresh, status = self._patches() + + with ctx as mock_ctx, log_evt, approvals, refresh, status: + mock_ctx.console.print = MagicMock() + mock_ctx.session.cancelled = False + + await _run_agent_response_loop( + mock_agent, [], "multi-step task", CHAT_LOOP_UI, tool_names + ) + + assert mock_agent.run.call_count == 4 + assert mock_agent.run.call_args_list[0][0][0] == "multi-step task" + for call in mock_agent.run.call_args_list[1:]: + assert call[0][0] is None + + @pytest.mark.asyncio + async def test_user_prompt_passed_on_first_call_when_no_deferred(self) -> None: + from codebase_rag.main import _run_agent_response_loop + from codebase_rag.types_defs import CHAT_LOOP_UI, ConfirmationToolNames + + mock_agent = MagicMock() + mock_agent.run = AsyncMock(return_value=self._make_response("Hello")) + tool_names = ConfirmationToolNames( + 
replace_code="replace", create_file="create", shell_command="shell" + ) + ctx, log_evt, approvals, refresh, status = self._patches() + + with ctx as mock_ctx, log_evt, approvals, refresh, status: + mock_ctx.console.print = MagicMock() + mock_ctx.session.cancelled = False + + await _run_agent_response_loop( + mock_agent, [], "just a question", CHAT_LOOP_UI, tool_names + ) + + assert mock_agent.run.call_count == 1 + assert mock_agent.run.call_args_list[0][0][0] == "just a question" + assert mock_agent.run.call_args_list[0][1].get("deferred_tool_results") is None + + @pytest.mark.asyncio + async def test_multimodal_user_prompt_not_resent_after_approval(self) -> None: + from pydantic_ai import BinaryContent, DeferredToolRequests + + from codebase_rag.main import _run_agent_response_loop + from codebase_rag.types_defs import CHAT_LOOP_UI, ConfirmationToolNames + + multimodal_prompt = [ + "look at this image", + BinaryContent(data=b"\x89PNG\r\n", media_type="image/png"), + ] + mock_agent = MagicMock() + mock_agent.run = AsyncMock( + side_effect=[ + self._make_response(DeferredToolRequests(approvals=[])), + self._make_response("Analyzed"), + ] + ) + tool_names = ConfirmationToolNames( + replace_code="replace", create_file="create", shell_command="shell" + ) + ctx, log_evt, approvals, refresh, status = self._patches() + + with ctx as mock_ctx, log_evt, approvals, refresh, status: + mock_ctx.console.print = MagicMock() + mock_ctx.session.cancelled = False + + await _run_agent_response_loop( + mock_agent, [], multimodal_prompt, CHAT_LOOP_UI, tool_names + ) + + assert mock_agent.run.call_count == 2 + assert mock_agent.run.call_args_list[0][0][0] is multimodal_prompt + assert mock_agent.run.call_args_list[1][0][0] is None + + @pytest.mark.asyncio + async def test_deferred_results_passed_only_after_approval(self) -> None: + from pydantic_ai import DeferredToolRequests, DeferredToolResults + + from codebase_rag.main import _run_agent_response_loop + from codebase_rag.types_defs 
import CHAT_LOOP_UI, ConfirmationToolNames + + approved = DeferredToolResults() + mock_agent = MagicMock() + mock_agent.run = AsyncMock( + side_effect=[ + self._make_response(DeferredToolRequests(approvals=[])), + self._make_response("Done"), + ] + ) + tool_names = ConfirmationToolNames( + replace_code="replace", create_file="create", shell_command="shell" + ) + + with ( + patch("codebase_rag.main.app_context") as mock_ctx, + patch("codebase_rag.main.log_session_event"), + patch( + "codebase_rag.main._process_tool_approvals", + new=AsyncMock(return_value=approved), + ), + patch("codebase_rag.main._refresh_context_tokens", new=AsyncMock()), + patch("codebase_rag.main._thinking_with_status_bar"), + ): + mock_ctx.console.print = MagicMock() + mock_ctx.session.cancelled = False + + await _run_agent_response_loop( + mock_agent, [], "edit file", CHAT_LOOP_UI, tool_names + ) + + first_kwargs = mock_agent.run.call_args_list[0][1] + second_kwargs = mock_agent.run.call_args_list[1][1] + assert first_kwargs.get("deferred_tool_results") is None + assert second_kwargs.get("deferred_tool_results") is approved + + class TestCommandConstants: def test_model_command_prefix(self) -> None: assert cs.MODEL_COMMAND_PREFIX == "/model" diff --git a/codebase_rag/tests/test_module_call_attribution.py b/codebase_rag/tests/test_module_call_attribution.py new file mode 100644 index 000000000..9d635e0ee --- /dev/null +++ b/codebase_rag/tests/test_module_call_attribution.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag import constants as cs +from codebase_rag.tests.conftest import run_updater + + +def _calls(mock_ingestor: MagicMock) -> list[tuple[str, str, str]]: + # (H) CALLS edges as (caller_label, caller_qn, callee_qn). 
+ out: list[tuple[str, str, str]] = [] + for c in mock_ingestor.ensure_relationship_batch.call_args_list: + if c.args[1] == cs.RelationshipType.CALLS: + caller_label, _caller_key, caller_qn = c.args[0] + _callee_label, _callee_key, callee_qn = c.args[2] + out.append((caller_label, caller_qn, callee_qn)) + return out + + +def _module_callees(calls: list[tuple[str, str, str]]) -> set[str]: + return { + callee.rsplit(cs.SEPARATOR_DOT, 1)[-1] + for label, _caller, callee in calls + if label == cs.NodeLabel.MODULE + } + + +class TestModuleCallAttribution: + def test_nested_call_not_attributed_to_module( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "app.py").write_text( + "def main():\n" + " used_by_main()\n" + "\n" + "\n" + "def used_by_main():\n" + " return 1\n" + "\n" + "\n" + 'if __name__ == "__main__":\n' + " main()\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + calls = _calls(mock_ingestor) + module_callees = _module_callees(calls) + + # (H) the function-body call is attributed to the function, not the module + assert any( + caller.endswith(".main") and callee.endswith(".used_by_main") + for _label, caller, callee in calls + ) + # (H) used_by_main is only called inside main(), never at module top level + assert "used_by_main" not in module_callees + + def test_top_level_call_is_attributed_to_module( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "app.py").write_text( + "def main():\n" + " used_by_main()\n" + "\n" + "\n" + "def used_by_main():\n" + " return 1\n" + "\n" + "\n" + 'if __name__ == "__main__":\n' + " main()\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + module_callees = _module_callees(_calls(mock_ingestor)) + + # (H) the `if __name__ == "__main__": main()` call runs at module load + assert "main" in module_callees + + def test_bare_module_level_call_attributed_to_module( + self, temp_repo: 
Path, mock_ingestor: MagicMock + ) -> None: + (temp_repo / "app.py").write_text( + "def setup():\n" + " return 1\n" + "\n" + "\n" + "def helper():\n" + " return 2\n" + "\n" + "\n" + "VALUE = setup()\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + module_callees = _module_callees(_calls(mock_ingestor)) + + assert "setup" in module_callees + # (H) helper is never called at all -> no module edge to it + assert "helper" not in module_callees + + def test_default_argument_call_attributed_to_module( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + # (H) a default-argument expression runs at module-load (definition) time, + # (H) not when the function body executes, so it is a module-level call. + (temp_repo / "app.py").write_text( + "def make_default():\n" + " return 1\n" + "\n" + "\n" + "def with_default(x=make_default()):\n" + " return x\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="python") + module_callees = _module_callees(_calls(mock_ingestor)) + + assert "make_default" in module_callees + + def test_cpp_file_scope_initializer_call_attributed_to_module( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + # (H) a C++ file-scope initializer runs at load time, so its call is + # (H) module-attributed; a call inside a function body is not. 
+ (temp_repo / "app.cpp").write_text( + "int nested_cpp() { return 1; }\n" + "int top_cpp() { return 2; }\n" + "int run_cpp() { return nested_cpp(); }\n" + "int module_value = top_cpp();\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="cpp") + calls = _calls(mock_ingestor) + module_callees = _module_callees(calls) + + assert "top_cpp" in module_callees + assert "nested_cpp" not in module_callees + assert any( + caller.endswith(".run_cpp") and callee.endswith(".nested_cpp") + for _label, caller, callee in calls + ) diff --git a/codebase_rag/tests/test_module_qn_language_collision.py b/codebase_rag/tests/test_module_qn_language_collision.py new file mode 100644 index 000000000..5df31da32 --- /dev/null +++ b/codebase_rag/tests/test_module_qn_language_collision.py @@ -0,0 +1,79 @@ +# (H) Regression: two source files that share a basename but differ by extension +# (H) (foo.py and foo.cpp) must get distinct module qualified names. Path-based +# (H) module naming strips the extension, so without disambiguation both map to +# (H) the same module qn, cascading into identical class/method qns that collapse +# (H) under the graph's qualified_name unique constraint (dropping one file's defs). 
+from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.constants import KEY_PATH, KEY_QUALIFIED_NAME, NodeLabel +from codebase_rag.tests.conftest import create_and_run_updater, get_nodes + + +def _make_project(temp_repo: Path) -> Path: + project_path = temp_repo / "mixedmod" + (project_path / "pkg").mkdir(parents=True) + (project_path / "pkg" / "shape.py").write_text( + encoding="utf-8", + data="class Shape:\n def area(self):\n return 1\n", + ) + (project_path / "pkg" / "shape.cpp").write_text( + encoding="utf-8", + data="class Shape {\npublic:\n int area() {\n return 2;\n }\n};\n", + ) + return project_path + + +def _qns_by_path( + mock_ingestor: MagicMock, label: NodeLabel, name: str +) -> dict[str, str]: + out: dict[str, str] = {} + for node in get_nodes(mock_ingestor, label): + props = node[0][1] + qn = str(props.get(KEY_QUALIFIED_NAME)) + if qn.rsplit(".", 1)[-1] == name: + out[str(props.get(KEY_PATH))] = qn + return out + + +def test_same_stem_files_get_distinct_module_qns( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = _make_project(temp_repo) + create_and_run_updater(project, mock_ingestor, skip_if_missing="cpp") + + modules = { + str(node[0][1].get(KEY_PATH)): str(node[0][1].get(KEY_QUALIFIED_NAME)) + for node in get_nodes(mock_ingestor, NodeLabel.MODULE) + } + py_mod = modules.get("pkg/shape.py") + cpp_mod = modules.get("pkg/shape.cpp") + assert py_mod and cpp_mod, f"both module nodes expected: {modules}" + assert py_mod != cpp_mod, f"module qn collision: {py_mod}" + + +def test_same_stem_methods_do_not_collide( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = _make_project(temp_repo) + create_and_run_updater(project, mock_ingestor, skip_if_missing="cpp") + + area = _qns_by_path(mock_ingestor, NodeLabel.METHOD, "area") + py_area = area.get("pkg/shape.py") + cpp_area = area.get("pkg/shape.cpp") + assert py_area and cpp_area, f"both area methods 
expected: {area}" + assert py_area != cpp_area, f"method qn collision across languages: {area}" + + # (H) The method qn must derive from its own (disambiguated) module qn, not a + # (H) bare recomputed prefix patched up by register_unique_qn's @N dedup. + modules = { + str(node[0][1].get(KEY_PATH)): str(node[0][1].get(KEY_QUALIFIED_NAME)) + for node in get_nodes(mock_ingestor, NodeLabel.MODULE) + } + py_mod = modules["pkg/shape.py"] + assert py_area.startswith(f"{py_mod}."), ( + f"python method qn {py_area} not derived from its module {py_mod}" + ) + assert "@" not in py_area, f"method qn collided and was @N-deduped: {py_area}" diff --git a/codebase_rag/tests/test_multi_project.py b/codebase_rag/tests/test_multi_project.py new file mode 100644 index 000000000..3755bd207 --- /dev/null +++ b/codebase_rag/tests/test_multi_project.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +from collections.abc import Generator +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from typer.testing import CliRunner + +from codebase_rag.cli import _resolve_active_projects, app +from codebase_rag.prompts import build_rag_orchestrator_prompt + +runner = CliRunner() + + +class TestResolveActiveProjects: + def test_returns_default_when_no_projects_flag(self) -> None: + assert _resolve_active_projects(None, "default_proj") == ["default_proj"] + + def test_returns_default_for_empty_string(self) -> None: + assert _resolve_active_projects("", "default_proj") == ["default_proj"] + + def test_single_project_in_flag(self) -> None: + assert _resolve_active_projects("only_one", "default_proj") == ["only_one"] + + def test_multiple_projects_comma_separated(self) -> None: + assert _resolve_active_projects("a,b,c", "default_proj") == ["a", "b", "c"] + + def test_strips_whitespace(self) -> None: + assert _resolve_active_projects(" a , b ,c ", "default_proj") == ["a", "b", "c"] + + def test_drops_empty_entries(self) -> None: + assert 
_resolve_active_projects("a,,b,", "default_proj") == ["a", "b"] + + def test_all_empty_falls_back_to_default(self) -> None: + assert _resolve_active_projects(",,", "default_proj") == ["default_proj"] + + +class TestPromptActiveProjectsBlock: + def test_no_projects_lists_list_projects_hint(self) -> None: + prompt = build_rag_orchestrator_prompt([], active_projects=None) + assert "list_projects" in prompt + assert "Project Scope" in prompt + + def test_single_project_mentions_starts_with(self) -> None: + prompt = build_rag_orchestrator_prompt([], active_projects=["only_one"]) + assert "only_one" in prompt + assert "STARTS WITH" in prompt + + def test_multiple_projects_lists_all(self) -> None: + prompt = build_rag_orchestrator_prompt([], active_projects=["a", "b", "c"]) + for name in ["a", "b", "c"]: + assert f"`{name}`" in prompt or f"'{name}." in prompt + assert "STARTS WITH 'a.'" in prompt + assert "STARTS WITH 'b.'" in prompt + + +@pytest.fixture +def mock_memgraph_connect() -> Generator[MagicMock, None, None]: + with patch("codebase_rag.cli.connect_memgraph") as mock_connect: + mock_ingestor = MagicMock() + mock_connect.return_value.__enter__ = MagicMock(return_value=mock_ingestor) + mock_connect.return_value.__exit__ = MagicMock(return_value=False) + yield mock_connect + + +@pytest.fixture +def mock_sync_path() -> Generator[MagicMock, None, None]: + with patch("codebase_rag.cli._run_graph_sync"): + yield + + +@pytest.fixture +def mock_validate_models() -> Generator[None, None, None]: + with patch("codebase_rag.cli._update_and_validate_models"): + yield + + +def test_start_passes_projects_to_single_query( + mock_memgraph_connect: MagicMock, + mock_sync_path: None, + mock_validate_models: None, + tmp_path: Path, +) -> None: + with patch("codebase_rag.cli.main_single_query") as mock_single: + result = runner.invoke( + app, + [ + "start", + "--repo-path", + str(tmp_path), + "--projects", + "alpha,beta", + "--ask-agent", + "hi", + "--no-sync", + ], + ) + assert 
result.exit_code == 0, result.output + mock_single.assert_called_once() + assert mock_single.call_args.kwargs["active_projects"] == ["alpha", "beta"] + + +def test_start_default_projects_uses_derived_name( + mock_memgraph_connect: MagicMock, + mock_sync_path: None, + mock_validate_models: None, + tmp_path: Path, +) -> None: + with patch("codebase_rag.cli.main_single_query") as mock_single: + result = runner.invoke( + app, + [ + "start", + "--repo-path", + str(tmp_path), + "--ask-agent", + "hi", + "--no-sync", + ], + ) + assert result.exit_code == 0, result.output + mock_single.assert_called_once() + active = mock_single.call_args.kwargs["active_projects"] + assert len(active) == 1 + assert "__" in active[0] diff --git a/codebase_rag/tests/test_multiline_input_keybindings.py b/codebase_rag/tests/test_multiline_input_keybindings.py new file mode 100644 index 000000000..d41abe943 --- /dev/null +++ b/codebase_rag/tests/test_multiline_input_keybindings.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +import pytest +from prompt_toolkit.application import create_app_session +from prompt_toolkit.input import create_pipe_input +from prompt_toolkit.output import DummyOutput + +from codebase_rag import constants as cs +from codebase_rag.main import get_multiline_input + +CTRL_J = "\x0a" +CTRL_E = "\x05" +CTRL_C = "\x03" +ENTER = "\r" + + +def _run_with_input(text: str) -> str: + with create_pipe_input() as inp: + inp.send_text(text) + with create_app_session(input=inp, output=DummyOutput()): + return get_multiline_input("Ask") + + +def test_ctrl_j_submits_buffer() -> None: + assert _run_with_input(f"hello{CTRL_J}") == "hello" + + +def test_ctrl_e_submits_buffer() -> None: + assert _run_with_input(f"hello{CTRL_E}") == "hello" + + +def test_ctrl_e_submits_after_multiline_with_enter() -> None: + assert _run_with_input(f"line1{ENTER}line2{CTRL_E}") == "line1\nline2" + + +def test_ctrl_j_submits_after_multiline_with_enter() -> None: + assert 
_run_with_input(f"line1{ENTER}line2{CTRL_J}") == "line1\nline2" + + +def test_result_is_stripped() -> None: + assert _run_with_input(f" padded {CTRL_E}") == "padded" + + +def test_ctrl_c_raises_keyboard_interrupt() -> None: + with pytest.raises(KeyboardInterrupt): + _run_with_input(f"abc{CTRL_C}") + + +def test_keybinding_enum_has_submit_shortcuts() -> None: + assert cs.KeyBinding.CTRL_J.value == "c-j" + assert cs.KeyBinding.CTRL_E.value == "c-e" + + +def test_hint_mentions_both_submit_shortcuts() -> None: + assert "Ctrl+J" in cs.MULTILINE_INPUT_HINT + assert "Ctrl+E" in cs.MULTILINE_INPUT_HINT diff --git a/codebase_rag/tests/test_nested_function_defines.py b/codebase_rag/tests/test_nested_function_defines.py new file mode 100644 index 000000000..e9b9694b2 --- /dev/null +++ b/codebase_rag/tests/test_nested_function_defines.py @@ -0,0 +1,129 @@ +# (H) Finding #2 from the evals/ harness: a function nested inside a METHOD was +# (H) attributed to the Module via DEFINES (flattened), producing false-positive +# (H) module-level edges. A nested function must be DEFINES'd by its enclosing +# (H) scope: the method for function-in-method, the function for function-in-function. 
+from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "nestproj" + +MODULE_SRC = """class C: + def find_x(self) -> int: + def dfs(n: int) -> int: + return n + + return dfs(1) + + +def outer() -> int: + def inner() -> int: + return 1 + + return inner() +""" + +_RelTuple = tuple[str, PropertyValue, str, str, PropertyValue] + + +class _Capture: + def __init__(self) -> None: + self.nodes: dict[tuple[str, PropertyValue], PropertyDict] = {} + self.rels: list[_RelTuple] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + uid = properties[cs.NODE_UNIQUE_CONSTRAINTS[label]] + self.nodes[(str(label), uid)] = dict(properties) + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append( + ( + str(from_spec[0]), + from_spec[2], + str(rel_type), + str(to_spec[0]), + to_spec[2], + ) + ) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _build(tmp_path: Path, src: str = MODULE_SRC) -> _Capture: + (tmp_path / "m.py").write_text(src) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return cap + + +def _defines_sources(cap: _Capture, target_suffix: str) -> list[tuple[str, str]]: + return [ + (from_label, str(from_val)) + for (from_label, from_val, rel_type, _tl, target) in cap.rels + if rel_type 
== cs.RelationshipType.DEFINES + and str(target).endswith(target_suffix) + ] + + +class TestNestedFunctionDefines: + def test_function_in_method_defined_by_method(self, tmp_path: Path) -> None: + cap = _build(tmp_path) + sources = _defines_sources(cap, ".find_x.dfs") + assert len(sources) == 1, sources + label, qn = sources[0] + assert label == cs.NodeLabel.METHOD, sources + assert qn.endswith(".C.find_x"), sources + + def test_function_in_function_defined_by_function(self, tmp_path: Path) -> None: + cap = _build(tmp_path) + sources = _defines_sources(cap, ".outer.inner") + assert len(sources) == 1, sources + label, qn = sources[0] + assert label == cs.NodeLabel.FUNCTION, sources + assert qn.endswith(".outer"), sources + + +CLASS_IN_METHOD_SRC = """class Holder: + def make(self) -> object: + class Local: + pass + + return Local() +""" + + +class TestNestedClassDefines: + def test_class_in_method_defined_by_method(self, tmp_path: Path) -> None: + cap = _build(tmp_path, CLASS_IN_METHOD_SRC) + sources = _defines_sources(cap, ".make.Local") + assert len(sources) == 1, sources + label, qn = sources[0] + assert label == cs.NodeLabel.METHOD, sources + assert qn.endswith(".Holder.make"), sources diff --git a/codebase_rag/tests/test_nested_method_call_qn.py b/codebase_rag/tests/test_nested_method_call_qn.py new file mode 100644 index 000000000..19326a9ab --- /dev/null +++ b/codebase_rag/tests/test_nested_method_call_qn.py @@ -0,0 +1,39 @@ +from pathlib import Path + +from evals.cgr_graph import _capture + + +def _make_repo(root: Path) -> None: + pkg = root / "proj" + pkg.mkdir(parents=True) + (pkg / "__init__.py").write_text("", encoding="utf-8") + (pkg / "m.py").write_text( + "def target():\n return 1\n\n\n" + "class C:\n" + " def method(self):\n" + " def nested():\n" + " return target()\n" + " return nested()\n", + encoding="utf-8", + ) + + +def test_method_nested_function_call_uses_full_caller_qn(tmp_path: Path) -> None: + # (H) A call inside a function nested in a 
method must be attributed to that + # (H) nested function's real node qn (Class.method.nested), not to a + # (H) method-dropping qn (Class.nested) that matches no node. + _make_repo(tmp_path) + ingestor = _capture(tmp_path / "proj", "proj") + calls = { + (str(from_val), str(to_val)) + for _fl, from_val, rel, _tl, to_val in ingestor.rels + if rel == "CALLS" + } + node_qns = {str(uid) for (_label, uid) in ingestor.nodes} + + # (H) the nested function node exists with its full qn + assert "proj.m.C.method.nested" in node_qns + # (H) and its outbound call is attributed to that full qn + assert ("proj.m.C.method.nested", "proj.m.target") in calls + # (H) never to the malformed method-dropping qn + assert ("proj.m.C.nested", "proj.m.target") not in calls diff --git a/codebase_rag/tests/test_node_relationship_coverage.py b/codebase_rag/tests/test_node_relationship_coverage.py index e6af5fd05..00389af7a 100644 --- a/codebase_rag/tests/test_node_relationship_coverage.py +++ b/codebase_rag/tests/test_node_relationship_coverage.py @@ -136,18 +136,15 @@ def test_each_relationship_type_can_be_flushed( ingestor.conn = mock_conn - ingestor.relationship_buffer.append( - ( - (NodeLabel.MODULE.value, KEY_QUALIFIED_NAME, "module.test"), - rel_type.value, - (NodeLabel.FUNCTION.value, KEY_QUALIFIED_NAME, "module.test.func"), - None, - ) + ingestor.ensure_relationship_batch( + (NodeLabel.MODULE.value, KEY_QUALIFIED_NAME, "module.test"), + rel_type.value, + (NodeLabel.FUNCTION.value, KEY_QUALIFIED_NAME, "module.test.func"), ) ingestor.flush_relationships() mock_cursor.execute.assert_called_once() - assert ingestor.relationship_buffer == [] + assert ingestor._rel_count == 0 class TestUniqueKeyPropertyNames: @@ -230,10 +227,13 @@ def test_ensure_constraints_creates_all_constraints(self) -> None: ingestor = MemgraphIngestor(host="localhost", port=7687) executed_queries: list[str] = [] - def capture_query(query: str) -> None: + def capture_query(query: str, params: object = None) -> 
list[object]: executed_queries.append(query) + return [] - with patch.object(ingestor, "_execute_query", side_effect=capture_query): + with patch.object( + MemgraphIngestor, "_execute_query", side_effect=capture_query + ): ingestor.ensure_constraints() for label in NodeLabel: @@ -249,10 +249,13 @@ def test_ensure_constraints_creates_all_indexes(self) -> None: ingestor = MemgraphIngestor(host="localhost", port=7687) executed_queries: list[str] = [] - def capture_query(query: str) -> None: + def capture_query(query: str, params: object = None) -> list[object]: executed_queries.append(query) + return [] - with patch.object(ingestor, "_execute_query", side_effect=capture_query): + with patch.object( + MemgraphIngestor, "_execute_query", side_effect=capture_query + ): ingestor.ensure_constraints() for label in NodeLabel: diff --git a/codebase_rag/tests/test_operator_dispatch_resolution.py b/codebase_rag/tests/test_operator_dispatch_resolution.py new file mode 100644 index 000000000..6f4262552 --- /dev/null +++ b/codebase_rag/tests/test_operator_dispatch_resolution.py @@ -0,0 +1,126 @@ +# (H) L3 finding from the evals/ harness: Python operator syntax dispatches to dunder +# (H) methods at runtime: `k in reg` -> reg.__contains__, `reg[k]` -> reg.__getitem__, +# (H) `reg[k] = v` -> reg.__setitem__, `len(reg)` -> reg.__len__. cgr only extracts +# (H) call expressions, so these first-party method calls were never captured. They are +# (H) emitted only when the operand's type resolves to a first-party class that defines +# (H) the dunder, so builtin containers (dict/list) produce no spurious edges. 
+from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +FILES = { + "pkg/__init__.py": "", + "pkg/registry.py": ( + "class Registry:\n" + " def __contains__(self, key):\n return True\n\n" + " def __getitem__(self, key):\n return 1\n\n" + " def __setitem__(self, key, value):\n return None\n\n" + " def __len__(self):\n return 0\n" + ), + "pkg/user.py": ( + "from .registry import Registry\n\n\n" + "class User:\n" + " def __init__(self, reg: Registry) -> None:\n" + " self._reg = reg\n\n" + " def use(self, key):\n" + " if key in self._reg:\n" + " value = self._reg[key]\n" + " self._reg[key] = 1\n" + " return len(self._reg)\n\n" + " def builtin(self):\n" + " data = {}\n" + " data['x'] = 1\n" + " return data['x']\n" + ), +} + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + for rel, content in FILES.items(): + p = tmp_path / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + 
ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestOperatorDispatchResolution: + def test_contains_operator_dispatches_to_dunder(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.user.User.use", + "proj.pkg.registry.Registry.__contains__", + ) in calls, calls + + def test_subscript_read_dispatches_to_getitem(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.user.User.use", + "proj.pkg.registry.Registry.__getitem__", + ) in calls, calls + + def test_subscript_write_dispatches_to_setitem(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.user.User.use", + "proj.pkg.registry.Registry.__setitem__", + ) in calls, calls + + def test_len_dispatches_to_dunder(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.user.User.use", + "proj.pkg.registry.Registry.__len__", + ) in calls, calls + + def test_builtin_container_produces_no_dunder_edge(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + dunder_targets = { + to for (frm, to) in calls if frm == "proj.pkg.user.User.builtin" + } + assert dunder_targets == set(), dunder_targets diff --git a/codebase_rag/tests/test_oracle_nested_defs.py b/codebase_rag/tests/test_oracle_nested_defs.py new file mode 100644 index 000000000..e770dd1a6 --- /dev/null +++ b/codebase_rag/tests/test_oracle_nested_defs.py @@ -0,0 +1,46 @@ +# (H) Covers the L1 ast oracle (evals/ast_oracle.py): functions defined inside an +# (H) except handler or a match/case block must be captured. cgr captures these +# (H) function-local defs, so an oracle that skips them produces spurious Function +# (H) false positives (e.g. thrift's sslcompat.py `def match` inside `except`). 
+from __future__ import annotations + +from pathlib import Path + +from evals.ast_oracle import extract_oracle_graph + +SRC = """\ +def with_except(): + try: + import something + except ImportError: + def fallback_in_except(): + return 1 + return fallback_in_except + + +def with_match(value): + match value: + case 1: + def handler_in_case(): + return 2 + return handler_in_case + case _: + return None +""" + + +def _function_names(target: Path) -> set[str]: + graph = extract_oracle_graph(target, "proj") + return {node.name for node in graph.nodes.values() if node.key.kind == "Function"} + + +def test_oracle_captures_function_in_except_handler(tmp_path: Path) -> None: + (tmp_path / "mod.py").write_text(SRC, encoding="utf-8") + names = _function_names(tmp_path) + assert "fallback_in_except" in names, names + + +def test_oracle_captures_function_in_match_case(tmp_path: Path) -> None: + (tmp_path / "mod.py").write_text(SRC, encoding="utf-8") + names = _function_names(tmp_path) + assert "handler_in_case" in names, names diff --git a/codebase_rag/tests/test_permission_mode.py b/codebase_rag/tests/test_permission_mode.py new file mode 100644 index 000000000..f660b4a51 --- /dev/null +++ b/codebase_rag/tests/test_permission_mode.py @@ -0,0 +1,20 @@ +from codebase_rag.constants import PermissionMode +from codebase_rag.models import SessionState + + +class TestSessionPermissionMode: + def test_default_mode_is_normal(self) -> None: + state = SessionState() + assert state.permission_mode == PermissionMode.NORMAL + assert state.is_yolo() is False + + def test_cycle_toggles_to_yolo(self) -> None: + state = SessionState() + assert state.cycle_permission_mode() == PermissionMode.YOLO + assert state.is_yolo() is True + + def test_cycle_toggles_back_to_normal(self) -> None: + state = SessionState() + state.cycle_permission_mode() + assert state.cycle_permission_mode() == PermissionMode.NORMAL + assert state.is_yolo() is False diff --git 
a/codebase_rag/tests/test_php_containment_oracle.py b/codebase_rag/tests/test_php_containment_oracle.py new file mode 100644 index 000000000..08a38bf08 --- /dev/null +++ b/codebase_rag/tests/test_php_containment_oracle.py @@ -0,0 +1,66 @@ +# (H) Covers PHP containment-edge validation: cgr's DEFINES (file module -> +# (H) every named type and top-level function) and DEFINES_METHOD (class/ +# (H) interface/trait/enum -> method) edges are graded against the independent +# (H) php-parser oracle, joined on (kind, file, line). Exercises an interface, +# (H) a trait, an enum with a method, a class, and a free function. +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_php_graph +from evals.oracles import php_oracle_available, run_php_oracle +from evals.score import score_edge_types + +PHP_SRC = """\ + None: + if not php_oracle_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.PHP not in load_parsers()[0]: + pytest.skip("php parser not available") + + +def test_cgr_matches_php_parser_oracle_on_containment_edges(tmp_path: Path) -> None: + _require_php() + project = tmp_path / "php_edge" + project.mkdir() + (project / "lib.php").write_text(PHP_SRC, encoding="utf-8") + + cgr = extract_cgr_php_graph(project, project.name) + oracle = run_php_oracle(project) + + result = score_edge_types(cgr, oracle, ec.SCORED_EDGE_TYPES) + by_label = {row["label"]: row for row in result.rows} + for label in ( + cs.RelationshipType.DEFINES.value, + cs.RelationshipType.DEFINES_METHOD.value, + ): + row = by_label.get(label) + assert row is not None, (label, by_label, result.diff) + assert row["precision"] == 1.0 and row["recall"] == 1.0, ( + label, + row, + result.diff, + ) diff --git a/codebase_rag/tests/test_php_function_call.py 
b/codebase_rag/tests/test_php_function_call.py new file mode 100644 index 000000000..d47ff2f1c --- /dev/null +++ b/codebase_rag/tests/test_php_function_call.py @@ -0,0 +1,29 @@ +from pathlib import Path + +from evals.cgr_graph import _capture + + +def _make(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "util.php").write_text( + " None: + # (H) A bare PHP function call (`helper()`) is a function_call_expression whose + # (H) callee is a `name` node under the `function` field. _get_call_target_name + # (H) did not handle the `name` type, so no callee name was extracted and the + # (H) CALLS edge was dropped -- only method/static calls (which expose a `name` + # (H) field directly) resolved. + _make(tmp_path) + ingestor = _capture(tmp_path, "p") + calls = { + (str(from_val), str(to_val)) + for _fl, from_val, rel, _tl, to_val in ingestor.rels + if rel == "CALLS" + } + assert ("p.use.useIt", "p.util.helper") in calls diff --git a/codebase_rag/tests/test_php_functions.py b/codebase_rag/tests/test_php_functions.py new file mode 100644 index 000000000..992d5c900 --- /dev/null +++ b/codebase_rag/tests/test_php_functions.py @@ -0,0 +1,153 @@ +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.tests.conftest import get_relationships +from codebase_rag.types_defs import NodeType + + +def test_php_function_discovery(temp_repo: Path, mock_ingestor: MagicMock) -> None: + project_path = temp_repo / "php_functions_test" + project_path.mkdir() + + (project_path / "example.php").write_text( + encoding="utf-8", + data="""value = 0; + } + + public function getValue() { + return $this->value; + } +} + +interface MyInterface { + public function doSomething(); +} + +enum Status { + case Active; + case Inactive; +} + +function standaloneFunction() { + $obj = new MyPhpClass(); + return $obj->getValue(); +} +""", + ) + + parsers, 
queries = load_parsers() + assert "php" in parsers, "PHP parser should be available" + + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=project_path, + parsers=parsers, + queries=queries, + ) + updater.run() + + created_functions = [ + c + for c in mock_ingestor.ensure_node_batch.call_args_list + if c[0][0] == NodeType.FUNCTION + ] + fn_qns = {c[0][1]["qualified_name"] for c in created_functions} + + assert any(qn.endswith(".standaloneFunction") for qn in fn_qns), fn_qns + + call_rels = get_relationships(mock_ingestor, "CALLS") + assert len(call_rels) >= 1 + + +def test_php_class_discovery(temp_repo: Path, mock_ingestor: MagicMock) -> None: + project_path = temp_repo / "php_class_test" + project_path.mkdir() + + (project_path / "models.php").write_text( + encoding="utf-8", + data=""" None: + project_path = temp_repo / "php_calls_test" + project_path.mkdir() + + (project_path / "service.php").write_text( + encoding="utf-8", + data="""add(1, 2); + } +} + +function main() { + $calc = new Calculator(); + $calc->calculate(); +} +""", + ) + + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=project_path, + parsers=parsers, + queries=queries, + ) + updater.run() + + call_rels = get_relationships(mock_ingestor, "CALLS") + assert len(call_rels) >= 2 diff --git a/codebase_rag/tests/test_php_imports.py b/codebase_rag/tests/test_php_imports.py new file mode 100644 index 000000000..9f8e2ef59 --- /dev/null +++ b/codebase_rag/tests/test_php_imports.py @@ -0,0 +1,93 @@ +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.tests.conftest import get_relationships + + +def test_php_use_statement_import(temp_repo: Path, mock_ingestor: MagicMock) -> None: + project_path = temp_repo / "php_imports_test" + project_path.mkdir() + + (project_path / "Controller.php").write_text( + 
encoding="utf-8", + data="""= 1 + + controller_module = f"{project_path.name}.Controller" + import_mapping = updater.factory.import_processor.import_mapping + if controller_module in import_mapping: + mapping = import_mapping[controller_module] + assert "ProductService" in mapping + assert mapping["ProductService"] == "App.Service.ProductService" + assert "Repo" in mapping + assert mapping["Repo"] == "App.Repository.ProductRepository" + + +def test_php_multiple_use_statements(temp_repo: Path, mock_ingestor: MagicMock) -> None: + project_path = temp_repo / "php_multi_imports" + project_path.mkdir() + + (project_path / "app.php").write_text( + encoding="utf-8", + data=""" set[tuple[str, str]]: + return { + (call[0][0][2], call[0][2][2]) for call in get_relationships(mock_ingestor, rel) + } + + +def test_php_inheritance_and_implements_edges( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "php_inh" + project.mkdir() + (project / "lib.php").write_text(_PHP, encoding="utf-8") + create_and_run_updater(project, mock_ingestor, skip_if_missing="php") + + inherits = _pairs(mock_ingestor, RelationshipType.INHERITS.value) + implements = _pairs(mock_ingestor, RelationshipType.IMPLEMENTS.value) + base = "php_inh.lib" + + # (H) class extends -> INHERITS. + assert (f"{base}.Circle", f"{base}.Base") in inherits, inherits + # (H) class implements -> IMPLEMENTS to each interface. + assert (f"{base}.Circle", f"{base}.Shape") in implements, implements + assert (f"{base}.Circle", f"{base}.Drawable") in implements, implements + # (H) interface extends -> INHERITS to each superinterface. 
+ assert (f"{base}.Big", f"{base}.Shape") in inherits, inherits + assert (f"{base}.Big", f"{base}.Drawable") in inherits, inherits diff --git a/codebase_rag/tests/test_php_inheritance_oracle.py b/codebase_rag/tests/test_php_inheritance_oracle.py new file mode 100644 index 000000000..a27c33a20 --- /dev/null +++ b/codebase_rag/tests/test_php_inheritance_oracle.py @@ -0,0 +1,58 @@ +# (H) Covers PHP inheritance-edge validation: cgr's INHERITS (class/interface +# (H) extends) and IMPLEMENTS (class implements) edges are graded against the +# (H) php-parser oracle, by (source node, base SIMPLE NAME). +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_php_graph +from evals.oracles import php_oracle_available, run_php_oracle +from evals.score import score_name_edge_types + +PHP_SRC = """\ + None: + if not php_oracle_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.PHP not in load_parsers()[0]: + pytest.skip("php parser not available") + + +def test_cgr_matches_php_parser_oracle_on_inheritance_edges(tmp_path: Path) -> None: + _require_php() + project = tmp_path / "php_inh_edge" + project.mkdir() + (project / "lib.php").write_text(PHP_SRC, encoding="utf-8") + + cgr = extract_cgr_php_graph(project, project.name) + oracle = run_php_oracle(project) + + result = score_name_edge_types(cgr, oracle, ec.INHERITANCE_NAME_EDGE_TYPES) + by_label = {row["label"]: row for row in result.rows} + for label in ( + cs.RelationshipType.INHERITS.value, + cs.RelationshipType.IMPLEMENTS.value, + ): + row = by_label.get(label) + assert row is not None, (label, by_label, result.diff) + assert row["precision"] == 1.0 and row["recall"] == 1.0, ( + label, + row, + result.diff, + ) diff --git a/codebase_rag/tests/test_php_retrieval_eval.py 
b/codebase_rag/tests/test_php_retrieval_eval.py new file mode 100644 index 000000000..570618199 --- /dev/null +++ b/codebase_rag/tests/test_php_retrieval_eval.py @@ -0,0 +1,89 @@ +from pathlib import Path + +import pytest + +from evals import constants as ec +from evals.oracles import php_oracle_available +from evals.php_retrieval import ( + cgr_php_call_edges, + oracle_php_call_edges, + score_php_retrieval, +) + +needs_node = pytest.mark.skipif( + not php_oracle_available(), reason="node toolchain not installed" +) + + +def _make_project(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "util.php").write_text( + "helper(); }\n" + " public static function make(): int { return 3; }\n" + " public function orphan(): int { return 9; }\n" + "}\n", + encoding="utf-8", + ) + (root / "use.php").write_text( + "caller();\n" + "}\n", + encoding="utf-8", + ) + + +@needs_node +def test_oracle_captures_first_party_php_calls(tmp_path: Path) -> None: + _make_project(tmp_path) + edges, declared = oracle_php_call_edges(tmp_path) + + # (H) $this->helper(), free(), T::make(), $t->caller() are first-party calls. + assert ("T.php", "helper") in edges + assert ("use.php", "free") in edges + assert ("use.php", "make") in edges + assert ("use.php", "caller") in edges + # (H) orphan is declared but never called -> never a call edge. + assert ("T.php", "orphan") not in edges + assert {"helper", "caller", "make", "free", "orphan", "useIt"} <= declared + + +@needs_node +def test_cgr_matches_oracle_on_clean_php_project(tmp_path: Path) -> None: + _make_project(tmp_path) + oracle, declared = oracle_php_call_edges(tmp_path) + cgr = cgr_php_call_edges(tmp_path, tmp_path.name, declared) + assert cgr == oracle + + +@needs_node +def test_php_dynamic_member_call_not_emitted(tmp_path: Path) -> None: + # (H) A dynamic member call (`$this->$method()`) has a `variable` offset whose + # (H) name is the variable identifier ("method"), not a static method name. 
The + # (H) oracle must not emit it as a call edge even when it collides with a + # (H) declared first-party method name, or it becomes a false ground-truth edge. + tmp_path.mkdir(parents=True, exist_ok=True) + (tmp_path / "c.php").write_text( + "$method(); }\n" + "}\n", + encoding="utf-8", + ) + edges, declared = oracle_php_call_edges(tmp_path) + assert "method" in declared + assert ("c.php", "method") not in edges + + +def test_score_php_retrieval_prf() -> None: + result = score_php_retrieval( + {("a.php", "f"), ("a.php", "g")}, {("a.php", "f"), ("b.php", "h")} + ) + row = next(r for r in result.rows if r["label"] == ec.PHP_RETRIEVAL_LABEL) + assert (row["tp"], row["fp"], row["fn"]) == (1, 1, 1) diff --git a/codebase_rag/tests/test_php_span_oracle.py b/codebase_rag/tests/test_php_span_oracle.py new file mode 100644 index 000000000..60b003ab8 --- /dev/null +++ b/codebase_rag/tests/test_php_span_oracle.py @@ -0,0 +1,74 @@ +# (H) Covers PHP node SPAN (end_line) validation: cgr's end_line for each node is +# (H) graded against the php-parser oracle (which emits node.loc.end.line), joined +# (H) on (kind, file, start). Exercises a class with a multi-line method, an +# (H) interface, an enum, and a multi-line function so spans are not single line. 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_php_graph +from evals.oracles import php_oracle_available, run_php_oracle +from evals.score import score_span + +PHP_SRC = """\ +size * $scale; + } +} + +interface Shape +{ + public function area(int $scale): int; +} + +enum Color +{ + case Red; + case Green; +} + +function standalone(int $a): int +{ + return $a + 1; +} +""" + + +def _require_php() -> None: + if not php_oracle_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.PHP not in load_parsers()[0]: + pytest.skip("php parser not available") + + +def test_cgr_matches_php_parser_oracle_on_node_spans(tmp_path: Path) -> None: + _require_php() + project = tmp_path / "php_span_test" + project.mkdir() + (project / "lib.php").write_text(PHP_SRC, encoding="utf-8") + + cgr = extract_cgr_php_graph(project, project.name) + oracle = run_php_oracle(project) + + result = score_span(cgr, oracle, ec.PHP_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + aggregate = by_label.get(ec.AGGREGATE_LABEL) + assert aggregate is not None, (by_label, result.diff) + assert aggregate["precision"] == 1.0 and aggregate["recall"] == 1.0, ( + aggregate, + result.diff, + ) + assert aggregate["tp"] >= 4, aggregate diff --git a/codebase_rag/tests/test_php_structure_oracle.py b/codebase_rag/tests/test_php_structure_oracle.py new file mode 100644 index 000000000..577eb14ee --- /dev/null +++ b/codebase_rag/tests/test_php_structure_oracle.py @@ -0,0 +1,71 @@ +# (H) Covers the PHP structure oracle harness (evals/oracles/php_oracle + +# (H) evals/php_l1.py): the php-parser oracle is authoritative ground truth, and +# (H) cgr's captured PHP nodes are graded against it on (kind, file, start_line). 
+# (H) Includes an attributed class (whose span starts at the attribute) and an +# (H) anonymous class (whose methods cgr models as Functions). +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_php_nodes +from evals.oracles import php_oracle_available, run_php_oracle +from evals.score import score_node_kinds +from evals.types_defs import GraphData + +PHP_SRC = """\ + None: + if not php_oracle_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.PHP not in load_parsers()[0]: + pytest.skip("php parser not available") + + +def test_cgr_matches_php_parser_oracle_on_php_structure(tmp_path: Path) -> None: + _require_php() + project = tmp_path / "php_oracle_test" + project.mkdir() + (project / "sample.php").write_text(PHP_SRC, encoding="utf-8") + + cgr = GraphData( + nodes=extract_cgr_php_nodes(project, project.name), + edges=set(), + name_edges=set(), + ) + oracle = run_php_oracle(project) + + result = score_node_kinds(cgr, oracle, ec.PHP_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + for label in ("Class", "Interface", "Enum", "Method", "Function"): + row = by_label.get(label) + assert row is not None, (label, by_label) + assert row["precision"] == 1.0 and row["recall"] == 1.0, (label, row) diff --git a/codebase_rag/tests/test_project_name_flag.py b/codebase_rag/tests/test_project_name_flag.py new file mode 100644 index 000000000..214aa710c --- /dev/null +++ b/codebase_rag/tests/test_project_name_flag.py @@ -0,0 +1,348 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.tests.conftest import get_node_names + + 
+@pytest.fixture(scope="module") +def parsers_and_queries() -> tuple[dict, dict]: + return load_parsers() + + +def _make_updater( + repo_path: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + project_name: str | None = None, +) -> GraphUpdater: + parsers, queries = parsers_and_queries + return GraphUpdater( + ingestor=mock_ingestor, + repo_path=repo_path, + parsers=parsers, + queries=queries, + project_name=project_name, + ) + + +def _write_python_file(repo_path: Path, rel_path: str, content: str) -> None: + full = repo_path / rel_path + full.parent.mkdir(parents=True, exist_ok=True) + full.write_text(content) + + +class TestDefaultProjectName: + def test_default_uses_directory_name( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater(temp_repo, mock_ingestor, parsers_and_queries) + assert updater.project_name == temp_repo.resolve().name + + def test_default_none_uses_directory_name( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name=None + ) + assert updater.project_name == temp_repo.resolve().name + + def test_default_empty_string_uses_directory_name( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="" + ) + assert updater.project_name == temp_repo.resolve().name + + def test_default_whitespace_only_uses_directory_name( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name=" " + ) + assert updater.project_name == temp_repo.resolve().name + + +class TestExplicitProjectName: + def test_override_simple( + self, + 
temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="MyProject" + ) + assert updater.project_name == "MyProject" + + def test_override_with_hyphens( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, + mock_ingestor, + parsers_and_queries, + project_name="my-cool-project", + ) + assert updater.project_name == "my-cool-project" + + def test_override_with_dots( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, + mock_ingestor, + parsers_and_queries, + project_name="com.example.app", + ) + assert updater.project_name == "com.example.app" + + +class TestEdgeCases: + def test_generic_dir_name_src( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + src_dir = temp_repo / "src" + src_dir.mkdir() + updater = _make_updater( + src_dir, mock_ingestor, parsers_and_queries, project_name="BlazingRenderer" + ) + assert updater.project_name == "BlazingRenderer" + updater_default = _make_updater(src_dir, mock_ingestor, parsers_and_queries) + assert updater_default.project_name == "src" + + def test_generic_dir_name_main( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + main_dir = temp_repo / "main" + main_dir.mkdir() + updater = _make_updater( + main_dir, + mock_ingestor, + parsers_and_queries, + project_name="ActualProjectName", + ) + assert updater.project_name == "ActualProjectName" + + def test_version_named_directory( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + ver_dir = temp_repo / "v1.3.2" + ver_dir.mkdir() + updater = _make_updater( + ver_dir, 
mock_ingestor, parsers_and_queries, project_name="my-library" + ) + assert updater.project_name == "my-library" + updater_default = _make_updater(ver_dir, mock_ingestor, parsers_and_queries) + assert updater_default.project_name == "v1.3.2" + + def test_nested_same_name_parent( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + nested = temp_repo / "BRender" / "BlazingRenderer" + nested.mkdir(parents=True) + updater = _make_updater( + nested, mock_ingestor, parsers_and_queries, project_name="BlazingRenderer" + ) + assert updater.project_name == "BlazingRenderer" + + +class TestFactoryPropagation: + def test_factory_receives_project_name( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="CustomName" + ) + assert updater.factory.project_name == "CustomName" + + def test_factory_default_project_name( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater(temp_repo, mock_ingestor, parsers_and_queries) + assert updater.factory.project_name == temp_repo.resolve().name + + def test_structure_processor_receives_project_name( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="CustomName" + ) + assert updater.factory.structure_processor.project_name == "CustomName" + + def test_import_processor_receives_project_name( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="CustomName" + ) + assert updater.factory.import_processor.project_name == "CustomName" + + def 
test_definition_processor_receives_project_name( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="CustomName" + ) + assert updater.factory.definition_processor.project_name == "CustomName" + + def test_call_processor_receives_project_name( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="CustomName" + ) + assert updater.factory.call_processor.project_name == "CustomName" + + def test_type_inference_receives_project_name( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="CustomName" + ) + assert updater.factory.type_inference.project_name == "CustomName" + + +class TestQualifiedNameIntegration: + def test_module_qualified_names_use_override( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + _write_python_file(temp_repo, "hello.py", "def greet():\n pass\n") + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="MyApp" + ) + updater.run(force=True) + module_names = get_node_names(mock_ingestor, "Module") + assert "MyApp.hello" in module_names + + def test_function_qualified_names_use_override( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + _write_python_file(temp_repo, "utils.py", "def helper():\n return 42\n") + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="MyApp" + ) + updater.run(force=True) + func_names = get_node_names(mock_ingestor, "Function") + assert "MyApp.utils.helper" in func_names + + def 
test_class_qualified_names_use_override( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + _write_python_file(temp_repo, "models.py", "class User:\n pass\n") + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="MyApp" + ) + updater.run(force=True) + class_names = get_node_names(mock_ingestor, "Class") + assert "MyApp.models.User" in class_names + + def test_default_qualified_names_use_directory( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + _write_python_file(temp_repo, "foo.py", "def bar():\n pass\n") + updater = _make_updater(temp_repo, mock_ingestor, parsers_and_queries) + updater.run(force=True) + dir_name = temp_repo.resolve().name + func_names = get_node_names(mock_ingestor, "Function") + assert f"{dir_name}.foo.bar" in func_names + + def test_package_qualified_names_use_override( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + _write_python_file(temp_repo, "pkg/__init__.py", "") + _write_python_file(temp_repo, "pkg/core.py", "def run():\n pass\n") + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="CustomProj" + ) + updater.run(force=True) + func_names = get_node_names(mock_ingestor, "Function") + assert "CustomProj.pkg.core.run" in func_names + + def test_override_vs_default_different_names( + self, + temp_repo: Path, + mock_ingestor: MagicMock, + parsers_and_queries: tuple[dict, dict], + ) -> None: + _write_python_file(temp_repo, "app.py", "def main():\n pass\n") + dir_name = temp_repo.resolve().name + updater = _make_updater( + temp_repo, mock_ingestor, parsers_and_queries, project_name="OverrideName" + ) + updater.run(force=True) + func_names = get_node_names(mock_ingestor, "Function") + assert "OverrideName.app.main" in func_names + assert f"{dir_name}.app.main" not in func_names diff --git 
a/codebase_rag/tests/test_project_naming.py b/codebase_rag/tests/test_project_naming.py new file mode 100644 index 000000000..29470944a --- /dev/null +++ b/codebase_rag/tests/test_project_naming.py @@ -0,0 +1,74 @@ +from pathlib import Path + +import pytest + +from codebase_rag.utils.path_utils import derive_project_name, resolve_repo_path + + +def test_derive_project_name_is_stable(tmp_path: Path) -> None: + repo = tmp_path / "myrepo" + repo.mkdir() + first = derive_project_name(repo) + second = derive_project_name(repo) + assert first == second + + +def test_derive_project_name_includes_basename(tmp_path: Path) -> None: + repo = tmp_path / "myrepo" + repo.mkdir() + name = derive_project_name(repo) + assert name.startswith("myrepo__") + assert len(name.split("__")[1]) == 8 + + +def test_derive_project_name_disambiguates_same_basename(tmp_path: Path) -> None: + repo_a = tmp_path / "a" / "frontend" + repo_b = tmp_path / "b" / "frontend" + repo_a.mkdir(parents=True) + repo_b.mkdir(parents=True) + assert derive_project_name(repo_a) != derive_project_name(repo_b) + assert derive_project_name(repo_a).startswith("frontend__") + assert derive_project_name(repo_b).startswith("frontend__") + + +def test_derive_project_name_slugifies_special_chars(tmp_path: Path) -> None: + weird = tmp_path / "my repo (v2)!" 
+ weird.mkdir() + name = derive_project_name(weird) + base = name.split("__")[0] + assert all(c.isalnum() or c in "_-" for c in base) + + +def test_derive_project_name_fallback_for_root() -> None: + name = derive_project_name(Path("/")) + assert name.startswith("repo__") + + +def test_resolve_repo_path_explicit_wins(tmp_path: Path) -> None: + repo = tmp_path / "explicit" + repo.mkdir() + resolved = resolve_repo_path(str(repo), "/some/other/path") + assert resolved == repo.resolve() + + +def test_resolve_repo_path_uses_target_default(tmp_path: Path) -> None: + repo = tmp_path / "target" + repo.mkdir() + resolved = resolve_repo_path(None, str(repo)) + assert resolved == repo.resolve() + + +def test_resolve_repo_path_dot_falls_back_to_cwd( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.chdir(tmp_path) + resolved = resolve_repo_path(None, ".") + assert resolved == tmp_path.resolve() + + +def test_resolve_repo_path_empty_falls_back_to_cwd( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.chdir(tmp_path) + resolved = resolve_repo_path(None, "") + assert resolved == tmp_path.resolve() diff --git a/codebase_rag/tests/test_property_getter_calls.py b/codebase_rag/tests/test_property_getter_calls.py new file mode 100644 index 000000000..9168177cf --- /dev/null +++ b/codebase_rag/tests/test_property_getter_calls.py @@ -0,0 +1,102 @@ +# (H) L3 finding from the evals/ harness: accessing an @property getter runs the +# (H) getter method at runtime, but cgr saw a plain attribute access and emitted no +# (H) CALLS edge. A property access must produce a CALLS edge to the getter method, +# (H) while a normal attribute / method reference must not. 
+from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +MODULE_SRC = """class Engine: + def __init__(self) -> None: + self._n = 0 + + @property + def status(self) -> str: + return self._compute() + + def _compute(self) -> str: + return "ok" + + def check(self) -> str: + return self.status + + +def use(e: Engine) -> str: + return e.status + + +def plain(e: Engine) -> str: + return e._compute() +""" + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + (tmp_path / "m.py").write_text(MODULE_SRC) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestPropertyGetterCalls: + def test_property_access_via_self_is_a_call(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ("proj.m.Engine.check", "proj.m.Engine.status") in 
calls, calls + + def test_property_access_via_typed_param_is_a_call(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ("proj.m.use", "proj.m.Engine.status") in calls, calls + + def test_property_access_only_emits_the_getter_edge(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + # (H) `use` only reads e.status; no spurious edge to the unrelated _compute. + from_use = {to for (frm, to) in calls if frm == "proj.m.use"} + assert from_use == {"proj.m.Engine.status"}, from_use + + def test_regular_method_call_is_unaffected(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + # (H) plain() calls a normal method, resolved by the existing call path. + assert ("proj.m.plain", "proj.m.Engine._compute") in calls, calls diff --git a/codebase_rag/tests/test_property_return_type_chain.py b/codebase_rag/tests/test_property_return_type_chain.py new file mode 100644 index 000000000..06f985764 --- /dev/null +++ b/codebase_rag/tests/test_property_return_type_chain.py @@ -0,0 +1,87 @@ +# (H) L3 finding from the evals/ harness: a method calls self.prop.method(), where +# (H) self.prop is an @property whose declared return type names the class owning +# (H) the real method. The property's return type must seed self.prop's type so the +# (H) chained call resolves to the correct class instead of an ambiguous same-class +# (H) method of the same name. 
+from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +MODULE_SRC = """class Worker: + def build(self) -> str: + return "real" + + +class Engine: + @property + def inner(self) -> Worker: + return Worker() + + def build(self) -> str: + return self.inner.build() +""" + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + (tmp_path / "m.py").write_text(MODULE_SRC) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestPropertyReturnTypeChain: + def test_chained_call_through_property_resolves_to_return_type_class( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ("proj.m.Engine.build", "proj.m.Worker.build") in calls, calls + + def test_does_not_resolve_to_same_class_method_of_same_name( + self, tmp_path: Path + ) 
-> None: + calls = _calls(tmp_path) + assert ("proj.m.Engine.build", "proj.m.Engine.build") not in calls, calls diff --git a/codebase_rag/tests/test_protobuf_service.py b/codebase_rag/tests/test_protobuf_service.py index 7bb2c0de0..2b8da8a08 100644 --- a/codebase_rag/tests/test_protobuf_service.py +++ b/codebase_rag/tests/test_protobuf_service.py @@ -169,3 +169,130 @@ def test_protobuf_ingestor_split_index_serialization_and_deserialization( assert rel.target_id == "test_project.UserService.get_user" assert rel.source_label == NodeType.CLASS assert rel.target_label == NodeType.METHOD + + +def test_ensure_node_batch_no_message_class_logs_warning(tmp_path: Path) -> None: + from codebase_rag.services.protobuf_service import _MSG_CLASS_CACHE + + output_dir = tmp_path / "out" + output_dir.mkdir() + ingestor = ProtobufFileIngestor(str(output_dir)) + + from codebase_rag import constants as cs + + _MSG_CLASS_CACHE[cs.NodeLabel.UNION] = None + + ingestor.ensure_node_batch(cs.NodeLabel.UNION, {"qualified_name": "foo.bar"}) + + assert "foo.bar" not in ingestor._nodes + _MSG_CLASS_CACHE.pop(cs.NodeLabel.UNION, None) + + +def test_ensure_node_batch_no_oneof_mapping_logs_warning(tmp_path: Path) -> None: + from codebase_rag.services.protobuf_service import LABEL_TO_ONEOF_FIELD + + output_dir = tmp_path / "out" + output_dir.mkdir() + ingestor = ProtobufFileIngestor(str(output_dir)) + + from codebase_rag import constants as cs + + ingestor.ensure_node_batch( + cs.NodeLabel.PROJECT, {"name": "test_proj", "qualified_name": "test_proj"} + ) + assert "test_proj" in ingestor._nodes + + +def test_ensure_relationship_batch_dedup(tmp_path: Path) -> None: + output_dir = tmp_path / "out" + output_dir.mkdir() + ingestor = ProtobufFileIngestor(str(output_dir)) + + from_spec = ("Class", "qualified_name", "proj.MyClass") + to_spec = ("Method", "qualified_name", "proj.MyClass.method") + rel_type = "DEFINES_METHOD" + + ingestor.ensure_relationship_batch(from_spec, rel_type, to_spec) + 
ingestor.ensure_relationship_batch(from_spec, rel_type, to_spec) + + assert len(ingestor._relationships) == 1 + + +def test_ensure_relationship_batch_dedup_with_properties_merge(tmp_path: Path) -> None: + output_dir = tmp_path / "out" + output_dir.mkdir() + ingestor = ProtobufFileIngestor(str(output_dir)) + + from_spec = ("Class", "qualified_name", "proj.MyClass") + to_spec = ("Method", "qualified_name", "proj.MyClass.method") + rel_type = "DEFINES_METHOD" + + ingestor.ensure_relationship_batch(from_spec, rel_type, to_spec) + ingestor.ensure_relationship_batch(from_spec, rel_type, to_spec, {"extra": "val"}) + + assert len(ingestor._relationships) == 1 + + +def test_ensure_relationship_batch_invalid_empty_source(tmp_path: Path) -> None: + output_dir = tmp_path / "out" + output_dir.mkdir() + ingestor = ProtobufFileIngestor(str(output_dir)) + + from_spec = ("Class", "qualified_name", "") + to_spec = ("Method", "qualified_name", "proj.MyClass.method") + rel_type = "DEFINES_METHOD" + + ingestor.ensure_relationship_batch(from_spec, rel_type, to_spec) + + assert len(ingestor._relationships) == 0 + + +def test_ensure_relationship_batch_invalid_empty_target(tmp_path: Path) -> None: + output_dir = tmp_path / "out" + output_dir.mkdir() + ingestor = ProtobufFileIngestor(str(output_dir)) + + from_spec = ("Class", "qualified_name", "proj.MyClass") + to_spec = ("Method", "qualified_name", " ") + rel_type = "DEFINES_METHOD" + + ingestor.ensure_relationship_batch(from_spec, rel_type, to_spec) + + assert len(ingestor._relationships) == 0 + + +def test_ensure_relationship_batch_unknown_rel_type(tmp_path: Path) -> None: + from codebase_rag.services.protobuf_service import _REL_TYPE_CACHE + + output_dir = tmp_path / "out" + output_dir.mkdir() + ingestor = ProtobufFileIngestor(str(output_dir)) + + fake_rel_type = "COMPLETELY_FAKE_REL_TYPE_XYZ" + _REL_TYPE_CACHE.pop(fake_rel_type, None) + + from_spec = ("Class", "qualified_name", "proj.A") + to_spec = ("Method", "qualified_name", 
"proj.A.b") + + ingestor.ensure_relationship_batch(from_spec, fake_rel_type, to_spec) + + assert len(ingestor._relationships) == 1 + key = next(iter(ingestor._relationships)) + rel_obj = ingestor._relationships[key] + assert ( + rel_obj.type + == pb.Relationship.RelationshipType.RELATIONSHIP_TYPE_UNSPECIFIED + ) + + +def test_ensure_relationship_batch_none_values(tmp_path: Path) -> None: + output_dir = tmp_path / "out" + output_dir.mkdir() + ingestor = ProtobufFileIngestor(str(output_dir)) + + from_spec = ("Class", "qualified_name", None) + to_spec = ("Method", "qualified_name", "proj.A.b") + + ingestor.ensure_relationship_batch(from_spec, "DEFINES_METHOD", to_spec) + + assert len(ingestor._relationships) == 0 diff --git a/codebase_rag/tests/test_protocol_dispatch_resolution.py b/codebase_rag/tests/test_protocol_dispatch_resolution.py new file mode 100644 index 000000000..410eaf83b --- /dev/null +++ b/codebase_rag/tests/test_protocol_dispatch_resolution.py @@ -0,0 +1,123 @@ +# (H) L3 finding from the evals/ harness: DefinitionProcessor._extract_decorators calls +# (H) self._handler.extract_decorators(node), where _handler is annotated as the Protocol +# (H) LanguageHandler (class-level annotation) and assigned dynamically via +# (H) get_handler(language). The runtime type is one of several conformers, so the sound +# (H) call graph emits an edge to extract_decorators on every conformer (capturing the +# (H) traced PythonHandler edge) and never to the Protocol stub, which never runs. 
+from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +FILES = { + "pkg/__init__.py": "", + "pkg/proto.py": ( + "from typing import Protocol\n\n\n" + "class HandlerLike(Protocol):\n" + " def extract(self, node): ...\n" + ), + "pkg/base.py": ( + "class BaseHandler:\n def extract(self, node):\n return []\n" + ), + "pkg/python_h.py": ( + "from .base import BaseHandler\n\n\n" + "class PyHandler(BaseHandler):\n" + " def extract(self, node):\n" + " return ['py']\n" + ), + "pkg/js_h.py": ( + "from .base import BaseHandler\n\n\n" + "class JsHandler(BaseHandler):\n" + " def extract(self, node):\n" + " return ['js']\n" + ), + "pkg/proc.py": ( + "from .proto import HandlerLike\n\n\n" + "class Proc:\n" + " _handler: HandlerLike\n\n" + " def __init__(self, handler) -> None:\n" + " self._handler = handler\n\n" + " def go(self, node):\n" + " return self._handler.extract(node)\n" + ), +} + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + for rel, content in FILES.items(): + p = 
tmp_path / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestProtocolDispatchResolution: + def test_dispatches_to_concrete_conformer(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.proc.Proc.go", + "proj.pkg.python_h.PyHandler.extract", + ) in calls, calls + + def test_dispatches_to_all_conformers(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.proc.Proc.go", + "proj.pkg.js_h.JsHandler.extract", + ) in calls, calls + assert ( + "proj.pkg.proc.Proc.go", + "proj.pkg.base.BaseHandler.extract", + ) in calls, calls + + def test_does_not_emit_protocol_stub_edge(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.proc.Proc.go", + "proj.pkg.proto.HandlerLike.extract", + ) not in calls, calls diff --git a/codebase_rag/tests/test_protocol_impl_resolution.py b/codebase_rag/tests/test_protocol_impl_resolution.py new file mode 100644 index 000000000..a0c8036f9 --- /dev/null +++ b/codebase_rag/tests/test_protocol_impl_resolution.py @@ -0,0 +1,100 @@ +# (H) L3 finding from the evals/ harness: a call on a parameter typed as a +# (H) Protocol (function_registry.get() where function_registry is a +# (H) FunctionRegistryTrieProtocol) is traced to the concrete implementer +# (H) (FunctionRegistryTrie), not the Protocol stub. cgr infers the Protocol +# (H) type but stops at the stub; the XxxProtocol -> Xxx naming convention picks +# (H) the real implementer and disambiguates it from other structural conformers +# (H) such as a test mock. 
+from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +MODULE_SRC = """from typing import Protocol + + +class StoreProtocol(Protocol): + def fetch(self, key: str) -> int: ... + + +class Store: + def fetch(self, key: str) -> int: + return 1 + + +class MockStore: + def fetch(self, key: str) -> int: + return 2 + + +def use(store: StoreProtocol) -> int: + return store.fetch("x") +""" + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + (tmp_path / "m.py").write_text(MODULE_SRC) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestProtocolImplResolution: + def test_protocol_typed_call_resolves_to_concrete_implementer( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ("proj.m.use", "proj.m.Store.fetch") in 
calls, calls + + def test_does_not_resolve_to_protocol_stub(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ("proj.m.use", "proj.m.StoreProtocol.fetch") not in calls, calls + + def test_naming_convention_disambiguates_from_other_conformer( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ("proj.m.use", "proj.m.MockStore.fetch") not in calls, calls diff --git a/codebase_rag/tests/test_protocol_operator_dispatch.py b/codebase_rag/tests/test_protocol_operator_dispatch.py new file mode 100644 index 000000000..45c469c13 --- /dev/null +++ b/codebase_rag/tests/test_protocol_operator_dispatch.py @@ -0,0 +1,125 @@ +# (H) L3 finding from the evals/ harness: an operator on a Protocol-typed attribute +# (H) (self.ast_cache[k], k in self.ast_cache) must dispatch to the dunder on the +# (H) concrete implementer even when the implementer's name does not follow the +# (H) XxxProtocol convention, and even when the dunder (e.g. __len__) is defined only on +# (H) the implementer and not declared on the Protocol stub. Structural conformance +# (H) (a class defining the Protocol's named methods) identifies the implementer. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +FILES = { + "pkg/__init__.py": "", + "pkg/proto.py": ( + "from typing import Protocol\n\n\n" + "class Cache(Protocol):\n" + " def snapshot(self):\n ...\n\n" + " def __getitem__(self, key):\n ...\n\n" + " def __contains__(self, key):\n ...\n" + ), + # (H) MemCache does not match the Cache name convention and adds __len__, which the + # (H) Protocol does not declare. It conforms via the named method snapshot. 
+ "pkg/impl.py": ( + "class MemCache:\n" + " def snapshot(self):\n return {}\n\n" + " def __getitem__(self, key):\n return 1\n\n" + " def __contains__(self, key):\n return True\n\n" + " def __len__(self):\n return 0\n" + ), + "pkg/user.py": ( + "from .proto import Cache\n\n\n" + "class User:\n" + " def __init__(self, cache: Cache) -> None:\n" + " self._cache = cache\n\n" + " def _touch(self):\n" + " return None\n\n" + " def use(self, key):\n" + " self._touch()\n" + " if key in self._cache:\n" + " return self._cache[key]\n" + " return len(self._cache)\n" + ), +} + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + for rel, content in FILES.items(): + p = tmp_path / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestProtocolOperatorDispatch: + def test_subscript_and_membership_reach_structural_conformer( + self, tmp_path: Path + ) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.user.User.use", + 
"proj.pkg.impl.MemCache.__getitem__", + ) in calls, calls + assert ( + "proj.pkg.user.User.use", + "proj.pkg.impl.MemCache.__contains__", + ) in calls, calls + + def test_dunder_only_on_implementer_resolves(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.user.User.use", + "proj.pkg.impl.MemCache.__len__", + ) in calls, calls + + def test_protocol_stub_not_emitted(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.user.User.use", + "proj.pkg.proto.Cache.__getitem__", + ) not in calls, calls diff --git a/codebase_rag/tests/test_provider_classes.py b/codebase_rag/tests/test_provider_classes.py index 1475914a0..da492ebd7 100644 --- a/codebase_rag/tests/test_provider_classes.py +++ b/codebase_rag/tests/test_provider_classes.py @@ -9,6 +9,8 @@ from codebase_rag.constants import GoogleProviderType, Provider from codebase_rag.providers.base import ( + AnthropicProvider, + AzureOpenAIProvider, GoogleProvider, ModelProvider, OllamaProvider, @@ -37,16 +39,42 @@ def test_get_valid_providers(self) -> None: assert isinstance(ollama_provider, OllamaProvider) assert ollama_provider.provider_name == Provider.OLLAMA + anthropic_provider = get_provider(Provider.ANTHROPIC, api_key="test-key") + assert isinstance(anthropic_provider, AnthropicProvider) + assert anthropic_provider.provider_name == Provider.ANTHROPIC + + azure_provider = get_provider( + Provider.AZURE, + api_key="test-key", + endpoint="https://myresource.openai.azure.com", + ) + assert isinstance(azure_provider, AzureOpenAIProvider) + assert azure_provider.provider_name == Provider.AZURE + def test_get_invalid_provider(self) -> None: with pytest.raises(ValueError, match="Unknown provider 'invalid_provider'"): get_provider("invalid_provider") + def test_get_litellm_provider(self) -> None: + litellm_provider = get_provider( + Provider.LITELLM_PROXY, + api_key="sk-test", + endpoint="http://localhost:4000/v1", + ) + from codebase_rag.providers.litellm import 
LiteLLMProvider + + assert isinstance(litellm_provider, LiteLLMProvider) + assert litellm_provider.provider_name == Provider.LITELLM_PROXY + def test_list_providers(self) -> None: providers = list_providers() assert Provider.GOOGLE in providers assert Provider.OPENAI in providers assert Provider.OLLAMA in providers - assert len(providers) >= 3 + assert Provider.ANTHROPIC in providers + assert Provider.AZURE in providers + assert Provider.LITELLM_PROXY in providers + assert len(providers) >= 6 def test_register_custom_provider(self) -> None: class CustomProvider(ModelProvider): @@ -190,6 +218,107 @@ def test_ollama_validation_connection_error(self, mock_client: Any) -> None: provider.validate_config() +class TestAnthropicProvider: + def test_anthropic_configuration(self) -> None: + provider = AnthropicProvider(api_key="sk-ant-test-key") + assert provider.provider_name == Provider.ANTHROPIC + assert provider.api_key == "sk-ant-test-key" + provider.validate_config() + + def test_anthropic_validation_error(self) -> None: + provider = AnthropicProvider() + with pytest.raises(ValueError, match="Anthropic provider requires api_key"): + provider.validate_config() + + @patch("codebase_rag.providers.base.PydanticAnthropicProvider") + @patch("codebase_rag.providers.base.AnthropicModel") + def test_anthropic_model_creation( + self, mock_anthropic_model: Any, mock_anthropic_provider: Any + ) -> None: + provider = AnthropicProvider(api_key="sk-ant-test-key") + mock_model = MagicMock() + mock_anthropic_model.return_value = mock_model + result = provider.create_model("claude-opus-4-6") + mock_anthropic_model.assert_called_once() + assert result == mock_model + + @patch("codebase_rag.providers.base.PydanticAnthropicProvider") + @patch("codebase_rag.providers.base.AnthropicModel") + def test_anthropic_model_enables_prompt_caching( + self, mock_anthropic_model: Any, mock_anthropic_provider: Any + ) -> None: + provider = AnthropicProvider(api_key="sk-ant-test-key") + 
provider.create_model("claude-opus-4-7") + + settings_arg = mock_anthropic_model.call_args.kwargs["settings"] + assert settings_arg["anthropic_cache_instructions"] is True + assert settings_arg["anthropic_cache_tool_definitions"] is True + assert settings_arg["anthropic_cache_messages"] is True + + def test_anthropic_api_key_from_env(self) -> None: + with patch.dict("os.environ", {"ANTHROPIC_API_KEY": "env-key"}): + provider = AnthropicProvider() + assert provider.api_key == "env-key" + + +class TestAzureOpenAIProvider: + def test_azure_configuration(self) -> None: + provider = AzureOpenAIProvider( + api_key="azure-key", + endpoint="https://myresource.openai.azure.com", + api_version="2024-06-01", + ) + assert provider.provider_name == Provider.AZURE + assert provider.api_key == "azure-key" + assert provider.endpoint == "https://myresource.openai.azure.com" + assert provider.api_version == "2024-06-01" + provider.validate_config() + + def test_azure_validation_error_no_key(self) -> None: + provider = AzureOpenAIProvider(endpoint="https://myresource.openai.azure.com") + with pytest.raises(ValueError, match="Azure OpenAI provider requires api_key"): + provider.validate_config() + + def test_azure_validation_error_no_endpoint(self) -> None: + provider = AzureOpenAIProvider(api_key="azure-key") + with pytest.raises(ValueError, match="Azure OpenAI provider requires endpoint"): + provider.validate_config() + + @patch("codebase_rag.providers.base.PydanticAzureProvider") + @patch("codebase_rag.providers.base.OpenAIChatModel") + def test_azure_model_creation( + self, mock_chat_model: Any, mock_azure_provider: Any + ) -> None: + provider = AzureOpenAIProvider( + api_key="azure-key", + endpoint="https://myresource.openai.azure.com", + ) + mock_model = MagicMock() + mock_chat_model.return_value = mock_model + result = provider.create_model("gpt-4o") + mock_azure_provider.assert_called_once_with( + api_key="azure-key", + azure_endpoint="https://myresource.openai.azure.com", + 
api_version=None, + ) + mock_chat_model.assert_called_once_with( + "gpt-4o", provider=mock_azure_provider.return_value + ) + assert result == mock_model + + def test_azure_api_key_from_env(self) -> None: + with patch.dict( + "os.environ", + { + "AZURE_API_KEY": "env-key", + "AZURE_OPENAI_ENDPOINT": "https://env.openai.azure.com", + }, + ): + provider = AzureOpenAIProvider() + assert provider.api_key == "env-key" + assert provider.endpoint == "https://env.openai.azure.com" + + class TestModelCreation: @patch("codebase_rag.providers.base.PydanticGoogleProvider") @patch("codebase_rag.providers.base.GoogleModel") @@ -275,3 +404,109 @@ def test_ollama_model_creation( mock_openai_provider.assert_called_once_with( api_key="ollama", base_url="http://localhost:11434/v1" ) + + +class TestLiteLLMProvider: + def test_litellm_configuration(self) -> None: + from codebase_rag.providers.litellm import LiteLLMProvider + + provider = LiteLLMProvider( + api_key="sk-litellm-key", endpoint="http://litellm:4000/v1" + ) + assert provider.provider_name == Provider.LITELLM_PROXY + assert provider.api_key == "sk-litellm-key" + assert provider.endpoint == "http://litellm:4000/v1" + + def test_litellm_default_endpoint(self) -> None: + from codebase_rag.providers.litellm import LiteLLMProvider + + provider = LiteLLMProvider() + assert provider.endpoint == "http://localhost:4000/v1" + + def test_litellm_no_endpoint_validation_error(self) -> None: + from codebase_rag.providers.litellm import LiteLLMProvider + + provider = LiteLLMProvider(endpoint="") + with pytest.raises(ValueError, match="LiteLLM provider requires endpoint"): + provider.validate_config() + + @patch("httpx.Client") + def test_litellm_validation_success(self, mock_client: Any) -> None: + from codebase_rag.providers.litellm import LiteLLMProvider + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_client.return_value.__enter__.return_value.get.return_value = mock_response + + provider = 
LiteLLMProvider(api_key="sk-test", endpoint="http://litellm:4000/v1") + provider.validate_config() + + @patch("httpx.Client") + def test_litellm_validation_server_not_running(self, mock_client: Any) -> None: + from codebase_rag.providers.litellm import LiteLLMProvider + + mock_response = MagicMock() + mock_response.status_code = 404 + mock_client.return_value.__enter__.return_value.get.return_value = mock_response + + provider = LiteLLMProvider(endpoint="http://litellm:4000/v1") + with pytest.raises(ValueError, match="LiteLLM proxy server not responding"): + provider.validate_config() + + @patch("httpx.Client") + def test_litellm_validation_fallback_to_models_endpoint( + self, mock_client: Any + ) -> None: + from codebase_rag.providers.litellm import LiteLLMProvider + + health_response = MagicMock() + health_response.status_code = 401 + models_response = MagicMock() + models_response.status_code = 200 + mock_client.return_value.__enter__.return_value.get.side_effect = [ + health_response, + models_response, + ] + + provider = LiteLLMProvider(api_key="sk-test", endpoint="http://litellm:4000/v1") + provider.validate_config() + + @patch("httpx.Client") + def test_litellm_validation_connection_error(self, mock_client: Any) -> None: + import httpx + + from codebase_rag.providers.litellm import LiteLLMProvider + + mock_client.return_value.__enter__.return_value.get.side_effect = ( + httpx.ConnectError("Connection failed") + ) + + provider = LiteLLMProvider(endpoint="http://litellm:4000/v1") + with pytest.raises(ValueError, match="LiteLLM proxy server not responding"): + provider.validate_config() + + @patch("codebase_rag.providers.litellm.PydanticLiteLLMProvider") + @patch("codebase_rag.providers.litellm.OpenAIChatModel") + @patch("httpx.Client") + def test_litellm_model_creation( + self, mock_client: Any, mock_chat_model: Any, mock_litellm_provider: Any + ) -> None: + from codebase_rag.providers.litellm import LiteLLMProvider + + mock_response = MagicMock() + 
mock_response.status_code = 200 + mock_client.return_value.__enter__.return_value.get.return_value = mock_response + + provider = LiteLLMProvider(api_key="sk-test", endpoint="http://litellm:4000/v1") + mock_model = MagicMock() + mock_chat_model.return_value = mock_model + + result = provider.create_model("openai/gpt-4o") + + mock_litellm_provider.assert_called_once_with( + api_key="sk-test", api_base="http://litellm:4000/v1" + ) + mock_chat_model.assert_called_once_with( + "openai/gpt-4o", provider=mock_litellm_provider.return_value + ) + assert result == mock_model diff --git a/codebase_rag/tests/test_py_variable_analyzer_integration.py b/codebase_rag/tests/test_py_variable_analyzer_integration.py index 93b9f7fbb..ca193ee39 100644 --- a/codebase_rag/tests/test_py_variable_analyzer_integration.py +++ b/codebase_rag/tests/test_py_variable_analyzer_integration.py @@ -596,3 +596,88 @@ def _find_node_recursive(self, node, node_type: str, name: str): if result: return result return None + + +def _find_func_node(root_node, func_name: str): + stack = [root_node] + while stack: + node = stack.pop() + if node.type == "function_definition": + name_node = node.child_by_field_name("name") + if name_node and name_node.text.decode() == func_name: + return node + stack.extend(reversed(node.children)) + return None + + +class TestTraverseSinglePassWithQueries: + @pytest.fixture + def engine_with_queries( + self, + import_processor: MagicMock, + mock_function_registry: MagicMock, + mock_ast_cache: MagicMock, + ) -> PythonTypeInferenceEngine: + from codebase_rag import constants as cs + from codebase_rag.parser_loader import load_parsers + + parsers, queries = load_parsers() + if cs.SupportedLanguage.PYTHON not in parsers: + pytest.skip("Python parser not available") + + return PythonTypeInferenceEngine( + import_processor=import_processor, + function_registry=mock_function_registry, + repo_path=Path("/test/repo"), + project_name="test_project", + ast_cache=mock_ast_cache, + 
queries=queries, + module_qn_to_file_path={}, + class_inheritance={}, + simple_name_lookup=defaultdict(set), + js_type_inference_getter=lambda: MagicMock(), + ) + + def test_traverse_with_query_path( + self, + python_parser: Parser, + engine_with_queries: PythonTypeInferenceEngine, + ) -> None: + python_code = b""" +def process(name: str, count: int) -> None: + result = name.upper() + items = [] + for i in range(count): + items.append(i) +""" + tree = python_parser.parse(python_code) + func_node = _find_func_node(tree.root_node, "process") + assert func_node is not None + + result = engine_with_queries.build_local_variable_type_map( + func_node, "test.module" + ) + + assert "name" in result + assert result["name"] == "str" + assert "count" in result + assert result["count"] == "int" + + def test_traverse_with_query_path_caches_return_stmts( + self, + python_parser: Parser, + engine_with_queries: PythonTypeInferenceEngine, + ) -> None: + python_code = b""" +def get_value(x: int) -> int: + return x + 1 +""" + tree = python_parser.parse(python_code) + func_node = _find_func_node(tree.root_node, "get_value") + assert func_node is not None + + engine_with_queries.build_local_variable_type_map(func_node, "test.module") + + return_nodes: list = [] + engine_with_queries._find_return_statements(func_node, return_nodes) + assert len(return_nodes) >= 1 diff --git a/codebase_rag/tests/test_python_nested_functions.py b/codebase_rag/tests/test_python_nested_functions.py index 66f64b989..2a164d94d 100644 --- a/codebase_rag/tests/test_python_nested_functions.py +++ b/codebase_rag/tests/test_python_nested_functions.py @@ -318,10 +318,6 @@ def main(): def test_function_in_class_method( nested_functions_project: Path, mock_ingestor: MagicMock ) -> None: - """Test that functions inside class methods are properly handled. - - Note: Functions inside methods are currently treated as methods rather than nested functions. 
- """ parsers, queries = load_parsers() updater = GraphUpdater( @@ -333,21 +329,51 @@ def test_function_in_class_method( updater.run() project_name = nested_functions_project.name - - expected_method_qn = f"{project_name}.nested_functions.OuterClass.nested_in_method" - created_methods = get_node_names(mock_ingestor, "Method") - assert expected_method_qn in created_methods, ( - f"Function in method not found as method: {expected_method_qn}" + assert ( + f"{project_name}.nested_functions.OuterClass.method_with_nested" + in created_methods + ) + + nested_qn = f"{project_name}.nested_functions.OuterClass.nested_in_method" + assert nested_qn not in created_methods, ( + f"Nested function inside method should not be ingested as class method: {nested_qn}" ) - expected_class_methods = [ - f"{project_name}.nested_functions.OuterClass.method_with_nested", - f"{project_name}.nested_functions.OuterClass.nested_in_method", - ] - for expected_method in expected_class_methods: - assert expected_method in created_methods, ( - f"Expected method not found: {expected_method}" +def test_nested_function_in_staticmethod_not_ingested_as_method( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project_path = temp_repo / "static_nested" + os.makedirs(project_path) + (project_path / "__init__.py").touch() + + with open(project_path / "api.py", "w") as f: + f.write( + "class Api:\n" + " @staticmethod\n" + " def say_hello():\n" + " def test_func():\n" + ' print("api")\n' + " pass\n" ) + + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=project_path, + parsers=parsers, + queries=queries, + ) + updater.run() + + project_name = project_path.name + created_methods = get_node_names(mock_ingestor, "Method") + + assert f"{project_name}.api.Api.say_hello" in created_methods + + bad_qn = f"{project_name}.api.Api.test_func" + assert bad_qn not in created_methods, ( + f"Nested function inside staticmethod should not be ingested as class method: 
{bad_qn}" + ) diff --git a/codebase_rag/tests/test_python_real_world.py b/codebase_rag/tests/test_python_real_world.py index 770014655..0243e2f04 100644 --- a/codebase_rag/tests/test_python_real_world.py +++ b/codebase_rag/tests/test_python_real_world.py @@ -874,24 +874,20 @@ class PlainTaskSchema(Schema): return project_path -def test_flask_model_calls( +def test_flask_no_calls_to_class_nodes( todo_app_project: Path, mock_ingestor: MagicMock, ) -> None: - """Test detection of model usage in controllers.""" + """Test that Class nodes are not targets of CALLS relationships.""" run_updater(todo_app_project, mock_ingestor) function_calls = get_relationships(mock_ingestor, "CALLS") - model_usage_calls = [ - call - for call in function_calls - if "task_controller" in call.args[0][2] and "TaskModel" in call.args[2][2] - ] + class_calls = [call for call in function_calls if call.args[2][0] == "Class"] - assert model_usage_calls, ( - f"Expected TaskController to use TaskModel, found: " - f"{[(c.args[0][2], c.args[2][2]) for c in model_usage_calls]}" + assert not class_calls, ( + f"Expected no CALLS edges to Class nodes, found: " + f"{[(c.args[0][2], c.args[2][2]) for c in class_calls]}" ) diff --git a/codebase_rag/tests/test_python_relative_import_resolution.py b/codebase_rag/tests/test_python_relative_import_resolution.py index 883dd1d97..6b305b690 100644 --- a/codebase_rag/tests/test_python_relative_import_resolution.py +++ b/codebase_rag/tests/test_python_relative_import_resolution.py @@ -43,7 +43,7 @@ def test_single_dot_relative_import(self, mock_updater: GraphUpdater) -> None: module_qn, ) - expected = "pkg.sub1.sub2.utils" + expected = "myproject.pkg.sub1.sub2.utils" assert result == expected def test_double_dot_relative_import(self, mock_updater: GraphUpdater) -> None: @@ -66,7 +66,7 @@ def test_double_dot_relative_import(self, mock_updater: GraphUpdater) -> None: module_qn, ) - expected = "pkg.sub1.shared" + expected = "myproject.pkg.sub1.shared" assert result == 
expected def test_triple_dot_relative_import(self, mock_updater: GraphUpdater) -> None: @@ -89,7 +89,7 @@ def test_triple_dot_relative_import(self, mock_updater: GraphUpdater) -> None: module_qn, ) - expected = "pkg.common" + expected = "myproject.pkg.common" assert result == expected def test_relative_import_to_package_root(self, mock_updater: GraphUpdater) -> None: @@ -112,7 +112,7 @@ def test_relative_import_to_package_root(self, mock_updater: GraphUpdater) -> No module_qn, ) - expected = "config" + expected = "myproject.config" assert result == expected def test_relative_import_without_module_name( @@ -133,7 +133,7 @@ def test_relative_import_without_module_name( module_qn, ) - expected = "pkg.sub1" + expected = "myproject.pkg.sub1" assert result == expected def test_relative_import_edge_case_shallow_module( @@ -158,7 +158,7 @@ def test_relative_import_edge_case_shallow_module( module_qn, ) - expected = "other" + expected = "myproject.other" assert result == expected def test_relative_import_complex_module_path( @@ -183,5 +183,5 @@ def test_relative_import_complex_module_path( module_qn, ) - expected = "pkg.sub1.sub2.helpers.database.models" + expected = "myproject.pkg.sub1.sub2.helpers.database.models" assert result == expected diff --git a/codebase_rag/tests/test_python_span_oracle.py b/codebase_rag/tests/test_python_span_oracle.py new file mode 100644 index 000000000..f2e51219a --- /dev/null +++ b/codebase_rag/tests/test_python_span_oracle.py @@ -0,0 +1,71 @@ +# (H) Covers Python L1 node SPAN (end_line) validation: cgr's end_line for each +# (H) Class/Function/Method is graded against the ast oracle (node.end_lineno) via +# (H) the L1 score(), joined on (kind, file, start). Exercises a decorated +# (H) multi-line def, a property, an async multi-line signature, and a nested +# (H) function so spans are not trivially single line. 
+from __future__ import annotations + +from pathlib import Path + +from evals import constants as ec +from evals.ast_oracle import extract_oracle_graph +from evals.cgr_graph import extract_cgr_graph +from evals.score import score + +PY_SRC = '''\ +import functools + + +@functools.cache +def decorated( + a: int, + b: int, +) -> int: + return a + b + + +class Widget: + """doc.""" + + @property + def size(self) -> int: + return self._n + + async def fetch( + self, + url: str, + ) -> str: + return await call(url) + + +def outer(): + def inner(): + return 1 + + return inner +''' + + +def test_cgr_matches_ast_oracle_on_python_node_spans(tmp_path: Path) -> None: + project = tmp_path / "py_span" + project.mkdir() + (project / "m.py").write_text(PY_SRC, encoding="utf-8") + + cgr = extract_cgr_graph(project, project.name) + oracle = extract_oracle_graph(project, project.name) + + result = score(cgr, oracle) + span_rows = { + row["label"]: row + for row in result.rows + if row["category"] == ec.Category.SPAN.value + } + # (H) score() must now emit graded span rows for Class/Function/Method. 
+ assert span_rows, [r["category"] for r in result.rows] + aggregate = span_rows.get(ec.AGGREGATE_LABEL) + assert aggregate is not None, span_rows + assert aggregate["precision"] == 1.0 and aggregate["recall"] == 1.0, ( + aggregate, + result.diff, + ) + assert aggregate["tp"] >= 5, aggregate diff --git a/codebase_rag/tests/test_python_standard_library_imports.py b/codebase_rag/tests/test_python_standard_library_imports.py index c7cfa891e..98ec5f673 100644 --- a/codebase_rag/tests/test_python_standard_library_imports.py +++ b/codebase_rag/tests/test_python_standard_library_imports.py @@ -11,10 +11,10 @@ class TestStandardLibraryImports: """Test import resolution for standard library vs local modules.""" @pytest.fixture - def mock_updater(self) -> GraphUpdater: + def mock_updater(self, tmp_path: Path) -> GraphUpdater: mock_ingestor = MagicMock() - test_repo = Path("/tmp/myproject") + test_repo = tmp_path / "myproject" test_repo.mkdir(exist_ok=True) (test_repo / "utils").mkdir(exist_ok=True) diff --git a/codebase_rag/tests/test_query_truncation.py b/codebase_rag/tests/test_query_truncation.py new file mode 100644 index 000000000..57f582c69 --- /dev/null +++ b/codebase_rag/tests/test_query_truncation.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from codebase_rag.tools.codebase_query import create_query_tool +from codebase_rag.types_defs import ResultRow + + +@pytest.fixture +def mock_ingestor() -> MagicMock: + return MagicMock() + + +@pytest.fixture +def mock_cypher_gen() -> MagicMock: + gen = MagicMock() + gen.generate = AsyncMock(return_value="MATCH (n) RETURN n") + return gen + + +class TestQueryTruncation: + @pytest.mark.asyncio + async def test_row_cap_truncation( + self, mock_ingestor: MagicMock, mock_cypher_gen: MagicMock + ) -> None: + rows: list[ResultRow] = [{"name": f"node_{i}"} for i in range(600)] + mock_ingestor.fetch_all.return_value = rows + + tool = 
create_query_tool(mock_ingestor, mock_cypher_gen) + with patch("codebase_rag.tools.codebase_query.settings") as mock_settings: + mock_settings.QUERY_RESULT_ROW_CAP = 500 + mock_settings.QUERY_RESULT_MAX_TOKENS = 100000 + mock_settings.QUERY_TIMEOUT_S = 60.0 + result = await tool.function(natural_language_query="list all nodes") + + assert len(result.results) <= 500 + assert "truncated" in result.summary.lower() or "600" in result.summary + + @pytest.mark.asyncio + async def test_token_truncation( + self, mock_ingestor: MagicMock, mock_cypher_gen: MagicMock + ) -> None: + rows: list[ResultRow] = [ + {"name": f"function_{i}", "body": f"def func_{i}(): pass # {'x' * 200}"} + for i in range(100) + ] + mock_ingestor.fetch_all.return_value = rows + + tool = create_query_tool(mock_ingestor, mock_cypher_gen) + with patch("codebase_rag.tools.codebase_query.settings") as mock_settings: + mock_settings.QUERY_RESULT_ROW_CAP = 500 + mock_settings.QUERY_RESULT_MAX_TOKENS = 500 + mock_settings.QUERY_TIMEOUT_S = 60.0 + result = await tool.function(natural_language_query="list functions") + + assert len(result.results) < 100 + assert "truncated" in result.summary.lower() + + @pytest.mark.asyncio + async def test_no_truncation_when_within_limits( + self, mock_ingestor: MagicMock, mock_cypher_gen: MagicMock + ) -> None: + rows: list[ResultRow] = [{"name": f"node_{i}"} for i in range(5)] + mock_ingestor.fetch_all.return_value = rows + + tool = create_query_tool(mock_ingestor, mock_cypher_gen) + with patch("codebase_rag.tools.codebase_query.settings") as mock_settings: + mock_settings.QUERY_RESULT_ROW_CAP = 500 + mock_settings.QUERY_RESULT_MAX_TOKENS = 16000 + mock_settings.QUERY_TIMEOUT_S = 60.0 + result = await tool.function(natural_language_query="small query") + + assert len(result.results) == 5 + assert "Successfully" in result.summary diff --git a/codebase_rag/tests/test_realtime_debounce.py b/codebase_rag/tests/test_realtime_debounce.py new file mode 100644 index 
000000000..eee1fcf48 --- /dev/null +++ b/codebase_rag/tests/test_realtime_debounce.py @@ -0,0 +1,445 @@ +""" +Tests for the realtime_updater debouncing functionality. + +These tests verify the hybrid debounce strategy that prevents redundant +graph updates during rapid file saves. +""" + +from __future__ import annotations + +import threading +import time +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock + +import pytest +from watchdog.events import FileCreatedEvent, FileDeletedEvent, FileModifiedEvent + +from codebase_rag.constants import DEFAULT_DEBOUNCE_SECONDS, DEFAULT_MAX_WAIT_SECONDS +from codebase_rag.services import QueryProtocol + + +class MockQueryIngestor: + def __init__(self) -> None: + self.execute_write = MagicMock() + self.flush_all = MagicMock() + self.fetch_all = MagicMock(return_value=[]) + self.ensure_node_batch = MagicMock() + self.ensure_relationship_batch = MagicMock() + + def __enter__(self) -> MockQueryIngestor: + return self + + def __exit__(self, *args: Any) -> None: + pass + + +# Register MockQueryIngestor as implementing QueryProtocol for isinstance checks +QueryProtocol.register(MockQueryIngestor) + + +class TestCodeChangeEventHandlerDebounce: + @pytest.fixture(autouse=True) + def _patch_ignore(self, monkeypatch: pytest.MonkeyPatch) -> None: + from codebase_rag import constants as cs + + patched = cs.IGNORE_PATTERNS - {"tmp"} + monkeypatch.setattr(cs, "IGNORE_PATTERNS", patched) + monkeypatch.setattr("realtime_updater.IGNORE_PATTERNS", patched) + + @pytest.fixture + def mock_ingestor(self) -> MockQueryIngestor: + return MockQueryIngestor() + + @pytest.fixture + def mock_updater( + self, tmp_path: Path, mock_ingestor: MockQueryIngestor + ) -> MagicMock: + updater = MagicMock() + updater.repo_path = tmp_path + updater.ingestor = mock_ingestor + updater.remove_file_from_state = MagicMock() + updater.factory = MagicMock() + updater.factory.definition_processor.process_file = MagicMock(return_value=None) 
+ updater._process_function_calls = MagicMock() + updater.parsers = {} + updater.queries = {} + updater.ast_cache = {} + return updater + + @pytest.fixture + def sample_file(self, tmp_path: Path) -> Path: + test_file = tmp_path / "test.py" + test_file.write_text("# test file") + return test_file + + def test_handler_initialization_with_debounce( + self, mock_updater: MagicMock + ) -> None: + from realtime_updater import CodeChangeEventHandler + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=5, max_wait_seconds=30 + ) + + assert handler.debounce_seconds == 5 + assert handler.max_wait_seconds == 30 + assert handler.debounce_enabled is True + assert len(handler.timers) == 0 + assert len(handler.first_event_time) == 0 + assert len(handler.pending_events) == 0 + + def test_handler_initialization_without_debounce( + self, mock_updater: MagicMock + ) -> None: + from realtime_updater import CodeChangeEventHandler + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=0, max_wait_seconds=30 + ) + + assert handler.debounce_seconds == 0 + assert handler.debounce_enabled is False + + def test_handler_uses_default_constants(self, mock_updater: MagicMock) -> None: + from realtime_updater import CodeChangeEventHandler + + handler = CodeChangeEventHandler(mock_updater) + + assert handler.debounce_seconds == DEFAULT_DEBOUNCE_SECONDS + assert handler.max_wait_seconds == DEFAULT_MAX_WAIT_SECONDS + + def test_is_relevant_filters_ignored_patterns( + self, mock_updater: MagicMock, tmp_path: Path + ) -> None: + from realtime_updater import CodeChangeEventHandler + + handler = CodeChangeEventHandler(mock_updater) + + # Should be ignored (directories in ignore patterns) + assert handler._is_relevant(str(tmp_path / ".git" / "config")) is False + assert handler._is_relevant(str(tmp_path / "node_modules" / "pkg.js")) is False + assert handler._is_relevant(str(tmp_path / "__pycache__" / "mod.pyc")) is False + + # Should be relevant + assert 
handler._is_relevant(str(tmp_path / "main.py")) is True + assert handler._is_relevant(str(tmp_path / "src" / "lib.rs")) is True + assert handler._is_relevant(str(tmp_path / "app.js")) is True + + def test_dispatch_ignores_directories( + self, mock_updater: MagicMock, mock_ingestor: MockQueryIngestor, tmp_path: Path + ) -> None: + from realtime_updater import CodeChangeEventHandler + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=0.1, max_wait_seconds=1 + ) + + # Create event that is marked as directory + event = FileModifiedEvent(str(tmp_path / "some_dir")) + # The is_directory property is set by watchdog based on the event type + # For FileModifiedEvent, we need to check is_directory attribute + object.__setattr__(event, "is_directory", True) + + handler.dispatch(event) + + # No timer should be created for directory events + assert len(handler.timers) == 0 + mock_ingestor.execute_write.assert_not_called() + + def test_debounce_batches_rapid_events( + self, + mock_updater: MagicMock, + mock_ingestor: MockQueryIngestor, + sample_file: Path, + ) -> None: + from realtime_updater import CodeChangeEventHandler + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=0.2, max_wait_seconds=5 + ) + + # Simulate 5 rapid saves + for _ in range(5): + event = FileModifiedEvent(str(sample_file)) + handler.dispatch(event) + time.sleep(0.05) # 50ms between saves + + # Should have one pending event + assert len(handler.pending_events) == 1 + + # Wait for debounce to complete + time.sleep(0.4) + + # After debounce, ingestor should have been called only once + mock_ingestor.flush_all.assert_called_once() + + def test_no_debounce_processes_immediately( + self, + mock_updater: MagicMock, + mock_ingestor: MockQueryIngestor, + sample_file: Path, + ) -> None: + from realtime_updater import CodeChangeEventHandler + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=0, max_wait_seconds=30 + ) + + event = 
FileModifiedEvent(str(sample_file)) + handler.dispatch(event) + + # Should process immediately (no pending events) + assert len(handler.pending_events) == 0 + assert len(handler.timers) == 0 + mock_ingestor.flush_all.assert_called_once() + + def test_max_wait_forces_update( + self, + mock_updater: MagicMock, + mock_ingestor: MockQueryIngestor, + sample_file: Path, + ) -> None: + from realtime_updater import CodeChangeEventHandler + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=0.5, max_wait_seconds=0.3 + ) + + # First event + event = FileModifiedEvent(str(sample_file)) + handler.dispatch(event) + + # Wait until max_wait is exceeded + time.sleep(0.4) + + # Second event should trigger immediate processing due to max_wait + event2 = FileModifiedEvent(str(sample_file)) + handler.dispatch(event2) + + # Give time for processing + time.sleep(0.15) + + # Should have processed at least once due to max_wait + assert mock_ingestor.flush_all.call_count >= 1 + + def test_different_files_tracked_separately( + self, mock_updater: MagicMock, tmp_path: Path + ) -> None: + from realtime_updater import CodeChangeEventHandler + + file1 = tmp_path / "file1.py" + file2 = tmp_path / "file2.py" + file1.write_text("# file 1") + file2.write_text("# file 2") + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=0.2, max_wait_seconds=5 + ) + + # Events for different files + event1 = FileModifiedEvent(str(file1)) + event2 = FileModifiedEvent(str(file2)) + + handler.dispatch(event1) + handler.dispatch(event2) + + # Should have two pending events + assert len(handler.pending_events) == 2 + assert len(handler.timers) == 2 + + def test_timer_cleanup_after_processing( + self, + mock_updater: MagicMock, + mock_ingestor: MockQueryIngestor, + sample_file: Path, + ) -> None: + from realtime_updater import CodeChangeEventHandler + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=0.1, max_wait_seconds=5 + ) + + event = 
FileModifiedEvent(str(sample_file)) + handler.dispatch(event) + + # Should have pending state + assert len(handler.pending_events) == 1 + assert len(handler.first_event_time) == 1 + + # Wait for processing + time.sleep(0.25) + + # State should be cleaned up + assert len(handler.pending_events) == 0 + assert len(handler.first_event_time) == 0 + assert len(handler.timers) == 0 + + def test_created_event_triggers_debounce( + self, mock_updater: MagicMock, tmp_path: Path + ) -> None: + from realtime_updater import CodeChangeEventHandler + + new_file = tmp_path / "new_file.py" + new_file.write_text("# new file") + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=0.2, max_wait_seconds=5 + ) + + event = FileCreatedEvent(str(new_file)) + handler.dispatch(event) + + assert len(handler.pending_events) == 1 + + def test_deleted_event_triggers_debounce( + self, mock_updater: MagicMock, sample_file: Path + ) -> None: + from realtime_updater import CodeChangeEventHandler + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=0.2, max_wait_seconds=5 + ) + + event = FileDeletedEvent(str(sample_file)) + handler.dispatch(event) + + assert len(handler.pending_events) == 1 + + def test_thread_safety_concurrent_events( + self, mock_updater: MagicMock, tmp_path: Path + ) -> None: + from realtime_updater import CodeChangeEventHandler + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=5.0, max_wait_seconds=30 + ) + + files = [tmp_path / f"file{i}.py" for i in range(10)] + for f in files: + f.write_text(f"# {f.name}") + + def send_events(file_path: Path) -> None: + for _ in range(5): + event = FileModifiedEvent(str(file_path)) + handler.dispatch(event) + time.sleep(0.02) + + # Send events from multiple threads + threads = [threading.Thread(target=send_events, args=(f,)) for f in files[:5]] + for t in threads: + t.start() + for t in threads: + t.join() + + # Should have 5 pending events (one per file) + assert 
len(handler.pending_events) == 5 + + +class TestDebounceValidation: + def test_validate_non_negative_float_accepts_zero(self) -> None: + from realtime_updater import _validate_non_negative_float + + assert _validate_non_negative_float(0) == 0 + assert _validate_non_negative_float(0.0) == 0.0 + + def test_validate_non_negative_float_accepts_positive(self) -> None: + from realtime_updater import _validate_non_negative_float + + assert _validate_non_negative_float(5) == 5 + assert _validate_non_negative_float(0.5) == 0.5 + assert _validate_non_negative_float(100) == 100 + + def test_validate_non_negative_float_rejects_negative(self) -> None: + import typer + + from realtime_updater import _validate_non_negative_float + + with pytest.raises(typer.BadParameter): + _validate_non_negative_float(-1) + + with pytest.raises(typer.BadParameter): + _validate_non_negative_float(-0.1) + + +class TestDebounceIntegration: + @pytest.fixture(autouse=True) + def _patch_ignore(self, monkeypatch: pytest.MonkeyPatch) -> None: + from codebase_rag import constants as cs + + patched = cs.IGNORE_PATTERNS - {"tmp"} + monkeypatch.setattr(cs, "IGNORE_PATTERNS", patched) + monkeypatch.setattr("realtime_updater.IGNORE_PATTERNS", patched) + + @pytest.fixture + def mock_ingestor(self) -> MockQueryIngestor: + return MockQueryIngestor() + + @pytest.fixture + def mock_updater( + self, tmp_path: Path, mock_ingestor: MockQueryIngestor + ) -> MagicMock: + updater = MagicMock() + updater.repo_path = tmp_path + updater.ingestor = mock_ingestor + updater.remove_file_from_state = MagicMock() + updater.factory = MagicMock() + updater.factory.definition_processor.process_file = MagicMock(return_value=None) + updater._process_function_calls = MagicMock() + updater.parsers = {} + updater.queries = {} + updater.ast_cache = {} + return updater + + def test_realistic_rapid_save_scenario( + self, mock_updater: MagicMock, mock_ingestor: MockQueryIngestor, tmp_path: Path + ) -> None: + """ + Simulate realistic rapid 
save scenario: + - User saves file 10 times over 3 seconds + - With 0.5s debounce and 2s max_wait, should result in ~2-4 updates + """ + from realtime_updater import CodeChangeEventHandler + + test_file = tmp_path / "editor.py" + test_file.write_text("# editing") + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=0.5, max_wait_seconds=2 + ) + + # Simulate 10 saves over 3 seconds + for i in range(10): + event = FileModifiedEvent(str(test_file)) + handler.dispatch(event) + time.sleep(0.3) + + # Wait for final debounce + time.sleep(0.7) + + # Should have batched into fewer updates due to max_wait and debounce + # With max_wait=2s and 3s total time, expect ~2-4 updates + call_count = mock_ingestor.flush_all.call_count + assert 1 <= call_count <= 4, f"Expected 1-4 updates, got {call_count}" + + def test_single_edit_after_quiet_period( + self, mock_updater: MagicMock, mock_ingestor: MockQueryIngestor, tmp_path: Path + ) -> None: + from realtime_updater import CodeChangeEventHandler + + test_file = tmp_path / "single.py" + test_file.write_text("# single edit") + + handler = CodeChangeEventHandler( + mock_updater, debounce_seconds=0.1, max_wait_seconds=5 + ) + + event = FileModifiedEvent(str(test_file)) + handler.dispatch(event) + + # Wait for debounce + time.sleep(0.25) + + # Should have exactly one update + mock_ingestor.flush_all.assert_called_once() diff --git a/codebase_rag/tests/test_realtime_event_filtering.py b/codebase_rag/tests/test_realtime_event_filtering.py new file mode 100644 index 000000000..68f641d93 --- /dev/null +++ b/codebase_rag/tests/test_realtime_event_filtering.py @@ -0,0 +1,210 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Protocol, runtime_checkable +from unittest.mock import MagicMock + +import pytest +from watchdog.events import ( + FileClosedNoWriteEvent, + FileCreatedEvent, + FileDeletedEvent, + FileModifiedEvent, + FileOpenedEvent, + FileSystemEvent, +) + +from codebase_rag import 
constants as cs +from realtime_updater import CodeChangeEventHandler + + +@runtime_checkable +class _AnyProtocol(Protocol): + pass + + +@pytest.fixture(autouse=True) +def _bypass_protocol_check(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("realtime_updater.QueryProtocol", _AnyProtocol) + + +@pytest.fixture +def handler(mock_updater: MagicMock) -> CodeChangeEventHandler: + h = CodeChangeEventHandler(mock_updater, debounce_seconds=0) + h.ignore_patterns = h.ignore_patterns - {"tmp", "temp"} + return h + + +def _make_event(event_type: str, src_path: str) -> FileSystemEvent: + ev = MagicMock(spec=FileSystemEvent) + ev.event_type = event_type + ev.src_path = src_path + ev.is_directory = False + return ev + + +class TestEventFiltering: + def test_modified_event_is_processed( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f = temp_repo / "app.py" + f.write_text("x = 1", encoding="utf-8") + handler.dispatch(FileModifiedEvent(str(f))) + assert mock_updater.ingestor.execute_write.call_count == 3 + + def test_created_event_is_processed( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f = temp_repo / "new.py" + f.write_text("y = 2", encoding="utf-8") + handler.dispatch(FileCreatedEvent(str(f))) + assert mock_updater.ingestor.execute_write.call_count == 3 + mock_updater.ingestor.flush_all.assert_called_once() + + def test_deleted_event_is_processed( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f = temp_repo / "gone.py" + handler.dispatch(FileDeletedEvent(str(f))) + assert mock_updater.ingestor.execute_write.call_count == 3 + mock_updater.factory.definition_processor.process_file.assert_not_called() + mock_updater.factory.structure_processor.process_generic_file.assert_not_called() + + def test_opened_event_is_ignored( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: 
+ f = temp_repo / "read_only.py" + f.touch() + handler.dispatch(FileOpenedEvent(str(f))) + mock_updater.ingestor.execute_write.assert_not_called() + mock_updater.ingestor.flush_all.assert_not_called() + + def test_closed_no_write_event_is_ignored( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f = temp_repo / "viewed.py" + f.touch() + handler.dispatch(FileClosedNoWriteEvent(str(f))) + mock_updater.ingestor.execute_write.assert_not_called() + mock_updater.ingestor.flush_all.assert_not_called() + + def test_access_event_is_ignored( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f = temp_repo / "accessed.py" + f.touch() + ev = _make_event("access", str(f)) + handler.dispatch(ev) + mock_updater.ingestor.execute_write.assert_not_called() + mock_updater.ingestor.flush_all.assert_not_called() + + +class TestNonCodeFileHandling: + def test_markdown_file_creates_file_node( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f = temp_repo / "readme.md" + f.write_text("# Title", encoding="utf-8") + handler.dispatch(FileCreatedEvent(str(f))) + mock_updater.factory.structure_processor.process_generic_file.assert_called_once_with( + f, "readme.md" + ) + + def test_json_file_creates_file_node( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f = temp_repo / "config.json" + f.write_text("{}", encoding="utf-8") + handler.dispatch(FileCreatedEvent(str(f))) + mock_updater.factory.structure_processor.process_generic_file.assert_called_once_with( + f, "config.json" + ) + + def test_non_code_file_deletion_removes_file_node( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f = temp_repo / "notes.md" + handler.dispatch(FileDeletedEvent(str(f))) + delete_file_calls = [ + c + for c in mock_updater.ingestor.execute_write.call_args_list + if c.args[0] == 
cs.CYPHER_DELETE_FILE + ] + assert len(delete_file_calls) == 1 + assert delete_file_calls[0].args[1] == { + cs.KEY_PATH: "notes.md", + } + mock_updater.factory.structure_processor.process_generic_file.assert_not_called() + + def test_non_code_file_has_no_module_node( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f = temp_repo / "data.md" + f.write_text("text", encoding="utf-8") + handler.dispatch(FileCreatedEvent(str(f))) + mock_updater.factory.definition_processor.process_file.assert_not_called() + + +class TestMixedEventSequences: + def test_rapid_create_modify_delete( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f = temp_repo / "ephemeral.py" + f.write_text("a = 1", encoding="utf-8") + handler.dispatch(FileCreatedEvent(str(f))) + + mock_updater.ingestor.reset_mock() + mock_updater.factory.reset_mock() + f.write_text("a = 2", encoding="utf-8") + handler.dispatch(FileModifiedEvent(str(f))) + + mock_updater.ingestor.reset_mock() + mock_updater.factory.reset_mock() + handler.dispatch(FileDeletedEvent(str(f))) + + # (H) After delete, no re-parse or file node creation + mock_updater.factory.definition_processor.process_file.assert_not_called() + mock_updater.factory.structure_processor.process_generic_file.assert_not_called() + assert mock_updater.ingestor.execute_write.call_count == 3 + mock_updater.ingestor.flush_all.assert_called_once() + + def test_multiple_files_changed( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f1 = temp_repo / "a.py" + f2 = temp_repo / "b.py" + f1.write_text("x = 1", encoding="utf-8") + f2.write_text("y = 2", encoding="utf-8") + + handler.dispatch(FileModifiedEvent(str(f1))) + handler.dispatch(FileModifiedEvent(str(f2))) + + assert mock_updater.ingestor.execute_write.call_count == 6 + assert mock_updater.ingestor.flush_all.call_count == 2 + + +class TestCypherDeleteFileQuery: + def 
test_delete_file_only_targets_specific_path( + self, handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path + ) -> None: + f1 = temp_repo / "keep.py" + f2 = temp_repo / "remove.py" + f1.write_text("a = 1", encoding="utf-8") + + handler.dispatch(FileDeletedEvent(str(f2))) + + delete_file_calls = [ + c + for c in mock_updater.ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_FILE + ] + assert len(delete_file_calls) == 1 + assert delete_file_calls[0].args[1] == {cs.KEY_PATH: "remove.py"} + + delete_module_calls = [ + c + for c in mock_updater.ingestor.execute_write.call_args_list + if c.args[0] == cs.CYPHER_DELETE_MODULE + ] + assert len(delete_module_calls) == 1 + assert delete_module_calls[0].args[1] == {cs.KEY_PATH: "remove.py"} diff --git a/codebase_rag/tests/test_realtime_updater.py b/codebase_rag/tests/test_realtime_updater.py index c53b5b6ae..fdf1b604a 100644 --- a/codebase_rag/tests/test_realtime_updater.py +++ b/codebase_rag/tests/test_realtime_updater.py @@ -1,4 +1,7 @@ +from __future__ import annotations + from pathlib import Path +from typing import Protocol, runtime_checkable from unittest.mock import MagicMock import pytest @@ -12,10 +15,21 @@ from realtime_updater import CodeChangeEventHandler +@runtime_checkable +class _AnyProtocol(Protocol): + pass + + +@pytest.fixture(autouse=True) +def _bypass_protocol_check(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("realtime_updater.QueryProtocol", _AnyProtocol) + + @pytest.fixture def event_handler(mock_updater: MagicMock) -> CodeChangeEventHandler: - """Provides a CodeChangeEventHandler instance with a mocked updater.""" - return CodeChangeEventHandler(mock_updater) + handler = CodeChangeEventHandler(mock_updater, debounce_seconds=0) + handler.ignore_patterns = handler.ignore_patterns - {"tmp", "temp"} + return handler def test_file_creation_flow( @@ -28,7 +42,8 @@ def test_file_creation_flow( event_handler.dispatch(event) - assert 
mock_updater.ingestor.execute_write.call_count == 2 + # (H) 3 execute_write calls: DELETE_MODULE, DELETE_FILE, DELETE_CALLS + assert mock_updater.ingestor.execute_write.call_count == 3 mock_updater.factory.definition_processor.process_file.assert_called_once_with( test_file, "python", @@ -48,7 +63,8 @@ def test_file_modification_flow( event_handler.dispatch(event) - assert mock_updater.ingestor.execute_write.call_count == 2 + # (H) 3 execute_write calls: DELETE_MODULE, DELETE_FILE, DELETE_CALLS + assert mock_updater.ingestor.execute_write.call_count == 3 mock_updater.factory.definition_processor.process_file.assert_called_once_with( test_file, "python", @@ -67,7 +83,8 @@ def test_file_deletion_flow( event_handler.dispatch(event) - assert mock_updater.ingestor.execute_write.call_count == 2 + # (H) 3 execute_write calls: DELETE_MODULE, DELETE_FILE, DELETE_CALLS + assert mock_updater.ingestor.execute_write.call_count == 3 mock_updater.factory.definition_processor.process_file.assert_not_called() mock_updater.ingestor.flush_all.assert_called_once() @@ -103,16 +120,22 @@ def test_directory_creation_is_ignored( mock_updater.ingestor.flush_all.assert_not_called() -def test_unsupported_file_types_are_ignored( +def test_non_code_files_create_file_nodes( event_handler: CodeChangeEventHandler, mock_updater: MagicMock, temp_repo: Path ) -> None: - """Test that changing an unsupported file type is ignored after deletion query.""" - unsupported_file = temp_repo / "document.md" - unsupported_file.write_text(encoding="utf-8", data="# Markdown file") - event = FileModifiedEvent(str(unsupported_file)) + """Test that non-code files (like .md) create File nodes but skip AST parsing.""" + non_code_file = temp_repo / "document.md" + non_code_file.write_text(encoding="utf-8", data="# Markdown file") + event = FileModifiedEvent(str(non_code_file)) event_handler.dispatch(event) - assert mock_updater.ingestor.execute_write.call_count == 2 + # (H) 3 execute_write calls: DELETE_MODULE, 
DELETE_FILE, DELETE_CALLS + assert mock_updater.ingestor.execute_write.call_count == 3 + # (H) AST parsing is skipped for non-code files mock_updater.factory.definition_processor.process_file.assert_not_called() + # (H) But File node creation IS called for all file types + mock_updater.factory.structure_processor.process_generic_file.assert_called_once_with( + non_code_file, "document.md" + ) mock_updater.ingestor.flush_all.assert_called_once() diff --git a/codebase_rag/tests/test_reconcile_embeddings.py b/codebase_rag/tests/test_reconcile_embeddings.py new file mode 100644 index 000000000..0e69f646e --- /dev/null +++ b/codebase_rag/tests/test_reconcile_embeddings.py @@ -0,0 +1,94 @@ +from collections.abc import Generator +from pathlib import Path +from unittest.mock import MagicMock + +import pytest +from loguru import logger + +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.services.graph_service import MemgraphIngestor + + +@pytest.fixture +def updater(temp_repo: Path) -> GraphUpdater: + mock = MagicMock(spec=MemgraphIngestor) + mock.fetch_all = MagicMock(return_value=[]) + parsers, queries = load_parsers() + return GraphUpdater( + ingestor=mock, + repo_path=temp_repo, + parsers=parsers, + queries=queries, + ) + + +@pytest.fixture +def log_messages() -> Generator[list[str], None, None]: + messages: list[str] = [] + handler_id = logger.add(lambda msg: messages.append(str(msg)), level="DEBUG") + yield messages + logger.remove(handler_id) + + +class TestReconcileEmbeddings: + def test_noop_when_expected_empty(self, updater: GraphUpdater) -> None: + mock_fn = MagicMock() + updater._reconcile_embeddings(set(), mock_fn) + mock_fn.assert_not_called() + + def test_logs_ok_when_all_found( + self, updater: GraphUpdater, log_messages: list[str] + ) -> None: + expected = {1, 2, 3} + mock_fn = MagicMock(return_value={1, 2, 3}) + + updater._reconcile_embeddings(expected, mock_fn) + + 
mock_fn.assert_called_once_with(expected) + combined = "\n".join(log_messages) + assert "all 3 expected embeddings found" in combined + + def test_logs_warning_when_ids_missing( + self, updater: GraphUpdater, log_messages: list[str] + ) -> None: + expected = {1, 2, 3, 4, 5} + mock_fn = MagicMock(return_value={1, 3}) + + updater._reconcile_embeddings(expected, mock_fn) + + combined = "\n".join(log_messages) + assert "3 of 5 embeddings missing" in combined + + def test_sample_ids_in_warning( + self, updater: GraphUpdater, log_messages: list[str] + ) -> None: + expected = {10, 20, 30} + mock_fn = MagicMock(return_value={10}) + + updater._reconcile_embeddings(expected, mock_fn) + + combined = "\n".join(log_messages) + assert "20" in combined + assert "30" in combined + + def test_handles_verify_fn_exception( + self, updater: GraphUpdater, log_messages: list[str] + ) -> None: + mock_fn = MagicMock(side_effect=RuntimeError("connection lost")) + + updater._reconcile_embeddings({1, 2}, mock_fn) + + combined = "\n".join(log_messages).lower() + assert "reconciliation check failed" in combined + + def test_sample_limited_to_ten( + self, updater: GraphUpdater, log_messages: list[str] + ) -> None: + expected = set(range(20)) + mock_fn = MagicMock(return_value=set()) + + updater._reconcile_embeddings(expected, mock_fn) + + combined = "\n".join(log_messages) + assert "20 of 20 embeddings missing" in combined diff --git a/codebase_rag/tests/test_reexport_chain_resolution.py b/codebase_rag/tests/test_reexport_chain_resolution.py new file mode 100644 index 000000000..b9a6a8d65 --- /dev/null +++ b/codebase_rag/tests/test_reexport_chain_resolution.py @@ -0,0 +1,110 @@ +# (H) L3 finding from the evals/ harness: TypeInferenceEngine.build_local_variable_type_map +# (H) calls self.python_type_inference.build_local_variable_type_map(...), where the +# (H) python_type_inference property returns PythonTypeInferenceEngine imported via a +# (H) package re-export (from .py import 
PythonTypeInferenceEngine). The caller's import +# (H) map points the name at the re-export module, not the class's real definition, so +# (H) the chained method must follow the re-export hop to resolve to the concrete class +# (H) rather than collapsing to an ambiguous same-named method (the caller itself). +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +# (H) PythonEngine lives in pkg/py/engine.py and is re-exported from pkg/py/__init__.py. +# (H) A sibling JsEngine.build_map exists so the bare name is ambiguous in the trie. +FILES = { + "pkg/__init__.py": "", + "pkg/py/__init__.py": "from .engine import PythonEngine\n\n__all__ = ['PythonEngine']\n", + "pkg/py/engine.py": ( + "class PythonEngine:\n def build_map(self, node):\n return {}\n" + ), + "pkg/js_engine.py": ( + "class JsEngine:\n def build_map(self, node):\n return {}\n" + ), + "pkg/dispatch.py": ( + "from .py import PythonEngine\n\n\n" + "class Dispatch:\n" + " def __init__(self) -> None:\n" + " self._python_engine = None\n\n" + " @property\n" + " def python_engine(self) -> PythonEngine:\n" + " if self._python_engine is None:\n" + " self._python_engine = PythonEngine()\n" + " return self._python_engine\n\n" + " def build_map(self, node):\n" + " return self.python_engine.build_map(node)\n" + ), +} + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], 
def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]:
    # (H) Writes every FILES entry under tmp_path, runs a forced GraphUpdater pass
    # (H) into a _Capture ingestor, and returns the (from, to) endpoint values of
    # (H) each captured CALLS relationship.
    for rel_path, content in FILES.items():
        target = tmp_path / rel_path
        target.parent.mkdir(parents=True, exist_ok=True)
        # (H) explicit UTF-8 so the fixture bytes never depend on the locale default
        target.write_text(content, encoding="utf-8")
    parsers, queries = load_parsers()
    cap = _Capture()
    GraphUpdater(
        ingestor=cap,
        repo_path=tmp_path,
        parsers=parsers,
        queries=queries,
        project_name=PROJECT,
    ).run(force=True)
    return {
        (frm, to)
        for (frm, rel_type, to) in cap.rels
        if rel_type == cs.RelationshipType.CALLS
    }
+from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _import_edges( + tmp_path: Path, +) -> set[tuple[PropertyValue, PropertyValue]]: + (tmp_path / "__init__.py").touch() + pkg = tmp_path / "pkg" + pkg.mkdir() + pkg.joinpath("__init__.py").write_text("from . 
class TestRelativeImportPackageInit:
    # (H) Checks where the IMPORTS edge produced by _import_edges points.

    def test_from_dot_import_in_package_init_targets_own_submodule(
        self, tmp_path: Path
    ) -> None:
        import_edges = _import_edges(tmp_path)
        # (H) expected: the package imports its own submodule ...
        own_submodule_edge = ("proj.pkg", "proj.pkg.sub")
        # (H) ... and not a same-named module one level up
        parent_level_edge = ("proj.pkg", "proj.sub")

        assert own_submodule_edge in import_edges, import_edges
        assert parent_level_edge not in import_edges, import_edges
def _imports(
    tmp_path: Path, importer: str, src: str
) -> set[tuple[PropertyValue, PropertyValue]]:
    # (H) Lays out a root-level package in tmp_path (__init__.py, constants.py and
    # (H) the given importer file holding src), runs a forced GraphUpdater pass
    # (H) into a _Capture ingestor, and returns the (from, to) endpoint values of
    # (H) each captured IMPORTS relationship.
    (tmp_path / "__init__.py").touch()
    # (H) explicit UTF-8: src is caller-supplied, so do not rely on the locale default
    (tmp_path / "constants.py").write_text("X = 1\n", encoding="utf-8")
    (tmp_path / importer).write_text(src, encoding="utf-8")
    parsers, queries = load_parsers()
    cap = _Capture()
    GraphUpdater(
        ingestor=cap,
        repo_path=tmp_path,
        parsers=parsers,
        queries=queries,
        project_name=PROJECT,
    ).run(force=True)
    return {
        (frm, to)
        for (frm, rel_type, to) in cap.rels
        if rel_type == cs.RelationshipType.IMPORTS
    }
import constants as cs\n\nuse = cs\n" + ) + assert ("proj.cli", "proj.constants") in edges, edges diff --git a/codebase_rag/tests/test_retrieval_eval.py b/codebase_rag/tests/test_retrieval_eval.py new file mode 100644 index 000000000..ece9b97ea --- /dev/null +++ b/codebase_rag/tests/test_retrieval_eval.py @@ -0,0 +1,142 @@ +import shutil +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from evals import constants as ec +from evals.retrieval import ( + cgr_call_edges, + first_party_symbols, + grep_call_edges, + oracle_call_edges, + parse_py_trees, + score_retrieval, +) +from evals.types_defs import NameEdge, NodeKey + +_CALLS = cs.RelationshipType.CALLS.value +_MODULE = cs.NodeLabel.MODULE.value + +_RG = shutil.which(ec.RG_BIN) +needs_rg = pytest.mark.skipif(_RG is None, reason="ripgrep not installed") + +# (H) core.py genuinely CALLS helper(), instantiates Widget(), and calls w.run(); +# (H) build() is defined but never called, so it is a caller, never a callee. +_CORE = """\ +def helper(): + return 1 + + +class Widget: + def run(self): + return helper() + + +def build(): + helper() + w = Widget() + w.run() + return w +""" + +# (H) uses.py only imports and aliases helper/Widget; it never calls them, so a +# (H) name-based grep over-includes it while the call oracle does not. 
def test_oracle_captures_first_party_calls(repo: Path) -> None:
    # (H) The oracle must hold the three calls made in core.py, no edge for the
    # (H) never-called build(), and no edge at all sourced from uses.py.
    trees, _files = parse_py_trees(repo)
    first_party = first_party_symbols(trees)
    oracle_edges = oracle_call_edges(trees, first_party)

    for callee in ("helper", "Widget", "run"):
        assert _edge("pkg/core.py", callee) in oracle_edges
    # (H) build is defined but never called -> never a callee edge.
    assert _edge("pkg/core.py", "build") not in oracle_edges
    # (H) uses.py references symbols but calls none of them.
    assert all(edge.source.file != "pkg/uses.py" for edge in oracle_edges)
@needs_rg
def test_grep_preserves_colon_in_path(repo: Path) -> None:
    # (H) a .py file whose name contains a colon must keep its full path; the
    # (H) ripgrep output separator must not be confused with a path colon.
    import sys

    # (H) ':' cannot be part of a regular filename on Windows, so the fixture
    # (H) file below cannot be created there; skip instead of failing.
    if sys.platform == "win32":
        pytest.skip("':' is not a valid filename character on Windows")

    (repo / "pkg" / "od:d.py").write_text(
        "from pkg.core import helper\n\nhelper()\n", encoding="utf-8"
    )
    trees, files = parse_py_trees(repo)
    fp = first_party_symbols(trees)
    grep_name = grep_call_edges(repo, fp, files, ec.GrepMode.NAME)

    assert _edge("pkg/od:d.py", "helper") in grep_name
+ assert _edge("pkg/core.py", "helper") in cgr diff --git a/codebase_rag/tests/test_rust.py b/codebase_rag/tests/test_rust.py index 0751458e6..14f534809 100644 --- a/codebase_rag/tests/test_rust.py +++ b/codebase_rag/tests/test_rust.py @@ -302,25 +302,43 @@ def test_rust_structs_enums_unions( project_name = rust_project.name - expected_classes = [ + expected_structs = [ f"{project_name}.types.Point", f"{project_name}.types.Color", f"{project_name}.types.Unit", f"{project_name}.types.Container", f"{project_name}.types.Borrowed", f"{project_name}.types.GenericBorrowed", + ] + + created_classes = get_node_names(mock_ingestor, "Class") + + missing_structs = set(expected_structs) - created_classes + assert not missing_structs, ( + f"Missing expected structs: {sorted(list(missing_structs))}" + ) + + expected_enums = [ f"{project_name}.types.Direction", f"{project_name}.types.Message", f"{project_name}.types.Option", f"{project_name}.types.Cow", + ] + + created_enums = get_node_names(mock_ingestor, "Enum") + + missing_enums = set(expected_enums) - created_enums + assert not missing_enums, f"Missing expected enums: {sorted(list(missing_enums))}" + + expected_unions = [ f"{project_name}.types.FloatOrInt", ] - created_classes = get_node_names(mock_ingestor, "Class") + created_unions = get_node_names(mock_ingestor, "Union") - missing_classes = set(expected_classes) - created_classes - assert not missing_classes, ( - f"Missing expected types: {sorted(list(missing_classes))}" + missing_unions = set(expected_unions) - created_unions + assert not missing_unions, ( + f"Missing expected unions: {sorted(list(missing_unions))}" ) expected_methods = [ @@ -495,6 +513,13 @@ def test_rust_traits_and_implementations( f"{project_name}.traits.Drawable", ] + created_interfaces = get_node_names(mock_ingestor, "Interface") + + missing_traits = set(expected_traits) - created_interfaces + assert not missing_traits, ( + f"Missing expected traits: {sorted(list(missing_traits))}" + ) + 
expected_structs = [ f"{project_name}.traits.Point", f"{project_name}.traits.Circle", @@ -502,10 +527,9 @@ def test_rust_traits_and_implementations( created_classes = get_node_names(mock_ingestor, "Class") - all_expected = expected_traits + expected_structs - missing_classes = set(all_expected) - created_classes - assert not missing_classes, ( - f"Missing expected traits/structs: {sorted(list(missing_classes))}" + missing_structs = set(expected_structs) - created_classes + assert not missing_structs, ( + f"Missing expected structs: {sorted(list(missing_structs))}" ) expected_methods = [ @@ -1059,19 +1083,27 @@ def test_rust_pattern_matching( project_name = rust_project.name - expected_types = [ - f"{project_name}.pattern_matching.Color", - f"{project_name}.pattern_matching.Message", + expected_structs = [ f"{project_name}.pattern_matching.Point", ] created_classes = get_node_names(mock_ingestor, "Class") - found_types = set(expected_types) & created_classes - assert len(found_types) >= 3, ( - f"Expected at least 3 types, found: {sorted(list(found_types))}" + missing_structs = set(expected_structs) - created_classes + assert not missing_structs, ( + f"Missing expected structs: {sorted(list(missing_structs))}" ) + expected_enums = [ + f"{project_name}.pattern_matching.Color", + f"{project_name}.pattern_matching.Message", + ] + + created_enums = get_node_names(mock_ingestor, "Enum") + + missing_enums = set(expected_enums) - created_enums + assert not missing_enums, f"Missing expected enums: {sorted(list(missing_enums))}" + expected_functions = [ f"{project_name}.pattern_matching.match_color", f"{project_name}.pattern_matching.match_with_guards", @@ -1535,19 +1567,25 @@ def test_rust_macros( ) expected_structs = [ - f"{project_name}.macros.Person", - f"{project_name}.macros.Point", f"{project_name}.macros.MacroStruct", - f"{project_name}.macros.MacroEnum", ] created_classes = get_node_names(mock_ingestor, "Class") - found_structs = set(expected_structs) & 
created_classes - assert len(found_structs) >= 2, ( - f"Expected at least 2 macro structs, found: {sorted(list(found_structs))}" + missing_structs = set(expected_structs) - created_classes + assert not missing_structs, ( + f"Missing expected structs: {sorted(list(missing_structs))}" ) + expected_enums = [ + f"{project_name}.macros.MacroEnum", + ] + + created_enums = get_node_names(mock_ingestor, "Enum") + + missing_enums = set(expected_enums) - created_enums + assert not missing_enums, f"Missing expected enums: {sorted(list(missing_enums))}" + def test_rust_imports_and_use_statements( rust_project: Path, @@ -2050,9 +2088,9 @@ def test_rust_error_handling( f"{project_name}.error_handling.CustomError", ] - created_classes = get_node_names(mock_ingestor, "Class") + created_enums = get_node_names(mock_ingestor, "Enum") - found_enums = set(expected_enums) & created_classes + found_enums = set(expected_enums) & created_enums assert len(found_enums) >= 1, ( f"Expected at least 1 custom error enum, found: {sorted(list(found_enums))}" ) @@ -2403,18 +2441,36 @@ def test_rust_comprehensive_integration( project_name = rust_project.name - expected_types = [ + expected_structs = [ f"{project_name}.comprehensive.User", - f"{project_name}.comprehensive.RepositoryError", f"{project_name}.comprehensive.UserRepository", - f"{project_name}.comprehensive.Repository", ] created_classes = get_node_names(mock_ingestor, "Class") - found_types = set(expected_types) & created_classes - assert len(found_types) >= 3, ( - f"Expected at least 3 comprehensive types, found: {sorted(list(found_types))}" + missing_structs = set(expected_structs) - created_classes + assert not missing_structs, ( + f"Missing expected structs: {sorted(list(missing_structs))}" + ) + + expected_enums = [ + f"{project_name}.comprehensive.RepositoryError", + ] + + created_enums = get_node_names(mock_ingestor, "Enum") + + missing_enums = set(expected_enums) - created_enums + assert not missing_enums, f"Missing expected 
class TestRustTurbofishCalls:
    # (H) A call written with explicit generic arguments (`name::<T>(...)`, the
    # (H) "turbofish" form) must still produce a CALLS edge to the generic function.

    def test_turbofish_call_is_captured(
        self, temp_repo: Path, mock_ingestor: MagicMock
    ) -> None:
        # (H) The fixture needs the generic parameter on the definition and the
        # (H) turbofish at the call site; without them it is not valid Rust and
        # (H) does not exercise the turbofish path this test is named after.
        (temp_repo / "tf.rs").write_text(
            "fn generic_function<T>(value: T) -> T { value }\n"
            "\n"
            "fn caller() {\n"
            "    let _ = generic_function::<i32>(10);\n"
            "}\n",
            encoding="utf-8",
        )

        run_updater(temp_repo, mock_ingestor, skip_if_missing="rust")
        calls = _calls(mock_ingestor)

        # (H) match on suffixes so the assertion is independent of the project prefix
        assert any(
            caller.endswith(".caller") and callee.endswith(".generic_function")
            for caller, callee in calls
        ), f"turbofish call not captured; calls={sorted(calls)}"
mock_ingestor, skip_if_missing="rust") + calls = _calls(mock_ingestor) + + assert any( + caller.endswith(".caller") and callee.endswith(".describe") + for caller, callee in calls + ), f"macro-internal call not captured; calls={sorted(calls)}" + + def test_bare_identifier_in_macro_is_not_a_call( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + # (H) a plain value interpolated into a macro must not become a CALLS edge + (temp_repo / "mac2.rs").write_text( + "fn value() -> i32 { 1 }\n" + "\n" + "fn caller() {\n" + " let value = 5;\n" + ' println!("{}", value);\n' + "}\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="rust") + calls = _calls(mock_ingestor) + + assert not any( + caller.endswith(".caller") and callee.endswith(".value") + for caller, callee in calls + ), f"bare identifier wrongly captured as call; calls={sorted(calls)}" + + def test_struct_literal_in_macro_is_not_a_call( + self, temp_repo: Path, mock_ingestor: MagicMock + ) -> None: + # (H) `Widget { ... }` (token_tree starting with `{`) and `arr[..]` (starting + # (H) with `[`) inside a macro are not calls; only `name(...)` is. + (temp_repo / "mac3.rs").write_text( + "struct Widget { n: i32 }\n" + "fn helper() -> i32 { 1 }\n" + "\n" + "fn caller() {\n" + ' println!("{}", Widget { n: helper() }.n);\n' + "}\n", + encoding="utf-8", + ) + + run_updater(temp_repo, mock_ingestor, skip_if_missing="rust") + calls = _calls(mock_ingestor) + + # (H) the real call inside the macro is still captured + assert any( + caller.endswith(".caller") and callee.endswith(".helper") + for caller, callee in calls + ), f"macro call not captured; calls={sorted(calls)}" + # (H) the struct literal `Widget { ... 
def test_cgr_matches_syn_oracle_on_closure_containment(tmp_path: Path) -> None:
    # (H) Builds a one-file crate from RS_SRC, extracts the cgr graph and the
    # (H) oracle graph for it, and requires the DEFINES row of the score to be
    # (H) perfect with at least five true positives.
    _require_rust()
    crate_dir = tmp_path / "rs_clo_edge"
    src_dir = crate_dir / "src"
    src_dir.mkdir(parents=True)
    manifest = crate_dir / "Cargo.toml"
    manifest.write_text(
        '[package]\nname = "rs_clo_edge"\nversion = "0.1.0"\n', encoding="utf-8"
    )
    (src_dir / "lib.rs").write_text(RS_SRC, encoding="utf-8")

    cgr_graph = extract_cgr_rust_graph(crate_dir, crate_dir.name)
    oracle_graph = run_rust_oracle(crate_dir)
    result = score_edge_types(cgr_graph, oracle_graph, ec.SCORED_EDGE_TYPES)

    rows_by_label = {scored["label"]: scored for scored in result.rows}
    defines_row = rows_by_label.get(cs.RelationshipType.DEFINES.value)
    assert defines_row is not None, (rows_by_label, result.diff)
    assert (defines_row["precision"], defines_row["recall"]) == (1.0, 1.0), (
        defines_row,
        result.diff,
    )
    # (H) The closures nested in methods must add matched DEFINES edges too,
    # (H) not only the closure in the free function.
    assert defines_row["tp"] >= 5, (defines_row, result.diff)
def test_rust_closure_in_impl_method_defined_by_method(
    temp_repo: Path, mock_ingestor: MagicMock
) -> None:
    # (H) A closure inside an impl-method body must get a DEFINES edge whose
    # (H) parent is the Method node `<module>.Foo.run`, and the closure endpoint
    # (H) of that edge must be a registered Function node.
    create_and_run_updater(_project(temp_repo), mock_ingestor, skip_if_missing="rust")
    file_mod = "rs_clo.src.lib"
    run_qn = f"{file_mod}.Foo.run"

    method_qns = {
        str(node[0][1].get(KEY_QUALIFIED_NAME))
        for node in get_nodes(mock_ingestor, NodeLabel.METHOD)
    }
    assert run_qn in method_qns, method_qns

    function_qns = {
        str(node[0][1].get(KEY_QUALIFIED_NAME))
        for node in get_nodes(mock_ingestor, NodeLabel.FUNCTION)
    }

    pairs = _defines_pairs(mock_ingestor)
    # (H) keep only the DEFINES edges whose parent endpoint is labelled Method
    method_defines = {
        (parent_qn, child_qn)
        for (parent_label, parent_qn, child_qn) in pairs
        if parent_label == NodeLabel.METHOD.value
    }
    assert method_defines, pairs

    # (H) Collect every child of Foo.run instead of calling a bare next(): a
    # (H) missing edge then fails with the captured edges rather than an opaque
    # (H) StopIteration, and the check no longer depends on set iteration order.
    closure_children = {
        child_qn for (parent_qn, child_qn) in method_defines if parent_qn == run_qn
    }
    assert closure_children, (run_qn, method_defines)
    unregistered = closure_children - function_qns
    assert not unregistered, (unregistered, function_qns)
def _require_rust() -> None:
    # (H) Skips the calling test unless rust_available() is true and the loaded
    # (H) parsers include the Rust language; the toolchain is checked first.
    if not rust_available():
        pytest.skip("cargo toolchain not available")
    parsers = load_parsers()[0]
    if cs.SupportedLanguage.RUST in parsers:
        return
    pytest.skip("rust parser not available")
--- /dev/null +++ b/codebase_rag/tests/test_rust_impl_method_call_qn.py @@ -0,0 +1,64 @@ +from pathlib import Path + +from evals.cgr_graph import _capture + + +def _make_crate(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "lib.rs").write_text( + "pub struct Chars<'a> { s: &'a str }\n\n" + "impl<'a> Chars<'a> {\n" + " pub fn as_str(&self) -> &'a str { self.s }\n" + "}\n\n" + "pub trait Thing { fn go(&self) -> usize; }\n\n" + "impl<'a> Thing for Chars<'a> {\n" + " fn go(&self) -> usize { self.as_str().len() }\n" + "}\n", + encoding="utf-8", + ) + + +def test_rust_generic_impl_method_caller_qn_strips_generics(tmp_path: Path) -> None: + # (H) A method in a generic impl block (`impl<'a> Thing for Chars<'a>`) is + # (H) registered on the bare type node (crate.lib.Chars.go). The call inside it + # (H) must be attributed to that bare-type caller qn, not a generic-bearing + # (H) crate.lib.Chars<'a>.go that matches no node (which drops the CALLS edge). + _make_crate(tmp_path) + ingestor = _capture(tmp_path, "crate") + calls = { + (str(from_val), str(to_val)) + for _fl, from_val, rel, _tl, to_val in ingestor.rels + if rel == "CALLS" + } + node_qns = {str(uid) for (_label, uid) in ingestor.nodes} + + assert "crate.lib.Chars.go" in node_qns + assert ("crate.lib.Chars.go", "crate.lib.Chars.as_str") in calls + assert ("crate.lib.Chars<'a>.go", "crate.lib.Chars.as_str") not in calls + + +def _make_super_import_crate(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "lib.rs").write_text("pub mod a;\npub mod b;\n", encoding="utf-8") + (root / "b.rs").write_text("pub fn helper() -> i32 { 1 }\n", encoding="utf-8") + (root / "a.rs").write_text( + "use super::b::helper;\n\npub fn run() -> i32 { helper() }\n", + encoding="utf-8", + ) + + +def test_rust_super_imported_free_fn_call_resolves(tmp_path: Path) -> None: + # (H) A free function imported by a Rust relative path (`use super::b::helper`) + # (H) and called bare must resolve to the 
sibling-module function node + # (H) (crate.b.helper). The import target is recorded as raw `super::b::helper` + # (H) (`::`-separated, not project-prefixed), so the external-import guard must + # (H) not mistake it for an external symbol and suppress the trie fallback. + _make_super_import_crate(tmp_path) + ingestor = _capture(tmp_path, "crate") + calls = { + (str(from_val), str(to_val)) + for _fl, from_val, rel, _tl, to_val in ingestor.rels + if rel == "CALLS" + } + + assert ("crate.a.run", "crate.b.helper") in calls diff --git a/codebase_rag/tests/test_rust_impl_primitive_target.py b/codebase_rag/tests/test_rust_impl_primitive_target.py new file mode 100644 index 000000000..a6be79f62 --- /dev/null +++ b/codebase_rag/tests/test_rust_impl_primitive_target.py @@ -0,0 +1,44 @@ +# (H) Regression: methods in an `impl Trait for <primitive>` block (e.g. +# (H) `impl From<Foo> for u8`) must be captured. The impl target `u8` is a +# (H) `primitive_type` node, which extract_impl_target did not recognise, so every +# (H) method in such a block was silently dropped.
+from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.constants import KEY_QUALIFIED_NAME, NodeLabel +from codebase_rag.tests.conftest import create_and_run_updater, get_nodes + + +def test_rust_method_on_primitive_impl_target_is_captured( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "rs_prim" + (project / "src").mkdir(parents=True) + (project / "Cargo.toml").write_text( + encoding="utf-8", data='[package]\nname = "rs_prim"\nversion = "0.1.0"\n' + ) + (project / "src" / "lib.rs").write_text( + encoding="utf-8", + data="""pub enum Foo { A, B } + +impl From<Foo> for u8 { + fn from(value: Foo) -> Self { + match value { + Foo::A => 0, + Foo::B => 1, + } + } +} +""", + ) + create_and_run_updater(project, mock_ingestor, skip_if_missing="rust") + + method_qns = { + str(node[0][1].get(KEY_QUALIFIED_NAME)) + for node in get_nodes(mock_ingestor, NodeLabel.METHOD) + } + assert any(qn.endswith(".u8.from") for qn in method_qns), ( + f"from() on impl-for-u8 not captured: {method_qns}" + ) diff --git a/codebase_rag/tests/test_rust_inheritance_edges.py b/codebase_rag/tests/test_rust_inheritance_edges.py new file mode 100644 index 000000000..88bd34c58 --- /dev/null +++ b/codebase_rag/tests/test_rust_inheritance_edges.py @@ -0,0 +1,49 @@ +# (H) Rust inheritance was uncaptured: `impl Trait for Type` means Type +# (H) IMPLEMENTS Trait, and a supertrait bound `trait Sub: Super` means Sub +# (H) INHERITS Super. cgr emitted neither (impl blocks and trait bounds were +# (H) never turned into inheritance edges).
+from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.constants import RelationshipType +from codebase_rag.tests.conftest import create_and_run_updater, get_relationships + +_RS = """\ +pub trait Shape {} +pub trait Drawable: Shape {} + +pub struct Circle; + +impl Shape for Circle {} +impl Drawable for Circle {} +""" + + +def _pairs(mock_ingestor: MagicMock, rel: str) -> set[tuple[str, str]]: + return { + (call[0][0][2], call[0][2][2]) for call in get_relationships(mock_ingestor, rel) + } + + +def test_rust_impl_and_supertrait_edges( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "rs_inh" + (project / "src").mkdir(parents=True) + (project / "Cargo.toml").write_text( + encoding="utf-8", data='[package]\nname = "rs_inh"\nversion = "0.1.0"\n' + ) + (project / "src" / "lib.rs").write_text(encoding="utf-8", data=_RS) + create_and_run_updater(project, mock_ingestor, skip_if_missing="rust") + + inherits = _pairs(mock_ingestor, RelationshipType.INHERITS.value) + implements = _pairs(mock_ingestor, RelationshipType.IMPLEMENTS.value) + base = "rs_inh.src.lib" + + # (H) impl Trait for Type -> Type IMPLEMENTS Trait. + assert (f"{base}.Circle", f"{base}.Shape") in implements, implements + assert (f"{base}.Circle", f"{base}.Drawable") in implements, implements + # (H) Supertrait bound -> Sub INHERITS Super. + assert (f"{base}.Drawable", f"{base}.Shape") in inherits, inherits diff --git a/codebase_rag/tests/test_rust_inheritance_oracle.py b/codebase_rag/tests/test_rust_inheritance_oracle.py new file mode 100644 index 000000000..3204a224e --- /dev/null +++ b/codebase_rag/tests/test_rust_inheritance_oracle.py @@ -0,0 +1,59 @@ +# (H) Covers Rust inheritance-edge validation: cgr's INHERITS (supertrait bound) +# (H) and IMPLEMENTS (`impl Trait for Type`) edges are graded against the syn +# (H) oracle, by (source node, base SIMPLE NAME). 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_rust_graph +from evals.oracles import run_rust_oracle, rust_available +from evals.score import score_name_edge_types + +RS_SRC = """\ +pub trait Shape {} +pub trait Drawable: Shape {} + +pub struct Circle; + +impl Shape for Circle {} +impl Drawable for Circle {} +""" + + +def _require_rust() -> None: + if not rust_available(): + pytest.skip("cargo toolchain not available") + if cs.SupportedLanguage.RUST not in load_parsers()[0]: + pytest.skip("rust parser not available") + + +def test_cgr_matches_syn_oracle_on_inheritance_edges(tmp_path: Path) -> None: + _require_rust() + project = tmp_path / "rs_inh_edge" + (project / "src").mkdir(parents=True) + (project / "Cargo.toml").write_text( + encoding="utf-8", data='[package]\nname = "rs_inh_edge"\nversion = "0.1.0"\n' + ) + (project / "src" / "lib.rs").write_text(RS_SRC, encoding="utf-8") + + cgr = extract_cgr_rust_graph(project, project.name) + oracle = run_rust_oracle(project) + + result = score_name_edge_types(cgr, oracle, ec.INHERITANCE_NAME_EDGE_TYPES) + by_label = {row["label"]: row for row in result.rows} + for label in ( + cs.RelationshipType.INHERITS.value, + cs.RelationshipType.IMPLEMENTS.value, + ): + row = by_label.get(label) + assert row is not None, (label, by_label, result.diff) + assert row["precision"] == 1.0 and row["recall"] == 1.0, ( + label, + row, + result.diff, + ) diff --git a/codebase_rag/tests/test_rust_nested_module_containment.py b/codebase_rag/tests/test_rust_nested_module_containment.py new file mode 100644 index 000000000..7a924ed4c --- /dev/null +++ b/codebase_rag/tests/test_rust_nested_module_containment.py @@ -0,0 +1,85 @@ +# (H) Rust nested-module containment. 
cgr qualifies items inside `mod inner` +# (H) with the module path (proj...inner.X), but used to (a) DEFINE them from the +# (H) FILE module while leaving the inner Module node an orphan, and (b) qualify +# (H) impl methods inside the mod against the file module, producing a phantom +# (H) DEFINES_METHOD parent that never matched the real type node. Containment +# (H) must be module-nested: file module -> inner module -> its items, and an +# (H) impl method binds to the type under its enclosing module path. +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.constants import KEY_QUALIFIED_NAME, NodeLabel, RelationshipType +from codebase_rag.tests.conftest import ( + create_and_run_updater, + get_nodes, + get_relationships, +) + +_RS = """pub mod inner { + pub fn helper() -> i32 { 1 } + + pub struct Widget { w: i32 } + + impl Widget { + pub fn build(&self) -> i32 { self.w } + } +} +""" + + +def _project(temp_repo: Path) -> Path: + project = temp_repo / "rs_mod" + (project / "src").mkdir(parents=True) + (project / "Cargo.toml").write_text( + encoding="utf-8", data='[package]\nname = "rs_mod"\nversion = "0.1.0"\n' + ) + (project / "src" / "lib.rs").write_text(encoding="utf-8", data=_RS) + return project + + +def _defines_pairs(mock_ingestor: MagicMock) -> set[tuple[str, str, str]]: + # (H) (parent_label, parent_qn, child_qn) for DEFINES edges. + return { + (call[0][0][0], call[0][0][2], call[0][2][2]) + for call in get_relationships(mock_ingestor, RelationshipType.DEFINES.value) + } + + +def test_rust_nested_module_is_module_nested( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + create_and_run_updater(_project(temp_repo), mock_ingestor, skip_if_missing="rust") + file_mod = "rs_mod.src.lib" + inner = f"{file_mod}.inner" + pairs = _defines_pairs(mock_ingestor) + + # (H) file module DEFINES the inner module (no longer an orphan node). 
+ assert (NodeLabel.MODULE.value, file_mod, inner) in pairs, pairs + # (H) inner module DEFINES its own items, not the file module. + assert (NodeLabel.MODULE.value, inner, f"{inner}.helper") in pairs, pairs + assert (NodeLabel.MODULE.value, inner, f"{inner}.Widget") in pairs, pairs + + +def test_rust_impl_method_in_module_binds_to_nested_type( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + create_and_run_updater(_project(temp_repo), mock_ingestor, skip_if_missing="rust") + inner = "rs_mod.src.lib.inner" + + method_qns = { + str(node[0][1].get(KEY_QUALIFIED_NAME)) + for node in get_nodes(mock_ingestor, NodeLabel.METHOD) + } + assert f"{inner}.Widget.build" in method_qns, method_qns + + defines_method = { + (call[0][0][2], call[0][2][2]) + for call in get_relationships( + mock_ingestor, RelationshipType.DEFINES_METHOD.value + ) + } + assert (f"{inner}.Widget", f"{inner}.Widget.build") in defines_method, ( + defines_method + ) diff --git a/codebase_rag/tests/test_rust_node_type.py b/codebase_rag/tests/test_rust_node_type.py new file mode 100644 index 000000000..edfa95e13 --- /dev/null +++ b/codebase_rag/tests/test_rust_node_type.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parsers.class_ingest.node_type import determine_node_type +from codebase_rag.tests.conftest import ( + create_mock_node, + get_node_names, + run_updater, +) +from codebase_rag.types_defs import NodeType + + +@pytest.mark.parametrize( + ("ts_node_type", "expected"), + [ + (cs.TS_RS_ENUM_ITEM, NodeType.ENUM), + (cs.TS_RS_TRAIT_ITEM, NodeType.INTERFACE), + (cs.TS_RS_TYPE_ITEM, NodeType.TYPE), + (cs.TS_RS_UNION_ITEM, NodeType.UNION), + (cs.TS_RS_STRUCT_ITEM, NodeType.CLASS), + ], +) +def test_determine_node_type_rust(ts_node_type: str, expected: NodeType) -> None: + node = create_mock_node(ts_node_type) + result = 
determine_node_type(node, "Foo", "crate::Foo", cs.SupportedLanguage.RUST) + assert result == expected + + +@pytest.fixture +def rust_node_type_project(temp_repo: Path) -> Path: + project_path = temp_repo / "rust_node_type_test" + project_path.mkdir() + (project_path / "Cargo.toml").write_text( + encoding="utf-8", + data='[package]\nname = "rust_node_type_test"\nversion = "0.1.0"\n', + ) + (project_path / "src").mkdir() + (project_path / "src" / "lib.rs").write_text(encoding="utf-8", data="") + (project_path / "types.rs").write_text( + encoding="utf-8", + data=( + "pub enum Color { Red, Green, Blue }\n" + "pub trait Drawable { fn draw(&self); }\n" + "pub type Pair = (i32, i32);\n" + "pub union IntOrFloat { i: i32, f: f32 }\n" + "pub struct Point { pub x: f64, pub y: f64 }\n" + ), + ) + return project_path + + +def test_rust_enum_label( + rust_node_type_project: Path, mock_ingestor: MagicMock +) -> None: + run_updater(rust_node_type_project, mock_ingestor, skip_if_missing="rust") + enum_names = get_node_names(mock_ingestor, NodeType.ENUM) + assert len(enum_names) == 1 + assert enum_names.pop().endswith(".Color") + + +def test_rust_trait_label( + rust_node_type_project: Path, mock_ingestor: MagicMock +) -> None: + run_updater(rust_node_type_project, mock_ingestor, skip_if_missing="rust") + interface_names = get_node_names(mock_ingestor, NodeType.INTERFACE) + assert len(interface_names) == 1 + assert interface_names.pop().endswith(".Drawable") + + +def test_rust_type_alias_label( + rust_node_type_project: Path, mock_ingestor: MagicMock +) -> None: + run_updater(rust_node_type_project, mock_ingestor, skip_if_missing="rust") + type_names = get_node_names(mock_ingestor, NodeType.TYPE) + assert len(type_names) == 1 + assert type_names.pop().endswith(".Pair") + + +def test_rust_union_label( + rust_node_type_project: Path, mock_ingestor: MagicMock +) -> None: + run_updater(rust_node_type_project, mock_ingestor, skip_if_missing="rust") + union_names = 
get_node_names(mock_ingestor, NodeType.UNION) + assert len(union_names) == 1 + assert union_names.pop().endswith(".IntOrFloat") + + +def test_rust_struct_label( + rust_node_type_project: Path, mock_ingestor: MagicMock +) -> None: + run_updater(rust_node_type_project, mock_ingestor, skip_if_missing="rust") + class_names = get_node_names(mock_ingestor, NodeType.CLASS) + assert len(class_names) == 1 + assert class_names.pop().endswith(".Point") diff --git a/codebase_rag/tests/test_rust_retrieval_eval.py b/codebase_rag/tests/test_rust_retrieval_eval.py new file mode 100644 index 000000000..853da5583 --- /dev/null +++ b/codebase_rag/tests/test_rust_retrieval_eval.py @@ -0,0 +1,65 @@ +from pathlib import Path + +import pytest + +from evals import constants as ec +from evals.oracles import rust_available +from evals.rust_retrieval import ( + cgr_rust_call_edges, + oracle_rust_call_edges, + score_rust_retrieval, +) + +needs_rust = pytest.mark.skipif( + not rust_available(), reason="rust toolchain not installed" +) + + +def _make_crate(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "lib.rs").write_text( + "pub struct T;\n\n" + "impl T {\n" + " pub fn helper(&self) -> i32 { 1 }\n" + " pub fn caller(&self) -> i32 { self.helper() }\n" + " pub fn make() -> T { T }\n" + " pub fn orphan(&self) -> i32 { 9 }\n" + "}\n\n" + "pub fn free() -> i32 { 2 }\n\n" + "pub fn use_it() -> i32 {\n" + " let t = T::make();\n" + " free() + t.caller()\n" + "}\n", + encoding="utf-8", + ) + + +@needs_rust +def test_oracle_captures_first_party_rust_calls(tmp_path: Path) -> None: + _make_crate(tmp_path) + edges, declared = oracle_rust_call_edges(tmp_path) + + # (H) self.helper(), T::make(), free(), t.caller() are all first-party calls. + assert ("lib.rs", "helper") in edges + assert ("lib.rs", "make") in edges + assert ("lib.rs", "free") in edges + assert ("lib.rs", "caller") in edges + # (H) orphan is declared but never called -> never a call edge. 
+ assert ("lib.rs", "orphan") not in edges + assert {"helper", "caller", "make", "free", "use_it", "orphan"} <= declared + + +@needs_rust +def test_cgr_matches_oracle_on_clean_rust_crate(tmp_path: Path) -> None: + _make_crate(tmp_path) + oracle, declared = oracle_rust_call_edges(tmp_path) + cgr = cgr_rust_call_edges(tmp_path, tmp_path.name, declared) + assert cgr == oracle + + +def test_score_rust_retrieval_prf() -> None: + result = score_rust_retrieval( + {("a.rs", "f"), ("a.rs", "g")}, {("a.rs", "f"), ("b.rs", "h")} + ) + row = next(r for r in result.rows if r["label"] == ec.RUST_RETRIEVAL_LABEL) + assert (row["tp"], row["fp"], row["fn"]) == (1, 1, 1) diff --git a/codebase_rag/tests/test_rust_span_oracle.py b/codebase_rag/tests/test_rust_span_oracle.py new file mode 100644 index 000000000..5bd9abb53 --- /dev/null +++ b/codebase_rag/tests/test_rust_span_oracle.py @@ -0,0 +1,83 @@ +# (H) Covers Rust node SPAN (end_line) validation: cgr's end_line for each node is +# (H) graded against the syn oracle (which emits the whole-node span end), joined +# (H) on (kind, file, start) endpoints. Exercises doc comments, multi-line +# (H) attributes, a multi-line signature, a where-clause, and a multi-line closure +# (H) so the span is not trivially the start line. +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_rust_graph +from evals.oracles import run_rust_oracle, rust_available +from evals.score import score_span + +RS_SRC = """\ +/// A documented struct +/// spanning several doc lines. 
+#[derive(Debug, Clone)] +pub struct Widget { + name: String, + size: u32, +} + +impl Widget { + pub fn area( + &self, + scale: u32, + ) -> u32 { + self.size * scale + } +} + +pub trait Drawable { + fn draw(&self) -> String { + String::from("x") + } +} + +pub fn standalone() +where + u32: Sized, +{ + let cb = |v: u32| { + v + 1 + }; + let _ = cb(2); +} +""" + + +def _require_rust() -> None: + if not rust_available(): + pytest.skip("cargo toolchain not available") + if cs.SupportedLanguage.RUST not in load_parsers()[0]: + pytest.skip("rust parser not available") + + +def test_cgr_matches_syn_oracle_on_node_spans(tmp_path: Path) -> None: + _require_rust() + project = tmp_path / "rs_span" + (project / "src").mkdir(parents=True) + (project / "Cargo.toml").write_text( + encoding="utf-8", data='[package]\nname = "rs_span"\nversion = "0.1.0"\n' + ) + (project / "src" / "lib.rs").write_text(RS_SRC, encoding="utf-8") + + cgr = extract_cgr_rust_graph(project, project.name) + oracle = run_rust_oracle(project) + + result = score_span(cgr, oracle, ec.RS_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + aggregate = by_label.get(ec.AGGREGATE_LABEL) + assert aggregate is not None, (by_label, result.diff) + assert aggregate["precision"] == 1.0 and aggregate["recall"] == 1.0, ( + aggregate, + result.diff, + ) + # (H) Guard the sample actually exercises multi-line spans (else it is vacuous). + assert aggregate["tp"] >= 5, aggregate diff --git a/codebase_rag/tests/test_rust_structure_oracle.py b/codebase_rag/tests/test_rust_structure_oracle.py new file mode 100644 index 000000000..f9e9e9fa8 --- /dev/null +++ b/codebase_rag/tests/test_rust_structure_oracle.py @@ -0,0 +1,68 @@ +# (H) Covers the Rust structure oracle harness (evals/oracles/rs_oracle + +# (H) evals/rust_l1.py): the syn-based oracle is authoritative ground truth, and +# (H) cgr's captured Rust nodes are graded against it on (kind, file, start_line). 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_rust_nodes +from evals.oracles import run_rust_oracle, rust_available +from evals.score import score_node_kinds +from evals.types_defs import GraphData + +RS_SRC = """\ +pub struct Point { pub x: i32, pub y: i32 } +pub enum Direction { North, South } +pub trait Shape { fn area(&self) -> f64; } +pub type Meters = f64; + +pub fn free_fn(a: i32) -> i32 { a + 1 } + +impl Point { + pub fn new(x: i32, y: i32) -> Self { Point { x, y } } +} + +impl Shape for Point { + fn area(&self) -> f64 { 0.0 } +} +""" + + +def _require_rust() -> None: + if not rust_available(): + pytest.skip("cargo toolchain not available") + if cs.SupportedLanguage.RUST not in load_parsers()[0]: + pytest.skip("rust parser not available") + + +def _project(tmp_path: Path) -> Path: + project = tmp_path / "rs_oracle_test" + (project / "src").mkdir(parents=True) + (project / "Cargo.toml").write_text( + encoding="utf-8", data='[package]\nname = "rs_oracle_test"\nversion = "0.1.0"\n' + ) + (project / "src" / "lib.rs").write_text(RS_SRC, encoding="utf-8") + return project + + +def test_cgr_matches_syn_oracle_on_rust_structure(tmp_path: Path) -> None: + _require_rust() + project = _project(tmp_path) + cgr = GraphData( + nodes=extract_cgr_rust_nodes(project, project.name), + edges=set(), + name_edges=set(), + ) + oracle = run_rust_oracle(project) + + result = score_node_kinds(cgr, oracle, ec.RS_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + for label in ("Class", "Interface", "Enum", "Type", "Function", "Method"): + row = by_label.get(label) + assert row is not None, (label, by_label) + assert row["precision"] == 1.0 and row["recall"] == 1.0, (label, row) diff --git a/codebase_rag/tests/test_rust_trait_method_containment.py 
b/codebase_rag/tests/test_rust_trait_method_containment.py new file mode 100644 index 000000000..26db7c491 --- /dev/null +++ b/codebase_rag/tests/test_rust_trait_method_containment.py @@ -0,0 +1,43 @@ +# (H) Regression: a DEFINES_METHOD relationship is matched in the graph by the +# (H) parent's LABEL and qualified_name, so a method on a non-Class container +# (H) (a Rust trait -> Interface node) must be emitted with the parent's real +# (H) label. It was hardcoded to Class, so MATCH (a:Class {qn: trait}) found +# (H) nothing and the trait -> method containment edge was silently dropped. +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.constants import NodeLabel, RelationshipType +from codebase_rag.tests.conftest import create_and_run_updater, get_relationships + + +def test_rust_trait_method_defined_by_interface_node( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "rs_trait" + (project / "src").mkdir(parents=True) + (project / "Cargo.toml").write_text( + encoding="utf-8", data='[package]\nname = "rs_trait"\nversion = "0.1.0"\n' + ) + (project / "src" / "lib.rs").write_text( + encoding="utf-8", + data="""pub trait Shape { + fn area(&self) -> f64 { 0.0 } +} +""", + ) + create_and_run_updater(project, mock_ingestor, skip_if_missing="rust") + + defines_method = get_relationships( + mock_ingestor, RelationshipType.DEFINES_METHOD.value + ) + # (H) (parent_label, parent_qn) pairs for the trait's method. + parents = { + (call[0][0][0], call[0][0][2]) + for call in defines_method + if str(call[0][2][2]).endswith(".Shape.area") + } + assert (NodeLabel.INTERFACE.value, "rs_trait.src.lib.Shape") in parents, parents + # (H) The wrong Class-labelled parent must not be emitted. 
+ assert (NodeLabel.CLASS.value, "rs_trait.src.lib.Shape") not in parents, parents diff --git a/codebase_rag/tests/test_semantic_search.py b/codebase_rag/tests/test_semantic_search.py index 759145d73..95df18ba3 100644 --- a/codebase_rag/tests/test_semantic_search.py +++ b/codebase_rag/tests/test_semantic_search.py @@ -4,6 +4,7 @@ import pytest +from codebase_rag import constants as cs from codebase_rag.utils.dependencies import has_semantic_dependencies @@ -24,9 +25,7 @@ def mock_search_embeddings() -> MagicMock: @pytest.fixture def mock_ingestor() -> MagicMock: mock = MagicMock() - mock.__enter__ = MagicMock(return_value=mock) - mock.__exit__ = MagicMock(return_value=False) - mock._execute_query.return_value = [ + mock.fetch_all.return_value = [ { "node_id": 1, "qualified_name": "project.module.func1", @@ -55,10 +54,67 @@ def test_semantic_code_search_returns_empty_without_dependencies() -> None: from codebase_rag.tools.semantic_search import semantic_code_search - results = semantic_code_search("find error handlers") + results = semantic_code_search(MagicMock(), "find error handlers") assert results == [] +@patch("codebase_rag.tools.semantic_search.has_semantic_dependencies", return_value=True) +@patch("codebase_rag.vector_store.search_embeddings") +@patch("codebase_rag.embedder.embed_code") +def test_semantic_code_search_reuses_injected_ingestor( + mock_embed: MagicMock, mock_search: MagicMock, _deps: MagicMock +) -> None: + from codebase_rag.tools.semantic_search import semantic_code_search + + mock_embed.return_value = [0.0] + mock_search.return_value = [(1, 0.99), (2, 0.42)] + + ingestor = MagicMock() + ingestor.fetch_all.return_value = [ + { + "node_id": 1, + "qualified_name": "pkg.mod.foo", + "name": "foo", + "type": ["Function"], + }, + { + "node_id": 2, + "qualified_name": "pkg.mod.Bar", + "name": "Bar", + "type": ["Class"], + }, + ] + + results = semantic_code_search(ingestor, "find the foo function", top_k=2) + + ingestor.fetch_all.assert_called_once() 
+ ingestor._execute_query.assert_not_called() + assert [r["qualified_name"] for r in results] == ["pkg.mod.foo", "pkg.mod.Bar"] + assert results[0]["score"] == 0.99 + + +@patch("codebase_rag.tools.semantic_search.has_semantic_dependencies", return_value=True) +@patch("codebase_rag.vector_store.search_embeddings") +@patch("codebase_rag.embedder.embed_code") +def test_semantic_code_search_tolerates_missing_result_fields( + mock_embed: MagicMock, mock_search: MagicMock, _deps: MagicMock +) -> None: + from codebase_rag.tools.semantic_search import semantic_code_search + + mock_embed.return_value = [0.0] + mock_search.return_value = [(1, 0.99)] + + ingestor = MagicMock() + ingestor.fetch_all.return_value = [{"node_id": 1}] + + results = semantic_code_search(ingestor, "find foo", top_k=1) + + assert len(results) == 1 + assert results[0]["qualified_name"] == "" + assert results[0]["name"] == "" + assert results[0]["type"] == cs.SEMANTIC_TYPE_UNKNOWN + + @pytest.mark.skipif( not has_semantic_dependencies(), reason="semantic dependencies not installed" ) @@ -72,12 +128,10 @@ def test_semantic_code_search_returns_formatted_results( with ( patch("codebase_rag.embedder.embed_code", mock_embed_code), patch("codebase_rag.vector_store.search_embeddings", mock_search_embeddings), - patch( - "codebase_rag.services.graph_service.MemgraphIngestor", - return_value=mock_ingestor, - ), ): - results = semantic_code_search("find authentication code", top_k=3) + results = semantic_code_search( + mock_ingestor, "find authentication code", top_k=3 + ) assert len(results) == 3 assert results[0]["node_id"] == 1 @@ -99,12 +153,8 @@ def test_semantic_code_search_calls_embed_code_with_query( with ( patch("codebase_rag.embedder.embed_code", mock_embed_code), patch("codebase_rag.vector_store.search_embeddings", mock_search_embeddings), - patch( - "codebase_rag.services.graph_service.MemgraphIngestor", - return_value=mock_ingestor, - ), ): - semantic_code_search("database operations") + 
semantic_code_search(mock_ingestor, "database operations") mock_embed_code.assert_called_once_with("database operations") @@ -122,12 +172,8 @@ def test_semantic_code_search_passes_top_k_to_search( with ( patch("codebase_rag.embedder.embed_code", mock_embed_code), patch("codebase_rag.vector_store.search_embeddings", mock_search_embeddings), - patch( - "codebase_rag.services.graph_service.MemgraphIngestor", - return_value=mock_ingestor, - ), ): - semantic_code_search("file handling", top_k=10) + semantic_code_search(mock_ingestor, "file handling", top_k=10) mock_search_embeddings.assert_called_once_with([0.1] * 768, top_k=10) @@ -146,7 +192,7 @@ def test_semantic_code_search_returns_empty_when_no_matches( patch("codebase_rag.embedder.embed_code", mock_embed_code), patch("codebase_rag.vector_store.search_embeddings", mock_search_empty), ): - results = semantic_code_search("nonexistent functionality") + results = semantic_code_search(MagicMock(), "nonexistent functionality") assert results == [] @@ -160,7 +206,7 @@ def test_semantic_code_search_handles_exception(mock_embed_code: MagicMock) -> N mock_embed_code.side_effect = Exception("Embedding failed") with patch("codebase_rag.embedder.embed_code", mock_embed_code): - results = semantic_code_search("some query") + results = semantic_code_search(MagicMock(), "some query") assert results == [] @@ -179,12 +225,8 @@ def test_semantic_code_search_preserves_score_order( with ( patch("codebase_rag.embedder.embed_code", mock_embed_code), patch("codebase_rag.vector_store.search_embeddings", mock_search), - patch( - "codebase_rag.services.graph_service.MemgraphIngestor", - return_value=mock_ingestor, - ), ): - results = semantic_code_search("test query") + results = semantic_code_search(mock_ingestor, "test query") assert results[0]["node_id"] == 3 assert results[0]["score"] == 0.99 @@ -198,7 +240,7 @@ def test_semantic_code_search_preserves_score_order( def test_get_function_source_code_returns_source(mock_ingestor: MagicMock) 
-> None: from codebase_rag.tools.semantic_search import get_function_source_code - mock_ingestor._execute_query.return_value = [ + mock_ingestor.fetch_all.return_value = [ { "qualified_name": "project.module.func", "start_line": 10, @@ -211,10 +253,6 @@ def test_get_function_source_code_returns_source(mock_ingestor: MagicMock) -> No mock_extract = MagicMock(return_value="def func():\n return 42") with ( - patch( - "codebase_rag.services.graph_service.MemgraphIngestor", - return_value=mock_ingestor, - ), patch( "codebase_rag.utils.source_extraction.validate_source_location", mock_validate, @@ -223,7 +261,7 @@ def test_get_function_source_code_returns_source(mock_ingestor: MagicMock) -> No "codebase_rag.utils.source_extraction.extract_source_lines", mock_extract ), ): - result = get_function_source_code(123) + result = get_function_source_code(mock_ingestor, 123) assert result == "def func():\n return 42" @@ -236,13 +274,9 @@ def test_get_function_source_code_returns_none_when_not_found( ) -> None: from codebase_rag.tools.semantic_search import get_function_source_code - mock_ingestor._execute_query.return_value = [] + mock_ingestor.fetch_all.return_value = [] - with patch( - "codebase_rag.services.graph_service.MemgraphIngestor", - return_value=mock_ingestor, - ): - result = get_function_source_code(999) + result = get_function_source_code(mock_ingestor, 999) assert result is None @@ -255,7 +289,7 @@ def test_get_function_source_code_returns_none_on_invalid_location( ) -> None: from codebase_rag.tools.semantic_search import get_function_source_code - mock_ingestor._execute_query.return_value = [ + mock_ingestor.fetch_all.return_value = [ { "qualified_name": "project.module.func", "start_line": None, @@ -266,17 +300,11 @@ def test_get_function_source_code_returns_none_on_invalid_location( mock_validate = MagicMock(return_value=(False, None)) - with ( - patch( - "codebase_rag.services.graph_service.MemgraphIngestor", - return_value=mock_ingestor, - ), - patch( - 
"codebase_rag.utils.source_extraction.validate_source_location", - mock_validate, - ), + with patch( + "codebase_rag.utils.source_extraction.validate_source_location", + mock_validate, ): - result = get_function_source_code(123) + result = get_function_source_code(mock_ingestor, 123) assert result is None @@ -287,13 +315,9 @@ def test_get_function_source_code_returns_none_on_invalid_location( def test_get_function_source_code_handles_exception(mock_ingestor: MagicMock) -> None: from codebase_rag.tools.semantic_search import get_function_source_code - mock_ingestor._execute_query.side_effect = Exception("Database error") + mock_ingestor.fetch_all.side_effect = Exception("Database error") - with patch( - "codebase_rag.services.graph_service.MemgraphIngestor", - return_value=mock_ingestor, - ): - result = get_function_source_code(123) + result = get_function_source_code(mock_ingestor, 123) assert result is None @@ -301,13 +325,13 @@ def test_get_function_source_code_handles_exception(mock_ingestor: MagicMock) -> @pytest.mark.skipif( not has_semantic_dependencies(), reason="semantic dependencies not installed" ) -def test_create_semantic_search_tool_returns_tool() -> None: +def test_create_semantic_search_tool_returns_tool(mock_ingestor: MagicMock) -> None: from pydantic_ai import Tool from codebase_rag.tools.semantic_search import create_semantic_search_tool from codebase_rag.tools.tool_descriptions import AgenticToolName - tool = create_semantic_search_tool() + tool = create_semantic_search_tool(mock_ingestor) assert isinstance(tool, Tool) assert tool.name == AgenticToolName.SEMANTIC_SEARCH @@ -316,13 +340,13 @@ def test_create_semantic_search_tool_returns_tool() -> None: @pytest.mark.skipif( not has_semantic_dependencies(), reason="semantic dependencies not installed" ) -def test_create_get_function_source_tool_returns_tool() -> None: +def test_create_get_function_source_tool_returns_tool(mock_ingestor: MagicMock) -> None: from pydantic_ai import Tool from 
codebase_rag.tools.semantic_search import create_get_function_source_tool from codebase_rag.tools.tool_descriptions import AgenticToolName - tool = create_get_function_source_tool() + tool = create_get_function_source_tool(mock_ingestor) assert isinstance(tool, Tool) assert tool.name == AgenticToolName.GET_FUNCTION_SOURCE @@ -339,15 +363,11 @@ async def test_semantic_search_tool_formats_results( ) -> None: from codebase_rag.tools.semantic_search import create_semantic_search_tool - tool = create_semantic_search_tool() + tool = create_semantic_search_tool(mock_ingestor) with ( patch("codebase_rag.embedder.embed_code", mock_embed_code), patch("codebase_rag.vector_store.search_embeddings", mock_search_embeddings), - patch( - "codebase_rag.services.graph_service.MemgraphIngestor", - return_value=mock_ingestor, - ), ): result = await tool.function("find handlers") @@ -367,7 +387,7 @@ async def test_semantic_search_tool_handles_no_results( from codebase_rag.tools.semantic_search import create_semantic_search_tool mock_search_empty = MagicMock(return_value=[]) - tool = create_semantic_search_tool() + tool = create_semantic_search_tool(MagicMock()) with ( patch("codebase_rag.embedder.embed_code", mock_embed_code), @@ -387,7 +407,7 @@ async def test_get_function_source_tool_returns_source( ) -> None: from codebase_rag.tools.semantic_search import create_get_function_source_tool - mock_ingestor._execute_query.return_value = [ + mock_ingestor.fetch_all.return_value = [ { "qualified_name": "project.func", "start_line": 1, @@ -399,13 +419,9 @@ async def test_get_function_source_tool_returns_source( mock_validate = MagicMock(return_value=(True, MagicMock())) mock_extract = MagicMock(return_value="def func(): pass") - tool = create_get_function_source_tool() + tool = create_get_function_source_tool(mock_ingestor) with ( - patch( - "codebase_rag.services.graph_service.MemgraphIngestor", - return_value=mock_ingestor, - ), patch( 
"codebase_rag.utils.source_extraction.validate_source_location", mock_validate, @@ -429,14 +445,10 @@ async def test_get_function_source_tool_handles_not_found( ) -> None: from codebase_rag.tools.semantic_search import create_get_function_source_tool - mock_ingestor._execute_query.return_value = [] + mock_ingestor.fetch_all.return_value = [] - tool = create_get_function_source_tool() + tool = create_get_function_source_tool(mock_ingestor) - with patch( - "codebase_rag.services.graph_service.MemgraphIngestor", - return_value=mock_ingestor, - ): - result = await tool.function(999) + result = await tool.function(999) assert "Could not retrieve source code" in result diff --git a/codebase_rag/tests/test_semantic_search_eval.py b/codebase_rag/tests/test_semantic_search_eval.py new file mode 100644 index 000000000..69afba62d --- /dev/null +++ b/codebase_rag/tests/test_semantic_search_eval.py @@ -0,0 +1,76 @@ +from pathlib import Path + +import pytest + +from codebase_rag.utils.dependencies import has_semantic_dependencies +from evals import constants as ec +from evals.semantic_search import ( + SemanticCase, + cgr_semantic_ranking, + function_snippets, + score_semantic, +) + +needs_semantic = pytest.mark.skipif( + not has_semantic_dependencies(), reason="semantic extra not installed" +) + + +def _make_repo(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "__init__.py").write_text("", encoding="utf-8") + (root / "ops.py").write_text( + "import json\n\n\n" + "def load_json_file(path):\n" + " with open(path) as handle:\n" + " return json.load(handle)\n\n\n" + "def send_email(recipient, body):\n" + " server = connect_smtp()\n" + " server.sendmail(recipient, body)\n\n\n" + "def compute_sales_tax(amount, rate):\n" + " return amount * rate\n\n\n" + "def connect_smtp():\n" + " return object()\n", + encoding="utf-8", + ) + + +_CASES = [ + SemanticCase("read and parse a json file from disk", "proj.ops.load_json_file"), + SemanticCase("send an email message 
to a recipient", "proj.ops.send_email"), + SemanticCase("calculate tax on a purchase amount", "proj.ops.compute_sales_tax"), +] + + +@needs_semantic +def test_function_snippets_extracted_from_graph(tmp_path: Path) -> None: + src = tmp_path / "proj" + _make_repo(src) + snippets = function_snippets(src, "proj") + assert "proj.ops.load_json_file" in snippets + assert "json.load" in snippets["proj.ops.load_json_file"] + + +@needs_semantic +def test_cgr_semantic_search_retrieves_expected_function(tmp_path: Path) -> None: + src = tmp_path / "proj" + _make_repo(src) + queries = [case.query for case in _CASES] + ranking = cgr_semantic_ranking(src, "proj", queries, ec.SEMANTIC_TOP_K) + result = score_semantic(_CASES, ranking) + row = next(r for r in result.rows if r["label"] == ec.SEMANTIC_LABEL) + # (H) Each query's clearly-relevant function should rank in the top k. + assert row["recall"] == 1.0 + assert row["fn"] == 0 + + +def test_score_semantic_counts_misses() -> None: + cases = [ + SemanticCase("q1", "proj.a"), + SemanticCase("q2", "proj.b"), + ] + ranking = {"q1": ["proj.a", "proj.x"], "q2": ["proj.y"]} + result = score_semantic(cases, ranking) + row = next(r for r in result.rows if r["label"] == ec.SEMANTIC_LABEL) + assert (row["tp"], row["fn"]) == (1, 1) + assert row["recall"] == 0.5 diff --git a/codebase_rag/tests/test_shell_command.py b/codebase_rag/tests/test_shell_command.py index f745b2e30..cf57396d1 100644 --- a/codebase_rag/tests/test_shell_command.py +++ b/codebase_rag/tests/test_shell_command.py @@ -1,5 +1,6 @@ from __future__ import annotations +import sys from pathlib import Path from unittest.mock import MagicMock @@ -274,6 +275,64 @@ def test_empty_segment(self) -> None: available = ", ".join(sorted(settings.SHELL_COMMAND_ALLOWLIST)) assert _validate_segment("", available) is None + def test_bypass_allowlist_skips_allowlist_error(self) -> None: + available = ", ".join(sorted(settings.SHELL_COMMAND_ALLOWLIST)) + assert ( + _validate_segment( + "curl 
http://example.com", available, bypass_allowlist=True + ) + is None + ) + + def test_bypass_allowlist_still_blocks_dangerous_rm(self) -> None: + available = ", ".join(sorted(settings.SHELL_COMMAND_ALLOWLIST)) + error = _validate_segment("rm -rf /", available, bypass_allowlist=True) + assert error is not None + assert "dangerous" in error.lower() + + +class TestYoloMode: + async def test_yolo_skips_approval_for_write_command( + self, temp_project_root: Path + ) -> None: + test_file = temp_project_root / "yolo_target.txt" + test_file.write_text("bye", encoding="utf-8") + commander = ShellCommander( + str(temp_project_root), timeout=5, is_yolo=lambda: True + ) + tool = create_shell_command_tool(commander) + mock_ctx = MagicMock() + mock_ctx.tool_call_approved = False + result = await tool.function(mock_ctx, "rm yolo_target.txt") + assert result.return_code == 0 + assert not test_file.exists() + + async def test_yolo_runs_non_allowlist_command( + self, temp_project_root: Path + ) -> None: + commander = ShellCommander( + str(temp_project_root), timeout=5, is_yolo=lambda: True + ) + tool = create_shell_command_tool(commander) + mock_ctx = MagicMock() + mock_ctx.tool_call_approved = False + assert "printf" not in settings.SHELL_COMMAND_ALLOWLIST + result = await tool.function(mock_ctx, "printf hello") + assert "not in the allowlist" not in result.stderr + + async def test_yolo_still_blocks_dangerous_rm_rf( + self, temp_project_root: Path + ) -> None: + commander = ShellCommander( + str(temp_project_root), timeout=5, is_yolo=lambda: True + ) + tool = create_shell_command_tool(commander) + mock_ctx = MagicMock() + mock_ctx.tool_call_approved = False + result = await tool.function(mock_ctx, "rm -rf /") + assert result.return_code != 0 + assert "dangerous" in result.stderr.lower() + class TestHasRedirectOperators: def test_output_redirect(self) -> None: @@ -386,6 +445,9 @@ async def test_simple_pipe( assert result.return_code == 0 assert "5" in result.stdout + 
@pytest.mark.skipif( + sys.platform == "win32", reason="Unix find not available on Windows" + ) async def test_find_with_wc( self, shell_commander: ShellCommander, temp_project_root: Path ) -> None: @@ -398,6 +460,10 @@ async def test_find_with_wc( async def test_rg_in_pipeline( self, shell_commander: ShellCommander, temp_project_root: Path ) -> None: + import shutil + + if not shutil.which("rg"): + pytest.skip("rg (ripgrep) not installed") (temp_project_root / "data.txt").write_text("foo\nbar\nbaz\n", encoding="utf-8") result = await shell_commander.execute("cat data.txt | rg bar") assert result.return_code == 0 @@ -630,11 +696,11 @@ def test_path_outside_project(self, tmp_path: Path) -> None: ["rm", "-rf", "../other"], project_root ) assert is_dangerous - assert "outside project" in reason + assert "outside project" in reason or "system directory" in reason def test_safe_path_inside_project(self, tmp_path: Path) -> None: - project_root = tmp_path / "project" - project_root.mkdir() + project_root = (tmp_path / "project").resolve() + project_root.mkdir(exist_ok=True) is_dangerous, _ = _is_dangerous_rm_path( ["rm", "-rf", "subdir/file.txt"], project_root ) @@ -741,7 +807,8 @@ async def test_rm_outside_project_blocked( ) -> None: result = await shell_commander.execute("rm ../outside_project") assert result.return_code == -1 - assert "outside project" in result.stderr.lower() + stderr_lower = result.stderr.lower() + assert "outside project" in stderr_lower or "system directory" in stderr_lower class TestAwkSedXargsPatterns: diff --git a/codebase_rag/tests/test_sibling_mixin_resolution.py b/codebase_rag/tests/test_sibling_mixin_resolution.py new file mode 100644 index 000000000..48bb15156 --- /dev/null +++ b/codebase_rag/tests/test_sibling_mixin_resolution.py @@ -0,0 +1,97 @@ +# (H) L3 finding from the evals/ harness: PythonAstAnalyzerMixin._traverse_single_pass +# (H) calls self._infer_instance_variable_types_from_assignments(...), a method defined +# (H) on the 
sibling PythonVariableAnalyzerMixin. Neither is the other's base; both are +# (H) combined into the concrete PythonTypeInferenceEngine. A same-named stub in another +# (H) class makes the bare-name trie fallback ambiguous, so resolution must go through +# (H) the concrete subclass's MRO to land on the real sibling method. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +FILES = { + "pkg/__init__.py": "", + # (H) A decoy class declaring the same method name (mirrors a TYPE_CHECKING stub) + # (H) so the trie fallback alone cannot pick the right target. + "pkg/decoy.py": ("class Deps:\n def infer_vars(self):\n return None\n"), + "pkg/mixin_a.py": ( + "class AMixin:\n def traverse(self):\n return self.infer_vars()\n" + ), + "pkg/mixin_b.py": ("class BMixin:\n def infer_vars(self):\n return {}\n"), + "pkg/engine.py": ( + "from .mixin_a import AMixin\n" + "from .mixin_b import BMixin\n\n\n" + "class Engine(AMixin, BMixin):\n" + " def other(self):\n" + " return None\n" + ), +} + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def 
_calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + for rel, content in FILES.items(): + p = tmp_path / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestSiblingMixinResolution: + def test_self_call_resolves_to_sibling_mixin_method(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.mixin_a.AMixin.traverse", + "proj.pkg.mixin_b.BMixin.infer_vars", + ) in calls, calls + + def test_does_not_resolve_to_decoy_class(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.mixin_a.AMixin.traverse", + "proj.pkg.decoy.Deps.infer_vars", + ) not in calls, calls diff --git a/codebase_rag/tests/test_single_file_repo_path.py b/codebase_rag/tests/test_single_file_repo_path.py new file mode 100644 index 000000000..71d4a28a7 --- /dev/null +++ b/codebase_rag/tests/test_single_file_repo_path.py @@ -0,0 +1,138 @@ +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from codebase_rag.tests.conftest import ( + get_node_names, + get_relationships, + run_updater, +) + + +@pytest.fixture +def cpp_single_file(temp_repo: Path) -> Path: + test_file = temp_repo / "cmGlobalFastbuildGenerator.cxx" + test_file.write_text( + encoding="utf-8", + data=""" +#include +#include +#include + +static std::map const compilerIdToFastbuildFamily = { + {"GNU", "gcc"}, + {"Clang", "clang"}, +}; + +static std::set const supportedLanguages = { + "C", + "CXX", +}; + +template +T generateAlias(std::string const& name) { return T(); } + +static void helperFunc() {} + +class FastbuildTarget { +public: + void GenerateAliases(); +}; + +void FastbuildTarget::GenerateAliases() { + auto alias = 
generateAlias("test"); +} + +void freeFunction() { + helperFunc(); +} +""", + ) + return test_file + + +@pytest.fixture +def ran_single_file_updater(cpp_single_file: Path, mock_ingestor: MagicMock) -> None: + from codebase_rag.graph_updater import GraphUpdater + from codebase_rag.parser_loader import load_parsers + + parsers, queries = load_parsers() + updater = GraphUpdater( + ingestor=mock_ingestor, + repo_path=cpp_single_file, + parsers=parsers, + queries=queries, + ) + updater.run() + + +def test_single_file_repo_path_produces_graph( + ran_single_file_updater: None, + mock_ingestor: MagicMock, +) -> None: + functions = get_node_names(mock_ingestor, "Function") + methods = get_node_names(mock_ingestor, "Method") + classes = get_node_names(mock_ingestor, "Class") + + assert any("generateAlias" in qn for qn in functions) + assert any("helperFunc" in qn for qn in functions) + assert any("freeFunction" in qn for qn in functions) + + assert any("GenerateAliases" in qn for qn in methods) + assert any("FastbuildTarget" in qn for qn in classes) + + defines_rels = get_relationships(mock_ingestor, "DEFINES") + assert len(defines_rels) >= 3 + + calls_rels = get_relationships(mock_ingestor, "CALLS") + assert len(calls_rels) >= 1 + + +def test_single_file_repo_path_static_functions( + ran_single_file_updater: None, + mock_ingestor: MagicMock, +) -> None: + functions = get_node_names(mock_ingestor, "Function") + + assert any("helperFunc" in qn for qn in functions), ( + f"Static function helperFunc not found. Functions: {functions}" + ) + + assert any("generateAlias" in qn for qn in functions), ( + f"Template function generateAlias not found. 
Functions: {functions}" + ) + + +def test_single_file_repo_path_out_of_class_methods( + ran_single_file_updater: None, + mock_ingestor: MagicMock, +) -> None: + methods = get_node_names(mock_ingestor, "Method") + defines_method_rels = get_relationships(mock_ingestor, "DEFINES_METHOD") + + assert any("GenerateAliases" in qn for qn in methods), ( + f"Out-of-class method GenerateAliases not found. Methods: {methods}" + ) + assert len(defines_method_rels) >= 1 + + +def test_directory_repo_path_still_works( + temp_repo: Path, + mock_ingestor: MagicMock, +) -> None: + project = temp_repo / "normal_project" + project.mkdir() + (project / "main.cpp").write_text( + encoding="utf-8", + data=""" +void doStuff() {} +int main() { doStuff(); return 0; } +""", + ) + + run_updater(project, mock_ingestor) + + functions = get_node_names(mock_ingestor, "Function") + assert any("doStuff" in qn for qn in functions) + assert any("main" in qn for qn in functions) diff --git a/codebase_rag/tests/test_single_query_output_format.py b/codebase_rag/tests/test_single_query_output_format.py new file mode 100644 index 000000000..6e383d6ec --- /dev/null +++ b/codebase_rag/tests/test_single_query_output_format.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +import json +import re +from collections.abc import Generator +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from typer.testing import CliRunner + +from codebase_rag import constants as cs +from codebase_rag.cli import app +from codebase_rag.main import main_single_query + +_QUESTION = "What does the parser do?" +_ANSWER = "The parser builds a knowledge graph." 
+ +runner = CliRunner() + +_ANSI = re.compile(r"\x1b\[[0-9;]*m") + + +def _plain(output: str) -> str: + # (H) ANSI-stripped output with Rich soft-wrap newlines rejoined + return _ANSI.sub("", output).replace("\n", "") + + +@pytest.fixture +def mock_agent_stack() -> Generator[MagicMock, None, None]: + agent = MagicMock() + agent.run = AsyncMock(return_value=MagicMock(output=_ANSWER)) + with ( + patch("codebase_rag.main._setup_common_initialization"), + patch("codebase_rag.main.connect_memgraph") as mock_connect, + patch( + "codebase_rag.main._initialize_services_and_agent", + return_value=(agent, [], ""), + ), + ): + mock_connect.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_connect.return_value.__exit__ = MagicMock(return_value=False) + yield agent + + +def test_default_format_prints_plain_text( + mock_agent_stack: MagicMock, capsys: pytest.CaptureFixture[str] +) -> None: + main_single_query("/repo", 100, _QUESTION) + + out = capsys.readouterr().out.strip() + assert out == _ANSWER + + +def test_json_format_wraps_query_and_response( + mock_agent_stack: MagicMock, capsys: pytest.CaptureFixture[str] +) -> None: + main_single_query("/repo", 100, _QUESTION, output_format=cs.QueryFormat.JSON) + + payload = json.loads(capsys.readouterr().out) + assert payload == {cs.KEY_QUERY: _QUESTION, cs.KEY_RESPONSE: _ANSWER} + + +def test_json_format_preserves_non_ascii( + capsys: pytest.CaptureFixture[str], +) -> None: + answer = "Le générateur résout les nœuds — déjà" + agent = MagicMock() + agent.run = AsyncMock(return_value=MagicMock(output=answer)) + with ( + patch("codebase_rag.main._setup_common_initialization"), + patch("codebase_rag.main.connect_memgraph") as mock_connect, + patch( + "codebase_rag.main._initialize_services_and_agent", + return_value=(agent, [], ""), + ), + ): + mock_connect.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_connect.return_value.__exit__ = MagicMock(return_value=False) + main_single_query("/repo", 100, 
_QUESTION, output_format=cs.QueryFormat.JSON) + + raw = capsys.readouterr().out + assert answer in raw + assert "\\u" not in raw + assert json.loads(raw)[cs.KEY_RESPONSE] == answer + + +def test_json_format_without_ask_agent_exits_with_error() -> None: + result = runner.invoke(app, ["start", "--output-format", "json"]) + + assert result.exit_code == 1, result.output + assert "ask-agent" in _plain(result.output) diff --git a/codebase_rag/tests/test_slots_and_optimizations.py b/codebase_rag/tests/test_slots_and_optimizations.py new file mode 100644 index 000000000..da8ca621b --- /dev/null +++ b/codebase_rag/tests/test_slots_and_optimizations.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +import pytest + +from codebase_rag.parsers.dependency_parser import ( + CargoTomlParser, + ComposerJsonParser, + CsprojParser, + DependencyParser, + GemfileParser, + GoModParser, + PackageJsonParser, + PyProjectTomlParser, + RequirementsTxtParser, +) +from codebase_rag.parsers.handlers.base import BaseLanguageHandler +from codebase_rag.parsers.handlers.cpp import CppHandler +from codebase_rag.parsers.handlers.java import JavaHandler +from codebase_rag.parsers.handlers.js_ts import JsTsHandler +from codebase_rag.parsers.handlers.lua import LuaHandler +from codebase_rag.parsers.handlers.protocol import LanguageHandler +from codebase_rag.parsers.handlers.python import PythonHandler +from codebase_rag.parsers.handlers.rust import RustHandler +from codebase_rag.parsers.stdlib_extractor import StdlibExtractor +from codebase_rag.parsers.utils import _cached_decode_bytes + + +class TestHandlerSlots: + @pytest.mark.parametrize( + "handler_cls", + [ + BaseLanguageHandler, + PythonHandler, + JavaHandler, + JsTsHandler, + CppHandler, + RustHandler, + LuaHandler, + ], + ) + def test_handler_has_slots(self, handler_cls: type) -> None: + assert hasattr(handler_cls, "__slots__") + + @pytest.mark.parametrize( + "handler_cls", + [ + BaseLanguageHandler, + PythonHandler, + JavaHandler, + 
JsTsHandler, + CppHandler, + RustHandler, + LuaHandler, + ], + ) + def test_handler_no_instance_dict(self, handler_cls: type) -> None: + instance = handler_cls() + assert not hasattr(instance, "__dict__") + + def test_protocol_has_slots(self) -> None: + assert hasattr(LanguageHandler, "__slots__") + + +class TestDependencyParserSlots: + @pytest.mark.parametrize( + "parser_cls", + [ + DependencyParser, + PyProjectTomlParser, + RequirementsTxtParser, + PackageJsonParser, + CargoTomlParser, + GoModParser, + GemfileParser, + ComposerJsonParser, + CsprojParser, + ], + ) + def test_parser_has_slots(self, parser_cls: type) -> None: + assert hasattr(parser_cls, "__slots__") + + @pytest.mark.parametrize( + "parser_cls", + [ + DependencyParser, + PyProjectTomlParser, + RequirementsTxtParser, + PackageJsonParser, + CargoTomlParser, + GoModParser, + GemfileParser, + ComposerJsonParser, + CsprojParser, + ], + ) + def test_parser_no_instance_dict(self, parser_cls: type) -> None: + instance = parser_cls() + assert not hasattr(instance, "__dict__") + + +class TestStdlibExtractorSlots: + def test_has_slots(self) -> None: + assert hasattr(StdlibExtractor, "__slots__") + assert "function_registry" in StdlibExtractor.__slots__ + assert "repo_path" in StdlibExtractor.__slots__ + assert "project_name" in StdlibExtractor.__slots__ + + def test_no_instance_dict(self) -> None: + extractor = StdlibExtractor() + assert not hasattr(extractor, "__dict__") + + +class TestCachedDecodeBytes: + def test_cache_maxsize(self) -> None: + cache_info = _cached_decode_bytes.cache_info() + assert cache_info.maxsize == 50000 + + def test_decode_bytes(self) -> None: + result = _cached_decode_bytes(b"hello world") + assert result == "hello world" + + def test_decode_caches(self) -> None: + _cached_decode_bytes.cache_clear() + _cached_decode_bytes(b"test_cache") + _cached_decode_bytes(b"test_cache") + info = _cached_decode_bytes.cache_info() + assert info.hits >= 1 diff --git 
a/codebase_rag/tests/test_slots_lazy_logger.py b/codebase_rag/tests/test_slots_lazy_logger.py new file mode 100644 index 000000000..2772a11f4 --- /dev/null +++ b/codebase_rag/tests/test_slots_lazy_logger.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from codebase_rag.graph_loader import GraphLoader +from codebase_rag.providers.base import ( + GoogleProvider, + ModelProvider, + OllamaProvider, + OpenAIProvider, +) +from codebase_rag.services.llm import CypherGenerator +from codebase_rag.tools.code_retrieval import CodeRetriever +from codebase_rag.tools.directory_lister import DirectoryLister +from codebase_rag.tools.file_editor import FileEditor +from codebase_rag.tools.file_reader import FileReader +from codebase_rag.tools.file_writer import FileWriter +from codebase_rag.tools.health_checker import HealthChecker +from codebase_rag.tools.shell_command import CommandGroup, ShellCommander + +REPO_ROOT = Path(__file__).resolve().parent.parent + + +SLOTS_CLASSES: list[tuple[type, tuple[str, ...]]] = [ + (FileEditor, ("project_root", "dmp", "parsers")), + (CodeRetriever, ("project_root", "ingestor")), + (FileReader, ("project_root",)), + (FileWriter, ("project_root",)), + (DirectoryLister, ("project_root",)), + (CommandGroup, ("commands", "operator")), + (ShellCommander, ("project_root", "timeout", "is_yolo")), + (HealthChecker, ("results",)), + (CypherGenerator, ("agent",)), + (ModelProvider, ("config",)), + ( + GoogleProvider, + ( + "api_key", + "provider_type", + "project_id", + "region", + "service_account_file", + "thinking_budget", + ), + ), + (OpenAIProvider, ("api_key", "endpoint")), + (OllamaProvider, ("endpoint", "api_key")), +] + +GRAPH_LOADER_SLOTS = ( + "file_path", + "_data", + "_nodes", + "_relationships", + "_nodes_by_id", + "_nodes_by_label", + "_outgoing_rels", + "_incoming_rels", + "_property_indexes", +) + + +class TestSlotsPresence: + 
@pytest.mark.parametrize( + ("cls", "expected_slots"), + SLOTS_CLASSES, + ids=[c.__name__ for c, _ in SLOTS_CLASSES], + ) + def test_class_has_slots(self, cls: type, expected_slots: tuple[str, ...]) -> None: + assert hasattr(cls, "__slots__") + assert set(cls.__slots__) == set(expected_slots) + + def test_graph_loader_has_slots(self) -> None: + assert hasattr(GraphLoader, "__slots__") + assert set(GraphLoader.__slots__) == set(GRAPH_LOADER_SLOTS) + + +class TestSlotsBlockDict: + def test_command_group_no_dict(self) -> None: + obj = CommandGroup(commands=["ls"], operator=None) + assert not hasattr(obj, "__dict__") + + def test_directory_lister_no_dict(self, tmp_path: Path) -> None: + obj = DirectoryLister(str(tmp_path)) + assert not hasattr(obj, "__dict__") + + def test_file_reader_no_dict(self, tmp_path: Path) -> None: + obj = FileReader(str(tmp_path)) + assert not hasattr(obj, "__dict__") + + def test_file_writer_no_dict(self, tmp_path: Path) -> None: + obj = FileWriter(str(tmp_path)) + assert not hasattr(obj, "__dict__") + + def test_health_checker_no_dict(self) -> None: + obj = HealthChecker() + assert not hasattr(obj, "__dict__") + + def test_shell_commander_no_dict(self, tmp_path: Path) -> None: + obj = ShellCommander(str(tmp_path)) + assert not hasattr(obj, "__dict__") + + def test_code_retriever_no_dict(self, tmp_path: Path) -> None: + mock_ingestor = MagicMock() + obj = CodeRetriever(str(tmp_path), mock_ingestor) + assert not hasattr(obj, "__dict__") + + +class TestSlotsRejectArbitraryAttrs: + def test_command_group_rejects_attr(self) -> None: + obj = CommandGroup(commands=["ls"], operator=None) + with pytest.raises(AttributeError): + obj.arbitrary = 42 + + def test_directory_lister_rejects_attr(self, tmp_path: Path) -> None: + obj = DirectoryLister(str(tmp_path)) + with pytest.raises(AttributeError): + obj.arbitrary = 42 + + def test_health_checker_rejects_attr(self) -> None: + obj = HealthChecker() + with pytest.raises(AttributeError): + obj.arbitrary = 
42 + + def test_shell_commander_rejects_attr(self, tmp_path: Path) -> None: + obj = ShellCommander(str(tmp_path)) + with pytest.raises(AttributeError): + obj.arbitrary = 42 + + +LAZY_LOGGER_FILES: list[str] = [ + "parser_loader.py", + "utils/fqn_resolver.py", + "utils/source_extraction.py", + "tools/file_editor.py", +] + + +def _find_eager_debug_calls(source: str) -> list[str]: + results = [] + lines = source.split("\n") + i = 0 + while i < len(lines): + line = lines[i] + stripped = line.strip() + if stripped.startswith("logger.debug("): + block = stripped + j = i + paren_count = block.count("(") - block.count(")") + while paren_count > 0 and j + 1 < len(lines): + j += 1 + block += " " + lines[j].strip() + paren_count += lines[j].count("(") - lines[j].count(")") + if ".format(" in block: + results.append(block[:80]) + i = j + 1 + else: + i += 1 + return results + + +class TestLazyLoggerFormat: + @pytest.mark.parametrize("rel_path", LAZY_LOGGER_FILES) + def test_no_eager_debug_format(self, rel_path: str) -> None: + file_path = REPO_ROOT / rel_path + source = file_path.read_text(encoding="utf-8") + eager_calls = _find_eager_debug_calls(source) + assert len(eager_calls) == 0, ( + f"Found {len(eager_calls)} eager logger.debug(.format()) calls in {rel_path}: {eager_calls}" + ) + + +class TestProviderSlotsInheritance: + def test_google_provider_inherits_config_slot(self) -> None: + assert "config" in ModelProvider.__slots__ + assert "config" not in GoogleProvider.__slots__ + + def test_openai_provider_inherits_config_slot(self) -> None: + assert "config" not in OpenAIProvider.__slots__ + + def test_ollama_provider_inherits_config_slot(self) -> None: + assert "config" not in OllamaProvider.__slots__ + + @patch.dict("os.environ", {"GOOGLE_API_KEY": "test-key"}) + def test_google_provider_instance_has_all_attrs(self) -> None: + provider = GoogleProvider(api_key="test-key") + assert provider.api_key == "test-key" + assert provider.config == {} + + def 
test_openai_provider_instance_has_all_attrs(self) -> None: + provider = OpenAIProvider(api_key="test-key") + assert provider.api_key == "test-key" + assert provider.config == {} + + @patch("codebase_rag.providers.base.settings") + def test_ollama_provider_instance_has_all_attrs( + self, mock_settings: MagicMock + ) -> None: + mock_settings.ollama_endpoint = "http://localhost:11434/v1/" + provider = OllamaProvider() + assert provider.endpoint == "http://localhost:11434/v1/" + assert provider.config == {} diff --git a/codebase_rag/tests/test_source_extraction.py b/codebase_rag/tests/test_source_extraction.py index df7b9099e..9296c91fb 100644 --- a/codebase_rag/tests/test_source_extraction.py +++ b/codebase_rag/tests/test_source_extraction.py @@ -12,7 +12,7 @@ class TestExtractSourceLines: def test_extracts_single_line(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\nline3\n") + file_path.write_bytes(b"line1\nline2\nline3\n") result = extract_source_lines(file_path, 2, 2) @@ -20,7 +20,7 @@ def test_extracts_single_line(self, tmp_path: Path) -> None: def test_extracts_multiple_lines(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\nline3\nline4\n") + file_path.write_bytes(b"line1\nline2\nline3\nline4\n") result = extract_source_lines(file_path, 2, 3) @@ -28,7 +28,7 @@ def test_extracts_multiple_lines(self, tmp_path: Path) -> None: def test_extracts_all_lines(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\nline3\n") + file_path.write_bytes(b"line1\nline2\nline3\n") result = extract_source_lines(file_path, 1, 3) @@ -36,7 +36,7 @@ def test_extracts_all_lines(self, tmp_path: Path) -> None: def test_strips_trailing_whitespace(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data=" code \n more 
\n") + file_path.write_bytes(b" code \n more \n") result = extract_source_lines(file_path, 1, 2) @@ -51,7 +51,7 @@ def test_returns_none_for_nonexistent_file(self, tmp_path: Path) -> None: def test_returns_none_for_zero_start_line(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\n") + file_path.write_bytes(b"line1\n") result = extract_source_lines(file_path, 0, 1) @@ -59,7 +59,7 @@ def test_returns_none_for_zero_start_line(self, tmp_path: Path) -> None: def test_returns_none_for_negative_start_line(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\n") + file_path.write_bytes(b"line1\n") result = extract_source_lines(file_path, -1, 1) @@ -67,7 +67,7 @@ def test_returns_none_for_negative_start_line(self, tmp_path: Path) -> None: def test_returns_none_for_zero_end_line(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\n") + file_path.write_bytes(b"line1\n") result = extract_source_lines(file_path, 1, 0) @@ -75,7 +75,7 @@ def test_returns_none_for_zero_end_line(self, tmp_path: Path) -> None: def test_returns_none_for_start_greater_than_end(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\n") + file_path.write_bytes(b"line1\nline2\n") result = extract_source_lines(file_path, 2, 1) @@ -83,23 +83,23 @@ def test_returns_none_for_start_greater_than_end(self, tmp_path: Path) -> None: def test_returns_none_when_start_exceeds_file_length(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\n") + file_path.write_bytes(b"line1\nline2\n") result = extract_source_lines(file_path, 5, 6) assert result is None - def test_returns_none_when_end_exceeds_file_length(self, tmp_path: Path) -> None: + def 
test_clamps_when_end_exceeds_file_length(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\n") + file_path.write_bytes(b"line1\nline2\n") result = extract_source_lines(file_path, 1, 10) - assert result is None + assert result == "line1\nline2" def test_handles_empty_file(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="") + file_path.write_bytes(b"") result = extract_source_lines(file_path, 1, 1) @@ -107,17 +107,61 @@ def test_handles_empty_file(self, tmp_path: Path) -> None: def test_preserves_indentation(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="def func():\n return 42\n") + file_path.write_bytes(b"def func():\n return 42\n") result = extract_source_lines(file_path, 1, 2) assert result == "def func():\n return 42" + def test_counts_blank_lines(self, tmp_path: Path) -> None: + file_path = tmp_path / "test.py" + file_path.write_bytes(b"line1\n\nline3\n\nline5\n") + + result = extract_source_lines(file_path, 1, 5) + + assert result == "line1\n\nline3\n\nline5" + + def test_extracts_across_blank_lines(self, tmp_path: Path) -> None: + file_path = tmp_path / "test.py" + file_path.write_bytes( + b"def func1():\n pass\n\ndef func2():\n return 42\n" + ) + + result = extract_source_lines(file_path, 4, 5) + + assert result == "def func2():\n return 42" + + def test_preserves_internal_blank_lines(self, tmp_path: Path) -> None: + file_path = tmp_path / "test.py" + file_path.write_bytes( + b"def func():\n x = 1\n\n y = 2\n\n return x + y\n" + ) + + result = extract_source_lines(file_path, 1, 6) + + assert result == "def func():\n x = 1\n\n y = 2\n\n return x + y" + + def test_line_count_matches_with_many_blank_lines(self, tmp_path: Path) -> None: + file_path = tmp_path / "test.py" + file_path.write_bytes(b"a\n\n\n\nb\n\n\n\nc\n") + + result = extract_source_lines(file_path, 
5, 5) + + assert result == "b" + + def test_clamps_end_line_returns_partial_content(self, tmp_path: Path) -> None: + file_path = tmp_path / "test.py" + file_path.write_bytes(b"def func():\n pass\n\ndef other():\n return 1\n") + + result = extract_source_lines(file_path, 4, 100) + + assert result == "def other():\n return 1" + class TestExtractSourceWithFallback: def test_uses_line_extraction_when_no_ast_extractor(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\n") + file_path.write_bytes(b"line1\nline2\n") result = extract_source_with_fallback(file_path, 1, 2) @@ -125,7 +169,7 @@ def test_uses_line_extraction_when_no_ast_extractor(self, tmp_path: Path) -> Non def test_uses_ast_extractor_when_provided(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\n") + file_path.write_bytes(b"line1\nline2\n") def mock_ast_extractor(name: str, path: Path) -> str: return f"AST result for {name}" @@ -140,7 +184,7 @@ def test_falls_back_to_lines_when_ast_extractor_returns_none( self, tmp_path: Path ) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\n") + file_path.write_bytes(b"line1\nline2\n") def mock_ast_extractor(name: str, path: Path) -> None: return None @@ -155,7 +199,7 @@ def test_falls_back_to_lines_when_ast_extractor_raises( self, tmp_path: Path ) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\n") + file_path.write_bytes(b"line1\nline2\n") def mock_ast_extractor(name: str, path: Path) -> str: raise RuntimeError("AST extraction failed") @@ -168,7 +212,7 @@ def mock_ast_extractor(name: str, path: Path) -> str: def test_skips_ast_when_qualified_name_is_none(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\n") + 
file_path.write_bytes(b"line1\nline2\n") ast_called = False def mock_ast_extractor(name: str, path: Path) -> str: @@ -185,7 +229,7 @@ def mock_ast_extractor(name: str, path: Path) -> str: def test_skips_ast_when_extractor_is_none(self, tmp_path: Path) -> None: file_path = tmp_path / "test.py" - file_path.write_text(encoding="utf-8", data="line1\nline2\n") + file_path.write_bytes(b"line1\nline2\n") result = extract_source_with_fallback( file_path, 1, 2, qualified_name="my.func", ast_extractor=None diff --git a/codebase_rag/tests/test_stack_manager.py b/codebase_rag/tests/test_stack_manager.py new file mode 100644 index 000000000..1ca899856 --- /dev/null +++ b/codebase_rag/tests/test_stack_manager.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +import subprocess +from pathlib import Path +from unittest.mock import patch + +import pytest + +from codebase_rag.stack import constants as stack_cs +from codebase_rag.stack.manager import StackError, StackManager + + +def _fake_subprocess_result( + returncode: int = 0, stdout: str = "", stderr: str = "" +) -> subprocess.CompletedProcess[str]: + return subprocess.CompletedProcess( + args=[], returncode=returncode, stdout=stdout, stderr=stderr + ) + + +def _make_compose_source(tmp_path: Path) -> Path: + src = tmp_path / "src_compose.yaml" + src.write_text("services: {}\n", encoding="utf-8") + return src + + +@pytest.fixture +def stack_home(tmp_path: Path) -> Path: + home = tmp_path / "cgr-home" + home.mkdir() + return home + + +def test_ensure_compose_file_copies_when_missing( + stack_home: Path, tmp_path: Path +) -> None: + src = _make_compose_source(tmp_path) + mgr = StackManager(home=stack_home, package_compose=src) + target = mgr.ensure_compose_file() + assert target == stack_home / stack_cs.COMPOSE_FILENAME + assert target.read_text(encoding="utf-8") == src.read_text(encoding="utf-8") + + +def test_ensure_compose_file_preserves_existing( + stack_home: Path, tmp_path: Path +) -> None: + src = 
_make_compose_source(tmp_path) + target = stack_home / stack_cs.COMPOSE_FILENAME + target.write_text("custom: yes\n", encoding="utf-8") + mgr = StackManager(home=stack_home, package_compose=src) + result = mgr.ensure_compose_file() + assert result.read_text(encoding="utf-8") == "custom: yes\n" + + +def test_ensure_compose_file_raises_when_source_missing( + stack_home: Path, tmp_path: Path +) -> None: + missing = tmp_path / "nope.yaml" + mgr = StackManager(home=stack_home, package_compose=missing) + with pytest.raises(StackError): + mgr.ensure_compose_file() + + +def test_check_docker_raises_when_docker_not_on_path(stack_home: Path) -> None: + mgr = StackManager(home=stack_home, package_compose=Path("/dev/null")) + with patch("codebase_rag.stack.manager.shutil.which", return_value=None): + with pytest.raises(StackError) as exc: + mgr.check_docker() + assert "docker not found" in str(exc.value).lower() + + +def test_check_docker_raises_when_daemon_down(stack_home: Path) -> None: + mgr = StackManager(home=stack_home, package_compose=Path("/dev/null")) + with ( + patch( + "codebase_rag.stack.manager.shutil.which", return_value="/usr/bin/docker" + ), + patch( + "codebase_rag.stack.manager.subprocess.run", + return_value=_fake_subprocess_result(returncode=1, stderr="daemon down"), + ), + ): + with pytest.raises(StackError) as exc: + mgr.check_docker() + assert "daemon" in str(exc.value).lower() + + +def test_check_docker_raises_when_compose_missing(stack_home: Path) -> None: + mgr = StackManager(home=stack_home, package_compose=Path("/dev/null")) + + def fake_run(cmd: list[str], **_: object) -> subprocess.CompletedProcess[str]: + if cmd[:2] == ["docker", "info"]: + return _fake_subprocess_result(returncode=0) + return _fake_subprocess_result(returncode=1) + + with ( + patch( + "codebase_rag.stack.manager.shutil.which", return_value="/usr/bin/docker" + ), + patch("codebase_rag.stack.manager.subprocess.run", side_effect=fake_run), + ): + with pytest.raises(StackError) as 
exc: + mgr.check_docker() + assert "compose" in str(exc.value).lower() + + +def test_status_returns_stopped_when_nothing_reachable(stack_home: Path) -> None: + mgr = StackManager(home=stack_home, package_compose=Path("/dev/null")) + with ( + patch("codebase_rag.stack.manager.wait_for_memgraph", return_value=False), + patch("codebase_rag.stack.manager.wait_for_qdrant", return_value=False), + ): + status = mgr.status() + assert status.state == stack_cs.StackState.STOPPED + + +def test_status_returns_running_when_both_reachable(stack_home: Path) -> None: + mgr = StackManager(home=stack_home, package_compose=Path("/dev/null")) + with ( + patch("codebase_rag.stack.manager.wait_for_memgraph", return_value=True), + patch("codebase_rag.stack.manager.wait_for_qdrant", return_value=True), + ): + status = mgr.status() + assert status.state == stack_cs.StackState.RUNNING + assert status.memgraph_reachable + assert status.qdrant_reachable + + +def test_status_returns_partial_when_only_memgraph_reachable(stack_home: Path) -> None: + mgr = StackManager(home=stack_home, package_compose=Path("/dev/null")) + with ( + patch("codebase_rag.stack.manager.wait_for_memgraph", return_value=True), + patch("codebase_rag.stack.manager.wait_for_qdrant", return_value=False), + ): + status = mgr.status() + assert status.state == stack_cs.StackState.PARTIAL + + +def test_compose_cmd_uses_project_and_file(stack_home: Path, tmp_path: Path) -> None: + src = _make_compose_source(tmp_path) + mgr = StackManager(home=stack_home, package_compose=src, project_name="cgr-test") + cmd = mgr._compose_cmd("up", "-d") + assert cmd[0] == "docker" + assert cmd[1] == "compose" + assert "-p" in cmd and "cgr-test" in cmd + assert "-f" in cmd + assert str(mgr.compose_file) in cmd + assert cmd[-2:] == ["up", "-d"] + + +def test_ensure_running_skips_docker_when_already_up( + stack_home: Path, tmp_path: Path +) -> None: + src = _make_compose_source(tmp_path) + mgr = StackManager(home=stack_home, package_compose=src) + 
with ( + patch("codebase_rag.stack.manager.wait_for_memgraph", return_value=True), + patch("codebase_rag.stack.manager.wait_for_qdrant", return_value=True), + patch.object(mgr, "up") as mock_up, + patch.object(mgr, "wait_healthy") as mock_wait, + ): + status = mgr.ensure_running() + assert status.state == stack_cs.StackState.RUNNING + mock_up.assert_not_called() + mock_wait.assert_not_called() + + +def test_ensure_running_starts_when_stopped(stack_home: Path, tmp_path: Path) -> None: + src = _make_compose_source(tmp_path) + mgr = StackManager(home=stack_home, package_compose=src) + reachable_state = {"memgraph": False, "qdrant": False} + + def memgraph_check(*_: object, **__: object) -> bool: + return reachable_state["memgraph"] + + def qdrant_check(*_: object, **__: object) -> bool: + return reachable_state["qdrant"] + + def fake_up(timeout: float = 0.0) -> None: + reachable_state["memgraph"] = True + reachable_state["qdrant"] = True + + with ( + patch( + "codebase_rag.stack.manager.wait_for_memgraph", side_effect=memgraph_check + ), + patch("codebase_rag.stack.manager.wait_for_qdrant", side_effect=qdrant_check), + patch.object(mgr, "up", side_effect=fake_up) as mock_up, + patch.object(mgr, "wait_healthy") as mock_wait, + ): + status = mgr.ensure_running() + assert status.state == stack_cs.StackState.RUNNING + mock_up.assert_called_once() + mock_wait.assert_called_once() + + +def test_up_propagates_failure(stack_home: Path, tmp_path: Path) -> None: + src = _make_compose_source(tmp_path) + mgr = StackManager(home=stack_home, package_compose=src) + with ( + patch.object(mgr, "check_docker"), + patch( + "codebase_rag.stack.manager.subprocess.run", + return_value=_fake_subprocess_result(returncode=1, stderr="boom"), + ), + ): + with pytest.raises(StackError) as exc: + mgr.up() + assert "boom" in str(exc.value) or "Failed" in str(exc.value) diff --git a/codebase_rag/tests/test_static_calls_eval.py b/codebase_rag/tests/test_static_calls_eval.py new file mode 100644 
index 000000000..d9d050944 --- /dev/null +++ b/codebase_rag/tests/test_static_calls_eval.py @@ -0,0 +1,88 @@ +from pathlib import Path + +from evals import constants as ec +from evals.static_calls import ( + cgr_static_calls, + oracle_static_calls, + score_static_calls, +) + + +def _make_repo(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "__init__.py").write_text("", encoding="utf-8") + (root / "mod_a.py").write_text( + "def helper():\n return 1\n\n\ndef use():\n return helper()\n", + encoding="utf-8", + ) + (root / "mod_b.py").write_text( + "from proj.mod_a import helper\n\n\ndef run():\n return helper()\n", + encoding="utf-8", + ) + + +def test_oracle_resolves_direct_first_party_calls(tmp_path: Path) -> None: + src = tmp_path / "proj" + _make_repo(src) + edges = oracle_static_calls(src, "proj") + + # (H) same-module direct call use() -> helper() + assert ("proj.mod_a.use", "proj.mod_a.helper") in edges + # (H) import-resolved direct call run() -> helper() + assert ("proj.mod_b.run", "proj.mod_a.helper") in edges + + +def test_decorator_application_is_not_a_call_edge(tmp_path: Path) -> None: + # (H) `@guard('k')` above a function is a decorator application, not a call the + # (H) decorated function makes; cgr emits no such edge, so the oracle must not. + src = tmp_path / "proj" + src.mkdir(parents=True, exist_ok=True) + (src / "__init__.py").write_text("", encoding="utf-8") + (src / "deco.py").write_text( + "def guard(key):\n def wrap(fn):\n return fn\n return wrap\n", + encoding="utf-8", + ) + (src / "use.py").write_text( + "from proj.deco import guard\n\n\n@guard('k')\ndef job():\n return 1\n", + encoding="utf-8", + ) + edges = oracle_static_calls(src, "proj") + assert ("proj.use.job", "proj.deco.guard") not in edges + + +def test_oracle_attributes_method_nested_call_to_full_qn(tmp_path: Path) -> None: + # (H) A call inside a function nested in a method belongs to that nested + # (H) function's full qn (Class.method.nested). 
The oracle records it there; + # (H) the eval then checks cgr emits the same caller qn (see the recall test). + src = tmp_path / "proj" + src.mkdir(parents=True, exist_ok=True) + (src / "__init__.py").write_text("", encoding="utf-8") + (src / "m.py").write_text( + "def target():\n return 1\n\n\n" + "class C:\n" + " def method(self):\n" + " def nested():\n" + " return target()\n" + " return nested()\n", + encoding="utf-8", + ) + edges = oracle_static_calls(src, "proj") + assert ("proj.m.C.method.nested", "proj.m.target") in edges + + +def test_cgr_recall_on_direct_calls(tmp_path: Path) -> None: + src = tmp_path / "proj" + _make_repo(src) + oracle = oracle_static_calls(src, "proj") + cgr = cgr_static_calls(src, "proj") + # (H) every statically-resolvable direct call must be present in cgr's graph. + assert oracle <= cgr + + +def test_score_static_calls_recall() -> None: + oracle = {("a", "b"), ("c", "d")} + cgr = {("a", "b")} # (H) cgr also has many method-call edges the oracle omits + result = score_static_calls(cgr, oracle) + row = next(r for r in result.rows if r["label"] == ec.STATIC_CALLS_LABEL) + assert (row["tp"], row["fn"]) == (1, 1) + assert row["recall"] == 0.5 diff --git a/codebase_rag/tests/test_stats_command.py b/codebase_rag/tests/test_stats_command.py new file mode 100644 index 000000000..6e86f251b --- /dev/null +++ b/codebase_rag/tests/test_stats_command.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest +from typer.testing import CliRunner + +from codebase_rag.cli import app +from codebase_rag.types_defs import ResultRow + + +@pytest.fixture +def runner() -> CliRunner: + return CliRunner() + + +@pytest.fixture +def mock_node_results() -> list[ResultRow]: + return [ + {"labels": ["Function"], "count": 100}, + {"labels": ["Class"], "count": 50}, + {"labels": ["Module"], "count": 30}, + ] + + +@pytest.fixture +def mock_rel_results() -> list[ResultRow]: + return [ + {"type": "CALLS", 
"count": 200}, + {"type": "DEFINES", "count": 80}, + ] + + +def _make_mock_ingestor(*fetch_side_effects: list[ResultRow]) -> MagicMock: + mock = MagicMock() + mock.fetch_all.side_effect = list(fetch_side_effects) + mock.__enter__ = MagicMock(return_value=mock) + mock.__exit__ = MagicMock(return_value=False) + return mock + + +class TestStatsCommand: + def test_stats_displays_node_table( + self, + runner: CliRunner, + mock_node_results: list[ResultRow], + mock_rel_results: list[ResultRow], + ) -> None: + mock_ingestor = _make_mock_ingestor(mock_node_results, mock_rel_results) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["stats"]) + + assert result.exit_code == 0 + assert "Function" in result.output + assert "Class" in result.output + assert "Module" in result.output + + def test_stats_displays_relationship_table( + self, + runner: CliRunner, + mock_node_results: list[ResultRow], + mock_rel_results: list[ResultRow], + ) -> None: + mock_ingestor = _make_mock_ingestor(mock_node_results, mock_rel_results) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["stats"]) + + assert result.exit_code == 0 + assert "CALLS" in result.output + assert "DEFINES" in result.output + + def test_stats_displays_totals( + self, + runner: CliRunner, + mock_node_results: list[ResultRow], + mock_rel_results: list[ResultRow], + ) -> None: + mock_ingestor = _make_mock_ingestor(mock_node_results, mock_rel_results) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["stats"]) + + assert result.exit_code == 0 + assert "180" in result.output + assert "280" in result.output + + def test_stats_handles_empty_graph( + self, + runner: CliRunner, + ) -> None: + mock_ingestor = _make_mock_ingestor([], []) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["stats"]) + 
+ assert result.exit_code == 0 + assert "0" in result.output + + def test_stats_handles_connection_error( + self, + runner: CliRunner, + ) -> None: + with patch( + "codebase_rag.cli.connect_memgraph", + side_effect=ConnectionError("Cannot connect"), + ): + result = runner.invoke(app, ["stats"]) + + assert result.exit_code == 1 + assert "Failed" in result.output + + def test_stats_handles_multi_label_nodes( + self, + runner: CliRunner, + mock_rel_results: list[ResultRow], + ) -> None: + node_results: list[ResultRow] = [ + {"labels": ["Function", "Exported"], "count": 10}, + ] + mock_ingestor = _make_mock_ingestor(node_results, mock_rel_results) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["stats"]) + + assert result.exit_code == 0 + assert "Function:Exported" in result.output + + def test_stats_handles_empty_labels( + self, + runner: CliRunner, + mock_rel_results: list[ResultRow], + ) -> None: + node_results: list[ResultRow] = [ + {"labels": [], "count": 5}, + ] + mock_ingestor = _make_mock_ingestor(node_results, mock_rel_results) + with patch("codebase_rag.cli.connect_memgraph", return_value=mock_ingestor): + result = runner.invoke(app, ["stats"]) + + assert result.exit_code == 0 + assert "Unknown" in result.output diff --git a/codebase_rag/tests/test_status_bar_config.py b/codebase_rag/tests/test_status_bar_config.py new file mode 100644 index 000000000..b33597009 --- /dev/null +++ b/codebase_rag/tests/test_status_bar_config.py @@ -0,0 +1,250 @@ +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from codebase_rag import constants as cs +from codebase_rag import main as main_mod + + +@pytest.fixture(autouse=True) +def reset_session(monkeypatch: pytest.MonkeyPatch): + main_mod.app_context.session.confirm_edits = True + main_mod.app_context.session.load_cgr_instructions = True + main_mod.app_context.session.target_repo = None + 
yield + + +@patch("codebase_rag.main.settings") +def test_config_segments_always_shows_both_models( + mock_settings: MagicMock, +) -> None: + mock_settings.active_orchestrator_config.model_id = "anthropic:claude-opus-4-7" + mock_settings.active_cypher_config.model_id = "anthropic:claude-opus-4-7" + main_mod.app_context.session.target_repo = Path("/tmp/myrepo") + + segments = dict(main_mod._config_segments()) + + assert segments[cs.STATUS_BAR_CONFIG_LABEL_O] == "claude-opus-4-7" + assert segments[cs.STATUS_BAR_CONFIG_LABEL_C] == "claude-opus-4-7" + assert segments[cs.STATUS_BAR_CONFIG_LABEL_EDIT] == cs.STATUS_BAR_EDIT_ON + assert segments[cs.STATUS_BAR_CONFIG_LABEL_INSTRUCTIONS] == cs.STATUS_BAR_EDIT_ON + assert segments[cs.STATUS_BAR_CONFIG_LABEL_REPO] == "/tmp/myrepo" + + +@patch("codebase_rag.main.settings") +def test_config_segments_shows_distinct_models( + mock_settings: MagicMock, +) -> None: + mock_settings.active_orchestrator_config.model_id = "anthropic:claude-opus-4-7" + mock_settings.active_cypher_config.model_id = "anthropic:claude-haiku-4-5" + + segments = dict(main_mod._config_segments()) + + assert segments[cs.STATUS_BAR_CONFIG_LABEL_O] == "claude-opus-4-7" + assert segments[cs.STATUS_BAR_CONFIG_LABEL_C] == "claude-haiku-4-5" + + +@patch("codebase_rag.main.settings") +def test_config_segments_reflects_session_flags( + mock_settings: MagicMock, +) -> None: + mock_settings.active_orchestrator_config.model_id = "anthropic:claude-opus-4-7" + mock_settings.active_cypher_config.model_id = "anthropic:claude-opus-4-7" + main_mod.app_context.session.confirm_edits = False + main_mod.app_context.session.load_cgr_instructions = False + + segments = dict(main_mod._config_segments()) + + assert segments[cs.STATUS_BAR_CONFIG_LABEL_EDIT] == cs.STATUS_BAR_EDIT_OFF + assert segments[cs.STATUS_BAR_CONFIG_LABEL_INSTRUCTIONS] == cs.STATUS_BAR_EDIT_OFF + + +@patch("codebase_rag.main.settings") +def test_abbreviated_repo_uses_tilde_for_home_paths( + mock_settings: 
MagicMock, +) -> None: + inside_home = Path.home() / "Documents" / "platform" + + assert main_mod._abbreviated_repo(inside_home) == "~/Documents/platform" + + +def test_abbreviated_repo_keeps_absolute_for_outside_paths() -> None: + assert main_mod._abbreviated_repo(Path("/etc/hosts")) == "/etc/hosts" + + +def test_abbreviated_repo_handles_none() -> None: + assert main_mod._abbreviated_repo(None) == "" + + +@patch("codebase_rag.main.settings") +def test_config_status_html_includes_model_and_repo( + mock_settings: MagicMock, +) -> None: + mock_settings.active_orchestrator_config.model_id = "anthropic:claude-opus-4-7" + mock_settings.active_cypher_config.model_id = "anthropic:claude-opus-4-7" + main_mod.app_context.session.target_repo = Path("/tmp/showme") + + html = main_mod._config_status_html() + + assert "claude-opus-4-7" in html + assert "/tmp/showme" in html + assert cs.STATUS_BAR_CONFIG_LABEL_O in html + assert cs.STATUS_BAR_CONFIG_LABEL_REPO in html + + +@patch("codebase_rag.main._git_state", return_value=None) +@patch("codebase_rag.main._terminal_columns", return_value=200) +@patch("codebase_rag.main.settings") +def test_status_bar_html_inlines_config_when_wide( + mock_settings: MagicMock, + _columns: MagicMock, + _git: MagicMock, +) -> None: + mock_settings.active_orchestrator_config.model_id = "anthropic:claude-opus-4-7" + mock_settings.active_cypher_config.model_id = "anthropic:claude-opus-4-7" + main_mod.app_context.session.target_repo = Path("/tmp/x") + + html = main_mod._status_bar_label() + + rendered = str(html.value) if hasattr(html, "value") else str(html) + body_marker = main_mod._permission_mode_label() + body_idx = rendered.index(body_marker) + config_idx = rendered.index(cs.STATUS_BAR_CONFIG_LABEL_O + ":") + assert config_idx > body_idx, "config should appear after body when wide" + + +@patch("codebase_rag.main._git_state", return_value=None) +@patch("codebase_rag.main._terminal_columns", return_value=40) +@patch("codebase_rag.main.settings") 
+def test_status_bar_html_wraps_config_when_narrow( + mock_settings: MagicMock, + _columns: MagicMock, + _git: MagicMock, +) -> None: + mock_settings.active_orchestrator_config.model_id = "anthropic:claude-opus-4-7" + mock_settings.active_cypher_config.model_id = "anthropic:claude-opus-4-7" + main_mod.app_context.session.target_repo = Path("/tmp/x") + + html = main_mod._status_bar_label() + + rendered = str(html.value) if hasattr(html, "value") else str(html) + body_marker = main_mod._permission_mode_label() + body_idx = rendered.index(body_marker) + config_idx = rendered.index(cs.STATUS_BAR_CONFIG_LABEL_O + ":") + assert config_idx < body_idx, "config should appear above body when narrow" + + +@patch("codebase_rag.main._git_state", return_value=None) +@patch("codebase_rag.main._terminal_columns", return_value=200) +@patch("codebase_rag.main.settings") +def test_rich_status_bar_inlines_config_when_wide( + mock_settings: MagicMock, + _columns: MagicMock, + _git: MagicMock, +) -> None: + mock_settings.active_orchestrator_config.model_id = "anthropic:claude-opus-4-7" + mock_settings.active_cypher_config.model_id = "anthropic:claude-opus-4-7" + main_mod.app_context.session.target_repo = Path("/tmp/x") + + rendered = main_mod._rich_status_bar().plain + assert "\n" not in rendered + assert cs.STATUS_BAR_CONFIG_LABEL_O + ":" in rendered + + +@patch("codebase_rag.main._git_state", return_value=None) +@patch("codebase_rag.main._terminal_columns", return_value=30) +@patch("codebase_rag.main.settings") +def test_rich_status_bar_wraps_config_when_narrow( + mock_settings: MagicMock, + _columns: MagicMock, + _git: MagicMock, +) -> None: + mock_settings.active_orchestrator_config.model_id = "anthropic:claude-opus-4-7" + mock_settings.active_cypher_config.model_id = "anthropic:claude-opus-4-7" + main_mod.app_context.session.target_repo = Path("/tmp/x") + + rendered = main_mod._rich_status_bar().plain + assert "\n" in rendered + + +def 
test_git_state_returns_none_without_target_repo() -> None: + main_mod.app_context.session.target_repo = None + assert main_mod._git_state() is None + + +def test_git_state_uses_target_repo_cwd( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + target = tmp_path / "target-repo" + target.mkdir() + main_mod.app_context.session.target_repo = target + + captured: dict[str, object] = {} + + class _FakeCompleted: + stdout = "## feature/x\n M something.py\n" + + def fake_run(cmd, **kwargs): # noqa: ANN001, ANN003 + captured["cmd"] = cmd + captured["cwd"] = kwargs.get("cwd") + return _FakeCompleted() + + monkeypatch.setattr(main_mod.subprocess, "run", fake_run) + + result = main_mod._git_state() + assert result is not None + branch, is_dirty = result + + assert captured["cwd"] == target + assert branch == "feature/x" + assert is_dirty is True + + +def test_git_state_returns_none_when_target_missing(tmp_path: Path) -> None: + main_mod.app_context.session.target_repo = tmp_path / "does-not-exist" + assert main_mod._git_state() is None + + +@patch("codebase_rag.main._git_state", return_value=("feature/x", True)) +@patch("codebase_rag.main._terminal_columns", return_value=400) +@patch("codebase_rag.main.settings") +def test_branch_appears_after_repo_when_inline( + mock_settings: MagicMock, + _columns: MagicMock, + _git: MagicMock, +) -> None: + mock_settings.active_orchestrator_config.model_id = "anthropic:claude-opus-4-7" + mock_settings.active_cypher_config.model_id = "anthropic:claude-opus-4-7" + main_mod.app_context.session.target_repo = Path("/tmp/target") + + rendered = main_mod._rich_status_bar().plain + + repo_label = f"{cs.STATUS_BAR_CONFIG_LABEL_REPO}:/tmp/target" + assert repo_label in rendered + assert "feature/x" in rendered + assert rendered.index(repo_label) < rendered.index("feature/x") + mode_label = main_mod._permission_mode_label() + assert rendered.index(mode_label) < rendered.index("feature/x") + + +@patch("codebase_rag.main._git_state", 
return_value=("feature/x", False)) +@patch("codebase_rag.main._terminal_columns", return_value=400) +@patch("codebase_rag.main.settings") +def test_status_bar_html_places_branch_after_repo_when_inline( + mock_settings: MagicMock, + _columns: MagicMock, + _git: MagicMock, +) -> None: + mock_settings.active_orchestrator_config.model_id = "anthropic:claude-opus-4-7" + mock_settings.active_cypher_config.model_id = "anthropic:claude-opus-4-7" + main_mod.app_context.session.target_repo = Path("/tmp/target") + + html = main_mod._status_bar_label() + rendered = str(html.value) if hasattr(html, "value") else str(html) + + repo_idx = rendered.index(f"{cs.STATUS_BAR_CONFIG_LABEL_REPO}:") + branch_idx = rendered.index("feature/x") + assert repo_idx < branch_idx diff --git a/codebase_rag/tests/test_stdlib_extractor.py b/codebase_rag/tests/test_stdlib_extractor.py index bd09b0244..723650741 100644 --- a/codebase_rag/tests/test_stdlib_extractor.py +++ b/codebase_rag/tests/test_stdlib_extractor.py @@ -306,7 +306,7 @@ def test_js_stdlib_lowercase_entity_without_node( "fs.readFile", cs.SupportedLanguage.JS ) - assert result == "fs.readFile" + assert result == "fs" def test_ts_uses_js_extraction_uppercase(self, extractor: StdlibExtractor) -> None: with patch.object(se, "_is_tool_available", return_value=False): @@ -314,11 +314,11 @@ def test_ts_uses_js_extraction_uppercase(self, extractor: StdlibExtractor) -> No assert result == "path" - def test_ts_lowercase_returns_unchanged(self, extractor: StdlibExtractor) -> None: + def test_ts_lowercase_strips_entity(self, extractor: StdlibExtractor) -> None: with patch.object(se, "_is_tool_available", return_value=False): result = extractor.extract_module_path("path.join", cs.SupportedLanguage.TS) - assert result == "path.join" + assert result == "path" class TestEdgeCases: @@ -704,7 +704,7 @@ def test_js_extractor_fallback_on_entity_not_found( "fs.nonexistent", cs.SupportedLanguage.JS ) - assert result == "fs.nonexistent" + assert result == 
"fs" def test_js_extractor_fallback_on_json_decode_error( self, extractor: StdlibExtractor @@ -719,7 +719,7 @@ def test_js_extractor_fallback_on_json_decode_error( ): result = extractor.extract_module_path("path.join", cs.SupportedLanguage.JS) - assert result == "path.join" + assert result == "path" def test_js_extractor_fallback_on_timeout(self, extractor: StdlibExtractor) -> None: import subprocess @@ -732,4 +732,4 @@ def test_js_extractor_fallback_on_timeout(self, extractor: StdlibExtractor) -> N "http.createServer", cs.SupportedLanguage.JS ) - assert result == "http.createServer" + assert result == "http" diff --git a/codebase_rag/tests/test_structure_processor.py b/codebase_rag/tests/test_structure_processor.py index 51c23fe60..50c74ea2c 100644 --- a/codebase_rag/tests/test_structure_processor.py +++ b/codebase_rag/tests/test_structure_processor.py @@ -511,3 +511,22 @@ def test_multiple_package_indicators( ] qualified_names = {c[0][1]["qualified_name"] for c in package_calls} assert qualified_names == {"multi_lang.pypkg", "multi_lang.rustpkg"} + + +class TestStructureProcessorSlots: + def test_has_slots(self) -> None: + assert hasattr(StructureProcessor, "__slots__") + + def test_no_instance_dict(self, processor: StructureProcessor) -> None: + assert not hasattr(processor, "__dict__") + + def test_rejects_arbitrary_attribute(self, processor: StructureProcessor) -> None: + with pytest.raises(AttributeError): + processor.nonexistent_attr = 42 + + def test_slot_attributes_accessible(self, processor: StructureProcessor) -> None: + assert hasattr(processor, "ingestor") + assert hasattr(processor, "repo_path") + assert hasattr(processor, "project_name") + assert hasattr(processor, "queries") + assert hasattr(processor, "structural_elements") diff --git a/codebase_rag/tests/test_token_utils.py b/codebase_rag/tests/test_token_utils.py new file mode 100644 index 000000000..bbd116c13 --- /dev/null +++ b/codebase_rag/tests/test_token_utils.py @@ -0,0 +1,71 @@ +from 
__future__ import annotations + +from codebase_rag.types_defs import ResultRow +from codebase_rag.utils.token_utils import count_tokens, truncate_results_by_tokens + + +class TestCountTokens: + def test_empty_string(self) -> None: + assert count_tokens("") == 0 + + def test_simple_string(self) -> None: + tokens = count_tokens("hello world") + assert tokens > 0 + + def test_longer_string_has_more_tokens(self) -> None: + short = count_tokens("hello") + long = count_tokens("hello world this is a longer string with more tokens") + assert long > short + + +class TestTruncateResultsByTokens: + def test_empty_results(self) -> None: + results, tokens, truncated = truncate_results_by_tokens([], max_tokens=1000) + assert results == [] + assert tokens == 0 + assert truncated is False + + def test_results_within_limit(self) -> None: + rows: list[ResultRow] = [ + {"name": "foo", "count": 1}, + {"name": "bar", "count": 2}, + ] + results, tokens, truncated = truncate_results_by_tokens(rows, max_tokens=10000) + assert len(results) == 2 + assert tokens > 0 + assert truncated is False + + def test_results_exceed_limit(self) -> None: + rows: list[ResultRow] = [ + {"name": f"function_{i}", "path": f"src/module_{i}/file_{i}.py"} + for i in range(100) + ] + results, tokens, truncated = truncate_results_by_tokens(rows, max_tokens=200) + assert len(results) < 100 + assert len(results) > 0 + assert tokens <= 200 + assert truncated is True + + def test_single_large_row_still_included(self) -> None: + rows: list[ResultRow] = [ + {"content": "x" * 5000}, + ] + results, tokens, truncated = truncate_results_by_tokens(rows, max_tokens=10) + assert len(results) == 1 + assert truncated is False + + def test_preserves_row_order(self) -> None: + rows: list[ResultRow] = [ + {"name": "first"}, + {"name": "second"}, + {"name": "third"}, + ] + results, _, _ = truncate_results_by_tokens(rows, max_tokens=10000) + assert [r["name"] for r in results] == ["first", "second", "third"] + + def 
test_token_count_accuracy(self) -> None: + rows: list[ResultRow] = [ + {"name": "hello world"}, + ] + results, tokens, _ = truncate_results_by_tokens(rows, max_tokens=10000) + assert tokens == count_tokens('{"name": "hello world"}') diff --git a/codebase_rag/tests/test_truthiness_dispatch_resolution.py b/codebase_rag/tests/test_truthiness_dispatch_resolution.py new file mode 100644 index 000000000..9226bcb14 --- /dev/null +++ b/codebase_rag/tests/test_truthiness_dispatch_resolution.py @@ -0,0 +1,123 @@ +# (H) L3 finding from the evals/ harness: `if self.function_registry:` tests an object +# (H) for truthiness, which at runtime calls __bool__ if defined else __len__. cgr only +# (H) extracted explicit calls, missing FunctionRegistryTrie.__len__. These edges are +# (H) emitted only when the tested operand is a first-party object defining the dunder. +from __future__ import annotations + +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +PROJECT = "proj" + +FILES = { + "pkg/__init__.py": "", + "pkg/sized.py": ("class Sized:\n def __len__(self):\n return 0\n"), + "pkg/flag.py": ( + "class Flag:\n" + " def __bool__(self):\n return True\n\n" + " def __len__(self):\n return 0\n" + ), + "pkg/user.py": ( + "from .sized import Sized\n" + "from .flag import Flag\n\n\n" + "class User:\n" + " def __init__(self, sized: Sized, flag: Flag) -> None:\n" + " self._sized = sized\n" + " self._flag = flag\n\n" + " def _record(self):\n" + " return None\n\n" + " def check(self):\n" + " self._record()\n" + " if self._sized:\n" + " return 1\n" + " return 0\n\n" + " def combined(self, other):\n" + " self._record()\n" + " if self._sized and other:\n" + " return 1\n" + " return 0\n\n" + " def truthy_flag(self):\n" + " self._record()\n" + " if self._flag:\n" + " return 1\n" + " return 0\n" + 
), +} + + +class _Capture: + def __init__(self) -> None: + self.rels: list[tuple[PropertyValue, str, PropertyValue]] = [] + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + return None + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + self.rels.append((from_spec[2], str(rel_type), to_spec[2])) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + return None + + +def _calls(tmp_path: Path) -> set[tuple[PropertyValue, PropertyValue]]: + for rel, content in FILES.items(): + p = tmp_path / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(content) + parsers, queries = load_parsers() + cap = _Capture() + GraphUpdater( + ingestor=cap, + repo_path=tmp_path, + parsers=parsers, + queries=queries, + project_name=PROJECT, + ).run(force=True) + return { + (frm, to) for (frm, rel, to) in cap.rels if rel == cs.RelationshipType.CALLS + } + + +class TestTruthinessDispatchResolution: + def test_if_truthiness_dispatches_to_len(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.user.User.check", + "proj.pkg.sized.Sized.__len__", + ) in calls, calls + + def test_boolean_operator_operand_dispatches_to_len(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.user.User.combined", + "proj.pkg.sized.Sized.__len__", + ) in calls, calls + + def test_bool_takes_precedence_over_len(self, tmp_path: Path) -> None: + calls = _calls(tmp_path) + assert ( + "proj.pkg.user.User.truthy_flag", + "proj.pkg.flag.Flag.__bool__", + ) in calls, calls + assert ( + "proj.pkg.user.User.truthy_flag", + "proj.pkg.flag.Flag.__len__", + ) not in calls, calls diff --git 
a/codebase_rag/tests/test_ts_arrow_caller_calls.py b/codebase_rag/tests/test_ts_arrow_caller_calls.py new file mode 100644 index 000000000..170b0c26b --- /dev/null +++ b/codebase_rag/tests/test_ts_arrow_caller_calls.py @@ -0,0 +1,31 @@ +from pathlib import Path + +from evals.cgr_graph import _capture + + +def _make(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "util.ts").write_text( + "export function parsedType(x: unknown): number { return 1; }\n", + encoding="utf-8", + ) + (root / "he.ts").write_text( + 'import * as util from "./util.js";\n' + "export const fmt = (x: unknown): number => util.parsedType(x);\n", + encoding="utf-8", + ) + + +def test_ts_arrow_const_caller_body_calls_resolve(tmp_path: Path) -> None: + # (H) A call inside a named arrow / const-arrow function body must be attributed + # (H) to that function (p.he.fmt). The call pass skipped arrows because they + # (H) have no `name` field, so _get_node_name returned None and the whole arrow + # (H) body -- and its calls -- went unprocessed. 
+ _make(tmp_path) + ingestor = _capture(tmp_path, "p") + calls = { + (str(from_val), str(to_val)) + for _fl, from_val, rel, _tl, to_val in ingestor.rels + if rel == "CALLS" + } + assert ("p.he.fmt", "p.util.parsedType") in calls diff --git a/codebase_rag/tests/test_ts_class_field_arrow.py b/codebase_rag/tests/test_ts_class_field_arrow.py new file mode 100644 index 000000000..473295ead --- /dev/null +++ b/codebase_rag/tests/test_ts_class_field_arrow.py @@ -0,0 +1,41 @@ +from pathlib import Path + +from evals.cgr_graph import _capture + + +def _make(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "util.ts").write_text( + "export function parsedType(x: unknown): number { return 1; }\n", + encoding="utf-8", + ) + (root / "t.ts").write_text( + 'import * as util from "./util.js";\n' + "export class T {\n" + " helper = (x: unknown): number => util.parsedType(x);\n" + " regular(x: unknown): number { return util.parsedType(x); }\n" + "}\n", + encoding="utf-8", + ) + + +def test_ts_class_field_arrow_is_modeled_and_calls_resolve(tmp_path: Path) -> None: + # (H) A class-property arrow (`helper = (x) => ...`) must be modeled as a + # (H) member node (p.t.T.helper) just like a normal method, and the calls in + # (H) its body must be attributed to it. Previously the definition pass created + # (H) no node for it (no name field) and the call pass skipped its body. + _make(tmp_path) + ingestor = _capture(tmp_path, "p") + member_qns = { + str(uid) for (label, uid) in ingestor.nodes if label in ("Method", "Function") + } + calls = { + (str(from_val), str(to_val)) + for _fl, from_val, rel, _tl, to_val in ingestor.rels + if rel == "CALLS" + } + + assert "p.t.T.helper" in member_qns + assert ("p.t.T.helper", "p.util.parsedType") in calls + # (H) regression guard: the normal method still works. 
+ assert ("p.t.T.regular", "p.util.parsedType") in calls diff --git a/codebase_rag/tests/test_ts_closure_containment.py b/codebase_rag/tests/test_ts_closure_containment.py new file mode 100644 index 000000000..7fa1a3dac --- /dev/null +++ b/codebase_rag/tests/test_ts_closure_containment.py @@ -0,0 +1,43 @@ +# (H) A function declared inside an anonymous callback must be DEFINEd by that +# (H) callback (its lexical parent), not hoisted to the nearest named ancestor. +# (H) The child's qn omits anonymous scopes, so deriving the DEFINES parent by +# (H) trimming the child qn skipped the callback; the parent is now recomputed +# (H) from the enclosing function node itself. +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.constants import RelationshipType +from codebase_rag.tests.conftest import create_and_run_updater, get_relationships + +_TS = """\ +export function driver(client) { + test("x", function (assert) { + function inner(fn) { + return 1; + } + return inner; + }); +} +""" + + +def test_function_in_anonymous_callback_defined_by_callback( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "ts_closure" + project.mkdir() + (project / "m.ts").write_text(_TS, encoding="utf-8") + create_and_run_updater(project, mock_ingestor, skip_if_missing="typescript") + + # (H) (parent_qn, child_qn) for DEFINES edges into `inner`. + parents = { + call[0][0][2] + for call in get_relationships(mock_ingestor, RelationshipType.DEFINES.value) + if str(call[0][2][2]).endswith(".inner") + } + assert parents, "no DEFINES edge into inner" + # (H) The parent must be the anonymous callback, not the named driver. 
+ assert all("anonymous" in p for p in parents), parents + assert "ts_closure.m.driver" not in parents, parents diff --git a/codebase_rag/tests/test_ts_export_no_duplicate.py b/codebase_rag/tests/test_ts_export_no_duplicate.py new file mode 100644 index 000000000..62ef8f086 --- /dev/null +++ b/codebase_rag/tests/test_ts_export_no_duplicate.py @@ -0,0 +1,40 @@ +from pathlib import Path + +from evals.cgr_graph import _capture + + +def _make(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "util.ts").write_text( + "export function parsedType(x: number): number { return x; }\n" + "export const dbl = (n: number): number => n * 2;\n", + encoding="utf-8", + ) + (root / "use.ts").write_text( + 'import { parsedType, dbl } from "./util.js";\n' + "export function go(): number { return parsedType(1) + dbl(2); }\n", + encoding="utf-8", + ) + + +def test_ts_exported_function_not_duplicated(tmp_path: Path) -> None: + # (H) An exported function / const-arrow is already ingested by the definition + # (H) pass at its natural qn (p.util.parsedType). The ES6-export pass must not + # (H) re-register it -- doing so makes a spurious `qn@line` duplicate node and + # (H) splits CALLS edges onto that duplicate, mangling the callee qn. 
+ _make(tmp_path) + ingestor = _capture(tmp_path, "p") + fn_qns = {str(uid) for (label, uid) in ingestor.nodes if label == "Function"} + + assert "p.util.parsedType" in fn_qns + assert "p.util.dbl" in fn_qns + assert not any("@" in q for q in fn_qns), f"duplicate fn nodes: {fn_qns}" + + calls = { + (str(from_val), str(to_val)) + for _fl, from_val, rel, _tl, to_val in ingestor.rels + if rel == "CALLS" + } + assert ("p.use.go", "p.util.parsedType") in calls + assert ("p.use.go", "p.util.dbl") in calls + assert not any("@" in to_val for _f, to_val in calls), f"calls to dup: {calls}" diff --git a/codebase_rag/tests/test_ts_retrieval_eval.py b/codebase_rag/tests/test_ts_retrieval_eval.py new file mode 100644 index 000000000..743f3c8f5 --- /dev/null +++ b/codebase_rag/tests/test_ts_retrieval_eval.py @@ -0,0 +1,72 @@ +from pathlib import Path + +import pytest + +from evals import constants as ec +from evals.oracles import typescript_available +from evals.ts_retrieval import ( + cgr_ts_call_edges, + oracle_ts_call_edges, + score_ts_retrieval, +) + +needs_node = pytest.mark.skipif( + not typescript_available(), reason="node toolchain not installed" +) + + +def _make_project(root: Path) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "util.ts").write_text( + "export function free(): number { return 2; }\n" + "export const dbl = (n: number): number => n * 2;\n", + encoding="utf-8", + ) + (root / "t.ts").write_text( + "export class T {\n" + " helper(): number { return 1; }\n" + " caller(): number { return this.helper(); }\n" + " orphan(): number { return 9; }\n" + "}\n", + encoding="utf-8", + ) + (root / "use.ts").write_text( + 'import { free, dbl } from "./util";\n' + 'import { T } from "./t";\n' + "export function useIt(): number {\n" + " const t = new T();\n" + " return free() + dbl(3) + t.caller();\n" + "}\n", + encoding="utf-8", + ) + + +@needs_node +def test_oracle_captures_first_party_ts_calls(tmp_path: Path) -> None: + _make_project(tmp_path) + edges, 
declared = oracle_ts_call_edges(tmp_path) + + # (H) this.helper(), free(), dbl(), t.caller() are first-party calls. + assert ("t.ts", "helper") in edges + assert ("use.ts", "free") in edges + assert ("use.ts", "dbl") in edges + assert ("use.ts", "caller") in edges + # (H) orphan is declared but never called -> never a call edge. + assert ("t.ts", "orphan") not in edges + assert {"helper", "caller", "orphan", "free", "dbl", "useIt"} <= declared + + +@needs_node +def test_cgr_matches_oracle_on_clean_ts_project(tmp_path: Path) -> None: + _make_project(tmp_path) + oracle, declared = oracle_ts_call_edges(tmp_path) + cgr = cgr_ts_call_edges(tmp_path, tmp_path.name, declared) + assert cgr == oracle + + +def test_score_ts_retrieval_prf() -> None: + result = score_ts_retrieval( + {("a.ts", "f"), ("a.ts", "g")}, {("a.ts", "f"), ("b.ts", "h")} + ) + row = next(r for r in result.rows if r["label"] == ec.TS_RETRIEVAL_LABEL) + assert (row["tp"], row["fp"], row["fn"]) == (1, 1, 1) diff --git a/codebase_rag/tests/test_type_inference_iterative.py b/codebase_rag/tests/test_type_inference_iterative.py index 76d0febeb..0598ed836 100644 --- a/codebase_rag/tests/test_type_inference_iterative.py +++ b/codebase_rag/tests/test_type_inference_iterative.py @@ -3,7 +3,7 @@ from collections import defaultdict from pathlib import Path from typing import Any -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import pytest @@ -88,15 +88,16 @@ def test_analyze_self_assignments_handles_deep_tree_without_recursion_error() -> engine = _make_engine() py_engine = engine.python_type_inference - py_engine._infer_type_from_expression = MagicMock(return_value="MockType") # type: ignore[method-assign] + mock_infer = MagicMock(return_value="MockType") root = _build_deep_assignment_chain(depth=1500) local_types: dict[str, Any] = {} - py_engine._analyze_self_assignments(root, local_types, "proj.module") # ty: ignore[invalid-argument-type] # (H) NodeStub not Node + with 
patch.object(type(py_engine), "_infer_type_from_expression", mock_infer): + py_engine._analyze_self_assignments(root, local_types, "proj.module") # ty: ignore[invalid-argument-type] # (H) NodeStub not Node assert local_types, "Expected at least one inferred instance variable" - assert py_engine._infer_type_from_expression.call_count == 1500 # type: ignore[attr-defined] + assert mock_infer.call_count == 1500 def test_find_return_statements_handles_deep_tree_without_recursion_error() -> None: @@ -162,86 +163,95 @@ def test_dispatches_to_python_engine( self, engine: TypeInferenceEngine, mock_node: MagicMock ) -> None: expected = {"var1": "str"} - engine.python_type_inference.build_local_variable_type_map = MagicMock( - return_value=expected - ) + mock_method = MagicMock(return_value=expected) - result = engine.build_local_variable_type_map( - mock_node, "proj.module", cs.SupportedLanguage.PYTHON - ) + with patch.object( + PythonTypeInferenceEngine, + "build_local_variable_type_map", + mock_method, + ): + result = engine.build_local_variable_type_map( + mock_node, "proj.module", cs.SupportedLanguage.PYTHON + ) assert result == expected - engine.python_type_inference.build_local_variable_type_map.assert_called_once_with( - mock_node, "proj.module" - ) + mock_method.assert_called_once_with(mock_node, "proj.module") def test_dispatches_to_js_engine( self, engine: TypeInferenceEngine, mock_node: MagicMock ) -> None: expected = {"jsVar": "number"} - engine.js_type_inference.build_local_variable_type_map = MagicMock( - return_value=expected - ) + mock_method = MagicMock(return_value=expected) - result = engine.build_local_variable_type_map( - mock_node, "proj.module", cs.SupportedLanguage.JS - ) + with patch.object( + JsTypeInferenceEngine, + "build_local_variable_type_map", + mock_method, + ): + result = engine.build_local_variable_type_map( + mock_node, "proj.module", cs.SupportedLanguage.JS + ) assert result == expected - 
engine.js_type_inference.build_local_variable_type_map.assert_called_once_with( - mock_node, "proj.module" + mock_method.assert_called_once_with( + mock_node, "proj.module", cs.SupportedLanguage.JS ) def test_dispatches_to_ts_engine( self, engine: TypeInferenceEngine, mock_node: MagicMock ) -> None: expected = {"tsVar": "string"} - engine.js_type_inference.build_local_variable_type_map = MagicMock( - return_value=expected - ) + mock_method = MagicMock(return_value=expected) - result = engine.build_local_variable_type_map( - mock_node, "proj.module", cs.SupportedLanguage.TS - ) + with patch.object( + JsTypeInferenceEngine, + "build_local_variable_type_map", + mock_method, + ): + result = engine.build_local_variable_type_map( + mock_node, "proj.module", cs.SupportedLanguage.TS + ) assert result == expected - engine.js_type_inference.build_local_variable_type_map.assert_called_once_with( - mock_node, "proj.module" + mock_method.assert_called_once_with( + mock_node, "proj.module", cs.SupportedLanguage.TS ) def test_dispatches_to_java_engine( self, engine: TypeInferenceEngine, mock_node: MagicMock ) -> None: expected = {"javaVar": "String"} - engine.java_type_inference.build_variable_type_map = MagicMock( - return_value=expected - ) + mock_method = MagicMock(return_value=expected) - result = engine.build_local_variable_type_map( - mock_node, "proj.module", cs.SupportedLanguage.JAVA - ) + with patch.object( + JavaTypeInferenceEngine, + "build_variable_type_map", + mock_method, + ): + result = engine.build_local_variable_type_map( + mock_node, "proj.module", cs.SupportedLanguage.JAVA + ) assert result == expected - engine.java_type_inference.build_variable_type_map.assert_called_once_with( - mock_node, "proj.module" - ) + mock_method.assert_called_once_with(mock_node, "proj.module") def test_dispatches_to_lua_engine( self, engine: TypeInferenceEngine, mock_node: MagicMock ) -> None: expected = {"luaVar": "table"} - engine.lua_type_inference.build_local_variable_type_map = 
MagicMock( - return_value=expected - ) + mock_method = MagicMock(return_value=expected) - result = engine.build_local_variable_type_map( - mock_node, "proj.module", cs.SupportedLanguage.LUA - ) + with patch.object( + LuaTypeInferenceEngine, + "build_local_variable_type_map", + mock_method, + ): + result = engine.build_local_variable_type_map( + mock_node, "proj.module", cs.SupportedLanguage.LUA + ) assert result == expected - engine.lua_type_inference.build_local_variable_type_map.assert_called_once_with( - mock_node, "proj.module" - ) + mock_method.assert_called_once_with(mock_node, "proj.module") @pytest.mark.parametrize( "language", @@ -249,8 +259,6 @@ def test_dispatches_to_lua_engine( cs.SupportedLanguage.RUST, cs.SupportedLanguage.GO, cs.SupportedLanguage.SCALA, - cs.SupportedLanguage.CPP, - cs.SupportedLanguage.CSHARP, cs.SupportedLanguage.PHP, ], ) @@ -320,13 +328,16 @@ def test_delegates_to_java_engine(self) -> None: engine = _make_engine() mock_node = MagicMock() expected = {"javaVar": "String", "count": "int"} - engine.java_type_inference.build_variable_type_map = MagicMock( - return_value=expected - ) + mock_method = MagicMock(return_value=expected) - result = engine._build_java_variable_type_map(mock_node, "com.example.Module") + with patch.object( + JavaTypeInferenceEngine, + "build_variable_type_map", + mock_method, + ): + result = engine._build_java_variable_type_map( + mock_node, "com.example.Module" + ) assert result == expected - engine.java_type_inference.build_variable_type_map.assert_called_once_with( - mock_node, "com.example.Module" - ) + mock_method.assert_called_once_with(mock_node, "com.example.Module") diff --git a/codebase_rag/tests/test_typescript_containment_oracle.py b/codebase_rag/tests/test_typescript_containment_oracle.py new file mode 100644 index 000000000..15e5b4c81 --- /dev/null +++ b/codebase_rag/tests/test_typescript_containment_oracle.py @@ -0,0 +1,66 @@ +# (H) Covers TypeScript containment-edge validation: cgr's DEFINES 
(file module +# (H) -> every named type, even nested) and DEFINES_METHOD (class/namespace -> +# (H) method) edges are graded against the independent TypeScript-compiler-API +# (H) oracle, joined on (kind, file, line). Exercises a class method, a top-level +# (H) function, and a namespace (class + function as methods of the namespace). +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_ts_graph +from evals.oracles import run_typescript_oracle, typescript_available +from evals.score import score_edge_types + +TS_SRC = """\ +export interface Shape { area(): number; } + +export enum Color { Red, Green } + +export class Point implements Shape { + x: number = 0; + area(): number { return 1.0; } +} + +export function free(): number { return 1; } + +export namespace geo { + export class Widget { build(): number { return 1; } } + export function helper(): number { return 2; } +} +""" + + +def _require_ts() -> None: + if not typescript_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.TS not in load_parsers()[0]: + pytest.skip("typescript parser not available") + + +def test_cgr_matches_tsc_oracle_on_containment_edges(tmp_path: Path) -> None: + _require_ts() + project = tmp_path / "ts_edge" + project.mkdir() + (project / "lib.ts").write_text(TS_SRC, encoding="utf-8") + + cgr = extract_cgr_ts_graph(project, project.name) + oracle = run_typescript_oracle(project) + + result = score_edge_types(cgr, oracle, ec.SCORED_EDGE_TYPES) + by_label = {row["label"]: row for row in result.rows} + for label in ( + cs.RelationshipType.DEFINES.value, + cs.RelationshipType.DEFINES_METHOD.value, + ): + row = by_label.get(label) + assert row is not None, (label, by_label, result.diff) + assert row["precision"] == 1.0 and row["recall"] == 1.0, ( + label, + 
row, + result.diff, + ) diff --git a/codebase_rag/tests/test_typescript_implements_edges.py b/codebase_rag/tests/test_typescript_implements_edges.py new file mode 100644 index 000000000..dc75804d0 --- /dev/null +++ b/codebase_rag/tests/test_typescript_implements_edges.py @@ -0,0 +1,42 @@ +# (H) TypeScript class `implements` was dropped: cgr captured `extends` +# (H) (-> INHERITS) via class_heritage but never the `implements_clause`, so a +# (H) class implementing interfaces produced no IMPLEMENTS edges. +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.constants import RelationshipType +from codebase_rag.tests.conftest import create_and_run_updater, get_relationships + +_TS = """\ +export interface Shape {} +export interface Drawable {} +export class Base {} +export class Circle extends Base implements Shape, Drawable {} +""" + + +def _pairs(mock_ingestor: MagicMock, rel: str) -> set[tuple[str, str]]: + return { + (call[0][0][2], call[0][2][2]) for call in get_relationships(mock_ingestor, rel) + } + + +def test_typescript_class_implements_edges( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "ts_impl" + project.mkdir() + (project / "lib.ts").write_text(_TS, encoding="utf-8") + create_and_run_updater(project, mock_ingestor, skip_if_missing="typescript") + + inherits = _pairs(mock_ingestor, RelationshipType.INHERITS.value) + implements = _pairs(mock_ingestor, RelationshipType.IMPLEMENTS.value) + base = "ts_impl.lib" + + # (H) extends still works. + assert (f"{base}.Circle", f"{base}.Base") in inherits, inherits + # (H) implements must now produce IMPLEMENTS to each interface. 
+ assert (f"{base}.Circle", f"{base}.Shape") in implements, implements + assert (f"{base}.Circle", f"{base}.Drawable") in implements, implements diff --git a/codebase_rag/tests/test_typescript_inheritance_oracle.py b/codebase_rag/tests/test_typescript_inheritance_oracle.py new file mode 100644 index 000000000..414433e69 --- /dev/null +++ b/codebase_rag/tests/test_typescript_inheritance_oracle.py @@ -0,0 +1,54 @@ +# (H) Covers TypeScript inheritance-edge validation: cgr's INHERITS (class & +# (H) interface extends) and IMPLEMENTS (class implements) edges are graded +# (H) against the TypeScript-compiler-API oracle, by (source node, base name). +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_ts_graph +from evals.oracles import run_typescript_oracle, typescript_available +from evals.score import score_name_edge_types + +TS_SRC = """\ +export interface Shape {} +export interface Drawable {} +export interface Big extends Shape, Drawable {} +export class Base {} +export class Circle extends Base implements Shape, Drawable {} +""" + + +def _require_ts() -> None: + if not typescript_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.TS not in load_parsers()[0]: + pytest.skip("typescript parser not available") + + +def test_cgr_matches_tsc_oracle_on_inheritance_edges(tmp_path: Path) -> None: + _require_ts() + project = tmp_path / "ts_inh_edge" + project.mkdir() + (project / "lib.ts").write_text(TS_SRC, encoding="utf-8") + + cgr = extract_cgr_ts_graph(project, project.name) + oracle = run_typescript_oracle(project) + + result = score_name_edge_types(cgr, oracle, ec.INHERITANCE_NAME_EDGE_TYPES) + by_label = {row["label"]: row for row in result.rows} + for label in ( + cs.RelationshipType.INHERITS.value, + 
cs.RelationshipType.IMPLEMENTS.value, + ): + row = by_label.get(label) + assert row is not None, (label, by_label, result.diff) + assert row["precision"] == 1.0 and row["recall"] == 1.0, ( + label, + row, + result.diff, + ) diff --git a/codebase_rag/tests/test_typescript_namespace_qn.py b/codebase_rag/tests/test_typescript_namespace_qn.py new file mode 100644 index 000000000..3d0f4ba43 --- /dev/null +++ b/codebase_rag/tests/test_typescript_namespace_qn.py @@ -0,0 +1,41 @@ +# (H) A class declared inside a TypeScript `namespace` must carry the namespace +# (H) in its qualified name (proj...geo.Widget), like a nested function does. +# (H) The class FQN scope walk listed the wrong node type ("namespace_definition" +# (H) instead of the grammar's "internal_module"), so it skipped the namespace +# (H) and produced an unscoped qn that collides with a top-level same-named type. +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +from codebase_rag.constants import KEY_QUALIFIED_NAME, NodeLabel +from codebase_rag.tests.conftest import create_and_run_updater, get_nodes + +_TS = """\ +export namespace geo { + export class Widget { + build(): number { return 1; } + } +} + +export class Widget { + other(): number { return 2; } +} +""" + + +def test_typescript_namespace_class_qn_includes_namespace( + temp_repo: Path, mock_ingestor: MagicMock +) -> None: + project = temp_repo / "ts_ns" + project.mkdir() + (project / "lib.ts").write_text(_TS, encoding="utf-8") + create_and_run_updater(project, mock_ingestor, skip_if_missing="typescript") + + class_qns = { + str(node[0][1].get(KEY_QUALIFIED_NAME)) + for node in get_nodes(mock_ingestor, NodeLabel.CLASS) + } + # (H) The namespaced class and the top-level class must be distinct nodes. 
+ assert "ts_ns.lib.geo.Widget" in class_qns, class_qns + assert "ts_ns.lib.Widget" in class_qns, class_qns diff --git a/codebase_rag/tests/test_typescript_span_oracle.py b/codebase_rag/tests/test_typescript_span_oracle.py new file mode 100644 index 000000000..de1076ff7 --- /dev/null +++ b/codebase_rag/tests/test_typescript_span_oracle.py @@ -0,0 +1,82 @@ +# (H) Covers TypeScript node SPAN (end_line) validation: cgr's end_line for each +# (H) node is graded against the TS-compiler-API oracle (which emits each node's +# (H) full-span end line), joined on (kind, file, start). Exercises a class with a +# (H) multi-line method signature, an interface, an enum, a type alias, a +# (H) namespace, and a multi-line arrow function so spans are not trivially single +# (H) line. +from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_ts_graph +from evals.oracles import run_typescript_oracle, typescript_available +from evals.score import score_span + +TS_SRC = """\ +export class Widget { + area( + scale: number, + ): number { + return scale; + } +} + +export interface Shape { + area(): number; +} + +export enum Color { + Red, + Green, +} + +export type Pair = { + a: number; + b: number; +}; + +export namespace geo { + export function dist(): number { + return 1; + } +} + +export function standalone(): number { + const cb = (v: number) => { + return v + 1; + }; + return cb(2); +} +""" + + +def _require_ts() -> None: + if not typescript_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.TS not in load_parsers()[0]: + pytest.skip("typescript parser not available") + + +def test_cgr_matches_tsc_oracle_on_node_spans(tmp_path: Path) -> None: + _require_ts() + project = tmp_path / "ts_span_test" + project.mkdir() + (project / 
"main.ts").write_text(TS_SRC, encoding="utf-8") + + cgr = extract_cgr_ts_graph(project, project.name) + oracle = run_typescript_oracle(project) + + result = score_span(cgr, oracle, ec.TS_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + aggregate = by_label.get(ec.AGGREGATE_LABEL) + assert aggregate is not None, (by_label, result.diff) + assert aggregate["precision"] == 1.0 and aggregate["recall"] == 1.0, ( + aggregate, + result.diff, + ) + assert aggregate["tp"] >= 5, aggregate diff --git a/codebase_rag/tests/test_typescript_structure_oracle.py b/codebase_rag/tests/test_typescript_structure_oracle.py new file mode 100644 index 000000000..bdb4f8972 --- /dev/null +++ b/codebase_rag/tests/test_typescript_structure_oracle.py @@ -0,0 +1,61 @@ +# (H) Covers the TypeScript structure oracle harness (evals/oracles/ts_oracle + +# (H) evals/ts_l1.py): the TS-compiler-API oracle is authoritative ground truth, +# (H) and cgr's captured TypeScript nodes are graded against it on +# (H) (kind, file, start_line). 
+from __future__ import annotations + +from pathlib import Path + +import pytest + +from codebase_rag import constants as cs +from codebase_rag.parser_loader import load_parsers +from evals import constants as ec +from evals.cgr_graph import extract_cgr_ts_nodes +from evals.oracles import run_typescript_oracle, typescript_available +from evals.score import score_node_kinds +from evals.types_defs import GraphData + +TS_SRC = """\ +export interface Shape { area(): number; } +export type Meters = number; +export enum Color { Red, Green, Blue } + +export class Point implements Shape { + x: number; + constructor(x: number) { this.x = x; } + area(): number { return this.x; } +} + +export function freeFn(a: number): number { return a + 1; } +export const arrow = (b: number): number => b * 2; +[1, 2].forEach((n) => freeFn(n)); +""" + + +def _require_ts() -> None: + if not typescript_available(): + pytest.skip("node/npm toolchain not available") + if cs.SupportedLanguage.TS not in load_parsers()[0]: + pytest.skip("typescript parser not available") + + +def test_cgr_matches_tsc_oracle_on_typescript_structure(tmp_path: Path) -> None: + _require_ts() + project = tmp_path / "ts_oracle_test" + project.mkdir() + (project / "app.ts").write_text(TS_SRC, encoding="utf-8") + + cgr = GraphData( + nodes=extract_cgr_ts_nodes(project, project.name), + edges=set(), + name_edges=set(), + ) + oracle = run_typescript_oracle(project) + + result = score_node_kinds(cgr, oracle, ec.TS_SCORED_NODE_KINDS) + by_label = {row["label"]: row for row in result.rows} + for label in ("Class", "Interface", "Enum", "Type", "Function", "Method"): + row = by_label.get(label) + assert row is not None, (label, by_label) + assert row["precision"] == 1.0 and row["recall"] == 1.0, (label, row) diff --git a/codebase_rag/tests/test_unixcoder_unit.py b/codebase_rag/tests/test_unixcoder_unit.py index bf8a807c7..fffc29e25 100644 --- a/codebase_rag/tests/test_unixcoder_unit.py +++ 
b/codebase_rag/tests/test_unixcoder_unit.py @@ -1,8 +1,11 @@ from __future__ import annotations +from unittest.mock import MagicMock + import torch +from torch import nn -from codebase_rag.unixcoder import Beam +from codebase_rag.unixcoder import Beam, UniXcoder class TestBeamInit: @@ -170,6 +173,38 @@ def test_handles_no_eos(self) -> None: assert len(result[0]) == 3 +class TestForwardAttentionMask: + def _make_uninitialized(self, pad_id: int) -> UniXcoder: + instance = UniXcoder.__new__(UniXcoder) + nn.Module.__init__(instance) + instance.config = MagicMock() + instance.config.pad_token_id = pad_id + return instance + + def test_attention_mask_is_4d(self) -> None: + instance = self._make_uninitialized(pad_id=1) + captured: dict[str, torch.Size] = {} + + def fake_model( + source_ids: torch.Tensor, attention_mask: torch.Tensor + ) -> tuple[torch.Tensor]: + captured["shape"] = attention_mask.shape + batch, seq = source_ids.shape + return (torch.zeros(batch, seq, 8),) + + instance.model = MagicMock(side_effect=fake_model) + + source_ids = torch.tensor([[2, 3, 4, 5, 1], [2, 3, 1, 1, 1]]) + instance.forward(source_ids) + + assert "shape" in captured + assert len(captured["shape"]) == 4 + assert captured["shape"][0] == 2 + assert captured["shape"][1] == 1 + assert captured["shape"][2] == 5 + assert captured["shape"][3] == 5 + + class TestBeamGetHyp: def test_constructs_hypothesis_path(self) -> None: beam = Beam(size=2, eos=2, device=torch.device("cpu")) diff --git a/codebase_rag/tests/test_vector_store.py b/codebase_rag/tests/test_vector_store.py index c4b0c0bad..57ccd3c36 100644 --- a/codebase_rag/tests/test_vector_store.py +++ b/codebase_rag/tests/test_vector_store.py @@ -78,6 +78,56 @@ def integration_client( pass +@pytest.mark.skipif(not has_qdrant_client(), reason="qdrant-client not installed") +def test_get_qdrant_client_uses_url_when_set(reset_global_client: None) -> None: + import codebase_rag.vector_store as vs + + with patch.object(vs.settings, "QDRANT_URL", 
"http://localhost:6333"): + with patch("codebase_rag.vector_store.QdrantClient") as mock_client_cls: + instance = MagicMock() + instance.collection_exists.return_value = True + mock_client_cls.return_value = instance + vs.get_qdrant_client() + + mock_client_cls.assert_called_once_with(url="http://localhost:6333") + + +@pytest.mark.skipif(not has_qdrant_client(), reason="qdrant-client not installed") +def test_get_qdrant_client_uses_path_when_url_unset( + reset_global_client: None, +) -> None: + import codebase_rag.vector_store as vs + + with patch.object(vs.settings, "QDRANT_URL", None): + with patch.object(vs.settings, "QDRANT_DB_PATH", "/tmp/qd"): + with patch("codebase_rag.vector_store.QdrantClient") as mock_client_cls: + instance = MagicMock() + instance.collection_exists.return_value = True + mock_client_cls.return_value = instance + vs.get_qdrant_client() + + mock_client_cls.assert_called_once_with(path="/tmp/qd") + + +@pytest.mark.skipif(not has_qdrant_client(), reason="qdrant-client not installed") +def test_get_qdrant_client_logs_and_reraises_on_lock_error( + reset_global_client: None, +) -> None: + import codebase_rag.vector_store as vs + + with patch.object(vs.settings, "QDRANT_URL", None): + with patch.object(vs.settings, "QDRANT_DB_PATH", "/tmp/qd_locked"): + with patch("codebase_rag.vector_store.QdrantClient") as mock_client_cls: + mock_client_cls.side_effect = RuntimeError( + "Storage folder is already accessed by another instance" + ) + with patch("codebase_rag.vector_store.logger") as mock_logger: + with pytest.raises(RuntimeError): + vs.get_qdrant_client() + + mock_logger.error.assert_called_once() + + @pytest.mark.skipif(not has_qdrant_client(), reason="qdrant-client not installed") def test_store_embedding_calls_upsert( mock_qdrant_client: MagicMock, reset_global_client: None diff --git a/codebase_rag/tests/test_vector_store_batch.py b/codebase_rag/tests/test_vector_store_batch.py new file mode 100644 index 000000000..597ebd2d2 --- /dev/null +++ 
b/codebase_rag/tests/test_vector_store_batch.py @@ -0,0 +1,225 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from codebase_rag.utils.dependencies import has_qdrant_client + +pytestmark = pytest.mark.skipif( + not has_qdrant_client(), reason="qdrant-client not installed" +) + +_PATCH_CLIENT = "codebase_rag.vector_store.get_qdrant_client" +_PATCH_SLEEP = "codebase_rag.vector_store.time.sleep" + + +class TestUpsertWithRetry: + def test_succeeds_on_first_attempt(self) -> None: + from codebase_rag.vector_store import _upsert_with_retry + + mock_client = MagicMock() + mock_point = MagicMock() + + with patch(_PATCH_CLIENT, return_value=mock_client): + _upsert_with_retry([mock_point]) + + mock_client.upsert.assert_called_once() + + def test_retries_on_failure_then_succeeds(self) -> None: + from codebase_rag.vector_store import _upsert_with_retry + + mock_client = MagicMock() + mock_client.upsert.side_effect = [ + ConnectionError("timeout"), + None, + ] + + with ( + patch(_PATCH_CLIENT, return_value=mock_client), + patch(_PATCH_SLEEP) as mock_sleep, + ): + _upsert_with_retry([MagicMock()]) + + assert mock_client.upsert.call_count == 2 + mock_sleep.assert_called_once() + + def test_raises_after_exhausting_retries(self) -> None: + from codebase_rag.vector_store import _upsert_with_retry + + mock_client = MagicMock() + mock_client.upsert.side_effect = ConnectionError("timeout") + + with ( + patch(_PATCH_CLIENT, return_value=mock_client), + patch(_PATCH_SLEEP), + pytest.raises(ConnectionError, match="timeout"), + ): + _upsert_with_retry([MagicMock()]) + + def test_exponential_backoff_delays(self) -> None: + from codebase_rag.vector_store import _upsert_with_retry + + mock_client = MagicMock() + mock_client.upsert.side_effect = [ + ConnectionError("fail"), + ConnectionError("fail"), + None, + ] + + with ( + patch(_PATCH_CLIENT, return_value=mock_client), + patch(_PATCH_SLEEP) as mock_sleep, + ): + _upsert_with_retry([MagicMock()]) + + delays = [c.args[0] for 
c in mock_sleep.call_args_list] + assert delays[1] > delays[0] + + +class TestStoreEmbeddingBatch: + def test_returns_count_on_success(self) -> None: + from codebase_rag.vector_store import store_embedding_batch + + mock_client = MagicMock() + points = [ + (1, [0.1] * 768, "mod.func1"), + (2, [0.2] * 768, "mod.func2"), + ] + + with patch(_PATCH_CLIENT, return_value=mock_client): + result = store_embedding_batch(points) + + assert result == 2 + + def test_returns_zero_on_empty(self) -> None: + from codebase_rag.vector_store import store_embedding_batch + + result = store_embedding_batch([]) + assert result == 0 + + def test_returns_zero_on_failure(self) -> None: + from codebase_rag.vector_store import store_embedding_batch + + mock_client = MagicMock() + mock_client.upsert.side_effect = Exception("fail") + + with ( + patch(_PATCH_CLIENT, return_value=mock_client), + patch(_PATCH_SLEEP), + ): + result = store_embedding_batch([(1, [0.1] * 768, "mod.func")]) + + assert result == 0 + + def test_builds_correct_point_structs(self) -> None: + from codebase_rag.vector_store import store_embedding_batch + + mock_client = MagicMock() + embedding = [0.5] * 768 + points = [(42, embedding, "pkg.module.fn")] + + with patch(_PATCH_CLIENT, return_value=mock_client): + store_embedding_batch(points) + + call_kwargs = mock_client.upsert.call_args[1] + stored_points = call_kwargs["points"] + assert len(stored_points) == 1 + assert stored_points[0].id == 42 + assert stored_points[0].vector == embedding + assert stored_points[0].payload["node_id"] == 42 + assert stored_points[0].payload["qualified_name"] == "pkg.module.fn" + + +class TestDeleteProjectEmbeddings: + def test_deletes_given_ids(self) -> None: + from codebase_rag.vector_store import delete_project_embeddings + + mock_client = MagicMock() + node_ids = [1, 2, 3] + + with patch(_PATCH_CLIENT, return_value=mock_client): + delete_project_embeddings("myproject", node_ids) + + mock_client.delete.assert_called_once() + call_kwargs = 
mock_client.delete.call_args[1] + assert call_kwargs["points_selector"] == [1, 2, 3] + + def test_noop_on_empty_ids(self) -> None: + from codebase_rag.vector_store import delete_project_embeddings + + mock_client = MagicMock() + + with patch(_PATCH_CLIENT, return_value=mock_client): + delete_project_embeddings("myproject", []) + + mock_client.delete.assert_not_called() + + def test_handles_exception_gracefully(self) -> None: + from codebase_rag.vector_store import delete_project_embeddings + + mock_client = MagicMock() + mock_client.delete.side_effect = Exception("connection lost") + + with patch(_PATCH_CLIENT, return_value=mock_client): + delete_project_embeddings("myproject", [1, 2]) + + +class TestVerifyStoredIds: + def test_returns_found_ids(self) -> None: + from codebase_rag.vector_store import verify_stored_ids + + mock_client = MagicMock() + mock_point_1 = MagicMock() + mock_point_1.id = 1 + mock_point_2 = MagicMock() + mock_point_2.id = 3 + mock_client.retrieve.return_value = [mock_point_1, mock_point_2] + + with patch(_PATCH_CLIENT, return_value=mock_client): + result = verify_stored_ids({1, 2, 3}) + + assert result == {1, 3} + + def test_returns_empty_for_empty_input(self) -> None: + from codebase_rag.vector_store import verify_stored_ids + + result = verify_stored_ids(set()) + assert result == set() + + def test_raises_on_exception(self) -> None: + from codebase_rag.vector_store import verify_stored_ids + + mock_client = MagicMock() + mock_client.retrieve.side_effect = Exception("fail") + + with ( + patch(_PATCH_CLIENT, return_value=mock_client), + pytest.raises(Exception, match="fail"), + ): + verify_stored_ids({1, 2}) + + def test_batches_large_id_sets(self) -> None: + from codebase_rag.vector_store import _RETRIEVE_BATCH_SIZE, verify_stored_ids + + mock_client = MagicMock() + mock_client.retrieve.return_value = [] + + large_id_set = set(range(_RETRIEVE_BATCH_SIZE + 100)) + + with patch(_PATCH_CLIENT, return_value=mock_client): + 
verify_stored_ids(large_id_set) + + assert mock_client.retrieve.call_count == 2 + + def test_retrieve_called_with_correct_params(self) -> None: + from codebase_rag.vector_store import verify_stored_ids + + mock_client = MagicMock() + mock_client.retrieve.return_value = [] + + with patch(_PATCH_CLIENT, return_value=mock_client): + verify_stored_ids({10, 20}) + + call_kwargs = mock_client.retrieve.call_args[1] + assert call_kwargs["with_payload"] is False + assert call_kwargs["with_vectors"] is False + assert set(call_kwargs["ids"]) == {10, 20} diff --git a/codebase_rag/tests/test_workspaces.py b/codebase_rag/tests/test_workspaces.py new file mode 100644 index 000000000..a4078d1ed --- /dev/null +++ b/codebase_rag/tests/test_workspaces.py @@ -0,0 +1,251 @@ +from __future__ import annotations + +from collections.abc import Generator +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from typer.testing import CliRunner + +from codebase_rag.cli import app +from codebase_rag.workspaces import ( + WorkspaceError, + add_repo, + create_workspace, + delete_workspace, + list_workspaces, + load_workspace, + remove_repo, +) +from codebase_rag.workspaces.models import WorkspaceConfig + +runner = CliRunner() + + +@pytest.fixture(autouse=True) +def _temp_home( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> Generator[Path, None, None]: + from codebase_rag.config import settings + + monkeypatch.setattr(settings, "CGR_HOME", tmp_path / "cgr-home") + yield tmp_path / "cgr-home" + + +class TestStorage: + def test_create_then_load(self, _temp_home: Path) -> None: + config, _ = create_workspace("alpha", description="testing") + assert config.name == "alpha" + loaded = load_workspace("alpha") + assert loaded.name == "alpha" + assert loaded.description == "testing" + assert loaded.repos == [] + + def test_create_duplicate_raises(self, _temp_home: Path) -> None: + create_workspace("dup") + with pytest.raises(WorkspaceError): + 
create_workspace("dup") + + def test_create_with_force_overwrites(self, _temp_home: Path) -> None: + create_workspace("over", description="first") + config, _ = create_workspace("over", description="second", overwrite=True) + assert config.description == "second" + + def test_load_missing_raises(self, _temp_home: Path) -> None: + with pytest.raises(WorkspaceError): + load_workspace("nope") + + def test_list_empty(self, _temp_home: Path) -> None: + assert list_workspaces() == [] + + def test_list_sorted(self, _temp_home: Path) -> None: + create_workspace("b") + create_workspace("a") + create_workspace("c") + assert list_workspaces() == ["a", "b", "c"] + + def test_delete(self, _temp_home: Path) -> None: + create_workspace("kill") + delete_workspace("kill") + with pytest.raises(WorkspaceError): + load_workspace("kill") + + def test_delete_missing_raises(self, _temp_home: Path) -> None: + with pytest.raises(WorkspaceError): + delete_workspace("nope") + + def test_add_repo_derives_project_name( + self, tmp_path: Path, _temp_home: Path + ) -> None: + repo_dir = tmp_path / "some_repo" + repo_dir.mkdir() + create_workspace("mono") + config, repo = add_repo("mono", str(repo_dir)) + assert repo.path == str(repo_dir.resolve()) + assert repo.project_name.startswith("some_repo__") + assert config.repos[0].project_name == repo.project_name + + def test_add_repo_with_explicit_project_name( + self, tmp_path: Path, _temp_home: Path + ) -> None: + repo_dir = tmp_path / "second_repo" + repo_dir.mkdir() + create_workspace("mono") + _, repo = add_repo("mono", str(repo_dir), project_name="custom_name") + assert repo.project_name == "custom_name" + + def test_add_repo_missing_path(self, tmp_path: Path, _temp_home: Path) -> None: + create_workspace("mono") + with pytest.raises(WorkspaceError): + add_repo("mono", str(tmp_path / "does_not_exist")) + + def test_add_repo_duplicate(self, tmp_path: Path, _temp_home: Path) -> None: + repo_dir = tmp_path / "dup_repo" + repo_dir.mkdir() + 
create_workspace("mono") + add_repo("mono", str(repo_dir)) + with pytest.raises(WorkspaceError): + add_repo("mono", str(repo_dir)) + + def test_remove_repo(self, tmp_path: Path, _temp_home: Path) -> None: + repo_dir = tmp_path / "rem_repo" + repo_dir.mkdir() + create_workspace("mono") + add_repo("mono", str(repo_dir)) + config, _ = remove_repo("mono", str(repo_dir)) + assert config.repos == [] + + def test_remove_repo_not_in_workspace( + self, tmp_path: Path, _temp_home: Path + ) -> None: + repo_dir = tmp_path / "missing_repo" + repo_dir.mkdir() + create_workspace("mono") + with pytest.raises(WorkspaceError): + remove_repo("mono", str(repo_dir)) + + +class TestCli: + def test_workspace_list_empty(self, _temp_home: Path) -> None: + result = runner.invoke(app, ["workspace", "list"]) + assert result.exit_code == 0, result.output + assert "no workspaces" in result.output.lower() + + def test_workspace_create_list_show_delete( + self, tmp_path: Path, _temp_home: Path + ) -> None: + result = runner.invoke(app, ["workspace", "create", "mono"]) + assert result.exit_code == 0, result.output + + result = runner.invoke(app, ["workspace", "list"]) + assert "mono" in result.output + + result = runner.invoke(app, ["workspace", "show", "mono"]) + assert "mono" in result.output + + result = runner.invoke(app, ["workspace", "delete", "mono"]) + assert result.exit_code == 0, result.output + + result = runner.invoke(app, ["workspace", "list"]) + assert "no workspaces" in result.output.lower() + + def test_workspace_add_remove_repo_via_cli( + self, tmp_path: Path, _temp_home: Path + ) -> None: + repo_dir = tmp_path / "the_repo" + repo_dir.mkdir() + + runner.invoke(app, ["workspace", "create", "mono"]) + result = runner.invoke(app, ["workspace", "add-repo", "mono", str(repo_dir)]) + assert result.exit_code == 0, result.output + assert str(repo_dir.resolve()) in result.output + + result = runner.invoke(app, ["workspace", "show", "mono"]) + assert str(repo_dir.resolve()) in result.output 
+ + result = runner.invoke(app, ["workspace", "remove-repo", "mono", str(repo_dir)]) + assert result.exit_code == 0, result.output + + +@pytest.fixture +def mock_memgraph_connect() -> Generator[MagicMock, None, None]: + with patch("codebase_rag.cli.connect_memgraph") as mock_connect: + mock_ingestor = MagicMock() + mock_connect.return_value.__enter__ = MagicMock(return_value=mock_ingestor) + mock_connect.return_value.__exit__ = MagicMock(return_value=False) + yield mock_connect + + +@pytest.fixture +def mock_validate_models() -> Generator[None, None, None]: + with patch("codebase_rag.cli._update_and_validate_models"): + yield + + +def test_start_with_workspace_passes_all_projects( + mock_memgraph_connect: MagicMock, + mock_validate_models: None, + tmp_path: Path, + _temp_home: Path, +) -> None: + repo_a = tmp_path / "repo_a" + repo_b = tmp_path / "repo_b" + repo_a.mkdir() + repo_b.mkdir() + + create_workspace("mono") + add_repo("mono", str(repo_a), project_name="proj_a") + add_repo("mono", str(repo_b), project_name="proj_b") + + with ( + patch("codebase_rag.cli._run_graph_sync") as mock_sync, + patch("codebase_rag.cli.main_single_query") as mock_single, + ): + result = runner.invoke( + app, + [ + "start", + "--repo-path", + str(repo_a), + "--workspace", + "mono", + "--ask-agent", + "hi", + ], + ) + assert result.exit_code == 0, result.output + assert mock_sync.call_count == 2 + project_names_synced = [c.kwargs["project_name"] for c in mock_sync.call_args_list] + assert set(project_names_synced) == {"proj_a", "proj_b"} + mock_single.assert_called_once() + assert mock_single.call_args.kwargs["active_projects"] == ["proj_a", "proj_b"] + + +def test_start_with_unknown_workspace_errors( + mock_memgraph_connect: MagicMock, + mock_validate_models: None, + tmp_path: Path, + _temp_home: Path, +) -> None: + result = runner.invoke( + app, + [ + "start", + "--repo-path", + str(tmp_path), + "--workspace", + "doesnotexist", + "--ask-agent", + "hi", + "--no-sync", + ], + ) + 
assert result.exit_code != 0 + + +def test_workspace_model_project_names() -> None: + config = WorkspaceConfig( + name="x", + repos=[], + ) + assert config.project_names() == [] diff --git a/codebase_rag/tool_errors.py b/codebase_rag/tool_errors.py index 25540a976..50be918c6 100644 --- a/codebase_rag/tool_errors.py +++ b/codebase_rag/tool_errors.py @@ -6,34 +6,20 @@ # (H) File operation errors FILE_NOT_FOUND = "File not found." FILE_NOT_FOUND_OR_DIR = "File not found or is a directory: {path}" -BINARY_FILE = "File '{path}' is a binary file. Use the 'analyze_document' tool for this file type." +BINARY_FILE = "File '{path}' is a binary file. Ask the user to attach it inline if they want it analyzed." UNICODE_DECODE = ( "File '{path}' could not be read as text. It may be a binary file. " - "If it is a document (e.g., PDF), use the 'analyze_document' tool." + "If it is a document (e.g., PDF), ask the user to attach it inline." ) -# (H) Document analyzer errors -DOCUMENT_UNSUPPORTED = ( - "Error: Document analysis is not supported for the current LLM provider." -) -DOC_FILE_NOT_FOUND = "Error: File not found at '{path}'." -DOC_SECURITY_RISK = "Error: Security risk: file path {path} is outside the project root" -DOC_ACCESS_OUTSIDE_ROOT = ( - "Error: Security risk: Attempted to access file outside of project root: {path}" -) -DOC_API_VALIDATION = "Error: API validation failed: {error}" -DOC_API_ERROR = "Error: API error: {error}" -DOC_IMAGE_PROCESS = ( - "Error: Unable to process the image file. " - "The image may be corrupted or in an unsupported format." -) -DOC_ANALYSIS_FAILED = "Error: An error occurred during analysis: {error}" -DOC_DURING_ANALYSIS = "Error: Document analysis failed: {error}" - # (H) Directory errors DIRECTORY_INVALID = "Error: '{path}' is not a valid directory." DIRECTORY_EMPTY = "Error: The directory '{path}' is empty." DIRECTORY_LIST_FAILED = "Error: Could not list contents of '{path}'." 
+DIRECTORY_PATH_OUTSIDE_ROOT = ( + "Error: '{path}' is outside the project root ({root}). " + "Use a relative path from the project root, or the full absolute path within it." +) # (H) Shell command errors COMMAND_NOT_ALLOWED = "Command '{cmd}' is not in the allowlist.{suggestion} Available commands: {available}" @@ -69,3 +55,4 @@ # (H) CLI validation errors INVALID_POSITIVE_INT = "{value!r} is not a valid positive integer" +INVALID_NON_NEGATIVE_FLOAT = "Value must be non-negative, got {value}" diff --git a/codebase_rag/tools/code_retrieval.py b/codebase_rag/tools/code_retrieval.py index 2e6331dcd..bd04cce0a 100644 --- a/codebase_rag/tools/code_retrieval.py +++ b/codebase_rag/tools/code_retrieval.py @@ -1,5 +1,6 @@ from __future__ import annotations +import asyncio from pathlib import Path from loguru import logger @@ -15,6 +16,8 @@ class CodeRetriever: + __slots__ = ("project_root", "ingestor") + def __init__(self, project_root: str, ingestor: QueryProtocol): self.project_root = Path(project_root).resolve() self.ingestor = ingestor @@ -25,7 +28,9 @@ async def find_code_snippet(self, qualified_name: str) -> CodeSnippet: params = {"qn": qualified_name} try: - results = self.ingestor.fetch_all(CYPHER_FIND_BY_QUALIFIED_NAME, params) + results = await asyncio.to_thread( + self.ingestor.fetch_all, CYPHER_FIND_BY_QUALIFIED_NAME, params + ) if not results: return CodeSnippet( diff --git a/codebase_rag/tools/codebase_query.py b/codebase_rag/tools/codebase_query.py index 690a979bb..33c6c9cca 100644 --- a/codebase_rag/tools/codebase_query.py +++ b/codebase_rag/tools/codebase_query.py @@ -1,5 +1,7 @@ from __future__ import annotations +import asyncio + from loguru import logger from pydantic_ai import Tool from rich.console import Console @@ -8,16 +10,20 @@ from .. import exceptions as ex from .. 
import logs as ls +from ..config import settings from ..constants import ( QUERY_NOT_AVAILABLE, QUERY_RESULTS_PANEL_TITLE, QUERY_SUMMARY_DB_ERROR, QUERY_SUMMARY_SUCCESS, + QUERY_SUMMARY_TIMEOUT, QUERY_SUMMARY_TRANSLATION_FAILED, + QUERY_SUMMARY_TRUNCATED, ) from ..schemas import QueryGraphData from ..services import QueryProtocol from ..services.llm import CypherGenerator +from ..utils.token_utils import truncate_results_by_tokens from . import tool_descriptions as td @@ -27,7 +33,7 @@ def create_query_tool( console: Console | None = None, ) -> Tool: if console is None: - console = Console(width=None, force_terminal=True) + console = Console(width=None, stderr=True, force_terminal=True) async def query_codebase_knowledge_graph( natural_language_query: str, @@ -37,7 +43,20 @@ async def query_codebase_knowledge_graph( try: cypher_query = await cypher_gen.generate(natural_language_query) - results = ingestor.fetch_all(cypher_query) + results = await asyncio.wait_for( + asyncio.to_thread(ingestor.fetch_all, cypher_query), + timeout=settings.QUERY_TIMEOUT_S, + ) + + total_count = len(results) + if total_count > settings.QUERY_RESULT_ROW_CAP: + results = results[: settings.QUERY_RESULT_ROW_CAP] + + results, tokens_used, was_truncated = truncate_results_by_tokens( + results, + max_tokens=settings.QUERY_RESULT_MAX_TOKENS, + original_total=total_count, + ) if results: table = Table( @@ -69,7 +88,15 @@ async def query_codebase_knowledge_graph( ) ) - summary = QUERY_SUMMARY_SUCCESS.format(count=len(results)) + if was_truncated or total_count > len(results): + summary = QUERY_SUMMARY_TRUNCATED.format( + kept=len(results), + total=total_count, + tokens=tokens_used, + max_tokens=settings.QUERY_RESULT_MAX_TOKENS, + ) + else: + summary = QUERY_SUMMARY_SUCCESS.format(count=len(results)) return QueryGraphData( query_used=cypher_query, results=results, summary=summary ) @@ -79,6 +106,17 @@ async def query_codebase_knowledge_graph( results=[], 
summary=QUERY_SUMMARY_TRANSLATION_FAILED.format(error=e), ) + except TimeoutError: + logger.warning( + ls.TOOL_QUERY_TIMEOUT.format( + timeout=settings.QUERY_TIMEOUT_S, query=cypher_query + ) + ) + return QueryGraphData( + query_used=cypher_query, + results=[], + summary=QUERY_SUMMARY_TIMEOUT.format(timeout=settings.QUERY_TIMEOUT_S), + ) except Exception as e: logger.exception(ls.TOOL_QUERY_ERROR.format(error=e)) return QueryGraphData( diff --git a/codebase_rag/tools/directory_lister.py b/codebase_rag/tools/directory_lister.py index 01136a193..92afcb920 100644 --- a/codebase_rag/tools/directory_lister.py +++ b/codebase_rag/tools/directory_lister.py @@ -13,11 +13,19 @@ class DirectoryLister: + __slots__ = ("project_root",) + def __init__(self, project_root: str): self.project_root = Path(project_root).resolve() def list_directory_contents(self, directory_path: str) -> str: - target_path = self._get_safe_path(directory_path) + try: + target_path = self._get_safe_path(directory_path) + except PermissionError: + return te.DIRECTORY_PATH_OUTSIDE_ROOT.format( + path=directory_path, root=self.project_root + ) + logger.info(ls.DIR_LISTING.format(path=target_path)) try: diff --git a/codebase_rag/tools/document_analyzer.py b/codebase_rag/tools/document_analyzer.py deleted file mode 100644 index 2a5475954..000000000 --- a/codebase_rag/tools/document_analyzer.py +++ /dev/null @@ -1,167 +0,0 @@ -from __future__ import annotations - -import mimetypes -import shutil -import uuid -from pathlib import Path -from typing import NoReturn - -from google import genai -from google.genai import types -from google.genai.errors import ClientError -from loguru import logger -from pydantic_ai import Tool - -from .. import constants as cs -from .. import exceptions as ex -from .. import logs as ls -from .. import tool_errors as te -from ..config import settings -from . 
import tool_descriptions as td - - -class _NotSupportedClient: - def __getattr__(self, name: str) -> NoReturn: - raise NotImplementedError(ex.DOC_UNSUPPORTED_PROVIDER) - - -class DocumentAnalyzer: - def __init__(self, project_root: str) -> None: - self.project_root = Path(project_root).resolve() - - orchestrator_config = settings.active_orchestrator_config - orchestrator_provider = orchestrator_config.provider - - if orchestrator_provider == cs.Provider.GOOGLE: - if orchestrator_config.provider_type == cs.GoogleProviderType.VERTEX: - self.client = genai.Client( - project=orchestrator_config.project_id, - location=orchestrator_config.region, - ) - else: - self.client = genai.Client(api_key=orchestrator_config.api_key) - else: - self.client = _NotSupportedClient() - - logger.info(ls.DOC_ANALYZER_INIT.format(root=self.project_root)) - - def _resolve_absolute_path(self, file_path: str) -> Path | str: - source_path = Path(file_path) - if not source_path.is_file(): - return te.DOC_FILE_NOT_FOUND.format(path=file_path) - - tmp_dir = self.project_root / cs.TMP_DIR - tmp_dir.mkdir(exist_ok=True) - - tmp_file = tmp_dir / f"{uuid.uuid4()}-{source_path.name}" - shutil.copy2(source_path, tmp_file) - logger.info(ls.DOC_COPIED.format(path=tmp_file)) - return tmp_file - - def _resolve_relative_path(self, file_path: str) -> Path | str: - full_path = (self.project_root / file_path).resolve() - try: - full_path.relative_to(self.project_root.resolve()) - except ValueError: - return te.DOC_SECURITY_RISK.format(path=file_path) - - if not str(full_path).startswith(str(self.project_root.resolve())): - return te.DOC_SECURITY_RISK.format(path=file_path) - - return full_path - - def _resolve_file_path(self, file_path: str) -> Path | str: - if Path(file_path).is_absolute(): - return self._resolve_absolute_path(file_path) - return self._resolve_relative_path(file_path) - - def _extract_response_text(self, response: types.GenerateContentResponse) -> str: - if hasattr(response, "text") and 
response.text: - return str(response.text) - - if hasattr(response, "candidates") and response.candidates: - for candidate in response.candidates: - if hasattr(candidate, "content") and candidate.content: - parts = candidate.content.parts - if parts and hasattr(parts[0], "text"): - return str(parts[0].text) - return cs.MSG_DOC_NO_CANDIDATES - - logger.warning(ls.DOC_NO_TEXT.format(response=response)) - return cs.MSG_DOC_NO_CONTENT - - def _handle_analyze_error(self, error: Exception, file_path: str) -> str: - if isinstance(error, ValueError): - if "does not start with" in str(error): - err_msg = te.DOC_ACCESS_OUTSIDE_ROOT.format(path=file_path) - logger.error(err_msg) - return err_msg - logger.error(ls.DOC_ANALYZER_API_ERR.format(error=error)) - return te.DOC_API_VALIDATION.format(error=error) - - if isinstance(error, ClientError): - logger.error(ls.DOC_API_ERROR.format(path=file_path, error=error)) - if "Unable to process input image" in str(error): - return te.DOC_IMAGE_PROCESS - return te.DOC_API_ERROR.format(error=error) - - logger.exception(ls.DOC_FAILED.format(path=file_path, error=error)) - return te.DOC_ANALYSIS_FAILED.format(error=error) - - def analyze(self, file_path: str, question: str) -> str: - logger.info(ls.TOOL_DOC_ANALYZE.format(path=file_path, question=question)) - if isinstance(self.client, _NotSupportedClient): - return te.DOCUMENT_UNSUPPORTED - - try: - resolved = self._resolve_file_path(file_path) - if isinstance(resolved, str): - return resolved - full_path = resolved - - if not full_path.is_file(): - return te.DOC_FILE_NOT_FOUND.format(path=file_path) - - mime_type, _ = mimetypes.guess_type(full_path) - if not mime_type: - mime_type = cs.MIME_TYPE_DEFAULT - - file_bytes = full_path.read_bytes() - - prompt_parts = [ - types.Part.from_bytes(data=file_bytes, mime_type=mime_type), - cs.DOC_PROMPT_PREFIX.format(question=question), - ] - - orchestrator_config = settings.active_orchestrator_config - response = self.client.models.generate_content( 
- model=orchestrator_config.model_id, contents=prompt_parts - ) - - logger.success(ls.DOC_SUCCESS.format(path=file_path)) - return self._extract_response_text(response) - - except Exception as e: - return self._handle_analyze_error(e, file_path) - - -def create_document_analyzer_tool(analyzer: DocumentAnalyzer) -> Tool: - def analyze_document(file_path: str, question: str) -> str: - try: - result = analyzer.analyze(file_path, question) - preview = result[:100] if result else "None" - logger.debug( - ls.DOC_RESULT.format(type=type(result).__name__, preview=preview) - ) - return result - except Exception as e: - logger.exception(ls.DOC_EXCEPTION.format(error=e)) - if str(e).startswith("Error:") or str(e).startswith("API error:"): - return str(e) - return te.DOC_DURING_ANALYSIS.format(error=e) - - return Tool( - function=analyze_document, - name=td.AgenticToolName.ANALYZE_DOCUMENT, - description=td.ANALYZE_DOCUMENT, - ) diff --git a/codebase_rag/tools/file_editor.py b/codebase_rag/tools/file_editor.py index 650da823e..bc79ce8e0 100644 --- a/codebase_rag/tools/file_editor.py +++ b/codebase_rag/tools/file_editor.py @@ -20,6 +20,8 @@ class FileEditor: + __slots__ = ("project_root", "dmp", "parsers") + def __init__(self, project_root: str = ".") -> None: self.project_root = Path(project_root).resolve() self.dmp = diff_match_patch.diff_match_patch() @@ -218,7 +220,7 @@ def replace_code_block( if target_block not in original_content: logger.error(ls.EDITOR_BLOCK_NOT_FOUND.format(path=file_path)) - logger.debug(ls.EDITOR_LOOKING_FOR.format(block=repr(target_block))) + logger.debug(ls.EDITOR_LOOKING_FOR, block=repr(target_block)) return False modified_content = original_content.replace( diff --git a/codebase_rag/tools/file_reader.py b/codebase_rag/tools/file_reader.py index 1b5f8618b..ae471ee93 100644 --- a/codebase_rag/tools/file_reader.py +++ b/codebase_rag/tools/file_reader.py @@ -14,6 +14,8 @@ class FileReader: + __slots__ = ("project_root",) + def __init__(self, 
project_root: str = "."): self.project_root = Path(project_root).resolve() logger.info(ls.FILE_READER_INIT.format(root=self.project_root)) diff --git a/codebase_rag/tools/file_writer.py b/codebase_rag/tools/file_writer.py index 4f3110b3b..ca709778a 100644 --- a/codebase_rag/tools/file_writer.py +++ b/codebase_rag/tools/file_writer.py @@ -14,6 +14,8 @@ class FileWriter: + __slots__ = ("project_root",) + def __init__(self, project_root: str = "."): self.project_root = Path(project_root).resolve() logger.info(ls.FILE_WRITER_INIT.format(root=self.project_root)) diff --git a/codebase_rag/tools/health_checker.py b/codebase_rag/tools/health_checker.py index 2b94f2c6f..f8a9d9b27 100644 --- a/codebase_rag/tools/health_checker.py +++ b/codebase_rag/tools/health_checker.py @@ -12,6 +12,8 @@ class HealthChecker: + __slots__ = ("results",) + def __init__(self): self.results: list[HealthCheckResult] = [] @@ -82,7 +84,7 @@ def check_memgraph_connection(self) -> HealthCheckResult: ), ) - except mgclient.MemgraphError as e: + except mgclient.Error as e: return HealthCheckResult( name=cs.HEALTH_CHECK_MEMGRAPH_FAILED, passed=False, diff --git a/codebase_rag/tools/semantic_search.py b/codebase_rag/tools/semantic_search.py index e7aa9c5b2..0366c722d 100644 --- a/codebase_rag/tools/semantic_search.py +++ b/codebase_rag/tools/semantic_search.py @@ -1,5 +1,8 @@ from __future__ import annotations +import asyncio +from typing import TYPE_CHECKING + from loguru import logger from pydantic_ai import Tool @@ -14,16 +17,19 @@ from ..utils.dependencies import has_semantic_dependencies from . 
import tool_descriptions as td +if TYPE_CHECKING: + from ..services import QueryProtocol + -def semantic_code_search(query: str, top_k: int = 5) -> list[SemanticSearchResult]: +def semantic_code_search( + ingestor: QueryProtocol, query: str, top_k: int = 5 +) -> list[SemanticSearchResult]: if not has_semantic_dependencies(): logger.warning(ex.SEMANTIC_EXTRA) return [] try: - from ..config import settings from ..embedder import embed_code - from ..services.graph_service import MemgraphIngestor from ..vector_store import search_embeddings query_embedding = embed_code(query) @@ -36,93 +42,87 @@ def semantic_code_search(query: str, top_k: int = 5) -> list[SemanticSearchResul node_ids = [node_id for node_id, _ in search_results] - with MemgraphIngestor( - host=settings.MEMGRAPH_HOST, - port=settings.MEMGRAPH_PORT, - batch_size=cs.SEMANTIC_BATCH_SIZE, - ) as ingestor: - cypher_query = build_nodes_by_ids_query(node_ids) - params = {str(i): node_id for i, node_id in enumerate(node_ids)} - results = ingestor._execute_query(cypher_query, params) - - results_map = {res["node_id"]: res for res in results} - - formatted_results: list[SemanticSearchResult] = [] - for node_id, score in search_results: - if node_id in results_map: - result = results_map[node_id] - result_type = result["type"] - type_str = ( - result_type[0] - if isinstance(result_type, list) and result_type - else cs.SEMANTIC_TYPE_UNKNOWN - ) - formatted_results.append( - SemanticSearchResult( - node_id=node_id, - qualified_name=str(result["qualified_name"]), - name=str(result["name"]), - type=type_str, - score=round(score, 3), - ) + cypher_query = build_nodes_by_ids_query(node_ids) + params = {str(i): node_id for i, node_id in enumerate(node_ids)} + results = ingestor.fetch_all(cypher_query, params) + + results_map = { + node_id: res + for res in results + if isinstance((node_id := res.get("node_id")), int) + } + + formatted_results: list[SemanticSearchResult] = [] + for node_id, score in search_results: + if 
node_id in results_map: + result = results_map[node_id] + result_type = result.get("type") + type_str = ( + result_type[0] + if isinstance(result_type, list) and result_type + else cs.SEMANTIC_TYPE_UNKNOWN + ) + formatted_results.append( + SemanticSearchResult( + node_id=node_id, + qualified_name=str(result.get("qualified_name", "")), + name=str(result.get("name", "")), + type=type_str, + score=round(score, 3), ) + ) - logger.info( - ls.SEMANTIC_FOUND.format(count=len(formatted_results), query=query) - ) - return formatted_results + logger.info( + ls.SEMANTIC_FOUND.format(count=len(formatted_results), query=query) + ) + return formatted_results except Exception as e: logger.error(ls.SEMANTIC_FAILED.format(query=query, error=e)) return [] -def get_function_source_code(node_id: int) -> str | None: +def get_function_source_code(ingestor: QueryProtocol, node_id: int) -> str | None: try: - from ..config import settings - from ..services.graph_service import MemgraphIngestor from ..utils.source_extraction import ( extract_source_lines, validate_source_location, ) - with MemgraphIngestor( - host=settings.MEMGRAPH_HOST, - port=settings.MEMGRAPH_PORT, - batch_size=cs.SEMANTIC_BATCH_SIZE, - ) as ingestor: - results = ingestor._execute_query( - CYPHER_GET_FUNCTION_SOURCE_LOCATION, {"node_id": node_id} - ) + results = ingestor.fetch_all( + CYPHER_GET_FUNCTION_SOURCE_LOCATION, {"node_id": node_id} + ) - if not results: - logger.warning(ls.SEMANTIC_NODE_NOT_FOUND.format(id=node_id)) - return None + if not results: + logger.warning(ls.SEMANTIC_NODE_NOT_FOUND.format(id=node_id)) + return None - result = results[0] - file_path = result.get("path") - start_line = result.get("start_line") - end_line = result.get("end_line") + result = results[0] + file_path = result.get("path") + start_line = result.get("start_line") + end_line = result.get("end_line") - is_valid, file_path_obj = validate_source_location( - file_path, start_line, end_line - ) - if not is_valid or file_path_obj is 
None: - logger.warning(ls.SEMANTIC_INVALID_LOCATION.format(id=node_id)) - return None + is_valid, file_path_obj = validate_source_location( + file_path, start_line, end_line + ) + if not is_valid or file_path_obj is None: + logger.warning(ls.SEMANTIC_INVALID_LOCATION.format(id=node_id)) + return None - return extract_source_lines(file_path_obj, start_line, end_line) + return extract_source_lines(file_path_obj, start_line, end_line) except Exception as e: logger.error(ls.SEMANTIC_SOURCE_FAILED.format(id=node_id, error=e)) return None -def create_semantic_search_tool() -> Tool: +def create_semantic_search_tool(ingestor: QueryProtocol) -> Tool: async def semantic_search_functions(query: str, top_k: int = 5) -> str: logger.info(ls.SEMANTIC_TOOL_SEARCH.format(query=query)) - results = semantic_code_search(query, top_k) + results = await asyncio.to_thread( + semantic_code_search, ingestor, query, top_k + ) if not results: return cs.MSG_SEMANTIC_NO_RESULTS.format(query=query) @@ -139,18 +139,28 @@ async def semantic_search_functions(query: str, top_k: int = 5) -> str: return response - return Tool(semantic_search_functions, name=td.AgenticToolName.SEMANTIC_SEARCH) + return Tool( + semantic_search_functions, + name=td.AgenticToolName.SEMANTIC_SEARCH, + description=td.SEMANTIC_SEARCH, + ) -def create_get_function_source_tool() -> Tool: +def create_get_function_source_tool(ingestor: QueryProtocol) -> Tool: async def get_function_source_by_id(node_id: int) -> str: logger.info(ls.SEMANTIC_TOOL_SOURCE.format(id=node_id)) - source_code = get_function_source_code(node_id) + source_code = await asyncio.to_thread( + get_function_source_code, ingestor, node_id + ) if source_code is None: return cs.MSG_SEMANTIC_SOURCE_UNAVAILABLE.format(id=node_id) return cs.MSG_SEMANTIC_SOURCE_FORMAT.format(id=node_id, code=source_code) - return Tool(get_function_source_by_id, name=td.AgenticToolName.GET_FUNCTION_SOURCE) + return Tool( + get_function_source_by_id, + 
name=td.AgenticToolName.GET_FUNCTION_SOURCE, + description=td.GET_FUNCTION_SOURCE, + ) diff --git a/codebase_rag/tools/shell_command.py b/codebase_rag/tools/shell_command.py index 2a4d3aff0..45021bf96 100644 --- a/codebase_rag/tools/shell_command.py +++ b/codebase_rag/tools/shell_command.py @@ -7,6 +7,7 @@ import shutil import sys import time +from collections.abc import Callable from pathlib import Path from loguru import logger @@ -58,6 +59,8 @@ def _has_subshell(command: str) -> str | None: class CommandGroup: + __slots__ = ("commands", "operator") + def __init__(self, commands: list[str], operator: str | None = None): self.commands = commands self.operator = operator @@ -152,12 +155,12 @@ def _is_dangerous_rm_path(cmd_parts: list[str], project_root: Path) -> tuple[boo resolved_str = str(resolved) if resolved == resolved.parent: return True, "rm targeting root directory" - parts = resolved.parts - if len(parts) >= 2 and parts[1] in cs.SHELL_SYSTEM_DIRECTORIES: - return True, f"rm targeting system directory: {resolved_str}" try: resolved.relative_to(project_root) except ValueError: + parts = resolved.parts + if len(parts) >= 2 and parts[1] in cs.SHELL_SYSTEM_DIRECTORIES: + return True, f"rm targeting system directory: {resolved_str}" return True, f"rm targeting path outside project: {resolved_str}" return False, "" @@ -194,7 +197,9 @@ def _is_dangerous_command(cmd_parts: list[str], full_segment: str) -> tuple[bool return False, "" -def _validate_segment(segment: str, available_commands: str) -> str | None: +def _validate_segment( + segment: str, available_commands: str, bypass_allowlist: bool = False +) -> str | None: try: cmd_parts = shlex.split(segment) except ValueError: @@ -205,7 +210,7 @@ def _validate_segment(segment: str, available_commands: str) -> str | None: base_cmd = cmd_parts[0] - if base_cmd not in settings.SHELL_COMMAND_ALLOWLIST: + if not bypass_allowlist and base_cmd not in settings.SHELL_COMMAND_ALLOWLIST: suggestion = cs.GREP_SUGGESTION if 
base_cmd == cs.SHELL_CMD_GREP else "" return te.COMMAND_NOT_ALLOWED.format( cmd=base_cmd, suggestion=suggestion, available=available_commands @@ -263,9 +268,17 @@ def _requires_approval(command: str) -> bool: class ShellCommander: - def __init__(self, project_root: str = ".", timeout: int = 30): + __slots__ = ("project_root", "timeout", "is_yolo") + + def __init__( + self, + project_root: str = ".", + timeout: int = 30, + is_yolo: Callable[[], bool] | None = None, + ): self.project_root = Path(project_root).resolve() self.timeout = timeout + self.is_yolo = is_yolo or (lambda: False) logger.info(ls.SHELL_COMMANDER_INIT.format(root=self.project_root)) async def _execute_pipeline(self, segments: list[str]) -> tuple[int, bytes, bytes]: @@ -352,9 +365,12 @@ async def execute(self, command: str) -> ShellCommandResult: ) available_commands = ", ".join(sorted(settings.SHELL_COMMAND_ALLOWLIST)) + bypass_allowlist = self.is_yolo() for group in groups: for segment in group.commands: - if err_msg := _validate_segment(segment, available_commands): + if err_msg := _validate_segment( + segment, available_commands, bypass_allowlist=bypass_allowlist + ): logger.error(err_msg) return ShellCommandResult( return_code=cs.SHELL_RETURN_CODE_ERROR, @@ -437,7 +453,11 @@ def create_shell_command_tool(shell_commander: ShellCommander) -> Tool: async def run_shell_command( ctx: RunContext[None], command: str ) -> ShellCommandResult: - if _requires_approval(command) and not ctx.tool_call_approved: + if ( + not shell_commander.is_yolo() + and _requires_approval(command) + and not ctx.tool_call_approved + ): raise ApprovalRequired(metadata={"command": command}) return await shell_commander.execute(command) diff --git a/codebase_rag/tools/tool_descriptions.py b/codebase_rag/tools/tool_descriptions.py index 008c60bef..df1d99812 100644 --- a/codebase_rag/tools/tool_descriptions.py +++ b/codebase_rag/tools/tool_descriptions.py @@ -11,17 +11,12 @@ class AgenticToolName(StrEnum): CREATE_FILE = 
"create_file" REPLACE_CODE = "replace_code" LIST_DIRECTORY = "list_directory" - ANALYZE_DOCUMENT = "analyze_document" EXECUTE_SHELL = "execute_shell" SEMANTIC_SEARCH = "semantic_search" GET_FUNCTION_SOURCE = "get_function_source" GET_CODE_SNIPPET = "get_code_snippet" -ANALYZE_DOCUMENT = ( - "Analyzes documents (PDFs, images) to answer questions about their content." -) - CODEBASE_QUERY = ( "Query the codebase knowledge graph using natural language questions. " "Ask in plain English about classes, functions, methods, dependencies, or code structure. " @@ -60,7 +55,7 @@ class AgenticToolName(StrEnum): FILE_READER = ( "Reads the content of text-based files. " - "For documents like PDFs or images, use the 'analyze_document' tool instead." + "Images and PDFs the user references are attached inline; read them directly." ) FILE_EDITOR = ( @@ -88,13 +83,19 @@ class AgenticToolName(StrEnum): ) MCP_INDEX_REPOSITORY = ( + "WARNING: Clears all data for the current project including its embeddings. " "Parse and ingest the repository into the Memgraph knowledge graph. " - "This builds a comprehensive graph of functions, classes, dependencies, and relationships. " - "Note: This preserves other projects - only the current project is re-indexed." + "Use update_repository for incremental updates. Only use when explicitly requested." +) + +MCP_UPDATE_REPOSITORY = ( + "Update the repository in the Memgraph knowledge graph without clearing existing data. " + "Use this for incremental updates." ) MCP_QUERY_CODE_GRAPH = ( "Query the codebase knowledge graph using natural language. " + "Use semantic_search unless you know the exact names of classes/functions you are searching for. " "Ask questions like 'What functions call UserService.create_user?' or " "'Show me all classes that implement the Repository interface'." ) @@ -117,6 +118,12 @@ class AgenticToolName(StrEnum): MCP_LIST_DIRECTORY = "List contents of a directory in the project." 
+MCP_SEMANTIC_SEARCH = ( + "Performs a semantic search for functions based on a natural language query " + "describing their purpose, returning a list of potential matches with similarity scores. " + "Requires the 'semantic' extra to be installed." +) + MCP_PARAM_PROJECT_NAME = "Name of the project to delete (e.g., 'my-project')" MCP_PARAM_CONFIRM = "Must be true to confirm the wipe operation" MCP_PARAM_NATURAL_LANGUAGE_QUERY = "Your question in plain English about the codebase" @@ -130,6 +137,16 @@ class AgenticToolName(StrEnum): MCP_PARAM_LIMIT = "Maximum number of lines to read (optional)" MCP_PARAM_CONTENT = "Content to write to the file" MCP_PARAM_DIRECTORY_PATH = "Relative path to directory from project root (default: '.')" +MCP_PARAM_TOP_K = "Max number of results to return (optional, default: 5)" +MCP_PARAM_QUESTION = ( + "A question about the codebase, architecture, functionality, or code relationships" +) + +MCP_ASK_AGENT = ( + "Ask the Code Graph RAG agent a question about the codebase. " + "Uses the full RAG pipeline to analyze the code graph and provide a detailed answer. " + "Use this for general questions about architecture, functionality, and code relationships." 
+) MCP_TOOLS: dict[MCPToolName, str] = { @@ -137,12 +154,15 @@ class AgenticToolName(StrEnum): MCPToolName.DELETE_PROJECT: MCP_DELETE_PROJECT, MCPToolName.WIPE_DATABASE: MCP_WIPE_DATABASE, MCPToolName.INDEX_REPOSITORY: MCP_INDEX_REPOSITORY, + MCPToolName.UPDATE_REPOSITORY: MCP_UPDATE_REPOSITORY, MCPToolName.QUERY_CODE_GRAPH: MCP_QUERY_CODE_GRAPH, MCPToolName.GET_CODE_SNIPPET: MCP_GET_CODE_SNIPPET, MCPToolName.SURGICAL_REPLACE_CODE: MCP_SURGICAL_REPLACE_CODE, MCPToolName.READ_FILE: MCP_READ_FILE, MCPToolName.WRITE_FILE: MCP_WRITE_FILE, MCPToolName.LIST_DIRECTORY: MCP_LIST_DIRECTORY, + MCPToolName.SEMANTIC_SEARCH: MCP_SEMANTIC_SEARCH, + MCPToolName.ASK_AGENT: MCP_ASK_AGENT, } AGENTIC_TOOLS: dict[AgenticToolName, str] = { @@ -151,7 +171,6 @@ class AgenticToolName(StrEnum): AgenticToolName.CREATE_FILE: FILE_WRITER, AgenticToolName.REPLACE_CODE: FILE_EDITOR, AgenticToolName.LIST_DIRECTORY: DIRECTORY_LISTER, - AgenticToolName.ANALYZE_DOCUMENT: ANALYZE_DOCUMENT, AgenticToolName.EXECUTE_SHELL: SHELL_COMMAND, AgenticToolName.SEMANTIC_SEARCH: SEMANTIC_SEARCH, AgenticToolName.GET_FUNCTION_SOURCE: GET_FUNCTION_SOURCE, diff --git a/codebase_rag/types_defs.py b/codebase_rag/types_defs.py index fb293147b..07eab14ca 100644 --- a/codebase_rag/types_defs.py +++ b/codebase_rag/types_defs.py @@ -95,6 +95,30 @@ def find_with_prefix(self, prefix: str) -> list[tuple[QualifiedName, NodeType]]: def find_ending_with(self, suffix: str) -> list[QualifiedName]: ... + def register_unique_qn( + self, natural_qn: QualifiedName, start_line: int + ) -> QualifiedName: ... + + def variants(self, qualified_name: QualifiedName) -> list[QualifiedName]: ... + + def mark_property(self, qualified_name: QualifiedName) -> None: ... + + def is_property(self, qualified_name: QualifiedName) -> bool: ... + + def property_names(self) -> set[str]: ... + + def mark_abstract(self, qualified_name: QualifiedName) -> None: ... + + def is_abstract(self, qualified_name: QualifiedName) -> bool: ... 
+ + def mark_callable_params( + self, qualified_name: QualifiedName, params: dict[str, int] + ) -> None: ... + + def callable_params( + self, qualified_name: QualifiedName + ) -> dict[str, int] | None: ... + class ASTCacheProtocol(Protocol): def __setitem__(self, key: Path, value: tuple[Node, SupportedLanguage]) -> None: ... @@ -186,6 +210,11 @@ class GraphSummary(TypedDict): metadata: GraphMetadata +class QueryJsonOutput(TypedDict): + query: str + response: str + + class EmbeddingQueryResult(TypedDict): node_id: int qualified_name: str @@ -256,7 +285,13 @@ class AgentLoopUI(NamedTuple): panel_title: str -ORANGE_STYLE = Style.from_dict({"": "#ff8c00"}) +ORANGE_STYLE = Style.from_dict( + { + "": "#ff8c00", + "bottom-toolbar": "noreverse fg:#888888", + "bottom-toolbar.text": "noreverse fg:#888888", + } +) OPTIMIZATION_LOOP_UI = AgentLoopUI( status_message="[bold green]Agent is analyzing codebase... (Press Ctrl+C to cancel)[/bold green]", @@ -285,7 +320,6 @@ class LanguageImport(NamedTuple): class ToolNames(NamedTuple): query_graph: str read_file: str - analyze_document: str semantic_search: str create_file: str edit_file: str @@ -350,7 +384,7 @@ class FunctionNodeProps(TypedDict, total=False): class MCPInputSchemaProperty(TypedDict, total=False): type: str description: str - default: str + default: str | int MCPInputSchemaProperties = dict[str, MCPInputSchemaProperty] @@ -387,6 +421,14 @@ class CodeSnippetResultDict(TypedDict, total=False): error: str +class DeadCodeRow(TypedDict): + label: str + name: str + qualified_name: str + start_line: int + end_line: int + + class ListProjectsSuccessResult(TypedDict): projects: list[str] count: int @@ -439,36 +481,47 @@ class RelationshipSchema(NamedTuple): NODE_SCHEMAS: tuple[NodeSchema, ...] 
= ( NodeSchema(NodeLabel.PROJECT, "{name: string}"), NodeSchema( - NodeLabel.PACKAGE, "{qualified_name: string, name: string, path: string}" + NodeLabel.PACKAGE, + "{qualified_name: string, name: string, path: string, absolute_path: string}", + ), + NodeSchema(NodeLabel.FOLDER, "{path: string, name: string, absolute_path: string}"), + NodeSchema( + NodeLabel.FILE, + "{path: string, name: string, extension: string, absolute_path: string}", ), - NodeSchema(NodeLabel.FOLDER, "{path: string, name: string}"), - NodeSchema(NodeLabel.FILE, "{path: string, name: string, extension: string}"), NodeSchema( - NodeLabel.MODULE, "{qualified_name: string, name: string, path: string}" + NodeLabel.MODULE, + "{qualified_name: string, name: string, path: string, absolute_path: string}", ), NodeSchema( NodeLabel.CLASS, - "{qualified_name: string, name: string, decorators: list[string]}", + "{qualified_name: string, name: string, decorators: list[string], path: string, absolute_path: string}", ), NodeSchema( NodeLabel.FUNCTION, - "{qualified_name: string, name: string, decorators: list[string]}", + "{qualified_name: string, name: string, decorators: list[string], path: string, absolute_path: string}", ), NodeSchema( NodeLabel.METHOD, - "{qualified_name: string, name: string, decorators: list[string]}", + "{qualified_name: string, name: string, decorators: list[string], path: string, absolute_path: string}", + ), + NodeSchema( + NodeLabel.INTERFACE, + "{qualified_name: string, name: string, path: string, absolute_path: string}", + ), + NodeSchema( + NodeLabel.ENUM, + "{qualified_name: string, name: string, path: string, absolute_path: string}", ), - NodeSchema(NodeLabel.INTERFACE, "{qualified_name: string, name: string}"), - NodeSchema(NodeLabel.ENUM, "{qualified_name: string, name: string}"), NodeSchema(NodeLabel.TYPE, "{qualified_name: string, name: string}"), NodeSchema(NodeLabel.UNION, "{qualified_name: string, name: string}"), NodeSchema( NodeLabel.MODULE_INTERFACE, - 
"{qualified_name: string, name: string, path: string}", + "{qualified_name: string, name: string, path: string, absolute_path: string}", ), NodeSchema( NodeLabel.MODULE_IMPLEMENTATION, - "{qualified_name: string, name: string, path: string, implements_module: string}", + "{qualified_name: string, name: string, path: string, absolute_path: string, implements_module: string}", ), NodeSchema(NodeLabel.EXTERNAL_PACKAGE, "{name: string, version_spec: string}"), ) @@ -555,4 +608,9 @@ class RelationshipSchema(NamedTuple): RelationshipType.CALLS, (NodeLabel.FUNCTION, NodeLabel.METHOD), ), + RelationshipSchema( + (NodeLabel.MODULE, NodeLabel.FUNCTION, NodeLabel.METHOD), + RelationshipType.INSTANTIATES, + (NodeLabel.CLASS,), + ), ) diff --git a/codebase_rag/unixcoder.py b/codebase_rag/unixcoder.py index 6738fb677..e0d235c85 100644 --- a/codebase_rag/unixcoder.py +++ b/codebase_rag/unixcoder.py @@ -98,9 +98,8 @@ def forward(self, source_ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor] pad_id = self.config.pad_token_id assert pad_id is not None mask = source_ids.ne(pad_id) - token_embeddings = self.model( - source_ids, attention_mask=mask.unsqueeze(1) * mask.unsqueeze(2) - )[0] + attention_mask = (mask.unsqueeze(1) * mask.unsqueeze(2)).unsqueeze(1) + token_embeddings = self.model(source_ids, attention_mask=attention_mask)[0] sentence_embeddings = (token_embeddings * mask.unsqueeze(-1)).sum(1) / mask.sum( -1 ).unsqueeze(-1) @@ -190,6 +189,17 @@ def generate( class Beam: + __slots__ = ( + "_eos", + "device", + "eosTop", + "finished", + "nextYs", + "prevKs", + "scores", + "size", + ) + def __init__(self, size: int, eos: int, device: torch.device) -> None: self.size = size self.device = device diff --git a/codebase_rag/utils/fqn_resolver.py b/codebase_rag/utils/fqn_resolver.py index 470c6cc8f..ba3fe9dcd 100644 --- a/codebase_rag/utils/fqn_resolver.py +++ b/codebase_rag/utils/fqn_resolver.py @@ -40,7 +40,7 @@ def resolve_fqn_from_ast( return SEPARATOR_DOT.join(full_parts) 
except Exception as e: - logger.debug(ls.FQN_RESOLVE_FAILED.format(path=file_path, error=e)) + logger.debug(ls.FQN_RESOLVE_FAILED, path=file_path, error=e) return None @@ -73,7 +73,7 @@ def walk(node: Node) -> str | None: return walk(root_node) except Exception as e: - logger.debug(ls.FQN_FIND_FAILED.format(fqn=target_fqn, path=file_path, error=e)) + logger.debug(ls.FQN_FIND_FAILED, fqn=target_fqn, path=file_path, error=e) return None @@ -102,6 +102,6 @@ def walk(node: Node) -> None: walk(root_node) except Exception as e: - logger.debug(ls.FQN_EXTRACT_FAILED.format(path=file_path, error=e)) + logger.debug(ls.FQN_EXTRACT_FAILED, path=file_path, error=e) return functions diff --git a/codebase_rag/utils/path_utils.py b/codebase_rag/utils/path_utils.py index 5c9bbf5b5..fc5a4258d 100644 --- a/codebase_rag/utils/path_utils.py +++ b/codebase_rag/utils/path_utils.py @@ -1,19 +1,79 @@ +import hashlib +import re +from functools import lru_cache from pathlib import Path from .. import constants as cs +_PROJECT_NAME_INVALID_CHARS = re.compile(r"[^A-Za-z0-9_-]+") +_PROJECT_NAME_DIGEST_LEN = 8 +_PROJECT_NAME_FALLBACK_BASE = "repo" + + +def derive_project_name(repo_path: Path) -> str: + resolved = repo_path.resolve() + digest = hashlib.sha256(str(resolved).encode("utf-8")).hexdigest()[ + :_PROJECT_NAME_DIGEST_LEN + ] + base = _PROJECT_NAME_INVALID_CHARS.sub("_", resolved.name).strip("_") + if not base: + base = _PROJECT_NAME_FALLBACK_BASE + return f"{base}__{digest}" + + +def resolve_repo_path(repo_path: str | None, target_default: str) -> Path: + if repo_path: + return Path(repo_path).resolve() + if target_default and target_default != ".": + return Path(target_default).resolve() + return Path.cwd().resolve() + + +@lru_cache(maxsize=4096) +def cached_relative_path(file_path: Path, repo_path: Path) -> Path: + return file_path.relative_to(repo_path) + + +@lru_cache(maxsize=4096) +def cached_resolve_posix(file_path: Path) -> str: + return file_path.resolve().as_posix() + def 
should_skip_path( path: Path, repo_path: Path, exclude_paths: frozenset[str] | None = None, unignore_paths: frozenset[str] | None = None, + is_file: bool | None = None, ) -> bool: - if path.is_file() and path.suffix in cs.IGNORE_SUFFIXES: + _is_file = path.is_file() if is_file is None else is_file + if _is_file and path.suffix in cs.IGNORE_SUFFIXES: return True - rel_path = path.relative_to(repo_path) + rel_path = cached_relative_path(path, repo_path) rel_path_str = rel_path.as_posix() - dir_parts = rel_path.parent.parts if path.is_file() else rel_path.parts + dir_parts = rel_path.parent.parts if _is_file else rel_path.parts + if exclude_paths and ( + not exclude_paths.isdisjoint(dir_parts) + or rel_path_str in exclude_paths + or any(rel_path_str.startswith(f"{p}/") for p in exclude_paths) + ): + return True + if unignore_paths and any( + rel_path_str == p or rel_path_str.startswith(f"{p}/") for p in unignore_paths + ): + return False + return not cs.IGNORE_PATTERNS.isdisjoint(dir_parts) + + +def should_skip_rel_file( + rel_path_str: str, + dir_parts: tuple[str, ...], + suffix: str, + exclude_paths: frozenset[str] | None = None, + unignore_paths: frozenset[str] | None = None, +) -> bool: + if suffix in cs.IGNORE_SUFFIXES: + return True if exclude_paths and ( not exclude_paths.isdisjoint(dir_parts) or rel_path_str in exclude_paths diff --git a/codebase_rag/utils/rich_markdown.py b/codebase_rag/utils/rich_markdown.py new file mode 100644 index 000000000..12d4cf4fb --- /dev/null +++ b/codebase_rag/utils/rich_markdown.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import ClassVar + +from rich import box +from rich.console import Console, ConsoleOptions, RenderResult +from rich.markdown import Heading, Markdown, MarkdownElement +from rich.panel import Panel +from rich.text import Text + + +class LeftAlignedHeading(Heading): + def __rich_console__( + self, console: Console, options: ConsoleOptions + ) -> RenderResult: + text = self.text + 
text.justify = "left" + if self.tag == "h1": + yield Panel(text, box=box.HEAVY, style="markdown.h1.border") + else: + if self.tag == "h2": + yield Text("") + yield text + + +class LeftAlignedMarkdown(Markdown): + elements: ClassVar[dict[str, type[MarkdownElement]]] = { + **Markdown.elements, + "heading_open": LeftAlignedHeading, + } diff --git a/codebase_rag/utils/source_extraction.py b/codebase_rag/utils/source_extraction.py index 548243a5f..20969db56 100644 --- a/codebase_rag/utils/source_extraction.py +++ b/codebase_rag/utils/source_extraction.py @@ -21,22 +21,28 @@ def extract_source_lines( return None try: - with open(file_path, encoding=encoding) as f: - lines = f.readlines() - - if start_line > len(lines) or end_line > len(lines): - logger.warning( - ls.SOURCE_RANGE_EXCEEDS.format( - start=start_line, - end=end_line, - length=len(lines), - path=file_path, - ) + raw_bytes = file_path.read_bytes() + text = raw_bytes.decode(encoding) + lines = text.splitlines(keepends=True) + + if not lines: + return None + + if start_line > len(lines) or end_line > len(lines): + logger.warning( + ls.SOURCE_RANGE_EXCEEDS.format( + start=start_line, + end=end_line, + length=len(lines), + path=file_path, ) + ) + end_line = min(end_line, len(lines)) + if start_line > len(lines): return None - extracted_lines = lines[start_line - 1 : end_line] - return "".join(extracted_lines).strip() + extracted_lines = lines[start_line - 1 : end_line] + return "".join(extracted_lines).strip() except Exception as e: logger.warning(ls.SOURCE_EXTRACT_FAILED.format(path=file_path, error=e)) @@ -56,7 +62,7 @@ def extract_source_with_fallback( if ast_result := ast_extractor(qualified_name, file_path): return str(ast_result) except Exception as e: - logger.debug(ls.SOURCE_AST_FAILED.format(name=qualified_name, error=e)) + logger.debug(ls.SOURCE_AST_FAILED, name=qualified_name, error=e) return extract_source_lines(file_path, start_line, end_line, encoding) diff --git a/codebase_rag/utils/token_utils.py 
b/codebase_rag/utils/token_utils.py new file mode 100644 index 000000000..031262d06 --- /dev/null +++ b/codebase_rag/utils/token_utils.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import json +from functools import cache + +import tiktoken +from loguru import logger + +from .. import constants as cs +from .. import logs as ls +from ..types_defs import ResultRow + + +@cache +def _get_encoding() -> tiktoken.Encoding: + return tiktoken.get_encoding(cs.TIKTOKEN_ENCODING) + + +def count_tokens(text: str) -> int: + return len(_get_encoding().encode(text)) + + +def truncate_results_by_tokens( + results: list[ResultRow], + max_tokens: int, + original_total: int | None = None, +) -> tuple[list[ResultRow], int, bool]: + if not results: + return results, 0, False + + kept: list[ResultRow] = [] + total_tokens = 0 + total_for_log = original_total if original_total is not None else len(results) + + for row in results: + row_text = json.dumps(row, default=str) + row_tokens = count_tokens(row_text) + + if total_tokens + row_tokens > max_tokens and kept: + logger.warning( + ls.QUERY_RESULTS_TRUNCATED.format( + kept=len(kept), + total=total_for_log, + tokens=total_tokens, + max_tokens=max_tokens, + ) + ) + return kept, total_tokens, True + + kept.append(row) + total_tokens += row_tokens + + return kept, total_tokens, False diff --git a/codebase_rag/vector_store.py b/codebase_rag/vector_store.py index 6580b43c2..82d0d19c5 100644 --- a/codebase_rag/vector_store.py +++ b/codebase_rag/vector_store.py @@ -1,3 +1,6 @@ +import time +from collections.abc import Sequence + from loguru import logger from . 
import logs as ls @@ -5,16 +8,35 @@ from .constants import PAYLOAD_NODE_ID, PAYLOAD_QUALIFIED_NAME from .utils.dependencies import has_qdrant_client +_RETRIEVE_BATCH_SIZE = 1000 + if has_qdrant_client(): from qdrant_client import QdrantClient from qdrant_client.models import Distance, PointStruct, VectorParams _CLIENT: QdrantClient | None = None + def close_qdrant_client() -> None: + global _CLIENT + if _CLIENT is not None: + _CLIENT.close() + _CLIENT = None + def get_qdrant_client() -> QdrantClient: global _CLIENT if _CLIENT is None: - _CLIENT = QdrantClient(path=settings.QDRANT_DB_PATH) + if settings.QDRANT_URL: + _CLIENT = QdrantClient(url=settings.QDRANT_URL) + else: + try: + _CLIENT = QdrantClient(path=settings.QDRANT_DB_PATH) + except Exception as e: + logger.error( + ls.QDRANT_LOCK_ERROR.format( + path=settings.QDRANT_DB_PATH, error=e + ) + ) + raise if not _CLIENT.collection_exists(settings.QDRANT_COLLECTION_NAME): _CLIENT.create_collection( collection_name=settings.QDRANT_COLLECTION_NAME, @@ -24,28 +46,92 @@ def get_qdrant_client() -> QdrantClient: ) return _CLIENT + def _upsert_with_retry(points: list[PointStruct]) -> None: + client = get_qdrant_client() + max_attempts = settings.QDRANT_UPSERT_RETRIES + base_delay = settings.QDRANT_RETRY_BASE_DELAY + for attempt in range(1, max_attempts + 1): + try: + client.upsert( + collection_name=settings.QDRANT_COLLECTION_NAME, + points=points, + ) + return + except Exception as e: + if attempt == max_attempts: + raise + delay = base_delay * (2 ** (attempt - 1)) + logger.warning( + ls.EMBEDDING_STORE_RETRY.format( + attempt=attempt, max_attempts=max_attempts, delay=delay, error=e + ) + ) + time.sleep(delay) + def store_embedding( node_id: int, embedding: list[float], qualified_name: str ) -> None: + store_embedding_batch([(node_id, embedding, qualified_name)]) + + def store_embedding_batch( + points: Sequence[tuple[int, list[float], str]], + ) -> int: + if not points: + return 0 + point_structs = [ + PointStruct( + 
id=node_id, + vector=embedding, + payload={ + PAYLOAD_NODE_ID: node_id, + PAYLOAD_QUALIFIED_NAME: qualified_name, + }, + ) + for node_id, embedding, qualified_name in points + ] try: + _upsert_with_retry(point_structs) + logger.debug(ls.EMBEDDING_BATCH_STORED.format(count=len(point_structs))) + return len(point_structs) + except Exception as e: + logger.warning(ls.EMBEDDING_BATCH_FAILED.format(error=e)) + return 0 + + def delete_project_embeddings(project_name: str, node_ids: Sequence[int]) -> None: + if not node_ids: + return + try: + logger.info( + ls.QDRANT_DELETE_PROJECT.format( + count=len(node_ids), project=project_name + ) + ) client = get_qdrant_client() - client.upsert( + client.delete( collection_name=settings.QDRANT_COLLECTION_NAME, - points=[ - PointStruct( - id=node_id, - vector=embedding, - payload={ - PAYLOAD_NODE_ID: node_id, - PAYLOAD_QUALIFIED_NAME: qualified_name, - }, - ) - ], + points_selector=list(node_ids), ) + logger.info(ls.QDRANT_DELETE_PROJECT_DONE.format(project=project_name)) except Exception as e: logger.warning( - ls.EMBEDDING_STORE_FAILED.format(name=qualified_name, error=e) + ls.QDRANT_DELETE_PROJECT_FAILED.format(project=project_name, error=e) + ) + + def verify_stored_ids(expected_ids: set[int]) -> set[int]: + if not expected_ids: + return set() + client = get_qdrant_client() + found_ids: set[int] = set() + ids_list = list(expected_ids) + for i in range(0, len(ids_list), _RETRIEVE_BATCH_SIZE): + points = client.retrieve( + collection_name=settings.QDRANT_COLLECTION_NAME, + ids=ids_list[i : i + _RETRIEVE_BATCH_SIZE], + with_payload=False, + with_vectors=False, ) + found_ids.update(p.id for p in points if isinstance(p.id, int)) + return found_ids def search_embeddings( query_embedding: list[float], top_k: int | None = None @@ -69,11 +155,25 @@ def search_embeddings( else: + def close_qdrant_client() -> None: + pass + def store_embedding( node_id: int, embedding: list[float], qualified_name: str ) -> None: pass + def 
store_embedding_batch( + points: Sequence[tuple[int, list[float], str]], + ) -> int: + return 0 + + def delete_project_embeddings(project_name: str, node_ids: Sequence[int]) -> None: + pass + + def verify_stored_ids(expected_ids: set[int]) -> set[int]: + return set() + def search_embeddings( query_embedding: list[float], top_k: int | None = None ) -> list[tuple[int, float]]: diff --git a/codebase_rag/workspaces/__init__.py b/codebase_rag/workspaces/__init__.py new file mode 100644 index 000000000..e93eec119 --- /dev/null +++ b/codebase_rag/workspaces/__init__.py @@ -0,0 +1,28 @@ +from .models import WorkspaceConfig, WorkspaceRepo +from .storage import ( + WorkspaceError, + add_repo, + create_workspace, + delete_workspace, + list_workspaces, + load_workspace, + remove_repo, + save_workspace, + workspace_path, + workspaces_dir, +) + +__all__ = [ + "WorkspaceConfig", + "WorkspaceError", + "WorkspaceRepo", + "add_repo", + "create_workspace", + "delete_workspace", + "list_workspaces", + "load_workspace", + "remove_repo", + "save_workspace", + "workspace_path", + "workspaces_dir", +] diff --git a/codebase_rag/workspaces/cli.py b/codebase_rag/workspaces/cli.py new file mode 100644 index 000000000..1726744fb --- /dev/null +++ b/codebase_rag/workspaces/cli.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import sys + +import click +from loguru import logger + +from .. import cli_help as ch +from . import constants as wcs +from . 
import storage as st +from .storage import WorkspaceError + + +@click.group(help=ch.CMD_WORKSPACE_GROUP) +def cli() -> None: + pass + + +@cli.command("list", help=ch.CMD_WORKSPACE_LIST) +def list_cmd() -> None: + names = st.list_workspaces() + if not names: + click.echo(ch.MSG_NO_WORKSPACES) + return + for name in names: + click.echo(name) + + +@cli.command("create", help=ch.CMD_WORKSPACE_CREATE) +@click.argument("name") +@click.option("--description", "-d", default="", help=ch.HELP_WORKSPACE_DESCRIPTION) +@click.option("--force", is_flag=True, help=ch.HELP_WORKSPACE_FORCE) +def create_cmd(name: str, description: str, force: bool) -> None: + try: + _, path = st.create_workspace(name, description=description, overwrite=force) + except WorkspaceError as e: + logger.error(str(e)) + click.secho(str(e), fg="red", err=True) + sys.exit(1) + click.echo(wcs.MSG_WORKSPACE_CREATED.format(name=name, path=path)) + + +@cli.command("delete", help=ch.CMD_WORKSPACE_DELETE) +@click.argument("name") +def delete_cmd(name: str) -> None: + try: + path = st.delete_workspace(name) + except WorkspaceError as e: + logger.error(str(e)) + click.secho(str(e), fg="red", err=True) + sys.exit(1) + click.echo(wcs.MSG_WORKSPACE_DELETED.format(name=name, path=path)) + + +@cli.command("show", help=ch.CMD_WORKSPACE_SHOW) +@click.argument("name") +def show_cmd(name: str) -> None: + try: + config = st.load_workspace(name) + except WorkspaceError as e: + logger.error(str(e)) + click.secho(str(e), fg="red", err=True) + sys.exit(1) + click.echo(f"name: {config.name}") + if config.description: + click.echo(f"description: {config.description}") + click.echo(f"repos: {len(config.repos)}") + for repo in config.repos: + click.echo(f" - {repo.path} ({repo.project_name})") + + +@cli.command("add-repo", help=ch.CMD_WORKSPACE_ADD_REPO) +@click.argument("name") +@click.argument("repo_path") +@click.option( + "--project-name", "-p", default=None, help=ch.HELP_WORKSPACE_REPO_PROJECT_NAME +) +def add_repo_cmd(name: 
str, repo_path: str, project_name: str | None) -> None: + try: + _, repo = st.add_repo(name, repo_path, project_name=project_name) + except WorkspaceError as e: + logger.error(str(e)) + click.secho(str(e), fg="red", err=True) + sys.exit(1) + click.echo( + wcs.MSG_WORKSPACE_ADDED_REPO.format( + path=repo.path, project_name=repo.project_name + ) + ) + + +@cli.command("remove-repo", help=ch.CMD_WORKSPACE_REMOVE_REPO) +@click.argument("name") +@click.argument("repo_path") +def remove_repo_cmd(name: str, repo_path: str) -> None: + try: + _, repo = st.remove_repo(name, repo_path) + except WorkspaceError as e: + logger.error(str(e)) + click.secho(str(e), fg="red", err=True) + sys.exit(1) + click.echo(wcs.MSG_WORKSPACE_REMOVED_REPO.format(path=repo.path)) diff --git a/codebase_rag/workspaces/constants.py b/codebase_rag/workspaces/constants.py new file mode 100644 index 000000000..2bd69da47 --- /dev/null +++ b/codebase_rag/workspaces/constants.py @@ -0,0 +1,24 @@ +WORKSPACES_SUBDIR = "workspaces" +WORKSPACE_EXTENSION = ".toml" + +ERR_WORKSPACE_NOT_FOUND = "Workspace '{name}' not found at {path}." +ERR_WORKSPACE_ALREADY_EXISTS = "Workspace '{name}' already exists at {path}." +ERR_WORKSPACE_INVALID_TOML = "Workspace '{name}' has invalid TOML: {error}" +ERR_WORKSPACE_INVALID_SCHEMA = "Workspace '{name}' schema invalid: {error}" +ERR_WORKSPACE_REPO_PATH_MISSING = ( + "Repo path '{path}' does not exist on disk. Aborting workspace operation." +) +ERR_WORKSPACE_REPO_DUPLICATE = ( + "Repo with path '{path}' is already in workspace '{name}'." +) +ERR_WORKSPACE_REPO_NOT_IN_WORKSPACE = ( + "No repo with path '{path}' in workspace '{name}'." 
+) + +MSG_WORKSPACE_CREATED = "Created workspace '{name}' at {path}" +MSG_WORKSPACE_DELETED = "Deleted workspace '{name}' at {path}" +MSG_WORKSPACE_ADDED_REPO = "Added repo '{path}' (project: {project_name})" +MSG_WORKSPACE_REMOVED_REPO = "Removed repo '{path}'" +MSG_WORKSPACE_SYNCING = "Syncing workspace '{name}' ({count} repo(s))" +MSG_WORKSPACE_SYNC_REPO = "[{idx}/{total}] Syncing {path} as project '{project_name}'" +MSG_WORKSPACE_SYNC_DONE = "Workspace '{name}' sync complete." diff --git a/codebase_rag/workspaces/models.py b/codebase_rag/workspaces/models.py new file mode 100644 index 000000000..184cc3a67 --- /dev/null +++ b/codebase_rag/workspaces/models.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from pathlib import Path + +from pydantic import BaseModel, Field + + +class WorkspaceRepo(BaseModel): + path: str + project_name: str + + def repo_path(self) -> Path: + return Path(self.path).expanduser().resolve() + + +class WorkspaceConfig(BaseModel): + name: str + description: str = "" + repos: list[WorkspaceRepo] = Field(default_factory=list) + + def project_names(self) -> list[str]: + return [r.project_name for r in self.repos] + + def find_repo(self, path: str) -> WorkspaceRepo | None: + target = Path(path).expanduser().resolve() + for repo in self.repos: + if repo.repo_path() == target: + return repo + return None diff --git a/codebase_rag/workspaces/storage.py b/codebase_rag/workspaces/storage.py new file mode 100644 index 000000000..7e04380d0 --- /dev/null +++ b/codebase_rag/workspaces/storage.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +import tomllib +from pathlib import Path + +import toml +from pydantic import ValidationError + +from ..config import settings +from ..utils.path_utils import derive_project_name +from . 
import constants as cs +from .models import WorkspaceConfig, WorkspaceRepo + + +class WorkspaceError(RuntimeError): + pass + + +def workspaces_dir(home: Path | None = None) -> Path: + base = (home or settings.CGR_HOME).expanduser() + return base / cs.WORKSPACES_SUBDIR + + +def workspace_path(name: str, home: Path | None = None) -> Path: + return workspaces_dir(home) / f"{name}{cs.WORKSPACE_EXTENSION}" + + +def list_workspaces(home: Path | None = None) -> list[str]: + root = workspaces_dir(home) + if not root.exists(): + return [] + return sorted(p.stem for p in root.glob(f"*{cs.WORKSPACE_EXTENSION}")) + + +def load_workspace(name: str, home: Path | None = None) -> WorkspaceConfig: + path = workspace_path(name, home) + if not path.exists(): + raise WorkspaceError(cs.ERR_WORKSPACE_NOT_FOUND.format(name=name, path=path)) + try: + with path.open("rb") as f: + data = tomllib.load(f) + except tomllib.TOMLDecodeError as e: + raise WorkspaceError( + cs.ERR_WORKSPACE_INVALID_TOML.format(name=name, error=e) + ) from e + body = data.get("workspace", data) + try: + return WorkspaceConfig.model_validate(body) + except ValidationError as e: + raise WorkspaceError( + cs.ERR_WORKSPACE_INVALID_SCHEMA.format(name=name, error=e) + ) from e + + +def save_workspace(config: WorkspaceConfig, home: Path | None = None) -> Path: + path = workspace_path(config.name, home) + path.parent.mkdir(parents=True, exist_ok=True) + payload = {"workspace": config.model_dump()} + with path.open("w", encoding="utf-8") as f: + toml.dump(payload, f) + return path + + +def create_workspace( + name: str, + description: str = "", + repos: list[WorkspaceRepo] | None = None, + home: Path | None = None, + overwrite: bool = False, +) -> tuple[WorkspaceConfig, Path]: + path = workspace_path(name, home) + if path.exists() and not overwrite: + raise WorkspaceError( + cs.ERR_WORKSPACE_ALREADY_EXISTS.format(name=name, path=path) + ) + config = WorkspaceConfig(name=name, description=description, repos=repos or []) + 
saved = save_workspace(config, home=home) + return config, saved + + +def delete_workspace(name: str, home: Path | None = None) -> Path: + path = workspace_path(name, home) + if not path.exists(): + raise WorkspaceError(cs.ERR_WORKSPACE_NOT_FOUND.format(name=name, path=path)) + path.unlink() + return path + + +def add_repo( + name: str, + repo_path: str, + project_name: str | None = None, + home: Path | None = None, +) -> tuple[WorkspaceConfig, WorkspaceRepo]: + resolved = Path(repo_path).expanduser().resolve() + if not resolved.exists(): + raise WorkspaceError(cs.ERR_WORKSPACE_REPO_PATH_MISSING.format(path=resolved)) + config = load_workspace(name, home=home) + if config.find_repo(str(resolved)) is not None: + raise WorkspaceError( + cs.ERR_WORKSPACE_REPO_DUPLICATE.format(path=resolved, name=name) + ) + repo = WorkspaceRepo( + path=str(resolved), + project_name=(project_name or derive_project_name(resolved)), + ) + config.repos.append(repo) + save_workspace(config, home=home) + return config, repo + + +def remove_repo( + name: str, repo_path: str, home: Path | None = None +) -> tuple[WorkspaceConfig, WorkspaceRepo]: + config = load_workspace(name, home=home) + found = config.find_repo(repo_path) + if found is None: + raise WorkspaceError( + cs.ERR_WORKSPACE_REPO_NOT_IN_WORKSPACE.format( + path=Path(repo_path).expanduser().resolve(), name=name + ) + ) + config.repos = [r for r in config.repos if r is not found] + save_workspace(config, home=home) + return config, found diff --git a/codec/schema.proto b/codec/schema.proto index fcd28e6c2..06832c97f 100644 --- a/codec/schema.proto +++ b/codec/schema.proto @@ -102,6 +102,10 @@ message GraphCodeIndex { ExternalPackage external_package = 9; ModuleImplementation module_implementation = 10; ModuleInterface module_interface = 11; + Interface interface_node = 12; + Enum enum_node = 13; + Type type_node = 14; + Union union_node = 15; } } @@ -123,6 +127,8 @@ message GraphCodeIndex { DEPENDS_ON_EXTERNAL = 11; IMPLEMENTS_MODULE 
= 12; IMPLEMENTS = 13; + EXPORTS = 14; + EXPORTS_MODULE = 15; } RelationshipType type = 1; @@ -232,3 +238,35 @@ message GraphCodeIndex { repeated string decorators = 6; bool is_exported = 7; } + + message Interface { + // Primary Key + string qualified_name = 1; + + string name = 2; + string path = 3; + string absolute_path = 4; + } + + message Enum { + // Primary Key + string qualified_name = 1; + + string name = 2; + string path = 3; + string absolute_path = 4; + } + + message Type { + // Primary Key + string qualified_name = 1; + + string name = 2; + } + + message Union { + // Primary Key + string qualified_name = 1; + + string name = 2; + } diff --git a/codec/schema_pb2.py b/codec/schema_pb2.py index 5dd666f71..fcae069dd 100644 --- a/codec/schema_pb2.py +++ b/codec/schema_pb2.py @@ -1,61 +1,62 @@ +# -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! -# NO CHECKED-IN PROTOBUF GENCODE # source: codec/schema.proto -# Protobuf Python Version: 6.33.1 """Generated protocol buffer code.""" - +from google.protobuf.internal import builder as _builder from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import runtime_version as _runtime_version -from google.protobuf import struct_pb2 as _struct_pb2 from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder - -_runtime_version.ValidateProtobufRuntimeVersion( - _runtime_version.Domain.PUBLIC, 6, 33, 1, "", "codec/schema.proto" -) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x12\x63odec/schema.proto\x12\x0cgraphcode.v1\x1a\x1cgoogle/protobuf/struct.proto"f\n\x0eGraphCodeIndex\x12!\n\x05nodes\x18\x01 \x03(\x0b\x32\x12.graphcode.v1.Node\x12\x31\n\rrelationships\x18\x02 \x03(\x0b\x32\x1a.graphcode.v1.Relationship"\x93\x04\n\x04Node\x12(\n\x07project\x18\x01 
\x01(\x0b\x32\x15.graphcode.v1.ProjectH\x00\x12(\n\x07package\x18\x02 \x01(\x0b\x32\x15.graphcode.v1.PackageH\x00\x12&\n\x06\x66older\x18\x03 \x01(\x0b\x32\x14.graphcode.v1.FolderH\x00\x12&\n\x06module\x18\x04 \x01(\x0b\x32\x14.graphcode.v1.ModuleH\x00\x12)\n\nclass_node\x18\x05 \x01(\x0b\x32\x13.graphcode.v1.ClassH\x00\x12*\n\x08\x66unction\x18\x06 \x01(\x0b\x32\x16.graphcode.v1.FunctionH\x00\x12&\n\x06method\x18\x07 \x01(\x0b\x32\x14.graphcode.v1.MethodH\x00\x12"\n\x04\x66ile\x18\x08 \x01(\x0b\x32\x12.graphcode.v1.FileH\x00\x12\x39\n\x10\x65xternal_package\x18\t \x01(\x0b\x32\x1d.graphcode.v1.ExternalPackageH\x00\x12\x43\n\x15module_implementation\x18\n \x01(\x0b\x32".graphcode.v1.ModuleImplementationH\x00\x12\x39\n\x10module_interface\x18\x0b \x01(\x0b\x32\x1d.graphcode.v1.ModuleInterfaceH\x00\x42\t\n\x07payload"\xe9\x03\n\x0cRelationship\x12\x39\n\x04type\x18\x01 \x01(\x0e\x32+.graphcode.v1.Relationship.RelationshipType\x12\x11\n\tsource_id\x18\x02 \x01(\t\x12\x11\n\ttarget_id\x18\x03 \x01(\t\x12+\n\nproperties\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x14\n\x0csource_label\x18\x05 \x01(\t\x12\x14\n\x0ctarget_label\x18\x06 \x01(\t"\x9e\x02\n\x10RelationshipType\x12!\n\x1dRELATIONSHIP_TYPE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x43ONTAINS_PACKAGE\x10\x01\x12\x13\n\x0f\x43ONTAINS_FOLDER\x10\x02\x12\x11\n\rCONTAINS_FILE\x10\x03\x12\x13\n\x0f\x43ONTAINS_MODULE\x10\x04\x12\x0b\n\x07\x44\x45\x46INES\x10\x05\x12\x12\n\x0e\x44\x45\x46INES_METHOD\x10\x06\x12\x0b\n\x07IMPORTS\x10\x07\x12\x0c\n\x08INHERITS\x10\x08\x12\r\n\tOVERRIDES\x10\t\x12\t\n\x05\x43\x41LLS\x10\n\x12\x17\n\x13\x44\x45PENDS_ON_EXTERNAL\x10\x0b\x12\x15\n\x11IMPLEMENTS_MODULE\x10\x0c\x12\x0e\n\nIMPLEMENTS\x10\r"\x17\n\x07Project\x12\x0c\n\x04name\x18\x01 \x01(\t"=\n\x07Package\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04path\x18\x03 \x01(\t"$\n\x06\x46older\x12\x0c\n\x04path\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 
\x01(\t"5\n\x04\x46ile\x12\x0c\n\x04path\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x11\n\textension\x18\x03 \x01(\t"<\n\x06Module\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04path\x18\x03 \x01(\t"e\n\x14ModuleImplementation\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04path\x18\x03 \x01(\t\x12\x19\n\x11implements_module\x18\x04 \x01(\t"E\n\x0fModuleInterface\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04path\x18\x03 \x01(\t"\x1f\n\x0f\x45xternalPackage\x12\x0c\n\x04name\x18\x01 \x01(\t"\x92\x01\n\x08\x46unction\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x11\n\tdocstring\x18\x03 \x01(\t\x12\x12\n\nstart_line\x18\x04 \x01(\x05\x12\x10\n\x08\x65nd_line\x18\x05 \x01(\x05\x12\x12\n\ndecorators\x18\x06 \x03(\t\x12\x13\n\x0bis_exported\x18\x07 \x01(\x08"{\n\x06Method\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x11\n\tdocstring\x18\x03 \x01(\t\x12\x12\n\nstart_line\x18\x04 \x01(\x05\x12\x10\n\x08\x65nd_line\x18\x05 \x01(\x05\x12\x12\n\ndecorators\x18\x06 \x03(\t"\x8f\x01\n\x05\x43lass\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x11\n\tdocstring\x18\x03 \x01(\t\x12\x12\n\nstart_line\x18\x04 \x01(\x05\x12\x10\n\x08\x65nd_line\x18\x05 \x01(\x05\x12\x12\n\ndecorators\x18\x06 \x03(\t\x12\x13\n\x0bis_exported\x18\x07 \x01(\x08\x62\x06proto3' -) +from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x12\x63odec/schema.proto\x12\x0cgraphcode.v1\x1a\x1cgoogle/protobuf/struct.proto\"f\n\x0eGraphCodeIndex\x12!\n\x05nodes\x18\x01 \x03(\x0b\x32\x12.graphcode.v1.Node\x12\x31\n\rrelationships\x18\x02 \x03(\x0b\x32\x1a.graphcode.v1.Relationship\"\xc3\x05\n\x04Node\x12(\n\x07project\x18\x01 
\x01(\x0b\x32\x15.graphcode.v1.ProjectH\x00\x12(\n\x07package\x18\x02 \x01(\x0b\x32\x15.graphcode.v1.PackageH\x00\x12&\n\x06\x66older\x18\x03 \x01(\x0b\x32\x14.graphcode.v1.FolderH\x00\x12&\n\x06module\x18\x04 \x01(\x0b\x32\x14.graphcode.v1.ModuleH\x00\x12)\n\nclass_node\x18\x05 \x01(\x0b\x32\x13.graphcode.v1.ClassH\x00\x12*\n\x08\x66unction\x18\x06 \x01(\x0b\x32\x16.graphcode.v1.FunctionH\x00\x12&\n\x06method\x18\x07 \x01(\x0b\x32\x14.graphcode.v1.MethodH\x00\x12\"\n\x04\x66ile\x18\x08 \x01(\x0b\x32\x12.graphcode.v1.FileH\x00\x12\x39\n\x10\x65xternal_package\x18\t \x01(\x0b\x32\x1d.graphcode.v1.ExternalPackageH\x00\x12\x43\n\x15module_implementation\x18\n \x01(\x0b\x32\".graphcode.v1.ModuleImplementationH\x00\x12\x39\n\x10module_interface\x18\x0b \x01(\x0b\x32\x1d.graphcode.v1.ModuleInterfaceH\x00\x12\x31\n\x0einterface_node\x18\x0c \x01(\x0b\x32\x17.graphcode.v1.InterfaceH\x00\x12\'\n\tenum_node\x18\r \x01(\x0b\x32\x12.graphcode.v1.EnumH\x00\x12\'\n\ttype_node\x18\x0e \x01(\x0b\x32\x12.graphcode.v1.TypeH\x00\x12)\n\nunion_node\x18\x0f \x01(\x0b\x32\x13.graphcode.v1.UnionH\x00\x42\t\n\x07payload\"\x8a\x04\n\x0cRelationship\x12\x39\n\x04type\x18\x01 \x01(\x0e\x32+.graphcode.v1.Relationship.RelationshipType\x12\x11\n\tsource_id\x18\x02 \x01(\t\x12\x11\n\ttarget_id\x18\x03 \x01(\t\x12+\n\nproperties\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x14\n\x0csource_label\x18\x05 \x01(\t\x12\x14\n\x0ctarget_label\x18\x06 
\x01(\t\"\xbf\x02\n\x10RelationshipType\x12!\n\x1dRELATIONSHIP_TYPE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x43ONTAINS_PACKAGE\x10\x01\x12\x13\n\x0f\x43ONTAINS_FOLDER\x10\x02\x12\x11\n\rCONTAINS_FILE\x10\x03\x12\x13\n\x0f\x43ONTAINS_MODULE\x10\x04\x12\x0b\n\x07\x44\x45\x46INES\x10\x05\x12\x12\n\x0e\x44\x45\x46INES_METHOD\x10\x06\x12\x0b\n\x07IMPORTS\x10\x07\x12\x0c\n\x08INHERITS\x10\x08\x12\r\n\tOVERRIDES\x10\t\x12\t\n\x05\x43\x41LLS\x10\n\x12\x17\n\x13\x44\x45PENDS_ON_EXTERNAL\x10\x0b\x12\x15\n\x11IMPLEMENTS_MODULE\x10\x0c\x12\x0e\n\nIMPLEMENTS\x10\r\x12\x0b\n\x07\x45XPORTS\x10\x0e\x12\x12\n\x0e\x45XPORTS_MODULE\x10\x0f\"\x17\n\x07Project\x12\x0c\n\x04name\x18\x01 \x01(\t\"=\n\x07Package\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04path\x18\x03 \x01(\t\"$\n\x06\x46older\x12\x0c\n\x04path\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\"5\n\x04\x46ile\x12\x0c\n\x04path\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x11\n\textension\x18\x03 \x01(\t\"<\n\x06Module\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04path\x18\x03 \x01(\t\"e\n\x14ModuleImplementation\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04path\x18\x03 \x01(\t\x12\x19\n\x11implements_module\x18\x04 \x01(\t\"E\n\x0fModuleInterface\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04path\x18\x03 \x01(\t\"\x1f\n\x0f\x45xternalPackage\x12\x0c\n\x04name\x18\x01 \x01(\t\"\x92\x01\n\x08\x46unction\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x11\n\tdocstring\x18\x03 \x01(\t\x12\x12\n\nstart_line\x18\x04 \x01(\x05\x12\x10\n\x08\x65nd_line\x18\x05 \x01(\x05\x12\x12\n\ndecorators\x18\x06 \x03(\t\x12\x13\n\x0bis_exported\x18\x07 \x01(\x08\"{\n\x06Method\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x11\n\tdocstring\x18\x03 \x01(\t\x12\x12\n\nstart_line\x18\x04 
\x01(\x05\x12\x10\n\x08\x65nd_line\x18\x05 \x01(\x05\x12\x12\n\ndecorators\x18\x06 \x03(\t\"\x8f\x01\n\x05\x43lass\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x11\n\tdocstring\x18\x03 \x01(\t\x12\x12\n\nstart_line\x18\x04 \x01(\x05\x12\x10\n\x08\x65nd_line\x18\x05 \x01(\x05\x12\x12\n\ndecorators\x18\x06 \x03(\t\x12\x13\n\x0bis_exported\x18\x07 \x01(\x08\"V\n\tInterface\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04path\x18\x03 \x01(\t\x12\x15\n\rabsolute_path\x18\x04 \x01(\t\"Q\n\x04\x45num\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04path\x18\x03 \x01(\t\x12\x15\n\rabsolute_path\x18\x04 \x01(\t\",\n\x04Type\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\"-\n\x05Union\x12\x16\n\x0equalified_name\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\tb\x06proto3') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'codec.schema_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: -_globals = globals() -_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "codec.schema_pb2", _globals) -if not _descriptor._USE_C_DESCRIPTORS: - DESCRIPTOR._loaded_options = None - _globals["_GRAPHCODEINDEX"]._serialized_start = 66 - _globals["_GRAPHCODEINDEX"]._serialized_end = 168 - _globals["_NODE"]._serialized_start = 171 - _globals["_NODE"]._serialized_end = 702 - _globals["_RELATIONSHIP"]._serialized_start = 705 - _globals["_RELATIONSHIP"]._serialized_end = 1194 - _globals["_RELATIONSHIP_RELATIONSHIPTYPE"]._serialized_start = 908 - _globals["_RELATIONSHIP_RELATIONSHIPTYPE"]._serialized_end = 1194 - _globals["_PROJECT"]._serialized_start = 1196 - _globals["_PROJECT"]._serialized_end = 1219 - _globals["_PACKAGE"]._serialized_start = 1221 - _globals["_PACKAGE"]._serialized_end = 1282 - 
_globals["_FOLDER"]._serialized_start = 1284 - _globals["_FOLDER"]._serialized_end = 1320 - _globals["_FILE"]._serialized_start = 1322 - _globals["_FILE"]._serialized_end = 1375 - _globals["_MODULE"]._serialized_start = 1377 - _globals["_MODULE"]._serialized_end = 1437 - _globals["_MODULEIMPLEMENTATION"]._serialized_start = 1439 - _globals["_MODULEIMPLEMENTATION"]._serialized_end = 1540 - _globals["_MODULEINTERFACE"]._serialized_start = 1542 - _globals["_MODULEINTERFACE"]._serialized_end = 1611 - _globals["_EXTERNALPACKAGE"]._serialized_start = 1613 - _globals["_EXTERNALPACKAGE"]._serialized_end = 1644 - _globals["_FUNCTION"]._serialized_start = 1647 - _globals["_FUNCTION"]._serialized_end = 1793 - _globals["_METHOD"]._serialized_start = 1795 - _globals["_METHOD"]._serialized_end = 1918 - _globals["_CLASS"]._serialized_start = 1921 - _globals["_CLASS"]._serialized_end = 2064 + DESCRIPTOR._options = None + _GRAPHCODEINDEX._serialized_start=66 + _GRAPHCODEINDEX._serialized_end=168 + _NODE._serialized_start=171 + _NODE._serialized_end=878 + _RELATIONSHIP._serialized_start=881 + _RELATIONSHIP._serialized_end=1403 + _RELATIONSHIP_RELATIONSHIPTYPE._serialized_start=1084 + _RELATIONSHIP_RELATIONSHIPTYPE._serialized_end=1403 + _PROJECT._serialized_start=1405 + _PROJECT._serialized_end=1428 + _PACKAGE._serialized_start=1430 + _PACKAGE._serialized_end=1491 + _FOLDER._serialized_start=1493 + _FOLDER._serialized_end=1529 + _FILE._serialized_start=1531 + _FILE._serialized_end=1584 + _MODULE._serialized_start=1586 + _MODULE._serialized_end=1646 + _MODULEIMPLEMENTATION._serialized_start=1648 + _MODULEIMPLEMENTATION._serialized_end=1749 + _MODULEINTERFACE._serialized_start=1751 + _MODULEINTERFACE._serialized_end=1820 + _EXTERNALPACKAGE._serialized_start=1822 + _EXTERNALPACKAGE._serialized_end=1853 + _FUNCTION._serialized_start=1856 + _FUNCTION._serialized_end=2002 + _METHOD._serialized_start=2004 + _METHOD._serialized_end=2127 + _CLASS._serialized_start=2130 + 
_CLASS._serialized_end=2273 + _INTERFACE._serialized_start=2275 + _INTERFACE._serialized_end=2361 + _ENUM._serialized_start=2363 + _ENUM._serialized_end=2444 + _TYPE._serialized_start=2446 + _TYPE._serialized_end=2490 + _UNION._serialized_start=2492 + _UNION._serialized_end=2537 # @@protoc_insertion_point(module_scope) diff --git a/docker-compose.yaml b/docker-compose.yaml deleted file mode 100644 index 88b9b13ab..000000000 --- a/docker-compose.yaml +++ /dev/null @@ -1,12 +0,0 @@ -services: - memgraph: - image: memgraph/memgraph-mage - ports: - - "${MEMGRAPH_PORT:-7687}:7687" - - "${MEMGRAPH_HTTP_PORT:-7444}:7444" - lab: - image: memgraph/lab - ports: - - "${LAB_PORT:-3000}:3000" - environment: - QUICK_CONNECT_MG_HOST: memgraph diff --git a/TODO.md b/docs/TODO.md similarity index 100% rename from TODO.md rename to docs/TODO.md diff --git a/docs/advanced/adding-languages.md b/docs/advanced/adding-languages.md new file mode 100644 index 000000000..5ddc87168 --- /dev/null +++ b/docs/advanced/adding-languages.md @@ -0,0 +1,104 @@ +--- +description: "Add support for new programming languages to Code-Graph-RAG using Tree-sitter grammars." +--- + +# Adding Languages + +Code-Graph-RAG makes it easy to add support for any language that has a Tree-sitter grammar. The system automatically handles grammar compilation and integration. + +!!! warning + While you can add languages yourself, we recommend waiting for official full support to ensure optimal parsing quality, comprehensive feature coverage, and robust integration. [Submit a language request](https://github.com/vitali87/code-graph-rag/issues) if you need a specific language supported. 
+ +## Quick Start + +Use the built-in language management tool: + +```bash +cgr language add-grammar <language-name> +``` + +Examples: + +```bash +cgr language add-grammar c-sharp +cgr language add-grammar php +cgr language add-grammar ruby +cgr language add-grammar kotlin +``` + +## Custom Grammar Repositories + +For languages hosted outside the standard tree-sitter organization: + +```bash +cgr language add-grammar --grammar-url https://github.com/custom/tree-sitter-mylang +``` + +## What Happens Automatically + +When you add a language, the tool automatically: + +1. **Downloads the Grammar**: Clones the tree-sitter grammar repository as a git submodule +2. **Detects Configuration**: Auto-extracts language metadata from `tree-sitter.json` +3. **Analyzes Node Types**: Automatically identifies AST node types for functions/methods, classes/structs, modules/files, and function calls +4. **Compiles Bindings**: Builds Python bindings from the grammar source +5. **Updates Configuration**: Adds the language to `codebase_rag/language_config.py` +6. **Enables Parsing**: Makes the language immediately available for codebase analysis + +## Example: Adding C# Support + +```bash +$ cgr language add-grammar c-sharp +Using default tree-sitter URL: https://github.com/tree-sitter/tree-sitter-c-sharp +Adding submodule from https://github.com/tree-sitter/tree-sitter-c-sharp... +Successfully added submodule at grammars/tree-sitter-c-sharp +Auto-detected language: c-sharp +Auto-detected file extensions: ['cs'] +Auto-detected node types: +Functions: ['destructor_declaration', 'method_declaration', 'constructor_declaration'] +Classes: ['struct_declaration', 'enum_declaration', 'interface_declaration', 'class_declaration'] +Modules: ['compilation_unit', 'file_scoped_namespace_declaration', 'namespace_declaration'] +Calls: ['invocation_expression'] + +Language 'c-sharp' has been added to the configuration! 
+Updated codebase_rag/language_config.py +``` + +## Managing Languages + +```bash +cgr language list-languages + +cgr language remove-language <language-name> +``` + +## Language Configuration + +Each language is defined in `codebase_rag/language_config.py`: + +```python +"language-name": LanguageConfig( + name="language-name", + file_extensions=[".ext1", ".ext2"], + function_node_types=["function_declaration", "method_declaration"], + class_node_types=["class_declaration", "struct_declaration"], + module_node_types=["compilation_unit", "source_file"], + call_node_types=["call_expression", "method_invocation"], +), +``` + +## Troubleshooting + +**Grammar not found**: Use a custom URL if the automatic URL doesn't work: + +```bash +cgr language add-grammar --grammar-url https://github.com/custom/tree-sitter-mylang +``` + +**Version incompatibility**: If you get "Incompatible Language version" errors: + +```bash +uv add tree-sitter@latest +``` + +**Missing node types**: The tool automatically detects common node patterns, but you can manually adjust the configuration in `language_config.py` if needed. diff --git a/docs/advanced/building-binaries.md b/docs/advanced/building-binaries.md new file mode 100644 index 000000000..b250d52c7 --- /dev/null +++ b/docs/advanced/building-binaries.md @@ -0,0 +1,15 @@ +--- +description: "Build a standalone binary of Code-Graph-RAG using PyInstaller." +--- + +# Building Binaries + +You can build a standalone binary of Code-Graph-RAG using the `build_binary.py` script. This uses PyInstaller to package the application and its dependencies into a single executable. + +## Build + +```bash +python build_binary.py +``` + +The resulting binary will be located in the `dist` directory. 
diff --git a/docs/advanced/ignore-patterns.md b/docs/advanced/ignore-patterns.md new file mode 100644 index 000000000..a17ad4b70 --- /dev/null +++ b/docs/advanced/ignore-patterns.md @@ -0,0 +1,28 @@ +--- +description: "Configure .cgrignore to exclude directories from Code-Graph-RAG analysis." +--- + +# Ignore Patterns + +You can specify additional directories to exclude from analysis by creating a `.cgrignore` file in your repository root. + +## Format + +``` +# Comments start with # +vendor +.custom_cache +my_build_output +``` + +## Rules + +- One directory name per line +- Lines starting with `#` are comments +- Blank lines are ignored +- Patterns are exact directory name matches (not globs) +- Patterns from `.cgrignore` are merged with `--exclude` flags and auto-detected directories + +## Default Exclusions + +Code-Graph-RAG automatically excludes common non-source directories such as `.git`, `node_modules`, `__pycache__`, `dist`, `build`, and similar. diff --git a/docs/advanced/troubleshooting.md b/docs/advanced/troubleshooting.md new file mode 100644 index 000000000..22a2dd27c --- /dev/null +++ b/docs/advanced/troubleshooting.md @@ -0,0 +1,46 @@ +--- +description: "Troubleshoot common Code-Graph-RAG issues with Memgraph, Ollama, and model configuration." +--- + +# Troubleshooting + +## Check Memgraph Connection + +- Ensure Docker containers are running: `docker compose ps` +- Verify Memgraph is accessible on port 7687 + +## View Database in Memgraph Lab + +- Open [http://localhost:3000](http://localhost:3000) +- Connect to `memgraph:7687` + +## Local Model Issues (Ollama) + +- Verify Ollama is running: `ollama list` +- Check if models are downloaded: `ollama pull llama3` +- Test Ollama API: `curl http://localhost:11434/v1/models` +- Check Ollama logs: `ollama logs` + +## General Checklist + +1. Check the logs for error details +2. Verify Memgraph connection +3. Ensure all environment variables are set +4. Review the graph schema matches your expectations +5. 
Run `cgr doctor` to validate your setup + +## Language Grammar Issues + +**Grammar not found**: Use a custom URL: + +```bash +cgr language add-grammar --grammar-url https://github.com/custom/tree-sitter-mylang +``` + +**Version incompatibility**: Update tree-sitter: + +```bash +uv add tree-sitter@latest +``` + +**Missing node types**: Manually adjust the configuration in `codebase_rag/language_config.py`. diff --git a/docs/architecture/graph-schema.md b/docs/architecture/graph-schema.md new file mode 100644 index 000000000..9e240007d --- /dev/null +++ b/docs/architecture/graph-schema.md @@ -0,0 +1,173 @@ +--- +description: "Knowledge graph schema with node types, relationships, and language-specific AST mappings." +--- + +# Graph Schema + +The knowledge graph uses a unified schema across all supported languages. + +## Node Types + +| Label | Properties | +|-------|------------| +| Project | `{name: string}` | +| Package | `{qualified_name: string, name: string, path: string}` | +| Folder | `{path: string, name: string}` | +| File | `{path: string, name: string, extension: string}` | +| Module | `{qualified_name: string, name: string, path: string}` | +| Class | `{qualified_name: string, name: string, decorators: list[string]}` | +| Function | `{qualified_name: string, name: string, decorators: list[string]}` | +| Method | `{qualified_name: string, name: string, decorators: list[string]}` | +| Interface | `{qualified_name: string, name: string}` | +| Enum | `{qualified_name: string, name: string}` | +| Type | `{qualified_name: string, name: string}` | +| Union | `{qualified_name: string, name: string}` | +| ModuleInterface | `{qualified_name: string, name: string, path: string}` | +| ModuleImplementation | `{qualified_name: string, name: string, path: string, implements_module: string}` | +| ExternalPackage | `{name: string, version_spec: string}` | + +## Relationships + +| Source | Relationship | Target | +|--------|-------------|--------| +| Project, Package, 
Folder | CONTAINS_PACKAGE | Package | +| Project, Package, Folder | CONTAINS_FOLDER | Folder | +| Project, Package, Folder | CONTAINS_FILE | File | +| Project, Package, Folder | CONTAINS_MODULE | Module | +| Module, Function, Method | DEFINES | Class, Function | +| Class | DEFINES_METHOD | Method | +| Module | IMPORTS | Module | +| Module | EXPORTS | Class, Function | +| Module | EXPORTS_MODULE | ModuleInterface | +| Module | IMPLEMENTS_MODULE | ModuleImplementation | +| Class | INHERITS | Class | +| Class | IMPLEMENTS | Interface | +| Method | OVERRIDES | Method | +| ModuleImplementation | IMPLEMENTS | ModuleInterface | +| Project | DEPENDS_ON_EXTERNAL | ExternalPackage | +| Function, Method | CALLS | Function, Method | + +## Nested Definitions + +A function or class defined inside another function or method (a closure or a function-local class) is attached by `DEFINES` to its **enclosing scope**, not flattened onto the Module. So `DEFINES` can originate from a `Function` or `Method` as well as a `Module`. A top-level function or class is still defined by its `Module`. + +Methods and classes defined inside function bodies are captured only when `CGR_CAPTURE_LOCAL_DEFINITIONS` is enabled (see [Configuration](../getting-started/configuration.md)); function-local *classes* are captured by default, but their methods require the flag. + +## Qualified Name Uniqueness + +`qualified_name` uniquely identifies each `Function`, `Method`, and `Class` node. When the same qualified name is defined more than once in a module, every definition is kept as a distinct node. This happens with the `if has_x(): ... else: ...` import-fallback idiom, `typing.overload`, and `try/except ImportError` fallbacks. + +The first definition keeps the plain dotted qualified name; each later definition is suffixed with `@<n>`, where `<n>` is a number (for example `pkg.module.store_embedding@161`), so every definition survives instead of one overwriting the other. The `name` property stays the plain name on every variant. 
+ +A `CALLS` edge to a name that has more than one definition links to every variant, since each is a runtime-possible target. + +## Language-Specific AST Mappings + +### C++ + +- `class_specifier` +- `declaration` +- `enum_specifier` +- `field_declaration` +- `function_definition` +- `lambda_expression` +- `struct_specifier` +- `template_declaration` +- `union_specifier` + +### Java + +- `annotation_type_declaration` +- `class_declaration` +- `constructor_declaration` +- `enum_declaration` +- `interface_declaration` +- `method_declaration` +- `record_declaration` + +### JavaScript + +- `arrow_function` +- `class` +- `class_declaration` +- `function_declaration` +- `function_expression` +- `generator_function_declaration` +- `method_definition` + +### Lua + +- `function_declaration` +- `function_definition` + +### Python + +- `class_definition` +- `function_definition` + +### Rust + +- `closure_expression` +- `enum_item` +- `function_item` +- `function_signature_item` +- `impl_item` +- `struct_item` +- `trait_item` +- `type_item` +- `union_item` + +### TypeScript + +- `abstract_class_declaration` +- `arrow_function` +- `class` +- `class_declaration` +- `enum_declaration` +- `function_declaration` +- `function_expression` +- `function_signature` +- `generator_function_declaration` +- `interface_declaration` +- `internal_module` +- `method_definition` +- `type_alias_declaration` + +### C# + +- `anonymous_method_expression` +- `class_declaration` +- `constructor_declaration` +- `destructor_declaration` +- `enum_declaration` +- `function_pointer_type` +- `interface_declaration` +- `lambda_expression` +- `local_function_statement` +- `method_declaration` +- `struct_declaration` + +### Go + +- `function_declaration` +- `method_declaration` +- `type_declaration` + +### PHP + +- `anonymous_function` +- `arrow_function` +- `class_declaration` +- `enum_declaration` +- `function_definition` +- `function_static_declaration` +- `interface_declaration` +- `trait_declaration` + 
+### Scala + +- `class_definition` +- `function_declaration` +- `function_definition` +- `object_definition` +- `trait_definition` diff --git a/docs/architecture/language-support.md b/docs/architecture/language-support.md new file mode 100644 index 000000000..9398b05e5 --- /dev/null +++ b/docs/architecture/language-support.md @@ -0,0 +1,34 @@ +--- +description: "Supported programming languages and their feature coverage in Code-Graph-RAG." +--- + +# Language Support + +Code-Graph-RAG uses Tree-sitter for language-agnostic AST parsing with a unified graph schema across all languages. + +## Support Matrix + +| Language | Status | Extensions | Functions | Classes/Structs | Modules | Package Detection | Additional Features | +|----------|--------|------------|-----------|-----------------|---------|-------------------|---------------------| +| C++ | Fully Supported | .cpp, .h, .hpp, .cc, .cxx, .hxx, .hh, .ixx, .cppm, .ccm | Yes | Yes | Yes | Yes | Constructors, destructors, operator overloading, templates, lambdas, C++20 modules, namespaces | +| Java | Fully Supported | .java | Yes | Yes | Yes | No | Generics, annotations, modern features (records/sealed classes), concurrency, reflection | +| JavaScript | Fully Supported | .js, .jsx | Yes | Yes | Yes | No | ES6 modules, CommonJS, prototype methods, object methods, arrow functions | +| Lua | Fully Supported | .lua | Yes | No | Yes | No | Local/global functions, metatables, closures, coroutines | +| Python | Fully Supported | .py | Yes | Yes | Yes | Yes | Type inference, decorators, nested functions | +| Rust | Fully Supported | .rs | Yes | Yes | Yes | Yes | impl blocks, associated functions | +| TypeScript | Fully Supported | .ts, .tsx | Yes | Yes | Yes | No | Interfaces, type aliases, enums, namespaces, ES6/CommonJS modules | +| C# | In Development | .cs | Yes | Yes | Yes | No | Classes, interfaces, generics (planned) | +| Go | In Development | .go | Yes | Yes | Yes | No | Methods, type declarations | +| PHP | Fully 
Supported | .php | Yes | Yes | Yes | No | Classes, interfaces, traits, enums, namespaces, PHP 8 attributes | +| Scala | In Development | .scala, .sc | Yes | Yes | Yes | No | Case classes, objects | + +## Language-Agnostic Design + +All languages share a unified graph schema, meaning queries work the same way regardless of language. You can query across languages in the same knowledge graph when analyzing polyglot repositories. + +## Adding New Languages + +Code-Graph-RAG makes it easy to add support for any language that has a Tree-sitter grammar. See the [Adding Languages](../advanced/adding-languages.md) guide. + +!!! tip + While you can add languages yourself, we recommend waiting for official full support for optimal parsing quality and comprehensive feature coverage. [Submit a language request](https://github.com/vitali87/code-graph-rag/issues) if you need a specific language supported. diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md new file mode 100644 index 000000000..5181f9d87 --- /dev/null +++ b/docs/architecture/overview.md @@ -0,0 +1,51 @@ +--- +description: "Architecture overview of Code-Graph-RAG's two-component system for codebase analysis." +--- + +# Architecture Overview + +Code-Graph-RAG consists of two main components that work together to analyze and query codebases. + +## Components + +### 1. Multi-Language Parser + +A Tree-sitter based parsing system that analyzes codebases and ingests data into Memgraph. + +- Uses Tree-sitter for robust, language-agnostic AST parsing +- Extracts functions, classes, methods, modules, and their relationships +- Supports 11 programming languages with a unified graph schema +- Handles complex patterns like nested functions, class hierarchies, and cross-module calls + +### 2. RAG System (`codebase_rag/`) + +An interactive CLI for querying the stored knowledge graph. 
+ +- Translates natural language questions into Cypher queries +- Retrieves source code snippets for found elements +- Supports AI-powered code editing with AST-based targeting +- Provides code optimization with interactive approval workflow + +## Data Flow + +``` +Source Code → Tree-sitter Parser → AST Analysis → Memgraph Knowledge Graph + ↓ +User Query → AI Model (Cypher Gen) → Cypher Query → Graph Results → Response +``` + +## Key Dependencies + +| Dependency | Purpose | +|-----------|---------| +| `tree-sitter` | Language-agnostic AST parsing | +| `pymgclient` | Memgraph database adapter | +| `pydantic-ai` | Agent framework for LLM integration | +| `pydantic-settings` | Settings management | +| `mcp` | Model Context Protocol SDK | +| `typer` | CLI framework | +| `rich` | Terminal rendering | +| `prompt-toolkit` | Interactive command line | +| `diff-match-patch` | Code patching | +| `watchdog` | Filesystem events monitoring | +| `huggingface-hub` | UniXcoder model download | diff --git a/docs/assets/demo.gif b/docs/assets/demo.gif new file mode 100644 index 000000000..0260a2f83 Binary files /dev/null and b/docs/assets/demo.gif differ diff --git a/docs/assets/favicon.png b/docs/assets/favicon.png new file mode 100644 index 000000000..7ea975f2d Binary files /dev/null and b/docs/assets/favicon.png differ diff --git a/docs/assets/logo-dark-any.png b/docs/assets/logo-dark-any.png new file mode 100644 index 000000000..56508a2d7 Binary files /dev/null and b/docs/assets/logo-dark-any.png differ diff --git a/docs/assets/logo-icon.png b/docs/assets/logo-icon.png new file mode 100644 index 000000000..5449b7e03 Binary files /dev/null and b/docs/assets/logo-icon.png differ diff --git a/docs/assets/logo-light-any.png b/docs/assets/logo-light-any.png new file mode 100644 index 000000000..89be19120 Binary files /dev/null and b/docs/assets/logo-light-any.png differ diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 000000000..4ea2ab9c7 --- 
/dev/null +++ b/docs/contributing.md @@ -0,0 +1,108 @@ +--- +description: "Contribution guidelines for Code-Graph-RAG including setup, code standards, and PR process." +--- + +# Contributing + +Thank you for your interest in contributing to Code-Graph-RAG! + +!!! note "Canonical guide" + This page is a quick-start summary. The full, authoritative contribution guide, including the complete coding standards, lives in [`CONTRIBUTING.md`](https://github.com/vitali87/code-graph-rag/blob/main/CONTRIBUTING.md) at the repository root. When the two differ, the root guide wins. + +## Getting Started + +1. **Browse Issues**: Check out the [issue tracker](https://github.com/vitali87/code-graph-rag/issues) to find tasks that need work. Look for `good first issue` and `help wanted` labels. +2. **Pick an Issue**: Choose an issue that interests you and matches your skill level +3. **Comment on the Issue**: Let us know you're working on it to avoid duplicate effort +4. **Fork the Repository**: Create your own fork to work on +5. **Create a Branch**: Use a descriptive branch name like `feat/add-feature` or `fix/bug-description` + +## Development Setup + +```bash +git clone https://github.com/YOUR-USERNAME/code-graph-rag.git +cd code-graph-rag +make dev +``` + +This installs all dependencies and sets up pre-commit hooks automatically. + +## Pre-commit Hooks + +All commits must pass pre-commit checks. Do not skip hooks with `--no-verify`. 
+ +```bash +pre-commit install +pre-commit autoupdate +``` + +## Running Checks Locally + +```bash +make lint # Lint check +make format # Format check +make typecheck # Type check +make test-parallel # Unit tests in parallel +make test-integration # Integration tests (requires Docker) +``` + +Or run everything at once: + +```bash +make check # Runs lint + typecheck + test +make pre-commit # Runs ALL pre-commit checks (mirrors CI) +``` + +## Pull Request Guidelines + +- Keep PRs focused on a single issue or feature +- Write clear, descriptive commit messages using Conventional Commits format +- Include tests for new functionality +- Update documentation when necessary +- Be responsive to feedback during code review + +### CI Pipeline + +All pull requests are validated by CI, which runs in parallel: + +1. **Lint & Format**: `ruff check` and `ruff format --check` +2. **Type Check**: `ty check` on production code +3. **Unit Tests**: Parallel execution with `pytest-xdist` and coverage reporting +4. **Integration Tests**: Full stack testing with Memgraph +5. **PR Title Validation**: Conventional Commits format check + +### Automated Code Review + +This project uses automated code review bots (**Greptile** and **Gemini Code Assist**). Before requesting a human review, address all bot comments by either implementing suggestions or replying with a clear justification for why a suggestion doesn't apply. + +## Technical Requirements + +- **PydanticAI Only**: Do not introduce other agentic frameworks (LangChain, CrewAI, AutoGen, etc.) 
+- **Heavy Pydantic Usage**: Use Pydantic models for data validation, serialization, and configuration +- **Package Management**: Use `uv` for all dependency management +- **Code Quality**: Use `ruff` for linting and formatting +- **Type Safety**: Use type hints everywhere and run `uv run ty check` + +## Development Tools + +| Tool | Purpose | +|------|---------| +| `uv` | Package manager and dependency resolver | +| `ruff` | Code linting and formatting | +| `ty` | Static type checking (from Astral) | +| `pytest` | Testing framework | +| `ripgrep` (`rg`) | Shell command text searching | + +## Comment Policy + +No inline comments are allowed unless they: + +1. Appear before any code at the top of the file +2. Contain the `(H)` marker (intentional, human-written comment) +3. Are type annotations (`type:`, `noqa`, `pyright`, `ty:`) + +## Questions? + +- Open a discussion on GitHub +- Comment on the relevant issue +- Reach out to the maintainers diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md new file mode 100644 index 000000000..1a72298fe --- /dev/null +++ b/docs/getting-started/configuration.md @@ -0,0 +1,128 @@ +--- +description: "Configure Code-Graph-RAG with provider settings, environment variables, and model options." +--- + +# Configuration + +Configuration is managed through environment variables in the `.env` file. The provider-explicit configuration supports mixing different providers for orchestrator and cypher models. 
+ +## Provider Examples + +### All Ollama (Local Models) + +```bash +ORCHESTRATOR_PROVIDER=ollama +ORCHESTRATOR_MODEL=llama3.2 +ORCHESTRATOR_ENDPOINT=http://localhost:11434/v1 + +CYPHER_PROVIDER=ollama +CYPHER_MODEL=codellama +CYPHER_ENDPOINT=http://localhost:11434/v1 +``` + +### All OpenAI Models + +```bash +ORCHESTRATOR_PROVIDER=openai +ORCHESTRATOR_MODEL=gpt-4o +ORCHESTRATOR_API_KEY=sk-your-openai-key + +CYPHER_PROVIDER=openai +CYPHER_MODEL=gpt-4o-mini +CYPHER_API_KEY=sk-your-openai-key +``` + +### All Google Models + +```bash +ORCHESTRATOR_PROVIDER=google +ORCHESTRATOR_MODEL=gemini-2.5-pro +ORCHESTRATOR_API_KEY=your-google-api-key + +CYPHER_PROVIDER=google +CYPHER_MODEL=gemini-2.5-flash +CYPHER_API_KEY=your-google-api-key +``` + +Get your Google API key from [Google AI Studio](https://aistudio.google.com/app/apikey). + +### Mixed Providers + +```bash +ORCHESTRATOR_PROVIDER=google +ORCHESTRATOR_MODEL=gemini-2.5-pro +ORCHESTRATOR_API_KEY=your-google-api-key + +CYPHER_PROVIDER=ollama +CYPHER_MODEL=codellama +CYPHER_ENDPOINT=http://localhost:11434/v1 +``` + +## Orchestrator Model Settings + +| Variable | Description | +|----------|-------------| +| `ORCHESTRATOR_PROVIDER` | Provider name (`google`, `openai`, `ollama`) | +| `ORCHESTRATOR_MODEL` | Model ID (e.g., `gemini-2.5-pro`, `gpt-4o`, `llama3.2`) | +| `ORCHESTRATOR_API_KEY` | API key for the provider (if required) | +| `ORCHESTRATOR_ENDPOINT` | Custom endpoint URL (if required) | +| `ORCHESTRATOR_PROJECT_ID` | Google Cloud project ID (for Vertex AI) | +| `ORCHESTRATOR_REGION` | Google Cloud region (default: `us-central1`) | +| `ORCHESTRATOR_PROVIDER_TYPE` | Google provider type (`gla` or `vertex`) | +| `ORCHESTRATOR_THINKING_BUDGET` | Thinking budget for reasoning models | +| `ORCHESTRATOR_SERVICE_ACCOUNT_FILE` | Path to service account file (for Vertex AI) | + +## Cypher Model Settings + +| Variable | Description | +|----------|-------------| +| `CYPHER_PROVIDER` | Provider name (`google`, `openai`, `ollama`) 
| +| `CYPHER_MODEL` | Model ID (e.g., `gemini-2.5-flash`, `gpt-4o-mini`, `codellama`) | +| `CYPHER_API_KEY` | API key for the provider (if required) | +| `CYPHER_ENDPOINT` | Custom endpoint URL (if required) | +| `CYPHER_PROJECT_ID` | Google Cloud project ID (for Vertex AI) | +| `CYPHER_REGION` | Google Cloud region (default: `us-central1`) | +| `CYPHER_PROVIDER_TYPE` | Google provider type (`gla` or `vertex`) | +| `CYPHER_THINKING_BUDGET` | Thinking budget for reasoning models | +| `CYPHER_SERVICE_ACCOUNT_FILE` | Path to service account file (for Vertex AI) | + +## System Settings + +| Variable | Default | Description | +|----------|---------|-------------| +| `MEMGRAPH_HOST` | `localhost` | Memgraph hostname | +| `MEMGRAPH_PORT` | `7687` | Memgraph port | +| `MEMGRAPH_HTTP_PORT` | `7444` | Memgraph HTTP port | +| `LAB_PORT` | `3000` | Memgraph Lab port | +| `MEMGRAPH_BATCH_SIZE` | `1000` | Batch size for Memgraph operations | +| `TARGET_REPO_PATH` | `.` | Default repository path | +| `CGR_CAPTURE_LOCAL_DEFINITIONS` | `false` | Capture classes/methods defined inside function bodies (function-local definitions). Off by default to keep the graph free of throwaway helpers and test mocks; enable for exhaustive structure capture. | +| `LOCAL_MODEL_ENDPOINT` | `http://localhost:11434/v1` | Fallback endpoint for Ollama | + +## Setting Up Ollama + +```bash +curl -fsSL https://ollama.ai/install.sh | sh + +ollama pull llama3.2 +# Or try other models: +# ollama pull llama3 +# ollama pull mistral +# ollama pull codellama +``` + +Ollama automatically starts serving on `localhost:11434`. + +!!! note + Local models provide privacy and no API costs, but may have lower accuracy compared to cloud models like Gemini or GPT-4o. 
+ +## Programmatic Configuration + +You can also configure providers programmatically via the Python SDK: + +```python +from cgr import settings + +settings.set_orchestrator("openai", "gpt-4o", api_key="sk-...") +settings.set_cypher("google", "gemini-2.5-flash", api_key="your-key") +``` diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 000000000..522d380b8 --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,115 @@ +--- +description: "Install Code-Graph-RAG and set up Memgraph for multi-language codebase analysis." +--- + +# Installation + +## Prerequisites + +- Python 3.12+ +- Docker & Docker Compose (for Memgraph) +- **cmake** (required for building pymgclient dependency) +- **ripgrep** (`rg`) (required for shell command text searching) +- **For cloud models**: Google Gemini API key, OpenAI API key, or both +- **For local models**: Ollama installed and running +- `uv` package manager (recommended) or `pip` + +### Installing cmake and ripgrep + +=== "macOS" + + ```bash + brew install cmake ripgrep + ``` + +=== "Ubuntu/Debian" + + ```bash + sudo apt-get update + sudo apt-get install cmake ripgrep + ``` + +=== "CentOS/RHEL" + + ```bash + sudo yum install cmake + sudo dnf install ripgrep + ``` + + ripgrep may need to be installed from EPEL or via `cargo install ripgrep`. 
+ +## Install from PyPI + +```bash +pip install code-graph-rag +``` + +With all Tree-sitter grammars (Python, JS, TS, Rust, Go, Java, Scala, C++, Lua): + +```bash +pip install 'code-graph-rag[treesitter-full]' +``` + +With semantic code search (UniXcoder embeddings): + +```bash +pip install 'code-graph-rag[semantic]' +``` + +With both full language support and semantic search: + +```bash +pip install 'code-graph-rag[treesitter-full,semantic]' +``` + +## Install from Source + +```bash +git clone https://github.com/vitali87/code-graph-rag.git +cd code-graph-rag +``` + +For basic Python support: + +```bash +uv sync +``` + +For full multi-language support: + +```bash +uv sync --extra treesitter-full +``` + +For development (including tests and pre-commit hooks): + +```bash +make dev +``` + +This installs all dependencies and sets up pre-commit hooks automatically. + +## Start Memgraph + +```bash +docker compose up -d +``` + +This starts the Memgraph database on port 7687 and Memgraph Lab on port 3000. + +## Set Up Environment Variables + +```bash +cp .env.example .env +# Edit .env with your configuration +``` + +See the [Configuration](configuration.md) guide for all available options. + +## Verify Your Setup + +```bash +cgr doctor +``` + +This checks that all required dependencies and services are available. diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md new file mode 100644 index 000000000..97100cc9b --- /dev/null +++ b/docs/getting-started/quickstart.md @@ -0,0 +1,103 @@ +--- +description: "Parse, query, and export your codebase in 5 minutes with Code-Graph-RAG." +--- + +# Quick Start + +Get from zero to querying your codebase in 5 minutes. + +## Step 1: Parse a Repository + +Parse and ingest a multi-language repository into the knowledge graph. 
+ +**For the first repository (clean start):** + +```bash +cgr start --repo-path /path/to/repo1 --update-graph --clean +``` + +**For additional repositories (preserve existing data):** + +```bash +cgr start --repo-path /path/to/repo2 --update-graph +cgr start --repo-path /path/to/repo3 --update-graph +``` + +**Control Memgraph batch flushing:** + +```bash +cgr start --repo-path /path/to/repo --update-graph --batch-size 5000 +``` + +The system automatically detects and processes files for all supported languages. + +## Step 2: Query the Codebase + +Start the interactive RAG CLI: + +```bash +cgr start --repo-path /path/to/your/repo +``` + +**Specify custom models:** + +```bash +cgr start --repo-path /path/to/your/repo \ + --orchestrator ollama:llama3.2 \ + --cypher ollama:codellama +``` + +```bash +cgr start --repo-path /path/to/your/repo \ + --orchestrator google:gemini-2.0-flash-thinking-exp-01-21 \ + --cypher google:gemini-2.5-flash-lite-preview-06-17 +``` + +**Example queries:** + +- "Show me all classes that contain 'user' in their name" +- "Find functions related to database operations" +- "What methods does the User class have?" 
+- "Show me functions that handle authentication" +- "List all TypeScript components" +- "Find Rust structs and their methods" +- "Add logging to all database connection functions" +- "Refactor the User class to use dependency injection" + +## Step 3: Export Graph Data + +**Export during graph update:** + +```bash +cgr start --repo-path /path/to/repo --update-graph --clean -o my_graph.json +``` + +**Export existing graph without updating:** + +```bash +cgr export -o my_graph.json +``` + +**Work with exported data in Python:** + +```python +from codebase_rag.graph_loader import load_graph + +graph = load_graph("my_graph.json") +summary = graph.summary() +print(f"Total nodes: {summary['total_nodes']}") +print(f"Total relationships: {summary['total_relationships']}") + +functions = graph.find_nodes_by_label("Function") +for func in functions[:5]: + relationships = graph.get_relationships_for_node(func.node_id) + print(f"Function {func.properties['name']} has {len(relationships)} relationships") +``` + +## What Next? + +- [CLI Reference](../guide/cli-reference.md) for all available commands +- [Interactive Querying](../guide/interactive-querying.md) for query examples +- [Code Optimization](../guide/code-optimization.md) for AI-powered improvements +- [MCP Server](../guide/mcp-server.md) for Claude Code integration +- [Python SDK](../sdk/overview.md) for programmatic access diff --git a/docs/guide/cli-reference.md b/docs/guide/cli-reference.md new file mode 100644 index 000000000..6c5842703 --- /dev/null +++ b/docs/guide/cli-reference.md @@ -0,0 +1,111 @@ +--- +description: "Complete CLI reference for Code-Graph-RAG commands and Makefile targets." +--- + +# CLI Reference + +The `cgr` command is the main entry point for Code-Graph-RAG. + +## Core Commands + +### `cgr start` + +Parse a repository and/or start the interactive query CLI. 
+ +```bash +cgr start --repo-path /path/to/repo [OPTIONS] +``` + +| Option | Description | +|--------|-------------| +| `--repo-path` | Path to repository (defaults to current directory) | +| `--update-graph` | Parse and ingest the repository into the knowledge graph | +| `--clean` | Clear existing data before ingesting | +| `--batch-size` | Override Memgraph flush batch size | +| `--orchestrator` | Specify provider:model for main operations (e.g., `google:gemini-2.5-pro`, `ollama:llama3.2`) | +| `--cypher` | Specify provider:model for graph queries (e.g., `google:gemini-2.5-flash`, `ollama:codellama`) | +| `-o` | Export graph to JSON file during update | + +### `cgr export` + +Export the knowledge graph to JSON. + +```bash +cgr export -o my_graph.json +``` + +### `cgr optimize` + +AI-powered codebase optimization. + +```bash +cgr optimize <language> --repo-path /path/to/repo [OPTIONS] +``` + +| Option | Description | +|--------|-------------| +| `--repo-path` | Path to repository | +| `--orchestrator` | Specify provider:model for operations | +| `--batch-size` | Override Memgraph flush batch size | +| `--reference-document` | Path to reference documentation for guided optimization | + +Supported languages: `python`, `javascript`, `typescript`, `rust`, `go`, `java`, `scala`, `cpp` + +### `cgr mcp-server` + +Start the MCP server for Claude Code integration. + +```bash +cgr mcp-server +``` + +### `cgr index` + +Index a repository to protobuf for offline use. + +```bash +cgr index -o ./index-output --repo-path ./my-project +``` + +### `cgr doctor` + +Check that all required dependencies and services are available. + +```bash +cgr doctor +``` + +### `cgr language` + +Manage language support. 
+ +```bash +cgr language add-grammar <language-name> +cgr language add-grammar <language-name> --grammar-url <url> +cgr language list-languages +cgr language remove-language <language-name> +``` + +## Makefile Commands + +| Command | Description | +|---------|-------------| +| `make help` | Show help message | +| `make all` | Install everything for full development environment | +| `make install` | Install project dependencies with full language support | +| `make python` | Install project dependencies for Python only | +| `make dev` | Setup development environment (install deps + pre-commit hooks) | +| `make test` | Run unit tests only (fast, no Docker) | +| `make test-parallel` | Run unit tests in parallel (fast, no Docker) | +| `make test-integration` | Run integration tests (requires Docker) | +| `make test-all` | Run all tests including integration and e2e (requires Docker) | +| `make test-parallel-all` | Run all tests in parallel (requires Docker) | +| `make clean` | Clean up build artifacts and cache | +| `make build-grammars` | Build grammar submodules | +| `make watch` | Watch repository for changes and update graph in real-time | +| `make readme` | Regenerate README.md from codebase | +| `make lint` | Run ruff check | +| `make format` | Run ruff format | +| `make typecheck` | Run type checking with ty | +| `make check` | Run all checks: lint, typecheck, test | +| `make pre-commit` | Run all pre-commit checks locally | diff --git a/docs/guide/code-optimization.md b/docs/guide/code-optimization.md new file mode 100644 index 000000000..77b7e6698 --- /dev/null +++ b/docs/guide/code-optimization.md @@ -0,0 +1,91 @@ +--- +description: "AI-powered codebase optimization with language-specific best practices and interactive approval." +--- + +# Code Optimization + +Code-Graph-RAG provides AI-powered codebase optimization with best practices guidance and an interactive approval workflow. 
+ +## Basic Usage + +```bash +cgr optimize python --repo-path /path/to/your/repo +``` + +## With Reference Documentation + +Guide the optimization process using your own coding standards: + +```bash +cgr optimize python \ + --repo-path /path/to/your/repo \ + --reference-document /path/to/best_practices.md +``` + +```bash +cgr optimize java \ + --reference-document ./ARCHITECTURE.md +``` + +```bash +cgr optimize rust \ + --reference-document ./docs/performance_guide.md +``` + +The agent incorporates guidance from your reference documents when suggesting optimizations, ensuring they align with your project's standards and architectural decisions. + +## Using Specific Models + +```bash +cgr optimize javascript \ + --repo-path /path/to/frontend \ + --orchestrator google:gemini-2.0-flash-thinking-exp-01-21 +``` + +```bash +cgr optimize javascript --repo-path /path/to/frontend \ + --batch-size 5000 +``` + +## Supported Languages + +All supported languages: `python`, `javascript`, `typescript`, `rust`, `go`, `java`, `scala`, `cpp` + +## How It Works + +1. **Analysis Phase**: The agent analyzes your codebase structure using the knowledge graph +2. **Pattern Recognition**: Identifies common anti-patterns, performance issues, and improvement opportunities +3. **Best Practices Application**: Applies language-specific best practices and patterns +4. **Interactive Approval**: Presents each optimization suggestion for your approval before implementation +5. **Guided Implementation**: Implements approved changes with detailed explanations + +## Example Session + +``` +Starting python optimization session... +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ The agent will analyze your python codebase and propose specific ┃ +┃ optimizations. You'll be asked to approve each suggestion before ┃ +┃ implementation. Type 'exit' or 'quit' to end the session. 
┃ +┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +Analyzing codebase structure... +Found 23 Python modules with potential optimizations + +Optimization Suggestion #1: + File: src/data_processor.py + Issue: Using list comprehension in a loop can be optimized + Suggestion: Replace with generator expression for memory efficiency + + [y/n] Do you approve this optimization? +``` + +## CLI Options + +| Option | Description | +|--------|-------------| +| `--orchestrator` | Specify provider:model for main operations | +| `--cypher` | Specify provider:model for graph queries | +| `--repo-path` | Path to repository (defaults to current directory) | +| `--batch-size` | Override Memgraph flush batch size | +| `--reference-document` | Path to reference documentation | diff --git a/docs/guide/graph-export.md b/docs/guide/graph-export.md new file mode 100644 index 000000000..814321dd0 --- /dev/null +++ b/docs/guide/graph-export.md @@ -0,0 +1,63 @@ +--- +description: "Export the Code-Graph-RAG knowledge graph to JSON for programmatic analysis and integration." +--- + +# Graph Export + +Export the entire knowledge graph to JSON for programmatic access and integration with other tools. 
+ +## Export Commands + +**Export during graph update:** + +```bash +cgr start --repo-path /path/to/repo --update-graph --clean -o my_graph.json +``` + +**Export existing graph without updating:** + +```bash +cgr export -o my_graph.json +``` + +**Adjust Memgraph batching during export:** + +```bash +cgr export -o my_graph.json --batch-size 5000 +``` + +## Working with Exported Data + +```python +from codebase_rag.graph_loader import load_graph + +graph = load_graph("my_graph.json") + +summary = graph.summary() +print(f"Total nodes: {summary['total_nodes']}") +print(f"Total relationships: {summary['total_relationships']}") + +functions = graph.find_nodes_by_label("Function") +classes = graph.find_nodes_by_label("Class") + +for func in functions[:5]: + relationships = graph.get_relationships_for_node(func.node_id) + print(f"Function {func.properties['name']} has {len(relationships)} relationships") +``` + +## Example Analysis Script + +```bash +python examples/graph_export_example.py my_graph.json +``` + +## Use Cases + +Exported graph data is useful for: + +- Integration with other tools +- Custom analysis scripts +- Building documentation generators +- Creating code metrics dashboards + +See the [Python SDK](../sdk/overview.md) for more programmatic access patterns. diff --git a/docs/guide/interactive-querying.md b/docs/guide/interactive-querying.md new file mode 100644 index 000000000..5f3dd983b --- /dev/null +++ b/docs/guide/interactive-querying.md @@ -0,0 +1,89 @@ +--- +description: "Query your codebase with natural language using Code-Graph-RAG's interactive CLI." +--- + +# Interactive Querying + +Code-Graph-RAG lets you ask questions about your codebase in plain English. The system translates your questions into Cypher queries, executes them against the knowledge graph, and returns relevant results with source code snippets. 
+ +## Starting the CLI + +```bash +cgr start --repo-path /path/to/your/repo +``` + +## Example Queries + +### Finding Code Elements + +- "Show me all classes that contain 'user' in their name" +- "Find functions related to database operations" +- "What methods does the User class have?" +- "Show me functions that handle authentication" +- "List all TypeScript components" +- "Find Rust structs and their methods" +- "Show me Go interfaces and implementations" + +### Analyzing Relationships + +- "Find all functions that call each other" +- "What classes are in the user module" +- "Show me functions with the longest call chains" +- "What functions call UserService.create_user?" +- "Show me all classes that implement the Repository interface" + +### C++ Specific Queries + +- "Find all C++ operator overloads in the Matrix class" +- "Show me C++ template functions with their specializations" +- "List all C++ namespaces and their contained classes" +- "Find C++ lambda expressions used in algorithms" + +### Code Editing Queries + +- "Add logging to all database connection functions" +- "Refactor the User class to use dependency injection" +- "Convert these Python functions to async/await pattern" +- "Add error handling to authentication methods" +- "Optimize this function for better performance" + +## Semantic Code Search + +Search for functions by describing what they do, rather than by exact names: + +- "error handling functions" +- "authentication code" +- "database connection setup" + +Semantic search uses UniXcoder embeddings and requires the `semantic` extra: + +```bash +pip install 'code-graph-rag[semantic]' +``` + +## Agentic Tools + +The interactive agent has access to these tools: + +| Tool | Description | +|------|-------------| +| `query_graph` | Query the knowledge graph using natural language | +| `read_file` | Read the content of text-based files | +| `create_file` | Create a new file with content | +| `replace_code` | Surgically replace specific code blocks 
| +| `list_directory` | List directory contents | +| `analyze_document` | Analyze documents (PDFs, images) | +| `execute_shell` | Execute shell commands from allowlist | +| `semantic_search` | Semantic function search by description | +| `get_function_source` | Retrieve source code by node ID | +| `get_code_snippet` | Retrieve source code by qualified name | + +## Intelligent File Editing + +The agent uses AST-based function targeting with Tree-sitter for precise code modifications: + +- **Visual diff preview** before changes +- **Surgical patching** that only modifies target code blocks +- **Multi-language support** across all supported languages +- **Security sandbox** preventing edits outside project directory +- **Smart function matching** with qualified names and line numbers diff --git a/docs/guide/mcp-server.md b/docs/guide/mcp-server.md new file mode 100644 index 000000000..96be4598a --- /dev/null +++ b/docs/guide/mcp-server.md @@ -0,0 +1,140 @@ +--- +description: "Integrate Code-Graph-RAG with Claude Code as an MCP server for natural language codebase analysis." +--- + +# MCP Server (Claude Code Integration) + +Code-Graph-RAG can run as an MCP (Model Context Protocol) server, enabling seamless integration with Claude Code and other MCP clients. 
+ +## Quick Setup + +**If installed via pip** (and `code-graph-rag` is on your PATH): + +```bash +claude mcp add --transport stdio code-graph-rag \ + --env TARGET_REPO_PATH=/absolute/path/to/your/project \ + --env CYPHER_PROVIDER=openai \ + --env CYPHER_MODEL=gpt-4 \ + --env CYPHER_API_KEY=your-api-key \ + -- code-graph-rag mcp-server +``` + +**If installed from source:** + +```bash +claude mcp add --transport stdio code-graph-rag \ + --env TARGET_REPO_PATH=/absolute/path/to/your/project \ + --env CYPHER_PROVIDER=openai \ + --env CYPHER_MODEL=gpt-4 \ + --env CYPHER_API_KEY=your-api-key \ + -- uv run --directory /path/to/code-graph-rag code-graph-rag mcp-server +``` + +### Using Current Directory + +```bash +cd /path/to/your/project + +claude mcp add --transport stdio code-graph-rag \ + --env TARGET_REPO_PATH="$(pwd)" \ + --env CYPHER_PROVIDER=google \ + --env CYPHER_MODEL=gemini-2.0-flash \ + --env CYPHER_API_KEY=your-google-api-key \ + -- uv run --directory /absolute/path/to/code-graph-rag code-graph-rag mcp-server +``` + +## Prerequisites + +```bash +git clone https://github.com/vitali87/code-graph-rag.git +cd code-graph-rag +uv sync + +docker run -p 7687:7687 -p 7444:7444 memgraph/memgraph-platform +``` + +## Available Tools + +| Tool | Description | +|------|-------------| +| `list_projects` | List all indexed projects in the knowledge graph database | +| `delete_project` | Delete a specific project from the knowledge graph database | +| `wipe_database` | Completely wipe the entire database (cannot be undone) | +| `index_repository` | Parse and ingest the repository into the knowledge graph | +| `query_code_graph` | Query the codebase knowledge graph using natural language | +| `get_code_snippet` | Retrieve source code for a function, class, or method by qualified name | +| `surgical_replace_code` | Surgically replace an exact code block using diff-match-patch | +| `read_file` | Read file contents with pagination support | +| `write_file` | Write content to a 
file | +| `list_directory` | List directory contents | + +## Example Usage + +``` +> Index this repository +> What functions call UserService.create_user? +> Update the login function to add rate limiting +``` + +## LLM Provider Options + +=== "OpenAI" + + ```bash + --env CYPHER_PROVIDER=openai \ + --env CYPHER_MODEL=gpt-4 \ + --env CYPHER_API_KEY=sk-... + ``` + +=== "Google Gemini" + + ```bash + --env CYPHER_PROVIDER=google \ + --env CYPHER_MODEL=gemini-2.5-flash \ + --env CYPHER_API_KEY=... + ``` + +=== "Ollama (free, local)" + + ```bash + --env CYPHER_PROVIDER=ollama \ + --env CYPHER_MODEL=llama3.2 + ``` + +## Multi-Repository Setup + +Add separate named instances for different projects: + +```bash +claude mcp add --transport stdio code-graph-rag-backend \ + --env TARGET_REPO_PATH=/path/to/backend \ + --env CYPHER_PROVIDER=openai \ + --env CYPHER_MODEL=gpt-4 \ + --env CYPHER_API_KEY=your-api-key \ + -- uv run --directory /path/to/code-graph-rag code-graph-rag mcp-server + +claude mcp add --transport stdio code-graph-rag-frontend \ + --env TARGET_REPO_PATH=/path/to/frontend \ + --env CYPHER_PROVIDER=openai \ + --env CYPHER_MODEL=gpt-4 \ + --env CYPHER_API_KEY=your-api-key \ + -- uv run --directory /path/to/code-graph-rag code-graph-rag mcp-server +``` + +!!! warning + Only one repository can be indexed at a time per MCP instance. When you index a new repository, the previous repository's data is automatically cleared. 
+ +## Troubleshooting + +| Issue | Solution | +|-------|----------| +| Can't find uv/code-graph-rag | Use absolute paths from `which uv` | +| Wrong repository analyzed | Set `TARGET_REPO_PATH` to an absolute path | +| Memgraph connection failed | Ensure `docker ps` shows Memgraph running | +| Tools not showing | Run `claude mcp list` to verify installation | + +## Remove + +```bash +claude mcp remove code-graph-rag +``` diff --git a/docs/guide/realtime-updates.md b/docs/guide/realtime-updates.md new file mode 100644 index 000000000..9516eea31 --- /dev/null +++ b/docs/guide/realtime-updates.md @@ -0,0 +1,62 @@ +--- +description: "Keep your Code-Graph-RAG knowledge graph synchronized with code changes using the real-time file watcher." +--- + +# Real-Time Graph Updates + +For active development, keep your knowledge graph automatically synchronized with code changes using the real-time updater. + +## What It Does + +- Watches your repository for file changes (create, modify, delete) +- Automatically updates the knowledge graph in real-time +- Maintains consistency by recalculating all function call relationships +- Filters out irrelevant files (`.git`, `node_modules`, etc.) 
+ +## Usage + +Run the real-time updater in a separate terminal: + +```bash +python realtime_updater.py /path/to/your/repo +``` + +Or using the Makefile: + +```bash +make watch REPO_PATH=/path/to/your/repo +``` + +### With Custom Memgraph Settings + +```bash +python realtime_updater.py /path/to/your/repo \ + --host localhost --port 7687 --batch-size 1000 +``` + +```bash +make watch REPO_PATH=/path/to/your/repo HOST=localhost PORT=7687 BATCH_SIZE=1000 +``` + +## Multi-Terminal Workflow + +```bash +# Terminal 1: Start the real-time updater +python realtime_updater.py ~/my-project + +# Terminal 2: Run the AI assistant +cgr start --repo-path ~/my-project +``` + +## CLI Arguments + +| Argument | Required | Default | Description | +|----------|----------|---------|-------------| +| `repo_path` | Yes | | Path to repository to watch | +| `--host` | No | `localhost` | Memgraph host | +| `--port` | No | `7687` | Memgraph port | +| `--batch-size` | No | | Number of buffered nodes/relationships before flushing to Memgraph | + +## Performance Note + +The updater currently recalculates all CALLS relationships on every file change to ensure consistency. This prevents "island" problems where changes in one file aren't reflected in relationships from other files, but may impact performance on very large codebases with frequent changes. Optimization of this behavior is a work in progress. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 000000000..c62861c38 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,48 @@ +--- +description: "Graph-based RAG system that parses multi-language codebases with Tree-sitter, builds knowledge graphs, and enables natural language querying, editing, and optimization." +--- + +# Code-Graph-RAG + +**The ultimate RAG for your monorepo.** Query, understand, and edit multi-language codebases with the power of AI and knowledge graphs. + +

+ Code-Graph-RAG Demo +

+ +## What is Code-Graph-RAG? + +Code-Graph-RAG is an accurate Retrieval-Augmented Generation (RAG) system that analyzes multi-language codebases using Tree-sitter, builds comprehensive knowledge graphs in Memgraph, and enables natural language querying of codebase structure and relationships as well as editing capabilities. + +## Key Features + +- **Multi-Language Support** for Python, TypeScript, JavaScript, Rust, Java, C++, Go, Lua, and more +- **Tree-sitter Parsing** for robust, language-agnostic AST analysis +- **Knowledge Graph Storage** using Memgraph for interconnected codebase structure +- **Natural Language Querying** to ask questions about your code in plain English +- **AI-Powered Cypher Generation** with Google Gemini, OpenAI, and Ollama support +- **Code Snippet Retrieval** with actual source code for found functions and methods +- **Advanced File Editing** with AST-based function targeting and visual diff previews +- **Shell Command Execution** for running tests and CLI tools +- **Interactive Code Optimization** with language-specific best practices +- **Reference-Guided Optimization** using your own coding standards +- **Dependency Analysis** from `pyproject.toml` +- **Semantic Code Search** using UniXcoder embeddings to find functions by intent +- **MCP Server Integration** for seamless use with Claude Code +- **Real-Time Graph Updates** via file watcher for active development + +## Quick Start + +```bash +pip install code-graph-rag +docker compose up -d +cgr start --repo-path ./my-project --update-graph --clean +``` + +See the [Installation](getting-started/installation.md) guide for full setup instructions. + +## Enterprise Services + +Code-Graph-RAG is open source and free to use. For organizations that need more, we offer **fully managed cloud-hosted solutions** and **on-premise deployments**. 
+ +[View plans & pricing at code-graph-rag.com](https://code-graph-rag.com/enterprise){ .md-button } diff --git a/docs/overrides/main.html b/docs/overrides/main.html new file mode 100644 index 000000000..528edb714 --- /dev/null +++ b/docs/overrides/main.html @@ -0,0 +1,30 @@ +{% extends "base.html" %} + +{% block extrahead %} + +{% endblock %} diff --git a/docs/reports/BENCHMARK_REPORT.md b/docs/reports/BENCHMARK_REPORT.md new file mode 100644 index 000000000..d96875e01 --- /dev/null +++ b/docs/reports/BENCHMARK_REPORT.md @@ -0,0 +1,199 @@ +# Benchmark Report: Measured vs Projected Performance + +## Methodology + +All benchmarks ran on macOS (Darwin 25.3.0), Python 3.12, using `uv run`. Each benchmark used: +- 3 warmup runs (discarded) +- 20 to 100 measured iterations (depending on benchmark) +- Statistical measures: median, mean, stddev, min, max, p95 +- Realistic data sizes matching the profiled workload (352 files, ~4,500 registry entries) + +Benchmark scripts are in `benchmarks/`. Run all with `uv run python benchmarks/run_all.py`. + +--- + +## FINDING 1: `find_ending_with` Linear Scan (48.3% of CPU) + +**The single biggest performance win available, requiring zero dependencies.** + +The `FunctionRegistryTrie.find_ending_with()` method falls back to a linear scan of all entries when the `_simple_name_lookup` index misses (80.7% miss rate per profiling data). + +### Measured Results + +| Scenario | Registry Size | Queries | Linear Scan (ms) | Full Suffix Index (ms) | Speedup | +|---|---|---|---|---|---| +| Batch lookup | 1,000 | 38 | 1.77 | 0.007 | **261x** | +| Batch lookup | 4,500 | 38 | 8.04 | 0.023 | **356x** | +| Batch lookup | 10,000 | 38 | 17.78 | 0.046 | **382x** | +| Single lookup | 4,500 | 1 | 0.22 | 0.001 | **178x** | + +### Projected vs Measured + +The integration feasibility report projected ~1.9x total speedup (saving 13.5s of 31.2s). 
Our benchmarks show that building a complete suffix index provides **178x to 382x speedup** on the specific operation, validating the projection and suggesting the total improvement could be even larger than estimated. + +### Fix + +Build a complete suffix index in `FunctionRegistryTrie` by populating `_simple_name_lookup` for every insert, and ensure all insertion code paths (including `__setitem__`) update the index. This eliminates the linear scan fallback entirely. + +--- + +## FINDING 2: pathlib vs String Operations (13.7% of CPU) + +**The `should_skip_path` function uses `pathlib.Path.relative_to()` which creates intermediate objects on every call.** + +### Measured Results + +| Operation | pathlib (ms) | String ops (ms) | Speedup | +|---|---|---|---| +| `relative_to` vs `removeprefix` (5,000 paths) | 61.3 | 0.097 | **634x** | +| `relative_to` vs `removeprefix` (20,000 paths) | 253.0 | 0.394 | **643x** | +| Full `should_skip_path` (5,000 paths) | 69.3 | 1.55 | **45x** | +| Full `should_skip_path` (20,000 paths) | 285.9 | 6.21 | **46x** | +| `Path.suffix` vs `str.rfind` (5,000 paths) | 6.97 | 0.278 | **25x** | +| `Path.name` vs `str.rfind+slice` (5,000 paths) | 6.37 | 0.360 | **18x** | + +### Projected vs Measured + +The integration report projected 4.0s savings (13.7% of 31.2s total). Our benchmarks show `pathlib.relative_to` is 634x slower than `str.removeprefix`, and the full `should_skip_path` function is 45x slower with pathlib. These numbers validate the projection: for 59,012 calls at ~57us/call (pathlib), the total is ~3.4s, matching the profiled 3.39s. + +### Fix + +Convert paths to strings at the boundary of `should_skip_path` and use `str.removeprefix()`, `str.split("/")`, and `set` membership testing instead of `Path.relative_to()` and `Path.parts`. 
+ +--- + +## FINDING 3: orjson vs stdlib json (JSON Serialization) + +**orjson provides massive speedups on serialization with zero integration overhead.** + +### Measured Results + +| Operation | Data Size | json (ms) | orjson (ms) | Speedup | +|---|---|---|---|---| +| dumps compact | 372 KB | 1.16 | 0.21 | **5.5x** | +| dumps compact | 1.9 MB | 5.73 | 1.01 | **5.7x** | +| dumps compact | 8.5 MB | 26.6 | 4.91 | **5.4x** | +| dumps indented | 372 KB | 9.70 | 0.39 | **24.7x** | +| dumps indented | 1.9 MB | 48.5 | 2.02 | **24.0x** | +| dumps indented | 8.5 MB | 216.9 | 8.58 | **25.3x** | +| loads | 372 KB | 1.26 | 0.62 | **2.0x** | +| loads | 1.9 MB | 6.23 | 3.24 | **1.9x** | +| loads | 8.5 MB | 30.1 | 16.6 | **1.8x** | + +### Projected vs Measured + +The language recommendations projected 5x to 15x. Our measured results show: +- **Compact serialization: 5.4x to 5.7x** (within projected range) +- **Indented serialization: 24x to 25x** (exceeds projected range significantly) +- **Deserialization: 1.8x to 2.0x** (below projected range) + +The indented serialization speedup is particularly relevant because `_write_graph_json` uses `json.dump(data, f, indent=2)` (the slowest path). For a 20K node graph, this drops from 217ms to 8.6ms. + +--- + +## FINDING 4: BLAKE3 vs SHA256 Hashing (NEGATIVE RESULT) + +**BLAKE3 is slower than hashlib.sha256 for this workload. The recommendation is invalidated.** + +### Measured Results + +| Operation | SHA256 (ms) | BLAKE3 (ms) | Speedup | +|---|---|---|---| +| 500 snippet hashes | 0.155 | 0.325 | **0.5x (slower)** | +| 2,000 snippet hashes | 0.594 | 1.177 | **0.5x (slower)** | +| 10,000 snippet hashes | 2.988 | 6.131 | **0.5x (slower)** | +| 50 file hashes (5KB avg) | 0.968 | 1.031 | **0.9x (slower)** | +| 200 file hashes (10KB avg) | 4.419 | 4.964 | **0.9x (slower)** | +| 500 file hashes (20KB avg) | 14.164 | 15.883 | **0.9x (slower)** | + +### Analysis + +The language recommendations projected 4x to 10x speedup. 
Our benchmarks show BLAKE3 actually runs at only **0.5x to 0.9x** the speed of SHA256 (i.e., it is slower) for this workload. This is because: + +1. **hashlib.sha256 is already C-backed** (OpenSSL). The baseline is not pure Python. +2. **BLAKE3's SIMD advantages require large contiguous buffers.** Code snippets average 200 bytes; file chunks are 5-20KB. BLAKE3's parallelism does not engage at these sizes. +3. **FFI overhead dominates.** The `blake3` Python package adds per-call FFI overhead that exceeds the algorithmic savings for small inputs. + +**Verdict: Do not adopt BLAKE3.** The recommendation was based on algorithmic benchmarks, not Python binding benchmarks. + +--- + +## FINDING 5: FunctionRegistryTrie Baseline Performance + +### Measured Results (Existing Python Implementation) + +| Operation | 1K entries | 5K entries | 10K entries | 50K entries | +|---|---|---|---|---| +| insert (ms) | 0.33 | 1.76 | 3.74 | 18.1 | +| lookup (ms) | 0.04 | 0.19 | 0.41 | 2.06 | +| find_ending_with (ms) | 0.004 | 0.018 | 0.046 | 0.47 | +| find_with_prefix (ms) | 0.39 | 2.18 | 4.18 | 39.9 | +| delete 25% (ms) | 0.42 | 2.10 | 4.20 | 22.2 | + +### Analysis + +The trie operations are already fast when the index is hit (O(1) via `_simple_name_lookup`). The Rust trie rewrite (projected 3x to 8x) would save microseconds per operation. The integration feasibility report correctly identified that a standalone Rust trie provides only 1.5x to 3x net gain after FFI overhead. The **pure Python fix (Finding 1) provides 178x to 382x speedup** on the actual bottleneck, making the Rust rewrite unnecessary. 
+ +--- + +## FINDING 6: GraphLoader JSON Parse + Index Build + +### Measured Results + +| Graph Size | JSON Parse Only (ms) | GraphLoader.load (ms) | Index Build Overhead | +|---|---|---|---| +| 1K nodes, 2K rels | 1.03 | 2.10 | 2.0x | +| 5K nodes, 10K rels | 5.15 | 10.6 | 2.1x | +| 20K nodes, 50K rels | 24.2 | 64.2 | 2.7x | + +### Analysis + +GraphLoader.load() is 2x to 2.7x slower than raw JSON parsing due to index construction (node-by-id, node-by-label, outgoing/incoming relationship indexes). With orjson, the JSON parse portion would drop from 24.2ms to ~13.4ms (1.8x), but index construction would remain unchanged. Net improvement for 20K nodes: 64.2ms to ~53ms (1.2x). The index construction is pure Python dict/list operations. + +--- + +## FINDING 7: File Hashing Comparison + +### Measured Results + +| Algorithm | 50 files (5KB) | 200 files (10KB) | 500 files (20KB) | +|---|---|---|---| +| SHA256 (8KB buffer) | 0.98ms | 4.43ms | 14.3ms | +| SHA256 (64KB buffer) | 1.05ms | 4.61ms | 14.9ms | +| SHA256 (mmap) | 1.30ms | 5.76ms | 17.4ms | +| MD5 | 1.22ms | 6.44ms | 24.7ms | +| BLAKE2b | 1.04ms | 5.17ms | 17.5ms | + +### Analysis + +SHA256 with 8KB buffer is already the fastest option. Larger buffers and mmap add overhead for these file sizes. MD5 is slower (no hardware acceleration on this platform). File hashing consumes <0.5% of total runtime. No optimization needed. 
+ +--- + +## Summary: Validated vs Invalidated Recommendations + +| Recommendation | Language Report Projection | Measured Result | Verdict | +|---|---|---|---| +| Fix `find_ending_with` index | ~1.9x total speedup | **261x to 382x** on the operation | **VALIDATED (exceeds projection)** | +| Replace pathlib with strings | ~1.15x total speedup | **45x to 643x** on path ops | **VALIDATED (exceeds projection)** | +| orjson for JSON | 5x to 15x on JSON ops | **1.8x to 25x** depending on operation | **VALIDATED** | +| BLAKE3 for hashing | 4x to 10x speedup | **0.5x to 0.9x (slower)** | **INVALIDATED** | +| neo4j-rust-ext | 3x to 10x on DB ops | N/A (wrong driver) | **INVALIDATED** (uses Memgraph/pymgclient) | +| Rust AST extension | 10x to 16x on parsing | Not benchmarked (3.1% of CPU) | **DEPRIORITIZED** (targets 3.1% of runtime) | +| Rust trie | 3x to 8x on lookups | 1.5x to 3x net (per feasibility) | **SUPERSEDED** by Python index fix | + +## Revised Priority Order (Measured) + +| Priority | Fix | Type | Measured Speedup | Effort | +|---|---|---|---|---| +| **1** | Fix `find_ending_with` suffix index | Python bugfix | 261x to 382x on operation (~1.9x total) | Low | +| **2** | Replace pathlib with string ops | Python refactor | 45x to 643x on path ops (~1.15x total) | Low | +| **3** | Cache type inference results | Python memoization | Not benchmarked (projected ~1.07x total) | Low | +| **4** | Suppress debug logging | Config change | Not benchmarked (projected ~1.06x total) | Trivial | +| **5** | Deduplicate FS traversal | Python refactor | Not benchmarked (projected ~1.05x total) | Low | +| **6** | orjson for JSON | Dependency swap | 1.8x to 25x on JSON ops | Trivial | +| **7** | Rust AST extension | Rust crate | Targets 3.1% of CPU; ~1.03x total after Python fixes | High | + +**Combined estimated speedup from priorities 1 through 6: ~3.7x, with zero language rewrites.** + +The Rust AST extension (previously the headline recommendation at "10x to 16x") targets only 3.1% of 
actual CPU time and provides ~1.03x total improvement after the pure Python fixes are applied. It should only be considered for repositories significantly larger than the current benchmark workload. diff --git a/docs/reports/INTEGRATION_FEASIBILITY.md b/docs/reports/INTEGRATION_FEASIBILITY.md new file mode 100644 index 000000000..b65a9da31 --- /dev/null +++ b/docs/reports/INTEGRATION_FEASIBILITY.md @@ -0,0 +1,392 @@ +# Integration Feasibility Report + +## Build System and Deployment Context + +**Package manager:** `uv` (Astral), defined in `pyproject.toml` with `uv.lock` +**Build backend:** setuptools (via `[tool.setuptools]`), three packages: `codebase_rag`, `codec`, `cgr` +**Distribution:** PyPI wheel, Docker image (`python:3.12-slim`), PyInstaller binary +**CI/CD:** Pre-commit hooks (ruff, ty, bandit), Makefile targets +**Python version:** 3.12+ required +**Key native dependency:** `pymgclient` (compiled from source with `--no-binary-package`) + +--- + +## Candidate 1: orjson (Drop-in JSON Replacement) + +### Integration Strategy +Drop-in dependency swap. Replace `import json` with `import orjson` in graph_loader.py, graph_updater.py, services/graph_service.py, embedder.py, stdlib_extractor.py. + +### Integration Overhead +- **Serialization boundary:** Zero. orjson is a native CPython extension module (implemented in Rust) that operates directly on Python objects. No extra marshalling layer. +- **API difference:** `orjson.dumps()` returns `bytes` not `str`. Every `json.dumps()` call site that feeds the result to something expecting `str` needs `.decode()`. In this codebase, the `_write_graph_json` function in `main.py` uses `json.dump(graph_data, f, indent=2, ensure_ascii=False)` which would need adjustment since orjson's `OPT_INDENT_2` flag replaces the `indent` parameter. +- **Protobuf service:** `services/protobuf_service.py` does not use JSON. No impact. +- **Hash cache I/O:** `_save_hash_cache` and `_load_hash_cache` use `json.dump/load` with file objects. 
orjson does not support file-object streaming; need to call `orjson.dumps()` then `f.write()`. +- **Embedding cache:** Same pattern. `EmbeddingCache.save()` uses `json.dump(self._cache, f)`. Requires manual write of bytes. +- **Build system change:** Add `orjson>=3.10.0` to `[project.dependencies]`. orjson publishes pre-built wheels for all platforms. No toolchain change. +- **Docker impact:** Zero. orjson wheels are self-contained. +- **PyInstaller impact:** Add `--hidden-import orjson`. orjson is a single .so/.pyd file, minimal size increase. + +### Net Projected Gain +- **Raw gain:** 5x to 15x on JSON operations +- **Integration overhead:** Near zero. ~10 call sites need minor API adjustments (bytes vs str, file.write vs json.dump). +- **Net gain:** 5x to 15x on JSON operations. No overhead erosion. +- **Risk:** Very low. Widely adopted library (polars, FastAPI, etc.) + +--- + +## Candidate 2: neo4j-rust-ext (NOT APPLICABLE) + +### Integration Strategy +NOT APPLICABLE. This codebase uses **Memgraph** via `pymgclient` (mgclient C library), NOT the Neo4j Python driver. The `neo4j-rust-ext` package patches the `neo4j` Python driver's PackStream implementation. It has zero effect on `pymgclient`. + +### Assessment +- `services/graph_service.py` imports `mgclient`, connects to Memgraph, and uses the mgclient C API directly. +- There is no `neo4j` dependency in `pyproject.toml`. +- The language researcher's recommendation was based on an incorrect assumption about the database driver. + +### Alternative for Memgraph Driver +- pymgclient is already a C extension wrapping Memgraph's C client library. It is already compiled code. +- The actual overhead is in Python-side batch construction (building `list[RelBatchRow]` and `list[NodeBatchRow]` dicts), Cypher query string formatting, and result deserialization in `_cursor_to_results`. +- The `_cursor_to_results` method iterates cursor results and builds `list[ResultRow]` via `dict(zip(column_names, row))`. 
This is pure Python overhead. +- Potential optimization: Use cursor iteration in C rather than Python, but this requires pymgclient changes, not neo4j-rust-ext. + +### Net Projected Gain +- **Net gain:** None. This recommendation is inapplicable. + +--- + +## Candidate 3: BLAKE3 (Embedding Cache Hashing) + +### Integration Strategy +Drop-in hash function replacement in `EmbeddingCache._content_hash()` and `_hash_file()` in `graph_updater.py`. + +### Integration Overhead +- **Serialization boundary:** Zero. The `blake3` Python package is a compiled native extension (Rust bindings built with PyO3). +- **API change:** `hashlib.sha256(content.encode()).hexdigest()` becomes `blake3.blake3(content.encode()).hexdigest()`. One-line change per call site. +- **Cache invalidation:** Existing embedding caches (`.qdrant_code_embeddings/embedding_cache.json`) and file hash caches (`.file_hashes.json`) will be invalidated because hash values change. This forces a full re-index on first run after the change. +- **Build system change:** Add `blake3>=1.0.0` to dependencies. blake3 publishes pre-built wheels. +- **Docker/PyInstaller:** Minimal impact. blake3 is a small native extension. + +### Net Projected Gain +- **Raw gain:** 4x to 10x on hashing operations +- **Practical impact:** Hashing is NOT the bottleneck. `_hash_file` reads 8KB chunks and hashes them. For a typical codebase (1000 files, avg 5KB), total hashing takes ~5ms (already fast because hashlib SHA256 is C-backed). The real cost is the filesystem reads, not the hash computation. +- **Embedding cache hashing:** Similarly marginal. `_content_hash` hashes short code snippets. Each call takes microseconds. +- **Cache invalidation cost:** Forces a full re-indexing pass (potentially minutes for large repos), creating a one-time negative impact that dwarfs the per-operation savings. +- **Net gain:** Negligible in practice. The 4x to 10x improvement applies to an operation that takes microseconds per call. 
+- **Recommendation:** Skip unless profiling proves hashing is >5% of total wall clock time. + +--- + +## Candidate 4: Rust AST Processing Extension (via PyO3/maturin) + +### Integration Strategy +Build a Rust extension crate (e.g., `codebase-rag-core`) that accepts file bytes + language enum and returns structured extraction results. Use PyO3 for Python bindings and maturin for building. + +### Integration Overhead Assessment + +**Data crossing the FFI boundary:** +- **Input:** File bytes (`bytes`) and language enum (`str`). Minimal copy cost. PyO3 provides zero-copy access to Python bytes via `&[u8]`. +- **Output:** The Rust extension must return complex structured data to Python: + - Function definitions: list of (qualified_name, name, start_line, end_line, decorators, docstring) + - Class definitions: list of (qualified_name, name, parent_classes, methods) + - Call relationships: list of (caller_qn, callee_qn, caller_type, callee_type) + - Import mappings: dict of (module_qn -> dict of (local_name -> imported_qn)) + + Each of these requires constructing Python objects from Rust data. For a file with 50 functions and 200 call sites, this means ~250 Python dict/tuple creations on the return path. + +**Boundary crossing cost estimate:** +- PyO3 object creation: ~100ns per Python object (dict, str, list element) +- For a typical large file (50 functions, 100 calls, 20 imports): ~170 result objects * 5 fields each = ~850 Python object creations = ~85 microseconds +- Per-file processing time in Python currently: ~5-50ms (depends on file size) +- **FFI boundary cost as fraction of saved time: <1%**. This is excellent. + +**Coupling analysis:** + +The Rust extension needs to replicate or subsume: +1. `definition_processor.py` (7.5KB): Function/class/method extraction from AST +2. `call_processor.py` (13.7KB): Call relationship extraction +3. `call_resolver.py` (24.4KB): Call resolution with trie lookups, inheritance chains, import maps +4. 
`import_processor.py` (40KB): Language-specific import parsing (Python, JS/TS, Java, Rust, Go, C++, Lua) +5. `function_ingest.py` (16.4KB): Function registration and qualified name resolution +6. `type_inference.py` (5.8KB) + language-specific engines: Type inference for call resolution +7. `FunctionRegistryTrie` in `graph_updater.py`: Trie data structure + +Total: ~110KB of Python code with complex multi-language logic spanning 8+ languages. + +**Build system changes:** +- Add `maturin` as build dependency +- Add a `Cargo.toml` at project root or in a subdirectory (e.g., `rust/`) +- Add `tree-sitter` and language grammar crates as Rust dependencies +- Modify `pyproject.toml` to include maturin build configuration or create a separate wheel +- CI needs Rust toolchain (rustup) installed +- Docker builder stage needs Rust toolchain (~300MB image layer increase) +- PyInstaller needs to collect the compiled .so/.pyd from the Rust extension + +**Compatibility concerns:** +- Tree-sitter versions must match between Rust and Python. The codebase uses `tree-sitter==0.25.2`. The Rust `tree-sitter` crate version must be compatible. +- The Rust extension must handle all 9 supported languages with language-specific AST patterns. +- The `IngestorProtocol` interface (ensure_node_batch, ensure_relationship_batch) is called from within the processing loop. Either the Rust extension calls back into Python (expensive, defeats the purpose) OR the Rust extension accumulates all results and returns them in bulk (preferred). + +**Critical: tree-sitter Node FFI constraint (from adversarial review):** +- Tree-sitter `Node` objects are C-level pointers that cannot be marshalled across FFI boundaries. The call resolution pipeline operates on `Node` objects thousands of times per file. +- This rules out an incremental approach (e.g., rewriting just CallResolver in Rust while keeping Python tree-sitter nodes). 
The Rust extension must parse files from scratch using the `tree-sitter` Rust crate directly, producing Rust-native `Node` references. +- Consequence: for any given language, the Rust extension is an all-or-nothing replacement of the entire parse-extract-resolve pipeline. Incremental migration by pipeline stage is not feasible. This increases both effort and risk. + +**Deployment complexity:** +- Requires publishing platform-specific wheels (linux-x86_64, linux-aarch64, macos-x86_64, macos-arm64, windows-x64) +- maturin handles this via GitHub Actions + `maturin[zig]` for cross-compilation +- Users without pre-built wheels need a Rust toolchain to install from source +- The Docker image build becomes significantly more complex (multi-stage with Rust) + +### Net Projected Gain +- **Raw gain:** 10x to 16x on AST processing (the primary CPU hotspot) +- **FFI boundary overhead:** <1% (excellent input/output ratio: bytes in, structured results out) +- **Build system overhead:** Significant one-time cost. Ongoing CI cost of ~2-3 min for Rust compilation per release. +- **Development effort:** High. ~110KB of Python code to rewrite in Rust, with complex multi-language pattern matching. +- **Net gain:** 9x to 15x on AST processing operations, assuming bulk return pattern. +- **Risk:** Medium-high. Large surface area, 8+ language parsers, tight coupling with existing Python data structures. +- **Recommendation:** High value, but roll it out incrementally by language rather than by pipeline stage (which the Node FFI constraint rules out). Port the full parse-extract-resolve pipeline for a single language (Python) as a proof of concept, measure actual gains, then expand to the other languages. + +--- + +## Candidate 5: Rust FunctionRegistryTrie (via PyO3) + +### Integration Strategy +Expose a Rust-backed trie as a Python class via PyO3, bundled in the same crate as Candidate 4. + +### Integration Overhead Assessment + +**Data crossing the FFI boundary:** +- **Insert:** Python str -> Rust &str (zero-copy via PyO3), Rust stores owned copy. Cost: one string allocation per insert. 
+- **Lookup (`__contains__`, `get`):** Python str -> Rust &str (zero-copy), returns bool or Python str. Cost: near zero per lookup. +- **Batch operations (`find_ending_with`, `find_with_prefix`):** Returns list of Python strings. For a query returning 50 matches, this means 50 Python string allocations. + +**Boundary crossing cost estimate:** +- Single lookup: ~50ns (vs ~200ns in Python dict) +- `find_ending_with` returning 10 results: ~1us (vs ~50us scanning Python dict) +- The trie has hot-path usage in `call_resolver.py` where every call expression triggers 2-5 trie lookups. + +**Coupling with Candidate 4:** +- If AST processing moves to Rust (Candidate 4), the trie must also be in Rust to avoid crossing back to Python for every lookup during call resolution. +- If Candidate 4 is NOT done, the Rust trie is still useful standalone, but the benefit is reduced because the Python call resolution code still creates Python strings for every lookup key. + +**Build system changes:** +- Bundled with Candidate 4. No additional build complexity. + +### Net Projected Gain +- **Raw gain:** 3x to 8x on trie operations +- **Standalone net gain (without Candidate 4):** 1.5x to 3x. Python call resolution code still creates string objects for lookup keys. FFI crossing happens per-lookup. +- **Combined net gain (with Candidate 4):** 3x to 8x. All trie operations happen in Rust with no FFI boundary during resolution. +- **Recommendation:** Only implement together with Candidate 4. Standalone, the integration overhead cuts the gains roughly in half. + +--- + +## Candidate 6: File Processing Parallelism (Python) + +### Integration Strategy +Use `concurrent.futures.ProcessPoolExecutor` to parallelize per-file processing in `GraphUpdater._process_files()`. 
+ +### Integration Overhead Assessment + +**Serialization at boundary:** +- Each worker process needs: file path (Path, serializable), language queries (NOT serializable: contains tree-sitter Parser, Query, Language objects which are C pointers). +- **Critical problem:** `LanguageQueries` contains `Parser`, `Query`, and `Language` objects from tree-sitter, which are C-level objects that cannot be serialized across process boundaries. +- Each worker would need to call `load_parsers()` independently, loading all language grammars (~50ms startup cost per worker). +- Results (function definitions, call relationships) are Python dicts/tuples that serialize easily. + +**State synchronization:** +- `FunctionRegistryTrie` is shared mutable state. Workers write to it during function registration, and readers need it during call resolution. +- With multiprocessing, each worker would have its own trie. Merging tries after parallel processing adds complexity. +- `import_mapping` in `ImportProcessor` is similarly shared mutable state. +- The three-pass architecture (structure -> definitions -> calls) has inherent sequential dependencies: pass 3 needs results from pass 2. + +**GIL considerations:** +- `threading.Thread` would not help because call resolution is CPU-bound Python code that is serialized by the GIL. +- `ProcessPoolExecutor` bypasses the GIL but introduces serialization overhead. +- Estimated per-file serialization overhead for results: ~0.1ms per file. +- For 1000 files on 4 cores: ~100ms of serialization work in total, i.e. ~25ms of wall-clock overhead when spread across the workers, vs ~5000ms saved. + +### Net Projected Gain +- **Raw gain:** 2x to 4x (limited by sequential passes and Amdahl's law) +- **Serialization overhead:** ~25ms for 1000 files on 4 cores (minimal) +- **Worker initialization overhead:** ~50ms per worker (grammar loading), amortized across files +- **Architecture complexity:** High. Requires restructuring the three-pass processing pipeline, managing shared state (trie, import maps), and handling errors across processes. 
+- **Net gain:** 1.5x to 3x after accounting for sequential bottlenecks (pass dependencies) +- **Recommendation:** Medium priority. Worth doing after Candidate 4 (Rust extension) is evaluated. If Candidate 4 makes per-file processing fast enough, parallelism becomes less critical. + +--- + +## Candidate 7: String Processing in Call Resolution (Rust) + +### Integration Strategy +Bundled with Candidate 4. Call resolution logic moves into the Rust AST processing extension. + +### Integration Overhead +- **Standalone:** NOT recommended. Call resolution is deeply interleaved with trie lookups, import map lookups, and AST node access. Extracting just the string processing would require marshalling all context (import maps, trie state, class inheritance) across FFI on every call. +- **Bundled with Candidate 4:** Zero additional FFI overhead. The Rust extension performs call resolution as part of the same processing pass. + +### Net Projected Gain +- **Standalone net gain:** Negative. The overhead of passing import maps and trie state across FFI for each call resolution would exceed the savings from faster string processing. +- **Bundled net gain:** 5x to 10x (absorbed into Candidate 4's gains) +- **Recommendation:** Only implement as part of Candidate 4. + +--- + +## Summary: Feasibility Verdicts + +| Candidate | Strategy | FFI Overhead | Build Impact | Net Gain | Verdict | +|---|---|---|---|---|---| +| 1. orjson | Dependency swap | None | Trivial | 5x-15x on JSON | **PROCEED** | +| 2. neo4j-rust-ext | N/A | N/A | N/A | 0x (wrong driver) | **REJECT** | +| 3. BLAKE3 hashing | Dependency swap | None | Trivial | Negligible | **SKIP** (not a bottleneck) | +| 4. Rust AST extension | PyO3/maturin crate | <1% | Significant | 9x-15x on AST | **PROCEED** (incremental) | +| 5. Rust trie | PyO3 (bundled #4) | ~50% standalone | Bundled with #4 | 1.5x-3x standalone, 3x-8x bundled | **BUNDLE with #4** | +| 6. 
File parallelism | ProcessPoolExecutor | ~5ms/1000 files | Moderate refactor | 1.5x-3x | **DEFER** (after #4) | +| 7. String processing | Rust (bundled #4) | Negative standalone | Bundled with #4 | Negative standalone, 5x-10x bundled | **BUNDLE with #4** | + +## Key Finding: Integration Overhead Negation Analysis + +The critical insight is that **Candidates 5 and 7 lose most or all of their gains if implemented standalone** because the FFI boundary is then crossed on every operation: Candidate 5 drops from 3x-8x to 1.5x-3x, and Candidate 7 becomes a net loss because the boundary crossing cost exceeds the per-operation savings. They are only worth implementing when bundled with Candidate 4, which keeps all related operations on the Rust side of the boundary. + +This validates the principle: **a function that is 10x faster but incurs 8x overhead at the boundary yields only a 1.25x improvement.** For Candidates 5 and 7, the standalone case is especially unfavorable because the boundary must be crossed per-lookup (thousands of times per file) rather than per-file. + +**Candidate 2 is completely inapplicable** due to incorrect driver assumption. + +**Candidate 3 optimizes a non-bottleneck** (microsecond-level operations). + +The only candidates with clear positive ROI accounting for integration overhead are: +1. **orjson** (zero overhead, significant JSON gains) +2. **Rust AST extension** (minimal overhead due to bytes-in/results-out architecture, massive CPU gains) + +--- + +## ADDENDUM: Revised Analysis Based on CPU Profiling Data + +The CPU profiling report (cProfile, 31.2s total, 179M function calls on 352 Python files) **dramatically changes the priority landscape.** The actual hotspots are fundamentally different from those assumed in the language recommendations. + +### Profiling Reality vs. 
Language Researcher Assumptions + +| Rank | Actual Hotspot | % CPU | Language Researcher Assumption | +|------|---------------|-------|-------------------------------| +| 1 | `find_ending_with` linear scan | 48.3% | Assumed trie was working; recommended Rust trie for data layout improvement | +| 2 | `should_skip_path` pathlib overhead | 13.7% | Not identified as a hotspot | +| 3 | `build_local_variable_type_map` (uncached AST retraversal) | 8.3% | Assumed this was part of general AST processing | +| 4 | Loguru debug logging overhead | 5.9% | Not identified | +| 5 | `identify_structure` (duplicate FS traversal) | 5.0% | Not identified | +| 6 | tree-sitter `QueryCursor.captures` | 2.5% | Assumed this was the primary bottleneck (10x-16x claim) | +| 7 | tree-sitter `Parser.parse` | 0.6% | Assumed this was the primary bottleneck | + +**Tree-sitter operations total 3.1% of CPU time.** The language researcher's Hotspot 1 ("AST Parsing and Traversal, 10x-16x via Rust") targeted an operation that consumes only 3.1% of runtime. A 16x speedup on 3.1% of runtime yields 1.03x total speedup (Amdahl's law). The projected 10x-16x headline number is misleading. + +### Revised Candidate Assessments + +#### NEW CANDIDATE A: Fix `find_ending_with` Linear Scan (Pure Python Fix) + +**Integration strategy:** Pure Python algorithmic fix. No FFI, no new dependencies. + +**Root cause:** `_simple_name_lookup` index has an 80.7% miss rate (22,096 of 27,376 calls). On miss, the code falls back to `[qn for qn in self._entries.keys() if qn.endswith(f".{suffix}")]`, scanning all ~4,500 entries per call. This generates 123.7M `str.endswith()` invocations. + +**Fix options:** +1. **Populate `_simple_name_lookup` more aggressively:** The index only contains entries added via `FunctionRegistryTrie.insert()` which populates `self._simple_name_lookup` via the passed-in reference. 
The 80.7% miss rate suggests many qualified names are inserted through code paths that bypass the simple name index population. Audit all insertion paths. +2. **Build a suffix index:** Create a `dict[str, set[QualifiedName]]` mapping the last dot-separated segment of every qualified name to its full name. This converts O(n) scans to O(1) lookups. +3. **Cache negative results:** If a suffix has been scanned and yielded no results, cache that fact to avoid re-scanning. + +**Integration overhead:** Zero. This is a bugfix/optimization within existing Python code. +**Projected gain:** Eliminating 15.07s (48.3% of total) would reduce total runtime from 31.2s to ~16.1s. Even a 90% reduction (fixing most misses) saves ~13.5s. +**Net gain:** ~1.9x total speedup from a pure Python fix. +**Risk:** Very low. + +#### NEW CANDIDATE B: Replace pathlib with String Operations in `should_skip_path` + +**Integration strategy:** Pure Python refactor. Replace `Path.relative_to()` (3.39s across 59,012 calls) with `str.removeprefix()` or `os.path.relpath()`. + +**Root cause:** `pathlib.PurePosixPath.relative_to()` creates intermediate path objects on every call. For 59,012 calls, this creates ~118,000 intermediate objects. + +**Fix:** Convert paths to strings at the boundary and use `str.startswith()` / `str.removeprefix()` for prefix checks. The `should_skip_path` function only needs string comparison operations. + +**Integration overhead:** Zero. Internal refactor. +**Projected gain:** 4.29s (13.7%) reduced to ~0.2s (estimated 20x faster for string ops vs pathlib). Saves ~4s. +**Net gain:** ~1.15x total speedup. +**Risk:** Very low. + +#### NEW CANDIDATE C: Cache `build_local_variable_type_map` Results + +**Integration strategy:** Memoize results keyed by (file_path, function_start_line, function_end_line). + +**Root cause:** Called 5,228 times, re-traversing AST nodes that have already been parsed. Multiple functions in the same file trigger independent traversals. 
+ +**Integration overhead:** Memory cost of caching ~5,000 dict results. Estimated ~2MB. +**Projected gain:** 2.59s (8.3%) reduced to ~0.5s (first traversal per function cached, subsequent hits free). Saves ~2s. +**Net gain:** ~1.07x total speedup. +**Risk:** Low. Need to ensure cache is invalidated when files change (already handled by the incremental update system). + +#### NEW CANDIDATE D: Suppress Debug Logging in Production + +**Integration strategy:** Set loguru level to INFO or WARNING during graph building, or use lazy evaluation for debug messages. + +**Root cause:** 85,099 `debug()` calls processed (1.75s) even when debug output is not displayed. + +**Fix options:** +1. Wrap debug calls in `if logger.level <= DEBUG` guards. +2. Use `logger.opt(lazy=True).debug(lambda: ...)` for expensive format strings. +3. Set log level to INFO at the start of `GraphUpdater.run()`. + +**Integration overhead:** Zero. +**Projected gain:** 1.84s (5.9%) reduced to ~0.1s. Saves ~1.7s. +**Net gain:** ~1.06x total speedup. +**Risk:** Very low. Debug output is not needed during normal operation. + +#### NEW CANDIDATE E: Deduplicate Filesystem Traversal + +**Integration strategy:** `identify_structure()` and `_collect_eligible_files()` both call `rglob("*")` + `should_skip_path()`. Merge into a single traversal pass. + +**Integration overhead:** Moderate refactor of the two-pass architecture. +**Projected gain:** 1.57s (5.0%) eliminated for the duplicate pass. If combined with Candidate B (string paths), the single remaining pass also runs ~20x faster. +**Net gain:** ~1.05x total speedup. +**Risk:** Low. 
+ +### Combined Impact of Pure Python Fixes (Candidates A through E) + +| Fix | Time Saved | % of Total | +|-----|-----------|------------| +| A: Fix find_ending_with | ~13.5s | 43.3% | +| B: String paths | ~4.0s | 12.8% | +| C: Cache type inference | ~2.0s | 6.4% | +| D: Suppress debug logging | ~1.7s | 5.5% | +| E: Deduplicate FS traversal | ~1.5s | 4.8% | +| **Total saved** | **~22.7s** | **72.8%** | +| **Remaining runtime** | **~8.5s** | **27.2%** | + +**Combined speedup: ~3.7x from pure Python fixes alone, with zero integration overhead, zero build system changes, and zero deployment complexity.** + +After these fixes, the remaining 8.5s would be: +- tree-sitter operations: ~1.0s (now 11.8% of reduced total) +- Remaining call resolution: ~2.5s +- File I/O + hashing: ~0.5s +- Graph construction: ~2.5s +- Miscellaneous: ~2.0s + +### Revised Candidate 4 (Rust AST Extension) Assessment + +After pure Python fixes, tree-sitter operations are 1.0s out of 8.5s (11.8%). A 16x Rust speedup on tree-sitter would save 0.94s, reducing total runtime from 8.5s to 7.6s (1.12x improvement). **This is far below the break-even threshold** given the high development cost (~110KB of Python code to port) and build system complexity. + +The Rust AST extension only becomes worthwhile AFTER all pure Python fixes are applied AND the workload scales to much larger codebases (10,000+ files) where tree-sitter operations become a larger fraction of the reduced total. 
+ +### Revised Priority Order + +| Priority | Candidate | Type | Net Gain (on 31.2s total) | Effort | Integration Overhead | +|----------|-----------|------|---------------------------|--------|---------------------| +| **1** | **A: Fix find_ending_with** | **Python bugfix** | **~1.9x (13.5s saved)** | **Low** | **Zero** | +| **2** | **B: String path ops** | **Python refactor** | **~1.15x (4.0s saved)** | **Low** | **Zero** | +| **3** | **C: Cache type inference** | **Python memoization** | **~1.07x (2.0s saved)** | **Low** | **Zero** | +| **4** | **D: Suppress debug logging** | **Config change** | **~1.06x (1.7s saved)** | **Trivial** | **Zero** | +| **5** | **E: Deduplicate FS traversal** | **Python refactor** | **~1.05x (1.5s saved)** | **Low** | **Zero** | +| 6 | 1: orjson | Dependency swap | Marginal on indexing | Trivial | Zero | +| 7 | 4+5+7: Rust AST extension | Rust crate | 1.12x after Python fixes | High | Significant | +| 8 | 6: File parallelism | Architecture change | 1.5x-3x after Python fixes | Moderate | Moderate | + +### Conclusion + +**The top 5 optimizations require zero language rewrites and zero integration overhead.** They fix algorithmic inefficiencies (linear scan), unnecessary object creation (pathlib), redundant computation (uncached type inference, duplicate traversal), and avoidable overhead (debug logging). Together they provide ~3.7x speedup. + +The Rust AST extension (previously the headline recommendation) addresses only 3.1% of actual CPU time and is demoted to priority 7. It should only be reconsidered after Python-level fixes are applied and the workload scales to repositories an order of magnitude larger than the current test case. 
diff --git a/docs/reports/LANGUAGE_RECOMMENDATIONS.md b/docs/reports/LANGUAGE_RECOMMENDATIONS.md new file mode 100644 index 000000000..fb2cd7d24 --- /dev/null +++ b/docs/reports/LANGUAGE_RECOMMENDATIONS.md @@ -0,0 +1,423 @@ +# Language Recommendations for Performance Hotspots + +## Executive Summary + +**CPU profiling reveals that 48.3% of total runtime is spent in a single Python function** (`FunctionRegistryTrie.find_ending_with()`) performing a linear scan fallback with 123.7M `str.endswith()` calls. This is a pure algorithmic bottleneck, not a language limitation, and fixing the simple name lookup index (80.7% miss rate) would nearly halve total runtime with zero language rewrite. + +After addressing algorithmic issues (Phase 0: ~3.7x total improvement from pure Python fixes), **Rust via PyO3** is the recommended target language for the remaining CPU-bound hotspots (AST wrapper overhead, trie operations, call resolution). For serialization, **orjson** (Rust-backed) is a drop-in replacement for stdlib json. ~~neo4j-rust-ext~~ was retracted (codebase uses Memgraph/pymgclient, not Neo4j). + +**Critical distinction:** This report contains both theoretical per-instruction overhead multipliers (20x-50x from structural analysis) and empirical runtime impact (from CPU profiling). The structural multipliers explain WHY Python is slow at specific operations, but the IMPACT must be measured against the actual profiled runtime distribution via Amdahl's law. After Phase 0 Python fixes reduce the baseline from 31.2s to ~8-10s, the Rust extension (Phase 2) addresses ~20% of the reduced baseline, yielding diminishing but still meaningful returns. + +**Profiling baseline:** 31.2 seconds (cProfile), 14.0s (wall-clock), 179M function calls for indexing 352 Python files. 
+ +--- + +## Hotspot Categories and Recommendations + +### HOTSPOT 1: Tree-sitter AST Parsing and Traversal + +**Files:** `parsers/call_processor.py`, `parsers/call_resolver.py`, `parsers/definition_processor.py`, `parsers/function_ingest.py`, `parsers/structure_processor.py`, all `parsers/handlers/*.py` + +**Workload:** Per-file tree-sitter parsing, QueryCursor iteration, recursive Node traversal, text extraction/decoding from AST nodes. Every file in a repository triggers full AST parsing and multi-pass traversal for functions, classes, calls, and imports. + +**Recommended Language:** Rust (via PyO3/maturin) + +**Projected Speedup:** 20x to 50x (revised upward based on structural analysis) + +**CPU PROFILING DATA:** +- `TypeInferenceEngine.build_local_variable_type_map()`: **2.59s cumulative (8.3%)** across 5,228 calls. Traverses ASTs that have already been parsed, with no caching of results across calls within the same file. +- `QueryCursor.captures()`: **0.78s self time (2.5%)** across 11,028 calls. Already a C extension, largely irreducible. +- `Parser.parse()`: **0.19s self time (0.6%)** across 352 calls. Already C, already fast. +- **Key insight from profiling:** Tree-sitter C operations (parse + captures) total only ~1.0s (3.1% of runtime). The overwhelming majority of AST-related CPU time is in the Python wrapper code doing traversal, type inference, and call resolution around these fast C operations. This validates the Rust rewrite approach: keep tree-sitter's C parsing (fast), move the Python traversal/processing into Rust. +- Loguru debug logging: **1.84s cumulative (5.9%)** across 91,119 calls, including 85,099 debug-level calls processed even when not displayed. This is a Python-level fix (reduce log level or guard debug calls). + +**Evidence:** +- Gauge.sh case study: Moving AST-dependent operations into a Rust extension yielded a 16x speedup (8.7s to 530ms) on a 500k-line codebase. 
The original Python implementation made ~60M malloc calls and spent 35% of cycles on GC; the Rust version made ~7M malloc calls with no significant GC activity. [Source: gauge.sh/blog/python-extensions-should-be-lazy] +- Tree-sitter is already written in C/Rust. The Python bindings add per-node FFI overhead on every `.child_by_field_name()`, `.text`, and `.children` access. Moving traversal logic into Rust eliminates this boundary-crossing cost entirely. +- ast-grep (Rust-based tree-sitter tool) demonstrates that keeping AST processing in Rust-land and only returning final results to Python is the optimal architecture. [Source: github.com/ast-grep/ast-grep] +- **Structural analysis (CRITICAL severity):** Static analysis confirmed 20x to 50x overhead multiplier per node visit. Every `.parent`, `.children`, `.type` access on tree-sitter nodes goes through Python's descriptor protocol (~50 instructions vs ~1 instruction for a direct struct field read in Rust/C). Specific hot patterns identified: + - `_build_nested_qualified_name()` in `function_ingest.py:344-389`: walks parent chain upward + - `_resolve_inherited_method()` in `call_resolver.py:624-649`: BFS through class_inheritance dict + - `is_method_node()` in `parsers/utils.py:159-173`: walks parent chain for every function node + - `_collect_ancestor_path_parts()` in `function_ingest.py:369-389`: ancestor walk with repeated type checks + - `_is_nested_inside_function()` in `class_ingest/mixin.py:34-45`: another parent chain walk +- **Additional structural overhead:** `bytes.decode("utf-8")` on every `node.text` access (MEDIUM severity, 3x to 5x overhead). The LRU cache at `parsers/utils.py:48-50` mitigates this partially, but `call_processor.py:49` bypasses the cache entirely. In Rust, zero-copy `&[u8]` slices eliminate this entirely. 
+ +**Architecture:** Build a Rust extension that accepts file bytes and a language enum, performs tree-sitter parsing and all traversal passes (function extraction, class extraction, call extraction, import extraction) in Rust, and returns structured results (lists of function definitions, call relationships, class hierarchies) as Python objects. + +**GIL consideration (from concurrency analysis):** Tree-sitter's C extension already releases the GIL during parsing, which enables ThreadPoolExecutor parallelism for the current Python implementation. Any Rust rewrite MUST preserve this property by using `Python::allow_threads` in PyO3 during parsing and traversal, enabling concurrent file processing across threads without process-level parallelism overhead. + +**Why not Cython:** Cython cannot eliminate the Python-to-C FFI overhead of tree-sitter node access, since the bottleneck is the per-node boundary crossing, not Python loop overhead. Rust allows direct tree-sitter C API access without Python object creation. + +**Why not Go:** Go's FFI to C (cgo) has higher overhead than Rust's native C interop. Go's garbage collector would reintroduce the GC pauses that are a key problem in the Python implementation. PyO3 is a more mature Python interop story than Go's limited options (gopy, cgo+ctypes). + +--- + +### HOTSPOT 2: FunctionRegistryTrie Operations + +**Files:** `graph_updater.py` (FunctionRegistryTrie class), `parsers/call_resolver.py` + +**Workload:** Trie insertion and lookup for qualified function names. Every function/method/class definition triggers a trie insert (string splitting on `.`, nested dict traversal). Every call resolution triggers trie lookups, often with multiple fallback strategies (direct lookup, inheritance chain walking, simple name fallback). 
+ +**Recommended Language:** Rust (via PyO3/maturin) + +**Projected Speedup:** 10x to 50x on the post-fix baseline (NOT on the current 15s runtime) + +**IMPORTANT CONTEXT (from integration-architect):** The 10x-50x speedup applies to trie operations AFTER the algorithmic index fix (Priority 0a). After fixing the `_simple_name_lookup` 80.7% miss rate, trie operations drop from 15s to under 1s in pure Python. The Rust trie's 10x-50x improvement then applies to an operation taking <1s, yielding <1s additional savings. The algorithmic fix alone yields ~2x on total runtime. The Rust rewrite is justified by (a) GIL release enabling thread parallelism and (b) cumulative savings across all trie/string operations, but the root cause is an algorithmic bug, not a language limitation. + +**CPU PROFILING DATA (the #1 finding):** +- `find_ending_with()` at `graph_updater.py:156`: **7.91s self time (25.3%), 15.07s cumulative (48.3%)** across 27,376 calls +- Root cause: The `_simple_name_lookup` index has an **80.7% miss rate** (22,096 of 27,376 calls miss). On each miss, the code falls back to a linear scan: `[qn for qn in self._entries.keys() if qn.endswith(f".{suffix}")]`, triggering **123.7M `str.endswith()` calls** (7.21s self time) +- Called 26,950 times from `CallResolver._try_resolve_via_trie()`, the last-resort call resolution strategy +- **This single function accounts for nearly half of all CPU time. The trie data structure exists but is bypassed in favor of the linear fallback in most cases.** +- **CRITICAL: Fix the simple name lookup index first (Python algorithmic fix).** A proper reverse index mapping simple names to qualified names would eliminate the linear scan entirely, reducing this from 15.07s to sub-second. This is the highest-ROI optimization in the entire codebase. 
Note: even after the algorithmic fix, Python's per-call `str.endswith()` overhead is 5x to 10x what Rust byte-slice comparisons would cost (structural analysis cross-reference), so the Rust trie rewrite remains valuable for the remaining lookup operations. + +**Evidence for language rewrite (after algorithmic fix):** +- **Concurrency analysis confirms this is GIL-bound:** Pure Python trie/dict operations in `FunctionRegistryTrie` and `CallResolver` hold the GIL throughout, preventing any thread-level parallelism. The concurrency analyst estimates 10x to 50x speedup from moving this to native code. This is the strongest case for a Rust rewrite since it eliminates both per-operation overhead AND the GIL bottleneck. +- The current implementation uses nested Python dicts as trie nodes, which means every level of trie traversal creates Python string objects and performs dict hash lookups with full Python object overhead. +- **Structural analysis (HIGH severity):** Python dicts carry 50 to 80 bytes overhead per entry plus hash computation. Each `in` or `[]` lookup involves: hash the key string (O(n) for string length), probe the hash table, compare keys. In Rust, a `HashMap` has similar algorithmic complexity but with inline storage, no reference counting, and cache-friendly memory layout. Specialized data structures (arena-allocated tries, interned string IDs) are practical in systems languages but impractical in Python due to the object model. +- **String overhead (HIGH severity, 5x to 15x):** Qualified names are constructed, split, compared, and looked up thousands of times per file. Each `.split(".")` allocates a new list of new string objects. Each f-string creates a new heap allocation. `_calculate_import_distance()` at `call_resolver.py:651-671` splits both strings and compares elementwise. In Rust, these would be zero-copy string views or stack-allocated slices. 
+- Rust trie implementations (radix_trie crate) store data contiguously in memory with no per-node heap allocation, eliminating GC pressure. For high-miss-rate lookups (common in call resolution with fallback chains), optimized Rust tries outperform Python dicts. [Source: dev.to/timclicks/two-trie-implementations-in-rust] +- The Gauge.sh case study showed that moving data structures out of Python and into compact Rust structs reduced malloc calls by 8.5x, directly relevant to this trie-heavy workload. +- PyO3 achieves 92% of pure Rust performance for data structure operations while maintaining full Python interoperability. [Source: pyo3.rs/main/performance] + +**Architecture:** First, fix the `_simple_name_lookup` index to cover the 80.7% miss cases (Python fix). Then, implement `FunctionRegistryTrie` as a Rust struct exposed via PyO3. The `insert()`, `get()`, and `find_ending_with()` methods accept Python strings, perform all trie operations in Rust, and return results. The `__contains__` check (used heavily in call resolution) stays in Rust. Use Rust's `lasso` or `string-interner` crate for interned string IDs to eliminate the qualified name duplication across trie, `_entries`, `simple_name_lookup`, and `import_mapping` (memory profiling shows 3.5 MiB for 10k entries in Python vs ~400 KiB estimated in Rust with interning, a 9x reduction). + +**Convergence point (CPU + memory):** This is the strongest single rewrite target in the codebase. FunctionRegistryTrie is simultaneously the #1 CPU hotspot (48.3%) AND carries 9x memory overhead. A Rust replacement addresses both dimensions in one component. + +**Why not Cython:** Cython would help with loop overhead but cannot change the fundamental data layout. The bottleneck is Python dict overhead per trie node, which requires a different data structure (Rust's contiguous memory layout). 
+ +--- + +### HOTSPOT 3: JSON Serialization/Deserialization for Graph Data + +**Files:** `graph_loader.py`, `graph_updater.py`, `services/graph_service.py` + +**Workload:** Loading and saving large graph JSON files (nodes, relationships, properties). The `GraphLoader.load()` method reads potentially multi-megabyte JSON files. The `GraphUpdater` serializes graph data for Memgraph ingestion. + +**Recommended Language:** Drop-in replacement with orjson (Rust-backed) + +**Projected Speedup:** 5x to 15x + +**Evidence:** +- orjson (written in Rust) is 2x to 15.8x faster than Python's stdlib json, depending on payload size. For large payloads (>1MB), gains are 10x or more. [Source: medium.com/codeelevation/want-500-faster-json-in-python-try-orjson] +- orjson uses SIMD (AVX2) for parallel UTF-8 validation and string escaping, scanning 32 bytes at once vs byte-by-byte. [Source: github.com/ijl/orjson] +- Memory usage is 75% lower peak RSS, which matters for large graph files. +- For a 10K-record benchmark, orjson achieved 820 MB/s serialization vs json's 52 MB/s (15.8x). + +**Architecture:** Replace `import json` with `import orjson` throughout the codebase. This is the lowest-effort of the language-level optimizations. orjson is a drop-in replacement for most use cases. The main API differences are that `orjson.dumps()` returns `bytes` instead of `str`, that there are no file-object `dump()`/`load()` helpers, and that formatting is controlled through `option=` flags (e.g. `orjson.OPT_INDENT_2`) rather than keyword arguments such as `indent=`. + +**Why this over a full rewrite:** The JSON parsing itself is the bottleneck, not the surrounding Python code. orjson already provides native Rust performance for this specific operation. Writing a custom Rust extension for JSON handling would duplicate orjson's work. + +--- + +### ~~HOTSPOT 4: Neo4j Driver Communication~~ RETRACTED + +**CORRECTION (from integration-architect):** This codebase uses **Memgraph via `pymgclient`** (a C extension), NOT the Neo4j Python driver. There is no `neo4j` dependency in `pyproject.toml`. 
The `neo4j-rust-ext` package patches the Neo4j driver's PackStream implementation and has **zero effect** on `pymgclient`. This recommendation is retracted. + +`pymgclient` is already a C extension with low overhead. CPU profiling confirms database serialization (protobuf) is negligible at 0.17s total. No language rewrite is needed for the database communication layer. + +--- + +### HOTSPOT 5: Embedding Cache Hashing + +**Files:** `embedder.py` (EmbeddingCache class) + +**Workload:** SHA256 hashing of code snippets for cache key generation. Each snippet is hashed via `hashlib.sha256(content.encode()).hexdigest()`. For large codebases, thousands of snippets are hashed. + +**Recommended Language:** Conditional: BLAKE3 (Rust-backed) if profiling confirms hashing as bottleneck + +**Projected Speedup:** 4x to 10x (for hashing only) + +**Evidence:** +- Python's hashlib SHA256 is already implemented in C (OpenSSL), so it's reasonably fast. Rust SHA256 achieves roughly 1.5x over Python's hashlib. [Source: users.rust-lang.org/t/hash-digest-performance-rust-vs-python/89686] +- If hashing is confirmed as a bottleneck, switching to BLAKE3 (via the `blake3` Python package, which is Rust-backed) provides 4x to 10x speedup over SHA256 because BLAKE3 is inherently faster and uses SIMD parallelism. [Source: devtoolspro.org/articles/sha256-alternatives-faster-hash-functions-2025/] +- The `blake3` Python package is a drop-in hash function replacement. API change is minimal: `blake3.blake3(content.encode()).hexdigest()`. + +**Architecture:** Replace `hashlib.sha256` with `blake3.blake3` in the `EmbeddingCache._content_hash()` method. This is a one-line change. Note: existing caches would need to be regenerated since hash values will differ. + +**CPU PROFILING RESULT: Hashing is NOT a bottleneck.** `_hash_file()` costs only 0.04s total (0.1%) across 453 calls. SHA-256 hashing is fast and not worth optimizing. BLAKE3 swap is deprioritized. 
+ +**Additional structural insight (MEDIUM severity):** The embedding pipeline at `embedder.py:109-126` and `unixcoder.py:97-107` crosses the Python/C boundary 3+ times per embedding: Python `list[list[int]]` to `torch.tensor` (copy), through PyTorch C++ backend (efficient), `.cpu().numpy()` (copy), `.tolist()` (N allocations for N-dim vector). Each crossing involves full memory copies and new container allocations. In Rust with `tch-rs`, tensor references can be held throughout without conversion overhead, providing 2x to 3x improvement on the embedding data path itself (separate from model inference time). + +--- + +### HOTSPOT 6: File Traversal and Processing Pipeline + +**Files:** `parsers/structure_processor.py`, `graph_updater.py` (file walking, `should_skip_path`) + +**Workload:** Walking repository directories, reading files, determining language, applying gitignore/skip rules, and feeding files into the parser pipeline. + +**Recommended Language:** Python (with concurrency improvements) + +**Projected Speedup:** 3x to 5x (via pathlib fix + deduplication, not language rewrite) + +**CPU PROFILING DATA:** +- `should_skip_path()`: **4.29s cumulative (13.7%)** across 59,270 calls. Dominated by `pathlib.relative_to()` at 3.18s across 54,519 calls, which creates intermediate `PurePosixPath` objects internally. +- `_collect_eligible_files()`: **4.71s cumulative (15.1%)** from a single call. The `rglob` itself costs only ~0.4s, but `should_skip_path` per file dominates. +- `identify_structure()`: **1.57s cumulative (5.0%)** from a single call. Performs a **duplicate** `rglob("*")` pass with separate `should_skip_path()` calls. +- **Key insight from profiling:** File traversal is NOT I/O-bound as originally assumed. The bottleneck is Python pathlib object overhead (creating intermediate Path objects for every `relative_to()` call), not filesystem I/O (`posix.scandir` costs only 0.42s). 
Using string-based path operations instead of pathlib would eliminate most of this overhead. Additionally, merging the duplicate traversal passes would cut FS stat calls in half. + +**I/O PROFILING DATA (confirms NOT I/O-bound):** +- Actual disk I/O for the entire workload totals only **0.85s (6.1% of 14.0s)**. File reads: 0.02s, hashing: 0.02s, protobuf serialization: 0.01s, JSON cache: 0.001s. +- `pathlib.relative_to()` performs **zero disk I/O**. It constructs intermediate `PurePosixPath` objects via `__init__`, `is_relative_to`, `with_segments`, `_from_parsed_parts`. Measured at **10.6 us/call**. +- **String slice equivalent: 0.065 us/call (163x faster).** This is the measured speedup from the I/O profiler for replacing `pathlib.relative_to()` with string slicing. +- Duplicate `rglob("*")` traversals cost ~0.80s combined (two passes of ~0.40s each scanning 59,283 entries). + +**Evidence:** +- The `rglob` filesystem traversal itself is fast (0.42s). The 4.29s in `should_skip_path` is pure Python object creation overhead from pathlib. +- The real opportunity is (a) replacing `pathlib.relative_to()` with string slicing (163x faster per call), and (b) merging the two separate `rglob` passes into one. + +**Architecture:** Keep file traversal in Python. Fix pathlib overhead first (Priority 0b). Thread-based parallelism for file processing is less impactful than originally estimated: CPU profiling shows tree-sitter parsing is only 0.6% of total CPU, so parallelizing parsing yields minimal gains. The dominant bottleneck (48.3%) is in the post-parsing call resolution phase, which is sequential and GIL-bound. + +**Why not Rust for traversal:** The per-file processing calls into tree-sitter (C library) and constructs Python objects. The overhead is in path manipulation (pathlib), not traversal I/O. A string-based path fix in Python is sufficient. 
+ +**Revised concurrency estimate (from concurrency analysis):** Original 3x-6x estimate for parallel file parsing revised downward since tree-sitter parsing is only 0.6% of CPU. Parallelism gains are secondary to algorithmic and native extension improvements. + +**Note (from concurrency analysis):** The Memgraph flush layer already uses ThreadPoolExecutor with separate connections, so the I/O layer is well structured and does not need a language rewrite. + +--- + +### HOTSPOT 7: String Processing in Call Resolution + +**Files:** `parsers/call_resolver.py`, `parsers/import_processor.py` + +**Workload:** Regex matching (`_SEPARATOR_PATTERN`, `_CHAINED_METHOD_PATTERN`), string splitting, qualified name construction (f-string concatenation), dict lookups in import maps. + +**Recommended Language:** Rust (bundled with Hotspot 1 and 2 rewrites) + +**Projected Speedup:** 5x to 20x (as part of the combined AST processing extension) + +**Evidence:** +- Rust string processing is 10x to 80x faster than Python for CPU-intensive operations. [Source: blog.jetbrains.com/rust/2025/11/10/rust-vs-python-finding-the-right-balance] +- The call resolution logic is tightly coupled to AST traversal (it runs during the call processing pass). Moving it into the same Rust extension as Hotspot 1 eliminates all Python object creation overhead for intermediate strings. +- The regex patterns used are simple (separator splitting, method chaining detection) and would be even faster using Rust's `regex` crate, which uses finite automata rather than Python's backtracking regex engine. +- **Structural analysis: Interpreter loop overhead (HIGH severity, 5x to 20x).** The innermost loops at `call_processor.py:285-328`, `import_processor.py:164-172`, and `graph_updater.py:405-434` execute ~20 to 30 Python bytecode instructions per iteration just for control flow (dynamic dispatch, isinstance checks with MRO traversal, reference count updates), before the actual work in called methods.
A compiled language would inline these calls and eliminate dispatch overhead entirely. + +**Architecture:** Include call resolution logic in the Hotspot 1 Rust extension. The Rust code performs AST traversal, call name extraction, and call resolution in a single pass, returning only the final resolved call relationships to Python. + +--- + +## CPU Profiling Summary (from cProfile) + +**Workload:** `GraphUpdater.run(force=True)` indexing 352 Python files, 31.2s total, 179M function calls. + +| Rank | Function | Self Time | Cum. Time | % Total | Calls | Root Cause | +|---|---|---|---|---|---|---| +| 1 | `find_ending_with` | 7.91s | 15.07s | 48.3% | 27,376 | Linear scan fallback, 123.7M `endswith` calls | +| 2 | `should_skip_path` | 0.07s | 4.29s | 13.7% | 59,270 | Pathlib `relative_to` overhead (3.18s) | +| 3 | `build_local_variable_type_map` | 0.004s | 2.59s | 8.3% | 5,228 | Repeated AST traversal, no caching | +| 4 | Loguru logging | 0.41s | 1.84s | 5.9% | 91,119 | Debug-level overhead at high call volume | +| 5 | `identify_structure` | 0.02s | 1.57s | 5.0% | 1 | Duplicate FS traversal + should_skip_path | +| 6 | `QueryCursor.captures` | 0.78s | 0.78s | 2.5% | 11,028 | C extension, largely irreducible | +| 7 | `Parser.parse` | 0.19s | 0.19s | 0.6% | 352 | C extension, already fast | +| 8 | `_hash_file` | 0.001s | 0.04s | 0.1% | 453 | Negligible | + +**Key observations:** +1. 48.3% of CPU is in a single function with an algorithmic fix available (index miss rate) +2. Tree-sitter C operations (parse + captures) total only 1.0s (3.1%), confirming the bottleneck is Python wrapper code +3. Protobuf serialization is negligible (0.17s total) +4. File hashing is negligible (0.04s total) + +--- + +## Structural Performance Ceilings (from Static Analysis) + +The static-pattern-analyst identified 9 categories of Python runtime overhead that create inherent performance ceilings. 
These are organized by severity: + +| Severity | Pattern | Overhead Multiplier | Rewrite Benefit | +|---|---|---|---| +| CRITICAL | AST tree traversal (pointer chasing + dynamic dispatch) | 20x-50x per node visit | Highest | +| CRITICAL | GIL preventing parallel parsing/resolution | Linear with core count | Highest | +| HIGH | String operations on qualified names | 5x-15x | High | +| HIGH | Dictionary lookups in hot loops | 3x-10x | High | +| HIGH | Interpreter loop overhead in tight iteration | 5x-20x | High | +| MEDIUM | `bytes.decode("utf-8")` on every node text access | 3x-5x | Moderate | +| MEDIUM | Object headers + reference counting on all intermediates | 2x-5x memory reduction | Moderate | +| MEDIUM | Embedding data format conversions (Python/Tensor/NumPy) | 2x-3x per embedding | Low (model dominates) | +| MEDIUM-HIGH | File I/O with Path objects (revised upward: CPU profiling shows 13.7% of CPU) | 3x-5x | Significant (pathlib overhead, not I/O) | + +**Key insight:** The CRITICAL and HIGH severity patterns are all concentrated in the same code: the parser/ingestion pipeline (Hotspots 1, 2, 7). A single Rust extension covering AST traversal, trie operations, and call resolution would address 5 of the 9 overhead categories simultaneously. + +**Diffuse overhead note:** Object header overhead (16 bytes per object minimum) and reference counting affect all Python code. Every intermediate `tuple`, `list[str]` from `.split()`, and NamedTuple is heap-allocated with refcounting. A `tuple[str, str]` is ~100 bytes in Python vs ~16 bytes in Rust (stack-allocated). This is not directly addressable per hotspot but is eliminated automatically when hot paths move to Rust. 
+ +## Memory Profiling Data (from tracemalloc) + +Memory profiling confirms that Python's object model creates significant memory overhead in the same hotspot areas identified by CPU profiling and structural analysis: + +| Structure | Python (measured) | Estimated Rust | Memory Ratio | +|---|---|---|---| +| Tree-sitter AST node wrappers | 87.3 MiB (343 files, 1.67M wrapper objects) | ~5-10 MiB (direct C struct access) | 9-17x | +| EmbeddingCache `list[float]` | 48.6 MiB (2k embeddings) | ~6 MiB (packed f32 arrays) | 8x | +| import_mapping | 5.6 MiB (2k modules) | ~1.5 MiB | 3.7x | +| rel_groups | 3.6 MiB | ~800 KiB | 4.5x | +| FunctionRegistryTrie | 3.5 MiB (10k entries, 13.2k intermediate dicts) | ~400 KiB (arena-allocated trie) | 9x | + +**Key memory findings:** +1. **AST node wrappers (87.3 MiB)** are the largest memory consumer. Each `node.children` access creates new Python Node wrapper objects around C pointers. A Rust extension performing extraction natively would avoid all wrapper allocation, reinforcing the Hotspot 1 recommendation. +2. **EmbeddingCache (48.6 MiB)** uses Python `float` objects (24 bytes each, plus an 8-byte list slot per element). A 768-dim embedding as `list[float]` uses ~24 KiB vs ~3 KiB as packed f32, consistent with the 48.6 MiB vs ~6 MiB figures above for 2k embeddings. Switching to numpy `float64` arrays (Python-level fix) would provide a 4x reduction; packed f32 arrays (numpy `float32` or Rust) would be optimal at 8x. +3. **FunctionRegistryTrie (3.5 MiB)** has 13.2k intermediate Python dict objects (64+ bytes each) for 10k entries. A Rust compact trie with byte slices or arena allocation would use ~400 KiB. +4. **String duplication:** Qualified names are stored in multiple structures (trie, `_entries`, `simple_name_lookup`, `import_mapping`). Python's string interning does not cover long qualified names. Rust string interning via a global interner would deduplicate these. + +--- + +## Non-Language Optimizations (Algorithmic / Python-Level) + +CPU profiling and concurrency analysis identified multiple high-impact optimizations that do NOT require a language rewrite.
**These should be implemented first** as they collectively address over 70% of CPU time. + +### ALGORITHMIC 0: Fix `find_ending_with()` Simple Name Index (THE #1 PRIORITY) + +**Issue:** `FunctionRegistryTrie.find_ending_with()` at `graph_updater.py:156` accounts for **48.3% of total CPU time** (15.07s of 31.2s). The `_simple_name_lookup` index has an 80.7% miss rate, causing a linear scan fallback with 123.7M `str.endswith()` calls. + +**Projected Speedup:** ~2x on total runtime (eliminating 15s from a 31s run) + +**Action:** Build a proper reverse index mapping simple (unqualified) names to their list of qualified names. Populate it during trie insertion. This converts the O(N) linear scan into an O(1) dict lookup per call. This is a pure Python data structure fix requiring minimal code changes. + +### ALGORITHMIC 0b: Replace pathlib `relative_to()` with String Operations + +**Issue:** `should_skip_path()` consumes **4.29s (13.7%)** due to pathlib's `relative_to()` creating intermediate `PurePosixPath` objects 54,519 times. The actual filesystem I/O is only 0.42s. + +**Projected Speedup:** ~3x on the file collection phase (reducing 4.29s to ~0.5s) + +**Action:** Replace `path.relative_to(base)` with `str(path)[len(str(base))+1:]` or equivalent string slicing. Merge the duplicate `rglob("*")` passes from `_collect_eligible_files()` and `identify_structure()` into a single traversal. Additionally, pre-filter at directory level: walk the tree manually and skip ignored directories (.git, __pycache__, node_modules, site) immediately rather than enumerating all 59K descendants and filtering after. This would reduce traversal from 59K to ~600 paths. + +### ALGORITHMIC 0c: Cache Type Inference Results Per File + +**Issue:** `build_local_variable_type_map()` consumes **2.59s (8.3%)** across 5,228 calls, re-traversing ASTs that have already been parsed with no caching across calls within the same file. 
+ +**Projected Speedup:** ~2x to 5x on the type inference phase + +**Action:** Memoize type inference results per function AST node. Since the AST is immutable after parsing, results are safe to cache. + +### ALGORITHMIC 0d: Reduce Debug Logging Overhead + +**Issue:** Loguru logging consumes **1.84s (5.9%)** across 91,119 calls, including 85,099 debug-level calls processed even when not displayed. + +**Projected Speedup:** Eliminates ~1.8s (5.9% of total runtime) + +**Action:** Loguru has no stdlib-style `logger.isEnabledFor()`, so guard expensive debug messages with lazy evaluation instead (`logger.opt(lazy=True).debug(...)`, where arguments are passed as callables and only evaluated if the message is actually emitted), or set the minimum log level to INFO in production. + +### ALGORITHMIC 0e: Use Compact JSON for Graph Export + +**Issue:** `_write_graph_json()` in `main.py:744` uses `json.dump(graph_data, f, indent=2)` which is **8x slower** than compact JSON (86ms vs 11ms for 10K nodes) and produces 1.5x larger output. + +**Projected Speedup:** 8x on graph JSON export + +**Action:** Use compact JSON (no indent) for machine consumption. Add a separate `--pretty` flag for human-readable output. + +### ALGORITHMIC 0f: Binary Format for Embedding Cache + +**Issue:** 500 embeddings (768-dim float vectors) stored as JSON = 6.3MB, save = 149ms, load = 38ms. Each embedding is serialized as a JSON array of 768 float values with full decimal precision. + +**Projected Speedup:** 10x+ on embedding cache I/O (both size and speed) + +**Action:** Use numpy `.npy` or `.npz` format for embedding vectors. A 768-dim float32 vector is 3 KiB in binary vs ~15 KiB in JSON text. + +### ALGORITHMIC 1: Batch Embedding API Usage + +**Issue:** The `embed_code_batch` function exists but is unused in the main pipeline. The embedding phase calls `embed_code` per-item instead. + +**Projected Speedup:** Potentially 5x to 12x on the embedding phase (based on batching reducing HTTP round-trip overhead and enabling server-side batching). The Baseten case study showed 12x throughput improvement from proper batching with GIL release.
[Source: baseten.co/blog/your-client-code-matters-10x-higher-embedding-throughput-with-python-and-rust/] + +**Action:** Fix the Python pipeline to use `embed_code_batch`. This is a Python-level fix with zero language rewrite cost. + +### ALGORITHMIC 2: Incremental Call Re-Resolution + +**Issue:** The realtime updater (`realtime_updater.py`) performs full call re-resolution on every file change, reprocessing the entire function registry and call graph. + +**Projected Speedup:** 10x to 100x for incremental updates (per the concurrency analysis), since only the changed file's calls and its direct dependents need re-resolution. + +**Action:** Implement incremental call resolution that tracks which qualified names changed and only re-resolves calls that reference those names. This is an algorithmic improvement, not a language choice. + +**All of the Python-level fixes in this section (ALGORITHMIC 0 through ALGORITHMIC 2) should be implemented BEFORE the Rust extension work**, as they may reduce the urgency of the more expensive rewrites. + +--- + +## Language Comparison Matrix + +| Criterion | Rust (PyO3) | Cython | Go | Mojo | Zig | +|---|---|---|---|---|---| +| **Raw performance** | Excellent (C-level) | Good (C-level for numeric) | Good (2x slower than Rust) | Excellent (claims C-level) | Excellent (C-level) | +| **Python FFI quality** | Excellent (PyO3 is mature, zero-copy numpy, vectorcall) | Native (compiles to C extension) | Poor (cgo+ctypes, limited) | Poor (early stage, no stable FFI) | Poor (C ABI only, no Python tooling) | +| **Ecosystem for this workload** | Excellent (tree-sitter crate, regex, serde_json, radix_trie) | Limited (no tree-sitter, string ops need C) | Moderate (tree-sitter-go exists) | None (no tree-sitter, no graph libs) | Limited (tree-sitter C API via @cImport) | +| **Memory safety** | Excellent (borrow checker) | Poor (manual, C-level) | Good (GC, but adds pauses) | Unknown (early stage) | Moderate (manual, but safer than C) | +| **Build complexity** | Moderate (maturin makes it easy) | Low
(cythonize) | High (separate binary, IPC needed) | High (Modular toolchain only) | High (no Python tooling) | +| **Developer availability** | Growing (22% increase in Python+Rust developers in 2025) | Declining | Low for Python extensions | Very low | Very low | +| **Real-world precedent** | ruff, uv, polars, pydantic-core, orjson | numpy, scipy (legacy) | None for similar tools | None for similar tools | None for similar tools | + +### Why Rust is the clear winner for this codebase: + +1. **PyO3 maturity:** PyO3 is the most mature Python FFI framework, with zero-copy mechanisms, vectorcall support, and 92% of pure Rust performance. [Source: pyo3.rs/main/performance] + +2. **Tree-sitter native support:** Tree-sitter's runtime is written in C/Rust. Rust can call the tree-sitter C API directly without any Python intermediary, eliminating the per-node FFI overhead that is the primary bottleneck. + +3. **Industry precedent:** The most successful Python performance tools of 2024-2025 are all Rust-backed: ruff (linter, 10-100x faster), uv (package manager), polars (DataFrame, 5-10x faster), pydantic-core (validation, 17x faster), orjson (JSON, 15x faster). [Source: thenewstack.io/rust-pythons-new-performance-engine/] + +4. **maturin build system:** maturin (also by the PyO3 team) simplifies building and distributing Rust Python extensions as standard wheels. No complex build system integration needed. 
+ +--- + +## Prioritized Implementation Order + +### Phase 0: Python Algorithmic Fixes (addresses ~72% of CPU time) + +| Priority | Fix | Effort | CPU Time Saved | % of Total | +|---|---|---|---|---| +| 0a | Fix `find_ending_with` simple name index | Very low | ~15s | 48.3% | +| 0b | Replace pathlib `relative_to` with string ops + merge duplicate rglob | Low | ~4s | 13.7% | +| 0c | Cache type inference results per file | Low | ~2s | 8.3% | +| 0d | Reduce debug logging overhead | Very low | ~1.8s | 5.9% | +| 0e | Batch embedding API usage (ALGORITHMIC 1 above, not ALGORITHMIC 0e) | Very low | TBD (embedding phase) | TBD | +| 0f | Incremental call re-resolution (ALGORITHMIC 2 above, not ALGORITHMIC 0f) | Medium | 10x-100x on realtime | N/A (realtime only) | + +**Phase 0 collectively addresses ~72% of measured CPU time (22.8s of 31.2s) with pure Python changes.** After Phase 0, the expected baseline would be ~8-10s for the same 352-file workload. + +### Phase 1: Drop-in Rust-backed Libraries (minimal code changes) + +| Priority | Library | Effort | Expected Speedup | +|---|---|---|---| +| 1a | JSON serialization (orjson) | Very low (dependency swap) | 5x-15x on JSON ops | +| ~~1b~~ | ~~Neo4j driver (neo4j-rust-ext)~~ | ~~RETRACTED~~ | ~~Inapplicable: codebase uses Memgraph/pymgclient, not Neo4j~~ | +| 1b | Embedding hash (BLAKE3) | Very low (one-line change) | 4x-10x on hashing (confirmed negligible: 0.04s) | + +**Note from profiling:** File hashing (`_hash_file`) is only 0.04s total (0.1%), and protobuf serialization is 0.17s total. These are negligible. BLAKE3 (Priority 1b) can be deprioritized. orjson remains worthwhile for larger codebases. The neo4j-rust-ext recommendation was retracted because this codebase uses Memgraph via `pymgclient` (C extension), not the Neo4j Python driver.
+ +### Phase 2: Rust Extension (addresses remaining CPU-bound overhead) + +| Priority | Component | Effort | Expected Speedup | +|---|---|---|---| +| 2a | AST traversal + type inference (Rust) | High (new extension) | 20x-50x on AST processing | +| 2b | Trie + call resolution (Rust) | Medium (extend 2a) | 10x-50x on lookups (GIL-bound) | + +**Phase 2 should be implemented as a single `codebase-rag-core` Rust crate**, since AST traversal, trie operations, and call resolution are tightly coupled. The Rust extension MUST release the GIL via `Python::allow_threads` during parsing and traversal to preserve thread-level parallelism. + +**Amdahl's law caveat (from integration-architect):** Tree-sitter C operations (parse + captures) are only 3.1% of CPU time. A 16x speedup on 3.1% yields only 1.03x total improvement. The value of the Rust AST extension is NOT in speeding up tree-sitter itself (already fast C code), but in eliminating the Python wrapper overhead around it: type inference re-traversal (8.3%), call resolution string operations, and interpreter loop overhead in the tight iteration loops. These Python-side AST costs total ~20% of CPU, making the combined Phase 2 extension worthwhile after Phase 0 algorithmic fixes are applied. + +### Phase 3: Architecture Improvements + +| Priority | Change | Effort | Expected Speedup | +|---|---|---|---| +| 3a | File processing parallelism (ThreadPoolExecutor) | Medium | Downgraded: marginal gains | + +**Phase 3 is downgraded based on revised analysis.** CPU profiling shows tree-sitter parsing is only 0.6% of CPU, and the file processing bottleneck (`pathlib.relative_to` at 13.7%) is GIL-bound pure Python that ThreadPoolExecutor cannot parallelize. The pathlib fix (Phase 0b, string slicing, 163x faster) is the correct solution, not parallelism. ProcessPoolExecutor for call resolution is also impractical: memory profiling shows 170 MiB peak memory, making serialization cost too high. 
The Rust PyO3 native extension (Phase 2) is the only viable path for parallelizing call resolution, as it can release the GIL via `Python::allow_threads`. + +--- + +## Sources + +- [Gauge.sh: Python extensions should be lazy](https://www.gauge.sh/blog/python-extensions-should-be-lazy) - 16x speedup moving AST processing to Rust +- [Neo4j Python Driver 10x Faster With Rust](https://neo4j.com/blog/developer/python-driver-10x-faster-with-rust/) - neo4j-rust-ext benchmarks +- [Baseten: 12x higher embedding throughput with Python and Rust](https://www.baseten.co/blog/your-client-code-matters-10x-higher-embedding-throughput-with-python-and-rust/) - PyO3 GIL release pattern +- [orjson: 500% Faster JSON in Python](https://medium.com/codeelevation/want-500-faster-json-in-python-try-orjson-powered-by-rust-22995c25c312) - JSON serialization benchmarks +- [PyO3 Performance Guide](https://pyo3.rs/main/performance) - FFI overhead characteristics +- [Rust: Python's New Performance Engine](https://thenewstack.io/rust-pythons-new-performance-engine/) - Industry adoption trends +- [Comparing Cython to Rust for Python Extensions](https://willayd.com/comparing-cython-to-rust-evaluating-python-extensions.html) - Graph algorithm benchmarks +- [SHA-256 Alternatives: BLAKE3 vs SHA-3 Speed Comparison](https://devtoolspro.org/articles/sha256-alternatives-faster-hash-functions-2025/) - Hash function benchmarks +- [Neo4j Performance Recommendations](https://neo4j.com/docs/python-manual/current/performance/) - Batch loading best practices +- [JetBrains Rust vs Python 2025](https://blog.jetbrains.com/rust/2025/11/10/rust-vs-python-finding-the-right-balance-between-speed-and-simplicity/) - String processing benchmarks +- [Databooth: Benchmarking Python with Cython, C, C++, and Rust](https://www.databooth.com.au/posts/py-num-bench/) - Extension comparison +- [Cython, Rust, and more: choosing a language for Python extensions](https://pythonspeed.com/articles/rust-cython-python-extensions/) - When 
to use each approach +- [ast-grep](https://github.com/ast-grep/ast-grep) - Rust tree-sitter code analysis tool +- [Rust trie implementations](https://dev.to/timclicks/two-trie-implementations-in-rust-ones-super-fast) - Trie performance +- [Corrode: Migrating from Python to Rust](https://corrode.dev/learn/migration-guides/python-to-rust/) - Migration guide +- [Datadog: Migrating static analyzer from Java to Rust](https://www.datadoghq.com/blog/engineering/how-we-migrated-our-static-analyzer-from-java-to-rust/) - Code analysis tool migration diff --git a/docs/reports/PRIORITIZED_SCORECARD.md b/docs/reports/PRIORITIZED_SCORECARD.md new file mode 100644 index 000000000..871d96534 --- /dev/null +++ b/docs/reports/PRIORITIZED_SCORECARD.md @@ -0,0 +1,284 @@ +# Prioritized Scorecard: Rewrite Candidates + +**Baseline:** 31.2s total, 179M function calls, indexing 352 Python files (cProfile) + +## Scoring Methodology + +Each candidate is scored 1 to 5 on six dimensions. The final rank is determined by **Net Score**, which weights measured/projected performance gain and scope of impact highest, while penalizing integration overhead, risk, and maintenance burden. + +**Weights:** Performance Gain (25%) | Memory Improvement (10%) | Integration Feasibility (20%) | Risk & Complexity (20%) | Scope of Impact (15%) | Maintenance Burden (10%) + +**Score key:** 5 = excellent, 4 = good, 3 = moderate, 2 = poor, 1 = unacceptable + +--- + +## Tier 1: ACCEPTED (High confidence, clear positive ROI) + +### Rank 1: Fix `find_ending_with` Linear Scan (Python Bugfix) + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 5 | 48.3% of CPU (15.07s). Eliminates 123.7M `str.endswith()` calls. Projected ~1.9x total speedup. | +| Memory Improvement | 3 | Reduces temporary string allocations from linear scans. | +| Integration Feasibility | 5 | Pure Python fix. Zero new dependencies, zero build changes. | +| Risk & Complexity | 5 | Low risk. 
Fix the 80.7% miss rate in `_simple_name_lookup` index, or build suffix index. | +| Scope of Impact | 5 | Affects every file processed. Dominant bottleneck in the entire pipeline. | +| Maintenance Burden | 5 | No new language, no new build tooling. Standard Python data structure. | +| **Net Score** | **4.80** | | + +**Verdict: PROCEED IMMEDIATELY.** This is a bugfix, not a rewrite. The `_simple_name_lookup` index has an 80.7% miss rate, causing fallback to O(n) linear scan on every call resolution. Fixing the index population or adding a suffix index is a straightforward Python change with the highest ROI of any candidate. + +--- + +### Rank 2: Replace pathlib with String Operations in `should_skip_path` (Python Refactor) + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 4 | 13.7% of CPU (4.29s across 59,012 calls). ~20x faster with string ops. | +| Memory Improvement | 4 | Eliminates ~118,000 intermediate Path objects per run. | +| Integration Feasibility | 5 | Internal refactor. No dependencies. | +| Risk & Complexity | 5 | Replace `Path.relative_to()` with `str.removeprefix()`. Straightforward. | +| Scope of Impact | 4 | Affects file traversal (called for every file and directory). | +| Maintenance Burden | 5 | Simpler code than current pathlib usage. | +| **Net Score** | **4.50** | | + +**Verdict: PROCEED.** Convert paths to strings at the boundary and use string comparison. The pathlib object creation overhead is avoidable. + +--- + +### Rank 3: Cache `build_local_variable_type_map` Results (Python Memoization) + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 3 | 8.3% of CPU (2.59s across 5,228 calls). Saves ~2s. | +| Memory Improvement | 2 | Adds ~2MB cache. Slight memory increase. | +| Integration Feasibility | 5 | Add `@lru_cache` or dict-based memoization. No dependencies. | +| Risk & Complexity | 5 | Keyed by (file_path, function_start_line, function_end_line). 
Cache invalidation handled by existing incremental update system. | +| Scope of Impact | 3 | Affects call resolution for files with multiple functions. | +| Maintenance Burden | 5 | Standard memoization pattern. | +| **Net Score** | **3.90** | | + +**Verdict: PROCEED.** Standard memoization with minimal memory cost. + +--- + +### Rank 4: Suppress Debug Logging in Production (Config Change) + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 3 | 5.9% of CPU (1.84s from 85,099 debug calls). Saves ~1.7s. | +| Memory Improvement | 2 | Reduces temporary string allocations from format strings. | +| Integration Feasibility | 5 | Set log level to INFO at start of `GraphUpdater.run()`. One line. | +| Risk & Complexity | 5 | Trivial. Debug output not needed during normal graph building. | +| Scope of Impact | 3 | Affects all debug logging throughout pipeline. | +| Maintenance Burden | 5 | No maintenance cost. | +| **Net Score** | **3.75** | | + +**Verdict: PROCEED.** Trivial change, meaningful gain. + +--- + +### Rank 5: Deduplicate Filesystem Traversal (Python Refactor) + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 3 | 5.0% of CPU (1.57s). Eliminates duplicate `rglob("*")` + `should_skip_path()` pass. | +| Memory Improvement | 3 | Avoids building duplicate file lists. | +| Integration Feasibility | 4 | Moderate refactor: merge `identify_structure()` and `_collect_eligible_files()` into single traversal. | +| Risk & Complexity | 4 | Requires restructuring two-pass architecture. Not trivial but well-scoped. | +| Scope of Impact | 3 | Affects initial file discovery phase only. | +| Maintenance Burden | 4 | Single-pass is arguably simpler than two-pass. | +| **Net Score** | **3.55** | | + +**Verdict: PROCEED.** Combine with Rank 2 (string paths) for maximum benefit on the file traversal phase. 
+ +--- + +### Rank 6: orjson (Drop-in JSON Replacement) + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 3 | 5x to 15x on JSON ops. JSON is NOT a dominant hotspot in the profiling data (indexing phase), but significant for graph export and cache I/O. | +| Memory Improvement | 4 | 75% lower peak RSS for JSON operations. | +| Integration Feasibility | 5 | Add dependency, ~10 call sites need minor adjustment (bytes vs str). | +| Risk & Complexity | 5 | Widely adopted (polars, FastAPI). Pre-built wheels for all platforms. | +| Scope of Impact | 2 | JSON ops are a small fraction of total indexing time. Bigger impact on graph export/import. | +| Maintenance Burden | 5 | Drop-in replacement. No ongoing maintenance cost. | +| **Net Score** | **3.50** | | + +**Verdict: PROCEED.** Low effort, low risk, moderate gain on I/O-heavy workflows (export, cache load/save). Not a game-changer for indexing performance. + +--- + +## Tier 2: CONDITIONAL (Worthwhile only after Tier 1 is complete) + +### Rank 7: Rust AST Processing Extension (PyO3/maturin) + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 2 | Tree-sitter ops are only 3.1% of CPU BEFORE Python fixes. After Tier 1 fixes (~3.7x speedup), tree-sitter becomes ~11.8% of reduced runtime. A 16x Rust speedup saves 0.94s from 8.5s. Only 1.12x total improvement post-fixes. | +| Memory Improvement | 4 | Eliminates Python object overhead (50-80 bytes per dict entry), reduces malloc calls by ~8x. | +| Integration Feasibility | 2 | ~110KB of Python code to port. 8+ language parsers. Complex multi-language pattern matching. Requires maturin build system, Rust toolchain in CI/Docker, platform-specific wheels. | +| Risk & Complexity | 2 | Large surface area. Tight coupling with existing data structures. Tree-sitter version compatibility. IngestorProtocol callback complexity. | +| Scope of Impact | 3 | Affects all file processing. But only becomes meaningful at 10,000+ file scale. 
| +| Maintenance Burden | 2 | Introduces Rust into a pure Python project. Requires Rust expertise for ongoing maintenance. Multi-language build complexity. | +| **Net Score** | **2.35** | | + +**Verdict: DEFER.** The integration architect's analysis is decisive: tree-sitter operations consume only 3.1% of actual CPU time. The language researcher's headline claim of 10x to 16x was based on incorrect assumptions about where time was spent. After Tier 1 Python fixes, the remaining 8.5s runtime has tree-sitter at 11.8%, making a 16x Rust speedup yield only 1.12x total. The high development cost (~110KB port, multi-language parsers) and maintenance burden (Rust toolchain, platform-specific wheels) make this poor ROI until the codebase scales an order of magnitude. + +**Reconsider when:** Repository size exceeds 5,000+ files, making tree-sitter operations a larger fraction of total runtime. + +--- + +### Rank 8: File Processing Parallelism (ProcessPoolExecutor) + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 3 | 1.5x to 3x after Tier 1 fixes. Limited by sequential pass dependencies (Amdahl's law). | +| Memory Improvement | 1 | Increases memory (per-worker grammar loading, duplicate tries). | +| Integration Feasibility | 3 | Requires restructuring three-pass pipeline. Shared mutable state (trie, import maps) needs synchronization. | +| Risk & Complexity | 3 | Tree-sitter objects not serializable across process boundaries. Worker initialization overhead (~50ms per worker). | +| Scope of Impact | 3 | Affects per-file processing throughput. | +| Maintenance Burden | 3 | Adds concurrency complexity. Harder to debug. | +| **Net Score** | **2.70** | | + +**Verdict: DEFER.** Worth pursuing after Tier 1 fixes reduce the baseline. The concurrency analyst confirmed tree-sitter releases the GIL during parsing, so ThreadPoolExecutor (not ProcessPoolExecutor) is the preferred approach, with lower overhead. 
But this requires the three-pass architecture to be restructured. + +--- + +## Tier 3: REJECTED (Net gain does not justify complexity) + +### Rank 9: Rust FunctionRegistryTrie (PyO3, standalone) + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 2 | Standalone: 1.5x to 3x on trie ops. Python call resolution code still creates strings for every lookup key. FFI crossing per-lookup cuts gains in half. | +| Memory Improvement | 4 | Contiguous memory layout eliminates per-node dict overhead. | +| Integration Feasibility | 2 | Only viable bundled with Rank 7 (Rust AST extension). Standalone, FFI overhead negates gains. | +| Risk & Complexity | 3 | Moderate if bundled. High coupling with Rank 7. | +| Scope of Impact | 2 | **Rank 1 (fix `find_ending_with`) eliminates the primary trie bottleneck.** After that fix, trie operations are no longer the dominant cost. | +| Maintenance Burden | 2 | Requires Rust maintenance alongside Python trie. | +| **Net Score** | **2.30** | | + +**Verdict: REJECT standalone. BUNDLE with Rank 7 if/when Rank 7 proceeds.** The critical insight from the integration architect: standalone Rust trie has negative net gains because FFI boundary crossing happens per-lookup (thousands of times per file). Only viable when bundled with the full Rust AST extension. Furthermore, Rank 1 (Python bugfix) eliminates the primary trie bottleneck (the linear scan), making Rust trie less urgent. + +--- + +### Rank 10: neo4j-rust-ext + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 1 | **0x. This codebase uses Memgraph via pymgclient, NOT the Neo4j Python driver.** neo4j-rust-ext patches the `neo4j` driver which is not used. | +| Memory Improvement | 1 | N/A. | +| Integration Feasibility | 1 | Inapplicable. No `neo4j` dependency in `pyproject.toml`. | +| Risk & Complexity | 1 | Wrong driver assumption. | +| Scope of Impact | 1 | Zero impact. | +| Maintenance Burden | 1 | N/A. 
| +| **Net Score** | **1.00** | | + +**Verdict: REJECT.** The language researcher incorrectly assumed the codebase uses the Neo4j Python driver. It uses Memgraph via pymgclient (a C extension). neo4j-rust-ext has zero applicability. + +--- + +### Rank 11: BLAKE3 Hashing + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 1 | Negligible. Hashing is NOT a bottleneck. `_hash_file` processes ~5ms total for 1000 files. `_content_hash` takes microseconds per call. hashlib SHA256 is already C-backed. | +| Memory Improvement | 1 | No meaningful change. | +| Integration Feasibility | 5 | One-line change per call site. Drop-in. | +| Risk & Complexity | 3 | Cache invalidation forces full re-index on first run after change. One-time negative impact dwarfs per-operation savings. | +| Scope of Impact | 1 | Hashing is <0.1% of total runtime. | +| Maintenance Burden | 4 | Minimal. | +| **Net Score** | **1.85** | | + +**Verdict: REJECT.** Optimizing an operation that takes microseconds per call provides no meaningful improvement. The cache invalidation cost (forced full re-index) creates a one-time penalty that exceeds months of per-operation savings. The integration architect's analysis is correct: "Skip unless profiling proves hashing is >5% of total wall clock time." It is far below 5%. + +--- + +### Rank 12: String Processing in Call Resolution (Rust, standalone) + +| Dimension | Score | Rationale | +|---|---|---| +| Performance Gain | 1 | **Negative standalone.** FFI overhead of passing import maps and trie state for each call resolution exceeds the savings from faster string processing. | +| Memory Improvement | 3 | Would reduce temporary string allocations. | +| Integration Feasibility | 1 | Deeply interleaved with trie lookups, import maps, AST node access. Cannot be isolated without massive FFI overhead. | +| Risk & Complexity | 1 | Requires marshalling all context across FFI per call. 
| +| Scope of Impact | 2 | Affects call resolution, but FFI boundary negates gains. | +| Maintenance Burden | 2 | Additional Rust code for marginal or negative benefit. | +| **Net Score** | **1.40** | | + +**Verdict: REJECT standalone. BUNDLE with Rank 7 only.** The integration architect proved that the boundary crossing cost exceeds per-operation savings when implemented standalone. Only viable as part of a comprehensive Rust AST extension (Rank 7). + +--- + +## Combined Impact Projection + +### Phase 1: Tier 1 Python Fixes (Ranks 1 through 6) + +| Fix | Time Saved | % of Total | Cumulative | +|-----|-----------|------------|------------| +| Rank 1: Fix find_ending_with | ~13.5s | 43.3% | 43.3% | +| Rank 2: String path ops | ~4.0s | 12.8% | 56.1% | +| Rank 3: Cache type inference | ~2.0s | 6.4% | 62.5% | +| Rank 4: Suppress debug logging | ~1.7s | 5.5% | 68.0% | +| Rank 5: Deduplicate FS traversal | ~1.5s | 4.8% | 72.8% | +| Rank 6: orjson (I/O workflows) | Variable | Marginal on indexing | 72.8%+ | +| **Total** | **~22.7s** | **72.8%** | | + +**Projected runtime after Phase 1:** ~8.5s (3.7x speedup from pure Python fixes) +**Integration overhead:** Zero +**Build system changes:** One dependency added (orjson) +**Maintenance burden:** None beyond standard Python + +### Phase 2: Tier 2 (Only if needed after Phase 1) + +After Phase 1, the remaining 8.5s breaks down as: +- Tree-sitter operations: ~1.0s (11.8%) +- Call resolution: ~2.5s (29.4%) +- Graph construction: ~2.5s (29.4%) +- File I/O + hashing: ~0.5s (5.9%) +- Miscellaneous: ~2.0s (23.5%) + +The Rust AST extension (Rank 7) would save ~0.94s from tree-sitter, reducing to ~7.6s (1.12x). File parallelism (Rank 8) could provide 1.5x to 3x on top. Combined: ~3.0 to 5.0s total. + +**Phase 2 is only justified when repository sizes exceed 5,000+ files**, where tree-sitter and call resolution become a proportionally larger fraction of total runtime. + +--- + +## Key Findings + +1. 
**72.8% of the total runtime is addressable with pure Python fixes** (zero integration overhead, zero build changes, zero maintenance burden). + +2. **The headline Rust AST rewrite (10x to 16x) targets only 3.1% of actual CPU time.** Profiling data invalidated the language researcher's core assumption about where time is spent. + +3. **neo4j-rust-ext is completely inapplicable** (wrong database driver). This was a factual error in the language recommendations. + +4. **BLAKE3 hashing optimizes a non-bottleneck** (microsecond-level operations that total <0.1% of runtime). + +5. **Standalone Rust trie and string processing have negative net gains** due to per-lookup FFI boundary crossing costs that exceed the per-operation savings. + +6. **The single largest optimization (Rank 1) is a Python bugfix**, not a language rewrite. Fixing the `_simple_name_lookup` index miss rate from 80.7% to near 0% recovers ~13.5s (43.3% of total runtime) out of the 48.3% of CPU time currently spent in `find_ending_with`. + +--- + +## Scorecard Summary + +| Rank | Candidate | Type | Net Score | Time Saved | Verdict | +|------|-----------|------|-----------|------------|---------| +| 1 | Fix `find_ending_with` | Python bugfix | 4.80 | ~13.5s (43.3%) | **PROCEED** | +| 2 | String path ops | Python refactor | 4.50 | ~4.0s (12.8%) | **PROCEED** | +| 3 | Cache type inference | Python memoization | 3.90 | ~2.0s (6.4%) | **PROCEED** | +| 4 | Suppress debug logging | Config change | 3.75 | ~1.7s (5.5%) | **PROCEED** | +| 5 | Deduplicate FS traversal | Python refactor | 3.55 | ~1.5s (4.8%) | **PROCEED** | +| 6 | orjson | Dependency swap | 3.50 | Variable | **PROCEED** | +| 7 | Rust AST extension | Rust crate | 2.35 | ~0.94s post-fixes | **DEFER** | +| 8 | File parallelism | Architecture change | 2.70 | 1.5x to 3x post-fixes | **DEFER** | +| 9 | Rust trie (standalone) | Rust (PyO3) | 2.30 | Marginal standalone | **REJECT** | +| 10 | neo4j-rust-ext | N/A | 1.00 | 0 (wrong driver) | **REJECT** | +| 11 | BLAKE3 hashing | Dependency swap | 1.85 | Negligible | **REJECT** |
+| 12 | Rust string processing | Rust (standalone) | 1.40 | Negative standalone | **REJECT** | + +--- + +**Note:** Task #9 (proof-of-concept benchmarks) was still in progress when this scorecard was produced. If benchmark data reveals performance characteristics that contradict the profiling data used here, this scorecard should be revised. However, the profiling data (cProfile, 31.2s, 179M calls) is empirical and provides a strong basis for these rankings. diff --git a/docs/reports/REWRITE_RECOMMENDATIONS.md b/docs/reports/REWRITE_RECOMMENDATIONS.md new file mode 100644 index 000000000..ebd649eda --- /dev/null +++ b/docs/reports/REWRITE_RECOMMENDATIONS.md @@ -0,0 +1,340 @@ +# Rewrite Recommendations: code-graph-rag Performance Optimization + +## Executive Summary + +A comprehensive performance analysis of the code-graph-rag codebase (31.2s total, 179M function calls indexing 352 Python files) reveals that **no language rewrite is currently justified**. The top performance bottlenecks are algorithmic inefficiencies and unnecessary object creation in pure Python code, addressable with zero new dependencies and zero build system changes. + +### Top 3 Recommendations + +1. **Fix `find_ending_with` suffix index** (Python bugfix): Eliminates 48.3% of total CPU time. The `_simple_name_lookup` index has an 80.7% miss rate, causing 123.7M `str.endswith()` calls via linear scan fallback. Benchmarked fix: **261x to 382x speedup** on the operation. Projected total speedup: ~1.9x. + +2. **Replace pathlib with string operations in `should_skip_path`** (Python refactor): Eliminates 13.7% of total CPU time. `pathlib.relative_to()` creates intermediate objects on every call (59,012 calls, 3.39s total). Benchmarked fix: **45x to 634x speedup** on path operations. Projected total speedup: ~1.15x. + +3. **Cache `build_local_variable_type_map` results** (Python memoization): Eliminates 8.3% of total CPU time. 5,228 uncached AST traversals. Projected total speedup: ~1.07x. 
+ +**Combined Tier 1 impact:** ~3.7x total speedup (31.2s to ~8.5s) from pure Python fixes with zero integration overhead. + +### Key Finding: Rust Rewrite Not Justified + +The language researcher's headline recommendation (Rust AST extension for "10x to 16x speedup") targets tree-sitter operations that consume only **3.1% of actual CPU time**. After Tier 1 Python fixes, a 16x Rust speedup on tree-sitter would yield only **1.12x total improvement** (Amdahl's law). The high development cost (~110KB of Python to port, multi-language parser support, Rust toolchain in CI/Docker) and maintenance burden make this poor ROI until repository sizes reach 5,000+ files. + +### Adversarial Review Outcome + +The adversarial reviewer confirmed that **no language rewrite candidate survives challenge**. All top hotspots are fixable in Python. The Rust AST extension was the only candidate with theoretical merit, but the measured 3.1% CPU share makes it unjustifiable at current scale. + +### Security Audit Outcome + +The security auditor approved all recommended candidates with zero disputes. The only new dependency (orjson) is a widely adopted, well-maintained package with pre-built wheels.
+ +--- + +## Profiling Baseline + +| Metric | Value | +|--------|-------| +| Profiling tool | cProfile | +| Total runtime | 31.2 seconds | +| Total function calls | 179M | +| Workload | `GraphUpdater.run(force=True)` indexing 352 Python files | +| Platform | macOS Darwin 25.3.0, ARM64 | +| Python version | 3.12.2 (CPython) | +| Key dependencies | tree-sitter 0.25.2, pymgclient, loguru, torch 2.10 | + +--- + +## Detailed Analysis: Accepted Candidates + +### Candidate 1: Fix `find_ending_with` Linear Scan + +**Priority:** 1 (Highest) +**Type:** Python bugfix +**Effort:** Low +**Files:** `codebase_rag/graph_updater.py:156-161` + +**Profiling Data:** +- Self time: 7.91s (25.3%) +- Cumulative time: 15.07s (48.3%) +- Call count: 27,376 calls +- Root cause: `_simple_name_lookup` index miss rate of 80.7% (22,096 of 27,376 calls) +- Fallback: `[qn for qn in self._entries.keys() if qn.endswith(f".{suffix}")]` generating 123.7M `str.endswith()` invocations + +**Benchmark Results:** + +| Registry Size | Queries | Linear Scan (ms) | Suffix Index (ms) | Speedup | +|---|---|---|---|---| +| 1,000 | 38 | 1.77 | 0.007 | 261x | +| 4,500 | 38 | 8.04 | 0.023 | 356x | +| 10,000 | 38 | 17.78 | 0.046 | 382x | + +**Fix:** Populate `_simple_name_lookup` for every insert path, including `__setitem__`. Build a complete suffix index mapping the last dot-separated segment to the full qualified name set. This converts O(n) scans to O(1) lookups. 
+ +**Projected Net Gain:** ~1.9x total speedup (13.5s saved) +**Integration Overhead:** Zero +**Risk:** Very low + +--- + +### Candidate 2: Replace pathlib with String Operations + +**Priority:** 2 +**Type:** Python refactor +**Effort:** Low +**Files:** `codebase_rag/utils/path_utils.py`, `codebase_rag/graph_updater.py:364-388` + +**Profiling Data:** +- Cumulative time: 4.29s (13.7%) +- Call count: 59,270 calls +- Root cause: `pathlib.relative_to()` creates intermediate `PurePosixPath` objects (3.39s across 54,519 calls) + +**Benchmark Results:** + +| Operation | pathlib (ms) | String ops (ms) | Speedup | +|---|---|---|---| +| `relative_to` vs `removeprefix` (5K paths) | 61.3 | 0.097 | 634x | +| Full `should_skip_path` (5K paths) | 69.3 | 1.55 | 45x | +| Full `should_skip_path` (20K paths) | 285.9 | 6.21 | 46x | + +**Fix:** Convert paths to strings at the function boundary. Use `str.removeprefix()` and `str.split("/")` instead of `Path.relative_to()` and `Path.parts`. + +**Projected Net Gain:** ~1.15x total speedup (4.0s saved) +**Integration Overhead:** Zero +**Risk:** Very low + +--- + +### Candidate 3: Cache Type Inference Results + +**Priority:** 3 +**Type:** Python memoization +**Effort:** Low +**Files:** `codebase_rag/parsers/type_inference.py:119` + +**Profiling Data:** +- Cumulative time: 2.59s (8.3%) +- Call count: 5,228 calls +- Root cause: Re-traverses AST nodes per function for type inference without caching + +**Fix:** Memoize results keyed by `(file_path, function_start_line, function_end_line)`. Cache invalidation handled by existing incremental update system. 
+ +**Projected Net Gain:** ~1.07x total speedup (2.0s saved) +**Integration Overhead:** ~2MB memory for cache +**Risk:** Low + +--- + +### Candidate 4: Suppress Debug Logging in Production + +**Priority:** 4 +**Type:** Configuration change +**Effort:** Trivial +**Files:** `codebase_rag/graph_updater.py` (run method) + +**Profiling Data:** +- Cumulative time: 1.84s (5.9%) +- Call count: 91,119 calls (85,099 debug-level) +- Root cause: Debug log calls processed even when output is suppressed + +**Fix:** Set loguru level to INFO at the start of `GraphUpdater.run()`, or use `logger.opt(lazy=True).debug()` for expensive format strings. + +**Projected Net Gain:** ~1.06x total speedup (1.7s saved) +**Integration Overhead:** Zero +**Risk:** Very low + +--- + +### Candidate 5: Deduplicate Filesystem Traversal + +**Priority:** 5 +**Type:** Python refactor +**Effort:** Low +**Files:** `codebase_rag/graph_updater.py:364`, `codebase_rag/parsers/structure_processor.py:49` + +**Profiling Data:** +- `identify_structure()`: 1.57s (5.0%) +- `_collect_eligible_files()`: 4.71s (15.1%, overlapping with Candidate 2) +- Root cause: Both call `rglob("*")` + `should_skip_path()` independently + +**Fix:** Merge into a single traversal pass that collects both structural elements and eligible files. + +**Projected Net Gain:** ~1.05x total speedup (1.5s saved) +**Integration Overhead:** Moderate refactor of two-pass architecture +**Risk:** Low + +--- + +### Candidate 6: orjson for JSON Serialization + +**Priority:** 6 +**Type:** Dependency swap +**Effort:** Trivial +**Files:** All files using `import json` (graph_loader.py, graph_updater.py, embedder.py, services/graph_service.py) + +**Benchmark Results:** + +| Operation | json (ms) | orjson (ms) | Speedup | +|---|---|---|---| +| Compact dumps (1.9 MB) | 5.73 | 1.01 | 5.7x | +| Indented dumps (1.9 MB) | 48.5 | 2.02 | 24.0x | +| Loads (1.9 MB) | 6.23 | 3.24 | 1.9x | + +**Fix:** Add `orjson>=3.10.0` to dependencies. 
Replace `json.dumps()` with `orjson.dumps()` (~10 call sites, minor API adjustment for bytes vs str return type). + +**Projected Net Gain:** 1.9x to 24x on JSON operations (5.7x to 24x on serialization, 1.9x on parsing). Marginal impact on indexing (JSON is not a dominant hotspot), significant impact on graph export/import. +**Integration Overhead:** Near zero +**Security:** Widely adopted (polars, FastAPI). Pre-built wheels. Approved by security audit. +**Risk:** Very low + +--- + +## Combined Impact Projection + +| Phase | Fixes | Time Saved | Cumulative Speedup | Overhead | +|-------|-------|-----------|-------------------|----------| +| Tier 1 | Candidates 1 through 6 | ~22.7s | ~3.7x (31.2s to ~8.5s) | Zero (except orjson dep) | + +**Post Tier 1 runtime breakdown (projected ~8.5s):** + +| Component | Time | % of Reduced Total | +|-----------|------|--------------------| +| Call resolution | ~2.5s | 29.4% | +| Graph construction | ~2.5s | 29.4% | +| Miscellaneous | ~2.0s | 23.5% | +| Tree-sitter operations | ~1.0s | 11.8% | +| File I/O + hashing | ~0.5s | 5.9% | + +--- + +## Deferred Candidates + +### Rust AST Processing Extension (PyO3/maturin) + +**Status:** DEFERRED (reconsider at 5,000+ file scale) + +**Rationale:** Tree-sitter operations consume 3.1% of CPU (0.97s). After Tier 1 fixes, this becomes 11.8% of the reduced 8.5s runtime. A 16x Rust speedup saves 0.94s, yielding 1.12x total improvement.
+ +**Why deferred, not rejected:** +- At 5,000+ file scale, tree-sitter time scales linearly while Python fix savings are largely constant +- The structural overhead per node visit (20x to 50x) is real but only matters when visit count is high enough +- Rust extension would also unlock GIL-free thread parallelism for file processing + +**Cost if pursued:** ~110KB of Python code to port, 8+ language parsers, maturin build system, Rust toolchain in CI/Docker, platform-specific wheels, ongoing Rust maintenance + +### File Processing Parallelism + +**Status:** DEFERRED (pursue after Tier 1 fixes) + +**Rationale:** Tree-sitter releases the GIL during parsing, enabling ThreadPoolExecutor parallelism. However, shared mutable state (`FunctionRegistryTrie`, `import_mapping`) requires architectural restructuring. The three-pass architecture (structure, definitions, calls) has inherent sequential dependencies. + +**Projected gain:** 1.5x to 3x after Tier 1 fixes +**Prerequisite:** Tier 1 fixes must be applied first to establish the new performance baseline + +--- + +## Rejected Candidates + +### neo4j-rust-ext + +**Verdict:** REJECTED (inapplicable) +**Reason:** This codebase uses Memgraph via `pymgclient` (C extension), not the Neo4j Python driver. `neo4j-rust-ext` patches the `neo4j` driver which is not a dependency. The language researcher's recommendation was based on an incorrect assumption about the database driver. + +### BLAKE3 Hashing + +**Verdict:** REJECTED (invalidated by benchmarks) + +**Benchmark Results:** + +| Operation | SHA256 (ms) | BLAKE3 (ms) | Speedup | +|---|---|---|---| +| 500 snippet hashes | 0.155 | 0.325 | 0.5x (slower) | +| 2,000 snippet hashes | 0.594 | 1.177 | 0.5x (slower) | +| 50 file hashes (5KB avg) | 0.968 | 1.031 | 0.9x (slower) | + +**Reason:** The language recommendations projected 4x to 10x speedup based on algorithmic benchmarks, not Python binding benchmarks. hashlib SHA256 is already C-backed (OpenSSL). 
BLAKE3's SIMD advantages require large contiguous buffers; code snippets average 200 bytes. FFI overhead per call exceeds algorithmic savings for small inputs. Additionally, hashing is <0.1% of total runtime. + +### Rust FunctionRegistryTrie (Standalone) + +**Verdict:** REJECTED +**Reason:** Standalone Rust trie provides only 1.5x to 3x net gain after FFI overhead. The FFI boundary is crossed per-lookup (thousands of times per file), cutting gains roughly in half. More critically, the Python suffix index fix (Candidate 1) provides 261x to 382x speedup on the actual bottleneck, making the Rust trie unnecessary. Only viable if bundled with a full Rust AST extension. + +### Rust String Processing in Call Resolution (Standalone) + +**Verdict:** REJECTED +**Reason:** Negative net gains when implemented standalone. Call resolution is deeply interleaved with trie lookups, import map lookups, and AST node access. Extracting just the string processing would require marshalling all context (import maps, trie state, class inheritance) across FFI on every call, which exceeds the per-operation savings. + +--- + +## Optimize-First Recommendations (Non-Rewrite) + +These Python-level improvements should be implemented before any language rewrite consideration: + +1. **Use `embed_code_batch`** in `graph_updater.py:_generate_semantic_embeddings`: The batch function exists but the pipeline calls `embed_code` per item. Projected 5x to 20x speedup on the embedding phase. + +2. **Incremental call re-resolution** in `realtime_updater.py`: Currently performs full call re-resolution on every file change. Implementing incremental resolution (re-resolve only affected qualified names) would provide 10x to 100x speedup for realtime updates. + +3. **Fix BoundedASTCache memory limit**: `sys.getsizeof()` misses C-level tree-sitter memory, so the cache size limit is effectively broken. Use `tracemalloc` or a conservative estimate based on entry count instead. + +4. 
**EmbeddingCache data format**: Replace `list[float]` with numpy arrays for 4x memory reduction on embedding storage. + +5. **FunctionRegistryTrie dual storage**: Consolidate `_entries` dict and trie nodes to eliminate 2.5 MiB waste per 10K entries (addressable as part of Candidate 1). + +--- + +## Benchmark Methodology + +**Infrastructure:** Established by test-sentinel (task #1). All benchmarks in `benchmarks/` directory. + +| Parameter | Value | +|-----------|-------| +| Warmup runs | 3 (discarded) | +| Measured iterations | 20 to 100 per benchmark | +| Statistics | Median, mean, stddev, min, max, p95 | +| GC | Disabled during timing | +| Isolation | Fresh function scope per run | + +**Benchmark suite:** + +| File | Target | +|------|--------| +| `bench_find_ending_with_fix.py` | Suffix index vs linear scan | +| `bench_pathlib_vs_string.py` | pathlib vs string path operations | +| `bench_json_serialization.py` | stdlib json vs orjson | +| `bench_file_hashing.py` | SHA256 vs BLAKE3 vs BLAKE2b | +| `bench_trie.py` | FunctionRegistryTrie operations | +| `bench_string_ops.py` | String operation microbenchmarks | +| `bench_embedding_cache.py` | EmbeddingCache operations | +| `bench_ast_cache.py` | BoundedASTCache operations | +| `bench_graph_loader.py` | GraphLoader JSON parse + index build | +| `bench_dropin_replacements.py` | Drop-in library comparisons | + +Run all benchmarks: `uv run python benchmarks/run_all.py` + +--- + +## Profiling Data Sources + +| Phase | Task | Owner | Output | +|-------|------|-------|--------| +| Baseline | #1 | test-sentinel | Green test suite, benchmark methodology | +| CPU profiling | #2 | cpu-profiler | Hotspot report (cProfile, 31.2s, 179M calls) | +| Memory profiling | #3 | memory-profiler | Allocation report (tracemalloc, 25-frame traces) | +| I/O profiling | #4 | cpu-profiler | I/O report | +| Concurrency analysis | #5 | concurrency-analyst | GIL analysis, parallelism opportunities, scaling factors | +| Structural analysis | #6 | 
static-pattern-analyst | 9 language-inherent ceilings with severity rankings | +| Language research | #7 | language-researcher | Target language recommendations (Rust via PyO3) | +| Integration feasibility | #8 | integration-architect | FFI overhead analysis, build system impact, net gain calculations | +| Benchmarks | #9 | benchmark-designer | Measured performance for all candidates | +| Scorecard | #10 | evaluator | Prioritized ranking with scores | +| Adversarial review | #11 | adversarial-reviewer | No rewrite justified at current scale | +| Security audit | #12 | security-auditor | All candidates approved, zero disputes | + +--- + +## Conclusion + +The performance analysis produced a clear, data-driven result: **optimize Python first, rewrite later (if ever).** + +The top 5 bottlenecks consuming 72.8% of runtime are all pure Python algorithmic issues (linear scan fallback, pathlib object overhead, uncached traversals, debug logging, duplicate traversals). Fixing them provides ~3.7x total speedup with zero integration overhead, zero build system changes, and zero maintenance burden. + +The Rust AST extension, while technically sound as a future optimization for large-scale workloads, targets only 3.1% of current CPU time and provides ~1.12x total improvement after Python fixes. It should be reconsidered only when the codebase routinely processes 5,000+ file repositories and the Python fixes have been applied. + +No language rewrite recommendation survived the adversarial review at current scale. diff --git a/docs/sdk/cypher-generator.md b/docs/sdk/cypher-generator.md new file mode 100644 index 000000000..b9ef63613 --- /dev/null +++ b/docs/sdk/cypher-generator.md @@ -0,0 +1,47 @@ +--- +description: "Generate Cypher queries from natural language using Code-Graph-RAG's CypherGenerator." +--- + +# Cypher Generator + +The `CypherGenerator` translates natural language questions into Cypher queries for the knowledge graph.
+ +## Usage + +```python +import asyncio +from cgr import CypherGenerator + +async def main(): + gen = CypherGenerator() + cypher = await gen.generate("Find all classes that inherit from BaseModel") + print(cypher) + +asyncio.run(main()) +``` + +## Configuration + +The Cypher generator uses the configured Cypher provider. Set it via environment variables: + +```bash +CYPHER_PROVIDER=google +CYPHER_MODEL=gemini-2.5-flash +CYPHER_API_KEY=your-api-key +``` + +Or programmatically: + +```python +from cgr import settings + +settings.set_cypher("google", "gemini-2.5-flash", api_key="your-key") +``` + +## Supported Providers + +| Provider | Example Models | +|----------|---------------| +| Google | `gemini-2.5-pro`, `gemini-2.5-flash` | +| OpenAI | `gpt-4o`, `gpt-4o-mini` | +| Ollama | `codellama`, `llama3.2` | diff --git a/docs/sdk/graph-loader.md b/docs/sdk/graph-loader.md new file mode 100644 index 000000000..f14df3a90 --- /dev/null +++ b/docs/sdk/graph-loader.md @@ -0,0 +1,73 @@ +--- +description: "Load and query exported Code-Graph-RAG knowledge graphs with the Python SDK." +--- + +# Graph Loader + +The `load_graph` function loads exported JSON graph data for programmatic analysis. 
+ +## Export a Graph + +First, export the knowledge graph to JSON: + +```bash +cgr export -o my_graph.json +``` + +Or export during graph update: + +```bash +cgr start --repo-path /path/to/repo --update-graph --clean -o my_graph.json +``` + +## Load and Query + +```python +from cgr import load_graph + +graph = load_graph("my_graph.json") +``` + +### Summary Statistics + +```python +summary = graph.summary() +print(f"Total nodes: {summary['total_nodes']}") +print(f"Total relationships: {summary['total_relationships']}") +``` + +### Find Nodes by Label + +```python +functions = graph.find_nodes_by_label("Function") +classes = graph.find_nodes_by_label("Class") +modules = graph.find_nodes_by_label("Module") +``` + +### Analyze Relationships + +```python +for func in functions[:5]: + relationships = graph.get_relationships_for_node(func.node_id) + print(f"Function {func.properties['name']} has {len(relationships)} relationships") +``` + +## Query Memgraph Directly + +For live queries against a running Memgraph instance: + +```python +from cgr import MemgraphIngestor + +with MemgraphIngestor(host="localhost", port=7687) as db: + rows = db.fetch_all("MATCH (f:Function) RETURN f.name LIMIT 10") + for row in rows: + print(row) +``` + +## Use Cases + +- Integration with other tools +- Custom analysis scripts +- Building documentation generators +- Creating code metrics dashboards diff --git a/docs/sdk/overview.md b/docs/sdk/overview.md new file mode 100644 index 000000000..8a4a88918 --- /dev/null +++ b/docs/sdk/overview.md @@ -0,0 +1,58 @@ +--- +description: "Python SDK overview for Code-Graph-RAG programmatic access." +--- + +# Python SDK Overview + +The `cgr` package provides short imports for programmatic use of Code-Graph-RAG. 
+ +## Installation + +```bash +pip install code-graph-rag +``` + +With semantic code search: + +```bash +pip install 'code-graph-rag[semantic]' +``` + +## Quick Example + +```python +from cgr import load_graph + +graph = load_graph("graph.json") +print(graph.summary()) + +functions = graph.find_nodes_by_label("Function") +for fn in functions[:5]: + rels = graph.get_relationships_for_node(fn.node_id) + print(f"{fn.properties['name']}: {len(rels)} relationships") +``` + +## Available Modules + +| Import | Purpose | +|--------|---------| +| `from cgr import load_graph` | Load and query exported graph data | +| `from cgr import MemgraphIngestor` | Query Memgraph with Cypher directly | +| `from cgr import CypherGenerator` | Generate Cypher from natural language | +| `from cgr import embed_code` | Semantic code search with UniXcoder | +| `from cgr import settings` | Configure providers programmatically | + +## Configuration + +```python +from cgr import settings + +settings.set_orchestrator("openai", "gpt-4o", api_key="sk-...") +settings.set_cypher("google", "gemini-2.5-flash", api_key="your-key") +``` + +See individual pages for detailed API usage: + +- [Graph Loader](graph-loader.md) +- [Cypher Generator](cypher-generator.md) +- [Semantic Search](semantic-search.md) diff --git a/docs/sdk/semantic-search.md b/docs/sdk/semantic-search.md new file mode 100644 index 000000000..ac4393b32 --- /dev/null +++ b/docs/sdk/semantic-search.md @@ -0,0 +1,40 @@ +--- +description: "Semantic code search with UniXcoder embeddings in Code-Graph-RAG." +--- + +# Semantic Search + +Code-Graph-RAG supports intent-based code search using UniXcoder embeddings. Find functions by describing what they do rather than by exact names. 
+ +## Installation + +Semantic search requires the `semantic` extra: + +```bash +pip install 'code-graph-rag[semantic]' +``` + +## Usage + +### Generate Code Embeddings + +```python +from cgr import embed_code + +embedding = embed_code("def authenticate(user, password): ...") +print(f"Embedding dimension: {len(embedding)}") +``` + +### Search by Description + +In the interactive CLI, you can search semantically: + +- "error handling functions" +- "authentication code" +- "database connection setup" + +The system returns potential matches with similarity scores. + +## How It Works + +UniXcoder is a unified cross-modal pre-trained model that supports both code understanding and generation. Code-Graph-RAG uses it to create embeddings that capture the semantic meaning of code, enabling searches based on what code does rather than what it's named. diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 000000000..e9e4cc5f4 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,337 @@ +@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500;600&display=swap'); + +:root { + --cgr-bg: #030712; + --cgr-surface: #111827; + --cgr-surface-lighter: #1f2937; + --cgr-brand: #6366f1; + --cgr-brand-light: #818cf8; + --cgr-brand-dark: #4f46e5; + --cgr-gray-50: #f9fafb; + --cgr-gray-400: #99a1af; + --cgr-gray-500: #6a7282; + --cgr-gray-800: #1e2939; + --cgr-indigo-700: #432dd7; +} + +/* Dark mode */ +[data-md-color-scheme="slate"] { + --md-default-bg-color: var(--cgr-bg); + --md-default-fg-color: var(--cgr-gray-50); + --md-default-fg-color--light: var(--cgr-gray-400); + --md-default-fg-color--lighter: var(--cgr-gray-500); + --md-default-fg-color--lightest: var(--cgr-gray-800); + --md-primary-fg-color: var(--cgr-brand); + --md-primary-fg-color--light: var(--cgr-brand-light); + --md-primary-fg-color--dark: var(--cgr-brand-dark); + --md-primary-bg-color: var(--cgr-gray-50); + 
--md-primary-bg-color--light: var(--cgr-gray-400); + --md-accent-fg-color: var(--cgr-brand-light); + --md-accent-fg-color--transparent: rgba(129, 140, 248, 0.1); + --md-accent-bg-color: var(--cgr-brand); + --md-code-bg-color: var(--cgr-surface); + --md-code-fg-color: #e2e8f0; + --md-code-hl-color: var(--cgr-surface-lighter); + --md-code-hl-number-color: #fbbf24; + --md-code-hl-string-color: #34d399; + --md-code-hl-keyword-color: #c084fc; + --md-code-hl-function-color: #60a5fa; + --md-code-hl-comment-color: var(--cgr-gray-500); + --md-code-hl-constant-color: #f472b6; + --md-code-hl-operator-color: #fbbf24; + --md-code-hl-punctuation-color: var(--cgr-gray-400); + --md-code-hl-special-color: #fb923c; + --md-code-hl-name-color: var(--cgr-gray-50); + --md-code-hl-generic-color: var(--cgr-gray-50); + --md-code-hl-variable-color: #f9fafb; + --md-footer-bg-color: var(--cgr-bg); + --md-footer-bg-color--dark: var(--cgr-bg); + --md-footer-fg-color: var(--cgr-gray-400); + --md-footer-fg-color--light: var(--cgr-gray-500); + --md-footer-fg-color--lighter: var(--cgr-gray-500); + --md-typeset-a-color: var(--cgr-brand-light); + --md-typeset-color: var(--cgr-gray-50); + --md-typeset-table-color: rgba(99, 102, 241, 0.05); + --md-typeset-table-color--light: rgba(99, 102, 241, 0.02); + --md-admonition-bg-color: var(--cgr-surface); + --md-shadow-z1: 0 0 0 transparent; + --md-shadow-z2: 0 0 0 transparent; + --md-shadow-z3: 0 0 0 transparent; +} + +[data-md-color-scheme="slate"] .md-header, +[data-md-color-scheme="slate"] .md-tabs { + background-color: var(--cgr-surface); + border-bottom: 1px solid var(--cgr-gray-800); +} + +[data-md-color-scheme="slate"] .md-tabs__link { + color: var(--cgr-gray-400); + opacity: 1; + transition: color 0.2s ease; +} + +[data-md-color-scheme="slate"] .md-tabs__link:hover { + color: var(--cgr-gray-50); +} + +[data-md-color-scheme="slate"] .md-tabs__link--active { + color: var(--cgr-brand-light); +} + +[data-md-color-scheme="slate"] .md-nav--primary 
.md-nav__item--active > .md-nav__link { + color: var(--cgr-brand-light); +} + +[data-md-color-scheme="slate"] .md-sidebar { + background-color: var(--cgr-bg); +} + +[data-md-color-scheme="slate"] .md-nav__link { + color: var(--cgr-gray-400); + transition: color 0.2s ease; +} + +[data-md-color-scheme="slate"] .md-nav__link:hover { + color: var(--cgr-gray-50); +} + +[data-md-color-scheme="slate"] .md-nav__link--active { + color: var(--cgr-brand-light); + font-weight: 500; +} + +[data-md-color-scheme="slate"] .md-search__form { + background-color: var(--cgr-surface); + border: 1px solid var(--cgr-gray-800); +} + +[data-md-color-scheme="slate"] .md-search__input::placeholder { + color: var(--cgr-gray-500); +} + +[data-md-color-scheme="slate"] .md-typeset code { + background-color: var(--cgr-surface); + border: 1px solid var(--cgr-gray-800); + color: var(--cgr-brand-light); +} + +[data-md-color-scheme="slate"] .md-typeset .admonition, +[data-md-color-scheme="slate"] .md-typeset details { + background-color: var(--cgr-surface); + border-color: var(--cgr-gray-800); +} + +[data-md-color-scheme="slate"] .md-typeset .md-typeset__table table { + border: 1px solid var(--cgr-gray-800); +} + +[data-md-color-scheme="slate"] .md-typeset .md-typeset__table th { + background-color: var(--cgr-surface); + border-color: var(--cgr-gray-800); +} + +[data-md-color-scheme="slate"] .md-typeset .md-typeset__table td { + border-color: var(--cgr-gray-800); +} + +[data-md-color-scheme="slate"] .md-typeset hr { + border-color: var(--cgr-gray-800); +} + +/* Light mode */ +[data-md-color-scheme="default"] { + --md-primary-fg-color: var(--cgr-brand-dark); + --md-primary-fg-color--light: var(--cgr-brand); + --md-primary-fg-color--dark: var(--cgr-indigo-700); + --md-primary-bg-color: #ffffff; + --md-accent-fg-color: var(--cgr-brand); + --md-accent-fg-color--transparent: rgba(99, 102, 241, 0.1); + --md-typeset-a-color: var(--cgr-brand-dark); + --md-code-bg-color: #f8f9fc; + --md-code-fg-color: 
#1e293b; + --md-code-hl-color: rgba(99, 102, 241, 0.08); + --md-code-hl-number-color: #b45309; + --md-code-hl-string-color: #059669; + --md-code-hl-keyword-color: #7c3aed; + --md-code-hl-function-color: #2563eb; + --md-code-hl-comment-color: #9ca3af; + --md-shadow-z1: 0 0 0 transparent; + --md-shadow-z2: 0 1px 3px rgba(0, 0, 0, 0.08); +} + +[data-md-color-scheme="default"] .md-header { + background-color: #ffffff; + border-bottom: 1px solid #e5e7eb; + color: #1e293b; +} + +[data-md-color-scheme="default"] .md-header .md-header__title { + color: #1e293b; +} + +[data-md-color-scheme="default"] .md-header .md-header__topic { + color: #1e293b; +} + +[data-md-color-scheme="default"] .md-header .md-header__button { + color: #475569; +} + +[data-md-color-scheme="default"] .md-tabs { + background-color: #ffffff; + border-bottom: 1px solid #e5e7eb; +} + +[data-md-color-scheme="default"] .md-tabs__link { + color: #64748b; + opacity: 1; +} + +[data-md-color-scheme="default"] .md-tabs__link:hover { + color: #1e293b; +} + +[data-md-color-scheme="default"] .md-tabs__link--active { + color: var(--cgr-brand-dark); +} + +[data-md-color-scheme="default"] .md-typeset code { + background-color: #f1f5f9; + border: 1px solid #e2e8f0; + color: var(--cgr-brand-dark); +} + +[data-md-color-scheme="default"] .md-search__form { + background-color: #f1f5f9; + border: 1px solid #e2e8f0; +} + +/* Shared styles */ +.md-typeset { + font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; + font-size: 0.82rem; + line-height: 1.7; +} + +.md-typeset code, +.md-typeset pre, +.md-typeset kbd { + font-family: "JetBrains Mono", "SF Mono", "Cascadia Code", "Fira Code", monospace; + font-size: 0.82em; +} + +.md-typeset h1 { + font-weight: 700; + letter-spacing: -0.02em; +} + +.md-typeset h2 { + font-weight: 600; + letter-spacing: -0.01em; +} + +.md-typeset h3, +.md-typeset h4 { + font-weight: 600; +} + +.md-typeset a { + transition: color 0.2s ease; +} + 
+[data-md-color-scheme="slate"] .md-typeset a:hover { + color: var(--cgr-brand-light); +} + +[data-md-color-scheme="default"] .md-typeset a:hover { + color: var(--cgr-indigo-700); +} + +.md-header__title, +.md-tabs__link, +.md-nav__link, +.md-button, +.md-typeset .admonition-title, +.md-typeset summary, +.md-footer, +.md-typeset table:not([class]) th { + font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; +} + +.md-header__title { + font-weight: 600; +} + +.md-tabs__link { + font-weight: 500; + font-size: 0.78rem; + letter-spacing: 0.01em; +} + +.md-nav__link { + font-size: 0.76rem; +} + +.md-button { + font-weight: 500; + border-radius: 8px; + padding: 0.6em 1.4em; + transition: background-color 0.2s ease, border-color 0.2s ease, transform 0.2s ease; +} + +.md-button--primary { + background-color: var(--cgr-brand); + border-color: var(--cgr-brand); + color: #ffffff; +} + +.md-button--primary:hover { + background-color: var(--cgr-brand-dark); + border-color: var(--cgr-brand-dark); + color: #ffffff; +} + +.md-typeset .md-button:hover { + transform: translateY(-1px); +} + +.md-content { + max-width: 52rem; +} + +.md-typeset pre > code { + border-radius: 8px; +} + +.md-typeset .admonition, +.md-typeset details { + border-radius: 8px; + border-width: 1px; + border-left-width: 4px; +} + +.md-typeset .admonition-title, +.md-typeset summary { + font-weight: 600; +} + +.md-search__form { + border-radius: 8px; +} + +.md-typeset table:not([class]) { + font-size: 0.8rem; + border-radius: 8px; + overflow: hidden; +} + +.md-typeset table:not([class]) th { + font-weight: 600; +} + +@media screen and (min-width: 76.25em) { + .md-sidebar--primary { + width: 13rem; + } +} diff --git a/docs/tree-sitter.txt b/docs/tree-sitter.txt deleted file mode 100644 index 80bf33fcc..000000000 --- a/docs/tree-sitter.txt +++ /dev/null @@ -1,94585 +0,0 @@ -. 
-├── crates -│   ├── cli -│   │   ├── benches -│   │   │   └── benchmark.rs -│   │   ├── eslint -│   │   │   └── index.js -│   │   ├── npm -│   │   │   ├── cli.js -│   │   │   ├── dsl.d.ts -│   │   │   └── install.js -│   │   ├── src -│   │   │   ├── fuzz -│   │   │   │   ├── allocations.rs -│   │   │   │   ├── corpus_test.rs -│   │   │   │   ├── edits.rs -│   │   │   │   ├── random.rs -│   │   │   │   └── scope_sequence.rs -│   │   │   ├── templates -│   │   │   │   ├── __init__.py -│   │   │   │   ├── __init__.pyi -│   │   │   │   ├── _cargo.toml -│   │   │   │   ├── binding_test.go -│   │   │   │   ├── binding_test.js -│   │   │   │   ├── binding.go -│   │   │   │   ├── binding.gyp -│   │   │   │   ├── cmakelists.cmake -│   │   │   │   ├── gitattributes -│   │   │   │   ├── gitignore -│   │   │   │   ├── go.mod -│   │   │   │   ├── grammar.js -│   │   │   │   ├── index.d.ts -│   │   │   │   ├── index.js -│   │   │   │   ├── js-binding.cc -│   │   │   │   ├── lib.rs -│   │   │   │   ├── makefile -│   │   │   │   ├── package.swift -│   │   │   │   ├── PARSER_NAME.h -│   │   │   │   ├── PARSER_NAME.pc.in -│   │   │   │   ├── py-binding.c -│   │   │   │   ├── pyproject.toml -│   │   │   │   ├── root.zig -│   │   │   │   ├── setup.py -│   │   │   │   ├── test_binding.py -│   │   │   │   ├── test.zig -│   │   │   │   └── tests.swift -│   │   │   ├── tests -│   │   │   │   ├── helpers -│   │   │   │   │   ├── dirs.rs -│   │   │   │   │   ├── edits.rs -│   │   │   │   │   ├── fixtures.rs -│   │   │   │   │   └── query_helpers.rs -│   │   │   │   ├── proc_macro -│   │   │   │   │   ├── src -│   │   │   │   │   │   └── lib.rs -│   │   │   │   │   └── Cargo.toml -│   │   │   │   ├── async_boundary_test.rs -│   │   │   │   ├── corpus_test.rs -│   │   │   │   ├── detect_language.rs -│   │   │   │   ├── helpers.rs -│   │   │   │   ├── highlight_test.rs -│   │   │   │   ├── language_test.rs -│   │   │   │   ├── node_test.rs -│   │   │   │   ├── parser_test.rs -│   │   │   │   
├── pathological_test.rs -│   │   │   │   ├── query_test.rs -│   │   │   │   ├── tags_test.rs -│   │   │   │   ├── test_highlight_test.rs -│   │   │   │   ├── test_tags_test.rs -│   │   │   │   ├── text_provider_test.rs -│   │   │   │   ├── tree_test.rs -│   │   │   │   └── wasm_language_test.rs -│   │   │   ├── fuzz.rs -│   │   │   ├── highlight.rs -│   │   │   ├── init.rs -│   │   │   ├── input.rs -│   │   │   ├── logger.rs -│   │   │   ├── main.rs -│   │   │   ├── parse.rs -│   │   │   ├── playground.html -│   │   │   ├── playground.rs -│   │   │   ├── query_testing.rs -│   │   │   ├── query.rs -│   │   │   ├── tags.rs -│   │   │   ├── test_highlight.rs -│   │   │   ├── test_tags.rs -│   │   │   ├── test.rs -│   │   │   ├── tests.rs -│   │   │   ├── tree_sitter_cli.rs -│   │   │   ├── util.rs -│   │   │   ├── version.rs -│   │   │   └── wasm.rs -│   │   ├── Cargo.toml -│   │   ├── package.nix -│   │   └── README.md -│   ├── config -│   │   ├── src -│   │   │   └── tree_sitter_config.rs -│   │   ├── Cargo.toml -│   │   └── README.md -│   ├── generate -│   │   ├── src -│   │   │   ├── prepare_grammar -│   │   │   │   ├── expand_repeats.rs -│   │   │   │   ├── expand_tokens.rs -│   │   │   │   ├── extract_default_aliases.rs -│   │   │   │   ├── extract_tokens.rs -│   │   │   │   ├── flatten_grammar.rs -│   │   │   │   ├── intern_symbols.rs -│   │   │   │   └── process_inlines.rs -│   │   │   ├── templates -│   │   │   │   ├── alloc.h -│   │   │   │   └── array.h -│   │   │   ├── dedup.rs -│   │   │   ├── dsl.js -│   │   │   ├── generate.rs -│   │   │   ├── grammars.rs -│   │   │   ├── nfa.rs -│   │   │   ├── node_types.rs -│   │   │   ├── parse_grammar.rs -│   │   │   ├── parser.h.inc -│   │   │   ├── prepare_grammar.rs -│   │   │   ├── quickjs.rs -│   │   │   ├── render.rs -│   │   │   ├── rules.rs -│   │   │   └── tables.rs -│   │   ├── Cargo.toml -│   │   └── README.md -│   ├── highlight -│   │   ├── include -│   │   │   └── tree_sitter -│   │   │   └── 
highlight.h -│   │   ├── src -│   │   │   ├── c_lib.rs -│   │   │   └── highlight.rs -│   │   ├── Cargo.toml -│   │   └── README.md -│   ├── language -│   │   ├── src -│   │   │   └── language.rs -│   │   ├── wasm -│   │   │   ├── include -│   │   │   │   ├── assert.h -│   │   │   │   ├── ctype.h -│   │   │   │   ├── endian.h -│   │   │   │   ├── inttypes.h -│   │   │   │   ├── stdint.h -│   │   │   │   ├── stdio.h -│   │   │   │   ├── stdlib.h -│   │   │   │   ├── string.h -│   │   │   │   └── wctype.h -│   │   │   └── src -│   │   │   ├── stdio.c -│   │   │   ├── stdlib.c -│   │   │   └── string.c -│   │   ├── Cargo.toml -│   │   └── README.md -│   ├── loader -│   │   ├── src -│   │   │   └── loader.rs -│   │   ├── Cargo.toml -│   │   ├── emscripten-version -│   │   └── README.md -│   ├── tags -│   │   ├── include -│   │   │   └── tree_sitter -│   │   │   └── tags.h -│   │   ├── src -│   │   │   ├── c_lib.rs -│   │   │   └── tags.rs -│   │   ├── Cargo.toml -│   │   └── README.md -│   └── xtask -│   ├── src -│   │   ├── benchmark.rs -│   │   ├── bump.rs -│   │   ├── check_wasm_exports.rs -│   │   ├── clippy.rs -│   │   ├── embed_sources.rs -│   │   ├── fetch.rs -│   │   ├── generate.rs -│   │   ├── main.rs -│   │   ├── test.rs -│   │   └── upgrade_wasmtime.rs -│   └── Cargo.toml -├── docs -│   ├── src -│   │   ├── assets -│   │   │   ├── css -│   │   │   │   ├── mdbook-admonish.css -│   │   │   │   └── playground.css -│   │   │   ├── images -│   │   │   │   ├── favicon-16x16.png -│   │   │   │   ├── favicon-32x32.png -│   │   │   │   └── tree-sitter-small.png -│   │   │   ├── js -│   │   │   │   └── playground.js -│   │   │   └── schemas -│   │   │   ├── config.schema.json -│   │   │   └── grammar.schema.json -│   │   ├── cli -│   │   │   ├── complete.md -│   │   │   ├── dump-languages.md -│   │   │   ├── fuzz.md -│   │   │   ├── generate.md -│   │   │   ├── highlight.md -│   │   │   ├── index.md -│   │   │   ├── init-config.md -│   │   │   ├── init.md -│   │   │  
 ├── parse.md -│   │   │   ├── playground.md -│   │   │   ├── query.md -│   │   │   ├── tags.md -│   │   │   ├── test.md -│   │   │   └── version.md -│   │   ├── creating-parsers -│   │   │   ├── 1-getting-started.md -│   │   │   ├── 2-the-grammar-dsl.md -│   │   │   ├── 3-writing-the-grammar.md -│   │   │   ├── 4-external-scanners.md -│   │   │   ├── 5-writing-tests.md -│   │   │   ├── 6-publishing.md -│   │   │   └── index.md -│   │   ├── using-parsers -│   │   │   ├── queries -│   │   │   │   ├── 1-syntax.md -│   │   │   │   ├── 2-operators.md -│   │   │   │   ├── 3-predicates-and-directives.md -│   │   │   │   ├── 4-api.md -│   │   │   │   └── index.md -│   │   │   ├── 1-getting-started.md -│   │   │   ├── 2-basic-parsing.md -│   │   │   ├── 3-advanced-parsing.md -│   │   │   ├── 4-walking-trees.md -│   │   │   ├── 6-static-node-types.md -│   │   │   └── index.md -│   │   ├── 3-syntax-highlighting.md -│   │   ├── 4-code-navigation.md -│   │   ├── 5-implementation.md -│   │   ├── 6-contributing.md -│   │   ├── 7-playground.md -│   │   ├── index.md -│   │   └── SUMMARY.md -│   ├── theme -│   │   └── favicon.png -│   ├── book.toml -│   └── package.nix -├── lib -│   ├── binding_rust -│   │   ├── bindings.rs -│   │   ├── ffi.rs -│   │   ├── lib.rs -│   │   ├── README.md -│   │   ├── util.rs -│   │   └── wasm_language.rs -│   ├── binding_web -│   │   ├── lib -│   │   │   ├── exports.txt -│   │   │   ├── imports.js -│   │   │   ├── prefix.js -│   │   │   ├── tree-sitter.c -│   │   │   └── web-tree-sitter.d.ts -│   │   ├── script -│   │   │   ├── check-artifacts-fresh.ts -│   │   │   └── generate-dts.js -│   │   ├── src -│   │   │   ├── bindings.ts -│   │   │   ├── constants.ts -│   │   │   ├── edit.ts -│   │   │   ├── index.ts -│   │   │   ├── language.ts -│   │   │   ├── lookahead_iterator.ts -│   │   │   ├── marshal.ts -│   │   │   ├── node.ts -│   │   │   ├── parser.ts -│   │   │   ├── query.ts -│   │   │   ├── tree_cursor.ts -│   │   │   └── tree.ts -│   │   ├── 
test -│   │   │   ├── edit.test.ts -│   │   │   ├── helper.ts -│   │   │   ├── language.test.ts -│   │   │   ├── node.test.ts -│   │   │   ├── parser.test.ts -│   │   │   ├── query.test.ts -│   │   │   └── tree.test.ts -│   │   ├── eslint.config.mjs -│   │   ├── package.nix -│   │   ├── README.md -│   │   ├── tsconfig.json -│   │   ├── vitest.config.ts -│   │   └── wasm-test-grammars.nix -│   ├── include -│   │   └── tree_sitter -│   │   └── api.h -│   ├── lldb_pretty_printers -│   │   ├── table_entry.py -│   │   ├── tree_sitter_types.py -│   │   ├── ts_array.py -│   │   └── ts_tree.py -│   ├── src -│   │   ├── portable -│   │   │   └── endian.h -│   │   ├── unicode -│   │   │   ├── ICU_SHA -│   │   │   ├── ptypes.h -│   │   │   ├── README.md -│   │   │   ├── umachine.h -│   │   │   ├── urename.h -│   │   │   ├── utf.h -│   │   │   ├── utf16.h -│   │   │   └── utf8.h -│   │   ├── wasm -│   │   │   ├── stdlib-symbols.txt -│   │   │   └── wasm-stdlib.h -│   │   ├── alloc.c -│   │   ├── alloc.h -│   │   ├── array.h -│   │   ├── atomic.h -│   │   ├── error_costs.h -│   │   ├── get_changed_ranges.c -│   │   ├── get_changed_ranges.h -│   │   ├── host.h -│   │   ├── language.c -│   │   ├── language.h -│   │   ├── length.h -│   │   ├── lexer.c -│   │   ├── lexer.h -│   │   ├── lib.c -│   │   ├── node.c -│   │   ├── parser.c -│   │   ├── parser.h -│   │   ├── point.c -│   │   ├── point.h -│   │   ├── query.c -│   │   ├── reduce_action.h -│   │   ├── reusable_node.h -│   │   ├── stack.c -│   │   ├── stack.h -│   │   ├── subtree.c -│   │   ├── subtree.h -│   │   ├── tree_cursor.c -│   │   ├── tree_cursor.h -│   │   ├── tree.c -│   │   ├── tree.h -│   │   ├── ts_assert.h -│   │   ├── unicode.h -│   │   ├── wasm_store.c -│   │   └── wasm_store.h -│   ├── .ccls -│   ├── Cargo.toml -│   ├── package.nix -│   ├── README.md -│   └── tree-sitter.pc.in -├── test -│   └── fixtures -│   ├── error_corpus -│   │   ├── c_errors.txt -│   │   ├── javascript_errors.txt -│   │   ├── 
json_errors.txt -│   │   ├── python_errors.txt -│   │   ├── readme.md -│   │   └── ruby_errors.txt -│   ├── template_corpus -│   │   ├── readme.md -│   │   └── ruby_templates.txt -│   ├── test_grammars -│   │   ├── aliased_inlined_rules -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── aliased_rules -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── aliased_token_rules -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── aliased_unit_reductions -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── aliases_in_root -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── anonymous_error -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── anonymous_tokens_with_escaped_chars -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── associativity_left -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── associativity_missing -│   │   │   ├── expected_error.txt -│   │   │   └── grammar.js -│   │   ├── associativity_right -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── conflict_in_repeat_rule -│   │   │   ├── expected_error.txt -│   │   │   └── grammar.js -│   │   ├── conflict_in_repeat_rule_after_external_token -│   │   │   ├── expected_error.txt -│   │   │   └── grammar.js -│   │   ├── conflicting_precedence -│   │   │   ├── expected_error.txt -│   │   │   └── grammar.js -│   │   ├── depends_on_column -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── scanner.c -│   │   ├── dynamic_precedence -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.md -│   │   ├── epsilon_external_extra_tokens -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── scanner.c -│   │   ├── epsilon_external_tokens -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── scanner.c -│   │   ├── epsilon_rules -│   │   │   ├── expected_error.txt -│   │   │   └── grammar.js 
-│   │   ├── external_and_internal_anonymous_tokens -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   ├── readme.md -│   │   │   └── scanner.c -│   │   ├── external_and_internal_tokens -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── scanner.c -│   │   ├── external_extra_tokens -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── scanner.c -│   │   ├── external_tokens -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── scanner.c -│   │   ├── external_unicode_column_alignment -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   ├── README.md -│   │   │   └── scanner.c -│   │   ├── extra_non_terminals -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── extra_non_terminals_with_shared_rules -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── get_col_eof -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── scanner.c -│   │   ├── get_col_should_hang_not_crash -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── scanner.c -│   │   ├── immediate_tokens -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── indirect_recursion_in_transitions -│   │   │   ├── expected_error.txt -│   │   │   └── grammar.js -│   │   ├── inline_rules -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── inlined_aliased_rules -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.md -│   │   ├── inverted_external_token -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   ├── readme.md -│   │   │   └── scanner.c -│   │   ├── invisible_start_rule -│   │   │   ├── expected_error.txt -│   │   │   └── grammar.js -│   │   ├── lexical_conflicts_due_to_state_merging -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.md -│   │   ├── named_precedences -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.txt -│  
 │   ├── named_rule_aliased_as_anonymous -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.md -│   │   ├── nested_inlined_rules -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.md -│   │   ├── next_sibling_from_zwt -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── partially_resolved_conflict -│   │   │   ├── expected_error.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.txt -│   │   ├── precedence_on_single_child_missing -│   │   │   ├── expected_error.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.md -│   │   ├── precedence_on_single_child_negative -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.md -│   │   ├── precedence_on_single_child_positive -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.md -│   │   ├── precedence_on_subsequence -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── precedence_on_token -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.md -│   │   ├── readme_grammar -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── reserved_words -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── start_rule_is_blank -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── start_rule_is_token -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── unicode_classes -│   │   │   ├── corpus.txt -│   │   │   └── grammar.js -│   │   ├── unused_rules -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── readme.md -│   │   ├── uses_current_column -│   │   │   ├── corpus.txt -│   │   │   ├── grammar.js -│   │   │   └── scanner.c -│   │   └── readme.md -│   └── fixtures.json -├── Cargo.toml -├── CMakeLists.txt -├── Dockerfile -├── FUNDING.json -├── Package.swift -└── README.md - -118 directories, 437 files - - - 
--------------------------------------------------------------------------------- -/Cargo.toml: --------------------------------------------------------------------------------- - 1 | [workspace] - 2 | default-members = ["crates/cli"] - 3 | members = [ - 4 | "crates/cli", - 5 | "crates/config", - 6 | "crates/generate", - 7 | "crates/highlight", - 8 | "crates/loader", - 9 | "crates/tags", - 10 | "crates/xtask", - 11 | "crates/language", - 12 | "lib", - 13 | ] - 14 | resolver = "2" - | - 15 | [workspace.package] - 16 | version = "0.26.0" - 17 | authors = [ - 18 | "Max Brunsfeld ", - 19 | "Amaan Qureshi ", - 20 | ] - 21 | edition = "2021" - 22 | rust-version = "1.84" - 23 | homepage = "https://tree-sitter.github.io/tree-sitter" - 24 | repository = "https://github.com/tree-sitter/tree-sitter" - 25 | license = "MIT" - 26 | keywords = ["incremental", "parsing"] - 27 | categories = ["command-line-utilities", "parsing"] - | - 28 | [workspace.lints.clippy] - 29 | dbg_macro = "deny" - 30 | todo = "deny" - 31 | pedantic = { level = "warn", priority = -1 } - 32 | nursery = { level = "warn", priority = -1 } - 33 | cargo = { level = "warn", priority = -1 } - | - 34 | # The lints below are a specific subset of the pedantic+nursery lints - 35 | # that we explicitly allow in the tree-sitter codebase because they either: - 36 | # - 37 | # 1. Contain false positives, - 38 | # 2. Are unnecessary, or - 39 | # 3. 
Worsen the code - | - 40 | branches_sharing_code = "allow" - 41 | cast_lossless = "allow" - 42 | cast_possible_truncation = "allow" - 43 | cast_possible_wrap = "allow" - 44 | cast_precision_loss = "allow" - 45 | cast_sign_loss = "allow" - 46 | checked_conversions = "allow" - 47 | cognitive_complexity = "allow" - 48 | collection_is_never_read = "allow" - 49 | fallible_impl_from = "allow" - 50 | fn_params_excessive_bools = "allow" - 51 | inline_always = "allow" - 52 | if_not_else = "allow" - 53 | items_after_statements = "allow" - 54 | match_wildcard_for_single_variants = "allow" - 55 | missing_errors_doc = "allow" - 56 | missing_panics_doc = "allow" - 57 | module_name_repetitions = "allow" - 58 | multiple_crate_versions = "allow" - 59 | needless_for_each = "allow" - 60 | obfuscated_if_else = "allow" - 61 | option_if_let_else = "allow" - 62 | or_fun_call = "allow" - 63 | range_plus_one = "allow" - 64 | redundant_clone = "allow" - 65 | redundant_closure_for_method_calls = "allow" - 66 | ref_option = "allow" - 67 | similar_names = "allow" - 68 | string_lit_as_bytes = "allow" - 69 | struct_excessive_bools = "allow" - 70 | struct_field_names = "allow" - 71 | transmute_undefined_repr = "allow" - 72 | too_many_lines = "allow" - 73 | unnecessary_wraps = "allow" - 74 | unused_self = "allow" - 75 | used_underscore_items = "allow" - | - 76 | [workspace.lints.rust] - 77 | mismatched_lifetime_syntaxes = "allow" - | - 78 | [profile.optimize] - 79 | inherits = "release" - 80 | strip = true # Automatically strip symbols from the binary. - 81 | lto = true # Link-time optimization. - 82 | opt-level = 3 # Optimization level 3. - 83 | codegen-units = 1 # Maximum size reduction optimizations. - | - 84 | [profile.size] - 85 | inherits = "optimize" - 86 | opt-level = "s" # Optimize for size. 
- | - 87 | [profile.release-dev] - 88 | inherits = "release" - 89 | lto = false - 90 | debug = true - 91 | debug-assertions = true - 92 | overflow-checks = true - 93 | incremental = true - 94 | codegen-units = 256 - | - 95 | [workspace.dependencies] - 96 | ansi_colours = "1.2.3" - 97 | anstyle = "1.0.11" - 98 | anyhow = "1.0.100" - 99 | bstr = "1.12.0" - 100 | cc = "1.2.39" - 101 | clap = { version = "4.5.48", features = [ - 102 | "cargo", - 103 | "derive", - 104 | "env", - 105 | "help", - 106 | "string", - 107 | "unstable-styles", - 108 | ] } - 109 | clap_complete = "4.5.58" - 110 | clap_complete_nushell = "4.5.8" - 111 | crc32fast = "1.5.0" - 112 | ctor = "0.2.9" - 113 | ctrlc = { version = "3.5.0", features = ["termination"] } - 114 | dialoguer = { version = "0.11.0", features = ["fuzzy-select"] } - 115 | etcetera = "0.10.0" - 116 | fs4 = "0.12.0" - 117 | glob = "0.3.3" - 118 | heck = "0.5.0" - 119 | html-escape = "0.2.13" - 120 | indexmap = "2.11.4" - 121 | indoc = "2.0.6" - 122 | libloading = "0.8.9" - 123 | log = { version = "0.4.28", features = ["std"] } - 124 | memchr = "2.7.6" - 125 | once_cell = "1.21.3" - 126 | pretty_assertions = "1.4.1" - 127 | rand = "0.8.5" - 128 | regex = "1.11.3" - 129 | regex-syntax = "0.8.6" - 130 | rustc-hash = "2.1.1" - 131 | semver = { version = "1.0.27", features = ["serde"] } - 132 | serde = { version = "1.0.219", features = ["derive"] } - 133 | serde_json = { version = "1.0.145", features = ["preserve_order"] } - 134 | similar = "2.7.0" - 135 | smallbitvec = "2.6.0" - 136 | streaming-iterator = "0.1.9" - 137 | tempfile = "3.23.0" - 138 | thiserror = "2.0.16" - 139 | tiny_http = "0.12.0" - 140 | topological-sort = "0.2.2" - 141 | unindent = "0.2.4" - 142 | walkdir = "2.5.0" - 143 | wasmparser = "0.229.0" - 144 | webbrowser = "1.0.5" - | - 145 | tree-sitter = { version = "0.26.0", path = "./lib" } - 146 | tree-sitter-generate = { version = "0.26.0", path = "./crates/generate" } - 147 | tree-sitter-loader = { version = 
"0.26.0", path = "./crates/loader" } - 148 | tree-sitter-config = { version = "0.26.0", path = "./crates/config" } - 149 | tree-sitter-highlight = { version = "0.26.0", path = "./crates/highlight" } - 150 | tree-sitter-tags = { version = "0.26.0", path = "./crates/tags" } - | - 151 | tree-sitter-language = { version = "0.1.5", path = "./crates/language" } - - - --------------------------------------------------------------------------------- -/CMakeLists.txt: --------------------------------------------------------------------------------- - 1 | cmake_minimum_required(VERSION 3.13) - | - 2 | project(tree-sitter - 3 | VERSION "0.26.0" - 4 | DESCRIPTION "An incremental parsing system for programming tools" - 5 | HOMEPAGE_URL "https://tree-sitter.github.io/tree-sitter/" - 6 | LANGUAGES C) - | - 7 | option(BUILD_SHARED_LIBS "Build using shared libraries" ON) - 8 | option(TREE_SITTER_FEATURE_WASM "Enable the Wasm feature" OFF) - 9 | option(AMALGAMATED "Build using an amalgamated source" OFF) - | - 10 | if(AMALGAMATED) - 11 | set(TS_SOURCE_FILES "${PROJECT_SOURCE_DIR}/lib/src/lib.c") - 12 | else() - 13 | file(GLOB TS_SOURCE_FILES lib/src/*.c) - 14 | list(REMOVE_ITEM TS_SOURCE_FILES "${PROJECT_SOURCE_DIR}/lib/src/lib.c") - 15 | endif() - | - 16 | add_library(tree-sitter ${TS_SOURCE_FILES}) - | - 17 | target_include_directories(tree-sitter PRIVATE lib/src lib/src/wasm PUBLIC lib/include) - | - 18 | if(MSVC) - 19 | target_compile_options(tree-sitter PRIVATE - 20 | /wd4018 # disable 'signed/unsigned mismatch' - 21 | /wd4232 # disable 'nonstandard extension used' - 22 | /wd4244 # disable 'possible loss of data' - 23 | /wd4267 # disable 'possible loss of data (size_t)' - 24 | /wd4701 # disable 'potentially uninitialized local variable' - 25 | /we4022 # treat 'incompatible types' as an error - 26 | /W4) - 27 | else() - 28 | target_compile_options(tree-sitter PRIVATE - 29 | -Wall -Wextra -Wshadow -Wpedantic - 30 | -Werror=incompatible-pointer-types) - 31 | endif() - | - 32 | 
if(TREE_SITTER_FEATURE_WASM) - 33 | if(NOT DEFINED CACHE{WASMTIME_INCLUDE_DIR}) - 34 | message(CHECK_START "Looking for wasmtime headers") - 35 | find_path(WASMTIME_INCLUDE_DIR wasmtime.h - 36 | PATHS ENV DEP_WASMTIME_C_API_INCLUDE) - 37 | if(NOT WASMTIME_INCLUDE_DIR) - 38 | unset(WASMTIME_INCLUDE_DIR CACHE) - 39 | message(FATAL_ERROR "Could not find wasmtime headers.\nDid you forget to set CMAKE_INCLUDE_PATH?") - 40 | endif() - 41 | message(CHECK_PASS "found") - 42 | endif() - | - 43 | if(NOT DEFINED CACHE{WASMTIME_LIBRARY}) - 44 | message(CHECK_START "Looking for wasmtime library") - 45 | find_library(WASMTIME_LIBRARY wasmtime) - 46 | if(NOT WASMTIME_LIBRARY) - 47 | unset(WASMTIME_LIBRARY CACHE) - 48 | message(FATAL_ERROR "Could not find wasmtime library.\nDid you forget to set CMAKE_LIBRARY_PATH?") - 49 | endif() - 50 | message(CHECK_PASS "found") - 51 | endif() - | - 52 | target_compile_definitions(tree-sitter PUBLIC TREE_SITTER_FEATURE_WASM) - 53 | target_include_directories(tree-sitter SYSTEM PRIVATE "${WASMTIME_INCLUDE_DIR}") - 54 | target_link_libraries(tree-sitter PUBLIC "${WASMTIME_LIBRARY}") - 55 | set_property(TARGET tree-sitter PROPERTY C_STANDARD_REQUIRED ON) - | - 56 | if(NOT BUILD_SHARED_LIBS) - 57 | if(WIN32) - 58 | target_compile_definitions(tree-sitter PRIVATE WASM_API_EXTERN= WASI_API_EXTERN=) - 59 | target_link_libraries(tree-sitter INTERFACE ws2_32 advapi32 userenv ntdll shell32 ole32 bcrypt) - 60 | elseif(NOT APPLE) - 61 | target_link_libraries(tree-sitter INTERFACE pthread dl m) - 62 | endif() - 63 | endif() - 64 | endif() - | - 65 | set_target_properties(tree-sitter - 66 | PROPERTIES - 67 | C_STANDARD 11 - 68 | C_VISIBILITY_PRESET hidden - 69 | POSITION_INDEPENDENT_CODE ON - 70 | SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}" - 71 | DEFINE_SYMBOL "") - | - 72 | target_compile_definitions(tree-sitter PRIVATE _POSIX_C_SOURCE=200112L _DEFAULT_SOURCE _DARWIN_C_SOURCE) - | - 73 | include(GNUInstallDirs) - | - 74 | 
configure_file(lib/tree-sitter.pc.in "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter.pc" @ONLY) - | - 75 | install(FILES lib/include/tree_sitter/api.h - 76 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/tree_sitter") - 77 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter.pc" - 78 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") - 79 | install(TARGETS tree-sitter - 80 | LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}") - - - --------------------------------------------------------------------------------- -/crates/cli/benches/benchmark.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | collections::BTreeMap, - 3 | env, fs, - 4 | path::{Path, PathBuf}, - 5 | str, - 6 | sync::LazyLock, - 7 | time::Instant, - 8 | }; - | - 9 | use anyhow::Context; - 10 | use log::info; - 11 | use tree_sitter::{Language, Parser, Query}; - 12 | use tree_sitter_loader::{CompileConfig, Loader}; - | - 13 | include!("../src/tests/helpers/dirs.rs"); - | - 14 | static LANGUAGE_FILTER: LazyLock> = - 15 | LazyLock::new(|| env::var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER").ok()); - 16 | static EXAMPLE_FILTER: LazyLock> = - 17 | LazyLock::new(|| env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok()); - 18 | static REPETITION_COUNT: LazyLock = LazyLock::new(|| { - 19 | env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT") - 20 | .map(|s| s.parse::().unwrap()) - 21 | .unwrap_or(5) - 22 | }); - 23 | static TEST_LOADER: LazyLock = - 24 | LazyLock::new(|| Loader::with_parser_lib_path(SCRATCH_DIR.clone())); - | - 25 | #[allow(clippy::type_complexity)] - 26 | static EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: LazyLock< - 27 | BTreeMap, Vec)>, - 28 | > = LazyLock::new(|| { - 29 | fn process_dir(result: &mut BTreeMap, Vec)>, dir: &Path) { - 30 | if dir.join("grammar.js").exists() { - 31 | let relative_path = dir.strip_prefix(GRAMMARS_DIR.as_path()).unwrap(); - 32 | let (example_paths, query_paths) = result.entry(relative_path.to_owned()).or_default(); - | - 33 | 
if let Ok(example_files) = fs::read_dir(dir.join("examples")) { - 34 | example_paths.extend(example_files.filter_map(|p| { - 35 | let p = p.unwrap().path(); - 36 | if p.is_file() { - 37 | Some(p) - 38 | } else { - 39 | None - 40 | } - 41 | })); - 42 | } - | - 43 | if let Ok(query_files) = fs::read_dir(dir.join("queries")) { - 44 | query_paths.extend(query_files.filter_map(|p| { - 45 | let p = p.unwrap().path(); - 46 | if p.is_file() { - 47 | Some(p) - 48 | } else { - 49 | None - 50 | } - 51 | })); - 52 | } - 53 | } else { - 54 | for entry in fs::read_dir(dir).unwrap() { - 55 | let entry = entry.unwrap().path(); - 56 | if entry.is_dir() { - 57 | process_dir(result, &entry); - 58 | } - 59 | } - 60 | } - 61 | } - | - 62 | let mut result = BTreeMap::new(); - 63 | process_dir(&mut result, &GRAMMARS_DIR); - 64 | result - 65 | }); - | - 66 | fn main() { - 67 | tree_sitter_cli::logger::init(); - | - 68 | let max_path_length = EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR - 69 | .values() - 70 | .flat_map(|(e, q)| { - 71 | e.iter() - 72 | .chain(q.iter()) - 73 | .map(|s| s.file_name().unwrap().to_str().unwrap().len()) - 74 | }) - 75 | .max() - 76 | .unwrap_or(0); - | - 77 | info!("Benchmarking with {} repetitions", *REPETITION_COUNT); - | - 78 | let mut parser = Parser::new(); - 79 | let mut all_normal_speeds = Vec::new(); - 80 | let mut all_error_speeds = Vec::new(); - | - 81 | for (language_path, (example_paths, query_paths)) in - 82 | EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter() - 83 | { - 84 | let language_name = language_path.file_name().unwrap().to_str().unwrap(); - | - 85 | if let Some(filter) = LANGUAGE_FILTER.as_ref() { - 86 | if language_name != filter.as_str() { - 87 | continue; - 88 | } - 89 | } - | - 90 | info!("\nLanguage: {language_name}"); - 91 | let language = get_language(language_path); - 92 | parser.set_language(&language).unwrap(); - | - 93 | info!(" Constructing Queries"); - 94 | for path in query_paths { - 95 | if let Some(filter) = 
EXAMPLE_FILTER.as_ref() { - 96 | if !path.to_str().unwrap().contains(filter.as_str()) { - 97 | continue; - 98 | } - 99 | } - | - 100 | parse(path, max_path_length, |source| { - 101 | Query::new(&language, str::from_utf8(source).unwrap()) - 102 | .with_context(|| format!("Query file path: {}", path.display())) - 103 | .expect("Failed to parse query"); - 104 | }); - 105 | } - | - 106 | info!(" Parsing Valid Code:"); - 107 | let mut normal_speeds = Vec::new(); - 108 | for example_path in example_paths { - 109 | if let Some(filter) = EXAMPLE_FILTER.as_ref() { - 110 | if !example_path.to_str().unwrap().contains(filter.as_str()) { - 111 | continue; - 112 | } - 113 | } - | - 114 | normal_speeds.push(parse(example_path, max_path_length, |code| { - 115 | parser.parse(code, None).expect("Failed to parse"); - 116 | })); - 117 | } - | - 118 | info!(" Parsing Invalid Code (mismatched languages):"); - 119 | let mut error_speeds = Vec::new(); - 120 | for (other_language_path, (example_paths, _)) in - 121 | EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR.iter() - 122 | { - 123 | if other_language_path != language_path { - 124 | for example_path in example_paths { - 125 | if let Some(filter) = EXAMPLE_FILTER.as_ref() { - 126 | if !example_path.to_str().unwrap().contains(filter.as_str()) { - 127 | continue; - 128 | } - 129 | } - | - 130 | error_speeds.push(parse(example_path, max_path_length, |code| { - 131 | parser.parse(code, None).expect("Failed to parse"); - 132 | })); - 133 | } - 134 | } - 135 | } - | - 136 | if let Some((average_normal, worst_normal)) = aggregate(&normal_speeds) { - 137 | info!(" Average Speed (normal): {average_normal} bytes/ms"); - 138 | info!(" Worst Speed (normal): {worst_normal} bytes/ms"); - 139 | } - | - 140 | if let Some((average_error, worst_error)) = aggregate(&error_speeds) { - 141 | info!(" Average Speed (errors): {average_error} bytes/ms"); - 142 | info!(" Worst Speed (errors): {worst_error} bytes/ms"); - 143 | } - | - 144 | 
all_normal_speeds.extend(normal_speeds); - 145 | all_error_speeds.extend(error_speeds); - 146 | } - | - 147 | info!("\n Overall"); - 148 | if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) { - 149 | info!(" Average Speed (normal): {average_normal} bytes/ms"); - 150 | info!(" Worst Speed (normal): {worst_normal} bytes/ms"); - 151 | } - | - 152 | if let Some((average_error, worst_error)) = aggregate(&all_error_speeds) { - 153 | info!(" Average Speed (errors): {average_error} bytes/ms"); - 154 | info!(" Worst Speed (errors): {worst_error} bytes/ms"); - 155 | } - 156 | info!(""); - 157 | } - | - 158 | fn aggregate(speeds: &[usize]) -> Option<(usize, usize)> { - 159 | if speeds.is_empty() { - 160 | return None; - 161 | } - 162 | let mut total = 0; - 163 | let mut max = usize::MAX; - 164 | for speed in speeds.iter().copied() { - 165 | total += speed; - 166 | if speed < max { - 167 | max = speed; - 168 | } - 169 | } - 170 | Some((total / speeds.len(), max)) - 171 | } - | - 172 | fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> usize { - 173 | let source_code = fs::read(path) - 174 | .with_context(|| format!("Failed to read {}", path.display())) - 175 | .unwrap(); - 176 | let time = Instant::now(); - 177 | for _ in 0..*REPETITION_COUNT { - 178 | action(&source_code); - 179 | } - 180 | let duration = time.elapsed() / (*REPETITION_COUNT as u32); - 181 | let duration_ns = duration.as_nanos(); - 182 | let speed = ((source_code.len() as u128) * 1_000_000) / duration_ns; - 183 | info!( - 184 | " {:max_path_length$}\ttime {:>7.2} ms\t\tspeed {speed:>6} bytes/ms", - 185 | path.file_name().unwrap().to_str().unwrap(), - 186 | (duration_ns as f64) / 1e6, - 187 | ); - 188 | speed as usize - 189 | } - | - 190 | fn get_language(path: &Path) -> Language { - 191 | let src_path = GRAMMARS_DIR.join(path).join("src"); - 192 | TEST_LOADER - 193 | .load_language_at_path(CompileConfig::new(&src_path, None, None)) - 194 | .with_context(|| 
format!("Failed to load language at path {}", src_path.display())) - 195 | .unwrap() - 196 | } - - - --------------------------------------------------------------------------------- -/crates/cli/Cargo.toml: --------------------------------------------------------------------------------- - 1 | [package] - 2 | name = "tree-sitter-cli" - 3 | version.workspace = true - 4 | description = "CLI tool for developing, testing, and using Tree-sitter parsers" - 5 | authors.workspace = true - 6 | edition.workspace = true - 7 | rust-version.workspace = true - 8 | readme = "README.md" - 9 | homepage.workspace = true - 10 | repository.workspace = true - 11 | documentation = "https://docs.rs/tree-sitter-cli" - 12 | license.workspace = true - 13 | keywords.workspace = true - 14 | categories.workspace = true - 15 | include = ["build.rs", "README.md", "LICENSE", "benches/*", "src/**"] - | - 16 | [lints] - 17 | workspace = true - | - 18 | [lib] - 19 | path = "src/tree_sitter_cli.rs" - | - 20 | [[bin]] - 21 | name = "tree-sitter" - 22 | path = "src/main.rs" - 23 | doc = false - | - 24 | [[bench]] - 25 | name = "benchmark" - 26 | harness = false - | - 27 | [features] - 28 | default = ["qjs-rt"] - 29 | wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"] - 30 | qjs-rt = ["tree-sitter-generate/qjs-rt"] - | - 31 | [dependencies] - 32 | ansi_colours.workspace = true - 33 | anstyle.workspace = true - 34 | anyhow.workspace = true - 35 | bstr.workspace = true - 36 | clap.workspace = true - 37 | clap_complete.workspace = true - 38 | clap_complete_nushell.workspace = true - 39 | crc32fast.workspace = true - 40 | ctor.workspace = true - 41 | ctrlc.workspace = true - 42 | dialoguer.workspace = true - 43 | glob.workspace = true - 44 | heck.workspace = true - 45 | html-escape.workspace = true - 46 | indoc.workspace = true - 47 | log.workspace = true - 48 | memchr.workspace = true - 49 | rand.workspace = true - 50 | regex.workspace = true - 51 | semver.workspace = true - 52 | serde.workspace = 
true - 53 | serde_json.workspace = true - 54 | similar.workspace = true - 55 | streaming-iterator.workspace = true - 56 | tiny_http.workspace = true - 57 | walkdir.workspace = true - 58 | wasmparser.workspace = true - 59 | webbrowser.workspace = true - | - 60 | tree-sitter.workspace = true - 61 | tree-sitter-generate.workspace = true - 62 | tree-sitter-config.workspace = true - 63 | tree-sitter-highlight.workspace = true - 64 | tree-sitter-loader.workspace = true - 65 | tree-sitter-tags.workspace = true - | - 66 | [dev-dependencies] - 67 | encoding_rs = "0.8.35" - 68 | widestring = "1.2.0" - 69 | tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" } - | - 70 | tempfile.workspace = true - 71 | pretty_assertions.workspace = true - 72 | unindent.workspace = true - - - --------------------------------------------------------------------------------- -/crates/cli/eslint/index.js: --------------------------------------------------------------------------------- - 1 | import globals from 'globals'; - 2 | import jsdoc from 'eslint-plugin-jsdoc'; - | - 3 | export default [ - 4 | jsdoc.configs['flat/recommended'], - 5 | { - 6 | languageOptions: { - 7 | ecmaVersion: 'latest', - 8 | sourceType: 'module', - 9 | globals: { - 10 | ...globals.commonjs, - 11 | ...globals.es2021, - 12 | }, - 13 | }, - 14 | plugins: { - 15 | jsdoc, - 16 | }, - 17 | rules: { - 18 | 'no-cond-assign': 'off', - 19 | 'no-irregular-whitespace': 'error', - 20 | 'no-unexpected-multiline': 'error', - 21 | 'curly': ['error', 'multi-line'], - 22 | 'guard-for-in': 'error', - 23 | 'no-caller': 'error', - 24 | 'no-extend-native': 'error', - 25 | 'no-extra-bind': 'error', - 26 | 'no-invalid-this': 'error', - 27 | 'no-multi-spaces': 'error', - 28 | 'no-multi-str': 'error', - 29 | 'no-new-wrappers': 'error', - 30 | 'no-throw-literal': 'error', - 31 | 'no-with': 'error', - 32 | 'prefer-promise-reject-errors': 'error', - 33 | 'no-unused-vars': ['error', { args: 'none' }], 
- 34 | 'array-bracket-newline': 'off', - 35 | 'array-bracket-spacing': ['error', 'never'], - 36 | 'array-element-newline': 'off', - 37 | 'block-spacing': ['error', 'never'], - 38 | 'brace-style': 'error', - 39 | 'camelcase': ['error', { properties: 'never' }], - 40 | 'comma-dangle': ['error', 'always-multiline'], - 41 | 'comma-spacing': 'error', - 42 | 'comma-style': 'error', - 43 | 'computed-property-spacing': 'error', - 44 | 'eol-last': 'error', - 45 | 'func-call-spacing': 'error', - | - 46 | 'camelcase': 'off', - 47 | 'indent': [ - 48 | 'error', - 49 | 2, - 50 | { - 51 | 'SwitchCase': 1, - 52 | }, - 53 | ], - 54 | 'key-spacing': 'error', - 55 | 'keyword-spacing': 'error', - 56 | 'linebreak-style': 'error', - 57 | 'max-len': [ - 58 | 'error', - 59 | { - 60 | code: 160, - 61 | ignoreComments: true, - 62 | ignoreUrls: true, - 63 | ignoreStrings: true, - 64 | }, - 65 | ], - 66 | 'new-cap': 'error', - 67 | 'no-array-constructor': 'error', - 68 | 'no-mixed-spaces-and-tabs': 'error', - 69 | 'no-multiple-empty-lines': ['error', { max: 2 }], - 70 | 'no-new-object': 'error', - 71 | 'no-tabs': 'error', - 72 | 'no-trailing-spaces': 'error', - 73 | 'object-curly-spacing': 'error', - 74 | 'one-var': ['error', { - 75 | var: 'never', - 76 | let: 'never', - 77 | const: 'never', - 78 | }], - 79 | 'operator-linebreak': ['error', 'after'], - 80 | 'padded-blocks': ['error', 'never'], - 81 | 'quote-props': ['error', 'consistent'], - 82 | 'quotes': ['error', 'single', { allowTemplateLiterals: true }], - 83 | 'semi': 'error', - 84 | 'semi-spacing': 'error', - 85 | 'space-before-blocks': 'error', - 86 | 'space-before-function-paren': ['error', { - 87 | asyncArrow: 'always', - 88 | anonymous: 'never', - 89 | named: 'never', - 90 | }], - 91 | 'spaced-comment': [ - 92 | 'error', - 93 | 'always', - 94 | { - 95 | line: { - 96 | markers: ['/'], - 97 | }, - 98 | }, - 99 | ], - 100 | 'switch-colon-spacing': 'error', - 101 | 'arrow-parens': 'off', - 102 | 'constructor-super': 'error', - 103 | 
'generator-star-spacing': ['error', 'after'], - 104 | 'no-new-symbol': 'error', - 105 | 'no-this-before-super': 'error', - 106 | 'no-var': 'error', - 107 | 'prefer-const': ['error', { destructuring: 'all' }], - 108 | 'prefer-rest-params': 'error', - 109 | 'prefer-spread': 'error', - 110 | 'rest-spread-spacing': 'error', - 111 | 'yield-star-spacing': ['error', 'after'], - 112 | 'jsdoc/no-undefined-types': 'off', - 113 | 'jsdoc/require-param-description': 'off', - 114 | 'jsdoc/require-returns-description': 'off', - 115 | 'jsdoc/require-returns': 'off', - 116 | 'jsdoc/tag-lines': ['error', 'any', { startLines: 1 }], - 117 | }, - 118 | }, - 119 | ]; - - - --------------------------------------------------------------------------------- -/crates/cli/npm/cli.js: --------------------------------------------------------------------------------- - 1 | #!/usr/bin/env node - | - 2 | const path = require('path'); - 3 | const spawn = require("child_process").spawn; - 4 | const executable = process.platform === 'win32' - 5 | ? 
'tree-sitter.exe' - 6 | : 'tree-sitter'; - 7 | spawn( - 8 | path.join(__dirname, executable), - 9 | process.argv.slice(2), - 10 | {stdio: 'inherit'} - 11 | ).on('close', process.exit) - - - --------------------------------------------------------------------------------- -/crates/cli/npm/dsl.d.ts: --------------------------------------------------------------------------------- - 1 | type AliasRule = { type: 'ALIAS'; named: boolean; content: Rule; value: string }; - 2 | type BlankRule = { type: 'BLANK' }; - 3 | type ChoiceRule = { type: 'CHOICE'; members: Rule[] }; - 4 | type FieldRule = { type: 'FIELD'; name: string; content: Rule }; - 5 | type ImmediateTokenRule = { type: 'IMMEDIATE_TOKEN'; content: Rule }; - 6 | type PatternRule = { type: 'PATTERN'; value: string }; - 7 | type PrecDynamicRule = { type: 'PREC_DYNAMIC'; content: Rule; value: number }; - 8 | type PrecLeftRule = { type: 'PREC_LEFT'; content: Rule; value: number }; - 9 | type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number }; - 10 | type PrecRule = { type: 'PREC'; content: Rule; value: number }; - 11 | type Repeat1Rule = { type: 'REPEAT1'; content: Rule }; - 12 | type RepeatRule = { type: 'REPEAT'; content: Rule }; - 13 | type ReservedRule = { type: 'RESERVED'; content: Rule; context_name: string }; - 14 | type SeqRule = { type: 'SEQ'; members: Rule[] }; - 15 | type StringRule = { type: 'STRING'; value: string }; - 16 | type SymbolRule = { type: 'SYMBOL'; name: Name }; - 17 | type TokenRule = { type: 'TOKEN'; content: Rule }; - | - 18 | type Rule = - 19 | | AliasRule - 20 | | BlankRule - 21 | | ChoiceRule - 22 | | FieldRule - 23 | | ImmediateTokenRule - 24 | | PatternRule - 25 | | PrecDynamicRule - 26 | | PrecLeftRule - 27 | | PrecRightRule - 28 | | PrecRule - 29 | | Repeat1Rule - 30 | | RepeatRule - 31 | | SeqRule - 32 | | StringRule - 33 | | SymbolRule - 34 | | TokenRule; - | - 35 | declare class RustRegex { - 36 | value: string; - | - 37 | constructor(pattern: string); - 38 | } 
- | - 39 | type RuleOrLiteral = Rule | RegExp | RustRegex | string; - | - 40 | type GrammarSymbols = { - 41 | [name in RuleName]: SymbolRule; - 42 | } & - 43 | Record>; - | - 44 | type RuleBuilder = ( - 45 | $: GrammarSymbols, - 46 | previous?: Rule, - 47 | ) => RuleOrLiteral; - | - 48 | type RuleBuilders< - 49 | RuleName extends string, - 50 | BaseGrammarRuleName extends string - 51 | > = { - 52 | [name in RuleName]: RuleBuilder; - 53 | }; - | - 54 | interface Grammar< - 55 | RuleName extends string, - 56 | BaseGrammarRuleName extends string = never, - 57 | Rules extends RuleBuilders = RuleBuilders< - 58 | RuleName, - 59 | BaseGrammarRuleName - 60 | > - 61 | > { - 62 | /** - 63 | * Name of the grammar language. - 64 | */ - 65 | name: string; - | - 66 | /** Mapping of grammar rule names to rule builder functions. */ - 67 | rules: Rules; - | - 68 | /** - 69 | * An array of arrays of precedence names or rules. Each inner array represents - 70 | * a *descending* ordering. Names/rules listed earlier in one of these arrays - 71 | * have higher precedence than any names/rules listed later in the same array. - 72 | * - 73 | * Using rules is just a shorthand way for using a name then calling prec() - 74 | * with that name. It is just a convenience. - 75 | */ - 76 | precedences?: ( - 77 | $: GrammarSymbols, - 78 | previous: Rule[][], - 79 | ) => RuleOrLiteral[][], - | - 80 | /** - 81 | * An array of arrays of rule names. Each inner array represents a set of - 82 | * rules that's involved in an _LR(1) conflict_ that is _intended to exist_ - 83 | * in the grammar. When these conflicts occur at runtime, Tree-sitter will - 84 | * use the GLR algorithm to explore all of the possible interpretations. If - 85 | * _multiple_ parses end up succeeding, Tree-sitter will pick the subtree - 86 | * whose corresponding rule has the highest total _dynamic precedence_. 
- 87 | * - 88 | * @param $ grammar rules - 89 | */ - 90 | conflicts?: ( - 91 | $: GrammarSymbols, - 92 | previous: Rule[][], - 93 | ) => RuleOrLiteral[][]; - | - 94 | /** - 95 | * An array of token names which can be returned by an _external scanner_. - 96 | * External scanners allow you to write custom C code which runs during the - 97 | * lexing process in order to handle lexical rules (e.g. Python's indentation - 98 | * tokens) that cannot be described by regular expressions. - 99 | * - 100 | * @param $ grammar rules - 101 | * @param previous array of externals from the base schema, if any - 102 | * - 103 | * @see https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners - 104 | */ - 105 | externals?: ( - 106 | $: Record>, - 107 | previous: Rule[], - 108 | ) => RuleOrLiteral[]; - | - 109 | /** - 110 | * An array of tokens that may appear anywhere in the language. This - 111 | * is often used for whitespace and comments. The default value of - 112 | * extras is to accept whitespace. To control whitespace explicitly, - 113 | * specify extras: `$ => []` in your grammar. - 114 | * - 115 | * @param $ grammar rules - 116 | */ - 117 | extras?: ( - 118 | $: GrammarSymbols, - 119 | ) => RuleOrLiteral[]; - | - 120 | /** - 121 | * An array of rules that should be automatically removed from the - 122 | * grammar by replacing all of their usages with a copy of their definition. - 123 | * This is useful for rules that are used in multiple places but for which - 124 | * you don't want to create syntax tree nodes at runtime. - 125 | * - 126 | * @param $ grammar rules - 127 | */ - 128 | inline?: ( - 129 | $: GrammarSymbols, - 130 | previous: Rule[], - 131 | ) => RuleOrLiteral[]; - | - 132 | /** - 133 | * A list of hidden rule names that should be considered supertypes in the - 134 | * generated node types file. 
- 135 | * - 136 | * @param $ grammar rules - 137 | * - 138 | * @see https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types - 139 | */ - 140 | supertypes?: ( - 141 | $: GrammarSymbols, - 142 | previous: Rule[], - 143 | ) => RuleOrLiteral[]; - | - 144 | /** - 145 | * The name of a token that will match keywords for the purpose of the - 146 | * keyword extraction optimization. - 147 | * - 148 | * @param $ grammar rules - 149 | * - 150 | * @see https://tree-sitter.github.io/tree-sitter/creating-parsers/3-writing-the-grammar#keyword-extraction - 151 | */ - 152 | word?: ($: GrammarSymbols) => RuleOrLiteral; - | - | - 153 | /** - 154 | * Mapping of names to reserved word sets. The first reserved word set is the - 155 | * global word set, meaning it applies to every rule in every parse state. - 156 | * The other word sets can be used with the `reserved` function. - 157 | */ - 158 | reserved?: Record< - 159 | string, - 160 | ($: GrammarSymbols) => RuleOrLiteral[] - 161 | >; - 162 | } - | - 163 | type GrammarSchema = { - 164 | [K in keyof Grammar]: K extends 'rules' - 165 | ? Record - 166 | : Grammar[K]; - 167 | }; - | - 168 | /** - 169 | * Causes the given rule to appear with an alternative name in the syntax tree. - 170 | * For instance with `alias($.foo, 'bar')`, the aliased rule will appear as an - 171 | * anonymous node, as if the rule had been written as the simple string. - 172 | * - 173 | * @param rule rule that will be aliased - 174 | * @param name target name for the alias - 175 | */ - 176 | declare function alias(rule: RuleOrLiteral, name: string): AliasRule; - | - 177 | /** - 178 | * Causes the given rule to appear as an alternative named node, for instance - 179 | * with `alias($.foo, $.bar)`, the aliased rule `foo` will appear as a named - 180 | * node called `bar`. 
- 181 | * - 182 | * @param rule rule that will be aliased - 183 | * @param symbol target symbol for the alias - 184 | */ - 185 | declare function alias( - 186 | rule: RuleOrLiteral, - 187 | symbol: SymbolRule, - 188 | ): AliasRule; - | - 189 | /** - 190 | * Creates a blank rule, matching nothing. - 191 | */ - 192 | declare function blank(): BlankRule; - | - 193 | /** - 194 | * Assigns a field name to the child node(s) matched by the given rule. - 195 | * In the resulting syntax tree, you can then use that field name to - 196 | * access specific children. - 197 | * - 198 | * @param name name of the field - 199 | * @param rule rule the field should match - 200 | */ - 201 | declare function field(name: string, rule: RuleOrLiteral): FieldRule; - | - 202 | /** - 203 | * Creates a rule that matches one of a set of possible rules. The order - 204 | * of the arguments does not matter. This is analogous to the `|` (pipe) - 205 | * operator in EBNF notation. - 206 | * - 207 | * @param options possible rule choices - 208 | */ - 209 | declare function choice(...options: RuleOrLiteral[]): ChoiceRule; - | - 210 | /** - 211 | * Creates a rule that matches zero or one occurrence of a given rule. - 212 | * It is analogous to the `[x]` (square bracket) syntax in EBNF notation. - 213 | * - 214 | * @param value rule to be made optional - 215 | */ - 216 | declare function optional(rule: RuleOrLiteral): ChoiceRule; - | - 217 | /** - 218 | * Marks the given rule with a precedence which will be used to resolve LR(1) - 219 | * conflicts at parser-generation time. When two rules overlap in a way that - 220 | * represents either a true ambiguity or a _local_ ambiguity given one token - 221 | * of lookahead, Tree-sitter will try to resolve the conflict by matching the - 222 | * rule with the higher precedence. - 223 | * - 224 | * Precedence values can either be strings or numbers. When comparing rules - 225 | * with numerical precedence, higher numbers indicate higher precedences. 
To - 226 | * compare rules with string precedence, Tree-sitter uses the grammar's `precedences` - 227 | * field. - 228 | * - 229 | * rules is zero. This works similarly to the precedence directives in Yacc grammars. - 230 | * - 231 | * @param value precedence weight - 232 | * @param rule rule being weighted - 233 | * - 234 | * @see https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables - 235 | * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html - 236 | */ - 237 | declare const prec: { - 238 | (value: string | number, rule: RuleOrLiteral): PrecRule; - | - 239 | /** - 240 | * Marks the given rule as left-associative (and optionally applies a - 241 | * numerical precedence). When an LR(1) conflict arises in which all of the - 242 | * rules have the same numerical precedence, Tree-sitter will consult the - 243 | * rules' associativity. If there is a left-associative rule, Tree-sitter - 244 | * will prefer matching a rule that ends _earlier_. This works similarly to - 245 | * associativity directives in Yacc grammars. - 246 | * - 247 | * @param value (optional) precedence weight - 248 | * @param rule rule to mark as left-associative - 249 | * - 250 | * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html - 251 | */ - 252 | left(rule: RuleOrLiteral): PrecLeftRule; - 253 | left(value: string | number, rule: RuleOrLiteral): PrecLeftRule; - | - 254 | /** - 255 | * Marks the given rule as right-associative (and optionally applies a - 256 | * numerical precedence). When an LR(1) conflict arises in which all of the - 257 | * rules have the same numerical precedence, Tree-sitter will consult the - 258 | * rules' associativity. If there is a right-associative rule, Tree-sitter - 259 | * will prefer matching a rule that ends _later_. This works similarly to - 260 | * associativity directives in Yacc grammars. 
- 261 | * - 262 | * @param value (optional) precedence weight - 263 | * @param rule rule to mark as right-associative - 264 | * - 265 | * @see https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html - 266 | */ - 267 | right(rule: RuleOrLiteral): PrecRightRule; - 268 | right(value: string | number, rule: RuleOrLiteral): PrecRightRule; - | - 269 | /** - 270 | * Marks the given rule with a numerical precedence which will be used to - 271 | * resolve LR(1) conflicts at _runtime_ instead of parser-generation time. - 272 | * This is only necessary when handling a conflict dynamically using the - 273 | * `conflicts` field in the grammar, and when there is a genuine _ambiguity_: - 274 | * multiple rules correctly match a given piece of code. In that event, - 275 | * Tree-sitter compares the total dynamic precedence associated with each - 276 | * rule, and selects the one with the highest total. This is similar to - 277 | * dynamic precedence directives in Bison grammars. - 278 | * - 279 | * @param value precedence weight - 280 | * @param rule rule being weighted - 281 | * - 282 | * @see https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html - 283 | */ - 284 | dynamic(value: string | number, rule: RuleOrLiteral): PrecDynamicRule; - 285 | }; - | - 286 | /** - 287 | * Creates a rule that matches _zero-or-more_ occurrences of a given rule. - 288 | * It is analogous to the `{x}` (curly brace) syntax in EBNF notation. This - 289 | * rule is implemented in terms of `repeat1` but is included because it - 290 | * is very commonly used. - 291 | * - 292 | * @param rule rule to repeat, zero or more times - 293 | */ - 294 | declare function repeat(rule: RuleOrLiteral): RepeatRule; - | - 295 | /** - 296 | * Creates a rule that matches one-or-more occurrences of a given rule. 
- 297 | * - 298 | * @param rule rule to repeat, one or more times - 299 | */ - 300 | declare function repeat1(rule: RuleOrLiteral): Repeat1Rule; - | - 301 | /** - 302 | * Overrides the global reserved word set for a given rule. The word set name - 303 | * should be defined in the `reserved` field in the grammar. - 304 | * - 305 | * @param wordset name of the reserved word set - 306 | * @param rule rule that will use the reserved word set - 307 | */ - 308 | declare function reserved(wordset: string, rule: RuleOrLiteral): ReservedRule; - | - 309 | /** - 310 | * Creates a rule that matches any number of other rules, one after another. - 311 | * It is analogous to simply writing multiple symbols next to each other - 312 | * in EBNF notation. - 313 | * - 314 | * @param rules ordered rules that comprise the sequence - 315 | */ - 316 | declare function seq(...rules: RuleOrLiteral[]): SeqRule; - | - 317 | /** - 318 | * Creates a symbol rule, representing another rule in the grammar by name. - 319 | * - 320 | * @param name name of the target rule - 321 | */ - 322 | declare function sym(name: Name): SymbolRule; - | - 323 | /** - 324 | * Marks the given rule as producing only a single token. Tree-sitter's - 325 | * default is to treat each string or RegExp literal in the grammar as a - 326 | * separate token. Each token is matched separately by the lexer and - 327 | * returned as its own leaf node in the tree. The token function allows - 328 | * you to express a complex rule using the DSL functions (rather - 329 | * than as a single regular expression) but still have Tree-sitter treat - 330 | * it as a single token. - 331 | * - 332 | * @param rule rule to represent as a single token - 333 | */ - 334 | declare const token: { - 335 | (rule: RuleOrLiteral): TokenRule; - | - 336 | /** - 337 | * Marks the given rule as producing an immediate token. 
This allows - 338 | * the parser to produce a different token based on whether or not - 339 | * there are `extras` preceding the token's main content. When there - 340 | * are _no_ leading `extras`, an immediate token is preferred over a - 341 | * normal token which would otherwise match. - 342 | * - 343 | * @param rule rule to represent as an immediate token - 344 | */ - 345 | immediate(rule: RuleOrLiteral): ImmediateTokenRule; - 346 | }; - | - 347 | /** - 348 | * Creates a new language grammar with the provided schema. - 349 | * - 350 | * @param options grammar options - 351 | */ - 352 | declare function grammar( - 353 | options: Grammar, - 354 | ): GrammarSchema; - | - 355 | /** - 356 | * Extends an existing language grammar with the provided options, - 357 | * creating a new language. - 358 | * - 359 | * @param baseGrammar base grammar schema to extend from - 360 | * @param options grammar options for the new extended language - 361 | */ - 362 | declare function grammar< - 363 | BaseGrammarRuleName extends string, - 364 | RuleName extends string - 365 | >( - 366 | baseGrammar: GrammarSchema, - 367 | options: Grammar, - 368 | ): GrammarSchema; - - - --------------------------------------------------------------------------------- -/crates/cli/npm/install.js: --------------------------------------------------------------------------------- - 1 | #!/usr/bin/env node - | - 2 | const fs = require('fs'); - 3 | const zlib = require('zlib'); - 4 | const http = require('http'); - 5 | const https = require('https'); - 6 | const packageJSON = require('./package.json'); - | - 7 | https.globalAgent.keepAlive = false; - | - 8 | const matrix = { - 9 | platform: { - 10 | 'darwin': { - 11 | name: 'macos', - 12 | arch: { - 13 | 'arm64': { name: 'arm64' }, - 14 | 'x64': { name: 'x64' }, - 15 | } - 16 | }, - 17 | 'linux': { - 18 | name: 'linux', - 19 | arch: { - 20 | 'arm64': { name: 'arm64' }, - 21 | 'arm': { name: 'arm' }, - 22 | 'x64': { name: 'x64' }, - 23 | 'x86': { name: 
'x86' }, - 24 | 'ppc64': { name: 'powerpc64' }, - 25 | } - 26 | }, - 27 | 'win32': { - 28 | name: 'windows', - 29 | arch: { - 30 | 'arm64': { name: 'arm64' }, - 31 | 'x64': { name: 'x64' }, - 32 | 'x86': { name: 'x86' }, - 33 | 'ia32': { name: 'x86' }, - 34 | } - 35 | }, - 36 | }, - 37 | } - | - 38 | // Determine the URL of the file. - 39 | const platform = matrix.platform[process.platform]; - 40 | const arch = platform?.arch[process.arch]; - | - 41 | if (!platform || !platform.name || !arch || !arch.name) { - 42 | console.error( - 43 | `Cannot install tree-sitter-cli for platform ${process.platform}, architecture ${process.arch}` - 44 | ); - 45 | process.exit(1); - 46 | } - | - 47 | const releaseURL = `https://github.com/tree-sitter/tree-sitter/releases/download/v${packageJSON.version}`; - 48 | const assetName = `tree-sitter-${platform.name}-${arch.name}.gz`; - 49 | const assetURL = `${releaseURL}/${assetName}`; - | - 50 | // Remove previously-downloaded files. - 51 | const executableName = process.platform === 'win32' ? 'tree-sitter.exe' : 'tree-sitter'; - 52 | if (fs.existsSync(executableName)) { - 53 | fs.unlinkSync(executableName); - 54 | } - | - 55 | // Download the compressed file. - 56 | console.log(`Downloading ${assetURL}`); - 57 | const file = fs.createWriteStream(executableName); - 58 | get(assetURL, response => { - 59 | if (response.statusCode > 299) { - 60 | console.error([ - 61 | 'Download failed', - 62 | '', - 63 | `url: ${assetURL}`, - 64 | `status: ${response.statusCode}`, - 65 | `headers: ${JSON.stringify(response.headers, null, 2)}`, - 66 | '', - 67 | ].join('\n')); - 68 | process.exit(1); - 69 | } - 70 | response.pipe(zlib.createGunzip()).pipe(file); - 71 | }); - | - 72 | file.on('finish', () => { - 73 | fs.chmodSync(executableName, '755'); - 74 | }); - | - 75 | // Follow redirects. 
- 76 | function get(url, callback) { - 77 | const processResponse = (response) => { - 78 | if (response.statusCode === 301 || response.statusCode === 302) { - 79 | get(response.headers.location, callback); - 80 | } else { - 81 | callback(response); - 82 | } - 83 | }; - | - 84 | const proxyEnv = process.env.HTTPS_PROXY || process.env.https_proxy; - 85 | if (!proxyEnv) { - 86 | https.get(url, processResponse); - 87 | return; - 88 | } - | - 89 | const requestUrl = new URL(url); - 90 | const requestPort = requestUrl.port || (requestUrl.protocol === 'https:' ? 443 : 80); - 91 | const proxyUrl = new URL(proxyEnv); - 92 | const request = proxyUrl.protocol === 'https:' ? https : http; - 93 | const requestOption = { - 94 | host: proxyUrl.hostname, - 95 | port: proxyUrl.port || (proxyUrl.protocol === 'https:' ? 443 : 80), - 96 | method: 'CONNECT', - 97 | path: `${requestUrl.hostname}:${requestPort}`, - 98 | }; - 99 | if (proxyUrl.username || proxyUrl.password) { - 100 | const auth = `${decodeURIComponent( - 101 | proxyUrl.username - 102 | )}:${decodeURIComponent(proxyUrl.password)}`; - 103 | requestOption.headers = { - 104 | 'Proxy-Authorization': `Basic ${Buffer.from( - 105 | auth - 106 | ).toString('base64')}`, - 107 | } - 108 | } - 109 | request.request(requestOption).on('connect', (response, socket, _head) => { - 110 | if (response.statusCode !== 200) { - 111 | // let caller handle error - 112 | callback(response); - 113 | return; - 114 | } - | - 115 | const agent = https.Agent({ socket }); - 116 | https.get({ - 117 | host: requestUrl.host, - 118 | port: requestPort, - 119 | path: `${requestUrl.pathname}${requestUrl.search}`, - 120 | agent, - 121 | }, processResponse); - 122 | }).end(); - 123 | } - - - --------------------------------------------------------------------------------- -/crates/cli/package.nix: --------------------------------------------------------------------------------- - 1 | { - 2 | lib, - 3 | src, - 4 | rustPlatform, - 5 | version, - 6 | clang, - 7 | 
libclang, - 8 | cmake, - 9 | pkg-config, - 10 | nodejs_22, - 11 | test-grammars, - 12 | stdenv, - 13 | installShellFiles, - 14 | }: - 15 | let - 16 | isCross = stdenv.targetPlatform == stdenv.buildPlatform; - 17 | in - 18 | rustPlatform.buildRustPackage { - 19 | pname = "tree-sitter-cli"; - | - 20 | inherit src version; - | - 21 | cargoBuildFlags = [ "--all-features" ]; - | - 22 | nativeBuildInputs = [ - 23 | clang - 24 | cmake - 25 | pkg-config - 26 | nodejs_22 - 27 | ] - 28 | ++ lib.optionals (!isCross) [ installShellFiles ]; - | - 29 | cargoLock.lockFile = ../../Cargo.lock; - | - 30 | env.LIBCLANG_PATH = "${libclang.lib}/lib"; - | - 31 | preBuild = '' - 32 | rm -rf test/fixtures - 33 | mkdir -p test/fixtures - 34 | cp -r ${test-grammars}/fixtures/* test/fixtures/ - 35 | chmod -R u+w test/fixtures - 36 | ''; - | - 37 | preCheck = "export HOME=$TMPDIR"; - 38 | doCheck = !isCross; - | - 39 | postInstall = lib.optionalString (!isCross) '' - 40 | installShellCompletion --cmd tree-sitter \ - 41 | --bash <($out/bin/tree-sitter complete --shell bash) \ - 42 | --zsh <($out/bin/tree-sitter complete --shell zsh) \ - 43 | --fish <($out/bin/tree-sitter complete --shell fish) - 44 | ''; - | - 45 | meta = { - 46 | description = "Tree-sitter CLI - A tool for developing, testing, and using Tree-sitter parsers"; - 47 | longDescription = '' - 48 | Tree-sitter is a parser generator tool and an incremental parsing library. - 49 | It can build a concrete syntax tree for a source file and efficiently update - 50 | the syntax tree as the source file is edited. This package provides the CLI - 51 | tool for developing, testing, and using Tree-sitter parsers. 
- 52 | ''; - 53 | homepage = "https://tree-sitter.github.io/tree-sitter"; - 54 | changelog = "https://github.com/tree-sitter/tree-sitter/releases/tag/v${version}"; - 55 | license = lib.licenses.mit; - 56 | maintainers = with lib.maintainers; [ amaanq ]; - 57 | platforms = lib.platforms.all; - 58 | mainProgram = "tree-sitter"; - 59 | }; - 60 | } - - - --------------------------------------------------------------------------------- -/crates/cli/README.md: --------------------------------------------------------------------------------- - 1 | # Tree-sitter CLI - | - 2 | [![crates.io badge]][crates.io] [![npmjs.com badge]][npmjs.com] - | - 3 | [crates.io]: https://crates.io/crates/tree-sitter-cli - 4 | [crates.io badge]: https://img.shields.io/crates/v/tree-sitter-cli.svg?color=%23B48723 - 5 | [npmjs.com]: https://www.npmjs.org/package/tree-sitter-cli - 6 | [npmjs.com badge]: https://img.shields.io/npm/v/tree-sitter-cli.svg?color=%23BF4A4A - | - 7 | The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on `MacOS`, `Linux`, and `Windows`. - | - 8 | ### Installation - | - 9 | You can install the `tree-sitter-cli` with `cargo`: - | - 10 | ```sh - 11 | cargo install --locked tree-sitter-cli - 12 | ``` - | - 13 | or with `npm`: - | - 14 | ```sh - 15 | npm install tree-sitter-cli - 16 | ``` - | - 17 | You can also download a pre-built binary for your platform from [the releases page]. - | - 18 | ### Dependencies - | - 19 | The `tree-sitter` binary itself has no dependencies, but specific commands have dependencies that must be present at runtime: - | - 20 | * To generate a parser from a grammar, you must have [`node`](https://nodejs.org) on your PATH. - 21 | * To run and test parsers, you must have a C and C++ compiler on your system. - | - 22 | ### Commands - | - 23 | * `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. 
See [the documentation] for more information. - | - 24 | * `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation] for more information. - | - 25 | * `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers. - | - 26 | [the documentation]: https://tree-sitter.github.io/tree-sitter/creating-parsers - 27 | [the releases page]: https://github.com/tree-sitter/tree-sitter/releases/latest - - - --------------------------------------------------------------------------------- -/crates/cli/src/fuzz.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | collections::HashMap, - 3 | env, fs, - 4 | path::{Path, PathBuf}, - 5 | sync::LazyLock, - 6 | }; - | - 7 | use log::{error, info}; - 8 | use rand::Rng; - 9 | use regex::Regex; - 10 | use tree_sitter::{Language, Parser}; - | - 11 | pub mod allocations; - 12 | pub mod corpus_test; - 13 | pub mod edits; - 14 | pub mod random; - 15 | pub mod scope_sequence; - | - 16 | use crate::{ - 17 | fuzz::{ - 18 | corpus_test::{ - 19 | check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges, - 20 | }, - 21 | edits::{get_random_edit, invert_edit}, - 22 | random::Rand, - 23 | }, - 24 | parse::perform_edit, - 25 | test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry}, - 26 | }; - | - 27 | pub static LOG_ENABLED: LazyLock = LazyLock::new(|| env::var("TREE_SITTER_LOG").is_ok()); - | - 28 | pub static LOG_GRAPH_ENABLED: LazyLock = - 29 | LazyLock::new(|| env::var("TREE_SITTER_LOG_GRAPHS").is_ok()); - | - 30 | pub static LANGUAGE_FILTER: LazyLock> = - 31 | LazyLock::new(|| env::var("TREE_SITTER_LANGUAGE").ok()); - | - 32 | pub static EXAMPLE_INCLUDE: LazyLock> = - 33 | LazyLock::new(|| regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE")); - | - 34 | pub static EXAMPLE_EXCLUDE: LazyLock> = - 35 | LazyLock::new(|| 
regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE")); - | - 36 | pub static START_SEED: LazyLock = LazyLock::new(new_seed); - | - 37 | pub static EDIT_COUNT: LazyLock = - 38 | LazyLock::new(|| int_env_var("TREE_SITTER_EDITS").unwrap_or(3)); - | - 39 | pub static ITERATION_COUNT: LazyLock = - 40 | LazyLock::new(|| int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10)); - | - 41 | fn int_env_var(name: &'static str) -> Option { - 42 | env::var(name).ok().and_then(|e| e.parse().ok()) - 43 | } - | - 44 | fn regex_env_var(name: &'static str) -> Option { - 45 | env::var(name).ok().and_then(|e| Regex::new(&e).ok()) - 46 | } - | - 47 | #[must_use] - 48 | pub fn new_seed() -> usize { - 49 | int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| { - 50 | let mut rng = rand::thread_rng(); - 51 | let seed = rng.gen::(); - 52 | info!("Seed: {seed}"); - 53 | seed - 54 | }) - 55 | } - | - 56 | pub struct FuzzOptions { - 57 | pub skipped: Option>, - 58 | pub subdir: Option, - 59 | pub edits: usize, - 60 | pub iterations: usize, - 61 | pub include: Option, - 62 | pub exclude: Option, - 63 | pub log_graphs: bool, - 64 | pub log: bool, - 65 | } - | - 66 | pub fn fuzz_language_corpus( - 67 | language: &Language, - 68 | language_name: &str, - 69 | start_seed: usize, - 70 | grammar_dir: &Path, - 71 | options: &mut FuzzOptions, - 72 | ) { - 73 | fn retain(entry: &mut TestEntry, language_name: &str) -> bool { - 74 | match entry { - 75 | TestEntry::Example { attributes, .. } => { - 76 | attributes.languages[0].is_empty() - 77 | || attributes - 78 | .languages - 79 | .iter() - 80 | .any(|lang| lang.as_ref() == language_name) - 81 | } - 82 | TestEntry::Group { - 83 | ref mut children, .. 
- 84 | } => { - 85 | children.retain_mut(|child| retain(child, language_name)); - 86 | !children.is_empty() - 87 | } - 88 | } - 89 | } - | - 90 | let subdir = options.subdir.take().unwrap_or_default(); - | - 91 | let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus"); - | - 92 | if !corpus_dir.exists() || !corpus_dir.is_dir() { - 93 | error!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file."); - 94 | return; - 95 | } - | - 96 | if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 { - 97 | error!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory."); - 98 | return; - 99 | } - | - 100 | let mut main_tests = parse_tests(&corpus_dir).unwrap(); - 101 | match main_tests { - 102 | TestEntry::Group { - 103 | ref mut children, .. - 104 | } => { - 105 | children.retain_mut(|child| retain(child, language_name)); - 106 | } - 107 | TestEntry::Example { .. 
} => unreachable!(), - 108 | } - 109 | let tests = flatten_tests( - 110 | main_tests, - 111 | options.include.as_ref(), - 112 | options.exclude.as_ref(), - 113 | ); - | - 114 | let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name); - | - 115 | let mut skipped = options - 116 | .skipped - 117 | .take() - 118 | .unwrap_or_default() - 119 | .into_iter() - 120 | .chain(tests.iter().filter(|x| x.skip).map(get_test_name)) - 121 | .map(|x| (x, 0)) - 122 | .collect::>(); - | - 123 | let mut failure_count = 0; - | - 124 | let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok(); - 125 | let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok(); - | - 126 | if log_seed { - 127 | info!(" start seed: {start_seed}"); - 128 | } - | - 129 | println!(); - 130 | for (test_index, test) in tests.iter().enumerate() { - 131 | let test_name = get_test_name(test); - 132 | if let Some(counter) = skipped.get_mut(test_name.as_str()) { - 133 | println!(" {test_index}. {test_name} - SKIPPED"); - 134 | *counter += 1; - 135 | continue; - 136 | } - | - 137 | println!(" {test_index}. 
{test_name}"); - | - 138 | let passed = allocations::record_checked(|| { - 139 | let mut log_session = None; - 140 | let mut parser = get_parser(&mut log_session, "log.html"); - 141 | parser.set_language(language).unwrap(); - 142 | set_included_ranges(&mut parser, &test.input, test.template_delimiters); - | - 143 | let tree = parser.parse(&test.input, None).unwrap(); - | - 144 | if test.error { - 145 | return true; - 146 | } - | - 147 | let mut actual_output = tree.root_node().to_sexp(); - 148 | if !test.has_fields { - 149 | actual_output = strip_sexp_fields(&actual_output); - 150 | } - | - 151 | if actual_output != test.output { - 152 | println!("Incorrect initial parse for {test_name}"); - 153 | print_diff_key(); - 154 | print_diff(&actual_output, &test.output, true); - 155 | println!(); - 156 | return false; - 157 | } - | - 158 | true - 159 | }) - 160 | .unwrap_or_else(|e| { - 161 | error!("{e}"); - 162 | false - 163 | }); - | - 164 | if !passed { - 165 | failure_count += 1; - 166 | continue; - 167 | } - | - 168 | let mut parser = Parser::new(); - 169 | parser.set_language(language).unwrap(); - 170 | let tree = parser.parse(&test.input, None).unwrap(); - 171 | drop(parser); - | - 172 | for trial in 0..options.iterations { - 173 | let seed = start_seed + trial; - 174 | let passed = allocations::record_checked(|| { - 175 | let mut rand = Rand::new(seed); - 176 | let mut log_session = None; - 177 | let mut parser = get_parser(&mut log_session, "log.html"); - 178 | parser.set_language(language).unwrap(); - 179 | let mut tree = tree.clone(); - 180 | let mut input = test.input.clone(); - | - 181 | if options.log_graphs { - 182 | info!("{}\n", String::from_utf8_lossy(&input)); - 183 | } - | - 184 | // Perform a random series of edits and reparse. 
- 185 | let edit_count = rand.unsigned(*EDIT_COUNT); - 186 | let mut undo_stack = Vec::with_capacity(edit_count); - 187 | for _ in 0..=edit_count { - 188 | let edit = get_random_edit(&mut rand, &input); - 189 | undo_stack.push(invert_edit(&input, &edit)); - 190 | perform_edit(&mut tree, &mut input, &edit).unwrap(); - 191 | } - | - 192 | if log_seed { - 193 | info!(" {test_index}.{trial:<2} seed: {seed}"); - 194 | } - | - 195 | if dump_edits { - 196 | fs::create_dir_all("fuzz").unwrap(); - 197 | fs::write( - 198 | Path::new("fuzz") - 199 | .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")), - 200 | &input, - 201 | ) - 202 | .unwrap(); - 203 | } - | - 204 | if options.log_graphs { - 205 | info!("{}\n", String::from_utf8_lossy(&input)); - 206 | } - | - 207 | set_included_ranges(&mut parser, &input, test.template_delimiters); - 208 | let mut tree2 = parser.parse(&input, Some(&tree)).unwrap(); - | - 209 | // Check that the new tree is consistent. - 210 | check_consistent_sizes(&tree2, &input); - 211 | if let Err(message) = check_changed_ranges(&tree, &tree2, &input) { - 212 | error!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",); - 213 | return false; - 214 | } - | - 215 | // Undo all of the edits and re-parse again. - 216 | while let Some(edit) = undo_stack.pop() { - 217 | perform_edit(&mut tree2, &mut input, &edit).unwrap(); - 218 | } - 219 | if options.log_graphs { - 220 | info!("{}\n", String::from_utf8_lossy(&input)); - 221 | } - | - 222 | set_included_ranges(&mut parser, &test.input, test.template_delimiters); - 223 | let tree3 = parser.parse(&input, Some(&tree2)).unwrap(); - | - 224 | // Verify that the final tree matches the expectation from the corpus. 
- 225 | let mut actual_output = tree3.root_node().to_sexp(); - 226 | if !test.has_fields { - 227 | actual_output = strip_sexp_fields(&actual_output); - 228 | } - | - 229 | if actual_output != test.output && !test.error { - 230 | println!("Incorrect parse for {test_name} - seed {seed}"); - 231 | print_diff_key(); - 232 | print_diff(&actual_output, &test.output, true); - 233 | println!(); - 234 | return false; - 235 | } - | - 236 | // Check that the edited tree is consistent. - 237 | check_consistent_sizes(&tree3, &input); - 238 | if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) { - 239 | error!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n"); - 240 | return false; - 241 | } - | - 242 | true - 243 | }).unwrap_or_else(|e| { - 244 | error!("{e}"); - 245 | false - 246 | }); - | - 247 | if !passed { - 248 | failure_count += 1; - 249 | break; - 250 | } - 251 | } - 252 | } - | - 253 | if failure_count != 0 { - 254 | info!("{failure_count} {language_name} corpus tests failed fuzzing"); - 255 | } - | - 256 | skipped.retain(|_, v| *v == 0); - | - 257 | if !skipped.is_empty() { - 258 | info!("Non matchable skip definitions:"); - 259 | for k in skipped.keys() { - 260 | info!(" {k}"); - 261 | } - 262 | panic!("Non matchable skip definitions need to be removed"); - 263 | } - 264 | } - | - 265 | pub struct FlattenedTest { - 266 | pub name: String, - 267 | pub input: Vec, - 268 | pub output: String, - 269 | pub languages: Vec>, - 270 | pub error: bool, - 271 | pub skip: bool, - 272 | pub has_fields: bool, - 273 | pub template_delimiters: Option<(&'static str, &'static str)>, - 274 | } - | - 275 | #[must_use] - 276 | pub fn flatten_tests( - 277 | test: TestEntry, - 278 | include: Option<&Regex>, - 279 | exclude: Option<&Regex>, - 280 | ) -> Vec { - 281 | fn helper( - 282 | test: TestEntry, - 283 | include: Option<&Regex>, - 284 | exclude: Option<&Regex>, - 285 | is_root: bool, - 286 | prefix: &str, - 287 | result: &mut Vec, - 
288 | ) { - 289 | match test { - 290 | TestEntry::Example { - 291 | mut name, - 292 | input, - 293 | output, - 294 | has_fields, - 295 | attributes, - 296 | .. - 297 | } => { - 298 | if !prefix.is_empty() { - 299 | name.insert_str(0, " - "); - 300 | name.insert_str(0, prefix); - 301 | } - | - 302 | if let Some(include) = include { - 303 | if !include.is_match(&name) { - 304 | return; - 305 | } - 306 | } else if let Some(exclude) = exclude { - 307 | if exclude.is_match(&name) { - 308 | return; - 309 | } - 310 | } - | - 311 | result.push(FlattenedTest { - 312 | name, - 313 | input, - 314 | output, - 315 | has_fields, - 316 | languages: attributes.languages, - 317 | error: attributes.error, - 318 | skip: attributes.skip, - 319 | template_delimiters: None, - 320 | }); - 321 | } - 322 | TestEntry::Group { - 323 | mut name, children, .. - 324 | } => { - 325 | if !is_root && !prefix.is_empty() { - 326 | name.insert_str(0, " - "); - 327 | name.insert_str(0, prefix); - 328 | } - 329 | for child in children { - 330 | helper(child, include, exclude, false, &name, result); - 331 | } - 332 | } - 333 | } - 334 | } - 335 | let mut result = Vec::new(); - 336 | helper(test, include, exclude, true, "", &mut result); - 337 | result - 338 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/fuzz/allocations.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | collections::HashMap, - 3 | os::raw::c_void, - 4 | sync::{ - 5 | atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst}, - 6 | Mutex, - 7 | }, - 8 | }; - | - 9 | #[ctor::ctor] - 10 | unsafe fn initialize_allocation_recording() { - 11 | tree_sitter::set_allocator( - 12 | Some(ts_record_malloc), - 13 | Some(ts_record_calloc), - 14 | Some(ts_record_realloc), - 15 | Some(ts_record_free), - 16 | ); - 17 | } - | - 18 | #[derive(Debug, PartialEq, Eq, Hash)] - 19 | struct Allocation(*const c_void); - 20 | unsafe impl Send for 
Allocation {} - 21 | unsafe impl Sync for Allocation {} - | - 22 | #[derive(Default)] - 23 | struct AllocationRecorder { - 24 | enabled: AtomicBool, - 25 | allocation_count: AtomicUsize, - 26 | outstanding_allocations: Mutex>, - 27 | } - | - 28 | thread_local! { - 29 | static RECORDER: AllocationRecorder = AllocationRecorder::default(); - 30 | } - | - 31 | extern "C" { - 32 | fn malloc(size: usize) -> *mut c_void; - 33 | fn calloc(count: usize, size: usize) -> *mut c_void; - 34 | fn realloc(ptr: *mut c_void, size: usize) -> *mut c_void; - 35 | fn free(ptr: *mut c_void); - 36 | } - | - 37 | pub fn record(f: impl FnOnce() -> T) -> T { - 38 | record_checked(f).unwrap() - 39 | } - | - 40 | pub fn record_checked(f: impl FnOnce() -> T) -> Result { - 41 | RECORDER.with(|recorder| { - 42 | recorder.enabled.store(true, SeqCst); - 43 | recorder.allocation_count.store(0, SeqCst); - 44 | recorder.outstanding_allocations.lock().unwrap().clear(); - 45 | }); - | - 46 | let value = f(); - | - 47 | let outstanding_allocation_indices = RECORDER.with(|recorder| { - 48 | recorder.enabled.store(false, SeqCst); - 49 | recorder.allocation_count.store(0, SeqCst); - 50 | recorder - 51 | .outstanding_allocations - 52 | .lock() - 53 | .unwrap() - 54 | .drain() - 55 | .map(|e| e.1) - 56 | .collect::>() - 57 | }); - 58 | if !outstanding_allocation_indices.is_empty() { - 59 | return Err(format!( - 60 | "Leaked allocation indices: {outstanding_allocation_indices:?}", - 61 | )); - 62 | } - 63 | Ok(value) - 64 | } - | - 65 | fn record_alloc(ptr: *mut c_void) { - 66 | RECORDER.with(|recorder| { - 67 | if recorder.enabled.load(SeqCst) { - 68 | let count = recorder.allocation_count.fetch_add(1, SeqCst); - 69 | recorder - 70 | .outstanding_allocations - 71 | .lock() - 72 | .unwrap() - 73 | .insert(Allocation(ptr), count); - 74 | } - 75 | }); - 76 | } - | - 77 | fn record_dealloc(ptr: *mut c_void) { - 78 | RECORDER.with(|recorder| { - 79 | if recorder.enabled.load(SeqCst) { - 80 | recorder - 81 | 
.outstanding_allocations - 82 | .lock() - 83 | .unwrap() - 84 | .remove(&Allocation(ptr)); - 85 | } - 86 | }); - 87 | } - | - 88 | /// # Safety - 89 | /// - 90 | /// The caller must ensure that the returned pointer is eventually - 91 | /// freed by calling `ts_record_free`. - 92 | #[must_use] - 93 | pub unsafe extern "C" fn ts_record_malloc(size: usize) -> *mut c_void { - 94 | let result = malloc(size); - 95 | record_alloc(result); - 96 | result - 97 | } - | - 98 | /// # Safety - 99 | /// - 100 | /// The caller must ensure that the returned pointer is eventually - 101 | /// freed by calling `ts_record_free`. - 102 | #[must_use] - 103 | pub unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void { - 104 | let result = calloc(count, size); - 105 | record_alloc(result); - 106 | result - 107 | } - | - 108 | /// # Safety - 109 | /// - 110 | /// The caller must ensure that the returned pointer is eventually - 111 | /// freed by calling `ts_record_free`. - 112 | #[must_use] - 113 | pub unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void { - 114 | let result = realloc(ptr, size); - 115 | if ptr.is_null() { - 116 | record_alloc(result); - 117 | } else if !core::ptr::eq(ptr, result) { - 118 | record_dealloc(ptr); - 119 | record_alloc(result); - 120 | } - 121 | result - 122 | } - | - 123 | /// # Safety - 124 | /// - 125 | /// The caller must ensure that `ptr` was allocated by a previous call - 126 | /// to `ts_record_malloc`, `ts_record_calloc`, or `ts_record_realloc`. 
- 127 | pub unsafe extern "C" fn ts_record_free(ptr: *mut c_void) { - 128 | record_dealloc(ptr); - 129 | free(ptr); - 130 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/fuzz/corpus_test.rs: --------------------------------------------------------------------------------- - 1 | use tree_sitter::{LogType, Node, Parser, Point, Range, Tree}; - | - 2 | use super::{scope_sequence::ScopeSequence, LOG_ENABLED, LOG_GRAPH_ENABLED}; - 3 | use crate::util; - | - 4 | pub fn check_consistent_sizes(tree: &Tree, input: &[u8]) { - 5 | fn check(node: Node, line_offsets: &[usize]) { - 6 | let start_byte = node.start_byte(); - 7 | let end_byte = node.end_byte(); - 8 | let start_point = node.start_position(); - 9 | let end_point = node.end_position(); - | - 10 | assert!(start_byte <= end_byte); - 11 | assert!(start_point <= end_point); - 12 | assert_eq!( - 13 | start_byte, - 14 | line_offsets[start_point.row] + start_point.column - 15 | ); - 16 | assert_eq!(end_byte, line_offsets[end_point.row] + end_point.column); - | - 17 | let mut last_child_end_byte = start_byte; - 18 | let mut last_child_end_point = start_point; - 19 | let mut some_child_has_changes = false; - 20 | let mut actual_named_child_count = 0; - 21 | for i in 0..node.child_count() { - 22 | let child = node.child(i as u32).unwrap(); - 23 | assert!(child.start_byte() >= last_child_end_byte); - 24 | assert!(child.start_position() >= last_child_end_point); - 25 | check(child, line_offsets); - 26 | if child.has_changes() { - 27 | some_child_has_changes = true; - 28 | } - 29 | if child.is_named() { - 30 | actual_named_child_count += 1; - 31 | } - 32 | last_child_end_byte = child.end_byte(); - 33 | last_child_end_point = child.end_position(); - 34 | } - | - 35 | assert_eq!(actual_named_child_count, node.named_child_count()); - | - 36 | if node.child_count() > 0 { - 37 | assert!(end_byte >= last_child_end_byte); - 38 | assert!(end_point >= last_child_end_point); - 39 | 
} - | - 40 | if some_child_has_changes { - 41 | assert!(node.has_changes()); - 42 | } - 43 | } - | - 44 | let mut line_offsets = vec![0]; - 45 | for (i, c) in input.iter().enumerate() { - 46 | if *c == b'\n' { - 47 | line_offsets.push(i + 1); - 48 | } - 49 | } - | - 50 | check(tree.root_node(), &line_offsets); - 51 | } - | - 52 | pub fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> { - 53 | let changed_ranges = old_tree.changed_ranges(new_tree).collect::>(); - 54 | let old_scope_sequence = ScopeSequence::new(old_tree); - 55 | let new_scope_sequence = ScopeSequence::new(new_tree); - | - 56 | let old_range = old_tree.root_node().range(); - 57 | let new_range = new_tree.root_node().range(); - | - 58 | let byte_range = - 59 | old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte); - 60 | let point_range = old_range.start_point.min(new_range.start_point) - 61 | ..old_range.end_point.max(new_range.end_point); - | - 62 | for range in &changed_ranges { - 63 | if range.end_byte > byte_range.end || range.end_point > point_range.end { - 64 | return Err(format!( - 65 | "changed range extends outside of the old and new trees {range:?}", - 66 | )); - 67 | } - 68 | } - | - 69 | old_scope_sequence.check_changes(&new_scope_sequence, input, &changed_ranges) - 70 | } - | - 71 | pub fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) { - 72 | if let Some((start, end)) = delimiters { - 73 | let mut ranges = Vec::new(); - 74 | let mut ix = 0; - 75 | while ix < input.len() { - 76 | let Some(mut start_ix) = input[ix..] - 77 | .windows(2) - 78 | .position(|win| win == start.as_bytes()) - 79 | else { - 80 | break; - 81 | }; - 82 | start_ix += ix + start.len(); - 83 | let end_ix = input[start_ix..] 
- 84 | .windows(2) - 85 | .position(|win| win == end.as_bytes()) - 86 | .map_or(input.len(), |ix| start_ix + ix); - 87 | ix = end_ix; - 88 | ranges.push(Range { - 89 | start_byte: start_ix, - 90 | end_byte: end_ix, - 91 | start_point: point_for_offset(input, start_ix), - 92 | end_point: point_for_offset(input, end_ix), - 93 | }); - 94 | } - | - 95 | parser.set_included_ranges(&ranges).unwrap(); - 96 | } else { - 97 | parser.set_included_ranges(&[]).unwrap(); - 98 | } - 99 | } - | - 100 | fn point_for_offset(text: &[u8], offset: usize) -> Point { - 101 | let mut point = Point::default(); - 102 | for byte in &text[..offset] { - 103 | if *byte == b'\n' { - 104 | point.row += 1; - 105 | point.column = 0; - 106 | } else { - 107 | point.column += 1; - 108 | } - 109 | } - 110 | point - 111 | } - | - 112 | pub fn get_parser(session: &mut Option, log_filename: &str) -> Parser { - 113 | let mut parser = Parser::new(); - | - 114 | if *LOG_ENABLED { - 115 | parser.set_logger(Some(Box::new(|log_type, msg| { - 116 | if log_type == LogType::Lex { - 117 | eprintln!(" {msg}"); - 118 | } else { - 119 | eprintln!("{msg}"); - 120 | } - 121 | }))); - 122 | } - 123 | if *LOG_GRAPH_ENABLED { - 124 | *session = Some(util::log_graphs(&mut parser, log_filename, false).unwrap()); - 125 | } - | - 126 | parser - 127 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/fuzz/edits.rs: --------------------------------------------------------------------------------- - 1 | use super::random::Rand; - | - 2 | #[derive(Debug)] - 3 | pub struct Edit { - 4 | pub position: usize, - 5 | pub deleted_length: usize, - 6 | pub inserted_text: Vec, - 7 | } - | - 8 | #[must_use] - 9 | pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit { - 10 | let position = edit.position; - 11 | let removed_content = &input[position..(position + edit.deleted_length)]; - 12 | Edit { - 13 | position, - 14 | deleted_length: edit.inserted_text.len(), - 15 | inserted_text: 
removed_content.to_vec(), - 16 | } - 17 | } - | - 18 | pub fn get_random_edit(rand: &mut Rand, input: &[u8]) -> Edit { - 19 | let choice = rand.unsigned(10); - 20 | if choice < 2 { - 21 | // Insert text at end - 22 | let inserted_text = rand.words(3); - 23 | Edit { - 24 | position: input.len(), - 25 | deleted_length: 0, - 26 | inserted_text, - 27 | } - 28 | } else if choice < 5 { - 29 | // Delete text from the end - 30 | let deleted_length = rand.unsigned(30).min(input.len()); - 31 | Edit { - 32 | position: input.len() - deleted_length, - 33 | deleted_length, - 34 | inserted_text: vec![], - 35 | } - 36 | } else if choice < 8 { - 37 | // Insert at a random position - 38 | let position = rand.unsigned(input.len()); - 39 | let word_count = 1 + rand.unsigned(3); - 40 | let inserted_text = rand.words(word_count); - 41 | Edit { - 42 | position, - 43 | deleted_length: 0, - 44 | inserted_text, - 45 | } - 46 | } else { - 47 | // Replace at random position - 48 | let position = rand.unsigned(input.len()); - 49 | let deleted_length = rand.unsigned(input.len() - position); - 50 | let word_count = 1 + rand.unsigned(3); - 51 | let inserted_text = rand.words(word_count); - 52 | Edit { - 53 | position, - 54 | deleted_length, - 55 | inserted_text, - 56 | } - 57 | } - 58 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/fuzz/random.rs: --------------------------------------------------------------------------------- - 1 | use rand::{ - 2 | distributions::Alphanumeric, - 3 | prelude::{Rng, SeedableRng, StdRng}, - 4 | }; - | - 5 | const OPERATORS: &[char] = &[ - 6 | '+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.', '%', - 7 | ]; - | - 8 | pub struct Rand(StdRng); - | - 9 | impl Rand { - 10 | #[must_use] - 11 | pub fn new(seed: usize) -> Self { - 12 | Self(StdRng::seed_from_u64(seed as u64)) - 13 | } - | - 14 | pub fn unsigned(&mut self, max: usize) -> usize { - 15 | self.0.gen_range(0..=max) - 16 | } - | - 17 | 
pub fn words(&mut self, max_count: usize) -> Vec { - 18 | let word_count = self.unsigned(max_count); - 19 | let mut result = Vec::with_capacity(2 * word_count); - 20 | for i in 0..word_count { - 21 | if i > 0 { - 22 | if self.unsigned(5) == 0 { - 23 | result.push(b'\n'); - 24 | } else { - 25 | result.push(b' '); - 26 | } - 27 | } - 28 | if self.unsigned(3) == 0 { - 29 | let index = self.unsigned(OPERATORS.len() - 1); - 30 | result.push(OPERATORS[index] as u8); - 31 | } else { - 32 | for _ in 0..self.unsigned(8) { - 33 | result.push(self.0.sample(Alphanumeric)); - 34 | } - 35 | } - 36 | } - 37 | result - 38 | } - 39 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/fuzz/scope_sequence.rs: --------------------------------------------------------------------------------- - 1 | use tree_sitter::{Point, Range, Tree}; - | - 2 | #[derive(Debug)] - 3 | pub struct ScopeSequence(Vec); - | - 4 | type ScopeStack = Vec<&'static str>; - | - 5 | impl ScopeSequence { - 6 | #[must_use] - 7 | pub fn new(tree: &Tree) -> Self { - 8 | let mut result = Self(Vec::new()); - 9 | let mut scope_stack = Vec::new(); - | - 10 | let mut cursor = tree.walk(); - 11 | let mut visited_children = false; - 12 | loop { - 13 | let node = cursor.node(); - 14 | for _ in result.0.len()..node.start_byte() { - 15 | result.0.push(scope_stack.clone()); - 16 | } - 17 | if visited_children { - 18 | for _ in result.0.len()..node.end_byte() { - 19 | result.0.push(scope_stack.clone()); - 20 | } - 21 | scope_stack.pop(); - 22 | if cursor.goto_next_sibling() { - 23 | visited_children = false; - 24 | } else if !cursor.goto_parent() { - 25 | break; - 26 | } - 27 | } else { - 28 | scope_stack.push(cursor.node().kind()); - 29 | if !cursor.goto_first_child() { - 30 | visited_children = true; - 31 | } - 32 | } - 33 | } - | - 34 | result - 35 | } - | - 36 | pub fn check_changes( - 37 | &self, - 38 | other: &Self, - 39 | text: &[u8], - 40 | known_changed_ranges: 
&[Range], - 41 | ) -> Result<(), String> { - 42 | let mut position = Point { row: 0, column: 0 }; - 43 | for i in 0..(self.0.len().max(other.0.len())) { - 44 | let stack = &self.0.get(i); - 45 | let other_stack = &other.0.get(i); - 46 | if *stack != *other_stack && ![b'\r', b'\n'].contains(&text[i]) { - 47 | let containing_range = known_changed_ranges - 48 | .iter() - 49 | .find(|range| range.start_point <= position && position < range.end_point); - 50 | if containing_range.is_none() { - 51 | let line = &text[(i - position.column)..] - 52 | .split(|c| *c == b'\n') - 53 | .next() - 54 | .unwrap(); - 55 | return Err(format!( - 56 | concat!( - 57 | "Position: {}\n", - 58 | "Byte offset: {}\n", - 59 | "Line: {}\n", - 60 | "{}^\n", - 61 | "Old scopes: {:?}\n", - 62 | "New scopes: {:?}\n", - 63 | "Invalidated ranges: {:?}", - 64 | ), - 65 | position, - 66 | i, - 67 | String::from_utf8_lossy(line), - 68 | String::from(" ").repeat(position.column + "Line: ".len()), - 69 | stack, - 70 | other_stack, - 71 | known_changed_ranges, - 72 | )); - 73 | } - 74 | } - | - 75 | if text[i] == b'\n' { - 76 | position.row += 1; - 77 | position.column = 0; - 78 | } else { - 79 | position.column += 1; - 80 | } - 81 | } - 82 | Ok(()) - 83 | } - 84 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/highlight.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | collections::{BTreeMap, HashSet}, - 3 | fmt::Write, - 4 | fs, - 5 | io::{self, Write as _}, - 6 | path::{self, Path, PathBuf}, - 7 | str, - 8 | sync::{atomic::AtomicUsize, Arc}, - 9 | time::Instant, - 10 | }; - | - 11 | use ansi_colours::{ansi256_from_rgb, rgb_from_ansi256}; - 12 | use anstyle::{Ansi256Color, AnsiColor, Color, Effects, RgbColor}; - 13 | use anyhow::Result; - 14 | use log::{info, warn}; - 15 | use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer}; - 16 | use serde_json::{json, 
Value}; - 17 | use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer}; - 18 | use tree_sitter_loader::Loader; - | - 19 | pub const HTML_HEAD_HEADER: &str = " - 20 | - 21 | - 22 | Tree-sitter Highlighting - 23 | "; - | - 37 | pub const HTML_BODY_HEADER: &str = " - 38 | - 39 | - 40 | "; - | - 41 | pub const HTML_FOOTER: &str = " - 42 | - 43 | "; - | - 44 | #[derive(Debug, Default)] - 45 | pub struct Style { - 46 | pub ansi: anstyle::Style, - 47 | pub css: Option, - 48 | } - | - 49 | #[derive(Debug)] - 50 | pub struct Theme { - 51 | pub styles: Vec")?; - 369 | writeln!(&mut stdout, "{HTML_BODY_HEADER}")?; - 370 | } - | - 371 | let mut renderer = HtmlRenderer::new(); - 372 | renderer.render(events, &source, &move |highlight, output| { - 373 | if opts.inline_styles { - 374 | output.extend(b"style='"); - 375 | output.extend( - 376 | theme.styles[highlight.0] - 377 | .css - 378 | .as_ref() - 379 | .map_or_else(|| "".as_bytes(), |css_style| css_style.as_bytes()), - 380 | ); - 381 | output.extend(b"'"); - 382 | } else { - 383 | output.extend(b"class='"); - 384 | let mut parts = theme.highlight_names[highlight.0].split('.').peekable(); - 385 | while let Some(part) = parts.next() { - 386 | output.extend(part.as_bytes()); - 387 | if parts.peek().is_some() { - 388 | output.extend(b" "); - 389 | } - 390 | } - 391 | output.extend(b"'"); - 392 | } - 393 | })?; - | - 394 | if !opts.quiet { - 395 | writeln!(&mut stdout, "")?; - 396 | for (i, line) in renderer.lines().enumerate() { - 397 | writeln!( - 398 | &mut stdout, - 399 | "", - 400 | i + 1, - 401 | )?; - 402 | } - 403 | writeln!(&mut stdout, "
{}{line}
")?; - 404 | writeln!(&mut stdout, "{HTML_FOOTER}")?; - 405 | } - 406 | } else { - 407 | let mut style_stack = vec![theme.default_style().ansi]; - 408 | for event in events { - 409 | match event? { - 410 | HighlightEvent::HighlightStart(highlight) => { - 411 | style_stack.push(theme.styles[highlight.0].ansi); - 412 | } - 413 | HighlightEvent::HighlightEnd => { - 414 | style_stack.pop(); - 415 | } - 416 | HighlightEvent::Source { start, end } => { - 417 | let style = style_stack.last().unwrap(); - 418 | write!(&mut stdout, "{style}").unwrap(); - 419 | stdout.write_all(&source[start..end])?; - 420 | write!(&mut stdout, "{style:#}").unwrap(); - 421 | } - 422 | } - 423 | } - 424 | } - | - 425 | if opts.print_time { - 426 | info!("Time: {}ms", time.elapsed().as_millis()); - 427 | } - | - 428 | Ok(()) - 429 | } - | - 430 | #[cfg(test)] - 431 | mod tests { - 432 | use std::env; - | - 433 | use super::*; - | - 434 | const JUNGLE_GREEN: &str = "#26A69A"; - 435 | const DARK_CYAN: &str = "#00AF87"; - | - 436 | #[test] - 437 | fn test_parse_style() { - 438 | let original_environment_variable = env::var("COLORTERM"); - | - 439 | let mut style = Style::default(); - 440 | assert_eq!(style.ansi.get_fg_color(), None); - 441 | assert_eq!(style.css, None); - | - 442 | // darkcyan is an ANSI color and is preserved - 443 | env::set_var("COLORTERM", ""); - 444 | parse_style(&mut style, Value::String(DARK_CYAN.to_string())); - 445 | assert_eq!( - 446 | style.ansi.get_fg_color(), - 447 | Some(Color::Ansi256(Ansi256Color(36))) - 448 | ); - 449 | assert_eq!(style.css, Some("color: #00af87".to_string())); - | - 450 | // junglegreen is not an ANSI color and is preserved when the terminal supports it - 451 | env::set_var("COLORTERM", "truecolor"); - 452 | parse_style(&mut style, Value::String(JUNGLE_GREEN.to_string())); - 453 | assert_eq!( - 454 | style.ansi.get_fg_color(), - 455 | Some(Color::Rgb(RgbColor(38, 166, 154))) - 456 | ); - 457 | assert_eq!(style.css, Some("color: 
#26a69a".to_string())); - | - 458 | // junglegreen gets approximated as cadetblue when the terminal does not support it - 459 | env::set_var("COLORTERM", ""); - 460 | parse_style(&mut style, Value::String(JUNGLE_GREEN.to_string())); - 461 | assert_eq!( - 462 | style.ansi.get_fg_color(), - 463 | Some(Color::Ansi256(Ansi256Color(72))) - 464 | ); - 465 | assert_eq!(style.css, Some("color: #26a69a".to_string())); - | - 466 | if let Ok(environment_variable) = original_environment_variable { - 467 | env::set_var("COLORTERM", environment_variable); - 468 | } else { - 469 | env::remove_var("COLORTERM"); - 470 | } - 471 | } - 472 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/init.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | fs, - 3 | path::{Path, PathBuf}, - 4 | str::{self, FromStr}, - 5 | }; - | - 6 | use anyhow::{anyhow, Context, Result}; - 7 | use crc32fast::hash as crc32; - 8 | use heck::{ToKebabCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase}; - 9 | use indoc::{formatdoc, indoc}; - 10 | use log::warn; - 11 | use rand::{thread_rng, Rng}; - 12 | use semver::Version; - 13 | use serde::{Deserialize, Serialize}; - 14 | use serde_json::{Map, Value}; - 15 | use tree_sitter_generate::write_file; - 16 | use tree_sitter_loader::{Author, Bindings, Grammar, Links, Metadata, PathsJSON, TreeSitterJSON}; - | - 17 | const CLI_VERSION: &str = env!("CARGO_PKG_VERSION"); - 18 | const CLI_VERSION_PLACEHOLDER: &str = "CLI_VERSION"; - | - 19 | const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION; - 20 | const ABI_VERSION_MAX_PLACEHOLDER: &str = "ABI_VERSION_MAX"; - | - 21 | const PARSER_NAME_PLACEHOLDER: &str = "PARSER_NAME"; - 22 | const CAMEL_PARSER_NAME_PLACEHOLDER: &str = "CAMEL_PARSER_NAME"; - 23 | const TITLE_PARSER_NAME_PLACEHOLDER: &str = "TITLE_PARSER_NAME"; - 24 | const UPPER_PARSER_NAME_PLACEHOLDER: &str = "UPPER_PARSER_NAME"; - 25 | const 
LOWER_PARSER_NAME_PLACEHOLDER: &str = "LOWER_PARSER_NAME"; - 26 | const KEBAB_PARSER_NAME_PLACEHOLDER: &str = "KEBAB_PARSER_NAME"; - 27 | const PARSER_CLASS_NAME_PLACEHOLDER: &str = "PARSER_CLASS_NAME"; - | - 28 | const PARSER_DESCRIPTION_PLACEHOLDER: &str = "PARSER_DESCRIPTION"; - 29 | const PARSER_LICENSE_PLACEHOLDER: &str = "PARSER_LICENSE"; - 30 | const PARSER_URL_PLACEHOLDER: &str = "PARSER_URL"; - 31 | const PARSER_URL_STRIPPED_PLACEHOLDER: &str = "PARSER_URL_STRIPPED"; - 32 | const PARSER_VERSION_PLACEHOLDER: &str = "PARSER_VERSION"; - 33 | const PARSER_FINGERPRINT_PLACEHOLDER: &str = "PARSER_FINGERPRINT"; - | - 34 | const AUTHOR_NAME_PLACEHOLDER: &str = "PARSER_AUTHOR_NAME"; - 35 | const AUTHOR_EMAIL_PLACEHOLDER: &str = "PARSER_AUTHOR_EMAIL"; - 36 | const AUTHOR_URL_PLACEHOLDER: &str = "PARSER_AUTHOR_URL"; - | - 37 | const AUTHOR_BLOCK_JS: &str = "\n \"author\": {"; - 38 | const AUTHOR_NAME_PLACEHOLDER_JS: &str = "\n \"name\": \"PARSER_AUTHOR_NAME\","; - 39 | const AUTHOR_EMAIL_PLACEHOLDER_JS: &str = ",\n \"email\": \"PARSER_AUTHOR_EMAIL\""; - 40 | const AUTHOR_URL_PLACEHOLDER_JS: &str = ",\n \"url\": \"PARSER_AUTHOR_URL\""; - | - 41 | const AUTHOR_BLOCK_PY: &str = "\nauthors = [{"; - 42 | const AUTHOR_NAME_PLACEHOLDER_PY: &str = "name = \"PARSER_AUTHOR_NAME\""; - 43 | const AUTHOR_EMAIL_PLACEHOLDER_PY: &str = ", email = \"PARSER_AUTHOR_EMAIL\""; - | - 44 | const AUTHOR_BLOCK_RS: &str = "\nauthors = ["; - 45 | const AUTHOR_NAME_PLACEHOLDER_RS: &str = "PARSER_AUTHOR_NAME"; - 46 | const AUTHOR_EMAIL_PLACEHOLDER_RS: &str = " PARSER_AUTHOR_EMAIL"; - | - 47 | const AUTHOR_BLOCK_GRAMMAR: &str = "\n * @author "; - 48 | const AUTHOR_NAME_PLACEHOLDER_GRAMMAR: &str = "PARSER_AUTHOR_NAME"; - 49 | const AUTHOR_EMAIL_PLACEHOLDER_GRAMMAR: &str = " PARSER_AUTHOR_EMAIL"; - | - 50 | const FUNDING_URL_PLACEHOLDER: &str = "FUNDING_URL"; - | - 51 | const GRAMMAR_JS_TEMPLATE: &str = include_str!("./templates/grammar.js"); - 52 | const PACKAGE_JSON_TEMPLATE: &str = 
include_str!("./templates/package.json"); - 53 | const GITIGNORE_TEMPLATE: &str = include_str!("./templates/gitignore"); - 54 | const GITATTRIBUTES_TEMPLATE: &str = include_str!("./templates/gitattributes"); - 55 | const EDITORCONFIG_TEMPLATE: &str = include_str!("./templates/.editorconfig"); - | - 56 | const RUST_BINDING_VERSION: &str = env!("CARGO_PKG_VERSION"); - 57 | const RUST_BINDING_VERSION_PLACEHOLDER: &str = "RUST_BINDING_VERSION"; - | - 58 | const LIB_RS_TEMPLATE: &str = include_str!("./templates/lib.rs"); - 59 | const BUILD_RS_TEMPLATE: &str = include_str!("./templates/build.rs"); - 60 | const CARGO_TOML_TEMPLATE: &str = include_str!("./templates/_cargo.toml"); - | - 61 | const INDEX_JS_TEMPLATE: &str = include_str!("./templates/index.js"); - 62 | const INDEX_D_TS_TEMPLATE: &str = include_str!("./templates/index.d.ts"); - 63 | const JS_BINDING_CC_TEMPLATE: &str = include_str!("./templates/js-binding.cc"); - 64 | const BINDING_GYP_TEMPLATE: &str = include_str!("./templates/binding.gyp"); - 65 | const BINDING_TEST_JS_TEMPLATE: &str = include_str!("./templates/binding_test.js"); - | - 66 | const MAKEFILE_TEMPLATE: &str = include_str!("./templates/makefile"); - 67 | const CMAKELISTS_TXT_TEMPLATE: &str = include_str!("./templates/cmakelists.cmake"); - 68 | const PARSER_NAME_H_TEMPLATE: &str = include_str!("./templates/PARSER_NAME.h"); - 69 | const PARSER_NAME_PC_IN_TEMPLATE: &str = include_str!("./templates/PARSER_NAME.pc.in"); - | - 70 | const GO_MOD_TEMPLATE: &str = include_str!("./templates/go.mod"); - 71 | const BINDING_GO_TEMPLATE: &str = include_str!("./templates/binding.go"); - 72 | const BINDING_TEST_GO_TEMPLATE: &str = include_str!("./templates/binding_test.go"); - | - 73 | const SETUP_PY_TEMPLATE: &str = include_str!("./templates/setup.py"); - 74 | const INIT_PY_TEMPLATE: &str = include_str!("./templates/__init__.py"); - 75 | const INIT_PYI_TEMPLATE: &str = include_str!("./templates/__init__.pyi"); - 76 | const PYPROJECT_TOML_TEMPLATE: &str = 
include_str!("./templates/pyproject.toml"); - 77 | const PY_BINDING_C_TEMPLATE: &str = include_str!("./templates/py-binding.c"); - 78 | const TEST_BINDING_PY_TEMPLATE: &str = include_str!("./templates/test_binding.py"); - | - 79 | const PACKAGE_SWIFT_TEMPLATE: &str = include_str!("./templates/package.swift"); - 80 | const TESTS_SWIFT_TEMPLATE: &str = include_str!("./templates/tests.swift"); - | - 81 | const BUILD_ZIG_TEMPLATE: &str = include_str!("./templates/build.zig"); - 82 | const BUILD_ZIG_ZON_TEMPLATE: &str = include_str!("./templates/build.zig.zon"); - 83 | const ROOT_ZIG_TEMPLATE: &str = include_str!("./templates/root.zig"); - 84 | const TEST_ZIG_TEMPLATE: &str = include_str!("./templates/test.zig"); - | - 85 | const TREE_SITTER_JSON_SCHEMA: &str = - 86 | "https://tree-sitter.github.io/tree-sitter/assets/schemas/config.schema.json"; - | - 87 | #[derive(Serialize, Deserialize, Clone)] - 88 | pub struct JsonConfigOpts { - 89 | pub name: String, - 90 | pub camelcase: String, - 91 | pub title: String, - 92 | pub description: String, - 93 | #[serde(skip_serializing_if = "Option::is_none")] - 94 | pub repository: Option, - 95 | #[serde(skip_serializing_if = "Option::is_none")] - 96 | pub funding: Option, - 97 | pub scope: String, - 98 | pub file_types: Vec, - 99 | pub version: Version, - 100 | pub license: String, - 101 | pub author: String, - 102 | #[serde(skip_serializing_if = "Option::is_none")] - 103 | pub email: Option, - 104 | #[serde(skip_serializing_if = "Option::is_none")] - 105 | pub url: Option, - 106 | pub bindings: Bindings, - 107 | } - | - 108 | impl JsonConfigOpts { - 109 | #[must_use] - 110 | pub fn to_tree_sitter_json(self) -> TreeSitterJSON { - 111 | TreeSitterJSON { - 112 | schema: Some(TREE_SITTER_JSON_SCHEMA.to_string()), - 113 | grammars: vec![Grammar { - 114 | name: self.name.clone(), - 115 | camelcase: Some(self.camelcase), - 116 | title: Some(self.title), - 117 | scope: self.scope, - 118 | path: None, - 119 | external_files: 
PathsJSON::Empty, - 120 | file_types: Some(self.file_types), - 121 | highlights: PathsJSON::Empty, - 122 | injections: PathsJSON::Empty, - 123 | locals: PathsJSON::Empty, - 124 | tags: PathsJSON::Empty, - 125 | injection_regex: Some(format!("^{}$", self.name)), - 126 | first_line_regex: None, - 127 | content_regex: None, - 128 | class_name: Some(format!("TreeSitter{}", self.name.to_upper_camel_case())), - 129 | }], - 130 | metadata: Metadata { - 131 | version: self.version, - 132 | license: Some(self.license), - 133 | description: Some(self.description), - 134 | authors: Some(vec![Author { - 135 | name: self.author, - 136 | email: self.email, - 137 | url: self.url, - 138 | }]), - 139 | links: Some(Links { - 140 | repository: self.repository.unwrap_or_else(|| { - 141 | format!("https://github.com/tree-sitter/tree-sitter-{}", self.name) - 142 | }), - 143 | funding: self.funding, - 144 | }), - 145 | namespace: None, - 146 | }, - 147 | bindings: self.bindings, - 148 | } - 149 | } - 150 | } - | - 151 | impl Default for JsonConfigOpts { - 152 | fn default() -> Self { - 153 | Self { - 154 | name: String::new(), - 155 | camelcase: String::new(), - 156 | title: String::new(), - 157 | description: String::new(), - 158 | repository: None, - 159 | funding: None, - 160 | scope: String::new(), - 161 | file_types: vec![], - 162 | version: Version::from_str("0.1.0").unwrap(), - 163 | license: String::new(), - 164 | author: String::new(), - 165 | email: None, - 166 | url: None, - 167 | bindings: Bindings::default(), - 168 | } - 169 | } - 170 | } - | - 171 | struct GenerateOpts<'a> { - 172 | author_name: Option<&'a str>, - 173 | author_email: Option<&'a str>, - 174 | author_url: Option<&'a str>, - 175 | license: Option<&'a str>, - 176 | description: Option<&'a str>, - 177 | repository: Option<&'a str>, - 178 | funding: Option<&'a str>, - 179 | version: &'a Version, - 180 | camel_parser_name: &'a str, - 181 | title_parser_name: &'a str, - 182 | class_name: &'a str, - 183 | } - | - 
184 | pub fn generate_grammar_files( - 185 | repo_path: &Path, - 186 | language_name: &str, - 187 | allow_update: bool, - 188 | opts: Option<&JsonConfigOpts>, - 189 | ) -> Result<()> { - 190 | let dashed_language_name = language_name.to_kebab_case(); - | - 191 | let tree_sitter_config = missing_path_else( - 192 | repo_path.join("tree-sitter.json"), - 193 | true, - 194 | |path| { - 195 | // invariant: opts is always Some when `tree-sitter.json` doesn't exist - 196 | let Some(opts) = opts else { unreachable!() }; - | - 197 | let tree_sitter_json = opts.clone().to_tree_sitter_json(); - 198 | write_file(path, serde_json::to_string_pretty(&tree_sitter_json)?)?; - 199 | Ok(()) - 200 | }, - 201 | |path| { - 202 | // updating the config, if needed - 203 | if let Some(opts) = opts { - 204 | let tree_sitter_json = opts.clone().to_tree_sitter_json(); - 205 | write_file(path, serde_json::to_string_pretty(&tree_sitter_json)?)?; - 206 | } - 207 | Ok(()) - 208 | }, - 209 | )?; - | - 210 | let tree_sitter_config = serde_json::from_str::( - 211 | &fs::read_to_string(tree_sitter_config.as_path()) - 212 | .with_context(|| "Failed to read tree-sitter.json")?, - 213 | )?; - | - 214 | let authors = tree_sitter_config.metadata.authors.as_ref(); - 215 | let camel_name = tree_sitter_config.grammars[0] - 216 | .camelcase - 217 | .clone() - 218 | .unwrap_or_else(|| language_name.to_upper_camel_case()); - 219 | let title_name = tree_sitter_config.grammars[0] - 220 | .title - 221 | .clone() - 222 | .unwrap_or_else(|| language_name.to_upper_camel_case()); - 223 | let class_name = tree_sitter_config.grammars[0] - 224 | .class_name - 225 | .clone() - 226 | .unwrap_or_else(|| format!("TreeSitter{}", language_name.to_upper_camel_case())); - | - 227 | let generate_opts = GenerateOpts { - 228 | author_name: authors - 229 | .map(|a| a.first().map(|a| a.name.as_str())) - 230 | .unwrap_or_default(), - 231 | author_email: authors - 232 | .map(|a| a.first().and_then(|a| a.email.as_deref())) - 233 | 
.unwrap_or_default(), - 234 | author_url: authors - 235 | .map(|a| a.first().and_then(|a| a.url.as_deref())) - 236 | .unwrap_or_default(), - 237 | license: tree_sitter_config.metadata.license.as_deref(), - 238 | description: tree_sitter_config.metadata.description.as_deref(), - 239 | repository: tree_sitter_config - 240 | .metadata - 241 | .links - 242 | .as_ref() - 243 | .map(|l| l.repository.as_str()), - 244 | funding: tree_sitter_config - 245 | .metadata - 246 | .links - 247 | .as_ref() - 248 | .and_then(|l| l.funding.as_deref()), - 249 | version: &tree_sitter_config.metadata.version, - 250 | camel_parser_name: &camel_name, - 251 | title_parser_name: &title_name, - 252 | class_name: &class_name, - 253 | }; - | - 254 | // Create package.json - 255 | missing_path_else( - 256 | repo_path.join("package.json"), - 257 | allow_update, - 258 | |path| { - 259 | generate_file( - 260 | path, - 261 | PACKAGE_JSON_TEMPLATE, - 262 | dashed_language_name.as_str(), - 263 | &generate_opts, - 264 | ) - 265 | }, - 266 | |path| { - 267 | let mut contents = fs::read_to_string(path)? - 268 | .replace( - 269 | r#""node-addon-api": "^8.3.1""#, - 270 | r#""node-addon-api": "^8.5.0""#, - 271 | ) - 272 | .replace( - 273 | indoc! {r#" - 274 | "prebuildify": "^6.0.1", - 275 | "tree-sitter-cli":"#}, - 276 | indoc! {r#" - 277 | "prebuildify": "^6.0.1", - 278 | "tree-sitter": "^0.22.4", - 279 | "tree-sitter-cli":"#}, - 280 | ); - 281 | if !contents.contains("module") { - 282 | warn!("Updating package.json"); - 283 | contents = contents.replace( - 284 | r#""repository":"#, - 285 | indoc! 
{r#" - 286 | "type": "module", - 287 | "repository":"#}, - 288 | ); - 289 | } - 290 | write_file(path, contents)?; - 291 | Ok(()) - 292 | }, - 293 | )?; - | - 294 | // Do not create a grammar.js file in a repo with multiple language configs - 295 | if !tree_sitter_config.has_multiple_language_configs() { - 296 | missing_path_else( - 297 | repo_path.join("grammar.js"), - 298 | allow_update, - 299 | |path| generate_file(path, GRAMMAR_JS_TEMPLATE, language_name, &generate_opts), - 300 | |path| { - 301 | let mut contents = fs::read_to_string(path)?; - 302 | if contents.contains("module.exports") { - 303 | contents = contents.replace("module.exports =", "export default"); - 304 | write_file(path, contents)?; - 305 | } - | - 306 | Ok(()) - 307 | }, - 308 | )?; - 309 | } - | - 310 | // Write .gitignore file - 311 | missing_path_else( - 312 | repo_path.join(".gitignore"), - 313 | allow_update, - 314 | |path| generate_file(path, GITIGNORE_TEMPLATE, language_name, &generate_opts), - 315 | |path| { - 316 | let contents = fs::read_to_string(path)?; - 317 | if !contents.contains("Zig artifacts") { - 318 | warn!("Replacing .gitignore"); - 319 | generate_file(path, GITIGNORE_TEMPLATE, language_name, &generate_opts)?; - 320 | } - 321 | Ok(()) - 322 | }, - 323 | )?; - | - 324 | // Write .gitattributes file - 325 | missing_path_else( - 326 | repo_path.join(".gitattributes"), - 327 | allow_update, - 328 | |path| generate_file(path, GITATTRIBUTES_TEMPLATE, language_name, &generate_opts), - 329 | |path| { - 330 | let mut contents = fs::read_to_string(path)?; - 331 | contents = contents.replace("bindings/c/* ", "bindings/c/** "); - 332 | if !contents.contains("Zig bindings") { - 333 | contents.push('\n'); - 334 | contents.push_str(indoc! 
{" - 335 | # Zig bindings - 336 | build.zig linguist-generated - 337 | build.zig.zon linguist-generated - 338 | "}); - 339 | } - 340 | write_file(path, contents)?; - 341 | Ok(()) - 342 | }, - 343 | )?; - | - 344 | // Write .editorconfig file - 345 | missing_path(repo_path.join(".editorconfig"), |path| { - 346 | generate_file(path, EDITORCONFIG_TEMPLATE, language_name, &generate_opts) - 347 | })?; - | - 348 | let bindings_dir = repo_path.join("bindings"); - | - 349 | // Generate Rust bindings - 350 | if tree_sitter_config.bindings.rust { - 351 | missing_path(bindings_dir.join("rust"), create_dir)?.apply(|path| { - 352 | missing_path(path.join("lib.rs"), |path| { - 353 | generate_file(path, LIB_RS_TEMPLATE, language_name, &generate_opts) - 354 | })?; - | - 355 | missing_path_else( - 356 | path.join("build.rs"), - 357 | allow_update, - 358 | |path| generate_file(path, BUILD_RS_TEMPLATE, language_name, &generate_opts), - 359 | |path| { - 360 | let replacement = indoc!{r#" - 361 | c_config.flag("-utf-8"); - | - 362 | if std::env::var("TARGET").unwrap() == "wasm32-unknown-unknown" { - 363 | let Ok(wasm_headers) = std::env::var("DEP_TREE_SITTER_LANGUAGE_WASM_HEADERS") else { - 364 | panic!("Environment variable DEP_TREE_SITTER_LANGUAGE_WASM_HEADERS must be set by the language crate"); - 365 | }; - 366 | let Ok(wasm_src) = - 367 | std::env::var("DEP_TREE_SITTER_LANGUAGE_WASM_SRC").map(std::path::PathBuf::from) - 368 | else { - 369 | panic!("Environment variable DEP_TREE_SITTER_LANGUAGE_WASM_SRC must be set by the language crate"); - 370 | }; - | - 371 | c_config.include(&wasm_headers); - 372 | c_config.files([ - 373 | wasm_src.join("stdio.c"), - 374 | wasm_src.join("stdlib.c"), - 375 | wasm_src.join("string.c"), - 376 | ]); - 377 | } - 378 | "#}; - | - 379 | let indented_replacement = replacement - 380 | .lines() - 381 | .map(|line| if line.is_empty() { line.to_string() } else { format!(" {line}") }) - 382 | .collect::>() - 383 | .join("\n"); - | - 384 | let mut contents = 
fs::read_to_string(path)?; - 385 | if !contents.contains("wasm32-unknown-unknown") { - 386 | contents = contents.replace(r#" c_config.flag("-utf-8");"#, &indented_replacement); - 387 | } - | - 388 | write_file(path, contents)?; - 389 | Ok(()) - 390 | }, - 391 | )?; - | - 392 | missing_path_else( - 393 | repo_path.join("Cargo.toml"), - 394 | allow_update, - 395 | |path| { - 396 | generate_file( - 397 | path, - 398 | CARGO_TOML_TEMPLATE, - 399 | dashed_language_name.as_str(), - 400 | &generate_opts, - 401 | ) - 402 | }, - 403 | |path| { - 404 | let contents = fs::read_to_string(path)?; - 405 | if contents.contains("\"LICENSE\"") { - 406 | write_file(path, contents.replace("\"LICENSE\"", "\"/LICENSE\""))?; - 407 | } - 408 | Ok(()) - 409 | }, - 410 | )?; - | - 411 | Ok(()) - 412 | })?; - 413 | } - | - 414 | // Generate Node bindings - 415 | if tree_sitter_config.bindings.node { - 416 | missing_path(bindings_dir.join("node"), create_dir)?.apply(|path| { - 417 | missing_path_else( - 418 | path.join("index.js"), - 419 | allow_update, - 420 | |path| generate_file(path, INDEX_JS_TEMPLATE, language_name, &generate_opts), - 421 | |path| { - 422 | let contents = fs::read_to_string(path)?; - 423 | if !contents.contains("new URL") { - 424 | warn!("Replacing index.js"); - 425 | generate_file(path, INDEX_JS_TEMPLATE, language_name, &generate_opts)?; - 426 | } - 427 | Ok(()) - 428 | }, - 429 | )?; - | - 430 | missing_path(path.join("index.d.ts"), |path| { - 431 | generate_file(path, INDEX_D_TS_TEMPLATE, language_name, &generate_opts) - 432 | })?; - | - 433 | missing_path_else( - 434 | path.join("binding_test.js"), - 435 | allow_update, - 436 | |path| { - 437 | generate_file( - 438 | path, - 439 | BINDING_TEST_JS_TEMPLATE, - 440 | language_name, - 441 | &generate_opts, - 442 | ) - 443 | }, - 444 | |path| { - 445 | let contents = fs::read_to_string(path)?; - 446 | if !contents.contains("import") { - 447 | warn!("Replacing binding_test.js"); - 448 | generate_file( - 449 | path, - 450 
| BINDING_TEST_JS_TEMPLATE, - 451 | language_name, - 452 | &generate_opts, - 453 | )?; - 454 | } - 455 | Ok(()) - 456 | }, - 457 | )?; - | - 458 | missing_path(path.join("binding.cc"), |path| { - 459 | generate_file(path, JS_BINDING_CC_TEMPLATE, language_name, &generate_opts) - 460 | })?; - | - 461 | missing_path_else( - 462 | repo_path.join("binding.gyp"), - 463 | allow_update, - 464 | |path| generate_file(path, BINDING_GYP_TEMPLATE, language_name, &generate_opts), - 465 | |path| { - 466 | let contents = fs::read_to_string(path)?; - 467 | if contents.contains("fs.exists(") { - 468 | write_file(path, contents.replace("fs.exists(", "fs.existsSync("))?; - 469 | } - 470 | Ok(()) - 471 | }, - 472 | )?; - | - 473 | Ok(()) - 474 | })?; - 475 | } - | - 476 | // Generate C bindings - 477 | if tree_sitter_config.bindings.c { - 478 | missing_path(bindings_dir.join("c"), create_dir)?.apply(|path| { - 479 | let old_file = &path.join(format!("tree-sitter-{}.h", language_name.to_kebab_case())); - 480 | if allow_update && fs::exists(old_file).unwrap_or(false) { - 481 | fs::remove_file(old_file)?; - 482 | } - 483 | missing_path(path.join("tree_sitter"), create_dir)?.apply(|include_path| { - 484 | missing_path( - 485 | include_path.join(format!("tree-sitter-{}.h", language_name.to_kebab_case())), - 486 | |path| { - 487 | generate_file(path, PARSER_NAME_H_TEMPLATE, language_name, &generate_opts) - 488 | }, - 489 | )?; - 490 | Ok(()) - 491 | })?; - | - 492 | missing_path( - 493 | path.join(format!("tree-sitter-{}.pc.in", language_name.to_kebab_case())), - 494 | |path| { - 495 | generate_file( - 496 | path, - 497 | PARSER_NAME_PC_IN_TEMPLATE, - 498 | language_name, - 499 | &generate_opts, - 500 | ) - 501 | }, - 502 | )?; - | - 503 | missing_path_else( - 504 | repo_path.join("Makefile"), - 505 | allow_update, - 506 | |path| { - 507 | generate_file(path, MAKEFILE_TEMPLATE, language_name, &generate_opts) - 508 | }, - 509 | |path| { - 510 | let mut contents = fs::read_to_string(path)?; - 
511 | if !contents.contains("cd '$(DESTDIR)$(LIBDIR)' && ln -sf") { - 512 | warn!("Replacing Makefile"); - 513 | generate_file(path, MAKEFILE_TEMPLATE, language_name, &generate_opts)?; - 514 | } else { - 515 | contents = contents - 516 | .replace( - 517 | indoc! {r" - 518 | $(PARSER): $(SRC_DIR)/grammar.json - 519 | $(TS) generate $^ - 520 | "}, - 521 | indoc! {r" - 522 | $(SRC_DIR)/grammar.json: grammar.js - 523 | $(TS) generate --emit=json $^ - | - 524 | $(PARSER): $(SRC_DIR)/grammar.json - 525 | $(TS) generate --emit=parser $^ - 526 | "} - 527 | ); - 528 | write_file(path, contents)?; - 529 | } - 530 | Ok(()) - 531 | }, - 532 | )?; - | - 533 | missing_path_else( - 534 | repo_path.join("CMakeLists.txt"), - 535 | allow_update, - 536 | |path| generate_file(path, CMAKELISTS_TXT_TEMPLATE, language_name, &generate_opts), - 537 | |path| { - 538 | let mut contents = fs::read_to_string(path)?; - 539 | contents = contents - 540 | .replace("add_custom_target(test", "add_custom_target(ts-test") - 541 | .replace( - 542 | &formatdoc! {r#" - 543 | install(FILES bindings/c/tree-sitter-{language_name}.h - 544 | DESTINATION "${{CMAKE_INSTALL_INCLUDEDIR}}/tree_sitter") - 545 | "#}, - 546 | indoc! {r#" - 547 | install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bindings/c/tree_sitter" - 548 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - 549 | FILES_MATCHING PATTERN "*.h") - 550 | "#} - 551 | ).replace( - 552 | &format!("target_include_directories(tree-sitter-{language_name} PRIVATE src)"), - 553 | &formatdoc! {" - 554 | target_include_directories(tree-sitter-{language_name} - 555 | PRIVATE src - 556 | INTERFACE $ - 557 | $) - 558 | "} - 559 | ).replace( - 560 | indoc! 
{r#" - 561 | add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/parser.c" - 562 | DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/grammar.json" - 563 | COMMAND "${TREE_SITTER_CLI}" generate src/grammar.json - 564 | --abi=${TREE_SITTER_ABI_VERSION} - 565 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - 566 | COMMENT "Generating parser.c") - 567 | "#}, - 568 | indoc! {r#" - 569 | add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/grammar.json" - 570 | DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/grammar.js" - 571 | COMMAND "${TREE_SITTER_CLI}" generate grammar.js - 572 | --emit=json - 573 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - 574 | COMMENT "Generating grammar.json") - | - 575 | add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/parser.c" - 576 | DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/grammar.json" - 577 | COMMAND "${TREE_SITTER_CLI}" generate src/grammar.json - 578 | --emit=parser --abi=${TREE_SITTER_ABI_VERSION} - 579 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - 580 | COMMENT "Generating parser.c") - 581 | "#} - 582 | ); - 583 | write_file(path, contents)?; - 584 | Ok(()) - 585 | }, - 586 | )?; - | - 587 | Ok(()) - 588 | })?; - 589 | } - | - 590 | // Generate Go bindings - 591 | if tree_sitter_config.bindings.go { - 592 | missing_path(bindings_dir.join("go"), create_dir)?.apply(|path| { - 593 | missing_path(path.join("binding.go"), |path| { - 594 | generate_file(path, BINDING_GO_TEMPLATE, language_name, &generate_opts) - 595 | })?; - | - 596 | missing_path(path.join("binding_test.go"), |path| { - 597 | generate_file( - 598 | path, - 599 | BINDING_TEST_GO_TEMPLATE, - 600 | language_name, - 601 | &generate_opts, - 602 | ) - 603 | })?; - | - 604 | missing_path(repo_path.join("go.mod"), |path| { - 605 | generate_file(path, GO_MOD_TEMPLATE, language_name, &generate_opts) - 606 | })?; - | - 607 | Ok(()) - 608 | })?; - 609 | } - | - 610 | // Generate Python bindings - 611 | if tree_sitter_config.bindings.python { - 612 | 
missing_path(bindings_dir.join("python"), create_dir)?.apply(|path| { - 613 | let lang_path = path.join(format!("tree_sitter_{}", language_name.to_snake_case())); - 614 | missing_path(&lang_path, create_dir)?; - | - 615 | missing_path_else( - 616 | lang_path.join("binding.c"), - 617 | allow_update, - 618 | |path| generate_file(path, PY_BINDING_C_TEMPLATE, language_name, &generate_opts), - 619 | |path| { - 620 | let mut contents = fs::read_to_string(path)?; - 621 | if !contents.contains("PyModuleDef_Init") { - 622 | contents = contents - 623 | .replace("PyModule_Create", "PyModuleDef_Init") - 624 | .replace( - 625 | "static PyMethodDef methods[] = {\n", - 626 | indoc! {" - 627 | static struct PyModuleDef_Slot slots[] = { - 628 | #ifdef Py_GIL_DISABLED - 629 | {Py_mod_gil, Py_MOD_GIL_NOT_USED}, - 630 | #endif - 631 | {0, NULL} - 632 | }; - | - 633 | static PyMethodDef methods[] = { - 634 | "}, - 635 | ) - 636 | .replace( - 637 | indoc! {" - 638 | .m_size = -1, - 639 | .m_methods = methods - 640 | "}, - 641 | indoc! 
{" - 642 | .m_size = 0, - 643 | .m_methods = methods, - 644 | .m_slots = slots, - 645 | "}, - 646 | ); - 647 | write_file(path, contents)?; - 648 | } - 649 | Ok(()) - 650 | }, - 651 | )?; - | - 652 | missing_path(lang_path.join("__init__.py"), |path| { - 653 | generate_file(path, INIT_PY_TEMPLATE, language_name, &generate_opts) - 654 | })?; - | - 655 | missing_path_else( - 656 | lang_path.join("__init__.pyi"), - 657 | allow_update, - 658 | |path| generate_file(path, INIT_PYI_TEMPLATE, language_name, &generate_opts), - 659 | |path| { - 660 | let mut contents = fs::read_to_string(path)?; - 661 | if !contents.contains("CapsuleType") { - 662 | contents = contents - 663 | .replace( - 664 | "from typing import Final", - 665 | "from typing import Final\nfrom typing_extensions import CapsuleType" - 666 | ) - 667 | .replace("-> object:", "-> CapsuleType:"); - 668 | write_file(path, contents)?; - 669 | } - 670 | Ok(()) - 671 | }, - 672 | )?; - | - 673 | missing_path(lang_path.join("py.typed"), |path| { - 674 | generate_file(path, "", language_name, &generate_opts) // py.typed is empty - 675 | })?; - | - 676 | missing_path(path.join("tests"), create_dir)?.apply(|path| { - 677 | missing_path_else( - 678 | path.join("test_binding.py"), - 679 | allow_update, - 680 | |path| { - 681 | generate_file( - 682 | path, - 683 | TEST_BINDING_PY_TEMPLATE, - 684 | language_name, - 685 | &generate_opts, - 686 | ) - 687 | }, - 688 | |path| { - 689 | let mut contents = fs::read_to_string(path)?; - 690 | if !contents.contains("Parser(Language(") { - 691 | contents = contents - 692 | .replace("tree_sitter.Language(", "Parser(Language(") - 693 | .replace(".language())\n", ".language()))\n") - 694 | .replace( - 695 | "import tree_sitter\n", - 696 | "from tree_sitter import Language, Parser\n", - 697 | ); - 698 | write_file(path, contents)?; - 699 | } - 700 | Ok(()) - 701 | }, - 702 | )?; - 703 | Ok(()) - 704 | })?; - | - 705 | missing_path_else( - 706 | repo_path.join("setup.py"), - 707 | 
allow_update, - 708 | |path| generate_file(path, SETUP_PY_TEMPLATE, language_name, &generate_opts), - 709 | |path| { - 710 | let contents = fs::read_to_string(path)?; - 711 | if !contents.contains("build_ext") { - 712 | warn!("Replacing setup.py"); - 713 | generate_file(path, SETUP_PY_TEMPLATE, language_name, &generate_opts)?; - 714 | } - 715 | Ok(()) - 716 | }, - 717 | )?; - | - 718 | missing_path_else( - 719 | repo_path.join("pyproject.toml"), - 720 | allow_update, - 721 | |path| { - 722 | generate_file( - 723 | path, - 724 | PYPROJECT_TOML_TEMPLATE, - 725 | dashed_language_name.as_str(), - 726 | &generate_opts, - 727 | ) - 728 | }, - 729 | |path| { - 730 | let mut contents = fs::read_to_string(path)?; - 731 | if !contents.contains("cp310-*") { - 732 | contents = contents - 733 | .replace(r#"build = "cp39-*""#, r#"build = "cp310-*""#) - 734 | .replace(r#"python = ">=3.9""#, r#"python = ">=3.10""#) - 735 | .replace("tree-sitter~=0.22", "tree-sitter~=0.24"); - 736 | write_file(path, contents)?; - 737 | } - 738 | Ok(()) - 739 | }, - 740 | )?; - | - 741 | Ok(()) - 742 | })?; - 743 | } - | - 744 | // Generate Swift bindings - 745 | if tree_sitter_config.bindings.swift { - 746 | missing_path(bindings_dir.join("swift"), create_dir)?.apply(|path| { - 747 | let lang_path = path.join(&class_name); - 748 | missing_path(&lang_path, create_dir)?; - | - 749 | missing_path(lang_path.join(format!("{language_name}.h")), |path| { - 750 | generate_file(path, PARSER_NAME_H_TEMPLATE, language_name, &generate_opts) - 751 | })?; - | - 752 | missing_path(path.join(format!("{class_name}Tests")), create_dir)?.apply(|path| { - 753 | missing_path(path.join(format!("{class_name}Tests.swift")), |path| { - 754 | generate_file(path, TESTS_SWIFT_TEMPLATE, language_name, &generate_opts) - 755 | })?; - | - 756 | Ok(()) - 757 | })?; - | - 758 | missing_path_else( - 759 | repo_path.join("Package.swift"), - 760 | allow_update, - 761 | |path| generate_file(path, PACKAGE_SWIFT_TEMPLATE, language_name, 
&generate_opts), - 762 | |path| { - 763 | let mut contents = fs::read_to_string(path)?; - 764 | contents = contents - 765 | .replace( - 766 | "https://github.com/ChimeHQ/SwiftTreeSitter", - 767 | "https://github.com/tree-sitter/swift-tree-sitter", - 768 | ) - 769 | .replace("version: \"0.8.0\")", "version: \"0.9.0\")") - 770 | .replace("(url:", "(name: \"SwiftTreeSitter\", url:"); - 771 | write_file(path, contents)?; - 772 | Ok(()) - 773 | }, - 774 | )?; - | - 775 | Ok(()) - 776 | })?; - 777 | } - | - 778 | // Generate Zig bindings - 779 | if tree_sitter_config.bindings.zig { - 780 | missing_path_else( - 781 | repo_path.join("build.zig"), - 782 | allow_update, - 783 | |path| generate_file(path, BUILD_ZIG_TEMPLATE, language_name, &generate_opts), - 784 | |path| { - 785 | let contents = fs::read_to_string(path)?; - 786 | if !contents.contains("b.pkg_hash.len") { - 787 | warn!("Replacing build.zig"); - 788 | generate_file(path, BUILD_ZIG_TEMPLATE, language_name, &generate_opts) - 789 | } else { - 790 | Ok(()) - 791 | } - 792 | }, - 793 | )?; - | - 794 | missing_path_else( - 795 | repo_path.join("build.zig.zon"), - 796 | allow_update, - 797 | |path| generate_file(path, BUILD_ZIG_ZON_TEMPLATE, language_name, &generate_opts), - 798 | |path| { - 799 | let contents = fs::read_to_string(path)?; - 800 | if !contents.contains(".name = .tree_sitter_") { - 801 | warn!("Replacing build.zig.zon"); - 802 | generate_file(path, BUILD_ZIG_ZON_TEMPLATE, language_name, &generate_opts) - 803 | } else { - 804 | Ok(()) - 805 | } - 806 | }, - 807 | )?; - | - 808 | missing_path(bindings_dir.join("zig"), create_dir)?.apply(|path| { - 809 | missing_path_else( - 810 | path.join("root.zig"), - 811 | allow_update, - 812 | |path| generate_file(path, ROOT_ZIG_TEMPLATE, language_name, &generate_opts), - 813 | |path| { - 814 | let contents = fs::read_to_string(path)?; - 815 | if contents.contains("ts.Language") { - 816 | warn!("Replacing root.zig"); - 817 | generate_file(path, ROOT_ZIG_TEMPLATE, 
language_name, &generate_opts) - 818 | } else { - 819 | Ok(()) - 820 | } - 821 | }, - 822 | )?; - | - 823 | missing_path(path.join("test.zig"), |path| { - 824 | generate_file(path, TEST_ZIG_TEMPLATE, language_name, &generate_opts) - 825 | })?; - | - 826 | Ok(()) - 827 | })?; - 828 | } - | - 829 | Ok(()) - 830 | } - | - 831 | pub fn get_root_path(path: &Path) -> Result { - 832 | let mut pathbuf = path.to_owned(); - 833 | let filename = path.file_name().unwrap().to_str().unwrap(); - 834 | let is_package_json = filename == "package.json"; - 835 | loop { - 836 | let json = pathbuf - 837 | .exists() - 838 | .then(|| { - 839 | let contents = fs::read_to_string(pathbuf.as_path()) - 840 | .with_context(|| format!("Failed to read {filename}"))?; - 841 | if is_package_json { - 842 | serde_json::from_str::>(&contents) - 843 | .context(format!("Failed to parse {filename}")) - 844 | .map(|v| v.contains_key("tree-sitter")) - 845 | } else { - 846 | serde_json::from_str::(&contents) - 847 | .context(format!("Failed to parse {filename}")) - 848 | .map(|_| true) - 849 | } - 850 | }) - 851 | .transpose()?; - 852 | if json == Some(true) { - 853 | return Ok(pathbuf.parent().unwrap().to_path_buf()); - 854 | } - 855 | pathbuf.pop(); // filename - 856 | if !pathbuf.pop() { - 857 | return Err(anyhow!(format!( - 858 | concat!( - 859 | "Failed to locate a {} file,", - 860 | " please ensure you have one, and if you don't then consult the docs", - 861 | ), - 862 | filename - 863 | ))); - 864 | } - 865 | pathbuf.push(filename); - 866 | } - 867 | } - | - 868 | fn generate_file( - 869 | path: &Path, - 870 | template: &str, - 871 | language_name: &str, - 872 | generate_opts: &GenerateOpts, - 873 | ) -> Result<()> { - 874 | let filename = path.file_name().unwrap().to_str().unwrap(); - | - 875 | let mut replacement = template - 876 | .replace( - 877 | CAMEL_PARSER_NAME_PLACEHOLDER, - 878 | generate_opts.camel_parser_name, - 879 | ) - 880 | .replace( - 881 | TITLE_PARSER_NAME_PLACEHOLDER, - 882 | 
generate_opts.title_parser_name, - 883 | ) - 884 | .replace( - 885 | UPPER_PARSER_NAME_PLACEHOLDER, - 886 | &language_name.to_shouty_snake_case(), - 887 | ) - 888 | .replace( - 889 | LOWER_PARSER_NAME_PLACEHOLDER, - 890 | &language_name.to_snake_case(), - 891 | ) - 892 | .replace( - 893 | KEBAB_PARSER_NAME_PLACEHOLDER, - 894 | &language_name.to_kebab_case(), - 895 | ) - 896 | .replace(PARSER_NAME_PLACEHOLDER, language_name) - 897 | .replace(CLI_VERSION_PLACEHOLDER, CLI_VERSION) - 898 | .replace(RUST_BINDING_VERSION_PLACEHOLDER, RUST_BINDING_VERSION) - 899 | .replace(ABI_VERSION_MAX_PLACEHOLDER, &ABI_VERSION_MAX.to_string()) - 900 | .replace( - 901 | PARSER_VERSION_PLACEHOLDER, - 902 | &generate_opts.version.to_string(), - 903 | ) - 904 | .replace(PARSER_CLASS_NAME_PLACEHOLDER, generate_opts.class_name); - | - 905 | if let Some(name) = generate_opts.author_name { - 906 | replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER, name); - 907 | } else { - 908 | match filename { - 909 | "package.json" => { - 910 | replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_JS, ""); - 911 | } - 912 | "pyproject.toml" => { - 913 | replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_PY, ""); - 914 | } - 915 | "grammar.js" => { - 916 | replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_GRAMMAR, ""); - 917 | } - 918 | "Cargo.toml" => { - 919 | replacement = replacement.replace(AUTHOR_NAME_PLACEHOLDER_RS, ""); - 920 | } - 921 | _ => {} - 922 | } - 923 | } - | - 924 | if let Some(email) = generate_opts.author_email { - 925 | replacement = match filename { - 926 | "Cargo.toml" | "grammar.js" => { - 927 | replacement.replace(AUTHOR_EMAIL_PLACEHOLDER, &format!("<{email}>")) - 928 | } - 929 | _ => replacement.replace(AUTHOR_EMAIL_PLACEHOLDER, email), - 930 | } - 931 | } else { - 932 | match filename { - 933 | "package.json" => { - 934 | replacement = replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_JS, ""); - 935 | } - 936 | "pyproject.toml" => { - 937 | replacement = 
replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_PY, ""); - 938 | } - 939 | "grammar.js" => { - 940 | replacement = replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_GRAMMAR, ""); - 941 | } - 942 | "Cargo.toml" => { - 943 | replacement = replacement.replace(AUTHOR_EMAIL_PLACEHOLDER_RS, ""); - 944 | } - 945 | _ => {} - 946 | } - 947 | } - | - 948 | if filename == "package.json" { - 949 | if let Some(url) = generate_opts.author_url { - 950 | replacement = replacement.replace(AUTHOR_URL_PLACEHOLDER, url); - 951 | } else { - 952 | replacement = replacement.replace(AUTHOR_URL_PLACEHOLDER_JS, ""); - 953 | } - 954 | } - | - 955 | if generate_opts.author_name.is_none() - 956 | && generate_opts.author_email.is_none() - 957 | && generate_opts.author_url.is_none() - 958 | && filename == "package.json" - 959 | { - 960 | if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_JS) { - 961 | if let Some(end_idx) = replacement[start_idx..] - 962 | .find("},") - 963 | .map(|i| i + start_idx + 2) - 964 | { - 965 | replacement.replace_range(start_idx..end_idx, ""); - 966 | } - 967 | } - 968 | } else if generate_opts.author_name.is_none() && generate_opts.author_email.is_none() { - 969 | match filename { - 970 | "pyproject.toml" => { - 971 | if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_PY) { - 972 | if let Some(end_idx) = replacement[start_idx..] - 973 | .find("}]") - 974 | .map(|i| i + start_idx + 2) - 975 | { - 976 | replacement.replace_range(start_idx..end_idx, ""); - 977 | } - 978 | } - 979 | } - 980 | "grammar.js" => { - 981 | if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_GRAMMAR) { - 982 | if let Some(end_idx) = replacement[start_idx..] - 983 | .find(" \n") - 984 | .map(|i| i + start_idx + 1) - 985 | { - 986 | replacement.replace_range(start_idx..end_idx, ""); - 987 | } - 988 | } - 989 | } - 990 | "Cargo.toml" => { - 991 | if let Some(start_idx) = replacement.find(AUTHOR_BLOCK_RS) { - 992 | if let Some(end_idx) = replacement[start_idx..] 
- 993 | .find("\"]") - 994 | .map(|i| i + start_idx + 2) - 995 | { - 996 | replacement.replace_range(start_idx..end_idx, ""); - 997 | } - 998 | } - 999 | } -1000 | _ => {} -1001 | } -1002 | } - | -1003 | if let Some(license) = generate_opts.license { -1004 | replacement = replacement.replace(PARSER_LICENSE_PLACEHOLDER, license); -1005 | } else { -1006 | replacement = replacement.replace(PARSER_LICENSE_PLACEHOLDER, "MIT"); -1007 | } - | -1008 | if let Some(description) = generate_opts.description { -1009 | replacement = replacement.replace(PARSER_DESCRIPTION_PLACEHOLDER, description); -1010 | } else { -1011 | replacement = replacement.replace( -1012 | PARSER_DESCRIPTION_PLACEHOLDER, -1013 | &format!( -1014 | "{} grammar for tree-sitter", -1015 | generate_opts.camel_parser_name, -1016 | ), -1017 | ); -1018 | } - | -1019 | if let Some(repository) = generate_opts.repository { -1020 | replacement = replacement -1021 | .replace( -1022 | PARSER_URL_STRIPPED_PLACEHOLDER, -1023 | &repository.replace("https://", "").to_lowercase(), -1024 | ) -1025 | .replace(PARSER_URL_PLACEHOLDER, &repository.to_lowercase()); -1026 | } else { -1027 | replacement = replacement -1028 | .replace( -1029 | PARSER_URL_STRIPPED_PLACEHOLDER, -1030 | &format!( -1031 | "github.com/tree-sitter/tree-sitter-{}", -1032 | language_name.to_lowercase() -1033 | ), -1034 | ) -1035 | .replace( -1036 | PARSER_URL_PLACEHOLDER, -1037 | &format!( -1038 | "https://github.com/tree-sitter/tree-sitter-{}", -1039 | language_name.to_lowercase() -1040 | ), -1041 | ); -1042 | } - | -1043 | if let Some(funding_url) = generate_opts.funding { -1044 | match filename { -1045 | "pyproject.toml" | "package.json" => { -1046 | replacement = replacement.replace(FUNDING_URL_PLACEHOLDER, funding_url); -1047 | } -1048 | _ => {} -1049 | } -1050 | } else { -1051 | match filename { -1052 | "package.json" => { -1053 | replacement = replacement.replace(" \"funding\": \"FUNDING_URL\",\n", ""); -1054 | } -1055 | "pyproject.toml" => { -1056 | 
replacement = replacement.replace("Funding = \"FUNDING_URL\"\n", ""); -1057 | } -1058 | _ => {} -1059 | } -1060 | } - | -1061 | if filename == "build.zig.zon" { -1062 | let id = thread_rng().gen_range(1u32..0xFFFF_FFFFu32); -1063 | let checksum = crc32(format!("tree_sitter_{language_name}").as_bytes()); -1064 | replacement = replacement.replace( -1065 | PARSER_FINGERPRINT_PLACEHOLDER, -1066 | #[cfg(target_endian = "little")] -1067 | &format!("0x{checksum:x}{id:x}"), -1068 | #[cfg(target_endian = "big")] -1069 | &format!("0x{id:x}{checksum:x}"), -1070 | ); -1071 | } - | -1072 | write_file(path, replacement)?; -1073 | Ok(()) -1074 | } - | -1075 | fn create_dir(path: &Path) -> Result<()> { -1076 | fs::create_dir_all(path) -1077 | .with_context(|| format!("Failed to create {:?}", path.to_string_lossy())) -1078 | } - | -1079 | #[derive(PartialEq, Eq, Debug)] -1080 | enum PathState

-1081 | where -1082 | P: AsRef, -1083 | { -1084 | Exists(P), -1085 | Missing(P), -1086 | } - | -1087 | #[allow(dead_code)] -1088 | impl

PathState

-1089 | where -1090 | P: AsRef, -1091 | { -1092 | fn exists(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> { -1093 | if let Self::Exists(path) = self { -1094 | action(path.as_ref())?; -1095 | } -1096 | Ok(self) -1097 | } - | -1098 | fn missing(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> { -1099 | if let Self::Missing(path) = self { -1100 | action(path.as_ref())?; -1101 | } -1102 | Ok(self) -1103 | } - | -1104 | fn apply(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> { -1105 | action(self.as_path())?; -1106 | Ok(self) -1107 | } - | -1108 | fn apply_state(&self, mut action: impl FnMut(&Self) -> Result<()>) -> Result<&Self> { -1109 | action(self)?; -1110 | Ok(self) -1111 | } - | -1112 | fn as_path(&self) -> &Path { -1113 | match self { -1114 | Self::Exists(path) | Self::Missing(path) => path.as_ref(), -1115 | } -1116 | } -1117 | } - | -1118 | fn missing_path(path: P, mut action: F) -> Result> -1119 | where -1120 | P: AsRef, -1121 | F: FnMut(&Path) -> Result<()>, -1122 | { -1123 | let path_ref = path.as_ref(); -1124 | if !path_ref.exists() { -1125 | action(path_ref)?; -1126 | Ok(PathState::Missing(path)) -1127 | } else { -1128 | Ok(PathState::Exists(path)) -1129 | } -1130 | } - | -1131 | fn missing_path_else( -1132 | path: P, -1133 | allow_update: bool, -1134 | mut action: T, -1135 | mut else_action: F, -1136 | ) -> Result> -1137 | where -1138 | P: AsRef, -1139 | T: FnMut(&Path) -> Result<()>, -1140 | F: FnMut(&Path) -> Result<()>, -1141 | { -1142 | let path_ref = path.as_ref(); -1143 | if !path_ref.exists() { -1144 | action(path_ref)?; -1145 | Ok(PathState::Missing(path)) -1146 | } else { -1147 | if allow_update { -1148 | else_action(path_ref)?; -1149 | } -1150 | Ok(PathState::Exists(path)) -1151 | } -1152 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/input.rs: 
--------------------------------------------------------------------------------- - 1 | use std::{ - 2 | fs, - 3 | io::{Read, Write}, - 4 | path::{Path, PathBuf}, - 5 | sync::{ - 6 | atomic::{AtomicUsize, Ordering}, - 7 | mpsc, Arc, - 8 | }, - 9 | }; - | - 10 | use anyhow::{anyhow, bail, Context, Result}; - 11 | use glob::glob; - | - 12 | use crate::test::{parse_tests, TestEntry}; - | - 13 | pub enum CliInput { - 14 | Paths(Vec), - 15 | Test { - 16 | name: String, - 17 | contents: Vec, - 18 | languages: Vec>, - 19 | }, - 20 | Stdin(Vec), - 21 | } - | - 22 | pub fn get_input( - 23 | paths_file: Option<&Path>, - 24 | paths: Option>, - 25 | test_number: Option, - 26 | cancellation_flag: &Arc, - 27 | ) -> Result { - 28 | if let Some(paths_file) = paths_file { - 29 | return Ok(CliInput::Paths( - 30 | fs::read_to_string(paths_file) - 31 | .with_context(|| format!("Failed to read paths file {}", paths_file.display()))? - 32 | .trim() - 33 | .lines() - 34 | .map(PathBuf::from) - 35 | .collect::>(), - 36 | )); - 37 | } - | - 38 | if let Some(test_number) = test_number { - 39 | let current_dir = std::env::current_dir().unwrap(); - 40 | let test_dir = current_dir.join("test").join("corpus"); - | - 41 | if !test_dir.exists() { - 42 | return Err(anyhow!( - 43 | "Test corpus directory not found in current directory, see https://tree-sitter.github.io/tree-sitter/creating-parsers/5-writing-tests" - 44 | )); - 45 | } - | - 46 | let test_entry = parse_tests(&test_dir)?; - 47 | let mut test_num = 0; - 48 | let Some((name, contents, languages)) = - 49 | get_test_info(&test_entry, test_number.max(1) - 1, &mut test_num) - 50 | else { - 51 | return Err(anyhow!("Failed to fetch contents of test #{test_number}")); - 52 | }; - | - 53 | return Ok(CliInput::Test { - 54 | name, - 55 | contents, - 56 | languages, - 57 | }); - 58 | } - | - 59 | if let Some(paths) = paths { - 60 | let mut result = Vec::new(); - | - 61 | let mut incorporate_path = |path: PathBuf, positive| { - 62 | if positive { - 
63 | result.push(path); - 64 | } else if let Some(index) = result.iter().position(|p| *p == path) { - 65 | result.remove(index); - 66 | } - 67 | }; - | - 68 | for mut path in paths { - 69 | let mut positive = true; - 70 | if path.starts_with("!") { - 71 | positive = false; - 72 | path = path.strip_prefix("!").unwrap().to_path_buf(); - 73 | } - | - 74 | if path.exists() { - 75 | incorporate_path(path, positive); - 76 | } else { - 77 | let Some(path_str) = path.to_str() else { - 78 | bail!("Invalid path: {}", path.display()); - 79 | }; - 80 | let paths = glob(path_str) - 81 | .with_context(|| format!("Invalid glob pattern {}", path.display()))?; - 82 | for path in paths { - 83 | incorporate_path(path?, positive); - 84 | } - 85 | } - 86 | } - | - 87 | if result.is_empty() { - 88 | return Err(anyhow!( - 89 | "No files were found at or matched by the provided pathname/glob" - 90 | )); - 91 | } - | - 92 | return Ok(CliInput::Paths(result)); - 93 | } - | - 94 | let reader_flag = cancellation_flag.clone(); - 95 | let (tx, rx) = mpsc::channel(); - | - 96 | // Spawn a thread to read from stdin, until ctrl-c or EOF is received - 97 | std::thread::spawn(move || { - 98 | let mut input = Vec::new(); - 99 | let stdin = std::io::stdin(); - 100 | let mut handle = stdin.lock(); - | - 101 | // Read in chunks, so we can check the ctrl-c flag - 102 | loop { - 103 | if reader_flag.load(Ordering::Relaxed) == 1 { - 104 | break; - 105 | } - 106 | let mut buffer = [0; 1024]; - 107 | match handle.read(&mut buffer) { - 108 | Ok(0) | Err(_) => break, - 109 | Ok(n) => input.extend_from_slice(&buffer[..n]), - 110 | } - 111 | } - | - 112 | // Signal to the main thread that we're done - 113 | tx.send(input).ok(); - 114 | }); - | - 115 | loop { - 116 | // If we've received a ctrl-c signal, exit - 117 | if cancellation_flag.load(Ordering::Relaxed) == 1 { - 118 | bail!("\n"); - 119 | } - | - 120 | // If we're done receiving input from stdin, return it - 121 | if let Ok(input) = rx.try_recv() { - 122 
| return Ok(CliInput::Stdin(input)); - 123 | } - | - 124 | std::thread::sleep(std::time::Duration::from_millis(50)); - 125 | } - 126 | } - | - 127 | #[allow(clippy::type_complexity)] - 128 | pub fn get_test_info( - 129 | test_entry: &TestEntry, - 130 | target_test: u32, - 131 | test_num: &mut u32, - 132 | ) -> Option<(String, Vec, Vec>)> { - 133 | match test_entry { - 134 | TestEntry::Example { - 135 | name, - 136 | input, - 137 | attributes, - 138 | .. - 139 | } => { - 140 | if *test_num == target_test { - 141 | return Some((name.clone(), input.clone(), attributes.languages.clone())); - 142 | } - 143 | *test_num += 1; - 144 | } - 145 | TestEntry::Group { children, .. } => { - 146 | for child in children { - 147 | if let Some((name, input, languages)) = get_test_info(child, target_test, test_num) - 148 | { - 149 | return Some((name, input, languages)); - 150 | } - 151 | } - 152 | } - 153 | } - | - 154 | None - 155 | } - | - 156 | /// Writes `contents` to a temporary file and returns the path to that file. 
- 157 | pub fn get_tmp_source_file(contents: &[u8]) -> Result { - 158 | let parse_path = std::env::temp_dir().join(".tree-sitter-temp"); - 159 | let mut parse_file = std::fs::File::create(&parse_path)?; - 160 | parse_file.write_all(contents)?; - | - 161 | Ok(parse_path) - 162 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/logger.rs: --------------------------------------------------------------------------------- - 1 | use std::io::Write; - | - 2 | use anstyle::{AnsiColor, Color, Style}; - 3 | use log::{Level, LevelFilter, Log, Metadata, Record}; - | - 4 | pub fn paint(color: Option>, text: &str) -> String { - 5 | let style = Style::new().fg_color(color.map(Into::into)); - 6 | format!("{style}{text}{style:#}") - 7 | } - | - 8 | struct Logger; - | - 9 | impl Log for Logger { - 10 | fn enabled(&self, _: &Metadata) -> bool { - 11 | true - 12 | } - | - 13 | fn log(&self, record: &Record) { - 14 | match record.level() { - 15 | Level::Error => eprintln!( - 16 | "{} {}", - 17 | paint(Some(AnsiColor::Red), "Error:"), - 18 | record.args() - 19 | ), - 20 | Level::Warn => eprintln!( - 21 | "{} {}", - 22 | paint(Some(AnsiColor::Yellow), "Warning:"), - 23 | record.args() - 24 | ), - 25 | Level::Info | Level::Debug => eprintln!("{}", record.args()), - 26 | Level::Trace => eprintln!( - 27 | "[{}] {}", - 28 | record - 29 | .module_path() - 30 | .unwrap_or_default() - 31 | .trim_start_matches("rust_tree_sitter_cli::"), - 32 | record.args() - 33 | ), - 34 | } - 35 | } - | - 36 | fn flush(&self) { - 37 | let mut stderr = std::io::stderr().lock(); - 38 | let _ = stderr.flush(); - 39 | } - 40 | } - | - 41 | pub fn init() { - 42 | log::set_boxed_logger(Box::new(Logger {})).unwrap(); - 43 | log::set_max_level(LevelFilter::Info); - 44 | } - | - 45 | pub fn enable_debug() { - 46 | log::set_max_level(LevelFilter::Debug); - 47 | } - - - --------------------------------------------------------------------------------- 
-/crates/cli/src/main.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | collections::HashSet, - 3 | env, fs, - 4 | path::{Path, PathBuf}, - 5 | }; - | - 6 | use anstyle::{AnsiColor, Color, Style}; - 7 | use anyhow::{anyhow, Context, Result}; - 8 | use clap::{crate_authors, Args, Command, FromArgMatches as _, Subcommand, ValueEnum}; - 9 | use clap_complete::generate; - 10 | use dialoguer::{theme::ColorfulTheme, Confirm, FuzzySelect, Input, MultiSelect}; - 11 | use heck::ToUpperCamelCase; - 12 | use log::{error, info, warn}; - 13 | use regex::Regex; - 14 | use semver::Version as SemverVersion; - 15 | use tree_sitter::{ffi, Parser, Point}; - 16 | use tree_sitter_cli::{ - 17 | fuzz::{ - 18 | fuzz_language_corpus, FuzzOptions, EDIT_COUNT, ITERATION_COUNT, LOG_ENABLED, - 19 | LOG_GRAPH_ENABLED, START_SEED, - 20 | }, - 21 | highlight::{self, HighlightOptions}, - 22 | init::{generate_grammar_files, JsonConfigOpts}, - 23 | input::{get_input, get_tmp_source_file, CliInput}, - 24 | logger, - 25 | parse::{self, ParseDebugType, ParseFileOptions, ParseOutput, ParseTheme}, - 26 | playground, query, - 27 | tags::{self, TagsOptions}, - 28 | test::{self, TestOptions, TestStats}, - 29 | test_highlight, test_tags, util, version, - 30 | version::BumpLevel, - 31 | wasm, - 32 | }; - 33 | use tree_sitter_config::Config; - 34 | use tree_sitter_generate::OptLevel; - 35 | use tree_sitter_highlight::Highlighter; - 36 | use tree_sitter_loader::{self as loader, Bindings, TreeSitterJSON}; - 37 | use tree_sitter_tags::TagsContext; - | - 38 | const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION"); - 39 | const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA"); - 40 | const DEFAULT_GENERATE_ABI_VERSION: usize = 15; - | - 41 | #[derive(Subcommand)] - 42 | #[command(about="Generates and tests parsers", author=crate_authors!("\n"), styles=get_styles())] - 43 | enum Commands { - 44 | /// Generate a default config file - 45 | 
InitConfig(InitConfig), - 46 | /// Initialize a grammar repository - 47 | Init(Init), - 48 | /// Generate a parser - 49 | Generate(Generate), - 50 | /// Compile a parser - 51 | Build(Build), - 52 | /// Parse files - 53 | Parse(Parse), - 54 | /// Run a parser's tests - 55 | Test(Test), - 56 | /// Display or increment the version of a grammar - 57 | Version(Version), - 58 | /// Fuzz a parser - 59 | Fuzz(Fuzz), - 60 | /// Search files using a syntax tree query - 61 | Query(Query), - 62 | /// Highlight a file - 63 | Highlight(Highlight), - 64 | /// Generate a list of tags - 65 | Tags(Tags), - 66 | /// Start local playground for a parser in the browser - 67 | Playground(Playground), - 68 | /// Print info about all known language parsers - 69 | DumpLanguages(DumpLanguages), - 70 | /// Generate shell completions - 71 | Complete(Complete), - 72 | } - | - 73 | #[derive(Args)] - 74 | struct InitConfig; - | - 75 | #[derive(Args)] - 76 | #[command(alias = "i")] - 77 | struct Init { - 78 | /// Update outdated files - 79 | #[arg(long, short)] - 80 | pub update: bool, - 81 | /// The path to the tree-sitter grammar directory - 82 | #[arg(long, short = 'p')] - 83 | pub grammar_path: Option, - 84 | } - | - 85 | #[derive(Clone, Debug, Default, ValueEnum, PartialEq, Eq)] - 86 | enum GenerationEmit { - 87 | /// Generate `grammar.json` and `node-types.json` - 88 | Json, - 89 | /// Generate `parser.c` and related files - 90 | #[default] - 91 | Parser, - 92 | /// Compile to a library - 93 | Lib, - 94 | } - | - 95 | #[derive(Args)] - 96 | #[command(alias = "gen", alias = "g")] - 97 | struct Generate { - 98 | /// The path to the grammar file - 99 | #[arg(index = 1)] - 100 | pub grammar_path: Option, - 101 | /// Show debug log during generation - 102 | #[arg(long, short)] - 103 | pub log: bool, - 104 | #[arg( - 105 | long = "abi", - 106 | value_name = "VERSION", - 107 | env = "TREE_SITTER_ABI_VERSION", - 108 | help = format!(concat!( - 109 | "Select the language ABI version to generate 
(default {}).\n", - 110 | "Use --abi=latest to generate the newest supported version ({}).", - 111 | ), - 112 | DEFAULT_GENERATE_ABI_VERSION, - 113 | tree_sitter::LANGUAGE_VERSION, - 114 | ) - 115 | )] - 116 | pub abi_version: Option, - 117 | /// What generated files to emit - 118 | #[arg(long)] - 119 | #[clap(value_enum, default_value_t=GenerationEmit::Parser)] - 120 | pub emit: GenerationEmit, - 121 | /// Deprecated: use --emit=lib. - 122 | #[arg(long, short = 'b', conflicts_with = "emit")] - 123 | pub build: bool, - 124 | /// Compile a parser in debug mode - 125 | #[arg(long, short = '0')] - 126 | pub debug_build: bool, - 127 | /// The path to the directory containing the parser library - 128 | #[arg(long, value_name = "PATH")] - 129 | pub libdir: Option, - 130 | /// The path to output the generated source files - 131 | #[arg(long, short, value_name = "DIRECTORY")] - 132 | pub output: Option, - 133 | /// Produce a report of the states for the given rule, use `-` to report every rule - 134 | #[arg(long)] - 135 | pub report_states_for_rule: Option, - 136 | /// Report conflicts in a JSON format - 137 | #[arg(long)] - 138 | pub json: bool, - 139 | /// The name or path of the JavaScript runtime to use for generating parsers - 140 | #[cfg(not(feature = "qjs-rt"))] - 141 | #[arg( - 142 | long, - 143 | value_name = "EXECUTABLE", - 144 | env = "TREE_SITTER_JS_RUNTIME", - 145 | default_value = "node" - 146 | )] - 147 | pub js_runtime: Option, - | - 148 | #[cfg(feature = "qjs-rt")] - 149 | #[arg( - 150 | long, - 151 | value_name = "EXECUTABLE", - 152 | env = "TREE_SITTER_JS_RUNTIME", - 153 | default_value = "node" - 154 | )] - 155 | /// The name or path of the JavaScript runtime to use for generating parsers, specify `native` - 156 | /// to use the native `QuickJS` runtime - 157 | pub js_runtime: Option, - | - 158 | /// Disable optimizations when generating the parser. Currently, this only affects - 159 | /// the merging of compatible parse states. 
- 160 | #[arg(long)] - 161 | pub disable_optimizations: bool, - 162 | } - | - 163 | #[derive(Args)] - 164 | #[command(alias = "b")] - 165 | struct Build { - 166 | /// Build a Wasm module instead of a dynamic library - 167 | #[arg(short, long)] - 168 | pub wasm: bool, - 169 | /// The path to output the compiled file - 170 | #[arg(short, long)] - 171 | pub output: Option, - 172 | /// The path to the grammar directory - 173 | #[arg(index = 1, num_args = 1)] - 174 | pub path: Option, - 175 | /// Make the parser reuse the same allocator as the library - 176 | #[arg(long)] - 177 | pub reuse_allocator: bool, - 178 | /// Compile a parser in debug mode - 179 | #[arg(long, short = '0')] - 180 | pub debug: bool, - 181 | } - | - 182 | #[derive(Args)] - 183 | #[command(alias = "p")] - 184 | struct Parse { - 185 | /// The path to a file with paths to source file(s) - 186 | #[arg(long = "paths")] - 187 | pub paths_file: Option, - 188 | /// The source file(s) to use - 189 | #[arg(num_args=1..)] - 190 | pub paths: Option>, - 191 | /// The path to the tree-sitter grammar directory, implies --rebuild - 192 | #[arg(long, short = 'p', conflicts_with = "rebuild")] - 193 | pub grammar_path: Option, - 194 | /// The path to the parser's dynamic library - 195 | #[arg(long, short = 'l')] - 196 | pub lib_path: Option, - 197 | /// If `--lib-path` is used, the name of the language used to extract the - 198 | /// library's language function - 199 | #[arg(long)] - 200 | pub lang_name: Option, - 201 | /// Select a language by the scope instead of a file extension - 202 | #[arg(long)] - 203 | pub scope: Option, - 204 | /// Show parsing debug log - 205 | #[arg(long, short = 'd')] // TODO: Rework once clap adds `default_missing_value_t` - 206 | #[allow(clippy::option_option)] - 207 | pub debug: Option>, - 208 | /// Compile a parser in debug mode - 209 | #[arg(long, short = '0')] - 210 | pub debug_build: bool, - 211 | /// Produce the log.html file with debug graphs - 212 | #[arg(long, short = 'D')] - 
213 | pub debug_graph: bool, - 214 | /// Compile parsers to Wasm instead of native dynamic libraries - 215 | #[arg(long)] - 216 | pub wasm: bool, - 217 | /// Output the parse data with graphviz dot - 218 | #[arg(long = "dot")] - 219 | pub output_dot: bool, - 220 | /// Output the parse data in XML format - 221 | #[arg(long = "xml", short = 'x')] - 222 | pub output_xml: bool, - 223 | /// Output the parse data in a pretty-printed CST format - 224 | #[arg(long = "cst", short = 'c')] - 225 | pub output_cst: bool, - 226 | /// Show parsing statistic - 227 | #[arg(long, short)] - 228 | pub stat: bool, - 229 | /// Interrupt the parsing process by timeout (µs) - 230 | #[arg(long)] - 231 | pub timeout: Option, - 232 | /// Measure execution time - 233 | #[arg(long, short)] - 234 | pub time: bool, - 235 | /// Suppress main output - 236 | #[arg(long, short)] - 237 | pub quiet: bool, - 238 | #[allow(clippy::doc_markdown)] - 239 | /// Apply edits in the format: \"row,col|position delcount insert_text\", can be supplied - 240 | /// multiple times - 241 | #[arg( - 242 | long, - 243 | num_args = 1.., - 244 | )] - 245 | pub edits: Option>, - 246 | /// The encoding of the input files - 247 | #[arg(long)] - 248 | pub encoding: Option, - 249 | /// Open `log.html` in the default browser, if `--debug-graph` is supplied - 250 | #[arg(long)] - 251 | pub open_log: bool, - 252 | /// Output parsing results in a JSON format - 253 | #[arg(long, short = 'j')] - 254 | pub json: bool, - 255 | /// The path to an alternative config.json file - 256 | #[arg(long)] - 257 | pub config_path: Option, - 258 | /// Parse the contents of a specific test - 259 | #[arg(long, short = 'n')] - 260 | #[clap(conflicts_with = "paths", conflicts_with = "paths_file")] - 261 | pub test_number: Option, - 262 | /// Force rebuild the parser - 263 | #[arg(short, long)] - 264 | pub rebuild: bool, - 265 | /// Omit ranges in the output - 266 | #[arg(long)] - 267 | pub no_ranges: bool, - 268 | } - | - 269 | #[derive(ValueEnum, 
Clone)] - 270 | pub enum Encoding { - 271 | Utf8, - 272 | Utf16LE, - 273 | Utf16BE, - 274 | } - | - 275 | #[derive(Args)] - 276 | #[command(alias = "t")] - 277 | struct Test { - 278 | /// Only run corpus test cases whose name matches the given regex - 279 | #[arg(long, short)] - 280 | pub include: Option, - 281 | /// Only run corpus test cases whose name does not match the given regex - 282 | #[arg(long, short)] - 283 | pub exclude: Option, - 284 | /// Only run corpus test cases from a given filename - 285 | #[arg(long)] - 286 | pub file_name: Option, - 287 | /// The path to the tree-sitter grammar directory, implies --rebuild - 288 | #[arg(long, short = 'p', conflicts_with = "rebuild")] - 289 | pub grammar_path: Option, - 290 | /// The path to the parser's dynamic library - 291 | #[arg(long, short = 'l')] - 292 | pub lib_path: Option, - 293 | /// If `--lib-path` is used, the name of the language used to extract the - 294 | /// library's language function - 295 | #[arg(long)] - 296 | pub lang_name: Option, - 297 | /// Update all syntax trees in corpus files with current parser output - 298 | #[arg(long, short)] - 299 | pub update: bool, - 300 | /// Show parsing debug log - 301 | #[arg(long, short = 'd')] - 302 | pub debug: bool, - 303 | /// Compile a parser in debug mode - 304 | #[arg(long, short = '0')] - 305 | pub debug_build: bool, - 306 | /// Produce the log.html file with debug graphs - 307 | #[arg(long, short = 'D')] - 308 | pub debug_graph: bool, - 309 | /// Compile parsers to Wasm instead of native dynamic libraries - 310 | #[arg(long)] - 311 | pub wasm: bool, - 312 | /// Open `log.html` in the default browser, if `--debug-graph` is supplied - 313 | #[arg(long)] - 314 | pub open_log: bool, - 315 | /// The path to an alternative config.json file - 316 | #[arg(long)] - 317 | pub config_path: Option, - 318 | /// Force showing fields in test diffs - 319 | #[arg(long)] - 320 | pub show_fields: bool, - 321 | /// Show parsing statistics - 322 | #[arg(long)] - 323 
| pub stat: Option, - 324 | /// Force rebuild the parser - 325 | #[arg(short, long)] - 326 | pub rebuild: bool, - 327 | /// Show only the pass-fail overview tree - 328 | #[arg(long)] - 329 | pub overview_only: bool, - 330 | } - | - 331 | #[derive(Args)] - 332 | #[command(alias = "publish")] - 333 | /// Display or increment the version of a grammar - 334 | struct Version { - 335 | /// The version to bump to - 336 | #[arg( - 337 | conflicts_with = "bump", - 338 | long_help = "\ - 339 | The version to bump to\n\ - 340 | \n\ - 341 | Examples:\n \ - 342 | tree-sitter version: display the current version\n \ - 343 | tree-sitter version : bump to specified version\n \ - 344 | tree-sitter version --bump : automatic bump" - 345 | )] - 346 | pub version: Option, - 347 | /// The path to the tree-sitter grammar directory - 348 | #[arg(long, short = 'p')] - 349 | pub grammar_path: Option, - 350 | /// Automatically bump from the current version - 351 | #[arg(long, value_enum, conflicts_with = "version")] - 352 | pub bump: Option, - 353 | } - | - 354 | #[derive(Args)] - 355 | #[command(alias = "f")] - 356 | struct Fuzz { - 357 | /// List of test names to skip - 358 | #[arg(long, short)] - 359 | pub skip: Option>, - 360 | /// Subdirectory to the language - 361 | #[arg(long)] - 362 | pub subdir: Option, - 363 | /// The path to the tree-sitter grammar directory, implies --rebuild - 364 | #[arg(long, short = 'p', conflicts_with = "rebuild")] - 365 | pub grammar_path: Option, - 366 | /// The path to the parser's dynamic library - 367 | #[arg(long)] - 368 | pub lib_path: Option, - 369 | /// If `--lib-path` is used, the name of the language used to extract the - 370 | /// library's language function - 371 | #[arg(long)] - 372 | pub lang_name: Option, - 373 | /// Maximum number of edits to perform per fuzz test - 374 | #[arg(long)] - 375 | pub edits: Option, - 376 | /// Number of fuzzing iterations to run per test - 377 | #[arg(long)] - 378 | pub iterations: Option, - 379 | /// Only fuzz 
corpus test cases whose name matches the given regex - 380 | #[arg(long, short)] - 381 | pub include: Option, - 382 | /// Only fuzz corpus test cases whose name does not match the given regex - 383 | #[arg(long, short)] - 384 | pub exclude: Option, - 385 | /// Enable logging of graphs and input - 386 | #[arg(long)] - 387 | pub log_graphs: bool, - 388 | /// Enable parser logging - 389 | #[arg(long, short)] - 390 | pub log: bool, - 391 | /// Force rebuild the parser - 392 | #[arg(short, long)] - 393 | pub rebuild: bool, - 394 | } - | - 395 | #[derive(Args)] - 396 | #[command(alias = "q")] - 397 | struct Query { - 398 | /// Path to a file with queries - 399 | #[arg(index = 1, required = true)] - 400 | query_path: PathBuf, - 401 | /// The path to the tree-sitter grammar directory, implies --rebuild - 402 | #[arg(long, short = 'p', conflicts_with = "rebuild")] - 403 | pub grammar_path: Option, - 404 | /// The path to the parser's dynamic library - 405 | #[arg(long, short = 'l')] - 406 | pub lib_path: Option, - 407 | /// If `--lib-path` is used, the name of the language used to extract the - 408 | /// library's language function - 409 | #[arg(long)] - 410 | pub lang_name: Option, - 411 | /// Measure execution time - 412 | #[arg(long, short)] - 413 | pub time: bool, - 414 | /// Suppress main output - 415 | #[arg(long, short)] - 416 | pub quiet: bool, - 417 | /// The path to a file with paths to source file(s) - 418 | #[arg(long = "paths")] - 419 | pub paths_file: Option, - 420 | /// The source file(s) to use - 421 | #[arg(index = 2, num_args=1..)] - 422 | pub paths: Option>, - 423 | /// The range of byte offsets in which the query will be executed - 424 | #[arg(long)] - 425 | pub byte_range: Option, - 426 | /// The range of rows in which the query will be executed - 427 | #[arg(long)] - 428 | pub row_range: Option, - 429 | /// Select a language by the scope instead of a file extension - 430 | #[arg(long)] - 431 | pub scope: Option, - 432 | /// Order by captures instead of 
matches - 433 | #[arg(long, short)] - 434 | pub captures: bool, - 435 | /// Whether to run query tests or not - 436 | #[arg(long)] - 437 | pub test: bool, - 438 | /// The path to an alternative config.json file - 439 | #[arg(long)] - 440 | pub config_path: Option, - 441 | /// Query the contents of a specific test - 442 | #[arg(long, short = 'n')] - 443 | #[clap(conflicts_with = "paths", conflicts_with = "paths_file")] - 444 | pub test_number: Option, - 445 | /// Force rebuild the parser - 446 | #[arg(short, long)] - 447 | pub rebuild: bool, - 448 | } - | - 449 | #[derive(Args)] - 450 | #[command(alias = "hi")] - 451 | struct Highlight { - 452 | /// Generate highlighting as an HTML document - 453 | #[arg(long, short = 'H')] - 454 | pub html: bool, - 455 | /// When generating HTML, use css classes rather than inline styles - 456 | #[arg(long)] - 457 | pub css_classes: bool, - 458 | /// Check that highlighting captures conform strictly to standards - 459 | #[arg(long)] - 460 | pub check: bool, - 461 | /// The path to a file with captures - 462 | #[arg(long)] - 463 | pub captures_path: Option, - 464 | /// The paths to files with queries - 465 | #[arg(long, num_args = 1..)] - 466 | pub query_paths: Option>, - 467 | /// Select a language by the scope instead of a file extension - 468 | #[arg(long)] - 469 | pub scope: Option, - 470 | /// Measure execution time - 471 | #[arg(long, short)] - 472 | pub time: bool, - 473 | /// Suppress main output - 474 | #[arg(long, short)] - 475 | pub quiet: bool, - 476 | /// The path to a file with paths to source file(s) - 477 | #[arg(long = "paths")] - 478 | pub paths_file: Option, - 479 | /// The source file(s) to use - 480 | #[arg(num_args = 1..)] - 481 | pub paths: Option>, - 482 | /// The path to the tree-sitter grammar directory, implies --rebuild - 483 | #[arg(long, short = 'p', conflicts_with = "rebuild")] - 484 | pub grammar_path: Option, - 485 | /// The path to an alternative config.json file - 486 | #[arg(long)] - 487 | pub 
config_path: Option, - 488 | /// Highlight the contents of a specific test - 489 | #[arg(long, short = 'n')] - 490 | #[clap(conflicts_with = "paths", conflicts_with = "paths_file")] - 491 | pub test_number: Option, - 492 | /// Force rebuild the parser - 493 | #[arg(short, long)] - 494 | pub rebuild: bool, - 495 | } - | - 496 | #[derive(Args)] - 497 | struct Tags { - 498 | /// Select a language by the scope instead of a file extension - 499 | #[arg(long)] - 500 | pub scope: Option, - 501 | /// Measure execution time - 502 | #[arg(long, short)] - 503 | pub time: bool, - 504 | /// Suppress main output - 505 | #[arg(long, short)] - 506 | pub quiet: bool, - 507 | /// The path to a file with paths to source file(s) - 508 | #[arg(long = "paths")] - 509 | pub paths_file: Option, - 510 | /// The source file(s) to use - 511 | #[arg(num_args = 1..)] - 512 | pub paths: Option>, - 513 | /// The path to the tree-sitter grammar directory, implies --rebuild - 514 | #[arg(long, short = 'p', conflicts_with = "rebuild")] - 515 | pub grammar_path: Option, - 516 | /// The path to an alternative config.json file - 517 | #[arg(long)] - 518 | pub config_path: Option, - 519 | /// Generate tags from the contents of a specific test - 520 | #[arg(long, short = 'n')] - 521 | #[clap(conflicts_with = "paths", conflicts_with = "paths_file")] - 522 | pub test_number: Option, - 523 | /// Force rebuild the parser - 524 | #[arg(short, long)] - 525 | pub rebuild: bool, - 526 | } - | - 527 | #[derive(Args)] - 528 | #[command(alias = "play", alias = "pg", alias = "web-ui")] - 529 | struct Playground { - 530 | /// Don't open in default browser - 531 | #[arg(long, short)] - 532 | pub quiet: bool, - 533 | /// Path to the directory containing the grammar and Wasm files - 534 | #[arg(long)] - 535 | pub grammar_path: Option, - 536 | /// Export playground files to specified directory instead of serving them - 537 | #[arg(long, short)] - 538 | pub export: Option, - 539 | } - | - 540 | #[derive(Args)] - 541 | 
#[command(alias = "langs")] - 542 | struct DumpLanguages { - 543 | /// The path to an alternative config.json file - 544 | #[arg(long)] - 545 | pub config_path: Option, - 546 | } - | - 547 | #[derive(Args)] - 548 | #[command(alias = "comp")] - 549 | struct Complete { - 550 | /// The shell to generate completions for - 551 | #[arg(long, short, value_enum)] - 552 | pub shell: Shell, - 553 | } - | - 554 | #[derive(ValueEnum, Clone)] - 555 | pub enum Shell { - 556 | Bash, - 557 | Elvish, - 558 | Fish, - 559 | PowerShell, - 560 | Zsh, - 561 | Nushell, - 562 | } - | - 563 | impl InitConfig { - 564 | fn run() -> Result<()> { - 565 | if let Ok(Some(config_path)) = Config::find_config_file() { - 566 | return Err(anyhow!( - 567 | "Remove your existing config file first: {}", - 568 | config_path.to_string_lossy() - 569 | )); - 570 | } - 571 | let mut config = Config::initial()?; - 572 | config.add(tree_sitter_loader::Config::initial())?; - 573 | config.add(tree_sitter_cli::highlight::ThemeConfig::default())?; - 574 | config.save()?; - 575 | info!( - 576 | "Saved initial configuration to {}", - 577 | config.location.display() - 578 | ); - 579 | Ok(()) - 580 | } - 581 | } - | - 582 | impl Init { - 583 | fn run(self, current_dir: &Path) -> Result<()> { - 584 | let configure_json = !current_dir.join("tree-sitter.json").exists(); - | - 585 | let (language_name, json_config_opts) = if configure_json { - 586 | let mut opts = JsonConfigOpts::default(); - | - 587 | let name = || { - 588 | Input::::with_theme(&ColorfulTheme::default()) - 589 | .with_prompt("Parser name") - 590 | .validate_with(|input: &String| { - 591 | if input.chars().all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') { - 592 | Ok(()) - 593 | } else { - 594 | Err("The name must be lowercase and contain only letters, digits, and underscores") - 595 | } - 596 | }) - 597 | .interact_text() - 598 | }; - | - 599 | let camelcase_name = |name: &str| { - 600 | Input::::with_theme(&ColorfulTheme::default()) - 
601 | .with_prompt("CamelCase name") - 602 | .default(name.to_upper_camel_case()) - 603 | .validate_with(|input: &String| { - 604 | if input - 605 | .chars() - 606 | .all(|c| c.is_ascii_alphabetic() || c.is_ascii_digit() || c == '_') - 607 | { - 608 | Ok(()) - 609 | } else { - 610 | Err("The name must contain only letters, digits, and underscores") - 611 | } - 612 | }) - 613 | .interact_text() - 614 | }; - | - 615 | let title = |name: &str| { - 616 | Input::::with_theme(&ColorfulTheme::default()) - 617 | .with_prompt("Title (human-readable name)") - 618 | .default(name.to_upper_camel_case()) - 619 | .interact_text() - 620 | }; - | - 621 | let description = |name: &str| { - 622 | Input::::with_theme(&ColorfulTheme::default()) - 623 | .with_prompt("Description") - 624 | .default(format!( - 625 | "{} grammar for tree-sitter", - 626 | name.to_upper_camel_case() - 627 | )) - 628 | .show_default(false) - 629 | .allow_empty(true) - 630 | .interact_text() - 631 | }; - | - 632 | let repository = |name: &str| { - 633 | Input::::with_theme(&ColorfulTheme::default()) - 634 | .with_prompt("Repository URL") - 635 | .allow_empty(true) - 636 | .default(format!("https://github.com/tree-sitter/tree-sitter-{name}")) - 637 | .show_default(false) - 638 | .interact_text() - 639 | }; - | - 640 | let funding = || { - 641 | Input::::with_theme(&ColorfulTheme::default()) - 642 | .with_prompt("Funding URL") - 643 | .allow_empty(true) - 644 | .interact_text() - 645 | .map(|e| Some(e.trim().to_string())) - 646 | }; - | - 647 | let scope = |name: &str| { - 648 | Input::::with_theme(&ColorfulTheme::default()) - 649 | .with_prompt("TextMate scope") - 650 | .default(format!("source.{name}")) - 651 | .validate_with(|input: &String| { - 652 | if input.starts_with("source.") || input.starts_with("text.") { - 653 | Ok(()) - 654 | } else { - 655 | Err("The scope must start with 'source.' 
or 'text.'") - 656 | } - 657 | }) - 658 | .interact_text() - 659 | }; - | - 660 | let file_types = |name: &str| { - 661 | Input::::with_theme(&ColorfulTheme::default()) - 662 | .with_prompt("File types (space-separated)") - 663 | .default(name.to_string()) - 664 | .interact_text() - 665 | .map(|ft| { - 666 | let mut set = HashSet::new(); - 667 | for ext in ft.split(' ') { - 668 | let ext = ext.trim(); - 669 | if !ext.is_empty() { - 670 | set.insert(ext.to_string()); - 671 | } - 672 | } - 673 | set.into_iter().collect::>() - 674 | }) - 675 | }; - | - 676 | let initial_version = || { - 677 | Input::::with_theme(&ColorfulTheme::default()) - 678 | .with_prompt("Version") - 679 | .default(SemverVersion::new(0, 1, 0)) - 680 | .interact_text() - 681 | }; - | - 682 | let license = || { - 683 | Input::::with_theme(&ColorfulTheme::default()) - 684 | .with_prompt("License") - 685 | .default("MIT".to_string()) - 686 | .allow_empty(true) - 687 | .interact() - 688 | }; - | - 689 | let author = || { - 690 | Input::::with_theme(&ColorfulTheme::default()) - 691 | .with_prompt("Author name") - 692 | .interact_text() - 693 | }; - | - 694 | let email = || { - 695 | Input::::with_theme(&ColorfulTheme::default()) - 696 | .with_prompt("Author email") - 697 | .allow_empty(true) - 698 | .interact_text() - 699 | .map(|e| (!e.trim().is_empty()).then_some(e)) - 700 | }; - | - 701 | let url = || { - 702 | Input::::with_theme(&ColorfulTheme::default()) - 703 | .with_prompt("Author URL") - 704 | .allow_empty(true) - 705 | .interact_text() - 706 | .map(|e| Some(e.trim().to_string())) - 707 | }; - | - 708 | let bindings = || { - 709 | let languages = Bindings::default().languages(); - | - 710 | let enabled = MultiSelect::new() - 711 | .with_prompt("Bindings") - 712 | .items_checked(&languages) - 713 | .interact()? 
- 714 | .into_iter() - 715 | .map(|i| languages[i].0); - | - 716 | let out = Bindings::with_enabled_languages(enabled) - 717 | .expect("unexpected unsupported language"); - 718 | anyhow::Ok(out) - 719 | }; - | - 720 | let choices = [ - 721 | "name", - 722 | "camelcase", - 723 | "title", - 724 | "description", - 725 | "repository", - 726 | "funding", - 727 | "scope", - 728 | "file_types", - 729 | "version", - 730 | "license", - 731 | "author", - 732 | "email", - 733 | "url", - 734 | "bindings", - 735 | "exit", - 736 | ]; - | - 737 | macro_rules! set_choice { - 738 | ($choice:expr) => { - 739 | match $choice { - 740 | "name" => opts.name = name()?, - 741 | "camelcase" => opts.camelcase = camelcase_name(&opts.name)?, - 742 | "title" => opts.title = title(&opts.name)?, - 743 | "description" => opts.description = description(&opts.name)?, - 744 | "repository" => opts.repository = Some(repository(&opts.name)?), - 745 | "funding" => opts.funding = funding()?, - 746 | "scope" => opts.scope = scope(&opts.name)?, - 747 | "file_types" => opts.file_types = file_types(&opts.name)?, - 748 | "version" => opts.version = initial_version()?, - 749 | "license" => opts.license = license()?, - 750 | "author" => opts.author = author()?, - 751 | "email" => opts.email = email()?, - 752 | "url" => opts.url = url()?, - 753 | "bindings" => opts.bindings = bindings()?, - 754 | "exit" => break, - 755 | _ => unreachable!(), - 756 | } - 757 | }; - 758 | } - | - 759 | // Initial configuration - 760 | for choice in choices.iter().take(choices.len() - 1) { - 761 | set_choice!(*choice); - 762 | } - | - 763 | // Loop for editing the configuration - 764 | loop { - 765 | info!( - 766 | "Your current configuration:\n{}", - 767 | serde_json::to_string_pretty(&opts)? - 768 | ); - | - 769 | if Confirm::with_theme(&ColorfulTheme::default()) - 770 | .with_prompt("Does the config above look correct?") - 771 | .interact()? 
- 772 | { - 773 | break; - 774 | } - | - 775 | let idx = FuzzySelect::with_theme(&ColorfulTheme::default()) - 776 | .with_prompt("Which field would you like to change?") - 777 | .items(&choices) - 778 | .interact()?; - | - 779 | set_choice!(choices[idx]); - 780 | } - | - 781 | (opts.name.clone(), Some(opts)) - 782 | } else { - 783 | let mut json = serde_json::from_str::( - 784 | &fs::read_to_string(current_dir.join("tree-sitter.json")) - 785 | .with_context(|| "Failed to read tree-sitter.json")?, - 786 | )?; - 787 | (json.grammars.swap_remove(0).name, None) - 788 | }; - | - 789 | generate_grammar_files( - 790 | current_dir, - 791 | &language_name, - 792 | self.update, - 793 | json_config_opts.as_ref(), - 794 | )?; - | - 795 | Ok(()) - 796 | } - 797 | } - | - 798 | impl Generate { - 799 | fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> { - 800 | if self.log { - 801 | logger::enable_debug(); - 802 | } - 803 | let abi_version = - 804 | self.abi_version - 805 | .as_ref() - 806 | .map_or(DEFAULT_GENERATE_ABI_VERSION, |version| { - 807 | if version == "latest" { - 808 | tree_sitter::LANGUAGE_VERSION - 809 | } else { - 810 | version.parse().expect("invalid abi version flag") - 811 | } - 812 | }); - 813 | if self.build { - 814 | warn!("--build is deprecated, use --emit=lib instead"); - 815 | } - | - 816 | if let Err(err) = tree_sitter_generate::generate_parser_in_directory( - 817 | current_dir, - 818 | self.output.as_deref(), - 819 | self.grammar_path.as_deref(), - 820 | abi_version, - 821 | self.report_states_for_rule.as_deref(), - 822 | self.js_runtime.as_deref(), - 823 | self.emit != GenerationEmit::Json, - 824 | if self.disable_optimizations { - 825 | OptLevel::empty() - 826 | } else { - 827 | OptLevel::default() - 828 | }, - 829 | ) { - 830 | if self.json { - 831 | eprintln!("{}", serde_json::to_string_pretty(&err)?); - 832 | // Exit early to prevent errors from being printed a second time in the caller - 833 | std::process::exit(1); - 834 | 
} else { - 835 | // Removes extra context associated with the error - 836 | Err(anyhow!(err.to_string())).with_context(|| "Error when generating parser")?; - 837 | } - 838 | } - 839 | if self.emit == GenerationEmit::Lib || self.build { - 840 | if let Some(path) = self.libdir { - 841 | loader = loader::Loader::with_parser_lib_path(path); - 842 | } - 843 | loader.debug_build(self.debug_build); - 844 | loader.languages_at_path(current_dir)?; - 845 | } - 846 | Ok(()) - 847 | } - 848 | } - | - 849 | impl Build { - 850 | fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> { - 851 | let grammar_path = current_dir.join(self.path.unwrap_or_default()); - | - 852 | loader.debug_build(self.debug); - | - 853 | if self.wasm { - 854 | let output_path = self.output.map(|path| current_dir.join(path)); - 855 | wasm::compile_language_to_wasm(&loader, &grammar_path, current_dir, output_path)?; - 856 | } else { - 857 | let output_path = if let Some(ref path) = self.output { - 858 | let path = Path::new(path); - 859 | if path.is_absolute() { - 860 | path.to_path_buf() - 861 | } else { - 862 | current_dir.join(path) - 863 | } - 864 | } else { - 865 | let file_name = grammar_path - 866 | .file_stem() - 867 | .unwrap() - 868 | .to_str() - 869 | .unwrap() - 870 | .strip_prefix("tree-sitter-") - 871 | .unwrap_or("parser"); - 872 | current_dir - 873 | .join(file_name) - 874 | .with_extension(env::consts::DLL_EXTENSION) - 875 | }; - | - 876 | let flags: &[&str] = match (self.reuse_allocator, self.debug) { - 877 | (true, true) => &["TREE_SITTER_REUSE_ALLOCATOR", "TREE_SITTER_DEBUG"], - 878 | (true, false) => &["TREE_SITTER_REUSE_ALLOCATOR"], - 879 | (false, true) => &["TREE_SITTER_DEBUG"], - 880 | (false, false) => &[], - 881 | }; - | - 882 | loader.force_rebuild(true); - | - 883 | let config = Config::load(None)?; - 884 | let loader_config = config.get()?; - 885 | loader.find_all_languages(&loader_config).unwrap(); - 886 | loader - 887 | 
.compile_parser_at_path(&grammar_path, output_path, flags) - 888 | .unwrap(); - 889 | } - 890 | Ok(()) - 891 | } - 892 | } - | - 893 | impl Parse { - 894 | fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> { - 895 | let config = Config::load(self.config_path)?; - 896 | let color = env::var("NO_COLOR").map_or(true, |v| v != "1"); - 897 | let output = if self.output_dot { - 898 | ParseOutput::Dot - 899 | } else if self.output_xml { - 900 | ParseOutput::Xml - 901 | } else if self.output_cst { - 902 | ParseOutput::Cst - 903 | } else if self.quiet || self.json { - 904 | ParseOutput::Quiet - 905 | } else { - 906 | ParseOutput::Normal - 907 | }; - | - 908 | let parse_theme = if color { - 909 | config - 910 | .get::() - 911 | .with_context(|| "Failed to parse CST theme")? - 912 | .parse_theme - 913 | .unwrap_or_default() - 914 | .into() - 915 | } else { - 916 | ParseTheme::empty() - 917 | }; - | - 918 | let encoding = self.encoding.map(|e| match e { - 919 | Encoding::Utf8 => ffi::TSInputEncodingUTF8, - 920 | Encoding::Utf16LE => ffi::TSInputEncodingUTF16LE, - 921 | Encoding::Utf16BE => ffi::TSInputEncodingUTF16BE, - 922 | }); - | - 923 | let time = self.time; - 924 | let edits = self.edits.unwrap_or_default(); - 925 | let cancellation_flag = util::cancel_on_signal(); - 926 | let mut parser = Parser::new(); - | - 927 | loader.debug_build(self.debug_build); - 928 | loader.force_rebuild(self.rebuild || self.grammar_path.is_some()); - | - 929 | #[cfg(feature = "wasm")] - 930 | if self.wasm { - 931 | let engine = tree_sitter::wasmtime::Engine::default(); - 932 | parser - 933 | .set_wasm_store(tree_sitter::WasmStore::new(&engine).unwrap()) - 934 | .unwrap(); - 935 | loader.use_wasm(&engine); - 936 | } - | - 937 | let timeout = self.timeout.unwrap_or_default(); - | - 938 | let mut has_error = false; - 939 | let loader_config = config.get()?; - 940 | loader.find_all_languages(&loader_config)?; - | - 941 | let should_track_stats = self.stat; - 942 | let mut 
stats = parse::ParseStats::default(); - 943 | let debug: ParseDebugType = match self.debug { - 944 | None => ParseDebugType::Quiet, - 945 | Some(None) => ParseDebugType::Normal, - 946 | Some(Some(specifier)) => specifier, - 947 | }; - | - 948 | let mut options = ParseFileOptions { - 949 | edits: &edits - 950 | .iter() - 951 | .map(std::string::String::as_str) - 952 | .collect::>(), - 953 | output, - 954 | print_time: time, - 955 | timeout, - 956 | stats: &mut stats, - 957 | debug, - 958 | debug_graph: self.debug_graph, - 959 | cancellation_flag: Some(&cancellation_flag), - 960 | encoding, - 961 | open_log: self.open_log, - 962 | no_ranges: self.no_ranges, - 963 | parse_theme: &parse_theme, - 964 | }; - | - 965 | let mut update_stats = |stats: &mut parse::ParseStats| { - 966 | let parse_result = stats.parse_summaries.last().unwrap(); - 967 | if should_track_stats { - 968 | stats.cumulative_stats.total_parses += 1; - 969 | if parse_result.successful { - 970 | stats.cumulative_stats.successful_parses += 1; - 971 | } - 972 | if let (Some(duration), Some(bytes)) = (parse_result.duration, parse_result.bytes) { - 973 | stats.cumulative_stats.total_bytes += bytes; - 974 | stats.cumulative_stats.total_duration += duration; - 975 | } - 976 | } - | - 977 | has_error |= !parse_result.successful; - 978 | }; - | - 979 | if self.lib_path.is_none() && self.lang_name.is_some() { - 980 | warn!("--lang-name` specified without --lib-path. 
This argument will be ignored."); - 981 | } - 982 | let lib_info = get_lib_info(self.lib_path.as_ref(), self.lang_name.as_ref(), current_dir); - | - 983 | let input = get_input( - 984 | self.paths_file.as_deref(), - 985 | self.paths, - 986 | self.test_number, - 987 | &cancellation_flag, - 988 | )?; - 989 | match input { - 990 | CliInput::Paths(paths) => { - 991 | let max_path_length = paths - 992 | .iter() - 993 | .map(|p| p.to_string_lossy().chars().count()) - 994 | .max() - 995 | .unwrap_or(0); - 996 | options.stats.source_count = paths.len(); - | - 997 | for path in &paths { - 998 | let path = Path::new(&path); - 999 | let language = loader -1000 | .select_language( -1001 | path, -1002 | current_dir, -1003 | self.scope.as_deref(), -1004 | lib_info.as_ref(), -1005 | ) -1006 | .with_context(|| { -1007 | anyhow!("Failed to load langauge for path \"{}\"", path.display()) -1008 | })?; - | -1009 | parse::parse_file_at_path( -1010 | &mut parser, -1011 | &language, -1012 | path, -1013 | &path.display().to_string(), -1014 | max_path_length, -1015 | &mut options, -1016 | )?; -1017 | update_stats(options.stats); -1018 | } -1019 | } - | -1020 | CliInput::Test { -1021 | name, -1022 | contents, -1023 | languages: language_names, -1024 | } => { -1025 | let path = get_tmp_source_file(&contents)?; -1026 | let languages = loader.languages_at_path(current_dir)?; - | -1027 | let language = if let Some(ref lib_path) = self.lib_path { -1028 | &loader -1029 | .select_language(lib_path, current_dir, None, lib_info.as_ref()) -1030 | .with_context(|| { -1031 | anyhow!( -1032 | "Failed to load language for path \"{}\"", -1033 | lib_path.display() -1034 | ) -1035 | })? -1036 | } else { -1037 | &languages -1038 | .iter() -1039 | .find(|(_, n)| language_names.contains(&Box::from(n.as_str()))) -1040 | .or_else(|| languages.first()) -1041 | .map(|(l, _)| l.clone()) -1042 | .ok_or_else(|| anyhow!("No language found"))? 
-1043 | }; - | -1044 | parse::parse_file_at_path( -1045 | &mut parser, -1046 | language, -1047 | &path, -1048 | &name, -1049 | name.chars().count(), -1050 | &mut options, -1051 | )?; -1052 | update_stats(&mut stats); -1053 | fs::remove_file(path)?; -1054 | } - | -1055 | CliInput::Stdin(contents) => { -1056 | // Place user input and parser output on separate lines -1057 | println!(); - | -1058 | let path = get_tmp_source_file(&contents)?; -1059 | let name = "stdin"; -1060 | let language = -1061 | loader.select_language(&path, current_dir, None, lib_info.as_ref())?; - | -1062 | parse::parse_file_at_path( -1063 | &mut parser, -1064 | &language, -1065 | &path, -1066 | name, -1067 | name.chars().count(), -1068 | &mut options, -1069 | )?; -1070 | update_stats(&mut stats); -1071 | fs::remove_file(path)?; -1072 | } -1073 | } - | -1074 | if should_track_stats { -1075 | println!("\n{}", stats.cumulative_stats); -1076 | } -1077 | if self.json { -1078 | println!("{}", serde_json::to_string_pretty(&stats)?); -1079 | } - | -1080 | if has_error { -1081 | return Err(anyhow!("")); -1082 | } - | -1083 | Ok(()) -1084 | } -1085 | } - | -1086 | impl Test { -1087 | fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> { -1088 | let config = Config::load(self.config_path)?; -1089 | let color = env::var("NO_COLOR").map_or(true, |v| v != "1"); -1090 | let stat = self.stat.unwrap_or_default(); - | -1091 | loader.debug_build(self.debug_build); -1092 | loader.force_rebuild(self.rebuild || self.grammar_path.is_some()); - | -1093 | let mut parser = Parser::new(); - | -1094 | #[cfg(feature = "wasm")] -1095 | if self.wasm { -1096 | let engine = tree_sitter::wasmtime::Engine::default(); -1097 | parser -1098 | .set_wasm_store(tree_sitter::WasmStore::new(&engine).unwrap()) -1099 | .unwrap(); -1100 | loader.use_wasm(&engine); -1101 | } - | -1102 | if self.lib_path.is_none() && self.lang_name.is_some() { -1103 | warn!("--lang-name` specified without --lib-path. 
This argument will be ignored."); -1104 | } -1105 | let languages = loader.languages_at_path(current_dir)?; -1106 | let language = if let Some(ref lib_path) = self.lib_path { -1107 | let lib_info = -1108 | get_lib_info(self.lib_path.as_ref(), self.lang_name.as_ref(), current_dir); -1109 | &loader -1110 | .select_language(lib_path, current_dir, None, lib_info.as_ref()) -1111 | .with_context(|| { -1112 | anyhow!( -1113 | "Failed to load language for path \"{}\"", -1114 | lib_path.display() -1115 | ) -1116 | })? -1117 | } else { -1118 | &languages -1119 | .first() -1120 | .ok_or_else(|| anyhow!("No language found"))? -1121 | .0 -1122 | }; -1123 | parser.set_language(language)?; - | -1124 | let test_dir = current_dir.join("test"); -1125 | let mut stats = parse::Stats::default(); - | -1126 | // Run the corpus tests. Look for them in `test/corpus`. -1127 | let test_corpus_dir = test_dir.join("corpus"); -1128 | if test_corpus_dir.is_dir() { -1129 | let mut output = String::new(); -1130 | let mut rates = Vec::new(); -1131 | let mut opts = TestOptions { -1132 | output: &mut output, -1133 | path: test_corpus_dir, -1134 | debug: self.debug, -1135 | debug_graph: self.debug_graph, -1136 | include: self.include, -1137 | exclude: self.exclude, -1138 | file_name: self.file_name, -1139 | update: self.update, -1140 | open_log: self.open_log, -1141 | languages: languages.iter().map(|(l, n)| (n.as_str(), l)).collect(), -1142 | color, -1143 | test_num: 1, -1144 | parse_rates: &mut rates, -1145 | stat_display: stat, -1146 | stats: &mut stats, -1147 | show_fields: self.show_fields, -1148 | overview_only: self.overview_only, -1149 | }; - | -1150 | test::run_tests_at_path(&mut parser, &mut opts)?; -1151 | println!("\n{stats}"); -1152 | } - | -1153 | // Check that all of the queries are valid. -1154 | test::check_queries_at_path(language, ¤t_dir.join("queries"))?; - | -1155 | // Run the syntax highlighting tests. 
-1156 | let test_highlight_dir = test_dir.join("highlight"); -1157 | if test_highlight_dir.is_dir() { -1158 | let mut highlighter = Highlighter::new(); -1159 | highlighter.parser = parser; -1160 | test_highlight::test_highlights( -1161 | &loader, -1162 | &config.get()?, -1163 | &mut highlighter, -1164 | &test_highlight_dir, -1165 | color, -1166 | )?; -1167 | parser = highlighter.parser; -1168 | } - | -1169 | let test_tag_dir = test_dir.join("tags"); -1170 | if test_tag_dir.is_dir() { -1171 | let mut tags_context = TagsContext::new(); -1172 | tags_context.parser = parser; -1173 | test_tags::test_tags( -1174 | &loader, -1175 | &config.get()?, -1176 | &mut tags_context, -1177 | &test_tag_dir, -1178 | color, -1179 | )?; -1180 | } - | -1181 | // For the rest of the queries, find their tests and run them -1182 | for entry in walkdir::WalkDir::new(current_dir.join("queries")) -1183 | .into_iter() -1184 | .filter_map(|e| e.ok()) -1185 | .filter(|e| e.file_type().is_file()) -1186 | { -1187 | let stem = entry -1188 | .path() -1189 | .file_stem() -1190 | .map(|s| s.to_str().unwrap_or_default()) -1191 | .unwrap_or_default(); -1192 | if stem != "highlights" && stem != "tags" { -1193 | let entries = walkdir::WalkDir::new(test_dir.join(stem)) -1194 | .into_iter() -1195 | .filter_map(|e| { -1196 | let entry = e.ok()?; -1197 | if entry.file_type().is_file() { -1198 | Some(entry) -1199 | } else { -1200 | None -1201 | } -1202 | }) -1203 | .collect::>(); -1204 | if !entries.is_empty() { -1205 | println!("{stem}:"); -1206 | } - | -1207 | for entry in entries { -1208 | let path = entry.path(); -1209 | query::query_file_at_path( -1210 | language, -1211 | path, -1212 | &path.display().to_string(), -1213 | path, -1214 | false, -1215 | None, -1216 | None, -1217 | true, -1218 | false, -1219 | false, -1220 | false, -1221 | )?; -1222 | } -1223 | } -1224 | } -1225 | Ok(()) -1226 | } -1227 | } - | -1228 | impl Version { -1229 | fn run(self, current_dir: PathBuf) -> Result<()> { -1230 | 
version::Version::new(self.version, current_dir, self.bump).run() -1231 | } -1232 | } - | -1233 | impl Fuzz { -1234 | fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> { -1235 | loader.sanitize_build(true); -1236 | loader.force_rebuild(self.rebuild || self.grammar_path.is_some()); - | -1237 | if self.lib_path.is_none() && self.lang_name.is_some() { -1238 | warn!("--lang-name` specified without --lib-path. This argument will be ignored."); -1239 | } -1240 | let languages = loader.languages_at_path(current_dir)?; -1241 | let (language, language_name) = if let Some(ref lib_path) = self.lib_path { -1242 | let lib_info = get_lib_info(Some(lib_path), self.lang_name.as_ref(), current_dir) -1243 | .with_context(|| anyhow!("No language name found for {}", lib_path.display()))?; -1244 | let lang_name = lib_info.1.to_string(); -1245 | &( -1246 | loader -1247 | .select_language(lib_path, current_dir, None, Some(&lib_info)) -1248 | .with_context(|| { -1249 | anyhow!( -1250 | "Failed to load language for path \"{}\"", -1251 | lib_path.display() -1252 | ) -1253 | })?, -1254 | lang_name, -1255 | ) -1256 | } else { -1257 | languages -1258 | .first() -1259 | .ok_or_else(|| anyhow!("No language found"))? 
-1260 | }; - | -1261 | let mut fuzz_options = FuzzOptions { -1262 | skipped: self.skip, -1263 | subdir: self.subdir, -1264 | edits: self.edits.unwrap_or(*EDIT_COUNT), -1265 | iterations: self.iterations.unwrap_or(*ITERATION_COUNT), -1266 | include: self.include, -1267 | exclude: self.exclude, -1268 | log_graphs: self.log_graphs || *LOG_GRAPH_ENABLED, -1269 | log: self.log || *LOG_ENABLED, -1270 | }; - | -1271 | fuzz_language_corpus( -1272 | language, -1273 | language_name, -1274 | *START_SEED, -1275 | current_dir, -1276 | &mut fuzz_options, -1277 | ); -1278 | Ok(()) -1279 | } -1280 | } - | -1281 | impl Query { -1282 | fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> { -1283 | let config = Config::load(self.config_path)?; -1284 | let loader_config = config.get()?; -1285 | loader.force_rebuild(self.rebuild || self.grammar_path.is_some()); -1286 | loader.find_all_languages(&loader_config)?; -1287 | let query_path = Path::new(&self.query_path); - | -1288 | let byte_range = self.byte_range.as_ref().and_then(|range| { -1289 | let mut parts = range.split(':'); -1290 | let start = parts.next()?.parse().ok()?; -1291 | let end = parts.next().unwrap().parse().ok()?; -1292 | Some(start..end) -1293 | }); -1294 | let point_range = self.row_range.as_ref().and_then(|range| { -1295 | let mut parts = range.split(':'); -1296 | let start = parts.next()?.parse().ok()?; -1297 | let end = parts.next().unwrap().parse().ok()?; -1298 | Some(Point::new(start, 0)..Point::new(end, 0)) -1299 | }); - | -1300 | let cancellation_flag = util::cancel_on_signal(); - | -1301 | if self.lib_path.is_none() && self.lang_name.is_some() { -1302 | warn!("--lang-name specified without --lib-path. 
This argument will be ignored."); -1303 | } -1304 | let lib_info = get_lib_info(self.lib_path.as_ref(), self.lang_name.as_ref(), current_dir); - | -1305 | let input = get_input( -1306 | self.paths_file.as_deref(), -1307 | self.paths, -1308 | self.test_number, -1309 | &cancellation_flag, -1310 | )?; - | -1311 | match input { -1312 | CliInput::Paths(paths) => { -1313 | let language = loader.select_language( -1314 | Path::new(&paths[0]), -1315 | current_dir, -1316 | self.scope.as_deref(), -1317 | lib_info.as_ref(), -1318 | )?; - | -1319 | for path in paths { -1320 | query::query_file_at_path( -1321 | &language, -1322 | &path, -1323 | &path.display().to_string(), -1324 | query_path, -1325 | self.captures, -1326 | byte_range.clone(), -1327 | point_range.clone(), -1328 | self.test, -1329 | self.quiet, -1330 | self.time, -1331 | false, -1332 | )?; -1333 | } -1334 | } -1335 | CliInput::Test { -1336 | name, -1337 | contents, -1338 | languages: language_names, -1339 | } => { -1340 | let path = get_tmp_source_file(&contents)?; -1341 | let languages = loader.languages_at_path(current_dir)?; -1342 | let language = if let Some(ref lib_path) = self.lib_path { -1343 | &loader -1344 | .select_language(lib_path, current_dir, None, lib_info.as_ref()) -1345 | .with_context(|| { -1346 | anyhow!( -1347 | "Failed to load language for path \"{}\"", -1348 | lib_path.display() -1349 | ) -1350 | })? -1351 | } else { -1352 | &languages -1353 | .iter() -1354 | .find(|(_, n)| language_names.contains(&Box::from(n.as_str()))) -1355 | .or_else(|| languages.first()) -1356 | .map(|(l, _)| l.clone()) -1357 | .ok_or_else(|| anyhow!("No language found"))? 
-1358 | }; -1359 | query::query_file_at_path( -1360 | language, -1361 | &path, -1362 | &name, -1363 | query_path, -1364 | self.captures, -1365 | byte_range, -1366 | point_range, -1367 | self.test, -1368 | self.quiet, -1369 | self.time, -1370 | true, -1371 | )?; -1372 | fs::remove_file(path)?; -1373 | } -1374 | CliInput::Stdin(contents) => { -1375 | // Place user input and query output on separate lines -1376 | println!(); - | -1377 | let path = get_tmp_source_file(&contents)?; -1378 | let language = -1379 | loader.select_language(&path, current_dir, None, lib_info.as_ref())?; -1380 | query::query_file_at_path( -1381 | &language, -1382 | &path, -1383 | "stdin", -1384 | query_path, -1385 | self.captures, -1386 | byte_range, -1387 | point_range, -1388 | self.test, -1389 | self.quiet, -1390 | self.time, -1391 | true, -1392 | )?; -1393 | fs::remove_file(path)?; -1394 | } -1395 | } - | -1396 | Ok(()) -1397 | } -1398 | } - | -1399 | impl Highlight { -1400 | fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> { -1401 | let config = Config::load(self.config_path)?; -1402 | let theme_config: tree_sitter_cli::highlight::ThemeConfig = config.get()?; -1403 | loader.configure_highlights(&theme_config.theme.highlight_names); -1404 | let loader_config = config.get()?; -1405 | loader.find_all_languages(&loader_config)?; -1406 | loader.force_rebuild(self.rebuild || self.grammar_path.is_some()); - | -1407 | let cancellation_flag = util::cancel_on_signal(); - | -1408 | let (mut language, mut language_configuration) = (None, None); -1409 | if let Some(scope) = self.scope.as_deref() { -1410 | if let Some((lang, lang_config)) = loader.language_configuration_for_scope(scope)? 
{ -1411 | language = Some(lang); -1412 | language_configuration = Some(lang_config); -1413 | } -1414 | if language.is_none() { -1415 | return Err(anyhow!("Unknown scope '{scope}'")); -1416 | } -1417 | } - | -1418 | let options = HighlightOptions { -1419 | theme: theme_config.theme, -1420 | check: self.check, -1421 | captures_path: self.captures_path, -1422 | inline_styles: !self.css_classes, -1423 | html: self.html, -1424 | quiet: self.quiet, -1425 | print_time: self.time, -1426 | cancellation_flag: cancellation_flag.clone(), -1427 | }; - | -1428 | let input = get_input( -1429 | self.paths_file.as_deref(), -1430 | self.paths, -1431 | self.test_number, -1432 | &cancellation_flag, -1433 | )?; -1434 | match input { -1435 | CliInput::Paths(paths) => { -1436 | let print_name = paths.len() > 1; -1437 | for path in paths { -1438 | let (language, language_config) = -1439 | match (language.clone(), language_configuration) { -1440 | (Some(l), Some(lc)) => (l, lc), -1441 | _ => { -1442 | if let Some((lang, lang_config)) = -1443 | loader.language_configuration_for_file_name(&path)? -1444 | { -1445 | (lang, lang_config) -1446 | } else { -1447 | warn!( -1448 | "{}", -1449 | util::lang_not_found_for_path(&path, &loader_config) -1450 | ); -1451 | continue; -1452 | } -1453 | } -1454 | }; - | -1455 | if let Some(highlight_config) = -1456 | language_config.highlight_config(language, self.query_paths.as_deref())? 
-1457 | { -1458 | highlight::highlight( -1459 | &loader, -1460 | &path, -1461 | &path.display().to_string(), -1462 | highlight_config, -1463 | print_name, -1464 | &options, -1465 | )?; -1466 | } else { -1467 | warn!( -1468 | "No syntax highlighting config found for path {}", -1469 | path.display() -1470 | ); -1471 | } -1472 | } -1473 | } - | -1474 | CliInput::Test { -1475 | name, -1476 | contents, -1477 | languages: language_names, -1478 | } => { -1479 | let path = get_tmp_source_file(&contents)?; - | -1480 | let languages = loader.languages_at_path(current_dir)?; -1481 | let language = languages -1482 | .iter() -1483 | .find(|(_, n)| language_names.contains(&Box::from(n.as_str()))) -1484 | .or_else(|| languages.first()) -1485 | .map(|(l, _)| l.clone()) -1486 | .ok_or_else(|| anyhow!("No language found in current path"))?; -1487 | let language_config = loader -1488 | .get_language_configuration_in_current_path() -1489 | .ok_or_else(|| anyhow!("No language configuration found in current path"))?; - | -1490 | if let Some(highlight_config) = -1491 | language_config.highlight_config(language, self.query_paths.as_deref())? 
-1492 | { -1493 | highlight::highlight(&loader, &path, &name, highlight_config, false, &options)?; -1494 | } else { -1495 | warn!("No syntax highlighting config found for test {name}"); -1496 | } -1497 | fs::remove_file(path)?; -1498 | } - | -1499 | CliInput::Stdin(contents) => { -1500 | // Place user input and highlight output on separate lines -1501 | println!(); - | -1502 | let path = get_tmp_source_file(&contents)?; - | -1503 | let (language, language_config) = -1504 | if let (Some(l), Some(lc)) = (language.clone(), language_configuration) { -1505 | (l, lc) -1506 | } else { -1507 | let languages = loader.languages_at_path(current_dir)?; -1508 | let language = languages -1509 | .first() -1510 | .map(|(l, _)| l.clone()) -1511 | .ok_or_else(|| anyhow!("No language found in current path"))?; -1512 | let language_configuration = loader -1513 | .get_language_configuration_in_current_path() -1514 | .ok_or_else(|| { -1515 | anyhow!("No language configuration found in current path") -1516 | })?; -1517 | (language, language_configuration) -1518 | }; - | -1519 | if let Some(highlight_config) = -1520 | language_config.highlight_config(language, self.query_paths.as_deref())? 
-1521 | { -1522 | highlight::highlight( -1523 | &loader, -1524 | &path, -1525 | "stdin", -1526 | highlight_config, -1527 | false, -1528 | &options, -1529 | )?; -1530 | } else { -1531 | warn!( -1532 | "No syntax highlighting config found for path {}", -1533 | current_dir.display() -1534 | ); -1535 | } -1536 | fs::remove_file(path)?; -1537 | } -1538 | } - | -1539 | Ok(()) -1540 | } -1541 | } - | -1542 | impl Tags { -1543 | fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> { -1544 | let config = Config::load(self.config_path)?; -1545 | let loader_config = config.get()?; -1546 | loader.find_all_languages(&loader_config)?; -1547 | loader.force_rebuild(self.rebuild || self.grammar_path.is_some()); - | -1548 | let cancellation_flag = util::cancel_on_signal(); - | -1549 | let (mut language, mut language_configuration) = (None, None); -1550 | if let Some(scope) = self.scope.as_deref() { -1551 | if let Some((lang, lang_config)) = loader.language_configuration_for_scope(scope)? { -1552 | language = Some(lang); -1553 | language_configuration = Some(lang_config); -1554 | } -1555 | if language.is_none() { -1556 | return Err(anyhow!("Unknown scope '{scope}'")); -1557 | } -1558 | } - | -1559 | let options = TagsOptions { -1560 | scope: self.scope, -1561 | quiet: self.quiet, -1562 | print_time: self.time, -1563 | cancellation_flag: cancellation_flag.clone(), -1564 | }; - | -1565 | let input = get_input( -1566 | self.paths_file.as_deref(), -1567 | self.paths, -1568 | self.test_number, -1569 | &cancellation_flag, -1570 | )?; -1571 | match input { -1572 | CliInput::Paths(paths) => { -1573 | let indent = paths.len() > 1; -1574 | for path in paths { -1575 | let (language, language_config) = -1576 | match (language.clone(), language_configuration) { -1577 | (Some(l), Some(lc)) => (l, lc), -1578 | _ => { -1579 | if let Some((lang, lang_config)) = -1580 | loader.language_configuration_for_file_name(&path)? 
-1581 | { -1582 | (lang, lang_config) -1583 | } else { -1584 | warn!( -1585 | "{}", -1586 | util::lang_not_found_for_path(&path, &loader_config) -1587 | ); -1588 | continue; -1589 | } -1590 | } -1591 | }; - | -1592 | if let Some(tags_config) = language_config.tags_config(language)? { -1593 | tags::generate_tags( -1594 | &path, -1595 | &path.display().to_string(), -1596 | tags_config, -1597 | indent, -1598 | &options, -1599 | )?; -1600 | } else { -1601 | warn!("No tags config found for path {}", path.display()); -1602 | } -1603 | } -1604 | } - | -1605 | CliInput::Test { -1606 | name, -1607 | contents, -1608 | languages: language_names, -1609 | } => { -1610 | let path = get_tmp_source_file(&contents)?; - | -1611 | let languages = loader.languages_at_path(current_dir)?; -1612 | let language = languages -1613 | .iter() -1614 | .find(|(_, n)| language_names.contains(&Box::from(n.as_str()))) -1615 | .or_else(|| languages.first()) -1616 | .map(|(l, _)| l.clone()) -1617 | .ok_or_else(|| anyhow!("No language found in current path"))?; -1618 | let language_config = loader -1619 | .get_language_configuration_in_current_path() -1620 | .ok_or_else(|| anyhow!("No language configuration found in current path"))?; - | -1621 | if let Some(tags_config) = language_config.tags_config(language)? 
{ -1622 | tags::generate_tags(&path, &name, tags_config, false, &options)?; -1623 | } else { -1624 | warn!("No tags config found for test {name}"); -1625 | } -1626 | fs::remove_file(path)?; -1627 | } - | -1628 | CliInput::Stdin(contents) => { -1629 | // Place user input and tags output on separate lines -1630 | println!(); - | -1631 | let path = get_tmp_source_file(&contents)?; - | -1632 | let (language, language_config) = -1633 | if let (Some(l), Some(lc)) = (language.clone(), language_configuration) { -1634 | (l, lc) -1635 | } else { -1636 | let languages = loader.languages_at_path(current_dir)?; -1637 | let language = languages -1638 | .first() -1639 | .map(|(l, _)| l.clone()) -1640 | .ok_or_else(|| anyhow!("No language found in current path"))?; -1641 | let language_configuration = loader -1642 | .get_language_configuration_in_current_path() -1643 | .ok_or_else(|| { -1644 | anyhow!("No language configuration found in current path") -1645 | })?; -1646 | (language, language_configuration) -1647 | }; - | -1648 | if let Some(tags_config) = language_config.tags_config(language)? 
{ -1649 | tags::generate_tags(&path, "stdin", tags_config, false, &options)?; -1650 | } else { -1651 | warn!("No tags config found for path {}", current_dir.display()); -1652 | } -1653 | fs::remove_file(path)?; -1654 | } -1655 | } - | -1656 | Ok(()) -1657 | } -1658 | } - | -1659 | impl Playground { -1660 | fn run(self, current_dir: &Path) -> Result<()> { -1661 | let grammar_path = self.grammar_path.as_deref().map_or(current_dir, Path::new); - | -1662 | if let Some(export_path) = self.export { -1663 | playground::export(grammar_path, &export_path)?; -1664 | } else { -1665 | let open_in_browser = !self.quiet; -1666 | playground::serve(grammar_path, open_in_browser)?; -1667 | } - | -1668 | Ok(()) -1669 | } -1670 | } - | -1671 | impl DumpLanguages { -1672 | fn run(self, mut loader: loader::Loader) -> Result<()> { -1673 | let config = Config::load(self.config_path)?; -1674 | let loader_config = config.get()?; -1675 | loader.find_all_languages(&loader_config)?; -1676 | for (configuration, language_path) in loader.get_all_language_configurations() { -1677 | info!( -1678 | concat!( -1679 | "name: {}\n", -1680 | "scope: {}\n", -1681 | "parser: {:?}\n", -1682 | "highlights: {:?}\n", -1683 | "file_types: {:?}\n", -1684 | "content_regex: {:?}\n", -1685 | "injection_regex: {:?}\n", -1686 | ), -1687 | configuration.language_name, -1688 | configuration.scope.as_ref().unwrap_or(&String::new()), -1689 | language_path, -1690 | configuration.highlights_filenames, -1691 | configuration.file_types, -1692 | configuration.content_regex, -1693 | configuration.injection_regex, -1694 | ); -1695 | } -1696 | Ok(()) -1697 | } -1698 | } - | -1699 | impl Complete { -1700 | fn run(self, cli: &mut Command) { -1701 | let name = cli.get_name().to_string(); -1702 | let mut stdout = std::io::stdout(); - | -1703 | match self.shell { -1704 | Shell::Bash => generate(clap_complete::shells::Bash, cli, &name, &mut stdout), -1705 | Shell::Elvish => generate(clap_complete::shells::Elvish, cli, &name, &mut 
stdout), -1706 | Shell::Fish => generate(clap_complete::shells::Fish, cli, &name, &mut stdout), -1707 | Shell::PowerShell => { -1708 | generate(clap_complete::shells::PowerShell, cli, &name, &mut stdout); -1709 | } -1710 | Shell::Zsh => generate(clap_complete::shells::Zsh, cli, &name, &mut stdout), -1711 | Shell::Nushell => generate(clap_complete_nushell::Nushell, cli, &name, &mut stdout), -1712 | } -1713 | } -1714 | } - | -1715 | fn main() { -1716 | let result = run(); -1717 | if let Err(err) = &result { -1718 | // Ignore BrokenPipe errors -1719 | if let Some(error) = err.downcast_ref::() { -1720 | if error.kind() == std::io::ErrorKind::BrokenPipe { -1721 | return; -1722 | } -1723 | } -1724 | if !err.to_string().is_empty() { -1725 | error!("{err:?}"); -1726 | } -1727 | std::process::exit(1); -1728 | } -1729 | } - | -1730 | fn run() -> Result<()> { -1731 | logger::init(); - | -1732 | let version = BUILD_SHA.map_or_else( -1733 | || BUILD_VERSION.to_string(), -1734 | |build_sha| format!("{BUILD_VERSION} ({build_sha})"), -1735 | ); - | -1736 | let cli = Command::new("tree-sitter") -1737 | .help_template(concat!( -1738 | "\n", -1739 | "{before-help}{name} {version}\n", -1740 | "{author-with-newline}{about-with-newline}\n", -1741 | "{usage-heading} {usage}\n", -1742 | "\n", -1743 | "{all-args}{after-help}\n", -1744 | "\n" -1745 | )) -1746 | .version(version) -1747 | .subcommand_required(true) -1748 | .arg_required_else_help(true) -1749 | .disable_help_subcommand(true) -1750 | .disable_colored_help(false); -1751 | let mut cli = Commands::augment_subcommands(cli); - | -1752 | let command = Commands::from_arg_matches(&cli.clone().get_matches())?; - | -1753 | let current_dir = match &command { -1754 | Commands::Init(Init { grammar_path, .. }) -1755 | | Commands::Parse(Parse { grammar_path, .. }) -1756 | | Commands::Test(Test { grammar_path, .. }) -1757 | | Commands::Version(Version { grammar_path, .. }) -1758 | | Commands::Fuzz(Fuzz { grammar_path, .. 
}) -1759 | | Commands::Query(Query { grammar_path, .. }) -1760 | | Commands::Highlight(Highlight { grammar_path, .. }) -1761 | | Commands::Tags(Tags { grammar_path, .. }) -1762 | | Commands::Playground(Playground { grammar_path, .. }) => grammar_path, -1763 | Commands::Build(_) -1764 | | Commands::Generate(_) -1765 | | Commands::InitConfig(_) -1766 | | Commands::DumpLanguages(_) -1767 | | Commands::Complete(_) => &None, -1768 | } -1769 | .as_ref() -1770 | .map_or_else(|| env::current_dir().unwrap(), |p| p.clone()); - | -1771 | let loader = loader::Loader::new()?; - | -1772 | match command { -1773 | Commands::InitConfig(_) => InitConfig::run()?, -1774 | Commands::Init(init_options) => init_options.run(¤t_dir)?, -1775 | Commands::Generate(generate_options) => generate_options.run(loader, ¤t_dir)?, -1776 | Commands::Build(build_options) => build_options.run(loader, ¤t_dir)?, -1777 | Commands::Parse(parse_options) => parse_options.run(loader, ¤t_dir)?, -1778 | Commands::Test(test_options) => test_options.run(loader, ¤t_dir)?, -1779 | Commands::Version(version_options) => version_options.run(current_dir)?, -1780 | Commands::Fuzz(fuzz_options) => fuzz_options.run(loader, ¤t_dir)?, -1781 | Commands::Query(query_options) => query_options.run(loader, ¤t_dir)?, -1782 | Commands::Highlight(highlight_options) => highlight_options.run(loader, ¤t_dir)?, -1783 | Commands::Tags(tags_options) => tags_options.run(loader, ¤t_dir)?, -1784 | Commands::Playground(playground_options) => playground_options.run(¤t_dir)?, -1785 | Commands::DumpLanguages(dump_options) => dump_options.run(loader)?, -1786 | Commands::Complete(complete_options) => complete_options.run(&mut cli), -1787 | } - | -1788 | Ok(()) -1789 | } - | -1790 | #[must_use] -1791 | const fn get_styles() -> clap::builder::Styles { -1792 | clap::builder::Styles::styled() -1793 | .usage( -1794 | Style::new() -1795 | .bold() -1796 | .fg_color(Some(Color::Ansi(AnsiColor::Yellow))), -1797 | ) -1798 | .header( -1799 | Style::new() 
-1800 | .bold() -1801 | .fg_color(Some(Color::Ansi(AnsiColor::Yellow))), -1802 | ) -1803 | .literal(Style::new().fg_color(Some(Color::Ansi(AnsiColor::Green)))) -1804 | .invalid( -1805 | Style::new() -1806 | .bold() -1807 | .fg_color(Some(Color::Ansi(AnsiColor::Red))), -1808 | ) -1809 | .error( -1810 | Style::new() -1811 | .bold() -1812 | .fg_color(Some(Color::Ansi(AnsiColor::Red))), -1813 | ) -1814 | .valid( -1815 | Style::new() -1816 | .bold() -1817 | .fg_color(Some(Color::Ansi(AnsiColor::Green))), -1818 | ) -1819 | .placeholder(Style::new().fg_color(Some(Color::Ansi(AnsiColor::White)))) -1820 | } - | -1821 | /// Utility to extract the shared library path and language function name from user-provided -1822 | /// arguments if present. -1823 | fn get_lib_info<'a>( -1824 | lib_path: Option<&'a PathBuf>, -1825 | language_name: Option<&'a String>, -1826 | current_dir: &Path, -1827 | ) -> Option<(PathBuf, &'a str)> { -1828 | if let Some(lib_path) = lib_path { -1829 | let absolute_lib_path = if lib_path.is_absolute() { -1830 | lib_path.clone() -1831 | } else { -1832 | current_dir.join(lib_path) -1833 | }; -1834 | // Use the user-specified name if present, otherwise try to derive it from -1835 | // the lib path -1836 | match ( -1837 | language_name.map(|s| s.as_str()), -1838 | lib_path.file_stem().and_then(|s| s.to_str()), -1839 | ) { -1840 | (Some(name), _) | (None, Some(name)) => Some((absolute_lib_path, name)), -1841 | _ => None, -1842 | } -1843 | } else { -1844 | None -1845 | } -1846 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/parse.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | fmt, fs, - 3 | io::{self, Write}, - 4 | ops::ControlFlow, - 5 | path::{Path, PathBuf}, - 6 | sync::atomic::{AtomicUsize, Ordering}, - 7 | time::{Duration, Instant}, - 8 | }; - | - 9 | use anstyle::{AnsiColor, Color, RgbColor}; - 10 | use anyhow::{anyhow, Context, 
Result}; - 11 | use clap::ValueEnum; - 12 | use log::info; - 13 | use serde::{Deserialize, Serialize}; - 14 | use tree_sitter::{ - 15 | ffi, InputEdit, Language, LogType, ParseOptions, ParseState, Parser, Point, Range, Tree, - 16 | TreeCursor, - 17 | }; - | - 18 | use crate::{fuzz::edits::Edit, logger::paint, util}; - | - 19 | #[derive(Debug, Default, Serialize)] - 20 | pub struct Stats { - 21 | pub successful_parses: usize, - 22 | pub total_parses: usize, - 23 | pub total_bytes: usize, - 24 | pub total_duration: Duration, - 25 | } - | - 26 | impl fmt::Display for Stats { - 27 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - 28 | let duration_us = self.total_duration.as_micros(); - 29 | let success_rate = if self.total_parses > 0 { - 30 | format!( - 31 | "{:.2}%", - 32 | ((self.successful_parses as f64) / (self.total_parses as f64)) * 100.0, - 33 | ) - 34 | } else { - 35 | "N/A".to_string() - 36 | }; - 37 | let duration_str = match (self.total_parses, duration_us) { - 38 | (0, _) => "N/A".to_string(), - 39 | (_, 0) => "0 bytes/ms".to_string(), - 40 | (_, _) => format!( - 41 | "{} bytes/ms", - 42 | ((self.total_bytes as u128) * 1_000) / duration_us - 43 | ), - 44 | }; - 45 | writeln!( - 46 | f, - 47 | "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {success_rate}; average speed: {duration_str}", - 48 | self.total_parses, - 49 | self.successful_parses, - 50 | self.total_parses - self.successful_parses, - 51 | ) - 52 | } - 53 | } - | - 54 | /// Sets the color used in the output of `tree-sitter parse --cst` - 55 | #[derive(Debug, Copy, Clone)] - 56 | pub struct ParseTheme { - 57 | /// The color of node kinds - 58 | pub node_kind: Option, - 59 | /// The color of text associated with a node - 60 | pub node_text: Option, - 61 | /// The color of node fields - 62 | pub field: Option, - 63 | /// The color of the range information for unnamed nodes - 64 | pub row_color: Option, - 65 | /// The color of the range information for 
named nodes - 66 | pub row_color_named: Option, - 67 | /// The color of extra nodes - 68 | pub extra: Option, - 69 | /// The color of ERROR nodes - 70 | pub error: Option, - 71 | /// The color of MISSING nodes and their associated text - 72 | pub missing: Option, - 73 | /// The color of newline characters - 74 | pub line_feed: Option, - 75 | /// The color of backticks - 76 | pub backtick: Option, - 77 | /// The color of literals - 78 | pub literal: Option, - 79 | } - | - 80 | impl ParseTheme { - 81 | const GRAY: Color = Color::Rgb(RgbColor(118, 118, 118)); - 82 | const LIGHT_GRAY: Color = Color::Rgb(RgbColor(166, 172, 181)); - 83 | const ORANGE: Color = Color::Rgb(RgbColor(255, 153, 51)); - 84 | const YELLOW: Color = Color::Rgb(RgbColor(219, 219, 173)); - 85 | const GREEN: Color = Color::Rgb(RgbColor(101, 192, 67)); - | - 86 | #[must_use] - 87 | pub const fn empty() -> Self { - 88 | Self { - 89 | node_kind: None, - 90 | node_text: None, - 91 | field: None, - 92 | row_color: None, - 93 | row_color_named: None, - 94 | extra: None, - 95 | error: None, - 96 | missing: None, - 97 | line_feed: None, - 98 | backtick: None, - 99 | literal: None, - 100 | } - 101 | } - 102 | } - | - 103 | impl Default for ParseTheme { - 104 | fn default() -> Self { - 105 | Self { - 106 | node_kind: Some(AnsiColor::BrightCyan.into()), - 107 | node_text: Some(Self::GRAY), - 108 | field: Some(AnsiColor::Blue.into()), - 109 | row_color: Some(AnsiColor::White.into()), - 110 | row_color_named: Some(AnsiColor::BrightCyan.into()), - 111 | extra: Some(AnsiColor::BrightMagenta.into()), - 112 | error: Some(AnsiColor::Red.into()), - 113 | missing: Some(Self::ORANGE), - 114 | line_feed: Some(Self::LIGHT_GRAY), - 115 | backtick: Some(Self::GREEN), - 116 | literal: Some(Self::YELLOW), - 117 | } - 118 | } - 119 | } - | - 120 | #[derive(Debug, Copy, Clone, Deserialize, Serialize)] - 121 | pub struct Rgb(pub u8, pub u8, pub u8); - | - 122 | impl From for RgbColor { - 123 | fn from(val: Rgb) -> Self { - 124 | 
Self(val.0, val.1, val.2) - 125 | } - 126 | } - | - 127 | #[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)] - 128 | #[serde(rename_all = "kebab-case")] - 129 | pub struct Config { - 130 | pub parse_theme: Option, - 131 | } - | - 132 | #[derive(Debug, Copy, Clone, Default, Deserialize, Serialize)] - 133 | #[serde(rename_all = "kebab-case")] - 134 | pub struct ParseThemeRaw { - 135 | pub node_kind: Option, - 136 | pub node_text: Option, - 137 | pub field: Option, - 138 | pub row_color: Option, - 139 | pub row_color_named: Option, - 140 | pub extra: Option, - 141 | pub error: Option, - 142 | pub missing: Option, - 143 | pub line_feed: Option, - 144 | pub backtick: Option, - 145 | pub literal: Option, - 146 | } - | - 147 | impl From for ParseTheme { - 148 | fn from(value: ParseThemeRaw) -> Self { - 149 | let val_or_default = |val: Option, default: Option| -> Option { - 150 | val.map_or(default, |v| Some(Color::Rgb(v.into()))) - 151 | }; - 152 | let default = Self::default(); - | - 153 | Self { - 154 | node_kind: val_or_default(value.node_kind, default.node_kind), - 155 | node_text: val_or_default(value.node_text, default.node_text), - 156 | field: val_or_default(value.field, default.field), - 157 | row_color: val_or_default(value.row_color, default.row_color), - 158 | row_color_named: val_or_default(value.row_color_named, default.row_color_named), - 159 | extra: val_or_default(value.extra, default.extra), - 160 | error: val_or_default(value.error, default.error), - 161 | missing: val_or_default(value.missing, default.missing), - 162 | line_feed: val_or_default(value.line_feed, default.line_feed), - 163 | backtick: val_or_default(value.backtick, default.backtick), - 164 | literal: val_or_default(value.literal, default.literal), - 165 | } - 166 | } - 167 | } - | - 168 | #[derive(Copy, Clone, PartialEq, Eq)] - 169 | pub enum ParseOutput { - 170 | Normal, - 171 | Quiet, - 172 | Xml, - 173 | Cst, - 174 | Dot, - 175 | } - | - 176 | /// A position in a multi-line 
text document, in terms of rows and columns. - 177 | /// - 178 | /// Rows and columns are zero-based. - 179 | /// - 180 | /// This serves as a serializable wrapper for `Point` - 181 | #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)] - 182 | pub struct ParsePoint { - 183 | pub row: usize, - 184 | pub column: usize, - 185 | } - | - 186 | impl From for ParsePoint { - 187 | fn from(value: Point) -> Self { - 188 | Self { - 189 | row: value.row, - 190 | column: value.column, - 191 | } - 192 | } - 193 | } - | - 194 | #[derive(Serialize, Default, Debug, Clone)] - 195 | pub struct ParseSummary { - 196 | pub file: PathBuf, - 197 | pub successful: bool, - 198 | pub start: Option, - 199 | pub end: Option, - 200 | pub duration: Option, - 201 | pub bytes: Option, - 202 | } - | - 203 | impl ParseSummary { - 204 | #[must_use] - 205 | pub fn new(path: &Path) -> Self { - 206 | Self { - 207 | file: path.to_path_buf(), - 208 | successful: false, - 209 | ..Default::default() - 210 | } - 211 | } - 212 | } - | - 213 | #[derive(Serialize, Debug, Default)] - 214 | pub struct ParseStats { - 215 | pub parse_summaries: Vec, - 216 | pub cumulative_stats: Stats, - 217 | pub source_count: usize, - 218 | } - | - 219 | #[derive(Serialize, ValueEnum, Debug, Copy, Clone, Default, Eq, PartialEq)] - 220 | pub enum ParseDebugType { - 221 | #[default] - 222 | Quiet, - 223 | Normal, - 224 | Pretty, - 225 | } - | - 226 | pub struct ParseFileOptions<'a> { - 227 | pub edits: &'a [&'a str], - 228 | pub output: ParseOutput, - 229 | pub stats: &'a mut ParseStats, - 230 | pub print_time: bool, - 231 | pub timeout: u64, - 232 | pub debug: ParseDebugType, - 233 | pub debug_graph: bool, - 234 | pub cancellation_flag: Option<&'a AtomicUsize>, - 235 | pub encoding: Option, - 236 | pub open_log: bool, - 237 | pub no_ranges: bool, - 238 | pub parse_theme: &'a ParseTheme, - 239 | } - | - 240 | #[derive(Copy, Clone)] - 241 | pub struct ParseResult { - 242 | pub successful: bool, - 
243 | pub bytes: usize, - 244 | pub duration: Option, - 245 | } - | - 246 | pub fn parse_file_at_path( - 247 | parser: &mut Parser, - 248 | language: &Language, - 249 | path: &Path, - 250 | name: &str, - 251 | max_path_length: usize, - 252 | opts: &mut ParseFileOptions, - 253 | ) -> Result<()> { - 254 | let mut _log_session = None; - 255 | parser.set_language(language)?; - 256 | let mut source_code = fs::read(path).with_context(|| format!("Error reading {name:?}"))?; - | - 257 | // Render an HTML graph if `--debug-graph` was passed - 258 | if opts.debug_graph { - 259 | _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?); - 260 | } - 261 | // Log to stderr if `--debug` was passed - 262 | else if opts.debug != ParseDebugType::Quiet { - 263 | let mut curr_version: usize = 0; - 264 | let use_color = std::env::var("NO_COLOR").map_or(true, |v| v != "1"); - 265 | let debug = opts.debug; - 266 | parser.set_logger(Some(Box::new(move |log_type, message| { - 267 | if debug == ParseDebugType::Normal { - 268 | if log_type == LogType::Lex { - 269 | write!(&mut io::stderr(), " ").unwrap(); - 270 | } - 271 | writeln!(&mut io::stderr(), "{message}").unwrap(); - 272 | } else { - 273 | let colors = &[ - 274 | AnsiColor::White, - 275 | AnsiColor::Red, - 276 | AnsiColor::Blue, - 277 | AnsiColor::Green, - 278 | AnsiColor::Cyan, - 279 | AnsiColor::Yellow, - 280 | ]; - 281 | if message.starts_with("process version:") { - 282 | let comma_idx = message.find(',').unwrap(); - 283 | curr_version = message["process version:".len()..comma_idx] - 284 | .parse() - 285 | .unwrap(); - 286 | } - 287 | let color = if use_color { - 288 | Some(colors[curr_version]) - 289 | } else { - 290 | None - 291 | }; - 292 | let mut out = if log_type == LogType::Lex { - 293 | " ".to_string() - 294 | } else { - 295 | String::new() - 296 | }; - 297 | out += &paint(color, message); - 298 | writeln!(&mut io::stderr(), "{out}").unwrap(); - 299 | } - 300 | }))); - 301 | } - | - 302 | let parse_time 
= Instant::now(); - | - 303 | #[inline(always)] - 304 | fn is_utf16_le_bom(bom_bytes: &[u8]) -> bool { - 305 | bom_bytes == [0xFF, 0xFE] - 306 | } - | - 307 | #[inline(always)] - 308 | fn is_utf16_be_bom(bom_bytes: &[u8]) -> bool { - 309 | bom_bytes == [0xFE, 0xFF] - 310 | } - | - 311 | let encoding = match opts.encoding { - 312 | None if source_code.len() >= 2 => { - 313 | if is_utf16_le_bom(&source_code[0..2]) { - 314 | Some(ffi::TSInputEncodingUTF16LE) - 315 | } else if is_utf16_be_bom(&source_code[0..2]) { - 316 | Some(ffi::TSInputEncodingUTF16BE) - 317 | } else { - 318 | None - 319 | } - 320 | } - 321 | _ => opts.encoding, - 322 | }; - | - 323 | // If the `--cancel` flag was passed, then cancel the parse - 324 | // when the user types a newline. - 325 | // - 326 | // Additionally, if the `--time` flag was passed, end the parse - 327 | // after the specified number of microseconds. - 328 | let start_time = Instant::now(); - 329 | let progress_callback = &mut |_: &ParseState| { - 330 | if let Some(cancellation_flag) = opts.cancellation_flag { - 331 | if cancellation_flag.load(Ordering::SeqCst) != 0 { - 332 | return ControlFlow::Break(()); - 333 | } - 334 | } - | - 335 | if opts.timeout > 0 && start_time.elapsed().as_micros() > opts.timeout as u128 { - 336 | return ControlFlow::Break(()); - 337 | } - | - 338 | ControlFlow::Continue(()) - 339 | }; - | - 340 | let parse_opts = ParseOptions::new().progress_callback(progress_callback); - | - 341 | let tree = match encoding { - 342 | Some(encoding) if encoding == ffi::TSInputEncodingUTF16LE => { - 343 | let source_code_utf16 = source_code - 344 | .chunks_exact(2) - 345 | .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]])) - 346 | .collect::>(); - 347 | parser.parse_utf16_le_with_options( - 348 | &mut |i, _| { - 349 | if i < source_code_utf16.len() { - 350 | &source_code_utf16[i..] 
- 351 | } else { - 352 | &[] - 353 | } - 354 | }, - 355 | None, - 356 | Some(parse_opts), - 357 | ) - 358 | } - 359 | Some(encoding) if encoding == ffi::TSInputEncodingUTF16BE => { - 360 | let source_code_utf16 = source_code - 361 | .chunks_exact(2) - 362 | .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]])) - 363 | .collect::>(); - 364 | parser.parse_utf16_be_with_options( - 365 | &mut |i, _| { - 366 | if i < source_code_utf16.len() { - 367 | &source_code_utf16[i..] - 368 | } else { - 369 | &[] - 370 | } - 371 | }, - 372 | None, - 373 | Some(parse_opts), - 374 | ) - 375 | } - 376 | _ => parser.parse_with_options( - 377 | &mut |i, _| { - 378 | if i < source_code.len() { - 379 | &source_code[i..] - 380 | } else { - 381 | &[] - 382 | } - 383 | }, - 384 | None, - 385 | Some(parse_opts), - 386 | ), - 387 | }; - 388 | let parse_duration = parse_time.elapsed(); - | - 389 | let stdout = io::stdout(); - 390 | let mut stdout = stdout.lock(); - | - 391 | if let Some(mut tree) = tree { - 392 | if opts.debug_graph && !opts.edits.is_empty() { - 393 | info!("BEFORE:\n{}", String::from_utf8_lossy(&source_code)); - 394 | } - | - 395 | let edit_time = Instant::now(); - 396 | for (i, edit) in opts.edits.iter().enumerate() { - 397 | let edit = parse_edit_flag(&source_code, edit)?; - 398 | perform_edit(&mut tree, &mut source_code, &edit)?; - 399 | tree = parser.parse(&source_code, Some(&tree)).unwrap(); - | - 400 | if opts.debug_graph { - 401 | info!("AFTER {i}:\n{}", String::from_utf8_lossy(&source_code)); - 402 | } - 403 | } - 404 | let edit_duration = edit_time.elapsed(); - | - 405 | parser.stop_printing_dot_graphs(); - | - 406 | let parse_duration_ms = parse_duration.as_micros() as f64 / 1e3; - 407 | let edit_duration_ms = edit_duration.as_micros() as f64 / 1e3; - 408 | let mut cursor = tree.walk(); - | - 409 | if opts.output == ParseOutput::Normal { - 410 | let mut needs_newline = false; - 411 | let mut indent_level = 0; - 412 | let mut did_visit_children = false; - 413 | loop 
{ - 414 | let node = cursor.node(); - 415 | let is_named = node.is_named(); - 416 | if did_visit_children { - 417 | if is_named { - 418 | stdout.write_all(b")")?; - 419 | needs_newline = true; - 420 | } - 421 | if cursor.goto_next_sibling() { - 422 | did_visit_children = false; - 423 | } else if cursor.goto_parent() { - 424 | did_visit_children = true; - 425 | indent_level -= 1; - 426 | } else { - 427 | break; - 428 | } - 429 | } else { - 430 | if is_named { - 431 | if needs_newline { - 432 | stdout.write_all(b"\n")?; - 433 | } - 434 | for _ in 0..indent_level { - 435 | stdout.write_all(b" ")?; - 436 | } - 437 | let start = node.start_position(); - 438 | let end = node.end_position(); - 439 | if let Some(field_name) = cursor.field_name() { - 440 | write!(&mut stdout, "{field_name}: ")?; - 441 | } - 442 | write!(&mut stdout, "({}", node.kind())?; - 443 | if !opts.no_ranges { - 444 | write!( - 445 | &mut stdout, - 446 | " [{}, {}] - [{}, {}]", - 447 | start.row, start.column, end.row, end.column - 448 | )?; - 449 | } - 450 | needs_newline = true; - 451 | } - 452 | if cursor.goto_first_child() { - 453 | did_visit_children = false; - 454 | indent_level += 1; - 455 | } else { - 456 | did_visit_children = true; - 457 | } - 458 | } - 459 | } - 460 | cursor.reset(tree.root_node()); - 461 | println!(); - 462 | } - | - 463 | if opts.output == ParseOutput::Cst { - 464 | render_cst(&source_code, &tree, &mut cursor, opts, &mut stdout)?; - 465 | println!(); - 466 | } - | - 467 | if opts.output == ParseOutput::Xml { - 468 | let mut needs_newline = false; - 469 | let mut indent_level = 2; - 470 | let mut did_visit_children = false; - 471 | let mut had_named_children = false; - 472 | let mut tags = Vec::<&str>::new(); - | - 473 | // If we're parsing the first file, write the header - 474 | if opts.stats.parse_summaries.is_empty() { - 475 | writeln!(&mut stdout, "")?; - 476 | writeln!(&mut stdout, "")?; - 477 | } - 478 | writeln!(&mut stdout, " ", path.display())?; - | - 479 | loop 
{ - 480 | let node = cursor.node(); - 481 | let is_named = node.is_named(); - 482 | if did_visit_children { - 483 | if is_named { - 484 | let tag = tags.pop(); - 485 | if had_named_children { - 486 | for _ in 0..indent_level { - 487 | stdout.write_all(b" ")?; - 488 | } - 489 | } - 490 | write!(&mut stdout, "", tag.expect("there is a tag"))?; - 491 | // we only write a line in the case where it's the last sibling - 492 | if let Some(parent) = node.parent() { - 493 | if parent.child(parent.child_count() as u32 - 1).unwrap() == node { - 494 | stdout.write_all(b"\n")?; - 495 | } - 496 | } - 497 | needs_newline = true; - 498 | } - 499 | if cursor.goto_next_sibling() { - 500 | did_visit_children = false; - 501 | had_named_children = false; - 502 | } else if cursor.goto_parent() { - 503 | did_visit_children = true; - 504 | had_named_children = is_named; - 505 | indent_level -= 1; - 506 | if !is_named && needs_newline { - 507 | stdout.write_all(b"\n")?; - 508 | for _ in 0..indent_level { - 509 | stdout.write_all(b" ")?; - 510 | } - 511 | } - 512 | } else { - 513 | break; - 514 | } - 515 | } else { - 516 | if is_named { - 517 | if needs_newline { - 518 | stdout.write_all(b"\n")?; - 519 | } - 520 | for _ in 0..indent_level { - 521 | stdout.write_all(b" ")?; - 522 | } - 523 | write!(&mut stdout, "<{}", node.kind())?; - 524 | if let Some(field_name) = cursor.field_name() { - 525 | write!(&mut stdout, " field=\"{field_name}\"")?; - 526 | } - 527 | let start = node.start_position(); - 528 | let end = node.end_position(); - 529 | write!(&mut stdout, " srow=\"{}\"", start.row)?; - 530 | write!(&mut stdout, " scol=\"{}\"", start.column)?; - 531 | write!(&mut stdout, " erow=\"{}\"", end.row)?; - 532 | write!(&mut stdout, " ecol=\"{}\"", end.column)?; - 533 | write!(&mut stdout, ">")?; - 534 | tags.push(node.kind()); - 535 | needs_newline = true; - 536 | } - 537 | if cursor.goto_first_child() { - 538 | did_visit_children = false; - 539 | had_named_children = false; - 540 | 
indent_level += 1; - 541 | } else { - 542 | did_visit_children = true; - 543 | let start = node.start_byte(); - 544 | let end = node.end_byte(); - 545 | let value = - 546 | std::str::from_utf8(&source_code[start..end]).expect("has a string"); - 547 | if !is_named && needs_newline { - 548 | stdout.write_all(b"\n")?; - 549 | for _ in 0..indent_level { - 550 | stdout.write_all(b" ")?; - 551 | } - 552 | } - 553 | write!(&mut stdout, "{}", html_escape::encode_text(value))?; - 554 | } - 555 | } - 556 | } - 557 | writeln!(&mut stdout)?; - 558 | writeln!(&mut stdout, " ")?; - | - 559 | // If we parsed the last file, write the closing tag for the `sources` header - 560 | if opts.stats.parse_summaries.len() == opts.stats.source_count - 1 { - 561 | writeln!(&mut stdout, "")?; - 562 | } - 563 | cursor.reset(tree.root_node()); - 564 | } - | - 565 | if opts.output == ParseOutput::Dot { - 566 | util::print_tree_graph(&tree, "log.html", opts.open_log).unwrap(); - 567 | } - | - 568 | let mut first_error = None; - 569 | let mut earliest_node_with_error = None; - 570 | 'outer: loop { - 571 | let node = cursor.node(); - 572 | if node.has_error() { - 573 | if earliest_node_with_error.is_none() { - 574 | earliest_node_with_error = Some(node); - 575 | } - 576 | if node.is_error() || node.is_missing() { - 577 | first_error = Some(node); - 578 | break; - 579 | } - | - 580 | // If there's no more children, even though some outer node has an error, - 581 | // then that means that the first error is hidden, but the later error could be - 582 | // visible. So, we walk back up to the child of the first node with an error, - 583 | // and then check its siblings for errors. 
- 584 | if !cursor.goto_first_child() { - 585 | let earliest = earliest_node_with_error.unwrap(); - 586 | while cursor.goto_parent() { - 587 | if cursor.node().parent().is_some_and(|p| p == earliest) { - 588 | while cursor.goto_next_sibling() { - 589 | let sibling = cursor.node(); - 590 | if sibling.is_error() || sibling.is_missing() { - 591 | first_error = Some(sibling); - 592 | break 'outer; - 593 | } - 594 | if sibling.has_error() && cursor.goto_first_child() { - 595 | continue 'outer; - 596 | } - 597 | } - 598 | break; - 599 | } - 600 | } - 601 | break; - 602 | } - 603 | } else if !cursor.goto_next_sibling() { - 604 | break; - 605 | } - 606 | } - | - 607 | if first_error.is_some() || opts.print_time { - 608 | let path = path.to_string_lossy(); - 609 | write!( - 610 | &mut stdout, - 611 | "{:width$}\tParse: {parse_duration_ms:>7.2} ms\t{:>6} bytes/ms", - 612 | name, - 613 | (source_code.len() as u128 * 1_000_000) / parse_duration.as_nanos(), - 614 | width = max_path_length - 615 | )?; - 616 | if let Some(node) = first_error { - 617 | let start = node.start_position(); - 618 | let end = node.end_position(); - 619 | let mut node_text = String::new(); - 620 | for c in node.kind().chars() { - 621 | if let Some(escaped) = escape_invisible(c) { - 622 | node_text += escaped; - 623 | } else { - 624 | node_text.push(c); - 625 | } - 626 | } - 627 | write!(&mut stdout, "\t(")?; - 628 | if node.is_missing() { - 629 | if node.is_named() { - 630 | write!(&mut stdout, "MISSING {node_text}")?; - 631 | } else { - 632 | write!(&mut stdout, "MISSING \"{node_text}\"")?; - 633 | } - 634 | } else { - 635 | write!(&mut stdout, "{node_text}")?; - 636 | } - 637 | write!( - 638 | &mut stdout, - 639 | " [{}, {}] - [{}, {}])", - 640 | start.row, start.column, end.row, end.column - 641 | )?; - 642 | } - 643 | if !opts.edits.is_empty() { - 644 | write!( - 645 | &mut stdout, - 646 | "\n{:width$}\tEdit: {edit_duration_ms:>7.2} ms", - 647 | " ".repeat(path.len()), - 648 | width = 
max_path_length, - 649 | )?; - 650 | } - 651 | writeln!(&mut stdout)?; - 652 | } - | - 653 | opts.stats.parse_summaries.push(ParseSummary { - 654 | file: path.to_path_buf(), - 655 | successful: first_error.is_none(), - 656 | start: Some(tree.root_node().start_position().into()), - 657 | end: Some(tree.root_node().end_position().into()), - 658 | duration: Some(parse_duration), - 659 | bytes: Some(source_code.len()), - 660 | }); - | - 661 | return Ok(()); - 662 | } - 663 | parser.stop_printing_dot_graphs(); - | - 664 | if opts.print_time { - 665 | let duration = parse_time.elapsed(); - 666 | let duration_ms = duration.as_micros() as f64 / 1e3; - 667 | writeln!( - 668 | &mut stdout, - 669 | "{:width$}\tParse: {duration_ms:>7.2} ms\t(timed out)", - 670 | path.to_str().unwrap(), - 671 | width = max_path_length - 672 | )?; - 673 | } - | - 674 | opts.stats.parse_summaries.push(ParseSummary { - 675 | file: path.to_path_buf(), - 676 | successful: false, - 677 | start: None, - 678 | end: None, - 679 | duration: None, - 680 | bytes: Some(source_code.len()), - 681 | }); - | - 682 | Ok(()) - 683 | } - | - 684 | const fn escape_invisible(c: char) -> Option<&'static str> { - 685 | Some(match c { - 686 | '\n' => "\\n", - 687 | '\r' => "\\r", - 688 | '\t' => "\\t", - 689 | '\0' => "\\0", - 690 | '\\' => "\\\\", - 691 | '\x0b' => "\\v", - 692 | '\x0c' => "\\f", - 693 | _ => return None, - 694 | }) - 695 | } - | - 696 | const fn escape_delimiter(c: char) -> Option<&'static str> { - 697 | Some(match c { - 698 | '`' => "\\`", - 699 | '\"' => "\\\"", - 700 | _ => return None, - 701 | }) - 702 | } - | - 703 | pub fn render_cst<'a, 'b: 'a>( - 704 | source_code: &[u8], - 705 | tree: &'b Tree, - 706 | cursor: &mut TreeCursor<'a>, - 707 | opts: &ParseFileOptions, - 708 | out: &mut impl Write, - 709 | ) -> Result<()> { - 710 | let lossy_source_code = String::from_utf8_lossy(source_code); - 711 | let total_width = lossy_source_code - 712 | .lines() - 713 | .enumerate() - 714 | .map(|(row, 
col)| (row as f64).log10() as usize + (col.len() as f64).log10() as usize + 1) - 715 | .max() - 716 | .unwrap_or(1); - 717 | let mut indent_level = 1; - 718 | let mut did_visit_children = false; - 719 | let mut in_error = false; - 720 | loop { - 721 | if did_visit_children { - 722 | if cursor.goto_next_sibling() { - 723 | did_visit_children = false; - 724 | } else if cursor.goto_parent() { - 725 | did_visit_children = true; - 726 | indent_level -= 1; - 727 | if !cursor.node().has_error() { - 728 | in_error = false; - 729 | } - 730 | } else { - 731 | break; - 732 | } - 733 | } else { - 734 | cst_render_node( - 735 | opts, - 736 | cursor, - 737 | source_code, - 738 | out, - 739 | total_width, - 740 | indent_level, - 741 | in_error, - 742 | )?; - 743 | if cursor.goto_first_child() { - 744 | did_visit_children = false; - 745 | indent_level += 1; - 746 | if cursor.node().has_error() { - 747 | in_error = true; - 748 | } - 749 | } else { - 750 | did_visit_children = true; - 751 | } - 752 | } - 753 | } - 754 | cursor.reset(tree.root_node()); - 755 | Ok(()) - 756 | } - | - 757 | fn render_node_text(source: &str) -> String { - 758 | source - 759 | .chars() - 760 | .fold(String::with_capacity(source.len()), |mut acc, c| { - 761 | if let Some(esc) = escape_invisible(c) { - 762 | acc.push_str(esc); - 763 | } else if let Some(esc) = escape_delimiter(c) { - 764 | acc.push_str(esc); - 765 | } else { - 766 | acc.push(c); - 767 | } - 768 | acc - 769 | }) - 770 | } - | - 771 | fn write_node_text( - 772 | opts: &ParseFileOptions, - 773 | out: &mut impl Write, - 774 | cursor: &TreeCursor, - 775 | is_named: bool, - 776 | source: &str, - 777 | color: Option + Copy>, - 778 | text_info: (usize, usize), - 779 | ) -> Result<()> { - 780 | let (total_width, indent_level) = text_info; - 781 | let (quote, quote_color) = if is_named { - 782 | ('`', opts.parse_theme.backtick) - 783 | } else { - 784 | ('\"', color.map(|c| c.into())) - 785 | }; - | - 786 | if !is_named { - 787 | write!( - 788 | out, 
- 789 | "{}{}{}", - 790 | paint(quote_color, &String::from(quote)), - 791 | paint(color, &render_node_text(source)), - 792 | paint(quote_color, &String::from(quote)), - 793 | )?; - 794 | } else { - 795 | let multiline = source.contains('\n'); - 796 | for (i, line) in source.split_inclusive('\n').enumerate() { - 797 | if line.is_empty() { - 798 | break; - 799 | } - 800 | let mut node_range = cursor.node().range(); - 801 | // For each line of text, adjust the row by shifting it down `i` rows, - 802 | // and adjust the column by setting it to the length of *this* line. - 803 | node_range.start_point.row += i; - 804 | node_range.end_point.row = node_range.start_point.row; - 805 | node_range.end_point.column = line.len() - 806 | + if i == 0 { - 807 | node_range.start_point.column - 808 | } else { - 809 | 0 - 810 | }; - 811 | let formatted_line = render_line_feed(line, opts); - 812 | if !opts.no_ranges { - 813 | write!( - 814 | out, - 815 | "{}{}{}{}{}{}", - 816 | if multiline { "\n" } else { "" }, - 817 | if multiline { - 818 | render_node_range(opts, cursor, is_named, true, total_width, node_range) - 819 | } else { - 820 | String::new() - 821 | }, - 822 | if multiline { - 823 | " ".repeat(indent_level + 1) - 824 | } else { - 825 | String::new() - 826 | }, - 827 | paint(quote_color, &String::from(quote)), - 828 | &paint(color, &render_node_text(&formatted_line)), - 829 | paint(quote_color, &String::from(quote)), - 830 | )?; - 831 | } else { - 832 | write!( - 833 | out, - 834 | "\n{}{}{}{}", - 835 | " ".repeat(indent_level + 1), - 836 | paint(quote_color, &String::from(quote)), - 837 | &paint(color, &render_node_text(&formatted_line)), - 838 | paint(quote_color, &String::from(quote)), - 839 | )?; - 840 | } - 841 | } - 842 | } - | - 843 | Ok(()) - 844 | } - | - 845 | fn render_line_feed(source: &str, opts: &ParseFileOptions) -> String { - 846 | if cfg!(windows) { - 847 | source.replace("\r\n", &paint(opts.parse_theme.line_feed, "\r\n")) - 848 | } else { - 849 | 
source.replace('\n', &paint(opts.parse_theme.line_feed, "\n")) - 850 | } - 851 | } - | - 852 | fn render_node_range( - 853 | opts: &ParseFileOptions, - 854 | cursor: &TreeCursor, - 855 | is_named: bool, - 856 | is_multiline: bool, - 857 | total_width: usize, - 858 | range: Range, - 859 | ) -> String { - 860 | let has_field_name = cursor.field_name().is_some(); - 861 | let range_color = if is_named && !is_multiline && !has_field_name { - 862 | opts.parse_theme.row_color_named - 863 | } else { - 864 | opts.parse_theme.row_color - 865 | }; - | - 866 | let remaining_width_start = (total_width - 867 | - (range.start_point.row as f64).log10() as usize - 868 | - (range.start_point.column as f64).log10() as usize) - 869 | .max(1); - 870 | let remaining_width_end = (total_width - 871 | - (range.end_point.row as f64).log10() as usize - 872 | - (range.end_point.column as f64).log10() as usize) - 873 | .max(1); - 874 | paint( - 875 | range_color, - 876 | &format!( - 877 | "{}:{}{:remaining_width_start$}- {}:{}{:remaining_width_end$}", - 878 | range.start_point.row, - 879 | range.start_point.column, - 880 | ' ', - 881 | range.end_point.row, - 882 | range.end_point.column, - 883 | ' ', - 884 | ), - 885 | ) - 886 | } - | - 887 | fn cst_render_node( - 888 | opts: &ParseFileOptions, - 889 | cursor: &mut TreeCursor, - 890 | source_code: &[u8], - 891 | out: &mut impl Write, - 892 | total_width: usize, - 893 | indent_level: usize, - 894 | in_error: bool, - 895 | ) -> Result<()> { - 896 | let node = cursor.node(); - 897 | let is_named = node.is_named(); - 898 | if !opts.no_ranges { - 899 | write!( - 900 | out, - 901 | "{}", - 902 | render_node_range(opts, cursor, is_named, false, total_width, node.range()) - 903 | )?; - 904 | } - 905 | write!( - 906 | out, - 907 | "{}{}", - 908 | " ".repeat(indent_level), - 909 | if in_error && !node.has_error() { - 910 | " " - 911 | } else { - 912 | "" - 913 | } - 914 | )?; - 915 | if is_named { - 916 | if let Some(field_name) = cursor.field_name() { 
- 917 | write!( - 918 | out, - 919 | "{}", - 920 | paint(opts.parse_theme.field, &format!("{field_name}: ")) - 921 | )?; - 922 | } - | - 923 | if node.has_error() || node.is_error() { - 924 | write!(out, "{}", paint(opts.parse_theme.error, "•"))?; - 925 | } - | - 926 | let kind_color = if node.is_error() { - 927 | opts.parse_theme.error - 928 | } else if node.is_extra() || node.parent().is_some_and(|p| p.is_extra() && !p.is_error()) { - 929 | opts.parse_theme.extra - 930 | } else { - 931 | opts.parse_theme.node_kind - 932 | }; - 933 | write!(out, "{}", paint(kind_color, node.kind()),)?; - | - 934 | if node.child_count() == 0 { - 935 | write!(out, " ")?; - 936 | // Node text from a pattern or external scanner - 937 | write_node_text( - 938 | opts, - 939 | out, - 940 | cursor, - 941 | is_named, - 942 | &String::from_utf8_lossy(&source_code[node.start_byte()..node.end_byte()]), - 943 | opts.parse_theme.node_text, - 944 | (total_width, indent_level), - 945 | )?; - 946 | } - 947 | } else if node.is_missing() { - 948 | write!(out, "{}: ", paint(opts.parse_theme.missing, "MISSING"))?; - 949 | write!(out, "\"{}\"", paint(opts.parse_theme.missing, node.kind()))?; - 950 | } else { - 951 | // Terminal literals, like "fn" - 952 | write_node_text( - 953 | opts, - 954 | out, - 955 | cursor, - 956 | is_named, - 957 | node.kind(), - 958 | opts.parse_theme.literal, - 959 | (total_width, indent_level), - 960 | )?; - 961 | } - 962 | writeln!(out)?; - | - 963 | Ok(()) - 964 | } - | - 965 | pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> Result { - 966 | let start_byte = edit.position; - 967 | let old_end_byte = edit.position + edit.deleted_length; - 968 | let new_end_byte = edit.position + edit.inserted_text.len(); - 969 | let start_position = position_for_offset(input, start_byte)?; - 970 | let old_end_position = position_for_offset(input, old_end_byte)?; - 971 | input.splice(start_byte..old_end_byte, edit.inserted_text.iter().copied()); - 972 | let 
new_end_position = position_for_offset(input, new_end_byte)?; - 973 | let edit = InputEdit { - 974 | start_byte, - 975 | old_end_byte, - 976 | new_end_byte, - 977 | start_position, - 978 | old_end_position, - 979 | new_end_position, - 980 | }; - 981 | tree.edit(&edit); - 982 | Ok(edit) - 983 | } - | - 984 | fn parse_edit_flag(source_code: &[u8], flag: &str) -> Result { - 985 | let error = || { - 986 | anyhow!(concat!( - 987 | "Invalid edit string '{}'. ", - 988 | "Edit strings must match the pattern ' '" - 989 | ), flag) - 990 | }; - | - 991 | // Three whitespace-separated parts: - 992 | // * edit position - 993 | // * deleted length - 994 | // * inserted text - 995 | let mut parts = flag.split(' '); - 996 | let position = parts.next().ok_or_else(error)?; - 997 | let deleted_length = parts.next().ok_or_else(error)?; - 998 | let inserted_text = parts.collect::>().join(" ").into_bytes(); - | - 999 | // Position can either be a byte_offset or row,column pair, separated by a comma -1000 | let position = if position == "$" { -1001 | source_code.len() -1002 | } else if position.contains(',') { -1003 | let mut parts = position.split(','); -1004 | let row = parts.next().ok_or_else(error)?; -1005 | let row = row.parse::().map_err(|_| error())?; -1006 | let column = parts.next().ok_or_else(error)?; -1007 | let column = column.parse::().map_err(|_| error())?; -1008 | offset_for_position(source_code, Point { row, column })? -1009 | } else { -1010 | position.parse::().map_err(|_| error())? -1011 | }; - | -1012 | // Deleted length must be a byte count. 
-1013 | let deleted_length = deleted_length.parse::().map_err(|_| error())?; - | -1014 | Ok(Edit { -1015 | position, -1016 | deleted_length, -1017 | inserted_text, -1018 | }) -1019 | } - | -1020 | pub fn offset_for_position(input: &[u8], position: Point) -> Result { -1021 | let mut row = 0; -1022 | let mut offset = 0; -1023 | let mut iter = memchr::memchr_iter(b'\n', input); -1024 | loop { -1025 | if let Some(pos) = iter.next() { -1026 | if row < position.row { -1027 | row += 1; -1028 | offset = pos; -1029 | continue; -1030 | } -1031 | } -1032 | offset += 1; -1033 | break; -1034 | } -1035 | if position.row - row > 0 { -1036 | return Err(anyhow!("Failed to address a row: {}", position.row)); -1037 | } -1038 | if let Some(pos) = iter.next() { -1039 | if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) { -1040 | return Err(anyhow!("Failed to address a column: {}", position.column)); -1041 | } -1042 | } else if input.len() - offset < position.column { -1043 | return Err(anyhow!("Failed to address a column over the end")); -1044 | } -1045 | Ok(offset + position.column) -1046 | } - | -1047 | pub fn position_for_offset(input: &[u8], offset: usize) -> Result { -1048 | if offset > input.len() { -1049 | return Err(anyhow!("Failed to address an offset: {offset}")); -1050 | } -1051 | let mut result = Point { row: 0, column: 0 }; -1052 | let mut last = 0; -1053 | for pos in memchr::memchr_iter(b'\n', &input[..offset]) { -1054 | result.row += 1; -1055 | last = pos; -1056 | } -1057 | result.column = if result.row > 0 { -1058 | offset - last - 1 -1059 | } else { -1060 | offset -1061 | }; -1062 | Ok(result) -1063 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/playground.html: --------------------------------------------------------------------------------- - 1 | - 2 | - 3 | - 4 | - 5 | tree-sitter THE_LANGUAGE_NAME - 6 | - 7 | - 8 | - 10 | - 12 | - 322 | - | - 323 | - 324 |

- | - 395 | - 396 | - | - 397 | - 398 | - 399 | - 404 | - 405 | - - - --------------------------------------------------------------------------------- -/crates/cli/src/playground.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | borrow::Cow, - 3 | env, fs, - 4 | net::TcpListener, - 5 | path::{Path, PathBuf}, - 6 | str::{self, FromStr as _}, - 7 | }; - | - 8 | use anyhow::{anyhow, Context, Result}; - 9 | use log::{error, info}; - 10 | use tiny_http::{Header, Response, Server}; - | - 11 | use super::wasm; - | - 12 | macro_rules! optional_resource { - 13 | ($name:tt, $path:tt) => { - 14 | #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] - 15 | fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> { - 16 | if let Some(tree_sitter_dir) = tree_sitter_dir { - 17 | Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) - 18 | } else { - 19 | Cow::Borrowed(include_bytes!(concat!("../../../", $path))) - 20 | } - 21 | } - | - 22 | #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))] - 23 | fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> { - 24 | if let Some(tree_sitter_dir) = tree_sitter_dir { - 25 | Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) - 26 | } else { - 27 | Cow::Borrowed(&[]) - 28 | } - 29 | } - 30 | }; - 31 | } - | - 32 | optional_resource!(get_playground_js, "docs/src/assets/js/playground.js"); - 33 | optional_resource!(get_lib_js, "lib/binding_web/web-tree-sitter.js"); - 34 | optional_resource!(get_lib_wasm, "lib/binding_web/web-tree-sitter.wasm"); - | - 35 | fn get_main_html(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> { - 36 | tree_sitter_dir.map_or( - 37 | Cow::Borrowed(include_bytes!("playground.html")), - 38 | |tree_sitter_dir| { - 39 | Cow::Owned(fs::read(tree_sitter_dir.join("crates/cli/src/playground.html")).unwrap()) - 40 | }, - 41 | ) - 42 | } - | - 43 | pub fn export(grammar_path: &Path, export_path: &Path) -> Result<()> { - 44 | let (grammar_name, 
language_wasm) = wasm::load_language_wasm_file(grammar_path)?; - | - 45 | fs::create_dir_all(export_path).with_context(|| { - 46 | format!( - 47 | "Failed to create export directory: {}", - 48 | export_path.display() - 49 | ) - 50 | })?; - | - 51 | let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok(); - | - 52 | let playground_js = get_playground_js(tree_sitter_dir.as_deref()); - 53 | let lib_js = get_lib_js(tree_sitter_dir.as_deref()); - 54 | let lib_wasm = get_lib_wasm(tree_sitter_dir.as_deref()); - | - 55 | let has_local_playground_js = !playground_js.is_empty(); - 56 | let has_local_lib_js = !lib_js.is_empty(); - 57 | let has_local_lib_wasm = !lib_wasm.is_empty(); - | - 58 | let mut main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_deref())) - 59 | .unwrap() - 60 | .replace("THE_LANGUAGE_NAME", &grammar_name); - | - 61 | if !has_local_playground_js { - 62 | main_html = main_html.replace( - 63 | r#""#, - 64 | r#""# - 65 | ); - 66 | } - 67 | if !has_local_lib_js { - 68 | main_html = main_html.replace( - 69 | "import * as TreeSitter from './web-tree-sitter.js';", - 70 | "import * as TreeSitter from 'https://tree-sitter.github.io/web-tree-sitter.js';", - 71 | ); - 72 | } - | - 73 | fs::write(export_path.join("index.html"), main_html.as_bytes()) - 74 | .with_context(|| "Failed to write index.html")?; - | - 75 | fs::write(export_path.join("tree-sitter-parser.wasm"), language_wasm) - 76 | .with_context(|| "Failed to write parser wasm file")?; - | - 77 | if has_local_playground_js { - 78 | fs::write(export_path.join("playground.js"), playground_js) - 79 | .with_context(|| "Failed to write playground.js")?; - 80 | } - | - 81 | if has_local_lib_js { - 82 | fs::write(export_path.join("web-tree-sitter.js"), lib_js) - 83 | .with_context(|| "Failed to write web-tree-sitter.js")?; - 84 | } - | - 85 | if has_local_lib_wasm { - 86 | fs::write(export_path.join("web-tree-sitter.wasm"), lib_wasm) - 87 | .with_context(|| "Failed to write 
web-tree-sitter.wasm")?; - 88 | } - | - 89 | println!( - 90 | "Exported playground to {}", - 91 | export_path.canonicalize()?.display() - 92 | ); - | - 93 | Ok(()) - 94 | } - | - 95 | pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> { - 96 | let server = get_server()?; - 97 | let (grammar_name, language_wasm) = wasm::load_language_wasm_file(grammar_path)?; - 98 | let url = format!("http://{}", server.server_addr()); - 99 | info!("Started playground on: {url}"); - 100 | if open_in_browser && webbrowser::open(&url).is_err() { - 101 | error!("Failed to open '{url}' in a web browser"); - 102 | } - | - 103 | let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok(); - 104 | let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_deref())) - 105 | .unwrap() - 106 | .replace("THE_LANGUAGE_NAME", &grammar_name) - 107 | .into_bytes(); - 108 | let playground_js = get_playground_js(tree_sitter_dir.as_deref()); - 109 | let lib_js = get_lib_js(tree_sitter_dir.as_deref()); - 110 | let lib_wasm = get_lib_wasm(tree_sitter_dir.as_deref()); - | - 111 | let html_header = Header::from_str("Content-Type: text/html").unwrap(); - 112 | let js_header = Header::from_str("Content-Type: application/javascript").unwrap(); - 113 | let wasm_header = Header::from_str("Content-Type: application/wasm").unwrap(); - | - 114 | for request in server.incoming_requests() { - 115 | let res = match request.url() { - 116 | "/" => response(&main_html, &html_header), - 117 | "/tree-sitter-parser.wasm" => response(&language_wasm, &wasm_header), - 118 | "/playground.js" => { - 119 | if playground_js.is_empty() { - 120 | redirect("https://tree-sitter.github.io/tree-sitter/assets/js/playground.js") - 121 | } else { - 122 | response(&playground_js, &js_header) - 123 | } - 124 | } - 125 | "/web-tree-sitter.js" => { - 126 | if lib_js.is_empty() { - 127 | redirect("https://tree-sitter.github.io/web-tree-sitter.js") - 128 | } else { - 129 | response(&lib_js, 
&js_header) - 130 | } - 131 | } - 132 | "/web-tree-sitter.wasm" => { - 133 | if lib_wasm.is_empty() { - 134 | redirect("https://tree-sitter.github.io/web-tree-sitter.wasm") - 135 | } else { - 136 | response(&lib_wasm, &wasm_header) - 137 | } - 138 | } - 139 | _ => response(b"Not found", &html_header).with_status_code(404), - 140 | }; - 141 | request - 142 | .respond(res) - 143 | .with_context(|| "Failed to write HTTP response")?; - 144 | } - | - 145 | Ok(()) - 146 | } - | - 147 | fn redirect(url: &str) -> Response<&[u8]> { - 148 | Response::empty(302) - 149 | .with_data("".as_bytes(), Some(0)) - 150 | .with_header(Header::from_bytes("Location", url.as_bytes()).unwrap()) - 151 | } - | - 152 | fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> { - 153 | Response::empty(200) - 154 | .with_data(data, Some(data.len())) - 155 | .with_header(header.clone()) - 156 | } - | - 157 | fn get_server() -> Result { - 158 | let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or_else(|_| "127.0.0.1".to_owned()); - 159 | let port = env::var("TREE_SITTER_PLAYGROUND_PORT") - 160 | .map(|v| { - 161 | v.parse::() - 162 | .with_context(|| "Invalid port specification") - 163 | }) - 164 | .ok(); - 165 | let listener = match port { - 166 | Some(port) => { - 167 | bind_to(&addr, port?).with_context(|| "Failed to bind to the specified port")? 
- 168 | } - 169 | None => get_listener_on_available_port(&addr) - 170 | .with_context(|| "Failed to find a free port to bind to it")?, - 171 | }; - 172 | let server = - 173 | Server::from_listener(listener, None).map_err(|_| anyhow!("Failed to start web server"))?; - 174 | Ok(server) - 175 | } - | - 176 | fn get_listener_on_available_port(addr: &str) -> Option { - 177 | (8000..12000).find_map(|port| bind_to(addr, port)) - 178 | } - | - 179 | fn bind_to(addr: &str, port: u16) -> Option { - 180 | TcpListener::bind(format!("{addr}:{port}")).ok() - 181 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/query_testing.rs: --------------------------------------------------------------------------------- - 1 | use std::{fs, path::Path, sync::LazyLock}; - | - 2 | use anyhow::{anyhow, Result}; - 3 | use bstr::{BStr, ByteSlice}; - 4 | use regex::Regex; - 5 | use tree_sitter::{Language, Parser, Point}; - | - 6 | static CAPTURE_NAME_REGEX: LazyLock = LazyLock::new(|| Regex::new("[\\w_\\-.]+").unwrap()); - | - 7 | #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)] - 8 | pub struct Utf8Point { - 9 | pub row: usize, - 10 | pub column: usize, - 11 | } - | - 12 | impl std::fmt::Display for Utf8Point { - 13 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - 14 | write!(f, "({}, {})", self.row, self.column) - 15 | } - 16 | } - | - 17 | impl Utf8Point { - 18 | #[must_use] - 19 | pub const fn new(row: usize, column: usize) -> Self { - 20 | Self { row, column } - 21 | } - 22 | } - | - 23 | #[must_use] - 24 | pub fn to_utf8_point(point: Point, source: &[u8]) -> Utf8Point { - 25 | if point.column == 0 { - 26 | return Utf8Point::new(point.row, 0); - 27 | } - | - 28 | let bstr = BStr::new(source); - 29 | let line = bstr.lines_with_terminator().nth(point.row).unwrap(); - 30 | let mut utf8_column = 0; - | - 31 | for (_, grapheme_end, _) in line.grapheme_indices() { - 32 | utf8_column += 1; - 
33 | if grapheme_end >= point.column { - 34 | break; - 35 | } - 36 | } - | - 37 | Utf8Point { - 38 | row: point.row, - 39 | column: utf8_column, - 40 | } - 41 | } - | - 42 | #[derive(Debug, Eq, PartialEq)] - 43 | pub struct CaptureInfo { - 44 | pub name: String, - 45 | pub start: Utf8Point, - 46 | pub end: Utf8Point, - 47 | } - | - 48 | #[derive(Debug, PartialEq, Eq)] - 49 | pub struct Assertion { - 50 | pub position: Utf8Point, - 51 | pub length: usize, - 52 | pub negative: bool, - 53 | pub expected_capture_name: String, - 54 | } - | - 55 | impl Assertion { - 56 | #[must_use] - 57 | pub const fn new( - 58 | row: usize, - 59 | col: usize, - 60 | length: usize, - 61 | negative: bool, - 62 | expected_capture_name: String, - 63 | ) -> Self { - 64 | Self { - 65 | position: Utf8Point::new(row, col), - 66 | length, - 67 | negative, - 68 | expected_capture_name, - 69 | } - 70 | } - 71 | } - | - 72 | /// Parse the given source code, finding all of the comments that contain - 73 | /// highlighting assertions. Return a vector of (position, expected highlight name) - 74 | /// pairs. - 75 | pub fn parse_position_comments( - 76 | parser: &mut Parser, - 77 | language: &Language, - 78 | source: &[u8], - 79 | ) -> Result> { - 80 | let mut result = Vec::new(); - 81 | let mut assertion_ranges = Vec::new(); - | - 82 | // Parse the code. - 83 | parser.set_included_ranges(&[]).unwrap(); - 84 | parser.set_language(language).unwrap(); - 85 | let tree = parser.parse(source, None).unwrap(); - | - 86 | // Walk the tree, finding comment nodes that contain assertions. - 87 | let mut ascending = false; - 88 | let mut cursor = tree.root_node().walk(); - 89 | loop { - 90 | if ascending { - 91 | let node = cursor.node(); - | - 92 | // Find every comment node. 
- 93 | if node.kind().to_lowercase().contains("comment") { - 94 | if let Ok(text) = node.utf8_text(source) { - 95 | let mut position = node.start_position(); - 96 | if position.row > 0 { - 97 | // Find the arrow character ("^" or "<-") in the comment. A left arrow - 98 | // refers to the column where the comment node starts. An up arrow refers - 99 | // to its own column. - 100 | let mut has_left_caret = false; - 101 | let mut has_arrow = false; - 102 | let mut negative = false; - 103 | let mut arrow_end = 0; - 104 | let mut arrow_count = 1; - 105 | for (i, c) in text.char_indices() { - 106 | arrow_end = i + 1; - 107 | if c == '-' && has_left_caret { - 108 | has_arrow = true; - 109 | break; - 110 | } - 111 | if c == '^' { - 112 | has_arrow = true; - 113 | position.column += i; - 114 | // Continue counting remaining arrows and update their end column - 115 | for (_, c) in text[arrow_end..].char_indices() { - 116 | if c != '^' { - 117 | arrow_end += arrow_count - 1; - 118 | break; - 119 | } - 120 | arrow_count += 1; - 121 | } - 122 | break; - 123 | } - 124 | has_left_caret = c == '<'; - 125 | } - | - 126 | // find any ! after arrows but before capture name - 127 | if has_arrow { - 128 | for (i, c) in text[arrow_end..].char_indices() { - 129 | if c == '!' { - 130 | negative = true; - 131 | arrow_end += i + 1; - 132 | break; - 133 | } else if !c.is_whitespace() { - 134 | break; - 135 | } - 136 | } - 137 | } - | - 138 | // If the comment node contains an arrow and a highlight name, record the - 139 | // highlight name and the position. 
- 140 | if let (true, Some(mat)) = - 141 | (has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..])) - 142 | { - 143 | assertion_ranges.push((node.start_position(), node.end_position())); - 144 | result.push(Assertion { - 145 | position: to_utf8_point(position, source), - 146 | length: arrow_count, - 147 | negative, - 148 | expected_capture_name: mat.as_str().to_string(), - 149 | }); - 150 | } - 151 | } - 152 | } - 153 | } - | - 154 | // Continue walking the tree. - 155 | if cursor.goto_next_sibling() { - 156 | ascending = false; - 157 | } else if !cursor.goto_parent() { - 158 | break; - 159 | } - 160 | } else if !cursor.goto_first_child() { - 161 | ascending = true; - 162 | } - 163 | } - | - 164 | // Adjust the row number in each assertion's position to refer to the line of - 165 | // code *above* the assertion. There can be multiple lines of assertion comments and empty - 166 | // lines, so the positions may have to be decremented by more than one row. - 167 | let mut i = 0; - 168 | let lines = source.lines_with_terminator().collect::>(); - 169 | for assertion in &mut result { - 170 | let original_position = assertion.position; - 171 | loop { - 172 | let on_assertion_line = assertion_ranges[i..] 
- 173 | .iter() - 174 | .any(|(start, _)| start.row == assertion.position.row); - 175 | let on_empty_line = lines[assertion.position.row].len() <= assertion.position.column; - 176 | if on_assertion_line || on_empty_line { - 177 | if assertion.position.row > 0 { - 178 | assertion.position.row -= 1; - 179 | } else { - 180 | return Err(anyhow!( - 181 | "Error: could not find a line that corresponds to the assertion `{}` located at {original_position}", - 182 | assertion.expected_capture_name - 183 | )); - 184 | } - 185 | } else { - 186 | while i < assertion_ranges.len() - 187 | && assertion_ranges[i].0.row < assertion.position.row - 188 | { - 189 | i += 1; - 190 | } - 191 | break; - 192 | } - 193 | } - 194 | } - | - 195 | // The assertions can end up out of order due to the line adjustments. - 196 | result.sort_unstable_by_key(|a| a.position); - | - 197 | Ok(result) - 198 | } - | - 199 | pub fn assert_expected_captures( - 200 | infos: &[CaptureInfo], - 201 | path: &Path, - 202 | parser: &mut Parser, - 203 | language: &Language, - 204 | ) -> Result { - 205 | let contents = fs::read_to_string(path)?; - 206 | let pairs = parse_position_comments(parser, language, contents.as_bytes())?; - 207 | for assertion in &pairs { - 208 | if let Some(found) = &infos.iter().find(|p| { - 209 | assertion.position >= p.start - 210 | && (assertion.position.row < p.end.row - 211 | || assertion.position.column + assertion.length - 1 < p.end.column) - 212 | }) { - 213 | if assertion.expected_capture_name != found.name && found.name != "name" { - 214 | return Err(anyhow!( - 215 | "Assertion failed: at {}, found {}, expected {}", - 216 | found.start, - 217 | found.name, - 218 | assertion.expected_capture_name, - 219 | )); - 220 | } - 221 | } else { - 222 | return Err(anyhow!( - 223 | "Assertion failed: could not match {} at row {}, column {}", - 224 | assertion.expected_capture_name, - 225 | assertion.position.row, - 226 | assertion.position.column + assertion.length - 1, - 227 | )); - 228 | } 
- 229 | } - 230 | Ok(pairs.len()) - 231 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/query.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | fs, - 3 | io::{self, Write}, - 4 | ops::Range, - 5 | path::Path, - 6 | time::Instant, - 7 | }; - | - 8 | use anstyle::AnsiColor; - 9 | use anyhow::{Context, Result}; - 10 | use log::warn; - 11 | use streaming_iterator::StreamingIterator; - 12 | use tree_sitter::{Language, Parser, Point, Query, QueryCursor}; - | - 13 | use crate::{ - 14 | logger::paint, - 15 | query_testing::{self, to_utf8_point}, - 16 | }; - | - 17 | #[allow(clippy::too_many_arguments)] - 18 | pub fn query_file_at_path( - 19 | language: &Language, - 20 | path: &Path, - 21 | name: &str, - 22 | query_path: &Path, - 23 | ordered_captures: bool, - 24 | byte_range: Option>, - 25 | point_range: Option>, - 26 | should_test: bool, - 27 | quiet: bool, - 28 | print_time: bool, - 29 | stdin: bool, - 30 | ) -> Result<()> { - 31 | let stdout = io::stdout(); - 32 | let mut stdout = stdout.lock(); - | - 33 | let query_source = fs::read_to_string(query_path) - 34 | .with_context(|| format!("Error reading query file {}", query_path.display()))?; - 35 | let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?; - | - 36 | let mut query_cursor = QueryCursor::new(); - 37 | if let Some(range) = byte_range { - 38 | query_cursor.set_byte_range(range); - 39 | } - 40 | if let Some(range) = point_range { - 41 | query_cursor.set_point_range(range); - 42 | } - | - 43 | let mut parser = Parser::new(); - 44 | parser.set_language(language)?; - | - 45 | let mut results = Vec::new(); - | - 46 | if !should_test && !stdin { - 47 | writeln!(&mut stdout, "{name}")?; - 48 | } - | - 49 | let source_code = - 50 | fs::read(path).with_context(|| format!("Error reading source file {}", path.display()))?; - 51 | let tree = 
parser.parse(&source_code, None).unwrap(); - | - 52 | let start = Instant::now(); - 53 | if ordered_captures { - 54 | let mut captures = query_cursor.captures(&query, tree.root_node(), source_code.as_slice()); - 55 | while let Some((mat, capture_index)) = captures.next() { - 56 | let capture = mat.captures[*capture_index]; - 57 | let capture_name = &query.capture_names()[capture.index as usize]; - 58 | if !quiet && !should_test { - 59 | writeln!( - 60 | &mut stdout, - 61 | " pattern: {:>2}, capture: {} - {capture_name}, start: {}, end: {}, text: `{}`", - 62 | mat.pattern_index, - 63 | capture.index, - 64 | capture.node.start_position(), - 65 | capture.node.end_position(), - 66 | capture.node.utf8_text(&source_code).unwrap_or("") - 67 | )?; - 68 | } - 69 | results.push(query_testing::CaptureInfo { - 70 | name: (*capture_name).to_string(), - 71 | start: to_utf8_point(capture.node.start_position(), source_code.as_slice()), - 72 | end: to_utf8_point(capture.node.end_position(), source_code.as_slice()), - 73 | }); - 74 | } - 75 | } else { - 76 | let mut matches = query_cursor.matches(&query, tree.root_node(), source_code.as_slice()); - 77 | while let Some(m) = matches.next() { - 78 | if !quiet && !should_test { - 79 | writeln!(&mut stdout, " pattern: {}", m.pattern_index)?; - 80 | } - 81 | for capture in m.captures { - 82 | let start = capture.node.start_position(); - 83 | let end = capture.node.end_position(); - 84 | let capture_name = &query.capture_names()[capture.index as usize]; - 85 | if !quiet && !should_test { - 86 | if end.row == start.row { - 87 | writeln!( - 88 | &mut stdout, - 89 | " capture: {} - {capture_name}, start: {start}, end: {end}, text: `{}`", - 90 | capture.index, - 91 | capture.node.utf8_text(&source_code).unwrap_or("") - 92 | )?; - 93 | } else { - 94 | writeln!( - 95 | &mut stdout, - 96 | " capture: {capture_name}, start: {start}, end: {end}", - 97 | )?; - 98 | } - 99 | } - 100 | results.push(query_testing::CaptureInfo { - 101 | name: 
(*capture_name).to_string(), - 102 | start: to_utf8_point(capture.node.start_position(), source_code.as_slice()), - 103 | end: to_utf8_point(capture.node.end_position(), source_code.as_slice()), - 104 | }); - 105 | } - 106 | } - 107 | } - 108 | if !query_cursor.did_exceed_match_limit() { - 109 | warn!("Query exceeded maximum number of in-progress captures!"); - 110 | } - 111 | if should_test { - 112 | let path_name = if stdin { - 113 | "stdin" - 114 | } else { - 115 | Path::new(&path).file_name().unwrap().to_str().unwrap() - 116 | }; - 117 | match query_testing::assert_expected_captures(&results, path, &mut parser, language) { - 118 | Ok(assertion_count) => { - 119 | println!( - 120 | " ✓ {} ({} assertions)", - 121 | paint(Some(AnsiColor::Green), path_name), - 122 | assertion_count - 123 | ); - 124 | } - 125 | Err(e) => { - 126 | println!(" ✗ {}", paint(Some(AnsiColor::Red), path_name)); - 127 | return Err(e); - 128 | } - 129 | } - 130 | } - 131 | if print_time { - 132 | writeln!(&mut stdout, "{:?}", start.elapsed())?; - 133 | } - | - 134 | Ok(()) - 135 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tags.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | fs, - 3 | io::{self, Write}, - 4 | path::Path, - 5 | str, - 6 | sync::{atomic::AtomicUsize, Arc}, - 7 | time::Instant, - 8 | }; - | - 9 | use anyhow::Result; - 10 | use tree_sitter_tags::{TagsConfiguration, TagsContext}; - | - 11 | pub struct TagsOptions { - 12 | pub scope: Option, - 13 | pub quiet: bool, - 14 | pub print_time: bool, - 15 | pub cancellation_flag: Arc, - 16 | } - | - 17 | pub fn generate_tags( - 18 | path: &Path, - 19 | name: &str, - 20 | config: &TagsConfiguration, - 21 | indent: bool, - 22 | opts: &TagsOptions, - 23 | ) -> Result<()> { - 24 | let mut context = TagsContext::new(); - 25 | let stdout = io::stdout(); - 26 | let mut stdout = stdout.lock(); - | - 27 | let indent_str = 
if indent { - 28 | if !opts.quiet { - 29 | writeln!(&mut stdout, "{name}")?; - 30 | } - 31 | "\t" - 32 | } else { - 33 | "" - 34 | }; - | - 35 | let source = fs::read(path)?; - 36 | let start = Instant::now(); - 37 | for tag in context - 38 | .generate_tags(config, &source, Some(&opts.cancellation_flag))? - 39 | .0 - 40 | { - 41 | let tag = tag?; - 42 | if !opts.quiet { - 43 | write!( - 44 | &mut stdout, - 45 | "{indent_str}{:<10}\t | {:<8}\t{} {} - {} `{}`", - 46 | str::from_utf8(&source[tag.name_range]).unwrap_or(""), - 47 | &config.syntax_type_name(tag.syntax_type_id), - 48 | if tag.is_definition { "def" } else { "ref" }, - 49 | tag.span.start, - 50 | tag.span.end, - 51 | str::from_utf8(&source[tag.line_range]).unwrap_or(""), - 52 | )?; - 53 | if let Some(docs) = tag.docs { - 54 | if docs.len() > 120 { - 55 | write!(&mut stdout, "\t{:?}...", docs.get(0..120).unwrap_or(""))?; - 56 | } else { - 57 | write!(&mut stdout, "\t{:?}", &docs)?; - 58 | } - 59 | } - 60 | writeln!(&mut stdout)?; - 61 | } - 62 | } - | - 63 | if opts.print_time { - 64 | writeln!( - 65 | &mut stdout, - 66 | "{indent_str}time: {}ms", - 67 | start.elapsed().as_millis(), - 68 | )?; - 69 | } - | - 70 | Ok(()) - 71 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/__init__.py: --------------------------------------------------------------------------------- - 1 | """PARSER_DESCRIPTION""" - | - 2 | from importlib.resources import files as _files - | - 3 | from ._binding import language - | - | - 4 | def _get_query(name, file): - 5 | query = _files(f"{__package__}.queries") / file - 6 | globals()[name] = query.read_text() - 7 | return globals()[name] - | - | - 8 | def __getattr__(name): - 9 | # NOTE: uncomment these to include any queries that this grammar contains: - | - 10 | # if name == "HIGHLIGHTS_QUERY": - 11 | # return _get_query("HIGHLIGHTS_QUERY", "highlights.scm") - 12 | # if name == "INJECTIONS_QUERY": - 13 | # return 
_get_query("INJECTIONS_QUERY", "injections.scm") - 14 | # if name == "LOCALS_QUERY": - 15 | # return _get_query("LOCALS_QUERY", "locals.scm") - 16 | # if name == "TAGS_QUERY": - 17 | # return _get_query("TAGS_QUERY", "tags.scm") - | - 18 | raise AttributeError(f"module {__name__!r} has no attribute {name!r}") - | - | - 19 | __all__ = [ - 20 | "language", - 21 | # "HIGHLIGHTS_QUERY", - 22 | # "INJECTIONS_QUERY", - 23 | # "LOCALS_QUERY", - 24 | # "TAGS_QUERY", - 25 | ] - | - | - 26 | def __dir__(): - 27 | return sorted(__all__ + [ - 28 | "__all__", "__builtins__", "__cached__", "__doc__", "__file__", - 29 | "__loader__", "__name__", "__package__", "__path__", "__spec__", - 30 | ]) - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/__init__.pyi: --------------------------------------------------------------------------------- - 1 | from typing import Final - 2 | from typing_extensions import CapsuleType - | - 3 | # NOTE: uncomment these to include any queries that this grammar contains: - | - 4 | # HIGHLIGHTS_QUERY: Final[str] - 5 | # INJECTIONS_QUERY: Final[str] - 6 | # LOCALS_QUERY: Final[str] - 7 | # TAGS_QUERY: Final[str] - | - 8 | def language() -> CapsuleType: ... 
- - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/_cargo.toml: --------------------------------------------------------------------------------- - 1 | [package] - 2 | name = "tree-sitter-PARSER_NAME" - 3 | description = "PARSER_DESCRIPTION" - 4 | version = "PARSER_VERSION" - 5 | authors = ["PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL"] - 6 | license = "PARSER_LICENSE" - 7 | readme = "README.md" - 8 | keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"] - 9 | categories = ["parser-implementations", "parsing", "text-editors"] - 10 | repository = "PARSER_URL" - 11 | edition = "2021" - 12 | autoexamples = false - | - 13 | build = "bindings/rust/build.rs" - 14 | include = [ - 15 | "bindings/rust/*", - 16 | "grammar.js", - 17 | "queries/*", - 18 | "src/*", - 19 | "tree-sitter.json", - 20 | "/LICENSE", - 21 | ] - | - 22 | [lib] - 23 | path = "bindings/rust/lib.rs" - | - 24 | [dependencies] - 25 | tree-sitter-language = "0.1" - | - 26 | [build-dependencies] - 27 | cc = "1.2" - | - 28 | [dev-dependencies] - 29 | tree-sitter = "RUST_BINDING_VERSION" - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/binding_test.go: --------------------------------------------------------------------------------- - 1 | package tree_sitter_LOWER_PARSER_NAME_test - | - 2 | import ( - 3 | "testing" - | - 4 | tree_sitter "github.com/tree-sitter/go-tree-sitter" - 5 | tree_sitter_LOWER_PARSER_NAME "PARSER_URL_STRIPPED/bindings/go" - 6 | ) - | - 7 | func TestCanLoadGrammar(t *testing.T) { - 8 | language := tree_sitter.NewLanguage(tree_sitter_LOWER_PARSER_NAME.Language()) - 9 | if language == nil { - 10 | t.Errorf("Error loading TITLE_PARSER_NAME grammar") - 11 | } - 12 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/binding_test.js: 
--------------------------------------------------------------------------------- - 1 | import assert from "node:assert"; - 2 | import { test } from "node:test"; - 3 | import Parser from "tree-sitter"; - | - 4 | test("can load grammar", () => { - 5 | const parser = new Parser(); - 6 | assert.doesNotReject(async () => { - 7 | const { default: language } = await import("./index.js"); - 8 | parser.setLanguage(language); - 9 | }); - 10 | }); - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/binding.go: --------------------------------------------------------------------------------- - 1 | package tree_sitter_LOWER_PARSER_NAME - | - 2 | // #cgo CFLAGS: -std=c11 -fPIC - 3 | // #include "../../src/parser.c" - 4 | // #if __has_include("../../src/scanner.c") - 5 | // #include "../../src/scanner.c" - 6 | // #endif - 7 | import "C" - | - 8 | import "unsafe" - | - 9 | // Get the tree-sitter Language for this grammar. - 10 | func Language() unsafe.Pointer { - 11 | return unsafe.Pointer(C.tree_sitter_LOWER_PARSER_NAME()) - 12 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/binding.gyp: --------------------------------------------------------------------------------- - 1 | { - 2 | "targets": [ - 3 | { - 4 | "target_name": "tree_sitter_PARSER_NAME_binding", - 5 | "dependencies": [ - 6 | " - 35 | $) - | - 36 | target_compile_definitions(tree-sitter-KEBAB_PARSER_NAME PRIVATE - 37 | $<$:TREE_SITTER_REUSE_ALLOCATOR> - 38 | $<$:TREE_SITTER_DEBUG>) - | - 39 | set_target_properties(tree-sitter-KEBAB_PARSER_NAME - 40 | PROPERTIES - 41 | C_STANDARD 11 - 42 | POSITION_INDEPENDENT_CODE ON - 43 | SOVERSION "${TREE_SITTER_ABI_VERSION}.${PROJECT_VERSION_MAJOR}" - 44 | DEFINE_SYMBOL "") - | - 45 | configure_file(bindings/c/tree-sitter-KEBAB_PARSER_NAME.pc.in - 46 | "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter-KEBAB_PARSER_NAME.pc" @ONLY) - | - 47 | install(DIRECTORY 
"${CMAKE_CURRENT_SOURCE_DIR}/bindings/c/tree_sitter" - 48 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - 49 | FILES_MATCHING PATTERN "*.h") - 50 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter-KEBAB_PARSER_NAME.pc" - 51 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") - 52 | install(TARGETS tree-sitter-KEBAB_PARSER_NAME - 53 | LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}") - | - 54 | file(GLOB QUERIES queries/*.scm) - 55 | install(FILES ${QUERIES} - 56 | DESTINATION "${CMAKE_INSTALL_DATADIR}/tree-sitter/queries/KEBAB_PARSER_NAME") - | - 57 | add_custom_target(ts-test "${TREE_SITTER_CLI}" test - 58 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - 59 | COMMENT "tree-sitter test") - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/gitattributes: --------------------------------------------------------------------------------- - 1 | * text=auto eol=lf - | - 2 | # Generated source files - 3 | src/*.json linguist-generated - 4 | src/parser.c linguist-generated - 5 | src/tree_sitter/* linguist-generated - | - 6 | # C bindings - 7 | bindings/c/** linguist-generated - 8 | CMakeLists.txt linguist-generated - 9 | Makefile linguist-generated - | - 10 | # Rust bindings - 11 | bindings/rust/* linguist-generated - 12 | Cargo.toml linguist-generated - 13 | Cargo.lock linguist-generated - | - 14 | # Node.js bindings - 15 | bindings/node/* linguist-generated - 16 | binding.gyp linguist-generated - 17 | package.json linguist-generated - 18 | package-lock.json linguist-generated - | - 19 | # Python bindings - 20 | bindings/python/** linguist-generated - 21 | setup.py linguist-generated - 22 | pyproject.toml linguist-generated - | - 23 | # Go bindings - 24 | bindings/go/* linguist-generated - 25 | go.mod linguist-generated - 26 | go.sum linguist-generated - | - 27 | # Swift bindings - 28 | bindings/swift/** linguist-generated - 29 | Package.swift linguist-generated - 30 | Package.resolved linguist-generated - 
| - 31 | # Zig bindings - 32 | bindings/zig/* linguist-generated - 33 | build.zig linguist-generated - 34 | build.zig.zon linguist-generated - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/gitignore: --------------------------------------------------------------------------------- - 1 | # Rust artifacts - 2 | target/ - | - 3 | # Node artifacts - 4 | build/ - 5 | prebuilds/ - 6 | node_modules/ - | - 7 | # Swift artifacts - 8 | .build/ - | - 9 | # Go artifacts - 10 | _obj/ - | - 11 | # Python artifacts - 12 | .venv/ - 13 | dist/ - 14 | *.egg-info - 15 | *.whl - | - 16 | # C artifacts - 17 | *.a - 18 | *.so - 19 | *.so.* - 20 | *.dylib - 21 | *.dll - 22 | *.pc - 23 | *.exp - 24 | *.lib - | - 25 | # Zig artifacts - 26 | .zig-cache/ - 27 | zig-cache/ - 28 | zig-out/ - | - 29 | # Example dirs - 30 | /examples/*/ - | - 31 | # Grammar volatiles - 32 | *.wasm - 33 | *.obj - 34 | *.o - | - 35 | # Archives - 36 | *.tar.gz - 37 | *.tgz - 38 | *.zip - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/go.mod: --------------------------------------------------------------------------------- - 1 | module PARSER_URL_STRIPPED - | - 2 | go 1.22 - | - 3 | require github.com/tree-sitter/go-tree-sitter v0.24.0 - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/grammar.js: --------------------------------------------------------------------------------- - 1 | /** - 2 | * @file PARSER_DESCRIPTION - 3 | * @author PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL - 4 | * @license PARSER_LICENSE - 5 | */ - | - 6 | /// - 7 | // @ts-check - | - 8 | export default grammar({ - 9 | name: "LOWER_PARSER_NAME", - | - 10 | rules: { - 11 | // TODO: add the actual grammar rules - 12 | source_file: $ => "hello" - 13 | } - 14 | }); - - - --------------------------------------------------------------------------------- 
-/crates/cli/src/templates/index.d.ts: --------------------------------------------------------------------------------- - 1 | type BaseNode = { - 2 | type: string; - 3 | named: boolean; - 4 | }; - | - 5 | type ChildNode = { - 6 | multiple: boolean; - 7 | required: boolean; - 8 | types: BaseNode[]; - 9 | }; - | - 10 | type NodeInfo = - 11 | | (BaseNode & { - 12 | subtypes: BaseNode[]; - 13 | }) - 14 | | (BaseNode & { - 15 | fields: { [name: string]: ChildNode }; - 16 | children: ChildNode[]; - 17 | }); - | - 18 | type Language = { - 19 | language: unknown; - 20 | nodeTypeInfo: NodeInfo[]; - 21 | }; - | - 22 | declare const language: Language; - 23 | export = language; - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/index.js: --------------------------------------------------------------------------------- - 1 | import { fileURLToPath } from "node:url"; - | - 2 | const root = fileURLToPath(new URL("../..", import.meta.url)); - | - 3 | const binding = typeof process.versions.bun === "string" - 4 | // Support `bun build --compile` by being statically analyzable enough to find the .node file at build-time - 5 | ? 
await import(`${root}/prebuilds/${process.platform}-${process.arch}/tree-sitter-KEBAB_PARSER_NAME.node`) - 6 | : (await import("node-gyp-build")).default(root); - | - 7 | try { - 8 | const nodeTypes = await import(`${root}/src/node-types.json`, {with: {type: "json"}}); - 9 | binding.nodeTypeInfo = nodeTypes.default; - 10 | } catch (_) {} - | - 11 | export default binding; - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/js-binding.cc: --------------------------------------------------------------------------------- - 1 | #include - | - 2 | typedef struct TSLanguage TSLanguage; - | - 3 | extern "C" TSLanguage *tree_sitter_PARSER_NAME(); - | - 4 | // "tree-sitter", "language" hashed with BLAKE2 - 5 | const napi_type_tag LANGUAGE_TYPE_TAG = { - 6 | 0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16 - 7 | }; - | - 8 | Napi::Object Init(Napi::Env env, Napi::Object exports) { - 9 | auto language = Napi::External::New(env, tree_sitter_PARSER_NAME()); - 10 | language.TypeTag(&LANGUAGE_TYPE_TAG); - 11 | exports["language"] = language; - 12 | return exports; - 13 | } - | - 14 | NODE_API_MODULE(tree_sitter_PARSER_NAME_binding, Init) - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/lib.rs: --------------------------------------------------------------------------------- - 1 | //! This crate provides TITLE_PARSER_NAME language support for the [tree-sitter] parsing library. - 2 | //! - 3 | //! Typically, you will use the [`LANGUAGE`] constant to add this language to a - 4 | //! tree-sitter [`Parser`], and then use the parser to parse some code: - 5 | //! - 6 | //! ``` - 7 | //! let code = r#" - 8 | //! "#; - 9 | //! let mut parser = tree_sitter::Parser::new(); - 10 | //! let language = tree_sitter_PARSER_NAME::LANGUAGE; - 11 | //! parser - 12 | //! .set_language(&language.into()) - 13 | //! .expect("Error loading TITLE_PARSER_NAME parser"); - 14 | //! 
let tree = parser.parse(code, None).unwrap(); - 15 | //! assert!(!tree.root_node().has_error()); - 16 | //! ``` - 17 | //! - 18 | //! [`Parser`]: https://docs.rs/tree-sitter/RUST_BINDING_VERSION/tree_sitter/struct.Parser.html - 19 | //! [tree-sitter]: https://tree-sitter.github.io/ - | - 20 | use tree_sitter_language::LanguageFn; - | - 21 | extern "C" { - 22 | fn tree_sitter_PARSER_NAME() -> *const (); - 23 | } - | - 24 | /// The tree-sitter [`LanguageFn`] for this grammar. - 25 | pub const LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_PARSER_NAME) }; - | - 26 | /// The content of the [`node-types.json`] file for this grammar. - 27 | /// - 28 | /// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types - 29 | pub const NODE_TYPES: &str = include_str!("../../src/node-types.json"); - | - 30 | // NOTE: uncomment these to include any queries that this grammar contains: - | - 31 | // pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm"); - 32 | // pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm"); - 33 | // pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm"); - 34 | // pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm"); - | - 35 | #[cfg(test)] - 36 | mod tests { - 37 | #[test] - 38 | fn test_can_load_grammar() { - 39 | let mut parser = tree_sitter::Parser::new(); - 40 | parser - 41 | .set_language(&super::LANGUAGE.into()) - 42 | .expect("Error loading TITLE_PARSER_NAME parser"); - 43 | } - 44 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/makefile: --------------------------------------------------------------------------------- - 1 | LANGUAGE_NAME := tree-sitter-KEBAB_PARSER_NAME - 2 | HOMEPAGE_URL := PARSER_URL - 3 | VERSION := PARSER_VERSION - | - 4 | # repository - 5 | SRC_DIR := src - | - 6 | TS ?= tree-sitter - | - 7 | # install directory 
layout - 8 | PREFIX ?= /usr/local - 9 | DATADIR ?= $(PREFIX)/share - 10 | INCLUDEDIR ?= $(PREFIX)/include - 11 | LIBDIR ?= $(PREFIX)/lib - 12 | BINDIR ?= $(PREFIX)/bin - 13 | PCLIBDIR ?= $(LIBDIR)/pkgconfig - | - 14 | # source/object files - 15 | PARSER := $(SRC_DIR)/parser.c - 16 | EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c)) - 17 | OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS)) - | - 18 | # flags - 19 | ARFLAGS ?= rcs - 20 | override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC - | - 21 | # ABI versioning - 22 | SONAME_MAJOR = $(shell sed -n 's/\#define LANGUAGE_VERSION //p' $(PARSER)) - 23 | SONAME_MINOR = $(word 1,$(subst ., ,$(VERSION))) - | - 24 | # OS-specific bits - 25 | MACHINE := $(shell $(CC) -dumpmachine) - | - 26 | ifneq ($(findstring darwin,$(MACHINE)),) - 27 | SOEXT = dylib - 28 | SOEXTVER_MAJOR = $(SONAME_MAJOR).$(SOEXT) - 29 | SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).$(SOEXT) - 30 | LINKSHARED = -dynamiclib -Wl,-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SOEXTVER),-rpath,@executable_path/../Frameworks - 31 | else ifneq ($(findstring mingw32,$(MACHINE)),) - 32 | SOEXT = dll - 33 | LINKSHARED += -s -shared -Wl,--out-implib,lib$(LANGUAGE_NAME).dll.a - 34 | else - 35 | SOEXT = so - 36 | SOEXTVER_MAJOR = $(SOEXT).$(SONAME_MAJOR) - 37 | SOEXTVER = $(SOEXT).$(SONAME_MAJOR).$(SONAME_MINOR) - 38 | LINKSHARED = -shared -Wl,-soname,lib$(LANGUAGE_NAME).$(SOEXTVER) - 39 | ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),) - 40 | PCLIBDIR := $(PREFIX)/libdata/pkgconfig - 41 | endif - 42 | endif - | - 43 | all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc - | - 44 | lib$(LANGUAGE_NAME).a: $(OBJS) - 45 | $(AR) $(ARFLAGS) $@ $^ - | - 46 | lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS) - 47 | $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ - 48 | ifneq ($(STRIP),) - 49 | $(STRIP) $@ - 50 | endif - | - 51 | ifneq ($(findstring mingw32,$(MACHINE)),) - 52 | lib$(LANGUAGE_NAME).dll.a: lib$(LANGUAGE_NAME).$(SOEXT) - 53 | endif - | 
- 54 | $(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in - 55 | sed -e 's|@PROJECT_VERSION@|$(VERSION)|' \ - 56 | -e 's|@CMAKE_INSTALL_LIBDIR@|$(LIBDIR:$(PREFIX)/%=%)|' \ - 57 | -e 's|@CMAKE_INSTALL_INCLUDEDIR@|$(INCLUDEDIR:$(PREFIX)/%=%)|' \ - 58 | -e 's|@PROJECT_DESCRIPTION@|$(DESCRIPTION)|' \ - 59 | -e 's|@PROJECT_HOMEPAGE_URL@|$(HOMEPAGE_URL)|' \ - 60 | -e 's|@CMAKE_INSTALL_PREFIX@|$(PREFIX)|' $< > $@ - | - 61 | $(SRC_DIR)/grammar.json: grammar.js - 62 | $(TS) generate --emit=json $^ - | - 63 | $(PARSER): $(SRC_DIR)/grammar.json - 64 | $(TS) generate --emit=parser $^ - | - 65 | install: all - 66 | install -d '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)' - 67 | install -m644 bindings/c/tree_sitter/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h - 68 | install -m644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc - 69 | install -m644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a - 70 | install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) - 71 | ifneq ($(findstring mingw32,$(MACHINE)),) - 72 | install -d '$(DESTDIR)$(BINDIR)' - 73 | install -m755 lib$(LANGUAGE_NAME).dll '$(DESTDIR)$(BINDIR)'/lib$(LANGUAGE_NAME).dll - 74 | install -m755 lib$(LANGUAGE_NAME).dll.a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).dll.a - 75 | else - 76 | install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) - 77 | cd '$(DESTDIR)$(LIBDIR)' && ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) - 78 | cd '$(DESTDIR)$(LIBDIR)' && ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) lib$(LANGUAGE_NAME).$(SOEXT) - 79 | endif - 80 | ifneq ($(wildcard queries/*.scm),) - 81 | install -m644 queries/*.scm '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME - 82 | endif - | - 83 | uninstall: - 84 | $(RM) 
'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \ - 85 | '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \ - 86 | '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \ - 87 | '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \ - 88 | '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \ - 89 | '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc - 90 | $(RM) -r '$(DESTDIR)$(DATADIR)'/tree-sitter/queries/KEBAB_PARSER_NAME - | - 91 | clean: - 92 | $(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) lib$(LANGUAGE_NAME).dll.a - | - 93 | test: - 94 | $(TS) test - | - 95 | .PHONY: all install uninstall clean test - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/package.swift: --------------------------------------------------------------------------------- - 1 | // swift-tools-version:5.3 - | - 2 | import Foundation - 3 | import PackageDescription - | - 4 | var sources = ["src/parser.c"] - 5 | if FileManager.default.fileExists(atPath: "src/scanner.c") { - 6 | sources.append("src/scanner.c") - 7 | } - | - 8 | let package = Package( - 9 | name: "PARSER_CLASS_NAME", - 10 | products: [ - 11 | .library(name: "PARSER_CLASS_NAME", targets: ["PARSER_CLASS_NAME"]), - 12 | ], - 13 | dependencies: [ - 14 | .package(name: "SwiftTreeSitter", url: "https://github.com/tree-sitter/swift-tree-sitter", from: "0.9.0"), - 15 | ], - 16 | targets: [ - 17 | .target( - 18 | name: "PARSER_CLASS_NAME", - 19 | dependencies: [], - 20 | path: ".", - 21 | sources: sources, - 22 | resources: [ - 23 | .copy("queries") - 24 | ], - 25 | publicHeadersPath: "bindings/swift", - 26 | cSettings: [.headerSearchPath("src")] - 27 | ), - 28 | .testTarget( - 29 | name: "PARSER_CLASS_NAMETests", - 30 | dependencies: [ - 31 | "SwiftTreeSitter", - 32 | "PARSER_CLASS_NAME", - 33 | ], - 34 | path: "bindings/swift/PARSER_CLASS_NAMETests" - 35 | ) - 36 | ], - 37 | cLanguageStandard: .c11 - 38 | ) - - - 
--------------------------------------------------------------------------------- -/crates/cli/src/templates/PARSER_NAME.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_UPPER_PARSER_NAME_H_ - 2 | #define TREE_SITTER_UPPER_PARSER_NAME_H_ - | - 3 | typedef struct TSLanguage TSLanguage; - | - 4 | #ifdef __cplusplus - 5 | extern "C" { - 6 | #endif - | - 7 | const TSLanguage *tree_sitter_PARSER_NAME(void); - | - 8 | #ifdef __cplusplus - 9 | } - 10 | #endif - | - 11 | #endif // TREE_SITTER_UPPER_PARSER_NAME_H_ - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/PARSER_NAME.pc.in: --------------------------------------------------------------------------------- - 1 | prefix=@CMAKE_INSTALL_PREFIX@ - 2 | libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ - 3 | includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ - | - 4 | Name: tree-sitter-PARSER_NAME - 5 | Description: @PROJECT_DESCRIPTION@ - 6 | URL: @PROJECT_HOMEPAGE_URL@ - 7 | Version: @PROJECT_VERSION@ - 8 | Libs: -L${libdir} -ltree-sitter-PARSER_NAME - 9 | Cflags: -I${includedir} - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/py-binding.c: --------------------------------------------------------------------------------- - 1 | #include - | - 2 | typedef struct TSLanguage TSLanguage; - | - 3 | TSLanguage *tree_sitter_LOWER_PARSER_NAME(void); - | - 4 | static PyObject* _binding_language(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) { - 5 | return PyCapsule_New(tree_sitter_LOWER_PARSER_NAME(), "tree_sitter.Language", NULL); - 6 | } - | - 7 | static struct PyModuleDef_Slot slots[] = { - 8 | #ifdef Py_GIL_DISABLED - 9 | {Py_mod_gil, Py_MOD_GIL_NOT_USED}, - 10 | #endif - 11 | {0, NULL} - 12 | }; - | - 13 | static PyMethodDef methods[] = { - 14 | {"language", _binding_language, METH_NOARGS, - 15 | "Get the tree-sitter language for this 
grammar."}, - 16 | {NULL, NULL, 0, NULL} - 17 | }; - | - 18 | static struct PyModuleDef module = { - 19 | .m_base = PyModuleDef_HEAD_INIT, - 20 | .m_name = "_binding", - 21 | .m_doc = NULL, - 22 | .m_size = 0, - 23 | .m_methods = methods, - 24 | .m_slots = slots, - 25 | }; - | - 26 | PyMODINIT_FUNC PyInit__binding(void) { - 27 | return PyModuleDef_Init(&module); - 28 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/pyproject.toml: --------------------------------------------------------------------------------- - 1 | [build-system] - 2 | requires = ["setuptools>=62.4.0", "wheel"] - 3 | build-backend = "setuptools.build_meta" - | - 4 | [project] - 5 | name = "tree-sitter-PARSER_NAME" - 6 | description = "PARSER_DESCRIPTION" - 7 | version = "PARSER_VERSION" - 8 | keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"] - 9 | classifiers = [ - 10 | "Intended Audience :: Developers", - 11 | "Topic :: Software Development :: Compilers", - 12 | "Topic :: Text Processing :: Linguistic", - 13 | "Typing :: Typed", - 14 | ] - 15 | authors = [{ name = "PARSER_AUTHOR_NAME", email = "PARSER_AUTHOR_EMAIL" }] - 16 | requires-python = ">=3.10" - 17 | license.text = "PARSER_LICENSE" - 18 | readme = "README.md" - | - 19 | [project.urls] - 20 | Homepage = "PARSER_URL" - 21 | Funding = "FUNDING_URL" - | - 22 | [project.optional-dependencies] - 23 | core = ["tree-sitter~=0.24"] - | - 24 | [tool.cibuildwheel] - 25 | build = "cp310-*" - 26 | build-frontend = "build" - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/root.zig: --------------------------------------------------------------------------------- - 1 | extern fn tree_sitter_PARSER_NAME() callconv(.c) *const anyopaque; - | - 2 | pub fn language() *const anyopaque { - 3 | return tree_sitter_PARSER_NAME(); - 4 | } - - - 
--------------------------------------------------------------------------------- -/crates/cli/src/templates/setup.py: --------------------------------------------------------------------------------- - 1 | from os import path - 2 | from sysconfig import get_config_var - | - 3 | from setuptools import Extension, find_packages, setup - 4 | from setuptools.command.build import build - 5 | from setuptools.command.build_ext import build_ext - 6 | from setuptools.command.egg_info import egg_info - 7 | from wheel.bdist_wheel import bdist_wheel - | - | - 8 | class Build(build): - 9 | def run(self): - 10 | if path.isdir("queries"): - 11 | dest = path.join(self.build_lib, "tree_sitter_PARSER_NAME", "queries") - 12 | self.copy_tree("queries", dest) - 13 | super().run() - | - | - 14 | class BuildExt(build_ext): - 15 | def build_extension(self, ext: Extension): - 16 | if self.compiler.compiler_type != "msvc": - 17 | ext.extra_compile_args = ["-std=c11", "-fvisibility=hidden"] - 18 | else: - 19 | ext.extra_compile_args = ["/std:c11", "/utf-8"] - 20 | if path.exists("src/scanner.c"): - 21 | ext.sources.append("src/scanner.c") - 22 | if ext.py_limited_api: - 23 | ext.define_macros.append(("Py_LIMITED_API", "0x030A0000")) - 24 | super().build_extension(ext) - | - | - 25 | class BdistWheel(bdist_wheel): - 26 | def get_tag(self): - 27 | python, abi, platform = super().get_tag() - 28 | if python.startswith("cp"): - 29 | python, abi = "cp310", "abi3" - 30 | return python, abi, platform - | - | - 31 | class EggInfo(egg_info): - 32 | def find_sources(self): - 33 | super().find_sources() - 34 | self.filelist.recursive_include("queries", "*.scm") - 35 | self.filelist.include("src/tree_sitter/*.h") - | - | - 36 | setup( - 37 | packages=find_packages("bindings/python"), - 38 | package_dir={"": "bindings/python"}, - 39 | package_data={ - 40 | "tree_sitter_LOWER_PARSER_NAME": ["*.pyi", "py.typed"], - 41 | "tree_sitter_LOWER_PARSER_NAME.queries": ["*.scm"], - 42 | }, - 43 | 
ext_package="tree_sitter_LOWER_PARSER_NAME", - 44 | ext_modules=[ - 45 | Extension( - 46 | name="_binding", - 47 | sources=[ - 48 | "bindings/python/tree_sitter_LOWER_PARSER_NAME/binding.c", - 49 | "src/parser.c", - 50 | ], - 51 | define_macros=[ - 52 | ("PY_SSIZE_T_CLEAN", None), - 53 | ("TREE_SITTER_HIDE_SYMBOLS", None), - 54 | ], - 55 | include_dirs=["src"], - 56 | py_limited_api=not get_config_var("Py_GIL_DISABLED"), - 57 | ) - 58 | ], - 59 | cmdclass={ - 60 | "build": Build, - 61 | "build_ext": BuildExt, - 62 | "bdist_wheel": BdistWheel, - 63 | "egg_info": EggInfo, - 64 | }, - 65 | zip_safe=False - 66 | ) - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/test_binding.py: --------------------------------------------------------------------------------- - 1 | from unittest import TestCase - | - 2 | from tree_sitter import Language, Parser - 3 | import tree_sitter_LOWER_PARSER_NAME - | - | - 4 | class TestLanguage(TestCase): - 5 | def test_can_load_grammar(self): - 6 | try: - 7 | Parser(Language(tree_sitter_LOWER_PARSER_NAME.language())) - 8 | except Exception: - 9 | self.fail("Error loading TITLE_PARSER_NAME grammar") - - - --------------------------------------------------------------------------------- -/crates/cli/src/templates/test.zig: --------------------------------------------------------------------------------- - 1 | const testing = @import("std").testing; - | - 2 | const ts = @import("tree-sitter"); - 3 | const root = @import("tree-sitter-PARSER_NAME"); - 4 | const Language = ts.Language; - 5 | const Parser = ts.Parser; - | - 6 | test "can load grammar" { - 7 | const parser = Parser.create(); - 8 | defer parser.destroy(); - | - 9 | const lang: *const ts.Language = Language.fromRaw(root.language()); - 10 | defer lang.destroy(); - | - 11 | try testing.expectEqual(void{}, parser.setLanguage(lang)); - 12 | try testing.expectEqual(lang, parser.getLanguage()); - 13 | } - - - 
--------------------------------------------------------------------------------- -/crates/cli/src/templates/tests.swift: --------------------------------------------------------------------------------- - 1 | import XCTest - 2 | import SwiftTreeSitter - 3 | import PARSER_CLASS_NAME - | - 4 | final class PARSER_CLASS_NAMETests: XCTestCase { - 5 | func testCanLoadGrammar() throws { - 6 | let parser = Parser() - 7 | let language = Language(language: tree_sitter_LOWER_PARSER_NAME()) - 8 | XCTAssertNoThrow(try parser.setLanguage(language), - 9 | "Error loading TITLE_PARSER_NAME grammar") - 10 | } - 11 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/test_highlight.rs: --------------------------------------------------------------------------------- - 1 | use std::{fs, path::Path}; - | - 2 | use anstyle::AnsiColor; - 3 | use anyhow::{anyhow, Result}; - 4 | use tree_sitter::Point; - 5 | use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter}; - 6 | use tree_sitter_loader::{Config, Loader}; - | - 7 | use crate::{ - 8 | logger::paint, - 9 | query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point}, - 10 | util, - 11 | }; - | - 12 | #[derive(Debug)] - 13 | pub struct Failure { - 14 | row: usize, - 15 | column: usize, - 16 | expected_highlight: String, - 17 | actual_highlights: Vec, - 18 | } - | - 19 | impl std::error::Error for Failure {} - | - 20 | impl std::fmt::Display for Failure { - 21 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - 22 | write!( - 23 | f, - 24 | "Failure - row: {}, column: {}, expected highlight '{}', actual highlights: ", - 25 | self.row, self.column, self.expected_highlight - 26 | )?; - 27 | if self.actual_highlights.is_empty() { - 28 | write!(f, "none.")?; - 29 | } else { - 30 | for (i, actual_highlight) in self.actual_highlights.iter().enumerate() { - 31 | if i > 0 { - 32 | write!(f, ", ")?; - 33 | } - 34 | 
write!(f, "'{actual_highlight}'")?; - 35 | } - 36 | } - 37 | Ok(()) - 38 | } - 39 | } - | - 40 | pub fn test_highlights( - 41 | loader: &Loader, - 42 | loader_config: &Config, - 43 | highlighter: &mut Highlighter, - 44 | directory: &Path, - 45 | use_color: bool, - 46 | ) -> Result<()> { - 47 | println!("syntax highlighting:"); - 48 | test_highlights_indented(loader, loader_config, highlighter, directory, use_color, 2) - 49 | } - | - 50 | fn test_highlights_indented( - 51 | loader: &Loader, - 52 | loader_config: &Config, - 53 | highlighter: &mut Highlighter, - 54 | directory: &Path, - 55 | use_color: bool, - 56 | indent_level: usize, - 57 | ) -> Result<()> { - 58 | let mut failed = false; - | - 59 | for highlight_test_file in fs::read_dir(directory)? { - 60 | let highlight_test_file = highlight_test_file?; - 61 | let test_file_path = highlight_test_file.path(); - 62 | let test_file_name = highlight_test_file.file_name(); - 63 | print!( - 64 | "{indent:indent_level$}", - 65 | indent = "", - 66 | indent_level = indent_level * 2 - 67 | ); - 68 | if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() { - 69 | println!("{}:", test_file_name.to_string_lossy()); - 70 | if test_highlights_indented( - 71 | loader, - 72 | loader_config, - 73 | highlighter, - 74 | &test_file_path, - 75 | use_color, - 76 | indent_level + 1, - 77 | ) - 78 | .is_err() - 79 | { - 80 | failed = true; - 81 | } - 82 | } else { - 83 | let (language, language_config) = loader - 84 | .language_configuration_for_file_name(&test_file_path)? - 85 | .ok_or_else(|| { - 86 | anyhow!( - 87 | "{}", - 88 | util::lang_not_found_for_path(test_file_path.as_path(), loader_config) - 89 | ) - 90 | })?; - 91 | let highlight_config = language_config - 92 | .highlight_config(language, None)? 
- 93 | .ok_or_else(|| { - 94 | anyhow!( - 95 | "No highlighting config found for {}", - 96 | test_file_path.display() - 97 | ) - 98 | })?; - 99 | match test_highlight( - 100 | loader, - 101 | highlighter, - 102 | highlight_config, - 103 | fs::read(&test_file_path)?.as_slice(), - 104 | ) { - 105 | Ok(assertion_count) => { - 106 | println!( - 107 | "✓ {} ({assertion_count} assertions)", - 108 | paint( - 109 | use_color.then_some(AnsiColor::Green), - 110 | test_file_name.to_string_lossy().as_ref() - 111 | ), - 112 | ); - 113 | } - 114 | Err(e) => { - 115 | println!( - 116 | "✗ {}", - 117 | paint( - 118 | use_color.then_some(AnsiColor::Red), - 119 | test_file_name.to_string_lossy().as_ref() - 120 | ) - 121 | ); - 122 | println!( - 123 | "{indent:indent_level$} {e}", - 124 | indent = "", - 125 | indent_level = indent_level * 2 - 126 | ); - 127 | failed = true; - 128 | } - 129 | } - 130 | } - 131 | } - | - 132 | if failed { - 133 | Err(anyhow!("")) - 134 | } else { - 135 | Ok(()) - 136 | } - 137 | } - 138 | pub fn iterate_assertions( - 139 | assertions: &[Assertion], - 140 | highlights: &[(Utf8Point, Utf8Point, Highlight)], - 141 | highlight_names: &[String], - 142 | ) -> Result { - 143 | // Iterate through all of the highlighting assertions, checking each one against the - 144 | // actual highlights. - 145 | let mut i = 0; - 146 | let mut actual_highlights = Vec::new(); - 147 | for Assertion { - 148 | position, - 149 | length, - 150 | negative, - 151 | expected_capture_name: expected_highlight, - 152 | } in assertions - 153 | { - 154 | let mut passed = false; - 155 | let mut end_column = position.column + length - 1; - 156 | actual_highlights.clear(); - | - 157 | // The assertions are ordered by position, so skip past all of the highlights that - 158 | // end at or before this assertion's position. 
- 159 | 'highlight_loop: while let Some(highlight) = highlights.get(i) { - 160 | if highlight.1 <= *position { - 161 | i += 1; - 162 | continue; - 163 | } - | - 164 | // Iterate through all of the highlights that start at or before this assertion's - 165 | // position, looking for one that matches the assertion. - 166 | let mut j = i; - 167 | while let (false, Some(highlight)) = (passed, highlights.get(j)) { - 168 | end_column = position.column + length - 1; - 169 | if highlight.0.row >= position.row && highlight.0.column > end_column { - 170 | break 'highlight_loop; - 171 | } - | - 172 | // If the highlight matches the assertion, or if the highlight doesn't - 173 | // match the assertion but it's negative, this test passes. Otherwise, - 174 | // add this highlight to the list of actual highlights that span the - 175 | // assertion's position, in order to generate an error message in the event - 176 | // of a failure. - 177 | let highlight_name = &highlight_names[(highlight.2).0]; - 178 | if (*highlight_name == *expected_highlight) == *negative { - 179 | actual_highlights.push(highlight_name); - 180 | } else { - 181 | passed = true; - 182 | break 'highlight_loop; - 183 | } - | - 184 | j += 1; - 185 | } - 186 | } - | - 187 | if !passed { - 188 | return Err(Failure { - 189 | row: position.row, - 190 | column: end_column, - 191 | expected_highlight: expected_highlight.clone(), - 192 | actual_highlights: actual_highlights.into_iter().cloned().collect(), - 193 | } - 194 | .into()); - 195 | } - 196 | } - | - 197 | Ok(assertions.len()) - 198 | } - | - 199 | pub fn test_highlight( - 200 | loader: &Loader, - 201 | highlighter: &mut Highlighter, - 202 | highlight_config: &HighlightConfiguration, - 203 | source: &[u8], - 204 | ) -> Result { - 205 | // Highlight the file, and parse out all of the highlighting assertions. 
- 206 | let highlight_names = loader.highlight_names(); - 207 | let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?; - 208 | let assertions = - 209 | parse_position_comments(highlighter.parser(), &highlight_config.language, source)?; - | - 210 | iterate_assertions(&assertions, &highlights, &highlight_names) - 211 | } - | - 212 | pub fn get_highlight_positions( - 213 | loader: &Loader, - 214 | highlighter: &mut Highlighter, - 215 | highlight_config: &HighlightConfiguration, - 216 | source: &[u8], - 217 | ) -> Result> { - 218 | let mut row = 0; - 219 | let mut column = 0; - 220 | let mut byte_offset = 0; - 221 | let mut was_newline = false; - 222 | let mut result = Vec::new(); - 223 | let mut highlight_stack = Vec::new(); - 224 | let source = String::from_utf8_lossy(source); - 225 | let mut char_indices = source.char_indices(); - 226 | for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| { - 227 | loader.highlight_config_for_injection_string(string) - 228 | })? { - 229 | match event? 
{ - 230 | HighlightEvent::HighlightStart(h) => highlight_stack.push(h), - 231 | HighlightEvent::HighlightEnd => { - 232 | highlight_stack.pop(); - 233 | } - 234 | HighlightEvent::Source { start, end } => { - 235 | let mut start_position = Point::new(row, column); - 236 | while byte_offset < end { - 237 | if byte_offset <= start { - 238 | start_position = Point::new(row, column); - 239 | } - 240 | if let Some((i, c)) = char_indices.next() { - 241 | if was_newline { - 242 | row += 1; - 243 | column = 0; - 244 | } else { - 245 | column += i - byte_offset; - 246 | } - 247 | was_newline = c == '\n'; - 248 | byte_offset = i; - 249 | } else { - 250 | break; - 251 | } - 252 | } - 253 | if let Some(highlight) = highlight_stack.last() { - 254 | let utf8_start_position = to_utf8_point(start_position, source.as_bytes()); - 255 | let utf8_end_position = - 256 | to_utf8_point(Point::new(row, column), source.as_bytes()); - 257 | result.push((utf8_start_position, utf8_end_position, *highlight)); - 258 | } - 259 | } - 260 | } - 261 | } - 262 | Ok(result) - 263 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/test_tags.rs: --------------------------------------------------------------------------------- - 1 | use std::{fs, path::Path}; - | - 2 | use anstyle::AnsiColor; - 3 | use anyhow::{anyhow, Result}; - 4 | use tree_sitter_loader::{Config, Loader}; - 5 | use tree_sitter_tags::{TagsConfiguration, TagsContext}; - | - 6 | use crate::{ - 7 | logger::paint, - 8 | query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point}, - 9 | util, - 10 | }; - | - 11 | #[derive(Debug)] - 12 | pub struct Failure { - 13 | row: usize, - 14 | column: usize, - 15 | expected_tag: String, - 16 | actual_tags: Vec, - 17 | } - | - 18 | impl std::error::Error for Failure {} - | - 19 | impl std::fmt::Display for Failure { - 20 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - 21 | write!( - 22 | f, - 23 | "Failure 
- row: {}, column: {}, expected tag: '{}', actual tag: ", - 24 | self.row, self.column, self.expected_tag - 25 | )?; - 26 | if self.actual_tags.is_empty() { - 27 | write!(f, "none.")?; - 28 | } else { - 29 | for (i, actual_tag) in self.actual_tags.iter().enumerate() { - 30 | if i > 0 { - 31 | write!(f, ", ")?; - 32 | } - 33 | write!(f, "'{actual_tag}'")?; - 34 | } - 35 | } - 36 | Ok(()) - 37 | } - 38 | } - | - 39 | pub fn test_tags( - 40 | loader: &Loader, - 41 | loader_config: &Config, - 42 | tags_context: &mut TagsContext, - 43 | directory: &Path, - 44 | use_color: bool, - 45 | ) -> Result<()> { - 46 | println!("tags:"); - 47 | test_tags_indented(loader, loader_config, tags_context, directory, use_color, 2) - 48 | } - | - 49 | pub fn test_tags_indented( - 50 | loader: &Loader, - 51 | loader_config: &Config, - 52 | tags_context: &mut TagsContext, - 53 | directory: &Path, - 54 | use_color: bool, - 55 | indent_level: usize, - 56 | ) -> Result<()> { - 57 | let mut failed = false; - | - 58 | for tag_test_file in fs::read_dir(directory)? { - 59 | let tag_test_file = tag_test_file?; - 60 | let test_file_path = tag_test_file.path(); - 61 | let test_file_name = tag_test_file.file_name(); - 62 | print!( - 63 | "{indent:indent_level$}", - 64 | indent = "", - 65 | indent_level = indent_level * 2 - 66 | ); - 67 | if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() { - 68 | println!("{}:", test_file_name.to_string_lossy()); - 69 | if test_tags_indented( - 70 | loader, - 71 | loader_config, - 72 | tags_context, - 73 | &test_file_path, - 74 | use_color, - 75 | indent_level + 1, - 76 | ) - 77 | .is_err() - 78 | { - 79 | failed = true; - 80 | } - 81 | } else { - 82 | let (language, language_config) = loader - 83 | .language_configuration_for_file_name(&test_file_path)? 
- 84 | .ok_or_else(|| { - 85 | anyhow!( - 86 | "{}", - 87 | util::lang_not_found_for_path(test_file_path.as_path(), loader_config) - 88 | ) - 89 | })?; - 90 | let tags_config = language_config - 91 | .tags_config(language)? - 92 | .ok_or_else(|| anyhow!("No tags config found for {}", test_file_path.display()))?; - 93 | match test_tag( - 94 | tags_context, - 95 | tags_config, - 96 | fs::read(&test_file_path)?.as_slice(), - 97 | ) { - 98 | Ok(assertion_count) => { - 99 | println!( - 100 | "✓ {} ({assertion_count} assertions)", - 101 | paint( - 102 | use_color.then_some(AnsiColor::Green), - 103 | test_file_name.to_string_lossy().as_ref() - 104 | ), - 105 | ); - 106 | } - 107 | Err(e) => { - 108 | println!( - 109 | "✗ {}", - 110 | paint( - 111 | use_color.then_some(AnsiColor::Red), - 112 | test_file_name.to_string_lossy().as_ref() - 113 | ) - 114 | ); - 115 | println!( - 116 | "{indent:indent_level$} {e}", - 117 | indent = "", - 118 | indent_level = indent_level * 2 - 119 | ); - 120 | failed = true; - 121 | } - 122 | } - 123 | } - 124 | } - | - 125 | if failed { - 126 | Err(anyhow!("")) - 127 | } else { - 128 | Ok(()) - 129 | } - 130 | } - | - 131 | pub fn test_tag( - 132 | tags_context: &mut TagsContext, - 133 | tags_config: &TagsConfiguration, - 134 | source: &[u8], - 135 | ) -> Result { - 136 | let tags = get_tag_positions(tags_context, tags_config, source)?; - 137 | let assertions = parse_position_comments(tags_context.parser(), &tags_config.language, source)?; - | - 138 | // Iterate through all of the assertions, checking against the actual tags. 
- 139 | let mut i = 0; - 140 | let mut actual_tags = Vec::<&String>::new(); - 141 | for Assertion { - 142 | position, - 143 | length, - 144 | negative, - 145 | expected_capture_name: expected_tag, - 146 | } in &assertions - 147 | { - 148 | let mut passed = false; - 149 | let mut end_column = position.column + length - 1; - | - 150 | 'tag_loop: while let Some(tag) = tags.get(i) { - 151 | if tag.1 <= *position { - 152 | i += 1; - 153 | continue; - 154 | } - | - 155 | // Iterate through all of the tags that start at or before this assertion's - 156 | // position, looking for one that matches the assertion - 157 | let mut j = i; - 158 | while let (false, Some(tag)) = (passed, tags.get(j)) { - 159 | end_column = position.column + length - 1; - 160 | if tag.0.column > end_column { - 161 | break 'tag_loop; - 162 | } - | - 163 | let tag_name = &tag.2; - 164 | if (*tag_name == *expected_tag) == *negative { - 165 | actual_tags.push(tag_name); - 166 | } else { - 167 | passed = true; - 168 | break 'tag_loop; - 169 | } - | - 170 | j += 1; - 171 | if tag == tags.last().unwrap() { - 172 | break 'tag_loop; - 173 | } - 174 | } - 175 | } - | - 176 | if !passed { - 177 | return Err(Failure { - 178 | row: position.row, - 179 | column: end_column, - 180 | expected_tag: expected_tag.clone(), - 181 | actual_tags: actual_tags.into_iter().cloned().collect(), - 182 | } - 183 | .into()); - 184 | } - 185 | } - | - 186 | Ok(assertions.len()) - 187 | } - | - 188 | pub fn get_tag_positions( - 189 | tags_context: &mut TagsContext, - 190 | tags_config: &TagsConfiguration, - 191 | source: &[u8], - 192 | ) -> Result> { - 193 | let (tags_iter, _has_error) = tags_context.generate_tags(tags_config, source, None)?; - 194 | let tag_positions = tags_iter - 195 | .filter_map(std::result::Result::ok) - 196 | .map(|tag| { - 197 | let tag_postfix = tags_config.syntax_type_name(tag.syntax_type_id).to_string(); - 198 | let tag_name = if tag.is_definition { - 199 | format!("definition.{tag_postfix}") - 200 | } 
else { - 201 | format!("reference.{tag_postfix}") - 202 | }; - 203 | ( - 204 | to_utf8_point(tag.span.start, source), - 205 | to_utf8_point(tag.span.end, source), - 206 | tag_name, - 207 | ) - 208 | }) - 209 | .collect(); - 210 | Ok(tag_positions) - 211 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/test.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | collections::BTreeMap, - 3 | ffi::OsStr, - 4 | fmt::Write as _, - 5 | fs, - 6 | io::{self, Write}, - 7 | path::{Path, PathBuf}, - 8 | str, - 9 | sync::LazyLock, - 10 | time::Duration, - 11 | }; - | - 12 | use anstyle::AnsiColor; - 13 | use anyhow::{anyhow, Context, Result}; - 14 | use clap::ValueEnum; - 15 | use indoc::indoc; - 16 | use regex::{ - 17 | bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}, - 18 | Regex, - 19 | }; - 20 | use similar::{ChangeTag, TextDiff}; - 21 | use tree_sitter::{format_sexp, Language, LogType, Parser, Query, Tree}; - 22 | use walkdir::WalkDir; - | - 23 | use super::util; - 24 | use crate::{ - 25 | logger::paint, - 26 | parse::{ - 27 | render_cst, ParseDebugType, ParseFileOptions, ParseOutput, ParseStats, ParseTheme, Stats, - 28 | }, - 29 | }; - | - 30 | static HEADER_REGEX: LazyLock = LazyLock::new(|| { - 31 | ByteRegexBuilder::new( - 32 | r"^(?x) - 33 | (?P(?:=+){3,}) - 34 | (?P[^=\r\n][^\r\n]*)? 
- 35 | \r?\n - 36 | (?P(?:([^=\r\n]|\s+:)[^\r\n]*\r?\n)+) - 37 | ===+ - 38 | (?P[^=\r\n][^\r\n]*)?\r?\n", - 39 | ) - 40 | .multi_line(true) - 41 | .build() - 42 | .unwrap() - 43 | }); - | - 44 | static DIVIDER_REGEX: LazyLock = LazyLock::new(|| { - 45 | ByteRegexBuilder::new(r"^(?P(?:-+){3,})(?P[^-\r\n][^\r\n]*)?\r?\n") - 46 | .multi_line(true) - 47 | .build() - 48 | .unwrap() - 49 | }); - | - 50 | static COMMENT_REGEX: LazyLock = LazyLock::new(|| Regex::new(r"(?m)^\s*;.*$").unwrap()); - | - 51 | static WHITESPACE_REGEX: LazyLock = LazyLock::new(|| Regex::new(r"\s+").unwrap()); - | - 52 | static SEXP_FIELD_REGEX: LazyLock = LazyLock::new(|| Regex::new(r" \w+: \(").unwrap()); - | - 53 | static POINT_REGEX: LazyLock = - 54 | LazyLock::new(|| Regex::new(r"\s*\[\s*\d+\s*,\s*\d+\s*\]\s*").unwrap()); - | - 55 | #[derive(Debug, PartialEq, Eq)] - 56 | pub enum TestEntry { - 57 | Group { - 58 | name: String, - 59 | children: Vec, - 60 | file_path: Option, - 61 | }, - 62 | Example { - 63 | name: String, - 64 | input: Vec, - 65 | output: String, - 66 | header_delim_len: usize, - 67 | divider_delim_len: usize, - 68 | has_fields: bool, - 69 | attributes_str: String, - 70 | attributes: TestAttributes, - 71 | file_name: Option, - 72 | }, - 73 | } - | - 74 | #[derive(Debug, Clone, PartialEq, Eq)] - 75 | pub struct TestAttributes { - 76 | pub skip: bool, - 77 | pub platform: bool, - 78 | pub fail_fast: bool, - 79 | pub error: bool, - 80 | pub cst: bool, - 81 | pub languages: Vec>, - 82 | } - | - 83 | impl Default for TestEntry { - 84 | fn default() -> Self { - 85 | Self::Group { - 86 | name: String::new(), - 87 | children: Vec::new(), - 88 | file_path: None, - 89 | } - 90 | } - 91 | } - | - 92 | impl Default for TestAttributes { - 93 | fn default() -> Self { - 94 | Self { - 95 | skip: false, - 96 | platform: true, - 97 | fail_fast: false, - 98 | error: false, - 99 | cst: false, - 100 | languages: vec!["".into()], - 101 | } - 102 | } - 103 | } - | - 104 | #[derive(ValueEnum, 
Default, Copy, Clone, PartialEq, Eq)] - 105 | pub enum TestStats { - 106 | All, - 107 | #[default] - 108 | OutliersAndTotal, - 109 | TotalOnly, - 110 | } - | - 111 | pub struct TestOptions<'a> { - 112 | pub output: &'a mut String, - 113 | pub path: PathBuf, - 114 | pub debug: bool, - 115 | pub debug_graph: bool, - 116 | pub include: Option, - 117 | pub exclude: Option, - 118 | pub file_name: Option, - 119 | pub update: bool, - 120 | pub open_log: bool, - 121 | pub languages: BTreeMap<&'a str, &'a Language>, - 122 | pub color: bool, - 123 | pub test_num: usize, - 124 | /// Whether a test ran for the nth line in `output`, the true parse rate, and the adjusted - 125 | /// parse rate - 126 | pub parse_rates: &'a mut Vec<(bool, Option<(f64, f64)>)>, - 127 | pub stat_display: TestStats, - 128 | pub stats: &'a mut Stats, - 129 | pub show_fields: bool, - 130 | pub overview_only: bool, - 131 | } - | - 132 | pub fn run_tests_at_path(parser: &mut Parser, opts: &mut TestOptions) -> Result<()> { - 133 | let test_entry = parse_tests(&opts.path)?; - 134 | let mut _log_session = None; - | - 135 | if opts.debug_graph { - 136 | _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?); - 137 | } else if opts.debug { - 138 | parser.set_logger(Some(Box::new(|log_type, message| { - 139 | if log_type == LogType::Lex { - 140 | io::stderr().write_all(b" ").unwrap(); - 141 | } - 142 | writeln!(&mut io::stderr(), "{message}").unwrap(); - 143 | }))); - 144 | } - | - 145 | let mut failures = Vec::new(); - 146 | let mut corrected_entries = Vec::new(); - 147 | let mut has_parse_errors = false; - 148 | run_tests( - 149 | parser, - 150 | test_entry, - 151 | opts, - 152 | 0, - 153 | &mut failures, - 154 | &mut corrected_entries, - 155 | &mut has_parse_errors, - 156 | )?; - | - 157 | let (count, total_adj_parse_time) = opts - 158 | .parse_rates - 159 | .iter() - 160 | .flat_map(|(_, rates)| rates) - 161 | .fold((0usize, 0.0f64), |(count, rate_accum), (_, adj_rate)| { - 162 | (count 
+ 1, rate_accum + adj_rate) - 163 | }); - | - 164 | let avg = total_adj_parse_time / count as f64; - 165 | let std_dev = { - 166 | let variance = opts - 167 | .parse_rates - 168 | .iter() - 169 | .flat_map(|(_, rates)| rates) - 170 | .map(|(_, rate_i)| (rate_i - avg).powi(2)) - 171 | .sum::() - 172 | / count as f64; - 173 | variance.sqrt() - 174 | }; - | - 175 | for ((is_test, rates), out_line) in opts.parse_rates.iter().zip(opts.output.lines()) { - 176 | let stat_display = if !is_test { - 177 | // Test group, no actual parsing took place - 178 | String::new() - 179 | } else { - 180 | match (opts.stat_display, rates) { - 181 | (TestStats::TotalOnly, _) | (_, None) => String::new(), - 182 | (display, Some((true_rate, adj_rate))) => { - 183 | let mut stats = if display == TestStats::All { - 184 | format!(" ({true_rate:.3} bytes/ms)") - 185 | } else { - 186 | String::new() - 187 | }; - 188 | // 3 standard deviations below the mean, aka the "Empirical Rule" - 189 | if *adj_rate < 3.0f64.mul_add(-std_dev, avg) { - 190 | stats += &paint( - 191 | opts.color.then_some(AnsiColor::Yellow), - 192 | &format!(" -- Warning: Slow parse rate ({true_rate:.3} bytes/ms)"), - 193 | ); - 194 | } - 195 | stats - 196 | } - 197 | } - 198 | }; - 199 | println!("{out_line}{stat_display}"); - 200 | } - | - 201 | parser.stop_printing_dot_graphs(); - | - 202 | if failures.is_empty() { - 203 | Ok(()) - 204 | } else if opts.update && !has_parse_errors { - 205 | println!( - 206 | "\n{} update{}:\n", - 207 | failures.len(), - 208 | if failures.len() == 1 { "" } else { "s" } - 209 | ); - | - 210 | for (i, TestFailure { name, .. }) in failures.iter().enumerate() { - 211 | println!(" {}. 
{name}", i + 1); - 212 | } - | - 213 | Ok(()) - 214 | } else { - 215 | has_parse_errors = opts.update && has_parse_errors; - | - 216 | if !opts.overview_only { - 217 | if !has_parse_errors { - 218 | println!( - 219 | "\n{} failure{}:", - 220 | failures.len(), - 221 | if failures.len() == 1 { "" } else { "s" } - 222 | ); - 223 | } - | - 224 | if opts.color { - 225 | print_diff_key(); - 226 | } - 227 | for ( - 228 | i, - 229 | TestFailure { - 230 | name, - 231 | actual, - 232 | expected, - 233 | is_cst, - 234 | }, - 235 | ) in failures.iter().enumerate() - 236 | { - 237 | if expected == "NO ERROR" { - 238 | println!("\n {}. {name}:\n", i + 1); - 239 | println!(" Expected an ERROR node, but got:"); - 240 | let actual = if *is_cst { - 241 | actual - 242 | } else { - 243 | &format_sexp(actual, 2) - 244 | }; - 245 | println!(" {}", paint(opts.color.then_some(AnsiColor::Red), actual)); - 246 | } else { - 247 | println!("\n {}. {name}:", i + 1); - 248 | if *is_cst { - 249 | print_diff(actual, expected, opts.color); - 250 | } else { - 251 | print_diff( - 252 | &format_sexp(actual, 2), - 253 | &format_sexp(expected, 2), - 254 | opts.color, - 255 | ); - 256 | } - 257 | } - 258 | } - 259 | } else { - 260 | println!(); - 261 | } - | - 262 | if has_parse_errors { - 263 | Err(anyhow!(indoc! {" - 264 | Some tests failed to parse with unexpected `ERROR` or `MISSING` nodes, as shown above, and cannot be updated automatically. 
- 265 | Either fix the grammar or manually update the tests if this is expected."})) - 266 | } else { - 267 | Err(anyhow!("")) - 268 | } - 269 | } - 270 | } - | - 271 | pub fn check_queries_at_path(language: &Language, path: &Path) -> Result<()> { - 272 | if path.exists() { - 273 | for entry in WalkDir::new(path) - 274 | .into_iter() - 275 | .filter_map(std::result::Result::ok) - 276 | .filter(|e| { - 277 | e.file_type().is_file() - 278 | && e.path().extension().and_then(OsStr::to_str) == Some("scm") - 279 | && !e.path().starts_with(".") - 280 | }) - 281 | { - 282 | let filepath = entry.file_name().to_str().unwrap_or(""); - 283 | let content = fs::read_to_string(entry.path()) - 284 | .with_context(|| format!("Error reading query file {filepath:?}"))?; - 285 | Query::new(language, &content) - 286 | .with_context(|| format!("Error in query file {filepath:?}"))?; - 287 | } - 288 | } - 289 | Ok(()) - 290 | } - | - 291 | pub fn print_diff_key() { - 292 | println!( - 293 | "\ncorrect / {} / {}", - 294 | paint(Some(AnsiColor::Green), "expected"), - 295 | paint(Some(AnsiColor::Red), "unexpected") - 296 | ); - 297 | } - | - 298 | pub fn print_diff(actual: &str, expected: &str, use_color: bool) { - 299 | let diff = TextDiff::from_lines(actual, expected); - 300 | for diff in diff.iter_all_changes() { - 301 | match diff.tag() { - 302 | ChangeTag::Equal => { - 303 | if use_color { - 304 | print!("{diff}"); - 305 | } else { - 306 | print!(" {diff}"); - 307 | } - 308 | } - 309 | ChangeTag::Insert => { - 310 | if use_color { - 311 | print!("{}", paint(Some(AnsiColor::Green), diff.as_str().unwrap())); - 312 | } else { - 313 | print!("+{diff}"); - 314 | } - 315 | if diff.missing_newline() { - 316 | println!(); - 317 | } - 318 | } - 319 | ChangeTag::Delete => { - 320 | if use_color { - 321 | print!("{}", paint(Some(AnsiColor::Red), diff.as_str().unwrap())); - 322 | } else { - 323 | print!("-{diff}"); - 324 | } - 325 | if diff.missing_newline() { - 326 | println!(); - 327 | } - 328 | 
} - 329 | } - 330 | } - | - 331 | println!(); - 332 | } - | - 333 | struct TestFailure { - 334 | name: String, - 335 | actual: String, - 336 | expected: String, - 337 | is_cst: bool, - 338 | } - | - 339 | impl TestFailure { - 340 | fn new(name: T, actual: U, expected: V, is_cst: bool) -> Self - 341 | where - 342 | T: Into, - 343 | U: Into, - 344 | V: Into, - 345 | { - 346 | Self { - 347 | name: name.into(), - 348 | actual: actual.into(), - 349 | expected: expected.into(), - 350 | is_cst, - 351 | } - 352 | } - 353 | } - | - 354 | struct TestCorrection { - 355 | name: String, - 356 | input: String, - 357 | output: String, - 358 | attributes_str: String, - 359 | header_delim_len: usize, - 360 | divider_delim_len: usize, - 361 | } - | - 362 | impl TestCorrection { - 363 | fn new( - 364 | name: T, - 365 | input: U, - 366 | output: V, - 367 | attributes_str: W, - 368 | header_delim_len: usize, - 369 | divider_delim_len: usize, - 370 | ) -> Self - 371 | where - 372 | T: Into, - 373 | U: Into, - 374 | V: Into, - 375 | W: Into, - 376 | { - 377 | Self { - 378 | name: name.into(), - 379 | input: input.into(), - 380 | output: output.into(), - 381 | attributes_str: attributes_str.into(), - 382 | header_delim_len, - 383 | divider_delim_len, - 384 | } - 385 | } - 386 | } - | - 387 | /// This will return false if we want to "fail fast". It will bail and not parse any more tests. - 388 | #[allow(clippy::too_many_arguments)] - 389 | fn run_tests( - 390 | parser: &mut Parser, - 391 | test_entry: TestEntry, - 392 | opts: &mut TestOptions, - 393 | mut indent_level: u32, - 394 | failures: &mut Vec, - 395 | corrected_entries: &mut Vec, - 396 | has_parse_errors: &mut bool, - 397 | ) -> Result { - 398 | match test_entry { - 399 | TestEntry::Example { - 400 | name, - 401 | input, - 402 | output, - 403 | header_delim_len, - 404 | divider_delim_len, - 405 | has_fields, - 406 | attributes_str, - 407 | attributes, - 408 | .. 
- 409 | } => { - 410 | write!(opts.output, "{}", " ".repeat(indent_level as usize))?; - | - 411 | if attributes.skip { - 412 | writeln!( - 413 | opts.output, - 414 | "{:>3}. ⌀ {}", - 415 | opts.test_num, - 416 | paint(opts.color.then_some(AnsiColor::Yellow), &name), - 417 | )?; - 418 | opts.parse_rates.push((true, None)); - 419 | opts.test_num += 1; - 420 | return Ok(true); - 421 | } - | - 422 | if !attributes.platform { - 423 | writeln!( - 424 | opts.output, - 425 | "{:>3}. ⌀ {}", - 426 | opts.test_num, - 427 | paint(opts.color.then_some(AnsiColor::Magenta), &name), - 428 | )?; - 429 | opts.parse_rates.push((true, None)); - 430 | opts.test_num += 1; - 431 | return Ok(true); - 432 | } - | - 433 | for (i, language_name) in attributes.languages.iter().enumerate() { - 434 | if !language_name.is_empty() { - 435 | let language = opts - 436 | .languages - 437 | .get(language_name.as_ref()) - 438 | .ok_or_else(|| anyhow!("Language not found: {language_name}"))?; - 439 | parser.set_language(language)?; - 440 | } - 441 | let start = std::time::Instant::now(); - 442 | let tree = parser.parse(&input, None).unwrap(); - 443 | { - 444 | let parse_time = start.elapsed(); - 445 | let true_parse_rate = tree.root_node().byte_range().len() as f64 - 446 | / (parse_time.as_nanos() as f64 / 1_000_000.0); - 447 | let adj_parse_rate = adjusted_parse_rate(&tree, parse_time); - | - 448 | opts.parse_rates - 449 | .push((true, Some((true_parse_rate, adj_parse_rate)))); - 450 | opts.stats.total_parses += 1; - 451 | opts.stats.total_duration += parse_time; - 452 | opts.stats.total_bytes += tree.root_node().byte_range().len(); - 453 | } - | - 454 | if attributes.error { - 455 | if tree.root_node().has_error() { - 456 | writeln!( - 457 | opts.output, - 458 | "{:>3}. 
✓ {}", - 459 | opts.test_num, - 460 | paint(opts.color.then_some(AnsiColor::Green), &name), - 461 | )?; - 462 | opts.stats.successful_parses += 1; - 463 | if opts.update { - 464 | let input = String::from_utf8(input.clone()).unwrap(); - 465 | let output = if attributes.cst { - 466 | output.clone() - 467 | } else { - 468 | format_sexp(&output, 0) - 469 | }; - 470 | corrected_entries.push(TestCorrection::new( - 471 | &name, - 472 | input, - 473 | output, - 474 | &attributes_str, - 475 | header_delim_len, - 476 | divider_delim_len, - 477 | )); - 478 | } - 479 | } else { - 480 | if opts.update { - 481 | let input = String::from_utf8(input.clone()).unwrap(); - 482 | // Keep the original `expected` output if the actual output has no error - 483 | let output = if attributes.cst { - 484 | output.clone() - 485 | } else { - 486 | format_sexp(&output, 0) - 487 | }; - 488 | corrected_entries.push(TestCorrection::new( - 489 | &name, - 490 | input, - 491 | output, - 492 | &attributes_str, - 493 | header_delim_len, - 494 | divider_delim_len, - 495 | )); - 496 | } - 497 | writeln!( - 498 | opts.output, - 499 | "{:>3}. ✗ {}", - 500 | opts.test_num, - 501 | paint(opts.color.then_some(AnsiColor::Red), &name), - 502 | )?; - 503 | let actual = if attributes.cst { - 504 | render_test_cst(&input, &tree)? - 505 | } else { - 506 | tree.root_node().to_sexp() - 507 | }; - 508 | failures.push(TestFailure::new(&name, actual, "NO ERROR", attributes.cst)); - 509 | } - | - 510 | if attributes.fail_fast { - 511 | return Ok(false); - 512 | } - 513 | } else { - 514 | let mut actual = if attributes.cst { - 515 | render_test_cst(&input, &tree)? - 516 | } else { - 517 | tree.root_node().to_sexp() - 518 | }; - 519 | if !(attributes.cst || opts.show_fields || has_fields) { - 520 | actual = strip_sexp_fields(&actual); - 521 | } - | - 522 | if actual == output { - 523 | writeln!( - 524 | opts.output, - 525 | "{:>3}. 
✓ {}", - 526 | opts.test_num, - 527 | paint(opts.color.then_some(AnsiColor::Green), &name), - 528 | )?; - 529 | opts.stats.successful_parses += 1; - 530 | if opts.update { - 531 | let input = String::from_utf8(input.clone()).unwrap(); - 532 | let output = if attributes.cst { - 533 | actual - 534 | } else { - 535 | format_sexp(&output, 0) - 536 | }; - 537 | corrected_entries.push(TestCorrection::new( - 538 | &name, - 539 | input, - 540 | output, - 541 | &attributes_str, - 542 | header_delim_len, - 543 | divider_delim_len, - 544 | )); - 545 | } - 546 | } else { - 547 | if opts.update { - 548 | let input = String::from_utf8(input.clone()).unwrap(); - 549 | let (expected_output, actual_output) = if attributes.cst { - 550 | (output.clone(), actual.clone()) - 551 | } else { - 552 | (format_sexp(&output, 0), format_sexp(&actual, 0)) - 553 | }; - | - 554 | // Only bail early before updating if the actual is not the output, - 555 | // sometimes users want to test cases that - 556 | // are intended to have errors, hence why this - 557 | // check isn't shown above - 558 | if actual.contains("ERROR") || actual.contains("MISSING") { - 559 | *has_parse_errors = true; - | - 560 | // keep the original `expected` output if the actual output has an - 561 | // error - 562 | corrected_entries.push(TestCorrection::new( - 563 | &name, - 564 | input, - 565 | expected_output, - 566 | &attributes_str, - 567 | header_delim_len, - 568 | divider_delim_len, - 569 | )); - 570 | } else { - 571 | corrected_entries.push(TestCorrection::new( - 572 | &name, - 573 | input, - 574 | actual_output, - 575 | &attributes_str, - 576 | header_delim_len, - 577 | divider_delim_len, - 578 | )); - 579 | writeln!( - 580 | opts.output, - 581 | "{:>3}. ✓ {}", - 582 | opts.test_num, - 583 | paint(opts.color.then_some(AnsiColor::Blue), &name), - 584 | )?; - 585 | } - 586 | } else { - 587 | writeln!( - 588 | opts.output, - 589 | "{:>3}. 
✗ {}", - 590 | opts.test_num, - 591 | paint(opts.color.then_some(AnsiColor::Red), &name), - 592 | )?; - 593 | } - 594 | failures.push(TestFailure::new(&name, actual, &output, attributes.cst)); - | - 595 | if attributes.fail_fast { - 596 | return Ok(false); - 597 | } - 598 | } - 599 | } - | - 600 | if i == attributes.languages.len() - 1 { - 601 | // reset to the first language - 602 | parser.set_language(opts.languages.values().next().unwrap())?; - 603 | } - 604 | } - 605 | opts.test_num += 1; - 606 | } - 607 | TestEntry::Group { - 608 | name, - 609 | children, - 610 | file_path, - 611 | } => { - 612 | if children.is_empty() { - 613 | return Ok(true); - 614 | } - | - 615 | indent_level += 1; - 616 | let failure_count = failures.len(); - 617 | let mut has_printed = false; - | - 618 | let matches_filter = |name: &str, file_name: &Option, opts: &TestOptions| { - 619 | if let (Some(test_file_path), Some(filter_file_name)) = (file_name, &opts.file_name) - 620 | { - 621 | if !filter_file_name.eq(test_file_path) { - 622 | return false; - 623 | } - 624 | } - 625 | if let Some(include) = &opts.include { - 626 | include.is_match(name) - 627 | } else if let Some(exclude) = &opts.exclude { - 628 | !exclude.is_match(name) - 629 | } else { - 630 | true - 631 | } - 632 | }; - | - 633 | let should_skip = |entry: &TestEntry, opts: &TestOptions| match entry { - 634 | TestEntry::Example { - 635 | name, file_name, .. - 636 | } => !matches_filter(name, file_name, opts), - 637 | TestEntry::Group { .. } => false, - 638 | }; - | - 639 | for child in children { - 640 | if let TestEntry::Example { - 641 | ref name, - 642 | ref input, - 643 | ref output, - 644 | ref attributes_str, - 645 | header_delim_len, - 646 | divider_delim_len, - 647 | .. 
- 648 | } = child - 649 | { - 650 | if should_skip(&child, opts) { - 651 | let input = String::from_utf8(input.clone()).unwrap(); - 652 | let output = format_sexp(output, 0); - 653 | corrected_entries.push(TestCorrection::new( - 654 | name, - 655 | input, - 656 | output, - 657 | attributes_str, - 658 | header_delim_len, - 659 | divider_delim_len, - 660 | )); - | - 661 | opts.test_num += 1; - | - 662 | continue; - 663 | } - 664 | } - 665 | if !has_printed && indent_level > 1 { - 666 | has_printed = true; - 667 | writeln!( - 668 | opts.output, - 669 | "{}{name}:", - 670 | " ".repeat((indent_level - 1) as usize) - 671 | )?; - 672 | opts.parse_rates.push((false, None)); - 673 | } - 674 | if !run_tests( - 675 | parser, - 676 | child, - 677 | opts, - 678 | indent_level, - 679 | failures, - 680 | corrected_entries, - 681 | has_parse_errors, - 682 | )? { - 683 | // fail fast - 684 | return Ok(false); - 685 | } - 686 | } - | - 687 | if let Some(file_path) = file_path { - 688 | if opts.update && failures.len() - failure_count > 0 { - 689 | write_tests(&file_path, corrected_entries)?; - 690 | } - 691 | corrected_entries.clear(); - 692 | } - 693 | } - 694 | } - 695 | Ok(true) - 696 | } - | - 697 | /// Convenience wrapper to render a CST for a test entry. 
- 698 | fn render_test_cst(input: &[u8], tree: &Tree) -> Result { - 699 | let mut rendered_cst: Vec = Vec::new(); - 700 | let mut cursor = tree.walk(); - 701 | let opts = ParseFileOptions { - 702 | edits: &[], - 703 | output: ParseOutput::Cst, - 704 | stats: &mut ParseStats::default(), - 705 | print_time: false, - 706 | timeout: 0, - 707 | debug: ParseDebugType::Quiet, - 708 | debug_graph: false, - 709 | cancellation_flag: None, - 710 | encoding: None, - 711 | open_log: false, - 712 | no_ranges: false, - 713 | parse_theme: &ParseTheme::empty(), - 714 | }; - 715 | render_cst(input, tree, &mut cursor, &opts, &mut rendered_cst)?; - 716 | Ok(String::from_utf8_lossy(&rendered_cst).trim().to_string()) - 717 | } - | - 718 | // Parse time is interpreted in ns before converting to ms to avoid truncation issues - 719 | // Parse rates often have several outliers, leading to a large standard deviation. Taking - 720 | // the log of these rates serves to "flatten" out the distribution, yielding a more - 721 | // usable standard deviation for finding statistically significant slow parse rates - 722 | // NOTE: This is just a heuristic - 723 | #[must_use] - 724 | pub fn adjusted_parse_rate(tree: &Tree, parse_time: Duration) -> f64 { - 725 | f64::ln( - 726 | tree.root_node().byte_range().len() as f64 / (parse_time.as_nanos() as f64 / 1_000_000.0), - 727 | ) - 728 | } - | - 729 | fn write_tests(file_path: &Path, corrected_entries: &[TestCorrection]) -> Result<()> { - 730 | let mut buffer = fs::File::create(file_path)?; - 731 | write_tests_to_buffer(&mut buffer, corrected_entries) - 732 | } - | - 733 | fn write_tests_to_buffer( - 734 | buffer: &mut impl Write, - 735 | corrected_entries: &[TestCorrection], - 736 | ) -> Result<()> { - 737 | for ( - 738 | i, - 739 | TestCorrection { - 740 | name, - 741 | input, - 742 | output, - 743 | attributes_str, - 744 | header_delim_len, - 745 | divider_delim_len, - 746 | }, - 747 | ) in corrected_entries.iter().enumerate() - 748 | { - 749 | if i > 
0 { - 750 | writeln!(buffer)?; - 751 | } - 752 | writeln!( - 753 | buffer, - 754 | "{}\n{name}\n{}{}\n{input}\n{}\n\n{}", - 755 | "=".repeat(*header_delim_len), - 756 | if attributes_str.is_empty() { - 757 | attributes_str.clone() - 758 | } else { - 759 | format!("{attributes_str}\n") - 760 | }, - 761 | "=".repeat(*header_delim_len), - 762 | "-".repeat(*divider_delim_len), - 763 | output.trim() - 764 | )?; - 765 | } - 766 | Ok(()) - 767 | } - | - 768 | pub fn parse_tests(path: &Path) -> io::Result { - 769 | let name = path - 770 | .file_stem() - 771 | .and_then(|s| s.to_str()) - 772 | .unwrap_or("") - 773 | .to_string(); - 774 | if path.is_dir() { - 775 | let mut children = Vec::new(); - 776 | for entry in fs::read_dir(path)? { - 777 | let entry = entry?; - 778 | let hidden = entry.file_name().to_str().unwrap_or("").starts_with('.'); - 779 | if !hidden { - 780 | children.push(entry.path()); - 781 | } - 782 | } - 783 | children.sort_by(|a, b| { - 784 | a.file_name() - 785 | .unwrap_or_default() - 786 | .cmp(b.file_name().unwrap_or_default()) - 787 | }); - 788 | let children = children - 789 | .iter() - 790 | .map(|path| parse_tests(path)) - 791 | .collect::>>()?; - 792 | Ok(TestEntry::Group { - 793 | name, - 794 | children, - 795 | file_path: None, - 796 | }) - 797 | } else { - 798 | let content = fs::read_to_string(path)?; - 799 | Ok(parse_test_content(name, &content, Some(path.to_path_buf()))) - 800 | } - 801 | } - | - 802 | #[must_use] - 803 | pub fn strip_sexp_fields(sexp: &str) -> String { - 804 | SEXP_FIELD_REGEX.replace_all(sexp, " (").to_string() - 805 | } - | - 806 | #[must_use] - 807 | pub fn strip_points(sexp: &str) -> String { - 808 | POINT_REGEX.replace_all(sexp, "").to_string() - 809 | } - | - 810 | fn parse_test_content(name: String, content: &str, file_path: Option) -> TestEntry { - 811 | let mut children = Vec::new(); - 812 | let bytes = content.as_bytes(); - 813 | let mut prev_name = String::new(); - 814 | let mut prev_attributes_str = 
String::new(); - 815 | let mut prev_header_end = 0; - | - 816 | // Find the first test header in the file, and determine if it has a - 817 | // custom suffix. If so, then this suffix will be used to identify - 818 | // all subsequent headers and divider lines in the file. - 819 | let first_suffix = HEADER_REGEX - 820 | .captures(bytes) - 821 | .and_then(|c| c.name("suffix1")) - 822 | .map(|m| String::from_utf8_lossy(m.as_bytes())); - | - 823 | // Find all of the `===` test headers, which contain the test names. - 824 | // Ignore any matches whose suffix does not match the first header - 825 | // suffix in the file. - 826 | let header_matches = HEADER_REGEX.captures_iter(bytes).filter_map(|c| { - 827 | let header_delim_len = c.name("equals").map_or(80, |m| m.as_bytes().len()); - 828 | let suffix1 = c - 829 | .name("suffix1") - 830 | .map(|m| String::from_utf8_lossy(m.as_bytes())); - 831 | let suffix2 = c - 832 | .name("suffix2") - 833 | .map(|m| String::from_utf8_lossy(m.as_bytes())); - | - 834 | let (mut skip, mut platform, mut fail_fast, mut error, mut cst, mut languages) = - 835 | (false, None, false, false, false, vec![]); - | - 836 | let test_name_and_markers = c - 837 | .name("test_name_and_markers") - 838 | .map_or("".as_bytes(), |m| m.as_bytes()); - | - 839 | let mut test_name = String::new(); - 840 | let mut attributes_str = String::new(); - | - 841 | let mut seen_marker = false; - | - 842 | let test_name_and_markers = str::from_utf8(test_name_and_markers).unwrap(); - 843 | for line in test_name_and_markers - 844 | .split_inclusive('\n') - 845 | .filter(|s| !s.is_empty()) - 846 | { - 847 | let trimmed_line = line.trim(); - 848 | match trimmed_line.split('(').next().unwrap() { - 849 | ":skip" => (seen_marker, skip) = (true, true), - 850 | ":platform" => { - 851 | if let Some(platforms) = trimmed_line.strip_prefix(':').and_then(|s| { - 852 | s.strip_prefix("platform(") - 853 | .and_then(|s| s.strip_suffix(')')) - 854 | }) { - 855 | seen_marker = true; - 856 | 
platform = Some( - 857 | platform.unwrap_or(false) || platforms.trim() == std::env::consts::OS, - 858 | ); - 859 | } - 860 | } - 861 | ":fail-fast" => (seen_marker, fail_fast) = (true, true), - 862 | ":error" => (seen_marker, error) = (true, true), - 863 | ":language" => { - 864 | if let Some(lang) = trimmed_line.strip_prefix(':').and_then(|s| { - 865 | s.strip_prefix("language(") - 866 | .and_then(|s| s.strip_suffix(')')) - 867 | }) { - 868 | seen_marker = true; - 869 | languages.push(lang.into()); - 870 | } - 871 | } - 872 | ":cst" => (seen_marker, cst) = (true, true), - 873 | _ if !seen_marker => { - 874 | test_name.push_str(line); - 875 | } - 876 | _ => {} - 877 | } - 878 | } - 879 | attributes_str.push_str(test_name_and_markers.strip_prefix(&test_name).unwrap()); - | - 880 | // prefer skip over error, both shouldn't be set - 881 | if skip { - 882 | error = false; - 883 | } - | - 884 | // add a default language if none are specified, will defer to the first language - 885 | if languages.is_empty() { - 886 | languages.push("".into()); - 887 | } - | - 888 | if suffix1 == first_suffix && suffix2 == first_suffix { - 889 | let header_range = c.get(0).unwrap().range(); - 890 | let test_name = if test_name.is_empty() { - 891 | None - 892 | } else { - 893 | Some(test_name.trim_end().to_string()) - 894 | }; - 895 | let attributes_str = if attributes_str.is_empty() { - 896 | None - 897 | } else { - 898 | Some(attributes_str.trim_end().to_string()) - 899 | }; - 900 | Some(( - 901 | header_delim_len, - 902 | header_range, - 903 | test_name, - 904 | attributes_str, - 905 | TestAttributes { - 906 | skip, - 907 | platform: platform.unwrap_or(true), - 908 | fail_fast, - 909 | error, - 910 | cst, - 911 | languages, - 912 | }, - 913 | )) - 914 | } else { - 915 | None - 916 | } - 917 | }); - | - 918 | let (mut prev_header_len, mut prev_attributes) = (80, TestAttributes::default()); - 919 | for (header_delim_len, header_range, test_name, attributes_str, attributes) in 
header_matches - 920 | .chain(Some(( - 921 | 80, - 922 | bytes.len()..bytes.len(), - 923 | None, - 924 | None, - 925 | TestAttributes::default(), - 926 | ))) - 927 | { - 928 | // Find the longest line of dashes following each test description. That line - 929 | // separates the input from the expected output. Ignore any matches whose suffix - 930 | // does not match the first suffix in the file. - 931 | if prev_header_end > 0 { - 932 | let divider_range = DIVIDER_REGEX - 933 | .captures_iter(&bytes[prev_header_end..header_range.start]) - 934 | .filter_map(|m| { - 935 | let divider_delim_len = m.name("hyphens").map_or(80, |m| m.as_bytes().len()); - 936 | let suffix = m - 937 | .name("suffix") - 938 | .map(|m| String::from_utf8_lossy(m.as_bytes())); - 939 | if suffix == first_suffix { - 940 | let range = m.get(0).unwrap().range(); - 941 | Some(( - 942 | divider_delim_len, - 943 | (prev_header_end + range.start)..(prev_header_end + range.end), - 944 | )) - 945 | } else { - 946 | None - 947 | } - 948 | }) - 949 | .max_by_key(|(_, range)| range.len()); - | - 950 | if let Some((divider_delim_len, divider_range)) = divider_range { - 951 | if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) { - 952 | let mut input = bytes[prev_header_end..divider_range.start].to_vec(); - | - 953 | // Remove trailing newline from the input. - 954 | input.pop(); - 955 | if input.last() == Some(&b'\r') { - 956 | input.pop(); - 957 | } - | - 958 | let (output, has_fields) = if prev_attributes.cst { - 959 | (output.trim().to_string(), false) - 960 | } else { - 961 | // Remove all comments - 962 | let output = COMMENT_REGEX.replace_all(output, "").to_string(); - | - 963 | // Normalize the whitespace in the expected output. - 964 | let output = WHITESPACE_REGEX.replace_all(output.trim(), " "); - 965 | let output = output.replace(" )", ")"); - | - 966 | // Identify if the expected output has fields indicated. If not, then - 967 | // fields will not be checked. 
- 968 | let has_fields = SEXP_FIELD_REGEX.is_match(&output); - | - 969 | (output, has_fields) - 970 | }; - | - 971 | let file_name = if let Some(ref path) = file_path { - 972 | path.file_name().map(|n| n.to_string_lossy().to_string()) - 973 | } else { - 974 | None - 975 | }; - | - 976 | let t = TestEntry::Example { - 977 | name: prev_name, - 978 | input, - 979 | output, - 980 | header_delim_len: prev_header_len, - 981 | divider_delim_len, - 982 | has_fields, - 983 | attributes_str: prev_attributes_str, - 984 | attributes: prev_attributes, - 985 | file_name, - 986 | }; - | - 987 | children.push(t); - 988 | } - 989 | } - 990 | } - 991 | prev_attributes = attributes; - 992 | prev_name = test_name.unwrap_or_default(); - 993 | prev_attributes_str = attributes_str.unwrap_or_default(); - 994 | prev_header_len = header_delim_len; - 995 | prev_header_end = header_range.end; - 996 | } - 997 | TestEntry::Group { - 998 | name, - 999 | children, -1000 | file_path, -1001 | } -1002 | } - | -1003 | #[cfg(test)] -1004 | mod tests { -1005 | use super::*; - | -1006 | #[test] -1007 | fn test_parse_test_content_simple() { -1008 | let entry = parse_test_content( -1009 | "the-filename".to_string(), -1010 | r" -1011 | =============== -1012 | The first test -1013 | =============== - | -1014 | a b c - | -1015 | --- - | -1016 | (a -1017 | (b c)) - | -1018 | ================ -1019 | The second test -1020 | ================ -1021 | d -1022 | --- -1023 | (d) -1024 | " -1025 | .trim(), -1026 | None, -1027 | ); - | -1028 | assert_eq!( -1029 | entry, -1030 | TestEntry::Group { -1031 | name: "the-filename".to_string(), -1032 | children: vec![ -1033 | TestEntry::Example { -1034 | name: "The first test".to_string(), -1035 | input: b"\na b c\n".to_vec(), -1036 | output: "(a (b c))".to_string(), -1037 | header_delim_len: 15, -1038 | divider_delim_len: 3, -1039 | has_fields: false, -1040 | attributes_str: String::new(), -1041 | attributes: TestAttributes::default(), -1042 | file_name: None, -1043 | }, 
-1044 | TestEntry::Example { -1045 | name: "The second test".to_string(), -1046 | input: b"d".to_vec(), -1047 | output: "(d)".to_string(), -1048 | header_delim_len: 16, -1049 | divider_delim_len: 3, -1050 | has_fields: false, -1051 | attributes_str: String::new(), -1052 | attributes: TestAttributes::default(), -1053 | file_name: None, -1054 | }, -1055 | ], -1056 | file_path: None, -1057 | } -1058 | ); -1059 | } - | -1060 | #[test] -1061 | fn test_parse_test_content_with_dashes_in_source_code() { -1062 | let entry = parse_test_content( -1063 | "the-filename".to_string(), -1064 | r" -1065 | ================== -1066 | Code with dashes -1067 | ================== -1068 | abc -1069 | --- -1070 | defg -1071 | ---- -1072 | hijkl -1073 | ------- - | -1074 | (a (b)) - | -1075 | ========================= -1076 | Code ending with dashes -1077 | ========================= -1078 | abc -1079 | ----------- -1080 | ------------------- - | -1081 | (c (d)) -1082 | " -1083 | .trim(), -1084 | None, -1085 | ); - | -1086 | assert_eq!( -1087 | entry, -1088 | TestEntry::Group { -1089 | name: "the-filename".to_string(), -1090 | children: vec![ -1091 | TestEntry::Example { -1092 | name: "Code with dashes".to_string(), -1093 | input: b"abc\n---\ndefg\n----\nhijkl".to_vec(), -1094 | output: "(a (b))".to_string(), -1095 | header_delim_len: 18, -1096 | divider_delim_len: 7, -1097 | has_fields: false, -1098 | attributes_str: String::new(), -1099 | attributes: TestAttributes::default(), -1100 | file_name: None, -1101 | }, -1102 | TestEntry::Example { -1103 | name: "Code ending with dashes".to_string(), -1104 | input: b"abc\n-----------".to_vec(), -1105 | output: "(c (d))".to_string(), -1106 | header_delim_len: 25, -1107 | divider_delim_len: 19, -1108 | has_fields: false, -1109 | attributes_str: String::new(), -1110 | attributes: TestAttributes::default(), -1111 | file_name: None, -1112 | }, -1113 | ], -1114 | file_path: None, -1115 | } -1116 | ); -1117 | } - | -1118 | #[test] -1119 | fn 
test_format_sexp() { -1120 | assert_eq!(format_sexp("", 0), ""); -1121 | assert_eq!( -1122 | format_sexp("(a b: (c) (d) e: (f (g (h (MISSING i)))))", 0), -1123 | r" -1124 | (a -1125 | b: (c) -1126 | (d) -1127 | e: (f -1128 | (g -1129 | (h -1130 | (MISSING i))))) -1131 | " -1132 | .trim() -1133 | ); -1134 | assert_eq!( -1135 | format_sexp("(program (ERROR (UNEXPECTED ' ')) (identifier))", 0), -1136 | r" -1137 | (program -1138 | (ERROR -1139 | (UNEXPECTED ' ')) -1140 | (identifier)) -1141 | " -1142 | .trim() -1143 | ); -1144 | assert_eq!( -1145 | format_sexp(r#"(source_file (MISSING ")"))"#, 0), -1146 | r#" -1147 | (source_file -1148 | (MISSING ")")) -1149 | "# -1150 | .trim() -1151 | ); -1152 | assert_eq!( -1153 | format_sexp( -1154 | r"(source_file (ERROR (UNEXPECTED 'f') (UNEXPECTED '+')))", -1155 | 0 -1156 | ), -1157 | r" -1158 | (source_file -1159 | (ERROR -1160 | (UNEXPECTED 'f') -1161 | (UNEXPECTED '+'))) -1162 | " -1163 | .trim() -1164 | ); -1165 | } - | -1166 | #[test] -1167 | fn test_write_tests_to_buffer() { -1168 | let mut buffer = Vec::new(); -1169 | let corrected_entries = vec![ -1170 | TestCorrection::new( -1171 | "title 1".to_string(), -1172 | "input 1".to_string(), -1173 | "output 1".to_string(), -1174 | String::new(), -1175 | 80, -1176 | 80, -1177 | ), -1178 | TestCorrection::new( -1179 | "title 2".to_string(), -1180 | "input 2".to_string(), -1181 | "output 2".to_string(), -1182 | String::new(), -1183 | 80, -1184 | 80, -1185 | ), -1186 | ]; -1187 | write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap(); -1188 | assert_eq!( -1189 | String::from_utf8(buffer).unwrap(), -1190 | r" -1191 | ================================================================================ -1192 | title 1 -1193 | ================================================================================ -1194 | input 1 -1195 | -------------------------------------------------------------------------------- - | -1196 | output 1 - | -1197 | 
================================================================================ -1198 | title 2 -1199 | ================================================================================ -1200 | input 2 -1201 | -------------------------------------------------------------------------------- - | -1202 | output 2 -1203 | " -1204 | .trim_start() -1205 | .to_string() -1206 | ); -1207 | } - | -1208 | #[test] -1209 | fn test_parse_test_content_with_comments_in_sexp() { -1210 | let entry = parse_test_content( -1211 | "the-filename".to_string(), -1212 | r#" -1213 | ================== -1214 | sexp with comment -1215 | ================== -1216 | code -1217 | --- - | -1218 | ; Line start comment -1219 | (a (b)) - | -1220 | ================== -1221 | sexp with comment between -1222 | ================== -1223 | code -1224 | --- - | -1225 | ; Line start comment -1226 | (a -1227 | ; ignore this -1228 | (b) -1229 | ; also ignore this -1230 | ) - | -1231 | ========================= -1232 | sexp with ';' -1233 | ========================= -1234 | code -1235 | --- - | -1236 | (MISSING ";") -1237 | "# -1238 | .trim(), -1239 | None, -1240 | ); - | -1241 | assert_eq!( -1242 | entry, -1243 | TestEntry::Group { -1244 | name: "the-filename".to_string(), -1245 | children: vec![ -1246 | TestEntry::Example { -1247 | name: "sexp with comment".to_string(), -1248 | input: b"code".to_vec(), -1249 | output: "(a (b))".to_string(), -1250 | header_delim_len: 18, -1251 | divider_delim_len: 3, -1252 | has_fields: false, -1253 | attributes_str: String::new(), -1254 | attributes: TestAttributes::default(), -1255 | file_name: None, -1256 | }, -1257 | TestEntry::Example { -1258 | name: "sexp with comment between".to_string(), -1259 | input: b"code".to_vec(), -1260 | output: "(a (b))".to_string(), -1261 | header_delim_len: 18, -1262 | divider_delim_len: 3, -1263 | has_fields: false, -1264 | attributes_str: String::new(), -1265 | attributes: TestAttributes::default(), -1266 | file_name: None, -1267 | }, -1268 
| TestEntry::Example { -1269 | name: "sexp with ';'".to_string(), -1270 | input: b"code".to_vec(), -1271 | output: "(MISSING \";\")".to_string(), -1272 | header_delim_len: 25, -1273 | divider_delim_len: 3, -1274 | has_fields: false, -1275 | attributes_str: String::new(), -1276 | attributes: TestAttributes::default(), -1277 | file_name: None, -1278 | } -1279 | ], -1280 | file_path: None, -1281 | } -1282 | ); -1283 | } - | -1284 | #[test] -1285 | fn test_parse_test_content_with_suffixes() { -1286 | let entry = parse_test_content( -1287 | "the-filename".to_string(), -1288 | r" -1289 | ==================asdf\()[]|{}*+?^$.- -1290 | First test -1291 | ==================asdf\()[]|{}*+?^$.- - | -1292 | ========================= -1293 | NOT A TEST HEADER -1294 | ========================= -1295 | ------------------------- - | -1296 | ---asdf\()[]|{}*+?^$.- - | -1297 | (a) - | -1298 | ==================asdf\()[]|{}*+?^$.- -1299 | Second test -1300 | ==================asdf\()[]|{}*+?^$.- - | -1301 | ========================= -1302 | NOT A TEST HEADER -1303 | ========================= -1304 | ------------------------- - | -1305 | ---asdf\()[]|{}*+?^$.- - | -1306 | (a) - | -1307 | =========================asdf\()[]|{}*+?^$.- -1308 | Test name with = symbol -1309 | =========================asdf\()[]|{}*+?^$.- - | -1310 | ========================= -1311 | NOT A TEST HEADER -1312 | ========================= -1313 | ------------------------- - | -1314 | ---asdf\()[]|{}*+?^$.- - | -1315 | (a) - | -1316 | ==============================asdf\()[]|{}*+?^$.- -1317 | Test containing equals -1318 | ==============================asdf\()[]|{}*+?^$.- - | -1319 | === - | -1320 | ------------------------------asdf\()[]|{}*+?^$.- - | -1321 | (a) - | -1322 | ==============================asdf\()[]|{}*+?^$.- -1323 | Subsequent test containing equals -1324 | ==============================asdf\()[]|{}*+?^$.- - | -1325 | === - | -1326 | ------------------------------asdf\()[]|{}*+?^$.- - | -1327 | (a) 
-1328 | " -1329 | .trim(), -1330 | None, -1331 | ); - | -1332 | let expected_input = b"\n=========================\n\ -1333 | NOT A TEST HEADER\n\ -1334 | =========================\n\ -1335 | -------------------------\n" -1336 | .to_vec(); -1337 | pretty_assertions::assert_eq!( -1338 | entry, -1339 | TestEntry::Group { -1340 | name: "the-filename".to_string(), -1341 | children: vec![ -1342 | TestEntry::Example { -1343 | name: "First test".to_string(), -1344 | input: expected_input.clone(), -1345 | output: "(a)".to_string(), -1346 | header_delim_len: 18, -1347 | divider_delim_len: 3, -1348 | has_fields: false, -1349 | attributes_str: String::new(), -1350 | attributes: TestAttributes::default(), -1351 | file_name: None, -1352 | }, -1353 | TestEntry::Example { -1354 | name: "Second test".to_string(), -1355 | input: expected_input.clone(), -1356 | output: "(a)".to_string(), -1357 | header_delim_len: 18, -1358 | divider_delim_len: 3, -1359 | has_fields: false, -1360 | attributes_str: String::new(), -1361 | attributes: TestAttributes::default(), -1362 | file_name: None, -1363 | }, -1364 | TestEntry::Example { -1365 | name: "Test name with = symbol".to_string(), -1366 | input: expected_input, -1367 | output: "(a)".to_string(), -1368 | header_delim_len: 25, -1369 | divider_delim_len: 3, -1370 | has_fields: false, -1371 | attributes_str: String::new(), -1372 | attributes: TestAttributes::default(), -1373 | file_name: None, -1374 | }, -1375 | TestEntry::Example { -1376 | name: "Test containing equals".to_string(), -1377 | input: "\n===\n".into(), -1378 | output: "(a)".into(), -1379 | header_delim_len: 30, -1380 | divider_delim_len: 30, -1381 | has_fields: false, -1382 | attributes_str: String::new(), -1383 | attributes: TestAttributes::default(), -1384 | file_name: None, -1385 | }, -1386 | TestEntry::Example { -1387 | name: "Subsequent test containing equals".to_string(), -1388 | input: "\n===\n".into(), -1389 | output: "(a)".into(), -1390 | header_delim_len: 30, -1391 | 
divider_delim_len: 30, -1392 | has_fields: false, -1393 | attributes_str: String::new(), -1394 | attributes: TestAttributes::default(), -1395 | file_name: None, -1396 | } -1397 | ], -1398 | file_path: None, -1399 | } -1400 | ); -1401 | } - | -1402 | #[test] -1403 | fn test_parse_test_content_with_newlines_in_test_names() { -1404 | let entry = parse_test_content( -1405 | "the-filename".to_string(), -1406 | r" -1407 | =============== -1408 | name -1409 | with -1410 | newlines -1411 | =============== -1412 | a -1413 | --- -1414 | (b) - | -1415 | ==================== -1416 | name with === signs -1417 | ==================== -1418 | code with ---- -1419 | --- -1420 | (d) -1421 | ", -1422 | None, -1423 | ); - | -1424 | assert_eq!( -1425 | entry, -1426 | TestEntry::Group { -1427 | name: "the-filename".to_string(), -1428 | file_path: None, -1429 | children: vec![ -1430 | TestEntry::Example { -1431 | name: "name\nwith\nnewlines".to_string(), -1432 | input: b"a".to_vec(), -1433 | output: "(b)".to_string(), -1434 | header_delim_len: 15, -1435 | divider_delim_len: 3, -1436 | has_fields: false, -1437 | attributes_str: String::new(), -1438 | attributes: TestAttributes::default(), -1439 | file_name: None, -1440 | }, -1441 | TestEntry::Example { -1442 | name: "name with === signs".to_string(), -1443 | input: b"code with ----".to_vec(), -1444 | output: "(d)".to_string(), -1445 | header_delim_len: 20, -1446 | divider_delim_len: 3, -1447 | has_fields: false, -1448 | attributes_str: String::new(), -1449 | attributes: TestAttributes::default(), -1450 | file_name: None, -1451 | } -1452 | ] -1453 | } -1454 | ); -1455 | } - | -1456 | #[test] -1457 | fn test_parse_test_with_markers() { -1458 | // do one with :skip, we should not see it in the entry output - | -1459 | let entry = parse_test_content( -1460 | "the-filename".to_string(), -1461 | r" -1462 | ===================== -1463 | Test with skip marker -1464 | :skip -1465 | ===================== -1466 | a -1467 | --- -1468 | (b) -1469 | ", 
-1470 | None, -1471 | ); - | -1472 | assert_eq!( -1473 | entry, -1474 | TestEntry::Group { -1475 | name: "the-filename".to_string(), -1476 | file_path: None, -1477 | children: vec![TestEntry::Example { -1478 | name: "Test with skip marker".to_string(), -1479 | input: b"a".to_vec(), -1480 | output: "(b)".to_string(), -1481 | header_delim_len: 21, -1482 | divider_delim_len: 3, -1483 | has_fields: false, -1484 | attributes_str: ":skip".to_string(), -1485 | attributes: TestAttributes { -1486 | skip: true, -1487 | platform: true, -1488 | fail_fast: false, -1489 | error: false, -1490 | cst: false, -1491 | languages: vec!["".into()] -1492 | }, -1493 | file_name: None, -1494 | }] -1495 | } -1496 | ); - | -1497 | let entry = parse_test_content( -1498 | "the-filename".to_string(), -1499 | &format!( -1500 | r" -1501 | ========================= -1502 | Test with platform marker -1503 | :platform({}) -1504 | :fail-fast -1505 | ========================= -1506 | a -1507 | --- -1508 | (b) - | -1509 | ============================= -1510 | Test with bad platform marker -1511 | :platform({}) - | -1512 | :language(foo) -1513 | ============================= -1514 | a -1515 | --- -1516 | (b) - | -1517 | ==================== -1518 | Test with cst marker -1519 | :cst -1520 | ==================== -1521 | 1 -1522 | --- -1523 | 0:0 - 1:0 source_file -1524 | 0:0 - 0:1 expression -1525 | 0:0 - 0:1 number_literal `1` -1526 | ", -1527 | std::env::consts::OS, -1528 | if std::env::consts::OS == "linux" { -1529 | "macos" -1530 | } else { -1531 | "linux" -1532 | } -1533 | ), -1534 | None, -1535 | ); - | -1536 | assert_eq!( -1537 | entry, -1538 | TestEntry::Group { -1539 | name: "the-filename".to_string(), -1540 | file_path: None, -1541 | children: vec![ -1542 | TestEntry::Example { -1543 | name: "Test with platform marker".to_string(), -1544 | input: b"a".to_vec(), -1545 | output: "(b)".to_string(), -1546 | header_delim_len: 25, -1547 | divider_delim_len: 3, -1548 | has_fields: false, -1549 | 
attributes_str: format!(":platform({})\n:fail-fast", std::env::consts::OS), -1550 | attributes: TestAttributes { -1551 | skip: false, -1552 | platform: true, -1553 | fail_fast: true, -1554 | error: false, -1555 | cst: false, -1556 | languages: vec!["".into()] -1557 | }, -1558 | file_name: None, -1559 | }, -1560 | TestEntry::Example { -1561 | name: "Test with bad platform marker".to_string(), -1562 | input: b"a".to_vec(), -1563 | output: "(b)".to_string(), -1564 | header_delim_len: 29, -1565 | divider_delim_len: 3, -1566 | has_fields: false, -1567 | attributes_str: if std::env::consts::OS == "linux" { -1568 | ":platform(macos)\n\n:language(foo)".to_string() -1569 | } else { -1570 | ":platform(linux)\n\n:language(foo)".to_string() -1571 | }, -1572 | attributes: TestAttributes { -1573 | skip: false, -1574 | platform: false, -1575 | fail_fast: false, -1576 | error: false, -1577 | cst: false, -1578 | languages: vec!["foo".into()] -1579 | }, -1580 | file_name: None, -1581 | }, -1582 | TestEntry::Example { -1583 | name: "Test with cst marker".to_string(), -1584 | input: b"1".to_vec(), -1585 | output: "0:0 - 1:0 source_file -1586 | 0:0 - 0:1 expression -1587 | 0:0 - 0:1 number_literal `1`" -1588 | .to_string(), -1589 | header_delim_len: 20, -1590 | divider_delim_len: 3, -1591 | has_fields: false, -1592 | attributes_str: ":cst".to_string(), -1593 | attributes: TestAttributes { -1594 | skip: false, -1595 | platform: true, -1596 | fail_fast: false, -1597 | error: false, -1598 | cst: true, -1599 | languages: vec!["".into()] -1600 | }, -1601 | file_name: None, -1602 | } -1603 | ] -1604 | } -1605 | ); -1606 | } -1607 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests.rs: --------------------------------------------------------------------------------- - 1 | mod async_boundary_test; - 2 | mod corpus_test; - 3 | mod detect_language; - 4 | mod helpers; - 5 | mod highlight_test; - 6 | mod language_test; - 7 | mod 
node_test; - 8 | mod parser_test; - 9 | mod pathological_test; - 10 | mod query_test; - 11 | mod tags_test; - 12 | mod test_highlight_test; - 13 | mod test_tags_test; - 14 | mod text_provider_test; - 15 | mod tree_test; - | - 16 | #[cfg(feature = "wasm")] - 17 | mod wasm_language_test; - | - 18 | use tree_sitter_generate::GenerateResult; - | - 19 | pub use crate::fuzz::{ - 20 | allocations, - 21 | edits::{get_random_edit, invert_edit}, - 22 | random::Rand, - 23 | ITERATION_COUNT, - 24 | }; - | - 25 | /// This is a simple wrapper around [`tree_sitter_generate::generate_parser_for_grammar`], because - 26 | /// our tests do not need to pass in a version number, only the grammar JSON. - 27 | fn generate_parser(grammar_json: &str) -> GenerateResult<(String, String)> { - 28 | tree_sitter_generate::generate_parser_for_grammar(grammar_json, Some((0, 0, 0))) - 29 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/async_boundary_test.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | future::Future, - 3 | pin::Pin, - 4 | ptr, - 5 | task::{Context, Poll, RawWaker, RawWakerVTable, Waker}, - 6 | }; - | - 7 | use tree_sitter::Parser; - | - 8 | use super::helpers::fixtures::get_language; - | - 9 | #[test] - 10 | fn test_node_across_async_boundaries() { - 11 | let mut parser = Parser::new(); - 12 | let language = get_language("bash"); - 13 | parser.set_language(&language).unwrap(); - 14 | let tree = parser.parse("#", None).unwrap(); - 15 | let root = tree.root_node(); - | - 16 | let (result, yields) = simple_async_executor(async { - 17 | let root_ref = &root; - | - 18 | // Test node captured by value - 19 | let fut_by_value = async { - 20 | yield_once().await; - 21 | root.child(0).unwrap().kind() - 22 | }; - | - 23 | // Test node captured by reference - 24 | let fut_by_ref = async { - 25 | yield_once().await; - 26 | root_ref.child(0).unwrap().kind() - 27 | }; - 
| - 28 | let result1 = fut_by_value.await; - 29 | let result2 = fut_by_ref.await; - | - 30 | assert_eq!(result1, result2); - 31 | result1 - 32 | }); - | - 33 | assert_eq!(result, "comment"); - 34 | assert_eq!(yields, 2); - 35 | } - | - 36 | #[test] - 37 | fn test_cursor_across_async_boundaries() { - 38 | let mut parser = Parser::new(); - 39 | let language = get_language("c"); - 40 | parser.set_language(&language).unwrap(); - 41 | let tree = parser.parse("#", None).unwrap(); - 42 | let mut cursor = tree.walk(); - | - 43 | let ((), yields) = simple_async_executor(async { - 44 | cursor.goto_first_child(); - | - 45 | // Test cursor usage across yield point - 46 | yield_once().await; - 47 | cursor.goto_first_child(); - | - 48 | // Test cursor in async block - 49 | let cursor_ref = &mut cursor; - 50 | let fut = async { - 51 | yield_once().await; - 52 | cursor_ref.goto_first_child(); - 53 | }; - 54 | fut.await; - 55 | }); - | - 56 | assert_eq!(yields, 2); - 57 | } - | - 58 | #[test] - 59 | fn test_node_and_cursor_together() { - 60 | let mut parser = Parser::new(); - 61 | let language = get_language("javascript"); - 62 | parser.set_language(&language).unwrap(); - 63 | let tree = parser.parse("#", None).unwrap(); - 64 | let root = tree.root_node(); - 65 | let mut cursor = tree.walk(); - | - 66 | let ((), yields) = simple_async_executor(async { - 67 | cursor.goto_first_child(); - | - 68 | let fut = async { - 69 | yield_once().await; - 70 | let _ = root.to_sexp(); - 71 | cursor.goto_first_child(); - 72 | }; - | - 73 | yield_once().await; - 74 | fut.await; - 75 | }); - | - 76 | assert_eq!(yields, 2); - 77 | } - | - 78 | fn simple_async_executor(future: F) -> (F::Output, u32) - 79 | where - 80 | F: Future, - 81 | { - 82 | let waker = noop_waker(); - 83 | let mut cx = Context::from_waker(&waker); - 84 | let mut yields = 0; - 85 | let mut future = Box::pin(future); - | - 86 | loop { - 87 | match future.as_mut().poll(&mut cx) { - 88 | Poll::Ready(result) => return (result, 
yields), - 89 | Poll::Pending => yields += 1, - 90 | } - 91 | } - 92 | } - | - 93 | async fn yield_once() { - 94 | struct YieldOnce { - 95 | yielded: bool, - 96 | } - | - 97 | impl Future for YieldOnce { - 98 | type Output = (); - | - 99 | fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { - 100 | if self.yielded { - 101 | Poll::Ready(()) - 102 | } else { - 103 | self.yielded = true; - 104 | cx.waker().wake_by_ref(); - 105 | Poll::Pending - 106 | } - 107 | } - 108 | } - | - 109 | YieldOnce { yielded: false }.await; - 110 | } - | - 111 | const fn noop_waker() -> Waker { - 112 | const VTABLE: RawWakerVTable = RawWakerVTable::new( - 113 | // Cloning just returns a new no-op raw waker - 114 | |_| RAW, - 115 | // `wake` does nothing - 116 | |_| {}, - 117 | // `wake_by_ref` does nothing - 118 | |_| {}, - 119 | // Dropping does nothing as we don't allocate anything - 120 | |_| {}, - 121 | ); - 122 | const RAW: RawWaker = RawWaker::new(ptr::null(), &VTABLE); - 123 | unsafe { Waker::from_raw(RAW) } - 124 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/corpus_test.rs: --------------------------------------------------------------------------------- - 1 | use std::{collections::HashMap, env, fs}; - | - 2 | use anyhow::Context; - 3 | use tree_sitter::Parser; - 4 | use tree_sitter_proc_macro::test_with_seed; - | - 5 | use crate::{ - 6 | fuzz::{ - 7 | corpus_test::{ - 8 | check_changed_ranges, check_consistent_sizes, get_parser, set_included_ranges, - 9 | }, - 10 | edits::{get_random_edit, invert_edit}, - 11 | flatten_tests, new_seed, - 12 | random::Rand, - 13 | EDIT_COUNT, EXAMPLE_EXCLUDE, EXAMPLE_INCLUDE, ITERATION_COUNT, LANGUAGE_FILTER, - 14 | LOG_GRAPH_ENABLED, START_SEED, - 15 | }, - 16 | parse::perform_edit, - 17 | test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields}, - 18 | tests::{ - 19 | allocations, - 20 | helpers::fixtures::{fixtures_dir, get_language, 
get_test_language, SCRATCH_BASE_DIR}, - 21 | }, - 22 | }; - | - 23 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 24 | fn test_corpus_for_bash_language(seed: usize) { - 25 | test_language_corpus( - 26 | "bash", - 27 | seed, - 28 | Some(&[ - 29 | // Fragile tests where edit customization changes - 30 | // lead to significant parse tree structure changes. - 31 | "bash - corpus - commands - Nested Heredocs", - 32 | "bash - corpus - commands - Quoted Heredocs", - 33 | "bash - corpus - commands - Heredocs with weird characters", - 34 | ]), - 35 | None, - 36 | ); - 37 | } - | - 38 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 39 | fn test_corpus_for_c_language(seed: usize) { - 40 | test_language_corpus("c", seed, None, None); - 41 | } - | - 42 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 43 | fn test_corpus_for_cpp_language(seed: usize) { - 44 | test_language_corpus("cpp", seed, None, None); - 45 | } - | - 46 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 47 | fn test_corpus_for_embedded_template_language(seed: usize) { - 48 | test_language_corpus("embedded-template", seed, None, None); - 49 | } - | - 50 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 51 | fn test_corpus_for_go_language(seed: usize) { - 52 | test_language_corpus("go", seed, None, None); - 53 | } - | - 54 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 55 | fn test_corpus_for_html_language(seed: usize) { - 56 | test_language_corpus("html", seed, None, None); - 57 | } - | - 58 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 59 | fn test_corpus_for_java_language(seed: usize) { - 60 | test_language_corpus( - 61 | "java", - 62 | seed, - 63 | Some(&["java - corpus - expressions - switch with unnamed pattern variable"]), - 64 | None, - 65 | ); - 66 | } - | - 67 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 68 | fn 
test_corpus_for_javascript_language(seed: usize) { - 69 | test_language_corpus("javascript", seed, None, None); - 70 | } - | - 71 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 72 | fn test_corpus_for_json_language(seed: usize) { - 73 | test_language_corpus("json", seed, None, None); - 74 | } - | - 75 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 76 | fn test_corpus_for_php_language(seed: usize) { - 77 | test_language_corpus("php", seed, None, Some("php")); - 78 | } - | - 79 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 80 | fn test_corpus_for_python_language(seed: usize) { - 81 | test_language_corpus("python", seed, None, None); - 82 | } - | - 83 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 84 | fn test_corpus_for_ruby_language(seed: usize) { - 85 | test_language_corpus("ruby", seed, None, None); - 86 | } - | - 87 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 88 | fn test_corpus_for_rust_language(seed: usize) { - 89 | test_language_corpus("rust", seed, None, None); - 90 | } - | - 91 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 92 | fn test_corpus_for_typescript_language(seed: usize) { - 93 | test_language_corpus("typescript", seed, None, Some("typescript")); - 94 | } - | - 95 | #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] - 96 | fn test_corpus_for_tsx_language(seed: usize) { - 97 | test_language_corpus("typescript", seed, None, Some("tsx")); - 98 | } - | - 99 | pub fn test_language_corpus( - 100 | language_name: &str, - 101 | start_seed: usize, - 102 | skipped: Option<&[&str]>, - 103 | language_dir: Option<&str>, - 104 | ) { - 105 | if let Some(filter) = LANGUAGE_FILTER.as_ref() { - 106 | if language_name != filter { - 107 | return; - 108 | } - 109 | } - | - 110 | let language_dir = language_dir.unwrap_or_default(); - | - 111 | let grammars_dir = fixtures_dir().join("grammars"); - 112 | let error_corpus_dir = 
fixtures_dir().join("error_corpus"); - 113 | let template_corpus_dir = fixtures_dir().join("template_corpus"); - 114 | let corpus_dir = grammars_dir.join(language_name).join("test").join("corpus"); - | - 115 | println!("Testing {language_name} corpus @ {}", corpus_dir.display()); - | - 116 | let error_corpus_file = error_corpus_dir.join(format!("{language_name}_errors.txt")); - 117 | let template_corpus_file = template_corpus_dir.join(format!("{language_name}_templates.txt")); - 118 | let main_tests = parse_tests(&corpus_dir).unwrap(); - 119 | let error_tests = parse_tests(&error_corpus_file).unwrap_or_default(); - 120 | let template_tests = parse_tests(&template_corpus_file).unwrap_or_default(); - 121 | let mut tests = flatten_tests( - 122 | main_tests, - 123 | EXAMPLE_INCLUDE.as_ref(), - 124 | EXAMPLE_EXCLUDE.as_ref(), - 125 | ); - 126 | tests.extend(flatten_tests( - 127 | error_tests, - 128 | EXAMPLE_INCLUDE.as_ref(), - 129 | EXAMPLE_EXCLUDE.as_ref(), - 130 | )); - 131 | tests.extend( - 132 | flatten_tests( - 133 | template_tests, - 134 | EXAMPLE_INCLUDE.as_ref(), - 135 | EXAMPLE_EXCLUDE.as_ref(), - 136 | ) - 137 | .into_iter() - 138 | .map(|mut t| { - 139 | t.template_delimiters = Some(("<%", "%>")); - 140 | t - 141 | }), - 142 | ); - | - 143 | tests.retain(|t| t.languages[0].is_empty() || t.languages.contains(&Box::from(language_dir))); - | - 144 | let mut skipped = skipped.map(|x| x.iter().map(|x| (*x, 0)).collect::>()); - | - 145 | let language_path = if language_dir.is_empty() { - 146 | language_name.to_string() - 147 | } else { - 148 | format!("{language_name}/{language_dir}") - 149 | }; - 150 | let language = get_language(&language_path); - 151 | let mut failure_count = 0; - | - 152 | let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok(); - 153 | let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok(); - | - 154 | if log_seed { - 155 | println!(" start seed: {start_seed}"); - 156 | } - | - 157 | println!(); - 158 | for (test_index, test) in 
tests.iter().enumerate() { - 159 | let test_name = format!("{language_name} - {}", test.name); - 160 | if let Some(skipped) = skipped.as_mut() { - 161 | if let Some(counter) = skipped.get_mut(test_name.as_str()) { - 162 | println!(" {test_index}. {test_name} - SKIPPED"); - 163 | *counter += 1; - 164 | continue; - 165 | } - 166 | } - | - 167 | println!(" {test_index}. {test_name}"); - | - 168 | let passed = allocations::record(|| { - 169 | let mut log_session = None; - 170 | let mut parser = get_parser(&mut log_session, "log.html"); - 171 | parser.set_language(&language).unwrap(); - 172 | set_included_ranges(&mut parser, &test.input, test.template_delimiters); - | - 173 | let tree = parser.parse(&test.input, None).unwrap(); - 174 | let mut actual_output = tree.root_node().to_sexp(); - 175 | if !test.has_fields { - 176 | actual_output = strip_sexp_fields(&actual_output); - 177 | } - | - 178 | if actual_output != test.output { - 179 | println!("Incorrect initial parse for {test_name}"); - 180 | print_diff_key(); - 181 | print_diff(&actual_output, &test.output, true); - 182 | println!(); - 183 | return false; - 184 | } - | - 185 | true - 186 | }); - | - 187 | if !passed { - 188 | failure_count += 1; - 189 | continue; - 190 | } - | - 191 | let mut parser = Parser::new(); - 192 | parser.set_language(&language).unwrap(); - 193 | let tree = parser.parse(&test.input, None).unwrap(); - 194 | drop(parser); - | - 195 | for trial in 0..*ITERATION_COUNT { - 196 | let seed = start_seed + trial; - 197 | let passed = allocations::record(|| { - 198 | let mut rand = Rand::new(seed); - 199 | let mut log_session = None; - 200 | let mut parser = get_parser(&mut log_session, "log.html"); - 201 | parser.set_language(&language).unwrap(); - 202 | let mut tree = tree.clone(); - 203 | let mut input = test.input.clone(); - | - 204 | if *LOG_GRAPH_ENABLED { - 205 | eprintln!("{}\n", String::from_utf8_lossy(&input)); - 206 | } - | - 207 | // Perform a random series of edits and reparse. 
- 208 | let edit_count = rand.unsigned(*EDIT_COUNT); - 209 | let mut undo_stack = Vec::with_capacity(edit_count); - 210 | for _ in 0..=edit_count { - 211 | let edit = get_random_edit(&mut rand, &input); - 212 | undo_stack.push(invert_edit(&input, &edit)); - 213 | perform_edit(&mut tree, &mut input, &edit).unwrap(); - 214 | } - | - 215 | if log_seed { - 216 | println!(" {test_index}.{trial:<2} seed: {seed}"); - 217 | } - | - 218 | if dump_edits { - 219 | fs::write( - 220 | SCRATCH_BASE_DIR - 221 | .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")), - 222 | &input, - 223 | ) - 224 | .unwrap(); - 225 | } - | - 226 | if *LOG_GRAPH_ENABLED { - 227 | eprintln!("{}\n", String::from_utf8_lossy(&input)); - 228 | } - | - 229 | set_included_ranges(&mut parser, &input, test.template_delimiters); - 230 | let mut tree2 = parser.parse(&input, Some(&tree)).unwrap(); - | - 231 | // Check that the new tree is consistent. - 232 | check_consistent_sizes(&tree2, &input); - 233 | if let Err(message) = check_changed_ranges(&tree, &tree2, &input) { - 234 | println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",); - 235 | return false; - 236 | } - | - 237 | // Undo all of the edits and re-parse again. - 238 | while let Some(edit) = undo_stack.pop() { - 239 | perform_edit(&mut tree2, &mut input, &edit).unwrap(); - 240 | } - 241 | if *LOG_GRAPH_ENABLED { - 242 | eprintln!("{}\n", String::from_utf8_lossy(&input)); - 243 | } - | - 244 | set_included_ranges(&mut parser, &test.input, test.template_delimiters); - 245 | let tree3 = parser.parse(&input, Some(&tree2)).unwrap(); - | - 246 | // Verify that the final tree matches the expectation from the corpus. 
- 247 | let mut actual_output = tree3.root_node().to_sexp(); - 248 | if !test.has_fields { - 249 | actual_output = strip_sexp_fields(&actual_output); - 250 | } - | - 251 | if actual_output != test.output { - 252 | println!("Incorrect parse for {test_name} - seed {seed}"); - 253 | print_diff_key(); - 254 | print_diff(&actual_output, &test.output, true); - 255 | println!(); - 256 | return false; - 257 | } - | - 258 | // Check that the edited tree is consistent. - 259 | check_consistent_sizes(&tree3, &input); - 260 | if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) { - 261 | println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n"); - 262 | return false; - 263 | } - | - 264 | true - 265 | }); - | - 266 | if !passed { - 267 | failure_count += 1; - 268 | break; - 269 | } - 270 | } - 271 | } - | - 272 | assert!( - 273 | failure_count == 0, - 274 | "{failure_count} {language_name} corpus tests failed" - 275 | ); - | - 276 | if let Some(skipped) = skipped.as_mut() { - 277 | skipped.retain(|_, v| *v == 0); - | - 278 | if !skipped.is_empty() { - 279 | println!("Non matchable skip definitions:"); - 280 | for k in skipped.keys() { - 281 | println!(" {k}"); - 282 | } - 283 | panic!("Non matchable skip definitions needs to be removed"); - 284 | } - 285 | } - 286 | } - | - 287 | #[test] - 288 | fn test_feature_corpus_files() { - 289 | let test_grammars_dir = fixtures_dir().join("test_grammars"); - | - 290 | let mut failure_count = 0; - 291 | for entry in fs::read_dir(test_grammars_dir).unwrap() { - 292 | let entry = entry.unwrap(); - 293 | if !entry.metadata().unwrap().is_dir() { - 294 | continue; - 295 | } - 296 | let language_name = entry.file_name(); - 297 | let language_name = language_name.to_str().unwrap(); - | - 298 | if let Some(filter) = LANGUAGE_FILTER.as_ref() { - 299 | if language_name != filter { - 300 | continue; - 301 | } - 302 | } - | - 303 | let test_path = entry.path(); - 304 | let mut grammar_path = 
test_path.join("grammar.js"); - 305 | if !grammar_path.exists() { - 306 | grammar_path = test_path.join("grammar.json"); - 307 | } - 308 | let error_message_path = test_path.join("expected_error.txt"); - 309 | let grammar_json = tree_sitter_generate::load_grammar_file(&grammar_path, None) - 310 | .with_context(|| { - 311 | format!( - 312 | "Could not load grammar file for test language '{language_name}' at {}", - 313 | grammar_path.display() - 314 | ) - 315 | }) - 316 | .unwrap(); - 317 | let generate_result = - 318 | tree_sitter_generate::generate_parser_for_grammar(&grammar_json, Some((0, 0, 0))); - | - 319 | if error_message_path.exists() { - 320 | if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() { - 321 | continue; - 322 | } - | - 323 | eprintln!("test language: {language_name:?}"); - | - 324 | let expected_message = fs::read_to_string(&error_message_path) - 325 | .unwrap() - 326 | .replace("\r\n", "\n"); - 327 | if let Err(e) = generate_result { - 328 | let actual_message = e.to_string().replace("\r\n", "\n"); - 329 | if expected_message != actual_message { - 330 | eprintln!( - 331 | "Unexpected error message.\n\nExpected:\n\n`{expected_message}`\nActual:\n\n`{actual_message}`\n", - 332 | ); - 333 | failure_count += 1; - 334 | } - 335 | } else { - 336 | eprintln!("Expected error message but got none for test grammar '{language_name}'",); - 337 | failure_count += 1; - 338 | } - 339 | } else { - 340 | if let Err(e) = &generate_result { - 341 | eprintln!("Unexpected error for test grammar '{language_name}':\n{e}",); - 342 | failure_count += 1; - 343 | continue; - 344 | } - | - 345 | let corpus_path = test_path.join("corpus.txt"); - 346 | let c_code = generate_result.unwrap().1; - 347 | let language = get_test_language(language_name, &c_code, Some(&test_path)); - 348 | let test = parse_tests(&corpus_path).unwrap(); - 349 | let tests = flatten_tests(test, EXAMPLE_INCLUDE.as_ref(), EXAMPLE_EXCLUDE.as_ref()); - | - 350 | if !tests.is_empty() { - 351 | 
eprintln!("test language: {language_name:?}"); - 352 | } - | - 353 | for test in tests { - 354 | eprintln!(" example: {:?}", test.name); - | - 355 | let passed = allocations::record(|| { - 356 | let mut log_session = None; - 357 | let mut parser = get_parser(&mut log_session, "log.html"); - 358 | parser.set_language(&language).unwrap(); - 359 | let tree = parser.parse(&test.input, None).unwrap(); - 360 | let mut actual_output = tree.root_node().to_sexp(); - 361 | if !test.has_fields { - 362 | actual_output = strip_sexp_fields(&actual_output); - 363 | } - 364 | if actual_output == test.output { - 365 | true - 366 | } else { - 367 | print_diff_key(); - 368 | print_diff(&actual_output, &test.output, true); - 369 | println!(); - 370 | false - 371 | } - 372 | }); - | - 373 | if !passed { - 374 | failure_count += 1; - 375 | } - 376 | } - 377 | } - 378 | } - | - 379 | assert!(failure_count == 0, "{failure_count} corpus tests failed"); - 380 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/detect_language.rs: --------------------------------------------------------------------------------- - 1 | use std::{fs, path::Path}; - | - 2 | use tree_sitter_loader::Loader; - | - 3 | use crate::tests::helpers::fixtures::scratch_dir; - | - 4 | #[test] - 5 | fn detect_language_by_first_line_regex() { - 6 | let strace_dir = tree_sitter_dir( - 7 | r#"{ - 8 | "grammars": [ - 9 | { - 10 | "name": "strace", - 11 | "path": ".", - 12 | "scope": "source.strace", - 13 | "file-types": [ - 14 | "strace" - 15 | ], - 16 | "first-line-regex": "[0-9:.]* *execve" - 17 | } - 18 | ], - 19 | "metadata": { - 20 | "version": "0.0.1" - 21 | } - 22 | } - 23 | "#, - 24 | "strace", - 25 | ); - | - 26 | let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf()); - 27 | let config = loader - 28 | .find_language_configurations_at_path(strace_dir.path(), false) - 29 | .unwrap(); - | - 30 | // this is just to validate that we can read 
the tree-sitter.json correctly - 31 | assert_eq!(config[0].scope.as_ref().unwrap(), "source.strace"); - | - 32 | let file_name = strace_dir.path().join("strace.log"); - 33 | fs::write(&file_name, "execve\nworld").unwrap(); - 34 | assert_eq!( - 35 | get_lang_scope(&loader, &file_name), - 36 | Some("source.strace".into()) - 37 | ); - | - 38 | let file_name = strace_dir.path().join("strace.log"); - 39 | fs::write(&file_name, "447845 execve\nworld").unwrap(); - 40 | assert_eq!( - 41 | get_lang_scope(&loader, &file_name), - 42 | Some("source.strace".into()) - 43 | ); - | - 44 | let file_name = strace_dir.path().join("strace.log"); - 45 | fs::write(&file_name, "hello\nexecve").unwrap(); - 46 | assert!(get_lang_scope(&loader, &file_name).is_none()); - | - 47 | let file_name = strace_dir.path().join("strace.log"); - 48 | fs::write(&file_name, "").unwrap(); - 49 | assert!(get_lang_scope(&loader, &file_name).is_none()); - | - 50 | let dummy_dir = tree_sitter_dir( - 51 | r#"{ - 52 | "grammars": [ - 53 | { - 54 | "name": "dummy", - 55 | "scope": "source.dummy", - 56 | "path": ".", - 57 | "file-types": [ - 58 | "dummy" - 59 | ] - 60 | } - 61 | ], - 62 | "metadata": { - 63 | "version": "0.0.1" - 64 | } - 65 | } - 66 | "#, - 67 | "dummy", - 68 | ); - | - 69 | // file-type takes precedence over first-line-regex - 70 | loader - 71 | .find_language_configurations_at_path(dummy_dir.path(), false) - 72 | .unwrap(); - 73 | let file_name = dummy_dir.path().join("strace.dummy"); - 74 | fs::write(&file_name, "execve").unwrap(); - 75 | assert_eq!( - 76 | get_lang_scope(&loader, &file_name), - 77 | Some("source.dummy".into()) - 78 | ); - 79 | } - | - 80 | #[test] - 81 | fn detect_langauge_by_double_barrel_file_extension() { - 82 | let blade_dir = tree_sitter_dir( - 83 | r#"{ - 84 | "grammars": [ - 85 | { - 86 | "name": "blade", - 87 | "path": ".", - 88 | "scope": "source.blade", - 89 | "file-types": [ - 90 | "blade.php" - 91 | ] - 92 | } - 93 | ], - 94 | "metadata": { - 95 | "version": 
"0.0.1" - 96 | } - 97 | } - 98 | "#, - 99 | "blade", - 100 | ); - | - 101 | let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf()); - 102 | let config = loader - 103 | .find_language_configurations_at_path(blade_dir.path(), false) - 104 | .unwrap(); - | - 105 | // this is just to validate that we can read the tree-sitter.json correctly - 106 | assert_eq!(config[0].scope.as_ref().unwrap(), "source.blade"); - | - 107 | let file_name = blade_dir.path().join("foo.blade.php"); - 108 | fs::write(&file_name, "").unwrap(); - 109 | assert_eq!( - 110 | get_lang_scope(&loader, &file_name), - 111 | Some("source.blade".into()) - 112 | ); - 113 | } - | - 114 | #[test] - 115 | fn detect_language_without_filename() { - 116 | let gitignore_dir = tree_sitter_dir( - 117 | r#"{ - 118 | "grammars": [ - 119 | { - 120 | "name": "gitignore", - 121 | "path": ".", - 122 | "scope": "source.gitignore", - 123 | "file-types": [ - 124 | ".gitignore" - 125 | ] - 126 | } - 127 | ], - 128 | "metadata": { - 129 | "version": "0.0.1" - 130 | } - 131 | } - 132 | "#, - 133 | "gitignore", - 134 | ); - | - 135 | let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf()); - 136 | let config = loader - 137 | .find_language_configurations_at_path(gitignore_dir.path(), false) - 138 | .unwrap(); - | - 139 | // this is just to validate that we can read the tree-sitter.json correctly - 140 | assert_eq!(config[0].scope.as_ref().unwrap(), "source.gitignore"); - | - 141 | let file_name = gitignore_dir.path().join(".gitignore"); - 142 | fs::write(&file_name, "").unwrap(); - 143 | assert_eq!( - 144 | get_lang_scope(&loader, &file_name), - 145 | Some("source.gitignore".into()) - 146 | ); - 147 | } - | - 148 | #[test] - 149 | fn detect_language_without_file_extension() { - 150 | let ssh_config_dir = tree_sitter_dir( - 151 | r#"{ - 152 | "grammars": [ - 153 | { - 154 | "name": "ssh_config", - 155 | "path": ".", - 156 | "scope": "source.ssh_config", - 157 | "file-types": [ - 158 | 
"ssh_config" - 159 | ] - 160 | } - 161 | ], - 162 | "metadata": { - 163 | "version": "0.0.1" - 164 | } - 165 | } - 166 | "#, - 167 | "ssh_config", - 168 | ); - | - 169 | let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf()); - 170 | let config = loader - 171 | .find_language_configurations_at_path(ssh_config_dir.path(), false) - 172 | .unwrap(); - | - 173 | // this is just to validate that we can read the tree-sitter.json correctly - 174 | assert_eq!(config[0].scope.as_ref().unwrap(), "source.ssh_config"); - | - 175 | let file_name = ssh_config_dir.path().join("ssh_config"); - 176 | fs::write(&file_name, "").unwrap(); - 177 | assert_eq!( - 178 | get_lang_scope(&loader, &file_name), - 179 | Some("source.ssh_config".into()) - 180 | ); - 181 | } - | - 182 | fn tree_sitter_dir(tree_sitter_json: &str, name: &str) -> tempfile::TempDir { - 183 | let temp_dir = tempfile::tempdir().unwrap(); - 184 | fs::write(temp_dir.path().join("tree-sitter.json"), tree_sitter_json).unwrap(); - 185 | fs::create_dir_all(temp_dir.path().join("src/tree_sitter")).unwrap(); - 186 | fs::write( - 187 | temp_dir.path().join("src/grammar.json"), - 188 | format!(r#"{{"name":"{name}"}}"#), - 189 | ) - 190 | .unwrap(); - 191 | fs::write( - 192 | temp_dir.path().join("src/parser.c"), - 193 | format!( - 194 | r#" - 195 | #include "tree_sitter/parser.h" - 196 | #ifdef _WIN32 - 197 | #define TS_PUBLIC __declspec(dllexport) - 198 | #else - 199 | #define TS_PUBLIC __attribute__((visibility("default"))) - 200 | #endif - 201 | TS_PUBLIC const TSLanguage *tree_sitter_{name}() {{}} - 202 | "# - 203 | ), - 204 | ) - 205 | .unwrap(); - 206 | fs::write( - 207 | temp_dir.path().join("src/tree_sitter/parser.h"), - 208 | include_str!("../../../../lib/src/parser.h"), - 209 | ) - 210 | .unwrap(); - 211 | temp_dir - 212 | } - | - 213 | // If we manage to get the language scope, it means we correctly detected the file-type - 214 | fn get_lang_scope(loader: &Loader, file_name: &Path) -> Option { - 
215 | loader - 216 | .language_configuration_for_file_name(file_name) - 217 | .ok() - 218 | .and_then(|config| { - 219 | if let Some((_, config)) = config { - 220 | config.scope.clone() - 221 | } else if let Ok(Some((_, config))) = - 222 | loader.language_configuration_for_first_line_regex(file_name) - 223 | { - 224 | config.scope.clone() - 225 | } else { - 226 | None - 227 | } - 228 | }) - 229 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/helpers.rs: --------------------------------------------------------------------------------- - 1 | pub use crate::fuzz::allocations; - 2 | pub mod edits; - 3 | pub(super) mod fixtures; - 4 | pub(super) mod query_helpers; - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/helpers/dirs.rs: --------------------------------------------------------------------------------- - 1 | pub static ROOT_DIR: LazyLock = LazyLock::new(|| { - 2 | PathBuf::from(env!("CARGO_MANIFEST_DIR")) - 3 | .parent() - 4 | .unwrap() - 5 | .parent() - 6 | .unwrap() - 7 | .to_owned() - 8 | }); - | - 9 | pub static FIXTURES_DIR: LazyLock = - 10 | LazyLock::new(|| ROOT_DIR.join("test").join("fixtures")); - | - 11 | pub static HEADER_DIR: LazyLock = LazyLock::new(|| ROOT_DIR.join("lib").join("include")); - | - 12 | pub static GRAMMARS_DIR: LazyLock = - 13 | LazyLock::new(|| ROOT_DIR.join("test").join("fixtures").join("grammars")); - | - 14 | pub static SCRATCH_BASE_DIR: LazyLock = LazyLock::new(|| { - 15 | let result = ROOT_DIR.join("target").join("scratch"); - 16 | fs::create_dir_all(&result).unwrap(); - 17 | result - 18 | }); - | - 19 | #[cfg(feature = "wasm")] - 20 | pub static WASM_DIR: LazyLock = LazyLock::new(|| ROOT_DIR.join("target").join("release")); - | - 21 | pub static SCRATCH_DIR: LazyLock = LazyLock::new(|| { - 22 | // https://doc.rust-lang.org/reference/conditional-compilation.html - 23 | let vendor = if cfg!(target_vendor 
= "apple") { - 24 | "apple" - 25 | } else if cfg!(target_vendor = "fortanix") { - 26 | "fortanix" - 27 | } else if cfg!(target_vendor = "pc") { - 28 | "pc" - 29 | } else { - 30 | "unknown" - 31 | }; - 32 | let env = if cfg!(target_env = "gnu") { - 33 | "gnu" - 34 | } else if cfg!(target_env = "msvc") { - 35 | "msvc" - 36 | } else if cfg!(target_env = "musl") { - 37 | "musl" - 38 | } else if cfg!(target_env = "sgx") { - 39 | "sgx" - 40 | } else { - 41 | "unknown" - 42 | }; - 43 | let endian = if cfg!(target_endian = "little") { - 44 | "little" - 45 | } else if cfg!(target_endian = "big") { - 46 | "big" - 47 | } else { - 48 | "unknown" - 49 | }; - | - 50 | let machine = format!( - 51 | "{}-{}-{vendor}-{env}-{endian}", - 52 | std::env::consts::ARCH, - 53 | std::env::consts::OS - 54 | ); - 55 | let result = SCRATCH_BASE_DIR.join(machine); - 56 | fs::create_dir_all(&result).unwrap(); - 57 | result - 58 | }); - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/helpers/edits.rs: --------------------------------------------------------------------------------- - 1 | use std::{ops::Range, str}; - | - 2 | #[derive(Debug)] - 3 | pub struct ReadRecorder<'a> { - 4 | content: &'a [u8], - 5 | indices_read: Vec, - 6 | } - | - 7 | impl<'a> ReadRecorder<'a> { - 8 | #[must_use] - 9 | pub const fn new(content: &'a [u8]) -> Self { - 10 | Self { - 11 | content, - 12 | indices_read: Vec::new(), - 13 | } - 14 | } - | - 15 | pub fn read(&mut self, offset: usize) -> &'a [u8] { - 16 | if offset < self.content.len() { - 17 | if let Err(i) = self.indices_read.binary_search(&offset) { - 18 | self.indices_read.insert(i, offset); - 19 | } - 20 | &self.content[offset..(offset + 1)] - 21 | } else { - 22 | &[] - 23 | } - 24 | } - | - 25 | pub fn strings_read(&self) -> Vec<&'a str> { - 26 | let mut result = Vec::new(); - 27 | let mut last_range = Option::>::None; - 28 | for index in &self.indices_read { - 29 | if let Some(ref mut range) = 
&mut last_range { - 30 | if range.end == *index { - 31 | range.end += 1; - 32 | } else { - 33 | result.push(str::from_utf8(&self.content[range.clone()]).unwrap()); - 34 | last_range = None; - 35 | } - 36 | } else { - 37 | last_range = Some(*index..(*index + 1)); - 38 | } - 39 | } - 40 | if let Some(range) = last_range { - 41 | result.push(str::from_utf8(&self.content[range]).unwrap()); - 42 | } - 43 | result - 44 | } - 45 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/helpers/fixtures.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | env, fs, - 3 | path::{Path, PathBuf}, - 4 | sync::LazyLock, - 5 | }; - | - 6 | use anyhow::Context; - 7 | use tree_sitter::Language; - 8 | use tree_sitter_generate::{load_grammar_file, ALLOC_HEADER, ARRAY_HEADER}; - 9 | use tree_sitter_highlight::HighlightConfiguration; - 10 | use tree_sitter_loader::{CompileConfig, Loader}; - 11 | use tree_sitter_tags::TagsConfiguration; - | - 12 | use crate::tests::generate_parser; - | - 13 | include!("./dirs.rs"); - | - 14 | static TEST_LOADER: LazyLock = LazyLock::new(|| { - 15 | let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone()); - 16 | if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() { - 17 | loader.debug_build(true); - 18 | } - 19 | loader - 20 | }); - | - 21 | #[cfg(feature = "wasm")] - 22 | pub static ENGINE: LazyLock = LazyLock::new(Default::default); - | - 23 | pub fn test_loader() -> &'static Loader { - 24 | &TEST_LOADER - 25 | } - | - 26 | pub fn fixtures_dir() -> &'static Path { - 27 | &FIXTURES_DIR - 28 | } - | - 29 | pub fn scratch_dir() -> &'static Path { - 30 | &SCRATCH_DIR - 31 | } - | - 32 | pub fn get_language(name: &str) -> Language { - 33 | let src_dir = GRAMMARS_DIR.join(name).join("src"); - 34 | let mut config = CompileConfig::new(&src_dir, None, None); - 35 | config.header_paths.push(&HEADER_DIR); - 36 | 
TEST_LOADER.load_language_at_path(config).unwrap() - 37 | } - | - 38 | pub fn get_test_fixture_language(name: &str) -> Language { - 39 | get_test_fixture_language_internal(name, false) - 40 | } - | - 41 | #[cfg(feature = "wasm")] - 42 | pub fn get_test_fixture_language_wasm(name: &str) -> Language { - 43 | get_test_fixture_language_internal(name, true) - 44 | } - | - 45 | fn get_test_fixture_language_internal(name: &str, wasm: bool) -> Language { - 46 | let grammar_dir_path = fixtures_dir().join("test_grammars").join(name); - 47 | let grammar_json = load_grammar_file(&grammar_dir_path.join("grammar.js"), None).unwrap(); - 48 | let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap(); - 49 | get_test_language_internal(&parser_name, &parser_code, Some(&grammar_dir_path), wasm) - 50 | } - | - 51 | pub fn get_language_queries_path(language_name: &str) -> PathBuf { - 52 | GRAMMARS_DIR.join(language_name).join("queries") - 53 | } - | - 54 | pub fn get_highlight_config( - 55 | language_name: &str, - 56 | injection_query_filename: Option<&str>, - 57 | highlight_names: &[String], - 58 | ) -> HighlightConfiguration { - 59 | let language = get_language(language_name); - 60 | let queries_path = get_language_queries_path(language_name); - 61 | let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap(); - 62 | let injections_query = - 63 | injection_query_filename.map_or_else(String::new, |injection_query_filename| { - 64 | fs::read_to_string(queries_path.join(injection_query_filename)).unwrap() - 65 | }); - 66 | let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default(); - 67 | let mut result = HighlightConfiguration::new( - 68 | language, - 69 | language_name, - 70 | &highlights_query, - 71 | &injections_query, - 72 | &locals_query, - 73 | ) - 74 | .unwrap(); - 75 | result.configure(highlight_names); - 76 | result - 77 | } - | - 78 | pub fn get_tags_config(language_name: &str) -> TagsConfiguration { - 
79 | let language = get_language(language_name); - 80 | let queries_path = get_language_queries_path(language_name); - 81 | let tags_query = fs::read_to_string(queries_path.join("tags.scm")).unwrap(); - 82 | let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default(); - 83 | TagsConfiguration::new(language, &tags_query, &locals_query).unwrap() - 84 | } - | - 85 | pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language { - 86 | get_test_language_internal(name, parser_code, path, false) - 87 | } - | - 88 | fn get_test_language_internal( - 89 | name: &str, - 90 | parser_code: &str, - 91 | path: Option<&Path>, - 92 | wasm: bool, - 93 | ) -> Language { - 94 | let src_dir = scratch_dir().join("src").join(name); - 95 | fs::create_dir_all(&src_dir).unwrap(); - | - 96 | let parser_path = src_dir.join("parser.c"); - 97 | if !fs::read_to_string(&parser_path).is_ok_and(|content| content == parser_code) { - 98 | fs::write(&parser_path, parser_code).unwrap(); - 99 | } - | - 100 | let scanner_path = if let Some(path) = path { - 101 | let scanner_path = path.join("scanner.c"); - 102 | if scanner_path.exists() { - 103 | let scanner_code = fs::read_to_string(&scanner_path).unwrap(); - 104 | let scanner_copy_path = src_dir.join("scanner.c"); - 105 | if !fs::read_to_string(&scanner_copy_path).is_ok_and(|content| content == scanner_code) - 106 | { - 107 | fs::write(&scanner_copy_path, scanner_code).unwrap(); - 108 | } - 109 | Some(scanner_copy_path) - 110 | } else { - 111 | None - 112 | } - 113 | } else { - 114 | None - 115 | }; - | - 116 | let header_path = src_dir.join("tree_sitter"); - 117 | fs::create_dir_all(&header_path).unwrap(); - | - 118 | for (file, content) in [ - 119 | ("alloc.h", ALLOC_HEADER), - 120 | ("array.h", ARRAY_HEADER), - 121 | ("parser.h", tree_sitter::PARSER_HEADER), - 122 | ] { - 123 | let file = header_path.join(file); - 124 | fs::write(&file, content) - 125 | .with_context(|| format!("Failed to 
write {:?}", file.file_name().unwrap())) - 126 | .unwrap(); - 127 | } - | - 128 | let paths_to_check = if let Some(scanner_path) = &scanner_path { - 129 | vec![parser_path, scanner_path.clone()] - 130 | } else { - 131 | vec![parser_path] - 132 | }; - | - 133 | let mut config = CompileConfig::new(&src_dir, Some(&paths_to_check), None); - 134 | config.header_paths = vec![&HEADER_DIR]; - 135 | config.name = name.to_string(); - | - 136 | if wasm { - 137 | #[cfg(feature = "wasm")] - 138 | { - 139 | let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone()); - 140 | loader.use_wasm(&ENGINE); - 141 | if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() { - 142 | loader.debug_build(true); - 143 | } - 144 | loader.load_language_at_path_with_name(config).unwrap() - 145 | } - 146 | #[cfg(not(feature = "wasm"))] - 147 | { - 148 | unimplemented!("Wasm feature is not enabled") - 149 | } - 150 | } else { - 151 | TEST_LOADER.load_language_at_path_with_name(config).unwrap() - 152 | } - 153 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/helpers/query_helpers.rs: --------------------------------------------------------------------------------- - 1 | use std::{cmp::Ordering, fmt::Write, ops::Range}; - | - 2 | use rand::prelude::Rng; - 3 | use streaming_iterator::{IntoStreamingIterator, StreamingIterator}; - 4 | use tree_sitter::{ - 5 | Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor, - 6 | }; - | - 7 | #[derive(Debug)] - 8 | pub struct Pattern { - 9 | kind: Option<&'static str>, - 10 | named: bool, - 11 | field: Option<&'static str>, - 12 | capture: Option, - 13 | children: Vec, - 14 | } - | - 15 | #[derive(Clone, Debug, PartialEq, Eq)] - 16 | pub struct Match<'a, 'tree> { - 17 | pub captures: Vec<(&'a str, Node<'tree>)>, - 18 | pub last_node: Option>, - 19 | } - | - 20 | const CAPTURE_NAMES: &[&str] = &[ - 21 | "one", "two", "three", "four", "five", "six", "seven", 
"eight", - 22 | ]; - | - 23 | impl Pattern { - 24 | pub fn random_pattern_in_tree(tree: &Tree, rng: &mut impl Rng) -> (Self, Range) { - 25 | let mut cursor = tree.walk(); - | - 26 | // Descend to the node at a random byte offset and depth. - 27 | let mut max_depth = 0; - 28 | let byte_offset = rng.gen_range(0..cursor.node().end_byte()); - 29 | while cursor.goto_first_child_for_byte(byte_offset).is_some() { - 30 | max_depth += 1; - 31 | } - 32 | let depth = rng.gen_range(0..=max_depth); - 33 | for _ in 0..depth { - 34 | cursor.goto_parent(); - 35 | } - | - 36 | // Build a pattern that matches that node. - 37 | // Sometimes include subsequent siblings of the node. - 38 | let pattern_start = cursor.node().start_position(); - 39 | let mut roots = vec![Self::random_pattern_for_node(&mut cursor, rng)]; - 40 | while roots.len() < 5 && cursor.goto_next_sibling() { - 41 | if rng.gen_bool(0.2) { - 42 | roots.push(Self::random_pattern_for_node(&mut cursor, rng)); - 43 | } - 44 | } - 45 | let pattern_end = cursor.node().end_position(); - | - 46 | let mut pattern = Self { - 47 | kind: None, - 48 | named: true, - 49 | field: None, - 50 | capture: None, - 51 | children: roots, - 52 | }; - | - 53 | if pattern.children.len() == 1 || - 54 | // In a parenthesized list of sibling patterns, the first - 55 | // sibling can't be an anonymous `_` wildcard. - 56 | (pattern.children[0].kind == Some("_") && !pattern.children[0].named) - 57 | { - 58 | pattern = pattern.children.pop().unwrap(); - 59 | } - 60 | // In a parenthesized list of sibling patterns, the first - 61 | // sibling can't have a field name. - 62 | else { - 63 | pattern.children[0].field = None; - 64 | } - | - 65 | (pattern, pattern_start..pattern_end) - 66 | } - | - 67 | fn random_pattern_for_node(cursor: &mut TreeCursor, rng: &mut impl Rng) -> Self { - 68 | let node = cursor.node(); - | - 69 | // Sometimes specify the node's type, sometimes use a wildcard. 
- 70 | let (kind, named) = if rng.gen_bool(0.9) { - 71 | (Some(node.kind()), node.is_named()) - 72 | } else { - 73 | (Some("_"), node.is_named() && rng.gen_bool(0.8)) - 74 | }; - | - 75 | // Sometimes specify the node's field. - 76 | let field = if rng.gen_bool(0.75) { - 77 | cursor.field_name() - 78 | } else { - 79 | None - 80 | }; - | - 81 | // Sometimes capture the node. - 82 | let capture = if rng.gen_bool(0.7) { - 83 | Some(CAPTURE_NAMES[rng.gen_range(0..CAPTURE_NAMES.len())].to_string()) - 84 | } else { - 85 | None - 86 | }; - | - 87 | // Walk the children and include child patterns for some of them. - 88 | let mut children = Vec::new(); - 89 | if named && cursor.goto_first_child() { - 90 | let max_children = rng.gen_range(0..4); - 91 | while cursor.goto_next_sibling() { - 92 | if rng.gen_bool(0.6) { - 93 | let child_ast = Self::random_pattern_for_node(cursor, rng); - 94 | children.push(child_ast); - 95 | if children.len() >= max_children { - 96 | break; - 97 | } - 98 | } - 99 | } - 100 | cursor.goto_parent(); - 101 | } - | - 102 | Self { - 103 | kind, - 104 | named, - 105 | field, - 106 | capture, - 107 | children, - 108 | } - 109 | } - | - 110 | fn write_to_string(&self, string: &mut String, indent: usize) { - 111 | if let Some(field) = self.field { - 112 | write!(string, "{field}: ").unwrap(); - 113 | } - | - 114 | if self.named { - 115 | string.push('('); - 116 | let mut has_contents = if let Some(kind) = &self.kind { - 117 | write!(string, "{kind}").unwrap(); - 118 | true - 119 | } else { - 120 | false - 121 | }; - 122 | for child in &self.children { - 123 | let indent = indent + 2; - 124 | if has_contents { - 125 | string.push('\n'); - 126 | string.push_str(&" ".repeat(indent)); - 127 | } - 128 | child.write_to_string(string, indent); - 129 | has_contents = true; - 130 | } - 131 | string.push(')'); - 132 | } else if self.kind == Some("_") { - 133 | string.push('_'); - 134 | } else { - 135 | write!(string, "\"{}\"", self.kind.unwrap().replace('\"', 
"\\\"")).unwrap(); - 136 | } - | - 137 | if let Some(capture) = &self.capture { - 138 | write!(string, " @{capture}").unwrap(); - 139 | } - 140 | } - | - 141 | pub fn matches_in_tree<'tree>(&self, tree: &'tree Tree) -> Vec> { - 142 | let mut matches = Vec::new(); - | - 143 | // Compute the matches naively: walk the tree and - 144 | // retry the entire pattern for each node. - 145 | let mut cursor = tree.walk(); - 146 | let mut ascending = false; - 147 | loop { - 148 | if ascending { - 149 | if cursor.goto_next_sibling() { - 150 | ascending = false; - 151 | } else if !cursor.goto_parent() { - 152 | break; - 153 | } - 154 | } else { - 155 | let matches_here = self.match_node(&mut cursor); - 156 | matches.extend_from_slice(&matches_here); - 157 | if !cursor.goto_first_child() { - 158 | ascending = true; - 159 | } - 160 | } - 161 | } - | - 162 | matches.sort_unstable(); - 163 | for m in &mut matches { - 164 | m.last_node = None; - 165 | } - 166 | matches.dedup(); - 167 | matches - 168 | } - | - 169 | pub fn match_node<'tree>(&self, cursor: &mut TreeCursor<'tree>) -> Vec> { - 170 | let node = cursor.node(); - | - 171 | // If a kind is specified, check that it matches the node. - 172 | if let Some(kind) = self.kind { - 173 | if kind == "_" { - 174 | if self.named && !node.is_named() { - 175 | return Vec::new(); - 176 | } - 177 | } else if kind != node.kind() || self.named != node.is_named() { - 178 | return Vec::new(); - 179 | } - 180 | } - | - 181 | // If a field is specified, check that it matches the node. - 182 | if let Some(field) = self.field { - 183 | if cursor.field_name() != Some(field) { - 184 | return Vec::new(); - 185 | } - 186 | } - | - 187 | // Create a match for the current node. - 188 | let mat = Match { - 189 | captures: self - 190 | .capture - 191 | .as_ref() - 192 | .map_or_else(Vec::new, |name| vec![(name.as_str(), node)]), - 193 | last_node: Some(node), - 194 | }; - | - 195 | // If there are no child patterns to match, then return this single match. 
- 196 | if self.children.is_empty() { - 197 | return vec![mat]; - 198 | } - | - 199 | // Find every matching combination of child patterns and child nodes. - 200 | let mut finished_matches = Vec::::new(); - 201 | if cursor.goto_first_child() { - 202 | let mut match_states = vec![(0, mat)]; - 203 | loop { - 204 | let mut new_match_states = Vec::new(); - 205 | for (pattern_index, mat) in &match_states { - 206 | let child_pattern = &self.children[*pattern_index]; - 207 | let child_matches = child_pattern.match_node(cursor); - 208 | for child_match in child_matches { - 209 | let mut combined_match = mat.clone(); - 210 | combined_match.last_node = child_match.last_node; - 211 | combined_match - 212 | .captures - 213 | .extend_from_slice(&child_match.captures); - 214 | if pattern_index + 1 < self.children.len() { - 215 | new_match_states.push((*pattern_index + 1, combined_match)); - 216 | } else { - 217 | let mut existing = false; - 218 | for existing_match in &mut finished_matches { - 219 | if existing_match.captures == combined_match.captures { - 220 | if child_pattern.capture.is_some() { - 221 | existing_match.last_node = combined_match.last_node; - 222 | } - 223 | existing = true; - 224 | } - 225 | } - 226 | if !existing { - 227 | finished_matches.push(combined_match); - 228 | } - 229 | } - 230 | } - 231 | } - 232 | match_states.extend_from_slice(&new_match_states); - 233 | if !cursor.goto_next_sibling() { - 234 | break; - 235 | } - 236 | } - 237 | cursor.goto_parent(); - 238 | } - 239 | finished_matches - 240 | } - 241 | } - | - 242 | impl std::fmt::Display for Pattern { - 243 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - 244 | let mut result = String::new(); - 245 | self.write_to_string(&mut result, 0); - 246 | write!(f, "{result}") - 247 | } - 248 | } - | - 249 | impl PartialOrd for Match<'_, '_> { - 250 | fn partial_cmp(&self, other: &Self) -> Option { - 251 | Some(self.cmp(other)) - 252 | } - 253 | } - | - 254 | impl Ord for Match<'_, 
'_> { - 255 | // Tree-sitter returns matches in the order that they terminate - 256 | // during a depth-first walk of the tree. If multiple matches - 257 | // terminate on the same node, those matches are produced in the - 258 | // order that their captures were discovered. - 259 | fn cmp(&self, other: &Self) -> Ordering { - 260 | if let Some((last_node_a, last_node_b)) = self.last_node.zip(other.last_node) { - 261 | let cmp = compare_depth_first(last_node_a, last_node_b); - 262 | if cmp.is_ne() { - 263 | return cmp; - 264 | } - 265 | } - | - 266 | for (a, b) in self.captures.iter().zip(other.captures.iter()) { - 267 | let cmp = compare_depth_first(a.1, b.1); - 268 | if !cmp.is_eq() { - 269 | return cmp; - 270 | } - 271 | } - | - 272 | self.captures.len().cmp(&other.captures.len()) - 273 | } - 274 | } - | - 275 | fn compare_depth_first(a: Node, b: Node) -> Ordering { - 276 | let a = a.byte_range(); - 277 | let b = b.byte_range(); - 278 | a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end)) - 279 | } - | - 280 | pub fn assert_query_matches( - 281 | language: &Language, - 282 | query: &Query, - 283 | source: &str, - 284 | expected: &[(usize, Vec<(&str, &str)>)], - 285 | ) { - 286 | let mut parser = Parser::new(); - 287 | parser.set_language(language).unwrap(); - 288 | let tree = parser.parse(source, None).unwrap(); - 289 | let mut cursor = QueryCursor::new(); - 290 | let matches = cursor.matches(query, tree.root_node(), source.as_bytes()); - 291 | pretty_assertions::assert_eq!(expected, collect_matches(matches, query, source)); - 292 | pretty_assertions::assert_eq!(false, cursor.did_exceed_match_limit()); - 293 | } - | - 294 | pub fn collect_matches<'a>( - 295 | mut matches: impl StreamingIterator>, - 296 | query: &'a Query, - 297 | source: &'a str, - 298 | ) -> Vec<(usize, Vec<(&'a str, &'a str)>)> { - 299 | let mut result = Vec::new(); - 300 | while let Some(m) = matches.next() { - 301 | result.push(( - 302 | m.pattern_index, - 303 | 
format_captures(m.captures.iter().into_streaming_iter_ref(), query, source), - 304 | )); - 305 | } - 306 | result - 307 | } - | - 308 | pub fn collect_captures<'a>( - 309 | captures: impl StreamingIterator, usize)>, - 310 | query: &'a Query, - 311 | source: &'a str, - 312 | ) -> Vec<(&'a str, &'a str)> { - 313 | format_captures(captures.map(|(m, i)| m.captures[*i]), query, source) - 314 | } - | - 315 | fn format_captures<'a>( - 316 | mut captures: impl StreamingIterator>, - 317 | query: &'a Query, - 318 | source: &'a str, - 319 | ) -> Vec<(&'a str, &'a str)> { - 320 | let mut result = Vec::new(); - 321 | while let Some(capture) = captures.next() { - 322 | result.push(( - 323 | query.capture_names()[capture.index as usize], - 324 | capture.node.utf8_text(source.as_bytes()).unwrap(), - 325 | )); - 326 | } - 327 | result - 328 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/highlight_test.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | ffi::CString, - 3 | fs, - 4 | os::raw::c_char, - 5 | ptr, slice, str, - 6 | sync::{ - 7 | atomic::{AtomicUsize, Ordering}, - 8 | LazyLock, - 9 | }, - 10 | }; - | - 11 | use tree_sitter_highlight::{ - 12 | c, Error, Highlight, HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer, - 13 | }; - | - 14 | use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path}; - | - 15 | static JS_HIGHLIGHT: LazyLock = - 16 | LazyLock::new(|| get_highlight_config("javascript", Some("injections.scm"), &HIGHLIGHT_NAMES)); - | - 17 | static JSDOC_HIGHLIGHT: LazyLock = - 18 | LazyLock::new(|| get_highlight_config("jsdoc", None, &HIGHLIGHT_NAMES)); - | - 19 | static HTML_HIGHLIGHT: LazyLock = - 20 | LazyLock::new(|| get_highlight_config("html", Some("injections.scm"), &HIGHLIGHT_NAMES)); - | - 21 | static EJS_HIGHLIGHT: LazyLock = LazyLock::new(|| { - 22 | get_highlight_config( - 23 | 
"embedded-template", - 24 | Some("injections-ejs.scm"), - 25 | &HIGHLIGHT_NAMES, - 26 | ) - 27 | }); - | - 28 | static RUST_HIGHLIGHT: LazyLock = - 29 | LazyLock::new(|| get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES)); - | - 30 | static HIGHLIGHT_NAMES: LazyLock> = LazyLock::new(|| { - 31 | [ - 32 | "attribute", - 33 | "boolean", - 34 | "carriage-return", - 35 | "comment", - 36 | "constant", - 37 | "constant.builtin", - 38 | "constructor", - 39 | "embedded", - 40 | "function", - 41 | "function.builtin", - 42 | "keyword", - 43 | "module", - 44 | "number", - 45 | "operator", - 46 | "property", - 47 | "property.builtin", - 48 | "punctuation", - 49 | "punctuation.bracket", - 50 | "punctuation.delimiter", - 51 | "punctuation.special", - 52 | "string", - 53 | "string.special", - 54 | "tag", - 55 | "type", - 56 | "type.builtin", - 57 | "variable", - 58 | "variable.builtin", - 59 | "variable.parameter", - 60 | ] - 61 | .iter() - 62 | .copied() - 63 | .map(String::from) - 64 | .collect() - 65 | }); - | - 66 | static HTML_ATTRS: LazyLock> = LazyLock::new(|| { - 67 | HIGHLIGHT_NAMES - 68 | .iter() - 69 | .map(|s| format!("class={s}")) - 70 | .collect() - 71 | }); - | - 72 | #[test] - 73 | fn test_highlighting_javascript() { - 74 | let source = "const a = function(b) { return b + c; }"; - 75 | assert_eq!( - 76 | &to_token_vector(source, &JS_HIGHLIGHT).unwrap(), - 77 | &[vec![ - 78 | ("const", vec!["keyword"]), - 79 | (" ", vec![]), - 80 | ("a", vec!["function"]), - 81 | (" ", vec![]), - 82 | ("=", vec!["operator"]), - 83 | (" ", vec![]), - 84 | ("function", vec!["keyword"]), - 85 | ("(", vec!["punctuation.bracket"]), - 86 | ("b", vec!["variable"]), - 87 | (")", vec!["punctuation.bracket"]), - 88 | (" ", vec![]), - 89 | ("{", vec!["punctuation.bracket"]), - 90 | (" ", vec![]), - 91 | ("return", vec!["keyword"]), - 92 | (" ", vec![]), - 93 | ("b", vec!["variable"]), - 94 | (" ", vec![]), - 95 | ("+", vec!["operator"]), - 96 | (" ", vec![]), - 97 | ("c", 
vec!["variable"]), - 98 | (";", vec!["punctuation.delimiter"]), - 99 | (" ", vec![]), - 100 | ("}", vec!["punctuation.bracket"]), - 101 | ]] - 102 | ); - 103 | } - | - 104 | #[test] - 105 | fn test_highlighting_injected_html_in_javascript() { - 106 | let source = ["const s = html `
${a < b}
`;"].join("\n"); - | - 107 | assert_eq!( - 108 | &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), - 109 | &[vec![ - 110 | ("const", vec!["keyword"]), - 111 | (" ", vec![]), - 112 | ("s", vec!["variable"]), - 113 | (" ", vec![]), - 114 | ("=", vec!["operator"]), - 115 | (" ", vec![]), - 116 | ("html", vec!["function"]), - 117 | (" ", vec![]), - 118 | ("`", vec!["string"]), - 119 | ("<", vec!["string", "punctuation.bracket"]), - 120 | ("div", vec!["string", "tag"]), - 121 | (">", vec!["string", "punctuation.bracket"]), - 122 | ("${", vec!["string", "embedded", "punctuation.special"]), - 123 | ("a", vec!["string", "embedded", "variable"]), - 124 | (" ", vec!["string", "embedded"]), - 125 | ("<", vec!["string", "embedded", "operator"]), - 126 | (" ", vec!["string", "embedded"]), - 127 | ("b", vec!["string", "embedded", "variable"]), - 128 | ("}", vec!["string", "embedded", "punctuation.special"]), - 129 | ("", vec!["string", "punctuation.bracket"]), - 132 | ("`", vec!["string"]), - 133 | (";", vec!["punctuation.delimiter"]), - 134 | ]] - 135 | ); - 136 | } - | - 137 | #[test] - 138 | fn test_highlighting_injected_javascript_in_html_mini() { - 139 | let source = ""; - | - 140 | assert_eq!( - 141 | &to_token_vector(source, &HTML_HIGHLIGHT).unwrap(), - 142 | &[vec![ - 143 | ("<", vec!["punctuation.bracket"]), - 144 | ("script", vec!["tag"]), - 145 | (">", vec!["punctuation.bracket"]), - 146 | ("const", vec!["keyword"]), - 147 | (" ", vec![]), - 148 | ("x", vec!["variable"]), - 149 | (" ", vec![]), - 150 | ("=", vec!["operator"]), - 151 | (" ", vec![]), - 152 | ("new", vec!["keyword"]), - 153 | (" ", vec![]), - 154 | ("Thing", vec!["constructor"]), - 155 | ("(", vec!["punctuation.bracket"]), - 156 | (")", vec!["punctuation.bracket"]), - 157 | (";", vec!["punctuation.delimiter"]), - 158 | ("", vec!["punctuation.bracket"]), - 161 | ],] - 162 | ); - 163 | } - | - 164 | #[test] - 165 | fn test_highlighting_injected_javascript_in_html() { - 166 | let source = [ - 167 | "", - 
168 | " ", - 171 | "", - 172 | ] - 173 | .join("\n"); - | - 174 | assert_eq!( - 175 | &to_token_vector(&source, &HTML_HIGHLIGHT).unwrap(), - 176 | &[ - 177 | vec![ - 178 | ("<", vec!["punctuation.bracket"]), - 179 | ("body", vec!["tag"]), - 180 | (">", vec!["punctuation.bracket"]), - 181 | ], - 182 | vec![ - 183 | (" ", vec![]), - 184 | ("<", vec!["punctuation.bracket"]), - 185 | ("script", vec!["tag"]), - 186 | (">", vec!["punctuation.bracket"]), - 187 | ], - 188 | vec![ - 189 | (" ", vec![]), - 190 | ("const", vec!["keyword"]), - 191 | (" ", vec![]), - 192 | ("x", vec!["variable"]), - 193 | (" ", vec![]), - 194 | ("=", vec!["operator"]), - 195 | (" ", vec![]), - 196 | ("new", vec!["keyword"]), - 197 | (" ", vec![]), - 198 | ("Thing", vec!["constructor"]), - 199 | ("(", vec!["punctuation.bracket"]), - 200 | (")", vec!["punctuation.bracket"]), - 201 | (";", vec!["punctuation.delimiter"]), - 202 | ], - 203 | vec![ - 204 | (" ", vec![]), - 205 | ("", vec!["punctuation.bracket"]), - 208 | ], - 209 | vec![ - 210 | ("", vec!["punctuation.bracket"]), - 213 | ], - 214 | ] - 215 | ); - 216 | } - | - 217 | #[test] - 218 | fn test_highlighting_multiline_nodes_to_html() { - 219 | let source = [ - 220 | "const SOMETHING = `", - 221 | " one ${", - 222 | " two()", - 223 | " } three", - 224 | "`", - 225 | "", - 226 | ] - 227 | .join("\n"); - | - 228 | assert_eq!( - 229 | &to_html(&source, &JS_HIGHLIGHT).unwrap(), - 230 | &[ - 231 | "const SOMETHING = `\n".to_string(), - 232 | " one ${\n".to_string(), - 233 | " two()\n".to_string(), - 234 | " } three\n".to_string(), - 235 | "`\n".to_string(), - 236 | ] - 237 | ); - 238 | } - | - 239 | #[test] - 240 | fn test_highlighting_with_local_variable_tracking() { - 241 | let source = [ - 242 | "module.exports = function a(b) {", - 243 | " const module = c;", - 244 | " console.log(module, b);", - 245 | "}", - 246 | ] - 247 | .join("\n"); - | - 248 | assert_eq!( - 249 | &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), - 250 | &[ - 251 | 
vec![ - 252 | ("module", vec!["variable.builtin"]), - 253 | (".", vec!["punctuation.delimiter"]), - 254 | ("exports", vec!["function"]), - 255 | (" ", vec![]), - 256 | ("=", vec!["operator"]), - 257 | (" ", vec![]), - 258 | ("function", vec!["keyword"]), - 259 | (" ", vec![]), - 260 | ("a", vec!["function"]), - 261 | ("(", vec!["punctuation.bracket"]), - 262 | ("b", vec!["variable"]), - 263 | (")", vec!["punctuation.bracket"]), - 264 | (" ", vec![]), - 265 | ("{", vec!["punctuation.bracket"]) - 266 | ], - 267 | vec![ - 268 | (" ", vec![]), - 269 | ("const", vec!["keyword"]), - 270 | (" ", vec![]), - 271 | ("module", vec!["variable"]), - 272 | (" ", vec![]), - 273 | ("=", vec!["operator"]), - 274 | (" ", vec![]), - 275 | ("c", vec!["variable"]), - 276 | (";", vec!["punctuation.delimiter"]) - 277 | ], - 278 | vec![ - 279 | (" ", vec![]), - 280 | ("console", vec!["variable.builtin"]), - 281 | (".", vec!["punctuation.delimiter"]), - 282 | ("log", vec!["function"]), - 283 | ("(", vec!["punctuation.bracket"]), - 284 | // Not a builtin, because `module` was defined as a variable above. - 285 | ("module", vec!["variable"]), - 286 | (",", vec!["punctuation.delimiter"]), - 287 | (" ", vec![]), - 288 | // A parameter, because `b` was defined as a parameter above. 
- 289 | ("b", vec!["variable"]), - 290 | (")", vec!["punctuation.bracket"]), - 291 | (";", vec!["punctuation.delimiter"]), - 292 | ], - 293 | vec![("}", vec!["punctuation.bracket"])] - 294 | ], - 295 | ); - 296 | } - | - 297 | #[test] - 298 | fn test_highlighting_empty_lines() { - 299 | let source = [ - 300 | "class A {", - 301 | "", - 302 | " b(c) {", - 303 | "", - 304 | " d(e)", - 305 | "", - 306 | " }", - 307 | "", - 308 | "}", - 309 | ] - 310 | .join("\n"); - | - 311 | assert_eq!( - 312 | &to_html(&source, &JS_HIGHLIGHT).unwrap(), - 313 | &[ - 314 | "class A {\n".to_string(), - 315 | "\n".to_string(), - 316 | " b(c) {\n".to_string(), - 317 | "\n".to_string(), - 318 | " d(e)\n".to_string(), - 319 | "\n".to_string(), - 320 | " }\n".to_string(), - 321 | "\n".to_string(), - 322 | "}\n".to_string(), - 323 | ] - 324 | ); - 325 | } - | - 326 | #[test] - 327 | fn test_highlighting_carriage_returns() { - 328 | let source = "a = \"a\rb\"\r\nb\r"; - | - 329 | assert_eq!( - 330 | &to_html(source, &JS_HIGHLIGHT).unwrap(), - 331 | &[ - 332 | "a = "ab"\n", - 333 | "b\n", - 334 | ], - 335 | ); - 336 | } - | - 337 | #[test] - 338 | fn test_highlighting_ejs_with_html_and_javascript() { - 339 | let source = ["
<% foo() %>
"].join("\n"); - | - 340 | assert_eq!( - 341 | &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(), - 342 | &[[ - 343 | ("<", vec!["punctuation.bracket"]), - 344 | ("div", vec!["tag"]), - 345 | (">", vec!["punctuation.bracket"]), - 346 | ("<%", vec!["keyword"]), - 347 | (" ", vec![]), - 348 | ("foo", vec!["function"]), - 349 | ("(", vec!["punctuation.bracket"]), - 350 | (")", vec!["punctuation.bracket"]), - 351 | (" ", vec![]), - 352 | ("%>", vec!["keyword"]), - 353 | ("", vec!["punctuation.bracket"]), - 356 | ("<", vec!["punctuation.bracket"]), - 357 | ("script", vec!["tag"]), - 358 | (">", vec!["punctuation.bracket"]), - 359 | (" ", vec![]), - 360 | ("bar", vec!["function"]), - 361 | ("(", vec!["punctuation.bracket"]), - 362 | (")", vec!["punctuation.bracket"]), - 363 | (" ", vec![]), - 364 | ("", vec!["punctuation.bracket"]), - 367 | ]], - 368 | ); - 369 | } - | - 370 | #[test] - 371 | fn test_highlighting_javascript_with_jsdoc() { - 372 | // Regression test: the middle comment has no highlights. This should not prevent - 373 | // later injections from highlighting properly. 
- 374 | let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n"); - | - 375 | assert_eq!( - 376 | &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), - 377 | &[[ - 378 | ("a", vec!["variable"]), - 379 | (" ", vec![]), - 380 | ("/* ", vec!["comment"]), - 381 | ("@see", vec!["comment", "keyword"]), - 382 | (" a */", vec!["comment"]), - 383 | (" ", vec![]), - 384 | ("b", vec!["variable"]), - 385 | (";", vec!["punctuation.delimiter"]), - 386 | (" ", vec![]), - 387 | ("/* nothing */", vec!["comment"]), - 388 | (" ", vec![]), - 389 | ("c", vec!["variable"]), - 390 | (";", vec!["punctuation.delimiter"]), - 391 | (" ", vec![]), - 392 | ("/* ", vec!["comment"]), - 393 | ("@see", vec!["comment", "keyword"]), - 394 | (" b */", vec!["comment"]) - 395 | ]], - 396 | ); - 397 | } - | - 398 | #[test] - 399 | fn test_highlighting_with_content_children_included() { - 400 | let source = ["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); - | - 401 | assert_eq!( - 402 | &to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(), - 403 | &[ - 404 | vec![ - 405 | ("assert", vec!["function"]), - 406 | ("!", vec!["function"]), - 407 | ("(", vec!["punctuation.bracket"]), - 408 | ], - 409 | vec![ - 410 | (" a", vec![]), - 411 | (".", vec!["punctuation.delimiter"]), - 412 | ("b", vec!["property"]), - 413 | (".", vec!["punctuation.delimiter"]), - 414 | ("c", vec!["function"]), - 415 | ("(", vec!["punctuation.bracket"]), - 416 | (")", vec!["punctuation.bracket"]), - 417 | (" < ", vec![]), - 418 | ("D", vec!["type"]), - 419 | ("::", vec!["punctuation.delimiter"]), - 420 | ("e", vec!["function"]), - 421 | ("::", vec!["punctuation.delimiter"]), - 422 | ("<", vec!["punctuation.bracket"]), - 423 | ("F", vec!["type"]), - 424 | (">", vec!["punctuation.bracket"]), - 425 | ("(", vec!["punctuation.bracket"]), - 426 | (")", vec!["punctuation.bracket"]), - 427 | ], - 428 | vec![ - 429 | (")", vec!["punctuation.bracket"]), - 430 | (";", vec!["punctuation.delimiter"]), - 431 | ] - 432 | ], - 
433 | ); - 434 | } - | - 435 | #[test] - 436 | fn test_highlighting_cancellation() { - 437 | // An HTML document with a large injected JavaScript document: - 438 | let mut source = "\n"; - | - 443 | // Cancel the highlighting before parsing the injected document. - 444 | let cancellation_flag = AtomicUsize::new(0); - 445 | let injection_callback = |name: &str| { - 446 | cancellation_flag.store(1, Ordering::SeqCst); - 447 | test_language_for_injection_string(name) - 448 | }; - | - 449 | // The initial `highlight` call, which eagerly parses the outer document, should not fail. - 450 | let mut highlighter = Highlighter::new(); - 451 | let mut events = highlighter - 452 | .highlight( - 453 | &HTML_HIGHLIGHT, - 454 | source.as_bytes(), - 455 | Some(&cancellation_flag), - 456 | injection_callback, - 457 | ) - 458 | .unwrap(); - | - 459 | // Iterating the scopes should not panic. It should return an error once the - 460 | // cancellation is detected. - 461 | let found_cancellation_error = events.any(|event| match event { - 462 | Ok(_) => false, - 463 | Err(Error::Cancelled) => true, - 464 | Err(Error::InvalidLanguage | Error::Unknown) => { - 465 | unreachable!("Unexpected error type while iterating events") - 466 | } - 467 | }); - | - 468 | assert!( - 469 | found_cancellation_error, - 470 | "Expected a cancellation error while iterating events" - 471 | ); - 472 | } - | - 473 | #[test] - 474 | fn test_highlighting_via_c_api() { - 475 | let highlights = [ - 476 | "class=tag\0", - 477 | "class=function\0", - 478 | "class=string\0", - 479 | "class=keyword\0", - 480 | ]; - 481 | let highlight_names = highlights - 482 | .iter() - 483 | .map(|h| h["class=".len()..].as_ptr().cast::()) - 484 | .collect::>(); - 485 | let highlight_attrs = highlights - 486 | .iter() - 487 | .map(|h| h.as_bytes().as_ptr().cast::()) - 488 | .collect::>(); - 489 | let highlighter = unsafe { - 490 | c::ts_highlighter_new( - 491 | std::ptr::addr_of!(highlight_names[0]), - 492 | 
std::ptr::addr_of!(highlight_attrs[0]), - 493 | highlights.len() as u32, - 494 | ) - 495 | }; - | - 496 | let source_code = c_string(""); - | - 497 | let js_scope = c_string("source.js"); - 498 | let js_injection_regex = c_string("^javascript"); - 499 | let language = get_language("javascript"); - 500 | let lang_name = c_string("javascript"); - 501 | let queries = get_language_queries_path("javascript"); - 502 | let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); - 503 | let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); - 504 | let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap(); - 505 | unsafe { - 506 | c::ts_highlighter_add_language( - 507 | highlighter, - 508 | lang_name.as_ptr(), - 509 | js_scope.as_ptr(), - 510 | js_injection_regex.as_ptr(), - 511 | language, - 512 | highlights_query.as_ptr().cast::(), - 513 | injections_query.as_ptr().cast::(), - 514 | locals_query.as_ptr().cast::(), - 515 | highlights_query.len() as u32, - 516 | injections_query.len() as u32, - 517 | locals_query.len() as u32, - 518 | ); - 519 | } - | - 520 | let html_scope = c_string("text.html.basic"); - 521 | let html_injection_regex = c_string("^html"); - 522 | let language = get_language("html"); - 523 | let lang_name = c_string("html"); - 524 | let queries = get_language_queries_path("html"); - 525 | let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); - 526 | let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); - 527 | unsafe { - 528 | c::ts_highlighter_add_language( - 529 | highlighter, - 530 | lang_name.as_ptr(), - 531 | html_scope.as_ptr(), - 532 | html_injection_regex.as_ptr(), - 533 | language, - 534 | highlights_query.as_ptr().cast::(), - 535 | injections_query.as_ptr().cast::(), - 536 | ptr::null(), - 537 | highlights_query.len() as u32, - 538 | injections_query.len() as u32, - 539 | 0, - 540 | ); - 541 | } - | - 542 | let buffer 
= c::ts_highlight_buffer_new(); - | - 543 | unsafe { - 544 | c::ts_highlighter_highlight( - 545 | highlighter, - 546 | html_scope.as_ptr(), - 547 | source_code.as_ptr(), - 548 | source_code.as_bytes().len() as u32, - 549 | buffer, - 550 | ptr::null_mut(), - 551 | ); - 552 | } - | - 553 | let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) }; - 554 | let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) }; - 555 | let output_len = unsafe { c::ts_highlight_buffer_len(buffer) }; - 556 | let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) }; - | - 557 | let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) }; - 558 | let output_line_offsets = - 559 | unsafe { slice::from_raw_parts(output_line_offsets, output_line_count as usize) }; - | - 560 | let mut lines = Vec::with_capacity(output_line_count as usize); - 561 | for i in 0..(output_line_count as usize) { - 562 | let line_start = output_line_offsets[i] as usize; - 563 | let line_end = output_line_offsets - 564 | .get(i + 1) - 565 | .map_or(output_bytes.len(), |x| *x as usize); - 566 | lines.push(str::from_utf8(&output_bytes[line_start..line_end]).unwrap()); - 567 | } - | - 568 | assert_eq!( - 569 | lines, - 570 | vec![ - 571 | "<script>\n", - 572 | "const a = b('c');\n", - 573 | "c.d();\n", - 574 | "</script>\n", - 575 | ] - 576 | ); - | - 577 | unsafe { - 578 | c::ts_highlighter_delete(highlighter); - 579 | c::ts_highlight_buffer_delete(buffer); - 580 | } - 581 | } - | - 582 | #[test] - 583 | fn test_highlighting_with_all_captures_applied() { - 584 | let source = "fn main(a: u32, b: u32) -> { let c = a + b; }"; - 585 | let language = get_language("rust"); - 586 | let highlights_query = indoc::indoc! 
{" - 587 | [ - 588 | \"fn\" - 589 | \"let\" - 590 | ] @keyword - 591 | (identifier) @variable - 592 | (function_item name: (identifier) @function) - 593 | (parameter pattern: (identifier) @variable.parameter) - 594 | (primitive_type) @type.builtin - 595 | \"=\" @operator - 596 | [ \"->\" \":\" \";\" ] @punctuation.delimiter - 597 | [ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket - 598 | "}; - 599 | let mut rust_highlight_reverse = - 600 | HighlightConfiguration::new(language, "rust", highlights_query, "", "").unwrap(); - 601 | rust_highlight_reverse.configure(&HIGHLIGHT_NAMES); - | - 602 | assert_eq!( - 603 | &to_token_vector(source, &rust_highlight_reverse).unwrap(), - 604 | &[[ - 605 | ("fn", vec!["keyword"]), - 606 | (" ", vec![]), - 607 | ("main", vec!["function"]), - 608 | ("(", vec!["punctuation.bracket"]), - 609 | ("a", vec!["variable.parameter"]), - 610 | (":", vec!["punctuation.delimiter"]), - 611 | (" ", vec![]), - 612 | ("u32", vec!["type.builtin"]), - 613 | (", ", vec![]), - 614 | ("b", vec!["variable.parameter"]), - 615 | (":", vec!["punctuation.delimiter"]), - 616 | (" ", vec![]), - 617 | ("u32", vec!["type.builtin"]), - 618 | (")", vec!["punctuation.bracket"]), - 619 | (" ", vec![]), - 620 | ("->", vec!["punctuation.delimiter"]), - 621 | (" ", vec![]), - 622 | ("{", vec!["punctuation.bracket"]), - 623 | (" ", vec![]), - 624 | ("let", vec!["keyword"]), - 625 | (" ", vec![]), - 626 | ("c", vec!["variable"]), - 627 | (" ", vec![]), - 628 | ("=", vec!["operator"]), - 629 | (" ", vec![]), - 630 | ("a", vec!["variable"]), - 631 | (" + ", vec![]), - 632 | ("b", vec!["variable"]), - 633 | (";", vec!["punctuation.delimiter"]), - 634 | (" ", vec![]), - 635 | ("}", vec!["punctuation.bracket"]) - 636 | ]], - 637 | ); - 638 | } - | - 639 | #[test] - 640 | fn test_decode_utf8_lossy() { - 641 | use tree_sitter::LossyUtf8; - | - 642 | let parts = LossyUtf8::new(b"hi").collect::>(); - 643 | assert_eq!(parts, vec!["hi"]); - | - 644 | let parts = 
LossyUtf8::new(b"hi\xc0\xc1bye").collect::>(); - 645 | assert_eq!(parts, vec!["hi", "\u{fffd}", "\u{fffd}", "bye"]); - | - 646 | let parts = LossyUtf8::new(b"\xc0\xc1bye").collect::>(); - 647 | assert_eq!(parts, vec!["\u{fffd}", "\u{fffd}", "bye"]); - | - 648 | let parts = LossyUtf8::new(b"hello\xc0\xc1").collect::>(); - 649 | assert_eq!(parts, vec!["hello", "\u{fffd}", "\u{fffd}"]); - 650 | } - | - 651 | fn c_string(s: &str) -> CString { - 652 | CString::new(s.as_bytes().to_vec()).unwrap() - 653 | } - | - 654 | fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> { - 655 | match string { - 656 | "javascript" => Some(&JS_HIGHLIGHT), - 657 | "html" => Some(&HTML_HIGHLIGHT), - 658 | "rust" => Some(&RUST_HIGHLIGHT), - 659 | "jsdoc" => Some(&JSDOC_HIGHLIGHT), - 660 | _ => None, - 661 | } - 662 | } - | - 663 | fn to_html<'a>( - 664 | src: &'a str, - 665 | language_config: &'a HighlightConfiguration, - 666 | ) -> Result, Error> { - 667 | let src = src.as_bytes(); - 668 | let mut renderer = HtmlRenderer::new(); - 669 | let mut highlighter = Highlighter::new(); - 670 | let events = highlighter.highlight( - 671 | language_config, - 672 | src, - 673 | None, - 674 | &test_language_for_injection_string, - 675 | )?; - | - 676 | renderer.set_carriage_return_highlight( - 677 | HIGHLIGHT_NAMES - 678 | .iter() - 679 | .position(|s| s == "carriage-return") - 680 | .map(Highlight), - 681 | ); - 682 | renderer - 683 | .render(events, src, &|highlight, output| { - 684 | output.extend(HTML_ATTRS[highlight.0].as_bytes()); - 685 | }) - 686 | .unwrap(); - 687 | Ok(renderer - 688 | .lines() - 689 | .map(std::string::ToString::to_string) - 690 | .collect()) - 691 | } - | - 692 | #[allow(clippy::type_complexity)] - 693 | fn to_token_vector<'a>( - 694 | src: &'a str, - 695 | language_config: &'a HighlightConfiguration, - 696 | ) -> Result)>>, Error> { - 697 | let src = src.as_bytes(); - 698 | let mut highlighter = Highlighter::new(); - 699 | let mut 
lines = Vec::new(); - 700 | let mut highlights = Vec::new(); - 701 | let mut line = Vec::new(); - 702 | let events = highlighter.highlight( - 703 | language_config, - 704 | src, - 705 | None, - 706 | &test_language_for_injection_string, - 707 | )?; - 708 | for event in events { - 709 | match event? { - 710 | HighlightEvent::HighlightStart(s) => highlights.push(HIGHLIGHT_NAMES[s.0].as_str()), - 711 | HighlightEvent::HighlightEnd => { - 712 | highlights.pop(); - 713 | } - 714 | HighlightEvent::Source { start, end } => { - 715 | let s = str::from_utf8(&src[start..end]).unwrap(); - 716 | for (i, l) in s.split('\n').enumerate() { - 717 | let l = l.trim_end_matches('\r'); - 718 | if i > 0 { - 719 | lines.push(std::mem::take(&mut line)); - 720 | } - 721 | if !l.is_empty() { - 722 | line.push((l, highlights.clone())); - 723 | } - 724 | } - 725 | } - 726 | } - 727 | } - 728 | if !line.is_empty() { - 729 | lines.push(line); - 730 | } - 731 | Ok(lines) - 732 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/language_test.rs: --------------------------------------------------------------------------------- - 1 | use tree_sitter::{self, Parser}; - | - 2 | use super::helpers::fixtures::get_language; - | - 3 | #[test] - 4 | fn test_lookahead_iterator() { - 5 | let mut parser = Parser::new(); - 6 | let language = get_language("rust"); - 7 | parser.set_language(&language).unwrap(); - | - 8 | let tree = parser.parse("struct Stuff {}", None).unwrap(); - | - 9 | let mut cursor = tree.walk(); - | - 10 | assert!(cursor.goto_first_child()); // struct - 11 | assert!(cursor.goto_first_child()); // struct keyword - | - 12 | let next_state = cursor.node().next_parse_state(); - 13 | assert_ne!(next_state, 0); - 14 | assert_eq!( - 15 | next_state, - 16 | language.next_state(cursor.node().parse_state(), cursor.node().grammar_id()) - 17 | ); - 18 | assert!((next_state as usize) < language.parse_state_count()); - 19 | 
assert!(cursor.goto_next_sibling()); // type_identifier - 20 | assert_eq!(next_state, cursor.node().parse_state()); - 21 | assert_eq!(cursor.node().grammar_name(), "identifier"); - 22 | assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id()); - | - 23 | let expected_symbols = ["//", "/*", "identifier", "line_comment", "block_comment"]; - 24 | let mut lookahead = language.lookahead_iterator(next_state).unwrap(); - 25 | assert_eq!(*lookahead.language(), language); - 26 | assert!(lookahead.iter_names().eq(expected_symbols)); - | - 27 | lookahead.reset_state(next_state); - 28 | assert!(lookahead.iter_names().eq(expected_symbols)); - | - 29 | lookahead.reset(&language, next_state); - 30 | assert!(lookahead - 31 | .map(|s| language.node_kind_for_id(s).unwrap()) - 32 | .eq(expected_symbols)); - 33 | } - | - 34 | #[test] - 35 | fn test_lookahead_iterator_modifiable_only_by_mut() { - 36 | let mut parser = Parser::new(); - 37 | let language = get_language("rust"); - 38 | parser.set_language(&language).unwrap(); - | - 39 | let tree = parser.parse("struct Stuff {}", None).unwrap(); - | - 40 | let mut cursor = tree.walk(); - | - 41 | assert!(cursor.goto_first_child()); // struct - 42 | assert!(cursor.goto_first_child()); // struct keyword - | - 43 | let next_state = cursor.node().next_parse_state(); - 44 | assert_ne!(next_state, 0); - | - 45 | let mut lookahead = language.lookahead_iterator(next_state).unwrap(); - 46 | let _ = lookahead.next(); - | - 47 | let mut names = lookahead.iter_names(); - 48 | let _ = names.next(); - 49 | } - | - 50 | #[test] - 51 | fn test_symbol_metadata_checks() { - 52 | let language = get_language("rust"); - 53 | for i in 0..language.node_kind_count() { - 54 | let sym = i as u16; - 55 | let name = language.node_kind_for_id(sym).unwrap(); - 56 | match name { - 57 | "_type" - 58 | | "_expression" - 59 | | "_pattern" - 60 | | "_literal" - 61 | | "_literal_pattern" - 62 | | "_declaration_statement" => 
assert!(language.node_kind_is_supertype(sym)), - | - 63 | "_raw_string_literal_start" - 64 | | "_raw_string_literal_end" - 65 | | "_line_doc_comment" - 66 | | "_error_sentinel" => assert!(!language.node_kind_is_supertype(sym)), - | - 67 | "enum_item" | "struct_item" | "type_item" => { - 68 | assert!(language.node_kind_is_named(sym)); - 69 | } - | - 70 | "=>" | "[" | "]" | "(" | ")" | "{" | "}" => { - 71 | assert!(language.node_kind_is_visible(sym)); - 72 | } - | - 73 | _ => {} - 74 | } - 75 | } - 76 | } - | - 77 | #[test] - 78 | fn test_supertypes() { - 79 | let language = get_language("rust"); - 80 | let supertypes = language.supertypes(); - | - 81 | if language.abi_version() < 15 { - 82 | return; - 83 | } - | - 84 | assert_eq!(supertypes.len(), 5); - 85 | assert_eq!( - 86 | supertypes - 87 | .iter() - 88 | .filter_map(|&s| language.node_kind_for_id(s)) - 89 | .map(|s| s.to_string()) - 90 | .collect::>(), - 91 | vec![ - 92 | "_expression", - 93 | "_literal", - 94 | "_literal_pattern", - 95 | "_pattern", - 96 | "_type" - 97 | ] - 98 | ); - | - 99 | for &supertype in supertypes { - 100 | let mut subtypes = language - 101 | .subtypes_for_supertype(supertype) - 102 | .iter() - 103 | .filter_map(|symbol| language.node_kind_for_id(*symbol)) - 104 | .collect::>(); - 105 | subtypes.sort_unstable(); - 106 | subtypes.dedup(); - | - 107 | match language.node_kind_for_id(supertype) { - 108 | Some("_literal") => { - 109 | assert_eq!( - 110 | subtypes, - 111 | &[ - 112 | "boolean_literal", - 113 | "char_literal", - 114 | "float_literal", - 115 | "integer_literal", - 116 | "raw_string_literal", - 117 | "string_literal" - 118 | ] - 119 | ); - 120 | } - 121 | Some("_pattern") => { - 122 | assert_eq!( - 123 | subtypes, - 124 | &[ - 125 | "_", - 126 | "_literal_pattern", - 127 | "captured_pattern", - 128 | "const_block", - 129 | "generic_pattern", - 130 | "identifier", - 131 | "macro_invocation", - 132 | "mut_pattern", - 133 | "or_pattern", - 134 | "range_pattern", - 135 | 
"ref_pattern", - 136 | "reference_pattern", - 137 | "remaining_field_pattern", - 138 | "scoped_identifier", - 139 | "slice_pattern", - 140 | "struct_pattern", - 141 | "tuple_pattern", - 142 | "tuple_struct_pattern", - 143 | ] - 144 | ); - 145 | } - 146 | Some("_type") => { - 147 | assert_eq!( - 148 | subtypes, - 149 | &[ - 150 | "abstract_type", - 151 | "array_type", - 152 | "bounded_type", - 153 | "dynamic_type", - 154 | "function_type", - 155 | "generic_type", - 156 | "macro_invocation", - 157 | "metavariable", - 158 | "never_type", - 159 | "pointer_type", - 160 | "primitive_type", - 161 | "reference_type", - 162 | "removed_trait_bound", - 163 | "scoped_type_identifier", - 164 | "tuple_type", - 165 | "type_identifier", - 166 | "unit_type" - 167 | ] - 168 | ); - 169 | } - 170 | _ => {} - 171 | } - 172 | } - 173 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/node_test.rs: --------------------------------------------------------------------------------- - 1 | use tree_sitter::{InputEdit, Node, Parser, Point, Tree}; - 2 | use tree_sitter_generate::load_grammar_file; - | - 3 | use super::{ - 4 | get_random_edit, - 5 | helpers::fixtures::{fixtures_dir, get_language, get_test_language}, - 6 | Rand, - 7 | }; - 8 | use crate::{ - 9 | parse::perform_edit, - 10 | tests::{generate_parser, helpers::fixtures::get_test_fixture_language}, - 11 | }; - | - 12 | const JSON_EXAMPLE: &str = r#" - | - 13 | [ - 14 | 123, - 15 | false, - 16 | { - 17 | "x": null - 18 | } - 19 | ] - 20 | "#; - | - 21 | const GRAMMAR_WITH_ALIASES_AND_EXTRAS: &str = r#"{ - 22 | "name": "aliases_and_extras", - | - 23 | "extras": [ - 24 | {"type": "PATTERN", "value": "\\s+"}, - 25 | {"type": "SYMBOL", "name": "comment"} - 26 | ], - | - 27 | "rules": { - 28 | "a": { - 29 | "type": "SEQ", - 30 | "members": [ - 31 | {"type": "SYMBOL", "name": "b"}, - 32 | { - 33 | "type": "ALIAS", - 34 | "value": "B", - 35 | "named": true, - 36 | "content": 
{"type": "SYMBOL", "name": "b"} - 37 | }, - 38 | { - 39 | "type": "ALIAS", - 40 | "value": "C", - 41 | "named": true, - 42 | "content": {"type": "SYMBOL", "name": "_c"} - 43 | } - 44 | ] - 45 | }, - | - 46 | "b": {"type": "STRING", "value": "b"}, - | - 47 | "_c": {"type": "STRING", "value": "c"}, - | - 48 | "comment": {"type": "STRING", "value": "..."} - 49 | } - 50 | }"#; - | - 51 | #[test] - 52 | fn test_node_child() { - 53 | let tree = parse_json_example(); - 54 | let array_node = tree.root_node().child(0).unwrap(); - | - 55 | assert_eq!(array_node.kind(), "array"); - 56 | assert_eq!(array_node.named_child_count(), 3); - 57 | assert_eq!(array_node.start_byte(), JSON_EXAMPLE.find('[').unwrap()); - 58 | assert_eq!(array_node.end_byte(), JSON_EXAMPLE.find(']').unwrap() + 1); - 59 | assert_eq!(array_node.start_position(), Point::new(2, 0)); - 60 | assert_eq!(array_node.end_position(), Point::new(8, 1)); - 61 | assert_eq!(array_node.child_count(), 7); - | - 62 | let left_bracket_node = array_node.child(0).unwrap(); - 63 | let number_node = array_node.child(1).unwrap(); - 64 | let comma_node1 = array_node.child(2).unwrap(); - 65 | let false_node = array_node.child(3).unwrap(); - 66 | let comma_node2 = array_node.child(4).unwrap(); - 67 | let object_node = array_node.child(5).unwrap(); - 68 | let right_bracket_node = array_node.child(6).unwrap(); - | - 69 | assert_eq!(left_bracket_node.kind(), "["); - 70 | assert_eq!(number_node.kind(), "number"); - 71 | assert_eq!(comma_node1.kind(), ","); - 72 | assert_eq!(false_node.kind(), "false"); - 73 | assert_eq!(comma_node2.kind(), ","); - 74 | assert_eq!(object_node.kind(), "object"); - 75 | assert_eq!(right_bracket_node.kind(), "]"); - | - 76 | assert!(!left_bracket_node.is_named()); - 77 | assert!(number_node.is_named()); - 78 | assert!(!comma_node1.is_named()); - 79 | assert!(false_node.is_named()); - 80 | assert!(!comma_node2.is_named()); - 81 | assert!(object_node.is_named()); - 82 | 
assert!(!right_bracket_node.is_named()); - | - 83 | assert_eq!(number_node.start_byte(), JSON_EXAMPLE.find("123").unwrap()); - 84 | assert_eq!( - 85 | number_node.end_byte(), - 86 | JSON_EXAMPLE.find("123").unwrap() + 3 - 87 | ); - 88 | assert_eq!(number_node.start_position(), Point::new(3, 2)); - 89 | assert_eq!(number_node.end_position(), Point::new(3, 5)); - | - 90 | assert_eq!(false_node.start_byte(), JSON_EXAMPLE.find("false").unwrap()); - 91 | assert_eq!( - 92 | false_node.end_byte(), - 93 | JSON_EXAMPLE.find("false").unwrap() + 5 - 94 | ); - 95 | assert_eq!(false_node.start_position(), Point::new(4, 2)); - 96 | assert_eq!(false_node.end_position(), Point::new(4, 7)); - | - 97 | assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find('{').unwrap()); - 98 | assert_eq!(object_node.start_position(), Point::new(5, 2)); - 99 | assert_eq!(object_node.end_position(), Point::new(7, 3)); - | - 100 | assert_eq!(object_node.child_count(), 3); - 101 | let left_brace_node = object_node.child(0).unwrap(); - 102 | let pair_node = object_node.child(1).unwrap(); - 103 | let right_brace_node = object_node.child(2).unwrap(); - | - 104 | assert_eq!(left_brace_node.kind(), "{"); - 105 | assert_eq!(pair_node.kind(), "pair"); - 106 | assert_eq!(right_brace_node.kind(), "}"); - | - 107 | assert!(!left_brace_node.is_named()); - 108 | assert!(pair_node.is_named()); - 109 | assert!(!right_brace_node.is_named()); - | - 110 | assert_eq!(pair_node.start_byte(), JSON_EXAMPLE.find("\"x\"").unwrap()); - 111 | assert_eq!(pair_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4); - 112 | assert_eq!(pair_node.start_position(), Point::new(6, 4)); - 113 | assert_eq!(pair_node.end_position(), Point::new(6, 13)); - | - 114 | assert_eq!(pair_node.child_count(), 3); - 115 | let string_node = pair_node.child(0).unwrap(); - 116 | let colon_node = pair_node.child(1).unwrap(); - 117 | let null_node = pair_node.child(2).unwrap(); - | - 118 | assert_eq!(string_node.kind(), "string"); - 119 | 
assert_eq!(colon_node.kind(), ":"); - 120 | assert_eq!(null_node.kind(), "null"); - | - 121 | assert!(string_node.is_named()); - 122 | assert!(!colon_node.is_named()); - 123 | assert!(null_node.is_named()); - | - 124 | assert_eq!( - 125 | string_node.start_byte(), - 126 | JSON_EXAMPLE.find("\"x\"").unwrap() - 127 | ); - 128 | assert_eq!( - 129 | string_node.end_byte(), - 130 | JSON_EXAMPLE.find("\"x\"").unwrap() + 3 - 131 | ); - 132 | assert_eq!(string_node.start_position(), Point::new(6, 4)); - 133 | assert_eq!(string_node.end_position(), Point::new(6, 7)); - | - 134 | assert_eq!(null_node.start_byte(), JSON_EXAMPLE.find("null").unwrap()); - 135 | assert_eq!(null_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4); - 136 | assert_eq!(null_node.start_position(), Point::new(6, 9)); - 137 | assert_eq!(null_node.end_position(), Point::new(6, 13)); - | - 138 | assert_eq!(string_node.parent().unwrap(), pair_node); - 139 | assert_eq!(null_node.parent().unwrap(), pair_node); - 140 | assert_eq!(pair_node.parent().unwrap(), object_node); - 141 | assert_eq!(number_node.parent().unwrap(), array_node); - 142 | assert_eq!(false_node.parent().unwrap(), array_node); - 143 | assert_eq!(object_node.parent().unwrap(), array_node); - 144 | assert_eq!(array_node.parent().unwrap(), tree.root_node()); - 145 | assert_eq!(tree.root_node().parent(), None); - | - 146 | assert_eq!( - 147 | tree.root_node().child_with_descendant(null_node).unwrap(), - 148 | array_node - 149 | ); - 150 | assert_eq!( - 151 | array_node.child_with_descendant(null_node).unwrap(), - 152 | object_node - 153 | ); - 154 | assert_eq!( - 155 | object_node.child_with_descendant(null_node).unwrap(), - 156 | pair_node - 157 | ); - 158 | assert_eq!( - 159 | pair_node.child_with_descendant(null_node).unwrap(), - 160 | null_node - 161 | ); - 162 | assert_eq!(null_node.child_with_descendant(null_node), None); - 163 | } - | - 164 | #[test] - 165 | fn test_node_children() { - 166 | let tree = parse_json_example(); - 167 | 
let mut cursor = tree.walk(); - 168 | let array_node = tree.root_node().child(0).unwrap(); - 169 | assert_eq!( - 170 | array_node - 171 | .children(&mut cursor) - 172 | .map(|n| n.kind()) - 173 | .collect::>(), - 174 | &["[", "number", ",", "false", ",", "object", "]",] - 175 | ); - 176 | assert_eq!( - 177 | array_node - 178 | .named_children(&mut cursor) - 179 | .map(|n| n.kind()) - 180 | .collect::>(), - 181 | &["number", "false", "object"] - 182 | ); - 183 | let object_node = array_node - 184 | .named_children(&mut cursor) - 185 | .find(|n| n.kind() == "object") - 186 | .unwrap(); - 187 | assert_eq!( - 188 | object_node - 189 | .children(&mut cursor) - 190 | .map(|n| n.kind()) - 191 | .collect::>(), - 192 | &["{", "pair", "}",] - 193 | ); - 194 | } - | - 195 | #[test] - 196 | fn test_node_children_by_field_name() { - 197 | let mut parser = Parser::new(); - 198 | parser.set_language(&get_language("python")).unwrap(); - 199 | let source = " - 200 | if one: - 201 | a() - 202 | elif two: - 203 | b() - 204 | elif three: - 205 | c() - 206 | elif four: - 207 | d() - 208 | "; - | - 209 | let tree = parser.parse(source, None).unwrap(); - 210 | let node = tree.root_node().child(0).unwrap(); - 211 | assert_eq!(node.kind(), "if_statement"); - 212 | let mut cursor = tree.walk(); - 213 | let alternatives = node.children_by_field_name("alternative", &mut cursor); - 214 | let alternative_texts = - 215 | alternatives.map(|n| &source[n.child_by_field_name("condition").unwrap().byte_range()]); - 216 | assert_eq!( - 217 | alternative_texts.collect::>(), - 218 | &["two", "three", "four",] - 219 | ); - 220 | } - | - 221 | #[test] - 222 | fn test_node_parent_of_child_by_field_name() { - 223 | let mut parser = Parser::new(); - 224 | parser.set_language(&get_language("javascript")).unwrap(); - 225 | let tree = parser.parse("foo(a().b[0].c.d.e())", None).unwrap(); - 226 | let call_node = tree - 227 | .root_node() - 228 | .named_child(0) - 229 | .unwrap() - 230 | .named_child(0) - 231 | 
.unwrap(); - 232 | assert_eq!(call_node.kind(), "call_expression"); - | - 233 | // Regression test - when a field points to a hidden node (in this case, `_expression`) - 234 | // the hidden node should not be added to the node parent cache. - 235 | assert_eq!( - 236 | call_node.child_by_field_name("function").unwrap().parent(), - 237 | Some(call_node) - 238 | ); - 239 | } - | - 240 | #[test] - 241 | fn test_parent_of_zero_width_node() { - 242 | let code = "def dupa(foo):"; - | - 243 | let mut parser = Parser::new(); - 244 | parser.set_language(&get_language("python")).unwrap(); - | - 245 | let tree = parser.parse(code, None).unwrap(); - 246 | let root = tree.root_node(); - 247 | let function_definition = root.child(0).unwrap(); - 248 | let block = function_definition.child(4).unwrap(); - 249 | let block_parent = block.parent().unwrap(); - | - 250 | assert_eq!(block.to_string(), "(block)"); - 251 | assert_eq!(block_parent.kind(), "function_definition"); - 252 | assert_eq!(block_parent.to_string(), "(function_definition name: (identifier) parameters: (parameters (identifier)) body: (block))"); - | - 253 | assert_eq!( - 254 | root.child_with_descendant(block).unwrap(), - 255 | function_definition - 256 | ); - 257 | assert_eq!( - 258 | function_definition.child_with_descendant(block).unwrap(), - 259 | block - 260 | ); - 261 | assert_eq!(block.child_with_descendant(block), None); - | - 262 | let code = ""; - 263 | parser.set_language(&get_language("html")).unwrap(); - | - 264 | let tree = parser.parse(code, None).unwrap(); - 265 | let root = tree.root_node(); - 266 | let script_element = root.child(0).unwrap(); - 267 | let raw_text = script_element.child(1).unwrap(); - 268 | let parent = raw_text.parent().unwrap(); - 269 | assert_eq!(parent, script_element); - 270 | } - | - 271 | #[test] - 272 | fn test_next_sibling_of_zero_width_node() { - 273 | let mut parser = Parser::new(); - 274 | let language = get_test_fixture_language("next_sibling_from_zwt"); - 275 | 
parser.set_language(&language).unwrap(); - | - 276 | let tree = parser.parse("abdef", None).unwrap(); - | - 277 | let root_node = tree.root_node(); - 278 | let missing_c = root_node.child(2).unwrap(); - 279 | assert!(missing_c.is_missing()); - 280 | assert_eq!(missing_c.kind(), "c"); - 281 | let node_d = root_node.child(3).unwrap(); - 282 | assert_eq!(missing_c.next_sibling().unwrap(), node_d); - | - 283 | let prev_sibling = node_d.prev_sibling().unwrap(); - 284 | assert_eq!(prev_sibling, missing_c); - 285 | } - | - 286 | #[test] - 287 | fn test_first_child_for_offset() { - 288 | let mut parser = Parser::new(); - 289 | parser.set_language(&get_language("javascript")).unwrap(); - 290 | let tree = parser.parse("x10 + 100", None).unwrap(); - 291 | let sum_node = tree.root_node().child(0).unwrap().child(0).unwrap(); - | - 292 | assert_eq!( - 293 | sum_node.first_child_for_byte(0).unwrap().kind(), - 294 | "identifier" - 295 | ); - 296 | assert_eq!( - 297 | sum_node.first_child_for_byte(1).unwrap().kind(), - 298 | "identifier" - 299 | ); - 300 | assert_eq!(sum_node.first_child_for_byte(3).unwrap().kind(), "+"); - 301 | assert_eq!(sum_node.first_child_for_byte(5).unwrap().kind(), "number"); - 302 | } - | - 303 | #[test] - 304 | fn test_first_named_child_for_offset() { - 305 | let mut parser = Parser::new(); - 306 | parser.set_language(&get_language("javascript")).unwrap(); - 307 | let tree = parser.parse("x10 + 100", None).unwrap(); - 308 | let sum_node = tree.root_node().child(0).unwrap().child(0).unwrap(); - | - 309 | assert_eq!( - 310 | sum_node.first_named_child_for_byte(0).unwrap().kind(), - 311 | "identifier" - 312 | ); - 313 | assert_eq!( - 314 | sum_node.first_named_child_for_byte(1).unwrap().kind(), - 315 | "identifier" - 316 | ); - 317 | assert_eq!( - 318 | sum_node.first_named_child_for_byte(3).unwrap().kind(), - 319 | "number" - 320 | ); - 321 | } - | - 322 | #[test] - 323 | fn test_node_field_name_for_child() { - 324 | let mut parser = Parser::new(); - 325 | 
parser.set_language(&get_language("c")).unwrap(); - 326 | let tree = parser - 327 | .parse("int w = x + /* y is special! */ y;", None) - 328 | .unwrap(); - 329 | let translation_unit_node = tree.root_node(); - 330 | let declaration_node = translation_unit_node.named_child(0).unwrap(); - | - 331 | let binary_expression_node = declaration_node - 332 | .child_by_field_name("declarator") - 333 | .unwrap() - 334 | .child_by_field_name("value") - 335 | .unwrap(); - | - 336 | // ------------------- - 337 | // left: (identifier) 0 - 338 | // operator: "+" 1 <--- (not a named child) - 339 | // (comment) 2 <--- (is an extra) - 340 | // right: (identifier) 3 - 341 | // ------------------- - | - 342 | assert_eq!(binary_expression_node.field_name_for_child(0), Some("left")); - 343 | assert_eq!( - 344 | binary_expression_node.field_name_for_child(1), - 345 | Some("operator") - 346 | ); - 347 | // The comment should not have a field name, as it's just an extra - 348 | assert_eq!(binary_expression_node.field_name_for_child(2), None); - 349 | assert_eq!( - 350 | binary_expression_node.field_name_for_child(3), - 351 | Some("right") - 352 | ); - 353 | // Negative test - Not a valid child index - 354 | assert_eq!(binary_expression_node.field_name_for_child(4), None); - 355 | } - | - 356 | #[test] - 357 | fn test_node_field_name_for_named_child() { - 358 | let mut parser = Parser::new(); - 359 | parser.set_language(&get_language("c")).unwrap(); - 360 | let tree = parser - 361 | .parse("int w = x + /* y is special! 
*/ y;", None) - 362 | .unwrap(); - 363 | let translation_unit_node = tree.root_node(); - 364 | let declaration_node = translation_unit_node.named_child(0).unwrap(); - | - 365 | let binary_expression_node = declaration_node - 366 | .child_by_field_name("declarator") - 367 | .unwrap() - 368 | .child_by_field_name("value") - 369 | .unwrap(); - | - 370 | // ------------------- - 371 | // left: (identifier) 0 - 372 | // operator: "+" _ <--- (not a named child) - 373 | // (comment) 1 <--- (is an extra) - 374 | // right: (identifier) 2 - 375 | // ------------------- - | - 376 | assert_eq!( - 377 | binary_expression_node.field_name_for_named_child(0), - 378 | Some("left") - 379 | ); - 380 | // The comment should not have a field name, as it's just an extra - 381 | assert_eq!(binary_expression_node.field_name_for_named_child(1), None); - 382 | // The operator is not a named child, so the named child at index 2 is the right child - 383 | assert_eq!( - 384 | binary_expression_node.field_name_for_named_child(2), - 385 | Some("right") - 386 | ); - 387 | // Negative test - Not a valid child index - 388 | assert_eq!(binary_expression_node.field_name_for_named_child(3), None); - 389 | } - | - 390 | #[test] - 391 | fn test_node_child_by_field_name_with_extra_hidden_children() { - 392 | let mut parser = Parser::new(); - 393 | parser.set_language(&get_language("python")).unwrap(); - | - 394 | // In the Python grammar, some fields are applied to `suite` nodes, - 395 | // which consist of an invisible `indent` token followed by a block. 
- 396 | // Check that when searching for a child with a field name, we don't - 397 | // - 398 | let tree = parser.parse("while a:\n pass", None).unwrap(); - 399 | let while_node = tree.root_node().child(0).unwrap(); - 400 | assert_eq!(while_node.kind(), "while_statement"); - 401 | assert_eq!( - 402 | while_node.child_by_field_name("body").unwrap(), - 403 | while_node.child(3).unwrap(), - 404 | ); - 405 | } - | - 406 | #[test] - 407 | fn test_node_named_child() { - 408 | let tree = parse_json_example(); - 409 | let array_node = tree.root_node().child(0).unwrap(); - | - 410 | let number_node = array_node.named_child(0).unwrap(); - 411 | let false_node = array_node.named_child(1).unwrap(); - 412 | let object_node = array_node.named_child(2).unwrap(); - | - 413 | assert_eq!(number_node.kind(), "number"); - 414 | assert_eq!(number_node.start_byte(), JSON_EXAMPLE.find("123").unwrap()); - 415 | assert_eq!( - 416 | number_node.end_byte(), - 417 | JSON_EXAMPLE.find("123").unwrap() + 3 - 418 | ); - 419 | assert_eq!(number_node.start_position(), Point::new(3, 2)); - 420 | assert_eq!(number_node.end_position(), Point::new(3, 5)); - | - 421 | assert_eq!(false_node.kind(), "false"); - 422 | assert_eq!(false_node.start_byte(), JSON_EXAMPLE.find("false").unwrap()); - 423 | assert_eq!( - 424 | false_node.end_byte(), - 425 | JSON_EXAMPLE.find("false").unwrap() + 5 - 426 | ); - 427 | assert_eq!(false_node.start_position(), Point::new(4, 2)); - 428 | assert_eq!(false_node.end_position(), Point::new(4, 7)); - | - 429 | assert_eq!(object_node.kind(), "object"); - 430 | assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find('{').unwrap()); - 431 | assert_eq!(object_node.start_position(), Point::new(5, 2)); - 432 | assert_eq!(object_node.end_position(), Point::new(7, 3)); - | - 433 | assert_eq!(object_node.named_child_count(), 1); - | - 434 | let pair_node = object_node.named_child(0).unwrap(); - 435 | assert_eq!(pair_node.kind(), "pair"); - 436 | assert_eq!(pair_node.start_byte(), 
JSON_EXAMPLE.find("\"x\"").unwrap()); - 437 | assert_eq!(pair_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4); - 438 | assert_eq!(pair_node.start_position(), Point::new(6, 4)); - 439 | assert_eq!(pair_node.end_position(), Point::new(6, 13)); - | - 440 | let string_node = pair_node.named_child(0).unwrap(); - 441 | let null_node = pair_node.named_child(1).unwrap(); - | - 442 | assert_eq!(string_node.kind(), "string"); - 443 | assert_eq!(null_node.kind(), "null"); - | - 444 | assert_eq!( - 445 | string_node.start_byte(), - 446 | JSON_EXAMPLE.find("\"x\"").unwrap() - 447 | ); - 448 | assert_eq!( - 449 | string_node.end_byte(), - 450 | JSON_EXAMPLE.find("\"x\"").unwrap() + 3 - 451 | ); - 452 | assert_eq!(string_node.start_position(), Point::new(6, 4)); - 453 | assert_eq!(string_node.end_position(), Point::new(6, 7)); - | - 454 | assert_eq!(null_node.start_byte(), JSON_EXAMPLE.find("null").unwrap()); - 455 | assert_eq!(null_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4); - 456 | assert_eq!(null_node.start_position(), Point::new(6, 9)); - 457 | assert_eq!(null_node.end_position(), Point::new(6, 13)); - | - 458 | assert_eq!(string_node.parent().unwrap(), pair_node); - 459 | assert_eq!(null_node.parent().unwrap(), pair_node); - 460 | assert_eq!(pair_node.parent().unwrap(), object_node); - 461 | assert_eq!(number_node.parent().unwrap(), array_node); - 462 | assert_eq!(false_node.parent().unwrap(), array_node); - 463 | assert_eq!(object_node.parent().unwrap(), array_node); - 464 | assert_eq!(array_node.parent().unwrap(), tree.root_node()); - 465 | assert_eq!(tree.root_node().parent(), None); - | - 466 | assert_eq!( - 467 | tree.root_node().child_with_descendant(null_node).unwrap(), - 468 | array_node - 469 | ); - 470 | assert_eq!( - 471 | array_node.child_with_descendant(null_node).unwrap(), - 472 | object_node - 473 | ); - 474 | assert_eq!( - 475 | object_node.child_with_descendant(null_node).unwrap(), - 476 | pair_node - 477 | ); - 478 | assert_eq!( - 479 
| pair_node.child_with_descendant(null_node).unwrap(), - 480 | null_node - 481 | ); - 482 | assert_eq!(null_node.child_with_descendant(null_node), None); - 483 | } - | - 484 | #[test] - 485 | fn test_node_named_child_with_aliases_and_extras() { - 486 | let (parser_name, parser_code) = generate_parser(GRAMMAR_WITH_ALIASES_AND_EXTRAS).unwrap(); - | - 487 | let mut parser = Parser::new(); - 488 | parser - 489 | .set_language(&get_test_language(&parser_name, &parser_code, None)) - 490 | .unwrap(); - | - 491 | let tree = parser.parse("b ... b ... c", None).unwrap(); - 492 | let root = tree.root_node(); - 493 | assert_eq!(root.to_sexp(), "(a (b) (comment) (B) (comment) (C))"); - 494 | assert_eq!(root.named_child_count(), 5); - 495 | assert_eq!(root.named_child(0).unwrap().kind(), "b"); - 496 | assert_eq!(root.named_child(1).unwrap().kind(), "comment"); - 497 | assert_eq!(root.named_child(2).unwrap().kind(), "B"); - 498 | assert_eq!(root.named_child(3).unwrap().kind(), "comment"); - 499 | assert_eq!(root.named_child(4).unwrap().kind(), "C"); - 500 | } - | - 501 | #[test] - 502 | fn test_node_descendant_count() { - 503 | let tree = parse_json_example(); - 504 | let value_node = tree.root_node(); - 505 | let all_nodes = get_all_nodes(&tree); - | - 506 | assert_eq!(value_node.descendant_count(), all_nodes.len()); - | - 507 | let mut cursor = value_node.walk(); - 508 | for (i, node) in all_nodes.iter().enumerate() { - 509 | cursor.goto_descendant(i); - 510 | assert_eq!(cursor.node(), *node, "index {i}"); - 511 | } - | - 512 | for (i, node) in all_nodes.iter().enumerate().rev() { - 513 | cursor.goto_descendant(i); - 514 | assert_eq!(cursor.node(), *node, "rev index {i}"); - 515 | } - 516 | } - | - 517 | #[test] - 518 | fn test_descendant_count_single_node_tree() { - 519 | let mut parser = Parser::new(); - 520 | parser - 521 | .set_language(&get_language("embedded-template")) - 522 | .unwrap(); - 523 | let tree = parser.parse("hello", None).unwrap(); - | - 524 | let nodes = 
get_all_nodes(&tree); - 525 | assert_eq!(nodes.len(), 2); - 526 | assert_eq!(tree.root_node().descendant_count(), 2); - | - 527 | let mut cursor = tree.root_node().walk(); - | - 528 | cursor.goto_descendant(0); - 529 | assert_eq!(cursor.depth(), 0); - 530 | assert_eq!(cursor.node(), nodes[0]); - 531 | cursor.goto_descendant(1); - 532 | assert_eq!(cursor.depth(), 1); - 533 | assert_eq!(cursor.node(), nodes[1]); - 534 | } - | - 535 | #[test] - 536 | fn test_node_descendant_for_range() { - 537 | let tree = parse_json_example(); - 538 | let array_node = tree.root_node(); - | - 539 | // Leaf node exactly matches the given bounds - byte query - 540 | let colon_index = JSON_EXAMPLE.find(':').unwrap(); - 541 | let colon_node = array_node - 542 | .descendant_for_byte_range(colon_index, colon_index + 1) - 543 | .unwrap(); - 544 | assert_eq!(colon_node.kind(), ":"); - 545 | assert_eq!(colon_node.start_byte(), colon_index); - 546 | assert_eq!(colon_node.end_byte(), colon_index + 1); - 547 | assert_eq!(colon_node.start_position(), Point::new(6, 7)); - 548 | assert_eq!(colon_node.end_position(), Point::new(6, 8)); - | - 549 | // Leaf node exactly matches the given bounds - point query - 550 | let colon_node = array_node - 551 | .descendant_for_point_range(Point::new(6, 7), Point::new(6, 8)) - 552 | .unwrap(); - 553 | assert_eq!(colon_node.kind(), ":"); - 554 | assert_eq!(colon_node.start_byte(), colon_index); - 555 | assert_eq!(colon_node.end_byte(), colon_index + 1); - 556 | assert_eq!(colon_node.start_position(), Point::new(6, 7)); - 557 | assert_eq!(colon_node.end_position(), Point::new(6, 8)); - | - 558 | // The given point is between two adjacent leaf nodes - byte query - 559 | let colon_index = JSON_EXAMPLE.find(':').unwrap(); - 560 | let colon_node = array_node - 561 | .descendant_for_byte_range(colon_index, colon_index) - 562 | .unwrap(); - 563 | assert_eq!(colon_node.kind(), ":"); - 564 | assert_eq!(colon_node.start_byte(), colon_index); - 565 | 
assert_eq!(colon_node.end_byte(), colon_index + 1); - 566 | assert_eq!(colon_node.start_position(), Point::new(6, 7)); - 567 | assert_eq!(colon_node.end_position(), Point::new(6, 8)); - | - 568 | // The given point is between two adjacent leaf nodes - point query - 569 | let colon_node = array_node - 570 | .descendant_for_point_range(Point::new(6, 7), Point::new(6, 7)) - 571 | .unwrap(); - 572 | assert_eq!(colon_node.kind(), ":"); - 573 | assert_eq!(colon_node.start_byte(), colon_index); - 574 | assert_eq!(colon_node.end_byte(), colon_index + 1); - 575 | assert_eq!(colon_node.start_position(), Point::new(6, 7)); - 576 | assert_eq!(colon_node.end_position(), Point::new(6, 8)); - | - 577 | // Leaf node starts at the lower bound, ends after the upper bound - byte query - 578 | let string_index = JSON_EXAMPLE.find("\"x\"").unwrap(); - 579 | let string_node = array_node - 580 | .descendant_for_byte_range(string_index, string_index + 2) - 581 | .unwrap(); - 582 | assert_eq!(string_node.kind(), "string"); - 583 | assert_eq!(string_node.start_byte(), string_index); - 584 | assert_eq!(string_node.end_byte(), string_index + 3); - 585 | assert_eq!(string_node.start_position(), Point::new(6, 4)); - 586 | assert_eq!(string_node.end_position(), Point::new(6, 7)); - | - 587 | // Leaf node starts at the lower bound, ends after the upper bound - point query - 588 | let string_node = array_node - 589 | .descendant_for_point_range(Point::new(6, 4), Point::new(6, 6)) - 590 | .unwrap(); - 591 | assert_eq!(string_node.kind(), "string"); - 592 | assert_eq!(string_node.start_byte(), string_index); - 593 | assert_eq!(string_node.end_byte(), string_index + 3); - 594 | assert_eq!(string_node.start_position(), Point::new(6, 4)); - 595 | assert_eq!(string_node.end_position(), Point::new(6, 7)); - | - 596 | // Leaf node starts before the lower bound, ends at the upper bound - byte query - 597 | let null_index = JSON_EXAMPLE.find("null").unwrap(); - 598 | let null_node = array_node - 599 | 
.descendant_for_byte_range(null_index + 1, null_index + 4) - 600 | .unwrap(); - 601 | assert_eq!(null_node.kind(), "null"); - 602 | assert_eq!(null_node.start_byte(), null_index); - 603 | assert_eq!(null_node.end_byte(), null_index + 4); - 604 | assert_eq!(null_node.start_position(), Point::new(6, 9)); - 605 | assert_eq!(null_node.end_position(), Point::new(6, 13)); - | - 606 | // Leaf node starts before the lower bound, ends at the upper bound - point query - 607 | let null_node = array_node - 608 | .descendant_for_point_range(Point::new(6, 11), Point::new(6, 13)) - 609 | .unwrap(); - 610 | assert_eq!(null_node.kind(), "null"); - 611 | assert_eq!(null_node.start_byte(), null_index); - 612 | assert_eq!(null_node.end_byte(), null_index + 4); - 613 | assert_eq!(null_node.start_position(), Point::new(6, 9)); - 614 | assert_eq!(null_node.end_position(), Point::new(6, 13)); - | - 615 | // The bounds span multiple leaf nodes - return the smallest node that does span it. - 616 | let pair_node = array_node - 617 | .descendant_for_byte_range(string_index + 2, string_index + 4) - 618 | .unwrap(); - 619 | assert_eq!(pair_node.kind(), "pair"); - 620 | assert_eq!(pair_node.start_byte(), string_index); - 621 | assert_eq!(pair_node.end_byte(), string_index + 9); - 622 | assert_eq!(pair_node.start_position(), Point::new(6, 4)); - 623 | assert_eq!(pair_node.end_position(), Point::new(6, 13)); - | - 624 | assert_eq!(colon_node.parent(), Some(pair_node)); - | - 625 | // no leaf spans the given range - return the smallest node that does span it. 
- 626 | let pair_node = array_node - 627 | .named_descendant_for_point_range(Point::new(6, 6), Point::new(6, 8)) - 628 | .unwrap(); - 629 | assert_eq!(pair_node.kind(), "pair"); - 630 | assert_eq!(pair_node.start_byte(), string_index); - 631 | assert_eq!(pair_node.end_byte(), string_index + 9); - 632 | assert_eq!(pair_node.start_position(), Point::new(6, 4)); - 633 | assert_eq!(pair_node.end_position(), Point::new(6, 13)); - | - 634 | // Zero-width token - 635 | { - 636 | let code = ""; - 637 | let mut parser = Parser::new(); - 638 | parser.set_language(&get_language("html")).unwrap(); - | - 639 | let tree = parser.parse(code, None).unwrap(); - 640 | let root = tree.root_node(); - | - 641 | let child = root - 642 | .named_descendant_for_point_range(Point::new(0, 8), Point::new(0, 8)) - 643 | .unwrap(); - 644 | assert_eq!(child.kind(), "raw_text"); - | - 645 | let child2 = root.named_descendant_for_byte_range(8, 8).unwrap(); - 646 | assert_eq!(child2.kind(), "raw_text"); - | - 647 | assert_eq!(child, child2); - 648 | } - | - 649 | // Negative test, start > end - 650 | assert_eq!(array_node.descendant_for_byte_range(1, 0), None); - 651 | assert_eq!( - 652 | array_node.descendant_for_point_range(Point::new(6, 8), Point::new(6, 7)), - 653 | None - 654 | ); - 655 | } - | - 656 | #[test] - 657 | fn test_node_edit() { - 658 | let mut code = JSON_EXAMPLE.as_bytes().to_vec(); - 659 | let mut tree = parse_json_example(); - 660 | let mut rand = Rand::new(0); - | - 661 | for _ in 0..10 { - 662 | let mut nodes_before = get_all_nodes(&tree); - | - 663 | let edit = get_random_edit(&mut rand, &code); - 664 | let mut tree2 = tree.clone(); - 665 | let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap(); - 666 | for node in &mut nodes_before { - 667 | node.edit(&edit); - 668 | } - | - 669 | let nodes_after = get_all_nodes(&tree2); - 670 | for (i, node) in nodes_before.into_iter().enumerate() { - 671 | assert_eq!( - 672 | (node.kind(), node.start_byte(), node.start_position()), 
- 673 | ( - 674 | nodes_after[i].kind(), - 675 | nodes_after[i].start_byte(), - 676 | nodes_after[i].start_position() - 677 | ), - 678 | ); - 679 | } - | - 680 | tree = tree2; - 681 | } - 682 | } - | - 683 | #[test] - 684 | fn test_root_node_with_offset() { - 685 | let mut parser = Parser::new(); - 686 | parser.set_language(&get_language("javascript")).unwrap(); - 687 | let tree = parser.parse(" if (a) b", None).unwrap(); - | - 688 | let node = tree.root_node_with_offset(6, Point::new(2, 2)); - 689 | assert_eq!(node.byte_range(), 8..16); - 690 | assert_eq!(node.start_position(), Point::new(2, 4)); - 691 | assert_eq!(node.end_position(), Point::new(2, 12)); - | - 692 | let child = node.child(0).unwrap().child(2).unwrap(); - 693 | assert_eq!(child.kind(), "expression_statement"); - 694 | assert_eq!(child.byte_range(), 15..16); - 695 | assert_eq!(child.start_position(), Point::new(2, 11)); - 696 | assert_eq!(child.end_position(), Point::new(2, 12)); - | - 697 | let mut cursor = node.walk(); - 698 | cursor.goto_first_child(); - 699 | cursor.goto_first_child(); - 700 | cursor.goto_next_sibling(); - 701 | let child = cursor.node(); - 702 | assert_eq!(child.kind(), "parenthesized_expression"); - 703 | assert_eq!(child.byte_range(), 11..14); - 704 | assert_eq!(child.start_position(), Point::new(2, 7)); - 705 | assert_eq!(child.end_position(), Point::new(2, 10)); - 706 | } - | - 707 | #[test] - 708 | fn test_node_is_extra() { - 709 | let mut parser = Parser::new(); - 710 | parser.set_language(&get_language("javascript")).unwrap(); - 711 | let tree = parser.parse("foo(/* hi */);", None).unwrap(); - | - 712 | let root_node = tree.root_node(); - 713 | let comment_node = root_node.descendant_for_byte_range(7, 7).unwrap(); - | - 714 | assert_eq!(root_node.kind(), "program"); - 715 | assert_eq!(comment_node.kind(), "comment"); - 716 | assert!(!root_node.is_extra()); - 717 | assert!(comment_node.is_extra()); - 718 | } - | - 719 | #[test] - 720 | fn test_node_is_error() { - 721 | 
let mut parser = Parser::new(); - 722 | parser.set_language(&get_language("javascript")).unwrap(); - 723 | let tree = parser.parse("foo(", None).unwrap(); - 724 | let root_node = tree.root_node(); - 725 | assert_eq!(root_node.kind(), "program"); - 726 | assert!(root_node.has_error()); - | - 727 | let child = root_node.child(0).unwrap(); - 728 | assert_eq!(child.kind(), "ERROR"); - 729 | assert!(child.is_error()); - 730 | } - | - 731 | #[test] - 732 | fn test_edit_point() { - 733 | let edit = InputEdit { - 734 | start_byte: 5, - 735 | old_end_byte: 5, - 736 | new_end_byte: 10, - 737 | start_position: Point::new(0, 5), - 738 | old_end_position: Point::new(0, 5), - 739 | new_end_position: Point::new(0, 10), - 740 | }; - | - 741 | // Point after edit - 742 | let mut point = Point::new(0, 8); - 743 | let mut byte = 8; - 744 | edit.edit_point(&mut point, &mut byte); - 745 | assert_eq!(point, Point::new(0, 13)); - 746 | assert_eq!(byte, 13); - | - 747 | // Point before edit - 748 | let mut point = Point::new(0, 2); - 749 | let mut byte = 2; - 750 | edit.edit_point(&mut point, &mut byte); - 751 | assert_eq!(point, Point::new(0, 2)); - 752 | assert_eq!(byte, 2); - | - 753 | // Point at edit start - 754 | let mut point = Point::new(0, 5); - 755 | let mut byte = 5; - 756 | edit.edit_point(&mut point, &mut byte); - 757 | assert_eq!(point, Point::new(0, 10)); - 758 | assert_eq!(byte, 10); - 759 | } - | - 760 | #[test] - 761 | fn test_edit_range() { - 762 | use tree_sitter::{InputEdit, Point, Range}; - | - 763 | let edit = InputEdit { - 764 | start_byte: 10, - 765 | old_end_byte: 15, - 766 | new_end_byte: 20, - 767 | start_position: Point::new(1, 0), - 768 | old_end_position: Point::new(1, 5), - 769 | new_end_position: Point::new(2, 0), - 770 | }; - | - 771 | // Range after edit - 772 | let mut range = Range { - 773 | start_byte: 20, - 774 | end_byte: 25, - 775 | start_point: Point::new(2, 0), - 776 | end_point: Point::new(2, 5), - 777 | }; - 778 | edit.edit_range(&mut range); - 
779 | assert_eq!(range.start_byte, 25); - 780 | assert_eq!(range.end_byte, 30); - 781 | assert_eq!(range.start_point, Point::new(3, 0)); - 782 | assert_eq!(range.end_point, Point::new(3, 5)); - | - 783 | // Range before edit - 784 | let mut range = Range { - 785 | start_byte: 5, - 786 | end_byte: 8, - 787 | start_point: Point::new(0, 5), - 788 | end_point: Point::new(0, 8), - 789 | }; - 790 | edit.edit_range(&mut range); - 791 | assert_eq!(range.start_byte, 5); - 792 | assert_eq!(range.end_byte, 8); - 793 | assert_eq!(range.start_point, Point::new(0, 5)); - 794 | assert_eq!(range.end_point, Point::new(0, 8)); - | - 795 | // Range overlapping edit - 796 | let mut range = Range { - 797 | start_byte: 8, - 798 | end_byte: 12, - 799 | start_point: Point::new(0, 8), - 800 | end_point: Point::new(1, 2), - 801 | }; - 802 | edit.edit_range(&mut range); - 803 | assert_eq!(range.start_byte, 8); - 804 | assert_eq!(range.end_byte, 10); - 805 | assert_eq!(range.start_point, Point::new(0, 8)); - 806 | assert_eq!(range.end_point, Point::new(1, 0)); - 807 | } - | - 808 | #[test] - 809 | fn test_node_sexp() { - 810 | let mut parser = Parser::new(); - 811 | parser.set_language(&get_language("javascript")).unwrap(); - 812 | let tree = parser.parse("if (a) b", None).unwrap(); - 813 | let root_node = tree.root_node(); - 814 | let if_node = root_node.descendant_for_byte_range(0, 0).unwrap(); - 815 | let paren_node = root_node.descendant_for_byte_range(3, 3).unwrap(); - 816 | let identifier_node = root_node.descendant_for_byte_range(4, 4).unwrap(); - 817 | assert_eq!(if_node.kind(), "if"); - 818 | assert_eq!(if_node.to_sexp(), "(\"if\")"); - 819 | assert_eq!(paren_node.kind(), "("); - 820 | assert_eq!(paren_node.to_sexp(), "(\"(\")"); - 821 | assert_eq!(identifier_node.kind(), "identifier"); - 822 | assert_eq!(identifier_node.to_sexp(), "(identifier)"); - 823 | } - | - 824 | #[test] - 825 | fn test_node_field_names() { - 826 | let (parser_name, parser_code) = generate_parser( - 827 | r#" 
- 828 | { - 829 | "name": "test_grammar_with_fields", - 830 | "extras": [ - 831 | {"type": "PATTERN", "value": "\\s+"} - 832 | ], - 833 | "rules": { - 834 | "rule_a": { - 835 | "type": "SEQ", - 836 | "members": [ - 837 | { - 838 | "type": "FIELD", - 839 | "name": "field_1", - 840 | "content": {"type": "STRING", "value": "child-0"} - 841 | }, - 842 | { - 843 | "type": "CHOICE", - 844 | "members": [ - 845 | {"type": "STRING", "value": "child-1"}, - 846 | {"type": "BLANK"}, - | - 847 | // This isn't used in the test, but prevents `_hidden_rule1` - 848 | // from being eliminated as a unit reduction. - 849 | { - 850 | "type": "ALIAS", - 851 | "value": "x", - 852 | "named": true, - 853 | "content": { - 854 | "type": "SYMBOL", - 855 | "name": "_hidden_rule1" - 856 | } - 857 | } - 858 | ] - 859 | }, - 860 | { - 861 | "type": "FIELD", - 862 | "name": "field_2", - 863 | "content": {"type": "SYMBOL", "name": "_hidden_rule1"} - 864 | }, - 865 | {"type": "SYMBOL", "name": "_hidden_rule2"} - 866 | ] - 867 | }, - | - 868 | // Fields pointing to hidden nodes with a single child resolve to the child. - 869 | "_hidden_rule1": { - 870 | "type": "CHOICE", - 871 | "members": [ - 872 | {"type": "STRING", "value": "child-2"}, - 873 | {"type": "STRING", "value": "child-2.5"} - 874 | ] - 875 | }, - | - 876 | // Fields within hidden nodes can be referenced through the parent node. 
- 877 | "_hidden_rule2": { - 878 | "type": "SEQ", - 879 | "members": [ - 880 | {"type": "STRING", "value": "child-3"}, - 881 | { - 882 | "type": "FIELD", - 883 | "name": "field_3", - 884 | "content": {"type": "STRING", "value": "child-4"} - 885 | } - 886 | ] - 887 | } - 888 | } - 889 | } - 890 | "#, - 891 | ) - 892 | .unwrap(); - | - 893 | let mut parser = Parser::new(); - 894 | let language = get_test_language(&parser_name, &parser_code, None); - 895 | parser.set_language(&language).unwrap(); - | - 896 | let tree = parser - 897 | .parse("child-0 child-1 child-2 child-3 child-4", None) - 898 | .unwrap(); - 899 | let root_node = tree.root_node(); - | - 900 | assert_eq!(root_node.child_by_field_name("field_1"), root_node.child(0)); - 901 | assert_eq!(root_node.child_by_field_name("field_2"), root_node.child(2)); - 902 | assert_eq!(root_node.child_by_field_name("field_3"), root_node.child(4)); - 903 | assert_eq!( - 904 | root_node.child(0).unwrap().child_by_field_name("field_1"), - 905 | None - 906 | ); - 907 | assert_eq!(root_node.child_by_field_name("not_a_real_field"), None); - | - 908 | let mut cursor = root_node.walk(); - 909 | assert_eq!(cursor.field_name(), None); - 910 | cursor.goto_first_child(); - 911 | assert_eq!(cursor.node().kind(), "child-0"); - 912 | assert_eq!(cursor.field_name(), Some("field_1")); - 913 | cursor.goto_next_sibling(); - 914 | assert_eq!(cursor.node().kind(), "child-1"); - 915 | assert_eq!(cursor.field_name(), None); - 916 | cursor.goto_next_sibling(); - 917 | assert_eq!(cursor.node().kind(), "child-2"); - 918 | assert_eq!(cursor.field_name(), Some("field_2")); - 919 | cursor.goto_next_sibling(); - 920 | assert_eq!(cursor.node().kind(), "child-3"); - 921 | assert_eq!(cursor.field_name(), None); - 922 | cursor.goto_next_sibling(); - 923 | assert_eq!(cursor.node().kind(), "child-4"); - 924 | assert_eq!(cursor.field_name(), Some("field_3")); - 925 | } - | - 926 | #[test] - 927 | fn test_node_field_calls_in_language_without_fields() { - 928 
| let (parser_name, parser_code) = generate_parser( - 929 | r#" - 930 | { - 931 | "name": "test_grammar_with_no_fields", - 932 | "extras": [ - 933 | {"type": "PATTERN", "value": "\\s+"} - 934 | ], - 935 | "rules": { - 936 | "a": { - 937 | "type": "SEQ", - 938 | "members": [ - 939 | { - 940 | "type": "STRING", - 941 | "value": "b" - 942 | }, - 943 | { - 944 | "type": "STRING", - 945 | "value": "c" - 946 | }, - 947 | { - 948 | "type": "STRING", - 949 | "value": "d" - 950 | } - 951 | ] - 952 | } - 953 | } - 954 | } - 955 | "#, - 956 | ) - 957 | .unwrap(); - | - 958 | let mut parser = Parser::new(); - 959 | let language = get_test_language(&parser_name, &parser_code, None); - 960 | parser.set_language(&language).unwrap(); - | - 961 | let tree = parser.parse("b c d", None).unwrap(); - | - 962 | let root_node = tree.root_node(); - 963 | assert_eq!(root_node.kind(), "a"); - 964 | assert_eq!(root_node.child_by_field_name("something"), None); - | - 965 | let mut cursor = root_node.walk(); - 966 | assert_eq!(cursor.field_name(), None); - 967 | assert!(cursor.goto_first_child()); - 968 | assert_eq!(cursor.field_name(), None); - 969 | } - | - 970 | #[test] - 971 | fn test_node_is_named_but_aliased_as_anonymous() { - 972 | let grammar_json = load_grammar_file( - 973 | &fixtures_dir() - 974 | .join("test_grammars") - 975 | .join("named_rule_aliased_as_anonymous") - 976 | .join("grammar.js"), - 977 | None, - 978 | ) - 979 | .unwrap(); - | - 980 | let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap(); - | - 981 | let mut parser = Parser::new(); - 982 | let language = get_test_language(&parser_name, &parser_code, None); - 983 | parser.set_language(&language).unwrap(); - | - 984 | let tree = parser.parse("B C B", None).unwrap(); - | - 985 | let root_node = tree.root_node(); - 986 | assert!(!root_node.has_error()); - 987 | assert_eq!(root_node.child_count(), 3); - 988 | assert_eq!(root_node.named_child_count(), 2); - | - 989 | let aliased = 
root_node.child(0).unwrap(); - 990 | assert!(!aliased.is_named()); - 991 | assert_eq!(aliased.kind(), "the-alias"); - | - 992 | assert_eq!(root_node.named_child(0).unwrap().kind(), "c"); - 993 | } - | - 994 | #[test] - 995 | fn test_node_numeric_symbols_respect_simple_aliases() { - 996 | let mut parser = Parser::new(); - 997 | parser.set_language(&get_language("python")).unwrap(); - | - 998 | // Example 1: - 999 | // Python argument lists can contain "splat" arguments, which are not allowed -1000 | // within other expressions. This includes `parenthesized_list_splat` nodes -1001 | // like `(*b)`. These `parenthesized_list_splat` nodes are aliased as -1002 | // `parenthesized_expression`. Their numeric `symbol`, aka `kind_id` should -1003 | // match that of a normal `parenthesized_expression`. -1004 | let tree = parser.parse("(a((*b)))", None).unwrap(); -1005 | let root = tree.root_node(); -1006 | assert_eq!( -1007 | root.to_sexp(), -1008 | "(module (expression_statement (parenthesized_expression (call function: (identifier) arguments: (argument_list (parenthesized_expression (list_splat (identifier))))))))", -1009 | ); - | -1010 | let outer_expr_node = root.child(0).unwrap().child(0).unwrap(); -1011 | assert_eq!(outer_expr_node.kind(), "parenthesized_expression"); - | -1012 | let inner_expr_node = outer_expr_node -1013 | .named_child(0) -1014 | .unwrap() -1015 | .child_by_field_name("arguments") -1016 | .unwrap() -1017 | .named_child(0) -1018 | .unwrap(); -1019 | assert_eq!(inner_expr_node.kind(), "parenthesized_expression"); -1020 | assert_eq!(inner_expr_node.kind_id(), outer_expr_node.kind_id()); - | -1021 | // Example 2: -1022 | // Ruby handles the unary (negative) and binary (minus) `-` operators using two -1023 | // different tokens. One or more of these is an external token that's -1024 | // aliased as `-`. Their numeric kind ids should match. 
-1025 | parser.set_language(&get_language("ruby")).unwrap(); -1026 | let tree = parser.parse("-a - b", None).unwrap(); -1027 | let root = tree.root_node(); -1028 | assert_eq!( -1029 | root.to_sexp(), -1030 | "(program (binary left: (unary operand: (identifier)) right: (identifier)))", -1031 | ); - | -1032 | let binary_node = root.child(0).unwrap(); -1033 | assert_eq!(binary_node.kind(), "binary"); - | -1034 | let unary_minus_node = binary_node -1035 | .child_by_field_name("left") -1036 | .unwrap() -1037 | .child(0) -1038 | .unwrap(); -1039 | assert_eq!(unary_minus_node.kind(), "-"); - | -1040 | let binary_minus_node = binary_node.child_by_field_name("operator").unwrap(); -1041 | assert_eq!(binary_minus_node.kind(), "-"); -1042 | assert_eq!(unary_minus_node.kind_id(), binary_minus_node.kind_id()); -1043 | } - | -1044 | #[test] -1045 | fn test_hidden_zero_width_node_with_visible_child() { -1046 | let code = r" -1047 | class Foo { -1048 | std:: -1049 | private: -1050 | std::string s; -1051 | }; -1052 | "; - | -1053 | let mut parser = Parser::new(); -1054 | parser.set_language(&get_language("cpp")).unwrap(); -1055 | let tree = parser.parse(code, None).unwrap(); -1056 | let root = tree.root_node(); - | -1057 | let class_specifier = root.child(0).unwrap(); -1058 | let field_decl_list = class_specifier.child_by_field_name("body").unwrap(); -1059 | let field_decl = field_decl_list.named_child(0).unwrap(); -1060 | let field_ident = field_decl.child_by_field_name("declarator").unwrap(); -1061 | assert_eq!( -1062 | field_decl.child_with_descendant(field_ident).unwrap(), -1063 | field_ident -1064 | ); -1065 | } - | -1066 | fn get_all_nodes(tree: &Tree) -> Vec { -1067 | let mut result = Vec::new(); -1068 | let mut visited_children = false; -1069 | let mut cursor = tree.walk(); -1070 | loop { -1071 | if !visited_children { -1072 | result.push(cursor.node()); -1073 | if !cursor.goto_first_child() { -1074 | visited_children = true; -1075 | } -1076 | } else if 
cursor.goto_next_sibling() { -1077 | visited_children = false; -1078 | } else if !cursor.goto_parent() { -1079 | break; -1080 | } -1081 | } -1082 | result -1083 | } - | -1084 | fn parse_json_example() -> Tree { -1085 | let mut parser = Parser::new(); -1086 | parser.set_language(&get_language("json")).unwrap(); -1087 | parser.parse(JSON_EXAMPLE, None).unwrap() -1088 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/parser_test.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | ops::ControlFlow, - 3 | sync::{ - 4 | atomic::{AtomicUsize, Ordering}, - 5 | mpsc, - 6 | }, - 7 | thread, - 8 | time::{self, Duration}, - 9 | }; - | - 10 | use tree_sitter::{ - 11 | Decode, IncludedRangesError, InputEdit, LogType, ParseOptions, ParseState, Parser, Point, Range, - 12 | }; - 13 | use tree_sitter_generate::load_grammar_file; - 14 | use tree_sitter_proc_macro::retry; - | - 15 | use super::helpers::{ - 16 | allocations, - 17 | edits::ReadRecorder, - 18 | fixtures::{get_language, get_test_language}, - 19 | }; - 20 | use crate::{ - 21 | fuzz::edits::Edit, - 22 | parse::perform_edit, - 23 | tests::{ - 24 | generate_parser, - 25 | helpers::fixtures::{fixtures_dir, get_test_fixture_language}, - 26 | invert_edit, - 27 | }, - 28 | }; - | - 29 | #[test] - 30 | fn test_parsing_simple_string() { - 31 | let mut parser = Parser::new(); - 32 | parser.set_language(&get_language("rust")).unwrap(); - | - 33 | let tree = parser - 34 | .parse( - 35 | " - 36 | struct Stuff {} - 37 | fn main() {} - 38 | ", - 39 | None, - 40 | ) - 41 | .unwrap(); - | - 42 | let root_node = tree.root_node(); - 43 | assert_eq!(root_node.kind(), "source_file"); - | - 44 | assert_eq!( - 45 | root_node.to_sexp(), - 46 | concat!( - 47 | "(source_file ", - 48 | "(struct_item name: (type_identifier) body: (field_declaration_list)) ", - 49 | "(function_item name: (identifier) parameters: (parameters) body: 
(block)))" - 50 | ) - 51 | ); - | - 52 | let struct_node = root_node.child(0).unwrap(); - 53 | assert_eq!(struct_node.kind(), "struct_item"); - 54 | } - | - 55 | #[test] - 56 | fn test_parsing_with_logging() { - 57 | let mut parser = Parser::new(); - 58 | parser.set_language(&get_language("rust")).unwrap(); - | - 59 | let mut messages = Vec::new(); - 60 | parser.set_logger(Some(Box::new(|log_type, message| { - 61 | messages.push((log_type, message.to_string())); - 62 | }))); - | - 63 | parser - 64 | .parse( - 65 | " - 66 | struct Stuff {} - 67 | fn main() {} - 68 | ", - 69 | None, - 70 | ) - 71 | .unwrap(); - | - 72 | assert!(messages.contains(&( - 73 | LogType::Parse, - 74 | "reduce sym:struct_item, child_count:3".to_string() - 75 | ))); - 76 | assert!(messages.contains(&(LogType::Lex, "skip character:' '".to_string()))); - | - 77 | let mut row_starts_from_0 = false; - 78 | for (_, m) in &messages { - 79 | if m.contains("row:0") { - 80 | row_starts_from_0 = true; - 81 | break; - 82 | } - 83 | } - 84 | assert!(row_starts_from_0); - 85 | } - | - 86 | #[test] - 87 | fn test_parsing_with_debug_graph_enabled() { - 88 | use std::io::{BufRead, BufReader, Seek}; - | - 89 | let has_zero_indexed_row = |s: &str| s.contains("position: 0,"); - | - 90 | let mut parser = Parser::new(); - 91 | parser.set_language(&get_language("javascript")).unwrap(); - | - 92 | let mut debug_graph_file = tempfile::tempfile().unwrap(); - 93 | parser.print_dot_graphs(&debug_graph_file); - 94 | parser.parse("const zero = 0", None).unwrap(); - | - 95 | debug_graph_file.rewind().unwrap(); - 96 | let log_reader = BufReader::new(debug_graph_file) - 97 | .lines() - 98 | .map(|l| l.expect("Failed to read line from graph log")); - 99 | for line in log_reader { - 100 | assert!( - 101 | !has_zero_indexed_row(&line), - 102 | "Graph log output includes zero-indexed row: {line}", - 103 | ); - 104 | } - 105 | } - | - 106 | #[test] - 107 | fn test_parsing_with_custom_utf8_input() { - 108 | let mut parser = 
Parser::new(); - 109 | parser.set_language(&get_language("rust")).unwrap(); - | - 110 | let lines = &["pub fn foo() {", " 1", "}"]; - | - 111 | let tree = parser - 112 | .parse_with_options( - 113 | &mut |_, position| { - 114 | let row = position.row; - 115 | let column = position.column; - 116 | if row < lines.len() { - 117 | if column < lines[row].len() { - 118 | &lines[row].as_bytes()[column..] - 119 | } else { - 120 | b"\n" - 121 | } - 122 | } else { - 123 | &[] - 124 | } - 125 | }, - 126 | None, - 127 | None, - 128 | ) - 129 | .unwrap(); - | - 130 | let root = tree.root_node(); - 131 | assert_eq!( - 132 | root.to_sexp(), - 133 | concat!( - 134 | "(source_file ", - 135 | "(function_item ", - 136 | "(visibility_modifier) ", - 137 | "name: (identifier) ", - 138 | "parameters: (parameters) ", - 139 | "body: (block (integer_literal))))" - 140 | ) - 141 | ); - 142 | assert_eq!(root.kind(), "source_file"); - 143 | assert!(!root.has_error()); - 144 | assert_eq!(root.child(0).unwrap().kind(), "function_item"); - 145 | } - | - 146 | #[test] - 147 | fn test_parsing_with_custom_utf16le_input() { - 148 | let mut parser = Parser::new(); - 149 | parser.set_language(&get_language("rust")).unwrap(); - | - 150 | let lines = ["pub fn foo() {", " 1", "}"] - 151 | .iter() - 152 | .map(|s| s.encode_utf16().map(u16::to_le).collect::>()) - 153 | .collect::>(); - | - 154 | let newline = [('\n' as u16).to_le()]; - | - 155 | let tree = parser - 156 | .parse_utf16_le_with_options( - 157 | &mut |_, position| { - 158 | let row = position.row; - 159 | let column = position.column; - 160 | if row < lines.len() { - 161 | if column < lines[row].len() { - 162 | &lines[row][column..] 
- 163 | } else { - 164 | &newline - 165 | } - 166 | } else { - 167 | &[] - 168 | } - 169 | }, - 170 | None, - 171 | None, - 172 | ) - 173 | .unwrap(); - | - 174 | let root = tree.root_node(); - 175 | assert_eq!( - 176 | root.to_sexp(), - 177 | "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" - 178 | ); - 179 | assert_eq!(root.kind(), "source_file"); - 180 | assert!(!root.has_error()); - 181 | assert_eq!(root.child(0).unwrap().kind(), "function_item"); - 182 | } - | - 183 | #[test] - 184 | fn test_parsing_with_custom_utf16_be_input() { - 185 | let mut parser = Parser::new(); - 186 | parser.set_language(&get_language("rust")).unwrap(); - | - 187 | let lines: Vec> = ["pub fn foo() {", " 1", "}"] - 188 | .iter() - 189 | .map(|s| s.encode_utf16().collect::>()) - 190 | .map(|v| v.iter().map(|u| u.to_be()).collect()) - 191 | .collect(); - | - 192 | let newline = [('\n' as u16).to_be()]; - | - 193 | let tree = parser - 194 | .parse_utf16_be_with_options( - 195 | &mut |_, position| { - 196 | let row = position.row; - 197 | let column = position.column; - 198 | if row < lines.len() { - 199 | if column < lines[row].len() { - 200 | &lines[row][column..] 
- 201 | } else { - 202 | &newline - 203 | } - 204 | } else { - 205 | &[] - 206 | } - 207 | }, - 208 | None, - 209 | None, - 210 | ) - 211 | .unwrap(); - 212 | let root = tree.root_node(); - 213 | assert_eq!( - 214 | root.to_sexp(), - 215 | "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" - 216 | ); - 217 | assert_eq!(root.kind(), "source_file"); - 218 | assert!(!root.has_error()); - 219 | assert_eq!(root.child(0).unwrap().kind(), "function_item"); - 220 | } - | - 221 | #[test] - 222 | fn test_parsing_with_callback_returning_owned_strings() { - 223 | let mut parser = Parser::new(); - 224 | parser.set_language(&get_language("rust")).unwrap(); - | - 225 | let text = b"pub fn foo() { 1 }"; - | - 226 | let tree = parser - 227 | .parse_with_options( - 228 | &mut |i, _| String::from_utf8(text[i..].to_vec()).unwrap(), - 229 | None, - 230 | None, - 231 | ) - 232 | .unwrap(); - | - 233 | let root = tree.root_node(); - 234 | assert_eq!( - 235 | root.to_sexp(), - 236 | "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" - 237 | ); - 238 | } - | - 239 | #[test] - 240 | fn test_parsing_text_with_byte_order_mark() { - 241 | let mut parser = Parser::new(); - 242 | parser.set_language(&get_language("rust")).unwrap(); - | - 243 | // Parse UTF16 text with a BOM - 244 | let tree = parser - 245 | .parse_utf16_le( - 246 | "\u{FEFF}fn a() {}" - 247 | .encode_utf16() - 248 | .map(u16::to_le) - 249 | .collect::>(), - 250 | None, - 251 | ) - 252 | .unwrap(); - 253 | assert_eq!( - 254 | tree.root_node().to_sexp(), - 255 | "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))" - 256 | ); - 257 | assert_eq!(tree.root_node().start_byte(), 2); - | - 258 | // Parse UTF8 text with a BOM - 259 | let mut tree = parser.parse("\u{FEFF}fn a() {}", None).unwrap(); - 260 | assert_eq!( - 261 | 
tree.root_node().to_sexp(), - 262 | "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))" - 263 | ); - 264 | assert_eq!(tree.root_node().start_byte(), 3); - | - 265 | // Edit the text, inserting a character before the BOM. The BOM is now an error. - 266 | tree.edit(&InputEdit { - 267 | start_byte: 0, - 268 | old_end_byte: 0, - 269 | new_end_byte: 1, - 270 | start_position: Point::new(0, 0), - 271 | old_end_position: Point::new(0, 0), - 272 | new_end_position: Point::new(0, 1), - 273 | }); - 274 | let mut tree = parser.parse(" \u{FEFF}fn a() {}", Some(&tree)).unwrap(); - 275 | assert_eq!( - 276 | tree.root_node().to_sexp(), - 277 | "(source_file (ERROR (UNEXPECTED 65279)) (function_item name: (identifier) parameters: (parameters) body: (block)))" - 278 | ); - 279 | assert_eq!(tree.root_node().start_byte(), 1); - | - 280 | // Edit the text again, putting the BOM back at the beginning. - 281 | tree.edit(&InputEdit { - 282 | start_byte: 0, - 283 | old_end_byte: 1, - 284 | new_end_byte: 0, - 285 | start_position: Point::new(0, 0), - 286 | old_end_position: Point::new(0, 1), - 287 | new_end_position: Point::new(0, 0), - 288 | }); - 289 | let tree = parser.parse("\u{FEFF}fn a() {}", Some(&tree)).unwrap(); - 290 | assert_eq!( - 291 | tree.root_node().to_sexp(), - 292 | "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))" - 293 | ); - 294 | assert_eq!(tree.root_node().start_byte(), 3); - 295 | } - | - 296 | #[test] - 297 | fn test_parsing_invalid_chars_at_eof() { - 298 | let mut parser = Parser::new(); - 299 | parser.set_language(&get_language("json")).unwrap(); - 300 | let tree = parser.parse(b"\xdf", None).unwrap(); - 301 | assert_eq!( - 302 | tree.root_node().to_sexp(), - 303 | "(document (ERROR (UNEXPECTED INVALID)))" - 304 | ); - 305 | } - | - 306 | #[test] - 307 | fn test_parsing_unexpected_null_characters_within_source() { - 308 | let mut parser = Parser::new(); - 309 | 
parser.set_language(&get_language("javascript")).unwrap(); - 310 | let tree = parser.parse(b"var \0 something;", None).unwrap(); - 311 | assert_eq!( - 312 | tree.root_node().to_sexp(), - 313 | "(program (variable_declaration (ERROR (UNEXPECTED '\\0')) (variable_declarator name: (identifier))))" - 314 | ); - 315 | } - | - 316 | #[test] - 317 | fn test_parsing_ends_when_input_callback_returns_empty() { - 318 | let mut parser = Parser::new(); - 319 | parser.set_language(&get_language("javascript")).unwrap(); - 320 | let mut i = 0; - 321 | let source = b"abcdefghijklmnoqrs"; - 322 | let tree = parser - 323 | .parse_with_options( - 324 | &mut |offset, _| { - 325 | i += 1; - 326 | if offset >= 6 { - 327 | b"" - 328 | } else { - 329 | &source[offset..usize::min(source.len(), offset + 3)] - 330 | } - 331 | }, - 332 | None, - 333 | None, - 334 | ) - 335 | .unwrap(); - 336 | assert_eq!(tree.root_node().end_byte(), 6); - 337 | } - | - 338 | // Incremental parsing - | - 339 | #[test] - 340 | fn test_parsing_after_editing_beginning_of_code() { - 341 | let mut parser = Parser::new(); - 342 | parser.set_language(&get_language("javascript")).unwrap(); - | - 343 | let mut code = b"123 + 456 * (10 + x);".to_vec(); - 344 | let mut tree = parser.parse(&code, None).unwrap(); - 345 | assert_eq!( - 346 | tree.root_node().to_sexp(), - 347 | concat!( - 348 | "(program (expression_statement (binary_expression ", - 349 | "left: (number) ", - 350 | "right: (binary_expression left: (number) right: (parenthesized_expression ", - 351 | "(binary_expression left: (number) right: (identifier)))))))", - 352 | ) - 353 | ); - | - 354 | perform_edit( - 355 | &mut tree, - 356 | &mut code, - 357 | &Edit { - 358 | position: 3, - 359 | deleted_length: 0, - 360 | inserted_text: b" || 5".to_vec(), - 361 | }, - 362 | ) - 363 | .unwrap(); - | - 364 | let mut recorder = ReadRecorder::new(&code); - 365 | let tree = parser - 366 | .parse_with_options(&mut |i, _| recorder.read(i), Some(&tree), None) - 367 | 
.unwrap(); - 368 | assert_eq!( - 369 | tree.root_node().to_sexp(), - 370 | concat!( - 371 | "(program (expression_statement (binary_expression ", - 372 | "left: (number) ", - 373 | "right: (binary_expression ", - 374 | "left: (number) ", - 375 | "right: (binary_expression ", - 376 | "left: (number) ", - 377 | "right: (parenthesized_expression (binary_expression left: (number) right: (identifier))))))))", - 378 | ) - 379 | ); - | - 380 | assert_eq!(recorder.strings_read(), vec!["123 || 5 "]); - 381 | } - | - 382 | #[test] - 383 | fn test_parsing_after_editing_end_of_code() { - 384 | let mut parser = Parser::new(); - 385 | parser.set_language(&get_language("javascript")).unwrap(); - | - 386 | let mut code = b"x * (100 + abc);".to_vec(); - 387 | let mut tree = parser.parse(&code, None).unwrap(); - 388 | assert_eq!( - 389 | tree.root_node().to_sexp(), - 390 | concat!( - 391 | "(program (expression_statement (binary_expression ", - 392 | "left: (identifier) ", - 393 | "right: (parenthesized_expression (binary_expression left: (number) right: (identifier))))))", - 394 | ) - 395 | ); - | - 396 | let position = code.len() - 2; - 397 | perform_edit( - 398 | &mut tree, - 399 | &mut code, - 400 | &Edit { - 401 | position, - 402 | deleted_length: 0, - 403 | inserted_text: b".d".to_vec(), - 404 | }, - 405 | ) - 406 | .unwrap(); - | - 407 | let mut recorder = ReadRecorder::new(&code); - 408 | let tree = parser - 409 | .parse_with_options(&mut |i, _| recorder.read(i), Some(&tree), None) - 410 | .unwrap(); - 411 | assert_eq!( - 412 | tree.root_node().to_sexp(), - 413 | concat!( - 414 | "(program (expression_statement (binary_expression ", - 415 | "left: (identifier) ", - 416 | "right: (parenthesized_expression (binary_expression ", - 417 | "left: (number) ", - 418 | "right: (member_expression ", - 419 | "object: (identifier) ", - 420 | "property: (property_identifier)))))))" - 421 | ) - 422 | ); - | - 423 | assert_eq!(recorder.strings_read(), vec![" * ", "abc.d)",]); - 424 | } - | 
- 425 | #[test] - 426 | fn test_parsing_empty_file_with_reused_tree() { - 427 | let mut parser = Parser::new(); - 428 | parser.set_language(&get_language("rust")).unwrap(); - | - 429 | let tree = parser.parse("", None); - 430 | parser.parse("", tree.as_ref()); - | - 431 | let tree = parser.parse("\n ", None); - 432 | parser.parse("\n ", tree.as_ref()); - 433 | } - | - 434 | #[test] - 435 | fn test_parsing_after_editing_tree_that_depends_on_column_values() { - 436 | let mut parser = Parser::new(); - 437 | parser - 438 | .set_language(&get_test_fixture_language("uses_current_column")) - 439 | .unwrap(); - | - 440 | let mut code = b" - 441 | a = b - 442 | c = do d - 443 | e + f - 444 | g - 445 | h + i - 446 | " - 447 | .to_vec(); - 448 | let mut tree = parser.parse(&code, None).unwrap(); - 449 | assert_eq!( - 450 | tree.root_node().to_sexp(), - 451 | concat!( - 452 | "(block ", - 453 | "(binary_expression (identifier) (identifier)) ", - 454 | "(binary_expression (identifier) (do_expression (block (identifier) (binary_expression (identifier) (identifier)) (identifier)))) ", - 455 | "(binary_expression (identifier) (identifier)))", - 456 | ) - 457 | ); - | - 458 | perform_edit( - 459 | &mut tree, - 460 | &mut code, - 461 | &Edit { - 462 | position: 8, - 463 | deleted_length: 0, - 464 | inserted_text: b"1234".to_vec(), - 465 | }, - 466 | ) - 467 | .unwrap(); - | - 468 | assert_eq!( - 469 | code, - 470 | b" - 471 | a = b - 472 | c1234 = do d - 473 | e + f - 474 | g - 475 | h + i - 476 | " - 477 | ); - | - 478 | let mut recorder = ReadRecorder::new(&code); - 479 | let tree = parser - 480 | .parse_with_options(&mut |i, _| recorder.read(i), Some(&tree), None) - 481 | .unwrap(); - | - 482 | assert_eq!( - 483 | tree.root_node().to_sexp(), - 484 | concat!( - 485 | "(block ", - 486 | "(binary_expression (identifier) (identifier)) ", - 487 | "(binary_expression (identifier) (do_expression (block (identifier)))) ", - 488 | "(binary_expression (identifier) (identifier)) ", - 489 | 
"(identifier) ", - 490 | "(binary_expression (identifier) (identifier)))", - 491 | ) - 492 | ); - | - 493 | assert_eq!( - 494 | recorder.strings_read(), - 495 | vec!["\nc1234 = do d\n e + f\n g\n"] - 496 | ); - 497 | } - | - 498 | #[test] - 499 | fn test_parsing_after_editing_tree_that_depends_on_column_position() { - 500 | let mut parser = Parser::new(); - 501 | parser - 502 | .set_language(&get_test_fixture_language("depends_on_column")) - 503 | .unwrap(); - | - 504 | let mut code = b"\n x".to_vec(); - 505 | let mut tree = parser.parse(&code, None).unwrap(); - 506 | assert_eq!(tree.root_node().to_sexp(), "(x_is_at (odd_column))"); - | - 507 | perform_edit( - 508 | &mut tree, - 509 | &mut code, - 510 | &Edit { - 511 | position: 1, - 512 | deleted_length: 0, - 513 | inserted_text: b" ".to_vec(), - 514 | }, - 515 | ) - 516 | .unwrap(); - | - 517 | assert_eq!(code, b"\n x"); - | - 518 | let mut recorder = ReadRecorder::new(&code); - 519 | let mut tree = parser - 520 | .parse_with_options(&mut |i, _| recorder.read(i), Some(&tree), None) - 521 | .unwrap(); - | - 522 | assert_eq!(tree.root_node().to_sexp(), "(x_is_at (even_column))",); - 523 | assert_eq!(recorder.strings_read(), vec!["\n x"]); - | - 524 | perform_edit( - 525 | &mut tree, - 526 | &mut code, - 527 | &Edit { - 528 | position: 1, - 529 | deleted_length: 0, - 530 | inserted_text: b"\n".to_vec(), - 531 | }, - 532 | ) - 533 | .unwrap(); - | - 534 | assert_eq!(code, b"\n\n x"); - | - 535 | let mut recorder = ReadRecorder::new(&code); - 536 | let tree = parser - 537 | .parse_with_options(&mut |i, _| recorder.read(i), Some(&tree), None) - 538 | .unwrap(); - | - 539 | assert_eq!(tree.root_node().to_sexp(), "(x_is_at (even_column))",); - 540 | assert_eq!(recorder.strings_read(), vec!["\n\n x"]); - 541 | } - | - 542 | #[test] - 543 | fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() { - 544 | let mut parser = Parser::new(); - 545 | parser.set_language(&get_language("python")).unwrap(); - | - 
546 | let mut source = b"a = b, 'c, d'".to_vec(); - 547 | let tree = parser.parse(&source, None).unwrap(); - 548 | assert_eq!( - 549 | tree.root_node().to_sexp(), - 550 | "(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string (string_start) (string_content) (string_end))))))" - 551 | ); - | - 552 | // Delete a suffix of the source code, starting in the middle of the string - 553 | // literal, after some whitespace. With this deletion, the remaining string - 554 | // content: "c, " looks like two valid python tokens: an identifier and a comma. - 555 | // When this edit is undone, in order correctly recover the original tree, the - 556 | // parser needs to remember that before matching the `c` as an identifier, it - 557 | // lookahead ahead several bytes, trying to find the closing quotation mark in - 558 | // order to match the "string content" node. - 559 | let edit_ix = std::str::from_utf8(&source).unwrap().find("d'").unwrap(); - 560 | let edit = Edit { - 561 | position: edit_ix, - 562 | deleted_length: source.len() - edit_ix, - 563 | inserted_text: Vec::new(), - 564 | }; - 565 | let undo = invert_edit(&source, &edit); - | - 566 | let mut tree2 = tree.clone(); - 567 | perform_edit(&mut tree2, &mut source, &edit).unwrap(); - 568 | tree2 = parser.parse(&source, Some(&tree2)).unwrap(); - 569 | assert!(tree2.root_node().has_error()); - | - 570 | let mut tree3 = tree2.clone(); - 571 | perform_edit(&mut tree3, &mut source, &undo).unwrap(); - 572 | tree3 = parser.parse(&source, Some(&tree3)).unwrap(); - 573 | assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),); - 574 | } - | - 575 | // Thread safety - | - 576 | #[test] - 577 | fn test_parsing_on_multiple_threads() { - 578 | // Parse this source file so that each thread has a non-trivial amount of - 579 | // work to do. 
- 580 | let this_file_source = include_str!("parser_test.rs"); - | - 581 | let mut parser = Parser::new(); - 582 | parser.set_language(&get_language("rust")).unwrap(); - 583 | let tree = parser.parse(this_file_source, None).unwrap(); - | - 584 | let mut parse_threads = Vec::new(); - 585 | for thread_id in 1..5 { - 586 | let mut tree_clone = tree.clone(); - 587 | parse_threads.push(thread::spawn(move || { - 588 | // For each thread, prepend a different number of declarations to the - 589 | // source code. - 590 | let mut prepend_line_count = 0; - 591 | let mut prepended_source = String::new(); - 592 | for _ in 0..thread_id { - 593 | prepend_line_count += 2; - 594 | prepended_source += "struct X {}\n\n"; - 595 | } - | - 596 | tree_clone.edit(&InputEdit { - 597 | start_byte: 0, - 598 | old_end_byte: 0, - 599 | new_end_byte: prepended_source.len(), - 600 | start_position: Point::new(0, 0), - 601 | old_end_position: Point::new(0, 0), - 602 | new_end_position: Point::new(prepend_line_count, 0), - 603 | }); - 604 | prepended_source += this_file_source; - | - 605 | // Reparse using the old tree as a starting point. - 606 | let mut parser = Parser::new(); - 607 | parser.set_language(&get_language("rust")).unwrap(); - 608 | parser.parse(&prepended_source, Some(&tree_clone)).unwrap() - 609 | })); - 610 | } - | - 611 | // Check that the trees have the expected relationship to one another. 
- 612 | let trees = parse_threads - 613 | .into_iter() - 614 | .map(|thread| thread.join().unwrap()); - 615 | let child_count_differences = trees - 616 | .map(|t| t.root_node().child_count() - tree.root_node().child_count()) - 617 | .collect::>(); - | - 618 | assert_eq!(child_count_differences, &[1, 2, 3, 4]); - 619 | } - | - 620 | #[test] - 621 | fn test_parsing_cancelled_by_another_thread() { - 622 | let cancellation_flag = std::sync::Arc::new(AtomicUsize::new(0)); - 623 | let flag = cancellation_flag.clone(); - 624 | let callback = &mut |_: &ParseState| { - 625 | if cancellation_flag.load(Ordering::SeqCst) != 0 { - 626 | ControlFlow::Break(()) - 627 | } else { - 628 | ControlFlow::Continue(()) - 629 | } - 630 | }; - | - 631 | let mut parser = Parser::new(); - 632 | parser.set_language(&get_language("javascript")).unwrap(); - | - 633 | // Long input - parsing succeeds - 634 | let tree = parser.parse_with_options( - 635 | &mut |offset, _| { - 636 | if offset == 0 { - 637 | " [".as_bytes() - 638 | } else if offset >= 20000 { - 639 | "".as_bytes() - 640 | } else { - 641 | "0,".as_bytes() - 642 | } - 643 | }, - 644 | None, - 645 | Some(ParseOptions::new().progress_callback(callback)), - 646 | ); - 647 | assert!(tree.is_some()); - | - 648 | let cancel_thread = thread::spawn(move || { - 649 | thread::sleep(time::Duration::from_millis(100)); - 650 | flag.store(1, Ordering::SeqCst); - 651 | }); - | - 652 | // Infinite input - 653 | let tree = parser.parse_with_options( - 654 | &mut |offset, _| { - 655 | thread::yield_now(); - 656 | thread::sleep(time::Duration::from_millis(10)); - 657 | if offset == 0 { - 658 | b" [" - 659 | } else { - 660 | b"0," - 661 | } - 662 | }, - 663 | None, - 664 | Some(ParseOptions::new().progress_callback(callback)), - 665 | ); - | - 666 | // Parsing returns None because it was cancelled. 
- 667 | cancel_thread.join().unwrap(); - 668 | assert!(tree.is_none()); - 669 | } - | - 670 | // Timeouts - | - 671 | #[test] - 672 | #[retry(10)] - 673 | fn test_parsing_with_a_timeout() { - 674 | let mut parser = Parser::new(); - 675 | parser.set_language(&get_language("json")).unwrap(); - | - 676 | // Parse an infinitely-long array, but pause after 1ms of processing. - 677 | let start_time = time::Instant::now(); - 678 | let tree = parser.parse_with_options( - 679 | &mut |offset, _| { - 680 | if offset == 0 { - 681 | b" [" - 682 | } else { - 683 | b",0" - 684 | } - 685 | }, - 686 | None, - 687 | Some(ParseOptions::new().progress_callback(&mut |_| { - 688 | if start_time.elapsed().as_micros() > 1000 { - 689 | ControlFlow::Break(()) - 690 | } else { - 691 | ControlFlow::Continue(()) - 692 | } - 693 | })), - 694 | ); - 695 | assert!(tree.is_none()); - 696 | assert!(start_time.elapsed().as_micros() < 2000); - | - 697 | // Continue parsing, but pause after 1 ms of processing. - 698 | let start_time = time::Instant::now(); - 699 | let tree = parser.parse_with_options( - 700 | &mut |offset, _| { - 701 | if offset == 0 { - 702 | b" [" - 703 | } else { - 704 | b",0" - 705 | } - 706 | }, - 707 | None, - 708 | Some(ParseOptions::new().progress_callback(&mut |_| { - 709 | if start_time.elapsed().as_micros() > 5000 { - 710 | ControlFlow::Break(()) - 711 | } else { - 712 | ControlFlow::Continue(()) - 713 | } - 714 | })), - 715 | ); - 716 | assert!(tree.is_none()); - 717 | assert!(start_time.elapsed().as_micros() > 100); - 718 | assert!(start_time.elapsed().as_micros() < 10000); - | - 719 | // Finish parsing - 720 | let tree = parser - 721 | .parse_with_options( - 722 | &mut |offset, _| match offset { - 723 | 5001.. 
=> "".as_bytes(), - 724 | 5000 => "]".as_bytes(), - 725 | _ => ",0".as_bytes(), - 726 | }, - 727 | None, - 728 | None, - 729 | ) - 730 | .unwrap(); - 731 | assert_eq!(tree.root_node().child(0).unwrap().kind(), "array"); - 732 | } - | - 733 | #[test] - 734 | #[retry(10)] - 735 | fn test_parsing_with_a_timeout_and_a_reset() { - 736 | let mut parser = Parser::new(); - 737 | parser.set_language(&get_language("json")).unwrap(); - | - 738 | let start_time = time::Instant::now(); - 739 | let code = "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]"; - 740 | let tree = parser.parse_with_options( - 741 | &mut |offset, _| { - 742 | if offset >= code.len() { - 743 | &[] - 744 | } else { - 745 | &code.as_bytes()[offset..] - 746 | } - 747 | }, - 748 | None, - 749 | Some(ParseOptions::new().progress_callback(&mut |_| { - 750 | if start_time.elapsed().as_micros() > 5 { - 751 | ControlFlow::Break(()) - 752 | } else { - 753 | ControlFlow::Continue(()) - 754 | } - 755 | })), - 756 | ); - 757 | assert!(tree.is_none()); - | - 758 | // Without calling reset, the parser continues from where it left off, so - 759 | // it does not see the changes to the beginning of the source code. - 760 | let tree = parser.parse( - 761 | "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", - 762 | None, - 763 | ).unwrap(); - 764 | assert_eq!( - 765 | tree.root_node() - 766 | .named_child(0) - 767 | .unwrap() - 768 | .named_child(0) - 769 | .unwrap() - 770 | .kind(), - 771 | "string" - 772 | ); - | - 773 | let start_time = time::Instant::now(); - 774 | let code = "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]"; - 775 | let tree = parser.parse_with_options( - 776 | &mut |offset, _| { - 777 | if offset >= code.len() { - 778 | &[] - 779 | } else { - 780 | &code.as_bytes()[offset..] 
- 781 | } - 782 | }, - 783 | None, - 784 | Some(ParseOptions::new().progress_callback(&mut |_| { - 785 | if start_time.elapsed().as_micros() > 5 { - 786 | ControlFlow::Break(()) - 787 | } else { - 788 | ControlFlow::Continue(()) - 789 | } - 790 | })), - 791 | ); - 792 | assert!(tree.is_none()); - | - 793 | // By calling reset, we force the parser to start over from scratch so - 794 | // that it sees the changes to the beginning of the source code. - 795 | parser.reset(); - 796 | let tree = parser.parse( - 797 | "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", - 798 | None, - 799 | ).unwrap(); - 800 | assert_eq!( - 801 | tree.root_node() - 802 | .named_child(0) - 803 | .unwrap() - 804 | .named_child(0) - 805 | .unwrap() - 806 | .kind(), - 807 | "null" - 808 | ); - 809 | } - | - 810 | #[test] - 811 | #[retry(10)] - 812 | fn test_parsing_with_a_timeout_and_implicit_reset() { - 813 | allocations::record(|| { - 814 | let mut parser = Parser::new(); - 815 | parser.set_language(&get_language("javascript")).unwrap(); - | - 816 | let code = "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]"; - 817 | let start_time = time::Instant::now(); - 818 | let tree = parser.parse_with_options( - 819 | &mut |offset, _| { - 820 | if offset >= code.len() { - 821 | &[] - 822 | } else { - 823 | &code.as_bytes()[offset..] - 824 | } - 825 | }, - 826 | None, - 827 | Some(ParseOptions::new().progress_callback(&mut |_| { - 828 | if start_time.elapsed().as_micros() > 5 { - 829 | ControlFlow::Break(()) - 830 | } else { - 831 | ControlFlow::Continue(()) - 832 | } - 833 | })), - 834 | ); - 835 | assert!(tree.is_none()); - | - 836 | // Changing the parser's language implicitly resets, discarding - 837 | // the previous partial parse. 
- 838 | parser.set_language(&get_language("json")).unwrap(); - 839 | let tree = parser.parse( - 840 | "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", - 841 | None, - 842 | ).unwrap(); - 843 | assert_eq!( - 844 | tree.root_node() - 845 | .named_child(0) - 846 | .unwrap() - 847 | .named_child(0) - 848 | .unwrap() - 849 | .kind(), - 850 | "null" - 851 | ); - 852 | }); - 853 | } - | - 854 | #[test] - 855 | #[retry(10)] - 856 | fn test_parsing_with_timeout_and_no_completion() { - 857 | allocations::record(|| { - 858 | let mut parser = Parser::new(); - 859 | parser.set_language(&get_language("javascript")).unwrap(); - | - 860 | let code = "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]"; - 861 | let start_time = time::Instant::now(); - 862 | let tree = parser.parse_with_options( - 863 | &mut |offset, _| { - 864 | if offset >= code.len() { - 865 | &[] - 866 | } else { - 867 | &code.as_bytes()[offset..] 
- 868 | } - 869 | }, - 870 | None, - 871 | Some(ParseOptions::new().progress_callback(&mut |_| { - 872 | if start_time.elapsed().as_micros() > 5 { - 873 | ControlFlow::Break(()) - 874 | } else { - 875 | ControlFlow::Continue(()) - 876 | } - 877 | })), - 878 | ); - 879 | assert!(tree.is_none()); - | - 880 | // drop the parser when it has an unfinished parse - 881 | }); - 882 | } - | - 883 | #[test] - 884 | fn test_parsing_with_timeout_during_balancing() { - 885 | allocations::record(|| { - 886 | let mut parser = Parser::new(); - 887 | parser.set_language(&get_language("javascript")).unwrap(); - | - 888 | let function_count = 100; - | - 889 | let code = "function() {}\n".repeat(function_count); - 890 | let mut current_byte_offset = 0; - 891 | let mut in_balancing = false; - 892 | let tree = parser.parse_with_options( - 893 | &mut |offset, _| { - 894 | if offset >= code.len() { - 895 | &[] - 896 | } else { - 897 | &code.as_bytes()[offset..] - 898 | } - 899 | }, - 900 | None, - 901 | Some(ParseOptions::new().progress_callback(&mut |state| { - 902 | // The parser will call the progress_callback during parsing, and at the very end - 903 | // during tree-balancing. For very large trees, this balancing act can take quite - 904 | // some time, so we want to verify that timing out during this operation is - 905 | // possible. - 906 | // - 907 | // We verify this by checking the current byte offset, as this number will *not* be - 908 | // updated during tree balancing. If we see the same offset twice, we know that we - 909 | // are in the balancing phase. - 910 | if state.current_byte_offset() != current_byte_offset { - 911 | current_byte_offset = state.current_byte_offset(); - 912 | ControlFlow::Continue(()) - 913 | } else { - 914 | in_balancing = true; - 915 | ControlFlow::Break(()) - 916 | } - 917 | })), - 918 | ); - | - 919 | assert!(tree.is_none()); - 920 | assert!(in_balancing); - | - 921 | // This should not cause an assertion failure. 
- 922 | parser.reset(); - 923 | let tree = parser.parse_with_options( - 924 | &mut |offset, _| { - 925 | if offset >= code.len() { - 926 | &[] - 927 | } else { - 928 | &code.as_bytes()[offset..] - 929 | } - 930 | }, - 931 | None, - 932 | Some(ParseOptions::new().progress_callback(&mut |state| { - 933 | if state.current_byte_offset() != current_byte_offset { - 934 | current_byte_offset = state.current_byte_offset(); - 935 | ControlFlow::Continue(()) - 936 | } else { - 937 | in_balancing = true; - 938 | ControlFlow::Break(()) - 939 | } - 940 | })), - 941 | ); - | - 942 | assert!(tree.is_none()); - 943 | assert!(in_balancing); - | - 944 | // If we resume parsing (implying we didn't call `parser.reset()`), we should be able to - 945 | // finish parsing the tree, continuing from where we left off. - 946 | let tree = parser - 947 | .parse_with_options( - 948 | &mut |offset, _| { - 949 | if offset >= code.len() { - 950 | &[] - 951 | } else { - 952 | &code.as_bytes()[offset..] - 953 | } - 954 | }, - 955 | None, - 956 | Some(ParseOptions::new().progress_callback(&mut |state| { - 957 | // Because we've already finished parsing, we should only be resuming the - 958 | // balancing phase. - 959 | assert!(state.current_byte_offset() == current_byte_offset); - 960 | ControlFlow::Continue(()) - 961 | })), - 962 | ) - 963 | .unwrap(); - 964 | assert!(!tree.root_node().has_error()); - 965 | assert_eq!(tree.root_node().child_count(), function_count); - 966 | }); - 967 | } - | - 968 | #[test] - 969 | fn test_parsing_with_timeout_when_error_detected() { - 970 | let mut parser = Parser::new(); - 971 | parser.set_language(&get_language("json")).unwrap(); - | - 972 | // Parse an infinitely-long array, but insert an error after 1000 characters. 
- 973 | let mut offset = 0; - 974 | let erroneous_code = "!,"; - 975 | let tree = parser.parse_with_options( - 976 | &mut |i, _| match i { - 977 | 0 => "[", - 978 | 1..=1000 => "0,", - 979 | _ => erroneous_code, - 980 | }, - 981 | None, - 982 | Some(ParseOptions::new().progress_callback(&mut |state| { - 983 | offset = state.current_byte_offset(); - 984 | if state.has_error() { - 985 | ControlFlow::Break(()) - 986 | } else { - 987 | ControlFlow::Continue(()) - 988 | } - 989 | })), - 990 | ); - | - 991 | // The callback is called at the end of parsing, however, what we're asserting here is that - 992 | // parsing ends immediately as the error is detected. This is verified by checking the offset - 993 | // of the last byte processed is the length of the erroneous code we inserted, aka, 1002, or - 994 | // 1000 + the length of the erroneous code. - 995 | assert_eq!(offset, 1000 + erroneous_code.len()); - 996 | assert!(tree.is_none()); - 997 | } - | - 998 | // Included Ranges - | - 999 | #[test] -1000 | fn test_parsing_with_one_included_range() { -1001 | let source_code = "hi"; - | -1002 | let mut parser = Parser::new(); -1003 | parser.set_language(&get_language("html")).unwrap(); -1004 | let html_tree = parser.parse(source_code, None).unwrap(); -1005 | let script_content_node = html_tree.root_node().child(1).unwrap().child(1).unwrap(); -1006 | assert_eq!(script_content_node.kind(), "raw_text"); - | -1007 | assert_eq!( -1008 | parser.included_ranges(), -1009 | &[Range { -1010 | start_byte: 0, -1011 | end_byte: u32::MAX as usize, -1012 | start_point: Point::new(0, 0), -1013 | end_point: Point::new(u32::MAX as usize, u32::MAX as usize), -1014 | }] -1015 | ); -1016 | parser -1017 | .set_included_ranges(&[script_content_node.range()]) -1018 | .unwrap(); -1019 | assert_eq!(parser.included_ranges(), &[script_content_node.range()]); -1020 | parser.set_language(&get_language("javascript")).unwrap(); -1021 | let js_tree = parser.parse(source_code, None).unwrap(); - | -1022 | 
assert_eq!( -1023 | js_tree.root_node().to_sexp(), -1024 | concat!( -1025 | "(program (expression_statement (call_expression ", -1026 | "function: (member_expression object: (identifier) property: (property_identifier)) ", -1027 | "arguments: (arguments (string (string_fragment))))))", -1028 | ) -1029 | ); -1030 | assert_eq!( -1031 | js_tree.root_node().start_position(), -1032 | Point::new(0, source_code.find("console").unwrap()) -1033 | ); -1034 | assert_eq!(js_tree.included_ranges(), &[script_content_node.range()]); -1035 | } - | -1036 | #[test] -1037 | fn test_parsing_with_multiple_included_ranges() { -1038 | let source_code = "html `
Hello, ${name.toUpperCase()}, it's ${now()}.
`"; - | -1039 | let mut parser = Parser::new(); -1040 | parser.set_language(&get_language("javascript")).unwrap(); -1041 | let js_tree = parser.parse(source_code, None).unwrap(); -1042 | let template_string_node = js_tree -1043 | .root_node() -1044 | .descendant_for_byte_range( -1045 | source_code.find("`<").unwrap(), -1046 | source_code.find(">`").unwrap(), -1047 | ) -1048 | .unwrap(); -1049 | assert_eq!(template_string_node.kind(), "template_string"); - | -1050 | let open_quote_node = template_string_node.child(0).unwrap(); -1051 | let interpolation_node1 = template_string_node.child(2).unwrap(); -1052 | let interpolation_node2 = template_string_node.child(4).unwrap(); -1053 | let close_quote_node = template_string_node.child(6).unwrap(); - | -1054 | parser.set_language(&get_language("html")).unwrap(); -1055 | let html_ranges = &[ -1056 | Range { -1057 | start_byte: open_quote_node.end_byte(), -1058 | start_point: open_quote_node.end_position(), -1059 | end_byte: interpolation_node1.start_byte(), -1060 | end_point: interpolation_node1.start_position(), -1061 | }, -1062 | Range { -1063 | start_byte: interpolation_node1.end_byte(), -1064 | start_point: interpolation_node1.end_position(), -1065 | end_byte: interpolation_node2.start_byte(), -1066 | end_point: interpolation_node2.start_position(), -1067 | }, -1068 | Range { -1069 | start_byte: interpolation_node2.end_byte(), -1070 | start_point: interpolation_node2.end_position(), -1071 | end_byte: close_quote_node.start_byte(), -1072 | end_point: close_quote_node.start_position(), -1073 | }, -1074 | ]; -1075 | parser.set_included_ranges(html_ranges).unwrap(); -1076 | let html_tree = parser.parse(source_code, None).unwrap(); - | -1077 | assert_eq!( -1078 | html_tree.root_node().to_sexp(), -1079 | concat!( -1080 | "(document (element", -1081 | " (start_tag (tag_name))", -1082 | " (text)", -1083 | " (element (start_tag (tag_name)) (end_tag (tag_name)))", -1084 | " (text)", -1085 | " (end_tag (tag_name))))", -1086 | ) 
-1087 | ); -1088 | assert_eq!(html_tree.included_ranges(), html_ranges); - | -1089 | let div_element_node = html_tree.root_node().child(0).unwrap(); -1090 | let hello_text_node = div_element_node.child(1).unwrap(); -1091 | let b_element_node = div_element_node.child(2).unwrap(); -1092 | let b_start_tag_node = b_element_node.child(0).unwrap(); -1093 | let b_end_tag_node = b_element_node.child(1).unwrap(); - | -1094 | assert_eq!(hello_text_node.kind(), "text"); -1095 | assert_eq!( -1096 | hello_text_node.start_byte(), -1097 | source_code.find("Hello").unwrap() -1098 | ); -1099 | assert_eq!( -1100 | hello_text_node.end_byte(), -1101 | source_code.find(" ").unwrap() -1102 | ); - | -1103 | assert_eq!(b_start_tag_node.kind(), "start_tag"); -1104 | assert_eq!( -1105 | b_start_tag_node.start_byte(), -1106 | source_code.find("").unwrap() -1107 | ); -1108 | assert_eq!( -1109 | b_start_tag_node.end_byte(), -1110 | source_code.find("${now()}").unwrap() -1111 | ); - | -1112 | assert_eq!(b_end_tag_node.kind(), "end_tag"); -1113 | assert_eq!( -1114 | b_end_tag_node.start_byte(), -1115 | source_code.find("").unwrap() -1116 | ); -1117 | assert_eq!( -1118 | b_end_tag_node.end_byte(), -1119 | source_code.find(".").unwrap() -1120 | ); -1121 | } - | -1122 | #[test] -1123 | fn test_parsing_with_included_range_containing_mismatched_positions() { -1124 | let source_code = "
test
{_ignore_this_part_}"; - | -1125 | let mut parser = Parser::new(); -1126 | parser.set_language(&get_language("html")).unwrap(); - | -1127 | let end_byte = source_code.find("{_ignore_this_part_").unwrap(); - | -1128 | let range_to_parse = Range { -1129 | start_byte: 0, -1130 | start_point: Point { -1131 | row: 10, -1132 | column: 12, -1133 | }, -1134 | end_byte, -1135 | end_point: Point { -1136 | row: 10, -1137 | column: 12 + end_byte, -1138 | }, -1139 | }; - | -1140 | parser.set_included_ranges(&[range_to_parse]).unwrap(); - | -1141 | let html_tree = parser -1142 | .parse_with_options(&mut chunked_input(source_code, 3), None, None) -1143 | .unwrap(); - | -1144 | assert_eq!(html_tree.root_node().range(), range_to_parse); - | -1145 | assert_eq!( -1146 | html_tree.root_node().to_sexp(), -1147 | "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))" -1148 | ); -1149 | } - | -1150 | #[test] -1151 | fn test_parsing_error_in_invalid_included_ranges() { -1152 | let mut parser = Parser::new(); - | -1153 | // Ranges are not ordered -1154 | let error = parser -1155 | .set_included_ranges(&[ -1156 | Range { -1157 | start_byte: 23, -1158 | end_byte: 29, -1159 | start_point: Point::new(0, 23), -1160 | end_point: Point::new(0, 29), -1161 | }, -1162 | Range { -1163 | start_byte: 0, -1164 | end_byte: 5, -1165 | start_point: Point::new(0, 0), -1166 | end_point: Point::new(0, 5), -1167 | }, -1168 | Range { -1169 | start_byte: 50, -1170 | end_byte: 60, -1171 | start_point: Point::new(0, 50), -1172 | end_point: Point::new(0, 60), -1173 | }, -1174 | ]) -1175 | .unwrap_err(); -1176 | assert_eq!(error, IncludedRangesError(1)); - | -1177 | // Range ends before it starts -1178 | let error = parser -1179 | .set_included_ranges(&[Range { -1180 | start_byte: 10, -1181 | end_byte: 5, -1182 | start_point: Point::new(0, 10), -1183 | end_point: Point::new(0, 5), -1184 | }]) -1185 | .unwrap_err(); -1186 | assert_eq!(error, IncludedRangesError(0)); -1187 | } - | -1188 | #[test] 
-1189 | fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() { -1190 | let source_code = ""; -1191 | let utf16_source_code = source_code -1192 | .encode_utf16() -1193 | .map(u16::to_le) -1194 | .collect::>(); - | -1195 | let start_byte = 2 * source_code.find("a.").unwrap(); -1196 | let end_byte = 2 * source_code.find("").unwrap(); - | -1197 | let mut parser = Parser::new(); -1198 | parser.set_language(&get_language("javascript")).unwrap(); -1199 | parser -1200 | .set_included_ranges(&[Range { -1201 | start_byte, -1202 | end_byte, -1203 | start_point: Point::new(0, start_byte), -1204 | end_point: Point::new(0, end_byte), -1205 | }]) -1206 | .unwrap(); -1207 | let tree = parser.parse_utf16_le(&utf16_source_code, None).unwrap(); -1208 | assert_eq!(tree.root_node().to_sexp(), "(program (ERROR (identifier)))"); -1209 | } - | -1210 | #[test] -1211 | fn test_parsing_with_external_scanner_that_uses_included_range_boundaries() { -1212 | let source_code = "a <%= b() %> c <% d() %>"; -1213 | let range1_start_byte = source_code.find(" b() ").unwrap(); -1214 | let range1_end_byte = range1_start_byte + " b() ".len(); -1215 | let range2_start_byte = source_code.find(" d() ").unwrap(); -1216 | let range2_end_byte = range2_start_byte + " d() ".len(); - | -1217 | let mut parser = Parser::new(); -1218 | parser.set_language(&get_language("javascript")).unwrap(); -1219 | parser -1220 | .set_included_ranges(&[ -1221 | Range { -1222 | start_byte: range1_start_byte, -1223 | end_byte: range1_end_byte, -1224 | start_point: Point::new(0, range1_start_byte), -1225 | end_point: Point::new(0, range1_end_byte), -1226 | }, -1227 | Range { -1228 | start_byte: range2_start_byte, -1229 | end_byte: range2_end_byte, -1230 | start_point: Point::new(0, range2_start_byte), -1231 | end_point: Point::new(0, range2_end_byte), -1232 | }, -1233 | ]) -1234 | .unwrap(); - | -1235 | let tree = parser.parse(source_code, None).unwrap(); -1236 | let root = tree.root_node(); -1237 | let 
statement1 = root.child(0).unwrap(); -1238 | let statement2 = root.child(1).unwrap(); - | -1239 | assert_eq!( -1240 | root.to_sexp(), -1241 | concat!( -1242 | "(program", -1243 | " (expression_statement (call_expression function: (identifier) arguments: (arguments)))", -1244 | " (expression_statement (call_expression function: (identifier) arguments: (arguments))))" -1245 | ) -1246 | ); - | -1247 | assert_eq!(statement1.start_byte(), source_code.find("b()").unwrap()); -1248 | assert_eq!(statement1.end_byte(), source_code.find(" %> c").unwrap()); -1249 | assert_eq!(statement2.start_byte(), source_code.find("d()").unwrap()); -1250 | assert_eq!(statement2.end_byte(), source_code.len() - " %>".len()); -1251 | } - | -1252 | #[test] -1253 | fn test_parsing_with_a_newly_excluded_range() { -1254 | let mut source_code = String::from("
<%= something %>
"); - | -1255 | // Parse HTML including the template directive, which will cause an error -1256 | let mut parser = Parser::new(); -1257 | parser.set_language(&get_language("html")).unwrap(); -1258 | let mut first_tree = parser -1259 | .parse_with_options(&mut chunked_input(&source_code, 3), None, None) -1260 | .unwrap(); - | -1261 | // Insert code at the beginning of the document. -1262 | let prefix = "a very very long line of plain text. "; -1263 | first_tree.edit(&InputEdit { -1264 | start_byte: 0, -1265 | old_end_byte: 0, -1266 | new_end_byte: prefix.len(), -1267 | start_position: Point::new(0, 0), -1268 | old_end_position: Point::new(0, 0), -1269 | new_end_position: Point::new(0, prefix.len()), -1270 | }); -1271 | source_code.insert_str(0, prefix); - | -1272 | // Parse the HTML again, this time *excluding* the template directive -1273 | // (which has moved since the previous parse). -1274 | let directive_start = source_code.find("<%=").unwrap(); -1275 | let directive_end = source_code.find("").unwrap(); -1276 | let source_code_end = source_code.len(); -1277 | parser -1278 | .set_included_ranges(&[ -1279 | Range { -1280 | start_byte: 0, -1281 | end_byte: directive_start, -1282 | start_point: Point::new(0, 0), -1283 | end_point: Point::new(0, directive_start), -1284 | }, -1285 | Range { -1286 | start_byte: directive_end, -1287 | end_byte: source_code_end, -1288 | start_point: Point::new(0, directive_end), -1289 | end_point: Point::new(0, source_code_end), -1290 | }, -1291 | ]) -1292 | .unwrap(); -1293 | let tree = parser -1294 | .parse_with_options(&mut chunked_input(&source_code, 3), Some(&first_tree), None) -1295 | .unwrap(); - | -1296 | assert_eq!( -1297 | tree.root_node().to_sexp(), -1298 | concat!( -1299 | "(document (text) (element", -1300 | " (start_tag (tag_name))", -1301 | " (element (start_tag (tag_name)) (end_tag (tag_name)))", -1302 | " (end_tag (tag_name))))" -1303 | ) -1304 | ); - | -1305 | assert_eq!( -1306 | 
tree.changed_ranges(&first_tree).collect::>(), -1307 | vec![ -1308 | // The first range that has changed syntax is the range of the newly-inserted text. -1309 | Range { -1310 | start_byte: 0, -1311 | end_byte: prefix.len(), -1312 | start_point: Point::new(0, 0), -1313 | end_point: Point::new(0, prefix.len()), -1314 | }, -1315 | // Even though no edits were applied to the outer `div` element, -1316 | // its contents have changed syntax because a range of text that -1317 | // was previously included is now excluded. -1318 | Range { -1319 | start_byte: directive_start, -1320 | end_byte: directive_end, -1321 | start_point: Point::new(0, directive_start), -1322 | end_point: Point::new(0, directive_end), -1323 | }, -1324 | ] -1325 | ); -1326 | } - | -1327 | #[test] -1328 | fn test_parsing_with_a_newly_included_range() { -1329 | let source_code = "
<%= foo() %>
<%= bar() %><%= baz() %>"; -1330 | let range1_start = source_code.find(" foo").unwrap(); -1331 | let range2_start = source_code.find(" bar").unwrap(); -1332 | let range3_start = source_code.find(" baz").unwrap(); -1333 | let range1_end = range1_start + 7; -1334 | let range2_end = range2_start + 7; -1335 | let range3_end = range3_start + 7; - | -1336 | // Parse only the first code directive as JavaScript -1337 | let mut parser = Parser::new(); -1338 | parser.set_language(&get_language("javascript")).unwrap(); -1339 | parser -1340 | .set_included_ranges(&[simple_range(range1_start, range1_end)]) -1341 | .unwrap(); -1342 | let tree = parser -1343 | .parse_with_options(&mut chunked_input(source_code, 3), None, None) -1344 | .unwrap(); -1345 | assert_eq!( -1346 | tree.root_node().to_sexp(), -1347 | concat!( -1348 | "(program", -1349 | " (expression_statement (call_expression function: (identifier) arguments: (arguments))))", -1350 | ) -1351 | ); - | -1352 | // Parse both the first and third code directives as JavaScript, using the old tree as a -1353 | // reference. -1354 | parser -1355 | .set_included_ranges(&[ -1356 | simple_range(range1_start, range1_end), -1357 | simple_range(range3_start, range3_end), -1358 | ]) -1359 | .unwrap(); -1360 | let tree2 = parser -1361 | .parse_with_options(&mut chunked_input(source_code, 3), Some(&tree), None) -1362 | .unwrap(); -1363 | assert_eq!( -1364 | tree2.root_node().to_sexp(), -1365 | concat!( -1366 | "(program", -1367 | " (expression_statement (call_expression function: (identifier) arguments: (arguments)))", -1368 | " (expression_statement (call_expression function: (identifier) arguments: (arguments))))", -1369 | ) -1370 | ); -1371 | assert_eq!( -1372 | tree2.changed_ranges(&tree).collect::>(), -1373 | &[simple_range(range1_end, range3_end)] -1374 | ); - | -1375 | // Parse all three code directives as JavaScript, using the old tree as a -1376 | // reference. 
-1377 | parser -1378 | .set_included_ranges(&[ -1379 | simple_range(range1_start, range1_end), -1380 | simple_range(range2_start, range2_end), -1381 | simple_range(range3_start, range3_end), -1382 | ]) -1383 | .unwrap(); -1384 | let tree3 = parser.parse(source_code, Some(&tree)).unwrap(); -1385 | assert_eq!( -1386 | tree3.root_node().to_sexp(), -1387 | concat!( -1388 | "(program", -1389 | " (expression_statement (call_expression function: (identifier) arguments: (arguments)))", -1390 | " (expression_statement (call_expression function: (identifier) arguments: (arguments)))", -1391 | " (expression_statement (call_expression function: (identifier) arguments: (arguments))))", -1392 | ) -1393 | ); -1394 | assert_eq!( -1395 | tree3.changed_ranges(&tree2).collect::>(), -1396 | &[simple_range(range2_start + 1, range2_end - 1)] -1397 | ); -1398 | } - | -1399 | #[test] -1400 | fn test_parsing_with_included_ranges_and_missing_tokens() { -1401 | let (parser_name, parser_code) = generate_parser( -1402 | r#"{ -1403 | "name": "test_leading_missing_token", -1404 | "rules": { -1405 | "program": { -1406 | "type": "SEQ", -1407 | "members": [ -1408 | {"type": "SYMBOL", "name": "A"}, -1409 | {"type": "SYMBOL", "name": "b"}, -1410 | {"type": "SYMBOL", "name": "c"}, -1411 | {"type": "SYMBOL", "name": "A"}, -1412 | {"type": "SYMBOL", "name": "b"}, -1413 | {"type": "SYMBOL", "name": "c"} -1414 | ] -1415 | }, -1416 | "A": {"type": "SYMBOL", "name": "a"}, -1417 | "a": {"type": "STRING", "value": "a"}, -1418 | "b": {"type": "STRING", "value": "b"}, -1419 | "c": {"type": "STRING", "value": "c"} -1420 | } -1421 | }"#, -1422 | ) -1423 | .unwrap(); - | -1424 | let mut parser = Parser::new(); -1425 | parser -1426 | .set_language(&get_test_language(&parser_name, &parser_code, None)) -1427 | .unwrap(); - | -1428 | // There's a missing `a` token at the beginning of the code. It must be inserted -1429 | // at the beginning of the first included range, not at {0, 0}. 
-1430 | let source_code = "__bc__bc__"; -1431 | parser -1432 | .set_included_ranges(&[ -1433 | Range { -1434 | start_byte: 2, -1435 | end_byte: 4, -1436 | start_point: Point::new(0, 2), -1437 | end_point: Point::new(0, 4), -1438 | }, -1439 | Range { -1440 | start_byte: 6, -1441 | end_byte: 8, -1442 | start_point: Point::new(0, 6), -1443 | end_point: Point::new(0, 8), -1444 | }, -1445 | ]) -1446 | .unwrap(); - | -1447 | let tree = parser.parse(source_code, None).unwrap(); -1448 | let root = tree.root_node(); -1449 | assert_eq!( -1450 | root.to_sexp(), -1451 | "(program (A (MISSING a)) (b) (c) (A (MISSING a)) (b) (c))" -1452 | ); -1453 | assert_eq!(root.start_byte(), 2); -1454 | assert_eq!(root.child(3).unwrap().start_byte(), 4); -1455 | } - | -1456 | #[test] -1457 | fn test_grammars_that_can_hang_on_eof() { -1458 | let (parser_name, parser_code) = generate_parser( -1459 | r#" -1460 | { -1461 | "name": "test_single_null_char_regex", -1462 | "rules": { -1463 | "source_file": { -1464 | "type": "SEQ", -1465 | "members": [ -1466 | { "type": "STRING", "value": "\"" }, -1467 | { "type": "PATTERN", "value": "[\\x00]*" }, -1468 | { "type": "STRING", "value": "\"" } -1469 | ] -1470 | } -1471 | }, -1472 | "extras": [ { "type": "PATTERN", "value": "\\s" } ] -1473 | } -1474 | "#, -1475 | ) -1476 | .unwrap(); - | -1477 | let mut parser = Parser::new(); -1478 | parser -1479 | .set_language(&get_test_language(&parser_name, &parser_code, None)) -1480 | .unwrap(); -1481 | parser.parse("\"", None).unwrap(); - | -1482 | let (parser_name, parser_code) = generate_parser( -1483 | r#" -1484 | { -1485 | "name": "test_null_char_with_next_char_regex", -1486 | "rules": { -1487 | "source_file": { -1488 | "type": "SEQ", -1489 | "members": [ -1490 | { "type": "STRING", "value": "\"" }, -1491 | { "type": "PATTERN", "value": "[\\x00-\\x01]*" }, -1492 | { "type": "STRING", "value": "\"" } -1493 | ] -1494 | } -1495 | }, -1496 | "extras": [ { "type": "PATTERN", "value": "\\s" } ] -1497 | } -1498 | "#, 
-1499 | ) -1500 | .unwrap(); - | -1501 | parser -1502 | .set_language(&get_test_language(&parser_name, &parser_code, None)) -1503 | .unwrap(); -1504 | parser.parse("\"", None).unwrap(); - | -1505 | let (parser_name, parser_code) = generate_parser( -1506 | r#" -1507 | { -1508 | "name": "test_null_char_with_range_regex", -1509 | "rules": { -1510 | "source_file": { -1511 | "type": "SEQ", -1512 | "members": [ -1513 | { "type": "STRING", "value": "\"" }, -1514 | { "type": "PATTERN", "value": "[\\x00-\\x7F]*" }, -1515 | { "type": "STRING", "value": "\"" } -1516 | ] -1517 | } -1518 | }, -1519 | "extras": [ { "type": "PATTERN", "value": "\\s" } ] -1520 | } -1521 | "#, -1522 | ) -1523 | .unwrap(); - | -1524 | parser -1525 | .set_language(&get_test_language(&parser_name, &parser_code, None)) -1526 | .unwrap(); -1527 | parser.parse("\"", None).unwrap(); -1528 | } - | -1529 | #[test] -1530 | fn test_parse_stack_recursive_merge_error_cost_calculation_bug() { -1531 | let source_code = r" -1532 | fn main() { -1533 | if n == 1 { -1534 | } else if n == 2 { -1535 | } else { -1536 | } -1537 | } - | -1538 | let y = if x == 5 { 10 } else { 15 }; - | -1539 | if foo && bar {} - | -1540 | if foo && bar || baz {} -1541 | "; - | -1542 | let mut parser = Parser::new(); -1543 | parser.set_language(&get_language("rust")).unwrap(); - | -1544 | let mut tree = parser.parse(source_code, None).unwrap(); - | -1545 | let edit = Edit { -1546 | position: 60, -1547 | deleted_length: 63, -1548 | inserted_text: Vec::new(), -1549 | }; -1550 | let mut input = source_code.as_bytes().to_vec(); -1551 | perform_edit(&mut tree, &mut input, &edit).unwrap(); - | -1552 | parser.parse(&input, Some(&tree)).unwrap(); -1553 | } - | -1554 | #[test] -1555 | fn test_parsing_with_scanner_logging() { -1556 | let mut parser = Parser::new(); -1557 | parser -1558 | .set_language(&get_test_fixture_language("external_tokens")) -1559 | .unwrap(); - | -1560 | let mut found = false; -1561 | 
parser.set_logger(Some(Box::new(|log_type, message| { -1562 | if log_type == LogType::Lex && message == "Found a percent string" { -1563 | found = true; -1564 | } -1565 | }))); - | -1566 | let source_code = "x + %(sup (external) scanner?)"; - | -1567 | parser.parse(source_code, None).unwrap(); -1568 | assert!(found); -1569 | } - | -1570 | #[test] -1571 | fn test_parsing_get_column_at_eof() { -1572 | let mut parser = Parser::new(); -1573 | parser -1574 | .set_language(&get_test_fixture_language("get_col_eof")) -1575 | .unwrap(); - | -1576 | parser.parse("a", None).unwrap(); -1577 | } - | -1578 | #[test] -1579 | fn test_parsing_by_halting_at_offset() { -1580 | let mut parser = Parser::new(); -1581 | parser.set_language(&get_language("javascript")).unwrap(); - | -1582 | let source_code = "function foo() { return 1; }".repeat(1000); - | -1583 | let mut seen_byte_offsets = vec![]; - | -1584 | parser -1585 | .parse_with_options( -1586 | &mut |offset, _| { -1587 | if offset < source_code.len() { -1588 | &source_code.as_bytes()[offset..] 
-1589 | } else { -1590 | &[] -1591 | } -1592 | }, -1593 | None, -1594 | Some(ParseOptions::new().progress_callback(&mut |p| { -1595 | seen_byte_offsets.push(p.current_byte_offset()); -1596 | ControlFlow::Continue(()) -1597 | })), -1598 | ) -1599 | .unwrap(); - | -1600 | assert!(seen_byte_offsets.len() > 100); -1601 | } - | -1602 | #[test] -1603 | fn test_decode_utf32() { -1604 | use widestring::u32cstr; - | -1605 | let mut parser = Parser::new(); -1606 | parser.set_language(&get_language("rust")).unwrap(); - | -1607 | let utf32_text = u32cstr!("pub fn foo() { println!(\"€50\"); }"); -1608 | let utf32_text = unsafe { -1609 | std::slice::from_raw_parts(utf32_text.as_ptr().cast::(), utf32_text.len() * 4) -1610 | }; - | -1611 | struct U32Decoder; - | -1612 | impl Decode for U32Decoder { -1613 | fn decode(bytes: &[u8]) -> (i32, u32) { -1614 | if bytes.len() >= 4 { -1615 | #[cfg(target_endian = "big")] -1616 | { -1617 | ( -1618 | i32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]), -1619 | 4, -1620 | ) -1621 | } - | -1622 | #[cfg(target_endian = "little")] -1623 | { -1624 | ( -1625 | i32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]), -1626 | 4, -1627 | ) -1628 | } -1629 | } else { -1630 | (0, 0) -1631 | } -1632 | } -1633 | } - | -1634 | let tree = parser -1635 | .parse_custom_encoding::( -1636 | &mut |offset, _| { -1637 | if offset < utf32_text.len() { -1638 | &utf32_text[offset..] 
-1639 | } else { -1640 | &[] -1641 | } -1642 | }, -1643 | None, -1644 | None, -1645 | ) -1646 | .unwrap(); - | -1647 | assert_eq!( -1648 | tree.root_node().to_sexp(), -1649 | "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content))))))))" -1650 | ); -1651 | } - | -1652 | #[test] -1653 | fn test_decode_cp1252() { -1654 | use encoding_rs::WINDOWS_1252; - | -1655 | let mut parser = Parser::new(); -1656 | parser.set_language(&get_language("rust")).unwrap(); - | -1657 | let windows_1252_text = WINDOWS_1252.encode("pub fn foo() { println!(\"€50\"); }").0; - | -1658 | struct Cp1252Decoder; - | -1659 | impl Decode for Cp1252Decoder { -1660 | fn decode(bytes: &[u8]) -> (i32, u32) { -1661 | if !bytes.is_empty() { -1662 | let byte = bytes[0]; -1663 | (byte as i32, 1) -1664 | } else { -1665 | (0, 0) -1666 | } -1667 | } -1668 | } - | -1669 | let tree = parser -1670 | .parse_custom_encoding::( -1671 | &mut |offset, _| &windows_1252_text[offset..], -1672 | None, -1673 | None, -1674 | ) -1675 | .unwrap(); - | -1676 | assert_eq!( -1677 | tree.root_node().to_sexp(), -1678 | "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content))))))))" -1679 | ); -1680 | } - | -1681 | #[test] -1682 | fn test_decode_macintosh() { -1683 | use encoding_rs::MACINTOSH; - | -1684 | let mut parser = Parser::new(); -1685 | parser.set_language(&get_language("rust")).unwrap(); - | -1686 | let macintosh_text = MACINTOSH.encode("pub fn foo() { println!(\"€50\"); }").0; - | -1687 | struct MacintoshDecoder; - | -1688 | impl Decode for MacintoshDecoder { -1689 | fn decode(bytes: &[u8]) -> (i32, u32) { -1690 | if !bytes.is_empty() { -1691 | let byte = bytes[0]; -1692 | (byte as i32, 1) -1693 | } else { 
-1694 | (0, 0) -1695 | } -1696 | } -1697 | } - | -1698 | let tree = parser -1699 | .parse_custom_encoding::( -1700 | &mut |offset, _| &macintosh_text[offset..], -1701 | None, -1702 | None, -1703 | ) -1704 | .unwrap(); - | -1705 | assert_eq!( -1706 | tree.root_node().to_sexp(), -1707 | "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content))))))))" -1708 | ); -1709 | } - | -1710 | #[test] -1711 | fn test_decode_utf24le() { -1712 | let mut parser = Parser::new(); -1713 | parser.set_language(&get_language("rust")).unwrap(); - | -1714 | let mut utf24le_text = Vec::new(); -1715 | for c in "pub fn foo() { println!(\"€50\"); }".chars() { -1716 | let code_point = c as u32; -1717 | utf24le_text.push((code_point & 0xFF) as u8); -1718 | utf24le_text.push(((code_point >> 8) & 0xFF) as u8); -1719 | utf24le_text.push(((code_point >> 16) & 0xFF) as u8); -1720 | } - | -1721 | struct Utf24LeDecoder; - | -1722 | impl Decode for Utf24LeDecoder { -1723 | fn decode(bytes: &[u8]) -> (i32, u32) { -1724 | if bytes.len() >= 3 { -1725 | (i32::from_le_bytes([bytes[0], bytes[1], bytes[2], 0]), 3) -1726 | } else { -1727 | (0, 0) -1728 | } -1729 | } -1730 | } - | -1731 | let tree = parser -1732 | .parse_custom_encoding::( -1733 | &mut |offset, _| &utf24le_text[offset..], -1734 | None, -1735 | None, -1736 | ) -1737 | .unwrap(); - | -1738 | assert_eq!( -1739 | tree.root_node().to_sexp(), -1740 | "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (expression_statement (macro_invocation macro: (identifier) (token_tree (string_literal (string_content))))))))" -1741 | ); -1742 | } - | -1743 | #[test] -1744 | fn test_grammars_that_should_not_compile() { -1745 | assert!(generate_parser( -1746 | r#" -1747 | { -1748 | "name": "issue_1111", -1749 | "rules": { -1750 | "source_file": { "type": 
"STRING", "value": "" } -1751 | }, -1752 | } -1753 | "# -1754 | ) -1755 | .is_err()); - | -1756 | assert!(generate_parser( -1757 | r#" -1758 | { -1759 | "name": "issue_1271", -1760 | "rules": { -1761 | "source_file": { "type": "SYMBOL", "name": "identifier" }, -1762 | "identifier": { -1763 | "type": "TOKEN", -1764 | "content": { -1765 | "type": "REPEAT", -1766 | "content": { "type": "PATTERN", "value": "a" } -1767 | } -1768 | } -1769 | }, -1770 | } -1771 | "# -1772 | ) -1773 | .is_err()); - | -1774 | assert!(generate_parser( -1775 | r#" -1776 | { -1777 | "name": "issue_1156_expl_1", -1778 | "rules": { -1779 | "source_file": { -1780 | "type": "TOKEN", -1781 | "content": { -1782 | "type": "REPEAT", -1783 | "content": { "type": "STRING", "value": "c" } -1784 | } -1785 | } -1786 | }, -1787 | } -1788 | "# -1789 | ) -1790 | .is_err()); - | -1791 | assert!(generate_parser( -1792 | r#" -1793 | { -1794 | "name": "issue_1156_expl_2", -1795 | "rules": { -1796 | "source_file": { -1797 | "type": "TOKEN", -1798 | "content": { -1799 | "type": "CHOICE", -1800 | "members": [ -1801 | { "type": "STRING", "value": "e" }, -1802 | { "type": "BLANK" } -1803 | ] -1804 | } -1805 | } -1806 | }, -1807 | } -1808 | "# -1809 | ) -1810 | .is_err()); - | -1811 | assert!(generate_parser( -1812 | r#" -1813 | { -1814 | "name": "issue_1156_expl_3", -1815 | "rules": { -1816 | "source_file": { -1817 | "type": "IMMEDIATE_TOKEN", -1818 | "content": { -1819 | "type": "REPEAT", -1820 | "content": { "type": "STRING", "value": "p" } -1821 | } -1822 | } -1823 | }, -1824 | } -1825 | "# -1826 | ) -1827 | .is_err()); - | -1828 | assert!(generate_parser( -1829 | r#" -1830 | { -1831 | "name": "issue_1156_expl_4", -1832 | "rules": { -1833 | "source_file": { -1834 | "type": "IMMEDIATE_TOKEN", -1835 | "content": { -1836 | "type": "CHOICE", -1837 | "members": [ -1838 | { "type": "STRING", "value": "r" }, -1839 | { "type": "BLANK" } -1840 | ] -1841 | } -1842 | } -1843 | }, -1844 | } -1845 | "# -1846 | ) -1847 | 
.is_err()); -1848 | } - | -1849 | const fn simple_range(start: usize, end: usize) -> Range { -1850 | Range { -1851 | start_byte: start, -1852 | end_byte: end, -1853 | start_point: Point::new(0, start), -1854 | end_point: Point::new(0, end), -1855 | } -1856 | } - | -1857 | fn chunked_input<'a>(text: &'a str, size: usize) -> impl FnMut(usize, Point) -> &'a [u8] { -1858 | move |offset, _| &text.as_bytes()[offset..text.len().min(offset + size)] -1859 | } - | -1860 | #[test] -1861 | fn test_parse_options_reborrow() { -1862 | let mut parser = Parser::new(); -1863 | parser.set_language(&get_language("rust")).unwrap(); - | -1864 | let parse_count = AtomicUsize::new(0); - | -1865 | let mut callback = |_: &ParseState| { -1866 | parse_count.fetch_add(1, Ordering::SeqCst); -1867 | ControlFlow::Continue(()) -1868 | }; -1869 | let mut options = ParseOptions::new().progress_callback(&mut callback); - | -1870 | let text1 = "fn first() {}".repeat(20); -1871 | let text2 = "fn second() {}".repeat(20); - | -1872 | let tree1 = parser -1873 | .parse_with_options( -1874 | &mut |offset, _| { -1875 | if offset >= text1.len() { -1876 | &[] -1877 | } else { -1878 | &text1.as_bytes()[offset..] -1879 | } -1880 | }, -1881 | None, -1882 | Some(options.reborrow()), -1883 | ) -1884 | .unwrap(); - | -1885 | assert_eq!(tree1.root_node().child(0).unwrap().kind(), "function_item"); - | -1886 | let tree2 = parser -1887 | .parse_with_options( -1888 | &mut |offset, _| { -1889 | if offset >= text2.len() { -1890 | &[] -1891 | } else { -1892 | &text2.as_bytes()[offset..] 
-1893 | } -1894 | }, -1895 | None, -1896 | Some(options.reborrow()), -1897 | ) -1898 | .unwrap(); - | -1899 | assert_eq!(tree2.root_node().child(0).unwrap().kind(), "function_item"); - | -1900 | assert!(parse_count.load(Ordering::SeqCst) > 0); -1901 | } - | -1902 | #[test] -1903 | fn test_grammar_that_should_hang_and_not_segfault() { -1904 | fn hang_test() { -1905 | let test_grammar_dir = fixtures_dir() -1906 | .join("test_grammars") -1907 | .join("get_col_should_hang_not_crash"); - | -1908 | let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None) -1909 | .expect("Failed to load grammar file"); - | -1910 | let (parser_name, parser_code) = -1911 | generate_parser(grammar_json.as_str()).expect("Failed to generate parser"); - | -1912 | let language = -1913 | get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path())); - | -1914 | let mut parser = Parser::new(); -1915 | parser -1916 | .set_language(&language) -1917 | .expect("Failed to set parser language"); - | -1918 | let code_that_should_hang = "\nHello"; - | -1919 | parser -1920 | .parse(code_that_should_hang, None) -1921 | .expect("Parse operation completed unexpectedly"); -1922 | } - | -1923 | let timeout = Duration::from_millis(500); -1924 | let (tx, rx) = mpsc::channel(); - | -1925 | thread::spawn(move || tx.send(std::panic::catch_unwind(hang_test))); - | -1926 | match rx.recv_timeout(timeout) { -1927 | Ok(Ok(())) => panic!("The test completed rather than hanging"), -1928 | Ok(Err(panic_info)) => panic!("The test panicked unexpectedly: {panic_info:?}"), -1929 | Err(mpsc::RecvTimeoutError::Timeout) => {} // Expected -1930 | Err(mpsc::RecvTimeoutError::Disconnected) => { -1931 | panic!("The test thread disconnected unexpectedly") -1932 | } -1933 | } -1934 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/pathological_test.rs: 
--------------------------------------------------------------------------------- - 1 | use tree_sitter::Parser; - | - 2 | use super::helpers::{allocations, fixtures::get_language}; - | - 3 | #[test] - 4 | fn test_pathological_example_1() { - 5 | let language = "cpp"; - 6 | let source = r#"*ss(qqX TokenStream { - 10 | let count = parse_macro_input!(args as LitInt); - 11 | let input = parse_macro_input!(input as ItemFn); - 12 | let attrs = &input.attrs; - 13 | let name = &input.sig.ident; - | - 14 | TokenStream::from(quote! { - 15 | #(#attrs),* - 16 | fn #name() { - 17 | #input - | - 18 | for i in 0..=#count { - 19 | let result = std::panic::catch_unwind(|| { - 20 | #name(); - 21 | }); - | - 22 | if result.is_ok() { - 23 | return; - 24 | } - | - 25 | if i == #count { - 26 | std::panic::resume_unwind(result.unwrap_err()); - 27 | } - 28 | } - 29 | } - 30 | }) - 31 | } - | - 32 | #[proc_macro_attribute] - 33 | pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream { - 34 | struct Args { - 35 | retry: LitInt, - 36 | seed: Expr, - 37 | seed_fn: Option, - 38 | } - | - 39 | impl Parse for Args { - 40 | fn parse(input: ParseStream) -> syn::Result { - 41 | let mut retry = None; - 42 | let mut seed = None; - 43 | let mut seed_fn = None; - | - 44 | while !input.is_empty() { - 45 | let name = input.parse::()?; - 46 | match name.to_string().as_str() { - 47 | "retry" => { - 48 | input.parse::()?; - 49 | retry.replace(input.parse()?); - 50 | } - 51 | "seed" => { - 52 | input.parse::()?; - 53 | seed.replace(input.parse()?); - 54 | } - 55 | "seed_fn" => { - 56 | input.parse::()?; - 57 | seed_fn.replace(input.parse()?); - 58 | } - 59 | x => { - 60 | return Err(Error::new( - 61 | name.span(), - 62 | format!("Unsupported parameter `{x}`"), - 63 | )) - 64 | } - 65 | } - | - 66 | if !input.is_empty() { - 67 | input.parse::()?; - 68 | } - 69 | } - | - 70 | if retry.is_none() { - 71 | retry.replace(LitInt::new("0", Span::mixed_site())); - 72 | } - | - 73 | Ok(Self { - 
74 | retry: retry.expect("`retry` parameter is required"), - 75 | seed: seed.expect("`seed` parameter is required"), - 76 | seed_fn, - 77 | }) - 78 | } - 79 | } - | - 80 | let Args { - 81 | retry, - 82 | seed, - 83 | seed_fn, - 84 | } = parse_macro_input!(args as Args); - | - 85 | let seed_fn = seed_fn.iter(); - | - 86 | let func = parse_macro_input!(input as ItemFn); - 87 | let attrs = &func.attrs; - 88 | let name = &func.sig.ident; - | - 89 | TokenStream::from(quote! { - 90 | #[test] - 91 | #(#attrs),* - 92 | fn #name() { - 93 | #func - | - 94 | let mut seed = #seed; - | - 95 | for i in 0..=#retry { - 96 | let result = std::panic::catch_unwind(|| { - 97 | #name(seed); - 98 | }); - | - 99 | if result.is_ok() { - 100 | return; - 101 | } - | - 102 | if i == #retry { - 103 | std::panic::resume_unwind(result.unwrap_err()); - 104 | } - | - 105 | #( - 106 | seed = #seed_fn(); - 107 | )* - | - 108 | if i < #retry { - 109 | println!("\nRetry {}/{} with a new seed {}", i + 1, #retry, seed); - 110 | } - 111 | } - 112 | } - 113 | }) - 114 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/query_test.rs: --------------------------------------------------------------------------------- - 1 | use std::{env, fmt::Write, ops::ControlFlow, sync::LazyLock}; - | - 2 | use indoc::indoc; - 3 | use rand::{prelude::StdRng, SeedableRng}; - 4 | use streaming_iterator::StreamingIterator; - 5 | use tree_sitter::{ - 6 | CaptureQuantifier, InputEdit, Language, Node, Parser, Point, Query, QueryCursor, - 7 | QueryCursorOptions, QueryError, QueryErrorKind, QueryPredicate, QueryPredicateArg, - 8 | QueryProperty, Range, - 9 | }; - 10 | use tree_sitter_generate::load_grammar_file; - 11 | use unindent::Unindent; - | - 12 | use super::helpers::{ - 13 | allocations, - 14 | fixtures::{get_language, get_test_language}, - 15 | query_helpers::{assert_query_matches, Match, Pattern}, - 16 | }; - 17 | use crate::tests::{ - 18 | generate_parser, 
- 19 | helpers::{ - 20 | fixtures::get_test_fixture_language, - 21 | query_helpers::{collect_captures, collect_matches}, - 22 | }, - 23 | ITERATION_COUNT, - 24 | }; - | - 25 | static EXAMPLE_FILTER: LazyLock> = - 26 | LazyLock::new(|| env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok()); - | - 27 | #[test] - 28 | fn test_query_errors_on_invalid_syntax() { - 29 | allocations::record(|| { - 30 | let language = get_language("javascript"); - | - 31 | assert!(Query::new(&language, "(if_statement)").is_ok()); - 32 | assert!(Query::new( - 33 | &language, - 34 | "(if_statement condition:(parenthesized_expression (identifier)))" - 35 | ) - 36 | .is_ok()); - | - 37 | // Mismatched parens - 38 | assert_eq!( - 39 | Query::new(&language, "(if_statement").unwrap_err().message, - 40 | [ - 41 | "(if_statement", // - 42 | " ^", - 43 | ] - 44 | .join("\n") - 45 | ); - 46 | assert_eq!( - 47 | Query::new(&language, "; comment 1\n; comment 2\n (if_statement))") - 48 | .unwrap_err() - 49 | .message, - 50 | [ - 51 | " (if_statement))", // - 52 | " ^", - 53 | ] - 54 | .join("\n") - 55 | ); - | - 56 | // Return an error at the *beginning* of a bare identifier not followed a colon. - 57 | // If there's a colon but no pattern, return an error at the end of the colon. - 58 | assert_eq!( - 59 | Query::new(&language, "(if_statement identifier)") - 60 | .unwrap_err() - 61 | .message, - 62 | [ - 63 | "(if_statement identifier)", // - 64 | " ^", - 65 | ] - 66 | .join("\n") - 67 | ); - 68 | assert_eq!( - 69 | Query::new(&language, "(if_statement condition:)") - 70 | .unwrap_err() - 71 | .message, - 72 | [ - 73 | "(if_statement condition:)", // - 74 | " ^", - 75 | ] - 76 | .join("\n") - 77 | ); - | - 78 | // Return an error at the beginning of an unterminated string. 
- 79 | assert_eq!( - 80 | Query::new(&language, r#"(identifier) "h "#) - 81 | .unwrap_err() - 82 | .message, - 83 | [ - 84 | r#"(identifier) "h "#, // - 85 | r" ^", - 86 | ] - 87 | .join("\n") - 88 | ); - | - 89 | // Empty tree pattern - 90 | assert_eq!( - 91 | Query::new(&language, r"((identifier) ()") - 92 | .unwrap_err() - 93 | .message, - 94 | [ - 95 | "((identifier) ()", // - 96 | " ^", - 97 | ] - 98 | .join("\n") - 99 | ); - | - 100 | // Empty alternation - 101 | assert_eq!( - 102 | Query::new(&language, r"((identifier) [])") - 103 | .unwrap_err() - 104 | .message, - 105 | [ - 106 | "((identifier) [])", // - 107 | " ^", - 108 | ] - 109 | .join("\n") - 110 | ); - | - 111 | // Unclosed sibling expression with predicate - 112 | assert_eq!( - 113 | Query::new(&language, r"((identifier) (#a?)") - 114 | .unwrap_err() - 115 | .message, - 116 | [ - 117 | "((identifier) (#a?)", // - 118 | " ^", - 119 | ] - 120 | .join("\n") - 121 | ); - | - 122 | // Predicate not ending in `?` or `!` - 123 | assert_eq!( - 124 | Query::new(&language, r"((identifier) (#a))") - 125 | .unwrap_err() - 126 | .message, - 127 | [ - 128 | "((identifier) (#a))", // - 129 | " ^", - 130 | ] - 131 | .join("\n") - 132 | ); - | - 133 | // Unclosed predicate - 134 | assert_eq!( - 135 | Query::new(&language, r"((identifier) @x (#eq? @x a") - 136 | .unwrap_err() - 137 | .message, - 138 | [ - 139 | r"((identifier) @x (#eq? @x a", - 140 | r" ^", - 141 | ] - 142 | .join("\n") - 143 | ); - | - 144 | // Need at least one child node for a child anchor - 145 | assert_eq!( - 146 | Query::new(&language, r"(statement_block .)") - 147 | .unwrap_err() - 148 | .message, - 149 | [ - 150 | // - 151 | r"(statement_block .)", - 152 | r" ^" - 153 | ] - 154 | .join("\n") - 155 | ); - | - 156 | // Need a field name after a negated field operator - 157 | assert_eq!( - 158 | Query::new(&language, r"(statement_block ! (if_statement))") - 159 | .unwrap_err() - 160 | .message, - 161 | [ - 162 | r"(statement_block ! 
(if_statement))", - 163 | r" ^" - 164 | ] - 165 | .join("\n") - 166 | ); - | - 167 | // Unclosed alternation within a tree - 168 | // tree-sitter/tree-sitter/issues/968 - 169 | assert_eq!( - 170 | Query::new(&get_language("c"), r#"(parameter_list [ ")" @foo)"#) - 171 | .unwrap_err() - 172 | .message, - 173 | [ - 174 | r#"(parameter_list [ ")" @foo)"#, - 175 | r" ^" - 176 | ] - 177 | .join("\n") - 178 | ); - | - 179 | // Unclosed tree within an alternation - 180 | // tree-sitter/tree-sitter/issues/1436 - 181 | assert_eq!( - 182 | Query::new( - 183 | &get_language("python"), - 184 | r"[(unary_operator (_) @operand) (not_operator (_) @operand]" - 185 | ) - 186 | .unwrap_err() - 187 | .message, - 188 | [ - 189 | r"[(unary_operator (_) @operand) (not_operator (_) @operand]", - 190 | r" ^" - 191 | ] - 192 | .join("\n") - 193 | ); - | - 194 | // MISSING keyword with full pattern - 195 | assert_eq!( - 196 | Query::new( - 197 | &get_language("c"), - 198 | r"(MISSING (function_declarator (identifier))) " - 199 | ) - 200 | .unwrap_err() - 201 | .message, - 202 | [ - 203 | r"(MISSING (function_declarator (identifier))) ", - 204 | r" ^", - 205 | ] - 206 | .join("\n") - 207 | ); - | - 208 | // MISSING keyword with multiple identifiers - 209 | assert_eq!( - 210 | Query::new( - 211 | &get_language("c"), - 212 | r"(MISSING function_declarator function_declarator) " - 213 | ) - 214 | .unwrap_err() - 215 | .message, - 216 | [ - 217 | r"(MISSING function_declarator function_declarator) ", - 218 | r" ^", - 219 | ] - 220 | .join("\n") - 221 | ); - 222 | assert_eq!( - 223 | Query::new(&language, "(statement / export_statement)").unwrap_err(), - 224 | QueryError { - 225 | row: 0, - 226 | offset: 11, - 227 | column: 11, - 228 | kind: QueryErrorKind::Syntax, - 229 | message: [ - 230 | "(statement / export_statement)", // - 231 | " ^" - 232 | ] - 233 | .join("\n") - 234 | } - 235 | ); - 236 | }); - 237 | } - | - 238 | #[test] - 239 | fn test_query_errors_on_invalid_symbols() { - 240 | 
allocations::record(|| { - 241 | let language = get_language("javascript"); - | - 242 | assert_eq!( - 243 | Query::new(&language, "\">>>>\"").unwrap_err(), - 244 | QueryError { - 245 | row: 0, - 246 | offset: 1, - 247 | column: 1, - 248 | kind: QueryErrorKind::NodeType, - 249 | message: "\">>>>\"".to_string() - 250 | } - 251 | ); - 252 | assert_eq!( - 253 | Query::new(&language, "\"te\\\"st\"").unwrap_err(), - 254 | QueryError { - 255 | row: 0, - 256 | offset: 1, - 257 | column: 1, - 258 | kind: QueryErrorKind::NodeType, - 259 | message: "\"te\\\"st\"".to_string() - 260 | } - 261 | ); - 262 | assert_eq!( - 263 | Query::new(&language, "\"\\\\\" @cap").unwrap_err(), - 264 | QueryError { - 265 | row: 0, - 266 | offset: 1, - 267 | column: 1, - 268 | kind: QueryErrorKind::NodeType, - 269 | message: "\"\\\\\"".to_string() - 270 | } - 271 | ); - 272 | assert_eq!( - 273 | Query::new(&language, "(clas)").unwrap_err(), - 274 | QueryError { - 275 | row: 0, - 276 | offset: 1, - 277 | column: 1, - 278 | kind: QueryErrorKind::NodeType, - 279 | message: "\"clas\"".to_string() - 280 | } - 281 | ); - 282 | assert_eq!( - 283 | Query::new(&language, "(if_statement (arrayyyyy))").unwrap_err(), - 284 | QueryError { - 285 | row: 0, - 286 | offset: 15, - 287 | column: 15, - 288 | kind: QueryErrorKind::NodeType, - 289 | message: "\"arrayyyyy\"".to_string() - 290 | }, - 291 | ); - 292 | assert_eq!( - 293 | Query::new(&language, "(if_statement condition: (non_existent3))").unwrap_err(), - 294 | QueryError { - 295 | row: 0, - 296 | offset: 26, - 297 | column: 26, - 298 | kind: QueryErrorKind::NodeType, - 299 | message: "\"non_existent3\"".to_string() - 300 | }, - 301 | ); - 302 | assert_eq!( - 303 | Query::new(&language, "(if_statement condit: (identifier))").unwrap_err(), - 304 | QueryError { - 305 | row: 0, - 306 | offset: 14, - 307 | column: 14, - 308 | kind: QueryErrorKind::Field, - 309 | message: "\"condit\"".to_string() - 310 | }, - 311 | ); - 312 | assert_eq!( - 313 | 
Query::new(&language, "(if_statement conditioning: (identifier))").unwrap_err(), - 314 | QueryError { - 315 | row: 0, - 316 | offset: 14, - 317 | column: 14, - 318 | kind: QueryErrorKind::Field, - 319 | message: "\"conditioning\"".to_string() - 320 | } - 321 | ); - 322 | assert_eq!( - 323 | Query::new(&language, "(if_statement !alternativ)").unwrap_err(), - 324 | QueryError { - 325 | row: 0, - 326 | offset: 15, - 327 | column: 15, - 328 | kind: QueryErrorKind::Field, - 329 | message: "\"alternativ\"".to_string() - 330 | } - 331 | ); - 332 | assert_eq!( - 333 | Query::new(&language, "(if_statement !alternatives)").unwrap_err(), - 334 | QueryError { - 335 | row: 0, - 336 | offset: 15, - 337 | column: 15, - 338 | kind: QueryErrorKind::Field, - 339 | message: "\"alternatives\"".to_string() - 340 | } - 341 | ); - 342 | assert_eq!( - 343 | Query::new(&language, "fakefield: (identifier)").unwrap_err(), - 344 | QueryError { - 345 | row: 0, - 346 | offset: 0, - 347 | column: 0, - 348 | kind: QueryErrorKind::Field, - 349 | message: "\"fakefield\"".to_string() - 350 | } - 351 | ); - 352 | }); - 353 | } - | - 354 | #[test] - 355 | fn test_query_errors_on_invalid_predicates() { - 356 | allocations::record(|| { - 357 | let language = get_language("javascript"); - | - 358 | assert_eq!( - 359 | Query::new(&language, "((identifier) @id (@id))").unwrap_err(), - 360 | QueryError { - 361 | kind: QueryErrorKind::Syntax, - 362 | row: 0, - 363 | column: 19, - 364 | offset: 19, - 365 | message: [ - 366 | "((identifier) @id (@id))", // - 367 | " ^" - 368 | ] - 369 | .join("\n") - 370 | } - 371 | ); - 372 | assert_eq!( - 373 | Query::new(&language, "((identifier) @id (#eq? @id))").unwrap_err(), - 374 | QueryError { - 375 | kind: QueryErrorKind::Predicate, - 376 | row: 0, - 377 | column: 0, - 378 | offset: 0, - 379 | message: "Wrong number of arguments to #eq? predicate. Expected 2, got 1." 
- 380 | .to_string() - 381 | } - 382 | ); - 383 | assert_eq!( - 384 | Query::new(&language, "((identifier) @id (#eq? @id @ok))").unwrap_err(), - 385 | QueryError { - 386 | kind: QueryErrorKind::Capture, - 387 | row: 0, - 388 | column: 29, - 389 | offset: 29, - 390 | message: "\"ok\"".to_string(), - 391 | } - 392 | ); - 393 | }); - 394 | } - | - 395 | #[test] - 396 | fn test_query_errors_on_impossible_patterns() { - 397 | let js_lang = get_language("javascript"); - 398 | let rb_lang = get_language("ruby"); - | - 399 | allocations::record(|| { - 400 | assert_eq!( - 401 | Query::new( - 402 | &js_lang, - 403 | "(binary_expression left: (expression (identifier)) left: (expression (identifier)))" - 404 | ), - 405 | Err(QueryError { - 406 | kind: QueryErrorKind::Structure, - 407 | row: 0, - 408 | offset: 37, - 409 | column: 37, - 410 | message: [ - 411 | "(binary_expression left: (expression (identifier)) left: (expression (identifier)))", - 412 | " ^", - 413 | ] - 414 | .join("\n"), - 415 | }) - 416 | ); - | - 417 | Query::new( - 418 | &js_lang, - 419 | "(function_declaration name: (identifier) (statement_block))", - 420 | ) - 421 | .unwrap(); - 422 | assert_eq!( - 423 | Query::new(&js_lang, "(function_declaration name: (statement_block))"), - 424 | Err(QueryError { - 425 | kind: QueryErrorKind::Structure, - 426 | row: 0, - 427 | offset: 22, - 428 | column: 22, - 429 | message: [ - 430 | "(function_declaration name: (statement_block))", - 431 | " ^", - 432 | ] - 433 | .join("\n") - 434 | }) - 435 | ); - | - 436 | Query::new(&rb_lang, "(call receiver:(call))").unwrap(); - 437 | assert_eq!( - 438 | Query::new(&rb_lang, "(call receiver:(binary))"), - 439 | Err(QueryError { - 440 | kind: QueryErrorKind::Structure, - 441 | row: 0, - 442 | offset: 6, - 443 | column: 6, - 444 | message: [ - 445 | "(call receiver:(binary))", // - 446 | " ^", - 447 | ] - 448 | .join("\n") - 449 | }) - 450 | ); - | - 451 | Query::new( - 452 | &js_lang, - 453 | "[ - 454 | (function_expression 
(identifier)) - 455 | (function_declaration (identifier)) - 456 | (generator_function_declaration (identifier)) - 457 | ]", - 458 | ) - 459 | .unwrap(); - 460 | assert_eq!( - 461 | Query::new( - 462 | &js_lang, - 463 | "[ - 464 | (function_expression (identifier)) - 465 | (function_declaration (object)) - 466 | (generator_function_declaration (identifier)) - 467 | ]", - 468 | ), - 469 | Err(QueryError { - 470 | kind: QueryErrorKind::Structure, - 471 | row: 2, - 472 | offset: 99, - 473 | column: 42, - 474 | message: [ - 475 | " (function_declaration (object))", // - 476 | " ^", - 477 | ] - 478 | .join("\n") - 479 | }) - 480 | ); - | - 481 | assert_eq!( - 482 | Query::new(&js_lang, "(identifier (identifier))",), - 483 | Err(QueryError { - 484 | kind: QueryErrorKind::Structure, - 485 | row: 0, - 486 | offset: 12, - 487 | column: 12, - 488 | message: [ - 489 | "(identifier (identifier))", // - 490 | " ^", - 491 | ] - 492 | .join("\n") - 493 | }) - 494 | ); - 495 | assert_eq!( - 496 | Query::new(&js_lang, "(true (true))",), - 497 | Err(QueryError { - 498 | kind: QueryErrorKind::Structure, - 499 | row: 0, - 500 | offset: 6, - 501 | column: 6, - 502 | message: [ - 503 | "(true (true))", // - 504 | " ^", - 505 | ] - 506 | .join("\n") - 507 | }) - 508 | ); - | - 509 | Query::new( - 510 | &js_lang, - 511 | "(if_statement - 512 | condition: (parenthesized_expression (expression) @cond))", - 513 | ) - 514 | .unwrap(); - | - 515 | assert_eq!( - 516 | Query::new(&js_lang, "(if_statement condition: (expression))"), - 517 | Err(QueryError { - 518 | kind: QueryErrorKind::Structure, - 519 | row: 0, - 520 | offset: 14, - 521 | column: 14, - 522 | message: [ - 523 | "(if_statement condition: (expression))", // - 524 | " ^", - 525 | ] - 526 | .join("\n") - 527 | }) - 528 | ); - 529 | assert_eq!( - 530 | Query::new(&js_lang, "(identifier/identifier)").unwrap_err(), - 531 | QueryError { - 532 | row: 0, - 533 | offset: 0, - 534 | column: 0, - 535 | kind: QueryErrorKind::Structure, - 536 | 
message: [ - 537 | "(identifier/identifier)", // - 538 | "^" - 539 | ] - 540 | .join("\n") - 541 | } - 542 | ); - | - 543 | if js_lang.abi_version() >= 15 { - 544 | assert_eq!( - 545 | Query::new(&js_lang, "(statement/identifier)").unwrap_err(), - 546 | QueryError { - 547 | row: 0, - 548 | offset: 0, - 549 | column: 0, - 550 | kind: QueryErrorKind::Structure, - 551 | message: [ - 552 | "(statement/identifier)", // - 553 | "^" - 554 | ] - 555 | .join("\n") - 556 | } - 557 | ); - 558 | assert_eq!( - 559 | Query::new(&js_lang, "(statement/pattern)").unwrap_err(), - 560 | QueryError { - 561 | row: 0, - 562 | offset: 0, - 563 | column: 0, - 564 | kind: QueryErrorKind::Structure, - 565 | message: [ - 566 | "(statement/pattern)", // - 567 | "^" - 568 | ] - 569 | .join("\n") - 570 | } - 571 | ); - 572 | } - 573 | }); - 574 | } - | - 575 | #[test] - 576 | fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() { - 577 | allocations::record(|| { - 578 | let language = get_language("ruby"); - | - 579 | Query::new(&language, "(destructured_parameter (identifier))").unwrap(); - | - 580 | assert_eq!( - 581 | Query::new(&language, "(destructured_parameter (string))",), - 582 | Err(QueryError { - 583 | kind: QueryErrorKind::Structure, - 584 | row: 0, - 585 | offset: 24, - 586 | column: 24, - 587 | message: [ - 588 | "(destructured_parameter (string))", // - 589 | " ^", - 590 | ] - 591 | .join("\n") - 592 | }) - 593 | ); - 594 | }); - 595 | } - | - 596 | #[test] - 597 | fn test_query_matches_with_simple_pattern() { - 598 | allocations::record(|| { - 599 | let language = get_language("javascript"); - 600 | let query = Query::new( - 601 | &language, - 602 | "(function_declaration name: (identifier) @fn-name)", - 603 | ) - 604 | .unwrap(); - | - 605 | assert_query_matches( - 606 | &language, - 607 | &query, - 608 | "function one() { two(); function three() {} }", - 609 | &[ - 610 | (0, vec![("fn-name", "one")]), - 611 | (0, vec![("fn-name", "three")]), - 612 | ], - 613 | 
); - 614 | }); - 615 | } - | - 616 | #[test] - 617 | fn test_query_matches_with_multiple_on_same_root() { - 618 | allocations::record(|| { - 619 | let language = get_language("javascript"); - 620 | let query = Query::new( - 621 | &language, - 622 | "(class_declaration - 623 | name: (identifier) @the-class-name - 624 | (class_body - 625 | (method_definition - 626 | name: (property_identifier) @the-method-name)))", - 627 | ) - 628 | .unwrap(); - | - 629 | assert_query_matches( - 630 | &language, - 631 | &query, - 632 | " - 633 | class Person { - 634 | // the constructor - 635 | constructor(name) { this.name = name; } - | - 636 | // the getter - 637 | getFullName() { return this.name; } - 638 | } - 639 | ", - 640 | &[ - 641 | ( - 642 | 0, - 643 | vec![ - 644 | ("the-class-name", "Person"), - 645 | ("the-method-name", "constructor"), - 646 | ], - 647 | ), - 648 | ( - 649 | 0, - 650 | vec![ - 651 | ("the-class-name", "Person"), - 652 | ("the-method-name", "getFullName"), - 653 | ], - 654 | ), - 655 | ], - 656 | ); - 657 | }); - 658 | } - | - 659 | #[test] - 660 | fn test_query_matches_with_multiple_patterns_different_roots() { - 661 | allocations::record(|| { - 662 | let language = get_language("javascript"); - 663 | let query = Query::new( - 664 | &language, - 665 | " - 666 | (function_declaration name:(identifier) @fn-def) - 667 | (call_expression function:(identifier) @fn-ref) - 668 | ", - 669 | ) - 670 | .unwrap(); - | - 671 | assert_query_matches( - 672 | &language, - 673 | &query, - 674 | " - 675 | function f1() { - 676 | f2(f3()); - 677 | } - 678 | ", - 679 | &[ - 680 | (0, vec![("fn-def", "f1")]), - 681 | (1, vec![("fn-ref", "f2")]), - 682 | (1, vec![("fn-ref", "f3")]), - 683 | ], - 684 | ); - 685 | }); - 686 | } - | - 687 | #[test] - 688 | fn test_query_matches_with_multiple_patterns_same_root() { - 689 | allocations::record(|| { - 690 | let language = get_language("javascript"); - 691 | let query = Query::new( - 692 | &language, - 693 | " - 694 | (pair - 695 | 
key: (property_identifier) @method-def - 696 | value: (function_expression)) - | - 697 | (pair - 698 | key: (property_identifier) @method-def - 699 | value: (arrow_function)) - 700 | ", - 701 | ) - 702 | .unwrap(); - | - 703 | assert_query_matches( - 704 | &language, - 705 | &query, - 706 | " - 707 | a = { - 708 | b: () => { return c; }, - 709 | d: function() { return d; } - 710 | }; - 711 | ", - 712 | &[ - 713 | (1, vec![("method-def", "b")]), - 714 | (0, vec![("method-def", "d")]), - 715 | ], - 716 | ); - 717 | }); - 718 | } - | - 719 | #[test] - 720 | fn test_query_matches_with_nesting_and_no_fields() { - 721 | allocations::record(|| { - 722 | let language = get_language("javascript"); - 723 | let query = Query::new( - 724 | &language, - 725 | " - 726 | (array - 727 | (array - 728 | (identifier) @x1 - 729 | (identifier) @x2)) - 730 | ", - 731 | ) - 732 | .unwrap(); - | - 733 | assert_query_matches( - 734 | &language, - 735 | &query, - 736 | " - 737 | [[a]]; - 738 | [[c, d], [e, f, g, h]]; - 739 | [[h], [i]]; - 740 | ", - 741 | &[ - 742 | (0, vec![("x1", "c"), ("x2", "d")]), - 743 | (0, vec![("x1", "e"), ("x2", "f")]), - 744 | (0, vec![("x1", "e"), ("x2", "g")]), - 745 | (0, vec![("x1", "f"), ("x2", "g")]), - 746 | (0, vec![("x1", "e"), ("x2", "h")]), - 747 | (0, vec![("x1", "f"), ("x2", "h")]), - 748 | (0, vec![("x1", "g"), ("x2", "h")]), - 749 | ], - 750 | ); - 751 | }); - 752 | } - | - 753 | #[test] - 754 | fn test_query_matches_with_many_results() { - 755 | allocations::record(|| { - 756 | let language = get_language("javascript"); - 757 | let query = Query::new(&language, "(array (identifier) @element)").unwrap(); - | - 758 | assert_query_matches( - 759 | &language, - 760 | &query, - 761 | &"[hello];\n".repeat(50), - 762 | &vec![(0, vec![("element", "hello")]); 50], - 763 | ); - 764 | }); - 765 | } - | - 766 | #[test] - 767 | fn test_query_matches_with_many_overlapping_results() { - 768 | allocations::record(|| { - 769 | let language = 
get_language("javascript"); - 770 | let query = Query::new( - 771 | &language, - 772 | r#" - 773 | (call_expression - 774 | function: (member_expression - 775 | property: (property_identifier) @method)) - 776 | (call_expression - 777 | function: (identifier) @function) - 778 | ((identifier) @constant - 779 | (#match? @constant "[A-Z\\d_]+")) - 780 | "#, - 781 | ) - 782 | .unwrap(); - | - 783 | let count = 1024; - | - 784 | // Deeply nested chained function calls: - 785 | // a - 786 | // .foo(bar(BAZ)) - 787 | // .foo(bar(BAZ)) - 788 | // .foo(bar(BAZ)) - 789 | // ... - 790 | let source = format!("a{}", "\n .foo(bar(BAZ))".repeat(count)); - | - 791 | assert_query_matches( - 792 | &language, - 793 | &query, - 794 | &source, - 795 | &[ - 796 | (0, vec![("method", "foo")]), - 797 | (1, vec![("function", "bar")]), - 798 | (2, vec![("constant", "BAZ")]), - 799 | ] - 800 | .iter() - 801 | .cloned() - 802 | .cycle() - 803 | .take(3 * count) - 804 | .collect::>(), - 805 | ); - 806 | }); - 807 | } - | - 808 | #[test] - 809 | fn test_query_matches_capturing_error_nodes() { - 810 | allocations::record(|| { - 811 | let language = get_language("javascript"); - 812 | let query = Query::new( - 813 | &language, - 814 | " - 815 | (ERROR (identifier) @the-error-identifier) @the-error - 816 | ", - 817 | ) - 818 | .unwrap(); - | - 819 | assert_query_matches( - 820 | &language, - 821 | &query, - 822 | "function a(b,, c, d :e:) {}", - 823 | &[(0, vec![("the-error", ":e:"), ("the-error-identifier", "e")])], - 824 | ); - 825 | }); - 826 | } - | - 827 | #[test] - 828 | fn test_query_matches_capturing_missing_nodes() { - 829 | allocations::record(|| { - 830 | let language = get_language("javascript"); - 831 | let query = Query::new( - 832 | &language, - 833 | r#" - 834 | (MISSING - 835 | ; Comments should be valid - 836 | ) @missing - 837 | (MISSING - 838 | ; Comments should be valid - 839 | ";" - 840 | ; Comments should be valid - 841 | ) @missing-semicolon - 842 | "#, - 843 | ) - 844 | 
.unwrap(); - | - 845 | // Missing anonymous nodes - 846 | assert_query_matches( - 847 | &language, - 848 | &query, - 849 | " - 850 | x = function(a) { b; } function(c) { d; } - 851 | // ^ MISSING semicolon here - 852 | ", - 853 | &[ - 854 | (0, vec![("missing", "")]), - 855 | (1, vec![("missing-semicolon", "")]), - 856 | ], - 857 | ); - | - 858 | let language = get_language("c"); - 859 | let query = Query::new( - 860 | &language, - 861 | "(MISSING field_identifier) @missing-field-ident - 862 | (MISSING identifier) @missing-ident - 863 | (MISSING) @missing-anything", - 864 | ) - 865 | .unwrap(); - | - 866 | // Missing named nodes - 867 | assert_query_matches( - 868 | &language, - 869 | &query, - 870 | " - 871 | int main() { - 872 | if (a.) { - 873 | // ^ MISSING field_identifier here - 874 | b(); - 875 | c(); - | - 876 | if (*) d(); - 877 | // ^ MISSING identifier here - 878 | } - 879 | } - 880 | ", - 881 | &[ - 882 | (0, vec![("missing-field-ident", "")]), - 883 | (2, vec![("missing-anything", "")]), - 884 | (1, vec![("missing-ident", "")]), - 885 | (2, vec![("missing-anything", "")]), - 886 | ], - 887 | ); - 888 | }); - 889 | } - | - 890 | #[test] - 891 | fn test_query_matches_with_extra_children() { - 892 | allocations::record(|| { - 893 | let language = get_language("ruby"); - 894 | let query = Query::new( - 895 | &language, - 896 | " - 897 | (program(comment) @top_level_comment) - 898 | (argument_list (heredoc_body) @heredoc_in_args) - 899 | ", - 900 | ) - 901 | .unwrap(); - | - 902 | assert_query_matches( - 903 | &language, - 904 | &query, - 905 | " - 906 | # top-level - 907 | puts( - 908 | # not-top-level - 909 | <<-IN_ARGS, bar.baz - 910 | HELLO - 911 | IN_ARGS - 912 | ) - | - 913 | puts <<-NOT_IN_ARGS - 914 | NO - 915 | NOT_IN_ARGS - 916 | ", - 917 | &[ - 918 | (0, vec![("top_level_comment", "# top-level")]), - 919 | ( - 920 | 1, - 921 | vec![( - 922 | "heredoc_in_args", - 923 | "\n HELLO\n IN_ARGS", - 924 | )], - 925 | ), - 926 | ], - 927 | ); - 928 | }); 
- 929 | } - | - 930 | #[test] - 931 | fn test_query_matches_with_named_wildcard() { - 932 | allocations::record(|| { - 933 | let language = get_language("javascript"); - 934 | let query = Query::new( - 935 | &language, - 936 | " - 937 | (return_statement (_) @the-return-value) - 938 | (binary_expression operator: _ @the-operator) - 939 | ", - 940 | ) - 941 | .unwrap(); - | - 942 | let source = "return a + b - c;"; - | - 943 | let mut parser = Parser::new(); - 944 | parser.set_language(&language).unwrap(); - 945 | let tree = parser.parse(source, None).unwrap(); - 946 | let mut cursor = QueryCursor::new(); - 947 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); - | - 948 | assert_eq!( - 949 | collect_matches(matches, &query, source), - 950 | &[ - 951 | (0, vec![("the-return-value", "a + b - c")]), - 952 | (1, vec![("the-operator", "+")]), - 953 | (1, vec![("the-operator", "-")]), - 954 | ] - 955 | ); - 956 | }); - 957 | } - | - 958 | #[test] - 959 | fn test_query_matches_with_wildcard_at_the_root() { - 960 | allocations::record(|| { - 961 | let language = get_language("javascript"); - 962 | let query = Query::new( - 963 | &language, - 964 | " - 965 | (_ - 966 | (comment) @doc - 967 | . 
- 968 | (function_declaration - 969 | name: (identifier) @name)) - 970 | ", - 971 | ) - 972 | .unwrap(); - | - 973 | assert_query_matches( - 974 | &language, - 975 | &query, - 976 | "/* one */ var x; /* two */ function y() {} /* three */ class Z {}", - 977 | &[(0, vec![("doc", "/* two */"), ("name", "y")])], - 978 | ); - | - 979 | let query = Query::new( - 980 | &language, - 981 | " - 982 | (_ (string) @a) - 983 | (_ (number) @b) - 984 | (_ (true) @c) - 985 | (_ (false) @d) - 986 | ", - 987 | ) - 988 | .unwrap(); - | - 989 | assert_query_matches( - 990 | &language, - 991 | &query, - 992 | "['hi', x(true), {y: false}]", - 993 | &[ - 994 | (0, vec![("a", "'hi'")]), - 995 | (2, vec![("c", "true")]), - 996 | (3, vec![("d", "false")]), - 997 | ], - 998 | ); - 999 | }); -1000 | } - | -1001 | #[test] -1002 | fn test_query_matches_with_wildcard_within_wildcard() { -1003 | allocations::record(|| { -1004 | let language = get_language("javascript"); -1005 | let query = Query::new( -1006 | &language, -1007 | " -1008 | (_ (_) @child) @parent -1009 | ", -1010 | ) -1011 | .unwrap(); - | -1012 | assert_query_matches( -1013 | &language, -1014 | &query, -1015 | "/* a */ b; c;", -1016 | &[ -1017 | (0, vec![("parent", "/* a */ b; c;"), ("child", "/* a */")]), -1018 | (0, vec![("parent", "/* a */ b; c;"), ("child", "b;")]), -1019 | (0, vec![("parent", "b;"), ("child", "b")]), -1020 | (0, vec![("parent", "/* a */ b; c;"), ("child", "c;")]), -1021 | (0, vec![("parent", "c;"), ("child", "c")]), -1022 | ], -1023 | ); -1024 | }); -1025 | } - | -1026 | #[test] -1027 | fn test_query_matches_with_immediate_siblings() { -1028 | allocations::record(|| { -1029 | let language = get_language("python"); - | -1030 | // The immediate child operator '.' can be used in three similar ways: -1031 | // 1. Before the first child node in a pattern, it means that there cannot be any named -1032 | // siblings before that child node. -1033 | // 2. 
After the last child node in a pattern, it means that there cannot be any named -1034 | // sibling after that child node. -1035 | // 2. Between two child nodes in a pattern, it specifies that there cannot be any named -1036 | // siblings between those two child snodes. -1037 | let query = Query::new( -1038 | &language, -1039 | " -1040 | (dotted_name -1041 | (identifier) @parent -1042 | . -1043 | (identifier) @child) -1044 | (dotted_name -1045 | (identifier) @last-child -1046 | .) -1047 | (list -1048 | . -1049 | (_) @first-element) -1050 | ", -1051 | ) -1052 | .unwrap(); - | -1053 | assert_query_matches( -1054 | &language, -1055 | &query, -1056 | "import a.b.c.d; return [w, [1, y], z]", -1057 | &[ -1058 | (0, vec![("parent", "a"), ("child", "b")]), -1059 | (0, vec![("parent", "b"), ("child", "c")]), -1060 | (0, vec![("parent", "c"), ("child", "d")]), -1061 | (1, vec![("last-child", "d")]), -1062 | (2, vec![("first-element", "w")]), -1063 | (2, vec![("first-element", "1")]), -1064 | ], -1065 | ); - | -1066 | let query = Query::new( -1067 | &language, -1068 | " -1069 | (block . (_) @first-stmt) -1070 | (block (_) @stmt) -1071 | (block (_) @last-stmt .) 
-1072 | ", -1073 | ) -1074 | .unwrap(); - | -1075 | assert_query_matches( -1076 | &language, -1077 | &query, -1078 | " -1079 | if a: -1080 | b() -1081 | c() -1082 | if d(): e(); f() -1083 | g() -1084 | ", -1085 | &[ -1086 | (0, vec![("first-stmt", "b()")]), -1087 | (1, vec![("stmt", "b()")]), -1088 | (1, vec![("stmt", "c()")]), -1089 | (1, vec![("stmt", "if d(): e(); f()")]), -1090 | (0, vec![("first-stmt", "e()")]), -1091 | (1, vec![("stmt", "e()")]), -1092 | (1, vec![("stmt", "f()")]), -1093 | (2, vec![("last-stmt", "f()")]), -1094 | (1, vec![("stmt", "g()")]), -1095 | (2, vec![("last-stmt", "g()")]), -1096 | ], -1097 | ); -1098 | }); -1099 | } - | -1100 | #[test] -1101 | fn test_query_matches_with_last_named_child() { -1102 | allocations::record(|| { -1103 | let language = get_language("c"); -1104 | let query = Query::new( -1105 | &language, -1106 | "(compound_statement -1107 | (_) -1108 | (_) -1109 | (expression_statement -1110 | (identifier) @last_id) .)", -1111 | ) -1112 | .unwrap(); -1113 | assert_query_matches( -1114 | &language, -1115 | &query, -1116 | " -1117 | void one() { a; b; c; } -1118 | void two() { d; e; } -1119 | void three() { f; g; h; i; } -1120 | ", -1121 | &[(0, vec![("last_id", "c")]), (0, vec![("last_id", "i")])], -1122 | ); -1123 | }); -1124 | } - | -1125 | #[test] -1126 | fn test_query_matches_with_negated_fields() { -1127 | allocations::record(|| { -1128 | let language = get_language("javascript"); -1129 | let query = Query::new( -1130 | &language, -1131 | " -1132 | (import_specifier -1133 | !alias -1134 | name: (identifier) @import_name) - | -1135 | (export_specifier -1136 | !alias -1137 | name: (identifier) @export_name) - | -1138 | (export_statement -1139 | !decorator -1140 | !source -1141 | (_) @exported) - | -1142 | ; This negated field list is an extension of a previous -1143 | ; negated field list. The order of the children and negated -1144 | ; fields doesn't matter. 
-1145 | (export_statement -1146 | !decorator -1147 | !source -1148 | (_) @exported_expr -1149 | !declaration) - | -1150 | ; This negated field list is a prefix of a previous -1151 | ; negated field list. -1152 | (export_statement -1153 | !decorator -1154 | (_) @export_child .) -1155 | ", -1156 | ) -1157 | .unwrap(); -1158 | assert_query_matches( -1159 | &language, -1160 | &query, -1161 | " -1162 | import {a as b, c} from 'p1'; -1163 | export {g, h as i} from 'p2'; - | -1164 | @foo -1165 | export default 1; - | -1166 | export var j = 1; - | -1167 | export default k; -1168 | ", -1169 | &[ -1170 | (0, vec![("import_name", "c")]), -1171 | (1, vec![("export_name", "g")]), -1172 | (4, vec![("export_child", "'p2'")]), -1173 | (2, vec![("exported", "var j = 1;")]), -1174 | (4, vec![("export_child", "var j = 1;")]), -1175 | (2, vec![("exported", "k")]), -1176 | (3, vec![("exported_expr", "k")]), -1177 | (4, vec![("export_child", "k")]), -1178 | ], -1179 | ); -1180 | }); -1181 | } - | -1182 | #[test] -1183 | fn test_query_matches_with_field_at_root() { -1184 | allocations::record(|| { -1185 | let language = get_language("javascript"); -1186 | let query = Query::new(&language, "name: (identifier) @name").unwrap(); -1187 | assert_query_matches( -1188 | &language, -1189 | &query, -1190 | " -1191 | a(); -1192 | function b() {} -1193 | class c extends d {} -1194 | ", -1195 | &[(0, vec![("name", "b")]), (0, vec![("name", "c")])], -1196 | ); -1197 | }); -1198 | } - | -1199 | #[test] -1200 | fn test_query_matches_with_repeated_leaf_nodes() { -1201 | allocations::record(|| { -1202 | let language = get_language("javascript"); - | -1203 | let query = Query::new( -1204 | &language, -1205 | " -1206 | ( -1207 | (comment)+ @doc -1208 | . -1209 | (class_declaration -1210 | name: (identifier) @name) -1211 | ) - | -1212 | ( -1213 | (comment)+ @doc -1214 | . 
-1215 | (function_declaration -1216 | name: (identifier) @name) -1217 | ) -1218 | ", -1219 | ) -1220 | .unwrap(); - | -1221 | assert_query_matches( -1222 | &language, -1223 | &query, -1224 | " -1225 | // one -1226 | // two -1227 | a(); - | -1228 | // three -1229 | { -1230 | // four -1231 | // five -1232 | // six -1233 | class B {} - | -1234 | // seven -1235 | c(); - | -1236 | // eight -1237 | function d() {} -1238 | } -1239 | ", -1240 | &[ -1241 | ( -1242 | 0, -1243 | vec![ -1244 | ("doc", "// four"), -1245 | ("doc", "// five"), -1246 | ("doc", "// six"), -1247 | ("name", "B"), -1248 | ], -1249 | ), -1250 | (1, vec![("doc", "// eight"), ("name", "d")]), -1251 | ], -1252 | ); -1253 | }); -1254 | } - | -1255 | #[test] -1256 | fn test_query_matches_with_optional_nodes_inside_of_repetitions() { -1257 | allocations::record(|| { -1258 | let language = get_language("javascript"); -1259 | let query = Query::new(&language, r#"(array (","? (number) @num)+)"#).unwrap(); - | -1260 | assert_query_matches( -1261 | &language, -1262 | &query, -1263 | r" -1264 | var a = [1, 2, 3, 4] -1265 | ", -1266 | &[( -1267 | 0, -1268 | vec![("num", "1"), ("num", "2"), ("num", "3"), ("num", "4")], -1269 | )], -1270 | ); -1271 | }); -1272 | } - | -1273 | #[test] -1274 | fn test_query_matches_with_top_level_repetitions() { -1275 | allocations::record(|| { -1276 | let language = get_language("javascript"); -1277 | let query = Query::new( -1278 | &language, -1279 | r" -1280 | (comment)+ @doc -1281 | ", -1282 | ) -1283 | .unwrap(); - | -1284 | assert_query_matches( -1285 | &language, -1286 | &query, -1287 | r" -1288 | // a -1289 | // b -1290 | // c - | -1291 | d() - | -1292 | // e -1293 | ", -1294 | &[ -1295 | (0, vec![("doc", "// a"), ("doc", "// b"), ("doc", "// c")]), -1296 | (0, vec![("doc", "// e")]), -1297 | ], -1298 | ); -1299 | }); -1300 | } - | -1301 | #[test] -1302 | fn test_query_matches_with_non_terminal_repetitions_within_root() { -1303 | allocations::record(|| { -1304 | let language = 
get_language("javascript"); -1305 | let query = Query::new(&language, "(_ (expression_statement (identifier) @id)+)").unwrap(); - | -1306 | assert_query_matches( -1307 | &language, -1308 | &query, -1309 | r" -1310 | function f() { -1311 | d; -1312 | e; -1313 | f; -1314 | g; -1315 | } -1316 | a; -1317 | b; -1318 | c; -1319 | ", -1320 | &[ -1321 | (0, vec![("id", "d"), ("id", "e"), ("id", "f"), ("id", "g")]), -1322 | (0, vec![("id", "a"), ("id", "b"), ("id", "c")]), -1323 | ], -1324 | ); -1325 | }); -1326 | } - | -1327 | #[test] -1328 | fn test_query_matches_with_nested_repetitions() { -1329 | allocations::record(|| { -1330 | let language = get_language("javascript"); -1331 | let query = Query::new( -1332 | &language, -1333 | r#" -1334 | (variable_declaration -1335 | (","? (variable_declarator name: (identifier) @x))+)+ -1336 | "#, -1337 | ) -1338 | .unwrap(); - | -1339 | assert_query_matches( -1340 | &language, -1341 | &query, -1342 | r" -1343 | var a = b, c, d -1344 | var e, f - | -1345 | // more -1346 | var g -1347 | ", -1348 | &[ -1349 | ( -1350 | 0, -1351 | vec![("x", "a"), ("x", "c"), ("x", "d"), ("x", "e"), ("x", "f")], -1352 | ), -1353 | (0, vec![("x", "g")]), -1354 | ], -1355 | ); -1356 | }); -1357 | } - | -1358 | #[test] -1359 | fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pattern() { -1360 | allocations::record(|| { -1361 | let language = get_language("javascript"); - | -1362 | // When this query sees a comment, it must keep track of several potential -1363 | // matches: up to two for each pattern that begins with a comment. 
-1364 | let query = Query::new( -1365 | &language, -1366 | r" -1367 | (call_expression -1368 | function: (member_expression -1369 | property: (property_identifier) @name)) @ref.method - | -1370 | ((comment)* @doc (function_declaration)) -1371 | ((comment)* @doc (generator_function_declaration)) -1372 | ((comment)* @doc (class_declaration)) -1373 | ((comment)* @doc (lexical_declaration)) -1374 | ((comment)* @doc (variable_declaration)) -1375 | ((comment)* @doc (method_definition)) - | -1376 | (comment) @comment -1377 | ", -1378 | ) -1379 | .unwrap(); - | -1380 | // Here, a series of comments occurs in the middle of a match of the first -1381 | // pattern. To avoid exceeding the storage limits and discarding that outer -1382 | // match, the comment-related matches need to be managed efficiently. -1383 | let source = format!( -1384 | "theObject\n{}\n.theMethod()", -1385 | " // the comment\n".repeat(64) -1386 | ); - | -1387 | assert_query_matches( -1388 | &language, -1389 | &query, -1390 | &source, -1391 | &vec![(7, vec![("comment", "// the comment")]); 64] -1392 | .into_iter() -1393 | .chain(vec![( -1394 | 0, -1395 | vec![("ref.method", source.as_str()), ("name", "theMethod")], -1396 | )]) -1397 | .collect::>(), -1398 | ); -1399 | }); -1400 | } - | -1401 | #[test] -1402 | fn test_query_matches_with_trailing_repetitions_of_last_child() { -1403 | allocations::record(|| { -1404 | let language = get_language("javascript"); - | -1405 | let query = Query::new( -1406 | &language, -1407 | " -1408 | (unary_expression (primary_expression)+ @operand) -1409 | ", -1410 | ) -1411 | .unwrap(); - | -1412 | assert_query_matches( -1413 | &language, -1414 | &query, -1415 | " -1416 | a = typeof (!b && ~c); -1417 | ", -1418 | &[ -1419 | (0, vec![("operand", "b")]), -1420 | (0, vec![("operand", "c")]), -1421 | (0, vec![("operand", "(!b && ~c)")]), -1422 | ], -1423 | ); -1424 | }); -1425 | } - | -1426 | #[test] -1427 | fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { 
-1428 | allocations::record(|| { -1429 | let language = get_language("javascript"); - | -1430 | let query = Query::new( -1431 | &language, -1432 | " -1433 | ( -1434 | (comment)* @doc -1435 | . -1436 | (function_declaration -1437 | name: (identifier) @name) -1438 | ) -1439 | ", -1440 | ) -1441 | .unwrap(); - | -1442 | assert_query_matches( -1443 | &language, -1444 | &query, -1445 | " -1446 | function a() { -1447 | // one -1448 | var b; - | -1449 | function c() {} - | -1450 | // two -1451 | // three -1452 | var d; - | -1453 | // four -1454 | // five -1455 | function e() { - | -1456 | } -1457 | } - | -1458 | // six -1459 | ", -1460 | &[ -1461 | (0, vec![("name", "a")]), -1462 | (0, vec![("name", "c")]), -1463 | ( -1464 | 0, -1465 | vec![("doc", "// four"), ("doc", "// five"), ("name", "e")], -1466 | ), -1467 | ], -1468 | ); -1469 | }); -1470 | } - | -1471 | #[test] -1472 | fn test_query_matches_with_trailing_optional_nodes() { -1473 | allocations::record(|| { -1474 | let language = get_language("javascript"); - | -1475 | let query = Query::new( -1476 | &language, -1477 | " -1478 | (class_declaration -1479 | name: (identifier) @class -1480 | (class_heritage -1481 | (identifier) @superclass)?) 
-1482 | ", -1483 | ) -1484 | .unwrap(); - | -1485 | assert_query_matches( -1486 | &language, -1487 | &query, -1488 | "class A {}", -1489 | &[(0, vec![("class", "A")])], -1490 | ); - | -1491 | assert_query_matches( -1492 | &language, -1493 | &query, -1494 | " -1495 | class A {} -1496 | class B extends C {} -1497 | class D extends (E.F) {} -1498 | ", -1499 | &[ -1500 | (0, vec![("class", "A")]), -1501 | (0, vec![("class", "B"), ("superclass", "C")]), -1502 | (0, vec![("class", "D")]), -1503 | ], -1504 | ); -1505 | }); -1506 | } - | -1507 | #[test] -1508 | fn test_query_matches_with_nested_optional_nodes() { -1509 | allocations::record(|| { -1510 | let language = get_language("javascript"); - | -1511 | // A function call, optionally containing a function call, which optionally contains a -1512 | // number -1513 | let query = Query::new( -1514 | &language, -1515 | " -1516 | (call_expression -1517 | function: (identifier) @outer-fn -1518 | arguments: (arguments -1519 | (call_expression -1520 | function: (identifier) @inner-fn -1521 | arguments: (arguments -1522 | (number)? 
@num))?)) -1523 | ", -1524 | ) -1525 | .unwrap(); - | -1526 | assert_query_matches( -1527 | &language, -1528 | &query, -1529 | r" -1530 | a(b, c(), d(null, 1, 2)) -1531 | e() -1532 | f(g()) -1533 | ", -1534 | &[ -1535 | (0, vec![("outer-fn", "a"), ("inner-fn", "c")]), -1536 | (0, vec![("outer-fn", "c")]), -1537 | (0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "1")]), -1538 | (0, vec![("outer-fn", "a"), ("inner-fn", "d"), ("num", "2")]), -1539 | (0, vec![("outer-fn", "d")]), -1540 | (0, vec![("outer-fn", "e")]), -1541 | (0, vec![("outer-fn", "f"), ("inner-fn", "g")]), -1542 | (0, vec![("outer-fn", "g")]), -1543 | ], -1544 | ); -1545 | }); -1546 | } - | -1547 | #[test] -1548 | fn test_query_matches_with_repeated_internal_nodes() { -1549 | allocations::record(|| { -1550 | let language = get_language("javascript"); -1551 | let query = Query::new( -1552 | &language, -1553 | " -1554 | (_ -1555 | (method_definition -1556 | (decorator (identifier) @deco)+ -1557 | name: (property_identifier) @name)) -1558 | ", -1559 | ) -1560 | .unwrap(); - | -1561 | assert_query_matches( -1562 | &language, -1563 | &query, -1564 | " -1565 | class A { -1566 | @c -1567 | @d -1568 | e() {} -1569 | } -1570 | ", -1571 | &[(0, vec![("deco", "c"), ("deco", "d"), ("name", "e")])], -1572 | ); -1573 | }); -1574 | } - | -1575 | #[test] -1576 | fn test_query_matches_with_simple_alternatives() { -1577 | allocations::record(|| { -1578 | let language = get_language("javascript"); -1579 | let query = Query::new( -1580 | &language, -1581 | " -1582 | (pair -1583 | key: [(property_identifier) (string)] @key -1584 | value: [(function_expression) @val1 (arrow_function) @val2]) -1585 | ", -1586 | ) -1587 | .unwrap(); - | -1588 | assert_query_matches( -1589 | &language, -1590 | &query, -1591 | " -1592 | a = { -1593 | b: c, -1594 | 'd': e => f, -1595 | g: { -1596 | h: function i() {}, -1597 | 'x': null, -1598 | j: _ => k -1599 | }, -1600 | 'l': function m() {}, -1601 | }; -1602 | ", -1603 | &[ -1604 | (0, 
vec![("key", "'d'"), ("val2", "e => f")]), -1605 | (0, vec![("key", "h"), ("val1", "function i() {}")]), -1606 | (0, vec![("key", "j"), ("val2", "_ => k")]), -1607 | (0, vec![("key", "'l'"), ("val1", "function m() {}")]), -1608 | ], -1609 | ); -1610 | }); -1611 | } - | -1612 | #[test] -1613 | fn test_query_matches_with_alternatives_in_repetitions() { -1614 | allocations::record(|| { -1615 | let language = get_language("javascript"); -1616 | let query = Query::new( -1617 | &language, -1618 | r#" -1619 | (array -1620 | [(identifier) (string)] @el -1621 | . -1622 | ( -1623 | "," -1624 | . -1625 | [(identifier) (string)] @el -1626 | )*) -1627 | "#, -1628 | ) -1629 | .unwrap(); - | -1630 | assert_query_matches( -1631 | &language, -1632 | &query, -1633 | " -1634 | a = [b, 'c', d, 1, e, 'f', 'g', h]; -1635 | ", -1636 | &[ -1637 | (0, vec![("el", "b"), ("el", "'c'"), ("el", "d")]), -1638 | ( -1639 | 0, -1640 | vec![("el", "e"), ("el", "'f'"), ("el", "'g'"), ("el", "h")], -1641 | ), -1642 | ], -1643 | ); -1644 | }); -1645 | } - | -1646 | #[test] -1647 | fn test_query_matches_with_alternatives_at_root() { -1648 | allocations::record(|| { -1649 | let language = get_language("javascript"); -1650 | let query = Query::new( -1651 | &language, -1652 | r#" -1653 | [ -1654 | "if" -1655 | "else" -1656 | "function" -1657 | "throw" -1658 | "return" -1659 | ] @keyword -1660 | "#, -1661 | ) -1662 | .unwrap(); - | -1663 | assert_query_matches( -1664 | &language, -1665 | &query, -1666 | " -1667 | function a(b, c, d) { -1668 | if (b) { -1669 | return c; -1670 | } else { -1671 | throw d; -1672 | } -1673 | } -1674 | ", -1675 | &[ -1676 | (0, vec![("keyword", "function")]), -1677 | (0, vec![("keyword", "if")]), -1678 | (0, vec![("keyword", "return")]), -1679 | (0, vec![("keyword", "else")]), -1680 | (0, vec![("keyword", "throw")]), -1681 | ], -1682 | ); -1683 | }); -1684 | } - | -1685 | #[test] -1686 | fn test_query_matches_with_alternatives_under_fields() { -1687 | allocations::record(|| { 
-1688 | let language = get_language("javascript"); -1689 | let query = Query::new( -1690 | &language, -1691 | r" -1692 | (assignment_expression -1693 | left: [ -1694 | (identifier) @variable -1695 | (member_expression property: (property_identifier) @variable) -1696 | ]) -1697 | ", -1698 | ) -1699 | .unwrap(); - | -1700 | assert_query_matches( -1701 | &language, -1702 | &query, -1703 | " -1704 | a = b; -1705 | b = c.d; -1706 | e.f = g; -1707 | h.i = j.k; -1708 | ", -1709 | &[ -1710 | (0, vec![("variable", "a")]), -1711 | (0, vec![("variable", "b")]), -1712 | (0, vec![("variable", "f")]), -1713 | (0, vec![("variable", "i")]), -1714 | ], -1715 | ); -1716 | }); -1717 | } - | -1718 | #[test] -1719 | fn test_query_matches_in_language_with_simple_aliases() { -1720 | allocations::record(|| { -1721 | let language = get_language("html"); - | -1722 | // HTML uses different tokens to track start tags names, end -1723 | // tag names, script tag names, and style tag names. All of -1724 | // these tokens are aliased to `tag_name`. -1725 | let query = Query::new(&language, "(tag_name) @tag").unwrap(); - | -1726 | assert_query_matches( -1727 | &language, -1728 | &query, -1729 | " -1730 |
-1731 | -1732 | -1733 |
-1734 | ", -1735 | &[ -1736 | (0, vec![("tag", "div")]), -1737 | (0, vec![("tag", "script")]), -1738 | (0, vec![("tag", "script")]), -1739 | (0, vec![("tag", "style")]), -1740 | (0, vec![("tag", "style")]), -1741 | (0, vec![("tag", "div")]), -1742 | ], -1743 | ); -1744 | }); -1745 | } - | -1746 | #[test] -1747 | fn test_query_matches_with_different_tokens_with_the_same_string_value() { -1748 | allocations::record(|| { -1749 | // In Rust, there are two '<' tokens: one for the binary operator, -1750 | // and one with higher precedence for generics. -1751 | let language = get_language("rust"); -1752 | let query = Query::new( -1753 | &language, -1754 | r#" -1755 | "<" @less -1756 | ">" @greater -1757 | "#, -1758 | ) -1759 | .unwrap(); - | -1760 | assert_query_matches( -1761 | &language, -1762 | &query, -1763 | "const A: B = d < e || f > g;", -1764 | &[ -1765 | (0, vec![("less", "<")]), -1766 | (1, vec![("greater", ">")]), -1767 | (0, vec![("less", "<")]), -1768 | (1, vec![("greater", ">")]), -1769 | ], -1770 | ); -1771 | }); -1772 | } - | -1773 | #[test] -1774 | fn test_query_matches_with_too_many_permutations_to_track() { -1775 | allocations::record(|| { -1776 | let language = get_language("javascript"); -1777 | let query = Query::new( -1778 | &language, -1779 | " -1780 | (array (identifier) @pre (identifier) @post) -1781 | ", -1782 | ) -1783 | .unwrap(); - | -1784 | let mut source = "hello, ".repeat(50); -1785 | source.insert(0, '['); -1786 | source.push_str("];"); - | -1787 | let mut parser = Parser::new(); -1788 | parser.set_language(&language).unwrap(); -1789 | let tree = parser.parse(&source, None).unwrap(); -1790 | let mut cursor = QueryCursor::new(); -1791 | cursor.set_match_limit(32); -1792 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); - | -1793 | // For this pathological query, some match permutations will be dropped. -1794 | // Just check that a subset of the results are returned, and crash or -1795 | // leak occurs. 
-1796 | assert_eq!( -1797 | collect_matches(matches, &query, source.as_str())[0], -1798 | (0, vec![("pre", "hello"), ("post", "hello")]), -1799 | ); -1800 | assert!(cursor.did_exceed_match_limit()); -1801 | }); -1802 | } - | -1803 | #[test] -1804 | fn test_query_sibling_patterns_dont_match_children_of_an_error() { -1805 | allocations::record(|| { -1806 | let language = get_language("rust"); -1807 | let query = Query::new( -1808 | &language, -1809 | r#" -1810 | ("{" @open "}" @close) - | -1811 | [ -1812 | (line_comment) -1813 | (block_comment) -1814 | ] @comment - | -1815 | ("<" @first "<" @second) -1816 | "#, -1817 | ) -1818 | .unwrap(); - | -1819 | // Most of the document will fail to parse, resulting in a -1820 | // large number of tokens that are *direct* children of an -1821 | // ERROR node. -1822 | // -1823 | // These children should still match, unless they are part -1824 | // of a "non-rooted" pattern, in which there are multiple -1825 | // top-level sibling nodes. Those patterns should not match -1826 | // directly inside of an error node, because the contents of -1827 | // an error node are not syntactically well-structured, so we -1828 | // would get many spurious matches. 
-1829 | let source = " -1830 | fn a() {} - | -1831 | <<<<<<<<<< add pub b fn () {} -1832 | // comment 1 -1833 | pub fn b() { -1834 | /* comment 2 */ -1835 | ========== -1836 | pub fn c() { -1837 | // comment 3 -1838 | >>>>>>>>>> add pub c fn () {} -1839 | } -1840 | "; - | -1841 | let mut parser = Parser::new(); -1842 | parser.set_language(&language).unwrap(); -1843 | let tree = parser.parse(source, None).unwrap(); -1844 | let mut cursor = QueryCursor::new(); -1845 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); -1846 | assert_eq!( -1847 | collect_matches(matches, &query, source), -1848 | &[ -1849 | (0, vec![("open", "{"), ("close", "}")]), -1850 | (1, vec![("comment", "// comment 1")]), -1851 | (1, vec![("comment", "/* comment 2 */")]), -1852 | (1, vec![("comment", "// comment 3")]), -1853 | ], -1854 | ); -1855 | }); -1856 | } - | -1857 | #[test] -1858 | fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { -1859 | allocations::record(|| { -1860 | let language = get_language("javascript"); -1861 | let query = Query::new( -1862 | &language, -1863 | " -1864 | ( -1865 | (comment) @doc -1866 | ; not immediate -1867 | (class_declaration) @class -1868 | ) - | -1869 | (call_expression -1870 | function: [ -1871 | (identifier) @function -1872 | (member_expression property: (property_identifier) @method) -1873 | ]) -1874 | ", -1875 | ) -1876 | .unwrap(); - | -1877 | let source = "/* hi */ a.b(); ".repeat(50); - | -1878 | let mut parser = Parser::new(); -1879 | parser.set_language(&language).unwrap(); -1880 | let tree = parser.parse(&source, None).unwrap(); -1881 | let mut cursor = QueryCursor::new(); -1882 | cursor.set_match_limit(32); -1883 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); - | -1884 | assert_eq!( -1885 | collect_matches(matches, &query, source.as_str()), -1886 | vec![(1, vec![("method", "b")]); 50], -1887 | ); -1888 | assert!(cursor.did_exceed_match_limit()); -1889 | }); -1890 
| } - | -1891 | #[test] -1892 | fn test_repetitions_before_with_alternatives() { -1893 | allocations::record(|| { -1894 | let language = get_language("rust"); -1895 | let query = Query::new( -1896 | &language, -1897 | r" -1898 | ( -1899 | (line_comment)* @comment -1900 | . -1901 | [ -1902 | (struct_item name: (_) @name) -1903 | (function_item name: (_) @name) -1904 | (enum_item name: (_) @name) -1905 | (impl_item type: (_) @name) -1906 | ] -1907 | ) -1908 | ", -1909 | ) -1910 | .unwrap(); - | -1911 | assert_query_matches( -1912 | &language, -1913 | &query, -1914 | r" -1915 | // a -1916 | // b -1917 | fn c() {} - | -1918 | // d -1919 | // e -1920 | impl F {} -1921 | ", -1922 | &[ -1923 | ( -1924 | 0, -1925 | vec![("comment", "// a"), ("comment", "// b"), ("name", "c")], -1926 | ), -1927 | ( -1928 | 0, -1929 | vec![("comment", "// d"), ("comment", "// e"), ("name", "F")], -1930 | ), -1931 | ], -1932 | ); -1933 | }); -1934 | } - | -1935 | #[test] -1936 | fn test_query_matches_with_anonymous_tokens() { -1937 | allocations::record(|| { -1938 | let language = get_language("javascript"); -1939 | let query = Query::new( -1940 | &language, -1941 | r#" -1942 | ";" @punctuation -1943 | "&&" @operator -1944 | "\"" @quote -1945 | "#, -1946 | ) -1947 | .unwrap(); - | -1948 | assert_query_matches( -1949 | &language, -1950 | &query, -1951 | r#"foo(a && "b");"#, -1952 | &[ -1953 | (1, vec![("operator", "&&")]), -1954 | (2, vec![("quote", "\"")]), -1955 | (2, vec![("quote", "\"")]), -1956 | (0, vec![("punctuation", ";")]), -1957 | ], -1958 | ); -1959 | }); -1960 | } - | -1961 | #[test] -1962 | fn test_query_matches_with_supertypes() { -1963 | allocations::record(|| { -1964 | let language = get_language("python"); -1965 | let query = Query::new( -1966 | &language, -1967 | r" -1968 | (argument_list (expression) @arg) - | -1969 | (keyword_argument -1970 | value: (expression) @kw_arg) - | -1971 | (assignment -1972 | left: (identifier) @var_def) - | -1973 | 
(primary_expression/identifier) @var_ref -1974 | ", -1975 | ) -1976 | .unwrap(); - | -1977 | assert_query_matches( -1978 | &language, -1979 | &query, -1980 | " -1981 | a = b.c( -1982 | [d], -1983 | # a comment -1984 | e=f -1985 | ) -1986 | ", -1987 | &[ -1988 | (2, vec![("var_def", "a")]), -1989 | (3, vec![("var_ref", "b")]), -1990 | (0, vec![("arg", "[d]")]), -1991 | (3, vec![("var_ref", "d")]), -1992 | (1, vec![("kw_arg", "f")]), -1993 | (3, vec![("var_ref", "f")]), -1994 | ], -1995 | ); -1996 | }); -1997 | } - | -1998 | #[test] -1999 | #[allow(clippy::reversed_empty_ranges)] -2000 | fn test_query_matches_within_byte_range() { -2001 | allocations::record(|| { -2002 | let language = get_language("javascript"); -2003 | let query = Query::new(&language, "(identifier) @element").unwrap(); - | -2004 | let source = "[a, b, c, d, e, f, g]"; - | -2005 | let mut parser = Parser::new(); -2006 | parser.set_language(&language).unwrap(); -2007 | let tree = parser.parse(source, None).unwrap(); - | -2008 | let mut cursor = QueryCursor::new(); - | -2009 | let matches = -2010 | cursor -2011 | .set_byte_range(0..8) -2012 | .matches(&query, tree.root_node(), source.as_bytes()); -2013 | assert_eq!( -2014 | collect_matches(matches, &query, source), -2015 | &[ -2016 | (0, vec![("element", "a")]), -2017 | (0, vec![("element", "b")]), -2018 | (0, vec![("element", "c")]), -2019 | ] -2020 | ); - | -2021 | let matches = -2022 | cursor -2023 | .set_byte_range(5..15) -2024 | .matches(&query, tree.root_node(), source.as_bytes()); -2025 | assert_eq!( -2026 | collect_matches(matches, &query, source), -2027 | &[ -2028 | (0, vec![("element", "c")]), -2029 | (0, vec![("element", "d")]), -2030 | (0, vec![("element", "e")]), -2031 | ] -2032 | ); - | -2033 | // An end byte of zero indicates there is no end -2034 | let matches = -2035 | cursor -2036 | .set_byte_range(12..0) -2037 | .matches(&query, tree.root_node(), source.as_bytes()); -2038 | assert_eq!( -2039 | collect_matches(matches, &query, 
source), -2040 | &[ -2041 | (0, vec![("element", "e")]), -2042 | (0, vec![("element", "f")]), -2043 | (0, vec![("element", "g")]), -2044 | ] -2045 | ); -2046 | }); -2047 | } - | -2048 | #[test] -2049 | fn test_query_matches_within_point_range() { -2050 | allocations::record(|| { -2051 | let language = get_language("javascript"); -2052 | let query = Query::new(&language, "(identifier) @element").unwrap(); - | -2053 | let source = " -2054 | [ -2055 | a, b, -2056 | c, d, -2057 | e, f, -2058 | g, h, -2059 | i, j, -2060 | k, l, -2061 | ] -2062 | " -2063 | .unindent(); - | -2064 | let mut parser = Parser::new(); -2065 | parser.set_language(&language).unwrap(); -2066 | let tree = parser.parse(&source, None).unwrap(); -2067 | let mut cursor = QueryCursor::new(); - | -2068 | let matches = cursor -2069 | .set_point_range(Point::new(1, 0)..Point::new(2, 3)) -2070 | .matches(&query, tree.root_node(), source.as_bytes()); -2071 | assert_eq!( -2072 | collect_matches(matches, &query, &source), -2073 | &[ -2074 | (0, vec![("element", "a")]), -2075 | (0, vec![("element", "b")]), -2076 | (0, vec![("element", "c")]), -2077 | ] -2078 | ); - | -2079 | let matches = cursor -2080 | .set_point_range(Point::new(2, 0)..Point::new(3, 3)) -2081 | .matches(&query, tree.root_node(), source.as_bytes()); -2082 | assert_eq!( -2083 | collect_matches(matches, &query, &source), -2084 | &[ -2085 | (0, vec![("element", "c")]), -2086 | (0, vec![("element", "d")]), -2087 | (0, vec![("element", "e")]), -2088 | ] -2089 | ); - | -2090 | // Zero end point is treated like no end point. 
-2091 | let matches = cursor -2092 | .set_point_range(Point::new(4, 1)..Point::new(0, 0)) -2093 | .matches(&query, tree.root_node(), source.as_bytes()); -2094 | assert_eq!( -2095 | collect_matches(matches, &query, &source), -2096 | &[ -2097 | (0, vec![("element", "g")]), -2098 | (0, vec![("element", "h")]), -2099 | (0, vec![("element", "i")]), -2100 | (0, vec![("element", "j")]), -2101 | (0, vec![("element", "k")]), -2102 | (0, vec![("element", "l")]), -2103 | ] -2104 | ); -2105 | }); -2106 | } - | -2107 | #[test] -2108 | fn test_query_captures_within_byte_range() { -2109 | allocations::record(|| { -2110 | let language = get_language("c"); -2111 | let query = Query::new( -2112 | &language, -2113 | " -2114 | (call_expression -2115 | function: (identifier) @function -2116 | arguments: (argument_list (string_literal) @string.arg)) - | -2117 | (string_literal) @string -2118 | ", -2119 | ) -2120 | .unwrap(); - | -2121 | let source = r#"DEFUN ("safe-length", Fsafe_length, Ssafe_length, 1, 1, 0)"#; - | -2122 | let mut parser = Parser::new(); -2123 | parser.set_language(&language).unwrap(); -2124 | let tree = parser.parse(source, None).unwrap(); - | -2125 | let mut cursor = QueryCursor::new(); -2126 | let captures = -2127 | cursor -2128 | .set_byte_range(3..27) -2129 | .captures(&query, tree.root_node(), source.as_bytes()); - | -2130 | assert_eq!( -2131 | collect_captures(captures, &query, source), -2132 | &[ -2133 | ("function", "DEFUN"), -2134 | ("string.arg", "\"safe-length\""), -2135 | ("string", "\"safe-length\""), -2136 | ] -2137 | ); -2138 | }); -2139 | } - | -2140 | #[test] -2141 | fn test_query_cursor_next_capture_with_byte_range() { -2142 | allocations::record(|| { -2143 | let language = get_language("python"); -2144 | let query = Query::new( -2145 | &language, -2146 | "(function_definition name: (identifier) @function) -2147 | (attribute attribute: (identifier) @property) -2148 | ((identifier) @variable)", -2149 | ) -2150 | .unwrap(); - | -2151 | let source = 
"def func():\n foo.bar.baz()\n"; -2152 | // ^ ^ ^ ^ -2153 | // byte_pos 0 12 17 27 -2154 | // point_pos (0,0) (1,0) (1,5) (1,15) - | -2155 | let mut parser = Parser::new(); -2156 | parser.set_language(&language).unwrap(); -2157 | let tree = parser.parse(source, None).unwrap(); - | -2158 | let mut cursor = QueryCursor::new(); -2159 | let captures = -2160 | cursor -2161 | .set_byte_range(12..17) -2162 | .captures(&query, tree.root_node(), source.as_bytes()); - | -2163 | assert_eq!( -2164 | collect_captures(captures, &query, source), -2165 | &[("variable", "foo"),] -2166 | ); -2167 | }); -2168 | } - | -2169 | #[test] -2170 | fn test_query_cursor_next_capture_with_point_range() { -2171 | allocations::record(|| { -2172 | let language = get_language("python"); -2173 | let query = Query::new( -2174 | &language, -2175 | "(function_definition name: (identifier) @function) -2176 | (attribute attribute: (identifier) @property) -2177 | ((identifier) @variable)", -2178 | ) -2179 | .unwrap(); - | -2180 | let source = "def func():\n foo.bar.baz()\n"; -2181 | // ^ ^ ^ ^ -2182 | // byte_pos 0 12 17 27 -2183 | // point_pos (0,0) (1,0) (1,5) (1,15) - | -2184 | let mut parser = Parser::new(); -2185 | parser.set_language(&language).unwrap(); -2186 | let tree = parser.parse(source, None).unwrap(); - | -2187 | let mut cursor = QueryCursor::new(); -2188 | let captures = cursor -2189 | .set_point_range(Point::new(1, 0)..Point::new(1, 5)) -2190 | .captures(&query, tree.root_node(), source.as_bytes()); - | -2191 | assert_eq!( -2192 | collect_captures(captures, &query, source), -2193 | &[("variable", "foo"),] -2194 | ); -2195 | }); -2196 | } - | -2197 | #[test] -2198 | fn test_query_matches_with_unrooted_patterns_intersecting_byte_range() { -2199 | allocations::record(|| { -2200 | let language = get_language("rust"); -2201 | let query = Query::new( -2202 | &language, -2203 | r#" -2204 | ("{" @left "}" @right) -2205 | ("<" @left ">" @right) -2206 | "#, -2207 | ) -2208 | .unwrap(); - | -2209 | 
let source = "mod a { fn a(f: B) { g(f) } }"; - | -2210 | let mut parser = Parser::new(); -2211 | parser.set_language(&language).unwrap(); -2212 | let tree = parser.parse(source, None).unwrap(); -2213 | let mut cursor = QueryCursor::new(); - | -2214 | // within the type parameter list -2215 | let offset = source.find("D: E>").unwrap(); -2216 | let matches = cursor.set_byte_range(offset..offset).matches( -2217 | &query, -2218 | tree.root_node(), -2219 | source.as_bytes(), -2220 | ); -2221 | assert_eq!( -2222 | collect_matches(matches, &query, source), -2223 | &[ -2224 | (1, vec![("left", "<"), ("right", ">")]), -2225 | (0, vec![("left", "{"), ("right", "}")]), -2226 | ] -2227 | ); - | -2228 | // from within the type parameter list to within the function body -2229 | let start_offset = source.find("D: E>").unwrap(); -2230 | let end_offset = source.find("g(f)").unwrap(); -2231 | let matches = cursor.set_byte_range(start_offset..end_offset).matches( -2232 | &query, -2233 | tree.root_node(), -2234 | source.as_bytes(), -2235 | ); -2236 | assert_eq!( -2237 | collect_matches(matches, &query, source), -2238 | &[ -2239 | (1, vec![("left", "<"), ("right", ">")]), -2240 | (0, vec![("left", "{"), ("right", "}")]), -2241 | (0, vec![("left", "{"), ("right", "}")]), -2242 | ] -2243 | ); -2244 | }); -2245 | } - | -2246 | #[test] -2247 | fn test_query_matches_with_wildcard_at_root_intersecting_byte_range() { -2248 | allocations::record(|| { -2249 | let language = get_language("python"); -2250 | let query = Query::new( -2251 | &language, -2252 | " -2253 | [ -2254 | (_ body: (block)) -2255 | (_ consequence: (block)) -2256 | ] @indent -2257 | ", -2258 | ) -2259 | .unwrap(); - | -2260 | let source = " -2261 | class A: -2262 | def b(): -2263 | if c: -2264 | d -2265 | else: -2266 | e -2267 | " -2268 | .trim(); - | -2269 | let mut parser = Parser::new(); -2270 | parser.set_language(&language).unwrap(); -2271 | let tree = parser.parse(source, None).unwrap(); -2272 | let mut cursor = 
QueryCursor::new(); - | -2273 | // After the first line of the class definition -2274 | let offset = source.find("A:").unwrap() + 2; -2275 | let mut matches = Vec::new(); -2276 | let mut match_iter = cursor.set_byte_range(offset..offset).matches( -2277 | &query, -2278 | tree.root_node(), -2279 | source.as_bytes(), -2280 | ); - | -2281 | while let Some(mat) = match_iter.next() { -2282 | if let Some(capture) = mat.captures.first() { -2283 | matches.push(capture.node.kind()); -2284 | } -2285 | } -2286 | assert_eq!(matches, &["class_definition"]); - | -2287 | // After the first line of the function definition -2288 | let offset = source.find("b():").unwrap() + 4; -2289 | let mut matches = Vec::new(); -2290 | let mut match_iter = cursor.set_byte_range(offset..offset).matches( -2291 | &query, -2292 | tree.root_node(), -2293 | source.as_bytes(), -2294 | ); - | -2295 | while let Some(mat) = match_iter.next() { -2296 | if let Some(capture) = mat.captures.first() { -2297 | matches.push(capture.node.kind()); -2298 | } -2299 | } -2300 | assert_eq!(matches, &["class_definition", "function_definition"]); - | -2301 | // After the first line of the if statement -2302 | let offset = source.find("c:").unwrap() + 2; -2303 | let mut matches = Vec::new(); -2304 | let mut match_iter = cursor.set_byte_range(offset..offset).matches( -2305 | &query, -2306 | tree.root_node(), -2307 | source.as_bytes(), -2308 | ); - | -2309 | while let Some(mat) = match_iter.next() { -2310 | if let Some(capture) = mat.captures.first() { -2311 | matches.push(capture.node.kind()); -2312 | } -2313 | } -2314 | assert_eq!( -2315 | matches, -2316 | &["class_definition", "function_definition", "if_statement"] -2317 | ); -2318 | }); -2319 | } - | -2320 | #[test] -2321 | fn test_query_captures_within_byte_range_assigned_after_iterating() { -2322 | allocations::record(|| { -2323 | let language = get_language("rust"); -2324 | let query = Query::new( -2325 | &language, -2326 | r#" -2327 | (function_item -2328 | name: 
(identifier) @fn_name) - | -2329 | (mod_item -2330 | name: (identifier) @mod_name -2331 | body: (declaration_list -2332 | "{" @lbrace -2333 | "}" @rbrace)) - | -2334 | ; functions that return Result<()> -2335 | ((function_item -2336 | return_type: (generic_type -2337 | type: (type_identifier) @result -2338 | type_arguments: (type_arguments -2339 | (unit_type))) -2340 | body: _ @fallible_fn_body) -2341 | (#eq? @result "Result")) -2342 | "#, -2343 | ) -2344 | .unwrap(); -2345 | let source = " -2346 | mod m1 { -2347 | mod m2 { -2348 | fn f1() -> Option<()> { Some(()) } -2349 | } -2350 | fn f2() -> Result<()> { Ok(()) } -2351 | fn f3() {} -2352 | } -2353 | "; - | -2354 | let mut parser = Parser::new(); -2355 | parser.set_language(&language).unwrap(); -2356 | let tree = parser.parse(source, None).unwrap(); -2357 | let mut cursor = QueryCursor::new(); -2358 | let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); - | -2359 | // Retrieve some captures -2360 | let mut results = Vec::new(); -2361 | let mut first_five = captures.by_ref().take(5); -2362 | while let Some((mat, capture_ix)) = first_five.next() { -2363 | let capture = mat.captures[*capture_ix]; -2364 | results.push(( -2365 | query.capture_names()[capture.index as usize], -2366 | &source[capture.node.byte_range()], -2367 | )); -2368 | } -2369 | assert_eq!( -2370 | results, -2371 | vec![ -2372 | ("mod_name", "m1"), -2373 | ("lbrace", "{"), -2374 | ("mod_name", "m2"), -2375 | ("lbrace", "{"), -2376 | ("fn_name", "f1"), -2377 | ] -2378 | ); - | -2379 | // Advance to a range that only partially intersects some matches. -2380 | // Captures from these matches are reported, but only those that -2381 | // intersect the range. 
-2382 | results.clear(); -2383 | captures.set_byte_range(source.find("Ok").unwrap()..source.len()); -2384 | while let Some((mat, capture_ix)) = captures.next() { -2385 | let capture = mat.captures[*capture_ix]; -2386 | results.push(( -2387 | query.capture_names()[capture.index as usize], -2388 | &source[capture.node.byte_range()], -2389 | )); -2390 | } -2391 | assert_eq!( -2392 | results, -2393 | vec![ -2394 | ("fallible_fn_body", "{ Ok(()) }"), -2395 | ("fn_name", "f3"), -2396 | ("rbrace", "}") -2397 | ] -2398 | ); -2399 | }); -2400 | } - | -2401 | #[test] -2402 | fn test_query_matches_within_range_of_long_repetition() { -2403 | allocations::record(|| { -2404 | let language = get_language("rust"); -2405 | let query = Query::new( -2406 | &language, -2407 | " -2408 | (function_item name: (identifier) @fn-name) -2409 | ", -2410 | ) -2411 | .unwrap(); - | -2412 | let source = " -2413 | fn zero() {} -2414 | fn one() {} -2415 | fn two() {} -2416 | fn three() {} -2417 | fn four() {} -2418 | fn five() {} -2419 | fn six() {} -2420 | fn seven() {} -2421 | fn eight() {} -2422 | fn nine() {} -2423 | fn ten() {} -2424 | fn eleven() {} -2425 | fn twelve() {} -2426 | " -2427 | .unindent(); - | -2428 | let mut parser = Parser::new(); -2429 | let mut cursor = QueryCursor::new(); - | -2430 | parser.set_language(&language).unwrap(); -2431 | let tree = parser.parse(&source, None).unwrap(); - | -2432 | let matches = cursor -2433 | .set_point_range(Point::new(8, 0)..Point::new(20, 0)) -2434 | .matches(&query, tree.root_node(), source.as_bytes()); -2435 | assert_eq!( -2436 | collect_matches(matches, &query, &source), -2437 | &[ -2438 | (0, vec![("fn-name", "eight")]), -2439 | (0, vec![("fn-name", "nine")]), -2440 | (0, vec![("fn-name", "ten")]), -2441 | (0, vec![("fn-name", "eleven")]), -2442 | (0, vec![("fn-name", "twelve")]), -2443 | ] -2444 | ); -2445 | }); -2446 | } - | -2447 | #[test] -2448 | fn test_query_matches_different_queries_same_cursor() { -2449 | allocations::record(|| { 
-2450 | let language = get_language("javascript"); -2451 | let query1 = Query::new( -2452 | &language, -2453 | " -2454 | (array (identifier) @id1) -2455 | ", -2456 | ) -2457 | .unwrap(); -2458 | let query2 = Query::new( -2459 | &language, -2460 | " -2461 | (array (identifier) @id1) -2462 | (pair (identifier) @id2) -2463 | ", -2464 | ) -2465 | .unwrap(); -2466 | let query3 = Query::new( -2467 | &language, -2468 | " -2469 | (array (identifier) @id1) -2470 | (pair (identifier) @id2) -2471 | (parenthesized_expression (identifier) @id3) -2472 | ", -2473 | ) -2474 | .unwrap(); - | -2475 | let source = "[a, {b: b}, (c)];"; - | -2476 | let mut parser = Parser::new(); -2477 | let mut cursor = QueryCursor::new(); - | -2478 | parser.set_language(&language).unwrap(); -2479 | let tree = parser.parse(source, None).unwrap(); - | -2480 | let matches = cursor.matches(&query1, tree.root_node(), source.as_bytes()); -2481 | assert_eq!( -2482 | collect_matches(matches, &query1, source), -2483 | &[(0, vec![("id1", "a")]),] -2484 | ); - | -2485 | let matches = cursor.matches(&query3, tree.root_node(), source.as_bytes()); -2486 | assert_eq!( -2487 | collect_matches(matches, &query3, source), -2488 | &[ -2489 | (0, vec![("id1", "a")]), -2490 | (1, vec![("id2", "b")]), -2491 | (2, vec![("id3", "c")]), -2492 | ] -2493 | ); - | -2494 | let matches = cursor.matches(&query2, tree.root_node(), source.as_bytes()); -2495 | assert_eq!( -2496 | collect_matches(matches, &query2, source), -2497 | &[(0, vec![("id1", "a")]), (1, vec![("id2", "b")]),] -2498 | ); -2499 | }); -2500 | } - | -2501 | #[test] -2502 | fn test_query_matches_with_multiple_captures_on_a_node() { -2503 | allocations::record(|| { -2504 | let language = get_language("javascript"); -2505 | let mut query = Query::new( -2506 | &language, -2507 | "(function_declaration -2508 | (identifier) @name1 @name2 @name3 -2509 | (statement_block) @body1 @body2)", -2510 | ) -2511 | .unwrap(); - | -2512 | let source = "function foo() { return 1; }"; 
-2513 | let mut parser = Parser::new(); -2514 | let mut cursor = QueryCursor::new(); - | -2515 | parser.set_language(&language).unwrap(); -2516 | let tree = parser.parse(source, None).unwrap(); - | -2517 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); -2518 | assert_eq!( -2519 | collect_matches(matches, &query, source), -2520 | &[( -2521 | 0, -2522 | vec![ -2523 | ("name1", "foo"), -2524 | ("name2", "foo"), -2525 | ("name3", "foo"), -2526 | ("body1", "{ return 1; }"), -2527 | ("body2", "{ return 1; }"), -2528 | ] -2529 | ),] -2530 | ); - | -2531 | // disabling captures still works when there are multiple captures on a -2532 | // single node. -2533 | query.disable_capture("name2"); -2534 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); -2535 | assert_eq!( -2536 | collect_matches(matches, &query, source), -2537 | &[( -2538 | 0, -2539 | vec![ -2540 | ("name1", "foo"), -2541 | ("name3", "foo"), -2542 | ("body1", "{ return 1; }"), -2543 | ("body2", "{ return 1; }"), -2544 | ] -2545 | ),] -2546 | ); -2547 | }); -2548 | } - | -2549 | #[test] -2550 | fn test_query_matches_with_captured_wildcard_at_root() { -2551 | allocations::record(|| { -2552 | let language = get_language("python"); -2553 | let query = Query::new( -2554 | &language, -2555 | " -2556 | ; captured wildcard at the root -2557 | (_ [ -2558 | (except_clause (block) @block) -2559 | (finally_clause (block) @block) -2560 | ]) @stmt - | -2561 | [ -2562 | (while_statement (block) @block) -2563 | (if_statement (block) @block) - | -2564 | ; captured wildcard at the root within an alternation -2565 | (_ [ -2566 | (else_clause (block) @block) -2567 | (elif_clause (block) @block) -2568 | ]) - | -2569 | (try_statement (block) @block) -2570 | (for_statement (block) @block) -2571 | ] @stmt -2572 | ", -2573 | ) -2574 | .unwrap(); - | -2575 | let source = " -2576 | for i in j: -2577 | while True: -2578 | if a: -2579 | print b -2580 | elif c: -2581 | print d -2582 | 
else: -2583 | try: -2584 | print f -2585 | except: -2586 | print g -2587 | finally: -2588 | print h -2589 | else: -2590 | print i -2591 | " -2592 | .trim(); - | -2593 | let mut parser = Parser::new(); -2594 | let mut cursor = QueryCursor::new(); -2595 | parser.set_language(&language).unwrap(); -2596 | let tree = parser.parse(source, None).unwrap(); - | -2597 | let mut match_capture_names_and_rows = Vec::new(); -2598 | let mut match_iter = cursor.matches(&query, tree.root_node(), source.as_bytes()); - | -2599 | while let Some(m) = match_iter.next() { -2600 | let captures = m -2601 | .captures -2602 | .iter() -2603 | .map(|c| { -2604 | ( -2605 | query.capture_names()[c.index as usize], -2606 | c.node.kind(), -2607 | c.node.start_position().row, -2608 | ) -2609 | }) -2610 | .collect::>(); -2611 | match_capture_names_and_rows.push(captures); -2612 | } - | -2613 | assert_eq!( -2614 | match_capture_names_and_rows, -2615 | &[ -2616 | vec![("stmt", "for_statement", 0), ("block", "block", 1)], -2617 | vec![("stmt", "while_statement", 1), ("block", "block", 2)], -2618 | vec![("stmt", "if_statement", 2), ("block", "block", 3)], -2619 | vec![("stmt", "if_statement", 2), ("block", "block", 5)], -2620 | vec![("stmt", "if_statement", 2), ("block", "block", 7)], -2621 | vec![("stmt", "try_statement", 7), ("block", "block", 8)], -2622 | vec![("stmt", "try_statement", 7), ("block", "block", 10)], -2623 | vec![("stmt", "try_statement", 7), ("block", "block", 12)], -2624 | vec![("stmt", "while_statement", 1), ("block", "block", 14)], -2625 | ] -2626 | ); -2627 | }); -2628 | } - | -2629 | #[test] -2630 | fn test_query_matches_with_no_captures() { -2631 | allocations::record(|| { -2632 | let language = get_language("javascript"); -2633 | let query = Query::new( -2634 | &language, -2635 | r" -2636 | (identifier) -2637 | (string) @s -2638 | ", -2639 | ) -2640 | .unwrap(); - | -2641 | assert_query_matches( -2642 | &language, -2643 | &query, -2644 | " -2645 | a = 'hi'; -2646 | b = 'bye'; 
-2647 | ", -2648 | &[ -2649 | (0, vec![]), -2650 | (1, vec![("s", "'hi'")]), -2651 | (0, vec![]), -2652 | (1, vec![("s", "'bye'")]), -2653 | ], -2654 | ); -2655 | }); -2656 | } - | -2657 | #[test] -2658 | fn test_query_matches_with_repeated_fields() { -2659 | allocations::record(|| { -2660 | let language = get_language("c"); -2661 | let query = Query::new( -2662 | &language, -2663 | "(field_declaration declarator: (field_identifier) @field)", -2664 | ) -2665 | .unwrap(); - | -2666 | assert_query_matches( -2667 | &language, -2668 | &query, -2669 | " -2670 | struct S { -2671 | int a, b, c; -2672 | }; -2673 | ", -2674 | &[ -2675 | (0, vec![("field", "a")]), -2676 | (0, vec![("field", "b")]), -2677 | (0, vec![("field", "c")]), -2678 | ], -2679 | ); -2680 | }); -2681 | } - | -2682 | #[test] -2683 | fn test_query_matches_with_deeply_nested_patterns_with_fields() { -2684 | allocations::record(|| { -2685 | let language = get_language("python"); -2686 | let query = Query::new( -2687 | &language, -2688 | " -2689 | (call -2690 | function: (_) @func -2691 | arguments: (_) @args) -2692 | (call -2693 | function: (attribute -2694 | object: (_) @receiver -2695 | attribute: (identifier) @method) -2696 | arguments: (argument_list)) - | -2697 | ; These don't match anything, but they require additional -2698 | ; states to keep track of their captures. -2699 | (call -2700 | function: (_) @fn -2701 | arguments: (argument_list -2702 | (keyword_argument -2703 | name: (identifier) @name -2704 | value: (_) @val) @arg) @args) @call -2705 | (call -2706 | function: (identifier) @fn -2707 | (#eq? 
@fn \"super\")) @super_call -2708 | ", -2709 | ) -2710 | .unwrap(); - | -2711 | assert_query_matches( -2712 | &language, -2713 | &query, -2714 | " -2715 | a(1).b(2).c(3).d(4).e(5).f(6).g(7).h(8) -2716 | ", -2717 | &[ -2718 | (0, vec![("func", "a"), ("args", "(1)")]), -2719 | (0, vec![("func", "a(1).b"), ("args", "(2)")]), -2720 | (1, vec![("receiver", "a(1)"), ("method", "b")]), -2721 | (0, vec![("func", "a(1).b(2).c"), ("args", "(3)")]), -2722 | (1, vec![("receiver", "a(1).b(2)"), ("method", "c")]), -2723 | (0, vec![("func", "a(1).b(2).c(3).d"), ("args", "(4)")]), -2724 | (1, vec![("receiver", "a(1).b(2).c(3)"), ("method", "d")]), -2725 | (0, vec![("func", "a(1).b(2).c(3).d(4).e"), ("args", "(5)")]), -2726 | ( -2727 | 1, -2728 | vec![("receiver", "a(1).b(2).c(3).d(4)"), ("method", "e")], -2729 | ), -2730 | ( -2731 | 0, -2732 | vec![("func", "a(1).b(2).c(3).d(4).e(5).f"), ("args", "(6)")], -2733 | ), -2734 | ( -2735 | 1, -2736 | vec![("receiver", "a(1).b(2).c(3).d(4).e(5)"), ("method", "f")], -2737 | ), -2738 | ( -2739 | 0, -2740 | vec![("func", "a(1).b(2).c(3).d(4).e(5).f(6).g"), ("args", "(7)")], -2741 | ), -2742 | ( -2743 | 1, -2744 | vec![ -2745 | ("receiver", "a(1).b(2).c(3).d(4).e(5).f(6)"), -2746 | ("method", "g"), -2747 | ], -2748 | ), -2749 | ( -2750 | 0, -2751 | vec![ -2752 | ("func", "a(1).b(2).c(3).d(4).e(5).f(6).g(7).h"), -2753 | ("args", "(8)"), -2754 | ], -2755 | ), -2756 | ( -2757 | 1, -2758 | vec![ -2759 | ("receiver", "a(1).b(2).c(3).d(4).e(5).f(6).g(7)"), -2760 | ("method", "h"), -2761 | ], -2762 | ), -2763 | ], -2764 | ); -2765 | }); -2766 | } - | -2767 | #[test] -2768 | fn test_query_matches_with_alternations_and_predicates() { -2769 | allocations::record(|| { -2770 | let language = get_language("java"); -2771 | let query = Query::new( -2772 | &language, -2773 | " -2774 | (block -2775 | [ -2776 | (local_variable_declaration -2777 | (variable_declarator -2778 | (identifier) @def.a -2779 | (string_literal) @lit.a -2780 | ) -2781 | ) -2782 | 
(local_variable_declaration -2783 | (variable_declarator -2784 | (identifier) @def.b -2785 | (null_literal) @lit.b -2786 | ) -2787 | ) -2788 | ] -2789 | (expression_statement -2790 | (method_invocation [ -2791 | (argument_list -2792 | (identifier) @ref.a -2793 | (string_literal) -2794 | ) -2795 | (argument_list -2796 | (null_literal) -2797 | (identifier) @ref.b -2798 | ) -2799 | ]) -2800 | ) -2801 | (#eq? @def.a @ref.a ) -2802 | (#eq? @def.b @ref.b ) -2803 | ) -2804 | ", -2805 | ) -2806 | .unwrap(); - | -2807 | assert_query_matches( -2808 | &language, -2809 | &query, -2810 | r#" -2811 | void test() { -2812 | int a = "foo"; -2813 | f(null, b); -2814 | } -2815 | "#, -2816 | &[], -2817 | ); -2818 | }); -2819 | } - | -2820 | #[test] -2821 | fn test_query_matches_with_indefinite_step_containing_no_captures() { -2822 | allocations::record(|| { -2823 | // This pattern depends on the field declarations within the -2824 | // struct's body, but doesn't capture anything within the body. -2825 | // It demonstrates that internally, state-splitting needs to occur -2826 | // for each field declaration within the body, in order to avoid -2827 | // prematurely failing if the first field does not match. 
-2828 | // -2829 | // https://github.com/tree-sitter/tree-sitter/issues/937 -2830 | let language = get_language("c"); -2831 | let query = Query::new( -2832 | &language, -2833 | "(struct_specifier -2834 | name: (type_identifier) @name -2835 | body: (field_declaration_list -2836 | (field_declaration -2837 | type: (union_specifier))))", -2838 | ) -2839 | .unwrap(); - | -2840 | assert_query_matches( -2841 | &language, -2842 | &query, -2843 | " -2844 | struct LacksUnionField { -2845 | int a; -2846 | struct { -2847 | B c; -2848 | } d; -2849 | G *h; -2850 | }; - | -2851 | struct HasUnionField { -2852 | int a; -2853 | struct { -2854 | B c; -2855 | } d; -2856 | union { -2857 | bool e; -2858 | float f; -2859 | } g; -2860 | G *h; -2861 | }; -2862 | ", -2863 | &[(0, vec![("name", "HasUnionField")])], -2864 | ); -2865 | }); -2866 | } - | -2867 | #[test] -2868 | fn test_query_captures_basic() { -2869 | allocations::record(|| { -2870 | let language = get_language("javascript"); -2871 | let query = Query::new( -2872 | &language, -2873 | r#" -2874 | (pair -2875 | key: _ @method.def -2876 | (function_expression -2877 | name: (identifier) @method.alias)) - | -2878 | (variable_declarator -2879 | name: _ @function.def -2880 | value: (function_expression -2881 | name: (identifier) @function.alias)) - | -2882 | ":" @delimiter -2883 | "=" @operator -2884 | "#, -2885 | ) -2886 | .unwrap(); - | -2887 | let source = " -2888 | a({ -2889 | bc: function de() { -2890 | const fg = function hi() {} -2891 | }, -2892 | jk: function lm() { -2893 | const no = function pq() {} -2894 | }, -2895 | }); -2896 | "; - | -2897 | let mut parser = Parser::new(); -2898 | parser.set_language(&language).unwrap(); -2899 | let tree = parser.parse(source, None).unwrap(); -2900 | let mut cursor = QueryCursor::new(); -2901 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); - | -2902 | assert_eq!( -2903 | collect_matches(matches, &query, source), -2904 | &[ -2905 | (2, vec![("delimiter", 
":")]), -2906 | (0, vec![("method.def", "bc"), ("method.alias", "de")]), -2907 | (3, vec![("operator", "=")]), -2908 | (1, vec![("function.def", "fg"), ("function.alias", "hi")]), -2909 | (2, vec![("delimiter", ":")]), -2910 | (0, vec![("method.def", "jk"), ("method.alias", "lm")]), -2911 | (3, vec![("operator", "=")]), -2912 | (1, vec![("function.def", "no"), ("function.alias", "pq")]), -2913 | ], -2914 | ); - | -2915 | let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); -2916 | assert_eq!( -2917 | collect_captures(captures, &query, source), -2918 | &[ -2919 | ("method.def", "bc"), -2920 | ("delimiter", ":"), -2921 | ("method.alias", "de"), -2922 | ("function.def", "fg"), -2923 | ("operator", "="), -2924 | ("function.alias", "hi"), -2925 | ("method.def", "jk"), -2926 | ("delimiter", ":"), -2927 | ("method.alias", "lm"), -2928 | ("function.def", "no"), -2929 | ("operator", "="), -2930 | ("function.alias", "pq"), -2931 | ] -2932 | ); -2933 | }); -2934 | } - | -2935 | #[test] -2936 | fn test_query_captures_with_text_conditions() { -2937 | allocations::record(|| { -2938 | let language = get_language("javascript"); -2939 | let query = Query::new( -2940 | &language, -2941 | r#" -2942 | ((identifier) @constant -2943 | (#match? @constant "^[A-Z]{2,}$")) - | -2944 | ((identifier) @constructor -2945 | (#match? @constructor "^[A-Z]")) - | -2946 | ((identifier) @function.builtin -2947 | (#eq? @function.builtin "require")) - | -2948 | ((identifier) @variable.builtin -2949 | (#any-of? @variable.builtin -2950 | "arguments" -2951 | "module" -2952 | "console" -2953 | "window" -2954 | "document")) - | -2955 | ((identifier) @variable -2956 | (#not-match? 
@variable "^(lambda|load)$")) -2957 | "#, -2958 | ) -2959 | .unwrap(); - | -2960 | let source = " -2961 | toad -2962 | load -2963 | panda -2964 | lambda -2965 | const ab = require('./ab'); -2966 | new Cd(EF); -2967 | document; -2968 | module; -2969 | console; -2970 | "; - | -2971 | let mut parser = Parser::new(); -2972 | parser.set_language(&language).unwrap(); -2973 | let tree = parser.parse(source, None).unwrap(); -2974 | let mut cursor = QueryCursor::new(); - | -2975 | let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); -2976 | assert_eq!( -2977 | collect_captures(captures, &query, source), -2978 | &[ -2979 | ("variable", "toad"), -2980 | ("variable", "panda"), -2981 | ("variable", "ab"), -2982 | ("function.builtin", "require"), -2983 | ("variable", "require"), -2984 | ("constructor", "Cd"), -2985 | ("variable", "Cd"), -2986 | ("constant", "EF"), -2987 | ("constructor", "EF"), -2988 | ("variable", "EF"), -2989 | ("variable.builtin", "document"), -2990 | ("variable", "document"), -2991 | ("variable.builtin", "module"), -2992 | ("variable", "module"), -2993 | ("variable.builtin", "console"), -2994 | ("variable", "console"), -2995 | ], -2996 | ); -2997 | }); -2998 | } - | -2999 | #[test] -3000 | fn test_query_captures_with_predicates() { -3001 | allocations::record(|| { -3002 | let language = get_language("javascript"); - | -3003 | let query = Query::new( -3004 | &language, -3005 | r" -3006 | ((call_expression (identifier) @foo) -3007 | (#set! name something) -3008 | (#set! cool) -3009 | (#something! @foo omg)) - | -3010 | ((property_identifier) @bar -3011 | (#is? cool) -3012 | (#is-not? 
name something))", -3013 | ) -3014 | .unwrap(); - | -3015 | assert_eq!( -3016 | query.property_settings(0), -3017 | &[ -3018 | QueryProperty::new("name", Some("something"), None), -3019 | QueryProperty::new("cool", None, None), -3020 | ] -3021 | ); -3022 | assert_eq!( -3023 | query.general_predicates(0), -3024 | &[QueryPredicate { -3025 | operator: "something!".to_string().into_boxed_str(), -3026 | args: vec![ -3027 | QueryPredicateArg::Capture(0), -3028 | QueryPredicateArg::String("omg".to_string().into_boxed_str()), -3029 | ] -3030 | .into_boxed_slice(), -3031 | },] -3032 | ); -3033 | assert_eq!(query.property_settings(1), &[]); -3034 | assert_eq!(query.property_predicates(0), &[]); -3035 | assert_eq!( -3036 | query.property_predicates(1), -3037 | &[ -3038 | (QueryProperty::new("cool", None, None), true), -3039 | (QueryProperty::new("name", Some("something"), None), false), -3040 | ] -3041 | ); - | -3042 | let source = "const a = window.b"; -3043 | let mut parser = Parser::new(); -3044 | parser.set_language(&language).unwrap(); -3045 | let tree = parser.parse(source, None).unwrap(); - | -3046 | let query = Query::new( -3047 | &language, -3048 | r#"((identifier) @variable.builtin -3049 | (#match? @variable.builtin "^(arguments|module|console|window|document)$") -3050 | (#is-not? 
local)) -3051 | "#, -3052 | ) -3053 | .unwrap(); - | -3054 | let mut cursor = QueryCursor::new(); -3055 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); -3056 | let matches = collect_matches(matches, &query, source); - | -3057 | assert_eq!(matches, &[(0, vec![("variable.builtin", "window")])]); -3058 | }); -3059 | } - | -3060 | #[test] -3061 | fn test_query_captures_with_quoted_predicate_args() { -3062 | allocations::record(|| { -3063 | let language = get_language("javascript"); - | -3064 | // Double-quoted strings can contain: -3065 | // * special escape sequences like \n and \r -3066 | // * escaped double quotes with \* -3067 | // * literal backslashes with \\ -3068 | let query = Query::new( -3069 | &language, -3070 | r#" -3071 | ((call_expression (identifier) @foo) -3072 | (#set! one "\"something\ngreat\"")) - | -3073 | ((identifier) -3074 | (#set! two "\\s(\r?\n)*$")) - | -3075 | ((function_declaration) -3076 | (#set! three "\"something\ngreat\"")) -3077 | "#, -3078 | ) -3079 | .unwrap(); - | -3080 | assert_eq!( -3081 | query.property_settings(0), -3082 | &[QueryProperty::new( -3083 | "one", -3084 | Some("\"something\ngreat\""), -3085 | None -3086 | )] -3087 | ); -3088 | assert_eq!( -3089 | query.property_settings(1), -3090 | &[QueryProperty::new("two", Some("\\s(\r?\n)*$"), None)] -3091 | ); -3092 | assert_eq!( -3093 | query.property_settings(2), -3094 | &[QueryProperty::new( -3095 | "three", -3096 | Some("\"something\ngreat\""), -3097 | None -3098 | )] -3099 | ); -3100 | }); -3101 | } - | -3102 | #[test] -3103 | fn test_query_captures_with_duplicates() { -3104 | allocations::record(|| { -3105 | let language = get_language("javascript"); -3106 | let query = Query::new( -3107 | &language, -3108 | r" -3109 | (variable_declarator -3110 | name: (identifier) @function -3111 | value: (function_expression)) - | -3112 | (identifier) @variable -3113 | ", -3114 | ) -3115 | .unwrap(); - | -3116 | let source = " -3117 | var x = function() {}; 
-3118 | "; - | -3119 | let mut parser = Parser::new(); -3120 | parser.set_language(&language).unwrap(); -3121 | let tree = parser.parse(source, None).unwrap(); -3122 | let mut cursor = QueryCursor::new(); - | -3123 | let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); -3124 | assert_eq!( -3125 | collect_captures(captures, &query, source), -3126 | &[("function", "x"), ("variable", "x"),], -3127 | ); -3128 | }); -3129 | } - | -3130 | #[test] -3131 | fn test_query_captures_with_many_nested_results_without_fields() { -3132 | allocations::record(|| { -3133 | let language = get_language("javascript"); - | -3134 | // Search for key-value pairs whose values are anonymous functions. -3135 | let query = Query::new( -3136 | &language, -3137 | r#" -3138 | (pair -3139 | key: _ @method-def -3140 | (arrow_function)) - | -3141 | ":" @colon -3142 | "," @comma -3143 | "#, -3144 | ) -3145 | .unwrap(); - | -3146 | // The `pair` node for key `y` does not match any pattern, but inside of -3147 | // its value, it contains many other `pair` nodes that do match the pattern. -3148 | // The match for the *outer* pair should be terminated *before* descending into -3149 | // the object value, so that we can avoid needing to buffer all of the inner -3150 | // matches. 
-3151 | let method_count = 50; -3152 | let mut source = "x = { y: {\n".to_owned(); -3153 | for i in 0..method_count { -3154 | writeln!(&mut source, " method{i}: $ => null,").unwrap(); -3155 | } -3156 | source.push_str("}};\n"); - | -3157 | let mut parser = Parser::new(); -3158 | parser.set_language(&language).unwrap(); -3159 | let tree = parser.parse(&source, None).unwrap(); -3160 | let mut cursor = QueryCursor::new(); - | -3161 | let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); -3162 | let captures = collect_captures(captures, &query, &source); - | -3163 | assert_eq!( -3164 | &captures[0..13], -3165 | &[ -3166 | ("colon", ":"), -3167 | ("method-def", "method0"), -3168 | ("colon", ":"), -3169 | ("comma", ","), -3170 | ("method-def", "method1"), -3171 | ("colon", ":"), -3172 | ("comma", ","), -3173 | ("method-def", "method2"), -3174 | ("colon", ":"), -3175 | ("comma", ","), -3176 | ("method-def", "method3"), -3177 | ("colon", ":"), -3178 | ("comma", ","), -3179 | ] -3180 | ); - | -3181 | // Ensure that we don't drop matches because of needing to buffer too many. -3182 | assert_eq!(captures.len(), 1 + 3 * method_count); -3183 | }); -3184 | } - | -3185 | #[test] -3186 | fn test_query_captures_with_many_nested_results_with_fields() { -3187 | allocations::record(|| { -3188 | let language = get_language("javascript"); - | -3189 | // Search expressions like `a ? a.b : null` -3190 | let query = Query::new( -3191 | &language, -3192 | r" -3193 | ((ternary_expression -3194 | condition: (identifier) @left -3195 | consequence: (member_expression -3196 | object: (identifier) @right) -3197 | alternative: (null)) -3198 | (#eq? @left @right)) -3199 | ", -3200 | ) -3201 | .unwrap(); - | -3202 | // The outer expression does not match the pattern, but the consequence of the ternary -3203 | // is an object that *does* contain many occurrences of the pattern. -3204 | let count = 50; -3205 | let mut source = "a ? 
{".to_owned(); -3206 | for i in 0..count { -3207 | writeln!(&mut source, " x: y{i} ? y{i}.z : null,").unwrap(); -3208 | } -3209 | source.push_str("} : null;\n"); - | -3210 | let mut parser = Parser::new(); -3211 | parser.set_language(&language).unwrap(); -3212 | let tree = parser.parse(&source, None).unwrap(); -3213 | let mut cursor = QueryCursor::new(); - | -3214 | let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); -3215 | let captures = collect_captures(captures, &query, &source); - | -3216 | assert_eq!( -3217 | &captures[0..20], -3218 | &[ -3219 | ("left", "y0"), -3220 | ("right", "y0"), -3221 | ("left", "y1"), -3222 | ("right", "y1"), -3223 | ("left", "y2"), -3224 | ("right", "y2"), -3225 | ("left", "y3"), -3226 | ("right", "y3"), -3227 | ("left", "y4"), -3228 | ("right", "y4"), -3229 | ("left", "y5"), -3230 | ("right", "y5"), -3231 | ("left", "y6"), -3232 | ("right", "y6"), -3233 | ("left", "y7"), -3234 | ("right", "y7"), -3235 | ("left", "y8"), -3236 | ("right", "y8"), -3237 | ("left", "y9"), -3238 | ("right", "y9"), -3239 | ] -3240 | ); - | -3241 | // Ensure that we don't drop matches because of needing to buffer too many. -3242 | assert_eq!(captures.len(), 2 * count); -3243 | }); -3244 | } - | -3245 | #[test] -3246 | fn test_query_captures_with_too_many_nested_results() { -3247 | allocations::record(|| { -3248 | let language = get_language("javascript"); - | -3249 | // Search for method calls in general, and also method calls with a template string -3250 | // in place of an argument list (aka "tagged template strings") in particular. -3251 | // -3252 | // This second pattern, which looks for the tagged template strings, is expensive to -3253 | // use with the `captures()` method, because: -3254 | // 1. When calling `captures`, all of the captures must be returned in order of their -3255 | // appearance. -3256 | // 2. This pattern captures the root `call_expression`. -3257 | // 3. 
This pattern's result also depends on the final child (the template string). -3258 | // 4. In between the `call_expression` and the possible `template_string`, there can be an -3259 | // arbitrarily deep subtree. -3260 | // -3261 | // This means that, if any patterns match *after* the initial `call_expression` is -3262 | // captured, but before the final `template_string` is found, those matches must -3263 | // be buffered, in order to prevent captures from being returned out-of-order. -3264 | let query = Query::new( -3265 | &language, -3266 | r" -3267 | ;; easy 👇 -3268 | (call_expression -3269 | function: (member_expression -3270 | property: (property_identifier) @method-name)) - | -3271 | ;; hard 👇 -3272 | (call_expression -3273 | function: (member_expression -3274 | property: (property_identifier) @template-tag) -3275 | arguments: (template_string)) @template-call -3276 | ", -3277 | ) -3278 | .unwrap(); - | -3279 | // There are a *lot* of matches in between the beginning of the outer `call_expression` -3280 | // (the call to `a(...).f`), which starts at the beginning of the file, and the final -3281 | // template string, which occurs at the end of the file. The query algorithm imposes a -3282 | // limit on the total number of matches which can be buffered at a time. But we don't -3283 | // want to neglect the inner matches just because of the expensive outer match, so we -3284 | // abandon the outer match (which would have captured `f` as a `template-tag`). 
-3285 | let source = " -3286 | a(b => { -3287 | b.c0().d0 `😄`; -3288 | b.c1().d1 `😄`; -3289 | b.c2().d2 `😄`; -3290 | b.c3().d3 `😄`; -3291 | b.c4().d4 `😄`; -3292 | b.c5().d5 `😄`; -3293 | b.c6().d6 `😄`; -3294 | b.c7().d7 `😄`; -3295 | b.c8().d8 `😄`; -3296 | b.c9().d9 `😄`; -3297 | }).e().f ``; -3298 | " -3299 | .trim(); - | -3300 | let mut parser = Parser::new(); -3301 | parser.set_language(&language).unwrap(); -3302 | let tree = parser.parse(source, None).unwrap(); -3303 | let mut cursor = QueryCursor::new(); -3304 | cursor.set_match_limit(32); -3305 | let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); -3306 | let captures = collect_captures(captures, &query, source); - | -3307 | assert_eq!( -3308 | &captures[0..4], -3309 | &[ -3310 | ("template-call", "b.c0().d0 `😄`"), -3311 | ("method-name", "c0"), -3312 | ("method-name", "d0"), -3313 | ("template-tag", "d0"), -3314 | ] -3315 | ); -3316 | assert_eq!( -3317 | &captures[36..40], -3318 | &[ -3319 | ("template-call", "b.c9().d9 `😄`"), -3320 | ("method-name", "c9"), -3321 | ("method-name", "d9"), -3322 | ("template-tag", "d9"), -3323 | ] -3324 | ); -3325 | assert_eq!( -3326 | &captures[40..], -3327 | &[("method-name", "e"), ("method-name", "f"),] -3328 | ); -3329 | }); -3330 | } - | -3331 | #[test] -3332 | fn test_query_captures_with_definite_pattern_containing_many_nested_matches() { -3333 | allocations::record(|| { -3334 | let language = get_language("javascript"); -3335 | let query = Query::new( -3336 | &language, -3337 | r#" -3338 | (array -3339 | "[" @l-bracket -3340 | "]" @r-bracket) - | -3341 | "." @dot -3342 | "#, -3343 | ) -3344 | .unwrap(); - | -3345 | // The '[' node must be returned before all of the '.' nodes, -3346 | // even though its pattern does not finish until the ']' node -3347 | // at the end of the document. But because the '[' is definite, -3348 | // it can be returned before the pattern finishes matching. 
-3349 | let source = " -3350 | [ -3351 | a.b.c.d.e.f.g.h.i, -3352 | a.b.c.d.e.f.g.h.i, -3353 | a.b.c.d.e.f.g.h.i, -3354 | a.b.c.d.e.f.g.h.i, -3355 | a.b.c.d.e.f.g.h.i, -3356 | ] -3357 | "; - | -3358 | let mut parser = Parser::new(); -3359 | parser.set_language(&language).unwrap(); -3360 | let tree = parser.parse(source, None).unwrap(); -3361 | let mut cursor = QueryCursor::new(); - | -3362 | let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); -3363 | assert_eq!( -3364 | collect_captures(captures, &query, source), -3365 | std::iter::once(&("l-bracket", "[")) -3366 | .chain([("dot", "."); 40].iter()) -3367 | .chain(std::iter::once(&("r-bracket", "]"))) -3368 | .copied() -3369 | .collect::>(), -3370 | ); -3371 | }); -3372 | } - | -3373 | #[test] -3374 | fn test_query_captures_ordered_by_both_start_and_end_positions() { -3375 | allocations::record(|| { -3376 | let language = get_language("javascript"); -3377 | let query = Query::new( -3378 | &language, -3379 | r" -3380 | (call_expression) @call -3381 | (member_expression) @member -3382 | (identifier) @variable -3383 | ", -3384 | ) -3385 | .unwrap(); - | -3386 | let source = " -3387 | a.b(c.d().e).f; -3388 | "; - | -3389 | let mut parser = Parser::new(); -3390 | parser.set_language(&language).unwrap(); -3391 | let tree = parser.parse(source, None).unwrap(); -3392 | let mut cursor = QueryCursor::new(); - | -3393 | let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); -3394 | assert_eq!( -3395 | collect_captures(captures, &query, source), -3396 | &[ -3397 | ("member", "a.b(c.d().e).f"), -3398 | ("call", "a.b(c.d().e)"), -3399 | ("member", "a.b"), -3400 | ("variable", "a"), -3401 | ("member", "c.d().e"), -3402 | ("call", "c.d()"), -3403 | ("member", "c.d"), -3404 | ("variable", "c"), -3405 | ], -3406 | ); -3407 | }); -3408 | } - | -3409 | #[test] -3410 | fn test_query_captures_with_matches_removed() { -3411 | allocations::record(|| { -3412 | let language = 
get_language("javascript"); -3413 | let query = Query::new( -3414 | &language, -3415 | r" -3416 | (binary_expression -3417 | left: (identifier) @left -3418 | operator: _ @op -3419 | right: (identifier) @right) -3420 | ", -3421 | ) -3422 | .unwrap(); - | -3423 | let source = " -3424 | a === b && c > d && e < f; -3425 | "; - | -3426 | let mut parser = Parser::new(); -3427 | parser.set_language(&language).unwrap(); -3428 | let tree = parser.parse(source, None).unwrap(); -3429 | let mut cursor = QueryCursor::new(); - | -3430 | let mut captured_strings = Vec::new(); - | -3431 | let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); -3432 | while let Some((m, i)) = captures.next() { -3433 | let capture = m.captures[*i]; -3434 | let text = capture.node.utf8_text(source.as_bytes()).unwrap(); -3435 | if text == "a" { -3436 | m.remove(); -3437 | continue; -3438 | } -3439 | captured_strings.push(text); -3440 | } - | -3441 | assert_eq!(captured_strings, &["c", ">", "d", "e", "<", "f",]); -3442 | }); -3443 | } - | -3444 | #[test] -3445 | fn test_query_captures_with_matches_removed_before_they_finish() { -3446 | allocations::record(|| { -3447 | let language = get_language("javascript"); -3448 | // When Tree-sitter detects that a pattern is guaranteed to match, -3449 | // it will start to eagerly return the captures that it has found, -3450 | // even though it hasn't matched the entire pattern yet. A -3451 | // namespace_import node always has "*", "as" and then an identifier -3452 | // for children, so captures will be emitted eagerly for this pattern. 
-3453 | let query = Query::new( -3454 | &language, -3455 | r#" -3456 | (namespace_import -3457 | "*" @star -3458 | "as" @as -3459 | (identifier) @identifier) -3460 | "#, -3461 | ) -3462 | .unwrap(); - | -3463 | let source = " -3464 | import * as name from 'module-name'; -3465 | "; - | -3466 | let mut parser = Parser::new(); -3467 | parser.set_language(&language).unwrap(); -3468 | let tree = parser.parse(source, None).unwrap(); -3469 | let mut cursor = QueryCursor::new(); - | -3470 | let mut captured_strings = Vec::new(); -3471 | let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); -3472 | while let Some((m, i)) = captures.next() { -3473 | let capture = m.captures[*i]; -3474 | let text = capture.node.utf8_text(source.as_bytes()).unwrap(); -3475 | if text == "as" { -3476 | m.remove(); -3477 | continue; -3478 | } -3479 | captured_strings.push(text); -3480 | } - | -3481 | // .remove() removes the match before it is finished. The identifier -3482 | // "name" is part of this match, so we expect that removing the "as" -3483 | // capture from the match should prevent "name" from matching: -3484 | assert_eq!(captured_strings, &["*",]); -3485 | }); -3486 | } - | -3487 | #[test] -3488 | fn test_query_captures_and_matches_iterators_are_fused() { -3489 | allocations::record(|| { -3490 | let language = get_language("javascript"); -3491 | let query = Query::new( -3492 | &language, -3493 | r" -3494 | (comment) @comment -3495 | ", -3496 | ) -3497 | .unwrap(); - | -3498 | let source = " -3499 | // one -3500 | // two -3501 | // three -3502 | /* unfinished -3503 | "; - | -3504 | let mut parser = Parser::new(); -3505 | parser.set_language(&language).unwrap(); -3506 | let tree = parser.parse(source, None).unwrap(); -3507 | let mut cursor = QueryCursor::new(); -3508 | let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); - | -3509 | assert_eq!(captures.next().unwrap().0.captures[0].index, 0); -3510 | 
assert_eq!(captures.next().unwrap().0.captures[0].index, 0); -3511 | assert_eq!(captures.next().unwrap().0.captures[0].index, 0); -3512 | assert!(captures.next().is_none()); -3513 | assert!(captures.next().is_none()); -3514 | assert!(captures.next().is_none()); -3515 | drop(captures); - | -3516 | let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); -3517 | assert_eq!(matches.next().unwrap().captures[0].index, 0); -3518 | assert_eq!(matches.next().unwrap().captures[0].index, 0); -3519 | assert_eq!(matches.next().unwrap().captures[0].index, 0); -3520 | assert!(matches.next().is_none()); -3521 | assert!(matches.next().is_none()); -3522 | assert!(matches.next().is_none()); -3523 | }); -3524 | } - | -3525 | #[test] -3526 | fn test_query_text_callback_returns_chunks() { -3527 | allocations::record(|| { -3528 | let language = get_language("javascript"); -3529 | let query = Query::new( -3530 | &language, -3531 | r#" -3532 | ((identifier) @leading_upper -3533 | (#match? @leading_upper "^[A-Z][A-Z_]*[a-z]")) -3534 | ((identifier) @all_upper -3535 | (#match? @all_upper "^[A-Z][A-Z_]*$")) -3536 | ((identifier) @all_lower -3537 | (#match? @all_lower "^[a-z][a-z_]*$")) -3538 | "#, -3539 | ) -3540 | .unwrap(); - | -3541 | let source = "SOMETHING[a] = transform(AnotherThing[b].property[c], PARAMETER);"; - | -3542 | // Store the source code in chunks of 3 bytes, and expose it via -3543 | // an iterator API. 
-3544 | let source_chunks = source.as_bytes().chunks(3).collect::>(); -3545 | let chunks_in_range = |range: std::ops::Range| { -3546 | let mut offset = 0; -3547 | source_chunks.iter().filter_map(move |chunk| { -3548 | let end_offset = offset + chunk.len(); -3549 | if offset < range.end && range.start < end_offset { -3550 | let end_in_chunk = (range.end - offset).min(chunk.len()); -3551 | let start_in_chunk = range.start.max(offset) - offset; -3552 | offset = end_offset; -3553 | Some(&chunk[start_in_chunk..end_in_chunk]) -3554 | } else { -3555 | offset = end_offset; -3556 | None -3557 | } -3558 | }) -3559 | }; -3560 | assert_eq!( -3561 | chunks_in_range(0..9) -3562 | .map(|c| std::str::from_utf8(c).unwrap()) -3563 | .collect::(), -3564 | "SOMETHING", -3565 | ); -3566 | assert_eq!( -3567 | chunks_in_range(15..24) -3568 | .map(|c| std::str::from_utf8(c).unwrap()) -3569 | .collect::(), -3570 | "transform", -3571 | ); - | -3572 | let mut parser = Parser::new(); -3573 | parser.set_language(&language).unwrap(); -3574 | let tree = parser.parse(source, None).unwrap(); -3575 | let mut cursor = QueryCursor::new(); -3576 | let captures = cursor.captures(&query, tree.root_node(), |node: Node| { -3577 | chunks_in_range(node.byte_range()) -3578 | }); - | -3579 | assert_eq!( -3580 | collect_captures(captures, &query, source), -3581 | &[ -3582 | ("all_upper", "SOMETHING"), -3583 | ("all_lower", "a"), -3584 | ("all_lower", "transform"), -3585 | ("leading_upper", "AnotherThing"), -3586 | ("all_lower", "b"), -3587 | ("all_lower", "c"), -3588 | ("all_upper", "PARAMETER"), -3589 | ] -3590 | ); -3591 | }); -3592 | } - | -3593 | #[test] -3594 | fn test_query_start_end_byte_for_pattern() { -3595 | let language = get_language("javascript"); - | -3596 | let patterns_1 = indoc! {r#" -3597 | "+" @operator -3598 | "-" @operator -3599 | "*" @operator -3600 | "=" @operator -3601 | "=>" @operator -3602 | "#}; - | -3603 | let patterns_2 = indoc! 
{" -3604 | (identifier) @a -3605 | (string) @b -3606 | "}; - | -3607 | let patterns_3 = indoc! {" -3608 | ((identifier) @b (#match? @b i)) -3609 | (function_declaration name: (identifier) @c) -3610 | (method_definition name: (property_identifier) @d) -3611 | "}; - | -3612 | let mut source = String::new(); -3613 | source += patterns_1; -3614 | source += patterns_2; -3615 | source += patterns_3; - | -3616 | let query = Query::new(&language, &source).unwrap(); - | -3617 | assert_eq!(query.start_byte_for_pattern(0), 0); -3618 | assert_eq!(query.end_byte_for_pattern(0), "\"+\" @operator\n".len()); -3619 | assert_eq!(query.start_byte_for_pattern(5), patterns_1.len()); -3620 | assert_eq!( -3621 | query.end_byte_for_pattern(5), -3622 | patterns_1.len() + "(identifier) @a\n".len() -3623 | ); -3624 | assert_eq!( -3625 | query.start_byte_for_pattern(7), -3626 | patterns_1.len() + patterns_2.len() -3627 | ); -3628 | assert_eq!( -3629 | query.end_byte_for_pattern(7), -3630 | patterns_1.len() + patterns_2.len() + "((identifier) @b (#match? 
@b i))\n".len() -3631 | ); -3632 | } - | -3633 | #[test] -3634 | fn test_query_capture_names() { -3635 | allocations::record(|| { -3636 | let language = get_language("javascript"); -3637 | let query = Query::new( -3638 | &language, -3639 | r#" -3640 | (if_statement -3641 | condition: (parenthesized_expression (binary_expression -3642 | left: _ @left-operand -3643 | operator: "||" -3644 | right: _ @right-operand)) -3645 | consequence: (statement_block) @body) - | -3646 | (while_statement -3647 | condition: _ @loop-condition) -3648 | "#, -3649 | ) -3650 | .unwrap(); - | -3651 | assert_eq!( -3652 | query.capture_names(), -3653 | ["left-operand", "right-operand", "body", "loop-condition"] -3654 | ); -3655 | }); -3656 | } - | -3657 | #[test] -3658 | fn test_query_lifetime_is_separate_from_nodes_lifetime() { -3659 | allocations::record(|| { -3660 | let query = r"(call_expression) @call"; -3661 | let source = "a(1); b(2);"; - | -3662 | let language = get_language("javascript"); -3663 | let mut parser = Parser::new(); -3664 | parser.set_language(&language).unwrap(); -3665 | let tree = parser.parse(source, None).unwrap(); - | -3666 | fn take_first_node_from_captures<'tree>( -3667 | source: &str, -3668 | query: &str, -3669 | node: Node<'tree>, -3670 | ) -> Node<'tree> { -3671 | // Following 2 lines are redundant but needed to demonstrate -3672 | // more understandable compiler error message -3673 | let language = get_language("javascript"); -3674 | let query = Query::new(&language, query).unwrap(); -3675 | let mut cursor = QueryCursor::new(); -3676 | let node = cursor -3677 | .matches(&query, node, source.as_bytes()) -3678 | .next() -3679 | .unwrap() -3680 | .captures[0] -3681 | .node; -3682 | node -3683 | } - | -3684 | let node = take_first_node_from_captures(source, query, tree.root_node()); -3685 | assert_eq!(node.kind(), "call_expression"); - | -3686 | fn take_first_node_from_matches<'tree>( -3687 | source: &str, -3688 | query: &str, -3689 | node: Node<'tree>, -3690 | ) 
-> Node<'tree> { -3691 | let language = get_language("javascript"); -3692 | let query = Query::new(&language, query).unwrap(); -3693 | let mut cursor = QueryCursor::new(); -3694 | let node = cursor -3695 | .captures(&query, node, source.as_bytes()) -3696 | .next() -3697 | .unwrap() -3698 | .0 -3699 | .captures[0] -3700 | .node; -3701 | node -3702 | } - | -3703 | let node = take_first_node_from_matches(source, query, tree.root_node()); -3704 | assert_eq!(node.kind(), "call_expression"); -3705 | }); -3706 | } - | -3707 | #[test] -3708 | fn test_query_with_no_patterns() { -3709 | allocations::record(|| { -3710 | let language = get_language("javascript"); -3711 | let query = Query::new(&language, "").unwrap(); -3712 | assert!(query.capture_names().is_empty()); -3713 | assert_eq!(query.pattern_count(), 0); -3714 | }); -3715 | } - | -3716 | #[test] -3717 | fn test_query_comments() { -3718 | allocations::record(|| { -3719 | let language = get_language("javascript"); -3720 | let query = Query::new( -3721 | &language, -3722 | " -3723 | ; this is my first comment -3724 | ; i have two comments here -3725 | (function_declaration -3726 | ; there is also a comment here -3727 | ; and here -3728 | name: (identifier) @fn-name)", -3729 | ) -3730 | .unwrap(); - | -3731 | let source = "function one() { }"; -3732 | let mut parser = Parser::new(); -3733 | parser.set_language(&language).unwrap(); -3734 | let tree = parser.parse(source, None).unwrap(); -3735 | let mut cursor = QueryCursor::new(); -3736 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); -3737 | assert_eq!( -3738 | collect_matches(matches, &query, source), -3739 | &[(0, vec![("fn-name", "one")]),], -3740 | ); -3741 | }); -3742 | } - | -3743 | #[test] -3744 | fn test_query_disable_pattern() { -3745 | allocations::record(|| { -3746 | let language = get_language("javascript"); -3747 | let mut query = Query::new( -3748 | &language, -3749 | " -3750 | (function_declaration -3751 | name: (identifier) 
@name) -3752 | (function_declaration -3753 | body: (statement_block) @body) -3754 | (class_declaration -3755 | name: (identifier) @name) -3756 | (class_declaration -3757 | body: (class_body) @body) -3758 | ", -3759 | ) -3760 | .unwrap(); - | -3761 | // disable the patterns that match names -3762 | query.disable_pattern(0); -3763 | query.disable_pattern(2); - | -3764 | let source = "class A { constructor() {} } function b() { return 1; }"; -3765 | let mut parser = Parser::new(); -3766 | parser.set_language(&language).unwrap(); -3767 | let tree = parser.parse(source, None).unwrap(); -3768 | let mut cursor = QueryCursor::new(); -3769 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); -3770 | assert_eq!( -3771 | collect_matches(matches, &query, source), -3772 | &[ -3773 | (3, vec![("body", "{ constructor() {} }")]), -3774 | (1, vec![("body", "{ return 1; }")]), -3775 | ], -3776 | ); -3777 | }); -3778 | } - | -3779 | #[test] -3780 | fn test_query_alternative_predicate_prefix() { -3781 | allocations::record(|| { -3782 | let language = get_language("c"); -3783 | let query = Query::new( -3784 | &language, -3785 | r#" -3786 | ((call_expression -3787 | function: (identifier) @keyword -3788 | arguments: (argument_list -3789 | (string_literal) @function)) -3790 | (.eq? @keyword "DEFUN")) -3791 | "#, -3792 | ) -3793 | .unwrap(); -3794 | let source = r#" -3795 | DEFUN ("identity", Fidentity, Sidentity, 1, 1, 0, -3796 | doc: /* Return the argument unchanged. 
*/ -3797 | attributes: const) -3798 | (Lisp_Object arg) -3799 | { -3800 | return arg; -3801 | } -3802 | "#; -3803 | assert_query_matches( -3804 | &language, -3805 | &query, -3806 | source, -3807 | &[(0, vec![("keyword", "DEFUN"), ("function", "\"identity\"")])], -3808 | ); -3809 | }); -3810 | } - | -3811 | #[test] -3812 | fn test_query_random() { -3813 | use pretty_assertions::assert_eq; - | -3814 | allocations::record(|| { -3815 | let language = get_language("rust"); -3816 | let mut parser = Parser::new(); -3817 | parser.set_language(&language).unwrap(); -3818 | let mut cursor = QueryCursor::new(); -3819 | cursor.set_match_limit(64); - | -3820 | let pattern_tree = parser -3821 | .parse(include_str!("helpers/query_helpers.rs"), None) -3822 | .unwrap(); -3823 | let test_tree = parser -3824 | .parse(include_str!("helpers/query_helpers.rs"), None) -3825 | .unwrap(); - | -3826 | let start_seed = 0; -3827 | let end_seed = start_seed + *ITERATION_COUNT; - | -3828 | for seed in start_seed..(start_seed + end_seed) { -3829 | let seed = seed as u64; -3830 | let mut rand = StdRng::seed_from_u64(seed); -3831 | let (pattern_ast, _) = Pattern::random_pattern_in_tree(&pattern_tree, &mut rand); -3832 | let pattern = pattern_ast.to_string(); -3833 | let expected_matches = pattern_ast.matches_in_tree(&test_tree); - | -3834 | let query = Query::new(&language, &pattern).unwrap_or_else(|e| { -3835 | panic!("failed to build query for pattern {pattern}. 
seed: {seed}\n{e}") -3836 | }); -3837 | let mut actual_matches = Vec::new(); -3838 | let mut match_iter = cursor.matches( -3839 | &query, -3840 | test_tree.root_node(), -3841 | include_bytes!("parser_test.rs").as_ref(), -3842 | ); - | -3843 | while let Some(mat) = match_iter.next() { -3844 | let transformed_match = Match { -3845 | last_node: None, -3846 | captures: mat -3847 | .captures -3848 | .iter() -3849 | .map(|c| (query.capture_names()[c.index as usize], c.node)) -3850 | .collect::>(), -3851 | }; -3852 | actual_matches.push(transformed_match); -3853 | } - | -3854 | // actual_matches.sort_unstable(); -3855 | actual_matches.dedup(); - | -3856 | if !cursor.did_exceed_match_limit() { -3857 | assert_eq!( -3858 | actual_matches, expected_matches, -3859 | "seed: {}, pattern:\n{}", -3860 | seed, pattern -3861 | ); -3862 | } -3863 | } -3864 | }); -3865 | } - | -3866 | #[test] -3867 | fn test_query_is_pattern_guaranteed_at_step() { -3868 | struct Row { -3869 | language: Language, -3870 | description: &'static str, -3871 | pattern: &'static str, -3872 | results_by_substring: &'static [(&'static str, bool)], -3873 | } - | -3874 | let rows = &[ -3875 | Row { -3876 | description: "no guaranteed steps", -3877 | language: get_language("python"), -3878 | pattern: r"(expression_statement (string))", -3879 | results_by_substring: &[("expression_statement", false), ("string", false)], -3880 | }, -3881 | Row { -3882 | description: "all guaranteed steps", -3883 | language: get_language("javascript"), -3884 | pattern: r#"(object "{" "}")"#, -3885 | results_by_substring: &[("object", false), ("{", true), ("}", true)], -3886 | }, -3887 | Row { -3888 | description: "a fallible step that is optional", -3889 | language: get_language("javascript"), -3890 | pattern: r#"(object "{" (identifier)? 
@foo "}")"#, -3891 | results_by_substring: &[ -3892 | ("object", false), -3893 | ("{", true), -3894 | ("(identifier)?", false), -3895 | ("}", true), -3896 | ], -3897 | }, -3898 | Row { -3899 | description: "multiple fallible steps that are optional", -3900 | language: get_language("javascript"), -3901 | pattern: r#"(object "{" (identifier)? @id1 ("," (identifier) @id2)? "}")"#, -3902 | results_by_substring: &[ -3903 | ("object", false), -3904 | ("{", true), -3905 | ("(identifier)? @id1", false), -3906 | ("\",\"", false), -3907 | ("}", true), -3908 | ], -3909 | }, -3910 | Row { -3911 | description: "guaranteed step after fallibe step", -3912 | language: get_language("javascript"), -3913 | pattern: r#"(pair (property_identifier) ":")"#, -3914 | results_by_substring: &[("pair", false), ("property_identifier", false), (":", true)], -3915 | }, -3916 | Row { -3917 | description: "fallible step in between two guaranteed steps", -3918 | language: get_language("javascript"), -3919 | pattern: r#"(ternary_expression -3920 | condition: (_) -3921 | "?" 
-3922 | consequence: (call_expression) -3923 | ":" -3924 | alternative: (_))"#, -3925 | results_by_substring: &[ -3926 | ("condition:", false), -3927 | ("\"?\"", false), -3928 | ("consequence:", false), -3929 | ("\":\"", true), -3930 | ("alternative:", true), -3931 | ], -3932 | }, -3933 | Row { -3934 | description: "one guaranteed step after a repetition", -3935 | language: get_language("javascript"), -3936 | pattern: r#"(object "{" (_) "}")"#, -3937 | results_by_substring: &[("object", false), ("{", false), ("(_)", false), ("}", true)], -3938 | }, -3939 | Row { -3940 | description: "guaranteed steps after multiple repetitions", -3941 | language: get_language("json"), -3942 | pattern: r#"(object "{" (pair) "," (pair) "," (_) "}")"#, -3943 | results_by_substring: &[ -3944 | ("object", false), -3945 | ("{", false), -3946 | ("(pair) \",\" (pair)", false), -3947 | ("(pair) \",\" (_)", false), -3948 | ("\",\" (_)", false), -3949 | ("(_)", true), -3950 | ("}", true), -3951 | ], -3952 | }, -3953 | Row { -3954 | description: "a guaranteed step with a field", -3955 | language: get_language("javascript"), -3956 | pattern: r"(binary_expression left: (expression) right: (_))", -3957 | results_by_substring: &[ -3958 | ("binary_expression", false), -3959 | ("(expression)", false), -3960 | ("(_)", true), -3961 | ], -3962 | }, -3963 | Row { -3964 | description: "multiple guaranteed steps with fields", -3965 | language: get_language("javascript"), -3966 | pattern: r"(function_declaration name: (identifier) body: (statement_block))", -3967 | results_by_substring: &[ -3968 | ("function_declaration", false), -3969 | ("identifier", true), -3970 | ("statement_block", true), -3971 | ], -3972 | }, -3973 | Row { -3974 | description: "nesting, one guaranteed step", -3975 | language: get_language("javascript"), -3976 | pattern: r#" -3977 | (function_declaration -3978 | name: (identifier) -3979 | body: (statement_block "{" (expression_statement) "}"))"#, -3980 | results_by_substring: &[ -3981 
| ("function_declaration", false), -3982 | ("identifier", false), -3983 | ("statement_block", false), -3984 | ("{", false), -3985 | ("expression_statement", false), -3986 | ("}", true), -3987 | ], -3988 | }, -3989 | Row { -3990 | description: "a guaranteed step after some deeply nested hidden nodes", -3991 | language: get_language("ruby"), -3992 | pattern: r#" -3993 | (singleton_class -3994 | value: (constant) -3995 | "end") -3996 | "#, -3997 | results_by_substring: &[ -3998 | ("singleton_class", false), -3999 | ("constant", false), -4000 | ("end", true), -4001 | ], -4002 | }, -4003 | Row { -4004 | description: "nesting, no guaranteed steps", -4005 | language: get_language("javascript"), -4006 | pattern: r" -4007 | (call_expression -4008 | function: (member_expression -4009 | property: (property_identifier) @template-tag) -4010 | arguments: (template_string)) @template-call -4011 | ", -4012 | results_by_substring: &[("property_identifier", false), ("template_string", false)], -4013 | }, -4014 | Row { -4015 | description: "a guaranteed step after a nested node", -4016 | language: get_language("javascript"), -4017 | pattern: r#" -4018 | (subscript_expression -4019 | object: (member_expression -4020 | object: (identifier) @obj -4021 | property: (property_identifier) @prop) -4022 | "[") -4023 | "#, -4024 | results_by_substring: &[ -4025 | ("identifier", false), -4026 | ("property_identifier", false), -4027 | ("[", true), -4028 | ], -4029 | }, -4030 | Row { -4031 | description: "a step that is fallible due to a predicate", -4032 | language: get_language("javascript"), -4033 | pattern: r#" -4034 | (subscript_expression -4035 | object: (member_expression -4036 | object: (identifier) @obj -4037 | property: (property_identifier) @prop) -4038 | "[" -4039 | (#match? 
@prop "foo")) -4040 | "#, -4041 | results_by_substring: &[ -4042 | ("identifier", false), -4043 | ("property_identifier", false), -4044 | ("[", true), -4045 | ], -4046 | }, -4047 | Row { -4048 | description: "alternation where one branch has guaranteed steps", -4049 | language: get_language("javascript"), -4050 | pattern: r" -4051 | [ -4052 | (unary_expression (identifier)) -4053 | (call_expression -4054 | function: (_) -4055 | arguments: (_)) -4056 | (binary_expression right: (call_expression)) -4057 | ] -4058 | ", -4059 | results_by_substring: &[ -4060 | ("identifier", false), -4061 | ("right:", false), -4062 | ("function:", true), -4063 | ("arguments:", true), -4064 | ], -4065 | }, -4066 | Row { -4067 | description: "guaranteed step at the end of an aliased parent node", -4068 | language: get_language("ruby"), -4069 | pattern: r#" -4070 | (method_parameters "(" (identifier) @id")") -4071 | "#, -4072 | results_by_substring: &[("\"(\"", false), ("(identifier)", false), ("\")\"", true)], -4073 | }, -4074 | Row { -4075 | description: "long, but not too long to analyze", -4076 | language: get_language("javascript"), -4077 | pattern: r#" -4078 | (object "{" (pair) (pair) (pair) (pair) "}") -4079 | "#, -4080 | results_by_substring: &[ -4081 | ("\"{\"", false), -4082 | ("(pair)", false), -4083 | ("(pair) \"}\"", false), -4084 | ("\"}\"", true), -4085 | ], -4086 | }, -4087 | Row { -4088 | description: "too long to analyze", -4089 | language: get_language("javascript"), -4090 | pattern: r#" -4091 | (object "{" (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) (pair) "}") -4092 | "#, -4093 | results_by_substring: &[ -4094 | ("\"{\"", false), -4095 | ("(pair)", false), -4096 | ("(pair) \"}\"", false), -4097 | ("\"}\"", false), -4098 | ], -4099 | }, -4100 | Row { -4101 | description: "hidden nodes that have several fields", -4102 | language: get_language("java"), -4103 | pattern: r" -4104 | (method_declaration name: (identifier)) -4105 | ", -4106 | 
results_by_substring: &[("name:", true)], -4107 | }, -4108 | Row { -4109 | description: "top-level non-terminal extra nodes", -4110 | language: get_language("ruby"), -4111 | pattern: r" -4112 | (heredoc_body -4113 | (interpolation) -4114 | (heredoc_end) @end) -4115 | ", -4116 | results_by_substring: &[ -4117 | ("(heredoc_body", false), -4118 | ("(interpolation)", false), -4119 | ("(heredoc_end)", true), -4120 | ], -4121 | }, -4122 | // TODO: figure out why line comments, an extra, are no longer allowed *anywhere* -4123 | // likely culprits are the fact that it's no longer a token itself or that it uses an -4124 | // external token -4125 | // Row { -4126 | // description: "multiple extra nodes", -4127 | // language: get_language("rust"), -4128 | // pattern: r" -4129 | // (call_expression -4130 | // (line_comment) @a -4131 | // (line_comment) @b -4132 | // (arguments)) -4133 | // ", -4134 | // results_by_substring: &[ -4135 | // ("(line_comment) @a", false), -4136 | // ("(line_comment) @b", false), -4137 | // ("(arguments)", true), -4138 | // ], -4139 | // }, -4140 | ]; - | -4141 | allocations::record(|| { -4142 | eprintln!(); - | -4143 | for row in rows { -4144 | if let Some(filter) = EXAMPLE_FILTER.as_ref() { -4145 | if !row.description.contains(filter.as_str()) { -4146 | continue; -4147 | } -4148 | } -4149 | eprintln!(" query example: {:?}", row.description); -4150 | let query = Query::new(&row.language, row.pattern).unwrap(); -4151 | for (substring, is_definite) in row.results_by_substring { -4152 | let offset = row.pattern.find(substring).unwrap(); -4153 | assert_eq!( -4154 | query.is_pattern_guaranteed_at_step(offset), -4155 | *is_definite, -4156 | "Description: {}, Pattern: {:?}, substring: {:?}, expected is_definite to be {}", -4157 | row.description, -4158 | row.pattern -4159 | .split_ascii_whitespace() -4160 | .collect::>() -4161 | .join(" "), -4162 | substring, -4163 | is_definite, -4164 | ); -4165 | } -4166 | } -4167 | }); -4168 | } - | -4169 | #[test] 
-4170 | fn test_query_is_pattern_rooted() { -4171 | struct Row { -4172 | description: &'static str, -4173 | pattern: &'static str, -4174 | is_rooted: bool, -4175 | } - | -4176 | let rows = [ -4177 | Row { -4178 | description: "simple token", -4179 | pattern: r"(identifier)", -4180 | is_rooted: true, -4181 | }, -4182 | Row { -4183 | description: "simple non-terminal", -4184 | pattern: r"(function_definition name: (identifier))", -4185 | is_rooted: true, -4186 | }, -4187 | Row { -4188 | description: "alternative of many tokens", -4189 | pattern: r#"["if" "def" (identifier) (comment)]"#, -4190 | is_rooted: true, -4191 | }, -4192 | Row { -4193 | description: "alternative of many non-terminals", -4194 | pattern: r"[ -4195 | (function_definition name: (identifier)) -4196 | (class_definition name: (identifier)) -4197 | (block) -4198 | ]", -4199 | is_rooted: true, -4200 | }, -4201 | Row { -4202 | description: "two siblings", -4203 | pattern: r#"("{" "}")"#, -4204 | is_rooted: false, -4205 | }, -4206 | Row { -4207 | description: "top-level repetition", -4208 | pattern: r"(comment)*", -4209 | is_rooted: false, -4210 | }, -4211 | Row { -4212 | description: "alternative where one option has two siblings", -4213 | pattern: r#"[ -4214 | (block) -4215 | (class_definition) -4216 | ("(" ")") -4217 | (function_definition) -4218 | ]"#, -4219 | is_rooted: false, -4220 | }, -4221 | Row { -4222 | description: "alternative where one option has a top-level repetition", -4223 | pattern: r"[ -4224 | (block) -4225 | (class_definition) -4226 | (comment)* -4227 | (function_definition) -4228 | ]", -4229 | is_rooted: false, -4230 | }, -4231 | ]; - | -4232 | allocations::record(|| { -4233 | eprintln!(); - | -4234 | let language = get_language("python"); -4235 | for row in &rows { -4236 | if let Some(filter) = EXAMPLE_FILTER.as_ref() { -4237 | if !row.description.contains(filter.as_str()) { -4238 | continue; -4239 | } -4240 | } -4241 | eprintln!(" query example: {:?}", row.description); -4242 | 
let query = Query::new(&language, row.pattern).unwrap(); -4243 | assert_eq!( -4244 | query.is_pattern_rooted(0), -4245 | row.is_rooted, -4246 | "Description: {}, Pattern: {:?}", -4247 | row.description, -4248 | row.pattern -4249 | .split_ascii_whitespace() -4250 | .collect::>() -4251 | .join(" "), -4252 | ); -4253 | } -4254 | }); -4255 | } - | -4256 | #[test] -4257 | fn test_query_is_pattern_non_local() { -4258 | struct Row { -4259 | description: &'static str, -4260 | pattern: &'static str, -4261 | language: Language, -4262 | is_non_local: bool, -4263 | } - | -4264 | let rows = [ -4265 | Row { -4266 | description: "simple token", -4267 | pattern: r"(identifier)", -4268 | language: get_language("python"), -4269 | is_non_local: false, -4270 | }, -4271 | Row { -4272 | description: "siblings that can occur in an argument list", -4273 | pattern: r"((identifier) (identifier))", -4274 | language: get_language("python"), -4275 | is_non_local: true, -4276 | }, -4277 | Row { -4278 | description: "siblings that can occur in a statement block", -4279 | pattern: r"((return_statement) (return_statement))", -4280 | language: get_language("python"), -4281 | is_non_local: true, -4282 | }, -4283 | Row { -4284 | description: "siblings that can occur in a source file", -4285 | pattern: r"((function_definition) (class_definition))", -4286 | language: get_language("python"), -4287 | is_non_local: true, -4288 | }, -4289 | Row { -4290 | description: "siblings that can't occur in any repetition", -4291 | pattern: r#"("{" "}")"#, -4292 | language: get_language("python"), -4293 | is_non_local: false, -4294 | }, -4295 | Row { -4296 | description: "siblings that can't occur in any repetition, wildcard root", -4297 | pattern: r#"(_ "{" "}") @foo"#, -4298 | language: get_language("javascript"), -4299 | is_non_local: false, -4300 | }, -4301 | Row { -4302 | description: "siblings that can occur in a class body, wildcard root", -4303 | pattern: r"(_ (method_definition) (method_definition)) @foo", 
-4304 | language: get_language("javascript"), -4305 | is_non_local: true, -4306 | }, -4307 | Row { -4308 | description: "top-level repetitions that can occur in a class body", -4309 | pattern: r"(method_definition)+ @foo", -4310 | language: get_language("javascript"), -4311 | is_non_local: true, -4312 | }, -4313 | Row { -4314 | description: "top-level repetitions that can occur in a statement block", -4315 | pattern: r"(return_statement)+ @foo", -4316 | language: get_language("javascript"), -4317 | is_non_local: true, -4318 | }, -4319 | Row { -4320 | description: "rooted pattern that can occur in a statement block", -4321 | pattern: r"(return_statement) @foo", -4322 | language: get_language("javascript"), -4323 | is_non_local: false, -4324 | }, -4325 | ]; - | -4326 | allocations::record(|| { -4327 | eprintln!(); - | -4328 | for row in &rows { -4329 | if let Some(filter) = EXAMPLE_FILTER.as_ref() { -4330 | if !row.description.contains(filter.as_str()) { -4331 | continue; -4332 | } -4333 | } -4334 | eprintln!(" query example: {:?}", row.description); -4335 | let query = Query::new(&row.language, row.pattern).unwrap(); -4336 | assert_eq!( -4337 | query.is_pattern_non_local(0), -4338 | row.is_non_local, -4339 | "Description: {}, Pattern: {:?}", -4340 | row.description, -4341 | row.pattern -4342 | .split_ascii_whitespace() -4343 | .collect::>() -4344 | .join(" "), -4345 | ); -4346 | } -4347 | }); -4348 | } - | -4349 | #[test] -4350 | fn test_capture_quantifiers() { -4351 | struct Row { -4352 | description: &'static str, -4353 | language: Language, -4354 | pattern: &'static str, -4355 | capture_quantifiers: &'static [(usize, &'static str, CaptureQuantifier)], -4356 | } - | -4357 | let rows = &[ -4358 | // Simple quantifiers -4359 | Row { -4360 | description: "Top level capture", -4361 | language: get_language("python"), -4362 | pattern: r" -4363 | (module) @mod -4364 | ", -4365 | capture_quantifiers: &[(0, "mod", CaptureQuantifier::One)], -4366 | }, -4367 | Row { -4368 | 
description: "Nested list capture capture", -4369 | language: get_language("javascript"), -4370 | pattern: r" -4371 | (array (_)* @elems) @array -4372 | ", -4373 | capture_quantifiers: &[ -4374 | (0, "array", CaptureQuantifier::One), -4375 | (0, "elems", CaptureQuantifier::ZeroOrMore), -4376 | ], -4377 | }, -4378 | Row { -4379 | description: "Nested non-empty list capture capture", -4380 | language: get_language("javascript"), -4381 | pattern: r" -4382 | (array (_)+ @elems) @array -4383 | ", -4384 | capture_quantifiers: &[ -4385 | (0, "array", CaptureQuantifier::One), -4386 | (0, "elems", CaptureQuantifier::OneOrMore), -4387 | ], -4388 | }, -4389 | // Nested quantifiers -4390 | Row { -4391 | description: "capture nested in optional pattern", -4392 | language: get_language("javascript"), -4393 | pattern: r" -4394 | (array (call_expression (arguments (_) @arg))? @call) @array -4395 | ", -4396 | capture_quantifiers: &[ -4397 | (0, "array", CaptureQuantifier::One), -4398 | (0, "call", CaptureQuantifier::ZeroOrOne), -4399 | (0, "arg", CaptureQuantifier::ZeroOrOne), -4400 | ], -4401 | }, -4402 | Row { -4403 | description: "optional capture nested in non-empty list pattern", -4404 | language: get_language("javascript"), -4405 | pattern: r" -4406 | (array (call_expression (arguments (_)? @arg))+ @call) @array -4407 | ", -4408 | capture_quantifiers: &[ -4409 | (0, "array", CaptureQuantifier::One), -4410 | (0, "call", CaptureQuantifier::OneOrMore), -4411 | (0, "arg", CaptureQuantifier::ZeroOrMore), -4412 | ], -4413 | }, -4414 | Row { -4415 | description: "non-empty list capture nested in optional pattern", -4416 | language: get_language("javascript"), -4417 | pattern: r" -4418 | (array (call_expression (arguments (_)+ @args))? 
@call) @array -4419 | ", -4420 | capture_quantifiers: &[ -4421 | (0, "array", CaptureQuantifier::One), -4422 | (0, "call", CaptureQuantifier::ZeroOrOne), -4423 | (0, "args", CaptureQuantifier::ZeroOrMore), -4424 | ], -4425 | }, -4426 | // Quantifiers in alternations -4427 | Row { -4428 | description: "capture is the same in all alternatives", -4429 | language: get_language("javascript"), -4430 | pattern: r"[ -4431 | (function_declaration name:(identifier) @name) -4432 | (call_expression function:(identifier) @name) -4433 | ]", -4434 | capture_quantifiers: &[(0, "name", CaptureQuantifier::One)], -4435 | }, -4436 | Row { -4437 | description: "capture appears in some alternatives", -4438 | language: get_language("javascript"), -4439 | pattern: r"[ -4440 | (function_declaration name:(identifier) @name) -4441 | (function_expression) -4442 | ] @fun", -4443 | capture_quantifiers: &[ -4444 | (0, "fun", CaptureQuantifier::One), -4445 | (0, "name", CaptureQuantifier::ZeroOrOne), -4446 | ], -4447 | }, -4448 | Row { -4449 | description: "capture has different quantifiers in alternatives", -4450 | language: get_language("javascript"), -4451 | pattern: r"[ -4452 | (call_expression arguments: (arguments (_)+ @args)) -4453 | (new_expression arguments: (arguments (_)? @args)) -4454 | ] @call", -4455 | capture_quantifiers: &[ -4456 | (0, "call", CaptureQuantifier::One), -4457 | (0, "args", CaptureQuantifier::ZeroOrMore), -4458 | ], -4459 | }, -4460 | // Quantifiers in siblings -4461 | Row { -4462 | description: "siblings have different captures with different quantifiers", -4463 | language: get_language("javascript"), -4464 | pattern: r" -4465 | (call_expression (arguments (identifier)? 
@self (_)* @args)) @call -4466 | ", -4467 | capture_quantifiers: &[ -4468 | (0, "call", CaptureQuantifier::One), -4469 | (0, "self", CaptureQuantifier::ZeroOrOne), -4470 | (0, "args", CaptureQuantifier::ZeroOrMore), -4471 | ], -4472 | }, -4473 | Row { -4474 | description: "siblings have same capture with different quantifiers", -4475 | language: get_language("javascript"), -4476 | pattern: r" -4477 | (call_expression (arguments (identifier) @args (_)* @args)) @call -4478 | ", -4479 | capture_quantifiers: &[ -4480 | (0, "call", CaptureQuantifier::One), -4481 | (0, "args", CaptureQuantifier::OneOrMore), -4482 | ], -4483 | }, -4484 | // Combined scenarios -4485 | Row { -4486 | description: "combined nesting, alternatives, and siblings", -4487 | language: get_language("javascript"), -4488 | pattern: r" -4489 | (array -4490 | (call_expression -4491 | (arguments [ -4492 | (identifier) @self -4493 | (_)+ @args -4494 | ]) -4495 | )+ @call -4496 | ) @array -4497 | ", -4498 | capture_quantifiers: &[ -4499 | (0, "array", CaptureQuantifier::One), -4500 | (0, "call", CaptureQuantifier::OneOrMore), -4501 | (0, "self", CaptureQuantifier::ZeroOrMore), -4502 | (0, "args", CaptureQuantifier::ZeroOrMore), -4503 | ], -4504 | }, -4505 | // Multiple patterns -4506 | Row { -4507 | description: "multiple patterns", -4508 | language: get_language("javascript"), -4509 | pattern: r" -4510 | (function_declaration name: (identifier) @x) -4511 | (statement_identifier) @y -4512 | (property_identifier)+ @z -4513 | (array (identifier)* @x) -4514 | ", -4515 | capture_quantifiers: &[ -4516 | // x -4517 | (0, "x", CaptureQuantifier::One), -4518 | (1, "x", CaptureQuantifier::Zero), -4519 | (2, "x", CaptureQuantifier::Zero), -4520 | (3, "x", CaptureQuantifier::ZeroOrMore), -4521 | // y -4522 | (0, "y", CaptureQuantifier::Zero), -4523 | (1, "y", CaptureQuantifier::One), -4524 | (2, "y", CaptureQuantifier::Zero), -4525 | (3, "y", CaptureQuantifier::Zero), -4526 | // z -4527 | (0, "z", 
CaptureQuantifier::Zero), -4528 | (1, "z", CaptureQuantifier::Zero), -4529 | (2, "z", CaptureQuantifier::OneOrMore), -4530 | (3, "z", CaptureQuantifier::Zero), -4531 | ], -4532 | }, -4533 | Row { -4534 | description: "multiple alternatives", -4535 | language: get_language("javascript"), -4536 | pattern: r" -4537 | [ -4538 | (array (identifier) @x) -4539 | (function_declaration name: (identifier)+ @x) -4540 | ] -4541 | [ -4542 | (array (identifier) @x) -4543 | (function_declaration name: (identifier)+ @x) -4544 | ] -4545 | ", -4546 | capture_quantifiers: &[ -4547 | (0, "x", CaptureQuantifier::OneOrMore), -4548 | (1, "x", CaptureQuantifier::OneOrMore), -4549 | ], -4550 | }, -4551 | ]; - | -4552 | allocations::record(|| { -4553 | eprintln!(); - | -4554 | for row in rows { -4555 | if let Some(filter) = EXAMPLE_FILTER.as_ref() { -4556 | if !row.description.contains(filter.as_str()) { -4557 | continue; -4558 | } -4559 | } -4560 | eprintln!(" query example: {:?}", row.description); -4561 | let query = Query::new(&row.language, row.pattern).unwrap(); -4562 | for (pattern, capture, expected_quantifier) in row.capture_quantifiers { -4563 | let index = query.capture_index_for_name(capture).unwrap(); -4564 | let actual_quantifier = query.capture_quantifiers(*pattern)[index as usize]; -4565 | assert_eq!( -4566 | actual_quantifier, -4567 | *expected_quantifier, -4568 | "Description: {}, Pattern: {:?}, expected quantifier of @{} to be {:?} instead of {:?}", -4569 | row.description, -4570 | row.pattern -4571 | .split_ascii_whitespace() -4572 | .collect::>() -4573 | .join(" "), -4574 | capture, -4575 | *expected_quantifier, -4576 | actual_quantifier, -4577 | ); -4578 | } -4579 | } -4580 | }); -4581 | } - | -4582 | #[test] -4583 | fn test_query_quantified_captures() { -4584 | struct Row { -4585 | description: &'static str, -4586 | language: Language, -4587 | code: &'static str, -4588 | pattern: &'static str, -4589 | captures: &'static [(&'static str, &'static str)], -4590 | } - | 
-4591 | // #[rustfmt::skip] -4592 | let rows = &[ -4593 | Row { -4594 | description: "doc comments where all must match the prefix", -4595 | language: get_language("c"), -4596 | code: indoc! {" -4597 | /// foo -4598 | /// bar -4599 | /// baz - | -4600 | void main() {} - | -4601 | /// qux -4602 | /// quux -4603 | // quuz -4604 | "}, -4605 | pattern: r#" -4606 | ((comment)+ @comment.documentation -4607 | (#match? @comment.documentation "^///")) -4608 | "#, -4609 | captures: &[ -4610 | ("comment.documentation", "/// foo"), -4611 | ("comment.documentation", "/// bar"), -4612 | ("comment.documentation", "/// baz"), -4613 | ], -4614 | }, -4615 | Row { -4616 | description: "doc comments where one must match the prefix", -4617 | language: get_language("c"), -4618 | code: indoc! {" -4619 | /// foo -4620 | /// bar -4621 | /// baz - | -4622 | void main() {} - | -4623 | /// qux -4624 | /// quux -4625 | // quuz -4626 | "}, -4627 | pattern: r#" -4628 | ((comment)+ @comment.documentation -4629 | (#any-match? 
@comment.documentation "^///")) -4630 | "#, -4631 | captures: &[ -4632 | ("comment.documentation", "/// foo"), -4633 | ("comment.documentation", "/// bar"), -4634 | ("comment.documentation", "/// baz"), -4635 | ("comment.documentation", "/// qux"), -4636 | ("comment.documentation", "/// quux"), -4637 | ("comment.documentation", "// quuz"), -4638 | ], -4639 | }, -4640 | ]; - | -4641 | allocations::record(|| { -4642 | for row in rows { -4643 | eprintln!(" quantified query example: {:?}", row.description); - | -4644 | let mut parser = Parser::new(); -4645 | parser.set_language(&row.language).unwrap(); -4646 | let tree = parser.parse(row.code, None).unwrap(); - | -4647 | let query = Query::new(&row.language, row.pattern).unwrap(); - | -4648 | let mut cursor = QueryCursor::new(); -4649 | let matches = cursor.captures(&query, tree.root_node(), row.code.as_bytes()); - | -4650 | assert_eq!(collect_captures(matches, &query, row.code), row.captures); -4651 | } -4652 | }); -4653 | } - | -4654 | #[test] -4655 | fn test_query_max_start_depth() { -4656 | struct Row { -4657 | description: &'static str, -4658 | pattern: &'static str, -4659 | depth: u32, -4660 | matches: &'static [(usize, &'static [(&'static str, &'static str)])], -4661 | } - | -4662 | let source = indoc! 
{" -4663 | if (a1 && a2) { -4664 | if (b1 && b2) { } -4665 | if (c) { } -4666 | } -4667 | if (d) { -4668 | if (e1 && e2) { } -4669 | if (f) { } -4670 | } -4671 | "}; - | -4672 | #[rustfmt::skip] -4673 | let rows = &[ -4674 | Row { -4675 | description: "depth 0: match translation unit", -4676 | depth: 0, -4677 | pattern: r" -4678 | (translation_unit) @capture -4679 | ", -4680 | matches: &[ -4681 | (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}\nif (d) {\n if (e1 && e2) { }\n if (f) { }\n}\n")]), -4682 | ] -4683 | }, -4684 | Row { -4685 | description: "depth 0: match none", -4686 | depth: 0, -4687 | pattern: r" -4688 | (if_statement) @capture -4689 | ", -4690 | matches: &[] -4691 | }, -4692 | Row { -4693 | description: "depth 1: match 2 if statements at the top level", -4694 | depth: 1, -4695 | pattern: r" -4696 | (if_statement) @capture -4697 | ", -4698 | matches : &[ -4699 | (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), -4700 | (0, &[("capture", "if (d) {\n if (e1 && e2) { }\n if (f) { }\n}")]), -4701 | ] -4702 | }, -4703 | Row { -4704 | description: "depth 1 with deep pattern: match the only the first if statement", -4705 | depth: 1, -4706 | pattern: r" -4707 | (if_statement -4708 | condition: (parenthesized_expression -4709 | (binary_expression) -4710 | ) -4711 | ) @capture -4712 | ", -4713 | matches: &[ -4714 | (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), -4715 | ] -4716 | }, -4717 | Row { -4718 | description: "depth 3 with deep pattern: match all if statements with a binexpr condition", -4719 | depth: 3, -4720 | pattern: r" -4721 | (if_statement -4722 | condition: (parenthesized_expression -4723 | (binary_expression) -4724 | ) -4725 | ) @capture -4726 | ", -4727 | matches: &[ -4728 | (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), -4729 | (0, &[("capture", "if (b1 && b2) { }")]), -4730 | (0, &[("capture", "if (e1 && e2) { }")]), -4731 | ] -4732 | }, 
-4733 | ]; - | -4734 | allocations::record(|| { -4735 | let language = get_language("c"); -4736 | let mut parser = Parser::new(); -4737 | parser.set_language(&language).unwrap(); -4738 | let tree = parser.parse(source, None).unwrap(); -4739 | let mut cursor = QueryCursor::new(); - | -4740 | for row in rows { -4741 | eprintln!(" query example: {:?}", row.description); - | -4742 | let query = Query::new(&language, row.pattern).unwrap(); -4743 | cursor.set_max_start_depth(Some(row.depth)); - | -4744 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); -4745 | let expected = row -4746 | .matches -4747 | .iter() -4748 | .map(|x| (x.0, x.1.to_vec())) -4749 | .collect::>(); - | -4750 | assert_eq!(collect_matches(matches, &query, source), expected); -4751 | } -4752 | }); -4753 | } - | -4754 | #[test] -4755 | fn test_query_error_does_not_oob() { -4756 | let language = get_language("javascript"); - | -4757 | assert_eq!( -4758 | Query::new(&language, "(clas").unwrap_err(), -4759 | QueryError { -4760 | row: 0, -4761 | offset: 1, -4762 | column: 1, -4763 | kind: QueryErrorKind::NodeType, -4764 | message: "\"clas\"".to_string() -4765 | } -4766 | ); -4767 | } - | -4768 | #[test] -4769 | fn test_consecutive_zero_or_modifiers() { -4770 | let language = get_language("javascript"); -4771 | let mut parser = Parser::new(); -4772 | parser.set_language(&language).unwrap(); - | -4773 | let zero_source = ""; -4774 | let three_source = "/**/ /**/ /**/"; - | -4775 | let zero_tree = parser.parse(zero_source, None).unwrap(); -4776 | let three_tree = parser.parse(three_source, None).unwrap(); - | -4777 | let tests = [ -4778 | "(comment)*** @capture", -4779 | "(comment)??? @capture", -4780 | "(comment)*?* @capture", -4781 | "(comment)?*? 
@capture", -4782 | ]; - | -4783 | for test in tests { -4784 | let query = Query::new(&language, test).unwrap(); - | -4785 | let mut cursor = QueryCursor::new(); -4786 | let mut matches = cursor.matches(&query, zero_tree.root_node(), zero_source.as_bytes()); -4787 | assert!(matches.next().is_some()); - | -4788 | let mut cursor = QueryCursor::new(); -4789 | let mut matches = cursor.matches(&query, three_tree.root_node(), three_source.as_bytes()); - | -4790 | let mut len_3 = false; -4791 | let mut len_1 = false; - | -4792 | while let Some(m) = matches.next() { -4793 | if m.captures.len() == 3 { -4794 | len_3 = true; -4795 | } -4796 | if m.captures.len() == 1 { -4797 | len_1 = true; -4798 | } -4799 | } - | -4800 | assert_eq!(len_3, test.contains('*')); -4801 | assert_eq!(len_1, test.contains("???")); -4802 | } -4803 | } - | -4804 | #[test] -4805 | fn test_query_max_start_depth_more() { -4806 | struct Row { -4807 | depth: u32, -4808 | matches: &'static [(usize, &'static [(&'static str, &'static str)])], -4809 | } - | -4810 | let source = indoc! 
{" -4811 | { -4812 | { } -4813 | { -4814 | { } -4815 | } -4816 | } -4817 | "}; - | -4818 | #[rustfmt::skip] -4819 | let rows = &[ -4820 | Row { -4821 | depth: 0, -4822 | matches: &[ -4823 | (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]) -4824 | ] -4825 | }, -4826 | Row { -4827 | depth: 1, -4828 | matches: &[ -4829 | (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]), -4830 | (0, &[("capture", "{ }")]), -4831 | (0, &[("capture", "{\n { }\n }")]) -4832 | ] -4833 | }, -4834 | Row { -4835 | depth: 2, -4836 | matches: &[ -4837 | (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]), -4838 | (0, &[("capture", "{ }")]), -4839 | (0, &[("capture", "{\n { }\n }")]), -4840 | (0, &[("capture", "{ }")]), -4841 | ] -4842 | }, -4843 | ]; - | -4844 | allocations::record(|| { -4845 | let language = get_language("c"); -4846 | let mut parser = Parser::new(); -4847 | parser.set_language(&language).unwrap(); -4848 | let tree = parser.parse(source, None).unwrap(); -4849 | let mut cursor = QueryCursor::new(); -4850 | let query = Query::new(&language, "(compound_statement) @capture").unwrap(); - | -4851 | let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); -4852 | let node = matches.next().unwrap().captures[0].node; -4853 | assert_eq!(node.kind(), "compound_statement"); - | -4854 | for row in rows { -4855 | eprintln!(" depth: {}", row.depth); - | -4856 | cursor.set_max_start_depth(Some(row.depth)); - | -4857 | let matches = cursor.matches(&query, node, source.as_bytes()); -4858 | let expected = row -4859 | .matches -4860 | .iter() -4861 | .map(|x| (x.0, x.1.to_vec())) -4862 | .collect::>(); - | -4863 | assert_eq!(collect_matches(matches, &query, source), expected); -4864 | } -4865 | }); -4866 | } - | -4867 | #[test] -4868 | fn test_grammar_with_aliased_literal_query() { -4869 | // module.exports = grammar({ -4870 | // name: 'test', -4871 | // -4872 | // rules: { -4873 | // source: $ => repeat(choice($.compound_statement, $.expansion)), -4874 | // -4875 | // 
compound_statement: $ => seq(alias(token(prec(-1, '}')), '}')), -4876 | // -4877 | // expansion: $ => seq('}'), -4878 | // }, -4879 | // }); -4880 | let (parser_name, parser_code) = generate_parser( -4881 | r#" -4882 | { -4883 | "name": "test", -4884 | "rules": { -4885 | "source": { -4886 | "type": "REPEAT", -4887 | "content": { -4888 | "type": "CHOICE", -4889 | "members": [ -4890 | { -4891 | "type": "SYMBOL", -4892 | "name": "compound_statement" -4893 | }, -4894 | { -4895 | "type": "SYMBOL", -4896 | "name": "expansion" -4897 | } -4898 | ] -4899 | } -4900 | }, -4901 | "compound_statement": { -4902 | "type": "SEQ", -4903 | "members": [ -4904 | { -4905 | "type": "ALIAS", -4906 | "content": { -4907 | "type": "TOKEN", -4908 | "content": { -4909 | "type": "PREC", -4910 | "value": -1, -4911 | "content": { -4912 | "type": "STRING", -4913 | "value": "}" -4914 | } -4915 | } -4916 | }, -4917 | "named": false, -4918 | "value": "}" -4919 | } -4920 | ] -4921 | }, -4922 | "expansion": { -4923 | "type": "SEQ", -4924 | "members": [ -4925 | { -4926 | "type": "STRING", -4927 | "value": "}" -4928 | } -4929 | ] -4930 | } -4931 | } -4932 | } -4933 | "#, -4934 | ) -4935 | .unwrap(); - | -4936 | let language = get_test_language(&parser_name, &parser_code, None); - | -4937 | let query = Query::new( -4938 | &language, -4939 | r#" -4940 | (compound_statement "}" @bracket1) -4941 | (expansion "}" @bracket2) -4942 | "#, -4943 | ); - | -4944 | assert!(query.is_ok()); -4945 | } - | -4946 | #[test] -4947 | fn test_query_with_first_child_in_group_is_anchor() { -4948 | let language = get_language("c"); -4949 | let source_code = r"void fun(int a, char b, int c) { };"; -4950 | let query = r#" -4951 | (parameter_list -4952 | . -4953 | ((parameter_declaration) @constant -4954 | (#match? 
@constant "^int")))"#; -4955 | let query = Query::new(&language, query).unwrap(); -4956 | assert_query_matches( -4957 | &language, -4958 | &query, -4959 | source_code, -4960 | &[(0, vec![("constant", "int a")])], -4961 | ); -4962 | } - | -4963 | // This test needs be executed with UBSAN enabled to check for regressions: -4964 | // ``` -4965 | // UBSAN_OPTIONS="halt_on_error=1" \ -4966 | // CFLAGS="-fsanitize=undefined" \ -4967 | // RUSTFLAGS="-lubsan" \ -4968 | // cargo test --target $(rustc -vV | sed -nr 's/^host: //p') -- --test-threads 1 -4969 | // ``` -4970 | #[test] -4971 | fn test_query_compiler_oob_access() { -4972 | let language = get_language("java"); -4973 | // UBSAN should not report any OOB access -4974 | assert!(Query::new(&language, "(package_declaration _ (_) @name _)").is_ok()); -4975 | } - | -4976 | #[test] -4977 | fn test_query_wildcard_with_immediate_first_child() { -4978 | let language = get_language("javascript"); -4979 | let query = Query::new(&language, "(_ . (identifier) @firstChild)").unwrap(); -4980 | let source = "function name(one, two, three) { }"; - | -4981 | assert_query_matches( -4982 | &language, -4983 | &query, -4984 | source, -4985 | &[ -4986 | (0, vec![("firstChild", "name")]), -4987 | (0, vec![("firstChild", "one")]), -4988 | ], -4989 | ); -4990 | } - | -4991 | #[test] -4992 | fn test_query_on_empty_source_code() { -4993 | let language = get_language("javascript"); -4994 | let source_code = ""; -4995 | let query = "(program) @program"; -4996 | let query = Query::new(&language, query).unwrap(); -4997 | assert_query_matches( -4998 | &language, -4999 | &query, -5000 | source_code, -5001 | &[(0, vec![("program", "")])], -5002 | ); -5003 | } - | -5004 | #[test] -5005 | fn test_query_execution_with_timeout() { -5006 | let language = get_language("javascript"); -5007 | let mut parser = Parser::new(); -5008 | parser.set_language(&language).unwrap(); - | -5009 | let source_code = "function foo() { while (true) { } }\n".repeat(1000); 
-5010 | let tree = parser.parse(&source_code, None).unwrap(); - | -5011 | let query = Query::new(&language, "(function_declaration) @function").unwrap(); -5012 | let mut cursor = QueryCursor::new(); - | -5013 | let start_time = std::time::Instant::now(); -5014 | let matches = cursor -5015 | .matches_with_options( -5016 | &query, -5017 | tree.root_node(), -5018 | source_code.as_bytes(), -5019 | QueryCursorOptions::new().progress_callback(&mut |_| { -5020 | if start_time.elapsed().as_micros() > 1000 { -5021 | ControlFlow::Break(()) -5022 | } else { -5023 | ControlFlow::Continue(()) -5024 | } -5025 | }), -5026 | ) -5027 | .count(); -5028 | assert!(matches < 1000); - | -5029 | let matches = cursor -5030 | .matches(&query, tree.root_node(), source_code.as_bytes()) -5031 | .count(); -5032 | assert_eq!(matches, 1000); -5033 | } - | -5034 | #[test] -5035 | fn test_query_execution_with_points_causing_underflow() { -5036 | let language = get_language("rust"); -5037 | let mut parser = Parser::new(); -5038 | parser.set_language(&language).unwrap(); - | -5039 | #[allow(clippy::literal_string_with_formatting_args)] -5040 | let code = r#"fn main() { -5041 | println!("{:?}", foo()); -5042 | }"#; -5043 | parser -5044 | .set_included_ranges(&[Range { -5045 | start_byte: 24, -5046 | end_byte: 39, -5047 | start_point: Point::new(0, 0), // 5, 12 -5048 | end_point: Point::new(0, 0), // 5, 27 -5049 | }]) -5050 | .unwrap(); - | -5051 | let query = Query::new(&language, "(call_expression) @cap").unwrap(); -5052 | let mut cursor = QueryCursor::new(); - | -5053 | let mut tree = parser.parse(code, None).unwrap(); - | -5054 | let matches = { -5055 | let root_node = tree.root_node(); -5056 | let matches = cursor.matches(&query, root_node, code.as_bytes()); -5057 | collect_matches(matches, &query, code) -5058 | .into_iter() -5059 | .map(|(i, m)| { -5060 | ( -5061 | i, -5062 | m.into_iter() -5063 | .map(|(k, v)| (k.to_string(), v.to_string())) -5064 | .collect::>(), -5065 | ) -5066 | }) -5067 | 
.collect::>() -5068 | }; - | -5069 | tree.edit(&InputEdit { -5070 | start_byte: 40, -5071 | old_end_byte: 40, -5072 | new_end_byte: 41, -5073 | start_position: Point::new(1, 28), -5074 | old_end_position: Point::new(1, 28), -5075 | new_end_position: Point::new(2, 0), -5076 | }); - | -5077 | let tree2 = parser.parse(code, Some(&tree)).unwrap(); - | -5078 | let matches2 = { -5079 | let root_node = tree2.root_node(); -5080 | let matches = cursor.matches(&query, root_node, code.as_bytes()); -5081 | collect_matches(matches, &query, code) -5082 | .into_iter() -5083 | .map(|(i, m)| { -5084 | ( -5085 | i, -5086 | m.into_iter() -5087 | .map(|(k, v)| (k.to_string(), v.to_string())) -5088 | .collect::>(), -5089 | ) -5090 | }) -5091 | .collect::>() -5092 | }; - | -5093 | assert_eq!(matches, matches2); -5094 | } - | -5095 | #[test] -5096 | fn test_wildcard_behavior_before_anchor() { -5097 | let language = get_language("python"); -5098 | let mut parser = Parser::new(); -5099 | parser.set_language(&language).unwrap(); - | -5100 | let source = " -5101 | (a, b) -5102 | (c, d,) -5103 | "; - | -5104 | // In this query, we're targeting any *named* node immediately before a closing parenthesis. -5105 | let query = Query::new(&language, r#"(tuple (_) @last . ")" .) @match"#).unwrap(); -5106 | assert_query_matches( -5107 | &language, -5108 | &query, -5109 | source, -5110 | &[ -5111 | (0, vec![("match", "(a, b)"), ("last", "b")]), -5112 | (0, vec![("match", "(c, d,)"), ("last", "d")]), -5113 | ], -5114 | ); - | -5115 | // In this query, we're targeting *any* node immediately before a closing -5116 | // parenthesis. -5117 | let query = Query::new(&language, r#"(tuple _ @last . ")" .) 
@match"#).unwrap(); -5118 | assert_query_matches( -5119 | &language, -5120 | &query, -5121 | source, -5122 | &[ -5123 | (0, vec![("match", "(a, b)"), ("last", "b")]), -5124 | (0, vec![("match", "(c, d,)"), ("last", ",")]), -5125 | ], -5126 | ); -5127 | } - | -5128 | #[test] -5129 | fn test_pattern_alternatives_follow_last_child_constraint() { -5130 | let language = get_language("rust"); -5131 | let mut parser = Parser::new(); -5132 | parser.set_language(&language).unwrap(); - | -5133 | let code = " -5134 | fn f() { -5135 | if a {} // <- should NOT match -5136 | if b {} -5137 | }"; - | -5138 | let tree = parser.parse(code, None).unwrap(); -5139 | let mut cursor = QueryCursor::new(); - | -5140 | let query = Query::new( -5141 | &language, -5142 | "(block -5143 | [ -5144 | (type_cast_expression) -5145 | (expression_statement) -5146 | ] @last -5147 | . -5148 | )", -5149 | ) -5150 | .unwrap(); - | -5151 | let matches = { -5152 | let root_node = tree.root_node(); -5153 | let matches = cursor.matches(&query, root_node, code.as_bytes()); -5154 | collect_matches(matches, &query, code) -5155 | .into_iter() -5156 | .map(|(i, m)| { -5157 | ( -5158 | i, -5159 | m.into_iter() -5160 | .map(|(k, v)| (k.to_string(), v.to_string())) -5161 | .collect::>(), -5162 | ) -5163 | }) -5164 | .collect::>() -5165 | }; - | -5166 | let flipped_query = Query::new( -5167 | &language, -5168 | "(block -5169 | [ -5170 | (expression_statement) -5171 | (type_cast_expression) -5172 | ] @last -5173 | . 
-5174 | )", -5175 | ) -5176 | .unwrap(); - | -5177 | let flipped_matches = { -5178 | let root_node = tree.root_node(); -5179 | let matches = cursor.matches(&flipped_query, root_node, code.as_bytes()); -5180 | collect_matches(matches, &flipped_query, code) -5181 | .into_iter() -5182 | .map(|(i, m)| { -5183 | ( -5184 | i, -5185 | m.into_iter() -5186 | .map(|(k, v)| (k.to_string(), v.to_string())) -5187 | .collect::>(), -5188 | ) -5189 | }) -5190 | .collect::>() -5191 | }; - | -5192 | assert_eq!( -5193 | matches, -5194 | vec![(0, vec![(String::from("last"), String::from("if b {}"))])] -5195 | ); -5196 | assert_eq!(matches, flipped_matches); -5197 | } - | -5198 | #[test] -5199 | fn test_wildcard_parent_allows_fallible_child_patterns() { -5200 | let language = get_language("javascript"); -5201 | let mut parser = Parser::new(); -5202 | parser.set_language(&language).unwrap(); - | -5203 | let source_code = r#" -5204 | function foo() { -5205 | "bar" -5206 | } -5207 | "#; - | -5208 | let query = Query::new( -5209 | &language, -5210 | "(function_declaration -5211 | (_ -5212 | (expression_statement) -5213 | ) -5214 | ) @part", -5215 | ) -5216 | .unwrap(); - | -5217 | assert_query_matches( -5218 | &language, -5219 | &query, -5220 | source_code, -5221 | &[(0, vec![("part", "function foo() {\n \"bar\"\n}")])], -5222 | ); -5223 | } - | -5224 | #[test] -5225 | fn test_unfinished_captures_are_not_definite_with_pending_anchors() { -5226 | let language = get_language("javascript"); -5227 | let mut parser = Parser::new(); -5228 | parser.set_language(&language).unwrap(); - | -5229 | let source_code = " -5230 | const foo = [ -5231 | 1, 2, 3 -5232 | ] -5233 | "; - | -5234 | let tree = parser.parse(source_code, None).unwrap(); -5235 | let query = Query::new(&language, r#"(array (_) @foo . 
"]")"#).unwrap(); -5236 | let mut matches_cursor = QueryCursor::new(); -5237 | let mut captures_cursor = QueryCursor::new(); - | -5238 | let captures = captures_cursor.captures(&query, tree.root_node(), source_code.as_bytes()); -5239 | let captures = collect_captures(captures, &query, source_code); - | -5240 | let matches = matches_cursor.matches(&query, tree.root_node(), source_code.as_bytes()); -5241 | let matches = collect_matches(matches, &query, source_code); - | -5242 | assert_eq!(captures, vec![("foo", "3")]); -5243 | assert_eq!(matches.len(), 1); -5244 | assert_eq!(matches[0].1, captures); -5245 | } - | -5246 | #[test] -5247 | fn test_query_with_predicate_causing_oob_access() { -5248 | let language = get_language("rust"); - | -5249 | let query = "(call_expression -5250 | function: (scoped_identifier -5251 | path: (scoped_identifier (identifier) @_regex (#any-of? @_regex \"Regex\" \"RegexBuilder\") .)) -5252 | (#set! injection.language \"regex\"))"; -5253 | Query::new(&language, query).unwrap(); -5254 | } - | -5255 | #[test] -5256 | fn test_query_with_anonymous_error_node() { -5257 | let language = get_test_fixture_language("anonymous_error"); -5258 | let mut parser = Parser::new(); -5259 | parser.set_language(&language).unwrap(); - | -5260 | let source = "ERROR"; - | -5261 | let tree = parser.parse(source, None).unwrap(); -5262 | let query = Query::new( -5263 | &language, -5264 | r#" -5265 | "ERROR" @error -5266 | (document "ERROR" @error) -5267 | "#, -5268 | ) -5269 | .unwrap(); -5270 | let mut cursor = QueryCursor::new(); -5271 | let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); -5272 | let matches = collect_matches(matches, &query, source); - | -5273 | assert_eq!( -5274 | matches, -5275 | vec![(1, vec![("error", "ERROR")]), (0, vec![("error", "ERROR")])] -5276 | ); -5277 | } - | -5278 | #[test] -5279 | fn test_query_allows_error_nodes_with_children() { -5280 | allocations::record(|| { -5281 | let language = get_language("cpp"); - 
| -5282 | let code = "SomeStruct foo{.bar{}};"; - | -5283 | let mut parser = Parser::new(); -5284 | parser.set_language(&language).unwrap(); - | -5285 | let tree = parser.parse(code, None).unwrap(); -5286 | let root = tree.root_node(); - | -5287 | let query = Query::new(&language, "(initializer_list (ERROR) @error)").unwrap(); -5288 | let mut cursor = QueryCursor::new(); - | -5289 | let matches = cursor.matches(&query, root, code.as_bytes()); -5290 | let matches = collect_matches(matches, &query, code); -5291 | assert_eq!(matches, &[(0, vec![("error", ".bar")])]); -5292 | }); -5293 | } - | -5294 | #[test] -5295 | fn test_query_assertion_on_unreachable_node_with_child() { -5296 | // The `await_binding` rule is unreachable because it has a lower precedence than -5297 | // `identifier`, so we'll always reduce to an expression of type `identifier` -5298 | // instead whenever we see the token `await` followed by an identifier. -5299 | // -5300 | // A query that tries to capture the `await` token in the `await_binding` rule -5301 | // should not cause an assertion failure during query analysis. 
-5302 | let grammar = r#" -5303 | export default grammar({ -5304 | name: "query_assertion_crash", - | -5305 | rules: { -5306 | source_file: $ => repeat($.expression), - | -5307 | expression: $ => choice( -5308 | $.await_binding, -5309 | $.await_expr, -5310 | $.equal_expr, -5311 | prec(3, $.identifier), -5312 | ), - | -5313 | await_binding: $ => prec(1, seq('await', $.identifier, '=', $.expression)), - | -5314 | await_expr: $ => prec(1, seq('await', $.expression)), - | -5315 | equal_expr: $ => prec.right(2, seq($.expression, '=', $.expression)), - | -5316 | identifier: _ => /[a-z]+/, -5317 | } -5318 | }); -5319 | "#; - | -5320 | let file = tempfile::NamedTempFile::with_suffix(".js").unwrap(); -5321 | std::fs::write(file.path(), grammar).unwrap(); - | -5322 | let grammar_json = load_grammar_file(file.path(), None).unwrap(); - | -5323 | let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap(); - | -5324 | let language = get_test_language(&parser_name, &parser_code, None); - | -5325 | let query_result = Query::new(&language, r#"(await_binding "await")"#); - | -5326 | assert!(query_result.is_err()); -5327 | assert_eq!( -5328 | query_result.unwrap_err(), -5329 | QueryError { -5330 | kind: QueryErrorKind::Structure, -5331 | row: 0, -5332 | offset: 0, -5333 | column: 0, -5334 | message: ["(await_binding \"await\")", "^"].join("\n"), -5335 | } -5336 | ); -5337 | } - | -5338 | #[test] -5339 | fn test_query_supertype_with_anonymous_node() { -5340 | let grammar = r#" -5341 | export default grammar({ -5342 | name: "supertype_anonymous_test", - | -5343 | extras: $ => [/\s/, $.comment], - | -5344 | supertypes: $ => [$.expression], - | -5345 | word: $ => $.identifier, - | -5346 | rules: { -5347 | source_file: $ => repeat($.expression), - | -5348 | expression: $ => choice( -5349 | $.function_call, -5350 | '()' // an empty tuple, which should be queryable with the supertype syntax -5351 | ), - | -5352 | function_call: $ => seq($.identifier, '()'), - | -5353 | 
identifier: _ => /[a-zA-Z_][a-zA-Z0-9_]*/, - | -5354 | comment: _ => token(seq('//', /.*/)), -5355 | } -5356 | }); -5357 | "#; - | -5358 | let file = tempfile::NamedTempFile::with_suffix(".js").unwrap(); -5359 | std::fs::write(file.path(), grammar).unwrap(); - | -5360 | let grammar_json = load_grammar_file(file.path(), None).unwrap(); - | -5361 | let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap(); - | -5362 | let language = get_test_language(&parser_name, &parser_code, None); - | -5363 | let query_result = Query::new(&language, r#"(expression/"()") @tuple"#); - | -5364 | assert!(query_result.is_ok()); - | -5365 | let query = query_result.unwrap(); - | -5366 | let source = "foo()\n()"; - | -5367 | assert_query_matches(&language, &query, source, &[(0, vec![("tuple", "()")])]); -5368 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/tags_test.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | ffi::{CStr, CString}, - 3 | fs, ptr, slice, str, - 4 | sync::atomic::{AtomicUsize, Ordering}, - 5 | }; - | - 6 | use tree_sitter::Point; - 7 | use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext}; - | - 8 | use super::helpers::{ - 9 | allocations, - 10 | fixtures::{get_language, get_language_queries_path}, - 11 | }; - | - 12 | const PYTHON_TAG_QUERY: &str = r#" - 13 | ( - 14 | (function_definition - 15 | name: (identifier) @name - 16 | body: (block . (expression_statement (string) @doc))) @definition.function - 17 | (#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)") - 18 | ) - | - 19 | (function_definition - 20 | name: (identifier) @name) @definition.function - | - 21 | ( - 22 | (class_definition - 23 | name: (identifier) @name - 24 | body: (block - 25 | . (expression_statement (string) @doc))) @definition.class - 26 | (#strip! 
@doc "(^['\"\\s]*)|(['\"\\s]*$)") - 27 | ) - | - 28 | (class_definition - 29 | name: (identifier) @name) @definition.class - | - 30 | (call - 31 | function: (identifier) @name) @reference.call - | - 32 | (call - 33 | function: (attribute - 34 | attribute: (identifier) @name)) @reference.call - 35 | "#; - | - 36 | const JS_TAG_QUERY: &str = r#" - 37 | ( - 38 | (comment)* @doc . - 39 | (class_declaration - 40 | name: (identifier) @name) @definition.class - 41 | (#select-adjacent! @doc @definition.class) - 42 | (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") - 43 | ) - | - 44 | ( - 45 | (comment)* @doc . - 46 | (method_definition - 47 | name: (property_identifier) @name) @definition.method - 48 | (#select-adjacent! @doc @definition.method) - 49 | (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") - 50 | ) - | - 51 | ( - 52 | (comment)* @doc . - 53 | (function_declaration - 54 | name: (identifier) @name) @definition.function - 55 | (#select-adjacent! @doc @definition.function) - 56 | (#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)") - 57 | ) - | - 58 | (call_expression - 59 | function: (identifier) @name) @reference.call - 60 | "#; - | - 61 | const RUBY_TAG_QUERY: &str = r" - 62 | (method - 63 | name: (_) @name) @definition.method - | - 64 | (call - 65 | method: (identifier) @name) @reference.call - | - 66 | (setter (identifier) @ignore) - | - 67 | ((identifier) @name @reference.call - 68 | (#is-not? 
local)) - 69 | "; - | - 70 | #[test] - 71 | fn test_tags_python() { - 72 | let language = get_language("python"); - 73 | let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); - 74 | let mut tag_context = TagsContext::new(); - | - 75 | let source = br#" - 76 | class Customer: - 77 | """ - 78 | Data about a customer - 79 | """ - | - 80 | def age(self): - 81 | ''' - 82 | Get the customer's age - 83 | ''' - 84 | compute_age(self.id) - 85 | } - 86 | "#; - | - 87 | let tags = tag_context - 88 | .generate_tags(&tags_config, source, None) - 89 | .unwrap() - 90 | .0 - 91 | .collect::, _>>() - 92 | .unwrap(); - | - 93 | assert_eq!( - 94 | tags.iter() - 95 | .map(|t| ( - 96 | substr(source, &t.name_range), - 97 | tags_config.syntax_type_name(t.syntax_type_id) - 98 | )) - 99 | .collect::>(), - 100 | &[ - 101 | ("Customer", "class"), - 102 | ("age", "function"), - 103 | ("compute_age", "call"), - 104 | ] - 105 | ); - | - 106 | assert_eq!(substr(source, &tags[0].line_range), "class Customer:"); - 107 | assert_eq!(substr(source, &tags[1].line_range), "def age(self):"); - 108 | assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer"); - 109 | assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age"); - 110 | } - | - 111 | #[test] - 112 | fn test_tags_javascript() { - 113 | let language = get_language("javascript"); - 114 | let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap(); - 115 | let source = br" - 116 | // hi - | - 117 | // Data about a customer. 
- 118 | // bla bla bla - 119 | class Customer { - 120 | /* - 121 | * Get the customer's age - 122 | */ - 123 | getAge() { - 124 | } - 125 | } - | - 126 | // ok - | - 127 | class Agent { - | - 128 | } - 129 | "; - | - 130 | let mut tag_context = TagsContext::new(); - 131 | let tags = tag_context - 132 | .generate_tags(&tags_config, source, None) - 133 | .unwrap() - 134 | .0 - 135 | .collect::, _>>() - 136 | .unwrap(); - | - 137 | assert_eq!( - 138 | tags.iter() - 139 | .map(|t| ( - 140 | substr(source, &t.name_range), - 141 | t.span.clone(), - 142 | tags_config.syntax_type_name(t.syntax_type_id) - 143 | )) - 144 | .collect::>(), - 145 | &[ - 146 | ("Customer", Point::new(5, 10)..Point::new(5, 18), "class",), - 147 | ("getAge", Point::new(9, 8)..Point::new(9, 14), "method",), - 148 | ("Agent", Point::new(15, 10)..Point::new(15, 15), "class",) - 149 | ] - 150 | ); - 151 | assert_eq!( - 152 | tags[0].docs.as_ref().unwrap(), - 153 | "Data about a customer.\nbla bla bla" - 154 | ); - 155 | assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age"); - 156 | assert_eq!(tags[2].docs, None); - 157 | } - | - 158 | #[test] - 159 | fn test_tags_columns_measured_in_utf16_code_units() { - 160 | let language = get_language("python"); - 161 | let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); - 162 | let mut tag_context = TagsContext::new(); - | - 163 | let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes(); - | - 164 | let tag = tag_context - 165 | .generate_tags(&tags_config, source, None) - 166 | .unwrap() - 167 | .0 - 168 | .next() - 169 | .unwrap() - 170 | .unwrap(); - | - 171 | assert_eq!(substr(source, &tag.name_range), "hello_α_ω"); - 172 | assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32)); - 173 | assert_eq!(tag.utf16_column_range, 9..18); - 174 | } - | - 175 | #[test] - 176 | fn test_tags_ruby() { - 177 | let language = get_language("ruby"); - 178 | let locals_query = - 179 | 
fs::read_to_string(get_language_queries_path("ruby").join("locals.scm")).unwrap(); - 180 | let tags_config = TagsConfiguration::new(language, RUBY_TAG_QUERY, &locals_query).unwrap(); - 181 | let source = strip_whitespace( - 182 | 8, - 183 | " - 184 | b = 1 - | - 185 | def foo=() - 186 | c = 1 - | - 187 | # a is a method because it is not in scope - 188 | # b is a method because `b` doesn't capture variables from its containing scope - 189 | bar a, b, c - | - 190 | [1, 2, 3].each do |a| - 191 | # a is a parameter - 192 | # b is a method - 193 | # c is a variable, because the block captures variables from its containing scope. - 194 | baz a, b, c - 195 | end - 196 | end", - 197 | ); - | - 198 | let mut tag_context = TagsContext::new(); - 199 | let tags = tag_context - 200 | .generate_tags(&tags_config, source.as_bytes(), None) - 201 | .unwrap() - 202 | .0 - 203 | .collect::, _>>() - 204 | .unwrap(); - | - 205 | assert_eq!( - 206 | tags.iter() - 207 | .map(|t| ( - 208 | substr(source.as_bytes(), &t.name_range), - 209 | tags_config.syntax_type_name(t.syntax_type_id), - 210 | (t.span.start.row, t.span.start.column), - 211 | )) - 212 | .collect::>(), - 213 | &[ - 214 | ("foo=", "method", (2, 4)), - 215 | ("bar", "call", (7, 4)), - 216 | ("a", "call", (7, 8)), - 217 | ("b", "call", (7, 11)), - 218 | ("each", "call", (9, 14)), - 219 | ("baz", "call", (13, 8)), - 220 | ("b", "call", (13, 15),), - 221 | ] - 222 | ); - 223 | } - | - 224 | #[test] - 225 | fn test_tags_cancellation() { - 226 | allocations::record(|| { - 227 | // Large javascript document - 228 | let source = "/* hi */ class A { /* ok */ b() {} }\n".repeat(500); - 229 | let cancellation_flag = AtomicUsize::new(0); - 230 | let language = get_language("javascript"); - 231 | let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap(); - 232 | let mut tag_context = TagsContext::new(); - 233 | let tags = tag_context - 234 | .generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag)) - 
235 | .unwrap(); - | - 236 | let found_cancellation_error = tags.0.enumerate().any(|(i, tag)| { - 237 | if i == 150 { - 238 | cancellation_flag.store(1, Ordering::SeqCst); - 239 | } - 240 | match tag { - 241 | Ok(_) => false, - 242 | Err(Error::Cancelled) => true, - 243 | Err(e) => { - 244 | unreachable!("Unexpected error type while iterating tags: {e}") - 245 | } - 246 | } - 247 | }); - | - 248 | assert!( - 249 | found_cancellation_error, - 250 | "Expected to halt tagging with a cancellation error" - 251 | ); - 252 | }); - 253 | } - | - 254 | #[test] - 255 | fn test_invalid_capture() { - 256 | let language = get_language("python"); - 257 | let e = TagsConfiguration::new(language, "(identifier) @method", "") - 258 | .expect_err("expected InvalidCapture error"); - 259 | assert_eq!(e, Error::InvalidCapture("method".to_string())); - 260 | } - | - 261 | #[test] - 262 | fn test_tags_with_parse_error() { - 263 | let language = get_language("python"); - 264 | let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); - 265 | let mut tag_context = TagsContext::new(); - | - 266 | let source = br" - 267 | class Fine: pass - 268 | class Bad - 269 | "; - | - 270 | let (tags, failed) = tag_context - 271 | .generate_tags(&tags_config, source, None) - 272 | .unwrap(); - | - 273 | let newtags = tags.collect::, _>>().unwrap(); - | - 274 | assert!(failed, "syntax error should have been detected"); - | - 275 | assert_eq!( - 276 | newtags - 277 | .iter() - 278 | .map(|t| ( - 279 | substr(source, &t.name_range), - 280 | tags_config.syntax_type_name(t.syntax_type_id) - 281 | )) - 282 | .collect::>(), - 283 | &[("Fine", "class"),] - 284 | ); - 285 | } - | - 286 | #[test] - 287 | fn test_tags_via_c_api() { - 288 | allocations::record(|| { - 289 | let tagger = c::ts_tagger_new(); - 290 | let buffer = c::ts_tags_buffer_new(); - 291 | let scope_name = "source.js"; - 292 | let language = get_language("javascript"); - | - 293 | let source_code = strip_whitespace( - 294 
| 12, - 295 | " - 296 | var a = 1; - | - 297 | // one - 298 | // two - 299 | // three - 300 | function b() { - 301 | } - | - 302 | // four - 303 | // five - 304 | class C extends D { - | - 305 | } - | - 306 | b(a);", - 307 | ); - | - 308 | let c_scope_name = CString::new(scope_name).unwrap(); - 309 | let result = unsafe { - 310 | c::ts_tagger_add_language( - 311 | tagger, - 312 | c_scope_name.as_ptr(), - 313 | language, - 314 | JS_TAG_QUERY.as_ptr(), - 315 | ptr::null(), - 316 | JS_TAG_QUERY.len() as u32, - 317 | 0, - 318 | ) - 319 | }; - 320 | assert_eq!(result, c::TSTagsError::Ok); - | - 321 | let result = unsafe { - 322 | c::ts_tagger_tag( - 323 | tagger, - 324 | c_scope_name.as_ptr(), - 325 | source_code.as_ptr(), - 326 | source_code.len() as u32, - 327 | buffer, - 328 | ptr::null(), - 329 | ) - 330 | }; - 331 | assert_eq!(result, c::TSTagsError::Ok); - 332 | let tags = unsafe { - 333 | slice::from_raw_parts( - 334 | c::ts_tags_buffer_tags(buffer), - 335 | c::ts_tags_buffer_tags_len(buffer) as usize, - 336 | ) - 337 | }; - 338 | let docs = str::from_utf8(unsafe { - 339 | slice::from_raw_parts( - 340 | c::ts_tags_buffer_docs(buffer).cast::(), - 341 | c::ts_tags_buffer_docs_len(buffer) as usize, - 342 | ) - 343 | }) - 344 | .unwrap(); - | - 345 | let syntax_types = unsafe { - 346 | let mut len = 0; - 347 | let ptr = c::ts_tagger_syntax_kinds_for_scope_name( - 348 | tagger, - 349 | c_scope_name.as_ptr(), - 350 | &raw mut len, - 351 | ); - 352 | slice::from_raw_parts(ptr, len as usize) - 353 | .iter() - 354 | .map(|i| CStr::from_ptr(*i).to_str().unwrap()) - 355 | .collect::>() - 356 | }; - | - 357 | assert_eq!( - 358 | tags.iter() - 359 | .map(|tag| ( - 360 | syntax_types[tag.syntax_type_id as usize], - 361 | &source_code[tag.name_start_byte as usize..tag.name_end_byte as usize], - 362 | &source_code[tag.line_start_byte as usize..tag.line_end_byte as usize], - 363 | &docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize], - 364 | )) - 365 | .collect::>(), - 
366 | &[ - 367 | ("function", "b", "function b() {", "one\ntwo\nthree"), - 368 | ("class", "C", "class C extends D {", "four\nfive"), - 369 | ("call", "b", "b(a);", "") - 370 | ] - 371 | ); - | - 372 | unsafe { - 373 | c::ts_tags_buffer_delete(buffer); - 374 | c::ts_tagger_delete(tagger); - 375 | } - 376 | }); - 377 | } - | - 378 | fn substr<'a>(source: &'a [u8], range: &std::ops::Range) -> &'a str { - 379 | std::str::from_utf8(&source[range.clone()]).unwrap() - 380 | } - | - 381 | fn strip_whitespace(indent: usize, s: &str) -> String { - 382 | s.lines() - 383 | .skip(1) - 384 | .map(|line| &line[line.len().min(indent)..]) - 385 | .collect::>() - 386 | .join("\n") - 387 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/test_highlight_test.rs: --------------------------------------------------------------------------------- - 1 | use tree_sitter::Parser; - 2 | use tree_sitter_highlight::{Highlight, Highlighter}; - | - 3 | use super::helpers::fixtures::{get_highlight_config, get_language, test_loader}; - 4 | use crate::{ - 5 | query_testing::{parse_position_comments, Assertion, Utf8Point}, - 6 | test_highlight::get_highlight_positions, - 7 | }; - | - 8 | #[test] - 9 | fn test_highlight_test_with_basic_test() { - 10 | let language = get_language("javascript"); - 11 | let config = get_highlight_config( - 12 | "javascript", - 13 | Some("injections.scm"), - 14 | &[ - 15 | "function".to_string(), - 16 | "variable".to_string(), - 17 | "keyword".to_string(), - 18 | ], - 19 | ); - 20 | let source = [ - 21 | "// hi", - 22 | "var abc = function(d) {", - 23 | " // ^ function", - 24 | " // ^^^ keyword", - 25 | " return d + e;", - 26 | " // ^ variable", - 27 | " // ^ !variable", - 28 | "};", - 29 | "var y̆y̆y̆y̆ = function() {}", - 30 | " // ^ function", - 31 | " // ^ keyword", - 32 | ] - 33 | .join("\n"); - | - 34 | let assertions = - 35 | parse_position_comments(&mut Parser::new(), &language, 
source.as_bytes()).unwrap(); - 36 | assert_eq!( - 37 | assertions, - 38 | &[ - 39 | Assertion::new(1, 5, 1, false, String::from("function")), - 40 | Assertion::new(1, 11, 3, false, String::from("keyword")), - 41 | Assertion::new(4, 9, 1, false, String::from("variable")), - 42 | Assertion::new(4, 11, 1, true, String::from("variable")), - 43 | Assertion::new(8, 5, 1, false, String::from("function")), - 44 | Assertion::new(8, 11, 1, false, String::from("keyword")), - 45 | ] - 46 | ); - | - 47 | let mut highlighter = Highlighter::new(); - 48 | let highlight_positions = - 49 | get_highlight_positions(test_loader(), &mut highlighter, &config, source.as_bytes()) - 50 | .unwrap(); - 51 | assert_eq!( - 52 | highlight_positions, - 53 | &[ - 54 | (Utf8Point::new(1, 0), Utf8Point::new(1, 3), Highlight(2)), // "var" - 55 | (Utf8Point::new(1, 4), Utf8Point::new(1, 7), Highlight(0)), // "abc" - 56 | (Utf8Point::new(1, 10), Utf8Point::new(1, 18), Highlight(2)), // "function" - 57 | (Utf8Point::new(1, 19), Utf8Point::new(1, 20), Highlight(1)), // "d" - 58 | (Utf8Point::new(4, 2), Utf8Point::new(4, 8), Highlight(2)), // "return" - 59 | (Utf8Point::new(4, 9), Utf8Point::new(4, 10), Highlight(1)), // "d" - 60 | (Utf8Point::new(4, 13), Utf8Point::new(4, 14), Highlight(1)), // "e" - 61 | (Utf8Point::new(8, 0), Utf8Point::new(8, 3), Highlight(2)), // "var" - 62 | (Utf8Point::new(8, 4), Utf8Point::new(8, 8), Highlight(0)), // "y̆y̆y̆y̆" - 63 | (Utf8Point::new(8, 11), Utf8Point::new(8, 19), Highlight(2)), // "function" - 64 | ] - 65 | ); - 66 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/test_tags_test.rs: --------------------------------------------------------------------------------- - 1 | use tree_sitter::Parser; - 2 | use tree_sitter_tags::TagsContext; - | - 3 | use super::helpers::fixtures::{get_language, get_tags_config}; - 4 | use crate::{ - 5 | query_testing::{parse_position_comments, Assertion, Utf8Point}, - 6 | 
test_tags::get_tag_positions, - 7 | }; - | - 8 | #[test] - 9 | fn test_tags_test_with_basic_test() { - 10 | let language = get_language("python"); - 11 | let config = get_tags_config("python"); - 12 | let source = [ - 13 | "# hi", - 14 | "def abc(d):", - 15 | " # <- definition.function", - 16 | " e = fgh(d)", - 17 | " # ^ reference.call", - 18 | " return d(e)", - 19 | " # ^ reference.call", - 20 | " # ^ !variable.parameter", - 21 | "", - 22 | ] - 23 | .join("\n"); - | - 24 | let assertions = - 25 | parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap(); - | - 26 | assert_eq!( - 27 | assertions, - 28 | &[ - 29 | Assertion::new(1, 4, 1, false, String::from("definition.function")), - 30 | Assertion::new(3, 9, 1, false, String::from("reference.call")), - 31 | Assertion::new(5, 11, 1, false, String::from("reference.call")), - 32 | Assertion::new(5, 13, 1, true, String::from("variable.parameter")), - 33 | ] - 34 | ); - | - 35 | let mut tags_context = TagsContext::new(); - 36 | let tag_positions = get_tag_positions(&mut tags_context, &config, source.as_bytes()).unwrap(); - 37 | assert_eq!( - 38 | tag_positions, - 39 | &[ - 40 | ( - 41 | Utf8Point::new(1, 4), - 42 | Utf8Point::new(1, 7), - 43 | "definition.function".to_string() - 44 | ), - 45 | ( - 46 | Utf8Point::new(3, 8), - 47 | Utf8Point::new(3, 11), - 48 | "reference.call".to_string() - 49 | ), - 50 | ( - 51 | Utf8Point::new(5, 11), - 52 | Utf8Point::new(5, 12), - 53 | "reference.call".to_string() - 54 | ), - 55 | ] - 56 | ); - 57 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/text_provider_test.rs: --------------------------------------------------------------------------------- - 1 | use std::{iter, sync::Arc}; - | - 2 | use streaming_iterator::StreamingIterator; - 3 | use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree}; - | - 4 | use crate::tests::helpers::fixtures::get_language; - | 
- 5 | fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) { - 6 | let language = get_language("c"); - 7 | let mut parser = Parser::new(); - 8 | parser.set_language(&language).unwrap(); - 9 | (parser.parse(text, None).unwrap(), language) - 10 | } - | - 11 | fn parse_text_with(callback: &mut F) -> (Tree, Language) - 12 | where - 13 | T: AsRef<[u8]>, - 14 | F: FnMut(usize, Point) -> T, - 15 | { - 16 | let language = get_language("c"); - 17 | let mut parser = Parser::new(); - 18 | parser.set_language(&language).unwrap(); - 19 | let tree = parser.parse_with_options(callback, None, None).unwrap(); - 20 | assert_eq!("comment", tree.root_node().child(0).unwrap().kind()); - 21 | (tree, language) - 22 | } - | - 23 | fn tree_query>(tree: &Tree, text: impl TextProvider, language: &Language) { - 24 | let query = Query::new(language, "((comment) @c (#eq? @c \"// comment\"))").unwrap(); - 25 | let mut cursor = QueryCursor::new(); - 26 | let mut captures = cursor.captures(&query, tree.root_node(), text); - 27 | let (match_, idx) = captures.next().unwrap(); - 28 | let capture = match_.captures[*idx]; - 29 | assert_eq!(capture.index as usize, *idx); - 30 | assert_eq!("comment", capture.node.kind()); - 31 | } - | - 32 | fn check_parsing>( - 33 | parser_text: impl AsRef<[u8]>, - 34 | text_provider: impl TextProvider, - 35 | ) { - 36 | let (tree, language) = parse_text(parser_text); - 37 | tree_query(&tree, text_provider, &language); - 38 | } - | - 39 | fn check_parsing_callback>( - 40 | parser_callback: &mut F, - 41 | text_provider: impl TextProvider, - 42 | ) where - 43 | T: AsRef<[u8]>, - 44 | F: FnMut(usize, Point) -> T, - 45 | { - 46 | let (tree, language) = parse_text_with(parser_callback); - 47 | tree_query(&tree, text_provider, &language); - 48 | } - | - 49 | #[test] - 50 | fn test_text_provider_for_str_slice() { - 51 | let text: &str = "// comment"; - | - 52 | check_parsing(text, text.as_bytes()); - 53 | check_parsing(text.as_bytes(), text.as_bytes()); - 54 | } - | - 55 
| #[test] - 56 | fn test_text_provider_for_string() { - 57 | let text: String = "// comment".to_owned(); - | - 58 | check_parsing(text.clone(), text.as_bytes()); - 59 | check_parsing(text.as_bytes(), text.as_bytes()); - 60 | check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes()); - 61 | } - | - 62 | #[test] - 63 | fn test_text_provider_for_box_of_str_slice() { - 64 | let text = "// comment".to_owned().into_boxed_str(); - | - 65 | check_parsing(text.as_bytes(), text.as_bytes()); - 66 | check_parsing(<_ as AsRef>::as_ref(&text), text.as_bytes()); - 67 | check_parsing(text.as_ref(), text.as_ref().as_bytes()); - 68 | check_parsing(text.as_ref(), text.as_bytes()); - 69 | } - | - 70 | #[test] - 71 | fn test_text_provider_for_box_of_bytes_slice() { - 72 | let text = "// comment".to_owned().into_boxed_str().into_boxed_bytes(); - | - 73 | check_parsing(text.as_ref(), text.as_ref()); - 74 | check_parsing(text.as_ref(), &*text); - 75 | check_parsing(&*text, &*text); - 76 | } - | - 77 | #[test] - 78 | fn test_text_provider_for_vec_of_bytes() { - 79 | let text = "// comment".to_owned().into_bytes(); - | - 80 | check_parsing(&*text, &*text); - 81 | } - | - 82 | #[test] - 83 | fn test_text_provider_for_arc_of_bytes_slice() { - 84 | let text: Arc<[u8]> = Arc::from("// comment".to_owned().into_bytes()); - | - 85 | check_parsing(&*text, &*text); - 86 | check_parsing(text.as_ref(), text.as_ref()); - 87 | check_parsing(text.clone(), text.as_ref()); - 88 | } - | - 89 | #[test] - 90 | fn test_text_provider_for_vec_utf16_text() { - 91 | let source_text = "你好".encode_utf16().collect::>(); - | - 92 | let language = get_language("c"); - 93 | let mut parser = Parser::new(); - 94 | parser.set_language(&language).unwrap(); - 95 | let tree = parser.parse_utf16_le(&source_text, None).unwrap(); - | - 96 | let tree_text = tree.root_node().utf16_text(&source_text); - 97 | assert_eq!(source_text, tree_text); - 98 | } - | - 99 | #[test] - 100 | fn 
test_text_provider_callback_with_str_slice() { - 101 | let text: &str = "// comment"; - | - 102 | check_parsing(text, |_node: Node<'_>| iter::once(text)); - 103 | check_parsing_callback( - 104 | &mut |offset, _point| { - 105 | (offset < text.len()) - 106 | .then_some(text.as_bytes()) - 107 | .unwrap_or_default() - 108 | }, - 109 | |_node: Node<'_>| iter::once(text), - 110 | ); - 111 | } - | - 112 | #[test] - 113 | fn test_text_provider_callback_with_owned_string_slice() { - 114 | let text: &str = "// comment"; - | - 115 | check_parsing_callback( - 116 | &mut |offset, _point| { - 117 | (offset < text.len()) - 118 | .then_some(text.as_bytes()) - 119 | .unwrap_or_default() - 120 | }, - 121 | |_node: Node<'_>| { - 122 | let slice: String = text.to_owned(); - 123 | iter::once(slice) - 124 | }, - 125 | ); - 126 | } - | - 127 | #[test] - 128 | fn test_text_provider_callback_with_owned_bytes_vec_slice() { - 129 | let text: &str = "// comment"; - | - 130 | check_parsing_callback( - 131 | &mut |offset, _point| { - 132 | (offset < text.len()) - 133 | .then_some(text.as_bytes()) - 134 | .unwrap_or_default() - 135 | }, - 136 | |_node: Node<'_>| { - 137 | let slice = text.to_owned().into_bytes(); - 138 | iter::once(slice) - 139 | }, - 140 | ); - 141 | } - | - 142 | #[test] - 143 | fn test_text_provider_callback_with_owned_arc_of_bytes_slice() { - 144 | let text: &str = "// comment"; - | - 145 | check_parsing_callback( - 146 | &mut |offset, _point| { - 147 | (offset < text.len()) - 148 | .then_some(text.as_bytes()) - 149 | .unwrap_or_default() - 150 | }, - 151 | |_node: Node<'_>| { - 152 | let slice: Arc<[u8]> = text.to_owned().into_bytes().into(); - 153 | iter::once(slice) - 154 | }, - 155 | ); - 156 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/tree_test.rs: --------------------------------------------------------------------------------- - 1 | use std::str; - | - 2 | use tree_sitter::{InputEdit, Parser, 
Point, Range, Tree}; - | - 3 | use super::helpers::fixtures::get_language; - 4 | use crate::{ - 5 | fuzz::edits::Edit, - 6 | parse::perform_edit, - 7 | tests::{helpers::fixtures::get_test_fixture_language, invert_edit}, - 8 | }; - | - 9 | #[test] - 10 | fn test_tree_edit() { - 11 | let mut parser = Parser::new(); - 12 | parser.set_language(&get_language("javascript")).unwrap(); - 13 | let tree = parser.parse(" abc !== def", None).unwrap(); - | - 14 | assert_eq!( - 15 | tree.root_node().to_sexp(), - 16 | "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))" - 17 | ); - | - 18 | // edit entirely within the tree's padding: - 19 | // resize the padding of the tree and its leftmost descendants. - 20 | { - 21 | let mut tree = tree.clone(); - 22 | tree.edit(&InputEdit { - 23 | start_byte: 1, - 24 | old_end_byte: 1, - 25 | new_end_byte: 2, - 26 | start_position: Point::new(0, 1), - 27 | old_end_position: Point::new(0, 1), - 28 | new_end_position: Point::new(0, 2), - 29 | }); - | - 30 | let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); - 31 | let child1 = expr.child(0).unwrap(); - 32 | let child2 = expr.child(1).unwrap(); - | - 33 | assert!(expr.has_changes()); - 34 | assert_eq!(expr.start_byte(), 3); - 35 | assert_eq!(expr.end_byte(), 16); - 36 | assert!(child1.has_changes()); - 37 | assert_eq!(child1.start_byte(), 3); - 38 | assert_eq!(child1.end_byte(), 6); - 39 | assert!(!child2.has_changes()); - 40 | assert_eq!(child2.start_byte(), 8); - 41 | assert_eq!(child2.end_byte(), 11); - 42 | } - | - 43 | // edit starting in the tree's padding but extending into its content: - 44 | // shrink the content to compensate for the expanded padding. 
- 45 | { - 46 | let mut tree = tree.clone(); - 47 | tree.edit(&InputEdit { - 48 | start_byte: 1, - 49 | old_end_byte: 4, - 50 | new_end_byte: 5, - 51 | start_position: Point::new(0, 1), - 52 | old_end_position: Point::new(0, 5), - 53 | new_end_position: Point::new(0, 5), - 54 | }); - | - 55 | let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); - 56 | let child1 = expr.child(0).unwrap(); - 57 | let child2 = expr.child(1).unwrap(); - | - 58 | assert!(expr.has_changes()); - 59 | assert_eq!(expr.start_byte(), 5); - 60 | assert_eq!(expr.end_byte(), 16); - 61 | assert!(child1.has_changes()); - 62 | assert_eq!(child1.start_byte(), 5); - 63 | assert_eq!(child1.end_byte(), 6); - 64 | assert!(!child2.has_changes()); - 65 | assert_eq!(child2.start_byte(), 8); - 66 | assert_eq!(child2.end_byte(), 11); - 67 | } - | - 68 | // insertion at the edge of a tree's padding: - 69 | // expand the tree's padding. - 70 | { - 71 | let mut tree = tree.clone(); - 72 | tree.edit(&InputEdit { - 73 | start_byte: 2, - 74 | old_end_byte: 2, - 75 | new_end_byte: 4, - 76 | start_position: Point::new(0, 2), - 77 | old_end_position: Point::new(0, 2), - 78 | new_end_position: Point::new(0, 4), - 79 | }); - | - 80 | let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); - 81 | let child1 = expr.child(0).unwrap(); - 82 | let child2 = expr.child(1).unwrap(); - | - 83 | assert!(expr.has_changes()); - 84 | assert_eq!(expr.byte_range(), 4..17); - 85 | assert!(child1.has_changes()); - 86 | assert_eq!(child1.byte_range(), 4..7); - 87 | assert!(!child2.has_changes()); - 88 | assert_eq!(child2.byte_range(), 9..12); - 89 | } - | - 90 | // replacement starting at the edge of the tree's padding: - 91 | // resize the content and not the padding. 
- 92 | { - 93 | let mut tree = tree.clone(); - 94 | tree.edit(&InputEdit { - 95 | start_byte: 2, - 96 | old_end_byte: 2, - 97 | new_end_byte: 4, - 98 | start_position: Point::new(0, 2), - 99 | old_end_position: Point::new(0, 2), - 100 | new_end_position: Point::new(0, 4), - 101 | }); - | - 102 | let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); - 103 | let child1 = expr.child(0).unwrap(); - 104 | let child2 = expr.child(1).unwrap(); - | - 105 | assert!(expr.has_changes()); - 106 | assert_eq!(expr.byte_range(), 4..17); - 107 | assert!(child1.has_changes()); - 108 | assert_eq!(child1.byte_range(), 4..7); - 109 | assert!(!child2.has_changes()); - 110 | assert_eq!(child2.byte_range(), 9..12); - 111 | } - | - 112 | // deletion that spans more than one child node: - 113 | // shrink subsequent child nodes. - 114 | { - 115 | let mut tree = tree.clone(); - 116 | tree.edit(&InputEdit { - 117 | start_byte: 1, - 118 | old_end_byte: 11, - 119 | new_end_byte: 4, - 120 | start_position: Point::new(0, 1), - 121 | old_end_position: Point::new(0, 11), - 122 | new_end_position: Point::new(0, 4), - 123 | }); - | - 124 | let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); - 125 | let child1 = expr.child(0).unwrap(); - 126 | let child2 = expr.child(1).unwrap(); - 127 | let child3 = expr.child(2).unwrap(); - | - 128 | assert!(expr.has_changes()); - 129 | assert_eq!(expr.byte_range(), 4..8); - 130 | assert!(child1.has_changes()); - 131 | assert_eq!(child1.byte_range(), 4..4); - 132 | assert!(child2.has_changes()); - 133 | assert_eq!(child2.byte_range(), 4..4); - 134 | assert!(child3.has_changes()); - 135 | assert_eq!(child3.byte_range(), 5..8); - 136 | } - | - 137 | // insertion at the end of the tree: - 138 | // extend the tree's content. 
- 139 | { - 140 | let mut tree = tree.clone(); - 141 | tree.edit(&InputEdit { - 142 | start_byte: 15, - 143 | old_end_byte: 15, - 144 | new_end_byte: 16, - 145 | start_position: Point::new(0, 15), - 146 | old_end_position: Point::new(0, 15), - 147 | new_end_position: Point::new(0, 16), - 148 | }); - | - 149 | let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); - 150 | let child1 = expr.child(0).unwrap(); - 151 | let child2 = expr.child(1).unwrap(); - 152 | let child3 = expr.child(2).unwrap(); - | - 153 | assert!(expr.has_changes()); - 154 | assert_eq!(expr.byte_range(), 2..16); - 155 | assert!(!child1.has_changes()); - 156 | assert_eq!(child1.byte_range(), 2..5); - 157 | assert!(!child2.has_changes()); - 158 | assert_eq!(child2.byte_range(), 7..10); - 159 | assert!(child3.has_changes()); - 160 | assert_eq!(child3.byte_range(), 12..16); - 161 | } - | - 162 | // replacement that starts within a token and extends beyond the end of the tree: - 163 | // resize the token and empty out any subsequent child nodes. 
- 164 | { - 165 | let mut tree = tree.clone(); - 166 | tree.edit(&InputEdit { - 167 | start_byte: 3, - 168 | old_end_byte: 90, - 169 | new_end_byte: 4, - 170 | start_position: Point::new(0, 3), - 171 | old_end_position: Point::new(0, 90), - 172 | new_end_position: Point::new(0, 4), - 173 | }); - | - 174 | let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); - 175 | let child1 = expr.child(0).unwrap(); - 176 | let child2 = expr.child(1).unwrap(); - 177 | let child3 = expr.child(2).unwrap(); - 178 | assert_eq!(expr.byte_range(), 2..4); - 179 | assert!(expr.has_changes()); - 180 | assert_eq!(child1.byte_range(), 2..4); - 181 | assert!(child1.has_changes()); - 182 | assert_eq!(child2.byte_range(), 4..4); - 183 | assert!(child2.has_changes()); - 184 | assert_eq!(child3.byte_range(), 4..4); - 185 | assert!(child3.has_changes()); - 186 | } - | - 187 | // replacement that starts in whitespace and extends beyond the end of the tree: - 188 | // shift the token's start position and empty out its content. 
- 189 | { - 190 | let mut tree = tree; - 191 | tree.edit(&InputEdit { - 192 | start_byte: 6, - 193 | old_end_byte: 90, - 194 | new_end_byte: 8, - 195 | start_position: Point::new(0, 6), - 196 | old_end_position: Point::new(0, 90), - 197 | new_end_position: Point::new(0, 8), - 198 | }); - | - 199 | let expr = tree.root_node().child(0).unwrap().child(0).unwrap(); - 200 | let child1 = expr.child(0).unwrap(); - 201 | let child2 = expr.child(1).unwrap(); - 202 | let child3 = expr.child(2).unwrap(); - 203 | assert_eq!(expr.byte_range(), 2..8); - 204 | assert!(expr.has_changes()); - 205 | assert_eq!(child1.byte_range(), 2..5); - 206 | assert!(!child1.has_changes()); - 207 | assert_eq!(child2.byte_range(), 8..8); - 208 | assert!(child2.has_changes()); - 209 | assert_eq!(child3.byte_range(), 8..8); - 210 | assert!(child3.has_changes()); - 211 | } - 212 | } - | - 213 | #[test] - 214 | fn test_tree_edit_with_included_ranges() { - 215 | let mut parser = Parser::new(); - 216 | parser.set_language(&get_language("html")).unwrap(); - | - 217 | let source = "
<% if a %>a<% else %>b<% end %>
"; - | - 218 | let ranges = [0..5, 15..29, 39..53, 62..68]; - | - 219 | parser - 220 | .set_included_ranges( - 221 | &ranges - 222 | .iter() - 223 | .map(|range| Range { - 224 | start_byte: range.start, - 225 | end_byte: range.end, - 226 | start_point: Point::new(0, range.start), - 227 | end_point: Point::new(0, range.end), - 228 | }) - 229 | .collect::>(), - 230 | ) - 231 | .unwrap(); - | - 232 | let mut tree = parser.parse(source, None).unwrap(); - | - 233 | tree.edit(&InputEdit { - 234 | start_byte: 29, - 235 | old_end_byte: 53, - 236 | new_end_byte: 29, - 237 | start_position: Point::new(0, 29), - 238 | old_end_position: Point::new(0, 53), - 239 | new_end_position: Point::new(0, 29), - 240 | }); - | - 241 | assert_eq!( - 242 | tree.included_ranges(), - 243 | &[ - 244 | Range { - 245 | start_byte: 0, - 246 | end_byte: 5, - 247 | start_point: Point::new(0, 0), - 248 | end_point: Point::new(0, 5), - 249 | }, - 250 | Range { - 251 | start_byte: 15, - 252 | end_byte: 29, - 253 | start_point: Point::new(0, 15), - 254 | end_point: Point::new(0, 29), - 255 | }, - 256 | Range { - 257 | start_byte: 29, - 258 | end_byte: 29, - 259 | start_point: Point::new(0, 29), - 260 | end_point: Point::new(0, 29), - 261 | }, - 262 | Range { - 263 | start_byte: 38, - 264 | end_byte: 44, - 265 | start_point: Point::new(0, 38), - 266 | end_point: Point::new(0, 44), - 267 | } - 268 | ] - 269 | ); - 270 | } - | - 271 | #[test] - 272 | fn test_tree_cursor() { - 273 | let mut parser = Parser::new(); - 274 | parser.set_language(&get_language("rust")).unwrap(); - | - 275 | let tree = parser - 276 | .parse( - 277 | " - 278 | struct Stuff { - 279 | a: A, - 280 | b: Option, - 281 | } - 282 | ", - 283 | None, - 284 | ) - 285 | .unwrap(); - | - 286 | let mut cursor = tree.walk(); - 287 | assert_eq!(cursor.node().kind(), "source_file"); - | - 288 | assert!(cursor.goto_first_child()); - 289 | assert_eq!(cursor.node().kind(), "struct_item"); - | - 290 | assert!(cursor.goto_first_child()); - 291 | 
assert_eq!(cursor.node().kind(), "struct"); - 292 | assert!(!cursor.node().is_named()); - | - 293 | assert!(cursor.goto_next_sibling()); - 294 | assert_eq!(cursor.node().kind(), "type_identifier"); - 295 | assert!(cursor.node().is_named()); - | - 296 | assert!(cursor.goto_next_sibling()); - 297 | assert_eq!(cursor.node().kind(), "field_declaration_list"); - 298 | assert!(cursor.node().is_named()); - | - 299 | assert!(cursor.goto_last_child()); - 300 | assert_eq!(cursor.node().kind(), "}"); - 301 | assert!(!cursor.node().is_named()); - 302 | assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 }); - | - 303 | assert!(cursor.goto_previous_sibling()); - 304 | assert_eq!(cursor.node().kind(), ","); - 305 | assert!(!cursor.node().is_named()); - 306 | assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 }); - | - 307 | assert!(cursor.goto_previous_sibling()); - 308 | assert_eq!(cursor.node().kind(), "field_declaration"); - 309 | assert!(cursor.node().is_named()); - 310 | assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 }); - | - 311 | assert!(cursor.goto_previous_sibling()); - 312 | assert_eq!(cursor.node().kind(), ","); - 313 | assert!(!cursor.node().is_named()); - 314 | assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 }); - | - 315 | assert!(cursor.goto_previous_sibling()); - 316 | assert_eq!(cursor.node().kind(), "field_declaration"); - 317 | assert!(cursor.node().is_named()); - 318 | assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 }); - | - 319 | assert!(cursor.goto_previous_sibling()); - 320 | assert_eq!(cursor.node().kind(), "{"); - 321 | assert!(!cursor.node().is_named()); - 322 | assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 }); - | - 323 | let mut copy = tree.walk(); - 324 | copy.reset_to(&cursor); - | - 325 | assert_eq!(copy.node().kind(), "{"); - 326 | assert!(!copy.node().is_named()); - | - 327 | assert!(copy.goto_parent()); - 328 | 
assert_eq!(copy.node().kind(), "field_declaration_list"); - 329 | assert!(copy.node().is_named()); - | - 330 | assert!(copy.goto_parent()); - 331 | assert_eq!(copy.node().kind(), "struct_item"); - 332 | } - | - 333 | #[test] - 334 | fn test_tree_cursor_previous_sibling_with_aliases() { - 335 | let mut parser = Parser::new(); - 336 | parser - 337 | .set_language(&get_test_fixture_language("aliases_in_root")) - 338 | .unwrap(); - | - 339 | let text = "# comment\n# \nfoo foo"; - 340 | let tree = parser.parse(text, None).unwrap(); - 341 | let mut cursor = tree.walk(); - 342 | assert_eq!(cursor.node().kind(), "document"); - | - 343 | cursor.goto_first_child(); - 344 | assert_eq!(cursor.node().kind(), "comment"); - | - 345 | assert!(cursor.goto_next_sibling()); - 346 | assert_eq!(cursor.node().kind(), "comment"); - | - 347 | assert!(cursor.goto_next_sibling()); - 348 | assert_eq!(cursor.node().kind(), "bar"); - | - 349 | assert!(cursor.goto_previous_sibling()); - 350 | assert_eq!(cursor.node().kind(), "comment"); - | - 351 | assert!(cursor.goto_previous_sibling()); - 352 | assert_eq!(cursor.node().kind(), "comment"); - | - 353 | assert!(cursor.goto_next_sibling()); - 354 | assert_eq!(cursor.node().kind(), "comment"); - | - 355 | assert!(cursor.goto_next_sibling()); - 356 | assert_eq!(cursor.node().kind(), "bar"); - 357 | } - | - 358 | #[test] - 359 | fn test_tree_cursor_previous_sibling() { - 360 | let mut parser = Parser::new(); - 361 | parser.set_language(&get_language("rust")).unwrap(); - | - 362 | let text = " - 363 | // Hi there - 364 | // This is fun! - 365 | // Another one! - 366 | "; - 367 | let tree = parser.parse(text, None).unwrap(); - | - 368 | let mut cursor = tree.walk(); - 369 | assert_eq!(cursor.node().kind(), "source_file"); - | - 370 | assert!(cursor.goto_last_child()); - 371 | assert_eq!(cursor.node().kind(), "line_comment"); - 372 | assert_eq!( - 373 | cursor.node().utf8_text(text.as_bytes()).unwrap(), - 374 | "// Another one!" 
- 375 | ); - | - 376 | assert!(cursor.goto_previous_sibling()); - 377 | assert_eq!(cursor.node().kind(), "line_comment"); - 378 | assert_eq!( - 379 | cursor.node().utf8_text(text.as_bytes()).unwrap(), - 380 | "// This is fun!" - 381 | ); - | - 382 | assert!(cursor.goto_previous_sibling()); - 383 | assert_eq!(cursor.node().kind(), "line_comment"); - 384 | assert_eq!( - 385 | cursor.node().utf8_text(text.as_bytes()).unwrap(), - 386 | "// Hi there" - 387 | ); - | - 388 | assert!(!cursor.goto_previous_sibling()); - 389 | } - | - 390 | #[test] - 391 | fn test_tree_cursor_fields() { - 392 | let mut parser = Parser::new(); - 393 | parser.set_language(&get_language("javascript")).unwrap(); - | - 394 | let tree = parser - 395 | .parse("function /*1*/ bar /*2*/ () {}", None) - 396 | .unwrap(); - | - 397 | let mut cursor = tree.walk(); - 398 | assert_eq!(cursor.node().kind(), "program"); - | - 399 | cursor.goto_first_child(); - 400 | assert_eq!(cursor.node().kind(), "function_declaration"); - 401 | assert_eq!(cursor.field_name(), None); - | - 402 | cursor.goto_first_child(); - 403 | assert_eq!(cursor.node().kind(), "function"); - 404 | assert_eq!(cursor.field_name(), None); - | - 405 | cursor.goto_next_sibling(); - 406 | assert_eq!(cursor.node().kind(), "comment"); - 407 | assert_eq!(cursor.field_name(), None); - | - 408 | cursor.goto_next_sibling(); - 409 | assert_eq!(cursor.node().kind(), "identifier"); - 410 | assert_eq!(cursor.field_name(), Some("name")); - | - 411 | cursor.goto_next_sibling(); - 412 | assert_eq!(cursor.node().kind(), "comment"); - 413 | assert_eq!(cursor.field_name(), None); - | - 414 | cursor.goto_next_sibling(); - 415 | assert_eq!(cursor.node().kind(), "formal_parameters"); - 416 | assert_eq!(cursor.field_name(), Some("parameters")); - 417 | } - | - 418 | #[test] - 419 | fn test_tree_cursor_child_for_point() { - 420 | let mut parser = Parser::new(); - 421 | parser.set_language(&get_language("javascript")).unwrap(); - 422 | let source = &" - 423 | [ - 
424 | one, - 425 | { - 426 | two: tree - 427 | }, - 428 | four, five, six - 429 | ];"[1..]; - 430 | let tree = parser.parse(source, None).unwrap(); - | - 431 | let mut c = tree.walk(); - 432 | assert_eq!(c.node().kind(), "program"); - | - 433 | assert_eq!(c.goto_first_child_for_point(Point::new(7, 0)), None); - 434 | assert_eq!(c.goto_first_child_for_point(Point::new(6, 7)), None); - 435 | assert_eq!(c.node().kind(), "program"); - | - 436 | // descend to expression statement - 437 | assert_eq!(c.goto_first_child_for_point(Point::new(6, 5)), Some(0)); - 438 | assert_eq!(c.node().kind(), "expression_statement"); - | - 439 | // step into ';' and back up - 440 | assert_eq!(c.goto_first_child_for_point(Point::new(7, 0)), None); - 441 | assert_eq!(c.goto_first_child_for_point(Point::new(6, 6)), None); - 442 | assert_eq!(c.goto_first_child_for_point(Point::new(6, 5)), Some(1)); - 443 | assert_eq!( - 444 | (c.node().kind(), c.node().start_position()), - 445 | (";", Point::new(6, 5)) - 446 | ); - 447 | assert!(c.goto_parent()); - | - 448 | // descend into array - 449 | assert_eq!(c.goto_first_child_for_point(Point::new(6, 4)), Some(0)); - 450 | assert_eq!( - 451 | (c.node().kind(), c.node().start_position()), - 452 | ("array", Point::new(0, 4)) - 453 | ); - | - 454 | // step into '[' and back up - 455 | assert_eq!(c.goto_first_child_for_point(Point::new(0, 4)), Some(0)); - 456 | assert_eq!( - 457 | (c.node().kind(), c.node().start_position()), - 458 | ("[", Point::new(0, 4)) - 459 | ); - 460 | assert!(c.goto_parent()); - | - 461 | // step into identifier 'one' and back up - 462 | assert_eq!(c.goto_first_child_for_point(Point::new(1, 0)), Some(1)); - 463 | assert_eq!( - 464 | (c.node().kind(), c.node().start_position()), - 465 | ("identifier", Point::new(1, 8)) - 466 | ); - 467 | assert!(c.goto_parent()); - 468 | assert_eq!(c.goto_first_child_for_point(Point::new(1, 10)), Some(1)); - 469 | assert_eq!( - 470 | (c.node().kind(), c.node().start_position()), - 471 | 
("identifier", Point::new(1, 8)) - 472 | ); - 473 | assert!(c.goto_parent()); - | - 474 | // step into first ',' and back up - 475 | assert_eq!(c.goto_first_child_for_point(Point::new(1, 11)), Some(2)); - 476 | assert_eq!( - 477 | (c.node().kind(), c.node().start_position()), - 478 | (",", Point::new(1, 11)) - 479 | ); - 480 | assert!(c.goto_parent()); - | - 481 | // step into identifier 'four' and back up - 482 | assert_eq!(c.goto_first_child_for_point(Point::new(5, 0)), Some(5)); - 483 | assert_eq!( - 484 | (c.node().kind(), c.node().start_position()), - 485 | ("identifier", Point::new(5, 8)) - 486 | ); - 487 | assert!(c.goto_parent()); - 488 | assert_eq!(c.goto_first_child_for_point(Point::new(5, 0)), Some(5)); - 489 | assert_eq!( - 490 | (c.node().kind(), c.node().start_position()), - 491 | ("identifier", Point::new(5, 8)) - 492 | ); - 493 | assert!(c.goto_parent()); - | - 494 | // step into ']' and back up - 495 | assert_eq!(c.goto_first_child_for_point(Point::new(6, 0)), Some(10)); - 496 | assert_eq!( - 497 | (c.node().kind(), c.node().start_position()), - 498 | ("]", Point::new(6, 4)) - 499 | ); - 500 | assert!(c.goto_parent()); - 501 | assert_eq!(c.goto_first_child_for_point(Point::new(6, 0)), Some(10)); - 502 | assert_eq!( - 503 | (c.node().kind(), c.node().start_position()), - 504 | ("]", Point::new(6, 4)) - 505 | ); - 506 | assert!(c.goto_parent()); - | - 507 | // descend into object - 508 | assert_eq!(c.goto_first_child_for_point(Point::new(2, 0)), Some(3)); - 509 | assert_eq!( - 510 | (c.node().kind(), c.node().start_position()), - 511 | ("object", Point::new(2, 8)) - 512 | ); - 513 | } - | - 514 | #[test] - 515 | fn test_tree_node_equality() { - 516 | let mut parser = Parser::new(); - 517 | parser.set_language(&get_language("rust")).unwrap(); - 518 | let tree = parser.parse("struct A {}", None).unwrap(); - 519 | let node1 = tree.root_node(); - 520 | let node2 = tree.root_node(); - 521 | assert_eq!(node1, node2); - 522 | 
assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap()); - 523 | assert_ne!(node1.child(0).unwrap(), node2); - 524 | } - | - 525 | #[test] - 526 | fn test_get_changed_ranges() { - 527 | let source_code = b"{a: null};\n".to_vec(); - | - 528 | let mut parser = Parser::new(); - 529 | parser.set_language(&get_language("javascript")).unwrap(); - 530 | let tree = parser.parse(&source_code, None).unwrap(); - | - 531 | assert_eq!( - 532 | tree.root_node().to_sexp(), - 533 | "(program (expression_statement (object (pair key: (property_identifier) value: (null)))))" - 534 | ); - | - 535 | // Updating one token - 536 | { - 537 | let mut tree = tree.clone(); - 538 | let mut source_code = source_code.clone(); - | - 539 | // Replace `null` with `nothing` - that token has changed syntax - 540 | let edit = Edit { - 541 | position: index_of(&source_code, "ull"), - 542 | deleted_length: 3, - 543 | inserted_text: b"othing".to_vec(), - 544 | }; - 545 | let inverse_edit = invert_edit(&source_code, &edit); - 546 | let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit); - 547 | assert_eq!(ranges, vec![range_of(&source_code, "nothing")]); - | - 548 | // Replace `nothing` with `null` - that token has changed syntax - 549 | let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit); - 550 | assert_eq!(ranges, vec![range_of(&source_code, "null")]); - 551 | } - | - 552 | // Changing only leading whitespace - 553 | { - 554 | let mut tree = tree.clone(); - 555 | let mut source_code = source_code.clone(); - | - 556 | // Insert leading newline - no changed ranges - 557 | let edit = Edit { - 558 | position: 0, - 559 | deleted_length: 0, - 560 | inserted_text: b"\n".to_vec(), - 561 | }; - 562 | let inverse_edit = invert_edit(&source_code, &edit); - 563 | let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit); - 564 | assert_eq!(ranges, vec![]); - | - 565 | // Remove leading newline - no changed ranges - 566 | let 
ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit); - 567 | assert_eq!(ranges, vec![]); - 568 | } - | - 569 | // Inserting elements - 570 | { - 571 | let mut tree = tree.clone(); - 572 | let mut source_code = source_code.clone(); - | - 573 | // Insert a key-value pair before the `}` - those tokens are changed - 574 | let edit1 = Edit { - 575 | position: index_of(&source_code, "}"), - 576 | deleted_length: 0, - 577 | inserted_text: b", b: false".to_vec(), - 578 | }; - 579 | let inverse_edit1 = invert_edit(&source_code, &edit1); - 580 | let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1); - 581 | assert_eq!(ranges, vec![range_of(&source_code, ", b: false")]); - | - 582 | let edit2 = Edit { - 583 | position: index_of(&source_code, ", b"), - 584 | deleted_length: 0, - 585 | inserted_text: b", c: 1".to_vec(), - 586 | }; - 587 | let inverse_edit2 = invert_edit(&source_code, &edit2); - 588 | let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit2); - 589 | assert_eq!(ranges, vec![range_of(&source_code, ", c: 1")]); - | - 590 | // Remove the middle pair - 591 | let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit2); - 592 | assert_eq!(ranges, vec![]); - | - 593 | // Remove the second pair - 594 | let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1); - 595 | assert_eq!(ranges, vec![]); - 596 | } - | - 597 | // Wrapping elements in larger expressions - 598 | { - 599 | let mut tree = tree; - 600 | let mut source_code = source_code.clone(); - | - 601 | // Replace `null` with the binary expression `b === null` - 602 | let edit1 = Edit { - 603 | position: index_of(&source_code, "null"), - 604 | deleted_length: 0, - 605 | inserted_text: b"b === ".to_vec(), - 606 | }; - 607 | let inverse_edit1 = invert_edit(&source_code, &edit1); - 608 | let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1); - 
609 | assert_eq!(ranges, vec![range_of(&source_code, "b === null")]); - | - 610 | // Undo - 611 | let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1); - 612 | assert_eq!(ranges, vec![range_of(&source_code, "null")]); - 613 | } - 614 | } - | - 615 | #[test] - 616 | fn test_consistency_with_mid_codepoint_edit() { - 617 | let mut parser = Parser::new(); - 618 | parser.set_language(&get_language("php/php")).unwrap(); - 619 | let mut source_code = - 620 | b"\n::E; - 639 | } - 640 | "; - | - 641 | let mut parser = Parser::new(); - 642 | parser.set_language(&get_language("rust")).unwrap(); - | - 643 | let tree = parser.parse(source, None).unwrap(); - | - 644 | let function = tree.root_node().child(0).unwrap(); - 645 | let block = function.child(3).unwrap(); - 646 | let expression_statement = block.child(1).unwrap(); - 647 | let scoped_identifier = expression_statement.child(0).unwrap(); - 648 | let generic_type = scoped_identifier.child(0).unwrap(); - 649 | assert_eq!(generic_type.kind(), "generic_type"); - | - 650 | let mut cursor = generic_type.walk(); - 651 | assert!(cursor.goto_first_child()); - 652 | assert_eq!(cursor.node().kind(), "type_identifier"); - 653 | assert!(cursor.goto_next_sibling()); - 654 | assert_eq!(cursor.node().kind(), "block_comment"); - 655 | } - | - 656 | fn index_of(text: &[u8], substring: &str) -> usize { - 657 | str::from_utf8(text).unwrap().find(substring).unwrap() - 658 | } - | - 659 | fn range_of(text: &[u8], substring: &str) -> Range { - 660 | let start_byte = index_of(text, substring); - 661 | let end_byte = start_byte + substring.len(); - 662 | Range { - 663 | start_byte, - 664 | end_byte, - 665 | start_point: Point::new(0, start_byte), - 666 | end_point: Point::new(0, end_byte), - 667 | } - 668 | } - | - 669 | fn get_changed_ranges( - 670 | parser: &mut Parser, - 671 | tree: &mut Tree, - 672 | source_code: &mut Vec, - 673 | edit: &Edit, - 674 | ) -> Vec { - 675 | perform_edit(tree, source_code, 
edit).unwrap(); - 676 | let new_tree = parser.parse(source_code, Some(tree)).unwrap(); - 677 | let result = tree.changed_ranges(&new_tree).collect(); - 678 | *tree = new_tree; - 679 | result - 680 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tests/wasm_language_test.rs: --------------------------------------------------------------------------------- - 1 | use std::fs; - | - 2 | use streaming_iterator::StreamingIterator; - 3 | use tree_sitter::{Parser, Query, QueryCursor, WasmError, WasmErrorKind, WasmStore}; - | - 4 | use crate::tests::helpers::{ - 5 | allocations, - 6 | fixtures::{get_test_fixture_language_wasm, ENGINE, WASM_DIR}, - 7 | }; - | - 8 | #[test] - 9 | fn test_wasm_stdlib_symbols() { - 10 | let symbols = tree_sitter::wasm_stdlib_symbols().collect::>(); - 11 | assert_eq!( - 12 | symbols, - 13 | { - 14 | let mut symbols = symbols.clone(); - 15 | symbols.sort_unstable(); - 16 | symbols - 17 | }, - 18 | "symbols aren't sorted" - 19 | ); - | - 20 | assert!(symbols.contains(&"malloc")); - 21 | assert!(symbols.contains(&"free")); - 22 | assert!(symbols.contains(&"memset")); - 23 | assert!(symbols.contains(&"memcpy")); - 24 | } - | - 25 | #[test] - 26 | fn test_load_wasm_ruby_language() { - 27 | allocations::record(|| { - 28 | let mut store = WasmStore::new(&ENGINE).unwrap(); - 29 | let mut parser = Parser::new(); - 30 | let wasm = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap(); - 31 | let language = store.load_language("ruby", &wasm).unwrap(); - 32 | parser.set_wasm_store(store).unwrap(); - 33 | parser.set_language(&language).unwrap(); - 34 | let tree = parser.parse("class A; end", None).unwrap(); - 35 | assert_eq!( - 36 | tree.root_node().to_sexp(), - 37 | "(program (class name: (constant)))" - 38 | ); - 39 | }); - 40 | } - | - 41 | #[test] - 42 | fn test_load_wasm_html_language() { - 43 | allocations::record(|| { - 44 | let mut store = WasmStore::new(&ENGINE).unwrap(); - 45 | let mut 
parser = Parser::new(); - 46 | let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap(); - 47 | let language = store.load_language("html", &wasm).unwrap(); - 48 | parser.set_wasm_store(store).unwrap(); - 49 | parser.set_language(&language).unwrap(); - 50 | let tree = parser - 51 | .parse("

", None) - 52 | .unwrap(); - 53 | assert_eq!( - 54 | tree.root_node().to_sexp(), - 55 | "(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))))" - 56 | ); - 57 | }); - 58 | } - | - 59 | #[test] - 60 | fn test_load_wasm_rust_language() { - 61 | allocations::record(|| { - 62 | let mut store = WasmStore::new(&ENGINE).unwrap(); - 63 | let mut parser = Parser::new(); - 64 | let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap(); - 65 | let language = store.load_language("rust", &wasm).unwrap(); - 66 | parser.set_wasm_store(store).unwrap(); - 67 | parser.set_language(&language).unwrap(); - 68 | let tree = parser.parse("fn main() {}", None).unwrap(); - 69 | assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"); - 70 | }); - 71 | } - | - 72 | #[test] - 73 | fn test_load_wasm_javascript_language() { - 74 | allocations::record(|| { - 75 | let mut store = WasmStore::new(&ENGINE).unwrap(); - 76 | let mut parser = Parser::new(); - 77 | let wasm = fs::read(WASM_DIR.join("tree-sitter-javascript.wasm")).unwrap(); - 78 | let language = store.load_language("javascript", &wasm).unwrap(); - 79 | parser.set_wasm_store(store).unwrap(); - 80 | parser.set_language(&language).unwrap(); - 81 | let tree = parser.parse("const a = b\nconst c = d", None).unwrap(); - 82 | assert_eq!(tree.root_node().to_sexp(), "(program (lexical_declaration (variable_declarator name: (identifier) value: (identifier))) (lexical_declaration (variable_declarator name: (identifier) value: (identifier))))"); - 83 | }); - 84 | } - | - 85 | #[test] - 86 | fn test_load_wasm_python_language() { - 87 | allocations::record(|| { - 88 | let mut store = WasmStore::new(&ENGINE).unwrap(); - 89 | let mut parser = Parser::new(); - 90 | let wasm = fs::read(WASM_DIR.join("tree-sitter-python.wasm")).unwrap(); - 91 | let language 
= store.load_language("python", &wasm).unwrap(); - 92 | parser.set_wasm_store(store).unwrap(); - 93 | parser.set_language(&language).unwrap(); - 94 | let tree = parser.parse("a = b\nc = d", None).unwrap(); - 95 | assert_eq!(tree.root_node().to_sexp(), "(module (expression_statement (assignment left: (identifier) right: (identifier))) (expression_statement (assignment left: (identifier) right: (identifier))))"); - 96 | }); - 97 | } - | - 98 | #[test] - 99 | fn test_load_fixture_language_wasm() { - 100 | allocations::record(|| { - 101 | let store = WasmStore::new(&ENGINE).unwrap(); - 102 | let mut parser = Parser::new(); - 103 | let language = get_test_fixture_language_wasm("epsilon_external_tokens"); - 104 | parser.set_wasm_store(store).unwrap(); - 105 | parser.set_language(&language).unwrap(); - 106 | let tree = parser.parse("hello", None).unwrap(); - 107 | assert_eq!(tree.root_node().to_sexp(), "(document (zero_width))"); - 108 | }); - 109 | } - | - 110 | #[test] - 111 | fn test_load_multiple_wasm_languages() { - 112 | allocations::record(|| { - 113 | let mut store = WasmStore::new(&ENGINE).unwrap(); - 114 | let mut parser = Parser::new(); - | - 115 | let wasm_cpp = fs::read(WASM_DIR.join("tree-sitter-cpp.wasm")).unwrap(); - 116 | let wasm_rs = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap(); - 117 | let wasm_rb = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap(); - 118 | let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap(); - | - 119 | let language_rust = store.load_language("rust", &wasm_rs).unwrap(); - 120 | let language_cpp = store.load_language("cpp", &wasm_cpp).unwrap(); - 121 | let language_ruby = store.load_language("ruby", &wasm_rb).unwrap(); - 122 | let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap(); - 123 | parser.set_wasm_store(store).unwrap(); - | - 124 | let mut parser2 = Parser::new(); - 125 | parser2 - 126 | .set_wasm_store(WasmStore::new(&ENGINE).unwrap()) - 127 
| .unwrap(); - 128 | let mut query_cursor = QueryCursor::new(); - | - 129 | // First, parse with the store that originally loaded the languages. - 130 | // Then parse with a new parser and Wasm store, so that the languages - 131 | // are added one-by-one, in between parses. - 132 | for mut parser in [parser, parser2] { - 133 | for _ in 0..2 { - 134 | let query_rust = Query::new(&language_rust, "(const_item) @foo").unwrap(); - 135 | let query_typescript = - 136 | Query::new(&language_typescript, "(class_declaration) @foo").unwrap(); - | - 137 | parser.set_language(&language_cpp).unwrap(); - 138 | let tree = parser.parse("A c = d();", None).unwrap(); - 139 | assert_eq!( - 140 | tree.root_node().to_sexp(), - 141 | "(translation_unit (declaration type: (template_type name: (type_identifier) arguments: (template_argument_list (type_descriptor type: (type_identifier)))) declarator: (init_declarator declarator: (identifier) value: (call_expression function: (identifier) arguments: (argument_list)))))" - 142 | ); - | - 143 | parser.set_language(&language_rust).unwrap(); - 144 | let source = "const A: B = c();"; - 145 | let tree = parser.parse(source, None).unwrap(); - 146 | assert_eq!( - 147 | tree.root_node().to_sexp(), - 148 | "(source_file (const_item name: (identifier) type: (type_identifier) value: (call_expression function: (identifier) arguments: (arguments))))" - 149 | ); - 150 | assert_eq!( - 151 | query_cursor - 152 | .matches(&query_rust, tree.root_node(), source.as_bytes()) - 153 | .count(), - 154 | 1 - 155 | ); - | - 156 | parser.set_language(&language_ruby).unwrap(); - 157 | let tree = parser.parse("class A; end", None).unwrap(); - 158 | assert_eq!( - 159 | tree.root_node().to_sexp(), - 160 | "(program (class name: (constant)))" - 161 | ); - | - 162 | parser.set_language(&language_typescript).unwrap(); - 163 | let tree = parser.parse("class A {}", None).unwrap(); - 164 | assert_eq!( - 165 | tree.root_node().to_sexp(), - 166 | "(program (class_declaration 
name: (type_identifier) body: (class_body)))" - 167 | ); - 168 | assert_eq!( - 169 | query_cursor - 170 | .matches(&query_typescript, tree.root_node(), source.as_bytes()) - 171 | .count(), - 172 | 1 - 173 | ); - 174 | } - 175 | } - 176 | }); - 177 | } - | - 178 | #[test] - 179 | fn test_load_and_reload_wasm_language() { - 180 | allocations::record(|| { - 181 | let mut store = WasmStore::new(&ENGINE).unwrap(); - | - 182 | let wasm_rust = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap(); - 183 | let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap(); - | - 184 | let language_rust = store.load_language("rust", &wasm_rust).unwrap(); - 185 | let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap(); - 186 | assert_eq!(store.language_count(), 2); - | - 187 | // When a language is dropped, stores can release their instances of that language. - 188 | drop(language_rust); - 189 | assert_eq!(store.language_count(), 1); - | - 190 | let language_rust = store.load_language("rust", &wasm_rust).unwrap(); - 191 | assert_eq!(store.language_count(), 2); - | - 192 | drop(language_rust); - 193 | drop(language_typescript); - 194 | assert_eq!(store.language_count(), 0); - 195 | }); - 196 | } - | - 197 | #[test] - 198 | fn test_reset_wasm_store() { - 199 | allocations::record(|| { - 200 | let mut language_store = WasmStore::new(&ENGINE).unwrap(); - 201 | let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap(); - 202 | let language = language_store.load_language("rust", &wasm).unwrap(); - | - 203 | let mut parser = Parser::new(); - 204 | let parser_store = WasmStore::new(&ENGINE).unwrap(); - 205 | parser.set_wasm_store(parser_store).unwrap(); - 206 | parser.set_language(&language).unwrap(); - 207 | let tree = parser.parse("fn main() {}", None).unwrap(); - 208 | assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"); - | - 209 | let 
parser_store = WasmStore::new(&ENGINE).unwrap(); - 210 | parser.set_wasm_store(parser_store).unwrap(); - 211 | let tree = parser.parse("fn main() {}", None).unwrap(); - 212 | assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"); - 213 | }); - 214 | } - | - 215 | #[test] - 216 | fn test_load_wasm_errors() { - 217 | allocations::record(|| { - 218 | let mut store = WasmStore::new(&ENGINE).unwrap(); - 219 | let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap(); - | - 220 | let bad_wasm = &wasm[1..]; - 221 | assert_eq!( - 222 | store.load_language("rust", bad_wasm).unwrap_err(), - 223 | WasmError { - 224 | kind: WasmErrorKind::Parse, - 225 | message: "failed to parse dylink section of Wasm module".into(), - 226 | } - 227 | ); - | - 228 | assert_eq!( - 229 | store.load_language("not_rust", &wasm).unwrap_err(), - 230 | WasmError { - 231 | kind: WasmErrorKind::Instantiate, - 232 | message: "module did not contain language function: tree_sitter_not_rust".into(), - 233 | } - 234 | ); - | - 235 | let mut bad_wasm = wasm.clone(); - 236 | bad_wasm[300..500].iter_mut().for_each(|b| *b = 0); - 237 | assert_eq!( - 238 | store.load_language("rust", &bad_wasm).unwrap_err().kind, - 239 | WasmErrorKind::Compile, - 240 | ); - 241 | }); - 242 | } - | - 243 | #[test] - 244 | fn test_wasm_oom() { - 245 | allocations::record(|| { - 246 | let mut store = WasmStore::new(&ENGINE).unwrap(); - 247 | let mut parser = Parser::new(); - 248 | let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap(); - 249 | let language = store.load_language("html", &wasm).unwrap(); - 250 | parser.set_wasm_store(store).unwrap(); - 251 | parser.set_language(&language).unwrap(); - | - 252 | let tag_name = "a-b".repeat(2 * 1024 * 1024); - 253 | let code = format!("<{tag_name}>hello world"); - 254 | assert!(parser.parse(&code, None).is_none()); - | - 255 | let tag_name = "a-b".repeat(20); - 256 | let code = 
format!("<{tag_name}>hello world"); - 257 | parser.set_language(&language).unwrap(); - 258 | let tree = parser.parse(&code, None).unwrap(); - 259 | assert_eq!( - 260 | tree.root_node().to_sexp(), - 261 | "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))" - 262 | ); - 263 | }); - 264 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/tree_sitter_cli.rs: --------------------------------------------------------------------------------- - 1 | #![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))] - | - 2 | pub mod fuzz; - 3 | pub mod highlight; - 4 | pub mod init; - 5 | pub mod input; - 6 | pub mod logger; - 7 | pub mod parse; - 8 | pub mod playground; - 9 | pub mod query; - 10 | pub mod query_testing; - 11 | pub mod tags; - 12 | pub mod test; - 13 | pub mod test_highlight; - 14 | pub mod test_tags; - 15 | pub mod util; - 16 | pub mod version; - 17 | pub mod wasm; - | - 18 | #[cfg(test)] - 19 | mod tests; - | - 20 | #[cfg(doctest)] - 21 | mod tests; - - - --------------------------------------------------------------------------------- -/crates/cli/src/util.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | path::{Path, PathBuf}, - 3 | process::{Child, ChildStdin, Command, Stdio}, - 4 | sync::{ - 5 | atomic::{AtomicUsize, Ordering}, - 6 | Arc, - 7 | }, - 8 | }; - | - 9 | use anyhow::{anyhow, Context, Result}; - 10 | use indoc::indoc; - 11 | use log::error; - 12 | use tree_sitter::{Parser, Tree}; - 13 | use tree_sitter_config::Config; - 14 | use tree_sitter_loader::Config as LoaderConfig; - | - 15 | const HTML_HEADER: &[u8] = b" - 16 | - | - 17 | - | - 20 | "; - | - 21 | #[must_use] - 22 | pub fn lang_not_found_for_path(path: &Path, loader_config: &LoaderConfig) -> String { - 23 | let path = path.display(); - 24 | format!( - 25 | indoc! 
{" - 26 | No language found for path `{}` - | - 27 | If a language should be associated with this file extension, please ensure the path to `{}` is inside one of the following directories as specified by your 'config.json':\n\n{}\n - 28 | If the directory that contains the relevant grammar for `{}` is not listed above, please add the directory to the list of directories in your config file, {} - 29 | "}, - 30 | path, - 31 | path, - 32 | loader_config - 33 | .parser_directories - 34 | .iter() - 35 | .enumerate() - 36 | .map(|(i, d)| format!(" {}. {}", i + 1, d.display())) - 37 | .collect::>() - 38 | .join(" \n"), - 39 | path, - 40 | if let Ok(Some(config_path)) = Config::find_config_file() { - 41 | format!("located at {}", config_path.display()) - 42 | } else { - 43 | String::from("which you need to create by running `tree-sitter init-config`") - 44 | } - 45 | ) - 46 | } - | - 47 | #[must_use] - 48 | pub fn cancel_on_signal() -> Arc { - 49 | let result = Arc::new(AtomicUsize::new(0)); - 50 | ctrlc::set_handler({ - 51 | let flag = result.clone(); - 52 | move || { - 53 | flag.store(1, Ordering::Relaxed); - 54 | } - 55 | }) - 56 | .expect("Error setting Ctrl-C handler"); - 57 | result - 58 | } - | - 59 | pub struct LogSession { - 60 | path: PathBuf, - 61 | dot_process: Option, - 62 | dot_process_stdin: Option, - 63 | open_log: bool, - 64 | } - | - 65 | pub fn print_tree_graph(tree: &Tree, path: &str, quiet: bool) -> Result<()> { - 66 | let session = LogSession::new(path, quiet)?; - 67 | tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap()); - 68 | Ok(()) - 69 | } - | - 70 | pub fn log_graphs(parser: &mut Parser, path: &str, open_log: bool) -> Result { - 71 | let session = LogSession::new(path, open_log)?; - 72 | parser.print_dot_graphs(session.dot_process_stdin.as_ref().unwrap()); - 73 | Ok(session) - 74 | } - | - 75 | impl LogSession { - 76 | fn new(path: &str, open_log: bool) -> Result { - 77 | use std::io::Write; - | - 78 | let mut dot_file = 
std::fs::File::create(path)?; - 79 | dot_file.write_all(HTML_HEADER)?; - 80 | let mut dot_process = Command::new("dot") - 81 | .arg("-Tsvg") - 82 | .stdin(Stdio::piped()) - 83 | .stdout(dot_file) - 84 | .spawn() - 85 | .with_context(|| { - 86 | "Failed to run the `dot` command. Check that graphviz is installed." - 87 | })?; - 88 | let dot_stdin = dot_process - 89 | .stdin - 90 | .take() - 91 | .ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?; - 92 | Ok(Self { - 93 | path: PathBuf::from(path), - 94 | dot_process: Some(dot_process), - 95 | dot_process_stdin: Some(dot_stdin), - 96 | open_log, - 97 | }) - 98 | } - 99 | } - | - 100 | impl Drop for LogSession { - 101 | fn drop(&mut self) { - 102 | use std::fs; - | - 103 | drop(self.dot_process_stdin.take().unwrap()); - 104 | let output = self.dot_process.take().unwrap().wait_with_output().unwrap(); - 105 | if output.status.success() { - 106 | if self.open_log && fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64 { - 107 | webbrowser::open(&self.path.to_string_lossy()).unwrap(); - 108 | } - 109 | } else { - 110 | error!( - 111 | "Dot failed: {} {}", - 112 | String::from_utf8_lossy(&output.stdout), - 113 | String::from_utf8_lossy(&output.stderr) - 114 | ); - 115 | } - 116 | } - 117 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/version.rs: --------------------------------------------------------------------------------- - 1 | use std::{fs, path::PathBuf, process::Command}; - | - 2 | use anyhow::{anyhow, Context, Result}; - 3 | use clap::ValueEnum; - 4 | use log::{info, warn}; - 5 | use regex::Regex; - 6 | use semver::Version as SemverVersion; - 7 | use std::cmp::Ordering; - 8 | use tree_sitter_loader::TreeSitterJSON; - | - 9 | #[derive(Clone, Copy, Default, ValueEnum)] - 10 | pub enum BumpLevel { - 11 | #[default] - 12 | Patch, - 13 | Minor, - 14 | Major, - 15 | } - | - 16 | pub struct Version { - 17 | pub version: Option, - 18 
| pub current_dir: PathBuf, - 19 | pub bump: Option, - 20 | } - | - 21 | impl Version { - 22 | #[must_use] - 23 | pub const fn new( - 24 | version: Option, - 25 | current_dir: PathBuf, - 26 | bump: Option, - 27 | ) -> Self { - 28 | Self { - 29 | version, - 30 | current_dir, - 31 | bump, - 32 | } - 33 | } - | - 34 | pub fn run(mut self) -> Result<()> { - 35 | let tree_sitter_json = self.current_dir.join("tree-sitter.json"); - | - 36 | let tree_sitter_json = - 37 | serde_json::from_str::(&fs::read_to_string(tree_sitter_json)?)?; - | - 38 | let current_version = tree_sitter_json.metadata.version; - 39 | self.version = match (self.version.is_some(), self.bump) { - 40 | (false, None) => { - 41 | info!("Current version: {current_version}"); - 42 | return Ok(()); - 43 | } - 44 | (true, None) => self.version, - 45 | (false, Some(bump)) => { - 46 | let mut v = current_version.clone(); - 47 | match bump { - 48 | BumpLevel::Patch => v.patch += 1, - 49 | BumpLevel::Minor => { - 50 | v.minor += 1; - 51 | v.patch = 0; - 52 | } - 53 | BumpLevel::Major => { - 54 | v.major += 1; - 55 | v.minor = 0; - 56 | v.patch = 0; - 57 | } - 58 | } - 59 | Some(v) - 60 | } - 61 | (true, Some(_)) => unreachable!(), - 62 | }; - | - 63 | let new_version = self.version.as_ref().unwrap(); - 64 | match new_version.cmp(¤t_version) { - 65 | Ordering::Less => { - 66 | warn!("New version is lower than current!"); - 67 | warn!("Reverting version {current_version} to {new_version}"); - 68 | } - 69 | Ordering::Greater => { - 70 | info!("Bumping version {current_version} to {new_version}"); - 71 | } - 72 | Ordering::Equal => { - 73 | info!("Keeping version {current_version}"); - 74 | } - 75 | } - | - 76 | let is_multigrammar = tree_sitter_json.grammars.len() > 1; - | - 77 | self.update_treesitter_json().with_context(|| { - 78 | format!( - 79 | "Failed to update tree-sitter.json at {}", - 80 | self.current_dir.display() - 81 | ) - 82 | })?; - 83 | self.update_cargo_toml().with_context(|| { - 84 | format!( - 85 
| "Failed to update Cargo.toml at {}", - 86 | self.current_dir.display() - 87 | ) - 88 | })?; - 89 | self.update_package_json().with_context(|| { - 90 | format!( - 91 | "Failed to update package.json at {}", - 92 | self.current_dir.display() - 93 | ) - 94 | })?; - 95 | self.update_makefile(is_multigrammar).with_context(|| { - 96 | format!( - 97 | "Failed to update Makefile at {}", - 98 | self.current_dir.display() - 99 | ) - 100 | })?; - 101 | self.update_cmakelists_txt().with_context(|| { - 102 | format!( - 103 | "Failed to update CMakeLists.txt at {}", - 104 | self.current_dir.display() - 105 | ) - 106 | })?; - 107 | self.update_pyproject_toml().with_context(|| { - 108 | format!( - 109 | "Failed to update pyproject.toml at {}", - 110 | self.current_dir.display() - 111 | ) - 112 | })?; - | - 113 | Ok(()) - 114 | } - | - 115 | fn update_treesitter_json(&self) -> Result<()> { - 116 | let tree_sitter_json = &fs::read_to_string(self.current_dir.join("tree-sitter.json"))?; - | - 117 | let tree_sitter_json = tree_sitter_json - 118 | .lines() - 119 | .map(|line| { - 120 | if line.contains("\"version\":") { - 121 | let prefix_index = line.find("\"version\":").unwrap() + "\"version\":".len(); - 122 | let start_quote = line[prefix_index..].find('"').unwrap() + prefix_index + 1; - 123 | let end_quote = line[start_quote + 1..].find('"').unwrap() + start_quote + 1; - | - 124 | format!( - 125 | "{}{}{}", - 126 | &line[..start_quote], - 127 | self.version.as_ref().unwrap(), - 128 | &line[end_quote..] 
- 129 | ) - 130 | } else { - 131 | line.to_string() - 132 | } - 133 | }) - 134 | .collect::>() - 135 | .join("\n") - 136 | + "\n"; - | - 137 | fs::write(self.current_dir.join("tree-sitter.json"), tree_sitter_json)?; - | - 138 | Ok(()) - 139 | } - | - 140 | fn update_cargo_toml(&self) -> Result<()> { - 141 | if !self.current_dir.join("Cargo.toml").exists() { - 142 | return Ok(()); - 143 | } - | - 144 | let cargo_toml = fs::read_to_string(self.current_dir.join("Cargo.toml"))?; - | - 145 | let cargo_toml = cargo_toml - 146 | .lines() - 147 | .map(|line| { - 148 | if line.starts_with("version =") { - 149 | format!("version = \"{}\"", self.version.as_ref().unwrap()) - 150 | } else { - 151 | line.to_string() - 152 | } - 153 | }) - 154 | .collect::>() - 155 | .join("\n") - 156 | + "\n"; - | - 157 | fs::write(self.current_dir.join("Cargo.toml"), cargo_toml)?; - | - 158 | if self.current_dir.join("Cargo.lock").exists() { - 159 | let Ok(cmd) = Command::new("cargo") - 160 | .arg("generate-lockfile") - 161 | .arg("--offline") - 162 | .current_dir(&self.current_dir) - 163 | .output() - 164 | else { - 165 | return Ok(()); // cargo is not `executable`, ignore - 166 | }; - | - 167 | if !cmd.status.success() { - 168 | let stderr = String::from_utf8_lossy(&cmd.stderr); - 169 | return Err(anyhow!( - 170 | "Failed to run `cargo generate-lockfile`:\n{stderr}" - 171 | )); - 172 | } - 173 | } - | - 174 | Ok(()) - 175 | } - | - 176 | fn update_package_json(&self) -> Result<()> { - 177 | if !self.current_dir.join("package.json").exists() { - 178 | return Ok(()); - 179 | } - | - 180 | let package_json = &fs::read_to_string(self.current_dir.join("package.json"))?; - | - 181 | let package_json = package_json - 182 | .lines() - 183 | .map(|line| { - 184 | if line.contains("\"version\":") { - 185 | let prefix_index = line.find("\"version\":").unwrap() + "\"version\":".len(); - 186 | let start_quote = line[prefix_index..].find('"').unwrap() + prefix_index + 1; - 187 | let end_quote = 
line[start_quote + 1..].find('"').unwrap() + start_quote + 1; - | - 188 | format!( - 189 | "{}{}{}", - 190 | &line[..start_quote], - 191 | self.version.as_ref().unwrap(), - 192 | &line[end_quote..] - 193 | ) - 194 | } else { - 195 | line.to_string() - 196 | } - 197 | }) - 198 | .collect::>() - 199 | .join("\n") - 200 | + "\n"; - | - 201 | fs::write(self.current_dir.join("package.json"), package_json)?; - | - 202 | if self.current_dir.join("package-lock.json").exists() { - 203 | let Ok(cmd) = Command::new("npm") - 204 | .arg("install") - 205 | .arg("--package-lock-only") - 206 | .current_dir(&self.current_dir) - 207 | .output() - 208 | else { - 209 | return Ok(()); // npm is not `executable`, ignore - 210 | }; - | - 211 | if !cmd.status.success() { - 212 | let stderr = String::from_utf8_lossy(&cmd.stderr); - 213 | return Err(anyhow!("Failed to run `npm install`:\n{stderr}")); - 214 | } - 215 | } - | - 216 | Ok(()) - 217 | } - | - 218 | fn update_makefile(&self, is_multigrammar: bool) -> Result<()> { - 219 | let makefile = if is_multigrammar { - 220 | if !self.current_dir.join("common").join("common.mak").exists() { - 221 | return Ok(()); - 222 | } - | - 223 | fs::read_to_string(self.current_dir.join("Makefile"))? - 224 | } else { - 225 | if !self.current_dir.join("Makefile").exists() { - 226 | return Ok(()); - 227 | } - | - 228 | fs::read_to_string(self.current_dir.join("Makefile"))? 
- 229 | }; - | - 230 | let makefile = makefile - 231 | .lines() - 232 | .map(|line| { - 233 | if line.starts_with("VERSION") { - 234 | format!("VERSION := {}", self.version.as_ref().unwrap()) - 235 | } else { - 236 | line.to_string() - 237 | } - 238 | }) - 239 | .collect::>() - 240 | .join("\n") - 241 | + "\n"; - | - 242 | fs::write(self.current_dir.join("Makefile"), makefile)?; - | - 243 | Ok(()) - 244 | } - | - 245 | fn update_cmakelists_txt(&self) -> Result<()> { - 246 | if !self.current_dir.join("CMakeLists.txt").exists() { - 247 | return Ok(()); - 248 | } - | - 249 | let cmake = fs::read_to_string(self.current_dir.join("CMakeLists.txt"))?; - | - 250 | let re = Regex::new(r#"(\s*VERSION\s+)"[0-9]+\.[0-9]+\.[0-9]+""#)?; - 251 | let cmake = re.replace(&cmake, format!(r#"$1"{}""#, self.version.as_ref().unwrap())); - | - 252 | fs::write(self.current_dir.join("CMakeLists.txt"), cmake.as_bytes())?; - | - 253 | Ok(()) - 254 | } - | - 255 | fn update_pyproject_toml(&self) -> Result<()> { - 256 | if !self.current_dir.join("pyproject.toml").exists() { - 257 | return Ok(()); - 258 | } - | - 259 | let pyproject_toml = fs::read_to_string(self.current_dir.join("pyproject.toml"))?; - | - 260 | let pyproject_toml = pyproject_toml - 261 | .lines() - 262 | .map(|line| { - 263 | if line.starts_with("version =") { - 264 | format!("version = \"{}\"", self.version.as_ref().unwrap()) - 265 | } else { - 266 | line.to_string() - 267 | } - 268 | }) - 269 | .collect::>() - 270 | .join("\n") - 271 | + "\n"; - | - 272 | fs::write(self.current_dir.join("pyproject.toml"), pyproject_toml)?; - | - 273 | Ok(()) - 274 | } - 275 | } - - - --------------------------------------------------------------------------------- -/crates/cli/src/wasm.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | fs, - 3 | path::{Path, PathBuf}, - 4 | }; - | - 5 | use anyhow::{anyhow, Context, Result}; - 6 | use tree_sitter::wasm_stdlib_symbols; - 7 | use 
tree_sitter_generate::{load_grammar_file, parse_grammar::GrammarJSON}; - 8 | use tree_sitter_loader::Loader; - 9 | use wasmparser::Parser; - | - 10 | pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec)> { - 11 | let grammar_name = get_grammar_name(language_dir) - 12 | .with_context(|| "Failed to get Wasm filename") - 13 | .unwrap(); - 14 | let wasm_filename = format!("tree-sitter-{grammar_name}.wasm"); - 15 | let contents = fs::read(language_dir.join(&wasm_filename)).with_context(|| { - 16 | format!("Failed to read {wasm_filename}. Run `tree-sitter build --wasm` first.",) - 17 | })?; - 18 | Ok((grammar_name, contents)) - 19 | } - | - 20 | pub fn get_grammar_name(language_dir: &Path) -> Result { - 21 | let src_dir = language_dir.join("src"); - 22 | let grammar_json_path = src_dir.join("grammar.json"); - 23 | let grammar_json = fs::read_to_string(&grammar_json_path).with_context(|| { - 24 | format!( - 25 | "Failed to read grammar file {}", - 26 | grammar_json_path.display() - 27 | ) - 28 | })?; - 29 | let grammar: GrammarJSON = serde_json::from_str(&grammar_json).with_context(|| { - 30 | format!( - 31 | "Failed to parse grammar file {}", - 32 | grammar_json_path.display() - 33 | ) - 34 | })?; - 35 | Ok(grammar.name) - 36 | } - | - 37 | pub fn compile_language_to_wasm( - 38 | loader: &Loader, - 39 | language_dir: &Path, - 40 | output_dir: &Path, - 41 | output_file: Option, - 42 | ) -> Result<()> { - 43 | let grammar_name = get_grammar_name(language_dir) - 44 | .or_else(|_| load_grammar_file(&language_dir.join("grammar.js"), None))?; - 45 | let output_filename = - 46 | output_file.unwrap_or_else(|| output_dir.join(format!("tree-sitter-{grammar_name}.wasm"))); - 47 | let src_path = language_dir.join("src"); - 48 | let scanner_path = loader.get_scanner_path(&src_path); - 49 | loader.compile_parser_to_wasm( - 50 | &grammar_name, - 51 | &src_path, - 52 | scanner_path - 53 | .as_ref() - 54 | .and_then(|p| Some(Path::new(p.file_name()?))), - 55 | 
&output_filename, - 56 | )?; - | - 57 | // Exit with an error if the external scanner uses symbols from the - 58 | // C or C++ standard libraries that aren't available to Wasm parsers. - 59 | let stdlib_symbols = wasm_stdlib_symbols().collect::>(); - 60 | let dylink_symbols = [ - 61 | "__indirect_function_table", - 62 | "__memory_base", - 63 | "__stack_pointer", - 64 | "__table_base", - 65 | "__table_base", - 66 | "memory", - 67 | ]; - 68 | let builtin_symbols = [ - 69 | "__assert_fail", - 70 | "__cxa_atexit", - 71 | "abort", - 72 | "emscripten_notify_memory_growth", - 73 | "tree_sitter_debug_message", - 74 | "proc_exit", - 75 | ]; - | - 76 | let mut missing_symbols = Vec::new(); - 77 | let wasm_bytes = fs::read(&output_filename)?; - 78 | let parser = Parser::new(0); - 79 | for payload in parser.parse_all(&wasm_bytes) { - 80 | if let wasmparser::Payload::ImportSection(imports) = payload? { - 81 | for import in imports { - 82 | let import = import?.name; - 83 | if !builtin_symbols.contains(&import) - 84 | && !stdlib_symbols.contains(&import) - 85 | && !dylink_symbols.contains(&import) - 86 | { - 87 | missing_symbols.push(import); - 88 | } - 89 | } - 90 | } - 91 | } - | - 92 | if !missing_symbols.is_empty() { - 93 | Err(anyhow!( - 94 | concat!( - 95 | "This external scanner uses a symbol that isn't available to Wasm parsers.\n", - 96 | "\n", - 97 | "Missing symbols:\n", - 98 | " {}\n", - 99 | "\n", - 100 | "Available symbols:\n", - 101 | " {}", - 102 | ), - 103 | missing_symbols.join("\n "), - 104 | stdlib_symbols.join("\n ") - 105 | ))?; - 106 | } - | - 107 | Ok(()) - 108 | } - - - --------------------------------------------------------------------------------- -/crates/config/Cargo.toml: --------------------------------------------------------------------------------- - 1 | [package] - 2 | name = "tree-sitter-config" - 3 | version.workspace = true - 4 | description = "User configuration of tree-sitter's command line programs" - 5 | authors.workspace = true - 6 | 
edition.workspace = true - 7 | rust-version.workspace = true - 8 | readme = "README.md" - 9 | homepage.workspace = true - 10 | repository.workspace = true - 11 | documentation = "https://docs.rs/tree-sitter-config" - 12 | license.workspace = true - 13 | keywords.workspace = true - 14 | categories.workspace = true - | - 15 | [lib] - 16 | path = "src/tree_sitter_config.rs" - | - 17 | [lints] - 18 | workspace = true - | - 19 | [dependencies] - 20 | anyhow.workspace = true - 21 | etcetera.workspace = true - 22 | log.workspace = true - 23 | serde.workspace = true - 24 | serde_json.workspace = true - - - --------------------------------------------------------------------------------- -/crates/config/README.md: --------------------------------------------------------------------------------- - 1 | # Tree-sitter Config - | - 2 | Manages Tree-sitter's configuration file. - | - 3 | You can use a configuration file to control the behavior of the `tree-sitter` - 4 | command-line program. This crate implements the logic for finding and the - 5 | parsing the contents of the configuration file. - - - --------------------------------------------------------------------------------- -/crates/config/src/tree_sitter_config.rs: --------------------------------------------------------------------------------- - 1 | #![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))] - | - 2 | use std::{env, fs, path::PathBuf}; - | - 3 | use anyhow::{Context, Result}; - 4 | use etcetera::BaseStrategy as _; - 5 | use log::warn; - 6 | use serde::{Deserialize, Serialize}; - 7 | use serde_json::Value; - | - 8 | /// Holds the contents of tree-sitter's configuration file. - 9 | /// - 10 | /// The file typically lives at `~/.config/tree-sitter/config.json`, but see the [`Config::load`][] - 11 | /// method for the full details on where it might be located. - 12 | /// - 13 | /// This type holds the generic JSON content of the configuration file. 
Individual tree-sitter - 14 | /// components will use the [`Config::get`][] method to parse that JSON to extract configuration - 15 | /// fields that are specific to that component. - 16 | #[derive(Debug)] - 17 | pub struct Config { - 18 | pub location: PathBuf, - 19 | pub config: Value, - 20 | } - | - 21 | impl Config { - 22 | pub fn find_config_file() -> Result> { - 23 | if let Ok(path) = env::var("TREE_SITTER_DIR") { - 24 | let mut path = PathBuf::from(path); - 25 | path.push("config.json"); - 26 | if !path.exists() { - 27 | return Ok(None); - 28 | } - 29 | if path.is_file() { - 30 | return Ok(Some(path)); - 31 | } - 32 | } - | - 33 | let xdg_path = Self::xdg_config_file()?; - 34 | if xdg_path.is_file() { - 35 | return Ok(Some(xdg_path)); - 36 | } - | - 37 | if cfg!(target_os = "macos") { - 38 | let legacy_apple_path = etcetera::base_strategy::Apple::new()? - 39 | .data_dir() // `$HOME/Library/Application Support/` - 40 | .join("tree-sitter") - 41 | .join("config.json"); - 42 | if legacy_apple_path.is_file() { - 43 | fs::create_dir_all(xdg_path.parent().unwrap())?; - 44 | fs::rename(&legacy_apple_path, &xdg_path)?; - 45 | warn!( - 46 | "Your config.json file has been automatically migrated from \"{}\" to \"{}\"", - 47 | legacy_apple_path.display(), - 48 | xdg_path.display() - 49 | ); - 50 | return Ok(Some(xdg_path)); - 51 | } - 52 | } - | - 53 | let legacy_path = etcetera::home_dir()? - 54 | .join(".tree-sitter") - 55 | .join("config.json"); - 56 | if legacy_path.is_file() { - 57 | return Ok(Some(legacy_path)); - 58 | } - | - 59 | Ok(None) - 60 | } - | - 61 | fn xdg_config_file() -> Result { - 62 | let xdg_path = etcetera::choose_base_strategy()? - 63 | .config_dir() - 64 | .join("tree-sitter") - 65 | .join("config.json"); - 66 | Ok(xdg_path) - 67 | } - | - 68 | /// Locates and loads in the user's configuration file. 
We search for the configuration file - 69 | /// in the following locations, in order: - 70 | /// - 71 | /// - Location specified by the path parameter if provided - 72 | /// - `$TREE_SITTER_DIR/config.json`, if the `TREE_SITTER_DIR` environment variable is set - 73 | /// - `tree-sitter/config.json` in your default user configuration directory, as determined by - 74 | /// [`etcetera::choose_base_strategy`](https://docs.rs/etcetera/*/etcetera/#basestrategy) - 75 | /// - `$HOME/.tree-sitter/config.json` as a fallback from where tree-sitter _used_ to store - 76 | /// its configuration - 77 | pub fn load(path: Option) -> Result { - 78 | let location = if let Some(path) = path { - 79 | path - 80 | } else if let Some(path) = Self::find_config_file()? { - 81 | path - 82 | } else { - 83 | return Self::initial(); - 84 | }; - | - 85 | let content = fs::read_to_string(&location) - 86 | .with_context(|| format!("Failed to read {}", location.to_string_lossy()))?; - 87 | let config = serde_json::from_str(&content) - 88 | .with_context(|| format!("Bad JSON config {}", location.to_string_lossy()))?; - 89 | Ok(Self { location, config }) - 90 | } - | - 91 | /// Creates an empty initial configuration file. You can then use the [`Config::add`][] method - 92 | /// to add the component-specific configuration types for any components that want to add - 93 | /// content to the default file, and then use [`Config::save`][] to write the configuration to - 94 | /// disk. - 95 | /// - 96 | /// (Note that this is typically only done by the `tree-sitter init-config` command.) - 97 | pub fn initial() -> Result { - 98 | let location = if let Ok(path) = env::var("TREE_SITTER_DIR") { - 99 | let mut path = PathBuf::from(path); - 100 | path.push("config.json"); - 101 | path - 102 | } else { - 103 | Self::xdg_config_file()? 
- 104 | }; - 105 | let config = serde_json::json!({}); - 106 | Ok(Self { location, config }) - 107 | } - | - 108 | /// Saves this configuration to the file that it was originally loaded from. - 109 | pub fn save(&self) -> Result<()> { - 110 | let json = serde_json::to_string_pretty(&self.config)?; - 111 | fs::create_dir_all(self.location.parent().unwrap())?; - 112 | fs::write(&self.location, json)?; - 113 | Ok(()) - 114 | } - | - 115 | /// Parses a component-specific configuration from the configuration file. The type `C` must - 116 | /// be [deserializable](https://docs.rs/serde/*/serde/trait.Deserialize.html) from a JSON - 117 | /// object, and must only include the fields relevant to that component. - 118 | pub fn get(&self) -> Result - 119 | where - 120 | C: for<'de> Deserialize<'de>, - 121 | { - 122 | let config = serde_json::from_value(self.config.clone())?; - 123 | Ok(config) - 124 | } - | - 125 | /// Adds a component-specific configuration to the configuration file. The type `C` must be - 126 | /// [serializable](https://docs.rs/serde/*/serde/trait.Serialize.html) into a JSON object, and - 127 | /// must only include the fields relevant to that component. 
- 128 | pub fn add(&mut self, config: C) -> Result<()> - 129 | where - 130 | C: Serialize, - 131 | { - 132 | let mut config = serde_json::to_value(&config)?; - 133 | self.config - 134 | .as_object_mut() - 135 | .unwrap() - 136 | .append(config.as_object_mut().unwrap()); - 137 | Ok(()) - 138 | } - 139 | } - - - --------------------------------------------------------------------------------- -/crates/generate/Cargo.toml: --------------------------------------------------------------------------------- - 1 | [package] - 2 | name = "tree-sitter-generate" - 3 | version.workspace = true - 4 | description = "Library for generating C source code from a tree-sitter grammar" - 5 | authors.workspace = true - 6 | edition.workspace = true - 7 | rust-version.workspace = true - 8 | readme = "README.md" - 9 | homepage.workspace = true - 10 | repository.workspace = true - 11 | documentation = "https://docs.rs/tree-sitter-generate" - 12 | license.workspace = true - 13 | keywords.workspace = true - 14 | categories.workspace = true - | - 15 | [lib] - 16 | path = "src/generate.rs" - | - 17 | [lints] - 18 | workspace = true - | - 19 | [features] - 20 | default = ["qjs-rt"] - 21 | load = ["dep:semver"] - 22 | qjs-rt = ["load", "rquickjs", "pathdiff"] - | - 23 | [dependencies] - 24 | anyhow.workspace = true - 25 | bitflags = "2.9.4" - 26 | dunce = "1.0.5" - 27 | indexmap.workspace = true - 28 | indoc.workspace = true - 29 | log.workspace = true - 30 | pathdiff = { version = "0.2.3", optional = true } - 31 | regex.workspace = true - 32 | regex-syntax.workspace = true - 33 | rquickjs = { version = "0.9.0", optional = true, features = [ - 34 | "bindgen", - 35 | "loader", - 36 | "macro", - 37 | "phf", - 38 | ] } - 39 | rustc-hash.workspace = true - 40 | semver = { workspace = true, optional = true } - 41 | serde.workspace = true - 42 | serde_json.workspace = true - 43 | smallbitvec.workspace = true - 44 | thiserror.workspace = true - 45 | topological-sort.workspace = true - | - 46 | 
[dev-dependencies] - 47 | tempfile.workspace = true - - - --------------------------------------------------------------------------------- -/crates/generate/README.md: --------------------------------------------------------------------------------- - 1 | # Tree-sitter Generate - | - 2 | This helper crate implements the logic for the `tree-sitter generate` command, - 3 | and can be used by external tools to generate a parser from a grammar file. - - - --------------------------------------------------------------------------------- -/crates/generate/src/dedup.rs: --------------------------------------------------------------------------------- - 1 | pub fn split_state_id_groups( - 2 | states: &[S], - 3 | state_ids_by_group_id: &mut Vec>, - 4 | group_ids_by_state_id: &mut [usize], - 5 | start_group_id: usize, - 6 | mut should_split: impl FnMut(&S, &S, &[usize]) -> bool, - 7 | ) -> bool { - 8 | let mut result = false; - | - 9 | let mut group_id = start_group_id; - 10 | while group_id < state_ids_by_group_id.len() { - 11 | let state_ids = &state_ids_by_group_id[group_id]; - 12 | let mut split_state_ids = Vec::new(); - | - 13 | let mut i = 0; - 14 | while i < state_ids.len() { - 15 | let left_state_id = state_ids[i]; - 16 | if split_state_ids.contains(&left_state_id) { - 17 | i += 1; - 18 | continue; - 19 | } - | - 20 | let left_state = &states[left_state_id]; - | - 21 | // Identify all of the other states in the group that are incompatible with - 22 | // this state. 
- 23 | let mut j = i + 1; - 24 | while j < state_ids.len() { - 25 | let right_state_id = state_ids[j]; - 26 | if split_state_ids.contains(&right_state_id) { - 27 | j += 1; - 28 | continue; - 29 | } - 30 | let right_state = &states[right_state_id]; - | - 31 | if should_split(left_state, right_state, group_ids_by_state_id) { - 32 | split_state_ids.push(right_state_id); - 33 | } - | - 34 | j += 1; - 35 | } - | - 36 | i += 1; - 37 | } - | - 38 | // If any states were removed from the group, add them all as a new group. - 39 | if !split_state_ids.is_empty() { - 40 | result = true; - 41 | state_ids_by_group_id[group_id].retain(|i| !split_state_ids.contains(i)); - | - 42 | let new_group_id = state_ids_by_group_id.len(); - 43 | for id in &split_state_ids { - 44 | group_ids_by_state_id[*id] = new_group_id; - 45 | } - | - 46 | state_ids_by_group_id.push(split_state_ids); - 47 | } - | - 48 | group_id += 1; - 49 | } - | - 50 | result - 51 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/dsl.js: --------------------------------------------------------------------------------- - 1 | function alias(rule, value) { - 2 | const result = { - 3 | type: "ALIAS", - 4 | content: normalize(rule), - 5 | named: false, - 6 | value: null - 7 | }; - | - 8 | switch (value.constructor) { - 9 | case String: - 10 | result.named = false; - 11 | result.value = value; - 12 | return result; - 13 | case ReferenceError: - 14 | result.named = true; - 15 | result.value = value.symbol.name; - 16 | return result; - 17 | case Object: - 18 | case GrammarSymbol: - 19 | if (typeof value.type === 'string' && value.type === 'SYMBOL') { - 20 | result.named = true; - 21 | result.value = value.name; - 22 | return result; - 23 | } - 24 | } - | - 25 | throw new Error(`Invalid alias value ${value}`); - 26 | } - | - 27 | function blank() { - 28 | return { - 29 | type: "BLANK" - 30 | }; - 31 | } - | - 32 | function field(name, rule) { - 33 | return { - 34 | 
type: "FIELD", - 35 | name, - 36 | content: normalize(rule) - 37 | } - 38 | } - | - 39 | function choice(...elements) { - 40 | return { - 41 | type: "CHOICE", - 42 | members: elements.map(normalize) - 43 | }; - 44 | } - | - 45 | function optional(value) { - 46 | checkArguments(arguments, arguments.length, optional, 'optional'); - 47 | return choice(value, blank()); - 48 | } - | - 49 | function prec(number, rule) { - 50 | checkPrecedence(number); - 51 | checkArguments( - 52 | arguments, - 53 | arguments.length - 1, - 54 | prec, - 55 | 'prec', - 56 | ' and a precedence argument' - 57 | ); - | - 58 | return { - 59 | type: "PREC", - 60 | value: number, - 61 | content: normalize(rule) - 62 | }; - 63 | } - | - 64 | prec.left = function (number, rule) { - 65 | if (rule == null) { - 66 | rule = number; - 67 | number = 0; - 68 | } - | - 69 | checkPrecedence(number); - 70 | checkArguments( - 71 | arguments, - 72 | arguments.length - 1, - 73 | prec.left, - 74 | 'prec.left', - 75 | ' and an optional precedence argument' - 76 | ); - | - 77 | return { - 78 | type: "PREC_LEFT", - 79 | value: number, - 80 | content: normalize(rule) - 81 | }; - 82 | } - | - 83 | prec.right = function (number, rule) { - 84 | if (rule == null) { - 85 | rule = number; - 86 | number = 0; - 87 | } - | - 88 | checkPrecedence(number); - 89 | checkArguments( - 90 | arguments, - 91 | arguments.length - 1, - 92 | prec.right, - 93 | 'prec.right', - 94 | ' and an optional precedence argument' - 95 | ); - | - 96 | return { - 97 | type: "PREC_RIGHT", - 98 | value: number, - 99 | content: normalize(rule) - 100 | }; - 101 | } - | - 102 | prec.dynamic = function (number, rule) { - 103 | checkPrecedence(number); - 104 | checkArguments( - 105 | arguments, - 106 | arguments.length - 1, - 107 | prec.dynamic, - 108 | 'prec.dynamic', - 109 | ' and a precedence argument' - 110 | ); - | - 111 | return { - 112 | type: "PREC_DYNAMIC", - 113 | value: number, - 114 | content: normalize(rule) - 115 | }; - 116 | } - | - 117 | 
function repeat(rule) { - 118 | checkArguments(arguments, arguments.length, repeat, 'repeat'); - 119 | return { - 120 | type: "REPEAT", - 121 | content: normalize(rule) - 122 | }; - 123 | } - | - 124 | function repeat1(rule) { - 125 | checkArguments(arguments, arguments.length, repeat1, 'repeat1'); - 126 | return { - 127 | type: "REPEAT1", - 128 | content: normalize(rule) - 129 | }; - 130 | } - | - 131 | function seq(...elements) { - 132 | return { - 133 | type: "SEQ", - 134 | members: elements.map(normalize) - 135 | }; - 136 | } - | - 137 | class GrammarSymbol { - 138 | constructor(name) { - 139 | this.type = "SYMBOL"; - 140 | this.name = name; - 141 | } - 142 | } - | - 143 | function reserved(wordset, rule) { - 144 | if (typeof wordset !== 'string') { - 145 | throw new Error('Invalid reserved word set name: ' + wordset) - 146 | } - 147 | return { - 148 | type: "RESERVED", - 149 | content: normalize(rule), - 150 | context_name: wordset, - 151 | } - 152 | } - | - 153 | function sym(name) { - 154 | return new GrammarSymbol(name); - 155 | } - | - 156 | function token(value) { - 157 | checkArguments(arguments, arguments.length, token, 'token', '', 'literal'); - 158 | return { - 159 | type: "TOKEN", - 160 | content: normalize(value) - 161 | }; - 162 | } - | - 163 | token.immediate = function (value) { - 164 | checkArguments(arguments, arguments.length, token.immediate, 'token.immediate', '', 'literal'); - 165 | return { - 166 | type: "IMMEDIATE_TOKEN", - 167 | content: normalize(value) - 168 | }; - 169 | } - | - 170 | function normalize(value) { - 171 | if (typeof value == "undefined") - 172 | throw new Error("Undefined symbol"); - | - 173 | switch (value.constructor) { - 174 | case String: - 175 | return { - 176 | type: 'STRING', - 177 | value - 178 | }; - 179 | case RegExp: - 180 | return value.flags ? 
{ - 181 | type: 'PATTERN', - 182 | value: value.source, - 183 | flags: value.flags - 184 | } : { - 185 | type: 'PATTERN', - 186 | value: value.source - 187 | }; - 188 | case RustRegex: - 189 | return { - 190 | type: 'PATTERN', - 191 | value: value.value - 192 | }; - 193 | case ReferenceError: - 194 | throw value - 195 | default: - 196 | if (typeof value.type === 'string') { - 197 | return value; - 198 | } else { - 199 | throw new TypeError(`Invalid rule: ${value}`); - 200 | } - 201 | } - 202 | } - | - 203 | function RuleBuilder(ruleMap) { - 204 | return new Proxy({}, { - 205 | get(_, propertyName) { - 206 | const symbol = sym(propertyName); - | - 207 | if (!ruleMap || Object.prototype.hasOwnProperty.call(ruleMap, propertyName)) { - 208 | return symbol; - 209 | } else { - 210 | const error = new ReferenceError(`Undefined symbol '${propertyName}'`); - 211 | error.symbol = symbol; - 212 | return error; - 213 | } - 214 | } - 215 | }) - 216 | } - | - 217 | function grammar(baseGrammar, options) { - 218 | let inherits = undefined; - | - 219 | if (!options) { - 220 | options = baseGrammar; - 221 | baseGrammar = { - 222 | name: null, - 223 | rules: {}, - 224 | extras: [normalize(/\s/)], - 225 | conflicts: [], - 226 | externals: [], - 227 | inline: [], - 228 | supertypes: [], - 229 | precedences: [], - 230 | reserved: {}, - 231 | }; - 232 | } else { - 233 | baseGrammar = baseGrammar.grammar; - 234 | inherits = baseGrammar.name; - 235 | } - | - 236 | let externals = baseGrammar.externals; - 237 | if (options.externals) { - 238 | if (typeof options.externals !== "function") { - 239 | throw new Error("Grammar's 'externals' property must be a function."); - 240 | } - | - 241 | const externalsRuleBuilder = RuleBuilder(null) - 242 | const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals); - | - 243 | if (!Array.isArray(externalRules)) { - 244 | throw new Error("Grammar's 'externals' property must return an array of rules."); 
- 245 | } - | - 246 | externals = externalRules.map(normalize); - 247 | } - | - 248 | const ruleMap = {}; - 249 | for (const key of Object.keys(options.rules)) { - 250 | ruleMap[key] = true; - 251 | } - 252 | for (const key of Object.keys(baseGrammar.rules)) { - 253 | ruleMap[key] = true; - 254 | } - 255 | for (const external of externals) { - 256 | if (typeof external.name === 'string') { - 257 | ruleMap[external.name] = true; - 258 | } - 259 | } - | - 260 | const ruleBuilder = RuleBuilder(ruleMap); - | - 261 | const name = options.name; - 262 | if (typeof name !== "string") { - 263 | throw new Error("Grammar's 'name' property must be a string."); - 264 | } - | - 265 | if (!/^[a-zA-Z_]\w*$/.test(name)) { - 266 | throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters."); - 267 | } - | - 268 | if (inherits && typeof inherits !== "string") { - 269 | throw new Error("Base grammar's 'name' property must be a string."); - 270 | } - | - 271 | if (inherits && !/^[a-zA-Z_]\w*$/.test(name)) { - 272 | throw new Error("Base grammar's 'name' property must not start with a digit and cannot contain non-word characters."); - 273 | } - | - 274 | const rules = Object.assign({}, baseGrammar.rules); - 275 | if (options.rules) { - 276 | if (typeof options.rules !== "object") { - 277 | throw new Error("Grammar's 'rules' property must be an object."); - 278 | } - | - 279 | for (const ruleName of Object.keys(options.rules)) { - 280 | const ruleFn = options.rules[ruleName]; - 281 | if (typeof ruleFn !== "function") { - 282 | throw new Error(`Grammar rules must all be functions. 
'${ruleName}' rule is not.`); - 283 | } - 284 | const rule = ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]); - 285 | if (rule === undefined) { - 286 | throw new Error(`Rule '${ruleName}' returned undefined.`); - 287 | } - 288 | rules[ruleName] = normalize(rule); - 289 | } - 290 | } - | - 291 | let reserved = baseGrammar.reserved; - 292 | if (options.reserved) { - 293 | if (typeof options.reserved !== "object") { - 294 | throw new Error("Grammar's 'reserved' property must be an object."); - 295 | } - | - 296 | for (const reservedWordSetName of Object.keys(options.reserved)) { - 297 | const reservedWordSetFn = options.reserved[reservedWordSetName] - 298 | if (typeof reservedWordSetFn !== "function") { - 299 | throw new Error(`Grammar reserved word sets must all be functions. '${reservedWordSetName}' is not.`); - 300 | } - | - 301 | const reservedTokens = reservedWordSetFn.call(ruleBuilder, ruleBuilder, baseGrammar.reserved[reservedWordSetName]); - | - 302 | if (!Array.isArray(reservedTokens)) { - 303 | throw new Error(`Grammar's reserved word set functions must all return arrays of rules. 
'${reservedWordSetName}' does not.`); - 304 | } - | - 305 | reserved[reservedWordSetName] = reservedTokens.map(normalize); - 306 | } - 307 | } - | - 308 | let extras = baseGrammar.extras.slice(); - 309 | if (options.extras) { - 310 | if (typeof options.extras !== "function") { - 311 | throw new Error("Grammar's 'extras' property must be a function."); - 312 | } - | - 313 | extras = options.extras - 314 | .call(ruleBuilder, ruleBuilder, baseGrammar.extras) - | - 315 | if (!Array.isArray(extras)) { - 316 | throw new Error("Grammar's 'extras' function must return an array.") - 317 | } - | - 318 | extras = extras.map(normalize); - 319 | } - | - 320 | let word = baseGrammar.word; - 321 | if (options.word) { - 322 | word = options.word.call(ruleBuilder, ruleBuilder).name; - 323 | if (typeof word != 'string') { - 324 | throw new Error("Grammar's 'word' property must be a named rule."); - 325 | } - | - 326 | if (word === 'ReferenceError') { - 327 | throw new Error("Grammar's 'word' property must be a valid rule name."); - 328 | } - 329 | } - | - 330 | let conflicts = baseGrammar.conflicts; - 331 | if (options.conflicts) { - 332 | if (typeof options.conflicts !== "function") { - 333 | throw new Error("Grammar's 'conflicts' property must be a function."); - 334 | } - | - 335 | const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym)); - 336 | const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules); - | - 337 | if (!Array.isArray(conflictRules)) { - 338 | throw new Error("Grammar's conflicts must be an array of arrays of rules."); - 339 | } - | - 340 | conflicts = conflictRules.map(conflictSet => { - 341 | if (!Array.isArray(conflictSet)) { - 342 | throw new Error("Grammar's conflicts must be an array of arrays of rules."); - 343 | } - | - 344 | return conflictSet.map(symbol => normalize(symbol).name); - 345 | }); - 346 | } - | - 347 | let inline = baseGrammar.inline; - 348 | if (options.inline) { - 349 | if (typeof 
options.inline !== "function") { - 350 | throw new Error("Grammar's 'inline' property must be a function."); - 351 | } - | - 352 | const baseInlineRules = baseGrammar.inline.map(sym); - 353 | const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules); - | - 354 | if (!Array.isArray(inlineRules)) { - 355 | throw new Error("Grammar's inline must be an array of rules."); - 356 | } - | - 357 | inline = inlineRules.filter((symbol, index, self) => { - 358 | if (self.findIndex(s => s.name === symbol.name) !== index) { - 359 | console.log(`Warning: duplicate inline rule '${symbol.name}'`); - 360 | return false; - 361 | } - 362 | if (symbol.name === 'ReferenceError') { - 363 | console.log(`Warning: inline rule '${symbol.symbol.name}' is not defined.`); - 364 | return false; - 365 | } - 366 | return true; - 367 | }).map(symbol => symbol.name); - 368 | } - | - 369 | let supertypes = baseGrammar.supertypes; - 370 | if (options.supertypes) { - 371 | if (typeof options.supertypes !== "function") { - 372 | throw new Error("Grammar's 'supertypes' property must be a function."); - 373 | } - | - 374 | const baseSupertypeRules = baseGrammar.supertypes.map(sym); - 375 | const supertypeRules = options.supertypes.call(ruleBuilder, ruleBuilder, baseSupertypeRules); - | - 376 | if (!Array.isArray(supertypeRules)) { - 377 | throw new Error("Grammar's supertypes must be an array of rules."); - 378 | } - | - 379 | supertypes = supertypeRules.map(symbol => { - 380 | if (symbol.name === 'ReferenceError') { - 381 | throw new Error(`Supertype rule \`${symbol.symbol.name}\` is not defined.`); - 382 | } - 383 | return symbol.name; - 384 | }); - 385 | } - | - 386 | let precedences = baseGrammar.precedences; - 387 | if (options.precedences) { - 388 | if (typeof options.precedences !== "function") { - 389 | throw new Error("Grammar's 'precedences' property must be a function"); - 390 | } - 391 | precedences = options.precedences.call(ruleBuilder, ruleBuilder, 
baseGrammar.precedences); - 392 | if (!Array.isArray(precedences)) { - 393 | throw new Error("Grammar's precedences must be an array of arrays of rules."); - 394 | } - 395 | precedences = precedences.map(list => { - 396 | if (!Array.isArray(list)) { - 397 | throw new Error("Grammar's precedences must be an array of arrays of rules."); - 398 | } - 399 | return list.map(normalize); - 400 | }); - 401 | } - | - 402 | if (Object.keys(rules).length === 0) { - 403 | throw new Error("Grammar must have at least one rule."); - 404 | } - | - 405 | return { - 406 | grammar: { - 407 | name, - 408 | inherits, - 409 | word, - 410 | rules, - 411 | extras, - 412 | conflicts, - 413 | precedences, - 414 | externals, - 415 | inline, - 416 | supertypes, - 417 | reserved, - 418 | }, - 419 | }; - 420 | } - | - 421 | class RustRegex { - 422 | constructor(value) { - 423 | this.value = value; - 424 | } - 425 | } - | - 426 | function checkArguments(args, ruleCount, caller, callerName, suffix = '', argType = 'rule') { - 427 | // Allow for .map() usage where additional arguments are index and the entire array. - 428 | const isMapCall = ruleCount === 3 && typeof args[1] === 'number' && Array.isArray(args[2]); - 429 | if (isMapCall) { - 430 | ruleCount = typeof args[2] === 'number' ? 1 : args[2].length; - 431 | } - 432 | if (ruleCount > 1 && !isMapCall) { - 433 | const error = new Error([ - 434 | `The \`${callerName}\` function only takes one ${argType} argument${suffix}.`, - 435 | `You passed in multiple ${argType}s. 
Did you mean to call \`seq\`?\n` - 436 | ].join('\n')); - 437 | Error.captureStackTrace(error, caller); - 438 | throw error - 439 | } - 440 | } - | - 441 | function checkPrecedence(value) { - 442 | if (value == null) { - 443 | throw new Error('Missing precedence value'); - 444 | } - 445 | } - | - 446 | function getEnv(name) { - 447 | if (globalThis.native) return globalThis.__ts_grammar_path; - 448 | if (globalThis.process) return process.env[name]; // Node/Bun - 449 | if (globalThis.Deno) return Deno.env.get(name); // Deno - 450 | throw Error("Unsupported JS runtime"); - 451 | } - | - 452 | globalThis.alias = alias; - 453 | globalThis.blank = blank; - 454 | globalThis.choice = choice; - 455 | globalThis.optional = optional; - 456 | globalThis.prec = prec; - 457 | globalThis.repeat = repeat; - 458 | globalThis.repeat1 = repeat1; - 459 | globalThis.reserved = reserved; - 460 | globalThis.seq = seq; - 461 | globalThis.sym = sym; - 462 | globalThis.token = token; - 463 | globalThis.grammar = grammar; - 464 | globalThis.field = field; - 465 | globalThis.RustRegex = RustRegex; - | - 466 | const grammarPath = getEnv("TREE_SITTER_GRAMMAR_PATH"); - 467 | let result = await import(grammarPath); - 468 | let grammarObj = result.default?.grammar ?? 
result.grammar; - | - 469 | if (globalThis.native && !grammarObj) { - 470 | grammarObj = module.exports.grammar; - 471 | } - | - 472 | const object = { - 473 | "$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json", - 474 | ...grammarObj, - 475 | }; - 476 | const output = JSON.stringify(object); - | - 477 | if (globalThis.native) { - 478 | globalThis.output = output; - 479 | } else if (globalThis.process) { // Node/Bun - 480 | process.stdout.write(output); - 481 | } else if (globalThis.Deno) { // Deno - 482 | Deno.stdout.writeSync(new TextEncoder().encode(output)); - 483 | } else { - 484 | throw Error("Unsupported JS runtime"); - 485 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/generate.rs: --------------------------------------------------------------------------------- - 1 | use std::{collections::HashMap, sync::LazyLock}; - 2 | #[cfg(feature = "load")] - 3 | use std::{ - 4 | env, fs, - 5 | io::Write, - 6 | path::{Path, PathBuf}, - 7 | process::{Command, Stdio}, - 8 | }; - | - 9 | use anyhow::Result; - 10 | use bitflags::bitflags; - 11 | use log::warn; - 12 | use node_types::VariableInfo; - 13 | use regex::{Regex, RegexBuilder}; - 14 | use rules::{Alias, Symbol}; - 15 | #[cfg(feature = "load")] - 16 | use semver::Version; - 17 | #[cfg(feature = "load")] - 18 | use serde::Deserialize; - 19 | use serde::Serialize; - 20 | use thiserror::Error; - | - 21 | mod build_tables; - 22 | mod dedup; - 23 | mod grammars; - 24 | mod nfa; - 25 | mod node_types; - 26 | pub mod parse_grammar; - 27 | mod prepare_grammar; - 28 | #[cfg(feature = "qjs-rt")] - 29 | mod quickjs; - 30 | mod render; - 31 | mod rules; - 32 | mod tables; - | - 33 | use build_tables::build_tables; - 34 | pub use build_tables::ParseTableBuilderError; - 35 | use grammars::{InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar}; - 36 | pub use node_types::{SuperTypeCycleError, VariableInfoError}; 
- 37 | use parse_grammar::parse_grammar; - 38 | pub use parse_grammar::ParseGrammarError; - 39 | use prepare_grammar::prepare_grammar; - 40 | pub use prepare_grammar::PrepareGrammarError; - 41 | use render::render_c_code; - 42 | pub use render::{ABI_VERSION_MAX, ABI_VERSION_MIN}; - | - 43 | static JSON_COMMENT_REGEX: LazyLock = LazyLock::new(|| { - 44 | RegexBuilder::new("^\\s*//.*") - 45 | .multi_line(true) - 46 | .build() - 47 | .unwrap() - 48 | }); - | - 49 | struct JSONOutput { - 50 | #[cfg(feature = "load")] - 51 | node_types_json: String, - 52 | syntax_grammar: SyntaxGrammar, - 53 | lexical_grammar: LexicalGrammar, - 54 | inlines: InlinedProductionMap, - 55 | simple_aliases: HashMap, - 56 | variable_info: Vec, - 57 | } - | - 58 | struct GeneratedParser { - 59 | c_code: String, - 60 | #[cfg(feature = "load")] - 61 | node_types_json: String, - 62 | } - | - 63 | // NOTE: This constant must be kept in sync with the definition of - 64 | // `TREE_SITTER_LANGUAGE_VERSION` in `lib/include/tree_sitter/api.h`. 
- 65 | const LANGUAGE_VERSION: usize = 15; - | - 66 | pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h"); - 67 | pub const ARRAY_HEADER: &str = include_str!("templates/array.h"); - 68 | pub const PARSER_HEADER: &str = include_str!("parser.h.inc"); - | - 69 | pub type GenerateResult = Result; - | - 70 | #[derive(Debug, Error, Serialize)] - 71 | pub enum GenerateError { - 72 | #[error("Error with specified path -- {0}")] - 73 | GrammarPath(String), - 74 | #[error("{0}")] - 75 | IO(String), - 76 | #[cfg(feature = "load")] - 77 | #[error(transparent)] - 78 | LoadGrammarFile(#[from] LoadGrammarError), - 79 | #[error(transparent)] - 80 | ParseGrammar(#[from] ParseGrammarError), - 81 | #[error(transparent)] - 82 | Prepare(#[from] PrepareGrammarError), - 83 | #[error(transparent)] - 84 | VariableInfo(#[from] VariableInfoError), - 85 | #[error(transparent)] - 86 | BuildTables(#[from] ParseTableBuilderError), - 87 | #[cfg(feature = "load")] - 88 | #[error(transparent)] - 89 | ParseVersion(#[from] ParseVersionError), - 90 | #[error(transparent)] - 91 | SuperTypeCycle(#[from] SuperTypeCycleError), - 92 | } - | - 93 | impl From for GenerateError { - 94 | fn from(value: std::io::Error) -> Self { - 95 | Self::IO(value.to_string()) - 96 | } - 97 | } - | - 98 | #[cfg(feature = "load")] - 99 | pub type LoadGrammarFileResult = Result; - | - 100 | #[cfg(feature = "load")] - 101 | #[derive(Debug, Error, Serialize)] - 102 | pub enum LoadGrammarError { - 103 | #[error("Path to a grammar file with `.js` or `.json` extension is required")] - 104 | InvalidPath, - 105 | #[error("Failed to load grammar.js -- {0}")] - 106 | LoadJSGrammarFile(#[from] JSError), - 107 | #[error("Failed to load grammar.json -- {0}")] - 108 | IO(String), - 109 | #[error("Unknown grammar file extension: {0:?}")] - 110 | FileExtension(PathBuf), - 111 | } - | - 112 | #[cfg(feature = "load")] - 113 | impl From for LoadGrammarError { - 114 | fn from(value: std::io::Error) -> Self { - 115 | 
Self::IO(value.to_string()) - 116 | } - 117 | } - | - 118 | #[cfg(feature = "load")] - 119 | #[derive(Debug, Error, Serialize)] - 120 | pub enum ParseVersionError { - 121 | #[error("{0}")] - 122 | Version(String), - 123 | #[error("{0}")] - 124 | JSON(String), - 125 | #[error("{0}")] - 126 | IO(String), - 127 | } - | - 128 | #[cfg(feature = "load")] - 129 | pub type JSResult = Result; - | - 130 | #[cfg(feature = "load")] - 131 | #[derive(Debug, Error, Serialize)] - 132 | pub enum JSError { - 133 | #[error("Failed to run `{runtime}` -- {error}")] - 134 | JSRuntimeSpawn { runtime: String, error: String }, - 135 | #[error("Got invalid UTF8 from `{runtime}` -- {error}")] - 136 | JSRuntimeUtf8 { runtime: String, error: String }, - 137 | #[error("`{runtime}` process exited with status {code}")] - 138 | JSRuntimeExit { runtime: String, code: i32 }, - 139 | #[error("{0}")] - 140 | IO(String), - 141 | #[error("Could not parse this package's version as semver -- {0}")] - 142 | Semver(String), - 143 | #[error("Failed to serialze grammar JSON -- {0}")] - 144 | Serialzation(String), - 145 | #[cfg(feature = "qjs-rt")] - 146 | #[error("QuickJS error: {0}")] - 147 | QuickJS(String), - 148 | } - | - 149 | #[cfg(feature = "load")] - 150 | impl From for JSError { - 151 | fn from(value: std::io::Error) -> Self { - 152 | Self::IO(value.to_string()) - 153 | } - 154 | } - | - 155 | #[cfg(feature = "load")] - 156 | impl From for JSError { - 157 | fn from(value: serde_json::Error) -> Self { - 158 | Self::Serialzation(value.to_string()) - 159 | } - 160 | } - | - 161 | #[cfg(feature = "load")] - 162 | impl From for JSError { - 163 | fn from(value: semver::Error) -> Self { - 164 | Self::Semver(value.to_string()) - 165 | } - 166 | } - | - 167 | #[cfg(feature = "qjs-rt")] - 168 | impl From for JSError { - 169 | fn from(value: rquickjs::Error) -> Self { - 170 | Self::QuickJS(value.to_string()) - 171 | } - 172 | } - | - 173 | bitflags! 
{ - 174 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - 175 | pub struct OptLevel: u32 { - 176 | const MergeStates = 1 << 0; - 177 | } - 178 | } - | - 179 | impl Default for OptLevel { - 180 | fn default() -> Self { - 181 | Self::MergeStates - 182 | } - 183 | } - | - 184 | #[cfg(feature = "load")] - 185 | #[allow(clippy::too_many_arguments)] - 186 | pub fn generate_parser_in_directory( - 187 | repo_path: T, - 188 | out_path: Option, - 189 | grammar_path: Option, - 190 | mut abi_version: usize, - 191 | report_symbol_name: Option<&str>, - 192 | js_runtime: Option<&str>, - 193 | generate_parser: bool, - 194 | optimizations: OptLevel, - 195 | ) -> GenerateResult<()> - 196 | where - 197 | T: Into, - 198 | U: Into, - 199 | V: Into, - 200 | { - 201 | let mut repo_path: PathBuf = repo_path.into(); - | - 202 | // Populate a new empty grammar directory. - 203 | let grammar_path = if let Some(path) = grammar_path { - 204 | let path_buf: PathBuf = path.into(); - 205 | if !path_buf - 206 | .try_exists() - 207 | .map_err(|e| GenerateError::GrammarPath(e.to_string()))? - 208 | { - 209 | fs::create_dir_all(&path_buf)?; - 210 | repo_path = path_buf; - 211 | repo_path.join("grammar.js") - 212 | } else { - 213 | path_buf - 214 | } - 215 | } else { - 216 | repo_path.join("grammar.js") - 217 | }; - | - 218 | // Read the grammar file. 
- 219 | let grammar_json = load_grammar_file(&grammar_path, js_runtime)?; - | - 220 | let src_path = out_path.map_or_else(|| repo_path.join("src"), |p| p.into()); - 221 | let header_path = src_path.join("tree_sitter"); - | - 222 | // Ensure that the output directory exists - 223 | fs::create_dir_all(&src_path)?; - | - 224 | if grammar_path.file_name().unwrap() != "grammar.json" { - 225 | fs::write(src_path.join("grammar.json"), &grammar_json).map_err(|e| { - 226 | GenerateError::IO(format!( - 227 | "Failed to write grammar.json to {} -- {e}", - 228 | src_path.display() - 229 | )) - 230 | })?; - 231 | } - | - 232 | // If our job is only to generate `grammar.json` and not `parser.c`, stop here. - 233 | let input_grammar = parse_grammar(&grammar_json)?; - | - 234 | if !generate_parser { - 235 | let node_types_json = generate_node_types_from_grammar(&input_grammar)?.node_types_json; - 236 | write_file(&src_path.join("node-types.json"), node_types_json)?; - 237 | return Ok(()); - 238 | } - | - 239 | let semantic_version = read_grammar_version(&repo_path)?; - | - 240 | if semantic_version.is_none() && abi_version > ABI_VERSION_MIN { - 241 | warn!( - 242 | concat!( - 243 | "No `tree-sitter.json` file found in your grammar, ", - 244 | "this file is required to generate with ABI {}. ", - 245 | "Using ABI version {} instead.\n", - 246 | "This file can be set up with `tree-sitter init`. ", - 247 | "For more information, see https://tree-sitter.github.io/tree-sitter/cli/init." - 248 | ), - 249 | abi_version, ABI_VERSION_MIN - 250 | ); - 251 | abi_version = ABI_VERSION_MIN; - 252 | } - | - 253 | // Generate the parser and related files. 
- 254 | let GeneratedParser { - 255 | c_code, - 256 | node_types_json, - 257 | } = generate_parser_for_grammar_with_opts( - 258 | &input_grammar, - 259 | abi_version, - 260 | semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)), - 261 | report_symbol_name, - 262 | optimizations, - 263 | )?; - | - 264 | write_file(&src_path.join("parser.c"), c_code)?; - 265 | write_file(&src_path.join("node-types.json"), node_types_json)?; - 266 | fs::create_dir_all(&header_path)?; - 267 | write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?; - 268 | write_file(&header_path.join("array.h"), ARRAY_HEADER)?; - 269 | write_file(&header_path.join("parser.h"), PARSER_HEADER)?; - | - 270 | Ok(()) - 271 | } - | - 272 | pub fn generate_parser_for_grammar( - 273 | grammar_json: &str, - 274 | semantic_version: Option<(u8, u8, u8)>, - 275 | ) -> GenerateResult<(String, String)> { - 276 | let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); - 277 | let input_grammar = parse_grammar(&grammar_json)?; - 278 | let parser = generate_parser_for_grammar_with_opts( - 279 | &input_grammar, - 280 | LANGUAGE_VERSION, - 281 | semantic_version, - 282 | None, - 283 | OptLevel::empty(), - 284 | )?; - 285 | Ok((input_grammar.name, parser.c_code)) - 286 | } - | - 287 | fn generate_node_types_from_grammar(input_grammar: &InputGrammar) -> GenerateResult { - 288 | let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = - 289 | prepare_grammar(input_grammar)?; - 290 | let variable_info = - 291 | node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?; - | - 292 | #[cfg(feature = "load")] - 293 | let node_types_json = node_types::generate_node_types_json( - 294 | &syntax_grammar, - 295 | &lexical_grammar, - 296 | &simple_aliases, - 297 | &variable_info, - 298 | )?; - 299 | Ok(JSONOutput { - 300 | #[cfg(feature = "load")] - 301 | node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(), - 302 | syntax_grammar, - 303 | 
lexical_grammar, - 304 | inlines, - 305 | simple_aliases, - 306 | variable_info, - 307 | }) - 308 | } - | - 309 | fn generate_parser_for_grammar_with_opts( - 310 | input_grammar: &InputGrammar, - 311 | abi_version: usize, - 312 | semantic_version: Option<(u8, u8, u8)>, - 313 | report_symbol_name: Option<&str>, - 314 | optimizations: OptLevel, - 315 | ) -> GenerateResult { - 316 | let JSONOutput { - 317 | syntax_grammar, - 318 | lexical_grammar, - 319 | inlines, - 320 | simple_aliases, - 321 | variable_info, - 322 | #[cfg(feature = "load")] - 323 | node_types_json, - 324 | } = generate_node_types_from_grammar(input_grammar)?; - 325 | let supertype_symbol_map = - 326 | node_types::get_supertype_symbol_map(&syntax_grammar, &simple_aliases, &variable_info); - 327 | let tables = build_tables( - 328 | &syntax_grammar, - 329 | &lexical_grammar, - 330 | &simple_aliases, - 331 | &variable_info, - 332 | &inlines, - 333 | report_symbol_name, - 334 | optimizations, - 335 | )?; - 336 | let c_code = render_c_code( - 337 | &input_grammar.name, - 338 | tables, - 339 | syntax_grammar, - 340 | lexical_grammar, - 341 | simple_aliases, - 342 | abi_version, - 343 | semantic_version, - 344 | supertype_symbol_map, - 345 | ); - 346 | Ok(GeneratedParser { - 347 | c_code, - 348 | #[cfg(feature = "load")] - 349 | node_types_json, - 350 | }) - 351 | } - | - 352 | /// This will read the `tree-sitter.json` config file and attempt to extract the version. - 353 | /// - 354 | /// If the file is not found in the current directory or any of its parent directories, this will - 355 | /// return `None` to maintain backwards compatibility. If the file is found but the version cannot - 356 | /// be parsed as semver, this will return an error. 
- 357 | #[cfg(feature = "load")] - 358 | fn read_grammar_version(repo_path: &Path) -> Result, ParseVersionError> { - 359 | #[derive(Deserialize)] - 360 | struct TreeSitterJson { - 361 | metadata: Metadata, - 362 | } - | - 363 | #[derive(Deserialize)] - 364 | struct Metadata { - 365 | version: String, - 366 | } - | - 367 | let filename = "tree-sitter.json"; - 368 | let mut path = repo_path.join(filename); - | - 369 | loop { - 370 | let json = path - 371 | .exists() - 372 | .then(|| { - 373 | let contents = fs::read_to_string(path.as_path()).map_err(|e| { - 374 | ParseVersionError::IO(format!("Failed to read `{}` -- {e}", path.display())) - 375 | })?; - 376 | serde_json::from_str::(&contents).map_err(|e| { - 377 | ParseVersionError::JSON(format!("Failed to parse `{}` -- {e}", path.display())) - 378 | }) - 379 | }) - 380 | .transpose()?; - 381 | if let Some(json) = json { - 382 | return Version::parse(&json.metadata.version) - 383 | .map_err(|e| { - 384 | ParseVersionError::Version(format!( - 385 | "Failed to parse `{}` version as semver -- {e}", - 386 | path.display() - 387 | )) - 388 | }) - 389 | .map(Some); - 390 | } - 391 | path.pop(); // filename - 392 | if !path.pop() { - 393 | return Ok(None); - 394 | } - 395 | path.push(filename); - 396 | } - 397 | } - | - 398 | #[cfg(feature = "load")] - 399 | pub fn load_grammar_file( - 400 | grammar_path: &Path, - 401 | js_runtime: Option<&str>, - 402 | ) -> LoadGrammarFileResult { - 403 | if grammar_path.is_dir() { - 404 | Err(LoadGrammarError::InvalidPath)?; - 405 | } - 406 | match grammar_path.extension().and_then(|e| e.to_str()) { - 407 | Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)?), - 408 | Some("json") => Ok(fs::read_to_string(grammar_path)?), - 409 | _ => Err(LoadGrammarError::FileExtension(grammar_path.to_owned()))?, - 410 | } - 411 | } - | - 412 | #[cfg(feature = "load")] - 413 | fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> JSResult { - 414 | let grammar_path = 
dunce::canonicalize(grammar_path)?; - | - 415 | #[cfg(feature = "qjs-rt")] - 416 | if js_runtime == Some("native") { - 417 | return quickjs::execute_native_runtime(&grammar_path); - 418 | } - | - 419 | // The "file:///" prefix is incompatible with the quickjs runtime, but is required - 420 | // for node and bun - 421 | #[cfg(windows)] - 422 | let grammar_path = PathBuf::from(format!("file:///{}", grammar_path.display())); - | - 423 | let js_runtime = js_runtime.unwrap_or("node"); - | - 424 | let mut js_command = Command::new(js_runtime); - 425 | match js_runtime { - 426 | "node" => { - 427 | js_command.args(["--input-type=module", "-"]); - 428 | } - 429 | "bun" => { - 430 | js_command.arg("-"); - 431 | } - 432 | "deno" => { - 433 | js_command.args(["run", "--allow-all", "-"]); - 434 | } - 435 | _ => {} - 436 | } - | - 437 | let mut js_process = js_command - 438 | .env("TREE_SITTER_GRAMMAR_PATH", grammar_path) - 439 | .stdin(Stdio::piped()) - 440 | .stdout(Stdio::piped()) - 441 | .spawn() - 442 | .map_err(|e| JSError::JSRuntimeSpawn { - 443 | runtime: js_runtime.to_string(), - 444 | error: e.to_string(), - 445 | })?; - | - 446 | let mut js_stdin = js_process - 447 | .stdin - 448 | .take() - 449 | .ok_or_else(|| JSError::IO(format!("Failed to open stdin for `{js_runtime}`")))?; - | - 450 | let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))?; - 451 | write!( - 452 | js_stdin, - 453 | "globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {}; - 454 | globalThis.TREE_SITTER_CLI_VERSION_MINOR = {}; - 455 | globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};", - 456 | cli_version.major, cli_version.minor, cli_version.patch, - 457 | ) - 458 | .map_err(|e| { - 459 | JSError::IO(format!( - 460 | "Failed to write tree-sitter version to `{js_runtime}`'s stdin -- {e}" - 461 | )) - 462 | })?; - 463 | js_stdin.write(include_bytes!("./dsl.js")).map_err(|e| { - 464 | JSError::IO(format!( - 465 | "Failed to write grammar dsl to `{js_runtime}`'s stdin -- {e}" - 466 | )) - 467 | })?; - 468 | 
drop(js_stdin); - | - 469 | let output = js_process - 470 | .wait_with_output() - 471 | .map_err(|e| JSError::IO(format!("Failed to read output from `{js_runtime}` -- {e}")))?; - 472 | match output.status.code() { - 473 | Some(0) => { - 474 | let stdout = String::from_utf8(output.stdout).map_err(|e| JSError::JSRuntimeUtf8 { - 475 | runtime: js_runtime.to_string(), - 476 | error: e.to_string(), - 477 | })?; - | - 478 | let mut grammar_json = &stdout[..]; - | - 479 | if let Some(pos) = stdout.rfind('\n') { - 480 | // If there's a newline, split the last line from the rest of the output - 481 | let node_output = &stdout[..pos]; - 482 | grammar_json = &stdout[pos + 1..]; - | - 483 | let mut stdout = std::io::stdout().lock(); - 484 | stdout.write_all(node_output.as_bytes())?; - 485 | stdout.write_all(b"\n")?; - 486 | stdout.flush()?; - 487 | } - | - 488 | Ok(serde_json::to_string_pretty(&serde_json::from_str::< - 489 | serde_json::Value, - 490 | >(grammar_json)?)?) - 491 | } - 492 | Some(code) => Err(JSError::JSRuntimeExit { - 493 | runtime: js_runtime.to_string(), - 494 | code, - 495 | }), - 496 | None => Err(JSError::JSRuntimeExit { - 497 | runtime: js_runtime.to_string(), - 498 | code: -1, - 499 | }), - 500 | } - 501 | } - | - 502 | #[cfg(feature = "load")] - 503 | pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> GenerateResult<()> { - 504 | fs::write(path, body) - 505 | .map_err(|e| GenerateError::IO(format!("Failed to write {:?} -- {e}", path.file_name()))) - 506 | } - | - 507 | #[cfg(test)] - 508 | mod tests { - 509 | use super::{LANGUAGE_VERSION, PARSER_HEADER}; - 510 | #[test] - 511 | fn test_language_versions_are_in_sync() { - 512 | let api_h = include_str!("../../../lib/include/tree_sitter/api.h"); - 513 | let api_language_version = api_h - 514 | .lines() - 515 | .find_map(|line| { - 516 | line.trim() - 517 | .strip_prefix("#define TREE_SITTER_LANGUAGE_VERSION ") - 518 | .and_then(|v| v.parse::().ok()) - 519 | }) - 520 | .expect("Failed to find 
TREE_SITTER_LANGUAGE_VERSION definition in api.h"); - 521 | assert_eq!(LANGUAGE_VERSION, api_language_version); - 522 | } - | - 523 | #[test] - 524 | fn test_parser_header_in_sync() { - 525 | let parser_h = include_str!("../../../lib/src/parser.h"); - 526 | assert!( - 527 | parser_h == PARSER_HEADER, - 528 | "parser.h.inc is out of sync with lib/src/parser.h. Run: cp lib/src/parser.h crates/generate/src/parser.h.inc" - 529 | ); - 530 | } - 531 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/grammars.rs: --------------------------------------------------------------------------------- - 1 | use std::{collections::HashMap, fmt}; - | - 2 | use super::{ - 3 | nfa::Nfa, - 4 | rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet}, - 5 | }; - | - 6 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] - 7 | pub enum VariableType { - 8 | Hidden, - 9 | Auxiliary, - 10 | Anonymous, - 11 | Named, - 12 | } - | - 13 | // Input grammar - | - 14 | #[derive(Clone, Debug, PartialEq, Eq)] - 15 | pub struct Variable { - 16 | pub name: String, - 17 | pub kind: VariableType, - 18 | pub rule: Rule, - 19 | } - | - 20 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] - 21 | pub enum PrecedenceEntry { - 22 | Name(String), - 23 | Symbol(String), - 24 | } - | - 25 | #[derive(Debug, Default, PartialEq, Eq)] - 26 | pub struct InputGrammar { - 27 | pub name: String, - 28 | pub variables: Vec, - 29 | pub extra_symbols: Vec, - 30 | pub expected_conflicts: Vec>, - 31 | pub precedence_orderings: Vec>, - 32 | pub external_tokens: Vec, - 33 | pub variables_to_inline: Vec, - 34 | pub supertype_symbols: Vec, - 35 | pub word_token: Option, - 36 | pub reserved_words: Vec>, - 37 | } - | - 38 | #[derive(Debug, Default, PartialEq, Eq)] - 39 | pub struct ReservedWordContext { - 40 | pub name: String, - 41 | pub reserved_words: Vec, - 42 | } - | - 43 | // Extracted lexical grammar - | - 44 | #[derive(Debug, 
PartialEq, Eq)] - 45 | pub struct LexicalVariable { - 46 | pub name: String, - 47 | pub kind: VariableType, - 48 | pub implicit_precedence: i32, - 49 | pub start_state: u32, - 50 | } - | - 51 | #[derive(Debug, Default, PartialEq, Eq)] - 52 | pub struct LexicalGrammar { - 53 | pub nfa: Nfa, - 54 | pub variables: Vec, - 55 | } - | - 56 | // Extracted syntax grammar - | - 57 | #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] - 58 | pub struct ProductionStep { - 59 | pub symbol: Symbol, - 60 | pub precedence: Precedence, - 61 | pub associativity: Option, - 62 | pub alias: Option, - 63 | pub field_name: Option, - 64 | pub reserved_word_set_id: ReservedWordSetId, - 65 | } - | - 66 | #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)] - 67 | pub struct ReservedWordSetId(pub usize); - | - 68 | impl fmt::Display for ReservedWordSetId { - 69 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - 70 | self.0.fmt(f) - 71 | } - 72 | } - | - 73 | pub const NO_RESERVED_WORDS: ReservedWordSetId = ReservedWordSetId(usize::MAX); - | - 74 | #[derive(Clone, Debug, Default, PartialEq, Eq)] - 75 | pub struct Production { - 76 | pub steps: Vec, - 77 | pub dynamic_precedence: i32, - 78 | } - | - 79 | #[derive(Default)] - 80 | pub struct InlinedProductionMap { - 81 | pub productions: Vec, - 82 | pub production_map: HashMap<(*const Production, u32), Vec>, - 83 | } - | - 84 | #[derive(Clone, Debug, PartialEq, Eq)] - 85 | pub struct SyntaxVariable { - 86 | pub name: String, - 87 | pub kind: VariableType, - 88 | pub productions: Vec, - 89 | } - | - 90 | #[derive(Clone, Debug, PartialEq, Eq)] - 91 | pub struct ExternalToken { - 92 | pub name: String, - 93 | pub kind: VariableType, - 94 | pub corresponding_internal_token: Option, - 95 | } - | - 96 | #[derive(Debug, Default)] - 97 | pub struct SyntaxGrammar { - 98 | pub variables: Vec, - 99 | pub extra_symbols: Vec, - 100 | pub expected_conflicts: Vec>, - 101 | pub external_tokens: Vec, - 102 | pub 
supertype_symbols: Vec, - 103 | pub variables_to_inline: Vec, - 104 | pub word_token: Option, - 105 | pub precedence_orderings: Vec>, - 106 | pub reserved_word_sets: Vec, - 107 | } - | - 108 | #[cfg(test)] - 109 | impl ProductionStep { - 110 | #[must_use] - 111 | pub fn new(symbol: Symbol) -> Self { - 112 | Self { - 113 | symbol, - 114 | precedence: Precedence::None, - 115 | associativity: None, - 116 | alias: None, - 117 | field_name: None, - 118 | reserved_word_set_id: ReservedWordSetId::default(), - 119 | } - 120 | } - | - 121 | pub fn with_prec( - 122 | mut self, - 123 | precedence: Precedence, - 124 | associativity: Option, - 125 | ) -> Self { - 126 | self.precedence = precedence; - 127 | self.associativity = associativity; - 128 | self - 129 | } - | - 130 | pub fn with_alias(mut self, value: &str, is_named: bool) -> Self { - 131 | self.alias = Some(Alias { - 132 | value: value.to_string(), - 133 | is_named, - 134 | }); - 135 | self - 136 | } - | - 137 | pub fn with_field_name(mut self, name: &str) -> Self { - 138 | self.field_name = Some(name.to_string()); - 139 | self - 140 | } - 141 | } - | - 142 | impl Production { - 143 | pub fn first_symbol(&self) -> Option { - 144 | self.steps.first().map(|s| s.symbol) - 145 | } - 146 | } - | - 147 | #[cfg(test)] - 148 | impl Variable { - 149 | pub fn named(name: &str, rule: Rule) -> Self { - 150 | Self { - 151 | name: name.to_string(), - 152 | kind: VariableType::Named, - 153 | rule, - 154 | } - 155 | } - | - 156 | pub fn auxiliary(name: &str, rule: Rule) -> Self { - 157 | Self { - 158 | name: name.to_string(), - 159 | kind: VariableType::Auxiliary, - 160 | rule, - 161 | } - 162 | } - | - 163 | pub fn hidden(name: &str, rule: Rule) -> Self { - 164 | Self { - 165 | name: name.to_string(), - 166 | kind: VariableType::Hidden, - 167 | rule, - 168 | } - 169 | } - | - 170 | pub fn anonymous(name: &str, rule: Rule) -> Self { - 171 | Self { - 172 | name: name.to_string(), - 173 | kind: VariableType::Anonymous, - 174 | rule, - 
175 | } - 176 | } - 177 | } - | - 178 | impl VariableType { - 179 | pub fn is_visible(self) -> bool { - 180 | self == Self::Named || self == Self::Anonymous - 181 | } - 182 | } - | - 183 | impl LexicalGrammar { - 184 | pub fn variable_indices_for_nfa_states<'a>( - 185 | &'a self, - 186 | state_ids: &'a [u32], - 187 | ) -> impl Iterator + 'a { - 188 | let mut prev = None; - 189 | state_ids.iter().filter_map(move |state_id| { - 190 | let variable_id = self.variable_index_for_nfa_state(*state_id); - 191 | if prev == Some(variable_id) { - 192 | None - 193 | } else { - 194 | prev = Some(variable_id); - 195 | prev - 196 | } - 197 | }) - 198 | } - | - 199 | pub fn variable_index_for_nfa_state(&self, state_id: u32) -> usize { - 200 | self.variables - 201 | .iter() - 202 | .position(|v| v.start_state >= state_id) - 203 | .unwrap() - 204 | } - 205 | } - | - 206 | impl SyntaxVariable { - 207 | pub fn is_auxiliary(&self) -> bool { - 208 | self.kind == VariableType::Auxiliary - 209 | } - | - 210 | pub fn is_hidden(&self) -> bool { - 211 | self.kind == VariableType::Hidden || self.kind == VariableType::Auxiliary - 212 | } - 213 | } - | - 214 | impl InlinedProductionMap { - 215 | pub fn inlined_productions<'a>( - 216 | &'a self, - 217 | production: &Production, - 218 | step_index: u32, - 219 | ) -> Option + 'a> { - 220 | self.production_map - 221 | .get(&(std::ptr::from_ref::(production), step_index)) - 222 | .map(|production_indices| { - 223 | production_indices - 224 | .iter() - 225 | .copied() - 226 | .map(move |index| &self.productions[index]) - 227 | }) - 228 | } - 229 | } - | - 230 | impl fmt::Display for PrecedenceEntry { - 231 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - 232 | match self { - 233 | Self::Name(n) => write!(f, "'{n}'"), - 234 | Self::Symbol(s) => write!(f, "$.{s}"), - 235 | } - 236 | } - 237 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/nfa.rs: 
--------------------------------------------------------------------------------- - 1 | use std::{ - 2 | char, - 3 | cmp::{max, Ordering}, - 4 | fmt, - 5 | iter::ExactSizeIterator, - 6 | mem::{self, swap}, - 7 | ops::{Range, RangeInclusive}, - 8 | }; - | - 9 | /// A set of characters represented as a vector of ranges. - 10 | #[derive(Clone, Default, PartialEq, Eq, Hash)] - 11 | pub struct CharacterSet { - 12 | ranges: Vec>, - 13 | } - | - 14 | /// A state in an NFA representing a regular grammar. - 15 | #[derive(Debug, PartialEq, Eq)] - 16 | pub enum NfaState { - 17 | Advance { - 18 | chars: CharacterSet, - 19 | state_id: u32, - 20 | is_sep: bool, - 21 | precedence: i32, - 22 | }, - 23 | Split(u32, u32), - 24 | Accept { - 25 | variable_index: usize, - 26 | precedence: i32, - 27 | }, - 28 | } - | - 29 | #[derive(PartialEq, Eq, Default)] - 30 | pub struct Nfa { - 31 | pub states: Vec, - 32 | } - | - 33 | #[derive(Debug)] - 34 | pub struct NfaCursor<'a> { - 35 | pub(crate) state_ids: Vec, - 36 | nfa: &'a Nfa, - 37 | } - | - 38 | #[derive(Debug, PartialEq, Eq)] - 39 | pub struct NfaTransition { - 40 | pub characters: CharacterSet, - 41 | pub is_separator: bool, - 42 | pub precedence: i32, - 43 | pub states: Vec, - 44 | } - | - 45 | const END: u32 = char::MAX as u32 + 1; - | - 46 | impl CharacterSet { - 47 | /// Create a character set with a single character. - 48 | pub const fn empty() -> Self { - 49 | Self { ranges: Vec::new() } - 50 | } - | - 51 | /// Create a character set with a given *inclusive* range of characters. - 52 | #[allow(clippy::single_range_in_vec_init)] - 53 | #[cfg(test)] - 54 | fn from_range(mut first: char, mut last: char) -> Self { - 55 | if first > last { - 56 | swap(&mut first, &mut last); - 57 | } - 58 | Self { - 59 | ranges: vec![(first as u32)..(last as u32 + 1)], - 60 | } - 61 | } - | - 62 | /// Create a character set with a single character. 
- 63 | #[allow(clippy::single_range_in_vec_init)] - 64 | pub fn from_char(c: char) -> Self { - 65 | Self { - 66 | ranges: vec![(c as u32)..(c as u32 + 1)], - 67 | } - 68 | } - | - 69 | /// Create a character set containing all characters *not* present - 70 | /// in this character set. - 71 | pub fn negate(mut self) -> Self { - 72 | let mut i = 0; - 73 | let mut previous_end = 0; - 74 | while i < self.ranges.len() { - 75 | let range = &mut self.ranges[i]; - 76 | let start = previous_end; - 77 | previous_end = range.end; - 78 | if start < range.start { - 79 | self.ranges[i] = start..range.start; - 80 | i += 1; - 81 | } else { - 82 | self.ranges.remove(i); - 83 | } - 84 | } - 85 | if previous_end < END { - 86 | self.ranges.push(previous_end..END); - 87 | } - 88 | self - 89 | } - | - 90 | pub fn add_char(mut self, c: char) -> Self { - 91 | self.add_int_range(0, c as u32, c as u32 + 1); - 92 | self - 93 | } - | - 94 | pub fn add_range(mut self, start: char, end: char) -> Self { - 95 | self.add_int_range(0, start as u32, end as u32 + 1); - 96 | self - 97 | } - | - 98 | pub fn add(mut self, other: &Self) -> Self { - 99 | let mut index = 0; - 100 | for range in &other.ranges { - 101 | index = self.add_int_range(index, range.start, range.end); - 102 | } - 103 | self - 104 | } - | - 105 | pub fn assign(&mut self, other: &Self) { - 106 | self.ranges.clear(); - 107 | self.ranges.extend_from_slice(&other.ranges); - 108 | } - | - 109 | fn add_int_range(&mut self, mut i: usize, start: u32, end: u32) -> usize { - 110 | while i < self.ranges.len() { - 111 | let range = &mut self.ranges[i]; - 112 | if range.start > end { - 113 | self.ranges.insert(i, start..end); - 114 | return i; - 115 | } - 116 | if range.end >= start { - 117 | range.end = range.end.max(end); - 118 | range.start = range.start.min(start); - | - 119 | // Join this range with the next range if needed. 
- 120 | while i + 1 < self.ranges.len() && self.ranges[i + 1].start <= self.ranges[i].end { - 121 | self.ranges[i].end = self.ranges[i].end.max(self.ranges[i + 1].end); - 122 | self.ranges.remove(i + 1); - 123 | } - | - 124 | return i; - 125 | } - 126 | i += 1; - 127 | } - 128 | self.ranges.push(start..end); - 129 | i - 130 | } - | - 131 | pub fn does_intersect(&self, other: &Self) -> bool { - 132 | let mut left_ranges = self.ranges.iter(); - 133 | let mut right_ranges = other.ranges.iter(); - 134 | let mut left_range = left_ranges.next(); - 135 | let mut right_range = right_ranges.next(); - 136 | while let (Some(left), Some(right)) = (&left_range, &right_range) { - 137 | if left.end <= right.start { - 138 | left_range = left_ranges.next(); - 139 | } else if left.start >= right.end { - 140 | right_range = right_ranges.next(); - 141 | } else { - 142 | return true; - 143 | } - 144 | } - 145 | false - 146 | } - | - 147 | /// Get the set of characters that are present in both this set - 148 | /// and the other set. Remove those common characters from both - 149 | /// of the operands. 
- 150 | pub fn remove_intersection(&mut self, other: &mut Self) -> Self { - 151 | let mut intersection = Vec::new(); - 152 | let mut left_i = 0; - 153 | let mut right_i = 0; - 154 | while left_i < self.ranges.len() && right_i < other.ranges.len() { - 155 | let left = &mut self.ranges[left_i]; - 156 | let right = &mut other.ranges[right_i]; - | - 157 | match left.start.cmp(&right.start) { - 158 | Ordering::Less => { - 159 | // [ L ] - 160 | // [ R ] - 161 | if left.end <= right.start { - 162 | left_i += 1; - 163 | continue; - 164 | } - | - 165 | match left.end.cmp(&right.end) { - 166 | // [ L ] - 167 | // [ R ] - 168 | Ordering::Less => { - 169 | intersection.push(right.start..left.end); - 170 | swap(&mut left.end, &mut right.start); - 171 | left_i += 1; - 172 | } - | - 173 | // [ L ] - 174 | // [ R ] - 175 | Ordering::Equal => { - 176 | intersection.push(right.clone()); - 177 | left.end = right.start; - 178 | other.ranges.remove(right_i); - 179 | } - | - 180 | // [ L ] - 181 | // [ R ] - 182 | Ordering::Greater => { - 183 | intersection.push(right.clone()); - 184 | let new_range = left.start..right.start; - 185 | left.start = right.end; - 186 | self.ranges.insert(left_i, new_range); - 187 | other.ranges.remove(right_i); - 188 | left_i += 1; - 189 | } - 190 | } - 191 | } - 192 | // [ L ] - 193 | // [ R ] - 194 | Ordering::Equal if left.end < right.end => { - 195 | intersection.push(left.start..left.end); - 196 | right.start = left.end; - 197 | self.ranges.remove(left_i); - 198 | } - 199 | // [ L ] - 200 | // [ R ] - 201 | Ordering::Equal if left.end == right.end => { - 202 | intersection.push(left.clone()); - 203 | self.ranges.remove(left_i); - 204 | other.ranges.remove(right_i); - 205 | } - 206 | // [ L ] - 207 | // [ R ] - 208 | Ordering::Equal if left.end > right.end => { - 209 | intersection.push(right.clone()); - 210 | left.start = right.end; - 211 | other.ranges.remove(right_i); - 212 | } - 213 | Ordering::Equal => {} - 214 | Ordering::Greater => { - 215 | // 
[ L ] - 216 | // [ R ] - 217 | if left.start >= right.end { - 218 | right_i += 1; - 219 | continue; - 220 | } - | - 221 | match left.end.cmp(&right.end) { - 222 | // [ L ] - 223 | // [ R ] - 224 | Ordering::Less => { - 225 | intersection.push(left.clone()); - 226 | let new_range = right.start..left.start; - 227 | right.start = left.end; - 228 | other.ranges.insert(right_i, new_range); - 229 | self.ranges.remove(left_i); - 230 | right_i += 1; - 231 | } - | - 232 | // [ L ] - 233 | // [ R ] - 234 | Ordering::Equal => { - 235 | intersection.push(left.clone()); - 236 | right.end = left.start; - 237 | self.ranges.remove(left_i); - 238 | } - | - 239 | // [ L ] - 240 | // [ R ] - 241 | Ordering::Greater => { - 242 | intersection.push(left.start..right.end); - 243 | swap(&mut left.start, &mut right.end); - 244 | right_i += 1; - 245 | } - 246 | } - 247 | } - 248 | } - 249 | } - 250 | Self { - 251 | ranges: intersection, - 252 | } - 253 | } - | - 254 | /// Produces a `CharacterSet` containing every character in `self` that is not present in - 255 | /// `other`. - 256 | pub fn difference(mut self, mut other: Self) -> Self { - 257 | self.remove_intersection(&mut other); - 258 | self - 259 | } - | - 260 | /// Produces a `CharacterSet` containing every character that is in _exactly one_ of `self` or - 261 | /// `other`, but is not present in both sets. 
- 262 | #[cfg(test)] - 263 | fn symmetric_difference(mut self, mut other: Self) -> Self { - 264 | self.remove_intersection(&mut other); - 265 | self.add(&other) - 266 | } - | - 267 | pub fn char_codes(&self) -> impl Iterator + '_ { - 268 | self.ranges.iter().flat_map(Clone::clone) - 269 | } - | - 270 | pub fn chars(&self) -> impl Iterator + '_ { - 271 | self.char_codes().filter_map(char::from_u32) - 272 | } - | - 273 | pub fn range_count(&self) -> usize { - 274 | self.ranges.len() - 275 | } - | - 276 | pub fn ranges(&self) -> impl Iterator> + '_ { - 277 | self.ranges.iter().filter_map(|range| { - 278 | let start = range.clone().find_map(char::from_u32)?; - 279 | let end = (range.start..range.end).rev().find_map(char::from_u32)?; - 280 | Some(start..=end) - 281 | }) - 282 | } - | - 283 | pub fn is_empty(&self) -> bool { - 284 | self.ranges.is_empty() - 285 | } - | - 286 | /// Get a reduced list of character ranges, assuming that a given - 287 | /// set of characters can be safely ignored. 
- 288 | pub fn simplify_ignoring(&self, ruled_out_characters: &Self) -> Self { - 289 | let mut prev_range: Option> = None; - 290 | Self { - 291 | ranges: self - 292 | .ranges - 293 | .iter() - 294 | .map(|range| Some(range.clone())) - 295 | .chain([None]) - 296 | .filter_map(move |range| { - 297 | if let Some(range) = &range { - 298 | if ruled_out_characters.contains_codepoint_range(range.clone()) { - 299 | return None; - 300 | } - | - 301 | if let Some(prev_range) = &mut prev_range { - 302 | if ruled_out_characters - 303 | .contains_codepoint_range(prev_range.end..range.start) - 304 | { - 305 | prev_range.end = range.end; - 306 | return None; - 307 | } - 308 | } - 309 | } - | - 310 | let result = prev_range.clone(); - 311 | prev_range = range; - 312 | result - 313 | }) - 314 | .collect(), - 315 | } - 316 | } - | - 317 | pub fn contains_codepoint_range(&self, seek_range: Range) -> bool { - 318 | let ix = match self.ranges.binary_search_by(|probe| { - 319 | if probe.end <= seek_range.start { - 320 | Ordering::Less - 321 | } else if probe.start > seek_range.start { - 322 | Ordering::Greater - 323 | } else { - 324 | Ordering::Equal - 325 | } - 326 | }) { - 327 | Ok(ix) | Err(ix) => ix, - 328 | }; - 329 | self.ranges - 330 | .get(ix) - 331 | .is_some_and(|range| range.start <= seek_range.start && range.end >= seek_range.end) - 332 | } - | - 333 | pub fn contains(&self, c: char) -> bool { - 334 | self.contains_codepoint_range(c as u32..c as u32 + 1) - 335 | } - 336 | } - | - 337 | impl Ord for CharacterSet { - 338 | fn cmp(&self, other: &Self) -> Ordering { - 339 | let count_cmp = self - 340 | .ranges - 341 | .iter() - 342 | .map(ExactSizeIterator::len) - 343 | .sum::() - 344 | .cmp(&other.ranges.iter().map(ExactSizeIterator::len).sum()); - 345 | if count_cmp != Ordering::Equal { - 346 | return count_cmp; - 347 | } - | - 348 | for (left_range, right_range) in self.ranges.iter().zip(other.ranges.iter()) { - 349 | let cmp = left_range.len().cmp(&right_range.len()); - 350 
| if cmp != Ordering::Equal { - 351 | return cmp; - 352 | } - | - 353 | for (left, right) in left_range.clone().zip(right_range.clone()) { - 354 | let cmp = left.cmp(&right); - 355 | if cmp != Ordering::Equal { - 356 | return cmp; - 357 | } - 358 | } - 359 | } - 360 | Ordering::Equal - 361 | } - 362 | } - | - 363 | impl PartialOrd for CharacterSet { - 364 | fn partial_cmp(&self, other: &Self) -> Option { - 365 | Some(self.cmp(other)) - 366 | } - 367 | } - | - 368 | impl fmt::Debug for CharacterSet { - 369 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - 370 | write!(f, "CharacterSet [")?; - 371 | let mut set = self.clone(); - 372 | if self.contains(char::MAX) { - 373 | write!(f, "^ ")?; - 374 | set = set.negate(); - 375 | } - 376 | for (i, range) in set.ranges().enumerate() { - 377 | if i > 0 { - 378 | write!(f, ", ")?; - 379 | } - 380 | write!(f, "{range:?}")?; - 381 | } - 382 | write!(f, "]")?; - 383 | Ok(()) - 384 | } - 385 | } - | - 386 | impl Nfa { - 387 | #[must_use] - 388 | pub const fn new() -> Self { - 389 | Self { states: Vec::new() } - 390 | } - | - 391 | pub fn last_state_id(&self) -> u32 { - 392 | assert!(!self.states.is_empty()); - 393 | self.states.len() as u32 - 1 - 394 | } - 395 | } - | - 396 | impl fmt::Debug for Nfa { - 397 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - 398 | writeln!(f, "Nfa {{ states: {{")?; - 399 | for (i, state) in self.states.iter().enumerate() { - 400 | writeln!(f, " {i}: {state:?},")?; - 401 | } - 402 | write!(f, "}} }}")?; - 403 | Ok(()) - 404 | } - 405 | } - | - 406 | impl<'a> NfaCursor<'a> { - 407 | pub fn new(nfa: &'a Nfa, mut states: Vec) -> Self { - 408 | let mut result = Self { - 409 | nfa, - 410 | state_ids: Vec::new(), - 411 | }; - 412 | result.add_states(&mut states); - 413 | result - 414 | } - | - 415 | pub fn reset(&mut self, mut states: Vec) { - 416 | self.state_ids.clear(); - 417 | self.add_states(&mut states); - 418 | } - | - 419 | pub fn force_reset(&mut self, states: Vec) { - 420 | 
self.state_ids = states; - 421 | } - | - 422 | pub fn transition_chars(&self) -> impl Iterator { - 423 | self.raw_transitions().map(|t| (t.0, t.1)) - 424 | } - | - 425 | pub fn transitions(&self) -> Vec { - 426 | Self::group_transitions(self.raw_transitions()) - 427 | } - | - 428 | fn raw_transitions(&self) -> impl Iterator { - 429 | self.state_ids.iter().filter_map(move |id| { - 430 | if let NfaState::Advance { - 431 | chars, - 432 | state_id, - 433 | precedence, - 434 | is_sep, - 435 | } = &self.nfa.states[*id as usize] - 436 | { - 437 | Some((chars, *is_sep, *precedence, *state_id)) - 438 | } else { - 439 | None - 440 | } - 441 | }) - 442 | } - | - 443 | fn group_transitions<'b>( - 444 | iter: impl Iterator, - 445 | ) -> Vec { - 446 | let mut result = Vec::::new(); - 447 | for (chars, is_sep, prec, state) in iter { - 448 | let mut chars = chars.clone(); - 449 | let mut i = 0; - 450 | while i < result.len() && !chars.is_empty() { - 451 | let intersection = result[i].characters.remove_intersection(&mut chars); - 452 | if !intersection.is_empty() { - 453 | let mut intersection_states = result[i].states.clone(); - 454 | if let Err(j) = intersection_states.binary_search(&state) { - 455 | intersection_states.insert(j, state); - 456 | } - 457 | let intersection_transition = NfaTransition { - 458 | characters: intersection, - 459 | is_separator: result[i].is_separator && is_sep, - 460 | precedence: max(result[i].precedence, prec), - 461 | states: intersection_states, - 462 | }; - 463 | if result[i].characters.is_empty() { - 464 | result[i] = intersection_transition; - 465 | } else { - 466 | result.insert(i, intersection_transition); - 467 | i += 1; - 468 | } - 469 | } - 470 | i += 1; - 471 | } - 472 | if !chars.is_empty() { - 473 | result.push(NfaTransition { - 474 | characters: chars, - 475 | precedence: prec, - 476 | states: vec![state], - 477 | is_separator: is_sep, - 478 | }); - 479 | } - 480 | } - | - 481 | let mut i = 0; - 482 | while i < result.len() { - 483 | 
for j in 0..i { - 484 | if result[j].states == result[i].states - 485 | && result[j].is_separator == result[i].is_separator - 486 | && result[j].precedence == result[i].precedence - 487 | { - 488 | let characters = mem::take(&mut result[j].characters); - 489 | result[j].characters = characters.add(&result[i].characters); - 490 | result.remove(i); - 491 | i -= 1; - 492 | break; - 493 | } - 494 | } - 495 | i += 1; - 496 | } - | - 497 | result.sort_unstable_by(|a, b| a.characters.cmp(&b.characters)); - 498 | result - 499 | } - | - 500 | pub fn completions(&self) -> impl Iterator + '_ { - 501 | self.state_ids.iter().filter_map(move |state_id| { - 502 | if let NfaState::Accept { - 503 | variable_index, - 504 | precedence, - 505 | } = self.nfa.states[*state_id as usize] - 506 | { - 507 | Some((variable_index, precedence)) - 508 | } else { - 509 | None - 510 | } - 511 | }) - 512 | } - | - 513 | pub fn add_states(&mut self, new_state_ids: &mut Vec) { - 514 | let mut i = 0; - 515 | while i < new_state_ids.len() { - 516 | let state_id = new_state_ids[i]; - 517 | let state = &self.nfa.states[state_id as usize]; - 518 | if let NfaState::Split(left, right) = state { - 519 | let mut has_left = false; - 520 | let mut has_right = false; - 521 | for new_state_id in new_state_ids.iter() { - 522 | if *new_state_id == *left { - 523 | has_left = true; - 524 | } - 525 | if *new_state_id == *right { - 526 | has_right = true; - 527 | } - 528 | } - 529 | if !has_left { - 530 | new_state_ids.push(*left); - 531 | } - 532 | if !has_right { - 533 | new_state_ids.push(*right); - 534 | } - 535 | } else if let Err(i) = self.state_ids.binary_search(&state_id) { - 536 | self.state_ids.insert(i, state_id); - 537 | } - 538 | i += 1; - 539 | } - 540 | } - 541 | } - | - 542 | #[cfg(test)] - 543 | mod tests { - 544 | use super::*; - | - 545 | #[test] - 546 | fn test_adding_ranges() { - 547 | let mut set = CharacterSet::empty() - 548 | .add_range('c', 'm') - 549 | .add_range('q', 's'); - | - 550 | // 
within existing range - 551 | set = set.add_char('d'); - 552 | assert_eq!( - 553 | set, - 554 | CharacterSet::empty() - 555 | .add_range('c', 'm') - 556 | .add_range('q', 's') - 557 | ); - | - 558 | // at end of existing range - 559 | set = set.add_char('m'); - 560 | assert_eq!( - 561 | set, - 562 | CharacterSet::empty() - 563 | .add_range('c', 'm') - 564 | .add_range('q', 's') - 565 | ); - | - 566 | // adjacent to end of existing range - 567 | set = set.add_char('n'); - 568 | assert_eq!( - 569 | set, - 570 | CharacterSet::empty() - 571 | .add_range('c', 'n') - 572 | .add_range('q', 's') - 573 | ); - | - 574 | // filling gap between existing ranges - 575 | set = set.add_range('o', 'p'); - 576 | assert_eq!(set, CharacterSet::empty().add_range('c', 's')); - | - 577 | set = CharacterSet::empty() - 578 | .add_range('c', 'f') - 579 | .add_range('i', 'l') - 580 | .add_range('n', 'r'); - 581 | set = set.add_range('d', 'o'); - 582 | assert_eq!(set, CharacterSet::empty().add_range('c', 'r')); - 583 | } - | - 584 | #[test] - 585 | fn test_adding_sets() { - 586 | let set1 = CharacterSet::empty() - 587 | .add_range('c', 'f') - 588 | .add_range('i', 'l'); - 589 | let set2 = CharacterSet::empty().add_range('b', 'g').add_char('h'); - 590 | assert_eq!( - 591 | set1.add(&set2), - 592 | CharacterSet::empty() - 593 | .add_range('b', 'g') - 594 | .add_range('h', 'l') - 595 | ); - 596 | } - | - 597 | #[test] - 598 | fn test_group_transitions() { - 599 | let table = [ - 600 | // overlapping character classes - 601 | ( - 602 | vec![ - 603 | (CharacterSet::empty().add_range('a', 'f'), false, 0, 1), - 604 | (CharacterSet::empty().add_range('d', 'i'), false, 1, 2), - 605 | ], - 606 | vec![ - 607 | NfaTransition { - 608 | characters: CharacterSet::empty().add_range('a', 'c'), - 609 | is_separator: false, - 610 | precedence: 0, - 611 | states: vec![1], - 612 | }, - 613 | NfaTransition { - 614 | characters: CharacterSet::empty().add_range('d', 'f'), - 615 | is_separator: false, - 616 | 
precedence: 1, - 617 | states: vec![1, 2], - 618 | }, - 619 | NfaTransition { - 620 | characters: CharacterSet::empty().add_range('g', 'i'), - 621 | is_separator: false, - 622 | precedence: 1, - 623 | states: vec![2], - 624 | }, - 625 | ], - 626 | ), - 627 | // large character class followed by many individual characters - 628 | ( - 629 | vec![ - 630 | (CharacterSet::empty().add_range('a', 'z'), false, 0, 1), - 631 | (CharacterSet::empty().add_char('d'), false, 0, 2), - 632 | (CharacterSet::empty().add_char('i'), false, 0, 3), - 633 | (CharacterSet::empty().add_char('f'), false, 0, 4), - 634 | ], - 635 | vec![ - 636 | NfaTransition { - 637 | characters: CharacterSet::empty().add_char('d'), - 638 | is_separator: false, - 639 | precedence: 0, - 640 | states: vec![1, 2], - 641 | }, - 642 | NfaTransition { - 643 | characters: CharacterSet::empty().add_char('f'), - 644 | is_separator: false, - 645 | precedence: 0, - 646 | states: vec![1, 4], - 647 | }, - 648 | NfaTransition { - 649 | characters: CharacterSet::empty().add_char('i'), - 650 | is_separator: false, - 651 | precedence: 0, - 652 | states: vec![1, 3], - 653 | }, - 654 | NfaTransition { - 655 | characters: CharacterSet::empty() - 656 | .add_range('a', 'c') - 657 | .add_char('e') - 658 | .add_range('g', 'h') - 659 | .add_range('j', 'z'), - 660 | is_separator: false, - 661 | precedence: 0, - 662 | states: vec![1], - 663 | }, - 664 | ], - 665 | ), - 666 | // negated character class followed by an individual character - 667 | ( - 668 | vec![ - 669 | (CharacterSet::empty().add_char('0'), false, 0, 1), - 670 | (CharacterSet::empty().add_char('b'), false, 0, 2), - 671 | ( - 672 | CharacterSet::empty().add_range('a', 'f').negate(), - 673 | false, - 674 | 0, - 675 | 3, - 676 | ), - 677 | (CharacterSet::empty().add_char('c'), false, 0, 4), - 678 | ], - 679 | vec![ - 680 | NfaTransition { - 681 | characters: CharacterSet::empty().add_char('0'), - 682 | precedence: 0, - 683 | states: vec![1, 3], - 684 | is_separator: false, 
- 685 | }, - 686 | NfaTransition { - 687 | characters: CharacterSet::empty().add_char('b'), - 688 | precedence: 0, - 689 | states: vec![2], - 690 | is_separator: false, - 691 | }, - 692 | NfaTransition { - 693 | characters: CharacterSet::empty().add_char('c'), - 694 | precedence: 0, - 695 | states: vec![4], - 696 | is_separator: false, - 697 | }, - 698 | NfaTransition { - 699 | characters: CharacterSet::empty() - 700 | .add_range('a', 'f') - 701 | .add_char('0') - 702 | .negate(), - 703 | precedence: 0, - 704 | states: vec![3], - 705 | is_separator: false, - 706 | }, - 707 | ], - 708 | ), - 709 | // multiple negated character classes - 710 | ( - 711 | vec![ - 712 | (CharacterSet::from_char('a'), false, 0, 1), - 713 | (CharacterSet::from_range('a', 'c').negate(), false, 0, 2), - 714 | (CharacterSet::from_char('g'), false, 0, 6), - 715 | (CharacterSet::from_range('d', 'f').negate(), false, 0, 3), - 716 | (CharacterSet::from_range('g', 'i').negate(), false, 0, 4), - 717 | (CharacterSet::from_char('g'), false, 0, 5), - 718 | ], - 719 | vec![ - 720 | NfaTransition { - 721 | characters: CharacterSet::from_char('a'), - 722 | precedence: 0, - 723 | states: vec![1, 3, 4], - 724 | is_separator: false, - 725 | }, - 726 | NfaTransition { - 727 | characters: CharacterSet::from_char('g'), - 728 | precedence: 0, - 729 | states: vec![2, 3, 5, 6], - 730 | is_separator: false, - 731 | }, - 732 | NfaTransition { - 733 | characters: CharacterSet::from_range('b', 'c'), - 734 | precedence: 0, - 735 | states: vec![3, 4], - 736 | is_separator: false, - 737 | }, - 738 | NfaTransition { - 739 | characters: CharacterSet::from_range('h', 'i'), - 740 | precedence: 0, - 741 | states: vec![2, 3], - 742 | is_separator: false, - 743 | }, - 744 | NfaTransition { - 745 | characters: CharacterSet::from_range('d', 'f'), - 746 | precedence: 0, - 747 | states: vec![2, 4], - 748 | is_separator: false, - 749 | }, - 750 | NfaTransition { - 751 | characters: CharacterSet::from_range('a', 'i').negate(), - 
752 | precedence: 0, - 753 | states: vec![2, 3, 4], - 754 | is_separator: false, - 755 | }, - 756 | ], - 757 | ), - 758 | // disjoint characters with same state - 759 | ( - 760 | vec![ - 761 | (CharacterSet::from_char('a'), false, 0, 1), - 762 | (CharacterSet::from_char('b'), false, 0, 2), - 763 | (CharacterSet::from_char('c'), false, 0, 1), - 764 | (CharacterSet::from_char('d'), false, 0, 1), - 765 | (CharacterSet::from_char('e'), false, 0, 2), - 766 | ], - 767 | vec![ - 768 | NfaTransition { - 769 | characters: CharacterSet::empty().add_char('b').add_char('e'), - 770 | precedence: 0, - 771 | states: vec![2], - 772 | is_separator: false, - 773 | }, - 774 | NfaTransition { - 775 | characters: CharacterSet::empty().add_char('a').add_range('c', 'd'), - 776 | precedence: 0, - 777 | states: vec![1], - 778 | is_separator: false, - 779 | }, - 780 | ], - 781 | ), - 782 | ]; - | - 783 | for (i, row) in table.iter().enumerate() { - 784 | assert_eq!( - 785 | NfaCursor::group_transitions( - 786 | row.0 - 787 | .iter() - 788 | .map(|(chars, is_sep, prec, state)| (chars, *is_sep, *prec, *state)) - 789 | ), - 790 | row.1, - 791 | "row {i}", - 792 | ); - 793 | } - 794 | } - | - 795 | #[test] - 796 | fn test_character_set_intersection_difference_ops() { - 797 | struct Row { - 798 | left: CharacterSet, - 799 | right: CharacterSet, - 800 | left_only: CharacterSet, - 801 | right_only: CharacterSet, - 802 | intersection: CharacterSet, - 803 | } - | - 804 | let rows = [ - 805 | // [ L ] - 806 | // [ R ] - 807 | Row { - 808 | left: CharacterSet::from_range('a', 'f'), - 809 | right: CharacterSet::from_range('g', 'm'), - 810 | left_only: CharacterSet::from_range('a', 'f'), - 811 | right_only: CharacterSet::from_range('g', 'm'), - 812 | intersection: CharacterSet::empty(), - 813 | }, - 814 | // [ L ] - 815 | // [ R ] - 816 | Row { - 817 | left: CharacterSet::from_range('a', 'f'), - 818 | right: CharacterSet::from_range('c', 'i'), - 819 | left_only: CharacterSet::from_range('a', 'b'), - 820 
| right_only: CharacterSet::from_range('g', 'i'), - 821 | intersection: CharacterSet::from_range('c', 'f'), - 822 | }, - 823 | // [ L ] - 824 | // [ R ] - 825 | Row { - 826 | left: CharacterSet::from_range('a', 'f'), - 827 | right: CharacterSet::from_range('d', 'f'), - 828 | left_only: CharacterSet::from_range('a', 'c'), - 829 | right_only: CharacterSet::empty(), - 830 | intersection: CharacterSet::from_range('d', 'f'), - 831 | }, - 832 | // [ L ] - 833 | // [ R ] - 834 | Row { - 835 | left: CharacterSet::from_range('a', 'm'), - 836 | right: CharacterSet::from_range('d', 'f'), - 837 | left_only: CharacterSet::empty() - 838 | .add_range('a', 'c') - 839 | .add_range('g', 'm'), - 840 | right_only: CharacterSet::empty(), - 841 | intersection: CharacterSet::from_range('d', 'f'), - 842 | }, - 843 | // [ L ] - 844 | // [R] - 845 | Row { - 846 | left: CharacterSet::from_range(',', '/'), - 847 | right: CharacterSet::from_char('/'), - 848 | left_only: CharacterSet::from_range(',', '.'), - 849 | right_only: CharacterSet::empty(), - 850 | intersection: CharacterSet::from_char('/'), - 851 | }, - 852 | // [ L ] - 853 | // [R] - 854 | Row { - 855 | left: CharacterSet::from_range(',', '/'), - 856 | right: CharacterSet::from_char('/'), - 857 | left_only: CharacterSet::from_range(',', '.'), - 858 | right_only: CharacterSet::empty(), - 859 | intersection: CharacterSet::from_char('/'), - 860 | }, - 861 | // [ L1 ] [ L2 ] - 862 | // [ R ] - 863 | Row { - 864 | left: CharacterSet::empty() - 865 | .add_range('a', 'e') - 866 | .add_range('h', 'l'), - 867 | right: CharacterSet::from_range('c', 'i'), - 868 | left_only: CharacterSet::empty() - 869 | .add_range('a', 'b') - 870 | .add_range('j', 'l'), - 871 | right_only: CharacterSet::from_range('f', 'g'), - 872 | intersection: CharacterSet::empty() - 873 | .add_range('c', 'e') - 874 | .add_range('h', 'i'), - 875 | }, - 876 | ]; - | - 877 | for (i, row) in rows.iter().enumerate() { - 878 | let mut left = row.left.clone(); - 879 | let mut right 
= row.right.clone(); - 880 | assert_eq!( - 881 | left.remove_intersection(&mut right), - 882 | row.intersection, - 883 | "row {i}a: {:?} && {:?}", - 884 | row.left, - 885 | row.right - 886 | ); - 887 | assert_eq!( - 888 | left, row.left_only, - 889 | "row {i}a: {:?} - {:?}", - 890 | row.left, row.right - 891 | ); - 892 | assert_eq!( - 893 | right, row.right_only, - 894 | "row {i}a: {:?} - {:?}", - 895 | row.right, row.left - 896 | ); - | - 897 | let mut left = row.left.clone(); - 898 | let mut right = row.right.clone(); - 899 | assert_eq!( - 900 | right.remove_intersection(&mut left), - 901 | row.intersection, - 902 | "row {i}b: {:?} && {:?}", - 903 | row.left, - 904 | row.right - 905 | ); - 906 | assert_eq!( - 907 | left, row.left_only, - 908 | "row {i}b: {:?} - {:?}", - 909 | row.left, row.right - 910 | ); - 911 | assert_eq!( - 912 | right, row.right_only, - 913 | "row {i}b: {:?} - {:?}", - 914 | row.right, row.left - 915 | ); - | - 916 | assert_eq!( - 917 | row.left.clone().difference(row.right.clone()), - 918 | row.left_only, - 919 | "row {i}b: {:?} -- {:?}", - 920 | row.left, - 921 | row.right - 922 | ); - | - 923 | let symm_difference = row.left_only.clone().add(&row.right_only); - 924 | assert_eq!( - 925 | row.left.clone().symmetric_difference(row.right.clone()), - 926 | symm_difference, - 927 | "row {i}b: {:?} ~~ {:?}", - 928 | row.left, - 929 | row.right - 930 | ); - 931 | } - 932 | } - | - 933 | #[test] - 934 | fn test_character_set_does_intersect() { - 935 | let (a, b) = (CharacterSet::empty(), CharacterSet::empty()); - 936 | assert!(!a.does_intersect(&b)); - 937 | assert!(!b.does_intersect(&a)); - | - 938 | let (a, b) = ( - 939 | CharacterSet::empty().add_char('a'), - 940 | CharacterSet::empty().add_char('a'), - 941 | ); - 942 | assert!(a.does_intersect(&b)); - 943 | assert!(b.does_intersect(&a)); - | - 944 | let (a, b) = ( - 945 | CharacterSet::empty().add_char('b'), - 946 | CharacterSet::empty().add_char('a').add_char('c'), - 947 | ); - 948 | 
assert!(!a.does_intersect(&b)); - 949 | assert!(!b.does_intersect(&a)); - | - 950 | let (a, b) = ( - 951 | CharacterSet::from_char('b'), - 952 | CharacterSet::from_range('a', 'c'), - 953 | ); - 954 | assert!(a.does_intersect(&b)); - 955 | assert!(b.does_intersect(&a)); - | - 956 | let (a, b) = ( - 957 | CharacterSet::from_char('b'), - 958 | CharacterSet::from_range('a', 'c').negate(), - 959 | ); - 960 | assert!(!a.does_intersect(&b)); - 961 | assert!(!b.does_intersect(&a)); - | - 962 | let (a, b) = ( - 963 | CharacterSet::from_char('a').negate(), - 964 | CharacterSet::from_char('a').negate(), - 965 | ); - 966 | assert!(a.does_intersect(&b)); - 967 | assert!(b.does_intersect(&a)); - | - 968 | let (a, b) = ( - 969 | CharacterSet::from_char('c'), - 970 | CharacterSet::from_char('a').negate(), - 971 | ); - 972 | assert!(a.does_intersect(&b)); - 973 | assert!(b.does_intersect(&a)); - | - 974 | let (a, b) = ( - 975 | CharacterSet::from_range('c', 'f'), - 976 | CharacterSet::from_char('f'), - 977 | ); - 978 | assert!(a.does_intersect(&b)); - 979 | assert!(b.does_intersect(&a)); - 980 | } - | - 981 | #[test] - 982 | #[allow(clippy::single_range_in_vec_init)] - 983 | fn test_character_set_simplify_ignoring() { - 984 | struct Row { - 985 | chars: Vec, - 986 | ruled_out_chars: Vec, - 987 | expected_ranges: Vec>, - 988 | } - | - 989 | let table = [ - 990 | Row { - 991 | chars: vec!['a'], - 992 | ruled_out_chars: vec![], - 993 | expected_ranges: vec!['a'..'a'], - 994 | }, - 995 | Row { - 996 | chars: vec!['a', 'b', 'c', 'e', 'z'], - 997 | ruled_out_chars: vec![], - 998 | expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'], - 999 | }, -1000 | Row { -1001 | chars: vec!['a', 'b', 'c', 'e', 'h', 'z'], -1002 | ruled_out_chars: vec!['d', 'f', 'g'], -1003 | expected_ranges: vec!['a'..'h', 'z'..'z'], -1004 | }, -1005 | Row { -1006 | chars: vec!['a', 'b', 'c', 'g', 'h', 'i'], -1007 | ruled_out_chars: vec!['d', 'j'], -1008 | expected_ranges: vec!['a'..'c', 'g'..'i'], -1009 | }, -1010 | 
Row { -1011 | chars: vec!['c', 'd', 'e', 'g', 'h'], -1012 | ruled_out_chars: vec!['a', 'b', 'c', 'd', 'e', 'f'], -1013 | expected_ranges: vec!['g'..'h'], -1014 | }, -1015 | Row { -1016 | chars: vec!['I', 'N'], -1017 | ruled_out_chars: vec!['A', 'I', 'N', 'Z'], -1018 | expected_ranges: vec![], -1019 | }, -1020 | ]; - | -1021 | for Row { -1022 | chars, -1023 | ruled_out_chars, -1024 | expected_ranges, -1025 | } in &table -1026 | { -1027 | let ruled_out_chars = ruled_out_chars -1028 | .iter() -1029 | .fold(CharacterSet::empty(), |set, c| set.add_char(*c)); -1030 | let mut set = CharacterSet::empty(); -1031 | for c in chars { -1032 | set = set.add_char(*c); -1033 | } -1034 | let actual = set.simplify_ignoring(&ruled_out_chars); -1035 | let expected = expected_ranges -1036 | .iter() -1037 | .fold(CharacterSet::empty(), |set, range| { -1038 | set.add_range(range.start, range.end) -1039 | }); -1040 | assert_eq!( -1041 | actual, expected, -1042 | "chars: {chars:?}, ruled out chars: {ruled_out_chars:?}" -1043 | ); -1044 | } -1045 | } -1046 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/node_types.rs: --------------------------------------------------------------------------------- - 1 | use std::collections::{BTreeMap, HashMap, HashSet}; - | - 2 | use anyhow::Result; - 3 | use serde::Serialize; - 4 | use thiserror::Error; - | - 5 | use super::{ - 6 | grammars::{LexicalGrammar, SyntaxGrammar, VariableType}, - 7 | rules::{Alias, AliasMap, Symbol, SymbolType}, - 8 | }; - | - 9 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] - 10 | pub enum ChildType { - 11 | Normal(Symbol), - 12 | Aliased(Alias), - 13 | } - | - 14 | #[derive(Clone, Debug, Default, PartialEq, Eq)] - 15 | pub struct FieldInfo { - 16 | pub quantity: ChildQuantity, - 17 | pub types: Vec, - 18 | } - | - 19 | #[derive(Clone, Debug, Default, PartialEq, Eq)] - 20 | pub struct VariableInfo { - 21 | pub fields: HashMap, - 22 | pub 
children: FieldInfo, - 23 | pub children_without_fields: FieldInfo, - 24 | pub has_multi_step_production: bool, - 25 | } - | - 26 | #[derive(Debug, Serialize, PartialEq, Eq, Default, PartialOrd, Ord)] - 27 | #[cfg(feature = "load")] - 28 | pub struct NodeInfoJSON { - 29 | #[serde(rename = "type")] - 30 | kind: String, - 31 | named: bool, - 32 | #[serde(skip_serializing_if = "std::ops::Not::not")] - 33 | root: bool, - 34 | #[serde(skip_serializing_if = "std::ops::Not::not")] - 35 | extra: bool, - 36 | #[serde(skip_serializing_if = "Option::is_none")] - 37 | fields: Option>, - 38 | #[serde(skip_serializing_if = "Option::is_none")] - 39 | children: Option, - 40 | #[serde(skip_serializing_if = "Option::is_none")] - 41 | subtypes: Option>, - 42 | } - | - 43 | #[derive(Clone, Debug, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)] - 44 | #[cfg(feature = "load")] - 45 | pub struct NodeTypeJSON { - 46 | #[serde(rename = "type")] - 47 | kind: String, - 48 | named: bool, - 49 | } - | - 50 | #[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] - 51 | #[cfg(feature = "load")] - 52 | pub struct FieldInfoJSON { - 53 | multiple: bool, - 54 | required: bool, - 55 | types: Vec, - 56 | } - | - 57 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] - 58 | pub struct ChildQuantity { - 59 | exists: bool, - 60 | required: bool, - 61 | multiple: bool, - 62 | } - | - 63 | #[cfg(feature = "load")] - 64 | impl Default for FieldInfoJSON { - 65 | fn default() -> Self { - 66 | Self { - 67 | multiple: false, - 68 | required: true, - 69 | types: Vec::new(), - 70 | } - 71 | } - 72 | } - | - 73 | impl Default for ChildQuantity { - 74 | fn default() -> Self { - 75 | Self::one() - 76 | } - 77 | } - | - 78 | impl ChildQuantity { - 79 | #[must_use] - 80 | const fn zero() -> Self { - 81 | Self { - 82 | exists: false, - 83 | required: false, - 84 | multiple: false, - 85 | } - 86 | } - | - 87 | #[must_use] - 88 | const fn one() -> Self { - 89 | Self { - 90 | exists: true, - 91 | required: true, - 92 | 
multiple: false, - 93 | } - 94 | } - | - 95 | const fn append(&mut self, other: Self) { - 96 | if other.exists { - 97 | if self.exists || other.multiple { - 98 | self.multiple = true; - 99 | } - 100 | if other.required { - 101 | self.required = true; - 102 | } - 103 | self.exists = true; - 104 | } - 105 | } - | - 106 | const fn union(&mut self, other: Self) -> bool { - 107 | let mut result = false; - 108 | if !self.exists && other.exists { - 109 | result = true; - 110 | self.exists = true; - 111 | } - 112 | if self.required && !other.required { - 113 | result = true; - 114 | self.required = false; - 115 | } - 116 | if !self.multiple && other.multiple { - 117 | result = true; - 118 | self.multiple = true; - 119 | } - 120 | result - 121 | } - 122 | } - | - 123 | pub type VariableInfoResult = Result; - | - 124 | #[derive(Debug, Error, Serialize)] - 125 | pub enum VariableInfoError { - 126 | #[error("Grammar error: Supertype symbols must always have a single visible child, but `{0}` can have multiple")] - 127 | InvalidSupertype(String), - 128 | } - | - 129 | /// Compute a summary of the public-facing structure of each variable in the - 130 | /// grammar. Each variable in the grammar corresponds to a distinct public-facing - 131 | /// node type. - 132 | /// - 133 | /// The information collected about each node type `N` is: - 134 | /// 1. `child_types` - The types of visible children that can appear within `N`. - 135 | /// 2. `fields` - The fields that `N` can have. Data regarding each field: - 136 | /// * `types` - The types of visible children the field can contain. - 137 | /// * `optional` - Do `N` nodes always have this field? - 138 | /// * `multiple` - Can `N` nodes have multiple children for this field? - 139 | /// 3. `children_without_fields` - The *other* named children of `N` that are not associated with - 140 | /// fields. Data regarding these children: - 141 | /// * `types` - The types of named children with no field. 
- 142 | /// * `optional` - Do `N` nodes always have at least one named child with no field? - 143 | /// * `multiple` - Can `N` nodes have multiple named children with no field? - 144 | /// - 145 | /// Each summary must account for some indirect factors: - 146 | /// 1. hidden nodes. When a parent node `N` has a hidden child `C`, the visible children of `C` - 147 | /// *appear* to be direct children of `N`. - 148 | /// 2. aliases. If a parent node type `M` is aliased as some other type `N`, then nodes which - 149 | /// *appear* to have type `N` may have internal structure based on `M`. - 150 | pub fn get_variable_info( - 151 | syntax_grammar: &SyntaxGrammar, - 152 | lexical_grammar: &LexicalGrammar, - 153 | default_aliases: &AliasMap, - 154 | ) -> VariableInfoResult> { - 155 | let child_type_is_visible = |t: &ChildType| { - 156 | variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous - 157 | }; - | - 158 | let child_type_is_named = |t: &ChildType| { - 159 | variable_type_for_child_type(t, syntax_grammar, lexical_grammar) == VariableType::Named - 160 | }; - | - 161 | // Each variable's summary can depend on the summaries of other hidden variables, - 162 | // and variables can have mutually recursive structure. So we compute the summaries - 163 | // iteratively, in a loop that terminates only when no more changes are possible. - 164 | let mut did_change = true; - 165 | let mut all_initialized = false; - 166 | let mut result = vec![VariableInfo::default(); syntax_grammar.variables.len()]; - 167 | while did_change { - 168 | did_change = false; - | - 169 | for (i, variable) in syntax_grammar.variables.iter().enumerate() { - 170 | let mut variable_info = result[i].clone(); - | - 171 | // Examine each of the variable's productions. The variable's child types can be - 172 | // immediately combined across all productions, but the child quantities must be - 173 | // recorded separately for each production. 
- 174 | for production in &variable.productions { - 175 | let mut production_field_quantities = HashMap::new(); - 176 | let mut production_children_quantity = ChildQuantity::zero(); - 177 | let mut production_children_without_fields_quantity = ChildQuantity::zero(); - 178 | let mut production_has_uninitialized_invisible_children = false; - | - 179 | if production.steps.len() > 1 { - 180 | variable_info.has_multi_step_production = true; - 181 | } - | - 182 | for step in &production.steps { - 183 | let child_symbol = step.symbol; - 184 | let child_type = if let Some(alias) = &step.alias { - 185 | ChildType::Aliased(alias.clone()) - 186 | } else if let Some(alias) = default_aliases.get(&step.symbol) { - 187 | ChildType::Aliased(alias.clone()) - 188 | } else { - 189 | ChildType::Normal(child_symbol) - 190 | }; - | - 191 | let child_is_hidden = !child_type_is_visible(&child_type) - 192 | && !syntax_grammar.supertype_symbols.contains(&child_symbol); - | - 193 | // Maintain the set of all child types for this variable, and the quantity of - 194 | // visible children in this production. - 195 | did_change |= - 196 | extend_sorted(&mut variable_info.children.types, Some(&child_type)); - 197 | if !child_is_hidden { - 198 | production_children_quantity.append(ChildQuantity::one()); - 199 | } - | - 200 | // Maintain the set of child types associated with each field, and the quantity - 201 | // of children associated with each field in this production. - 202 | if let Some(field_name) = &step.field_name { - 203 | let field_info = variable_info - 204 | .fields - 205 | .entry(field_name.clone()) - 206 | .or_insert_with(FieldInfo::default); - 207 | did_change |= extend_sorted(&mut field_info.types, Some(&child_type)); - | - 208 | let production_field_quantity = production_field_quantities - 209 | .entry(field_name) - 210 | .or_insert_with(ChildQuantity::zero); - | - 211 | // Inherit the types and quantities of hidden children associated with - 212 | // fields. 
- 213 | if child_is_hidden && child_symbol.is_non_terminal() { - 214 | let child_variable_info = &result[child_symbol.index]; - 215 | did_change |= extend_sorted( - 216 | &mut field_info.types, - 217 | &child_variable_info.children.types, - 218 | ); - 219 | production_field_quantity.append(child_variable_info.children.quantity); - 220 | } else { - 221 | production_field_quantity.append(ChildQuantity::one()); - 222 | } - 223 | } - 224 | // Maintain the set of named children without fields within this variable. - 225 | else if child_type_is_named(&child_type) { - 226 | production_children_without_fields_quantity.append(ChildQuantity::one()); - 227 | did_change |= extend_sorted( - 228 | &mut variable_info.children_without_fields.types, - 229 | Some(&child_type), - 230 | ); - 231 | } - | - 232 | // Inherit all child information from hidden children. - 233 | if child_is_hidden && child_symbol.is_non_terminal() { - 234 | let child_variable_info = &result[child_symbol.index]; - | - 235 | // If a hidden child can have multiple children, then its parent node can - 236 | // appear to have multiple children. - 237 | if child_variable_info.has_multi_step_production { - 238 | variable_info.has_multi_step_production = true; - 239 | } - | - 240 | // If a hidden child has fields, then the parent node can appear to have - 241 | // those same fields. - 242 | for (field_name, child_field_info) in &child_variable_info.fields { - 243 | production_field_quantities - 244 | .entry(field_name) - 245 | .or_insert_with(ChildQuantity::zero) - 246 | .append(child_field_info.quantity); - 247 | did_change |= extend_sorted( - 248 | &mut variable_info - 249 | .fields - 250 | .entry(field_name.clone()) - 251 | .or_insert_with(FieldInfo::default) - 252 | .types, - 253 | &child_field_info.types, - 254 | ); - 255 | } - | - 256 | // If a hidden child has children, then the parent node can appear to have - 257 | // those same children. 
- 258 | production_children_quantity.append(child_variable_info.children.quantity); - 259 | did_change |= extend_sorted( - 260 | &mut variable_info.children.types, - 261 | &child_variable_info.children.types, - 262 | ); - | - 263 | // If a hidden child can have named children without fields, then the parent - 264 | // node can appear to have those same children. - 265 | if step.field_name.is_none() { - 266 | let grandchildren_info = &child_variable_info.children_without_fields; - 267 | if !grandchildren_info.types.is_empty() { - 268 | production_children_without_fields_quantity - 269 | .append(child_variable_info.children_without_fields.quantity); - 270 | did_change |= extend_sorted( - 271 | &mut variable_info.children_without_fields.types, - 272 | &child_variable_info.children_without_fields.types, - 273 | ); - 274 | } - 275 | } - 276 | } - | - 277 | // Note whether or not this production contains children whose summaries - 278 | // have not yet been computed. - 279 | if child_symbol.index >= i && !all_initialized { - 280 | production_has_uninitialized_invisible_children = true; - 281 | } - 282 | } - | - 283 | // If this production's children all have had their summaries initialized, - 284 | // then expand the quantity information with all of the possibilities introduced - 285 | // by this production. 
- 286 | if !production_has_uninitialized_invisible_children { - 287 | did_change |= variable_info - 288 | .children - 289 | .quantity - 290 | .union(production_children_quantity); - | - 291 | did_change |= variable_info - 292 | .children_without_fields - 293 | .quantity - 294 | .union(production_children_without_fields_quantity); - | - 295 | for (field_name, info) in &mut variable_info.fields { - 296 | did_change |= info.quantity.union( - 297 | production_field_quantities - 298 | .get(field_name) - 299 | .copied() - 300 | .unwrap_or_else(ChildQuantity::zero), - 301 | ); - 302 | } - 303 | } - 304 | } - | - 305 | result[i] = variable_info; - 306 | } - | - 307 | all_initialized = true; - 308 | } - | - 309 | for supertype_symbol in &syntax_grammar.supertype_symbols { - 310 | if result[supertype_symbol.index].has_multi_step_production { - 311 | let variable = &syntax_grammar.variables[supertype_symbol.index]; - 312 | Err(VariableInfoError::InvalidSupertype(variable.name.clone()))?; - 313 | } - 314 | } - | - 315 | // Update all of the node type lists to eliminate hidden nodes. 
- 316 | for supertype_symbol in &syntax_grammar.supertype_symbols { - 317 | result[supertype_symbol.index] - 318 | .children - 319 | .types - 320 | .retain(child_type_is_visible); - 321 | } - 322 | for variable_info in &mut result { - 323 | for field_info in variable_info.fields.values_mut() { - 324 | field_info.types.retain(child_type_is_visible); - 325 | } - 326 | variable_info.fields.retain(|_, v| !v.types.is_empty()); - 327 | variable_info - 328 | .children_without_fields - 329 | .types - 330 | .retain(child_type_is_visible); - 331 | } - | - 332 | Ok(result) - 333 | } - | - 334 | fn get_aliases_by_symbol( - 335 | syntax_grammar: &SyntaxGrammar, - 336 | default_aliases: &AliasMap, - 337 | ) -> HashMap>> { - 338 | let mut aliases_by_symbol = HashMap::new(); - 339 | for (symbol, alias) in default_aliases { - 340 | aliases_by_symbol.insert(*symbol, { - 341 | let mut aliases = HashSet::new(); - 342 | aliases.insert(Some(alias.clone())); - 343 | aliases - 344 | }); - 345 | } - 346 | for extra_symbol in &syntax_grammar.extra_symbols { - 347 | if !default_aliases.contains_key(extra_symbol) { - 348 | aliases_by_symbol - 349 | .entry(*extra_symbol) - 350 | .or_insert_with(HashSet::new) - 351 | .insert(None); - 352 | } - 353 | } - 354 | for variable in &syntax_grammar.variables { - 355 | for production in &variable.productions { - 356 | for step in &production.steps { - 357 | aliases_by_symbol - 358 | .entry(step.symbol) - 359 | .or_insert_with(HashSet::new) - 360 | .insert( - 361 | step.alias - 362 | .as_ref() - 363 | .or_else(|| default_aliases.get(&step.symbol)) - 364 | .cloned(), - 365 | ); - 366 | } - 367 | } - 368 | } - 369 | aliases_by_symbol.insert( - 370 | Symbol::non_terminal(0), - 371 | std::iter::once(&None).cloned().collect(), - 372 | ); - 373 | aliases_by_symbol - 374 | } - | - 375 | pub fn get_supertype_symbol_map( - 376 | syntax_grammar: &SyntaxGrammar, - 377 | default_aliases: &AliasMap, - 378 | variable_info: &[VariableInfo], - 379 | ) -> BTreeMap> { - 
380 | let aliases_by_symbol = get_aliases_by_symbol(syntax_grammar, default_aliases); - 381 | let mut supertype_symbol_map = BTreeMap::new(); - | - 382 | let mut symbols_by_alias = HashMap::new(); - 383 | for (symbol, aliases) in &aliases_by_symbol { - 384 | for alias in aliases.iter().flatten() { - 385 | symbols_by_alias - 386 | .entry(alias) - 387 | .or_insert_with(Vec::new) - 388 | .push(*symbol); - 389 | } - 390 | } - | - 391 | for (i, info) in variable_info.iter().enumerate() { - 392 | let symbol = Symbol::non_terminal(i); - 393 | if syntax_grammar.supertype_symbols.contains(&symbol) { - 394 | let subtypes = info.children.types.clone(); - 395 | supertype_symbol_map.insert(symbol, subtypes); - 396 | } - 397 | } - 398 | supertype_symbol_map - 399 | } - | - 400 | #[cfg(feature = "load")] - 401 | pub type SuperTypeCycleResult = Result; - | - 402 | #[derive(Debug, Error, Serialize)] - 403 | pub struct SuperTypeCycleError { - 404 | items: Vec, - 405 | } - | - 406 | impl std::fmt::Display for SuperTypeCycleError { - 407 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - 408 | write!(f, "Dependency cycle detected in node types:")?; - 409 | for (i, item) in self.items.iter().enumerate() { - 410 | write!(f, " {item}")?; - 411 | if i < self.items.len() - 1 { - 412 | write!(f, ",")?; - 413 | } - 414 | } - | - 415 | Ok(()) - 416 | } - 417 | } - | - 418 | #[cfg(feature = "load")] - 419 | pub fn generate_node_types_json( - 420 | syntax_grammar: &SyntaxGrammar, - 421 | lexical_grammar: &LexicalGrammar, - 422 | default_aliases: &AliasMap, - 423 | variable_info: &[VariableInfo], - 424 | ) -> SuperTypeCycleResult> { - 425 | let mut node_types_json = BTreeMap::new(); - | - 426 | let child_type_to_node_type = |child_type: &ChildType| match child_type { - 427 | ChildType::Aliased(alias) => NodeTypeJSON { - 428 | kind: alias.value.clone(), - 429 | named: alias.is_named, - 430 | }, - 431 | ChildType::Normal(symbol) => { - 432 | if let Some(alias) = 
default_aliases.get(symbol) { - 433 | NodeTypeJSON { - 434 | kind: alias.value.clone(), - 435 | named: alias.is_named, - 436 | } - 437 | } else { - 438 | match symbol.kind { - 439 | SymbolType::NonTerminal => { - 440 | let variable = &syntax_grammar.variables[symbol.index]; - 441 | NodeTypeJSON { - 442 | kind: variable.name.clone(), - 443 | named: variable.kind != VariableType::Anonymous, - 444 | } - 445 | } - 446 | SymbolType::Terminal => { - 447 | let variable = &lexical_grammar.variables[symbol.index]; - 448 | NodeTypeJSON { - 449 | kind: variable.name.clone(), - 450 | named: variable.kind != VariableType::Anonymous, - 451 | } - 452 | } - 453 | SymbolType::External => { - 454 | let variable = &syntax_grammar.external_tokens[symbol.index]; - 455 | NodeTypeJSON { - 456 | kind: variable.name.clone(), - 457 | named: variable.kind != VariableType::Anonymous, - 458 | } - 459 | } - 460 | _ => panic!("Unexpected symbol type"), - 461 | } - 462 | } - 463 | } - 464 | }; - | - 465 | let populate_field_info_json = |json: &mut FieldInfoJSON, info: &FieldInfo| { - 466 | if info.types.is_empty() { - 467 | json.required = false; - 468 | } else { - 469 | json.multiple |= info.quantity.multiple; - 470 | json.required &= info.quantity.required; - 471 | json.types - 472 | .extend(info.types.iter().map(child_type_to_node_type)); - 473 | json.types.sort_unstable(); - 474 | json.types.dedup(); - 475 | } - 476 | }; - | - 477 | let aliases_by_symbol = get_aliases_by_symbol(syntax_grammar, default_aliases); - | - 478 | let empty = HashSet::new(); - 479 | let extra_names = syntax_grammar - 480 | .extra_symbols - 481 | .iter() - 482 | .flat_map(|symbol| { - 483 | aliases_by_symbol - 484 | .get(symbol) - 485 | .unwrap_or(&empty) - 486 | .iter() - 487 | .map(|alias| { - 488 | alias.as_ref().map_or( - 489 | match symbol.kind { - 490 | SymbolType::NonTerminal => &syntax_grammar.variables[symbol.index].name, - 491 | SymbolType::Terminal => &lexical_grammar.variables[symbol.index].name, - 492 | 
SymbolType::External => { - 493 | &syntax_grammar.external_tokens[symbol.index].name - 494 | } - 495 | _ => unreachable!(), - 496 | }, - 497 | |alias| &alias.value, - 498 | ) - 499 | }) - 500 | }) - 501 | .collect::>(); - | - 502 | let mut subtype_map = Vec::new(); - 503 | for (i, info) in variable_info.iter().enumerate() { - 504 | let symbol = Symbol::non_terminal(i); - 505 | let variable = &syntax_grammar.variables[i]; - 506 | if syntax_grammar.supertype_symbols.contains(&symbol) { - 507 | let node_type_json = - 508 | node_types_json - 509 | .entry(variable.name.clone()) - 510 | .or_insert_with(|| NodeInfoJSON { - 511 | kind: variable.name.clone(), - 512 | named: true, - 513 | root: false, - 514 | extra: extra_names.contains(&variable.name), - 515 | fields: None, - 516 | children: None, - 517 | subtypes: None, - 518 | }); - 519 | let mut subtypes = info - 520 | .children - 521 | .types - 522 | .iter() - 523 | .map(child_type_to_node_type) - 524 | .collect::>(); - 525 | subtypes.sort_unstable(); - 526 | subtypes.dedup(); - 527 | let supertype = NodeTypeJSON { - 528 | kind: node_type_json.kind.clone(), - 529 | named: true, - 530 | }; - 531 | subtype_map.push((supertype, subtypes.clone())); - 532 | node_type_json.subtypes = Some(subtypes); - 533 | } else if !syntax_grammar.variables_to_inline.contains(&symbol) { - 534 | // If a rule is aliased under multiple names, then its information - 535 | // contributes to multiple entries in the final JSON. 
- 536 | for alias in aliases_by_symbol.get(&symbol).unwrap_or(&HashSet::new()) { - 537 | let kind; - 538 | let is_named; - 539 | if let Some(alias) = alias { - 540 | kind = &alias.value; - 541 | is_named = alias.is_named; - 542 | } else if variable.kind.is_visible() { - 543 | kind = &variable.name; - 544 | is_named = variable.kind == VariableType::Named; - 545 | } else { - 546 | continue; - 547 | } - | - 548 | // There may already be an entry with this name, because multiple - 549 | // rules may be aliased with the same name. - 550 | let mut node_type_existed = true; - 551 | let node_type_json = node_types_json.entry(kind.clone()).or_insert_with(|| { - 552 | node_type_existed = false; - 553 | NodeInfoJSON { - 554 | kind: kind.clone(), - 555 | named: is_named, - 556 | root: i == 0, - 557 | extra: extra_names.contains(&kind), - 558 | fields: Some(BTreeMap::new()), - 559 | children: None, - 560 | subtypes: None, - 561 | } - 562 | }); - | - 563 | let fields_json = node_type_json.fields.as_mut().unwrap(); - 564 | for (new_field, field_info) in &info.fields { - 565 | let field_json = fields_json.entry(new_field.clone()).or_insert_with(|| { - 566 | // If another rule is aliased with the same name, and does *not* have this - 567 | // field, then this field cannot be required. - 568 | let mut field_json = FieldInfoJSON::default(); - 569 | if node_type_existed { - 570 | field_json.required = false; - 571 | } - 572 | field_json - 573 | }); - 574 | populate_field_info_json(field_json, field_info); - 575 | } - | - 576 | // If another rule is aliased with the same name, any fields that aren't present in - 577 | // this cannot be required. 
- 578 | for (existing_field, field_json) in fields_json.iter_mut() { - 579 | if !info.fields.contains_key(existing_field) { - 580 | field_json.required = false; - 581 | } - 582 | } - | - 583 | populate_field_info_json( - 584 | node_type_json - 585 | .children - 586 | .get_or_insert(FieldInfoJSON::default()), - 587 | &info.children_without_fields, - 588 | ); - 589 | } - 590 | } - 591 | } - | - 592 | // Sort the subtype map topologically so that subtypes are listed before their supertypes. - 593 | let mut sorted_kinds = Vec::with_capacity(subtype_map.len()); - 594 | let mut top_sort = topological_sort::TopologicalSort::::new(); - 595 | for (supertype, subtypes) in &subtype_map { - 596 | for subtype in subtypes { - 597 | top_sort.add_dependency(subtype.kind.clone(), supertype.kind.clone()); - 598 | } - 599 | } - 600 | loop { - 601 | let mut next_kinds = top_sort.pop_all(); - 602 | match (next_kinds.is_empty(), top_sort.is_empty()) { - 603 | (true, true) => break, - 604 | (true, false) => { - 605 | let mut items = top_sort.collect::>(); - 606 | items.sort(); - 607 | return Err(SuperTypeCycleError { items }); - 608 | } - 609 | (false, _) => { - 610 | next_kinds.sort(); - 611 | sorted_kinds.extend(next_kinds); - 612 | } - 613 | } - 614 | } - 615 | subtype_map.sort_by(|a, b| { - 616 | let a_idx = sorted_kinds.iter().position(|n| n.eq(&a.0.kind)).unwrap(); - 617 | let b_idx = sorted_kinds.iter().position(|n| n.eq(&b.0.kind)).unwrap(); - 618 | a_idx.cmp(&b_idx) - 619 | }); - | - 620 | for node_type_json in node_types_json.values_mut() { - 621 | if node_type_json - 622 | .children - 623 | .as_ref() - 624 | .is_some_and(|c| c.types.is_empty()) - 625 | { - 626 | node_type_json.children = None; - 627 | } - | - 628 | if let Some(children) = &mut node_type_json.children { - 629 | process_supertypes(children, &subtype_map); - 630 | } - 631 | if let Some(fields) = &mut node_type_json.fields { - 632 | for field_info in fields.values_mut() { - 633 | process_supertypes(field_info, 
&subtype_map); - 634 | } - 635 | } - 636 | } - | - 637 | let mut anonymous_node_types = Vec::new(); - | - 638 | let regular_tokens = lexical_grammar - 639 | .variables - 640 | .iter() - 641 | .enumerate() - 642 | .flat_map(|(i, variable)| { - 643 | aliases_by_symbol - 644 | .get(&Symbol::terminal(i)) - 645 | .unwrap_or(&empty) - 646 | .iter() - 647 | .map(move |alias| { - 648 | alias - 649 | .as_ref() - 650 | .map_or((&variable.name, variable.kind), |alias| { - 651 | (&alias.value, alias.kind()) - 652 | }) - 653 | }) - 654 | }); - 655 | let external_tokens = - 656 | syntax_grammar - 657 | .external_tokens - 658 | .iter() - 659 | .enumerate() - 660 | .flat_map(|(i, token)| { - 661 | aliases_by_symbol - 662 | .get(&Symbol::external(i)) - 663 | .unwrap_or(&empty) - 664 | .iter() - 665 | .map(move |alias| { - 666 | alias.as_ref().map_or((&token.name, token.kind), |alias| { - 667 | (&alias.value, alias.kind()) - 668 | }) - 669 | }) - 670 | }); - | - 671 | for (name, kind) in regular_tokens.chain(external_tokens) { - 672 | match kind { - 673 | VariableType::Named => { - 674 | let node_type_json = - 675 | node_types_json - 676 | .entry(name.clone()) - 677 | .or_insert_with(|| NodeInfoJSON { - 678 | kind: name.clone(), - 679 | named: true, - 680 | root: false, - 681 | extra: extra_names.contains(&name), - 682 | fields: None, - 683 | children: None, - 684 | subtypes: None, - 685 | }); - 686 | if let Some(children) = &mut node_type_json.children { - 687 | children.required = false; - 688 | } - 689 | if let Some(fields) = &mut node_type_json.fields { - 690 | for field in fields.values_mut() { - 691 | field.required = false; - 692 | } - 693 | } - 694 | } - 695 | VariableType::Anonymous => anonymous_node_types.push(NodeInfoJSON { - 696 | kind: name.clone(), - 697 | named: false, - 698 | root: false, - 699 | extra: extra_names.contains(&name), - 700 | fields: None, - 701 | children: None, - 702 | subtypes: None, - 703 | }), - 704 | _ => {} - 705 | } - 706 | } - | - 707 | let mut 
result = node_types_json.into_iter().map(|e| e.1).collect::>(); - 708 | result.extend(anonymous_node_types); - 709 | result.sort_unstable_by(|a, b| { - 710 | b.subtypes - 711 | .is_some() - 712 | .cmp(&a.subtypes.is_some()) - 713 | .then_with(|| { - 714 | let a_is_leaf = a.children.is_none() && a.fields.is_none(); - 715 | let b_is_leaf = b.children.is_none() && b.fields.is_none(); - 716 | a_is_leaf.cmp(&b_is_leaf) - 717 | }) - 718 | .then_with(|| a.kind.cmp(&b.kind)) - 719 | }); - 720 | result.dedup(); - 721 | Ok(result) - 722 | } - | - 723 | #[cfg(feature = "load")] - 724 | fn process_supertypes(info: &mut FieldInfoJSON, subtype_map: &[(NodeTypeJSON, Vec)]) { - 725 | for (supertype, subtypes) in subtype_map { - 726 | if info.types.contains(supertype) { - 727 | info.types.retain(|t| !subtypes.contains(t)); - 728 | } - 729 | } - 730 | } - | - 731 | fn variable_type_for_child_type( - 732 | child_type: &ChildType, - 733 | syntax_grammar: &SyntaxGrammar, - 734 | lexical_grammar: &LexicalGrammar, - 735 | ) -> VariableType { - 736 | match child_type { - 737 | ChildType::Aliased(alias) => alias.kind(), - 738 | ChildType::Normal(symbol) => { - 739 | if syntax_grammar.supertype_symbols.contains(symbol) { - 740 | VariableType::Named - 741 | } else if syntax_grammar.variables_to_inline.contains(symbol) { - 742 | VariableType::Hidden - 743 | } else { - 744 | match symbol.kind { - 745 | SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, - 746 | SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, - 747 | SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, - 748 | _ => VariableType::Hidden, - 749 | } - 750 | } - 751 | } - 752 | } - 753 | } - | - 754 | fn extend_sorted<'a, T>(vec: &mut Vec, values: impl IntoIterator) -> bool - 755 | where - 756 | T: 'a + Clone + Eq + Ord, - 757 | { - 758 | values.into_iter().any(|value| { - 759 | if let Err(i) = vec.binary_search(value) { - 760 | vec.insert(i, value.clone()); - 761 | 
true - 762 | } else { - 763 | false - 764 | } - 765 | }) - 766 | } - | - 767 | #[cfg(all(test, feature = "load"))] - 768 | mod tests { - 769 | use super::*; - 770 | use crate::{ - 771 | grammars::{ - 772 | InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable, - 773 | }, - 774 | prepare_grammar::prepare_grammar, - 775 | rules::Rule, - 776 | }; - | - 777 | #[test] - 778 | fn test_node_types_simple() { - 779 | let node_types = get_node_types(&InputGrammar { - 780 | variables: vec![ - 781 | Variable { - 782 | name: "v1".to_string(), - 783 | kind: VariableType::Named, - 784 | rule: Rule::seq(vec![ - 785 | Rule::field("f1".to_string(), Rule::named("v2")), - 786 | Rule::field("f2".to_string(), Rule::string(";")), - 787 | ]), - 788 | }, - 789 | Variable { - 790 | name: "v2".to_string(), - 791 | kind: VariableType::Named, - 792 | rule: Rule::string("x"), - 793 | }, - 794 | // This rule is not reachable from the start symbol - 795 | // so it won't be present in the node_types - 796 | Variable { - 797 | name: "v3".to_string(), - 798 | kind: VariableType::Named, - 799 | rule: Rule::string("y"), - 800 | }, - 801 | ], - 802 | ..Default::default() - 803 | }) - 804 | .unwrap(); - | - 805 | assert_eq!(node_types.len(), 3); - | - 806 | assert_eq!( - 807 | node_types[0], - 808 | NodeInfoJSON { - 809 | kind: "v1".to_string(), - 810 | named: true, - 811 | root: true, - 812 | extra: false, - 813 | subtypes: None, - 814 | children: None, - 815 | fields: Some( - 816 | vec![ - 817 | ( - 818 | "f1".to_string(), - 819 | FieldInfoJSON { - 820 | multiple: false, - 821 | required: true, - 822 | types: vec![NodeTypeJSON { - 823 | kind: "v2".to_string(), - 824 | named: true, - 825 | }] - 826 | } - 827 | ), - 828 | ( - 829 | "f2".to_string(), - 830 | FieldInfoJSON { - 831 | multiple: false, - 832 | required: true, - 833 | types: vec![NodeTypeJSON { - 834 | kind: ";".to_string(), - 835 | named: false, - 836 | }] - 837 | } - 838 | ), - 839 | ] - 840 | .into_iter() - 
841 | .collect() - 842 | ) - 843 | } - 844 | ); - 845 | assert_eq!( - 846 | node_types[1], - 847 | NodeInfoJSON { - 848 | kind: ";".to_string(), - 849 | named: false, - 850 | root: false, - 851 | extra: false, - 852 | subtypes: None, - 853 | children: None, - 854 | fields: None - 855 | } - 856 | ); - 857 | assert_eq!( - 858 | node_types[2], - 859 | NodeInfoJSON { - 860 | kind: "v2".to_string(), - 861 | named: true, - 862 | root: false, - 863 | extra: false, - 864 | subtypes: None, - 865 | children: None, - 866 | fields: None - 867 | } - 868 | ); - 869 | } - | - 870 | #[test] - 871 | fn test_node_types_simple_extras() { - 872 | let node_types = get_node_types(&InputGrammar { - 873 | extra_symbols: vec![Rule::named("v3")], - 874 | variables: vec![ - 875 | Variable { - 876 | name: "v1".to_string(), - 877 | kind: VariableType::Named, - 878 | rule: Rule::seq(vec![ - 879 | Rule::field("f1".to_string(), Rule::named("v2")), - 880 | Rule::field("f2".to_string(), Rule::string(";")), - 881 | ]), - 882 | }, - 883 | Variable { - 884 | name: "v2".to_string(), - 885 | kind: VariableType::Named, - 886 | rule: Rule::string("x"), - 887 | }, - 888 | // This rule is not reachable from the start symbol, but - 889 | // it is reachable from the 'extra_symbols' so it - 890 | // should be present in the node_types. - 891 | // But because it's only a literal, it will get replaced by - 892 | // a lexical variable. 
- 893 | Variable { - 894 | name: "v3".to_string(), - 895 | kind: VariableType::Named, - 896 | rule: Rule::string("y"), - 897 | }, - 898 | ], - 899 | ..Default::default() - 900 | }) - 901 | .unwrap(); - | - 902 | assert_eq!(node_types.len(), 4); - | - 903 | assert_eq!( - 904 | node_types[0], - 905 | NodeInfoJSON { - 906 | kind: "v1".to_string(), - 907 | named: true, - 908 | root: true, - 909 | extra: false, - 910 | subtypes: None, - 911 | children: None, - 912 | fields: Some( - 913 | vec![ - 914 | ( - 915 | "f1".to_string(), - 916 | FieldInfoJSON { - 917 | multiple: false, - 918 | required: true, - 919 | types: vec![NodeTypeJSON { - 920 | kind: "v2".to_string(), - 921 | named: true, - 922 | }] - 923 | } - 924 | ), - 925 | ( - 926 | "f2".to_string(), - 927 | FieldInfoJSON { - 928 | multiple: false, - 929 | required: true, - 930 | types: vec![NodeTypeJSON { - 931 | kind: ";".to_string(), - 932 | named: false, - 933 | }] - 934 | } - 935 | ), - 936 | ] - 937 | .into_iter() - 938 | .collect() - 939 | ) - 940 | } - 941 | ); - 942 | assert_eq!( - 943 | node_types[1], - 944 | NodeInfoJSON { - 945 | kind: ";".to_string(), - 946 | named: false, - 947 | root: false, - 948 | extra: false, - 949 | subtypes: None, - 950 | children: None, - 951 | fields: None - 952 | } - 953 | ); - 954 | assert_eq!( - 955 | node_types[2], - 956 | NodeInfoJSON { - 957 | kind: "v2".to_string(), - 958 | named: true, - 959 | root: false, - 960 | extra: false, - 961 | subtypes: None, - 962 | children: None, - 963 | fields: None - 964 | } - 965 | ); - 966 | assert_eq!( - 967 | node_types[3], - 968 | NodeInfoJSON { - 969 | kind: "v3".to_string(), - 970 | named: true, - 971 | root: false, - 972 | extra: true, - 973 | subtypes: None, - 974 | children: None, - 975 | fields: None - 976 | } - 977 | ); - 978 | } - | - 979 | #[test] - 980 | fn test_node_types_deeper_extras() { - 981 | let node_types = get_node_types(&InputGrammar { - 982 | extra_symbols: vec![Rule::named("v3")], - 983 | variables: vec![ - 984 | 
Variable { - 985 | name: "v1".to_string(), - 986 | kind: VariableType::Named, - 987 | rule: Rule::seq(vec![ - 988 | Rule::field("f1".to_string(), Rule::named("v2")), - 989 | Rule::field("f2".to_string(), Rule::string(";")), - 990 | ]), - 991 | }, - 992 | Variable { - 993 | name: "v2".to_string(), - 994 | kind: VariableType::Named, - 995 | rule: Rule::string("x"), - 996 | }, - 997 | // This rule is not reachable from the start symbol, but - 998 | // it is reachable from the 'extra_symbols' so it - 999 | // should be present in the node_types. -1000 | // Because it is not just a literal, it won't get replaced -1001 | // by a lexical variable. -1002 | Variable { -1003 | name: "v3".to_string(), -1004 | kind: VariableType::Named, -1005 | rule: Rule::seq(vec![Rule::string("y"), Rule::repeat(Rule::string("z"))]), -1006 | }, -1007 | ], -1008 | ..Default::default() -1009 | }) -1010 | .unwrap(); - | -1011 | assert_eq!(node_types.len(), 6); - | -1012 | assert_eq!( -1013 | node_types[0], -1014 | NodeInfoJSON { -1015 | kind: "v1".to_string(), -1016 | named: true, -1017 | root: true, -1018 | extra: false, -1019 | subtypes: None, -1020 | children: None, -1021 | fields: Some( -1022 | vec![ -1023 | ( -1024 | "f1".to_string(), -1025 | FieldInfoJSON { -1026 | multiple: false, -1027 | required: true, -1028 | types: vec![NodeTypeJSON { -1029 | kind: "v2".to_string(), -1030 | named: true, -1031 | }] -1032 | } -1033 | ), -1034 | ( -1035 | "f2".to_string(), -1036 | FieldInfoJSON { -1037 | multiple: false, -1038 | required: true, -1039 | types: vec![NodeTypeJSON { -1040 | kind: ";".to_string(), -1041 | named: false, -1042 | }] -1043 | } -1044 | ), -1045 | ] -1046 | .into_iter() -1047 | .collect() -1048 | ) -1049 | } -1050 | ); -1051 | assert_eq!( -1052 | node_types[1], -1053 | NodeInfoJSON { -1054 | kind: "v3".to_string(), -1055 | named: true, -1056 | root: false, -1057 | extra: true, -1058 | subtypes: None, -1059 | children: None, -1060 | fields: Some(BTreeMap::default()) -1061 | } -1062 
| ); -1063 | assert_eq!( -1064 | node_types[2], -1065 | NodeInfoJSON { -1066 | kind: ";".to_string(), -1067 | named: false, -1068 | root: false, -1069 | extra: false, -1070 | subtypes: None, -1071 | children: None, -1072 | fields: None -1073 | } -1074 | ); -1075 | assert_eq!( -1076 | node_types[3], -1077 | NodeInfoJSON { -1078 | kind: "v2".to_string(), -1079 | named: true, -1080 | root: false, -1081 | extra: false, -1082 | subtypes: None, -1083 | children: None, -1084 | fields: None -1085 | } -1086 | ); -1087 | } - | -1088 | #[test] -1089 | fn test_node_types_with_supertypes() { -1090 | let node_types = get_node_types(&InputGrammar { -1091 | supertype_symbols: vec!["_v2".to_string()], -1092 | variables: vec![ -1093 | Variable { -1094 | name: "v1".to_string(), -1095 | kind: VariableType::Named, -1096 | rule: Rule::field("f1".to_string(), Rule::named("_v2")), -1097 | }, -1098 | Variable { -1099 | name: "_v2".to_string(), -1100 | kind: VariableType::Hidden, -1101 | rule: Rule::choice(vec![ -1102 | Rule::named("v3"), -1103 | Rule::named("v4"), -1104 | Rule::string("*"), -1105 | ]), -1106 | }, -1107 | Variable { -1108 | name: "v3".to_string(), -1109 | kind: VariableType::Named, -1110 | rule: Rule::string("x"), -1111 | }, -1112 | Variable { -1113 | name: "v4".to_string(), -1114 | kind: VariableType::Named, -1115 | rule: Rule::string("y"), -1116 | }, -1117 | ], -1118 | ..Default::default() -1119 | }) -1120 | .unwrap(); - | -1121 | assert_eq!( -1122 | node_types[0], -1123 | NodeInfoJSON { -1124 | kind: "_v2".to_string(), -1125 | named: true, -1126 | root: false, -1127 | extra: false, -1128 | fields: None, -1129 | children: None, -1130 | subtypes: Some(vec![ -1131 | NodeTypeJSON { -1132 | kind: "*".to_string(), -1133 | named: false, -1134 | }, -1135 | NodeTypeJSON { -1136 | kind: "v3".to_string(), -1137 | named: true, -1138 | }, -1139 | NodeTypeJSON { -1140 | kind: "v4".to_string(), -1141 | named: true, -1142 | }, -1143 | ]), -1144 | } -1145 | ); -1146 | assert_eq!( -1147 | 
node_types[1], -1148 | NodeInfoJSON { -1149 | kind: "v1".to_string(), -1150 | named: true, -1151 | root: true, -1152 | extra: false, -1153 | subtypes: None, -1154 | children: None, -1155 | fields: Some( -1156 | vec![( -1157 | "f1".to_string(), -1158 | FieldInfoJSON { -1159 | multiple: false, -1160 | required: true, -1161 | types: vec![NodeTypeJSON { -1162 | kind: "_v2".to_string(), -1163 | named: true, -1164 | }] -1165 | } -1166 | ),] -1167 | .into_iter() -1168 | .collect() -1169 | ) -1170 | } -1171 | ); -1172 | } - | -1173 | #[test] -1174 | fn test_node_types_for_children_without_fields() { -1175 | let node_types = get_node_types(&InputGrammar { -1176 | variables: vec![ -1177 | Variable { -1178 | name: "v1".to_string(), -1179 | kind: VariableType::Named, -1180 | rule: Rule::seq(vec![ -1181 | Rule::named("v2"), -1182 | Rule::field("f1".to_string(), Rule::named("v3")), -1183 | Rule::named("v4"), -1184 | ]), -1185 | }, -1186 | Variable { -1187 | name: "v2".to_string(), -1188 | kind: VariableType::Named, -1189 | rule: Rule::seq(vec![ -1190 | Rule::string("{"), -1191 | Rule::choice(vec![Rule::named("v3"), Rule::Blank]), -1192 | Rule::string("}"), -1193 | ]), -1194 | }, -1195 | Variable { -1196 | name: "v3".to_string(), -1197 | kind: VariableType::Named, -1198 | rule: Rule::string("x"), -1199 | }, -1200 | Variable { -1201 | name: "v4".to_string(), -1202 | kind: VariableType::Named, -1203 | rule: Rule::string("y"), -1204 | }, -1205 | ], -1206 | ..Default::default() -1207 | }) -1208 | .unwrap(); - | -1209 | assert_eq!( -1210 | node_types[0], -1211 | NodeInfoJSON { -1212 | kind: "v1".to_string(), -1213 | named: true, -1214 | root: true, -1215 | extra: false, -1216 | subtypes: None, -1217 | children: Some(FieldInfoJSON { -1218 | multiple: true, -1219 | required: true, -1220 | types: vec![ -1221 | NodeTypeJSON { -1222 | kind: "v2".to_string(), -1223 | named: true, -1224 | }, -1225 | NodeTypeJSON { -1226 | kind: "v4".to_string(), -1227 | named: true, -1228 | }, -1229 | ] 
-1230 | }), -1231 | fields: Some( -1232 | vec![( -1233 | "f1".to_string(), -1234 | FieldInfoJSON { -1235 | multiple: false, -1236 | required: true, -1237 | types: vec![NodeTypeJSON { -1238 | kind: "v3".to_string(), -1239 | named: true, -1240 | }] -1241 | } -1242 | ),] -1243 | .into_iter() -1244 | .collect() -1245 | ) -1246 | } -1247 | ); -1248 | assert_eq!( -1249 | node_types[1], -1250 | NodeInfoJSON { -1251 | kind: "v2".to_string(), -1252 | named: true, -1253 | root: false, -1254 | extra: false, -1255 | subtypes: None, -1256 | children: Some(FieldInfoJSON { -1257 | multiple: false, -1258 | required: false, -1259 | types: vec![NodeTypeJSON { -1260 | kind: "v3".to_string(), -1261 | named: true, -1262 | },] -1263 | }), -1264 | fields: Some(BTreeMap::new()), -1265 | } -1266 | ); -1267 | } - | -1268 | #[test] -1269 | fn test_node_types_with_inlined_rules() { -1270 | let node_types = get_node_types(&InputGrammar { -1271 | variables_to_inline: vec!["v2".to_string()], -1272 | variables: vec![ -1273 | Variable { -1274 | name: "v1".to_string(), -1275 | kind: VariableType::Named, -1276 | rule: Rule::seq(vec![Rule::named("v2"), Rule::named("v3")]), -1277 | }, -1278 | // v2 should not appear in the node types, since it is inlined -1279 | Variable { -1280 | name: "v2".to_string(), -1281 | kind: VariableType::Named, -1282 | rule: Rule::alias(Rule::string("a"), "x".to_string(), true), -1283 | }, -1284 | Variable { -1285 | name: "v3".to_string(), -1286 | kind: VariableType::Named, -1287 | rule: Rule::string("b"), -1288 | }, -1289 | ], -1290 | ..Default::default() -1291 | }) -1292 | .unwrap(); - | -1293 | assert_eq!( -1294 | node_types[0], -1295 | NodeInfoJSON { -1296 | kind: "v1".to_string(), -1297 | named: true, -1298 | root: true, -1299 | extra: false, -1300 | subtypes: None, -1301 | children: Some(FieldInfoJSON { -1302 | multiple: true, -1303 | required: true, -1304 | types: vec![ -1305 | NodeTypeJSON { -1306 | kind: "v3".to_string(), -1307 | named: true, -1308 | }, -1309 | 
NodeTypeJSON { -1310 | kind: "x".to_string(), -1311 | named: true, -1312 | }, -1313 | ] -1314 | }), -1315 | fields: Some(BTreeMap::new()), -1316 | } -1317 | ); -1318 | } - | -1319 | #[test] -1320 | fn test_node_types_for_aliased_nodes() { -1321 | let node_types = get_node_types(&InputGrammar { -1322 | variables: vec![ -1323 | Variable { -1324 | name: "thing".to_string(), -1325 | kind: VariableType::Named, -1326 | rule: Rule::choice(vec![Rule::named("type"), Rule::named("expression")]), -1327 | }, -1328 | Variable { -1329 | name: "type".to_string(), -1330 | kind: VariableType::Named, -1331 | rule: Rule::choice(vec![ -1332 | Rule::alias( -1333 | Rule::named("identifier"), -1334 | "type_identifier".to_string(), -1335 | true, -1336 | ), -1337 | Rule::string("void"), -1338 | ]), -1339 | }, -1340 | Variable { -1341 | name: "expression".to_string(), -1342 | kind: VariableType::Named, -1343 | rule: Rule::choice(vec![ -1344 | Rule::named("identifier"), -1345 | Rule::alias( -1346 | Rule::named("foo_identifier"), -1347 | "identifier".to_string(), -1348 | true, -1349 | ), -1350 | ]), -1351 | }, -1352 | Variable { -1353 | name: "identifier".to_string(), -1354 | kind: VariableType::Named, -1355 | rule: Rule::pattern("\\w+", ""), -1356 | }, -1357 | Variable { -1358 | name: "foo_identifier".to_string(), -1359 | kind: VariableType::Named, -1360 | rule: Rule::pattern("[\\w-]+", ""), -1361 | }, -1362 | ], -1363 | ..Default::default() -1364 | }) -1365 | .unwrap(); - | -1366 | assert_eq!(node_types.iter().find(|t| t.kind == "foo_identifier"), None); -1367 | assert_eq!( -1368 | node_types.iter().find(|t| t.kind == "identifier"), -1369 | Some(&NodeInfoJSON { -1370 | kind: "identifier".to_string(), -1371 | named: true, -1372 | root: false, -1373 | extra: false, -1374 | subtypes: None, -1375 | children: None, -1376 | fields: None, -1377 | }) -1378 | ); -1379 | assert_eq!( -1380 | node_types.iter().find(|t| t.kind == "type_identifier"), -1381 | Some(&NodeInfoJSON { -1382 | kind: 
"type_identifier".to_string(), -1383 | named: true, -1384 | root: false, -1385 | extra: false, -1386 | subtypes: None, -1387 | children: None, -1388 | fields: None, -1389 | }) -1390 | ); -1391 | } - | -1392 | #[test] -1393 | fn test_node_types_with_multiple_valued_fields() { -1394 | let node_types = get_node_types(&InputGrammar { -1395 | variables: vec![ -1396 | Variable { -1397 | name: "a".to_string(), -1398 | kind: VariableType::Named, -1399 | rule: Rule::seq(vec![ -1400 | Rule::choice(vec![ -1401 | Rule::Blank, -1402 | Rule::repeat(Rule::field("f1".to_string(), Rule::named("b"))), -1403 | ]), -1404 | Rule::repeat(Rule::named("c")), -1405 | ]), -1406 | }, -1407 | Variable { -1408 | name: "b".to_string(), -1409 | kind: VariableType::Named, -1410 | rule: Rule::string("b"), -1411 | }, -1412 | Variable { -1413 | name: "c".to_string(), -1414 | kind: VariableType::Named, -1415 | rule: Rule::string("c"), -1416 | }, -1417 | ], -1418 | ..Default::default() -1419 | }) -1420 | .unwrap(); - | -1421 | assert_eq!( -1422 | node_types[0], -1423 | NodeInfoJSON { -1424 | kind: "a".to_string(), -1425 | named: true, -1426 | root: true, -1427 | extra: false, -1428 | subtypes: None, -1429 | children: Some(FieldInfoJSON { -1430 | multiple: true, -1431 | required: true, -1432 | types: vec![NodeTypeJSON { -1433 | kind: "c".to_string(), -1434 | named: true, -1435 | },] -1436 | }), -1437 | fields: Some( -1438 | vec![( -1439 | "f1".to_string(), -1440 | FieldInfoJSON { -1441 | multiple: true, -1442 | required: false, -1443 | types: vec![NodeTypeJSON { -1444 | kind: "b".to_string(), -1445 | named: true, -1446 | }] -1447 | } -1448 | )] -1449 | .into_iter() -1450 | .collect() -1451 | ), -1452 | } -1453 | ); -1454 | } - | -1455 | #[test] -1456 | fn test_node_types_with_fields_on_hidden_tokens() { -1457 | let node_types = get_node_types(&InputGrammar { -1458 | variables: vec![Variable { -1459 | name: "script".to_string(), -1460 | kind: VariableType::Named, -1461 | rule: Rule::seq(vec![ -1462 | 
Rule::field("a".to_string(), Rule::pattern("hi", "")), -1463 | Rule::field("b".to_string(), Rule::pattern("bye", "")), -1464 | ]), -1465 | }], -1466 | ..Default::default() -1467 | }) -1468 | .unwrap(); - | -1469 | assert_eq!( -1470 | node_types, -1471 | [NodeInfoJSON { -1472 | kind: "script".to_string(), -1473 | named: true, -1474 | root: true, -1475 | extra: false, -1476 | fields: Some(BTreeMap::new()), -1477 | children: None, -1478 | subtypes: None -1479 | }] -1480 | ); -1481 | } - | -1482 | #[test] -1483 | fn test_node_types_with_multiple_rules_same_alias_name() { -1484 | let node_types = get_node_types(&InputGrammar { -1485 | variables: vec![ -1486 | Variable { -1487 | name: "script".to_string(), -1488 | kind: VariableType::Named, -1489 | rule: Rule::choice(vec![ -1490 | Rule::named("a"), -1491 | // Rule `b` is aliased as rule `a` -1492 | Rule::alias(Rule::named("b"), "a".to_string(), true), -1493 | ]), -1494 | }, -1495 | Variable { -1496 | name: "a".to_string(), -1497 | kind: VariableType::Named, -1498 | rule: Rule::seq(vec![ -1499 | Rule::field("f1".to_string(), Rule::string("1")), -1500 | Rule::field("f2".to_string(), Rule::string("2")), -1501 | ]), -1502 | }, -1503 | Variable { -1504 | name: "b".to_string(), -1505 | kind: VariableType::Named, -1506 | rule: Rule::seq(vec![ -1507 | Rule::field("f2".to_string(), Rule::string("22")), -1508 | Rule::field("f2".to_string(), Rule::string("222")), -1509 | Rule::field("f3".to_string(), Rule::string("3")), -1510 | ]), -1511 | }, -1512 | ], -1513 | ..Default::default() -1514 | }) -1515 | .unwrap(); - | -1516 | assert_eq!( -1517 | &node_types -1518 | .iter() -1519 | .map(|t| t.kind.as_str()) -1520 | .collect::>(), -1521 | &["a", "script", "1", "2", "22", "222", "3"] -1522 | ); - | -1523 | assert_eq!( -1524 | &node_types[0..2], -1525 | &[ -1526 | // A combination of the types for `a` and `b`. 
-1527 | NodeInfoJSON { -1528 | kind: "a".to_string(), -1529 | named: true, -1530 | root: false, -1531 | extra: false, -1532 | subtypes: None, -1533 | children: None, -1534 | fields: Some( -1535 | vec![ -1536 | ( -1537 | "f1".to_string(), -1538 | FieldInfoJSON { -1539 | multiple: false, -1540 | required: false, -1541 | types: vec![NodeTypeJSON { -1542 | kind: "1".to_string(), -1543 | named: false, -1544 | }] -1545 | } -1546 | ), -1547 | ( -1548 | "f2".to_string(), -1549 | FieldInfoJSON { -1550 | multiple: true, -1551 | required: true, -1552 | types: vec![ -1553 | NodeTypeJSON { -1554 | kind: "2".to_string(), -1555 | named: false, -1556 | }, -1557 | NodeTypeJSON { -1558 | kind: "22".to_string(), -1559 | named: false, -1560 | }, -1561 | NodeTypeJSON { -1562 | kind: "222".to_string(), -1563 | named: false, -1564 | } -1565 | ] -1566 | }, -1567 | ), -1568 | ( -1569 | "f3".to_string(), -1570 | FieldInfoJSON { -1571 | multiple: false, -1572 | required: false, -1573 | types: vec![NodeTypeJSON { -1574 | kind: "3".to_string(), -1575 | named: false, -1576 | }] -1577 | } -1578 | ), -1579 | ] -1580 | .into_iter() -1581 | .collect() -1582 | ), -1583 | }, -1584 | NodeInfoJSON { -1585 | kind: "script".to_string(), -1586 | named: true, -1587 | root: true, -1588 | extra: false, -1589 | subtypes: None, -1590 | // Only one node -1591 | children: Some(FieldInfoJSON { -1592 | multiple: false, -1593 | required: true, -1594 | types: vec![NodeTypeJSON { -1595 | kind: "a".to_string(), -1596 | named: true, -1597 | }] -1598 | }), -1599 | fields: Some(BTreeMap::new()), -1600 | } -1601 | ] -1602 | ); -1603 | } - | -1604 | #[test] -1605 | fn test_node_types_with_tokens_aliased_to_match_rules() { -1606 | let node_types = get_node_types(&InputGrammar { -1607 | variables: vec![ -1608 | Variable { -1609 | name: "a".to_string(), -1610 | kind: VariableType::Named, -1611 | rule: Rule::seq(vec![Rule::named("b"), Rule::named("c")]), -1612 | }, -1613 | // Ordinarily, `b` nodes have two named `c` children. 
-1614 | Variable { -1615 | name: "b".to_string(), -1616 | kind: VariableType::Named, -1617 | rule: Rule::seq(vec![Rule::named("c"), Rule::string("B"), Rule::named("c")]), -1618 | }, -1619 | Variable { -1620 | name: "c".to_string(), -1621 | kind: VariableType::Named, -1622 | rule: Rule::choice(vec![ -1623 | Rule::string("C"), -1624 | // This token is aliased as a `b`, which will produce a `b` node -1625 | // with no children. -1626 | Rule::alias(Rule::string("D"), "b".to_string(), true), -1627 | ]), -1628 | }, -1629 | ], -1630 | ..Default::default() -1631 | }) -1632 | .unwrap(); - | -1633 | assert_eq!( -1634 | node_types.iter().map(|n| &n.kind).collect::>(), -1635 | &["a", "b", "c", "B", "C"] -1636 | ); -1637 | assert_eq!( -1638 | node_types[1], -1639 | NodeInfoJSON { -1640 | kind: "b".to_string(), -1641 | named: true, -1642 | root: false, -1643 | extra: false, -1644 | subtypes: None, -1645 | children: Some(FieldInfoJSON { -1646 | multiple: true, -1647 | required: false, -1648 | types: vec![NodeTypeJSON { -1649 | kind: "c".to_string(), -1650 | named: true, -1651 | }] -1652 | }), -1653 | fields: Some(BTreeMap::new()), -1654 | } -1655 | ); -1656 | } - | -1657 | #[test] -1658 | fn test_get_variable_info() { -1659 | let variable_info = get_variable_info( -1660 | &build_syntax_grammar( -1661 | vec![ -1662 | // Required field `field1` has only one node type. 
-1663 | SyntaxVariable { -1664 | name: "rule0".to_string(), -1665 | kind: VariableType::Named, -1666 | productions: vec![Production { -1667 | dynamic_precedence: 0, -1668 | steps: vec![ -1669 | ProductionStep::new(Symbol::terminal(0)), -1670 | ProductionStep::new(Symbol::non_terminal(1)) -1671 | .with_field_name("field1"), -1672 | ], -1673 | }], -1674 | }, -1675 | // Hidden node -1676 | SyntaxVariable { -1677 | name: "_rule1".to_string(), -1678 | kind: VariableType::Hidden, -1679 | productions: vec![Production { -1680 | dynamic_precedence: 0, -1681 | steps: vec![ProductionStep::new(Symbol::terminal(1))], -1682 | }], -1683 | }, -1684 | // Optional field `field2` can have two possible node types. -1685 | SyntaxVariable { -1686 | name: "rule2".to_string(), -1687 | kind: VariableType::Named, -1688 | productions: vec![ -1689 | Production { -1690 | dynamic_precedence: 0, -1691 | steps: vec![ProductionStep::new(Symbol::terminal(0))], -1692 | }, -1693 | Production { -1694 | dynamic_precedence: 0, -1695 | steps: vec![ -1696 | ProductionStep::new(Symbol::terminal(0)), -1697 | ProductionStep::new(Symbol::terminal(2)) -1698 | .with_field_name("field2"), -1699 | ], -1700 | }, -1701 | Production { -1702 | dynamic_precedence: 0, -1703 | steps: vec![ -1704 | ProductionStep::new(Symbol::terminal(0)), -1705 | ProductionStep::new(Symbol::terminal(3)) -1706 | .with_field_name("field2"), -1707 | ], -1708 | }, -1709 | ], -1710 | }, -1711 | ], -1712 | vec![], -1713 | ), -1714 | &build_lexical_grammar(), -1715 | &AliasMap::new(), -1716 | ) -1717 | .unwrap(); - | -1718 | assert_eq!( -1719 | variable_info[0].fields, -1720 | vec![( -1721 | "field1".to_string(), -1722 | FieldInfo { -1723 | quantity: ChildQuantity { -1724 | exists: true, -1725 | required: true, -1726 | multiple: false, -1727 | }, -1728 | types: vec![ChildType::Normal(Symbol::terminal(1))], -1729 | } -1730 | )] -1731 | .into_iter() -1732 | .collect::>() -1733 | ); - | -1734 | assert_eq!( -1735 | variable_info[2].fields, -1736 | 
vec![( -1737 | "field2".to_string(), -1738 | FieldInfo { -1739 | quantity: ChildQuantity { -1740 | exists: true, -1741 | required: false, -1742 | multiple: false, -1743 | }, -1744 | types: vec![ -1745 | ChildType::Normal(Symbol::terminal(2)), -1746 | ChildType::Normal(Symbol::terminal(3)), -1747 | ], -1748 | } -1749 | )] -1750 | .into_iter() -1751 | .collect::>() -1752 | ); -1753 | } - | -1754 | #[test] -1755 | fn test_get_variable_info_with_repetitions_inside_fields() { -1756 | let variable_info = get_variable_info( -1757 | &build_syntax_grammar( -1758 | vec![ -1759 | // Field associated with a repetition. -1760 | SyntaxVariable { -1761 | name: "rule0".to_string(), -1762 | kind: VariableType::Named, -1763 | productions: vec![ -1764 | Production { -1765 | dynamic_precedence: 0, -1766 | steps: vec![ProductionStep::new(Symbol::non_terminal(1)) -1767 | .with_field_name("field1")], -1768 | }, -1769 | Production { -1770 | dynamic_precedence: 0, -1771 | steps: vec![], -1772 | }, -1773 | ], -1774 | }, -1775 | // Repetition node -1776 | SyntaxVariable { -1777 | name: "_rule0_repeat".to_string(), -1778 | kind: VariableType::Hidden, -1779 | productions: vec![ -1780 | Production { -1781 | dynamic_precedence: 0, -1782 | steps: vec![ProductionStep::new(Symbol::terminal(1))], -1783 | }, -1784 | Production { -1785 | dynamic_precedence: 0, -1786 | steps: vec![ -1787 | ProductionStep::new(Symbol::non_terminal(1)), -1788 | ProductionStep::new(Symbol::non_terminal(1)), -1789 | ], -1790 | }, -1791 | ], -1792 | }, -1793 | ], -1794 | vec![], -1795 | ), -1796 | &build_lexical_grammar(), -1797 | &AliasMap::new(), -1798 | ) -1799 | .unwrap(); - | -1800 | assert_eq!( -1801 | variable_info[0].fields, -1802 | vec![( -1803 | "field1".to_string(), -1804 | FieldInfo { -1805 | quantity: ChildQuantity { -1806 | exists: true, -1807 | required: false, -1808 | multiple: true, -1809 | }, -1810 | types: vec![ChildType::Normal(Symbol::terminal(1))], -1811 | } -1812 | )] -1813 | .into_iter() -1814 | 
.collect::>() -1815 | ); -1816 | } - | -1817 | #[test] -1818 | fn test_get_variable_info_with_inherited_fields() { -1819 | let variable_info = get_variable_info( -1820 | &build_syntax_grammar( -1821 | vec![ -1822 | SyntaxVariable { -1823 | name: "rule0".to_string(), -1824 | kind: VariableType::Named, -1825 | productions: vec![ -1826 | Production { -1827 | dynamic_precedence: 0, -1828 | steps: vec![ -1829 | ProductionStep::new(Symbol::terminal(0)), -1830 | ProductionStep::new(Symbol::non_terminal(1)), -1831 | ProductionStep::new(Symbol::terminal(1)), -1832 | ], -1833 | }, -1834 | Production { -1835 | dynamic_precedence: 0, -1836 | steps: vec![ProductionStep::new(Symbol::non_terminal(1))], -1837 | }, -1838 | ], -1839 | }, -1840 | // Hidden node with fields -1841 | SyntaxVariable { -1842 | name: "_rule1".to_string(), -1843 | kind: VariableType::Hidden, -1844 | productions: vec![Production { -1845 | dynamic_precedence: 0, -1846 | steps: vec![ -1847 | ProductionStep::new(Symbol::terminal(2)).with_alias(".", false), -1848 | ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), -1849 | ], -1850 | }], -1851 | }, -1852 | ], -1853 | vec![], -1854 | ), -1855 | &build_lexical_grammar(), -1856 | &AliasMap::new(), -1857 | ) -1858 | .unwrap(); - | -1859 | assert_eq!( -1860 | variable_info[0].fields, -1861 | vec![( -1862 | "field1".to_string(), -1863 | FieldInfo { -1864 | quantity: ChildQuantity { -1865 | exists: true, -1866 | required: true, -1867 | multiple: false, -1868 | }, -1869 | types: vec![ChildType::Normal(Symbol::terminal(3))], -1870 | } -1871 | )] -1872 | .into_iter() -1873 | .collect::>() -1874 | ); - | -1875 | assert_eq!( -1876 | variable_info[0].children_without_fields, -1877 | FieldInfo { -1878 | quantity: ChildQuantity { -1879 | exists: true, -1880 | required: false, -1881 | multiple: true, -1882 | }, -1883 | types: vec![ -1884 | ChildType::Normal(Symbol::terminal(0)), -1885 | ChildType::Normal(Symbol::terminal(1)), -1886 | ], -1887 | } -1888 | ); 
-1889 | } - | -1890 | #[test] -1891 | fn test_get_variable_info_with_supertypes() { -1892 | let variable_info = get_variable_info( -1893 | &build_syntax_grammar( -1894 | vec![ -1895 | SyntaxVariable { -1896 | name: "rule0".to_string(), -1897 | kind: VariableType::Named, -1898 | productions: vec![Production { -1899 | dynamic_precedence: 0, -1900 | steps: vec![ -1901 | ProductionStep::new(Symbol::terminal(0)), -1902 | ProductionStep::new(Symbol::non_terminal(1)) -1903 | .with_field_name("field1"), -1904 | ProductionStep::new(Symbol::terminal(1)), -1905 | ], -1906 | }], -1907 | }, -1908 | SyntaxVariable { -1909 | name: "_rule1".to_string(), -1910 | kind: VariableType::Hidden, -1911 | productions: vec![ -1912 | Production { -1913 | dynamic_precedence: 0, -1914 | steps: vec![ProductionStep::new(Symbol::terminal(2))], -1915 | }, -1916 | Production { -1917 | dynamic_precedence: 0, -1918 | steps: vec![ProductionStep::new(Symbol::terminal(3))], -1919 | }, -1920 | ], -1921 | }, -1922 | ], -1923 | // _rule1 is a supertype -1924 | vec![Symbol::non_terminal(1)], -1925 | ), -1926 | &build_lexical_grammar(), -1927 | &AliasMap::new(), -1928 | ) -1929 | .unwrap(); - | -1930 | assert_eq!( -1931 | variable_info[0].fields, -1932 | vec![( -1933 | "field1".to_string(), -1934 | FieldInfo { -1935 | quantity: ChildQuantity { -1936 | exists: true, -1937 | required: true, -1938 | multiple: false, -1939 | }, -1940 | types: vec![ChildType::Normal(Symbol::non_terminal(1))], -1941 | } -1942 | )] -1943 | .into_iter() -1944 | .collect::>() -1945 | ); -1946 | } - | -1947 | fn get_node_types(grammar: &InputGrammar) -> SuperTypeCycleResult> { -1948 | let (syntax_grammar, lexical_grammar, _, default_aliases) = -1949 | prepare_grammar(grammar).unwrap(); -1950 | let variable_info = -1951 | get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap(); -1952 | generate_node_types_json( -1953 | &syntax_grammar, -1954 | &lexical_grammar, -1955 | &default_aliases, -1956 | &variable_info, 
-1957 | ) -1958 | } - | -1959 | fn build_syntax_grammar( -1960 | variables: Vec, -1961 | supertype_symbols: Vec, -1962 | ) -> SyntaxGrammar { -1963 | SyntaxGrammar { -1964 | variables, -1965 | supertype_symbols, -1966 | ..SyntaxGrammar::default() -1967 | } -1968 | } - | -1969 | fn build_lexical_grammar() -> LexicalGrammar { -1970 | let mut lexical_grammar = LexicalGrammar::default(); -1971 | for i in 0..10 { -1972 | lexical_grammar.variables.push(LexicalVariable { -1973 | name: format!("token_{i}"), -1974 | kind: VariableType::Named, -1975 | implicit_precedence: 0, -1976 | start_state: 0, -1977 | }); -1978 | } -1979 | lexical_grammar -1980 | } -1981 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/parse_grammar.rs: --------------------------------------------------------------------------------- - 1 | use std::collections::HashSet; - | - 2 | use anyhow::Result; - 3 | use log::warn; - 4 | use regex::Regex; - 5 | use serde::{Deserialize, Serialize}; - 6 | use serde_json::{Map, Value}; - 7 | use thiserror::Error; - | - 8 | use crate::{ - 9 | grammars::{InputGrammar, PrecedenceEntry, ReservedWordContext, Variable, VariableType}, - 10 | rules::{Precedence, Rule}, - 11 | }; - | - 12 | #[derive(Deserialize)] - 13 | #[serde(tag = "type")] - 14 | #[allow(non_camel_case_types)] - 15 | #[allow(clippy::upper_case_acronyms)] - 16 | enum RuleJSON { - 17 | ALIAS { - 18 | content: Box, - 19 | named: bool, - 20 | value: String, - 21 | }, - 22 | BLANK, - 23 | STRING { - 24 | value: String, - 25 | }, - 26 | PATTERN { - 27 | value: String, - 28 | flags: Option, - 29 | }, - 30 | SYMBOL { - 31 | name: String, - 32 | }, - 33 | CHOICE { - 34 | members: Vec, - 35 | }, - 36 | FIELD { - 37 | name: String, - 38 | content: Box, - 39 | }, - 40 | SEQ { - 41 | members: Vec, - 42 | }, - 43 | REPEAT { - 44 | content: Box, - 45 | }, - 46 | REPEAT1 { - 47 | content: Box, - 48 | }, - 49 | PREC_DYNAMIC { - 50 | value: i32, - 51 | content: 
Box, - 52 | }, - 53 | PREC_LEFT { - 54 | value: PrecedenceValueJSON, - 55 | content: Box, - 56 | }, - 57 | PREC_RIGHT { - 58 | value: PrecedenceValueJSON, - 59 | content: Box, - 60 | }, - 61 | PREC { - 62 | value: PrecedenceValueJSON, - 63 | content: Box, - 64 | }, - 65 | TOKEN { - 66 | content: Box, - 67 | }, - 68 | IMMEDIATE_TOKEN { - 69 | content: Box, - 70 | }, - 71 | RESERVED { - 72 | context_name: String, - 73 | content: Box, - 74 | }, - 75 | } - | - 76 | #[derive(Deserialize)] - 77 | #[serde(untagged)] - 78 | enum PrecedenceValueJSON { - 79 | Integer(i32), - 80 | Name(String), - 81 | } - | - 82 | #[derive(Deserialize)] - 83 | pub struct GrammarJSON { - 84 | pub name: String, - 85 | rules: Map, - 86 | #[serde(default)] - 87 | precedences: Vec>, - 88 | #[serde(default)] - 89 | conflicts: Vec>, - 90 | #[serde(default)] - 91 | externals: Vec, - 92 | #[serde(default)] - 93 | extras: Vec, - 94 | #[serde(default)] - 95 | inline: Vec, - 96 | #[serde(default)] - 97 | supertypes: Vec, - 98 | #[serde(default)] - 99 | word: Option, - 100 | #[serde(default)] - 101 | reserved: Map, - 102 | } - | - 103 | pub type ParseGrammarResult = Result; - | - 104 | #[derive(Debug, Error, Serialize)] - 105 | pub enum ParseGrammarError { - 106 | #[error("{0}")] - 107 | Serialization(String), - 108 | #[error("Rules in the `extras` array must not contain empty strings")] - 109 | InvalidExtra, - 110 | #[error("Invalid rule in precedences array. Only strings and symbols are allowed")] - 111 | Unexpected, - 112 | #[error("Reserved word sets must be arrays")] - 113 | InvalidReservedWordSet, - 114 | #[error("Grammar Error: Unexpected rule `{0}` in `token()` call")] - 115 | UnexpectedRule(String), - 116 | } - | - 117 | impl From for ParseGrammarError { - 118 | fn from(value: serde_json::Error) -> Self { - 119 | Self::Serialization(value.to_string()) - 120 | } - 121 | } - | - 122 | /// Check if a rule is referenced by another rule. 
- 123 | /// - 124 | /// This function is used to determine if a variable is used in a given rule, - 125 | /// and `is_other` indicates if the rule is an external, and if it is, - 126 | /// to not assume that a named symbol that is equal to itself means it's being referenced. - 127 | /// - 128 | /// For example, if we have an external rule **and** a normal rule both called `foo`, - 129 | /// `foo` should not be thought of as directly used unless it's used within another rule. - 130 | fn rule_is_referenced(rule: &Rule, target: &str, is_external: bool) -> bool { - 131 | match rule { - 132 | Rule::NamedSymbol(name) => name == target && !is_external, - 133 | Rule::Choice(rules) | Rule::Seq(rules) => { - 134 | rules.iter().any(|r| rule_is_referenced(r, target, false)) - 135 | } - 136 | Rule::Metadata { rule, .. } | Rule::Reserved { rule, .. } => { - 137 | rule_is_referenced(rule, target, is_external) - 138 | } - 139 | Rule::Repeat(inner) => rule_is_referenced(inner, target, false), - 140 | Rule::Blank | Rule::String(_) | Rule::Pattern(_, _) | Rule::Symbol(_) => false, - 141 | } - 142 | } - | - 143 | fn variable_is_used( - 144 | grammar_rules: &[(String, Rule)], - 145 | extras: &[Rule], - 146 | externals: &[Rule], - 147 | target_name: &str, - 148 | in_progress: &mut HashSet, - 149 | ) -> bool { - 150 | let root = &grammar_rules.first().unwrap().0; - 151 | if target_name == root { - 152 | return true; - 153 | } - | - 154 | if extras - 155 | .iter() - 156 | .any(|rule| rule_is_referenced(rule, target_name, false)) - 157 | { - 158 | return true; - 159 | } - | - 160 | if externals - 161 | .iter() - 162 | .any(|rule| rule_is_referenced(rule, target_name, true)) - 163 | { - 164 | return true; - 165 | } - | - 166 | in_progress.insert(target_name.to_string()); - 167 | let result = grammar_rules - 168 | .iter() - 169 | .filter(|(key, _)| *key != target_name) - 170 | .any(|(name, rule)| { - 171 | if !rule_is_referenced(rule, target_name, false) || in_progress.contains(name) { - 172 
| return false; - 173 | } - 174 | variable_is_used(grammar_rules, extras, externals, name, in_progress) - 175 | }); - 176 | in_progress.remove(target_name); - | - 177 | result - 178 | } - | - 179 | pub(crate) fn parse_grammar(input: &str) -> ParseGrammarResult { - 180 | let mut grammar_json = serde_json::from_str::(input)?; - | - 181 | let mut extra_symbols = - 182 | grammar_json - 183 | .extras - 184 | .into_iter() - 185 | .try_fold(Vec::::new(), |mut acc, item| { - 186 | let rule = parse_rule(item, false)?; - 187 | if let Rule::String(ref value) = rule { - 188 | if value.is_empty() { - 189 | Err(ParseGrammarError::InvalidExtra)?; - 190 | } - 191 | } - 192 | acc.push(rule); - 193 | ParseGrammarResult::Ok(acc) - 194 | })?; - | - 195 | let mut external_tokens = grammar_json - 196 | .externals - 197 | .into_iter() - 198 | .map(|e| parse_rule(e, false)) - 199 | .collect::>>()?; - | - 200 | let mut precedence_orderings = Vec::with_capacity(grammar_json.precedences.len()); - 201 | for list in grammar_json.precedences { - 202 | let mut ordering = Vec::with_capacity(list.len()); - 203 | for entry in list { - 204 | ordering.push(match entry { - 205 | RuleJSON::STRING { value } => PrecedenceEntry::Name(value), - 206 | RuleJSON::SYMBOL { name } => PrecedenceEntry::Symbol(name), - 207 | _ => Err(ParseGrammarError::Unexpected)?, - 208 | }); - 209 | } - 210 | precedence_orderings.push(ordering); - 211 | } - | - 212 | let mut variables = Vec::with_capacity(grammar_json.rules.len()); - | - 213 | let rules = grammar_json - 214 | .rules - 215 | .into_iter() - 216 | .map(|(n, r)| Ok((n, parse_rule(serde_json::from_value(r)?, false)?))) - 217 | .collect::>>()?; - | - 218 | let mut in_progress = HashSet::new(); - | - 219 | for (name, rule) in &rules { - 220 | if grammar_json.word.as_ref().is_none_or(|w| w != name) - 221 | && !variable_is_used( - 222 | &rules, - 223 | &extra_symbols, - 224 | &external_tokens, - 225 | name, - 226 | &mut in_progress, - 227 | ) - 228 | { - 229 | 
grammar_json.conflicts.retain(|r| !r.contains(name)); - 230 | grammar_json.supertypes.retain(|r| r != name); - 231 | grammar_json.inline.retain(|r| r != name); - 232 | extra_symbols.retain(|r| !rule_is_referenced(r, name, true)); - 233 | external_tokens.retain(|r| !rule_is_referenced(r, name, true)); - 234 | precedence_orderings.retain(|r| { - 235 | !r.iter().any(|e| { - 236 | let PrecedenceEntry::Symbol(s) = e else { - 237 | return false; - 238 | }; - 239 | s == name - 240 | }) - 241 | }); - 242 | continue; - 243 | } - | - 244 | if extra_symbols - 245 | .iter() - 246 | .any(|r| rule_is_referenced(r, name, false)) - 247 | { - 248 | let inner_rule = if let Rule::Metadata { rule, .. } = rule { - 249 | rule - 250 | } else { - 251 | rule - 252 | }; - 253 | let matches_empty = match inner_rule { - 254 | Rule::String(rule_str) => rule_str.is_empty(), - 255 | Rule::Pattern(ref value, _) => Regex::new(value) - 256 | .map(|reg| reg.is_match("")) - 257 | .unwrap_or(false), - 258 | _ => false, - 259 | }; - 260 | if matches_empty { - 261 | warn!( - 262 | concat!( - 263 | "Named extra rule `{}` matches the empty string. ", - 264 | "Inline this to avoid infinite loops while parsing." - 265 | ), - 266 | name - 267 | ); - 268 | } - 269 | } - 270 | variables.push(Variable { - 271 | name: name.clone(), - 272 | kind: VariableType::Named, - 273 | rule: rule.clone(), - 274 | }); - 275 | } - | - 276 | let reserved_words = grammar_json - 277 | .reserved - 278 | .into_iter() - 279 | .map(|(name, rule_values)| { - 280 | let Value::Array(rule_values) = rule_values else { - 281 | Err(ParseGrammarError::InvalidReservedWordSet)? 
- 282 | }; - | - 283 | let mut reserved_words = Vec::with_capacity(rule_values.len()); - 284 | for value in rule_values { - 285 | reserved_words.push(parse_rule(serde_json::from_value(value)?, false)?); - 286 | } - 287 | Ok(ReservedWordContext { - 288 | name, - 289 | reserved_words, - 290 | }) - 291 | }) - 292 | .collect::>>()?; - | - 293 | Ok(InputGrammar { - 294 | name: grammar_json.name, - 295 | word_token: grammar_json.word, - 296 | expected_conflicts: grammar_json.conflicts, - 297 | supertype_symbols: grammar_json.supertypes, - 298 | variables_to_inline: grammar_json.inline, - 299 | precedence_orderings, - 300 | variables, - 301 | extra_symbols, - 302 | external_tokens, - 303 | reserved_words, - 304 | }) - 305 | } - | - 306 | fn parse_rule(json: RuleJSON, is_token: bool) -> ParseGrammarResult { - 307 | match json { - 308 | RuleJSON::ALIAS { - 309 | content, - 310 | value, - 311 | named, - 312 | } => parse_rule(*content, is_token).map(|r| Rule::alias(r, value, named)), - 313 | RuleJSON::BLANK => Ok(Rule::Blank), - 314 | RuleJSON::STRING { value } => Ok(Rule::String(value)), - 315 | RuleJSON::PATTERN { value, flags } => Ok(Rule::Pattern( - 316 | value, - 317 | flags.map_or(String::new(), |f| { - 318 | f.matches(|c| { - 319 | if c == 'i' { - 320 | true - 321 | } else { - 322 | // silently ignore unicode flags - 323 | if c != 'u' && c != 'v' { - 324 | warn!("unsupported flag {c}"); - 325 | } - 326 | false - 327 | } - 328 | }) - 329 | .collect() - 330 | }), - 331 | )), - 332 | RuleJSON::SYMBOL { name } => { - 333 | if is_token { - 334 | Err(ParseGrammarError::UnexpectedRule(name))? 
- 335 | } else { - 336 | Ok(Rule::NamedSymbol(name)) - 337 | } - 338 | } - 339 | RuleJSON::CHOICE { members } => members - 340 | .into_iter() - 341 | .map(|m| parse_rule(m, is_token)) - 342 | .collect::>>() - 343 | .map(Rule::choice), - 344 | RuleJSON::FIELD { content, name } => { - 345 | parse_rule(*content, is_token).map(|r| Rule::field(name, r)) - 346 | } - 347 | RuleJSON::SEQ { members } => members - 348 | .into_iter() - 349 | .map(|m| parse_rule(m, is_token)) - 350 | .collect::>>() - 351 | .map(Rule::seq), - 352 | RuleJSON::REPEAT1 { content } => parse_rule(*content, is_token).map(Rule::repeat), - 353 | RuleJSON::REPEAT { content } => { - 354 | parse_rule(*content, is_token).map(|m| Rule::choice(vec![Rule::repeat(m), Rule::Blank])) - 355 | } - 356 | RuleJSON::PREC { value, content } => { - 357 | parse_rule(*content, is_token).map(|r| Rule::prec(value.into(), r)) - 358 | } - 359 | RuleJSON::PREC_LEFT { value, content } => { - 360 | parse_rule(*content, is_token).map(|r| Rule::prec_left(value.into(), r)) - 361 | } - 362 | RuleJSON::PREC_RIGHT { value, content } => { - 363 | parse_rule(*content, is_token).map(|r| Rule::prec_right(value.into(), r)) - 364 | } - 365 | RuleJSON::PREC_DYNAMIC { value, content } => { - 366 | parse_rule(*content, is_token).map(|r| Rule::prec_dynamic(value, r)) - 367 | } - 368 | RuleJSON::RESERVED { - 369 | content, - 370 | context_name, - 371 | } => parse_rule(*content, is_token).map(|r| Rule::Reserved { - 372 | rule: Box::new(r), - 373 | context_name, - 374 | }), - 375 | RuleJSON::TOKEN { content } => parse_rule(*content, true).map(Rule::token), - 376 | RuleJSON::IMMEDIATE_TOKEN { content } => { - 377 | parse_rule(*content, is_token).map(Rule::immediate_token) - 378 | } - 379 | } - 380 | } - | - 381 | impl From for Precedence { - 382 | fn from(val: PrecedenceValueJSON) -> Self { - 383 | match val { - 384 | PrecedenceValueJSON::Integer(i) => Self::Integer(i), - 385 | PrecedenceValueJSON::Name(i) => Self::Name(i), - 386 | } - 387 | } - 
388 | } - | - 389 | #[cfg(test)] - 390 | mod tests { - 391 | use super::*; - | - 392 | #[test] - 393 | fn test_parse_grammar() { - 394 | let grammar = parse_grammar( - 395 | r#"{ - 396 | "name": "my_lang", - 397 | "rules": { - 398 | "file": { - 399 | "type": "REPEAT1", - 400 | "content": { - 401 | "type": "SYMBOL", - 402 | "name": "statement" - 403 | } - 404 | }, - 405 | "statement": { - 406 | "type": "STRING", - 407 | "value": "foo" - 408 | } - 409 | } - 410 | }"#, - 411 | ) - 412 | .unwrap(); - | - 413 | assert_eq!(grammar.name, "my_lang"); - 414 | assert_eq!( - 415 | grammar.variables, - 416 | vec![ - 417 | Variable { - 418 | name: "file".to_string(), - 419 | kind: VariableType::Named, - 420 | rule: Rule::repeat(Rule::NamedSymbol("statement".to_string())) - 421 | }, - 422 | Variable { - 423 | name: "statement".to_string(), - 424 | kind: VariableType::Named, - 425 | rule: Rule::String("foo".to_string()) - 426 | }, - 427 | ] - 428 | ); - 429 | } - 430 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/parser.h.inc: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_PARSER_H_ - 2 | #define TREE_SITTER_PARSER_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include - 7 | #include - 8 | #include - | - 9 | #define ts_builtin_sym_error ((TSSymbol)-1) - 10 | #define ts_builtin_sym_end 0 - 11 | #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 - | - 12 | #ifndef TREE_SITTER_API_H_ - 13 | typedef uint16_t TSStateId; - 14 | typedef uint16_t TSSymbol; - 15 | typedef uint16_t TSFieldId; - 16 | typedef struct TSLanguage TSLanguage; - 17 | typedef struct TSLanguageMetadata { - 18 | uint8_t major_version; - 19 | uint8_t minor_version; - 20 | uint8_t patch_version; - 21 | } TSLanguageMetadata; - 22 | #endif - | - 23 | typedef struct { - 24 | TSFieldId field_id; - 25 | uint8_t child_index; - 26 | bool inherited; - 27 | } 
TSFieldMapEntry; - | - 28 | // Used to index the field and supertype maps. - 29 | typedef struct { - 30 | uint16_t index; - 31 | uint16_t length; - 32 | } TSMapSlice; - | - 33 | typedef struct { - 34 | bool visible; - 35 | bool named; - 36 | bool supertype; - 37 | } TSSymbolMetadata; - | - 38 | typedef struct TSLexer TSLexer; - | - 39 | struct TSLexer { - 40 | int32_t lookahead; - 41 | TSSymbol result_symbol; - 42 | void (*advance)(TSLexer *, bool); - 43 | void (*mark_end)(TSLexer *); - 44 | uint32_t (*get_column)(TSLexer *); - 45 | bool (*is_at_included_range_start)(const TSLexer *); - 46 | bool (*eof)(const TSLexer *); - 47 | void (*log)(const TSLexer *, const char *, ...); - 48 | }; - | - 49 | typedef enum { - 50 | TSParseActionTypeShift, - 51 | TSParseActionTypeReduce, - 52 | TSParseActionTypeAccept, - 53 | TSParseActionTypeRecover, - 54 | } TSParseActionType; - | - 55 | typedef union { - 56 | struct { - 57 | uint8_t type; - 58 | TSStateId state; - 59 | bool extra; - 60 | bool repetition; - 61 | } shift; - 62 | struct { - 63 | uint8_t type; - 64 | uint8_t child_count; - 65 | TSSymbol symbol; - 66 | int16_t dynamic_precedence; - 67 | uint16_t production_id; - 68 | } reduce; - 69 | uint8_t type; - 70 | } TSParseAction; - | - 71 | typedef struct { - 72 | uint16_t lex_state; - 73 | uint16_t external_lex_state; - 74 | } TSLexMode; - | - 75 | typedef struct { - 76 | uint16_t lex_state; - 77 | uint16_t external_lex_state; - 78 | uint16_t reserved_word_set_id; - 79 | } TSLexerMode; - | - 80 | typedef union { - 81 | TSParseAction action; - 82 | struct { - 83 | uint8_t count; - 84 | bool reusable; - 85 | } entry; - 86 | } TSParseActionEntry; - | - 87 | typedef struct { - 88 | int32_t start; - 89 | int32_t end; - 90 | } TSCharacterRange; - | - 91 | struct TSLanguage { - 92 | uint32_t abi_version; - 93 | uint32_t symbol_count; - 94 | uint32_t alias_count; - 95 | uint32_t token_count; - 96 | uint32_t external_token_count; - 97 | uint32_t state_count; - 98 | uint32_t 
large_state_count; - 99 | uint32_t production_id_count; - 100 | uint32_t field_count; - 101 | uint16_t max_alias_sequence_length; - 102 | const uint16_t *parse_table; - 103 | const uint16_t *small_parse_table; - 104 | const uint32_t *small_parse_table_map; - 105 | const TSParseActionEntry *parse_actions; - 106 | const char * const *symbol_names; - 107 | const char * const *field_names; - 108 | const TSMapSlice *field_map_slices; - 109 | const TSFieldMapEntry *field_map_entries; - 110 | const TSSymbolMetadata *symbol_metadata; - 111 | const TSSymbol *public_symbol_map; - 112 | const uint16_t *alias_map; - 113 | const TSSymbol *alias_sequences; - 114 | const TSLexerMode *lex_modes; - 115 | bool (*lex_fn)(TSLexer *, TSStateId); - 116 | bool (*keyword_lex_fn)(TSLexer *, TSStateId); - 117 | TSSymbol keyword_capture_token; - 118 | struct { - 119 | const bool *states; - 120 | const TSSymbol *symbol_map; - 121 | void *(*create)(void); - 122 | void (*destroy)(void *); - 123 | bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); - 124 | unsigned (*serialize)(void *, char *); - 125 | void (*deserialize)(void *, const char *, unsigned); - 126 | } external_scanner; - 127 | const TSStateId *primary_state_ids; - 128 | const char *name; - 129 | const TSSymbol *reserved_words; - 130 | uint16_t max_reserved_word_set_size; - 131 | uint32_t supertype_count; - 132 | const TSSymbol *supertype_symbols; - 133 | const TSMapSlice *supertype_map_slices; - 134 | const TSSymbol *supertype_map_entries; - 135 | TSLanguageMetadata metadata; - 136 | }; - | - 137 | static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { - 138 | uint32_t index = 0; - 139 | uint32_t size = len - index; - 140 | while (size > 1) { - 141 | uint32_t half_size = size / 2; - 142 | uint32_t mid_index = index + half_size; - 143 | const TSCharacterRange *range = &ranges[mid_index]; - 144 | if (lookahead >= range->start && lookahead <= range->end) { - 145 | return true; - 
146 | } else if (lookahead > range->end) { - 147 | index = mid_index; - 148 | } - 149 | size -= half_size; - 150 | } - 151 | const TSCharacterRange *range = &ranges[index]; - 152 | return (lookahead >= range->start && lookahead <= range->end); - 153 | } - | - 154 | /* - 155 | * Lexer Macros - 156 | */ - | - 157 | #ifdef _MSC_VER - 158 | #define UNUSED __pragma(warning(suppress : 4101)) - 159 | #else - 160 | #define UNUSED __attribute__((unused)) - 161 | #endif - | - 162 | #define START_LEXER() \ - 163 | bool result = false; \ - 164 | bool skip = false; \ - 165 | UNUSED \ - 166 | bool eof = false; \ - 167 | int32_t lookahead; \ - 168 | goto start; \ - 169 | next_state: \ - 170 | lexer->advance(lexer, skip); \ - 171 | start: \ - 172 | skip = false; \ - 173 | lookahead = lexer->lookahead; - | - 174 | #define ADVANCE(state_value) \ - 175 | { \ - 176 | state = state_value; \ - 177 | goto next_state; \ - 178 | } - | - 179 | #define ADVANCE_MAP(...) \ - 180 | { \ - 181 | static const uint16_t map[] = { __VA_ARGS__ }; \ - 182 | for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ - 183 | if (map[i] == lookahead) { \ - 184 | state = map[i + 1]; \ - 185 | goto next_state; \ - 186 | } \ - 187 | } \ - 188 | } - | - 189 | #define SKIP(state_value) \ - 190 | { \ - 191 | skip = true; \ - 192 | state = state_value; \ - 193 | goto next_state; \ - 194 | } - | - 195 | #define ACCEPT_TOKEN(symbol_value) \ - 196 | result = true; \ - 197 | lexer->result_symbol = symbol_value; \ - 198 | lexer->mark_end(lexer); - | - 199 | #define END_STATE() return result; - | - 200 | /* - 201 | * Parse Table Macros - 202 | */ - | - 203 | #define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) - | - 204 | #define STATE(id) id - | - 205 | #define ACTIONS(id) id - | - 206 | #define SHIFT(state_value) \ - 207 | {{ \ - 208 | .shift = { \ - 209 | .type = TSParseActionTypeShift, \ - 210 | .state = (state_value) \ - 211 | } \ - 212 | }} - | - 213 | #define SHIFT_REPEAT(state_value) \ - 214 | {{ \ - 215 
| .shift = { \ - 216 | .type = TSParseActionTypeShift, \ - 217 | .state = (state_value), \ - 218 | .repetition = true \ - 219 | } \ - 220 | }} - | - 221 | #define SHIFT_EXTRA() \ - 222 | {{ \ - 223 | .shift = { \ - 224 | .type = TSParseActionTypeShift, \ - 225 | .extra = true \ - 226 | } \ - 227 | }} - | - 228 | #define REDUCE(symbol_name, children, precedence, prod_id) \ - 229 | {{ \ - 230 | .reduce = { \ - 231 | .type = TSParseActionTypeReduce, \ - 232 | .symbol = symbol_name, \ - 233 | .child_count = children, \ - 234 | .dynamic_precedence = precedence, \ - 235 | .production_id = prod_id \ - 236 | }, \ - 237 | }} - | - 238 | #define RECOVER() \ - 239 | {{ \ - 240 | .type = TSParseActionTypeRecover \ - 241 | }} - | - 242 | #define ACCEPT_INPUT() \ - 243 | {{ \ - 244 | .type = TSParseActionTypeAccept \ - 245 | }} - | - 246 | #ifdef __cplusplus - 247 | } - 248 | #endif - | - 249 | #endif // TREE_SITTER_PARSER_H_ - - - --------------------------------------------------------------------------------- -/crates/generate/src/prepare_grammar.rs: --------------------------------------------------------------------------------- - 1 | mod expand_repeats; - 2 | mod expand_tokens; - 3 | mod extract_default_aliases; - 4 | mod extract_tokens; - 5 | mod flatten_grammar; - 6 | mod intern_symbols; - 7 | mod process_inlines; - | - 8 | use std::{ - 9 | cmp::Ordering, - 10 | collections::{hash_map, BTreeSet, HashMap, HashSet}, - 11 | mem, - 12 | }; - | - 13 | use anyhow::Result; - 14 | pub use expand_tokens::ExpandTokensError; - 15 | pub use extract_tokens::ExtractTokensError; - 16 | pub use flatten_grammar::FlattenGrammarError; - 17 | use indexmap::IndexMap; - 18 | pub use intern_symbols::InternSymbolsError; - 19 | pub use process_inlines::ProcessInlinesError; - 20 | use serde::Serialize; - 21 | use thiserror::Error; - | - 22 | pub use self::expand_tokens::expand_tokens; - 23 | use self::{ - 24 | expand_repeats::expand_repeats, extract_default_aliases::extract_default_aliases, - 25 
| extract_tokens::extract_tokens, flatten_grammar::flatten_grammar, - 26 | intern_symbols::intern_symbols, process_inlines::process_inlines, - 27 | }; - 28 | use super::{ - 29 | grammars::{ - 30 | ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry, - 31 | SyntaxGrammar, Variable, - 32 | }, - 33 | rules::{AliasMap, Precedence, Rule, Symbol}, - 34 | }; - 35 | use crate::grammars::ReservedWordContext; - | - 36 | pub struct IntermediateGrammar { - 37 | variables: Vec, - 38 | extra_symbols: Vec, - 39 | expected_conflicts: Vec>, - 40 | precedence_orderings: Vec>, - 41 | external_tokens: Vec, - 42 | variables_to_inline: Vec, - 43 | supertype_symbols: Vec, - 44 | word_token: Option, - 45 | reserved_word_sets: Vec>, - 46 | } - | - 47 | pub type InternedGrammar = IntermediateGrammar; - | - 48 | pub type ExtractedSyntaxGrammar = IntermediateGrammar; - | - 49 | #[derive(Debug, PartialEq, Eq)] - 50 | pub struct ExtractedLexicalGrammar { - 51 | pub variables: Vec, - 52 | pub separators: Vec, - 53 | } - | - 54 | impl Default for IntermediateGrammar { - 55 | fn default() -> Self { - 56 | Self { - 57 | variables: Vec::default(), - 58 | extra_symbols: Vec::default(), - 59 | expected_conflicts: Vec::default(), - 60 | precedence_orderings: Vec::default(), - 61 | external_tokens: Vec::default(), - 62 | variables_to_inline: Vec::default(), - 63 | supertype_symbols: Vec::default(), - 64 | word_token: Option::default(), - 65 | reserved_word_sets: Vec::default(), - 66 | } - 67 | } - 68 | } - | - 69 | pub type PrepareGrammarResult = Result; - | - 70 | #[derive(Debug, Error, Serialize)] - 71 | #[error(transparent)] - 72 | pub enum PrepareGrammarError { - 73 | ValidatePrecedences(#[from] ValidatePrecedenceError), - 74 | ValidateIndirectRecursion(#[from] IndirectRecursionError), - 75 | InternSymbols(#[from] InternSymbolsError), - 76 | ExtractTokens(#[from] ExtractTokensError), - 77 | FlattenGrammar(#[from] FlattenGrammarError), - 78 | ExpandTokens(#[from] 
ExpandTokensError), - 79 | ProcessInlines(#[from] ProcessInlinesError), - 80 | } - | - 81 | pub type ValidatePrecedenceResult = Result; - | - 82 | #[derive(Debug, Error, Serialize)] - 83 | #[error(transparent)] - 84 | pub enum ValidatePrecedenceError { - 85 | Undeclared(#[from] UndeclaredPrecedenceError), - 86 | Ordering(#[from] ConflictingPrecedenceOrderingError), - 87 | } - | - 88 | #[derive(Debug, Error, Serialize)] - 89 | pub struct IndirectRecursionError(pub Vec); - | - 90 | impl std::fmt::Display for IndirectRecursionError { - 91 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - 92 | write!(f, "Grammar contains an indirectly recursive rule: ")?; - 93 | for (i, symbol) in self.0.iter().enumerate() { - 94 | if i > 0 { - 95 | write!(f, " -> ")?; - 96 | } - 97 | write!(f, "{symbol}")?; - 98 | } - 99 | Ok(()) - 100 | } - 101 | } - | - 102 | #[derive(Debug, Error, Serialize)] - 103 | pub struct UndeclaredPrecedenceError { - 104 | pub precedence: String, - 105 | pub rule: String, - 106 | } - | - 107 | impl std::fmt::Display for UndeclaredPrecedenceError { - 108 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - 109 | write!( - 110 | f, - 111 | "Undeclared precedence '{}' in rule '{}'", - 112 | self.precedence, self.rule - 113 | )?; - 114 | Ok(()) - 115 | } - 116 | } - | - 117 | #[derive(Debug, Error, Serialize)] - 118 | pub struct ConflictingPrecedenceOrderingError { - 119 | pub precedence_1: String, - 120 | pub precedence_2: String, - 121 | } - | - 122 | impl std::fmt::Display for ConflictingPrecedenceOrderingError { - 123 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - 124 | write!( - 125 | f, - 126 | "Conflicting orderings for precedences {} and {}", - 127 | self.precedence_1, self.precedence_2 - 128 | )?; - 129 | Ok(()) - 130 | } - 131 | } - | - 132 | /// Transform an input grammar into separate components that are ready - 133 | /// for parse table construction. 
- 134 | pub fn prepare_grammar( - 135 | input_grammar: &InputGrammar, - 136 | ) -> PrepareGrammarResult<( - 137 | SyntaxGrammar, - 138 | LexicalGrammar, - 139 | InlinedProductionMap, - 140 | AliasMap, - 141 | )> { - 142 | validate_precedences(input_grammar)?; - 143 | validate_indirect_recursion(input_grammar)?; - | - 144 | let interned_grammar = intern_symbols(input_grammar)?; - 145 | let (syntax_grammar, lexical_grammar) = extract_tokens(interned_grammar)?; - 146 | let syntax_grammar = expand_repeats(syntax_grammar); - 147 | let mut syntax_grammar = flatten_grammar(syntax_grammar)?; - 148 | let lexical_grammar = expand_tokens(lexical_grammar)?; - 149 | let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar); - 150 | let inlines = process_inlines(&syntax_grammar, &lexical_grammar)?; - 151 | Ok((syntax_grammar, lexical_grammar, inlines, default_aliases)) - 152 | } - | - 153 | /// Check for indirect recursion cycles in the grammar that can cause infinite loops while - 154 | /// parsing. An indirect recursion cycle occurs when a non-terminal can derive itself through - 155 | /// a chain of single-symbol productions (e.g., A -> B, B -> A). - 156 | fn validate_indirect_recursion(grammar: &InputGrammar) -> Result<(), IndirectRecursionError> { - 157 | let mut epsilon_transitions: IndexMap<&str, BTreeSet> = IndexMap::new(); - | - 158 | for variable in &grammar.variables { - 159 | let productions = get_single_symbol_productions(&variable.rule); - 160 | // Filter out rules that *directly* reference themselves, as this doesn't - 161 | // cause a parsing loop. 
- 162 | let filtered: BTreeSet = productions - 163 | .into_iter() - 164 | .filter(|s| s != &variable.name) - 165 | .collect(); - 166 | epsilon_transitions.insert(variable.name.as_str(), filtered); - 167 | } - | - 168 | for start_symbol in epsilon_transitions.keys() { - 169 | let mut visited = BTreeSet::new(); - 170 | let mut path = Vec::new(); - 171 | if let Some((start_idx, end_idx)) = - 172 | get_cycle(start_symbol, &epsilon_transitions, &mut visited, &mut path) - 173 | { - 174 | let cycle_symbols = path[start_idx..=end_idx] - 175 | .iter() - 176 | .map(|s| (*s).to_string()) - 177 | .collect(); - 178 | return Err(IndirectRecursionError(cycle_symbols)); - 179 | } - 180 | } - | - 181 | Ok(()) - 182 | } - | - 183 | fn get_single_symbol_productions(rule: &Rule) -> BTreeSet { - 184 | match rule { - 185 | Rule::NamedSymbol(name) => BTreeSet::from([name.clone()]), - 186 | Rule::Choice(choices) => choices - 187 | .iter() - 188 | .flat_map(get_single_symbol_productions) - 189 | .collect(), - 190 | Rule::Metadata { rule, .. } => get_single_symbol_productions(rule), - 191 | _ => BTreeSet::new(), - 192 | } - 193 | } - | - 194 | /// Perform a depth-first search to detect cycles in single state transitions. 
- 195 | fn get_cycle<'a>( - 196 | current: &'a str, - 197 | transitions: &'a IndexMap<&'a str, BTreeSet>, - 198 | visited: &mut BTreeSet<&'a str>, - 199 | path: &mut Vec<&'a str>, - 200 | ) -> Option<(usize, usize)> { - 201 | if let Some(first_idx) = path.iter().position(|s| *s == current) { - 202 | path.push(current); - 203 | return Some((first_idx, path.len() - 1)); - 204 | } - | - 205 | if visited.contains(current) { - 206 | return None; - 207 | } - | - 208 | path.push(current); - 209 | visited.insert(current); - | - 210 | if let Some(next_symbols) = transitions.get(current) { - 211 | for next in next_symbols { - 212 | if let Some(cycle) = get_cycle(next, transitions, visited, path) { - 213 | return Some(cycle); - 214 | } - 215 | } - 216 | } - | - 217 | path.pop(); - 218 | None - 219 | } - | - 220 | /// Check that all of the named precedences used in the grammar are declared - 221 | /// within the `precedences` lists, and also that there are no conflicting - 222 | /// precedence orderings declared in those lists. - 223 | fn validate_precedences(grammar: &InputGrammar) -> ValidatePrecedenceResult<()> { - 224 | // Check that no rule contains a named precedence that is not present in - 225 | // any of the `precedences` lists. 
- 226 | fn validate( - 227 | rule_name: &str, - 228 | rule: &Rule, - 229 | names: &HashSet<&String>, - 230 | ) -> ValidatePrecedenceResult<()> { - 231 | match rule { - 232 | Rule::Repeat(rule) => validate(rule_name, rule, names), - 233 | Rule::Seq(elements) | Rule::Choice(elements) => elements - 234 | .iter() - 235 | .try_for_each(|e| validate(rule_name, e, names)), - 236 | Rule::Metadata { rule, params } => { - 237 | if let Precedence::Name(n) = ¶ms.precedence { - 238 | if !names.contains(n) { - 239 | Err(UndeclaredPrecedenceError { - 240 | precedence: n.clone(), - 241 | rule: rule_name.to_string(), - 242 | })?; - 243 | } - 244 | } - 245 | validate(rule_name, rule, names)?; - 246 | Ok(()) - 247 | } - 248 | _ => Ok(()), - 249 | } - 250 | } - | - 251 | // For any two precedence names `a` and `b`, if `a` comes before `b` - 252 | // in some list, then it cannot come *after* `b` in any list. - 253 | let mut pairs = HashMap::new(); - 254 | for list in &grammar.precedence_orderings { - 255 | for (i, mut entry1) in list.iter().enumerate() { - 256 | for mut entry2 in list.iter().skip(i + 1) { - 257 | if entry2 == entry1 { - 258 | continue; - 259 | } - 260 | let mut ordering = Ordering::Greater; - 261 | if entry1 > entry2 { - 262 | ordering = Ordering::Less; - 263 | mem::swap(&mut entry1, &mut entry2); - 264 | } - 265 | match pairs.entry((entry1, entry2)) { - 266 | hash_map::Entry::Vacant(e) => { - 267 | e.insert(ordering); - 268 | } - 269 | hash_map::Entry::Occupied(e) => { - 270 | if e.get() != &ordering { - 271 | Err(ConflictingPrecedenceOrderingError { - 272 | precedence_1: entry1.to_string(), - 273 | precedence_2: entry2.to_string(), - 274 | })?; - 275 | } - 276 | } - 277 | } - 278 | } - 279 | } - 280 | } - | - 281 | let precedence_names = grammar - 282 | .precedence_orderings - 283 | .iter() - 284 | .flat_map(|l| l.iter()) - 285 | .filter_map(|p| { - 286 | if let PrecedenceEntry::Name(n) = p { - 287 | Some(n) - 288 | } else { - 289 | None - 290 | } - 291 | }) - 292 | 
.collect::>(); - 293 | for variable in &grammar.variables { - 294 | validate(&variable.name, &variable.rule, &precedence_names)?; - 295 | } - | - 296 | Ok(()) - 297 | } - | - 298 | #[cfg(test)] - 299 | mod tests { - 300 | use super::*; - 301 | use crate::grammars::VariableType; - | - 302 | #[test] - 303 | fn test_validate_precedences_with_undeclared_precedence() { - 304 | let grammar = InputGrammar { - 305 | precedence_orderings: vec![ - 306 | vec![ - 307 | PrecedenceEntry::Name("a".to_string()), - 308 | PrecedenceEntry::Name("b".to_string()), - 309 | ], - 310 | vec![ - 311 | PrecedenceEntry::Name("b".to_string()), - 312 | PrecedenceEntry::Name("c".to_string()), - 313 | PrecedenceEntry::Name("d".to_string()), - 314 | ], - 315 | ], - 316 | variables: vec![ - 317 | Variable { - 318 | name: "v1".to_string(), - 319 | kind: VariableType::Named, - 320 | rule: Rule::Seq(vec![ - 321 | Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")), - 322 | Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")), - 323 | ]), - 324 | }, - 325 | Variable { - 326 | name: "v2".to_string(), - 327 | kind: VariableType::Named, - 328 | rule: Rule::repeat(Rule::Choice(vec![ - 329 | Rule::prec_left(Precedence::Name("omg".to_string()), Rule::string("y")), - 330 | Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")), - 331 | ])), - 332 | }, - 333 | ], - 334 | ..Default::default() - 335 | }; - | - 336 | let result = validate_precedences(&grammar); - 337 | assert_eq!( - 338 | result.unwrap_err().to_string(), - 339 | "Undeclared precedence 'omg' in rule 'v2'", - 340 | ); - 341 | } - | - 342 | #[test] - 343 | fn test_validate_precedences_with_conflicting_order() { - 344 | let grammar = InputGrammar { - 345 | precedence_orderings: vec![ - 346 | vec![ - 347 | PrecedenceEntry::Name("a".to_string()), - 348 | PrecedenceEntry::Name("b".to_string()), - 349 | ], - 350 | vec![ - 351 | PrecedenceEntry::Name("b".to_string()), - 352 | PrecedenceEntry::Name("c".to_string()), 
- 353 | PrecedenceEntry::Name("a".to_string()), - 354 | ], - 355 | ], - 356 | variables: vec![ - 357 | Variable { - 358 | name: "v1".to_string(), - 359 | kind: VariableType::Named, - 360 | rule: Rule::Seq(vec![ - 361 | Rule::prec_left(Precedence::Name("b".to_string()), Rule::string("w")), - 362 | Rule::prec(Precedence::Name("c".to_string()), Rule::string("x")), - 363 | ]), - 364 | }, - 365 | Variable { - 366 | name: "v2".to_string(), - 367 | kind: VariableType::Named, - 368 | rule: Rule::repeat(Rule::Choice(vec![ - 369 | Rule::prec_left(Precedence::Name("a".to_string()), Rule::string("y")), - 370 | Rule::prec(Precedence::Name("c".to_string()), Rule::string("z")), - 371 | ])), - 372 | }, - 373 | ], - 374 | ..Default::default() - 375 | }; - | - 376 | let result = validate_precedences(&grammar); - 377 | assert_eq!( - 378 | result.unwrap_err().to_string(), - 379 | "Conflicting orderings for precedences 'a' and 'b'", - 380 | ); - 381 | } - 382 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/prepare_grammar/expand_repeats.rs: --------------------------------------------------------------------------------- - 1 | use std::{collections::HashMap, mem}; - | - 2 | use super::ExtractedSyntaxGrammar; - 3 | use crate::{ - 4 | grammars::{Variable, VariableType}, - 5 | rules::{Rule, Symbol}, - 6 | }; - | - 7 | struct Expander { - 8 | variable_name: String, - 9 | repeat_count_in_variable: usize, - 10 | preceding_symbol_count: usize, - 11 | auxiliary_variables: Vec, - 12 | existing_repeats: HashMap, - 13 | } - | - 14 | impl Expander { - 15 | fn expand_variable(&mut self, index: usize, variable: &mut Variable) -> bool { - 16 | self.variable_name.clear(); - 17 | self.variable_name.push_str(&variable.name); - 18 | self.repeat_count_in_variable = 0; - 19 | let mut rule = Rule::Blank; - 20 | mem::swap(&mut rule, &mut variable.rule); - | - 21 | // In the special case of a hidden variable with a repetition at its top level, 
- 22 | // convert that rule itself into a binary tree structure instead of introducing - 23 | // another auxiliary rule. - 24 | if let (VariableType::Hidden, Rule::Repeat(repeated_content)) = (variable.kind, &rule) { - 25 | let inner_rule = self.expand_rule(repeated_content); - 26 | variable.rule = self.wrap_rule_in_binary_tree(Symbol::non_terminal(index), inner_rule); - 27 | variable.kind = VariableType::Auxiliary; - 28 | return true; - 29 | } - | - 30 | variable.rule = self.expand_rule(&rule); - 31 | false - 32 | } - | - 33 | fn expand_rule(&mut self, rule: &Rule) -> Rule { - 34 | match rule { - 35 | // For choices, sequences, and metadata, descend into the child rules, - 36 | // replacing any nested repetitions. - 37 | Rule::Choice(elements) => Rule::Choice( - 38 | elements - 39 | .iter() - 40 | .map(|element| self.expand_rule(element)) - 41 | .collect(), - 42 | ), - | - 43 | Rule::Seq(elements) => Rule::Seq( - 44 | elements - 45 | .iter() - 46 | .map(|element| self.expand_rule(element)) - 47 | .collect(), - 48 | ), - | - 49 | Rule::Metadata { rule, params } => Rule::Metadata { - 50 | rule: Box::new(self.expand_rule(rule)), - 51 | params: params.clone(), - 52 | }, - | - 53 | // For repetitions, introduce an auxiliary rule that contains the - 54 | // repeated content, but can also contain a recursive binary tree structure. 
- 55 | Rule::Repeat(content) => { - 56 | let inner_rule = self.expand_rule(content); - | - 57 | if let Some(existing_symbol) = self.existing_repeats.get(&inner_rule) { - 58 | return Rule::Symbol(*existing_symbol); - 59 | } - | - 60 | self.repeat_count_in_variable += 1; - 61 | let rule_name = format!( - 62 | "{}_repeat{}", - 63 | self.variable_name, self.repeat_count_in_variable - 64 | ); - 65 | let repeat_symbol = Symbol::non_terminal( - 66 | self.preceding_symbol_count + self.auxiliary_variables.len(), - 67 | ); - 68 | self.existing_repeats - 69 | .insert(inner_rule.clone(), repeat_symbol); - 70 | self.auxiliary_variables.push(Variable { - 71 | name: rule_name, - 72 | kind: VariableType::Auxiliary, - 73 | rule: self.wrap_rule_in_binary_tree(repeat_symbol, inner_rule), - 74 | }); - | - 75 | Rule::Symbol(repeat_symbol) - 76 | } - | - 77 | // For primitive rules, don't change anything. - 78 | _ => rule.clone(), - 79 | } - 80 | } - | - 81 | fn wrap_rule_in_binary_tree(&self, symbol: Symbol, rule: Rule) -> Rule { - 82 | Rule::choice(vec![ - 83 | Rule::Seq(vec![Rule::Symbol(symbol), Rule::Symbol(symbol)]), - 84 | rule, - 85 | ]) - 86 | } - 87 | } - | - 88 | pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSyntaxGrammar { - 89 | let mut expander = Expander { - 90 | variable_name: String::new(), - 91 | repeat_count_in_variable: 0, - 92 | preceding_symbol_count: grammar.variables.len(), - 93 | auxiliary_variables: Vec::new(), - 94 | existing_repeats: HashMap::new(), - 95 | }; - | - 96 | for (i, variable) in grammar.variables.iter_mut().enumerate() { - 97 | let expanded_top_level_repetition = expander.expand_variable(i, variable); - | - 98 | // If a hidden variable had a top-level repetition and it was converted to - 99 | // a recursive rule, then it can't be inlined. 
- 100 | if expanded_top_level_repetition { - 101 | grammar - 102 | .variables_to_inline - 103 | .retain(|symbol| *symbol != Symbol::non_terminal(i)); - 104 | } - 105 | } - | - 106 | grammar.variables.extend(expander.auxiliary_variables); - 107 | grammar - 108 | } - | - 109 | #[cfg(test)] - 110 | mod tests { - 111 | use super::*; - | - 112 | #[test] - 113 | fn test_basic_repeat_expansion() { - 114 | // Repeats nested inside of sequences and choices are expanded. - 115 | let grammar = expand_repeats(build_grammar(vec![Variable::named( - 116 | "rule0", - 117 | Rule::seq(vec![ - 118 | Rule::terminal(10), - 119 | Rule::choice(vec![ - 120 | Rule::repeat(Rule::terminal(11)), - 121 | Rule::repeat(Rule::terminal(12)), - 122 | ]), - 123 | Rule::terminal(13), - 124 | ]), - 125 | )])); - | - 126 | assert_eq!( - 127 | grammar.variables, - 128 | vec![ - 129 | Variable::named( - 130 | "rule0", - 131 | Rule::seq(vec![ - 132 | Rule::terminal(10), - 133 | Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),]), - 134 | Rule::terminal(13), - 135 | ]) - 136 | ), - 137 | Variable::auxiliary( - 138 | "rule0_repeat1", - 139 | Rule::choice(vec![ - 140 | Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]), - 141 | Rule::terminal(11), - 142 | ]) - 143 | ), - 144 | Variable::auxiliary( - 145 | "rule0_repeat2", - 146 | Rule::choice(vec![ - 147 | Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]), - 148 | Rule::terminal(12), - 149 | ]) - 150 | ), - 151 | ] - 152 | ); - 153 | } - | - 154 | #[test] - 155 | fn test_repeat_deduplication() { - 156 | // Terminal 4 appears inside of a repeat in three different places. 
- 157 | let grammar = expand_repeats(build_grammar(vec![ - 158 | Variable::named( - 159 | "rule0", - 160 | Rule::choice(vec![ - 161 | Rule::seq(vec![Rule::terminal(1), Rule::repeat(Rule::terminal(4))]), - 162 | Rule::seq(vec![Rule::terminal(2), Rule::repeat(Rule::terminal(4))]), - 163 | ]), - 164 | ), - 165 | Variable::named( - 166 | "rule1", - 167 | Rule::seq(vec![Rule::terminal(3), Rule::repeat(Rule::terminal(4))]), - 168 | ), - 169 | ])); - | - 170 | // Only one auxiliary rule is created for repeating terminal 4. - 171 | assert_eq!( - 172 | grammar.variables, - 173 | vec![ - 174 | Variable::named( - 175 | "rule0", - 176 | Rule::choice(vec![ - 177 | Rule::seq(vec![Rule::terminal(1), Rule::non_terminal(2)]), - 178 | Rule::seq(vec![Rule::terminal(2), Rule::non_terminal(2)]), - 179 | ]) - 180 | ), - 181 | Variable::named( - 182 | "rule1", - 183 | Rule::seq(vec![Rule::terminal(3), Rule::non_terminal(2),]) - 184 | ), - 185 | Variable::auxiliary( - 186 | "rule0_repeat1", - 187 | Rule::choice(vec![ - 188 | Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]), - 189 | Rule::terminal(4), - 190 | ]) - 191 | ) - 192 | ] - 193 | ); - 194 | } - | - 195 | #[test] - 196 | fn test_expansion_of_nested_repeats() { - 197 | let grammar = expand_repeats(build_grammar(vec![Variable::named( - 198 | "rule0", - 199 | Rule::seq(vec![ - 200 | Rule::terminal(10), - 201 | Rule::repeat(Rule::seq(vec![ - 202 | Rule::terminal(11), - 203 | Rule::repeat(Rule::terminal(12)), - 204 | ])), - 205 | ]), - 206 | )])); - | - 207 | assert_eq!( - 208 | grammar.variables, - 209 | vec![ - 210 | Variable::named( - 211 | "rule0", - 212 | Rule::seq(vec![Rule::terminal(10), Rule::non_terminal(2),]) - 213 | ), - 214 | Variable::auxiliary( - 215 | "rule0_repeat1", - 216 | Rule::choice(vec![ - 217 | Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1),]), - 218 | Rule::terminal(12), - 219 | ]) - 220 | ), - 221 | Variable::auxiliary( - 222 | "rule0_repeat2", - 223 | Rule::choice(vec![ - 224 | 
Rule::seq(vec![Rule::non_terminal(2), Rule::non_terminal(2),]), - 225 | Rule::seq(vec![Rule::terminal(11), Rule::non_terminal(1),]), - 226 | ]) - 227 | ), - 228 | ] - 229 | ); - 230 | } - | - 231 | #[test] - 232 | fn test_expansion_of_repeats_at_top_of_hidden_rules() { - 233 | let grammar = expand_repeats(build_grammar(vec![ - 234 | Variable::named("rule0", Rule::non_terminal(1)), - 235 | Variable::hidden( - 236 | "_rule1", - 237 | Rule::repeat(Rule::choice(vec![Rule::terminal(11), Rule::terminal(12)])), - 238 | ), - 239 | ])); - | - 240 | assert_eq!( - 241 | grammar.variables, - 242 | vec![ - 243 | Variable::named("rule0", Rule::non_terminal(1),), - 244 | Variable::auxiliary( - 245 | "_rule1", - 246 | Rule::choice(vec![ - 247 | Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(1)]), - 248 | Rule::terminal(11), - 249 | Rule::terminal(12), - 250 | ]), - 251 | ), - 252 | ] - 253 | ); - 254 | } - | - 255 | fn build_grammar(variables: Vec) -> ExtractedSyntaxGrammar { - 256 | ExtractedSyntaxGrammar { - 257 | variables, - 258 | ..Default::default() - 259 | } - 260 | } - 261 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/prepare_grammar/expand_tokens.rs: --------------------------------------------------------------------------------- - 1 | use anyhow::Result; - 2 | use regex_syntax::{ - 3 | hir::{Class, Hir, HirKind}, - 4 | ParserBuilder, - 5 | }; - 6 | use serde::Serialize; - 7 | use thiserror::Error; - | - 8 | use super::ExtractedLexicalGrammar; - 9 | use crate::{ - 10 | grammars::{LexicalGrammar, LexicalVariable}, - 11 | nfa::{CharacterSet, Nfa, NfaState}, - 12 | rules::{Precedence, Rule}, - 13 | }; - | - 14 | struct NfaBuilder { - 15 | nfa: Nfa, - 16 | is_sep: bool, - 17 | precedence_stack: Vec, - 18 | } - | - 19 | pub type ExpandTokensResult = Result; - | - 20 | #[derive(Debug, Error, Serialize)] - 21 | pub enum ExpandTokensError { - 22 | #[error( - 23 | "The rule `{0}` matches the empty 
string. - 24 | Tree-sitter does not support syntactic rules that match the empty string - 25 | unless they are used only as the grammar's start rule. - 26 | " - 27 | )] - 28 | EmptyString(String), - 29 | #[error(transparent)] - 30 | Processing(ExpandTokensProcessingError), - 31 | #[error(transparent)] - 32 | ExpandRule(ExpandRuleError), - 33 | } - | - 34 | #[derive(Debug, Error, Serialize)] - 35 | pub struct ExpandTokensProcessingError { - 36 | rule: String, - 37 | error: ExpandRuleError, - 38 | } - | - 39 | impl std::fmt::Display for ExpandTokensProcessingError { - 40 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - 41 | writeln!( - 42 | f, - 43 | "Error processing rule {}: Grammar error: Unexpected rule {:?}", - 44 | self.rule, self.error - 45 | )?; - 46 | Ok(()) - 47 | } - 48 | } - | - 49 | fn get_implicit_precedence(rule: &Rule) -> i32 { - 50 | match rule { - 51 | Rule::String(_) => 2, - 52 | Rule::Metadata { rule, params } => { - 53 | if params.is_main_token { - 54 | get_implicit_precedence(rule) + 1 - 55 | } else { - 56 | get_implicit_precedence(rule) - 57 | } - 58 | } - 59 | _ => 0, - 60 | } - 61 | } - | - 62 | const fn get_completion_precedence(rule: &Rule) -> i32 { - 63 | if let Rule::Metadata { params, .. 
} = rule { - 64 | if let Precedence::Integer(p) = params.precedence { - 65 | return p; - 66 | } - 67 | } - 68 | 0 - 69 | } - | - 70 | pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> ExpandTokensResult { - 71 | let mut builder = NfaBuilder { - 72 | nfa: Nfa::new(), - 73 | is_sep: true, - 74 | precedence_stack: vec![0], - 75 | }; - | - 76 | let separator_rule = if grammar.separators.is_empty() { - 77 | Rule::Blank - 78 | } else { - 79 | grammar.separators.push(Rule::Blank); - 80 | Rule::repeat(Rule::choice(grammar.separators)) - 81 | }; - | - 82 | let mut variables = Vec::with_capacity(grammar.variables.len()); - 83 | for (i, variable) in grammar.variables.into_iter().enumerate() { - 84 | if variable.rule.is_empty() { - 85 | Err(ExpandTokensError::EmptyString(variable.name.clone()))?; - 86 | } - | - 87 | let is_immediate_token = match &variable.rule { - 88 | Rule::Metadata { params, .. } => params.is_main_token, - 89 | _ => false, - 90 | }; - | - 91 | builder.is_sep = false; - 92 | builder.nfa.states.push(NfaState::Accept { - 93 | variable_index: i, - 94 | precedence: get_completion_precedence(&variable.rule), - 95 | }); - 96 | let last_state_id = builder.nfa.last_state_id(); - 97 | builder - 98 | .expand_rule(&variable.rule, last_state_id) - 99 | .map_err(|e| { - 100 | ExpandTokensError::Processing(ExpandTokensProcessingError { - 101 | rule: variable.name.clone(), - 102 | error: e, - 103 | }) - 104 | })?; - | - 105 | if !is_immediate_token { - 106 | builder.is_sep = true; - 107 | let last_state_id = builder.nfa.last_state_id(); - 108 | builder - 109 | .expand_rule(&separator_rule, last_state_id) - 110 | .map_err(ExpandTokensError::ExpandRule)?; - 111 | } - | - 112 | variables.push(LexicalVariable { - 113 | name: variable.name, - 114 | kind: variable.kind, - 115 | implicit_precedence: get_implicit_precedence(&variable.rule), - 116 | start_state: builder.nfa.last_state_id(), - 117 | }); - 118 | } - | - 119 | Ok(LexicalGrammar { - 120 | nfa: builder.nfa, 
- 121 | variables, - 122 | }) - 123 | } - | - 124 | pub type ExpandRuleResult = Result; - | - 125 | #[derive(Debug, Error, Serialize)] - 126 | pub enum ExpandRuleError { - 127 | #[error("Grammar error: Unexpected rule {0:?}")] - 128 | UnexpectedRule(Rule), - 129 | #[error("{0}")] - 130 | Parse(String), - 131 | #[error(transparent)] - 132 | ExpandRegex(ExpandRegexError), - 133 | } - | - 134 | pub type ExpandRegexResult = Result; - | - 135 | #[derive(Debug, Error, Serialize)] - 136 | pub enum ExpandRegexError { - 137 | #[error("{0}")] - 138 | Utf8(String), - 139 | #[error("Regex error: Assertions are not supported")] - 140 | Assertion, - 141 | } - | - 142 | impl NfaBuilder { - 143 | fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> ExpandRuleResult { - 144 | match rule { - 145 | Rule::Pattern(s, f) => { - 146 | // With unicode enabled, `\w`, `\s` and `\d` expand to character sets that are much - 147 | // larger than intended, so we replace them with the actual - 148 | // character sets they should represent. If the full unicode range - 149 | // of `\w`, `\s` or `\d` are needed then `\p{L}`, `\p{Z}` and `\p{N}` should be - 150 | // used. 
- 151 | let s = s - 152 | .replace(r"\w", r"[0-9A-Za-z_]") - 153 | .replace(r"\s", r"[\t-\r ]") - 154 | .replace(r"\d", r"[0-9]") - 155 | .replace(r"\W", r"[^0-9A-Za-z_]") - 156 | .replace(r"\S", r"[^\t-\r ]") - 157 | .replace(r"\D", r"[^0-9]"); - 158 | let mut parser = ParserBuilder::new() - 159 | .case_insensitive(f.contains('i')) - 160 | .unicode(true) - 161 | .utf8(false) - 162 | .build(); - 163 | let hir = parser - 164 | .parse(&s) - 165 | .map_err(|e| ExpandRuleError::Parse(e.to_string()))?; - 166 | self.expand_regex(&hir, next_state_id) - 167 | .map_err(ExpandRuleError::ExpandRegex) - 168 | } - 169 | Rule::String(s) => { - 170 | for c in s.chars().rev() { - 171 | self.push_advance(CharacterSet::empty().add_char(c), next_state_id); - 172 | next_state_id = self.nfa.last_state_id(); - 173 | } - 174 | Ok(!s.is_empty()) - 175 | } - 176 | Rule::Choice(elements) => { - 177 | let mut alternative_state_ids = Vec::with_capacity(elements.len()); - 178 | for element in elements { - 179 | if self.expand_rule(element, next_state_id)? { - 180 | alternative_state_ids.push(self.nfa.last_state_id()); - 181 | } else { - 182 | alternative_state_ids.push(next_state_id); - 183 | } - 184 | } - 185 | alternative_state_ids.sort_unstable(); - 186 | alternative_state_ids.dedup(); - 187 | alternative_state_ids.retain(|i| *i != self.nfa.last_state_id()); - 188 | for alternative_state_id in alternative_state_ids { - 189 | self.push_split(alternative_state_id); - 190 | } - 191 | Ok(true) - 192 | } - 193 | Rule::Seq(elements) => { - 194 | let mut result = false; - 195 | for element in elements.iter().rev() { - 196 | if self.expand_rule(element, next_state_id)? 
{ - 197 | result = true; - 198 | } - 199 | next_state_id = self.nfa.last_state_id(); - 200 | } - 201 | Ok(result) - 202 | } - 203 | Rule::Repeat(rule) => { - 204 | self.nfa.states.push(NfaState::Accept { - 205 | variable_index: 0, - 206 | precedence: 0, - 207 | }); // Placeholder for split - 208 | let split_state_id = self.nfa.last_state_id(); - 209 | if self.expand_rule(rule, split_state_id)? { - 210 | self.nfa.states[split_state_id as usize] = - 211 | NfaState::Split(self.nfa.last_state_id(), next_state_id); - 212 | Ok(true) - 213 | } else { - 214 | Ok(false) - 215 | } - 216 | } - 217 | Rule::Metadata { rule, params } => { - 218 | let has_precedence = if let Precedence::Integer(precedence) = ¶ms.precedence { - 219 | self.precedence_stack.push(*precedence); - 220 | true - 221 | } else { - 222 | false - 223 | }; - 224 | let result = self.expand_rule(rule, next_state_id); - 225 | if has_precedence { - 226 | self.precedence_stack.pop(); - 227 | } - 228 | result - 229 | } - 230 | Rule::Blank => Ok(false), - 231 | _ => Err(ExpandRuleError::UnexpectedRule(rule.clone()))?, - 232 | } - 233 | } - | - 234 | fn expand_regex(&mut self, hir: &Hir, mut next_state_id: u32) -> ExpandRegexResult { - 235 | match hir.kind() { - 236 | HirKind::Empty => Ok(false), - 237 | HirKind::Literal(literal) => { - 238 | for character in std::str::from_utf8(&literal.0) - 239 | .map_err(|e| ExpandRegexError::Utf8(e.to_string()))? 
- 240 | .chars() - 241 | .rev() - 242 | { - 243 | let char_set = CharacterSet::from_char(character); - 244 | self.push_advance(char_set, next_state_id); - 245 | next_state_id = self.nfa.last_state_id(); - 246 | } - | - 247 | Ok(true) - 248 | } - 249 | HirKind::Class(class) => match class { - 250 | Class::Unicode(class) => { - 251 | let mut chars = CharacterSet::default(); - 252 | for c in class.ranges() { - 253 | chars = chars.add_range(c.start(), c.end()); - 254 | } - | - 255 | // For some reason, the long s `ſ` is included if the letter `s` is in a - 256 | // pattern, so we remove it. - 257 | if chars.range_count() == 3 - 258 | && chars - 259 | .ranges() - 260 | // exact check to ensure that `ſ` wasn't intentionally added. - 261 | .all(|r| ['s'..='s', 'S'..='S', 'ſ'..='ſ'].contains(&r)) - 262 | { - 263 | chars = chars.difference(CharacterSet::from_char('ſ')); - 264 | } - 265 | self.push_advance(chars, next_state_id); - 266 | Ok(true) - 267 | } - 268 | Class::Bytes(bytes_class) => { - 269 | let mut chars = CharacterSet::default(); - 270 | for c in bytes_class.ranges() { - 271 | chars = chars.add_range(c.start().into(), c.end().into()); - 272 | } - 273 | self.push_advance(chars, next_state_id); - 274 | Ok(true) - 275 | } - 276 | }, - 277 | HirKind::Look(_) => Err(ExpandRegexError::Assertion)?, - 278 | HirKind::Repetition(repetition) => match (repetition.min, repetition.max) { - 279 | (0, Some(1)) => self.expand_zero_or_one(&repetition.sub, next_state_id), - 280 | (1, None) => self.expand_one_or_more(&repetition.sub, next_state_id), - 281 | (0, None) => self.expand_zero_or_more(&repetition.sub, next_state_id), - 282 | (min, Some(max)) if min == max => { - 283 | self.expand_count(&repetition.sub, min, next_state_id) - 284 | } - 285 | (min, None) => { - 286 | if self.expand_zero_or_more(&repetition.sub, next_state_id)? 
{ - 287 | self.expand_count(&repetition.sub, min, next_state_id) - 288 | } else { - 289 | Ok(false) - 290 | } - 291 | } - 292 | (min, Some(max)) => { - 293 | let mut result = self.expand_count(&repetition.sub, min, next_state_id)?; - 294 | for _ in min..max { - 295 | if result { - 296 | next_state_id = self.nfa.last_state_id(); - 297 | } - 298 | if self.expand_zero_or_one(&repetition.sub, next_state_id)? { - 299 | result = true; - 300 | } - 301 | } - 302 | Ok(result) - 303 | } - 304 | }, - 305 | HirKind::Capture(capture) => self.expand_regex(&capture.sub, next_state_id), - 306 | HirKind::Concat(concat) => { - 307 | let mut result = false; - 308 | for hir in concat.iter().rev() { - 309 | if self.expand_regex(hir, next_state_id)? { - 310 | result = true; - 311 | next_state_id = self.nfa.last_state_id(); - 312 | } - 313 | } - 314 | Ok(result) - 315 | } - 316 | HirKind::Alternation(alternations) => { - 317 | let mut alternative_state_ids = Vec::with_capacity(alternations.len()); - 318 | for hir in alternations { - 319 | if self.expand_regex(hir, next_state_id)? { - 320 | alternative_state_ids.push(self.nfa.last_state_id()); - 321 | } else { - 322 | alternative_state_ids.push(next_state_id); - 323 | } - 324 | } - 325 | alternative_state_ids.sort_unstable(); - 326 | alternative_state_ids.dedup(); - 327 | alternative_state_ids.retain(|i| *i != self.nfa.last_state_id()); - 328 | for alternative_state_id in alternative_state_ids { - 329 | self.push_split(alternative_state_id); - 330 | } - 331 | Ok(true) - 332 | } - 333 | } - 334 | } - | - 335 | fn expand_one_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult { - 336 | self.nfa.states.push(NfaState::Accept { - 337 | variable_index: 0, - 338 | precedence: 0, - 339 | }); // Placeholder for split - 340 | let split_state_id = self.nfa.last_state_id(); - 341 | if self.expand_regex(hir, split_state_id)? 
{ - 342 | self.nfa.states[split_state_id as usize] = - 343 | NfaState::Split(self.nfa.last_state_id(), next_state_id); - 344 | Ok(true) - 345 | } else { - 346 | self.nfa.states.pop(); - 347 | Ok(false) - 348 | } - 349 | } - | - 350 | fn expand_zero_or_one(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult { - 351 | if self.expand_regex(hir, next_state_id)? { - 352 | self.push_split(next_state_id); - 353 | Ok(true) - 354 | } else { - 355 | Ok(false) - 356 | } - 357 | } - | - 358 | fn expand_zero_or_more(&mut self, hir: &Hir, next_state_id: u32) -> ExpandRegexResult { - 359 | if self.expand_one_or_more(hir, next_state_id)? { - 360 | self.push_split(next_state_id); - 361 | Ok(true) - 362 | } else { - 363 | Ok(false) - 364 | } - 365 | } - | - 366 | fn expand_count( - 367 | &mut self, - 368 | hir: &Hir, - 369 | count: u32, - 370 | mut next_state_id: u32, - 371 | ) -> ExpandRegexResult { - 372 | let mut result = false; - 373 | for _ in 0..count { - 374 | if self.expand_regex(hir, next_state_id)? 
{ - 375 | result = true; - 376 | next_state_id = self.nfa.last_state_id(); - 377 | } - 378 | } - 379 | Ok(result) - 380 | } - | - 381 | fn push_advance(&mut self, chars: CharacterSet, state_id: u32) { - 382 | let precedence = *self.precedence_stack.last().unwrap(); - 383 | self.nfa.states.push(NfaState::Advance { - 384 | chars, - 385 | state_id, - 386 | precedence, - 387 | is_sep: self.is_sep, - 388 | }); - 389 | } - | - 390 | fn push_split(&mut self, state_id: u32) { - 391 | let last_state_id = self.nfa.last_state_id(); - 392 | self.nfa - 393 | .states - 394 | .push(NfaState::Split(state_id, last_state_id)); - 395 | } - 396 | } - | - 397 | #[cfg(test)] - 398 | mod tests { - 399 | use super::*; - 400 | use crate::{ - 401 | grammars::Variable, - 402 | nfa::{NfaCursor, NfaTransition}, - 403 | }; - | - 404 | fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> { - 405 | let start_states = grammar.variables.iter().map(|v| v.start_state).collect(); - 406 | let mut cursor = NfaCursor::new(&grammar.nfa, start_states); - | - 407 | let mut result = None; - 408 | let mut result_precedence = i32::MIN; - 409 | let mut start_char = 0; - 410 | let mut end_char = 0; - 411 | for c in s.chars() { - 412 | for (id, precedence) in cursor.completions() { - 413 | if result.is_none() || result_precedence <= precedence { - 414 | result = Some((id, &s[start_char..end_char])); - 415 | result_precedence = precedence; - 416 | } - 417 | } - 418 | if let Some(NfaTransition { - 419 | states, - 420 | is_separator, - 421 | .. 
- 422 | }) = cursor - 423 | .transitions() - 424 | .into_iter() - 425 | .find(|t| t.characters.contains(c) && t.precedence >= result_precedence) - 426 | { - 427 | cursor.reset(states); - 428 | end_char += c.len_utf8(); - 429 | if is_separator { - 430 | start_char = end_char; - 431 | } - 432 | } else { - 433 | break; - 434 | } - 435 | } - | - 436 | for (id, precedence) in cursor.completions() { - 437 | if result.is_none() || result_precedence <= precedence { - 438 | result = Some((id, &s[start_char..end_char])); - 439 | result_precedence = precedence; - 440 | } - 441 | } - | - 442 | result - 443 | } - | - 444 | #[test] - 445 | fn test_rule_expansion() { - 446 | struct Row { - 447 | rules: Vec, - 448 | separators: Vec, - 449 | examples: Vec<(&'static str, Option<(usize, &'static str)>)>, - 450 | } - | - 451 | let table = [ - 452 | // regex with sequences and alternatives - 453 | Row { - 454 | rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?", "")], - 455 | separators: vec![], - 456 | examples: vec![ - 457 | ("ade1", Some((0, "ade"))), - 458 | ("bdf1", Some((0, "bdf"))), - 459 | ("bdfh1", Some((0, "bdfh"))), - 460 | ("ad1", None), - 461 | ], - 462 | }, - 463 | // regex with repeats - 464 | Row { - 465 | rules: vec![Rule::pattern("a*", "")], - 466 | separators: vec![], - 467 | examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))], - 468 | }, - 469 | // regex with repeats in sequences - 470 | Row { - 471 | rules: vec![Rule::pattern("a((bc)+|(de)*)f", "")], - 472 | separators: vec![], - 473 | examples: vec![ - 474 | ("af1", Some((0, "af"))), - 475 | ("adedef1", Some((0, "adedef"))), - 476 | ("abcbcbcf1", Some((0, "abcbcbcf"))), - 477 | ("a", None), - 478 | ], - 479 | }, - 480 | // regex with character ranges - 481 | Row { - 482 | rules: vec![Rule::pattern("[a-fA-F0-9]+", "")], - 483 | separators: vec![], - 484 | examples: vec![("A1ff0.", Some((0, "A1ff0")))], - 485 | }, - 486 | // regex with perl character classes - 487 | Row { - 488 | rules: 
vec![Rule::pattern("\\w\\d\\s", "")], - 489 | separators: vec![], - 490 | examples: vec![("_0 ", Some((0, "_0 ")))], - 491 | }, - 492 | // string - 493 | Row { - 494 | rules: vec![Rule::string("abc")], - 495 | separators: vec![], - 496 | examples: vec![("abcd", Some((0, "abc"))), ("ab", None)], - 497 | }, - 498 | // complex rule containing strings and regexes - 499 | Row { - 500 | rules: vec![Rule::repeat(Rule::seq(vec![ - 501 | Rule::string("{"), - 502 | Rule::pattern("[a-f]+", ""), - 503 | Rule::string("}"), - 504 | ]))], - 505 | separators: vec![], - 506 | examples: vec![ - 507 | ("{a}{", Some((0, "{a}"))), - 508 | ("{a}{d", Some((0, "{a}"))), - 509 | ("ab", None), - 510 | ], - 511 | }, - 512 | // longest match rule - 513 | Row { - 514 | rules: vec![ - 515 | Rule::pattern("a|bc", ""), - 516 | Rule::pattern("aa", ""), - 517 | Rule::pattern("bcd", ""), - 518 | ], - 519 | separators: vec![], - 520 | examples: vec![ - 521 | ("a.", Some((0, "a"))), - 522 | ("bc.", Some((0, "bc"))), - 523 | ("aa.", Some((1, "aa"))), - 524 | ("bcd?", Some((2, "bcd"))), - 525 | ("b.", None), - 526 | ("c.", None), - 527 | ], - 528 | }, - 529 | // regex with an alternative including the empty string - 530 | Row { - 531 | rules: vec![Rule::pattern("a(b|)+c", "")], - 532 | separators: vec![], - 533 | examples: vec![ - 534 | ("ac.", Some((0, "ac"))), - 535 | ("abc.", Some((0, "abc"))), - 536 | ("abbc.", Some((0, "abbc"))), - 537 | ], - 538 | }, - 539 | // separators - 540 | Row { - 541 | rules: vec![Rule::pattern("[a-f]+", "")], - 542 | separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")], - 543 | examples: vec![ - 544 | (" a", Some((0, "a"))), - 545 | (" \nb", Some((0, "b"))), - 546 | (" \\a", None), - 547 | (" \\\na", Some((0, "a"))), - 548 | ], - 549 | }, - 550 | // shorter tokens with higher precedence - 551 | Row { - 552 | rules: vec![ - 553 | Rule::prec(Precedence::Integer(2), Rule::pattern("abc", "")), - 554 | Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e", 
"")), - 555 | Rule::pattern("[a-e]+", ""), - 556 | ], - 557 | separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")], - 558 | examples: vec![ - 559 | ("abceef", Some((0, "abc"))), - 560 | ("abdeef", Some((1, "abde"))), - 561 | ("aeeeef", Some((2, "aeeee"))), - 562 | ], - 563 | }, - 564 | // immediate tokens with higher precedence - 565 | Row { - 566 | rules: vec![ - 567 | Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+", "")), - 568 | Rule::immediate_token(Rule::prec( - 569 | Precedence::Integer(2), - 570 | Rule::pattern("[^ab]+", ""), - 571 | )), - 572 | ], - 573 | separators: vec![Rule::pattern("\\s", "")], - 574 | examples: vec![("cccb", Some((1, "ccc")))], - 575 | }, - 576 | Row { - 577 | rules: vec![Rule::seq(vec![ - 578 | Rule::string("a"), - 579 | Rule::choice(vec![Rule::string("b"), Rule::string("c")]), - 580 | Rule::string("d"), - 581 | ])], - 582 | separators: vec![], - 583 | examples: vec![ - 584 | ("abd", Some((0, "abd"))), - 585 | ("acd", Some((0, "acd"))), - 586 | ("abc", None), - 587 | ("ad", None), - 588 | ("d", None), - 589 | ("a", None), - 590 | ], - 591 | }, - 592 | // nested choices within sequences - 593 | Row { - 594 | rules: vec![Rule::seq(vec![ - 595 | Rule::pattern("[0-9]+", ""), - 596 | Rule::choice(vec![ - 597 | Rule::Blank, - 598 | Rule::choice(vec![Rule::seq(vec![ - 599 | Rule::choice(vec![Rule::string("e"), Rule::string("E")]), - 600 | Rule::choice(vec![ - 601 | Rule::Blank, - 602 | Rule::choice(vec![Rule::string("+"), Rule::string("-")]), - 603 | ]), - 604 | Rule::pattern("[0-9]+", ""), - 605 | ])]), - 606 | ]), - 607 | ])], - 608 | separators: vec![], - 609 | examples: vec![ - 610 | ("12", Some((0, "12"))), - 611 | ("12e", Some((0, "12"))), - 612 | ("12g", Some((0, "12"))), - 613 | ("12e3", Some((0, "12e3"))), - 614 | ("12e+", Some((0, "12"))), - 615 | ("12E+34 +", Some((0, "12E+34"))), - 616 | ("12e34", Some((0, "12e34"))), - 617 | ], - 618 | }, - 619 | // nested groups - 620 | Row { - 621 | rules: 
vec![Rule::seq(vec![Rule::pattern(r"([^x\\]|\\(.|\n))+", "")])], - 622 | separators: vec![], - 623 | examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))], - 624 | }, - 625 | // allowing unrecognized escape sequences - 626 | Row { - 627 | rules: vec![ - 628 | // Escaped forward slash (used in JS because '/' is the regex delimiter) - 629 | Rule::pattern(r"\/", ""), - 630 | // Escaped quotes - 631 | Rule::pattern(r#"\"\'"#, ""), - 632 | // Quote preceded by a literal backslash - 633 | Rule::pattern(r"[\\']+", ""), - 634 | ], - 635 | separators: vec![], - 636 | examples: vec![ - 637 | ("/", Some((0, "/"))), - 638 | ("\"\'", Some((1, "\"\'"))), - 639 | (r"'\'a", Some((2, r"'\'"))), - 640 | ], - 641 | }, - 642 | // unicode property escapes - 643 | Row { - 644 | rules: vec![ - 645 | Rule::pattern(r"\p{L}+\P{L}+", ""), - 646 | Rule::pattern(r"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*", ""), - 647 | ], - 648 | separators: vec![], - 649 | examples: vec![ - 650 | (" 123 abc", Some((1, " 123 "))), - 651 | ("ბΨƁ___ƀƔ", Some((0, "ბΨƁ___"))), - 652 | ], - 653 | }, - 654 | // unicode property escapes in bracketed sets - 655 | Row { - 656 | rules: vec![Rule::pattern(r"[\p{L}\p{Nd}]+", "")], - 657 | separators: vec![], - 658 | examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))], - 659 | }, - 660 | // unicode character escapes - 661 | Row { - 662 | rules: vec![ - 663 | Rule::pattern(r"\u{00dc}", ""), - 664 | Rule::pattern(r"\U{000000dd}", ""), - 665 | Rule::pattern(r"\u00de", ""), - 666 | Rule::pattern(r"\U000000df", ""), - 667 | ], - 668 | separators: vec![], - 669 | examples: vec![ - 670 | ("\u{00dc}", Some((0, "\u{00dc}"))), - 671 | ("\u{00dd}", Some((1, "\u{00dd}"))), - 672 | ("\u{00de}", Some((2, "\u{00de}"))), - 673 | ("\u{00df}", Some((3, "\u{00df}"))), - 674 | ], - 675 | }, - 676 | Row { - 677 | rules: vec![ - 678 | Rule::pattern(r"u\{[0-9a-fA-F]+\}", ""), - 679 | // Already-escaped curly braces - 680 | Rule::pattern(r"\{[ab]{3}\}", ""), - 
681 | // Unicode codepoints - 682 | Rule::pattern(r"\u{1000A}", ""), - 683 | // Unicode codepoints (lowercase) - 684 | Rule::pattern(r"\u{1000b}", ""), - 685 | ], - 686 | separators: vec![], - 687 | examples: vec![ - 688 | ("u{1234} ok", Some((0, "u{1234}"))), - 689 | ("{aba}}", Some((1, "{aba}"))), - 690 | ("\u{1000A}", Some((2, "\u{1000A}"))), - 691 | ("\u{1000b}", Some((3, "\u{1000b}"))), - 692 | ], - 693 | }, - 694 | // Emojis - 695 | Row { - 696 | rules: vec![Rule::pattern(r"\p{Emoji}+", "")], - 697 | separators: vec![], - 698 | examples: vec![ - 699 | ("🐎", Some((0, "🐎"))), - 700 | ("🐴🐴", Some((0, "🐴🐴"))), - 701 | ("#0", Some((0, "#0"))), // These chars are technically emojis! - 702 | ("⻢", None), - 703 | ("♞", None), - 704 | ("horse", None), - 705 | ], - 706 | }, - 707 | // Intersection - 708 | Row { - 709 | rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+", "")], - 710 | separators: vec![], - 711 | examples: vec![ - 712 | ("456", Some((0, "456"))), - 713 | ("64", Some((0, "64"))), - 714 | ("452", Some((0, "45"))), - 715 | ("91", None), - 716 | ("8", None), - 717 | ("3", None), - 718 | ], - 719 | }, - 720 | // Difference - 721 | Row { - 722 | rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+", "")], - 723 | separators: vec![], - 724 | examples: vec![ - 725 | ("123", Some((0, "123"))), - 726 | ("83", Some((0, "83"))), - 727 | ("9", Some((0, "9"))), - 728 | ("124", Some((0, "12"))), - 729 | ("67", None), - 730 | ("4", None), - 731 | ], - 732 | }, - 733 | // Symmetric difference - 734 | Row { - 735 | rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+", "")], - 736 | separators: vec![], - 737 | examples: vec![ - 738 | ("123", Some((0, "123"))), - 739 | ("83", Some((0, "83"))), - 740 | ("9", Some((0, "9"))), - 741 | ("124", Some((0, "12"))), - 742 | ("67", None), - 743 | ("4", None), - 744 | ], - 745 | }, - 746 | // Nested set operations - 747 | Row { - 748 | // 0 1 2 3 4 5 6 7 8 9 - 749 | // [0-5]: y y y y y y - 750 | // [2-4]: y y y - 751 | // [0-5]--[2-4]: y y y - 752 | // 
[3-9]: y y y y y y y - 753 | // [6-7]: y y - 754 | // [3-9]--[5-7]: y y y y y - 755 | // final regex: y y y y y y - 756 | rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+", "")], - 757 | separators: vec![], - 758 | examples: vec![ - 759 | ("01", Some((0, "01"))), - 760 | ("432", Some((0, "43"))), - 761 | ("8", Some((0, "8"))), - 762 | ("9", Some((0, "9"))), - 763 | ("2", None), - 764 | ("567", None), - 765 | ], - 766 | }, - 767 | ]; - | - 768 | for Row { - 769 | rules, - 770 | separators, - 771 | examples, - 772 | } in &table - 773 | { - 774 | let grammar = expand_tokens(ExtractedLexicalGrammar { - 775 | separators: separators.clone(), - 776 | variables: rules - 777 | .iter() - 778 | .map(|rule| Variable::named("", rule.clone())) - 779 | .collect(), - 780 | }) - 781 | .unwrap(); - | - 782 | for (haystack, needle) in examples { - 783 | assert_eq!(simulate_nfa(&grammar, haystack), *needle); - 784 | } - 785 | } - 786 | } - 787 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/prepare_grammar/extract_default_aliases.rs: --------------------------------------------------------------------------------- - 1 | use crate::{ - 2 | grammars::{LexicalGrammar, SyntaxGrammar}, - 3 | rules::{Alias, AliasMap, Symbol, SymbolType}, - 4 | }; - | - 5 | #[derive(Clone, Default)] - 6 | struct SymbolStatus { - 7 | aliases: Vec<(Alias, usize)>, - 8 | appears_unaliased: bool, - 9 | } - | - 10 | // Update the grammar by finding symbols that always are aliased, and for each such symbol, - 11 | // promoting one of its aliases to a "default alias", which is applied globally instead - 12 | // of in a context-specific way. - 13 | // - 14 | // This has two benefits: - 15 | // * It reduces the overhead of storing production-specific alias info in the parse table. - 16 | // * Within an `ERROR` node, no context-specific aliases will be applied. 
This transformation - 17 | // ensures that the children of an `ERROR` node have symbols that are consistent with the way that - 18 | // they would appear in a valid syntax tree. - 19 | pub(super) fn extract_default_aliases( - 20 | syntax_grammar: &mut SyntaxGrammar, - 21 | lexical_grammar: &LexicalGrammar, - 22 | ) -> AliasMap { - 23 | let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()]; - 24 | let mut non_terminal_status_list = - 25 | vec![SymbolStatus::default(); syntax_grammar.variables.len()]; - 26 | let mut external_status_list = - 27 | vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()]; - | - 28 | // For each grammar symbol, find all of the aliases under which the symbol appears, - 29 | // and determine whether or not the symbol ever appears *unaliased*. - 30 | for variable in &syntax_grammar.variables { - 31 | for production in &variable.productions { - 32 | for step in &production.steps { - 33 | let status = match step.symbol.kind { - 34 | SymbolType::External => &mut external_status_list[step.symbol.index], - 35 | SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], - 36 | SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], - 37 | SymbolType::End | SymbolType::EndOfNonTerminalExtra => { - 38 | panic!("Unexpected end token") - 39 | } - 40 | }; - | - 41 | // Default aliases don't work for inlined variables. 
- 42 | if syntax_grammar.variables_to_inline.contains(&step.symbol) { - 43 | continue; - 44 | } - | - 45 | if let Some(alias) = &step.alias { - 46 | if let Some(count_for_alias) = status - 47 | .aliases - 48 | .iter_mut() - 49 | .find_map(|(a, count)| if a == alias { Some(count) } else { None }) - 50 | { - 51 | *count_for_alias += 1; - 52 | } else { - 53 | status.aliases.push((alias.clone(), 1)); - 54 | } - 55 | } else { - 56 | status.appears_unaliased = true; - 57 | } - 58 | } - 59 | } - 60 | } - | - 61 | for symbol in &syntax_grammar.extra_symbols { - 62 | let status = match symbol.kind { - 63 | SymbolType::External => &mut external_status_list[symbol.index], - 64 | SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index], - 65 | SymbolType::Terminal => &mut terminal_status_list[symbol.index], - 66 | SymbolType::End | SymbolType::EndOfNonTerminalExtra => { - 67 | panic!("Unexpected end token") - 68 | } - 69 | }; - 70 | status.appears_unaliased = true; - 71 | } - | - 72 | let symbols_with_statuses = (terminal_status_list - 73 | .iter_mut() - 74 | .enumerate() - 75 | .map(|(i, status)| (Symbol::terminal(i), status))) - 76 | .chain( - 77 | non_terminal_status_list - 78 | .iter_mut() - 79 | .enumerate() - 80 | .map(|(i, status)| (Symbol::non_terminal(i), status)), - 81 | ) - 82 | .chain( - 83 | external_status_list - 84 | .iter_mut() - 85 | .enumerate() - 86 | .map(|(i, status)| (Symbol::external(i), status)), - 87 | ); - | - 88 | // For each symbol that always appears aliased, find the alias the occurs most often, - 89 | // and designate that alias as the symbol's "default alias". Store all of these - 90 | // default aliases in a map that will be returned. 
- 91 | let mut result = AliasMap::new(); - 92 | for (symbol, status) in symbols_with_statuses { - 93 | if status.appears_unaliased { - 94 | status.aliases.clear(); - 95 | } else if let Some(default_entry) = status - 96 | .aliases - 97 | .iter() - 98 | .enumerate() - 99 | .max_by_key(|(i, (_, count))| (count, -(*i as i64))) - 100 | .map(|(_, entry)| entry.clone()) - 101 | { - 102 | status.aliases.clear(); - 103 | status.aliases.push(default_entry.clone()); - 104 | result.insert(symbol, default_entry.0); - 105 | } - 106 | } - | - 107 | // Wherever a symbol is aliased as its default alias, remove the usage of the alias, - 108 | // because it will now be redundant. - 109 | let mut alias_positions_to_clear = Vec::new(); - 110 | for variable in &mut syntax_grammar.variables { - 111 | alias_positions_to_clear.clear(); - | - 112 | for (i, production) in variable.productions.iter().enumerate() { - 113 | for (j, step) in production.steps.iter().enumerate() { - 114 | let status = match step.symbol.kind { - 115 | SymbolType::External => &mut external_status_list[step.symbol.index], - 116 | SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], - 117 | SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], - 118 | SymbolType::End | SymbolType::EndOfNonTerminalExtra => { - 119 | panic!("Unexpected end token") - 120 | } - 121 | }; - | - 122 | // If this step is aliased as the symbol's default alias, then remove that alias. 
- 123 | if step.alias.is_some() - 124 | && step.alias.as_ref() == status.aliases.first().map(|t| &t.0) - 125 | { - 126 | let mut other_productions_must_use_this_alias_at_this_index = false; - 127 | for (other_i, other_production) in variable.productions.iter().enumerate() { - 128 | if other_i != i - 129 | && other_production.steps.len() > j - 130 | && other_production.steps[j].alias == step.alias - 131 | && result.get(&other_production.steps[j].symbol) != step.alias.as_ref() - 132 | { - 133 | other_productions_must_use_this_alias_at_this_index = true; - 134 | break; - 135 | } - 136 | } - | - 137 | if !other_productions_must_use_this_alias_at_this_index { - 138 | alias_positions_to_clear.push((i, j)); - 139 | } - 140 | } - 141 | } - 142 | } - | - 143 | for (production_index, step_index) in &alias_positions_to_clear { - 144 | variable.productions[*production_index].steps[*step_index].alias = None; - 145 | } - 146 | } - | - 147 | result - 148 | } - | - 149 | #[cfg(test)] - 150 | mod tests { - 151 | use super::*; - 152 | use crate::{ - 153 | grammars::{LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType}, - 154 | nfa::Nfa, - 155 | }; - | - 156 | #[test] - 157 | fn test_extract_simple_aliases() { - 158 | let mut syntax_grammar = SyntaxGrammar { - 159 | variables: vec![ - 160 | SyntaxVariable { - 161 | name: "v1".to_owned(), - 162 | kind: VariableType::Named, - 163 | productions: vec![Production { - 164 | dynamic_precedence: 0, - 165 | steps: vec![ - 166 | ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - 167 | ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - 168 | ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true), - 169 | ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), - 170 | ], - 171 | }], - 172 | }, - 173 | SyntaxVariable { - 174 | name: "v2".to_owned(), - 175 | kind: VariableType::Named, - 176 | productions: vec![Production { - 177 | dynamic_precedence: 0, - 178 | steps: vec![ - 
179 | // Token 0 is always aliased as "a1". - 180 | ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true), - 181 | // Token 1 is aliased within rule `v1` above, but not here. - 182 | ProductionStep::new(Symbol::terminal(1)), - 183 | // Token 2 is aliased differently here than in `v1`. The alias from - 184 | // `v1` should be promoted to the default alias, because `v1` appears - 185 | // first in the grammar. - 186 | ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), - 187 | // Token 3 is also aliased differently here than in `v1`. In this case, - 188 | // this alias should be promoted to the default alias, because it is - 189 | // used a greater number of times (twice). - 190 | ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), - 191 | ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true), - 192 | ], - 193 | }], - 194 | }, - 195 | ], - 196 | ..Default::default() - 197 | }; - | - 198 | let lexical_grammar = LexicalGrammar { - 199 | nfa: Nfa::new(), - 200 | variables: vec![ - 201 | LexicalVariable { - 202 | name: "t0".to_string(), - 203 | kind: VariableType::Anonymous, - 204 | implicit_precedence: 0, - 205 | start_state: 0, - 206 | }, - 207 | LexicalVariable { - 208 | name: "t1".to_string(), - 209 | kind: VariableType::Anonymous, - 210 | implicit_precedence: 0, - 211 | start_state: 0, - 212 | }, - 213 | LexicalVariable { - 214 | name: "t2".to_string(), - 215 | kind: VariableType::Anonymous, - 216 | implicit_precedence: 0, - 217 | start_state: 0, - 218 | }, - 219 | LexicalVariable { - 220 | name: "t3".to_string(), - 221 | kind: VariableType::Anonymous, - 222 | implicit_precedence: 0, - 223 | start_state: 0, - 224 | }, - 225 | ], - 226 | }; - | - 227 | let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar); - 228 | assert_eq!(default_aliases.len(), 3); - | - 229 | assert_eq!( - 230 | default_aliases.get(&Symbol::terminal(0)), - 231 | Some(&Alias { - 232 | value: "a1".to_string(), - 233 | 
is_named: true, - 234 | }) - 235 | ); - 236 | assert_eq!( - 237 | default_aliases.get(&Symbol::terminal(2)), - 238 | Some(&Alias { - 239 | value: "a3".to_string(), - 240 | is_named: true, - 241 | }) - 242 | ); - 243 | assert_eq!( - 244 | default_aliases.get(&Symbol::terminal(3)), - 245 | Some(&Alias { - 246 | value: "a6".to_string(), - 247 | is_named: true, - 248 | }) - 249 | ); - 250 | assert_eq!(default_aliases.get(&Symbol::terminal(1)), None); - | - 251 | assert_eq!( - 252 | syntax_grammar.variables, - 253 | vec![ - 254 | SyntaxVariable { - 255 | name: "v1".to_owned(), - 256 | kind: VariableType::Named, - 257 | productions: vec![Production { - 258 | dynamic_precedence: 0, - 259 | steps: vec![ - 260 | ProductionStep::new(Symbol::terminal(0)), - 261 | ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true), - 262 | ProductionStep::new(Symbol::terminal(2)), - 263 | ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true), - 264 | ], - 265 | },], - 266 | }, - 267 | SyntaxVariable { - 268 | name: "v2".to_owned(), - 269 | kind: VariableType::Named, - 270 | productions: vec![Production { - 271 | dynamic_precedence: 0, - 272 | steps: vec![ - 273 | ProductionStep::new(Symbol::terminal(0)), - 274 | ProductionStep::new(Symbol::terminal(1)), - 275 | ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true), - 276 | ProductionStep::new(Symbol::terminal(3)), - 277 | ProductionStep::new(Symbol::terminal(3)), - 278 | ], - 279 | },], - 280 | }, - 281 | ] - 282 | ); - 283 | } - 284 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/prepare_grammar/extract_tokens.rs: --------------------------------------------------------------------------------- - 1 | use std::collections::HashMap; - | - 2 | use anyhow::Result; - 3 | use serde::Serialize; - 4 | use thiserror::Error; - | - 5 | use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar}; - 6 | use crate::{ - 7 | 
grammars::{ExternalToken, ReservedWordContext, Variable, VariableType}, - 8 | rules::{MetadataParams, Rule, Symbol, SymbolType}, - 9 | }; - | - 10 | pub type ExtractTokensResult = Result; - | - 11 | #[derive(Debug, Error, Serialize)] - 12 | pub enum ExtractTokensError { - 13 | #[error( - 14 | "The rule `{0}` contains an empty string. - | - 15 | Tree-sitter does not support syntactic rules that contain an empty string - 16 | unless they are used only as the grammar's start rule. - 17 | " - 18 | )] - 19 | EmptyString(String), - 20 | #[error("Rule '{0}' cannot be used as both an external token and a non-terminal rule")] - 21 | ExternalTokenNonTerminal(String), - 22 | #[error("Non-symbol rules cannot be used as external tokens")] - 23 | NonSymbolExternalToken, - 24 | #[error(transparent)] - 25 | WordToken(NonTerminalWordTokenError), - 26 | #[error("Reserved word '{0}' must be a token")] - 27 | NonTokenReservedWord(String), - 28 | } - | - 29 | #[derive(Debug, Error, Serialize)] - 30 | pub struct NonTerminalWordTokenError { - 31 | pub symbol_name: String, - 32 | pub conflicting_symbol_name: Option, - 33 | } - | - 34 | impl std::fmt::Display for NonTerminalWordTokenError { - 35 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - 36 | write!( - 37 | f, - 38 | "Non-terminal symbol '{}' cannot be used as the word token", - 39 | self.symbol_name - 40 | )?; - 41 | if let Some(conflicting_name) = &self.conflicting_symbol_name { - 42 | writeln!( - 43 | f, - 44 | ", because its rule is duplicated in '{conflicting_name}'", - 45 | ) - 46 | } else { - 47 | writeln!(f) - 48 | } - 49 | } - 50 | } - | - 51 | pub(super) fn extract_tokens( - 52 | mut grammar: InternedGrammar, - 53 | ) -> ExtractTokensResult<(ExtractedSyntaxGrammar, ExtractedLexicalGrammar)> { - 54 | let mut extractor = TokenExtractor { - 55 | current_variable_name: String::new(), - 56 | current_variable_token_count: 0, - 57 | is_first_rule: false, - 58 | extracted_variables: Vec::new(), - 59 | 
extracted_usage_counts: Vec::new(), - 60 | }; - | - 61 | for (i, variable) in &mut grammar.variables.iter_mut().enumerate() { - 62 | extractor.extract_tokens_in_variable(i == 0, variable)?; - 63 | } - | - 64 | for variable in &mut grammar.external_tokens { - 65 | extractor.extract_tokens_in_variable(false, variable)?; - 66 | } - | - 67 | let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len()); - 68 | for variable in extractor.extracted_variables { - 69 | lexical_variables.push(variable); - 70 | } - | - 71 | // If a variable's entire rule was extracted as a token and that token didn't - 72 | // appear within any other rule, then remove that variable from the syntax - 73 | // grammar, giving its name to the token in the lexical grammar. Any symbols - 74 | // that pointed to that variable will need to be updated to point to the - 75 | // variable in the lexical grammar. Symbols that pointed to later variables - 76 | // will need to have their indices decremented. 
- 77 | let mut variables = Vec::with_capacity(grammar.variables.len()); - 78 | let mut symbol_replacer = SymbolReplacer { - 79 | replacements: HashMap::new(), - 80 | }; - 81 | for (i, variable) in grammar.variables.into_iter().enumerate() { - 82 | if let Rule::Symbol(Symbol { - 83 | kind: SymbolType::Terminal, - 84 | index, - 85 | }) = variable.rule - 86 | { - 87 | if i > 0 && extractor.extracted_usage_counts[index] == 1 { - 88 | let lexical_variable = &mut lexical_variables[index]; - 89 | if lexical_variable.kind == VariableType::Auxiliary - 90 | || variable.kind != VariableType::Hidden - 91 | { - 92 | lexical_variable.kind = variable.kind; - 93 | lexical_variable.name = variable.name; - 94 | symbol_replacer.replacements.insert(i, index); - 95 | continue; - 96 | } - 97 | } - 98 | } - 99 | variables.push(variable); - 100 | } - | - 101 | for variable in &mut variables { - 102 | variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule); - 103 | } - | - 104 | let expected_conflicts = grammar - 105 | .expected_conflicts - 106 | .into_iter() - 107 | .map(|conflict| { - 108 | let mut result = conflict - 109 | .iter() - 110 | .map(|symbol| symbol_replacer.replace_symbol(*symbol)) - 111 | .collect::>(); - 112 | result.sort_unstable(); - 113 | result.dedup(); - 114 | result - 115 | }) - 116 | .collect(); - | - 117 | let supertype_symbols = grammar - 118 | .supertype_symbols - 119 | .into_iter() - 120 | .map(|symbol| symbol_replacer.replace_symbol(symbol)) - 121 | .collect(); - | - 122 | let variables_to_inline = grammar - 123 | .variables_to_inline - 124 | .into_iter() - 125 | .map(|symbol| symbol_replacer.replace_symbol(symbol)) - 126 | .collect(); - | - 127 | let mut separators = Vec::new(); - 128 | let mut extra_symbols = Vec::new(); - 129 | for rule in grammar.extra_symbols { - 130 | if let Rule::Symbol(symbol) = rule { - 131 | extra_symbols.push(symbol_replacer.replace_symbol(symbol)); - 132 | } else if let Some(index) = 
lexical_variables.iter().position(|v| v.rule == rule) { - 133 | extra_symbols.push(Symbol::terminal(index)); - 134 | } else { - 135 | separators.push(rule); - 136 | } - 137 | } - | - 138 | let mut external_tokens = Vec::new(); - 139 | for external_token in grammar.external_tokens { - 140 | let rule = symbol_replacer.replace_symbols_in_rule(&external_token.rule); - 141 | if let Rule::Symbol(symbol) = rule { - 142 | if symbol.is_non_terminal() { - 143 | Err(ExtractTokensError::ExternalTokenNonTerminal( - 144 | variables[symbol.index].name.clone(), - 145 | ))?; - 146 | } - | - 147 | if symbol.is_external() { - 148 | external_tokens.push(ExternalToken { - 149 | name: external_token.name, - 150 | kind: external_token.kind, - 151 | corresponding_internal_token: None, - 152 | }); - 153 | } else { - 154 | external_tokens.push(ExternalToken { - 155 | name: lexical_variables[symbol.index].name.clone(), - 156 | kind: external_token.kind, - 157 | corresponding_internal_token: Some(symbol), - 158 | }); - 159 | } - 160 | } else { - 161 | Err(ExtractTokensError::NonSymbolExternalToken)?; - 162 | } - 163 | } - | - 164 | let word_token = if let Some(token) = grammar.word_token { - 165 | let token = symbol_replacer.replace_symbol(token); - 166 | if token.is_non_terminal() { - 167 | let word_token_variable = &variables[token.index]; - 168 | let conflicting_symbol_name = variables - 169 | .iter() - 170 | .enumerate() - 171 | .find(|(i, v)| *i != token.index && v.rule == word_token_variable.rule) - 172 | .map(|(_, v)| v.name.clone()); - | - 173 | Err(ExtractTokensError::WordToken(NonTerminalWordTokenError { - 174 | symbol_name: word_token_variable.name.clone(), - 175 | conflicting_symbol_name, - 176 | }))?; - 177 | } - 178 | Some(token) - 179 | } else { - 180 | None - 181 | }; - | - 182 | let mut reserved_word_contexts = Vec::with_capacity(grammar.reserved_word_sets.len()); - 183 | for reserved_word_context in grammar.reserved_word_sets { - 184 | let mut reserved_words = 
Vec::with_capacity(reserved_word_contexts.len()); - 185 | for reserved_rule in reserved_word_context.reserved_words { - 186 | if let Rule::Symbol(symbol) = reserved_rule { - 187 | reserved_words.push(symbol_replacer.replace_symbol(symbol)); - 188 | } else if let Some(index) = lexical_variables - 189 | .iter() - 190 | .position(|v| v.rule == reserved_rule) - 191 | { - 192 | reserved_words.push(Symbol::terminal(index)); - 193 | } else { - 194 | let rule = if let Rule::Metadata { rule, .. } = &reserved_rule { - 195 | rule.as_ref() - 196 | } else { - 197 | &reserved_rule - 198 | }; - 199 | let token_name = match rule { - 200 | Rule::String(s) => s.clone(), - 201 | Rule::Pattern(p, _) => p.clone(), - 202 | _ => "unknown".to_string(), - 203 | }; - 204 | Err(ExtractTokensError::NonTokenReservedWord(token_name))?; - 205 | } - 206 | } - 207 | reserved_word_contexts.push(ReservedWordContext { - 208 | name: reserved_word_context.name, - 209 | reserved_words, - 210 | }); - 211 | } - | - 212 | Ok(( - 213 | ExtractedSyntaxGrammar { - 214 | variables, - 215 | expected_conflicts, - 216 | extra_symbols, - 217 | variables_to_inline, - 218 | supertype_symbols, - 219 | external_tokens, - 220 | word_token, - 221 | precedence_orderings: grammar.precedence_orderings, - 222 | reserved_word_sets: reserved_word_contexts, - 223 | }, - 224 | ExtractedLexicalGrammar { - 225 | variables: lexical_variables, - 226 | separators, - 227 | }, - 228 | )) - 229 | } - | - 230 | struct TokenExtractor { - 231 | current_variable_name: String, - 232 | current_variable_token_count: usize, - 233 | is_first_rule: bool, - 234 | extracted_variables: Vec, - 235 | extracted_usage_counts: Vec, - 236 | } - | - 237 | struct SymbolReplacer { - 238 | replacements: HashMap, - 239 | } - | - 240 | impl TokenExtractor { - 241 | fn extract_tokens_in_variable( - 242 | &mut self, - 243 | is_first: bool, - 244 | variable: &mut Variable, - 245 | ) -> ExtractTokensResult<()> { - 246 | self.current_variable_name.clear(); - 247 | 
self.current_variable_name.push_str(&variable.name); - 248 | self.current_variable_token_count = 0; - 249 | self.is_first_rule = is_first; - 250 | variable.rule = self.extract_tokens_in_rule(&variable.rule)?; - 251 | Ok(()) - 252 | } - | - 253 | fn extract_tokens_in_rule(&mut self, input: &Rule) -> ExtractTokensResult { - 254 | match input { - 255 | Rule::String(name) => Ok(self.extract_token(input, Some(name))?.into()), - 256 | Rule::Pattern(..) => Ok(self.extract_token(input, None)?.into()), - 257 | Rule::Metadata { params, rule } => { - 258 | if params.is_token { - 259 | let mut params = params.clone(); - 260 | params.is_token = false; - | - 261 | let string_value = if let Rule::String(value) = rule.as_ref() { - 262 | Some(value) - 263 | } else { - 264 | None - 265 | }; - | - 266 | let rule_to_extract = if params == MetadataParams::default() { - 267 | rule.as_ref() - 268 | } else { - 269 | input - 270 | }; - | - 271 | Ok(self.extract_token(rule_to_extract, string_value)?.into()) - 272 | } else { - 273 | Ok(Rule::Metadata { - 274 | params: params.clone(), - 275 | rule: Box::new(self.extract_tokens_in_rule(rule)?), - 276 | }) - 277 | } - 278 | } - 279 | Rule::Repeat(content) => Ok(Rule::Repeat(Box::new( - 280 | self.extract_tokens_in_rule(content)?, - 281 | ))), - 282 | Rule::Seq(elements) => Ok(Rule::Seq( - 283 | elements - 284 | .iter() - 285 | .map(|e| self.extract_tokens_in_rule(e)) - 286 | .collect::>>()?, - 287 | )), - 288 | Rule::Choice(elements) => Ok(Rule::Choice( - 289 | elements - 290 | .iter() - 291 | .map(|e| self.extract_tokens_in_rule(e)) - 292 | .collect::>>()?, - 293 | )), - 294 | Rule::Reserved { rule, context_name } => Ok(Rule::Reserved { - 295 | rule: Box::new(self.extract_tokens_in_rule(rule)?), - 296 | context_name: context_name.clone(), - 297 | }), - 298 | _ => Ok(input.clone()), - 299 | } - 300 | } - | - 301 | fn extract_token( - 302 | &mut self, - 303 | rule: &Rule, - 304 | string_value: Option<&String>, - 305 | ) -> ExtractTokensResult { 
- 306 | for (i, variable) in self.extracted_variables.iter_mut().enumerate() { - 307 | if variable.rule == *rule { - 308 | self.extracted_usage_counts[i] += 1; - 309 | return Ok(Symbol::terminal(i)); - 310 | } - 311 | } - | - 312 | let index = self.extracted_variables.len(); - 313 | let variable = if let Some(string_value) = string_value { - 314 | if string_value.is_empty() && !self.is_first_rule { - 315 | Err(ExtractTokensError::EmptyString( - 316 | self.current_variable_name.clone(), - 317 | ))?; - 318 | } - 319 | Variable { - 320 | name: string_value.clone(), - 321 | kind: VariableType::Anonymous, - 322 | rule: rule.clone(), - 323 | } - 324 | } else { - 325 | self.current_variable_token_count += 1; - 326 | Variable { - 327 | name: format!( - 328 | "{}_token{}", - 329 | self.current_variable_name, self.current_variable_token_count - 330 | ), - 331 | kind: VariableType::Auxiliary, - 332 | rule: rule.clone(), - 333 | } - 334 | }; - | - 335 | self.extracted_variables.push(variable); - 336 | self.extracted_usage_counts.push(1); - 337 | Ok(Symbol::terminal(index)) - 338 | } - 339 | } - | - 340 | impl SymbolReplacer { - 341 | fn replace_symbols_in_rule(&mut self, rule: &Rule) -> Rule { - 342 | match rule { - 343 | Rule::Symbol(symbol) => self.replace_symbol(*symbol).into(), - 344 | Rule::Choice(elements) => Rule::Choice( - 345 | elements - 346 | .iter() - 347 | .map(|e| self.replace_symbols_in_rule(e)) - 348 | .collect(), - 349 | ), - 350 | Rule::Seq(elements) => Rule::Seq( - 351 | elements - 352 | .iter() - 353 | .map(|e| self.replace_symbols_in_rule(e)) - 354 | .collect(), - 355 | ), - 356 | Rule::Repeat(content) => Rule::Repeat(Box::new(self.replace_symbols_in_rule(content))), - 357 | Rule::Metadata { rule, params } => Rule::Metadata { - 358 | params: params.clone(), - 359 | rule: Box::new(self.replace_symbols_in_rule(rule)), - 360 | }, - 361 | Rule::Reserved { rule, context_name } => Rule::Reserved { - 362 | rule: Box::new(self.replace_symbols_in_rule(rule)), - 363 
| context_name: context_name.clone(), - 364 | }, - 365 | _ => rule.clone(), - 366 | } - 367 | } - | - 368 | fn replace_symbol(&self, symbol: Symbol) -> Symbol { - 369 | if !symbol.is_non_terminal() { - 370 | return symbol; - 371 | } - | - 372 | if let Some(replacement) = self.replacements.get(&symbol.index) { - 373 | return Symbol::terminal(*replacement); - 374 | } - | - 375 | let mut adjusted_index = symbol.index; - 376 | for replaced_index in self.replacements.keys() { - 377 | if *replaced_index < symbol.index { - 378 | adjusted_index -= 1; - 379 | } - 380 | } - | - 381 | Symbol::non_terminal(adjusted_index) - 382 | } - 383 | } - | - 384 | #[cfg(test)] - 385 | mod test { - 386 | use super::*; - | - 387 | #[test] - 388 | fn test_extraction() { - 389 | let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![ - 390 | Variable::named( - 391 | "rule_0", - 392 | Rule::repeat(Rule::seq(vec![ - 393 | Rule::string("a"), - 394 | Rule::pattern("b", ""), - 395 | Rule::choice(vec![ - 396 | Rule::non_terminal(1), - 397 | Rule::non_terminal(2), - 398 | Rule::token(Rule::repeat(Rule::choice(vec![ - 399 | Rule::string("c"), - 400 | Rule::string("d"), - 401 | ]))), - 402 | ]), - 403 | ])), - 404 | ), - 405 | Variable::named("rule_1", Rule::pattern("e", "")), - 406 | Variable::named("rule_2", Rule::pattern("b", "")), - 407 | Variable::named( - 408 | "rule_3", - 409 | Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]), - 410 | ), - 411 | ])) - 412 | .unwrap(); - | - 413 | assert_eq!( - 414 | syntax_grammar.variables, - 415 | vec![ - 416 | Variable::named( - 417 | "rule_0", - 418 | Rule::repeat(Rule::seq(vec![ - 419 | // The string "a" was replaced by a symbol referencing the lexical grammar - 420 | Rule::terminal(0), - 421 | // The pattern "b" was replaced by a symbol referencing the lexical grammar - 422 | Rule::terminal(1), - 423 | Rule::choice(vec![ - 424 | // The symbol referencing `rule_1` was replaced by a symbol referencing - 425 | // the lexical grammar. 
- 426 | Rule::terminal(3), - 427 | // The symbol referencing `rule_2` had its index decremented because - 428 | // `rule_1` was moved to the lexical grammar. - 429 | Rule::non_terminal(1), - 430 | // The rule wrapped in `token` was replaced by a symbol referencing - 431 | // the lexical grammar. - 432 | Rule::terminal(2), - 433 | ]) - 434 | ])) - 435 | ), - 436 | // The pattern "e" was only used in once place: as the definition of `rule_1`, - 437 | // so that rule was moved to the lexical grammar. The pattern "b" appeared in - 438 | // two places, so it was not moved into the lexical grammar. - 439 | Variable::named("rule_2", Rule::terminal(1)), - 440 | Variable::named( - 441 | "rule_3", - 442 | Rule::seq(vec![Rule::non_terminal(1), Rule::Blank,]) - 443 | ), - 444 | ] - 445 | ); - | - 446 | assert_eq!( - 447 | lexical_grammar.variables, - 448 | vec![ - 449 | Variable::anonymous("a", Rule::string("a")), - 450 | Variable::auxiliary("rule_0_token1", Rule::pattern("b", "")), - 451 | Variable::auxiliary( - 452 | "rule_0_token2", - 453 | Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),])) - 454 | ), - 455 | Variable::named("rule_1", Rule::pattern("e", "")), - 456 | ] - 457 | ); - 458 | } - | - 459 | #[test] - 460 | fn test_start_rule_is_token() { - 461 | let (syntax_grammar, lexical_grammar) = - 462 | extract_tokens(build_grammar(vec![Variable::named( - 463 | "rule_0", - 464 | Rule::string("hello"), - 465 | )])) - 466 | .unwrap(); - | - 467 | assert_eq!( - 468 | syntax_grammar.variables, - 469 | vec![Variable::named("rule_0", Rule::terminal(0)),] - 470 | ); - 471 | assert_eq!( - 472 | lexical_grammar.variables, - 473 | vec![Variable::anonymous("hello", Rule::string("hello")),] - 474 | ); - 475 | } - | - 476 | #[test] - 477 | fn test_extracting_extra_symbols() { - 478 | let mut grammar = build_grammar(vec![ - 479 | Variable::named("rule_0", Rule::string("x")), - 480 | Variable::named("comment", Rule::pattern("//.*", "")), - 481 | ]); - 482 | 
grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)]; - | - 483 | let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap(); - 484 | assert_eq!(syntax_grammar.extra_symbols, vec![Symbol::terminal(1),]); - 485 | assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]); - 486 | } - | - 487 | #[test] - 488 | fn test_extract_externals() { - 489 | let mut grammar = build_grammar(vec![ - 490 | Variable::named( - 491 | "rule_0", - 492 | Rule::seq(vec![ - 493 | Rule::external(0), - 494 | Rule::string("a"), - 495 | Rule::non_terminal(1), - 496 | Rule::non_terminal(2), - 497 | ]), - 498 | ), - 499 | Variable::named("rule_1", Rule::string("b")), - 500 | Variable::named("rule_2", Rule::string("c")), - 501 | ]); - 502 | grammar.external_tokens = vec![ - 503 | Variable::named("external_0", Rule::external(0)), - 504 | Variable::anonymous("a", Rule::string("a")), - 505 | Variable::named("rule_2", Rule::non_terminal(2)), - 506 | ]; - | - 507 | let (syntax_grammar, _) = extract_tokens(grammar).unwrap(); - | - 508 | assert_eq!( - 509 | syntax_grammar.external_tokens, - 510 | vec![ - 511 | ExternalToken { - 512 | name: "external_0".to_string(), - 513 | kind: VariableType::Named, - 514 | corresponding_internal_token: None, - 515 | }, - 516 | ExternalToken { - 517 | name: "a".to_string(), - 518 | kind: VariableType::Anonymous, - 519 | corresponding_internal_token: Some(Symbol::terminal(0)), - 520 | }, - 521 | ExternalToken { - 522 | name: "rule_2".to_string(), - 523 | kind: VariableType::Named, - 524 | corresponding_internal_token: Some(Symbol::terminal(2)), - 525 | }, - 526 | ] - 527 | ); - 528 | } - | - 529 | #[test] - 530 | fn test_error_on_external_with_same_name_as_non_terminal() { - 531 | let mut grammar = build_grammar(vec![ - 532 | Variable::named( - 533 | "rule_0", - 534 | Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]), - 535 | ), - 536 | Variable::named( - 537 | "rule_1", - 538 | Rule::seq(vec![Rule::non_terminal(2), 
Rule::non_terminal(2)]), - 539 | ), - 540 | Variable::named("rule_2", Rule::string("a")), - 541 | ]); - 542 | grammar.external_tokens = vec![Variable::named("rule_1", Rule::non_terminal(1))]; - | - 543 | let result = extract_tokens(grammar); - 544 | assert!(result.is_err(), "Expected an error but got no error"); - 545 | let err = result.err().unwrap(); - 546 | assert_eq!( - 547 | err.to_string(), - 548 | "Rule 'rule_1' cannot be used as both an external token and a non-terminal rule" - 549 | ); - 550 | } - | - 551 | #[test] - 552 | fn test_extraction_on_hidden_terminal() { - 553 | let (syntax_grammar, lexical_grammar) = extract_tokens(build_grammar(vec![ - 554 | Variable::named("rule_0", Rule::non_terminal(1)), - 555 | Variable::hidden("_rule_1", Rule::string("a")), - 556 | ])) - 557 | .unwrap(); - | - 558 | // The rule `_rule_1` should not "absorb" the - 559 | // terminal "a", since it is hidden, - 560 | // so we expect two variables still - 561 | assert_eq!( - 562 | syntax_grammar.variables, - 563 | vec![ - 564 | Variable::named("rule_0", Rule::non_terminal(1)), - 565 | Variable::hidden("_rule_1", Rule::terminal(0)), - 566 | ] - 567 | ); - | - 568 | // We should not have a hidden rule in our lexical grammar, only the terminal "a" - 569 | assert_eq!( - 570 | lexical_grammar.variables, - 571 | vec![Variable::anonymous("a", Rule::string("a"))] - 572 | ); - 573 | } - | - 574 | #[test] - 575 | fn test_extraction_with_empty_string() { - 576 | assert!(extract_tokens(build_grammar(vec![ - 577 | Variable::named("rule_0", Rule::non_terminal(1)), - 578 | Variable::hidden("_rule_1", Rule::string("")), - 579 | ])) - 580 | .is_err()); - 581 | } - | - 582 | fn build_grammar(variables: Vec) -> InternedGrammar { - 583 | InternedGrammar { - 584 | variables, - 585 | ..Default::default() - 586 | } - 587 | } - 588 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/prepare_grammar/flatten_grammar.rs: 
--------------------------------------------------------------------------------- - 1 | use std::collections::HashMap; - | - 2 | use anyhow::Result; - 3 | use serde::Serialize; - 4 | use thiserror::Error; - | - 5 | use super::ExtractedSyntaxGrammar; - 6 | use crate::{ - 7 | grammars::{ - 8 | Production, ProductionStep, ReservedWordSetId, SyntaxGrammar, SyntaxVariable, Variable, - 9 | }, - 10 | rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet}, - 11 | }; - | - 12 | pub type FlattenGrammarResult = Result; - | - 13 | #[derive(Debug, Error, Serialize)] - 14 | pub enum FlattenGrammarError { - 15 | #[error("No such reserved word set: {0}")] - 16 | NoReservedWordSet(String), - 17 | #[error( - 18 | "The rule `{0}` matches the empty string. - | - 19 | Tree-sitter does not support syntactic rules that match the empty string - 20 | unless they are used only as the grammar's start rule. - 21 | " - 22 | )] - 23 | EmptyString(String), - 24 | #[error("Rule `{0}` cannot be inlined because it contains a reference to itself")] - 25 | RecursiveInline(String), - 26 | } - | - 27 | struct RuleFlattener { - 28 | production: Production, - 29 | reserved_word_set_ids: HashMap, - 30 | precedence_stack: Vec, - 31 | associativity_stack: Vec, - 32 | reserved_word_stack: Vec, - 33 | alias_stack: Vec, - 34 | field_name_stack: Vec, - 35 | } - | - 36 | impl RuleFlattener { - 37 | const fn new(reserved_word_set_ids: HashMap) -> Self { - 38 | Self { - 39 | production: Production { - 40 | steps: Vec::new(), - 41 | dynamic_precedence: 0, - 42 | }, - 43 | reserved_word_set_ids, - 44 | precedence_stack: Vec::new(), - 45 | associativity_stack: Vec::new(), - 46 | reserved_word_stack: Vec::new(), - 47 | alias_stack: Vec::new(), - 48 | field_name_stack: Vec::new(), - 49 | } - 50 | } - | - 51 | fn flatten_variable(&mut self, variable: Variable) -> FlattenGrammarResult { - 52 | let choices = extract_choices(variable.rule); - 53 | let mut productions = Vec::with_capacity(choices.len()); - 54 | 
for rule in choices { - 55 | let production = self.flatten_rule(rule)?; - 56 | if !productions.contains(&production) { - 57 | productions.push(production); - 58 | } - 59 | } - 60 | Ok(SyntaxVariable { - 61 | name: variable.name, - 62 | kind: variable.kind, - 63 | productions, - 64 | }) - 65 | } - | - 66 | fn flatten_rule(&mut self, rule: Rule) -> FlattenGrammarResult { - 67 | self.production = Production::default(); - 68 | self.alias_stack.clear(); - 69 | self.reserved_word_stack.clear(); - 70 | self.precedence_stack.clear(); - 71 | self.associativity_stack.clear(); - 72 | self.field_name_stack.clear(); - 73 | self.apply(rule, true)?; - 74 | Ok(self.production.clone()) - 75 | } - | - 76 | fn apply(&mut self, rule: Rule, at_end: bool) -> FlattenGrammarResult { - 77 | match rule { - 78 | Rule::Seq(members) => { - 79 | let mut result = false; - 80 | let last_index = members.len() - 1; - 81 | for (i, member) in members.into_iter().enumerate() { - 82 | result |= self.apply(member, i == last_index && at_end)?; - 83 | } - 84 | Ok(result) - 85 | } - 86 | Rule::Metadata { rule, params } => { - 87 | let mut has_precedence = false; - 88 | if !params.precedence.is_none() { - 89 | has_precedence = true; - 90 | self.precedence_stack.push(params.precedence); - 91 | } - | - 92 | let mut has_associativity = false; - 93 | if let Some(associativity) = params.associativity { - 94 | has_associativity = true; - 95 | self.associativity_stack.push(associativity); - 96 | } - | - 97 | let mut has_alias = false; - 98 | if let Some(alias) = params.alias { - 99 | has_alias = true; - 100 | self.alias_stack.push(alias); - 101 | } - | - 102 | let mut has_field_name = false; - 103 | if let Some(field_name) = params.field_name { - 104 | has_field_name = true; - 105 | self.field_name_stack.push(field_name); - 106 | } - | - 107 | if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() { - 108 | self.production.dynamic_precedence = params.dynamic_precedence; - 109 | } - | - 110 | 
let did_push = self.apply(*rule, at_end)?; - | - 111 | if has_precedence { - 112 | self.precedence_stack.pop(); - 113 | if did_push && !at_end { - 114 | self.production.steps.last_mut().unwrap().precedence = self - 115 | .precedence_stack - 116 | .last() - 117 | .cloned() - 118 | .unwrap_or(Precedence::None); - 119 | } - 120 | } - | - 121 | if has_associativity { - 122 | self.associativity_stack.pop(); - 123 | if did_push && !at_end { - 124 | self.production.steps.last_mut().unwrap().associativity = - 125 | self.associativity_stack.last().copied(); - 126 | } - 127 | } - | - 128 | if has_alias { - 129 | self.alias_stack.pop(); - 130 | } - | - 131 | if has_field_name { - 132 | self.field_name_stack.pop(); - 133 | } - | - 134 | Ok(did_push) - 135 | } - 136 | Rule::Reserved { rule, context_name } => { - 137 | self.reserved_word_stack.push( - 138 | self.reserved_word_set_ids - 139 | .get(&context_name) - 140 | .copied() - 141 | .ok_or_else(|| { - 142 | FlattenGrammarError::NoReservedWordSet(context_name.clone()) - 143 | })?, - 144 | ); - 145 | let did_push = self.apply(*rule, at_end)?; - 146 | self.reserved_word_stack.pop(); - 147 | Ok(did_push) - 148 | } - 149 | Rule::Symbol(symbol) => { - 150 | self.production.steps.push(ProductionStep { - 151 | symbol, - 152 | precedence: self - 153 | .precedence_stack - 154 | .last() - 155 | .cloned() - 156 | .unwrap_or(Precedence::None), - 157 | associativity: self.associativity_stack.last().copied(), - 158 | reserved_word_set_id: self - 159 | .reserved_word_stack - 160 | .last() - 161 | .copied() - 162 | .unwrap_or(ReservedWordSetId::default()), - 163 | alias: self.alias_stack.last().cloned(), - 164 | field_name: self.field_name_stack.last().cloned(), - 165 | }); - 166 | Ok(true) - 167 | } - 168 | _ => Ok(false), - 169 | } - 170 | } - 171 | } - | - 172 | fn extract_choices(rule: Rule) -> Vec { - 173 | match rule { - 174 | Rule::Seq(elements) => { - 175 | let mut result = vec![Rule::Blank]; - 176 | for element in elements { - 177 | 
let extraction = extract_choices(element); - 178 | let mut next_result = Vec::with_capacity(result.len()); - 179 | for entry in result { - 180 | for extraction_entry in &extraction { - 181 | next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()])); - 182 | } - 183 | } - 184 | result = next_result; - 185 | } - 186 | result - 187 | } - 188 | Rule::Choice(elements) => { - 189 | let mut result = Vec::with_capacity(elements.len()); - 190 | for element in elements { - 191 | for rule in extract_choices(element) { - 192 | result.push(rule); - 193 | } - 194 | } - 195 | result - 196 | } - 197 | Rule::Metadata { rule, params } => extract_choices(*rule) - 198 | .into_iter() - 199 | .map(|rule| Rule::Metadata { - 200 | rule: Box::new(rule), - 201 | params: params.clone(), - 202 | }) - 203 | .collect(), - 204 | Rule::Reserved { rule, context_name } => extract_choices(*rule) - 205 | .into_iter() - 206 | .map(|rule| Rule::Reserved { - 207 | rule: Box::new(rule), - 208 | context_name: context_name.clone(), - 209 | }) - 210 | .collect(), - 211 | _ => vec![rule], - 212 | } - 213 | } - | - 214 | fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool { - 215 | for variable in variables { - 216 | for production in &variable.productions { - 217 | for step in &production.steps { - 218 | if step.symbol == symbol { - 219 | return true; - 220 | } - 221 | } - 222 | } - 223 | } - 224 | false - 225 | } - | - 226 | pub(super) fn flatten_grammar( - 227 | grammar: ExtractedSyntaxGrammar, - 228 | ) -> FlattenGrammarResult { - 229 | let mut reserved_word_set_ids_by_name = HashMap::new(); - 230 | for (ix, set) in grammar.reserved_word_sets.iter().enumerate() { - 231 | reserved_word_set_ids_by_name.insert(set.name.clone(), ReservedWordSetId(ix)); - 232 | } - | - 233 | let mut flattener = RuleFlattener::new(reserved_word_set_ids_by_name); - 234 | let variables = grammar - 235 | .variables - 236 | .into_iter() - 237 | .map(|variable| 
flattener.flatten_variable(variable)) - 238 | .collect::>>()?; - | - 239 | for (i, variable) in variables.iter().enumerate() { - 240 | let symbol = Symbol::non_terminal(i); - 241 | let used = symbol_is_used(&variables, symbol); - | - 242 | for production in &variable.productions { - 243 | if used && production.steps.is_empty() { - 244 | Err(FlattenGrammarError::EmptyString(variable.name.clone()))?; - 245 | } - | - 246 | if grammar.variables_to_inline.contains(&symbol) - 247 | && production.steps.iter().any(|step| step.symbol == symbol) - 248 | { - 249 | Err(FlattenGrammarError::RecursiveInline(variable.name.clone()))?; - 250 | } - 251 | } - 252 | } - 253 | let mut reserved_word_sets = grammar - 254 | .reserved_word_sets - 255 | .into_iter() - 256 | .map(|set| set.reserved_words.into_iter().collect()) - 257 | .collect::>(); - | - 258 | // If no default reserved word set is specified, there are no reserved words. - 259 | if reserved_word_sets.is_empty() { - 260 | reserved_word_sets.push(TokenSet::default()); - 261 | } - | - 262 | Ok(SyntaxGrammar { - 263 | extra_symbols: grammar.extra_symbols, - 264 | expected_conflicts: grammar.expected_conflicts, - 265 | variables_to_inline: grammar.variables_to_inline, - 266 | precedence_orderings: grammar.precedence_orderings, - 267 | external_tokens: grammar.external_tokens, - 268 | supertype_symbols: grammar.supertype_symbols, - 269 | word_token: grammar.word_token, - 270 | reserved_word_sets, - 271 | variables, - 272 | }) - 273 | } - | - 274 | #[cfg(test)] - 275 | mod tests { - 276 | use super::*; - 277 | use crate::grammars::VariableType; - | - 278 | #[test] - 279 | fn test_flatten_grammar() { - 280 | let mut flattener = RuleFlattener::new(HashMap::default()); - 281 | let result = flattener - 282 | .flatten_variable(Variable { - 283 | name: "test".to_string(), - 284 | kind: VariableType::Named, - 285 | rule: Rule::seq(vec![ - 286 | Rule::non_terminal(1), - 287 | Rule::prec_left( - 288 | Precedence::Integer(101), - 289 | 
Rule::seq(vec![ - 290 | Rule::non_terminal(2), - 291 | Rule::choice(vec![ - 292 | Rule::prec_right( - 293 | Precedence::Integer(102), - 294 | Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]), - 295 | ), - 296 | Rule::non_terminal(5), - 297 | ]), - 298 | Rule::non_terminal(6), - 299 | ]), - 300 | ), - 301 | Rule::non_terminal(7), - 302 | ]), - 303 | }) - 304 | .unwrap(); - | - 305 | assert_eq!( - 306 | result.productions, - 307 | vec![ - 308 | Production { - 309 | dynamic_precedence: 0, - 310 | steps: vec![ - 311 | ProductionStep::new(Symbol::non_terminal(1)), - 312 | ProductionStep::new(Symbol::non_terminal(2)) - 313 | .with_prec(Precedence::Integer(101), Some(Associativity::Left)), - 314 | ProductionStep::new(Symbol::non_terminal(3)) - 315 | .with_prec(Precedence::Integer(102), Some(Associativity::Right)), - 316 | ProductionStep::new(Symbol::non_terminal(4)) - 317 | .with_prec(Precedence::Integer(101), Some(Associativity::Left)), - 318 | ProductionStep::new(Symbol::non_terminal(6)), - 319 | ProductionStep::new(Symbol::non_terminal(7)), - 320 | ] - 321 | }, - 322 | Production { - 323 | dynamic_precedence: 0, - 324 | steps: vec![ - 325 | ProductionStep::new(Symbol::non_terminal(1)), - 326 | ProductionStep::new(Symbol::non_terminal(2)) - 327 | .with_prec(Precedence::Integer(101), Some(Associativity::Left)), - 328 | ProductionStep::new(Symbol::non_terminal(5)) - 329 | .with_prec(Precedence::Integer(101), Some(Associativity::Left)), - 330 | ProductionStep::new(Symbol::non_terminal(6)), - 331 | ProductionStep::new(Symbol::non_terminal(7)), - 332 | ] - 333 | }, - 334 | ] - 335 | ); - 336 | } - | - 337 | #[test] - 338 | fn test_flatten_grammar_with_maximum_dynamic_precedence() { - 339 | let mut flattener = RuleFlattener::new(HashMap::default()); - 340 | let result = flattener - 341 | .flatten_variable(Variable { - 342 | name: "test".to_string(), - 343 | kind: VariableType::Named, - 344 | rule: Rule::seq(vec![ - 345 | Rule::non_terminal(1), - 346 | 
Rule::prec_dynamic( - 347 | 101, - 348 | Rule::seq(vec![ - 349 | Rule::non_terminal(2), - 350 | Rule::choice(vec![ - 351 | Rule::prec_dynamic( - 352 | 102, - 353 | Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]), - 354 | ), - 355 | Rule::non_terminal(5), - 356 | ]), - 357 | Rule::non_terminal(6), - 358 | ]), - 359 | ), - 360 | Rule::non_terminal(7), - 361 | ]), - 362 | }) - 363 | .unwrap(); - | - 364 | assert_eq!( - 365 | result.productions, - 366 | vec![ - 367 | Production { - 368 | dynamic_precedence: 102, - 369 | steps: vec![ - 370 | ProductionStep::new(Symbol::non_terminal(1)), - 371 | ProductionStep::new(Symbol::non_terminal(2)), - 372 | ProductionStep::new(Symbol::non_terminal(3)), - 373 | ProductionStep::new(Symbol::non_terminal(4)), - 374 | ProductionStep::new(Symbol::non_terminal(6)), - 375 | ProductionStep::new(Symbol::non_terminal(7)), - 376 | ], - 377 | }, - 378 | Production { - 379 | dynamic_precedence: 101, - 380 | steps: vec![ - 381 | ProductionStep::new(Symbol::non_terminal(1)), - 382 | ProductionStep::new(Symbol::non_terminal(2)), - 383 | ProductionStep::new(Symbol::non_terminal(5)), - 384 | ProductionStep::new(Symbol::non_terminal(6)), - 385 | ProductionStep::new(Symbol::non_terminal(7)), - 386 | ], - 387 | }, - 388 | ] - 389 | ); - 390 | } - | - 391 | #[test] - 392 | fn test_flatten_grammar_with_final_precedence() { - 393 | let mut flattener = RuleFlattener::new(HashMap::default()); - 394 | let result = flattener - 395 | .flatten_variable(Variable { - 396 | name: "test".to_string(), - 397 | kind: VariableType::Named, - 398 | rule: Rule::prec_left( - 399 | Precedence::Integer(101), - 400 | Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]), - 401 | ), - 402 | }) - 403 | .unwrap(); - | - 404 | assert_eq!( - 405 | result.productions, - 406 | vec![Production { - 407 | dynamic_precedence: 0, - 408 | steps: vec![ - 409 | ProductionStep::new(Symbol::non_terminal(1)) - 410 | .with_prec(Precedence::Integer(101), 
Some(Associativity::Left)), - 411 | ProductionStep::new(Symbol::non_terminal(2)) - 412 | .with_prec(Precedence::Integer(101), Some(Associativity::Left)), - 413 | ] - 414 | }] - 415 | ); - | - 416 | let result = flattener - 417 | .flatten_variable(Variable { - 418 | name: "test".to_string(), - 419 | kind: VariableType::Named, - 420 | rule: Rule::prec_left( - 421 | Precedence::Integer(101), - 422 | Rule::seq(vec![Rule::non_terminal(1)]), - 423 | ), - 424 | }) - 425 | .unwrap(); - | - 426 | assert_eq!( - 427 | result.productions, - 428 | vec![Production { - 429 | dynamic_precedence: 0, - 430 | steps: vec![ProductionStep::new(Symbol::non_terminal(1)) - 431 | .with_prec(Precedence::Integer(101), Some(Associativity::Left)),] - 432 | }] - 433 | ); - 434 | } - | - 435 | #[test] - 436 | fn test_flatten_grammar_with_field_names() { - 437 | let mut flattener = RuleFlattener::new(HashMap::default()); - 438 | let result = flattener - 439 | .flatten_variable(Variable { - 440 | name: "test".to_string(), - 441 | kind: VariableType::Named, - 442 | rule: Rule::seq(vec![ - 443 | Rule::field("first-thing".to_string(), Rule::terminal(1)), - 444 | Rule::terminal(2), - 445 | Rule::choice(vec![ - 446 | Rule::Blank, - 447 | Rule::field("second-thing".to_string(), Rule::terminal(3)), - 448 | ]), - 449 | ]), - 450 | }) - 451 | .unwrap(); - | - 452 | assert_eq!( - 453 | result.productions, - 454 | vec![ - 455 | Production { - 456 | dynamic_precedence: 0, - 457 | steps: vec![ - 458 | ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"), - 459 | ProductionStep::new(Symbol::terminal(2)) - 460 | ] - 461 | }, - 462 | Production { - 463 | dynamic_precedence: 0, - 464 | steps: vec![ - 465 | ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"), - 466 | ProductionStep::new(Symbol::terminal(2)), - 467 | ProductionStep::new(Symbol::terminal(3)).with_field_name("second-thing"), - 468 | ] - 469 | }, - 470 | ] - 471 | ); - 472 | } - | - 473 | #[test] - 474 | fn 
test_flatten_grammar_with_recursive_inline_variable() { - 475 | let result = flatten_grammar(ExtractedSyntaxGrammar { - 476 | extra_symbols: Vec::new(), - 477 | expected_conflicts: Vec::new(), - 478 | variables_to_inline: vec![Symbol::non_terminal(0)], - 479 | precedence_orderings: Vec::new(), - 480 | external_tokens: Vec::new(), - 481 | supertype_symbols: Vec::new(), - 482 | word_token: None, - 483 | reserved_word_sets: Vec::new(), - 484 | variables: vec![Variable { - 485 | name: "test".to_string(), - 486 | kind: VariableType::Named, - 487 | rule: Rule::seq(vec![ - 488 | Rule::non_terminal(0), - 489 | Rule::non_terminal(1), - 490 | Rule::non_terminal(2), - 491 | ]), - 492 | }], - 493 | }); - | - 494 | assert_eq!( - 495 | result.unwrap_err().to_string(), - 496 | "Rule `test` cannot be inlined because it contains a reference to itself", - 497 | ); - 498 | } - 499 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/prepare_grammar/intern_symbols.rs: --------------------------------------------------------------------------------- - 1 | use anyhow::Result; - 2 | use log::warn; - 3 | use serde::Serialize; - 4 | use thiserror::Error; - | - 5 | use super::InternedGrammar; - 6 | use crate::{ - 7 | grammars::{InputGrammar, ReservedWordContext, Variable, VariableType}, - 8 | rules::{Rule, Symbol}, - 9 | }; - | - 10 | pub type InternSymbolsResult = Result; - | - 11 | #[derive(Debug, Error, Serialize)] - 12 | pub enum InternSymbolsError { - 13 | #[error("A grammar's start rule must be visible.")] - 14 | HiddenStartRule, - 15 | #[error("Undefined symbol `{0}`")] - 16 | Undefined(String), - 17 | #[error("Undefined symbol `{0}` in grammar's supertypes array")] - 18 | UndefinedSupertype(String), - 19 | #[error("Undefined symbol `{0}` in grammar's conflicts array")] - 20 | UndefinedConflict(String), - 21 | #[error("Undefined symbol `{0}` as grammar's word token")] - 22 | UndefinedWordToken(String), - 23 | } - | - 24 | 
pub(super) fn intern_symbols(grammar: &InputGrammar) -> InternSymbolsResult { - 25 | let interner = Interner { grammar }; - | - 26 | if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden { - 27 | Err(InternSymbolsError::HiddenStartRule)?; - 28 | } - | - 29 | let mut variables = Vec::with_capacity(grammar.variables.len()); - 30 | for variable in &grammar.variables { - 31 | variables.push(Variable { - 32 | name: variable.name.clone(), - 33 | kind: variable_type_for_name(&variable.name), - 34 | rule: interner.intern_rule(&variable.rule, Some(&variable.name))?, - 35 | }); - 36 | } - | - 37 | let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len()); - 38 | for external_token in &grammar.external_tokens { - 39 | let rule = interner.intern_rule(external_token, None)?; - 40 | let (name, kind) = if let Rule::NamedSymbol(name) = external_token { - 41 | (name.clone(), variable_type_for_name(name)) - 42 | } else { - 43 | (String::new(), VariableType::Anonymous) - 44 | }; - 45 | external_tokens.push(Variable { name, kind, rule }); - 46 | } - | - 47 | let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len()); - 48 | for extra_token in &grammar.extra_symbols { - 49 | extra_symbols.push(interner.intern_rule(extra_token, None)?); - 50 | } - | - 51 | let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len()); - 52 | for supertype_symbol_name in &grammar.supertype_symbols { - 53 | supertype_symbols.push(interner.intern_name(supertype_symbol_name).ok_or_else(|| { - 54 | InternSymbolsError::UndefinedSupertype(supertype_symbol_name.clone()) - 55 | })?); - 56 | } - | - 57 | let mut reserved_words = Vec::with_capacity(grammar.reserved_words.len()); - 58 | for reserved_word_set in &grammar.reserved_words { - 59 | let mut interned_set = Vec::with_capacity(reserved_word_set.reserved_words.len()); - 60 | for rule in &reserved_word_set.reserved_words { - 61 | interned_set.push(interner.intern_rule(rule, None)?); - 62 
| } - 63 | reserved_words.push(ReservedWordContext { - 64 | name: reserved_word_set.name.clone(), - 65 | reserved_words: interned_set, - 66 | }); - 67 | } - | - 68 | let mut expected_conflicts = Vec::with_capacity(grammar.expected_conflicts.len()); - 69 | for conflict in &grammar.expected_conflicts { - 70 | let mut interned_conflict = Vec::with_capacity(conflict.len()); - 71 | for name in conflict { - 72 | interned_conflict.push( - 73 | interner - 74 | .intern_name(name) - 75 | .ok_or_else(|| InternSymbolsError::UndefinedConflict(name.clone()))?, - 76 | ); - 77 | } - 78 | expected_conflicts.push(interned_conflict); - 79 | } - | - 80 | let mut variables_to_inline = Vec::new(); - 81 | for name in &grammar.variables_to_inline { - 82 | if let Some(symbol) = interner.intern_name(name) { - 83 | variables_to_inline.push(symbol); - 84 | } - 85 | } - | - 86 | let word_token = if let Some(name) = grammar.word_token.as_ref() { - 87 | Some( - 88 | interner - 89 | .intern_name(name) - 90 | .ok_or_else(|| InternSymbolsError::UndefinedWordToken(name.clone()))?, - 91 | ) - 92 | } else { - 93 | None - 94 | }; - | - 95 | for (i, variable) in variables.iter_mut().enumerate() { - 96 | if supertype_symbols.contains(&Symbol::non_terminal(i)) { - 97 | variable.kind = VariableType::Hidden; - 98 | } - 99 | } - | - 100 | Ok(InternedGrammar { - 101 | variables, - 102 | external_tokens, - 103 | extra_symbols, - 104 | expected_conflicts, - 105 | variables_to_inline, - 106 | supertype_symbols, - 107 | word_token, - 108 | precedence_orderings: grammar.precedence_orderings.clone(), - 109 | reserved_word_sets: reserved_words, - 110 | }) - 111 | } - | - 112 | struct Interner<'a> { - 113 | grammar: &'a InputGrammar, - 114 | } - | - 115 | impl Interner<'_> { - 116 | fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> InternSymbolsResult { - 117 | match rule { - 118 | Rule::Choice(elements) => { - 119 | self.check_single(elements, name, "choice"); - 120 | let mut result = 
Vec::with_capacity(elements.len()); - 121 | for element in elements { - 122 | result.push(self.intern_rule(element, name)?); - 123 | } - 124 | Ok(Rule::Choice(result)) - 125 | } - 126 | Rule::Seq(elements) => { - 127 | self.check_single(elements, name, "seq"); - 128 | let mut result = Vec::with_capacity(elements.len()); - 129 | for element in elements { - 130 | result.push(self.intern_rule(element, name)?); - 131 | } - 132 | Ok(Rule::Seq(result)) - 133 | } - 134 | Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content, name)?))), - 135 | Rule::Metadata { rule, params } => Ok(Rule::Metadata { - 136 | rule: Box::new(self.intern_rule(rule, name)?), - 137 | params: params.clone(), - 138 | }), - 139 | Rule::Reserved { rule, context_name } => Ok(Rule::Reserved { - 140 | rule: Box::new(self.intern_rule(rule, name)?), - 141 | context_name: context_name.clone(), - 142 | }), - 143 | Rule::NamedSymbol(name) => self.intern_name(name).map_or_else( - 144 | || Err(InternSymbolsError::Undefined(name.clone())), - 145 | |symbol| Ok(Rule::Symbol(symbol)), - 146 | ), - 147 | _ => Ok(rule.clone()), - 148 | } - 149 | } - | - 150 | fn intern_name(&self, symbol: &str) -> Option { - 151 | for (i, variable) in self.grammar.variables.iter().enumerate() { - 152 | if variable.name == symbol { - 153 | return Some(Symbol::non_terminal(i)); - 154 | } - 155 | } - | - 156 | for (i, external_token) in self.grammar.external_tokens.iter().enumerate() { - 157 | if let Rule::NamedSymbol(name) = external_token { - 158 | if name == symbol { - 159 | return Some(Symbol::external(i)); - 160 | } - 161 | } - 162 | } - | - 163 | None - 164 | } - | - 165 | // In the case of a seq or choice rule of 1 element in a hidden rule, weird - 166 | // inconsistent behavior with queries can occur. So we should warn the user about it. 
- 167 | fn check_single(&self, elements: &[Rule], name: Option<&str>, kind: &str) { - 168 | if elements.len() == 1 && matches!(elements[0], Rule::String(_) | Rule::Pattern(_, _)) { - 169 | warn!( - 170 | "rule {} contains a `{kind}` rule with a single element. This is unnecessary.", - 171 | name.unwrap_or_default() - 172 | ); - 173 | } - 174 | } - 175 | } - | - 176 | fn variable_type_for_name(name: &str) -> VariableType { - 177 | if name.starts_with('_') { - 178 | VariableType::Hidden - 179 | } else { - 180 | VariableType::Named - 181 | } - 182 | } - | - 183 | #[cfg(test)] - 184 | mod tests { - 185 | use super::*; - | - 186 | #[test] - 187 | fn test_basic_repeat_expansion() { - 188 | let grammar = intern_symbols(&build_grammar(vec![ - 189 | Variable::named("x", Rule::choice(vec![Rule::named("y"), Rule::named("_z")])), - 190 | Variable::named("y", Rule::named("_z")), - 191 | Variable::named("_z", Rule::string("a")), - 192 | ])) - 193 | .unwrap(); - | - 194 | assert_eq!( - 195 | grammar.variables, - 196 | vec![ - 197 | Variable::named( - 198 | "x", - 199 | Rule::choice(vec![Rule::non_terminal(1), Rule::non_terminal(2),]) - 200 | ), - 201 | Variable::named("y", Rule::non_terminal(2)), - 202 | Variable::hidden("_z", Rule::string("a")), - 203 | ] - 204 | ); - 205 | } - | - 206 | #[test] - 207 | fn test_interning_external_token_names() { - 208 | // Variable `y` is both an internal and an external token. - 209 | // Variable `z` is just an external token. 
- 210 | let mut input_grammar = build_grammar(vec![ - 211 | Variable::named( - 212 | "w", - 213 | Rule::choice(vec![Rule::named("x"), Rule::named("y"), Rule::named("z")]), - 214 | ), - 215 | Variable::named("x", Rule::string("a")), - 216 | Variable::named("y", Rule::string("b")), - 217 | ]); - 218 | input_grammar - 219 | .external_tokens - 220 | .extend(vec![Rule::named("y"), Rule::named("z")]); - | - 221 | let grammar = intern_symbols(&input_grammar).unwrap(); - | - 222 | // Variable `y` is referred to by its internal index. - 223 | // Variable `z` is referred to by its external index. - 224 | assert_eq!( - 225 | grammar.variables, - 226 | vec![ - 227 | Variable::named( - 228 | "w", - 229 | Rule::choice(vec![ - 230 | Rule::non_terminal(1), - 231 | Rule::non_terminal(2), - 232 | Rule::external(1), - 233 | ]) - 234 | ), - 235 | Variable::named("x", Rule::string("a")), - 236 | Variable::named("y", Rule::string("b")), - 237 | ] - 238 | ); - | - 239 | // The external token for `y` refers back to its internal index. 
- 240 | assert_eq!( - 241 | grammar.external_tokens, - 242 | vec![ - 243 | Variable::named("y", Rule::non_terminal(2)), - 244 | Variable::named("z", Rule::external(1)), - 245 | ] - 246 | ); - 247 | } - | - 248 | #[test] - 249 | fn test_grammar_with_undefined_symbols() { - 250 | let result = intern_symbols(&build_grammar(vec![Variable::named("x", Rule::named("y"))])); - | - 251 | assert!(result.is_err(), "Expected an error but got none"); - 252 | let e = result.err().unwrap(); - 253 | assert_eq!(e.to_string(), "Undefined symbol `y`"); - 254 | } - | - 255 | fn build_grammar(variables: Vec) -> InputGrammar { - 256 | InputGrammar { - 257 | variables, - 258 | name: "the_language".to_string(), - 259 | ..Default::default() - 260 | } - 261 | } - 262 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/prepare_grammar/process_inlines.rs: --------------------------------------------------------------------------------- - 1 | use std::collections::HashMap; - | - 2 | use anyhow::Result; - 3 | use serde::Serialize; - 4 | use thiserror::Error; - | - 5 | use crate::{ - 6 | grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar}, - 7 | rules::SymbolType, - 8 | }; - | - 9 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] - 10 | struct ProductionStepId { - 11 | // A `None` value here means that the production itself was produced via inlining, - 12 | // and is stored in the builder's `productions` vector, as opposed to being - 13 | // stored in one of the grammar's variables. 
- 14 | variable_index: Option, - 15 | production_index: usize, - 16 | step_index: usize, - 17 | } - | - 18 | struct InlinedProductionMapBuilder { - 19 | production_indices_by_step_id: HashMap>, - 20 | productions: Vec, - 21 | } - | - 22 | impl InlinedProductionMapBuilder { - 23 | fn build(mut self, grammar: &SyntaxGrammar) -> InlinedProductionMap { - 24 | let mut step_ids_to_process = Vec::new(); - 25 | for (variable_index, variable) in grammar.variables.iter().enumerate() { - 26 | for production_index in 0..variable.productions.len() { - 27 | step_ids_to_process.push(ProductionStepId { - 28 | variable_index: Some(variable_index), - 29 | production_index, - 30 | step_index: 0, - 31 | }); - 32 | while !step_ids_to_process.is_empty() { - 33 | let mut i = 0; - 34 | while i < step_ids_to_process.len() { - 35 | let step_id = step_ids_to_process[i]; - 36 | if let Some(step) = self.production_step_for_id(step_id, grammar) { - 37 | if grammar.variables_to_inline.contains(&step.symbol) { - 38 | let inlined_step_ids = self - 39 | .inline_production_at_step(step_id, grammar) - 40 | .iter() - 41 | .copied() - 42 | .map(|production_index| ProductionStepId { - 43 | variable_index: None, - 44 | production_index, - 45 | step_index: step_id.step_index, - 46 | }); - 47 | step_ids_to_process.splice(i..=i, inlined_step_ids); - 48 | } else { - 49 | step_ids_to_process[i] = ProductionStepId { - 50 | variable_index: step_id.variable_index, - 51 | production_index: step_id.production_index, - 52 | step_index: step_id.step_index + 1, - 53 | }; - 54 | i += 1; - 55 | } - 56 | } else { - 57 | step_ids_to_process.remove(i); - 58 | } - 59 | } - 60 | } - 61 | } - 62 | } - | - 63 | let productions = self.productions; - 64 | let production_indices_by_step_id = self.production_indices_by_step_id; - 65 | let production_map = production_indices_by_step_id - 66 | .into_iter() - 67 | .map(|(step_id, production_indices)| { - 68 | let production = step_id.variable_index.map_or_else( - 69 | || 
&productions[step_id.production_index], - 70 | |variable_index| { - 71 | &grammar.variables[variable_index].productions[step_id.production_index] - 72 | }, - 73 | ) as *const Production; - 74 | ((production, step_id.step_index as u32), production_indices) - 75 | }) - 76 | .collect(); - | - 77 | InlinedProductionMap { - 78 | productions, - 79 | production_map, - 80 | } - 81 | } - | - 82 | fn inline_production_at_step<'a>( - 83 | &'a mut self, - 84 | step_id: ProductionStepId, - 85 | grammar: &'a SyntaxGrammar, - 86 | ) -> &'a [usize] { - 87 | // Build a list of productions produced by inlining rules. - 88 | let mut i = 0; - 89 | let step_index = step_id.step_index; - 90 | let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()]; - 91 | while i < productions_to_add.len() { - 92 | if let Some(step) = productions_to_add[i].steps.get(step_index) { - 93 | let symbol = step.symbol; - 94 | if grammar.variables_to_inline.contains(&symbol) { - 95 | // Remove the production from the vector, replacing it with a placeholder. - 96 | let production = productions_to_add - 97 | .splice(i..=i, std::iter::once(&Production::default()).cloned()) - 98 | .next() - 99 | .unwrap(); - | - 100 | // Replace the placeholder with the inlined productions. 
- 101 | productions_to_add.splice( - 102 | i..=i, - 103 | grammar.variables[symbol.index].productions.iter().map(|p| { - 104 | let mut production = production.clone(); - 105 | let removed_step = production - 106 | .steps - 107 | .splice(step_index..=step_index, p.steps.iter().cloned()) - 108 | .next() - 109 | .unwrap(); - 110 | let inserted_steps = - 111 | &mut production.steps[step_index..(step_index + p.steps.len())]; - 112 | if let Some(alias) = removed_step.alias { - 113 | for inserted_step in inserted_steps.iter_mut() { - 114 | inserted_step.alias = Some(alias.clone()); - 115 | } - 116 | } - 117 | if let Some(field_name) = removed_step.field_name { - 118 | for inserted_step in inserted_steps.iter_mut() { - 119 | inserted_step.field_name = Some(field_name.clone()); - 120 | } - 121 | } - 122 | if let Some(last_inserted_step) = inserted_steps.last_mut() { - 123 | if last_inserted_step.precedence.is_none() { - 124 | last_inserted_step.precedence = removed_step.precedence; - 125 | } - 126 | if last_inserted_step.associativity.is_none() { - 127 | last_inserted_step.associativity = removed_step.associativity; - 128 | } - 129 | } - 130 | if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() { - 131 | production.dynamic_precedence = p.dynamic_precedence; - 132 | } - 133 | production - 134 | }), - 135 | ); - | - 136 | continue; - 137 | } - 138 | } - 139 | i += 1; - 140 | } - | - 141 | // Store all the computed productions. - 142 | let result = productions_to_add - 143 | .into_iter() - 144 | .map(|production| { - 145 | self.productions - 146 | .iter() - 147 | .position(|p| *p == production) - 148 | .unwrap_or_else(|| { - 149 | self.productions.push(production); - 150 | self.productions.len() - 1 - 151 | }) - 152 | }) - 153 | .collect(); - | - 154 | // Cache these productions based on the original production step. 
- 155 | self.production_indices_by_step_id - 156 | .entry(step_id) - 157 | .or_insert(result) - 158 | } - | - 159 | fn production_for_id<'a>( - 160 | &'a self, - 161 | id: ProductionStepId, - 162 | grammar: &'a SyntaxGrammar, - 163 | ) -> &'a Production { - 164 | id.variable_index.map_or_else( - 165 | || &self.productions[id.production_index], - 166 | |variable_index| &grammar.variables[variable_index].productions[id.production_index], - 167 | ) - 168 | } - | - 169 | fn production_step_for_id<'a>( - 170 | &'a self, - 171 | id: ProductionStepId, - 172 | grammar: &'a SyntaxGrammar, - 173 | ) -> Option<&'a ProductionStep> { - 174 | self.production_for_id(id, grammar).steps.get(id.step_index) - 175 | } - 176 | } - | - 177 | pub type ProcessInlinesResult = Result; - | - 178 | #[derive(Debug, Error, Serialize)] - 179 | pub enum ProcessInlinesError { - 180 | #[error("External token `{0}` cannot be inlined")] - 181 | ExternalToken(String), - 182 | #[error("Token `{0}` cannot be inlined")] - 183 | Token(String), - 184 | #[error("Rule `{0}` cannot be inlined because it is the first rule")] - 185 | FirstRule(String), - 186 | } - | - 187 | pub(super) fn process_inlines( - 188 | grammar: &SyntaxGrammar, - 189 | lexical_grammar: &LexicalGrammar, - 190 | ) -> ProcessInlinesResult { - 191 | for symbol in &grammar.variables_to_inline { - 192 | match symbol.kind { - 193 | SymbolType::External => { - 194 | Err(ProcessInlinesError::ExternalToken( - 195 | grammar.external_tokens[symbol.index].name.clone(), - 196 | ))?; - 197 | } - 198 | SymbolType::Terminal => { - 199 | Err(ProcessInlinesError::Token( - 200 | lexical_grammar.variables[symbol.index].name.clone(), - 201 | ))?; - 202 | } - 203 | SymbolType::NonTerminal if symbol.index == 0 => { - 204 | Err(ProcessInlinesError::FirstRule( - 205 | grammar.variables[symbol.index].name.clone(), - 206 | ))?; - 207 | } - 208 | _ => {} - 209 | } - 210 | } - | - 211 | Ok(InlinedProductionMapBuilder { - 212 | productions: Vec::new(), - 213 | 
production_indices_by_step_id: HashMap::new(), - 214 | } - 215 | .build(grammar)) - 216 | } - | - 217 | #[cfg(test)] - 218 | mod tests { - 219 | use super::*; - 220 | use crate::{ - 221 | grammars::{LexicalVariable, SyntaxVariable, VariableType}, - 222 | rules::{Associativity, Precedence, Symbol}, - 223 | }; - | - 224 | #[test] - 225 | fn test_basic_inlining() { - 226 | let grammar = SyntaxGrammar { - 227 | variables_to_inline: vec![Symbol::non_terminal(1)], - 228 | variables: vec![ - 229 | SyntaxVariable { - 230 | name: "non-terminal-0".to_string(), - 231 | kind: VariableType::Named, - 232 | productions: vec![Production { - 233 | dynamic_precedence: 0, - 234 | steps: vec![ - 235 | ProductionStep::new(Symbol::terminal(10)), - 236 | ProductionStep::new(Symbol::non_terminal(1)), // inlined - 237 | ProductionStep::new(Symbol::terminal(11)), - 238 | ], - 239 | }], - 240 | }, - 241 | SyntaxVariable { - 242 | name: "non-terminal-1".to_string(), - 243 | kind: VariableType::Named, - 244 | productions: vec![ - 245 | Production { - 246 | dynamic_precedence: 0, - 247 | steps: vec![ - 248 | ProductionStep::new(Symbol::terminal(12)), - 249 | ProductionStep::new(Symbol::terminal(13)), - 250 | ], - 251 | }, - 252 | Production { - 253 | dynamic_precedence: -2, - 254 | steps: vec![ProductionStep::new(Symbol::terminal(14))], - 255 | }, - 256 | ], - 257 | }, - 258 | ], - 259 | ..Default::default() - 260 | }; - | - 261 | let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap(); - | - 262 | // Nothing to inline at step 0. - 263 | assert!(inline_map - 264 | .inlined_productions(&grammar.variables[0].productions[0], 0) - 265 | .is_none()); - | - 266 | // Inlining variable 1 yields two productions. 
- 267 | assert_eq!( - 268 | inline_map - 269 | .inlined_productions(&grammar.variables[0].productions[0], 1) - 270 | .unwrap() - 271 | .cloned() - 272 | .collect::>(), - 273 | vec![ - 274 | Production { - 275 | dynamic_precedence: 0, - 276 | steps: vec![ - 277 | ProductionStep::new(Symbol::terminal(10)), - 278 | ProductionStep::new(Symbol::terminal(12)), - 279 | ProductionStep::new(Symbol::terminal(13)), - 280 | ProductionStep::new(Symbol::terminal(11)), - 281 | ], - 282 | }, - 283 | Production { - 284 | dynamic_precedence: -2, - 285 | steps: vec![ - 286 | ProductionStep::new(Symbol::terminal(10)), - 287 | ProductionStep::new(Symbol::terminal(14)), - 288 | ProductionStep::new(Symbol::terminal(11)), - 289 | ], - 290 | }, - 291 | ] - 292 | ); - 293 | } - | - 294 | #[test] - 295 | fn test_nested_inlining() { - 296 | let grammar = SyntaxGrammar { - 297 | variables: vec![ - 298 | SyntaxVariable { - 299 | name: "non-terminal-0".to_string(), - 300 | kind: VariableType::Named, - 301 | productions: vec![Production { - 302 | dynamic_precedence: 0, - 303 | steps: vec![ - 304 | ProductionStep::new(Symbol::terminal(10)), - 305 | ProductionStep::new(Symbol::non_terminal(1)), // inlined - 306 | ProductionStep::new(Symbol::terminal(11)), - 307 | ProductionStep::new(Symbol::non_terminal(2)), // inlined - 308 | ProductionStep::new(Symbol::terminal(12)), - 309 | ], - 310 | }], - 311 | }, - 312 | SyntaxVariable { - 313 | name: "non-terminal-1".to_string(), - 314 | kind: VariableType::Named, - 315 | productions: vec![ - 316 | Production { - 317 | dynamic_precedence: 0, - 318 | steps: vec![ProductionStep::new(Symbol::terminal(13))], - 319 | }, - 320 | Production { - 321 | dynamic_precedence: 0, - 322 | steps: vec![ - 323 | ProductionStep::new(Symbol::non_terminal(3)), // inlined - 324 | ProductionStep::new(Symbol::terminal(14)), - 325 | ], - 326 | }, - 327 | ], - 328 | }, - 329 | SyntaxVariable { - 330 | name: "non-terminal-2".to_string(), - 331 | kind: VariableType::Named, - 332 | 
productions: vec![Production { - 333 | dynamic_precedence: 0, - 334 | steps: vec![ProductionStep::new(Symbol::terminal(15))], - 335 | }], - 336 | }, - 337 | SyntaxVariable { - 338 | name: "non-terminal-3".to_string(), - 339 | kind: VariableType::Named, - 340 | productions: vec![Production { - 341 | dynamic_precedence: 0, - 342 | steps: vec![ProductionStep::new(Symbol::terminal(16))], - 343 | }], - 344 | }, - 345 | ], - 346 | variables_to_inline: vec![ - 347 | Symbol::non_terminal(1), - 348 | Symbol::non_terminal(2), - 349 | Symbol::non_terminal(3), - 350 | ], - 351 | ..Default::default() - 352 | }; - | - 353 | let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap(); - | - 354 | let productions = inline_map - 355 | .inlined_productions(&grammar.variables[0].productions[0], 1) - 356 | .unwrap() - 357 | .collect::>(); - | - 358 | assert_eq!( - 359 | productions.iter().copied().cloned().collect::>(), - 360 | vec![ - 361 | Production { - 362 | dynamic_precedence: 0, - 363 | steps: vec![ - 364 | ProductionStep::new(Symbol::terminal(10)), - 365 | ProductionStep::new(Symbol::terminal(13)), - 366 | ProductionStep::new(Symbol::terminal(11)), - 367 | ProductionStep::new(Symbol::non_terminal(2)), - 368 | ProductionStep::new(Symbol::terminal(12)), - 369 | ], - 370 | }, - 371 | Production { - 372 | dynamic_precedence: 0, - 373 | steps: vec![ - 374 | ProductionStep::new(Symbol::terminal(10)), - 375 | ProductionStep::new(Symbol::terminal(16)), - 376 | ProductionStep::new(Symbol::terminal(14)), - 377 | ProductionStep::new(Symbol::terminal(11)), - 378 | ProductionStep::new(Symbol::non_terminal(2)), - 379 | ProductionStep::new(Symbol::terminal(12)), - 380 | ], - 381 | }, - 382 | ] - 383 | ); - | - 384 | assert_eq!( - 385 | inline_map - 386 | .inlined_productions(productions[0], 3) - 387 | .unwrap() - 388 | .cloned() - 389 | .collect::>(), - 390 | vec![Production { - 391 | dynamic_precedence: 0, - 392 | steps: vec![ - 393 | 
ProductionStep::new(Symbol::terminal(10)), - 394 | ProductionStep::new(Symbol::terminal(13)), - 395 | ProductionStep::new(Symbol::terminal(11)), - 396 | ProductionStep::new(Symbol::terminal(15)), - 397 | ProductionStep::new(Symbol::terminal(12)), - 398 | ], - 399 | },] - 400 | ); - 401 | } - | - 402 | #[test] - 403 | fn test_inlining_with_precedence_and_alias() { - 404 | let grammar = SyntaxGrammar { - 405 | variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)], - 406 | variables: vec![ - 407 | SyntaxVariable { - 408 | name: "non-terminal-0".to_string(), - 409 | kind: VariableType::Named, - 410 | productions: vec![Production { - 411 | dynamic_precedence: 0, - 412 | steps: vec![ - 413 | // inlined - 414 | ProductionStep::new(Symbol::non_terminal(1)) - 415 | .with_prec(Precedence::Integer(1), Some(Associativity::Left)), - 416 | ProductionStep::new(Symbol::terminal(10)), - 417 | // inlined - 418 | ProductionStep::new(Symbol::non_terminal(2)) - 419 | .with_alias("outer_alias", true), - 420 | ], - 421 | }], - 422 | }, - 423 | SyntaxVariable { - 424 | name: "non-terminal-1".to_string(), - 425 | kind: VariableType::Named, - 426 | productions: vec![Production { - 427 | dynamic_precedence: 0, - 428 | steps: vec![ - 429 | ProductionStep::new(Symbol::terminal(11)) - 430 | .with_prec(Precedence::Integer(2), None) - 431 | .with_alias("inner_alias", true), - 432 | ProductionStep::new(Symbol::terminal(12)), - 433 | ], - 434 | }], - 435 | }, - 436 | SyntaxVariable { - 437 | name: "non-terminal-2".to_string(), - 438 | kind: VariableType::Named, - 439 | productions: vec![Production { - 440 | dynamic_precedence: 0, - 441 | steps: vec![ProductionStep::new(Symbol::terminal(13))], - 442 | }], - 443 | }, - 444 | ], - 445 | ..Default::default() - 446 | }; - | - 447 | let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap(); - | - 448 | let productions = inline_map - 449 | .inlined_productions(&grammar.variables[0].productions[0], 0) - 450 | 
.unwrap() - 451 | .collect::>(); - | - 452 | assert_eq!( - 453 | productions.iter().copied().cloned().collect::>(), - 454 | vec![Production { - 455 | dynamic_precedence: 0, - 456 | steps: vec![ - 457 | // The first step in the inlined production retains its precedence - 458 | // and alias. - 459 | ProductionStep::new(Symbol::terminal(11)) - 460 | .with_prec(Precedence::Integer(2), None) - 461 | .with_alias("inner_alias", true), - 462 | // The final step of the inlined production inherits the precedence of - 463 | // the inlined step. - 464 | ProductionStep::new(Symbol::terminal(12)) - 465 | .with_prec(Precedence::Integer(1), Some(Associativity::Left)), - 466 | ProductionStep::new(Symbol::terminal(10)), - 467 | ProductionStep::new(Symbol::non_terminal(2)).with_alias("outer_alias", true), - 468 | ] - 469 | }], - 470 | ); - | - 471 | assert_eq!( - 472 | inline_map - 473 | .inlined_productions(productions[0], 3) - 474 | .unwrap() - 475 | .cloned() - 476 | .collect::>(), - 477 | vec![Production { - 478 | dynamic_precedence: 0, - 479 | steps: vec![ - 480 | ProductionStep::new(Symbol::terminal(11)) - 481 | .with_prec(Precedence::Integer(2), None) - 482 | .with_alias("inner_alias", true), - 483 | ProductionStep::new(Symbol::terminal(12)) - 484 | .with_prec(Precedence::Integer(1), Some(Associativity::Left)), - 485 | ProductionStep::new(Symbol::terminal(10)), - 486 | // All steps of the inlined production inherit their alias from the - 487 | // inlined step. 
- 488 | ProductionStep::new(Symbol::terminal(13)).with_alias("outer_alias", true), - 489 | ] - 490 | }], - 491 | ); - 492 | } - | - 493 | #[test] - 494 | fn test_error_when_inlining_tokens() { - 495 | let lexical_grammar = LexicalGrammar { - 496 | variables: vec![LexicalVariable { - 497 | name: "something".to_string(), - 498 | kind: VariableType::Named, - 499 | implicit_precedence: 0, - 500 | start_state: 0, - 501 | }], - 502 | ..Default::default() - 503 | }; - | - 504 | let grammar = SyntaxGrammar { - 505 | variables_to_inline: vec![Symbol::terminal(0)], - 506 | variables: vec![SyntaxVariable { - 507 | name: "non-terminal-0".to_string(), - 508 | kind: VariableType::Named, - 509 | productions: vec![Production { - 510 | dynamic_precedence: 0, - 511 | steps: vec![ProductionStep::new(Symbol::terminal(0))], - 512 | }], - 513 | }], - 514 | ..Default::default() - 515 | }; - | - 516 | let result = process_inlines(&grammar, &lexical_grammar); - 517 | assert!(result.is_err(), "expected an error, but got none"); - 518 | let err = result.err().unwrap(); - 519 | assert_eq!(err.to_string(), "Token `something` cannot be inlined",); - 520 | } - 521 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/quickjs.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | collections::HashMap, - 3 | path::{Path, PathBuf}, - 4 | sync::{LazyLock, Mutex}, - 5 | }; - | - 6 | use log::{error, info, warn}; - 7 | use rquickjs::{ - 8 | loader::{FileResolver, ScriptLoader}, - 9 | Context, Ctx, Function, Module, Object, Runtime, Type, Value, - 10 | }; - | - 11 | use super::{JSError, JSResult}; - | - 12 | const DSL: &[u8] = include_bytes!("dsl.js"); - | - 13 | trait JSResultExt { - 14 | fn or_js_error(self, ctx: &Ctx) -> JSResult; - 15 | } - | - 16 | impl JSResultExt for Result { - 17 | fn or_js_error(self, ctx: &Ctx) -> JSResult { - 18 | match self { - 19 | Ok(v) => Ok(v), - 20 | 
Err(rquickjs::Error::Exception) => Err(format_js_exception(ctx.catch())), - 21 | Err(e) => Err(JSError::QuickJS(e.to_string())), - 22 | } - 23 | } - 24 | } - | - 25 | fn format_js_exception(v: Value) -> JSError { - 26 | let Some(exception) = v.into_exception() else { - 27 | return JSError::QuickJS("Expected a JS exception".to_string()); - 28 | }; - | - 29 | let error_obj = exception.as_object(); - 30 | let mut parts = Vec::new(); - | - 31 | for (key, label) in [("message", "Message"), ("stack", "Stack"), ("name", "Type")] { - 32 | if let Ok(value) = error_obj.get::<_, String>(key) { - 33 | parts.push(format!("{label}: {value}")); - 34 | } - 35 | } - | - 36 | if parts.is_empty() { - 37 | JSError::QuickJS(exception.to_string()) - 38 | } else { - 39 | JSError::QuickJS(parts.join("\n")) - 40 | } - 41 | } - | - 42 | static FILE_CACHE: LazyLock>> = - 43 | LazyLock::new(|| Mutex::new(HashMap::new())); - | - 44 | #[rquickjs::function] - 45 | fn load_file(path: String) -> rquickjs::Result { - 46 | { - 47 | let cache = FILE_CACHE.lock().unwrap(); - 48 | if let Some(cached) = cache.get(&path) { - 49 | return Ok(cached.clone()); - 50 | } - 51 | } - | - 52 | let content = std::fs::read_to_string(&path).map_err(|e| { - 53 | rquickjs::Error::new_from_js_message("IOError", "FileReadError", e.to_string()) - 54 | })?; - | - 55 | { - 56 | let mut cache = FILE_CACHE.lock().unwrap(); - 57 | cache.insert(path, content.clone()); - 58 | } - | - 59 | Ok(content) - 60 | } - | - 61 | #[rquickjs::class] - 62 | #[derive(rquickjs::class::Trace, rquickjs::JsLifetime, Default)] - 63 | pub struct Console {} - | - 64 | impl Console { - 65 | fn format_args(args: &[Value<'_>]) -> String { - 66 | args.iter() - 67 | .map(|v| match v.type_of() { - 68 | Type::Bool => v.as_bool().unwrap().to_string(), - 69 | Type::Int => v.as_int().unwrap().to_string(), - 70 | Type::Float => v.as_float().unwrap().to_string(), - 71 | Type::String => v - 72 | .as_string() - 73 | .unwrap() - 74 | .to_string() - 75 | 
.unwrap_or_else(|_| String::new()), - 76 | Type::Null => "null".to_string(), - 77 | Type::Undefined => "undefined".to_string(), - 78 | Type::Uninitialized => "uninitialized".to_string(), - 79 | Type::Module => "module".to_string(), - 80 | Type::BigInt => v.get::().unwrap_or_else(|_| "BigInt".to_string()), - 81 | Type::Unknown => "unknown".to_string(), - 82 | Type::Symbol - 83 | | Type::Object - 84 | | Type::Array - 85 | | Type::Function - 86 | | Type::Constructor - 87 | | Type::Promise - 88 | | Type::Exception => "[object Object]".to_string(), - 89 | }) - 90 | .collect::>() - 91 | .join(" ") - 92 | } - 93 | } - | - 94 | #[rquickjs::methods] - 95 | impl Console { - 96 | #[qjs(constructor)] - 97 | pub const fn new() -> Self { - 98 | Console {} - 99 | } - | - 100 | #[allow(clippy::needless_pass_by_value)] - 101 | pub fn log(&self, args: rquickjs::function::Rest>) -> rquickjs::Result<()> { - 102 | info!("{}", Self::format_args(&args)); - 103 | Ok(()) - 104 | } - | - 105 | #[allow(clippy::needless_pass_by_value)] - 106 | pub fn warn(&self, args: rquickjs::function::Rest>) -> rquickjs::Result<()> { - 107 | warn!("{}", Self::format_args(&args)); - 108 | Ok(()) - 109 | } - | - 110 | #[allow(clippy::needless_pass_by_value)] - 111 | pub fn error(&self, args: rquickjs::function::Rest>) -> rquickjs::Result<()> { - 112 | error!("Error: {}", Self::format_args(&args)); - 113 | Ok(()) - 114 | } - 115 | } - | - 116 | fn resolve_module_path(base_path: &Path, module_path: &str) -> rquickjs::Result { - 117 | let candidates = if module_path.starts_with("./") || module_path.starts_with("../") { - 118 | let target = base_path.join(module_path); - 119 | vec![ - 120 | target.with_extension("js"), - 121 | target.with_extension("json"), - 122 | target.clone(), - 123 | ] - 124 | } else { - 125 | let local_target = base_path.join(module_path); - 126 | let node_modules_target = Path::new("node_modules").join(module_path); - | - 127 | vec![ - 128 | local_target.with_extension("js"), - 129 | 
local_target.with_extension("json"), - 130 | local_target.clone(), - 131 | node_modules_target.with_extension("js"), - 132 | node_modules_target.with_extension("json"), - 133 | node_modules_target, - 134 | ] - 135 | }; - | - 136 | for candidate in candidates { - 137 | if let Ok(resolved) = try_resolve_path(&candidate) { - 138 | return Ok(resolved); - 139 | } - 140 | } - | - 141 | Err(rquickjs::Error::new_from_js_message( - 142 | "Error", - 143 | "ModuleNotFound", - 144 | format!("Module not found: {module_path}"), - 145 | )) - 146 | } - | - 147 | fn try_resolve_path(path: &Path) -> rquickjs::Result { - 148 | let metadata = std::fs::metadata(path).map_err(|_| { - 149 | rquickjs::Error::new_from_js_message( - 150 | "Error", - 151 | "FileNotFound", - 152 | format!("Path not found: {}", path.display()), - 153 | ) - 154 | })?; - | - 155 | if metadata.is_file() { - 156 | return Ok(path.to_path_buf()); - 157 | } - | - 158 | if metadata.is_dir() { - 159 | let index_path = path.join("index.js"); - 160 | if index_path.exists() { - 161 | return Ok(index_path); - 162 | } - 163 | } - | - 164 | Err(rquickjs::Error::new_from_js_message( - 165 | "Error", - 166 | "ResolutionFailed", - 167 | format!("Cannot resolve: {}", path.display()), - 168 | )) - 169 | } - | - 170 | #[allow(clippy::needless_pass_by_value)] - 171 | fn require_from_module<'a>( - 172 | ctx: Ctx<'a>, - 173 | module_path: String, - 174 | from_module: &str, - 175 | ) -> rquickjs::Result> { - 176 | let current_module = PathBuf::from(from_module); - 177 | let current_dir = if current_module.is_file() { - 178 | current_module.parent().unwrap_or(Path::new(".")) - 179 | } else { - 180 | current_module.as_path() - 181 | }; - | - 182 | let resolved_path = resolve_module_path(current_dir, &module_path)?; - | - 183 | let contents = load_file(resolved_path.to_string_lossy().to_string())?; - | - 184 | load_module_from_content(&ctx, &resolved_path, &contents) - 185 | } - | - 186 | fn load_module_from_content<'a>( - 187 | ctx: 
&Ctx<'a>, - 188 | path: &Path, - 189 | contents: &str, - 190 | ) -> rquickjs::Result> { - 191 | if path.extension().is_some_and(|ext| ext == "json") { - 192 | return ctx.eval::(format!("JSON.parse({contents:?})")); - 193 | } - | - 194 | let exports = Object::new(ctx.clone())?; - 195 | let module_obj = Object::new(ctx.clone())?; - 196 | module_obj.set("exports", exports.clone())?; - | - 197 | let filename = path.to_string_lossy().to_string(); - 198 | let dirname = path - 199 | .parent() - 200 | .map_or_else(|| ".".to_string(), |p| p.to_string_lossy().to_string()); - | - 201 | // Require function specific to *this* module - 202 | let module_path = filename.clone(); - 203 | let require = Function::new( - 204 | ctx.clone(), - 205 | move |ctx_inner: Ctx<'a>, target_path: String| -> rquickjs::Result> { - 206 | require_from_module(ctx_inner, target_path, &module_path) - 207 | }, - 208 | )?; - | - 209 | let wrapper = - 210 | format!("(function(exports, require, module, __filename, __dirname) {{ {contents} }})"); - | - 211 | let module_func = ctx.eval::(wrapper)?; - 212 | module_func.call::<_, Value>((exports, require, module_obj.clone(), filename, dirname))?; - | - 213 | module_obj.get("exports") - 214 | } - | - 215 | pub fn execute_native_runtime(grammar_path: &Path) -> JSResult { - 216 | let runtime = Runtime::new()?; - | - 217 | runtime.set_memory_limit(64 * 1024 * 1024); // 64MB - 218 | runtime.set_max_stack_size(256 * 1024); // 256KB - | - 219 | let context = Context::full(&runtime)?; - | - 220 | let resolver = FileResolver::default() - 221 | .with_path("./") - 222 | .with_pattern("{}.mjs"); - 223 | let loader = ScriptLoader::default().with_extension("mjs"); - 224 | runtime.set_loader(resolver, loader); - | - 225 | let cwd = std::env::current_dir()?; - 226 | let relative_path = pathdiff::diff_paths(grammar_path, &cwd) - 227 | .map(|p| p.to_string_lossy().to_string()) - 228 | .ok_or_else(|| JSError::IO("Failed to get relative path".to_string()))?; - | - 229 | 
context.with(|ctx| -> JSResult { - 230 | let globals = ctx.globals(); - | - 231 | globals.set("native", true).or_js_error(&ctx)?; - 232 | globals - 233 | .set("__ts_grammar_path", relative_path) - 234 | .or_js_error(&ctx)?; - | - 235 | let console = rquickjs::Class::instance(ctx.clone(), Console::new()).or_js_error(&ctx)?; - 236 | globals.set("console", console).or_js_error(&ctx)?; - | - 237 | let process = Object::new(ctx.clone()).or_js_error(&ctx)?; - 238 | let env = Object::new(ctx.clone()).or_js_error(&ctx)?; - 239 | for (key, value) in std::env::vars() { - 240 | env.set(key, value).or_js_error(&ctx)?; - 241 | } - 242 | process.set("env", env).or_js_error(&ctx)?; - 243 | globals.set("process", process).or_js_error(&ctx)?; - | - 244 | let module = Object::new(ctx.clone()).or_js_error(&ctx)?; - 245 | module - 246 | .set("exports", Object::new(ctx.clone()).or_js_error(&ctx)?) - 247 | .or_js_error(&ctx)?; - 248 | globals.set("module", module).or_js_error(&ctx)?; - | - 249 | let grammar_path_string = grammar_path.to_string_lossy().to_string(); - 250 | let main_require = Function::new( - 251 | ctx.clone(), - 252 | move |ctx_inner, target_path: String| -> rquickjs::Result { - 253 | require_from_module(ctx_inner, target_path, &grammar_path_string) - 254 | }, - 255 | )?; - 256 | globals.set("require", main_require).or_js_error(&ctx)?; - | - 257 | let promise = Module::evaluate(ctx.clone(), "dsl", DSL).or_js_error(&ctx)?; - 258 | promise.finish::<()>().or_js_error(&ctx)?; - | - 259 | let grammar_json = ctx - 260 | .eval::("globalThis.output") - 261 | .map(|s| s.to_string()) - 262 | .or_js_error(&ctx)? - 263 | .or_js_error(&ctx)?; - | - 264 | let parsed = serde_json::from_str::(&grammar_json)?; - 265 | Ok(serde_json::to_string_pretty(&parsed)?) 
- 266 | }) - 267 | } - | - 268 | #[cfg(test)] - 269 | mod tests { - 270 | use std::{ - 271 | fs, - 272 | sync::{Arc, Mutex, OnceLock}, - 273 | }; - 274 | use tempfile::TempDir; - | - 275 | use super::*; - | - 276 | static TEST_MUTEX: OnceLock>> = OnceLock::new(); - | - 277 | fn with_test_lock(test: F) -> R - 278 | where - 279 | F: FnOnce() -> R, - 280 | { - 281 | let _guard = TEST_MUTEX.get_or_init(|| Arc::new(Mutex::new(()))).lock(); - 282 | let result = test(); - 283 | cleanup_runtime_state(); - 284 | result - 285 | } - | - 286 | fn cleanup_runtime_state() { - 287 | FILE_CACHE.lock().unwrap().clear(); - 288 | } - | - 289 | #[test] - 290 | fn test_basic_grammar_execution() { - 291 | with_test_lock(|| { - 292 | let temp_dir = TempDir::new().unwrap(); - 293 | std::env::set_current_dir(temp_dir.path()).unwrap(); - | - 294 | let grammar_path = temp_dir.path().join("grammar.js"); - 295 | fs::write( - 296 | &grammar_path, - 297 | r" - 298 | module.exports = grammar({ - 299 | name: 'test', - 300 | rules: { source_file: $ => 'hello' } - 301 | }); - 302 | ", - 303 | ) - 304 | .unwrap(); - | - 305 | let json = execute_native_runtime(&grammar_path).expect("Failed to execute grammar"); - 306 | assert!(json.contains("\"name\": \"test\"")); - 307 | assert!(json.contains("\"hello\"")); - 308 | }); - 309 | } - | - 310 | #[test] - 311 | fn test_module_imports() { - 312 | with_test_lock(|| { - 313 | let temp_dir = TempDir::new().unwrap(); - 314 | std::env::set_current_dir(temp_dir.path()).unwrap(); - | - 315 | fs::write( - 316 | temp_dir.path().join("common.js"), - 317 | r" - 318 | module.exports = { identifier: $ => /[a-zA-Z_][a-zA-Z0-9_]*/ }; - 319 | ", - 320 | ) - 321 | .unwrap(); - | - 322 | fs::write( - 323 | temp_dir.path().join("grammar.js"), - 324 | r" - 325 | const common = require('./common'); - 326 | module.exports = grammar({ - 327 | name: 'test_import', - 328 | rules: { source_file: common.identifier } - 329 | }); - 330 | ", - 331 | ) - 332 | .unwrap(); - | - 333 | let 
json = execute_native_runtime(&temp_dir.path().join("grammar.js")) - 334 | .expect("Failed to execute grammar with imports"); - 335 | assert!(json.contains("\"name\": \"test_import\"")); - 336 | }); - 337 | } - | - 338 | #[test] - 339 | fn test_json_module_loading() { - 340 | with_test_lock(|| { - 341 | let temp_dir = TempDir::new().unwrap(); - 342 | std::env::set_current_dir(temp_dir.path()).unwrap(); - | - 343 | fs::write( - 344 | temp_dir.path().join("package.json"), - 345 | r#"{"version": "1.0.0"}"#, - 346 | ) - 347 | .unwrap(); - 348 | fs::write( - 349 | temp_dir.path().join("grammar.js"), - 350 | r" - 351 | const pkg = require('./package.json'); - 352 | module.exports = grammar({ - 353 | name: 'json_test', - 354 | rules: { - 355 | source_file: $ => 'version_' + pkg.version.replace(/\./g, '_') - 356 | } - 357 | }); - 358 | ", - 359 | ) - 360 | .unwrap(); - | - 361 | let json = execute_native_runtime(&temp_dir.path().join("grammar.js")) - 362 | .expect("Failed to execute grammar with JSON import"); - 363 | assert!(json.contains("version_1_0_0")); - 364 | }); - 365 | } - | - 366 | #[test] - 367 | fn test_resource_limits() { - 368 | with_test_lock(|| { - 369 | let temp_dir = TempDir::new().unwrap(); - 370 | std::env::set_current_dir(temp_dir.path()).unwrap(); - | - 371 | fs::write( - 372 | temp_dir.path().join("grammar.js"), - 373 | r" - 374 | const huge = new Array(10000000).fill('x'.repeat(1000)); - 375 | module.exports = grammar({ - 376 | name: 'resource_test', - 377 | rules: { source_file: $ => 'test' } - 378 | }); - 379 | ", - 380 | ) - 381 | .unwrap(); - | - 382 | let result = execute_native_runtime(&temp_dir.path().join("grammar.js")); - 383 | assert!(result.is_err()); - 384 | assert!(matches!(result.unwrap_err(), JSError::QuickJS(_))); - 385 | }); - 386 | } - 387 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/render.rs: 
--------------------------------------------------------------------------------- - 1 | use std::{ - 2 | cmp, - 3 | collections::{BTreeMap, BTreeSet, HashMap, HashSet}, - 4 | fmt::Write, - 5 | mem::swap, - 6 | }; - | - 7 | use crate::LANGUAGE_VERSION; - 8 | use indoc::indoc; - | - 9 | use super::{ - 10 | build_tables::Tables, - 11 | grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}, - 12 | nfa::CharacterSet, - 13 | node_types::ChildType, - 14 | rules::{Alias, AliasMap, Symbol, SymbolType, TokenSet}, - 15 | tables::{ - 16 | AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable, - 17 | ParseTableEntry, - 18 | }, - 19 | }; - | - 20 | const SMALL_STATE_THRESHOLD: usize = 64; - 21 | pub const ABI_VERSION_MIN: usize = 14; - 22 | pub const ABI_VERSION_MAX: usize = LANGUAGE_VERSION; - 23 | const ABI_VERSION_WITH_RESERVED_WORDS: usize = 15; - | - 24 | #[clippy::format_args] - 25 | macro_rules! add { - 26 | ($this: tt, $($arg: tt)*) => {{ - 27 | $this.buffer.write_fmt(format_args!($($arg)*)).unwrap(); - 28 | }} - 29 | } - | - 30 | macro_rules! add_whitespace { - 31 | ($this:tt) => {{ - 32 | for _ in 0..$this.indent_level { - 33 | write!(&mut $this.buffer, " ").unwrap(); - 34 | } - 35 | }}; - 36 | } - | - 37 | #[clippy::format_args] - 38 | macro_rules! add_line { - 39 | ($this: tt, $($arg: tt)*) => { - 40 | add_whitespace!($this); - 41 | $this.buffer.write_fmt(format_args!($($arg)*)).unwrap(); - 42 | $this.buffer += "\n"; - 43 | } - 44 | } - | - 45 | macro_rules! indent { - 46 | ($this:tt) => { - 47 | $this.indent_level += 1; - 48 | }; - 49 | } - | - 50 | macro_rules! 
dedent { - 51 | ($this:tt) => { - 52 | assert_ne!($this.indent_level, 0); - 53 | $this.indent_level -= 1; - 54 | }; - 55 | } - | - 56 | #[derive(Default)] - 57 | struct Generator { - 58 | buffer: String, - 59 | indent_level: usize, - 60 | language_name: String, - 61 | parse_table: ParseTable, - 62 | main_lex_table: LexTable, - 63 | keyword_lex_table: LexTable, - 64 | large_character_sets: Vec<(Option, CharacterSet)>, - 65 | large_character_set_info: Vec, - 66 | large_state_count: usize, - 67 | syntax_grammar: SyntaxGrammar, - 68 | lexical_grammar: LexicalGrammar, - 69 | default_aliases: AliasMap, - 70 | symbol_order: HashMap, - 71 | symbol_ids: HashMap, - 72 | alias_ids: HashMap, - 73 | unique_aliases: Vec, - 74 | symbol_map: HashMap, - 75 | reserved_word_sets: Vec, - 76 | reserved_word_set_ids_by_parse_state: Vec, - 77 | field_names: Vec, - 78 | supertype_symbol_map: BTreeMap>, - 79 | supertype_map: BTreeMap>, - 80 | abi_version: usize, - 81 | metadata: Option, - 82 | } - | - 83 | struct LargeCharacterSetInfo { - 84 | constant_name: String, - 85 | is_used: bool, - 86 | } - | - 87 | struct Metadata { - 88 | major_version: u8, - 89 | minor_version: u8, - 90 | patch_version: u8, - 91 | } - | - 92 | impl Generator { - 93 | fn generate(mut self) -> String { - 94 | self.init(); - 95 | self.add_header(); - 96 | self.add_includes(); - 97 | self.add_pragmas(); - 98 | self.add_stats(); - 99 | self.add_symbol_enum(); - 100 | self.add_symbol_names_list(); - 101 | self.add_unique_symbol_map(); - 102 | self.add_symbol_metadata_list(); - | - 103 | if !self.field_names.is_empty() { - 104 | self.add_field_name_enum(); - 105 | self.add_field_name_names_list(); - 106 | self.add_field_sequences(); - 107 | } - | - 108 | if !self.parse_table.production_infos.is_empty() { - 109 | self.add_alias_sequences(); - 110 | } - | - 111 | self.add_non_terminal_alias_map(); - 112 | self.add_primary_state_id_list(); - | - 113 | if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && 
!self.supertype_map.is_empty() { - 114 | self.add_supertype_map(); - 115 | } - | - 116 | let buffer_offset_before_lex_functions = self.buffer.len(); - | - 117 | let mut main_lex_table = LexTable::default(); - 118 | swap(&mut main_lex_table, &mut self.main_lex_table); - 119 | self.add_lex_function("ts_lex", main_lex_table); - | - 120 | if self.syntax_grammar.word_token.is_some() { - 121 | let mut keyword_lex_table = LexTable::default(); - 122 | swap(&mut keyword_lex_table, &mut self.keyword_lex_table); - 123 | self.add_lex_function("ts_lex_keywords", keyword_lex_table); - 124 | } - | - 125 | // Once the lex functions are generated, and we've determined which large - 126 | // character sets are actually used, we can generate the large character set - 127 | // constants. Insert them into the output buffer before the lex functions. - 128 | let lex_functions = self.buffer[buffer_offset_before_lex_functions..].to_string(); - 129 | self.buffer.truncate(buffer_offset_before_lex_functions); - 130 | for ix in 0..self.large_character_sets.len() { - 131 | self.add_character_set(ix); - 132 | } - 133 | self.buffer.push_str(&lex_functions); - | - 134 | self.add_lex_modes(); - | - 135 | if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && self.reserved_word_sets.len() > 1 - 136 | { - 137 | self.add_reserved_word_sets(); - 138 | } - | - 139 | self.add_parse_table(); - | - 140 | if !self.syntax_grammar.external_tokens.is_empty() { - 141 | self.add_external_token_enum(); - 142 | self.add_external_scanner_symbol_map(); - 143 | self.add_external_scanner_states_list(); - 144 | } - | - 145 | self.add_parser_export(); - | - 146 | self.buffer - 147 | } - | - 148 | fn init(&mut self) { - 149 | let mut symbol_identifiers = HashSet::new(); - 150 | for i in 0..self.parse_table.symbols.len() { - 151 | self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); - 152 | } - 153 | self.symbol_ids.insert( - 154 | Symbol::end_of_nonterminal_extra(), - 155 | 
self.symbol_ids[&Symbol::end()].clone(), - 156 | ); - | - 157 | self.symbol_map = HashMap::new(); - | - 158 | for symbol in &self.parse_table.symbols { - 159 | let mut mapping = symbol; - | - 160 | // There can be multiple symbols in the grammar that have the same name and kind, - 161 | // due to simple aliases. When that happens, ensure that they map to the same - 162 | // public-facing symbol. If one of the symbols is not aliased, choose that one - 163 | // to be the public-facing symbol. Otherwise, pick the symbol with the lowest - 164 | // numeric value. - 165 | if let Some(alias) = self.default_aliases.get(symbol) { - 166 | let kind = alias.kind(); - 167 | for other_symbol in &self.parse_table.symbols { - 168 | if let Some(other_alias) = self.default_aliases.get(other_symbol) { - 169 | if other_symbol < mapping && other_alias == alias { - 170 | mapping = other_symbol; - 171 | } - 172 | } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { - 173 | mapping = other_symbol; - 174 | break; - 175 | } - 176 | } - 177 | } - 178 | // Two anonymous tokens with different flags but the same string value - 179 | // should be represented with the same symbol in the public API. 
Examples: - 180 | // * "<" and token(prec(1, "<")) - 181 | // * "(" and token.immediate("(") - 182 | else if symbol.is_terminal() { - 183 | let metadata = self.metadata_for_symbol(*symbol); - 184 | for other_symbol in &self.parse_table.symbols { - 185 | let other_metadata = self.metadata_for_symbol(*other_symbol); - 186 | if other_metadata == metadata { - 187 | if let Some(mapped) = self.symbol_map.get(other_symbol) { - 188 | if mapped == symbol { - 189 | break; - 190 | } - 191 | } - 192 | mapping = other_symbol; - 193 | break; - 194 | } - 195 | } - 196 | } - | - 197 | self.symbol_map.insert(*symbol, *mapping); - 198 | } - | - 199 | for production_info in &self.parse_table.production_infos { - 200 | // Build a list of all field names - 201 | for field_name in production_info.field_map.keys() { - 202 | if let Err(i) = self.field_names.binary_search(field_name) { - 203 | self.field_names.insert(i, field_name.clone()); - 204 | } - 205 | } - | - 206 | for alias in &production_info.alias_sequence { - 207 | // Generate a mapping from aliases to C identifiers. - 208 | if let Some(alias) = &alias { - 209 | // Some aliases match an existing symbol in the grammar. - 210 | let alias_id = - 211 | if let Some(existing_symbol) = self.symbols_for_alias(alias).first() { - 212 | self.symbol_ids[&self.symbol_map[existing_symbol]].clone() - 213 | } - 214 | // Other aliases don't match any existing symbol, and need their own - 215 | // identifiers. 
- 216 | else { - 217 | if let Err(i) = self.unique_aliases.binary_search(alias) { - 218 | self.unique_aliases.insert(i, alias.clone()); - 219 | } - | - 220 | if alias.is_named { - 221 | format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) - 222 | } else { - 223 | format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) - 224 | } - 225 | }; - | - 226 | self.alias_ids.entry(alias.clone()).or_insert(alias_id); - 227 | } - 228 | } - 229 | } - | - 230 | for (ix, (symbol, _)) in self.large_character_sets.iter().enumerate() { - 231 | let count = self.large_character_sets[0..ix] - 232 | .iter() - 233 | .filter(|(sym, _)| sym == symbol) - 234 | .count() - 235 | + 1; - 236 | let constant_name = if let Some(symbol) = symbol { - 237 | format!("{}_character_set_{}", self.symbol_ids[symbol], count) - 238 | } else { - 239 | format!("extras_character_set_{count}") - 240 | }; - 241 | self.large_character_set_info.push(LargeCharacterSetInfo { - 242 | constant_name, - 243 | is_used: false, - 244 | }); - 245 | } - | - 246 | // Assign an id to each unique reserved word set - 247 | self.reserved_word_sets.push(TokenSet::new()); - 248 | for state in &self.parse_table.states { - 249 | let id = if let Some(ix) = self - 250 | .reserved_word_sets - 251 | .iter() - 252 | .position(|set| *set == state.reserved_words) - 253 | { - 254 | ix - 255 | } else { - 256 | self.reserved_word_sets.push(state.reserved_words.clone()); - 257 | self.reserved_word_sets.len() - 1 - 258 | }; - 259 | self.reserved_word_set_ids_by_parse_state.push(id); - 260 | } - | - 261 | if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS { - 262 | for (supertype, subtypes) in &self.supertype_symbol_map { - 263 | if let Some(supertype) = self.symbol_ids.get(supertype) { - 264 | self.supertype_map - 265 | .entry(supertype.clone()) - 266 | .or_insert_with(|| subtypes.clone()); - 267 | } - 268 | } - | - 269 | self.supertype_symbol_map.clear(); - 270 | } - | - 271 | // Determine which states should use the 
"small state" representation, and which should - 272 | // use the normal array representation. - 273 | let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); - 274 | self.large_state_count = self - 275 | .parse_table - 276 | .states - 277 | .iter() - 278 | .enumerate() - 279 | .take_while(|(i, s)| { - 280 | *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold - 281 | }) - 282 | .count(); - 283 | } - | - 284 | fn add_header(&mut self) { - 285 | add_line!(self, "/* Automatically @generated by tree-sitter */",); - 286 | add_line!(self, ""); - 287 | } - | - 288 | fn add_includes(&mut self) { - 289 | add_line!(self, "#include \"tree_sitter/parser.h\""); - 290 | add_line!(self, ""); - 291 | } - | - 292 | fn add_pragmas(&mut self) { - 293 | add_line!(self, "#if defined(__GNUC__) || defined(__clang__)"); - 294 | add_line!( - 295 | self, - 296 | "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"" - 297 | ); - 298 | add_line!(self, "#endif"); - 299 | add_line!(self, ""); - | - 300 | // Compiling large lexer functions can be very slow. Disabling optimizations - 301 | // is not ideal, but only a very small fraction of overall parse time is - 302 | // spent lexing, so the performance impact of this is negligible. 
- 303 | if self.main_lex_table.states.len() > 300 { - 304 | add_line!(self, "#ifdef _MSC_VER"); - 305 | add_line!(self, "#pragma optimize(\"\", off)"); - 306 | add_line!(self, "#elif defined(__clang__)"); - 307 | add_line!(self, "#pragma clang optimize off"); - 308 | add_line!(self, "#elif defined(__GNUC__)"); - 309 | add_line!(self, "#pragma GCC optimize (\"O0\")"); - 310 | add_line!(self, "#endif"); - 311 | add_line!(self, ""); - 312 | } - 313 | } - | - 314 | fn add_stats(&mut self) { - 315 | let token_count = self - 316 | .parse_table - 317 | .symbols - 318 | .iter() - 319 | .filter(|symbol| { - 320 | if symbol.is_terminal() || symbol.is_eof() { - 321 | true - 322 | } else if symbol.is_external() { - 323 | self.syntax_grammar.external_tokens[symbol.index] - 324 | .corresponding_internal_token - 325 | .is_none() - 326 | } else { - 327 | false - 328 | } - 329 | }) - 330 | .count(); - | - 331 | add_line!(self, "#define LANGUAGE_VERSION {}", self.abi_version); - 332 | add_line!( - 333 | self, - 334 | "#define STATE_COUNT {}", - 335 | self.parse_table.states.len() - 336 | ); - 337 | add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count); - | - 338 | add_line!( - 339 | self, - 340 | "#define SYMBOL_COUNT {}", - 341 | self.parse_table.symbols.len() - 342 | ); - 343 | add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len()); - 344 | add_line!(self, "#define TOKEN_COUNT {token_count}"); - 345 | add_line!( - 346 | self, - 347 | "#define EXTERNAL_TOKEN_COUNT {}", - 348 | self.syntax_grammar.external_tokens.len() - 349 | ); - 350 | add_line!(self, "#define FIELD_COUNT {}", self.field_names.len()); - 351 | add_line!( - 352 | self, - 353 | "#define MAX_ALIAS_SEQUENCE_LENGTH {}", - 354 | self.parse_table.max_aliased_production_length - 355 | ); - 356 | add_line!( - 357 | self, - 358 | "#define MAX_RESERVED_WORD_SET_SIZE {}", - 359 | self.reserved_word_sets - 360 | .iter() - 361 | .map(TokenSet::len) - 362 | .max() - 363 | .unwrap() - 364 | ); - | - 
365 | add_line!( - 366 | self, - 367 | "#define PRODUCTION_ID_COUNT {}", - 368 | self.parse_table.production_infos.len() - 369 | ); - 370 | add_line!(self, "#define SUPERTYPE_COUNT {}", self.supertype_map.len()); - 371 | add_line!(self, ""); - 372 | } - | - 373 | fn add_symbol_enum(&mut self) { - 374 | add_line!(self, "enum ts_symbol_identifiers {{"); - 375 | indent!(self); - 376 | self.symbol_order.insert(Symbol::end(), 0); - 377 | let mut i = 1; - 378 | for symbol in &self.parse_table.symbols { - 379 | if *symbol != Symbol::end() { - 380 | self.symbol_order.insert(*symbol, i); - 381 | add_line!(self, "{} = {i},", self.symbol_ids[symbol]); - 382 | i += 1; - 383 | } - 384 | } - 385 | for alias in &self.unique_aliases { - 386 | add_line!(self, "{} = {i},", self.alias_ids[alias]); - 387 | i += 1; - 388 | } - 389 | dedent!(self); - 390 | add_line!(self, "}};"); - 391 | add_line!(self, ""); - 392 | } - | - 393 | fn add_symbol_names_list(&mut self) { - 394 | add_line!(self, "static const char * const ts_symbol_names[] = {{"); - 395 | indent!(self); - 396 | for symbol in &self.parse_table.symbols { - 397 | let name = self.sanitize_string( - 398 | self.default_aliases - 399 | .get(symbol) - 400 | .map_or(self.metadata_for_symbol(*symbol).0, |alias| { - 401 | alias.value.as_str() - 402 | }), - 403 | ); - 404 | add_line!(self, "[{}] = \"{name}\",", self.symbol_ids[symbol]); - 405 | } - 406 | for alias in &self.unique_aliases { - 407 | add_line!( - 408 | self, - 409 | "[{}] = \"{}\",", - 410 | self.alias_ids[alias], - 411 | self.sanitize_string(&alias.value) - 412 | ); - 413 | } - 414 | dedent!(self); - 415 | add_line!(self, "}};"); - 416 | add_line!(self, ""); - 417 | } - | - 418 | fn add_unique_symbol_map(&mut self) { - 419 | add_line!(self, "static const TSSymbol ts_symbol_map[] = {{"); - 420 | indent!(self); - 421 | for symbol in &self.parse_table.symbols { - 422 | add_line!( - 423 | self, - 424 | "[{}] = {},", - 425 | self.symbol_ids[symbol], - 426 | 
self.symbol_ids[&self.symbol_map[symbol]], - 427 | ); - 428 | } - | - 429 | for alias in &self.unique_aliases { - 430 | add_line!( - 431 | self, - 432 | "[{}] = {},", - 433 | self.alias_ids[alias], - 434 | self.alias_ids[alias], - 435 | ); - 436 | } - | - 437 | dedent!(self); - 438 | add_line!(self, "}};"); - 439 | add_line!(self, ""); - 440 | } - | - 441 | fn add_field_name_enum(&mut self) { - 442 | add_line!(self, "enum ts_field_identifiers {{"); - 443 | indent!(self); - 444 | for (i, field_name) in self.field_names.iter().enumerate() { - 445 | add_line!(self, "{} = {},", self.field_id(field_name), i + 1); - 446 | } - 447 | dedent!(self); - 448 | add_line!(self, "}};"); - 449 | add_line!(self, ""); - 450 | } - | - 451 | fn add_field_name_names_list(&mut self) { - 452 | add_line!(self, "static const char * const ts_field_names[] = {{"); - 453 | indent!(self); - 454 | add_line!(self, "[0] = NULL,"); - 455 | for field_name in &self.field_names { - 456 | add_line!(self, "[{}] = \"{field_name}\",", self.field_id(field_name)); - 457 | } - 458 | dedent!(self); - 459 | add_line!(self, "}};"); - 460 | add_line!(self, ""); - 461 | } - | - 462 | fn add_symbol_metadata_list(&mut self) { - 463 | add_line!( - 464 | self, - 465 | "static const TSSymbolMetadata ts_symbol_metadata[] = {{" - 466 | ); - 467 | indent!(self); - 468 | for symbol in &self.parse_table.symbols { - 469 | add_line!(self, "[{}] = {{", self.symbol_ids[symbol]); - 470 | indent!(self); - 471 | if let Some(Alias { is_named, .. 
}) = self.default_aliases.get(symbol) { - 472 | add_line!(self, ".visible = true,"); - 473 | add_line!(self, ".named = {is_named},"); - 474 | } else { - 475 | match self.metadata_for_symbol(*symbol).1 { - 476 | VariableType::Named => { - 477 | add_line!(self, ".visible = true,"); - 478 | add_line!(self, ".named = true,"); - 479 | } - 480 | VariableType::Anonymous => { - 481 | add_line!(self, ".visible = true,"); - 482 | add_line!(self, ".named = false,"); - 483 | } - 484 | VariableType::Hidden => { - 485 | add_line!(self, ".visible = false,"); - 486 | add_line!(self, ".named = true,"); - 487 | if self.syntax_grammar.supertype_symbols.contains(symbol) { - 488 | add_line!(self, ".supertype = true,"); - 489 | } - 490 | } - 491 | VariableType::Auxiliary => { - 492 | add_line!(self, ".visible = false,"); - 493 | add_line!(self, ".named = false,"); - 494 | } - 495 | } - 496 | } - 497 | dedent!(self); - 498 | add_line!(self, "}},"); - 499 | } - 500 | for alias in &self.unique_aliases { - 501 | add_line!(self, "[{}] = {{", self.alias_ids[alias]); - 502 | indent!(self); - 503 | add_line!(self, ".visible = true,"); - 504 | add_line!(self, ".named = {},", alias.is_named); - 505 | dedent!(self); - 506 | add_line!(self, "}},"); - 507 | } - 508 | dedent!(self); - 509 | add_line!(self, "}};"); - 510 | add_line!(self, ""); - 511 | } - | - 512 | fn add_alias_sequences(&mut self) { - 513 | add_line!( - 514 | self, - 515 | "static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {{", - 516 | ); - 517 | indent!(self); - 518 | for (i, production_info) in self.parse_table.production_infos.iter().enumerate() { - 519 | if production_info.alias_sequence.is_empty() { - 520 | // Work around MSVC's intolerance of empty array initializers by - 521 | // explicitly zero-initializing the first element. 
- 522 | if i == 0 { - 523 | add_line!(self, "[0] = {{0}},"); - 524 | } - 525 | continue; - 526 | } - | - 527 | add_line!(self, "[{i}] = {{"); - 528 | indent!(self); - 529 | for (j, alias) in production_info.alias_sequence.iter().enumerate() { - 530 | if let Some(alias) = alias { - 531 | add_line!(self, "[{j}] = {},", self.alias_ids[alias]); - 532 | } - 533 | } - 534 | dedent!(self); - 535 | add_line!(self, "}},"); - 536 | } - 537 | dedent!(self); - 538 | add_line!(self, "}};"); - 539 | add_line!(self, ""); - 540 | } - | - 541 | fn add_non_terminal_alias_map(&mut self) { - 542 | let mut alias_ids_by_symbol = HashMap::new(); - 543 | for variable in &self.syntax_grammar.variables { - 544 | for production in &variable.productions { - 545 | for step in &production.steps { - 546 | if let Some(alias) = &step.alias { - 547 | if step.symbol.is_non_terminal() - 548 | && Some(alias) != self.default_aliases.get(&step.symbol) - 549 | && self.symbol_ids.contains_key(&step.symbol) - 550 | { - 551 | if let Some(alias_id) = self.alias_ids.get(alias) { - 552 | let alias_ids = - 553 | alias_ids_by_symbol.entry(step.symbol).or_insert(Vec::new()); - 554 | if let Err(i) = alias_ids.binary_search(&alias_id) { - 555 | alias_ids.insert(i, alias_id); - 556 | } - 557 | } - 558 | } - 559 | } - 560 | } - 561 | } - 562 | } - | - 563 | let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::>(); - 564 | alias_ids_by_symbol.sort_unstable_by_key(|e| e.0); - | - 565 | add_line!( - 566 | self, - 567 | "static const uint16_t ts_non_terminal_alias_map[] = {{" - 568 | ); - 569 | indent!(self); - 570 | for (symbol, alias_ids) in alias_ids_by_symbol { - 571 | let symbol_id = &self.symbol_ids[symbol]; - 572 | let public_symbol_id = &self.symbol_ids[&self.symbol_map[symbol]]; - 573 | add_line!(self, "{symbol_id}, {},", 1 + alias_ids.len()); - 574 | indent!(self); - 575 | add_line!(self, "{public_symbol_id},"); - 576 | for alias_id in alias_ids { - 577 | add_line!(self, "{alias_id},"); - 578 | } - 
579 | dedent!(self); - 580 | } - 581 | add_line!(self, "0,"); - 582 | dedent!(self); - 583 | add_line!(self, "}};"); - 584 | add_line!(self, ""); - 585 | } - | - 586 | /// Produces a list of the "primary state" for every state in the grammar. - 587 | /// - 588 | /// The "primary state" for a given state is the first encountered state that behaves - 589 | /// identically with respect to query analysis. We derive this by keeping track of the `core_id` - 590 | /// for each state and treating the first state with a given `core_id` as primary. - 591 | fn add_primary_state_id_list(&mut self) { - 592 | add_line!( - 593 | self, - 594 | "static const TSStateId ts_primary_state_ids[STATE_COUNT] = {{" - 595 | ); - 596 | indent!(self); - 597 | let mut first_state_for_each_core_id = HashMap::new(); - 598 | for (idx, state) in self.parse_table.states.iter().enumerate() { - 599 | let primary_state = first_state_for_each_core_id - 600 | .entry(state.core_id) - 601 | .or_insert(idx); - 602 | add_line!(self, "[{idx}] = {primary_state},"); - 603 | } - 604 | dedent!(self); - 605 | add_line!(self, "}};"); - 606 | add_line!(self, ""); - 607 | } - | - 608 | fn add_field_sequences(&mut self) { - 609 | let mut flat_field_maps = vec![]; - 610 | let mut next_flat_field_map_index = 0; - 611 | self.get_field_map_id( - 612 | Vec::new(), - 613 | &mut flat_field_maps, - 614 | &mut next_flat_field_map_index, - 615 | ); - | - 616 | let mut field_map_ids = Vec::with_capacity(self.parse_table.production_infos.len()); - 617 | for production_info in &self.parse_table.production_infos { - 618 | if production_info.field_map.is_empty() { - 619 | field_map_ids.push((0, 0)); - 620 | } else { - 621 | let mut flat_field_map = Vec::with_capacity(production_info.field_map.len()); - 622 | for (field_name, locations) in &production_info.field_map { - 623 | for location in locations { - 624 | flat_field_map.push((field_name.clone(), *location)); - 625 | } - 626 | } - 627 | field_map_ids.push(( - 628 | 
self.get_field_map_id( - 629 | flat_field_map.clone(), - 630 | &mut flat_field_maps, - 631 | &mut next_flat_field_map_index, - 632 | ), - 633 | flat_field_map.len(), - 634 | )); - 635 | } - 636 | } - | - 637 | add_line!( - 638 | self, - 639 | "static const TSMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {{", - 640 | ); - 641 | indent!(self); - 642 | for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() { - 643 | if length > 0 { - 644 | add_line!( - 645 | self, - 646 | "[{production_id}] = {{.index = {row_id}, .length = {length}}},", - 647 | ); - 648 | } - 649 | } - 650 | dedent!(self); - 651 | add_line!(self, "}};"); - 652 | add_line!(self, ""); - | - 653 | add_line!( - 654 | self, - 655 | "static const TSFieldMapEntry ts_field_map_entries[] = {{", - 656 | ); - 657 | indent!(self); - 658 | for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) { - 659 | add_line!(self, "[{row_index}] ="); - 660 | indent!(self); - 661 | for (field_name, location) in field_pairs { - 662 | add_whitespace!(self); - 663 | add!(self, "{{{}, {}", self.field_id(&field_name), location.index); - 664 | if location.inherited { - 665 | add!(self, ", .inherited = true"); - 666 | } - 667 | add!(self, "}},\n"); - 668 | } - 669 | dedent!(self); - 670 | } - | - 671 | dedent!(self); - 672 | add_line!(self, "}};"); - 673 | add_line!(self, ""); - 674 | } - | - 675 | fn add_supertype_map(&mut self) { - 676 | add_line!( - 677 | self, - 678 | "static const TSSymbol ts_supertype_symbols[SUPERTYPE_COUNT] = {{" - 679 | ); - 680 | indent!(self); - 681 | for supertype in self.supertype_map.keys() { - 682 | add_line!(self, "{supertype},"); - 683 | } - 684 | dedent!(self); - 685 | add_line!(self, "}};\n"); - | - 686 | add_line!( - 687 | self, - 688 | "static const TSMapSlice ts_supertype_map_slices[] = {{", - 689 | ); - 690 | indent!(self); - 691 | let mut row_id = 0; - 692 | let mut supertype_ids = vec![0]; - 693 | let mut supertype_string_map = BTreeMap::new(); - 694 
| for (supertype, subtypes) in &self.supertype_map { - 695 | supertype_string_map.insert( - 696 | supertype, - 697 | subtypes - 698 | .iter() - 699 | .flat_map(|s| match s { - 700 | ChildType::Normal(symbol) => vec![self.symbol_ids.get(symbol).cloned()], - 701 | ChildType::Aliased(alias) => { - 702 | self.alias_ids.get(alias).cloned().map_or_else( - 703 | || { - 704 | self.symbols_for_alias(alias) - 705 | .into_iter() - 706 | .map(|s| self.symbol_ids.get(&s).cloned()) - 707 | .collect() - 708 | }, - 709 | |a| vec![Some(a)], - 710 | ) - 711 | } - 712 | }) - 713 | .flatten() - 714 | .collect::>(), - 715 | ); - 716 | } - 717 | for (supertype, subtypes) in &supertype_string_map { - 718 | let length = subtypes.len(); - 719 | add_line!( - 720 | self, - 721 | "[{supertype}] = {{.index = {row_id}, .length = {length}}},", - 722 | ); - 723 | row_id += length; - 724 | supertype_ids.push(row_id); - 725 | } - 726 | dedent!(self); - 727 | add_line!(self, "}};"); - 728 | add_line!(self, ""); - | - 729 | add_line!( - 730 | self, - 731 | "static const TSSymbol ts_supertype_map_entries[] = {{", - 732 | ); - 733 | indent!(self); - 734 | for (i, (_, subtypes)) in supertype_string_map.iter().enumerate() { - 735 | let row_index = supertype_ids[i]; - 736 | add_line!(self, "[{row_index}] ="); - 737 | indent!(self); - 738 | for subtype in subtypes { - 739 | add_whitespace!(self); - 740 | add!(self, "{subtype},\n"); - 741 | } - 742 | dedent!(self); - 743 | } - | - 744 | dedent!(self); - 745 | add_line!(self, "}};"); - 746 | add_line!(self, ""); - 747 | } - | - 748 | fn add_lex_function(&mut self, name: &str, lex_table: LexTable) { - 749 | add_line!( - 750 | self, - 751 | "static bool {name}(TSLexer *lexer, TSStateId state) {{", - 752 | ); - 753 | indent!(self); - | - 754 | add_line!(self, "START_LEXER();"); - 755 | add_line!(self, "eof = lexer->eof(lexer);"); - 756 | add_line!(self, "switch (state) {{"); - | - 757 | indent!(self); - 758 | for (i, state) in 
lex_table.states.into_iter().enumerate() { - 759 | add_line!(self, "case {i}:"); - 760 | indent!(self); - 761 | self.add_lex_state(i, state); - 762 | dedent!(self); - 763 | } - | - 764 | add_line!(self, "default:"); - 765 | indent!(self); - 766 | add_line!(self, "return false;"); - 767 | dedent!(self); - | - 768 | dedent!(self); - 769 | add_line!(self, "}}"); - | - 770 | dedent!(self); - 771 | add_line!(self, "}}"); - 772 | add_line!(self, ""); - 773 | } - | - 774 | fn add_lex_state(&mut self, _state_ix: usize, state: LexState) { - 775 | if let Some(accept_action) = state.accept_action { - 776 | add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]); - 777 | } - | - 778 | if let Some(eof_action) = state.eof_action { - 779 | add_line!(self, "if (eof) ADVANCE({});", eof_action.state); - 780 | } - | - 781 | let mut chars_copy = CharacterSet::empty(); - 782 | let mut large_set = CharacterSet::empty(); - 783 | let mut ruled_out_chars = CharacterSet::empty(); - | - 784 | // The transitions in a lex state are sorted with the single-character - 785 | // transitions first. If there are many single-character transitions, - 786 | // then implement them using an array of (lookahead character, state) - 787 | // pairs, instead of individual if statements, in order to reduce compile - 788 | // time. 
- 789 | let mut leading_simple_transition_count = 0; - 790 | let mut leading_simple_transition_range_count = 0; - 791 | for (chars, action) in &state.advance_actions { - 792 | if action.in_main_token - 793 | && chars.ranges().all(|r| { - 794 | let start = *r.start() as u32; - 795 | let end = *r.end() as u32; - 796 | end <= start + 1 && u16::try_from(end).is_ok() - 797 | }) - 798 | { - 799 | leading_simple_transition_count += 1; - 800 | leading_simple_transition_range_count += chars.range_count(); - 801 | } else { - 802 | break; - 803 | } - 804 | } - | - 805 | if leading_simple_transition_range_count >= 8 { - 806 | add_line!(self, "ADVANCE_MAP("); - 807 | indent!(self); - 808 | for (chars, action) in &state.advance_actions[0..leading_simple_transition_count] { - 809 | for range in chars.ranges() { - 810 | add_whitespace!(self); - 811 | self.add_character(*range.start()); - 812 | add!(self, ", {},\n", action.state); - 813 | if range.end() > range.start() { - 814 | add_whitespace!(self); - 815 | self.add_character(*range.end()); - 816 | add!(self, ", {},\n", action.state); - 817 | } - 818 | } - 819 | ruled_out_chars = ruled_out_chars.add(chars); - 820 | } - 821 | dedent!(self); - 822 | add_line!(self, ");"); - 823 | } else { - 824 | leading_simple_transition_count = 0; - 825 | } - | - 826 | for (chars, action) in &state.advance_actions[leading_simple_transition_count..] { - 827 | add_whitespace!(self); - | - 828 | // The lex state's advance actions are represented with disjoint - 829 | // sets of characters. When translating these disjoint sets into a - 830 | // sequence of checks, we don't need to re-check conditions that - 831 | // have already been checked due to previous transitions. - 832 | // - 833 | // Note that this simplification may result in an empty character set. - 834 | // That means that the transition is guaranteed (nothing further needs to - 835 | // be checked), not that this transition is impossible. 
- 836 | let simplified_chars = chars.simplify_ignoring(&ruled_out_chars); - | - 837 | // For large character sets, find the best matching character set from - 838 | // a pre-selected list of large character sets, which are based on the - 839 | // state transitions for invidual tokens. This transition may not exactly - 840 | // match one of the pre-selected character sets. In that case, determine - 841 | // the additional checks that need to be performed to match this transition. - 842 | let mut best_large_char_set: Option<(usize, CharacterSet, CharacterSet)> = None; - 843 | if simplified_chars.range_count() >= super::build_tables::LARGE_CHARACTER_RANGE_COUNT { - 844 | for (ix, (_, set)) in self.large_character_sets.iter().enumerate() { - 845 | chars_copy.assign(&simplified_chars); - 846 | large_set.assign(set); - 847 | let intersection = chars_copy.remove_intersection(&mut large_set); - 848 | if !intersection.is_empty() { - 849 | let additions = chars_copy.simplify_ignoring(&ruled_out_chars); - 850 | let removals = large_set.simplify_ignoring(&ruled_out_chars); - 851 | let total_range_count = additions.range_count() + removals.range_count(); - 852 | if total_range_count >= simplified_chars.range_count() { - 853 | continue; - 854 | } - 855 | if let Some((_, best_additions, best_removals)) = &best_large_char_set { - 856 | let best_range_count = - 857 | best_additions.range_count() + best_removals.range_count(); - 858 | if best_range_count < total_range_count { - 859 | continue; - 860 | } - 861 | } - 862 | best_large_char_set = Some((ix, additions, removals)); - 863 | } - 864 | } - 865 | } - | - 866 | // Add this transition's character set to the set of ruled out characters, - 867 | // which don't need to be checked for subsequent transitions in this state. 
- 868 | ruled_out_chars = ruled_out_chars.add(chars); - | - 869 | let mut large_char_set_ix = None; - 870 | let mut asserted_chars = simplified_chars; - 871 | let mut negated_chars = CharacterSet::empty(); - 872 | if let Some((char_set_ix, additions, removals)) = best_large_char_set { - 873 | asserted_chars = additions; - 874 | negated_chars = removals; - 875 | large_char_set_ix = Some(char_set_ix); - 876 | } - | - 877 | let mut line_break = "\n".to_string(); - 878 | for _ in 0..self.indent_level + 2 { - 879 | line_break.push_str(" "); - 880 | } - | - 881 | let has_positive_condition = large_char_set_ix.is_some() || !asserted_chars.is_empty(); - 882 | let has_negative_condition = !negated_chars.is_empty(); - 883 | let has_condition = has_positive_condition || has_negative_condition; - 884 | if has_condition { - 885 | add!(self, "if ("); - 886 | if has_positive_condition && has_negative_condition { - 887 | add!(self, "("); - 888 | } - 889 | } - | - 890 | if let Some(large_char_set_ix) = large_char_set_ix { - 891 | let large_set = &self.large_character_sets[large_char_set_ix].1; - | - 892 | // If the character set contains the null character, check that we - 893 | // are not at the end of the file. 
- 894 | let check_eof = large_set.contains('\0'); - 895 | if check_eof { - 896 | add!(self, "(!eof && "); - 897 | } - | - 898 | let char_set_info = &mut self.large_character_set_info[large_char_set_ix]; - 899 | char_set_info.is_used = true; - 900 | add!( - 901 | self, - 902 | "set_contains({}, {}, lookahead)", - 903 | char_set_info.constant_name, - 904 | large_set.range_count(), - 905 | ); - 906 | if check_eof { - 907 | add!(self, ")"); - 908 | } - 909 | } - | - 910 | if !asserted_chars.is_empty() { - 911 | if large_char_set_ix.is_some() { - 912 | add!(self, " ||{line_break}"); - 913 | } - | - 914 | // If the character set contains the max character, than it probably - 915 | // corresponds to a negated character class in a regex, so it will be more - 916 | // concise and readable to express it in terms of negated ranges. - 917 | let is_included = !asserted_chars.contains(char::MAX); - 918 | if !is_included { - 919 | asserted_chars = asserted_chars.negate().add_char('\0'); - 920 | } - | - 921 | self.add_character_range_conditions(&asserted_chars, is_included, &line_break); - 922 | } - | - 923 | if has_negative_condition { - 924 | if has_positive_condition { - 925 | add!(self, ") &&{line_break}"); - 926 | } - 927 | self.add_character_range_conditions(&negated_chars, false, &line_break); - 928 | } - | - 929 | if has_condition { - 930 | add!(self, ") "); - 931 | } - | - 932 | self.add_advance_action(action); - 933 | add!(self, "\n"); - 934 | } - | - 935 | add_line!(self, "END_STATE();"); - 936 | } - | - 937 | fn add_character_range_conditions( - 938 | &mut self, - 939 | characters: &CharacterSet, - 940 | is_included: bool, - 941 | line_break: &str, - 942 | ) { - 943 | for (i, range) in characters.ranges().enumerate() { - 944 | let start = *range.start(); - 945 | let end = *range.end(); - 946 | if is_included { - 947 | if i > 0 { - 948 | add!(self, " ||{line_break}"); - 949 | } - | - 950 | if start == '\0' { - 951 | add!(self, "(!eof && "); - 952 | if end == '\0' { - 
953 | add!(self, "lookahead == 0"); - 954 | } else { - 955 | add!(self, "lookahead <= "); - 956 | } - 957 | self.add_character(end); - 958 | add!(self, ")"); - 959 | } else if end == start { - 960 | add!(self, "lookahead == "); - 961 | self.add_character(start); - 962 | } else if end as u32 == start as u32 + 1 { - 963 | add!(self, "lookahead == "); - 964 | self.add_character(start); - 965 | add!(self, " ||{line_break}lookahead == "); - 966 | self.add_character(end); - 967 | } else { - 968 | add!(self, "("); - 969 | self.add_character(start); - 970 | add!(self, " <= lookahead && lookahead <= "); - 971 | self.add_character(end); - 972 | add!(self, ")"); - 973 | } - 974 | } else { - 975 | if i > 0 { - 976 | add!(self, " &&{line_break}"); - 977 | } - 978 | if end == start { - 979 | add!(self, "lookahead != "); - 980 | self.add_character(start); - 981 | } else if end as u32 == start as u32 + 1 { - 982 | add!(self, "lookahead != "); - 983 | self.add_character(start); - 984 | add!(self, " &&{line_break}lookahead != "); - 985 | self.add_character(end); - 986 | } else if start != '\0' { - 987 | add!(self, "(lookahead < "); - 988 | self.add_character(start); - 989 | add!(self, " || "); - 990 | self.add_character(end); - 991 | add!(self, " < lookahead)"); - 992 | } else { - 993 | add!(self, "lookahead > "); - 994 | self.add_character(end); - 995 | } - 996 | } - 997 | } - 998 | } - | - 999 | fn add_character_set(&mut self, ix: usize) { -1000 | let characters = self.large_character_sets[ix].1.clone(); -1001 | let info = &self.large_character_set_info[ix]; -1002 | if !info.is_used { -1003 | return; -1004 | } - | -1005 | add_line!( -1006 | self, -1007 | "static const TSCharacterRange {}[] = {{", -1008 | info.constant_name -1009 | ); - | -1010 | indent!(self); -1011 | for (ix, range) in characters.ranges().enumerate() { -1012 | let column = ix % 8; -1013 | if column == 0 { -1014 | if ix > 0 { -1015 | add!(self, "\n"); -1016 | } -1017 | add_whitespace!(self); -1018 | } else { -1019 
| add!(self, " "); -1020 | } -1021 | add!(self, "{{"); -1022 | self.add_character(*range.start()); -1023 | add!(self, ", "); -1024 | self.add_character(*range.end()); -1025 | add!(self, "}},"); -1026 | } -1027 | add!(self, "\n"); -1028 | dedent!(self); -1029 | add_line!(self, "}};"); -1030 | add_line!(self, ""); -1031 | } - | -1032 | fn add_advance_action(&mut self, action: &AdvanceAction) { -1033 | if action.in_main_token { -1034 | add!(self, "ADVANCE({});", action.state); -1035 | } else { -1036 | add!(self, "SKIP({});", action.state); -1037 | } -1038 | } - | -1039 | fn add_lex_modes(&mut self) { -1040 | add_line!( -1041 | self, -1042 | "static const {} ts_lex_modes[STATE_COUNT] = {{", -1043 | if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS { -1044 | "TSLexerMode" -1045 | } else { -1046 | "TSLexMode" -1047 | } -1048 | ); -1049 | indent!(self); -1050 | for (i, state) in self.parse_table.states.iter().enumerate() { -1051 | add_whitespace!(self); -1052 | add!(self, "[{i}] = {{"); -1053 | if state.is_end_of_non_terminal_extra() { -1054 | add!(self, "(TSStateId)(-1),"); -1055 | } else { -1056 | add!(self, ".lex_state = {}", state.lex_state_id); - | -1057 | if state.external_lex_state_id > 0 { -1058 | add!( -1059 | self, -1060 | ", .external_lex_state = {}", -1061 | state.external_lex_state_id -1062 | ); -1063 | } - | -1064 | if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS { -1065 | let reserved_word_set_id = self.reserved_word_set_ids_by_parse_state[i]; -1066 | if reserved_word_set_id != 0 { -1067 | add!(self, ", .reserved_word_set_id = {reserved_word_set_id}"); -1068 | } -1069 | } -1070 | } - | -1071 | add!(self, "}},\n"); -1072 | } -1073 | dedent!(self); -1074 | add_line!(self, "}};"); -1075 | add_line!(self, ""); -1076 | } - | -1077 | fn add_reserved_word_sets(&mut self) { -1078 | add_line!( -1079 | self, -1080 | "static const TSSymbol ts_reserved_words[{}][MAX_RESERVED_WORD_SET_SIZE] = {{", -1081 | self.reserved_word_sets.len(), -1082 | ); -1083 | 
indent!(self); -1084 | for (id, set) in self.reserved_word_sets.iter().enumerate() { -1085 | if id == 0 { -1086 | continue; -1087 | } -1088 | add_line!(self, "[{id}] = {{"); -1089 | indent!(self); -1090 | for token in set.iter() { -1091 | add_line!(self, "{},", self.symbol_ids[&token]); -1092 | } -1093 | dedent!(self); -1094 | add_line!(self, "}},"); -1095 | } -1096 | dedent!(self); -1097 | add_line!(self, "}};"); -1098 | add_line!(self, ""); -1099 | } - | -1100 | fn add_external_token_enum(&mut self) { -1101 | add_line!(self, "enum ts_external_scanner_symbol_identifiers {{"); -1102 | indent!(self); -1103 | for i in 0..self.syntax_grammar.external_tokens.len() { -1104 | add_line!( -1105 | self, -1106 | "{} = {i},", -1107 | self.external_token_id(&self.syntax_grammar.external_tokens[i]), -1108 | ); -1109 | } -1110 | dedent!(self); -1111 | add_line!(self, "}};"); -1112 | add_line!(self, ""); -1113 | } - | -1114 | fn add_external_scanner_symbol_map(&mut self) { -1115 | add_line!( -1116 | self, -1117 | "static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {{" -1118 | ); -1119 | indent!(self); -1120 | for i in 0..self.syntax_grammar.external_tokens.len() { -1121 | let token = &self.syntax_grammar.external_tokens[i]; -1122 | let id_token = token -1123 | .corresponding_internal_token -1124 | .unwrap_or_else(|| Symbol::external(i)); -1125 | add_line!( -1126 | self, -1127 | "[{}] = {},", -1128 | self.external_token_id(token), -1129 | self.symbol_ids[&id_token], -1130 | ); -1131 | } -1132 | dedent!(self); -1133 | add_line!(self, "}};"); -1134 | add_line!(self, ""); -1135 | } - | -1136 | fn add_external_scanner_states_list(&mut self) { -1137 | add_line!( -1138 | self, -1139 | "static const bool ts_external_scanner_states[{}][EXTERNAL_TOKEN_COUNT] = {{", -1140 | self.parse_table.external_lex_states.len(), -1141 | ); -1142 | indent!(self); -1143 | for i in 0..self.parse_table.external_lex_states.len() { -1144 | if 
!self.parse_table.external_lex_states[i].is_empty() { -1145 | add_line!(self, "[{i}] = {{"); -1146 | indent!(self); -1147 | for token in self.parse_table.external_lex_states[i].iter() { -1148 | add_line!( -1149 | self, -1150 | "[{}] = true,", -1151 | self.external_token_id(&self.syntax_grammar.external_tokens[token.index]) -1152 | ); -1153 | } -1154 | dedent!(self); -1155 | add_line!(self, "}},"); -1156 | } -1157 | } -1158 | dedent!(self); -1159 | add_line!(self, "}};"); -1160 | add_line!(self, ""); -1161 | } - | -1162 | fn add_parse_table(&mut self) { -1163 | let mut parse_table_entries = HashMap::new(); -1164 | let mut next_parse_action_list_index = 0; - | -1165 | // Parse action lists zero is for the default value, when a symbol is not valid. -1166 | self.get_parse_action_list_id( -1167 | &ParseTableEntry { -1168 | actions: Vec::new(), -1169 | reusable: false, -1170 | }, -1171 | &mut parse_table_entries, -1172 | &mut next_parse_action_list_index, -1173 | ); - | -1174 | add_line!( -1175 | self, -1176 | "static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{", -1177 | ); -1178 | indent!(self); - | -1179 | let mut terminal_entries = Vec::new(); -1180 | let mut nonterminal_entries = Vec::new(); - | -1181 | for (i, state) in self -1182 | .parse_table -1183 | .states -1184 | .iter() -1185 | .enumerate() -1186 | .take(self.large_state_count) -1187 | { -1188 | add_line!(self, "[STATE({i})] = {{"); -1189 | indent!(self); - | -1190 | // Ensure the entries are in a deterministic order, since they are -1191 | // internally represented as a hash map. 
-1192 | terminal_entries.clear(); -1193 | nonterminal_entries.clear(); -1194 | terminal_entries.extend(state.terminal_entries.iter()); -1195 | nonterminal_entries.extend(state.nonterminal_entries.iter()); -1196 | terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0)); -1197 | nonterminal_entries.sort_unstable_by_key(|k| k.0); - | -1198 | for (symbol, action) in &nonterminal_entries { -1199 | add_line!( -1200 | self, -1201 | "[{}] = STATE({}),", -1202 | self.symbol_ids[symbol], -1203 | match action { -1204 | GotoAction::Goto(state) => *state, -1205 | GotoAction::ShiftExtra => i, -1206 | } -1207 | ); -1208 | } - | -1209 | for (symbol, entry) in &terminal_entries { -1210 | let entry_id = self.get_parse_action_list_id( -1211 | entry, -1212 | &mut parse_table_entries, -1213 | &mut next_parse_action_list_index, -1214 | ); -1215 | add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]); -1216 | } - | -1217 | dedent!(self); -1218 | add_line!(self, "}},"); -1219 | } - | -1220 | dedent!(self); -1221 | add_line!(self, "}};"); -1222 | add_line!(self, ""); - | -1223 | if self.large_state_count < self.parse_table.states.len() { -1224 | add_line!(self, "static const uint16_t ts_small_parse_table[] = {{"); -1225 | indent!(self); - | -1226 | let mut next_table_index = 0; -1227 | let mut small_state_indices = Vec::with_capacity( -1228 | self.parse_table -1229 | .states -1230 | .len() -1231 | .saturating_sub(self.large_state_count), -1232 | ); -1233 | let mut symbols_by_value = HashMap::<(usize, SymbolType), Vec>::new(); -1234 | for state in self.parse_table.states.iter().skip(self.large_state_count) { -1235 | small_state_indices.push(next_table_index); -1236 | symbols_by_value.clear(); - | -1237 | terminal_entries.clear(); -1238 | terminal_entries.extend(state.terminal_entries.iter()); -1239 | terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0)); - | -1240 | // In a given parse state, many lookahead symbols have the same actions. 
-1241 | // So in the "small state" representation, group symbols by their action -1242 | // in order to avoid repeating the action. -1243 | for (symbol, entry) in &terminal_entries { -1244 | let entry_id = self.get_parse_action_list_id( -1245 | entry, -1246 | &mut parse_table_entries, -1247 | &mut next_parse_action_list_index, -1248 | ); -1249 | symbols_by_value -1250 | .entry((entry_id, SymbolType::Terminal)) -1251 | .or_default() -1252 | .push(**symbol); -1253 | } -1254 | for (symbol, action) in &state.nonterminal_entries { -1255 | let state_id = match action { -1256 | GotoAction::Goto(i) => *i, -1257 | GotoAction::ShiftExtra => { -1258 | self.large_state_count + small_state_indices.len() - 1 -1259 | } -1260 | }; -1261 | symbols_by_value -1262 | .entry((state_id, SymbolType::NonTerminal)) -1263 | .or_default() -1264 | .push(*symbol); -1265 | } - | -1266 | let mut values_with_symbols = symbols_by_value.drain().collect::>(); -1267 | values_with_symbols.sort_unstable_by_key(|((value, kind), symbols)| { -1268 | (symbols.len(), *kind, *value, symbols[0]) -1269 | }); - | -1270 | add_line!( -1271 | self, -1272 | "[{next_table_index}] = {},", -1273 | values_with_symbols.len() -1274 | ); -1275 | indent!(self); -1276 | next_table_index += 1; - | -1277 | for ((value, kind), symbols) in &mut values_with_symbols { -1278 | next_table_index += 2 + symbols.len(); -1279 | if *kind == SymbolType::NonTerminal { -1280 | add_line!(self, "STATE({value}), {},", symbols.len()); -1281 | } else { -1282 | add_line!(self, "ACTIONS({value}), {},", symbols.len()); -1283 | } - | -1284 | symbols.sort_unstable(); -1285 | indent!(self); -1286 | for symbol in symbols { -1287 | add_line!(self, "{},", self.symbol_ids[symbol]); -1288 | } -1289 | dedent!(self); -1290 | } - | -1291 | dedent!(self); -1292 | } - | -1293 | dedent!(self); -1294 | add_line!(self, "}};"); -1295 | add_line!(self, ""); - | -1296 | add_line!( -1297 | self, -1298 | "static const uint32_t ts_small_parse_table_map[] = {{" -1299 | 
); -1300 | indent!(self); -1301 | for i in self.large_state_count..self.parse_table.states.len() { -1302 | add_line!( -1303 | self, -1304 | "[SMALL_STATE({i})] = {},", -1305 | small_state_indices[i - self.large_state_count] -1306 | ); -1307 | } -1308 | dedent!(self); -1309 | add_line!(self, "}};"); -1310 | add_line!(self, ""); -1311 | } - | -1312 | let mut parse_table_entries = parse_table_entries -1313 | .into_iter() -1314 | .map(|(entry, i)| (i, entry)) -1315 | .collect::>(); -1316 | parse_table_entries.sort_by_key(|(index, _)| *index); -1317 | self.add_parse_action_list(parse_table_entries); -1318 | } - | -1319 | fn add_parse_action_list(&mut self, parse_table_entries: Vec<(usize, ParseTableEntry)>) { -1320 | add_line!( -1321 | self, -1322 | "static const TSParseActionEntry ts_parse_actions[] = {{" -1323 | ); -1324 | indent!(self); -1325 | for (i, entry) in parse_table_entries { -1326 | add!( -1327 | self, -1328 | " [{i}] = {{.entry = {{.count = {}, .reusable = {}}}}},", -1329 | entry.actions.len(), -1330 | entry.reusable -1331 | ); -1332 | for action in entry.actions { -1333 | add!(self, " "); -1334 | match action { -1335 | ParseAction::Accept => add!(self, " ACCEPT_INPUT()"), -1336 | ParseAction::Recover => add!(self, "RECOVER()"), -1337 | ParseAction::ShiftExtra => add!(self, "SHIFT_EXTRA()"), -1338 | ParseAction::Shift { -1339 | state, -1340 | is_repetition, -1341 | } => { -1342 | if is_repetition { -1343 | add!(self, "SHIFT_REPEAT({state})"); -1344 | } else { -1345 | add!(self, "SHIFT({state})"); -1346 | } -1347 | } -1348 | ParseAction::Reduce { -1349 | symbol, -1350 | child_count, -1351 | dynamic_precedence, -1352 | production_id, -1353 | .. 
-1354 | } => { -1355 | add!( -1356 | self, -1357 | "REDUCE({}, {child_count}, {dynamic_precedence}, {production_id})", -1358 | self.symbol_ids[&symbol] -1359 | ); -1360 | } -1361 | } -1362 | add!(self, ","); -1363 | } -1364 | add!(self, "\n"); -1365 | } -1366 | dedent!(self); -1367 | add_line!(self, "}};"); -1368 | add_line!(self, ""); -1369 | } - | -1370 | fn add_parser_export(&mut self) { -1371 | let language_function_name = format!("tree_sitter_{}", self.language_name); -1372 | let external_scanner_name = format!("{language_function_name}_external_scanner"); - | -1373 | add_line!(self, "#ifdef __cplusplus"); -1374 | add_line!(self, r#"extern "C" {{"#); -1375 | add_line!(self, "#endif"); - | -1376 | if !self.syntax_grammar.external_tokens.is_empty() { -1377 | add_line!(self, "void *{external_scanner_name}_create(void);"); -1378 | add_line!(self, "void {external_scanner_name}_destroy(void *);"); -1379 | add_line!( -1380 | self, -1381 | "bool {external_scanner_name}_scan(void *, TSLexer *, const bool *);", -1382 | ); -1383 | add_line!( -1384 | self, -1385 | "unsigned {external_scanner_name}_serialize(void *, char *);", -1386 | ); -1387 | add_line!( -1388 | self, -1389 | "void {external_scanner_name}_deserialize(void *, const char *, unsigned);", -1390 | ); -1391 | add_line!(self, ""); -1392 | } - | -1393 | add_line!(self, "#ifdef TREE_SITTER_HIDE_SYMBOLS"); -1394 | add_line!(self, "#define TS_PUBLIC"); -1395 | add_line!(self, "#elif defined(_WIN32)"); -1396 | add_line!(self, "#define TS_PUBLIC __declspec(dllexport)"); -1397 | add_line!(self, "#else"); -1398 | add_line!( -1399 | self, -1400 | "#define TS_PUBLIC __attribute__((visibility(\"default\")))" -1401 | ); -1402 | add_line!(self, "#endif"); -1403 | add_line!(self, ""); - | -1404 | add_line!( -1405 | self, -1406 | "TS_PUBLIC const TSLanguage *{language_function_name}(void) {{", -1407 | ); -1408 | indent!(self); -1409 | add_line!(self, "static const TSLanguage language = {{"); -1410 | indent!(self); -1411 | 
add_line!(self, ".abi_version = LANGUAGE_VERSION,"); - | -1412 | // Quantities -1413 | add_line!(self, ".symbol_count = SYMBOL_COUNT,"); -1414 | add_line!(self, ".alias_count = ALIAS_COUNT,"); -1415 | add_line!(self, ".token_count = TOKEN_COUNT,"); -1416 | add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,"); -1417 | add_line!(self, ".state_count = STATE_COUNT,"); -1418 | add_line!(self, ".large_state_count = LARGE_STATE_COUNT,"); -1419 | add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,"); -1420 | if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS { -1421 | add_line!(self, ".supertype_count = SUPERTYPE_COUNT,"); -1422 | } -1423 | add_line!(self, ".field_count = FIELD_COUNT,"); -1424 | add_line!( -1425 | self, -1426 | ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH," -1427 | ); - | -1428 | // Parse table -1429 | add_line!(self, ".parse_table = &ts_parse_table[0][0],"); -1430 | if self.large_state_count < self.parse_table.states.len() { -1431 | add_line!(self, ".small_parse_table = ts_small_parse_table,"); -1432 | add_line!(self, ".small_parse_table_map = ts_small_parse_table_map,"); -1433 | } -1434 | add_line!(self, ".parse_actions = ts_parse_actions,"); - | -1435 | // Metadata -1436 | add_line!(self, ".symbol_names = ts_symbol_names,"); -1437 | if !self.field_names.is_empty() { -1438 | add_line!(self, ".field_names = ts_field_names,"); -1439 | add_line!(self, ".field_map_slices = ts_field_map_slices,"); -1440 | add_line!(self, ".field_map_entries = ts_field_map_entries,"); -1441 | } -1442 | if !self.supertype_map.is_empty() && self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS { -1443 | add_line!(self, ".supertype_map_slices = ts_supertype_map_slices,"); -1444 | add_line!(self, ".supertype_map_entries = ts_supertype_map_entries,"); -1445 | add_line!(self, ".supertype_symbols = ts_supertype_symbols,"); -1446 | } -1447 | add_line!(self, ".symbol_metadata = ts_symbol_metadata,"); -1448 | add_line!(self, ".public_symbol_map = 
ts_symbol_map,"); -1449 | add_line!(self, ".alias_map = ts_non_terminal_alias_map,"); -1450 | if !self.parse_table.production_infos.is_empty() { -1451 | add_line!(self, ".alias_sequences = &ts_alias_sequences[0][0],"); -1452 | } - | -1453 | // Lexing -1454 | add_line!(self, ".lex_modes = (const void*)ts_lex_modes,"); -1455 | add_line!(self, ".lex_fn = ts_lex,"); -1456 | if let Some(keyword_capture_token) = self.syntax_grammar.word_token { -1457 | add_line!(self, ".keyword_lex_fn = ts_lex_keywords,"); -1458 | add_line!( -1459 | self, -1460 | ".keyword_capture_token = {},", -1461 | self.symbol_ids[&keyword_capture_token] -1462 | ); -1463 | } - | -1464 | if !self.syntax_grammar.external_tokens.is_empty() { -1465 | add_line!(self, ".external_scanner = {{"); -1466 | indent!(self); -1467 | add_line!(self, "&ts_external_scanner_states[0][0],"); -1468 | add_line!(self, "ts_external_scanner_symbol_map,"); -1469 | add_line!(self, "{external_scanner_name}_create,"); -1470 | add_line!(self, "{external_scanner_name}_destroy,"); -1471 | add_line!(self, "{external_scanner_name}_scan,"); -1472 | add_line!(self, "{external_scanner_name}_serialize,"); -1473 | add_line!(self, "{external_scanner_name}_deserialize,"); -1474 | dedent!(self); -1475 | add_line!(self, "}},"); -1476 | } - | -1477 | add_line!(self, ".primary_state_ids = ts_primary_state_ids,"); - | -1478 | if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS { -1479 | add_line!(self, ".name = \"{}\",", self.language_name); - | -1480 | if self.reserved_word_sets.len() > 1 { -1481 | add_line!(self, ".reserved_words = &ts_reserved_words[0][0],"); -1482 | } - | -1483 | add_line!( -1484 | self, -1485 | ".max_reserved_word_set_size = {},", -1486 | self.reserved_word_sets -1487 | .iter() -1488 | .map(TokenSet::len) -1489 | .max() -1490 | .unwrap() -1491 | ); - | -1492 | let Some(metadata) = &self.metadata else { -1493 | panic!( -1494 | indoc! {" -1495 | Metadata is required to generate ABI version {}. 
-1496 | This means that your grammar doesn't have a tree-sitter.json config file with an appropriate version field in the metadata table. -1497 | "}, -1498 | self.abi_version -1499 | ); -1500 | }; - | -1501 | add_line!(self, ".metadata = {{"); -1502 | indent!(self); -1503 | add_line!(self, ".major_version = {},", metadata.major_version); -1504 | add_line!(self, ".minor_version = {},", metadata.minor_version); -1505 | add_line!(self, ".patch_version = {},", metadata.patch_version); -1506 | dedent!(self); -1507 | add_line!(self, "}},"); -1508 | } - | -1509 | dedent!(self); -1510 | add_line!(self, "}};"); -1511 | add_line!(self, "return &language;"); -1512 | dedent!(self); -1513 | add_line!(self, "}}"); -1514 | add_line!(self, "#ifdef __cplusplus"); -1515 | add_line!(self, "}}"); -1516 | add_line!(self, "#endif"); -1517 | } - | -1518 | fn get_parse_action_list_id( -1519 | &self, -1520 | entry: &ParseTableEntry, -1521 | parse_table_entries: &mut HashMap, -1522 | next_parse_action_list_index: &mut usize, -1523 | ) -> usize { -1524 | if let Some(&index) = parse_table_entries.get(entry) { -1525 | index -1526 | } else { -1527 | let result = *next_parse_action_list_index; -1528 | parse_table_entries.insert(entry.clone(), result); -1529 | *next_parse_action_list_index += 1 + entry.actions.len(); -1530 | result -1531 | } -1532 | } - | -1533 | fn get_field_map_id( -1534 | &self, -1535 | flat_field_map: Vec<(String, FieldLocation)>, -1536 | flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>, -1537 | next_flat_field_map_index: &mut usize, -1538 | ) -> usize { -1539 | if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) { -1540 | return *index; -1541 | } - | -1542 | let result = *next_flat_field_map_index; -1543 | *next_flat_field_map_index += flat_field_map.len(); -1544 | flat_field_maps.push((result, flat_field_map)); -1545 | result -1546 | } - | -1547 | fn external_token_id(&self, token: &ExternalToken) -> String { -1548 | 
format!( -1549 | "ts_external_token_{}", -1550 | self.sanitize_identifier(&token.name) -1551 | ) -1552 | } - | -1553 | fn assign_symbol_id(&mut self, symbol: Symbol, used_identifiers: &mut HashSet) { -1554 | let mut id; -1555 | if symbol == Symbol::end() { -1556 | id = "ts_builtin_sym_end".to_string(); -1557 | } else { -1558 | let (name, kind) = self.metadata_for_symbol(symbol); -1559 | id = match kind { -1560 | VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_identifier(name)), -1561 | VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_identifier(name)), -1562 | VariableType::Hidden | VariableType::Named => { -1563 | format!("sym_{}", self.sanitize_identifier(name)) -1564 | } -1565 | }; - | -1566 | let mut suffix_number = 1; -1567 | let mut suffix = String::new(); -1568 | while used_identifiers.contains(&id) { -1569 | id.drain(id.len() - suffix.len()..); -1570 | suffix_number += 1; -1571 | suffix = suffix_number.to_string(); -1572 | id += &suffix; -1573 | } -1574 | } - | -1575 | used_identifiers.insert(id.clone()); -1576 | self.symbol_ids.insert(symbol, id); -1577 | } - | -1578 | fn field_id(&self, field_name: &str) -> String { -1579 | format!("field_{field_name}") -1580 | } - | -1581 | fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) { -1582 | match symbol.kind { -1583 | SymbolType::End | SymbolType::EndOfNonTerminalExtra => ("end", VariableType::Hidden), -1584 | SymbolType::NonTerminal => { -1585 | let variable = &self.syntax_grammar.variables[symbol.index]; -1586 | (&variable.name, variable.kind) -1587 | } -1588 | SymbolType::Terminal => { -1589 | let variable = &self.lexical_grammar.variables[symbol.index]; -1590 | (&variable.name, variable.kind) -1591 | } -1592 | SymbolType::External => { -1593 | let token = &self.syntax_grammar.external_tokens[symbol.index]; -1594 | (&token.name, token.kind) -1595 | } -1596 | } -1597 | } - | -1598 | fn symbols_for_alias(&self, alias: &Alias) -> Vec { -1599 | self.parse_table 
-1600 | .symbols -1601 | .iter() -1602 | .copied() -1603 | .filter(move |symbol| { -1604 | self.default_aliases.get(symbol).map_or_else( -1605 | || { -1606 | let (name, kind) = self.metadata_for_symbol(*symbol); -1607 | name == alias.value && kind == alias.kind() -1608 | }, -1609 | |default_alias| default_alias == alias, -1610 | ) -1611 | }) -1612 | .collect() -1613 | } - | -1614 | fn sanitize_identifier(&self, name: &str) -> String { -1615 | let mut result = String::with_capacity(name.len()); -1616 | for c in name.chars() { -1617 | if c.is_ascii_alphanumeric() || c == '_' { -1618 | result.push(c); -1619 | } else { -1620 | 'special_chars: { -1621 | let replacement = match c { -1622 | ' ' if name.len() == 1 => "SPACE", -1623 | '~' => "TILDE", -1624 | '`' => "BQUOTE", -1625 | '!' => "BANG", -1626 | '@' => "AT", -1627 | '#' => "POUND", -1628 | '$' => "DOLLAR", -1629 | '%' => "PERCENT", -1630 | '^' => "CARET", -1631 | '&' => "AMP", -1632 | '*' => "STAR", -1633 | '(' => "LPAREN", -1634 | ')' => "RPAREN", -1635 | '-' => "DASH", -1636 | '+' => "PLUS", -1637 | '=' => "EQ", -1638 | '{' => "LBRACE", -1639 | '}' => "RBRACE", -1640 | '[' => "LBRACK", -1641 | ']' => "RBRACK", -1642 | '\\' => "BSLASH", -1643 | '|' => "PIPE", -1644 | ':' => "COLON", -1645 | ';' => "SEMI", -1646 | '"' => "DQUOTE", -1647 | '\'' => "SQUOTE", -1648 | '<' => "LT", -1649 | '>' => "GT", -1650 | ',' => "COMMA", -1651 | '.' => "DOT", -1652 | '?' 
=> "QMARK", -1653 | '/' => "SLASH", -1654 | '\n' => "LF", -1655 | '\r' => "CR", -1656 | '\t' => "TAB", -1657 | '\0' => "NULL", -1658 | '\u{0001}' => "SOH", -1659 | '\u{0002}' => "STX", -1660 | '\u{0003}' => "ETX", -1661 | '\u{0004}' => "EOT", -1662 | '\u{0005}' => "ENQ", -1663 | '\u{0006}' => "ACK", -1664 | '\u{0007}' => "BEL", -1665 | '\u{0008}' => "BS", -1666 | '\u{000b}' => "VTAB", -1667 | '\u{000c}' => "FF", -1668 | '\u{000e}' => "SO", -1669 | '\u{000f}' => "SI", -1670 | '\u{0010}' => "DLE", -1671 | '\u{0011}' => "DC1", -1672 | '\u{0012}' => "DC2", -1673 | '\u{0013}' => "DC3", -1674 | '\u{0014}' => "DC4", -1675 | '\u{0015}' => "NAK", -1676 | '\u{0016}' => "SYN", -1677 | '\u{0017}' => "ETB", -1678 | '\u{0018}' => "CAN", -1679 | '\u{0019}' => "EM", -1680 | '\u{001a}' => "SUB", -1681 | '\u{001b}' => "ESC", -1682 | '\u{001c}' => "FS", -1683 | '\u{001d}' => "GS", -1684 | '\u{001e}' => "RS", -1685 | '\u{001f}' => "US", -1686 | '\u{007F}' => "DEL", -1687 | '\u{FEFF}' => "BOM", -1688 | '\u{0080}'..='\u{FFFF}' => { -1689 | write!(result, "u{:04x}", c as u32).unwrap(); -1690 | break 'special_chars; -1691 | } -1692 | '\u{10000}'..='\u{10FFFF}' => { -1693 | write!(result, "U{:08x}", c as u32).unwrap(); -1694 | break 'special_chars; -1695 | } -1696 | '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(), -1697 | ' ' => break 'special_chars, -1698 | }; -1699 | if !result.is_empty() && !result.ends_with('_') { -1700 | result.push('_'); -1701 | } -1702 | result += replacement; -1703 | } -1704 | } -1705 | } -1706 | result -1707 | } - | -1708 | fn sanitize_string(&self, name: &str) -> String { -1709 | let mut result = String::with_capacity(name.len()); -1710 | for c in name.chars() { -1711 | match c { -1712 | '\"' => result += "\\\"", -1713 | '?' 
=> result += "\\?", -1714 | '\\' => result += "\\\\", -1715 | '\u{0007}' => result += "\\a", -1716 | '\u{0008}' => result += "\\b", -1717 | '\u{000b}' => result += "\\v", -1718 | '\u{000c}' => result += "\\f", -1719 | '\n' => result += "\\n", -1720 | '\r' => result += "\\r", -1721 | '\t' => result += "\\t", -1722 | '\0' => result += "\\0", -1723 | '\u{0001}'..='\u{001f}' => write!(result, "\\x{:02x}", c as u32).unwrap(), -1724 | '\u{007F}'..='\u{FFFF}' => write!(result, "\\u{:04x}", c as u32).unwrap(), -1725 | '\u{10000}'..='\u{10FFFF}' => write!(result, "\\U{:08x}", c as u32).unwrap(), -1726 | _ => result.push(c), -1727 | } -1728 | } -1729 | result -1730 | } - | -1731 | fn add_character(&mut self, c: char) { -1732 | match c { -1733 | '\'' => add!(self, "'\\''"), -1734 | '\\' => add!(self, "'\\\\'"), -1735 | '\u{000c}' => add!(self, "'\\f'"), -1736 | '\n' => add!(self, "'\\n'"), -1737 | '\t' => add!(self, "'\\t'"), -1738 | '\r' => add!(self, "'\\r'"), -1739 | _ => { -1740 | if c == '\0' { -1741 | add!(self, "0"); -1742 | } else if c == ' ' || c.is_ascii_graphic() { -1743 | add!(self, "'{c}'"); -1744 | } else { -1745 | add!(self, "0x{:02x}", c as u32); -1746 | } -1747 | } -1748 | } -1749 | } -1750 | } - | -1751 | /// Returns a String of C code for the given components of a parser. -1752 | /// -1753 | /// # Arguments -1754 | /// -1755 | /// * `name` - A string slice containing the name of the language -1756 | /// * `parse_table` - The generated parse table for the language -1757 | /// * `main_lex_table` - The generated lexing table for the language -1758 | /// * `keyword_lex_table` - The generated keyword lexing table for the language -1759 | /// * `keyword_capture_token` - A symbol indicating which token is used for keyword capture, if any. 
-1760 | /// * `syntax_grammar` - The syntax grammar extracted from the language's grammar -1761 | /// * `lexical_grammar` - The lexical grammar extracted from the language's grammar -1762 | /// * `default_aliases` - A map describing the global rename rules that should apply. the keys are -1763 | /// symbols that are *always* aliased in the same way, and the values are the aliases that are -1764 | /// applied to those symbols. -1765 | /// * `abi_version` - The language ABI version that should be generated. Usually you want -1766 | /// Tree-sitter's current version, but right after making an ABI change, it may be useful to -1767 | /// generate code with the previous ABI. -1768 | #[allow(clippy::too_many_arguments)] -1769 | pub fn render_c_code( -1770 | name: &str, -1771 | tables: Tables, -1772 | syntax_grammar: SyntaxGrammar, -1773 | lexical_grammar: LexicalGrammar, -1774 | default_aliases: AliasMap, -1775 | abi_version: usize, -1776 | semantic_version: Option<(u8, u8, u8)>, -1777 | supertype_symbol_map: BTreeMap>, -1778 | ) -> String { -1779 | assert!( -1780 | (ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version), -1781 | "This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}", -1782 | ); - | -1783 | Generator { -1784 | language_name: name.to_string(), -1785 | parse_table: tables.parse_table, -1786 | main_lex_table: tables.main_lex_table, -1787 | keyword_lex_table: tables.keyword_lex_table, -1788 | large_character_sets: tables.large_character_sets, -1789 | large_character_set_info: Vec::new(), -1790 | syntax_grammar, -1791 | lexical_grammar, -1792 | default_aliases, -1793 | abi_version, -1794 | metadata: semantic_version.map(|(major_version, minor_version, patch_version)| Metadata { -1795 | major_version, -1796 | minor_version, -1797 | patch_version, -1798 | }), -1799 | supertype_symbol_map, -1800 | ..Default::default() -1801 | } -1802 | .generate() -1803 | } - - - 
--------------------------------------------------------------------------------- -/crates/generate/src/rules.rs: --------------------------------------------------------------------------------- - 1 | use std::{collections::HashMap, fmt}; - | - 2 | use serde::Serialize; - 3 | use smallbitvec::SmallBitVec; - | - 4 | use super::grammars::VariableType; - | - 5 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)] - 6 | pub enum SymbolType { - 7 | External, - 8 | End, - 9 | EndOfNonTerminalExtra, - 10 | Terminal, - 11 | NonTerminal, - 12 | } - | - 13 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)] - 14 | pub enum Associativity { - 15 | Left, - 16 | Right, - 17 | } - | - 18 | #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)] - 19 | pub struct Alias { - 20 | pub value: String, - 21 | pub is_named: bool, - 22 | } - | - 23 | #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default, Serialize)] - 24 | pub enum Precedence { - 25 | #[default] - 26 | None, - 27 | Integer(i32), - 28 | Name(String), - 29 | } - | - 30 | pub type AliasMap = HashMap; - | - 31 | #[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize)] - 32 | pub struct MetadataParams { - 33 | pub precedence: Precedence, - 34 | pub dynamic_precedence: i32, - 35 | pub associativity: Option, - 36 | pub is_token: bool, - 37 | pub is_main_token: bool, - 38 | pub alias: Option, - 39 | pub field_name: Option, - 40 | } - | - 41 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)] - 42 | pub struct Symbol { - 43 | pub kind: SymbolType, - 44 | pub index: usize, - 45 | } - | - 46 | #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize)] - 47 | pub enum Rule { - 48 | Blank, - 49 | String(String), - 50 | Pattern(String, String), - 51 | NamedSymbol(String), - 52 | Symbol(Symbol), - 53 | Choice(Vec), - 54 | Metadata { - 55 | params: MetadataParams, - 56 | rule: Box, - 57 | }, - 58 | Repeat(Box), - 59 
| Seq(Vec), - 60 | Reserved { - 61 | rule: Box, - 62 | context_name: String, - 63 | }, - 64 | } - | - 65 | // Because tokens are represented as small (~400 max) unsigned integers, - 66 | // sets of tokens can be efficiently represented as bit vectors with each - 67 | // index corresponding to a token, and each value representing whether or not - 68 | // the token is present in the set. - 69 | #[derive(Default, Clone, PartialEq, Eq, Hash)] - 70 | pub struct TokenSet { - 71 | terminal_bits: SmallBitVec, - 72 | external_bits: SmallBitVec, - 73 | eof: bool, - 74 | end_of_nonterminal_extra: bool, - 75 | } - | - 76 | impl fmt::Debug for TokenSet { - 77 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - 78 | f.debug_list().entries(self.iter()).finish() - 79 | } - 80 | } - | - 81 | impl PartialOrd for TokenSet { - 82 | fn partial_cmp(&self, other: &Self) -> Option { - 83 | Some(self.cmp(other)) - 84 | } - 85 | } - | - 86 | impl Ord for TokenSet { - 87 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { - 88 | self.terminal_bits - 89 | .iter() - 90 | .cmp(other.terminal_bits.iter()) - 91 | .then_with(|| self.external_bits.iter().cmp(other.external_bits.iter())) - 92 | .then_with(|| self.eof.cmp(&other.eof)) - 93 | .then_with(|| { - 94 | self.end_of_nonterminal_extra - 95 | .cmp(&other.end_of_nonterminal_extra) - 96 | }) - 97 | } - 98 | } - | - 99 | impl Rule { - 100 | pub fn field(name: String, content: Self) -> Self { - 101 | add_metadata(content, move |params| { - 102 | params.field_name = Some(name); - 103 | }) - 104 | } - | - 105 | pub fn alias(content: Self, value: String, is_named: bool) -> Self { - 106 | add_metadata(content, move |params| { - 107 | params.alias = Some(Alias { value, is_named }); - 108 | }) - 109 | } - | - 110 | pub fn token(content: Self) -> Self { - 111 | add_metadata(content, |params| { - 112 | params.is_token = true; - 113 | }) - 114 | } - | - 115 | pub fn immediate_token(content: Self) -> Self { - 116 | add_metadata(content, 
|params| { - 117 | params.is_token = true; - 118 | params.is_main_token = true; - 119 | }) - 120 | } - | - 121 | pub fn prec(value: Precedence, content: Self) -> Self { - 122 | add_metadata(content, |params| { - 123 | params.precedence = value; - 124 | }) - 125 | } - | - 126 | pub fn prec_left(value: Precedence, content: Self) -> Self { - 127 | add_metadata(content, |params| { - 128 | params.associativity = Some(Associativity::Left); - 129 | params.precedence = value; - 130 | }) - 131 | } - | - 132 | pub fn prec_right(value: Precedence, content: Self) -> Self { - 133 | add_metadata(content, |params| { - 134 | params.associativity = Some(Associativity::Right); - 135 | params.precedence = value; - 136 | }) - 137 | } - | - 138 | pub fn prec_dynamic(value: i32, content: Self) -> Self { - 139 | add_metadata(content, |params| { - 140 | params.dynamic_precedence = value; - 141 | }) - 142 | } - | - 143 | pub fn repeat(rule: Self) -> Self { - 144 | Self::Repeat(Box::new(rule)) - 145 | } - | - 146 | pub fn choice(rules: Vec) -> Self { - 147 | let mut elements = Vec::with_capacity(rules.len()); - 148 | for rule in rules { - 149 | choice_helper(&mut elements, rule); - 150 | } - 151 | Self::Choice(elements) - 152 | } - | - 153 | pub const fn seq(rules: Vec) -> Self { - 154 | Self::Seq(rules) - 155 | } - | - 156 | pub fn is_empty(&self) -> bool { - 157 | match self { - 158 | Self::Blank | Self::Pattern(..) | Self::NamedSymbol(_) | Self::Symbol(_) => false, - 159 | Self::String(string) => string.is_empty(), - 160 | Self::Metadata { rule, .. } | Self::Repeat(rule) | Self::Reserved { rule, .. 
} => { - 161 | rule.is_empty() - 162 | } - 163 | Self::Choice(rules) => rules.iter().any(Self::is_empty), - 164 | Self::Seq(rules) => rules.iter().all(Self::is_empty), - 165 | } - 166 | } - 167 | } - | - 168 | impl Alias { - 169 | #[must_use] - 170 | pub const fn kind(&self) -> VariableType { - 171 | if self.is_named { - 172 | VariableType::Named - 173 | } else { - 174 | VariableType::Anonymous - 175 | } - 176 | } - 177 | } - | - 178 | impl Precedence { - 179 | #[must_use] - 180 | pub const fn is_none(&self) -> bool { - 181 | matches!(self, Self::None) - 182 | } - 183 | } - | - 184 | #[cfg(test)] - 185 | impl Rule { - 186 | #[must_use] - 187 | pub const fn terminal(index: usize) -> Self { - 188 | Self::Symbol(Symbol::terminal(index)) - 189 | } - | - 190 | #[must_use] - 191 | pub const fn non_terminal(index: usize) -> Self { - 192 | Self::Symbol(Symbol::non_terminal(index)) - 193 | } - | - 194 | #[must_use] - 195 | pub const fn external(index: usize) -> Self { - 196 | Self::Symbol(Symbol::external(index)) - 197 | } - | - 198 | #[must_use] - 199 | pub fn named(name: &'static str) -> Self { - 200 | Self::NamedSymbol(name.to_string()) - 201 | } - | - 202 | #[must_use] - 203 | pub fn string(value: &'static str) -> Self { - 204 | Self::String(value.to_string()) - 205 | } - | - 206 | #[must_use] - 207 | pub fn pattern(value: &'static str, flags: &'static str) -> Self { - 208 | Self::Pattern(value.to_string(), flags.to_string()) - 209 | } - 210 | } - | - 211 | impl Symbol { - 212 | #[must_use] - 213 | pub fn is_terminal(&self) -> bool { - 214 | self.kind == SymbolType::Terminal - 215 | } - | - 216 | #[must_use] - 217 | pub fn is_non_terminal(&self) -> bool { - 218 | self.kind == SymbolType::NonTerminal - 219 | } - | - 220 | #[must_use] - 221 | pub fn is_external(&self) -> bool { - 222 | self.kind == SymbolType::External - 223 | } - | - 224 | #[must_use] - 225 | pub fn is_eof(&self) -> bool { - 226 | self.kind == SymbolType::End - 227 | } - | - 228 | #[must_use] - 229 | pub 
const fn non_terminal(index: usize) -> Self { - 230 | Self { - 231 | kind: SymbolType::NonTerminal, - 232 | index, - 233 | } - 234 | } - | - 235 | #[must_use] - 236 | pub const fn terminal(index: usize) -> Self { - 237 | Self { - 238 | kind: SymbolType::Terminal, - 239 | index, - 240 | } - 241 | } - | - 242 | #[must_use] - 243 | pub const fn external(index: usize) -> Self { - 244 | Self { - 245 | kind: SymbolType::External, - 246 | index, - 247 | } - 248 | } - | - 249 | #[must_use] - 250 | pub const fn end() -> Self { - 251 | Self { - 252 | kind: SymbolType::End, - 253 | index: 0, - 254 | } - 255 | } - | - 256 | #[must_use] - 257 | pub const fn end_of_nonterminal_extra() -> Self { - 258 | Self { - 259 | kind: SymbolType::EndOfNonTerminalExtra, - 260 | index: 0, - 261 | } - 262 | } - 263 | } - | - 264 | impl From for Rule { - 265 | fn from(symbol: Symbol) -> Self { - 266 | Self::Symbol(symbol) - 267 | } - 268 | } - | - 269 | impl TokenSet { - 270 | #[must_use] - 271 | pub const fn new() -> Self { - 272 | Self { - 273 | terminal_bits: SmallBitVec::new(), - 274 | external_bits: SmallBitVec::new(), - 275 | eof: false, - 276 | end_of_nonterminal_extra: false, - 277 | } - 278 | } - | - 279 | pub fn iter(&self) -> impl Iterator + '_ { - 280 | self.terminal_bits - 281 | .iter() - 282 | .enumerate() - 283 | .filter_map(|(i, value)| { - 284 | if value { - 285 | Some(Symbol::terminal(i)) - 286 | } else { - 287 | None - 288 | } - 289 | }) - 290 | .chain( - 291 | self.external_bits - 292 | .iter() - 293 | .enumerate() - 294 | .filter_map(|(i, value)| { - 295 | if value { - 296 | Some(Symbol::external(i)) - 297 | } else { - 298 | None - 299 | } - 300 | }), - 301 | ) - 302 | .chain(if self.eof { Some(Symbol::end()) } else { None }) - 303 | .chain(if self.end_of_nonterminal_extra { - 304 | Some(Symbol::end_of_nonterminal_extra()) - 305 | } else { - 306 | None - 307 | }) - 308 | } - | - 309 | pub fn terminals(&self) -> impl Iterator + '_ { - 310 | self.terminal_bits - 311 | .iter() 
- 312 | .enumerate() - 313 | .filter_map(|(i, value)| { - 314 | if value { - 315 | Some(Symbol::terminal(i)) - 316 | } else { - 317 | None - 318 | } - 319 | }) - 320 | } - | - 321 | pub fn contains(&self, symbol: &Symbol) -> bool { - 322 | match symbol.kind { - 323 | SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), - 324 | SymbolType::Terminal => self.terminal_bits.get(symbol.index).unwrap_or(false), - 325 | SymbolType::External => self.external_bits.get(symbol.index).unwrap_or(false), - 326 | SymbolType::End => self.eof, - 327 | SymbolType::EndOfNonTerminalExtra => self.end_of_nonterminal_extra, - 328 | } - 329 | } - | - 330 | pub fn contains_terminal(&self, index: usize) -> bool { - 331 | self.terminal_bits.get(index).unwrap_or(false) - 332 | } - | - 333 | pub fn insert(&mut self, other: Symbol) { - 334 | let vec = match other.kind { - 335 | SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), - 336 | SymbolType::Terminal => &mut self.terminal_bits, - 337 | SymbolType::External => &mut self.external_bits, - 338 | SymbolType::End => { - 339 | self.eof = true; - 340 | return; - 341 | } - 342 | SymbolType::EndOfNonTerminalExtra => { - 343 | self.end_of_nonterminal_extra = true; - 344 | return; - 345 | } - 346 | }; - 347 | if other.index >= vec.len() { - 348 | vec.resize(other.index + 1, false); - 349 | } - 350 | vec.set(other.index, true); - 351 | } - | - 352 | pub fn remove(&mut self, other: &Symbol) -> bool { - 353 | let vec = match other.kind { - 354 | SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), - 355 | SymbolType::Terminal => &mut self.terminal_bits, - 356 | SymbolType::External => &mut self.external_bits, - 357 | SymbolType::End => { - 358 | return if self.eof { - 359 | self.eof = false; - 360 | true - 361 | } else { - 362 | false - 363 | } - 364 | } - 365 | SymbolType::EndOfNonTerminalExtra => { - 366 | return if self.end_of_nonterminal_extra { - 367 | 
self.end_of_nonterminal_extra = false; - 368 | true - 369 | } else { - 370 | false - 371 | }; - 372 | } - 373 | }; - 374 | if other.index < vec.len() && vec[other.index] { - 375 | vec.set(other.index, false); - 376 | while vec.last() == Some(false) { - 377 | vec.pop(); - 378 | } - 379 | return true; - 380 | } - 381 | false - 382 | } - | - 383 | pub fn is_empty(&self) -> bool { - 384 | !self.eof - 385 | && !self.end_of_nonterminal_extra - 386 | && !self.terminal_bits.iter().any(|a| a) - 387 | && !self.external_bits.iter().any(|a| a) - 388 | } - | - 389 | pub fn len(&self) -> usize { - 390 | self.eof as usize - 391 | + self.end_of_nonterminal_extra as usize - 392 | + self.terminal_bits.iter().filter(|b| *b).count() - 393 | + self.external_bits.iter().filter(|b| *b).count() - 394 | } - | - 395 | pub fn insert_all_terminals(&mut self, other: &Self) -> bool { - 396 | let mut result = false; - 397 | if other.terminal_bits.len() > self.terminal_bits.len() { - 398 | self.terminal_bits.resize(other.terminal_bits.len(), false); - 399 | } - 400 | for (i, element) in other.terminal_bits.iter().enumerate() { - 401 | if element { - 402 | result |= !self.terminal_bits[i]; - 403 | self.terminal_bits.set(i, element); - 404 | } - 405 | } - 406 | result - 407 | } - | - 408 | fn insert_all_externals(&mut self, other: &Self) -> bool { - 409 | let mut result = false; - 410 | if other.external_bits.len() > self.external_bits.len() { - 411 | self.external_bits.resize(other.external_bits.len(), false); - 412 | } - 413 | for (i, element) in other.external_bits.iter().enumerate() { - 414 | if element { - 415 | result |= !self.external_bits[i]; - 416 | self.external_bits.set(i, element); - 417 | } - 418 | } - 419 | result - 420 | } - | - 421 | pub fn insert_all(&mut self, other: &Self) -> bool { - 422 | let mut result = false; - 423 | if other.eof { - 424 | result |= !self.eof; - 425 | self.eof = true; - 426 | } - 427 | if other.end_of_nonterminal_extra { - 428 | result |= 
!self.end_of_nonterminal_extra; - 429 | self.end_of_nonterminal_extra = true; - 430 | } - 431 | result |= self.insert_all_terminals(other); - 432 | result |= self.insert_all_externals(other); - 433 | result - 434 | } - 435 | } - | - 436 | impl FromIterator for TokenSet { - 437 | fn from_iter>(iter: T) -> Self { - 438 | let mut result = Self::new(); - 439 | for symbol in iter { - 440 | result.insert(symbol); - 441 | } - 442 | result - 443 | } - 444 | } - | - 445 | fn add_metadata(input: Rule, f: T) -> Rule { - 446 | match input { - 447 | Rule::Metadata { rule, mut params } if !params.is_token => { - 448 | f(&mut params); - 449 | Rule::Metadata { rule, params } - 450 | } - 451 | _ => { - 452 | let mut params = MetadataParams::default(); - 453 | f(&mut params); - 454 | Rule::Metadata { - 455 | rule: Box::new(input), - 456 | params, - 457 | } - 458 | } - 459 | } - 460 | } - | - 461 | fn choice_helper(result: &mut Vec, rule: Rule) { - 462 | match rule { - 463 | Rule::Choice(elements) => { - 464 | for element in elements { - 465 | choice_helper(result, element); - 466 | } - 467 | } - 468 | _ => { - 469 | if !result.contains(&rule) { - 470 | result.push(rule); - 471 | } - 472 | } - 473 | } - 474 | } - | - 475 | impl fmt::Display for Precedence { - 476 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - 477 | match self { - 478 | Self::Integer(i) => write!(f, "{i}"), - 479 | Self::Name(s) => write!(f, "'{s}'"), - 480 | Self::None => write!(f, "none"), - 481 | } - 482 | } - 483 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/tables.rs: --------------------------------------------------------------------------------- - 1 | use std::collections::BTreeMap; - | - 2 | use super::{ - 3 | nfa::CharacterSet, - 4 | rules::{Alias, Symbol, TokenSet}, - 5 | }; - 6 | pub type ProductionInfoId = usize; - 7 | pub type ParseStateId = usize; - 8 | pub type LexStateId = usize; - | - 9 | use 
std::hash::BuildHasherDefault; - | - 10 | use indexmap::IndexMap; - 11 | use rustc_hash::FxHasher; - | - 12 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] - 13 | pub enum ParseAction { - 14 | Accept, - 15 | Shift { - 16 | state: ParseStateId, - 17 | is_repetition: bool, - 18 | }, - 19 | ShiftExtra, - 20 | Recover, - 21 | Reduce { - 22 | symbol: Symbol, - 23 | child_count: usize, - 24 | dynamic_precedence: i32, - 25 | production_id: ProductionInfoId, - 26 | }, - 27 | } - | - 28 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] - 29 | pub enum GotoAction { - 30 | Goto(ParseStateId), - 31 | ShiftExtra, - 32 | } - | - 33 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] - 34 | pub struct ParseTableEntry { - 35 | pub actions: Vec, - 36 | pub reusable: bool, - 37 | } - | - 38 | #[derive(Clone, Debug, Default, PartialEq, Eq)] - 39 | pub struct ParseState { - 40 | pub id: ParseStateId, - 41 | pub terminal_entries: IndexMap>, - 42 | pub nonterminal_entries: IndexMap>, - 43 | pub reserved_words: TokenSet, - 44 | pub lex_state_id: usize, - 45 | pub external_lex_state_id: usize, - 46 | pub core_id: usize, - 47 | } - | - 48 | #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] - 49 | pub struct FieldLocation { - 50 | pub index: usize, - 51 | pub inherited: bool, - 52 | } - | - 53 | #[derive(Debug, Default, PartialEq, Eq)] - 54 | pub struct ProductionInfo { - 55 | pub alias_sequence: Vec>, - 56 | pub field_map: BTreeMap>, - 57 | } - | - 58 | #[derive(Debug, Default, PartialEq, Eq)] - 59 | pub struct ParseTable { - 60 | pub states: Vec, - 61 | pub symbols: Vec, - 62 | pub production_infos: Vec, - 63 | pub max_aliased_production_length: usize, - 64 | pub external_lex_states: Vec, - 65 | } - | - 66 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] - 67 | pub struct AdvanceAction { - 68 | pub state: LexStateId, - 69 | pub in_main_token: bool, - 70 | } - | - 71 | #[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] - 72 | pub struct LexState { - 
73 | pub accept_action: Option, - 74 | pub eof_action: Option, - 75 | pub advance_actions: Vec<(CharacterSet, AdvanceAction)>, - 76 | } - | - 77 | #[derive(Debug, PartialEq, Eq, Default)] - 78 | pub struct LexTable { - 79 | pub states: Vec, - 80 | } - | - 81 | impl ParseTableEntry { - 82 | #[must_use] - 83 | pub const fn new() -> Self { - 84 | Self { - 85 | reusable: true, - 86 | actions: Vec::new(), - 87 | } - 88 | } - 89 | } - | - 90 | impl ParseState { - 91 | pub fn is_end_of_non_terminal_extra(&self) -> bool { - 92 | self.terminal_entries - 93 | .contains_key(&Symbol::end_of_nonterminal_extra()) - 94 | } - | - 95 | pub fn referenced_states(&self) -> impl Iterator + '_ { - 96 | self.terminal_entries - 97 | .iter() - 98 | .flat_map(|(_, entry)| { - 99 | entry.actions.iter().filter_map(|action| match action { - 100 | ParseAction::Shift { state, .. } => Some(*state), - 101 | _ => None, - 102 | }) - 103 | }) - 104 | .chain(self.nonterminal_entries.iter().filter_map(|(_, action)| { - 105 | if let GotoAction::Goto(state) = action { - 106 | Some(*state) - 107 | } else { - 108 | None - 109 | } - 110 | })) - 111 | } - | - 112 | pub fn update_referenced_states(&mut self, mut f: F) - 113 | where - 114 | F: FnMut(usize, &Self) -> usize, - 115 | { - 116 | let mut updates = Vec::new(); - 117 | for (symbol, entry) in &self.terminal_entries { - 118 | for (i, action) in entry.actions.iter().enumerate() { - 119 | if let ParseAction::Shift { state, .. 
} = action { - 120 | let result = f(*state, self); - 121 | if result != *state { - 122 | updates.push((*symbol, i, result)); - 123 | } - 124 | } - 125 | } - 126 | } - 127 | for (symbol, action) in &self.nonterminal_entries { - 128 | if let GotoAction::Goto(other_state) = action { - 129 | let result = f(*other_state, self); - 130 | if result != *other_state { - 131 | updates.push((*symbol, 0, result)); - 132 | } - 133 | } - 134 | } - 135 | for (symbol, action_index, new_state) in updates { - 136 | if symbol.is_non_terminal() { - 137 | self.nonterminal_entries - 138 | .insert(symbol, GotoAction::Goto(new_state)); - 139 | } else { - 140 | let entry = self.terminal_entries.get_mut(&symbol).unwrap(); - 141 | if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] { - 142 | entry.actions[action_index] = ParseAction::Shift { - 143 | state: new_state, - 144 | is_repetition, - 145 | }; - 146 | } - 147 | } - 148 | } - 149 | } - 150 | } - - - --------------------------------------------------------------------------------- -/crates/generate/src/templates/alloc.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_ALLOC_H_ - 2 | #define TREE_SITTER_ALLOC_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include - 7 | #include - 8 | #include - | - 9 | // Allow clients to override allocation functions - 10 | #ifdef TREE_SITTER_REUSE_ALLOCATOR - | - 11 | extern void *(*ts_current_malloc)(size_t size); - 12 | extern void *(*ts_current_calloc)(size_t count, size_t size); - 13 | extern void *(*ts_current_realloc)(void *ptr, size_t size); - 14 | extern void (*ts_current_free)(void *ptr); - | - 15 | #ifndef ts_malloc - 16 | #define ts_malloc ts_current_malloc - 17 | #endif - 18 | #ifndef ts_calloc - 19 | #define ts_calloc ts_current_calloc - 20 | #endif - 21 | #ifndef ts_realloc - 22 | #define ts_realloc ts_current_realloc - 23 | #endif - 24 | #ifndef ts_free - 25 | #define ts_free 
ts_current_free - 26 | #endif - | - 27 | #else - | - 28 | #ifndef ts_malloc - 29 | #define ts_malloc malloc - 30 | #endif - 31 | #ifndef ts_calloc - 32 | #define ts_calloc calloc - 33 | #endif - 34 | #ifndef ts_realloc - 35 | #define ts_realloc realloc - 36 | #endif - 37 | #ifndef ts_free - 38 | #define ts_free free - 39 | #endif - | - 40 | #endif - | - 41 | #ifdef __cplusplus - 42 | } - 43 | #endif - | - 44 | #endif // TREE_SITTER_ALLOC_H_ - - - --------------------------------------------------------------------------------- -/crates/generate/src/templates/array.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_ARRAY_H_ - 2 | #define TREE_SITTER_ARRAY_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include "./alloc.h" - | - 7 | #include - 8 | #include - 9 | #include - 10 | #include - 11 | #include - | - 12 | #ifdef _MSC_VER - 13 | #pragma warning(push) - 14 | #pragma warning(disable : 4101) - 15 | #elif defined(__GNUC__) || defined(__clang__) - 16 | #pragma GCC diagnostic push - 17 | #pragma GCC diagnostic ignored "-Wunused-variable" - 18 | #endif - | - 19 | #define Array(T) \ - 20 | struct { \ - 21 | T *contents; \ - 22 | uint32_t size; \ - 23 | uint32_t capacity; \ - 24 | } - | - 25 | /// Initialize an array. - 26 | #define array_init(self) \ - 27 | ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) - | - 28 | /// Create an empty array. - 29 | #define array_new() \ - 30 | { NULL, 0, 0 } - | - 31 | /// Get a pointer to the element at a given `index` in the array. - 32 | #define array_get(self, _index) \ - 33 | (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) - | - 34 | /// Get a pointer to the first element in the array. - 35 | #define array_front(self) array_get(self, 0) - | - 36 | /// Get a pointer to the last element in the array. 
- 37 | #define array_back(self) array_get(self, (self)->size - 1) - | - 38 | /// Clear the array, setting its size to zero. Note that this does not free any - 39 | /// memory allocated for the array's contents. - 40 | #define array_clear(self) ((self)->size = 0) - | - 41 | /// Reserve `new_capacity` elements of space in the array. If `new_capacity` is - 42 | /// less than the array's current capacity, this function has no effect. - 43 | #define array_reserve(self, new_capacity) \ - 44 | _array__reserve((Array *)(self), array_elem_size(self), new_capacity) - | - 45 | /// Free any memory allocated for this array. Note that this does not free any - 46 | /// memory allocated for the array's contents. - 47 | #define array_delete(self) _array__delete((Array *)(self)) - | - 48 | /// Push a new `element` onto the end of the array. - 49 | #define array_push(self, element) \ - 50 | (_array__grow((Array *)(self), 1, array_elem_size(self)), \ - 51 | (self)->contents[(self)->size++] = (element)) - | - 52 | /// Increase the array's size by `count` elements. - 53 | /// New elements are zero-initialized. - 54 | #define array_grow_by(self, count) \ - 55 | do { \ - 56 | if ((count) == 0) break; \ - 57 | _array__grow((Array *)(self), count, array_elem_size(self)); \ - 58 | memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ - 59 | (self)->size += (count); \ - 60 | } while (0) - | - 61 | /// Append all elements from one array to the end of another. - 62 | #define array_push_all(self, other) \ - 63 | array_extend((self), (other)->size, (other)->contents) - | - 64 | /// Append `count` elements to the end of the array, reading their values from the - 65 | /// `contents` pointer. - 66 | #define array_extend(self, count, contents) \ - 67 | _array__splice( \ - 68 | (Array *)(self), array_elem_size(self), (self)->size, \ - 69 | 0, count, contents \ - 70 | ) - | - 71 | /// Remove `old_count` elements from the array starting at the given `index`. 
At - 72 | /// the same index, insert `new_count` new elements, reading their values from the - 73 | /// `new_contents` pointer. - 74 | #define array_splice(self, _index, old_count, new_count, new_contents) \ - 75 | _array__splice( \ - 76 | (Array *)(self), array_elem_size(self), _index, \ - 77 | old_count, new_count, new_contents \ - 78 | ) - | - 79 | /// Insert one `element` into the array at the given `index`. - 80 | #define array_insert(self, _index, element) \ - 81 | _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) - | - 82 | /// Remove one element from the array at the given `index`. - 83 | #define array_erase(self, _index) \ - 84 | _array__erase((Array *)(self), array_elem_size(self), _index) - | - 85 | /// Pop the last element off the array, returning the element by value. - 86 | #define array_pop(self) ((self)->contents[--(self)->size]) - | - 87 | /// Assign the contents of one array to another, reallocating if necessary. - 88 | #define array_assign(self, other) \ - 89 | _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) - | - 90 | /// Swap one array with another - 91 | #define array_swap(self, other) \ - 92 | _array__swap((Array *)(self), (Array *)(other)) - | - 93 | /// Get the size of the array contents - 94 | #define array_elem_size(self) (sizeof *(self)->contents) - | - 95 | /// Search a sorted array for a given `needle` value, using the given `compare` - 96 | /// callback to determine the order. - 97 | /// - 98 | /// If an existing element is found to be equal to `needle`, then the `index` - 99 | /// out-parameter is set to the existing value's index, and the `exists` - 100 | /// out-parameter is set to true. Otherwise, `index` is set to an index where - 101 | /// `needle` should be inserted in order to preserve the sorting, and `exists` - 102 | /// is set to false. 
- 103 | #define array_search_sorted_with(self, compare, needle, _index, _exists) \ - 104 | _array__search_sorted(self, 0, compare, , needle, _index, _exists) - | - 105 | /// Search a sorted array for a given `needle` value, using integer comparisons - 106 | /// of a given struct field (specified with a leading dot) to determine the order. - 107 | /// - 108 | /// See also `array_search_sorted_with`. - 109 | #define array_search_sorted_by(self, field, needle, _index, _exists) \ - 110 | _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) - | - 111 | /// Insert a given `value` into a sorted array, using the given `compare` - 112 | /// callback to determine the order. - 113 | #define array_insert_sorted_with(self, compare, value) \ - 114 | do { \ - 115 | unsigned _index, _exists; \ - 116 | array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ - 117 | if (!_exists) array_insert(self, _index, value); \ - 118 | } while (0) - | - 119 | /// Insert a given `value` into a sorted array, using integer comparisons of - 120 | /// a given struct field (specified with a leading dot) to determine the order. - 121 | /// - 122 | /// See also `array_search_sorted_by`. - 123 | #define array_insert_sorted_by(self, field, value) \ - 124 | do { \ - 125 | unsigned _index, _exists; \ - 126 | array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ - 127 | if (!_exists) array_insert(self, _index, value); \ - 128 | } while (0) - | - 129 | // Private - | - 130 | typedef Array(void) Array; - | - 131 | /// This is not what you're looking for, see `array_delete`. - 132 | static inline void _array__delete(Array *self) { - 133 | if (self->contents) { - 134 | ts_free(self->contents); - 135 | self->contents = NULL; - 136 | self->size = 0; - 137 | self->capacity = 0; - 138 | } - 139 | } - | - 140 | /// This is not what you're looking for, see `array_erase`. 
- 141 | static inline void _array__erase(Array *self, size_t element_size, - 142 | uint32_t index) { - 143 | assert(index < self->size); - 144 | char *contents = (char *)self->contents; - 145 | memmove(contents + index * element_size, contents + (index + 1) * element_size, - 146 | (self->size - index - 1) * element_size); - 147 | self->size--; - 148 | } - | - 149 | /// This is not what you're looking for, see `array_reserve`. - 150 | static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { - 151 | if (new_capacity > self->capacity) { - 152 | if (self->contents) { - 153 | self->contents = ts_realloc(self->contents, new_capacity * element_size); - 154 | } else { - 155 | self->contents = ts_malloc(new_capacity * element_size); - 156 | } - 157 | self->capacity = new_capacity; - 158 | } - 159 | } - | - 160 | /// This is not what you're looking for, see `array_assign`. - 161 | static inline void _array__assign(Array *self, const Array *other, size_t element_size) { - 162 | _array__reserve(self, element_size, other->size); - 163 | self->size = other->size; - 164 | memcpy(self->contents, other->contents, self->size * element_size); - 165 | } - | - 166 | /// This is not what you're looking for, see `array_swap`. - 167 | static inline void _array__swap(Array *self, Array *other) { - 168 | Array swap = *other; - 169 | *other = *self; - 170 | *self = swap; - 171 | } - | - 172 | /// This is not what you're looking for, see `array_push` or `array_grow_by`. - 173 | static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { - 174 | uint32_t new_size = self->size + count; - 175 | if (new_size > self->capacity) { - 176 | uint32_t new_capacity = self->capacity * 2; - 177 | if (new_capacity < 8) new_capacity = 8; - 178 | if (new_capacity < new_size) new_capacity = new_size; - 179 | _array__reserve(self, element_size, new_capacity); - 180 | } - 181 | } - | - 182 | /// This is not what you're looking for, see `array_splice`. 
- 183 | static inline void _array__splice(Array *self, size_t element_size, - 184 | uint32_t index, uint32_t old_count, - 185 | uint32_t new_count, const void *elements) { - 186 | uint32_t new_size = self->size + new_count - old_count; - 187 | uint32_t old_end = index + old_count; - 188 | uint32_t new_end = index + new_count; - 189 | assert(old_end <= self->size); - | - 190 | _array__reserve(self, element_size, new_size); - | - 191 | char *contents = (char *)self->contents; - 192 | if (self->size > old_end) { - 193 | memmove( - 194 | contents + new_end * element_size, - 195 | contents + old_end * element_size, - 196 | (self->size - old_end) * element_size - 197 | ); - 198 | } - 199 | if (new_count > 0) { - 200 | if (elements) { - 201 | memcpy( - 202 | (contents + index * element_size), - 203 | elements, - 204 | new_count * element_size - 205 | ); - 206 | } else { - 207 | memset( - 208 | (contents + index * element_size), - 209 | 0, - 210 | new_count * element_size - 211 | ); - 212 | } - 213 | } - 214 | self->size += new_count - old_count; - 215 | } - | - 216 | /// A binary search routine, based on Rust's `std::slice::binary_search_by`. - 217 | /// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. 
- 218 | #define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ - 219 | do { \ - 220 | *(_index) = start; \ - 221 | *(_exists) = false; \ - 222 | uint32_t size = (self)->size - *(_index); \ - 223 | if (size == 0) break; \ - 224 | int comparison; \ - 225 | while (size > 1) { \ - 226 | uint32_t half_size = size / 2; \ - 227 | uint32_t mid_index = *(_index) + half_size; \ - 228 | comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ - 229 | if (comparison <= 0) *(_index) = mid_index; \ - 230 | size -= half_size; \ - 231 | } \ - 232 | comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ - 233 | if (comparison == 0) *(_exists) = true; \ - 234 | else if (comparison < 0) *(_index) += 1; \ - 235 | } while (0) - | - 236 | /// Helper macro for the `_sorted_by` routines below. This takes the left (existing) - 237 | /// parameter by reference in order to work with the generic sorting function above. - 238 | #define _compare_int(a, b) ((int)*(a) - (int)(b)) - | - 239 | #ifdef _MSC_VER - 240 | #pragma warning(pop) - 241 | #elif defined(__GNUC__) || defined(__clang__) - 242 | #pragma GCC diagnostic pop - 243 | #endif - | - 244 | #ifdef __cplusplus - 245 | } - 246 | #endif - | - 247 | #endif // TREE_SITTER_ARRAY_H_ - - - --------------------------------------------------------------------------------- -/crates/highlight/Cargo.toml: --------------------------------------------------------------------------------- - 1 | [package] - 2 | name = "tree-sitter-highlight" - 3 | version.workspace = true - 4 | description = "Library for performing syntax highlighting with Tree-sitter" - 5 | authors = [ - 6 | "Max Brunsfeld ", - 7 | "Tim Clem ", - 8 | ] - 9 | edition.workspace = true - 10 | rust-version.workspace = true - 11 | readme = "README.md" - 12 | homepage.workspace = true - 13 | repository.workspace = true - 14 | documentation = "https://docs.rs/tree-sitter-highlight" - 15 | license.workspace = true - 16 | keywords 
= ["incremental", "parsing", "syntax", "highlighting"] - 17 | categories = ["parsing", "text-editors"] - | - 18 | [lints] - 19 | workspace = true - | - 20 | [lib] - 21 | path = "src/highlight.rs" - 22 | crate-type = ["lib", "staticlib"] - | - 23 | [dependencies] - 24 | regex.workspace = true - 25 | thiserror.workspace = true - 26 | streaming-iterator.workspace = true - | - 27 | tree-sitter.workspace = true - - - --------------------------------------------------------------------------------- -/crates/highlight/include/tree_sitter/highlight.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_HIGHLIGHT_H_ - 2 | #define TREE_SITTER_HIGHLIGHT_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include - | - 7 | typedef enum { - 8 | TSHighlightOk, - 9 | TSHighlightUnknownScope, - 10 | TSHighlightTimeout, - 11 | TSHighlightInvalidLanguage, - 12 | TSHighlightInvalidUtf8, - 13 | TSHighlightInvalidRegex, - 14 | TSHighlightInvalidQuery, - 15 | } TSHighlightError; - | - 16 | typedef struct TSHighlighter TSHighlighter; - 17 | typedef struct TSHighlightBuffer TSHighlightBuffer; - | - 18 | // Construct a `TSHighlighter` by providing a list of strings containing - 19 | // the HTML attributes that should be applied for each highlight value. - 20 | TSHighlighter *ts_highlighter_new( - 21 | const char **highlight_names, - 22 | const char **attribute_strings, - 23 | uint32_t highlight_count - 24 | ); - | - 25 | // Delete a syntax highlighter. - 26 | void ts_highlighter_delete(TSHighlighter *); - | - 27 | // Add a `TSLanguage` to a highlighter. The language is associated with a - 28 | // scope name, which can be used later to select a language for syntax - 29 | // highlighting. Along with the language, you must provide a JSON string - 30 | // containing the compiled PropertySheet to use for syntax highlighting - 31 | // with that language. 
You can also optionally provide an 'injection regex', - 32 | // which is used to detect when this language has been embedded in a document - 33 | // written in a different language. - 34 | TSHighlightError ts_highlighter_add_language( - 35 | TSHighlighter *self, - 36 | const char *language_name, - 37 | const char *scope_name, - 38 | const char *injection_regex, - 39 | const TSLanguage *language, - 40 | const char *highlight_query, - 41 | const char *injection_query, - 42 | const char *locals_query, - 43 | uint32_t highlight_query_len, - 44 | uint32_t injection_query_len, - 45 | uint32_t locals_query_len - 46 | ); - | - 47 | // Compute syntax highlighting for a given document. You must first - 48 | // create a `TSHighlightBuffer` to hold the output. - 49 | TSHighlightError ts_highlighter_highlight( - 50 | const TSHighlighter *self, - 51 | const char *scope_name, - 52 | const char *source_code, - 53 | uint32_t source_code_len, - 54 | TSHighlightBuffer *output, - 55 | const size_t *cancellation_flag - 56 | ); - | - 57 | // TSHighlightBuffer: This struct stores the HTML output of syntax - 58 | // highlighting. It can be reused for multiple highlighting calls. - 59 | TSHighlightBuffer *ts_highlight_buffer_new(); - | - 60 | // Delete a highlight buffer. - 61 | void ts_highlight_buffer_delete(TSHighlightBuffer *); - | - 62 | // Access the HTML content of a highlight buffer. 
- 63 | const uint8_t *ts_highlight_buffer_content(const TSHighlightBuffer *); - 64 | const uint32_t *ts_highlight_buffer_line_offsets(const TSHighlightBuffer *); - 65 | uint32_t ts_highlight_buffer_len(const TSHighlightBuffer *); - 66 | uint32_t ts_highlight_buffer_line_count(const TSHighlightBuffer *); - | - 67 | #ifdef __cplusplus - 68 | } - 69 | #endif - | - 70 | #endif // TREE_SITTER_HIGHLIGHT_H_ - - - --------------------------------------------------------------------------------- -/crates/highlight/README.md: --------------------------------------------------------------------------------- - 1 | # Tree-sitter Highlight - | - 2 | [![crates.io badge]][crates.io] - | - 3 | [crates.io]: https://crates.io/crates/tree-sitter-highlight - 4 | [crates.io badge]: https://img.shields.io/crates/v/tree-sitter-highlight.svg?color=%23B48723 - | - 5 | ## Usage - | - 6 | Add this crate, and the language-specific crates for whichever languages you want - 7 | to parse, to your `Cargo.toml`: - | - 8 | ```toml - 9 | [dependencies] - 10 | tree-sitter-highlight = "0.25.4" - 11 | tree-sitter-javascript = "0.23.1" - 12 | ``` - | - 13 | Define the list of highlight names that you will recognize: - | - 14 | ```rust - 15 | let highlight_names = [ - 16 | "attribute", - 17 | "comment", - 18 | "constant", - 19 | "constant.builtin", - 20 | "constructor", - 21 | "embedded", - 22 | "function", - 23 | "function.builtin", - 24 | "keyword", - 25 | "module", - 26 | "number", - 27 | "operator", - 28 | "property", - 29 | "property.builtin", - 30 | "punctuation", - 31 | "punctuation.bracket", - 32 | "punctuation.delimiter", - 33 | "punctuation.special", - 34 | "string", - 35 | "string.special", - 36 | "tag", - 37 | "type", - 38 | "type.builtin", - 39 | "variable", - 40 | "variable.builtin", - 41 | "variable.parameter", - 42 | ]; - 43 | ``` - | - 44 | Create a highlighter. 
You need one of these for each thread that you're using for - 45 | syntax highlighting: - | - 46 | ```rust - 47 | use tree_sitter_highlight::Highlighter; - | - 48 | let mut highlighter = Highlighter::new(); - 49 | ``` - | - 50 | Load some highlighting queries from the `queries` directory of the language repository: - | - 51 | ```rust - 52 | use tree_sitter_highlight::HighlightConfiguration; - | - 53 | let javascript_language = tree_sitter_javascript::LANGUAGE.into(); - | - 54 | let mut javascript_config = HighlightConfiguration::new( - 55 | javascript_language, - 56 | "javascript", - 57 | tree_sitter_javascript::HIGHLIGHT_QUERY, - 58 | tree_sitter_javascript::INJECTIONS_QUERY, - 59 | tree_sitter_javascript::LOCALS_QUERY, - 60 | ).unwrap(); - 61 | ``` - | - 62 | Configure the recognized names: - | - 63 | ```rust - 64 | javascript_config.configure(&highlight_names); - 65 | ``` - | - 66 | Highlight some code: - | - 67 | ```rust - 68 | use tree_sitter_highlight::HighlightEvent; - | - 69 | let highlights = highlighter.highlight( - 70 | &javascript_config, - 71 | b"const x = new Y();", - 72 | None, - 73 | |_| None - 74 | ).unwrap(); - | - 75 | for event in highlights { - 76 | match event.unwrap() { - 77 | HighlightEvent::Source {start, end} => { - 78 | eprintln!("source: {start}-{end}"); - 79 | }, - 80 | HighlightEvent::HighlightStart(s) => { - 81 | eprintln!("highlight style started: {s:?}"); - 82 | }, - 83 | HighlightEvent::HighlightEnd => { - 84 | eprintln!("highlight style ended"); - 85 | }, - 86 | } - 87 | } - 88 | ``` - | - 89 | The last parameter to `highlight` is a _language injection_ callback. This allows - 90 | other languages to be retrieved when Tree-sitter detects an embedded document - 91 | (for example, a piece of JavaScript code inside a `script` tag within HTML). 
- - - --------------------------------------------------------------------------------- -/crates/highlight/src/c_lib.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | collections::HashMap, ffi::CStr, fmt, os::raw::c_char, process::abort, slice, str, - 3 | sync::atomic::AtomicUsize, - 4 | }; - | - 5 | use regex::Regex; - 6 | use tree_sitter::Language; - | - 7 | use super::{Error, Highlight, HighlightConfiguration, Highlighter, HtmlRenderer}; - | - 8 | pub struct TSHighlighter { - 9 | pub languages: HashMap, HighlightConfiguration)>, - 10 | pub attribute_strings: Vec<&'static [u8]>, - 11 | pub highlight_names: Vec, - 12 | pub carriage_return_index: Option, - 13 | } - | - 14 | pub struct TSHighlightBuffer { - 15 | highlighter: Highlighter, - 16 | renderer: HtmlRenderer, - 17 | } - | - 18 | #[repr(C)] - 19 | pub enum ErrorCode { - 20 | Ok, - 21 | UnknownScope, - 22 | Timeout, - 23 | InvalidLanguage, - 24 | InvalidUtf8, - 25 | InvalidRegex, - 26 | InvalidQuery, - 27 | InvalidLanguageName, - 28 | } - | - 29 | /// Create a new [`TSHighlighter`] instance. - 30 | /// - 31 | /// # Safety - 32 | /// - 33 | /// The caller must ensure that the `highlight_names` and `attribute_strings` arrays are valid for - 34 | /// the lifetime of the returned [`TSHighlighter`] instance, and are non-null. 
- 35 | #[no_mangle] - 36 | pub unsafe extern "C" fn ts_highlighter_new( - 37 | highlight_names: *const *const c_char, - 38 | attribute_strings: *const *const c_char, - 39 | highlight_count: u32, - 40 | ) -> *mut TSHighlighter { - 41 | let highlight_names = slice::from_raw_parts(highlight_names, highlight_count as usize); - 42 | let attribute_strings = slice::from_raw_parts(attribute_strings, highlight_count as usize); - 43 | let highlight_names = highlight_names - 44 | .iter() - 45 | .map(|s| CStr::from_ptr(*s).to_string_lossy().to_string()) - 46 | .collect::>(); - 47 | let attribute_strings = attribute_strings - 48 | .iter() - 49 | .map(|s| CStr::from_ptr(*s).to_bytes()) - 50 | .collect(); - 51 | let carriage_return_index = highlight_names.iter().position(|s| s == "carriage-return"); - 52 | Box::into_raw(Box::new(TSHighlighter { - 53 | languages: HashMap::new(), - 54 | attribute_strings, - 55 | highlight_names, - 56 | carriage_return_index, - 57 | })) - 58 | } - | - 59 | /// Add a language to a [`TSHighlighter`] instance. - 60 | /// - 61 | /// Returns an [`ErrorCode`] indicating whether the language was added successfully or not. - 62 | /// - 63 | /// # Safety - 64 | /// - 65 | /// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance - 66 | /// created by [`ts_highlighter_new`]. - 67 | /// - 68 | /// The caller must ensure that any `*const c_char` (C-style string) parameters are valid for the - 69 | /// lifetime of the [`TSHighlighter`] instance, and are non-null. 
- 70 | #[no_mangle] - 71 | pub unsafe extern "C" fn ts_highlighter_add_language( - 72 | this: *mut TSHighlighter, - 73 | language_name: *const c_char, - 74 | scope_name: *const c_char, - 75 | injection_regex: *const c_char, - 76 | language: Language, - 77 | highlight_query: *const c_char, - 78 | injection_query: *const c_char, - 79 | locals_query: *const c_char, - 80 | highlight_query_len: u32, - 81 | injection_query_len: u32, - 82 | locals_query_len: u32, - 83 | ) -> ErrorCode { - 84 | let f = move || { - 85 | let this = unwrap_mut_ptr(this); - 86 | let scope_name = CStr::from_ptr(scope_name); - 87 | let scope_name = scope_name - 88 | .to_str() - 89 | .or(Err(ErrorCode::InvalidUtf8))? - 90 | .to_string(); - 91 | let injection_regex = if injection_regex.is_null() { - 92 | None - 93 | } else { - 94 | let pattern = CStr::from_ptr(injection_regex); - 95 | let pattern = pattern.to_str().or(Err(ErrorCode::InvalidUtf8))?; - 96 | Some(Regex::new(pattern).or(Err(ErrorCode::InvalidRegex))?) - 97 | }; - | - 98 | let highlight_query = - 99 | slice::from_raw_parts(highlight_query.cast::(), highlight_query_len as usize); - | - 100 | let highlight_query = str::from_utf8(highlight_query).or(Err(ErrorCode::InvalidUtf8))?; - | - 101 | let injection_query = if injection_query_len > 0 { - 102 | let query = - 103 | slice::from_raw_parts(injection_query.cast::(), injection_query_len as usize); - 104 | str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))? - 105 | } else { - 106 | "" - 107 | }; - | - 108 | let locals_query = if locals_query_len > 0 { - 109 | let query = slice::from_raw_parts(locals_query.cast::(), locals_query_len as usize); - 110 | str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))? 
- 111 | } else { - 112 | "" - 113 | }; - | - 114 | let lang = CStr::from_ptr(language_name) - 115 | .to_str() - 116 | .or(Err(ErrorCode::InvalidLanguageName))?; - | - 117 | let mut config = HighlightConfiguration::new( - 118 | language, - 119 | lang, - 120 | highlight_query, - 121 | injection_query, - 122 | locals_query, - 123 | ) - 124 | .or(Err(ErrorCode::InvalidQuery))?; - 125 | config.configure(this.highlight_names.as_slice()); - 126 | this.languages.insert(scope_name, (injection_regex, config)); - | - 127 | Ok(()) - 128 | }; - | - 129 | match f() { - 130 | Ok(()) => ErrorCode::Ok, - 131 | Err(e) => e, - 132 | } - 133 | } - | - 134 | #[no_mangle] - 135 | pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer { - 136 | Box::into_raw(Box::new(TSHighlightBuffer { - 137 | highlighter: Highlighter::new(), - 138 | renderer: HtmlRenderer::new(), - 139 | })) - 140 | } - | - 141 | /// Deletes a [`TSHighlighter`] instance. - 142 | /// - 143 | /// # Safety - 144 | /// - 145 | /// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance - 146 | /// created by [`ts_highlighter_new`]. - 147 | /// - 148 | /// It cannot be used after this function is called. - 149 | #[no_mangle] - 150 | pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) { - 151 | drop(Box::from_raw(this)); - 152 | } - | - 153 | /// Deletes a [`TSHighlightBuffer`] instance. - 154 | /// - 155 | /// # Safety - 156 | /// - 157 | /// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance - 158 | /// created by [`ts_highlight_buffer_new`] - 159 | /// - 160 | /// It cannot be used after this function is called. - 161 | #[no_mangle] - 162 | pub unsafe extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { - 163 | drop(Box::from_raw(this)); - 164 | } - | - 165 | /// Get the HTML content of a [`TSHighlightBuffer`] instance as a raw pointer. 
- 166 | /// - 167 | /// # Safety - 168 | /// - 169 | /// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance - 170 | /// created by [`ts_highlight_buffer_new`]. - 171 | /// - 172 | /// The returned pointer, a C-style string, must not outlive the [`TSHighlightBuffer`] instance, - 173 | /// else the data will point to garbage. - 174 | /// - 175 | /// To get the length of the HTML content, use [`ts_highlight_buffer_len`]. - 176 | #[no_mangle] - 177 | pub unsafe extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { - 178 | let this = unwrap_ptr(this); - 179 | this.renderer.html.as_slice().as_ptr() - 180 | } - | - 181 | /// Get the line offsets of a [`TSHighlightBuffer`] instance as a C-style array. - 182 | /// - 183 | /// # Safety - 184 | /// - 185 | /// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance - 186 | /// created by [`ts_highlight_buffer_new`]. - 187 | /// - 188 | /// The returned pointer, a C-style array of [`u32`]s, must not outlive the [`TSHighlightBuffer`] - 189 | /// instance, else the data will point to garbage. - 190 | /// - 191 | /// To get the length of the array, use [`ts_highlight_buffer_line_count`]. - 192 | #[no_mangle] - 193 | pub unsafe extern "C" fn ts_highlight_buffer_line_offsets( - 194 | this: *const TSHighlightBuffer, - 195 | ) -> *const u32 { - 196 | let this = unwrap_ptr(this); - 197 | this.renderer.line_offsets.as_slice().as_ptr() - 198 | } - | - 199 | /// Get the length of the HTML content of a [`TSHighlightBuffer`] instance. - 200 | /// - 201 | /// # Safety - 202 | /// - 203 | /// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance - 204 | /// created by [`ts_highlight_buffer_new`]. 
- 205 | #[no_mangle] - 206 | pub unsafe extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { - 207 | let this = unwrap_ptr(this); - 208 | this.renderer.html.len() as u32 - 209 | } - | - 210 | /// Get the number of lines in a [`TSHighlightBuffer`] instance. - 211 | /// - 212 | /// # Safety - 213 | /// - 214 | /// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance - 215 | /// created by [`ts_highlight_buffer_new`]. - 216 | #[no_mangle] - 217 | pub unsafe extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { - 218 | let this = unwrap_ptr(this); - 219 | this.renderer.line_offsets.len() as u32 - 220 | } - | - 221 | /// Highlight a string of source code. - 222 | /// - 223 | /// # Safety - 224 | /// - 225 | /// The caller must ensure that `scope_name`, `source_code`, `output`, and `cancellation_flag` are - 226 | /// valid for the lifetime of the [`TSHighlighter`] instance, and are non-null. - 227 | /// - 228 | /// `this` must be a non-null pointer to a [`TSHighlighter`] instance created by - 229 | /// [`ts_highlighter_new`] - 230 | #[no_mangle] - 231 | pub unsafe extern "C" fn ts_highlighter_highlight( - 232 | this: *const TSHighlighter, - 233 | scope_name: *const c_char, - 234 | source_code: *const c_char, - 235 | source_code_len: u32, - 236 | output: *mut TSHighlightBuffer, - 237 | cancellation_flag: *const AtomicUsize, - 238 | ) -> ErrorCode { - 239 | let this = unwrap_ptr(this); - 240 | let output = unwrap_mut_ptr(output); - 241 | let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); - 242 | let source_code = slice::from_raw_parts(source_code.cast::(), source_code_len as usize); - 243 | let cancellation_flag = cancellation_flag.as_ref(); - 244 | this.highlight(source_code, scope_name, output, cancellation_flag) - 245 | } - | - 246 | impl TSHighlighter { - 247 | fn highlight( - 248 | &self, - 249 | source_code: &[u8], - 250 | scope_name: &str, - 251 | output: &mut 
TSHighlightBuffer, - 252 | cancellation_flag: Option<&AtomicUsize>, - 253 | ) -> ErrorCode { - 254 | let entry = self.languages.get(scope_name); - 255 | if entry.is_none() { - 256 | return ErrorCode::UnknownScope; - 257 | } - 258 | let (_, configuration) = entry.unwrap(); - 259 | let languages = &self.languages; - | - 260 | let highlights = output.highlighter.highlight( - 261 | configuration, - 262 | source_code, - 263 | cancellation_flag, - 264 | move |injection_string| { - 265 | languages.values().find_map(|(injection_regex, config)| { - 266 | injection_regex.as_ref().and_then(|regex| { - 267 | if regex.is_match(injection_string) { - 268 | Some(config) - 269 | } else { - 270 | None - 271 | } - 272 | }) - 273 | }) - 274 | }, - 275 | ); - | - 276 | if let Ok(highlights) = highlights { - 277 | output.renderer.reset(); - 278 | output - 279 | .renderer - 280 | .set_carriage_return_highlight(self.carriage_return_index.map(Highlight)); - 281 | let result = output.renderer.render(highlights, source_code, &|s, out| { - 282 | out.extend(self.attribute_strings[s.0]); - 283 | }); - 284 | match result { - 285 | Err(Error::Cancelled | Error::Unknown) => ErrorCode::Timeout, - 286 | Err(Error::InvalidLanguage) => ErrorCode::InvalidLanguage, - 287 | Ok(()) => ErrorCode::Ok, - 288 | } - 289 | } else { - 290 | ErrorCode::Timeout - 291 | } - 292 | } - 293 | } - | - 294 | unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { - 295 | result.as_ref().unwrap_or_else(|| { - 296 | eprintln!("{}:{} - pointer must not be null", file!(), line!()); - 297 | abort(); - 298 | }) - 299 | } - | - 300 | unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { - 301 | result.as_mut().unwrap_or_else(|| { - 302 | eprintln!("{}:{} - pointer must not be null", file!(), line!()); - 303 | abort(); - 304 | }) - 305 | } - | - 306 | fn unwrap(result: Result) -> T { - 307 | result.unwrap_or_else(|error| { - 308 | eprintln!("tree-sitter highlight error: {error}"); - 309 | abort(); - 310 | }) - 311 | } 
- - - --------------------------------------------------------------------------------- -/crates/highlight/src/highlight.rs: --------------------------------------------------------------------------------- - 1 | #![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))] - | - 2 | pub mod c_lib; - 3 | use core::slice; - 4 | use std::{ - 5 | collections::HashSet, - 6 | iter, - 7 | marker::PhantomData, - 8 | mem::{self, MaybeUninit}, - 9 | ops::{self, ControlFlow}, - 10 | str, - 11 | sync::{ - 12 | atomic::{AtomicUsize, Ordering}, - 13 | LazyLock, - 14 | }, - 15 | }; - | - 16 | pub use c_lib as c; - 17 | use streaming_iterator::StreamingIterator; - 18 | use thiserror::Error; - 19 | use tree_sitter::{ - 20 | ffi, Language, LossyUtf8, Node, ParseOptions, Parser, Point, Query, QueryCapture, - 21 | QueryCaptures, QueryCursor, QueryError, QueryMatch, Range, TextProvider, Tree, - 22 | }; - | - 23 | const CANCELLATION_CHECK_INTERVAL: usize = 100; - 24 | const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024; - 25 | const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000; - | - 26 | static STANDARD_CAPTURE_NAMES: LazyLock> = LazyLock::new(|| { - 27 | vec![ - 28 | "attribute", - 29 | "boolean", - 30 | "carriage-return", - 31 | "comment", - 32 | "comment.documentation", - 33 | "constant", - 34 | "constant.builtin", - 35 | "constructor", - 36 | "constructor.builtin", - 37 | "embedded", - 38 | "error", - 39 | "escape", - 40 | "function", - 41 | "function.builtin", - 42 | "keyword", - 43 | "markup", - 44 | "markup.bold", - 45 | "markup.heading", - 46 | "markup.italic", - 47 | "markup.link", - 48 | "markup.link.url", - 49 | "markup.list", - 50 | "markup.list.checked", - 51 | "markup.list.numbered", - 52 | "markup.list.unchecked", - 53 | "markup.list.unnumbered", - 54 | "markup.quote", - 55 | "markup.raw", - 56 | "markup.raw.block", - 57 | "markup.raw.inline", - 58 | "markup.strikethrough", - 59 | "module", - 60 | "number", - 61 | "operator", - 62 | "property", - 63 | 
"property.builtin", - 64 | "punctuation", - 65 | "punctuation.bracket", - 66 | "punctuation.delimiter", - 67 | "punctuation.special", - 68 | "string", - 69 | "string.escape", - 70 | "string.regexp", - 71 | "string.special", - 72 | "string.special.symbol", - 73 | "tag", - 74 | "type", - 75 | "type.builtin", - 76 | "variable", - 77 | "variable.builtin", - 78 | "variable.member", - 79 | "variable.parameter", - 80 | ] - 81 | .into_iter() - 82 | .collect() - 83 | }); - | - 84 | /// Indicates which highlight should be applied to a region of source code. - 85 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] - 86 | pub struct Highlight(pub usize); - | - 87 | /// Represents the reason why syntax highlighting failed. - 88 | #[derive(Debug, Error, PartialEq, Eq)] - 89 | pub enum Error { - 90 | #[error("Cancelled")] - 91 | Cancelled, - 92 | #[error("Invalid language")] - 93 | InvalidLanguage, - 94 | #[error("Unknown error")] - 95 | Unknown, - 96 | } - | - 97 | /// Represents a single step in rendering a syntax-highlighted document. - 98 | #[derive(Copy, Clone, Debug)] - 99 | pub enum HighlightEvent { - 100 | Source { start: usize, end: usize }, - 101 | HighlightStart(Highlight), - 102 | HighlightEnd, - 103 | } - | - 104 | /// Contains the data needed to highlight code written in a particular language. - 105 | /// - 106 | /// This struct is immutable and can be shared between threads. 
- 107 | pub struct HighlightConfiguration { - 108 | pub language: Language, - 109 | pub language_name: String, - 110 | pub query: Query, - 111 | combined_injections_query: Option, - 112 | locals_pattern_index: usize, - 113 | highlights_pattern_index: usize, - 114 | highlight_indices: Vec>, - 115 | non_local_variable_patterns: Vec, - 116 | injection_content_capture_index: Option, - 117 | injection_language_capture_index: Option, - 118 | local_scope_capture_index: Option, - 119 | local_def_capture_index: Option, - 120 | local_def_value_capture_index: Option, - 121 | local_ref_capture_index: Option, - 122 | } - | - 123 | /// Performs syntax highlighting, recognizing a given list of highlight names. - 124 | /// - 125 | /// For the best performance `Highlighter` values should be reused between - 126 | /// syntax highlighting calls. A separate highlighter is needed for each thread that - 127 | /// is performing highlighting. - 128 | pub struct Highlighter { - 129 | pub parser: Parser, - 130 | cursors: Vec, - 131 | } - | - 132 | /// Converts a general-purpose syntax highlighting iterator into a sequence of lines of HTML. - 133 | pub struct HtmlRenderer { - 134 | pub html: Vec, - 135 | pub line_offsets: Vec, - 136 | carriage_return_highlight: Option, - 137 | // The offset in `self.html` of the last carriage return. 
- 138 | last_carriage_return: Option, - 139 | } - | - 140 | #[derive(Debug)] - 141 | struct LocalDef<'a> { - 142 | name: &'a str, - 143 | value_range: ops::Range, - 144 | highlight: Option, - 145 | } - | - 146 | #[derive(Debug)] - 147 | struct LocalScope<'a> { - 148 | inherits: bool, - 149 | range: ops::Range, - 150 | local_defs: Vec>, - 151 | } - | - 152 | struct HighlightIter<'a, F> - 153 | where - 154 | F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, - 155 | { - 156 | source: &'a [u8], - 157 | language_name: &'a str, - 158 | byte_offset: usize, - 159 | highlighter: &'a mut Highlighter, - 160 | injection_callback: F, - 161 | cancellation_flag: Option<&'a AtomicUsize>, - 162 | layers: Vec>, - 163 | iter_count: usize, - 164 | next_event: Option, - 165 | last_highlight_range: Option<(usize, usize, usize)>, - 166 | } - | - 167 | struct HighlightIterLayer<'a> { - 168 | _tree: Tree, - 169 | cursor: QueryCursor, - 170 | captures: iter::Peekable<_QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>, - 171 | config: &'a HighlightConfiguration, - 172 | highlight_end_stack: Vec, - 173 | scope_stack: Vec>, - 174 | ranges: Vec, - 175 | depth: usize, - 176 | } - | - 177 | pub struct _QueryCaptures<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> { - 178 | ptr: *mut ffi::TSQueryCursor, - 179 | query: &'query Query, - 180 | text_provider: T, - 181 | buffer1: Vec, - 182 | buffer2: Vec, - 183 | _current_match: Option<(QueryMatch<'query, 'tree>, usize)>, - 184 | _options: Option<*mut ffi::TSQueryCursorOptions>, - 185 | _phantom: PhantomData<(&'tree (), I)>, - 186 | } - | - 187 | struct _QueryMatch<'cursor, 'tree> { - 188 | pub _pattern_index: usize, - 189 | pub _captures: &'cursor [QueryCapture<'tree>], - 190 | _id: u32, - 191 | _cursor: *mut ffi::TSQueryCursor, - 192 | } - | - 193 | impl<'tree> _QueryMatch<'_, 'tree> { - 194 | fn new(m: &ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self { - 195 | _QueryMatch { - 196 | _cursor: cursor, - 197 | _id: m.id, - 198 | 
_pattern_index: m.pattern_index as usize, - 199 | _captures: (m.capture_count > 0) - 200 | .then(|| unsafe { - 201 | slice::from_raw_parts( - 202 | m.captures.cast::>(), - 203 | m.capture_count as usize, - 204 | ) - 205 | }) - 206 | .unwrap_or_default(), - 207 | } - 208 | } - 209 | } - | - 210 | impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> Iterator - 211 | for _QueryCaptures<'query, 'tree, T, I> - 212 | { - 213 | type Item = (QueryMatch<'query, 'tree>, usize); - | - 214 | fn next(&mut self) -> Option { - 215 | unsafe { - 216 | loop { - 217 | let mut capture_index = 0u32; - 218 | let mut m = MaybeUninit::::uninit(); - 219 | if ffi::ts_query_cursor_next_capture( - 220 | self.ptr, - 221 | m.as_mut_ptr(), - 222 | core::ptr::addr_of_mut!(capture_index), - 223 | ) { - 224 | let result = std::mem::transmute::<_QueryMatch, QueryMatch>(_QueryMatch::new( - 225 | &m.assume_init(), - 226 | self.ptr, - 227 | )); - 228 | if result.satisfies_text_predicates( - 229 | self.query, - 230 | &mut self.buffer1, - 231 | &mut self.buffer2, - 232 | &mut self.text_provider, - 233 | ) { - 234 | return Some((result, capture_index as usize)); - 235 | } - 236 | result.remove(); - 237 | } else { - 238 | return None; - 239 | } - 240 | } - 241 | } - 242 | } - 243 | } - | - 244 | impl Default for Highlighter { - 245 | fn default() -> Self { - 246 | Self::new() - 247 | } - 248 | } - | - 249 | impl Highlighter { - 250 | #[must_use] - 251 | pub fn new() -> Self { - 252 | Self { - 253 | parser: Parser::new(), - 254 | cursors: Vec::new(), - 255 | } - 256 | } - | - 257 | pub const fn parser(&mut self) -> &mut Parser { - 258 | &mut self.parser - 259 | } - | - 260 | /// Iterate over the highlighted regions for a given slice of source code. 
- 261 | pub fn highlight<'a>( - 262 | &'a mut self, - 263 | config: &'a HighlightConfiguration, - 264 | source: &'a [u8], - 265 | cancellation_flag: Option<&'a AtomicUsize>, - 266 | mut injection_callback: impl FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, - 267 | ) -> Result> + 'a, Error> { - 268 | let layers = HighlightIterLayer::new( - 269 | source, - 270 | None, - 271 | self, - 272 | cancellation_flag, - 273 | &mut injection_callback, - 274 | config, - 275 | 0, - 276 | vec![Range { - 277 | start_byte: 0, - 278 | end_byte: usize::MAX, - 279 | start_point: Point::new(0, 0), - 280 | end_point: Point::new(usize::MAX, usize::MAX), - 281 | }], - 282 | )?; - 283 | assert_ne!(layers.len(), 0); - 284 | let mut result = HighlightIter { - 285 | source, - 286 | language_name: &config.language_name, - 287 | byte_offset: 0, - 288 | injection_callback, - 289 | cancellation_flag, - 290 | highlighter: self, - 291 | iter_count: 0, - 292 | layers, - 293 | next_event: None, - 294 | last_highlight_range: None, - 295 | }; - 296 | result.sort_layers(); - 297 | Ok(result) - 298 | } - 299 | } - | - 300 | impl HighlightConfiguration { - 301 | /// Creates a `HighlightConfiguration` for a given `Language` and set of highlighting - 302 | /// queries. - 303 | /// - 304 | /// # Parameters - 305 | /// - 306 | /// * `language` - The Tree-sitter `Language` that should be used for parsing. - 307 | /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This - 308 | /// should be non-empty, otherwise no syntax highlights will be added. - 309 | /// * `injections_query` - A string containing tree patterns for injecting other languages into - 310 | /// the document. This can be empty if no injections are desired. - 311 | /// * `locals_query` - A string containing tree patterns for tracking local variable definitions - 312 | /// and references. This can be empty if local variable tracking is not needed. 
- 313 | /// - 314 | /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. - 315 | pub fn new( - 316 | language: Language, - 317 | name: impl Into, - 318 | highlights_query: &str, - 319 | injection_query: &str, - 320 | locals_query: &str, - 321 | ) -> Result { - 322 | // Concatenate the query strings, keeping track of the start offset of each section. - 323 | let mut query_source = String::new(); - 324 | query_source.push_str(injection_query); - 325 | let locals_query_offset = query_source.len(); - 326 | query_source.push_str(locals_query); - 327 | let highlights_query_offset = query_source.len(); - 328 | query_source.push_str(highlights_query); - | - 329 | // Construct a single query by concatenating the three query strings, but record the - 330 | // range of pattern indices that belong to each individual string. - 331 | let mut query = Query::new(&language, &query_source)?; - 332 | let mut locals_pattern_index = 0; - 333 | let mut highlights_pattern_index = 0; - 334 | for i in 0..(query.pattern_count()) { - 335 | let pattern_offset = query.start_byte_for_pattern(i); - 336 | if pattern_offset < highlights_query_offset { - 337 | if pattern_offset < highlights_query_offset { - 338 | highlights_pattern_index += 1; - 339 | } - 340 | if pattern_offset < locals_query_offset { - 341 | locals_pattern_index += 1; - 342 | } - 343 | } - 344 | } - | - 345 | // Construct a separate query just for dealing with the 'combined injections'. - 346 | // Disable the combined injection patterns in the main query. 
- 347 | let mut combined_injections_query = Query::new(&language, injection_query)?; - 348 | let mut has_combined_queries = false; - 349 | for pattern_index in 0..locals_pattern_index { - 350 | let settings = query.property_settings(pattern_index); - 351 | if settings.iter().any(|s| &*s.key == "injection.combined") { - 352 | has_combined_queries = true; - 353 | query.disable_pattern(pattern_index); - 354 | } else { - 355 | combined_injections_query.disable_pattern(pattern_index); - 356 | } - 357 | } - 358 | let combined_injections_query = if has_combined_queries { - 359 | Some(combined_injections_query) - 360 | } else { - 361 | None - 362 | }; - | - 363 | // Find all of the highlighting patterns that are disabled for nodes that - 364 | // have been identified as local variables. - 365 | let non_local_variable_patterns = (0..query.pattern_count()) - 366 | .map(|i| { - 367 | query - 368 | .property_predicates(i) - 369 | .iter() - 370 | .any(|(prop, positive)| !*positive && prop.key.as_ref() == "local") - 371 | }) - 372 | .collect(); - | - 373 | // Store the numeric ids for all of the special captures. 
- 374 | let mut injection_content_capture_index = None; - 375 | let mut injection_language_capture_index = None; - 376 | let mut local_def_capture_index = None; - 377 | let mut local_def_value_capture_index = None; - 378 | let mut local_ref_capture_index = None; - 379 | let mut local_scope_capture_index = None; - 380 | for (i, name) in query.capture_names().iter().enumerate() { - 381 | let i = Some(i as u32); - 382 | match *name { - 383 | "injection.content" => injection_content_capture_index = i, - 384 | "injection.language" => injection_language_capture_index = i, - 385 | "local.definition" => local_def_capture_index = i, - 386 | "local.definition-value" => local_def_value_capture_index = i, - 387 | "local.reference" => local_ref_capture_index = i, - 388 | "local.scope" => local_scope_capture_index = i, - 389 | _ => {} - 390 | } - 391 | } - | - 392 | let highlight_indices = vec![None; query.capture_names().len()]; - 393 | Ok(Self { - 394 | language, - 395 | language_name: name.into(), - 396 | query, - 397 | combined_injections_query, - 398 | locals_pattern_index, - 399 | highlights_pattern_index, - 400 | highlight_indices, - 401 | non_local_variable_patterns, - 402 | injection_content_capture_index, - 403 | injection_language_capture_index, - 404 | local_def_capture_index, - 405 | local_def_value_capture_index, - 406 | local_ref_capture_index, - 407 | local_scope_capture_index, - 408 | }) - 409 | } - | - 410 | /// Get a slice containing all of the highlight names used in the configuration. - 411 | #[must_use] - 412 | pub const fn names(&self) -> &[&str] { - 413 | self.query.capture_names() - 414 | } - | - 415 | /// Set the list of recognized highlight names. - 416 | /// - 417 | /// Tree-sitter syntax-highlighting queries specify highlights in the form of dot-separated - 418 | /// highlight names like `punctuation.bracket` and `function.method.builtin`. Consumers of - 419 | /// these queries can choose to recognize highlights with different levels of specificity. 
- 420 | /// For example, the string `function.builtin` will match against `function.method.builtin` - 421 | /// and `function.builtin.constructor`, but will not match `function.method`. - 422 | /// - 423 | /// When highlighting, results are returned as `Highlight` values, which contain the index - 424 | /// of the matched highlight this list of highlight names. - 425 | pub fn configure(&mut self, recognized_names: &[impl AsRef]) { - 426 | let mut capture_parts = Vec::new(); - 427 | self.highlight_indices.clear(); - 428 | self.highlight_indices - 429 | .extend(self.query.capture_names().iter().map(move |capture_name| { - 430 | capture_parts.clear(); - 431 | capture_parts.extend(capture_name.split('.')); - | - 432 | let mut best_index = None; - 433 | let mut best_match_len = 0; - 434 | for (i, recognized_name) in recognized_names.iter().enumerate() { - 435 | let mut len = 0; - 436 | let mut matches = true; - 437 | for part in recognized_name.as_ref().split('.') { - 438 | len += 1; - 439 | if !capture_parts.contains(&part) { - 440 | matches = false; - 441 | break; - 442 | } - 443 | } - 444 | if matches && len > best_match_len { - 445 | best_index = Some(i); - 446 | best_match_len = len; - 447 | } - 448 | } - 449 | best_index.map(Highlight) - 450 | })); - 451 | } - | - 452 | // Return the list of this configuration's capture names that are neither present in the - 453 | // list of predefined 'canonical' names nor start with an underscore (denoting 'private' - 454 | // captures used as part of capture internals). 
- 455 | #[must_use] - 456 | pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> { - 457 | let capture_names = if capture_names.is_empty() { - 458 | &*STANDARD_CAPTURE_NAMES - 459 | } else { - 460 | capture_names - 461 | }; - 462 | self.names() - 463 | .iter() - 464 | .filter(|&n| !(n.starts_with('_') || capture_names.contains(n))) - 465 | .copied() - 466 | .collect() - 467 | } - 468 | } - | - 469 | impl<'a> HighlightIterLayer<'a> { - 470 | /// Create a new 'layer' of highlighting for this document. - 471 | /// - 472 | /// In the event that the new layer contains "combined injections" (injections where multiple - 473 | /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and - 474 | /// added to the returned vector. - 475 | #[allow(clippy::too_many_arguments)] - 476 | fn new Option<&'a HighlightConfiguration> + 'a>( - 477 | source: &'a [u8], - 478 | parent_name: Option<&str>, - 479 | highlighter: &mut Highlighter, - 480 | cancellation_flag: Option<&'a AtomicUsize>, - 481 | injection_callback: &mut F, - 482 | mut config: &'a HighlightConfiguration, - 483 | mut depth: usize, - 484 | mut ranges: Vec, - 485 | ) -> Result, Error> { - 486 | let mut result = Vec::with_capacity(1); - 487 | let mut queue = Vec::new(); - 488 | loop { - 489 | if highlighter.parser.set_included_ranges(&ranges).is_ok() { - 490 | highlighter - 491 | .parser - 492 | .set_language(&config.language) - 493 | .map_err(|_| Error::InvalidLanguage)?; - | - 494 | let tree = highlighter - 495 | .parser - 496 | .parse_with_options( - 497 | &mut |i, _| { - 498 | if i < source.len() { - 499 | &source[i..] 
- 500 | } else { - 501 | &[] - 502 | } - 503 | }, - 504 | None, - 505 | Some(ParseOptions::new().progress_callback(&mut |_| { - 506 | if let Some(cancellation_flag) = cancellation_flag { - 507 | if cancellation_flag.load(Ordering::SeqCst) != 0 { - 508 | ControlFlow::Break(()) - 509 | } else { - 510 | ControlFlow::Continue(()) - 511 | } - 512 | } else { - 513 | ControlFlow::Continue(()) - 514 | } - 515 | })), - 516 | ) - 517 | .ok_or(Error::Cancelled)?; - 518 | let mut cursor = highlighter.cursors.pop().unwrap_or_default(); - | - 519 | // Process combined injections. - 520 | if let Some(combined_injections_query) = &config.combined_injections_query { - 521 | let mut injections_by_pattern_index = - 522 | vec![(None, Vec::new(), false); combined_injections_query.pattern_count()]; - 523 | let mut matches = - 524 | cursor.matches(combined_injections_query, tree.root_node(), source); - 525 | while let Some(mat) = matches.next() { - 526 | let entry = &mut injections_by_pattern_index[mat.pattern_index]; - 527 | let (language_name, content_node, include_children) = injection_for_match( - 528 | config, - 529 | parent_name, - 530 | combined_injections_query, - 531 | mat, - 532 | source, - 533 | ); - 534 | if language_name.is_some() { - 535 | entry.0 = language_name; - 536 | } - 537 | if let Some(content_node) = content_node { - 538 | entry.1.push(content_node); - 539 | } - 540 | entry.2 = include_children; - 541 | } - 542 | for (lang_name, content_nodes, includes_children) in injections_by_pattern_index - 543 | { - 544 | if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) { - 545 | if let Some(next_config) = (injection_callback)(lang_name) { - 546 | let ranges = Self::intersect_ranges( - 547 | &ranges, - 548 | &content_nodes, - 549 | includes_children, - 550 | ); - 551 | if !ranges.is_empty() { - 552 | queue.push((next_config, depth + 1, ranges)); - 553 | } - 554 | } - 555 | } - 556 | } - 557 | } - | - 558 | // The `captures` iterator borrows the `Tree` 
and the `QueryCursor`, which - 559 | // prevents them from being moved. But both of these values are really just - 560 | // pointers, so it's actually ok to move them. - 561 | let tree_ref = unsafe { mem::transmute::<&Tree, &'static Tree>(&tree) }; - 562 | let cursor_ref = unsafe { - 563 | mem::transmute::<&mut QueryCursor, &'static mut QueryCursor>(&mut cursor) - 564 | }; - 565 | let captures = unsafe { - 566 | std::mem::transmute::, _QueryCaptures<_, _>>( - 567 | cursor_ref.captures(&config.query, tree_ref.root_node(), source), - 568 | ) - 569 | } - 570 | .peekable(); - | - 571 | result.push(HighlightIterLayer { - 572 | highlight_end_stack: Vec::new(), - 573 | scope_stack: vec![LocalScope { - 574 | inherits: false, - 575 | range: 0..usize::MAX, - 576 | local_defs: Vec::new(), - 577 | }], - 578 | cursor, - 579 | depth, - 580 | _tree: tree, - 581 | captures, - 582 | config, - 583 | ranges, - 584 | }); - 585 | } - | - 586 | if queue.is_empty() { - 587 | break; - 588 | } - | - 589 | let (next_config, next_depth, next_ranges) = queue.remove(0); - 590 | config = next_config; - 591 | depth = next_depth; - 592 | ranges = next_ranges; - 593 | } - | - 594 | Ok(result) - 595 | } - | - 596 | // Compute the ranges that should be included when parsing an injection. - 597 | // This takes into account three things: - 598 | // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges. - 599 | // * `nodes` - Every injection takes place within a set of nodes. The injection ranges are the - 600 | // ranges of those nodes. - 601 | // * `includes_children` - For some injections, the content nodes' children should be excluded - 602 | // from the nested document, so that only the content nodes' *own* content is reparsed. For - 603 | // other injections, the content nodes' entire ranges should be reparsed, including the ranges - 604 | // of their children. 
- 605 | fn intersect_ranges( - 606 | parent_ranges: &[Range], - 607 | nodes: &[Node], - 608 | includes_children: bool, - 609 | ) -> Vec { - 610 | let mut cursor = nodes[0].walk(); - 611 | let mut result = Vec::new(); - 612 | let mut parent_range_iter = parent_ranges.iter(); - 613 | let mut parent_range = parent_range_iter - 614 | .next() - 615 | .expect("Layers should only be constructed with non-empty ranges vectors"); - 616 | for node in nodes { - 617 | let mut preceding_range = Range { - 618 | start_byte: 0, - 619 | start_point: Point::new(0, 0), - 620 | end_byte: node.start_byte(), - 621 | end_point: node.start_position(), - 622 | }; - 623 | let following_range = Range { - 624 | start_byte: node.end_byte(), - 625 | start_point: node.end_position(), - 626 | end_byte: usize::MAX, - 627 | end_point: Point::new(usize::MAX, usize::MAX), - 628 | }; - | - 629 | for excluded_range in node - 630 | .children(&mut cursor) - 631 | .filter_map(|child| { - 632 | if includes_children { - 633 | None - 634 | } else { - 635 | Some(child.range()) - 636 | } - 637 | }) - 638 | .chain(std::iter::once(following_range)) - 639 | { - 640 | let mut range = Range { - 641 | start_byte: preceding_range.end_byte, - 642 | start_point: preceding_range.end_point, - 643 | end_byte: excluded_range.start_byte, - 644 | end_point: excluded_range.start_point, - 645 | }; - 646 | preceding_range = excluded_range; - | - 647 | if range.end_byte < parent_range.start_byte { - 648 | continue; - 649 | } - | - 650 | while parent_range.start_byte <= range.end_byte { - 651 | if parent_range.end_byte > range.start_byte { - 652 | if range.start_byte < parent_range.start_byte { - 653 | range.start_byte = parent_range.start_byte; - 654 | range.start_point = parent_range.start_point; - 655 | } - | - 656 | if parent_range.end_byte < range.end_byte { - 657 | if range.start_byte < parent_range.end_byte { - 658 | result.push(Range { - 659 | start_byte: range.start_byte, - 660 | start_point: range.start_point, - 661 | 
end_byte: parent_range.end_byte, - 662 | end_point: parent_range.end_point, - 663 | }); - 664 | } - 665 | range.start_byte = parent_range.end_byte; - 666 | range.start_point = parent_range.end_point; - 667 | } else { - 668 | if range.start_byte < range.end_byte { - 669 | result.push(range); - 670 | } - 671 | break; - 672 | } - 673 | } - | - 674 | if let Some(next_range) = parent_range_iter.next() { - 675 | parent_range = next_range; - 676 | } else { - 677 | return result; - 678 | } - 679 | } - 680 | } - 681 | } - 682 | result - 683 | } - | - 684 | // First, sort scope boundaries by their byte offset in the document. At a - 685 | // given position, emit scope endings before scope beginnings. Finally, emit - 686 | // scope boundaries from deeper layers first. - 687 | fn sort_key(&mut self) -> Option<(usize, bool, isize)> { - 688 | let depth = -(self.depth as isize); - 689 | let next_start = self - 690 | .captures - 691 | .peek() - 692 | .map(|(m, i)| m.captures[*i].node.start_byte()); - 693 | let next_end = self.highlight_end_stack.last().copied(); - 694 | match (next_start, next_end) { - 695 | (Some(start), Some(end)) => { - 696 | if start < end { - 697 | Some((start, true, depth)) - 698 | } else { - 699 | Some((end, false, depth)) - 700 | } - 701 | } - 702 | (Some(i), None) => Some((i, true, depth)), - 703 | (None, Some(j)) => Some((j, false, depth)), - 704 | _ => None, - 705 | } - 706 | } - 707 | } - | - 708 | impl<'a, F> HighlightIter<'a, F> - 709 | where - 710 | F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, - 711 | { - 712 | fn emit_event( - 713 | &mut self, - 714 | offset: usize, - 715 | event: Option, - 716 | ) -> Option> { - 717 | let result; - 718 | if self.byte_offset < offset { - 719 | result = Some(Ok(HighlightEvent::Source { - 720 | start: self.byte_offset, - 721 | end: offset, - 722 | })); - 723 | self.byte_offset = offset; - 724 | self.next_event = event; - 725 | } else { - 726 | result = event.map(Ok); - 727 | } - 728 | 
self.sort_layers(); - 729 | result - 730 | } - | - 731 | fn sort_layers(&mut self) { - 732 | while !self.layers.is_empty() { - 733 | if let Some(sort_key) = self.layers[0].sort_key() { - 734 | let mut i = 0; - 735 | while i + 1 < self.layers.len() { - 736 | if let Some(next_offset) = self.layers[i + 1].sort_key() { - 737 | if next_offset < sort_key { - 738 | i += 1; - 739 | continue; - 740 | } - 741 | } - 742 | break; - 743 | } - 744 | if i > 0 { - 745 | self.layers[0..=i].rotate_left(1); - 746 | } - 747 | break; - 748 | } - 749 | let layer = self.layers.remove(0); - 750 | self.highlighter.cursors.push(layer.cursor); - 751 | } - 752 | } - | - 753 | fn insert_layer(&mut self, mut layer: HighlightIterLayer<'a>) { - 754 | if let Some(sort_key) = layer.sort_key() { - 755 | let mut i = 1; - 756 | while i < self.layers.len() { - 757 | if let Some(sort_key_i) = self.layers[i].sort_key() { - 758 | if sort_key_i > sort_key { - 759 | self.layers.insert(i, layer); - 760 | return; - 761 | } - 762 | i += 1; - 763 | } else { - 764 | self.layers.remove(i); - 765 | } - 766 | } - 767 | self.layers.push(layer); - 768 | } - 769 | } - 770 | } - | - 771 | impl<'a, F> Iterator for HighlightIter<'a, F> - 772 | where - 773 | F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, - 774 | { - 775 | type Item = Result; - | - 776 | fn next(&mut self) -> Option { - 777 | 'main: loop { - 778 | // If we've already determined the next highlight boundary, just return it. - 779 | if let Some(e) = self.next_event.take() { - 780 | return Some(Ok(e)); - 781 | } - | - 782 | // Periodically check for cancellation, returning `Cancelled` error if the - 783 | // cancellation flag was flipped. 
- 784 | if let Some(cancellation_flag) = self.cancellation_flag { - 785 | self.iter_count += 1; - 786 | if self.iter_count >= CANCELLATION_CHECK_INTERVAL { - 787 | self.iter_count = 0; - 788 | if cancellation_flag.load(Ordering::Relaxed) != 0 { - 789 | return Some(Err(Error::Cancelled)); - 790 | } - 791 | } - 792 | } - | - 793 | // If none of the layers have any more highlight boundaries, terminate. - 794 | if self.layers.is_empty() { - 795 | return if self.byte_offset < self.source.len() { - 796 | let result = Some(Ok(HighlightEvent::Source { - 797 | start: self.byte_offset, - 798 | end: self.source.len(), - 799 | })); - 800 | self.byte_offset = self.source.len(); - 801 | result - 802 | } else { - 803 | None - 804 | }; - 805 | } - | - 806 | // Get the next capture from whichever layer has the earliest highlight boundary. - 807 | let range; - 808 | let layer = &mut self.layers[0]; - 809 | if let Some((next_match, capture_index)) = layer.captures.peek() { - 810 | let next_capture = next_match.captures[*capture_index]; - 811 | range = next_capture.node.byte_range(); - | - 812 | // If any previous highlight ends before this node starts, then before - 813 | // processing this capture, emit the source code up until the end of the - 814 | // previous highlight, and an end event for that highlight. - 815 | if let Some(end_byte) = layer.highlight_end_stack.last().copied() { - 816 | if end_byte <= range.start { - 817 | layer.highlight_end_stack.pop(); - 818 | return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - 819 | } - 820 | } - 821 | } - 822 | // If there are no more captures, then emit any remaining highlight end events. - 823 | // And if there are none of those, then just advance to the end of the document. 
- 824 | else { - 825 | if let Some(end_byte) = layer.highlight_end_stack.last().copied() { - 826 | layer.highlight_end_stack.pop(); - 827 | return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - 828 | } - 829 | return self.emit_event(self.source.len(), None); - 830 | } - | - 831 | let (mut match_, capture_index) = layer.captures.next().unwrap(); - 832 | let mut capture = match_.captures[capture_index]; - | - 833 | // If this capture represents an injection, then process the injection. - 834 | if match_.pattern_index < layer.config.locals_pattern_index { - 835 | let (language_name, content_node, include_children) = injection_for_match( - 836 | layer.config, - 837 | Some(self.language_name), - 838 | &layer.config.query, - 839 | &match_, - 840 | self.source, - 841 | ); - | - 842 | // Explicitly remove this match so that none of its other captures will remain - 843 | // in the stream of captures. - 844 | match_.remove(); - | - 845 | // If a language is found with the given name, then add a new language layer - 846 | // to the highlighted document. 
- 847 | if let (Some(language_name), Some(content_node)) = (language_name, content_node) { - 848 | if let Some(config) = (self.injection_callback)(language_name) { - 849 | let ranges = HighlightIterLayer::intersect_ranges( - 850 | &self.layers[0].ranges, - 851 | &[content_node], - 852 | include_children, - 853 | ); - 854 | if !ranges.is_empty() { - 855 | match HighlightIterLayer::new( - 856 | self.source, - 857 | Some(self.language_name), - 858 | self.highlighter, - 859 | self.cancellation_flag, - 860 | &mut self.injection_callback, - 861 | config, - 862 | self.layers[0].depth + 1, - 863 | ranges, - 864 | ) { - 865 | Ok(layers) => { - 866 | for layer in layers { - 867 | self.insert_layer(layer); - 868 | } - 869 | } - 870 | Err(e) => return Some(Err(e)), - 871 | } - 872 | } - 873 | } - 874 | } - | - 875 | self.sort_layers(); - 876 | continue 'main; - 877 | } - | - 878 | // Remove from the local scope stack any local scopes that have already ended. - 879 | while range.start > layer.scope_stack.last().unwrap().range.end { - 880 | layer.scope_stack.pop(); - 881 | } - | - 882 | // If this capture is for tracking local variables, then process the - 883 | // local variable info. - 884 | let mut reference_highlight = None; - 885 | let mut definition_highlight = None; - 886 | while match_.pattern_index < layer.config.highlights_pattern_index { - 887 | // If the node represents a local scope, push a new local scope onto - 888 | // the scope stack. 
- 889 | if Some(capture.index) == layer.config.local_scope_capture_index { - 890 | definition_highlight = None; - 891 | let mut scope = LocalScope { - 892 | inherits: true, - 893 | range: range.clone(), - 894 | local_defs: Vec::new(), - 895 | }; - 896 | for prop in layer.config.query.property_settings(match_.pattern_index) { - 897 | if prop.key.as_ref() == "local.scope-inherits" { - 898 | scope.inherits = - 899 | prop.value.as_ref().is_none_or(|r| r.as_ref() == "true"); - 900 | } - 901 | } - 902 | layer.scope_stack.push(scope); - 903 | } - 904 | // If the node represents a definition, add a new definition to the - 905 | // local scope at the top of the scope stack. - 906 | else if Some(capture.index) == layer.config.local_def_capture_index { - 907 | reference_highlight = None; - 908 | definition_highlight = None; - 909 | let scope = layer.scope_stack.last_mut().unwrap(); - | - 910 | let mut value_range = 0..0; - 911 | for capture in match_.captures { - 912 | if Some(capture.index) == layer.config.local_def_value_capture_index { - 913 | value_range = capture.node.byte_range(); - 914 | } - 915 | } - | - 916 | if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { - 917 | scope.local_defs.push(LocalDef { - 918 | name, - 919 | value_range, - 920 | highlight: None, - 921 | }); - 922 | definition_highlight = - 923 | scope.local_defs.last_mut().map(|s| &mut s.highlight); - 924 | } - 925 | } - 926 | // If the node represents a reference, then try to find the corresponding - 927 | // definition in the scope stack. 
- 928 | else if Some(capture.index) == layer.config.local_ref_capture_index - 929 | && definition_highlight.is_none() - 930 | { - 931 | definition_highlight = None; - 932 | if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { - 933 | for scope in layer.scope_stack.iter().rev() { - 934 | if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { - 935 | if def.name == name && range.start >= def.value_range.end { - 936 | Some(def.highlight) - 937 | } else { - 938 | None - 939 | } - 940 | }) { - 941 | reference_highlight = highlight; - 942 | break; - 943 | } - 944 | if !scope.inherits { - 945 | break; - 946 | } - 947 | } - 948 | } - 949 | } - | - 950 | // Continue processing any additional matches for the same node. - 951 | if let Some((next_match, next_capture_index)) = layer.captures.peek() { - 952 | let next_capture = next_match.captures[*next_capture_index]; - 953 | if next_capture.node == capture.node { - 954 | capture = next_capture; - 955 | match_ = layer.captures.next().unwrap().0; - 956 | continue; - 957 | } - 958 | } - | - 959 | self.sort_layers(); - 960 | continue 'main; - 961 | } - | - 962 | // Otherwise, this capture must represent a highlight. - 963 | // If this exact range has already been highlighted by an earlier pattern, or by - 964 | // a different layer, then skip over this one. - 965 | if let Some((last_start, last_end, last_depth)) = self.last_highlight_range { - 966 | if range.start == last_start && range.end == last_end && layer.depth < last_depth { - 967 | self.sort_layers(); - 968 | continue 'main; - 969 | } - 970 | } - | - 971 | // Once a highlighting pattern is found for the current node, keep iterating over - 972 | // any later highlighting patterns that also match this node and set the match to it. - 973 | // Captures for a given node are ordered by pattern index, so these subsequent - 974 | // captures are guaranteed to be for highlighting, not injections or - 975 | // local variables. 
- 976 | while let Some((next_match, next_capture_index)) = layer.captures.peek() { - 977 | let next_capture = next_match.captures[*next_capture_index]; - 978 | if next_capture.node == capture.node { - 979 | let following_match = layer.captures.next().unwrap().0; - 980 | // If the current node was found to be a local variable, then ignore - 981 | // the following match if it's a highlighting pattern that is disabled - 982 | // for local variables. - 983 | if (definition_highlight.is_some() || reference_highlight.is_some()) - 984 | && layer.config.non_local_variable_patterns[following_match.pattern_index] - 985 | { - 986 | continue; - 987 | } - 988 | match_.remove(); - 989 | capture = next_capture; - 990 | match_ = following_match; - 991 | } else { - 992 | break; - 993 | } - 994 | } - | - 995 | let current_highlight = layer.config.highlight_indices[capture.index as usize]; - | - 996 | // If this node represents a local definition, then store the current - 997 | // highlight value on the local scope entry representing this node. - 998 | if let Some(definition_highlight) = definition_highlight { - 999 | *definition_highlight = current_highlight; -1000 | } - | -1001 | // Emit a scope start event and push the node's end position to the stack. 
-1002 | if let Some(highlight) = reference_highlight.or(current_highlight) { -1003 | self.last_highlight_range = Some((range.start, range.end, layer.depth)); -1004 | layer.highlight_end_stack.push(range.end); -1005 | return self -1006 | .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight))); -1007 | } - | -1008 | self.sort_layers(); -1009 | } -1010 | } -1011 | } - | -1012 | impl Default for HtmlRenderer { -1013 | fn default() -> Self { -1014 | Self::new() -1015 | } -1016 | } - | -1017 | impl HtmlRenderer { -1018 | #[must_use] -1019 | pub fn new() -> Self { -1020 | let mut result = Self { -1021 | html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY), -1022 | line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY), -1023 | carriage_return_highlight: None, -1024 | last_carriage_return: None, -1025 | }; -1026 | result.line_offsets.push(0); -1027 | result -1028 | } - | -1029 | pub const fn set_carriage_return_highlight(&mut self, highlight: Option) { -1030 | self.carriage_return_highlight = highlight; -1031 | } - | -1032 | pub fn reset(&mut self) { -1033 | shrink_and_clear(&mut self.html, BUFFER_HTML_RESERVE_CAPACITY); -1034 | shrink_and_clear(&mut self.line_offsets, BUFFER_LINES_RESERVE_CAPACITY); -1035 | self.line_offsets.push(0); -1036 | } - | -1037 | pub fn render( -1038 | &mut self, -1039 | highlighter: impl Iterator>, -1040 | source: &[u8], -1041 | attribute_callback: &F, -1042 | ) -> Result<(), Error> -1043 | where -1044 | F: Fn(Highlight, &mut Vec), -1045 | { -1046 | let mut highlights = Vec::new(); -1047 | for event in highlighter { -1048 | match event { -1049 | Ok(HighlightEvent::HighlightStart(s)) => { -1050 | highlights.push(s); -1051 | self.start_highlight(s, &attribute_callback); -1052 | } -1053 | Ok(HighlightEvent::HighlightEnd) => { -1054 | highlights.pop(); -1055 | self.end_highlight(); -1056 | } -1057 | Ok(HighlightEvent::Source { start, end }) => { -1058 | self.add_text(&source[start..end], &highlights, 
&attribute_callback); -1059 | } -1060 | Err(a) => return Err(a), -1061 | } -1062 | } -1063 | if let Some(offset) = self.last_carriage_return.take() { -1064 | self.add_carriage_return(offset, attribute_callback); -1065 | } -1066 | if self.html.last() != Some(&b'\n') { -1067 | self.html.push(b'\n'); -1068 | } -1069 | if self.line_offsets.last() == Some(&(self.html.len() as u32)) { -1070 | self.line_offsets.pop(); -1071 | } -1072 | Ok(()) -1073 | } - | -1074 | pub fn lines(&self) -> impl Iterator { -1075 | self.line_offsets -1076 | .iter() -1077 | .enumerate() -1078 | .map(move |(i, line_start)| { -1079 | let line_start = *line_start as usize; -1080 | let line_end = if i + 1 == self.line_offsets.len() { -1081 | self.html.len() -1082 | } else { -1083 | self.line_offsets[i + 1] as usize -1084 | }; -1085 | str::from_utf8(&self.html[line_start..line_end]).unwrap() -1086 | }) -1087 | } - | -1088 | fn add_carriage_return(&mut self, offset: usize, attribute_callback: &F) -1089 | where -1090 | F: Fn(Highlight, &mut Vec), -1091 | { -1092 | if let Some(highlight) = self.carriage_return_highlight { -1093 | // If a CR is the last character in a `HighlightEvent::Source` -1094 | // region, then we don't know until the next `Source` event or EOF -1095 | // whether it is part of CRLF or on its own. To avoid unbounded -1096 | // lookahead, save the offset of the CR and insert there now that we -1097 | // know. 
-1098 | let rest = self.html.split_off(offset); -1099 | self.html.extend(b""); -1102 | self.html.extend(rest); -1103 | } -1104 | } - | -1105 | fn start_highlight(&mut self, h: Highlight, attribute_callback: &F) -1106 | where -1107 | F: Fn(Highlight, &mut Vec), -1108 | { -1109 | self.html.extend(b""); -1112 | } - | -1113 | fn end_highlight(&mut self) { -1114 | self.html.extend(b""); -1115 | } - | -1116 | fn add_text(&mut self, src: &[u8], highlights: &[Highlight], attribute_callback: &F) -1117 | where -1118 | F: Fn(Highlight, &mut Vec), -1119 | { -1120 | pub const fn html_escape(c: u8) -> Option<&'static [u8]> { -1121 | match c as char { -1122 | '>' => Some(b">"), -1123 | '<' => Some(b"<"), -1124 | '&' => Some(b"&"), -1125 | '\'' => Some(b"'"), -1126 | '"' => Some(b"""), -1127 | _ => None, -1128 | } -1129 | } - | -1130 | for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) { -1131 | // Don't render carriage return characters, but allow lone carriage returns (not -1132 | // followed by line feeds) to be styled via the attribute callback. -1133 | if c == b'\r' { -1134 | self.last_carriage_return = Some(self.html.len()); -1135 | continue; -1136 | } -1137 | if let Some(offset) = self.last_carriage_return.take() { -1138 | if c != b'\n' { -1139 | self.add_carriage_return(offset, attribute_callback); -1140 | } -1141 | } - | -1142 | // At line boundaries, close and re-open all of the open tags. 
-1143 | if c == b'\n' { -1144 | for _ in highlights { -1145 | self.end_highlight(); -1146 | } -1147 | self.html.push(c); -1148 | self.line_offsets.push(self.html.len() as u32); -1149 | for scope in highlights { -1150 | self.start_highlight(*scope, attribute_callback); -1151 | } -1152 | } else if let Some(escape) = html_escape(c) { -1153 | self.html.extend_from_slice(escape); -1154 | } else { -1155 | self.html.push(c); -1156 | } -1157 | } -1158 | } -1159 | } - | -1160 | fn injection_for_match<'a>( -1161 | config: &'a HighlightConfiguration, -1162 | parent_name: Option<&'a str>, -1163 | query: &'a Query, -1164 | query_match: &QueryMatch<'a, 'a>, -1165 | source: &'a [u8], -1166 | ) -> (Option<&'a str>, Option>, bool) { -1167 | let content_capture_index = config.injection_content_capture_index; -1168 | let language_capture_index = config.injection_language_capture_index; - | -1169 | let mut language_name = None; -1170 | let mut content_node = None; - | -1171 | for capture in query_match.captures { -1172 | let index = Some(capture.index); -1173 | if index == language_capture_index { -1174 | language_name = capture.node.utf8_text(source).ok(); -1175 | } else if index == content_capture_index { -1176 | content_node = Some(capture.node); -1177 | } -1178 | } - | -1179 | let mut include_children = false; -1180 | for prop in query.property_settings(query_match.pattern_index) { -1181 | match prop.key.as_ref() { -1182 | // In addition to specifying the language name via the text of a -1183 | // captured node, it can also be hard-coded via a `#set!` predicate -1184 | // that sets the injection.language key. -1185 | "injection.language" => { -1186 | if language_name.is_none() { -1187 | language_name = prop.value.as_ref().map(std::convert::AsRef::as_ref); -1188 | } -1189 | } - | -1190 | // Setting the `injection.self` key can be used to specify that the -1191 | // language name should be the same as the language of the current -1192 | // layer. 
-1193 | "injection.self" => { -1194 | if language_name.is_none() { -1195 | language_name = Some(config.language_name.as_str()); -1196 | } -1197 | } - | -1198 | // Setting the `injection.parent` key can be used to specify that -1199 | // the language name should be the same as the language of the -1200 | // parent layer -1201 | "injection.parent" => { -1202 | if language_name.is_none() { -1203 | language_name = parent_name; -1204 | } -1205 | } - | -1206 | // By default, injections do not include the *children* of an -1207 | // `injection.content` node - only the ranges that belong to the -1208 | // node itself. This can be changed using a `#set!` predicate that -1209 | // sets the `injection.include-children` key. -1210 | "injection.include-children" => include_children = true, -1211 | _ => {} -1212 | } -1213 | } - | -1214 | (language_name, content_node, include_children) -1215 | } - | -1216 | fn shrink_and_clear(vec: &mut Vec, capacity: usize) { -1217 | if vec.len() > capacity { -1218 | vec.truncate(capacity); -1219 | vec.shrink_to_fit(); -1220 | } -1221 | vec.clear(); -1222 | } - - - --------------------------------------------------------------------------------- -/crates/language/Cargo.toml: --------------------------------------------------------------------------------- - 1 | [package] - 2 | name = "tree-sitter-language" - 3 | description = "The tree-sitter Language type, used by the library and by language implementations" - 4 | version = "0.1.5" - 5 | authors.workspace = true - 6 | edition.workspace = true - 7 | rust-version = "1.77" - 8 | readme = "README.md" - 9 | homepage.workspace = true - 10 | repository.workspace = true - 11 | documentation = "https://docs.rs/tree-sitter-language" - 12 | license.workspace = true - 13 | keywords.workspace = true - 14 | categories = ["api-bindings", "development-tools::ffi", "parsing"] - | - 15 | build = "build.rs" - 16 | links = "tree-sitter-language" - | - 17 | [lints] - 18 | workspace = true - | - 19 | [lib] - 20 | 
path = "src/language.rs" - - - --------------------------------------------------------------------------------- -/crates/language/README.md: --------------------------------------------------------------------------------- - 1 | # Tree-sitter Language - | - 2 | This crate provides a `LanguageFn` type for grammars to create `Language` instances from a parser, - 3 | without having to worry about the `tree-sitter` crate version not matching. - - - --------------------------------------------------------------------------------- -/crates/language/src/language.rs: --------------------------------------------------------------------------------- - 1 | #![no_std] - 2 | /// `LanguageFn` wraps a C function that returns a pointer to a tree-sitter grammar. - 3 | #[repr(transparent)] - 4 | #[derive(Clone, Copy)] - 5 | pub struct LanguageFn(unsafe extern "C" fn() -> *const ()); - | - 6 | impl LanguageFn { - 7 | /// Creates a [`LanguageFn`]. - 8 | /// - 9 | /// # Safety - 10 | /// - 11 | /// Only call this with language functions generated from grammars - 12 | /// by the Tree-sitter CLI. - 13 | pub const unsafe fn from_raw(f: unsafe extern "C" fn() -> *const ()) -> Self { - 14 | Self(f) - 15 | } - | - 16 | /// Gets the function wrapped by this [`LanguageFn`]. - 17 | #[must_use] - 18 | pub const fn into_raw(self) -> unsafe extern "C" fn() -> *const () { - 19 | self.0 - 20 | } - 21 | } - - - --------------------------------------------------------------------------------- -/crates/language/wasm/include/assert.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_WASM_ASSERT_H_ - 2 | #define TREE_SITTER_WASM_ASSERT_H_ - | - 3 | #ifdef NDEBUG - 4 | #define assert(e) ((void)0) - 5 | #else - 6 | __attribute__((noreturn)) void __assert_fail(const char *assertion, const char *file, unsigned line, const char *function) { - 7 | __builtin_trap(); - 8 | } - 9 | #define assert(expression) \ - 10 | ((expression) ? 
(void)0 : __assert_fail(#expression, __FILE__, __LINE__, __func__)) - 11 | #endif - | - 12 | #endif // TREE_SITTER_WASM_ASSERT_H_ - - - --------------------------------------------------------------------------------- -/crates/language/wasm/include/ctype.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_WASM_CTYPE_H_ - 2 | #define TREE_SITTER_WASM_CTYPE_H_ - | - 3 | static inline int isprint(int c) { - 4 | return c >= 0x20 && c <= 0x7E; - 5 | } - | - 6 | #endif // TREE_SITTER_WASM_CTYPE_H_ - - - --------------------------------------------------------------------------------- -/crates/language/wasm/include/endian.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_WASM_ENDIAN_H_ - 2 | #define TREE_SITTER_WASM_ENDIAN_H_ - | - 3 | #define be16toh(x) __builtin_bswap16(x) - 4 | #define be32toh(x) __builtin_bswap32(x) - 5 | #define be64toh(x) __builtin_bswap64(x) - 6 | #define le16toh(x) (x) - 7 | #define le32toh(x) (x) - 8 | #define le64toh(x) (x) - | - | - 9 | #endif // TREE_SITTER_WASM_ENDIAN_H_ - - - --------------------------------------------------------------------------------- -/crates/language/wasm/include/inttypes.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_WASM_INTTYPES_H_ - 2 | #define TREE_SITTER_WASM_INTTYPES_H_ - | - 3 | // https://github.com/llvm/llvm-project/blob/0c3cf200f5b918fb5c1114e9f1764c2d54d1779b/libc/include/llvm-libc-macros/inttypes-macros.h#L209 - | - 4 | #define PRId32 "d" - | - 5 | #endif // TREE_SITTER_WASM_INTTYPES_H_ - - - --------------------------------------------------------------------------------- -/crates/language/wasm/include/stdint.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_WASM_STDINT_H_ - 2 | #define TREE_SITTER_WASM_STDINT_H_ - | - 3 | // 
https://github.com/llvm/llvm-project/blob/0c3cf200f5b918fb5c1114e9f1764c2d54d1779b/clang/test/Preprocessor/init.c#L1672 - | - 4 | typedef signed char int8_t; - | - 5 | typedef short int16_t; - | - 6 | typedef int int32_t; - | - 7 | typedef long long int int64_t; - | - 8 | typedef unsigned char uint8_t; - | - 9 | typedef unsigned short uint16_t; - | - 10 | typedef unsigned int uint32_t; - | - 11 | typedef long long unsigned int uint64_t; - | - 12 | typedef long unsigned int size_t; - | - 13 | typedef long unsigned int uintptr_t; - | - 14 | #define UINT16_MAX 65535 - | - 15 | #define UINT32_MAX 4294967295U - | - 16 | #if defined(__wasm32__) - | - 17 | #define SIZE_MAX 4294967295UL - | - 18 | #elif defined(__wasm64__) - | - 19 | #define SIZE_MAX 18446744073709551615UL - | - 20 | #endif - | - 21 | #endif // TREE_SITTER_WASM_STDINT_H_ - - - --------------------------------------------------------------------------------- -/crates/language/wasm/include/stdio.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_WASM_STDIO_H_ - 2 | #define TREE_SITTER_WASM_STDIO_H_ - | - 3 | #include - 4 | #include - | - 5 | typedef struct FILE FILE; - | - 6 | typedef __builtin_va_list va_list; - 7 | #define va_start(ap, last) __builtin_va_start(ap, last) - 8 | #define va_end(ap) __builtin_va_end(ap) - 9 | #define va_arg(ap, type) __builtin_va_arg(ap, type) - | - 10 | #define stdout ((FILE *)0) - | - 11 | #define stderr ((FILE *)1) - | - 12 | #define stdin ((FILE *)2) - | - 13 | int fclose(FILE *stream); - | - 14 | FILE *fdopen(int fd, const char *mode); - | - 15 | int fputc(int c, FILE *stream); - | - 16 | int fputs(const char *restrict s, FILE *restrict stream); - | - 17 | size_t fwrite(const void *restrict buffer, size_t size, size_t nmemb, FILE *restrict stream); - | - 18 | int fprintf(FILE *restrict stream, const char *restrict format, ...); - | - 19 | int snprintf(char *restrict buffer, size_t buffsz, const char *restrict 
format, ...); - | - 20 | int vsnprintf(char *restrict buffer, size_t buffsz, const char *restrict format, va_list vlist); - | - 21 | #endif // TREE_SITTER_WASM_STDIO_H_ - - - --------------------------------------------------------------------------------- -/crates/language/wasm/include/stdlib.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_WASM_STDLIB_H_ - 2 | #define TREE_SITTER_WASM_STDLIB_H_ - | - 3 | #include - | - 4 | #define NULL ((void*)0) - | - 5 | void* malloc(size_t); - 6 | void* calloc(size_t, size_t); - 7 | void free(void*); - 8 | void* realloc(void*, size_t); - | - 9 | __attribute__((noreturn)) void abort(void); - | - 10 | #endif // TREE_SITTER_WASM_STDLIB_H_ - - - --------------------------------------------------------------------------------- -/crates/language/wasm/include/string.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_WASM_STRING_H_ - 2 | #define TREE_SITTER_WASM_STRING_H_ - | - 3 | #include - | - 4 | int memcmp(const void *lhs, const void *rhs, size_t count); - | - 5 | void *memcpy(void *restrict dst, const void *restrict src, size_t size); - | - 6 | void *memmove(void *dst, const void *src, size_t count); - | - 7 | void *memset(void *dst, int value, size_t count); - | - 8 | int strncmp(const char *left, const char *right, size_t n); - | - 9 | #endif // TREE_SITTER_WASM_STRING_H_ - - - --------------------------------------------------------------------------------- -/crates/language/wasm/include/wctype.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_WASM_WCTYPE_H_ - 2 | #define TREE_SITTER_WASM_WCTYPE_H_ - | - 3 | typedef int wint_t; - | - 4 | static inline bool iswalpha(wint_t wch) { - 5 | switch (wch) { - 6 | case L'a': - 7 | case L'b': - 8 | case L'c': - 9 | case L'd': - 10 | case L'e': - 11 | case L'f': - 12 | case L'g': - 13 | case L'h': - 14 | 
case L'i': - 15 | case L'j': - 16 | case L'k': - 17 | case L'l': - 18 | case L'm': - 19 | case L'n': - 20 | case L'o': - 21 | case L'p': - 22 | case L'q': - 23 | case L'r': - 24 | case L's': - 25 | case L't': - 26 | case L'u': - 27 | case L'v': - 28 | case L'w': - 29 | case L'x': - 30 | case L'y': - 31 | case L'z': - 32 | case L'A': - 33 | case L'B': - 34 | case L'C': - 35 | case L'D': - 36 | case L'E': - 37 | case L'F': - 38 | case L'G': - 39 | case L'H': - 40 | case L'I': - 41 | case L'J': - 42 | case L'K': - 43 | case L'L': - 44 | case L'M': - 45 | case L'N': - 46 | case L'O': - 47 | case L'P': - 48 | case L'Q': - 49 | case L'R': - 50 | case L'S': - 51 | case L'T': - 52 | case L'U': - 53 | case L'V': - 54 | case L'W': - 55 | case L'X': - 56 | case L'Y': - 57 | case L'Z': - 58 | return true; - 59 | default: - 60 | return false; - 61 | } - 62 | } - | - 63 | static inline bool iswdigit(wint_t wch) { - 64 | switch (wch) { - 65 | case L'0': - 66 | case L'1': - 67 | case L'2': - 68 | case L'3': - 69 | case L'4': - 70 | case L'5': - 71 | case L'6': - 72 | case L'7': - 73 | case L'8': - 74 | case L'9': - 75 | return true; - 76 | default: - 77 | return false; - 78 | } - 79 | } - | - 80 | static inline bool iswalnum(wint_t wch) { - 81 | switch (wch) { - 82 | case L'a': - 83 | case L'b': - 84 | case L'c': - 85 | case L'd': - 86 | case L'e': - 87 | case L'f': - 88 | case L'g': - 89 | case L'h': - 90 | case L'i': - 91 | case L'j': - 92 | case L'k': - 93 | case L'l': - 94 | case L'm': - 95 | case L'n': - 96 | case L'o': - 97 | case L'p': - 98 | case L'q': - 99 | case L'r': - 100 | case L's': - 101 | case L't': - 102 | case L'u': - 103 | case L'v': - 104 | case L'w': - 105 | case L'x': - 106 | case L'y': - 107 | case L'z': - 108 | case L'A': - 109 | case L'B': - 110 | case L'C': - 111 | case L'D': - 112 | case L'E': - 113 | case L'F': - 114 | case L'G': - 115 | case L'H': - 116 | case L'I': - 117 | case L'J': - 118 | case L'K': - 119 | case L'L': - 120 | case L'M': - 121 | 
case L'N': - 122 | case L'O': - 123 | case L'P': - 124 | case L'Q': - 125 | case L'R': - 126 | case L'S': - 127 | case L'T': - 128 | case L'U': - 129 | case L'V': - 130 | case L'W': - 131 | case L'X': - 132 | case L'Y': - 133 | case L'Z': - 134 | case L'0': - 135 | case L'1': - 136 | case L'2': - 137 | case L'3': - 138 | case L'4': - 139 | case L'5': - 140 | case L'6': - 141 | case L'7': - 142 | case L'8': - 143 | case L'9': - 144 | return true; - 145 | default: - 146 | return false; - 147 | } - 148 | } - | - 149 | static inline bool iswspace(wint_t wch) { - 150 | switch (wch) { - 151 | case L' ': - 152 | case L'\t': - 153 | case L'\n': - 154 | case L'\v': - 155 | case L'\f': - 156 | case L'\r': - 157 | return true; - 158 | default: - 159 | return false; - 160 | } - 161 | } - | - 162 | #endif // TREE_SITTER_WASM_WCTYPE_H_ - - - --------------------------------------------------------------------------------- -/crates/language/wasm/src/stdio.c: --------------------------------------------------------------------------------- - 1 | #include - | - 2 | typedef struct { - 3 | bool left_justify; // - - 4 | bool zero_pad; // 0 - 5 | bool show_sign; // + - 6 | bool space_prefix; // ' ' - 7 | bool alternate_form; // # - 8 | } format_flags_t; - | - 9 | static const char* parse_format_spec( - 10 | const char *format, - 11 | int *width, - 12 | int *precision, - 13 | format_flags_t *flags - 14 | ) { - 15 | *width = 0; - 16 | *precision = -1; - 17 | flags->left_justify = false; - 18 | flags->zero_pad = false; - 19 | flags->show_sign = false; - 20 | flags->space_prefix = false; - 21 | flags->alternate_form = false; - | - 22 | const char *p = format; - | - 23 | // Parse flags - 24 | while (*p == '-' || *p == '+' || *p == ' ' || *p == '#' || *p == '0') { - 25 | switch (*p) { - 26 | case '-': flags->left_justify = true; break; - 27 | case '0': flags->zero_pad = true; break; - 28 | case '+': flags->show_sign = true; break; - 29 | case ' ': flags->space_prefix = true; break; - 30 | 
case '#': flags->alternate_form = true; break; - 31 | } - 32 | p++; - 33 | } - | - 34 | // width - 35 | while (*p >= '0' && *p <= '9') { - 36 | *width = (*width * 10) + (*p - '0'); - 37 | p++; - 38 | } - | - 39 | // precision - 40 | if (*p == '.') { - 41 | p++; - 42 | *precision = 0; - 43 | while (*p >= '0' && *p <= '9') { - 44 | *precision = (*precision * 10) + (*p - '0'); - 45 | p++; - 46 | } - 47 | } - | - 48 | return p; - 49 | } - | - 50 | static int int_to_str( - 51 | long long value, - 52 | char *buffer, - 53 | int base, - 54 | bool is_signed, - 55 | bool uppercase - 56 | ) { - 57 | if (base < 2 || base > 16) return 0; - | - 58 | const char *digits = uppercase ? "0123456789ABCDEF" : "0123456789abcdef"; - 59 | char temp[32]; - 60 | int i = 0, len = 0; - 61 | bool is_negative = false; - | - 62 | if (value == 0) { - 63 | buffer[0] = '0'; - 64 | buffer[1] = '\0'; - 65 | return 1; - 66 | } - | - 67 | if (is_signed && value < 0 && base == 10) { - 68 | is_negative = true; - 69 | value = -value; - 70 | } - | - 71 | unsigned long long uval = (unsigned long long)value; - 72 | while (uval > 0) { - 73 | temp[i++] = digits[uval % base]; - 74 | uval /= base; - 75 | } - | - 76 | if (is_negative) { - 77 | buffer[len++] = '-'; - 78 | } - | - 79 | while (i > 0) { - 80 | buffer[len++] = temp[--i]; - 81 | } - | - 82 | buffer[len] = '\0'; - 83 | return len; - 84 | } - | - 85 | static int ptr_to_str(void *ptr, char *buffer) { - 86 | buffer[0] = '0'; - 87 | buffer[1] = 'x'; - 88 | int len = int_to_str((uintptr_t)ptr, buffer + 2, 16, 0, 0); - 89 | return 2 + len; - 90 | } - | - 91 | size_t strlen(const char *str) { - 92 | const char *s = str; - 93 | while (*s) s++; - 94 | return s - str; - 95 | } - | - 96 | char *strncpy(char *dest, const char *src, size_t n) { - 97 | char *d = dest; - 98 | const char *s = src; - 99 | while (n-- && (*d++ = *s++)); - 100 | if (n == (size_t)-1) *d = '\0'; - 101 | return dest; - 102 | } - | - 103 | static int write_formatted_to_buffer( - 104 | char 
*buffer, - 105 | size_t buffer_size, - 106 | size_t *pos, - 107 | const char *str, - 108 | int width, - 109 | const format_flags_t *flags - 110 | ) { - 111 | int len = strlen(str); - 112 | int written = 0; - 113 | int pad_len = (width > len) ? (width - len) : 0; - 114 | int zero_pad = flags->zero_pad && !flags->left_justify; - | - 115 | if (!flags->left_justify && pad_len > 0) { - 116 | char pad_char = zero_pad ? '0' : ' '; - 117 | for (int i = 0; i < pad_len && *pos < buffer_size - 1; i++) { - 118 | buffer[(*pos)++] = pad_char; - 119 | written++; - 120 | } - 121 | } - | - 122 | for (int i = 0; i < len && *pos < buffer_size - 1; i++) { - 123 | buffer[(*pos)++] = str[i]; - 124 | written++; - 125 | } - | - 126 | if (flags->left_justify && pad_len > 0) { - 127 | for (int i = 0; i < pad_len && *pos < buffer_size - 1; i++) { - 128 | buffer[(*pos)++] = ' '; - 129 | written++; - 130 | } - 131 | } - | - 132 | return written; - 133 | } - | - 134 | static int vsnprintf_impl(char *buffer, size_t buffsz, const char *format, va_list args) { - 135 | if (!buffer || buffsz == 0 || !format) return -1; - | - 136 | size_t pos = 0; - 137 | int total_chars = 0; - 138 | const char *p = format; - | - 139 | while (*p) { - 140 | if (*p == '%') { - 141 | p++; - 142 | if (*p == '%') { - 143 | if (pos < buffsz - 1) buffer[pos++] = '%'; - 144 | total_chars++; - 145 | p++; - 146 | continue; - 147 | } - | - 148 | int width, precision; - 149 | format_flags_t flags; - 150 | p = parse_format_spec(p, &width, &precision, &flags); - | - 151 | char temp_buf[64]; - 152 | const char *output_str = temp_buf; - | - 153 | switch (*p) { - 154 | case 's': { - 155 | const char *str = va_arg(args, const char*); - 156 | if (!str) str = "(null)"; - | - 157 | int str_len = strlen(str); - 158 | if (precision >= 0 && str_len > precision) { - 159 | strncpy(temp_buf, str, precision); - 160 | temp_buf[precision] = '\0'; - 161 | output_str = temp_buf; - 162 | } else { - 163 | output_str = str; - 164 | } - 165 | break; - 
166 | } - 167 | case 'd': - 168 | case 'i': { - 169 | int value = va_arg(args, int); - 170 | int_to_str(value, temp_buf, 10, true, false); - 171 | break; - 172 | } - 173 | case 'u': { - 174 | unsigned int value = va_arg(args, unsigned int); - 175 | int_to_str(value, temp_buf, 10, false, false); - 176 | break; - 177 | } - 178 | case 'x': { - 179 | unsigned int value = va_arg(args, unsigned int); - 180 | int_to_str(value, temp_buf, 16, false, false); - 181 | break; - 182 | } - 183 | case 'X': { - 184 | unsigned int value = va_arg(args, unsigned int); - 185 | int_to_str(value, temp_buf, 16, false, true); - 186 | break; - 187 | } - 188 | case 'p': { - 189 | void *ptr = va_arg(args, void*); - 190 | ptr_to_str(ptr, temp_buf); - 191 | break; - 192 | } - 193 | case 'c': { - 194 | int c = va_arg(args, int); - 195 | temp_buf[0] = (char)c; - 196 | temp_buf[1] = '\0'; - 197 | break; - 198 | } - 199 | case 'z': { - 200 | if (*(p + 1) == 'u') { - 201 | size_t value = va_arg(args, size_t); - 202 | int_to_str(value, temp_buf, 10, false, false); - 203 | p++; - 204 | } else { - 205 | temp_buf[0] = 'z'; - 206 | temp_buf[1] = '\0'; - 207 | } - 208 | break; - 209 | } - 210 | default: - 211 | temp_buf[0] = '%'; - 212 | temp_buf[1] = *p; - 213 | temp_buf[2] = '\0'; - 214 | break; - 215 | } - | - 216 | int str_len = strlen(output_str); - 217 | int formatted_len = (width > str_len) ? width : str_len; - 218 | total_chars += formatted_len; - | - 219 | if (pos < buffsz - 1) { - 220 | write_formatted_to_buffer(buffer, buffsz, &pos, output_str, width, &flags); - 221 | } - | - 222 | } else { - 223 | if (pos < buffsz - 1) buffer[pos++] = *p; - 224 | total_chars++; - 225 | } - 226 | p++; - 227 | } - | - 228 | if (buffsz > 0) buffer[pos < buffsz ? pos : buffsz - 1] = '\0'; - | - 229 | return total_chars; - 230 | } - | - 231 | int snprintf(char *restrict buffer, size_t buffsz, const char *restrict format, ...) 
{ - 232 | if (!buffer || buffsz == 0 || !format) return -1; - | - 233 | va_list args; - 234 | va_start(args, format); - 235 | int result = vsnprintf_impl(buffer, buffsz, format, args); - 236 | va_end(args); - | - 237 | return result; - 238 | } - | - 239 | int vsnprintf(char *restrict buffer, size_t buffsz, const char *restrict format, va_list vlist) { - 240 | return vsnprintf_impl(buffer, buffsz, format, vlist); - 241 | } - | - 242 | int fclose(FILE *stream) { - 243 | return 0; - 244 | } - | - 245 | FILE* fdopen(int fd, const char *mode) { - 246 | return 0; - 247 | } - | - 248 | int fputc(int c, FILE *stream) { - 249 | return c; - 250 | } - | - 251 | int fputs(const char *restrict str, FILE *restrict stream) { - 252 | return 0; - 253 | } - | - 254 | size_t fwrite(const void *restrict buffer, size_t size, size_t nmemb, FILE *restrict stream) { - 255 | return size * nmemb; - 256 | } - | - 257 | int fprintf(FILE *restrict stream, const char *restrict format, ...) { - 258 | return 0; - 259 | } - - - --------------------------------------------------------------------------------- -/crates/language/wasm/src/stdlib.c: --------------------------------------------------------------------------------- - 1 | // This file implements a very simple allocator for external scanners running - 2 | // in Wasm. Allocation is just bumping a static pointer and growing the heap - 3 | // as needed, and freeing is just adding the freed region to a free list. - 4 | // When additional memory is allocated, the free list is searched first. - 5 | // If there is not a suitable region in the free list, the heap is - 6 | // grown as necessary, and the allocation is made at the end of the heap. - 7 | // When the heap is reset, all allocated memory is considered freed. 
- | - 8 | #include - 9 | #include - 10 | #include - | - 11 | extern void tree_sitter_debug_message(const char *, size_t); - | - 12 | #define PAGESIZE 0x10000 - 13 | #define MAX_HEAP_SIZE (4 * 1024 * 1024) - | - 14 | typedef struct { - 15 | size_t size; - 16 | struct Region *next; - 17 | char data[0]; - 18 | } Region; - | - 19 | static Region *heap_end = NULL; - 20 | static Region *heap_start = NULL; - 21 | static Region *next = NULL; - 22 | static Region *free_list = NULL; - | - 23 | // Get the region metadata for the given heap pointer. - 24 | static inline Region *region_for_ptr(void *ptr) { - 25 | return ((Region *)ptr) - 1; - 26 | } - | - 27 | // Get the location of the next region after the given region, - 28 | // if the given region had the given size. - 29 | static inline Region *region_after(Region *self, size_t len) { - 30 | char *address = self->data + len; - 31 | char *aligned = (char *)((uintptr_t)(address + 3) & ~0x3); - 32 | return (Region *)aligned; - 33 | } - | - 34 | static void *get_heap_end() { - 35 | return (void *)(__builtin_wasm_memory_size(0) * PAGESIZE); - 36 | } - | - 37 | static int grow_heap(size_t size) { - 38 | size_t new_page_count = ((size - 1) / PAGESIZE) + 1; - 39 | return __builtin_wasm_memory_grow(0, new_page_count) != SIZE_MAX; - 40 | } - | - 41 | // Clear out the heap, and move it to the given address. 
- 42 | void reset_heap(void *new_heap_start) { - 43 | heap_start = new_heap_start; - 44 | next = new_heap_start; - 45 | heap_end = get_heap_end(); - 46 | free_list = NULL; - 47 | } - | - 48 | void *malloc(size_t size) { - 49 | if (size == 0) return NULL; - | - 50 | Region *prev = NULL; - 51 | Region *curr = free_list; - 52 | while (curr != NULL) { - 53 | if (curr->size >= size) { - 54 | if (prev == NULL) { - 55 | free_list = curr->next; - 56 | } else { - 57 | prev->next = curr->next; - 58 | } - 59 | return &curr->data; - 60 | } - 61 | prev = curr; - 62 | curr = curr->next; - 63 | } - | - 64 | Region *region_end = region_after(next, size); - | - 65 | if (region_end > heap_end) { - 66 | if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) { - 67 | return NULL; - 68 | } - 69 | if (!grow_heap(size)) return NULL; - 70 | heap_end = get_heap_end(); - 71 | } - | - 72 | void *result = &next->data; - 73 | next->size = size; - 74 | next = region_end; - | - 75 | return result; - 76 | } - | - 77 | void free(void *ptr) { - 78 | if (ptr == NULL) return; - | - 79 | Region *region = region_for_ptr(ptr); - 80 | Region *region_end = region_after(region, region->size); - | - 81 | // When freeing the last allocated pointer, re-use that - 82 | // pointer for the next allocation. - 83 | if (region_end == next) { - 84 | next = region; - 85 | } else { - 86 | region->next = free_list; - 87 | free_list = region; - 88 | } - 89 | } - | - 90 | void *calloc(size_t count, size_t size) { - 91 | void *result = malloc(count * size); - 92 | memset(result, 0, count * size); - 93 | return result; - 94 | } - | - 95 | void *realloc(void *ptr, size_t new_size) { - 96 | if (ptr == NULL) { - 97 | return malloc(new_size); - 98 | } - | - 99 | Region *region = region_for_ptr(ptr); - 100 | Region *region_end = region_after(region, region->size); - | - 101 | // When reallocating the last allocated region, return - 102 | // the same pointer, and skip copying the data. 
- 103 | if (region_end == next) { - 104 | next = region; - 105 | return malloc(new_size); - 106 | } - | - 107 | void *result = malloc(new_size); - 108 | memcpy(result, ®ion->data, region->size); - 109 | return result; - 110 | } - | - 111 | __attribute__((noreturn)) void abort(void) { - 112 | __builtin_trap(); - 113 | } - - - --------------------------------------------------------------------------------- -/crates/language/wasm/src/string.c: --------------------------------------------------------------------------------- - 1 | #include - | - 2 | int memcmp(const void *lhs, const void *rhs, size_t count) { - 3 | const unsigned char *l = lhs; - 4 | const unsigned char *r = rhs; - 5 | while (count--) { - 6 | if (*l != *r) { - 7 | return *l - *r; - 8 | } - 9 | l++; - 10 | r++; - 11 | } - 12 | return 0; - 13 | } - | - 14 | void *memcpy(void *restrict dst, const void *restrict src, size_t size) { - 15 | unsigned char *d = dst; - 16 | const unsigned char *s = src; - 17 | while (size--) { - 18 | *d++ = *s++; - 19 | } - 20 | return dst; - 21 | } - | - 22 | void *memmove(void *dst, const void *src, size_t count) { - 23 | unsigned char *d = dst; - 24 | const unsigned char *s = src; - 25 | if (d < s) { - 26 | while (count--) { - 27 | *d++ = *s++; - 28 | } - 29 | } else if (d > s) { - 30 | d += count; - 31 | s += count; - 32 | while (count--) { - 33 | *(--d) = *(--s); - 34 | } - 35 | } - 36 | return dst; - 37 | } - | - 38 | void *memset(void *dst, int value, size_t count) { - 39 | unsigned char *p = dst; - 40 | while (count--) { - 41 | *p++ = (unsigned char)value; - 42 | } - 43 | return dst; - 44 | } - | - 45 | int strncmp(const char *left, const char *right, size_t n) { - 46 | while (n-- > 0) { - 47 | if (*left != *right) { - 48 | return *(unsigned char *)left - *(unsigned char *)right; - 49 | } - 50 | if (*left == '\0') break; - 51 | left++; - 52 | right++; - 53 | } - 54 | return 0; - 55 | } - - - 
--------------------------------------------------------------------------------- -/crates/loader/Cargo.toml: --------------------------------------------------------------------------------- - 1 | [package] - 2 | name = "tree-sitter-loader" - 3 | version.workspace = true - 4 | description = "Locates, builds, and loads tree-sitter grammars at runtime" - 5 | authors.workspace = true - 6 | edition.workspace = true - 7 | rust-version.workspace = true - 8 | readme = "README.md" - 9 | homepage.workspace = true - 10 | repository.workspace = true - 11 | documentation = "https://docs.rs/tree-sitter-loader" - 12 | license.workspace = true - 13 | keywords.workspace = true - 14 | categories.workspace = true - | - 15 | [package.metadata.docs.rs] - 16 | all-features = true - 17 | rustdoc-args = ["--cfg", "docsrs"] - | - 18 | [lib] - 19 | path = "src/loader.rs" - | - 20 | [lints] - 21 | workspace = true - | - 22 | [features] - 23 | wasm = ["tree-sitter/wasm"] - 24 | default = ["tree-sitter-highlight", "tree-sitter-tags"] - | - 25 | [dependencies] - 26 | anyhow.workspace = true - 27 | cc.workspace = true - 28 | etcetera.workspace = true - 29 | fs4.workspace = true - 30 | indoc.workspace = true - 31 | libloading.workspace = true - 32 | log.workspace = true - 33 | once_cell.workspace = true - 34 | regex.workspace = true - 35 | semver.workspace = true - 36 | serde.workspace = true - 37 | serde_json.workspace = true - 38 | tempfile.workspace = true - | - 39 | tree-sitter = { workspace = true } - 40 | tree-sitter-highlight = { workspace = true, optional = true } - 41 | tree-sitter-tags = { workspace = true, optional = true } - - - --------------------------------------------------------------------------------- -/crates/loader/emscripten-version: --------------------------------------------------------------------------------- - 1 | 4.0.15 - - - --------------------------------------------------------------------------------- -/crates/loader/README.md: 
--------------------------------------------------------------------------------- - 1 | # Tree-sitter Loader - | - 2 | The `tree-sitter` command-line program will dynamically find and build grammars - 3 | at runtime, if you have cloned the grammars' repositories to your local - 4 | filesystem. This helper crate implements that logic, so that you can use it in - 5 | your own program analysis tools, as well. - - - --------------------------------------------------------------------------------- -/crates/loader/src/loader.rs: --------------------------------------------------------------------------------- - 1 | #![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))] - 2 | #![cfg_attr(docsrs, feature(doc_cfg))] - | - 3 | #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))] - 4 | use std::ops::Range; - 5 | #[cfg(feature = "tree-sitter-highlight")] - 6 | use std::sync::Mutex; - 7 | use std::{ - 8 | collections::HashMap, - 9 | env, fs, - 10 | io::{BufRead, BufReader}, - 11 | marker::PhantomData, - 12 | mem, - 13 | path::{Path, PathBuf}, - 14 | process::Command, - 15 | sync::LazyLock, - 16 | time::SystemTime, - 17 | }; - | - 18 | use anyhow::Error; - 19 | use anyhow::{anyhow, Context, Result}; - 20 | use etcetera::BaseStrategy as _; - 21 | use fs4::fs_std::FileExt; - 22 | use libloading::{Library, Symbol}; - 23 | use log::{error, info, warn}; - 24 | use once_cell::unsync::OnceCell; - 25 | use regex::{Regex, RegexBuilder}; - 26 | use semver::Version; - 27 | use serde::{Deserialize, Deserializer, Serialize}; - 28 | use tree_sitter::Language; - 29 | #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))] - 30 | use tree_sitter::QueryError; - 31 | #[cfg(feature = "tree-sitter-highlight")] - 32 | use tree_sitter::QueryErrorKind; - 33 | #[cfg(feature = "tree-sitter-highlight")] - 34 | use tree_sitter_highlight::HighlightConfiguration; - 35 | #[cfg(feature = "tree-sitter-tags")] - 36 | use tree_sitter_tags::{Error as 
TagsError, TagsConfiguration}; - | - 37 | static GRAMMAR_NAME_REGEX: LazyLock = - 38 | LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap()); - | - 39 | #[derive(Default, Deserialize, Serialize)] - 40 | pub struct Config { - 41 | #[serde(default)] - 42 | #[serde( - 43 | rename = "parser-directories", - 44 | deserialize_with = "deserialize_parser_directories" - 45 | )] - 46 | pub parser_directories: Vec, - 47 | } - | - 48 | #[derive(Serialize, Deserialize, Clone, Default)] - 49 | #[serde(untagged)] - 50 | pub enum PathsJSON { - 51 | #[default] - 52 | Empty, - 53 | Single(PathBuf), - 54 | Multiple(Vec), - 55 | } - | - 56 | impl PathsJSON { - 57 | fn into_vec(self) -> Option> { - 58 | match self { - 59 | Self::Empty => None, - 60 | Self::Single(s) => Some(vec![s]), - 61 | Self::Multiple(s) => Some(s), - 62 | } - 63 | } - | - 64 | const fn is_empty(&self) -> bool { - 65 | matches!(self, Self::Empty) - 66 | } - 67 | } - | - 68 | #[derive(Serialize, Deserialize, Clone)] - 69 | #[serde(untagged)] - 70 | pub enum PackageJSONAuthor { - 71 | String(String), - 72 | Object { - 73 | name: String, - 74 | email: Option, - 75 | url: Option, - 76 | }, - 77 | } - | - 78 | #[derive(Serialize, Deserialize, Clone)] - 79 | #[serde(untagged)] - 80 | pub enum PackageJSONRepository { - 81 | String(String), - 82 | Object { url: String }, - 83 | } - | - 84 | #[derive(Serialize, Deserialize)] - 85 | pub struct PackageJSON { - 86 | pub name: String, - 87 | pub version: Version, - 88 | pub description: Option, - 89 | pub author: Option, - 90 | pub maintainers: Option>, - 91 | pub license: Option, - 92 | pub repository: Option, - 93 | #[serde(default)] - 94 | #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")] - 95 | pub tree_sitter: Option>, - 96 | } - | - 97 | fn default_path() -> PathBuf { - 98 | PathBuf::from(".") - 99 | } - | - 100 | #[derive(Serialize, Deserialize, Clone)] - 101 | #[serde(rename_all = "kebab-case")] - 102 | pub struct LanguageConfigurationJSON 
{ - 103 | #[serde(default = "default_path")] - 104 | pub path: PathBuf, - 105 | pub scope: Option, - 106 | pub file_types: Option>, - 107 | pub content_regex: Option, - 108 | pub first_line_regex: Option, - 109 | pub injection_regex: Option, - 110 | #[serde(default, skip_serializing_if = "PathsJSON::is_empty")] - 111 | pub highlights: PathsJSON, - 112 | #[serde(default, skip_serializing_if = "PathsJSON::is_empty")] - 113 | pub injections: PathsJSON, - 114 | #[serde(default, skip_serializing_if = "PathsJSON::is_empty")] - 115 | pub locals: PathsJSON, - 116 | #[serde(default, skip_serializing_if = "PathsJSON::is_empty")] - 117 | pub tags: PathsJSON, - 118 | #[serde(default, skip_serializing_if = "PathsJSON::is_empty")] - 119 | pub external_files: PathsJSON, - 120 | } - | - 121 | #[derive(Serialize, Deserialize)] - 122 | #[serde(rename_all = "kebab-case")] - 123 | pub struct TreeSitterJSON { - 124 | #[serde(rename = "$schema")] - 125 | pub schema: Option, - 126 | pub grammars: Vec, - 127 | pub metadata: Metadata, - 128 | #[serde(default)] - 129 | pub bindings: Bindings, - 130 | } - | - 131 | impl TreeSitterJSON { - 132 | pub fn from_file(path: &Path) -> Result { - 133 | Ok(serde_json::from_str(&fs::read_to_string( - 134 | path.join("tree-sitter.json"), - 135 | )?)?) 
- 136 | } - | - 137 | #[must_use] - 138 | pub fn has_multiple_language_configs(&self) -> bool { - 139 | self.grammars.len() > 1 - 140 | } - 141 | } - | - 142 | #[derive(Serialize, Deserialize)] - 143 | #[serde(rename_all = "kebab-case")] - 144 | pub struct Grammar { - 145 | pub name: String, - 146 | #[serde(skip_serializing_if = "Option::is_none")] - 147 | pub camelcase: Option, - 148 | #[serde(skip_serializing_if = "Option::is_none")] - 149 | pub title: Option, - 150 | pub scope: String, - 151 | #[serde(skip_serializing_if = "Option::is_none")] - 152 | pub path: Option, - 153 | #[serde(default, skip_serializing_if = "PathsJSON::is_empty")] - 154 | pub external_files: PathsJSON, - 155 | pub file_types: Option>, - 156 | #[serde(default, skip_serializing_if = "PathsJSON::is_empty")] - 157 | pub highlights: PathsJSON, - 158 | #[serde(default, skip_serializing_if = "PathsJSON::is_empty")] - 159 | pub injections: PathsJSON, - 160 | #[serde(default, skip_serializing_if = "PathsJSON::is_empty")] - 161 | pub locals: PathsJSON, - 162 | #[serde(default, skip_serializing_if = "PathsJSON::is_empty")] - 163 | pub tags: PathsJSON, - 164 | #[serde(skip_serializing_if = "Option::is_none")] - 165 | pub injection_regex: Option, - 166 | #[serde(skip_serializing_if = "Option::is_none")] - 167 | pub first_line_regex: Option, - 168 | #[serde(skip_serializing_if = "Option::is_none")] - 169 | pub content_regex: Option, - 170 | #[serde(skip_serializing_if = "Option::is_none")] - 171 | pub class_name: Option, - 172 | } - | - 173 | #[derive(Serialize, Deserialize)] - 174 | pub struct Metadata { - 175 | pub version: Version, - 176 | #[serde(skip_serializing_if = "Option::is_none")] - 177 | pub license: Option, - 178 | #[serde(skip_serializing_if = "Option::is_none")] - 179 | pub description: Option, - 180 | #[serde(skip_serializing_if = "Option::is_none")] - 181 | pub authors: Option>, - 182 | #[serde(skip_serializing_if = "Option::is_none")] - 183 | pub links: Option, - 184 | #[serde(skip)] 
- 185 | pub namespace: Option, - 186 | } - | - 187 | #[derive(Serialize, Deserialize)] - 188 | pub struct Author { - 189 | pub name: String, - 190 | #[serde(skip_serializing_if = "Option::is_none")] - 191 | pub email: Option, - 192 | #[serde(skip_serializing_if = "Option::is_none")] - 193 | pub url: Option, - 194 | } - | - 195 | #[derive(Serialize, Deserialize)] - 196 | pub struct Links { - 197 | pub repository: String, - 198 | #[serde(skip_serializing_if = "Option::is_none")] - 199 | pub funding: Option, - 200 | } - | - 201 | #[derive(Serialize, Deserialize, Clone)] - 202 | #[serde(default)] - 203 | pub struct Bindings { - 204 | pub c: bool, - 205 | pub go: bool, - 206 | #[serde(skip)] - 207 | pub java: bool, - 208 | #[serde(skip)] - 209 | pub kotlin: bool, - 210 | pub node: bool, - 211 | pub python: bool, - 212 | pub rust: bool, - 213 | pub swift: bool, - 214 | pub zig: bool, - 215 | } - | - 216 | impl Bindings { - 217 | /// return available languages and its default enabled state. - 218 | #[must_use] - 219 | pub const fn languages(&self) -> [(&'static str, bool); 7] { - 220 | [ - 221 | ("c", true), - 222 | ("go", true), - 223 | // Comment out Java and Kotlin until the bindings are actually available. - 224 | // ("java", false), - 225 | // ("kotlin", false), - 226 | ("node", true), - 227 | ("python", true), - 228 | ("rust", true), - 229 | ("swift", true), - 230 | ("zig", false), - 231 | ] - 232 | } - | - 233 | /// construct Bindings from a language list. If a language isn't supported, its name will be put on the error part. 
- 234 | pub fn with_enabled_languages<'a, I>(languages: I) -> Result - 235 | where - 236 | I: Iterator, - 237 | { - 238 | let mut out = Self { - 239 | c: false, - 240 | go: false, - 241 | java: false, - 242 | kotlin: false, - 243 | node: false, - 244 | python: false, - 245 | rust: false, - 246 | swift: false, - 247 | zig: false, - 248 | }; - | - 249 | for v in languages { - 250 | match v { - 251 | "c" => out.c = true, - 252 | "go" => out.go = true, - 253 | // Comment out Java and Kotlin until the bindings are actually available. - 254 | // "java" => out.java = true, - 255 | // "kotlin" => out.kotlin = true, - 256 | "node" => out.node = true, - 257 | "python" => out.python = true, - 258 | "rust" => out.rust = true, - 259 | "swift" => out.swift = true, - 260 | "zig" => out.zig = true, - 261 | unsupported => return Err(unsupported), - 262 | } - 263 | } - | - 264 | Ok(out) - 265 | } - 266 | } - | - 267 | impl Default for Bindings { - 268 | fn default() -> Self { - 269 | Self { - 270 | c: true, - 271 | go: true, - 272 | java: false, - 273 | kotlin: false, - 274 | node: true, - 275 | python: true, - 276 | rust: true, - 277 | swift: true, - 278 | zig: false, - 279 | } - 280 | } - 281 | } - | - 282 | // Replace `~` or `$HOME` with home path string. - 283 | // (While paths like "~/.tree-sitter/config.json" can be deserialized, - 284 | // they're not valid path for I/O modules.) 
- 285 | fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result, D::Error> - 286 | where - 287 | D: Deserializer<'de>, - 288 | { - 289 | let paths = Vec::::deserialize(deserializer)?; - 290 | let Ok(home) = etcetera::home_dir() else { - 291 | return Ok(paths); - 292 | }; - 293 | let standardized = paths - 294 | .into_iter() - 295 | .map(|path| standardize_path(path, &home)) - 296 | .collect(); - 297 | Ok(standardized) - 298 | } - | - 299 | fn standardize_path(path: PathBuf, home: &Path) -> PathBuf { - 300 | if let Ok(p) = path.strip_prefix("~") { - 301 | return home.join(p); - 302 | } - 303 | if let Ok(p) = path.strip_prefix("$HOME") { - 304 | return home.join(p); - 305 | } - 306 | path - 307 | } - | - 308 | impl Config { - 309 | #[must_use] - 310 | pub fn initial() -> Self { - 311 | let home_dir = etcetera::home_dir().expect("Cannot determine home directory"); - 312 | Self { - 313 | parser_directories: vec![ - 314 | home_dir.join("github"), - 315 | home_dir.join("src"), - 316 | home_dir.join("source"), - 317 | home_dir.join("projects"), - 318 | home_dir.join("dev"), - 319 | home_dir.join("git"), - 320 | ], - 321 | } - 322 | } - 323 | } - | - 324 | const BUILD_TARGET: &str = env!("BUILD_TARGET"); - 325 | const BUILD_HOST: &str = env!("BUILD_HOST"); - | - 326 | pub struct LanguageConfiguration<'a> { - 327 | pub scope: Option, - 328 | pub content_regex: Option, - 329 | pub first_line_regex: Option, - 330 | pub injection_regex: Option, - 331 | pub file_types: Vec, - 332 | pub root_path: PathBuf, - 333 | pub highlights_filenames: Option>, - 334 | pub injections_filenames: Option>, - 335 | pub locals_filenames: Option>, - 336 | pub tags_filenames: Option>, - 337 | pub language_name: String, - 338 | language_id: usize, - 339 | #[cfg(feature = "tree-sitter-highlight")] - 340 | highlight_config: OnceCell>, - 341 | #[cfg(feature = "tree-sitter-tags")] - 342 | tags_config: OnceCell>, - 343 | #[cfg(feature = "tree-sitter-highlight")] - 344 | highlight_names: &'a 
Mutex>, - 345 | #[cfg(feature = "tree-sitter-highlight")] - 346 | use_all_highlight_names: bool, - 347 | _phantom: PhantomData<&'a ()>, - 348 | } - | - 349 | pub struct Loader { - 350 | pub parser_lib_path: PathBuf, - 351 | languages_by_id: Vec<(PathBuf, OnceCell, Option>)>, - 352 | language_configurations: Vec>, - 353 | language_configuration_ids_by_file_type: HashMap>, - 354 | language_configuration_in_current_path: Option, - 355 | language_configuration_ids_by_first_line_regex: HashMap>, - 356 | #[cfg(feature = "tree-sitter-highlight")] - 357 | highlight_names: Box>>, - 358 | #[cfg(feature = "tree-sitter-highlight")] - 359 | use_all_highlight_names: bool, - 360 | debug_build: bool, - 361 | sanitize_build: bool, - 362 | force_rebuild: bool, - | - 363 | #[cfg(feature = "wasm")] - 364 | wasm_store: Mutex>, - 365 | } - | - 366 | pub struct CompileConfig<'a> { - 367 | pub src_path: &'a Path, - 368 | pub header_paths: Vec<&'a Path>, - 369 | pub parser_path: PathBuf, - 370 | pub scanner_path: Option, - 371 | pub external_files: Option<&'a [PathBuf]>, - 372 | pub output_path: Option, - 373 | pub flags: &'a [&'a str], - 374 | pub sanitize: bool, - 375 | pub name: String, - 376 | } - | - 377 | impl<'a> CompileConfig<'a> { - 378 | #[must_use] - 379 | pub fn new( - 380 | src_path: &'a Path, - 381 | externals: Option<&'a [PathBuf]>, - 382 | output_path: Option, - 383 | ) -> Self { - 384 | Self { - 385 | src_path, - 386 | header_paths: vec![src_path], - 387 | parser_path: src_path.join("parser.c"), - 388 | scanner_path: None, - 389 | external_files: externals, - 390 | output_path, - 391 | flags: &[], - 392 | sanitize: false, - 393 | name: String::new(), - 394 | } - 395 | } - 396 | } - | - 397 | unsafe impl Sync for Loader {} - | - 398 | impl Loader { - 399 | pub fn new() -> Result { - 400 | let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") { - 401 | PathBuf::from(path) - 402 | } else { - 403 | if cfg!(target_os = "macos") { - 404 | let legacy_apple_path = 
etcetera::base_strategy::Apple::new()? - 405 | .cache_dir() // `$HOME/Library/Caches/` - 406 | .join("tree-sitter"); - 407 | if legacy_apple_path.exists() && legacy_apple_path.is_dir() { - 408 | std::fs::remove_dir_all(legacy_apple_path)?; - 409 | } - 410 | } - | - 411 | etcetera::choose_base_strategy()? - 412 | .cache_dir() - 413 | .join("tree-sitter") - 414 | .join("lib") - 415 | }; - 416 | Ok(Self::with_parser_lib_path(parser_lib_path)) - 417 | } - | - 418 | #[must_use] - 419 | pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self { - 420 | Self { - 421 | parser_lib_path, - 422 | languages_by_id: Vec::new(), - 423 | language_configurations: Vec::new(), - 424 | language_configuration_ids_by_file_type: HashMap::new(), - 425 | language_configuration_in_current_path: None, - 426 | language_configuration_ids_by_first_line_regex: HashMap::new(), - 427 | #[cfg(feature = "tree-sitter-highlight")] - 428 | highlight_names: Box::new(Mutex::new(Vec::new())), - 429 | #[cfg(feature = "tree-sitter-highlight")] - 430 | use_all_highlight_names: true, - 431 | debug_build: false, - 432 | sanitize_build: false, - 433 | force_rebuild: false, - | - 434 | #[cfg(feature = "wasm")] - 435 | wasm_store: Mutex::default(), - 436 | } - 437 | } - | - 438 | #[cfg(feature = "tree-sitter-highlight")] - 439 | #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))] - 440 | pub fn configure_highlights(&mut self, names: &[String]) { - 441 | self.use_all_highlight_names = false; - 442 | let mut highlights = self.highlight_names.lock().unwrap(); - 443 | highlights.clear(); - 444 | highlights.extend(names.iter().cloned()); - 445 | } - | - 446 | #[must_use] - 447 | #[cfg(feature = "tree-sitter-highlight")] - 448 | #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))] - 449 | pub fn highlight_names(&self) -> Vec { - 450 | self.highlight_names.lock().unwrap().clone() - 451 | } - | - 452 | pub fn find_all_languages(&mut self, config: &Config) -> Result<()> { - 453 | if 
config.parser_directories.is_empty() { - 454 | warn!(concat!( - 455 | "You have not configured any parser directories!\n", - 456 | "Please run `tree-sitter init-config` and edit the resulting\n", - 457 | "configuration file to indicate where we should look for\n", - 458 | "language grammars.\n" - 459 | )); - 460 | } - 461 | for parser_container_dir in &config.parser_directories { - 462 | if let Ok(entries) = fs::read_dir(parser_container_dir) { - 463 | for entry in entries { - 464 | let entry = entry?; - 465 | if let Some(parser_dir_name) = entry.file_name().to_str() { - 466 | if parser_dir_name.starts_with("tree-sitter-") { - 467 | self.find_language_configurations_at_path( - 468 | &parser_container_dir.join(parser_dir_name), - 469 | false, - 470 | ) - 471 | .ok(); - 472 | } - 473 | } - 474 | } - 475 | } - 476 | } - 477 | Ok(()) - 478 | } - | - 479 | pub fn languages_at_path(&mut self, path: &Path) -> Result> { - 480 | if let Ok(configurations) = self.find_language_configurations_at_path(path, true) { - 481 | let mut language_ids = configurations - 482 | .iter() - 483 | .map(|c| (c.language_id, c.language_name.clone())) - 484 | .collect::>(); - 485 | language_ids.sort_unstable(); - 486 | language_ids.dedup(); - 487 | language_ids - 488 | .into_iter() - 489 | .map(|(id, name)| Ok((self.language_for_id(id)?, name))) - 490 | .collect::>>() - 491 | } else { - 492 | Ok(Vec::new()) - 493 | } - 494 | } - | - 495 | #[must_use] - 496 | pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> { - 497 | self.language_configurations - 498 | .iter() - 499 | .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref())) - 500 | .collect() - 501 | } - | - 502 | pub fn language_configuration_for_scope( - 503 | &self, - 504 | scope: &str, - 505 | ) -> Result> { - 506 | for configuration in &self.language_configurations { - 507 | if configuration.scope.as_ref().is_some_and(|s| s == scope) { - 508 | let language = 
self.language_for_id(configuration.language_id)?; - 509 | return Ok(Some((language, configuration))); - 510 | } - 511 | } - 512 | Ok(None) - 513 | } - | - 514 | pub fn language_configuration_for_first_line_regex( - 515 | &self, - 516 | path: &Path, - 517 | ) -> Result> { - 518 | self.language_configuration_ids_by_first_line_regex - 519 | .iter() - 520 | .try_fold(None, |_, (regex, ids)| { - 521 | if let Some(regex) = Self::regex(Some(regex)) { - 522 | let file = fs::File::open(path)?; - 523 | let reader = BufReader::new(file); - 524 | let first_line = reader.lines().next().transpose()?; - 525 | if let Some(first_line) = first_line { - 526 | if regex.is_match(&first_line) && !ids.is_empty() { - 527 | let configuration = &self.language_configurations[ids[0]]; - 528 | let language = self.language_for_id(configuration.language_id)?; - 529 | return Ok(Some((language, configuration))); - 530 | } - 531 | } - 532 | } - | - 533 | Ok(None) - 534 | }) - 535 | } - | - 536 | pub fn language_configuration_for_file_name( - 537 | &self, - 538 | path: &Path, - 539 | ) -> Result> { - 540 | // Find all the language configurations that match this file name - 541 | // or a suffix of the file name. 
- 542 | let configuration_ids = path - 543 | .file_name() - 544 | .and_then(|n| n.to_str()) - 545 | .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name)) - 546 | .or_else(|| { - 547 | let mut path = path.to_owned(); - 548 | let mut extensions = Vec::with_capacity(2); - 549 | while let Some(extension) = path.extension() { - 550 | extensions.push(extension.to_str()?.to_string()); - 551 | path = PathBuf::from(path.file_stem()?.to_os_string()); - 552 | } - 553 | extensions.reverse(); - 554 | self.language_configuration_ids_by_file_type - 555 | .get(&extensions.join(".")) - 556 | }); - | - 557 | if let Some(configuration_ids) = configuration_ids { - 558 | if !configuration_ids.is_empty() { - 559 | let configuration = if configuration_ids.len() == 1 { - 560 | &self.language_configurations[configuration_ids[0]] - 561 | } - 562 | // If multiple language configurations match, then determine which - 563 | // one to use by applying the configurations' content regexes. - 564 | else { - 565 | let file_contents = fs::read(path) - 566 | .with_context(|| format!("Failed to read path {}", path.display()))?; - 567 | let file_contents = String::from_utf8_lossy(&file_contents); - 568 | let mut best_score = -2isize; - 569 | let mut best_configuration_id = None; - 570 | for configuration_id in configuration_ids { - 571 | let config = &self.language_configurations[*configuration_id]; - | - 572 | // If the language configuration has a content regex, assign - 573 | // a score based on the length of the first match. - 574 | let score; - 575 | if let Some(content_regex) = &config.content_regex { - 576 | if let Some(mat) = content_regex.find(&file_contents) { - 577 | score = (mat.end() - mat.start()) as isize; - 578 | } - 579 | // If the content regex does not match, then *penalize* this - 580 | // language configuration, so that language configurations - 581 | // without content regexes are preferred over those with - 582 | // non-matching content regexes. 
- 583 | else { - 584 | score = -1; - 585 | } - 586 | } else { - 587 | score = 0; - 588 | } - 589 | if score > best_score { - 590 | best_configuration_id = Some(*configuration_id); - 591 | best_score = score; - 592 | } - 593 | } - | - 594 | &self.language_configurations[best_configuration_id.unwrap()] - 595 | }; - | - 596 | let language = self.language_for_id(configuration.language_id)?; - 597 | return Ok(Some((language, configuration))); - 598 | } - 599 | } - | - 600 | Ok(None) - 601 | } - | - 602 | pub fn language_configuration_for_injection_string( - 603 | &self, - 604 | string: &str, - 605 | ) -> Result> { - 606 | let mut best_match_length = 0; - 607 | let mut best_match_position = None; - 608 | for (i, configuration) in self.language_configurations.iter().enumerate() { - 609 | if let Some(injection_regex) = &configuration.injection_regex { - 610 | if let Some(mat) = injection_regex.find(string) { - 611 | let length = mat.end() - mat.start(); - 612 | if length > best_match_length { - 613 | best_match_position = Some(i); - 614 | best_match_length = length; - 615 | } - 616 | } - 617 | } - 618 | } - | - 619 | if let Some(i) = best_match_position { - 620 | let configuration = &self.language_configurations[i]; - 621 | let language = self.language_for_id(configuration.language_id)?; - 622 | Ok(Some((language, configuration))) - 623 | } else { - 624 | Ok(None) - 625 | } - 626 | } - | - 627 | pub fn language_for_configuration( - 628 | &self, - 629 | configuration: &LanguageConfiguration, - 630 | ) -> Result { - 631 | self.language_for_id(configuration.language_id) - 632 | } - | - 633 | fn language_for_id(&self, id: usize) -> Result { - 634 | let (path, language, externals) = &self.languages_by_id[id]; - 635 | language - 636 | .get_or_try_init(|| { - 637 | let src_path = path.join("src"); - 638 | self.load_language_at_path(CompileConfig::new( - 639 | &src_path, - 640 | externals.as_deref(), - 641 | None, - 642 | )) - 643 | }) - 644 | .cloned() - 645 | } - | - 646 | pub 
fn compile_parser_at_path( - 647 | &self, - 648 | grammar_path: &Path, - 649 | output_path: PathBuf, - 650 | flags: &[&str], - 651 | ) -> Result<()> { - 652 | let src_path = grammar_path.join("src"); - 653 | let mut config = CompileConfig::new(&src_path, None, Some(output_path)); - 654 | config.flags = flags; - 655 | self.load_language_at_path(config).map(|_| ()) - 656 | } - | - 657 | pub fn load_language_at_path(&self, mut config: CompileConfig) -> Result { - 658 | let grammar_path = config.src_path.join("grammar.json"); - 659 | config.name = Self::grammar_json_name(&grammar_path)?; - 660 | self.load_language_at_path_with_name(config) - 661 | } - | - 662 | pub fn load_language_at_path_with_name(&self, mut config: CompileConfig) -> Result { - 663 | let mut lib_name = config.name.clone(); - 664 | let language_fn_name = format!("tree_sitter_{}", config.name.replace('-', "_")); - 665 | if self.debug_build { - 666 | lib_name.push_str(".debug._"); - 667 | } - | - 668 | if self.sanitize_build { - 669 | lib_name.push_str(".sanitize._"); - 670 | config.sanitize = true; - 671 | } - | - 672 | if config.output_path.is_none() { - 673 | fs::create_dir_all(&self.parser_lib_path)?; - 674 | } - | - 675 | let mut recompile = self.force_rebuild || config.output_path.is_some(); // if specified, always recompile - | - 676 | let output_path = config.output_path.unwrap_or_else(|| { - 677 | let mut path = self.parser_lib_path.join(lib_name); - 678 | path.set_extension(env::consts::DLL_EXTENSION); - 679 | #[cfg(feature = "wasm")] - 680 | if self.wasm_store.lock().unwrap().is_some() { - 681 | path.set_extension("wasm"); - 682 | } - 683 | path - 684 | }); - 685 | config.output_path = Some(output_path.clone()); - | - 686 | let parser_path = config.src_path.join("parser.c"); - 687 | config.scanner_path = self.get_scanner_path(config.src_path); - | - 688 | let mut paths_to_check = vec![parser_path]; - | - 689 | if let Some(scanner_path) = config.scanner_path.as_ref() { - 690 | 
paths_to_check.push(scanner_path.clone()); - 691 | } - | - 692 | paths_to_check.extend( - 693 | config - 694 | .external_files - 695 | .unwrap_or_default() - 696 | .iter() - 697 | .map(|p| config.src_path.join(p)), - 698 | ); - | - 699 | if !recompile { - 700 | recompile = needs_recompile(&output_path, &paths_to_check) - 701 | .with_context(|| "Failed to compare source and binary timestamps")?; - 702 | } - | - 703 | #[cfg(feature = "wasm")] - 704 | if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() { - 705 | if recompile { - 706 | self.compile_parser_to_wasm( - 707 | &config.name, - 708 | config.src_path, - 709 | config - 710 | .scanner_path - 711 | .as_ref() - 712 | .and_then(|p| p.strip_prefix(config.src_path).ok()), - 713 | &output_path, - 714 | )?; - 715 | } - | - 716 | let wasm_bytes = fs::read(&output_path)?; - 717 | return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?); - 718 | } - | - 719 | let lock_path = if env::var("CROSS_RUNNER").is_ok() { - 720 | tempfile::tempdir() - 721 | .unwrap() - 722 | .path() - 723 | .join("tree-sitter") - 724 | .join("lock") - 725 | .join(format!("{}.lock", config.name)) - 726 | } else { - 727 | etcetera::choose_base_strategy()? 
- 728 | .cache_dir() - 729 | .join("tree-sitter") - 730 | .join("lock") - 731 | .join(format!("{}.lock", config.name)) - 732 | }; - | - 733 | if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) { - 734 | recompile = false; - 735 | if lock_file.try_lock_exclusive().is_err() { - 736 | // if we can't acquire the lock, another process is compiling the parser, wait for - 737 | // it and don't recompile - 738 | lock_file.lock_exclusive()?; - 739 | recompile = false; - 740 | } else { - 741 | // if we can acquire the lock, check if the lock file is older than 30 seconds, a - 742 | // run that was interrupted and left the lock file behind should not block - 743 | // subsequent runs - 744 | let time = lock_file.metadata()?.modified()?.elapsed()?.as_secs(); - 745 | if time > 30 { - 746 | fs::remove_file(&lock_path)?; - 747 | recompile = true; - 748 | } - 749 | } - 750 | } - | - 751 | if recompile { - 752 | fs::create_dir_all(lock_path.parent().unwrap()).with_context(|| { - 753 | format!( - 754 | "Failed to create directory {}", - 755 | lock_path.parent().unwrap().display() - 756 | ) - 757 | })?; - 758 | let lock_file = fs::OpenOptions::new() - 759 | .create(true) - 760 | .truncate(true) - 761 | .write(true) - 762 | .open(&lock_path)?; - 763 | lock_file.lock_exclusive()?; - | - 764 | self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?; - | - 765 | if config.scanner_path.is_some() { - 766 | self.check_external_scanner(&config.name, &output_path)?; - 767 | } - 768 | } - | - 769 | Self::load_language(&output_path, &language_fn_name) - 770 | } - | - 771 | pub fn load_language(path: &Path, function_name: &str) -> Result { - 772 | let library = unsafe { Library::new(path) } - 773 | .with_context(|| format!("Error opening dynamic library {}", path.display()))?; - 774 | let language = unsafe { - 775 | let language_fn = library - 776 | .get:: Language>>(function_name.as_bytes()) - 777 | .with_context(|| { - 778 | format!( - 779 | "Failed to load symbol 
{function_name} from {}", - 780 | path.display() - 781 | ) - 782 | })?; - 783 | language_fn() - 784 | }; - 785 | mem::forget(library); - 786 | Ok(language) - 787 | } - | - 788 | fn compile_parser_to_dylib( - 789 | &self, - 790 | config: &CompileConfig, - 791 | lock_file: &fs::File, - 792 | lock_path: &Path, - 793 | ) -> Result<(), Error> { - 794 | let mut cc_config = cc::Build::new(); - 795 | cc_config - 796 | .cargo_metadata(false) - 797 | .cargo_warnings(false) - 798 | .target(BUILD_TARGET) - 799 | .host(BUILD_HOST) - 800 | .debug(self.debug_build) - 801 | .file(&config.parser_path) - 802 | .includes(&config.header_paths) - 803 | .std("c11"); - | - 804 | if let Some(scanner_path) = config.scanner_path.as_ref() { - 805 | cc_config.file(scanner_path); - 806 | } - | - 807 | if self.debug_build { - 808 | cc_config.opt_level(0).extra_warnings(true); - 809 | } else { - 810 | cc_config.opt_level(2).extra_warnings(false); - 811 | } - | - 812 | for flag in config.flags { - 813 | cc_config.define(flag, None); - 814 | } - | - 815 | let compiler = cc_config.get_compiler(); - 816 | let mut command = Command::new(compiler.path()); - 817 | command.args(compiler.args()); - 818 | for (key, value) in compiler.env() { - 819 | command.env(key, value); - 820 | } - | - 821 | let output_path = config.output_path.as_ref().unwrap(); - | - 822 | let temp_dir = if compiler.is_like_msvc() { - 823 | let out = format!("-out:{}", output_path.to_str().unwrap()); - 824 | command.arg(if self.debug_build { "-LDd" } else { "-LD" }); - 825 | command.arg("-utf-8"); - | - 826 | // Windows creates intermediate files when compiling (.exp, .lib, .obj), which causes - 827 | // issues when multiple processes are compiling in the same directory. This creates a - 828 | // temporary directory for those files to go into, which is deleted after compilation. 
- 829 | let temp_dir = output_path.parent().unwrap().join(format!( - 830 | "tmp_{}_{:?}", - 831 | std::process::id(), - 832 | std::thread::current().id() - 833 | )); - 834 | std::fs::create_dir_all(&temp_dir).unwrap(); - | - 835 | command.arg(format!("/Fo{}\\", temp_dir.display())); - 836 | command.args(cc_config.get_files()); - 837 | command.arg("-link").arg(out); - 838 | command.arg(format!("/IMPLIB:{}.lib", temp_dir.join("temp").display())); - | - 839 | Some(temp_dir) - 840 | } else { - 841 | command.arg("-Werror=implicit-function-declaration"); - 842 | if cfg!(any(target_os = "macos", target_os = "ios")) { - 843 | command.arg("-dynamiclib"); - 844 | // TODO: remove when supported - 845 | command.arg("-UTREE_SITTER_REUSE_ALLOCATOR"); - 846 | } else { - 847 | command.arg("-shared"); - 848 | } - 849 | command.args(cc_config.get_files()); - 850 | command.arg("-o").arg(output_path); - | - 851 | None - 852 | }; - | - 853 | let output = command.output().with_context(|| { - 854 | format!("Failed to execute the C compiler with the following command:\n{command:?}") - 855 | })?; - | - 856 | if let Some(temp_dir) = temp_dir { - 857 | let _ = fs::remove_dir_all(temp_dir); - 858 | } - | - 859 | FileExt::unlock(lock_file)?; - 860 | fs::remove_file(lock_path)?; - | - 861 | if output.status.success() { - 862 | Ok(()) - 863 | } else { - 864 | Err(anyhow!( - 865 | "Parser compilation failed.\nStdout: {}\nStderr: {}", - 866 | String::from_utf8_lossy(&output.stdout), - 867 | String::from_utf8_lossy(&output.stderr) - 868 | )) - 869 | } - 870 | } - | - 871 | #[cfg(unix)] - 872 | fn check_external_scanner(&self, name: &str, library_path: &Path) -> Result<()> { - 873 | let prefix = if cfg!(any(target_os = "macos", target_os = "ios")) { - 874 | "_" - 875 | } else { - 876 | "" - 877 | }; - 878 | let section = if cfg!(all(target_arch = "powerpc64", target_os = "linux")) { - 879 | " D " - 880 | } else { - 881 | " T " - 882 | }; - 883 | let mut must_have = vec![ - 884 | 
format!("{prefix}tree_sitter_{name}_external_scanner_create"), - 885 | format!("{prefix}tree_sitter_{name}_external_scanner_destroy"), - 886 | format!("{prefix}tree_sitter_{name}_external_scanner_serialize"), - 887 | format!("{prefix}tree_sitter_{name}_external_scanner_deserialize"), - 888 | format!("{prefix}tree_sitter_{name}_external_scanner_scan"), - 889 | ]; - | - 890 | let nm_cmd = env::var("NM").unwrap_or_else(|_| "nm".to_owned()); - 891 | let command = Command::new(nm_cmd) - 892 | .arg("--defined-only") - 893 | .arg(library_path) - 894 | .output(); - 895 | if let Ok(output) = command { - 896 | if output.status.success() { - 897 | let mut found_non_static = false; - 898 | for line in String::from_utf8_lossy(&output.stdout).lines() { - 899 | if line.contains(section) { - 900 | if let Some(function_name) = - 901 | line.split_whitespace().collect::>().get(2) - 902 | { - 903 | if !line.contains("tree_sitter_") { - 904 | if !found_non_static { - 905 | found_non_static = true; - 906 | warn!("Found non-static non-tree-sitter functions in the external scanner"); - 907 | } - 908 | warn!(" `{function_name}`"); - 909 | } else { - 910 | must_have.retain(|f| f != function_name); - 911 | } - 912 | } - 913 | } - 914 | } - 915 | if found_non_static { - 916 | warn!(concat!( - 917 | "Consider making these functions static, they can cause conflicts ", - 918 | "when another tree-sitter project uses the same function name." - 919 | )); - 920 | } - | - 921 | if !must_have.is_empty() { - 922 | let missing = must_have - 923 | .iter() - 924 | .map(|f| format!(" `{f}`")) - 925 | .collect::>() - 926 | .join("\n"); - | - 927 | return Err(anyhow!(format!( - 928 | indoc::indoc! {" - 929 | Missing required functions in the external scanner, parsing won't work without these! 
- | - 930 | {} - | - 931 | You can read more about this at https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners - 932 | "}, - 933 | missing, - 934 | ))); - 935 | } - 936 | } - 937 | } - | - 938 | Ok(()) - 939 | } - | - 940 | #[cfg(windows)] - 941 | fn check_external_scanner(&self, _name: &str, _library_path: &Path) -> Result<()> { - 942 | // TODO: there's no nm command on windows, whoever wants to implement this can and should :) - | - 943 | // let mut must_have = vec![ - 944 | // format!("tree_sitter_{name}_external_scanner_create"), - 945 | // format!("tree_sitter_{name}_external_scanner_destroy"), - 946 | // format!("tree_sitter_{name}_external_scanner_serialize"), - 947 | // format!("tree_sitter_{name}_external_scanner_deserialize"), - 948 | // format!("tree_sitter_{name}_external_scanner_scan"), - 949 | // ]; - | - 950 | Ok(()) - 951 | } - | - 952 | pub fn compile_parser_to_wasm( - 953 | &self, - 954 | language_name: &str, - 955 | src_path: &Path, - 956 | scanner_filename: Option<&Path>, - 957 | output_path: &Path, - 958 | ) -> Result<(), Error> { - 959 | let clang_executable = self.ensure_wasi_sdk_exists()?; - | - 960 | let output_name = "output.wasm"; - 961 | let mut command = Command::new(&clang_executable); - 962 | command.current_dir(src_path).args([ - 963 | "-o", - 964 | output_name, - 965 | "-fPIC", - 966 | "-shared", - 967 | if self.debug_build { "-g" } else { "-Os" }, - 968 | format!("-Wl,--export=tree_sitter_{language_name}").as_str(), - 969 | "-Wl,--allow-undefined", - 970 | "-Wl,--no-entry", - 971 | "-nostdlib", - 972 | "-fno-exceptions", - 973 | "-fvisibility=hidden", - 974 | "-I", - 975 | ".", - 976 | "parser.c", - 977 | ]); - | - 978 | if let Some(scanner_filename) = scanner_filename { - 979 | command.arg(scanner_filename); - 980 | } - | - 981 | let output = command.output().context("Failed to run wasi-sdk clang")?; - | - 982 | if !output.status.success() { - 983 | return Err(anyhow!( - 984 | "wasi-sdk clang command 
failed: {}", - 985 | String::from_utf8_lossy(&output.stderr) - 986 | )); - 987 | } - | - 988 | fs::rename(src_path.join(output_name), output_path) - 989 | .context("failed to rename Wasm output file")?; - | - 990 | Ok(()) - 991 | } - | - 992 | /// Extracts a tar.gz archive with `tar`, stripping the first path component. - 993 | fn extract_tar_gz_with_strip( - 994 | &self, - 995 | archive_path: &Path, - 996 | destination: &Path, - 997 | ) -> Result<(), Error> { - 998 | let status = Command::new("tar") - 999 | .arg("-xzf") -1000 | .arg(archive_path) -1001 | .arg("--strip-components=1") -1002 | .arg("-C") -1003 | .arg(destination) -1004 | .status() -1005 | .with_context(|| format!("Failed to execute tar for {}", archive_path.display()))?; - | -1006 | if !status.success() { -1007 | return Err(anyhow!( -1008 | "Failed to extract archive {} to {}", -1009 | archive_path.display(), -1010 | destination.display() -1011 | )); -1012 | } - | -1013 | Ok(()) -1014 | } - | -1015 | /// This ensures that the wasi-sdk is available, downloading and extracting it if necessary, -1016 | /// and returns the path to the `clang` executable. -1017 | /// -1018 | /// If `TREE_SITTER_WASI_SDK_PATH` is set, it will use that path to look for the clang executable. 
-1019 | fn ensure_wasi_sdk_exists(&self) -> Result { -1020 | let possible_executables = if cfg!(windows) { -1021 | vec![ -1022 | "clang.exe", -1023 | "wasm32-unknown-wasi-clang.exe", -1024 | "wasm32-wasi-clang.exe", -1025 | ] -1026 | } else { -1027 | vec!["clang", "wasm32-unknown-wasi-clang", "wasm32-wasi-clang"] -1028 | }; - | -1029 | if let Ok(wasi_sdk_path) = std::env::var("TREE_SITTER_WASI_SDK_PATH") { -1030 | let wasi_sdk_dir = PathBuf::from(wasi_sdk_path); - | -1031 | for exe in &possible_executables { -1032 | let clang_exe = wasi_sdk_dir.join("bin").join(exe); -1033 | if clang_exe.exists() { -1034 | return Ok(clang_exe); -1035 | } -1036 | } - | -1037 | return Err(anyhow!( -1038 | "TREE_SITTER_WASI_SDK_PATH is set to '{}', but no clang executable found in 'bin/' directory. \ -1039 | Looked for: {}", -1040 | wasi_sdk_dir.display(), -1041 | possible_executables.join(", ") -1042 | )); -1043 | } - | -1044 | let cache_dir = etcetera::choose_base_strategy()? -1045 | .cache_dir() -1046 | .join("tree-sitter"); -1047 | fs::create_dir_all(&cache_dir)?; - | -1048 | let wasi_sdk_dir = cache_dir.join("wasi-sdk"); - | -1049 | for exe in &possible_executables { -1050 | let clang_exe = wasi_sdk_dir.join("bin").join(exe); -1051 | if clang_exe.exists() { -1052 | return Ok(clang_exe); -1053 | } -1054 | } - | -1055 | fs::create_dir_all(&wasi_sdk_dir)?; - | -1056 | let arch_os = if cfg!(target_os = "macos") { -1057 | if cfg!(target_arch = "aarch64") { -1058 | "arm64-macos" -1059 | } else { -1060 | "x86_64-macos" -1061 | } -1062 | } else if cfg!(target_os = "windows") { -1063 | if cfg!(target_arch = "aarch64") { -1064 | "arm64-windows" -1065 | } else { -1066 | "x86_64-windows" -1067 | } -1068 | } else if cfg!(target_os = "linux") { -1069 | if cfg!(target_arch = "aarch64") { -1070 | "arm64-linux" -1071 | } else { -1072 | "x86_64-linux" -1073 | } -1074 | } else { -1075 | return Err(anyhow!("Unsupported platform for wasi-sdk")); -1076 | }; - | -1077 | let sdk_filename = 
format!("wasi-sdk-25.0-{arch_os}.tar.gz"); -1078 | let sdk_url = format!( -1079 | "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25/{sdk_filename}", -1080 | ); - | -1081 | info!("Downloading wasi-sdk from {sdk_url}..."); -1082 | let temp_tar_path = cache_dir.join(sdk_filename); - | -1083 | let status = Command::new("curl") -1084 | .arg("-f") -1085 | .arg("-L") -1086 | .arg("-o") -1087 | .arg(&temp_tar_path) -1088 | .arg(&sdk_url) -1089 | .status() -1090 | .with_context(|| format!("Failed to execute curl for {sdk_url}"))?; - | -1091 | if !status.success() { -1092 | return Err(anyhow!("Failed to download wasi-sdk from {sdk_url}",)); -1093 | } - | -1094 | info!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display()); -1095 | self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir) -1096 | .context("Failed to extract wasi-sdk archive")?; - | -1097 | fs::remove_file(temp_tar_path).ok(); -1098 | for exe in &possible_executables { -1099 | let clang_exe = wasi_sdk_dir.join("bin").join(exe); -1100 | if clang_exe.exists() { -1101 | return Ok(clang_exe); -1102 | } -1103 | } - | -1104 | Err(anyhow!( -1105 | "Failed to find clang executable in downloaded wasi-sdk at '{}'. 
Looked for: {}", -1106 | wasi_sdk_dir.display(), -1107 | possible_executables.join(", ") -1108 | )) -1109 | } - | -1110 | #[must_use] -1111 | #[cfg(feature = "tree-sitter-highlight")] -1112 | pub fn highlight_config_for_injection_string<'a>( -1113 | &'a self, -1114 | string: &str, -1115 | ) -> Option<&'a HighlightConfiguration> { -1116 | match self.language_configuration_for_injection_string(string) { -1117 | Err(e) => { -1118 | error!("Failed to load language for injection string '{string}': {e}",); -1119 | None -1120 | } -1121 | Ok(None) => None, -1122 | Ok(Some((language, configuration))) => { -1123 | match configuration.highlight_config(language, None) { -1124 | Err(e) => { -1125 | error!( -1126 | "Failed to load higlight config for injection string '{string}': {e}" -1127 | ); -1128 | None -1129 | } -1130 | Ok(None) => None, -1131 | Ok(Some(config)) => Some(config), -1132 | } -1133 | } -1134 | } -1135 | } - | -1136 | #[must_use] -1137 | pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> { -1138 | self.language_configuration_in_current_path -1139 | .map(|i| &self.language_configurations[i]) -1140 | } - | -1141 | pub fn find_language_configurations_at_path( -1142 | &mut self, -1143 | parser_path: &Path, -1144 | set_current_path_config: bool, -1145 | ) -> Result<&[LanguageConfiguration]> { -1146 | let initial_language_configuration_count = self.language_configurations.len(); - | -1147 | let ts_json = TreeSitterJSON::from_file(parser_path); -1148 | if let Ok(config) = ts_json { -1149 | let language_count = self.languages_by_id.len(); -1150 | for grammar in config.grammars { -1151 | // Determine the path to the parser directory. This can be specified in -1152 | // the tree-sitter.json, but defaults to the directory containing the -1153 | // tree-sitter.json. 
-1154 | let language_path = parser_path.join(grammar.path.unwrap_or(PathBuf::from("."))); - | -1155 | // Determine if a previous language configuration in this package.json file -1156 | // already uses the same language. -1157 | let mut language_id = None; -1158 | for (id, (path, _, _)) in -1159 | self.languages_by_id.iter().enumerate().skip(language_count) -1160 | { -1161 | if language_path == *path { -1162 | language_id = Some(id); -1163 | } -1164 | } - | -1165 | // If not, add a new language path to the list. -1166 | let language_id = if let Some(language_id) = language_id { -1167 | language_id -1168 | } else { -1169 | self.languages_by_id.push(( -1170 | language_path, -1171 | OnceCell::new(), -1172 | grammar.external_files.clone().into_vec().map(|files| { -1173 | files.into_iter() -1174 | .map(|path| { -1175 | let path = parser_path.join(path); -1176 | // prevent p being above/outside of parser_path -1177 | if path.starts_with(parser_path) { -1178 | Ok(path) -1179 | } else { -1180 | Err(anyhow!( -1181 | "External file path {} is outside of parser directory {}", path.display(), parser_path.display(), -1182 | )) -1183 | } -1184 | }) -1185 | .collect::>>() -1186 | }).transpose()?, -1187 | )); -1188 | self.languages_by_id.len() - 1 -1189 | }; - | -1190 | let configuration = LanguageConfiguration { -1191 | root_path: parser_path.to_path_buf(), -1192 | language_name: grammar.name, -1193 | scope: Some(grammar.scope), -1194 | language_id, -1195 | file_types: grammar.file_types.unwrap_or_default(), -1196 | content_regex: Self::regex(grammar.content_regex.as_deref()), -1197 | first_line_regex: Self::regex(grammar.first_line_regex.as_deref()), -1198 | injection_regex: Self::regex(grammar.injection_regex.as_deref()), -1199 | injections_filenames: grammar.injections.into_vec(), -1200 | locals_filenames: grammar.locals.into_vec(), -1201 | tags_filenames: grammar.tags.into_vec(), -1202 | highlights_filenames: grammar.highlights.into_vec(), -1203 | #[cfg(feature = 
"tree-sitter-highlight")] -1204 | highlight_config: OnceCell::new(), -1205 | #[cfg(feature = "tree-sitter-tags")] -1206 | tags_config: OnceCell::new(), -1207 | #[cfg(feature = "tree-sitter-highlight")] -1208 | highlight_names: &self.highlight_names, -1209 | #[cfg(feature = "tree-sitter-highlight")] -1210 | use_all_highlight_names: self.use_all_highlight_names, -1211 | _phantom: PhantomData, -1212 | }; - | -1213 | for file_type in &configuration.file_types { -1214 | self.language_configuration_ids_by_file_type -1215 | .entry(file_type.clone()) -1216 | .or_default() -1217 | .push(self.language_configurations.len()); -1218 | } -1219 | if let Some(first_line_regex) = &configuration.first_line_regex { -1220 | self.language_configuration_ids_by_first_line_regex -1221 | .entry(first_line_regex.to_string()) -1222 | .or_default() -1223 | .push(self.language_configurations.len()); -1224 | } - | -1225 | self.language_configurations.push(unsafe { -1226 | mem::transmute::, LanguageConfiguration<'static>>( -1227 | configuration, -1228 | ) -1229 | }); - | -1230 | if set_current_path_config && self.language_configuration_in_current_path.is_none() -1231 | { -1232 | self.language_configuration_in_current_path = -1233 | Some(self.language_configurations.len() - 1); -1234 | } -1235 | } -1236 | } else if let Err(e) = ts_json { -1237 | match e.downcast_ref::() { -1238 | // This is noisy, and not really an issue. -1239 | Some(e) if e.kind() == std::io::ErrorKind::NotFound => {} -1240 | _ => { -1241 | warn!( -1242 | "Failed to parse {} -- {e}", -1243 | parser_path.join("tree-sitter.json").display() -1244 | ); -1245 | } -1246 | } -1247 | } - | -1248 | // If we didn't find any language configurations in the tree-sitter.json file, -1249 | // but there is a grammar.json file, then use the grammar file to form a simple -1250 | // language configuration. 
-1251 | if self.language_configurations.len() == initial_language_configuration_count -1252 | && parser_path.join("src").join("grammar.json").exists() -1253 | { -1254 | let grammar_path = parser_path.join("src").join("grammar.json"); -1255 | let language_name = Self::grammar_json_name(&grammar_path)?; -1256 | let configuration = LanguageConfiguration { -1257 | root_path: parser_path.to_owned(), -1258 | language_name, -1259 | language_id: self.languages_by_id.len(), -1260 | file_types: Vec::new(), -1261 | scope: None, -1262 | content_regex: None, -1263 | first_line_regex: None, -1264 | injection_regex: None, -1265 | injections_filenames: None, -1266 | locals_filenames: None, -1267 | highlights_filenames: None, -1268 | tags_filenames: None, -1269 | #[cfg(feature = "tree-sitter-highlight")] -1270 | highlight_config: OnceCell::new(), -1271 | #[cfg(feature = "tree-sitter-tags")] -1272 | tags_config: OnceCell::new(), -1273 | #[cfg(feature = "tree-sitter-highlight")] -1274 | highlight_names: &self.highlight_names, -1275 | #[cfg(feature = "tree-sitter-highlight")] -1276 | use_all_highlight_names: self.use_all_highlight_names, -1277 | _phantom: PhantomData, -1278 | }; -1279 | self.language_configurations.push(unsafe { -1280 | mem::transmute::, LanguageConfiguration<'static>>( -1281 | configuration, -1282 | ) -1283 | }); -1284 | self.languages_by_id -1285 | .push((parser_path.to_owned(), OnceCell::new(), None)); -1286 | } - | -1287 | Ok(&self.language_configurations[initial_language_configuration_count..]) -1288 | } - | -1289 | fn regex(pattern: Option<&str>) -> Option { -1290 | pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok()) -1291 | } - | -1292 | fn grammar_json_name(grammar_path: &Path) -> Result { -1293 | let file = fs::File::open(grammar_path).with_context(|| { -1294 | format!("Failed to open grammar.json at {}", grammar_path.display()) -1295 | })?; - | -1296 | let first_three_lines = BufReader::new(file) -1297 | .lines() -1298 | .take(3) -1299 | 
.collect::, _>>() -1300 | .with_context(|| { -1301 | format!( -1302 | "Failed to read the first three lines of grammar.json at {}", -1303 | grammar_path.display() -1304 | ) -1305 | })? -1306 | .join("\n"); - | -1307 | let name = GRAMMAR_NAME_REGEX -1308 | .captures(&first_three_lines) -1309 | .and_then(|c| c.get(1)) -1310 | .ok_or_else(|| { -1311 | anyhow!( -1312 | "Failed to parse the language name from grammar.json at {}", -1313 | grammar_path.display() -1314 | ) -1315 | })?; - | -1316 | Ok(name.as_str().to_string()) -1317 | } - | -1318 | pub fn select_language( -1319 | &mut self, -1320 | path: &Path, -1321 | current_dir: &Path, -1322 | scope: Option<&str>, -1323 | // path to dynamic library, name of language -1324 | lib_info: Option<&(PathBuf, &str)>, -1325 | ) -> Result { -1326 | if let Some((ref lib_path, language_name)) = lib_info { -1327 | let language_fn_name = format!("tree_sitter_{}", language_name.replace('-', "_")); -1328 | Self::load_language(lib_path, &language_fn_name) -1329 | } else if let Some(scope) = scope { -1330 | if let Some(config) = self -1331 | .language_configuration_for_scope(scope) -1332 | .with_context(|| format!("Failed to load language for scope '{scope}'"))? -1333 | { -1334 | Ok(config.0) -1335 | } else { -1336 | Err(anyhow!("Unknown scope '{scope}'")) -1337 | } -1338 | } else if let Some((lang, _)) = self -1339 | .language_configuration_for_file_name(path) -1340 | .with_context(|| { -1341 | format!( -1342 | "Failed to load language for file name {}", -1343 | path.file_name().unwrap().to_string_lossy() -1344 | ) -1345 | })? -1346 | { -1347 | Ok(lang) -1348 | } else if let Some(id) = self.language_configuration_in_current_path { -1349 | Ok(self.language_for_id(self.language_configurations[id].language_id)?) -1350 | } else if let Some(lang) = self -1351 | .languages_at_path(current_dir) -1352 | .with_context(|| "Failed to load language in current directory")? 
-1353 | .first() -1354 | .cloned() -1355 | { -1356 | Ok(lang.0) -1357 | } else if let Some(lang) = self.language_configuration_for_first_line_regex(path)? { -1358 | Ok(lang.0) -1359 | } else { -1360 | Err(anyhow!("No language found")) -1361 | } -1362 | } - | -1363 | pub const fn debug_build(&mut self, flag: bool) { -1364 | self.debug_build = flag; -1365 | } - | -1366 | pub const fn sanitize_build(&mut self, flag: bool) { -1367 | self.sanitize_build = flag; -1368 | } - | -1369 | pub const fn force_rebuild(&mut self, rebuild: bool) { -1370 | self.force_rebuild = rebuild; -1371 | } - | -1372 | #[cfg(feature = "wasm")] -1373 | #[cfg_attr(docsrs, doc(cfg(feature = "wasm")))] -1374 | pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) { -1375 | *self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap()); -1376 | } - | -1377 | #[must_use] -1378 | pub fn get_scanner_path(&self, src_path: &Path) -> Option { -1379 | let path = src_path.join("scanner.c"); -1380 | path.exists().then_some(path) -1381 | } -1382 | } - | -1383 | impl LanguageConfiguration<'_> { -1384 | #[cfg(feature = "tree-sitter-highlight")] -1385 | pub fn highlight_config( -1386 | &self, -1387 | language: Language, -1388 | paths: Option<&[PathBuf]>, -1389 | ) -> Result> { -1390 | let (highlights_filenames, injections_filenames, locals_filenames) = match paths { -1391 | Some(paths) => ( -1392 | Some( -1393 | paths -1394 | .iter() -1395 | .filter(|p| p.ends_with("highlights.scm")) -1396 | .cloned() -1397 | .collect::>(), -1398 | ), -1399 | Some( -1400 | paths -1401 | .iter() -1402 | .filter(|p| p.ends_with("tags.scm")) -1403 | .cloned() -1404 | .collect::>(), -1405 | ), -1406 | Some( -1407 | paths -1408 | .iter() -1409 | .filter(|p| p.ends_with("locals.scm")) -1410 | .cloned() -1411 | .collect::>(), -1412 | ), -1413 | ), -1414 | None => (None, None, None), -1415 | }; -1416 | self.highlight_config -1417 | .get_or_try_init(|| { -1418 | let (highlights_query, 
highlight_ranges) = self.read_queries( -1419 | if highlights_filenames.is_some() { -1420 | highlights_filenames.as_deref() -1421 | } else { -1422 | self.highlights_filenames.as_deref() -1423 | }, -1424 | "highlights.scm", -1425 | )?; -1426 | let (injections_query, injection_ranges) = self.read_queries( -1427 | if injections_filenames.is_some() { -1428 | injections_filenames.as_deref() -1429 | } else { -1430 | self.injections_filenames.as_deref() -1431 | }, -1432 | "injections.scm", -1433 | )?; -1434 | let (locals_query, locals_ranges) = self.read_queries( -1435 | if locals_filenames.is_some() { -1436 | locals_filenames.as_deref() -1437 | } else { -1438 | self.locals_filenames.as_deref() -1439 | }, -1440 | "locals.scm", -1441 | )?; - | -1442 | if highlights_query.is_empty() { -1443 | Ok(None) -1444 | } else { -1445 | let mut result = HighlightConfiguration::new( -1446 | language, -1447 | &self.language_name, -1448 | &highlights_query, -1449 | &injections_query, -1450 | &locals_query, -1451 | ) -1452 | .map_err(|error| match error.kind { -1453 | QueryErrorKind::Language => Error::from(error), -1454 | _ => { -1455 | if error.offset < injections_query.len() { -1456 | Self::include_path_in_query_error( -1457 | error, -1458 | &injection_ranges, -1459 | &injections_query, -1460 | 0, -1461 | ) -1462 | } else if error.offset < injections_query.len() + locals_query.len() { -1463 | Self::include_path_in_query_error( -1464 | error, -1465 | &locals_ranges, -1466 | &locals_query, -1467 | injections_query.len(), -1468 | ) -1469 | } else { -1470 | Self::include_path_in_query_error( -1471 | error, -1472 | &highlight_ranges, -1473 | &highlights_query, -1474 | injections_query.len() + locals_query.len(), -1475 | ) -1476 | } -1477 | } -1478 | })?; -1479 | let mut all_highlight_names = self.highlight_names.lock().unwrap(); -1480 | if self.use_all_highlight_names { -1481 | for capture_name in result.query.capture_names() { -1482 | if !all_highlight_names.iter().any(|x| x == 
capture_name) { -1483 | all_highlight_names.push((*capture_name).to_string()); -1484 | } -1485 | } -1486 | } -1487 | result.configure(all_highlight_names.as_slice()); -1488 | drop(all_highlight_names); -1489 | Ok(Some(result)) -1490 | } -1491 | }) -1492 | .map(Option::as_ref) -1493 | } - | -1494 | #[cfg(feature = "tree-sitter-tags")] -1495 | pub fn tags_config(&self, language: Language) -> Result> { -1496 | self.tags_config -1497 | .get_or_try_init(|| { -1498 | let (tags_query, tags_ranges) = -1499 | self.read_queries(self.tags_filenames.as_deref(), "tags.scm")?; -1500 | let (locals_query, locals_ranges) = -1501 | self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?; -1502 | if tags_query.is_empty() { -1503 | Ok(None) -1504 | } else { -1505 | TagsConfiguration::new(language, &tags_query, &locals_query) -1506 | .map(Some) -1507 | .map_err(|error| { -1508 | if let TagsError::Query(error) = error { -1509 | if error.offset < locals_query.len() { -1510 | Self::include_path_in_query_error( -1511 | error, -1512 | &locals_ranges, -1513 | &locals_query, -1514 | 0, -1515 | ) -1516 | } else { -1517 | Self::include_path_in_query_error( -1518 | error, -1519 | &tags_ranges, -1520 | &tags_query, -1521 | locals_query.len(), -1522 | ) -1523 | } -1524 | } else { -1525 | error.into() -1526 | } -1527 | }) -1528 | } -1529 | }) -1530 | .map(Option::as_ref) -1531 | } - | -1532 | #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))] -1533 | fn include_path_in_query_error( -1534 | mut error: QueryError, -1535 | ranges: &[(PathBuf, Range)], -1536 | source: &str, -1537 | start_offset: usize, -1538 | ) -> Error { -1539 | let offset_within_section = error.offset - start_offset; -1540 | let (path, range) = ranges -1541 | .iter() -1542 | .find(|(_, range)| range.contains(&offset_within_section)) -1543 | .unwrap_or_else(|| ranges.last().unwrap()); -1544 | error.offset = offset_within_section - range.start; -1545 | error.row = 
source[range.start..offset_within_section] -1546 | .matches('\n') -1547 | .count(); -1548 | Error::from(error).context(format!("Error in query file {}", path.display())) -1549 | } - | -1550 | #[allow(clippy::type_complexity)] -1551 | #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))] -1552 | fn read_queries( -1553 | &self, -1554 | paths: Option<&[PathBuf]>, -1555 | default_path: &str, -1556 | ) -> Result<(String, Vec<(PathBuf, Range)>)> { -1557 | let mut query = String::new(); -1558 | let mut path_ranges = Vec::new(); -1559 | if let Some(paths) = paths { -1560 | for path in paths { -1561 | let abs_path = self.root_path.join(path); -1562 | let prev_query_len = query.len(); -1563 | query += &fs::read_to_string(&abs_path) -1564 | .with_context(|| format!("Failed to read query file {}", path.display()))?; -1565 | path_ranges.push((path.clone(), prev_query_len..query.len())); -1566 | } -1567 | } else { -1568 | // highlights.scm is needed to test highlights, and tags.scm to test tags -1569 | if default_path == "highlights.scm" || default_path == "tags.scm" { -1570 | warn!( -1571 | concat!( -1572 | "You should add a `{}` entry pointing to the {} path in the `tree-sitter` ", -1573 | "object in the grammar's tree-sitter.json file. 
See more here: ", -1574 | "https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths" -1575 | ), -1576 | default_path.replace(".scm", ""), -1577 | default_path -1578 | ); -1579 | } -1580 | let queries_path = self.root_path.join("queries"); -1581 | let path = queries_path.join(default_path); -1582 | if path.exists() { -1583 | query = fs::read_to_string(&path) -1584 | .with_context(|| format!("Failed to read query file {}", path.display()))?; -1585 | path_ranges.push((PathBuf::from(default_path), 0..query.len())); -1586 | } -1587 | } - | -1588 | Ok((query, path_ranges)) -1589 | } -1590 | } - | -1591 | fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> Result { -1592 | if !lib_path.exists() { -1593 | return Ok(true); -1594 | } -1595 | let lib_mtime = mtime(lib_path) -1596 | .with_context(|| format!("Failed to read mtime of {}", lib_path.display()))?; -1597 | for path in paths_to_check { -1598 | if mtime(path)? > lib_mtime { -1599 | return Ok(true); -1600 | } -1601 | } -1602 | Ok(false) -1603 | } - | -1604 | fn mtime(path: &Path) -> Result { -1605 | Ok(fs::metadata(path)?.modified()?) 
-1606 | } - - - --------------------------------------------------------------------------------- -/crates/tags/Cargo.toml: --------------------------------------------------------------------------------- - 1 | [package] - 2 | name = "tree-sitter-tags" - 3 | version.workspace = true - 4 | description = "Library for extracting tag information" - 5 | authors = [ - 6 | "Max Brunsfeld ", - 7 | "Patrick Thomson ", - 8 | ] - 9 | edition.workspace = true - 10 | rust-version.workspace = true - 11 | readme = "README.md" - 12 | homepage.workspace = true - 13 | repository.workspace = true - 14 | documentation = "https://docs.rs/tree-sitter-tags" - 15 | license.workspace = true - 16 | keywords = ["incremental", "parsing", "syntax", "tagging"] - 17 | categories = ["parsing", "text-editors"] - | - 18 | [lints] - 19 | workspace = true - | - 20 | [lib] - 21 | path = "src/tags.rs" - 22 | crate-type = ["lib", "staticlib"] - | - 23 | [dependencies] - 24 | memchr.workspace = true - 25 | regex.workspace = true - 26 | streaming-iterator.workspace = true - 27 | thiserror.workspace = true - | - 28 | tree-sitter.workspace = true - - - --------------------------------------------------------------------------------- -/crates/tags/include/tree_sitter/tags.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_TAGS_H_ - 2 | #define TREE_SITTER_TAGS_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include - 7 | #include "tree_sitter/api.h" - | - 8 | typedef enum { - 9 | TSTagsOk, - 10 | TSTagsUnknownScope, - 11 | TSTagsTimeout, - 12 | TSTagsInvalidLanguage, - 13 | TSTagsInvalidUtf8, - 14 | TSTagsInvalidRegex, - 15 | TSTagsInvalidQuery, - 16 | TSTagsInvalidCapture, - 17 | } TSTagsError; - | - 18 | typedef struct { - 19 | uint32_t start_byte; - 20 | uint32_t end_byte; - 21 | uint32_t name_start_byte; - 22 | uint32_t name_end_byte; - 23 | uint32_t line_start_byte; - 24 | uint32_t line_end_byte; - 25 | TSPoint 
start_point; - 26 | TSPoint end_point; - 27 | uint32_t utf16_start_column; - 28 | uint32_t utf16_end_column; - 29 | uint32_t docs_start_byte; - 30 | uint32_t docs_end_byte; - 31 | uint32_t syntax_type_id; - 32 | bool is_definition; - 33 | } TSTag; - | - 34 | typedef struct TSTagger TSTagger; - 35 | typedef struct TSTagsBuffer TSTagsBuffer; - | - 36 | // Construct a tagger. - 37 | TSTagger *ts_tagger_new(); - | - 38 | // Delete a tagger. - 39 | void ts_tagger_delete(TSTagger *); - | - 40 | // Add a `TSLanguage` to a tagger. The language is associated with a scope name, - 41 | // which can be used later to select a language for tagging. Along with the language, - 42 | // you must provide two tree query strings, one for matching tags themselves, and one - 43 | // specifying local variable definitions. - 44 | TSTagsError ts_tagger_add_language( - 45 | TSTagger *self, - 46 | const char *scope_name, - 47 | const TSLanguage *language, - 48 | const char *tags_query, - 49 | const char *locals_query, - 50 | uint32_t tags_query_len, - 51 | uint32_t locals_query_len - 52 | ); - | - 53 | // Compute syntax highlighting for a given document. You must first - 54 | // create a `TSTagsBuffer` to hold the output. - 55 | TSTagsError ts_tagger_tag( - 56 | const TSTagger *self, - 57 | const char *scope_name, - 58 | const char *source_code, - 59 | uint32_t source_code_len, - 60 | TSTagsBuffer *output, - 61 | const size_t *cancellation_flag - 62 | ); - | - 63 | // A tags buffer stores the results produced by a tagging call. It can be reused - 64 | // for multiple calls. - 65 | TSTagsBuffer *ts_tags_buffer_new(); - | - 66 | // Delete a tags buffer. - 67 | void ts_tags_buffer_delete(TSTagsBuffer *); - | - 68 | // Access the tags within a tag buffer. 
- 69 | const TSTag *ts_tags_buffer_tags(const TSTagsBuffer *); - 70 | uint32_t ts_tags_buffer_tags_len(const TSTagsBuffer *); - | - 71 | // Access the string containing all of the docs - 72 | const char *ts_tags_buffer_docs(const TSTagsBuffer *); - 73 | uint32_t ts_tags_buffer_docs_len(const TSTagsBuffer *); - | - 74 | // Get the syntax kinds for a scope. - 75 | const char **ts_tagger_syntax_kinds_for_scope_name(const TSTagger *, const char *scope_name, uint32_t *len); - | - 76 | // Determine whether a parse error was encountered while tagging. - 77 | bool ts_tags_buffer_found_parse_error(const TSTagsBuffer*); - | - 78 | #ifdef __cplusplus - 79 | } - 80 | #endif - | - 81 | #endif // TREE_SITTER_TAGS_H_ - - - --------------------------------------------------------------------------------- -/crates/tags/README.md: --------------------------------------------------------------------------------- - 1 | # Tree-sitter Tags - | - 2 | [![crates.io badge]][crates.io] - | - 3 | [crates.io]: https://crates.io/crates/tree-sitter-tags - 4 | [crates.io badge]: https://img.shields.io/crates/v/tree-sitter-tags.svg?color=%23B48723 - | - 5 | ### Usage - | - 6 | Add this crate, and the language-specific crates for whichever languages you want to parse, to your `Cargo.toml`: - | - 7 | ```toml - 8 | [dependencies] - 9 | tree-sitter-tags = "0.19" - 10 | tree-sitter-javascript = "0.19" - 11 | tree-sitter-python = "0.19" - 12 | ``` - | - 13 | Create a tag context. 
You need one of these for each thread that you're using for tag computation: - | - 14 | ```rust - 15 | use tree_sitter_tags::TagsContext; - | - 16 | let context = TagsContext::new(); - 17 | ``` - | - 18 | Load some tagging queries from the `queries` directory of some language repositories: - | - 19 | ```rust - 20 | use tree_sitter_tags::TagsConfiguration; - | - 21 | let python_config = TagsConfiguration::new( - 22 | tree_sitter_python::language(), - 23 | tree_sitter_python::TAGGING_QUERY, - 24 | "", - 25 | ).unwrap(); - | - 26 | let javascript_config = TagsConfiguration::new( - 27 | tree_sitter_javascript::language(), - 28 | tree_sitter_javascript::TAGGING_QUERY, - 29 | tree_sitter_javascript::LOCALS_QUERY, - 30 | ).unwrap(); - 31 | ``` - | - 32 | Compute code navigation tags for some source code: - | - 33 | ```rust - 34 | let tags = context.generate_tags( - 35 | &javascript_config, - 36 | b"class A { getB() { return c(); } }", - 37 | None, - 38 | ); - | - 39 | for tag in tags { - 40 | println!("kind: {:?}", tag.kind); - 41 | println!("range: {:?}", tag.range); - 42 | println!("name_range: {:?}", tag.name_range); - 43 | println!("docs: {:?}", tag.docs); - 44 | } - 45 | ``` - - - --------------------------------------------------------------------------------- -/crates/tags/src/c_lib.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | collections::HashMap, ffi::CStr, fmt, os::raw::c_char, process::abort, slice, str, - 3 | sync::atomic::AtomicUsize, - 4 | }; - | - 5 | use tree_sitter::Language; - | - 6 | use super::{Error, TagsConfiguration, TagsContext}; - | - 7 | const BUFFER_TAGS_RESERVE_CAPACITY: usize = 100; - 8 | const BUFFER_DOCS_RESERVE_CAPACITY: usize = 1024; - | - 9 | #[repr(C)] - 10 | #[derive(Debug, PartialEq, Eq)] - 11 | pub enum TSTagsError { - 12 | Ok, - 13 | UnknownScope, - 14 | Timeout, - 15 | InvalidLanguage, - 16 | InvalidUtf8, - 17 | InvalidRegex, - 18 | InvalidQuery, - 19 | InvalidCapture, 
- 20 | Unknown, - 21 | } - | - 22 | #[repr(C)] - 23 | pub struct TSPoint { - 24 | row: u32, - 25 | column: u32, - 26 | } - | - 27 | #[repr(C)] - 28 | pub struct TSTag { - 29 | pub start_byte: u32, - 30 | pub end_byte: u32, - 31 | pub name_start_byte: u32, - 32 | pub name_end_byte: u32, - 33 | pub line_start_byte: u32, - 34 | pub line_end_byte: u32, - 35 | pub start_point: TSPoint, - 36 | pub end_point: TSPoint, - 37 | pub utf16_start_column: u32, - 38 | pub utf16_end_column: u32, - 39 | pub docs_start_byte: u32, - 40 | pub docs_end_byte: u32, - 41 | pub syntax_type_id: u32, - 42 | pub is_definition: bool, - 43 | } - | - 44 | pub struct TSTagger { - 45 | languages: HashMap, - 46 | } - | - 47 | pub struct TSTagsBuffer { - 48 | context: TagsContext, - 49 | tags: Vec, - 50 | docs: Vec, - 51 | errors_present: bool, - 52 | } - | - 53 | #[no_mangle] - 54 | pub extern "C" fn ts_tagger_new() -> *mut TSTagger { - 55 | Box::into_raw(Box::new(TSTagger { - 56 | languages: HashMap::new(), - 57 | })) - 58 | } - | - 59 | /// Delete a [`TSTagger`]. - 60 | /// - 61 | /// # Safety - 62 | /// - 63 | /// `this` must be non-null and a valid pointer to a [`TSTagger`] instance. - 64 | #[no_mangle] - 65 | pub unsafe extern "C" fn ts_tagger_delete(this: *mut TSTagger) { - 66 | drop(Box::from_raw(this)); - 67 | } - | - 68 | /// Add a language to a [`TSTagger`]. - 69 | /// - 70 | /// Returns a [`TSTagsError`] indicating whether the operation was successful or not. - 71 | /// - 72 | /// # Safety - 73 | /// - 74 | /// `this` must be non-null and a valid pointer to a [`TSTagger`] instance. - 75 | /// `scope_name` must be non-null and a valid pointer to a null-terminated string. - 76 | /// `tags_query` and `locals_query` must be non-null and valid pointers to strings. - 77 | /// - 78 | /// The caller must ensure that the lengths of `tags_query` and `locals_query` are correct. 
- 79 | #[no_mangle] - 80 | pub unsafe extern "C" fn ts_tagger_add_language( - 81 | this: *mut TSTagger, - 82 | scope_name: *const c_char, - 83 | language: Language, - 84 | tags_query: *const u8, - 85 | locals_query: *const u8, - 86 | tags_query_len: u32, - 87 | locals_query_len: u32, - 88 | ) -> TSTagsError { - 89 | let tagger = unwrap_mut_ptr(this); - 90 | let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); - 91 | let tags_query = slice::from_raw_parts(tags_query, tags_query_len as usize); - 92 | let locals_query = if !locals_query.is_null() { - 93 | slice::from_raw_parts(locals_query, locals_query_len as usize) - 94 | } else { - 95 | &[] - 96 | }; - 97 | let Ok(tags_query) = str::from_utf8(tags_query) else { - 98 | return TSTagsError::InvalidUtf8; - 99 | }; - 100 | let Ok(locals_query) = str::from_utf8(locals_query) else { - 101 | return TSTagsError::InvalidUtf8; - 102 | }; - | - 103 | match TagsConfiguration::new(language, tags_query, locals_query) { - 104 | Ok(c) => { - 105 | tagger.languages.insert(scope_name.to_string(), c); - 106 | TSTagsError::Ok - 107 | } - 108 | Err(Error::Query(_)) => TSTagsError::InvalidQuery, - 109 | Err(Error::Regex(_)) => TSTagsError::InvalidRegex, - 110 | Err(Error::Cancelled) => TSTagsError::Timeout, - 111 | Err(Error::InvalidLanguage) => TSTagsError::InvalidLanguage, - 112 | Err(Error::InvalidCapture(_)) => TSTagsError::InvalidCapture, - 113 | } - 114 | } - | - 115 | /// Tags some source code. - 116 | /// - 117 | /// Returns a [`TSTagsError`] indicating whether the operation was successful or not. - 118 | /// - 119 | /// # Safety - 120 | /// - 121 | /// `this` must be a non-null valid pointer to a [`TSTagger`] instance. - 122 | /// `scope_name` must be a non-null valid pointer to a null-terminated string. - 123 | /// `source_code` must be a non-null valid pointer to a slice of bytes. - 124 | /// `output` must be a non-null valid pointer to a [`TSTagsBuffer`] instance. 
- 125 | /// `cancellation_flag` must be a non-null valid pointer to an [`AtomicUsize`] instance. - 126 | #[no_mangle] - 127 | pub unsafe extern "C" fn ts_tagger_tag( - 128 | this: *mut TSTagger, - 129 | scope_name: *const c_char, - 130 | source_code: *const u8, - 131 | source_code_len: u32, - 132 | output: *mut TSTagsBuffer, - 133 | cancellation_flag: *const AtomicUsize, - 134 | ) -> TSTagsError { - 135 | let tagger = unwrap_mut_ptr(this); - 136 | let buffer = unwrap_mut_ptr(output); - 137 | let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); - | - 138 | if let Some(config) = tagger.languages.get(scope_name) { - 139 | shrink_and_clear(&mut buffer.tags, BUFFER_TAGS_RESERVE_CAPACITY); - 140 | shrink_and_clear(&mut buffer.docs, BUFFER_DOCS_RESERVE_CAPACITY); - | - 141 | let source_code = slice::from_raw_parts(source_code, source_code_len as usize); - 142 | let cancellation_flag = cancellation_flag.as_ref(); - | - 143 | let tags = match buffer - 144 | .context - 145 | .generate_tags(config, source_code, cancellation_flag) - 146 | { - 147 | Ok((tags, found_error)) => { - 148 | buffer.errors_present = found_error; - 149 | tags - 150 | } - 151 | Err(e) => { - 152 | return match e { - 153 | Error::InvalidLanguage => TSTagsError::InvalidLanguage, - 154 | _ => TSTagsError::Timeout, - 155 | } - 156 | } - 157 | }; - | - 158 | for tag in tags { - 159 | let Ok(tag) = tag else { - 160 | buffer.tags.clear(); - 161 | buffer.docs.clear(); - 162 | return TSTagsError::Timeout; - 163 | }; - | - 164 | let prev_docs_len = buffer.docs.len(); - 165 | if let Some(docs) = tag.docs { - 166 | buffer.docs.extend_from_slice(docs.as_bytes()); - 167 | } - 168 | buffer.tags.push(TSTag { - 169 | start_byte: tag.range.start as u32, - 170 | end_byte: tag.range.end as u32, - 171 | name_start_byte: tag.name_range.start as u32, - 172 | name_end_byte: tag.name_range.end as u32, - 173 | line_start_byte: tag.line_range.start as u32, - 174 | line_end_byte: tag.line_range.end as u32, - 175 | 
start_point: TSPoint { - 176 | row: tag.span.start.row as u32, - 177 | column: tag.span.start.column as u32, - 178 | }, - 179 | end_point: TSPoint { - 180 | row: tag.span.end.row as u32, - 181 | column: tag.span.end.column as u32, - 182 | }, - 183 | utf16_start_column: tag.utf16_column_range.start as u32, - 184 | utf16_end_column: tag.utf16_column_range.end as u32, - 185 | docs_start_byte: prev_docs_len as u32, - 186 | docs_end_byte: buffer.docs.len() as u32, - 187 | syntax_type_id: tag.syntax_type_id, - 188 | is_definition: tag.is_definition, - 189 | }); - 190 | } - | - 191 | TSTagsError::Ok - 192 | } else { - 193 | TSTagsError::UnknownScope - 194 | } - 195 | } - | - 196 | #[no_mangle] - 197 | pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer { - 198 | Box::into_raw(Box::new(TSTagsBuffer { - 199 | context: TagsContext::new(), - 200 | tags: Vec::with_capacity(BUFFER_TAGS_RESERVE_CAPACITY), - 201 | docs: Vec::with_capacity(BUFFER_DOCS_RESERVE_CAPACITY), - 202 | errors_present: false, - 203 | })) - 204 | } - | - 205 | /// Delete a [`TSTagsBuffer`]. - 206 | /// - 207 | /// # Safety - 208 | /// - 209 | /// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by - 210 | /// [`ts_tags_buffer_new`]. - 211 | #[no_mangle] - 212 | pub unsafe extern "C" fn ts_tags_buffer_delete(this: *mut TSTagsBuffer) { - 213 | drop(Box::from_raw(this)); - 214 | } - | - 215 | /// Get the tags from a [`TSTagsBuffer`]. - 216 | /// - 217 | /// # Safety - 218 | /// - 219 | /// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by - 220 | /// [`ts_tags_buffer_new`]. - 221 | /// - 222 | /// The caller must ensure that the returned pointer is not used after the [`TSTagsBuffer`] - 223 | /// is deleted with [`ts_tags_buffer_delete`], else the data will point to garbage. 
- 224 | #[no_mangle] - 225 | pub unsafe extern "C" fn ts_tags_buffer_tags(this: *const TSTagsBuffer) -> *const TSTag { - 226 | unwrap_ptr(this).tags.as_ptr() - 227 | } - | - 228 | /// Get the number of tags in a [`TSTagsBuffer`]. - 229 | /// - 230 | /// # Safety - 231 | /// - 232 | /// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance. - 233 | #[no_mangle] - 234 | pub unsafe extern "C" fn ts_tags_buffer_tags_len(this: *const TSTagsBuffer) -> u32 { - 235 | unwrap_ptr(this).tags.len() as u32 - 236 | } - | - 237 | /// Get the documentation strings from a [`TSTagsBuffer`]. - 238 | /// - 239 | /// # Safety - 240 | /// - 241 | /// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by - 242 | /// [`ts_tags_buffer_new`]. - 243 | /// - 244 | /// The caller must ensure that the returned pointer is not used after the [`TSTagsBuffer`] - 245 | /// is deleted with [`ts_tags_buffer_delete`], else the data will point to garbage. - 246 | /// - 247 | /// The returned pointer points to a C-style string. - 248 | /// To get the length of the string, use [`ts_tags_buffer_docs_len`]. - 249 | #[no_mangle] - 250 | pub unsafe extern "C" fn ts_tags_buffer_docs(this: *const TSTagsBuffer) -> *const c_char { - 251 | unwrap_ptr(this).docs.as_ptr().cast::() - 252 | } - | - 253 | /// Get the length of the documentation strings in a [`TSTagsBuffer`]. - 254 | /// - 255 | /// # Safety - 256 | /// - 257 | /// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by - 258 | /// [`ts_tags_buffer_new`]. - 259 | #[no_mangle] - 260 | pub unsafe extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { - 261 | unwrap_ptr(this).docs.len() as u32 - 262 | } - | - 263 | /// Get whether or not a [`TSTagsBuffer`] contains any parse errors. 
- 264 | /// - 265 | /// # Safety - 266 | /// - 267 | /// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by - 268 | /// [`ts_tags_buffer_new`]. - 269 | #[no_mangle] - 270 | pub unsafe extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool { - 271 | unwrap_ptr(this).errors_present - 272 | } - | - 273 | /// Get the syntax kinds for a given scope name. - 274 | /// - 275 | /// Returns a pointer to a null-terminated array of null-terminated strings. - 276 | /// - 277 | /// # Safety - 278 | /// - 279 | /// `this` must be non-null and a valid pointer to a [`TSTagger`] instance created by - 280 | /// [`ts_tagger_new`]. - 281 | /// `scope_name` must be non-null and a valid pointer to a null-terminated string. - 282 | /// `len` must be non-null and a valid pointer to a `u32`. - 283 | /// - 284 | /// The caller must ensure that the returned pointer is not used after the [`TSTagger`] - 285 | /// is deleted with [`ts_tagger_delete`], else the data will point to garbage. - 286 | /// - 287 | /// The returned pointer points to a C-style string array. 
- 288 | #[no_mangle] - 289 | pub unsafe extern "C" fn ts_tagger_syntax_kinds_for_scope_name( - 290 | this: *mut TSTagger, - 291 | scope_name: *const c_char, - 292 | len: *mut u32, - 293 | ) -> *const *const c_char { - 294 | let tagger = unwrap_mut_ptr(this); - 295 | let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); - 296 | let len = unwrap_mut_ptr(len); - | - 297 | *len = 0; - 298 | if let Some(config) = tagger.languages.get(scope_name) { - 299 | *len = config.c_syntax_type_names.len() as u32; - 300 | return config.c_syntax_type_names.as_ptr().cast::<*const c_char>(); - 301 | } - 302 | std::ptr::null() - 303 | } - | - 304 | unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { - 305 | result.as_ref().unwrap_or_else(|| { - 306 | eprintln!("{}:{} - pointer must not be null", file!(), line!()); - 307 | abort(); - 308 | }) - 309 | } - | - 310 | unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { - 311 | result.as_mut().unwrap_or_else(|| { - 312 | eprintln!("{}:{} - pointer must not be null", file!(), line!()); - 313 | abort(); - 314 | }) - 315 | } - | - 316 | fn unwrap(result: Result) -> T { - 317 | result.unwrap_or_else(|error| { - 318 | eprintln!("tree-sitter tag error: {error}"); - 319 | abort(); - 320 | }) - 321 | } - | - 322 | fn shrink_and_clear(vec: &mut Vec, capacity: usize) { - 323 | if vec.len() > capacity { - 324 | vec.truncate(capacity); - 325 | vec.shrink_to_fit(); - 326 | } - 327 | vec.clear(); - 328 | } - - - --------------------------------------------------------------------------------- -/crates/tags/src/tags.rs: --------------------------------------------------------------------------------- - 1 | #![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))] - | - 2 | pub mod c_lib; - | - 3 | use std::{ - 4 | char, - 5 | collections::HashMap, - 6 | ffi::{CStr, CString}, - 7 | mem, - 8 | ops::{ControlFlow, Range}, - 9 | os::raw::c_char, - 10 | str, - 11 | sync::atomic::{AtomicUsize, Ordering}, - 12 | }; - | - 13 
| use memchr::memchr; - 14 | use regex::Regex; - 15 | use streaming_iterator::StreamingIterator; - 16 | use thiserror::Error; - 17 | use tree_sitter::{ - 18 | Language, LossyUtf8, ParseOptions, Parser, Point, Query, QueryCursor, QueryError, - 19 | QueryPredicateArg, Tree, - 20 | }; - | - 21 | const MAX_LINE_LEN: usize = 180; - 22 | const CANCELLATION_CHECK_INTERVAL: usize = 100; - | - 23 | /// Contains the data needed to compute tags for code written in a - 24 | /// particular language. - 25 | #[derive(Debug)] - 26 | pub struct TagsConfiguration { - 27 | pub language: Language, - 28 | pub query: Query, - 29 | syntax_type_names: Vec>, - 30 | c_syntax_type_names: Vec<*const u8>, - 31 | capture_map: HashMap, - 32 | doc_capture_index: Option, - 33 | name_capture_index: Option, - 34 | ignore_capture_index: Option, - 35 | local_scope_capture_index: Option, - 36 | local_definition_capture_index: Option, - 37 | tags_pattern_index: usize, - 38 | pattern_info: Vec, - 39 | } - | - 40 | unsafe impl Send for TagsConfiguration {} - 41 | unsafe impl Sync for TagsConfiguration {} - | - 42 | #[derive(Debug)] - 43 | pub struct NamedCapture { - 44 | pub syntax_type_id: u32, - 45 | pub is_definition: bool, - 46 | } - | - 47 | pub struct TagsContext { - 48 | pub parser: Parser, - 49 | cursor: QueryCursor, - 50 | } - | - 51 | #[derive(Debug, Clone)] - 52 | pub struct Tag { - 53 | pub range: Range, - 54 | pub name_range: Range, - 55 | pub line_range: Range, - 56 | pub span: Range, - 57 | pub utf16_column_range: Range, - 58 | pub docs: Option, - 59 | pub is_definition: bool, - 60 | pub syntax_type_id: u32, - 61 | } - | - 62 | #[derive(Debug, Error, PartialEq)] - 63 | pub enum Error { - 64 | #[error(transparent)] - 65 | Query(#[from] QueryError), - 66 | #[error(transparent)] - 67 | Regex(#[from] regex::Error), - 68 | #[error("Cancelled")] - 69 | Cancelled, - 70 | #[error("Invalid language")] - 71 | InvalidLanguage, - 72 | #[error("Invalid capture @{0}. 
Expected one of: @definition.*, @reference.*, @doc, @name, @local.(scope|definition|reference).")] - 73 | InvalidCapture(String), - 74 | } - | - 75 | #[derive(Debug, Default)] - 76 | struct PatternInfo { - 77 | docs_adjacent_capture: Option, - 78 | local_scope_inherits: bool, - 79 | name_must_be_non_local: bool, - 80 | doc_strip_regex: Option, - 81 | } - | - 82 | #[derive(Debug)] - 83 | struct LocalDef<'a> { - 84 | name: &'a [u8], - 85 | } - | - 86 | #[derive(Debug)] - 87 | struct LocalScope<'a> { - 88 | inherits: bool, - 89 | range: Range, - 90 | local_defs: Vec>, - 91 | } - | - 92 | struct TagsIter<'a, I> - 93 | where - 94 | I: StreamingIterator>, - 95 | { - 96 | matches: I, - 97 | _tree: Tree, - 98 | source: &'a [u8], - 99 | prev_line_info: Option, - 100 | config: &'a TagsConfiguration, - 101 | cancellation_flag: Option<&'a AtomicUsize>, - 102 | iter_count: usize, - 103 | tag_queue: Vec<(Tag, usize)>, - 104 | scopes: Vec>, - 105 | } - | - 106 | struct LineInfo { - 107 | utf8_position: Point, - 108 | utf8_byte: usize, - 109 | utf16_column: usize, - 110 | line_range: Range, - 111 | } - | - 112 | impl TagsConfiguration { - 113 | pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result { - 114 | let query = Query::new(&language, &format!("{locals_query}{tags_query}"))?; - | - 115 | let tags_query_offset = locals_query.len(); - 116 | let mut tags_pattern_index = 0; - 117 | for i in 0..(query.pattern_count()) { - 118 | let pattern_offset = query.start_byte_for_pattern(i); - 119 | if pattern_offset < tags_query_offset { - 120 | tags_pattern_index += 1; - 121 | } - 122 | } - | - 123 | let mut capture_map = HashMap::new(); - 124 | let mut syntax_type_names = Vec::new(); - 125 | let mut doc_capture_index = None; - 126 | let mut name_capture_index = None; - 127 | let mut ignore_capture_index = None; - 128 | let mut local_scope_capture_index = None; - 129 | let mut local_definition_capture_index = None; - 130 | for (i, name) in 
query.capture_names().iter().enumerate() { - 131 | match *name { - 132 | "name" => name_capture_index = Some(i as u32), - 133 | "ignore" => ignore_capture_index = Some(i as u32), - 134 | "doc" => doc_capture_index = Some(i as u32), - 135 | "local.scope" => local_scope_capture_index = Some(i as u32), - 136 | "local.definition" => local_definition_capture_index = Some(i as u32), - 137 | "local.reference" | "" => {} - 138 | _ => { - 139 | let mut is_definition = false; - | - 140 | let kind = if name.starts_with("definition.") { - 141 | is_definition = true; - 142 | name.trim_start_matches("definition.") - 143 | } else if name.starts_with("reference.") { - 144 | name.trim_start_matches("reference.") - 145 | } else { - 146 | return Err(Error::InvalidCapture((*name).to_string())); - 147 | }; - | - 148 | if let Ok(cstr) = CString::new(kind) { - 149 | let c_kind = cstr.to_bytes_with_nul().to_vec().into_boxed_slice(); - 150 | let syntax_type_id = syntax_type_names - 151 | .iter() - 152 | .position(|n| n == &c_kind) - 153 | .unwrap_or_else(|| { - 154 | syntax_type_names.push(c_kind); - 155 | syntax_type_names.len() - 1 - 156 | }) as u32; - 157 | capture_map.insert( - 158 | i as u32, - 159 | NamedCapture { - 160 | syntax_type_id, - 161 | is_definition, - 162 | }, - 163 | ); - 164 | } - 165 | } - 166 | } - 167 | } - | - 168 | let c_syntax_type_names = syntax_type_names.iter().map(|s| s.as_ptr()).collect(); - | - 169 | let pattern_info = (0..query.pattern_count()) - 170 | .map(|pattern_index| { - 171 | let mut info = PatternInfo::default(); - 172 | for (property, is_positive) in query.property_predicates(pattern_index) { - 173 | if !is_positive && property.key.as_ref() == "local" { - 174 | info.name_must_be_non_local = true; - 175 | } - 176 | } - 177 | info.local_scope_inherits = true; - 178 | for property in query.property_settings(pattern_index) { - 179 | if property.key.as_ref() == "local.scope-inherits" - 180 | && property - 181 | .value - 182 | .as_ref() - 183 | 
.is_some_and(|v| v.as_ref() == "false") - 184 | { - 185 | info.local_scope_inherits = false; - 186 | } - 187 | } - 188 | if let Some(doc_capture_index) = doc_capture_index { - 189 | for predicate in query.general_predicates(pattern_index) { - 190 | if predicate.args.first() - 191 | == Some(&QueryPredicateArg::Capture(doc_capture_index)) - 192 | { - 193 | match (predicate.operator.as_ref(), predicate.args.get(1)) { - 194 | ("select-adjacent!", Some(QueryPredicateArg::Capture(index))) => { - 195 | info.docs_adjacent_capture = Some(*index); - 196 | } - 197 | ("strip!", Some(QueryPredicateArg::String(pattern))) => { - 198 | let regex = Regex::new(pattern.as_ref())?; - 199 | info.doc_strip_regex = Some(regex); - 200 | } - 201 | _ => {} - 202 | } - 203 | } - 204 | } - 205 | } - 206 | Ok(info) - 207 | }) - 208 | .collect::, Error>>()?; - | - 209 | Ok(Self { - 210 | language, - 211 | query, - 212 | syntax_type_names, - 213 | c_syntax_type_names, - 214 | capture_map, - 215 | doc_capture_index, - 216 | name_capture_index, - 217 | ignore_capture_index, - 218 | local_scope_capture_index, - 219 | local_definition_capture_index, - 220 | tags_pattern_index, - 221 | pattern_info, - 222 | }) - 223 | } - | - 224 | #[must_use] - 225 | pub fn syntax_type_name(&self, id: u32) -> &str { - 226 | unsafe { - 227 | let cstr = CStr::from_ptr( - 228 | self.syntax_type_names[id as usize] - 229 | .as_ptr() - 230 | .cast::(), - 231 | ) - 232 | .to_bytes(); - 233 | str::from_utf8(cstr).expect("syntax type name was not valid utf-8") - 234 | } - 235 | } - 236 | } - | - 237 | impl Default for TagsContext { - 238 | fn default() -> Self { - 239 | Self::new() - 240 | } - 241 | } - | - 242 | impl TagsContext { - 243 | #[must_use] - 244 | pub fn new() -> Self { - 245 | Self { - 246 | parser: Parser::new(), - 247 | cursor: QueryCursor::new(), - 248 | } - 249 | } - | - 250 | pub const fn parser(&mut self) -> &mut Parser { - 251 | &mut self.parser - 252 | } - | - 253 | pub fn generate_tags<'a>( - 254 | &'a 
mut self, - 255 | config: &'a TagsConfiguration, - 256 | source: &'a [u8], - 257 | cancellation_flag: Option<&'a AtomicUsize>, - 258 | ) -> Result<(impl Iterator> + 'a, bool), Error> { - 259 | self.parser - 260 | .set_language(&config.language) - 261 | .map_err(|_| Error::InvalidLanguage)?; - 262 | self.parser.reset(); - 263 | let tree = self - 264 | .parser - 265 | .parse_with_options( - 266 | &mut |i, _| { - 267 | if i < source.len() { - 268 | &source[i..] - 269 | } else { - 270 | &[] - 271 | } - 272 | }, - 273 | None, - 274 | Some(ParseOptions::new().progress_callback(&mut |_| { - 275 | if let Some(cancellation_flag) = cancellation_flag { - 276 | if cancellation_flag.load(Ordering::SeqCst) != 0 { - 277 | ControlFlow::Break(()) - 278 | } else { - 279 | ControlFlow::Continue(()) - 280 | } - 281 | } else { - 282 | ControlFlow::Continue(()) - 283 | } - 284 | })), - 285 | ) - 286 | .ok_or(Error::Cancelled)?; - | - 287 | // The `matches` iterator borrows the `Tree`, which prevents it from being - 288 | // moved. But the tree is really just a pointer, so it's actually ok to - 289 | // move it. 
- 290 | let tree_ref = unsafe { mem::transmute::<&Tree, &'static Tree>(&tree) }; - 291 | let matches = self - 292 | .cursor - 293 | .matches(&config.query, tree_ref.root_node(), source); - 294 | Ok(( - 295 | TagsIter { - 296 | _tree: tree, - 297 | matches, - 298 | source, - 299 | config, - 300 | cancellation_flag, - 301 | prev_line_info: None, - 302 | tag_queue: Vec::new(), - 303 | iter_count: 0, - 304 | scopes: vec![LocalScope { - 305 | range: 0..source.len(), - 306 | inherits: false, - 307 | local_defs: Vec::new(), - 308 | }], - 309 | }, - 310 | tree_ref.root_node().has_error(), - 311 | )) - 312 | } - 313 | } - | - 314 | impl<'a, I> Iterator for TagsIter<'a, I> - 315 | where - 316 | I: StreamingIterator>, - 317 | { - 318 | type Item = Result; - | - 319 | fn next(&mut self) -> Option { - 320 | loop { - 321 | // Periodically check for cancellation, returning `Cancelled` error if the - 322 | // cancellation flag was flipped. - 323 | if let Some(cancellation_flag) = self.cancellation_flag { - 324 | self.iter_count += 1; - 325 | if self.iter_count >= CANCELLATION_CHECK_INTERVAL { - 326 | self.iter_count = 0; - 327 | if cancellation_flag.load(Ordering::Relaxed) != 0 { - 328 | return Some(Err(Error::Cancelled)); - 329 | } - 330 | } - 331 | } - | - 332 | // If there is a queued tag for an earlier node in the syntax tree, then pop - 333 | // it off of the queue and return it. - 334 | if let Some(last_entry) = self.tag_queue.last() { - 335 | if self.tag_queue.len() > 1 - 336 | && self.tag_queue[0].0.name_range.end < last_entry.0.name_range.start - 337 | { - 338 | let tag = self.tag_queue.remove(0).0; - 339 | if tag.is_ignored() { - 340 | continue; - 341 | } - 342 | return Some(Ok(tag)); - 343 | } - 344 | } - | - 345 | // If there is another match, then compute its tag and add it to the - 346 | // tag queue. 
- 347 | if let Some(mat) = self.matches.next() { - 348 | let pattern_info = &self.config.pattern_info[mat.pattern_index]; - | - 349 | if mat.pattern_index < self.config.tags_pattern_index { - 350 | for capture in mat.captures { - 351 | let index = Some(capture.index); - 352 | let range = capture.node.byte_range(); - 353 | if index == self.config.local_scope_capture_index { - 354 | self.scopes.push(LocalScope { - 355 | range, - 356 | inherits: pattern_info.local_scope_inherits, - 357 | local_defs: Vec::new(), - 358 | }); - 359 | } else if index == self.config.local_definition_capture_index { - 360 | if let Some(scope) = self.scopes.iter_mut().rev().find(|scope| { - 361 | scope.range.start <= range.start && scope.range.end >= range.end - 362 | }) { - 363 | scope.local_defs.push(LocalDef { - 364 | name: &self.source[range.clone()], - 365 | }); - 366 | } - 367 | } - 368 | } - 369 | continue; - 370 | } - | - 371 | let mut name_node = None; - 372 | let mut doc_nodes = Vec::new(); - 373 | let mut tag_node = None; - 374 | let mut syntax_type_id = 0; - 375 | let mut is_definition = false; - 376 | let mut docs_adjacent_node = None; - 377 | let mut is_ignored = false; - | - 378 | for capture in mat.captures { - 379 | let index = Some(capture.index); - | - 380 | if index == self.config.ignore_capture_index { - 381 | is_ignored = true; - 382 | name_node = Some(capture.node); - 383 | } - | - 384 | if index == self.config.pattern_info[mat.pattern_index].docs_adjacent_capture { - 385 | docs_adjacent_node = Some(capture.node); - 386 | } - | - 387 | if index == self.config.name_capture_index { - 388 | name_node = Some(capture.node); - 389 | } else if index == self.config.doc_capture_index { - 390 | doc_nodes.push(capture.node); - 391 | } - | - 392 | if let Some(named_capture) = self.config.capture_map.get(&capture.index) { - 393 | tag_node = Some(capture.node); - 394 | syntax_type_id = named_capture.syntax_type_id; - 395 | is_definition = named_capture.is_definition; - 396 | } - 397 
| } - | - 398 | if let Some(name_node) = name_node { - 399 | let name_range = name_node.byte_range(); - | - 400 | let tag; - 401 | if let Some(tag_node) = tag_node { - 402 | if name_node.has_error() { - 403 | continue; - 404 | } - | - 405 | if pattern_info.name_must_be_non_local { - 406 | let mut is_local = false; - 407 | for scope in self.scopes.iter().rev() { - 408 | if scope.range.start <= name_range.start - 409 | && scope.range.end >= name_range.end - 410 | { - 411 | if scope - 412 | .local_defs - 413 | .iter() - 414 | .any(|d| d.name == &self.source[name_range.clone()]) - 415 | { - 416 | is_local = true; - 417 | break; - 418 | } - 419 | if !scope.inherits { - 420 | break; - 421 | } - 422 | } - 423 | } - 424 | if is_local { - 425 | continue; - 426 | } - 427 | } - | - 428 | // If needed, filter the doc nodes based on their ranges, selecting - 429 | // only the slice that are adjacent to some specified node. - 430 | let mut docs_start_index = 0; - 431 | if let (Some(docs_adjacent_node), false) = - 432 | (docs_adjacent_node, doc_nodes.is_empty()) - 433 | { - 434 | docs_start_index = doc_nodes.len(); - 435 | let mut start_row = docs_adjacent_node.start_position().row; - 436 | while docs_start_index > 0 { - 437 | let doc_node = &doc_nodes[docs_start_index - 1]; - 438 | let prev_doc_end_row = doc_node.end_position().row; - 439 | if prev_doc_end_row + 1 >= start_row { - 440 | docs_start_index -= 1; - 441 | start_row = doc_node.start_position().row; - 442 | } else { - 443 | break; - 444 | } - 445 | } - 446 | } - | - 447 | // Generate a doc string from all of the doc nodes, applying any strip - 448 | // regexes. - 449 | let mut docs = None; - 450 | for doc_node in &doc_nodes[docs_start_index..] 
{ - 451 | if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()]) - 452 | { - 453 | let content = pattern_info.doc_strip_regex.as_ref().map_or_else( - 454 | || content.to_string(), - 455 | |regex| regex.replace_all(content, "").to_string(), - 456 | ); - 457 | match &mut docs { - 458 | None => docs = Some(content), - 459 | Some(d) => { - 460 | d.push('\n'); - 461 | d.push_str(&content); - 462 | } - 463 | } - 464 | } - 465 | } - | - 466 | let rng = tag_node.byte_range(); - 467 | let range = rng.start.min(name_range.start)..rng.end.max(name_range.end); - 468 | let span = name_node.start_position()..name_node.end_position(); - | - 469 | // Compute tag properties that depend on the text of the containing line. If - 470 | // the previous tag occurred on the same line, then - 471 | // reuse results from the previous tag. - 472 | let mut prev_utf16_column = 0; - 473 | let mut prev_utf8_byte = name_range.start - span.start.column; - 474 | let line_info = self.prev_line_info.as_ref().and_then(|info| { - 475 | if info.utf8_position.row == span.start.row { - 476 | Some(info) - 477 | } else { - 478 | None - 479 | } - 480 | }); - 481 | let line_range = if let Some(line_info) = line_info { - 482 | if line_info.utf8_position.column <= span.start.column { - 483 | prev_utf8_byte = line_info.utf8_byte; - 484 | prev_utf16_column = line_info.utf16_column; - 485 | } - 486 | line_info.line_range.clone() - 487 | } else { - 488 | self::line_range( - 489 | self.source, - 490 | name_range.start, - 491 | span.start, - 492 | MAX_LINE_LEN, - 493 | ) - 494 | }; - | - 495 | let utf16_start_column = prev_utf16_column - 496 | + utf16_len(&self.source[prev_utf8_byte..name_range.start]); - 497 | let utf16_end_column = - 498 | utf16_start_column + utf16_len(&self.source[name_range.clone()]); - 499 | let utf16_column_range = utf16_start_column..utf16_end_column; - | - 500 | self.prev_line_info = Some(LineInfo { - 501 | utf8_position: span.end, - 502 | utf8_byte: name_range.end, - 503 | 
utf16_column: utf16_end_column, - 504 | line_range: line_range.clone(), - 505 | }); - 506 | tag = Tag { - 507 | range, - 508 | name_range, - 509 | line_range, - 510 | span, - 511 | utf16_column_range, - 512 | docs, - 513 | is_definition, - 514 | syntax_type_id, - 515 | }; - 516 | } else if is_ignored { - 517 | tag = Tag::ignored(name_range); - 518 | } else { - 519 | continue; - 520 | } - | - 521 | // Only create one tag per node. The tag queue is sorted by node position - 522 | // to allow for fast lookup. - 523 | match self.tag_queue.binary_search_by_key( - 524 | &(tag.name_range.end, tag.name_range.start), - 525 | |(tag, _)| (tag.name_range.end, tag.name_range.start), - 526 | ) { - 527 | Ok(i) => { - 528 | let (existing_tag, pattern_index) = &mut self.tag_queue[i]; - 529 | if *pattern_index > mat.pattern_index { - 530 | *pattern_index = mat.pattern_index; - 531 | *existing_tag = tag; - 532 | } - 533 | } - 534 | Err(i) => self.tag_queue.insert(i, (tag, mat.pattern_index)), - 535 | } - 536 | } - 537 | } - 538 | // If there are no more matches, then drain the queue. 
- 539 | else if !self.tag_queue.is_empty() { - 540 | return Some(Ok(self.tag_queue.remove(0).0)); - 541 | } else { - 542 | return None; - 543 | } - 544 | } - 545 | } - 546 | } - | - 547 | impl Tag { - 548 | #[must_use] - 549 | const fn ignored(name_range: Range) -> Self { - 550 | Self { - 551 | name_range, - 552 | line_range: 0..0, - 553 | span: Point::new(0, 0)..Point::new(0, 0), - 554 | utf16_column_range: 0..0, - 555 | range: usize::MAX..usize::MAX, - 556 | docs: None, - 557 | is_definition: false, - 558 | syntax_type_id: 0, - 559 | } - 560 | } - | - 561 | #[must_use] - 562 | const fn is_ignored(&self) -> bool { - 563 | self.range.start == usize::MAX - 564 | } - 565 | } - | - 566 | fn line_range( - 567 | text: &[u8], - 568 | start_byte: usize, - 569 | start_point: Point, - 570 | max_line_len: usize, - 571 | ) -> Range { - 572 | // Trim leading whitespace - 573 | let mut line_start_byte = start_byte - start_point.column; - 574 | while line_start_byte < text.len() && text[line_start_byte].is_ascii_whitespace() { - 575 | line_start_byte += 1; - 576 | } - | - 577 | let max_line_len = max_line_len.min(text.len() - line_start_byte); - 578 | let text_after_line_start = &text[line_start_byte..(line_start_byte + max_line_len)]; - 579 | let line_len = if let Some(len) = memchr(b'\n', text_after_line_start) { - 580 | len - 581 | } else if let Err(e) = str::from_utf8(text_after_line_start) { - 582 | e.valid_up_to() - 583 | } else { - 584 | max_line_len - 585 | }; - | - 586 | // Trim trailing whitespace - 587 | let mut line_end_byte = line_start_byte + line_len; - 588 | while line_end_byte > line_start_byte && text[line_end_byte - 1].is_ascii_whitespace() { - 589 | line_end_byte -= 1; - 590 | } - | - 591 | line_start_byte..line_end_byte - 592 | } - | - 593 | fn utf16_len(bytes: &[u8]) -> usize { - 594 | LossyUtf8::new(bytes) - 595 | .flat_map(|chunk| chunk.chars().map(char::len_utf16)) - 596 | .sum() - 597 | } - | - 598 | #[cfg(test)] - 599 | mod tests { - 600 | use 
super::*; - | - 601 | #[test] - 602 | fn test_get_line() { - 603 | let text = "abc\ndefg❤hij\nklmno".as_bytes(); - 604 | assert_eq!(line_range(text, 5, Point::new(1, 1), 30), 4..14); - 605 | assert_eq!(line_range(text, 5, Point::new(1, 1), 6), 4..8); - 606 | assert_eq!(line_range(text, 17, Point::new(2, 2), 30), 15..20); - 607 | assert_eq!(line_range(text, 17, Point::new(2, 2), 4), 15..19); - 608 | } - | - 609 | #[test] - 610 | fn test_get_line_trims() { - 611 | let text = b" foo\nbar\n"; - 612 | assert_eq!(line_range(text, 0, Point::new(0, 0), 10), 3..6); - | - 613 | let text = b"\t func foo \nbar\n"; - 614 | assert_eq!(line_range(text, 0, Point::new(0, 0), 10), 2..10); - | - 615 | let r = line_range(text, 0, Point::new(0, 0), 14); - 616 | assert_eq!(r, 2..10); - 617 | assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "func foo"); - | - 618 | let r = line_range(text, 12, Point::new(1, 0), 14); - 619 | assert_eq!(r, 12..15); - 620 | assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "bar"); - 621 | } - 622 | } - - - --------------------------------------------------------------------------------- -/crates/xtask/Cargo.toml: --------------------------------------------------------------------------------- - 1 | [package] - 2 | name = "xtask" - 3 | version = "0.1.0" - 4 | authors.workspace = true - 5 | edition.workspace = true - 6 | rust-version.workspace = true - 7 | homepage.workspace = true - 8 | repository.workspace = true - 9 | license.workspace = true - 10 | keywords.workspace = true - 11 | categories.workspace = true - 12 | publish = false - | - 13 | [lints] - 14 | workspace = true - | - 15 | [dependencies] - 16 | anstyle.workspace = true - 17 | anyhow.workspace = true - 18 | bindgen = { version = "0.72.0" } - 19 | clap.workspace = true - 20 | indoc.workspace = true - 21 | regex.workspace = true - 22 | semver.workspace = true - 23 | serde_json.workspace = true - 24 | notify = "8.2.0" - 25 | notify-debouncer-full = "0.6.0" - - - 
--------------------------------------------------------------------------------- -/crates/xtask/src/benchmark.rs: --------------------------------------------------------------------------------- - 1 | use anyhow::Result; - | - 2 | use crate::{bail_on_err, Benchmark}; - | - 3 | pub fn run(args: &Benchmark) -> Result<()> { - 4 | if let Some(ref example) = args.example_file_name { - 5 | std::env::set_var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER", example); - 6 | } - | - 7 | if let Some(ref language) = args.language { - 8 | std::env::set_var("TREE_SITTER_BENCHMARK_LANGUAGE_FILTER", language); - 9 | } - | - 10 | if args.repetition_count != 5 { - 11 | std::env::set_var( - 12 | "TREE_SITTER_BENCHMARK_REPETITION_COUNT", - 13 | args.repetition_count.to_string(), - 14 | ); - 15 | } - | - 16 | if args.debug { - 17 | let output = std::process::Command::new("cargo") - 18 | .arg("bench") - 19 | .arg("benchmark") - 20 | .arg("-p") - 21 | .arg("tree-sitter-cli") - 22 | .arg("--no-run") - 23 | .arg("--message-format=json") - 24 | .spawn()? - 25 | .wait_with_output()?; - | - 26 | bail_on_err(&output, "Failed to run `cargo bench`")?; - | - 27 | let json_output = serde_json::from_slice::(&output.stdout)?; - | - 28 | let test_binary = json_output - 29 | .as_array() - 30 | .ok_or_else(|| anyhow::anyhow!("Invalid JSON output"))? 
- 31 | .iter() - 32 | .find_map(|message| { - 33 | if message - 34 | .get("target") - 35 | .and_then(|target| target.get("name")) - 36 | .and_then(|name| name.as_str()) - 37 | .is_some_and(|name| name == "benchmark") - 38 | && message - 39 | .get("executable") - 40 | .and_then(|executable| executable.as_str()) - 41 | .is_some() - 42 | { - 43 | message - 44 | .get("executable") - 45 | .and_then(|executable| executable.as_str()) - 46 | } else { - 47 | None - 48 | } - 49 | }) - 50 | .ok_or_else(|| anyhow::anyhow!("Failed to find benchmark executable"))?; - | - 51 | println!("{test_binary}"); - 52 | } else { - 53 | let status = std::process::Command::new("cargo") - 54 | .arg("bench") - 55 | .arg("benchmark") - 56 | .arg("-p") - 57 | .arg("tree-sitter-cli") - 58 | .status()?; - | - 59 | if !status.success() { - 60 | anyhow::bail!("Failed to run `cargo bench`"); - 61 | } - 62 | } - | - 63 | Ok(()) - 64 | } - - - --------------------------------------------------------------------------------- -/crates/xtask/src/bump.rs: --------------------------------------------------------------------------------- - 1 | use std::{cmp::Ordering, path::Path}; - | - 2 | use anyhow::{anyhow, Context, Result}; - 3 | use indoc::indoc; - 4 | use semver::{BuildMetadata, Prerelease, Version}; - | - 5 | use crate::{create_commit, BumpVersion}; - | - 6 | pub fn get_latest_tag() -> Result { - 7 | let output = std::process::Command::new("git") - 8 | .args(["tag", "-l"]) - 9 | .output()?; - 10 | if !output.status.success() { - 11 | anyhow::bail!( - 12 | "Failed to list tags: {}", - 13 | String::from_utf8_lossy(&output.stderr) - 14 | ); - 15 | } - | - 16 | let mut tags = String::from_utf8(output.stdout)? 
- 17 | .lines() - 18 | .filter_map(|tag| Version::parse(tag.strip_prefix('v').unwrap_or(tag)).ok()) - 19 | .collect::>(); - | - 20 | tags.sort_by( - 21 | |a, b| match (a.pre != Prerelease::EMPTY, b.pre != Prerelease::EMPTY) { - 22 | (true, true) | (false, false) => a.cmp(b), - 23 | (true, false) => Ordering::Less, - 24 | (false, true) => Ordering::Greater, - 25 | }, - 26 | ); - | - 27 | tags.last() - 28 | .map(std::string::ToString::to_string) - 29 | .ok_or_else(|| anyhow!("No tags found")) - 30 | } - | - 31 | pub fn run(args: BumpVersion) -> Result<()> { - 32 | let latest_tag = get_latest_tag()?; - 33 | let current_version = Version::parse(&latest_tag)?; - | - 34 | let output = std::process::Command::new("git") - 35 | .args(["rev-parse", &format!("v{latest_tag}")]) - 36 | .output()?; - 37 | if !output.status.success() { - 38 | anyhow::bail!( - 39 | "Failed to get tag SHA: {}", - 40 | String::from_utf8_lossy(&output.stderr) - 41 | ); - 42 | } - 43 | let latest_tag_sha = String::from_utf8(output.stdout)?.trim().to_string(); - | - 44 | let workspace_toml_version = Version::parse(&fetch_workspace_version()?)?; - | - 45 | if current_version.major != workspace_toml_version.major - 46 | && current_version.minor != workspace_toml_version.minor - 47 | { - 48 | eprintln!( - 49 | indoc! {" - 50 | Seems like the workspace Cargo.toml ({}) version does not match up with the latest git tag ({}). - 51 | Please ensure you don't change that yourself, this subcommand will handle this for you. - 52 | "}, - 53 | workspace_toml_version, latest_tag - 54 | ); - 55 | return Ok(()); - 56 | } - | - 57 | let output = std::process::Command::new("git") - 58 | .args(["rev-list", &format!("{latest_tag_sha}..HEAD")]) - 59 | .output()?; - 60 | if !output.status.success() { - 61 | anyhow::bail!( - 62 | "Failed to get commits: {}", - 63 | String::from_utf8_lossy(&output.stderr) - 64 | ); - 65 | } - 66 | let commits = String::from_utf8(output.stdout)? 
- 67 | .lines() - 68 | .map(|s| s.to_string()) - 69 | .collect::>(); - | - 70 | let mut should_increment_patch = false; - 71 | let mut should_increment_minor = false; - | - 72 | for commit_sha in commits { - 73 | let output = std::process::Command::new("git") - 74 | .args(["log", "-1", "--format=%s", &commit_sha]) - 75 | .output()?; - 76 | if !output.status.success() { - 77 | continue; - 78 | } - 79 | let message = String::from_utf8(output.stdout)?.trim().to_string(); - | - 80 | let output = std::process::Command::new("git") - 81 | .args([ - 82 | "diff-tree", - 83 | "--no-commit-id", - 84 | "--name-only", - 85 | "-r", - 86 | &commit_sha, - 87 | ]) - 88 | .output()?; - 89 | if !output.status.success() { - 90 | continue; - 91 | } - | - 92 | let mut source_code_changed = false; - 93 | for path in String::from_utf8(output.stdout)?.lines() { - 94 | let path = Path::new(path); - 95 | if path.extension().is_some_and(|ext| { - 96 | ext.eq_ignore_ascii_case("rs") - 97 | || ext.eq_ignore_ascii_case("js") - 98 | || ext.eq_ignore_ascii_case("c") - 99 | }) { - 100 | source_code_changed = true; - 101 | break; - 102 | } - 103 | } - | - 104 | if source_code_changed { - 105 | should_increment_patch = true; - | - 106 | let Some((prefix, _)) = message.split_once(':') else { - 107 | continue; - 108 | }; - | - 109 | let convention = if prefix.contains('(') { - 110 | prefix.split_once('(').unwrap().0 - 111 | } else { - 112 | prefix - 113 | }; - | - 114 | if ["feat", "feat!"].contains(&convention) || prefix.ends_with('!') { - 115 | should_increment_minor = true; - 116 | } - 117 | } - 118 | } - | - 119 | let next_version = if let Some(version) = args.version { - 120 | version - 121 | } else { - 122 | let mut next_version = current_version.clone(); - 123 | if should_increment_minor { - 124 | next_version.minor += 1; - 125 | next_version.patch = 0; - 126 | next_version.pre = Prerelease::EMPTY; - 127 | next_version.build = BuildMetadata::EMPTY; - 128 | } else if should_increment_patch { - 
129 | next_version.patch += 1; - 130 | next_version.pre = Prerelease::EMPTY; - 131 | next_version.build = BuildMetadata::EMPTY; - 132 | } else { - 133 | return Err(anyhow!(format!( - 134 | "No source code changed since {current_version}" - 135 | ))); - 136 | } - 137 | next_version - 138 | }; - 139 | if next_version <= current_version { - 140 | return Err(anyhow!(format!( - 141 | "Next version {next_version} must be greater than current version {current_version}" - 142 | ))); - 143 | } - | - 144 | println!("Bumping from {current_version} to {next_version}"); - 145 | update_crates(¤t_version, &next_version)?; - 146 | update_makefile(&next_version)?; - 147 | update_cmake(&next_version)?; - 148 | update_nix(&next_version)?; - 149 | update_npm(&next_version)?; - 150 | update_zig(&next_version)?; - 151 | tag_next_version(&next_version)?; - | - 152 | Ok(()) - 153 | } - | - 154 | fn tag_next_version(next_version: &Version) -> Result<()> { - 155 | let commit_sha = create_commit( - 156 | &format!("{next_version}"), - 157 | &[ - 158 | "Cargo.lock", - 159 | "Cargo.toml", - 160 | "Makefile", - 161 | "build.zig.zon", - 162 | "flake.nix", - 163 | "crates/cli/Cargo.toml", - 164 | "crates/cli/npm/package.json", - 165 | "crates/cli/npm/package-lock.json", - 166 | "crates/config/Cargo.toml", - 167 | "crates/highlight/Cargo.toml", - 168 | "crates/loader/Cargo.toml", - 169 | "crates/tags/Cargo.toml", - 170 | "CMakeLists.txt", - 171 | "lib/Cargo.toml", - 172 | "lib/binding_web/package.json", - 173 | "lib/binding_web/package-lock.json", - 174 | ], - 175 | )?; - | - 176 | // Create tag - 177 | let output = std::process::Command::new("git") - 178 | .args([ - 179 | "tag", - 180 | "-a", - 181 | &format!("v{next_version}"), - 182 | "-m", - 183 | &format!("v{next_version}"), - 184 | &commit_sha, - 185 | ]) - 186 | .output()?; - 187 | if !output.status.success() { - 188 | anyhow::bail!( - 189 | "Failed to create tag: {}", - 190 | String::from_utf8_lossy(&output.stderr) - 191 | ); - 192 | } - | 
- 193 | println!("Tagged commit {commit_sha} with tag v{next_version}"); - | - 194 | Ok(()) - 195 | } - | - 196 | fn update_makefile(next_version: &Version) -> Result<()> { - 197 | let makefile = std::fs::read_to_string("Makefile")?; - 198 | let makefile = makefile - 199 | .lines() - 200 | .map(|line| { - 201 | if line.starts_with("VERSION") { - 202 | format!("VERSION := {next_version}") - 203 | } else { - 204 | line.to_string() - 205 | } - 206 | }) - 207 | .collect::>() - 208 | .join("\n") - 209 | + "\n"; - | - 210 | std::fs::write("Makefile", makefile)?; - | - 211 | Ok(()) - 212 | } - | - 213 | fn update_cmake(next_version: &Version) -> Result<()> { - 214 | let cmake = std::fs::read_to_string("CMakeLists.txt")?; - 215 | let cmake = cmake - 216 | .lines() - 217 | .map(|line| { - 218 | if line.contains(" VERSION") { - 219 | let start_quote = line.find('"').unwrap(); - 220 | let end_quote = line.rfind('"').unwrap(); - 221 | format!( - 222 | "{}{next_version}{}", - 223 | &line[..=start_quote], - 224 | &line[end_quote..] 
- 225 | ) - 226 | } else { - 227 | line.to_string() - 228 | } - 229 | }) - 230 | .collect::>() - 231 | .join("\n") - 232 | + "\n"; - | - 233 | std::fs::write("CMakeLists.txt", cmake)?; - | - 234 | Ok(()) - 235 | } - | - 236 | fn update_nix(next_version: &Version) -> Result<()> { - 237 | let nix = std::fs::read_to_string("flake.nix")?; - 238 | let nix = nix - 239 | .lines() - 240 | .map(|line| { - 241 | if line.trim_start().starts_with("version =") { - 242 | format!(" version = \"{next_version}\";") - 243 | } else { - 244 | line.to_string() - 245 | } - 246 | }) - 247 | .collect::>() - 248 | .join("\n") - 249 | + "\n"; - | - 250 | std::fs::write("flake.nix", nix)?; - | - 251 | Ok(()) - 252 | } - | - 253 | fn update_crates(current_version: &Version, next_version: &Version) -> Result<()> { - 254 | let mut cmd = std::process::Command::new("cargo"); - 255 | cmd.arg("workspaces").arg("version"); - | - 256 | if next_version.minor > current_version.minor { - 257 | cmd.arg("minor"); - 258 | } else { - 259 | cmd.arg("patch"); - 260 | } - | - 261 | cmd.arg("--no-git-commit") - 262 | .arg("--yes") - 263 | .arg("--force") - 264 | .arg("tree-sitter{,-cli,-config,-generate,-loader,-highlight,-tags}") - 265 | .arg("--ignore-changes") - 266 | .arg("crates/language/*"); - | - 267 | let status = cmd.status()?; - | - 268 | if !status.success() { - 269 | return Err(anyhow!("Failed to update crates")); - 270 | } - | - 271 | Ok(()) - 272 | } - | - 273 | fn update_npm(next_version: &Version) -> Result<()> { - 274 | for npm_project in ["lib/binding_web", "crates/cli/npm"] { - 275 | let npm_path = Path::new(npm_project); - | - 276 | let package_json_path = npm_path.join("package.json"); - | - 277 | let package_json = serde_json::from_str::( - 278 | &std::fs::read_to_string(&package_json_path) - 279 | .with_context(|| format!("Failed to read {}", package_json_path.display()))?, - 280 | )?; - | - 281 | let mut package_json = package_json - 282 | .as_object() - 283 | .ok_or_else(|| 
anyhow!("Invalid package.json"))? - 284 | .clone(); - 285 | package_json.insert( - 286 | "version".to_string(), - 287 | serde_json::Value::String(next_version.to_string()), - 288 | ); - | - 289 | let package_json = serde_json::to_string_pretty(&package_json)? + "\n"; - | - 290 | std::fs::write(package_json_path, package_json)?; - | - 291 | let Ok(cmd) = std::process::Command::new("npm") - 292 | .arg("install") - 293 | .arg("--package-lock-only") - 294 | .arg("--ignore-scripts") - 295 | .current_dir(npm_path) - 296 | .output() - 297 | else { - 298 | return Ok(()); // npm is not `executable`, ignore - 299 | }; - | - 300 | if !cmd.status.success() { - 301 | let stderr = String::from_utf8_lossy(&cmd.stderr); - 302 | return Err(anyhow!( - 303 | "Failed to run `npm install` in {}:\n{stderr}", - 304 | npm_path.display() - 305 | )); - 306 | } - 307 | } - | - 308 | Ok(()) - 309 | } - | - 310 | fn update_zig(next_version: &Version) -> Result<()> { - 311 | let zig = std::fs::read_to_string("build.zig.zon")? - 312 | .lines() - 313 | .map(|line| { - 314 | if line.starts_with(" .version") { - 315 | format!(" .version = \"{next_version}\",") - 316 | } else { - 317 | line.to_string() - 318 | } - 319 | }) - 320 | .collect::>() - 321 | .join("\n") - 322 | + "\n"; - | - 323 | std::fs::write("build.zig.zon", zig)?; - | - 324 | Ok(()) - 325 | } - | - 326 | /// read Cargo.toml and get the version - 327 | fn fetch_workspace_version() -> Result { - 328 | std::fs::read_to_string("Cargo.toml")? 
- 329 | .lines() - 330 | .find(|line| line.starts_with("version = ")) - 331 | .and_then(|line| { - 332 | line.split_terminator('"') - 333 | .next_back() - 334 | .map(|s| s.to_string()) - 335 | }) - 336 | .ok_or_else(|| anyhow!("No version found in Cargo.toml")) - 337 | } - - - --------------------------------------------------------------------------------- -/crates/xtask/src/check_wasm_exports.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | collections::HashSet, - 3 | env, - 4 | io::BufRead, - 5 | path::PathBuf, - 6 | process::{Command, Stdio}, - 7 | time::Duration, - 8 | }; - | - 9 | use anyhow::{anyhow, Result}; - 10 | use notify::{ - 11 | event::{AccessKind, AccessMode}, - 12 | EventKind, RecursiveMode, - 13 | }; - 14 | use notify_debouncer_full::new_debouncer; - | - 15 | use crate::{bail_on_err, watch_wasm, CheckWasmExports}; - | - 16 | const EXCLUDES: [&str; 23] = [ - 17 | // Unneeded because the JS side has its own way of implementing it - 18 | "ts_node_child_by_field_name", - 19 | "ts_node_edit", - 20 | // Precomputed and stored in the JS side - 21 | "ts_node_type", - 22 | "ts_node_grammar_type", - 23 | "ts_node_eq", - 24 | "ts_tree_cursor_current_field_name", - 25 | "ts_lookahead_iterator_current_symbol_name", - 26 | // Not used in Wasm - 27 | "ts_init", - 28 | "ts_set_allocator", - 29 | "ts_parser_print_dot_graphs", - 30 | "ts_tree_print_dot_graph", - 31 | "ts_parser_set_wasm_store", - 32 | "ts_parser_take_wasm_store", - 33 | "ts_parser_language", - 34 | "ts_node_language", - 35 | "ts_tree_language", - 36 | "ts_lookahead_iterator_language", - 37 | "ts_parser_logger", - 38 | "ts_parser_parse_string", - 39 | "ts_parser_parse_string_encoding", - 40 | // Query cursor is not managed by user in web bindings - 41 | "ts_query_cursor_delete", - 42 | "ts_query_cursor_match_limit", - 43 | "ts_query_cursor_remove_match", - 44 | ]; - | - 45 | pub fn run(args: &CheckWasmExports) -> Result<()> { - 46 | if 
args.watch { - 47 | watch_wasm!(check_wasm_exports); - 48 | } else { - 49 | check_wasm_exports()?; - 50 | } - | - 51 | Ok(()) - 52 | } - | - 53 | fn check_wasm_exports() -> Result<()> { - 54 | let mut wasm_exports = std::fs::read_to_string("lib/binding_web/lib/exports.txt")? - 55 | .lines() - 56 | .map(|s| s.replace("_wasm", "").replace("byte", "index")) - 57 | // remove leading and trailing quotes, trailing comma - 58 | .map(|s| s[1..s.len() - 2].to_string()) - 59 | .collect::>(); - | - 60 | // Run wasm-objdump to see symbols used internally in binding.c but not exposed in any way. - 61 | let wasm_objdump = Command::new("wasm-objdump") - 62 | .args([ - 63 | "--details", - 64 | "lib/binding_web/debug/web-tree-sitter.wasm", - 65 | "--section", - 66 | "Name", - 67 | ]) - 68 | .output() - 69 | .expect("Failed to run wasm-objdump"); - 70 | bail_on_err(&wasm_objdump, "Failed to run wasm-objdump")?; - | - 71 | wasm_exports.extend( - 72 | wasm_objdump - 73 | .stdout - 74 | .lines() - 75 | .map_while(Result::ok) - 76 | .skip_while(|line| !line.contains("- func")) - 77 | .filter_map(|line| { - 78 | if line.contains("func") { - 79 | if let Some(function) = line.split_whitespace().nth(2).map(String::from) { - 80 | let trimmed = function.trim_start_matches('<').trim_end_matches('>'); - 81 | if trimmed.starts_with("ts") && !trimmed.contains("__") { - 82 | return Some(trimmed.to_string()); - 83 | } - 84 | } - 85 | } - 86 | None - 87 | }), - 88 | ); - | - 89 | let nm_cmd = env::var("NM").unwrap_or_else(|_| "nm".to_owned()); - 90 | let nm_child = Command::new(nm_cmd) - 91 | .arg("-W") - 92 | .arg("-U") - 93 | .arg("libtree-sitter.so") - 94 | .stdout(Stdio::piped()) - 95 | .output() - 96 | .expect("Failed to run nm"); - 97 | bail_on_err(&nm_child, "Failed to run nm")?; - 98 | let export_reader = nm_child - 99 | .stdout - 100 | .lines() - 101 | .map_while(Result::ok) - 102 | .filter(|line| line.contains(" T ")); - | - 103 | let exports = export_reader - 104 | .filter_map(|line| 
line.split_whitespace().nth(2).map(String::from)) - 105 | .filter(|symbol| !EXCLUDES.contains(&symbol.as_str())) - 106 | .collect::>(); - | - 107 | let mut missing = exports - 108 | .iter() - 109 | .filter(|&symbol| !wasm_exports.contains(symbol)) - 110 | .map(String::as_str) - 111 | .collect::>(); - 112 | missing.sort_unstable(); - | - 113 | if !missing.is_empty() { - 114 | Err(anyhow!(format!( - 115 | "Unmatched Wasm exports:\n{}", - 116 | missing.join("\n") - 117 | )))?; - 118 | } - | - 119 | Ok(()) - 120 | } - - - --------------------------------------------------------------------------------- -/crates/xtask/src/clippy.rs: --------------------------------------------------------------------------------- - 1 | use std::process::Command; - | - 2 | use anyhow::Result; - | - 3 | use crate::{bail_on_err, Clippy}; - | - 4 | pub fn run(args: &Clippy) -> Result<()> { - 5 | let mut clippy_command = Command::new("cargo"); - 6 | clippy_command.arg("clippy"); - | - 7 | if let Some(package) = args.package.as_ref() { - 8 | clippy_command.args(["--package", package]); - 9 | } else { - 10 | clippy_command.arg("--workspace"); - 11 | } - | - 12 | clippy_command - 13 | .arg("--release") - 14 | .arg("--all-targets") - 15 | .arg("--all-features") - 16 | .arg("--") - 17 | .arg("-D") - 18 | .arg("warnings"); - | - 19 | if args.fix { - 20 | clippy_command.arg("--fix"); - 21 | } - | - 22 | bail_on_err( - 23 | &clippy_command.spawn()?.wait_with_output()?, - 24 | "Clippy failed", - 25 | ) - 26 | } - - - --------------------------------------------------------------------------------- -/crates/xtask/src/embed_sources.rs: --------------------------------------------------------------------------------- - 1 | use anyhow::Result; - 2 | use std::fs; - 3 | use std::path::Path; - | - 4 | /// Restores sourcesContent if it was stripped by Binaryen. 
- 5 | /// - 6 | /// This is a workaround for Binaryen where `wasm-opt -O2` and higher - 7 | /// optimization levels strip the `sourcesContent` field from source maps, - 8 | /// even when the source map was generated with `--sources` flag. - 9 | /// - 10 | /// This is fixed upstream in Binaryen as of Apr 9, 2025, but there hasn't been a release with the fix yet. - 11 | /// See: - 12 | /// - 13 | /// This reads the original source files and embeds them in the - 14 | /// source map's `sourcesContent` field, making debugging possible even - 15 | /// with optimized builds. - 16 | /// - 17 | /// TODO: Once Binaryen releases a version with the fix, and emscripten updates to that - 18 | /// version, and we update our emscripten version, this function can be removed. - 19 | pub fn embed_sources_in_map(map_path: &Path) -> Result<()> { - 20 | let map_content = fs::read_to_string(map_path)?; - 21 | let mut map: serde_json::Value = serde_json::from_str(&map_content)?; - | - 22 | if let Some(sources_content) = map.get("sourcesContent") { - 23 | if let Some(arr) = sources_content.as_array() { - 24 | if !arr.is_empty() && arr.iter().any(|v| !v.is_null()) { - 25 | return Ok(()); - 26 | } - 27 | } - 28 | } - | - 29 | let sources = map["sources"] - 30 | .as_array() - 31 | .ok_or_else(|| anyhow::anyhow!("No sources array in source map"))?; - | - 32 | let map_dir = map_path.parent().unwrap_or(Path::new(".")); - 33 | let mut sources_content = Vec::new(); - | - 34 | for source in sources { - 35 | let source_path = source.as_str().unwrap_or(""); - 36 | let full_path = map_dir.join(source_path); - | - 37 | let content = if full_path.exists() { - 38 | match fs::read_to_string(&full_path) { - 39 | Ok(content) => serde_json::Value::String(content), - 40 | Err(_) => serde_json::Value::Null, - 41 | } - 42 | } else { - 43 | serde_json::Value::Null - 44 | }; - | - 45 | sources_content.push(content); - 46 | } - | - 47 | map["sourcesContent"] = serde_json::Value::Array(sources_content); - | - 48 | 
let output = serde_json::to_string(&map)?; - 49 | fs::write(map_path, output)?; - | - 50 | Ok(()) - 51 | } - - - --------------------------------------------------------------------------------- -/crates/xtask/src/fetch.rs: --------------------------------------------------------------------------------- - 1 | use crate::{bail_on_err, root_dir, FetchFixtures, EMSCRIPTEN_VERSION}; - 2 | use anyhow::Result; - 3 | use std::{fs, process::Command}; - | - 4 | pub fn run_fixtures(args: &FetchFixtures) -> Result<()> { - 5 | let fixtures_dir = root_dir().join("test").join("fixtures"); - 6 | let grammars_dir = fixtures_dir.join("grammars"); - 7 | let fixtures_path = fixtures_dir.join("fixtures.json"); - | - 8 | // grammar name, tag - 9 | let mut fixtures: Vec<(String, String)> = - 10 | serde_json::from_str(&fs::read_to_string(&fixtures_path)?)?; - | - 11 | for (grammar, tag) in &mut fixtures { - 12 | let grammar_dir = grammars_dir.join(&grammar); - 13 | let grammar_url = format!("https://github.com/tree-sitter/tree-sitter-{grammar}"); - | - 14 | println!("Fetching the {grammar} grammar..."); - | - 15 | if !grammar_dir.exists() { - 16 | let mut command = Command::new("git"); - 17 | command.args([ - 18 | "clone", - 19 | "--depth", - 20 | "1", - 21 | "--branch", - 22 | tag, - 23 | &grammar_url, - 24 | &grammar_dir.to_string_lossy(), - 25 | ]); - 26 | bail_on_err( - 27 | &command.spawn()?.wait_with_output()?, - 28 | &format!("Failed to clone the {grammar} grammar"), - 29 | )?; - 30 | } else { - 31 | let mut describe_command = Command::new("git"); - 32 | describe_command.current_dir(&grammar_dir).args([ - 33 | "describe", - 34 | "--tags", - 35 | "--exact-match", - 36 | "HEAD", - 37 | ]); - | - 38 | let output = describe_command.output()?; - 39 | let current_tag = String::from_utf8_lossy(&output.stdout); - 40 | let current_tag = current_tag.trim(); - | - 41 | if current_tag != tag { - 42 | println!("Updating {grammar} grammar from {current_tag} to {tag}..."); - | - 43 | let mut 
fetch_command = Command::new("git"); - 44 | fetch_command.current_dir(&grammar_dir).args([ - 45 | "fetch", - 46 | "origin", - 47 | &format!("refs/tags/{tag}:refs/tags/{tag}"), - 48 | ]); - 49 | bail_on_err( - 50 | &fetch_command.spawn()?.wait_with_output()?, - 51 | &format!("Failed to fetch tag {tag} for {grammar} grammar"), - 52 | )?; - | - 53 | let mut reset_command = Command::new("git"); - 54 | reset_command - 55 | .current_dir(&grammar_dir) - 56 | .args(["reset", "--hard", "HEAD"]); - 57 | bail_on_err( - 58 | &reset_command.spawn()?.wait_with_output()?, - 59 | &format!("Failed to reset {grammar} grammar working tree"), - 60 | )?; - | - 61 | let mut checkout_command = Command::new("git"); - 62 | checkout_command - 63 | .current_dir(&grammar_dir) - 64 | .args(["checkout", tag]); - 65 | bail_on_err( - 66 | &checkout_command.spawn()?.wait_with_output()?, - 67 | &format!("Failed to checkout tag {tag} for {grammar} grammar"), - 68 | )?; - 69 | } else { - 70 | println!("{grammar} grammar is already at tag {tag}"); - 71 | } - 72 | } - 73 | } - | - 74 | if args.update { - 75 | println!("Updating the fixtures lock file"); - 76 | fs::write( - 77 | &fixtures_path, - 78 | // format the JSON without extra newlines - 79 | serde_json::to_string(&fixtures)? 
- 80 | .replace("[[", "[\n [") - 81 | .replace("],", "],\n ") - 82 | .replace("]]", "]\n]"), - 83 | )?; - 84 | } - | - 85 | Ok(()) - 86 | } - | - 87 | pub fn run_emscripten() -> Result<()> { - 88 | let emscripten_dir = root_dir().join("target").join("emsdk"); - 89 | if emscripten_dir.exists() { - 90 | println!("Emscripten SDK already exists"); - 91 | return Ok(()); - 92 | } - 93 | println!("Cloning the Emscripten SDK..."); - | - 94 | let mut command = Command::new("git"); - 95 | command.args([ - 96 | "clone", - 97 | "https://github.com/emscripten-core/emsdk.git", - 98 | &emscripten_dir.to_string_lossy(), - 99 | ]); - 100 | bail_on_err( - 101 | &command.spawn()?.wait_with_output()?, - 102 | "Failed to clone the Emscripten SDK", - 103 | )?; - | - 104 | std::env::set_current_dir(&emscripten_dir)?; - | - 105 | let emsdk = if cfg!(windows) { - 106 | "emsdk.bat" - 107 | } else { - 108 | "./emsdk" - 109 | }; - | - 110 | let mut command = Command::new(emsdk); - 111 | command.args(["install", EMSCRIPTEN_VERSION]); - 112 | bail_on_err( - 113 | &command.spawn()?.wait_with_output()?, - 114 | "Failed to install Emscripten", - 115 | )?; - | - 116 | let mut command = Command::new(emsdk); - 117 | command.args(["activate", EMSCRIPTEN_VERSION]); - 118 | bail_on_err( - 119 | &command.spawn()?.wait_with_output()?, - 120 | "Failed to activate Emscripten", - 121 | ) - 122 | } - - - --------------------------------------------------------------------------------- -/crates/xtask/src/generate.rs: --------------------------------------------------------------------------------- - 1 | use std::{collections::BTreeSet, ffi::OsStr, fs, path::Path, process::Command, str::FromStr}; - | - 2 | use anyhow::{Context, Result}; - 3 | use bindgen::RustTarget; - | - 4 | use crate::{bail_on_err, GenerateFixtures}; - | - 5 | const HEADER_PATH: &str = "lib/include/tree_sitter/api.h"; - | - 6 | pub fn run_fixtures(args: &GenerateFixtures) -> Result<()> { - 7 | let output = std::process::Command::new("cargo") 
- 8 | .args(["build", "--release"]) - 9 | .spawn()? - 10 | .wait_with_output()?; - 11 | bail_on_err(&output, "Failed to run cargo build")?; - | - 12 | let tree_sitter_binary = std::env::current_dir()? - 13 | .join("target") - 14 | .join("release") - 15 | .join("tree-sitter"); - | - 16 | let grammars_dir = std::env::current_dir()? - 17 | .join("test") - 18 | .join("fixtures") - 19 | .join("grammars"); - | - 20 | for grammar_file in find_grammar_files(grammars_dir.to_str().unwrap()).flatten() { - 21 | let grammar_dir = grammar_file.parent().unwrap(); - 22 | let grammar_name = grammar_dir.file_name().and_then(OsStr::to_str).unwrap(); - | - 23 | println!( - 24 | "Regenerating {grammar_name} parser{}", - 25 | if args.wasm { " to Wasm" } else { "" } - 26 | ); - | - 27 | if args.wasm { - 28 | let mut cmd = Command::new(&tree_sitter_binary); - 29 | let cmd = cmd.args([ - 30 | "build", - 31 | "--wasm", - 32 | "-o", - 33 | &format!("target/release/tree-sitter-{grammar_name}.wasm"), - 34 | grammar_dir.to_str().unwrap(), - 35 | ]); - 36 | bail_on_err( - 37 | &cmd.spawn()?.wait_with_output()?, - 38 | &format!("Failed to regenerate {grammar_name} parser to wasm"), - 39 | )?; - 40 | } else { - 41 | let output = Command::new(&tree_sitter_binary) - 42 | .arg("generate") - 43 | .arg("src/grammar.json") - 44 | .arg("--abi=latest") - 45 | .current_dir(grammar_dir) - 46 | .spawn()? 
- 47 | .wait_with_output()?; - 48 | bail_on_err( - 49 | &output, - 50 | &format!("Failed to regenerate {grammar_name} parser"), - 51 | )?; - 52 | } - 53 | } - | - 54 | Ok(()) - 55 | } - | - 56 | pub fn run_bindings() -> Result<()> { - 57 | let output = Command::new("cargo") - 58 | .args(["metadata", "--format-version", "1"]) - 59 | .output() - 60 | .unwrap(); - | - 61 | let metadata = serde_json::from_slice::(&output.stdout).unwrap(); - | - 62 | let Some(rust_version) = metadata - 63 | .get("packages") - 64 | .and_then(|packages| packages.as_array()) - 65 | .and_then(|packages| { - 66 | packages.iter().find_map(|package| { - 67 | if package["name"] == "tree-sitter" { - 68 | package.get("rust_version").and_then(|v| v.as_str()) - 69 | } else { - 70 | None - 71 | } - 72 | }) - 73 | }) - 74 | else { - 75 | panic!("Failed to find tree-sitter package in cargo metadata"); - 76 | }; - | - 77 | let no_copy = [ - 78 | "TSInput", - 79 | "TSLanguage", - 80 | "TSLogger", - 81 | "TSLookaheadIterator", - 82 | "TSParser", - 83 | "TSTree", - 84 | "TSQuery", - 85 | "TSQueryCursor", - 86 | "TSQueryCapture", - 87 | "TSQueryMatch", - 88 | "TSQueryPredicateStep", - 89 | ]; - | - 90 | let bindings = bindgen::Builder::default() - 91 | .header(HEADER_PATH) - 92 | .layout_tests(false) - 93 | .allowlist_type("^TS.*") - 94 | .allowlist_function("^ts_.*") - 95 | .allowlist_var("^TREE_SITTER.*") - 96 | .no_copy(no_copy.join("|")) - 97 | .prepend_enum_name(false) - 98 | .use_core() - 99 | .clang_arg("-D TREE_SITTER_FEATURE_WASM") - 100 | .rust_target(RustTarget::from_str(rust_version).unwrap()) - 101 | .generate() - 102 | .expect("Failed to generate bindings"); - | - 103 | bindings - 104 | .write_to_file("lib/binding_rust/bindings.rs") - 105 | .with_context(|| "Failed to write bindings") - 106 | } - | - 107 | pub fn run_wasm_exports() -> Result<()> { - 108 | let mut imports = BTreeSet::new(); - | - 109 | let mut callback = |path: &str| -> Result<()> { - 110 | let output = 
Command::new("wasm-objdump") - 111 | .args(["--details", path, "--section", "Import"]) - 112 | .output()?; - 113 | bail_on_err(&output, "Failed to run wasm-objdump")?; - | - 114 | let output = String::from_utf8_lossy(&output.stdout); - | - 115 | for line in output.lines() { - 116 | if let Some(imp) = line.split("').next()) { - 117 | imports.insert(imp.to_string()); - 118 | } - 119 | } - | - 120 | Ok(()) - 121 | }; - | - 122 | for entry in fs::read_dir(Path::new("target"))? { - 123 | let Ok(entry) = entry else { - 124 | continue; - 125 | }; - 126 | let path = entry.path(); - 127 | if path.is_dir() { - 128 | for entry in fs::read_dir(&path)? { - 129 | let Ok(entry) = entry else { - 130 | continue; - 131 | }; - 132 | let path = entry.path(); - 133 | if path.is_file() - 134 | && path.extension() == Some(OsStr::new("wasm")) - 135 | && path - 136 | .file_name() - 137 | .unwrap() - 138 | .to_str() - 139 | .unwrap() - 140 | .starts_with("tree-sitter-") - 141 | { - 142 | callback(path.to_str().unwrap())?; - 143 | } - 144 | } - 145 | } - 146 | } - | - 147 | for imp in imports { - 148 | println!("{imp}"); - 149 | } - | - 150 | Ok(()) - 151 | } - | - 152 | fn find_grammar_files( - 153 | dir: &str, - 154 | ) -> impl Iterator> { - 155 | fs::read_dir(dir) - 156 | .expect("Failed to read directory") - 157 | .filter_map(Result::ok) - 158 | .flat_map(|entry| { - 159 | let path = entry.path(); - 160 | if path.is_dir() && !path.to_string_lossy().contains("node_modules") { - 161 | Box::new(find_grammar_files(path.to_str().unwrap())) as Box> - 162 | } else if path.is_file() && path.file_name() == Some(OsStr::new("grammar.js")) { - 163 | Box::new(std::iter::once(Ok(path))) as _ - 164 | } else { - 165 | Box::new(std::iter::empty()) as _ - 166 | } - 167 | }) - 168 | } - - - --------------------------------------------------------------------------------- -/crates/xtask/src/main.rs: --------------------------------------------------------------------------------- - 1 | mod benchmark; - 2 | 
mod build_wasm; - 3 | mod bump; - 4 | mod check_wasm_exports; - 5 | mod clippy; - 6 | mod embed_sources; - 7 | mod fetch; - 8 | mod generate; - 9 | mod test; - 10 | mod upgrade_wasmtime; - | - 11 | use std::{path::Path, process::Command}; - | - 12 | use anstyle::{AnsiColor, Color, Style}; - 13 | use anyhow::Result; - 14 | use clap::{crate_authors, Args, FromArgMatches as _, Subcommand}; - 15 | use semver::Version; - | - 16 | #[derive(Subcommand)] - 17 | #[command(about="Run various tasks", author=crate_authors!("\n"), styles=get_styles())] - 18 | enum Commands { - 19 | /// Runs `cargo benchmark` with some optional environment variables set. - 20 | Benchmark(Benchmark), - 21 | /// Compile the Tree-sitter Wasm library. This will create two files in the - 22 | /// `lib/binding_web` directory: `web-tree-sitter.js` and `web-tree-sitter.wasm`. - 23 | BuildWasm(BuildWasm), - 24 | /// Compile the Tree-sitter Wasm standard library. - 25 | BuildWasmStdlib, - 26 | /// Bumps the version of the workspace. - 27 | BumpVersion(BumpVersion), - 28 | /// Checks that Wasm exports are synced. - 29 | CheckWasmExports(CheckWasmExports), - 30 | /// Runs `cargo clippy`. - 31 | Clippy(Clippy), - 32 | /// Fetches emscripten. - 33 | FetchEmscripten, - 34 | /// Fetches the fixtures for testing tree-sitter. - 35 | FetchFixtures(FetchFixtures), - 36 | /// Generate the Rust bindings from the C library. - 37 | GenerateBindings, - 38 | /// Generates the fixtures for testing tree-sitter. - 39 | GenerateFixtures(GenerateFixtures), - 40 | /// Generate the list of exports from Tree-sitter Wasm files. - 41 | GenerateWasmExports, - 42 | /// Run the test suite - 43 | Test(Test), - 44 | /// Run the Wasm test suite - 45 | TestWasm, - 46 | /// Upgrade the wasmtime dependency. - 47 | UpgradeWasmtime(UpgradeWasmtime), - 48 | } - | - 49 | #[derive(Args)] - 50 | struct Benchmark { - 51 | /// The language to run the benchmarks for. 
- 52 | #[arg(long, short)] - 53 | language: Option, - 54 | /// The example file to run the benchmarks for. - 55 | #[arg(long, short)] - 56 | example_file_name: Option, - 57 | /// The number of times to parse each sample (default is 5). - 58 | #[arg(long, short, default_value = "5")] - 59 | repetition_count: u32, - 60 | /// Whether to run the benchmarks in debug mode. - 61 | #[arg(long, short = 'g')] - 62 | debug: bool, - 63 | } - | - 64 | #[derive(Args)] - 65 | struct BuildWasm { - 66 | /// Compile the library more quickly, with fewer optimizations - 67 | /// and more runtime assertions. - 68 | #[arg(long, short = '0')] - 69 | debug: bool, - 70 | /// Run emscripten using docker, even if \`emcc\` is installed. - 71 | /// By default, \`emcc\` will be run directly when available. - 72 | #[arg(long, short)] - 73 | docker: bool, - 74 | /// Run emscripten with verbose output. - 75 | #[arg(long, short)] - 76 | verbose: bool, - 77 | /// Rebuild when relevant files are changed. - 78 | #[arg(long, short)] - 79 | watch: bool, - 80 | /// Emit TypeScript type definitions for the generated bindings, - 81 | /// requires `tsc` to be available. - 82 | #[arg(long, short)] - 83 | emit_tsd: bool, - 84 | /// Generate `CommonJS` modules instead of ES modules. - 85 | #[arg(long, short, env = "CJS")] - 86 | cjs: bool, - 87 | } - | - 88 | #[derive(Args)] - 89 | struct BumpVersion { - 90 | /// The version to bump to. - 91 | #[arg(long, short)] - 92 | version: Option, - 93 | } - | - 94 | #[derive(Args)] - 95 | struct CheckWasmExports { - 96 | /// Recheck when relevant files are changed. - 97 | #[arg(long, short)] - 98 | watch: bool, - 99 | } - | - 100 | #[derive(Args)] - 101 | struct Clippy { - 102 | /// Automatically apply lint suggestions (`clippy --fix`). - 103 | #[arg(long, short)] - 104 | fix: bool, - 105 | /// The package to run Clippy against (`cargo -p clippy`). 
- 106 | #[arg(long, short)] - 107 | package: Option, - 108 | } - | - 109 | #[derive(Args)] - 110 | struct FetchFixtures { - 111 | /// Update all fixtures to the latest tag - 112 | #[arg(long, short)] - 113 | update: bool, - 114 | } - | - 115 | #[derive(Args)] - 116 | struct GenerateFixtures { - 117 | /// Generates the parser to Wasm - 118 | #[arg(long, short)] - 119 | wasm: bool, - 120 | } - | - 121 | #[derive(Args)] - 122 | struct Test { - 123 | /// Compile C code with the Clang address sanitizer. - 124 | #[arg(long, short)] - 125 | address_sanitizer: bool, - 126 | /// Run only the corpus tests for the given language. - 127 | #[arg(long, short)] - 128 | language: Option, - 129 | /// Run only the corpus tests whose name contain the given string. - 130 | #[arg(long, short)] - 131 | example: Option, - 132 | /// Run the given number of iterations of randomized tests (default 10). - 133 | #[arg(long, short)] - 134 | iterations: Option, - 135 | /// Set the seed used to control random behavior. - 136 | #[arg(long, short)] - 137 | seed: Option, - 138 | /// Print parsing log to stderr. - 139 | #[arg(long, short)] - 140 | debug: bool, - 141 | /// Generate an SVG graph of parsing logs. - 142 | #[arg(long, short = 'D')] - 143 | debug_graph: bool, - 144 | /// Run the tests with a debugger. - 145 | #[arg(short)] - 146 | g: bool, - 147 | #[arg(trailing_var_arg = true)] - 148 | args: Vec, - 149 | /// Don't capture the output - 150 | #[arg(long)] - 151 | nocapture: bool, - 152 | /// Enable the Wasm tests. - 153 | #[arg(long, short)] - 154 | wasm: bool, - 155 | } - | - 156 | #[derive(Args)] - 157 | struct UpgradeWasmtime { - 158 | /// The version to upgrade to. 
- 159 | #[arg(long, short)] - 160 | version: Version, - 161 | } - | - 162 | const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION"); - 163 | const BUILD_SHA: Option<&str> = option_env!("BUILD_SHA"); - 164 | const EMSCRIPTEN_VERSION: &str = include_str!("../../loader/emscripten-version").trim_ascii(); - 165 | const EMSCRIPTEN_TAG: &str = concat!( - 166 | "docker.io/emscripten/emsdk:", - 167 | include_str!("../../loader/emscripten-version") - 168 | ) - 169 | .trim_ascii(); - | - 170 | fn main() { - 171 | let result = run(); - 172 | if let Err(err) = &result { - 173 | // Ignore BrokenPipe errors - 174 | if let Some(error) = err.downcast_ref::() { - 175 | if error.kind() == std::io::ErrorKind::BrokenPipe { - 176 | return; - 177 | } - 178 | } - 179 | if !err.to_string().is_empty() { - 180 | eprintln!("{err:?}"); - 181 | } - 182 | std::process::exit(1); - 183 | } - 184 | } - | - 185 | fn run() -> Result<()> { - 186 | let version = BUILD_SHA.map_or_else( - 187 | || BUILD_VERSION.to_string(), - 188 | |build_sha| format!("{BUILD_VERSION} ({build_sha})"), - 189 | ); - 190 | let version: &'static str = Box::leak(version.into_boxed_str()); - | - 191 | let cli = clap::Command::new("xtask") - 192 | .help_template( - 193 | "\ - 194 | {before-help}{name} {version} - 195 | {author-with-newline}{about-with-newline} - 196 | {usage-heading} {usage} - | - 197 | {all-args}{after-help} - 198 | ", - 199 | ) - 200 | .version(version) - 201 | .subcommand_required(true) - 202 | .arg_required_else_help(true) - 203 | .disable_help_subcommand(true) - 204 | .disable_colored_help(false); - 205 | let command = Commands::from_arg_matches(&Commands::augment_subcommands(cli).get_matches())?; - | - 206 | match command { - 207 | Commands::Benchmark(benchmark_options) => benchmark::run(&benchmark_options)?, - 208 | Commands::BuildWasm(build_wasm_options) => build_wasm::run_wasm(&build_wasm_options)?, - 209 | Commands::BuildWasmStdlib => build_wasm::run_wasm_stdlib()?, - 210 | 
Commands::BumpVersion(bump_options) => bump::run(bump_options)?, - 211 | Commands::CheckWasmExports(check_options) => check_wasm_exports::run(&check_options)?, - 212 | Commands::Clippy(clippy_options) => clippy::run(&clippy_options)?, - 213 | Commands::FetchEmscripten => fetch::run_emscripten()?, - 214 | Commands::FetchFixtures(fetch_fixture_options) => { - 215 | fetch::run_fixtures(&fetch_fixture_options)?; - 216 | } - 217 | Commands::GenerateBindings => generate::run_bindings()?, - 218 | Commands::GenerateFixtures(generate_fixtures_options) => { - 219 | generate::run_fixtures(&generate_fixtures_options)?; - 220 | } - 221 | Commands::GenerateWasmExports => generate::run_wasm_exports()?, - 222 | Commands::Test(test_options) => test::run(&test_options)?, - 223 | Commands::TestWasm => test::run_wasm()?, - 224 | Commands::UpgradeWasmtime(upgrade_wasmtime_options) => { - 225 | upgrade_wasmtime::run(&upgrade_wasmtime_options)?; - 226 | } - 227 | } - | - 228 | Ok(()) - 229 | } - | - 230 | fn root_dir() -> &'static Path { - 231 | Path::new(env!("CARGO_MANIFEST_DIR")) - 232 | .parent() - 233 | .unwrap() - 234 | .parent() - 235 | .unwrap() - 236 | } - | - 237 | fn bail_on_err(output: &std::process::Output, prefix: &str) -> Result<()> { - 238 | if !output.status.success() { - 239 | let stderr = String::from_utf8_lossy(&output.stderr); - 240 | anyhow::bail!("{prefix}:\n{stderr}"); - 241 | } - 242 | Ok(()) - 243 | } - | - 244 | #[must_use] - 245 | const fn get_styles() -> clap::builder::Styles { - 246 | clap::builder::Styles::styled() - 247 | .usage( - 248 | Style::new() - 249 | .bold() - 250 | .fg_color(Some(Color::Ansi(AnsiColor::Yellow))), - 251 | ) - 252 | .header( - 253 | Style::new() - 254 | .bold() - 255 | .fg_color(Some(Color::Ansi(AnsiColor::Yellow))), - 256 | ) - 257 | .literal(Style::new().fg_color(Some(Color::Ansi(AnsiColor::Green)))) - 258 | .invalid( - 259 | Style::new() - 260 | .bold() - 261 | .fg_color(Some(Color::Ansi(AnsiColor::Red))), - 262 | ) - 263 | 
.error( - 264 | Style::new() - 265 | .bold() - 266 | .fg_color(Some(Color::Ansi(AnsiColor::Red))), - 267 | ) - 268 | .valid( - 269 | Style::new() - 270 | .bold() - 271 | .fg_color(Some(Color::Ansi(AnsiColor::Green))), - 272 | ) - 273 | .placeholder(Style::new().fg_color(Some(Color::Ansi(AnsiColor::White)))) - 274 | } - | - 275 | pub fn create_commit(msg: &str, paths: &[&str]) -> Result { - 276 | for path in paths { - 277 | let output = Command::new("git").args(["add", path]).output()?; - 278 | if !output.status.success() { - 279 | anyhow::bail!( - 280 | "Failed to add {path}: {}", - 281 | String::from_utf8_lossy(&output.stderr) - 282 | ); - 283 | } - 284 | } - | - 285 | let output = Command::new("git").args(["commit", "-m", msg]).output()?; - 286 | if !output.status.success() { - 287 | anyhow::bail!( - 288 | "Failed to commit: {}", - 289 | String::from_utf8_lossy(&output.stderr) - 290 | ); - 291 | } - | - 292 | let output = Command::new("git").args(["rev-parse", "HEAD"]).output()?; - 293 | if !output.status.success() { - 294 | anyhow::bail!( - 295 | "Failed to get commit SHA: {}", - 296 | String::from_utf8_lossy(&output.stderr) - 297 | ); - 298 | } - | - 299 | Ok(String::from_utf8(output.stdout)?.trim().to_string()) - 300 | } - | - 301 | #[macro_export] - 302 | macro_rules! 
watch_wasm { - 303 | ($watch_fn:expr) => { - 304 | if let Err(e) = $watch_fn() { - 305 | eprintln!("{e}"); - 306 | } else { - 307 | println!("Build succeeded"); - 308 | } - | - 309 | let watch_files = [ - 310 | "lib/tree-sitter.c", - 311 | "lib/exports.txt", - 312 | "lib/imports.js", - 313 | "lib/prefix.js", - 314 | ] - 315 | .iter() - 316 | .map(PathBuf::from) - 317 | .collect::>(); - 318 | let (tx, rx) = std::sync::mpsc::channel(); - 319 | let mut debouncer = new_debouncer(Duration::from_secs(1), None, tx)?; - 320 | debouncer.watch("lib/binding_web", RecursiveMode::NonRecursive)?; - | - 321 | for result in rx { - 322 | match result { - 323 | Ok(events) => { - 324 | for event in events { - 325 | if event.kind == EventKind::Access(AccessKind::Close(AccessMode::Write)) - 326 | && event - 327 | .paths - 328 | .iter() - 329 | .filter_map(|p| p.file_name()) - 330 | .any(|p| watch_files.contains(&PathBuf::from(p))) - 331 | { - 332 | if let Err(e) = $watch_fn() { - 333 | eprintln!("{e}"); - 334 | } else { - 335 | println!("Build succeeded"); - 336 | } - 337 | } - 338 | } - 339 | } - 340 | Err(errors) => { - 341 | return Err(anyhow!( - 342 | "{}", - 343 | errors - 344 | .into_iter() - 345 | .map(|e| e.to_string()) - 346 | .collect::>() - 347 | .join("\n") - 348 | )); - 349 | } - 350 | } - 351 | } - 352 | }; - 353 | } - - - --------------------------------------------------------------------------------- -/crates/xtask/src/test.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | env, - 3 | path::Path, - 4 | process::{Command, Stdio}, - 5 | }; - | - 6 | use anyhow::{anyhow, Result}; - 7 | use regex::Regex; - | - 8 | use crate::{bail_on_err, Test}; - | - 9 | pub fn run(args: &Test) -> Result<()> { - 10 | let test_flags = if args.address_sanitizer { - 11 | env::set_var("CFLAGS", "-fsanitize=undefined,address"); - | - 12 | // When the Tree-sitter C library is compiled with the address sanitizer, the address - 13 | // 
sanitizer runtime library needs to be linked into the final test executable. When - 14 | // using Xcode clang, the Rust linker doesn't know where to find that library, so we - 15 | // need to specify linker flags directly. - 16 | let output = Command::new("cc").arg("-print-runtime-dir").output()?; - 17 | bail_on_err(&output, "Failed to get clang runtime dir")?; - 18 | let runtime_dir = String::from_utf8(output.stdout)?; - 19 | if runtime_dir.contains("/Xcode.app/") { - 20 | env::set_var( - 21 | "RUSTFLAGS", - 22 | format!( - 23 | "-C link-arg=-L{runtime_dir} -C link-arg=-lclang_rt.asan_osx_dynamic -C link-arg=-Wl,-rpath,{runtime_dir}" - 24 | ), - 25 | ); - 26 | } - | - 27 | // Specify a `--target` explicitly. This is required for address sanitizer support. - 28 | let output = Command::new("rustup") - 29 | .arg("show") - 30 | .arg("active-toolchain") - 31 | .output()?; - 32 | bail_on_err(&output, "Failed to get active Rust toolchain")?; - 33 | let toolchain = String::from_utf8(output.stdout)?; - 34 | let re = Regex::new(r"(stable|beta|nightly)-([_a-z0-9-]+).*")?; - 35 | let captures = re - 36 | .captures(&toolchain) - 37 | .ok_or_else(|| anyhow!("Failed to parse toolchain '{toolchain}'"))?; - 38 | let current_target = captures.get(2).unwrap().as_str(); - 39 | format!("--target={current_target}") - 40 | } else { - 41 | String::new() - 42 | }; - 43 | if let Some(language) = &args.language { - 44 | env::set_var("TREE_SITTER_LANGUAGE", language); - 45 | } - 46 | if let Some(example) = &args.example { - 47 | env::set_var("TREE_SITTER_EXAMPLE_INCLUDE", example); - 48 | } - 49 | if let Some(seed) = args.seed { - 50 | env::set_var("TREE_SITTER_SEED", seed.to_string()); - 51 | } - 52 | if let Some(iterations) = args.iterations { - 53 | env::set_var("TREE_SITTER_ITERATIONS", iterations.to_string()); - 54 | } - 55 | if args.debug { - 56 | env::set_var("TREE_SITTER_LOG", "1"); - 57 | } - 58 | if args.debug_graph { - 59 | env::set_var("TREE_SITTER_LOG_GRAPHS", "1"); - 60 | } - | 
- 61 | if args.g { - 62 | let mut cargo_cmd = Command::new("cargo"); - 63 | cargo_cmd - 64 | .arg("test") - 65 | .arg("--all") - 66 | .arg(&test_flags) - 67 | .arg("--no-run") - 68 | .arg("--message-format=json"); - | - 69 | let cargo_cmd = cargo_cmd.stdout(Stdio::piped()).spawn()?; - | - 70 | let jq_cmd = Command::new("jq") - 71 | .arg("-rs") - 72 | .arg(r#"map(select(.target.name == "tree_sitter_cli" and .executable))[0].executable"#) - 73 | .stdin(cargo_cmd.stdout.unwrap()) - 74 | .output()?; - | - 75 | let test_binary = String::from_utf8(jq_cmd.stdout)?; - | - 76 | let mut lldb_cmd = Command::new("lldb"); - 77 | lldb_cmd.arg(test_binary.trim()).arg("--").args(&args.args); - 78 | bail_on_err( - 79 | &lldb_cmd.spawn()?.wait_with_output()?, - 80 | &format!("Failed to run {lldb_cmd:?}"), - 81 | )?; - 82 | } else { - 83 | let mut cargo_cmd = Command::new("cargo"); - 84 | cargo_cmd.arg("test").arg("--all"); - 85 | if args.wasm { - 86 | cargo_cmd.arg("--features").arg("wasm"); - 87 | } - 88 | if !test_flags.is_empty() { - 89 | cargo_cmd.arg(&test_flags); - 90 | } - 91 | cargo_cmd.args(&args.args); - | - 92 | if args.nocapture { - 93 | #[cfg(not(target_os = "windows"))] - 94 | cargo_cmd.arg("--"); - | - 95 | cargo_cmd.arg("--nocapture"); - 96 | } - 97 | bail_on_err( - 98 | &cargo_cmd.spawn()?.wait_with_output()?, - 99 | &format!("Failed to run {cargo_cmd:?}"), - 100 | )?; - 101 | } - | - 102 | Ok(()) - 103 | } - | - 104 | pub fn run_wasm() -> Result<()> { - 105 | std::env::set_current_dir("lib/binding_web")?; - | - 106 | let node_modules_dir = Path::new("node_modules"); - 107 | let npm = if cfg!(target_os = "windows") { - 108 | "npm.cmd" - 109 | } else { - 110 | "npm" - 111 | }; - | - 112 | if !node_modules_dir.join("chai").exists() || !node_modules_dir.join("mocha").exists() { - 113 | println!("Installing test dependencies..."); - 114 | let output = Command::new(npm).arg("install").output()?; - 115 | bail_on_err(&output, "Failed to install test dependencies")?; - 116 
| } - | - 117 | let child = Command::new(npm).arg("test").spawn()?; - 118 | let output = child.wait_with_output()?; - 119 | bail_on_err(&output, &format!("Failed to run `{npm} test`"))?; - | - 120 | // Display test results - 121 | let output = String::from_utf8_lossy(&output.stdout); - 122 | for line in output.lines() { - 123 | println!("{line}"); - 124 | } - | - 125 | Ok(()) - 126 | } - - - --------------------------------------------------------------------------------- -/crates/xtask/src/upgrade_wasmtime.rs: --------------------------------------------------------------------------------- - 1 | use std::process::Command; - | - 2 | use anyhow::{Context, Result}; - 3 | use semver::Version; - | - 4 | use crate::{create_commit, UpgradeWasmtime}; - | - 5 | const WASMTIME_RELEASE_URL: &str = "https://github.com/bytecodealliance/wasmtime/releases/download"; - | - 6 | fn update_cargo(version: &Version) -> Result<()> { - 7 | let file = std::fs::read_to_string("lib/Cargo.toml")?; - 8 | let mut old_lines = file.lines(); - 9 | let mut new_lines = Vec::with_capacity(old_lines.size_hint().0); - | - 10 | while let Some(line) = old_lines.next() { - 11 | new_lines.push(line.to_string()); - 12 | if line == "[dependencies.wasmtime-c-api]" { - 13 | let _ = old_lines.next(); - 14 | new_lines.push(format!("version = \"{version}\"")); - 15 | } - 16 | } - | - 17 | std::fs::write("lib/Cargo.toml", new_lines.join("\n") + "\n")?; - | - 18 | Command::new("cargo") - 19 | .arg("update") - 20 | .status() - 21 | .map(|_| ()) - 22 | .with_context(|| "Failed to execute cargo update") - 23 | } - | - 24 | fn zig_fetch(lines: &mut Vec, version: &Version, url_suffix: &str) -> Result<()> { - 25 | let url = &format!("{WASMTIME_RELEASE_URL}/v{version}/wasmtime-v{version}-{url_suffix}"); - 26 | println!(" Fetching {url}"); - 27 | lines.push(format!(" .url = \"{url}\",")); - | - 28 | let output = Command::new("zig") - 29 | .arg("fetch") - 30 | .arg(url) - 31 | .output() - 32 | .with_context(|| 
format!("Failed to execute zig fetch {url}"))?; - | - 33 | let hash = String::from_utf8_lossy(&output.stdout); - 34 | lines.push(format!(" .hash = \"{}\",", hash.trim_end())); - | - 35 | Ok(()) - 36 | } - | - 37 | fn update_zig(version: &Version) -> Result<()> { - 38 | let file = std::fs::read_to_string("build.zig.zon")?; - 39 | let mut old_lines = file.lines(); - 40 | let new_lines = &mut Vec::with_capacity(old_lines.size_hint().0); - | - 41 | macro_rules! match_wasmtime_zig_dep { - 42 | ($line:ident, {$($platform:literal => [$($arch:literal),*]),*,}) => { - 43 | match $line { - 44 | $($(concat!(" .wasmtime_c_api_", $arch, "_", $platform, " = .{") => { - 45 | let (_, _) = (old_lines.next(), old_lines.next()); - 46 | let suffix = if $platform == "windows" || $platform == "mingw" { - 47 | concat!($arch, "-", $platform, "-c-api.zip") - 48 | } else { - 49 | concat!($arch, "-", $platform, "-c-api.tar.xz") - 50 | }; - 51 | zig_fetch(new_lines, version, suffix)?; - 52 | })*)* - 53 | _ => {} - 54 | } - 55 | }; - 56 | } - | - 57 | while let Some(line) = old_lines.next() { - 58 | new_lines.push(line.to_string()); - 59 | match_wasmtime_zig_dep!(line, { - 60 | "android" => ["aarch64", "x86_64"], - 61 | "linux" => ["aarch64", "armv7", "i686", "riscv64gc", "s390x", "x86_64"], - 62 | "macos" => ["aarch64", "x86_64"], - 63 | "mingw" => ["x86_64"], - 64 | "musl" => ["aarch64", "x86_64"], - 65 | "windows" => ["aarch64", "i686", "x86_64"], - 66 | }); - 67 | } - | - 68 | std::fs::write("build.zig.zon", new_lines.join("\n") + "\n")?; - | - 69 | Ok(()) - 70 | } - | - 71 | pub fn run(args: &UpgradeWasmtime) -> Result<()> { - 72 | println!("Upgrading wasmtime for Rust"); - 73 | update_cargo(&args.version)?; - | - 74 | println!("Upgrading wasmtime for Zig"); - 75 | update_zig(&args.version)?; - | - 76 | create_commit( - 77 | &format!("build(deps): bump wasmtime-c-api to v{}", args.version), - 78 | &["lib/Cargo.toml", "Cargo.lock", "build.zig.zon"], - 79 | )?; - | - 80 | Ok(()) - 81 | } - 
- - --------------------------------------------------------------------------------- -/Dockerfile: --------------------------------------------------------------------------------- - 1 | FROM rust:1.76-buster - | - 2 | WORKDIR /app - | - 3 | RUN apt-get update - 4 | RUN apt-get install -y nodejs - | - 5 | COPY . . - | - 6 | CMD cargo test --all-features - - - --------------------------------------------------------------------------------- -/docs/book.toml: --------------------------------------------------------------------------------- - 1 | [book] - 2 | authors = [ - 3 | "Max Brunsfeld ", - 4 | "Amaan Qureshi ", - 5 | ] - 6 | language = "en" - 7 | multilingual = false - 8 | src = "src" - 9 | title = "Tree-sitter" - | - 10 | [output.html] - 11 | additional-css = [ - 12 | "src/assets/css/playground.css", - 13 | "src/assets/css/mdbook-admonish.css", - 14 | ] - 15 | additional-js = ["src/assets/js/playground.js"] - 16 | git-repository-url = "https://github.com/tree-sitter/tree-sitter" - 17 | git-repository-icon = "fa-github" - 18 | edit-url-template = "https://github.com/tree-sitter/tree-sitter/edit/master/docs/{path}" - | - 19 | [output.html.search] - 20 | limit-results = 20 - 21 | use-boolean-and = true - 22 | boost-title = 2 - 23 | boost-hierarchy = 2 - 24 | boost-paragraph = 1 - 25 | expand = true - | - 26 | [preprocessor] - | - 27 | [preprocessor.admonish] - 28 | command = "mdbook-admonish" - 29 | assets_version = "3.0.2" # do not edit: managed by `mdbook-admonish install` - - - --------------------------------------------------------------------------------- -/docs/package.nix: --------------------------------------------------------------------------------- - 1 | { - 2 | stdenv, - 3 | lib, - 4 | version, - 5 | mdbook, - 6 | mdbook-admonish, - 7 | }: - 8 | stdenv.mkDerivation { - 9 | inherit version; - | - 10 | src = ./.; - 11 | pname = "tree-sitter-docs"; - | - 12 | nativeBuildInputs = [ - 13 | mdbook - 14 | mdbook-admonish - 15 | ]; - | - 16 | buildPhase = 
'' - 17 | mdbook build - 18 | ''; - | - 19 | installPhase = '' - 20 | mkdir -p $out/share/doc - 21 | cp -r book $out/share/doc/tree-sitter - 22 | ''; - | - 23 | meta = { - 24 | description = "Tree-sitter documentation"; - 25 | homepage = "https://tree-sitter.github.io/tree-sitter"; - 26 | license = lib.licenses.mit; - 27 | }; - 28 | } - - - --------------------------------------------------------------------------------- -/docs/src/3-syntax-highlighting.md: --------------------------------------------------------------------------------- - 1 | # Syntax Highlighting - | - 2 | Syntax highlighting is a very common feature in applications that deal with code. Tree-sitter has built-in support for - 3 | syntax highlighting via the [`tree-sitter-highlight`][highlight crate] library, which is now used on GitHub.com for highlighting - 4 | code written in several languages. You can also perform syntax highlighting at the command line using the - 5 | `tree-sitter highlight` command. - | - 6 | This document explains how the Tree-sitter syntax highlighting system works, using the command line interface. If you are - 7 | using `tree-sitter-highlight` library (either from C or from Rust), all of these concepts are still applicable, but the - 8 | configuration data is provided using in-memory objects, rather than files. - | - 9 | ## Overview - | - 10 | All the files needed to highlight a given language are normally included in the same git repository as the Tree-sitter - 11 | grammar for that language (for example, [`tree-sitter-javascript`][js grammar], [`tree-sitter-ruby`][ruby grammar]). - 12 | To run syntax highlighting from the command-line, three types of files are needed: - | - 13 | 1. Per-user configuration in `~/.config/tree-sitter/config.json` (see the [init-config][init-config] page for more info). - 14 | 2. Language configuration in grammar repositories' `tree-sitter.json` files (see the [init][init] page for more info). - 15 | 3. 
Tree queries in the grammars repositories' `queries` folders. - | - 16 | For an example of the language-specific files, see the [`tree-sitter.json` file][ts json] and [`queries` directory][queries] - 17 | in the `tree-sitter-ruby` repository. The following sections describe the behavior of each file. - | - 18 | ## Language Configuration - | - 19 | The `tree-sitter.json` file is used by the Tree-sitter CLI. Within this file, the CLI looks for data nested under the - 20 | top-level `"grammars"` key. This key is expected to contain an array of objects with the following keys: - | - 21 | ### Basics - | - 22 | These keys specify basic information about the parser: - | - 23 | - `scope` (required) — A string like `"source.js"` that identifies the language. We strive to match the scope names used - 24 | by popular [TextMate grammars][textmate] and by the [Linguist][linguist] library. - | - 25 | - `path` (optional) — A relative path from the directory containing `tree-sitter.json` to another directory containing - 26 | the `src/` folder, which contains the actual generated parser. The default value is `"."` (so that `src/` is in the same - 27 | folder as `tree-sitter.json`), and this very rarely needs to be overridden. - | - 28 | - `external-files` (optional) — A list of relative paths from the root dir of a - 29 | parser to files that should be checked for modifications during recompilation. - 30 | This is useful during development to have changes to other files besides scanner.c - 31 | be picked up by the cli. - | - 32 | ### Language Detection - | - 33 | These keys help to decide whether the language applies to a given file: - | - 34 | - `file-types` — An array of filename suffix strings. The grammar will be used for files whose names end with one of these - 35 | suffixes. Note that the suffix may match an *entire* filename. 
- | - 36 | - `first-line-regex` — A regex pattern that will be tested against the first line of a file to determine whether this language - 37 | applies to the file. If present, this regex will be used for any file whose language does not match any grammar's `file-types`. - | - 38 | - `content-regex` — A regex pattern that will be tested against the contents of the file to break ties in cases where - 39 | multiple grammars matched the file using the above two criteria. If the regex matches, this grammar will be preferred over - 40 | another grammar with no `content-regex`. If the regex does not match, a grammar with no `content-regex` will be preferred - 41 | over this one. - | - 42 | - `injection-regex` — A regex pattern that will be tested against a *language name* ito determine whether this language - 43 | should be used for a potential *language injection* site. Language injection is described in more detail in [a later section](#language-injection). - | - 44 | ### Query Paths - | - 45 | These keys specify relative paths from the directory containing `tree-sitter.json` to the files that control syntax highlighting: - | - 46 | - `highlights` — Path to a *highlight query*. Default: `queries/highlights.scm` - 47 | - `locals` — Path to a *local variable query*. Default: `queries/locals.scm`. - 48 | - `injections` — Path to an *injection query*. Default: `queries/injections.scm`. - | - 49 | The behaviors of these three files are described in the next section. - | - 50 | ## Queries - | - 51 | Tree-sitter's syntax highlighting system is based on *tree queries*, which are a general system for pattern-matching on Tree-sitter's - 52 | syntax trees. See [this section][pattern matching] of the documentation for more information - 53 | about tree queries. - | - 54 | Syntax highlighting is controlled by *three* different types of query files that are usually included in the `queries` folder. - 55 | The default names for the query files use the `.scm` file. 
We chose this extension because it commonly used for files written - 56 | in [Scheme][scheme], a popular dialect of Lisp, and these query files use a Lisp-like syntax. - | - 57 | ### Highlights - | - 58 | The most important query is called the highlights query. The highlights query uses *captures* to assign arbitrary - 59 | *highlight names* to different nodes in the tree. Each highlight name can then be mapped to a color - 60 | (as described in the [init-config command][theme]). Commonly used highlight names include - 61 | `keyword`, `function`, `type`, `property`, and `string`. Names can also be dot-separated like `function.builtin`. - | - 62 | #### Example Go Snippet - | - 63 | For example, consider the following Go code: - | - 64 | ```go - 65 | func increment(a int) int { - 66 | return a + 1 - 67 | } - 68 | ``` - | - 69 | With this syntax tree: - | - 70 | ```scheme - 71 | (source_file - 72 | (function_declaration - 73 | name: (identifier) - 74 | parameters: (parameter_list - 75 | (parameter_declaration - 76 | name: (identifier) - 77 | type: (type_identifier))) - 78 | result: (type_identifier) - 79 | body: (block - 80 | (return_statement - 81 | (expression_list - 82 | (binary_expression - 83 | left: (identifier) - 84 | right: (int_literal))))))) - 85 | ``` - | - 86 | #### Example Query - | - 87 | Suppose we wanted to render this code with the following colors: - | - 88 | - keywords `func` and `return` in purple - 89 | - function `increment` in blue - 90 | - type `int` in green - 91 | - number `5` brown - | - 92 | We can assign each of these categories a *highlight name* using a query like this: - | - 93 | ```scheme - 94 | ; highlights.scm - | - 95 | "func" @keyword - 96 | "return" @keyword - 97 | (type_identifier) @type - 98 | (int_literal) @number - 99 | (function_declaration name: (identifier) @function) - 100 | ``` - | - 101 | Then, in our config file, we could map each of these highlight names to a color: - | - 102 | ```json - 103 | { - 104 | "theme": { - 
105 | "keyword": "purple", - 106 | "function": "blue", - 107 | "type": "green", - 108 | "number": "brown" - 109 | } - 110 | } - 111 | ``` - | - 112 | #### Highlights Result - | - 113 | Running `tree-sitter highlight` on this Go file would produce output like this: - | - 114 | ```admonish example collapsible=true, title='Output' - 115 |
- 116 | func increment(a int) int {
- 117 |     return a + 1
- 118 | }
- 119 | 
- 120 | ``` - | - 121 | ### Local Variables - | - 122 | Good syntax highlighting helps the reader to quickly distinguish between the different types of *entities* in their code. - 123 | Ideally, if a given entity appears in *multiple* places, it should be colored the same in each place. The Tree-sitter syntax - 124 | highlighting system can help you to achieve this by keeping track of local scopes and variables. - | - 125 | The *local variables* query is different from the highlights query in that, while the highlights query uses *arbitrary* - 126 | capture names, which can then be mapped to colors, the locals variable query uses a fixed set of capture names, each of - 127 | which has a special meaning. - | - 128 | The capture names are as follows: - | - 129 | - `@local.scope` — indicates that a syntax node introduces a new local scope. - 130 | - `@local.definition` — indicates that a syntax node contains the *name* of a definition within the current local scope. - 131 | - `@local.reference` — indicates that a syntax node contains the *name*, which *may* refer to an earlier definition within - 132 | some enclosing scope. - | - 133 | Additionally, to ignore certain nodes from being tagged, you can use the `@ignore` capture. This is useful if you want to - 134 | exclude a subset of nodes from being tagged. When writing a query leveraging this, you should ensure this pattern comes - 135 | before any other patterns that would be used for tagging, for example: - | - 136 | ```scheme - 137 | (expression (identifier) @ignore) - | - 138 | (identifier) @local.reference - 139 | ``` - | - 140 | When highlighting a file, Tree-sitter will keep track of the set of scopes that contains any given position, and the set - 141 | of definitions within each scope. When processing a syntax node that is captured as a `local.reference`, Tree-sitter will - 142 | try to find a definition for a name that matches the node's text. 
If it finds a match, Tree-sitter will ensure that the - 143 | *reference*, and the *definition* are colored the same. - | - 144 | The information produced by this query can also be *used* by the highlights query. You can *disable* a pattern for nodes, - 145 | which have been identified as local variables by adding the predicate `(#is-not? local)` to the pattern. This is used in - 146 | the example below: - | - 147 | #### Example Ruby Snippet - | - 148 | Consider this Ruby code: - | - 149 | ```ruby - 150 | def process_list(list) - 151 | context = current_context - 152 | list.map do |item| - 153 | process_item(item, context) - 154 | end - 155 | end - | - 156 | item = 5 - 157 | list = [item] - 158 | ``` - | - 159 | With this syntax tree: - | - 160 | ```scheme - 161 | (program - 162 | (method - 163 | name: (identifier) - 164 | parameters: (method_parameters - 165 | (identifier)) - 166 | (assignment - 167 | left: (identifier) - 168 | right: (identifier)) - 169 | (method_call - 170 | method: (call - 171 | receiver: (identifier) - 172 | method: (identifier)) - 173 | block: (do_block - 174 | (block_parameters - 175 | (identifier)) - 176 | (method_call - 177 | method: (identifier) - 178 | arguments: (argument_list - 179 | (identifier) - 180 | (identifier)))))) - 181 | (assignment - 182 | left: (identifier) - 183 | right: (integer)) - 184 | (assignment - 185 | left: (identifier) - 186 | right: (array - 187 | (identifier)))) - 188 | ``` - | - 189 | There are several types of names within this method: - | - 190 | - `process_list` is a method. - 191 | - Within this method, `list` is a formal parameter - 192 | - `context` is a local variable. - 193 | - `current_context` is *not* a local variable, so it must be a method. - 194 | - Within the `do` block, `item` is a formal parameter - 195 | - Later on, `item` and `list` are both local variables (not formal parameters). 
- | - 196 | #### Example Queries - | - 197 | Let's write some queries that let us clearly distinguish between these types of names. First, set up the highlighting query, - 198 | as described in the previous section. We'll assign distinct colors to method calls, method definitions, and formal parameters: - | - 199 | ```scheme - 200 | ; highlights.scm - | - 201 | (call method: (identifier) @function.method) - 202 | (method_call method: (identifier) @function.method) - | - 203 | (method name: (identifier) @function.method) - | - 204 | (method_parameters (identifier) @variable.parameter) - 205 | (block_parameters (identifier) @variable.parameter) - | - 206 | ((identifier) @function.method - 207 | (#is-not? local)) - 208 | ``` - | - 209 | Then, we'll set up a local variables query to keep track of the variables and scopes. Here, we're indicating that methods - 210 | and blocks create local *scopes*, parameters and assignments create *definitions*, and other identifiers should be considered - 211 | *references*: - | - 212 | ```scheme - 213 | ; locals.scm - | - 214 | (method) @local.scope - 215 | (do_block) @local.scope - | - 216 | (method_parameters (identifier) @local.definition) - 217 | (block_parameters (identifier) @local.definition) - | - 218 | (assignment left:(identifier) @local.definition) - | - 219 | (identifier) @local.reference - 220 | ``` - | - 221 | #### Locals Result - | - 222 | Running `tree-sitter highlight` on this Ruby file would produce output like this: - | - 223 | ```admonish example collapsible=true, title='Output' - 224 |
- 225 | def process_list(list)
- 226 |   context = current_context
- 227 |   list.map do |item|
- 228 |     process_item(item, context)
- 229 |   end
- 230 | end
-     |
- 231 | item = 5
- 232 | list = [item]
- 233 | 
- 234 | ``` - | - 235 | ### Language Injection - | - 236 | Some source files contain code written in multiple different languages. Examples include: - | - 237 | - HTML files, which can contain JavaScript inside ` - | - 99 | - 100 | - | - 105 | - - - --------------------------------------------------------------------------------- -/docs/src/assets/css/mdbook-admonish.css: --------------------------------------------------------------------------------- - 1 | @charset "UTF-8"; - 2 | :is(.admonition) { - 3 | display: flow-root; - 4 | margin: 1.5625em 0; - 5 | padding: 0 1.2rem; - 6 | color: var(--fg); - 7 | page-break-inside: avoid; - 8 | background-color: var(--bg); - 9 | border: 0 solid black; - 10 | border-inline-start-width: 0.4rem; - 11 | border-radius: 0.2rem; - 12 | box-shadow: 0 0.2rem 1rem rgba(0, 0, 0, 0.05), 0 0 0.1rem rgba(0, 0, 0, 0.1); - 13 | } - 14 | @media print { - 15 | :is(.admonition) { - 16 | box-shadow: none; - 17 | } - 18 | } - 19 | :is(.admonition) > * { - 20 | box-sizing: border-box; - 21 | } - 22 | :is(.admonition) :is(.admonition) { - 23 | margin-top: 1em; - 24 | margin-bottom: 1em; - 25 | } - 26 | :is(.admonition) > .tabbed-set:only-child { - 27 | margin-top: 0; - 28 | } - 29 | html :is(.admonition) > :last-child { - 30 | margin-bottom: 1.2rem; - 31 | } - | - 32 | a.admonition-anchor-link { - 33 | display: none; - 34 | position: absolute; - 35 | left: -1.2rem; - 36 | padding-right: 1rem; - 37 | } - 38 | a.admonition-anchor-link:link, a.admonition-anchor-link:visited { - 39 | color: var(--fg); - 40 | } - 41 | a.admonition-anchor-link:link:hover, a.admonition-anchor-link:visited:hover { - 42 | text-decoration: none; - 43 | } - 44 | a.admonition-anchor-link::before { - 45 | content: "§"; - 46 | } - | - 47 | :is(.admonition-title, summary.admonition-title) { - 48 | position: relative; - 49 | min-height: 4rem; - 50 | margin-block: 0; - 51 | margin-inline: -1.6rem -1.2rem; - 52 | padding-block: 0.8rem; - 53 | padding-inline: 4.4rem 1.2rem; - 54 | 
font-weight: 700; - 55 | background-color: rgba(68, 138, 255, 0.1); - 56 | print-color-adjust: exact; - 57 | -webkit-print-color-adjust: exact; - 58 | display: flex; - 59 | } - 60 | :is(.admonition-title, summary.admonition-title) p { - 61 | margin: 0; - 62 | } - 63 | html :is(.admonition-title, summary.admonition-title):last-child { - 64 | margin-bottom: 0; - 65 | } - 66 | :is(.admonition-title, summary.admonition-title)::before { - 67 | position: absolute; - 68 | top: 0.625em; - 69 | inset-inline-start: 1.6rem; - 70 | width: 2rem; - 71 | height: 2rem; - 72 | background-color: #448aff; - 73 | print-color-adjust: exact; - 74 | -webkit-print-color-adjust: exact; - 75 | mask-image: url('data:image/svg+xml;charset=utf-8,'); - 76 | -webkit-mask-image: url('data:image/svg+xml;charset=utf-8,'); - 77 | mask-repeat: no-repeat; - 78 | -webkit-mask-repeat: no-repeat; - 79 | mask-size: contain; - 80 | -webkit-mask-size: contain; - 81 | content: ""; - 82 | } - 83 | :is(.admonition-title, summary.admonition-title):hover a.admonition-anchor-link { - 84 | display: initial; - 85 | } - | - 86 | details.admonition > summary.admonition-title::after { - 87 | position: absolute; - 88 | top: 0.625em; - 89 | inset-inline-end: 1.6rem; - 90 | height: 2rem; - 91 | width: 2rem; - 92 | background-color: currentcolor; - 93 | mask-image: var(--md-details-icon); - 94 | -webkit-mask-image: var(--md-details-icon); - 95 | mask-repeat: no-repeat; - 96 | -webkit-mask-repeat: no-repeat; - 97 | mask-size: contain; - 98 | -webkit-mask-size: contain; - 99 | content: ""; - 100 | transform: rotate(0deg); - 101 | transition: transform 0.25s; - 102 | } - 103 | details[open].admonition > summary.admonition-title::after { - 104 | transform: rotate(90deg); - 105 | } - | - 106 | :root { - 107 | --md-details-icon: url("data:image/svg+xml;charset=utf-8,"); - 108 | } - | - 109 | :root { - 110 | --md-admonition-icon--admonish-note: url("data:image/svg+xml;charset=utf-8,"); - 111 | 
--md-admonition-icon--admonish-abstract: url("data:image/svg+xml;charset=utf-8,"); - 112 | --md-admonition-icon--admonish-info: url("data:image/svg+xml;charset=utf-8,"); - 113 | --md-admonition-icon--admonish-tip: url("data:image/svg+xml;charset=utf-8,"); - 114 | --md-admonition-icon--admonish-success: url("data:image/svg+xml;charset=utf-8,"); - 115 | --md-admonition-icon--admonish-question: url("data:image/svg+xml;charset=utf-8,"); - 116 | --md-admonition-icon--admonish-warning: url("data:image/svg+xml;charset=utf-8,"); - 117 | --md-admonition-icon--admonish-failure: url("data:image/svg+xml;charset=utf-8,"); - 118 | --md-admonition-icon--admonish-danger: url("data:image/svg+xml;charset=utf-8,"); - 119 | --md-admonition-icon--admonish-bug: url("data:image/svg+xml;charset=utf-8,"); - 120 | --md-admonition-icon--admonish-example: url("data:image/svg+xml;charset=utf-8,"); - 121 | --md-admonition-icon--admonish-quote: url("data:image/svg+xml;charset=utf-8,"); - 122 | } - | - 123 | :is(.admonition):is(.admonish-note) { - 124 | border-color: #448aff; - 125 | } - | - 126 | :is(.admonish-note) > :is(.admonition-title, summary.admonition-title) { - 127 | background-color: rgba(68, 138, 255, 0.1); - 128 | } - 129 | :is(.admonish-note) > :is(.admonition-title, summary.admonition-title)::before { - 130 | background-color: #448aff; - 131 | mask-image: var(--md-admonition-icon--admonish-note); - 132 | -webkit-mask-image: var(--md-admonition-icon--admonish-note); - 133 | mask-repeat: no-repeat; - 134 | -webkit-mask-repeat: no-repeat; - 135 | mask-size: contain; - 136 | -webkit-mask-repeat: no-repeat; - 137 | } - | - 138 | :is(.admonition):is(.admonish-abstract, .admonish-summary, .admonish-tldr) { - 139 | border-color: #00b0ff; - 140 | } - | - 141 | :is(.admonish-abstract, .admonish-summary, .admonish-tldr) > :is(.admonition-title, summary.admonition-title) { - 142 | background-color: rgba(0, 176, 255, 0.1); - 143 | } - 144 | :is(.admonish-abstract, .admonish-summary, 
.admonish-tldr) > :is(.admonition-title, summary.admonition-title)::before { - 145 | background-color: #00b0ff; - 146 | mask-image: var(--md-admonition-icon--admonish-abstract); - 147 | -webkit-mask-image: var(--md-admonition-icon--admonish-abstract); - 148 | mask-repeat: no-repeat; - 149 | -webkit-mask-repeat: no-repeat; - 150 | mask-size: contain; - 151 | -webkit-mask-repeat: no-repeat; - 152 | } - | - 153 | :is(.admonition):is(.admonish-info, .admonish-todo) { - 154 | border-color: #00b8d4; - 155 | } - | - 156 | :is(.admonish-info, .admonish-todo) > :is(.admonition-title, summary.admonition-title) { - 157 | background-color: rgba(0, 184, 212, 0.1); - 158 | } - 159 | :is(.admonish-info, .admonish-todo) > :is(.admonition-title, summary.admonition-title)::before { - 160 | background-color: #00b8d4; - 161 | mask-image: var(--md-admonition-icon--admonish-info); - 162 | -webkit-mask-image: var(--md-admonition-icon--admonish-info); - 163 | mask-repeat: no-repeat; - 164 | -webkit-mask-repeat: no-repeat; - 165 | mask-size: contain; - 166 | -webkit-mask-repeat: no-repeat; - 167 | } - | - 168 | :is(.admonition):is(.admonish-tip, .admonish-hint, .admonish-important) { - 169 | border-color: #00bfa5; - 170 | } - | - 171 | :is(.admonish-tip, .admonish-hint, .admonish-important) > :is(.admonition-title, summary.admonition-title) { - 172 | background-color: rgba(0, 191, 165, 0.1); - 173 | } - 174 | :is(.admonish-tip, .admonish-hint, .admonish-important) > :is(.admonition-title, summary.admonition-title)::before { - 175 | background-color: #00bfa5; - 176 | mask-image: var(--md-admonition-icon--admonish-tip); - 177 | -webkit-mask-image: var(--md-admonition-icon--admonish-tip); - 178 | mask-repeat: no-repeat; - 179 | -webkit-mask-repeat: no-repeat; - 180 | mask-size: contain; - 181 | -webkit-mask-repeat: no-repeat; - 182 | } - | - 183 | :is(.admonition):is(.admonish-success, .admonish-check, .admonish-done) { - 184 | border-color: #00c853; - 185 | } - | - 186 | 
:is(.admonish-success, .admonish-check, .admonish-done) > :is(.admonition-title, summary.admonition-title) { - 187 | background-color: rgba(0, 200, 83, 0.1); - 188 | } - 189 | :is(.admonish-success, .admonish-check, .admonish-done) > :is(.admonition-title, summary.admonition-title)::before { - 190 | background-color: #00c853; - 191 | mask-image: var(--md-admonition-icon--admonish-success); - 192 | -webkit-mask-image: var(--md-admonition-icon--admonish-success); - 193 | mask-repeat: no-repeat; - 194 | -webkit-mask-repeat: no-repeat; - 195 | mask-size: contain; - 196 | -webkit-mask-repeat: no-repeat; - 197 | } - | - 198 | :is(.admonition):is(.admonish-question, .admonish-help, .admonish-faq) { - 199 | border-color: #64dd17; - 200 | } - | - 201 | :is(.admonish-question, .admonish-help, .admonish-faq) > :is(.admonition-title, summary.admonition-title) { - 202 | background-color: rgba(100, 221, 23, 0.1); - 203 | } - 204 | :is(.admonish-question, .admonish-help, .admonish-faq) > :is(.admonition-title, summary.admonition-title)::before { - 205 | background-color: #64dd17; - 206 | mask-image: var(--md-admonition-icon--admonish-question); - 207 | -webkit-mask-image: var(--md-admonition-icon--admonish-question); - 208 | mask-repeat: no-repeat; - 209 | -webkit-mask-repeat: no-repeat; - 210 | mask-size: contain; - 211 | -webkit-mask-repeat: no-repeat; - 212 | } - | - 213 | :is(.admonition):is(.admonish-warning, .admonish-caution, .admonish-attention) { - 214 | border-color: #ff9100; - 215 | } - | - 216 | :is(.admonish-warning, .admonish-caution, .admonish-attention) > :is(.admonition-title, summary.admonition-title) { - 217 | background-color: rgba(255, 145, 0, 0.1); - 218 | } - 219 | :is(.admonish-warning, .admonish-caution, .admonish-attention) > :is(.admonition-title, summary.admonition-title)::before { - 220 | background-color: #ff9100; - 221 | mask-image: var(--md-admonition-icon--admonish-warning); - 222 | -webkit-mask-image: var(--md-admonition-icon--admonish-warning); 
- 223 | mask-repeat: no-repeat; - 224 | -webkit-mask-repeat: no-repeat; - 225 | mask-size: contain; - 226 | -webkit-mask-repeat: no-repeat; - 227 | } - | - 228 | :is(.admonition):is(.admonish-failure, .admonish-fail, .admonish-missing) { - 229 | border-color: #ff5252; - 230 | } - | - 231 | :is(.admonish-failure, .admonish-fail, .admonish-missing) > :is(.admonition-title, summary.admonition-title) { - 232 | background-color: rgba(255, 82, 82, 0.1); - 233 | } - 234 | :is(.admonish-failure, .admonish-fail, .admonish-missing) > :is(.admonition-title, summary.admonition-title)::before { - 235 | background-color: #ff5252; - 236 | mask-image: var(--md-admonition-icon--admonish-failure); - 237 | -webkit-mask-image: var(--md-admonition-icon--admonish-failure); - 238 | mask-repeat: no-repeat; - 239 | -webkit-mask-repeat: no-repeat; - 240 | mask-size: contain; - 241 | -webkit-mask-repeat: no-repeat; - 242 | } - | - 243 | :is(.admonition):is(.admonish-danger, .admonish-error) { - 244 | border-color: #ff1744; - 245 | } - | - 246 | :is(.admonish-danger, .admonish-error) > :is(.admonition-title, summary.admonition-title) { - 247 | background-color: rgba(255, 23, 68, 0.1); - 248 | } - 249 | :is(.admonish-danger, .admonish-error) > :is(.admonition-title, summary.admonition-title)::before { - 250 | background-color: #ff1744; - 251 | mask-image: var(--md-admonition-icon--admonish-danger); - 252 | -webkit-mask-image: var(--md-admonition-icon--admonish-danger); - 253 | mask-repeat: no-repeat; - 254 | -webkit-mask-repeat: no-repeat; - 255 | mask-size: contain; - 256 | -webkit-mask-repeat: no-repeat; - 257 | } - | - 258 | :is(.admonition):is(.admonish-bug) { - 259 | border-color: #f50057; - 260 | } - | - 261 | :is(.admonish-bug) > :is(.admonition-title, summary.admonition-title) { - 262 | background-color: rgba(245, 0, 87, 0.1); - 263 | } - 264 | :is(.admonish-bug) > :is(.admonition-title, summary.admonition-title)::before { - 265 | background-color: #f50057; - 266 | mask-image: 
var(--md-admonition-icon--admonish-bug); - 267 | -webkit-mask-image: var(--md-admonition-icon--admonish-bug); - 268 | mask-repeat: no-repeat; - 269 | -webkit-mask-repeat: no-repeat; - 270 | mask-size: contain; - 271 | -webkit-mask-repeat: no-repeat; - 272 | } - | - 273 | :is(.admonition):is(.admonish-example) { - 274 | border-color: #7c4dff; - 275 | } - | - 276 | :is(.admonish-example) > :is(.admonition-title, summary.admonition-title) { - 277 | background-color: rgba(124, 77, 255, 0.1); - 278 | } - 279 | :is(.admonish-example) > :is(.admonition-title, summary.admonition-title)::before { - 280 | background-color: #7c4dff; - 281 | mask-image: var(--md-admonition-icon--admonish-example); - 282 | -webkit-mask-image: var(--md-admonition-icon--admonish-example); - 283 | mask-repeat: no-repeat; - 284 | -webkit-mask-repeat: no-repeat; - 285 | mask-size: contain; - 286 | -webkit-mask-repeat: no-repeat; - 287 | } - | - 288 | :is(.admonition):is(.admonish-quote, .admonish-cite) { - 289 | border-color: #9e9e9e; - 290 | } - | - 291 | :is(.admonish-quote, .admonish-cite) > :is(.admonition-title, summary.admonition-title) { - 292 | background-color: rgba(158, 158, 158, 0.1); - 293 | } - 294 | :is(.admonish-quote, .admonish-cite) > :is(.admonition-title, summary.admonition-title)::before { - 295 | background-color: #9e9e9e; - 296 | mask-image: var(--md-admonition-icon--admonish-quote); - 297 | -webkit-mask-image: var(--md-admonition-icon--admonish-quote); - 298 | mask-repeat: no-repeat; - 299 | -webkit-mask-repeat: no-repeat; - 300 | mask-size: contain; - 301 | -webkit-mask-repeat: no-repeat; - 302 | } - | - 303 | .navy :is(.admonition) { - 304 | background-color: var(--sidebar-bg); - 305 | } - | - 306 | .ayu :is(.admonition), - 307 | .coal :is(.admonition) { - 308 | background-color: var(--theme-hover); - 309 | } - | - 310 | .rust :is(.admonition) { - 311 | background-color: var(--sidebar-bg); - 312 | color: var(--sidebar-fg); - 313 | } - 314 | .rust 
.admonition-anchor-link:link, .rust .admonition-anchor-link:visited { - 315 | color: var(--sidebar-fg); - 316 | } - - - --------------------------------------------------------------------------------- -/docs/src/assets/css/playground.css: --------------------------------------------------------------------------------- - 1 | /* Base Variables */ - 2 | :root { - 3 | --light-bg: #f9f9f9; - 4 | --light-border: #e0e0e0; - 5 | --light-text: #333; - 6 | --light-hover-border: #c1c1c1; - 7 | --light-scrollbar-track: #f1f1f1; - 8 | --light-scrollbar-thumb: #c1c1c1; - 9 | --light-scrollbar-thumb-hover: #a8a8a8; - | - 10 | --dark-bg: #1d1f21; - 11 | --dark-border: #2d2d2d; - 12 | --dark-text: #c5c8c6; - 13 | --dark-scrollbar-track: #25282c; - 14 | --dark-scrollbar-thumb: #4a4d51; - 15 | --dark-scrollbar-thumb-hover: #5a5d61; - | - 16 | --primary-color: #0550ae; - 17 | --primary-color-alpha: rgba(5, 80, 174, 0.1); - 18 | --primary-color-alpha-dark: rgba(121, 192, 255, 0.1); - 19 | --selection-color: rgba(39, 95, 255, 0.3); - 20 | } - | - 21 | /* Common Scrollbar Styles */ - 22 | ::-webkit-scrollbar { - 23 | width: 8px; - 24 | height: 8px; - 25 | } - | - 26 | ::-webkit-scrollbar-track { - 27 | border-radius: 4px; - 28 | } - | - 29 | ::-webkit-scrollbar-thumb { - 30 | border-radius: 4px; - 31 | } - | - 32 | /* Base Light Theme Scrollbars */ - 33 | ::-webkit-scrollbar-track { - 34 | background: var(--light-scrollbar-track); - 35 | } - | - 36 | ::-webkit-scrollbar-thumb { - 37 | background: var(--light-scrollbar-thumb); - 38 | } - | - 39 | ::-webkit-scrollbar-thumb:hover { - 40 | background: var(--light-scrollbar-thumb-hover); - 41 | } - | - 42 | /* Dropdown Styling */ - 43 | .custom-select { - 44 | position: relative; - 45 | display: inline-block; - 46 | } - | - 47 | .language-container { - 48 | display: flex; - 49 | align-items: center; - 50 | gap: 16px; - 51 | margin-bottom: 16px; - 52 | } - | - 53 | #language-version { - 54 | color: var(--light-text); - 55 | font-size: 14px; 
- 56 | font-weight: 500; - 57 | padding: 4px 8px; - 58 | background: var(--light-bg); - 59 | border-radius: 4px; - 60 | border: 1px solid var(--light-border); - 61 | } - | - 62 | #language-select { - 63 | background-color: var(--light-bg); - 64 | border: 1px solid var(--light-border); - 65 | border-radius: 4px; - 66 | padding: 4px 24px 4px 8px; - 67 | font-size: 14px; - 68 | color: var(--light-text); - 69 | cursor: pointer; - 70 | min-width: 120px; - 71 | appearance: none; - 72 | background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 24 24' fill='none' stroke='%23666' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='6 9 12 15 18 9'%3E%3C/polyline%3E%3C/svg%3E"); - 73 | background-repeat: no-repeat; - 74 | background-position: right 8px center; - 75 | } - | - 76 | #copy-button { - 77 | background: none; - 78 | border: 1px solid var(--light-border); - 79 | border-radius: 4px; - 80 | padding: 6px; - 81 | cursor: pointer; - 82 | color: var(--light-text); - 83 | display: inline-flex; - 84 | align-items: center; - 85 | justify-content: center; - 86 | margin-left: 8px; - 87 | } - | - 88 | #copy-button:hover { - 89 | background-color: var(--primary-color-alpha); - 90 | border-color: var(--light-hover-border); - 91 | } - | - 92 | #copy-button:focus { - 93 | outline: none; - 94 | border-color: var(--primary-color); - 95 | box-shadow: 0 0 0 2px var(--primary-color-alpha); - 96 | } - | - 97 | .toast { - 98 | position: fixed; - 99 | bottom: 20px; - 100 | right: 20px; - 101 | background-color: var(--lighbt-bg); - 102 | color: var(--light-text); - 103 | padding: 12px 16px; - 104 | border-radius: 6px; - 105 | box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3); - 106 | font-size: 14px; - 107 | font-weight: 500; - 108 | opacity: 0; - 109 | transform: translateY(20px); - 110 | transition: all 0.3s ease; - 111 | z-index: 1000; - 112 | pointer-events: none; - 113 | } - | - 114 | .toast.show { - 
115 | opacity: 1; - 116 | transform: translateY(0); - 117 | } - | - 118 | .select-button { - 119 | background-color: var(--light-bg); - 120 | border: 1px solid var(--light-border); - 121 | border-radius: 4px; - 122 | padding: 4px 8px; - 123 | font-size: 14px; - 124 | color: var(--light-text); - 125 | cursor: pointer; - 126 | min-width: 120px; - 127 | display: flex; - 128 | align-items: center; - 129 | justify-content: space-between; - 130 | } - | - 131 | #language-select:hover, - 132 | .select-button:hover { - 133 | border-color: var(--light-hover-border); - 134 | } - | - 135 | #language-select:focus, - 136 | .select-button:focus { - 137 | outline: none; - 138 | border-color: var(--primary-color); - 139 | box-shadow: 0 0 0 2px var(--primary-color-alpha); - 140 | } - | - 141 | /* Custom Checkbox Styling */ - 142 | input[type="checkbox"] { - 143 | appearance: none; - 144 | width: 16px; - 145 | height: 16px; - 146 | border: 1px solid var(--light-border); - 147 | border-radius: 3px; - 148 | margin-right: 6px; - 149 | position: relative; - 150 | cursor: pointer; - 151 | vertical-align: middle; - 152 | } - | - 153 | input[type="checkbox"]:checked { - 154 | background-color: var(--primary-color); - 155 | border-color: var(--primary-color); - 156 | } - | - 157 | input[type="checkbox"]:checked::after { - 158 | content: ''; - 159 | position: absolute; - 160 | left: 5px; - 161 | top: 2px; - 162 | width: 4px; - 163 | height: 8px; - 164 | border: solid white; - 165 | border-width: 0 2px 2px 0; - 166 | transform: rotate(45deg); - 167 | } - | - 168 | input[type="checkbox"]:hover { - 169 | border-color: var(--light-hover-border); - 170 | } - | - 171 | input[type="checkbox"]:focus { - 172 | outline: none; - 173 | border-color: var(--primary-color); - 174 | box-shadow: 0 0 0 2px var(--primary-color-alpha); - 175 | } - | - 176 | /* Select Dropdown */ - 177 | .select-dropdown { - 178 | position: absolute; - 179 | top: 100%; - 180 | left: 0; - 181 | right: 0; - 182 | background-color: 
var(--light-bg); - 183 | border: 1px solid var(--light-border); - 184 | border-radius: 4px; - 185 | margin-top: 4px; - 186 | box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); - 187 | display: none; - 188 | z-index: 1000; - 189 | max-height: 300px; - 190 | overflow-y: auto; - 191 | } - | - 192 | .select-dropdown.show { - 193 | display: block; - 194 | } - | - 195 | .option { - 196 | padding: 8px 12px; - 197 | cursor: pointer; - 198 | } - | - 199 | .option:hover { - 200 | background-color: var(--primary-color-alpha); - 201 | } - | - 202 | .option.selected { - 203 | background-color: var(--primary-color-alpha); - 204 | } - | - 205 | /* CodeMirror Base Styles */ - 206 | .ts-playground .CodeMirror { - 207 | border-radius: 6px; - 208 | background-color: var(--light-bg) !important; - 209 | color: #080808 !important; - 210 | } - | - 211 | .ts-playground .CodeMirror-scroll { - 212 | padding: 8px; - 213 | border: 1px solid var(--light-border); - 214 | border-radius: 6px; - 215 | box-shadow: 0 1px 2px rgba(0, 0, 0, 0.1); - 216 | } - | - 217 | .ayu .ts-playground .CodeMirror-scroll, - 218 | .coal .ts-playground .CodeMirror-scroll, - 219 | .navy .ts-playground .CodeMirror-scroll { - 220 | border-color: var(--dark-border); - 221 | } - | - 222 | .ts-playground .CodeMirror-gutters { - 223 | background: #ebebeb !important; - 224 | border-right: 1px solid #e8e8e8 !important; - 225 | } - | - 226 | .ts-playground .CodeMirror-cursor { - 227 | border-left: 2px solid #000 !important; - 228 | } - | - 229 | .ts-playground .CodeMirror-selected { - 230 | background: var(--selection-color) !important; - 231 | } - | - 232 | .ts-playground .CodeMirror-activeline-background { - 233 | background: rgba(36, 99, 180, 0.12) !important; - 234 | } - | - 235 | .query-error { - 236 | text-decoration: underline red dashed; - 237 | -webkit-text-decoration: underline red dashed; - 238 | } - | - 239 | /* Output Container Styles */ - 240 | #output-container { - 241 | color: #080808; - 242 | background-color: 
var(--light-bg); - 243 | margin: 0; - 244 | white-space: pre; - 245 | font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, "Liberation Mono", monospace; - 246 | } - | - 247 | #output-container-scroll { - 248 | max-height: 400px; - 249 | overflow: auto; - 250 | padding: 8px; - 251 | border: 1px solid var(--light-border); - 252 | border-radius: 6px; - 253 | box-shadow: 0 1px 2px rgba(0, 0, 0, 0.1); - 254 | background-color: var(--light-bg); - 255 | } - | - 256 | #output-container a { - 257 | color: var(--primary-color); - 258 | text-decoration: none; - 259 | } - | - 260 | #output-container a:hover { - 261 | text-decoration: underline; - 262 | } - | - 263 | #output-container a.node-link.anonymous { - 264 | color: #116329; - 265 | } - | - 266 | #output-container a.node-link.anonymous:before { - 267 | content: '"'; - 268 | } - | - 269 | #output-container a.node-link.anonymous:after { - 270 | content: '"'; - 271 | } - | - 272 | #output-container a.node-link.error { - 273 | color: #cf222e; - 274 | } - | - 275 | #output-container a.highlighted { - 276 | background-color: var(--selection-color); - 277 | } - | - 278 | /* Dark Theme Overrides */ - 279 | .ayu, - 280 | .coal, - 281 | .navy { - | - 282 | & #language-version, - 283 | & #language-select, - 284 | & #copy-button, - 285 | & .select-button { - 286 | background-color: var(--dark-bg); - 287 | border-color: var(--dark-border); - 288 | color: var(--dark-text); - 289 | } - | - 290 | & #copy-button:hover, - 291 | & #language-select:hover, - 292 | & .select-button:hover { - 293 | border-color: var(--dark-border); - 294 | background-color: var(--primary-color-alpha-dark); - 295 | } - | - 296 | & .toast { - 297 | background-color: var(--dark-bg); - 298 | color: var(--dark-text); - 299 | } - | - 300 | #language-select:focus, - 301 | & .select-button:focus { - 302 | border-color: #79c0ff; - 303 | box-shadow: 0 0 0 2px var(--primary-color-alpha-dark); - 304 | } - | - 305 | & input[type="checkbox"] { - 306 | 
border-color: var(--dark-border); - 307 | background-color: var(--dark-bg); - 308 | } - | - 309 | & input[type="checkbox"]:checked { - 310 | background-color: #79c0ff; - 311 | border-color: #79c0ff; - 312 | } - | - 313 | & label { - 314 | color: var(--dark-text); - 315 | } - | - 316 | & .select-dropdown { - 317 | background-color: var(--dark-bg); - 318 | border-color: var(--dark-border); - 319 | box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3); - 320 | } - | - 321 | & .option:hover { - 322 | background-color: var(--primary-color-alpha-dark); - 323 | } - | - 324 | & .option.selected { - 325 | background-color: var(--primary-color-alpha-dark); - 326 | } - | - 327 | & .ts-playground .CodeMirror { - 328 | background-color: var(--dark-bg) !important; - 329 | color: var(--dark-text) !important; - 330 | } - | - 331 | & .ts-playground .CodeMirror-gutters { - 332 | background: var(--dark-scrollbar-track) !important; - 333 | border-right-color: var(--dark-border) !important; - 334 | } - | - 335 | & .ts-playground .CodeMirror-cursor { - 336 | border-left-color: #aeafad !important; - 337 | } - | - 338 | & .ts-playground .CodeMirror-selected { - 339 | background: #373b41 !important; - 340 | } - | - 341 | & .ts-playground .CodeMirror-activeline-background { - 342 | background: #282a2e !important; - 343 | } - | - 344 | & #output-container { - 345 | color: var(--dark-text); - 346 | background-color: var(--dark-bg); - 347 | } - | - 348 | & #output-container-scroll { - 349 | background-color: var(--dark-bg); - 350 | border-color: var(--dark-border); - 351 | } - | - 352 | & #output-container a { - 353 | color: #79c0ff; - 354 | } - | - 355 | & #output-container a.node-link.anonymous { - 356 | color: #7ee787; - 357 | } - | - 358 | & #output-container a.node-link.error { - 359 | color: #ff7b72; - 360 | } - | - 361 | & #output-container a.highlighted { - 362 | background-color: #373b41; - 363 | } - | - 364 | /* Dark Theme Scrollbars */ - 365 | & ::-webkit-scrollbar-track { - 366 | background: 
var(--dark-scrollbar-track) !important; - 367 | } - | - 368 | & ::-webkit-scrollbar-thumb { - 369 | background: var(--dark-scrollbar-thumb) !important; - 370 | } - | - 371 | & ::-webkit-scrollbar-thumb:hover { - 372 | background: var(--dark-scrollbar-thumb-hover) !important; - 373 | } - | - 374 | & * { - 375 | scrollbar-width: thin !important; - 376 | scrollbar-color: var(--dark-scrollbar-thumb) var(--dark-scrollbar-track) !important; - 377 | } - 378 | } - | - 379 | /* Spacing Utilities */ - 380 | #language-select, - 381 | input[type="checkbox"], - 382 | label { - 383 | margin: 0 4px; - 384 | } - | - 385 | #language-select { - 386 | margin-right: 16px; - 387 | } - | - 388 | label { - 389 | font-size: 14px; - 390 | margin-right: 16px; - 391 | cursor: pointer; - 392 | } - - - --------------------------------------------------------------------------------- -/docs/src/assets/js/playground.js: --------------------------------------------------------------------------------- - 1 | function initializeLocalTheme() { - 2 | const themeToggle = document.getElementById('theme-toggle'); - 3 | if (!themeToggle) return; - | - 4 | // Load saved theme or use system preference - 5 | const savedTheme = localStorage.getItem('theme'); - 6 | const prefersDark = window.matchMedia('(prefers-color-scheme: dark)').matches; - 7 | const initialTheme = savedTheme || (prefersDark ? 'dark' : 'light'); - | - 8 | // Set initial theme - 9 | document.documentElement.setAttribute('data-theme', initialTheme); - | - 10 | themeToggle.addEventListener('click', () => { - 11 | const currentTheme = document.documentElement.getAttribute('data-theme'); - 12 | const newTheme = currentTheme === 'light' ? 
'dark' : 'light'; - 13 | document.documentElement.setAttribute('data-theme', newTheme); - 14 | localStorage.setItem('theme', newTheme); - 15 | }); - 16 | } - | - 17 | function initializeCustomSelect({ initialValue = null, addListeners = false }) { - 18 | const button = document.getElementById('language-button'); - 19 | const select = document.getElementById('language-select'); - 20 | if (!button || !select) return; - | - 21 | const dropdown = button.nextElementSibling; - 22 | const selectedValue = button.querySelector('.selected-value'); - | - 23 | if (initialValue) { - 24 | select.value = initialValue; - 25 | } - 26 | if (select.selectedIndex >= 0 && select.options[select.selectedIndex]) { - 27 | selectedValue.textContent = select.options[select.selectedIndex].text; - 28 | } else { - 29 | selectedValue.textContent = 'JavaScript'; - 30 | } - | - 31 | if (addListeners) { - 32 | button.addEventListener('click', (e) => { - 33 | e.preventDefault(); // Prevent form submission - 34 | dropdown.classList.toggle('show'); - 35 | }); - | - 36 | document.addEventListener('click', (e) => { - 37 | if (!button.contains(e.target)) { - 38 | dropdown.classList.remove('show'); - 39 | } - 40 | }); - | - 41 | dropdown.querySelectorAll('.option').forEach(option => { - 42 | option.addEventListener('click', () => { - 43 | selectedValue.textContent = option.textContent; - 44 | select.value = option.dataset.value; - 45 | dropdown.classList.remove('show'); - | - 46 | const event = new Event('change'); - 47 | select.dispatchEvent(event); - 48 | }); - 49 | }); - 50 | } - 51 | } - | - 52 | window.initializePlayground = async (opts) => { - 53 | const { Parser, Language } = window.TreeSitter; - | - 54 | const { local } = opts; - 55 | if (local) { - 56 | initializeLocalTheme(); - 57 | } - 58 | initializeCustomSelect({ addListeners: true }); - | - 59 | let tree; - | - 60 | const CAPTURE_REGEX = /@\s*([\w\._-]+)/g; - 61 | const LIGHT_COLORS = [ - 62 | "#0550ae", // blue - 63 | "#ab5000", // rust 
brown - 64 | "#116329", // forest green - 65 | "#844708", // warm brown - 66 | "#6639ba", // purple - 67 | "#7d4e00", // orange brown - 68 | "#0969da", // bright blue - 69 | "#1a7f37", // green - 70 | "#cf222e", // red - 71 | "#8250df", // violet - 72 | "#6e7781", // gray - 73 | "#953800", // dark orange - 74 | "#1b7c83" // teal - 75 | ]; - | - 76 | const DARK_COLORS = [ - 77 | "#79c0ff", // light blue - 78 | "#ffa657", // orange - 79 | "#7ee787", // light green - 80 | "#ff7b72", // salmon - 81 | "#d2a8ff", // light purple - 82 | "#ffa198", // pink - 83 | "#a5d6ff", // pale blue - 84 | "#56d364", // bright green - 85 | "#ff9492", // light red - 86 | "#e0b8ff", // pale purple - 87 | "#9ca3af", // gray - 88 | "#ffb757", // yellow orange - 89 | "#80cbc4" // light teal - 90 | ]; - | - 91 | const codeInput = document.getElementById("code-input"); - 92 | const languageSelect = document.getElementById("language-select"); - 93 | const languageVersion = document.getElementById('language-version'); - 94 | const loggingCheckbox = document.getElementById("logging-checkbox"); - 95 | const anonymousNodes = document.getElementById('anonymous-nodes-checkbox'); - 96 | const outputContainer = document.getElementById("output-container"); - 97 | const outputContainerScroll = document.getElementById( - 98 | "output-container-scroll", - 99 | ); - 100 | const playgroundContainer = document.getElementById("playground-container"); - 101 | const queryCheckbox = document.getElementById("query-checkbox"); - 102 | const queryContainer = document.getElementById("query-container"); - 103 | const queryInput = document.getElementById("query-input"); - 104 | const accessibilityCheckbox = document.getElementById("accessibility-checkbox"); - 105 | const copyButton = document.getElementById("copy-button"); - 106 | const updateTimeSpan = document.getElementById("update-time"); - 107 | const languagesByName = {}; - | - 108 | loadState(); - | - 109 | await Parser.init(); - | - 110 | const parser = new 
Parser(); - | - 111 | const codeEditor = CodeMirror.fromTextArea(codeInput, { - 112 | lineNumbers: true, - 113 | showCursorWhenSelecting: true - 114 | }); - | - 115 | codeEditor.on('keydown', (_, event) => { - 116 | const key = event.key; - 117 | if (key === 'ArrowLeft' || key === 'ArrowRight' || key === '?') { - 118 | event.stopPropagation(); // Prevent mdBook from going back/forward, or showing help - 119 | } - 120 | }); - | - 121 | const queryEditor = CodeMirror.fromTextArea(queryInput, { - 122 | lineNumbers: true, - 123 | showCursorWhenSelecting: true, - 124 | }); - | - 125 | queryEditor.on('keydown', (_, event) => { - 126 | if (event.key === 'ArrowLeft' || event.key === 'ArrowRight') { - 127 | event.stopPropagation(); // Prevent mdBook from going back/forward - 128 | } - 129 | }); - | - 130 | const cluster = new Clusterize({ - 131 | rows: [], - 132 | noDataText: null, - 133 | contentElem: outputContainer, - 134 | scrollElem: outputContainerScroll, - 135 | }); - 136 | const renderTreeOnCodeChange = debounce(renderTree, 50); - 137 | const saveStateOnChange = debounce(saveState, 2000); - 138 | const runTreeQueryOnChange = debounce(runTreeQuery, 50); - | - 139 | let languageName = languageSelect.value; - 140 | let treeRows = null; - 141 | let treeRowHighlightedIndex = -1; - 142 | let parseCount = 0; - 143 | let isRendering = 0; - 144 | let query; - | - 145 | codeEditor.on("changes", handleCodeChange); - 146 | codeEditor.on("viewportChange", runTreeQueryOnChange); - 147 | codeEditor.on("cursorActivity", debounce(handleCursorMovement, 150)); - 148 | queryEditor.on("changes", debounce(handleQueryChange, 150)); - | - 149 | loggingCheckbox.addEventListener("change", handleLoggingChange); - 150 | anonymousNodes.addEventListener("change", renderTree); - 151 | queryCheckbox.addEventListener("change", handleQueryEnableChange); - 152 | accessibilityCheckbox.addEventListener("change", handleQueryChange); - 153 | languageSelect.addEventListener("change", 
handleLanguageChange); - 154 | outputContainer.addEventListener("click", handleTreeClick); - 155 | copyButton?.addEventListener("click", handleCopy); - | - 156 | handleQueryEnableChange(); - 157 | await handleLanguageChange(); - | - 158 | playgroundContainer.style.visibility = "visible"; - | - 159 | async function handleLanguageChange() { - 160 | const newLanguageName = languageSelect.value; - 161 | if (!languagesByName[newLanguageName]) { - 162 | const url = `${LANGUAGE_BASE_URL}/tree-sitter-${newLanguageName}.wasm`; - 163 | languageSelect.disabled = true; - 164 | try { - 165 | languagesByName[newLanguageName] = await Language.load(url); - 166 | } catch (e) { - 167 | console.error(e); - 168 | languageSelect.value = languageName; - 169 | return; - 170 | } finally { - 171 | languageSelect.disabled = false; - 172 | } - 173 | } - | - 174 | tree = null; - 175 | languageName = newLanguageName; - | - 176 | const metadata = languagesByName[languageName].metadata; - 177 | if (languageVersion && metadata) { - 178 | languageVersion.textContent = `v${metadata.major_version}.${metadata.minor_version}.${metadata.patch_version}`; - 179 | languageVersion.style.visibility = 'visible'; - 180 | } else if (languageVersion) { - 181 | languageVersion.style.visibility = 'hidden'; - 182 | } - | - 183 | parser.setLanguage(languagesByName[newLanguageName]); - 184 | handleCodeChange(); - 185 | handleQueryChange(); - 186 | } - | - 187 | async function handleCodeChange(editor, changes) { - 188 | const newText = codeEditor.getValue() + "\n"; - 189 | const edits = tree && changes && changes.map(treeEditForEditorChange); - | - 190 | const start = performance.now(); - 191 | if (edits) { - 192 | for (const edit of edits) { - 193 | tree.edit(edit); - 194 | } - 195 | } - 196 | const newTree = parser.parse(newText, tree); - 197 | const duration = (performance.now() - start).toFixed(1); - | - 198 | updateTimeSpan.innerText = `${duration} ms`; - 199 | if (tree) tree.delete(); - 200 | tree = newTree; - 
201 | parseCount++; - 202 | renderTreeOnCodeChange(); - 203 | runTreeQueryOnChange(); - 204 | saveStateOnChange(); - 205 | } - | - 206 | async function renderTree() { - 207 | isRendering++; - 208 | const cursor = tree.walk(); - | - 209 | let currentRenderCount = parseCount; - 210 | let row = ""; - 211 | let rows = []; - 212 | let finishedRow = false; - 213 | let visitedChildren = false; - 214 | let indentLevel = 0; - | - 215 | for (let i = 0; ; i++) { - 216 | if (i > 0 && i % 10000 === 0) { - 217 | await new Promise((r) => setTimeout(r, 0)); - 218 | if (parseCount !== currentRenderCount) { - 219 | cursor.delete(); - 220 | isRendering--; - 221 | return; - 222 | } - 223 | } - | - 224 | let displayName; - 225 | if (cursor.nodeIsMissing) { - 226 | const nodeTypeText = cursor.nodeIsNamed ? cursor.nodeType : `"${cursor.nodeType}"`; - 227 | displayName = `MISSING ${nodeTypeText}`; - 228 | } else if (cursor.nodeIsNamed) { - 229 | displayName = cursor.nodeType; - 230 | } else if (anonymousNodes.checked) { - 231 | displayName = cursor.nodeType - 232 | } - | - 233 | if (visitedChildren) { - 234 | if (displayName) { - 235 | finishedRow = true; - 236 | } - | - 237 | if (cursor.gotoNextSibling()) { - 238 | visitedChildren = false; - 239 | } else if (cursor.gotoParent()) { - 240 | visitedChildren = true; - 241 | indentLevel--; - 242 | } else { - 243 | break; - 244 | } - 245 | } else { - 246 | if (displayName) { - 247 | if (finishedRow) { - 248 | row += ""; - 249 | rows.push(row); - 250 | finishedRow = false; - 251 | } - 252 | const start = cursor.startPosition; - 253 | const end = cursor.endPosition; - 254 | const id = cursor.nodeId; - 255 | let fieldName = cursor.currentFieldName; - 256 | if (fieldName) { - 257 | fieldName += ": "; - 258 | } else { - 259 | fieldName = ""; - 260 | } - | - 261 | const nodeClass = - 262 | displayName === 'ERROR' || displayName.startsWith('MISSING') - 263 | ? 'node-link error plain' - 264 | : cursor.nodeIsNamed - 265 | ? 
'node-link named plain' - 266 | : 'node-link anonymous plain'; - | - 267 | row = `
${" ".repeat(indentLevel)}${fieldName}` + - 268 | `` + - 270 | `${displayName} ` + - 271 | `[${start.row}, ${start.column}] - [${end.row}, ${end.column}]`; - 272 | finishedRow = true; - 273 | } - | - 274 | if (cursor.gotoFirstChild()) { - 275 | visitedChildren = false; - 276 | indentLevel++; - 277 | } else { - 278 | visitedChildren = true; - 279 | } - 280 | } - 281 | } - 282 | if (finishedRow) { - 283 | row += "
"; - 284 | rows.push(row); - 285 | } - | - 286 | cursor.delete(); - 287 | cluster.update(rows); - 288 | treeRows = rows; - 289 | isRendering--; - 290 | handleCursorMovement(); - 291 | } - | - 292 | function getCaptureCSS(name) { - 293 | if (accessibilityCheckbox.checked) { - 294 | return `color: white; background-color: ${colorForCaptureName(name)}`; - 295 | } else { - 296 | return `color: ${colorForCaptureName(name)}`; - 297 | } - 298 | } - | - 299 | function runTreeQuery(_, startRow, endRow) { - 300 | if (endRow == null) { - 301 | const viewport = codeEditor.getViewport(); - 302 | startRow = viewport.from; - 303 | endRow = viewport.to; - 304 | } - | - 305 | codeEditor.operation(() => { - 306 | const marks = codeEditor.getAllMarks(); - 307 | marks.forEach((m) => m.clear()); - | - 308 | if (tree && query) { - 309 | const captures = query.captures( - 310 | tree.rootNode, - 311 | { row: startRow, column: 0 }, - 312 | { row: endRow, column: 0 }, - 313 | ); - 314 | let lastNodeId; - 315 | for (const { name, node } of captures) { - 316 | if (node.id === lastNodeId) continue; - 317 | lastNodeId = node.id; - 318 | const { startPosition, endPosition } = node; - 319 | codeEditor.markText( - 320 | { line: startPosition.row, ch: startPosition.column }, - 321 | { line: endPosition.row, ch: endPosition.column }, - 322 | { - 323 | inclusiveLeft: true, - 324 | inclusiveRight: true, - 325 | css: getCaptureCSS(name), - 326 | }, - 327 | ); - 328 | } - 329 | } - 330 | }); - 331 | } - | - 332 | // When we change from a dark theme to a light theme (and vice versa), the colors of the - 333 | // captures need to be updated. 
- 334 | const observer = new MutationObserver((mutations) => { - 335 | mutations.forEach((mutation) => { - 336 | if (mutation.attributeName === 'class') { - 337 | handleQueryChange(); - 338 | } - 339 | }); - 340 | }); - | - 341 | observer.observe(document.documentElement, { - 342 | attributes: true, - 343 | attributeFilter: ['class'] - 344 | }); - | - 345 | function handleQueryChange() { - 346 | if (query) { - 347 | query.delete(); - 348 | query.deleted = true; - 349 | query = null; - 350 | } - | - 351 | queryEditor.operation(() => { - 352 | queryEditor.getAllMarks().forEach((m) => m.clear()); - 353 | if (!queryCheckbox.checked) return; - | - 354 | const queryText = queryEditor.getValue(); - | - 355 | try { - 356 | query = parser.language.query(queryText); - 357 | let match; - | - 358 | let row = 0; - 359 | queryEditor.eachLine((line) => { - 360 | while ((match = CAPTURE_REGEX.exec(line.text))) { - 361 | queryEditor.markText( - 362 | { line: row, ch: match.index }, - 363 | { line: row, ch: match.index + match[0].length }, - 364 | { - 365 | inclusiveLeft: true, - 366 | inclusiveRight: true, - 367 | css: `color: ${colorForCaptureName(match[1])}`, - 368 | }, - 369 | ); - 370 | } - 371 | row++; - 372 | }); - 373 | } catch (error) { - 374 | const startPosition = queryEditor.posFromIndex(error.index); - 375 | const endPosition = { - 376 | line: startPosition.line, - 377 | ch: startPosition.ch + (error.length || Infinity), - 378 | }; - | - 379 | if (error.index === queryText.length) { - 380 | if (startPosition.ch > 0) { - 381 | startPosition.ch--; - 382 | } else if (startPosition.row > 0) { - 383 | startPosition.row--; - 384 | startPosition.column = Infinity; - 385 | } - 386 | } - | - 387 | queryEditor.markText(startPosition, endPosition, { - 388 | className: "query-error", - 389 | inclusiveLeft: true, - 390 | inclusiveRight: true, - 391 | attributes: { title: error.message }, - 392 | }); - 393 | } - 394 | }); - | - 395 | runTreeQuery(); - 396 | saveQueryState(); - 397 | 
} - | - 398 | function handleCursorMovement() { - 399 | if (isRendering) return; - | - 400 | const selection = codeEditor.getDoc().listSelections()[0]; - 401 | let start = { row: selection.anchor.line, column: selection.anchor.ch }; - 402 | let end = { row: selection.head.line, column: selection.head.ch }; - 403 | if ( - 404 | start.row > end.row || - 405 | (start.row === end.row && start.column > end.column) - 406 | ) { - 407 | let swap = end; - 408 | end = start; - 409 | start = swap; - 410 | } - 411 | const node = tree.rootNode.namedDescendantForPosition(start, end); - 412 | if (treeRows) { - 413 | if (treeRowHighlightedIndex !== -1) { - 414 | const row = treeRows[treeRowHighlightedIndex]; - 415 | if (row) - 416 | treeRows[treeRowHighlightedIndex] = row.replace( - 417 | "highlighted", - 418 | "plain", - 419 | ); - 420 | } - 421 | treeRowHighlightedIndex = treeRows.findIndex((row) => - 422 | row.includes(`data-id=${node.id}`), - 423 | ); - 424 | if (treeRowHighlightedIndex !== -1) { - 425 | const row = treeRows[treeRowHighlightedIndex]; - 426 | if (row) - 427 | treeRows[treeRowHighlightedIndex] = row.replace( - 428 | "plain", - 429 | "highlighted", - 430 | ); - 431 | } - 432 | cluster.update(treeRows); - 433 | const lineHeight = cluster.options.item_height; - 434 | const scrollTop = outputContainerScroll.scrollTop; - 435 | const containerHeight = outputContainerScroll.clientHeight; - 436 | const offset = treeRowHighlightedIndex * lineHeight; - 437 | if (scrollTop > offset - 20) { - 438 | outputContainerScroll.scrollTo({ top: offset - 20, behavior: 'smooth' }); - 439 | } else if (scrollTop < offset + lineHeight + 40 - containerHeight) { - 440 | outputContainerScroll.scrollTo({ - 441 | top: offset - containerHeight + 40, - 442 | behavior: 'smooth' - 443 | }); - 444 | } - 445 | } - 446 | } - | - 447 | function handleCopy() { - 448 | const selection = window.getSelection(); - 449 | selection.removeAllRanges(); - 450 | const range = document.createRange(); - 451 | 
range.selectNodeContents(outputContainer); - 452 | selection.addRange(range); - 453 | navigator.clipboard.writeText(selection.toString()); - 454 | selection.removeRange(range); - 455 | showToast('Tree copied to clipboard!'); - 456 | } - | - 457 | function handleTreeClick(event) { - 458 | if (event.target.tagName === "A") { - 459 | event.preventDefault(); - 460 | const [startRow, startColumn, endRow, endColumn] = - 461 | event.target.dataset.range.split(",").map((n) => parseInt(n)); - 462 | codeEditor.focus(); - 463 | codeEditor.setSelection( - 464 | { line: startRow, ch: startColumn }, - 465 | { line: endRow, ch: endColumn }, - 466 | ); - 467 | } - 468 | } - | - 469 | function handleLoggingChange() { - 470 | if (loggingCheckbox.checked) { - 471 | parser.setLogger((message, lexing) => { - 472 | if (lexing) { - 473 | console.log(" ", message); - 474 | } else { - 475 | console.log(message); - 476 | } - 477 | }); - 478 | } else { - 479 | parser.setLogger(null); - 480 | } - 481 | } - | - 482 | function handleQueryEnableChange() { - 483 | if (queryCheckbox.checked) { - 484 | queryContainer.style.visibility = ""; - 485 | queryContainer.style.position = ""; - 486 | } else { - 487 | queryContainer.style.visibility = "hidden"; - 488 | queryContainer.style.position = "absolute"; - 489 | } - 490 | handleQueryChange(); - 491 | } - | - 492 | function treeEditForEditorChange(change) { - 493 | const oldLineCount = change.removed.length; - 494 | const newLineCount = change.text.length; - 495 | const lastLineLength = change.text[newLineCount - 1].length; - | - 496 | const startPosition = { row: change.from.line, column: change.from.ch }; - 497 | const oldEndPosition = { row: change.to.line, column: change.to.ch }; - 498 | const newEndPosition = { - 499 | row: startPosition.row + newLineCount - 1, - 500 | column: - 501 | newLineCount === 1 - 502 | ? 
startPosition.column + lastLineLength - 503 | : lastLineLength, - 504 | }; - | - 505 | const startIndex = codeEditor.indexFromPos(change.from); - 506 | let newEndIndex = startIndex + newLineCount - 1; - 507 | let oldEndIndex = startIndex + oldLineCount - 1; - 508 | for (let i = 0; i < newLineCount; i++) newEndIndex += change.text[i].length; - 509 | for (let i = 0; i < oldLineCount; i++) - 510 | oldEndIndex += change.removed[i].length; - | - 511 | return { - 512 | startIndex, - 513 | oldEndIndex, - 514 | newEndIndex, - 515 | startPosition, - 516 | oldEndPosition, - 517 | newEndPosition, - 518 | }; - 519 | } - | - 520 | function colorForCaptureName(capture) { - 521 | const id = query.captureNames.indexOf(capture); - 522 | const isDark = document.querySelector('html').classList.contains('ayu') || - 523 | document.querySelector('html').classList.contains('coal') || - 524 | document.querySelector('html').classList.contains('navy'); - | - 525 | const colors = isDark ? DARK_COLORS : LIGHT_COLORS; - 526 | return colors[id % colors.length]; - 527 | } - | - 528 | function loadState() { - 529 | const language = localStorage.getItem("language"); - 530 | const sourceCode = localStorage.getItem("sourceCode"); - 531 | const anonNodes = localStorage.getItem("anonymousNodes"); - 532 | const query = localStorage.getItem("query"); - 533 | const queryEnabled = localStorage.getItem("queryEnabled"); - 534 | if (language != null && sourceCode != null && query != null) { - 535 | queryInput.value = query; - 536 | codeInput.value = sourceCode; - 537 | languageSelect.value = language; - 538 | initializeCustomSelect({ initialValue: language }); - 539 | anonymousNodes.checked = anonNodes === "true"; - 540 | queryCheckbox.checked = queryEnabled === "true"; - 541 | } - 542 | } - | - 543 | function saveState() { - 544 | localStorage.setItem("language", languageSelect.value); - 545 | localStorage.setItem("sourceCode", codeEditor.getValue()); - 546 | localStorage.setItem("anonymousNodes", 
anonymousNodes.checked); - 547 | saveQueryState(); - 548 | } - | - 549 | function saveQueryState() { - 550 | localStorage.setItem("queryEnabled", queryCheckbox.checked); - 551 | localStorage.setItem("query", queryEditor.getValue()); - 552 | } - | - 553 | function debounce(func, wait, immediate) { - 554 | var timeout; - 555 | return function () { - 556 | var context = this, - 557 | args = arguments; - 558 | var later = function () { - 559 | timeout = null; - 560 | if (!immediate) func.apply(context, args); - 561 | }; - 562 | var callNow = immediate && !timeout; - 563 | clearTimeout(timeout); - 564 | timeout = setTimeout(later, wait); - 565 | if (callNow) func.apply(context, args); - 566 | }; - 567 | } - | - 568 | function showToast(message) { - 569 | const existingToast = document.querySelector('.toast'); - 570 | if (existingToast) { - 571 | existingToast.remove(); - 572 | } - | - 573 | const toast = document.createElement('div'); - 574 | toast.className = 'toast'; - 575 | toast.textContent = message; - 576 | document.body.appendChild(toast); - | - 577 | setTimeout(() => toast.classList.add('show'), 50); - | - 578 | setTimeout(() => { - 579 | toast.classList.remove('show'); - 580 | setTimeout(() => toast.remove(), 200); - 581 | }, 1000); - 582 | } - 583 | }; - - - --------------------------------------------------------------------------------- -/docs/src/assets/schemas/config.schema.json: --------------------------------------------------------------------------------- - 1 | { - 2 | "$schema": "http://json-schema.org/draft-07/schema#", - 3 | "type": "object", - 4 | "properties": { - 5 | "$schema": { - 6 | "type": "string" - 7 | }, - 8 | "grammars": { - 9 | "type": "array", - 10 | "items": { - 11 | "type": "object", - 12 | "properties": { - 13 | "name": { - 14 | "type": "string", - 15 | "description": "The name of the grammar.", - 16 | "pattern": "^[a-z0-9_]+$" - 17 | }, - 18 | "camelcase": { - 19 | "type": "string", - 20 | "description": "The name converted to 
CamelCase.", - 21 | "pattern": "^\\w+$", - 22 | "examples": [ - 23 | "Rust", - 24 | "HTML" - 25 | ] - 26 | }, - 27 | "title": { - 28 | "type": "string", - 29 | "description": "The title of the language.", - 30 | "examples": [ - 31 | "Rust", - 32 | "HTML" - 33 | ] - 34 | }, - 35 | "scope": { - 36 | "type": "string", - 37 | "description": "The TextMate scope that represents this language.", - 38 | "pattern": "^(source|text)(\\.[\\w\\-]+)+$", - 39 | "examples": [ - 40 | "source.rust", - 41 | "text.html" - 42 | ] - 43 | }, - 44 | "path": { - 45 | "type": "string", - 46 | "default": ".", - 47 | "description": "The relative path to the directory containing the grammar." - 48 | }, - 49 | "external-files": { - 50 | "type": "array", - 51 | "description": "The relative paths to files that should be checked for modifications during recompilation.", - 52 | "items": { - 53 | "type": "string" - 54 | }, - 55 | "minItems": 1 - 56 | }, - 57 | "file-types": { - 58 | "type": "array", - 59 | "description": "An array of filename suffix strings.", - 60 | "items": { - 61 | "type": "string" - 62 | }, - 63 | "minItems": 1 - 64 | }, - 65 | "highlights": { - 66 | "anyOf": [ - 67 | { - 68 | "type": "string" - 69 | }, - 70 | { - 71 | "type": "array", - 72 | "items": { - 73 | "type": "string" - 74 | }, - 75 | "minItems": 1 - 76 | } - 77 | ], - 78 | "default": "queries/highlights.scm", - 79 | "description": "The path(s) to the grammar's highlight queries." - 80 | }, - 81 | "injections": { - 82 | "anyOf": [ - 83 | { - 84 | "type": "string" - 85 | }, - 86 | { - 87 | "type": "array", - 88 | "items": { - 89 | "type": "string" - 90 | }, - 91 | "minItems": 1 - 92 | } - 93 | ], - 94 | "default": "queries/injections.scm", - 95 | "description": "The path(s) to the grammar's injection queries." 
- 96 | }, - 97 | "locals": { - 98 | "anyOf": [ - 99 | { - 100 | "type": "string" - 101 | }, - 102 | { - 103 | "type": "array", - 104 | "items": { - 105 | "type": "string" - 106 | }, - 107 | "minItems": 1 - 108 | } - 109 | ], - 110 | "default": "queries/locals.scm", - 111 | "description": "The path(s) to the grammar's local variable queries." - 112 | }, - 113 | "tags": { - 114 | "anyOf": [ - 115 | { - 116 | "type": "string" - 117 | }, - 118 | { - 119 | "type": "array", - 120 | "items": { - 121 | "type": "string" - 122 | }, - 123 | "minItems": 1 - 124 | } - 125 | ], - 126 | "default": "queries/tags.scm", - 127 | "description": "The path(s) to the grammar's code navigation queries." - 128 | }, - 129 | "injection-regex": { - 130 | "type": "string", - 131 | "format": "regex", - 132 | "description": "A regex pattern that will be tested against a language name in order to determine whether this language should be used for a potential language injection site." - 133 | }, - 134 | "first-line-regex": { - 135 | "type": "string", - 136 | "format": "regex", - 137 | "description": "A regex pattern that will be tested against the first line of a file in order to determine whether this language applies to the file." - 138 | }, - 139 | "content-regex": { - 140 | "type": "string", - 141 | "format": "regex", - 142 | "description": "A regex pattern that will be tested against the contents of the file in order to break ties in cases where multiple grammars matched the file." 
- 143 | }, - 144 | "class-name": { - 145 | "type": "string", - 146 | "pattern": "^TreeSitter\\w+$", - 147 | "description": "The class name for the Swift, Java & Kotlin bindings" - 148 | } - 149 | }, - 150 | "additionalProperties": false, - 151 | "required": [ - 152 | "name", - 153 | "scope" - 154 | ] - 155 | }, - 156 | "minItems": 1 - 157 | }, - 158 | "metadata": { - 159 | "type": "object", - 160 | "properties": { - 161 | "version": { - 162 | "type": "string", - 163 | "description": "The current version of the project.", - 164 | "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$", - 165 | "$comment": "The CLI will use this version to update package.json, Cargo.toml, pyproject.toml, Makefile." - 166 | }, - 167 | "license": { - 168 | "type": "string", - 169 | "default": "MIT", - 170 | "description": "The project's license." - 171 | }, - 172 | "description": { - 173 | "type": "string", - 174 | "description": "The project's description.", - 175 | "examples": [ - 176 | "Rust grammar for tree-sitter" - 177 | ] - 178 | }, - 179 | "links": { - 180 | "type": "object", - 181 | "properties": { - 182 | "repository": { - 183 | "type": "string", - 184 | "format": "uri", - 185 | "description": "The project's repository." - 186 | }, - 187 | "funding": { - 188 | "type": "string", - 189 | "format": "uri", - 190 | "description": "The project's funding link." 
- 191 | } - 192 | }, - 193 | "additionalProperties": false, - 194 | "required": [ - 195 | "repository" - 196 | ] - 197 | }, - 198 | "authors": { - 199 | "type": "array", - 200 | "items": { - 201 | "type": "object", - 202 | "description": "The project's author(s).", - 203 | "properties": { - 204 | "name": { - 205 | "type": "string" - 206 | }, - 207 | "email": { - 208 | "type": "string", - 209 | "format": "email" - 210 | }, - 211 | "url": { - 212 | "type": "string", - 213 | "format": "uri" - 214 | } - 215 | }, - 216 | "additionalProperties": false, - 217 | "required": [ - 218 | "name" - 219 | ] - 220 | }, - 221 | "minItems": 1 - 222 | }, - 223 | "namespace": { - 224 | "type": "string", - 225 | "description": "The namespace for the Java & Kotlin packages.", - 226 | "default": "io.github.tree-sitter", - 227 | "$comment": "Used as is in the Maven/Gradle group name and transformed accordingly for the package names and directories (e.g. io.github.treesitter.jtreesitter.html - src/main/java/io/github/treesitter/jtreesitter/html)." 
- 228 | } - 229 | }, - 230 | "additionalProperties": false, - 231 | "required": [ - 232 | "version", - 233 | "links" - 234 | ] - 235 | }, - 236 | "bindings": { - 237 | "type": "object", - 238 | "description": "The language bindings that will be generated.", - 239 | "properties": { - 240 | "c": { - 241 | "type": "boolean", - 242 | "default": true - 243 | }, - 244 | "go": { - 245 | "type": "boolean", - 246 | "default": true - 247 | }, - 248 | "java": { - 249 | "type": "boolean", - 250 | "default": false - 251 | }, - 252 | "kotlin": { - 253 | "type": "boolean", - 254 | "default": false - 255 | }, - 256 | "node": { - 257 | "type": "boolean", - 258 | "default": true - 259 | }, - 260 | "python": { - 261 | "type": "boolean", - 262 | "default": true - 263 | }, - 264 | "rust": { - 265 | "type": "boolean", - 266 | "default": true - 267 | }, - 268 | "swift": { - 269 | "type": "boolean", - 270 | "default": true - 271 | }, - 272 | "zig": { - 273 | "type": "boolean", - 274 | "default": false - 275 | } - 276 | }, - 277 | "additionalProperties": false - 278 | } - 279 | }, - 280 | "additionalProperties": false, - 281 | "required": [ - 282 | "grammars", - 283 | "metadata" - 284 | ] - 285 | } - - - --------------------------------------------------------------------------------- -/docs/src/assets/schemas/grammar.schema.json: --------------------------------------------------------------------------------- - 1 | { - 2 | "$schema": "http://json-schema.org/draft-07/schema#", - 3 | "title": "Tree-sitter grammar specification", - 4 | "type": "object", - | - 5 | "required": ["name", "rules"], - | - 6 | "additionalProperties": false, - | - 7 | "properties": { - 8 | "$schema": { - 9 | "type": "string" - 10 | }, - | - 11 | "name": { - 12 | "description": "The name of the grammar", - 13 | "type": "string", - 14 | "pattern": "^[a-zA-Z_]\\w*" - 15 | }, - | - 16 | "inherits": { - 17 | "description": "The name of the parent grammar", - 18 | "type": "string", - 19 | "pattern": "^[a-zA-Z_]\\w*" - 20 
| }, - | - 21 | "rules": { - 22 | "type": "object", - 23 | "patternProperties": { - 24 | "^[a-zA-Z_]\\w*$": { - 25 | "$ref": "#/definitions/rule" - 26 | } - 27 | }, - 28 | "additionalProperties": false - 29 | }, - | - 30 | "extras": { - 31 | "type": "array", - 32 | "uniqueItems": true, - 33 | "items": { - 34 | "$ref": "#/definitions/rule" - 35 | } - 36 | }, - | - 37 | "precedences": { - 38 | "type": "array", - 39 | "uniqueItems": true, - 40 | "items": { - 41 | "type": "array", - 42 | "uniqueItems": true, - 43 | "items": { - 44 | "oneOf": [ - 45 | { "type": "string" }, - 46 | { "$ref": "#/definitions/symbol-rule" } - 47 | ] - 48 | } - 49 | } - 50 | }, - | - 51 | "reserved": { - 52 | "type": "object", - 53 | "patternProperties": { - 54 | "^[a-zA-Z_]\\w*$": { - 55 | "type": "array", - 56 | "uniqueItems": true, - 57 | "items": { - 58 | "$ref": "#/definitions/rule" - 59 | } - 60 | } - 61 | }, - 62 | "additionalProperties": false - 63 | }, - | - 64 | "externals": { - 65 | "type": "array", - 66 | "uniqueItems": true, - 67 | "items": { - 68 | "$ref": "#/definitions/rule" - 69 | } - 70 | }, - | - 71 | "inline": { - 72 | "type": "array", - 73 | "uniqueItems": true, - 74 | "items": { - 75 | "type": "string", - 76 | "pattern": "^[a-zA-Z_]\\w*$" - 77 | } - 78 | }, - | - 79 | "conflicts": { - 80 | "type": "array", - 81 | "uniqueItems": true, - 82 | "items": { - 83 | "type": "array", - 84 | "uniqueItems": true, - 85 | "items": { - 86 | "type": "string", - 87 | "pattern": "^[a-zA-Z_]\\w*$" - 88 | } - 89 | } - 90 | }, - | - 91 | "word": { - 92 | "type": "string", - 93 | "pattern": "^[a-zA-Z_]\\w*" - 94 | }, - | - 95 | "supertypes": { - 96 | "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. 
See https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types.", - 97 | "type": "array", - 98 | "uniqueItems": true, - 99 | "items": { - 100 | "description": "The name of a rule in `rules` or `extras`", - 101 | "type": "string" - 102 | } - 103 | } - 104 | }, - | - 105 | "definitions": { - 106 | "blank-rule": { - 107 | "type": "object", - 108 | "properties": { - 109 | "type": { - 110 | "type": "string", - 111 | "const": "BLANK" - 112 | } - 113 | }, - 114 | "required": ["type"] - 115 | }, - | - 116 | "string-rule": { - 117 | "type": "object", - 118 | "properties": { - 119 | "type": { - 120 | "type": "string", - 121 | "const": "STRING" - 122 | }, - 123 | "value": { - 124 | "type": "string" - 125 | } - 126 | }, - 127 | "required": ["type", "value"] - 128 | }, - | - 129 | "pattern-rule": { - 130 | "type": "object", - 131 | "properties": { - 132 | "type": { - 133 | "type": "string", - 134 | "const": "PATTERN" - 135 | }, - 136 | "value": { "type": "string" }, - 137 | "flags": { "type": "string" } - 138 | }, - 139 | "required": ["type", "value"] - 140 | }, - | - 141 | "symbol-rule": { - 142 | "type": "object", - 143 | "properties": { - 144 | "type": { - 145 | "type": "string", - 146 | "const": "SYMBOL" - 147 | }, - 148 | "name": { "type": "string" } - 149 | }, - 150 | "required": ["type", "name"] - 151 | }, - | - 152 | "seq-rule": { - 153 | "type": "object", - 154 | "properties": { - 155 | "type": { - 156 | "type": "string", - 157 | "const": "SEQ" - 158 | }, - 159 | "members": { - 160 | "type": "array", - 161 | "items": { - 162 | "$ref": "#/definitions/rule" - 163 | } - 164 | } - 165 | }, - 166 | "required": ["type", "members"] - 167 | }, - | - 168 | "choice-rule": { - 169 | "type": "object", - 170 | "properties": { - 171 | "type": { - 172 | "type": "string", - 173 | "const": "CHOICE" - 174 | }, - 175 | "members": { - 176 | "type": "array", - 177 | "items": { - 178 | "$ref": "#/definitions/rule" - 179 | } - 180 | } - 181 | }, - 182 | "required": ["type", 
"members"] - 183 | }, - | - 184 | "alias-rule": { - 185 | "type": "object", - 186 | "properties": { - 187 | "type": { - 188 | "type": "string", - 189 | "const": "ALIAS" - 190 | }, - 191 | "value": { "type": "string" }, - 192 | "named": { "type": "boolean" }, - 193 | "content": { - 194 | "$ref": "#/definitions/rule" - 195 | } - 196 | }, - 197 | "required": ["type", "named", "content", "value"] - 198 | }, - | - 199 | "repeat-rule": { - 200 | "type": "object", - 201 | "properties": { - 202 | "type": { - 203 | "type": "string", - 204 | "const": "REPEAT" - 205 | }, - 206 | "content": { - 207 | "$ref": "#/definitions/rule" - 208 | } - 209 | }, - 210 | "required": ["type", "content"] - 211 | }, - | - 212 | "repeat1-rule": { - 213 | "type": "object", - 214 | "properties": { - 215 | "type": { - 216 | "type": "string", - 217 | "const": "REPEAT1" - 218 | }, - 219 | "content": { - 220 | "$ref": "#/definitions/rule" - 221 | } - 222 | }, - 223 | "required": ["type", "content"] - 224 | }, - | - 225 | "reserved-rule": { - 226 | "type": "object", - 227 | "properties": { - 228 | "type": { - 229 | "type": "string", - 230 | "const": "RESERVED" - 231 | }, - 232 | "context_name": { "type": "string" }, - 233 | "content": { - 234 | "$ref": "#/definitions/rule" - 235 | } - 236 | }, - 237 | "required": ["type", "context_name", "content"] - 238 | }, - | - 239 | "token-rule": { - 240 | "type": "object", - 241 | "properties": { - 242 | "type": { - 243 | "type": "string", - 244 | "enum": [ - 245 | "TOKEN", - 246 | "IMMEDIATE_TOKEN" - 247 | ] - 248 | }, - 249 | "content": { - 250 | "$ref": "#/definitions/rule" - 251 | } - 252 | }, - 253 | "required": ["type", "content"] - 254 | }, - | - 255 | "field-rule": { - 256 | "properties": { - 257 | "name": { "type": "string" }, - 258 | "type": { - 259 | "type": "string", - 260 | "const": "FIELD" - 261 | }, - 262 | "content": { - 263 | "$ref": "#/definitions/rule" - 264 | } - 265 | }, - 266 | "required": ["name", "type", "content"] - 267 | }, - | - 268 | 
"prec-rule": { - 269 | "type": "object", - 270 | "properties": { - 271 | "type": { - 272 | "type": "string", - 273 | "enum": [ - 274 | "PREC", - 275 | "PREC_LEFT", - 276 | "PREC_RIGHT", - 277 | "PREC_DYNAMIC" - 278 | ] - 279 | }, - 280 | "value": { - 281 | "oneof": [ - 282 | { "type": "integer" }, - 283 | { "type": "string" } - 284 | ] - 285 | }, - 286 | "content": { - 287 | "$ref": "#/definitions/rule" - 288 | } - 289 | }, - 290 | "required": ["type", "content", "value"] - 291 | }, - | - 292 | "rule": { - 293 | "oneOf": [ - 294 | { "$ref": "#/definitions/alias-rule" }, - 295 | { "$ref": "#/definitions/blank-rule" }, - 296 | { "$ref": "#/definitions/string-rule" }, - 297 | { "$ref": "#/definitions/pattern-rule" }, - 298 | { "$ref": "#/definitions/symbol-rule" }, - 299 | { "$ref": "#/definitions/seq-rule" }, - 300 | { "$ref": "#/definitions/choice-rule" }, - 301 | { "$ref": "#/definitions/repeat1-rule" }, - 302 | { "$ref": "#/definitions/repeat-rule" }, - 303 | { "$ref": "#/definitions/reserved-rule" }, - 304 | { "$ref": "#/definitions/token-rule" }, - 305 | { "$ref": "#/definitions/field-rule" }, - 306 | { "$ref": "#/definitions/prec-rule" } - 307 | ] - 308 | } - 309 | } - 310 | } - - - --------------------------------------------------------------------------------- -/docs/src/cli/complete.md: --------------------------------------------------------------------------------- - 1 | # `tree-sitter complete` - | - 2 | The `complete` command generates a completion script for your shell. - 3 | This script can be used to enable autocompletion for the `tree-sitter` CLI. - | - 4 | ```bash - 5 | tree-sitter complete --shell # Aliases: comp - 6 | ``` - | - 7 | ## Options - | - 8 | ### `--shell ` - | - 9 | The shell for which to generate the completion script. - | - 10 | Supported values: `bash`, `elvish`, `fish`, `power-shell`, `zsh`, and `nushell`. 
- - - -------------------------------------------------------------------------------- -/docs/src/cli/dump-languages.md: -------------------------------------------------------------------------------- - 1 | # `tree-sitter dump-languages` - | - 2 | The `dump-languages` command prints out a list of all the languages that the CLI knows about. This can be useful for debugging purposes, or for scripting. The paths to search come from the config file's [`parser-directories`][parser-directories] object. - | - 3 | ```bash - 4 | tree-sitter dump-languages [OPTIONS] # Aliases: langs - 5 | ``` - | - 6 | ## Options - | - 7 | ### `--config-path` - | - 8 | The path to the configuration file. Ordinarily, the CLI will use the default location as explained in the [init-config](./init-config.md) command. This flag allows you to explicitly override that default, and use a config defined elsewhere. - | - 9 | [parser-directories]: ./init-config.md#parser-directories - - - -------------------------------------------------------------------------------- -/docs/src/cli/fuzz.md: -------------------------------------------------------------------------------- - 1 | # `tree-sitter fuzz` - | - 2 | The `fuzz` command is used to fuzz a parser by performing random edits and ensuring that undoing these edits results in - 3 | consistent parse trees. It will fail if the parse trees are not equal, or if the changed ranges are inconsistent. - | - 4 | ```bash - 5 | tree-sitter fuzz [OPTIONS] # Aliases: f - 6 | ``` - | - 7 | ## Options - | - 8 | ### `-s/--skip ` - | - 9 | A list of test names to skip fuzzing. - | - 10 | ### `--subdir ` - | - 11 | The directory containing the parser. This is primarily useful in multi-language repositories. - | - 12 | ### `--edits ` - | - 13 | The maximum number of edits to perform. The default is 3. - | - 14 | ### `--iterations ` - | - 15 | The number of iterations to run. The default is 10.
- | - 16 | ### `-i/--include ` - | - 17 | Only run tests whose names match this regex. - | - 18 | ### `-e/--exclude ` - | - 19 | Skip tests whose names match this regex. - | - 20 | ### `--log-graphs` - | - 21 | Outputs logs of the graphs of the stack and parse trees during parsing, as well as the actual parsing and lexing message. - 22 | The graphs are constructed with [graphviz dot][dot], and the output is written to `log.html`. - | - 23 | ### `-l/--log` - | - 24 | Outputs parsing and lexing logs. This logs to stderr. - | - 25 | ### `-r/--rebuild` - | - 26 | Force a rebuild of the parser before running the fuzzer. - | - 27 | [dot]: https://graphviz.org/doc/info/lang.html - - - --------------------------------------------------------------------------------- -/docs/src/cli/generate.md: --------------------------------------------------------------------------------- - 1 | # `tree-sitter generate` - | - 2 | The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current - 3 | working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar, - 4 | just run `tree-sitter generate` again. - | - 5 | ```bash - 6 | tree-sitter generate [OPTIONS] [GRAMMAR_PATH] # Aliases: gen, g - 7 | ``` - | - 8 | The grammar path argument allows you to specify a path to a `grammar.js` JavaScript file, or `grammar.json` JSON file. - 9 | In case your `grammar.js` file is in a non-standard path, you can specify it yourself. But, if you are using a parser - 10 | where `grammar.json` was already generated, or it was hand-written, you can tell the CLI to generate the parser *based* - 11 | on this JSON file. This avoids relying on a JavaScript file and avoids the dependency on a JavaScript runtime. 
- | - 12 | If there is an ambiguity or *local ambiguity* in your grammar, Tree-sitter will detect it during parser generation, and - 13 | it will exit with an `Unresolved conflict` error message. To learn more about conflicts and how to handle them, check out - 14 | the section on [`Structuring Rules Well`](../creating-parsers/3-writing-the-grammar.md#structuring-rules-well) - 15 | in the user guide. - | - 16 | ## Options - | - 17 | ### `-l/--log` - | - 18 | Print the log of the parser generation process. This is really only useful if you know what you're doing, or are investigating - 19 | a bug in the CLI itself. It logs info such as what tokens are included in the error recovery state, - 20 | what keywords were extracted, what states were split and why, and the entry point state. - | - 21 | ### `--abi ` - | - 22 | The ABI to use for parser generation. The default is ABI 15, with ABI 14 being a supported target. - | - 23 | ### `--emit` - | - 24 | What generated files to emit. Possible values: - | - 25 | - `json`: Generate `grammar.json` and `node-types.json` - 26 | - `parser` (default): Generate `parser.c` and related files. - 27 | - `lib`: Compile to a library (equivalent of the deprecated `--build` option) - | - 28 | ### `-0/--debug-build` - | - 29 | Compile the parser with debug flags enabled. This is useful when debugging issues that require a debugger like `gdb` or `lldb`. - | - 30 | ### `--libdir ` - | - 31 | The directory to place the compiled parser(s) in. - 32 | On Unix systems, the default path is `$XDG_CACHE_HOME/tree-sitter` if `$XDG_CACHE_HOME` is set, - 33 | otherwise `$HOME/.config/tree-sitter` is used. On Windows, the default path is `%LOCALAPPDATA%\tree-sitter` if available, - 34 | otherwise `$HOME\AppData\Local\tree-sitter` is used. - | - 35 | ### `-o/--output` - | - 36 | The directory to place the generated parser in. The default is `src/` in the current directory.
- | - 37 | ### `--report-states-for-rule ` - | - 38 | Print the overview of states from the given rule. This is useful for debugging and understanding the generated parser's - 39 | item sets for all given states in a given rule. To solely view state count numbers for rules, pass in `-` for the rule argument. - 40 | To view the overview of states for every rule, pass in `*` for the rule argument. - | - 41 | ### `--json` - | - 42 | Report conflicts in a JSON format. - | - 43 | ### `--js-runtime ` - | - 44 | The path to the JavaScript runtime executable to use when generating the parser. The default is `node`. - 45 | Note that you can also set this with `TREE_SITTER_JS_RUNTIME`. Starting from version 0.26.0, you can - 46 | also pass in `native` to use the native QuickJS runtime that comes bundled with the CLI. This avoids - 47 | the dependency on a JavaScript runtime entirely. - - - --------------------------------------------------------------------------------- -/docs/src/cli/highlight.md: --------------------------------------------------------------------------------- - 1 | # `tree-sitter highlight` - | - 2 | You can run syntax highlighting on an arbitrary file using `tree-sitter highlight`. This can either output colors directly - 3 | to your terminal using ANSI escape codes, or produce HTML (if the `--html` flag is passed). For more information, see - 4 | [the syntax highlighting page](../3-syntax-highlighting.md). - | - 5 | ```bash - 6 | tree-sitter highlight [OPTIONS] [PATHS]... # Aliases: hi - 7 | ``` - | - 8 | ## Options - | - 9 | ### `-H/--html` - | - 10 | Output an HTML document with syntax highlighting. - | - 11 | ### `--css-classes` - | - 12 | Output HTML with CSS classes instead of inline styles. - | - 13 | ### `--check` - | - 14 | Check that the highlighting captures conform strictly to the standards. - | - 15 | ### `--captures-path ` - | - 16 | The path to a file with captures. 
These captures would be considered the "standard" captures to compare against. - | - 17 | ### `--query-paths ` - | - 18 | The paths to query files to use for syntax highlighting. These should end in `highlights.scm`. - | - 19 | ### `--scope ` - | - 20 | The language scope to use for syntax highlighting. This is useful when the language is ambiguous. - | - 21 | ### `-t/--time` - | - 22 | Print the time taken to highlight the file. - | - 23 | ### `-q/--quiet` - | - 24 | Suppress main output. - | - 25 | ### `--paths ` - | - 26 | The path to a file that contains paths to source files to highlight. - | - 27 | ### `-p/--grammar-path ` - | - 28 | The path to the directory containing the grammar. - | - 29 | ### `--config-path ` - | - 30 | The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information. - | - 31 | ### `-n/--test-number ` - | - 32 | Highlight the contents of a specific test. - - - -------------------------------------------------------------------------------- -/docs/src/cli/index.md: -------------------------------------------------------------------------------- - 1 | # CLI Overview - | - 2 | Let's go over all of the functionality of the `tree-sitter` command line interface. - 3 | Once you feel that you have enough of a grasp on the CLI, you can move on to the grammar authoring section to learn more about writing your own parser. - - - -------------------------------------------------------------------------------- -/docs/src/cli/init-config.md: -------------------------------------------------------------------------------- - 1 | # `tree-sitter init-config` - | - 2 | This command initializes a configuration file for the Tree-sitter CLI.
- | - 3 | ```bash - 4 | tree-sitter init-config - 5 | ``` - | - 6 | These directories are created in the "default" location for your platform: - | - 7 | * On Unix, `$XDG_CONFIG_HOME/tree-sitter` or `$HOME/.config/tree-sitter` - 8 | * On Windows, `%APPDATA%\tree-sitter` or `$HOME\AppData\Roaming\tree-sitter` - | - 9 | ```admonish info - 10 | The CLI will work if there's no config file present, falling back on default values for each configuration option. - 11 | ``` - | - 12 | When you run the `init-config` command, it will print out the location of the file that it creates so that you can easily - 13 | find and modify it. - | - 14 | The configuration file is a JSON file that contains the following fields: - | - 15 | ## `parser-directories` - | - 16 | The [`tree-sitter highlight`](./highlight.md) command takes one or more file paths, and tries to automatically determine, - 17 | which language should be used to highlight those files. To do this, it needs to know *where* to look for Tree-sitter grammars - 18 | on your filesystem. You can control this using the `"parser-directories"` key in your configuration file: - | - 19 | ```json - 20 | { - 21 | "parser-directories": [ - 22 | "/Users/my-name/code", - 23 | "~/other-code", - 24 | "$HOME/another-code" - 25 | ] - 26 | } - 27 | ``` - | - 28 | Any folder within one of these *parser directories* whose name begins with `tree-sitter-` will be treated as a Tree-sitter - 29 | grammar repository. - | - 30 | ## `theme` - | - 31 | The [Tree-sitter highlighting system](../3-syntax-highlighting.md) works by annotating ranges of source code with logical - 32 | "highlight names" like `function.method`, `type.builtin`, `keyword`, etc. To decide what *color* should be used for rendering - 33 | each highlight, a *theme* is needed. 
- | - 34 | In your config file, the `"theme"` value is an object whose keys are dot-separated highlight names like - 35 | `function.builtin` or `keyword`, and whose values are JSON expressions that represent text styling parameters. - | - 36 | ### Highlight Names - | - 37 | A theme can contain multiple keys that share a common subsequence. Examples: - | - 38 | * `variable` and `variable.parameter` - 39 | * `function`, `function.builtin`, and `function.method` - | - 40 | For a given highlight produced, styling will be determined based on the **longest matching theme key**. For example, the - 41 | highlight `function.builtin.static` would match the key `function.builtin` rather than `function`. - | - 42 | ### Styling Values - | - 43 | Styling values can be any of the following: - | - 44 | * Integers from 0 to 255, representing ANSI terminal color ids. - 45 | * Strings like `"#e45649"` representing hexadecimal RGB colors. - 46 | * Strings naming basic ANSI colors like `"red"`, `"black"`, `"purple"`, or `"cyan"`. - 47 | * Objects with the following keys: - 48 | * `color` — An integer or string as described above. - 49 | * `underline` — A boolean indicating whether the text should be underlined. - 50 | * `italic` — A boolean indicating whether the text should be italicized. - 51 | * `bold` — A boolean indicating whether the text should be bold-face. 
- | - 52 | An example theme can be seen below: - | - 53 | ```json - 54 | { - 55 | "function": 26, - 56 | "operator": { - 57 | "bold": true, - 58 | "color": 239 - 59 | }, - 60 | "variable.builtin": { - 61 | "bold": true - 62 | }, - 63 | "variable.parameter": { - 64 | "underline": true - 65 | }, - 66 | "type.builtin": { - 67 | "color": 23, - 68 | "bold": true - 69 | }, - 70 | "keyword": 56, - 71 | "type": 23, - 72 | "number": { - 73 | "bold": true, - 74 | "color": 94 - 75 | }, - 76 | "constant": 94, - 77 | "attribute": { - 78 | "color": 124, - 79 | "italic": true - 80 | }, - 81 | "comment": { - 82 | "color": 245, - 83 | "italic": true - 84 | }, - 85 | "constant.builtin": { - 86 | "color": 94, - 87 | "bold": true - 88 | } - 89 | } - 90 | ``` - | - 91 | ## `parse-theme` - | - 92 | The [`tree-sitter parse`](./parse.md) command will output a pretty-printed CST when the `-c/--cst` option is used. You can - 93 | control what colors are used for various parts of the tree in your configuration file. - | - 94 | ```admonish note - 95 | Omitting a field will cause the relevant text to be rendered with its default color.
- 96 | ``` - | - 97 | An example parse theme can be seen below: - | - 98 | ```json - 99 | { - 100 | "parse-theme": { - 101 | // The color of node kinds - 102 | "node-kind": [20, 20, 20], - 103 | // The color of text associated with a node - 104 | "node-text": [255, 255, 255], - 105 | // The color of node fields - 106 | "field": [42, 42, 42], - 107 | // The color of the range information for unnamed nodes - 108 | "row-color": [255, 255, 255], - 109 | // The color of the range information for named nodes - 110 | "row-color-named": [255, 130, 0], - 111 | // The color of extra nodes - 112 | "extra": [255, 0, 255], - 113 | // The color of ERROR nodes - 114 | "error": [255, 0, 0], - 115 | // The color of MISSING nodes and their associated text - 116 | "missing": [153, 75, 0], - 117 | // The color of newline characters - 118 | "line-feed": [150, 150, 150], - 119 | // The color of backtick characters - 120 | "backtick": [0, 200, 0], - 121 | // The color of literals - 122 | "literal": [0, 0, 200], - 123 | } - 124 | } - 125 | ``` - - - --------------------------------------------------------------------------------- -/docs/src/cli/init.md: --------------------------------------------------------------------------------- - 1 | # `tree-sitter init` - | - 2 | The `init` command is your starting point for creating a new grammar. When you run it, it sets up a repository with all - 3 | the essential files and structure needed for grammar development. Since the command includes git-related files by default, - 4 | we recommend using git for version control of your grammar. - | - 5 | ```bash - 6 | tree-sitter init [OPTIONS] # Aliases: i - 7 | ``` - | - 8 | ## Options - | - 9 | ### `--update` - | - 10 | Update outdated generated files, if needed. - | - 11 | ### `-p/--grammar-path ` - | - 12 | The path to the directory containing the grammar. 
- | - 13 | ## Structure of `tree-sitter.json` - | - 14 | The main file of interest for users to configure is `tree-sitter.json`, which tells the CLI information about your grammar, - 15 | such as the location of queries. - | - 16 | ### The `grammars` field - | - 17 | This field is an array of objects, though you typically only need one object in this array unless your repo has - 18 | multiple grammars (for example, `TypeScript` and `TSX`). - | - 19 | ### Example - | - 20 | Typically, the objects in the `"tree-sitter"` array only need to specify a few keys: - | - 21 | ```json - 22 | { - 23 | "tree-sitter": [ - 24 | { - 25 | "scope": "source.ruby", - 26 | "file-types": [ - 27 | "rb", - 28 | "gemspec", - 29 | "Gemfile", - 30 | "Rakefile" - 31 | ], - 32 | "first-line-regex": "#!.*\\bruby$" - 33 | } - 34 | ] - 35 | } - 36 | ``` - | - 37 | #### Basic Fields - | - 38 | These keys specify basic information about the parser: - | - 39 | - `scope` (required) — A string like `"source.js"` that identifies the language. - 40 | We strive to match the scope names used by popular [TextMate grammars][textmate] and by the [Linguist][linguist] library. - | - 41 | - `path` — A relative path from the directory containing `tree-sitter.json` to another directory containing the `src/` - 42 | folder, which contains the actual generated parser. The default value is `"."` - 43 | (so that `src/` is in the same folder as `tree-sitter.json`), and this very rarely needs to be overridden. - | - 44 | - `external-files` — A list of relative paths from the root dir of a - 45 | parser to files that should be checked for modifications during recompilation. - 46 | This is useful during development to have changes to other files besides scanner.c - 47 | be picked up by the cli. - | - 48 | #### Language Detection - | - 49 | These keys help to decide whether the language applies to a given file: - | - 50 | - `file-types` — An array of filename suffix strings.
The grammar will be used for files whose names end with one of - 51 | these suffixes. Note that the suffix may match an *entire* filename. - | - 52 | - `first-line-regex` — A regex pattern that will be tested against the first line of a file - 53 | to determine whether this language applies to the file. If present, this regex will be used for any file whose - 54 | language does not match any grammar's `file-types`. - | - 55 | - `content-regex` — A regex pattern that will be tested against the contents of the file - 56 | to break ties in cases where multiple grammars matched the file using the above two criteria. If the regex matches, - 57 | this grammar will be preferred over another grammar with no `content-regex`. If the regex does not match, a grammar with - 58 | no `content-regex` will be preferred over this one. - | - 59 | - `injection-regex` — A regex pattern that will be tested against a *language name* to determine whether this language - 60 | should be used for a potential *language injection* site. - 61 | Language injection is described in more detail in [the relevant section](../3-syntax-highlighting.md#language-injection). - | - 62 | #### Query Paths - | - 63 | These keys specify relative paths from the directory containing `tree-sitter.json` to the files that control syntax highlighting: - | - 64 | - `highlights` — Path to a *highlight query*. Default: `queries/highlights.scm`. - 65 | - `locals` — Path to a *local variable query*. Default: `queries/locals.scm`. - 66 | - `injections` — Path to an *injection query*. Default: `queries/injections.scm`. - 67 | - `tags` — Path to a *tag query*. Default: `queries/tags.scm`. - | - 68 | ### The `metadata` field - | - 69 | This field contains information that tree-sitter will use to populate relevant bindings' files, especially their versions. - 70 | Typically, this will all be set up when you run `tree-sitter init`, but you are welcome to update it as you see fit.
- | - 71 | - `version` (required) — The current version of your grammar, which should follow [semver][semver] - 72 | - `license` — The license of your grammar, which should be a valid [SPDX license][spdx] - 73 | - `description` — The brief description of your grammar - 74 | - `authors` (required) — An array of objects that contain a `name` field, and optionally an `email` and `url` field. - 75 | Each field is a string - 76 | - `links` — An object that contains a `repository` field, and optionally a `funding` field. Each field is a string - 77 | - `namespace` — The namespace for the `Java` and `Kotlin` bindings, defaults to `io.github.tree-sitter` if not provided - | - 78 | ### The `bindings` field - | - 79 | This field controls what bindings are generated when the `init` command is run. - 80 | Each key is a language name, and the value is a boolean. - | - 81 | - `c` (default: `true`) - 82 | - `go` (default: `true`) - 83 | - `java` (default: `false`) - 84 | - `kotlin` (default: `false`) - 85 | - `node` (default: `true`) - 86 | - `python` (default: `true`) - 87 | - `rust` (default: `true`) - 88 | - `swift` (default: `false`) - | - 89 | ## Binding Files - | - 90 | When you run `tree-sitter init`, the CLI will also generate a number of files in your repository that allow for your parser - 91 | to be used from different languages. Here is a list of these bindings files that are generated, and what their purpose is: - | - 92 | ### C/C++ - | - 93 | - `Makefile` — This file tells [`make`][make] how to compile your language. - 94 | - `CMakeLists.txt` — This file tells [`cmake`][cmake] how to compile your language. - 95 | - `bindings/c/tree_sitter/tree-sitter-language.h` — This file provides the C interface of your language. - 96 | - `bindings/c/tree-sitter-language.pc` — This file provides [pkg-config][pkg-config] metadata about your language's C library.
- 97 | - `src/tree_sitter/parser.h` — This file provides some basic C definitions that are used in your generated `parser.c` file. - 98 | - `src/tree_sitter/alloc.h` — This file provides some memory allocation macros that are to be used in your external scanner, - 99 | if you have one. - 100 | - `src/tree_sitter/array.h` — This file provides some array macros that are to be used in your external scanner, - 101 | if you have one. - | - 102 | ### Go - | - 103 | - `go.mod` — This file is the manifest of the Go module. - 104 | - `bindings/go/binding.go` — This file wraps your language in a Go module. - 105 | - `bindings/go/binding_test.go` — This file contains a test for the Go package. - | - 106 | ### Node - | - 107 | - `binding.gyp` — This file tells Node.js how to compile your language. - 108 | - `package.json` — This file is the manifest of the Node.js package. - 109 | - `bindings/node/binding.cc` — This file wraps your language in a JavaScript module for Node.js. - 110 | - `bindings/node/index.js` — This is the file that Node.js initially loads when using your language. - 111 | - `bindings/node/index.d.ts` — This file provides type hints for your parser when used in TypeScript. - 112 | - `bindings/node/binding_test.js` — This file contains a test for the Node.js package. - | - 113 | ### Python - | - 114 | - `pyproject.toml` — This file is the manifest of the Python package. - 115 | - `setup.py` — This file tells Python how to compile your language. - 116 | - `bindings/python/tree_sitter_language/binding.c` — This file wraps your language in a Python module. - 117 | - `bindings/python/tree_sitter_language/__init__.py` — This file tells Python how to load your language. - 118 | - `bindings/python/tree_sitter_language/__init__.pyi` — This file provides type hints for your parser when used in Python. - 119 | - `bindings/python/tree_sitter_language/py.typed` — This marker file tells type checkers that the package ships type hints (PEP 561).
- 120 | - `bindings/python/tests/test_binding.py` — This file contains a test for the Python package. - | - 121 | ### Rust - | - 122 | - `Cargo.toml` — This file is the manifest of the Rust package. - 123 | - `bindings/rust/lib.rs` — This file wraps your language in a Rust crate when used in Rust. - 124 | - `bindings/rust/build.rs` — This file wraps the building process for the Rust crate. - | - 125 | ### Swift - | - 126 | - `Package.swift` — This file tells Swift how to compile your language. - 127 | - `bindings/swift/TreeSitterLanguage/language.h` — This file wraps your language in a Swift module when used in Swift. - 128 | - `bindings/swift/TreeSitterLanguageTests/TreeSitterLanguageTests.swift` — This file contains a test for the Swift package. - | - 129 | ### Additional Files - | - 130 | Additionally, there are a few other files that are generated when you run `tree-sitter init`, - 131 | that aim to improve the development experience: - | - 132 | - `.editorconfig` — This file tells your editor how to format your code. More information about this file can be found [here][editorconfig]. - 133 | - `.gitattributes` — This file tells Git how to handle line endings, and tells GitHub what files are generated. - 134 | - `.gitignore` — This file tells Git what files to ignore when committing changes.
- | - 135 | [cmake]: https://cmake.org/cmake/help/latest - 136 | [editorconfig]: https://editorconfig.org - 137 | [linguist]: https://github.com/github/linguist - 138 | [make]: https://www.gnu.org/software/make/manual/make.html - 139 | [pkg-config]: https://www.freedesktop.org/wiki/Software/pkg-config - 140 | [semver]: https://semver.org - 141 | [spdx]: https://spdx.org/licenses - 142 | [textmate]: https://macromates.com/manual/en/language_grammars - - - --------------------------------------------------------------------------------- -/docs/src/cli/parse.md: --------------------------------------------------------------------------------- - 1 | # `tree-sitter parse` - | - 2 | The `parse` command parses source files using a Tree-sitter parser. You can pass any number of file paths and glob patterns - 3 | to `tree-sitter parse`, and it will parse all the given files. The command will exit with a non-zero status code if any - 4 | parse errors occurred. - | - 5 | ```bash - 6 | tree-sitter parse [OPTIONS] [PATHS]... # Aliases: p - 7 | ``` - | - 8 | ## Options - | - 9 | ### `--paths ` - | - 10 | The path to a file that contains paths to source files to parse. - | - 11 | ### `-p/--grammar-path ` - | - 12 | The path to the directory containing the grammar. - | - 13 | ### `--scope ` - | - 14 | The language scope to use for parsing. This is useful when the language is ambiguous. - | - 15 | ### `-d/--debug` - | - 16 | Outputs parsing and lexing logs. This logs to stderr. - | - 17 | ### `-0/--debug-build` - | - 18 | Compile the parser with debug flags enabled. This is useful when debugging issues that require a debugger like `gdb` or `lldb`. - | - 19 | ### `-D/--debug-graph` - | - 20 | Outputs logs of the graphs of the stack and parse trees during parsing, as well as the actual parsing and lexing message. - 21 | The graphs are constructed with [graphviz dot][dot], and the output is written to `log.html`. 
- | - 22 | ### `--wasm` - | - 23 | Compile and run the parser as a Wasm module. - | - 24 | ### `--dot` - | - 25 | Output the parse tree with [graphviz dot][dot]. - | - 26 | ### `-x/--xml` - | - 27 | Output the parse tree in XML format. - | - 28 | ### `-c/--cst` - | - 29 | Output the parse tree in a pretty-printed CST format. - | - 30 | ### `-s/--stat` - | - 31 | Show parsing statistics. - | - 32 | ### `--timeout ` - | - 33 | Set the timeout for parsing a single file, in microseconds. - | - 34 | ### `-t/--time` - | - 35 | Print the time taken to parse the file. If edits are provided, this will also print the time taken to parse the file after - 36 | each edit. - | - 37 | ### `-q/--quiet` - | - 38 | Suppress main output. - | - 39 | ### `--edits ...` - | - 40 | Apply edits after parsing the file. Edits are in the form of `row,col|position delcount insert_text` where row and col, or position are 0-indexed. - | - 41 | ### `--encoding ` - | - 42 | Set the encoding of the input file. By default, the CLI will look for the [`BOM`][bom] to determine if the file is encoded - 43 | in `UTF-16BE` or `UTF-16LE`. If no `BOM` is present, `UTF-8` is the default. One of `utf8`, `utf16-le`, `utf16-be`. - | - 44 | ### `--open-log` - | - 45 | When using the `--debug-graph` option, open the log file in the default browser. - | - 46 | ### `-j/--json` - | - 47 | Output parsing results in a JSON format. - | - 48 | ### `--config-path ` - | - 49 | The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information. - | - 50 | ### `-n/--test-number ` - | - 51 | Parse a specific test in the corpus. The test number is the same number that appears in the output of `tree-sitter test`. - | - 52 | ### `-r/--rebuild` - | - 53 | Force a rebuild of the parser before running tests. - | - 54 | ### `--no-ranges` - | - 55 | Omit the node's ranges from the default parse output. This is useful when copying S-Expressions to a test file. 
- | - 56 | [dot]: https://graphviz.org/doc/info/lang.html - 57 | [bom]: https://en.wikipedia.org/wiki/Byte_order_mark - - - --------------------------------------------------------------------------------- -/docs/src/cli/playground.md: --------------------------------------------------------------------------------- - 1 | # `tree-sitter playground` - | - 2 | The `playground` command allows you to start a local playground to test your parser interactively. - | - 3 | ```bash - 4 | tree-sitter playground [OPTIONS] # Aliases: play, pg, web-ui - 5 | ``` - | - 6 | ```admonish note - 7 | For this to work, you must have already built the parser as a Wasm module. This can be done with the [`build`](./build.md) subcommand - 8 | (`tree-sitter build --wasm`). - 9 | ``` - | - 10 | ## Options - | - 11 | ### `-e/--export ` - | - 12 | Export static playground files to the specified directory instead of serving them. - | - 13 | ### `-q/--quiet` - | - 14 | Don't automatically open the playground in the default browser. - | - 15 | ### `--grammar-path ` - | - 16 | The path to the directory containing the grammar and wasm files. - - - --------------------------------------------------------------------------------- -/docs/src/cli/query.md: --------------------------------------------------------------------------------- - 1 | # `tree-sitter query` - | - 2 | The `query` command is used to run a query on a parser, and view the results. - | - 3 | ```bash - 4 | tree-sitter query [OPTIONS] [PATHS]... # Aliases: q - 5 | ``` - | - 6 | ## Options - | - 7 | ### `-p/--grammar-path ` - | - 8 | The path to the directory containing the grammar. - | - 9 | ### `-t/--time` - | - 10 | Print the time taken to execute the query on the file. - | - 11 | ### `-q/--quiet` - | - 12 | Suppress main output. - | - 13 | ### `--paths ` - | - 14 | The path to a file that contains paths to source files in which the query will be executed. 
- | - 15 | ### `--byte-range ` - | - 16 | The range of byte offsets in which the query will be executed. The format is `start_byte:end_byte`. - | - 17 | ### `--row-range ` - | - 18 | The range of rows in which the query will be executed. The format is `start_row:end_row`. - | - 19 | ### `--scope ` - | - 20 | The language scope to use for parsing and querying. This is useful when the language is ambiguous. - | - 21 | ### `-c/--captures` - | - 22 | Order the query results by captures instead of matches. - | - 23 | ### `--test` - | - 24 | Whether to run query tests or not. - | - 25 | ### `--config-path ` - | - 26 | The path to an alternative configuration (`config.json`) file. See [the init-config command](./init-config.md) for more information. - | - 27 | ### `-n/--test-number ` - | - 28 | Query the contents of a specific test. - - - --------------------------------------------------------------------------------- -/docs/src/cli/tags.md: --------------------------------------------------------------------------------- - 1 | # `tree-sitter tags` - | - 2 | You can run symbol tagging on an arbitrary file using `tree-sitter tags`. This will output a list of tags. - 3 | For more information, see [the code navigation page](../4-code-navigation.md#tagging-and-captures). - | - 4 | ```bash - 5 | tree-sitter tags [OPTIONS] [PATHS]... - 6 | ``` - | - 7 | ## Options - | - 8 | ### `--scope ` - | - 9 | The language scope to use for symbol tagging. This is useful when the language is ambiguous. - | - 10 | ### `-t/--time` - | - 11 | Print the time taken to generate tags for the file. - | - 12 | ### `-q/--quiet` - | - 13 | Suppress main output. - | - 14 | ### `--paths ` - | - 15 | The path to a file that contains paths to source files to tag. - | - 16 | ### `-p/--grammar-path ` - | - 17 | The path to the directory containing the grammar. - | - 18 | ### `--config-path ` - | - 19 | The path to an alternative configuration (`config.json`) file. 
See [the init-config command](./init-config.md) for more information. - | - 20 | ### `-n/--test-number ` - | - 21 | Generate tags from the contents of a specific test. - - - --------------------------------------------------------------------------------- -/docs/src/cli/test.md: --------------------------------------------------------------------------------- - 1 | # `tree-sitter test` - | - 2 | The `test` command is used to run the test suite for a parser. - | - 3 | ```bash - 4 | tree-sitter test [OPTIONS] # Aliases: t - 5 | ``` - | - 6 | ## Options - | - 7 | ### `-i/--include ` - | - 8 | Only run tests whose names match this regex. - | - 9 | ### `-e/--exclude ` - | - 10 | Skip tests whose names match this regex. - | - 11 | ### `--file-name ` - | - 12 | Only run tests from the given filename in the corpus. - | - 13 | ### `-p/--grammar-path ` - | - 14 | The path to the directory containing the grammar. - | - 15 | ### `-u/--update` - | - 16 | Update the expected output of tests. - | - 17 | ```admonish info - 18 | Tests containing `ERROR` nodes or `MISSING` nodes will not be updated. - 19 | ``` - | - 20 | ### `-d/--debug` - | - 21 | Outputs parsing and lexing logs. This logs to stderr. - | - 22 | ### `-0/--debug-build` - | - 23 | Compile the parser with debug flags enabled. This is useful when debugging issues that require a debugger like `gdb` or `lldb`. - | - 24 | ### `-D/--debug-graph` - | - 25 | Outputs logs of the graphs of the stack and parse trees during parsing, as well as the actual parsing and lexing message. - 26 | The graphs are constructed with [graphviz dot][dot], and the output is written to `log.html`. - | - 27 | ### `--wasm` - | - 28 | Compile and run the parser as a Wasm module. - | - 29 | ### `--open-log` - | - 30 | When using the `--debug-graph` option, open the log file in the default browser. - | - 31 | ### `--config-path ` - | - 32 | The path to an alternative configuration (`config.json`) file. 
See [the init-config command](./init-config.md) for more information. - | - 33 | ### `--show-fields` - | - 34 | Force showing fields in test diffs. - | - 35 | ### `--stat ` - | - 36 | Show parsing statistics when tests are being run. One of `all`, `outliers-and-total`, or `total-only`. - | - 37 | - `all`: Show statistics for every test. - | - 38 | - `outliers-and-total`: Show statistics only for outliers, and total statistics. - | - 39 | - `total-only`: Show only total statistics. - | - 40 | ### `-r/--rebuild` - | - 41 | Force a rebuild of the parser before running tests. - | - 42 | ### `--overview-only` - | - 43 | Only show the overview of the test results, and not the diff. - - - --------------------------------------------------------------------------------- -/docs/src/cli/version.md: --------------------------------------------------------------------------------- - 1 | # `tree-sitter version` - | - 2 | The `version` command upgrades the version of your grammar. - | - 3 | ```bash - 4 | tree-sitter version # Aliases: publish - 5 | ``` - | - 6 | This will update the version in several files, if they exist: - | - 7 | * tree-sitter.json - 8 | * Cargo.toml - 9 | * Cargo.lock - 10 | * package.json - 11 | * package-lock.json - 12 | * Makefile - 13 | * CMakeLists.txt - 14 | * pyproject.toml - | - 15 | Alternative forms can use the version in `tree-sitter.json` to bump automatically: - | - 16 | ```bash - 17 | tree-sitter version --bump patch # patch bump - 18 | tree-sitter version --bump minor # minor bump - 19 | tree-sitter version --bump major # major bump - 20 | ``` - | - 21 | As a grammar author, you should keep the version of your grammar in sync across - 22 | different bindings. However, doing so manually is error-prone and tedious, so - 23 | this command takes care of the burden. If you are using a version control system, - 24 | it is recommended to commit the changes made by this command, and to tag the - 25 | commit with the new version. 
- | - 26 | To print the current version without bumping it, use: - | - 27 | ```bash - 28 | tree-sitter version - 29 | ``` - | - 30 | ## Options - | - 31 | ### `-p/--grammar-path ` - | - 32 | The path to the directory containing the grammar. - - - --------------------------------------------------------------------------------- -/docs/src/creating-parsers/1-getting-started.md: --------------------------------------------------------------------------------- - 1 | # Getting Started - | - 2 | ## Dependencies - | - 3 | To develop a Tree-sitter parser, there are two dependencies that you need to install: - | - 4 | - **A JavaScript runtime** — Tree-sitter grammars are written in JavaScript, and Tree-sitter uses a JavaScript runtime - 5 | (the default being [Node.js][node.js]) to interpret JavaScript files. It requires this runtime command (default: `node`) - 6 | to be in one of the directories in your [`PATH`][path-env]. - | - 7 | - **A C Compiler** — Tree-sitter creates parsers that are written in C. To run and test these parsers with the - 8 | `tree-sitter parse` or `tree-sitter test` commands, you must have a C/C++ compiler installed. Tree-sitter will try to look - 9 | for these compilers in the standard places for each platform. - | - 10 | ## Installation - | - 11 | To create a Tree-sitter parser, you need to use [the `tree-sitter` CLI][tree-sitter-cli]. You can install the CLI in a few - 12 | different ways: - | - 13 | - Build the `tree-sitter-cli` [Rust crate][crate] from source using [`cargo`][cargo], the Rust package manager. This works - 14 | on any platform. See [the contributing docs](../6-contributing.md#developing-tree-sitter) for more information. - | - 15 | - Install the `tree-sitter-cli` [Rust crate][crate] from [crates.io][crates.io] using [`cargo`][cargo]. 
You can do so by - 16 | running the following command: `cargo install tree-sitter-cli --locked` - | - 17 | - Install the `tree-sitter-cli` [Node.js module][node-module] using [`npm`][npm], the Node package manager. This approach - 18 | is fast, but it only works on certain platforms, because it relies on pre-built binaries. - | - 19 | - Download a binary for your platform from [the latest GitHub release][releases], and put it into a directory on your `PATH`. - | - 20 | ## Project Setup - | - 21 | The preferred convention is to name the parser repository "tree-sitter-" followed by the name of the language, in lowercase. - | - 22 | ```sh - 23 | mkdir tree-sitter-${LOWER_PARSER_NAME} - 24 | cd tree-sitter-${LOWER_PARSER_NAME} - 25 | ``` - | - 26 | ```admonish note - 27 | The `LOWER_` prefix here means the "lowercase" name of the language. - 28 | ``` - | - 29 | ### Init - | - 30 | Once you've installed the `tree-sitter` CLI tool, you can start setting up your project, which will allow your parser to - 31 | be used from multiple languages. - | - 32 | ```sh - 33 | # This will prompt you for input - 34 | tree-sitter init - 35 | ``` - | - 36 | The `init` command will create a bunch of files in the project. - 37 | There should be a file called `grammar.js` with the following contents: - | - 38 | ```js - 39 | /** - 40 | * @file PARSER_DESCRIPTION - 41 | * @author PARSER_AUTHOR_NAME PARSER_AUTHOR_EMAIL - 42 | * @license PARSER_LICENSE - 43 | */ - | - 44 | /// - 45 | // @ts-check - | - 46 | export default grammar({ - 47 | name: 'LOWER_PARSER_NAME', - | - 48 | rules: { - 49 | // TODO: add the actual grammar rules - 50 | source_file: $ => 'hello' - 51 | } - 52 | }); - 53 | ``` - | - 54 | ```admonish info - 55 | The placeholders shown above would be replaced with the corresponding data you provided in the `init` sub-command's - 56 | prompts. - 57 | ``` - | - 58 | To learn more about this command, check the [reference page](../cli/init.md). 
- | - 59 | ### Generate - | - 60 | Next, run the following command: - | - 61 | ```sh - 62 | tree-sitter generate - 63 | ``` - | - 64 | This will generate the C code required to parse this trivial language. - | - 65 | You can test this parser by creating a source file with the contents "hello" and parsing it: - | - 66 | ```sh - 67 | echo 'hello' > example-file - 68 | tree-sitter parse example-file - 69 | ``` - | - 70 | Alternatively, in Windows PowerShell: - | - 71 | ```pwsh - 72 | "hello" | Out-File example-file -Encoding utf8 - 73 | tree-sitter parse example-file - 74 | ``` - | - 75 | This should print the following: - | - 76 | ```text - 77 | (source_file [0, 0] - [1, 0]) - 78 | ``` - | - 79 | You now have a working parser. - | - 80 | Finally, look back at the [triple-slash][] and [`@ts-check`][ts-check] comments in `grammar.js`; these tell your editor - 81 | to provide documentation and type information as you edit your grammar. For these to work, you must download Tree-sitter's - 82 | TypeScript API from npm into a `node_modules` directory in your project: - | - 83 | ```sh - 84 | npm install # or your package manager of choice - 85 | ``` - | - 86 | To learn more about this command, check the [reference page](../cli/generate.md). 
- | - 87 | [cargo]: https://doc.rust-lang.org/cargo/getting-started/installation.html - 88 | [crate]: https://crates.io/crates/tree-sitter-cli - 89 | [crates.io]: https://crates.io/crates/tree-sitter-cli - 90 | [node-module]: https://www.npmjs.com/package/tree-sitter-cli - 91 | [node.js]: https://nodejs.org - 92 | [npm]: https://docs.npmjs.com - 93 | [path-env]: https://en.wikipedia.org/wiki/PATH_(variable) - 94 | [releases]: https://github.com/tree-sitter/tree-sitter/releases/latest - 95 | [tree-sitter-cli]: https://github.com/tree-sitter/tree-sitter/tree/master/crates/cli - 96 | [triple-slash]: https://www.typescriptlang.org/docs/handbook/triple-slash-directives.html - 97 | [ts-check]: https://www.typescriptlang.org/docs/handbook/intro-to-js-ts.html - - - --------------------------------------------------------------------------------- -/docs/src/creating-parsers/2-the-grammar-dsl.md: --------------------------------------------------------------------------------- - 1 | # The Grammar DSL - | - 2 | The following is a complete list of built-in functions you can use in your `grammar.js` to define rules. Use-cases for some - 3 | of these functions will be explained in more detail in later sections. - | - 4 | - **Symbols (the `$` object)** — Every grammar rule is written as a JavaScript function that takes a parameter conventionally - 5 | called `$`. The syntax `$.identifier` is how you refer to another grammar symbol within a rule. Names starting with `$.MISSING` - 6 | or `$.UNEXPECTED` should be avoided as they have special meaning for the `tree-sitter test` command. - 7 | - **String and Regex literals** — The terminal symbols in a grammar are described using JavaScript strings and regular - 8 | expressions. Of course during parsing, Tree-sitter does not actually use JavaScript's regex engine to evaluate these regexes; - 9 | it generates its own regex-matching logic based on the Rust regex syntax as part of each parser. 
Regex literals are just - 10 | used as a convenient way of writing regular expressions in your grammar. You can use Rust regular expressions in your grammar - 11 | DSL through the `RustRegex` class. Simply pass your regex pattern as a string: - | - 12 | ```js - 13 | new RustRegex('(?i)[a-z_][a-z0-9_]*') // matches a simple identifier - 14 | ``` - | - 15 | Unlike JavaScript's builtin `RegExp` class, which takes a pattern and flags as separate arguments, `RustRegex` only - 16 | accepts a single pattern string. While it doesn't support separate flags, you can use inline flags within the pattern itself. - 17 | For more details about Rust's regex syntax and capabilities, check out the [Rust regex documentation][rust regex]. - | - 18 | ```admonish note - 19 | Only a subset of the Regex engine is actually supported. This is due to certain features like lookahead and lookaround - 20 | assertions not being feasible to use in an LR(1) grammar, as well as certain flags being unnecessary for tree-sitter. However, - 21 | plenty of features are supported by default: - | - 22 | - Character classes - 23 | - Character ranges - 24 | - Character sets - 25 | - Quantifiers - 26 | - Alternation - 27 | - Grouping - 28 | - Unicode character escapes - 29 | - Unicode property escapes - 30 | ``` - | - 31 | - **Sequences : `seq(rule1, rule2, ...)`** — This function creates a rule that matches any number of other rules, one after - 32 | another. It is analogous to simply writing multiple symbols next to each other in [EBNF notation][ebnf]. - | - 33 | - **Alternatives : `choice(rule1, rule2, ...)`** — This function creates a rule that matches *one* of a set of possible - 34 | rules. The order of the arguments does not matter. This is analogous to the `|` (pipe) operator in EBNF notation. - | - 35 | - **Repetitions : `repeat(rule)`** — This function creates a rule that matches *zero-or-more* occurrences of a given rule. - 36 | It is analogous to the `{x}` (curly brace) syntax in EBNF notation.
- | - 37 | - **Repetitions : `repeat1(rule)`** — This function creates a rule that matches *one-or-more* occurrences of a given rule. - 38 | The previous `repeat` rule is implemented in terms of `repeat1` but is included because it is very commonly used. - | - 39 | - **Options : `optional(rule)`** — This function creates a rule that matches *zero or one* occurrence of a given rule. - 40 | It is analogous to the `[x]` (square bracket) syntax in EBNF notation. - | - 41 | - **Precedence : `prec(number, rule)`** — This function marks the given rule with a numerical precedence, which will be used - 42 | to resolve [*LR(1) Conflicts*][lr-conflict] at parser-generation time. When two rules overlap in a way that represents either - 43 | a true ambiguity or a *local* ambiguity given one token of lookahead, Tree-sitter will try to resolve the conflict by matching - 44 | the rule with the higher precedence. The default precedence of all rules is zero. This works similarly to the - 45 | [precedence directives][yacc-prec] in Yacc grammars. - | - 46 | This function can also be used to assign lexical precedence to a given - 47 | token, but it must be wrapped in a `token` call, such as `token(prec(1, 'foo'))`. This reads as "the token `foo` has a - 48 | lexical precedence of 1". The purpose of lexical precedence is to solve the issue where multiple tokens can match the same - 49 | set of characters, but one token should be preferred over the other. See [Lexical Precedence vs Parse Precedence][lexical vs parse] - 50 | for a more detailed explanation. - | - 51 | - **Left Associativity : `prec.left([number], rule)`** — This function marks the given rule as left-associative (and optionally - 52 | applies a numerical precedence). When an LR(1) conflict arises in which all the rules have the same numerical precedence, - 53 | Tree-sitter will consult the rules' associativity. If there is a left-associative rule, Tree-sitter will prefer matching - 54 | a rule that ends *earlier*.
This works similarly to [associativity directives][yacc-prec] in Yacc grammars. - | - 55 | - **Right Associativity : `prec.right([number], rule)`** — This function is like `prec.left`, but it instructs Tree-sitter - 56 | to prefer matching a rule that ends *later*. - | - 57 | - **Dynamic Precedence : `prec.dynamic(number, rule)`** — This function is similar to `prec`, but the given numerical precedence - 58 | is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically - 59 | using the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given - 60 | piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the - 61 | one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars. - | - 62 | - **Tokens : `token(rule)`** — This function marks the given rule as producing only - 63 | a single token. Tree-sitter's default is to treat each String or RegExp literal - 64 | in the grammar as a separate token. Each token is matched separately by the lexer - 65 | and returned as its own leaf node in the tree. The `token` function allows you to - 66 | express a complex rule using the functions described above (rather than as a single - 67 | regular expression) but still have Tree-sitter treat it as a single token. - 68 | The token function will only accept terminal rules, so `token($.foo)` will not work. - 69 | You can think of it as a shortcut for squashing complex rules of strings or regexes - 70 | down to a single token. - | - 71 | - **Immediate Tokens : `token.immediate(rule)`** — Usually, whitespace (and any other extras, such as comments) is optional - 72 | before each token. This function means that the token will only match if there is no whitespace. 
- | - 73 | - **Aliases : `alias(rule, name)`** — This function causes the given rule to *appear* with an alternative name in the syntax - 74 | tree. If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes] - 75 | called `bar`. And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an - 76 | [anonymous node][named-vs-anonymous-nodes], as if the rule had been written as the simple string. - | - 77 | - **Field Names : `field(name, rule)`** — This function assigns a *field name* to the child node(s) matched by the given - 78 | rule. In the resulting syntax tree, you can then use that field name to access specific children. - | - 79 | - **Reserved Keywords : `reserved(wordset, rule)`** — This function will override the global reserved word set with the - 80 | one passed into the `wordset` parameter. This is useful for contextual keywords, such as `if` in JavaScript, which cannot - 81 | be used as a variable name in most contexts, but can be used as a property name. - | - 82 | In addition to the `name` and `rules` fields, grammars have a few other optional public fields that influence the behavior - 83 | of the parser. Each of these fields is a function that accepts the grammar object (`$`) as its only parameter, like the - 84 | grammar rules themselves. These fields are: - | - 85 | - **`extras`** — an array of tokens that may appear *anywhere* in the language. This is often used for whitespace and - 86 | comments. The default value of `extras` is to accept whitespace. To control whitespace explicitly, specify - 87 | `extras: $ => []` in your grammar. See the section on [using extras][extras] for more details. - | - 88 | - **`inline`** — an array of rule names that should be automatically *removed* from the grammar by replacing all of their - 89 | usages with a copy of their definition. 
This is useful for rules that are used in multiple places but for which you *don't* - 90 | want to create syntax tree nodes at runtime. - | - 91 | - **`conflicts`** — an array of arrays of rule names. Each inner array represents a set of rules that's involved in an - 92 | *LR(1) conflict* that is *intended to exist* in the grammar. When these conflicts occur at runtime, Tree-sitter will use - 93 | the GLR algorithm to explore all the possible interpretations. If *multiple* parses end up succeeding, Tree-sitter will pick - 94 | the subtree whose corresponding rule has the highest total *dynamic precedence*. - | - 95 | - **`externals`** — an array of token names which can be returned by an - 96 | [*external scanner*][external-scanners]. External scanners allow you to write custom C code which runs during the lexing - 97 | process to handle lexical rules (e.g. Python's indentation tokens) that cannot be described by regular expressions. - | - 98 | - **`precedences`** — an array of arrays of strings, where each array of strings defines named precedence levels in descending - 99 | order. These names can be used in the `prec` functions to define precedence relative only to other names in the array, rather - 100 | than globally. Can only be used with parse precedence, not lexical precedence. - | - 101 | - **`word`** — the name of a token that will match keywords to the - 102 | [keyword extraction][keyword-extraction] optimization. - | - 103 | - **`supertypes`** — an array of rule names which should be considered to be 'supertypes' in the generated - 104 | [*node types* file][static-node-types-supertypes]. Supertype rules are automatically hidden from the parse tree, regardless - 105 | of whether their names start with an underscore. The main use case for supertypes is to group together multiple different - 106 | kinds of nodes under a single abstract category, such as "expression" or "declaration". 
See the section on [`using supertypes`][supertypes] - 107 | for more details. - | - 108 | - **`reserved`** — similar in structure to the main `rules` property, an object of reserved word sets associated with an - 109 | array of reserved rules. The reserved rule in the array must be a terminal token meaning it must be a string, regex, token, - 110 | or terminal rule. The reserved rule must also exist and be used in the grammar; specifying arbitrary tokens will not work. - 111 | The *first* reserved word set in the object is the global word set, meaning it applies to every rule in every parse state. - 112 | However, certain keywords are contextual, depending on the rule. For example, in JavaScript, keywords are typically not allowed - 113 | as ordinary variables; however, they *can* be used as a property name. In this situation, the `reserved` function would be used, - 114 | and the word set to pass in would be the name of the word set that is declared in the `reserved` object that corresponds to an - 115 | empty array, signifying *no* keywords are reserved.
- | - 116 | [bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html - 117 | [ebnf]: https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form - 118 | [external-scanners]: ./4-external-scanners.md - 119 | [extras]: ./3-writing-the-grammar.md#using-extras - 120 | [keyword-extraction]: ./3-writing-the-grammar.md#keyword-extraction - 121 | [lexical vs parse]: ./3-writing-the-grammar.md#lexical-precedence-vs-parse-precedence - 122 | [lr-conflict]: https://en.wikipedia.org/wiki/LR_parser#Conflicts_in_the_constructed_tables - 123 | [named-vs-anonymous-nodes]: ../using-parsers/2-basic-parsing.md#named-vs-anonymous-nodes - 124 | [rust regex]: https://docs.rs/regex/1.1.8/regex/#grouping-and-flags - 125 | [static-node-types]: ../using-parsers/6-static-node-types.md - 126 | [static-node-types-supertypes]: ../using-parsers/6-static-node-types.md#supertype-nodes - 127 | [supertypes]: ./3-writing-the-grammar.md#using-supertypes - 128 | [yacc-prec]: https://docs.oracle.com/cd/E19504-01/802-5880/6i9k05dh3/index.html - - - --------------------------------------------------------------------------------- -/docs/src/creating-parsers/3-writing-the-grammar.md: --------------------------------------------------------------------------------- - 1 | # Writing the Grammar - | - 2 | Writing a grammar requires creativity. There are an infinite number of CFGs (context-free grammars) that can be used to describe - 3 | any given language. To produce a good Tree-sitter parser, you need to create a grammar with two important properties: - | - 4 | 1. **An intuitive structure** — Tree-sitter's output is a [concrete syntax tree][cst]; each node in the tree corresponds - 5 | directly to a [terminal or non-terminal symbol][non-terminal] in the grammar. So to produce an easy-to-analyze tree, there - 6 | should be a direct correspondence between the symbols in your grammar and the recognizable constructs in the language. 
- 7 | This might seem obvious, but it is very different from the way that context-free grammars are often written in contexts - 8 | like [language specifications][language-spec] or [Yacc][yacc]/[Bison][bison] parsers. - | - 9 | 2. **A close adherence to LR(1)** — Tree-sitter is based on the [GLR parsing][glr-parsing] algorithm. This means that while - 10 | it can handle any context-free grammar, it works most efficiently with a class of context-free grammars called [LR(1) Grammars][lr-grammars]. - 11 | In this respect, Tree-sitter's grammars are similar to (but less restrictive than) [Yacc][yacc] and [Bison][bison] grammars, - 12 | but _different_ from [ANTLR grammars][antlr], [Parsing Expression Grammars][peg], or the [ambiguous grammars][ambiguous-grammar] - 13 | commonly used in language specifications. - | - 14 | It's unlikely that you'll be able to satisfy these two properties just by translating an existing context-free grammar directly - 15 | into Tree-sitter's grammar format. There are a few kinds of adjustments that are often required. - 16 | The following sections will explain these adjustments in more depth. - | - 17 | ## The First Few Rules - | - 18 | It's usually a good idea to find a formal specification for the language you're trying to parse. This specification will - 19 | most likely contain a context-free grammar. As you read through the rules of this CFG, you will probably discover a complex - 20 | and cyclic graph of relationships. It might be unclear how you should navigate this graph as you define your grammar. - | - 21 | Although languages have very different constructs, their constructs can often be categorized into similar groups like - 22 | _Declarations_, _Definitions_, _Statements_, _Expressions_, _Types_ and _Patterns_. In writing your grammar, a good first - 23 | step is to create just enough structure to include all of these basic _groups_ of symbols.
For a language like Go, - 24 | you might start with something like this: - | - 25 | ```js - 26 | { - 27 | // ... - | - 28 | rules: { - 29 | source_file: $ => repeat($._definition), - | - 30 | _definition: $ => choice( - 31 | $.function_definition - 32 | // TODO: other kinds of definitions - 33 | ), - | - 34 | function_definition: $ => seq( - 35 | 'func', - 36 | $.identifier, - 37 | $.parameter_list, - 38 | $._type, - 39 | $.block - 40 | ), - | - 41 | parameter_list: $ => seq( - 42 | '(', - 43 | // TODO: parameters - 44 | ')' - 45 | ), - | - 46 | _type: $ => choice( - 47 | 'bool' - 48 | // TODO: other kinds of types - 49 | ), - | - 50 | block: $ => seq( - 51 | '{', - 52 | repeat($._statement), - 53 | '}' - 54 | ), - | - 55 | _statement: $ => choice( - 56 | $.return_statement - 57 | // TODO: other kinds of statements - 58 | ), - | - 59 | return_statement: $ => seq( - 60 | 'return', - 61 | $.expression, - 62 | ';' - 63 | ), - | - 64 | expression: $ => choice( - 65 | $.identifier, - 66 | $.number - 67 | // TODO: other kinds of expressions - 68 | ), - | - 69 | identifier: $ => /[a-z]+/, - | - 70 | number: $ => /\d+/ - 71 | } - 72 | } - 73 | ``` - | - 74 | One important fact to know up front is that the start rule for the grammar is the first property in the `rules` object. - 75 | In the example above, that would correspond to `source_file`, but it can be named anything. - | - 76 | Some details of this grammar will be explained in more depth later on, but if you focus on the `TODO` comments, you can - 77 | see that the overall strategy is _breadth-first_. Notably, this initial skeleton does not need to directly match an exact - 78 | subset of the context-free grammar in the language specification. It just needs to touch on the major groupings of rules - 79 | in as simple and obvious a way as possible. - | - 80 | With this structure in place, you can now freely decide what part of the grammar to flesh out next. For example, you might - 81 | decide to start with _types_. 
One-by-one, you could define the rules for writing basic types and composing them into more - 82 | complex types: - | - 83 | ```js - 84 | { - 85 | // ... - | - 86 | _type: $ => choice( - 87 | $.primitive_type, - 88 | $.array_type, - 89 | $.pointer_type - 90 | ), - | - 91 | primitive_type: $ => choice( - 92 | 'bool', - 93 | 'int' - 94 | ), - | - 95 | array_type: $ => seq( - 96 | '[', - 97 | ']', - 98 | $._type - 99 | ), - | - 100 | pointer_type: $ => seq( - 101 | '*', - 102 | $._type - 103 | ) - 104 | } - 105 | ``` - | - 106 | After developing the _type_ sublanguage a bit further, you might decide to switch to working on _statements_ or _expressions_ - 107 | instead. It's often useful to check your progress by trying to parse some real code using `tree-sitter parse`. - | - 108 | **And remember to add tests for each rule in your `test/corpus` folder!** - | - 109 | ## Structuring Rules Well - | - 110 | Imagine that you were just starting work on the [Tree-sitter JavaScript parser][tree-sitter-javascript]. Naively, you might - 111 | try to directly mirror the structure of the [ECMAScript Language Spec][ecmascript-spec]. To illustrate the problem with this - 112 | approach, consider the following line of code: - | - 113 | ```js - 114 | return x + y; - 115 | ``` - | - 116 | According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`, - 117 | and `x` and `y` are both `IdentifierReferences`. 
The relationship between these constructs is captured by a complex series - 118 | of production rules: - | - 119 | ```text - 120 | ReturnStatement -> 'return' Expression - 121 | Expression -> AssignmentExpression - 122 | AssignmentExpression -> ConditionalExpression - 123 | ConditionalExpression -> LogicalORExpression - 124 | LogicalORExpression -> LogicalANDExpression - 125 | LogicalANDExpression -> BitwiseORExpression - 126 | BitwiseORExpression -> BitwiseXORExpression - 127 | BitwiseXORExpression -> BitwiseANDExpression - 128 | BitwiseANDExpression -> EqualityExpression - 129 | EqualityExpression -> RelationalExpression - 130 | RelationalExpression -> ShiftExpression - 131 | ShiftExpression -> AdditiveExpression - 132 | AdditiveExpression -> MultiplicativeExpression - 133 | MultiplicativeExpression -> ExponentiationExpression - 134 | ExponentiationExpression -> UnaryExpression - 135 | UnaryExpression -> UpdateExpression - 136 | UpdateExpression -> LeftHandSideExpression - 137 | LeftHandSideExpression -> NewExpression - 138 | NewExpression -> MemberExpression - 139 | MemberExpression -> PrimaryExpression - 140 | PrimaryExpression -> IdentifierReference - 141 | ``` - | - 142 | The language spec encodes the twenty different precedence levels of JavaScript expressions using twenty levels of indirection - 143 | between `IdentifierReference` and `Expression`. If we were to create a concrete syntax tree representing this statement - 144 | according to the language spec, it would have twenty levels of nesting, and it would contain nodes with names like `BitwiseXORExpression`, - 145 | which are unrelated to the actual code. - | - 146 | ## Standard Rule Names - | - 147 | Tree-sitter places no restrictions on how to name the rules of your grammar. It can be helpful, however, to follow certain conventions - 148 | used by many other established grammars in the ecosystem. 
Some of these well-established patterns are listed below: - | - 149 | - `source_file`: Represents an entire source file; this rule is commonly used as the root node for a grammar. - 150 | - `expression`/`statement`: Used to represent statements and expressions for a given language. Commonly defined as a choice between several - 151 | more specific sub-expression/sub-statement rules. - 152 | - `block`: Used as the parent node for block scopes, with its children representing the block's contents. - 153 | - `type`: Represents the types of a language such as `int`, `char`, and `void`. - 154 | - `identifier`: Used for constructs like variable names, function arguments, and object fields; this rule is commonly used as the `word` - 155 | token in grammars. - 156 | - `string`: Used to represent `"string literals"`. - 157 | - `comment`: Used to represent comments; this rule is commonly used as an `extra`. - | - 158 | ## Using Precedence - | - 159 | To produce a readable syntax tree, we'd like to model JavaScript expressions using a much flatter structure like this: - | - 160 | ```js - 161 | { - 162 | // ... - | - 163 | expression: $ => choice( - 164 | $.identifier, - 165 | $.unary_expression, - 166 | $.binary_expression, - 167 | // ... - 168 | ), - | - 169 | unary_expression: $ => choice( - 170 | seq('-', $.expression), - 171 | seq('!', $.expression), - 172 | // ... - 173 | ), - | - 174 | binary_expression: $ => choice( - 175 | seq($.expression, '*', $.expression), - 176 | seq($.expression, '+', $.expression), - 177 | // ... - 178 | ), - 179 | } - 180 | ``` - | - 181 | Of course, this flat structure is highly ambiguous.
If we try to generate a parser, Tree-sitter gives us an error message: - | - 182 | ```text - 183 | Error: Unresolved conflict for symbol sequence: - | - 184 | '-' _expression • '*' … - | - 185 | Possible interpretations: - | - 186 | 1: '-' (binary_expression _expression • '*' _expression) - 187 | 2: (unary_expression '-' _expression) • '*' … - | - 188 | Possible resolutions: - | - 189 | 1: Specify a higher precedence in `binary_expression` than in the other rules. - 190 | 2: Specify a higher precedence in `unary_expression` than in the other rules. - 191 | 3: Specify a left or right associativity in `unary_expression` - 192 | 4: Add a conflict for these rules: `binary_expression` `unary_expression` - 193 | ``` - | - 194 | ```admonish hint - 195 | The • character in the error message indicates where exactly during - 196 | parsing the conflict occurs, or in other words, where the parser is encountering - 197 | ambiguity. - 198 | ``` - | - 199 | For an expression like `-a * b`, it's not clear whether the `-` operator applies to the `a * b` or just to the `a`. This - 200 | is where the `prec` function [described in the previous page][grammar dsl] comes into play. By wrapping a rule with `prec`, - 201 | we can indicate that certain sequence of symbols should _bind to each other more tightly_ than others. For example, the - 202 | `'-', $.expression` sequence in `unary_expression` should bind more tightly than the `$.expression, '+', $.expression` - 203 | sequence in `binary_expression`: - | - 204 | ```js - 205 | { - 206 | // ... - | - 207 | unary_expression: $ => - 208 | prec( - 209 | 2, - 210 | choice( - 211 | seq("-", $.expression), - 212 | seq("!", $.expression), - 213 | // ... 
- 214 | ), - 215 | ); - 216 | } - 217 | ``` - | - 218 | ## Using Associativity - | - 219 | Applying a higher precedence in `unary_expression` fixes that conflict, but there is still another conflict: - | - 220 | ```text - 221 | Error: Unresolved conflict for symbol sequence: - | - 222 | _expression '*' _expression • '*' … - | - 223 | Possible interpretations: - | - 224 | 1: _expression '*' (binary_expression _expression • '*' _expression) - 225 | 2: (binary_expression _expression '*' _expression) • '*' … - | - 226 | Possible resolutions: - | - 227 | 1: Specify a left or right associativity in `binary_expression` - 228 | 2: Add a conflict for these rules: `binary_expression` - 229 | ``` - | - 230 | For an expression like `a * b * c`, it's not clear whether we mean `a * (b * c)` or `(a * b) * c`. - 231 | This is where `prec.left` and `prec.right` come into use. We want to select the second interpretation, so we use `prec.left`. - | - 232 | ```js - 233 | { - 234 | // ... - | - 235 | binary_expression: $ => choice( - 236 | prec.left(2, seq($.expression, '*', $.expression)), - 237 | prec.left(1, seq($.expression, '+', $.expression)), - 238 | // ... - 239 | ), - 240 | } - 241 | ``` - | - 242 | ## Using Conflicts - | - 243 | Sometimes, conflicts are actually desirable. In our JavaScript grammar, expressions and patterns can create intentional ambiguity. - 244 | A construct like `[x, y]` could be legitimately parsed as both an array literal (like in `let a = [x, y]`) or as a destructuring - 245 | pattern (like in `let [x, y] = arr`). 
- | - 246 | ```js - 247 | export default grammar({ - 248 | name: "javascript", - | - 249 | rules: { - 250 | expression: $ => choice( - 251 | $.identifier, - 252 | $.array, - 253 | $.pattern, - 254 | ), - | - 255 | array: $ => seq( - 256 | "[", - 257 | optional(seq( - 258 | $.expression, repeat(seq(",", $.expression)) - 259 | )), - 260 | "]" - 261 | ), - | - 262 | array_pattern: $ => seq( - 263 | "[", - 264 | optional(seq( - 265 | $.pattern, repeat(seq(",", $.pattern)) - 266 | )), - 267 | "]" - 268 | ), - | - 269 | pattern: $ => choice( - 270 | $.identifier, - 271 | $.array_pattern, - 272 | ), - 273 | }, - 274 | }) - 275 | ``` - | - 276 | In such cases, we want the parser to explore both possibilities by explicitly declaring this ambiguity: - | - 277 | ```js - 278 | { - 279 | name: "javascript", - | - 280 | conflicts: $ => [ - 281 | [$.array, $.array_pattern], - 282 | ], - | - 283 | rules: { - 284 | // ... - 285 | }, - 286 | } - 287 | ``` - | - 288 | ```admonish note - 289 | The example is a bit contrived for the purpose of illustrating the usage of conflicts. The actual JavaScript grammar isn't - 290 | structured like that, but this conflict is actually present in the - 291 | [Tree-sitter JavaScript grammar](https://github.com/tree-sitter/tree-sitter-javascript/blob/108b2d4d17a04356a340aea809e4dd5b801eb40d/grammar.js#L100). - 292 | ``` - | - 293 | ## Hiding Rules - | - 294 | You may have noticed in the above examples that some grammar rule name like `_expression` and `_type` began with an underscore. - 295 | Starting a rule's name with an underscore causes the rule to be _hidden_ in the syntax tree. This is useful for rules like - 296 | `_expression` in the grammars above, which always just wrap a single child node. If these nodes were not hidden, they would - 297 | add substantial depth and noise to the syntax tree without making it any easier to understand. 
- | - 298 | ## Using Fields - | - 299 | Often, it's easier to analyze a syntax node if you can refer to its children by _name_ instead of by their position in an - 300 | ordered list. Tree-sitter grammars support this using the `field` function. This function allows you to assign unique names - 301 | to some or all of a node's children: - | - 302 | ```js - 303 | function_definition: $ => - 304 | seq( - 305 | "func", - 306 | field("name", $.identifier), - 307 | field("parameters", $.parameter_list), - 308 | field("return_type", $._type), - 309 | field("body", $.block), - 310 | ); - 311 | ``` - | - 312 | Adding fields like this allows you to retrieve nodes using the [field APIs][field-names-section]. - | - 313 | ## Using Extras - | - 314 | Extras are tokens that can appear anywhere in the grammar, without being explicitly mentioned in a rule. This is useful - 315 | for things like whitespace and comments, which can appear between any two tokens in most programming languages. To define - 316 | an extra, you can use the `extras` function: - | - 317 | ```js - 318 | module.exports = grammar({ - 319 | name: "my_language", - | - 320 | extras: ($) => [ - 321 | /\s/, // whitespace - 322 | $.comment, - 323 | ], - | - 324 | rules: { - 325 | comment: ($) => - 326 | token( - 327 | choice(seq("//", /.*/), seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/")), - 328 | ), - 329 | }, - 330 | }); - 331 | ``` - | - 332 | ```admonish warning - 333 | When adding more complicated tokens to `extras`, it's preferable to associate the pattern - 334 | with a rule. This way, you avoid the lexer inlining this pattern in a bunch of spots, - 335 | which can dramatically reduce the parser size. 
- 336 | ``` - | - 337 | For example, instead of defining the `comment` token inline in `extras`: - | - 338 | ```js - 339 | // ❌ Less preferable - | - 340 | const comment = token( - 341 | choice(seq("//", /.*/), seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/")), - 342 | ); - | - 343 | module.exports = grammar({ - 344 | name: "my_language", - 345 | extras: ($) => [ - 346 | /\s/, // whitespace - 347 | comment, - 348 | ], - 349 | rules: { - 350 | // ... - 351 | }, - 352 | }); - 353 | ``` - | - 354 | We can define it as a rule and then reference it in `extras`: - | - 355 | ```js - 356 | // ✅ More preferable - | - 357 | module.exports = grammar({ - 358 | name: "my_language", - | - 359 | extras: ($) => [ - 360 | /\s/, // whitespace - 361 | $.comment, - 362 | ], - | - 363 | rules: { - 364 | // ... - | - 365 | comment: ($) => - 366 | token( - 367 | choice(seq("//", /.*/), seq("/*", /[^*]*\*+([^/*][^*]*\*+)*/, "/")), - 368 | ), - 369 | }, - 370 | }); - 371 | ``` - | - 372 | ```admonish note - 373 | Tree-sitter intentionally simplifies the whitespace character class, `\s`, to `[ \t\n\r]` as a performance - 374 | optimization. This is because typically users do not require the full Unicode definition of whitespace. - 375 | ``` - | - 376 | ## Using Supertypes - | - 377 | Some rules in your grammar will represent abstract categories of syntax nodes, such as "expression", "type", or "declaration". - 378 | These rules are often defined as simple choices between several other rules. For example, in the JavaScript grammar, the - 379 | `_expression` rule is defined as a choice between many different kinds of expressions: - | - 380 | ```js - 381 | expression: $ => choice( - 382 | $.identifier, - 383 | $.unary_expression, - 384 | $.binary_expression, - 385 | $.call_expression, - 386 | $.member_expression, - 387 | // ... 
- 388 | ), - 389 | ``` - | - 390 | By default, Tree-sitter will generate a visible node type for each of these abstract category rules, which can lead to - 391 | unnecessarily deep and complex syntax trees. To avoid this, you can add these abstract category rules to the grammar's `supertypes` - 392 | definition. Tree-sitter will then treat these rules as _supertypes_, and will not generate visible node types for them in - 393 | the syntax tree. - | - 394 | ```js - 395 | module.exports = grammar({ - 396 | name: "javascript", - | - 397 | supertypes: $ => [ - 398 | $.expression, - 399 | ], - | - 400 | rules: { - 401 | expression: $ => choice( - 402 | $.identifier, - 403 | // ... - 404 | ), - | - 405 | // ... - 406 | }, - 407 | }); - 408 | _ - 409 | ``` - | - 410 | Although supertype rules are hidden from the syntax tree, they can still be used in queries. See the chapter on - 411 | [Query Syntax][query syntax] for more information. - | - 412 | # Lexical Analysis - | - 413 | Tree-sitter's parsing process is divided into two phases: parsing (which is described above) and [lexing][lexing] — the - 414 | process of grouping individual characters into the language's fundamental _tokens_. There are a few important things to - 415 | know about how Tree-sitter's lexing works. - | - 416 | ## Conflicting Tokens - | - 417 | Grammars often contain multiple tokens that can match the same characters. For example, a grammar might contain the tokens - 418 | (`"if"` and `/[a-z]+/`). Tree-sitter differentiates between these conflicting tokens in a few ways. - | - 419 | 1. **Context-aware Lexing** — Tree-sitter performs lexing on-demand, during the parsing process. At any given position - 420 | in a source document, the lexer only tries to recognize tokens that are _valid_ at that position in the document. - | - 421 | 2. 
**Lexical Precedence** — When the precedence functions described [in the previous page][grammar dsl] are used _within_ - 422 | the `token` function, the given explicit precedence values serve as instructions to the lexer. If there are two valid tokens - 423 | that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence. - | - 424 | 3. **Match Length** — If multiple valid tokens with the same precedence match the characters at a given position in a document, - 425 | Tree-sitter will select the token that matches the [longest sequence of characters][longest-match]. - | - 426 | 4. **Match Specificity** — If there are two valid tokens with the same precedence, and they both match the same number - 427 | of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as - 428 | a `RegExp`. - | - 429 | 5. **Rule Order** — If none of the above criteria can be used to select one token over another, Tree-sitter will prefer - 430 | the token that appears earlier in the grammar. - | - 431 | If there is an external scanner it may have [an additional impact][external scanner] over regular tokens - 432 | defined in the grammar. - | - 433 | ## Lexical Precedence vs. Parse Precedence - | - 434 | One common mistake involves not distinguishing _lexical precedence_ from _parse precedence_. Parse precedence determines - 435 | which rule is chosen to interpret a given sequence of tokens. _Lexical precedence_ determines which token is chosen to interpret - 436 | at a given position of text, and it is a lower-level operation that is done first. The above list fully captures Tree-sitter's - 437 | lexical precedence rules, and you will probably refer back to this section of the documentation more often than any other. - 438 | Most of the time when you really get stuck, you're dealing with a lexical precedence problem. 
Pay particular attention to - 439 | the difference in meaning between using `prec` inside the `token` function versus outside it. The _lexical precedence_ syntax, - 440 | as mentioned in the previous page, is `token(prec(N, ...))`. - | - 441 | ## Keywords - | - 442 | Many languages have a set of _keyword_ tokens (e.g. `if`, `for`, `return`), as well as a more general token (e.g. `identifier`) - 443 | that matches any word, including many of the keyword strings. For example, JavaScript has a keyword `instanceof`, which is - 444 | used as a binary operator, like this: - | - 445 | ```js - 446 | if (a instanceof Something) b(); - 447 | ``` - | - 448 | The following, however, is not valid JavaScript: - | - 449 | ```js - 450 | if (a instanceofSomething) b(); - 451 | ``` - | - 452 | A keyword like `instanceof` cannot be followed immediately by another letter, because then it would be tokenized as an `identifier`, - 453 | **even though an identifier is not valid at that position**. Because Tree-sitter uses context-aware lexing, as described - 454 | [above](#conflicting-tokens), it would not normally impose this restriction. By default, Tree-sitter would recognize `instanceofSomething` - 455 | as two separate tokens: the `instanceof` keyword followed by an `identifier`. - | - 456 | ## Keyword Extraction - | - 457 | Fortunately, Tree-sitter has a feature that allows you to fix this, so that you can match the behavior of other standard - 458 | parsers: the `word` token. If you specify a `word` token in your grammar, Tree-sitter will find the set of _keyword_ tokens - 459 | that match strings also matched by the `word` token. Then, during lexing, instead of matching each of these keywords individually, - 460 | Tree-sitter will match the keywords via a two-step process where it _first_ matches the `word` token. 
- | - 461 | For example, suppose we added `identifier` as the `word` token in our JavaScript grammar: - | - 462 | ```js - 463 | grammar({ - 464 | name: "javascript", - | - 465 | word: $ => $.identifier, - | - 466 | rules: { - 467 | expression: $ => - 468 | choice( - 469 | $.identifier, - 470 | $.unary_expression, - 471 | $.binary_expression, - 472 | // ... - 473 | ), - | - 474 | binary_expression: $ => - 475 | choice( - 476 | prec.left(1, seq($.expression, "instanceof", $.expression)), - 477 | // ... - 478 | ), - | - 479 | unary_expression: $ => - 480 | choice( - 481 | prec.left(2, seq("typeof", $.expression)), - 482 | // ... - 483 | ), - | - 484 | identifier: $ => /[a-z_]+/, - 485 | }, - 486 | }); - 487 | ``` - | - 488 | Tree-sitter would identify `typeof` and `instanceof` as keywords. Then, when parsing the invalid code above, rather than - 489 | scanning for the `instanceof` token individually, it would scan for an `identifier` first, and find `instanceofSomething`. - 490 | It would then correctly recognize the code as invalid. - | - 491 | Aside from improving error detection, keyword extraction also has performance benefits. It allows Tree-sitter to generate - 492 | a smaller, simpler lexing function, which means that **the parser will compile much more quickly**. - | - 493 | ```admonish note - 494 | The word token must be a unique token that is not reused by another rule. 
If you want to have a word token used in a - 495 | rule that's called something else, you should just alias the word token instead, like how the Rust grammar does it - 496 | here - 497 | ``` - | - 498 | [ambiguous-grammar]: https://en.wikipedia.org/wiki/Ambiguous_grammar - 499 | [antlr]: https://www.antlr.org - 500 | [bison]: https://en.wikipedia.org/wiki/GNU_bison - 501 | [cst]: https://en.wikipedia.org/wiki/Parse_tree - 502 | [ecmascript-spec]: https://262.ecma-international.org/6.0/ - 503 | [external scanner]: ./4-external-scanners.md#other-external-scanner-details - 504 | [glr-parsing]: https://en.wikipedia.org/wiki/GLR_parser - 505 | [grammar dsl]: ./2-the-grammar-dsl.md - 506 | [language-spec]: https://en.wikipedia.org/wiki/Programming_language_specification - 507 | [lexing]: https://en.wikipedia.org/wiki/Lexical_analysis - 508 | [longest-match]: https://en.wikipedia.org/wiki/Maximal_munch - 509 | [lr-grammars]: https://en.wikipedia.org/wiki/LR_parser - 510 | [field-names-section]: ../using-parsers/2-basic-parsing.md#node-field-names - 511 | [non-terminal]: https://en.wikipedia.org/wiki/Terminal_and_nonterminal_symbols - 512 | [peg]: https://en.wikipedia.org/wiki/Parsing_expression_grammar - 513 | [query syntax]: ../using-parsers/queries/1-syntax.md#supertype-nodes - 514 | [tree-sitter-javascript]: https://github.com/tree-sitter/tree-sitter-javascript - 515 | [yacc]: https://en.wikipedia.org/wiki/Yacc - - - --------------------------------------------------------------------------------- -/docs/src/creating-parsers/4-external-scanners.md: --------------------------------------------------------------------------------- - 1 | # External Scanners - | - 2 | Many languages have some tokens whose structure is impossible or inconvenient to describe with a regular expression. 
- 3 | Some examples: - | - 4 | - [Indent and dedent][indent-tokens] tokens in Python - 5 | - [Heredocs][heredoc] in Bash and Ruby - 6 | - [Percent strings][percent-string] in Ruby - | - 7 | Tree-sitter allows you to handle these kinds of tokens using _external scanners_. An external scanner is a set of C functions - 8 | that you, the grammar author, can write by hand to add custom logic for recognizing certain tokens. - | - 9 | To use an external scanner, there are a few steps. First, add an `externals` section to your grammar. This section should - 10 | list the names of all of your external tokens. These names can then be used elsewhere in your grammar. - | - 11 | ```js - 12 | grammar({ - 13 | name: "my_language", - | - 14 | externals: $ => [$.indent, $.dedent, $.newline], - | - 15 | // ... - 16 | }); - 17 | ``` - | - 18 | Then, add another C source file to your project. Its path must be src/scanner.c for the CLI to recognize it. Be sure to add - 19 | this file to the sources section of your `binding.gyp` file so that it will be included when your project is compiled by - 20 | Node.js and uncomment the appropriate block in your bindings/rust/build.rs file so that it will be included in your Rust - 21 | crate. - | - 22 | In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering - 23 | of this enum must match the order in your grammar's `externals` array; the actual names do not matter. - | - 24 | ```c - 25 | #include "tree_sitter/parser.h" - 26 | #include "tree_sitter/alloc.h" - 27 | #include "tree_sitter/array.h" - | - 28 | enum TokenType { - 29 | INDENT, - 30 | DEDENT, - 31 | NEWLINE - 32 | } - 33 | ``` - | - 34 | Finally, you must define five functions with specific names, based on your language's name and five actions: - 35 | _create_, _destroy_, _serialize_, _deserialize_, and _scan_. 
- | - 36 | ## Create - | - 37 | ```c - 38 | void * tree_sitter_my_language_external_scanner_create() { - 39 | // ... - 40 | } - 41 | ``` - | - 42 | This function should create your scanner object. It will only be called once anytime your language is set on a parser. - 43 | Often, you will want to allocate memory on the heap and return a pointer to it. If your external scanner doesn't need to - 44 | maintain any state, it's ok to return `NULL`. - | - 45 | ## Destroy - | - 46 | ```c - 47 | void tree_sitter_my_language_external_scanner_destroy(void *payload) { - 48 | // ... - 49 | } - 50 | ``` - | - 51 | This function should free any memory used by your scanner. It is called once when a parser is deleted or assigned a different - 52 | language. It receives as an argument the same pointer that was returned from the _create_ function. If your _create_ function - 53 | didn't allocate any memory, this function can be a no-op. - | - 54 | ## Serialize - | - 55 | ```c - 56 | unsigned tree_sitter_my_language_external_scanner_serialize( - 57 | void *payload, - 58 | char *buffer - 59 | ) { - 60 | // ... - 61 | } - 62 | ``` - | - 63 | This function should copy the complete state of your scanner into a given byte buffer, and return the number of bytes written. - 64 | The function is called every time the external scanner successfully recognizes a token. It receives a pointer to your scanner - 65 | and a pointer to a buffer. The maximum number of bytes that you can write is given by the `TREE_SITTER_SERIALIZATION_BUFFER_SIZE` - 66 | constant, defined in the `tree_sitter/parser.h` header file. - | - 67 | The data that this function writes will ultimately be stored in the syntax tree so that the scanner can be restored to the - 68 | right state when handling edits or ambiguities. For your parser to work correctly, the `serialize` function must store its - 69 | entire state, and `deserialize` must restore the entire state. 
For good performance, you should design your scanner so that - 70 | its state can be serialized as quickly and compactly as possible. - | - 71 | ## Deserialize - | - 72 | ```c - 73 | void tree_sitter_my_language_external_scanner_deserialize( - 74 | void *payload, - 75 | const char *buffer, - 76 | unsigned length - 77 | ) { - 78 | // ... - 79 | } - 80 | ``` - | - 81 | This function should _restore_ the state of your scanner based on the bytes that were previously written by the `serialize` - 82 | function. It is called with a pointer to your scanner, a pointer to the buffer of bytes, and the number of bytes that should - 83 | be read. It is good practice to explicitly erase your scanner state variables at the start of this function, before restoring - 84 | their values from the byte buffer. - | - 85 | ## Scan - | - 86 | Typically, one will - | - 87 | - Call `lexer->advance` several times, if the characters are valid for the token being lexed. - | - 88 | - Optionally, call `lexer->mark_end` to mark the end of the token, and "peek ahead" - 89 | to check if the next character (or set of characters) invalidates the token. - | - 90 | - Set `lexer->result_symbol` to the token type. - | - 91 | - Return `true` from the scanning function, indicating that a token was successfully lexed. - | - 92 | Tree-sitter will then push the resulting node to the parse stack, and the input position will remain where it reached at the - 93 | point `lexer->mark_end` was called. - | - 94 | ```c - 95 | bool tree_sitter_my_language_external_scanner_scan( - 96 | void *payload, - 97 | TSLexer *lexer, - 98 | const bool *valid_symbols - 99 | ) { - 100 | // ... - 101 | } - 102 | ``` - | - 103 | The second parameter to this function is the lexer, of type `TSLexer`. The `TSLexer` struct has the following fields: - | - 104 | - **`int32_t lookahead`** — The current next character in the input stream, represented as a 32-bit unicode code point.
- | - 105 | - **`TSSymbol result_symbol`** — The symbol that was recognized. Your scan function should _assign_ to this field one of - 106 | the values from the `TokenType` enum, described above. - | - 107 | - **`void (*advance)(TSLexer *, bool skip)`** — A function for advancing to the next character. If you pass `true` for - 108 | the second argument, the current character will be treated as whitespace; whitespace won't be included in the text range - 109 | associated with tokens emitted by the external scanner. - | - 110 | - **`void (*mark_end)(TSLexer *)`** — A function for marking the end of the recognized token. This allows matching tokens - 111 | that require multiple characters of lookahead. By default, (if you don't call `mark_end`), any character that you moved past - 112 | using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls - 113 | to `advance` will _not_ increase the size of the returned token. You can call `mark_end` multiple times to increase the size - 114 | of the token. - | - 115 | - **`uint32_t (*get_column)(TSLexer *)`** — A function for querying the current column position of the lexer. It returns - 116 | the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this - 117 | function by reading from the start of the line. - | - 118 | - **`bool (*is_at_included_range_start)(const TSLexer *)`** — A function for checking whether the parser has just skipped - 119 | some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function - 120 | (described in the [multi-language document section][multi-language-section]), the scanner may want to apply some special - 121 | behavior when moving to a disjoint part of the document. 
For example, in [EJS documents][ejs], the JavaScript parser uses - 122 | this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`. - | - 123 | - **`bool (*eof)(const TSLexer *)`** — A function for determining whether the lexer is at the end of the file. The value - 124 | of `lookahead` will be `0` at the end of a file, but this function should be used instead of checking for that value because - 125 | the `0` or "NUL" value is also a valid character that could be present in the file being parsed. - | - 126 | The third argument to the `scan` function is an array of booleans that indicates which of the external tokens are expected by - 127 | the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot - 128 | backtrack, so you may need to combine certain pieces of logic. - | - 129 | ```c - 130 | if (valid_symbols[INDENT] || valid_symbols[DEDENT]) { - | - 131 | // ... logic that is common to both `INDENT` and `DEDENT` - | - 132 | if (valid_symbols[INDENT]) { - | - 133 | // ... logic that is specific to `INDENT` - | - 134 | lexer->result_symbol = INDENT; - 135 | return true; - 136 | } - 137 | } - 138 | ``` - | - 139 | ## External Scanner Helpers - | - 140 | ### Allocator - | - 141 | Instead of using libc's `malloc`, `calloc`, `realloc`, and `free`, you should use the versions prefixed with `ts_` from `tree_sitter/alloc.h`. - 142 | These macros can allow a potential consumer to override the default allocator with their own implementation, but by default - 143 | will use the libc functions. - | - 144 | As a consumer of the tree-sitter core library as well as any parser libraries that might use allocations, you can enable - 145 | overriding the default allocator and have it use the same one as the library allocator, which you can set with `ts_set_allocator`.
- 146 | To enable this overriding in scanners, you must compile them with the `TREE_SITTER_REUSE_ALLOCATOR` macro defined, and the tree-sitter - 147 | library must be linked into your final app dynamically, since it needs to resolve the internal functions at runtime. - 148 | If you are compiling an executable binary that uses the core library, but want to load parsers dynamically at runtime, then - 149 | you will have to use a special linker flag on Unix. For non-Darwin systems, that would be `--dynamic-list` and for Darwin - 150 | systems, that would be `-exported_symbols_list`. The CLI does exactly this, so you can use it as a reference (check out `cli/build.rs`). - | - 151 | For example, assuming you wanted to allocate 100 bytes for your scanner, you'd do so like the following example: - | - 152 | ```c - 153 | #include "tree_sitter/parser.h" - 154 | #include "tree_sitter/alloc.h" - | - 155 | // ... - | - 156 | void* tree_sitter_my_language_external_scanner_create() { - 157 | return ts_calloc(100, 1); // or ts_malloc(100) - 158 | } - | - 159 | // ... - | - 160 | ``` - | - 161 | ### Arrays - | - 162 | If you need to use array-like types in your scanner, such as tracking a stack of indentations or tags, you should use the - 163 | array macros from `tree_sitter/array.h`. - | - 164 | There are quite a few of them provided for you, but here's how you could get started tracking some state. Check out the header - 165 | itself for more detailed documentation. - | - 166 | ```admonish attention - 167 | Do not use any of the array functions or macros that are prefixed with an underscore and have comments saying - 168 | that it is not what you are looking for. These are internal functions used as helpers by other macros that are public. - 169 | They are not meant to be used directly, nor are they what you want.
- 170 | ``` - | - 171 | ```c - 172 | #include "tree_sitter/parser.h" - 173 | #include "tree_sitter/array.h" - | - 174 | enum TokenType { - 175 | INDENT, - 176 | DEDENT, - 177 | NEWLINE, - 178 | STRING, - 179 | } - | - 180 | // Create the array in your create function - | - 181 | void* tree_sitter_my_language_external_scanner_create() { - 182 | return ts_calloc(1, sizeof(Array(int))); - | - 183 | // or if you want to zero out the memory yourself - | - 184 | Array(int) *stack = ts_malloc(sizeof(Array(int))); - 185 | array_init(&stack); - 186 | return stack; - 187 | } - | - 188 | bool tree_sitter_my_language_external_scanner_scan( - 189 | void *payload, - 190 | TSLexer *lexer, - 191 | const bool *valid_symbols - 192 | ) { - 193 | Array(int) *stack = payload; - 194 | if (valid_symbols[INDENT]) { - 195 | array_push(stack, lexer->get_column(lexer)); - 196 | lexer->result_symbol = INDENT; - 197 | return true; - 198 | } - 199 | if (valid_symbols[DEDENT]) { - 200 | array_pop(stack); // this returns the popped element by value, but we don't need it - 201 | lexer->result_symbol = DEDENT; - 202 | return true; - 203 | } - | - 204 | // we can also use an array on the stack to keep track of a string - | - 205 | Array(char) next_string = array_new(); - | - 206 | if (valid_symbols[STRING] && lexer->lookahead == '"') { - 207 | lexer->advance(lexer, false); - 208 | while (lexer->lookahead != '"' && lexer->lookahead != '\n' && !lexer->eof(lexer)) { - 209 | array_push(&next_string, lexer->lookahead); - 210 | lexer->advance(lexer, false); - 211 | } - | - 212 | // assume we have some arbitrary constraint of not having more than 100 characters in a string - 213 | if (lexer->lookahead == '"' && next_string.size <= 100) { - 214 | lexer->advance(lexer, false); - 215 | lexer->result_symbol = STRING; - 216 | return true; - 217 | } - 218 | } - | - 219 | return false; - 220 | } - | - 221 | ``` - | - 222 | ## Other External Scanner Details - | - 223 | External scanners have priority over 
Tree-sitter's normal lexing process. When a token listed in the externals array is valid - 224 | at a given position, the external scanner is called first. This makes external scanners a powerful way to override Tree-sitter's - 225 | default lexing behavior, especially for cases that can't be handled with regular lexical rules, parsing, or dynamic precedence. - | - 226 | During error recovery, Tree-sitter's first step is to call the external scanner's scan function with all tokens marked as - 227 | valid. Your scanner should detect and handle this case appropriately. One simple approach is to add an unused "sentinel" - 228 | token at the end of your externals array: - | - 229 | ```js - 230 | { - 231 | name: "my_language", - | - 232 | externals: $ => [$.token1, $.token2, $.error_sentinel] - | - 233 | // ... - 234 | } - 235 | ``` - | - 236 | You can then check if this sentinel token is marked valid to determine if Tree-sitter is in error recovery mode. - | - 237 | If you would rather not handle the error recovery case explicitly, the easiest way to "opt-out" and let tree-sitter's internal - 238 | lexer handle it is to return `false` from your scan function when `valid_symbols` contains the error sentinel. - | - 239 | ```c - 240 | bool tree_sitter_my_language_external_scanner_scan( - 241 | void *payload, - 242 | TSLexer *lexer, - 243 | const bool *valid_symbols - 244 | ) { - 245 | if (valid_symbols[ERROR_SENTINEL]) { - 246 | return false; - 247 | } - 248 | // ... - 249 | } - 250 | ``` - | - 251 | When you include literal keywords in the externals array, for example: - | - 252 | ```js - 253 | externals: $ => ['if', 'then', 'else'] - 254 | ``` - | - 255 | _those_ keywords will - 256 | be tokenized by the external scanner whenever they appear in the grammar. 
- | - 257 | This is equivalent to declaring named tokens and aliasing them: - | - 258 | ```js - 259 | { - 260 | name: "my_language", - | - 261 | externals: $ => [$.if_keyword, $.then_keyword, $.else_keyword], - | - 262 | rules: { - | - 263 | // then using it in a rule like so: - 264 | if_statement: $ => seq(alias($.if_keyword, 'if'), ...), - | - 265 | // ... - 266 | } - 267 | } - 268 | ``` - | - 269 | The tokenization process for external keywords works as follows: - | - 270 | 1. The external scanner attempts to recognize the token first - 271 | 2. If the scanner returns true and sets a token, that token is used - 272 | 3. If the scanner returns false, Tree-sitter falls back to its internal lexer - | - 273 | However, when you use rule references (like `$.if_keyword`) in the externals array without defining the corresponding rules - 274 | in the grammar, Tree-sitter cannot fall back to its internal lexer. In this case, the external scanner is solely responsible - 275 | for recognizing these tokens. 
- | - 276 | ```admonish danger - 277 | - External scanners can easily create infinite loops - | - 278 | - Be extremely careful when emitting zero-width tokens - | - 279 | - Always use the `eof` function when looping through characters - 280 | ``` - | - 281 | [ejs]: https://ejs.co - 282 | [enum]: https://en.wikipedia.org/wiki/Enumerated_type#C - 283 | [heredoc]: https://en.wikipedia.org/wiki/Here_document - 284 | [indent-tokens]: https://en.wikipedia.org/wiki/Off-side_rule - 285 | [multi-language-section]: ../using-parsers/3-advanced-parsing.md#multi-language-documents - 286 | [percent-string]: https://docs.ruby-lang.org/en/2.5.0/doc/syntax/literals_rdoc.html#label-Percent+Strings - - - --------------------------------------------------------------------------------- -/docs/src/creating-parsers/5-writing-tests.md: --------------------------------------------------------------------------------- - 1 | # Writing Tests - | - 2 | For each rule that you add to the grammar, you should first create a *test* that describes how the syntax trees should look - 3 | when parsing that rule. These tests are written using specially-formatted text files in the `test/corpus/` directory within - 4 | your parser's root folder. - | - 5 | For example, you might have a file called `test/corpus/statements.txt` that contains a series of entries like this: - | - 6 | ```text - 7 | ================== - 8 | Return statements - 9 | ================== - | - 10 | func x() int { - 11 | return 1; - 12 | } - | - 13 | --- - | - 14 | (source_file - 15 | (function_definition - 16 | (identifier) - 17 | (parameter_list) - 18 | (primitive_type) - 19 | (block - 20 | (return_statement (number))))) - 21 | ``` - | - 22 | * The **name** of each test is written between two lines containing only `=` (equal sign) characters. - | - 23 | * Then the **input source code** is written, followed by a line containing three or more `-` (dash) characters. 
- | - 24 | * Then, the **expected output syntax tree** is written as an [S-expression][s-exp]. The exact placement of whitespace in - 25 | the S-expression doesn't matter, but ideally the syntax tree should be legible. - | - 26 | ```admonish tip - 27 | The S-expression does not show syntax nodes like `func`, `(` and `;`, which are expressed as strings and regexes in the grammar. - 28 | It only shows the *named* nodes, as described in [this section][named-vs-anonymous-nodes] of the page on parser usage. - 29 | ``` - | - 30 | The expected output section can also *optionally* show the [*field names*][node-field-names] associated with each child - 31 | node. To include field names in your tests, you write a node's field name followed by a colon, before the node itself in - 32 | the S-expression: - | - 33 | ```query - 34 | (source_file - 35 | (function_definition - 36 | name: (identifier) - 37 | parameters: (parameter_list) - 38 | result: (primitive_type) - 39 | body: (block - 40 | (return_statement (number))))) - 41 | ``` - | - 42 | * If your language's syntax conflicts with the `===` and `---` test separators, you can optionally add an arbitrary identical - 43 | suffix (in the below example, `|||`) to disambiguate them: - | - 44 | ```text - 45 | ==================||| - 46 | Basic module - 47 | ==================||| - | - 48 | ---- MODULE Test ---- - 49 | increment(n) == n + 1 - 50 | ==== - | - 51 | ---||| - | - 52 | (source_file - 53 | (module (identifier) - 54 | (operator (identifier) - 55 | (parameter_list (identifier)) - 56 | (plus (identifier_ref) (number))))) - 57 | ``` - | - 58 | These tests are important. They serve as the parser's API documentation, and they can be run every time you change the grammar - 59 | to verify that everything still parses correctly. - | - 60 | By default, the `tree-sitter test` command runs all the tests in your `test/corpus/` folder. 
To run a particular test, you - 61 | can use the `-i` flag: - | - 62 | ```sh - 63 | tree-sitter test -i 'Return statements' - 64 | ``` - | - 65 | The recommendation is to be comprehensive in adding tests. If it's a visible node, add it to a test file in your `test/corpus` - 66 | directory. It's typically a good idea to test all the permutations of each language construct. This not only increases test coverage, - 67 | but also acquaints readers with a way to examine expected outputs and understand the "edges" of a language. - | - 68 | ## Attributes - | - 69 | Tests can be annotated with a few `attributes`. Attributes must be put in the header, below the test name, and start with - 70 | a `:`. A couple of attributes also take a parameter, which requires the use of parentheses. - | - 71 | ```admonish tip - 72 | If you'd like to supply multiple parameters, e.g. to run tests on multiple platforms or to test multiple languages, - 73 | you can repeat the attribute on a new line. - 74 | ``` - | - 75 | The following attributes are available: - | - 76 | * `:cst` — This attribute specifies that the expected output should be in the form of a CST instead of the normal S-expression. This - 77 | CST matches the format given by `parse --cst`. - 78 | * `:error` — This attribute will assert that the parse tree contains an error. It's useful to just validate that a certain - 79 | input is invalid without displaying the whole parse tree; as such, you should omit the parse tree below the `---` line. - 80 | * `:fail-fast` — This attribute will stop the testing of additional cases if the test marked with this attribute fails. - 81 | * `:language(LANG)` — This attribute will run the tests using the parser for the specified language. This is useful for - 82 | multi-parser repos, such as XML and DTD, or Typescript and TSX. 
The default parser used will always be the first entry in - 83 | the `grammars` field in the `tree-sitter.json` config file, so having a way to pick a second or even third parser is useful. - 84 | * `:platform(PLATFORM)` — This attribute specifies the platform on which the test should run. It is useful to test platform-specific - 85 | behavior (e.g. Windows newlines are different from Unix). This attribute must match up with Rust's [`std::env::consts::OS`][constants]. - 86 | * `:skip` — This attribute will skip the test when running `tree-sitter test`. - 87 | This is useful when you want to temporarily disable running a test without deleting it. - | - 88 | Examples using attributes: - | - 89 | ```text - 90 | ========================= - 91 | Test that will be skipped - 92 | :skip - 93 | ========================= - | - 94 | int main() {} - | - 95 | ------------------------- - | - 96 | ==================================== - 97 | Test that will run on Linux or macOS - | - 98 | :platform(linux) - 99 | :platform(macos) - 100 | ==================================== - | - 101 | int main() {} - | - 102 | ------------------------------------ - | - 103 | ======================================================================== - 104 | Test that expects an error, and will fail fast if there's no parse error - 105 | :fail-fast - 106 | :error - 107 | ======================================================================== - | - 108 | int main ( {} - | - 109 | ------------------------------------------------------------------------ - | - 110 | ================================================= - 111 | Test that will parse with both Typescript and TSX - 112 | :language(typescript) - 113 | :language(tsx) - 114 | ================================================= - | - 115 | console.log('Hello, world!'); - | - 116 | ------------------------------------------------- - 117 | ``` - | - 118 | ### Automatic Compilation - | - 119 | You might notice that the first time you run `tree-sitter 
test` after regenerating your parser, it takes some extra time. - 120 | This is because Tree-sitter automatically compiles your C code into a dynamically-loadable library. It recompiles your parser - 121 | as-needed whenever you update it by re-running `tree-sitter generate`, or whenever the [external scanner][external-scanners] - 122 | file is changed. - | - 123 | [constants]: https://doc.rust-lang.org/std/env/consts/constant.OS.html - 124 | [external-scanners]: ./4-external-scanners.md - 125 | [node-field-names]: ../using-parsers/2-basic-parsing.md#node-field-names - 126 | [s-exp]: https://en.wikipedia.org/wiki/S-expression - 127 | [named-vs-anonymous-nodes]: ../using-parsers/2-basic-parsing.md#named-vs-anonymous-nodes - - - --------------------------------------------------------------------------------- -/docs/src/creating-parsers/6-publishing.md: --------------------------------------------------------------------------------- - 1 | # Publishing your grammar - | - 2 | Once you feel that your parser is in a stable working state for consumers to use, you can publish it to various registries. - 3 | It's strongly recommended to publish grammars to GitHub, [crates.io][crates.io] (Rust), [npm][npm] (JavaScript), and [PyPI][pypi] - 4 | (Python) to make it easier for others to find and use your grammar. - | - 5 | If your grammar is hosted on GitHub, you can make use of our [reusable workflows][workflows] to handle the publishing process - 6 | for you. This action will automatically handle regenerating and publishing your grammar in CI, so long as you have the required - 7 | tokens setup for the various registries. For an example of this workflow in action, see the [Python grammar's GitHub][python-gh] - | - 8 | ## From start to finish - | - 9 | To release a new grammar (or publish your first version), these are the steps you should follow: - | - 10 | 1. Bump your version to the desired version with `tree-sitter version`. 
For example, if you're releasing version `1.0.0` - 11 | of your grammar, you'd run `tree-sitter version 1.0.0`. - 12 | 2. Commit the changes with `git commit -am "Release 1.0.0"` or however you like (ensure that your working directory is - 13 | clean). - 14 | 3. Tag the commit with `git tag -- v1.0.0`. - 15 | 4. Push the commit and tag with `git push --tags origin main` (assuming you're on the `main` branch, and `origin` is your - 16 | remote). - 17 | 5. (optional) If you've set up the GitHub workflows for your grammar, the release will be automatically published to GitHub, - 18 | crates.io, npm, and PyPI. - | - 19 | ### Adhering to Semantic Versioning - | - 20 | When releasing new versions of your grammar, it's important to adhere to [Semantic Versioning][semver]. This ensures that - 21 | consumers can predictably update their dependencies and that their existing tree-sitter integrations (queries, tree traversal - 22 | code, node type checks) will continue to work as expected when upgrading. - | - 23 | 1. Increment the major version when you make incompatible changes to the grammar's node types or structure - 24 | 2. Increment the minor version when you add new node types or patterns while maintaining backward compatibility - 25 | 3. Increment the patch version when you fix bugs without changing the grammar's structure - | - 26 | For grammars in version 0.y.z (zero version), the usual semantic versioning rules are technically relaxed. However, if your - 27 | grammar already has users, it's recommended to treat version changes more conservatively: - | - 28 | - Treat patch version (`z`) changes as if they were minor version changes - 29 | - Treat minor version (`y`) changes as if they were major version changes - | - 30 | This helps maintain stability for existing users during the pre-1.0 phase. By following these versioning guidelines, you - 31 | ensure that downstream users can safely upgrade without their existing queries breaking. 
- | - 32 | [crates.io]: https://crates.io - 33 | [npm]: https://www.npmjs.com - 34 | [pypi]: https://pypi.org - 35 | [python-gh]: https://github.com/tree-sitter/tree-sitter-python/blob/master/.github/workflows/publish.yml - 36 | [semver]: https://semver.org/ - 37 | [workflows]: https://github.com/tree-sitter/workflows - - - --------------------------------------------------------------------------------- -/docs/src/creating-parsers/index.md: --------------------------------------------------------------------------------- - 1 | # Creating parsers - | - 2 | Developing Tree-sitter grammars can have a difficult learning curve, but once you get the hang of it, it can be fun and even - 3 | zen-like. This document will help you to get started and to develop a useful mental model. - - - --------------------------------------------------------------------------------- -/docs/src/index.md: --------------------------------------------------------------------------------- - 1 |
- 2 | Tree-sitter logo - 3 |
- | - 4 | # Introduction - | - 5 | Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source - 6 | file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be: - | - 7 | - **General** enough to parse any programming language - 8 | - **Fast** enough to parse on every keystroke in a text editor - 9 | - **Robust** enough to provide useful results even in the presence of syntax errors - 10 | - **Dependency-free** so that the runtime library (which is written in pure [C11](https://github.com/tree-sitter/tree-sitter/tree/master/lib)) can be embedded in any application - | - 11 | ## Language Bindings - | - 12 | There are bindings that allow Tree-sitter to be used from the following languages: - | - 13 | ### Official - | - 14 | - [C#](https://github.com/tree-sitter/csharp-tree-sitter) - 15 | - [Go](https://github.com/tree-sitter/go-tree-sitter) - 16 | - [Haskell](https://github.com/tree-sitter/haskell-tree-sitter) - 17 | - [Java (JDK 22+)](https://github.com/tree-sitter/java-tree-sitter) - 18 | - [JavaScript (Node.js)](https://github.com/tree-sitter/node-tree-sitter) - 19 | - [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) - 20 | - [Kotlin](https://github.com/tree-sitter/kotlin-tree-sitter) - 21 | - [Python](https://github.com/tree-sitter/py-tree-sitter) - 22 | - [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) - 23 | - [Swift](https://github.com/tree-sitter/swift-tree-sitter) - 24 | - [Zig](https://github.com/tree-sitter/zig-tree-sitter) - | - 25 | ### Third-party - | - 26 | - [C# (.NET)](https://github.com/zabbius/dotnet-tree-sitter) - 27 | - [C++](https://github.com/nsumner/cpp-tree-sitter) - 28 | - [Crystal](https://github.com/crystal-lang-tools/crystal-tree-sitter) - 29 | - [D](https://github.com/aminya/d-tree-sitter) - 30 | - [Delphi](https://github.com/modersohn/delphi-tree-sitter) - 31 | - 
[ELisp](https://www.gnu.org/software/emacs/manual/html_node/elisp/Parsing-Program-Source.html) - 32 | - [Go](https://github.com/alexaandru/go-tree-sitter-bare) - 33 | - [Guile](https://github.com/Z572/guile-ts) - 34 | - [Janet](https://github.com/sogaiu/janet-tree-sitter) - 35 | - [Java (JDK 8+)](https://github.com/bonede/tree-sitter-ng) - 36 | - [Java (JDK 11+)](https://github.com/seart-group/java-tree-sitter) - 37 | - [Julia](https://github.com/MichaelHatherly/TreeSitter.jl) - 38 | - [Lua](https://github.com/euclidianAce/ltreesitter) - 39 | - [Lua](https://github.com/xcb-xwii/lua-tree-sitter) - 40 | - [OCaml](https://github.com/semgrep/ocaml-tree-sitter-core) - 41 | - [Odin](https://github.com/laytan/odin-tree-sitter) - 42 | - [Perl](https://metacpan.org/pod/Text::Treesitter) - 43 | - [Pharo](https://github.com/Evref-BL/Pharo-Tree-Sitter) - 44 | - [PHP](https://github.com/soulseekah/ext-treesitter) - 45 | - [R](https://github.com/DavisVaughan/r-tree-sitter) - 46 | - [Ruby](https://github.com/Faveod/ruby-tree-sitter) - | - 47 | _Keep in mind that some of the bindings may be incomplete or out of date._ - | - 48 | ## Parsers - | - 49 | The following parsers can be found in the upstream organization: - | - 50 | - [Agda](https://github.com/tree-sitter/tree-sitter-agda) - 51 | - [Bash](https://github.com/tree-sitter/tree-sitter-bash) - 52 | - [C](https://github.com/tree-sitter/tree-sitter-c) - 53 | - [C++](https://github.com/tree-sitter/tree-sitter-cpp) - 54 | - [C#](https://github.com/tree-sitter/tree-sitter-c-sharp) - 55 | - [CSS](https://github.com/tree-sitter/tree-sitter-css) - 56 | - [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template) - 57 | - [Go](https://github.com/tree-sitter/tree-sitter-go) - 58 | - [Haskell](https://github.com/tree-sitter/tree-sitter-haskell) - 59 | - [HTML](https://github.com/tree-sitter/tree-sitter-html) - 60 | - [Java](https://github.com/tree-sitter/tree-sitter-java) - 61 | - 
[JavaScript](https://github.com/tree-sitter/tree-sitter-javascript) - 62 | - [JSDoc](https://github.com/tree-sitter/tree-sitter-jsdoc) - 63 | - [JSON](https://github.com/tree-sitter/tree-sitter-json) - 64 | - [Julia](https://github.com/tree-sitter/tree-sitter-julia) - 65 | - [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml) - 66 | - [PHP](https://github.com/tree-sitter/tree-sitter-php) - 67 | - [Python](https://github.com/tree-sitter/tree-sitter-python) - 68 | - [Regex](https://github.com/tree-sitter/tree-sitter-regex) - 69 | - [Ruby](https://github.com/tree-sitter/tree-sitter-ruby) - 70 | - [Rust](https://github.com/tree-sitter/tree-sitter-rust) - 71 | - [Scala](https://github.com/tree-sitter/tree-sitter-scala) - 72 | - [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript) - 73 | - [Verilog](https://github.com/tree-sitter/tree-sitter-verilog) - | - 74 | A list of known parsers can be found in the [wiki](https://github.com/tree-sitter/tree-sitter/wiki/List-of-parsers). 
- | - 75 | ## Talks on Tree-sitter - | - 76 | - [Strange Loop 2018](https://www.thestrangeloop.com/2018/tree-sitter---a-new-parsing-system-for-programming-tools.html) - 77 | - [FOSDEM 2018](https://www.youtube.com/watch?v=0CGzC_iss-8) - 78 | - [GitHub Universe 2017](https://www.youtube.com/watch?v=a1rC79DHpmY) - | - 79 | ## Underlying Research - | - 80 | The design of Tree-sitter was greatly influenced by the following research papers: - | - 81 | - [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf) - 82 | - [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf) - 83 | - [Efficient and Flexible Incremental Parsing](https://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf) - 84 | - [Incremental Analysis of Real Programming Languages](https://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf) - 85 | - [Error Detection and Recovery in LR Parsers](https://web.archive.org/web/20240302031213/https://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13) - 86 | - [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf) - - - --------------------------------------------------------------------------------- -/docs/src/SUMMARY.md: --------------------------------------------------------------------------------- - 1 | # Summary - | - 2 | [Introduction](./index.md) - | - 3 | # User Guide - | - 4 | - [Using Parsers](./using-parsers/index.md) - 5 | - [Getting Started](./using-parsers/1-getting-started.md) - 6 | - [Basic Parsing](./using-parsers/2-basic-parsing.md) - 7 | - [Advanced Parsing](./using-parsers/3-advanced-parsing.md) - 8 | - [Walking Trees](./using-parsers/4-walking-trees.md) - 9 | - [Queries](./using-parsers/queries/index.md) - 10 | - [Basic Syntax](./using-parsers/queries/1-syntax.md) - 11 | - [Operators](./using-parsers/queries/2-operators.md) - 12 | - 
[Predicates and Directives](./using-parsers/queries/3-predicates-and-directives.md) - 13 | - [API](./using-parsers/queries/4-api.md) - 14 | - [Static Node Types](./using-parsers/6-static-node-types.md) - 15 | - [Creating Parsers](./creating-parsers/index.md) - 16 | - [Getting Started](./creating-parsers/1-getting-started.md) - 17 | - [The Grammar DSL](./creating-parsers/2-the-grammar-dsl.md) - 18 | - [Writing the Grammar](./creating-parsers/3-writing-the-grammar.md) - 19 | - [External Scanners](./creating-parsers/4-external-scanners.md) - 20 | - [Writing Tests](./creating-parsers/5-writing-tests.md) - 21 | - [Publishing Parsers](./creating-parsers/6-publishing.md) - 22 | - [Syntax Highlighting](./3-syntax-highlighting.md) - 23 | - [Code Navigation](./4-code-navigation.md) - 24 | - [Implementation](./5-implementation.md) - 25 | - [Contributing](./6-contributing.md) - 26 | - [Playground](./7-playground.md) - | - 27 | # Reference Guide - | - 28 | - [Command Line Interface](./cli/index.md) - 29 | - [Init Config](./cli/init-config.md) - 30 | - [Init](./cli/init.md) - 31 | - [Generate](./cli/generate.md) - 32 | - [Build](./cli/build.md) - 33 | - [Parse](./cli/parse.md) - 34 | - [Test](./cli/test.md) - 35 | - [Version](./cli/version.md) - 36 | - [Fuzz](./cli/fuzz.md) - 37 | - [Query](./cli/query.md) - 38 | - [Highlight](./cli/highlight.md) - 39 | - [Tags](./cli/tags.md) - 40 | - [Playground](./cli/playground.md) - 41 | - [Dump Languages](./cli/dump-languages.md) - 42 | - [Complete](./cli/complete.md) - - - --------------------------------------------------------------------------------- -/docs/src/using-parsers/1-getting-started.md: --------------------------------------------------------------------------------- - 1 | # Getting Started - | - 2 | ## Building the Library - | - 3 | To build the library on a POSIX system, just run `make` in the Tree-sitter directory. This will create a static library - 4 | called `libtree-sitter.a` as well as dynamic libraries. 
- | - 5 | Alternatively, you can incorporate the library in a larger project's build system by adding one source file to the build. - 6 | This source file needs two directories to be in the include path when compiled: - | - 7 | **source file:** - | - 8 | - `tree-sitter/lib/src/lib.c` - | - 9 | **include directories:** - | - 10 | - `tree-sitter/lib/src` - 11 | - `tree-sitter/lib/include` - | - 12 | ## The Basic Objects - | - 13 | There are four main types of objects involved when using Tree-sitter: languages, parsers, syntax trees, and syntax nodes. - 14 | In C, these are called `TSLanguage`, `TSParser`, `TSTree`, and `TSNode`. - | - 15 | - A `TSLanguage` is an opaque object that defines how to parse a particular programming language. The code for each `TSLanguage` - 16 | is generated by Tree-sitter. Many languages are already available in separate git repositories within the - 17 | [Tree-sitter GitHub organization][ts org] and the [Tree-sitter grammars GitHub organization][tsg org]. - 18 | See [the next section][creating parsers] for how to create new languages. - | - 19 | - A `TSParser` is a stateful object that can be assigned a `TSLanguage` and used to produce a `TSTree` based on some - 20 | source code. - | - 21 | - A `TSTree` represents the syntax tree of an entire source code file. It contains `TSNode` instances that indicate the - 22 | structure of the source code. It can also be edited and used to produce a new `TSTree` in the event that the - 23 | source code changes. - | - 24 | - A `TSNode` represents a single node in the syntax tree. It tracks its start and end positions in the source code, as - 25 | well as its relation to other nodes like its parent, siblings and children. - | - 26 | ## An Example Program - | - 27 | Here's an example of a simple C program that uses the Tree-sitter [JSON parser][json]. 
- | - 28 | ```c - 29 | // Filename - test-json-parser.c - | - 30 | #include <assert.h> - 31 | #include <string.h> - 32 | #include <stdio.h> - 33 | #include <tree_sitter/api.h> - | - 34 | // Declare the `tree_sitter_json` function, which is - 35 | // implemented by the `tree-sitter-json` library. - 36 | const TSLanguage *tree_sitter_json(void); - | - 37 | int main() { - 38 | // Create a parser. - 39 | TSParser *parser = ts_parser_new(); - | - 40 | // Set the parser's language (JSON in this case). - 41 | ts_parser_set_language(parser, tree_sitter_json()); - | - 42 | // Build a syntax tree based on source code stored in a string. - 43 | const char *source_code = "[1, null]"; - 44 | TSTree *tree = ts_parser_parse_string( - 45 | parser, - 46 | NULL, - 47 | source_code, - 48 | strlen(source_code) - 49 | ); - | - 50 | // Get the root node of the syntax tree. - 51 | TSNode root_node = ts_tree_root_node(tree); - | - 52 | // Get some child nodes. - 53 | TSNode array_node = ts_node_named_child(root_node, 0); - 54 | TSNode number_node = ts_node_named_child(array_node, 0); - | - 55 | // Check that the nodes have the expected types. - 56 | assert(strcmp(ts_node_type(root_node), "document") == 0); - 57 | assert(strcmp(ts_node_type(array_node), "array") == 0); - 58 | assert(strcmp(ts_node_type(number_node), "number") == 0); - | - 59 | // Check that the nodes have the expected child counts. - 60 | assert(ts_node_child_count(root_node) == 1); - 61 | assert(ts_node_child_count(array_node) == 5); - 62 | assert(ts_node_named_child_count(array_node) == 2); - 63 | assert(ts_node_child_count(number_node) == 0); - | - 64 | // Print the syntax tree as an S-expression. - 65 | char *string = ts_node_string(root_node); - 66 | printf("Syntax tree: %s\n", string); - | - 67 | // Free all of the heap-allocated memory. - 68 | free(string); - 69 | ts_tree_delete(tree); - 70 | ts_parser_delete(parser); - 71 | return 0; - 72 | } - 73 | ``` - | - 74 | This program requires three components to build: - | - 75 | 1. 
The Tree-sitter C API from `tree-sitter/api.h` (requiring `tree-sitter/lib/include` in our include path) - 76 | 2. The Tree-sitter library (`libtree-sitter.a`) - 77 | 3. The JSON grammar's source code, which we compile directly into the binary - | - 78 | ```sh - 79 | clang \ - 80 | -I tree-sitter/lib/include \ - 81 | test-json-parser.c \ - 82 | tree-sitter-json/src/parser.c \ - 83 | tree-sitter/libtree-sitter.a \ - 84 | -o test-json-parser - 85 | ./test-json-parser - 86 | ``` - | - 87 | When using dynamic linking, you'll need to ensure the shared library is discoverable through `LD_LIBRARY_PATH` or your system's - 88 | equivalent environment variable. Here's how to compile with dynamic linking: - | - 89 | ```sh - 90 | clang \ - 91 | -I tree-sitter/lib/include \ - 92 | test-json-parser.c \ - 93 | tree-sitter-json/src/parser.c \ - 94 | -ltree-sitter \ - 95 | -o test-json-parser - 96 | ./test-json-parser - 97 | ``` - | - 98 | [creating parsers]: ../creating-parsers/index.md - 99 | [json]: https://github.com/tree-sitter/tree-sitter-json - 100 | [ts org]: https://github.com/tree-sitter - 101 | [tsg org]: https://github.com/tree-sitter-grammars - - - --------------------------------------------------------------------------------- -/docs/src/using-parsers/2-basic-parsing.md: --------------------------------------------------------------------------------- - 1 | # Basic Parsing - | - 2 | ## Providing the Code - | - 3 | In the example on the previous page, we parsed source code stored in a simple string using the `ts_parser_parse_string` function: - | - 4 | ```c - 5 | TSTree *ts_parser_parse_string( - 6 | TSParser *self, - 7 | const TSTree *old_tree, - 8 | const char *string, - 9 | uint32_t length - 10 | ); - 11 | ``` - | - 12 | You may want to parse source code that's stored in a custom data structure, like a [piece table][piece table] or a [rope][rope]. 
- 13 | In this case, you can use the more general `ts_parser_parse` function: - | - 14 | ```c - 15 | TSTree *ts_parser_parse( - 16 | TSParser *self, - 17 | const TSTree *old_tree, - 18 | TSInput input - 19 | ); - 20 | ``` - | - 21 | The `TSInput` structure lets you provide your own function for reading a chunk of text at a given byte offset and row/column - 22 | position. The function can return text encoded in either UTF-8 or UTF-16. This interface allows you to efficiently parse - 23 | text that is stored in your own data structure. - | - 24 | ```c - 25 | typedef struct { - 26 | void *payload; - 27 | const char *(*read)( - 28 | void *payload, - 29 | uint32_t byte_offset, - 30 | TSPoint position, - 31 | uint32_t *bytes_read - 32 | ); - 33 | TSInputEncoding encoding; - 34 | TSDecodeFunction decode; - 35 | } TSInput; - 36 | ``` - | - 37 | If you want to decode text that is not encoded in UTF-8 or UTF-16, you can set the `decode` field of the input to your function - 38 | that will decode text. The signature of the `TSDecodeFunction` is as follows: - | - 39 | ```c - 40 | typedef uint32_t (*TSDecodeFunction)( - 41 | const uint8_t *string, - 42 | uint32_t length, - 43 | int32_t *code_point - 44 | ); - 45 | ``` - | - 46 | ```admonish attention - 47 | The `TSInputEncoding` must be set to `TSInputEncodingCustom` for the `decode` function to be called. - 48 | ``` - | - 49 | The `string` argument is a pointer to the text to decode, which comes from the `read` function, and the `length` argument - 50 | is the length of the `string`. The `code_point` argument is a pointer to an integer that represents the decoded code point, - 51 | and should be written to in your `decode` callback. The function should return the number of bytes decoded. - | - 52 | ## Syntax Nodes - | - 53 | Tree-sitter provides a [DOM][dom]-style interface for inspecting syntax trees. - 54 | A syntax node's _type_ is a string that indicates which grammar rule the node represents. 
- | - 55 | ```c - 56 | const char *ts_node_type(TSNode); - 57 | ``` - | - 58 | Syntax nodes store their position in the source code both in raw bytes and row/column - 59 | coordinates. In a point, rows and columns are zero-based. The `row` field represents - 60 | the number of newlines before a given position, while `column` represents the number - 61 | of bytes between the position and beginning of the line. - | - 62 | ```c - 63 | uint32_t ts_node_start_byte(TSNode); - 64 | uint32_t ts_node_end_byte(TSNode); - 65 | typedef struct { - 66 | uint32_t row; - 67 | uint32_t column; - 68 | } TSPoint; - 69 | TSPoint ts_node_start_point(TSNode); - 70 | TSPoint ts_node_end_point(TSNode); - 71 | ``` - | - 72 | ```admonish note - 73 | A *newline* is considered to be a single line feed (`\n`) character. - 74 | ``` - | - 75 | ## Retrieving Nodes - | - 76 | Every tree has a _root node_: - | - 77 | ```c - 78 | TSNode ts_tree_root_node(const TSTree *); - 79 | ``` - | - 80 | Once you have a node, you can access the node's children: - | - 81 | ```c - 82 | uint32_t ts_node_child_count(TSNode); - 83 | TSNode ts_node_child(TSNode, uint32_t); - 84 | ``` - | - 85 | You can also access its siblings and parent: - | - 86 | ```c - 87 | TSNode ts_node_next_sibling(TSNode); - 88 | TSNode ts_node_prev_sibling(TSNode); - 89 | TSNode ts_node_parent(TSNode); - 90 | ``` - | - 91 | These methods may all return a _null node_ to indicate, for example, that a node does not _have_ a next sibling. - 92 | You can check if a node is null: - | - 93 | ```c - 94 | bool ts_node_is_null(TSNode); - 95 | ``` - | - 96 | ## Named vs Anonymous Nodes - | - 97 | Tree-sitter produces [_concrete_ syntax trees][cst] — trees that contain nodes for - 98 | every individual token in the source code, including things like commas and parentheses. This is important for use-cases - 99 | that deal with individual tokens, like [syntax highlighting][syntax highlighting]. 
But some - 100 | types of code analysis are easier to perform using an [_abstract_ syntax tree][ast] — a tree in which the less important - 101 | details have been removed. Tree-sitter's trees support these use cases by making a distinction between - 102 | _named_ and _anonymous_ nodes. - | - 103 | Consider a grammar rule like this: - | - 104 | ```js - 105 | if_statement: $ => seq("if", "(", $._expression, ")", $._statement); - 106 | ``` - | - 107 | A syntax node representing an `if_statement` in this language would have 5 children: the condition expression, the body statement, - 108 | as well as the `if`, `(`, and `)` tokens. The expression and the statement would be marked as _named_ nodes, because they - 109 | have been given explicit names in the grammar. But the `if`, `(`, and `)` nodes would _not_ be named nodes, because they - 110 | are represented in the grammar as simple strings. - | - 111 | You can check whether any given node is named: - | - 112 | ```c - 113 | bool ts_node_is_named(TSNode); - 114 | ``` - | - 115 | When traversing the tree, you can also choose to skip over anonymous nodes by using the `_named_` variants of all of the - 116 | methods described above: - | - 117 | ```c - 118 | TSNode ts_node_named_child(TSNode, uint32_t); - 119 | uint32_t ts_node_named_child_count(TSNode); - 120 | TSNode ts_node_next_named_sibling(TSNode); - 121 | TSNode ts_node_prev_named_sibling(TSNode); - 122 | ``` - | - 123 | If you use this group of methods, the syntax tree functions much like an abstract syntax tree. - | - 124 | ## Node Field Names - | - 125 | To make syntax nodes easier to analyze, many grammars assign unique _field names_ to particular child nodes. - 126 | In the [creating parsers][using fields] section, it's explained how to do this in your own grammars. 
If a syntax node has - 127 | fields, you can access its children using their field name: - | - 128 | ```c - 129 | TSNode ts_node_child_by_field_name( - 130 | TSNode self, - 131 | const char *field_name, - 132 | uint32_t field_name_length - 133 | ); - 134 | ``` - | - 135 | Fields also have numeric ids that you can use, if you want to avoid repeated string comparisons. You can convert between - 136 | strings and ids using the `TSLanguage`: - | - 137 | ```c - 138 | uint32_t ts_language_field_count(const TSLanguage *); - 139 | const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId); - 140 | TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t); - 141 | ``` - | - 142 | The field ids can be used in place of the name: - | - 143 | ```c - 144 | TSNode ts_node_child_by_field_id(TSNode, TSFieldId); - 145 | ``` - | - 146 | [ast]: https://en.wikipedia.org/wiki/Abstract_syntax_tree - 147 | [cst]: https://en.wikipedia.org/wiki/Parse_tree - 148 | [dom]: https://en.wikipedia.org/wiki/Document_Object_Model - 149 | [piece table]: https://en.wikipedia.org/wiki/Piece_table - 150 | [rope]: https://en.wikipedia.org/wiki/Rope_(data_structure) - 151 | [syntax highlighting]: https://en.wikipedia.org/wiki/Syntax_highlighting - 152 | [using fields]: ../creating-parsers/3-writing-the-grammar.md#using-fields - - - --------------------------------------------------------------------------------- -/docs/src/using-parsers/3-advanced-parsing.md: --------------------------------------------------------------------------------- - 1 | # Advanced Parsing - | - 2 | ## Editing - | - 3 | In applications like text editors, you often need to re-parse a file after its source code has changed. Tree-sitter is designed - 4 | to support this use case efficiently. There are two steps required. First, you must _edit_ the syntax tree, which adjusts - 5 | the ranges of its nodes so that they stay in sync with the code.
- | - 6 | ```c - 7 | typedef struct { - 8 | uint32_t start_byte; - 9 | uint32_t old_end_byte; - 10 | uint32_t new_end_byte; - 11 | TSPoint start_point; - 12 | TSPoint old_end_point; - 13 | TSPoint new_end_point; - 14 | } TSInputEdit; - | - 15 | void ts_tree_edit(TSTree *, const TSInputEdit *); - 16 | ``` - | - 17 | Then, you can call `ts_parser_parse` again, passing in the old tree. This will create a new tree that internally shares structure - 18 | with the old tree. - | - 19 | When you edit a syntax tree, the positions of its nodes will change. If you have stored any `TSNode` instances outside of - 20 | the `TSTree`, you must update their positions separately, using the same `TSInputEdit` value, in order to update their - 21 | cached positions. - | - 22 | ```c - 23 | void ts_node_edit(TSNode *, const TSInputEdit *); - 24 | ``` - | - 25 | This `ts_node_edit` function is _only_ needed in the case where you have retrieved `TSNode` instances _before_ editing the - 26 | tree, and then _after_ editing the tree, you want to continue to use those specific node instances. Often, you'll just want - 27 | to re-fetch nodes from the edited tree, in which case `ts_node_edit` is not needed. - | - 28 | ## Multi-language Documents - | - 29 | Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS][ejs] - 30 | and [ERB][erb] allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. - | - 31 | Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain - 32 | _ranges_ of a file. 
- | - 33 | ```c - 34 | typedef struct { - 35 | TSPoint start_point; - 36 | TSPoint end_point; - 37 | uint32_t start_byte; - 38 | uint32_t end_byte; - 39 | } TSRange; - | - 40 | void ts_parser_set_included_ranges( - 41 | TSParser *self, - 42 | const TSRange *ranges, - 43 | uint32_t range_count - 44 | ); - 45 | ``` - | - 46 | For example, consider this ERB document: - | - 47 | ```erb - 48 | <ul> - 49 | <% people.each do |person| %> - 50 | <li><%= person.name %></li> - 51 | <% end %> - 52 | </ul>
- 53 | ``` - | - 54 | Conceptually, it can be represented by three syntax trees with overlapping ranges: an ERB syntax tree, a Ruby syntax tree, - 55 | and an HTML syntax tree. You could generate these syntax trees with the following code: - | - 56 | ```c - 57 | #include <string.h> - 58 | #include <tree_sitter/api.h> - | - 59 | // These functions are each implemented in their own repo. - 60 | const TSLanguage *tree_sitter_embedded_template(void); - 61 | const TSLanguage *tree_sitter_html(void); - 62 | const TSLanguage *tree_sitter_ruby(void); - | - 63 | int main(int argc, const char **argv) { - 64 | const char *text = argv[1]; - 65 | unsigned len = strlen(text); - | - 66 | // Parse the entire text as ERB. - 67 | TSParser *parser = ts_parser_new(); - 68 | ts_parser_set_language(parser, tree_sitter_embedded_template()); - 69 | TSTree *erb_tree = ts_parser_parse_string(parser, NULL, text, len); - 70 | TSNode erb_root_node = ts_tree_root_node(erb_tree); - | - 71 | // In the ERB syntax tree, find the ranges of the `content` nodes, - 72 | // which represent the underlying HTML, and the `code` nodes, which - 73 | // represent the interpolated Ruby.
- 74 | TSRange html_ranges[10]; - 75 | TSRange ruby_ranges[10]; - 76 | unsigned html_range_count = 0; - 77 | unsigned ruby_range_count = 0; - 78 | unsigned child_count = ts_node_child_count(erb_root_node); - | - 79 | for (unsigned i = 0; i < child_count; i++) { - 80 | TSNode node = ts_node_child(erb_root_node, i); - 81 | if (strcmp(ts_node_type(node), "content") == 0) { - 82 | html_ranges[html_range_count++] = (TSRange) { - 83 | ts_node_start_point(node), - 84 | ts_node_end_point(node), - 85 | ts_node_start_byte(node), - 86 | ts_node_end_byte(node), - 87 | }; - 88 | } else { - 89 | TSNode code_node = ts_node_named_child(node, 0); - 90 | ruby_ranges[ruby_range_count++] = (TSRange) { - 91 | ts_node_start_point(code_node), - 92 | ts_node_end_point(code_node), - 93 | ts_node_start_byte(code_node), - 94 | ts_node_end_byte(code_node), - 95 | }; - 96 | } - 97 | } - | - 98 | // Use the HTML ranges to parse the HTML. - 99 | ts_parser_set_language(parser, tree_sitter_html()); - 100 | ts_parser_set_included_ranges(parser, html_ranges, html_range_count); - 101 | TSTree *html_tree = ts_parser_parse_string(parser, NULL, text, len); - 102 | TSNode html_root_node = ts_tree_root_node(html_tree); - | - 103 | // Use the Ruby ranges to parse the Ruby. - 104 | ts_parser_set_language(parser, tree_sitter_ruby()); - 105 | ts_parser_set_included_ranges(parser, ruby_ranges, ruby_range_count); - 106 | TSTree *ruby_tree = ts_parser_parse_string(parser, NULL, text, len); - 107 | TSNode ruby_root_node = ts_tree_root_node(ruby_tree); - | - 108 | // Print all three trees. - 109 | char *erb_sexp = ts_node_string(erb_root_node); - 110 | char *html_sexp = ts_node_string(html_root_node); - 111 | char *ruby_sexp = ts_node_string(ruby_root_node); - 112 | printf("ERB: %s\n", erb_sexp); - 113 | printf("HTML: %s\n", html_sexp); - 114 | printf("Ruby: %s\n", ruby_sexp); - 115 | return 0; - 116 | } - 117 | ``` - | - 118 | This API allows for great flexibility in how languages can be composed. 
Tree-sitter is not responsible for mediating the - 119 | interactions between languages. Instead, you are free to do that using arbitrary application-specific logic. - | - 120 | ## Concurrency - | - 121 | Tree-sitter supports multi-threaded use cases by making syntax trees very cheap to copy. - | - 122 | ```c - 123 | TSTree *ts_tree_copy(const TSTree *); - 124 | ``` - | - 125 | Internally, copying a syntax tree just entails incrementing an atomic reference count. Conceptually, it provides you a new - 126 | tree which you can freely query, edit, reparse, or delete on a new thread while continuing to use the original tree on a - 127 | different thread. - | - 128 | ```admonish danger - 129 | Individual `TSTree` instances are _not_ thread safe; you must copy a tree if you want to use it on multiple threads simultaneously. - 130 | ``` - | - 131 | [ejs]: https://ejs.co - 132 | [erb]: https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html - - - --------------------------------------------------------------------------------- -/docs/src/using-parsers/4-walking-trees.md: --------------------------------------------------------------------------------- - 1 | # Walking Trees with Tree Cursors - | - 2 | You can access every node in a syntax tree using the `TSNode` APIs [described earlier][retrieving nodes], but if you need - 3 | to access a large number of nodes, the fastest way to do so is with a _tree cursor_. A cursor is a stateful object that - 4 | allows you to walk a syntax tree with maximum efficiency. - | - 5 | ```admonish note - 6 | The given input node is considered the root of the cursor, and the cursor cannot walk outside this node. - 7 | Going to the parent or any sibling of the root node will always return `false`. - | - 8 | This has no unexpected effects if the given input node is the actual `root` node of the tree, but is something to keep in - 9 | mind when using cursors constructed with a node that is not the `root` node. 
- 10 | ``` - | - 11 | You can initialize a cursor from any node: - | - 12 | ```c - 13 | TSTreeCursor ts_tree_cursor_new(TSNode); - 14 | ``` - | - 15 | You can move the cursor around the tree: - | - 16 | ```c - 17 | bool ts_tree_cursor_goto_first_child(TSTreeCursor *); - 18 | bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *); - 19 | bool ts_tree_cursor_goto_parent(TSTreeCursor *); - 20 | ``` - | - 21 | These methods return `true` if the cursor successfully moved and `false` if there was no node to move to. - | - 22 | You can always retrieve the cursor's current node, as well as the [field name][node-field-names] that is associated with - 23 | the current node. - | - 24 | ```c - 25 | TSNode ts_tree_cursor_current_node(const TSTreeCursor *); - 26 | const char *ts_tree_cursor_current_field_name(const TSTreeCursor *); - 27 | TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *); - 28 | ``` - | - 29 | [retrieving nodes]: ./2-basic-parsing.md#retrieving-nodes - 30 | [node-field-names]: ./2-basic-parsing.md#node-field-names - - - --------------------------------------------------------------------------------- -/docs/src/using-parsers/6-static-node-types.md: --------------------------------------------------------------------------------- - 1 | # Static Node Types - | - 2 | In languages with static typing, it can be helpful for syntax trees to provide specific type information about individual - 3 | syntax nodes. Tree-sitter makes this information available via a generated file called `node-types.json`. This _node types_ - 4 | file provides structured data about every possible syntax node in a grammar. - | - 5 | You can use this data to generate type declarations in statically-typed programming languages. 
- | - 6 | The node types file contains an array of objects, each of which describes a particular type of syntax node using the - 7 | following entries: - | - 8 | ## Basic Info - | - 9 | Every object in this array has these two entries: - | - 10 | - `"type"` — A string that indicates which grammar rule the node represents. This corresponds to the `ts_node_type` function - 11 | described [here][syntax nodes]. - 12 | - `"named"` — A boolean that indicates whether this kind of node corresponds to a rule name in the grammar or just a string - 13 | literal. See [here][named-vs-anonymous-nodes] for more info. - | - 14 | Examples: - | - 15 | ```json - 16 | { - 17 | "type": "string_literal", - 18 | "named": true - 19 | } - 20 | { - 21 | "type": "+", - 22 | "named": false - 23 | } - 24 | ``` - | - 25 | Together, these two fields constitute a unique identifier for a node type; no two top-level objects in the `node-types.json` - 26 | should have the same values for both `"type"` and `"named"`. - | - 27 | ## Internal Nodes - | - 28 | Many syntax nodes can have _children_. The node type object describes the possible children that a node can have using the - 29 | following entries: - | - 30 | - `"fields"` — An object that describes the possible [fields][node-field-names] that the node can have. The keys of this - 31 | object are field names, and the values are _child type_ objects, described below. - 32 | - `"children"` — Another _child type_ object that describes all the node's possible _named_ children _without_ fields. - | - 33 | A _child type_ object describes a set of child nodes using the following entries: - | - 34 | - `"required"` — A boolean indicating whether there is always _at least one_ node in this set. - 35 | - `"multiple"` — A boolean indicating whether there can be _multiple_ nodes in this set. - 36 | - `"types"` — An array of objects that represent the possible types of nodes in this set.
Each object has two keys: `"type"` - 37 | and `"named"`, whose meanings are described above. - | - 38 | Example with fields: - | - 39 | ```json - 40 | { - 41 | "type": "method_definition", - 42 | "named": true, - 43 | "fields": { - 44 | "body": { - 45 | "multiple": false, - 46 | "required": true, - 47 | "types": [{ "type": "statement_block", "named": true }] - 48 | }, - 49 | "decorator": { - 50 | "multiple": true, - 51 | "required": false, - 52 | "types": [{ "type": "decorator", "named": true }] - 53 | }, - 54 | "name": { - 55 | "multiple": false, - 56 | "required": true, - 57 | "types": [ - 58 | { "type": "computed_property_name", "named": true }, - 59 | { "type": "property_identifier", "named": true } - 60 | ] - 61 | }, - 62 | "parameters": { - 63 | "multiple": false, - 64 | "required": true, - 65 | "types": [{ "type": "formal_parameters", "named": true }] - 66 | } - 67 | } - 68 | } - 69 | ``` - | - 70 | Example with children: - | - 71 | ```json - 72 | { - 73 | "type": "array", - 74 | "named": true, - 75 | "fields": {}, - 76 | "children": { - 77 | "multiple": true, - 78 | "required": false, - 79 | "types": [ - 80 | { "type": "_expression", "named": true }, - 81 | { "type": "spread_element", "named": true } - 82 | ] - 83 | } - 84 | } - 85 | ``` - | - 86 | ## Supertype Nodes - | - 87 | In Tree-sitter grammars, there are usually certain rules that represent abstract _categories_ of syntax nodes (e.g. "expression", - 88 | "type", "declaration"). In the `grammar.js` file, these are often written as [hidden rules][hidden rules] - 89 | whose definition is a simple [`choice`][grammar dsl] where each member is just a single symbol. - | - 90 | Normally, hidden rules are not mentioned in the node types file, since they don't appear in the syntax tree. 
But if you add - 91 | a hidden rule to the grammar's [`supertypes` list][grammar dsl], then it _will_ show up in the node - 92 | types file, with the following special entry: - | - 93 | - `"subtypes"` — An array of objects that specify the _types_ of nodes that this 'supertype' node can wrap. - | - 94 | Example: - | - 95 | ```json - 96 | { - 97 | "type": "_declaration", - 98 | "named": true, - 99 | "subtypes": [ - 100 | { "type": "class_declaration", "named": true }, - 101 | { "type": "function_declaration", "named": true }, - 102 | { "type": "generator_function_declaration", "named": true }, - 103 | { "type": "lexical_declaration", "named": true }, - 104 | { "type": "variable_declaration", "named": true } - 105 | ] - 106 | } - 107 | ``` - | - 108 | Supertype nodes will also appear elsewhere in the node types file, as children of other node types, in a way that corresponds - 109 | with how the supertype rule was used in the grammar. This can make the node types much shorter and easier to read, because - 110 | a single supertype will take the place of multiple subtypes. 
- | - 111 | Example: - | - 112 | ```json - 113 | { - 114 | "type": "export_statement", - 115 | "named": true, - 116 | "fields": { - 117 | "declaration": { - 118 | "multiple": false, - 119 | "required": false, - 120 | "types": [{ "type": "_declaration", "named": true }] - 121 | }, - 122 | "source": { - 123 | "multiple": false, - 124 | "required": false, - 125 | "types": [{ "type": "string", "named": true }] - 126 | } - 127 | } - 128 | } - 129 | ``` - | - 130 | [grammar dsl]: ../creating-parsers/2-the-grammar-dsl.md - 131 | [hidden rules]: ../creating-parsers/3-writing-the-grammar.md#hiding-rules - 132 | [named-vs-anonymous-nodes]: ./2-basic-parsing.md#named-vs-anonymous-nodes - 133 | [node-field-names]: ./2-basic-parsing.md#node-field-names - 134 | [syntax nodes]: ./2-basic-parsing.md#syntax-nodes - - - --------------------------------------------------------------------------------- -/docs/src/using-parsers/index.md: --------------------------------------------------------------------------------- - 1 | # Using Parsers - | - 2 | This guide covers the fundamental concepts of using Tree-sitter, which is applicable across all programming languages. - 3 | Although we'll explore some C-specific details that are valuable for direct C API usage or creating new language bindings, - 4 | the core concepts remain the same. - | - 5 | Tree-sitter's parsing functionality is implemented through its C API, with all functions documented in the [tree_sitter/api.h][api.h] - 6 | header file, but if you're working in another language, you can use one of the following bindings found [here](../index.md#language-bindings), - 7 | each providing idiomatic access to Tree-sitter's functionality. 
Of these bindings, the official ones have their own API docs - 8 | hosted online at the following pages: - | - 9 | - [Go][go] - 10 | - [Java] - 11 | - [JavaScript (Node.js)][javascript] - 12 | - [Kotlin][kotlin] - 13 | - [Python][python] - 14 | - [Rust][rust] - 15 | - [Zig][zig] - | - 16 | [api.h]: https://github.com/tree-sitter/tree-sitter/blob/master/lib/include/tree_sitter/api.h - 17 | [go]: https://pkg.go.dev/github.com/tree-sitter/go-tree-sitter - 18 | [java]: https://tree-sitter.github.io/java-tree-sitter - 19 | [javascript]: https://tree-sitter.github.io/node-tree-sitter - 20 | [kotlin]: https://tree-sitter.github.io/kotlin-tree-sitter - 21 | [python]: https://tree-sitter.github.io/py-tree-sitter - 22 | [rust]: https://docs.rs/tree-sitter - 23 | [zig]: https://tree-sitter.github.io/zig-tree-sitter - - - --------------------------------------------------------------------------------- -/docs/src/using-parsers/queries/1-syntax.md: --------------------------------------------------------------------------------- - 1 | # Query Syntax - | - 2 | A _query_ consists of one or more _patterns_, where each pattern is an [S-expression][s-exp] that matches a certain set of - 3 | nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the - 4 | node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would - 5 | match any `binary_expression` node whose children are both `number_literal` nodes: - | - 6 | ```query - 7 | (binary_expression (number_literal) (number_literal)) - 8 | ``` - | - 9 | Children can also be omitted. 
For example, this would match any `binary_expression` where at least _one_ of the children is a - 10 | `string_literal` node: - | - 11 | ```query - 12 | (binary_expression (string_literal)) - 13 | ``` - | - 14 | ## Fields - | - 15 | In general, it's a good idea to make patterns more specific by specifying [field names][node-field-names] associated with - 16 | child nodes. You do this by prefixing a child pattern with a field name followed by a colon. For example, this pattern would - 17 | match an `assignment_expression` node where the `left` child is a `member_expression` whose `object` is a `call_expression`. - | - 18 | ```query - 19 | (assignment_expression - 20 | left: (member_expression - 21 | object: (call_expression))) - 22 | ``` - | - 23 | ## Negated Fields - | - 24 | You can also constrain a pattern so that it only matches nodes that _lack_ a certain field. To do this, add a field name - 25 | prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters: - | - 26 | ```query - 27 | (class_declaration - 28 | name: (identifier) @class_name - 29 | !type_parameters) - 30 | ``` - | - 31 | ## Anonymous Nodes - | - 32 | The parenthesized syntax for writing nodes only applies to [named nodes][named-vs-anonymous-nodes]. To match specific anonymous - 33 | nodes, you write their name between double quotes. For example, this pattern would match any `binary_expression` where the - 34 | operator is `!=` and the right side is `null`: - | - 35 | ```query - 36 | (binary_expression - 37 | operator: "!=" - 38 | right: (null)) - 39 | ``` - | - 40 | ## Special Nodes - | - 41 | ### The Wildcard Node - | - 42 | A wildcard node is represented with an underscore (`_`) and matches any node. - 43 | This is similar to `.` in regular expressions. - 44 | There are two types: `(_)` will match any named node, - 45 | and `_` will match any named or anonymous node.
- | - 46 | For example, this pattern would match any node inside a call: - | - 47 | ```query - 48 | (call (_) @call.inner) - 49 | ``` - | - 50 | ### The `ERROR` Node - | - 51 | When the parser encounters text it does not recognize, it represents this node - 52 | as `(ERROR)` in the syntax tree. These error nodes can be queried just like - 53 | normal nodes: - | - 54 | ```scheme - 55 | (ERROR) @error-node - 56 | ``` - | - 57 | ### The `MISSING` Node - | - 58 | If the parser is able to recover from erroneous text by inserting a missing token and then reducing, it will insert that - 59 | missing node in the final tree so long as that tree has the lowest error cost. These missing nodes appear as seemingly normal - 60 | nodes in the tree, but they are zero tokens wide, and are internally represented as a property of the actual terminal node - 61 | that was inserted, instead of being its own kind of node, like the `ERROR` node. These special missing nodes can be queried - 62 | using `(MISSING)`: - | - 63 | ```scheme - 64 | (MISSING) @missing-node - 65 | ``` - | - 66 | This is useful when attempting to detect all syntax errors in a given parse tree, since these missing nodes are not captured - 67 | by `(ERROR)` queries. Specific missing node types can also be queried: - | - 68 | ```scheme - 69 | (MISSING identifier) @missing-identifier - 70 | (MISSING ";") @missing-semicolon - 71 | ``` - | - 72 | ### Supertype Nodes - | - 73 | Some node types are marked as _supertypes_ in a grammar. A supertype is a node type that contains multiple - 74 | subtypes. For example, in the [JavaScript grammar example][grammar], `expression` is a supertype that can represent any kind - 75 | of expression, such as a `binary_expression`, `call_expression`, or `identifier`. You can use supertypes in queries to match - 76 | any of their subtypes, rather than having to list out each subtype individually.
For example, this pattern would match any - 77 | kind of expression, even though it's not a visible node in the syntax tree: - | - 78 | ```query - 79 | (expression) @any-expression - 80 | ``` - | - 81 | To query specific subtypes of a supertype, you can use the syntax `supertype/subtype`. For example, this pattern would - 82 | match a `binary_expression` only if it is a child of `expression`: - | - 83 | ```query - 84 | (expression/binary_expression) @binary-expression - 85 | ``` - | - 86 | This also applies to anonymous nodes. For example, this pattern would match `"()"` only if it is a child of `expression`: - | - 87 | ```query - 88 | (expression/"()") @empty-expression - 89 | ``` - | - 90 | [grammar]: ../../creating-parsers/3-writing-the-grammar.md#structuring-rules-well - 91 | [node-field-names]: ../2-basic-parsing.md#node-field-names - 92 | [named-vs-anonymous-nodes]: ../2-basic-parsing.md#named-vs-anonymous-nodes - 93 | [s-exp]: https://en.wikipedia.org/wiki/S-expression - - - --------------------------------------------------------------------------------- -/docs/src/using-parsers/queries/2-operators.md: --------------------------------------------------------------------------------- - 1 | # Operators - | - 2 | ## Capturing Nodes - | - 3 | When matching patterns, you may want to process specific nodes within the pattern. Captures allow you to associate names - 4 | with specific nodes in a pattern, so that you can later refer to those nodes by those names. Capture names are written _after_ - 5 | the nodes that they refer to, and start with an `@` character. 
- | - 6 | For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name - 7 | `the-function-name` with the identifier: - | - 8 | ```query - 9 | (assignment_expression - 10 | left: (identifier) @the-function-name - 11 | right: (function)) - 12 | ``` - | - 13 | And this pattern would match all method definitions, associating the name `the-method-name` with the method name, `the-class-name` - 14 | with the containing class name: - | - 15 | ```query - 16 | (class_declaration - 17 | name: (identifier) @the-class-name - 18 | body: (class_body - 19 | (method_definition - 20 | name: (property_identifier) @the-method-name))) - 21 | ``` - | - 22 | ## Quantification Operators - | - 23 | You can match a repeating sequence of sibling nodes using the postfix `+` and `*` _repetition_ operators, which work analogously - 24 | to the `+` and `*` operators [in regular expressions][regex]. The `+` operator matches _one or more_ repetitions of a pattern, - 25 | and the `*` operator matches _zero or more_. - | - 26 | For example, this pattern would match a sequence of one or more comments: - | - 27 | ```query - 28 | (comment)+ - 29 | ``` - | - 30 | This pattern would match a class declaration, capturing all of the decorators if any were present: - | - 31 | ```query - 32 | (class_declaration - 33 | (decorator)* @the-decorator - 34 | name: (identifier) @the-name) - 35 | ``` - | - 36 | You can also mark a node as optional using the `?` operator. For example, this pattern would match all function calls, capturing - 37 | a string argument if one was present: - | - 38 | ```query - 39 | (call_expression - 40 | function: (identifier) @the-function - 41 | arguments: (arguments (string)? @the-string-arg)) - 42 | ``` - | - 43 | ## Grouping Sibling Nodes - | - 44 | You can also use parentheses for grouping a sequence of _sibling_ nodes. 
For example, this pattern would match a comment - 45 | followed by a function declaration: - | - 46 | ```query - 47 | ( - 48 | (comment) - 49 | (function_declaration) - 50 | ) - 51 | ``` - | - 52 | Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also be applied to groups. For example, this - 53 | pattern would match a comma-separated series of numbers: - | - 54 | ```query - 55 | ( - 56 | (number) - 57 | ("," (number))* - 58 | ) - 59 | ``` - | - 60 | ## Alternations - | - 61 | An alternation is written as a pair of square brackets (`[]`) containing a list of alternative patterns. - 62 | This is similar to _character classes_ from regular expressions (`[abc]` matches either a, b, or c). - | - 63 | For example, this pattern would match a call to either a variable or an object property. - 64 | In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`: - | - 65 | ```query - 66 | (call_expression - 67 | function: [ - 68 | (identifier) @function - 69 | (member_expression - 70 | property: (property_identifier) @method) - 71 | ]) - 72 | ``` - | - 73 | This pattern would match a set of possible keyword tokens, capturing them as `@keyword`: - | - 74 | ```query - 75 | [ - 76 | "break" - 77 | "delete" - 78 | "else" - 79 | "for" - 80 | "function" - 81 | "if" - 82 | "return" - 83 | "try" - 84 | "while" - 85 | ] @keyword - 86 | ``` - | - 87 | ## Anchors - | - 88 | The anchor operator, `.`, is used to constrain the ways in which child patterns are matched. It has different behaviors - 89 | depending on where it's placed inside a query. - | - 90 | When `.` is placed before the _first_ child within a parent pattern, the child will only match when it is the first named - 91 | node in the parent. For example, the below pattern matches a given `array` node at most once, assigning the `@the-element` - 92 | capture to the first `identifier` node in the parent `array`: - | - 93 | ```query - 94 | (array . 
(identifier) @the-element) - 95 | ``` - | - 96 | Without this anchor, the pattern would match once for every identifier in the array, with `@the-element` bound - 97 | to each matched identifier. - | - 98 | Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the - 99 | last named child of their parent. The below pattern matches only nodes that are the last named child within a `block`. - | - 100 | ```query - 101 | (block (_) @last-expression .) - 102 | ``` - | - 103 | Finally, an anchor _between_ two child patterns will cause the patterns to only match nodes that are immediate siblings. - 104 | The pattern below, given a long dotted name like `a.b.c.d`, will only match pairs of consecutive identifiers: - 105 | `a, b`, `b, c`, and `c, d`. - | - 106 | ```query - 107 | (dotted_name - 108 | (identifier) @prev-id - 109 | . - 110 | (identifier) @next-id) - 111 | ``` - | - 112 | Without the anchor, non-consecutive pairs like `a, c` and `b, d` would also be matched. - | - 113 | The restrictions placed on a pattern by an anchor operator ignore anonymous nodes. - | - 114 | [regex]: https://en.wikipedia.org/wiki/Regular_expression#Basic_concepts - - - --------------------------------------------------------------------------------- -/docs/src/using-parsers/queries/3-predicates-and-directives.md: --------------------------------------------------------------------------------- - 1 | # Predicates - | - 2 | You can also specify arbitrary metadata and conditions associated with a pattern - 3 | by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions - 4 | start with a _predicate name_ beginning with a `#` character, and ending with a `?` character. After that, they can - 5 | contain an arbitrary number of `@`-prefixed capture names or strings. 
- | - 6 | Tree-sitter's CLI supports the following predicates by default: - | - 7 | ## The `eq?` predicate - | - 8 | This family of predicates allows you to match against a single capture or string - 9 | value. - | - 10 | The first argument to this predicate must be a capture, but the second can be either a capture to - 11 | compare the two captures' text, or a string to compare the first capture's text - 12 | against. - | - 13 | The base predicate is `#eq?`, but its complement, `#not-eq?`, can be used to _not_ - 14 | match a value. Additionally, you can prefix either of these with `any-` to match - 15 | if _any_ of the nodes match the predicate. This is only useful when dealing with - 16 | quantified captures, as by default a quantified capture will only match if _all_ the captured nodes match the predicate. - | - 17 | Thus, there are four predicates in total: - | - 18 | - `#eq?` - 19 | - `#not-eq?` - 20 | - `#any-eq?` - 21 | - `#any-not-eq?` - | - 22 | Consider the following example targeting C: - | - 23 | ```query - 24 | ((identifier) @variable.builtin - 25 | (#eq? @variable.builtin "self")) - 26 | ``` - | - 27 | This pattern would match any identifier that is `self`. - | - 28 | Now consider the following example: - | - 29 | ```query - 30 | ( - 31 | (pair - 32 | key: (property_identifier) @key-name - 33 | value: (identifier) @value-name) - 34 | (#eq? @key-name @value-name) - 35 | ) - 36 | ``` - | - 37 | This pattern would match key-value pairs where the `value` is an identifier - 38 | with the same text as the key (meaning they are the same). - | - 39 | As mentioned earlier, the `any-` prefix is meant for use with quantified captures. Here's - 40 | an example finding an empty comment within a group of comments: - | - 41 | ```query - 42 | ((comment)+ @comment.empty - 43 | (#any-eq?
@comment.empty "//")) - 44 | ``` - | - 45 | ## The `match?` predicate - | - 46 | These predicates are similar to the `eq?` predicates, but they use regular expressions - 47 | to match against the capture's text instead of string comparisons. - | - 48 | The first argument must be a capture, and the second must be a string containing - 49 | a regular expression. - | - 50 | Like the `eq?` predicate family, we can tack on `not-` to the beginning of the predicate - 51 | to negate the match, and `any-` to match if _any_ of the nodes in a quantified capture match the predicate. - | - 52 | This pattern matches identifiers written in `SCREAMING_SNAKE_CASE`. - | - 53 | ```query - 54 | ((identifier) @constant - 55 | (#match? @constant "^[A-Z][A-Z_]+")) - 56 | ``` - | - 57 | This query identifies documentation comments in C that begin with three forward slashes (`///`). - | - 58 | ```query - 59 | ((comment)+ @comment.documentation - 60 | (#match? @comment.documentation "^///\\s+.*")) - 61 | ``` - | - 62 | This query finds C code embedded in Go comments that appear just before a "C" import statement. - 63 | These are known as [`Cgo`][cgo] comments and are used to inject C code into Go programs. - | - 64 | ```query - 65 | ((comment)+ @injection.content - 66 | . - 67 | (import_declaration - 68 | (import_spec path: (interpreted_string_literal) @_import_c)) - 69 | (#eq? @_import_c "\"C\"") - 70 | (#match? @injection.content "^//")) - 71 | ``` - | - 72 | ## The `any-of?` predicate - | - 73 | The `any-of?` predicate allows you to match a capture against multiple strings, - 74 | and will match if the capture's text is equal to any of the strings. - | - 75 | The query below will match any of the builtin variables in JavaScript. - | - 76 | ```query - 77 | ((identifier) @variable.builtin - 78 | (#any-of? 
@variable.builtin - 79 | "arguments" - 80 | "module" - 81 | "console" - 82 | "window" - 83 | "document")) - 84 | ``` - | - 85 | ## The `is?` predicate - | - 86 | The `is?` predicate allows you to assert that a capture has a given property. This isn't widely used, but the CLI uses it - 87 | to determine whether a given node is a local variable or not, for example: - | - 88 | ```query - 89 | ((identifier) @variable.builtin - 90 | (#match? @variable.builtin "^(arguments|module|console|window|document)$") - 91 | (#is-not? local)) - 92 | ``` - | - 93 | This pattern would match any builtin variable that is not a local variable, because the `#is-not? local` predicate is used. - | - 94 | # Directives - | - 95 | Similar to predicates, directives are a way to associate arbitrary metadata with a pattern. The only difference between predicates - 96 | and directives is that directives end in a `!` character instead of `?` character. - | - 97 | Tree-sitter's CLI supports the following directives by default: - | - 98 | ## The `set!` directive - | - 99 | This directive allows you to associate key-value pairs with a pattern. The key and value can be any arbitrary text that you - 100 | see fit. - | - 101 | ```query - 102 | ((comment) @injection.content - 103 | (#match? 
@injection.content "/[*\/][!*\/] u32, - 39 | >; - 40 | pub const TSInputEncodingUTF8: TSInputEncoding = 0; - 41 | pub const TSInputEncodingUTF16LE: TSInputEncoding = 1; - 42 | pub const TSInputEncodingUTF16BE: TSInputEncoding = 2; - 43 | pub const TSInputEncodingCustom: TSInputEncoding = 3; - 44 | pub type TSInputEncoding = ::core::ffi::c_uint; - 45 | pub const TSSymbolTypeRegular: TSSymbolType = 0; - 46 | pub const TSSymbolTypeAnonymous: TSSymbolType = 1; - 47 | pub const TSSymbolTypeSupertype: TSSymbolType = 2; - 48 | pub const TSSymbolTypeAuxiliary: TSSymbolType = 3; - 49 | pub type TSSymbolType = ::core::ffi::c_uint; - 50 | #[repr(C)] - 51 | #[derive(Debug, Copy, Clone)] - 52 | pub struct TSPoint { - 53 | pub row: u32, - 54 | pub column: u32, - 55 | } - 56 | #[repr(C)] - 57 | #[derive(Debug, Copy, Clone)] - 58 | pub struct TSRange { - 59 | pub start_point: TSPoint, - 60 | pub end_point: TSPoint, - 61 | pub start_byte: u32, - 62 | pub end_byte: u32, - 63 | } - 64 | #[repr(C)] - 65 | #[derive(Debug)] - 66 | pub struct TSInput { - 67 | pub payload: *mut ::core::ffi::c_void, - 68 | pub read: ::core::option::Option< - 69 | unsafe extern "C" fn( - 70 | payload: *mut ::core::ffi::c_void, - 71 | byte_index: u32, - 72 | position: TSPoint, - 73 | bytes_read: *mut u32, - 74 | ) -> *const ::core::ffi::c_char, - 75 | >, - 76 | pub encoding: TSInputEncoding, - 77 | pub decode: TSDecodeFunction, - 78 | } - 79 | #[repr(C)] - 80 | #[derive(Debug, Copy, Clone)] - 81 | pub struct TSParseState { - 82 | pub payload: *mut ::core::ffi::c_void, - 83 | pub current_byte_offset: u32, - 84 | pub has_error: bool, - 85 | } - 86 | #[repr(C)] - 87 | #[derive(Debug, Copy, Clone)] - 88 | pub struct TSParseOptions { - 89 | pub payload: *mut ::core::ffi::c_void, - 90 | pub progress_callback: - 91 | ::core::option::Option bool>, - 92 | } - 93 | pub const TSLogTypeParse: TSLogType = 0; - 94 | pub const TSLogTypeLex: TSLogType = 1; - 95 | pub type TSLogType = ::core::ffi::c_uint; - 96 | #[repr(C)] - 
97 | #[derive(Debug)] - 98 | pub struct TSLogger { - 99 | pub payload: *mut ::core::ffi::c_void, - 100 | pub log: ::core::option::Option< - 101 | unsafe extern "C" fn( - 102 | payload: *mut ::core::ffi::c_void, - 103 | log_type: TSLogType, - 104 | buffer: *const ::core::ffi::c_char, - 105 | ), - 106 | >, - 107 | } - 108 | #[repr(C)] - 109 | #[derive(Debug, Copy, Clone)] - 110 | pub struct TSInputEdit { - 111 | pub start_byte: u32, - 112 | pub old_end_byte: u32, - 113 | pub new_end_byte: u32, - 114 | pub start_point: TSPoint, - 115 | pub old_end_point: TSPoint, - 116 | pub new_end_point: TSPoint, - 117 | } - 118 | #[repr(C)] - 119 | #[derive(Debug, Copy, Clone)] - 120 | pub struct TSNode { - 121 | pub context: [u32; 4usize], - 122 | pub id: *const ::core::ffi::c_void, - 123 | pub tree: *const TSTree, - 124 | } - 125 | #[repr(C)] - 126 | #[derive(Debug, Copy, Clone)] - 127 | pub struct TSTreeCursor { - 128 | pub tree: *const ::core::ffi::c_void, - 129 | pub id: *const ::core::ffi::c_void, - 130 | pub context: [u32; 3usize], - 131 | } - 132 | #[repr(C)] - 133 | #[derive(Debug)] - 134 | pub struct TSQueryCapture { - 135 | pub node: TSNode, - 136 | pub index: u32, - 137 | } - 138 | pub const TSQuantifierZero: TSQuantifier = 0; - 139 | pub const TSQuantifierZeroOrOne: TSQuantifier = 1; - 140 | pub const TSQuantifierZeroOrMore: TSQuantifier = 2; - 141 | pub const TSQuantifierOne: TSQuantifier = 3; - 142 | pub const TSQuantifierOneOrMore: TSQuantifier = 4; - 143 | pub type TSQuantifier = ::core::ffi::c_uint; - 144 | #[repr(C)] - 145 | #[derive(Debug)] - 146 | pub struct TSQueryMatch { - 147 | pub id: u32, - 148 | pub pattern_index: u16, - 149 | pub capture_count: u16, - 150 | pub captures: *const TSQueryCapture, - 151 | } - 152 | pub const TSQueryPredicateStepTypeDone: TSQueryPredicateStepType = 0; - 153 | pub const TSQueryPredicateStepTypeCapture: TSQueryPredicateStepType = 1; - 154 | pub const TSQueryPredicateStepTypeString: TSQueryPredicateStepType = 2; - 155 | pub type 
TSQueryPredicateStepType = ::core::ffi::c_uint; - 156 | #[repr(C)] - 157 | #[derive(Debug)] - 158 | pub struct TSQueryPredicateStep { - 159 | pub type_: TSQueryPredicateStepType, - 160 | pub value_id: u32, - 161 | } - 162 | pub const TSQueryErrorNone: TSQueryError = 0; - 163 | pub const TSQueryErrorSyntax: TSQueryError = 1; - 164 | pub const TSQueryErrorNodeType: TSQueryError = 2; - 165 | pub const TSQueryErrorField: TSQueryError = 3; - 166 | pub const TSQueryErrorCapture: TSQueryError = 4; - 167 | pub const TSQueryErrorStructure: TSQueryError = 5; - 168 | pub const TSQueryErrorLanguage: TSQueryError = 6; - 169 | pub type TSQueryError = ::core::ffi::c_uint; - 170 | #[repr(C)] - 171 | #[derive(Debug, Copy, Clone)] - 172 | pub struct TSQueryCursorState { - 173 | pub payload: *mut ::core::ffi::c_void, - 174 | pub current_byte_offset: u32, - 175 | } - 176 | #[repr(C)] - 177 | #[derive(Debug, Copy, Clone)] - 178 | pub struct TSQueryCursorOptions { - 179 | pub payload: *mut ::core::ffi::c_void, - 180 | pub progress_callback: - 181 | ::core::option::Option bool>, - 182 | } - 183 | #[doc = " The metadata associated with a language.\n\n Currently, this metadata can be used to check the [Semantic Version](https://semver.org/)\n of the language. 
This version information should be used to signal if a given parser might\n be incompatible with existing queries when upgrading between major versions, or minor versions\n if it's in zerover."] - 184 | #[repr(C)] - 185 | #[derive(Debug, Copy, Clone)] - 186 | pub struct TSLanguageMetadata { - 187 | pub major_version: u8, - 188 | pub minor_version: u8, - 189 | pub patch_version: u8, - 190 | } - 191 | extern "C" { - 192 | #[doc = " Create a new parser."] - 193 | pub fn ts_parser_new() -> *mut TSParser; - 194 | } - 195 | extern "C" { - 196 | #[doc = " Delete the parser, freeing all of the memory that it used."] - 197 | pub fn ts_parser_delete(self_: *mut TSParser); - 198 | } - 199 | extern "C" { - 200 | #[doc = " Get the parser's current language."] - 201 | pub fn ts_parser_language(self_: *const TSParser) -> *const TSLanguage; - 202 | } - 203 | extern "C" { - 204 | #[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. Check the language's ABI version using [`ts_language_abi_version`]\n and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and\n [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants."] - 205 | pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool; - 206 | } - 207 | extern "C" { - 208 | #[doc = " Set the ranges of text that the parser should include when parsing.\n\n By default, the parser will always include entire documents. This function\n allows you to parse only a *portion* of a document but still return a syntax\n tree whose ranges match up with the document as a whole. You can also pass\n multiple disjoint ranges.\n\n The second and third parameters specify the location and length of an array\n of ranges. 
The parser does *not* take ownership of these ranges; it copies\n the data, so it doesn't matter how these ranges are allocated.\n\n If `count` is zero, then the entire document will be parsed. Otherwise,\n the given ranges must be ordered from earliest to latest in the document,\n and they must not overlap. That is, the following must hold for all:\n\n `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte`\n\n If this requirement is not satisfied, the operation will fail, the ranges\n will not be assigned, and this function will return `false`. On success,\n this function returns `true`"] - 209 | pub fn ts_parser_set_included_ranges( - 210 | self_: *mut TSParser, - 211 | ranges: *const TSRange, - 212 | count: u32, - 213 | ) -> bool; - 214 | } - 215 | extern "C" { - 216 | #[doc = " Get the ranges of text that the parser will include when parsing.\n\n The returned pointer is owned by the parser. The caller should not free it\n or write to it. The length of the array will be written to the given\n `count` pointer."] - 217 | pub fn ts_parser_included_ranges(self_: *const TSParser, count: *mut u32) -> *const TSRange; - 218 | } - 219 | extern "C" { - 220 | #[doc = " Use the parser to parse some source code and create a syntax tree.\n\n If you are parsing this document for the first time, pass `NULL` for the\n `old_tree` parameter. Otherwise, if you have already parsed an earlier\n version of this document and the document has since been edited, pass the\n previous syntax tree so that the unchanged parts of it can be reused.\n This will save time and memory. For this to work correctly, you must have\n already edited the old syntax tree using the [`ts_tree_edit`] function in a\n way that exactly matches the source code changes.\n\n The [`TSInput`] parameter lets you specify how to read the text. It has the\n following three fields:\n 1. [`read`]: A function to retrieve a chunk of text at a given byte offset\n and (row, column) position. 
The function should return a pointer to the\n text and write its length to the [`bytes_read`] pointer. The parser does\n not take ownership of this buffer; it just borrows it until it has\n finished reading it. The function should write a zero value to the\n [`bytes_read`] pointer to indicate the end of the document.\n 2. [`payload`]: An arbitrary pointer that will be passed to each invocation\n of the [`read`] function.\n 3. [`encoding`]: An indication of how the text is encoded. Either\n `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.\n\n This function returns a syntax tree on success, and `NULL` on failure. There\n are four possible reasons for failure:\n 1. The parser does not have a language assigned. Check for this using the\n[`ts_parser_language`] function.\n 2. Parsing was cancelled due to the progress callback returning true. This callback\n is passed in [`ts_parser_parse_with_options`] inside the [`TSParseOptions`] struct.\n\n [`read`]: TSInput::read\n [`payload`]: TSInput::payload\n [`encoding`]: TSInput::encoding\n [`bytes_read`]: TSInput::read"] - 221 | pub fn ts_parser_parse( - 222 | self_: *mut TSParser, - 223 | old_tree: *const TSTree, - 224 | input: TSInput, - 225 | ) -> *mut TSTree; - 226 | } - 227 | extern "C" { - 228 | #[doc = " Use the parser to parse some source code and create a syntax tree, with some options.\n\n See [`ts_parser_parse`] for more details.\n\n See [`TSParseOptions`] for more details on the options."] - 229 | pub fn ts_parser_parse_with_options( - 230 | self_: *mut TSParser, - 231 | old_tree: *const TSTree, - 232 | input: TSInput, - 233 | parse_options: TSParseOptions, - 234 | ) -> *mut TSTree; - 235 | } - 236 | extern "C" { - 237 | #[doc = " Use the parser to parse some source code stored in one contiguous buffer.\n The first two parameters are the same as in the [`ts_parser_parse`] function\n above. 
The second two parameters indicate the location of the buffer and its\n length in bytes."] - 238 | pub fn ts_parser_parse_string( - 239 | self_: *mut TSParser, - 240 | old_tree: *const TSTree, - 241 | string: *const ::core::ffi::c_char, - 242 | length: u32, - 243 | ) -> *mut TSTree; - 244 | } - 245 | extern "C" { - 246 | #[doc = " Use the parser to parse some source code stored in one contiguous buffer with\n a given encoding. The first four parameters work the same as in the\n [`ts_parser_parse_string`] method above. The final parameter indicates whether\n the text is encoded as UTF8 or UTF16."] - 247 | pub fn ts_parser_parse_string_encoding( - 248 | self_: *mut TSParser, - 249 | old_tree: *const TSTree, - 250 | string: *const ::core::ffi::c_char, - 251 | length: u32, - 252 | encoding: TSInputEncoding, - 253 | ) -> *mut TSTree; - 254 | } - 255 | extern "C" { - 256 | #[doc = " Instruct the parser to start the next parse from the beginning.\n\n If the parser previously failed because of the progress callback, then\n by default, it will resume where it left off on the next call to\n [`ts_parser_parse`] or other parsing functions. If you don't want to resume,\n and instead intend to use this parser to parse some other document, you must\n call [`ts_parser_reset`] first."] - 257 | pub fn ts_parser_reset(self_: *mut TSParser); - 258 | } - 259 | extern "C" { - 260 | #[doc = " Set the logger that a parser should use during parsing.\n\n The parser does not take ownership over the logger payload. 
If a logger was\n previously assigned, the caller is responsible for releasing any memory\n owned by the previous logger."] - 261 | pub fn ts_parser_set_logger(self_: *mut TSParser, logger: TSLogger); - 262 | } - 263 | extern "C" { - 264 | #[doc = " Get the parser's current logger."] - 265 | pub fn ts_parser_logger(self_: *const TSParser) -> TSLogger; - 266 | } - 267 | extern "C" { - 268 | #[doc = " Set the file descriptor to which the parser should write debugging graphs\n during parsing. The graphs are formatted in the DOT language. You may want\n to pipe these graphs directly to a `dot(1)` process in order to generate\n SVG output. You can turn off this logging by passing a negative number."] - 269 | pub fn ts_parser_print_dot_graphs(self_: *mut TSParser, fd: ::core::ffi::c_int); - 270 | } - 271 | extern "C" { - 272 | #[doc = " Create a shallow copy of the syntax tree. This is very fast.\n\n You need to copy a syntax tree in order to use it on more than one thread at\n a time, as syntax trees are not thread safe."] - 273 | pub fn ts_tree_copy(self_: *const TSTree) -> *mut TSTree; - 274 | } - 275 | extern "C" { - 276 | #[doc = " Delete the syntax tree, freeing all of the memory that it used."] - 277 | pub fn ts_tree_delete(self_: *mut TSTree); - 278 | } - 279 | extern "C" { - 280 | #[doc = " Get the root node of the syntax tree."] - 281 | pub fn ts_tree_root_node(self_: *const TSTree) -> TSNode; - 282 | } - 283 | extern "C" { - 284 | #[doc = " Get the root node of the syntax tree, but with its position\n shifted forward by the given offset."] - 285 | pub fn ts_tree_root_node_with_offset( - 286 | self_: *const TSTree, - 287 | offset_bytes: u32, - 288 | offset_extent: TSPoint, - 289 | ) -> TSNode; - 290 | } - 291 | extern "C" { - 292 | #[doc = " Get the language that was used to parse the syntax tree."] - 293 | pub fn ts_tree_language(self_: *const TSTree) -> *const TSLanguage; - 294 | } - 295 | extern "C" { - 296 | #[doc = " Get the array of included ranges that 
was used to parse the syntax tree.\n\n The returned pointer must be freed by the caller."] - 297 | pub fn ts_tree_included_ranges(self_: *const TSTree, length: *mut u32) -> *mut TSRange; - 298 | } - 299 | extern "C" { - 300 | #[doc = " Edit the syntax tree to keep it in sync with source code that has been\n edited.\n\n You must describe the edit both in terms of byte offsets and in terms of\n (row, column) coordinates."] - 301 | pub fn ts_tree_edit(self_: *mut TSTree, edit: *const TSInputEdit); - 302 | } - 303 | extern "C" { - 304 | #[doc = " Compare an old edited syntax tree to a new syntax tree representing the same\n document, returning an array of ranges whose syntactic structure has changed.\n\n For this to work correctly, the old syntax tree must have been edited such\n that its ranges match up to the new tree. Generally, you'll want to call\n this function right after calling one of the [`ts_parser_parse`] functions.\n You need to pass the old tree that was passed to parse, as well as the new\n tree that was returned from that function.\n\n The returned ranges indicate areas where the hierarchical structure of syntax\n nodes (from root to leaf) has changed between the old and new trees. Characters\n outside these ranges have identical ancestor nodes in both trees.\n\n Note that the returned ranges may be slightly larger than the exact changed areas,\n but Tree-sitter attempts to make them as small as possible.\n\n The returned array is allocated using `malloc` and the caller is responsible\n for freeing it using `free`. 
The length of the array will be written to the\n given `length` pointer."] - 305 | pub fn ts_tree_get_changed_ranges( - 306 | old_tree: *const TSTree, - 307 | new_tree: *const TSTree, - 308 | length: *mut u32, - 309 | ) -> *mut TSRange; - 310 | } - 311 | extern "C" { - 312 | #[doc = " Write a DOT graph describing the syntax tree to the given file."] - 313 | pub fn ts_tree_print_dot_graph(self_: *const TSTree, file_descriptor: ::core::ffi::c_int); - 314 | } - 315 | extern "C" { - 316 | #[doc = " Get the node's type as a null-terminated string."] - 317 | pub fn ts_node_type(self_: TSNode) -> *const ::core::ffi::c_char; - 318 | } - 319 | extern "C" { - 320 | #[doc = " Get the node's type as a numerical id."] - 321 | pub fn ts_node_symbol(self_: TSNode) -> TSSymbol; - 322 | } - 323 | extern "C" { - 324 | #[doc = " Get the node's language."] - 325 | pub fn ts_node_language(self_: TSNode) -> *const TSLanguage; - 326 | } - 327 | extern "C" { - 328 | #[doc = " Get the node's type as it appears in the grammar ignoring aliases as a\n null-terminated string."] - 329 | pub fn ts_node_grammar_type(self_: TSNode) -> *const ::core::ffi::c_char; - 330 | } - 331 | extern "C" { - 332 | #[doc = " Get the node's type as a numerical id as it appears in the grammar ignoring\n aliases. 
This should be used in [`ts_language_next_state`] instead of\n [`ts_node_symbol`]."] - 333 | pub fn ts_node_grammar_symbol(self_: TSNode) -> TSSymbol; - 334 | } - 335 | extern "C" { - 336 | #[doc = " Get the node's start byte."] - 337 | pub fn ts_node_start_byte(self_: TSNode) -> u32; - 338 | } - 339 | extern "C" { - 340 | #[doc = " Get the node's start position in terms of rows and columns."] - 341 | pub fn ts_node_start_point(self_: TSNode) -> TSPoint; - 342 | } - 343 | extern "C" { - 344 | #[doc = " Get the node's end byte."] - 345 | pub fn ts_node_end_byte(self_: TSNode) -> u32; - 346 | } - 347 | extern "C" { - 348 | #[doc = " Get the node's end position in terms of rows and columns."] - 349 | pub fn ts_node_end_point(self_: TSNode) -> TSPoint; - 350 | } - 351 | extern "C" { - 352 | #[doc = " Get an S-expression representing the node as a string.\n\n This string is allocated with `malloc` and the caller is responsible for\n freeing it using `free`."] - 353 | pub fn ts_node_string(self_: TSNode) -> *mut ::core::ffi::c_char; - 354 | } - 355 | extern "C" { - 356 | #[doc = " Check if the node is null. Functions like [`ts_node_child`] and\n [`ts_node_next_sibling`] will return a null node to indicate that no such node\n was found."] - 357 | pub fn ts_node_is_null(self_: TSNode) -> bool; - 358 | } - 359 | extern "C" { - 360 | #[doc = " Check if the node is *named*. Named nodes correspond to named rules in the\n grammar, whereas *anonymous* nodes correspond to string literals in the\n grammar."] - 361 | pub fn ts_node_is_named(self_: TSNode) -> bool; - 362 | } - 363 | extern "C" { - 364 | #[doc = " Check if the node is *missing*. Missing nodes are inserted by the parser in\n order to recover from certain kinds of syntax errors."] - 365 | pub fn ts_node_is_missing(self_: TSNode) -> bool; - 366 | } - 367 | extern "C" { - 368 | #[doc = " Check if the node is *extra*. 
Extra nodes represent things like comments,\n which are not required the grammar, but can appear anywhere."] - 369 | pub fn ts_node_is_extra(self_: TSNode) -> bool; - 370 | } - 371 | extern "C" { - 372 | #[doc = " Check if a syntax node has been edited."] - 373 | pub fn ts_node_has_changes(self_: TSNode) -> bool; - 374 | } - 375 | extern "C" { - 376 | #[doc = " Check if the node is a syntax error or contains any syntax errors."] - 377 | pub fn ts_node_has_error(self_: TSNode) -> bool; - 378 | } - 379 | extern "C" { - 380 | #[doc = " Check if the node is a syntax error."] - 381 | pub fn ts_node_is_error(self_: TSNode) -> bool; - 382 | } - 383 | extern "C" { - 384 | #[doc = " Get this node's parse state."] - 385 | pub fn ts_node_parse_state(self_: TSNode) -> TSStateId; - 386 | } - 387 | extern "C" { - 388 | #[doc = " Get the parse state after this node."] - 389 | pub fn ts_node_next_parse_state(self_: TSNode) -> TSStateId; - 390 | } - 391 | extern "C" { - 392 | #[doc = " Get the node's immediate parent.\n Prefer [`ts_node_child_with_descendant`] for\n iterating over the node's ancestors."] - 393 | pub fn ts_node_parent(self_: TSNode) -> TSNode; - 394 | } - 395 | extern "C" { - 396 | #[doc = " Get the node that contains `descendant`.\n\n Note that this can return `descendant` itself."] - 397 | pub fn ts_node_child_with_descendant(self_: TSNode, descendant: TSNode) -> TSNode; - 398 | } - 399 | extern "C" { - 400 | #[doc = " Get the node's child at the given index, where zero represents the first\n child."] - 401 | pub fn ts_node_child(self_: TSNode, child_index: u32) -> TSNode; - 402 | } - 403 | extern "C" { - 404 | #[doc = " Get the field name for node's child at the given index, where zero represents\n the first child. 
Returns NULL, if no field is found."] - 405 | pub fn ts_node_field_name_for_child( - 406 | self_: TSNode, - 407 | child_index: u32, - 408 | ) -> *const ::core::ffi::c_char; - 409 | } - 410 | extern "C" { - 411 | #[doc = " Get the field name for node's named child at the given index, where zero\n represents the first named child. Returns NULL, if no field is found."] - 412 | pub fn ts_node_field_name_for_named_child( - 413 | self_: TSNode, - 414 | named_child_index: u32, - 415 | ) -> *const ::core::ffi::c_char; - 416 | } - 417 | extern "C" { - 418 | #[doc = " Get the node's number of children."] - 419 | pub fn ts_node_child_count(self_: TSNode) -> u32; - 420 | } - 421 | extern "C" { - 422 | #[doc = " Get the node's *named* child at the given index.\n\n See also [`ts_node_is_named`]."] - 423 | pub fn ts_node_named_child(self_: TSNode, child_index: u32) -> TSNode; - 424 | } - 425 | extern "C" { - 426 | #[doc = " Get the node's number of *named* children.\n\n See also [`ts_node_is_named`]."] - 427 | pub fn ts_node_named_child_count(self_: TSNode) -> u32; - 428 | } - 429 | extern "C" { - 430 | #[doc = " Get the node's child with the given field name."] - 431 | pub fn ts_node_child_by_field_name( - 432 | self_: TSNode, - 433 | name: *const ::core::ffi::c_char, - 434 | name_length: u32, - 435 | ) -> TSNode; - 436 | } - 437 | extern "C" { - 438 | #[doc = " Get the node's child with the given numerical field id.\n\n You can convert a field name to an id using the\n [`ts_language_field_id_for_name`] function."] - 439 | pub fn ts_node_child_by_field_id(self_: TSNode, field_id: TSFieldId) -> TSNode; - 440 | } - 441 | extern "C" { - 442 | #[doc = " Get the node's next / previous sibling."] - 443 | pub fn ts_node_next_sibling(self_: TSNode) -> TSNode; - 444 | } - 445 | extern "C" { - 446 | pub fn ts_node_prev_sibling(self_: TSNode) -> TSNode; - 447 | } - 448 | extern "C" { - 449 | #[doc = " Get the node's next / previous *named* sibling."] - 450 | pub fn 
ts_node_next_named_sibling(self_: TSNode) -> TSNode; - 451 | } - 452 | extern "C" { - 453 | pub fn ts_node_prev_named_sibling(self_: TSNode) -> TSNode; - 454 | } - 455 | extern "C" { - 456 | #[doc = " Get the node's first child that contains or starts after the given byte offset."] - 457 | pub fn ts_node_first_child_for_byte(self_: TSNode, byte: u32) -> TSNode; - 458 | } - 459 | extern "C" { - 460 | #[doc = " Get the node's first named child that contains or starts after the given byte offset."] - 461 | pub fn ts_node_first_named_child_for_byte(self_: TSNode, byte: u32) -> TSNode; - 462 | } - 463 | extern "C" { - 464 | #[doc = " Get the node's number of descendants, including one for the node itself."] - 465 | pub fn ts_node_descendant_count(self_: TSNode) -> u32; - 466 | } - 467 | extern "C" { - 468 | #[doc = " Get the smallest node within this node that spans the given range of bytes\n or (row, column) positions."] - 469 | pub fn ts_node_descendant_for_byte_range(self_: TSNode, start: u32, end: u32) -> TSNode; - 470 | } - 471 | extern "C" { - 472 | pub fn ts_node_descendant_for_point_range( - 473 | self_: TSNode, - 474 | start: TSPoint, - 475 | end: TSPoint, - 476 | ) -> TSNode; - 477 | } - 478 | extern "C" { - 479 | #[doc = " Get the smallest named node within this node that spans the given range of\n bytes or (row, column) positions."] - 480 | pub fn ts_node_named_descendant_for_byte_range(self_: TSNode, start: u32, end: u32) -> TSNode; - 481 | } - 482 | extern "C" { - 483 | pub fn ts_node_named_descendant_for_point_range( - 484 | self_: TSNode, - 485 | start: TSPoint, - 486 | end: TSPoint, - 487 | ) -> TSNode; - 488 | } - 489 | extern "C" { - 490 | #[doc = " Edit the node to keep it in-sync with source code that has been edited.\n\n This function is only rarely needed. When you edit a syntax tree with the\n [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree\n afterward will already reflect the edit. 
You only need to use [`ts_node_edit`]\n when you have a [`TSNode`] instance that you want to keep and continue to use\n after an edit."] - 491 | pub fn ts_node_edit(self_: *mut TSNode, edit: *const TSInputEdit); - 492 | } - 493 | extern "C" { - 494 | #[doc = " Check if two nodes are identical."] - 495 | pub fn ts_node_eq(self_: TSNode, other: TSNode) -> bool; - 496 | } - 497 | extern "C" { - 498 | #[doc = " Edit a point to keep it in-sync with source code that has been edited.\n\n This function updates a single point's byte offset and row/column position\n based on an edit operation. This is useful for editing points without\n requiring a tree or node instance."] - 499 | pub fn ts_point_edit(point: *mut TSPoint, point_byte: *mut u32, edit: *const TSInputEdit); - 500 | } - 501 | extern "C" { - 502 | #[doc = " Edit a range to keep it in-sync with source code that has been edited.\n\n This function updates a range's start and end positions based on an edit\n operation. This is useful for editing ranges without requiring a tree\n or node instance."] - 503 | pub fn ts_range_edit(range: *mut TSRange, edit: *const TSInputEdit); - 504 | } - 505 | extern "C" { - 506 | #[doc = " Create a new tree cursor starting from the given node.\n\n A tree cursor allows you to walk a syntax tree more efficiently than is\n possible using the [`TSNode`] functions. 
It is a mutable object that is always\n on a certain syntax node, and can be moved imperatively to different nodes.\n\n Note that the given node is considered the root of the cursor,\n and the cursor cannot walk outside this node."] - 507 | pub fn ts_tree_cursor_new(node: TSNode) -> TSTreeCursor; - 508 | } - 509 | extern "C" { - 510 | #[doc = " Delete a tree cursor, freeing all of the memory that it used."] - 511 | pub fn ts_tree_cursor_delete(self_: *mut TSTreeCursor); - 512 | } - 513 | extern "C" { - 514 | #[doc = " Re-initialize a tree cursor to start at the original node that the cursor was\n constructed with."] - 515 | pub fn ts_tree_cursor_reset(self_: *mut TSTreeCursor, node: TSNode); - 516 | } - 517 | extern "C" { - 518 | #[doc = " Re-initialize a tree cursor to the same position as another cursor.\n\n Unlike [`ts_tree_cursor_reset`], this will not lose parent information and\n allows reusing already created cursors."] - 519 | pub fn ts_tree_cursor_reset_to(dst: *mut TSTreeCursor, src: *const TSTreeCursor); - 520 | } - 521 | extern "C" { - 522 | #[doc = " Get the tree cursor's current node."] - 523 | pub fn ts_tree_cursor_current_node(self_: *const TSTreeCursor) -> TSNode; - 524 | } - 525 | extern "C" { - 526 | #[doc = " Get the field name of the tree cursor's current node.\n\n This returns `NULL` if the current node doesn't have a field.\n See also [`ts_node_child_by_field_name`]."] - 527 | pub fn ts_tree_cursor_current_field_name( - 528 | self_: *const TSTreeCursor, - 529 | ) -> *const ::core::ffi::c_char; - 530 | } - 531 | extern "C" { - 532 | #[doc = " Get the field id of the tree cursor's current node.\n\n This returns zero if the current node doesn't have a field.\n See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]."] - 533 | pub fn ts_tree_cursor_current_field_id(self_: *const TSTreeCursor) -> TSFieldId; - 534 | } - 535 | extern "C" { - 536 | #[doc = " Move the cursor to the parent of its current node.\n\n This returns `true` 
if the cursor successfully moved, and returns `false`\n if there was no parent node (the cursor was already on the root node).\n\n Note that the node the cursor was constructed with is considered the root\n of the cursor, and the cursor cannot walk outside this node."] - 537 | pub fn ts_tree_cursor_goto_parent(self_: *mut TSTreeCursor) -> bool; - 538 | } - 539 | extern "C" { - 540 | #[doc = " Move the cursor to the next sibling of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there was no next sibling node.\n\n Note that the node the cursor was constructed with is considered the root\n of the cursor, and the cursor cannot walk outside this node."] - 541 | pub fn ts_tree_cursor_goto_next_sibling(self_: *mut TSTreeCursor) -> bool; - 542 | } - 543 | extern "C" { - 544 | #[doc = " Move the cursor to the previous sibling of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there was no previous sibling node.\n\n Note, that this function may be slower than\n [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. In\n the worst case, this will need to iterate through all the children up to the\n previous sibling node to recalculate its position. 
Also note that the node the cursor\n was constructed with is considered the root of the cursor, and the cursor cannot\n walk outside this node."] - 545 | pub fn ts_tree_cursor_goto_previous_sibling(self_: *mut TSTreeCursor) -> bool; - 546 | } - 547 | extern "C" { - 548 | #[doc = " Move the cursor to the first child of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there were no children."] - 549 | pub fn ts_tree_cursor_goto_first_child(self_: *mut TSTreeCursor) -> bool; - 550 | } - 551 | extern "C" { - 552 | #[doc = " Move the cursor to the last child of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there were no children.\n\n Note that this function may be slower than [`ts_tree_cursor_goto_first_child`]\n because it needs to iterate through all the children to compute the child's\n position."] - 553 | pub fn ts_tree_cursor_goto_last_child(self_: *mut TSTreeCursor) -> bool; - 554 | } - 555 | extern "C" { - 556 | #[doc = " Move the cursor to the node that is the nth descendant of\n the original node that the cursor was constructed with, where\n zero represents the original node itself."] - 557 | pub fn ts_tree_cursor_goto_descendant(self_: *mut TSTreeCursor, goal_descendant_index: u32); - 558 | } - 559 | extern "C" { - 560 | #[doc = " Get the index of the cursor's current node out of all of the\n descendants of the original node that the cursor was constructed with."] - 561 | pub fn ts_tree_cursor_current_descendant_index(self_: *const TSTreeCursor) -> u32; - 562 | } - 563 | extern "C" { - 564 | #[doc = " Get the depth of the cursor's current node relative to the original\n node that the cursor was constructed with."] - 565 | pub fn ts_tree_cursor_current_depth(self_: *const TSTreeCursor) -> u32; - 566 | } - 567 | extern "C" { - 568 | #[doc = " Move the cursor to the first child of its current node that contains or starts after\n the given byte offset or 
point.\n\n This returns the index of the child node if one was found, and returns -1\n if no such child was found."] - 569 | pub fn ts_tree_cursor_goto_first_child_for_byte( - 570 | self_: *mut TSTreeCursor, - 571 | goal_byte: u32, - 572 | ) -> i64; - 573 | } - 574 | extern "C" { - 575 | pub fn ts_tree_cursor_goto_first_child_for_point( - 576 | self_: *mut TSTreeCursor, - 577 | goal_point: TSPoint, - 578 | ) -> i64; - 579 | } - 580 | extern "C" { - 581 | pub fn ts_tree_cursor_copy(cursor: *const TSTreeCursor) -> TSTreeCursor; - 582 | } - 583 | extern "C" { - 584 | #[doc = " Create a new query from a string containing one or more S-expression\n patterns. The query is associated with a particular language, and can\n only be run on syntax nodes parsed with that language.\n\n If all of the given patterns are valid, this returns a [`TSQuery`].\n If a pattern is invalid, this returns `NULL`, and provides two pieces\n of information about the problem:\n 1. The byte offset of the error is written to the `error_offset` parameter.\n 2. 
The type of error is written to the `error_type` parameter."] - 585 | pub fn ts_query_new( - 586 | language: *const TSLanguage, - 587 | source: *const ::core::ffi::c_char, - 588 | source_len: u32, - 589 | error_offset: *mut u32, - 590 | error_type: *mut TSQueryError, - 591 | ) -> *mut TSQuery; - 592 | } - 593 | extern "C" { - 594 | #[doc = " Delete a query, freeing all of the memory that it used."] - 595 | pub fn ts_query_delete(self_: *mut TSQuery); - 596 | } - 597 | extern "C" { - 598 | #[doc = " Get the number of patterns, captures, or string literals in the query."] - 599 | pub fn ts_query_pattern_count(self_: *const TSQuery) -> u32; - 600 | } - 601 | extern "C" { - 602 | pub fn ts_query_capture_count(self_: *const TSQuery) -> u32; - 603 | } - 604 | extern "C" { - 605 | pub fn ts_query_string_count(self_: *const TSQuery) -> u32; - 606 | } - 607 | extern "C" { - 608 | #[doc = " Get the byte offset where the given pattern starts in the query's source.\n\n This can be useful when combining queries by concatenating their source\n code strings."] - 609 | pub fn ts_query_start_byte_for_pattern(self_: *const TSQuery, pattern_index: u32) -> u32; - 610 | } - 611 | extern "C" { - 612 | #[doc = " Get the byte offset where the given pattern ends in the query's source.\n\n This can be useful when combining queries by concatenating their source\n code strings."] - 613 | pub fn ts_query_end_byte_for_pattern(self_: *const TSQuery, pattern_index: u32) -> u32; - 614 | } - 615 | extern "C" { - 616 | #[doc = " Get all of the predicates for the given pattern in the query.\n\n The predicates are represented as a single array of steps. There are three\n types of steps in this array, which correspond to the three legal values for\n the `type` field:\n - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names\n of captures. 
Their `value_id` can be used with the\n [`ts_query_capture_name_for_id`] function to obtain the name of the capture.\n - `TSQueryPredicateStepTypeString` - Steps with this type represent literal\n strings. Their `value_id` can be used with the\n [`ts_query_string_value_for_id`] function to obtain their string value.\n - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*\n that represent the end of an individual predicate. If a pattern has two\n predicates, then there will be two steps with this `type` in the array."] - 617 | pub fn ts_query_predicates_for_pattern( - 618 | self_: *const TSQuery, - 619 | pattern_index: u32, - 620 | step_count: *mut u32, - 621 | ) -> *const TSQueryPredicateStep; - 622 | } - 623 | extern "C" { - 624 | pub fn ts_query_is_pattern_rooted(self_: *const TSQuery, pattern_index: u32) -> bool; - 625 | } - 626 | extern "C" { - 627 | pub fn ts_query_is_pattern_non_local(self_: *const TSQuery, pattern_index: u32) -> bool; - 628 | } - 629 | extern "C" { - 630 | pub fn ts_query_is_pattern_guaranteed_at_step(self_: *const TSQuery, byte_offset: u32) -> bool; - 631 | } - 632 | extern "C" { - 633 | #[doc = " Get the name and length of one of the query's captures, or one of the\n query's string literals. Each capture and string is associated with a\n numeric id based on the order that it appeared in the query's source."] - 634 | pub fn ts_query_capture_name_for_id( - 635 | self_: *const TSQuery, - 636 | index: u32, - 637 | length: *mut u32, - 638 | ) -> *const ::core::ffi::c_char; - 639 | } - 640 | extern "C" { - 641 | #[doc = " Get the quantifier of the query's captures. 
Each capture is * associated\n with a numeric id based on the order that it appeared in the query's source."] - 642 | pub fn ts_query_capture_quantifier_for_id( - 643 | self_: *const TSQuery, - 644 | pattern_index: u32, - 645 | capture_index: u32, - 646 | ) -> TSQuantifier; - 647 | } - 648 | extern "C" { - 649 | pub fn ts_query_string_value_for_id( - 650 | self_: *const TSQuery, - 651 | index: u32, - 652 | length: *mut u32, - 653 | ) -> *const ::core::ffi::c_char; - 654 | } - 655 | extern "C" { - 656 | #[doc = " Disable a certain capture within a query.\n\n This prevents the capture from being returned in matches, and also avoids\n any resource usage associated with recording the capture. Currently, there\n is no way to undo this."] - 657 | pub fn ts_query_disable_capture( - 658 | self_: *mut TSQuery, - 659 | name: *const ::core::ffi::c_char, - 660 | length: u32, - 661 | ); - 662 | } - 663 | extern "C" { - 664 | #[doc = " Disable a certain pattern within a query.\n\n This prevents the pattern from matching and removes most of the overhead\n associated with the pattern. Currently, there is no way to undo this."] - 665 | pub fn ts_query_disable_pattern(self_: *mut TSQuery, pattern_index: u32); - 666 | } - 667 | extern "C" { - 668 | #[doc = " Create a new cursor for executing a given query.\n\n The cursor stores the state that is needed to iteratively search\n for matches. To use the query cursor, first call [`ts_query_cursor_exec`]\n to start running a given query on a given syntax node. Then, there are\n two options for consuming the results of the query:\n 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the\n *matches* in the order that they were found. Each match contains the\n index of the pattern that matched, and an array of captures. Because\n multiple patterns can match the same set of nodes, one match may contain\n captures that appear *before* some of the captures from a previous match.\n 2. 
Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the\n individual *captures* in the order that they appear. This is useful if\n don't care about which pattern matched, and just want a single ordered\n sequence of captures.\n\n If you don't care about consuming all of the results, you can stop calling\n [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point.\n You can then start executing another query on another node by calling\n [`ts_query_cursor_exec`] again."] - 669 | pub fn ts_query_cursor_new() -> *mut TSQueryCursor; - 670 | } - 671 | extern "C" { - 672 | #[doc = " Delete a query cursor, freeing all of the memory that it used."] - 673 | pub fn ts_query_cursor_delete(self_: *mut TSQueryCursor); - 674 | } - 675 | extern "C" { - 676 | #[doc = " Start running a given query on a given node."] - 677 | pub fn ts_query_cursor_exec(self_: *mut TSQueryCursor, query: *const TSQuery, node: TSNode); - 678 | } - 679 | extern "C" { - 680 | #[doc = " Start running a given query on a given node, with some options."] - 681 | pub fn ts_query_cursor_exec_with_options( - 682 | self_: *mut TSQueryCursor, - 683 | query: *const TSQuery, - 684 | node: TSNode, - 685 | query_options: *const TSQueryCursorOptions, - 686 | ); - 687 | } - 688 | extern "C" { - 689 | #[doc = " Manage the maximum number of in-progress matches allowed by this query\n cursor.\n\n Query cursors have an optional maximum capacity for storing lists of\n in-progress captures. If this capacity is exceeded, then the\n earliest-starting match will silently be dropped to make room for further\n matches. 
This maximum capacity is optional — by default, query cursors allow\n any number of pending matches, dynamically allocating new space for them as\n needed as the query is executed."] - 690 | pub fn ts_query_cursor_did_exceed_match_limit(self_: *const TSQueryCursor) -> bool; - 691 | } - 692 | extern "C" { - 693 | pub fn ts_query_cursor_match_limit(self_: *const TSQueryCursor) -> u32; - 694 | } - 695 | extern "C" { - 696 | pub fn ts_query_cursor_set_match_limit(self_: *mut TSQueryCursor, limit: u32); - 697 | } - 698 | extern "C" { - 699 | #[doc = " Set the range of bytes in which the query will be executed.\n\n The query cursor will return matches that intersect with the given point range.\n This means that a match may be returned even if some of its captures fall\n outside the specified range, as long as at least part of the match\n overlaps with the range.\n\n For example, if a query pattern matches a node that spans a larger area\n than the specified range, but part of that node intersects with the range,\n the entire match will be returned.\n\n This will return `false` if the start byte is greater than the end byte, otherwise\n it will return `true`."] - 700 | pub fn ts_query_cursor_set_byte_range( - 701 | self_: *mut TSQueryCursor, - 702 | start_byte: u32, - 703 | end_byte: u32, - 704 | ) -> bool; - 705 | } - 706 | extern "C" { - 707 | #[doc = " Set the range of (row, column) positions in which the query will be executed.\n\n The query cursor will return matches that intersect with the given point range.\n This means that a match may be returned even if some of its captures fall\n outside the specified range, as long as at least part of the match\n overlaps with the range.\n\n For example, if a query pattern matches a node that spans a larger area\n than the specified range, but part of that node intersects with the range,\n the entire match will be returned.\n\n This will return `false` if the start point is greater than the end point, otherwise\n it will 
return `true`."] - 708 | pub fn ts_query_cursor_set_point_range( - 709 | self_: *mut TSQueryCursor, - 710 | start_point: TSPoint, - 711 | end_point: TSPoint, - 712 | ) -> bool; - 713 | } - 714 | extern "C" { - 715 | #[doc = " Advance to the next match of the currently running query.\n\n If there is a match, write it to `*match` and return `true`.\n Otherwise, return `false`."] - 716 | pub fn ts_query_cursor_next_match(self_: *mut TSQueryCursor, match_: *mut TSQueryMatch) - 717 | -> bool; - 718 | } - 719 | extern "C" { - 720 | pub fn ts_query_cursor_remove_match(self_: *mut TSQueryCursor, match_id: u32); - 721 | } - 722 | extern "C" { - 723 | #[doc = " Advance to the next capture of the currently running query.\n\n If there is a capture, write its match to `*match` and its index within\n the match's capture list to `*capture_index`. Otherwise, return `false`."] - 724 | pub fn ts_query_cursor_next_capture( - 725 | self_: *mut TSQueryCursor, - 726 | match_: *mut TSQueryMatch, - 727 | capture_index: *mut u32, - 728 | ) -> bool; - 729 | } - 730 | extern "C" { - 731 | #[doc = " Set the maximum start depth for a query cursor.\n\n This prevents cursors from exploring children nodes at a certain depth.\n Note if a pattern includes many children, then they will still be checked.\n\n The zero max start depth value can be used as a special behavior and\n it helps to destructure a subtree by staying on a node and using captures\n for interested parts. 
Note that the zero max start depth only limit a search\n depth for a pattern's root node but other nodes that are parts of the pattern\n may be searched at any depth what defined by the pattern structure.\n\n Set to `UINT32_MAX` to remove the maximum start depth."] - 732 | pub fn ts_query_cursor_set_max_start_depth(self_: *mut TSQueryCursor, max_start_depth: u32); - 733 | } - 734 | extern "C" { - 735 | #[doc = " Get another reference to the given language."] - 736 | pub fn ts_language_copy(self_: *const TSLanguage) -> *const TSLanguage; - 737 | } - 738 | extern "C" { - 739 | #[doc = " Free any dynamically-allocated resources for this language, if\n this is the last reference."] - 740 | pub fn ts_language_delete(self_: *const TSLanguage); - 741 | } - 742 | extern "C" { - 743 | #[doc = " Get the number of distinct node types in the language."] - 744 | pub fn ts_language_symbol_count(self_: *const TSLanguage) -> u32; - 745 | } - 746 | extern "C" { - 747 | #[doc = " Get the number of valid states in this language."] - 748 | pub fn ts_language_state_count(self_: *const TSLanguage) -> u32; - 749 | } - 750 | extern "C" { - 751 | #[doc = " Get the numerical id for the given node type string."] - 752 | pub fn ts_language_symbol_for_name( - 753 | self_: *const TSLanguage, - 754 | string: *const ::core::ffi::c_char, - 755 | length: u32, - 756 | is_named: bool, - 757 | ) -> TSSymbol; - 758 | } - 759 | extern "C" { - 760 | #[doc = " Get the number of distinct field names in the language."] - 761 | pub fn ts_language_field_count(self_: *const TSLanguage) -> u32; - 762 | } - 763 | extern "C" { - 764 | #[doc = " Get the field name string for the given numerical id."] - 765 | pub fn ts_language_field_name_for_id( - 766 | self_: *const TSLanguage, - 767 | id: TSFieldId, - 768 | ) -> *const ::core::ffi::c_char; - 769 | } - 770 | extern "C" { - 771 | #[doc = " Get the numerical id for the given field name string."] - 772 | pub fn ts_language_field_id_for_name( - 773 | self_: *const 
TSLanguage, - 774 | name: *const ::core::ffi::c_char, - 775 | name_length: u32, - 776 | ) -> TSFieldId; - 777 | } - 778 | extern "C" { - 779 | #[doc = " Get a list of all supertype symbols for the language."] - 780 | pub fn ts_language_supertypes(self_: *const TSLanguage, length: *mut u32) -> *const TSSymbol; - 781 | } - 782 | extern "C" { - 783 | #[doc = " Get a list of all subtype symbol ids for a given supertype symbol.\n\n See [`ts_language_supertypes`] for fetching all supertype symbols."] - 784 | pub fn ts_language_subtypes( - 785 | self_: *const TSLanguage, - 786 | supertype: TSSymbol, - 787 | length: *mut u32, - 788 | ) -> *const TSSymbol; - 789 | } - 790 | extern "C" { - 791 | #[doc = " Get a node type string for the given numerical id."] - 792 | pub fn ts_language_symbol_name( - 793 | self_: *const TSLanguage, - 794 | symbol: TSSymbol, - 795 | ) -> *const ::core::ffi::c_char; - 796 | } - 797 | extern "C" { - 798 | #[doc = " Check whether the given node type id belongs to named nodes, anonymous nodes,\n or a hidden nodes.\n\n See also [`ts_node_is_named`]. Hidden nodes are never returned from the API."] - 799 | pub fn ts_language_symbol_type(self_: *const TSLanguage, symbol: TSSymbol) -> TSSymbolType; - 800 | } - 801 | extern "C" { - 802 | #[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."] - 803 | pub fn ts_language_abi_version(self_: *const TSLanguage) -> u32; - 804 | } - 805 | extern "C" { - 806 | #[doc = " Get the metadata for this language. This information is generated by the\n CLI, and relies on the language author providing the correct metadata in\n the language's `tree-sitter.json` file.\n\n See also [`TSMetadata`]."] - 807 | pub fn ts_language_metadata(self_: *const TSLanguage) -> *const TSLanguageMetadata; - 808 | } - 809 | extern "C" { - 810 | #[doc = " Get the next parse state. 
Combine this with lookahead iterators to generate\n completion suggestions or valid symbols in error nodes. Use\n [`ts_node_grammar_symbol`] for valid symbols."] - 811 | pub fn ts_language_next_state( - 812 | self_: *const TSLanguage, - 813 | state: TSStateId, - 814 | symbol: TSSymbol, - 815 | ) -> TSStateId; - 816 | } - 817 | extern "C" { - 818 | #[doc = " Get the name of this language. This returns `NULL` in older parsers."] - 819 | pub fn ts_language_name(self_: *const TSLanguage) -> *const ::core::ffi::c_char; - 820 | } - 821 | extern "C" { - 822 | #[doc = " Create a new lookahead iterator for the given language and parse state.\n\n This returns `NULL` if state is invalid for the language.\n\n Repeatedly using [`ts_lookahead_iterator_next`] and\n [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the\n given parse state. Newly created lookahead iterators will contain the `ERROR`\n symbol.\n\n Lookahead iterators can be useful to generate suggestions and improve syntax\n error diagnostics. To get symbols valid in an ERROR node, use the lookahead\n iterator on its first leaf node state. 
For `MISSING` nodes, a lookahead\n iterator created on the previous non-extra leaf node may be appropriate."] - 823 | pub fn ts_lookahead_iterator_new( - 824 | self_: *const TSLanguage, - 825 | state: TSStateId, - 826 | ) -> *mut TSLookaheadIterator; - 827 | } - 828 | extern "C" { - 829 | #[doc = " Delete a lookahead iterator freeing all the memory used."] - 830 | pub fn ts_lookahead_iterator_delete(self_: *mut TSLookaheadIterator); - 831 | } - 832 | extern "C" { - 833 | #[doc = " Reset the lookahead iterator to another state.\n\n This returns `true` if the iterator was reset to the given state and `false`\n otherwise."] - 834 | pub fn ts_lookahead_iterator_reset_state( - 835 | self_: *mut TSLookaheadIterator, - 836 | state: TSStateId, - 837 | ) -> bool; - 838 | } - 839 | extern "C" { - 840 | #[doc = " Reset the lookahead iterator.\n\n This returns `true` if the language was set successfully and `false`\n otherwise."] - 841 | pub fn ts_lookahead_iterator_reset( - 842 | self_: *mut TSLookaheadIterator, - 843 | language: *const TSLanguage, - 844 | state: TSStateId, - 845 | ) -> bool; - 846 | } - 847 | extern "C" { - 848 | #[doc = " Get the current language of the lookahead iterator."] - 849 | pub fn ts_lookahead_iterator_language(self_: *const TSLookaheadIterator) -> *const TSLanguage; - 850 | } - 851 | extern "C" { - 852 | #[doc = " Advance the lookahead iterator to the next symbol.\n\n This returns `true` if there is a new symbol and `false` otherwise."] - 853 | pub fn ts_lookahead_iterator_next(self_: *mut TSLookaheadIterator) -> bool; - 854 | } - 855 | extern "C" { - 856 | #[doc = " Get the current symbol of the lookahead iterator;"] - 857 | pub fn ts_lookahead_iterator_current_symbol(self_: *const TSLookaheadIterator) -> TSSymbol; - 858 | } - 859 | extern "C" { - 860 | #[doc = " Get the current symbol type of the lookahead iterator as a null terminated\n string."] - 861 | pub fn ts_lookahead_iterator_current_symbol_name( - 862 | self_: *const 
TSLookaheadIterator, - 863 | ) -> *const ::core::ffi::c_char; - 864 | } - 865 | #[repr(C)] - 866 | #[derive(Debug, Copy, Clone)] - 867 | pub struct wasm_engine_t { - 868 | _unused: [u8; 0], - 869 | } - 870 | pub type TSWasmEngine = wasm_engine_t; - 871 | #[repr(C)] - 872 | #[derive(Debug, Copy, Clone)] - 873 | pub struct TSWasmStore { - 874 | _unused: [u8; 0], - 875 | } - 876 | pub const TSWasmErrorKindNone: TSWasmErrorKind = 0; - 877 | pub const TSWasmErrorKindParse: TSWasmErrorKind = 1; - 878 | pub const TSWasmErrorKindCompile: TSWasmErrorKind = 2; - 879 | pub const TSWasmErrorKindInstantiate: TSWasmErrorKind = 3; - 880 | pub const TSWasmErrorKindAllocate: TSWasmErrorKind = 4; - 881 | pub type TSWasmErrorKind = ::core::ffi::c_uint; - 882 | #[repr(C)] - 883 | #[derive(Debug, Copy, Clone)] - 884 | pub struct TSWasmError { - 885 | pub kind: TSWasmErrorKind, - 886 | pub message: *mut ::core::ffi::c_char, - 887 | } - 888 | extern "C" { - 889 | #[doc = " Create a Wasm store."] - 890 | pub fn ts_wasm_store_new( - 891 | engine: *mut TSWasmEngine, - 892 | error: *mut TSWasmError, - 893 | ) -> *mut TSWasmStore; - 894 | } - 895 | extern "C" { - 896 | #[doc = " Free the memory associated with the given Wasm store."] - 897 | pub fn ts_wasm_store_delete(arg1: *mut TSWasmStore); - 898 | } - 899 | extern "C" { - 900 | #[doc = " Create a language from a buffer of Wasm. The resulting language behaves\n like any other Tree-sitter language, except that in order to use it with\n a parser, that parser must have a Wasm store. 
Note that the language\n can be used with any Wasm store, it doesn't need to be the same store that\n was used to originally load it."] - 901 | pub fn ts_wasm_store_load_language( - 902 | arg1: *mut TSWasmStore, - 903 | name: *const ::core::ffi::c_char, - 904 | wasm: *const ::core::ffi::c_char, - 905 | wasm_len: u32, - 906 | error: *mut TSWasmError, - 907 | ) -> *const TSLanguage; - 908 | } - 909 | extern "C" { - 910 | #[doc = " Get the number of languages instantiated in the given Wasm store."] - 911 | pub fn ts_wasm_store_language_count(arg1: *const TSWasmStore) -> usize; - 912 | } - 913 | extern "C" { - 914 | #[doc = " Check if the language came from a Wasm module. If so, then in order to use\n this language with a Parser, that parser must have a Wasm store assigned."] - 915 | pub fn ts_language_is_wasm(arg1: *const TSLanguage) -> bool; - 916 | } - 917 | extern "C" { - 918 | #[doc = " Assign the given Wasm store to the parser. A parser must have a Wasm store\n in order to use Wasm languages."] - 919 | pub fn ts_parser_set_wasm_store(arg1: *mut TSParser, arg2: *mut TSWasmStore); - 920 | } - 921 | extern "C" { - 922 | #[doc = " Remove the parser's current Wasm store and return it. This returns NULL if\n the parser doesn't have a Wasm store."] - 923 | pub fn ts_parser_take_wasm_store(arg1: *mut TSParser) -> *mut TSWasmStore; - 924 | } - 925 | extern "C" { - 926 | #[doc = " Set the allocation functions used by the library.\n\n By default, Tree-sitter uses the standard libc allocation functions,\n but aborts the process when an allocation fails. This function lets\n you supply alternative allocation functions at runtime.\n\n If you pass `NULL` for any parameter, Tree-sitter will switch back to\n its default implementation of that function.\n\n If you call this function after the library has already been used, then\n you must ensure that either:\n 1. All the existing objects have been freed.\n 2. 
The new allocator shares its state with the old one, so it is capable\n of freeing memory that was allocated by the old allocator."] - 927 | pub fn ts_set_allocator( - 928 | new_malloc: ::core::option::Option< - 929 | unsafe extern "C" fn(arg1: usize) -> *mut ::core::ffi::c_void, - 930 | >, - 931 | new_calloc: ::core::option::Option< - 932 | unsafe extern "C" fn(arg1: usize, arg2: usize) -> *mut ::core::ffi::c_void, - 933 | >, - 934 | new_realloc: ::core::option::Option< - 935 | unsafe extern "C" fn( - 936 | arg1: *mut ::core::ffi::c_void, - 937 | arg2: usize, - 938 | ) -> *mut ::core::ffi::c_void, - 939 | >, - 940 | new_free: ::core::option::Option, - 941 | ); - 942 | } - - - --------------------------------------------------------------------------------- -/lib/binding_rust/ffi.rs: --------------------------------------------------------------------------------- - 1 | #![allow(dead_code)] - 2 | #![allow(non_upper_case_globals)] - 3 | #![allow(non_camel_case_types)] - 4 | #![allow(clippy::missing_const_for_fn)] - | - 5 | #[cfg(feature = "bindgen")] - 6 | include!(concat!(env!("OUT_DIR"), "/bindings.rs")); - | - 7 | #[cfg(not(feature = "bindgen"))] - 8 | include!("./bindings.rs"); - | - 9 | #[cfg(unix)] - 10 | #[cfg(feature = "std")] - 11 | extern "C" { - 12 | pub(crate) fn _ts_dup(fd: std::os::raw::c_int) -> std::os::raw::c_int; - 13 | } - | - 14 | #[cfg(windows)] - 15 | #[cfg(feature = "std")] - 16 | extern "C" { - 17 | pub(crate) fn _ts_dup(handle: *mut std::os::raw::c_void) -> std::os::raw::c_int; - 18 | } - | - 19 | use core::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull, str}; - | - 20 | use crate::{ - 21 | Language, LookaheadIterator, Node, ParseState, Parser, Query, QueryCursor, QueryCursorState, - 22 | QueryError, Tree, TreeCursor, - 23 | }; - | - 24 | impl Language { - 25 | /// Reconstructs a [`Language`] from a raw pointer. - 26 | /// - 27 | /// # Safety - 28 | /// - 29 | /// `ptr` must be non-null. 
- 30 | #[must_use] - 31 | pub const unsafe fn from_raw(ptr: *const TSLanguage) -> Self { - 32 | Self(ptr) - 33 | } - | - 34 | /// Consumes the [`Language`], returning a raw pointer to the underlying C structure. - 35 | #[must_use] - 36 | pub fn into_raw(self) -> *const TSLanguage { - 37 | ManuallyDrop::new(self).0 - 38 | } - 39 | } - | - 40 | impl Parser { - 41 | /// Reconstructs a [`Parser`] from a raw pointer. - 42 | /// - 43 | /// # Safety - 44 | /// - 45 | /// `ptr` must be non-null. - 46 | #[must_use] - 47 | pub const unsafe fn from_raw(ptr: *mut TSParser) -> Self { - 48 | Self(NonNull::new_unchecked(ptr)) - 49 | } - | - 50 | /// Consumes the [`Parser`], returning a raw pointer to the underlying C structure. - 51 | /// - 52 | /// # Safety - 53 | /// - 54 | /// It's a caller responsibility to adjust parser's state - 55 | /// like disable logging or dot graphs printing if this - 56 | /// may cause issues like use after free. - 57 | #[must_use] - 58 | pub fn into_raw(self) -> *mut TSParser { - 59 | ManuallyDrop::new(self).0.as_ptr() - 60 | } - 61 | } - | - 62 | impl ParseState { - 63 | /// Reconstructs a [`ParseState`] from a raw pointer - 64 | /// - 65 | /// # Safety - 66 | /// - 67 | /// `ptr` must be non-null. - 68 | #[must_use] - 69 | pub const unsafe fn from_raw(ptr: *mut TSParseState) -> Self { - 70 | Self(NonNull::new_unchecked(ptr)) - 71 | } - | - 72 | /// Consumes the [`ParseState`], returning a raw pointer to the underlying C structure. - 73 | #[must_use] - 74 | pub fn into_raw(self) -> *mut TSParseState { - 75 | ManuallyDrop::new(self).0.as_ptr() - 76 | } - 77 | } - | - 78 | impl Tree { - 79 | /// Reconstructs a [`Tree`] from a raw pointer. - 80 | /// - 81 | /// # Safety - 82 | /// - 83 | /// `ptr` must be non-null. - 84 | #[must_use] - 85 | pub const unsafe fn from_raw(ptr: *mut TSTree) -> Self { - 86 | Self(NonNull::new_unchecked(ptr)) - 87 | } - | - 88 | /// Consumes the [`Tree`], returning a raw pointer to the underlying C structure. 
- 89 | #[must_use] - 90 | pub fn into_raw(self) -> *mut TSTree { - 91 | ManuallyDrop::new(self).0.as_ptr() - 92 | } - 93 | } - | - 94 | impl Node<'_> { - 95 | /// Reconstructs a [`Node`] from a raw pointer. - 96 | /// - 97 | /// # Safety - 98 | /// - 99 | /// `ptr` must be non-null. - 100 | #[must_use] - 101 | pub const unsafe fn from_raw(raw: TSNode) -> Self { - 102 | Self(raw, PhantomData) - 103 | } - | - 104 | /// Consumes the [`Node`], returning a raw pointer to the underlying C structure. - 105 | #[must_use] - 106 | pub fn into_raw(self) -> TSNode { - 107 | ManuallyDrop::new(self).0 - 108 | } - 109 | } - | - 110 | impl TreeCursor<'_> { - 111 | /// Reconstructs a [`TreeCursor`] from a raw pointer. - 112 | /// - 113 | /// # Safety - 114 | /// - 115 | /// `ptr` must be non-null. - 116 | #[must_use] - 117 | pub const unsafe fn from_raw(raw: TSTreeCursor) -> Self { - 118 | Self(raw, PhantomData) - 119 | } - | - 120 | /// Consumes the [`TreeCursor`], returning a raw pointer to the underlying C structure. - 121 | #[must_use] - 122 | pub fn into_raw(self) -> TSTreeCursor { - 123 | ManuallyDrop::new(self).0 - 124 | } - 125 | } - | - 126 | impl Query { - 127 | /// Reconstructs a [`Query`] from a raw pointer. - 128 | /// - 129 | /// # Safety - 130 | /// - 131 | /// `ptr` must be non-null. - 132 | pub unsafe fn from_raw(ptr: *mut TSQuery, source: &str) -> Result { - 133 | Self::from_raw_parts(ptr, source) - 134 | } - | - 135 | /// Consumes the [`Query`], returning a raw pointer to the underlying C structure. - 136 | #[must_use] - 137 | pub fn into_raw(self) -> *mut TSQuery { - 138 | ManuallyDrop::new(self).ptr.as_ptr() - 139 | } - 140 | } - | - 141 | impl QueryCursor { - 142 | /// Reconstructs a [`QueryCursor`] from a raw pointer. - 143 | /// - 144 | /// # Safety - 145 | /// - 146 | /// `ptr` must be non-null. 
- 147 | #[must_use] - 148 | pub const unsafe fn from_raw(ptr: *mut TSQueryCursor) -> Self { - 149 | Self { - 150 | ptr: NonNull::new_unchecked(ptr), - 151 | } - 152 | } - | - 153 | /// Consumes the [`QueryCursor`], returning a raw pointer to the underlying C structure. - 154 | #[must_use] - 155 | pub fn into_raw(self) -> *mut TSQueryCursor { - 156 | ManuallyDrop::new(self).ptr.as_ptr() - 157 | } - 158 | } - | - 159 | impl QueryCursorState { - 160 | /// Reconstructs a [`QueryCursorState`] from a raw pointer. - 161 | /// - 162 | /// # Safety - 163 | /// - 164 | /// `ptr` must be non-null. - 165 | #[must_use] - 166 | pub const unsafe fn from_raw(ptr: *mut TSQueryCursorState) -> Self { - 167 | Self(NonNull::new_unchecked(ptr)) - 168 | } - | - 169 | /// Consumes the [`QueryCursorState`], returning a raw pointer to the underlying C structure. - 170 | #[must_use] - 171 | pub fn into_raw(self) -> *mut TSQueryCursorState { - 172 | ManuallyDrop::new(self).0.as_ptr() - 173 | } - 174 | } - | - 175 | impl LookaheadIterator { - 176 | /// Reconstructs a [`LookaheadIterator`] from a raw pointer. - 177 | /// - 178 | /// # Safety - 179 | /// - 180 | /// `ptr` must be non-null. - 181 | #[must_use] - 182 | pub const unsafe fn from_raw(ptr: *mut TSLookaheadIterator) -> Self { - 183 | Self(NonNull::new_unchecked(ptr)) - 184 | } - | - 185 | /// Consumes the [`LookaheadIterator`], returning a raw pointer to the underlying C structure. 
- 186 | #[must_use] - 187 | pub fn into_raw(self) -> *mut TSLookaheadIterator { - 188 | ManuallyDrop::new(self).0.as_ptr() - 189 | } - 190 | } - - - --------------------------------------------------------------------------------- -/lib/binding_rust/lib.rs: --------------------------------------------------------------------------------- - 1 | #![cfg_attr(not(any(test, doctest)), doc = include_str!("./README.md"))] - 2 | #![cfg_attr(not(feature = "std"), no_std)] - 3 | #![cfg_attr(docsrs, feature(doc_cfg))] - | - 4 | pub mod ffi; - 5 | mod util; - | - 6 | #[cfg(not(feature = "std"))] - 7 | extern crate alloc; - 8 | #[cfg(not(feature = "std"))] - 9 | use alloc::{boxed::Box, format, string::String, string::ToString, vec::Vec}; - 10 | use core::{ - 11 | ffi::{c_char, c_void, CStr}, - 12 | fmt::{self, Write}, - 13 | hash, iter, - 14 | marker::PhantomData, - 15 | mem::MaybeUninit, - 16 | num::NonZeroU16, - 17 | ops::{self, ControlFlow, Deref}, - 18 | ptr::{self, NonNull}, - 19 | slice, str, - 20 | }; - 21 | #[cfg(feature = "std")] - 22 | use std::error; - 23 | #[cfg(all(unix, feature = "std"))] - 24 | use std::os::fd::AsRawFd; - 25 | #[cfg(all(windows, feature = "std"))] - 26 | use std::os::windows::io::AsRawHandle; - | - 27 | pub use streaming_iterator::{StreamingIterator, StreamingIteratorMut}; - 28 | use tree_sitter_language::LanguageFn; - | - 29 | #[cfg(feature = "wasm")] - 30 | mod wasm_language; - 31 | #[cfg(feature = "wasm")] - 32 | #[cfg_attr(docsrs, doc(cfg(feature = "wasm")))] - 33 | pub use wasm_language::*; - | - 34 | /// The latest ABI version that is supported by the current version of the - 35 | /// library. - 36 | /// - 37 | /// When Languages are generated by the Tree-sitter CLI, they are - 38 | /// assigned an ABI version number that corresponds to the current CLI version. - 39 | /// The Tree-sitter library is generally backwards-compatible with languages - 40 | /// generated using older CLI versions, but is not forwards-compatible. 
- 41 | #[doc(alias = "TREE_SITTER_LANGUAGE_VERSION")] - 42 | pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION as usize; - | - 43 | /// The earliest ABI version that is supported by the current version of the - 44 | /// library. - 45 | #[doc(alias = "TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION")] - 46 | pub const MIN_COMPATIBLE_LANGUAGE_VERSION: usize = - 47 | ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION as usize; - | - 48 | pub const PARSER_HEADER: &str = include_str!("../src/parser.h"); - | - 49 | /// An opaque object that defines how to parse a particular language. The code - 50 | /// for each `Language` is generated by the Tree-sitter CLI. - 51 | #[doc(alias = "TSLanguage")] - 52 | #[derive(Debug, PartialEq, Eq, Hash)] - 53 | #[repr(transparent)] - 54 | pub struct Language(*const ffi::TSLanguage); - | - 55 | pub struct LanguageRef<'a>(*const ffi::TSLanguage, PhantomData<&'a ()>); - | - 56 | /// The metadata associated with a language. - 57 | /// - 58 | /// Currently, this metadata can be used to check the [Semantic Version](https://semver.org/) - 59 | /// of the language. This version information should be used to signal if a given parser might - 60 | /// be incompatible with existing queries when upgrading between major versions, or minor versions - 61 | /// if it's in zerover. - 62 | #[doc(alias = "TSLanguageMetadata")] - 63 | pub struct LanguageMetadata { - 64 | pub major_version: u8, - 65 | pub minor_version: u8, - 66 | pub patch_version: u8, - 67 | } - | - 68 | impl From for LanguageMetadata { - 69 | fn from(val: ffi::TSLanguageMetadata) -> Self { - 70 | Self { - 71 | major_version: val.major_version, - 72 | minor_version: val.minor_version, - 73 | patch_version: val.patch_version, - 74 | } - 75 | } - 76 | } - | - 77 | /// A tree that represents the syntactic structure of a source code file. 
- 78 | #[doc(alias = "TSTree")] - 79 | pub struct Tree(NonNull); - | - 80 | /// A position in a multi-line text document, in terms of rows and columns. - 81 | /// - 82 | /// Rows and columns are zero-based. - 83 | #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)] - 84 | pub struct Point { - 85 | pub row: usize, - 86 | pub column: usize, - 87 | } - | - 88 | /// A range of positions in a multi-line text document, both in terms of bytes - 89 | /// and of rows and columns. - 90 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] - 91 | pub struct Range { - 92 | pub start_byte: usize, - 93 | pub end_byte: usize, - 94 | pub start_point: Point, - 95 | pub end_point: Point, - 96 | } - | - 97 | /// A summary of a change to a text document. - 98 | #[derive(Clone, Copy, Debug, PartialEq, Eq)] - 99 | pub struct InputEdit { - 100 | pub start_byte: usize, - 101 | pub old_end_byte: usize, - 102 | pub new_end_byte: usize, - 103 | pub start_position: Point, - 104 | pub old_end_position: Point, - 105 | pub new_end_position: Point, - 106 | } - | - 107 | impl InputEdit { - 108 | /// Edit a point to keep it in-sync with source code that has been edited. - 109 | /// - 110 | /// This function updates a single point's byte offset and row/column position - 111 | /// based on this edit operation. This is useful for editing points without - 112 | /// requiring a tree or node instance. - 113 | #[doc(alias = "ts_point_edit")] - 114 | pub fn edit_point(&self, point: &mut Point, byte: &mut usize) { - 115 | let edit = self.into(); - 116 | let mut ts_point = (*point).into(); - 117 | let mut ts_byte = *byte as u32; - | - 118 | unsafe { - 119 | ffi::ts_point_edit( - 120 | core::ptr::addr_of_mut!(ts_point), - 121 | core::ptr::addr_of_mut!(ts_byte), - 122 | &edit, - 123 | ); - 124 | } - | - 125 | *point = ts_point.into(); - 126 | *byte = ts_byte as usize; - 127 | } - | - 128 | /// Edit a range to keep it in-sync with source code that has been edited. 
- 129 | /// - 130 | /// This function updates a range's start and end positions based on this edit - 131 | /// operation. This is useful for editing ranges without requiring a tree - 132 | /// or node instance. - 133 | #[doc(alias = "ts_range_edit")] - 134 | pub fn edit_range(&self, range: &mut Range) { - 135 | let edit = self.into(); - 136 | let mut ts_range = (*range).into(); - | - 137 | unsafe { - 138 | ffi::ts_range_edit(core::ptr::addr_of_mut!(ts_range), &edit); - 139 | } - | - 140 | *range = ts_range.into(); - 141 | } - 142 | } - | - 143 | /// A single node within a syntax [`Tree`]. - 144 | #[doc(alias = "TSNode")] - 145 | #[derive(Clone, Copy)] - 146 | #[repr(transparent)] - 147 | pub struct Node<'tree>(ffi::TSNode, PhantomData<&'tree ()>); - | - 148 | /// A stateful object that this is used to produce a [`Tree`] based on some - 149 | /// source code. - 150 | #[doc(alias = "TSParser")] - 151 | pub struct Parser(NonNull); - | - 152 | /// A stateful object that is used to look up symbols valid in a specific parse - 153 | /// state - 154 | #[doc(alias = "TSLookaheadIterator")] - 155 | pub struct LookaheadIterator(NonNull); - 156 | struct LookaheadNamesIterator<'a>(&'a mut LookaheadIterator); - | - 157 | /// A stateful object that is passed into a [`ParseProgressCallback`] - 158 | /// to pass in the current state of the parser. - 159 | pub struct ParseState(NonNull); - | - 160 | impl ParseState { - 161 | #[must_use] - 162 | pub const fn current_byte_offset(&self) -> usize { - 163 | unsafe { self.0.as_ref() }.current_byte_offset as usize - 164 | } - | - 165 | #[must_use] - 166 | pub const fn has_error(&self) -> bool { - 167 | unsafe { self.0.as_ref() }.has_error - 168 | } - 169 | } - | - 170 | /// A stateful object that is passed into a [`QueryProgressCallback`] - 171 | /// to pass in the current state of the query execution. 
- 172 | pub struct QueryCursorState(NonNull); - | - 173 | impl QueryCursorState { - 174 | #[must_use] - 175 | pub const fn current_byte_offset(&self) -> usize { - 176 | unsafe { self.0.as_ref() }.current_byte_offset as usize - 177 | } - 178 | } - | - 179 | #[derive(Default)] - 180 | pub struct ParseOptions<'a> { - 181 | pub progress_callback: Option>, - 182 | } - | - 183 | impl<'a> ParseOptions<'a> { - 184 | #[must_use] - 185 | pub fn new() -> Self { - 186 | Self::default() - 187 | } - | - 188 | #[must_use] - 189 | pub fn progress_callback ControlFlow<()>>( - 190 | mut self, - 191 | callback: &'a mut F, - 192 | ) -> Self { - 193 | self.progress_callback = Some(callback); - 194 | self - 195 | } - | - 196 | /// Create a new `ParseOptions` with a shorter lifetime, borrowing from this one. - 197 | /// - 198 | /// This is useful when you need to reuse parse options multiple times, e.g., calling - 199 | /// [`Parser::parse_with_options`] multiple times with the same options. - 200 | #[must_use] - 201 | pub fn reborrow(&mut self) -> ParseOptions { - 202 | ParseOptions { - 203 | progress_callback: match &mut self.progress_callback { - 204 | Some(cb) => Some(*cb), - 205 | None => None, - 206 | }, - 207 | } - 208 | } - 209 | } - | - 210 | #[derive(Default)] - 211 | pub struct QueryCursorOptions<'a> { - 212 | pub progress_callback: Option>, - 213 | } - | - 214 | impl<'a> QueryCursorOptions<'a> { - 215 | #[must_use] - 216 | pub fn new() -> Self { - 217 | Self::default() - 218 | } - | - 219 | #[must_use] - 220 | pub fn progress_callback ControlFlow<()>>( - 221 | mut self, - 222 | callback: &'a mut F, - 223 | ) -> Self { - 224 | self.progress_callback = Some(callback); - 225 | self - 226 | } - | - 227 | /// Create a new `QueryCursorOptions` with a shorter lifetime, borrowing from this one. - 228 | /// - 229 | /// This is useful when you need to reuse query cursor options multiple times, e.g., calling - 230 | /// [`QueryCursor::matches`] multiple times with the same options. 
- 231 | #[must_use] - 232 | pub fn reborrow(&mut self) -> QueryCursorOptions { - 233 | QueryCursorOptions { - 234 | progress_callback: match &mut self.progress_callback { - 235 | Some(cb) => Some(*cb), - 236 | None => None, - 237 | }, - 238 | } - 239 | } - 240 | } - | - 241 | struct QueryCursorOptionsDrop(*mut ffi::TSQueryCursorOptions); - | - 242 | impl Drop for QueryCursorOptionsDrop { - 243 | fn drop(&mut self) { - 244 | unsafe { - 245 | if !(*self.0).payload.is_null() { - 246 | drop(Box::from_raw( - 247 | (*self.0).payload.cast::(), - 248 | )); - 249 | } - 250 | drop(Box::from_raw(self.0)); - 251 | } - 252 | } - 253 | } - | - 254 | /// A type of log message. - 255 | #[derive(Debug, PartialEq, Eq)] - 256 | pub enum LogType { - 257 | Parse, - 258 | Lex, - 259 | } - | - 260 | type FieldId = NonZeroU16; - | - 261 | /// A callback that receives log messages during parsing. - 262 | type Logger<'a> = Box; - | - 263 | /// A callback that receives the parse state during parsing. - 264 | type ParseProgressCallback<'a> = &'a mut dyn FnMut(&ParseState) -> ControlFlow<()>; - | - 265 | /// A callback that receives the query state during query execution. - 266 | type QueryProgressCallback<'a> = &'a mut dyn FnMut(&QueryCursorState) -> ControlFlow<()>; - | - 267 | pub trait Decode { - 268 | /// A callback that decodes the next code point from the input slice. It should return the code - 269 | /// point, and how many bytes were decoded. - 270 | fn decode(bytes: &[u8]) -> (i32, u32); - 271 | } - | - 272 | /// A stateful object for walking a syntax [`Tree`] efficiently. - 273 | #[doc(alias = "TSTreeCursor")] - 274 | pub struct TreeCursor<'cursor>(ffi::TSTreeCursor, PhantomData<&'cursor ()>); - | - 275 | /// A set of patterns that match nodes in a syntax tree. 
- 276 | #[doc(alias = "TSQuery")] - 277 | #[derive(Debug)] - 278 | #[allow(clippy::type_complexity)] - 279 | pub struct Query { - 280 | ptr: NonNull, - 281 | capture_names: Box<[&'static str]>, - 282 | capture_quantifiers: Box<[Box<[CaptureQuantifier]>]>, - 283 | text_predicates: Box<[Box<[TextPredicateCapture]>]>, - 284 | property_settings: Box<[Box<[QueryProperty]>]>, - 285 | property_predicates: Box<[Box<[(QueryProperty, bool)]>]>, - 286 | general_predicates: Box<[Box<[QueryPredicate]>]>, - 287 | } - | - 288 | /// A quantifier for captures - 289 | #[derive(Debug, PartialEq, Eq, Clone, Copy)] - 290 | pub enum CaptureQuantifier { - 291 | Zero, - 292 | ZeroOrOne, - 293 | ZeroOrMore, - 294 | One, - 295 | OneOrMore, - 296 | } - | - 297 | impl From for CaptureQuantifier { - 298 | fn from(value: ffi::TSQuantifier) -> Self { - 299 | match value { - 300 | ffi::TSQuantifierZero => Self::Zero, - 301 | ffi::TSQuantifierZeroOrOne => Self::ZeroOrOne, - 302 | ffi::TSQuantifierZeroOrMore => Self::ZeroOrMore, - 303 | ffi::TSQuantifierOne => Self::One, - 304 | ffi::TSQuantifierOneOrMore => Self::OneOrMore, - 305 | _ => unreachable!(), - 306 | } - 307 | } - 308 | } - | - 309 | /// A stateful object for executing a [`Query`] on a syntax [`Tree`]. - 310 | #[doc(alias = "TSQueryCursor")] - 311 | pub struct QueryCursor { - 312 | ptr: NonNull, - 313 | } - | - 314 | /// A key-value pair associated with a particular pattern in a [`Query`]. - 315 | #[derive(Debug, PartialEq, Eq)] - 316 | pub struct QueryProperty { - 317 | pub key: Box, - 318 | pub value: Option>, - 319 | pub capture_id: Option, - 320 | } - | - 321 | #[derive(Debug, PartialEq, Eq)] - 322 | pub enum QueryPredicateArg { - 323 | Capture(u32), - 324 | String(Box), - 325 | } - | - 326 | /// A key-value pair associated with a particular pattern in a [`Query`]. 
- 327 | #[derive(Debug, PartialEq, Eq)] - 328 | pub struct QueryPredicate { - 329 | pub operator: Box, - 330 | pub args: Box<[QueryPredicateArg]>, - 331 | } - | - 332 | /// A match of a [`Query`] to a particular set of [`Node`]s. - 333 | pub struct QueryMatch<'cursor, 'tree> { - 334 | pub pattern_index: usize, - 335 | pub captures: &'cursor [QueryCapture<'tree>], - 336 | id: u32, - 337 | cursor: *mut ffi::TSQueryCursor, - 338 | } - | - 339 | /// A sequence of [`QueryMatch`]es associated with a given [`QueryCursor`]. - 340 | pub struct QueryMatches<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> { - 341 | ptr: *mut ffi::TSQueryCursor, - 342 | query: &'query Query, - 343 | text_provider: T, - 344 | buffer1: Vec, - 345 | buffer2: Vec, - 346 | current_match: Option>, - 347 | _options: Option, - 348 | _phantom: PhantomData<(&'tree (), I)>, - 349 | } - | - 350 | /// A sequence of [`QueryCapture`]s associated with a given [`QueryCursor`]. - 351 | /// - 352 | /// During iteration, each element contains a [`QueryMatch`] and index. The index can - 353 | /// be used to access the new capture inside of the [`QueryMatch::captures`]'s [`captures`]. - 354 | pub struct QueryCaptures<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> { - 355 | ptr: *mut ffi::TSQueryCursor, - 356 | query: &'query Query, - 357 | text_provider: T, - 358 | buffer1: Vec, - 359 | buffer2: Vec, - 360 | current_match: Option<(QueryMatch<'query, 'tree>, usize)>, - 361 | _options: Option, - 362 | _phantom: PhantomData<(&'tree (), I)>, - 363 | } - | - 364 | pub trait TextProvider - 365 | where - 366 | I: AsRef<[u8]>, - 367 | { - 368 | type I: Iterator; - 369 | fn text(&mut self, node: Node) -> Self::I; - 370 | } - | - 371 | /// A particular [`Node`] that has been captured with a particular name within a - 372 | /// [`Query`]. 
- 373 | #[derive(Clone, Copy, Debug)] - 374 | #[repr(C)] - 375 | pub struct QueryCapture<'tree> { - 376 | pub node: Node<'tree>, - 377 | pub index: u32, - 378 | } - | - 379 | /// An error that occurred when trying to assign an incompatible [`Language`] to - 380 | /// a [`Parser`]. If the `wasm` feature is enabled, this can also indicate a failure - 381 | /// to load the Wasm store. - 382 | #[derive(Debug, PartialEq, Eq)] - 383 | pub enum LanguageError { - 384 | Version(usize), - 385 | #[cfg(feature = "wasm")] - 386 | Wasm, - 387 | } - | - 388 | /// An error that occurred in [`Parser::set_included_ranges`]. - 389 | #[derive(Debug, PartialEq, Eq)] - 390 | pub struct IncludedRangesError(pub usize); - | - 391 | /// An error that occurred when trying to create a [`Query`]. - 392 | #[derive(Debug, PartialEq, Eq)] - 393 | pub struct QueryError { - 394 | pub row: usize, - 395 | pub column: usize, - 396 | pub offset: usize, - 397 | pub message: String, - 398 | pub kind: QueryErrorKind, - 399 | } - | - 400 | #[derive(Debug, PartialEq, Eq)] - 401 | pub enum QueryErrorKind { - 402 | Syntax, - 403 | NodeType, - 404 | Field, - 405 | Capture, - 406 | Predicate, - 407 | Structure, - 408 | Language, - 409 | } - | - 410 | #[derive(Debug)] - 411 | /// The first item is the capture index - 412 | /// The next is capture specific, depending on what item is expected - 413 | /// The first bool is if the capture is positive - 414 | /// The last item is a bool signifying whether or not it's meant to match - 415 | /// any or all captures - 416 | enum TextPredicateCapture { - 417 | EqString(u32, Box, bool, bool), - 418 | EqCapture(u32, u32, bool, bool), - 419 | MatchString(u32, regex::bytes::Regex, bool, bool), - 420 | AnyString(u32, Box<[Box]>, bool), - 421 | } - | - 422 | // TODO: Remove this struct at some point. If `core::str::lossy::Utf8Lossy` - 423 | // is ever stabilized. 
- 424 | pub struct LossyUtf8<'a> { - 425 | bytes: &'a [u8], - 426 | in_replacement: bool, - 427 | } - | - 428 | impl Language { - 429 | #[must_use] - 430 | pub fn new(builder: LanguageFn) -> Self { - 431 | Self(unsafe { builder.into_raw()().cast() }) - 432 | } - | - 433 | /// Get the name of this language. This returns `None` in older parsers. - 434 | #[doc(alias = "ts_language_name")] - 435 | #[must_use] - 436 | pub fn name(&self) -> Option<&'static str> { - 437 | let ptr = unsafe { ffi::ts_language_name(self.0) }; - 438 | (!ptr.is_null()).then(|| unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) - 439 | } - | - 440 | /// Get the ABI version number that indicates which version of the - 441 | /// Tree-sitter CLI that was used to generate this [`Language`]. - 442 | #[doc(alias = "ts_language_abi_version")] - 443 | #[must_use] - 444 | pub fn abi_version(&self) -> usize { - 445 | unsafe { ffi::ts_language_abi_version(self.0) as usize } - 446 | } - | - 447 | /// Get the metadata for this language. This information is generated by the - 448 | /// CLI, and relies on the language author providing the correct metadata in - 449 | /// the language's `tree-sitter.json` file. - 450 | /// - 451 | /// See also [`LanguageMetadata`]. - 452 | #[doc(alias = "ts_language_metadata")] - 453 | #[must_use] - 454 | pub fn metadata(&self) -> Option { - 455 | unsafe { - 456 | let ptr = ffi::ts_language_metadata(self.0); - 457 | (!ptr.is_null()).then(|| (*ptr).into()) - 458 | } - 459 | } - | - 460 | /// Get the number of distinct node types in this language. - 461 | #[doc(alias = "ts_language_symbol_count")] - 462 | #[must_use] - 463 | pub fn node_kind_count(&self) -> usize { - 464 | unsafe { ffi::ts_language_symbol_count(self.0) as usize } - 465 | } - | - 466 | /// Get the number of valid states in this language. 
- 467 | #[doc(alias = "ts_language_state_count")] - 468 | #[must_use] - 469 | pub fn parse_state_count(&self) -> usize { - 470 | unsafe { ffi::ts_language_state_count(self.0) as usize } - 471 | } - | - 472 | /// Get a list of all supertype symbols for the language. - 473 | #[doc(alias = "ts_language_supertypes")] - 474 | #[must_use] - 475 | pub fn supertypes(&self) -> &[u16] { - 476 | let mut length = 0u32; - 477 | unsafe { - 478 | let ptr = ffi::ts_language_supertypes(self.0, core::ptr::addr_of_mut!(length)); - 479 | if length == 0 { - 480 | &[] - 481 | } else { - 482 | slice::from_raw_parts(ptr.cast_mut(), length as usize) - 483 | } - 484 | } - 485 | } - | - 486 | /// Get a list of all subtype symbols for a given supertype symbol. - 487 | #[doc(alias = "ts_language_supertype_map")] - 488 | #[must_use] - 489 | pub fn subtypes_for_supertype(&self, supertype: u16) -> &[u16] { - 490 | unsafe { - 491 | let mut length = 0u32; - 492 | let ptr = ffi::ts_language_subtypes(self.0, supertype, core::ptr::addr_of_mut!(length)); - 493 | if length == 0 { - 494 | &[] - 495 | } else { - 496 | slice::from_raw_parts(ptr.cast_mut(), length as usize) - 497 | } - 498 | } - 499 | } - | - 500 | /// Get the name of the node kind for the given numerical id. - 501 | #[doc(alias = "ts_language_symbol_name")] - 502 | #[must_use] - 503 | pub fn node_kind_for_id(&self, id: u16) -> Option<&'static str> { - 504 | let ptr = unsafe { ffi::ts_language_symbol_name(self.0, id) }; - 505 | (!ptr.is_null()).then(|| unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) - 506 | } - | - 507 | /// Get the numeric id for the given node kind. 
- 508 | #[doc(alias = "ts_language_symbol_for_name")] - 509 | #[must_use] - 510 | pub fn id_for_node_kind(&self, kind: &str, named: bool) -> u16 { - 511 | unsafe { - 512 | ffi::ts_language_symbol_for_name( - 513 | self.0, - 514 | kind.as_bytes().as_ptr().cast::(), - 515 | kind.len() as u32, - 516 | named, - 517 | ) - 518 | } - 519 | } - | - 520 | /// Check if the node type for the given numerical id is named (as opposed - 521 | /// to an anonymous node type). - 522 | #[must_use] - 523 | pub fn node_kind_is_named(&self, id: u16) -> bool { - 524 | unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolTypeRegular } - 525 | } - | - 526 | /// Check if the node type for the given numerical id is visible (as opposed - 527 | /// to a hidden node type). - 528 | #[must_use] - 529 | pub fn node_kind_is_visible(&self, id: u16) -> bool { - 530 | unsafe { ffi::ts_language_symbol_type(self.0, id) <= ffi::TSSymbolTypeAnonymous } - 531 | } - | - 532 | /// Check if the node type for the given numerical id is a supertype. - 533 | #[must_use] - 534 | pub fn node_kind_is_supertype(&self, id: u16) -> bool { - 535 | unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolTypeSupertype } - 536 | } - | - 537 | /// Get the number of distinct field names in this language. - 538 | #[doc(alias = "ts_language_field_count")] - 539 | #[must_use] - 540 | pub fn field_count(&self) -> usize { - 541 | unsafe { ffi::ts_language_field_count(self.0) as usize } - 542 | } - | - 543 | /// Get the field name for the given numerical id. - 544 | #[doc(alias = "ts_language_field_name_for_id")] - 545 | #[must_use] - 546 | pub fn field_name_for_id(&self, field_id: u16) -> Option<&'static str> { - 547 | let ptr = unsafe { ffi::ts_language_field_name_for_id(self.0, field_id) }; - 548 | (!ptr.is_null()).then(|| unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) - 549 | } - | - 550 | /// Get the numerical id for the given field name. 
- 551 | #[doc(alias = "ts_language_field_id_for_name")] - 552 | #[must_use] - 553 | pub fn field_id_for_name(&self, field_name: impl AsRef<[u8]>) -> Option { - 554 | let field_name = field_name.as_ref(); - 555 | let id = unsafe { - 556 | ffi::ts_language_field_id_for_name( - 557 | self.0, - 558 | field_name.as_ptr().cast::(), - 559 | field_name.len() as u32, - 560 | ) - 561 | }; - 562 | FieldId::new(id) - 563 | } - | - 564 | /// Get the next parse state. Combine this with - 565 | /// [`lookahead_iterator`](Language::lookahead_iterator) to - 566 | /// generate completion suggestions or valid symbols in error nodes. - 567 | /// - 568 | /// Example: - 569 | /// ```ignore - 570 | /// let state = language.next_state(node.parse_state(), node.grammar_id()); - 571 | /// ``` - 572 | #[doc(alias = "ts_language_next_state")] - 573 | #[must_use] - 574 | pub fn next_state(&self, state: u16, id: u16) -> u16 { - 575 | unsafe { ffi::ts_language_next_state(self.0, state, id) } - 576 | } - | - 577 | /// Create a new lookahead iterator for this language and parse state. - 578 | /// - 579 | /// This returns `None` if state is invalid for this language. - 580 | /// - 581 | /// Iterating [`LookaheadIterator`] will yield valid symbols in the given - 582 | /// parse state. Newly created lookahead iterators will return the `ERROR` - 583 | /// symbol from [`LookaheadIterator::current_symbol`]. - 584 | /// - 585 | /// Lookahead iterators can be useful to generate suggestions and improve - 586 | /// syntax error diagnostics. To get symbols valid in an `ERROR` node, use the - 587 | /// lookahead iterator on its first leaf node state. For `MISSING` nodes, a - 588 | /// lookahead iterator created on the previous non-extra leaf node may be - 589 | /// appropriate. 
- 590 | #[doc(alias = "ts_lookahead_iterator_new")] - 591 | #[must_use] - 592 | pub fn lookahead_iterator(&self, state: u16) -> Option { - 593 | let ptr = unsafe { ffi::ts_lookahead_iterator_new(self.0, state) }; - 594 | (!ptr.is_null()).then(|| unsafe { LookaheadIterator::from_raw(ptr) }) - 595 | } - 596 | } - | - 597 | impl From for Language { - 598 | fn from(value: LanguageFn) -> Self { - 599 | Self::new(value) - 600 | } - 601 | } - | - 602 | impl Clone for Language { - 603 | fn clone(&self) -> Self { - 604 | unsafe { Self(ffi::ts_language_copy(self.0)) } - 605 | } - 606 | } - | - 607 | impl Drop for Language { - 608 | fn drop(&mut self) { - 609 | unsafe { ffi::ts_language_delete(self.0) } - 610 | } - 611 | } - | - 612 | impl Deref for LanguageRef<'_> { - 613 | type Target = Language; - | - 614 | fn deref(&self) -> &Self::Target { - 615 | unsafe { &*(core::ptr::addr_of!(self.0).cast::()) } - 616 | } - 617 | } - | - 618 | impl Default for Parser { - 619 | fn default() -> Self { - 620 | Self::new() - 621 | } - 622 | } - | - 623 | impl Parser { - 624 | /// Create a new parser. - 625 | #[doc(alias = "ts_parser_new")] - 626 | #[must_use] - 627 | pub fn new() -> Self { - 628 | unsafe { - 629 | let parser = ffi::ts_parser_new(); - 630 | Self(NonNull::new_unchecked(parser)) - 631 | } - 632 | } - | - 633 | /// Set the language that the parser should use for parsing. - 634 | /// - 635 | /// Returns a Result indicating whether or not the language was successfully - 636 | /// assigned. True means assignment succeeded. False means there was a - 637 | /// version mismatch: the language was generated with an incompatible - 638 | /// version of the Tree-sitter CLI. Check the language's version using - 639 | /// [`Language::version`] and compare it to this library's - 640 | /// [`LANGUAGE_VERSION`] and [`MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. 
- 641 | #[doc(alias = "ts_parser_set_language")] - 642 | pub fn set_language(&mut self, language: &Language) -> Result<(), LanguageError> { - 643 | let version = language.abi_version(); - 644 | if (MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION).contains(&version) { - 645 | #[allow(unused_variables)] - 646 | let success = unsafe { ffi::ts_parser_set_language(self.0.as_ptr(), language.0) }; - 647 | #[cfg(feature = "wasm")] - 648 | if !success { - 649 | return Err(LanguageError::Wasm); - 650 | } - 651 | Ok(()) - 652 | } else { - 653 | Err(LanguageError::Version(version)) - 654 | } - 655 | } - | - 656 | /// Get the parser's current language. - 657 | #[doc(alias = "ts_parser_language")] - 658 | #[must_use] - 659 | pub fn language(&self) -> Option> { - 660 | let ptr = unsafe { ffi::ts_parser_language(self.0.as_ptr()) }; - 661 | (!ptr.is_null()).then_some(LanguageRef(ptr, PhantomData)) - 662 | } - | - 663 | /// Get the parser's current logger. - 664 | #[doc(alias = "ts_parser_logger")] - 665 | #[must_use] - 666 | pub fn logger(&self) -> Option<&Logger> { - 667 | let logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) }; - 668 | unsafe { logger.payload.cast::().as_ref() } - 669 | } - | - 670 | /// Set the logging callback that the parser should use during parsing. 
- 671 | #[doc(alias = "ts_parser_set_logger")] - 672 | pub fn set_logger(&mut self, logger: Option) { - 673 | let prev_logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) }; - 674 | if !prev_logger.payload.is_null() { - 675 | drop(unsafe { Box::from_raw(prev_logger.payload.cast::()) }); - 676 | } - | - 677 | let c_logger = if let Some(logger) = logger { - 678 | let container = Box::new(logger); - | - 679 | unsafe extern "C" fn log( - 680 | payload: *mut c_void, - 681 | c_log_type: ffi::TSLogType, - 682 | c_message: *const c_char, - 683 | ) { - 684 | let callback = payload.cast::().as_mut().unwrap(); - 685 | if let Ok(message) = CStr::from_ptr(c_message).to_str() { - 686 | let log_type = if c_log_type == ffi::TSLogTypeParse { - 687 | LogType::Parse - 688 | } else { - 689 | LogType::Lex - 690 | }; - 691 | callback(log_type, message); - 692 | } - 693 | } - | - 694 | let raw_container = Box::into_raw(container); - | - 695 | ffi::TSLogger { - 696 | payload: raw_container.cast::(), - 697 | log: Some(log), - 698 | } - 699 | } else { - 700 | ffi::TSLogger { - 701 | payload: ptr::null_mut(), - 702 | log: None, - 703 | } - 704 | }; - | - 705 | unsafe { ffi::ts_parser_set_logger(self.0.as_ptr(), c_logger) }; - 706 | } - | - 707 | /// Set the destination to which the parser should write debugging graphs - 708 | /// during parsing. The graphs are formatted in the DOT language. You may - 709 | /// want to pipe these graphs directly to a `dot(1)` process in order to - 710 | /// generate SVG output. 
- 711 | #[doc(alias = "ts_parser_print_dot_graphs")] - 712 | #[cfg(not(target_os = "wasi"))] - 713 | #[cfg(feature = "std")] - 714 | #[cfg_attr(docsrs, doc(cfg(feature = "std")))] - 715 | pub fn print_dot_graphs( - 716 | &mut self, - 717 | #[cfg(unix)] file: &impl AsRawFd, - 718 | #[cfg(windows)] file: &impl AsRawHandle, - 719 | ) { - 720 | #[cfg(unix)] - 721 | { - 722 | let fd = file.as_raw_fd(); - 723 | unsafe { - 724 | ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::_ts_dup(fd)); - 725 | } - 726 | } - | - 727 | #[cfg(windows)] - 728 | { - 729 | let handle = file.as_raw_handle(); - 730 | unsafe { - 731 | ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::_ts_dup(handle)); - 732 | } - 733 | } - 734 | } - | - 735 | /// Stop the parser from printing debugging graphs while parsing. - 736 | #[doc(alias = "ts_parser_print_dot_graphs")] - 737 | #[cfg(not(target_os = "wasi"))] - 738 | #[cfg(feature = "std")] - 739 | #[cfg_attr(docsrs, doc(cfg(feature = "std")))] - 740 | pub fn stop_printing_dot_graphs(&mut self) { - 741 | unsafe { ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), -1) } - 742 | } - | - 743 | /// Parse a slice of UTF8 text. - 744 | /// - 745 | /// # Arguments: - 746 | /// * `text` The UTF8-encoded text to parse. - 747 | /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the - 748 | /// document has changed since `old_tree` was created, then you must edit `old_tree` to match - 749 | /// the new text using [`Tree::edit`]. 
- 750 | /// - 751 | /// Returns a [`Tree`] if parsing succeeded, or `None` if: - 752 | /// * The parser has not yet had a language assigned with [`Parser::set_language`] - 753 | #[doc(alias = "ts_parser_parse")] - 754 | pub fn parse(&mut self, text: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option { - 755 | let bytes = text.as_ref(); - 756 | let len = bytes.len(); - 757 | self.parse_with_options( - 758 | &mut |i, _| (i < len).then(|| &bytes[i..]).unwrap_or_default(), - 759 | old_tree, - 760 | None, - 761 | ) - 762 | } - | - 763 | /// Parse text provided in chunks by a callback. - 764 | /// - 765 | /// # Arguments: - 766 | /// * `callback` A function that takes a byte offset and position and returns a slice of - 767 | /// UTF8-encoded text starting at that byte offset and position. The slices can be of any - 768 | /// length. If the given position is at the end of the text, the callback should return an - 769 | /// empty slice. - 770 | /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the - 771 | /// document has changed since `old_tree` was created, then you must edit `old_tree` to match - 772 | /// the new text using [`Tree::edit`]. - 773 | /// * `options` Options for parsing the text. This can be used to set a progress callback. - 774 | pub fn parse_with_options, F: FnMut(usize, Point) -> T>( - 775 | &mut self, - 776 | callback: &mut F, - 777 | old_tree: Option<&Tree>, - 778 | options: Option, - 779 | ) -> Option { - 780 | type Payload<'a, F, T> = (&'a mut F, Option); - | - 781 | // This C function is passed to Tree-sitter as the progress callback. 
- 782 | unsafe extern "C" fn progress(state: *mut ffi::TSParseState) -> bool { - 783 | let callback = (*state) - 784 | .payload - 785 | .cast::() - 786 | .as_mut() - 787 | .unwrap(); - 788 | match callback(&ParseState::from_raw(state)) { - 789 | ControlFlow::Continue(()) => false, - 790 | ControlFlow::Break(()) => true, - 791 | } - 792 | } - | - 793 | // This C function is passed to Tree-sitter as the input callback. - 794 | unsafe extern "C" fn read, F: FnMut(usize, Point) -> T>( - 795 | payload: *mut c_void, - 796 | byte_offset: u32, - 797 | position: ffi::TSPoint, - 798 | bytes_read: *mut u32, - 799 | ) -> *const c_char { - 800 | let (callback, text) = payload.cast::>().as_mut().unwrap(); - 801 | *text = Some(callback(byte_offset as usize, position.into())); - 802 | let slice = text.as_ref().unwrap().as_ref(); - 803 | *bytes_read = slice.len() as u32; - 804 | slice.as_ptr().cast::() - 805 | } - | - 806 | let empty_options = ffi::TSParseOptions { - 807 | payload: ptr::null_mut(), - 808 | progress_callback: None, - 809 | }; - | - 810 | let mut callback_ptr; - 811 | let parse_options = if let Some(options) = options { - 812 | if let Some(cb) = options.progress_callback { - 813 | callback_ptr = cb; - 814 | ffi::TSParseOptions { - 815 | payload: core::ptr::addr_of_mut!(callback_ptr).cast::(), - 816 | progress_callback: Some(progress), - 817 | } - 818 | } else { - 819 | empty_options - 820 | } - 821 | } else { - 822 | empty_options - 823 | }; - | - 824 | // A pointer to this payload is passed on every call to the `read` C function. - 825 | // The payload contains two things: - 826 | // 1. A reference to the rust `callback`. - 827 | // 2. The text that was returned from the previous call to `callback`. This allows the - 828 | // callback to return owned values like vectors. 
- 829 | let mut payload: Payload = (callback, None); - | - 830 | let c_input = ffi::TSInput { - 831 | payload: ptr::addr_of_mut!(payload).cast::(), - 832 | read: Some(read::), - 833 | encoding: ffi::TSInputEncodingUTF8, - 834 | decode: None, - 835 | }; - | - 836 | let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); - 837 | unsafe { - 838 | let c_new_tree = ffi::ts_parser_parse_with_options( - 839 | self.0.as_ptr(), - 840 | c_old_tree, - 841 | c_input, - 842 | parse_options, - 843 | ); - | - 844 | NonNull::new(c_new_tree).map(Tree) - 845 | } - 846 | } - | - 847 | /// Parse a slice of UTF16 little-endian text. - 848 | /// - 849 | /// # Arguments: - 850 | /// * `text` The UTF16-encoded text to parse. - 851 | /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the - 852 | /// document has changed since `old_tree` was created, then you must edit `old_tree` to match - 853 | /// the new text using [`Tree::edit`]. - 854 | pub fn parse_utf16_le( - 855 | &mut self, - 856 | input: impl AsRef<[u16]>, - 857 | old_tree: Option<&Tree>, - 858 | ) -> Option { - 859 | let code_points = input.as_ref(); - 860 | let len = code_points.len(); - 861 | self.parse_utf16_le_with_options( - 862 | &mut |i, _| (i < len).then(|| &code_points[i..]).unwrap_or_default(), - 863 | old_tree, - 864 | None, - 865 | ) - 866 | } - | - 867 | /// Parse UTF16 little-endian text provided in chunks by a callback. - 868 | /// - 869 | /// # Arguments: - 870 | /// * `callback` A function that takes a code point offset and position and returns a slice of - 871 | /// UTF16-encoded text starting at that byte offset and position. The slices can be of any - 872 | /// length. If the given position is at the end of the text, the callback should return an - 873 | /// empty slice. - 874 | /// * `old_tree` A previous syntax tree parsed from the same document. 
If the text of the - 875 | /// document has changed since `old_tree` was created, then you must edit `old_tree` to match - 876 | /// the new text using [`Tree::edit`]. - 877 | /// * `options` Options for parsing the text. This can be used to set a progress callback. - 878 | pub fn parse_utf16_le_with_options, F: FnMut(usize, Point) -> T>( - 879 | &mut self, - 880 | callback: &mut F, - 881 | old_tree: Option<&Tree>, - 882 | options: Option, - 883 | ) -> Option { - 884 | type Payload<'a, F, T> = (&'a mut F, Option); - | - 885 | unsafe extern "C" fn progress(state: *mut ffi::TSParseState) -> bool { - 886 | let callback = (*state) - 887 | .payload - 888 | .cast::() - 889 | .as_mut() - 890 | .unwrap(); - 891 | match callback(&ParseState::from_raw(state)) { - 892 | ControlFlow::Continue(()) => false, - 893 | ControlFlow::Break(()) => true, - 894 | } - 895 | } - | - 896 | // This C function is passed to Tree-sitter as the input callback. - 897 | unsafe extern "C" fn read, F: FnMut(usize, Point) -> T>( - 898 | payload: *mut c_void, - 899 | byte_offset: u32, - 900 | position: ffi::TSPoint, - 901 | bytes_read: *mut u32, - 902 | ) -> *const c_char { - 903 | let (callback, text) = payload.cast::>().as_mut().unwrap(); - 904 | *text = Some(callback( - 905 | (byte_offset / 2) as usize, - 906 | Point { - 907 | row: position.row as usize, - 908 | column: position.column as usize / 2, - 909 | }, - 910 | )); - 911 | let slice = text.as_ref().unwrap().as_ref(); - 912 | *bytes_read = slice.len() as u32 * 2; - 913 | slice.as_ptr().cast::() - 914 | } - | - 915 | let empty_options = ffi::TSParseOptions { - 916 | payload: ptr::null_mut(), - 917 | progress_callback: None, - 918 | }; - | - 919 | let mut callback_ptr; - 920 | let parse_options = if let Some(options) = options { - 921 | if let Some(cb) = options.progress_callback { - 922 | callback_ptr = cb; - 923 | ffi::TSParseOptions { - 924 | payload: core::ptr::addr_of_mut!(callback_ptr).cast::(), - 925 | progress_callback: Some(progress), 
- 926 | } - 927 | } else { - 928 | empty_options - 929 | } - 930 | } else { - 931 | empty_options - 932 | }; - | - 933 | // A pointer to this payload is passed on every call to the `read` C function. - 934 | // The payload contains two things: - 935 | // 1. A reference to the rust `callback`. - 936 | // 2. The text that was returned from the previous call to `callback`. This allows the - 937 | // callback to return owned values like vectors. - 938 | let mut payload: Payload = (callback, None); - | - 939 | let c_input = ffi::TSInput { - 940 | payload: core::ptr::addr_of_mut!(payload).cast::(), - 941 | read: Some(read::), - 942 | encoding: ffi::TSInputEncodingUTF16LE, - 943 | decode: None, - 944 | }; - | - 945 | let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); - 946 | unsafe { - 947 | let c_new_tree = ffi::ts_parser_parse_with_options( - 948 | self.0.as_ptr(), - 949 | c_old_tree, - 950 | c_input, - 951 | parse_options, - 952 | ); - | - 953 | NonNull::new(c_new_tree).map(Tree) - 954 | } - 955 | } - | - 956 | /// Parse a slice of UTF16 big-endian text. - 957 | /// - 958 | /// # Arguments: - 959 | /// * `text` The UTF16-encoded text to parse. - 960 | /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the - 961 | /// document has changed since `old_tree` was created, then you must edit `old_tree` to match - 962 | /// the new text using [`Tree::edit`]. - 963 | pub fn parse_utf16_be( - 964 | &mut self, - 965 | input: impl AsRef<[u16]>, - 966 | old_tree: Option<&Tree>, - 967 | ) -> Option { - 968 | let code_points = input.as_ref(); - 969 | let len = code_points.len(); - 970 | self.parse_utf16_be_with_options( - 971 | &mut |i, _| if i < len { &code_points[i..] } else { &[] }, - 972 | old_tree, - 973 | None, - 974 | ) - 975 | } - | - 976 | /// Parse UTF16 big-endian text provided in chunks by a callback. 
- 977 | /// - 978 | /// # Arguments: - 979 | /// * `callback` A function that takes a code point offset and position and returns a slice of - 980 | /// UTF16-encoded text starting at that byte offset and position. The slices can be of any - 981 | /// length. If the given position is at the end of the text, the callback should return an - 982 | /// empty slice. - 983 | /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the - 984 | /// document has changed since `old_tree` was created, then you must edit `old_tree` to match - 985 | /// the new text using [`Tree::edit`]. - 986 | /// * `options` Options for parsing the text. This can be used to set a progress callback. - 987 | pub fn parse_utf16_be_with_options, F: FnMut(usize, Point) -> T>( - 988 | &mut self, - 989 | callback: &mut F, - 990 | old_tree: Option<&Tree>, - 991 | options: Option, - 992 | ) -> Option { - 993 | type Payload<'a, F, T> = (&'a mut F, Option); - | - 994 | // This C function is passed to Tree-sitter as the progress callback. - 995 | unsafe extern "C" fn progress(state: *mut ffi::TSParseState) -> bool { - 996 | let callback = (*state) - 997 | .payload - 998 | .cast::() - 999 | .as_mut() -1000 | .unwrap(); -1001 | match callback(&ParseState::from_raw(state)) { -1002 | ControlFlow::Continue(()) => false, -1003 | ControlFlow::Break(()) => true, -1004 | } -1005 | } - | -1006 | // This C function is passed to Tree-sitter as the input callback. 
-1007 | unsafe extern "C" fn read, F: FnMut(usize, Point) -> T>( -1008 | payload: *mut c_void, -1009 | byte_offset: u32, -1010 | position: ffi::TSPoint, -1011 | bytes_read: *mut u32, -1012 | ) -> *const c_char { -1013 | let (callback, text) = payload.cast::>().as_mut().unwrap(); -1014 | *text = Some(callback( -1015 | (byte_offset / 2) as usize, -1016 | Point { -1017 | row: position.row as usize, -1018 | column: position.column as usize / 2, -1019 | }, -1020 | )); -1021 | let slice = text.as_ref().unwrap().as_ref(); -1022 | *bytes_read = slice.len() as u32 * 2; -1023 | slice.as_ptr().cast::() -1024 | } - | -1025 | let empty_options = ffi::TSParseOptions { -1026 | payload: ptr::null_mut(), -1027 | progress_callback: None, -1028 | }; - | -1029 | let mut callback_ptr; -1030 | let parse_options = if let Some(options) = options { -1031 | if let Some(cb) = options.progress_callback { -1032 | callback_ptr = cb; -1033 | ffi::TSParseOptions { -1034 | payload: core::ptr::addr_of_mut!(callback_ptr).cast::(), -1035 | progress_callback: Some(progress), -1036 | } -1037 | } else { -1038 | empty_options -1039 | } -1040 | } else { -1041 | empty_options -1042 | }; - | -1043 | // A pointer to this payload is passed on every call to the `read` C function. -1044 | // The payload contains two things: -1045 | // 1. A reference to the rust `callback`. -1046 | // 2. The text that was returned from the previous call to `callback`. This allows the -1047 | // callback to return owned values like vectors. 
-1048 | let mut payload: Payload = (callback, None); - | -1049 | let c_input = ffi::TSInput { -1050 | payload: core::ptr::addr_of_mut!(payload).cast::(), -1051 | read: Some(read::), -1052 | encoding: ffi::TSInputEncodingUTF16BE, -1053 | decode: None, -1054 | }; - | -1055 | let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); -1056 | unsafe { -1057 | let c_new_tree = ffi::ts_parser_parse_with_options( -1058 | self.0.as_ptr(), -1059 | c_old_tree, -1060 | c_input, -1061 | parse_options, -1062 | ); - | -1063 | NonNull::new(c_new_tree).map(Tree) -1064 | } -1065 | } - | -1066 | /// Parse text provided in chunks by a callback using a custom encoding. -1067 | /// This is useful for parsing text in encodings that are not UTF-8 or UTF-16. -1068 | /// -1069 | /// # Arguments: -1070 | /// * `callback` A function that takes a byte offset and position and returns a slice of text -1071 | /// starting at that byte offset and position. The slices can be of any length. If the given -1072 | /// position is at the end of the text, the callback should return an empty slice. -1073 | /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the -1074 | /// document has changed since `old_tree` was created, then you must edit `old_tree` to match -1075 | /// the new text using [`Tree::edit`]. -1076 | /// * `options` Options for parsing the text. This can be used to set a progress callback. -1077 | /// -1078 | /// Additionally, you must set the generic parameter [`D`] to a type that implements the -1079 | /// [`Decode`] trait. This trait has a single method, [`decode`](Decode::decode), which takes a -1080 | /// slice of bytes and returns a tuple of the code point and the number of bytes consumed. -1081 | /// The `decode` method should return `-1` for the code point if decoding fails. 
-1082 | pub fn parse_custom_encoding, F: FnMut(usize, Point) -> T>( -1083 | &mut self, -1084 | callback: &mut F, -1085 | old_tree: Option<&Tree>, -1086 | options: Option, -1087 | ) -> Option { -1088 | type Payload<'a, F, T> = (&'a mut F, Option); - | -1089 | unsafe extern "C" fn progress(state: *mut ffi::TSParseState) -> bool { -1090 | let callback = (*state) -1091 | .payload -1092 | .cast::() -1093 | .as_mut() -1094 | .unwrap(); -1095 | match callback(&ParseState::from_raw(state)) { -1096 | ControlFlow::Continue(()) => false, -1097 | ControlFlow::Break(()) => true, -1098 | } -1099 | } - | -1100 | // At compile time, create a C-compatible callback that calls the custom `decode` method. -1101 | unsafe extern "C" fn decode_fn( -1102 | data: *const u8, -1103 | len: u32, -1104 | code_point: *mut i32, -1105 | ) -> u32 { -1106 | let (c, len) = D::decode(core::slice::from_raw_parts(data, len as usize)); -1107 | if let Some(code_point) = code_point.as_mut() { -1108 | *code_point = c; -1109 | } -1110 | len -1111 | } - | -1112 | // This C function is passed to Tree-sitter as the input callback. 
-1113 | unsafe extern "C" fn read, F: FnMut(usize, Point) -> T>( -1114 | payload: *mut c_void, -1115 | byte_offset: u32, -1116 | position: ffi::TSPoint, -1117 | bytes_read: *mut u32, -1118 | ) -> *const c_char { -1119 | let (callback, text) = payload.cast::>().as_mut().unwrap(); -1120 | *text = Some(callback(byte_offset as usize, position.into())); -1121 | let slice = text.as_ref().unwrap().as_ref(); -1122 | *bytes_read = slice.len() as u32; -1123 | slice.as_ptr().cast::() -1124 | } - | -1125 | let empty_options = ffi::TSParseOptions { -1126 | payload: ptr::null_mut(), -1127 | progress_callback: None, -1128 | }; - | -1129 | let mut callback_ptr; -1130 | let parse_options = if let Some(options) = options { -1131 | if let Some(cb) = options.progress_callback { -1132 | callback_ptr = cb; -1133 | ffi::TSParseOptions { -1134 | payload: core::ptr::addr_of_mut!(callback_ptr).cast::(), -1135 | progress_callback: Some(progress), -1136 | } -1137 | } else { -1138 | empty_options -1139 | } -1140 | } else { -1141 | empty_options -1142 | }; - | -1143 | // A pointer to this payload is passed on every call to the `read` C function. -1144 | // The payload contains two things: -1145 | // 1. A reference to the rust `callback`. -1146 | // 2. The text that was returned from the previous call to `callback`. This allows the -1147 | // callback to return owned values like vectors. 
-1148 | let mut payload: Payload = (callback, None); - | -1149 | let c_input = ffi::TSInput { -1150 | payload: core::ptr::addr_of_mut!(payload).cast::(), -1151 | read: Some(read::), -1152 | encoding: ffi::TSInputEncodingCustom, -1153 | // Use this custom decode callback -1154 | decode: Some(decode_fn::), -1155 | }; - | -1156 | let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); -1157 | unsafe { -1158 | let c_new_tree = ffi::ts_parser_parse_with_options( -1159 | self.0.as_ptr(), -1160 | c_old_tree, -1161 | c_input, -1162 | parse_options, -1163 | ); - | -1164 | NonNull::new(c_new_tree).map(Tree) -1165 | } -1166 | } - | -1167 | /// Instruct the parser to start the next parse from the beginning. -1168 | /// -1169 | /// If the parser previously failed because of a callback, then by default, -1170 | /// it will resume where it left off on the next call to [`parse`](Parser::parse) -1171 | /// or other parsing functions. If you don't want to resume, and instead intend to use -1172 | /// this parser to parse some other document, you must call `reset` first. -1173 | #[doc(alias = "ts_parser_reset")] -1174 | pub fn reset(&mut self) { -1175 | unsafe { ffi::ts_parser_reset(self.0.as_ptr()) } -1176 | } - | -1177 | /// Set the ranges of text that the parser should include when parsing. -1178 | /// -1179 | /// By default, the parser will always include entire documents. This -1180 | /// function allows you to parse only a *portion* of a document but -1181 | /// still return a syntax tree whose ranges match up with the document -1182 | /// as a whole. You can also pass multiple disjoint ranges. -1183 | /// -1184 | /// If `ranges` is empty, then the entire document will be parsed. -1185 | /// Otherwise, the given ranges must be ordered from earliest to latest -1186 | /// in the document, and they must not overlap. 
That is, the following -1187 | /// must hold for all `i` < `length - 1`: -1188 | /// ```text -1189 | /// ranges[i].end_byte <= ranges[i + 1].start_byte -1190 | /// ``` -1191 | /// If this requirement is not satisfied, method will return -1192 | /// [`IncludedRangesError`] error with an offset in the passed ranges -1193 | /// slice pointing to a first incorrect range. -1194 | #[doc(alias = "ts_parser_set_included_ranges")] -1195 | pub fn set_included_ranges(&mut self, ranges: &[Range]) -> Result<(), IncludedRangesError> { -1196 | let ts_ranges = ranges.iter().copied().map(Into::into).collect::>(); -1197 | let result = unsafe { -1198 | ffi::ts_parser_set_included_ranges( -1199 | self.0.as_ptr(), -1200 | ts_ranges.as_ptr(), -1201 | ts_ranges.len() as u32, -1202 | ) -1203 | }; - | -1204 | if result { -1205 | Ok(()) -1206 | } else { -1207 | let mut prev_end_byte = 0; -1208 | for (i, range) in ranges.iter().enumerate() { -1209 | if range.start_byte < prev_end_byte || range.end_byte < range.start_byte { -1210 | return Err(IncludedRangesError(i)); -1211 | } -1212 | prev_end_byte = range.end_byte; -1213 | } -1214 | Err(IncludedRangesError(0)) -1215 | } -1216 | } - | -1217 | /// Get the ranges of text that the parser will include when parsing. 
-1218 | #[doc(alias = "ts_parser_included_ranges")] -1219 | #[must_use] -1220 | pub fn included_ranges(&self) -> Vec { -1221 | let mut count = 0u32; -1222 | unsafe { -1223 | let ptr = -1224 | ffi::ts_parser_included_ranges(self.0.as_ptr(), core::ptr::addr_of_mut!(count)); -1225 | let ranges = slice::from_raw_parts(ptr, count as usize); -1226 | let result = ranges.iter().copied().map(Into::into).collect(); -1227 | result -1228 | } -1229 | } -1230 | } - | -1231 | impl Drop for Parser { -1232 | fn drop(&mut self) { -1233 | #[cfg(feature = "std")] -1234 | #[cfg(not(target_os = "wasi"))] -1235 | { -1236 | self.stop_printing_dot_graphs(); -1237 | } -1238 | self.set_logger(None); -1239 | unsafe { ffi::ts_parser_delete(self.0.as_ptr()) } -1240 | } -1241 | } - | -1242 | #[cfg(windows)] -1243 | extern "C" { -1244 | fn _open_osfhandle(osfhandle: isize, flags: core::ffi::c_int) -> core::ffi::c_int; -1245 | } - | -1246 | impl Tree { -1247 | /// Get the root node of the syntax tree. -1248 | #[doc(alias = "ts_tree_root_node")] -1249 | #[must_use] -1250 | pub fn root_node(&self) -> Node { -1251 | Node::new(unsafe { ffi::ts_tree_root_node(self.0.as_ptr()) }).unwrap() -1252 | } - | -1253 | /// Get the root node of the syntax tree, but with its position shifted -1254 | /// forward by the given offset. -1255 | #[doc(alias = "ts_tree_root_node_with_offset")] -1256 | #[must_use] -1257 | pub fn root_node_with_offset(&self, offset_bytes: usize, offset_extent: Point) -> Node { -1258 | Node::new(unsafe { -1259 | ffi::ts_tree_root_node_with_offset( -1260 | self.0.as_ptr(), -1261 | offset_bytes as u32, -1262 | offset_extent.into(), -1263 | ) -1264 | }) -1265 | .unwrap() -1266 | } - | -1267 | /// Get the language that was used to parse the syntax tree. 
-1268 | #[doc(alias = "ts_tree_language")] -1269 | #[must_use] -1270 | pub fn language(&self) -> LanguageRef { -1271 | LanguageRef( -1272 | unsafe { ffi::ts_tree_language(self.0.as_ptr()) }, -1273 | PhantomData, -1274 | ) -1275 | } - | -1276 | /// Edit the syntax tree to keep it in sync with source code that has been -1277 | /// edited. -1278 | /// -1279 | /// You must describe the edit both in terms of byte offsets and in terms of -1280 | /// row/column coordinates. -1281 | #[doc(alias = "ts_tree_edit")] -1282 | pub fn edit(&mut self, edit: &InputEdit) { -1283 | let edit = edit.into(); -1284 | unsafe { ffi::ts_tree_edit(self.0.as_ptr(), &edit) }; -1285 | } - | -1286 | /// Create a new [`TreeCursor`] starting from the root of the tree. -1287 | #[must_use] -1288 | pub fn walk(&self) -> TreeCursor { -1289 | self.root_node().walk() -1290 | } - | -1291 | /// Compare this old edited syntax tree to a new syntax tree representing -1292 | /// the same document, returning a sequence of ranges whose syntactic -1293 | /// structure has changed. -1294 | /// -1295 | /// For this to work correctly, this syntax tree must have been edited such -1296 | /// that its ranges match up to the new tree. Generally, you'll want to -1297 | /// call this method right after calling one of the [`Parser::parse`] -1298 | /// functions. Call it on the old tree that was passed to parse, and -1299 | /// pass the new tree that was returned from `parse`. -1300 | #[doc(alias = "ts_tree_get_changed_ranges")] -1301 | #[must_use] -1302 | pub fn changed_ranges(&self, other: &Self) -> impl ExactSizeIterator { -1303 | let mut count = 0u32; -1304 | unsafe { -1305 | let ptr = ffi::ts_tree_get_changed_ranges( -1306 | self.0.as_ptr(), -1307 | other.0.as_ptr(), -1308 | core::ptr::addr_of_mut!(count), -1309 | ); -1310 | util::CBufferIter::new(ptr, count as usize).map(Into::into) -1311 | } -1312 | } - | -1313 | /// Get the included ranges that were used to parse the syntax tree. 
-1314 | #[doc(alias = "ts_tree_included_ranges")] -1315 | #[must_use] -1316 | pub fn included_ranges(&self) -> Vec { -1317 | let mut count = 0u32; -1318 | unsafe { -1319 | let ptr = ffi::ts_tree_included_ranges(self.0.as_ptr(), core::ptr::addr_of_mut!(count)); -1320 | let ranges = slice::from_raw_parts(ptr, count as usize); -1321 | let result = ranges.iter().copied().map(Into::into).collect(); -1322 | (FREE_FN)(ptr.cast::()); -1323 | result -1324 | } -1325 | } - | -1326 | /// Print a graph of the tree to the given file descriptor. -1327 | /// The graph is formatted in the DOT language. You may want to pipe this -1328 | /// graph directly to a `dot(1)` process in order to generate SVG -1329 | /// output. -1330 | #[doc(alias = "ts_tree_print_dot_graph")] -1331 | #[cfg(not(target_os = "wasi"))] -1332 | #[cfg(feature = "std")] -1333 | #[cfg_attr(docsrs, doc(cfg(feature = "std")))] -1334 | pub fn print_dot_graph( -1335 | &self, -1336 | #[cfg(unix)] file: &impl AsRawFd, -1337 | #[cfg(windows)] file: &impl AsRawHandle, -1338 | ) { -1339 | #[cfg(unix)] -1340 | { -1341 | let fd = file.as_raw_fd(); -1342 | unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), fd) } -1343 | } - | -1344 | #[cfg(windows)] -1345 | { -1346 | let handle = file.as_raw_handle(); -1347 | let fd = unsafe { _open_osfhandle(handle as isize, 0) }; -1348 | unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), fd) } -1349 | } -1350 | } -1351 | } - | -1352 | impl fmt::Debug for Tree { -1353 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -1354 | write!(f, "{{Tree {:?}}}", self.root_node()) -1355 | } -1356 | } - | -1357 | impl Drop for Tree { -1358 | fn drop(&mut self) { -1359 | unsafe { ffi::ts_tree_delete(self.0.as_ptr()) } -1360 | } -1361 | } - | -1362 | impl Clone for Tree { -1363 | fn clone(&self) -> Self { -1364 | unsafe { Self(NonNull::new_unchecked(ffi::ts_tree_copy(self.0.as_ptr()))) } -1365 | } -1366 | } - | -1367 | impl<'tree> Node<'tree> { -1368 | fn new(node: ffi::TSNode) -> Option { 
-1369 | (!node.id.is_null()).then_some(Node(node, PhantomData)) -1370 | } - | -1371 | /// Get a numeric id for this node that is unique. -1372 | /// -1373 | /// Within a given syntax tree, no two nodes have the same id. However: -1374 | /// -1375 | /// - If a new tree is created based on an older tree, and a node from the old tree is reused in -1376 | /// the process, then that node will have the same id in both trees. -1377 | /// -1378 | /// - A node not marked as having changes does not guarantee it was reused. -1379 | /// -1380 | /// - If a node is marked as having changed in the old tree, it will not be reused. -1381 | #[must_use] -1382 | pub fn id(&self) -> usize { -1383 | self.0.id as usize -1384 | } - | -1385 | /// Get this node's type as a numerical id. -1386 | #[doc(alias = "ts_node_symbol")] -1387 | #[must_use] -1388 | pub fn kind_id(&self) -> u16 { -1389 | unsafe { ffi::ts_node_symbol(self.0) } -1390 | } - | -1391 | /// Get the node's type as a numerical id as it appears in the grammar -1392 | /// ignoring aliases. -1393 | #[doc(alias = "ts_node_grammar_symbol")] -1394 | #[must_use] -1395 | pub fn grammar_id(&self) -> u16 { -1396 | unsafe { ffi::ts_node_grammar_symbol(self.0) } -1397 | } - | -1398 | /// Get this node's type as a string. -1399 | #[doc(alias = "ts_node_type")] -1400 | #[must_use] -1401 | pub fn kind(&self) -> &'static str { -1402 | unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) } -1403 | .to_str() -1404 | .unwrap() -1405 | } - | -1406 | /// Get this node's symbol name as it appears in the grammar ignoring -1407 | /// aliases as a string. -1408 | #[doc(alias = "ts_node_grammar_type")] -1409 | #[must_use] -1410 | pub fn grammar_name(&self) -> &'static str { -1411 | unsafe { CStr::from_ptr(ffi::ts_node_grammar_type(self.0)) } -1412 | .to_str() -1413 | .unwrap() -1414 | } - | -1415 | /// Get the [`Language`] that was used to parse this node's syntax tree. 
-1416 | #[doc(alias = "ts_node_language")] -1417 | #[must_use] -1418 | pub fn language(&self) -> LanguageRef { -1419 | LanguageRef(unsafe { ffi::ts_node_language(self.0) }, PhantomData) -1420 | } - | -1421 | /// Check if this node is *named*. -1422 | /// -1423 | /// Named nodes correspond to named rules in the grammar, whereas -1424 | /// *anonymous* nodes correspond to string literals in the grammar. -1425 | #[doc(alias = "ts_node_is_named")] -1426 | #[must_use] -1427 | pub fn is_named(&self) -> bool { -1428 | unsafe { ffi::ts_node_is_named(self.0) } -1429 | } - | -1430 | /// Check if this node is *extra*. -1431 | /// -1432 | /// Extra nodes represent things like comments, which are not required by the -1433 | /// grammar, but can appear anywhere. -1434 | #[doc(alias = "ts_node_is_extra")] -1435 | #[must_use] -1436 | pub fn is_extra(&self) -> bool { -1437 | unsafe { ffi::ts_node_is_extra(self.0) } -1438 | } - | -1439 | /// Check if this node has been edited. -1440 | #[doc(alias = "ts_node_has_changes")] -1441 | #[must_use] -1442 | pub fn has_changes(&self) -> bool { -1443 | unsafe { ffi::ts_node_has_changes(self.0) } -1444 | } - | -1445 | /// Check if this node represents a syntax error or contains any syntax -1446 | /// errors anywhere within it. -1447 | #[doc(alias = "ts_node_has_error")] -1448 | #[must_use] -1449 | pub fn has_error(&self) -> bool { -1450 | unsafe { ffi::ts_node_has_error(self.0) } -1451 | } - | -1452 | /// Check if this node represents a syntax error. -1453 | /// -1454 | /// Syntax errors represent parts of the code that could not be incorporated -1455 | /// into a valid syntax tree. -1456 | #[doc(alias = "ts_node_is_error")] -1457 | #[must_use] -1458 | pub fn is_error(&self) -> bool { -1459 | unsafe { ffi::ts_node_is_error(self.0) } -1460 | } - | -1461 | /// Get this node's parse state. 
-1462 | #[doc(alias = "ts_node_parse_state")] -1463 | #[must_use] -1464 | pub fn parse_state(&self) -> u16 { -1465 | unsafe { ffi::ts_node_parse_state(self.0) } -1466 | } - | -1467 | /// Get the parse state after this node. -1468 | #[doc(alias = "ts_node_next_parse_state")] -1469 | #[must_use] -1470 | pub fn next_parse_state(&self) -> u16 { -1471 | unsafe { ffi::ts_node_next_parse_state(self.0) } -1472 | } - | -1473 | /// Check if this node is *missing*. -1474 | /// -1475 | /// Missing nodes are inserted by the parser in order to recover from -1476 | /// certain kinds of syntax errors. -1477 | #[doc(alias = "ts_node_is_missing")] -1478 | #[must_use] -1479 | pub fn is_missing(&self) -> bool { -1480 | unsafe { ffi::ts_node_is_missing(self.0) } -1481 | } - | -1482 | /// Get the byte offset where this node starts. -1483 | #[doc(alias = "ts_node_start_byte")] -1484 | #[must_use] -1485 | pub fn start_byte(&self) -> usize { -1486 | unsafe { ffi::ts_node_start_byte(self.0) as usize } -1487 | } - | -1488 | /// Get the byte offset where this node ends. -1489 | #[doc(alias = "ts_node_end_byte")] -1490 | #[must_use] -1491 | pub fn end_byte(&self) -> usize { -1492 | unsafe { ffi::ts_node_end_byte(self.0) as usize } -1493 | } - | -1494 | /// Get the byte range of source code that this node represents. -1495 | #[must_use] -1496 | pub fn byte_range(&self) -> core::ops::Range { -1497 | self.start_byte()..self.end_byte() -1498 | } - | -1499 | /// Get the range of source code that this node represents, both in terms of -1500 | /// raw bytes and of row/column coordinates. -1501 | #[must_use] -1502 | pub fn range(&self) -> Range { -1503 | Range { -1504 | start_byte: self.start_byte(), -1505 | end_byte: self.end_byte(), -1506 | start_point: self.start_position(), -1507 | end_point: self.end_position(), -1508 | } -1509 | } - | -1510 | /// Get this node's start position in terms of rows and columns. 
-1511 | #[doc(alias = "ts_node_start_point")] -1512 | #[must_use] -1513 | pub fn start_position(&self) -> Point { -1514 | let result = unsafe { ffi::ts_node_start_point(self.0) }; -1515 | result.into() -1516 | } - | -1517 | /// Get this node's end position in terms of rows and columns. -1518 | #[doc(alias = "ts_node_end_point")] -1519 | #[must_use] -1520 | pub fn end_position(&self) -> Point { -1521 | let result = unsafe { ffi::ts_node_end_point(self.0) }; -1522 | result.into() -1523 | } - | -1524 | /// Get the node's child at the given index, where zero represents the first -1525 | /// child. -1526 | /// -1527 | /// This method is fairly fast, but its cost is technically log(i), so if -1528 | /// you might be iterating over a long list of children, you should use -1529 | /// [`Node::children`] instead. -1530 | #[doc(alias = "ts_node_child")] -1531 | #[must_use] -1532 | pub fn child(&self, i: u32) -> Option { -1533 | Self::new(unsafe { ffi::ts_node_child(self.0, i) }) -1534 | } - | -1535 | /// Get this node's number of children. -1536 | #[doc(alias = "ts_node_child_count")] -1537 | #[must_use] -1538 | pub fn child_count(&self) -> usize { -1539 | unsafe { ffi::ts_node_child_count(self.0) as usize } -1540 | } - | -1541 | /// Get this node's *named* child at the given index. -1542 | /// -1543 | /// See also [`Node::is_named`]. -1544 | /// This method is fairly fast, but its cost is technically log(i), so if -1545 | /// you might be iterating over a long list of children, you should use -1546 | /// [`Node::named_children`] instead. -1547 | #[doc(alias = "ts_node_named_child")] -1548 | #[must_use] -1549 | pub fn named_child(&self, i: u32) -> Option { -1550 | Self::new(unsafe { ffi::ts_node_named_child(self.0, i) }) -1551 | } - | -1552 | /// Get this node's number of *named* children. -1553 | /// -1554 | /// See also [`Node::is_named`]. 
-1555 | #[doc(alias = "ts_node_named_child_count")] -1556 | #[must_use] -1557 | pub fn named_child_count(&self) -> usize { -1558 | unsafe { ffi::ts_node_named_child_count(self.0) as usize } -1559 | } - | -1560 | /// Get the first child with the given field name. -1561 | /// -1562 | /// If multiple children may have the same field name, access them using -1563 | /// [`children_by_field_name`](Node::children_by_field_name) -1564 | #[doc(alias = "ts_node_child_by_field_name")] -1565 | #[must_use] -1566 | pub fn child_by_field_name(&self, field_name: impl AsRef<[u8]>) -> Option { -1567 | let field_name = field_name.as_ref(); -1568 | Self::new(unsafe { -1569 | ffi::ts_node_child_by_field_name( -1570 | self.0, -1571 | field_name.as_ptr().cast::(), -1572 | field_name.len() as u32, -1573 | ) -1574 | }) -1575 | } - | -1576 | /// Get this node's child with the given numerical field id. -1577 | /// -1578 | /// See also [`child_by_field_name`](Node::child_by_field_name). You can -1579 | /// convert a field name to an id using [`Language::field_id_for_name`]. -1580 | #[doc(alias = "ts_node_child_by_field_id")] -1581 | #[must_use] -1582 | pub fn child_by_field_id(&self, field_id: u16) -> Option { -1583 | Self::new(unsafe { ffi::ts_node_child_by_field_id(self.0, field_id) }) -1584 | } - | -1585 | /// Get the field name of this node's child at the given index. -1586 | #[doc(alias = "ts_node_field_name_for_child")] -1587 | #[must_use] -1588 | pub fn field_name_for_child(&self, child_index: u32) -> Option<&'static str> { -1589 | unsafe { -1590 | let ptr = ffi::ts_node_field_name_for_child(self.0, child_index); -1591 | (!ptr.is_null()).then(|| CStr::from_ptr(ptr).to_str().unwrap()) -1592 | } -1593 | } - | -1594 | /// Get the field name of this node's named child at the given index. 
-1595 | #[must_use] -1596 | pub fn field_name_for_named_child(&self, named_child_index: u32) -> Option<&'static str> { -1597 | unsafe { -1598 | let ptr = ffi::ts_node_field_name_for_named_child(self.0, named_child_index); -1599 | (!ptr.is_null()).then(|| CStr::from_ptr(ptr).to_str().unwrap()) -1600 | } -1601 | } - | -1602 | /// Iterate over this node's children. -1603 | /// -1604 | /// A [`TreeCursor`] is used to retrieve the children efficiently. Obtain -1605 | /// a [`TreeCursor`] by calling [`Tree::walk`] or [`Node::walk`]. To avoid -1606 | /// unnecessary allocations, you should reuse the same cursor for -1607 | /// subsequent calls to this method. -1608 | /// -1609 | /// If you're walking the tree recursively, you may want to use the -1610 | /// [`TreeCursor`] APIs directly instead. -1611 | pub fn children<'cursor>( -1612 | &self, -1613 | cursor: &'cursor mut TreeCursor<'tree>, -1614 | ) -> impl ExactSizeIterator> + 'cursor { -1615 | cursor.reset(*self); -1616 | cursor.goto_first_child(); -1617 | (0..self.child_count()).map(move |_| { -1618 | let result = cursor.node(); -1619 | cursor.goto_next_sibling(); -1620 | result -1621 | }) -1622 | } - | -1623 | /// Iterate over this node's named children. -1624 | /// -1625 | /// See also [`Node::children`]. -1626 | pub fn named_children<'cursor>( -1627 | &self, -1628 | cursor: &'cursor mut TreeCursor<'tree>, -1629 | ) -> impl ExactSizeIterator> + 'cursor { -1630 | cursor.reset(*self); -1631 | cursor.goto_first_child(); -1632 | (0..self.named_child_count()).map(move |_| { -1633 | while !cursor.node().is_named() { -1634 | if !cursor.goto_next_sibling() { -1635 | break; -1636 | } -1637 | } -1638 | let result = cursor.node(); -1639 | cursor.goto_next_sibling(); -1640 | result -1641 | }) -1642 | } - | -1643 | /// Iterate over this node's children with a given field name. -1644 | /// -1645 | /// See also [`Node::children`]. 
-1646 | pub fn children_by_field_name<'cursor>( -1647 | &self, -1648 | field_name: &str, -1649 | cursor: &'cursor mut TreeCursor<'tree>, -1650 | ) -> impl Iterator> + 'cursor { -1651 | let field_id = self.language().field_id_for_name(field_name); -1652 | let mut done = field_id.is_none(); -1653 | if !done { -1654 | cursor.reset(*self); -1655 | cursor.goto_first_child(); -1656 | } -1657 | iter::from_fn(move || { -1658 | if !done { -1659 | while cursor.field_id() != field_id { -1660 | if !cursor.goto_next_sibling() { -1661 | return None; -1662 | } -1663 | } -1664 | let result = cursor.node(); -1665 | if !cursor.goto_next_sibling() { -1666 | done = true; -1667 | } -1668 | return Some(result); -1669 | } -1670 | None -1671 | }) -1672 | } - | -1673 | /// Iterate over this node's children with a given field id. -1674 | /// -1675 | /// See also [`Node::children_by_field_name`]. -1676 | pub fn children_by_field_id<'cursor>( -1677 | &self, -1678 | field_id: FieldId, -1679 | cursor: &'cursor mut TreeCursor<'tree>, -1680 | ) -> impl Iterator> + 'cursor { -1681 | cursor.reset(*self); -1682 | cursor.goto_first_child(); -1683 | let mut done = false; -1684 | iter::from_fn(move || { -1685 | if !done { -1686 | while cursor.field_id() != Some(field_id) { -1687 | if !cursor.goto_next_sibling() { -1688 | return None; -1689 | } -1690 | } -1691 | let result = cursor.node(); -1692 | if !cursor.goto_next_sibling() { -1693 | done = true; -1694 | } -1695 | return Some(result); -1696 | } -1697 | None -1698 | }) -1699 | } - | -1700 | /// Get this node's immediate parent. -1701 | /// Prefer [`child_with_descendant`](Node::child_with_descendant) -1702 | /// for iterating over this node's ancestors. -1703 | #[doc(alias = "ts_node_parent")] -1704 | #[must_use] -1705 | pub fn parent(&self) -> Option { -1706 | Self::new(unsafe { ffi::ts_node_parent(self.0) }) -1707 | } - | -1708 | /// Get the node that contains `descendant`. -1709 | /// -1710 | /// Note that this can return `descendant` itself. 
-1711 | #[doc(alias = "ts_node_child_with_descendant")] -1712 | #[must_use] -1713 | pub fn child_with_descendant(&self, descendant: Self) -> Option { -1714 | Self::new(unsafe { ffi::ts_node_child_with_descendant(self.0, descendant.0) }) -1715 | } - | -1716 | /// Get this node's next sibling. -1717 | #[doc(alias = "ts_node_next_sibling")] -1718 | #[must_use] -1719 | pub fn next_sibling(&self) -> Option { -1720 | Self::new(unsafe { ffi::ts_node_next_sibling(self.0) }) -1721 | } - | -1722 | /// Get this node's previous sibling. -1723 | #[doc(alias = "ts_node_prev_sibling")] -1724 | #[must_use] -1725 | pub fn prev_sibling(&self) -> Option { -1726 | Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) }) -1727 | } - | -1728 | /// Get this node's next named sibling. -1729 | #[doc(alias = "ts_node_next_named_sibling")] -1730 | #[must_use] -1731 | pub fn next_named_sibling(&self) -> Option { -1732 | Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) }) -1733 | } - | -1734 | /// Get this node's previous named sibling. -1735 | #[doc(alias = "ts_node_prev_named_sibling")] -1736 | #[must_use] -1737 | pub fn prev_named_sibling(&self) -> Option { -1738 | Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) }) -1739 | } - | -1740 | /// Get this node's first child that contains or starts after the given byte offset. -1741 | #[doc(alias = "ts_node_first_child_for_byte")] -1742 | #[must_use] -1743 | pub fn first_child_for_byte(&self, byte: usize) -> Option { -1744 | Self::new(unsafe { ffi::ts_node_first_child_for_byte(self.0, byte as u32) }) -1745 | } - | -1746 | /// Get this node's first named child that contains or starts after the given byte offset. 
-1747 | #[doc(alias = "ts_node_first_named_child_for_point")] -1748 | #[must_use] -1749 | pub fn first_named_child_for_byte(&self, byte: usize) -> Option { -1750 | Self::new(unsafe { ffi::ts_node_first_named_child_for_byte(self.0, byte as u32) }) -1751 | } - | -1752 | /// Get the node's number of descendants, including one for the node itself. -1753 | #[doc(alias = "ts_node_descendant_count")] -1754 | #[must_use] -1755 | pub fn descendant_count(&self) -> usize { -1756 | unsafe { ffi::ts_node_descendant_count(self.0) as usize } -1757 | } - | -1758 | /// Get the smallest node within this node that spans the given byte range. -1759 | #[doc(alias = "ts_node_descendant_for_byte_range")] -1760 | #[must_use] -1761 | pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option { -1762 | Self::new(unsafe { -1763 | ffi::ts_node_descendant_for_byte_range(self.0, start as u32, end as u32) -1764 | }) -1765 | } - | -1766 | /// Get the smallest named node within this node that spans the given byte range. -1767 | #[doc(alias = "ts_node_named_descendant_for_byte_range")] -1768 | #[must_use] -1769 | pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option { -1770 | Self::new(unsafe { -1771 | ffi::ts_node_named_descendant_for_byte_range(self.0, start as u32, end as u32) -1772 | }) -1773 | } - | -1774 | /// Get the smallest node within this node that spans the given point range. -1775 | #[doc(alias = "ts_node_descendant_for_point_range")] -1776 | #[must_use] -1777 | pub fn descendant_for_point_range(&self, start: Point, end: Point) -> Option { -1778 | Self::new(unsafe { -1779 | ffi::ts_node_descendant_for_point_range(self.0, start.into(), end.into()) -1780 | }) -1781 | } - | -1782 | /// Get the smallest named node within this node that spans the given point range. 
-1783 | #[doc(alias = "ts_node_named_descendant_for_point_range")] -1784 | #[must_use] -1785 | pub fn named_descendant_for_point_range(&self, start: Point, end: Point) -> Option { -1786 | Self::new(unsafe { -1787 | ffi::ts_node_named_descendant_for_point_range(self.0, start.into(), end.into()) -1788 | }) -1789 | } - | -1790 | /// Get an S-expression representing the node. -1791 | #[doc(alias = "ts_node_string")] -1792 | #[must_use] -1793 | pub fn to_sexp(&self) -> String { -1794 | let c_string = unsafe { ffi::ts_node_string(self.0) }; -1795 | let result = unsafe { CStr::from_ptr(c_string) } -1796 | .to_str() -1797 | .unwrap() -1798 | .to_string(); -1799 | unsafe { (FREE_FN)(c_string.cast::()) }; -1800 | result -1801 | } - | -1802 | pub fn utf8_text<'a>(&self, source: &'a [u8]) -> Result<&'a str, str::Utf8Error> { -1803 | str::from_utf8(&source[self.start_byte()..self.end_byte()]) -1804 | } - | -1805 | #[must_use] -1806 | pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] { -1807 | &source[self.start_byte() / 2..self.end_byte() / 2] -1808 | } - | -1809 | /// Create a new [`TreeCursor`] starting from this node. -1810 | /// -1811 | /// Note that the given node is considered the root of the cursor, -1812 | /// and the cursor cannot walk outside this node. -1813 | #[doc(alias = "ts_tree_cursor_new")] -1814 | #[must_use] -1815 | pub fn walk(&self) -> TreeCursor<'tree> { -1816 | TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) -1817 | } - | -1818 | /// Edit this node to keep it in-sync with source code that has been edited. -1819 | /// -1820 | /// This function is only rarely needed. When you edit a syntax tree with -1821 | /// the [`Tree::edit`] method, all of the nodes that you retrieve from -1822 | /// the tree afterward will already reflect the edit. You only need to -1823 | /// use [`Node::edit`] when you have a specific [`Node`] instance that -1824 | /// you want to keep and continue to use after an edit. 
-1825 | #[doc(alias = "ts_node_edit")] -1826 | pub fn edit(&mut self, edit: &InputEdit) { -1827 | let edit = edit.into(); -1828 | unsafe { ffi::ts_node_edit(core::ptr::addr_of_mut!(self.0), &edit) } -1829 | } -1830 | } - | -1831 | impl PartialEq for Node<'_> { -1832 | fn eq(&self, other: &Self) -> bool { -1833 | core::ptr::eq(self.0.id, other.0.id) -1834 | } -1835 | } - | -1836 | impl Eq for Node<'_> {} - | -1837 | impl hash::Hash for Node<'_> { -1838 | fn hash(&self, state: &mut H) { -1839 | self.0.id.hash(state); -1840 | self.0.context[0].hash(state); -1841 | self.0.context[1].hash(state); -1842 | self.0.context[2].hash(state); -1843 | self.0.context[3].hash(state); -1844 | } -1845 | } - | -1846 | impl fmt::Debug for Node<'_> { -1847 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -1848 | write!( -1849 | f, -1850 | "{{Node {} {} - {}}}", -1851 | self.kind(), -1852 | self.start_position(), -1853 | self.end_position() -1854 | ) -1855 | } -1856 | } - | -1857 | impl fmt::Display for Node<'_> { -1858 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -1859 | let sexp = self.to_sexp(); -1860 | if sexp.is_empty() { -1861 | write!(f, "") -1862 | } else if !f.alternate() { -1863 | write!(f, "{sexp}") -1864 | } else { -1865 | write!(f, "{}", format_sexp(&sexp, f.width().unwrap_or(0))) -1866 | } -1867 | } -1868 | } - | -1869 | impl<'cursor> TreeCursor<'cursor> { -1870 | /// Get the tree cursor's current [`Node`]. -1871 | #[doc(alias = "ts_tree_cursor_current_node")] -1872 | #[must_use] -1873 | pub fn node(&self) -> Node<'cursor> { -1874 | Node( -1875 | unsafe { ffi::ts_tree_cursor_current_node(&self.0) }, -1876 | PhantomData, -1877 | ) -1878 | } - | -1879 | /// Get the numerical field id of this tree cursor's current node. -1880 | /// -1881 | /// See also [`field_name`](TreeCursor::field_name). 
-1882 | #[doc(alias = "ts_tree_cursor_current_field_id")] -1883 | #[must_use] -1884 | pub fn field_id(&self) -> Option { -1885 | let id = unsafe { ffi::ts_tree_cursor_current_field_id(&self.0) }; -1886 | FieldId::new(id) -1887 | } - | -1888 | /// Get the field name of this tree cursor's current node. -1889 | #[doc(alias = "ts_tree_cursor_current_field_name")] -1890 | #[must_use] -1891 | pub fn field_name(&self) -> Option<&'static str> { -1892 | unsafe { -1893 | let ptr = ffi::ts_tree_cursor_current_field_name(&self.0); -1894 | (!ptr.is_null()).then(|| CStr::from_ptr(ptr).to_str().unwrap()) -1895 | } -1896 | } - | -1897 | /// Get the depth of the cursor's current node relative to the original -1898 | /// node that the cursor was constructed with. -1899 | #[doc(alias = "ts_tree_cursor_current_depth")] -1900 | #[must_use] -1901 | pub fn depth(&self) -> u32 { -1902 | unsafe { ffi::ts_tree_cursor_current_depth(&self.0) } -1903 | } - | -1904 | /// Get the index of the cursor's current node out of all of the -1905 | /// descendants of the original node that the cursor was constructed with -1906 | #[doc(alias = "ts_tree_cursor_current_descendant_index")] -1907 | #[must_use] -1908 | pub fn descendant_index(&self) -> usize { -1909 | unsafe { ffi::ts_tree_cursor_current_descendant_index(&self.0) as usize } -1910 | } - | -1911 | /// Move this cursor to the first child of its current node. -1912 | /// -1913 | /// This returns `true` if the cursor successfully moved, and returns -1914 | /// `false` if there were no children. -1915 | #[doc(alias = "ts_tree_cursor_goto_first_child")] -1916 | pub fn goto_first_child(&mut self) -> bool { -1917 | unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) } -1918 | } - | -1919 | /// Move this cursor to the last child of its current node. -1920 | /// -1921 | /// This returns `true` if the cursor successfully moved, and returns -1922 | /// `false` if there were no children. 
-1923 | /// -1924 | /// Note that this function may be slower than -1925 | /// [`goto_first_child`](TreeCursor::goto_first_child) because it needs to -1926 | /// iterate through all the children to compute the child's position. -1927 | #[doc(alias = "ts_tree_cursor_goto_last_child")] -1928 | pub fn goto_last_child(&mut self) -> bool { -1929 | unsafe { ffi::ts_tree_cursor_goto_last_child(&mut self.0) } -1930 | } - | -1931 | /// Move this cursor to the parent of its current node. -1932 | /// -1933 | /// This returns `true` if the cursor successfully moved, and returns -1934 | /// `false` if there was no parent node (the cursor was already on the -1935 | /// root node). -1936 | /// -1937 | /// Note that the node the cursor was constructed with is considered the root -1938 | /// of the cursor, and the cursor cannot walk outside this node. -1939 | #[doc(alias = "ts_tree_cursor_goto_parent")] -1940 | pub fn goto_parent(&mut self) -> bool { -1941 | unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) } -1942 | } - | -1943 | /// Move this cursor to the next sibling of its current node. -1944 | /// -1945 | /// This returns `true` if the cursor successfully moved, and returns -1946 | /// `false` if there was no next sibling node. -1947 | /// -1948 | /// Note that the node the cursor was constructed with is considered the root -1949 | /// of the cursor, and the cursor cannot walk outside this node. -1950 | #[doc(alias = "ts_tree_cursor_goto_next_sibling")] -1951 | pub fn goto_next_sibling(&mut self) -> bool { -1952 | unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) } -1953 | } - | -1954 | /// Move the cursor to the node that is the nth descendant of -1955 | /// the original node that the cursor was constructed with, where -1956 | /// zero represents the original node itself. 
-1957 | #[doc(alias = "ts_tree_cursor_goto_descendant")] -1958 | pub fn goto_descendant(&mut self, descendant_index: usize) { -1959 | unsafe { ffi::ts_tree_cursor_goto_descendant(&mut self.0, descendant_index as u32) } -1960 | } - | -1961 | /// Move this cursor to the previous sibling of its current node. -1962 | /// -1963 | /// This returns `true` if the cursor successfully moved, and returns -1964 | /// `false` if there was no previous sibling node. -1965 | /// -1966 | /// Note, that this function may be slower than -1967 | /// [`goto_next_sibling`](TreeCursor::goto_next_sibling) due to how node -1968 | /// positions are stored. In the worst case, this will need to iterate -1969 | /// through all the children up to the previous sibling node to recalculate -1970 | /// its position. Also note that the node the cursor was constructed with is -1971 | /// considered the root of the cursor, and the cursor cannot walk outside this node. -1972 | #[doc(alias = "ts_tree_cursor_goto_previous_sibling")] -1973 | pub fn goto_previous_sibling(&mut self) -> bool { -1974 | unsafe { ffi::ts_tree_cursor_goto_previous_sibling(&mut self.0) } -1975 | } - | -1976 | /// Move this cursor to the first child of its current node that contains or -1977 | /// starts after the given byte offset. -1978 | /// -1979 | /// This returns the index of the child node if one was found, and returns -1980 | /// `None` if no such child was found. -1981 | #[doc(alias = "ts_tree_cursor_goto_first_child_for_byte")] -1982 | pub fn goto_first_child_for_byte(&mut self, index: usize) -> Option { -1983 | let result = -1984 | unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) }; -1985 | result.try_into().ok() -1986 | } - | -1987 | /// Move this cursor to the first child of its current node that contains or -1988 | /// starts after the given byte offset. 
-1989 | /// -1990 | /// This returns the index of the child node if one was found, and returns -1991 | /// `None` if no such child was found. -1992 | #[doc(alias = "ts_tree_cursor_goto_first_child_for_point")] -1993 | pub fn goto_first_child_for_point(&mut self, point: Point) -> Option { -1994 | let result = -1995 | unsafe { ffi::ts_tree_cursor_goto_first_child_for_point(&mut self.0, point.into()) }; -1996 | result.try_into().ok() -1997 | } - | -1998 | /// Re-initialize this tree cursor to start at the original node that the -1999 | /// cursor was constructed with. -2000 | #[doc(alias = "ts_tree_cursor_reset")] -2001 | pub fn reset(&mut self, node: Node<'cursor>) { -2002 | unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) }; -2003 | } - | -2004 | /// Re-initialize a tree cursor to the same position as another cursor. -2005 | /// -2006 | /// Unlike [`reset`](TreeCursor::reset), this will not lose parent -2007 | /// information and allows reusing already created cursors. -2008 | #[doc(alias = "ts_tree_cursor_reset_to")] -2009 | pub fn reset_to(&mut self, cursor: &Self) { -2010 | unsafe { ffi::ts_tree_cursor_reset_to(&mut self.0, &cursor.0) }; -2011 | } -2012 | } - | -2013 | impl Clone for TreeCursor<'_> { -2014 | fn clone(&self) -> Self { -2015 | TreeCursor(unsafe { ffi::ts_tree_cursor_copy(&self.0) }, PhantomData) -2016 | } -2017 | } - | -2018 | impl Drop for TreeCursor<'_> { -2019 | fn drop(&mut self) { -2020 | unsafe { ffi::ts_tree_cursor_delete(&mut self.0) } -2021 | } -2022 | } - | -2023 | impl LookaheadIterator { -2024 | /// Get the current language of the lookahead iterator. -2025 | #[doc(alias = "ts_lookahead_iterator_language")] -2026 | #[must_use] -2027 | pub fn language(&self) -> LanguageRef<'_> { -2028 | LanguageRef( -2029 | unsafe { ffi::ts_lookahead_iterator_language(self.0.as_ptr()) }, -2030 | PhantomData, -2031 | ) -2032 | } - | -2033 | /// Get the current symbol of the lookahead iterator. 
-2034 | #[doc(alias = "ts_lookahead_iterator_current_symbol")] -2035 | #[must_use] -2036 | pub fn current_symbol(&self) -> u16 { -2037 | unsafe { ffi::ts_lookahead_iterator_current_symbol(self.0.as_ptr()) } -2038 | } - | -2039 | /// Get the current symbol name of the lookahead iterator. -2040 | #[doc(alias = "ts_lookahead_iterator_current_symbol_name")] -2041 | #[must_use] -2042 | pub fn current_symbol_name(&self) -> &'static str { -2043 | unsafe { -2044 | CStr::from_ptr(ffi::ts_lookahead_iterator_current_symbol_name( -2045 | self.0.as_ptr(), -2046 | )) -2047 | .to_str() -2048 | .unwrap() -2049 | } -2050 | } - | -2051 | /// Reset the lookahead iterator. -2052 | /// -2053 | /// This returns `true` if the language was set successfully and `false` -2054 | /// otherwise. -2055 | #[doc(alias = "ts_lookahead_iterator_reset")] -2056 | pub fn reset(&mut self, language: &Language, state: u16) -> bool { -2057 | unsafe { ffi::ts_lookahead_iterator_reset(self.0.as_ptr(), language.0, state) } -2058 | } - | -2059 | /// Reset the lookahead iterator to another state. -2060 | /// -2061 | /// This returns `true` if the iterator was reset to the given state and -2062 | /// `false` otherwise. -2063 | #[doc(alias = "ts_lookahead_iterator_reset_state")] -2064 | pub fn reset_state(&mut self, state: u16) -> bool { -2065 | unsafe { ffi::ts_lookahead_iterator_reset_state(self.0.as_ptr(), state) } -2066 | } - | -2067 | /// Iterate symbol names. 
-2068 | pub fn iter_names(&mut self) -> impl Iterator + '_ { -2069 | LookaheadNamesIterator(self) -2070 | } -2071 | } - | -2072 | impl Iterator for LookaheadNamesIterator<'_> { -2073 | type Item = &'static str; - | -2074 | #[doc(alias = "ts_lookahead_iterator_next")] -2075 | fn next(&mut self) -> Option { -2076 | unsafe { ffi::ts_lookahead_iterator_next(self.0 .0.as_ptr()) } -2077 | .then(|| self.0.current_symbol_name()) -2078 | } -2079 | } - | -2080 | impl Iterator for LookaheadIterator { -2081 | type Item = u16; - | -2082 | #[doc(alias = "ts_lookahead_iterator_next")] -2083 | fn next(&mut self) -> Option { -2084 | // the first symbol is always `0` so we can safely skip it -2085 | unsafe { ffi::ts_lookahead_iterator_next(self.0.as_ptr()) }.then(|| self.current_symbol()) -2086 | } -2087 | } - | -2088 | impl Drop for LookaheadIterator { -2089 | #[doc(alias = "ts_lookahead_iterator_delete")] -2090 | fn drop(&mut self) { -2091 | unsafe { ffi::ts_lookahead_iterator_delete(self.0.as_ptr()) } -2092 | } -2093 | } - | -2094 | impl Query { -2095 | /// Create a new query from a string containing one or more S-expression -2096 | /// patterns. -2097 | /// -2098 | /// The query is associated with a particular language, and can only be run -2099 | /// on syntax nodes parsed with that language. References to Queries can be -2100 | /// shared between multiple threads. -2101 | pub fn new(language: &Language, source: &str) -> Result { -2102 | let ptr = Self::new_raw(language, source)?; -2103 | unsafe { Self::from_raw_parts(ptr, source) } -2104 | } - | -2105 | /// Constructs a raw [`TSQuery`](ffi::TSQuery) pointer without performing extra checks specific to the rust -2106 | /// bindings, such as predicate validation. A [`Query`] object can be constructed from the -2107 | /// returned pointer using [`from_raw_parts`](Query::from_raw_parts). 
The caller is -2108 | /// responsible for ensuring that the returned pointer is eventually freed by calling -2109 | /// [`ts_query_delete`](ffi::ts_query_delete). -2110 | pub fn new_raw(language: &Language, source: &str) -> Result<*mut ffi::TSQuery, QueryError> { -2111 | let mut error_offset = 0u32; -2112 | let mut error_type: ffi::TSQueryError = 0; -2113 | let bytes = source.as_bytes(); - | -2114 | // Compile the query. -2115 | let ptr = unsafe { -2116 | ffi::ts_query_new( -2117 | language.0, -2118 | bytes.as_ptr().cast::(), -2119 | bytes.len() as u32, -2120 | core::ptr::addr_of_mut!(error_offset), -2121 | core::ptr::addr_of_mut!(error_type), -2122 | ) -2123 | }; - | -2124 | if !ptr.is_null() { -2125 | return Ok(ptr); -2126 | } - | -2127 | // On failure, build an error based on the error code and offset. -2128 | if error_type == ffi::TSQueryErrorLanguage { -2129 | return Err(QueryError { -2130 | row: 0, -2131 | column: 0, -2132 | offset: 0, -2133 | message: LanguageError::Version(language.abi_version()).to_string(), -2134 | kind: QueryErrorKind::Language, -2135 | }); -2136 | } - | -2137 | let offset = error_offset as usize; -2138 | let mut line_start = 0; -2139 | let mut row = 0; -2140 | let mut line_containing_error = None; -2141 | for line in source.lines() { -2142 | let line_end = line_start + line.len() + 1; -2143 | if line_end > offset { -2144 | line_containing_error = Some(line); -2145 | break; -2146 | } -2147 | line_start = line_end; -2148 | row += 1; -2149 | } -2150 | let column = offset - line_start; - | -2151 | let kind; -2152 | let message; -2153 | match error_type { -2154 | // Error types that report names -2155 | ffi::TSQueryErrorNodeType | ffi::TSQueryErrorField | ffi::TSQueryErrorCapture => { -2156 | let suffix = source.split_at(offset).1; -2157 | let in_quotes = offset > 0 && source.as_bytes()[offset - 1] == b'"'; -2158 | let mut backslashes = 0; -2159 | let end_offset = suffix -2160 | .find(|c| { -2161 | if in_quotes { -2162 | if c == '"' && 
backslashes % 2 == 0 { -2163 | true -2164 | } else if c == '\\' { -2165 | backslashes += 1; -2166 | false -2167 | } else { -2168 | backslashes = 0; -2169 | false -2170 | } -2171 | } else { -2172 | !char::is_alphanumeric(c) && c != '_' && c != '-' -2173 | } -2174 | }) -2175 | .unwrap_or(suffix.len()); -2176 | message = format!("\"{}\"", suffix.split_at(end_offset).0); -2177 | kind = match error_type { -2178 | ffi::TSQueryErrorNodeType => QueryErrorKind::NodeType, -2179 | ffi::TSQueryErrorField => QueryErrorKind::Field, -2180 | ffi::TSQueryErrorCapture => QueryErrorKind::Capture, -2181 | _ => unreachable!(), -2182 | }; -2183 | } - | -2184 | // Error types that report positions -2185 | _ => { -2186 | message = line_containing_error.map_or_else( -2187 | || "Unexpected EOF".to_string(), -2188 | |line| line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^", -2189 | ); -2190 | kind = match error_type { -2191 | ffi::TSQueryErrorStructure => QueryErrorKind::Structure, -2192 | _ => QueryErrorKind::Syntax, -2193 | }; -2194 | } -2195 | } - | -2196 | Err(QueryError { -2197 | row, -2198 | column, -2199 | offset, -2200 | message, -2201 | kind, -2202 | }) -2203 | } - | -2204 | #[doc(hidden)] -2205 | unsafe fn from_raw_parts(ptr: *mut ffi::TSQuery, source: &str) -> Result { -2206 | let ptr = { -2207 | struct TSQueryDrop(*mut ffi::TSQuery); -2208 | impl Drop for TSQueryDrop { -2209 | fn drop(&mut self) { -2210 | unsafe { ffi::ts_query_delete(self.0) } -2211 | } -2212 | } -2213 | TSQueryDrop(ptr) -2214 | }; - | -2215 | let string_count = unsafe { ffi::ts_query_string_count(ptr.0) }; -2216 | let capture_count = unsafe { ffi::ts_query_capture_count(ptr.0) }; -2217 | let pattern_count = unsafe { ffi::ts_query_pattern_count(ptr.0) as usize }; - | -2218 | let mut capture_names = Vec::with_capacity(capture_count as usize); -2219 | let mut capture_quantifiers_vec = Vec::with_capacity(pattern_count as usize); -2220 | let mut text_predicates_vec = Vec::with_capacity(pattern_count); 
-2221 | let mut property_predicates_vec = Vec::with_capacity(pattern_count); -2222 | let mut property_settings_vec = Vec::with_capacity(pattern_count); -2223 | let mut general_predicates_vec = Vec::with_capacity(pattern_count); - | -2224 | // Build a vector of strings to store the capture names. -2225 | for i in 0..capture_count { -2226 | unsafe { -2227 | let mut length = 0u32; -2228 | let name = -2229 | ffi::ts_query_capture_name_for_id(ptr.0, i, core::ptr::addr_of_mut!(length)) -2230 | .cast::(); -2231 | let name = slice::from_raw_parts(name, length as usize); -2232 | let name = str::from_utf8_unchecked(name); -2233 | capture_names.push(name); -2234 | } -2235 | } - | -2236 | // Build a vector to store capture quantifiers. -2237 | for i in 0..pattern_count { -2238 | let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); -2239 | for j in 0..capture_count { -2240 | unsafe { -2241 | let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr.0, i as u32, j); -2242 | capture_quantifiers.push(quantifier.into()); -2243 | } -2244 | } -2245 | capture_quantifiers_vec.push(capture_quantifiers.into()); -2246 | } - | -2247 | // Build a vector of strings to represent literal values used in predicates. -2248 | let string_values = (0..string_count) -2249 | .map(|i| unsafe { -2250 | let mut length = 0u32; -2251 | let value = -2252 | ffi::ts_query_string_value_for_id(ptr.0, i, core::ptr::addr_of_mut!(length)) -2253 | .cast::(); -2254 | let value = slice::from_raw_parts(value, length as usize); -2255 | let value = str::from_utf8_unchecked(value); -2256 | value -2257 | }) -2258 | .collect::>(); - | -2259 | // Build a vector of predicates for each pattern. 
-2260 | for i in 0..pattern_count { -2261 | let predicate_steps = unsafe { -2262 | let mut length = 0u32; -2263 | let raw_predicates = ffi::ts_query_predicates_for_pattern( -2264 | ptr.0, -2265 | i as u32, -2266 | core::ptr::addr_of_mut!(length), -2267 | ); -2268 | (length > 0) -2269 | .then(|| slice::from_raw_parts(raw_predicates, length as usize)) -2270 | .unwrap_or_default() -2271 | }; - | -2272 | let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr.0, i as u32) }; -2273 | let row = source -2274 | .char_indices() -2275 | .take_while(|(i, _)| *i < byte_offset as usize) -2276 | .filter(|(_, c)| *c == '\n') -2277 | .count(); - | -2278 | use ffi::TSQueryPredicateStepType as T; -2279 | const TYPE_DONE: T = ffi::TSQueryPredicateStepTypeDone; -2280 | const TYPE_CAPTURE: T = ffi::TSQueryPredicateStepTypeCapture; -2281 | const TYPE_STRING: T = ffi::TSQueryPredicateStepTypeString; - | -2282 | let mut text_predicates = Vec::new(); -2283 | let mut property_predicates = Vec::new(); -2284 | let mut property_settings = Vec::new(); -2285 | let mut general_predicates = Vec::new(); -2286 | for p in predicate_steps.split(|s| s.type_ == TYPE_DONE) { -2287 | if p.is_empty() { -2288 | continue; -2289 | } - | -2290 | if p[0].type_ != TYPE_STRING { -2291 | return Err(predicate_error( -2292 | row, -2293 | format!( -2294 | "Expected predicate to start with a function name. Got @{}.", -2295 | capture_names[p[0].value_id as usize], -2296 | ), -2297 | )); -2298 | } - | -2299 | // Build a predicate for each of the known predicate function names. -2300 | let operator_name = string_values[p[0].value_id as usize]; -2301 | match operator_name { -2302 | "eq?" | "not-eq?" | "any-eq?" | "any-not-eq?" => { -2303 | if p.len() != 3 { -2304 | return Err(predicate_error( -2305 | row, -2306 | format!( -2307 | "Wrong number of arguments to #eq? predicate. 
Expected 2, got {}.", -2308 | p.len() - 1 -2309 | ), -2310 | )); -2311 | } -2312 | if p[1].type_ != TYPE_CAPTURE { -2313 | return Err(predicate_error(row, format!( -2314 | "First argument to #eq? predicate must be a capture name. Got literal \"{}\".", -2315 | string_values[p[1].value_id as usize], -2316 | ))); -2317 | } - | -2318 | let is_positive = operator_name == "eq?" || operator_name == "any-eq?"; -2319 | let match_all = match operator_name { -2320 | "eq?" | "not-eq?" => true, -2321 | "any-eq?" | "any-not-eq?" => false, -2322 | _ => unreachable!(), -2323 | }; -2324 | text_predicates.push(if p[2].type_ == TYPE_CAPTURE { -2325 | TextPredicateCapture::EqCapture( -2326 | p[1].value_id, -2327 | p[2].value_id, -2328 | is_positive, -2329 | match_all, -2330 | ) -2331 | } else { -2332 | TextPredicateCapture::EqString( -2333 | p[1].value_id, -2334 | string_values[p[2].value_id as usize].to_string().into(), -2335 | is_positive, -2336 | match_all, -2337 | ) -2338 | }); -2339 | } - | -2340 | "match?" | "not-match?" | "any-match?" | "any-not-match?" => { -2341 | if p.len() != 3 { -2342 | return Err(predicate_error(row, format!( -2343 | "Wrong number of arguments to #match? predicate. Expected 2, got {}.", -2344 | p.len() - 1 -2345 | ))); -2346 | } -2347 | if p[1].type_ != TYPE_CAPTURE { -2348 | return Err(predicate_error(row, format!( -2349 | "First argument to #match? predicate must be a capture name. Got literal \"{}\".", -2350 | string_values[p[1].value_id as usize], -2351 | ))); -2352 | } -2353 | if p[2].type_ == TYPE_CAPTURE { -2354 | return Err(predicate_error(row, format!( -2355 | "Second argument to #match? predicate must be a literal. Got capture @{}.", -2356 | capture_names[p[2].value_id as usize], -2357 | ))); -2358 | } - | -2359 | let is_positive = -2360 | operator_name == "match?" || operator_name == "any-match?"; -2361 | let match_all = match operator_name { -2362 | "match?" | "not-match?" => true, -2363 | "any-match?" | "any-not-match?" 
=> false, -2364 | _ => unreachable!(), -2365 | }; -2366 | let regex = &string_values[p[2].value_id as usize]; -2367 | text_predicates.push(TextPredicateCapture::MatchString( -2368 | p[1].value_id, -2369 | regex::bytes::Regex::new(regex).map_err(|_| { -2370 | predicate_error(row, format!("Invalid regex '{regex}'")) -2371 | })?, -2372 | is_positive, -2373 | match_all, -2374 | )); -2375 | } - | -2376 | "set!" => property_settings.push(Self::parse_property( -2377 | row, -2378 | operator_name, -2379 | &capture_names, -2380 | &string_values, -2381 | &p[1..], -2382 | )?), - | -2383 | "is?" | "is-not?" => property_predicates.push(( -2384 | Self::parse_property( -2385 | row, -2386 | operator_name, -2387 | &capture_names, -2388 | &string_values, -2389 | &p[1..], -2390 | )?, -2391 | operator_name == "is?", -2392 | )), - | -2393 | "any-of?" | "not-any-of?" => { -2394 | if p.len() < 2 { -2395 | return Err(predicate_error(row, format!( -2396 | "Wrong number of arguments to #any-of? predicate. Expected at least 1, got {}.", -2397 | p.len() - 1 -2398 | ))); -2399 | } -2400 | if p[1].type_ != TYPE_CAPTURE { -2401 | return Err(predicate_error(row, format!( -2402 | "First argument to #any-of? predicate must be a capture name. Got literal \"{}\".", -2403 | string_values[p[1].value_id as usize], -2404 | ))); -2405 | } - | -2406 | let is_positive = operator_name == "any-of?"; -2407 | let mut values = Vec::new(); -2408 | for arg in &p[2..] { -2409 | if arg.type_ == TYPE_CAPTURE { -2410 | return Err(predicate_error(row, format!( -2411 | "Arguments to #any-of? predicate must be literals. 
Got capture @{}.", -2412 | capture_names[arg.value_id as usize], -2413 | ))); -2414 | } -2415 | values.push(string_values[arg.value_id as usize]); -2416 | } -2417 | text_predicates.push(TextPredicateCapture::AnyString( -2418 | p[1].value_id, -2419 | values -2420 | .iter() -2421 | .map(|x| (*x).to_string().into()) -2422 | .collect::>() -2423 | .into(), -2424 | is_positive, -2425 | )); -2426 | } - | -2427 | _ => general_predicates.push(QueryPredicate { -2428 | operator: operator_name.to_string().into(), -2429 | args: p[1..] -2430 | .iter() -2431 | .map(|a| { -2432 | if a.type_ == TYPE_CAPTURE { -2433 | QueryPredicateArg::Capture(a.value_id) -2434 | } else { -2435 | QueryPredicateArg::String( -2436 | string_values[a.value_id as usize].to_string().into(), -2437 | ) -2438 | } -2439 | }) -2440 | .collect(), -2441 | }), -2442 | } -2443 | } - | -2444 | text_predicates_vec.push(text_predicates.into()); -2445 | property_predicates_vec.push(property_predicates.into()); -2446 | property_settings_vec.push(property_settings.into()); -2447 | general_predicates_vec.push(general_predicates.into()); -2448 | } - | -2449 | let result = Self { -2450 | ptr: unsafe { NonNull::new_unchecked(ptr.0) }, -2451 | capture_names: capture_names.into(), -2452 | capture_quantifiers: capture_quantifiers_vec.into(), -2453 | text_predicates: text_predicates_vec.into(), -2454 | property_predicates: property_predicates_vec.into(), -2455 | property_settings: property_settings_vec.into(), -2456 | general_predicates: general_predicates_vec.into(), -2457 | }; - | -2458 | core::mem::forget(ptr); - | -2459 | Ok(result) -2460 | } - | -2461 | /// Get the byte offset where the given pattern starts in the query's -2462 | /// source. 
-2463 | #[doc(alias = "ts_query_start_byte_for_pattern")] -2464 | #[must_use] -2465 | pub fn start_byte_for_pattern(&self, pattern_index: usize) -> usize { -2466 | assert!( -2467 | pattern_index < self.text_predicates.len(), -2468 | "Pattern index is {pattern_index} but the pattern count is {}", -2469 | self.text_predicates.len(), -2470 | ); -2471 | unsafe { -2472 | ffi::ts_query_start_byte_for_pattern(self.ptr.as_ptr(), pattern_index as u32) as usize -2473 | } -2474 | } - | -2475 | /// Get the byte offset where the given pattern ends in the query's -2476 | /// source. -2477 | #[doc(alias = "ts_query_end_byte_for_pattern")] -2478 | #[must_use] -2479 | pub fn end_byte_for_pattern(&self, pattern_index: usize) -> usize { -2480 | assert!( -2481 | pattern_index < self.text_predicates.len(), -2482 | "Pattern index is {pattern_index} but the pattern count is {}", -2483 | self.text_predicates.len(), -2484 | ); -2485 | unsafe { -2486 | ffi::ts_query_end_byte_for_pattern(self.ptr.as_ptr(), pattern_index as u32) as usize -2487 | } -2488 | } - | -2489 | /// Get the number of patterns in the query. -2490 | #[doc(alias = "ts_query_pattern_count")] -2491 | #[must_use] -2492 | pub fn pattern_count(&self) -> usize { -2493 | unsafe { ffi::ts_query_pattern_count(self.ptr.as_ptr()) as usize } -2494 | } - | -2495 | /// Get the names of the captures used in the query. -2496 | #[must_use] -2497 | pub const fn capture_names(&self) -> &[&str] { -2498 | &self.capture_names -2499 | } - | -2500 | /// Get the quantifiers of the captures used in the query. -2501 | #[must_use] -2502 | pub const fn capture_quantifiers(&self, index: usize) -> &[CaptureQuantifier] { -2503 | &self.capture_quantifiers[index] -2504 | } - | -2505 | /// Get the index for a given capture name. 
-2506 | #[must_use] -2507 | pub fn capture_index_for_name(&self, name: &str) -> Option { -2508 | self.capture_names -2509 | .iter() -2510 | .position(|n| *n == name) -2511 | .map(|ix| ix as u32) -2512 | } - | -2513 | /// Get the properties that are checked for the given pattern index. -2514 | /// -2515 | /// This includes predicates with the operators `is?` and `is-not?`. -2516 | #[must_use] -2517 | pub const fn property_predicates(&self, index: usize) -> &[(QueryProperty, bool)] { -2518 | &self.property_predicates[index] -2519 | } - | -2520 | /// Get the properties that are set for the given pattern index. -2521 | /// -2522 | /// This includes predicates with the operator `set!`. -2523 | #[must_use] -2524 | pub const fn property_settings(&self, index: usize) -> &[QueryProperty] { -2525 | &self.property_settings[index] -2526 | } - | -2527 | /// Get the other user-defined predicates associated with the given index. -2528 | /// -2529 | /// This includes predicate with operators other than: -2530 | /// * `match?` -2531 | /// * `eq?` and `not-eq?` -2532 | /// * `is?` and `is-not?` -2533 | /// * `set!` -2534 | #[must_use] -2535 | pub const fn general_predicates(&self, index: usize) -> &[QueryPredicate] { -2536 | &self.general_predicates[index] -2537 | } - | -2538 | /// Disable a certain capture within a query. -2539 | /// -2540 | /// This prevents the capture from being returned in matches, and also -2541 | /// avoids any resource usage associated with recording the capture. -2542 | #[doc(alias = "ts_query_disable_capture")] -2543 | pub fn disable_capture(&mut self, name: &str) { -2544 | unsafe { -2545 | ffi::ts_query_disable_capture( -2546 | self.ptr.as_ptr(), -2547 | name.as_bytes().as_ptr().cast::(), -2548 | name.len() as u32, -2549 | ); -2550 | } -2551 | } - | -2552 | /// Disable a certain pattern within a query. -2553 | /// -2554 | /// This prevents the pattern from matching, and also avoids any resource -2555 | /// usage associated with the pattern. 
-2556 | #[doc(alias = "ts_query_disable_pattern")] -2557 | pub fn disable_pattern(&mut self, index: usize) { -2558 | unsafe { ffi::ts_query_disable_pattern(self.ptr.as_ptr(), index as u32) } -2559 | } - | -2560 | /// Check if a given pattern within a query has a single root node. -2561 | #[doc(alias = "ts_query_is_pattern_rooted")] -2562 | #[must_use] -2563 | pub fn is_pattern_rooted(&self, index: usize) -> bool { -2564 | unsafe { ffi::ts_query_is_pattern_rooted(self.ptr.as_ptr(), index as u32) } -2565 | } - | -2566 | /// Check if a given pattern within a query has a single root node. -2567 | #[doc(alias = "ts_query_is_pattern_non_local")] -2568 | #[must_use] -2569 | pub fn is_pattern_non_local(&self, index: usize) -> bool { -2570 | unsafe { ffi::ts_query_is_pattern_non_local(self.ptr.as_ptr(), index as u32) } -2571 | } - | -2572 | /// Check if a given step in a query is 'definite'. -2573 | /// -2574 | /// A query step is 'definite' if its parent pattern will be guaranteed to -2575 | /// match successfully once it reaches the step. -2576 | #[doc(alias = "ts_query_is_pattern_guaranteed_at_step")] -2577 | #[must_use] -2578 | pub fn is_pattern_guaranteed_at_step(&self, byte_offset: usize) -> bool { -2579 | unsafe { -2580 | ffi::ts_query_is_pattern_guaranteed_at_step(self.ptr.as_ptr(), byte_offset as u32) -2581 | } -2582 | } - | -2583 | fn parse_property( -2584 | row: usize, -2585 | function_name: &str, -2586 | capture_names: &[&str], -2587 | string_values: &[&str], -2588 | args: &[ffi::TSQueryPredicateStep], -2589 | ) -> Result { -2590 | if args.is_empty() || args.len() > 3 { -2591 | return Err(predicate_error( -2592 | row, -2593 | format!( -2594 | "Wrong number of arguments to {function_name} predicate. 
Expected 1 to 3, got {}.", -2595 | args.len(), -2596 | ), -2597 | )); -2598 | } - | -2599 | let mut capture_id = None; -2600 | let mut key = None; -2601 | let mut value = None; - | -2602 | for arg in args { -2603 | if arg.type_ == ffi::TSQueryPredicateStepTypeCapture { -2604 | if capture_id.is_some() { -2605 | return Err(predicate_error( -2606 | row, -2607 | format!( -2608 | "Invalid arguments to {function_name} predicate. Unexpected second capture name @{}", -2609 | capture_names[arg.value_id as usize] -2610 | ), -2611 | )); -2612 | } -2613 | capture_id = Some(arg.value_id as usize); -2614 | } else if key.is_none() { -2615 | key = Some(&string_values[arg.value_id as usize]); -2616 | } else if value.is_none() { -2617 | value = Some(string_values[arg.value_id as usize]); -2618 | } else { -2619 | return Err(predicate_error( -2620 | row, -2621 | format!( -2622 | "Invalid arguments to {function_name} predicate. Unexpected third argument @{}", -2623 | string_values[arg.value_id as usize] -2624 | ), -2625 | )); -2626 | } -2627 | } - | -2628 | if let Some(key) = key { -2629 | Ok(QueryProperty::new(key, value, capture_id)) -2630 | } else { -2631 | Err(predicate_error( -2632 | row, -2633 | format!("Invalid arguments to {function_name} predicate. Missing key argument",), -2634 | )) -2635 | } -2636 | } -2637 | } - | -2638 | impl Default for QueryCursor { -2639 | fn default() -> Self { -2640 | Self::new() -2641 | } -2642 | } - | -2643 | impl QueryCursor { -2644 | /// Create a new cursor for executing a given query. -2645 | /// -2646 | /// The cursor stores the state that is needed to iteratively search for -2647 | /// matches. -2648 | #[doc(alias = "ts_query_cursor_new")] -2649 | #[must_use] -2650 | pub fn new() -> Self { -2651 | Self { -2652 | ptr: unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) }, -2653 | } -2654 | } - | -2655 | /// Return the maximum number of in-progress matches for this cursor. 
-2656 | #[doc(alias = "ts_query_cursor_match_limit")] -2657 | #[must_use] -2658 | pub fn match_limit(&self) -> u32 { -2659 | unsafe { ffi::ts_query_cursor_match_limit(self.ptr.as_ptr()) } -2660 | } - | -2661 | /// Set the maximum number of in-progress matches for this cursor. The -2662 | /// limit must be > 0 and <= 65536. -2663 | #[doc(alias = "ts_query_cursor_set_match_limit")] -2664 | pub fn set_match_limit(&mut self, limit: u32) { -2665 | unsafe { -2666 | ffi::ts_query_cursor_set_match_limit(self.ptr.as_ptr(), limit); -2667 | } -2668 | } - | -2669 | /// Check if, on its last execution, this cursor exceeded its maximum number -2670 | /// of in-progress matches. -2671 | #[doc(alias = "ts_query_cursor_did_exceed_match_limit")] -2672 | #[must_use] -2673 | pub fn did_exceed_match_limit(&self) -> bool { -2674 | unsafe { ffi::ts_query_cursor_did_exceed_match_limit(self.ptr.as_ptr()) } -2675 | } - | -2676 | /// Iterate over all of the matches in the order that they were found. -2677 | /// -2678 | /// Each match contains the index of the pattern that matched, and a list of -2679 | /// captures. Because multiple patterns can match the same set of nodes, -2680 | /// one match may contain captures that appear *before* some of the -2681 | /// captures from a previous match. -2682 | /// -2683 | /// Iterating over a `QueryMatches` object requires the `StreamingIterator` -2684 | /// or `StreamingIteratorMut` trait to be in scope. 
This can be done via -2685 | /// `use tree_sitter::StreamingIterator` or `use tree_sitter::StreamingIteratorMut` -2686 | #[doc(alias = "ts_query_cursor_exec")] -2687 | pub fn matches<'query, 'cursor: 'query, 'tree, T: TextProvider, I: AsRef<[u8]>>( -2688 | &'cursor mut self, -2689 | query: &'query Query, -2690 | node: Node<'tree>, -2691 | text_provider: T, -2692 | ) -> QueryMatches<'query, 'tree, T, I> { -2693 | let ptr = self.ptr.as_ptr(); -2694 | unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) }; -2695 | QueryMatches { -2696 | ptr, -2697 | query, -2698 | text_provider, -2699 | buffer1: Vec::default(), -2700 | buffer2: Vec::default(), -2701 | current_match: None, -2702 | _options: None, -2703 | _phantom: PhantomData, -2704 | } -2705 | } - | -2706 | /// Iterate over all of the matches in the order that they were found, with options. -2707 | /// -2708 | /// Each match contains the index of the pattern that matched, and a list of -2709 | /// captures. Because multiple patterns can match the same set of nodes, -2710 | /// one match may contain captures that appear *before* some of the -2711 | /// captures from a previous match. 
-2712 | #[doc(alias = "ts_query_cursor_exec_with_options")] -2713 | pub fn matches_with_options< -2714 | 'query, -2715 | 'cursor: 'query, -2716 | 'tree, -2717 | T: TextProvider, -2718 | I: AsRef<[u8]>, -2719 | >( -2720 | &'cursor mut self, -2721 | query: &'query Query, -2722 | node: Node<'tree>, -2723 | text_provider: T, -2724 | options: QueryCursorOptions, -2725 | ) -> QueryMatches<'query, 'tree, T, I> { -2726 | unsafe extern "C" fn progress(state: *mut ffi::TSQueryCursorState) -> bool { -2727 | let callback = (*state) -2728 | .payload -2729 | .cast::() -2730 | .as_mut() -2731 | .unwrap(); -2732 | match callback(&QueryCursorState::from_raw(state)) { -2733 | ControlFlow::Continue(()) => false, -2734 | ControlFlow::Break(()) => true, -2735 | } -2736 | } - | -2737 | let query_options = options.progress_callback.map(|cb| { -2738 | QueryCursorOptionsDrop(Box::into_raw(Box::new(ffi::TSQueryCursorOptions { -2739 | payload: Box::into_raw(Box::new(cb)).cast::(), -2740 | progress_callback: Some(progress), -2741 | }))) -2742 | }); - | -2743 | let ptr = self.ptr.as_ptr(); -2744 | unsafe { -2745 | ffi::ts_query_cursor_exec_with_options( -2746 | ptr, -2747 | query.ptr.as_ptr(), -2748 | node.0, -2749 | query_options.as_ref().map_or(ptr::null_mut(), |q| q.0), -2750 | ); -2751 | } -2752 | QueryMatches { -2753 | ptr, -2754 | query, -2755 | text_provider, -2756 | buffer1: Vec::default(), -2757 | buffer2: Vec::default(), -2758 | current_match: None, -2759 | _options: query_options, -2760 | _phantom: PhantomData, -2761 | } -2762 | } - | -2763 | /// Iterate over all of the individual captures in the order that they -2764 | /// appear. -2765 | /// -2766 | /// This is useful if you don't care about which pattern matched, and just -2767 | /// want a single, ordered sequence of captures. -2768 | /// -2769 | /// Iterating over a `QueryCaptures` object requires the `StreamingIterator` -2770 | /// or `StreamingIteratorMut` trait to be in scope. 
This can be done via -2771 | /// `use tree_sitter::StreamingIterator` or `use tree_sitter::StreamingIteratorMut` -2772 | #[doc(alias = "ts_query_cursor_exec")] -2773 | pub fn captures<'query, 'cursor: 'query, 'tree, T: TextProvider, I: AsRef<[u8]>>( -2774 | &'cursor mut self, -2775 | query: &'query Query, -2776 | node: Node<'tree>, -2777 | text_provider: T, -2778 | ) -> QueryCaptures<'query, 'tree, T, I> { -2779 | let ptr = self.ptr.as_ptr(); -2780 | unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) }; -2781 | QueryCaptures { -2782 | ptr, -2783 | query, -2784 | text_provider, -2785 | buffer1: Vec::default(), -2786 | buffer2: Vec::default(), -2787 | current_match: None, -2788 | _options: None, -2789 | _phantom: PhantomData, -2790 | } -2791 | } - | -2792 | /// Iterate over all of the individual captures in the order that they -2793 | /// appear, with options. -2794 | /// -2795 | /// This is useful if you don't care about which pattern matched, and just -2796 | /// want a single, ordered sequence of captures. 
-2797 | #[doc(alias = "ts_query_cursor_exec")] -2798 | pub fn captures_with_options< -2799 | 'query, -2800 | 'cursor: 'query, -2801 | 'tree, -2802 | T: TextProvider, -2803 | I: AsRef<[u8]>, -2804 | >( -2805 | &'cursor mut self, -2806 | query: &'query Query, -2807 | node: Node<'tree>, -2808 | text_provider: T, -2809 | options: QueryCursorOptions, -2810 | ) -> QueryCaptures<'query, 'tree, T, I> { -2811 | unsafe extern "C" fn progress(state: *mut ffi::TSQueryCursorState) -> bool { -2812 | let callback = (*state) -2813 | .payload -2814 | .cast::() -2815 | .as_mut() -2816 | .unwrap(); -2817 | match callback(&QueryCursorState::from_raw(state)) { -2818 | ControlFlow::Continue(()) => false, -2819 | ControlFlow::Break(()) => true, -2820 | } -2821 | } - | -2822 | let query_options = options.progress_callback.map(|cb| { -2823 | QueryCursorOptionsDrop(Box::into_raw(Box::new(ffi::TSQueryCursorOptions { -2824 | payload: Box::into_raw(Box::new(cb)).cast::(), -2825 | progress_callback: Some(progress), -2826 | }))) -2827 | }); - | -2828 | let ptr = self.ptr.as_ptr(); -2829 | unsafe { -2830 | ffi::ts_query_cursor_exec_with_options( -2831 | ptr, -2832 | query.ptr.as_ptr(), -2833 | node.0, -2834 | query_options.as_ref().map_or(ptr::null_mut(), |q| q.0), -2835 | ); -2836 | } -2837 | QueryCaptures { -2838 | ptr, -2839 | query, -2840 | text_provider, -2841 | buffer1: Vec::default(), -2842 | buffer2: Vec::default(), -2843 | current_match: None, -2844 | _options: query_options, -2845 | _phantom: PhantomData, -2846 | } -2847 | } - | -2848 | /// Set the range in which the query will be executed, in terms of byte -2849 | /// offsets. 
-2850 | #[doc(alias = "ts_query_cursor_set_byte_range")] -2851 | pub fn set_byte_range(&mut self, range: ops::Range) -> &mut Self { -2852 | unsafe { -2853 | ffi::ts_query_cursor_set_byte_range( -2854 | self.ptr.as_ptr(), -2855 | range.start as u32, -2856 | range.end as u32, -2857 | ); -2858 | } -2859 | self -2860 | } - | -2861 | /// Set the range in which the query will be executed, in terms of rows and -2862 | /// columns. -2863 | #[doc(alias = "ts_query_cursor_set_point_range")] -2864 | pub fn set_point_range(&mut self, range: ops::Range) -> &mut Self { -2865 | unsafe { -2866 | ffi::ts_query_cursor_set_point_range( -2867 | self.ptr.as_ptr(), -2868 | range.start.into(), -2869 | range.end.into(), -2870 | ); -2871 | } -2872 | self -2873 | } - | -2874 | /// Set the maximum start depth for a query cursor. -2875 | /// -2876 | /// This prevents cursors from exploring children nodes at a certain depth. -2877 | /// Note if a pattern includes many children, then they will still be -2878 | /// checked. -2879 | /// -2880 | /// The zero max start depth value can be used as a special behavior and -2881 | /// it helps to destructure a subtree by staying on a node and using -2882 | /// captures for interested parts. Note that the zero max start depth -2883 | /// only limit a search depth for a pattern's root node but other nodes -2884 | /// that are parts of the pattern may be searched at any depth what -2885 | /// defined by the pattern structure. -2886 | /// -2887 | /// Set to `None` to remove the maximum start depth. 
-2888 | #[doc(alias = "ts_query_cursor_set_max_start_depth")] -2889 | pub fn set_max_start_depth(&mut self, max_start_depth: Option) -> &mut Self { -2890 | unsafe { -2891 | ffi::ts_query_cursor_set_max_start_depth( -2892 | self.ptr.as_ptr(), -2893 | max_start_depth.unwrap_or(u32::MAX), -2894 | ); -2895 | } -2896 | self -2897 | } -2898 | } - | -2899 | impl<'tree> QueryMatch<'_, 'tree> { -2900 | #[must_use] -2901 | pub const fn id(&self) -> u32 { -2902 | self.id -2903 | } - | -2904 | #[doc(alias = "ts_query_cursor_remove_match")] -2905 | pub fn remove(&self) { -2906 | unsafe { ffi::ts_query_cursor_remove_match(self.cursor, self.id) } -2907 | } - | -2908 | pub fn nodes_for_capture_index( -2909 | &self, -2910 | capture_ix: u32, -2911 | ) -> impl Iterator> + '_ { -2912 | self.captures -2913 | .iter() -2914 | .filter_map(move |capture| (capture.index == capture_ix).then_some(capture.node)) -2915 | } - | -2916 | fn new(m: &ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self { -2917 | QueryMatch { -2918 | cursor, -2919 | id: m.id, -2920 | pattern_index: m.pattern_index as usize, -2921 | captures: (m.capture_count > 0) -2922 | .then(|| unsafe { -2923 | slice::from_raw_parts( -2924 | m.captures.cast::>(), -2925 | m.capture_count as usize, -2926 | ) -2927 | }) -2928 | .unwrap_or_default(), -2929 | } -2930 | } - | -2931 | pub fn satisfies_text_predicates>( -2932 | &self, -2933 | query: &Query, -2934 | buffer1: &mut Vec, -2935 | buffer2: &mut Vec, -2936 | text_provider: &mut impl TextProvider, -2937 | ) -> bool { -2938 | struct NodeText<'a, T> { -2939 | buffer: &'a mut Vec, -2940 | first_chunk: Option, -2941 | } -2942 | impl<'a, T: AsRef<[u8]>> NodeText<'a, T> { -2943 | fn new(buffer: &'a mut Vec) -> Self { -2944 | Self { -2945 | buffer, -2946 | first_chunk: None, -2947 | } -2948 | } - | -2949 | fn get_text(&mut self, chunks: &mut impl Iterator) -> &[u8] { -2950 | self.first_chunk = chunks.next(); -2951 | if let Some(next_chunk) = chunks.next() { -2952 | 
self.buffer.clear(); -2953 | self.buffer -2954 | .extend_from_slice(self.first_chunk.as_ref().unwrap().as_ref()); -2955 | self.buffer.extend_from_slice(next_chunk.as_ref()); -2956 | for chunk in chunks { -2957 | self.buffer.extend_from_slice(chunk.as_ref()); -2958 | } -2959 | self.buffer.as_slice() -2960 | } else if let Some(ref first_chunk) = self.first_chunk { -2961 | first_chunk.as_ref() -2962 | } else { -2963 | &[] -2964 | } -2965 | } -2966 | } - | -2967 | let mut node_text1 = NodeText::new(buffer1); -2968 | let mut node_text2 = NodeText::new(buffer2); - | -2969 | query.text_predicates[self.pattern_index] -2970 | .iter() -2971 | .all(|predicate| match predicate { -2972 | TextPredicateCapture::EqCapture(i, j, is_positive, match_all_nodes) => { -2973 | let mut nodes_1 = self.nodes_for_capture_index(*i).peekable(); -2974 | let mut nodes_2 = self.nodes_for_capture_index(*j).peekable(); -2975 | while nodes_1.peek().is_some() && nodes_2.peek().is_some() { -2976 | let node1 = nodes_1.next().unwrap(); -2977 | let node2 = nodes_2.next().unwrap(); -2978 | let mut text1 = text_provider.text(node1); -2979 | let mut text2 = text_provider.text(node2); -2980 | let text1 = node_text1.get_text(&mut text1); -2981 | let text2 = node_text2.get_text(&mut text2); -2982 | let is_positive_match = text1 == text2; -2983 | if is_positive_match != *is_positive && *match_all_nodes { -2984 | return false; -2985 | } -2986 | if is_positive_match == *is_positive && !*match_all_nodes { -2987 | return true; -2988 | } -2989 | } -2990 | nodes_1.next().is_none() && nodes_2.next().is_none() -2991 | } -2992 | TextPredicateCapture::EqString(i, s, is_positive, match_all_nodes) => { -2993 | let nodes = self.nodes_for_capture_index(*i); -2994 | for node in nodes { -2995 | let mut text = text_provider.text(node); -2996 | let text = node_text1.get_text(&mut text); -2997 | let is_positive_match = text == s.as_bytes(); -2998 | if is_positive_match != *is_positive && *match_all_nodes { -2999 | return false; 
-3000 | } -3001 | if is_positive_match == *is_positive && !*match_all_nodes { -3002 | return true; -3003 | } -3004 | } -3005 | true -3006 | } -3007 | TextPredicateCapture::MatchString(i, r, is_positive, match_all_nodes) => { -3008 | let nodes = self.nodes_for_capture_index(*i); -3009 | for node in nodes { -3010 | let mut text = text_provider.text(node); -3011 | let text = node_text1.get_text(&mut text); -3012 | let is_positive_match = r.is_match(text); -3013 | if is_positive_match != *is_positive && *match_all_nodes { -3014 | return false; -3015 | } -3016 | if is_positive_match == *is_positive && !*match_all_nodes { -3017 | return true; -3018 | } -3019 | } -3020 | true -3021 | } -3022 | TextPredicateCapture::AnyString(i, v, is_positive) => { -3023 | let nodes = self.nodes_for_capture_index(*i); -3024 | for node in nodes { -3025 | let mut text = text_provider.text(node); -3026 | let text = node_text1.get_text(&mut text); -3027 | if (v.iter().any(|s| text == s.as_bytes())) != *is_positive { -3028 | return false; -3029 | } -3030 | } -3031 | true -3032 | } -3033 | }) -3034 | } -3035 | } - | -3036 | impl QueryProperty { -3037 | #[must_use] -3038 | pub fn new(key: &str, value: Option<&str>, capture_id: Option) -> Self { -3039 | Self { -3040 | capture_id, -3041 | key: key.to_string().into(), -3042 | value: value.map(|s| s.to_string().into()), -3043 | } -3044 | } -3045 | } - | -3046 | /// Provide a `StreamingIterator` instead of the traditional `Iterator`, as the -3047 | /// underlying object in the C library gets updated on each iteration. 
Copies would -3048 | /// have their internal state overwritten, leading to Undefined Behavior -3049 | impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> StreamingIterator -3050 | for QueryMatches<'query, 'tree, T, I> -3051 | { -3052 | type Item = QueryMatch<'query, 'tree>; - | -3053 | fn advance(&mut self) { -3054 | self.current_match = unsafe { -3055 | loop { -3056 | let mut m = MaybeUninit::::uninit(); -3057 | if ffi::ts_query_cursor_next_match(self.ptr, m.as_mut_ptr()) { -3058 | let result = QueryMatch::new(&m.assume_init(), self.ptr); -3059 | if result.satisfies_text_predicates( -3060 | self.query, -3061 | &mut self.buffer1, -3062 | &mut self.buffer2, -3063 | &mut self.text_provider, -3064 | ) { -3065 | break Some(result); -3066 | } -3067 | } else { -3068 | break None; -3069 | } -3070 | } -3071 | }; -3072 | } - | -3073 | fn get(&self) -> Option<&Self::Item> { -3074 | self.current_match.as_ref() -3075 | } -3076 | } - | -3077 | impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> StreamingIteratorMut -3078 | for QueryMatches<'query, 'tree, T, I> -3079 | { -3080 | fn get_mut(&mut self) -> Option<&mut Self::Item> { -3081 | self.current_match.as_mut() -3082 | } -3083 | } - | -3084 | impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> StreamingIterator -3085 | for QueryCaptures<'query, 'tree, T, I> -3086 | { -3087 | type Item = (QueryMatch<'query, 'tree>, usize); - | -3088 | fn advance(&mut self) { -3089 | self.current_match = unsafe { -3090 | loop { -3091 | let mut capture_index = 0u32; -3092 | let mut m = MaybeUninit::::uninit(); -3093 | if ffi::ts_query_cursor_next_capture( -3094 | self.ptr, -3095 | m.as_mut_ptr(), -3096 | core::ptr::addr_of_mut!(capture_index), -3097 | ) { -3098 | let result = QueryMatch::new(&m.assume_init(), self.ptr); -3099 | if result.satisfies_text_predicates( -3100 | self.query, -3101 | &mut self.buffer1, -3102 | &mut self.buffer2, -3103 | &mut self.text_provider, -3104 | ) { -3105 | break Some((result, 
capture_index as usize)); -3106 | } -3107 | result.remove(); -3108 | } else { -3109 | break None; -3110 | } -3111 | } -3112 | } -3113 | } - | -3114 | fn get(&self) -> Option<&Self::Item> { -3115 | self.current_match.as_ref() -3116 | } -3117 | } - | -3118 | impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> StreamingIteratorMut -3119 | for QueryCaptures<'query, 'tree, T, I> -3120 | { -3121 | fn get_mut(&mut self) -> Option<&mut Self::Item> { -3122 | self.current_match.as_mut() -3123 | } -3124 | } - | -3125 | impl, I: AsRef<[u8]>> QueryMatches<'_, '_, T, I> { -3126 | #[doc(alias = "ts_query_cursor_set_byte_range")] -3127 | pub fn set_byte_range(&mut self, range: ops::Range) { -3128 | unsafe { -3129 | ffi::ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32); -3130 | } -3131 | } - | -3132 | #[doc(alias = "ts_query_cursor_set_point_range")] -3133 | pub fn set_point_range(&mut self, range: ops::Range) { -3134 | unsafe { -3135 | ffi::ts_query_cursor_set_point_range(self.ptr, range.start.into(), range.end.into()); -3136 | } -3137 | } -3138 | } - | -3139 | impl, I: AsRef<[u8]>> QueryCaptures<'_, '_, T, I> { -3140 | #[doc(alias = "ts_query_cursor_set_byte_range")] -3141 | pub fn set_byte_range(&mut self, range: ops::Range) { -3142 | unsafe { -3143 | ffi::ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32); -3144 | } -3145 | } - | -3146 | #[doc(alias = "ts_query_cursor_set_point_range")] -3147 | pub fn set_point_range(&mut self, range: ops::Range) { -3148 | unsafe { -3149 | ffi::ts_query_cursor_set_point_range(self.ptr, range.start.into(), range.end.into()); -3150 | } -3151 | } -3152 | } - | -3153 | impl fmt::Debug for QueryMatch<'_, '_> { -3154 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -3155 | write!( -3156 | f, -3157 | "QueryMatch {{ id: {}, pattern_index: {}, captures: {:?} }}", -3158 | self.id, self.pattern_index, self.captures -3159 | ) -3160 | } -3161 | } - | -3162 | impl TextProvider for F 
-3163 | where -3164 | F: FnMut(Node) -> R, -3165 | R: Iterator, -3166 | I: AsRef<[u8]>, -3167 | { -3168 | type I = R; - | -3169 | fn text(&mut self, node: Node) -> Self::I { -3170 | (self)(node) -3171 | } -3172 | } - | -3173 | impl<'a> TextProvider<&'a [u8]> for &'a [u8] { -3174 | type I = iter::Once<&'a [u8]>; - | -3175 | fn text(&mut self, node: Node) -> Self::I { -3176 | iter::once(&self[node.byte_range()]) -3177 | } -3178 | } - | -3179 | impl PartialEq for Query { -3180 | fn eq(&self, other: &Self) -> bool { -3181 | self.ptr == other.ptr -3182 | } -3183 | } - | -3184 | impl Drop for Query { -3185 | fn drop(&mut self) { -3186 | unsafe { ffi::ts_query_delete(self.ptr.as_ptr()) } -3187 | } -3188 | } - | -3189 | impl Drop for QueryCursor { -3190 | fn drop(&mut self) { -3191 | unsafe { ffi::ts_query_cursor_delete(self.ptr.as_ptr()) } -3192 | } -3193 | } - | -3194 | impl Point { -3195 | #[must_use] -3196 | pub const fn new(row: usize, column: usize) -> Self { -3197 | Self { row, column } -3198 | } -3199 | } - | -3200 | impl fmt::Display for Point { -3201 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -3202 | write!(f, "({}, {})", self.row, self.column) -3203 | } -3204 | } - | -3205 | impl From for ffi::TSPoint { -3206 | fn from(val: Point) -> Self { -3207 | Self { -3208 | row: val.row as u32, -3209 | column: val.column as u32, -3210 | } -3211 | } -3212 | } - | -3213 | impl From for Point { -3214 | fn from(point: ffi::TSPoint) -> Self { -3215 | Self { -3216 | row: point.row as usize, -3217 | column: point.column as usize, -3218 | } -3219 | } -3220 | } - | -3221 | impl From for ffi::TSRange { -3222 | fn from(val: Range) -> Self { -3223 | Self { -3224 | start_byte: val.start_byte as u32, -3225 | end_byte: val.end_byte as u32, -3226 | start_point: val.start_point.into(), -3227 | end_point: val.end_point.into(), -3228 | } -3229 | } -3230 | } - | -3231 | impl From for Range { -3232 | fn from(range: ffi::TSRange) -> Self { -3233 | Self { -3234 | start_byte: 
range.start_byte as usize, -3235 | end_byte: range.end_byte as usize, -3236 | start_point: range.start_point.into(), -3237 | end_point: range.end_point.into(), -3238 | } -3239 | } -3240 | } - | -3241 | impl From<&'_ InputEdit> for ffi::TSInputEdit { -3242 | fn from(val: &'_ InputEdit) -> Self { -3243 | Self { -3244 | start_byte: val.start_byte as u32, -3245 | old_end_byte: val.old_end_byte as u32, -3246 | new_end_byte: val.new_end_byte as u32, -3247 | start_point: val.start_position.into(), -3248 | old_end_point: val.old_end_position.into(), -3249 | new_end_point: val.new_end_position.into(), -3250 | } -3251 | } -3252 | } - | -3253 | impl<'a> LossyUtf8<'a> { -3254 | #[must_use] -3255 | pub const fn new(bytes: &'a [u8]) -> Self { -3256 | LossyUtf8 { -3257 | bytes, -3258 | in_replacement: false, -3259 | } -3260 | } -3261 | } - | -3262 | impl<'a> Iterator for LossyUtf8<'a> { -3263 | type Item = &'a str; - | -3264 | fn next(&mut self) -> Option<&'a str> { -3265 | if self.bytes.is_empty() { -3266 | return None; -3267 | } -3268 | if self.in_replacement { -3269 | self.in_replacement = false; -3270 | return Some("\u{fffd}"); -3271 | } -3272 | match core::str::from_utf8(self.bytes) { -3273 | Ok(valid) => { -3274 | self.bytes = &[]; -3275 | Some(valid) -3276 | } -3277 | Err(error) => { -3278 | if let Some(error_len) = error.error_len() { -3279 | let error_start = error.valid_up_to(); -3280 | if error_start > 0 { -3281 | let result = -3282 | unsafe { core::str::from_utf8_unchecked(&self.bytes[..error_start]) }; -3283 | self.bytes = &self.bytes[(error_start + error_len)..]; -3284 | self.in_replacement = true; -3285 | Some(result) -3286 | } else { -3287 | self.bytes = &self.bytes[error_len..]; -3288 | Some("\u{fffd}") -3289 | } -3290 | } else { -3291 | None -3292 | } -3293 | } -3294 | } -3295 | } -3296 | } - | -3297 | #[must_use] -3298 | const fn predicate_error(row: usize, message: String) -> QueryError { -3299 | QueryError { -3300 | kind: QueryErrorKind::Predicate, -3301 | 
row, -3302 | column: 0, -3303 | offset: 0, -3304 | message, -3305 | } -3306 | } - | -3307 | impl fmt::Display for IncludedRangesError { -3308 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -3309 | write!(f, "Incorrect range by index: {}", self.0) -3310 | } -3311 | } - | -3312 | impl fmt::Display for LanguageError { -3313 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -3314 | match self { -3315 | Self::Version(version) => { -3316 | write!( -3317 | f, -3318 | "Incompatible language version {version}. Expected minimum {MIN_COMPATIBLE_LANGUAGE_VERSION}, maximum {LANGUAGE_VERSION}", -3319 | ) -3320 | } -3321 | #[cfg(feature = "wasm")] -3322 | Self::Wasm => { -3323 | write!(f, "Failed to load the Wasm store.") -3324 | } -3325 | } -3326 | } -3327 | } - | -3328 | impl fmt::Display for QueryError { -3329 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -3330 | let msg = match self.kind { -3331 | QueryErrorKind::Field => "Invalid field name ", -3332 | QueryErrorKind::NodeType => "Invalid node type ", -3333 | QueryErrorKind::Capture => "Invalid capture name ", -3334 | QueryErrorKind::Predicate => "Invalid predicate: ", -3335 | QueryErrorKind::Structure => "Impossible pattern:\n", -3336 | QueryErrorKind::Syntax => "Invalid syntax:\n", -3337 | QueryErrorKind::Language => "", -3338 | }; -3339 | if msg.is_empty() { -3340 | write!(f, "{}", self.message) -3341 | } else { -3342 | write!( -3343 | f, -3344 | "Query error at {}:{}. 
{}{}", -3345 | self.row + 1, -3346 | self.column + 1, -3347 | msg, -3348 | self.message -3349 | ) -3350 | } -3351 | } -3352 | } - | -3353 | #[doc(hidden)] -3354 | #[must_use] -3355 | pub fn format_sexp(sexp: &str, initial_indent_level: usize) -> String { -3356 | let mut indent_level = initial_indent_level; -3357 | let mut formatted = String::new(); -3358 | let mut has_field = false; - | -3359 | let mut c_iter = sexp.chars().peekable(); -3360 | let mut s = String::with_capacity(sexp.len()); -3361 | let mut quote = '\0'; -3362 | let mut saw_paren = false; -3363 | let mut did_last = false; - | -3364 | let mut fetch_next_str = |next: &mut String| { -3365 | next.clear(); -3366 | while let Some(c) = c_iter.next() { -3367 | if c == '\'' || c == '"' { -3368 | quote = c; -3369 | } else if c == ' ' || (c == ')' && quote != '\0') { -3370 | if let Some(next_c) = c_iter.peek() { -3371 | if *next_c == quote { -3372 | next.push(c); -3373 | next.push(*next_c); -3374 | c_iter.next(); -3375 | quote = '\0'; -3376 | continue; -3377 | } -3378 | } -3379 | break; -3380 | } -3381 | if c == ')' { -3382 | saw_paren = true; -3383 | break; -3384 | } -3385 | next.push(c); -3386 | } - | -3387 | // at the end -3388 | if c_iter.peek().is_none() && next.is_empty() { -3389 | if saw_paren { -3390 | // but did we see a ) before ending? -3391 | saw_paren = false; -3392 | return Some(()); -3393 | } -3394 | if !did_last { -3395 | // but did we account for the end empty string as if we're splitting? 
-3396 | did_last = true; -3397 | return Some(()); -3398 | } -3399 | return None; -3400 | } -3401 | Some(()) -3402 | }; - | -3403 | while fetch_next_str(&mut s).is_some() { -3404 | if s.is_empty() && indent_level > 0 { -3405 | // ")" -3406 | indent_level -= 1; -3407 | write!(formatted, ")").unwrap(); -3408 | } else if s.starts_with('(') { -3409 | if has_field { -3410 | has_field = false; -3411 | } else { -3412 | if indent_level > 0 { -3413 | writeln!(formatted).unwrap(); -3414 | for _ in 0..indent_level { -3415 | write!(formatted, " ").unwrap(); -3416 | } -3417 | } -3418 | indent_level += 1; -3419 | } - | -3420 | // "(node_name" -3421 | write!(formatted, "{s}").unwrap(); - | -3422 | // "(MISSING node_name" or "(UNEXPECTED 'x'" -3423 | if s.starts_with("(MISSING") || s.starts_with("(UNEXPECTED") { -3424 | fetch_next_str(&mut s).unwrap(); -3425 | if s.is_empty() { -3426 | while indent_level > 0 { -3427 | indent_level -= 1; -3428 | write!(formatted, ")").unwrap(); -3429 | } -3430 | } else { -3431 | write!(formatted, " {s}").unwrap(); -3432 | } -3433 | } -3434 | } else if s.ends_with(':') { -3435 | // "field:" -3436 | writeln!(formatted).unwrap(); -3437 | for _ in 0..indent_level { -3438 | write!(formatted, " ").unwrap(); -3439 | } -3440 | write!(formatted, "{s} ").unwrap(); -3441 | has_field = true; -3442 | indent_level += 1; -3443 | } -3444 | } - | -3445 | formatted -3446 | } - | -3447 | pub fn wasm_stdlib_symbols() -> impl Iterator { -3448 | const WASM_STDLIB_SYMBOLS: &str = include_str!(concat!(env!("OUT_DIR"), "/stdlib-symbols.txt")); - | -3449 | WASM_STDLIB_SYMBOLS -3450 | .lines() -3451 | .map(|s| s.trim_matches(|c| c == '"' || c == ',')) -3452 | } - | -3453 | extern "C" { -3454 | fn free(ptr: *mut c_void); -3455 | } - | -3456 | static mut FREE_FN: unsafe extern "C" fn(ptr: *mut c_void) = free; - | -3457 | /// Sets the memory allocation functions that the core library should use. 
-3458 | /// -3459 | /// # Safety -3460 | /// -3461 | /// This function uses FFI and mutates a static global. -3462 | #[doc(alias = "ts_set_allocator")] -3463 | pub unsafe fn set_allocator( -3464 | new_malloc: Option *mut c_void>, -3465 | new_calloc: Option *mut c_void>, -3466 | new_realloc: Option *mut c_void>, -3467 | new_free: Option, -3468 | ) { -3469 | FREE_FN = new_free.unwrap_or(free); -3470 | ffi::ts_set_allocator(new_malloc, new_calloc, new_realloc, new_free); -3471 | } - | -3472 | #[cfg(feature = "std")] -3473 | #[cfg_attr(docsrs, doc(cfg(feature = "std")))] -3474 | impl error::Error for IncludedRangesError {} -3475 | #[cfg(feature = "std")] -3476 | #[cfg_attr(docsrs, doc(cfg(feature = "std")))] -3477 | impl error::Error for LanguageError {} -3478 | #[cfg(feature = "std")] -3479 | #[cfg_attr(docsrs, doc(cfg(feature = "std")))] -3480 | impl error::Error for QueryError {} - | -3481 | unsafe impl Send for Language {} -3482 | unsafe impl Sync for Language {} - | -3483 | unsafe impl Send for Node<'_> {} -3484 | unsafe impl Sync for Node<'_> {} - | -3485 | unsafe impl Send for LookaheadIterator {} -3486 | unsafe impl Sync for LookaheadIterator {} - | -3487 | unsafe impl Send for LookaheadNamesIterator<'_> {} -3488 | unsafe impl Sync for LookaheadNamesIterator<'_> {} - | -3489 | unsafe impl Send for Parser {} -3490 | unsafe impl Sync for Parser {} - | -3491 | unsafe impl Send for Query {} -3492 | unsafe impl Sync for Query {} - | -3493 | unsafe impl Send for QueryCursor {} -3494 | unsafe impl Sync for QueryCursor {} - | -3495 | unsafe impl Send for Tree {} -3496 | unsafe impl Sync for Tree {} - | -3497 | unsafe impl Send for TreeCursor<'_> {} -3498 | unsafe impl Sync for TreeCursor<'_> {} - - - --------------------------------------------------------------------------------- -/lib/binding_rust/README.md: --------------------------------------------------------------------------------- - 1 | # Rust Tree-sitter - | - 2 | [![crates.io badge]][crates.io] - | - 3 | 
[crates.io]: https://crates.io/crates/tree-sitter - 4 | [crates.io badge]: https://img.shields.io/crates/v/tree-sitter.svg?color=%23B48723 - | - 5 | Rust bindings to the [Tree-sitter][] parsing library. - | - 6 | ## Basic Usage - | - 7 | First, create a parser: - | - 8 | ```rust - 9 | use tree_sitter::{InputEdit, Language, Parser, Point}; - | - 10 | let mut parser = Parser::new(); - 11 | ``` - | - 12 | Then, add a language as a dependency: - | - 13 | ```toml - 14 | [dependencies] - 15 | tree-sitter = "0.24" - 16 | tree-sitter-rust = "0.23" - 17 | ``` - | - 18 | To use a language, you assign them to the parser. - | - 19 | ```rust - 20 | parser.set_language(&tree_sitter_rust::LANGUAGE.into()).expect("Error loading Rust grammar"); - 21 | ``` - | - 22 | Now you can parse source code: - | - 23 | ```rust - 24 | let source_code = "fn test() {}"; - 25 | let mut tree = parser.parse(source_code, None).unwrap(); - 26 | let root_node = tree.root_node(); - | - 27 | assert_eq!(root_node.kind(), "source_file"); - 28 | assert_eq!(root_node.start_position().column, 0); - 29 | assert_eq!(root_node.end_position().column, 12); - 30 | ``` - | - 31 | ### Editing - | - 32 | Once you have a syntax tree, you can update it when your source code changes. - 33 | Passing in the previous edited tree makes `parse` run much more quickly: - | - 34 | ```rust - 35 | let new_source_code = "fn test(a: u32) {}"; - | - 36 | tree.edit(&InputEdit { - 37 | start_byte: 8, - 38 | old_end_byte: 8, - 39 | new_end_byte: 14, - 40 | start_position: Point::new(0, 8), - 41 | old_end_position: Point::new(0, 8), - 42 | new_end_position: Point::new(0, 14), - 43 | }); - | - 44 | let new_tree = parser.parse(new_source_code, Some(&tree)); - 45 | ``` - | - 46 | ### Text Input - | - 47 | The source code to parse can be provided either as a string, a slice, a vector, - 48 | or as a function that returns a slice. 
The text can be encoded as either UTF8 or UTF16: - | - 49 | ```rust - 50 | // Store some source code in an array of lines. - 51 | let lines = &[ - 52 | "pub fn foo() {", - 53 | " 1", - 54 | "}", - 55 | ]; - | - 56 | // Parse the source code using a custom callback. The callback is called - 57 | // with both a byte offset and a row/column offset. - 58 | let tree = parser.parse_with(&mut |_byte: usize, position: Point| -> &[u8] { - 59 | let row = position.row as usize; - 60 | let column = position.column as usize; - 61 | if row < lines.len() { - 62 | if column < lines[row].as_bytes().len() { - 63 | &lines[row].as_bytes()[column..] - 64 | } else { - 65 | b"\n" - 66 | } - 67 | } else { - 68 | &[] - 69 | } - 70 | }, None).unwrap(); - | - 71 | assert_eq!( - 72 | tree.root_node().to_sexp(), - 73 | "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))" - 74 | ); - 75 | ``` - | - 76 | ## Using Wasm Grammar Files - | - 77 | > Requires the feature **wasm** to be enabled. - | - 78 | First, create a parser with a Wasm store: - | - 79 | ```rust - 80 | use tree_sitter::{wasmtime::Engine, Parser, WasmStore}; - | - 81 | let engine = Engine::default(); - 82 | let store = WasmStore::new(&engine).unwrap(); - | - 83 | let mut parser = Parser::new(); - 84 | parser.set_wasm_store(store).unwrap(); - 85 | ``` - | - 86 | Then, load the language from a Wasm file: - | - 87 | ```rust - 88 | const JAVASCRIPT_GRAMMAR: &[u8] = include_bytes!("path/to/tree-sitter-javascript.wasm"); - | - 89 | let mut store = WasmStore::new(&engine).unwrap(); - 90 | let javascript = store - 91 | .load_language("javascript", JAVASCRIPT_GRAMMAR) - 92 | .unwrap(); - | - 93 | // The language may be loaded from a different WasmStore than the one set on - 94 | // the parser but it must use the same underlying WasmEngine. 
- 95 | parser.set_language(&javascript).unwrap(); - 96 | ``` - | - 97 | Now you can parse source code: - | - 98 | ```rust - 99 | let source_code = "let x = 1;"; - 100 | let tree = parser.parse(source_code, None).unwrap(); - | - 101 | assert_eq!( - 102 | tree.root_node().to_sexp(), - 103 | "(program (lexical_declaration (variable_declarator name: (identifier) value: (number))))" - 104 | ); - 105 | ``` - | - 106 | [tree-sitter]: https://github.com/tree-sitter/tree-sitter - | - 107 | ## Features - | - 108 | - **std** - This feature is enabled by default and allows `tree-sitter` to use the standard library. - 109 | - Error types implement the `std::error:Error` trait. - 110 | - `regex` performance optimizations are enabled. - 111 | - The DOT graph methods are enabled. - 112 | - **wasm** - This feature allows `tree-sitter` to be built for Wasm targets using the `wasmtime-c-api` crate. - - - --------------------------------------------------------------------------------- -/lib/binding_rust/util.rs: --------------------------------------------------------------------------------- - 1 | use core::ffi::c_void; - | - 2 | use super::FREE_FN; - | - 3 | /// A raw pointer and a length, exposed as an iterator. 
- 4 | pub struct CBufferIter { - 5 | ptr: *mut T, - 6 | count: usize, - 7 | i: usize, - 8 | } - | - 9 | impl CBufferIter { - 10 | pub const unsafe fn new(ptr: *mut T, count: usize) -> Self { - 11 | Self { ptr, count, i: 0 } - 12 | } - 13 | } - | - 14 | impl Iterator for CBufferIter { - 15 | type Item = T; - | - 16 | fn next(&mut self) -> Option { - 17 | let i = self.i; - 18 | if i >= self.count { - 19 | None - 20 | } else { - 21 | self.i += 1; - 22 | Some(unsafe { *self.ptr.add(i) }) - 23 | } - 24 | } - | - 25 | fn size_hint(&self) -> (usize, Option) { - 26 | let remaining = self.count - self.i; - 27 | (remaining, Some(remaining)) - 28 | } - 29 | } - | - 30 | impl ExactSizeIterator for CBufferIter {} - | - 31 | impl Drop for CBufferIter { - 32 | fn drop(&mut self) { - 33 | if !self.ptr.is_null() { - 34 | unsafe { (FREE_FN)(self.ptr.cast::()) }; - 35 | } - 36 | } - 37 | } - - - --------------------------------------------------------------------------------- -/lib/binding_rust/wasm_language.rs: --------------------------------------------------------------------------------- - 1 | use std::{ - 2 | error, - 3 | ffi::{CStr, CString}, - 4 | fmt, - 5 | mem::{self, MaybeUninit}, - 6 | os::raw::c_char, - 7 | }; - | - 8 | pub use wasmtime_c_api::wasmtime; - | - 9 | use crate::{ffi, Language, LanguageError, Parser, FREE_FN}; - | - 10 | // Force Cargo to include wasmtime-c-api as a dependency of this crate, - 11 | // even though it is only used by the C code. 
- 12 | #[allow(unused)] - 13 | fn _use_wasmtime() { - 14 | wasmtime_c_api::wasm_engine_new(); - 15 | } - | - 16 | #[repr(C)] - 17 | #[derive(Clone)] - 18 | #[allow(non_camel_case_types)] - 19 | pub struct wasm_engine_t { - 20 | pub(crate) engine: wasmtime::Engine, - 21 | } - | - 22 | pub struct WasmStore(*mut ffi::TSWasmStore); - | - 23 | unsafe impl Send for WasmStore {} - 24 | unsafe impl Sync for WasmStore {} - | - 25 | #[derive(Debug, PartialEq, Eq)] - 26 | pub struct WasmError { - 27 | pub kind: WasmErrorKind, - 28 | pub message: String, - 29 | } - | - 30 | #[derive(Debug, PartialEq, Eq)] - 31 | pub enum WasmErrorKind { - 32 | Parse, - 33 | Compile, - 34 | Instantiate, - 35 | Other, - 36 | } - | - 37 | impl WasmStore { - 38 | pub fn new(engine: &wasmtime::Engine) -> Result { - 39 | unsafe { - 40 | let mut error = MaybeUninit::::uninit(); - 41 | let store = ffi::ts_wasm_store_new( - 42 | std::ptr::from_ref::(engine) - 43 | .cast_mut() - 44 | .cast(), - 45 | error.as_mut_ptr(), - 46 | ); - 47 | if store.is_null() { - 48 | Err(WasmError::new(error.assume_init())) - 49 | } else { - 50 | Ok(Self(store)) - 51 | } - 52 | } - 53 | } - | - 54 | pub fn load_language(&mut self, name: &str, bytes: &[u8]) -> Result { - 55 | let name = CString::new(name).unwrap(); - 56 | unsafe { - 57 | let mut error = MaybeUninit::::uninit(); - 58 | let language = ffi::ts_wasm_store_load_language( - 59 | self.0, - 60 | name.as_ptr(), - 61 | bytes.as_ptr().cast::(), - 62 | bytes.len() as u32, - 63 | error.as_mut_ptr(), - 64 | ); - 65 | if language.is_null() { - 66 | Err(WasmError::new(error.assume_init())) - 67 | } else { - 68 | Ok(Language(language)) - 69 | } - 70 | } - 71 | } - | - 72 | #[must_use] - 73 | pub fn language_count(&self) -> usize { - 74 | unsafe { ffi::ts_wasm_store_language_count(self.0) } - 75 | } - 76 | } - | - 77 | impl WasmError { - 78 | unsafe fn new(error: ffi::TSWasmError) -> Self { - 79 | let message = CStr::from_ptr(error.message).to_str().unwrap().to_string(); - 80 
| (FREE_FN)(error.message.cast()); - 81 | Self { - 82 | kind: match error.kind { - 83 | ffi::TSWasmErrorKindParse => WasmErrorKind::Parse, - 84 | ffi::TSWasmErrorKindCompile => WasmErrorKind::Compile, - 85 | ffi::TSWasmErrorKindInstantiate => WasmErrorKind::Instantiate, - 86 | _ => WasmErrorKind::Other, - 87 | }, - 88 | message, - 89 | } - 90 | } - 91 | } - | - 92 | impl Language { - 93 | #[must_use] - 94 | pub fn is_wasm(&self) -> bool { - 95 | unsafe { ffi::ts_language_is_wasm(self.0) } - 96 | } - 97 | } - | - 98 | impl Parser { - 99 | pub fn set_wasm_store(&mut self, store: WasmStore) -> Result<(), LanguageError> { - 100 | unsafe { ffi::ts_parser_set_wasm_store(self.0.as_ptr(), store.0) }; - 101 | mem::forget(store); - 102 | Ok(()) - 103 | } - | - 104 | pub fn take_wasm_store(&mut self) -> Option { - 105 | let ptr = unsafe { ffi::ts_parser_take_wasm_store(self.0.as_ptr()) }; - 106 | if ptr.is_null() { - 107 | None - 108 | } else { - 109 | Some(WasmStore(ptr)) - 110 | } - 111 | } - 112 | } - | - 113 | impl Drop for WasmStore { - 114 | fn drop(&mut self) { - 115 | unsafe { ffi::ts_wasm_store_delete(self.0) }; - 116 | } - 117 | } - | - 118 | impl fmt::Display for WasmError { - 119 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - 120 | let kind = match self.kind { - 121 | WasmErrorKind::Parse => "Failed to parse Wasm", - 122 | WasmErrorKind::Compile => "Failed to compile Wasm", - 123 | WasmErrorKind::Instantiate => "Failed to instantiate Wasm module", - 124 | WasmErrorKind::Other => "Unknown error", - 125 | }; - 126 | write!(f, "{kind}: {}", self.message) - 127 | } - 128 | } - | - 129 | impl error::Error for WasmError {} - - - --------------------------------------------------------------------------------- -/lib/binding_web/eslint.config.mjs: --------------------------------------------------------------------------------- - 1 | import eslint from '@eslint/js'; - 2 | import tseslint from 'typescript-eslint'; - | - 3 | export default tseslint.config( - 4 | 
eslint.configs.recommended, - 5 | tseslint.configs.recommendedTypeChecked, - 6 | tseslint.configs.strictTypeChecked, - 7 | tseslint.configs.stylisticTypeChecked, - 8 | { - 9 | languageOptions: { - 10 | parserOptions: { - 11 | projectService: true, - 12 | tsconfigRootDir: import.meta.dirname, - 13 | }, - 14 | }, - 15 | rules: { - 16 | 'no-fallthrough': 'off', - 17 | '@typescript-eslint/no-non-null-assertion': 'off', - 18 | '@typescript-eslint/no-unnecessary-condition': ['error', { - 19 | allowConstantLoopConditions: true - 20 | }], - 21 | '@typescript-eslint/restrict-template-expressions': ['error', { - 22 | allowNumber: true - 23 | }], - 24 | } - 25 | }, - 26 | ); - - - --------------------------------------------------------------------------------- -/lib/binding_web/lib/exports.txt: --------------------------------------------------------------------------------- - 1 | "ts_init", - 2 | "ts_language_field_count", - 3 | "ts_language_field_name_for_id", - 4 | "ts_language_type_is_named_wasm", - 5 | "ts_language_type_is_visible_wasm", - 6 | "ts_language_symbol_count", - 7 | "ts_language_state_count", - 8 | "ts_language_supertypes_wasm", - 9 | "ts_language_subtypes_wasm", - 10 | "ts_language_symbol_for_name", - 11 | "ts_language_symbol_name", - 12 | "ts_language_symbol_type", - 13 | "ts_language_name", - 14 | "ts_language_abi_version", - 15 | "ts_language_metadata_wasm", - 16 | "ts_language_next_state", - 17 | "ts_node_field_name_for_child_wasm", - 18 | "ts_node_field_name_for_named_child_wasm", - 19 | "ts_node_children_by_field_id_wasm", - 20 | "ts_node_first_child_for_byte_wasm", - 21 | "ts_node_first_named_child_for_byte_wasm", - 22 | "ts_node_child_by_field_id_wasm", - 23 | "ts_node_child_count_wasm", - 24 | "ts_node_child_wasm", - 25 | "ts_node_children_wasm", - 26 | "ts_node_descendant_for_index_wasm", - 27 | "ts_node_descendant_for_position_wasm", - 28 | "ts_node_descendants_of_type_wasm", - 29 | "ts_node_end_index_wasm", - 30 | "ts_node_end_point_wasm", - 31 | 
"ts_node_has_changes_wasm", - 32 | "ts_node_has_error_wasm", - 33 | "ts_node_is_error_wasm", - 34 | "ts_node_is_missing_wasm", - 35 | "ts_node_is_extra_wasm", - 36 | "ts_node_is_named_wasm", - 37 | "ts_node_parse_state_wasm", - 38 | "ts_node_next_parse_state_wasm", - 39 | "ts_node_named_child_count_wasm", - 40 | "ts_node_named_child_wasm", - 41 | "ts_node_named_children_wasm", - 42 | "ts_node_named_descendant_for_index_wasm", - 43 | "ts_node_named_descendant_for_position_wasm", - 44 | "ts_node_next_named_sibling_wasm", - 45 | "ts_node_next_sibling_wasm", - 46 | "ts_node_parent_wasm", - 47 | "ts_node_child_with_descendant_wasm", - 48 | "ts_node_prev_named_sibling_wasm", - 49 | "ts_node_prev_sibling_wasm", - 50 | "ts_node_descendant_count_wasm", - 51 | "ts_node_start_index_wasm", - 52 | "ts_node_start_point_wasm", - 53 | "ts_node_symbol_wasm", - 54 | "ts_node_grammar_symbol_wasm", - 55 | "ts_node_to_string_wasm", - 56 | "ts_parser_delete", - 57 | "ts_parser_enable_logger_wasm", - 58 | "ts_parser_new_wasm", - 59 | "ts_parser_parse_wasm", - 60 | "ts_parser_reset", - 61 | "ts_parser_set_language", - 62 | "ts_parser_set_included_ranges", - 63 | "ts_parser_included_ranges_wasm", - 64 | "ts_point_edit", - 65 | "ts_query_capture_count", - 66 | "ts_query_capture_name_for_id", - 67 | "ts_query_captures_wasm", - 68 | "ts_query_delete", - 69 | "ts_query_matches_wasm", - 70 | "ts_query_new", - 71 | "ts_query_pattern_count", - 72 | "ts_query_predicates_for_pattern", - 73 | "ts_query_disable_capture", - 74 | "ts_query_start_byte_for_pattern", - 75 | "ts_query_end_byte_for_pattern", - 76 | "ts_query_string_count", - 77 | "ts_query_string_value_for_id", - 78 | "ts_query_disable_pattern", - 79 | "ts_query_capture_quantifier_for_id", - 80 | "ts_query_is_pattern_non_local", - 81 | "ts_query_is_pattern_rooted", - 82 | "ts_query_is_pattern_guaranteed_at_step", - 83 | "ts_range_edit", - 84 | "ts_tree_copy", - 85 | "ts_tree_cursor_current_field_id_wasm", - 86 | 
"ts_tree_cursor_current_depth_wasm", - 87 | "ts_tree_cursor_current_descendant_index_wasm", - 88 | "ts_tree_cursor_current_node_id_wasm", - 89 | "ts_tree_cursor_current_node_is_missing_wasm", - 90 | "ts_tree_cursor_current_node_is_named_wasm", - 91 | "ts_tree_cursor_current_node_type_id_wasm", - 92 | "ts_tree_cursor_current_node_state_id_wasm", - 93 | "ts_tree_cursor_current_node_wasm", - 94 | "ts_tree_cursor_delete_wasm", - 95 | "ts_tree_cursor_end_index_wasm", - 96 | "ts_tree_cursor_end_position_wasm", - 97 | "ts_tree_cursor_goto_first_child_wasm", - 98 | "ts_tree_cursor_goto_last_child_wasm", - 99 | "ts_tree_cursor_goto_first_child_for_index_wasm", - 100 | "ts_tree_cursor_goto_first_child_for_position_wasm", - 101 | "ts_tree_cursor_goto_next_sibling_wasm", - 102 | "ts_tree_cursor_goto_previous_sibling_wasm", - 103 | "ts_tree_cursor_goto_descendant_wasm", - 104 | "ts_tree_cursor_goto_parent_wasm", - 105 | "ts_tree_cursor_new_wasm", - 106 | "ts_tree_cursor_reset_wasm", - 107 | "ts_tree_cursor_reset_to_wasm", - 108 | "ts_tree_cursor_start_index_wasm", - 109 | "ts_tree_cursor_start_position_wasm", - 110 | "ts_tree_cursor_copy_wasm", - 111 | "ts_tree_delete", - 112 | "ts_tree_included_ranges_wasm", - 113 | "ts_tree_edit_wasm", - 114 | "ts_tree_get_changed_ranges_wasm", - 115 | "ts_tree_root_node_wasm", - 116 | "ts_tree_root_node_with_offset_wasm", - 117 | "ts_lookahead_iterator_new", - 118 | "ts_lookahead_iterator_delete", - 119 | "ts_lookahead_iterator_reset_state", - 120 | "ts_lookahead_iterator_reset", - 121 | "ts_lookahead_iterator_next", - 122 | "ts_lookahead_iterator_current_symbol", - - - --------------------------------------------------------------------------------- -/lib/binding_web/lib/imports.js: --------------------------------------------------------------------------------- - 1 | mergeInto(LibraryManager.library, { - 2 | tree_sitter_parse_callback( - 3 | inputBufferAddress, - 4 | index, - 5 | row, - 6 | column, - 7 | lengthAddress, - 8 | ) { - 9 | 
const INPUT_BUFFER_SIZE = 10 * 1024; - 10 | const string = Module.currentParseCallback(index, { row, column }); - 11 | if (typeof string === 'string') { - 12 | setValue(lengthAddress, string.length, 'i32'); - 13 | stringToUTF16(string, inputBufferAddress, INPUT_BUFFER_SIZE); - 14 | } else { - 15 | setValue(lengthAddress, 0, 'i32'); - 16 | } - 17 | }, - | - 18 | tree_sitter_log_callback(isLexMessage, messageAddress) { - 19 | if (Module.currentLogCallback) { - 20 | const message = UTF8ToString(messageAddress); - 21 | Module.currentLogCallback(message, isLexMessage !== 0); - 22 | } - 23 | }, - | - 24 | tree_sitter_progress_callback(currentOffset, hasError) { - 25 | if (Module.currentProgressCallback) { - 26 | return Module.currentProgressCallback({ currentOffset, hasError }); - 27 | } - 28 | return false; - 29 | }, - | - 30 | tree_sitter_query_progress_callback(currentOffset) { - 31 | if (Module.currentQueryProgressCallback) { - 32 | return Module.currentQueryProgressCallback({ currentOffset }); - 33 | } - 34 | return false; - 35 | }, - 36 | }); - - - --------------------------------------------------------------------------------- -/lib/binding_web/lib/prefix.js: --------------------------------------------------------------------------------- - 1 | Module.currentQueryProgressCallback = null; - 2 | Module.currentProgressCallback = null; - 3 | Module.currentLogCallback = null; - 4 | Module.currentParseCallback = null; - - - --------------------------------------------------------------------------------- -/lib/binding_web/lib/tree-sitter.c: --------------------------------------------------------------------------------- - 1 | #include "array.h" - 2 | #include "point.h" - | - 3 | #include - 4 | #include - | - 5 | /*****************************/ - 6 | /* Section - Data marshaling */ - 7 | /*****************************/ - | - 8 | static const uint32_t INPUT_BUFFER_SIZE = 10 * 1024; - | - 9 | const void *TRANSFER_BUFFER[12] = { - 10 | NULL, NULL, NULL, NULL, - 11 | 
NULL, NULL, NULL, NULL, - 12 | NULL, NULL, NULL, NULL, - 13 | }; - | - 14 | static const int SIZE_OF_CURSOR = 4; - 15 | static const int SIZE_OF_NODE = 5; - 16 | static const int SIZE_OF_POINT = 2; - 17 | static const int SIZE_OF_RANGE = 2 + (2 * SIZE_OF_POINT); - 18 | static const int SIZE_OF_CAPTURE = 1 + SIZE_OF_NODE; - | - 19 | void *ts_init() { - 20 | TRANSFER_BUFFER[0] = (const void *)TREE_SITTER_LANGUAGE_VERSION; - 21 | TRANSFER_BUFFER[1] = (const void *)TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION; - 22 | return (void*)TRANSFER_BUFFER; - 23 | } - | - 24 | static uint32_t code_unit_to_byte(uint32_t unit) { - 25 | return unit << 1; - 26 | } - | - 27 | static uint32_t byte_to_code_unit(uint32_t byte) { - 28 | return byte >> 1; - 29 | } - | - 30 | static inline void marshal_node(const void **buffer, TSNode node) { - 31 | buffer[0] = node.id; - 32 | buffer[1] = (const void *)byte_to_code_unit(node.context[0]); - 33 | buffer[2] = (const void *)node.context[1]; - 34 | buffer[3] = (const void *)byte_to_code_unit(node.context[2]); - 35 | buffer[4] = (const void *)node.context[3]; - 36 | } - | - 37 | static inline TSNode unmarshal_node_at(const TSTree *tree, uint32_t index) { - 38 | TSNode node; - 39 | const void **buffer = TRANSFER_BUFFER + index * SIZE_OF_NODE; - 40 | node.id = buffer[0]; - 41 | node.context[0] = code_unit_to_byte((uint32_t)buffer[1]); - 42 | node.context[1] = (uint32_t)buffer[2]; - 43 | node.context[2] = code_unit_to_byte((uint32_t)buffer[3]); - 44 | node.context[3] = (uint32_t)buffer[4]; - 45 | node.tree = tree; - 46 | return node; - 47 | } - | - 48 | static inline TSNode unmarshal_node(const TSTree *tree) { - 49 | return unmarshal_node_at(tree, 0); - 50 | } - | - 51 | static inline void marshal_cursor(const TSTreeCursor *cursor) { - 52 | TRANSFER_BUFFER[0] = cursor->id; - 53 | TRANSFER_BUFFER[1] = (const void *)cursor->context[0]; - 54 | TRANSFER_BUFFER[2] = (const void *)cursor->context[1]; - 55 | TRANSFER_BUFFER[3] = (const void 
*)cursor->context[2]; - 56 | } - | - 57 | static inline TSTreeCursor unmarshal_cursor(const void **buffer, const TSTree *tree) { - 58 | TSTreeCursor cursor; - 59 | cursor.id = buffer[0]; - 60 | cursor.context[0] = (uint32_t)buffer[1]; - 61 | cursor.context[1] = (uint32_t)buffer[2]; - 62 | cursor.context[2] = (uint32_t)buffer[3]; - 63 | cursor.tree = tree; - 64 | return cursor; - 65 | } - | - 66 | static void marshal_point(TSPoint point) { - 67 | TRANSFER_BUFFER[0] = (const void *)point.row; - 68 | TRANSFER_BUFFER[1] = (const void *)byte_to_code_unit(point.column); - 69 | } - | - 70 | static TSPoint unmarshal_point(const void **address) { - 71 | TSPoint point; - 72 | point.row = (uint32_t)address[0]; - 73 | point.column = code_unit_to_byte((uint32_t)address[1]); - 74 | return point; - 75 | } - | - 76 | static void marshal_range(TSRange *range) { - 77 | range->start_byte = byte_to_code_unit(range->start_byte); - 78 | range->end_byte = byte_to_code_unit(range->end_byte); - 79 | range->start_point.column = byte_to_code_unit(range->start_point.column); - 80 | range->end_point.column = byte_to_code_unit(range->end_point.column); - 81 | } - | - 82 | static void unmarshal_range(TSRange *range) { - 83 | range->start_byte = code_unit_to_byte(range->start_byte); - 84 | range->end_byte = code_unit_to_byte(range->end_byte); - 85 | range->start_point.column = code_unit_to_byte(range->start_point.column); - 86 | range->end_point.column = code_unit_to_byte(range->end_point.column); - 87 | } - | - 88 | static TSInputEdit unmarshal_edit() { - 89 | TSInputEdit edit; - 90 | const void **address = TRANSFER_BUFFER; - 91 | edit.start_point = unmarshal_point(address); address += SIZE_OF_POINT; - 92 | edit.old_end_point = unmarshal_point(address); address += SIZE_OF_POINT; - 93 | edit.new_end_point = unmarshal_point(address); address += SIZE_OF_POINT; - 94 | edit.start_byte = code_unit_to_byte((uint32_t)*address); address += 1; - 95 | edit.old_end_byte = 
code_unit_to_byte((uint32_t)*address); address += 1; - 96 | edit.new_end_byte = code_unit_to_byte((uint32_t)*address); address += 1; - 97 | return edit; - 98 | } - | - 99 | static void marshal_language_metadata(const TSLanguageMetadata *metadata) { - 100 | if (metadata == NULL) { - 101 | TRANSFER_BUFFER[0] = 0; - 102 | return; - 103 | } - 104 | TRANSFER_BUFFER[0] = (const void*)3; - 105 | TRANSFER_BUFFER[1] = (const void*)(uint32_t)metadata->major_version; - 106 | TRANSFER_BUFFER[2] = (const void*)(uint32_t)metadata->minor_version; - 107 | TRANSFER_BUFFER[3] = (const void*)(uint32_t)metadata->patch_version; - 108 | } - | - 109 | /********************/ - 110 | /* Section - Parser */ - 111 | /********************/ - | - 112 | extern void tree_sitter_parse_callback( - 113 | char *input_buffer, - 114 | uint32_t index, - 115 | uint32_t row, - 116 | uint32_t column, - 117 | uint32_t *length_read - 118 | ); - | - 119 | extern void tree_sitter_log_callback( - 120 | bool is_lex_message, - 121 | const char *message - 122 | ); - | - 123 | extern bool tree_sitter_progress_callback( - 124 | uint32_t current_offset, - 125 | bool has_error - 126 | ); - | - 127 | extern bool tree_sitter_query_progress_callback( - 128 | uint32_t current_offset - 129 | ); - | - 130 | static const char *call_parse_callback( - 131 | void *payload, - 132 | uint32_t byte, - 133 | TSPoint position, - 134 | uint32_t *bytes_read - 135 | ) { - 136 | char *buffer = (char *)payload; - 137 | tree_sitter_parse_callback( - 138 | buffer, - 139 | byte_to_code_unit(byte), - 140 | position.row, - 141 | byte_to_code_unit(position.column), - 142 | bytes_read - 143 | ); - 144 | *bytes_read = code_unit_to_byte(*bytes_read); - 145 | if (*bytes_read >= INPUT_BUFFER_SIZE) { - 146 | *bytes_read = INPUT_BUFFER_SIZE - 2; - 147 | } - 148 | return buffer; - 149 | } - | - 150 | static void call_log_callback( - 151 | void *payload, - 152 | TSLogType log_type, - 153 | const char *message - 154 | ) { - 155 | 
tree_sitter_log_callback(log_type == TSLogTypeLex, message); - 156 | } - | - 157 | static bool progress_callback( - 158 | TSParseState *state - 159 | ) { - 160 | return tree_sitter_progress_callback(state->current_byte_offset, state->has_error); - 161 | } - | - 162 | static bool query_progress_callback( - 163 | TSQueryCursorState *state - 164 | ) { - 165 | return tree_sitter_query_progress_callback(state->current_byte_offset); - 166 | } - | - 167 | void ts_parser_new_wasm() { - 168 | TSParser *parser = ts_parser_new(); - 169 | char *input_buffer = calloc(INPUT_BUFFER_SIZE, sizeof(char)); - 170 | TRANSFER_BUFFER[0] = parser; - 171 | TRANSFER_BUFFER[1] = input_buffer; - 172 | } - | - 173 | void ts_parser_enable_logger_wasm(TSParser *self, bool should_log) { - 174 | TSLogger logger = {self, should_log ? call_log_callback : NULL}; - 175 | ts_parser_set_logger(self, logger); - 176 | } - | - 177 | TSTree *ts_parser_parse_wasm( - 178 | TSParser *self, - 179 | char *input_buffer, - 180 | const TSTree *old_tree, - 181 | TSRange *ranges, - 182 | uint32_t range_count - 183 | ) { - 184 | TSInput input = { - 185 | input_buffer, - 186 | call_parse_callback, - 187 | TSInputEncodingUTF16LE, - 188 | NULL, - 189 | }; - 190 | if (range_count) { - 191 | for (unsigned i = 0; i < range_count; i++) { - 192 | unmarshal_range(&ranges[i]); - 193 | } - 194 | ts_parser_set_included_ranges(self, ranges, range_count); - 195 | free(ranges); - 196 | } else { - 197 | ts_parser_set_included_ranges(self, NULL, 0); - 198 | } - | - 199 | TSParseOptions options = {.payload = NULL, .progress_callback = progress_callback}; - | - 200 | return ts_parser_parse_with_options(self, old_tree, input, options); - 201 | } - | - 202 | void ts_parser_included_ranges_wasm(TSParser *self) { - 203 | uint32_t range_count = 0; - 204 | const TSRange *ranges = ts_parser_included_ranges(self, &range_count); - 205 | TSRange *copied_ranges = malloc(sizeof(TSRange) * range_count); - 206 | memcpy(copied_ranges, ranges, 
sizeof(TSRange) * range_count); - 207 | for (unsigned i = 0; i < range_count; i++) { - 208 | marshal_range(&copied_ranges[i]); - 209 | } - 210 | TRANSFER_BUFFER[0] = range_count ? (const void *)range_count : NULL; - 211 | TRANSFER_BUFFER[1] = copied_ranges; - 212 | } - | - 213 | /**********************/ - 214 | /* Section - Language */ - 215 | /**********************/ - | - 216 | int ts_language_type_is_named_wasm(const TSLanguage *self, TSSymbol typeId) { - 217 | const TSSymbolType symbolType = ts_language_symbol_type(self, typeId); - 218 | return symbolType == TSSymbolTypeRegular; - 219 | } - | - 220 | int ts_language_type_is_visible_wasm(const TSLanguage *self, TSSymbol typeId) { - 221 | const TSSymbolType symbolType = ts_language_symbol_type(self, typeId); - 222 | return symbolType <= TSSymbolTypeAnonymous; - 223 | } - | - 224 | void ts_language_metadata_wasm(const TSLanguage *self) { - 225 | const TSLanguageMetadata *metadata = ts_language_metadata(self); - 226 | marshal_language_metadata(metadata); - 227 | } - | - 228 | void ts_language_supertypes_wasm(const TSLanguage *self) { - 229 | uint32_t length; - 230 | const TSSymbol *supertypes = ts_language_supertypes(self, &length); - 231 | TRANSFER_BUFFER[0] = (const void *)length; - 232 | TRANSFER_BUFFER[1] = supertypes; - 233 | } - | - 234 | void ts_language_subtypes_wasm(const TSLanguage *self, TSSymbol supertype) { - 235 | uint32_t length; - 236 | const TSSymbol *subtypes = ts_language_subtypes(self, supertype, &length); - 237 | TRANSFER_BUFFER[0] = (const void *)length; - 238 | TRANSFER_BUFFER[1] = subtypes; - 239 | } - | - 240 | /******************/ - 241 | /* Section - Tree */ - 242 | /******************/ - | - 243 | void ts_tree_root_node_wasm(const TSTree *tree) { - 244 | marshal_node(TRANSFER_BUFFER, ts_tree_root_node(tree)); - 245 | } - | - 246 | void ts_tree_root_node_with_offset_wasm(const TSTree *tree) { - 247 | // read int and point from transfer buffer - 248 | const void **address = TRANSFER_BUFFER 
+ SIZE_OF_NODE; - 249 | uint32_t offset = code_unit_to_byte((uint32_t)address[0]); - 250 | TSPoint extent = unmarshal_point(address + 1); - 251 | TSNode node = ts_tree_root_node_with_offset(tree, offset, extent); - 252 | marshal_node(TRANSFER_BUFFER, node); - 253 | } - | - 254 | void ts_tree_edit_wasm(TSTree *tree) { - 255 | TSInputEdit edit = unmarshal_edit(); - 256 | ts_tree_edit(tree, &edit); - 257 | } - | - 258 | void ts_tree_included_ranges_wasm(const TSTree *tree) { - 259 | uint32_t range_count; - 260 | TSRange *ranges = ts_tree_included_ranges(tree, &range_count); - 261 | for (unsigned i = 0; i < range_count; i++) { - 262 | marshal_range(&ranges[i]); - 263 | } - 264 | TRANSFER_BUFFER[0] = (range_count ? (const void *)range_count : NULL); - 265 | TRANSFER_BUFFER[1] = (const void *)ranges; - 266 | } - | - 267 | void ts_tree_get_changed_ranges_wasm(TSTree *tree, TSTree *other) { - 268 | unsigned range_count; - 269 | TSRange *ranges = ts_tree_get_changed_ranges(tree, other, &range_count); - 270 | for (unsigned i = 0; i < range_count; i++) { - 271 | marshal_range(&ranges[i]); - 272 | } - 273 | TRANSFER_BUFFER[0] = (const void *)range_count; - 274 | TRANSFER_BUFFER[1] = (const void *)ranges; - 275 | } - | - 276 | /************************/ - 277 | /* Section - TreeCursor */ - 278 | /************************/ - | - 279 | void ts_tree_cursor_new_wasm(const TSTree *tree) { - 280 | TSNode node = unmarshal_node(tree); - 281 | TSTreeCursor cursor = ts_tree_cursor_new(node); - 282 | marshal_cursor(&cursor); - 283 | } - | - 284 | void ts_tree_cursor_copy_wasm(const TSTree *tree) { - 285 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 286 | TSTreeCursor copy = ts_tree_cursor_copy(&cursor); - 287 | marshal_cursor(©); - 288 | } - | - 289 | void ts_tree_cursor_delete_wasm(const TSTree *tree) { - 290 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 291 | ts_tree_cursor_delete(&cursor); - 292 | } - | - 293 | void 
ts_tree_cursor_reset_wasm(const TSTree *tree) { - 294 | TSNode node = unmarshal_node(tree); - 295 | TSTreeCursor cursor = unmarshal_cursor(&TRANSFER_BUFFER[SIZE_OF_NODE], tree); - 296 | ts_tree_cursor_reset(&cursor, node); - 297 | marshal_cursor(&cursor); - 298 | } - | - 299 | void ts_tree_cursor_reset_to_wasm(const TSTree *_dst, const TSTree *_src) { - 300 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, _dst); - 301 | TSTreeCursor src = unmarshal_cursor(&TRANSFER_BUFFER[SIZE_OF_CURSOR], _src); - 302 | ts_tree_cursor_reset_to(&cursor, &src); - 303 | marshal_cursor(&cursor); - 304 | } - | - 305 | bool ts_tree_cursor_goto_first_child_wasm(const TSTree *tree) { - 306 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 307 | bool result = ts_tree_cursor_goto_first_child(&cursor); - 308 | marshal_cursor(&cursor); - 309 | return result; - 310 | } - | - 311 | bool ts_tree_cursor_goto_last_child_wasm(const TSTree *tree) { - 312 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 313 | bool result = ts_tree_cursor_goto_last_child(&cursor); - 314 | marshal_cursor(&cursor); - 315 | return result; - 316 | } - | - 317 | bool ts_tree_cursor_goto_first_child_for_index_wasm(const TSTree *tree) { - 318 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 319 | const void **address = TRANSFER_BUFFER + 3; - 320 | uint32_t index = code_unit_to_byte((uint32_t)address[0]); - 321 | bool result = ts_tree_cursor_goto_first_child_for_byte(&cursor, index); - 322 | marshal_cursor(&cursor); - 323 | return result; - 324 | } - | - 325 | bool ts_tree_cursor_goto_first_child_for_position_wasm(const TSTree *tree) { - 326 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 327 | const void **address = TRANSFER_BUFFER + 3; - 328 | TSPoint point = unmarshal_point(address); - 329 | bool result = ts_tree_cursor_goto_first_child_for_point(&cursor, point); - 330 | marshal_cursor(&cursor); - 331 | return result; - 332 | } - | - 333 | bool 
ts_tree_cursor_goto_next_sibling_wasm(const TSTree *tree) { - 334 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 335 | bool result = ts_tree_cursor_goto_next_sibling(&cursor); - 336 | marshal_cursor(&cursor); - 337 | return result; - 338 | } - | - 339 | bool ts_tree_cursor_goto_previous_sibling_wasm(const TSTree *tree) { - 340 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 341 | bool result = ts_tree_cursor_goto_previous_sibling(&cursor); - 342 | marshal_cursor(&cursor); - 343 | return result; - 344 | } - | - 345 | void ts_tree_cursor_goto_descendant_wasm(const TSTree *tree, uint32_t goal_descendant_index) { - 346 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 347 | ts_tree_cursor_goto_descendant(&cursor, goal_descendant_index); - 348 | marshal_cursor(&cursor); - 349 | } - | - 350 | bool ts_tree_cursor_goto_parent_wasm(const TSTree *tree) { - 351 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 352 | bool result = ts_tree_cursor_goto_parent(&cursor); - 353 | marshal_cursor(&cursor); - 354 | return result; - 355 | } - | - 356 | uint16_t ts_tree_cursor_current_node_type_id_wasm(const TSTree *tree) { - 357 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 358 | TSNode node = ts_tree_cursor_current_node(&cursor); - 359 | return ts_node_symbol(node); - 360 | } - | - 361 | uint16_t ts_tree_cursor_current_node_state_id_wasm(const TSTree *tree) { - 362 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 363 | TSNode node = ts_tree_cursor_current_node(&cursor); - 364 | return ts_node_parse_state(node); - 365 | } - | - 366 | bool ts_tree_cursor_current_node_is_named_wasm(const TSTree *tree) { - 367 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 368 | TSNode node = ts_tree_cursor_current_node(&cursor); - 369 | return ts_node_is_named(node); - 370 | } - | - 371 | bool ts_tree_cursor_current_node_is_missing_wasm(const TSTree *tree) { - 372 | 
TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 373 | TSNode node = ts_tree_cursor_current_node(&cursor); - 374 | return ts_node_is_missing(node); - 375 | } - | - 376 | uint32_t ts_tree_cursor_current_node_id_wasm(const TSTree *tree) { - 377 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 378 | TSNode node = ts_tree_cursor_current_node(&cursor); - 379 | return (uint32_t)node.id; - 380 | } - | - 381 | void ts_tree_cursor_start_position_wasm(const TSTree *tree) { - 382 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 383 | TSNode node = ts_tree_cursor_current_node(&cursor); - 384 | marshal_point(ts_node_start_point(node)); - 385 | } - | - 386 | void ts_tree_cursor_end_position_wasm(const TSTree *tree) { - 387 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 388 | TSNode node = ts_tree_cursor_current_node(&cursor); - 389 | marshal_point(ts_node_end_point(node)); - 390 | } - | - 391 | uint32_t ts_tree_cursor_start_index_wasm(const TSTree *tree) { - 392 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 393 | TSNode node = ts_tree_cursor_current_node(&cursor); - 394 | return byte_to_code_unit(ts_node_start_byte(node)); - 395 | } - | - 396 | uint32_t ts_tree_cursor_end_index_wasm(const TSTree *tree) { - 397 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 398 | TSNode node = ts_tree_cursor_current_node(&cursor); - 399 | return byte_to_code_unit(ts_node_end_byte(node)); - 400 | } - | - 401 | uint32_t ts_tree_cursor_current_field_id_wasm(const TSTree *tree) { - 402 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 403 | return ts_tree_cursor_current_field_id(&cursor); - 404 | } - | - 405 | uint32_t ts_tree_cursor_current_depth_wasm(const TSTree *tree) { - 406 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 407 | return ts_tree_cursor_current_depth(&cursor); - 408 | } - | - 409 | uint32_t 
ts_tree_cursor_current_descendant_index_wasm(const TSTree *tree) { - 410 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 411 | return ts_tree_cursor_current_descendant_index(&cursor); - 412 | } - | - 413 | void ts_tree_cursor_current_node_wasm(const TSTree *tree) { - 414 | TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); - 415 | marshal_node(TRANSFER_BUFFER, ts_tree_cursor_current_node(&cursor)); - 416 | } - | - 417 | /******************/ - 418 | /* Section - Node */ - 419 | /******************/ - | - 420 | static TSTreeCursor scratch_cursor = {0}; - 421 | static TSQueryCursor *scratch_query_cursor = NULL; - | - 422 | uint16_t ts_node_symbol_wasm(const TSTree *tree) { - 423 | TSNode node = unmarshal_node(tree); - 424 | return ts_node_symbol(node); - 425 | } - | - 426 | const char *ts_node_field_name_for_child_wasm(const TSTree *tree, uint32_t index) { - 427 | TSNode node = unmarshal_node(tree); - 428 | return ts_node_field_name_for_child(node, index); - 429 | } - | - 430 | const char *ts_node_field_name_for_named_child_wasm(const TSTree *tree, uint32_t index) { - 431 | TSNode node = unmarshal_node(tree); - 432 | return ts_node_field_name_for_named_child(node, index); - 433 | } - | - 434 | void ts_node_children_by_field_id_wasm(const TSTree *tree, uint32_t field_id) { - 435 | TSNode node = unmarshal_node(tree); - 436 | TSTreeCursor cursor = ts_tree_cursor_new(node); - | - 437 | bool done = field_id == 0; - 438 | if (!done) { - 439 | ts_tree_cursor_reset(&cursor, node); - 440 | ts_tree_cursor_goto_first_child(&cursor); - 441 | } - | - 442 | Array(const void*) result = array_new(); - | - 443 | while (!done) { - 444 | while (ts_tree_cursor_current_field_id(&cursor) != field_id) { - 445 | if (!ts_tree_cursor_goto_next_sibling(&cursor)) { - 446 | done = true; - 447 | break; - 448 | } - 449 | } - 450 | if (done) { - 451 | break; - 452 | } - 453 | TSNode result_node = ts_tree_cursor_current_node(&cursor); - 454 | if 
(!ts_tree_cursor_goto_next_sibling(&cursor)) { - 455 | done = true; - 456 | } - 457 | array_grow_by(&result, SIZE_OF_NODE); - 458 | marshal_node(result.contents + result.size - SIZE_OF_NODE, result_node); - 459 | } - 460 | ts_tree_cursor_delete(&cursor); - | - 461 | TRANSFER_BUFFER[0] = (const void*)(result.size / SIZE_OF_NODE); - 462 | TRANSFER_BUFFER[1] = (const void*)result.contents; - 463 | } - | - 464 | void ts_node_first_child_for_byte_wasm(const TSTree *tree) { - 465 | TSNode node = unmarshal_node(tree); - 466 | const void** address = TRANSFER_BUFFER + SIZE_OF_NODE; - 467 | uint32_t byte = code_unit_to_byte((uint32_t)address[0]); - 468 | marshal_node(TRANSFER_BUFFER, ts_node_first_child_for_byte(node, byte)); - 469 | } - | - 470 | void ts_node_first_named_child_for_byte_wasm(const TSTree *tree) { - 471 | TSNode node = unmarshal_node(tree); - 472 | const void** address = TRANSFER_BUFFER + SIZE_OF_NODE; - 473 | uint32_t byte = code_unit_to_byte((uint32_t)address[0]); - 474 | marshal_node(TRANSFER_BUFFER, ts_node_first_named_child_for_byte(node, byte)); - 475 | } - | - 476 | uint16_t ts_node_grammar_symbol_wasm(const TSTree *tree) { - 477 | TSNode node = unmarshal_node(tree); - 478 | return ts_node_grammar_symbol(node); - 479 | } - | - 480 | uint32_t ts_node_child_count_wasm(const TSTree *tree) { - 481 | TSNode node = unmarshal_node(tree); - 482 | return ts_node_child_count(node); - 483 | } - | - 484 | uint32_t ts_node_named_child_count_wasm(const TSTree *tree) { - 485 | TSNode node = unmarshal_node(tree); - 486 | return ts_node_named_child_count(node); - 487 | } - | - 488 | void ts_node_child_wasm(const TSTree *tree, uint32_t index) { - 489 | TSNode node = unmarshal_node(tree); - 490 | marshal_node(TRANSFER_BUFFER, ts_node_child(node, index)); - 491 | } - | - 492 | void ts_node_named_child_wasm(const TSTree *tree, uint32_t index) { - 493 | TSNode node = unmarshal_node(tree); - 494 | marshal_node(TRANSFER_BUFFER, ts_node_named_child(node, index)); - 495 | } - | 
- 496 | void ts_node_child_by_field_id_wasm(const TSTree *tree, uint32_t field_id) { - 497 | TSNode node = unmarshal_node(tree); - 498 | marshal_node(TRANSFER_BUFFER, ts_node_child_by_field_id(node, field_id)); - 499 | } - | - 500 | void ts_node_next_sibling_wasm(const TSTree *tree) { - 501 | TSNode node = unmarshal_node(tree); - 502 | marshal_node(TRANSFER_BUFFER, ts_node_next_sibling(node)); - 503 | } - | - 504 | void ts_node_prev_sibling_wasm(const TSTree *tree) { - 505 | TSNode node = unmarshal_node(tree); - 506 | marshal_node(TRANSFER_BUFFER, ts_node_prev_sibling(node)); - 507 | } - | - 508 | void ts_node_next_named_sibling_wasm(const TSTree *tree) { - 509 | TSNode node = unmarshal_node(tree); - 510 | marshal_node(TRANSFER_BUFFER, ts_node_next_named_sibling(node)); - 511 | } - | - 512 | void ts_node_prev_named_sibling_wasm(const TSTree *tree) { - 513 | TSNode node = unmarshal_node(tree); - 514 | marshal_node(TRANSFER_BUFFER, ts_node_prev_named_sibling(node)); - 515 | } - | - 516 | uint32_t ts_node_descendant_count_wasm(const TSTree *tree) { - 517 | TSNode node = unmarshal_node(tree); - 518 | return ts_node_descendant_count(node); - 519 | } - | - 520 | void ts_node_parent_wasm(const TSTree *tree) { - 521 | TSNode node = unmarshal_node(tree); - 522 | marshal_node(TRANSFER_BUFFER, ts_node_parent(node)); - 523 | } - | - 524 | void ts_node_child_with_descendant_wasm(const TSTree *tree) { - 525 | TSNode node = unmarshal_node(tree); - 526 | TSNode descendant = unmarshal_node_at(tree, 1); - 527 | marshal_node(TRANSFER_BUFFER, ts_node_child_with_descendant(node, descendant)); - 528 | } - | - 529 | void ts_node_descendant_for_index_wasm(const TSTree *tree) { - 530 | TSNode node = unmarshal_node(tree); - 531 | const void **address = TRANSFER_BUFFER + SIZE_OF_NODE; - 532 | uint32_t start = code_unit_to_byte((uint32_t)address[0]); - 533 | uint32_t end = code_unit_to_byte((uint32_t)address[1]); - 534 | marshal_node(TRANSFER_BUFFER, ts_node_descendant_for_byte_range(node, 
start, end)); - 535 | } - | - 536 | void ts_node_named_descendant_for_index_wasm(const TSTree *tree) { - 537 | TSNode node = unmarshal_node(tree); - 538 | const void **address = TRANSFER_BUFFER + SIZE_OF_NODE; - 539 | uint32_t start = code_unit_to_byte((uint32_t)address[0]); - 540 | uint32_t end = code_unit_to_byte((uint32_t)address[1]); - 541 | marshal_node(TRANSFER_BUFFER, ts_node_named_descendant_for_byte_range(node, start, end)); - 542 | } - | - 543 | void ts_node_descendant_for_position_wasm(const TSTree *tree) { - 544 | TSNode node = unmarshal_node(tree); - 545 | const void **address = TRANSFER_BUFFER + SIZE_OF_NODE; - 546 | TSPoint start = unmarshal_point(address); address += SIZE_OF_POINT; - 547 | TSPoint end = unmarshal_point(address); - 548 | marshal_node(TRANSFER_BUFFER, ts_node_descendant_for_point_range(node, start, end)); - 549 | } - | - 550 | void ts_node_named_descendant_for_position_wasm(const TSTree *tree) { - 551 | TSNode node = unmarshal_node(tree); - 552 | const void **address = TRANSFER_BUFFER + SIZE_OF_NODE; - 553 | TSPoint start = unmarshal_point(address); address += SIZE_OF_POINT; - 554 | TSPoint end = unmarshal_point(address); - 555 | marshal_node(TRANSFER_BUFFER, ts_node_named_descendant_for_point_range(node, start, end)); - 556 | } - | - 557 | void ts_node_start_point_wasm(const TSTree *tree) { - 558 | TSNode node = unmarshal_node(tree); - 559 | marshal_point(ts_node_start_point(node)); - 560 | } - | - 561 | void ts_node_end_point_wasm(const TSTree *tree) { - 562 | TSNode node = unmarshal_node(tree); - 563 | marshal_point(ts_node_end_point(node)); - 564 | } - | - 565 | uint32_t ts_node_start_index_wasm(const TSTree *tree) { - 566 | TSNode node = unmarshal_node(tree); - 567 | return byte_to_code_unit(ts_node_start_byte(node)); - 568 | } - | - 569 | uint32_t ts_node_end_index_wasm(const TSTree *tree) { - 570 | TSNode node = unmarshal_node(tree); - 571 | return byte_to_code_unit(ts_node_end_byte(node)); - 572 | } - | - 573 | char 
*ts_node_to_string_wasm(const TSTree *tree) { - 574 | TSNode node = unmarshal_node(tree); - 575 | return ts_node_string(node); - 576 | } - | - 577 | void ts_node_children_wasm(const TSTree *tree) { - 578 | TSNode node = unmarshal_node(tree); - 579 | uint32_t count = ts_node_child_count(node); - 580 | const void **result = NULL; - 581 | if (count > 0) { - 582 | result = (const void**)calloc(sizeof(void *), SIZE_OF_NODE * count); - 583 | const void **address = result; - 584 | ts_tree_cursor_reset(&scratch_cursor, node); - 585 | ts_tree_cursor_goto_first_child(&scratch_cursor); - 586 | marshal_node(address, ts_tree_cursor_current_node(&scratch_cursor)); - 587 | for (uint32_t i = 1; i < count; i++) { - 588 | address += SIZE_OF_NODE; - 589 | ts_tree_cursor_goto_next_sibling(&scratch_cursor); - 590 | TSNode child = ts_tree_cursor_current_node(&scratch_cursor); - 591 | marshal_node(address, child); - 592 | } - 593 | } - 594 | TRANSFER_BUFFER[0] = (const void *)count; - 595 | TRANSFER_BUFFER[1] = (const void *)result; - 596 | } - | - 597 | void ts_node_named_children_wasm(const TSTree *tree) { - 598 | TSNode node = unmarshal_node(tree); - 599 | uint32_t count = ts_node_named_child_count(node); - 600 | const void **result = NULL; - 601 | if (count > 0) { - 602 | result = (const void**)calloc(sizeof(void *), SIZE_OF_NODE * count); - 603 | const void **address = result; - 604 | ts_tree_cursor_reset(&scratch_cursor, node); - 605 | ts_tree_cursor_goto_first_child(&scratch_cursor); - 606 | uint32_t i = 0; - 607 | for (;;) { - 608 | TSNode child = ts_tree_cursor_current_node(&scratch_cursor); - 609 | if (ts_node_is_named(child)) { - 610 | marshal_node(address, child); - 611 | address += SIZE_OF_NODE; - 612 | i++; - 613 | if (i == count) { - 614 | break; - 615 | } - 616 | } - 617 | if (!ts_tree_cursor_goto_next_sibling(&scratch_cursor)) { - 618 | break; - 619 | } - 620 | } - 621 | } - 622 | TRANSFER_BUFFER[0] = (const void *)count; - 623 | TRANSFER_BUFFER[1] = (const void 
*)result; - 624 | } - | - 625 | bool symbols_contain(const uint32_t *set, uint32_t length, uint32_t value) { - 626 | for (unsigned i = 0; i < length; i++) { - 627 | if (set[i] == value) { - 628 | return true; - 629 | } - 630 | if (set[i] > value) { - 631 | break; - 632 | } - 633 | } - 634 | return false; - 635 | } - | - 636 | void ts_node_descendants_of_type_wasm( - 637 | const TSTree *tree, - 638 | const uint32_t *symbols, - 639 | uint32_t symbol_count, - 640 | uint32_t start_row, - 641 | uint32_t start_column, - 642 | uint32_t end_row, - 643 | uint32_t end_column - 644 | ) { - 645 | TSNode node = unmarshal_node(tree); - 646 | TSPoint start_point = {start_row, code_unit_to_byte(start_column)}; - 647 | TSPoint end_point = {end_row, code_unit_to_byte(end_column)}; - 648 | if (end_point.row == 0 && end_point.column == 0) { - 649 | end_point = (TSPoint) {UINT32_MAX, UINT32_MAX}; - 650 | } - | - 651 | Array(const void *) result = array_new(); - | - 652 | // Walk the tree depth first looking for matching nodes. - 653 | ts_tree_cursor_reset(&scratch_cursor, node); - 654 | bool already_visited_children = false; - 655 | while (true) { - 656 | TSNode descendant = ts_tree_cursor_current_node(&scratch_cursor); - | - 657 | if (!already_visited_children) { - 658 | // If this node is before the selected range, then avoid - 659 | // descending into it. - 660 | if (point_lte(ts_node_end_point(descendant), start_point)) { - 661 | if (ts_tree_cursor_goto_next_sibling(&scratch_cursor)) { - 662 | already_visited_children = false; - 663 | } else { - 664 | if (!ts_tree_cursor_goto_parent(&scratch_cursor)) { - 665 | break; - 666 | } - 667 | already_visited_children = true; - 668 | } - 669 | continue; - 670 | } - | - 671 | // If this node is after the selected range, then stop walking. - 672 | if (point_lte(end_point, ts_node_start_point(descendant))) { - 673 | break; - 674 | } - | - 675 | // Add the node to the result if its type matches one of the given - 676 | // node types. 
- 677 | if (symbols_contain(symbols, symbol_count, ts_node_symbol(descendant))) { - 678 | array_grow_by(&result, SIZE_OF_NODE); - 679 | marshal_node(result.contents + result.size - SIZE_OF_NODE, descendant); - 680 | } - | - 681 | // Continue walking. - 682 | if (ts_tree_cursor_goto_first_child(&scratch_cursor)) { - 683 | already_visited_children = false; - 684 | } else if (ts_tree_cursor_goto_next_sibling(&scratch_cursor)) { - 685 | already_visited_children = false; - 686 | } else { - 687 | if (!ts_tree_cursor_goto_parent(&scratch_cursor)) { - 688 | break; - 689 | } - 690 | already_visited_children = true; - 691 | } - 692 | } else { - 693 | if (ts_tree_cursor_goto_next_sibling(&scratch_cursor)) { - 694 | already_visited_children = false; - 695 | } else { - 696 | if (!ts_tree_cursor_goto_parent(&scratch_cursor)) { - 697 | break; - 698 | } - 699 | } - 700 | } - 701 | } - | - 702 | TRANSFER_BUFFER[0] = (const void *)(result.size / SIZE_OF_NODE); - 703 | TRANSFER_BUFFER[1] = (const void *)result.contents; - 704 | } - | - 705 | int ts_node_is_named_wasm(const TSTree *tree) { - 706 | TSNode node = unmarshal_node(tree); - 707 | return ts_node_is_named(node); - 708 | } - | - 709 | int ts_node_has_changes_wasm(const TSTree *tree) { - 710 | TSNode node = unmarshal_node(tree); - 711 | return ts_node_has_changes(node); - 712 | } - | - 713 | int ts_node_has_error_wasm(const TSTree *tree) { - 714 | TSNode node = unmarshal_node(tree); - 715 | return ts_node_has_error(node); - 716 | } - | - 717 | int ts_node_is_error_wasm(const TSTree *tree) { - 718 | TSNode node = unmarshal_node(tree); - 719 | return ts_node_is_error(node); - 720 | } - | - 721 | int ts_node_is_missing_wasm(const TSTree *tree) { - 722 | TSNode node = unmarshal_node(tree); - 723 | return ts_node_is_missing(node); - 724 | } - | - 725 | int ts_node_is_extra_wasm(const TSTree *tree) { - 726 | TSNode node = unmarshal_node(tree); - 727 | return ts_node_is_extra(node); - 728 | } - | - 729 | uint16_t 
ts_node_parse_state_wasm(const TSTree *tree) { - 730 | TSNode node = unmarshal_node(tree); - 731 | return ts_node_parse_state(node); - 732 | } - | - 733 | uint16_t ts_node_next_parse_state_wasm(const TSTree *tree) { - 734 | TSNode node = unmarshal_node(tree); - 735 | return ts_node_next_parse_state(node); - 736 | } - | - 737 | /******************/ - 738 | /* Section - Query */ - 739 | /******************/ - | - 740 | void ts_query_matches_wasm( - 741 | const TSQuery *self, - 742 | const TSTree *tree, - 743 | uint32_t start_row, - 744 | uint32_t start_column, - 745 | uint32_t end_row, - 746 | uint32_t end_column, - 747 | uint32_t start_index, - 748 | uint32_t end_index, - 749 | uint32_t match_limit, - 750 | uint32_t max_start_depth - 751 | ) { - 752 | if (!scratch_query_cursor) { - 753 | scratch_query_cursor = ts_query_cursor_new(); - 754 | } - 755 | if (match_limit == 0) { - 756 | ts_query_cursor_set_match_limit(scratch_query_cursor, UINT32_MAX); - 757 | } else { - 758 | ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit); - 759 | } - | - 760 | TSNode node = unmarshal_node(tree); - 761 | TSPoint start_point = {start_row, code_unit_to_byte(start_column)}; - 762 | TSPoint end_point = {end_row, code_unit_to_byte(end_column)}; - 763 | ts_query_cursor_set_point_range(scratch_query_cursor, start_point, end_point); - 764 | ts_query_cursor_set_byte_range(scratch_query_cursor, start_index, end_index); - 765 | ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit); - 766 | ts_query_cursor_set_max_start_depth(scratch_query_cursor, max_start_depth); - | - 767 | TSQueryCursorOptions options = {.payload = NULL, .progress_callback = query_progress_callback}; - | - 768 | ts_query_cursor_exec_with_options(scratch_query_cursor, self, node, &options); - | - 769 | uint32_t index = 0; - 770 | uint32_t match_count = 0; - 771 | Array(const void *) result = array_new(); - | - 772 | TSQueryMatch match; - 773 | while 
(ts_query_cursor_next_match(scratch_query_cursor, &match)) { - 774 | match_count++; - 775 | array_grow_by(&result, 2 + (SIZE_OF_CAPTURE * match.capture_count)); - 776 | result.contents[index++] = (const void *)(uint32_t)match.pattern_index; - 777 | result.contents[index++] = (const void *)(uint32_t)match.capture_count; - 778 | for (unsigned i = 0; i < match.capture_count; i++) { - 779 | const TSQueryCapture *capture = &match.captures[i]; - 780 | result.contents[index++] = (const void *)capture->index; - 781 | marshal_node(result.contents + index, capture->node); - 782 | index += SIZE_OF_NODE; - 783 | } - 784 | } - | - 785 | bool did_exceed_match_limit = - 786 | ts_query_cursor_did_exceed_match_limit(scratch_query_cursor); - 787 | TRANSFER_BUFFER[0] = (const void *)(match_count); - 788 | TRANSFER_BUFFER[1] = (const void *)result.contents; - 789 | TRANSFER_BUFFER[2] = (const void *)(did_exceed_match_limit); - 790 | } - | - 791 | void ts_query_captures_wasm( - 792 | const TSQuery *self, - 793 | const TSTree *tree, - 794 | uint32_t start_row, - 795 | uint32_t start_column, - 796 | uint32_t end_row, - 797 | uint32_t end_column, - 798 | uint32_t start_index, - 799 | uint32_t end_index, - 800 | uint32_t match_limit, - 801 | uint32_t max_start_depth - 802 | ) { - 803 | if (!scratch_query_cursor) { - 804 | scratch_query_cursor = ts_query_cursor_new(); - 805 | } - | - 806 | ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit); - | - 807 | TSNode node = unmarshal_node(tree); - 808 | TSPoint start_point = {start_row, code_unit_to_byte(start_column)}; - 809 | TSPoint end_point = {end_row, code_unit_to_byte(end_column)}; - 810 | ts_query_cursor_set_point_range(scratch_query_cursor, start_point, end_point); - 811 | ts_query_cursor_set_byte_range(scratch_query_cursor, start_index, end_index); - 812 | ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit); - 813 | ts_query_cursor_set_max_start_depth(scratch_query_cursor, max_start_depth); - 814 | 
ts_query_cursor_exec(scratch_query_cursor, self, node); - | - 815 | unsigned index = 0; - 816 | unsigned capture_count = 0; - 817 | Array(const void *) result = array_new(); - | - 818 | TSQueryMatch match; - 819 | uint32_t capture_index; - 820 | while (ts_query_cursor_next_capture( - 821 | scratch_query_cursor, - 822 | &match, - 823 | &capture_index - 824 | )) { - 825 | capture_count++; - | - 826 | array_grow_by(&result, 3 + (SIZE_OF_CAPTURE * match.capture_count)); - 827 | result.contents[index++] = (const void *)(uint32_t)match.pattern_index; - 828 | result.contents[index++] = (const void *)(uint32_t)match.capture_count; - 829 | result.contents[index++] = (const void *)capture_index; - 830 | for (unsigned i = 0; i < match.capture_count; i++) { - 831 | const TSQueryCapture *capture = &match.captures[i]; - 832 | result.contents[index++] = (const void *)capture->index; - 833 | marshal_node(result.contents + index, capture->node); - 834 | index += SIZE_OF_NODE; - 835 | } - 836 | } - | - 837 | bool did_exceed_match_limit = - 838 | ts_query_cursor_did_exceed_match_limit(scratch_query_cursor); - 839 | TRANSFER_BUFFER[0] = (const void *)(capture_count); - 840 | TRANSFER_BUFFER[1] = (const void *)result.contents; - 841 | TRANSFER_BUFFER[2] = (const void *)(did_exceed_match_limit); - 842 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/lib/web-tree-sitter.d.ts: --------------------------------------------------------------------------------- - 1 | // TypeScript bindings for emscripten-generated code. Automatically @generated at compile time. - 2 | declare namespace RuntimeExports { - 3 | function AsciiToString(ptr: number): string; - 4 | function stringToUTF8(str: string, outPtr: number, maxBytesToWrite: number): number; - 5 | /** - 6 | * Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the - 7 | * emscripten HEAP, returns a copy of that string as a Javascript String object. 
- 8 | * - 9 | * @param {number} ptr - 10 | * @param {number=} maxBytesToRead - An optional length that specifies the - 11 | * maximum number of bytes to read. You can omit this parameter to scan the - 12 | * string until the first 0 byte. If maxBytesToRead is passed, and the string - 13 | * at [ptr, ptr+maxBytesToReadr[ contains a null byte in the middle, then the - 14 | * string will cut short at that byte index. - 15 | * @param {boolean=} ignoreNul - If true, the function will not stop on a NUL character. - 16 | * @return {string} - 17 | */ - 18 | function UTF8ToString(ptr: number, maxBytesToRead?: number | undefined, ignoreNul?: boolean | undefined): string; - 19 | function lengthBytesUTF8(str: string): number; - 20 | function stringToUTF16(str: string, outPtr: number, maxBytesToWrite: number): number; - 21 | /** - 22 | * @param {string=} libName - 23 | * @param {Object=} localScope - 24 | * @param {number=} handle - 25 | */ - 26 | function loadWebAssemblyModule(binary: Uint8Array | WebAssembly.Module, flags: Record, libName?: string, localScope?: Record, handle?: number): Promise number>>; - 27 | /** - 28 | * @param {number} ptr - 29 | * @param {string} type - 30 | */ - 31 | function getValue(ptr: number, type?: string): number; - 32 | /** - 33 | * @param {number} ptr - 34 | * @param {number} value - 35 | * @param {string} type - 36 | */ - 37 | function setValue(ptr: number, value: number, type?: string): void; - 38 | let HEAPF32: Float32Array; - 39 | let HEAPF64: Float64Array; - 40 | let HEAP_DATA_VIEW: DataView; - 41 | let HEAP8: Int8Array; - 42 | let HEAPU8: Uint8Array; - 43 | let HEAP16: Int16Array; - 44 | let HEAPU16: Uint16Array; - 45 | let HEAP32: Int32Array; - 46 | let HEAPU32: Uint32Array; - 47 | let HEAP64: BigInt64Array; - 48 | let HEAPU64: BigUint64Array; - 49 | function LE_HEAP_STORE_I64(byteOffset: any, value: any): any; - 50 | } - 51 | interface WasmModule { - 52 | _malloc(_0: number): number; - 53 | _calloc(_0: number, _1: number): number; - 54 
| _realloc(_0: number, _1: number): number; - 55 | _free(_0: number): void; - 56 | _memcmp(_0: number, _1: number, _2: number): number; - 57 | _ts_language_symbol_count(_0: number): number; - 58 | _ts_language_state_count(_0: number): number; - 59 | _ts_language_abi_version(_0: number): number; - 60 | _ts_language_name(_0: number): number; - 61 | _ts_language_field_count(_0: number): number; - 62 | _ts_language_next_state(_0: number, _1: number, _2: number): number; - 63 | _ts_language_symbol_name(_0: number, _1: number): number; - 64 | _ts_language_symbol_for_name(_0: number, _1: number, _2: number, _3: number): number; - 65 | _strncmp(_0: number, _1: number, _2: number): number; - 66 | _ts_language_symbol_type(_0: number, _1: number): number; - 67 | _ts_language_field_name_for_id(_0: number, _1: number): number; - 68 | _ts_lookahead_iterator_new(_0: number, _1: number): number; - 69 | _ts_lookahead_iterator_delete(_0: number): void; - 70 | _ts_lookahead_iterator_reset_state(_0: number, _1: number): number; - 71 | _ts_lookahead_iterator_reset(_0: number, _1: number, _2: number): number; - 72 | _ts_lookahead_iterator_next(_0: number): number; - 73 | _ts_lookahead_iterator_current_symbol(_0: number): number; - 74 | _ts_parser_delete(_0: number): void; - 75 | _ts_parser_reset(_0: number): void; - 76 | _ts_parser_set_language(_0: number, _1: number): number; - 77 | _ts_parser_set_included_ranges(_0: number, _1: number, _2: number): number; - 78 | _ts_query_new(_0: number, _1: number, _2: number, _3: number, _4: number): number; - 79 | _ts_query_delete(_0: number): void; - 80 | _iswspace(_0: number): number; - 81 | _iswalnum(_0: number): number; - 82 | _ts_query_pattern_count(_0: number): number; - 83 | _ts_query_capture_count(_0: number): number; - 84 | _ts_query_string_count(_0: number): number; - 85 | _ts_query_capture_name_for_id(_0: number, _1: number, _2: number): number; - 86 | _ts_query_capture_quantifier_for_id(_0: number, _1: number, _2: number): number; - 87 
| _ts_query_string_value_for_id(_0: number, _1: number, _2: number): number; - 88 | _ts_query_predicates_for_pattern(_0: number, _1: number, _2: number): number; - 89 | _ts_query_start_byte_for_pattern(_0: number, _1: number): number; - 90 | _ts_query_end_byte_for_pattern(_0: number, _1: number): number; - 91 | _ts_query_is_pattern_rooted(_0: number, _1: number): number; - 92 | _ts_query_is_pattern_non_local(_0: number, _1: number): number; - 93 | _ts_query_is_pattern_guaranteed_at_step(_0: number, _1: number): number; - 94 | _ts_query_disable_capture(_0: number, _1: number, _2: number): void; - 95 | _ts_query_disable_pattern(_0: number, _1: number): void; - 96 | _ts_tree_copy(_0: number): number; - 97 | _ts_tree_delete(_0: number): void; - 98 | _ts_init(): number; - 99 | _ts_parser_new_wasm(): void; - 100 | _ts_parser_enable_logger_wasm(_0: number, _1: number): void; - 101 | _ts_parser_parse_wasm(_0: number, _1: number, _2: number, _3: number, _4: number): number; - 102 | _ts_parser_included_ranges_wasm(_0: number): void; - 103 | _ts_language_type_is_named_wasm(_0: number, _1: number): number; - 104 | _ts_language_type_is_visible_wasm(_0: number, _1: number): number; - 105 | _ts_language_metadata_wasm(_0: number): void; - 106 | _ts_language_supertypes_wasm(_0: number): void; - 107 | _ts_language_subtypes_wasm(_0: number, _1: number): void; - 108 | _ts_tree_root_node_wasm(_0: number): void; - 109 | _ts_tree_root_node_with_offset_wasm(_0: number): void; - 110 | _ts_tree_edit_wasm(_0: number): void; - 111 | _ts_tree_included_ranges_wasm(_0: number): void; - 112 | _ts_tree_get_changed_ranges_wasm(_0: number, _1: number): void; - 113 | _ts_tree_cursor_new_wasm(_0: number): void; - 114 | _ts_tree_cursor_copy_wasm(_0: number): void; - 115 | _ts_tree_cursor_delete_wasm(_0: number): void; - 116 | _ts_tree_cursor_reset_wasm(_0: number): void; - 117 | _ts_tree_cursor_reset_to_wasm(_0: number, _1: number): void; - 118 | _ts_tree_cursor_goto_first_child_wasm(_0: number): 
number; - 119 | _ts_tree_cursor_goto_last_child_wasm(_0: number): number; - 120 | _ts_tree_cursor_goto_first_child_for_index_wasm(_0: number): number; - 121 | _ts_tree_cursor_goto_first_child_for_position_wasm(_0: number): number; - 122 | _ts_tree_cursor_goto_next_sibling_wasm(_0: number): number; - 123 | _ts_tree_cursor_goto_previous_sibling_wasm(_0: number): number; - 124 | _ts_tree_cursor_goto_descendant_wasm(_0: number, _1: number): void; - 125 | _ts_tree_cursor_goto_parent_wasm(_0: number): number; - 126 | _ts_tree_cursor_current_node_type_id_wasm(_0: number): number; - 127 | _ts_tree_cursor_current_node_state_id_wasm(_0: number): number; - 128 | _ts_tree_cursor_current_node_is_named_wasm(_0: number): number; - 129 | _ts_tree_cursor_current_node_is_missing_wasm(_0: number): number; - 130 | _ts_tree_cursor_current_node_id_wasm(_0: number): number; - 131 | _ts_tree_cursor_start_position_wasm(_0: number): void; - 132 | _ts_tree_cursor_end_position_wasm(_0: number): void; - 133 | _ts_tree_cursor_start_index_wasm(_0: number): number; - 134 | _ts_tree_cursor_end_index_wasm(_0: number): number; - 135 | _ts_tree_cursor_current_field_id_wasm(_0: number): number; - 136 | _ts_tree_cursor_current_depth_wasm(_0: number): number; - 137 | _ts_tree_cursor_current_descendant_index_wasm(_0: number): number; - 138 | _ts_tree_cursor_current_node_wasm(_0: number): void; - 139 | _ts_node_symbol_wasm(_0: number): number; - 140 | _ts_node_field_name_for_child_wasm(_0: number, _1: number): number; - 141 | _ts_node_field_name_for_named_child_wasm(_0: number, _1: number): number; - 142 | _ts_node_children_by_field_id_wasm(_0: number, _1: number): void; - 143 | _ts_node_first_child_for_byte_wasm(_0: number): void; - 144 | _ts_node_first_named_child_for_byte_wasm(_0: number): void; - 145 | _ts_node_grammar_symbol_wasm(_0: number): number; - 146 | _ts_node_child_count_wasm(_0: number): number; - 147 | _ts_node_named_child_count_wasm(_0: number): number; - 148 | _ts_node_child_wasm(_0: 
number, _1: number): void; - 149 | _ts_node_named_child_wasm(_0: number, _1: number): void; - 150 | _ts_node_child_by_field_id_wasm(_0: number, _1: number): void; - 151 | _ts_node_next_sibling_wasm(_0: number): void; - 152 | _ts_node_prev_sibling_wasm(_0: number): void; - 153 | _ts_node_next_named_sibling_wasm(_0: number): void; - 154 | _ts_node_prev_named_sibling_wasm(_0: number): void; - 155 | _ts_node_descendant_count_wasm(_0: number): number; - 156 | _ts_node_parent_wasm(_0: number): void; - 157 | _ts_node_child_with_descendant_wasm(_0: number): void; - 158 | _ts_node_descendant_for_index_wasm(_0: number): void; - 159 | _ts_node_named_descendant_for_index_wasm(_0: number): void; - 160 | _ts_node_descendant_for_position_wasm(_0: number): void; - 161 | _ts_node_named_descendant_for_position_wasm(_0: number): void; - 162 | _ts_node_start_point_wasm(_0: number): void; - 163 | _ts_node_end_point_wasm(_0: number): void; - 164 | _ts_node_start_index_wasm(_0: number): number; - 165 | _ts_node_end_index_wasm(_0: number): number; - 166 | _ts_node_to_string_wasm(_0: number): number; - 167 | _ts_node_children_wasm(_0: number): void; - 168 | _ts_node_named_children_wasm(_0: number): void; - 169 | _ts_node_descendants_of_type_wasm(_0: number, _1: number, _2: number, _3: number, _4: number, _5: number, _6: number): void; - 170 | _ts_node_is_named_wasm(_0: number): number; - 171 | _ts_node_has_changes_wasm(_0: number): number; - 172 | _ts_node_has_error_wasm(_0: number): number; - 173 | _ts_node_is_error_wasm(_0: number): number; - 174 | _ts_node_is_missing_wasm(_0: number): number; - 175 | _ts_node_is_extra_wasm(_0: number): number; - 176 | _ts_node_parse_state_wasm(_0: number): number; - 177 | _ts_node_next_parse_state_wasm(_0: number): number; - 178 | _ts_query_matches_wasm(_0: number, _1: number, _2: number, _3: number, _4: number, _5: number, _6: number, _7: number, _8: number, _9: number): void; - 179 | _ts_query_captures_wasm(_0: number, _1: number, _2: number, _3: 
number, _4: number, _5: number, _6: number, _7: number, _8: number, _9: number): void; - 180 | _memset(_0: number, _1: number, _2: number): number; - 181 | _memcpy(_0: number, _1: number, _2: number): number; - 182 | _memmove(_0: number, _1: number, _2: number): number; - 183 | _iswalpha(_0: number): number; - 184 | _iswblank(_0: number): number; - 185 | _iswdigit(_0: number): number; - 186 | _iswlower(_0: number): number; - 187 | _iswupper(_0: number): number; - 188 | _iswxdigit(_0: number): number; - 189 | _memchr(_0: number, _1: number, _2: number): number; - 190 | _strlen(_0: number): number; - 191 | _strcmp(_0: number, _1: number): number; - 192 | _strncat(_0: number, _1: number, _2: number): number; - 193 | _strncpy(_0: number, _1: number, _2: number): number; - 194 | _towlower(_0: number): number; - 195 | _towupper(_0: number): number; - 196 | } - | - 197 | export type MainModule = WasmModule & typeof RuntimeExports & { - 198 | currentParseCallback: ((index: number, position: {row: number, column: number}) => string | undefined) | null; - 199 | currentLogCallback: ((message: string, isLex: boolean) => void) | null; - 200 | currentProgressCallback: ((state: {currentOffset: number, hasError: boolean}) => void) | null; - 201 | currentQueryProgressCallback: ((state: {currentOffset: number}) => void) | null; - 202 | }; - | - 203 | export default function MainModuleFactory(options?: Partial): Promise; - - - --------------------------------------------------------------------------------- -/lib/binding_web/package.nix: --------------------------------------------------------------------------------- - 1 | { - 2 | wasm-test-grammars, - 3 | lib, - 4 | buildNpmPackage, - 5 | rustPlatform, - 6 | cargo, - 7 | pkg-config, - 8 | emscripten, - 9 | src, - 10 | version, - 11 | }: - 12 | buildNpmPackage { - 13 | inherit src version; - | - 14 | pname = "web-tree-sitter"; - | - 15 | npmDepsHash = "sha256-y0GobcskcZTmju90TM64GjeWiBmPFCrTOg0yfccdB+Q="; - | - 16 | 
nativeBuildInputs = [ - 17 | rustPlatform.cargoSetupHook - 18 | cargo - 19 | pkg-config - 20 | emscripten - 21 | ]; - | - 22 | cargoDeps = rustPlatform.importCargoLock { - 23 | lockFile = ../../Cargo.lock; - 24 | }; - | - 25 | doCheck = true; - | - 26 | postPatch = '' - 27 | cp lib/binding_web/package{,-lock}.json . - 28 | ''; - | - 29 | buildPhase = '' - 30 | pushd lib/binding_web - | - 31 | CJS=true npm run build - 32 | CJS=true npm run build:debug - 33 | npm run build:debug - 34 | npm run build - | - 35 | popd - | - 36 | mkdir -p target/release - | - 37 | for grammar in ${wasm-test-grammars}/*.wasm; do - 38 | if [ -f "$grammar" ]; then - 39 | cp "$grammar" target/release/ - 40 | fi - 41 | done - 42 | ''; - | - 43 | checkPhase = '' - 44 | cd lib/binding_web && npm test - 45 | ''; - | - 46 | meta = { - 47 | description = "web-tree-sitter - WebAssembly bindings to the Tree-sitter parsing library."; - 48 | longDescription = '' - 49 | web-tree-sitter provides WebAssembly bindings to the Tree-sitter parsing library. - 50 | It can build a concrete syntax tree for a source file and efficiently update - 51 | the syntax tree as the source file is edited. 
This package provides the WebAssembly bindings - 52 | and a JavaScript API for using them in web browsers - 53 | ''; - 54 | homepage = "https://tree-sitter.github.io/tree-sitter"; - 55 | changelog = "https://github.com/tree-sitter/tree-sitter/releases/tag/v${version}"; - 56 | license = lib.licenses.mit; - 57 | maintainers = with lib.maintainers; [ amaanq ]; - 58 | platforms = lib.platforms.all; - 59 | }; - 60 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/README.md: --------------------------------------------------------------------------------- - 1 | # Web Tree-sitter - | - 2 | [![npmjs.com badge]][npmjs.com] - | - 3 | [npmjs.com]: https://www.npmjs.org/package/web-tree-sitter - 4 | [npmjs.com badge]: https://img.shields.io/npm/v/web-tree-sitter.svg?color=%23BF4A4A - | - 5 | WebAssembly bindings to the [Tree-sitter](https://github.com/tree-sitter/tree-sitter) parsing library. - | - 6 | ## Setup - | - 7 | You can download the `web-tree-sitter.js` and `web-tree-sitter.wasm` files from [the latest GitHub release][gh release] and load - 8 | them using a standalone script: - | - 9 | ```html - 10 | - | - 11 | - 15 | ``` - | - 16 | You can also install [the `web-tree-sitter` module][npm module] from NPM and load it using a system like Webpack: - | - 17 | ```js - 18 | const { Parser } = require('web-tree-sitter'); - 19 | Parser.init().then(() => { /* the library is ready */ }); - 20 | ``` - | - 21 | or Vite: - | - 22 | ```js - 23 | import { Parser } from 'web-tree-sitter'; - 24 | Parser.init().then(() => { /* the library is ready */ }); - 25 | ``` - | - 26 | With Vite, you also need to make sure your server provides the `tree-sitter.wasm` - 27 | file to your `public` directory. 
You can do this automatically with a `postinstall` - 28 | [script](https://docs.npmjs.com/cli/v10/using-npm/scripts) in your `package.json`: - | - 29 | ```js - 30 | "postinstall": "cp node_modules/web-tree-sitter/tree-sitter.wasm public" - 31 | ``` - | - 32 | You can also use this module with [deno](https://deno.land/): - | - 33 | ```js - 34 | import { Parser } from "npm:web-tree-sitter"; - 35 | await Parser.init(); - 36 | // the library is ready - 37 | ``` - | - 38 | To use the debug version of the library, replace your import of `web-tree-sitter` with `web-tree-sitter/debug`: - | - 39 | ```js - 40 | import { Parser } from 'web-tree-sitter/debug'; // or require('web-tree-sitter/debug') - | - 41 | Parser.init().then(() => { /* the library is ready */ }); - 42 | ``` - | - 43 | This will load the debug version of the `.js` and `.wasm` file, which includes debug symbols and assertions. - | - 44 | > [!NOTE] - 45 | > The `web-tree-sitter.js` file on GH releases is an ES6 module. If you are interested in using a pure CommonJS library, such - 46 | > as for Electron, you should use the `web-tree-sitter.cjs` file instead. - | - 47 | ### Basic Usage - | - 48 | First, create a parser: - | - 49 | ```js - 50 | const parser = new Parser(); - 51 | ``` - | - 52 | Then assign a language to the parser. Tree-sitter languages are packaged as individual `.wasm` files (more on this below): - | - 53 | ```js - 54 | const { Language } = require('web-tree-sitter'); - 55 | const JavaScript = await Language.load('/path/to/tree-sitter-javascript.wasm'); - 56 | parser.setLanguage(JavaScript); - 57 | ``` - | - 58 | Now you can parse source code: - | - 59 | ```js - 60 | const sourceCode = 'let x = 1; console.log(x);'; - 61 | const tree = parser.parse(sourceCode); - 62 | ``` - | - 63 | and inspect the syntax tree. 
- | - 64 | ```javascript - 65 | console.log(tree.rootNode.toString()); - | - 66 | // (program - 67 | // (lexical_declaration - 68 | // (variable_declarator (identifier) (number))) - 69 | // (expression_statement - 70 | // (call_expression - 71 | // (member_expression (identifier) (property_identifier)) - 72 | // (arguments (identifier))))) - | - 73 | const callExpression = tree.rootNode.child(1).firstChild; - 74 | console.log(callExpression); - | - 75 | // { type: 'call_expression', - 76 | // startPosition: {row: 0, column: 16}, - 77 | // endPosition: {row: 0, column: 30}, - 78 | // startIndex: 0, - 79 | // endIndex: 30 } - 80 | ``` - | - 81 | ### Editing - | - 82 | If your source code *changes*, you can update the syntax tree. This will take less time than the first parse. - | - 83 | ```javascript - 84 | // Replace 'let' with 'const' - 85 | const newSourceCode = 'const x = 1; console.log(x);'; - | - 86 | tree.edit({ - 87 | startIndex: 0, - 88 | oldEndIndex: 3, - 89 | newEndIndex: 5, - 90 | startPosition: {row: 0, column: 0}, - 91 | oldEndPosition: {row: 0, column: 3}, - 92 | newEndPosition: {row: 0, column: 5}, - 93 | }); - | - 94 | const newTree = parser.parse(newSourceCode, tree); - 95 | ``` - | - 96 | ### Parsing Text From a Custom Data Structure - | - 97 | If your text is stored in a data structure other than a single string, you can parse it by supplying a callback to `parse` - 98 | instead of a string: - | - 99 | ```javascript - 100 | const sourceLines = [ - 101 | 'let x = 1;', - 102 | 'console.log(x);' - 103 | ]; - | - 104 | const tree = parser.parse((index, position) => { - 105 | let line = sourceLines[position.row]; - 106 | if (line) return line.slice(position.column); - 107 | }); - 108 | ``` - | - 109 | ### Getting the `.wasm` language files - | - 110 | There are several options on how to get the `.wasm` files for the languages you want to parse. - | - 111 | #### From npmjs.com - | - 112 | The recommended way is to just install the package from npm. 
For example, to parse JavaScript, you can install the `tree-sitter-javascript` - 113 | package: - | - 114 | ```sh - 115 | npm install tree-sitter-javascript - 116 | ``` - | - 117 | Then you can find the `.wasm` file in the `node_modules/tree-sitter-javascript` directory. - | - 118 | #### From GitHub - | - 119 | You can also download the `.wasm` files from GitHub releases, so long as the repository uses our reusable workflow to publish - 120 | them. - 121 | For example, you can download the JavaScript `.wasm` file from the tree-sitter-javascript [releases page][gh release js]. - | - 122 | #### Generating `.wasm` files - | - 123 | You can also generate the `.wasm` file for your desired grammar. Shown below is an example of how to generate the `.wasm` - 124 | file for the JavaScript grammar. - | - 125 | **IMPORTANT**: [Emscripten][emscripten], [Docker][docker], or [Podman][podman] need to be installed. - | - 126 | First install `tree-sitter-cli`, and the tree-sitter language for which to generate `.wasm` - 127 | (`tree-sitter-javascript` in this example): - | - 128 | ```sh - 129 | npm install --save-dev tree-sitter-cli tree-sitter-javascript - 130 | ``` - | - 131 | Then just use tree-sitter cli tool to generate the `.wasm`. - | - 132 | ```sh - 133 | npx tree-sitter build --wasm node_modules/tree-sitter-javascript - 134 | ``` - | - 135 | If everything is fine, file `tree-sitter-javascript.wasm` should be generated in current directory. - | - 136 | ### Running .wasm in Node.js - | - 137 | Notice that executing `.wasm` files in Node.js is considerably slower than running [Node.js bindings][node bindings]. 
- 138 | However, this could be useful for testing purposes: - | - 139 | ```javascript - 140 | const Parser = require('web-tree-sitter'); - | - 141 | (async () => { - 142 | await Parser.init(); - 143 | const parser = new Parser(); - 144 | const Lang = await Parser.Language.load('tree-sitter-javascript.wasm'); - 145 | parser.setLanguage(Lang); - 146 | const tree = parser.parse('let x = 1;'); - 147 | console.log(tree.rootNode.toString()); - 148 | })(); - 149 | ``` - | - 150 | ### Running .wasm in browser - | - 151 | `web-tree-sitter` can run in the browser, but there are some common pitfalls. - | - 152 | #### Loading the .wasm file - | - 153 | `web-tree-sitter` needs to load the `tree-sitter.wasm` file. By default, it assumes that this file is available in the - 154 | same path as the JavaScript code. Therefore, if the code is being served from `http://localhost:3000/bundle.js`, then - 155 | the Wasm file should be at `http://localhost:3000/tree-sitter.wasm`. - | - 156 | For server side frameworks like NextJS, this can be tricky as pages are often served from a path such as - 157 | `http://localhost:3000/_next/static/chunks/pages/index.js`. The loader will therefore look for the Wasm file at - 158 | `http://localhost:3000/_next/static/chunks/pages/tree-sitter.wasm`. The solution is to pass a `locateFile` function in - 159 | the `moduleOptions` argument to `Parser.init()`: - | - 160 | ```javascript - 161 | await Parser.init({ - 162 | locateFile(scriptName: string, scriptDirectory: string) { - 163 | return scriptName; - 164 | }, - 165 | }); - 166 | ``` - | - 167 | `locateFile` takes in two parameters, `scriptName`, i.e. the Wasm file name, and `scriptDirectory`, i.e. the directory - 168 | where the loader expects the script to be. It returns the path where the loader will look for the Wasm file. 
In the NextJS - 169 | case, we want to return just the `scriptName` so that the loader will look at `http://localhost:3000/tree-sitter.wasm` - 170 | and not `http://localhost:3000/_next/static/chunks/pages/tree-sitter.wasm`. - | - 171 | For more information on the module options you can pass in, see the [emscripten documentation][emscripten-module-options]. - | - 172 | #### "Can't resolve 'fs' in 'node_modules/web-tree-sitter" - | - 173 | Most bundlers will notice that the `web-tree-sitter.js` file is attempting to import `fs`, i.e. node's file system library. - 174 | Since this doesn't exist in the browser, the bundlers will get confused. For Webpack, you can fix this by adding the - 175 | following to your webpack config: - | - 176 | ```javascript - 177 | { - 178 | resolve: { - 179 | fallback: { - 180 | fs: false - 181 | } - 182 | } - 183 | } - 184 | ``` - | - 185 | [docker]: https://www.docker.com - 186 | [emscripten]: https://emscripten.org - 187 | [emscripten-module-options]: https://emscripten.org/docs/api_reference/module.html#affecting-execution - 188 | [gh release]: https://github.com/tree-sitter/tree-sitter/releases/latest - 189 | [gh release js]: https://github.com/tree-sitter/tree-sitter-javascript/releases/latest - 190 | [node bindings]: https://github.com/tree-sitter/node-tree-sitter - 191 | [npm module]: https://www.npmjs.com/package/web-tree-sitter - 192 | [podman]: https://podman.io - - - --------------------------------------------------------------------------------- -/lib/binding_web/script/check-artifacts-fresh.ts: --------------------------------------------------------------------------------- - 1 | import fs from 'fs'; - 2 | import path from 'path'; - 3 | import { fileURLToPath } from 'node:url'; - | - 4 | const scriptDir = path.dirname(fileURLToPath(import.meta.url)); - | - 5 | const inputFiles = [ - 6 | '../lib/tree-sitter.c', - 7 | '../src/constants.ts', - 8 | '../src/index.ts', - 9 | '../src/language.ts', - 10 | 
'../src/lookahead_iterator.ts', - 11 | '../src/marshal.ts', - 12 | '../src/node.ts', - 13 | '../src/parser.ts', - 14 | '../src/query.ts', - 15 | '../src/tree.ts', - 16 | '../src/tree_cursor.ts', - 17 | '../lib/exports.txt', - 18 | '../lib/imports.js', - 19 | '../lib/prefix.js', - 20 | ...listFiles('../../include/tree_sitter'), - 21 | ...listFiles('../../src'), - 22 | ]; - | - 23 | const outputFiles = ['../web-tree-sitter.js', '../web-tree-sitter.wasm']; - 24 | const outputMtime = Math.min(...outputFiles.map(getMtime)); - | - 25 | for (const inputFile of inputFiles) { - 26 | if (getMtime(inputFile) > outputMtime) { - 27 | console.log(`File '${inputFile}' has changed. Re-run 'npm run build:wasm'.`); - 28 | process.exit(1); - 29 | } - 30 | } - | - 31 | function listFiles(dir: string): string[] { - 32 | return fs - 33 | .readdirSync(path.resolve(scriptDir, dir)) - 34 | .filter(p => !p.startsWith('.')) - 35 | .map(p => path.join(dir, p)); - 36 | } - | - 37 | function getMtime(p: string): number { - 38 | return fs.statSync(path.resolve(scriptDir, p)).mtime.getTime(); - 39 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/script/generate-dts.js: --------------------------------------------------------------------------------- - 1 | import { createBundle } from 'dts-buddy'; - | - 2 | for (let ext of ['ts', 'cts']) { - 3 | await createBundle({ - 4 | project: 'tsconfig.json', - 5 | output: `web-tree-sitter.d.${ext}`, - 6 | modules: { - 7 | 'web-tree-sitter': 'src/index.ts' - 8 | }, - 9 | compilerOptions: { - 10 | stripInternal: true, - 11 | }, - 12 | }); - 13 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/bindings.ts: --------------------------------------------------------------------------------- - 1 | import createModule, { type MainModule } from '../lib/web-tree-sitter'; - 2 | // eslint-disable-next-line @typescript-eslint/no-unused-vars - 3 | 
import { type Parser } from './parser'; - | - 4 | export let Module: MainModule | null = null; - | - 5 | /** - 6 | * @internal - 7 | * - 8 | * Initialize the Tree-sitter Wasm module. This should only be called by the {@link Parser} class via {@link Parser.init}. - 9 | */ - 10 | export async function initializeBinding(moduleOptions?: Partial): Promise { - 11 | return Module ??= await createModule(moduleOptions); - 12 | } - | - 13 | /** - 14 | * @internal - 15 | * - 16 | * Checks if the Tree-sitter Wasm module has been initialized. - 17 | */ - 18 | export function checkModule(): boolean { - 19 | return !!Module; - 20 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/constants.ts: --------------------------------------------------------------------------------- - 1 | import { type MainModule } from '../lib/web-tree-sitter'; - 2 | // eslint-disable-next-line @typescript-eslint/no-unused-vars - 3 | import { ParseState, type Parser } from './parser'; - | - 4 | /** - 5 | * A position in a multi-line text document, in terms of rows and columns. - 6 | * - 7 | * Rows and columns are zero-based. - 8 | */ - 9 | export interface Point { - 10 | /** The zero-based row number. */ - 11 | row: number; - | - 12 | /** The zero-based column number. */ - 13 | column: number; - 14 | } - | - 15 | /** - 16 | * A range of positions in a multi-line text document, both in terms of bytes - 17 | * and of rows and columns. - 18 | */ - 19 | export interface Range { - 20 | /** The start position of the range. */ - 21 | startPosition: Point; - | - 22 | /** The end position of the range. */ - 23 | endPosition: Point; - | - 24 | /** The start index of the range. */ - 25 | startIndex: number; - | - 26 | /** The end index of the range. 
*/ - 27 | endIndex: number; - 28 | } - | - 29 | /** @internal */ - 30 | export const SIZE_OF_SHORT = 2; - | - 31 | /** @internal */ - 32 | export const SIZE_OF_INT = 4; - | - 33 | /** @internal */ - 34 | export const SIZE_OF_CURSOR = 4 * SIZE_OF_INT; - | - 35 | /** @internal */ - 36 | export const SIZE_OF_NODE = 5 * SIZE_OF_INT; - | - 37 | /** @internal */ - 38 | export const SIZE_OF_POINT = 2 * SIZE_OF_INT; - | - 39 | /** @internal */ - 40 | export const SIZE_OF_RANGE = 2 * SIZE_OF_INT + 2 * SIZE_OF_POINT; - | - 41 | /** @internal */ - 42 | export const ZERO_POINT: Point = { row: 0, column: 0 }; - | - 43 | /** - 44 | * A callback for parsing that takes an index and point, and should return a string. - 45 | */ - 46 | export type ParseCallback = (index: number, position: Point) => string | undefined; - | - 47 | /** - 48 | * A callback that receives the parse state during parsing. - 49 | */ - 50 | export type ProgressCallback = (progress: ParseState) => boolean; - | - 51 | /** - 52 | * A callback for logging messages. - 53 | * - 54 | * If `isLex` is `true`, the message is from the lexer, otherwise it's from the parser. - 55 | */ - 56 | export type LogCallback = (message: string, isLex: boolean) => void; - | - 57 | // Helper type for internal use - 58 | /** @internal */ - 59 | export const INTERNAL = Symbol('INTERNAL'); - 60 | /** @internal */ - 61 | export type Internal = typeof INTERNAL; - | - 62 | // Helper functions for type checking - 63 | /** @internal */ - 64 | export function assertInternal(x: unknown): asserts x is Internal { - 65 | if (x !== INTERNAL) throw new Error('Illegal constructor'); - 66 | } - | - 67 | /** @internal */ - 68 | export function isPoint(point?: Point): point is Point { - 69 | return ( - 70 | !!point && - 71 | typeof (point).row === 'number' && - 72 | typeof (point).column === 'number' - 73 | ); - 74 | } - | - 75 | /** - 76 | * @internal - 77 | * - 78 | * Sets the Tree-sitter Wasm module. 
This should only be called by the {@link Parser} class via {@link Parser.init}. - 79 | */ - 80 | export function setModule(module: MainModule) { - 81 | C = module; - 82 | } - | - 83 | /** - 84 | * @internal - 85 | * - 86 | * `C` is a convenient shorthand for the Tree-sitter Wasm module, - 87 | * which allows us to call all of the exported functions. - 88 | */ - 89 | export let C: MainModule; - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/edit.ts: --------------------------------------------------------------------------------- - 1 | import { Point, Range } from "./constants"; - | - 2 | export class Edit { - 3 | /** The start position of the change. */ - 4 | startPosition: Point; - | - 5 | /** The end position of the change before the edit. */ - 6 | oldEndPosition: Point; - | - 7 | /** The end position of the change after the edit. */ - 8 | newEndPosition: Point; - | - 9 | /** The start index of the change. */ - 10 | startIndex: number; - | - 11 | /** The end index of the change before the edit. */ - 12 | oldEndIndex: number; - | - 13 | /** The end index of the change after the edit. */ - 14 | newEndIndex: number; - | - 15 | constructor({ - 16 | startIndex, - 17 | oldEndIndex, - 18 | newEndIndex, - 19 | startPosition, - 20 | oldEndPosition, - 21 | newEndPosition, - 22 | }: { - 23 | startIndex: number; - 24 | oldEndIndex: number; - 25 | newEndIndex: number; - 26 | startPosition: Point; - 27 | oldEndPosition: Point; - 28 | newEndPosition: Point; - 29 | }) { - 30 | this.startIndex = startIndex >>> 0; - 31 | this.oldEndIndex = oldEndIndex >>> 0; - 32 | this.newEndIndex = newEndIndex >>> 0; - 33 | this.startPosition = startPosition; - 34 | this.oldEndPosition = oldEndPosition; - 35 | this.newEndPosition = newEndPosition; - 36 | } - | - 37 | /** - 38 | * Edit a point and index to keep it in-sync with source code that has been edited. 
- 39 | * - 40 | * This function updates a single point's byte offset and row/column position - 41 | * based on an edit operation. This is useful for editing points without - 42 | * requiring a tree or node instance. - 43 | */ - 44 | editPoint(point: Point, index: number): { point: Point; index: number } { - 45 | let newIndex = index; - 46 | const newPoint = { ...point }; - | - 47 | if (index >= this.oldEndIndex) { - 48 | newIndex = this.newEndIndex + (index - this.oldEndIndex); - 49 | const originalRow = point.row; - 50 | newPoint.row = this.newEndPosition.row + (point.row - this.oldEndPosition.row); - 51 | newPoint.column = originalRow === this.oldEndPosition.row - 52 | ? this.newEndPosition.column + (point.column - this.oldEndPosition.column) - 53 | : point.column; - 54 | } else if (index > this.startIndex) { - 55 | newIndex = this.newEndIndex; - 56 | newPoint.row = this.newEndPosition.row; - 57 | newPoint.column = this.newEndPosition.column; - 58 | } - | - 59 | return { point: newPoint, index: newIndex }; - 60 | } - | - 61 | /** - 62 | * Edit a range to keep it in-sync with source code that has been edited. - 63 | * - 64 | * This function updates a range's start and end positions based on an edit - 65 | * operation. This is useful for editing ranges without requiring a tree - 66 | * or node instance. - 67 | */ - 68 | editRange(range: Range): Range { - 69 | const newRange: Range = { - 70 | startIndex: range.startIndex, - 71 | startPosition: { ...range.startPosition }, - 72 | endIndex: range.endIndex, - 73 | endPosition: { ...range.endPosition } - 74 | }; - | - 75 | if (range.endIndex >= this.oldEndIndex) { - 76 | if (range.endIndex !== Number.MAX_SAFE_INTEGER) { - 77 | newRange.endIndex = this.newEndIndex + (range.endIndex - this.oldEndIndex); - 78 | newRange.endPosition = { - 79 | row: this.newEndPosition.row + (range.endPosition.row - this.oldEndPosition.row), - 80 | column: range.endPosition.row === this.oldEndPosition.row - 81 | ? 
this.newEndPosition.column + (range.endPosition.column - this.oldEndPosition.column) - 82 | : range.endPosition.column, - 83 | }; - 84 | if (newRange.endIndex < this.newEndIndex) { - 85 | newRange.endIndex = Number.MAX_SAFE_INTEGER; - 86 | newRange.endPosition = { row: Number.MAX_SAFE_INTEGER, column: Number.MAX_SAFE_INTEGER }; - 87 | } - 88 | } - 89 | } else if (range.endIndex > this.startIndex) { - 90 | newRange.endIndex = this.startIndex; - 91 | newRange.endPosition = { ...this.startPosition }; - 92 | } - | - 93 | if (range.startIndex >= this.oldEndIndex) { - 94 | newRange.startIndex = this.newEndIndex + (range.startIndex - this.oldEndIndex); - 95 | newRange.startPosition = { - 96 | row: this.newEndPosition.row + (range.startPosition.row - this.oldEndPosition.row), - 97 | column: range.startPosition.row === this.oldEndPosition.row - 98 | ? this.newEndPosition.column + (range.startPosition.column - this.oldEndPosition.column) - 99 | : range.startPosition.column, - 100 | }; - 101 | if (newRange.startIndex < this.newEndIndex) { - 102 | newRange.startIndex = Number.MAX_SAFE_INTEGER; - 103 | newRange.startPosition = { row: Number.MAX_SAFE_INTEGER, column: Number.MAX_SAFE_INTEGER }; - 104 | } - 105 | } else if (range.startIndex > this.startIndex) { - 106 | newRange.startIndex = this.startIndex; - 107 | newRange.startPosition = { ...this.startPosition }; - 108 | } - | - 109 | return newRange; - 110 | } - 111 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/index.ts: --------------------------------------------------------------------------------- - 1 | export type { - 2 | Point, - 3 | Range, - 4 | ParseCallback, - 5 | ProgressCallback, - 6 | LogCallback, - 7 | } from './constants'; - 8 | export { Edit } from './edit'; - 9 | export { - 10 | type ParseOptions, - 11 | type ParseState, - 12 | LANGUAGE_VERSION, - 13 | MIN_COMPATIBLE_VERSION, - 14 | Parser, - 15 | } from './parser'; - 16 | export { Language } 
from './language'; - 17 | export { Tree } from './tree'; - 18 | export { Node } from './node'; - 19 | export { TreeCursor } from './tree_cursor'; - 20 | export { - 21 | type QueryOptions, - 22 | type QueryState, - 23 | type QueryProperties, - 24 | type QueryPredicate, - 25 | type QueryCapture, - 26 | type QueryMatch, - 27 | CaptureQuantifier, - 28 | type PredicateStep, - 29 | Query, - 30 | } from './query'; - 31 | export { LookaheadIterator } from './lookahead_iterator'; - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/language.ts: --------------------------------------------------------------------------------- - 1 | import { C, INTERNAL, Internal, assertInternal, SIZE_OF_INT, SIZE_OF_SHORT } from './constants'; - 2 | import { LookaheadIterator } from './lookahead_iterator'; - 3 | import { unmarshalLanguageMetadata } from './marshal'; - 4 | import { TRANSFER_BUFFER } from './parser'; - | - 5 | const LANGUAGE_FUNCTION_REGEX = /^tree_sitter_\w+$/; - | - 6 | export interface LanguageMetadata { - 7 | readonly major_version: number; - 8 | readonly minor_version: number; - 9 | readonly patch_version: number; - 10 | } - | - 11 | /** - 12 | * An opaque object that defines how to parse a particular language. - 13 | * The code for each `Language` is generated by the Tree-sitter CLI. - 14 | */ - 15 | export class Language { - 16 | /** @internal */ - 17 | private [0] = 0; // Internal handle for Wasm - | - 18 | /** - 19 | * A list of all node types in the language. The index of each type in this - 20 | * array is its node type id. - 21 | */ - 22 | types: string[]; - | - 23 | /** - 24 | * A list of all field names in the language. The index of each field name in - 25 | * this array is its field id. 
- 26 | */ - 27 | fields: (string | null)[]; - | - 28 | /** @internal */ - 29 | constructor(internal: Internal, address: number) { - 30 | assertInternal(internal); - 31 | this[0] = address; - 32 | this.types = new Array(C._ts_language_symbol_count(this[0])); - 33 | for (let i = 0, n = this.types.length; i < n; i++) { - 34 | if (C._ts_language_symbol_type(this[0], i) < 2) { - 35 | this.types[i] = C.UTF8ToString(C._ts_language_symbol_name(this[0], i)); - 36 | } - 37 | } - 38 | this.fields = new Array(C._ts_language_field_count(this[0]) + 1); - 39 | for (let i = 0, n = this.fields.length; i < n; i++) { - 40 | const fieldName = C._ts_language_field_name_for_id(this[0], i); - 41 | if (fieldName !== 0) { - 42 | this.fields[i] = C.UTF8ToString(fieldName); - 43 | } else { - 44 | this.fields[i] = null; - 45 | } - 46 | } - 47 | } - | - | - 48 | /** - 49 | * Gets the name of the language. - 50 | */ - 51 | get name(): string | null { - 52 | const ptr = C._ts_language_name(this[0]); - 53 | if (ptr === 0) return null; - 54 | return C.UTF8ToString(ptr); - 55 | } - | - 56 | /** - 57 | * Gets the ABI version of the language. - 58 | */ - 59 | get abiVersion(): number { - 60 | return C._ts_language_abi_version(this[0]); - 61 | } - | - 62 | /** - 63 | * Get the metadata for this language. This information is generated by the - 64 | * CLI, and relies on the language author providing the correct metadata in - 65 | * the language's `tree-sitter.json` file. - 66 | */ - 67 | get metadata(): LanguageMetadata | null { - 68 | C._ts_language_metadata_wasm(this[0]); - 69 | const length = C.getValue(TRANSFER_BUFFER, 'i32'); - 70 | if (length === 0) return null; - 71 | return unmarshalLanguageMetadata(TRANSFER_BUFFER + SIZE_OF_INT); - 72 | } - | - 73 | /** - 74 | * Gets the number of fields in the language. - 75 | */ - 76 | get fieldCount(): number { - 77 | return this.fields.length - 1; - 78 | } - | - 79 | /** - 80 | * Gets the number of states in the language. 
- 81 | */ - 82 | get stateCount(): number { - 83 | return C._ts_language_state_count(this[0]); - 84 | } - | - 85 | /** - 86 | * Get the field id for a field name. - 87 | */ - 88 | fieldIdForName(fieldName: string): number | null { - 89 | const result = this.fields.indexOf(fieldName); - 90 | return result !== -1 ? result : null; - 91 | } - | - 92 | /** - 93 | * Get the field name for a field id. - 94 | */ - 95 | fieldNameForId(fieldId: number): string | null { - 96 | return this.fields[fieldId] ?? null; - 97 | } - | - 98 | /** - 99 | * Get the node type id for a node type name. - 100 | */ - 101 | idForNodeType(type: string, named: boolean): number | null { - 102 | const typeLength = C.lengthBytesUTF8(type); - 103 | const typeAddress = C._malloc(typeLength + 1); - 104 | C.stringToUTF8(type, typeAddress, typeLength + 1); - 105 | const result = C._ts_language_symbol_for_name(this[0], typeAddress, typeLength, named ? 1 : 0); - 106 | C._free(typeAddress); - 107 | return result || null; - 108 | } - | - 109 | /** - 110 | * Gets the number of node types in the language. - 111 | */ - 112 | get nodeTypeCount(): number { - 113 | return C._ts_language_symbol_count(this[0]); - 114 | } - | - 115 | /** - 116 | * Get the node type name for a node type id. - 117 | */ - 118 | nodeTypeForId(typeId: number): string | null { - 119 | const name = C._ts_language_symbol_name(this[0], typeId); - 120 | return name ? C.UTF8ToString(name) : null; - 121 | } - | - 122 | /** - 123 | * Check if a node type is named. - 124 | * - 125 | * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/2-basic-parsing.html#named-vs-anonymous-nodes} - 126 | */ - 127 | nodeTypeIsNamed(typeId: number): boolean { - 128 | return C._ts_language_type_is_named_wasm(this[0], typeId) ? true : false; - 129 | } - | - 130 | /** - 131 | * Check if a node type is visible. - 132 | */ - 133 | nodeTypeIsVisible(typeId: number): boolean { - 134 | return C._ts_language_type_is_visible_wasm(this[0], typeId) ? 
true : false; - 135 | } - | - 136 | /** - 137 | * Get the supertypes ids of this language. - 138 | * - 139 | * @see {@link https://tree-sitter.github.io/tree-sitter/using-parsers/6-static-node-types.html?highlight=supertype#supertype-nodes} - 140 | */ - 141 | get supertypes(): number[] { - 142 | C._ts_language_supertypes_wasm(this[0]); - 143 | const count = C.getValue(TRANSFER_BUFFER, 'i32'); - 144 | const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 145 | const result = new Array(count); - | - 146 | if (count > 0) { - 147 | let address = buffer; - 148 | for (let i = 0; i < count; i++) { - 149 | result[i] = C.getValue(address, 'i16'); - 150 | address += SIZE_OF_SHORT; - 151 | } - 152 | } - | - 153 | return result; - 154 | } - | - 155 | /** - 156 | * Get the subtype ids for a given supertype node id. - 157 | */ - 158 | subtypes(supertype: number): number[] { - 159 | C._ts_language_subtypes_wasm(this[0], supertype); - 160 | const count = C.getValue(TRANSFER_BUFFER, 'i32'); - 161 | const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 162 | const result = new Array(count); - | - 163 | if (count > 0) { - 164 | let address = buffer; - 165 | for (let i = 0; i < count; i++) { - 166 | result[i] = C.getValue(address, 'i16'); - 167 | address += SIZE_OF_SHORT; - 168 | } - 169 | } - | - 170 | return result; - 171 | } - | - 172 | /** - 173 | * Get the next state id for a given state id and node type id. - 174 | */ - 175 | nextState(stateId: number, typeId: number): number { - 176 | return C._ts_language_next_state(this[0], stateId, typeId); - 177 | } - | - 178 | /** - 179 | * Create a new lookahead iterator for this language and parse state. - 180 | * - 181 | * This returns `null` if state is invalid for this language. - 182 | * - 183 | * Iterating {@link LookaheadIterator} will yield valid symbols in the given - 184 | * parse state. 
Newly created lookahead iterators will return the `ERROR` - 185 | * symbol from {@link LookaheadIterator#currentType}. - 186 | * - 187 | * Lookahead iterators can be useful for generating suggestions and improving - 188 | * syntax error diagnostics. To get symbols valid in an `ERROR` node, use the - 189 | * lookahead iterator on its first leaf node state. For `MISSING` nodes, a - 190 | * lookahead iterator created on the previous non-extra leaf node may be - 191 | * appropriate. - 192 | */ - 193 | lookaheadIterator(stateId: number): LookaheadIterator | null { - 194 | const address = C._ts_lookahead_iterator_new(this[0], stateId); - 195 | if (address) return new LookaheadIterator(INTERNAL, address, this); - 196 | return null; - 197 | } - | - 198 | /** - 199 | * Load a language from a WebAssembly module. - 200 | * The module can be provided as a path to a file or as a buffer. - 201 | */ - 202 | static async load(input: string | Uint8Array): Promise { - 203 | let binary: Uint8Array | WebAssembly.Module; - 204 | if (input instanceof Uint8Array) { - 205 | binary = input; - 206 | // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition - 207 | } else if (globalThis.process?.versions.node) { - 208 | const fs: typeof import('fs/promises') = await import('fs/promises'); - 209 | binary = await fs.readFile(input); - 210 | } else { - 211 | const response = await fetch(input); - | - 212 | if (!response.ok){ - 213 | const body = await response.text(); - 214 | throw new Error(`Language.load failed with status ${response.status}.\n\n${body}`); - 215 | } - | - 216 | const retryResp = response.clone(); - 217 | try { - 218 | binary = await WebAssembly.compileStreaming(response); - 219 | } catch (reason) { - 220 | console.error('wasm streaming compile failed:', reason); - 221 | console.error('falling back to ArrayBuffer instantiation'); - 222 | // fallback, probably because of bad MIME type - 223 | binary = new Uint8Array(await retryResp.arrayBuffer()) - 224 | } - 225 | 
} - | - 226 | const mod = await C.loadWebAssemblyModule(binary, { loadAsync: true }); - 227 | const symbolNames = Object.keys(mod); - 228 | const functionName = symbolNames.find((key) => LANGUAGE_FUNCTION_REGEX.test(key) && - 229 | !key.includes('external_scanner_')); - 230 | if (!functionName) { - 231 | console.log(`Couldn't find language function in Wasm file. Symbols:\n${JSON.stringify(symbolNames, null, 2)}`); - 232 | throw new Error('Language.load failed: no language function found in Wasm file'); - 233 | } - 234 | const languageAddress = mod[functionName](); - 235 | return new Language(INTERNAL, languageAddress); - 236 | } - 237 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/lookahead_iterator.ts: --------------------------------------------------------------------------------- - 1 | import { C, Internal, assertInternal } from './constants'; - 2 | import { Language } from './language'; - | - 3 | export class LookaheadIterator implements Iterable { - 4 | /** @internal */ - 5 | private [0] = 0; // Internal handle for Wasm - | - 6 | /** @internal */ - 7 | private language: Language; - | - 8 | /** @internal */ - 9 | constructor(internal: Internal, address: number, language: Language) { - 10 | assertInternal(internal); - 11 | this[0] = address; - 12 | this.language = language; - 13 | } - | - 14 | /** Get the current symbol of the lookahead iterator. */ - 15 | get currentTypeId(): number { - 16 | return C._ts_lookahead_iterator_current_symbol(this[0]); - 17 | } - | - 18 | /** Get the current symbol name of the lookahead iterator. */ - 19 | get currentType(): string { - 20 | return this.language.types[this.currentTypeId] || 'ERROR'; - 21 | } - | - 22 | /** Delete the lookahead iterator, freeing its resources. */ - 23 | delete(): void { - 24 | C._ts_lookahead_iterator_delete(this[0]); - 25 | this[0] = 0; - 26 | } - | - | - 27 | /** - 28 | * Reset the lookahead iterator. 
- 29 | * - 30 | * This returns `true` if the language was set successfully and `false` - 31 | * otherwise. - 32 | */ - 33 | reset(language: Language, stateId: number): boolean { - 34 | if (C._ts_lookahead_iterator_reset(this[0], language[0], stateId)) { - 35 | this.language = language; - 36 | return true; - 37 | } - 38 | return false; - 39 | } - | - 40 | /** - 41 | * Reset the lookahead iterator to another state. - 42 | * - 43 | * This returns `true` if the iterator was reset to the given state and - 44 | * `false` otherwise. - 45 | */ - 46 | resetState(stateId: number): boolean { - 47 | return Boolean(C._ts_lookahead_iterator_reset_state(this[0], stateId)); - 48 | } - | - 49 | /** - 50 | * Returns an iterator that iterates over the symbols of the lookahead iterator. - 51 | * - 52 | * The iterator will yield the current symbol name as a string for each step - 53 | * until there are no more symbols to iterate over. - 54 | */ - 55 | [Symbol.iterator](): Iterator { - 56 | return { - 57 | next: (): IteratorResult => { - 58 | if (C._ts_lookahead_iterator_next(this[0])) { - 59 | return { done: false, value: this.currentType }; - 60 | } - 61 | return { done: true, value: '' }; - 62 | } - 63 | }; - 64 | } - 65 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/marshal.ts: --------------------------------------------------------------------------------- - 1 | import { INTERNAL, Point, Range, SIZE_OF_INT, SIZE_OF_NODE, SIZE_OF_POINT, C } from "./constants"; - 2 | import { Node } from "./node"; - 3 | import { Tree } from "./tree"; - 4 | // eslint-disable-next-line @typescript-eslint/no-unused-vars - 5 | import { Query, QueryCapture, type QueryMatch } from "./query"; - 6 | import { TreeCursor } from "./tree_cursor"; - 7 | import { TRANSFER_BUFFER } from "./parser"; - 8 | import { LanguageMetadata } from "./language"; - 9 | import { Edit } from "./edit"; - | - 10 | /** - 11 | * @internal - 12 | * - 13 | * Unmarshals 
a {@link QueryMatch} to the transfer buffer. - 14 | */ - 15 | export function unmarshalCaptures( - 16 | query: Query, - 17 | tree: Tree, - 18 | address: number, - 19 | patternIndex: number, - 20 | result: QueryCapture[] - 21 | ) { - 22 | for (let i = 0, n = result.length; i < n; i++) { - 23 | const captureIndex = C.getValue(address, 'i32'); - 24 | address += SIZE_OF_INT; - 25 | const node = unmarshalNode(tree, address)!; - 26 | address += SIZE_OF_NODE; - 27 | result[i] = {patternIndex, name: query.captureNames[captureIndex], node}; - 28 | } - 29 | return address; - 30 | } - | - 31 | /** - 32 | * @internal - 33 | * - 34 | * Marshals a {@link Node} to the transfer buffer. - 35 | */ - 36 | export function marshalNode(node: Node, index = 0) { - 37 | let address = TRANSFER_BUFFER + index * SIZE_OF_NODE; - 38 | C.setValue(address, node.id, 'i32'); - 39 | address += SIZE_OF_INT; - 40 | C.setValue(address, node.startIndex, 'i32'); - 41 | address += SIZE_OF_INT; - 42 | C.setValue(address, node.startPosition.row, 'i32'); - 43 | address += SIZE_OF_INT; - 44 | C.setValue(address, node.startPosition.column, 'i32'); - 45 | address += SIZE_OF_INT; - 46 | C.setValue(address, node[0], 'i32'); - 47 | } - | - 48 | /** - 49 | * @internal - 50 | * - 51 | * Unmarshals a {@link Node} from the transfer buffer. 
- 52 | */ - 53 | export function unmarshalNode(tree: Tree, address = TRANSFER_BUFFER): Node | null { - 54 | const id = C.getValue(address, 'i32'); - 55 | address += SIZE_OF_INT; - 56 | if (id === 0) return null; - | - 57 | const index = C.getValue(address, 'i32'); - 58 | address += SIZE_OF_INT; - 59 | const row = C.getValue(address, 'i32'); - 60 | address += SIZE_OF_INT; - 61 | const column = C.getValue(address, 'i32'); - 62 | address += SIZE_OF_INT; - 63 | const other = C.getValue(address, 'i32'); - | - 64 | const result = new Node(INTERNAL, { - 65 | id, - 66 | tree, - 67 | startIndex: index, - 68 | startPosition: {row, column}, - 69 | other, - 70 | }); - | - 71 | return result; - 72 | } - | - 73 | /** - 74 | * @internal - 75 | * - 76 | * Marshals a {@link TreeCursor} to the transfer buffer. - 77 | */ - 78 | export function marshalTreeCursor(cursor: TreeCursor, address = TRANSFER_BUFFER) { - 79 | C.setValue(address + 0 * SIZE_OF_INT, cursor[0], 'i32'); - 80 | C.setValue(address + 1 * SIZE_OF_INT, cursor[1], 'i32'); - 81 | C.setValue(address + 2 * SIZE_OF_INT, cursor[2], 'i32'); - 82 | C.setValue(address + 3 * SIZE_OF_INT, cursor[3], 'i32'); - 83 | } - | - 84 | /** - 85 | * @internal - 86 | * - 87 | * Unmarshals a {@link TreeCursor} from the transfer buffer. - 88 | */ - 89 | export function unmarshalTreeCursor(cursor: TreeCursor) { - 90 | cursor[0] = C.getValue(TRANSFER_BUFFER + 0 * SIZE_OF_INT, 'i32'); - 91 | cursor[1] = C.getValue(TRANSFER_BUFFER + 1 * SIZE_OF_INT, 'i32'); - 92 | cursor[2] = C.getValue(TRANSFER_BUFFER + 2 * SIZE_OF_INT, 'i32'); - 93 | cursor[3] = C.getValue(TRANSFER_BUFFER + 3 * SIZE_OF_INT, 'i32'); - 94 | } - | - 95 | /** - 96 | * @internal - 97 | * - 98 | * Marshals a {@link Point} to the transfer buffer. 
- 99 | */ - 100 | export function marshalPoint(address: number, point: Point): void { - 101 | C.setValue(address, point.row, 'i32'); - 102 | C.setValue(address + SIZE_OF_INT, point.column, 'i32'); - 103 | } - | - 104 | /** - 105 | * @internal - 106 | * - 107 | * Unmarshals a {@link Point} from the transfer buffer. - 108 | */ - 109 | export function unmarshalPoint(address: number): Point { - 110 | const result = { - 111 | row: C.getValue(address, 'i32') >>> 0, - 112 | column: C.getValue(address + SIZE_OF_INT, 'i32') >>> 0, - 113 | }; - 114 | return result; - 115 | } - | - 116 | /** - 117 | * @internal - 118 | * - 119 | * Marshals a {@link Range} to the transfer buffer. - 120 | */ - 121 | export function marshalRange(address: number, range: Range): void { - 122 | marshalPoint(address, range.startPosition); address += SIZE_OF_POINT; - 123 | marshalPoint(address, range.endPosition); address += SIZE_OF_POINT; - 124 | C.setValue(address, range.startIndex, 'i32'); address += SIZE_OF_INT; - 125 | C.setValue(address, range.endIndex, 'i32'); address += SIZE_OF_INT; - 126 | } - | - 127 | /** - 128 | * @internal - 129 | * - 130 | * Unmarshals a {@link Range} from the transfer buffer. - 131 | */ - 132 | export function unmarshalRange(address: number): Range { - 133 | const result = {} as Range; - 134 | result.startPosition = unmarshalPoint(address); address += SIZE_OF_POINT; - 135 | result.endPosition = unmarshalPoint(address); address += SIZE_OF_POINT; - 136 | result.startIndex = C.getValue(address, 'i32') >>> 0; address += SIZE_OF_INT; - 137 | result.endIndex = C.getValue(address, 'i32') >>> 0; - 138 | return result; - 139 | } - | - 140 | /** - 141 | * @internal - 142 | * - 143 | * Marshals an {@link Edit} to the transfer buffer. 
- 144 | */ - 145 | export function marshalEdit(edit: Edit, address = TRANSFER_BUFFER) { - 146 | marshalPoint(address, edit.startPosition); address += SIZE_OF_POINT; - 147 | marshalPoint(address, edit.oldEndPosition); address += SIZE_OF_POINT; - 148 | marshalPoint(address, edit.newEndPosition); address += SIZE_OF_POINT; - 149 | C.setValue(address, edit.startIndex, 'i32'); address += SIZE_OF_INT; - 150 | C.setValue(address, edit.oldEndIndex, 'i32'); address += SIZE_OF_INT; - 151 | C.setValue(address, edit.newEndIndex, 'i32'); address += SIZE_OF_INT; - 152 | } - | - 153 | /** - 154 | * @internal - 155 | * - 156 | * Unmarshals a {@link LanguageMetadata} from the transfer buffer. - 157 | */ - 158 | export function unmarshalLanguageMetadata(address: number): LanguageMetadata { - 159 | const major_version = C.getValue(address, 'i32'); - 160 | const minor_version = C.getValue(address += SIZE_OF_INT, 'i32'); - 161 | const patch_version = C.getValue(address += SIZE_OF_INT, 'i32'); - 162 | return { major_version, minor_version, patch_version }; - 163 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/node.ts: --------------------------------------------------------------------------------- - 1 | import { INTERNAL, Internal, assertInternal, SIZE_OF_INT, SIZE_OF_NODE, SIZE_OF_POINT, ZERO_POINT, isPoint, C, Point } from './constants'; - 2 | import { getText, Tree } from './tree'; - 3 | import { TreeCursor } from './tree_cursor'; - 4 | // eslint-disable-next-line @typescript-eslint/no-unused-vars - 5 | import { Language } from './language'; - 6 | import { marshalNode, marshalPoint, unmarshalNode, unmarshalPoint } from './marshal'; - 7 | import { TRANSFER_BUFFER } from './parser'; - 8 | import { Edit } from './edit'; - | - 9 | /** A single node within a syntax {@link Tree}. 
*/ - 10 | export class Node { - 11 | /** @internal */ - 12 | // @ts-expect-error: never read - 13 | private [0] = 0; // Internal handle for Wasm - | - 14 | /** @internal */ - 15 | private _children?: Node[]; - | - 16 | /** @internal */ - 17 | private _namedChildren?: Node[]; - | - 18 | /** @internal */ - 19 | constructor( - 20 | internal: Internal, - 21 | { - 22 | id, - 23 | tree, - 24 | startIndex, - 25 | startPosition, - 26 | other, - 27 | }: { - 28 | id: number; - 29 | tree: Tree; - 30 | startIndex: number; - 31 | startPosition: Point; - 32 | other: number; - 33 | } - 34 | ) { - 35 | assertInternal(internal); - 36 | this[0] = other; - 37 | this.id = id; - 38 | this.tree = tree; - 39 | this.startIndex = startIndex; - 40 | this.startPosition = startPosition; - 41 | } - | - 42 | /** - 43 | * The numeric id for this node that is unique. - 44 | * - 45 | * Within a given syntax tree, no two nodes have the same id. However: - 46 | * - 47 | * * If a new tree is created based on an older tree, and a node from the old tree is reused in - 48 | * the process, then that node will have the same id in both trees. - 49 | * - 50 | * * A node not marked as having changes does not guarantee it was reused. - 51 | * - 52 | * * If a node is marked as having changed in the old tree, it will not be reused. - 53 | */ - 54 | id: number; - | - 55 | /** The byte index where this node starts. */ - 56 | startIndex: number; - | - 57 | /** The position where this node starts. */ - 58 | startPosition: Point; - | - 59 | /** The tree that this node belongs to. */ - 60 | tree: Tree; - | - 61 | /** Get this node's type as a numerical id. */ - 62 | get typeId(): number { - 63 | marshalNode(this); - 64 | return C._ts_node_symbol_wasm(this.tree[0]); - 65 | } - | - 66 | /** - 67 | * Get the node's type as a numerical id as it appears in the grammar, - 68 | * ignoring aliases. 
- 69 | */ - 70 | get grammarId(): number { - 71 | marshalNode(this); - 72 | return C._ts_node_grammar_symbol_wasm(this.tree[0]); - 73 | } - | - 74 | /** Get this node's type as a string. */ - 75 | get type(): string { - 76 | return this.tree.language.types[this.typeId] || 'ERROR'; - 77 | } - | - 78 | /** - 79 | * Get this node's symbol name as it appears in the grammar, ignoring - 80 | * aliases as a string. - 81 | */ - 82 | get grammarType(): string { - 83 | return this.tree.language.types[this.grammarId] || 'ERROR'; - 84 | } - | - 85 | /** - 86 | * Check if this node is *named*. - 87 | * - 88 | * Named nodes correspond to named rules in the grammar, whereas - 89 | * *anonymous* nodes correspond to string literals in the grammar. - 90 | */ - 91 | get isNamed(): boolean { - 92 | marshalNode(this); - 93 | return C._ts_node_is_named_wasm(this.tree[0]) === 1; - 94 | } - | - 95 | /** - 96 | * Check if this node is *extra*. - 97 | * - 98 | * Extra nodes represent things like comments, which are not required - 99 | * by the grammar, but can appear anywhere. - 100 | */ - 101 | get isExtra(): boolean { - 102 | marshalNode(this); - 103 | return C._ts_node_is_extra_wasm(this.tree[0]) === 1; - 104 | } - | - 105 | /** - 106 | * Check if this node represents a syntax error. - 107 | * - 108 | * Syntax errors represent parts of the code that could not be incorporated - 109 | * into a valid syntax tree. - 110 | */ - 111 | get isError(): boolean { - 112 | marshalNode(this); - 113 | return C._ts_node_is_error_wasm(this.tree[0]) === 1; - 114 | } - | - 115 | /** - 116 | * Check if this node is *missing*. - 117 | * - 118 | * Missing nodes are inserted by the parser in order to recover from - 119 | * certain kinds of syntax errors. - 120 | */ - 121 | get isMissing(): boolean { - 122 | marshalNode(this); - 123 | return C._ts_node_is_missing_wasm(this.tree[0]) === 1; - 124 | } - | - 125 | /** Check if this node has been edited. 
*/ - 126 | get hasChanges(): boolean { - 127 | marshalNode(this); - 128 | return C._ts_node_has_changes_wasm(this.tree[0]) === 1; - 129 | } - | - 130 | /** - 131 | * Check if this node represents a syntax error or contains any syntax - 132 | * errors anywhere within it. - 133 | */ - 134 | get hasError(): boolean { - 135 | marshalNode(this); - 136 | return C._ts_node_has_error_wasm(this.tree[0]) === 1; - 137 | } - | - 138 | /** Get the byte index where this node ends. */ - 139 | get endIndex(): number { - 140 | marshalNode(this); - 141 | return C._ts_node_end_index_wasm(this.tree[0]); - 142 | } - | - 143 | /** Get the position where this node ends. */ - 144 | get endPosition(): Point { - 145 | marshalNode(this); - 146 | C._ts_node_end_point_wasm(this.tree[0]); - 147 | return unmarshalPoint(TRANSFER_BUFFER); - 148 | } - | - 149 | /** Get the string content of this node. */ - 150 | get text(): string { - 151 | return getText(this.tree, this.startIndex, this.endIndex, this.startPosition); - 152 | } - | - 153 | /** Get this node's parse state. */ - 154 | get parseState(): number { - 155 | marshalNode(this); - 156 | return C._ts_node_parse_state_wasm(this.tree[0]); - 157 | } - | - 158 | /** Get the parse state after this node. */ - 159 | get nextParseState(): number { - 160 | marshalNode(this); - 161 | return C._ts_node_next_parse_state_wasm(this.tree[0]); - 162 | } - | - 163 | /** Check if this node is equal to another node. */ - 164 | equals(other: Node): boolean { - 165 | return this.tree === other.tree && this.id === other.id; - 166 | } - | - 167 | /** - 168 | * Get the node's child at the given index, where zero represents the first child. - 169 | * - 170 | * This method is fairly fast, but its cost is technically log(n), so if - 171 | * you might be iterating over a long list of children, you should use - 172 | * {@link Node#children} instead. 
- 173 | */ - 174 | child(index: number): Node | null { - 175 | marshalNode(this); - 176 | C._ts_node_child_wasm(this.tree[0], index); - 177 | return unmarshalNode(this.tree); - 178 | } - | - 179 | /** - 180 | * Get this node's *named* child at the given index. - 181 | * - 182 | * See also {@link Node#isNamed}. - 183 | * This method is fairly fast, but its cost is technically log(n), so if - 184 | * you might be iterating over a long list of children, you should use - 185 | * {@link Node#namedChildren} instead. - 186 | */ - 187 | namedChild(index: number): Node | null { - 188 | marshalNode(this); - 189 | C._ts_node_named_child_wasm(this.tree[0], index); - 190 | return unmarshalNode(this.tree); - 191 | } - | - 192 | /** - 193 | * Get this node's child with the given numerical field id. - 194 | * - 195 | * See also {@link Node#childForFieldName}. You can - 196 | * convert a field name to an id using {@link Language#fieldIdForName}. - 197 | */ - 198 | childForFieldId(fieldId: number): Node | null { - 199 | marshalNode(this); - 200 | C._ts_node_child_by_field_id_wasm(this.tree[0], fieldId); - 201 | return unmarshalNode(this.tree); - 202 | } - | - 203 | /** - 204 | * Get the first child with the given field name. - 205 | * - 206 | * If multiple children may have the same field name, access them using - 207 | * {@link Node#childrenForFieldName}. - 208 | */ - 209 | childForFieldName(fieldName: string): Node | null { - 210 | const fieldId = this.tree.language.fields.indexOf(fieldName); - 211 | if (fieldId !== -1) return this.childForFieldId(fieldId); - 212 | return null; - 213 | } - | - 214 | /** Get the field name of this node's child at the given index. 
*/ - 215 | fieldNameForChild(index: number): string | null { - 216 | marshalNode(this); - 217 | const address = C._ts_node_field_name_for_child_wasm(this.tree[0], index); - 218 | if (!address) return null; - 219 | return C.AsciiToString(address); - 220 | } - | - 221 | /** Get the field name of this node's named child at the given index. */ - 222 | fieldNameForNamedChild(index: number): string | null { - 223 | marshalNode(this); - 224 | const address = C._ts_node_field_name_for_named_child_wasm(this.tree[0], index); - 225 | if (!address) return null; - 226 | return C.AsciiToString(address); - 227 | } - 228 | /** - 229 | * Get an array of this node's children with a given field name. - 230 | * - 231 | * See also {@link Node#children}. - 232 | */ - 233 | childrenForFieldName(fieldName: string): Node[] { - 234 | const fieldId = this.tree.language.fields.indexOf(fieldName); - 235 | if (fieldId !== -1 && fieldId !== 0) return this.childrenForFieldId(fieldId); - 236 | return []; - 237 | } - | - 238 | /** - 239 | * Get an array of this node's children with a given field id. - 240 | * - 241 | * See also {@link Node#childrenForFieldName}. - 242 | */ - 243 | childrenForFieldId(fieldId: number): Node[] { - 244 | marshalNode(this); - 245 | C._ts_node_children_by_field_id_wasm(this.tree[0], fieldId); - 246 | const count = C.getValue(TRANSFER_BUFFER, 'i32'); - 247 | const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 248 | const result = new Array(count); - | - 249 | if (count > 0) { - 250 | let address = buffer; - 251 | for (let i = 0; i < count; i++) { - 252 | result[i] = unmarshalNode(this.tree, address)!; - 253 | address += SIZE_OF_NODE; - 254 | } - 255 | C._free(buffer); - 256 | } - 257 | return result; - 258 | } - | - 259 | /** Get the node's first child that contains or starts after the given byte offset. 
*/ - 260 | firstChildForIndex(index: number): Node | null { - 261 | marshalNode(this); - 262 | const address = TRANSFER_BUFFER + SIZE_OF_NODE; - 263 | C.setValue(address, index, 'i32'); - 264 | C._ts_node_first_child_for_byte_wasm(this.tree[0]); - 265 | return unmarshalNode(this.tree); - 266 | } - | - 267 | /** Get the node's first named child that contains or starts after the given byte offset. */ - 268 | firstNamedChildForIndex(index: number): Node | null { - 269 | marshalNode(this); - 270 | const address = TRANSFER_BUFFER + SIZE_OF_NODE; - 271 | C.setValue(address, index, 'i32'); - 272 | C._ts_node_first_named_child_for_byte_wasm(this.tree[0]); - 273 | return unmarshalNode(this.tree); - 274 | } - | - 275 | /** Get this node's number of children. */ - 276 | get childCount(): number { - 277 | marshalNode(this); - 278 | return C._ts_node_child_count_wasm(this.tree[0]); - 279 | } - | - | - 280 | /** - 281 | * Get this node's number of *named* children. - 282 | * - 283 | * See also {@link Node#isNamed}. - 284 | */ - 285 | get namedChildCount(): number { - 286 | marshalNode(this); - 287 | return C._ts_node_named_child_count_wasm(this.tree[0]); - 288 | } - | - 289 | /** Get this node's first child. */ - 290 | get firstChild(): Node | null { - 291 | return this.child(0); - 292 | } - | - 293 | /** - 294 | * Get this node's first named child. - 295 | * - 296 | * See also {@link Node#isNamed}. - 297 | */ - 298 | get firstNamedChild(): Node | null { - 299 | return this.namedChild(0); - 300 | } - | - 301 | /** Get this node's last child. */ - 302 | get lastChild(): Node | null { - 303 | return this.child(this.childCount - 1); - 304 | } - | - 305 | /** - 306 | * Get this node's last named child. - 307 | * - 308 | * See also {@link Node#isNamed}. - 309 | */ - 310 | get lastNamedChild(): Node | null { - 311 | return this.namedChild(this.namedChildCount - 1); - 312 | } - | - 313 | /** - 314 | * Iterate over this node's children. 
- 315 | * - 316 | * If you're walking the tree recursively, you may want to use the - 317 | * {@link TreeCursor} APIs directly instead. - 318 | */ - 319 | get children(): Node[] { - 320 | if (!this._children) { - 321 | marshalNode(this); - 322 | C._ts_node_children_wasm(this.tree[0]); - 323 | const count = C.getValue(TRANSFER_BUFFER, 'i32'); - 324 | const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 325 | this._children = new Array(count); - 326 | if (count > 0) { - 327 | let address = buffer; - 328 | for (let i = 0; i < count; i++) { - 329 | this._children[i] = unmarshalNode(this.tree, address)!; - 330 | address += SIZE_OF_NODE; - 331 | } - 332 | C._free(buffer); - 333 | } - 334 | } - 335 | return this._children; - 336 | } - | - 337 | /** - 338 | * Iterate over this node's named children. - 339 | * - 340 | * See also {@link Node#children}. - 341 | */ - 342 | get namedChildren(): Node[] { - 343 | if (!this._namedChildren) { - 344 | marshalNode(this); - 345 | C._ts_node_named_children_wasm(this.tree[0]); - 346 | const count = C.getValue(TRANSFER_BUFFER, 'i32'); - 347 | const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 348 | this._namedChildren = new Array(count); - 349 | if (count > 0) { - 350 | let address = buffer; - 351 | for (let i = 0; i < count; i++) { - 352 | this._namedChildren[i] = unmarshalNode(this.tree, address)!; - 353 | address += SIZE_OF_NODE; - 354 | } - 355 | C._free(buffer); - 356 | } - 357 | } - 358 | return this._namedChildren; - 359 | } - | - 360 | /** - 361 | * Get the descendants of this node that are the given type, or in the given types array. - 362 | * - 363 | * The types array should contain node type strings, which can be retrieved from {@link Language#types}. - 364 | * - 365 | * Additionally, a `startPosition` and `endPosition` can be passed in to restrict the search to a byte range. 
- 366 | */ - 367 | descendantsOfType( - 368 | types: string | string[], - 369 | startPosition: Point = ZERO_POINT, - 370 | endPosition: Point = ZERO_POINT - 371 | ): Node[] { - 372 | if (!Array.isArray(types)) types = [types]; - | - 373 | // Convert the type strings to numeric type symbols - 374 | const symbols: number[] = []; - 375 | const typesBySymbol = this.tree.language.types; - 376 | for (const node_type of types) { - 377 | if (node_type == "ERROR") { - 378 | symbols.push(65535); // Internally, ts_builtin_sym_error is -1, which is UINT_16MAX - 379 | } - 380 | } - 381 | for (let i = 0, n = typesBySymbol.length; i < n; i++) { - 382 | if (types.includes(typesBySymbol[i])) { - 383 | symbols.push(i); - 384 | } - 385 | } - | - 386 | // Copy the array of symbols to the Wasm heap - 387 | const symbolsAddress = C._malloc(SIZE_OF_INT * symbols.length); - 388 | for (let i = 0, n = symbols.length; i < n; i++) { - 389 | C.setValue(symbolsAddress + i * SIZE_OF_INT, symbols[i], 'i32'); - 390 | } - | - 391 | // Call the C API to compute the descendants - 392 | marshalNode(this); - 393 | C._ts_node_descendants_of_type_wasm( - 394 | this.tree[0], - 395 | symbolsAddress, - 396 | symbols.length, - 397 | startPosition.row, - 398 | startPosition.column, - 399 | endPosition.row, - 400 | endPosition.column - 401 | ); - | - 402 | // Instantiate the nodes based on the data returned - 403 | const descendantCount = C.getValue(TRANSFER_BUFFER, 'i32'); - 404 | const descendantAddress = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 405 | const result = new Array(descendantCount); - 406 | if (descendantCount > 0) { - 407 | let address = descendantAddress; - 408 | for (let i = 0; i < descendantCount; i++) { - 409 | result[i] = unmarshalNode(this.tree, address)!; - 410 | address += SIZE_OF_NODE; - 411 | } - 412 | } - | - 413 | // Free the intermediate buffers - 414 | C._free(descendantAddress); - 415 | C._free(symbolsAddress); - 416 | return result; - 417 | } - | - 418 | /** Get this 
node's next sibling. */ - 419 | get nextSibling(): Node | null { - 420 | marshalNode(this); - 421 | C._ts_node_next_sibling_wasm(this.tree[0]); - 422 | return unmarshalNode(this.tree); - 423 | } - | - 424 | /** Get this node's previous sibling. */ - 425 | get previousSibling(): Node | null { - 426 | marshalNode(this); - 427 | C._ts_node_prev_sibling_wasm(this.tree[0]); - 428 | return unmarshalNode(this.tree); - 429 | } - | - 430 | /** - 431 | * Get this node's next *named* sibling. - 432 | * - 433 | * See also {@link Node#isNamed}. - 434 | */ - 435 | get nextNamedSibling(): Node | null { - 436 | marshalNode(this); - 437 | C._ts_node_next_named_sibling_wasm(this.tree[0]); - 438 | return unmarshalNode(this.tree); - 439 | } - | - 440 | /** - 441 | * Get this node's previous *named* sibling. - 442 | * - 443 | * See also {@link Node#isNamed}. - 444 | */ - 445 | get previousNamedSibling(): Node | null { - 446 | marshalNode(this); - 447 | C._ts_node_prev_named_sibling_wasm(this.tree[0]); - 448 | return unmarshalNode(this.tree); - 449 | } - | - 450 | /** Get the node's number of descendants, including one for the node itself. */ - 451 | get descendantCount(): number { - 452 | marshalNode(this); - 453 | return C._ts_node_descendant_count_wasm(this.tree[0]); - 454 | } - | - 455 | /** - 456 | * Get this node's immediate parent. - 457 | * Prefer {@link Node#childWithDescendant} for iterating over this node's ancestors. - 458 | */ - 459 | get parent(): Node | null { - 460 | marshalNode(this); - 461 | C._ts_node_parent_wasm(this.tree[0]); - 462 | return unmarshalNode(this.tree); - 463 | } - | - 464 | /** - 465 | * Get the node that contains `descendant`. - 466 | * - 467 | * Note that this can return `descendant` itself. 
- 468 | */ - 469 | childWithDescendant(descendant: Node): Node | null { - 470 | marshalNode(this); - 471 | marshalNode(descendant, 1); - 472 | C._ts_node_child_with_descendant_wasm(this.tree[0]); - 473 | return unmarshalNode(this.tree); - 474 | } - | - 475 | /** Get the smallest node within this node that spans the given byte range. */ - 476 | descendantForIndex(start: number, end: number = start): Node | null { - 477 | if (typeof start !== 'number' || typeof end !== 'number') { - 478 | throw new Error('Arguments must be numbers'); - 479 | } - | - 480 | marshalNode(this); - 481 | const address = TRANSFER_BUFFER + SIZE_OF_NODE; - 482 | C.setValue(address, start, 'i32'); - 483 | C.setValue(address + SIZE_OF_INT, end, 'i32'); - 484 | C._ts_node_descendant_for_index_wasm(this.tree[0]); - 485 | return unmarshalNode(this.tree); - 486 | } - | - 487 | /** Get the smallest named node within this node that spans the given byte range. */ - 488 | namedDescendantForIndex(start: number, end: number = start): Node | null { - 489 | if (typeof start !== 'number' || typeof end !== 'number') { - 490 | throw new Error('Arguments must be numbers'); - 491 | } - | - 492 | marshalNode(this); - 493 | const address = TRANSFER_BUFFER + SIZE_OF_NODE; - 494 | C.setValue(address, start, 'i32'); - 495 | C.setValue(address + SIZE_OF_INT, end, 'i32'); - 496 | C._ts_node_named_descendant_for_index_wasm(this.tree[0]); - 497 | return unmarshalNode(this.tree); - 498 | } - | - 499 | /** Get the smallest node within this node that spans the given point range. 
*/ - 500 | descendantForPosition(start: Point, end: Point = start) { - 501 | if (!isPoint(start) || !isPoint(end)) { - 502 | throw new Error('Arguments must be {row, column} objects'); - 503 | } - | - 504 | marshalNode(this); - 505 | const address = TRANSFER_BUFFER + SIZE_OF_NODE; - 506 | marshalPoint(address, start); - 507 | marshalPoint(address + SIZE_OF_POINT, end); - 508 | C._ts_node_descendant_for_position_wasm(this.tree[0]); - 509 | return unmarshalNode(this.tree); - 510 | } - | - 511 | /** Get the smallest named node within this node that spans the given point range. */ - 512 | namedDescendantForPosition(start: Point, end: Point = start) { - 513 | if (!isPoint(start) || !isPoint(end)) { - 514 | throw new Error('Arguments must be {row, column} objects'); - 515 | } - | - 516 | marshalNode(this); - 517 | const address = TRANSFER_BUFFER + SIZE_OF_NODE; - 518 | marshalPoint(address, start); - 519 | marshalPoint(address + SIZE_OF_POINT, end); - 520 | C._ts_node_named_descendant_for_position_wasm(this.tree[0]); - 521 | return unmarshalNode(this.tree); - 522 | } - | - 523 | /** - 524 | * Create a new {@link TreeCursor} starting from this node. - 525 | * - 526 | * Note that the given node is considered the root of the cursor, - 527 | * and the cursor cannot walk outside this node. - 528 | */ - 529 | walk(): TreeCursor { - 530 | marshalNode(this); - 531 | C._ts_tree_cursor_new_wasm(this.tree[0]); - 532 | return new TreeCursor(INTERNAL, this.tree); - 533 | } - | - 534 | /** - 535 | * Edit this node to keep it in-sync with source code that has been edited. - 536 | * - 537 | * This function is only rarely needed. When you edit a syntax tree with - 538 | * the {@link Tree#edit} method, all of the nodes that you retrieve from - 539 | * the tree afterward will already reflect the edit. You only need to - 540 | * use {@link Node#edit} when you have a specific {@link Node} instance that - 541 | * you want to keep and continue to use after an edit. 
- 542 | */ - 543 | edit(edit: Edit) { - 544 | if (this.startIndex >= edit.oldEndIndex) { - 545 | this.startIndex = edit.newEndIndex + (this.startIndex - edit.oldEndIndex); - 546 | let subbedPointRow; - 547 | let subbedPointColumn; - 548 | if (this.startPosition.row > edit.oldEndPosition.row) { - 549 | subbedPointRow = this.startPosition.row - edit.oldEndPosition.row; - 550 | subbedPointColumn = this.startPosition.column; - 551 | } else { - 552 | subbedPointRow = 0; - 553 | subbedPointColumn = this.startPosition.column; - 554 | if (this.startPosition.column >= edit.oldEndPosition.column) { - 555 | subbedPointColumn = - 556 | this.startPosition.column - edit.oldEndPosition.column; - 557 | } - 558 | } - | - 559 | if (subbedPointRow > 0) { - 560 | this.startPosition.row += subbedPointRow; - 561 | this.startPosition.column = subbedPointColumn; - 562 | } else { - 563 | this.startPosition.column += subbedPointColumn; - 564 | } - 565 | } else if (this.startIndex > edit.startIndex) { - 566 | this.startIndex = edit.newEndIndex; - 567 | this.startPosition.row = edit.newEndPosition.row; - 568 | this.startPosition.column = edit.newEndPosition.column; - 569 | } - 570 | } - | - 571 | /** Get the S-expression representation of this node. 
*/ - 572 | toString(): string { - 573 | marshalNode(this); - 574 | const address = C._ts_node_to_string_wasm(this.tree[0]); - 575 | const result = C.AsciiToString(address); - 576 | C._free(address); - 577 | return result; - 578 | } - 579 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/parser.ts: --------------------------------------------------------------------------------- - 1 | import { C, INTERNAL, LogCallback, ParseCallback, Range, SIZE_OF_INT, SIZE_OF_RANGE, setModule } from './constants'; - 2 | import { Language } from './language'; - 3 | import { marshalRange, unmarshalRange } from './marshal'; - 4 | import { checkModule, initializeBinding } from './bindings'; - 5 | import { Tree } from './tree'; - | - 6 | /** - 7 | * Options for parsing - 8 | * - 9 | * The `includedRanges` property is an array of {@link Range} objects that - 10 | * represent the ranges of text that the parser should include when parsing. - 11 | * - 12 | * The `progressCallback` property is a function that is called periodically - 13 | * during parsing to check whether parsing should be cancelled. - 14 | * - 15 | * See {@link Parser#parse} for more information. - 16 | */ - 17 | export interface ParseOptions { - 18 | /** - 19 | * An array of {@link Range} objects that - 20 | * represent the ranges of text that the parser should include when parsing. - 21 | * - 22 | * This sets the ranges of text that the parser should include when parsing. - 23 | * By default, the parser will always include entire documents. This - 24 | * function allows you to parse only a *portion* of a document but - 25 | * still return a syntax tree whose ranges match up with the document - 26 | * as a whole. You can also pass multiple disjoint ranges. - 27 | * If `ranges` is empty, then the entire document will be parsed. - 28 | * Otherwise, the given ranges must be ordered from earliest to latest - 29 | * in the document, and they must not overlap. 
That is, the following - 30 | * must hold for all `i` < `length - 1`: - 31 | * ```text - 32 | * ranges[i].end_byte <= ranges[i + 1].start_byte - 33 | * ``` - 34 | */ - 35 | includedRanges?: Range[]; - | - 36 | /** - 37 | * A function that is called periodically during parsing to check - 38 | * whether parsing should be cancelled. If the progress callback returns - 39 | * `true`, then parsing will be cancelled. You can also use this to instrument - 40 | * parsing and check where the parser is at in the document. The progress callback - 41 | * takes a single argument, which is a {@link ParseState} representing the current - 42 | * state of the parser. - 43 | */ - 44 | progressCallback?: (state: ParseState) => void; - 45 | } - | - 46 | /** - 47 | * A stateful object that is passed into the progress callback {@link ParseOptions#progressCallback} - 48 | * to provide the current state of the parser. - 49 | */ - 50 | export interface ParseState { - 51 | /** The byte offset in the document that the parser is at. */ - 52 | currentOffset: number; - | - 53 | /** Indicates whether the parser has encountered an error during parsing. */ - 54 | hasError: boolean; - 55 | } - | - 56 | /** - 57 | * @internal - 58 | * - 59 | * Global variable for transferring data across the FFI boundary - 60 | */ - 61 | export let TRANSFER_BUFFER: number; - | - 62 | /** - 63 | * The latest ABI version that is supported by the current version of the - 64 | * library. - 65 | * - 66 | * When Languages are generated by the Tree-sitter CLI, they are - 67 | * assigned an ABI version number that corresponds to the current CLI version. - 68 | * The Tree-sitter library is generally backwards-compatible with languages - 69 | * generated using older CLI versions, but is not forwards-compatible. - 70 | */ - 71 | export let LANGUAGE_VERSION: number; - | - 72 | /** - 73 | * The earliest ABI version that is supported by the current version of the - 74 | * library. 
- 75 | */ - 76 | export let MIN_COMPATIBLE_VERSION: number; - | - 77 | /** - 78 | * A stateful object that is used to produce a {@link Tree} based on some - 79 | * source code. - 80 | */ - 81 | export class Parser { - 82 | /** @internal */ - 83 | private [0] = 0; // Internal handle for Wasm - | - 84 | /** @internal */ - 85 | private [1] = 0; // Internal handle for Wasm - | - 86 | /** @internal */ - 87 | private logCallback: LogCallback | null = null; - | - 88 | /** The parser's current language. */ - 89 | language: Language | null = null; - | - 90 | /** - 91 | * This must always be called before creating a Parser. - 92 | * - 93 | * You can optionally pass in options to configure the Wasm module, the most common - 94 | * one being `locateFile` to help the module find the `.wasm` file. - 95 | */ - 96 | static async init(moduleOptions?: Partial) { - 97 | setModule(await initializeBinding(moduleOptions)); - 98 | TRANSFER_BUFFER = C._ts_init(); - 99 | LANGUAGE_VERSION = C.getValue(TRANSFER_BUFFER, 'i32'); - 100 | MIN_COMPATIBLE_VERSION = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 101 | } - | - 102 | /** - 103 | * Create a new parser. - 104 | */ - 105 | constructor() { - 106 | this.initialize(); - 107 | } - | - 108 | /** @internal */ - 109 | initialize() { - 110 | if (!checkModule()) { - 111 | throw new Error("cannot construct a Parser before calling `init()`"); - 112 | } - 113 | C._ts_parser_new_wasm(); - 114 | this[0] = C.getValue(TRANSFER_BUFFER, 'i32'); - 115 | this[1] = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 116 | } - | - 117 | /** Delete the parser, freeing its resources. */ - 118 | delete() { - 119 | C._ts_parser_delete(this[0]); - 120 | C._free(this[1]); - 121 | this[0] = 0; - 122 | this[1] = 0; - 123 | } - | - 124 | /** - 125 | * Set the language that the parser should use for parsing. - 126 | * - 127 | * If the language was not successfully assigned, an error will be thrown. 
- 128 | * This happens if the language was generated with an incompatible - 129 | * version of the Tree-sitter CLI. Check the language's version using - 130 | * {@link Language#version} and compare it to this library's - 131 | * {@link LANGUAGE_VERSION} and {@link MIN_COMPATIBLE_VERSION} constants. - 132 | */ - 133 | setLanguage(language: Language | null): this { - 134 | let address: number; - 135 | if (!language) { - 136 | address = 0; - 137 | this.language = null; - 138 | } else if (language.constructor === Language) { - 139 | address = language[0]; - 140 | const version = C._ts_language_abi_version(address); - 141 | if (version < MIN_COMPATIBLE_VERSION || LANGUAGE_VERSION < version) { - 142 | throw new Error( - 143 | `Incompatible language version ${version}. ` + - 144 | `Compatibility range ${MIN_COMPATIBLE_VERSION} through ${LANGUAGE_VERSION}.` - 145 | ); - 146 | } - 147 | this.language = language; - 148 | } else { - 149 | throw new Error('Argument must be a Language'); - 150 | } - | - 151 | C._ts_parser_set_language(this[0], address); - 152 | return this; - 153 | } - | - 154 | /** - 155 | * Parse a slice of UTF8 text. - 156 | * - 157 | * @param {string | ParseCallback} callback - The UTF8-encoded text to parse or a callback function. - 158 | * - 159 | * @param {Tree | null} [oldTree] - A previous syntax tree parsed from the same document. If the text of the - 160 | * document has changed since `oldTree` was created, then you must edit `oldTree` to match - 161 | * the new text using {@link Tree#edit}. - 162 | * - 163 | * @param {ParseOptions} [options] - Options for parsing the text. - 164 | * This can be used to set the included ranges, or a progress callback. - 165 | * - 166 | * @returns {Tree | null} A {@link Tree} if parsing succeeded, or `null` if: - 167 | * - The parser has not yet had a language assigned with {@link Parser#setLanguage}. - 168 | * - The progress callback returned true. 
- 169 | */ - 170 | parse( - 171 | callback: string | ParseCallback, - 172 | oldTree?: Tree | null, - 173 | options?: ParseOptions, - 174 | ): Tree | null { - 175 | if (typeof callback === 'string') { - 176 | C.currentParseCallback = (index: number) => callback.slice(index); - 177 | } else if (typeof callback === 'function') { - 178 | C.currentParseCallback = callback; - 179 | } else { - 180 | throw new Error('Argument must be a string or a function'); - 181 | } - | - 182 | if (options?.progressCallback) { - 183 | C.currentProgressCallback = options.progressCallback; - 184 | } else { - 185 | C.currentProgressCallback = null; - 186 | } - | - 187 | if (this.logCallback) { - 188 | C.currentLogCallback = this.logCallback; - 189 | C._ts_parser_enable_logger_wasm(this[0], 1); - 190 | } else { - 191 | C.currentLogCallback = null; - 192 | C._ts_parser_enable_logger_wasm(this[0], 0); - 193 | } - | - 194 | let rangeCount = 0; - 195 | let rangeAddress = 0; - 196 | if (options?.includedRanges) { - 197 | rangeCount = options.includedRanges.length; - 198 | rangeAddress = C._calloc(rangeCount, SIZE_OF_RANGE); - 199 | let address = rangeAddress; - 200 | for (let i = 0; i < rangeCount; i++) { - 201 | marshalRange(address, options.includedRanges[i]); - 202 | address += SIZE_OF_RANGE; - 203 | } - 204 | } - | - 205 | const treeAddress = C._ts_parser_parse_wasm( - 206 | this[0], - 207 | this[1], - 208 | oldTree ? 
oldTree[0] : 0, - 209 | rangeAddress, - 210 | rangeCount - 211 | ); - | - 212 | if (!treeAddress) { - 213 | C.currentParseCallback = null; - 214 | C.currentLogCallback = null; - 215 | C.currentProgressCallback = null; - 216 | return null; - 217 | } - | - 218 | if (!this.language) { - 219 | throw new Error('Parser must have a language to parse'); - 220 | } - | - 221 | const result = new Tree(INTERNAL, treeAddress, this.language, C.currentParseCallback); - 222 | C.currentParseCallback = null; - 223 | C.currentLogCallback = null; - 224 | C.currentProgressCallback = null; - 225 | return result; - 226 | } - | - 227 | /** - 228 | * Instruct the parser to start the next parse from the beginning. - 229 | * - 230 | * If the parser previously failed because of a callback, - 231 | * then by default, it will resume where it left off on the - 232 | * next call to {@link Parser#parse} or other parsing functions. - 233 | * If you don't want to resume, and instead intend to use this parser to - 234 | * parse some other document, you must call `reset` first. - 235 | */ - 236 | reset(): void { - 237 | C._ts_parser_reset(this[0]); - 238 | } - | - 239 | /** Get the ranges of text that the parser will include when parsing. */ - 240 | getIncludedRanges(): Range[] { - 241 | C._ts_parser_included_ranges_wasm(this[0]); - 242 | const count = C.getValue(TRANSFER_BUFFER, 'i32'); - 243 | const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 244 | const result = new Array(count); - | - 245 | if (count > 0) { - 246 | let address = buffer; - 247 | for (let i = 0; i < count; i++) { - 248 | result[i] = unmarshalRange(address); - 249 | address += SIZE_OF_RANGE; - 250 | } - 251 | C._free(buffer); - 252 | } - | - 253 | return result; - 254 | } - | - 255 | /** Set the logging callback that a parser should use during parsing. 
*/ - 256 | setLogger(callback: LogCallback | boolean | null): this { - 257 | if (!callback) { - 258 | this.logCallback = null; - 259 | } else if (typeof callback !== 'function') { - 260 | throw new Error('Logger callback must be a function'); - 261 | } else { - 262 | this.logCallback = callback; - 263 | } - 264 | return this; - 265 | } - | - 266 | /** Get the parser's current logger. */ - 267 | getLogger(): LogCallback | null { - 268 | return this.logCallback; - 269 | } - 270 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/query.ts: --------------------------------------------------------------------------------- - 1 | import { Point, ZERO_POINT, SIZE_OF_INT, C } from './constants'; - 2 | import { Node } from './node'; - 3 | import { marshalNode, unmarshalCaptures } from './marshal'; - 4 | import { TRANSFER_BUFFER } from './parser'; - 5 | import { Language } from './language'; - | - 6 | const PREDICATE_STEP_TYPE_CAPTURE = 1; - | - 7 | const PREDICATE_STEP_TYPE_STRING = 2; - | - 8 | const QUERY_WORD_REGEX = /[\w-]+/g; - | - 9 | /** - 10 | * Options for query execution - 11 | */ - 12 | export interface QueryOptions { - 13 | /** The start position of the range to query */ - 14 | startPosition?: Point; - | - 15 | /** The end position of the range to query */ - 16 | endPosition?: Point; - | - 17 | /** The start index of the range to query */ - 18 | startIndex?: number; - | - 19 | /** The end index of the range to query */ - 20 | endIndex?: number; - | - 21 | /** - 22 | * The maximum number of in-progress matches for this query. - 23 | * The limit must be > 0 and <= 65536. - 24 | */ - 25 | matchLimit?: number; - | - 26 | /** - 27 | * The maximum start depth for a query cursor. - 28 | * - 29 | * This prevents cursors from exploring children nodes at a certain depth. - 30 | * Note if a pattern includes many children, then they will still be - 31 | * checked. 
- 32 | * - 33 | * The zero max start depth value can be used as a special behavior and - 34 | * it helps to destructure a subtree by staying on a node and using - 35 | * captures for interested parts. Note that the zero max start depth - 36 | * only limit a search depth for a pattern's root node but other nodes - 37 | * that are parts of the pattern may be searched at any depth what - 38 | * defined by the pattern structure. - 39 | * - 40 | * Set to `null` to remove the maximum start depth. - 41 | */ - 42 | maxStartDepth?: number; - | - 43 | /** - 44 | * A function that will be called periodically during the execution of the query to check - 45 | * if query execution should be cancelled. You can also use this to instrument query execution - 46 | * and check where the query is at in the document. The progress callback takes a single argument, - 47 | * which is a {@link QueryState} representing the current state of the query. - 48 | */ - 49 | progressCallback?: (state: QueryState) => void; - 50 | } - | - 51 | /** - 52 | * A stateful object that is passed into the progress callback {@link QueryOptions#progressCallback} - 53 | * to provide the current state of the query. - 54 | */ - 55 | export interface QueryState { - 56 | /** The byte offset in the document that the query is at. */ - 57 | currentOffset: number; - 58 | } - | - 59 | /** A record of key-value pairs associated with a particular pattern in a {@link Query}. */ - 60 | export type QueryProperties = Record; - | - 61 | /** - 62 | * A predicate that contains an operator and list of operands. - 63 | */ - 64 | export interface QueryPredicate { - 65 | /** The operator of the predicate, like `match?`, `eq?`, `set!`, etc. */ - 66 | operator: string; - | - 67 | /** The operands of the predicate, which are either captures or strings. */ - 68 | operands: PredicateStep[]; - 69 | } - | - 70 | /** - 71 | * A particular {@link Node} that has been captured with a particular name within a - 72 | * {@link Query}. 
- 73 | */ - 74 | export interface QueryCapture { - 75 | /** The index of the pattern that matched. */ - 76 | patternIndex: number; - | - 77 | /** The name of the capture */ - 78 | name: string; - | - 79 | /** The captured node */ - 80 | node: Node; - | - 81 | /** The properties for predicates declared with the operator `set!`. */ - 82 | setProperties?: QueryProperties; - | - 83 | /** The properties for predicates declared with the operator `is?`. */ - 84 | assertedProperties?: QueryProperties; - | - 85 | /** The properties for predicates declared with the operator `is-not?`. */ - 86 | refutedProperties?: QueryProperties; - 87 | } - | - 88 | /** A match of a {@link Query} to a particular set of {@link Node}s. */ - 89 | export interface QueryMatch { - 90 | /** The index of the pattern that matched. */ - 91 | patternIndex: number; - | - 92 | /** The captures associated with the match. */ - 93 | captures: QueryCapture[]; - | - 94 | /** The properties for predicates declared with the operator `set!`. */ - 95 | setProperties?: QueryProperties; - | - 96 | /** The properties for predicates declared with the operator `is?`. */ - 97 | assertedProperties?: QueryProperties; - | - 98 | /** The properties for predicates declared with the operator `is-not?`. */ - 99 | refutedProperties?: QueryProperties; - 100 | } - | - 101 | /** A quantifier for captures */ - 102 | export const CaptureQuantifier = { - 103 | Zero: 0, - 104 | ZeroOrOne: 1, - 105 | ZeroOrMore: 2, - 106 | One: 3, - 107 | OneOrMore: 4 - 108 | } as const; - | - 109 | /** A quantifier for captures */ - 110 | export type CaptureQuantifier = typeof CaptureQuantifier[keyof typeof CaptureQuantifier]; - | - 111 | /** - 112 | * Predicates are represented as a single array of steps. There are two - 113 | * types of steps, which correspond to the two legal values for - 114 | * the `type` field: - 115 | * - 116 | * - `CapturePredicateStep` - Steps with this type represent names - 117 | * of captures. 
- 118 | * - 119 | * - `StringPredicateStep` - Steps with this type represent literal - 120 | * strings. - 121 | */ - 122 | export type PredicateStep = CapturePredicateStep | StringPredicateStep; - | - 123 | /** - 124 | * A step in a predicate that refers to a capture. - 125 | * - 126 | * The `name` field is the name of the capture. - 127 | */ - 128 | export interface CapturePredicateStep { type: 'capture', name: string } - | - 129 | /** - 130 | * A step in a predicate that refers to a string. - 131 | * - 132 | * The `value` field is the string value. - 133 | */ - 134 | export interface StringPredicateStep { type: 'string', value: string } - | - 135 | const isCaptureStep = (step: PredicateStep): step is Extract => - 136 | step.type === 'capture'; - | - 137 | const isStringStep = (step: PredicateStep): step is Extract => - 138 | step.type === 'string'; - | - 139 | /** - 140 | * @internal - 141 | * - 142 | * A function that checks if a given set of captures matches a particular - 143 | * condition. This is used in the built-in `eq?`, `match?`, and `any-of?` - 144 | * predicates. - 145 | */ - 146 | export type TextPredicate = (captures: QueryCapture[]) => boolean; - | - 147 | /** Error codes returned from tree-sitter query parsing */ - 148 | export const QueryErrorKind = { - 149 | Syntax: 1, - 150 | NodeName: 2, - 151 | FieldName: 3, - 152 | CaptureName: 4, - 153 | PatternStructure: 5, - 154 | } as const; - | - 155 | /** An error that occurred while parsing a query string. */ - 156 | export type QueryErrorKind = typeof QueryErrorKind[keyof typeof QueryErrorKind]; - | - 157 | /** Information about a {@link QueryError}. 
*/ - 158 | export interface QueryErrorInfo { - 159 | [QueryErrorKind.NodeName]: { word: string }; - 160 | [QueryErrorKind.FieldName]: { word: string }; - 161 | [QueryErrorKind.CaptureName]: { word: string }; - 162 | [QueryErrorKind.PatternStructure]: { suffix: string }; - 163 | [QueryErrorKind.Syntax]: { suffix: string }; - 164 | } - | - 165 | /** Error thrown when parsing a tree-sitter query fails */ - 166 | export class QueryError extends Error { - 167 | constructor( - 168 | public kind: QueryErrorKind, - 169 | public info: QueryErrorInfo[typeof kind], - 170 | public index: number, - 171 | public length: number - 172 | ) { - 173 | super(QueryError.formatMessage(kind, info)); - 174 | this.name = 'QueryError'; - 175 | } - | - 176 | /** Formats an error message based on the error kind and info */ - 177 | private static formatMessage(kind: QueryErrorKind, info: QueryErrorInfo[QueryErrorKind]): string { - 178 | switch (kind) { - 179 | case QueryErrorKind.NodeName: - 180 | return `Bad node name '${(info as QueryErrorInfo[2]).word}'`; - 181 | case QueryErrorKind.FieldName: - 182 | return `Bad field name '${(info as QueryErrorInfo[3]).word}'`; - 183 | case QueryErrorKind.CaptureName: - 184 | return `Bad capture name @${(info as QueryErrorInfo[4]).word}`; - 185 | case QueryErrorKind.PatternStructure: - 186 | return `Bad pattern structure at offset ${(info as QueryErrorInfo[5]).suffix}`; - 187 | case QueryErrorKind.Syntax: - 188 | return `Bad syntax at offset ${(info as QueryErrorInfo[1]).suffix}`; - 189 | } - 190 | } - 191 | } - | - 192 | /** - 193 | * Parses the `eq?` and `not-eq?` predicates in a query, and updates the text predicates. - 194 | */ - 195 | function parseAnyPredicate( - 196 | steps: PredicateStep[], - 197 | index: number, - 198 | operator: string, - 199 | textPredicates: TextPredicate[][], - 200 | ) { - 201 | if (steps.length !== 3) { - 202 | throw new Error( - 203 | `Wrong number of arguments to \`#${operator}\` predicate. 
Expected 2, got ${steps.length - 1}` - 204 | ); - 205 | } - | - 206 | if (!isCaptureStep(steps[1])) { - 207 | throw new Error( - 208 | `First argument of \`#${operator}\` predicate must be a capture. Got "${steps[1].value}"` - 209 | ); - 210 | } - | - 211 | const isPositive = operator === 'eq?' || operator === 'any-eq?'; - 212 | const matchAll = !operator.startsWith('any-'); - | - 213 | if (isCaptureStep(steps[2])) { - 214 | const captureName1 = steps[1].name; - 215 | const captureName2 = steps[2].name; - 216 | textPredicates[index].push((captures) => { - 217 | const nodes1: Node[] = []; - 218 | const nodes2: Node[] = []; - 219 | for (const c of captures) { - 220 | if (c.name === captureName1) nodes1.push(c.node); - 221 | if (c.name === captureName2) nodes2.push(c.node); - 222 | } - 223 | const compare = (n1: { text: string }, n2: { text: string }, positive: boolean) => { - 224 | return positive ? n1.text === n2.text : n1.text !== n2.text; - 225 | }; - 226 | return matchAll - 227 | ? nodes1.every((n1) => nodes2.some((n2) => compare(n1, n2, isPositive))) - 228 | : nodes1.some((n1) => nodes2.some((n2) => compare(n1, n2, isPositive))); - 229 | }); - 230 | } else { - 231 | const captureName = steps[1].name; - 232 | const stringValue = steps[2].value; - 233 | const matches = (n: Node) => n.text === stringValue; - 234 | const doesNotMatch = (n: Node) => n.text !== stringValue; - 235 | textPredicates[index].push((captures) => { - 236 | const nodes = []; - 237 | for (const c of captures) { - 238 | if (c.name === captureName) nodes.push(c.node); - 239 | } - 240 | const test = isPositive ? matches : doesNotMatch; - 241 | return matchAll ? nodes.every(test) : nodes.some(test); - 242 | }); - 243 | } - 244 | } - | - 245 | /** - 246 | * Parses the `match?` and `not-match?` predicates in a query, and updates the text predicates. 
- 247 | */ - 248 | function parseMatchPredicate( - 249 | steps: PredicateStep[], - 250 | index: number, - 251 | operator: string, - 252 | textPredicates: TextPredicate[][], - 253 | ) { - 254 | if (steps.length !== 3) { - 255 | throw new Error( - 256 | `Wrong number of arguments to \`#${operator}\` predicate. Expected 2, got ${steps.length - 1}.`, - 257 | ); - 258 | } - | - 259 | if (steps[1].type !== 'capture') { - 260 | throw new Error( - 261 | `First argument of \`#${operator}\` predicate must be a capture. Got "${steps[1].value}".`, - 262 | ); - 263 | } - | - 264 | if (steps[2].type !== 'string') { - 265 | throw new Error( - 266 | `Second argument of \`#${operator}\` predicate must be a string. Got @${steps[2].name}.`, - 267 | ); - 268 | } - | - 269 | const isPositive = operator === 'match?' || operator === 'any-match?'; - 270 | const matchAll = !operator.startsWith('any-'); - 271 | const captureName = steps[1].name; - 272 | const regex = new RegExp(steps[2].value); - 273 | textPredicates[index].push((captures) => { - 274 | const nodes = []; - 275 | for (const c of captures) { - 276 | if (c.name === captureName) nodes.push(c.node.text); - 277 | } - 278 | const test = (text: string, positive: boolean) => { - 279 | return positive ? - 280 | regex.test(text) : - 281 | !regex.test(text); - 282 | }; - 283 | if (nodes.length === 0) return !isPositive; - 284 | return matchAll ? - 285 | nodes.every((text) => test(text, isPositive)) : - 286 | nodes.some((text) => test(text, isPositive)); - 287 | }); - 288 | } - | - 289 | /** - 290 | * Parses the `any-of?` and `not-any-of?` predicates in a query, and updates the text predicates. - 291 | */ - 292 | function parseAnyOfPredicate( - 293 | steps: PredicateStep[], - 294 | index: number, - 295 | operator: string, - 296 | textPredicates: TextPredicate[][], - 297 | ) { - 298 | if (steps.length < 2) { - 299 | throw new Error( - 300 | `Wrong number of arguments to \`#${operator}\` predicate. Expected at least 1. 
Got ${steps.length - 1}.`, - 301 | ); - 302 | } - | - 303 | if (steps[1].type !== 'capture') { - 304 | throw new Error( - 305 | `First argument of \`#${operator}\` predicate must be a capture. Got "${steps[1].value}".`, - 306 | ); - 307 | } - | - 308 | const isPositive = operator === 'any-of?'; - 309 | const captureName = steps[1].name; - | - 310 | const stringSteps = steps.slice(2); - 311 | if (!stringSteps.every(isStringStep)) { - 312 | throw new Error( - 313 | `Arguments to \`#${operator}\` predicate must be strings.".`, - 314 | ); - 315 | } - 316 | const values = stringSteps.map((s) => s.value); - | - 317 | textPredicates[index].push((captures) => { - 318 | const nodes = []; - 319 | for (const c of captures) { - 320 | if (c.name === captureName) nodes.push(c.node.text); - 321 | } - 322 | if (nodes.length === 0) return !isPositive; - 323 | return nodes.every((text) => values.includes(text)) === isPositive; - 324 | }); - 325 | } - | - 326 | /** - 327 | * Parses the `is?` and `is-not?` predicates in a query, and updates the asserted or refuted properties, - 328 | * depending on if the operator is positive or negative. - 329 | */ - 330 | function parseIsPredicate( - 331 | steps: PredicateStep[], - 332 | index: number, - 333 | operator: string, - 334 | assertedProperties: QueryProperties[], - 335 | refutedProperties: QueryProperties[], - 336 | ) { - 337 | if (steps.length < 2 || steps.length > 3) { - 338 | throw new Error( - 339 | `Wrong number of arguments to \`#${operator}\` predicate. Expected 1 or 2. Got ${steps.length - 1}.`, - 340 | ); - 341 | } - | - 342 | if (!steps.every(isStringStep)) { - 343 | throw new Error( - 344 | `Arguments to \`#${operator}\` predicate must be strings.".`, - 345 | ); - 346 | } - | - 347 | const properties = operator === 'is?' ? assertedProperties : refutedProperties; - 348 | if (!properties[index]) properties[index] = {}; - 349 | properties[index][steps[1].value] = steps[2]?.value ?? 
null; - 350 | } - | - 351 | /** - 352 | * Parses the `set!` directive in a query, and updates the set properties. - 353 | */ - 354 | function parseSetDirective( - 355 | steps: PredicateStep[], - 356 | index: number, - 357 | setProperties: QueryProperties[], - 358 | ) { - 359 | if (steps.length < 2 || steps.length > 3) { - 360 | throw new Error(`Wrong number of arguments to \`#set!\` predicate. Expected 1 or 2. Got ${steps.length - 1}.`); - 361 | } - 362 | if (!steps.every(isStringStep)) { - 363 | throw new Error(`Arguments to \`#set!\` predicate must be strings.".`); - 364 | } - 365 | if (!setProperties[index]) setProperties[index] = {}; - 366 | setProperties[index][steps[1].value] = steps[2]?.value ?? null; - 367 | } - | - 368 | /** - 369 | * Parses the predicate at a given step in a pattern, and updates the appropriate - 370 | * predicates or properties. - 371 | */ - 372 | function parsePattern( - 373 | index: number, - 374 | stepType: number, - 375 | stepValueId: number, - 376 | captureNames: string[], - 377 | stringValues: string[], - 378 | steps: PredicateStep[], - 379 | textPredicates: TextPredicate[][], - 380 | predicates: QueryPredicate[][], - 381 | setProperties: QueryProperties[], - 382 | assertedProperties: QueryProperties[], - 383 | refutedProperties: QueryProperties[], - 384 | ) { - 385 | if (stepType === PREDICATE_STEP_TYPE_CAPTURE) { - 386 | const name = captureNames[stepValueId]; - 387 | steps.push({ type: 'capture', name }); - 388 | } else if (stepType === PREDICATE_STEP_TYPE_STRING) { - 389 | steps.push({ type: 'string', value: stringValues[stepValueId] }); - 390 | } else if (steps.length > 0) { - 391 | if (steps[0].type !== 'string') { - 392 | throw new Error('Predicates must begin with a literal value'); - 393 | } - | - 394 | const operator = steps[0].value; - 395 | switch (operator) { - 396 | case 'any-not-eq?': - 397 | case 'not-eq?': - 398 | case 'any-eq?': - 399 | case 'eq?': - 400 | parseAnyPredicate(steps, index, operator, textPredicates); 
- 401 | break; - | - 402 | case 'any-not-match?': - 403 | case 'not-match?': - 404 | case 'any-match?': - 405 | case 'match?': - 406 | parseMatchPredicate(steps, index, operator, textPredicates); - 407 | break; - | - 408 | case 'not-any-of?': - 409 | case 'any-of?': - 410 | parseAnyOfPredicate(steps, index, operator, textPredicates); - 411 | break; - | - 412 | case 'is?': - 413 | case 'is-not?': - 414 | parseIsPredicate(steps, index, operator, assertedProperties, refutedProperties); - 415 | break; - | - 416 | case 'set!': - 417 | parseSetDirective(steps, index, setProperties); - 418 | break; - | - 419 | default: - 420 | predicates[index].push({ operator, operands: steps.slice(1) }); - 421 | } - | - 422 | steps.length = 0; - 423 | } - 424 | } - | - 425 | export class Query { - 426 | /** @internal */ - 427 | private [0] = 0; // Internal handle for Wasm - | - 428 | /** @internal */ - 429 | private exceededMatchLimit: boolean; - | - 430 | /** @internal */ - 431 | private textPredicates: TextPredicate[][]; - | - 432 | /** The names of the captures used in the query. */ - 433 | readonly captureNames: string[]; - | - 434 | /** The quantifiers of the captures used in the query. */ - 435 | readonly captureQuantifiers: CaptureQuantifier[][]; - | - 436 | /** - 437 | * The other user-defined predicates associated with the given index. - 438 | * - 439 | * This includes predicates with operators other than: - 440 | * - `match?` - 441 | * - `eq?` and `not-eq?` - 442 | * - `any-of?` and `not-any-of?` - 443 | * - `is?` and `is-not?` - 444 | * - `set!` - 445 | */ - 446 | readonly predicates: QueryPredicate[][]; - | - 447 | /** The properties for predicates with the operator `set!`. */ - 448 | readonly setProperties: QueryProperties[]; - | - 449 | /** The properties for predicates with the operator `is?`. */ - 450 | readonly assertedProperties: QueryProperties[]; - | - 451 | /** The properties for predicates with the operator `is-not?`. 
*/ - 452 | readonly refutedProperties: QueryProperties[]; - | - 453 | /** The maximum number of in-progress matches for this cursor. */ - 454 | matchLimit?: number; - | - 455 | /** - 456 | * Create a new query from a string containing one or more S-expression - 457 | * patterns. - 458 | * - 459 | * The query is associated with a particular language, and can only be run - 460 | * on syntax nodes parsed with that language. References to Queries can be - 461 | * shared between multiple threads. - 462 | * - 463 | * @link {@see https://tree-sitter.github.io/tree-sitter/using-parsers/queries} - 464 | */ - 465 | constructor(language: Language, source: string) { - 466 | const sourceLength = C.lengthBytesUTF8(source); - 467 | const sourceAddress = C._malloc(sourceLength + 1); - 468 | C.stringToUTF8(source, sourceAddress, sourceLength + 1); - 469 | const address = C._ts_query_new( - 470 | language[0], - 471 | sourceAddress, - 472 | sourceLength, - 473 | TRANSFER_BUFFER, - 474 | TRANSFER_BUFFER + SIZE_OF_INT - 475 | ); - | - 476 | if (!address) { - 477 | const errorId = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32') as QueryErrorKind; - 478 | const errorByte = C.getValue(TRANSFER_BUFFER, 'i32'); - 479 | const errorIndex = C.UTF8ToString(sourceAddress, errorByte).length; - 480 | const suffix = source.slice(errorIndex, errorIndex + 100).split('\n')[0]; - 481 | const word = suffix.match(QUERY_WORD_REGEX)?.[0] ?? 
''; - 482 | C._free(sourceAddress); - | - 483 | switch (errorId) { - 484 | case QueryErrorKind.Syntax: - 485 | throw new QueryError(QueryErrorKind.Syntax, { suffix: `${errorIndex}: '${suffix}'...` }, errorIndex, 0); - 486 | case QueryErrorKind.NodeName: - 487 | throw new QueryError(errorId, { word }, errorIndex, word.length); - 488 | case QueryErrorKind.FieldName: - 489 | throw new QueryError(errorId, { word }, errorIndex, word.length); - 490 | case QueryErrorKind.CaptureName: - 491 | throw new QueryError(errorId, { word }, errorIndex, word.length); - 492 | case QueryErrorKind.PatternStructure: - 493 | throw new QueryError(errorId, { suffix: `${errorIndex}: '${suffix}'...` }, errorIndex, 0); - 494 | } - 495 | } - | - 496 | const stringCount = C._ts_query_string_count(address); - 497 | const captureCount = C._ts_query_capture_count(address); - 498 | const patternCount = C._ts_query_pattern_count(address); - 499 | const captureNames = new Array(captureCount); - 500 | const captureQuantifiers = new Array(patternCount); - 501 | const stringValues = new Array(stringCount); - | - 502 | // Fill in the capture names - 503 | for (let i = 0; i < captureCount; i++) { - 504 | const nameAddress = C._ts_query_capture_name_for_id( - 505 | address, - 506 | i, - 507 | TRANSFER_BUFFER - 508 | ); - 509 | const nameLength = C.getValue(TRANSFER_BUFFER, 'i32'); - 510 | captureNames[i] = C.UTF8ToString(nameAddress, nameLength); - 511 | } - | - 512 | // Fill in the capture quantifiers - 513 | for (let i = 0; i < patternCount; i++) { - 514 | const captureQuantifiersArray = new Array(captureCount); - 515 | for (let j = 0; j < captureCount; j++) { - 516 | const quantifier = C._ts_query_capture_quantifier_for_id(address, i, j); - 517 | captureQuantifiersArray[j] = quantifier as CaptureQuantifier; - 518 | } - 519 | captureQuantifiers[i] = captureQuantifiersArray; - 520 | } - | - 521 | // Fill in the string values - 522 | for (let i = 0; i < stringCount; i++) { - 523 | const valueAddress = 
C._ts_query_string_value_for_id( - 524 | address, - 525 | i, - 526 | TRANSFER_BUFFER - 527 | ); - 528 | const nameLength = C.getValue(TRANSFER_BUFFER, 'i32'); - 529 | stringValues[i] = C.UTF8ToString(valueAddress, nameLength); - 530 | } - | - 531 | const setProperties = new Array(patternCount); - 532 | const assertedProperties = new Array(patternCount); - 533 | const refutedProperties = new Array(patternCount); - 534 | const predicates = new Array(patternCount); - 535 | const textPredicates = new Array(patternCount); - | - 536 | // Parse the predicates, and add the appropriate predicates or properties - 537 | for (let i = 0; i < patternCount; i++) { - 538 | const predicatesAddress = C._ts_query_predicates_for_pattern(address, i, TRANSFER_BUFFER); - 539 | const stepCount = C.getValue(TRANSFER_BUFFER, 'i32'); - | - 540 | predicates[i] = []; - 541 | textPredicates[i] = []; - | - 542 | const steps = new Array(); - | - 543 | let stepAddress = predicatesAddress; - 544 | for (let j = 0; j < stepCount; j++) { - 545 | const stepType = C.getValue(stepAddress, 'i32'); - 546 | stepAddress += SIZE_OF_INT; - | - 547 | const stepValueId = C.getValue(stepAddress, 'i32'); - 548 | stepAddress += SIZE_OF_INT; - | - 549 | parsePattern( - 550 | i, - 551 | stepType, - 552 | stepValueId, - 553 | captureNames, - 554 | stringValues, - 555 | steps, - 556 | textPredicates, - 557 | predicates, - 558 | setProperties, - 559 | assertedProperties, - 560 | refutedProperties, - 561 | ); - 562 | } - | - 563 | Object.freeze(textPredicates[i]); - 564 | Object.freeze(predicates[i]); - 565 | Object.freeze(setProperties[i]); - 566 | Object.freeze(assertedProperties[i]); - 567 | Object.freeze(refutedProperties[i]); - 568 | } - | - 569 | C._free(sourceAddress); - | - | - 570 | this[0] = address; - 571 | this.captureNames = captureNames; - 572 | this.captureQuantifiers = captureQuantifiers; - 573 | this.textPredicates = textPredicates; - 574 | this.predicates = predicates; - 575 | this.setProperties = 
setProperties; - 576 | this.assertedProperties = assertedProperties; - 577 | this.refutedProperties = refutedProperties; - 578 | this.exceededMatchLimit = false; - 579 | } - | - 580 | /** Delete the query, freeing its resources. */ - 581 | delete(): void { - 582 | C._ts_query_delete(this[0]); - 583 | this[0] = 0; - 584 | } - | - 585 | /** - 586 | * Iterate over all of the matches in the order that they were found. - 587 | * - 588 | * Each match contains the index of the pattern that matched, and a list of - 589 | * captures. Because multiple patterns can match the same set of nodes, - 590 | * one match may contain captures that appear *before* some of the - 591 | * captures from a previous match. - 592 | * - 593 | * @param {Node} node - The node to execute the query on. - 594 | * - 595 | * @param {QueryOptions} options - Options for query execution. - 596 | */ - 597 | matches( - 598 | node: Node, - 599 | options: QueryOptions = {} - 600 | ): QueryMatch[] { - 601 | const startPosition = options.startPosition ?? ZERO_POINT; - 602 | const endPosition = options.endPosition ?? ZERO_POINT; - 603 | const startIndex = options.startIndex ?? 0; - 604 | const endIndex = options.endIndex ?? 0; - 605 | const matchLimit = options.matchLimit ?? 0xFFFFFFFF; - 606 | const maxStartDepth = options.maxStartDepth ?? 
0xFFFFFFFF; - 607 | const progressCallback = options.progressCallback; - | - 608 | if (typeof matchLimit !== 'number') { - 609 | throw new Error('Arguments must be numbers'); - 610 | } - 611 | this.matchLimit = matchLimit; - | - 612 | if (endIndex !== 0 && startIndex > endIndex) { - 613 | throw new Error('`startIndex` cannot be greater than `endIndex`'); - 614 | } - | - 615 | if (endPosition !== ZERO_POINT && ( - 616 | startPosition.row > endPosition.row || - 617 | (startPosition.row === endPosition.row && startPosition.column > endPosition.column) - 618 | )) { - 619 | throw new Error('`startPosition` cannot be greater than `endPosition`'); - 620 | } - | - 621 | if (progressCallback) { - 622 | C.currentQueryProgressCallback = progressCallback; - 623 | } - | - 624 | marshalNode(node); - | - 625 | C._ts_query_matches_wasm( - 626 | this[0], - 627 | node.tree[0], - 628 | startPosition.row, - 629 | startPosition.column, - 630 | endPosition.row, - 631 | endPosition.column, - 632 | startIndex, - 633 | endIndex, - 634 | matchLimit, - 635 | maxStartDepth, - 636 | ); - | - 637 | const rawCount = C.getValue(TRANSFER_BUFFER, 'i32'); - 638 | const startAddress = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 639 | const didExceedMatchLimit = C.getValue(TRANSFER_BUFFER + 2 * SIZE_OF_INT, 'i32'); - 640 | const result = new Array(rawCount); - 641 | this.exceededMatchLimit = Boolean(didExceedMatchLimit); - | - 642 | let filteredCount = 0; - 643 | let address = startAddress; - 644 | for (let i = 0; i < rawCount; i++) { - 645 | const patternIndex = C.getValue(address, 'i32'); - 646 | address += SIZE_OF_INT; - 647 | const captureCount = C.getValue(address, 'i32'); - 648 | address += SIZE_OF_INT; - | - 649 | const captures = new Array(captureCount); - 650 | address = unmarshalCaptures(this, node.tree, address, patternIndex, captures); - | - 651 | if (this.textPredicates[patternIndex].every((p) => p(captures))) { - 652 | result[filteredCount] = { patternIndex, captures }; - 653 | 
const setProperties = this.setProperties[patternIndex]; - 654 | result[filteredCount].setProperties = setProperties; - 655 | const assertedProperties = this.assertedProperties[patternIndex]; - 656 | result[filteredCount].assertedProperties = assertedProperties; - 657 | const refutedProperties = this.refutedProperties[patternIndex]; - 658 | result[filteredCount].refutedProperties = refutedProperties; - 659 | filteredCount++; - 660 | } - 661 | } - 662 | result.length = filteredCount; - | - 663 | C._free(startAddress); - 664 | C.currentQueryProgressCallback = null; - 665 | return result; - 666 | } - | - 667 | /** - 668 | * Iterate over all of the individual captures in the order that they - 669 | * appear. - 670 | * - 671 | * This is useful if you don't care about which pattern matched, and just - 672 | * want a single, ordered sequence of captures. - 673 | * - 674 | * @param {Node} node - The node to execute the query on. - 675 | * - 676 | * @param {QueryOptions} options - Options for query execution. - 677 | */ - 678 | captures( - 679 | node: Node, - 680 | options: QueryOptions = {} - 681 | ): QueryCapture[] { - 682 | const startPosition = options.startPosition ?? ZERO_POINT; - 683 | const endPosition = options.endPosition ?? ZERO_POINT; - 684 | const startIndex = options.startIndex ?? 0; - 685 | const endIndex = options.endIndex ?? 0; - 686 | const matchLimit = options.matchLimit ?? 0xFFFFFFFF; - 687 | const maxStartDepth = options.maxStartDepth ?? 
0xFFFFFFFF; - 688 | const progressCallback = options.progressCallback; - | - 689 | if (typeof matchLimit !== 'number') { - 690 | throw new Error('Arguments must be numbers'); - 691 | } - 692 | this.matchLimit = matchLimit; - | - 693 | if (endIndex !== 0 && startIndex > endIndex) { - 694 | throw new Error('`startIndex` cannot be greater than `endIndex`'); - 695 | } - | - 696 | if (endPosition !== ZERO_POINT && ( - 697 | startPosition.row > endPosition.row || - 698 | (startPosition.row === endPosition.row && startPosition.column > endPosition.column) - 699 | )) { - 700 | throw new Error('`startPosition` cannot be greater than `endPosition`'); - 701 | } - | - 702 | if (progressCallback) { - 703 | C.currentQueryProgressCallback = progressCallback; - 704 | } - | - 705 | marshalNode(node); - | - 706 | C._ts_query_captures_wasm( - 707 | this[0], - 708 | node.tree[0], - 709 | startPosition.row, - 710 | startPosition.column, - 711 | endPosition.row, - 712 | endPosition.column, - 713 | startIndex, - 714 | endIndex, - 715 | matchLimit, - 716 | maxStartDepth, - 717 | ); - | - 718 | const count = C.getValue(TRANSFER_BUFFER, 'i32'); - 719 | const startAddress = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 720 | const didExceedMatchLimit = C.getValue(TRANSFER_BUFFER + 2 * SIZE_OF_INT, 'i32'); - 721 | const result = new Array(); - 722 | this.exceededMatchLimit = Boolean(didExceedMatchLimit); - | - 723 | const captures = new Array(); - 724 | let address = startAddress; - 725 | for (let i = 0; i < count; i++) { - 726 | const patternIndex = C.getValue(address, 'i32'); - 727 | address += SIZE_OF_INT; - 728 | const captureCount = C.getValue(address, 'i32'); - 729 | address += SIZE_OF_INT; - 730 | const captureIndex = C.getValue(address, 'i32'); - 731 | address += SIZE_OF_INT; - | - 732 | captures.length = captureCount; - 733 | address = unmarshalCaptures(this, node.tree, address, patternIndex, captures); - | - 734 | if (this.textPredicates[patternIndex].every(p => p(captures))) 
{ - 735 | const capture = captures[captureIndex]; - 736 | const setProperties = this.setProperties[patternIndex]; - 737 | capture.setProperties = setProperties; - 738 | const assertedProperties = this.assertedProperties[patternIndex]; - 739 | capture.assertedProperties = assertedProperties; - 740 | const refutedProperties = this.refutedProperties[patternIndex]; - 741 | capture.refutedProperties = refutedProperties; - 742 | result.push(capture); - 743 | } - 744 | } - | - 745 | C._free(startAddress); - 746 | C.currentQueryProgressCallback = null; - 747 | return result; - 748 | } - | - 749 | /** Get the predicates for a given pattern. */ - 750 | predicatesForPattern(patternIndex: number): QueryPredicate[] { - 751 | return this.predicates[patternIndex]; - 752 | } - | - 753 | /** - 754 | * Disable a certain capture within a query. - 755 | * - 756 | * This prevents the capture from being returned in matches, and also - 757 | * avoids any resource usage associated with recording the capture. - 758 | */ - 759 | disableCapture(captureName: string): void { - 760 | const captureNameLength = C.lengthBytesUTF8(captureName); - 761 | const captureNameAddress = C._malloc(captureNameLength + 1); - 762 | C.stringToUTF8(captureName, captureNameAddress, captureNameLength + 1); - 763 | C._ts_query_disable_capture(this[0], captureNameAddress, captureNameLength); - 764 | C._free(captureNameAddress); - 765 | } - | - 766 | /** - 767 | * Disable a certain pattern within a query. - 768 | * - 769 | * This prevents the pattern from matching, and also avoids any resource - 770 | * usage associated with the pattern. This throws an error if the pattern - 771 | * index is out of bounds. 
- 772 | */ - 773 | disablePattern(patternIndex: number): void { - 774 | if (patternIndex >= this.predicates.length) { - 775 | throw new Error( - 776 | `Pattern index is ${patternIndex} but the pattern count is ${this.predicates.length}` - 777 | ); - 778 | } - 779 | C._ts_query_disable_pattern(this[0], patternIndex); - 780 | } - | - 781 | /** - 782 | * Check if, on its last execution, this cursor exceeded its maximum number - 783 | * of in-progress matches. - 784 | */ - 785 | didExceedMatchLimit(): boolean { - 786 | return this.exceededMatchLimit; - 787 | } - | - 788 | /** Get the byte offset where the given pattern starts in the query's source. */ - 789 | startIndexForPattern(patternIndex: number): number { - 790 | if (patternIndex >= this.predicates.length) { - 791 | throw new Error( - 792 | `Pattern index is ${patternIndex} but the pattern count is ${this.predicates.length}` - 793 | ); - 794 | } - 795 | return C._ts_query_start_byte_for_pattern(this[0], patternIndex); - 796 | } - | - 797 | /** Get the byte offset where the given pattern ends in the query's source. */ - 798 | endIndexForPattern(patternIndex: number): number { - 799 | if (patternIndex >= this.predicates.length) { - 800 | throw new Error( - 801 | `Pattern index is ${patternIndex} but the pattern count is ${this.predicates.length}` - 802 | ); - 803 | } - 804 | return C._ts_query_end_byte_for_pattern(this[0], patternIndex); - 805 | } - | - 806 | /** Get the number of patterns in the query. */ - 807 | patternCount(): number { - 808 | return C._ts_query_pattern_count(this[0]); - 809 | } - | - 810 | /** Get the index for a given capture name. */ - 811 | captureIndexForName(captureName: string): number { - 812 | return this.captureNames.indexOf(captureName); - 813 | } - | - 814 | /** Check if a given pattern within a query has a single root node. 
*/ - 815 | isPatternRooted(patternIndex: number): boolean { - 816 | return C._ts_query_is_pattern_rooted(this[0], patternIndex) === 1; - 817 | } - | - 818 | /** Check if a given pattern within a query has a single root node. */ - 819 | isPatternNonLocal(patternIndex: number): boolean { - 820 | return C._ts_query_is_pattern_non_local(this[0], patternIndex) === 1; - 821 | } - | - 822 | /** - 823 | * Check if a given step in a query is 'definite'. - 824 | * - 825 | * A query step is 'definite' if its parent pattern will be guaranteed to - 826 | * match successfully once it reaches the step. - 827 | */ - 828 | isPatternGuaranteedAtStep(byteIndex: number): boolean { - 829 | return C._ts_query_is_pattern_guaranteed_at_step(this[0], byteIndex) === 1; - 830 | } - 831 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/tree_cursor.ts: --------------------------------------------------------------------------------- - 1 | import { INTERNAL, Internal, assertInternal, Point, SIZE_OF_NODE, SIZE_OF_CURSOR, C } from './constants'; - 2 | import { marshalNode, marshalPoint, marshalTreeCursor, unmarshalNode, unmarshalPoint, unmarshalTreeCursor } from './marshal'; - 3 | import { Node } from './node'; - 4 | import { TRANSFER_BUFFER } from './parser'; - 5 | import { getText, Tree } from './tree'; - | - 6 | /** A stateful object for walking a syntax {@link Tree} efficiently. 
*/ - 7 | export class TreeCursor { - 8 | /** @internal */ - 9 | // @ts-expect-error: never read - 10 | private [0] = 0; // Internal handle for Wasm - | - 11 | /** @internal */ - 12 | // @ts-expect-error: never read - 13 | private [1] = 0; // Internal handle for Wasm - | - 14 | /** @internal */ - 15 | // @ts-expect-error: never read - 16 | private [2] = 0; // Internal handle for Wasm - | - 17 | /** @internal */ - 18 | // @ts-expect-error: never read - 19 | private [3] = 0; // Internal handle for Wasm - | - 20 | /** @internal */ - 21 | private tree: Tree; - | - 22 | /** @internal */ - 23 | constructor(internal: Internal, tree: Tree) { - 24 | assertInternal(internal); - 25 | this.tree = tree; - 26 | unmarshalTreeCursor(this); - 27 | } - | - 28 | /** Creates a deep copy of the tree cursor. This allocates new memory. */ - 29 | copy(): TreeCursor { - 30 | const copy = new TreeCursor(INTERNAL, this.tree); - 31 | C._ts_tree_cursor_copy_wasm(this.tree[0]); - 32 | unmarshalTreeCursor(copy); - 33 | return copy; - 34 | } - | - 35 | /** Delete the tree cursor, freeing its resources. */ - 36 | delete(): void { - 37 | marshalTreeCursor(this); - 38 | C._ts_tree_cursor_delete_wasm(this.tree[0]); - 39 | this[0] = this[1] = this[2] = 0; - 40 | } - | - 41 | /** Get the tree cursor's current {@link Node}. */ - 42 | get currentNode(): Node { - 43 | marshalTreeCursor(this); - 44 | C._ts_tree_cursor_current_node_wasm(this.tree[0]); - 45 | return unmarshalNode(this.tree)!; - 46 | } - | - 47 | /** - 48 | * Get the numerical field id of this tree cursor's current node. - 49 | * - 50 | * See also {@link TreeCursor#currentFieldName}. - 51 | */ - 52 | get currentFieldId(): number { - 53 | marshalTreeCursor(this); - 54 | return C._ts_tree_cursor_current_field_id_wasm(this.tree[0]); - 55 | } - | - 56 | /** Get the field name of this tree cursor's current node. 
*/ - 57 | get currentFieldName(): string | null { - 58 | return this.tree.language.fields[this.currentFieldId]; - 59 | } - | - 60 | /** - 61 | * Get the depth of the cursor's current node relative to the original - 62 | * node that the cursor was constructed with. - 63 | */ - 64 | get currentDepth(): number { - 65 | marshalTreeCursor(this); - 66 | return C._ts_tree_cursor_current_depth_wasm(this.tree[0]); - 67 | } - | - 68 | /** - 69 | * Get the index of the cursor's current node out of all of the - 70 | * descendants of the original node that the cursor was constructed with. - 71 | */ - 72 | get currentDescendantIndex(): number { - 73 | marshalTreeCursor(this); - 74 | return C._ts_tree_cursor_current_descendant_index_wasm(this.tree[0]); - 75 | } - | - 76 | /** Get the type of the cursor's current node. */ - 77 | get nodeType(): string { - 78 | return this.tree.language.types[this.nodeTypeId] || 'ERROR'; - 79 | } - | - 80 | /** Get the type id of the cursor's current node. */ - 81 | get nodeTypeId(): number { - 82 | marshalTreeCursor(this); - 83 | return C._ts_tree_cursor_current_node_type_id_wasm(this.tree[0]); - 84 | } - | - 85 | /** Get the state id of the cursor's current node. */ - 86 | get nodeStateId(): number { - 87 | marshalTreeCursor(this); - 88 | return C._ts_tree_cursor_current_node_state_id_wasm(this.tree[0]); - 89 | } - | - 90 | /** Get the id of the cursor's current node. */ - 91 | get nodeId(): number { - 92 | marshalTreeCursor(this); - 93 | return C._ts_tree_cursor_current_node_id_wasm(this.tree[0]); - 94 | } - | - 95 | /** - 96 | * Check if the cursor's current node is *named*. - 97 | * - 98 | * Named nodes correspond to named rules in the grammar, whereas - 99 | * *anonymous* nodes correspond to string literals in the grammar. 
- 100 | */ - 101 | get nodeIsNamed(): boolean { - 102 | marshalTreeCursor(this); - 103 | return C._ts_tree_cursor_current_node_is_named_wasm(this.tree[0]) === 1; - 104 | } - | - 105 | /** - 106 | * Check if the cursor's current node is *missing*. - 107 | * - 108 | * Missing nodes are inserted by the parser in order to recover from - 109 | * certain kinds of syntax errors. - 110 | */ - 111 | get nodeIsMissing(): boolean { - 112 | marshalTreeCursor(this); - 113 | return C._ts_tree_cursor_current_node_is_missing_wasm(this.tree[0]) === 1; - 114 | } - | - 115 | /** Get the string content of the cursor's current node. */ - 116 | get nodeText(): string { - 117 | marshalTreeCursor(this); - 118 | const startIndex = C._ts_tree_cursor_start_index_wasm(this.tree[0]); - 119 | const endIndex = C._ts_tree_cursor_end_index_wasm(this.tree[0]); - 120 | C._ts_tree_cursor_start_position_wasm(this.tree[0]); - 121 | const startPosition = unmarshalPoint(TRANSFER_BUFFER); - 122 | return getText(this.tree, startIndex, endIndex, startPosition); - 123 | } - | - 124 | /** Get the start position of the cursor's current node. */ - 125 | get startPosition(): Point { - 126 | marshalTreeCursor(this); - 127 | C._ts_tree_cursor_start_position_wasm(this.tree[0]); - 128 | return unmarshalPoint(TRANSFER_BUFFER); - 129 | } - | - 130 | /** Get the end position of the cursor's current node. */ - 131 | get endPosition(): Point { - 132 | marshalTreeCursor(this); - 133 | C._ts_tree_cursor_end_position_wasm(this.tree[0]); - 134 | return unmarshalPoint(TRANSFER_BUFFER); - 135 | } - | - 136 | /** Get the start index of the cursor's current node. */ - 137 | get startIndex(): number { - 138 | marshalTreeCursor(this); - 139 | return C._ts_tree_cursor_start_index_wasm(this.tree[0]); - 140 | } - | - 141 | /** Get the end index of the cursor's current node. 
*/ - 142 | get endIndex(): number { - 143 | marshalTreeCursor(this); - 144 | return C._ts_tree_cursor_end_index_wasm(this.tree[0]); - 145 | } - | - 146 | /** - 147 | * Move this cursor to the first child of its current node. - 148 | * - 149 | * This returns `true` if the cursor successfully moved, and returns - 150 | * `false` if there were no children. - 151 | */ - 152 | gotoFirstChild(): boolean { - 153 | marshalTreeCursor(this); - 154 | const result = C._ts_tree_cursor_goto_first_child_wasm(this.tree[0]); - 155 | unmarshalTreeCursor(this); - 156 | return result === 1; - 157 | } - | - 158 | /** - 159 | * Move this cursor to the last child of its current node. - 160 | * - 161 | * This returns `true` if the cursor successfully moved, and returns - 162 | * `false` if there were no children. - 163 | * - 164 | * Note that this function may be slower than - 165 | * {@link TreeCursor#gotoFirstChild} because it needs to - 166 | * iterate through all the children to compute the child's position. - 167 | */ - 168 | gotoLastChild(): boolean { - 169 | marshalTreeCursor(this); - 170 | const result = C._ts_tree_cursor_goto_last_child_wasm(this.tree[0]); - 171 | unmarshalTreeCursor(this); - 172 | return result === 1; - 173 | } - | - 174 | /** - 175 | * Move this cursor to the parent of its current node. - 176 | * - 177 | * This returns `true` if the cursor successfully moved, and returns - 178 | * `false` if there was no parent node (the cursor was already on the - 179 | * root node). - 180 | * - 181 | * Note that the node the cursor was constructed with is considered the root - 182 | * of the cursor, and the cursor cannot walk outside this node. - 183 | */ - 184 | gotoParent(): boolean { - 185 | marshalTreeCursor(this); - 186 | const result = C._ts_tree_cursor_goto_parent_wasm(this.tree[0]); - 187 | unmarshalTreeCursor(this); - 188 | return result === 1; - 189 | } - | - 190 | /** - 191 | * Move this cursor to the next sibling of its current node. 
- 192 | * - 193 | * This returns `true` if the cursor successfully moved, and returns - 194 | * `false` if there was no next sibling node. - 195 | * - 196 | * Note that the node the cursor was constructed with is considered the root - 197 | * of the cursor, and the cursor cannot walk outside this node. - 198 | */ - 199 | gotoNextSibling(): boolean { - 200 | marshalTreeCursor(this); - 201 | const result = C._ts_tree_cursor_goto_next_sibling_wasm(this.tree[0]); - 202 | unmarshalTreeCursor(this); - 203 | return result === 1; - 204 | } - | - 205 | /** - 206 | * Move this cursor to the previous sibling of its current node. - 207 | * - 208 | * This returns `true` if the cursor successfully moved, and returns - 209 | * `false` if there was no previous sibling node. - 210 | * - 211 | * Note that this function may be slower than - 212 | * {@link TreeCursor#gotoNextSibling} due to how node - 213 | * positions are stored. In the worst case, this will need to iterate - 214 | * through all the children up to the previous sibling node to recalculate - 215 | * its position. Also note that the node the cursor was constructed with is - 216 | * considered the root of the cursor, and the cursor cannot walk outside this node. - 217 | */ - 218 | gotoPreviousSibling(): boolean { - 219 | marshalTreeCursor(this); - 220 | const result = C._ts_tree_cursor_goto_previous_sibling_wasm(this.tree[0]); - 221 | unmarshalTreeCursor(this); - 222 | return result === 1; - 223 | } - | - 224 | /** - 225 | * Move the cursor to the node that is the nth descendant of - 226 | * the original node that the cursor was constructed with, where - 227 | * zero represents the original node itself. 
- 228 | */ - 229 | gotoDescendant(goalDescendantIndex: number): void { - 230 | marshalTreeCursor(this); - 231 | C._ts_tree_cursor_goto_descendant_wasm(this.tree[0], goalDescendantIndex); - 232 | unmarshalTreeCursor(this); - 233 | } - | - 234 | /** - 235 | * Move this cursor to the first child of its current node that contains or - 236 | * starts after the given byte offset. - 237 | * - 238 | * This returns `true` if the cursor successfully moved to a child node, and returns - 239 | * `false` if no such child was found. - 240 | */ - 241 | gotoFirstChildForIndex(goalIndex: number): boolean { - 242 | marshalTreeCursor(this); - 243 | C.setValue(TRANSFER_BUFFER + SIZE_OF_CURSOR, goalIndex, 'i32'); - 244 | const result = C._ts_tree_cursor_goto_first_child_for_index_wasm(this.tree[0]); - 245 | unmarshalTreeCursor(this); - 246 | return result === 1; - 247 | } - | - 248 | /** - 249 | * Move this cursor to the first child of its current node that contains or - 250 | * starts after the given byte offset. - 251 | * - 252 | * This returns the index of the child node if one was found, and returns - 253 | * `null` if no such child was found. - 254 | */ - 255 | gotoFirstChildForPosition(goalPosition: Point): boolean { - 256 | marshalTreeCursor(this); - 257 | marshalPoint(TRANSFER_BUFFER + SIZE_OF_CURSOR, goalPosition); - 258 | const result = C._ts_tree_cursor_goto_first_child_for_position_wasm(this.tree[0]); - 259 | unmarshalTreeCursor(this); - 260 | return result === 1; - 261 | } - | - 262 | /** - 263 | * Re-initialize this tree cursor to start at the original node that the - 264 | * cursor was constructed with. - 265 | */ - 266 | reset(node: Node): void { - 267 | marshalNode(node); - 268 | marshalTreeCursor(this, TRANSFER_BUFFER + SIZE_OF_NODE); - 269 | C._ts_tree_cursor_reset_wasm(this.tree[0]); - 270 | unmarshalTreeCursor(this); - 271 | } - | - 272 | /** - 273 | * Re-initialize a tree cursor to the same position as another cursor. 
- 274 | * - 275 | * Unlike {@link TreeCursor#reset}, this will not lose parent - 276 | * information and allows reusing already created cursors. - 277 | */ - 278 | resetTo(cursor: TreeCursor): void { - 279 | marshalTreeCursor(this, TRANSFER_BUFFER); - 280 | marshalTreeCursor(cursor, TRANSFER_BUFFER + SIZE_OF_CURSOR); - 281 | C._ts_tree_cursor_reset_to_wasm(this.tree[0], cursor.tree[0]); - 282 | unmarshalTreeCursor(this); - 283 | } - 284 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/src/tree.ts: --------------------------------------------------------------------------------- - 1 | import { INTERNAL, Internal, assertInternal, ParseCallback, Point, Range, SIZE_OF_NODE, SIZE_OF_INT, SIZE_OF_RANGE, C } from './constants'; - 2 | import { Language } from './language'; - 3 | import { Node } from './node'; - 4 | import { TreeCursor } from './tree_cursor'; - 5 | import { marshalEdit, marshalPoint, unmarshalNode, unmarshalRange } from './marshal'; - 6 | import { TRANSFER_BUFFER } from './parser'; - 7 | import { Edit } from './edit'; - | - 8 | /** @internal */ - 9 | export function getText(tree: Tree, startIndex: number, endIndex: number, startPosition: Point): string { - 10 | const length = endIndex - startIndex; - 11 | let result = tree.textCallback(startIndex, startPosition); - 12 | if (result) { - 13 | startIndex += result.length; - 14 | while (startIndex < endIndex) { - 15 | const string = tree.textCallback(startIndex, startPosition); - 16 | if (string && string.length > 0) { - 17 | startIndex += string.length; - 18 | result += string; - 19 | } else { - 20 | break; - 21 | } - 22 | } - 23 | if (startIndex > endIndex) { - 24 | result = result.slice(0, length); - 25 | } - 26 | } - 27 | return result ?? ''; - 28 | } - | - 29 | /** A tree that represents the syntactic structure of a source code file. 
*/ - 30 | export class Tree { - 31 | /** @internal */ - 32 | private [0] = 0; // Internal handle for Wasm - | - 33 | /** @internal */ - 34 | textCallback: ParseCallback; - | - 35 | /** The language that was used to parse the syntax tree. */ - 36 | language: Language; - | - 37 | /** @internal */ - 38 | constructor(internal: Internal, address: number, language: Language, textCallback: ParseCallback) { - 39 | assertInternal(internal); - 40 | this[0] = address; - 41 | this.language = language; - 42 | this.textCallback = textCallback; - 43 | } - | - 44 | /** Create a shallow copy of the syntax tree. This is very fast. */ - 45 | copy(): Tree { - 46 | const address = C._ts_tree_copy(this[0]); - 47 | return new Tree(INTERNAL, address, this.language, this.textCallback); - 48 | } - | - 49 | /** Delete the syntax tree, freeing its resources. */ - 50 | delete(): void { - 51 | C._ts_tree_delete(this[0]); - 52 | this[0] = 0; - 53 | } - | - 54 | /** Get the root node of the syntax tree. */ - 55 | get rootNode(): Node { - 56 | C._ts_tree_root_node_wasm(this[0]); - 57 | return unmarshalNode(this)!; - 58 | } - | - 59 | /** - 60 | * Get the root node of the syntax tree, but with its position shifted - 61 | * forward by the given offset. - 62 | */ - 63 | rootNodeWithOffset(offsetBytes: number, offsetExtent: Point): Node { - 64 | const address = TRANSFER_BUFFER + SIZE_OF_NODE; - 65 | C.setValue(address, offsetBytes, 'i32'); - 66 | marshalPoint(address + SIZE_OF_INT, offsetExtent); - 67 | C._ts_tree_root_node_with_offset_wasm(this[0]); - 68 | return unmarshalNode(this)!; - 69 | } - | - 70 | /** - 71 | * Edit the syntax tree to keep it in sync with source code that has been - 72 | * edited. - 73 | * - 74 | * You must describe the edit both in terms of byte offsets and in terms of - 75 | * row/column coordinates. 
- 76 | */ - 77 | edit(edit: Edit): void { - 78 | marshalEdit(edit); - 79 | C._ts_tree_edit_wasm(this[0]); - 80 | } - | - 81 | /** Create a new {@link TreeCursor} starting from the root of the tree. */ - 82 | walk(): TreeCursor { - 83 | return this.rootNode.walk(); - 84 | } - | - 85 | /** - 86 | * Compare this old edited syntax tree to a new syntax tree representing - 87 | * the same document, returning a sequence of ranges whose syntactic - 88 | * structure has changed. - 89 | * - 90 | * For this to work correctly, this syntax tree must have been edited such - 91 | * that its ranges match up to the new tree. Generally, you'll want to - 92 | * call this method right after calling one of the [`Parser::parse`] - 93 | * functions. Call it on the old tree that was passed to parse, and - 94 | * pass the new tree that was returned from `parse`. - 95 | */ - 96 | getChangedRanges(other: Tree): Range[] { - 97 | if (!(other instanceof Tree)) { - 98 | throw new TypeError('Argument must be a Tree'); - 99 | } - | - 100 | C._ts_tree_get_changed_ranges_wasm(this[0], other[0]); - 101 | const count = C.getValue(TRANSFER_BUFFER, 'i32'); - 102 | const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 103 | const result = new Array(count); - | - 104 | if (count > 0) { - 105 | let address = buffer; - 106 | for (let i = 0; i < count; i++) { - 107 | result[i] = unmarshalRange(address); - 108 | address += SIZE_OF_RANGE; - 109 | } - 110 | C._free(buffer); - 111 | } - 112 | return result; - 113 | } - | - 114 | /** Get the included ranges that were used to parse the syntax tree. 
*/ - 115 | getIncludedRanges(): Range[] { - 116 | C._ts_tree_included_ranges_wasm(this[0]); - 117 | const count = C.getValue(TRANSFER_BUFFER, 'i32'); - 118 | const buffer = C.getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32'); - 119 | const result = new Array(count); - | - 120 | if (count > 0) { - 121 | let address = buffer; - 122 | for (let i = 0; i < count; i++) { - 123 | result[i] = unmarshalRange(address); - 124 | address += SIZE_OF_RANGE; - 125 | } - 126 | C._free(buffer); - 127 | } - 128 | return result; - 129 | } - 130 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/test/edit.test.ts: --------------------------------------------------------------------------------- - 1 | import { describe, it, expect } from 'vitest'; - 2 | import { Edit } from '../src'; - | - 3 | describe('Edit', () => { - 4 | it('edits a point after the edit', () => { - 5 | const edit = new Edit({ - 6 | startIndex: 5, - 7 | oldEndIndex: 5, - 8 | newEndIndex: 10, - 9 | startPosition: { row: 0, column: 5 }, - 10 | oldEndPosition: { row: 0, column: 5 }, - 11 | newEndPosition: { row: 0, column: 10 }, - 12 | }); - | - 13 | const point = { row: 0, column: 8 }; - 14 | const index = 8; - 15 | const result = edit.editPoint(point, index); - 16 | expect(result.point).toEqual({ row: 0, column: 13 }); - 17 | expect(result.index).toBe(13); - 18 | }); - | - 19 | it('edits a point before the edit', () => { - 20 | const edit = new Edit({ - 21 | startIndex: 5, - 22 | oldEndIndex: 5, - 23 | newEndIndex: 10, - 24 | startPosition: { row: 0, column: 5 }, - 25 | oldEndPosition: { row: 0, column: 5 }, - 26 | newEndPosition: { row: 0, column: 10 }, - 27 | }); - | - 28 | const point = { row: 0, column: 2 }; - 29 | const index = 2; - 30 | const result = edit.editPoint(point, index); - 31 | expect(result.point).toEqual({ row: 0, column: 2 }); - 32 | expect(result.index).toBe(2); - 33 | }); - | - 34 | it('edits a point at the start of the edit', () => { - 35 | 
const edit = new Edit({ - 36 | startIndex: 5, - 37 | oldEndIndex: 5, - 38 | newEndIndex: 10, - 39 | startPosition: { row: 0, column: 5 }, - 40 | oldEndPosition: { row: 0, column: 5 }, - 41 | newEndPosition: { row: 0, column: 10 }, - 42 | }); - | - 43 | const point = { row: 0, column: 5 }; - 44 | const index = 5; - 45 | const result = edit.editPoint(point, index); - 46 | expect(result.point).toEqual({ row: 0, column: 10 }); - 47 | expect(result.index).toBe(10); - 48 | }); - | - 49 | it('edits a range after the edit', () => { - 50 | const edit = new Edit({ - 51 | startIndex: 10, - 52 | oldEndIndex: 15, - 53 | newEndIndex: 20, - 54 | startPosition: { row: 1, column: 0 }, - 55 | oldEndPosition: { row: 1, column: 5 }, - 56 | newEndPosition: { row: 2, column: 0 }, - 57 | }); - | - 58 | const range = { - 59 | startPosition: { row: 2, column: 0 }, - 60 | endPosition: { row: 2, column: 5 }, - 61 | startIndex: 20, - 62 | endIndex: 25, - 63 | }; - 64 | const result = edit.editRange(range); - 65 | expect(result.startIndex).toBe(25); - 66 | expect(result.endIndex).toBe(30); - 67 | expect(result.startPosition).toEqual({ row: 3, column: 0 }); - 68 | expect(result.endPosition).toEqual({ row: 3, column: 5 }); - 69 | }); - | - 70 | it('edits a range before the edit', () => { - 71 | const edit = new Edit({ - 72 | startIndex: 10, - 73 | oldEndIndex: 15, - 74 | newEndIndex: 20, - 75 | startPosition: { row: 1, column: 0 }, - 76 | oldEndPosition: { row: 1, column: 5 }, - 77 | newEndPosition: { row: 2, column: 0 }, - 78 | }); - | - 79 | const range = { - 80 | startPosition: { row: 0, column: 5 }, - 81 | endPosition: { row: 0, column: 8 }, - 82 | startIndex: 5, - 83 | endIndex: 8, - 84 | }; - 85 | const result = edit.editRange(range); - 86 | expect(result.startIndex).toBe(5); - 87 | expect(result.endIndex).toBe(8); - 88 | expect(result.startPosition).toEqual({ row: 0, column: 5 }); - 89 | expect(result.endPosition).toEqual({ row: 0, column: 8 }); - 90 | }); - | - 91 | it('edits a range 
overlapping the edit', () => { - 92 | const edit = new Edit({ - 93 | startIndex: 10, - 94 | oldEndIndex: 15, - 95 | newEndIndex: 20, - 96 | startPosition: { row: 1, column: 0 }, - 97 | oldEndPosition: { row: 1, column: 5 }, - 98 | newEndPosition: { row: 2, column: 0 } - 99 | }); - | - 100 | const range = { - 101 | startPosition: { row: 0, column: 8 }, - 102 | endPosition: { row: 1, column: 2 }, - 103 | startIndex: 8, - 104 | endIndex: 12, - 105 | }; - 106 | const result = edit.editRange(range); - 107 | expect(result.startIndex).toBe(8); - 108 | expect(result.endIndex).toBe(10); - 109 | expect(result.startPosition).toEqual({ row: 0, column: 8 }); - 110 | expect(result.endPosition).toEqual({ row: 1, column: 0 }); - 111 | }); - 112 | }); - - - --------------------------------------------------------------------------------- -/lib/binding_web/test/helper.ts: --------------------------------------------------------------------------------- - 1 | import { Parser, Language } from '../src'; - 2 | import path from 'path'; - | - 3 | // https://github.com/tree-sitter/tree-sitter/blob/master/xtask/src/fetch.rs#L15 - 4 | export type LanguageName = 'bash' | 'c' | 'cpp' | 'embedded-template' | 'go' | 'html' | 'java' | 'javascript' | 'jsdoc' | 'json' | 'php' | 'python' | 'ruby' | 'rust' | 'typescript' | 'tsx'; - | - 5 | function languageURL(name: LanguageName): string { - 6 | const basePath = process.cwd(); - 7 | return path.join(basePath, `../../target/release/tree-sitter-${name}.wasm`); - 8 | } - | - 9 | export default Parser.init().then(async () => ({ - 10 | languageURL, - 11 | C: await Language.load(languageURL('c')), - 12 | EmbeddedTemplate: await Language.load(languageURL('embedded-template')), - 13 | HTML: await Language.load(languageURL('html')), - 14 | JavaScript: await Language.load(languageURL('javascript')), - 15 | JSON: await Language.load(languageURL('json')), - 16 | Python: await Language.load(languageURL('python')), - 17 | Rust: await 
Language.load(languageURL('rust')), - 18 | })); - - - --------------------------------------------------------------------------------- -/lib/binding_web/test/language.test.ts: --------------------------------------------------------------------------------- - 1 | import { describe, it, expect, beforeAll, afterAll } from 'vitest'; - 2 | import helper from './helper'; - 3 | import type { LookaheadIterator, Language } from '../src'; - 4 | import { Parser } from '../src'; - | - 5 | let JavaScript: Language; - 6 | let Rust: Language; - | - 7 | describe('Language', () => { - 8 | beforeAll(async () => ({ JavaScript, Rust } = await helper)); - | - 9 | describe('.name, .version', () => { - 10 | it('returns the name and version of the language', () => { - 11 | expect(JavaScript.name).toBe('javascript'); - 12 | expect(JavaScript.abiVersion).toBe(15); - 13 | }); - 14 | }); - | - 15 | describe('.fieldIdForName, .fieldNameForId', () => { - 16 | it('converts between the string and integer representations of fields', () => { - 17 | const nameId = JavaScript.fieldIdForName('name'); - 18 | const bodyId = JavaScript.fieldIdForName('body'); - | - 19 | expect(nameId).toBeLessThan(JavaScript.fieldCount); - 20 | expect(bodyId).toBeLessThan(JavaScript.fieldCount); - 21 | expect(JavaScript.fieldNameForId(nameId!)).toBe('name'); - 22 | expect(JavaScript.fieldNameForId(bodyId!)).toBe('body'); - 23 | }); - | - 24 | it('handles invalid inputs', () => { - 25 | expect(JavaScript.fieldIdForName('namezzz')).toBeNull(); - 26 | expect(JavaScript.fieldNameForId(-3)).toBeNull(); - 27 | expect(JavaScript.fieldNameForId(10000)).toBeNull(); - 28 | }); - 29 | }); - | - 30 | describe('.idForNodeType, .nodeTypeForId, .nodeTypeIsNamed', () => { - 31 | it('converts between the string and integer representations of a node type', () => { - 32 | const exportStatementId = JavaScript.idForNodeType('export_statement', true)!; - 33 | const starId = JavaScript.idForNodeType('*', false)!; - | - 34 | 
expect(exportStatementId).toBeLessThan(JavaScript.nodeTypeCount); - 35 | expect(starId).toBeLessThan(JavaScript.nodeTypeCount); - 36 | expect(JavaScript.nodeTypeIsNamed(exportStatementId)).toBe(true); - 37 | expect(JavaScript.nodeTypeForId(exportStatementId)).toBe('export_statement'); - 38 | expect(JavaScript.nodeTypeIsNamed(starId)).toBe(false); - 39 | expect(JavaScript.nodeTypeForId(starId)).toBe('*'); - 40 | }); - | - 41 | it('handles invalid inputs', () => { - 42 | expect(JavaScript.nodeTypeForId(-3)).toBeNull(); - 43 | expect(JavaScript.nodeTypeForId(10000)).toBeNull(); - 44 | expect(JavaScript.idForNodeType('export_statement', false)).toBeNull(); - 45 | }); - 46 | }); - | - 47 | describe('Supertypes', () => { - 48 | it('gets the supertypes and subtypes of a parser', () => { - 49 | const supertypes = Rust.supertypes; - 50 | const names = supertypes.map((id) => Rust.nodeTypeForId(id)); - 51 | expect(names).toEqual([ - 52 | '_expression', - 53 | '_literal', - 54 | '_literal_pattern', - 55 | '_pattern', - 56 | '_type' - 57 | ]); - | - 58 | for (const id of supertypes) { - 59 | const name = Rust.nodeTypeForId(id); - 60 | const subtypes = Rust.subtypes(id); - 61 | let subtypeNames = subtypes.map((id) => Rust.nodeTypeForId(id)); - 62 | subtypeNames = [...new Set(subtypeNames)].sort(); // Remove duplicates & sort - | - 63 | switch (name) { - 64 | case '_literal': - 65 | expect(subtypeNames).toEqual([ - 66 | 'boolean_literal', - 67 | 'char_literal', - 68 | 'float_literal', - 69 | 'integer_literal', - 70 | 'raw_string_literal', - 71 | 'string_literal', - 72 | ]); - 73 | break; - 74 | case '_pattern': - 75 | expect(subtypeNames).toEqual([ - 76 | '_', - 77 | '_literal_pattern', - 78 | 'captured_pattern', - 79 | 'const_block', - 80 | 'generic_pattern', - 81 | 'identifier', - 82 | 'macro_invocation', - 83 | 'mut_pattern', - 84 | 'or_pattern', - 85 | 'range_pattern', - 86 | 'ref_pattern', - 87 | 'reference_pattern', - 88 | 'remaining_field_pattern', - 89 | 
'scoped_identifier', - 90 | 'slice_pattern', - 91 | 'struct_pattern', - 92 | 'tuple_pattern', - 93 | 'tuple_struct_pattern', - 94 | ]); - 95 | break; - 96 | case '_type': - 97 | expect(subtypeNames).toEqual([ - 98 | 'abstract_type', - 99 | 'array_type', - 100 | 'bounded_type', - 101 | 'dynamic_type', - 102 | 'function_type', - 103 | 'generic_type', - 104 | 'macro_invocation', - 105 | 'metavariable', - 106 | 'never_type', - 107 | 'pointer_type', - 108 | 'primitive_type', - 109 | 'reference_type', - 110 | 'removed_trait_bound', - 111 | 'scoped_type_identifier', - 112 | 'tuple_type', - 113 | 'type_identifier', - 114 | 'unit_type', - 115 | ]); - 116 | break; - 117 | } - 118 | } - 119 | }); - 120 | }); - 121 | }); - | - 122 | describe('Lookahead iterator', () => { - 123 | let lookahead: LookaheadIterator; - 124 | let state: number; - | - 125 | beforeAll(async () => { - 126 | ({ JavaScript } = await helper); - 127 | const parser = new Parser(); - 128 | parser.setLanguage(JavaScript); - 129 | const tree = parser.parse('function fn() {}')!; - 130 | parser.delete(); - 131 | const cursor = tree.walk(); - 132 | expect(cursor.gotoFirstChild()).toBe(true); - 133 | expect(cursor.gotoFirstChild()).toBe(true); - 134 | state = cursor.currentNode.nextParseState; - 135 | lookahead = JavaScript.lookaheadIterator(state)!; - 136 | expect(lookahead).toBeDefined(); - 137 | }); - | - 138 | afterAll(() => { lookahead.delete() }); - | - 139 | const expected = ['(', 'identifier', '*', 'formal_parameters', 'html_comment', 'comment']; - | - 140 | it('should iterate over valid symbols in the state', () => { - 141 | const symbols = Array.from(lookahead); - 142 | expect(symbols).toEqual(expect.arrayContaining(expected)); - 143 | expect(symbols).toHaveLength(expected.length); - 144 | }); - | - 145 | it('should reset to the initial state', () => { - 146 | expect(lookahead.resetState(state)).toBe(true); - 147 | const symbols = Array.from(lookahead); - 148 | 
expect(symbols).toEqual(expect.arrayContaining(expected)); - 149 | expect(symbols).toHaveLength(expected.length); - 150 | }); - | - 151 | it('should reset', () => { - 152 | expect(lookahead.reset(JavaScript, state)).toBe(true); - 153 | const symbols = Array.from(lookahead); - 154 | expect(symbols).toEqual(expect.arrayContaining(expected)); - 155 | expect(symbols).toHaveLength(expected.length); - 156 | }); - 157 | }); - - - --------------------------------------------------------------------------------- -/lib/binding_web/test/node.test.ts: --------------------------------------------------------------------------------- - 1 | import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest'; - 2 | import type { Language, Tree, Node } from '../src'; - 3 | import { Parser } from '../src'; - 4 | import helper from './helper'; - | - 5 | let C: Language; - 6 | let JavaScript: Language; - 7 | let JSON: Language; - 8 | let EmbeddedTemplate: Language; - 9 | let Python: Language; - | - 10 | const JSON_EXAMPLE = ` - 11 | [ - 12 | 123, - 13 | false, - 14 | { - 15 | "x": null - 16 | } - 17 | ] - 18 | `; - | - 19 | function getAllNodes(tree: Tree): Node[] { - 20 | const result: Node[] = []; - 21 | let visitedChildren = false; - 22 | const cursor = tree.walk(); - | - 23 | while (true) { - 24 | if (!visitedChildren) { - 25 | result.push(cursor.currentNode); - 26 | if (!cursor.gotoFirstChild()) { - 27 | visitedChildren = true; - 28 | } - 29 | } else if (cursor.gotoNextSibling()) { - 30 | visitedChildren = false; - 31 | } else if (!cursor.gotoParent()) { - 32 | break; - 33 | } - 34 | } - 35 | return result; - 36 | } - | - 37 | describe('Node', () => { - 38 | let parser: Parser; - 39 | let tree: Tree | null; - | - 40 | beforeAll(async () => { - 41 | ({ C, EmbeddedTemplate, JavaScript, JSON, Python } = await helper); - 42 | }); - | - 43 | beforeEach(() => { - 44 | tree = null; - 45 | parser = new Parser(); - 46 | parser.setLanguage(JavaScript); - 47 | }); - | - 48 | 
afterEach(() => { - 49 | parser.delete(); - 50 | tree!.delete(); - 51 | }); - | - 52 | describe('.children', () => { - 53 | it('returns an array of child nodes', () => { - 54 | tree = parser.parse('x10 + 1000')!; - 55 | expect(tree.rootNode.children).toHaveLength(1); - 56 | const sumNode = tree.rootNode.firstChild!.firstChild!; - 57 | expect(sumNode.children.map(child => child.type)).toEqual(['identifier', '+', 'number']); - 58 | }); - 59 | }); - | - 60 | describe('.namedChildren', () => { - 61 | it('returns an array of named child nodes', () => { - 62 | tree = parser.parse('x10 + 1000')!; - 63 | const sumNode = tree.rootNode.firstChild!.firstChild!; - 64 | expect(tree.rootNode.namedChildren).toHaveLength(1); - 65 | expect(sumNode.namedChildren.map(child => child.type)).toEqual(['identifier', 'number']); - 66 | }); - 67 | }); - | - 68 | describe('.childrenForFieldName', () => { - 69 | it('returns an array of child nodes for the given field name', () => { - 70 | parser.setLanguage(Python); - 71 | const source = ` - 72 | if one: - 73 | a() - 74 | elif two: - 75 | b() - 76 | elif three: - 77 | c() - 78 | elif four: - 79 | d()`; - | - 80 | tree = parser.parse(source)!; - 81 | const node = tree.rootNode.firstChild!; - 82 | expect(node.type).toBe('if_statement'); - 83 | const alternatives = node.childrenForFieldName('alternative'); - 84 | const alternativeTexts = alternatives.map(n => { - 85 | const condition = n.childForFieldName('condition')!; - 86 | return source.slice(condition.startIndex, condition.endIndex); - 87 | }); - 88 | expect(alternativeTexts).toEqual(['two', 'three', 'four']); - 89 | }); - 90 | }); - | - 91 | describe('.startIndex and .endIndex', () => { - 92 | it('returns the character index where the node starts/ends in the text', () => { - 93 | tree = parser.parse('a👍👎1 / b👎c👎')!; - 94 | const quotientNode = tree.rootNode.firstChild!.firstChild!; - | - 95 | expect(quotientNode.startIndex).toBe(0); - 96 | expect(quotientNode.endIndex).toBe(15); - 97 | 
expect(quotientNode.children.map(child => child.startIndex)).toEqual([0, 7, 9]); - 98 | expect(quotientNode.children.map(child => child.endIndex)).toEqual([6, 8, 15]); - 99 | }); - 100 | }); - | - 101 | describe('.startPosition and .endPosition', () => { - 102 | it('returns the row and column where the node starts/ends in the text', () => { - 103 | tree = parser.parse('x10 + 1000')!; - 104 | const sumNode = tree.rootNode.firstChild!.firstChild!; - 105 | expect(sumNode.type).toBe('binary_expression'); - | - 106 | expect(sumNode.startPosition).toEqual({ row: 0, column: 0 }); - 107 | expect(sumNode.endPosition).toEqual({ row: 0, column: 10 }); - 108 | expect(sumNode.children.map((child) => child.startPosition)).toEqual([ - 109 | { row: 0, column: 0 }, - 110 | { row: 0, column: 4 }, - 111 | { row: 0, column: 6 }, - 112 | ]); - 113 | expect(sumNode.children.map((child) => child.endPosition)).toEqual([ - 114 | { row: 0, column: 3 }, - 115 | { row: 0, column: 5 }, - 116 | { row: 0, column: 10 }, - 117 | ]); - 118 | }); - | - 119 | it('handles characters that occupy two UTF16 code units', () => { - 120 | tree = parser.parse('a👍👎1 /\n b👎c👎')!; - 121 | const sumNode = tree.rootNode.firstChild!.firstChild!; - 122 | expect(sumNode.children.map(child => [child.startPosition, child.endPosition])).toEqual([ - 123 | [{ row: 0, column: 0 }, { row: 0, column: 6 }], - 124 | [{ row: 0, column: 7 }, { row: 0, column: 8 }], - 125 | [{ row: 1, column: 1 }, { row: 1, column: 7 }] - 126 | ]); - 127 | }); - 128 | }); - | - 129 | describe('.parent', () => { - 130 | it('returns the node\'s parent', () => { - 131 | tree = parser.parse('x10 + 1000')!; - 132 | const sumNode = tree.rootNode.firstChild!; - 133 | const variableNode = sumNode.firstChild!; - 134 | expect(sumNode.id).not.toBe(variableNode.id); - 135 | expect(sumNode.id).toBe(variableNode.parent!.id); - 136 | expect(tree.rootNode.id).toBe(sumNode.parent!.id); - 137 | }); - 138 | }); - | - 139 | describe('.child(), .firstChild, 
.lastChild', () => { - 140 | it('returns null when the node has no children', () => { - 141 | tree = parser.parse('x10 + 1000')!; - 142 | const sumNode = tree.rootNode.firstChild!.firstChild!; - 143 | const variableNode = sumNode.firstChild!; - 144 | expect(variableNode.firstChild).toBeNull(); - 145 | expect(variableNode.lastChild).toBeNull(); - 146 | expect(variableNode.firstNamedChild).toBeNull(); - 147 | expect(variableNode.lastNamedChild).toBeNull(); - 148 | expect(variableNode.child(1)).toBeNull(); - 149 | }); - 150 | }); - | - 151 | describe('.childForFieldName()', () => { - 152 | it('returns node for the given field name', () => { - 153 | tree = parser.parse('class A { b() {} }')!; - | - 154 | const classNode = tree.rootNode.firstChild!; - 155 | expect(classNode.type).toBe('class_declaration'); - | - 156 | const classNameNode = classNode.childForFieldName('name')!; - 157 | expect(classNameNode.type).toBe('identifier'); - 158 | expect(classNameNode.text).toBe('A'); - | - 159 | const bodyNode = classNode.childForFieldName('body')!; - 160 | expect(bodyNode.type).toBe('class_body'); - 161 | expect(bodyNode.text).toBe('{ b() {} }'); - | - 162 | const methodNode = bodyNode.firstNamedChild!; - 163 | expect(methodNode.type).toBe('method_definition'); - 164 | expect(methodNode.text).toBe('b() {}'); - 165 | }); - 166 | }); - | - 167 | describe('.childWithDescendant()', () => { - 168 | it('correctly retrieves immediate children', () => { - 169 | const sourceCode = 'let x = 1; console.log(x);'; - 170 | tree = parser.parse(sourceCode)!; - 171 | const root = tree.rootNode - 172 | const child = root.children[0].children[0] - 173 | const a = root.childWithDescendant(child) - 174 | expect(a!.startIndex).toBe(0) - 175 | const b = a!.childWithDescendant(child) - 176 | expect(b).toEqual(child) - 177 | const c = b!.childWithDescendant(child) - 178 | expect(c).toBeNull() - 179 | }); - 180 | }); - | - 181 | describe('.nextSibling and .previousSibling', () => { - 182 | it('returns 
the node\'s next and previous sibling', () => { - 183 | tree = parser.parse('x10 + 1000')!; - 184 | const sumNode = tree.rootNode.firstChild!.firstChild!; - 185 | expect(sumNode.children[1].id).toBe(sumNode.children[0].nextSibling!.id); - 186 | expect(sumNode.children[2].id).toBe(sumNode.children[1].nextSibling!.id); - 187 | expect(sumNode.children[0].id).toBe(sumNode.children[1].previousSibling!.id); - 188 | expect(sumNode.children[1].id).toBe(sumNode.children[2].previousSibling!.id); - 189 | }); - 190 | }); - | - 191 | describe('.nextNamedSibling and .previousNamedSibling', () => { - 192 | it('returns the node\'s next and previous named sibling', () => { - 193 | tree = parser.parse('x10 + 1000')!; - 194 | const sumNode = tree.rootNode.firstChild!.firstChild!; - 195 | expect(sumNode.namedChildren[1].id).toBe(sumNode.namedChildren[0].nextNamedSibling!.id); - 196 | expect(sumNode.namedChildren[0].id).toBe(sumNode.namedChildren[1].previousNamedSibling!.id); - 197 | }); - 198 | }); - | - 199 | describe('.descendantForIndex(min, max)', () => { - 200 | it('returns the smallest node that spans the given range', () => { - 201 | tree = parser.parse('x10 + 1000')!; - 202 | const sumNode = tree.rootNode.firstChild!.firstChild!; - 203 | expect(sumNode.descendantForIndex(1, 2)!.type).toBe('identifier'); - 204 | expect(sumNode.descendantForIndex(4, 4)!.type).toBe('+'); - | - 205 | expect(() => { - 206 | // @ts-expect-error Testing invalid arguments - 207 | sumNode.descendantForIndex(1, {}); - 208 | }).toThrow('Arguments must be numbers'); - | - 209 | expect(() => { - 210 | // @ts-expect-error Testing invalid arguments - 211 | sumNode.descendantForIndex(undefined); - 212 | }).toThrow('Arguments must be numbers'); - 213 | }); - 214 | }); - | - 215 | describe('.namedDescendantForIndex', () => { - 216 | it('returns the smallest named node that spans the given range', () => { - 217 | tree = parser.parse('x10 + 1000')!; - 218 | const sumNode = tree.rootNode.firstChild!; - 219 | 
expect(sumNode.descendantForIndex(1, 2)!.type).toBe('identifier'); - 220 | expect(sumNode.descendantForIndex(4, 4)!.type).toBe('+'); - 221 | }); - 222 | }); - | - 223 | describe('.descendantForPosition', () => { - 224 | it('returns the smallest node that spans the given range', () => { - 225 | tree = parser.parse('x10 + 1000')!; - 226 | const sumNode = tree.rootNode.firstChild!; - | - 227 | expect(sumNode.descendantForPosition({ row: 0, column: 1 }, { row: 0, column: 2 })!.type).toBe('identifier'); - 228 | expect(sumNode.descendantForPosition({ row: 0, column: 4 })!.type).toBe('+'); - | - 229 | expect(() => { - 230 | // @ts-expect-error Testing invalid arguments - 231 | sumNode.descendantForPosition(1, {}); - 232 | }).toThrow('Arguments must be {row, column} objects'); - | - 233 | expect(() => { - 234 | // @ts-expect-error Testing invalid arguments - 235 | sumNode.descendantForPosition(undefined); - 236 | }).toThrow('Arguments must be {row, column} objects'); - 237 | }); - 238 | }); - | - 239 | describe('.namedDescendantForPosition(min, max)', () => { - 240 | it('returns the smallest named node that spans the given range', () => { - 241 | tree = parser.parse('x10 + 1000')!; - 242 | const sumNode = tree.rootNode.firstChild!; - | - 243 | expect(sumNode.namedDescendantForPosition({ row: 0, column: 1 }, { row: 0, column: 2 })!.type).toBe('identifier') - 244 | expect(sumNode.namedDescendantForPosition({ row: 0, column: 4 })!.type).toBe('binary_expression'); - 245 | }); - 246 | }); - | - 247 | describe('.hasError', () => { - 248 | it('returns true if the node contains an error', () => { - 249 | tree = parser.parse('1 + 2 * * 3')!; - 250 | const node = tree.rootNode; - 251 | expect(node.toString()).toBe( - 252 | '(program (expression_statement (binary_expression left: (number) right: (binary_expression left: (number) (ERROR) right: (number)))))' - 253 | ); - | - 254 | const sum = node.firstChild!.firstChild!; - 255 | expect(sum.hasError).toBe(true); - 256 | 
expect(sum.children[0].hasError).toBe(false); - 257 | expect(sum.children[1].hasError).toBe(false); - 258 | expect(sum.children[2].hasError).toBe(true); - 259 | }); - 260 | }); - | - 261 | describe('.isError', () => { - 262 | it('returns true if the node is an error', () => { - 263 | tree = parser.parse('2 * * 3')!; - 264 | const node = tree.rootNode; - 265 | expect(node.toString()).toBe( - 266 | '(program (expression_statement (binary_expression left: (number) (ERROR) right: (number))))' - 267 | ); - | - 268 | const multi = node.firstChild!.firstChild!; - 269 | expect(multi.hasError).toBe(true); - 270 | expect(multi.children[0].isError).toBe(false); - 271 | expect(multi.children[1].isError).toBe(false); - 272 | expect(multi.children[2].isError).toBe(true); - 273 | expect(multi.children[3].isError).toBe(false); - 274 | }); - 275 | }); - | - 276 | describe('.isMissing', () => { - 277 | it('returns true if the node was inserted via error recovery', () => { - 278 | tree = parser.parse('(2 ||)')!; - 279 | const node = tree.rootNode; - 280 | expect(node.toString()).toBe( - 281 | '(program (expression_statement (parenthesized_expression (binary_expression left: (number) right: (MISSING identifier)))))' - 282 | ); - | - 283 | const sum = node.firstChild!.firstChild!.firstNamedChild!; - 284 | expect(sum.type).toBe('binary_expression'); - 285 | expect(sum.hasError).toBe(true); - 286 | expect(sum.children[0].isMissing).toBe(false); - 287 | expect(sum.children[1].isMissing).toBe(false); - 288 | expect(sum.children[2].isMissing).toBe(true); - 289 | }); - 290 | }); - | - 291 | describe('.isExtra', () => { - 292 | it('returns true if the node is an extra node like comments', () => { - 293 | tree = parser.parse('foo(/* hi */);')!; - 294 | const node = tree.rootNode; - 295 | const commentNode = node.descendantForIndex(7, 7)!; - | - 296 | expect(node.type).toBe('program'); - 297 | expect(commentNode.type).toBe('comment'); - 298 | expect(node.isExtra).toBe(false); - 299 | 
expect(commentNode.isExtra).toBe(true); - 300 | }); - 301 | }); - | - 302 | describe('.text', () => { - 303 | const text = 'α0 / b👎c👎'; - | - 304 | Object.entries({ - 305 | '.parse(String)': text, - 306 | '.parse(Function)': (offset: number) => text.slice(offset, offset + 4), - 307 | }).forEach(([method, _parse]) => { - 308 | it(`returns the text of a node generated by ${method}`, () => { - 309 | const [numeratorSrc, denominatorSrc] = text.split(/\s*\/\s+/); - 310 | tree = parser.parse(_parse)!; - 311 | const quotientNode = tree.rootNode.firstChild!.firstChild!; - 312 | const [numerator, slash, denominator] = quotientNode.children; - | - 313 | expect(tree.rootNode.text).toBe(text); - 314 | expect(denominator.text).toBe(denominatorSrc); - 315 | expect(quotientNode.text).toBe(text); - 316 | expect(numerator.text).toBe(numeratorSrc); - 317 | expect(slash.text).toBe('/'); - 318 | }); - 319 | }); - 320 | }); - | - 321 | describe('.descendantCount', () => { - 322 | it('returns the number of descendants', () => { - 323 | parser.setLanguage(JSON); - 324 | tree = parser.parse(JSON_EXAMPLE)!; - 325 | const valueNode = tree.rootNode; - 326 | const allNodes = getAllNodes(tree); - | - 327 | expect(valueNode.descendantCount).toBe(allNodes.length); - | - 328 | const cursor = tree.walk(); - 329 | for (let i = 0; i < allNodes.length; i++) { - 330 | const node = allNodes[i]; - 331 | cursor.gotoDescendant(i); - 332 | expect(cursor.currentNode.id).toBe(node.id); - 333 | } - | - 334 | for (let i = allNodes.length - 1; i >= 0; i--) { - 335 | const node = allNodes[i]; - 336 | cursor.gotoDescendant(i); - 337 | expect(cursor.currentNode.id).toBe(node.id); - 338 | } - 339 | }); - | - 340 | it('tests a single node tree', () => { - 341 | parser.setLanguage(EmbeddedTemplate); - 342 | tree = parser.parse('hello')!; - | - 343 | const nodes = getAllNodes(tree); - 344 | expect(nodes).toHaveLength(2); - 345 | expect(tree.rootNode.descendantCount).toBe(2); - | - 346 | const cursor = tree.walk(); - | 
- 347 | cursor.gotoDescendant(0); - 348 | expect(cursor.currentDepth).toBe(0); - 349 | expect(cursor.currentNode.id).toBe(nodes[0].id); - | - 350 | cursor.gotoDescendant(1); - 351 | expect(cursor.currentDepth).toBe(1); - 352 | expect(cursor.currentNode.id).toBe(nodes[1].id); - 353 | }); - 354 | }); - | - 355 | describe('.rootNodeWithOffset', () => { - 356 | it('returns the root node of the tree, offset by the given byte offset', () => { - 357 | tree = parser.parse(' if (a) b')!; - 358 | const node = tree.rootNodeWithOffset(6, { row: 2, column: 2 }); - 359 | expect(node.startIndex).toBe(8); - 360 | expect(node.endIndex).toBe(16); - 361 | expect(node.startPosition).toEqual({ row: 2, column: 4 }); - 362 | expect(node.endPosition).toEqual({ row: 2, column: 12 }); - | - 363 | let child = node.firstChild!.child(2)!; - 364 | expect(child.type).toBe('expression_statement'); - 365 | expect(child.startIndex).toBe(15); - 366 | expect(child.endIndex).toBe(16); - 367 | expect(child.startPosition).toEqual({ row: 2, column: 11 }); - 368 | expect(child.endPosition).toEqual({ row: 2, column: 12 }); - | - 369 | const cursor = node.walk(); - 370 | cursor.gotoFirstChild(); - 371 | cursor.gotoFirstChild(); - 372 | cursor.gotoNextSibling(); - 373 | child = cursor.currentNode; - 374 | expect(child.type).toBe('parenthesized_expression'); - 375 | expect(child.startIndex).toBe(11); - 376 | expect(child.endIndex).toBe(14); - 377 | expect(child.startPosition).toEqual({ row: 2, column: 7 }); - 378 | expect(child.endPosition).toEqual({ row: 2, column: 10 }); - 379 | }); - 380 | }); - | - 381 | describe('.parseState, .nextParseState', () => { - 382 | const text = '10 / 5'; - | - 383 | it('returns node parse state ids', () => { - 384 | tree = parser.parse(text)!; - 385 | const quotientNode = tree.rootNode.firstChild!.firstChild!; - 386 | const [numerator, slash, denominator] = quotientNode.children; - | - 387 | expect(tree.rootNode.parseState).toBe(0); - 388 | // parse states will change on any 
change to the grammar so test that it - 389 | // returns something instead - 390 | expect(numerator.parseState).toBeGreaterThan(0); - 391 | expect(slash.parseState).toBeGreaterThan(0); - 392 | expect(denominator.parseState).toBeGreaterThan(0); - 393 | }); - | - 394 | it('returns next parse state equal to the language', () => { - 395 | tree = parser.parse(text)!; - 396 | const quotientNode = tree.rootNode.firstChild!.firstChild!; - 397 | quotientNode.children.forEach((node) => { - 398 | expect(node.nextParseState).toBe(JavaScript.nextState(node.parseState, node.grammarId)); - 399 | }); - 400 | }); - 401 | }); - | - 402 | describe('.descendantsOfType("ERROR")', () => { - 403 | it('finds all of the descendants of an ERROR node', () => { - 404 | tree = parser.parse( - 405 | `if ({a: 'b'} {c: 'd'}) { - 406 | // ^ ERROR - 407 | x = function(a) { b; } function(c) { d; } - 408 | }` - 409 | )!; - 410 | const errorNode = tree.rootNode; - 411 | const descendants = errorNode.descendantsOfType('ERROR'); - 412 | expect( - 413 | descendants.map((node) => node.startIndex) - 414 | ).toEqual( - 415 | [4] - 416 | ); - 417 | }); - 418 | }); - | - 419 | describe('.descendantsOfType', () => { - 420 | it('finds all descendants of a given type in the given range', () => { - 421 | tree = parser.parse('a + 1 * b * 2 + c + 3')!; - 422 | const outerSum = tree.rootNode.firstChild!.firstChild!; - | - 423 | const descendants = outerSum.descendantsOfType('number', { row: 0, column: 2 }, { row: 0, column: 15 }); - 424 | expect(descendants.map(node => node.startIndex)).toEqual([4, 12]); - 425 | expect(descendants.map(node => node.endPosition)).toEqual([{ row: 0, column: 5 }, { row: 0, column: 13 }]); - 426 | }); - 427 | }); - | - | - | - 428 | describe('.firstChildForIndex(index)', () => { - 429 | it('returns the first child that contains or starts after the given index', () => { - 430 | tree = parser.parse('x10 + 1000')!; - 431 | const sumNode = tree.rootNode.firstChild!.firstChild!; - | - 432 | 
expect(sumNode.firstChildForIndex(0)!.type).toBe('identifier'); - 433 | expect(sumNode.firstChildForIndex(1)!.type).toBe('identifier'); - 434 | expect(sumNode.firstChildForIndex(3)!.type).toBe('+'); - 435 | expect(sumNode.firstChildForIndex(5)!.type).toBe('number'); - 436 | }); - 437 | }); - | - 438 | describe('.firstNamedChildForIndex(index)', () => { - 439 | it('returns the first child that contains or starts after the given index', () => { - 440 | tree = parser.parse('x10 + 1000')!; - 441 | const sumNode = tree.rootNode.firstChild!.firstChild!; - | - 442 | expect(sumNode.firstNamedChildForIndex(0)!.type).toBe('identifier'); - 443 | expect(sumNode.firstNamedChildForIndex(1)!.type).toBe('identifier'); - 444 | expect(sumNode.firstNamedChildForIndex(3)!.type).toBe('number'); - 445 | }); - 446 | }); - | - 447 | describe('.equals(other)', () => { - 448 | it('returns true if the nodes are the same', () => { - 449 | tree = parser.parse('1 + 2')!; - | - 450 | const sumNode = tree.rootNode.firstChild!.firstChild!; - 451 | const node1 = sumNode.firstChild!; - 452 | const node2 = sumNode.firstChild!; - 453 | expect(node1.equals(node2)).toBe(true); - 454 | }); - | - 455 | it('returns false if the nodes are not the same', () => { - 456 | tree = parser.parse('1 + 2')!; - | - 457 | const sumNode = tree.rootNode.firstChild!.firstChild!; - 458 | const node1 = sumNode.firstChild!; - 459 | const node2 = node1.nextSibling!; - 460 | expect(node1.equals(node2)).toBe(false); - 461 | }); - 462 | }); - | - 463 | describe('.fieldNameForChild(index)', () => { - 464 | it('returns the field of a child or null', () => { - 465 | parser.setLanguage(C); - 466 | tree = parser.parse('int w = x + /* y is special! */ y;')!; - | - 467 | const translationUnitNode = tree.rootNode; - 468 | const declarationNode = translationUnitNode.firstChild; - 469 | const binaryExpressionNode = declarationNode! - 470 | .childForFieldName('declarator')! 
- 471 | .childForFieldName('value')!; - | - 472 | // ------------------- - 473 | // left: (identifier) 0 - 474 | // operator: "+" 1 <--- (not a named child) - 475 | // (comment) 2 <--- (is an extra) - 476 | // right: (identifier) 3 - 477 | // ------------------- - | - 478 | expect(binaryExpressionNode.fieldNameForChild(0)).toBe('left'); - 479 | expect(binaryExpressionNode.fieldNameForChild(1)).toBe('operator'); - 480 | // The comment should not have a field name, as it's just an extra - 481 | expect(binaryExpressionNode.fieldNameForChild(2)).toBeNull(); - 482 | expect(binaryExpressionNode.fieldNameForChild(3)).toBe('right'); - 483 | // Negative test - Not a valid child index - 484 | expect(binaryExpressionNode.fieldNameForChild(4)).toBeNull(); - 485 | }); - 486 | }); - | - 487 | describe('.fieldNameForNamedChild(index)', () => { - 488 | it('returns the field of a named child or null', () => { - 489 | parser.setLanguage(C); - 490 | tree = parser.parse('int w = x + /* y is special! */ y;')!; - | - 491 | const translationUnitNode = tree.rootNode; - 492 | const declarationNode = translationUnitNode.firstNamedChild; - 493 | const binaryExpressionNode = declarationNode! - 494 | .childForFieldName('declarator')! 
- 495 | .childForFieldName('value')!; - | - 496 | // ------------------- - 497 | // left: (identifier) 0 - 498 | // operator: "+" _ <--- (not a named child) - 499 | // (comment) 1 <--- (is an extra) - 500 | // right: (identifier) 2 - 501 | // ------------------- - | - 502 | expect(binaryExpressionNode.fieldNameForNamedChild(0)).toBe('left'); - 503 | // The comment should not have a field name, as it's just an extra - 504 | expect(binaryExpressionNode.fieldNameForNamedChild(1)).toBeNull(); - 505 | // The operator is not a named child, so the named child at index 2 is the right child - 506 | expect(binaryExpressionNode.fieldNameForNamedChild(2)).toBe('right'); - 507 | // Negative test - Not a valid child index - 508 | expect(binaryExpressionNode.fieldNameForNamedChild(3)).toBeNull(); - 509 | }); - 510 | }); - 511 | }); - - - --------------------------------------------------------------------------------- -/lib/binding_web/test/parser.test.ts: --------------------------------------------------------------------------------- - 1 | import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest'; - 2 | import helper, { type LanguageName } from './helper'; - 3 | import type { ParseState, Tree } from '../src'; - 4 | import { Parser, Language } from '../src'; - | - 5 | let JavaScript: Language; - 6 | let HTML: Language; - 7 | let JSON: Language; - 8 | let languageURL: (name: LanguageName) => string; - | - 9 | describe('Parser', () => { - 10 | let parser: Parser; - | - 11 | beforeAll(async () => { - 12 | ({ JavaScript, HTML, JSON, languageURL } = await helper); - 13 | }); - | - 14 | beforeEach(() => { - 15 | parser = new Parser(); - 16 | }); - | - 17 | afterEach(() => { - 18 | parser.delete(); - 19 | }); - | - 20 | describe('.setLanguage', () => { - 21 | it('allows setting the language to null', () => { - 22 | expect(parser.language).toBeNull(); - 23 | parser.setLanguage(JavaScript); - 24 | expect(parser.language).toBe(JavaScript); - 25 | 
parser.setLanguage(null); - 26 | expect(parser.language).toBeNull(); - 27 | }); - | - 28 | it('throws an exception when the given object is not a tree-sitter language', () => { - 29 | // @ts-expect-error Testing invalid arguments - 30 | expect(() => { parser.setLanguage({}); }).toThrow(/Argument must be a Language/); - 31 | // @ts-expect-error Testing invalid arguments - 32 | expect(() => { parser.setLanguage(1); }).toThrow(/Argument must be a Language/); - 33 | }); - 34 | }); - | - 35 | describe('.setLogger', () => { - 36 | beforeEach(() => { - 37 | parser.setLanguage(JavaScript); - 38 | }); - | - 39 | it('calls the given callback for each parse event', () => { - 40 | const debugMessages: string[] = []; - 41 | parser.setLogger((message) => debugMessages.push(message)); - 42 | parser.parse('a + b + c')!; - 43 | expect(debugMessages).toEqual(expect.arrayContaining([ - 44 | 'skip character:\' \'', - 45 | 'consume character:\'b\'', - 46 | 'reduce sym:program, child_count:1', - 47 | 'accept' - 48 | ])); - 49 | }); - | - 50 | it('allows the callback to be retrieved later', () => { - 51 | const callback = () => { return; }; - 52 | parser.setLogger(callback); - 53 | expect(parser.getLogger()).toBe(callback); - 54 | parser.setLogger(false); - 55 | expect(parser.getLogger()).toBeNull(); - 56 | }); - | - 57 | it('disables debugging when given a falsy value', () => { - 58 | const debugMessages: string[] = []; - 59 | parser.setLogger((message) => debugMessages.push(message)); - 60 | parser.setLogger(false); - 61 | parser.parse('a + b * c')!; - 62 | expect(debugMessages).toHaveLength(0); - 63 | }); - | - 64 | it('throws an error when given a truthy value that isn\'t a function', () => { - 65 | // @ts-expect-error Testing invalid arguments - 66 | expect(() => { parser.setLogger('5'); }).toThrow('Logger callback must be a function'); - 67 | }); - | - 68 | it('rethrows errors thrown by the logging callback', () => { - 69 | const error = new Error('The error message'); - 70 | 
parser.setLogger(() => { - 71 | throw error; - 72 | }); - 73 | expect(() => parser.parse('ok;')).toThrow('The error message'); - 74 | }); - 75 | }); - | - 76 | describe('one included range', () => { - 77 | it('parses the text within a range', () => { - 78 | parser.setLanguage(HTML); - 79 | const sourceCode = 'hi'; - 80 | const htmlTree = parser.parse(sourceCode)!; - 81 | const scriptContentNode = htmlTree.rootNode.child(1)!.child(1)!; - 82 | expect(scriptContentNode.type).toBe('raw_text'); - | - 83 | parser.setLanguage(JavaScript); - 84 | expect(parser.getIncludedRanges()).toEqual([{ - 85 | startIndex: 0, - 86 | endIndex: 2147483647, - 87 | startPosition: { row: 0, column: 0 }, - 88 | endPosition: { row: 4294967295, column: 2147483647 } - 89 | }]); - | - 90 | const ranges = [{ - 91 | startIndex: scriptContentNode.startIndex, - 92 | endIndex: scriptContentNode.endIndex, - 93 | startPosition: scriptContentNode.startPosition, - 94 | endPosition: scriptContentNode.endPosition, - 95 | }]; - | - 96 | const jsTree = parser.parse( - 97 | sourceCode, - 98 | null, - 99 | { includedRanges: ranges } - 100 | )!; - 101 | expect(parser.getIncludedRanges()).toEqual(ranges); - | - 102 | expect(jsTree.rootNode.toString()).toBe( - 103 | '(program (expression_statement (call_expression ' + - 104 | 'function: (member_expression object: (identifier) property: (property_identifier)) ' + - 105 | 'arguments: (arguments (string (string_fragment))))))' - 106 | ); - 107 | expect(jsTree.rootNode.startPosition).toEqual({ row: 0, column: sourceCode.indexOf('console') }); - 108 | }); - 109 | }); - | - 110 | describe('multiple included ranges', () => { - 111 | it('parses the text within multiple ranges', () => { - 112 | parser.setLanguage(JavaScript); - 113 | const sourceCode = 'html `
Hello, ${name.toUpperCase()}, it\'s ${now()}.
`'; - 114 | const jsTree = parser.parse(sourceCode)!; - 115 | const templateStringNode = jsTree.rootNode.descendantForIndex( - 116 | sourceCode.indexOf('`<'), - 117 | sourceCode.indexOf('>`') - 118 | )!; - 119 | expect(templateStringNode.type).toBe('template_string'); - | - 120 | const openQuoteNode = templateStringNode.child(0)!; - 121 | const interpolationNode1 = templateStringNode.child(2)!; - 122 | const interpolationNode2 = templateStringNode.child(4)!; - 123 | const closeQuoteNode = templateStringNode.child(6)!; - | - 124 | parser.setLanguage(HTML); - 125 | const htmlRanges = [ - 126 | { - 127 | startIndex: openQuoteNode.endIndex, - 128 | startPosition: openQuoteNode.endPosition, - 129 | endIndex: interpolationNode1.startIndex, - 130 | endPosition: interpolationNode1.startPosition, - 131 | }, - 132 | { - 133 | startIndex: interpolationNode1.endIndex, - 134 | startPosition: interpolationNode1.endPosition, - 135 | endIndex: interpolationNode2.startIndex, - 136 | endPosition: interpolationNode2.startPosition, - 137 | }, - 138 | { - 139 | startIndex: interpolationNode2.endIndex, - 140 | startPosition: interpolationNode2.endPosition, - 141 | endIndex: closeQuoteNode.startIndex, - 142 | endPosition: closeQuoteNode.startPosition, - 143 | }, - 144 | ]; - | - 145 | const htmlTree = parser.parse(sourceCode, null, { includedRanges: htmlRanges })!; - | - 146 | expect(htmlTree.rootNode.toString()).toBe( - 147 | '(document (element' + - 148 | ' (start_tag (tag_name))' + - 149 | ' (text)' + - 150 | ' (element (start_tag (tag_name)) (end_tag (tag_name)))' + - 151 | ' (text)' + - 152 | ' (end_tag (tag_name))))' - 153 | ); - 154 | expect(htmlTree.getIncludedRanges()).toEqual(htmlRanges); - | - 155 | const divElementNode = htmlTree.rootNode.child(0)!; - 156 | const helloTextNode = divElementNode.child(1)!; - 157 | const bElementNode = divElementNode.child(2)!; - 158 | const bStartTagNode = bElementNode.child(0)!; - 159 | const bEndTagNode = bElementNode.child(1)!; - | - 160 | 
expect(helloTextNode.type).toBe('text'); - 161 | expect(helloTextNode.startIndex).toBe(sourceCode.indexOf('Hello')); - 162 | expect(helloTextNode.endIndex).toBe(sourceCode.indexOf(' ')); - | - 163 | expect(bStartTagNode.type).toBe('start_tag'); - 164 | expect(bStartTagNode.startIndex).toBe(sourceCode.indexOf('')); - 165 | expect(bStartTagNode.endIndex).toBe(sourceCode.indexOf('${now()}')); - | - 166 | expect(bEndTagNode.type).toBe('end_tag'); - 167 | expect(bEndTagNode.startIndex).toBe(sourceCode.indexOf('')); - 168 | expect(bEndTagNode.endIndex).toBe(sourceCode.indexOf('.')); - 169 | }); - 170 | }); - | - 171 | describe('an included range containing mismatched positions', () => { - 172 | it('parses the text within the range', () => { - 173 | const sourceCode = '
test
{_ignore_this_part_}'; - | - 174 | parser.setLanguage(HTML); - | - 175 | const endIndex = sourceCode.indexOf('{_ignore_this_part_'); - | - 176 | const rangeToParse = { - 177 | startIndex: 0, - 178 | startPosition: { row: 10, column: 12 }, - 179 | endIndex, - 180 | endPosition: { row: 10, column: 12 + endIndex }, - 181 | }; - | - 182 | const htmlTree = parser.parse(sourceCode, null, { includedRanges: [rangeToParse] })!; - | - 183 | expect(htmlTree.getIncludedRanges()[0]).toEqual(rangeToParse); - | - 184 | expect(htmlTree.rootNode.toString()).toBe( - 185 | '(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))' - 186 | ); - 187 | }); - 188 | }); - | - 189 | describe('.parse', () => { - 190 | let tree: Tree | null; - | - 191 | beforeEach(() => { - 192 | tree = null; - 193 | parser.setLanguage(JavaScript); - 194 | }); - | - 195 | afterEach(() => { - 196 | if (tree) tree.delete(); - 197 | }); - | - 198 | it('reads from the given input', () => { - 199 | const parts = ['first', '_', 'second', '_', 'third']; - 200 | tree = parser.parse(() => parts.shift())!; - 201 | expect(tree.rootNode.toString()).toBe('(program (expression_statement (identifier)))'); - 202 | }); - | - 203 | it('stops reading when the input callback returns something that\'s not a string', () => { - 204 | const parts = ['abc', 'def', 'ghi', {}, {}, {}, 'second-word', ' ']; - 205 | tree = parser.parse(() => parts.shift() as string)!; - 206 | expect(tree.rootNode.toString()).toBe('(program (expression_statement (identifier)))'); - 207 | expect(tree.rootNode.endIndex).toBe(9); - 208 | expect(parts).toHaveLength(2); - 209 | }); - | - 210 | it('throws an exception when the given input is not a function', () => { - 211 | // @ts-expect-error Testing invalid arguments - 212 | expect(() => parser.parse(null)).toThrow('Argument must be a string or a function'); - 213 | // @ts-expect-error Testing invalid arguments - 214 | expect(() => parser.parse(5)).toThrow('Argument must be a string or a 
function'); - 215 | // @ts-expect-error Testing invalid arguments - 216 | expect(() => parser.parse({})).toThrow('Argument must be a string or a function'); - 217 | }); - | - 218 | it('handles long input strings', { timeout: 10000 }, () => { - 219 | const repeatCount = 10000; - 220 | const inputString = `[${Array(repeatCount).fill('0').join(',')}]`; - | - 221 | tree = parser.parse(inputString)!; - 222 | expect(tree.rootNode.type).toBe('program'); - 223 | expect(tree.rootNode.firstChild!.firstChild!.namedChildCount).toBe(repeatCount); - 224 | }); - | - 225 | it('can use the bash parser', { timeout: 5000 }, async () => { - 226 | parser.setLanguage(await Language.load(languageURL('bash'))); - 227 | tree = parser.parse('FOO=bar echo < err.txt > hello.txt \nhello${FOO}\nEOF')!; - 228 | expect(tree.rootNode.toString()).toBe( - 229 | '(program ' + - 230 | '(redirected_statement ' + - 231 | 'body: (command ' + - 232 | '(variable_assignment name: (variable_name) value: (word)) ' + - 233 | 'name: (command_name (word))) ' + - 234 | 'redirect: (heredoc_redirect (heredoc_start) ' + - 235 | 'redirect: (file_redirect descriptor: (file_descriptor) destination: (word)) ' + - 236 | 'redirect: (file_redirect destination: (word)) ' + - 237 | '(heredoc_body ' + - 238 | '(expansion (variable_name)) (heredoc_content)) (heredoc_end))))' - 239 | ); - 240 | }); - | - 241 | it('can use the c++ parser', { timeout: 5000 }, async () => { - 242 | parser.setLanguage(await Language.load(languageURL('cpp'))); - 243 | tree = parser.parse('const char *s = R"EOF(HELLO WORLD)EOF";')!; - 244 | expect(tree.rootNode.toString()).toBe( - 245 | '(translation_unit (declaration ' + - 246 | '(type_qualifier) ' + - 247 | 'type: (primitive_type) ' + - 248 | 'declarator: (init_declarator ' + - 249 | 'declarator: (pointer_declarator declarator: (identifier)) ' + - 250 | 'value: (raw_string_literal delimiter: (raw_string_delimiter) (raw_string_content) (raw_string_delimiter)))))' - 251 | ); - 252 | }); - | - 253 | 
it('can use the HTML parser', { timeout: 5000 }, async () => { - 254 | parser.setLanguage(await Language.load(languageURL('html'))); - 255 | tree = parser.parse('
')!; - 256 | expect(tree.rootNode.toString()).toBe( - 257 | '(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) ' + - 258 | '(element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))) (end_tag (tag_name))))' - 259 | ); - 260 | }); - | - 261 | it('can use the python parser', { timeout: 5000 }, async () => { - 262 | parser.setLanguage(await Language.load(languageURL('python'))); - 263 | tree = parser.parse('class A:\n def b():\n c()')!; - 264 | expect(tree.rootNode.toString()).toBe( - 265 | '(module (class_definition ' + - 266 | 'name: (identifier) ' + - 267 | 'body: (block ' + - 268 | '(function_definition ' + - 269 | 'name: (identifier) ' + - 270 | 'parameters: (parameters) ' + - 271 | 'body: (block (expression_statement (call ' + - 272 | 'function: (identifier) ' + - 273 | 'arguments: (argument_list))))))))' - 274 | ); - 275 | }); - | - 276 | it('can use the rust parser', { timeout: 5000 }, async () => { - 277 | parser.setLanguage(await Language.load(languageURL('rust'))); - 278 | tree = parser.parse('const x: &\'static str = r###"hello"###;')!; - 279 | expect(tree.rootNode.toString()).toBe( - 280 | '(source_file (const_item ' + - 281 | 'name: (identifier) ' + - 282 | 'type: (reference_type (lifetime (identifier)) type: (primitive_type)) ' + - 283 | 'value: (raw_string_literal (string_content))))' - 284 | ); - 285 | }); - | - 286 | it('can use the typescript parser', { timeout: 5000 }, async () => { - 287 | parser.setLanguage(await Language.load(languageURL('typescript'))); - 288 | tree = parser.parse('a()\nb()\n[c]')!; - 289 | expect(tree.rootNode.toString()).toBe( - 290 | '(program ' + - 291 | '(expression_statement (call_expression function: (identifier) arguments: (arguments))) ' + - 292 | '(expression_statement (subscript_expression ' + - 293 | 'object: (call_expression ' + - 294 | 'function: (identifier) ' + - 295 | 'arguments: (arguments)) ' + - 296 | 'index: (identifier))))' - 297 | ); - 298 | }); - | - 299 | it('can 
use the tsx parser', { timeout: 5000 }, async () => { - 300 | parser.setLanguage(await Language.load(languageURL('tsx'))); - 301 | tree = parser.parse('a()\nb()\n[c]')!; - 302 | expect(tree.rootNode.toString()).toBe( - 303 | '(program ' + - 304 | '(expression_statement (call_expression function: (identifier) arguments: (arguments))) ' + - 305 | '(expression_statement (subscript_expression ' + - 306 | 'object: (call_expression ' + - 307 | 'function: (identifier) ' + - 308 | 'arguments: (arguments)) ' + - 309 | 'index: (identifier))))', - | - 310 | ); - 311 | }); - | - 312 | it('parses only the text within the `includedRanges` if they are specified', () => { - 313 | const sourceCode = '<% foo() %> <% bar %>'; - | - 314 | const start1 = sourceCode.indexOf('foo'); - 315 | const end1 = start1 + 5; - 316 | const start2 = sourceCode.indexOf('bar'); - 317 | const end2 = start2 + 3; - | - 318 | const tree = parser.parse(sourceCode, null, { - 319 | includedRanges: [ - 320 | { - 321 | startIndex: start1, - 322 | endIndex: end1, - 323 | startPosition: { row: 0, column: start1 }, - 324 | endPosition: { row: 0, column: end1 }, - 325 | }, - 326 | { - 327 | startIndex: start2, - 328 | endIndex: end2, - 329 | startPosition: { row: 0, column: start2 }, - 330 | endPosition: { row: 0, column: end2 }, - 331 | }, - 332 | ], - 333 | })!; - | - 334 | expect(tree.rootNode.toString()).toBe( - 335 | '(program ' + - 336 | '(expression_statement (call_expression function: (identifier) arguments: (arguments))) ' + - 337 | '(expression_statement (identifier)))' - 338 | ); - 339 | }); - | - 340 | it('parses with a timeout', { timeout: 5000 }, () => { - 341 | parser.setLanguage(JSON); - | - 342 | const startTime = performance.now(); - 343 | let currentByteOffset = 0; - 344 | const progressCallback = (state: ParseState) => { - 345 | expect(state.currentOffset).toBeGreaterThanOrEqual(currentByteOffset); - 346 | currentByteOffset = state.currentOffset; - | - 347 | if (performance.now() - startTime > 
1) { - 348 | return true; - 349 | } - 350 | return false; - 351 | }; - | - 352 | expect(parser.parse( - 353 | (offset) => offset === 0 ? '[' : ',0', - 354 | null, - 355 | { progressCallback }, - 356 | )).toBeNull(); - 357 | }); - | - 358 | it('times out when an error is detected', { timeout: 5000 }, () => { - 359 | parser.setLanguage(JSON); - | - 360 | let offset = 0; - 361 | const erroneousCode = '!,'; - 362 | const progressCallback = (state: ParseState) => { - 363 | offset = state.currentOffset; - 364 | return state.hasError; - 365 | }; - | - 366 | const tree = parser.parse( - 367 | (offset) => { - 368 | if (offset === 0) return '['; - 369 | if (offset >= 1 && offset < 1000) return '0,'; - 370 | return erroneousCode; - 371 | }, - 372 | null, - 373 | { progressCallback }, - 374 | ); - | - 375 | // The callback is called at the end of parsing, however, what we're asserting here is that - 376 | // parsing ends immediately as the error is detected. This is verified by checking the offset - 377 | // of the last byte processed is the length of the erroneous code we inserted, aka, 1002, or - 378 | // 1000 + the length of the erroneous code. Note that in this Wasm test, we multiply the offset - 379 | // by 2 because JavaScript strings are UTF-16 encoded. 
- 380 | expect(offset).toBe((1000 + erroneousCode.length) * 2); - 381 | expect(tree).toBeNull(); - 382 | }); - 383 | }); - 384 | }); - - - --------------------------------------------------------------------------------- -/lib/binding_web/test/query.test.ts: --------------------------------------------------------------------------------- - 1 | import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest'; - 2 | import type { Language, Tree, QueryMatch, QueryCapture } from '../src'; - 3 | import { Parser, Query } from '../src'; - 4 | import helper from './helper'; - | - 5 | let JavaScript: Language; - | - 6 | describe('Query', () => { - 7 | let parser: Parser; - 8 | let tree: Tree | null; - 9 | let query: Query | null; - | - 10 | beforeAll(async () => { - 11 | ({ JavaScript } = await helper); - 12 | }); - | - 13 | beforeEach(() => { - 14 | parser = new Parser(); - 15 | parser.setLanguage(JavaScript); - 16 | }); - | - 17 | afterEach(() => { - 18 | parser.delete(); - 19 | if (tree) tree.delete(); - 20 | if (query) query.delete(); - 21 | }); - | - 22 | describe('construction', () => { - 23 | it('throws an error on invalid patterns', () => { - 24 | expect(() => { - 25 | new Query(JavaScript, '(function_declaration wat)'); - 26 | }).toThrow('Bad syntax at offset 22: \'wat)\'...'); - | - 27 | expect(() => { - 28 | new Query(JavaScript, '(non_existent)'); - 29 | }).toThrow('Bad node name \'non_existent\''); - | - 30 | expect(() => { - 31 | new Query(JavaScript, '(a)'); - 32 | }).toThrow('Bad node name \'a\''); - | - 33 | expect(() => { - 34 | new Query(JavaScript, '(function_declaration non_existent:(identifier))'); - 35 | }).toThrow('Bad field name \'non_existent\''); - | - 36 | expect(() => { - 37 | new Query(JavaScript, '(function_declaration name:(statement_block))'); - 38 | }).toThrow('Bad pattern structure at offset 22: \'name:(statement_block))\''); - 39 | }); - | - 40 | it('throws an error on invalid predicates', () => { - 41 | expect(() => { - 
42 | new Query(JavaScript, '((identifier) @abc (#eq? @ab hi))'); - 43 | }).toThrow('Bad capture name @ab'); - | - 44 | expect(() => { - 45 | new Query(JavaScript, '((identifier) @abc (#eq?))'); - 46 | }).toThrow('Wrong number of arguments to `#eq?` predicate. Expected 2, got 0'); - | - 47 | expect(() => { - 48 | new Query(JavaScript, '((identifier) @a (#eq? @a @a @a))'); - 49 | }).toThrow('Wrong number of arguments to `#eq?` predicate. Expected 2, got 3'); - 50 | }); - 51 | }); - | - 52 | describe('.matches', () => { - 53 | it('returns all of the matches for the given query', { timeout: 10000 }, () => { - 54 | tree = parser.parse('function one() { two(); function three() {} }')!; - 55 | query = new Query(JavaScript, ` - 56 | (function_declaration name: (identifier) @fn-def) - 57 | (call_expression function: (identifier) @fn-ref) - 58 | `); - 59 | const matches = query.matches(tree.rootNode); - 60 | expect(formatMatches(matches)).toEqual([ - 61 | { patternIndex: 0, captures: [{ patternIndex: 0, name: 'fn-def', text: 'one' }] }, - 62 | { patternIndex: 1, captures: [{ patternIndex: 1, name: 'fn-ref', text: 'two' }] }, - 63 | { patternIndex: 0, captures: [{ patternIndex: 0, name: 'fn-def', text: 'three' }] }, - 64 | ]); - 65 | }); - | - 66 | it('can search in specified ranges', () => { - 67 | tree = parser.parse('[a, b,\nc, d,\ne, f,\ng, h]')!; - 68 | query = new Query(JavaScript, '(identifier) @element'); - 69 | const matches = query.matches( - 70 | tree.rootNode, - 71 | { - 72 | startPosition: { row: 1, column: 1 }, - 73 | endPosition: { row: 3, column: 1 }, - 74 | } - 75 | ); - 76 | expect(formatMatches(matches)).toEqual([ - 77 | { patternIndex: 0, captures: [{ patternIndex: 0, name: 'element', text: 'd' }] }, - 78 | { patternIndex: 0, captures: [{ patternIndex: 0, name: 'element', text: 'e' }] }, - 79 | { patternIndex: 0, captures: [{ patternIndex: 0, name: 'element', text: 'f' }] }, - 80 | { patternIndex: 0, captures: [{ patternIndex: 0, name: 'element', text: 'g' 
}] }, - 81 | ]); - 82 | }); - | - 83 | it('handles predicates that compare the text of capture to literal strings', () => { - 84 | tree = parser.parse(` - 85 | giraffe(1, 2, []); - 86 | helment([false]); - 87 | goat(false); - 88 | gross(3, []); - 89 | hiccup([]); - 90 | gaff(5); - 91 | `)!; - | - 92 | // Find all calls to functions beginning with 'g', where one argument - 93 | // is an array literal. - 94 | query = new Query(JavaScript, ` - 95 | (call_expression - 96 | function: (identifier) @name - 97 | arguments: (arguments (array)) - 98 | (#match? @name "^g")) - 99 | `); - | - 100 | const matches = query.matches(tree.rootNode); - 101 | expect(formatMatches(matches)).toEqual([ - 102 | { patternIndex: 0, captures: [{ patternIndex: 0, name: 'name', text: 'giraffe' }] }, - 103 | { patternIndex: 0, captures: [{ patternIndex: 0, name: 'name', text: 'gross' }] }, - 104 | ]); - 105 | }); - | - 106 | it('handles multiple matches where the first one is filtered', () => { - 107 | tree = parser.parse(` - 108 | const a = window.b; - 109 | `)!; - | - 110 | query = new Query(JavaScript, ` - 111 | ((identifier) @variable.builtin - 112 | (#match? @variable.builtin "^(arguments|module|console|window|document)$") - 113 | (#is-not? 
local)) - 114 | `); - | - 115 | const matches = query.matches(tree.rootNode); - 116 | expect(formatMatches(matches)).toEqual([ - 117 | { patternIndex: 0, captures: [{ patternIndex: 0, name: 'variable.builtin', text: 'window' }] }, - 118 | ]); - 119 | }); - 120 | }); - | - 121 | describe('.captures', () => { - 122 | it('returns all of the captures for the given query, in order', () => { - 123 | tree = parser.parse(` - 124 | a({ - 125 | bc: function de() { - 126 | const fg = function hi() {} - 127 | }, - 128 | jk: function lm() { - 129 | const no = function pq() {} - 130 | }, - 131 | }); - 132 | `)!; - 133 | query = new Query(JavaScript, ` - 134 | (pair - 135 | key: _ @method.def - 136 | (function_expression - 137 | name: (identifier) @method.alias)) - | - 138 | (variable_declarator - 139 | name: _ @function.def - 140 | value: (function_expression - 141 | name: (identifier) @function.alias)) - | - 142 | ":" @delimiter - 143 | "=" @operator - 144 | `); - | - 145 | const captures = query.captures(tree.rootNode); - 146 | expect(formatCaptures(captures)).toEqual([ - 147 | { patternIndex: 0, name: 'method.def', text: 'bc' }, - 148 | { patternIndex: 2, name: 'delimiter', text: ':' }, - 149 | { patternIndex: 0, name: 'method.alias', text: 'de' }, - 150 | { patternIndex: 1, name: 'function.def', text: 'fg' }, - 151 | { patternIndex: 3, name: 'operator', text: '=' }, - 152 | { patternIndex: 1, name: 'function.alias', text: 'hi' }, - 153 | { patternIndex: 0, name: 'method.def', text: 'jk' }, - 154 | { patternIndex: 2, name: 'delimiter', text: ':' }, - 155 | { patternIndex: 0, name: 'method.alias', text: 'lm' }, - 156 | { patternIndex: 1, name: 'function.def', text: 'no' }, - 157 | { patternIndex: 3, name: 'operator', text: '=' }, - 158 | { patternIndex: 1, name: 'function.alias', text: 'pq' }, - 159 | ]); - 160 | }); - | - 161 | it('handles conditions that compare the text of capture to literal strings', () => { - 162 | tree = parser.parse(` - 163 | lambda - 164 | panda - 165 
| load - 166 | toad - 167 | const ab = require('./ab'); - 168 | new Cd(EF); - 169 | `)!; - | - 170 | query = new Query(JavaScript, ` - 171 | ((identifier) @variable - 172 | (#not-match? @variable "^(lambda|load)$")) - | - 173 | ((identifier) @function.builtin - 174 | (#eq? @function.builtin "require")) - | - 175 | ((identifier) @constructor - 176 | (#match? @constructor "^[A-Z]")) - | - 177 | ((identifier) @constant - 178 | (#match? @constant "^[A-Z]{2,}$")) - 179 | `); - | - 180 | const captures = query.captures(tree.rootNode); - 181 | expect(formatCaptures(captures)).toEqual([ - 182 | { patternIndex: 0, name: 'variable', text: 'panda' }, - 183 | { patternIndex: 0, name: 'variable', text: 'toad' }, - 184 | { patternIndex: 0, name: 'variable', text: 'ab' }, - 185 | { patternIndex: 0, name: 'variable', text: 'require' }, - 186 | { patternIndex: 1, name: 'function.builtin', text: 'require' }, - 187 | { patternIndex: 0, name: 'variable', text: 'Cd' }, - 188 | { patternIndex: 2, name: 'constructor', text: 'Cd' }, - 189 | { patternIndex: 0, name: 'variable', text: 'EF' }, - 190 | { patternIndex: 2, name: 'constructor', text: 'EF' }, - 191 | { patternIndex: 3, name: 'constant', text: 'EF' }, - 192 | ]); - 193 | }); - | - 194 | it('handles conditions that compare the text of captures to each other', () => { - 195 | tree = parser.parse(` - 196 | ab = abc + 1; - 197 | def = de + 1; - 198 | ghi = ghi + 1; - 199 | `)!; - | - 200 | query = new Query(JavaScript, ` - 201 | ( - 202 | (assignment_expression - 203 | left: (identifier) @id1 - 204 | right: (binary_expression - 205 | left: (identifier) @id2)) - 206 | (#eq? 
@id1 @id2) - 207 | ) - 208 | `); - | - 209 | const captures = query.captures(tree.rootNode); - 210 | expect(formatCaptures(captures)).toEqual([ - 211 | { patternIndex: 0, name: 'id1', text: 'ghi' }, - 212 | { patternIndex: 0, name: 'id2', text: 'ghi' }, - 213 | ]); - 214 | }); - | - 215 | it('handles patterns with properties', () => { - 216 | tree = parser.parse(`a(b.c);`)!; - 217 | query = new Query(JavaScript, ` - 218 | ((call_expression (identifier) @func) - 219 | (#set! foo) - 220 | (#set! bar baz)) - | - 221 | ((property_identifier) @prop - 222 | (#is? foo) - 223 | (#is-not? bar baz)) - 224 | `); - | - 225 | const captures = query.captures(tree.rootNode); - 226 | expect(formatCaptures(captures)).toEqual([ - 227 | { - 228 | patternIndex: 0, - 229 | name: 'func', - 230 | text: 'a', - 231 | setProperties: { foo: null, bar: 'baz' } - 232 | }, - 233 | { - 234 | patternIndex: 1, - 235 | name: 'prop', - 236 | text: 'c', - 237 | assertedProperties: { foo: null }, - 238 | refutedProperties: { bar: 'baz' }, - 239 | }, - 240 | ]); - 241 | expect(query.didExceedMatchLimit()).toBe(false); - 242 | }); - | - 243 | it('detects queries with too many permutations to track', () => { - 244 | tree = parser.parse(` - 245 | [ - 246 | hello, hello, hello, hello, hello, hello, hello, hello, hello, hello, - 247 | hello, hello, hello, hello, hello, hello, hello, hello, hello, hello, - 248 | hello, hello, hello, hello, hello, hello, hello, hello, hello, hello, - 249 | hello, hello, hello, hello, hello, hello, hello, hello, hello, hello, - 250 | hello, hello, hello, hello, hello, hello, hello, hello, hello, hello, - 251 | ]; - 252 | `)!; - | - 253 | query = new Query(JavaScript, `(array (identifier) @pre (identifier) @post)`); - | - 254 | query.captures(tree.rootNode, { matchLimit: 32 }); - 255 | expect(query.didExceedMatchLimit()).toBe(true); - 256 | }); - | - 257 | it('handles quantified captures properly', () => { - 258 | tree = parser.parse(` - 259 | /// foo - 260 | /// bar - 261 | 
/// baz - 262 | `)!; - | - 263 | const expectCount = (tree: Tree, queryText: string, expectedCount: number) => { - 264 | query = new Query(JavaScript, queryText); - 265 | const captures = query.captures(tree.rootNode); - 266 | expect(captures).toHaveLength(expectedCount); - 267 | }; - | - 268 | expectCount( - 269 | tree, - 270 | `((comment)+ @foo (#any-eq? @foo "/// foo"))`, - 271 | 3 - 272 | ); - | - 273 | expectCount( - 274 | tree, - 275 | `((comment)+ @foo (#eq? @foo "/// foo"))`, - 276 | 0 - 277 | ); - | - 278 | expectCount( - 279 | tree, - 280 | `((comment)+ @foo (#any-not-eq? @foo "/// foo"))`, - 281 | 3 - 282 | ); - | - 283 | expectCount( - 284 | tree, - 285 | `((comment)+ @foo (#not-eq? @foo "/// foo"))`, - 286 | 0 - 287 | ); - | - 288 | expectCount( - 289 | tree, - 290 | `((comment)+ @foo (#match? @foo "^/// foo"))`, - 291 | 0 - 292 | ); - | - 293 | expectCount( - 294 | tree, - 295 | `((comment)+ @foo (#any-match? @foo "^/// foo"))`, - 296 | 3 - 297 | ); - | - 298 | expectCount( - 299 | tree, - 300 | `((comment)+ @foo (#not-match? @foo "^/// foo"))`, - 301 | 0 - 302 | ); - | - 303 | expectCount( - 304 | tree, - 305 | `((comment)+ @foo (#not-match? @foo "fsdfsdafdfs"))`, - 306 | 3 - 307 | ); - | - 308 | expectCount( - 309 | tree, - 310 | `((comment)+ @foo (#any-not-match? @foo "^///"))`, - 311 | 0 - 312 | ); - | - 313 | expectCount( - 314 | tree, - 315 | `((comment)+ @foo (#any-not-match? @foo "^/// foo"))`, - 316 | 3 - 317 | ); - 318 | }); - 319 | }); - | - 320 | describe('.predicatesForPattern(index)', () => { - 321 | it('returns all of the predicates as objects', () => { - 322 | query = new Query(JavaScript, ` - 323 | ( - 324 | (binary_expression - 325 | left: (identifier) @a - 326 | right: (identifier) @b) - 327 | (#something? @a @b) - 328 | (#match? @a "c") - 329 | (#something-else? @a "A" @b "B") - 330 | ) - | - 331 | ((identifier) @c - 332 | (#hello! 
@c)) - | - 333 | "if" @d - 334 | `); - | - 335 | expect(query.predicatesForPattern(0)).toStrictEqual([ - 336 | { - 337 | operator: 'something?', - 338 | operands: [ - 339 | { type: 'capture', name: 'a' }, - 340 | { type: 'capture', name: 'b' }, - 341 | ], - 342 | }, - 343 | { - 344 | operator: 'something-else?', - 345 | operands: [ - 346 | { type: 'capture', name: 'a' }, - 347 | { type: 'string', value: 'A' }, - 348 | { type: 'capture', name: 'b' }, - 349 | { type: 'string', value: 'B' }, - 350 | ], - 351 | }, - 352 | ]); - | - 353 | expect(query.predicatesForPattern(1)).toStrictEqual([ - 354 | { - 355 | operator: 'hello!', - 356 | operands: [{ type: 'capture', name: 'c' }], - 357 | }, - 358 | ]); - | - 359 | expect(query.predicatesForPattern(2)).toEqual([]); - 360 | }); - 361 | }); - | - 362 | describe('.disableCapture', () => { - 363 | it('disables a capture', () => { - 364 | query = new Query(JavaScript, ` - 365 | (function_declaration - 366 | (identifier) @name1 @name2 @name3 - 367 | (statement_block) @body1 @body2) - 368 | `); - | - 369 | const source = 'function foo() { return 1; }'; - 370 | const tree = parser.parse(source)!; - | - 371 | let matches = query.matches(tree.rootNode); - 372 | expect(formatMatches(matches)).toEqual([ - 373 | { - 374 | patternIndex: 0, - 375 | captures: [ - 376 | { patternIndex: 0, name: 'name1', text: 'foo' }, - 377 | { patternIndex: 0, name: 'name2', text: 'foo' }, - 378 | { patternIndex: 0, name: 'name3', text: 'foo' }, - 379 | { patternIndex: 0, name: 'body1', text: '{ return 1; }' }, - 380 | { patternIndex: 0, name: 'body2', text: '{ return 1; }' }, - 381 | ], - 382 | }, - 383 | ]); - | - 384 | // disabling captures still works when there are multiple captures on a - 385 | // single node. 
- 386 | query.disableCapture('name2'); - 387 | matches = query.matches(tree.rootNode); - 388 | expect(formatMatches(matches)).toEqual([ - 389 | { - 390 | patternIndex: 0, - 391 | captures: [ - 392 | { patternIndex: 0, name: 'name1', text: 'foo' }, - 393 | { patternIndex: 0, name: 'name3', text: 'foo' }, - 394 | { patternIndex: 0, name: 'body1', text: '{ return 1; }' }, - 395 | { patternIndex: 0, name: 'body2', text: '{ return 1; }' }, - 396 | ], - 397 | }, - 398 | ]); - 399 | }); - 400 | }); - | - 401 | describe('Start and end indices for patterns', () => { - 402 | it('Returns the start and end indices for a pattern', () => { - 403 | const patterns1 = ` - 404 | "+" @operator - 405 | "-" @operator - 406 | "*" @operator - 407 | "=" @operator - 408 | "=>" @operator - 409 | `.trim(); - | - 410 | const patterns2 = ` - 411 | (identifier) @a - 412 | (string) @b - 413 | `.trim(); - | - 414 | const patterns3 = ` - 415 | ((identifier) @b (#match? @b i)) - 416 | (function_declaration name: (identifier) @c) - 417 | (method_definition name: (property_identifier) @d) - 418 | `.trim(); - | - 419 | const source = patterns1 + patterns2 + patterns3; - | - 420 | const query = new Query(JavaScript, source); - | - 421 | expect(query.startIndexForPattern(0)).toBe(0); - 422 | expect(query.endIndexForPattern(0)).toBe('"+" @operator\n'.length); - 423 | expect(query.startIndexForPattern(5)).toBe(patterns1.length); - 424 | expect(query.endIndexForPattern(5)).toBe( - 425 | patterns1.length + '(identifier) @a\n'.length - 426 | ); - 427 | expect(query.startIndexForPattern(7)).toBe(patterns1.length + patterns2.length); - 428 | expect(query.endIndexForPattern(7)).toBe( - 429 | patterns1.length + - 430 | patterns2.length + - 431 | '((identifier) @b (#match? 
@b i))\n'.length - 432 | ); - 433 | }); - 434 | }); - | - 435 | describe('Disable pattern', () => { - 436 | it('Disables patterns in the query', () => { - 437 | const query = new Query(JavaScript, ` - 438 | (function_declaration name: (identifier) @name) - 439 | (function_declaration body: (statement_block) @body) - 440 | (class_declaration name: (identifier) @name) - 441 | (class_declaration body: (class_body) @body) - 442 | `); - | - 443 | // disable the patterns that match names - 444 | query.disablePattern(0); - 445 | query.disablePattern(2); - | - 446 | const source = 'class A { constructor() {} } function b() { return 1; }'; - 447 | tree = parser.parse(source)!; - 448 | const matches = query.matches(tree.rootNode); - 449 | expect(formatMatches(matches)).toEqual([ - 450 | { - 451 | patternIndex: 3, - 452 | captures: [{ patternIndex: 3, name: 'body', text: '{ constructor() {} }' }], - 453 | }, - 454 | { patternIndex: 1, captures: [{ patternIndex: 1, name: 'body', text: '{ return 1; }' }] }, - 455 | ]); - 456 | }); - 457 | }); - | - 458 | describe('Executes with a timeout', { timeout: 10000 }, () => { - 459 | it('Returns less than the expected matches', () => { - 460 | tree = parser.parse('function foo() while (true) { } }\n'.repeat(1000))!; - 461 | query = new Query(JavaScript, '(function_declaration) @function'); - | - 462 | const startTime = performance.now(); - | - 463 | const matches = query.matches( - 464 | tree.rootNode, - 465 | { - 466 | progressCallback: () => { - 467 | if (performance.now() - startTime > 1) { - 468 | return true; - 469 | } - 470 | return false; - 471 | }, - 472 | } - 473 | ); - 474 | expect(matches.length).toBeLessThan(1000); - | - 475 | const matches2 = query.matches(tree.rootNode); - 476 | expect(matches2).toHaveLength(1000); - 477 | }); - 478 | }); - 479 | }); - | - 480 | // Helper functions - 481 | function formatMatches(matches: QueryMatch[]): Omit[] { - 482 | return matches.map(({ patternIndex, captures }) => ({ - 483 | 
patternIndex, - 484 | captures: formatCaptures(captures), - 485 | })); - 486 | } - | - 487 | function formatCaptures(captures: QueryCapture[]): (QueryCapture & { text: string })[] { - 488 | return captures.map((c) => { - 489 | const node = c.node; - 490 | // @ts-expect-error We're not interested in the node object for these tests - 491 | delete c.node; - 492 | return { ...c, text: node.text }; - 493 | }); - 494 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/test/tree.test.ts: --------------------------------------------------------------------------------- - 1 | import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest'; - 2 | import type { Point, Language, Tree, TreeCursor } from '../src'; - 3 | import { Parser, Edit } from '../src'; - 4 | import helper from './helper'; - | - 5 | let JavaScript: Language; - | - 6 | interface CursorState { - 7 | nodeType: string; - 8 | nodeIsNamed: boolean; - 9 | startPosition: Point; - 10 | endPosition: Point; - 11 | startIndex: number; - 12 | endIndex: number; - 13 | } - | - 14 | describe('Tree', () => { - 15 | let parser: Parser; - 16 | let tree: Tree; - | - 17 | beforeAll(async () => { - 18 | ({ JavaScript } = await helper); - 19 | }); - | - 20 | beforeEach(() => { - 21 | parser = new Parser(); - 22 | parser.setLanguage(JavaScript); - 23 | }); - | - 24 | afterEach(() => { - 25 | parser.delete(); - 26 | tree.delete(); - 27 | }); - | - 28 | describe('.edit', () => { - 29 | let input: string; - 30 | let edit: Edit; - | - 31 | it('updates the positions of nodes', () => { - 32 | input = 'abc + cde'; - 33 | tree = parser.parse(input)!; - 34 | expect(tree.rootNode.toString()).toBe( - 35 | '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))' - 36 | ); - | - 37 | let sumNode = tree.rootNode.firstChild!.firstChild; - 38 | let variableNode1 = sumNode!.firstChild; - 39 | let variableNode2 = sumNode!.lastChild; - 40 | 
expect(variableNode1!.startIndex).toBe(0); - 41 | expect(variableNode1!.endIndex).toBe(3); - 42 | expect(variableNode2!.startIndex).toBe(6); - 43 | expect(variableNode2!.endIndex).toBe(9); - | - 44 | [input, edit] = spliceInput(input, input.indexOf('bc'), 0, ' * '); - 45 | expect(input).toBe('a * bc + cde'); - 46 | tree.edit(edit); - | - 47 | sumNode = tree.rootNode.firstChild!.firstChild; - 48 | variableNode1 = sumNode!.firstChild; - 49 | variableNode2 = sumNode!.lastChild; - 50 | expect(variableNode1!.startIndex).toBe(0); - 51 | expect(variableNode1!.endIndex).toBe(6); - 52 | expect(variableNode2!.startIndex).toBe(9); - 53 | expect(variableNode2!.endIndex).toBe(12); - | - 54 | tree = parser.parse(input, tree)!; - 55 | expect(tree.rootNode.toString()).toBe( - 56 | '(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))' - 57 | ); - 58 | }); - | - 59 | it('handles non-ascii characters', () => { - 60 | input = 'αβδ + cde'; - | - 61 | tree = parser.parse(input)!; - 62 | expect(tree.rootNode.toString()).toBe( - 63 | '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))' - 64 | ); - | - 65 | let variableNode = tree.rootNode.firstChild!.firstChild!.lastChild; - | - 66 | [input, edit] = spliceInput(input, input.indexOf('δ'), 0, '👍 * '); - 67 | expect(input).toBe('αβ👍 * δ + cde'); - 68 | tree.edit(edit); - | - 69 | variableNode = tree.rootNode.firstChild!.firstChild!.lastChild; - 70 | expect(variableNode!.startIndex).toBe(input.indexOf('cde')); - | - 71 | tree = parser.parse(input, tree)!; - 72 | expect(tree.rootNode.toString()).toBe( - 73 | '(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))' - 74 | ); - 75 | }); - 76 | }); - | - 77 | describe('.getChangedRanges(previous)', () => { - 78 | it('reports the ranges of text whose syntactic meaning has changed', () => { - 
79 | let sourceCode = 'abcdefg + hij'; - 80 | tree = parser.parse(sourceCode)!; - | - 81 | expect(tree.rootNode.toString()).toBe( - 82 | '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))' - 83 | ); - | - 84 | sourceCode = 'abc + defg + hij'; - 85 | tree.edit(new Edit({ - 86 | startIndex: 2, - 87 | oldEndIndex: 2, - 88 | newEndIndex: 5, - 89 | startPosition: { row: 0, column: 2 }, - 90 | oldEndPosition: { row: 0, column: 2 }, - 91 | newEndPosition: { row: 0, column: 5 }, - 92 | })); - | - 93 | const tree2 = parser.parse(sourceCode, tree)!; - 94 | expect(tree2.rootNode.toString()).toBe( - 95 | '(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))' - 96 | ); - | - 97 | const ranges = tree.getChangedRanges(tree2); - 98 | expect(ranges).toEqual([ - 99 | { - 100 | startIndex: 0, - 101 | endIndex: 'abc + defg'.length, - 102 | startPosition: { row: 0, column: 0 }, - 103 | endPosition: { row: 0, column: 'abc + defg'.length }, - 104 | }, - 105 | ]); - | - 106 | tree2.delete(); - 107 | }); - | - 108 | it('throws an exception if the argument is not a tree', () => { - 109 | tree = parser.parse('abcdefg + hij')!; - | - 110 | expect(() => { - 111 | tree.getChangedRanges({} as Tree); - 112 | }).toThrow(/Argument must be a Tree/); - 113 | }); - 114 | }); - | - 115 | describe('.walk()', () => { - 116 | let cursor: TreeCursor; - | - 117 | afterEach(() => { - 118 | cursor.delete(); - 119 | }); - | - 120 | it('returns a cursor that can be used to walk the tree', () => { - 121 | tree = parser.parse('a * b + c / d')!; - 122 | cursor = tree.walk(); - | - 123 | assertCursorState(cursor, { - 124 | nodeType: 'program', - 125 | nodeIsNamed: true, - 126 | startPosition: { row: 0, column: 0 }, - 127 | endPosition: { row: 0, column: 13 }, - 128 | startIndex: 0, - 129 | endIndex: 13, - 130 | }); - | - 131 | expect(cursor.gotoFirstChild()).toBe(true); - 132 | 
assertCursorState(cursor, { - 133 | nodeType: 'expression_statement', - 134 | nodeIsNamed: true, - 135 | startPosition: { row: 0, column: 0 }, - 136 | endPosition: { row: 0, column: 13 }, - 137 | startIndex: 0, - 138 | endIndex: 13, - 139 | }); - | - 140 | expect(cursor.gotoFirstChild()).toBe(true); - 141 | assertCursorState(cursor, { - 142 | nodeType: 'binary_expression', - 143 | nodeIsNamed: true, - 144 | startPosition: { row: 0, column: 0 }, - 145 | endPosition: { row: 0, column: 13 }, - 146 | startIndex: 0, - 147 | endIndex: 13, - 148 | }); - | - 149 | expect(cursor.gotoFirstChild()).toBe(true); - 150 | assertCursorState(cursor, { - 151 | nodeType: 'binary_expression', - 152 | nodeIsNamed: true, - 153 | startPosition: { row: 0, column: 0 }, - 154 | endPosition: { row: 0, column: 5 }, - 155 | startIndex: 0, - 156 | endIndex: 5, - 157 | }); - | - 158 | expect(cursor.gotoFirstChild()).toBe(true); - 159 | expect(cursor.nodeText).toBe('a'); - 160 | assertCursorState(cursor, { - 161 | nodeType: 'identifier', - 162 | nodeIsNamed: true, - 163 | startPosition: { row: 0, column: 0 }, - 164 | endPosition: { row: 0, column: 1 }, - 165 | startIndex: 0, - 166 | endIndex: 1, - 167 | }); - | - 168 | expect(cursor.gotoFirstChild()).toBe(false); - 169 | expect(cursor.gotoNextSibling()).toBe(true); - 170 | expect(cursor.nodeText).toBe('*'); - 171 | assertCursorState(cursor, { - 172 | nodeType: '*', - 173 | nodeIsNamed: false, - 174 | startPosition: { row: 0, column: 2 }, - 175 | endPosition: { row: 0, column: 3 }, - 176 | startIndex: 2, - 177 | endIndex: 3, - 178 | }); - | - 179 | expect(cursor.gotoNextSibling()).toBe(true); - 180 | expect(cursor.nodeText).toBe('b'); - 181 | assertCursorState(cursor, { - 182 | nodeType: 'identifier', - 183 | nodeIsNamed: true, - 184 | startPosition: { row: 0, column: 4 }, - 185 | endPosition: { row: 0, column: 5 }, - 186 | startIndex: 4, - 187 | endIndex: 5, - 188 | }); - | - 189 | expect(cursor.gotoNextSibling()).toBe(false); - 190 | 
expect(cursor.gotoParent()).toBe(true); - 191 | assertCursorState(cursor, { - 192 | nodeType: 'binary_expression', - 193 | nodeIsNamed: true, - 194 | startPosition: { row: 0, column: 0 }, - 195 | endPosition: { row: 0, column: 5 }, - 196 | startIndex: 0, - 197 | endIndex: 5, - 198 | }); - | - 199 | expect(cursor.gotoNextSibling()).toBe(true); - 200 | assertCursorState(cursor, { - 201 | nodeType: '+', - 202 | nodeIsNamed: false, - 203 | startPosition: { row: 0, column: 6 }, - 204 | endPosition: { row: 0, column: 7 }, - 205 | startIndex: 6, - 206 | endIndex: 7, - 207 | }); - | - 208 | expect(cursor.gotoNextSibling()).toBe(true); - 209 | assertCursorState(cursor, { - 210 | nodeType: 'binary_expression', - 211 | nodeIsNamed: true, - 212 | startPosition: { row: 0, column: 8 }, - 213 | endPosition: { row: 0, column: 13 }, - 214 | startIndex: 8, - 215 | endIndex: 13, - 216 | }); - | - 217 | const copy = tree.walk(); - 218 | copy.resetTo(cursor); - | - 219 | expect(copy.gotoPreviousSibling()).toBe(true); - 220 | assertCursorState(copy, { - 221 | nodeType: '+', - 222 | nodeIsNamed: false, - 223 | startPosition: { row: 0, column: 6 }, - 224 | endPosition: { row: 0, column: 7 }, - 225 | startIndex: 6, - 226 | endIndex: 7, - 227 | }); - | - 228 | expect(copy.gotoPreviousSibling()).toBe(true); - 229 | assertCursorState(copy, { - 230 | nodeType: 'binary_expression', - 231 | nodeIsNamed: true, - 232 | startPosition: { row: 0, column: 0 }, - 233 | endPosition: { row: 0, column: 5 }, - 234 | startIndex: 0, - 235 | endIndex: 5, - 236 | }); - | - 237 | expect(copy.gotoLastChild()).toBe(true); - 238 | assertCursorState(copy, { - 239 | nodeType: 'identifier', - 240 | nodeIsNamed: true, - 241 | startPosition: { row: 0, column: 4 }, - 242 | endPosition: { row: 0, column: 5 }, - 243 | startIndex: 4, - 244 | endIndex: 5, - 245 | }); - | - 246 | expect(copy.gotoParent()).toBe(true); - 247 | expect(copy.gotoParent()).toBe(true); - 248 | expect(copy.nodeType).toBe('binary_expression'); - 249 
| expect(copy.gotoParent()).toBe(true); - 250 | expect(copy.nodeType).toBe('expression_statement'); - 251 | expect(copy.gotoParent()).toBe(true); - 252 | expect(copy.nodeType).toBe('program'); - 253 | expect(copy.gotoParent()).toBe(false); - 254 | copy.delete(); - | - 255 | expect(cursor.gotoParent()).toBe(true); - 256 | expect(cursor.nodeType).toBe('binary_expression'); - 257 | expect(cursor.gotoParent()).toBe(true); - 258 | expect(cursor.nodeType).toBe('expression_statement'); - 259 | expect(cursor.gotoParent()).toBe(true); - 260 | expect(cursor.nodeType).toBe('program'); - 261 | }); - | - 262 | it('keeps track of the field name associated with each node', () => { - 263 | tree = parser.parse('a.b();')!; - 264 | cursor = tree.walk(); - 265 | cursor.gotoFirstChild(); - 266 | cursor.gotoFirstChild(); - | - 267 | expect(cursor.currentNode.type).toBe('call_expression'); - 268 | expect(cursor.currentFieldName).toBeNull(); - | - 269 | cursor.gotoFirstChild(); - 270 | expect(cursor.currentNode.type).toBe('member_expression'); - 271 | expect(cursor.currentFieldName).toBe('function'); - | - 272 | cursor.gotoFirstChild(); - 273 | expect(cursor.currentNode.type).toBe('identifier'); - 274 | expect(cursor.currentFieldName).toBe('object'); - | - 275 | cursor.gotoNextSibling(); - 276 | cursor.gotoNextSibling(); - 277 | expect(cursor.currentNode.type).toBe('property_identifier'); - 278 | expect(cursor.currentFieldName).toBe('property'); - | - 279 | cursor.gotoParent(); - 280 | cursor.gotoNextSibling(); - 281 | expect(cursor.currentNode.type).toBe('arguments'); - 282 | expect(cursor.currentFieldName).toBe('arguments'); - 283 | }); - | - 284 | it('returns a cursor that can be reset anywhere in the tree', () => { - 285 | tree = parser.parse('a * b + c / d')!; - 286 | cursor = tree.walk(); - 287 | const root = tree.rootNode.firstChild; - | - 288 | cursor.reset(root!.firstChild!.firstChild!); - 289 | assertCursorState(cursor, { - 290 | nodeType: 'binary_expression', - 291 | 
nodeIsNamed: true, - 292 | startPosition: { row: 0, column: 0 }, - 293 | endPosition: { row: 0, column: 5 }, - 294 | startIndex: 0, - 295 | endIndex: 5, - 296 | }); - | - 297 | cursor.gotoFirstChild(); - 298 | assertCursorState(cursor, { - 299 | nodeType: 'identifier', - 300 | nodeIsNamed: true, - 301 | startPosition: { row: 0, column: 0 }, - 302 | endPosition: { row: 0, column: 1 }, - 303 | startIndex: 0, - 304 | endIndex: 1, - 305 | }); - | - 306 | expect(cursor.gotoParent()).toBe(true); - 307 | expect(cursor.gotoParent()).toBe(false); - 308 | }); - 309 | }); - | - 310 | describe('.copy', () => { - 311 | let input: string; - 312 | let edit: Edit; - | - 313 | it('creates another tree that remains stable if the original tree is edited', () => { - 314 | input = 'abc + cde'; - 315 | tree = parser.parse(input)!; - 316 | expect(tree.rootNode.toString()).toBe( - 317 | '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))' - 318 | ); - | - 319 | const tree2 = tree.copy(); - 320 | [input, edit] = spliceInput(input, 3, 0, '123'); - 321 | expect(input).toBe('abc123 + cde'); - 322 | tree.edit(edit); - | - 323 | const leftNode = tree.rootNode.firstChild!.firstChild!.firstChild; - 324 | const leftNode2 = tree2.rootNode.firstChild!.firstChild!.firstChild; - 325 | const rightNode = tree.rootNode.firstChild!.firstChild!.lastChild; - 326 | const rightNode2 = tree2.rootNode.firstChild!.firstChild!.lastChild; - 327 | expect(leftNode!.endIndex).toBe(6); - 328 | expect(leftNode2!.endIndex).toBe(3); - 329 | expect(rightNode!.startIndex).toBe(9); - 330 | expect(rightNode2!.startIndex).toBe(6); - | - 331 | tree2.delete(); - 332 | }); - 333 | }); - 334 | }); - | - 335 | function spliceInput(input: string, startIndex: number, lengthRemoved: number, newText: string): [string, Edit] { - 336 | const oldEndIndex = startIndex + lengthRemoved; - 337 | const newEndIndex = startIndex + newText.length; - 338 | const startPosition = getExtent(input.slice(0, 
startIndex)); - 339 | const oldEndPosition = getExtent(input.slice(0, oldEndIndex)); - 340 | input = input.slice(0, startIndex) + newText + input.slice(oldEndIndex); - 341 | const newEndPosition = getExtent(input.slice(0, newEndIndex)); - 342 | return [ - 343 | input, - 344 | new Edit({ - 345 | startIndex, - 346 | startPosition, - 347 | oldEndIndex, - 348 | oldEndPosition, - 349 | newEndIndex, - 350 | newEndPosition, - 351 | }), - 352 | ]; - 353 | } - | - 354 | // Gets the extent of the text in terms of zero-based row and column numbers. - 355 | function getExtent(text: string): Point { - 356 | let row = 0; - 357 | let index = -1; - 358 | let lastIndex = 0; - 359 | while ((index = text.indexOf('\n', index + 1)) !== -1) { - 360 | row++; - 361 | lastIndex = index + 1; - 362 | } - 363 | return { row, column: text.length - lastIndex }; - 364 | } - | - 365 | function assertCursorState(cursor: TreeCursor, params: CursorState): void { - 366 | expect(cursor.nodeType).toBe(params.nodeType); - 367 | expect(cursor.nodeIsNamed).toBe(params.nodeIsNamed); - 368 | expect(cursor.startPosition).toEqual(params.startPosition); - 369 | expect(cursor.endPosition).toEqual(params.endPosition); - 370 | expect(cursor.startIndex).toEqual(params.startIndex); - 371 | expect(cursor.endIndex).toEqual(params.endIndex); - | - 372 | const node = cursor.currentNode; - 373 | expect(node.type).toBe(params.nodeType); - 374 | expect(node.isNamed).toBe(params.nodeIsNamed); - 375 | expect(node.startPosition).toEqual(params.startPosition); - 376 | expect(node.endPosition).toEqual(params.endPosition); - 377 | expect(node.startIndex).toEqual(params.startIndex); - 378 | expect(node.endIndex).toEqual(params.endIndex); - 379 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/tsconfig.json: --------------------------------------------------------------------------------- - 1 | { - 2 | "compilerOptions": { - 3 | "target": "es2022", - 4 | "module": 
"es2022", - 5 | "lib": [ - 6 | "es2022", - 7 | "dom" - 8 | ], - 9 | "declaration": true, - 10 | "declarationMap": true, - 11 | "sourceMap": true, - 12 | "rootDir": "./", - 13 | "outDir": "./dist", - 14 | "strict": true, - 15 | "noImplicitAny": true, - 16 | "strictNullChecks": true, - 17 | "strictFunctionTypes": true, - 18 | "strictPropertyInitialization": true, - 19 | "noImplicitThis": true, - 20 | "alwaysStrict": true, - 21 | "noUnusedLocals": true, - 22 | "noUnusedParameters": true, - 23 | "noImplicitReturns": true, - 24 | "moduleResolution": "node", - 25 | "esModuleInterop": true, - 26 | "forceConsistentCasingInFileNames": true, - 27 | "skipLibCheck": true, - 28 | "composite": true, - 29 | "isolatedModules": true, - 30 | }, - 31 | "include": [ - 32 | "src/*.ts", - 33 | "script/*", - 34 | "test/*", - 35 | "lib/*.ts" - 36 | ], - 37 | "exclude": [ - 38 | "node_modules", - 39 | "dist", - 40 | ] - 41 | } - - - --------------------------------------------------------------------------------- -/lib/binding_web/vitest.config.ts: --------------------------------------------------------------------------------- - 1 | import { defineConfig } from 'vitest/config' - | - 2 | export default defineConfig({ - 3 | test: { - 4 | globals: true, - 5 | environment: 'node', - 6 | coverage: { - 7 | include: [ - 8 | 'web-tree-sitter.js', - 9 | ], - 10 | exclude: [ - 11 | 'test/**', - 12 | 'dist/**', - 13 | 'lib/**', - 14 | 'wasm/**' - 15 | ], - 16 | }, - 17 | } - 18 | }) - - - --------------------------------------------------------------------------------- -/lib/binding_web/wasm-test-grammars.nix: --------------------------------------------------------------------------------- - 1 | { - 2 | cli, - 3 | lib, - 4 | nodejs_22, - 5 | pkgsCross, - 6 | src, - 7 | stdenv, - 8 | test-grammars, - 9 | version, - 10 | }: - 11 | let - 12 | grammars = [ - 13 | "bash" - 14 | "c" - 15 | "cpp" - 16 | "embedded-template" - 17 | "html" - 18 | "javascript" - 19 | "json" - 20 | "python" - 21 | "rust" - 22 
| "typescript" - 23 | ]; - 24 | in - 25 | stdenv.mkDerivation { - 26 | inherit src version; - | - 27 | pname = "wasm-test-grammars"; - | - 28 | nativeBuildInputs = [ - 29 | cli - 30 | pkgsCross.wasi32.stdenv.cc - 31 | nodejs_22 - 32 | ]; - | - 33 | buildPhase = '' - 34 | export HOME=$TMPDIR - 35 | export TREE_SITTER_WASI_SDK_PATH=${pkgsCross.wasi32.stdenv.cc} - 36 | export NIX_LDFLAGS="" - | - 37 | cp -r ${test-grammars}/fixtures . - 38 | chmod -R u+w fixtures - | - 39 | for grammar in ${lib.concatStringsSep " " grammars}; do - 40 | if [ -d "fixtures/grammars/$grammar" ]; then - 41 | echo "Building WASM for $grammar" - | - 42 | if [ "$grammar" = "typescript" ]; then - 43 | tree-sitter build --wasm -o "tree-sitter-typescript.wasm" "fixtures/grammars/$grammar/typescript" - 44 | tree-sitter build --wasm -o "tree-sitter-tsx.wasm" "fixtures/grammars/$grammar/tsx" - 45 | else - 46 | tree-sitter build --wasm -o "tree-sitter-$grammar.wasm" "fixtures/grammars/$grammar" - 47 | fi - 48 | fi - 49 | done - 50 | ''; - | - 51 | installPhase = '' - 52 | mkdir -p $out - 53 | for wasm in *.wasm; do - 54 | if [ -f "$wasm" ]; then - 55 | echo "Installing $wasm" - 56 | cp "$wasm" $out/ - 57 | fi - 58 | done - 59 | ''; - 60 | } - - - --------------------------------------------------------------------------------- -/lib/Cargo.toml: --------------------------------------------------------------------------------- - 1 | [package] - 2 | name = "tree-sitter" - 3 | version.workspace = true - 4 | description = "Rust bindings to the Tree-sitter parsing library" - 5 | authors.workspace = true - 6 | edition.workspace = true - 7 | rust-version = "1.77" - 8 | readme = "binding_rust/README.md" - 9 | homepage.workspace = true - 10 | repository.workspace = true - 11 | documentation = "https://docs.rs/tree-sitter" - 12 | license.workspace = true - 13 | keywords.workspace = true - 14 | categories = [ - 15 | "api-bindings", - 16 | "external-ffi-bindings", - 17 | "parsing", - 18 | "text-editors", - 19 | 
] - | - 20 | build = "binding_rust/build.rs" - 21 | links = "tree-sitter" - | - 22 | include = [ - 23 | "/binding_rust/*", - 24 | "/Cargo.toml", - 25 | "/src/*.h", - 26 | "/src/*.c", - 27 | "/src/portable/*", - 28 | "/src/unicode/*", - 29 | "/src/wasm/*", - 30 | "/include/tree_sitter/api.h", - 31 | "/LICENSE", - 32 | ] - | - 33 | [package.metadata.docs.rs] - 34 | all-features = true - 35 | rustdoc-args = ["--cfg", "docsrs"] - 36 | targets = ["x86_64-unknown-linux-gnu", "x86_64-pc-windows-gnu"] - | - 37 | [lints] - 38 | workspace = true - | - 39 | [features] - 40 | default = ["std"] - 41 | std = ["regex/std", "regex/perf", "regex-syntax/unicode"] - 42 | wasm = ["std", "wasmtime-c-api"] - | - 43 | [dependencies] - 44 | regex = { version = "1.11.3", default-features = false, features = ["unicode"] } - 45 | regex-syntax = { version = "0.8.6", default-features = false } - 46 | tree-sitter-language.workspace = true - 47 | streaming-iterator = "0.1.9" - | - 48 | [dependencies.wasmtime-c-api] - 49 | version = "33.0.2" - 50 | optional = true - 51 | package = "wasmtime-c-api-impl" - 52 | default-features = false - 53 | features = ["cranelift", "gc-drc"] - | - 54 | [build-dependencies] - 55 | bindgen = { version = "0.72.0", optional = true } - 56 | cc.workspace = true - 57 | serde_json.workspace = true - | - 58 | [lib] - 59 | path = "binding_rust/lib.rs" - - - --------------------------------------------------------------------------------- -/lib/include/tree_sitter/api.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_API_H_ - 2 | #define TREE_SITTER_API_H_ - | - 3 | #ifndef TREE_SITTER_HIDE_SYMBOLS - 4 | #if defined(__GNUC__) || defined(__clang__) - 5 | #pragma GCC visibility push(default) - 6 | #endif - 7 | #endif - | - 8 | #include - 9 | #include - 10 | #include - | - 11 | #ifdef __cplusplus - 12 | extern "C" { - 13 | #endif - | - 14 | /****************************/ - 15 | /* Section - ABI Versioning */ - 16 | 
/****************************/ - | - 17 | /** - 18 | * The latest ABI version that is supported by the current version of the - 19 | * library. When Languages are generated by the Tree-sitter CLI, they are - 20 | * assigned an ABI version number that corresponds to the current CLI version. - 21 | * The Tree-sitter library is generally backwards-compatible with languages - 22 | * generated using older CLI versions, but is not forwards-compatible. - 23 | */ - 24 | #define TREE_SITTER_LANGUAGE_VERSION 15 - | - 25 | /** - 26 | * The earliest ABI version that is supported by the current version of the - 27 | * library. - 28 | */ - 29 | #define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 13 - | - 30 | /*******************/ - 31 | /* Section - Types */ - 32 | /*******************/ - | - 33 | typedef uint16_t TSStateId; - 34 | typedef uint16_t TSSymbol; - 35 | typedef uint16_t TSFieldId; - 36 | typedef struct TSLanguage TSLanguage; - 37 | typedef struct TSParser TSParser; - 38 | typedef struct TSTree TSTree; - 39 | typedef struct TSQuery TSQuery; - 40 | typedef struct TSQueryCursor TSQueryCursor; - 41 | typedef struct TSLookaheadIterator TSLookaheadIterator; - | - 42 | // This function signature reads one code point from the given string, - 43 | // returning the number of bytes consumed. It should write the code point - 44 | // to the `code_point` pointer, or write -1 if the input is invalid. 
- 45 | typedef uint32_t (*TSDecodeFunction)( - 46 | const uint8_t *string, - 47 | uint32_t length, - 48 | int32_t *code_point - 49 | ); - | - 50 | // Deprecated alias to be removed in ABI 16 - 51 | typedef TSDecodeFunction DecodeFunction; - | - 52 | typedef enum TSInputEncoding { - 53 | TSInputEncodingUTF8, - 54 | TSInputEncodingUTF16LE, - 55 | TSInputEncodingUTF16BE, - 56 | TSInputEncodingCustom - 57 | } TSInputEncoding; - | - 58 | typedef enum TSSymbolType { - 59 | TSSymbolTypeRegular, - 60 | TSSymbolTypeAnonymous, - 61 | TSSymbolTypeSupertype, - 62 | TSSymbolTypeAuxiliary, - 63 | } TSSymbolType; - | - 64 | typedef struct TSPoint { - 65 | uint32_t row; - 66 | uint32_t column; - 67 | } TSPoint; - | - 68 | typedef struct TSRange { - 69 | TSPoint start_point; - 70 | TSPoint end_point; - 71 | uint32_t start_byte; - 72 | uint32_t end_byte; - 73 | } TSRange; - | - 74 | typedef struct TSInput { - 75 | void *payload; - 76 | const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read); - 77 | TSInputEncoding encoding; - 78 | TSDecodeFunction decode; - 79 | } TSInput; - | - 80 | typedef struct TSParseState { - 81 | void *payload; - 82 | uint32_t current_byte_offset; - 83 | bool has_error; - 84 | } TSParseState; - | - 85 | typedef struct TSParseOptions { - 86 | void *payload; - 87 | bool (*progress_callback)(TSParseState *state); - 88 | } TSParseOptions; - | - 89 | typedef enum TSLogType { - 90 | TSLogTypeParse, - 91 | TSLogTypeLex, - 92 | } TSLogType; - | - 93 | typedef struct TSLogger { - 94 | void *payload; - 95 | void (*log)(void *payload, TSLogType log_type, const char *buffer); - 96 | } TSLogger; - | - 97 | typedef struct TSInputEdit { - 98 | uint32_t start_byte; - 99 | uint32_t old_end_byte; - 100 | uint32_t new_end_byte; - 101 | TSPoint start_point; - 102 | TSPoint old_end_point; - 103 | TSPoint new_end_point; - 104 | } TSInputEdit; - | - 105 | typedef struct TSNode { - 106 | uint32_t context[4]; - 107 | const void *id; - 108 | 
const TSTree *tree; - 109 | } TSNode; - | - 110 | typedef struct TSTreeCursor { - 111 | const void *tree; - 112 | const void *id; - 113 | uint32_t context[3]; - 114 | } TSTreeCursor; - | - 115 | typedef struct TSQueryCapture { - 116 | TSNode node; - 117 | uint32_t index; - 118 | } TSQueryCapture; - | - 119 | typedef enum TSQuantifier { - 120 | TSQuantifierZero = 0, // must match the array initialization value - 121 | TSQuantifierZeroOrOne, - 122 | TSQuantifierZeroOrMore, - 123 | TSQuantifierOne, - 124 | TSQuantifierOneOrMore, - 125 | } TSQuantifier; - | - 126 | typedef struct TSQueryMatch { - 127 | uint32_t id; - 128 | uint16_t pattern_index; - 129 | uint16_t capture_count; - 130 | const TSQueryCapture *captures; - 131 | } TSQueryMatch; - | - 132 | typedef enum TSQueryPredicateStepType { - 133 | TSQueryPredicateStepTypeDone, - 134 | TSQueryPredicateStepTypeCapture, - 135 | TSQueryPredicateStepTypeString, - 136 | } TSQueryPredicateStepType; - | - 137 | typedef struct TSQueryPredicateStep { - 138 | TSQueryPredicateStepType type; - 139 | uint32_t value_id; - 140 | } TSQueryPredicateStep; - | - 141 | typedef enum TSQueryError { - 142 | TSQueryErrorNone = 0, - 143 | TSQueryErrorSyntax, - 144 | TSQueryErrorNodeType, - 145 | TSQueryErrorField, - 146 | TSQueryErrorCapture, - 147 | TSQueryErrorStructure, - 148 | TSQueryErrorLanguage, - 149 | } TSQueryError; - | - 150 | typedef struct TSQueryCursorState { - 151 | void *payload; - 152 | uint32_t current_byte_offset; - 153 | } TSQueryCursorState; - | - 154 | typedef struct TSQueryCursorOptions { - 155 | void *payload; - 156 | bool (*progress_callback)(TSQueryCursorState *state); - 157 | } TSQueryCursorOptions; - | - 158 | /** - 159 | * The metadata associated with a language. - 160 | * - 161 | * Currently, this metadata can be used to check the [Semantic Version](https://semver.org/) - 162 | * of the language. 
This version information should be used to signal if a given parser might - 163 | * be incompatible with existing queries when upgrading between major versions, or minor versions - 164 | * if it's in zerover. - 165 | */ - 166 | typedef struct TSLanguageMetadata { - 167 | uint8_t major_version; - 168 | uint8_t minor_version; - 169 | uint8_t patch_version; - 170 | } TSLanguageMetadata; - | - 171 | /********************/ - 172 | /* Section - Parser */ - 173 | /********************/ - | - 174 | /** - 175 | * Create a new parser. - 176 | */ - 177 | TSParser *ts_parser_new(void); - | - 178 | /** - 179 | * Delete the parser, freeing all of the memory that it used. - 180 | */ - 181 | void ts_parser_delete(TSParser *self); - | - 182 | /** - 183 | * Get the parser's current language. - 184 | */ - 185 | const TSLanguage *ts_parser_language(const TSParser *self); - | - 186 | /** - 187 | * Set the language that the parser should use for parsing. - 188 | * - 189 | * Returns a boolean indicating whether or not the language was successfully - 190 | * assigned. True means assignment succeeded. False means there was a version - 191 | * mismatch: the language was generated with an incompatible version of the - 192 | * Tree-sitter CLI. Check the language's ABI version using [`ts_language_abi_version`] - 193 | * and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and - 194 | * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. - 195 | */ - 196 | bool ts_parser_set_language(TSParser *self, const TSLanguage *language); - | - 197 | /** - 198 | * Set the ranges of text that the parser should include when parsing. - 199 | * - 200 | * By default, the parser will always include entire documents. This function - 201 | * allows you to parse only a *portion* of a document but still return a syntax - 202 | * tree whose ranges match up with the document as a whole. You can also pass - 203 | * multiple disjoint ranges. 
- 204 | * - 205 | * The second and third parameters specify the location and length of an array - 206 | * of ranges. The parser does *not* take ownership of these ranges; it copies - 207 | * the data, so it doesn't matter how these ranges are allocated. - 208 | * - 209 | * If `count` is zero, then the entire document will be parsed. Otherwise, - 210 | * the given ranges must be ordered from earliest to latest in the document, - 211 | * and they must not overlap. That is, the following must hold for all: - 212 | * - 213 | * `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte` - 214 | * - 215 | * If this requirement is not satisfied, the operation will fail, the ranges - 216 | * will not be assigned, and this function will return `false`. On success, - 217 | * this function returns `true` - 218 | */ - 219 | bool ts_parser_set_included_ranges( - 220 | TSParser *self, - 221 | const TSRange *ranges, - 222 | uint32_t count - 223 | ); - | - 224 | /** - 225 | * Get the ranges of text that the parser will include when parsing. - 226 | * - 227 | * The returned pointer is owned by the parser. The caller should not free it - 228 | * or write to it. The length of the array will be written to the given - 229 | * `count` pointer. - 230 | */ - 231 | const TSRange *ts_parser_included_ranges( - 232 | const TSParser *self, - 233 | uint32_t *count - 234 | ); - | - 235 | /** - 236 | * Use the parser to parse some source code and create a syntax tree. - 237 | * - 238 | * If you are parsing this document for the first time, pass `NULL` for the - 239 | * `old_tree` parameter. Otherwise, if you have already parsed an earlier - 240 | * version of this document and the document has since been edited, pass the - 241 | * previous syntax tree so that the unchanged parts of it can be reused. - 242 | * This will save time and memory. 
For this to work correctly, you must have - 243 | * already edited the old syntax tree using the [`ts_tree_edit`] function in a - 244 | * way that exactly matches the source code changes. - 245 | * - 246 | * The [`TSInput`] parameter lets you specify how to read the text. It has the - 247 | * following three fields: - 248 | * 1. [`read`]: A function to retrieve a chunk of text at a given byte offset - 249 | * and (row, column) position. The function should return a pointer to the - 250 | * text and write its length to the [`bytes_read`] pointer. The parser does - 251 | * not take ownership of this buffer; it just borrows it until it has - 252 | * finished reading it. The function should write a zero value to the - 253 | * [`bytes_read`] pointer to indicate the end of the document. - 254 | * 2. [`payload`]: An arbitrary pointer that will be passed to each invocation - 255 | * of the [`read`] function. - 256 | * 3. [`encoding`]: An indication of how the text is encoded. Either - 257 | * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`. - 258 | * - 259 | * This function returns a syntax tree on success, and `NULL` on failure. There - 260 | * are four possible reasons for failure: - 261 | * 1. The parser does not have a language assigned. Check for this using the - 262 | [`ts_parser_language`] function. - 263 | * 2. Parsing was cancelled due to the progress callback returning true. This callback - 264 | * is passed in [`ts_parser_parse_with_options`] inside the [`TSParseOptions`] struct. - 265 | * - 266 | * [`read`]: TSInput::read - 267 | * [`payload`]: TSInput::payload - 268 | * [`encoding`]: TSInput::encoding - 269 | * [`bytes_read`]: TSInput::read - 270 | */ - 271 | TSTree *ts_parser_parse( - 272 | TSParser *self, - 273 | const TSTree *old_tree, - 274 | TSInput input - 275 | ); - | - 276 | /** - 277 | * Use the parser to parse some source code and create a syntax tree, with some options. - 278 | * - 279 | * See [`ts_parser_parse`] for more details. 
- 280 | * - 281 | * See [`TSParseOptions`] for more details on the options. - 282 | */ - 283 | TSTree* ts_parser_parse_with_options( - 284 | TSParser *self, - 285 | const TSTree *old_tree, - 286 | TSInput input, - 287 | TSParseOptions parse_options - 288 | ); - | - 289 | /** - 290 | * Use the parser to parse some source code stored in one contiguous buffer. - 291 | * The first two parameters are the same as in the [`ts_parser_parse`] function - 292 | * above. The second two parameters indicate the location of the buffer and its - 293 | * length in bytes. - 294 | */ - 295 | TSTree *ts_parser_parse_string( - 296 | TSParser *self, - 297 | const TSTree *old_tree, - 298 | const char *string, - 299 | uint32_t length - 300 | ); - | - 301 | /** - 302 | * Use the parser to parse some source code stored in one contiguous buffer with - 303 | * a given encoding. The first four parameters work the same as in the - 304 | * [`ts_parser_parse_string`] method above. The final parameter indicates whether - 305 | * the text is encoded as UTF8 or UTF16. - 306 | */ - 307 | TSTree *ts_parser_parse_string_encoding( - 308 | TSParser *self, - 309 | const TSTree *old_tree, - 310 | const char *string, - 311 | uint32_t length, - 312 | TSInputEncoding encoding - 313 | ); - | - 314 | /** - 315 | * Instruct the parser to start the next parse from the beginning. - 316 | * - 317 | * If the parser previously failed because of the progress callback, then - 318 | * by default, it will resume where it left off on the next call to - 319 | * [`ts_parser_parse`] or other parsing functions. If you don't want to resume, - 320 | * and instead intend to use this parser to parse some other document, you must - 321 | * call [`ts_parser_reset`] first. - 322 | */ - 323 | void ts_parser_reset(TSParser *self); - | - 324 | /** - 325 | * Set the logger that a parser should use during parsing. - 326 | * - 327 | * The parser does not take ownership over the logger payload. 
If a logger was - 328 | * previously assigned, the caller is responsible for releasing any memory - 329 | * owned by the previous logger. - 330 | */ - 331 | void ts_parser_set_logger(TSParser *self, TSLogger logger); - | - 332 | /** - 333 | * Get the parser's current logger. - 334 | */ - 335 | TSLogger ts_parser_logger(const TSParser *self); - | - 336 | /** - 337 | * Set the file descriptor to which the parser should write debugging graphs - 338 | * during parsing. The graphs are formatted in the DOT language. You may want - 339 | * to pipe these graphs directly to a `dot(1)` process in order to generate - 340 | * SVG output. You can turn off this logging by passing a negative number. - 341 | */ - 342 | void ts_parser_print_dot_graphs(TSParser *self, int fd); - | - 343 | /******************/ - 344 | /* Section - Tree */ - 345 | /******************/ - | - 346 | /** - 347 | * Create a shallow copy of the syntax tree. This is very fast. - 348 | * - 349 | * You need to copy a syntax tree in order to use it on more than one thread at - 350 | * a time, as syntax trees are not thread safe. - 351 | */ - 352 | TSTree *ts_tree_copy(const TSTree *self); - | - 353 | /** - 354 | * Delete the syntax tree, freeing all of the memory that it used. - 355 | */ - 356 | void ts_tree_delete(TSTree *self); - | - 357 | /** - 358 | * Get the root node of the syntax tree. - 359 | */ - 360 | TSNode ts_tree_root_node(const TSTree *self); - | - 361 | /** - 362 | * Get the root node of the syntax tree, but with its position - 363 | * shifted forward by the given offset. - 364 | */ - 365 | TSNode ts_tree_root_node_with_offset( - 366 | const TSTree *self, - 367 | uint32_t offset_bytes, - 368 | TSPoint offset_extent - 369 | ); - | - 370 | /** - 371 | * Get the language that was used to parse the syntax tree. - 372 | */ - 373 | const TSLanguage *ts_tree_language(const TSTree *self); - | - 374 | /** - 375 | * Get the array of included ranges that was used to parse the syntax tree. 
- 376 | * - 377 | * The returned pointer must be freed by the caller. - 378 | */ - 379 | TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length); - | - 380 | /** - 381 | * Edit the syntax tree to keep it in sync with source code that has been - 382 | * edited. - 383 | * - 384 | * You must describe the edit both in terms of byte offsets and in terms of - 385 | * (row, column) coordinates. - 386 | */ - 387 | void ts_tree_edit(TSTree *self, const TSInputEdit *edit); - | - 388 | /** - 389 | * Compare an old edited syntax tree to a new syntax tree representing the same - 390 | * document, returning an array of ranges whose syntactic structure has changed. - 391 | * - 392 | * For this to work correctly, the old syntax tree must have been edited such - 393 | * that its ranges match up to the new tree. Generally, you'll want to call - 394 | * this function right after calling one of the [`ts_parser_parse`] functions. - 395 | * You need to pass the old tree that was passed to parse, as well as the new - 396 | * tree that was returned from that function. - 397 | * - 398 | * The returned ranges indicate areas where the hierarchical structure of syntax - 399 | * nodes (from root to leaf) has changed between the old and new trees. Characters - 400 | * outside these ranges have identical ancestor nodes in both trees. - 401 | * - 402 | * Note that the returned ranges may be slightly larger than the exact changed areas, - 403 | * but Tree-sitter attempts to make them as small as possible. - 404 | * - 405 | * The returned array is allocated using `malloc` and the caller is responsible - 406 | * for freeing it using `free`. The length of the array will be written to the - 407 | * given `length` pointer. - 408 | */ - 409 | TSRange *ts_tree_get_changed_ranges( - 410 | const TSTree *old_tree, - 411 | const TSTree *new_tree, - 412 | uint32_t *length - 413 | ); - | - 414 | /** - 415 | * Write a DOT graph describing the syntax tree to the given file. 
- 416 | */ - 417 | void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor); - | - 418 | /******************/ - 419 | /* Section - Node */ - 420 | /******************/ - | - 421 | /** - 422 | * Get the node's type as a null-terminated string. - 423 | */ - 424 | const char *ts_node_type(TSNode self); - | - 425 | /** - 426 | * Get the node's type as a numerical id. - 427 | */ - 428 | TSSymbol ts_node_symbol(TSNode self); - | - 429 | /** - 430 | * Get the node's language. - 431 | */ - 432 | const TSLanguage *ts_node_language(TSNode self); - | - 433 | /** - 434 | * Get the node's type as it appears in the grammar ignoring aliases as a - 435 | * null-terminated string. - 436 | */ - 437 | const char *ts_node_grammar_type(TSNode self); - | - 438 | /** - 439 | * Get the node's type as a numerical id as it appears in the grammar ignoring - 440 | * aliases. This should be used in [`ts_language_next_state`] instead of - 441 | * [`ts_node_symbol`]. - 442 | */ - 443 | TSSymbol ts_node_grammar_symbol(TSNode self); - | - 444 | /** - 445 | * Get the node's start byte. - 446 | */ - 447 | uint32_t ts_node_start_byte(TSNode self); - | - 448 | /** - 449 | * Get the node's start position in terms of rows and columns. - 450 | */ - 451 | TSPoint ts_node_start_point(TSNode self); - | - 452 | /** - 453 | * Get the node's end byte. - 454 | */ - 455 | uint32_t ts_node_end_byte(TSNode self); - | - 456 | /** - 457 | * Get the node's end position in terms of rows and columns. - 458 | */ - 459 | TSPoint ts_node_end_point(TSNode self); - | - 460 | /** - 461 | * Get an S-expression representing the node as a string. - 462 | * - 463 | * This string is allocated with `malloc` and the caller is responsible for - 464 | * freeing it using `free`. - 465 | */ - 466 | char *ts_node_string(TSNode self); - | - 467 | /** - 468 | * Check if the node is null. 
Functions like [`ts_node_child`] and - 469 | * [`ts_node_next_sibling`] will return a null node to indicate that no such node - 470 | * was found. - 471 | */ - 472 | bool ts_node_is_null(TSNode self); - | - 473 | /** - 474 | * Check if the node is *named*. Named nodes correspond to named rules in the - 475 | * grammar, whereas *anonymous* nodes correspond to string literals in the - 476 | * grammar. - 477 | */ - 478 | bool ts_node_is_named(TSNode self); - | - 479 | /** - 480 | * Check if the node is *missing*. Missing nodes are inserted by the parser in - 481 | * order to recover from certain kinds of syntax errors. - 482 | */ - 483 | bool ts_node_is_missing(TSNode self); - | - 484 | /** - 485 | * Check if the node is *extra*. Extra nodes represent things like comments, - 486 | * which are not required the grammar, but can appear anywhere. - 487 | */ - 488 | bool ts_node_is_extra(TSNode self); - | - 489 | /** - 490 | * Check if a syntax node has been edited. - 491 | */ - 492 | bool ts_node_has_changes(TSNode self); - | - 493 | /** - 494 | * Check if the node is a syntax error or contains any syntax errors. - 495 | */ - 496 | bool ts_node_has_error(TSNode self); - | - 497 | /** - 498 | * Check if the node is a syntax error. - 499 | */ - 500 | bool ts_node_is_error(TSNode self); - | - 501 | /** - 502 | * Get this node's parse state. - 503 | */ - 504 | TSStateId ts_node_parse_state(TSNode self); - | - 505 | /** - 506 | * Get the parse state after this node. - 507 | */ - 508 | TSStateId ts_node_next_parse_state(TSNode self); - | - 509 | /** - 510 | * Get the node's immediate parent. - 511 | * Prefer [`ts_node_child_with_descendant`] for - 512 | * iterating over the node's ancestors. - 513 | */ - 514 | TSNode ts_node_parent(TSNode self); - | - 515 | /** - 516 | * Get the node that contains `descendant`. - 517 | * - 518 | * Note that this can return `descendant` itself. 
- 519 | */ - 520 | TSNode ts_node_child_with_descendant(TSNode self, TSNode descendant); - | - 521 | /** - 522 | * Get the node's child at the given index, where zero represents the first - 523 | * child. - 524 | */ - 525 | TSNode ts_node_child(TSNode self, uint32_t child_index); - | - 526 | /** - 527 | * Get the field name for node's child at the given index, where zero represents - 528 | * the first child. Returns NULL, if no field is found. - 529 | */ - 530 | const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index); - | - 531 | /** - 532 | * Get the field name for node's named child at the given index, where zero - 533 | * represents the first named child. Returns NULL, if no field is found. - 534 | */ - 535 | const char *ts_node_field_name_for_named_child(TSNode self, uint32_t named_child_index); - | - 536 | /** - 537 | * Get the node's number of children. - 538 | */ - 539 | uint32_t ts_node_child_count(TSNode self); - | - 540 | /** - 541 | * Get the node's *named* child at the given index. - 542 | * - 543 | * See also [`ts_node_is_named`]. - 544 | */ - 545 | TSNode ts_node_named_child(TSNode self, uint32_t child_index); - | - 546 | /** - 547 | * Get the node's number of *named* children. - 548 | * - 549 | * See also [`ts_node_is_named`]. - 550 | */ - 551 | uint32_t ts_node_named_child_count(TSNode self); - | - 552 | /** - 553 | * Get the node's child with the given field name. - 554 | */ - 555 | TSNode ts_node_child_by_field_name( - 556 | TSNode self, - 557 | const char *name, - 558 | uint32_t name_length - 559 | ); - | - 560 | /** - 561 | * Get the node's child with the given numerical field id. - 562 | * - 563 | * You can convert a field name to an id using the - 564 | * [`ts_language_field_id_for_name`] function. - 565 | */ - 566 | TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id); - | - 567 | /** - 568 | * Get the node's next / previous sibling. 
- 569 | */ - 570 | TSNode ts_node_next_sibling(TSNode self); - 571 | TSNode ts_node_prev_sibling(TSNode self); - | - 572 | /** - 573 | * Get the node's next / previous *named* sibling. - 574 | */ - 575 | TSNode ts_node_next_named_sibling(TSNode self); - 576 | TSNode ts_node_prev_named_sibling(TSNode self); - | - 577 | /** - 578 | * Get the node's first child that contains or starts after the given byte offset. - 579 | */ - 580 | TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte); - | - 581 | /** - 582 | * Get the node's first named child that contains or starts after the given byte offset. - 583 | */ - 584 | TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte); - | - 585 | /** - 586 | * Get the node's number of descendants, including one for the node itself. - 587 | */ - 588 | uint32_t ts_node_descendant_count(TSNode self); - | - 589 | /** - 590 | * Get the smallest node within this node that spans the given range of bytes - 591 | * or (row, column) positions. - 592 | */ - 593 | TSNode ts_node_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end); - 594 | TSNode ts_node_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end); - | - 595 | /** - 596 | * Get the smallest named node within this node that spans the given range of - 597 | * bytes or (row, column) positions. - 598 | */ - 599 | TSNode ts_node_named_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end); - 600 | TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end); - | - 601 | /** - 602 | * Edit the node to keep it in-sync with source code that has been edited. - 603 | * - 604 | * This function is only rarely needed. When you edit a syntax tree with the - 605 | * [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree - 606 | * afterward will already reflect the edit. 
You only need to use [`ts_node_edit`] - 607 | * when you have a [`TSNode`] instance that you want to keep and continue to use - 608 | * after an edit. - 609 | */ - 610 | void ts_node_edit(TSNode *self, const TSInputEdit *edit); - | - 611 | /** - 612 | * Check if two nodes are identical. - 613 | */ - 614 | bool ts_node_eq(TSNode self, TSNode other); - | - 615 | /** - 616 | * Edit a point to keep it in-sync with source code that has been edited. - 617 | * - 618 | * This function updates a single point's byte offset and row/column position - 619 | * based on an edit operation. This is useful for editing points without - 620 | * requiring a tree or node instance. - 621 | */ - 622 | void ts_point_edit(TSPoint *point, uint32_t *point_byte, const TSInputEdit *edit); - | - 623 | /** - 624 | * Edit a range to keep it in-sync with source code that has been edited. - 625 | * - 626 | * This function updates a range's start and end positions based on an edit - 627 | * operation. This is useful for editing ranges without requiring a tree - 628 | * or node instance. - 629 | */ - 630 | void ts_range_edit(TSRange *range, const TSInputEdit *edit); - | - 631 | /************************/ - 632 | /* Section - TreeCursor */ - 633 | /************************/ - | - 634 | /** - 635 | * Create a new tree cursor starting from the given node. - 636 | * - 637 | * A tree cursor allows you to walk a syntax tree more efficiently than is - 638 | * possible using the [`TSNode`] functions. It is a mutable object that is always - 639 | * on a certain syntax node, and can be moved imperatively to different nodes. - 640 | * - 641 | * Note that the given node is considered the root of the cursor, - 642 | * and the cursor cannot walk outside this node. - 643 | */ - 644 | TSTreeCursor ts_tree_cursor_new(TSNode node); - | - 645 | /** - 646 | * Delete a tree cursor, freeing all of the memory that it used. 
- 647 | */ - 648 | void ts_tree_cursor_delete(TSTreeCursor *self); - | - 649 | /** - 650 | * Re-initialize a tree cursor to start at the original node that the cursor was - 651 | * constructed with. - 652 | */ - 653 | void ts_tree_cursor_reset(TSTreeCursor *self, TSNode node); - | - 654 | /** - 655 | * Re-initialize a tree cursor to the same position as another cursor. - 656 | * - 657 | * Unlike [`ts_tree_cursor_reset`], this will not lose parent information and - 658 | * allows reusing already created cursors. - 659 | */ - 660 | void ts_tree_cursor_reset_to(TSTreeCursor *dst, const TSTreeCursor *src); - | - 661 | /** - 662 | * Get the tree cursor's current node. - 663 | */ - 664 | TSNode ts_tree_cursor_current_node(const TSTreeCursor *self); - | - 665 | /** - 666 | * Get the field name of the tree cursor's current node. - 667 | * - 668 | * This returns `NULL` if the current node doesn't have a field. - 669 | * See also [`ts_node_child_by_field_name`]. - 670 | */ - 671 | const char *ts_tree_cursor_current_field_name(const TSTreeCursor *self); - | - 672 | /** - 673 | * Get the field id of the tree cursor's current node. - 674 | * - 675 | * This returns zero if the current node doesn't have a field. - 676 | * See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]. - 677 | */ - 678 | TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *self); - | - 679 | /** - 680 | * Move the cursor to the parent of its current node. - 681 | * - 682 | * This returns `true` if the cursor successfully moved, and returns `false` - 683 | * if there was no parent node (the cursor was already on the root node). - 684 | * - 685 | * Note that the node the cursor was constructed with is considered the root - 686 | * of the cursor, and the cursor cannot walk outside this node. - 687 | */ - 688 | bool ts_tree_cursor_goto_parent(TSTreeCursor *self); - | - 689 | /** - 690 | * Move the cursor to the next sibling of its current node. 
- 691 | * - 692 | * This returns `true` if the cursor successfully moved, and returns `false` - 693 | * if there was no next sibling node. - 694 | * - 695 | * Note that the node the cursor was constructed with is considered the root - 696 | * of the cursor, and the cursor cannot walk outside this node. - 697 | */ - 698 | bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self); - | - 699 | /** - 700 | * Move the cursor to the previous sibling of its current node. - 701 | * - 702 | * This returns `true` if the cursor successfully moved, and returns `false` if - 703 | * there was no previous sibling node. - 704 | * - 705 | * Note, that this function may be slower than - 706 | * [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. In - 707 | * the worst case, this will need to iterate through all the children up to the - 708 | * previous sibling node to recalculate its position. Also note that the node the cursor - 709 | * was constructed with is considered the root of the cursor, and the cursor cannot - 710 | * walk outside this node. - 711 | */ - 712 | bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self); - | - 713 | /** - 714 | * Move the cursor to the first child of its current node. - 715 | * - 716 | * This returns `true` if the cursor successfully moved, and returns `false` - 717 | * if there were no children. - 718 | */ - 719 | bool ts_tree_cursor_goto_first_child(TSTreeCursor *self); - | - 720 | /** - 721 | * Move the cursor to the last child of its current node. - 722 | * - 723 | * This returns `true` if the cursor successfully moved, and returns `false` if - 724 | * there were no children. - 725 | * - 726 | * Note that this function may be slower than [`ts_tree_cursor_goto_first_child`] - 727 | * because it needs to iterate through all the children to compute the child's - 728 | * position. 
- 729 | */ - 730 | bool ts_tree_cursor_goto_last_child(TSTreeCursor *self); - | - 731 | /** - 732 | * Move the cursor to the node that is the nth descendant of - 733 | * the original node that the cursor was constructed with, where - 734 | * zero represents the original node itself. - 735 | */ - 736 | void ts_tree_cursor_goto_descendant(TSTreeCursor *self, uint32_t goal_descendant_index); - | - 737 | /** - 738 | * Get the index of the cursor's current node out of all of the - 739 | * descendants of the original node that the cursor was constructed with. - 740 | */ - 741 | uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *self); - | - 742 | /** - 743 | * Get the depth of the cursor's current node relative to the original - 744 | * node that the cursor was constructed with. - 745 | */ - 746 | uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *self); - | - 747 | /** - 748 | * Move the cursor to the first child of its current node that contains or starts after - 749 | * the given byte offset or point. - 750 | * - 751 | * This returns the index of the child node if one was found, and returns -1 - 752 | * if no such child was found. - 753 | */ - 754 | int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte); - 755 | int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point); - | - 756 | TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *cursor); - | - 757 | /*******************/ - 758 | /* Section - Query */ - 759 | /*******************/ - | - 760 | /** - 761 | * Create a new query from a string containing one or more S-expression - 762 | * patterns. The query is associated with a particular language, and can - 763 | * only be run on syntax nodes parsed with that language. - 764 | * - 765 | * If all of the given patterns are valid, this returns a [`TSQuery`]. 
- 766 | * If a pattern is invalid, this returns `NULL`, and provides two pieces - 767 | * of information about the problem: - 768 | * 1. The byte offset of the error is written to the `error_offset` parameter. - 769 | * 2. The type of error is written to the `error_type` parameter. - 770 | */ - 771 | TSQuery *ts_query_new( - 772 | const TSLanguage *language, - 773 | const char *source, - 774 | uint32_t source_len, - 775 | uint32_t *error_offset, - 776 | TSQueryError *error_type - 777 | ); - | - 778 | /** - 779 | * Delete a query, freeing all of the memory that it used. - 780 | */ - 781 | void ts_query_delete(TSQuery *self); - | - 782 | /** - 783 | * Get the number of patterns, captures, or string literals in the query. - 784 | */ - 785 | uint32_t ts_query_pattern_count(const TSQuery *self); - 786 | uint32_t ts_query_capture_count(const TSQuery *self); - 787 | uint32_t ts_query_string_count(const TSQuery *self); - | - 788 | /** - 789 | * Get the byte offset where the given pattern starts in the query's source. - 790 | * - 791 | * This can be useful when combining queries by concatenating their source - 792 | * code strings. - 793 | */ - 794 | uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, uint32_t pattern_index); - | - 795 | /** - 796 | * Get the byte offset where the given pattern ends in the query's source. - 797 | * - 798 | * This can be useful when combining queries by concatenating their source - 799 | * code strings. - 800 | */ - 801 | uint32_t ts_query_end_byte_for_pattern(const TSQuery *self, uint32_t pattern_index); - | - 802 | /** - 803 | * Get all of the predicates for the given pattern in the query. - 804 | * - 805 | * The predicates are represented as a single array of steps. There are three - 806 | * types of steps in this array, which correspond to the three legal values for - 807 | * the `type` field: - 808 | * - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names - 809 | * of captures. 
Their `value_id` can be used with the - 810 | * [`ts_query_capture_name_for_id`] function to obtain the name of the capture. - 811 | * - `TSQueryPredicateStepTypeString` - Steps with this type represent literal - 812 | * strings. Their `value_id` can be used with the - 813 | * [`ts_query_string_value_for_id`] function to obtain their string value. - 814 | * - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels* - 815 | * that represent the end of an individual predicate. If a pattern has two - 816 | * predicates, then there will be two steps with this `type` in the array. - 817 | */ - 818 | const TSQueryPredicateStep *ts_query_predicates_for_pattern( - 819 | const TSQuery *self, - 820 | uint32_t pattern_index, - 821 | uint32_t *step_count - 822 | ); - | - 823 | /* - 824 | * Check if the given pattern in the query has a single root node. - 825 | */ - 826 | bool ts_query_is_pattern_rooted(const TSQuery *self, uint32_t pattern_index); - | - 827 | /* - 828 | * Check if the given pattern in the query is 'non local'. - 829 | * - 830 | * A non-local pattern has multiple root nodes and can match within a - 831 | * repeating sequence of nodes, as specified by the grammar. Non-local - 832 | * patterns disable certain optimizations that would otherwise be possible - 833 | * when executing a query on a specific range of a syntax tree. - 834 | */ - 835 | bool ts_query_is_pattern_non_local(const TSQuery *self, uint32_t pattern_index); - | - 836 | /* - 837 | * Check if a given pattern is guaranteed to match once a given step is reached. - 838 | * The step is specified by its byte offset in the query's source code. - 839 | */ - 840 | bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_offset); - | - 841 | /** - 842 | * Get the name and length of one of the query's captures, or one of the - 843 | * query's string literals. 
Each capture and string is associated with a - 844 | * numeric id based on the order that it appeared in the query's source. - 845 | */ - 846 | const char *ts_query_capture_name_for_id( - 847 | const TSQuery *self, - 848 | uint32_t index, - 849 | uint32_t *length - 850 | ); - | - 851 | /** - 852 | * Get the quantifier of the query's captures. Each capture is * associated - 853 | * with a numeric id based on the order that it appeared in the query's source. - 854 | */ - 855 | TSQuantifier ts_query_capture_quantifier_for_id( - 856 | const TSQuery *self, - 857 | uint32_t pattern_index, - 858 | uint32_t capture_index - 859 | ); - | - 860 | const char *ts_query_string_value_for_id( - 861 | const TSQuery *self, - 862 | uint32_t index, - 863 | uint32_t *length - 864 | ); - | - 865 | /** - 866 | * Disable a certain capture within a query. - 867 | * - 868 | * This prevents the capture from being returned in matches, and also avoids - 869 | * any resource usage associated with recording the capture. Currently, there - 870 | * is no way to undo this. - 871 | */ - 872 | void ts_query_disable_capture(TSQuery *self, const char *name, uint32_t length); - | - 873 | /** - 874 | * Disable a certain pattern within a query. - 875 | * - 876 | * This prevents the pattern from matching and removes most of the overhead - 877 | * associated with the pattern. Currently, there is no way to undo this. - 878 | */ - 879 | void ts_query_disable_pattern(TSQuery *self, uint32_t pattern_index); - | - 880 | /** - 881 | * Create a new cursor for executing a given query. - 882 | * - 883 | * The cursor stores the state that is needed to iteratively search - 884 | * for matches. To use the query cursor, first call [`ts_query_cursor_exec`] - 885 | * to start running a given query on a given syntax node. Then, there are - 886 | * two options for consuming the results of the query: - 887 | * 1. 
Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the - 888 | * *matches* in the order that they were found. Each match contains the - 889 | * index of the pattern that matched, and an array of captures. Because - 890 | * multiple patterns can match the same set of nodes, one match may contain - 891 | * captures that appear *before* some of the captures from a previous match. - 892 | * 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the - 893 | * individual *captures* in the order that they appear. This is useful if - 894 | * don't care about which pattern matched, and just want a single ordered - 895 | * sequence of captures. - 896 | * - 897 | * If you don't care about consuming all of the results, you can stop calling - 898 | * [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point. - 899 | * You can then start executing another query on another node by calling - 900 | * [`ts_query_cursor_exec`] again. - 901 | */ - 902 | TSQueryCursor *ts_query_cursor_new(void); - | - 903 | /** - 904 | * Delete a query cursor, freeing all of the memory that it used. - 905 | */ - 906 | void ts_query_cursor_delete(TSQueryCursor *self); - | - 907 | /** - 908 | * Start running a given query on a given node. - 909 | */ - 910 | void ts_query_cursor_exec(TSQueryCursor *self, const TSQuery *query, TSNode node); - | - 911 | /** - 912 | * Start running a given query on a given node, with some options. - 913 | */ - 914 | void ts_query_cursor_exec_with_options( - 915 | TSQueryCursor *self, - 916 | const TSQuery *query, - 917 | TSNode node, - 918 | const TSQueryCursorOptions *query_options - 919 | ); - | - 920 | /** - 921 | * Manage the maximum number of in-progress matches allowed by this query - 922 | * cursor. - 923 | * - 924 | * Query cursors have an optional maximum capacity for storing lists of - 925 | * in-progress captures. 
If this capacity is exceeded, then the - 926 | * earliest-starting match will silently be dropped to make room for further - 927 | * matches. This maximum capacity is optional — by default, query cursors allow - 928 | * any number of pending matches, dynamically allocating new space for them as - 929 | * needed as the query is executed. - 930 | */ - 931 | bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self); - 932 | uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self); - 933 | void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit); - | - 934 | /** - 935 | * Set the range of bytes in which the query will be executed. - 936 | * - 937 | * The query cursor will return matches that intersect with the given point range. - 938 | * This means that a match may be returned even if some of its captures fall - 939 | * outside the specified range, as long as at least part of the match - 940 | * overlaps with the range. - 941 | * - 942 | * For example, if a query pattern matches a node that spans a larger area - 943 | * than the specified range, but part of that node intersects with the range, - 944 | * the entire match will be returned. - 945 | * - 946 | * This will return `false` if the start byte is greater than the end byte, otherwise - 947 | * it will return `true`. - 948 | */ - 949 | bool ts_query_cursor_set_byte_range(TSQueryCursor *self, uint32_t start_byte, uint32_t end_byte); - | - 950 | /** - 951 | * Set the range of (row, column) positions in which the query will be executed. - 952 | * - 953 | * The query cursor will return matches that intersect with the given point range. - 954 | * This means that a match may be returned even if some of its captures fall - 955 | * outside the specified range, as long as at least part of the match - 956 | * overlaps with the range. 
- 957 | * - 958 | * For example, if a query pattern matches a node that spans a larger area - 959 | * than the specified range, but part of that node intersects with the range, - 960 | * the entire match will be returned. - 961 | * - 962 | * This will return `false` if the start point is greater than the end point, otherwise - 963 | * it will return `true`. - 964 | */ - 965 | bool ts_query_cursor_set_point_range(TSQueryCursor *self, TSPoint start_point, TSPoint end_point); - | - 966 | /** - 967 | * Advance to the next match of the currently running query. - 968 | * - 969 | * If there is a match, write it to `*match` and return `true`. - 970 | * Otherwise, return `false`. - 971 | */ - 972 | bool ts_query_cursor_next_match(TSQueryCursor *self, TSQueryMatch *match); - 973 | void ts_query_cursor_remove_match(TSQueryCursor *self, uint32_t match_id); - | - 974 | /** - 975 | * Advance to the next capture of the currently running query. - 976 | * - 977 | * If there is a capture, write its match to `*match` and its index within - 978 | * the match's capture list to `*capture_index`. Otherwise, return `false`. - 979 | */ - 980 | bool ts_query_cursor_next_capture( - 981 | TSQueryCursor *self, - 982 | TSQueryMatch *match, - 983 | uint32_t *capture_index - 984 | ); - | - 985 | /** - 986 | * Set the maximum start depth for a query cursor. - 987 | * - 988 | * This prevents cursors from exploring children nodes at a certain depth. - 989 | * Note if a pattern includes many children, then they will still be checked. - 990 | * - 991 | * The zero max start depth value can be used as a special behavior and - 992 | * it helps to destructure a subtree by staying on a node and using captures - 993 | * for interested parts. Note that the zero max start depth only limit a search - 994 | * depth for a pattern's root node but other nodes that are parts of the pattern - 995 | * may be searched at any depth what defined by the pattern structure. 
- 996 | * - 997 | * Set to `UINT32_MAX` to remove the maximum start depth. - 998 | */ - 999 | void ts_query_cursor_set_max_start_depth(TSQueryCursor *self, uint32_t max_start_depth); - | -1000 | /**********************/ -1001 | /* Section - Language */ -1002 | /**********************/ - | -1003 | /** -1004 | * Get another reference to the given language. -1005 | */ -1006 | const TSLanguage *ts_language_copy(const TSLanguage *self); - | -1007 | /** -1008 | * Free any dynamically-allocated resources for this language, if -1009 | * this is the last reference. -1010 | */ -1011 | void ts_language_delete(const TSLanguage *self); - | -1012 | /** -1013 | * Get the number of distinct node types in the language. -1014 | */ -1015 | uint32_t ts_language_symbol_count(const TSLanguage *self); - | -1016 | /** -1017 | * Get the number of valid states in this language. -1018 | */ -1019 | uint32_t ts_language_state_count(const TSLanguage *self); - | -1020 | /** -1021 | * Get the numerical id for the given node type string. -1022 | */ -1023 | TSSymbol ts_language_symbol_for_name( -1024 | const TSLanguage *self, -1025 | const char *string, -1026 | uint32_t length, -1027 | bool is_named -1028 | ); - | -1029 | /** -1030 | * Get the number of distinct field names in the language. -1031 | */ -1032 | uint32_t ts_language_field_count(const TSLanguage *self); - | -1033 | /** -1034 | * Get the field name string for the given numerical id. -1035 | */ -1036 | const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id); - | -1037 | /** -1038 | * Get the numerical id for the given field name string. -1039 | */ -1040 | TSFieldId ts_language_field_id_for_name(const TSLanguage *self, const char *name, uint32_t name_length); - | -1041 | /** -1042 | * Get a list of all supertype symbols for the language. 
-1043 | */ -1044 | const TSSymbol *ts_language_supertypes(const TSLanguage *self, uint32_t *length); - | -1045 | /** -1046 | * Get a list of all subtype symbol ids for a given supertype symbol. -1047 | * -1048 | * See [`ts_language_supertypes`] for fetching all supertype symbols. -1049 | */ -1050 | const TSSymbol *ts_language_subtypes( -1051 | const TSLanguage *self, -1052 | TSSymbol supertype, -1053 | uint32_t *length -1054 | ); - | -1055 | /** -1056 | * Get a node type string for the given numerical id. -1057 | */ -1058 | const char *ts_language_symbol_name(const TSLanguage *self, TSSymbol symbol); - | -1059 | /** -1060 | * Check whether the given node type id belongs to named nodes, anonymous nodes, -1061 | * or a hidden nodes. -1062 | * -1063 | * See also [`ts_node_is_named`]. Hidden nodes are never returned from the API. -1064 | */ -1065 | TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol); - | -1066 | /** -1067 | * Get the ABI version number for this language. This version number is used -1068 | * to ensure that languages were generated by a compatible version of -1069 | * Tree-sitter. -1070 | * -1071 | * See also [`ts_parser_set_language`]. -1072 | */ -1073 | uint32_t ts_language_abi_version(const TSLanguage *self); - | -1074 | /** -1075 | * Get the metadata for this language. This information is generated by the -1076 | * CLI, and relies on the language author providing the correct metadata in -1077 | * the language's `tree-sitter.json` file. -1078 | * -1079 | * See also [`TSMetadata`]. -1080 | */ -1081 | const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self); - | -1082 | /** -1083 | * Get the next parse state. Combine this with lookahead iterators to generate -1084 | * completion suggestions or valid symbols in error nodes. Use -1085 | * [`ts_node_grammar_symbol`] for valid symbols. 
-1086 | */ -1087 | TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol); - | -1088 | /** -1089 | * Get the name of this language. This returns `NULL` in older parsers. -1090 | */ -1091 | const char *ts_language_name(const TSLanguage *self); - | -1092 | /********************************/ -1093 | /* Section - Lookahead Iterator */ -1094 | /********************************/ - | -1095 | /** -1096 | * Create a new lookahead iterator for the given language and parse state. -1097 | * -1098 | * This returns `NULL` if state is invalid for the language. -1099 | * -1100 | * Repeatedly using [`ts_lookahead_iterator_next`] and -1101 | * [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the -1102 | * given parse state. Newly created lookahead iterators will contain the `ERROR` -1103 | * symbol. -1104 | * -1105 | * Lookahead iterators can be useful to generate suggestions and improve syntax -1106 | * error diagnostics. To get symbols valid in an ERROR node, use the lookahead -1107 | * iterator on its first leaf node state. For `MISSING` nodes, a lookahead -1108 | * iterator created on the previous non-extra leaf node may be appropriate. -1109 | */ -1110 | TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state); - | -1111 | /** -1112 | * Delete a lookahead iterator freeing all the memory used. -1113 | */ -1114 | void ts_lookahead_iterator_delete(TSLookaheadIterator *self); - | -1115 | /** -1116 | * Reset the lookahead iterator to another state. -1117 | * -1118 | * This returns `true` if the iterator was reset to the given state and `false` -1119 | * otherwise. -1120 | */ -1121 | bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *self, TSStateId state); - | -1122 | /** -1123 | * Reset the lookahead iterator. -1124 | * -1125 | * This returns `true` if the language was set successfully and `false` -1126 | * otherwise. 
-1127 | */ -1128 | bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state); - | -1129 | /** -1130 | * Get the current language of the lookahead iterator. -1131 | */ -1132 | const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self); - | -1133 | /** -1134 | * Advance the lookahead iterator to the next symbol. -1135 | * -1136 | * This returns `true` if there is a new symbol and `false` otherwise. -1137 | */ -1138 | bool ts_lookahead_iterator_next(TSLookaheadIterator *self); - | -1139 | /** -1140 | * Get the current symbol of the lookahead iterator; -1141 | */ -1142 | TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self); - | -1143 | /** -1144 | * Get the current symbol type of the lookahead iterator as a null terminated -1145 | * string. -1146 | */ -1147 | const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self); - | -1148 | /*************************************/ -1149 | /* Section - WebAssembly Integration */ -1150 | /************************************/ - | -1151 | typedef struct wasm_engine_t TSWasmEngine; -1152 | typedef struct TSWasmStore TSWasmStore; - | -1153 | typedef enum { -1154 | TSWasmErrorKindNone = 0, -1155 | TSWasmErrorKindParse, -1156 | TSWasmErrorKindCompile, -1157 | TSWasmErrorKindInstantiate, -1158 | TSWasmErrorKindAllocate, -1159 | } TSWasmErrorKind; - | -1160 | typedef struct { -1161 | TSWasmErrorKind kind; -1162 | char *message; -1163 | } TSWasmError; - | -1164 | /** -1165 | * Create a Wasm store. -1166 | */ -1167 | TSWasmStore *ts_wasm_store_new( -1168 | TSWasmEngine *engine, -1169 | TSWasmError *error -1170 | ); - | -1171 | /** -1172 | * Free the memory associated with the given Wasm store. -1173 | */ -1174 | void ts_wasm_store_delete(TSWasmStore *); - | -1175 | /** -1176 | * Create a language from a buffer of Wasm. 
The resulting language behaves -1177 | * like any other Tree-sitter language, except that in order to use it with -1178 | * a parser, that parser must have a Wasm store. Note that the language -1179 | * can be used with any Wasm store, it doesn't need to be the same store that -1180 | * was used to originally load it. -1181 | */ -1182 | const TSLanguage *ts_wasm_store_load_language( -1183 | TSWasmStore *, -1184 | const char *name, -1185 | const char *wasm, -1186 | uint32_t wasm_len, -1187 | TSWasmError *error -1188 | ); - | -1189 | /** -1190 | * Get the number of languages instantiated in the given Wasm store. -1191 | */ -1192 | size_t ts_wasm_store_language_count(const TSWasmStore *); - | -1193 | /** -1194 | * Check if the language came from a Wasm module. If so, then in order to use -1195 | * this language with a Parser, that parser must have a Wasm store assigned. -1196 | */ -1197 | bool ts_language_is_wasm(const TSLanguage *); - | -1198 | /** -1199 | * Assign the given Wasm store to the parser. A parser must have a Wasm store -1200 | * in order to use Wasm languages. -1201 | */ -1202 | void ts_parser_set_wasm_store(TSParser *, TSWasmStore *); - | -1203 | /** -1204 | * Remove the parser's current Wasm store and return it. This returns NULL if -1205 | * the parser doesn't have a Wasm store. -1206 | */ -1207 | TSWasmStore *ts_parser_take_wasm_store(TSParser *); - | -1208 | /**********************************/ -1209 | /* Section - Global Configuration */ -1210 | /**********************************/ - | -1211 | /** -1212 | * Set the allocation functions used by the library. -1213 | * -1214 | * By default, Tree-sitter uses the standard libc allocation functions, -1215 | * but aborts the process when an allocation fails. This function lets -1216 | * you supply alternative allocation functions at runtime. -1217 | * -1218 | * If you pass `NULL` for any parameter, Tree-sitter will switch back to -1219 | * its default implementation of that function. 
-1220 | * -1221 | * If you call this function after the library has already been used, then -1222 | * you must ensure that either: -1223 | * 1. All the existing objects have been freed. -1224 | * 2. The new allocator shares its state with the old one, so it is capable -1225 | * of freeing memory that was allocated by the old allocator. -1226 | */ -1227 | void ts_set_allocator( -1228 | void *(*new_malloc)(size_t), -1229 | void *(*new_calloc)(size_t, size_t), -1230 | void *(*new_realloc)(void *, size_t), -1231 | void (*new_free)(void *) -1232 | ); - | -1233 | #ifdef __cplusplus -1234 | } -1235 | #endif - | -1236 | #ifndef TREE_SITTER_HIDE_SYMBOLS -1237 | #if defined(__GNUC__) || defined(__clang__) -1238 | #pragma GCC visibility pop -1239 | #endif -1240 | #endif - | -1241 | #endif // TREE_SITTER_API_H_ - - - --------------------------------------------------------------------------------- -/lib/lldb_pretty_printers/table_entry.py: --------------------------------------------------------------------------------- - 1 | from lldb import SBValue - | - 2 | # typedef struct { - 3 | # const TSParseAction *actions; - 4 | # uint32_t action_count; - 5 | # bool is_reusable; - 6 | # } TableEntry; - | - 7 | # TODO: Same inline issue as with `TSTreeSyntheticProvider`. 
- | - | - 8 | class TableEntrySyntheticProvider: - 9 | def __init__(self, valobj: SBValue, _dict): - 10 | self.valobj: SBValue = valobj - 11 | self.update() - | - 12 | def num_children(self) -> int: - 13 | # is_reusable, action_count, actions - 14 | return 2 + max(1, self.action_count.GetValueAsUnsigned()) - | - 15 | def get_child_index(self, name: str) -> int: - 16 | if name == "is_reusable": - 17 | return 0 - 18 | elif name == "action_count": - 19 | return 1 - 20 | else: - 21 | if self.action_count.GetValueAsUnsigned() == 0: - 22 | return 2 - 23 | index = name.lstrip("actions[").rstrip("]") - 24 | if index.isdigit(): - 25 | return int(index) - 26 | else: - 27 | return -1 - | - 28 | def get_child_at_index(self, index: int) -> SBValue: - 29 | if index == 0: - 30 | return self.is_reusable - 31 | elif index == 1: - 32 | return self.action_count - 33 | else: - 34 | if self.action_count.GetValueAsUnsigned() == 0: - 35 | return self.actions - 36 | offset: int = index - 3 - 37 | start: int = self.actions.GetValueAsUnsigned() - 38 | address: int = start + offset * self.element_type_size - 39 | element: SBValue = self.actions.CreateValueFromAddress( - 40 | "action[%s]" % (offset), address, self.element_type - 41 | ) - 42 | return element - | - 43 | def update(self): - 44 | self.is_reusable: SBValue = self.valobj.GetChildMemberWithName("is_reusable") - 45 | self.action_count: SBValue = self.valobj.GetChildMemberWithName("action_count") - 46 | self.actions: SBValue = self.valobj.GetChildMemberWithName("actions") - | - 47 | self.element_type: SBType = self.actions.GetType().GetPointeeType() - 48 | self.element_type_size: int = self.element_type.GetByteSize() - | - 49 | def has_children(self) -> bool: - 50 | return True - - - --------------------------------------------------------------------------------- -/lib/lldb_pretty_printers/tree_sitter_types.py: --------------------------------------------------------------------------------- - 1 | import lldb - | - 2 | # Even though 
these are "unused", we still need them in scope in order for the classes - 3 | # to exist when we register them with the debugger - 4 | from ts_tree import TSTreeSyntheticProvider - 5 | from table_entry import TableEntrySyntheticProvider - 6 | from ts_array import ArraySyntheticProvider, anon_array_recognizer - | - | - 7 | class TreeSitterType(object): - 8 | TS_TREE: str = "TSTree" - 9 | SUBTREE_ARRAY: str = "SubtreeArray" - 10 | MUTABLE_SUBTREE_ARRAY: str = "MutableSubtreeArray" - 11 | STACK_SLICE_ARRAY: str = "StackSliceArray" - 12 | STACK_SUMMARY: str = "StackSummary" - 13 | STACK_ENTRY: str = "StackEntry" - 14 | REUSABLE_NODE: str = "ReusableNode" - 15 | REDUCE_ACTION_SET: str = "ReduceActionSet" - 16 | TABLE_ENTRY: str = "TableEntry" - 17 | TS_RANGE_ARRAY: str = "TSRangeArray" - 18 | CAPTURE_QUANTIFIERS: str = "CaptureQuantifiers" - 19 | CAPTURE_LIST: str = "CaptureList" - 20 | ANALYSIS_STATE_SET: str = "AnalysisStateSet" - 21 | ANALYSIS_SUBGRAPH_ARRAY: str = "AnalysisSubgraphArray" - 22 | STACK_NODE_ARRAY: str = "StackNodeArray" - 23 | STRING_DATA: str = "StringData" - | - | - 24 | def ts_type_to_regex(type: str) -> str: - 25 | return f"^{type}$|^struct {type}$|^typedef {type}$" - | - | - 26 | # Holds all tree-sitter types defined via the `Array` macro. 
These types will - 27 | # all share the same `ArrayTypeSyntheticProvider` synthetic provider - 28 | TS_ARRAY_TYPES = [ - 29 | TreeSitterType.REDUCE_ACTION_SET, - 30 | TreeSitterType.TS_RANGE_ARRAY, - 31 | TreeSitterType.CAPTURE_QUANTIFIERS, - 32 | TreeSitterType.ANALYSIS_STATE_SET, - 33 | TreeSitterType.CAPTURE_LIST, - 34 | TreeSitterType.ANALYSIS_SUBGRAPH_ARRAY, - 35 | TreeSitterType.STACK_SLICE_ARRAY, - 36 | TreeSitterType.STACK_SUMMARY, - 37 | TreeSitterType.SUBTREE_ARRAY, - 38 | TreeSitterType.MUTABLE_SUBTREE_ARRAY, - 39 | TreeSitterType.STRING_DATA, - 40 | TreeSitterType.STACK_NODE_ARRAY, - 41 | ] - | - | - 42 | def __lldb_init_module(debugger: lldb.SBDebugger, _dict): - 43 | debugger.HandleCommand( - 44 | f"type synthetic add -l tree_sitter_types.TSTreeSyntheticProvider -x '{ts_type_to_regex(TreeSitterType.TS_TREE)}'" - 45 | ) - 46 | debugger.HandleCommand( - 47 | f"type synthetic add -l tree_sitter_types.TableEntrySyntheticProvider -x '{ts_type_to_regex(TreeSitterType.TABLE_ENTRY)}'" - 48 | ) - 49 | debugger.HandleCommand( - 50 | f"type synthetic add -l tree_sitter_types.ArraySyntheticProvider --recognizer-function tree_sitter_types.anon_array_recognizer" - 51 | ) - 52 | for type in TS_ARRAY_TYPES: - 53 | debugger.HandleCommand( - 54 | f"type synthetic add -l tree_sitter_types.ArraySyntheticProvider -x '{ts_type_to_regex(type)}'" - 55 | ) - - - --------------------------------------------------------------------------------- -/lib/lldb_pretty_printers/ts_array.py: --------------------------------------------------------------------------------- - 1 | from lldb import SBValue, SBType - 2 | import re - | - 3 | # define Array(T) \ - 4 | # struct { \ - 5 | # T *contents; \ - 6 | # uint32_t size; \ - 7 | # uint32_t capacity; \ - 8 | # } - | - | - 9 | class ArraySyntheticProvider: - 10 | def __init__(self, valobj: SBValue, _dict): - 11 | self.valobj: SBValue = valobj - 12 | self.update() - | - 13 | def num_children(self) -> int: - 14 | return 2 + 
self.size.GetValueAsUnsigned() # size, capacity, and elements - | - 15 | def get_child_index(self, name: str) -> int: - 16 | if name == "size": - 17 | return 0 - 18 | elif name == "capacity": - 19 | return 1 - 20 | else: - 21 | if self.size.GetValueAsUnsigned() == 0: - 22 | return 2 - 23 | index = name.lstrip("[").rstrip("]") - 24 | if index.isdigit(): - 25 | return int(index) - 26 | else: - 27 | return -1 - | - 28 | def get_child_at_index(self, index: int) -> SBValue: - 29 | if index == 0: - 30 | return self.size - 31 | elif index == 1: - 32 | return self.capacity - 33 | else: - 34 | if self.size.GetValueAsUnsigned() == 0: - 35 | return self.contents - 36 | offset: int = index - 2 - 37 | start: int = self.contents.GetValueAsUnsigned() - 38 | address: int = start + offset * self.element_type_size - 39 | element: SBValue = self.contents.CreateValueFromAddress( - 40 | "[%s]" % (offset), address, self.element_type - 41 | ) - 42 | return element - | - 43 | def update(self): - 44 | self.contents: SBValue = self.valobj.GetChildMemberWithName("contents") - 45 | self.size: SBValue = self.valobj.GetChildMemberWithName("size") - 46 | self.capacity: SBValue = self.valobj.GetChildMemberWithName("capacity") - | - 47 | self.element_type: SBType = self.contents.GetType().GetPointeeType() - 48 | self.element_type_size: int = self.element_type.GetByteSize() - | - 49 | def has_children(self) -> bool: - 50 | return True - | - | - 51 | anon_re = re.compile( - 52 | r"struct\s*{$\s*\w+ \*contents;$\s*uint32_t size;$\s*uint32_t capacity;$\s*}", - 53 | re.MULTILINE, - 54 | ) - | - | - 55 | # Used to recognize "anonymous" `Array(T)` types, i.e.: - 56 | # struct Foo { - 57 | # Array(Bar) bars; // Render this field usign `ArraySyntheticProvider` - 58 | # }; - 59 | def anon_array_recognizer(valobj: SBType, _dict) -> bool: - 60 | type_name = valobj.GetName() - 61 | if type_name == "(unnamed struct)": - 62 | type_str = str(valobj) - 63 | return anon_re.search(type_str) is not None - 64 | else: 
- 65 | return False - - - --------------------------------------------------------------------------------- -/lib/lldb_pretty_printers/ts_tree.py: --------------------------------------------------------------------------------- - 1 | from lldb import SBType, SBValue - | - 2 | # struct TSTree { - 3 | # Subtree root; - 4 | # const TSLanguage *language; - 5 | # TSRange *included_ranges; - 6 | # unsigned included_range_count; - 7 | # }; - | - 8 | # TODO: Ideally, we'd display the elements of `included_ranges` as - 9 | # children of `included_ranges` rather than separate items, i.e.: - | - 10 | # (TSTree) { - 11 | # root = ... - 12 | # language = ... - 13 | # included_range_count = ... - 14 | # included_ranges = { - 15 | # [0] = { - 16 | # ... - 17 | # } - 18 | # [1] = { - 19 | # ... - 20 | # } - 21 | # ... - 22 | # } - 23 | # } - 24 | # - 25 | # instead of the current behavior: - 26 | # - 27 | # (TSTree) { - 28 | # root = ... - 29 | # language = ... - 30 | # included_range_count = ... - 31 | # included_ranges[0] = { - 32 | # ... - 33 | # } - 34 | # included_ranges[1] = { - 35 | # ... 
- 36 | # } - 37 | # } - 38 | # - | - | - 39 | class TSTreeSyntheticProvider: - 40 | def __init__(self, valobj: SBValue, _dict): - 41 | self.valobj: SBValue = valobj - 42 | self.update() - | - 43 | def num_children(self) -> int: - 44 | # root, language, included_range_count, included_ranges - 45 | return 3 + self.included_range_count.GetValueAsUnsigned() - | - 46 | def get_child_index(self, name: str) -> int: - 47 | if name == "root": - 48 | return 0 - 49 | elif name == "language": - 50 | return 1 - 51 | elif name == "included_range_count": - 52 | return 2 - 53 | else: - 54 | if self.included_range_count.GetValueAsUnsigned() == 0: - 55 | return 3 - 56 | index = name.lstrip("included_ranges[").rstrip("]") - 57 | if index.isdigit(): - 58 | return int(index) - 59 | else: - 60 | return -1 - | - 61 | def get_child_at_index(self, index: int) -> SBValue: - 62 | if index == 0: - 63 | return self.root - 64 | elif index == 1: - 65 | return self.language - 66 | elif index == 2: - 67 | return self.included_range_count - 68 | else: - 69 | if self.included_range_count.GetValueAsUnsigned() == 0: - 70 | return self.included_ranges - 71 | offset: int = index - 3 - 72 | start: int = self.included_ranges.GetValueAsUnsigned() - 73 | address: int = start + offset * self.element_type_size - 74 | element: SBValue = self.included_ranges.CreateValueFromAddress( - 75 | "included_ranges[%s]" % (offset), address, self.element_type - 76 | ) - 77 | return element - | - 78 | def update(self): - 79 | self.root: SBValue = self.valobj.GetChildMemberWithName("root") - 80 | self.language: SBValue = self.valobj.GetChildMemberWithName("language") - 81 | self.included_range_count: SBValue = self.valobj.GetChildMemberWithName( - 82 | "included_range_count" - 83 | ) - 84 | self.included_ranges: SBValue = self.valobj.GetChildMemberWithName( - 85 | "included_ranges" - 86 | ) - | - 87 | self.element_type: SBType = self.included_ranges.GetType().GetPointeeType() - 88 | self.element_type_size: int = 
self.element_type.GetByteSize() - | - 89 | def has_children(self) -> bool: - 90 | return True - - - --------------------------------------------------------------------------------- -/lib/package.nix: --------------------------------------------------------------------------------- - 1 | { - 2 | stdenv, - 3 | cmake, - 4 | pkg-config, - 5 | src, - 6 | version, - 7 | lib, - 8 | }: - 9 | stdenv.mkDerivation { - 10 | inherit src version; - 11 | pname = "tree-sitter"; - | - 12 | nativeBuildInputs = [ - 13 | cmake - 14 | pkg-config - 15 | ]; - | - 16 | sourceRoot = "source"; - | - 17 | cmakeFlags = [ - 18 | "-DBUILD_SHARED_LIBS=ON" - 19 | "-DCMAKE_INSTALL_LIBDIR=lib" - 20 | "-DCMAKE_INSTALL_INCLUDEDIR=include" - 21 | "-DTREE_SITTER_FEATURE_WASM=OFF" - 22 | ]; - | - 23 | enableParallelBuilding = true; - | - 24 | postInstall = '' - 25 | mkdir -p $out/{lib/pkgconfig,share/tree-sitter} - 26 | substituteInPlace $out/lib/pkgconfig/tree-sitter.pc \ - 27 | --replace-fail "\''${prefix}" "$out" 2>/dev/null - 28 | ''; - | - 29 | meta = { - 30 | description = "Tree-sitter incremental parsing library"; - 31 | longDescription = '' - 32 | Tree-sitter is a parser generator tool and an incremental parsing library. - 33 | It can build a concrete syntax tree for a source file and efficiently update - 34 | the syntax tree as the source file is edited. This package provides the core - 35 | C library that can be used to parse source code using Tree-sitter grammars. 
- 36 | ''; - 37 | homepage = "https://tree-sitter.github.io/tree-sitter"; - 38 | changelog = "https://github.com/tree-sitter/tree-sitter/releases/tag/v${version}"; - 39 | license = lib.licenses.mit; - 40 | maintainers = [ lib.maintainers.amaanq ]; - 41 | platforms = lib.platforms.all; - 42 | }; - 43 | } - - - --------------------------------------------------------------------------------- -/lib/README.md: --------------------------------------------------------------------------------- - 1 | ## Subdirectories - | - 2 | * [`src`](./src) - C source code for the Tree-sitter library - 3 | * [`include`](./include) - C headers for the Tree-sitter library - 4 | * [`binding_rust`](./binding_rust) - Rust bindings to the Tree-sitter library - 5 | * [`binding_web`](./binding_web) - JavaScript bindings to the Tree-sitter library, using WebAssembly - - - --------------------------------------------------------------------------------- -/lib/src/alloc.c: --------------------------------------------------------------------------------- - 1 | #include "alloc.h" - 2 | #include "tree_sitter/api.h" - 3 | #include - | - 4 | static void *ts_malloc_default(size_t size) { - 5 | void *result = malloc(size); - 6 | if (size > 0 && !result) { - 7 | fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); - 8 | abort(); - 9 | } - 10 | return result; - 11 | } - | - 12 | static void *ts_calloc_default(size_t count, size_t size) { - 13 | void *result = calloc(count, size); - 14 | if (count > 0 && !result) { - 15 | fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); - 16 | abort(); - 17 | } - 18 | return result; - 19 | } - | - 20 | static void *ts_realloc_default(void *buffer, size_t size) { - 21 | void *result = realloc(buffer, size); - 22 | if (size > 0 && !result) { - 23 | fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); - 24 | abort(); - 25 | } - 26 | return result; - 27 | } - | - 28 | // Allow clients to override allocation functions 
dynamically - 29 | TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default; - 30 | TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default; - 31 | TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default; - 32 | TS_PUBLIC void (*ts_current_free)(void *) = free; - | - 33 | void ts_set_allocator( - 34 | void *(*new_malloc)(size_t size), - 35 | void *(*new_calloc)(size_t count, size_t size), - 36 | void *(*new_realloc)(void *ptr, size_t size), - 37 | void (*new_free)(void *ptr) - 38 | ) { - 39 | ts_current_malloc = new_malloc ? new_malloc : ts_malloc_default; - 40 | ts_current_calloc = new_calloc ? new_calloc : ts_calloc_default; - 41 | ts_current_realloc = new_realloc ? new_realloc : ts_realloc_default; - 42 | ts_current_free = new_free ? new_free : free; - 43 | } - - - --------------------------------------------------------------------------------- -/lib/src/alloc.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_ALLOC_H_ - 2 | #define TREE_SITTER_ALLOC_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include - 7 | #include - 8 | #include - | - 9 | #if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32) - 10 | #define TS_PUBLIC - 11 | #else - 12 | #define TS_PUBLIC __attribute__((visibility("default"))) - 13 | #endif - | - 14 | TS_PUBLIC extern void *(*ts_current_malloc)(size_t size); - 15 | TS_PUBLIC extern void *(*ts_current_calloc)(size_t count, size_t size); - 16 | TS_PUBLIC extern void *(*ts_current_realloc)(void *ptr, size_t size); - 17 | TS_PUBLIC extern void (*ts_current_free)(void *ptr); - | - 18 | // Allow clients to override allocation functions - 19 | #ifndef ts_malloc - 20 | #define ts_malloc ts_current_malloc - 21 | #endif - 22 | #ifndef ts_calloc - 23 | #define ts_calloc ts_current_calloc - 24 | #endif - 25 | #ifndef ts_realloc - 26 | #define ts_realloc ts_current_realloc - 27 | #endif - 28 | #ifndef ts_free - 29 | 
#define ts_free ts_current_free - 30 | #endif - | - 31 | #ifdef __cplusplus - 32 | } - 33 | #endif - | - 34 | #endif // TREE_SITTER_ALLOC_H_ - - - --------------------------------------------------------------------------------- -/lib/src/array.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_ARRAY_H_ - 2 | #define TREE_SITTER_ARRAY_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include "./alloc.h" - 7 | #include "./ts_assert.h" - | - 8 | #include - 9 | #include - 10 | #include - 11 | #include - | - 12 | #ifdef _MSC_VER - 13 | #pragma warning(push) - 14 | #pragma warning(disable : 4101) - 15 | #elif defined(__GNUC__) || defined(__clang__) - 16 | #pragma GCC diagnostic push - 17 | #pragma GCC diagnostic ignored "-Wunused-variable" - 18 | #endif - | - 19 | #define Array(T) \ - 20 | struct { \ - 21 | T *contents; \ - 22 | uint32_t size; \ - 23 | uint32_t capacity; \ - 24 | } - | - 25 | /// Initialize an array. - 26 | #define array_init(self) \ - 27 | ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) - | - 28 | /// Create an empty array. - 29 | #define array_new() \ - 30 | { NULL, 0, 0 } - | - 31 | /// Get a pointer to the element at a given `index` in the array. - 32 | #define array_get(self, _index) \ - 33 | (ts_assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) - | - 34 | /// Get a pointer to the first element in the array. - 35 | #define array_front(self) array_get(self, 0) - | - 36 | /// Get a pointer to the last element in the array. - 37 | #define array_back(self) array_get(self, (self)->size - 1) - | - 38 | /// Clear the array, setting its size to zero. Note that this does not free any - 39 | /// memory allocated for the array's contents. - 40 | #define array_clear(self) ((self)->size = 0) - | - 41 | /// Reserve `new_capacity` elements of space in the array. 
If `new_capacity` is - 42 | /// less than the array's current capacity, this function has no effect. - 43 | #define array_reserve(self, new_capacity) \ - 44 | _array__reserve((Array *)(self), array_elem_size(self), new_capacity) - | - 45 | /// Free any memory allocated for this array. Note that this does not free any - 46 | /// memory allocated for the array's contents. - 47 | #define array_delete(self) _array__delete((Array *)(self)) - | - 48 | /// Push a new `element` onto the end of the array. - 49 | #define array_push(self, element) \ - 50 | (_array__grow((Array *)(self), 1, array_elem_size(self)), \ - 51 | (self)->contents[(self)->size++] = (element)) - | - 52 | /// Increase the array's size by `count` elements. - 53 | /// New elements are zero-initialized. - 54 | #define array_grow_by(self, count) \ - 55 | do { \ - 56 | if ((count) == 0) break; \ - 57 | _array__grow((Array *)(self), count, array_elem_size(self)); \ - 58 | memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ - 59 | (self)->size += (count); \ - 60 | } while (0) - | - 61 | /// Append all elements from one array to the end of another. - 62 | #define array_push_all(self, other) \ - 63 | array_extend((self), (other)->size, (other)->contents) - | - 64 | /// Append `count` elements to the end of the array, reading their values from the - 65 | /// `contents` pointer. - 66 | #define array_extend(self, count, contents) \ - 67 | _array__splice( \ - 68 | (Array *)(self), array_elem_size(self), (self)->size, \ - 69 | 0, count, contents \ - 70 | ) - | - 71 | /// Remove `old_count` elements from the array starting at the given `index`. At - 72 | /// the same index, insert `new_count` new elements, reading their values from the - 73 | /// `new_contents` pointer. 
- 74 | #define array_splice(self, _index, old_count, new_count, new_contents) \ - 75 | _array__splice( \ - 76 | (Array *)(self), array_elem_size(self), _index, \ - 77 | old_count, new_count, new_contents \ - 78 | ) - | - 79 | /// Insert one `element` into the array at the given `index`. - 80 | #define array_insert(self, _index, element) \ - 81 | _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) - | - 82 | /// Remove one element from the array at the given `index`. - 83 | #define array_erase(self, _index) \ - 84 | _array__erase((Array *)(self), array_elem_size(self), _index) - | - 85 | /// Pop the last element off the array, returning the element by value. - 86 | #define array_pop(self) ((self)->contents[--(self)->size]) - | - 87 | /// Assign the contents of one array to another, reallocating if necessary. - 88 | #define array_assign(self, other) \ - 89 | _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) - | - 90 | /// Swap one array with another - 91 | #define array_swap(self, other) \ - 92 | _array__swap((Array *)(self), (Array *)(other)) - | - 93 | /// Get the size of the array contents - 94 | #define array_elem_size(self) (sizeof *(self)->contents) - | - 95 | /// Search a sorted array for a given `needle` value, using the given `compare` - 96 | /// callback to determine the order. - 97 | /// - 98 | /// If an existing element is found to be equal to `needle`, then the `index` - 99 | /// out-parameter is set to the existing value's index, and the `exists` - 100 | /// out-parameter is set to true. Otherwise, `index` is set to an index where - 101 | /// `needle` should be inserted in order to preserve the sorting, and `exists` - 102 | /// is set to false. 
- 103 | #define array_search_sorted_with(self, compare, needle, _index, _exists) \ - 104 | _array__search_sorted(self, 0, compare, , needle, _index, _exists) - | - 105 | /// Search a sorted array for a given `needle` value, using integer comparisons - 106 | /// of a given struct field (specified with a leading dot) to determine the order. - 107 | /// - 108 | /// See also `array_search_sorted_with`. - 109 | #define array_search_sorted_by(self, field, needle, _index, _exists) \ - 110 | _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) - | - 111 | /// Insert a given `value` into a sorted array, using the given `compare` - 112 | /// callback to determine the order. - 113 | #define array_insert_sorted_with(self, compare, value) \ - 114 | do { \ - 115 | unsigned _index, _exists; \ - 116 | array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ - 117 | if (!_exists) array_insert(self, _index, value); \ - 118 | } while (0) - | - 119 | /// Insert a given `value` into a sorted array, using integer comparisons of - 120 | /// a given struct field (specified with a leading dot) to determine the order. - 121 | /// - 122 | /// See also `array_search_sorted_by`. - 123 | #define array_insert_sorted_by(self, field, value) \ - 124 | do { \ - 125 | unsigned _index, _exists; \ - 126 | array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ - 127 | if (!_exists) array_insert(self, _index, value); \ - 128 | } while (0) - | - 129 | // Private - | - 130 | typedef Array(void) Array; - | - 131 | /// This is not what you're looking for, see `array_delete`. - 132 | static inline void _array__delete(Array *self) { - 133 | if (self->contents) { - 134 | ts_free(self->contents); - 135 | self->contents = NULL; - 136 | self->size = 0; - 137 | self->capacity = 0; - 138 | } - 139 | } - | - 140 | /// This is not what you're looking for, see `array_erase`. 
- 141 | static inline void _array__erase(Array *self, size_t element_size, - 142 | uint32_t index) { - 143 | ts_assert(index < self->size); - 144 | char *contents = (char *)self->contents; - 145 | memmove(contents + index * element_size, contents + (index + 1) * element_size, - 146 | (self->size - index - 1) * element_size); - 147 | self->size--; - 148 | } - | - 149 | /// This is not what you're looking for, see `array_reserve`. - 150 | static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { - 151 | if (new_capacity > self->capacity) { - 152 | if (self->contents) { - 153 | self->contents = ts_realloc(self->contents, new_capacity * element_size); - 154 | } else { - 155 | self->contents = ts_malloc(new_capacity * element_size); - 156 | } - 157 | self->capacity = new_capacity; - 158 | } - 159 | } - | - 160 | /// This is not what you're looking for, see `array_assign`. - 161 | static inline void _array__assign(Array *self, const Array *other, size_t element_size) { - 162 | _array__reserve(self, element_size, other->size); - 163 | self->size = other->size; - 164 | memcpy(self->contents, other->contents, self->size * element_size); - 165 | } - | - 166 | /// This is not what you're looking for, see `array_swap`. - 167 | static inline void _array__swap(Array *self, Array *other) { - 168 | Array swap = *other; - 169 | *other = *self; - 170 | *self = swap; - 171 | } - | - 172 | /// This is not what you're looking for, see `array_push` or `array_grow_by`. 
- 173 | static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { - 174 | uint32_t new_size = self->size + count; - 175 | if (new_size > self->capacity) { - 176 | uint32_t new_capacity = self->capacity * 2; - 177 | if (new_capacity < 8) new_capacity = 8; - 178 | if (new_capacity < new_size) new_capacity = new_size; - 179 | _array__reserve(self, element_size, new_capacity); - 180 | } - 181 | } - | - 182 | /// This is not what you're looking for, see `array_splice`. - 183 | static inline void _array__splice(Array *self, size_t element_size, - 184 | uint32_t index, uint32_t old_count, - 185 | uint32_t new_count, const void *elements) { - 186 | uint32_t new_size = self->size + new_count - old_count; - 187 | uint32_t old_end = index + old_count; - 188 | uint32_t new_end = index + new_count; - 189 | ts_assert(old_end <= self->size); - | - 190 | _array__reserve(self, element_size, new_size); - | - 191 | char *contents = (char *)self->contents; - 192 | if (self->size > old_end) { - 193 | memmove( - 194 | contents + new_end * element_size, - 195 | contents + old_end * element_size, - 196 | (self->size - old_end) * element_size - 197 | ); - 198 | } - 199 | if (new_count > 0) { - 200 | if (elements) { - 201 | memcpy( - 202 | (contents + index * element_size), - 203 | elements, - 204 | new_count * element_size - 205 | ); - 206 | } else { - 207 | memset( - 208 | (contents + index * element_size), - 209 | 0, - 210 | new_count * element_size - 211 | ); - 212 | } - 213 | } - 214 | self->size += new_count - old_count; - 215 | } - | - 216 | /// A binary search routine, based on Rust's `std::slice::binary_search_by`. - 217 | /// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. 
- 218 | #define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ - 219 | do { \ - 220 | *(_index) = start; \ - 221 | *(_exists) = false; \ - 222 | uint32_t size = (self)->size - *(_index); \ - 223 | if (size == 0) break; \ - 224 | int comparison; \ - 225 | while (size > 1) { \ - 226 | uint32_t half_size = size / 2; \ - 227 | uint32_t mid_index = *(_index) + half_size; \ - 228 | comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ - 229 | if (comparison <= 0) *(_index) = mid_index; \ - 230 | size -= half_size; \ - 231 | } \ - 232 | comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ - 233 | if (comparison == 0) *(_exists) = true; \ - 234 | else if (comparison < 0) *(_index) += 1; \ - 235 | } while (0) - | - 236 | /// Helper macro for the `_sorted_by` routines below. This takes the left (existing) - 237 | /// parameter by reference in order to work with the generic sorting function above. - 238 | #define _compare_int(a, b) ((int)*(a) - (int)(b)) - | - 239 | #ifdef _MSC_VER - 240 | #pragma warning(pop) - 241 | #elif defined(__GNUC__) || defined(__clang__) - 242 | #pragma GCC diagnostic pop - 243 | #endif - | - 244 | #ifdef __cplusplus - 245 | } - 246 | #endif - | - 247 | #endif // TREE_SITTER_ARRAY_H_ - - - --------------------------------------------------------------------------------- -/lib/src/atomic.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_ATOMIC_H_ - 2 | #define TREE_SITTER_ATOMIC_H_ - | - 3 | #include - 4 | #include - 5 | #include - | - 6 | #ifdef __TINYC__ - | - 7 | static inline size_t atomic_load(const volatile size_t *p) { - 8 | return *p; - 9 | } - | - 10 | static inline uint32_t atomic_inc(volatile uint32_t *p) { - 11 | *p += 1; - 12 | return *p; - 13 | } - | - 14 | static inline uint32_t atomic_dec(volatile uint32_t *p) { - 15 | *p-= 1; - 16 | return *p; - 17 | } - | - 18 | #elif defined(_WIN32) - | - 19 | #include 
- | - 20 | static inline size_t atomic_load(const volatile size_t *p) { - 21 | return *p; - 22 | } - | - 23 | static inline uint32_t atomic_inc(volatile uint32_t *p) { - 24 | return InterlockedIncrement((long volatile *)p); - 25 | } - | - 26 | static inline uint32_t atomic_dec(volatile uint32_t *p) { - 27 | return InterlockedDecrement((long volatile *)p); - 28 | } - | - 29 | #else - | - 30 | static inline size_t atomic_load(const volatile size_t *p) { - 31 | #ifdef __ATOMIC_RELAXED - 32 | return __atomic_load_n(p, __ATOMIC_RELAXED); - 33 | #else - 34 | return __sync_fetch_and_add((volatile size_t *)p, 0); - 35 | #endif - 36 | } - | - 37 | static inline uint32_t atomic_inc(volatile uint32_t *p) { - 38 | #ifdef __ATOMIC_RELAXED - 39 | return __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST); - 40 | #else - 41 | return __sync_add_and_fetch(p, 1U); - 42 | #endif - 43 | } - | - 44 | static inline uint32_t atomic_dec(volatile uint32_t *p) { - 45 | #ifdef __ATOMIC_RELAXED - 46 | return __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST); - 47 | #else - 48 | return __sync_sub_and_fetch(p, 1U); - 49 | #endif - 50 | } - | - 51 | #endif - | - 52 | #endif // TREE_SITTER_ATOMIC_H_ - - - --------------------------------------------------------------------------------- -/lib/src/error_costs.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_ERROR_COSTS_H_ - 2 | #define TREE_SITTER_ERROR_COSTS_H_ - | - 3 | #define ERROR_STATE 0 - 4 | #define ERROR_COST_PER_RECOVERY 500 - 5 | #define ERROR_COST_PER_MISSING_TREE 110 - 6 | #define ERROR_COST_PER_SKIPPED_TREE 100 - 7 | #define ERROR_COST_PER_SKIPPED_LINE 30 - 8 | #define ERROR_COST_PER_SKIPPED_CHAR 1 - | - 9 | #endif - - - --------------------------------------------------------------------------------- -/lib/src/get_changed_ranges.c: --------------------------------------------------------------------------------- - 1 | #include "./get_changed_ranges.h" - 2 | #include "./subtree.h" - 3 | 
#include "./language.h" - 4 | #include "./error_costs.h" - 5 | #include "./tree_cursor.h" - 6 | #include "./ts_assert.h" - | - 7 | // #define DEBUG_GET_CHANGED_RANGES - | - 8 | static void ts_range_array_add( - 9 | TSRangeArray *self, - 10 | Length start, - 11 | Length end - 12 | ) { - 13 | if (self->size > 0) { - 14 | TSRange *last_range = array_back(self); - 15 | if (start.bytes <= last_range->end_byte) { - 16 | last_range->end_byte = end.bytes; - 17 | last_range->end_point = end.extent; - 18 | return; - 19 | } - 20 | } - | - 21 | if (start.bytes < end.bytes) { - 22 | TSRange range = { start.extent, end.extent, start.bytes, end.bytes }; - 23 | array_push(self, range); - 24 | } - 25 | } - | - 26 | bool ts_range_array_intersects( - 27 | const TSRangeArray *self, - 28 | unsigned start_index, - 29 | uint32_t start_byte, - 30 | uint32_t end_byte - 31 | ) { - 32 | for (unsigned i = start_index; i < self->size; i++) { - 33 | TSRange *range = array_get(self, i); - 34 | if (range->end_byte > start_byte) { - 35 | if (range->start_byte >= end_byte) break; - 36 | return true; - 37 | } - 38 | } - 39 | return false; - 40 | } - | - 41 | void ts_range_array_get_changed_ranges( - 42 | const TSRange *old_ranges, unsigned old_range_count, - 43 | const TSRange *new_ranges, unsigned new_range_count, - 44 | TSRangeArray *differences - 45 | ) { - 46 | unsigned new_index = 0; - 47 | unsigned old_index = 0; - 48 | Length current_position = length_zero(); - 49 | bool in_old_range = false; - 50 | bool in_new_range = false; - | - 51 | while (old_index < old_range_count || new_index < new_range_count) { - 52 | const TSRange *old_range = &old_ranges[old_index]; - 53 | const TSRange *new_range = &new_ranges[new_index]; - | - 54 | Length next_old_position; - 55 | if (in_old_range) { - 56 | next_old_position = (Length) {old_range->end_byte, old_range->end_point}; - 57 | } else if (old_index < old_range_count) { - 58 | next_old_position = (Length) {old_range->start_byte, old_range->start_point}; 
- 59 | } else { - 60 | next_old_position = LENGTH_MAX; - 61 | } - | - 62 | Length next_new_position; - 63 | if (in_new_range) { - 64 | next_new_position = (Length) {new_range->end_byte, new_range->end_point}; - 65 | } else if (new_index < new_range_count) { - 66 | next_new_position = (Length) {new_range->start_byte, new_range->start_point}; - 67 | } else { - 68 | next_new_position = LENGTH_MAX; - 69 | } - | - 70 | if (next_old_position.bytes < next_new_position.bytes) { - 71 | if (in_old_range != in_new_range) { - 72 | ts_range_array_add(differences, current_position, next_old_position); - 73 | } - 74 | if (in_old_range) old_index++; - 75 | current_position = next_old_position; - 76 | in_old_range = !in_old_range; - 77 | } else if (next_new_position.bytes < next_old_position.bytes) { - 78 | if (in_old_range != in_new_range) { - 79 | ts_range_array_add(differences, current_position, next_new_position); - 80 | } - 81 | if (in_new_range) new_index++; - 82 | current_position = next_new_position; - 83 | in_new_range = !in_new_range; - 84 | } else { - 85 | if (in_old_range != in_new_range) { - 86 | ts_range_array_add(differences, current_position, next_new_position); - 87 | } - 88 | if (in_old_range) old_index++; - 89 | if (in_new_range) new_index++; - 90 | in_old_range = !in_old_range; - 91 | in_new_range = !in_new_range; - 92 | current_position = next_new_position; - 93 | } - 94 | } - 95 | } - | - 96 | void ts_range_edit(TSRange *range, const TSInputEdit *edit) { - 97 | if (range->end_byte >= edit->old_end_byte) { - 98 | if (range->end_byte != UINT32_MAX) { - 99 | range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); - 100 | range->end_point = point_add( - 101 | edit->new_end_point, - 102 | point_sub(range->end_point, edit->old_end_point) - 103 | ); - 104 | if (range->end_byte < edit->new_end_byte) { - 105 | range->end_byte = UINT32_MAX; - 106 | range->end_point = POINT_MAX; - 107 | } - 108 | } - 109 | } else if (range->end_byte > 
edit->start_byte) { - 110 | range->end_byte = edit->start_byte; - 111 | range->end_point = edit->start_point; - 112 | } - | - 113 | if (range->start_byte >= edit->old_end_byte) { - 114 | range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte); - 115 | range->start_point = point_add( - 116 | edit->new_end_point, - 117 | point_sub(range->start_point, edit->old_end_point) - 118 | ); - 119 | if (range->start_byte < edit->new_end_byte) { - 120 | range->start_byte = UINT32_MAX; - 121 | range->start_point = POINT_MAX; - 122 | } - 123 | } else if (range->start_byte > edit->start_byte) { - 124 | range->start_byte = edit->start_byte; - 125 | range->start_point = edit->start_point; - 126 | } - 127 | } - | - 128 | typedef struct { - 129 | TreeCursor cursor; - 130 | const TSLanguage *language; - 131 | unsigned visible_depth; - 132 | bool in_padding; - 133 | Subtree prev_external_token; - 134 | } Iterator; - | - 135 | static Iterator iterator_new( - 136 | TreeCursor *cursor, - 137 | const Subtree *tree, - 138 | const TSLanguage *language - 139 | ) { - 140 | array_clear(&cursor->stack); - 141 | array_push(&cursor->stack, ((TreeCursorEntry) { - 142 | .subtree = tree, - 143 | .position = length_zero(), - 144 | .child_index = 0, - 145 | .structural_child_index = 0, - 146 | })); - 147 | return (Iterator) { - 148 | .cursor = *cursor, - 149 | .language = language, - 150 | .visible_depth = 1, - 151 | .in_padding = false, - 152 | .prev_external_token = NULL_SUBTREE, - 153 | }; - 154 | } - | - 155 | static bool iterator_done(Iterator *self) { - 156 | return self->cursor.stack.size == 0; - 157 | } - | - 158 | static Length iterator_start_position(Iterator *self) { - 159 | TreeCursorEntry entry = *array_back(&self->cursor.stack); - 160 | if (self->in_padding) { - 161 | return entry.position; - 162 | } else { - 163 | return length_add(entry.position, ts_subtree_padding(*entry.subtree)); - 164 | } - 165 | } - | - 166 | static Length iterator_end_position(Iterator 
*self) { - 167 | TreeCursorEntry entry = *array_back(&self->cursor.stack); - 168 | Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree)); - 169 | if (self->in_padding) { - 170 | return result; - 171 | } else { - 172 | return length_add(result, ts_subtree_size(*entry.subtree)); - 173 | } - 174 | } - | - 175 | static bool iterator_tree_is_visible(const Iterator *self) { - 176 | TreeCursorEntry entry = *array_back(&self->cursor.stack); - 177 | if (ts_subtree_visible(*entry.subtree)) return true; - 178 | if (self->cursor.stack.size > 1) { - 179 | Subtree parent = *array_get(&self->cursor.stack, self->cursor.stack.size - 2)->subtree; - 180 | return ts_language_alias_at( - 181 | self->language, - 182 | parent.ptr->production_id, - 183 | entry.structural_child_index - 184 | ) != 0; - 185 | } - 186 | return false; - 187 | } - | - 188 | static void iterator_get_visible_state( - 189 | const Iterator *self, - 190 | Subtree *tree, - 191 | TSSymbol *alias_symbol, - 192 | uint32_t *start_byte - 193 | ) { - 194 | uint32_t i = self->cursor.stack.size - 1; - | - 195 | if (self->in_padding) { - 196 | if (i == 0) return; - 197 | i--; - 198 | } - | - 199 | for (; i + 1 > 0; i--) { - 200 | TreeCursorEntry entry = *array_get(&self->cursor.stack, i); - | - 201 | if (i > 0) { - 202 | const Subtree *parent = array_get(&self->cursor.stack, i - 1)->subtree; - 203 | *alias_symbol = ts_language_alias_at( - 204 | self->language, - 205 | parent->ptr->production_id, - 206 | entry.structural_child_index - 207 | ); - 208 | } - | - 209 | if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { - 210 | *tree = *entry.subtree; - 211 | *start_byte = entry.position.bytes; - 212 | break; - 213 | } - 214 | } - 215 | } - | - 216 | static void iterator_ascend(Iterator *self) { - 217 | if (iterator_done(self)) return; - 218 | if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--; - 219 | if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding 
= false; - 220 | self->cursor.stack.size--; - 221 | } - | - 222 | static bool iterator_descend(Iterator *self, uint32_t goal_position) { - 223 | if (self->in_padding) return false; - | - 224 | bool did_descend = false; - 225 | do { - 226 | did_descend = false; - 227 | TreeCursorEntry entry = *array_back(&self->cursor.stack); - 228 | Length position = entry.position; - 229 | uint32_t structural_child_index = 0; - 230 | for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { - 231 | const Subtree *child = &ts_subtree_children(*entry.subtree)[i]; - 232 | Length child_left = length_add(position, ts_subtree_padding(*child)); - 233 | Length child_right = length_add(child_left, ts_subtree_size(*child)); - | - 234 | if (child_right.bytes > goal_position) { - 235 | array_push(&self->cursor.stack, ((TreeCursorEntry) { - 236 | .subtree = child, - 237 | .position = position, - 238 | .child_index = i, - 239 | .structural_child_index = structural_child_index, - 240 | })); - | - 241 | if (iterator_tree_is_visible(self)) { - 242 | if (child_left.bytes > goal_position) { - 243 | self->in_padding = true; - 244 | } else { - 245 | self->visible_depth++; - 246 | } - 247 | return true; - 248 | } - | - 249 | did_descend = true; - 250 | break; - 251 | } - | - 252 | position = child_right; - 253 | if (!ts_subtree_extra(*child)) structural_child_index++; - 254 | Subtree last_external_token = ts_subtree_last_external_token(*child); - 255 | if (last_external_token.ptr) { - 256 | self->prev_external_token = last_external_token; - 257 | } - 258 | } - 259 | } while (did_descend); - | - 260 | return false; - 261 | } - | - 262 | static void iterator_advance(Iterator *self) { - 263 | if (self->in_padding) { - 264 | self->in_padding = false; - 265 | if (iterator_tree_is_visible(self)) { - 266 | self->visible_depth++; - 267 | } else { - 268 | iterator_descend(self, 0); - 269 | } - 270 | return; - 271 | } - | - 272 | for (;;) { - 273 | if (iterator_tree_is_visible(self)) 
self->visible_depth--; - 274 | TreeCursorEntry entry = array_pop(&self->cursor.stack); - 275 | if (iterator_done(self)) return; - | - 276 | const Subtree *parent = array_back(&self->cursor.stack)->subtree; - 277 | uint32_t child_index = entry.child_index + 1; - 278 | Subtree last_external_token = ts_subtree_last_external_token(*entry.subtree); - 279 | if (last_external_token.ptr) { - 280 | self->prev_external_token = last_external_token; - 281 | } - 282 | if (ts_subtree_child_count(*parent) > child_index) { - 283 | Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); - 284 | uint32_t structural_child_index = entry.structural_child_index; - 285 | if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; - 286 | const Subtree *next_child = &ts_subtree_children(*parent)[child_index]; - | - 287 | array_push(&self->cursor.stack, ((TreeCursorEntry) { - 288 | .subtree = next_child, - 289 | .position = position, - 290 | .child_index = child_index, - 291 | .structural_child_index = structural_child_index, - 292 | })); - | - 293 | if (iterator_tree_is_visible(self)) { - 294 | if (ts_subtree_padding(*next_child).bytes > 0) { - 295 | self->in_padding = true; - 296 | } else { - 297 | self->visible_depth++; - 298 | } - 299 | } else { - 300 | iterator_descend(self, 0); - 301 | } - 302 | break; - 303 | } - 304 | } - 305 | } - | - 306 | typedef enum { - 307 | IteratorDiffers, - 308 | IteratorMayDiffer, - 309 | IteratorMatches, - 310 | } IteratorComparison; - | - 311 | static IteratorComparison iterator_compare( - 312 | const Iterator *old_iter, - 313 | const Iterator *new_iter - 314 | ) { - 315 | Subtree old_tree = NULL_SUBTREE; - 316 | Subtree new_tree = NULL_SUBTREE; - 317 | uint32_t old_start = 0; - 318 | uint32_t new_start = 0; - 319 | TSSymbol old_alias_symbol = 0; - 320 | TSSymbol new_alias_symbol = 0; - 321 | iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start); - 322 | iterator_get_visible_state(new_iter, 
&new_tree, &new_alias_symbol, &new_start); - 323 | TSSymbol old_symbol = ts_subtree_symbol(old_tree); - 324 | TSSymbol new_symbol = ts_subtree_symbol(new_tree); - | - 325 | if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches; - 326 | if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers; - 327 | if (old_alias_symbol != new_alias_symbol || old_symbol != new_symbol) return IteratorDiffers; - | - 328 | uint32_t old_size = ts_subtree_size(old_tree).bytes; - 329 | uint32_t new_size = ts_subtree_size(new_tree).bytes; - 330 | TSStateId old_state = ts_subtree_parse_state(old_tree); - 331 | TSStateId new_state = ts_subtree_parse_state(new_tree); - 332 | bool old_has_external_tokens = ts_subtree_has_external_tokens(old_tree); - 333 | bool new_has_external_tokens = ts_subtree_has_external_tokens(new_tree); - 334 | uint32_t old_error_cost = ts_subtree_error_cost(old_tree); - 335 | uint32_t new_error_cost = ts_subtree_error_cost(new_tree); - | - 336 | if ( - 337 | old_start != new_start || - 338 | old_symbol == ts_builtin_sym_error || - 339 | old_size != new_size || - 340 | old_state == TS_TREE_STATE_NONE || - 341 | new_state == TS_TREE_STATE_NONE || - 342 | ((old_state == ERROR_STATE) != (new_state == ERROR_STATE)) || - 343 | old_error_cost != new_error_cost || - 344 | old_has_external_tokens != new_has_external_tokens || - 345 | ts_subtree_has_changes(old_tree) || - 346 | ( - 347 | old_has_external_tokens && - 348 | !ts_subtree_external_scanner_state_eq(old_iter->prev_external_token, new_iter->prev_external_token) - 349 | ) - 350 | ) { - 351 | return IteratorMayDiffer; - 352 | } - | - 353 | return IteratorMatches; - 354 | } - | - 355 | #ifdef DEBUG_GET_CHANGED_RANGES - 356 | static inline void iterator_print_state(Iterator *self) { - 357 | TreeCursorEntry entry = *array_back(&self->cursor.stack); - 358 | TSPoint start = iterator_start_position(self).extent; - 359 | TSPoint end = iterator_end_position(self).extent; - 360 | const char *name = 
ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree)); - 361 | printf( - 362 | "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", - 363 | name, self->in_padding ? "(p)" : " ", - 364 | self->visible_depth, - 365 | start.row, start.column, - 366 | end.row, end.column - 367 | ); - 368 | } - 369 | #endif - | - 370 | unsigned ts_subtree_get_changed_ranges( - 371 | const Subtree *old_tree, const Subtree *new_tree, - 372 | TreeCursor *cursor1, TreeCursor *cursor2, - 373 | const TSLanguage *language, - 374 | const TSRangeArray *included_range_differences, - 375 | TSRange **ranges - 376 | ) { - 377 | TSRangeArray results = array_new(); - | - 378 | Iterator old_iter = iterator_new(cursor1, old_tree, language); - 379 | Iterator new_iter = iterator_new(cursor2, new_tree, language); - | - 380 | unsigned included_range_difference_index = 0; - | - 381 | Length position = iterator_start_position(&old_iter); - 382 | Length next_position = iterator_start_position(&new_iter); - 383 | if (position.bytes < next_position.bytes) { - 384 | ts_range_array_add(&results, position, next_position); - 385 | position = next_position; - 386 | } else if (position.bytes > next_position.bytes) { - 387 | ts_range_array_add(&results, next_position, position); - 388 | next_position = position; - 389 | } - | - 390 | do { - 391 | #ifdef DEBUG_GET_CHANGED_RANGES - 392 | printf("At [%-2u, %-2u] Compare ", position.extent.row, position.extent.column); - 393 | iterator_print_state(&old_iter); - 394 | printf("\tvs\t"); - 395 | iterator_print_state(&new_iter); - 396 | puts(""); - 397 | #endif - | - 398 | // Compare the old and new subtrees. - 399 | IteratorComparison comparison = iterator_compare(&old_iter, &new_iter); - | - 400 | // Even if the two subtrees appear to be identical, they could differ - 401 | // internally if they contain a range of text that was previously - 402 | // excluded from the parse, and is now included, or vice-versa. 
- 403 | if (comparison == IteratorMatches && ts_range_array_intersects( - 404 | included_range_differences, - 405 | included_range_difference_index, - 406 | position.bytes, - 407 | iterator_end_position(&old_iter).bytes - 408 | )) { - 409 | comparison = IteratorMayDiffer; - 410 | } - | - 411 | bool is_changed = false; - 412 | switch (comparison) { - 413 | // If the subtrees are definitely identical, move to the end - 414 | // of both subtrees. - 415 | case IteratorMatches: - 416 | next_position = iterator_end_position(&old_iter); - 417 | break; - | - 418 | // If the subtrees might differ internally, descend into both - 419 | // subtrees, finding the first child that spans the current position. - 420 | case IteratorMayDiffer: - 421 | if (iterator_descend(&old_iter, position.bytes)) { - 422 | if (!iterator_descend(&new_iter, position.bytes)) { - 423 | is_changed = true; - 424 | next_position = iterator_end_position(&old_iter); - 425 | } - 426 | } else if (iterator_descend(&new_iter, position.bytes)) { - 427 | is_changed = true; - 428 | next_position = iterator_end_position(&new_iter); - 429 | } else { - 430 | next_position = length_min( - 431 | iterator_end_position(&old_iter), - 432 | iterator_end_position(&new_iter) - 433 | ); - 434 | } - 435 | break; - | - 436 | // If the subtrees are different, record a change and then move - 437 | // to the end of both subtrees. - 438 | case IteratorDiffers: - 439 | is_changed = true; - 440 | next_position = length_min( - 441 | iterator_end_position(&old_iter), - 442 | iterator_end_position(&new_iter) - 443 | ); - 444 | break; - 445 | } - | - 446 | // Ensure that both iterators are caught up to the current position. 
- 447 | while ( - 448 | !iterator_done(&old_iter) && - 449 | iterator_end_position(&old_iter).bytes <= next_position.bytes - 450 | ) iterator_advance(&old_iter); - 451 | while ( - 452 | !iterator_done(&new_iter) && - 453 | iterator_end_position(&new_iter).bytes <= next_position.bytes - 454 | ) iterator_advance(&new_iter); - | - 455 | // Ensure that both iterators are at the same depth in the tree. - 456 | while (old_iter.visible_depth > new_iter.visible_depth) { - 457 | iterator_ascend(&old_iter); - 458 | } - 459 | while (new_iter.visible_depth > old_iter.visible_depth) { - 460 | iterator_ascend(&new_iter); - 461 | } - | - 462 | if (is_changed) { - 463 | #ifdef DEBUG_GET_CHANGED_RANGES - 464 | printf( - 465 | " change: [[%u, %u] - [%u, %u]]\n", - 466 | position.extent.row + 1, position.extent.column, - 467 | next_position.extent.row + 1, next_position.extent.column - 468 | ); - 469 | #endif - | - 470 | ts_range_array_add(&results, position, next_position); - 471 | } - | - 472 | position = next_position; - | - 473 | // Keep track of the current position in the included range differences - 474 | // array in order to avoid scanning the entire array on each iteration. 
- 475 | while (included_range_difference_index < included_range_differences->size) { - 476 | const TSRange *range = array_get(included_range_differences, - 477 | included_range_difference_index - 478 | ); - 479 | if (range->end_byte <= position.bytes) { - 480 | included_range_difference_index++; - 481 | } else { - 482 | break; - 483 | } - 484 | } - 485 | } while (!iterator_done(&old_iter) && !iterator_done(&new_iter)); - | - 486 | Length old_size = ts_subtree_total_size(*old_tree); - 487 | Length new_size = ts_subtree_total_size(*new_tree); - 488 | if (old_size.bytes < new_size.bytes) { - 489 | ts_range_array_add(&results, old_size, new_size); - 490 | } else if (new_size.bytes < old_size.bytes) { - 491 | ts_range_array_add(&results, new_size, old_size); - 492 | } - | - 493 | *cursor1 = old_iter.cursor; - 494 | *cursor2 = new_iter.cursor; - 495 | *ranges = results.contents; - 496 | return results.size; - 497 | } - - - --------------------------------------------------------------------------------- -/lib/src/get_changed_ranges.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_GET_CHANGED_RANGES_H_ - 2 | #define TREE_SITTER_GET_CHANGED_RANGES_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include "./tree_cursor.h" - 7 | #include "./subtree.h" - | - 8 | typedef Array(TSRange) TSRangeArray; - | - 9 | void ts_range_array_get_changed_ranges( - 10 | const TSRange *old_ranges, unsigned old_range_count, - 11 | const TSRange *new_ranges, unsigned new_range_count, - 12 | TSRangeArray *differences - 13 | ); - | - 14 | bool ts_range_array_intersects( - 15 | const TSRangeArray *self, unsigned start_index, - 16 | uint32_t start_byte, uint32_t end_byte - 17 | ); - | - 18 | unsigned ts_subtree_get_changed_ranges( - 19 | const Subtree *old_tree, const Subtree *new_tree, - 20 | TreeCursor *cursor1, TreeCursor *cursor2, - 21 | const TSLanguage *language, - 22 | const TSRangeArray 
*included_range_differences, - 23 | TSRange **ranges - 24 | ); - | - 25 | #ifdef __cplusplus - 26 | } - 27 | #endif - | - 28 | #endif // TREE_SITTER_GET_CHANGED_RANGES_H_ - - - --------------------------------------------------------------------------------- -/lib/src/host.h: --------------------------------------------------------------------------------- - | - 1 | // Determine endian and pointer size based on known defines. - 2 | // TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments - 3 | // to override this. - | - 4 | #if !defined(TS_BIG_ENDIAN) - 5 | #if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \ - 6 | || (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__))) - 7 | #define TS_BIG_ENDIAN 1 - 8 | #else - 9 | #define TS_BIG_ENDIAN 0 - 10 | #endif - 11 | #endif - | - 12 | #if !defined(TS_PTR_SIZE) - 13 | #if UINTPTR_MAX == 0xFFFFFFFF - 14 | #define TS_PTR_SIZE 32 - 15 | #else - 16 | #define TS_PTR_SIZE 64 - 17 | #endif - 18 | #endif - - - --------------------------------------------------------------------------------- -/lib/src/language.c: --------------------------------------------------------------------------------- - 1 | #include "./language.h" - 2 | #include "./wasm_store.h" - 3 | #include "tree_sitter/api.h" - 4 | #include - | - 5 | const TSLanguage *ts_language_copy(const TSLanguage *self) { - 6 | if (self && ts_language_is_wasm(self)) { - 7 | ts_wasm_language_retain(self); - 8 | } - 9 | return self; - 10 | } - | - 11 | void ts_language_delete(const TSLanguage *self) { - 12 | if (self && ts_language_is_wasm(self)) { - 13 | ts_wasm_language_release(self); - 14 | } - 15 | } - | - 16 | uint32_t ts_language_symbol_count(const TSLanguage *self) { - 17 | return self->symbol_count + self->alias_count; - 18 | } - | - 19 | uint32_t ts_language_state_count(const TSLanguage *self) { - 20 | return self->state_count; - 21 | } - | - 22 | const TSSymbol *ts_language_supertypes(const TSLanguage *self, uint32_t 
*length) { - 23 | if (self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { - 24 | *length = self->supertype_count; - 25 | return self->supertype_symbols; - 26 | } else { - 27 | *length = 0; - 28 | return NULL; - 29 | } - 30 | } - | - 31 | const TSSymbol *ts_language_subtypes( - 32 | const TSLanguage *self, - 33 | TSSymbol supertype, - 34 | uint32_t *length - 35 | ) { - 36 | if (self->abi_version < LANGUAGE_VERSION_WITH_RESERVED_WORDS || !ts_language_symbol_metadata(self, supertype).supertype) { - 37 | *length = 0; - 38 | return NULL; - 39 | } - | - 40 | TSMapSlice slice = self->supertype_map_slices[supertype]; - 41 | *length = slice.length; - 42 | return &self->supertype_map_entries[slice.index]; - 43 | } - | - 44 | uint32_t ts_language_abi_version(const TSLanguage *self) { - 45 | return self->abi_version; - 46 | } - | - 47 | const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self) { - 48 | return self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? &self->metadata : NULL; - 49 | } - | - 50 | const char *ts_language_name(const TSLanguage *self) { - 51 | return self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? 
self->name : NULL; - 52 | } - | - 53 | uint32_t ts_language_field_count(const TSLanguage *self) { - 54 | return self->field_count; - 55 | } - | - 56 | void ts_language_table_entry( - 57 | const TSLanguage *self, - 58 | TSStateId state, - 59 | TSSymbol symbol, - 60 | TableEntry *result - 61 | ) { - 62 | if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { - 63 | result->action_count = 0; - 64 | result->is_reusable = false; - 65 | result->actions = NULL; - 66 | } else { - 67 | ts_assert(symbol < self->token_count); - 68 | uint32_t action_index = ts_language_lookup(self, state, symbol); - 69 | const TSParseActionEntry *entry = &self->parse_actions[action_index]; - 70 | result->action_count = entry->entry.count; - 71 | result->is_reusable = entry->entry.reusable; - 72 | result->actions = (const TSParseAction *)(entry + 1); - 73 | } - 74 | } - | - 75 | TSLexerMode ts_language_lex_mode_for_state( - 76 | const TSLanguage *self, - 77 | TSStateId state - 78 | ) { - 79 | if (self->abi_version < 15) { - 80 | TSLexMode mode = ((const TSLexMode *)self->lex_modes)[state]; - 81 | return (TSLexerMode) { - 82 | .lex_state = mode.lex_state, - 83 | .external_lex_state = mode.external_lex_state, - 84 | .reserved_word_set_id = 0, - 85 | }; - 86 | } else { - 87 | return self->lex_modes[state]; - 88 | } - 89 | } - | - 90 | bool ts_language_is_reserved_word( - 91 | const TSLanguage *self, - 92 | TSStateId state, - 93 | TSSymbol symbol - 94 | ) { - 95 | TSLexerMode lex_mode = ts_language_lex_mode_for_state(self, state); - 96 | if (lex_mode.reserved_word_set_id > 0) { - 97 | unsigned start = lex_mode.reserved_word_set_id * self->max_reserved_word_set_size; - 98 | unsigned end = start + self->max_reserved_word_set_size; - 99 | for (unsigned i = start; i < end; i++) { - 100 | if (self->reserved_words[i] == symbol) return true; - 101 | if (self->reserved_words[i] == 0) break; - 102 | } - 103 | } - 104 | return false; - 105 | } - | - 106 | TSSymbolMetadata 
ts_language_symbol_metadata( - 107 | const TSLanguage *self, - 108 | TSSymbol symbol - 109 | ) { - 110 | if (symbol == ts_builtin_sym_error) { - 111 | return (TSSymbolMetadata) {.visible = true, .named = true}; - 112 | } else if (symbol == ts_builtin_sym_error_repeat) { - 113 | return (TSSymbolMetadata) {.visible = false, .named = false}; - 114 | } else { - 115 | return self->symbol_metadata[symbol]; - 116 | } - 117 | } - | - 118 | TSSymbol ts_language_public_symbol( - 119 | const TSLanguage *self, - 120 | TSSymbol symbol - 121 | ) { - 122 | if (symbol == ts_builtin_sym_error) return symbol; - 123 | return self->public_symbol_map[symbol]; - 124 | } - | - 125 | TSStateId ts_language_next_state( - 126 | const TSLanguage *self, - 127 | TSStateId state, - 128 | TSSymbol symbol - 129 | ) { - 130 | if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { - 131 | return 0; - 132 | } else if (symbol < self->token_count) { - 133 | uint32_t count; - 134 | const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); - 135 | if (count > 0) { - 136 | TSParseAction action = actions[count - 1]; - 137 | if (action.type == TSParseActionTypeShift) { - 138 | return action.shift.extra ? 
state : action.shift.state; - 139 | } - 140 | } - 141 | return 0; - 142 | } else { - 143 | return ts_language_lookup(self, state, symbol); - 144 | } - 145 | } - | - 146 | const char *ts_language_symbol_name( - 147 | const TSLanguage *self, - 148 | TSSymbol symbol - 149 | ) { - 150 | if (symbol == ts_builtin_sym_error) { - 151 | return "ERROR"; - 152 | } else if (symbol == ts_builtin_sym_error_repeat) { - 153 | return "_ERROR"; - 154 | } else if (symbol < ts_language_symbol_count(self)) { - 155 | return self->symbol_names[symbol]; - 156 | } else { - 157 | return NULL; - 158 | } - 159 | } - | - 160 | TSSymbol ts_language_symbol_for_name( - 161 | const TSLanguage *self, - 162 | const char *string, - 163 | uint32_t length, - 164 | bool is_named - 165 | ) { - 166 | if (is_named && !strncmp(string, "ERROR", length)) return ts_builtin_sym_error; - 167 | uint16_t count = (uint16_t)ts_language_symbol_count(self); - 168 | for (TSSymbol i = 0; i < count; i++) { - 169 | TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i); - 170 | if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; - 171 | const char *symbol_name = self->symbol_names[i]; - 172 | if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { - 173 | return self->public_symbol_map[i]; - 174 | } - 175 | } - 176 | return 0; - 177 | } - | - 178 | TSSymbolType ts_language_symbol_type( - 179 | const TSLanguage *self, - 180 | TSSymbol symbol - 181 | ) { - 182 | TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol); - 183 | if (metadata.named && metadata.visible) { - 184 | return TSSymbolTypeRegular; - 185 | } else if (metadata.visible) { - 186 | return TSSymbolTypeAnonymous; - 187 | } else if (metadata.supertype) { - 188 | return TSSymbolTypeSupertype; - 189 | } else { - 190 | return TSSymbolTypeAuxiliary; - 191 | } - 192 | } - | - 193 | const char *ts_language_field_name_for_id( - 194 | const TSLanguage *self, - 195 | TSFieldId id - 196 | ) { - 197 
| uint32_t count = ts_language_field_count(self); - 198 | if (count && id <= count) { - 199 | return self->field_names[id]; - 200 | } else { - 201 | return NULL; - 202 | } - 203 | } - | - 204 | TSFieldId ts_language_field_id_for_name( - 205 | const TSLanguage *self, - 206 | const char *name, - 207 | uint32_t name_length - 208 | ) { - 209 | uint16_t count = (uint16_t)ts_language_field_count(self); - 210 | for (TSSymbol i = 1; i < count + 1; i++) { - 211 | switch (strncmp(name, self->field_names[i], name_length)) { - 212 | case 0: - 213 | if (self->field_names[i][name_length] == 0) return i; - 214 | break; - 215 | case -1: - 216 | return 0; - 217 | default: - 218 | break; - 219 | } - 220 | } - 221 | return 0; - 222 | } - | - 223 | TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) { - 224 | if (state >= self->state_count) return NULL; - 225 | LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator)); - 226 | *iterator = ts_language_lookaheads(self, state); - 227 | return (TSLookaheadIterator *)iterator; - 228 | } - | - 229 | void ts_lookahead_iterator_delete(TSLookaheadIterator *self) { - 230 | ts_free(self); - 231 | } - | - 232 | bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) { - 233 | LookaheadIterator *iterator = (LookaheadIterator *)self; - 234 | if (state >= iterator->language->state_count) return false; - 235 | *iterator = ts_language_lookaheads(iterator->language, state); - 236 | return true; - 237 | } - | - 238 | const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) { - 239 | const LookaheadIterator *iterator = (const LookaheadIterator *)self; - 240 | return iterator->language; - 241 | } - | - 242 | bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) { - 243 | if (state >= language->state_count) return false; - 244 | LookaheadIterator *iterator = (LookaheadIterator *)self; - 245 | *iterator = 
ts_language_lookaheads(language, state); - 246 | return true; - 247 | } - | - 248 | bool ts_lookahead_iterator_next(TSLookaheadIterator *self) { - 249 | LookaheadIterator *iterator = (LookaheadIterator *)self; - 250 | return ts_lookahead_iterator__next(iterator); - 251 | } - | - 252 | TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) { - 253 | const LookaheadIterator *iterator = (const LookaheadIterator *)self; - 254 | return iterator->symbol; - 255 | } - | - 256 | const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) { - 257 | const LookaheadIterator *iterator = (const LookaheadIterator *)self; - 258 | return ts_language_symbol_name(iterator->language, iterator->symbol); - 259 | } - - - --------------------------------------------------------------------------------- -/lib/src/language.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_LANGUAGE_H_ - 2 | #define TREE_SITTER_LANGUAGE_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include "./subtree.h" - 7 | #include "./parser.h" - | - 8 | #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) - | - 9 | #define LANGUAGE_VERSION_WITH_RESERVED_WORDS 15 - 10 | #define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14 - | - 11 | typedef struct { - 12 | const TSParseAction *actions; - 13 | uint32_t action_count; - 14 | bool is_reusable; - 15 | } TableEntry; - | - 16 | typedef struct { - 17 | const TSLanguage *language; - 18 | const uint16_t *data; - 19 | const uint16_t *group_end; - 20 | TSStateId state; - 21 | uint16_t table_value; - 22 | uint16_t section_index; - 23 | uint16_t group_count; - 24 | bool is_small_state; - | - 25 | const TSParseAction *actions; - 26 | TSSymbol symbol; - 27 | TSStateId next_state; - 28 | uint16_t action_count; - 29 | } LookaheadIterator; - | - 30 | void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry 
*result); - 31 | TSLexerMode ts_language_lex_mode_for_state(const TSLanguage *self, TSStateId state); - 32 | bool ts_language_is_reserved_word(const TSLanguage *self, TSStateId state, TSSymbol symbol); - 33 | TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *self, TSSymbol symbol); - 34 | TSSymbol ts_language_public_symbol(const TSLanguage *self, TSSymbol symbol); - | - 35 | static inline const TSParseAction *ts_language_actions( - 36 | const TSLanguage *self, - 37 | TSStateId state, - 38 | TSSymbol symbol, - 39 | uint32_t *count - 40 | ) { - 41 | TableEntry entry; - 42 | ts_language_table_entry(self, state, symbol, &entry); - 43 | *count = entry.action_count; - 44 | return entry.actions; - 45 | } - | - 46 | static inline bool ts_language_has_reduce_action( - 47 | const TSLanguage *self, - 48 | TSStateId state, - 49 | TSSymbol symbol - 50 | ) { - 51 | TableEntry entry; - 52 | ts_language_table_entry(self, state, symbol, &entry); - 53 | return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce; - 54 | } - | - 55 | // Lookup the table value for a given symbol and state. - 56 | // - 57 | // For non-terminal symbols, the table value represents a successor state. - 58 | // For terminal symbols, it represents an index in the actions table. - 59 | // For 'large' parse states, this is a direct lookup. For 'small' parse - 60 | // states, this requires searching through the symbol groups to find - 61 | // the given symbol. 
- 62 | static inline uint16_t ts_language_lookup( - 63 | const TSLanguage *self, - 64 | TSStateId state, - 65 | TSSymbol symbol - 66 | ) { - 67 | if (state >= self->large_state_count) { - 68 | uint32_t index = self->small_parse_table_map[state - self->large_state_count]; - 69 | const uint16_t *data = &self->small_parse_table[index]; - 70 | uint16_t group_count = *(data++); - 71 | for (unsigned i = 0; i < group_count; i++) { - 72 | uint16_t section_value = *(data++); - 73 | uint16_t symbol_count = *(data++); - 74 | for (unsigned j = 0; j < symbol_count; j++) { - 75 | if (*(data++) == symbol) return section_value; - 76 | } - 77 | } - 78 | return 0; - 79 | } else { - 80 | return self->parse_table[state * self->symbol_count + symbol]; - 81 | } - 82 | } - | - 83 | static inline bool ts_language_has_actions( - 84 | const TSLanguage *self, - 85 | TSStateId state, - 86 | TSSymbol symbol - 87 | ) { - 88 | return ts_language_lookup(self, state, symbol) != 0; - 89 | } - | - 90 | // Iterate over all of the symbols that are valid in the given state. - 91 | // - 92 | // For 'large' parse states, this just requires iterating through - 93 | // all possible symbols and checking the parse table for each one. - 94 | // For 'small' parse states, this exploits the structure of the - 95 | // table to only visit the valid symbols. 
- 96 | static inline LookaheadIterator ts_language_lookaheads( - 97 | const TSLanguage *self, - 98 | TSStateId state - 99 | ) { - 100 | bool is_small_state = state >= self->large_state_count; - 101 | const uint16_t *data; - 102 | const uint16_t *group_end = NULL; - 103 | uint16_t group_count = 0; - 104 | if (is_small_state) { - 105 | uint32_t index = self->small_parse_table_map[state - self->large_state_count]; - 106 | data = &self->small_parse_table[index]; - 107 | group_end = data + 1; - 108 | group_count = *data; - 109 | } else { - 110 | data = &self->parse_table[state * self->symbol_count] - 1; - 111 | } - 112 | return (LookaheadIterator) { - 113 | .language = self, - 114 | .data = data, - 115 | .group_end = group_end, - 116 | .group_count = group_count, - 117 | .is_small_state = is_small_state, - 118 | .symbol = UINT16_MAX, - 119 | .next_state = 0, - 120 | }; - 121 | } - | - 122 | static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) { - 123 | // For small parse states, valid symbols are listed explicitly, - 124 | // grouped by their value. There's no need to look up the actions - 125 | // again until moving to the next group. - 126 | if (self->is_small_state) { - 127 | self->data++; - 128 | if (self->data == self->group_end) { - 129 | if (self->group_count == 0) return false; - 130 | self->group_count--; - 131 | self->table_value = *(self->data++); - 132 | unsigned symbol_count = *(self->data++); - 133 | self->group_end = self->data + symbol_count; - 134 | self->symbol = *self->data; - 135 | } else { - 136 | self->symbol = *self->data; - 137 | return true; - 138 | } - 139 | } - | - 140 | // For large parse states, iterate through every symbol until one - 141 | // is found that has valid actions. 
- 142 | else { - 143 | do { - 144 | self->data++; - 145 | self->symbol++; - 146 | if (self->symbol >= self->language->symbol_count) return false; - 147 | self->table_value = *self->data; - 148 | } while (!self->table_value); - 149 | } - | - 150 | // Depending on if the symbols is terminal or non-terminal, the table value either - 151 | // represents a list of actions or a successor state. - 152 | if (self->symbol < self->language->token_count) { - 153 | const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value]; - 154 | self->action_count = entry->entry.count; - 155 | self->actions = (const TSParseAction *)(entry + 1); - 156 | self->next_state = 0; - 157 | } else { - 158 | self->action_count = 0; - 159 | self->next_state = self->table_value; - 160 | } - 161 | return true; - 162 | } - | - 163 | // Whether the state is a "primary state". If this returns false, it indicates that there exists - 164 | // another state that behaves identically to this one with respect to query analysis. - 165 | static inline bool ts_language_state_is_primary( - 166 | const TSLanguage *self, - 167 | TSStateId state - 168 | ) { - 169 | if (self->abi_version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { - 170 | return state == self->primary_state_ids[state]; - 171 | } else { - 172 | return true; - 173 | } - 174 | } - | - 175 | static inline const bool *ts_language_enabled_external_tokens( - 176 | const TSLanguage *self, - 177 | unsigned external_scanner_state - 178 | ) { - 179 | if (external_scanner_state == 0) { - 180 | return NULL; - 181 | } else { - 182 | return self->external_scanner.states + self->external_token_count * external_scanner_state; - 183 | } - 184 | } - | - 185 | static inline const TSSymbol *ts_language_alias_sequence( - 186 | const TSLanguage *self, - 187 | uint32_t production_id - 188 | ) { - 189 | return production_id ? 
- 190 | &self->alias_sequences[production_id * self->max_alias_sequence_length] : - 191 | NULL; - 192 | } - | - 193 | static inline TSSymbol ts_language_alias_at( - 194 | const TSLanguage *self, - 195 | uint32_t production_id, - 196 | uint32_t child_index - 197 | ) { - 198 | return production_id ? - 199 | self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] : - 200 | 0; - 201 | } - | - 202 | static inline void ts_language_field_map( - 203 | const TSLanguage *self, - 204 | uint32_t production_id, - 205 | const TSFieldMapEntry **start, - 206 | const TSFieldMapEntry **end - 207 | ) { - 208 | if (self->field_count == 0) { - 209 | *start = NULL; - 210 | *end = NULL; - 211 | return; - 212 | } - | - 213 | TSMapSlice slice = self->field_map_slices[production_id]; - 214 | *start = &self->field_map_entries[slice.index]; - 215 | *end = &self->field_map_entries[slice.index] + slice.length; - 216 | } - | - 217 | static inline void ts_language_aliases_for_symbol( - 218 | const TSLanguage *self, - 219 | TSSymbol original_symbol, - 220 | const TSSymbol **start, - 221 | const TSSymbol **end - 222 | ) { - 223 | *start = &self->public_symbol_map[original_symbol]; - 224 | *end = *start + 1; - | - 225 | unsigned idx = 0; - 226 | for (;;) { - 227 | TSSymbol symbol = self->alias_map[idx++]; - 228 | if (symbol == 0 || symbol > original_symbol) break; - 229 | uint16_t count = self->alias_map[idx++]; - 230 | if (symbol == original_symbol) { - 231 | *start = &self->alias_map[idx]; - 232 | *end = &self->alias_map[idx + count]; - 233 | break; - 234 | } - 235 | idx += count; - 236 | } - 237 | } - | - 238 | static inline void ts_language_write_symbol_as_dot_string( - 239 | const TSLanguage *self, - 240 | FILE *f, - 241 | TSSymbol symbol - 242 | ) { - 243 | const char *name = ts_language_symbol_name(self, symbol); - 244 | for (const char *chr = name; *chr; chr++) { - 245 | switch (*chr) { - 246 | case '"': - 247 | case '\\': - 248 | fputc('\\', f); - 249 | 
fputc(*chr, f); - 250 | break; - 251 | case '\n': - 252 | fputs("\\n", f); - 253 | break; - 254 | case '\t': - 255 | fputs("\\t", f); - 256 | break; - 257 | default: - 258 | fputc(*chr, f); - 259 | break; - 260 | } - 261 | } - 262 | } - | - 263 | #ifdef __cplusplus - 264 | } - 265 | #endif - | - 266 | #endif // TREE_SITTER_LANGUAGE_H_ - - - --------------------------------------------------------------------------------- -/lib/src/length.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_LENGTH_H_ - 2 | #define TREE_SITTER_LENGTH_H_ - | - 3 | #include - 4 | #include - 5 | #include "./point.h" - 6 | #include "tree_sitter/api.h" - | - 7 | typedef struct { - 8 | uint32_t bytes; - 9 | TSPoint extent; - 10 | } Length; - | - 11 | static const Length LENGTH_UNDEFINED = {0, {0, 1}}; - 12 | static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}}; - | - 13 | static inline bool length_is_undefined(Length length) { - 14 | return length.bytes == 0 && length.extent.column != 0; - 15 | } - | - 16 | static inline Length length_min(Length len1, Length len2) { - 17 | return (len1.bytes < len2.bytes) ? len1 : len2; - 18 | } - | - 19 | static inline Length length_add(Length len1, Length len2) { - 20 | Length result; - 21 | result.bytes = len1.bytes + len2.bytes; - 22 | result.extent = point_add(len1.extent, len2.extent); - 23 | return result; - 24 | } - | - 25 | static inline Length length_sub(Length len1, Length len2) { - 26 | Length result; - 27 | result.bytes = (len1.bytes >= len2.bytes) ? 
len1.bytes - len2.bytes : 0; - 28 | result.extent = point_sub(len1.extent, len2.extent); - 29 | return result; - 30 | } - | - 31 | static inline Length length_zero(void) { - 32 | Length result = {0, {0, 0}}; - 33 | return result; - 34 | } - | - 35 | static inline Length length_saturating_sub(Length len1, Length len2) { - 36 | if (len1.bytes > len2.bytes) { - 37 | return length_sub(len1, len2); - 38 | } else { - 39 | return length_zero(); - 40 | } - 41 | } - | - 42 | #endif - - - --------------------------------------------------------------------------------- -/lib/src/lexer.c: --------------------------------------------------------------------------------- - 1 | #include "./length.h" - 2 | #include "./lexer.h" - 3 | #include "./unicode.h" - | - 4 | #include "tree_sitter/api.h" - | - 5 | #include - 6 | #include - | - 7 | #define LOG(message, character) \ - 8 | if (self->logger.log) { \ - 9 | snprintf( \ - 10 | self->debug_buffer, \ - 11 | TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \ - 12 | 32 <= character && character < 127 ? \ - 13 | message " character:'%c'" : \ - 14 | message " character:%d", \ - 15 | character \ - 16 | ); \ - 17 | self->logger.log( \ - 18 | self->logger.payload, \ - 19 | TSLogTypeLex, \ - 20 | self->debug_buffer \ - 21 | ); \ - 22 | } - | - 23 | static const int32_t BYTE_ORDER_MARK = 0xFEFF; - | - 24 | static const TSRange DEFAULT_RANGE = { - 25 | .start_point = { - 26 | .row = 0, - 27 | .column = 0, - 28 | }, - 29 | .end_point = { - 30 | .row = UINT32_MAX, - 31 | .column = UINT32_MAX, - 32 | }, - 33 | .start_byte = 0, - 34 | .end_byte = UINT32_MAX - 35 | }; - | - 36 | /** - 37 | * Sets the column data to the given value and marks it valid. - 38 | * @param self The lexer state. - 39 | * @param val The new value of the column data. 
- 40 | */ - 41 | static void ts_lexer__set_column_data(Lexer *self, uint32_t val) { - 42 | self->column_data.valid = true; - 43 | self->column_data.value = val; - 44 | } - | - 45 | /** - 46 | * Increments the value of the column data; no-op if invalid. - 47 | * @param self The lexer state. - 48 | */ - 49 | static void ts_lexer__increment_column_data(Lexer *self) { - 50 | if (self->column_data.valid) { - 51 | self->column_data.value++; - 52 | } - 53 | } - | - 54 | /** - 55 | * Marks the column data as invalid. - 56 | * @param self The lexer state. - 57 | */ - 58 | static void ts_lexer__invalidate_column_data(Lexer *self) { - 59 | self->column_data.valid = false; - 60 | self->column_data.value = 0; - 61 | } - | - 62 | // Check if the lexer has reached EOF. This state is stored - 63 | // by setting the lexer's `current_included_range_index` such that - 64 | // it has consumed all of its available ranges. - 65 | static bool ts_lexer__eof(const TSLexer *_self) { - 66 | Lexer *self = (Lexer *)_self; - 67 | return self->current_included_range_index == self->included_range_count; - 68 | } - | - 69 | // Clear the currently stored chunk of source code, because the lexer's - 70 | // position has changed. - 71 | static void ts_lexer__clear_chunk(Lexer *self) { - 72 | self->chunk = NULL; - 73 | self->chunk_size = 0; - 74 | self->chunk_start = 0; - 75 | } - | - 76 | // Call the lexer's input callback to obtain a new chunk of source code - 77 | // for the current position. 
- 78 | static void ts_lexer__get_chunk(Lexer *self) { - 79 | self->chunk_start = self->current_position.bytes; - 80 | self->chunk = self->input.read( - 81 | self->input.payload, - 82 | self->current_position.bytes, - 83 | self->current_position.extent, - 84 | &self->chunk_size - 85 | ); - 86 | if (!self->chunk_size) { - 87 | self->current_included_range_index = self->included_range_count; - 88 | self->chunk = NULL; - 89 | } - 90 | } - | - 91 | // Decode the next unicode character in the current chunk of source code. - 92 | // This assumes that the lexer has already retrieved a chunk of source - 93 | // code that spans the current position. - 94 | static void ts_lexer__get_lookahead(Lexer *self) { - 95 | uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start; - 96 | uint32_t size = self->chunk_size - position_in_chunk; - | - 97 | if (size == 0) { - 98 | self->lookahead_size = 1; - 99 | self->data.lookahead = '\0'; - 100 | return; - 101 | } - | - 102 | const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; - 103 | TSDecodeFunction decode = - 104 | self->input.encoding == TSInputEncodingUTF8 ? ts_decode_utf8 : - 105 | self->input.encoding == TSInputEncodingUTF16LE ? ts_decode_utf16_le : - 106 | self->input.encoding == TSInputEncodingUTF16BE ? ts_decode_utf16_be : self->input.decode; - | - 107 | self->lookahead_size = decode(chunk, size, &self->data.lookahead); - | - 108 | // If this chunk ended in the middle of a multi-byte character, - 109 | // try again with a fresh chunk. 
- 110 | if (self->data.lookahead == TS_DECODE_ERROR && size < 4) { - 111 | ts_lexer__get_chunk(self); - 112 | chunk = (const uint8_t *)self->chunk; - 113 | size = self->chunk_size; - 114 | self->lookahead_size = decode(chunk, size, &self->data.lookahead); - 115 | } - | - 116 | if (self->data.lookahead == TS_DECODE_ERROR) { - 117 | self->lookahead_size = 1; - 118 | } - 119 | } - | - 120 | static void ts_lexer_goto(Lexer *self, Length position) { - 121 | if (position.bytes != self->current_position.bytes) { - 122 | ts_lexer__invalidate_column_data(self); - 123 | } - | - 124 | self->current_position = position; - | - 125 | // Move to the first valid position at or after the given position. - 126 | bool found_included_range = false; - 127 | for (unsigned i = 0; i < self->included_range_count; i++) { - 128 | TSRange *included_range = &self->included_ranges[i]; - 129 | if ( - 130 | included_range->end_byte > self->current_position.bytes && - 131 | included_range->end_byte > included_range->start_byte - 132 | ) { - 133 | if (included_range->start_byte >= self->current_position.bytes) { - 134 | self->current_position = (Length) { - 135 | .bytes = included_range->start_byte, - 136 | .extent = included_range->start_point, - 137 | }; - 138 | } - | - 139 | self->current_included_range_index = i; - 140 | found_included_range = true; - 141 | break; - 142 | } - 143 | } - | - 144 | if (found_included_range) { - 145 | // If the current position is outside of the current chunk of text, - 146 | // then clear out the current chunk of text. - 147 | if (self->chunk && ( - 148 | self->current_position.bytes < self->chunk_start || - 149 | self->current_position.bytes >= self->chunk_start + self->chunk_size - 150 | )) { - 151 | ts_lexer__clear_chunk(self); - 152 | } - | - 153 | self->lookahead_size = 0; - 154 | self->data.lookahead = '\0'; - 155 | } - | - 156 | // If the given position is beyond any of included ranges, move to the EOF - 157 | // state - past the end of the included ranges. 
- 158 | else { - 159 | self->current_included_range_index = self->included_range_count; - 160 | TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1]; - 161 | self->current_position = (Length) { - 162 | .bytes = last_included_range->end_byte, - 163 | .extent = last_included_range->end_point, - 164 | }; - 165 | ts_lexer__clear_chunk(self); - 166 | self->lookahead_size = 1; - 167 | self->data.lookahead = '\0'; - 168 | } - 169 | } - | - 170 | /** - 171 | * Actually advances the lexer. Does not log anything. - 172 | * @param self The lexer state. - 173 | * @param skip Whether to mark the consumed codepoint as whitespace. - 174 | */ - 175 | static void ts_lexer__do_advance(Lexer *self, bool skip) { - 176 | if (self->lookahead_size) { - 177 | if (self->data.lookahead == '\n') { - 178 | self->current_position.extent.row++; - 179 | self->current_position.extent.column = 0; - 180 | ts_lexer__set_column_data(self, 0); - 181 | } else { - 182 | bool is_bom = self->current_position.bytes == 0 && - 183 | self->data.lookahead == BYTE_ORDER_MARK; - 184 | if (!is_bom) ts_lexer__increment_column_data(self); - 185 | self->current_position.extent.column += self->lookahead_size; - 186 | } - 187 | self->current_position.bytes += self->lookahead_size; - 188 | } - | - 189 | const TSRange *current_range = &self->included_ranges[self->current_included_range_index]; - 190 | while ( - 191 | self->current_position.bytes >= current_range->end_byte || - 192 | current_range->end_byte == current_range->start_byte - 193 | ) { - 194 | if (self->current_included_range_index < self->included_range_count) { - 195 | self->current_included_range_index++; - 196 | } - 197 | if (self->current_included_range_index < self->included_range_count) { - 198 | current_range++; - 199 | self->current_position = (Length) { - 200 | current_range->start_byte, - 201 | current_range->start_point, - 202 | }; - 203 | } else { - 204 | current_range = NULL; - 205 | break; - 206 | } - 207 | } - 
| - 208 | if (skip) self->token_start_position = self->current_position; - | - 209 | if (current_range) { - 210 | if ( - 211 | self->current_position.bytes < self->chunk_start || - 212 | self->current_position.bytes >= self->chunk_start + self->chunk_size - 213 | ) { - 214 | ts_lexer__get_chunk(self); - 215 | } - 216 | ts_lexer__get_lookahead(self); - 217 | } else { - 218 | ts_lexer__clear_chunk(self); - 219 | self->data.lookahead = '\0'; - 220 | self->lookahead_size = 1; - 221 | } - 222 | } - | - 223 | // Advance to the next character in the source code, retrieving a new - 224 | // chunk of source code if needed. - 225 | static void ts_lexer__advance(TSLexer *_self, bool skip) { - 226 | Lexer *self = (Lexer *)_self; - 227 | if (!self->chunk) return; - | - 228 | if (skip) { - 229 | LOG("skip", self->data.lookahead) - 230 | } else { - 231 | LOG("consume", self->data.lookahead) - 232 | } - | - 233 | ts_lexer__do_advance(self, skip); - 234 | } - | - 235 | // Mark that a token match has completed. This can be called multiple - 236 | // times if a longer match is found later. - 237 | static void ts_lexer__mark_end(TSLexer *_self) { - 238 | Lexer *self = (Lexer *)_self; - 239 | if (!ts_lexer__eof(&self->data)) { - 240 | // If the lexer is right at the beginning of included range, - 241 | // then the token should be considered to end at the *end* of the - 242 | // previous included range, rather than here. 
- 243 | TSRange *current_included_range = &self->included_ranges[ - 244 | self->current_included_range_index - 245 | ]; - 246 | if ( - 247 | self->current_included_range_index > 0 && - 248 | self->current_position.bytes == current_included_range->start_byte - 249 | ) { - 250 | TSRange *previous_included_range = current_included_range - 1; - 251 | self->token_end_position = (Length) { - 252 | previous_included_range->end_byte, - 253 | previous_included_range->end_point, - 254 | }; - 255 | return; - 256 | } - 257 | } - 258 | self->token_end_position = self->current_position; - 259 | } - | - 260 | static uint32_t ts_lexer__get_column(TSLexer *_self) { - 261 | Lexer *self = (Lexer *)_self; - | - 262 | self->did_get_column = true; - | - 263 | if (!self->column_data.valid) { - 264 | // Record current position - 265 | uint32_t goal_byte = self->current_position.bytes; - | - 266 | // Back up to the beginning of the line - 267 | Length start_of_col = { - 268 | self->current_position.bytes - self->current_position.extent.column, - 269 | {self->current_position.extent.row, 0}, - 270 | }; - 271 | ts_lexer_goto(self, start_of_col); - 272 | ts_lexer__set_column_data(self, 0); - 273 | ts_lexer__get_chunk(self); - | - 274 | if (!ts_lexer__eof(_self)) { - 275 | ts_lexer__get_lookahead(self); - | - 276 | // Advance to the recorded position - 277 | while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self) && self->chunk) { - 278 | ts_lexer__do_advance(self, false); - 279 | if (ts_lexer__eof(_self)) break; - 280 | } - 281 | } - 282 | } - | - 283 | return self->column_data.value; - 284 | } - | - 285 | // Is the lexer at a boundary between two disjoint included ranges of - 286 | // source code? This is exposed as an API because some languages' external - 287 | // scanners need to perform custom actions at these boundaries. 
- 288 | static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) { - 289 | const Lexer *self = (const Lexer *)_self; - 290 | if (self->current_included_range_index < self->included_range_count) { - 291 | TSRange *current_range = &self->included_ranges[self->current_included_range_index]; - 292 | return self->current_position.bytes == current_range->start_byte; - 293 | } else { - 294 | return false; - 295 | } - 296 | } - | - 297 | static void ts_lexer__log(const TSLexer *_self, const char *fmt, ...) { - 298 | Lexer *self = (Lexer *)_self; - 299 | va_list args; - 300 | va_start(args, fmt); - 301 | if (self->logger.log) { - 302 | vsnprintf(self->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, fmt, args); - 303 | self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer); - 304 | } - 305 | va_end(args); - 306 | } - | - 307 | void ts_lexer_init(Lexer *self) { - 308 | *self = (Lexer) { - 309 | .data = { - 310 | // The lexer's methods are stored as struct fields so that generated - 311 | // parsers can call them without needing to be linked against this - 312 | // library. 
- 313 | .advance = ts_lexer__advance, - 314 | .mark_end = ts_lexer__mark_end, - 315 | .get_column = ts_lexer__get_column, - 316 | .is_at_included_range_start = ts_lexer__is_at_included_range_start, - 317 | .eof = ts_lexer__eof, - 318 | .log = ts_lexer__log, - 319 | .lookahead = 0, - 320 | .result_symbol = 0, - 321 | }, - 322 | .chunk = NULL, - 323 | .chunk_size = 0, - 324 | .chunk_start = 0, - 325 | .current_position = {0, {0, 0}}, - 326 | .logger = { - 327 | .payload = NULL, - 328 | .log = NULL - 329 | }, - 330 | .included_ranges = NULL, - 331 | .included_range_count = 0, - 332 | .current_included_range_index = 0, - 333 | .did_get_column = false, - 334 | .column_data = { - 335 | .valid = false, - 336 | .value = 0 - 337 | } - 338 | }; - 339 | ts_lexer_set_included_ranges(self, NULL, 0); - 340 | } - | - 341 | void ts_lexer_delete(Lexer *self) { - 342 | ts_free(self->included_ranges); - 343 | } - | - 344 | void ts_lexer_set_input(Lexer *self, TSInput input) { - 345 | self->input = input; - 346 | ts_lexer__clear_chunk(self); - 347 | ts_lexer_goto(self, self->current_position); - 348 | } - | - 349 | // Move the lexer to the given position. This doesn't do any work - 350 | // if the parser is already at the given position. 
- 351 | void ts_lexer_reset(Lexer *self, Length position) { - 352 | if (position.bytes != self->current_position.bytes) { - 353 | ts_lexer_goto(self, position); - 354 | } - 355 | } - | - 356 | void ts_lexer_start(Lexer *self) { - 357 | self->token_start_position = self->current_position; - 358 | self->token_end_position = LENGTH_UNDEFINED; - 359 | self->data.result_symbol = 0; - 360 | self->did_get_column = false; - 361 | if (!ts_lexer__eof(&self->data)) { - 362 | if (!self->chunk_size) ts_lexer__get_chunk(self); - 363 | if (!self->lookahead_size) ts_lexer__get_lookahead(self); - 364 | if (self->current_position.bytes == 0) { - 365 | if (self->data.lookahead == BYTE_ORDER_MARK) { - 366 | ts_lexer__advance(&self->data, true); - 367 | } - 368 | ts_lexer__set_column_data(self, 0); - 369 | } - 370 | } - 371 | } - | - 372 | void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) { - 373 | if (length_is_undefined(self->token_end_position)) { - 374 | ts_lexer__mark_end(&self->data); - 375 | } - | - 376 | // If the token ended at an included range boundary, then its end position - 377 | // will have been reset to the end of the preceding range. Reset the start - 378 | // position to match. - 379 | if (self->token_end_position.bytes < self->token_start_position.bytes) { - 380 | self->token_start_position = self->token_end_position; - 381 | } - | - 382 | uint32_t current_lookahead_end_byte = self->current_position.bytes + 1; - | - 383 | // In order to determine that a byte sequence is invalid UTF8 or UTF16, - 384 | // the character decoding algorithm may have looked at the following byte. - 385 | // Therefore, the next byte *after* the current (invalid) character - 386 | // affects the interpretation of the current character. 
- 387 | if (self->data.lookahead == TS_DECODE_ERROR) { - 388 | current_lookahead_end_byte += 4; // the maximum number of bytes read to identify an invalid code point - 389 | } - | - 390 | if (current_lookahead_end_byte > *lookahead_end_byte) { - 391 | *lookahead_end_byte = current_lookahead_end_byte; - 392 | } - 393 | } - | - 394 | void ts_lexer_mark_end(Lexer *self) { - 395 | ts_lexer__mark_end(&self->data); - 396 | } - | - 397 | bool ts_lexer_set_included_ranges( - 398 | Lexer *self, - 399 | const TSRange *ranges, - 400 | uint32_t count - 401 | ) { - 402 | if (count == 0 || !ranges) { - 403 | ranges = &DEFAULT_RANGE; - 404 | count = 1; - 405 | } else { - 406 | uint32_t previous_byte = 0; - 407 | for (unsigned i = 0; i < count; i++) { - 408 | const TSRange *range = &ranges[i]; - 409 | if ( - 410 | range->start_byte < previous_byte || - 411 | range->end_byte < range->start_byte - 412 | ) return false; - 413 | previous_byte = range->end_byte; - 414 | } - 415 | } - | - 416 | size_t size = count * sizeof(TSRange); - 417 | self->included_ranges = ts_realloc(self->included_ranges, size); - 418 | memcpy(self->included_ranges, ranges, size); - 419 | self->included_range_count = count; - 420 | ts_lexer_goto(self, self->current_position); - 421 | return true; - 422 | } - | - 423 | TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) { - 424 | *count = self->included_range_count; - 425 | return self->included_ranges; - 426 | } - | - 427 | #undef LOG - - - --------------------------------------------------------------------------------- -/lib/src/lexer.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_LEXER_H_ - 2 | #define TREE_SITTER_LEXER_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include "./length.h" - 7 | #include "./subtree.h" - 8 | #include "tree_sitter/api.h" - 9 | #include "./parser.h" - | - 10 | typedef struct { - 11 | uint32_t value; - 12 | bool valid; - 13 
| } ColumnData; - | - 14 | typedef struct { - 15 | TSLexer data; - 16 | Length current_position; - 17 | Length token_start_position; - 18 | Length token_end_position; - | - 19 | TSRange *included_ranges; - 20 | const char *chunk; - 21 | TSInput input; - 22 | TSLogger logger; - | - 23 | uint32_t included_range_count; - 24 | uint32_t current_included_range_index; - 25 | uint32_t chunk_start; - 26 | uint32_t chunk_size; - 27 | uint32_t lookahead_size; - 28 | bool did_get_column; - 29 | ColumnData column_data; - | - 30 | char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; - 31 | } Lexer; - | - 32 | void ts_lexer_init(Lexer *self); - 33 | void ts_lexer_delete(Lexer *self); - 34 | void ts_lexer_set_input(Lexer *self, TSInput input); - 35 | void ts_lexer_reset(Lexer *self, Length position); - 36 | void ts_lexer_start(Lexer *self); - 37 | void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte); - 38 | void ts_lexer_mark_end(Lexer *self); - 39 | bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count); - 40 | TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count); - | - 41 | #ifdef __cplusplus - 42 | } - 43 | #endif - | - 44 | #endif // TREE_SITTER_LEXER_H_ - - - --------------------------------------------------------------------------------- -/lib/src/lib.c: --------------------------------------------------------------------------------- - 1 | #include "./alloc.c" - 2 | #include "./get_changed_ranges.c" - 3 | #include "./language.c" - 4 | #include "./lexer.c" - 5 | #include "./node.c" - 6 | #include "./parser.c" - 7 | #include "./point.c" - 8 | #include "./query.c" - 9 | #include "./stack.c" - 10 | #include "./subtree.c" - 11 | #include "./tree_cursor.c" - 12 | #include "./tree.c" - 13 | #include "./wasm_store.c" - - - --------------------------------------------------------------------------------- -/lib/src/node.c: --------------------------------------------------------------------------------- - 1 | #include - 
2 | #include "./point.h" - 3 | #include "./subtree.h" - 4 | #include "./tree.h" - 5 | #include "./language.h" - | - 6 | typedef struct { - 7 | Subtree parent; - 8 | const TSTree *tree; - 9 | Length position; - 10 | uint32_t child_index; - 11 | uint32_t structural_child_index; - 12 | const TSSymbol *alias_sequence; - 13 | } NodeChildIterator; - | - 14 | static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous); - | - 15 | // TSNode - constructors - | - 16 | TSNode ts_node_new( - 17 | const TSTree *tree, - 18 | const Subtree *subtree, - 19 | Length position, - 20 | TSSymbol alias - 21 | ) { - 22 | return (TSNode) { - 23 | {position.bytes, position.extent.row, position.extent.column, alias}, - 24 | subtree, - 25 | tree, - 26 | }; - 27 | } - | - 28 | static inline TSNode ts_node__null(void) { - 29 | return ts_node_new(NULL, NULL, length_zero(), 0); - 30 | } - | - 31 | // TSNode - accessors - | - 32 | uint32_t ts_node_start_byte(TSNode self) { - 33 | return self.context[0]; - 34 | } - | - 35 | TSPoint ts_node_start_point(TSNode self) { - 36 | return (TSPoint) {self.context[1], self.context[2]}; - 37 | } - | - 38 | static inline uint32_t ts_node__alias(const TSNode *self) { - 39 | return self->context[3]; - 40 | } - | - 41 | static inline Subtree ts_node__subtree(TSNode self) { - 42 | return *(const Subtree *)self.id; - 43 | } - | - 44 | // NodeChildIterator - | - 45 | static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { - 46 | Subtree subtree = ts_node__subtree(*node); - 47 | if (ts_subtree_child_count(subtree) == 0) { - 48 | return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; - 49 | } - 50 | const TSSymbol *alias_sequence = ts_language_alias_sequence( - 51 | node->tree->language, - 52 | subtree.ptr->production_id - 53 | ); - 54 | return (NodeChildIterator) { - 55 | .tree = node->tree, - 56 | .parent = subtree, - 57 | .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, - 58 | 
.child_index = 0, - 59 | .structural_child_index = 0, - 60 | .alias_sequence = alias_sequence, - 61 | }; - 62 | } - | - 63 | static inline bool ts_node_child_iterator_done(NodeChildIterator *self) { - 64 | return self->child_index == self->parent.ptr->child_count; - 65 | } - | - 66 | static inline bool ts_node_child_iterator_next( - 67 | NodeChildIterator *self, - 68 | TSNode *result - 69 | ) { - 70 | if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; - 71 | const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - 72 | TSSymbol alias_symbol = 0; - 73 | if (!ts_subtree_extra(*child)) { - 74 | if (self->alias_sequence) { - 75 | alias_symbol = self->alias_sequence[self->structural_child_index]; - 76 | } - 77 | self->structural_child_index++; - 78 | } - 79 | if (self->child_index > 0) { - 80 | self->position = length_add(self->position, ts_subtree_padding(*child)); - 81 | } - 82 | *result = ts_node_new( - 83 | self->tree, - 84 | child, - 85 | self->position, - 86 | alias_symbol - 87 | ); - 88 | self->position = length_add(self->position, ts_subtree_size(*child)); - 89 | self->child_index++; - 90 | return true; - 91 | } - | - 92 | // TSNode - private - | - 93 | static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { - 94 | Subtree tree = ts_node__subtree(self); - 95 | if (include_anonymous) { - 96 | return ts_subtree_visible(tree) || ts_node__alias(&self); - 97 | } else { - 98 | TSSymbol alias = ts_node__alias(&self); - 99 | if (alias) { - 100 | return ts_language_symbol_metadata(self.tree->language, alias).named; - 101 | } else { - 102 | return ts_subtree_visible(tree) && ts_subtree_named(tree); - 103 | } - 104 | } - 105 | } - | - 106 | static inline uint32_t ts_node__relevant_child_count( - 107 | TSNode self, - 108 | bool include_anonymous - 109 | ) { - 110 | Subtree tree = ts_node__subtree(self); - 111 | if (ts_subtree_child_count(tree) > 0) { - 112 | if (include_anonymous) { - 113 | return 
tree.ptr->visible_child_count; - 114 | } else { - 115 | return tree.ptr->named_child_count; - 116 | } - 117 | } else { - 118 | return 0; - 119 | } - 120 | } - | - 121 | static inline TSNode ts_node__child( - 122 | TSNode self, - 123 | uint32_t child_index, - 124 | bool include_anonymous - 125 | ) { - 126 | TSNode result = self; - 127 | bool did_descend = true; - | - 128 | while (did_descend) { - 129 | did_descend = false; - | - 130 | TSNode child; - 131 | uint32_t index = 0; - 132 | NodeChildIterator iterator = ts_node_iterate_children(&result); - 133 | while (ts_node_child_iterator_next(&iterator, &child)) { - 134 | if (ts_node__is_relevant(child, include_anonymous)) { - 135 | if (index == child_index) { - 136 | return child; - 137 | } - 138 | index++; - 139 | } else { - 140 | uint32_t grandchild_index = child_index - index; - 141 | uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous); - 142 | if (grandchild_index < grandchild_count) { - 143 | did_descend = true; - 144 | result = child; - 145 | child_index = grandchild_index; - 146 | break; - 147 | } - 148 | index += grandchild_count; - 149 | } - 150 | } - 151 | } - | - 152 | return ts_node__null(); - 153 | } - | - 154 | static bool ts_subtree_has_trailing_empty_descendant( - 155 | Subtree self, - 156 | Subtree other - 157 | ) { - 158 | for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) { - 159 | Subtree child = ts_subtree_children(self)[i]; - 160 | if (ts_subtree_total_bytes(child) > 0) break; - 161 | if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) { - 162 | return true; - 163 | } - 164 | } - 165 | return false; - 166 | } - | - 167 | static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) { - 168 | Subtree self_subtree = ts_node__subtree(self); - 169 | bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0; - 170 | uint32_t target_end_byte = ts_node_end_byte(self); - | - 171 | TSNode node = 
ts_node_parent(self); - 172 | TSNode earlier_node = ts_node__null(); - 173 | bool earlier_node_is_relevant = false; - | - 174 | while (!ts_node_is_null(node)) { - 175 | TSNode earlier_child = ts_node__null(); - 176 | bool earlier_child_is_relevant = false; - 177 | bool found_child_containing_target = false; - | - 178 | TSNode child; - 179 | NodeChildIterator iterator = ts_node_iterate_children(&node); - 180 | while (ts_node_child_iterator_next(&iterator, &child)) { - 181 | if (child.id == self.id) break; - 182 | if (iterator.position.bytes > target_end_byte) { - 183 | found_child_containing_target = true; - 184 | break; - 185 | } - | - 186 | if (iterator.position.bytes == target_end_byte && - 187 | (!self_is_empty || - 188 | ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) { - 189 | found_child_containing_target = true; - 190 | break; - 191 | } - | - 192 | if (ts_node__is_relevant(child, include_anonymous)) { - 193 | earlier_child = child; - 194 | earlier_child_is_relevant = true; - 195 | } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { - 196 | earlier_child = child; - 197 | earlier_child_is_relevant = false; - 198 | } - 199 | } - | - 200 | if (found_child_containing_target) { - 201 | if (!ts_node_is_null(earlier_child)) { - 202 | earlier_node = earlier_child; - 203 | earlier_node_is_relevant = earlier_child_is_relevant; - 204 | } - 205 | node = child; - 206 | } else if (earlier_child_is_relevant) { - 207 | return earlier_child; - 208 | } else if (!ts_node_is_null(earlier_child)) { - 209 | node = earlier_child; - 210 | } else if (earlier_node_is_relevant) { - 211 | return earlier_node; - 212 | } else { - 213 | node = earlier_node; - 214 | earlier_node = ts_node__null(); - 215 | earlier_node_is_relevant = false; - 216 | } - 217 | } - | - 218 | return ts_node__null(); - 219 | } - | - 220 | static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) { - 221 | uint32_t target_end_byte = 
ts_node_end_byte(self); - | - 222 | TSNode node = ts_node_parent(self); - 223 | TSNode later_node = ts_node__null(); - 224 | bool later_node_is_relevant = false; - | - 225 | while (!ts_node_is_null(node)) { - 226 | TSNode later_child = ts_node__null(); - 227 | bool later_child_is_relevant = false; - 228 | TSNode child_containing_target = ts_node__null(); - | - 229 | TSNode child; - 230 | NodeChildIterator iterator = ts_node_iterate_children(&node); - 231 | while (ts_node_child_iterator_next(&iterator, &child)) { - 232 | if (iterator.position.bytes <= target_end_byte) continue; - 233 | uint32_t start_byte = ts_node_start_byte(self); - 234 | uint32_t child_start_byte = ts_node_start_byte(child); - | - 235 | bool is_empty = start_byte == target_end_byte; - 236 | bool contains_target = is_empty ? - 237 | child_start_byte < start_byte : - 238 | child_start_byte <= start_byte; - | - 239 | if (contains_target) { - 240 | if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) { - 241 | child_containing_target = child; - 242 | } - 243 | } else if (ts_node__is_relevant(child, include_anonymous)) { - 244 | later_child = child; - 245 | later_child_is_relevant = true; - 246 | break; - 247 | } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) { - 248 | later_child = child; - 249 | later_child_is_relevant = false; - 250 | break; - 251 | } - 252 | } - | - 253 | if (!ts_node_is_null(child_containing_target)) { - 254 | if (!ts_node_is_null(later_child)) { - 255 | later_node = later_child; - 256 | later_node_is_relevant = later_child_is_relevant; - 257 | } - 258 | node = child_containing_target; - 259 | } else if (later_child_is_relevant) { - 260 | return later_child; - 261 | } else if (!ts_node_is_null(later_child)) { - 262 | node = later_child; - 263 | } else if (later_node_is_relevant) { - 264 | return later_node; - 265 | } else { - 266 | node = later_node; - 267 | } - 268 | } - | - 269 | return ts_node__null(); - 270 | } - | - 271 | static inline TSNode 
ts_node__first_child_for_byte( - 272 | TSNode self, - 273 | uint32_t goal, - 274 | bool include_anonymous - 275 | ) { - 276 | TSNode node = self; - 277 | bool did_descend = true; - | - 278 | NodeChildIterator last_iterator; - 279 | bool has_last_iterator = false; - | - 280 | while (did_descend) { - 281 | did_descend = false; - | - 282 | TSNode child; - 283 | NodeChildIterator iterator = ts_node_iterate_children(&node); - 284 | loop: - 285 | while (ts_node_child_iterator_next(&iterator, &child)) { - 286 | if (ts_node_end_byte(child) > goal) { - 287 | if (ts_node__is_relevant(child, include_anonymous)) { - 288 | return child; - 289 | } else if (ts_node_child_count(child) > 0) { - 290 | if (iterator.child_index < ts_subtree_child_count(ts_node__subtree(child))) { - 291 | last_iterator = iterator; - 292 | has_last_iterator = true; - 293 | } - 294 | did_descend = true; - 295 | node = child; - 296 | break; - 297 | } - 298 | } - 299 | } - | - 300 | if (!did_descend && has_last_iterator) { - 301 | iterator = last_iterator; - 302 | has_last_iterator = false; - 303 | goto loop; - 304 | } - 305 | } - | - 306 | return ts_node__null(); - 307 | } - | - 308 | static inline TSNode ts_node__descendant_for_byte_range( - 309 | TSNode self, - 310 | uint32_t range_start, - 311 | uint32_t range_end, - 312 | bool include_anonymous - 313 | ) { - 314 | if (range_start > range_end) { - 315 | return ts_node__null(); - 316 | } - 317 | TSNode node = self; - 318 | TSNode last_visible_node = self; - | - 319 | bool did_descend = true; - 320 | while (did_descend) { - 321 | did_descend = false; - | - 322 | TSNode child; - 323 | NodeChildIterator iterator = ts_node_iterate_children(&node); - 324 | while (ts_node_child_iterator_next(&iterator, &child)) { - 325 | uint32_t node_end = iterator.position.bytes; - | - 326 | // The end of this node must extend far enough forward to touch - 327 | // the end of the range - 328 | if (node_end < range_end) continue; - | - 329 | // ...and exceed the start of the 
range, unless the node itself is - 330 | // empty, in which case it must at least be equal to the start of the range. - 331 | bool is_empty = ts_node_start_byte(child) == node_end; - 332 | if (is_empty ? node_end < range_start : node_end <= range_start) continue; - | - 333 | // The start of this node must extend far enough backward to - 334 | // touch the start of the range. - 335 | if (range_start < ts_node_start_byte(child)) break; - | - 336 | node = child; - 337 | if (ts_node__is_relevant(node, include_anonymous)) { - 338 | last_visible_node = node; - 339 | } - 340 | did_descend = true; - 341 | break; - 342 | } - 343 | } - | - 344 | return last_visible_node; - 345 | } - | - 346 | static inline TSNode ts_node__descendant_for_point_range( - 347 | TSNode self, - 348 | TSPoint range_start, - 349 | TSPoint range_end, - 350 | bool include_anonymous - 351 | ) { - 352 | if (point_gt(range_start, range_end)) { - 353 | return ts_node__null(); - 354 | } - 355 | TSNode node = self; - 356 | TSNode last_visible_node = self; - | - 357 | bool did_descend = true; - 358 | while (did_descend) { - 359 | did_descend = false; - | - 360 | TSNode child; - 361 | NodeChildIterator iterator = ts_node_iterate_children(&node); - 362 | while (ts_node_child_iterator_next(&iterator, &child)) { - 363 | TSPoint node_end = iterator.position.extent; - | - 364 | // The end of this node must extend far enough forward to touch - 365 | // the end of the range - 366 | if (point_lt(node_end, range_end)) continue; - | - 367 | // ...and exceed the start of the range, unless the node itself is - 368 | // empty, in which case it must at least be equal to the start of the range. - 369 | bool is_empty = point_eq(ts_node_start_point(child), node_end); - 370 | if (is_empty ? point_lt(node_end, range_start) : point_lte(node_end, range_start)) { - 371 | continue; - 372 | } - | - 373 | // The start of this node must extend far enough backward to - 374 | // touch the start of the range. 
- 375 | if (point_lt(range_start, ts_node_start_point(child))) break; - | - 376 | node = child; - 377 | if (ts_node__is_relevant(node, include_anonymous)) { - 378 | last_visible_node = node; - 379 | } - 380 | did_descend = true; - 381 | break; - 382 | } - 383 | } - | - 384 | return last_visible_node; - 385 | } - | - 386 | // TSNode - public - | - 387 | uint32_t ts_node_end_byte(TSNode self) { - 388 | return ts_node_start_byte(self) + ts_subtree_size(ts_node__subtree(self)).bytes; - 389 | } - | - 390 | TSPoint ts_node_end_point(TSNode self) { - 391 | return point_add(ts_node_start_point(self), ts_subtree_size(ts_node__subtree(self)).extent); - 392 | } - | - 393 | TSSymbol ts_node_symbol(TSNode self) { - 394 | TSSymbol symbol = ts_node__alias(&self); - 395 | if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); - 396 | return ts_language_public_symbol(self.tree->language, symbol); - 397 | } - | - 398 | const char *ts_node_type(TSNode self) { - 399 | TSSymbol symbol = ts_node__alias(&self); - 400 | if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self)); - 401 | return ts_language_symbol_name(self.tree->language, symbol); - 402 | } - | - 403 | const TSLanguage *ts_node_language(TSNode self) { - 404 | return self.tree->language; - 405 | } - | - 406 | TSSymbol ts_node_grammar_symbol(TSNode self) { - 407 | return ts_subtree_symbol(ts_node__subtree(self)); - 408 | } - | - 409 | const char *ts_node_grammar_type(TSNode self) { - 410 | TSSymbol symbol = ts_subtree_symbol(ts_node__subtree(self)); - 411 | return ts_language_symbol_name(self.tree->language, symbol); - 412 | } - | - 413 | char *ts_node_string(TSNode self) { - 414 | TSSymbol alias_symbol = ts_node__alias(&self); - 415 | return ts_subtree_string( - 416 | ts_node__subtree(self), - 417 | alias_symbol, - 418 | ts_language_symbol_metadata(self.tree->language, alias_symbol).visible, - 419 | self.tree->language, - 420 | false - 421 | ); - 422 | } - | - 423 | bool ts_node_eq(TSNode self, TSNode other) 
{ - 424 | return self.tree == other.tree && self.id == other.id; - 425 | } - | - 426 | bool ts_node_is_null(TSNode self) { - 427 | return self.id == 0; - 428 | } - | - 429 | bool ts_node_is_extra(TSNode self) { - 430 | return ts_subtree_extra(ts_node__subtree(self)); - 431 | } - | - 432 | bool ts_node_is_named(TSNode self) { - 433 | TSSymbol alias = ts_node__alias(&self); - 434 | return alias - 435 | ? ts_language_symbol_metadata(self.tree->language, alias).named - 436 | : ts_subtree_named(ts_node__subtree(self)); - 437 | } - | - 438 | bool ts_node_is_missing(TSNode self) { - 439 | return ts_subtree_missing(ts_node__subtree(self)); - 440 | } - | - 441 | bool ts_node_has_changes(TSNode self) { - 442 | return ts_subtree_has_changes(ts_node__subtree(self)); - 443 | } - | - 444 | bool ts_node_has_error(TSNode self) { - 445 | return ts_subtree_error_cost(ts_node__subtree(self)) > 0; - 446 | } - | - 447 | bool ts_node_is_error(TSNode self) { - 448 | TSSymbol symbol = ts_node_symbol(self); - 449 | return symbol == ts_builtin_sym_error; - 450 | } - | - 451 | uint32_t ts_node_descendant_count(TSNode self) { - 452 | return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1; - 453 | } - | - 454 | TSStateId ts_node_parse_state(TSNode self) { - 455 | return ts_subtree_parse_state(ts_node__subtree(self)); - 456 | } - | - 457 | TSStateId ts_node_next_parse_state(TSNode self) { - 458 | const TSLanguage *language = self.tree->language; - 459 | uint16_t state = ts_node_parse_state(self); - 460 | if (state == TS_TREE_STATE_NONE) { - 461 | return TS_TREE_STATE_NONE; - 462 | } - 463 | uint16_t symbol = ts_node_grammar_symbol(self); - 464 | return ts_language_next_state(language, state, symbol); - 465 | } - | - 466 | TSNode ts_node_parent(TSNode self) { - 467 | TSNode node = ts_tree_root_node(self.tree); - 468 | if (node.id == self.id) return ts_node__null(); - | - 469 | while (true) { - 470 | TSNode next_node = ts_node_child_with_descendant(node, self); - 471 | if 
(next_node.id == self.id || ts_node_is_null(next_node)) break; - 472 | node = next_node; - 473 | } - | - 474 | return node; - 475 | } - | - 476 | TSNode ts_node_child_with_descendant(TSNode self, TSNode descendant) { - 477 | uint32_t start_byte = ts_node_start_byte(descendant); - 478 | uint32_t end_byte = ts_node_end_byte(descendant); - 479 | bool is_empty = start_byte == end_byte; - | - 480 | do { - 481 | NodeChildIterator iter = ts_node_iterate_children(&self); - 482 | do { - 483 | if ( - 484 | !ts_node_child_iterator_next(&iter, &self) - 485 | || ts_node_start_byte(self) > start_byte - 486 | ) { - 487 | return ts_node__null(); - 488 | } - 489 | if (self.id == descendant.id) { - 490 | return self; - 491 | } - | - 492 | // If the descendant is empty, and the end byte is within `self`, - 493 | // we check whether `self` contains it or not. - 494 | if (is_empty && iter.position.bytes >= end_byte && ts_node_child_count(self) > 0) { - 495 | TSNode child = ts_node_child_with_descendant(self, descendant); - 496 | // If the child is not null, return self if it's relevant, else return the child - 497 | if (!ts_node_is_null(child)) { - 498 | return ts_node__is_relevant(self, true) ? self : child; - 499 | } - 500 | } - 501 | } while ((is_empty ? 
iter.position.bytes <= end_byte : iter.position.bytes < end_byte) || ts_node_child_count(self) == 0); - 502 | } while (!ts_node__is_relevant(self, true)); - | - 503 | return self; - 504 | } - | - 505 | TSNode ts_node_child(TSNode self, uint32_t child_index) { - 506 | return ts_node__child(self, child_index, true); - 507 | } - | - 508 | TSNode ts_node_named_child(TSNode self, uint32_t child_index) { - 509 | return ts_node__child(self, child_index, false); - 510 | } - | - 511 | TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) { - 512 | recur: - 513 | if (!field_id || ts_node_child_count(self) == 0) return ts_node__null(); - | - 514 | const TSFieldMapEntry *field_map, *field_map_end; - 515 | ts_language_field_map( - 516 | self.tree->language, - 517 | ts_node__subtree(self).ptr->production_id, - 518 | &field_map, - 519 | &field_map_end - 520 | ); - 521 | if (field_map == field_map_end) return ts_node__null(); - | - 522 | // The field mappings are sorted by their field id. Scan all - 523 | // the mappings to find the ones for the given field id. - 524 | while (field_map->field_id < field_id) { - 525 | field_map++; - 526 | if (field_map == field_map_end) return ts_node__null(); - 527 | } - 528 | while (field_map_end[-1].field_id > field_id) { - 529 | field_map_end--; - 530 | if (field_map == field_map_end) return ts_node__null(); - 531 | } - | - 532 | TSNode child; - 533 | NodeChildIterator iterator = ts_node_iterate_children(&self); - 534 | while (ts_node_child_iterator_next(&iterator, &child)) { - 535 | if (!ts_subtree_extra(ts_node__subtree(child))) { - 536 | uint32_t index = iterator.structural_child_index - 1; - 537 | if (index < field_map->child_index) continue; - | - 538 | // Hidden nodes' fields are "inherited" by their visible parent. - 539 | if (field_map->inherited) { - | - 540 | // If this is the *last* possible child node for this field, - 541 | // then perform a tail call to avoid recursion. 
- 542 | if (field_map + 1 == field_map_end) { - 543 | self = child; - 544 | goto recur; - 545 | } - | - 546 | // Otherwise, descend into this child, but if it doesn't contain - 547 | // the field, continue searching subsequent children. - 548 | else { - 549 | TSNode result = ts_node_child_by_field_id(child, field_id); - 550 | if (result.id) return result; - 551 | field_map++; - 552 | if (field_map == field_map_end) return ts_node__null(); - 553 | } - 554 | } - | - 555 | else if (ts_node__is_relevant(child, true)) { - 556 | return child; - 557 | } - | - 558 | // If the field refers to a hidden node with visible children, - 559 | // return the first visible child. - 560 | else if (ts_node_child_count(child) > 0 ) { - 561 | return ts_node_child(child, 0); - 562 | } - | - 563 | // Otherwise, continue searching subsequent children. - 564 | else { - 565 | field_map++; - 566 | if (field_map == field_map_end) return ts_node__null(); - 567 | } - 568 | } - 569 | } - | - 570 | return ts_node__null(); - 571 | } - | - 572 | static inline const char *ts_node__field_name_from_language(TSNode self, uint32_t structural_child_index) { - 573 | const TSFieldMapEntry *field_map, *field_map_end; - 574 | ts_language_field_map( - 575 | self.tree->language, - 576 | ts_node__subtree(self).ptr->production_id, - 577 | &field_map, - 578 | &field_map_end - 579 | ); - 580 | for (; field_map != field_map_end; field_map++) { - 581 | if (!field_map->inherited && field_map->child_index == structural_child_index) { - 582 | return self.tree->language->field_names[field_map->field_id]; - 583 | } - 584 | } - 585 | return NULL; - 586 | } - | - 587 | const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { - 588 | TSNode result = self; - 589 | bool did_descend = true; - 590 | const char *inherited_field_name = NULL; - | - 591 | while (did_descend) { - 592 | did_descend = false; - | - 593 | TSNode child; - 594 | uint32_t index = 0; - 595 | NodeChildIterator iterator = 
ts_node_iterate_children(&result); - 596 | while (ts_node_child_iterator_next(&iterator, &child)) { - 597 | if (ts_node__is_relevant(child, true)) { - 598 | if (index == child_index) { - 599 | if (ts_node_is_extra(child)) { - 600 | return NULL; - 601 | } - 602 | const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); - 603 | if (field_name) return field_name; - 604 | return inherited_field_name; - 605 | } - 606 | index++; - 607 | } else { - 608 | uint32_t grandchild_index = child_index - index; - 609 | uint32_t grandchild_count = ts_node__relevant_child_count(child, true); - 610 | if (grandchild_index < grandchild_count) { - 611 | const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); - 612 | if (field_name) inherited_field_name = field_name; - | - 613 | did_descend = true; - 614 | result = child; - 615 | child_index = grandchild_index; - 616 | break; - 617 | } - 618 | index += grandchild_count; - 619 | } - 620 | } - 621 | } - | - 622 | return NULL; - 623 | } - | - 624 | const char *ts_node_field_name_for_named_child(TSNode self, uint32_t named_child_index) { - 625 | TSNode result = self; - 626 | bool did_descend = true; - 627 | const char *inherited_field_name = NULL; - | - 628 | while (did_descend) { - 629 | did_descend = false; - | - 630 | TSNode child; - 631 | uint32_t index = 0; - 632 | NodeChildIterator iterator = ts_node_iterate_children(&result); - 633 | while (ts_node_child_iterator_next(&iterator, &child)) { - 634 | if (ts_node__is_relevant(child, false)) { - 635 | if (index == named_child_index) { - 636 | if (ts_node_is_extra(child)) { - 637 | return NULL; - 638 | } - 639 | const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); - 640 | if (field_name) return field_name; - 641 | return inherited_field_name; - 642 | } - 643 | index++; - 644 | } else { - 645 | uint32_t named_grandchild_index = named_child_index 
- index; - 646 | uint32_t grandchild_count = ts_node__relevant_child_count(child, false); - 647 | if (named_grandchild_index < grandchild_count) { - 648 | const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); - 649 | if (field_name) inherited_field_name = field_name; - | - 650 | did_descend = true; - 651 | result = child; - 652 | named_child_index = named_grandchild_index; - 653 | break; - 654 | } - 655 | index += grandchild_count; - 656 | } - 657 | } - 658 | } - | - 659 | return NULL; - 660 | } - | - 661 | TSNode ts_node_child_by_field_name( - 662 | TSNode self, - 663 | const char *name, - 664 | uint32_t name_length - 665 | ) { - 666 | TSFieldId field_id = ts_language_field_id_for_name( - 667 | self.tree->language, - 668 | name, - 669 | name_length - 670 | ); - 671 | return ts_node_child_by_field_id(self, field_id); - 672 | } - | - 673 | uint32_t ts_node_child_count(TSNode self) { - 674 | Subtree tree = ts_node__subtree(self); - 675 | if (ts_subtree_child_count(tree) > 0) { - 676 | return tree.ptr->visible_child_count; - 677 | } else { - 678 | return 0; - 679 | } - 680 | } - | - 681 | uint32_t ts_node_named_child_count(TSNode self) { - 682 | Subtree tree = ts_node__subtree(self); - 683 | if (ts_subtree_child_count(tree) > 0) { - 684 | return tree.ptr->named_child_count; - 685 | } else { - 686 | return 0; - 687 | } - 688 | } - | - 689 | TSNode ts_node_next_sibling(TSNode self) { - 690 | return ts_node__next_sibling(self, true); - 691 | } - | - 692 | TSNode ts_node_next_named_sibling(TSNode self) { - 693 | return ts_node__next_sibling(self, false); - 694 | } - | - 695 | TSNode ts_node_prev_sibling(TSNode self) { - 696 | return ts_node__prev_sibling(self, true); - 697 | } - | - 698 | TSNode ts_node_prev_named_sibling(TSNode self) { - 699 | return ts_node__prev_sibling(self, false); - 700 | } - | - 701 | TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) { - 702 | return 
ts_node__first_child_for_byte(self, byte, true); - 703 | } - | - 704 | TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) { - 705 | return ts_node__first_child_for_byte(self, byte, false); - 706 | } - | - 707 | TSNode ts_node_descendant_for_byte_range( - 708 | TSNode self, - 709 | uint32_t start, - 710 | uint32_t end - 711 | ) { - 712 | return ts_node__descendant_for_byte_range(self, start, end, true); - 713 | } - | - 714 | TSNode ts_node_named_descendant_for_byte_range( - 715 | TSNode self, - 716 | uint32_t start, - 717 | uint32_t end - 718 | ) { - 719 | return ts_node__descendant_for_byte_range(self, start, end, false); - 720 | } - | - 721 | TSNode ts_node_descendant_for_point_range( - 722 | TSNode self, - 723 | TSPoint start, - 724 | TSPoint end - 725 | ) { - 726 | return ts_node__descendant_for_point_range(self, start, end, true); - 727 | } - | - 728 | TSNode ts_node_named_descendant_for_point_range( - 729 | TSNode self, - 730 | TSPoint start, - 731 | TSPoint end - 732 | ) { - 733 | return ts_node__descendant_for_point_range(self, start, end, false); - 734 | } - | - 735 | void ts_node_edit(TSNode *self, const TSInputEdit *edit) { - 736 | uint32_t start_byte = ts_node_start_byte(*self); - 737 | TSPoint start_point = ts_node_start_point(*self); - | - 738 | ts_point_edit(&start_point, &start_byte, edit); - | - 739 | self->context[0] = start_byte; - 740 | self->context[1] = start_point.row; - 741 | self->context[2] = start_point.column; - 742 | } - - - --------------------------------------------------------------------------------- -/lib/src/parser.c: --------------------------------------------------------------------------------- - 1 | #include - 2 | #include - 3 | #include - 4 | #include - 5 | #include "tree_sitter/api.h" - 6 | #include "./alloc.h" - 7 | #include "./array.h" - 8 | #include "./error_costs.h" - 9 | #include "./get_changed_ranges.h" - 10 | #include "./language.h" - 11 | #include "./length.h" - 12 | #include "./lexer.h" - 13 | 
#include "./reduce_action.h" - 14 | #include "./reusable_node.h" - 15 | #include "./stack.h" - 16 | #include "./subtree.h" - 17 | #include "./tree.h" - 18 | #include "./ts_assert.h" - 19 | #include "./wasm_store.h" - | - 20 | #define LOG(...) \ - 21 | if (self->lexer.logger.log || self->dot_graph_file) { \ - 22 | snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \ - 23 | ts_parser__log(self); \ - 24 | } - | - 25 | #define LOG_LOOKAHEAD(symbol_name, size) \ - 26 | if (self->lexer.logger.log || self->dot_graph_file) { \ - 27 | char *buf = self->lexer.debug_buffer; \ - 28 | const char *symbol = symbol_name; \ - 29 | int off = snprintf( \ - 30 | buf, \ - 31 | TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \ - 32 | "lexed_lookahead sym:" \ - 33 | ); \ - 34 | for ( \ - 35 | int i = 0; \ - 36 | symbol[i] != '\0' \ - 37 | && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; \ - 38 | i++ \ - 39 | ) { \ - 40 | switch (symbol[i]) { \ - 41 | case '\t': buf[off++] = '\\'; buf[off++] = 't'; break; \ - 42 | case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break; \ - 43 | case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break; \ - 44 | case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break; \ - 45 | case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break; \ - 46 | case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \ - 47 | default: buf[off++] = symbol[i]; break; \ - 48 | } \ - 49 | } \ - 50 | snprintf( \ - 51 | buf + off, \ - 52 | TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off, \ - 53 | ", size:%u", \ - 54 | size \ - 55 | ); \ - 56 | ts_parser__log(self); \ - 57 | } - | - 58 | #define LOG_STACK() \ - 59 | if (self->dot_graph_file) { \ - 60 | ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \ - 61 | fputs("\n\n", self->dot_graph_file); \ - 62 | } - | - 63 | #define LOG_TREE(tree) \ - 64 | if (self->dot_graph_file) { \ - 65 | ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \ - 66 | fputs("\n", 
self->dot_graph_file); \ - 67 | } - | - 68 | #define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol) - | - 69 | #define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree)) - | - 70 | static const unsigned MAX_VERSION_COUNT = 6; - 71 | static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; - 72 | static const unsigned MAX_SUMMARY_DEPTH = 16; - 73 | static const unsigned MAX_COST_DIFFERENCE = 18 * ERROR_COST_PER_SKIPPED_TREE; - 74 | static const unsigned OP_COUNT_PER_PARSER_CALLBACK_CHECK = 100; - | - 75 | typedef struct { - 76 | Subtree token; - 77 | Subtree last_external_token; - 78 | uint32_t byte_index; - 79 | } TokenCache; - | - 80 | struct TSParser { - 81 | Lexer lexer; - 82 | Stack *stack; - 83 | SubtreePool tree_pool; - 84 | const TSLanguage *language; - 85 | TSWasmStore *wasm_store; - 86 | ReduceActionSet reduce_actions; - 87 | Subtree finished_tree; - 88 | SubtreeArray trailing_extras; - 89 | SubtreeArray trailing_extras2; - 90 | SubtreeArray scratch_trees; - 91 | TokenCache token_cache; - 92 | ReusableNode reusable_node; - 93 | void *external_scanner_payload; - 94 | FILE *dot_graph_file; - 95 | unsigned accept_count; - 96 | unsigned operation_count; - 97 | Subtree old_tree; - 98 | TSRangeArray included_range_differences; - 99 | TSParseOptions parse_options; - 100 | TSParseState parse_state; - 101 | unsigned included_range_difference_index; - 102 | bool has_scanner_error; - 103 | bool canceled_balancing; - 104 | bool has_error; - 105 | }; - | - 106 | typedef struct { - 107 | unsigned cost; - 108 | unsigned node_count; - 109 | int dynamic_precedence; - 110 | bool is_in_error; - 111 | } ErrorStatus; - | - 112 | typedef enum { - 113 | ErrorComparisonTakeLeft, - 114 | ErrorComparisonPreferLeft, - 115 | ErrorComparisonNone, - 116 | ErrorComparisonPreferRight, - 117 | ErrorComparisonTakeRight, - 118 | } ErrorComparison; - | - 119 | typedef struct { - 120 | const char *string; - 121 | uint32_t length; - 122 | } TSStringInput; - | - 123 | // 
StringInput - | - 124 | static const char *ts_string_input_read( - 125 | void *_self, - 126 | uint32_t byte, - 127 | TSPoint point, - 128 | uint32_t *length - 129 | ) { - 130 | (void)point; - 131 | TSStringInput *self = (TSStringInput *)_self; - 132 | if (byte >= self->length) { - 133 | *length = 0; - 134 | return ""; - 135 | } else { - 136 | *length = self->length - byte; - 137 | return self->string + byte; - 138 | } - 139 | } - | - 140 | // Parser - Private - | - 141 | static void ts_parser__log(TSParser *self) { - 142 | if (self->lexer.logger.log) { - 143 | self->lexer.logger.log( - 144 | self->lexer.logger.payload, - 145 | TSLogTypeParse, - 146 | self->lexer.debug_buffer - 147 | ); - 148 | } - | - 149 | if (self->dot_graph_file) { - 150 | fprintf(self->dot_graph_file, "graph {\nlabel=\""); - 151 | for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) { - 152 | if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file); - 153 | fputc(*chr, self->dot_graph_file); - 154 | } - 155 | fprintf(self->dot_graph_file, "\"\n}\n\n"); - 156 | } - 157 | } - | - 158 | static bool ts_parser__breakdown_top_of_stack( - 159 | TSParser *self, - 160 | StackVersion version - 161 | ) { - 162 | bool did_break_down = false; - 163 | bool pending = false; - | - 164 | do { - 165 | StackSliceArray pop = ts_stack_pop_pending(self->stack, version); - 166 | if (!pop.size) break; - | - 167 | did_break_down = true; - 168 | pending = false; - 169 | for (uint32_t i = 0; i < pop.size; i++) { - 170 | StackSlice slice = *array_get(&pop, i); - 171 | TSStateId state = ts_stack_state(self->stack, slice.version); - 172 | Subtree parent = *array_front(&slice.subtrees); - | - 173 | for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) { - 174 | Subtree child = ts_subtree_children(parent)[j]; - 175 | pending = ts_subtree_child_count(child) > 0; - | - 176 | if (ts_subtree_is_error(child)) { - 177 | state = ERROR_STATE; - 178 | } else if (!ts_subtree_extra(child)) { - 179 | 
state = ts_language_next_state(self->language, state, ts_subtree_symbol(child)); - 180 | } - | - 181 | ts_subtree_retain(child); - 182 | ts_stack_push(self->stack, slice.version, child, pending, state); - 183 | } - | - 184 | for (uint32_t j = 1; j < slice.subtrees.size; j++) { - 185 | Subtree tree = *array_get(&slice.subtrees, j); - 186 | ts_stack_push(self->stack, slice.version, tree, false, state); - 187 | } - | - 188 | ts_subtree_release(&self->tree_pool, parent); - 189 | array_delete(&slice.subtrees); - | - 190 | LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent)); - 191 | LOG_STACK(); - 192 | } - 193 | } while (pending); - | - 194 | return did_break_down; - 195 | } - | - 196 | static void ts_parser__breakdown_lookahead( - 197 | TSParser *self, - 198 | Subtree *lookahead, - 199 | TSStateId state, - 200 | ReusableNode *reusable_node - 201 | ) { - 202 | bool did_descend = false; - 203 | Subtree tree = reusable_node_tree(reusable_node); - 204 | while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) { - 205 | LOG("state_mismatch sym:%s", TREE_NAME(tree)); - 206 | reusable_node_descend(reusable_node); - 207 | tree = reusable_node_tree(reusable_node); - 208 | did_descend = true; - 209 | } - | - 210 | if (did_descend) { - 211 | ts_subtree_release(&self->tree_pool, *lookahead); - 212 | *lookahead = tree; - 213 | ts_subtree_retain(*lookahead); - 214 | } - 215 | } - | - 216 | static ErrorComparison ts_parser__compare_versions( - 217 | TSParser *self, - 218 | ErrorStatus a, - 219 | ErrorStatus b - 220 | ) { - 221 | (void)self; - 222 | if (!a.is_in_error && b.is_in_error) { - 223 | if (a.cost < b.cost) { - 224 | return ErrorComparisonTakeLeft; - 225 | } else { - 226 | return ErrorComparisonPreferLeft; - 227 | } - 228 | } - | - 229 | if (a.is_in_error && !b.is_in_error) { - 230 | if (b.cost < a.cost) { - 231 | return ErrorComparisonTakeRight; - 232 | } else { - 233 | return ErrorComparisonPreferRight; - 234 | } - 235 | } - | - 236 | if 
(a.cost < b.cost) { - 237 | if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) { - 238 | return ErrorComparisonTakeLeft; - 239 | } else { - 240 | return ErrorComparisonPreferLeft; - 241 | } - 242 | } - | - 243 | if (b.cost < a.cost) { - 244 | if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) { - 245 | return ErrorComparisonTakeRight; - 246 | } else { - 247 | return ErrorComparisonPreferRight; - 248 | } - 249 | } - | - 250 | if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft; - 251 | if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight; - 252 | return ErrorComparisonNone; - 253 | } - | - 254 | static ErrorStatus ts_parser__version_status( - 255 | TSParser *self, - 256 | StackVersion version - 257 | ) { - 258 | unsigned cost = ts_stack_error_cost(self->stack, version); - 259 | bool is_paused = ts_stack_is_paused(self->stack, version); - 260 | if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE; - 261 | return (ErrorStatus) { - 262 | .cost = cost, - 263 | .node_count = ts_stack_node_count_since_error(self->stack, version), - 264 | .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - 265 | .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE - 266 | }; - 267 | } - | - 268 | static bool ts_parser__better_version_exists( - 269 | TSParser *self, - 270 | StackVersion version, - 271 | bool is_in_error, - 272 | unsigned cost - 273 | ) { - 274 | if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) { - 275 | return true; - 276 | } - | - 277 | Length position = ts_stack_position(self->stack, version); - 278 | ErrorStatus status = { - 279 | .cost = cost, - 280 | .is_in_error = is_in_error, - 281 | .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version), - 282 | .node_count = ts_stack_node_count_since_error(self->stack, version), - 283 | }; - | - 284 | for (StackVersion i = 0, n = 
ts_stack_version_count(self->stack); i < n; i++) { - 285 | if (i == version || - 286 | !ts_stack_is_active(self->stack, i) || - 287 | ts_stack_position(self->stack, i).bytes < position.bytes) continue; - 288 | ErrorStatus status_i = ts_parser__version_status(self, i); - 289 | switch (ts_parser__compare_versions(self, status, status_i)) { - 290 | case ErrorComparisonTakeRight: - 291 | return true; - 292 | case ErrorComparisonPreferRight: - 293 | if (ts_stack_can_merge(self->stack, i, version)) return true; - 294 | break; - 295 | default: - 296 | break; - 297 | } - 298 | } - | - 299 | return false; - 300 | } - | - 301 | static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexerMode lex_mode) { - 302 | if (ts_language_is_wasm(self->language)) { - 303 | return ts_wasm_store_call_lex_main(self->wasm_store, lex_mode.lex_state); - 304 | } else { - 305 | return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); - 306 | } - 307 | } - | - 308 | static bool ts_parser__call_keyword_lex_fn(TSParser *self) { - 309 | if (ts_language_is_wasm(self->language)) { - 310 | return ts_wasm_store_call_lex_keyword(self->wasm_store, 0); - 311 | } else { - 312 | return self->language->keyword_lex_fn(&self->lexer.data, 0); - 313 | } - 314 | } - | - 315 | static void ts_parser__external_scanner_create( - 316 | TSParser *self - 317 | ) { - 318 | if (self->language && self->language->external_scanner.states) { - 319 | if (ts_language_is_wasm(self->language)) { - 320 | self->external_scanner_payload = (void *)(uintptr_t)ts_wasm_store_call_scanner_create( - 321 | self->wasm_store - 322 | ); - 323 | if (ts_wasm_store_has_error(self->wasm_store)) { - 324 | self->has_scanner_error = true; - 325 | } - 326 | } else if (self->language->external_scanner.create) { - 327 | self->external_scanner_payload = self->language->external_scanner.create(); - 328 | } - 329 | } - 330 | } - | - 331 | static void ts_parser__external_scanner_destroy( - 332 | TSParser *self - 333 | ) { - 334 | if ( - 335 
| self->language && - 336 | self->external_scanner_payload && - 337 | self->language->external_scanner.destroy && - 338 | !ts_language_is_wasm(self->language) - 339 | ) { - 340 | self->language->external_scanner.destroy( - 341 | self->external_scanner_payload - 342 | ); - 343 | } - 344 | self->external_scanner_payload = NULL; - 345 | } - | - 346 | static unsigned ts_parser__external_scanner_serialize( - 347 | TSParser *self - 348 | ) { - 349 | uint32_t length; - 350 | if (ts_language_is_wasm(self->language)) { - 351 | length = ts_wasm_store_call_scanner_serialize( - 352 | self->wasm_store, - 353 | (uintptr_t)self->external_scanner_payload, - 354 | self->lexer.debug_buffer - 355 | ); - 356 | if (ts_wasm_store_has_error(self->wasm_store)) { - 357 | self->has_scanner_error = true; - 358 | } - 359 | } else { - 360 | length = self->language->external_scanner.serialize( - 361 | self->external_scanner_payload, - 362 | self->lexer.debug_buffer - 363 | ); - 364 | } - 365 | ts_assert(length <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE); - 366 | return length; - 367 | } - | - 368 | static void ts_parser__external_scanner_deserialize( - 369 | TSParser *self, - 370 | Subtree external_token - 371 | ) { - 372 | const char *data = NULL; - 373 | uint32_t length = 0; - 374 | if (external_token.ptr) { - 375 | data = ts_external_scanner_state_data(&external_token.ptr->external_scanner_state); - 376 | length = external_token.ptr->external_scanner_state.length; - 377 | } - | - 378 | if (ts_language_is_wasm(self->language)) { - 379 | ts_wasm_store_call_scanner_deserialize( - 380 | self->wasm_store, - 381 | (uintptr_t)self->external_scanner_payload, - 382 | data, - 383 | length - 384 | ); - 385 | if (ts_wasm_store_has_error(self->wasm_store)) { - 386 | self->has_scanner_error = true; - 387 | } - 388 | } else { - 389 | self->language->external_scanner.deserialize( - 390 | self->external_scanner_payload, - 391 | data, - 392 | length - 393 | ); - 394 | } - 395 | } - | - 396 | static bool 
ts_parser__external_scanner_scan( - 397 | TSParser *self, - 398 | TSStateId external_lex_state - 399 | ) { - 400 | if (ts_language_is_wasm(self->language)) { - 401 | bool result = ts_wasm_store_call_scanner_scan( - 402 | self->wasm_store, - 403 | (uintptr_t)self->external_scanner_payload, - 404 | external_lex_state * self->language->external_token_count - 405 | ); - 406 | if (ts_wasm_store_has_error(self->wasm_store)) { - 407 | self->has_scanner_error = true; - 408 | } - 409 | return result; - 410 | } else { - 411 | const bool *valid_external_tokens = ts_language_enabled_external_tokens( - 412 | self->language, - 413 | external_lex_state - 414 | ); - 415 | return self->language->external_scanner.scan( - 416 | self->external_scanner_payload, - 417 | &self->lexer.data, - 418 | valid_external_tokens - 419 | ); - 420 | } - 421 | } - | - 422 | static bool ts_parser__can_reuse_first_leaf( - 423 | TSParser *self, - 424 | TSStateId state, - 425 | Subtree tree, - 426 | TableEntry *table_entry - 427 | ) { - 428 | TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree); - 429 | TSStateId leaf_state = ts_subtree_leaf_parse_state(tree); - 430 | TSLexerMode current_lex_mode = ts_language_lex_mode_for_state(self->language, state); - 431 | TSLexerMode leaf_lex_mode = ts_language_lex_mode_for_state(self->language, leaf_state); - | - 432 | // At the end of a non-terminal extra node, the lexer normally returns - 433 | // NULL, which indicates that the parser should look for a reduce action - 434 | // at symbol `0`. Avoid reusing tokens in this situation to ensure that - 435 | // the same thing happens when incrementally reparsing. - 436 | if (current_lex_mode.lex_state == (uint16_t)(-1)) return false; - | - 437 | // If the token was created in a state with the same set of lookaheads, it is reusable. 
- 438 | if ( - 439 | table_entry->action_count > 0 && - 440 | memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexerMode)) == 0 && - 441 | ( - 442 | leaf_symbol != self->language->keyword_capture_token || - 443 | (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state) - 444 | ) - 445 | ) return true; - | - 446 | // Empty tokens are not reusable in states with different lookaheads. - 447 | if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false; - | - 448 | // If the current state allows external tokens or other tokens that conflict with this - 449 | // token, this token is not reusable. - 450 | return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable; - 451 | } - | - 452 | static Subtree ts_parser__lex( - 453 | TSParser *self, - 454 | StackVersion version, - 455 | TSStateId parse_state - 456 | ) { - 457 | TSLexerMode lex_mode = ts_language_lex_mode_for_state(self->language, parse_state); - 458 | if (lex_mode.lex_state == (uint16_t)-1) { - 459 | LOG("no_lookahead_after_non_terminal_extra"); - 460 | return NULL_SUBTREE; - 461 | } - | - 462 | const Length start_position = ts_stack_position(self->stack, version); - 463 | const Subtree external_token = ts_stack_last_external_token(self->stack, version); - | - 464 | bool found_external_token = false; - 465 | bool error_mode = parse_state == ERROR_STATE; - 466 | bool skipped_error = false; - 467 | bool called_get_column = false; - 468 | int32_t first_error_character = 0; - 469 | Length error_start_position = length_zero(); - 470 | Length error_end_position = length_zero(); - 471 | uint32_t lookahead_end_byte = 0; - 472 | uint32_t external_scanner_state_len = 0; - 473 | bool external_scanner_state_changed = false; - 474 | ts_lexer_reset(&self->lexer, start_position); - | - 475 | for (;;) { - 476 | bool found_token = false; - 477 | Length current_position = self->lexer.current_position; - 478 | ColumnData column_data = self->lexer.column_data; - | - 479 | if
(lex_mode.external_lex_state != 0) { - 480 | LOG( - 481 | "lex_external state:%d, row:%u, column:%u", - 482 | lex_mode.external_lex_state, - 483 | current_position.extent.row, - 484 | current_position.extent.column - 485 | ); - 486 | ts_lexer_start(&self->lexer); - 487 | ts_parser__external_scanner_deserialize(self, external_token); - 488 | found_token = ts_parser__external_scanner_scan(self, lex_mode.external_lex_state); - 489 | if (self->has_scanner_error) return NULL_SUBTREE; - 490 | ts_lexer_finish(&self->lexer, &lookahead_end_byte); - | - 491 | if (found_token) { - 492 | external_scanner_state_len = ts_parser__external_scanner_serialize(self); - 493 | external_scanner_state_changed = !ts_external_scanner_state_eq( - 494 | ts_subtree_external_scanner_state(external_token), - 495 | self->lexer.debug_buffer, - 496 | external_scanner_state_len - 497 | ); - | - 498 | // Avoid infinite loops caused by the external scanner returning empty tokens. - 499 | // Empty tokens are needed in some circumstances, e.g. indent/dedent tokens - 500 | // in Python. Ignore the following classes of empty tokens: - 501 | // - 502 | // * Tokens produced during error recovery. When recovering from an error, - 503 | // all tokens are allowed, so it's easy to accidentally return unwanted - 504 | // empty tokens. - 505 | // * Tokens that are marked as 'extra' in the grammar. These don't change - 506 | // the parse state, so they would definitely cause an infinite loop. 
- 507 | if ( - 508 | self->lexer.token_end_position.bytes <= current_position.bytes && - 509 | !external_scanner_state_changed - 510 | ) { - 511 | TSSymbol symbol = self->language->external_scanner.symbol_map[self->lexer.data.result_symbol]; - 512 | TSStateId next_parse_state = ts_language_next_state(self->language, parse_state, symbol); - 513 | bool token_is_extra = (next_parse_state == parse_state); - 514 | if (error_mode || !ts_stack_has_advanced_since_error(self->stack, version) || token_is_extra) { - 515 | LOG( - 516 | "ignore_empty_external_token symbol:%s", - 517 | SYM_NAME(self->language->external_scanner.symbol_map[self->lexer.data.result_symbol]) - 518 | ); - 519 | found_token = false; - 520 | } - 521 | } - 522 | } - | - 523 | if (found_token) { - 524 | found_external_token = true; - 525 | called_get_column = self->lexer.did_get_column; - 526 | break; - 527 | } - | - 528 | ts_lexer_reset(&self->lexer, current_position); - 529 | self->lexer.column_data = column_data; - 530 | } - | - 531 | LOG( - 532 | "lex_internal state:%d, row:%u, column:%u", - 533 | lex_mode.lex_state, - 534 | current_position.extent.row, - 535 | current_position.extent.column - 536 | ); - 537 | ts_lexer_start(&self->lexer); - 538 | found_token = ts_parser__call_main_lex_fn(self, lex_mode); - 539 | ts_lexer_finish(&self->lexer, &lookahead_end_byte); - 540 | if (found_token) break; - | - 541 | if (!error_mode) { - 542 | error_mode = true; - 543 | lex_mode = ts_language_lex_mode_for_state(self->language, ERROR_STATE); - 544 | ts_lexer_reset(&self->lexer, start_position); - 545 | continue; - 546 | } - | - 547 | if (!skipped_error) { - 548 | LOG("skip_unrecognized_character"); - 549 | skipped_error = true; - 550 | error_start_position = self->lexer.token_start_position; - 551 | error_end_position = self->lexer.token_start_position; - 552 | first_error_character = self->lexer.data.lookahead; - 553 | } - | - 554 | if (self->lexer.current_position.bytes == error_end_position.bytes) { - 555 | 
if (self->lexer.data.eof(&self->lexer.data)) { - 556 | self->lexer.data.result_symbol = ts_builtin_sym_error; - 557 | break; - 558 | } - 559 | self->lexer.data.advance(&self->lexer.data, false); - 560 | } - | - 561 | error_end_position = self->lexer.current_position; - 562 | } - | - 563 | Subtree result; - 564 | if (skipped_error) { - 565 | Length padding = length_sub(error_start_position, start_position); - 566 | Length size = length_sub(error_end_position, error_start_position); - 567 | uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes; - 568 | result = ts_subtree_new_error( - 569 | &self->tree_pool, - 570 | first_error_character, - 571 | padding, - 572 | size, - 573 | lookahead_bytes, - 574 | parse_state, - 575 | self->language - 576 | ); - 577 | } else { - 578 | bool is_keyword = false; - 579 | TSSymbol symbol = self->lexer.data.result_symbol; - 580 | Length padding = length_sub(self->lexer.token_start_position, start_position); - 581 | Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position); - 582 | uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes; - | - 583 | if (found_external_token) { - 584 | symbol = self->language->external_scanner.symbol_map[symbol]; - 585 | } else if (symbol == self->language->keyword_capture_token && symbol != 0) { - 586 | uint32_t end_byte = self->lexer.token_end_position.bytes; - 587 | ts_lexer_reset(&self->lexer, self->lexer.token_start_position); - 588 | ts_lexer_start(&self->lexer); - | - 589 | is_keyword = ts_parser__call_keyword_lex_fn(self); - | - 590 | if ( - 591 | is_keyword && - 592 | self->lexer.token_end_position.bytes == end_byte && - 593 | ( - 594 | ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol) || - 595 | ts_language_is_reserved_word(self->language, parse_state, self->lexer.data.result_symbol) - 596 | ) - 597 | ) { - 598 | symbol = self->lexer.data.result_symbol; - 599 | } - 600 | } - | - 
601 | result = ts_subtree_new_leaf( - 602 | &self->tree_pool, - 603 | symbol, - 604 | padding, - 605 | size, - 606 | lookahead_bytes, - 607 | parse_state, - 608 | found_external_token, - 609 | called_get_column, - 610 | is_keyword, - 611 | self->language - 612 | ); - | - 613 | if (found_external_token) { - 614 | MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result); - 615 | ts_external_scanner_state_init( - 616 | &mut_result.ptr->external_scanner_state, - 617 | self->lexer.debug_buffer, - 618 | external_scanner_state_len - 619 | ); - 620 | mut_result.ptr->has_external_scanner_state_change = external_scanner_state_changed; - 621 | } - 622 | } - | - 623 | LOG_LOOKAHEAD( - 624 | SYM_NAME(ts_subtree_symbol(result)), - 625 | ts_subtree_total_size(result).bytes - 626 | ); - 627 | return result; - 628 | } - | - 629 | static Subtree ts_parser__get_cached_token( - 630 | TSParser *self, - 631 | TSStateId state, - 632 | size_t position, - 633 | Subtree last_external_token, - 634 | TableEntry *table_entry - 635 | ) { - 636 | TokenCache *cache = &self->token_cache; - 637 | if ( - 638 | cache->token.ptr && cache->byte_index == position && - 639 | ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token) - 640 | ) { - 641 | ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry); - 642 | if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) { - 643 | ts_subtree_retain(cache->token); - 644 | return cache->token; - 645 | } - 646 | } - 647 | return NULL_SUBTREE; - 648 | } - | - 649 | static void ts_parser__set_cached_token( - 650 | TSParser *self, - 651 | uint32_t byte_index, - 652 | Subtree last_external_token, - 653 | Subtree token - 654 | ) { - 655 | TokenCache *cache = &self->token_cache; - 656 | if (token.ptr) ts_subtree_retain(token); - 657 | if (last_external_token.ptr) ts_subtree_retain(last_external_token); - 658 | if (cache->token.ptr) ts_subtree_release(&self->tree_pool, 
cache->token); - 659 | if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token); - 660 | cache->token = token; - 661 | cache->byte_index = byte_index; - 662 | cache->last_external_token = last_external_token; - 663 | } - | - 664 | static bool ts_parser__has_included_range_difference( - 665 | const TSParser *self, - 666 | uint32_t start_position, - 667 | uint32_t end_position - 668 | ) { - 669 | return ts_range_array_intersects( - 670 | &self->included_range_differences, - 671 | self->included_range_difference_index, - 672 | start_position, - 673 | end_position - 674 | ); - 675 | } - | - 676 | static Subtree ts_parser__reuse_node( - 677 | TSParser *self, - 678 | StackVersion version, - 679 | TSStateId *state, - 680 | uint32_t position, - 681 | Subtree last_external_token, - 682 | TableEntry *table_entry - 683 | ) { - 684 | Subtree result; - 685 | while ((result = reusable_node_tree(&self->reusable_node)).ptr) { - 686 | uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node); - 687 | uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result); - | - 688 | // Do not reuse an EOF node if the included ranges array has changes - 689 | // later on in the file. 
- 690 | if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX; - | - 691 | if (byte_offset > position) { - 692 | LOG("before_reusable_node symbol:%s", TREE_NAME(result)); - 693 | break; - 694 | } - | - 695 | if (byte_offset < position) { - 696 | LOG("past_reusable_node symbol:%s", TREE_NAME(result)); - 697 | if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) { - 698 | reusable_node_advance(&self->reusable_node); - 699 | } - 700 | continue; - 701 | } - | - 702 | if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) { - 703 | LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result)); - 704 | reusable_node_advance(&self->reusable_node); - 705 | continue; - 706 | } - | - 707 | const char *reason = NULL; - 708 | if (ts_subtree_has_changes(result)) { - 709 | reason = "has_changes"; - 710 | } else if (ts_subtree_is_error(result)) { - 711 | reason = "is_error"; - 712 | } else if (ts_subtree_missing(result)) { - 713 | reason = "is_missing"; - 714 | } else if (ts_subtree_is_fragile(result)) { - 715 | reason = "is_fragile"; - 716 | } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) { - 717 | reason = "contains_different_included_range"; - 718 | } - | - 719 | if (reason) { - 720 | LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result)); - 721 | if (!reusable_node_descend(&self->reusable_node)) { - 722 | reusable_node_advance(&self->reusable_node); - 723 | ts_parser__breakdown_top_of_stack(self, version); - 724 | *state = ts_stack_state(self->stack, version); - 725 | } - 726 | continue; - 727 | } - | - 728 | TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result); - 729 | ts_language_table_entry(self->language, *state, leaf_symbol, table_entry); - 730 | if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) { - 731 | LOG( - 732 | "cant_reuse_node symbol:%s, first_leaf_symbol:%s", - 733 | TREE_NAME(result), 
- 734 | SYM_NAME(leaf_symbol) - 735 | ); - 736 | reusable_node_advance_past_leaf(&self->reusable_node); - 737 | break; - 738 | } - | - 739 | LOG("reuse_node symbol:%s", TREE_NAME(result)); - 740 | ts_subtree_retain(result); - 741 | return result; - 742 | } - | - 743 | return NULL_SUBTREE; - 744 | } - | - 745 | // Determine if a given tree should be replaced by an alternative tree. - 746 | // - 747 | // The decision is based on the trees' error costs (if any), their dynamic precedence, - 748 | // and finally, as a default, by a recursive comparison of the trees' symbols. - 749 | static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) { - 750 | if (!left.ptr) return true; - 751 | if (!right.ptr) return false; - | - 752 | if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) { - 753 | LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); - 754 | return true; - 755 | } - | - 756 | if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) { - 757 | LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); - 758 | return false; - 759 | } - | - 760 | if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) { - 761 | LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, - 762 | TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left), - 763 | ts_subtree_dynamic_precedence(left)); - 764 | return true; - 765 | } - | - 766 | if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) { - 767 | LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, - 768 | TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right), - 769 | ts_subtree_dynamic_precedence(right)); - 770 | return false; - 771 | } - | - 772 | if (ts_subtree_error_cost(left) > 0) return true; - | - 773 | int comparison = ts_subtree_compare(left, right, 
&self->tree_pool); - 774 | switch (comparison) { - 775 | case -1: - 776 | LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); - 777 | return false; - 778 | break; - 779 | case 1: - 780 | LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left)); - 781 | return true; - 782 | default: - 783 | LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); - 784 | return false; - 785 | } - 786 | } - | - 787 | // Determine if a given tree's children should be replaced by an alternative - 788 | // array of children. - 789 | static bool ts_parser__select_children( - 790 | TSParser *self, - 791 | Subtree left, - 792 | const SubtreeArray *children - 793 | ) { - 794 | array_assign(&self->scratch_trees, children); - | - 795 | // Create a temporary subtree using the scratch trees array. This node does - 796 | // not perform any allocation except for possibly growing the array to make - 797 | // room for its own heap data. The scratch tree is never explicitly released, - 798 | // so the same 'scratch trees' array can be reused again later. 
- 799 | MutableSubtree scratch_tree = ts_subtree_new_node( - 800 | ts_subtree_symbol(left), - 801 | &self->scratch_trees, - 802 | 0, - 803 | self->language - 804 | ); - | - 805 | return ts_parser__select_tree( - 806 | self, - 807 | left, - 808 | ts_subtree_from_mut(scratch_tree) - 809 | ); - 810 | } - | - 811 | static void ts_parser__shift( - 812 | TSParser *self, - 813 | StackVersion version, - 814 | TSStateId state, - 815 | Subtree lookahead, - 816 | bool extra - 817 | ) { - 818 | bool is_leaf = ts_subtree_child_count(lookahead) == 0; - 819 | Subtree subtree_to_push = lookahead; - 820 | if (extra != ts_subtree_extra(lookahead) && is_leaf) { - 821 | MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead); - 822 | ts_subtree_set_extra(&result, extra); - 823 | subtree_to_push = ts_subtree_from_mut(result); - 824 | } - | - 825 | ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state); - 826 | if (ts_subtree_has_external_tokens(subtree_to_push)) { - 827 | ts_stack_set_last_external_token( - 828 | self->stack, version, ts_subtree_last_external_token(subtree_to_push) - 829 | ); - 830 | } - 831 | } - | - 832 | static StackVersion ts_parser__reduce( - 833 | TSParser *self, - 834 | StackVersion version, - 835 | TSSymbol symbol, - 836 | uint32_t count, - 837 | int dynamic_precedence, - 838 | uint16_t production_id, - 839 | bool is_fragile, - 840 | bool end_of_non_terminal_extra - 841 | ) { - 842 | uint32_t initial_version_count = ts_stack_version_count(self->stack); - | - 843 | // Pop the given number of nodes from the given version of the parse stack. - 844 | // If stack versions have previously merged, then there may be more than one - 845 | // path back through the stack. For each path, create a new parent node to - 846 | // contain the popped children, and push it onto the stack in place of the - 847 | // children. 
- 848 | StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); - 849 | uint32_t removed_version_count = 0; - 850 | uint32_t halted_version_count = ts_stack_halted_version_count(self->stack); - 851 | for (uint32_t i = 0; i < pop.size; i++) { - 852 | StackSlice slice = *array_get(&pop, i); - 853 | StackVersion slice_version = slice.version - removed_version_count; - | - 854 | // This is where new versions are added to the parse stack. The versions - 855 | // will all be sorted and truncated at the end of the outer parsing loop. - 856 | // Allow the maximum version count to be temporarily exceeded, but only - 857 | // by a limited threshold. - 858 | if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW + halted_version_count) { - 859 | ts_stack_remove_version(self->stack, slice_version); - 860 | ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); - 861 | removed_version_count++; - 862 | while (i + 1 < pop.size) { - 863 | LOG("aborting reduce with too many versions") - 864 | StackSlice next_slice = *array_get(&pop, i + 1); - 865 | if (next_slice.version != slice.version) break; - 866 | ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); - 867 | i++; - 868 | } - 869 | continue; - 870 | } - | - 871 | // Extra tokens on top of the stack should not be included in this new parent - 872 | // node. They will be re-pushed onto the stack after the parent node is - 873 | // created and pushed. - 874 | SubtreeArray children = slice.subtrees; - 875 | ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras); - | - 876 | MutableSubtree parent = ts_subtree_new_node( - 877 | symbol, &children, production_id, self->language - 878 | ); - | - 879 | // This pop operation may have caused multiple stack versions to collapse - 880 | // into one, because they all diverged from a common state. 
In that case, - 881 | // choose one of the arrays of trees to be the parent node's children, and - 882 | // delete the rest of the tree arrays. - 883 | while (i + 1 < pop.size) { - 884 | StackSlice next_slice = *array_get(&pop, i + 1); - 885 | if (next_slice.version != slice.version) break; - 886 | i++; - | - 887 | SubtreeArray next_slice_children = next_slice.subtrees; - 888 | ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2); - | - 889 | if (ts_parser__select_children( - 890 | self, - 891 | ts_subtree_from_mut(parent), - 892 | &next_slice_children - 893 | )) { - 894 | ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras); - 895 | ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent)); - 896 | array_swap(&self->trailing_extras, &self->trailing_extras2); - 897 | parent = ts_subtree_new_node( - 898 | symbol, &next_slice_children, production_id, self->language - 899 | ); - 900 | } else { - 901 | array_clear(&self->trailing_extras2); - 902 | ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees); - 903 | } - 904 | } - | - 905 | TSStateId state = ts_stack_state(self->stack, slice_version); - 906 | TSStateId next_state = ts_language_next_state(self->language, state, symbol); - 907 | if (end_of_non_terminal_extra && next_state == state) { - 908 | parent.ptr->extra = true; - 909 | } - 910 | if (is_fragile || pop.size > 1 || initial_version_count > 1) { - 911 | parent.ptr->fragile_left = true; - 912 | parent.ptr->fragile_right = true; - 913 | parent.ptr->parse_state = TS_TREE_STATE_NONE; - 914 | } else { - 915 | parent.ptr->parse_state = state; - 916 | } - 917 | parent.ptr->dynamic_precedence += dynamic_precedence; - | - 918 | // Push the parent node onto the stack, along with any extra tokens that - 919 | // were previously on top of the stack. 
- 920 | ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state); - 921 | for (uint32_t j = 0; j < self->trailing_extras.size; j++) { - 922 | ts_stack_push(self->stack, slice_version, *array_get(&self->trailing_extras, j), false, next_state); - 923 | } - | - 924 | for (StackVersion j = 0; j < slice_version; j++) { - 925 | if (j == version) continue; - 926 | if (ts_stack_merge(self->stack, j, slice_version)) { - 927 | removed_version_count++; - 928 | break; - 929 | } - 930 | } - 931 | } - | - 932 | // Return the first new stack version that was created. - 933 | return ts_stack_version_count(self->stack) > initial_version_count - 934 | ? initial_version_count - 935 | : STACK_VERSION_NONE; - 936 | } - | - 937 | static void ts_parser__accept( - 938 | TSParser *self, - 939 | StackVersion version, - 940 | Subtree lookahead - 941 | ) { - 942 | ts_assert(ts_subtree_is_eof(lookahead)); - 943 | ts_stack_push(self->stack, version, lookahead, false, 1); - | - 944 | StackSliceArray pop = ts_stack_pop_all(self->stack, version); - 945 | for (uint32_t i = 0; i < pop.size; i++) { - 946 | SubtreeArray trees = array_get(&pop, i)->subtrees; - | - 947 | Subtree root = NULL_SUBTREE; - 948 | for (uint32_t j = trees.size - 1; j + 1 > 0; j--) { - 949 | Subtree tree = *array_get(&trees, j); - 950 | if (!ts_subtree_extra(tree)) { - 951 | ts_assert(!tree.data.is_inline); - 952 | uint32_t child_count = ts_subtree_child_count(tree); - 953 | const Subtree *children = ts_subtree_children(tree); - 954 | for (uint32_t k = 0; k < child_count; k++) { - 955 | ts_subtree_retain(children[k]); - 956 | } - 957 | array_splice(&trees, j, 1, child_count, children); - 958 | root = ts_subtree_from_mut(ts_subtree_new_node( - 959 | ts_subtree_symbol(tree), - 960 | &trees, - 961 | tree.ptr->production_id, - 962 | self->language - 963 | )); - 964 | ts_subtree_release(&self->tree_pool, tree); - 965 | break; - 966 | } - 967 | } - | - 968 | ts_assert(root.ptr); - 969 | 
self->accept_count++; - | - 970 | if (self->finished_tree.ptr) { - 971 | if (ts_parser__select_tree(self, self->finished_tree, root)) { - 972 | ts_subtree_release(&self->tree_pool, self->finished_tree); - 973 | self->finished_tree = root; - 974 | } else { - 975 | ts_subtree_release(&self->tree_pool, root); - 976 | } - 977 | } else { - 978 | self->finished_tree = root; - 979 | } - 980 | } - | - 981 | ts_stack_remove_version(self->stack, array_get(&pop, 0)->version); - 982 | ts_stack_halt(self->stack, version); - 983 | } - | - 984 | static bool ts_parser__do_all_potential_reductions( - 985 | TSParser *self, - 986 | StackVersion starting_version, - 987 | TSSymbol lookahead_symbol - 988 | ) { - 989 | uint32_t initial_version_count = ts_stack_version_count(self->stack); - | - 990 | bool can_shift_lookahead_symbol = false; - 991 | StackVersion version = starting_version; - 992 | for (unsigned i = 0; true; i++) { - 993 | uint32_t version_count = ts_stack_version_count(self->stack); - 994 | if (version >= version_count) break; - | - 995 | bool merged = false; - 996 | for (StackVersion j = initial_version_count; j < version; j++) { - 997 | if (ts_stack_merge(self->stack, j, version)) { - 998 | merged = true; - 999 | break; -1000 | } -1001 | } -1002 | if (merged) continue; - | -1003 | TSStateId state = ts_stack_state(self->stack, version); -1004 | bool has_shift_action = false; -1005 | array_clear(&self->reduce_actions); - | -1006 | TSSymbol first_symbol, end_symbol; -1007 | if (lookahead_symbol != 0) { -1008 | first_symbol = lookahead_symbol; -1009 | end_symbol = lookahead_symbol + 1; -1010 | } else { -1011 | first_symbol = 1; -1012 | end_symbol = self->language->token_count; -1013 | } - | -1014 | for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) { -1015 | TableEntry entry; -1016 | ts_language_table_entry(self->language, state, symbol, &entry); -1017 | for (uint32_t j = 0; j < entry.action_count; j++) { -1018 | TSParseAction action = entry.actions[j]; 
-1019 | switch (action.type) { -1020 | case TSParseActionTypeShift: -1021 | case TSParseActionTypeRecover: -1022 | if (!action.shift.extra && !action.shift.repetition) has_shift_action = true; -1023 | break; -1024 | case TSParseActionTypeReduce: -1025 | if (action.reduce.child_count > 0) -1026 | ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) { -1027 | .symbol = action.reduce.symbol, -1028 | .count = action.reduce.child_count, -1029 | .dynamic_precedence = action.reduce.dynamic_precedence, -1030 | .production_id = action.reduce.production_id, -1031 | }); -1032 | break; -1033 | default: -1034 | break; -1035 | } -1036 | } -1037 | } - | -1038 | StackVersion reduction_version = STACK_VERSION_NONE; -1039 | for (uint32_t j = 0; j < self->reduce_actions.size; j++) { -1040 | ReduceAction action = *array_get(&self->reduce_actions, j); - | -1041 | reduction_version = ts_parser__reduce( -1042 | self, version, action.symbol, action.count, -1043 | action.dynamic_precedence, action.production_id, -1044 | true, false -1045 | ); -1046 | } - | -1047 | if (has_shift_action) { -1048 | can_shift_lookahead_symbol = true; -1049 | } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) { -1050 | ts_stack_renumber_version(self->stack, reduction_version, version); -1051 | continue; -1052 | } else if (lookahead_symbol != 0) { -1053 | ts_stack_remove_version(self->stack, version); -1054 | } - | -1055 | if (version == starting_version) { -1056 | version = version_count; -1057 | } else { -1058 | version++; -1059 | } -1060 | } - | -1061 | return can_shift_lookahead_symbol; -1062 | } - | -1063 | static bool ts_parser__recover_to_state( -1064 | TSParser *self, -1065 | StackVersion version, -1066 | unsigned depth, -1067 | TSStateId goal_state -1068 | ) { -1069 | StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth); -1070 | StackVersion previous_version = STACK_VERSION_NONE; - | -1071 | for (unsigned i = 0; i < pop.size; i++) { -1072 | StackSlice 
slice = *array_get(&pop, i); - | -1073 | if (slice.version == previous_version) { -1074 | ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); -1075 | array_erase(&pop, i--); -1076 | continue; -1077 | } - | -1078 | if (ts_stack_state(self->stack, slice.version) != goal_state) { -1079 | ts_stack_halt(self->stack, slice.version); -1080 | ts_subtree_array_delete(&self->tree_pool, &slice.subtrees); -1081 | array_erase(&pop, i--); -1082 | continue; -1083 | } - | -1084 | SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version); -1085 | if (error_trees.size > 0) { -1086 | ts_assert(error_trees.size == 1); -1087 | Subtree error_tree = *array_get(&error_trees, 0); -1088 | uint32_t error_child_count = ts_subtree_child_count(error_tree); -1089 | if (error_child_count > 0) { -1090 | array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree)); -1091 | for (unsigned j = 0; j < error_child_count; j++) { -1092 | ts_subtree_retain(*array_get(&slice.subtrees, j)); -1093 | } -1094 | } -1095 | ts_subtree_array_delete(&self->tree_pool, &error_trees); -1096 | } - | -1097 | ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras); - | -1098 | if (slice.subtrees.size > 0) { -1099 | Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language); -1100 | ts_stack_push(self->stack, slice.version, error, false, goal_state); -1101 | } else { -1102 | array_delete(&slice.subtrees); -1103 | } - | -1104 | for (unsigned j = 0; j < self->trailing_extras.size; j++) { -1105 | Subtree tree = *array_get(&self->trailing_extras, j); -1106 | ts_stack_push(self->stack, slice.version, tree, false, goal_state); -1107 | } - | -1108 | previous_version = slice.version; -1109 | } - | -1110 | return previous_version != STACK_VERSION_NONE; -1111 | } - | -1112 | static void ts_parser__recover( -1113 | TSParser *self, -1114 | StackVersion version, -1115 | Subtree lookahead -1116 | ) { -1117 | bool did_recover = false; -1118 
| unsigned previous_version_count = ts_stack_version_count(self->stack); -1119 | Length position = ts_stack_position(self->stack, version); -1120 | StackSummary *summary = ts_stack_get_summary(self->stack, version); -1121 | unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version); -1122 | unsigned current_error_cost = ts_stack_error_cost(self->stack, version); - | -1123 | // When the parser is in the error state, there are two strategies for recovering with a -1124 | // given lookahead token: -1125 | // 1. Find a previous state on the stack in which that lookahead token would be valid. Then, -1126 | // create a new stack version that is in that state again. This entails popping all of the -1127 | // subtrees that have been pushed onto the stack since that previous state, and wrapping -1128 | // them in an ERROR node. -1129 | // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and -1130 | // move on to the next lookahead token, remaining in the error state. -1131 | // -1132 | // First, try the strategy 1. Upon entering the error state, the parser recorded a summary -1133 | // of the previous parse states and their depths. Look at each state in the summary, to see -1134 | // if the current lookahead token would be valid in that state. -1135 | if (summary && !ts_subtree_is_error(lookahead)) { -1136 | for (unsigned i = 0; i < summary->size; i++) { -1137 | StackSummaryEntry entry = *array_get(summary, i); - | -1138 | if (entry.state == ERROR_STATE) continue; -1139 | if (entry.position.bytes == position.bytes) continue; -1140 | unsigned depth = entry.depth; -1141 | if (node_count_since_error > 0) depth++; - | -1142 | // Do not recover in ways that create redundant stack versions. 
-1143 | bool would_merge = false; -1144 | for (unsigned j = 0; j < previous_version_count; j++) { -1145 | if ( -1146 | ts_stack_state(self->stack, j) == entry.state && -1147 | ts_stack_position(self->stack, j).bytes == position.bytes -1148 | ) { -1149 | would_merge = true; -1150 | break; -1151 | } -1152 | } -1153 | if (would_merge) continue; - | -1154 | // Do not recover if the result would clearly be worse than some existing stack version. -1155 | unsigned new_cost = -1156 | current_error_cost + -1157 | entry.depth * ERROR_COST_PER_SKIPPED_TREE + -1158 | (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR + -1159 | (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE; -1160 | if (ts_parser__better_version_exists(self, version, false, new_cost)) break; - | -1161 | // If the current lookahead token is valid in some previous state, recover to that state. -1162 | // Then stop looking for further recoveries. -1163 | if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) { -1164 | if (ts_parser__recover_to_state(self, version, depth, entry.state)) { -1165 | did_recover = true; -1166 | LOG("recover_to_previous state:%u, depth:%u", entry.state, depth); -1167 | LOG_STACK(); -1168 | break; -1169 | } -1170 | } -1171 | } -1172 | } - | -1173 | // In the process of attempting to recover, some stack versions may have been created -1174 | // and subsequently halted. Remove those versions. -1175 | for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) { -1176 | if (!ts_stack_is_active(self->stack, i)) { -1177 | LOG("removed paused version:%u", i); -1178 | ts_stack_remove_version(self->stack, i--); -1179 | LOG_STACK(); -1180 | } -1181 | } - | -1182 | // If the parser is still in the error state at the end of the file, just wrap everything -1183 | // in an ERROR node and terminate. 
-1184 | if (ts_subtree_is_eof(lookahead)) { -1185 | LOG("recover_eof"); -1186 | SubtreeArray children = array_new(); -1187 | Subtree parent = ts_subtree_new_error_node(&children, false, self->language); -1188 | ts_stack_push(self->stack, version, parent, false, 1); -1189 | ts_parser__accept(self, version, lookahead); -1190 | return; -1191 | } - | -1192 | // If strategy 1 succeeded, a new stack version will have been created which is able to handle -1193 | // the current lookahead token. Now, in addition, try strategy 2 described above: skip the -1194 | // current lookahead token by wrapping it in an ERROR node. - | -1195 | // Don't pursue this additional strategy if there are already too many stack versions. -1196 | if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { -1197 | ts_stack_halt(self->stack, version); -1198 | ts_subtree_release(&self->tree_pool, lookahead); -1199 | return; -1200 | } - | -1201 | if ( -1202 | did_recover && -1203 | ts_subtree_has_external_scanner_state_change(lookahead) -1204 | ) { -1205 | ts_stack_halt(self->stack, version); -1206 | ts_subtree_release(&self->tree_pool, lookahead); -1207 | return; -1208 | } - | -1209 | // Do not recover if the result would clearly be worse than some existing stack version. -1210 | unsigned new_cost = -1211 | current_error_cost + ERROR_COST_PER_SKIPPED_TREE + -1212 | ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR + -1213 | ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE; -1214 | if (ts_parser__better_version_exists(self, version, false, new_cost)) { -1215 | ts_stack_halt(self->stack, version); -1216 | ts_subtree_release(&self->tree_pool, lookahead); -1217 | return; -1218 | } - | -1219 | // If the current lookahead token is an extra token, mark it as extra. This means it won't -1220 | // be counted in error cost calculations. 
-1221 | unsigned n; -1222 | const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); -1223 | if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) { -1224 | MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); -1225 | ts_subtree_set_extra(&mutable_lookahead, true); -1226 | lookahead = ts_subtree_from_mut(mutable_lookahead); -1227 | } - | -1228 | // Wrap the lookahead token in an ERROR. -1229 | LOG("skip_token symbol:%s", TREE_NAME(lookahead)); -1230 | SubtreeArray children = array_new(); -1231 | array_reserve(&children, 1); -1232 | array_push(&children, lookahead); -1233 | MutableSubtree error_repeat = ts_subtree_new_node( -1234 | ts_builtin_sym_error_repeat, -1235 | &children, -1236 | 0, -1237 | self->language -1238 | ); - | -1239 | // If other tokens have already been skipped, so there is already an ERROR at the top of the -1240 | // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger -1241 | // ERROR. -1242 | if (node_count_since_error > 0) { -1243 | StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1); - | -1244 | // TODO: Figure out how to make this condition occur. -1245 | // See https://github.com/atom/atom/issues/18450#issuecomment-439579778 -1246 | // If multiple stack versions have merged at this point, just pick one of the errors -1247 | // arbitrarily and discard the rest. 
-1248 | if (pop.size > 1) { -1249 | for (unsigned i = 1; i < pop.size; i++) { -1250 | ts_subtree_array_delete(&self->tree_pool, &array_get(&pop, i)->subtrees); -1251 | } -1252 | while (ts_stack_version_count(self->stack) > array_get(&pop, 0)->version + 1) { -1253 | ts_stack_remove_version(self->stack, array_get(&pop, 0)->version + 1); -1254 | } -1255 | } - | -1256 | ts_stack_renumber_version(self->stack, array_get(&pop, 0)->version, version); -1257 | array_push(&array_get(&pop, 0)->subtrees, ts_subtree_from_mut(error_repeat)); -1258 | error_repeat = ts_subtree_new_node( -1259 | ts_builtin_sym_error_repeat, -1260 | &array_get(&pop, 0)->subtrees, -1261 | 0, -1262 | self->language -1263 | ); -1264 | } - | -1265 | // Push the new ERROR onto the stack. -1266 | ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE); -1267 | if (ts_subtree_has_external_tokens(lookahead)) { -1268 | ts_stack_set_last_external_token( -1269 | self->stack, version, ts_subtree_last_external_token(lookahead) -1270 | ); -1271 | } - | -1272 | bool has_error = true; -1273 | for (unsigned i = 0; i < ts_stack_version_count(self->stack); i++) { -1274 | ErrorStatus status = ts_parser__version_status(self, i); -1275 | if (!status.is_in_error) { -1276 | has_error = false; -1277 | break; -1278 | } -1279 | } -1280 | self->has_error = has_error; -1281 | } - | -1282 | static void ts_parser__handle_error( -1283 | TSParser *self, -1284 | StackVersion version, -1285 | Subtree lookahead -1286 | ) { -1287 | uint32_t previous_version_count = ts_stack_version_count(self->stack); - | -1288 | // Perform any reductions that can happen in this state, regardless of the lookahead. After -1289 | // skipping one or more invalid tokens, the parser might find a token that would have allowed -1290 | // a reduction to take place. 
-1291 | ts_parser__do_all_potential_reductions(self, version, 0); -1292 | uint32_t version_count = ts_stack_version_count(self->stack); -1293 | Length position = ts_stack_position(self->stack, version); - | -1294 | // Push a discontinuity onto the stack. Merge all of the stack versions that -1295 | // were created in the previous step. -1296 | bool did_insert_missing_token = false; -1297 | for (StackVersion v = version; v < version_count;) { -1298 | if (!did_insert_missing_token) { -1299 | TSStateId state = ts_stack_state(self->stack, v); -1300 | for ( -1301 | TSSymbol missing_symbol = 1; -1302 | missing_symbol < (uint16_t)self->language->token_count; -1303 | missing_symbol++ -1304 | ) { -1305 | TSStateId state_after_missing_symbol = ts_language_next_state( -1306 | self->language, state, missing_symbol -1307 | ); -1308 | if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) { -1309 | continue; -1310 | } - | -1311 | if (ts_language_has_reduce_action( -1312 | self->language, -1313 | state_after_missing_symbol, -1314 | ts_subtree_leaf_symbol(lookahead) -1315 | )) { -1316 | // In case the parser is currently outside of any included range, the lexer will -1317 | // snap to the beginning of the next included range. The missing token's padding -1318 | // must be assigned to position it within the next included range. 
-1319 | ts_lexer_reset(&self->lexer, position); -1320 | ts_lexer_mark_end(&self->lexer); -1321 | Length padding = length_sub(self->lexer.token_end_position, position); -1322 | uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead); - | -1323 | StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v); -1324 | Subtree missing_tree = ts_subtree_new_missing_leaf( -1325 | &self->tree_pool, missing_symbol, -1326 | padding, lookahead_bytes, -1327 | self->language -1328 | ); -1329 | ts_stack_push( -1330 | self->stack, version_with_missing_tree, -1331 | missing_tree, false, -1332 | state_after_missing_symbol -1333 | ); - | -1334 | if (ts_parser__do_all_potential_reductions( -1335 | self, version_with_missing_tree, -1336 | ts_subtree_leaf_symbol(lookahead) -1337 | )) { -1338 | LOG( -1339 | "recover_with_missing symbol:%s, state:%u", -1340 | SYM_NAME(missing_symbol), -1341 | ts_stack_state(self->stack, version_with_missing_tree) -1342 | ); -1343 | did_insert_missing_token = true; -1344 | break; -1345 | } -1346 | } -1347 | } -1348 | } - | -1349 | ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE); -1350 | v = (v == version) ? previous_version_count : v + 1; -1351 | } - | -1352 | for (unsigned i = previous_version_count; i < version_count; i++) { -1353 | bool did_merge = ts_stack_merge(self->stack, version, previous_version_count); -1354 | ts_assert(did_merge); -1355 | } - | -1356 | ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH); - | -1357 | // Begin recovery with the current lookahead node, rather than waiting for the -1358 | // next turn of the parse loop. This ensures that the tree accounts for the -1359 | // current lookahead token's "lookahead bytes" value, which describes how far -1360 | // the lexer needed to look ahead beyond the content of the token in order to -1361 | // recognize it. 
-1362 | if (ts_subtree_child_count(lookahead) > 0) { -1363 | ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node); -1364 | } -1365 | ts_parser__recover(self, version, lookahead); - | -1366 | LOG_STACK(); -1367 | } - | -1368 | static bool ts_parser__check_progress(TSParser *self, Subtree *lookahead, const uint32_t *position, unsigned operations) { -1369 | self->operation_count += operations; -1370 | if (self->operation_count >= OP_COUNT_PER_PARSER_CALLBACK_CHECK) { -1371 | self->operation_count = 0; -1372 | } -1373 | if (position != NULL) { -1374 | self->parse_state.current_byte_offset = *position; -1375 | self->parse_state.has_error = self->has_error; -1376 | } -1377 | if ( -1378 | self->operation_count == 0 && -1379 | (self->parse_options.progress_callback && self->parse_options.progress_callback(&self->parse_state)) -1380 | ) { -1381 | if (lookahead && lookahead->ptr) { -1382 | ts_subtree_release(&self->tree_pool, *lookahead); -1383 | } -1384 | return false; -1385 | } -1386 | return true; -1387 | } - | -1388 | static bool ts_parser__advance( -1389 | TSParser *self, -1390 | StackVersion version, -1391 | bool allow_node_reuse -1392 | ) { -1393 | TSStateId state = ts_stack_state(self->stack, version); -1394 | uint32_t position = ts_stack_position(self->stack, version).bytes; -1395 | Subtree last_external_token = ts_stack_last_external_token(self->stack, version); - | -1396 | bool did_reuse = true; -1397 | Subtree lookahead = NULL_SUBTREE; -1398 | TableEntry table_entry = {.action_count = 0}; - | -1399 | // If possible, reuse a node from the previous syntax tree. -1400 | if (allow_node_reuse) { -1401 | lookahead = ts_parser__reuse_node( -1402 | self, version, &state, position, last_external_token, &table_entry -1403 | ); -1404 | } - | -1405 | // If no node from the previous syntax tree could be reused, then try to -1406 | // reuse the token previously returned by the lexer. 
-1407 | if (!lookahead.ptr) { -1408 | did_reuse = false; -1409 | lookahead = ts_parser__get_cached_token( -1410 | self, state, position, last_external_token, &table_entry -1411 | ); -1412 | } - | -1413 | bool needs_lex = !lookahead.ptr; -1414 | for (;;) { -1415 | // Otherwise, re-run the lexer. -1416 | if (needs_lex) { -1417 | needs_lex = false; -1418 | lookahead = ts_parser__lex(self, version, state); -1419 | if (self->has_scanner_error) return false; - | -1420 | if (lookahead.ptr) { -1421 | ts_parser__set_cached_token(self, position, last_external_token, lookahead); -1422 | ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); -1423 | } - | -1424 | // When parsing a non-terminal extra, a null lookahead indicates the -1425 | // end of the rule. The reduction is stored in the EOF table entry. -1426 | // After the reduction, the lexer needs to be run again. -1427 | else { -1428 | ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry); -1429 | } -1430 | } - | -1431 | // If a progress callback was provided, then check every -1432 | // time a fixed number of parse actions has been processed. -1433 | if (!ts_parser__check_progress(self, &lookahead, &position, 1)) { -1434 | return false; -1435 | } - | -1436 | // Process each parse action for the current lookahead token in -1437 | // the current state. If there are multiple actions, then this is -1438 | // an ambiguous state. REDUCE actions always create a new stack -1439 | // version, whereas SHIFT actions update the existing stack version -1440 | // and terminate this loop. 
-1441 | bool did_reduce = false; -1442 | StackVersion last_reduction_version = STACK_VERSION_NONE; -1443 | for (uint32_t i = 0; i < table_entry.action_count; i++) { -1444 | TSParseAction action = table_entry.actions[i]; - | -1445 | switch (action.type) { -1446 | case TSParseActionTypeShift: { -1447 | if (action.shift.repetition) break; -1448 | TSStateId next_state; -1449 | if (action.shift.extra) { -1450 | next_state = state; -1451 | LOG("shift_extra"); -1452 | } else { -1453 | next_state = action.shift.state; -1454 | LOG("shift state:%u", next_state); -1455 | } - | -1456 | if (ts_subtree_child_count(lookahead) > 0) { -1457 | ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node); -1458 | next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead)); -1459 | } - | -1460 | ts_parser__shift(self, version, next_state, lookahead, action.shift.extra); -1461 | if (did_reuse) reusable_node_advance(&self->reusable_node); -1462 | return true; -1463 | } - | -1464 | case TSParseActionTypeReduce: { -1465 | bool is_fragile = table_entry.action_count > 1; -1466 | bool end_of_non_terminal_extra = lookahead.ptr == NULL; -1467 | LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count); -1468 | StackVersion reduction_version = ts_parser__reduce( -1469 | self, version, action.reduce.symbol, action.reduce.child_count, -1470 | action.reduce.dynamic_precedence, action.reduce.production_id, -1471 | is_fragile, end_of_non_terminal_extra -1472 | ); -1473 | did_reduce = true; -1474 | if (reduction_version != STACK_VERSION_NONE) { -1475 | last_reduction_version = reduction_version; -1476 | } -1477 | break; -1478 | } - | -1479 | case TSParseActionTypeAccept: { -1480 | LOG("accept"); -1481 | ts_parser__accept(self, version, lookahead); -1482 | return true; -1483 | } - | -1484 | case TSParseActionTypeRecover: { -1485 | if (ts_subtree_child_count(lookahead) > 0) { -1486 | ts_parser__breakdown_lookahead(self, 
&lookahead, ERROR_STATE, &self->reusable_node); -1487 | } - | -1488 | ts_parser__recover(self, version, lookahead); -1489 | if (did_reuse) reusable_node_advance(&self->reusable_node); -1490 | return true; -1491 | } -1492 | } -1493 | } - | -1494 | // If a reduction was performed, then replace the current stack version -1495 | // with one of the stack versions created by a reduction, and continue -1496 | // processing this version of the stack with the same lookahead symbol. -1497 | if (last_reduction_version != STACK_VERSION_NONE) { -1498 | ts_stack_renumber_version(self->stack, last_reduction_version, version); -1499 | LOG_STACK(); -1500 | state = ts_stack_state(self->stack, version); - | -1501 | // At the end of a non-terminal extra rule, the lexer will return a -1502 | // null subtree, because the parser needs to perform a fixed reduction -1503 | // regardless of the lookahead node. After performing that reduction, -1504 | // (and completing the non-terminal extra rule) run the lexer again based -1505 | // on the current parse state. -1506 | if (!lookahead.ptr) { -1507 | needs_lex = true; -1508 | } else { -1509 | ts_language_table_entry( -1510 | self->language, -1511 | state, -1512 | ts_subtree_leaf_symbol(lookahead), -1513 | &table_entry -1514 | ); -1515 | } - | -1516 | continue; -1517 | } - | -1518 | // A reduction was performed, but was merged into an existing stack version. -1519 | // This version can be discarded. -1520 | if (did_reduce) { -1521 | if (lookahead.ptr) { -1522 | ts_subtree_release(&self->tree_pool, lookahead); -1523 | } -1524 | ts_stack_halt(self->stack, version); -1525 | return true; -1526 | } - | -1527 | // If the current lookahead token is a keyword that is not valid, but the -1528 | // default word token *is* valid, then treat the lookahead token as the word -1529 | // token instead. 
-1530 | if ( -1531 | ts_subtree_is_keyword(lookahead) && -1532 | ts_subtree_symbol(lookahead) != self->language->keyword_capture_token && -1533 | !ts_language_is_reserved_word(self->language, state, ts_subtree_symbol(lookahead)) -1534 | ) { -1535 | ts_language_table_entry( -1536 | self->language, -1537 | state, -1538 | self->language->keyword_capture_token, -1539 | &table_entry -1540 | ); -1541 | if (table_entry.action_count > 0) { -1542 | LOG( -1543 | "switch from_keyword:%s, to_word_token:%s", -1544 | TREE_NAME(lookahead), -1545 | SYM_NAME(self->language->keyword_capture_token) -1546 | ); - | -1547 | MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); -1548 | ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language); -1549 | lookahead = ts_subtree_from_mut(mutable_lookahead); -1550 | continue; -1551 | } -1552 | } - | -1553 | // If the current lookahead token is not valid and the previous subtree on -1554 | // the stack was reused from an old tree, then it wasn't actually valid to -1555 | // reuse that previous subtree. Remove it from the stack, and in its place, -1556 | // push each of its children. Then try again to process the current lookahead. -1557 | if (ts_parser__breakdown_top_of_stack(self, version)) { -1558 | state = ts_stack_state(self->stack, version); -1559 | ts_subtree_release(&self->tree_pool, lookahead); -1560 | needs_lex = true; -1561 | continue; -1562 | } - | -1563 | // Otherwise, there is definitely an error in this version of the parse stack. -1564 | // Mark this version as paused and continue processing any other stack -1565 | // versions that exist. If some other version advances successfully, then -1566 | // this version can simply be removed. But if all versions end up paused, -1567 | // then error recovery is needed. 
-1568 | LOG("detect_error lookahead:%s", TREE_NAME(lookahead)); -1569 | ts_stack_pause(self->stack, version, lookahead); -1570 | return true; -1571 | } -1572 | } - | -1573 | static unsigned ts_parser__condense_stack(TSParser *self) { -1574 | bool made_changes = false; -1575 | unsigned min_error_cost = UINT_MAX; -1576 | for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) { -1577 | // Prune any versions that have been marked for removal. -1578 | if (ts_stack_is_halted(self->stack, i)) { -1579 | ts_stack_remove_version(self->stack, i); -1580 | i--; -1581 | continue; -1582 | } - | -1583 | // Keep track of the minimum error cost of any stack version so -1584 | // that it can be returned. -1585 | ErrorStatus status_i = ts_parser__version_status(self, i); -1586 | if (!status_i.is_in_error && status_i.cost < min_error_cost) { -1587 | min_error_cost = status_i.cost; -1588 | } - | -1589 | // Examine each pair of stack versions, removing any versions that -1590 | // are clearly worse than another version. Ensure that the versions -1591 | // are ordered from most promising to least promising. 
-1592 | for (StackVersion j = 0; j < i; j++) { -1593 | ErrorStatus status_j = ts_parser__version_status(self, j); - | -1594 | switch (ts_parser__compare_versions(self, status_j, status_i)) { -1595 | case ErrorComparisonTakeLeft: -1596 | made_changes = true; -1597 | ts_stack_remove_version(self->stack, i); -1598 | i--; -1599 | j = i; -1600 | break; - | -1601 | case ErrorComparisonPreferLeft: -1602 | case ErrorComparisonNone: -1603 | if (ts_stack_merge(self->stack, j, i)) { -1604 | made_changes = true; -1605 | i--; -1606 | j = i; -1607 | } -1608 | break; - | -1609 | case ErrorComparisonPreferRight: -1610 | made_changes = true; -1611 | if (ts_stack_merge(self->stack, j, i)) { -1612 | i--; -1613 | j = i; -1614 | } else { -1615 | ts_stack_swap_versions(self->stack, i, j); -1616 | } -1617 | break; - | -1618 | case ErrorComparisonTakeRight: -1619 | made_changes = true; -1620 | ts_stack_remove_version(self->stack, j); -1621 | i--; -1622 | j--; -1623 | break; -1624 | } -1625 | } -1626 | } - | -1627 | // Enforce a hard upper bound on the number of stack versions by -1628 | // discarding the least promising versions. -1629 | while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { -1630 | ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); -1631 | made_changes = true; -1632 | } - | -1633 | // If the best-performing stack version is currently paused, or all -1634 | // versions are paused, then resume the best paused version and begin -1635 | // the error recovery process. Otherwise, remove the paused versions. 
-1636 | if (ts_stack_version_count(self->stack) > 0) { -1637 | bool has_unpaused_version = false; -1638 | for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) { -1639 | if (ts_stack_is_paused(self->stack, i)) { -1640 | if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) { -1641 | LOG("resume version:%u", i); -1642 | min_error_cost = ts_stack_error_cost(self->stack, i); -1643 | Subtree lookahead = ts_stack_resume(self->stack, i); -1644 | ts_parser__handle_error(self, i, lookahead); -1645 | has_unpaused_version = true; -1646 | } else { -1647 | ts_stack_remove_version(self->stack, i); -1648 | made_changes = true; -1649 | i--; -1650 | n--; -1651 | } -1652 | } else { -1653 | has_unpaused_version = true; -1654 | } -1655 | } -1656 | } - | -1657 | if (made_changes) { -1658 | LOG("condense"); -1659 | LOG_STACK(); -1660 | } - | -1661 | return min_error_cost; -1662 | } - | -1663 | static bool ts_parser__balance_subtree(TSParser *self) { -1664 | Subtree finished_tree = self->finished_tree; - | -1665 | // If we haven't canceled balancing in progress before, then we want to clear the tree stack and -1666 | // push the initial finished tree onto it. Otherwise, if we're resuming balancing after a -1667 | // cancellation, we don't want to clear the tree stack. 
-1668 | if (!self->canceled_balancing) { -1669 | array_clear(&self->tree_pool.tree_stack); -1670 | if (ts_subtree_child_count(finished_tree) > 0 && finished_tree.ptr->ref_count == 1) { -1671 | array_push(&self->tree_pool.tree_stack, ts_subtree_to_mut_unsafe(finished_tree)); -1672 | } -1673 | } - | -1674 | while (self->tree_pool.tree_stack.size > 0) { -1675 | if (!ts_parser__check_progress(self, NULL, NULL, 1)) { -1676 | return false; -1677 | } - | -1678 | MutableSubtree tree = *array_get(&self->tree_pool.tree_stack, -1679 | self->tree_pool.tree_stack.size - 1 -1680 | ); - | -1681 | if (tree.ptr->repeat_depth > 0) { -1682 | Subtree child1 = ts_subtree_children(tree)[0]; -1683 | Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1]; -1684 | long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2); -1685 | if (repeat_delta > 0) { -1686 | unsigned n = (unsigned)repeat_delta; - | -1687 | for (unsigned i = n / 2; i > 0; i /= 2) { -1688 | ts_subtree_compress(tree, i, self->language, &self->tree_pool.tree_stack); -1689 | n -= i; - | -1690 | // We scale the operation count increment in `ts_parser__check_progress` proportionately to the compression -1691 | // size since larger values of i take longer to process. Shifting by 4 empirically provides good check -1692 | // intervals (e.g. 193 operations when i=3100) to prevent blocking during large compressions. -1693 | uint8_t operations = i >> 4 > 0 ? 
i >> 4 : 1; -1694 | if (!ts_parser__check_progress(self, NULL, NULL, operations)) { -1695 | return false; -1696 | } -1697 | } -1698 | } -1699 | } - | -1700 | (void)array_pop(&self->tree_pool.tree_stack); - | -1701 | for (uint32_t i = 0; i < tree.ptr->child_count; i++) { -1702 | Subtree child = ts_subtree_children(tree)[i]; -1703 | if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) { -1704 | array_push(&self->tree_pool.tree_stack, ts_subtree_to_mut_unsafe(child)); -1705 | } -1706 | } -1707 | } - | -1708 | return true; -1709 | } - | -1710 | static bool ts_parser_has_outstanding_parse(TSParser *self) { -1711 | return ( -1712 | self->canceled_balancing || -1713 | self->external_scanner_payload || -1714 | ts_stack_state(self->stack, 0) != 1 || -1715 | ts_stack_node_count_since_error(self->stack, 0) != 0 -1716 | ); -1717 | } - | -1718 | // Parser - Public - | -1719 | TSParser *ts_parser_new(void) { -1720 | TSParser *self = ts_calloc(1, sizeof(TSParser)); -1721 | ts_lexer_init(&self->lexer); -1722 | array_init(&self->reduce_actions); -1723 | array_reserve(&self->reduce_actions, 4); -1724 | self->tree_pool = ts_subtree_pool_new(32); -1725 | self->stack = ts_stack_new(&self->tree_pool); -1726 | self->finished_tree = NULL_SUBTREE; -1727 | self->reusable_node = reusable_node_new(); -1728 | self->dot_graph_file = NULL; -1729 | self->language = NULL; -1730 | self->has_scanner_error = false; -1731 | self->has_error = false; -1732 | self->canceled_balancing = false; -1733 | self->external_scanner_payload = NULL; -1734 | self->operation_count = 0; -1735 | self->old_tree = NULL_SUBTREE; -1736 | self->included_range_differences = (TSRangeArray) array_new(); -1737 | self->included_range_difference_index = 0; -1738 | ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); -1739 | return self; -1740 | } - | -1741 | void ts_parser_delete(TSParser *self) { -1742 | if (!self) return; - | -1743 | ts_parser_set_language(self, NULL); -1744 | 
ts_stack_delete(self->stack); -1745 | if (self->reduce_actions.contents) { -1746 | array_delete(&self->reduce_actions); -1747 | } -1748 | if (self->included_range_differences.contents) { -1749 | array_delete(&self->included_range_differences); -1750 | } -1751 | if (self->old_tree.ptr) { -1752 | ts_subtree_release(&self->tree_pool, self->old_tree); -1753 | self->old_tree = NULL_SUBTREE; -1754 | } -1755 | ts_wasm_store_delete(self->wasm_store); -1756 | ts_lexer_delete(&self->lexer); -1757 | ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); -1758 | ts_subtree_pool_delete(&self->tree_pool); -1759 | reusable_node_delete(&self->reusable_node); -1760 | array_delete(&self->trailing_extras); -1761 | array_delete(&self->trailing_extras2); -1762 | array_delete(&self->scratch_trees); -1763 | ts_free(self); -1764 | } - | -1765 | const TSLanguage *ts_parser_language(const TSParser *self) { -1766 | return self->language; -1767 | } - | -1768 | bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { -1769 | ts_parser_reset(self); -1770 | ts_language_delete(self->language); -1771 | self->language = NULL; - | -1772 | if (language) { -1773 | if ( -1774 | language->abi_version > TREE_SITTER_LANGUAGE_VERSION || -1775 | language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION -1776 | ) return false; - | -1777 | if (ts_language_is_wasm(language)) { -1778 | if ( -1779 | !self->wasm_store || -1780 | !ts_wasm_store_start(self->wasm_store, &self->lexer.data, language) -1781 | ) return false; -1782 | } -1783 | } - | -1784 | self->language = ts_language_copy(language); -1785 | return true; -1786 | } - | -1787 | TSLogger ts_parser_logger(const TSParser *self) { -1788 | return self->lexer.logger; -1789 | } - | -1790 | void ts_parser_set_logger(TSParser *self, TSLogger logger) { -1791 | self->lexer.logger = logger; -1792 | } - | -1793 | void ts_parser_print_dot_graphs(TSParser *self, int fd) { -1794 | if (self->dot_graph_file) { -1795 | 
fclose(self->dot_graph_file); -1796 | } - | -1797 | if (fd >= 0) { -1798 | #ifdef _WIN32 -1799 | self->dot_graph_file = _fdopen(fd, "a"); -1800 | #else -1801 | self->dot_graph_file = fdopen(fd, "a"); -1802 | #endif -1803 | } else { -1804 | self->dot_graph_file = NULL; -1805 | } -1806 | } - | -1807 | bool ts_parser_set_included_ranges( -1808 | TSParser *self, -1809 | const TSRange *ranges, -1810 | uint32_t count -1811 | ) { -1812 | return ts_lexer_set_included_ranges(&self->lexer, ranges, count); -1813 | } - | -1814 | const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) { -1815 | return ts_lexer_included_ranges(&self->lexer, count); -1816 | } - | -1817 | void ts_parser_reset(TSParser *self) { -1818 | ts_parser__external_scanner_destroy(self); -1819 | if (self->wasm_store) { -1820 | ts_wasm_store_reset(self->wasm_store); -1821 | } - | -1822 | if (self->old_tree.ptr) { -1823 | ts_subtree_release(&self->tree_pool, self->old_tree); -1824 | self->old_tree = NULL_SUBTREE; -1825 | } - | -1826 | reusable_node_clear(&self->reusable_node); -1827 | ts_lexer_reset(&self->lexer, length_zero()); -1828 | ts_stack_clear(self->stack); -1829 | ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); -1830 | if (self->finished_tree.ptr) { -1831 | ts_subtree_release(&self->tree_pool, self->finished_tree); -1832 | self->finished_tree = NULL_SUBTREE; -1833 | } -1834 | self->accept_count = 0; -1835 | self->has_scanner_error = false; -1836 | self->has_error = false; -1837 | self->canceled_balancing = false; -1838 | self->parse_options = (TSParseOptions) {0}; -1839 | self->parse_state = (TSParseState) {0}; -1840 | } - | -1841 | TSTree *ts_parser_parse( -1842 | TSParser *self, -1843 | const TSTree *old_tree, -1844 | TSInput input -1845 | ) { -1846 | TSTree *result = NULL; -1847 | if (!self->language || !input.read) return NULL; - | -1848 | if (ts_language_is_wasm(self->language)) { -1849 | if (!self->wasm_store) return NULL; -1850 | 
ts_wasm_store_start(self->wasm_store, &self->lexer.data, self->language); -1851 | } - | -1852 | ts_lexer_set_input(&self->lexer, input); -1853 | array_clear(&self->included_range_differences); -1854 | self->included_range_difference_index = 0; - | -1855 | self->operation_count = 0; - | -1856 | if (ts_parser_has_outstanding_parse(self)) { -1857 | LOG("resume_parsing"); -1858 | if (self->canceled_balancing) goto balance; -1859 | } else { -1860 | ts_parser__external_scanner_create(self); -1861 | if (self->has_scanner_error) goto exit; - | -1862 | if (old_tree) { -1863 | ts_subtree_retain(old_tree->root); -1864 | self->old_tree = old_tree->root; -1865 | ts_range_array_get_changed_ranges( -1866 | old_tree->included_ranges, old_tree->included_range_count, -1867 | self->lexer.included_ranges, self->lexer.included_range_count, -1868 | &self->included_range_differences -1869 | ); -1870 | reusable_node_reset(&self->reusable_node, old_tree->root); -1871 | LOG("parse_after_edit"); -1872 | LOG_TREE(self->old_tree); -1873 | for (unsigned i = 0; i < self->included_range_differences.size; i++) { -1874 | TSRange *range = array_get(&self->included_range_differences, i); -1875 | LOG("different_included_range %u - %u", range->start_byte, range->end_byte); -1876 | } -1877 | } else { -1878 | reusable_node_clear(&self->reusable_node); -1879 | LOG("new_parse"); -1880 | } -1881 | } - | -1882 | uint32_t position = 0, last_position = 0, version_count = 0; -1883 | do { -1884 | for ( -1885 | StackVersion version = 0; -1886 | version_count = ts_stack_version_count(self->stack), -1887 | version < version_count; -1888 | version++ -1889 | ) { -1890 | bool allow_node_reuse = version_count == 1; -1891 | while (ts_stack_is_active(self->stack, version)) { -1892 | LOG( -1893 | "process version:%u, version_count:%u, state:%d, row:%u, col:%u", -1894 | version, -1895 | ts_stack_version_count(self->stack), -1896 | ts_stack_state(self->stack, version), -1897 | ts_stack_position(self->stack, 
version).extent.row, -1898 | ts_stack_position(self->stack, version).extent.column -1899 | ); - | -1900 | if (!ts_parser__advance(self, version, allow_node_reuse)) { -1901 | if (self->has_scanner_error) goto exit; -1902 | return NULL; -1903 | } - | -1904 | LOG_STACK(); - | -1905 | position = ts_stack_position(self->stack, version).bytes; -1906 | if (position > last_position || (version > 0 && position == last_position)) { -1907 | last_position = position; -1908 | break; -1909 | } -1910 | } -1911 | } - | -1912 | // After advancing each version of the stack, re-sort the versions by their cost, -1913 | // removing any versions that are no longer worth pursuing. -1914 | unsigned min_error_cost = ts_parser__condense_stack(self); - | -1915 | // If there's already a finished parse tree that's better than any in-progress version, -1916 | // then terminate parsing. Clear the parse stack to remove any extra references to subtrees -1917 | // within the finished tree, ensuring that these subtrees can be safely mutated in-place -1918 | // for rebalancing. 
-1919 | if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) { -1920 | ts_stack_clear(self->stack); -1921 | break; -1922 | } - | -1923 | while (self->included_range_difference_index < self->included_range_differences.size) { -1924 | TSRange *range = array_get(&self->included_range_differences, self->included_range_difference_index); -1925 | if (range->end_byte <= position) { -1926 | self->included_range_difference_index++; -1927 | } else { -1928 | break; -1929 | } -1930 | } -1931 | } while (version_count != 0); - | -1932 | balance: -1933 | ts_assert(self->finished_tree.ptr); -1934 | if (!ts_parser__balance_subtree(self)) { -1935 | self->canceled_balancing = true; -1936 | return false; -1937 | } -1938 | self->canceled_balancing = false; -1939 | LOG("done"); -1940 | LOG_TREE(self->finished_tree); - | -1941 | result = ts_tree_new( -1942 | self->finished_tree, -1943 | self->language, -1944 | self->lexer.included_ranges, -1945 | self->lexer.included_range_count -1946 | ); -1947 | self->finished_tree = NULL_SUBTREE; - | -1948 | exit: -1949 | ts_parser_reset(self); -1950 | return result; -1951 | } - | -1952 | TSTree *ts_parser_parse_with_options( -1953 | TSParser *self, -1954 | const TSTree *old_tree, -1955 | TSInput input, -1956 | TSParseOptions parse_options -1957 | ) { -1958 | self->parse_options = parse_options; -1959 | self->parse_state.payload = parse_options.payload; -1960 | TSTree *result = ts_parser_parse(self, old_tree, input); -1961 | // Reset parser options before further parse calls. 
-1962 | self->parse_options = (TSParseOptions) {0}; -1963 | return result; -1964 | } - | -1965 | TSTree *ts_parser_parse_string( -1966 | TSParser *self, -1967 | const TSTree *old_tree, -1968 | const char *string, -1969 | uint32_t length -1970 | ) { -1971 | return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8); -1972 | } - | -1973 | TSTree *ts_parser_parse_string_encoding( -1974 | TSParser *self, -1975 | const TSTree *old_tree, -1976 | const char *string, -1977 | uint32_t length, -1978 | TSInputEncoding encoding -1979 | ) { -1980 | TSStringInput input = {string, length}; -1981 | return ts_parser_parse(self, old_tree, (TSInput) { -1982 | &input, -1983 | ts_string_input_read, -1984 | encoding, -1985 | NULL, -1986 | }); -1987 | } - | -1988 | void ts_parser_set_wasm_store(TSParser *self, TSWasmStore *store) { -1989 | if (self->language && ts_language_is_wasm(self->language)) { -1990 | // Copy the assigned language into the new store. -1991 | const TSLanguage *copy = ts_language_copy(self->language); -1992 | ts_parser_set_language(self, copy); -1993 | ts_language_delete(copy); -1994 | } - | -1995 | ts_wasm_store_delete(self->wasm_store); -1996 | self->wasm_store = store; -1997 | } - | -1998 | TSWasmStore *ts_parser_take_wasm_store(TSParser *self) { -1999 | if (self->language && ts_language_is_wasm(self->language)) { -2000 | ts_parser_set_language(self, NULL); -2001 | } - | -2002 | TSWasmStore *result = self->wasm_store; -2003 | self->wasm_store = NULL; -2004 | return result; -2005 | } - | -2006 | #undef LOG - - - --------------------------------------------------------------------------------- -/lib/src/parser.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_PARSER_H_ - 2 | #define TREE_SITTER_PARSER_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include - 7 | #include - 8 | #include - | - 9 | #define ts_builtin_sym_error ((TSSymbol)-1) - 10 | 
#define ts_builtin_sym_end 0 - 11 | #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 - | - 12 | #ifndef TREE_SITTER_API_H_ - 13 | typedef uint16_t TSStateId; - 14 | typedef uint16_t TSSymbol; - 15 | typedef uint16_t TSFieldId; - 16 | typedef struct TSLanguage TSLanguage; - 17 | typedef struct TSLanguageMetadata { - 18 | uint8_t major_version; - 19 | uint8_t minor_version; - 20 | uint8_t patch_version; - 21 | } TSLanguageMetadata; - 22 | #endif - | - 23 | typedef struct { - 24 | TSFieldId field_id; - 25 | uint8_t child_index; - 26 | bool inherited; - 27 | } TSFieldMapEntry; - | - 28 | // Used to index the field and supertype maps. - 29 | typedef struct { - 30 | uint16_t index; - 31 | uint16_t length; - 32 | } TSMapSlice; - | - 33 | typedef struct { - 34 | bool visible; - 35 | bool named; - 36 | bool supertype; - 37 | } TSSymbolMetadata; - | - 38 | typedef struct TSLexer TSLexer; - | - 39 | struct TSLexer { - 40 | int32_t lookahead; - 41 | TSSymbol result_symbol; - 42 | void (*advance)(TSLexer *, bool); - 43 | void (*mark_end)(TSLexer *); - 44 | uint32_t (*get_column)(TSLexer *); - 45 | bool (*is_at_included_range_start)(const TSLexer *); - 46 | bool (*eof)(const TSLexer *); - 47 | void (*log)(const TSLexer *, const char *, ...); - 48 | }; - | - 49 | typedef enum { - 50 | TSParseActionTypeShift, - 51 | TSParseActionTypeReduce, - 52 | TSParseActionTypeAccept, - 53 | TSParseActionTypeRecover, - 54 | } TSParseActionType; - | - 55 | typedef union { - 56 | struct { - 57 | uint8_t type; - 58 | TSStateId state; - 59 | bool extra; - 60 | bool repetition; - 61 | } shift; - 62 | struct { - 63 | uint8_t type; - 64 | uint8_t child_count; - 65 | TSSymbol symbol; - 66 | int16_t dynamic_precedence; - 67 | uint16_t production_id; - 68 | } reduce; - 69 | uint8_t type; - 70 | } TSParseAction; - | - 71 | typedef struct { - 72 | uint16_t lex_state; - 73 | uint16_t external_lex_state; - 74 | } TSLexMode; - | - 75 | typedef struct { - 76 | uint16_t lex_state; - 77 | uint16_t 
external_lex_state; - 78 | uint16_t reserved_word_set_id; - 79 | } TSLexerMode; - | - 80 | typedef union { - 81 | TSParseAction action; - 82 | struct { - 83 | uint8_t count; - 84 | bool reusable; - 85 | } entry; - 86 | } TSParseActionEntry; - | - 87 | typedef struct { - 88 | int32_t start; - 89 | int32_t end; - 90 | } TSCharacterRange; - | - 91 | struct TSLanguage { - 92 | uint32_t abi_version; - 93 | uint32_t symbol_count; - 94 | uint32_t alias_count; - 95 | uint32_t token_count; - 96 | uint32_t external_token_count; - 97 | uint32_t state_count; - 98 | uint32_t large_state_count; - 99 | uint32_t production_id_count; - 100 | uint32_t field_count; - 101 | uint16_t max_alias_sequence_length; - 102 | const uint16_t *parse_table; - 103 | const uint16_t *small_parse_table; - 104 | const uint32_t *small_parse_table_map; - 105 | const TSParseActionEntry *parse_actions; - 106 | const char * const *symbol_names; - 107 | const char * const *field_names; - 108 | const TSMapSlice *field_map_slices; - 109 | const TSFieldMapEntry *field_map_entries; - 110 | const TSSymbolMetadata *symbol_metadata; - 111 | const TSSymbol *public_symbol_map; - 112 | const uint16_t *alias_map; - 113 | const TSSymbol *alias_sequences; - 114 | const TSLexerMode *lex_modes; - 115 | bool (*lex_fn)(TSLexer *, TSStateId); - 116 | bool (*keyword_lex_fn)(TSLexer *, TSStateId); - 117 | TSSymbol keyword_capture_token; - 118 | struct { - 119 | const bool *states; - 120 | const TSSymbol *symbol_map; - 121 | void *(*create)(void); - 122 | void (*destroy)(void *); - 123 | bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); - 124 | unsigned (*serialize)(void *, char *); - 125 | void (*deserialize)(void *, const char *, unsigned); - 126 | } external_scanner; - 127 | const TSStateId *primary_state_ids; - 128 | const char *name; - 129 | const TSSymbol *reserved_words; - 130 | uint16_t max_reserved_word_set_size; - 131 | uint32_t supertype_count; - 132 | const TSSymbol *supertype_symbols; - 133 | const 
TSMapSlice *supertype_map_slices; - 134 | const TSSymbol *supertype_map_entries; - 135 | TSLanguageMetadata metadata; - 136 | }; - | - 137 | static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { - 138 | uint32_t index = 0; - 139 | uint32_t size = len - index; - 140 | while (size > 1) { - 141 | uint32_t half_size = size / 2; - 142 | uint32_t mid_index = index + half_size; - 143 | const TSCharacterRange *range = &ranges[mid_index]; - 144 | if (lookahead >= range->start && lookahead <= range->end) { - 145 | return true; - 146 | } else if (lookahead > range->end) { - 147 | index = mid_index; - 148 | } - 149 | size -= half_size; - 150 | } - 151 | const TSCharacterRange *range = &ranges[index]; - 152 | return (lookahead >= range->start && lookahead <= range->end); - 153 | } - | - 154 | /* - 155 | * Lexer Macros - 156 | */ - | - 157 | #ifdef _MSC_VER - 158 | #define UNUSED __pragma(warning(suppress : 4101)) - 159 | #else - 160 | #define UNUSED __attribute__((unused)) - 161 | #endif - | - 162 | #define START_LEXER() \ - 163 | bool result = false; \ - 164 | bool skip = false; \ - 165 | UNUSED \ - 166 | bool eof = false; \ - 167 | int32_t lookahead; \ - 168 | goto start; \ - 169 | next_state: \ - 170 | lexer->advance(lexer, skip); \ - 171 | start: \ - 172 | skip = false; \ - 173 | lookahead = lexer->lookahead; - | - 174 | #define ADVANCE(state_value) \ - 175 | { \ - 176 | state = state_value; \ - 177 | goto next_state; \ - 178 | } - | - 179 | #define ADVANCE_MAP(...) 
\ - 180 | { \ - 181 | static const uint16_t map[] = { __VA_ARGS__ }; \ - 182 | for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ - 183 | if (map[i] == lookahead) { \ - 184 | state = map[i + 1]; \ - 185 | goto next_state; \ - 186 | } \ - 187 | } \ - 188 | } - | - 189 | #define SKIP(state_value) \ - 190 | { \ - 191 | skip = true; \ - 192 | state = state_value; \ - 193 | goto next_state; \ - 194 | } - | - 195 | #define ACCEPT_TOKEN(symbol_value) \ - 196 | result = true; \ - 197 | lexer->result_symbol = symbol_value; \ - 198 | lexer->mark_end(lexer); - | - 199 | #define END_STATE() return result; - | - 200 | /* - 201 | * Parse Table Macros - 202 | */ - | - 203 | #define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) - | - 204 | #define STATE(id) id - | - 205 | #define ACTIONS(id) id - | - 206 | #define SHIFT(state_value) \ - 207 | {{ \ - 208 | .shift = { \ - 209 | .type = TSParseActionTypeShift, \ - 210 | .state = (state_value) \ - 211 | } \ - 212 | }} - | - 213 | #define SHIFT_REPEAT(state_value) \ - 214 | {{ \ - 215 | .shift = { \ - 216 | .type = TSParseActionTypeShift, \ - 217 | .state = (state_value), \ - 218 | .repetition = true \ - 219 | } \ - 220 | }} - | - 221 | #define SHIFT_EXTRA() \ - 222 | {{ \ - 223 | .shift = { \ - 224 | .type = TSParseActionTypeShift, \ - 225 | .extra = true \ - 226 | } \ - 227 | }} - | - 228 | #define REDUCE(symbol_name, children, precedence, prod_id) \ - 229 | {{ \ - 230 | .reduce = { \ - 231 | .type = TSParseActionTypeReduce, \ - 232 | .symbol = symbol_name, \ - 233 | .child_count = children, \ - 234 | .dynamic_precedence = precedence, \ - 235 | .production_id = prod_id \ - 236 | }, \ - 237 | }} - | - 238 | #define RECOVER() \ - 239 | {{ \ - 240 | .type = TSParseActionTypeRecover \ - 241 | }} - | - 242 | #define ACCEPT_INPUT() \ - 243 | {{ \ - 244 | .type = TSParseActionTypeAccept \ - 245 | }} - | - 246 | #ifdef __cplusplus - 247 | } - 248 | #endif - | - 249 | #endif // TREE_SITTER_PARSER_H_ - - - 
--------------------------------------------------------------------------------- -/lib/src/point.c: --------------------------------------------------------------------------------- - 1 | #include "point.h" - | - 2 | void ts_point_edit(TSPoint *point, uint32_t *byte, const TSInputEdit *edit) { - 3 | uint32_t start_byte = *byte; - 4 | TSPoint start_point = *point; - | - 5 | if (start_byte >= edit->old_end_byte) { - 6 | start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte); - 7 | start_point = point_add(edit->new_end_point, point_sub(start_point, edit->old_end_point)); - 8 | } else if (start_byte > edit->start_byte) { - 9 | start_byte = edit->new_end_byte; - 10 | start_point = edit->new_end_point; - 11 | } - | - 12 | *point = start_point; - 13 | *byte = start_byte; - 14 | } - - - --------------------------------------------------------------------------------- -/lib/src/point.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_POINT_H_ - 2 | #define TREE_SITTER_POINT_H_ - | - 3 | #include "tree_sitter/api.h" - | - 4 | #define POINT_ZERO ((TSPoint) {0, 0}) - 5 | #define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX}) - | - 6 | static inline TSPoint point__new(unsigned row, unsigned column) { - 7 | TSPoint result = {row, column}; - 8 | return result; - 9 | } - | - 10 | static inline TSPoint point_add(TSPoint a, TSPoint b) { - 11 | if (b.row > 0) - 12 | return point__new(a.row + b.row, b.column); - 13 | else - 14 | return point__new(a.row, a.column + b.column); - 15 | } - | - 16 | static inline TSPoint point_sub(TSPoint a, TSPoint b) { - 17 | if (a.row > b.row) - 18 | return point__new(a.row - b.row, a.column); - 19 | else - 20 | return point__new(0, (a.column >= b.column) ? 
a.column - b.column : 0); - 21 | } - | - 22 | static inline bool point_lte(TSPoint a, TSPoint b) { - 23 | return (a.row < b.row) || (a.row == b.row && a.column <= b.column); - 24 | } - | - 25 | static inline bool point_lt(TSPoint a, TSPoint b) { - 26 | return (a.row < b.row) || (a.row == b.row && a.column < b.column); - 27 | } - | - 28 | static inline bool point_gt(TSPoint a, TSPoint b) { - 29 | return (a.row > b.row) || (a.row == b.row && a.column > b.column); - 30 | } - | - 31 | static inline bool point_gte(TSPoint a, TSPoint b) { - 32 | return (a.row > b.row) || (a.row == b.row && a.column >= b.column); - 33 | } - | - 34 | static inline bool point_eq(TSPoint a, TSPoint b) { - 35 | return a.row == b.row && a.column == b.column; - 36 | } - | - 37 | #endif - - - --------------------------------------------------------------------------------- -/lib/src/portable/endian.h: --------------------------------------------------------------------------------- - 1 | // "License": Public Domain - 2 | // I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like. - 3 | // In case there are jurisdictions that don't support putting things in the public domain you can also consider it to - 4 | // be "dual licensed" under the BSD, MIT and Apache licenses, if you want to. This code is trivial anyway. Consider it - 5 | // an example on how to get the endian conversion functions on different platforms. 
- | - 6 | // updates from https://github.com/mikepb/endian.h/issues/4 - | - 7 | #ifndef ENDIAN_H - 8 | #define ENDIAN_H - | - 9 | #if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__) - | - 10 | # define __WINDOWS__ - | - 11 | #endif - | - 12 | #if defined(HAVE_ENDIAN_H) || \ - 13 | defined(__linux__) || \ - 14 | defined(__GNU__) || \ - 15 | defined(__HAIKU__) || \ - 16 | defined(__illumos__) || \ - 17 | defined(__NetBSD__) || \ - 18 | defined(__OpenBSD__) || \ - 19 | defined(__CYGWIN__) || \ - 20 | defined(__MSYS__) || \ - 21 | defined(__EMSCRIPTEN__) || \ - 22 | defined(__wasi__) || \ - 23 | defined(__wasm__) - | - 24 | #if defined(__NetBSD__) - 25 | #define _NETBSD_SOURCE 1 - 26 | #endif - | - 27 | # include - | - 28 | #elif defined(HAVE_SYS_ENDIAN_H) || \ - 29 | defined(__FreeBSD__) || \ - 30 | defined(__DragonFly__) - | - 31 | # include - | - 32 | #elif defined(__APPLE__) - 33 | # define __BYTE_ORDER BYTE_ORDER - 34 | # define __BIG_ENDIAN BIG_ENDIAN - 35 | # define __LITTLE_ENDIAN LITTLE_ENDIAN - 36 | # define __PDP_ENDIAN PDP_ENDIAN - | - 37 | # if !defined(_POSIX_C_SOURCE) - 38 | # include - | - 39 | # define htobe16(x) OSSwapHostToBigInt16(x) - 40 | # define htole16(x) OSSwapHostToLittleInt16(x) - 41 | # define be16toh(x) OSSwapBigToHostInt16(x) - 42 | # define le16toh(x) OSSwapLittleToHostInt16(x) - | - 43 | # define htobe32(x) OSSwapHostToBigInt32(x) - 44 | # define htole32(x) OSSwapHostToLittleInt32(x) - 45 | # define be32toh(x) OSSwapBigToHostInt32(x) - 46 | # define le32toh(x) OSSwapLittleToHostInt32(x) - | - 47 | # define htobe64(x) OSSwapHostToBigInt64(x) - 48 | # define htole64(x) OSSwapHostToLittleInt64(x) - 49 | # define be64toh(x) OSSwapBigToHostInt64(x) - 50 | # define le64toh(x) OSSwapLittleToHostInt64(x) - 51 | # else - 52 | # if BYTE_ORDER == LITTLE_ENDIAN - 53 | # define htobe16(x) __builtin_bswap16(x) - 54 | # define htole16(x) (x) - 55 | # define be16toh(x) __builtin_bswap16(x) - 56 | # define le16toh(x) (x) 
- | - 57 | # define htobe32(x) __builtin_bswap32(x) - 58 | # define htole32(x) (x) - 59 | # define be32toh(x) __builtin_bswap32(x) - 60 | # define le32toh(x) (x) - | - 61 | # define htobe64(x) __builtin_bswap64(x) - 62 | # define htole64(x) (x) - 63 | # define be64toh(x) __builtin_bswap64(x) - 64 | # define le64toh(x) (x) - 65 | # elif BYTE_ORDER == BIG_ENDIAN - 66 | # define htobe16(x) (x) - 67 | # define htole16(x) __builtin_bswap16(x) - 68 | # define be16toh(x) (x) - 69 | # define le16toh(x) __builtin_bswap16(x) - | - 70 | # define htobe32(x) (x) - 71 | # define htole32(x) __builtin_bswap32(x) - 72 | # define be32toh(x) (x) - 73 | # define le32toh(x) __builtin_bswap32(x) - | - 74 | # define htobe64(x) (x) - 75 | # define htole64(x) __builtin_bswap64(x) - 76 | # define be64toh(x) (x) - 77 | # define le64toh(x) __builtin_bswap64(x) - 78 | # else - 79 | # error byte order not supported - 80 | # endif - 81 | # endif - | - 82 | #elif defined(__WINDOWS__) - | - 83 | # if defined(_MSC_VER) && !defined(__clang__) - 84 | # include - 85 | # define B_SWAP_16(x) _byteswap_ushort(x) - 86 | # define B_SWAP_32(x) _byteswap_ulong(x) - 87 | # define B_SWAP_64(x) _byteswap_uint64(x) - 88 | # else - 89 | # define B_SWAP_16(x) __builtin_bswap16(x) - 90 | # define B_SWAP_32(x) __builtin_bswap32(x) - 91 | # define B_SWAP_64(x) __builtin_bswap64(x) - 92 | # endif - | - 93 | # if defined(__MINGW32__) || defined(HAVE_SYS_PARAM_H) - 94 | # include - 95 | # endif - | - 96 | # ifndef BIG_ENDIAN - 97 | # ifdef __BIG_ENDIAN - 98 | # define BIG_ENDIAN __BIG_ENDIAN - 99 | # elif defined(__ORDER_BIG_ENDIAN__) - 100 | # define BIG_ENDIAN __ORDER_BIG_ENDIAN__ - 101 | # else - 102 | # define BIG_ENDIAN 4321 - 103 | # endif - 104 | # endif - | - 105 | # ifndef LITTLE_ENDIAN - 106 | # ifdef __LITTLE_ENDIAN - 107 | # define LITTLE_ENDIAN __LITTLE_ENDIAN - 108 | # elif defined(__ORDER_LITTLE_ENDIAN__) - 109 | # define LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ - 110 | # else - 111 | # define LITTLE_ENDIAN 
1234 - 112 | # endif - 113 | # endif - | - 114 | # ifndef BYTE_ORDER - 115 | # ifdef __BYTE_ORDER - 116 | # define BYTE_ORDER __BYTE_ORDER - 117 | # elif defined(__BYTE_ORDER__) - 118 | # define BYTE_ORDER __BYTE_ORDER__ - 119 | # else - 120 | /* assume LE on Windows if nothing was defined */ - 121 | # define BYTE_ORDER LITTLE_ENDIAN - 122 | # endif - 123 | # endif - | - 124 | # if BYTE_ORDER == LITTLE_ENDIAN - | - 125 | # define htobe16(x) B_SWAP_16(x) - 126 | # define htole16(x) (x) - 127 | # define be16toh(x) B_SWAP_16(x) - 128 | # define le16toh(x) (x) - | - 129 | # define htobe32(x) B_SWAP_32(x) - 130 | # define htole32(x) (x) - 131 | # define be32toh(x) B_SWAP_32(x) - 132 | # define le32toh(x) (x) - | - 133 | # define htobe64(x) B_SWAP_64(x) - 134 | # define htole64(x) (x) - 135 | # define be64toh(x) B_SWAP_64(x) - 136 | # define le64toh(x) (x) - | - 137 | # elif BYTE_ORDER == BIG_ENDIAN - | - 138 | # define htobe16(x) (x) - 139 | # define htole16(x) B_SWAP_16(x) - 140 | # define be16toh(x) (x) - 141 | # define le16toh(x) B_SWAP_16(x) - | - 142 | # define htobe32(x) (x) - 143 | # define htole32(x) B_SWAP_32(x) - 144 | # define be32toh(x) (x) - 145 | # define le32toh(x) B_SWAP_32(x) - | - 146 | # define htobe64(x) (x) - 147 | # define htole64(x) B_SWAP_64(x) - 148 | # define be64toh(x) (x) - 149 | # define le64toh(x) B_SWAP_64(x) - | - 150 | # else - | - 151 | # error byte order not supported - | - 152 | # endif - | - 153 | #elif defined(__QNXNTO__) - | - 154 | # include - | - 155 | # define __LITTLE_ENDIAN 1234 - 156 | # define __BIG_ENDIAN 4321 - 157 | # define __PDP_ENDIAN 3412 - | - 158 | # if defined(__BIGENDIAN__) - | - 159 | # define __BYTE_ORDER __BIG_ENDIAN - | - 160 | # define htobe16(x) (x) - 161 | # define htobe32(x) (x) - 162 | # define htobe64(x) (x) - | - 163 | # define htole16(x) ENDIAN_SWAP16(x) - 164 | # define htole32(x) ENDIAN_SWAP32(x) - 165 | # define htole64(x) ENDIAN_SWAP64(x) - | - 166 | # elif defined(__LITTLEENDIAN__) - | - 167 | # 
define __BYTE_ORDER __LITTLE_ENDIAN - | - 168 | # define htole16(x) (x) - 169 | # define htole32(x) (x) - 170 | # define htole64(x) (x) - | - 171 | # define htobe16(x) ENDIAN_SWAP16(x) - 172 | # define htobe32(x) ENDIAN_SWAP32(x) - 173 | # define htobe64(x) ENDIAN_SWAP64(x) - | - 174 | # else - | - 175 | # error byte order not supported - | - 176 | # endif - | - 177 | # define be16toh(x) ENDIAN_BE16(x) - 178 | # define be32toh(x) ENDIAN_BE32(x) - 179 | # define be64toh(x) ENDIAN_BE64(x) - 180 | # define le16toh(x) ENDIAN_LE16(x) - 181 | # define le32toh(x) ENDIAN_LE32(x) - 182 | # define le64toh(x) ENDIAN_LE64(x) - | - 183 | #else - | - 184 | # error platform not supported - | - 185 | #endif - | - 186 | #endif - - - --------------------------------------------------------------------------------- -/lib/src/query.c: --------------------------------------------------------------------------------- - 1 | /* - 2 | * On NetBSD, defining standard requirements like this removes symbols - 3 | * from the namespace; however, we need non-standard symbols for - 4 | * endian.h. - 5 | */ - 6 | #if defined(__NetBSD__) && defined(_POSIX_C_SOURCE) - 7 | #undef _POSIX_C_SOURCE - 8 | #endif - | - 9 | #include "tree_sitter/api.h" - 10 | #include "./alloc.h" - 11 | #include "./array.h" - 12 | #include "./language.h" - 13 | #include "./point.h" - 14 | #include "./tree_cursor.h" - 15 | #include "./unicode.h" - 16 | #include - | - 17 | // #define DEBUG_ANALYZE_QUERY - 18 | // #define DEBUG_EXECUTE_QUERY - | - 19 | #define MAX_STEP_CAPTURE_COUNT 3 - 20 | #define MAX_NEGATED_FIELD_COUNT 8 - 21 | #define MAX_STATE_PREDECESSOR_COUNT 256 - 22 | #define MAX_ANALYSIS_STATE_DEPTH 8 - 23 | #define MAX_ANALYSIS_ITERATION_COUNT 256 - | - 24 | /* - 25 | * Stream - A sequence of unicode characters derived from a UTF8 string. - 26 | * This struct is used in parsing queries from S-expressions. 
- 27 | */ - 28 | typedef struct { - 29 | const char *input; - 30 | const char *start; - 31 | const char *end; - 32 | int32_t next; - 33 | uint8_t next_size; - 34 | } Stream; - | - 35 | /* - 36 | * QueryStep - A step in the process of matching a query. Each node within - 37 | * a query S-expression corresponds to one of these steps. An entire pattern - 38 | * is represented as a sequence of these steps. The basic properties of a - 39 | * node are represented by these fields: - 40 | * - `symbol` - The grammar symbol to match. A zero value represents the - 41 | * wildcard symbol, '_'. - 42 | * - `field` - The field name to match. A zero value means that a field name - 43 | * was not specified. - 44 | * - `capture_ids` - An array of integers representing the names of captures - 45 | * associated with this node in the pattern, terminated by a `NONE` value. - 46 | * - `depth` - The depth where this node occurs in the pattern. The root node - 47 | * of the pattern has depth zero. - 48 | * - `negated_field_list_id` - An id representing a set of fields that must - 49 | * not be present on a node matching this step. - 50 | * - 51 | * Steps have some additional fields in order to handle the `.` (or "anchor") operator, - 52 | * which forbids additional child nodes: - 53 | * - `is_immediate` - Indicates that the node matching this step cannot be preceded - 54 | * by other sibling nodes that weren't specified in the pattern. - 55 | * - `is_last_child` - Indicates that the node matching this step cannot have any - 56 | * subsequent named siblings. - 57 | * - 58 | * For simple patterns, steps are matched in sequential order. But in order to - 59 | * handle alternative/repeated/optional sub-patterns, query steps are not always - 60 | * structured as a linear sequence; they sometimes need to split and merge. This - 61 | * is done using the following fields: - 62 | * - `alternative_index` - The index of a different query step that serves as - 63 | * an alternative to this step. 
A `NONE` value represents no alternative. - 64 | * When a query state reaches a step with an alternative index, the state - 65 | * is duplicated, with one copy remaining at the original step, and one copy - 66 | * moving to the alternative step. The alternative may have its own alternative - 67 | * step, so this splitting is an iterative process. - 68 | * - `is_dead_end` - Indicates that this state cannot be passed directly, and - 69 | * exists only in order to redirect to an alternative index, with no splitting. - 70 | * - `is_pass_through` - Indicates that state has no matching logic of its own, - 71 | * and exists only to split a state. One copy of the state advances immediately - 72 | * to the next step, and one moves to the alternative step. - 73 | * - `alternative_is_immediate` - Indicates that this step's alternative step - 74 | * should be treated as if `is_immediate` is true. - 75 | * - 76 | * Steps also store some derived state that summarizes how they relate to other - 77 | * steps within the same pattern. This is used to optimize the matching process: - 78 | * - `contains_captures` - Indicates that this step or one of its child steps - 79 | * has a non-empty `capture_ids` list. - 80 | * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then - 81 | * it and all of its subsequent sibling steps within the same parent pattern - 82 | * are guaranteed to match. - 83 | * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but - 84 | * for the entire top-level pattern. When iterating through a query's - 85 | * captures using `ts_query_cursor_next_capture`, this field is used to - 86 | * detect that a capture can safely be returned from a match that has not - 87 | * even completed yet. 
- 88 | */ - 89 | typedef struct { - 90 | TSSymbol symbol; - 91 | TSSymbol supertype_symbol; - 92 | TSFieldId field; - 93 | uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; - 94 | uint16_t depth; - 95 | uint16_t alternative_index; - 96 | uint16_t negated_field_list_id; - 97 | bool is_named: 1; - 98 | bool is_immediate: 1; - 99 | bool is_last_child: 1; - 100 | bool is_pass_through: 1; - 101 | bool is_dead_end: 1; - 102 | bool alternative_is_immediate: 1; - 103 | bool contains_captures: 1; - 104 | bool root_pattern_guaranteed: 1; - 105 | bool parent_pattern_guaranteed: 1; - 106 | bool is_missing: 1; - 107 | } QueryStep; - | - 108 | /* - 109 | * Slice - A slice of an external array. Within a query, capture names, - 110 | * literal string values, and predicate step information are stored in three - 111 | * contiguous arrays. Individual captures, string values, and predicates are - 112 | * represented as slices of these three arrays. - 113 | */ - 114 | typedef struct { - 115 | uint32_t offset; - 116 | uint32_t length; - 117 | } Slice; - | - 118 | /* - 119 | * SymbolTable - a two-way mapping of strings to ids. - 120 | */ - 121 | typedef struct { - 122 | Array(char) characters; - 123 | Array(Slice) slices; - 124 | } SymbolTable; - | - 125 | /** - 126 | * CaptureQuantifiers - a data structure holding the quantifiers of pattern captures. - 127 | */ - 128 | typedef Array(uint8_t) CaptureQuantifiers; - | - 129 | /* - 130 | * PatternEntry - Information about the starting point for matching a particular - 131 | * pattern. These entries are stored in a 'pattern map' - a sorted array that - 132 | * makes it possible to efficiently lookup patterns based on the symbol for their - 133 | * first step. The entry consists of the following fields: - 134 | * - `pattern_index` - the index of the pattern within the query - 135 | * - `step_index` - the index of the pattern's first step in the shared `steps` array - 136 | * - `is_rooted` - whether or not the pattern has a single root node. 
This property - 137 | * affects decisions about whether or not to start the pattern for nodes outside - 138 | * of a QueryCursor's range restriction. - 139 | */ - 140 | typedef struct { - 141 | uint16_t step_index; - 142 | uint16_t pattern_index; - 143 | bool is_rooted; - 144 | } PatternEntry; - | - 145 | typedef struct { - 146 | Slice steps; - 147 | Slice predicate_steps; - 148 | uint32_t start_byte; - 149 | uint32_t end_byte; - 150 | bool is_non_local; - 151 | } QueryPattern; - | - 152 | typedef struct { - 153 | uint32_t byte_offset; - 154 | uint16_t step_index; - 155 | } StepOffset; - | - 156 | /* - 157 | * QueryState - The state of an in-progress match of a particular pattern - 158 | * in a query. While executing, a `TSQueryCursor` must keep track of a number - 159 | * of possible in-progress matches. Each of those possible matches is - 160 | * represented as one of these states. Fields: - 161 | * - `id` - A numeric id that is exposed to the public API. This allows the - 162 | * caller to remove a given match, preventing any more of its captures - 163 | * from being returned. - 164 | * - `start_depth` - The depth in the tree where the first step of the state's - 165 | * pattern was matched. - 166 | * - `pattern_index` - The pattern that the state is matching. - 167 | * - `consumed_capture_count` - The number of captures from this match that - 168 | * have already been returned. - 169 | * - `capture_list_id` - A numeric id that can be used to retrieve the state's - 170 | * list of captures from the `CaptureListPool`. - 171 | * - `seeking_immediate_match` - A flag that indicates that the state's next - 172 | * step must be matched by the very next sibling. This is used when - 173 | * processing repetitions, or when processing a wildcard node followed by - 174 | * an anchor. 
- 175 | * - `has_in_progress_alternatives` - A flag that indicates that there is are - 176 | * other states that have the same captures as this state, but are at - 177 | * different steps in their pattern. This means that in order to obey the - 178 | * 'longest-match' rule, this state should not be returned as a match until - 179 | * it is clear that there can be no other alternative match with more captures. - 180 | */ - 181 | typedef struct { - 182 | uint32_t id; - 183 | uint32_t capture_list_id; - 184 | uint16_t start_depth; - 185 | uint16_t step_index; - 186 | uint16_t pattern_index; - 187 | uint16_t consumed_capture_count: 12; - 188 | bool seeking_immediate_match: 1; - 189 | bool has_in_progress_alternatives: 1; - 190 | bool dead: 1; - 191 | bool needs_parent: 1; - 192 | } QueryState; - | - 193 | typedef Array(TSQueryCapture) CaptureList; - | - 194 | /* - 195 | * CaptureListPool - A collection of *lists* of captures. Each query state needs - 196 | * to maintain its own list of captures. To avoid repeated allocations, this struct - 197 | * maintains a fixed set of capture lists, and keeps track of which ones are - 198 | * currently in use by a query state. - 199 | */ - 200 | typedef struct { - 201 | Array(CaptureList) list; - 202 | CaptureList empty_list; - 203 | // The maximum number of capture lists that we are allowed to allocate. We - 204 | // never allow `list` to allocate more entries than this, dropping pending - 205 | // matches if needed to stay under the limit. - 206 | uint32_t max_capture_list_count; - 207 | // The number of capture lists allocated in `list` that are not currently in - 208 | // use. We reuse those existing-but-unused capture lists before trying to - 209 | // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture - 210 | // list's length to indicate that it's not in use. 
- 211 | uint32_t free_capture_list_count; - 212 | } CaptureListPool; - | - 213 | /* - 214 | * AnalysisState - The state needed for walking the parse table when analyzing - 215 | * a query pattern, to determine at which steps the pattern might fail to match. - 216 | */ - 217 | typedef struct { - 218 | TSStateId parse_state; - 219 | TSSymbol parent_symbol; - 220 | uint16_t child_index; - 221 | TSFieldId field_id: 15; - 222 | bool done: 1; - 223 | } AnalysisStateEntry; - | - 224 | typedef struct { - 225 | AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; - 226 | uint16_t depth; - 227 | uint16_t step_index; - 228 | TSSymbol root_symbol; - 229 | } AnalysisState; - | - 230 | typedef Array(AnalysisState *) AnalysisStateSet; - | - 231 | typedef struct { - 232 | AnalysisStateSet states; - 233 | AnalysisStateSet next_states; - 234 | AnalysisStateSet deeper_states; - 235 | AnalysisStateSet state_pool; - 236 | Array(uint16_t) final_step_indices; - 237 | Array(TSSymbol) finished_parent_symbols; - 238 | bool did_abort; - 239 | } QueryAnalysis; - | - 240 | /* - 241 | * AnalysisSubgraph - A subset of the states in the parse table that are used - 242 | * in constructing nodes with a certain symbol. Each state is accompanied by - 243 | * some information about the possible node that could be produced in - 244 | * downstream states. - 245 | */ - 246 | typedef struct { - 247 | TSStateId state; - 248 | uint16_t production_id; - 249 | uint8_t child_index: 7; - 250 | bool done: 1; - 251 | } AnalysisSubgraphNode; - | - 252 | typedef struct { - 253 | TSSymbol symbol; - 254 | Array(TSStateId) start_states; - 255 | Array(AnalysisSubgraphNode) nodes; - 256 | } AnalysisSubgraph; - | - 257 | typedef Array(AnalysisSubgraph) AnalysisSubgraphArray; - | - 258 | /* - 259 | * StatePredecessorMap - A map that stores the predecessors of each parse state. - 260 | * This is used during query analysis to determine which parse states can lead - 261 | * to which reduce actions. 
- 262 | */ - 263 | typedef struct { - 264 | TSStateId *contents; - 265 | } StatePredecessorMap; - | - 266 | /* - 267 | * TSQuery - A tree query, compiled from a string of S-expressions. The query - 268 | * itself is immutable. The mutable state used in the process of executing the - 269 | * query is stored in a `TSQueryCursor`. - 270 | */ - 271 | struct TSQuery { - 272 | SymbolTable captures; - 273 | SymbolTable predicate_values; - 274 | Array(CaptureQuantifiers) capture_quantifiers; - 275 | Array(QueryStep) steps; - 276 | Array(PatternEntry) pattern_map; - 277 | Array(TSQueryPredicateStep) predicate_steps; - 278 | Array(QueryPattern) patterns; - 279 | Array(StepOffset) step_offsets; - 280 | Array(TSFieldId) negated_fields; - 281 | Array(char) string_buffer; - 282 | Array(TSSymbol) repeat_symbols_with_rootless_patterns; - 283 | const TSLanguage *language; - 284 | uint16_t wildcard_root_pattern_count; - 285 | }; - | - 286 | /* - 287 | * TSQueryCursor - A stateful struct used to execute a query on a tree. 
- 288 | */ - 289 | struct TSQueryCursor { - 290 | const TSQuery *query; - 291 | TSTreeCursor cursor; - 292 | Array(QueryState) states; - 293 | Array(QueryState) finished_states; - 294 | CaptureListPool capture_list_pool; - 295 | uint32_t depth; - 296 | uint32_t max_start_depth; - 297 | uint32_t start_byte; - 298 | uint32_t end_byte; - 299 | TSPoint start_point; - 300 | TSPoint end_point; - 301 | uint32_t next_state_id; - 302 | const TSQueryCursorOptions *query_options; - 303 | TSQueryCursorState query_state; - 304 | unsigned operation_count; - 305 | bool on_visible_node; - 306 | bool ascending; - 307 | bool halted; - 308 | bool did_exceed_match_limit; - 309 | }; - | - 310 | static const TSQueryError PARENT_DONE = -1; - 311 | static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; - 312 | static const uint16_t NONE = UINT16_MAX; - 313 | static const TSSymbol WILDCARD_SYMBOL = 0; - 314 | static const unsigned OP_COUNT_PER_QUERY_CALLBACK_CHECK = 100; - | - 315 | /********** - 316 | * Stream - 317 | **********/ - | - 318 | // Advance to the next unicode code point in the stream. - 319 | static bool stream_advance(Stream *self) { - 320 | self->input += self->next_size; - 321 | if (self->input < self->end) { - 322 | uint32_t size = ts_decode_utf8( - 323 | (const uint8_t *)self->input, - 324 | (uint32_t)(self->end - self->input), - 325 | &self->next - 326 | ); - 327 | if (size > 0) { - 328 | self->next_size = size; - 329 | return true; - 330 | } - 331 | } else { - 332 | self->next_size = 0; - 333 | self->next = '\0'; - 334 | } - 335 | return false; - 336 | } - | - 337 | // Reset the stream to the given input position, represented as a pointer - 338 | // into the input string. 
- 339 | static void stream_reset(Stream *self, const char *input) { - 340 | self->input = input; - 341 | self->next_size = 0; - 342 | stream_advance(self); - 343 | } - | - 344 | static Stream stream_new(const char *string, uint32_t length) { - 345 | Stream self = { - 346 | .next = 0, - 347 | .input = string, - 348 | .start = string, - 349 | .end = string + length, - 350 | }; - 351 | stream_advance(&self); - 352 | return self; - 353 | } - | - 354 | static void stream_skip_whitespace(Stream *self) { - 355 | for (;;) { - 356 | if (iswspace(self->next)) { - 357 | stream_advance(self); - 358 | } else if (self->next == ';') { - 359 | // skip over comments - 360 | stream_advance(self); - 361 | while (self->next && self->next != '\n') { - 362 | if (!stream_advance(self)) break; - 363 | } - 364 | } else { - 365 | break; - 366 | } - 367 | } - 368 | } - | - 369 | static bool stream_is_ident_start(Stream *self) { - 370 | return iswalnum(self->next) || self->next == '_' || self->next == '-'; - 371 | } - | - 372 | static void stream_scan_identifier(Stream *stream) { - 373 | do { - 374 | stream_advance(stream); - 375 | } while ( - 376 | iswalnum(stream->next) || - 377 | stream->next == '_' || - 378 | stream->next == '-' || - 379 | stream->next == '.' - 380 | ); - 381 | } - | - 382 | static uint32_t stream_offset(Stream *self) { - 383 | return (uint32_t)(self->input - self->start); - 384 | } - | - 385 | /****************** - 386 | * CaptureListPool - 387 | ******************/ - | - 388 | static CaptureListPool capture_list_pool_new(void) { - 389 | return (CaptureListPool) { - 390 | .list = array_new(), - 391 | .empty_list = array_new(), - 392 | .max_capture_list_count = UINT32_MAX, - 393 | .free_capture_list_count = 0, - 394 | }; - 395 | } - | - 396 | static void capture_list_pool_reset(CaptureListPool *self) { - 397 | for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - 398 | // This invalid size means that the list is not in use. 
- 399 | array_get(&self->list, i)->size = UINT32_MAX; - 400 | } - 401 | self->free_capture_list_count = self->list.size; - 402 | } - | - 403 | static void capture_list_pool_delete(CaptureListPool *self) { - 404 | for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - 405 | array_delete(array_get(&self->list, i)); - 406 | } - 407 | array_delete(&self->list); - 408 | } - | - 409 | static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { - 410 | if (id >= self->list.size) return &self->empty_list; - 411 | return array_get(&self->list, id); - 412 | } - | - 413 | static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { - 414 | ts_assert(id < self->list.size); - 415 | return array_get(&self->list, id); - 416 | } - | - 417 | static bool capture_list_pool_is_empty(const CaptureListPool *self) { - 418 | // The capture list pool is empty if all allocated lists are in use, and we - 419 | // have reached the maximum allowed number of allocated lists. - 420 | return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; - 421 | } - | - 422 | static uint16_t capture_list_pool_acquire(CaptureListPool *self) { - 423 | // First see if any already allocated capture list is currently unused. - 424 | if (self->free_capture_list_count > 0) { - 425 | for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { - 426 | if (array_get(&self->list, i)->size == UINT32_MAX) { - 427 | array_clear(array_get(&self->list, i)); - 428 | self->free_capture_list_count--; - 429 | return i; - 430 | } - 431 | } - 432 | } - | - 433 | // Otherwise allocate and initialize a new capture list, as long as that - 434 | // doesn't put us over the requested maximum. 
- 435 | uint32_t i = self->list.size; - 436 | if (i >= self->max_capture_list_count) { - 437 | return NONE; - 438 | } - 439 | CaptureList list; - 440 | array_init(&list); - 441 | array_push(&self->list, list); - 442 | return i; - 443 | } - | - 444 | static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { - 445 | if (id >= self->list.size) return; - 446 | array_get(&self->list, id)->size = UINT32_MAX; - 447 | self->free_capture_list_count++; - 448 | } - | - 449 | /************** - 450 | * Quantifiers - 451 | **************/ - | - 452 | static TSQuantifier quantifier_mul( - 453 | TSQuantifier left, - 454 | TSQuantifier right - 455 | ) { - 456 | switch (left) - 457 | { - 458 | case TSQuantifierZero: - 459 | return TSQuantifierZero; - 460 | case TSQuantifierZeroOrOne: - 461 | switch (right) { - 462 | case TSQuantifierZero: - 463 | return TSQuantifierZero; - 464 | case TSQuantifierZeroOrOne: - 465 | case TSQuantifierOne: - 466 | return TSQuantifierZeroOrOne; - 467 | case TSQuantifierZeroOrMore: - 468 | case TSQuantifierOneOrMore: - 469 | return TSQuantifierZeroOrMore; - 470 | }; - 471 | break; - 472 | case TSQuantifierZeroOrMore: - 473 | switch (right) { - 474 | case TSQuantifierZero: - 475 | return TSQuantifierZero; - 476 | case TSQuantifierZeroOrOne: - 477 | case TSQuantifierZeroOrMore: - 478 | case TSQuantifierOne: - 479 | case TSQuantifierOneOrMore: - 480 | return TSQuantifierZeroOrMore; - 481 | }; - 482 | break; - 483 | case TSQuantifierOne: - 484 | return right; - 485 | case TSQuantifierOneOrMore: - 486 | switch (right) { - 487 | case TSQuantifierZero: - 488 | return TSQuantifierZero; - 489 | case TSQuantifierZeroOrOne: - 490 | case TSQuantifierZeroOrMore: - 491 | return TSQuantifierZeroOrMore; - 492 | case TSQuantifierOne: - 493 | case TSQuantifierOneOrMore: - 494 | return TSQuantifierOneOrMore; - 495 | }; - 496 | break; - 497 | } - 498 | return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
- 499 | } - | - 500 | static TSQuantifier quantifier_join( - 501 | TSQuantifier left, - 502 | TSQuantifier right - 503 | ) { - 504 | switch (left) - 505 | { - 506 | case TSQuantifierZero: - 507 | switch (right) { - 508 | case TSQuantifierZero: - 509 | return TSQuantifierZero; - 510 | case TSQuantifierZeroOrOne: - 511 | case TSQuantifierOne: - 512 | return TSQuantifierZeroOrOne; - 513 | case TSQuantifierZeroOrMore: - 514 | case TSQuantifierOneOrMore: - 515 | return TSQuantifierZeroOrMore; - 516 | }; - 517 | break; - 518 | case TSQuantifierZeroOrOne: - 519 | switch (right) { - 520 | case TSQuantifierZero: - 521 | case TSQuantifierZeroOrOne: - 522 | case TSQuantifierOne: - 523 | return TSQuantifierZeroOrOne; - 524 | break; - 525 | case TSQuantifierZeroOrMore: - 526 | case TSQuantifierOneOrMore: - 527 | return TSQuantifierZeroOrMore; - 528 | break; - 529 | }; - 530 | break; - 531 | case TSQuantifierZeroOrMore: - 532 | return TSQuantifierZeroOrMore; - 533 | case TSQuantifierOne: - 534 | switch (right) { - 535 | case TSQuantifierZero: - 536 | case TSQuantifierZeroOrOne: - 537 | return TSQuantifierZeroOrOne; - 538 | case TSQuantifierZeroOrMore: - 539 | return TSQuantifierZeroOrMore; - 540 | case TSQuantifierOne: - 541 | return TSQuantifierOne; - 542 | case TSQuantifierOneOrMore: - 543 | return TSQuantifierOneOrMore; - 544 | }; - 545 | break; - 546 | case TSQuantifierOneOrMore: - 547 | switch (right) { - 548 | case TSQuantifierZero: - 549 | case TSQuantifierZeroOrOne: - 550 | case TSQuantifierZeroOrMore: - 551 | return TSQuantifierZeroOrMore; - 552 | case TSQuantifierOne: - 553 | case TSQuantifierOneOrMore: - 554 | return TSQuantifierOneOrMore; - 555 | }; - 556 | break; - 557 | } - 558 | return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
- 559 | } - | - 560 | static TSQuantifier quantifier_add( - 561 | TSQuantifier left, - 562 | TSQuantifier right - 563 | ) { - 564 | switch (left) - 565 | { - 566 | case TSQuantifierZero: - 567 | return right; - 568 | case TSQuantifierZeroOrOne: - 569 | switch (right) { - 570 | case TSQuantifierZero: - 571 | return TSQuantifierZeroOrOne; - 572 | case TSQuantifierZeroOrOne: - 573 | case TSQuantifierZeroOrMore: - 574 | return TSQuantifierZeroOrMore; - 575 | case TSQuantifierOne: - 576 | case TSQuantifierOneOrMore: - 577 | return TSQuantifierOneOrMore; - 578 | }; - 579 | break; - 580 | case TSQuantifierZeroOrMore: - 581 | switch (right) { - 582 | case TSQuantifierZero: - 583 | return TSQuantifierZeroOrMore; - 584 | case TSQuantifierZeroOrOne: - 585 | case TSQuantifierZeroOrMore: - 586 | return TSQuantifierZeroOrMore; - 587 | case TSQuantifierOne: - 588 | case TSQuantifierOneOrMore: - 589 | return TSQuantifierOneOrMore; - 590 | }; - 591 | break; - 592 | case TSQuantifierOne: - 593 | switch (right) { - 594 | case TSQuantifierZero: - 595 | return TSQuantifierOne; - 596 | case TSQuantifierZeroOrOne: - 597 | case TSQuantifierZeroOrMore: - 598 | case TSQuantifierOne: - 599 | case TSQuantifierOneOrMore: - 600 | return TSQuantifierOneOrMore; - 601 | }; - 602 | break; - 603 | case TSQuantifierOneOrMore: - 604 | return TSQuantifierOneOrMore; - 605 | } - 606 | return TSQuantifierZero; // to make compiler happy, but all cases should be covered above! 
- 607 | } - | - 608 | // Create new capture quantifiers structure - 609 | static CaptureQuantifiers capture_quantifiers_new(void) { - 610 | return (CaptureQuantifiers) array_new(); - 611 | } - | - 612 | // Delete capture quantifiers structure - 613 | static void capture_quantifiers_delete( - 614 | CaptureQuantifiers *self - 615 | ) { - 616 | array_delete(self); - 617 | } - | - 618 | // Clear capture quantifiers structure - 619 | static void capture_quantifiers_clear( - 620 | CaptureQuantifiers *self - 621 | ) { - 622 | array_clear(self); - 623 | } - | - 624 | // Replace capture quantifiers with the given quantifiers - 625 | static void capture_quantifiers_replace( - 626 | CaptureQuantifiers *self, - 627 | CaptureQuantifiers *quantifiers - 628 | ) { - 629 | array_clear(self); - 630 | array_push_all(self, quantifiers); - 631 | } - | - 632 | // Return capture quantifier for the given capture id - 633 | static TSQuantifier capture_quantifier_for_id( - 634 | const CaptureQuantifiers *self, - 635 | uint16_t id - 636 | ) { - 637 | return (self->size <= id) ? 
TSQuantifierZero : (TSQuantifier) *array_get(self, id); - 638 | } - | - 639 | // Add the given quantifier to the current value for id - 640 | static void capture_quantifiers_add_for_id( - 641 | CaptureQuantifiers *self, - 642 | uint16_t id, - 643 | TSQuantifier quantifier - 644 | ) { - 645 | if (self->size <= id) { - 646 | array_grow_by(self, id + 1 - self->size); - 647 | } - 648 | uint8_t *own_quantifier = array_get(self, id); - 649 | *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier); - 650 | } - | - 651 | // Point-wise add the given quantifiers to the current values - 652 | static void capture_quantifiers_add_all( - 653 | CaptureQuantifiers *self, - 654 | CaptureQuantifiers *quantifiers - 655 | ) { - 656 | if (self->size < quantifiers->size) { - 657 | array_grow_by(self, quantifiers->size - self->size); - 658 | } - 659 | for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) { - 660 | uint8_t *quantifier = array_get(quantifiers, id); - 661 | uint8_t *own_quantifier = array_get(self, id); - 662 | *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); - 663 | } - 664 | } - | - 665 | // Join the given quantifier with the current values - 666 | static void capture_quantifiers_mul( - 667 | CaptureQuantifiers *self, - 668 | TSQuantifier quantifier - 669 | ) { - 670 | for (uint16_t id = 0; id < (uint16_t)self->size; id++) { - 671 | uint8_t *own_quantifier = array_get(self, id); - 672 | *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier); - 673 | } - 674 | } - | - 675 | // Point-wise join the quantifiers from a list of alternatives with the current values - 676 | static void capture_quantifiers_join_all( - 677 | CaptureQuantifiers *self, - 678 | CaptureQuantifiers *quantifiers - 679 | ) { - 680 | if (self->size < quantifiers->size) { - 681 | array_grow_by(self, quantifiers->size - self->size); - 682 | } - 683 | for (uint32_t id = 0; id < 
quantifiers->size; id++) { - 684 | uint8_t *quantifier = array_get(quantifiers, id); - 685 | uint8_t *own_quantifier = array_get(self, id); - 686 | *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); - 687 | } - 688 | for (uint32_t id = quantifiers->size; id < self->size; id++) { - 689 | uint8_t *own_quantifier = array_get(self, id); - 690 | *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero); - 691 | } - 692 | } - | - 693 | /************** - 694 | * SymbolTable - 695 | **************/ - | - 696 | static SymbolTable symbol_table_new(void) { - 697 | return (SymbolTable) { - 698 | .characters = array_new(), - 699 | .slices = array_new(), - 700 | }; - 701 | } - | - 702 | static void symbol_table_delete(SymbolTable *self) { - 703 | array_delete(&self->characters); - 704 | array_delete(&self->slices); - 705 | } - | - 706 | static int symbol_table_id_for_name( - 707 | const SymbolTable *self, - 708 | const char *name, - 709 | uint32_t length - 710 | ) { - 711 | for (unsigned i = 0; i < self->slices.size; i++) { - 712 | Slice slice = *array_get(&self->slices, i); - 713 | if ( - 714 | slice.length == length && - 715 | !strncmp(array_get(&self->characters, slice.offset), name, length) - 716 | ) return i; - 717 | } - 718 | return -1; - 719 | } - | - 720 | static const char *symbol_table_name_for_id( - 721 | const SymbolTable *self, - 722 | uint16_t id, - 723 | uint32_t *length - 724 | ) { - 725 | Slice slice = *(array_get(&self->slices,id)); - 726 | *length = slice.length; - 727 | return array_get(&self->characters, slice.offset); - 728 | } - | - 729 | static uint16_t symbol_table_insert_name( - 730 | SymbolTable *self, - 731 | const char *name, - 732 | uint32_t length - 733 | ) { - 734 | int id = symbol_table_id_for_name(self, name, length); - 735 | if (id >= 0) return (uint16_t)id; - 736 | Slice slice = { - 737 | .offset = self->characters.size, - 738 | .length = length, - 739 | 
}; - 740 | array_grow_by(&self->characters, length + 1); - 741 | memcpy(array_get(&self->characters, slice.offset), name, length); - 742 | *array_get(&self->characters, self->characters.size - 1) = 0; - 743 | array_push(&self->slices, slice); - 744 | return self->slices.size - 1; - 745 | } - | - 746 | /************ - 747 | * QueryStep - 748 | ************/ - | - 749 | static QueryStep query_step__new( - 750 | TSSymbol symbol, - 751 | uint16_t depth, - 752 | bool is_immediate - 753 | ) { - 754 | QueryStep step = { - 755 | .symbol = symbol, - 756 | .depth = depth, - 757 | .field = 0, - 758 | .alternative_index = NONE, - 759 | .negated_field_list_id = 0, - 760 | .contains_captures = false, - 761 | .is_last_child = false, - 762 | .is_named = false, - 763 | .is_pass_through = false, - 764 | .is_dead_end = false, - 765 | .root_pattern_guaranteed = false, - 766 | .is_immediate = is_immediate, - 767 | .alternative_is_immediate = false, - 768 | }; - 769 | for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - 770 | step.capture_ids[i] = NONE; - 771 | } - 772 | return step; - 773 | } - | - 774 | static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { - 775 | for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - 776 | if (self->capture_ids[i] == NONE) { - 777 | self->capture_ids[i] = capture_id; - 778 | break; - 779 | } - 780 | } - 781 | } - | - 782 | static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { - 783 | for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { - 784 | if (self->capture_ids[i] == capture_id) { - 785 | self->capture_ids[i] = NONE; - 786 | while (i + 1 < MAX_STEP_CAPTURE_COUNT) { - 787 | if (self->capture_ids[i + 1] == NONE) break; - 788 | self->capture_ids[i] = self->capture_ids[i + 1]; - 789 | self->capture_ids[i + 1] = NONE; - 790 | i++; - 791 | } - 792 | break; - 793 | } - 794 | } - 795 | } - | - 796 | /********************** - 797 | * StatePredecessorMap - 798 | **********************/ - | - 799 | 
static inline StatePredecessorMap state_predecessor_map_new( - 800 | const TSLanguage *language - 801 | ) { - 802 | return (StatePredecessorMap) { - 803 | .contents = ts_calloc( - 804 | (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1), - 805 | sizeof(TSStateId) - 806 | ), - 807 | }; - 808 | } - | - 809 | static inline void state_predecessor_map_delete(StatePredecessorMap *self) { - 810 | ts_free(self->contents); - 811 | } - | - 812 | static inline void state_predecessor_map_add( - 813 | StatePredecessorMap *self, - 814 | TSStateId state, - 815 | TSStateId predecessor - 816 | ) { - 817 | size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); - 818 | TSStateId *count = &self->contents[index]; - 819 | if ( - 820 | *count == 0 || - 821 | (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor) - 822 | ) { - 823 | (*count)++; - 824 | self->contents[index + *count] = predecessor; - 825 | } - 826 | } - | - 827 | static inline const TSStateId *state_predecessor_map_get( - 828 | const StatePredecessorMap *self, - 829 | TSStateId state, - 830 | unsigned *count - 831 | ) { - 832 | size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1); - 833 | *count = self->contents[index]; - 834 | return &self->contents[index + 1]; - 835 | } - | - 836 | /**************** - 837 | * AnalysisState - 838 | ****************/ - | - 839 | static unsigned analysis_state__recursion_depth(const AnalysisState *self) { - 840 | unsigned result = 0; - 841 | for (unsigned i = 0; i < self->depth; i++) { - 842 | TSSymbol symbol = self->stack[i].parent_symbol; - 843 | for (unsigned j = 0; j < i; j++) { - 844 | if (self->stack[j].parent_symbol == symbol) { - 845 | result++; - 846 | break; - 847 | } - 848 | } - 849 | } - 850 | return result; - 851 | } - | - 852 | static inline int analysis_state__compare( - 853 | AnalysisState *const *self, - 854 | AnalysisState *const *other - 855 | ) { - 856 | if ((*self)->depth < (*other)->depth) return 1; 
- 857 | for (unsigned i = 0; i < (*self)->depth; i++) { - 858 | if (i >= (*other)->depth) return -1; - 859 | AnalysisStateEntry s1 = (*self)->stack[i]; - 860 | AnalysisStateEntry s2 = (*other)->stack[i]; - 861 | if (s1.child_index < s2.child_index) return -1; - 862 | if (s1.child_index > s2.child_index) return 1; - 863 | if (s1.parent_symbol < s2.parent_symbol) return -1; - 864 | if (s1.parent_symbol > s2.parent_symbol) return 1; - 865 | if (s1.parse_state < s2.parse_state) return -1; - 866 | if (s1.parse_state > s2.parse_state) return 1; - 867 | if (s1.field_id < s2.field_id) return -1; - 868 | if (s1.field_id > s2.field_id) return 1; - 869 | } - 870 | if ((*self)->step_index < (*other)->step_index) return -1; - 871 | if ((*self)->step_index > (*other)->step_index) return 1; - 872 | return 0; - 873 | } - | - 874 | static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) { - 875 | if (self->depth == 0) { - 876 | return &self->stack[0]; - 877 | } - 878 | return &self->stack[self->depth - 1]; - 879 | } - | - 880 | static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) { - 881 | for (unsigned i = 0; i < self->depth; i++) { - 882 | if (self->stack[i].parent_symbol == symbol) return true; - 883 | } - 884 | return false; - 885 | } - | - 886 | /****************** - 887 | * AnalysisStateSet - 888 | ******************/ - | - 889 | // Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by - 890 | // cloning one from scratch. 
- 891 | static inline AnalysisState *analysis_state_pool__clone_or_reuse( - 892 | AnalysisStateSet *self, - 893 | AnalysisState *borrowed_item - 894 | ) { - 895 | AnalysisState *new_item; - 896 | if (self->size) { - 897 | new_item = array_pop(self); - 898 | } else { - 899 | new_item = ts_malloc(sizeof(AnalysisState)); - 900 | } - 901 | *new_item = *borrowed_item; - 902 | return new_item; - 903 | } - | - 904 | // Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this - 905 | // set. The set does not contain duplicates, so if the item is already present, it will not be - 906 | // inserted, and no clone will be made. - 907 | // - 908 | // The caller retains ownership of the passed-in memory. However, the clone that is created by this - 909 | // function will be managed by the state set. - 910 | static inline void analysis_state_set__insert_sorted( - 911 | AnalysisStateSet *self, - 912 | AnalysisStateSet *pool, - 913 | AnalysisState *borrowed_item - 914 | ) { - 915 | unsigned index, exists; - 916 | array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists); - 917 | if (!exists) { - 918 | AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); - 919 | array_insert(self, index, new_item); - 920 | } - 921 | } - | - 922 | // Inserts a clone of the passed-in item at the end position of this list. - 923 | // - 924 | // IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function - 925 | // `analysis_state__compare`) than largest item already in this set. If items are inserted in the - 926 | // wrong order, the set will not function properly for future use. - 927 | // - 928 | // The caller retains ownership of the passed-in memory. However, the clone that is created by this - 929 | // function will be managed by the state set. 
- 930 | static inline void analysis_state_set__push( - 931 | AnalysisStateSet *self, - 932 | AnalysisStateSet *pool, - 933 | AnalysisState *borrowed_item - 934 | ) { - 935 | AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item); - 936 | array_push(self, new_item); - 937 | } - | - 938 | // Removes all items from this set, returning it to an empty state. - 939 | static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) { - 940 | array_push_all(pool, self); - 941 | array_clear(self); - 942 | } - | - 943 | // Releases all memory that is managed with this state set, including any items currently present. - 944 | // After calling this function, the set is no longer suitable for use. - 945 | static inline void analysis_state_set__delete(AnalysisStateSet *self) { - 946 | for (unsigned i = 0; i < self->size; i++) { - 947 | ts_free(self->contents[i]); - 948 | } - 949 | array_delete(self); - 950 | } - | - 951 | /**************** - 952 | * QueryAnalyzer - 953 | ****************/ - | - 954 | static inline QueryAnalysis query_analysis__new(void) { - 955 | return (QueryAnalysis) { - 956 | .states = array_new(), - 957 | .next_states = array_new(), - 958 | .deeper_states = array_new(), - 959 | .state_pool = array_new(), - 960 | .final_step_indices = array_new(), - 961 | .finished_parent_symbols = array_new(), - 962 | .did_abort = false, - 963 | }; - 964 | } - | - 965 | static inline void query_analysis__delete(QueryAnalysis *self) { - 966 | analysis_state_set__delete(&self->states); - 967 | analysis_state_set__delete(&self->next_states); - 968 | analysis_state_set__delete(&self->deeper_states); - 969 | analysis_state_set__delete(&self->state_pool); - 970 | array_delete(&self->final_step_indices); - 971 | array_delete(&self->finished_parent_symbols); - 972 | } - | - 973 | /*********************** - 974 | * AnalysisSubgraphNode - 975 | ***********************/ - | - 976 | static inline int 
analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) { - 977 | if (self->state < other->state) return -1; - 978 | if (self->state > other->state) return 1; - 979 | if (self->child_index < other->child_index) return -1; - 980 | if (self->child_index > other->child_index) return 1; - 981 | if (self->done < other->done) return -1; - 982 | if (self->done > other->done) return 1; - 983 | if (self->production_id < other->production_id) return -1; - 984 | if (self->production_id > other->production_id) return 1; - 985 | return 0; - 986 | } - | - 987 | /********* - 988 | * Query - 989 | *********/ - | - 990 | // The `pattern_map` contains a mapping from TSSymbol values to indices in the - 991 | // `steps` array. For a given syntax node, the `pattern_map` makes it possible - 992 | // to quickly find the starting steps of all of the patterns whose root matches - 993 | // that node. Each entry has two fields: a `pattern_index`, which identifies one - 994 | // of the patterns in the query, and a `step_index`, which indicates the start - 995 | // offset of that pattern's steps within the `steps` array. - 996 | // - 997 | // The entries are sorted by the patterns' root symbols, and lookups use a - 998 | // binary search. This ensures that the cost of this initial lookup step - 999 | // scales logarithmically with the number of patterns in the query. -1000 | // -1001 | // This returns `true` if the symbol is present and `false` otherwise. -1002 | // If the symbol is not present `*result` is set to the index where the -1003 | // symbol should be inserted. 
-1004 | static inline bool ts_query__pattern_map_search( -1005 | const TSQuery *self, -1006 | TSSymbol needle, -1007 | uint32_t *result -1008 | ) { -1009 | uint32_t base_index = self->wildcard_root_pattern_count; -1010 | uint32_t size = self->pattern_map.size - base_index; -1011 | if (size == 0) { -1012 | *result = base_index; -1013 | return false; -1014 | } -1015 | while (size > 1) { -1016 | uint32_t half_size = size / 2; -1017 | uint32_t mid_index = base_index + half_size; -1018 | TSSymbol mid_symbol = array_get(&self->steps, -1019 | array_get(&self->pattern_map, mid_index)->step_index -1020 | )->symbol; -1021 | if (needle > mid_symbol) base_index = mid_index; -1022 | size -= half_size; -1023 | } - | -1024 | TSSymbol symbol = array_get(&self->steps, -1025 | array_get(&self->pattern_map, base_index)->step_index -1026 | )->symbol; - | -1027 | if (needle > symbol) { -1028 | base_index++; -1029 | if (base_index < self->pattern_map.size) { -1030 | symbol = array_get(&self->steps, -1031 | array_get(&self->pattern_map, base_index)->step_index -1032 | )->symbol; -1033 | } -1034 | } - | -1035 | *result = base_index; -1036 | return needle == symbol; -1037 | } - | -1038 | // Insert a new pattern's start index into the pattern map, maintaining -1039 | // the pattern map's ordering invariant. -1040 | static inline void ts_query__pattern_map_insert( -1041 | TSQuery *self, -1042 | TSSymbol symbol, -1043 | PatternEntry new_entry -1044 | ) { -1045 | uint32_t index; -1046 | ts_query__pattern_map_search(self, symbol, &index); - | -1047 | // Ensure that the entries are sorted not only by symbol, but also -1048 | // by pattern_index. This way, states for earlier patterns will be -1049 | // initiated first, which allows the ordering of the states array -1050 | // to be maintained more efficiently. 
-1051 | while (index < self->pattern_map.size) { -1052 | PatternEntry *entry = array_get(&self->pattern_map, index); -1053 | if ( -1054 | array_get(&self->steps, entry->step_index)->symbol == symbol && -1055 | entry->pattern_index < new_entry.pattern_index -1056 | ) { -1057 | index++; -1058 | } else { -1059 | break; -1060 | } -1061 | } - | -1062 | array_insert(&self->pattern_map, index, new_entry); -1063 | } - | -1064 | // Walk the subgraph for this non-terminal, tracking all of the possible -1065 | // sequences of progress within the pattern. -1066 | static void ts_query__perform_analysis( -1067 | TSQuery *self, -1068 | const AnalysisSubgraphArray *subgraphs, -1069 | QueryAnalysis *analysis -1070 | ) { -1071 | unsigned recursion_depth_limit = 0; -1072 | unsigned prev_final_step_count = 0; -1073 | array_clear(&analysis->final_step_indices); -1074 | array_clear(&analysis->finished_parent_symbols); - | -1075 | for (unsigned iteration = 0;; iteration++) { -1076 | if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { -1077 | analysis->did_abort = true; -1078 | break; -1079 | } - | -1080 | #ifdef DEBUG_ANALYZE_QUERY -1081 | printf("Iteration: %u. 
Final step indices:", iteration); -1082 | for (unsigned j = 0; j < analysis->final_step_indices.size; j++) { -1083 | printf(" %4u", *array_get(&analysis->final_step_indices, j)); -1084 | } -1085 | printf("\n"); -1086 | for (unsigned j = 0; j < analysis->states.size; j++) { -1087 | AnalysisState *state = *array_get(&analysis->states, j); -1088 | printf(" %3u: step: %u, stack: [", j, state->step_index); -1089 | for (unsigned k = 0; k < state->depth; k++) { -1090 | printf( -1091 | " {%s, child: %u, state: %4u", -1092 | self->language->symbol_names[state->stack[k].parent_symbol], -1093 | state->stack[k].child_index, -1094 | state->stack[k].parse_state -1095 | ); -1096 | if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]); -1097 | if (state->stack[k].done) printf(", DONE"); -1098 | printf("}"); -1099 | } -1100 | printf(" ]\n"); -1101 | } -1102 | #endif - | -1103 | // If no further progress can be made within the current recursion depth limit, then -1104 | // bump the depth limit by one, and continue to process the states the exceeded the -1105 | // limit. But only allow this if progress has been made since the last time the depth -1106 | // limit was increased. 
-1107 | if (analysis->states.size == 0) { -1108 | if ( -1109 | analysis->deeper_states.size > 0 && -1110 | analysis->final_step_indices.size > prev_final_step_count -1111 | ) { -1112 | #ifdef DEBUG_ANALYZE_QUERY -1113 | printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1); -1114 | #endif - | -1115 | prev_final_step_count = analysis->final_step_indices.size; -1116 | recursion_depth_limit++; -1117 | AnalysisStateSet _states = analysis->states; -1118 | analysis->states = analysis->deeper_states; -1119 | analysis->deeper_states = _states; -1120 | continue; -1121 | } - | -1122 | break; -1123 | } - | -1124 | analysis_state_set__clear(&analysis->next_states, &analysis->state_pool); -1125 | for (unsigned j = 0; j < analysis->states.size; j++) { -1126 | AnalysisState * const state = *array_get(&analysis->states, j); - | -1127 | // For efficiency, it's important to avoid processing the same analysis state more -1128 | // than once. To achieve this, keep the states in order of ascending position within -1129 | // their hypothetical syntax trees. In each iteration of this loop, start by advancing -1130 | // the states that have made the least progress. Avoid advancing states that have already -1131 | // made more progress. 
-1132 | if (analysis->next_states.size > 0) { -1133 | int comparison = analysis_state__compare( -1134 | &state, -1135 | array_back(&analysis->next_states) -1136 | ); -1137 | if (comparison == 0) { -1138 | analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state); -1139 | continue; -1140 | } else if (comparison > 0) { -1141 | #ifdef DEBUG_ANALYZE_QUERY -1142 | printf("Terminate iteration at state %u\n", j); -1143 | #endif -1144 | while (j < analysis->states.size) { -1145 | analysis_state_set__push( -1146 | &analysis->next_states, -1147 | &analysis->state_pool, -1148 | *array_get(&analysis->states, j) -1149 | ); -1150 | j++; -1151 | } -1152 | break; -1153 | } -1154 | } - | -1155 | const TSStateId parse_state = analysis_state__top(state)->parse_state; -1156 | const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol; -1157 | const TSFieldId parent_field_id = analysis_state__top(state)->field_id; -1158 | const unsigned child_index = analysis_state__top(state)->child_index; -1159 | const QueryStep * const step = array_get(&self->steps, state->step_index); - | -1160 | unsigned subgraph_index, exists; -1161 | array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); -1162 | if (!exists) continue; -1163 | const AnalysisSubgraph *subgraph = array_get(subgraphs, subgraph_index); - | -1164 | // Follow every possible path in the parse table, but only visit states that -1165 | // are part of the subgraph for the current symbol. 
-1166 | LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); -1167 | while (ts_lookahead_iterator__next(&lookahead_iterator)) { -1168 | TSSymbol sym = lookahead_iterator.symbol; - | -1169 | AnalysisSubgraphNode successor = { -1170 | .state = parse_state, -1171 | .child_index = child_index, -1172 | }; -1173 | if (lookahead_iterator.action_count) { -1174 | const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; -1175 | if (action->type == TSParseActionTypeShift) { -1176 | if (!action->shift.extra) { -1177 | successor.state = action->shift.state; -1178 | successor.child_index++; -1179 | } -1180 | } else { -1181 | continue; -1182 | } -1183 | } else if (lookahead_iterator.next_state != 0) { -1184 | successor.state = lookahead_iterator.next_state; -1185 | successor.child_index++; -1186 | } else { -1187 | continue; -1188 | } - | -1189 | unsigned node_index; -1190 | array_search_sorted_with( -1191 | &subgraph->nodes, -1192 | analysis_subgraph_node__compare, &successor, -1193 | &node_index, &exists -1194 | ); -1195 | while (node_index < subgraph->nodes.size) { -1196 | AnalysisSubgraphNode *node = array_get(&subgraph->nodes, node_index); -1197 | node_index++; -1198 | if (node->state != successor.state || node->child_index != successor.child_index) break; - | -1199 | // Use the subgraph to determine what alias and field will eventually be applied -1200 | // to this child node. -1201 | TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index); -1202 | TSSymbol visible_symbol = alias -1203 | ? alias -1204 | : self->language->symbol_metadata[sym].visible -1205 | ? 
self->language->public_symbol_map[sym] -1206 | : 0; -1207 | TSFieldId field_id = parent_field_id; -1208 | if (!field_id) { -1209 | const TSFieldMapEntry *field_map, *field_map_end; -1210 | ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end); -1211 | for (; field_map != field_map_end; field_map++) { -1212 | if (!field_map->inherited && field_map->child_index == child_index) { -1213 | field_id = field_map->field_id; -1214 | break; -1215 | } -1216 | } -1217 | } - | -1218 | // Create a new state that has advanced past this hypothetical subtree. -1219 | AnalysisState next_state = *state; -1220 | AnalysisStateEntry *next_state_top = analysis_state__top(&next_state); -1221 | next_state_top->child_index = successor.child_index; -1222 | next_state_top->parse_state = successor.state; -1223 | if (node->done) next_state_top->done = true; - | -1224 | // Determine if this hypothetical child node would match the current step -1225 | // of the query pattern. -1226 | bool does_match = false; - | -1227 | // ERROR nodes can appear anywhere, so if the step is -1228 | // looking for an ERROR node, consider it potentially matchable. -1229 | if (step->symbol == ts_builtin_sym_error) { -1230 | does_match = true; -1231 | } else if (visible_symbol) { -1232 | does_match = true; -1233 | if (step->symbol == WILDCARD_SYMBOL) { -1234 | if ( -1235 | step->is_named && -1236 | !self->language->symbol_metadata[visible_symbol].named -1237 | ) does_match = false; -1238 | } else if (step->symbol != visible_symbol) { -1239 | does_match = false; -1240 | } -1241 | if (step->field && step->field != field_id) { -1242 | does_match = false; -1243 | } -1244 | if ( -1245 | step->supertype_symbol && -1246 | !analysis_state__has_supertype(state, step->supertype_symbol) -1247 | ) does_match = false; -1248 | } - | -1249 | // If this child is hidden, then descend into it and walk through its children. 
-1250 | // If the top entry of the stack is at the end of its rule, then that entry can -1251 | // be replaced. Otherwise, push a new entry onto the stack. -1252 | else if (sym >= self->language->token_count) { -1253 | if (!next_state_top->done) { -1254 | if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) { -1255 | #ifdef DEBUG_ANALYZE_QUERY -1256 | printf("Exceeded depth limit for state %u\n", j); -1257 | #endif - | -1258 | analysis->did_abort = true; -1259 | continue; -1260 | } - | -1261 | next_state.depth++; -1262 | next_state_top = analysis_state__top(&next_state); -1263 | } - | -1264 | *next_state_top = (AnalysisStateEntry) { -1265 | .parse_state = parse_state, -1266 | .parent_symbol = sym, -1267 | .child_index = 0, -1268 | .field_id = field_id, -1269 | .done = false, -1270 | }; - | -1271 | if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) { -1272 | analysis_state_set__insert_sorted( -1273 | &analysis->deeper_states, -1274 | &analysis->state_pool, -1275 | &next_state -1276 | ); -1277 | continue; -1278 | } -1279 | } - | -1280 | // Pop from the stack when this state reached the end of its current syntax node. -1281 | while (next_state.depth > 0 && next_state_top->done) { -1282 | next_state.depth--; -1283 | next_state_top = analysis_state__top(&next_state); -1284 | } - | -1285 | // If this hypothetical child did match the current step of the query pattern, -1286 | // then advance to the next step at the current depth. This involves skipping -1287 | // over any descendant steps of the current child. 
-1288 | const QueryStep *next_step = step; -1289 | if (does_match) { -1290 | for (;;) { -1291 | next_state.step_index++; -1292 | next_step = array_get(&self->steps, next_state.step_index); -1293 | if ( -1294 | next_step->depth == PATTERN_DONE_MARKER || -1295 | next_step->depth <= step->depth -1296 | ) break; -1297 | } -1298 | } else if (successor.state == parse_state) { -1299 | continue; -1300 | } - | -1301 | for (;;) { -1302 | // Skip pass-through states. Although these states have alternatives, they are only -1303 | // used to implement repetitions, and query analysis does not need to process -1304 | // repetitions in order to determine whether steps are possible and definite. -1305 | if (next_step->is_pass_through) { -1306 | next_state.step_index++; -1307 | next_step++; -1308 | continue; -1309 | } - | -1310 | // If the pattern is finished or hypothetical parent node is complete, then -1311 | // record that matching can terminate at this step of the pattern. Otherwise, -1312 | // add this state to the list of states to process on the next iteration. -1313 | if (!next_step->is_dead_end) { -1314 | bool did_finish_pattern = array_get(&self->steps, next_state.step_index)->depth != step->depth; -1315 | if (did_finish_pattern) { -1316 | array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol); -1317 | } else if (next_state.depth == 0) { -1318 | array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index); -1319 | } else { -1320 | analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state); -1321 | } -1322 | } - | -1323 | // If the state has advanced to a step with an alternative step, then add another state -1324 | // at that alternative step. This process is simpler than the process of actually matching a -1325 | // pattern during query execution, because for the purposes of query analysis, there is no -1326 | // need to process repetitions. 
-1327 | if ( -1328 | does_match && -1329 | next_step->alternative_index != NONE && -1330 | next_step->alternative_index > next_state.step_index -1331 | ) { -1332 | next_state.step_index = next_step->alternative_index; -1333 | next_step = array_get(&self->steps, next_state.step_index); -1334 | } else { -1335 | break; -1336 | } -1337 | } -1338 | } -1339 | } -1340 | } - | -1341 | AnalysisStateSet _states = analysis->states; -1342 | analysis->states = analysis->next_states; -1343 | analysis->next_states = _states; -1344 | } -1345 | } - | -1346 | static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { -1347 | Array(uint16_t) non_rooted_pattern_start_steps = array_new(); -1348 | for (unsigned i = 0; i < self->pattern_map.size; i++) { -1349 | PatternEntry *pattern = array_get(&self->pattern_map, i); -1350 | if (!pattern->is_rooted) { -1351 | QueryStep *step = array_get(&self->steps, pattern->step_index); -1352 | if (step->symbol != WILDCARD_SYMBOL) { -1353 | array_push(&non_rooted_pattern_start_steps, i); -1354 | } -1355 | } -1356 | } - | -1357 | // Walk forward through all of the steps in the query, computing some -1358 | // basic information about each step. Mark all of the steps that contain -1359 | // captures, and record the indices of all of the steps that have child steps. 
-1360 | Array(uint32_t) parent_step_indices = array_new(); -1361 | bool all_patterns_are_valid = true; -1362 | for (unsigned i = 0; i < self->steps.size; i++) { -1363 | QueryStep *step = array_get(&self->steps, i); -1364 | if (step->depth == PATTERN_DONE_MARKER) { -1365 | step->parent_pattern_guaranteed = true; -1366 | step->root_pattern_guaranteed = true; -1367 | continue; -1368 | } - | -1369 | bool has_children = false; -1370 | bool is_wildcard = step->symbol == WILDCARD_SYMBOL; -1371 | step->contains_captures = step->capture_ids[0] != NONE; -1372 | for (unsigned j = i + 1; j < self->steps.size; j++) { -1373 | QueryStep *next_step = array_get(&self->steps, j); -1374 | if ( -1375 | next_step->depth == PATTERN_DONE_MARKER || -1376 | next_step->depth <= step->depth -1377 | ) break; -1378 | if (next_step->capture_ids[0] != NONE) { -1379 | step->contains_captures = true; -1380 | } -1381 | if (!is_wildcard) { -1382 | next_step->root_pattern_guaranteed = true; -1383 | next_step->parent_pattern_guaranteed = true; -1384 | } -1385 | has_children = true; -1386 | } - | -1387 | if (has_children) { -1388 | if (!is_wildcard) { -1389 | array_push(&parent_step_indices, i); -1390 | } else if (step->supertype_symbol && self->language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { -1391 | // Look at the child steps to see if any aren't valid subtypes for this supertype. 
-1392 | uint32_t subtype_length; -1393 | const TSSymbol *subtypes = ts_language_subtypes( -1394 | self->language, -1395 | step->supertype_symbol, -1396 | &subtype_length -1397 | ); - | -1398 | for (unsigned j = i + 1; j < self->steps.size; j++) { -1399 | QueryStep *child_step = array_get(&self->steps, j); -1400 | if (child_step->depth == PATTERN_DONE_MARKER || child_step->depth <= step->depth) { -1401 | break; -1402 | } -1403 | if (child_step->depth == step->depth + 1 && child_step->symbol != WILDCARD_SYMBOL) { -1404 | bool is_valid_subtype = false; -1405 | for (uint32_t k = 0; k < subtype_length; k++) { -1406 | if (child_step->symbol == subtypes[k]) { -1407 | is_valid_subtype = true; -1408 | break; -1409 | } -1410 | } - | -1411 | if (!is_valid_subtype) { -1412 | for (unsigned offset_idx = 0; offset_idx < self->step_offsets.size; offset_idx++) { -1413 | StepOffset *step_offset = array_get(&self->step_offsets, offset_idx); -1414 | if (step_offset->step_index >= j) { -1415 | *error_offset = step_offset->byte_offset; -1416 | all_patterns_are_valid = false; -1417 | goto supertype_cleanup; -1418 | } -1419 | } -1420 | } -1421 | } -1422 | } -1423 | } -1424 | } -1425 | } - | -1426 | // For every parent symbol in the query, initialize an 'analysis subgraph'. -1427 | // This subgraph lists all of the states in the parse table that are directly -1428 | // involved in building subtrees for this symbol. -1429 | // -1430 | // In addition to the parent symbols in the query, construct subgraphs for all -1431 | // of the hidden symbols in the grammar, because these might occur within -1432 | // one of the parent nodes, such that their children appear to belong to the -1433 | // parent. 
-1434 | AnalysisSubgraphArray subgraphs = array_new(); -1435 | for (unsigned i = 0; i < parent_step_indices.size; i++) { -1436 | uint32_t parent_step_index = *array_get(&parent_step_indices, i); -1437 | TSSymbol parent_symbol = array_get(&self->steps, parent_step_index)->symbol; -1438 | AnalysisSubgraph subgraph = { .symbol = parent_symbol }; -1439 | array_insert_sorted_by(&subgraphs, .symbol, subgraph); -1440 | } -1441 | for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) { -1442 | if (!ts_language_symbol_metadata(self->language, sym).visible) { -1443 | AnalysisSubgraph subgraph = { .symbol = sym }; -1444 | array_insert_sorted_by(&subgraphs, .symbol, subgraph); -1445 | } -1446 | } - | -1447 | // Scan the parse table to find the data needed to populate these subgraphs. -1448 | // Collect three things during this scan: -1449 | // 1) All of the parse states where one of these symbols can start. -1450 | // 2) All of the parse states where one of these symbols can end, along -1451 | // with information about the node that would be created. -1452 | // 3) A list of predecessor states for each state. 
-1453 | StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language); -1454 | for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) { -1455 | unsigned subgraph_index, exists; -1456 | LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state); -1457 | while (ts_lookahead_iterator__next(&lookahead_iterator)) { -1458 | if (lookahead_iterator.action_count) { -1459 | for (unsigned i = 0; i < lookahead_iterator.action_count; i++) { -1460 | const TSParseAction *action = &lookahead_iterator.actions[i]; -1461 | if (action->type == TSParseActionTypeReduce) { -1462 | const TSSymbol *aliases, *aliases_end; -1463 | ts_language_aliases_for_symbol( -1464 | self->language, -1465 | action->reduce.symbol, -1466 | &aliases, -1467 | &aliases_end -1468 | ); -1469 | for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { -1470 | array_search_sorted_by( -1471 | &subgraphs, -1472 | .symbol, -1473 | *symbol, -1474 | &subgraph_index, -1475 | &exists -1476 | ); -1477 | if (exists) { -1478 | AnalysisSubgraph *subgraph = array_get(&subgraphs, subgraph_index); -1479 | if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { -1480 | array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { -1481 | .state = state, -1482 | .production_id = action->reduce.production_id, -1483 | .child_index = action->reduce.child_count, -1484 | .done = true, -1485 | })); -1486 | } -1487 | } -1488 | } -1489 | } else if (action->type == TSParseActionTypeShift && !action->shift.extra) { -1490 | TSStateId next_state = action->shift.state; -1491 | state_predecessor_map_add(&predecessor_map, next_state, state); -1492 | } -1493 | } -1494 | } else if (lookahead_iterator.next_state != 0) { -1495 | if (lookahead_iterator.next_state != state) { -1496 | state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); -1497 | } -1498 | if (ts_language_state_is_primary(self->language, state)) { -1499 | 
const TSSymbol *aliases, *aliases_end; -1500 | ts_language_aliases_for_symbol( -1501 | self->language, -1502 | lookahead_iterator.symbol, -1503 | &aliases, -1504 | &aliases_end -1505 | ); -1506 | for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) { -1507 | array_search_sorted_by( -1508 | &subgraphs, -1509 | .symbol, -1510 | *symbol, -1511 | &subgraph_index, -1512 | &exists -1513 | ); -1514 | if (exists) { -1515 | AnalysisSubgraph *subgraph = array_get(&subgraphs, subgraph_index); -1516 | if ( -1517 | subgraph->start_states.size == 0 || -1518 | *array_back(&subgraph->start_states) != state -1519 | ) -1520 | array_push(&subgraph->start_states, state); -1521 | } -1522 | } -1523 | } -1524 | } -1525 | } -1526 | } - | -1527 | // For each subgraph, compute the preceding states by walking backward -1528 | // from the end states using the predecessor map. -1529 | Array(AnalysisSubgraphNode) next_nodes = array_new(); -1530 | for (unsigned i = 0; i < subgraphs.size; i++) { -1531 | AnalysisSubgraph *subgraph = array_get(&subgraphs, i); -1532 | if (subgraph->nodes.size == 0) { -1533 | array_delete(&subgraph->start_states); -1534 | array_erase(&subgraphs, i); -1535 | i--; -1536 | continue; -1537 | } -1538 | array_assign(&next_nodes, &subgraph->nodes); -1539 | while (next_nodes.size > 0) { -1540 | AnalysisSubgraphNode node = array_pop(&next_nodes); -1541 | if (node.child_index > 1) { -1542 | unsigned predecessor_count; -1543 | const TSStateId *predecessors = state_predecessor_map_get( -1544 | &predecessor_map, -1545 | node.state, -1546 | &predecessor_count -1547 | ); -1548 | for (unsigned j = 0; j < predecessor_count; j++) { -1549 | AnalysisSubgraphNode predecessor_node = { -1550 | .state = predecessors[j], -1551 | .child_index = node.child_index - 1, -1552 | .production_id = node.production_id, -1553 | .done = false, -1554 | }; -1555 | unsigned index, exists; -1556 | array_search_sorted_with( -1557 | &subgraph->nodes, analysis_subgraph_node__compare, 
&predecessor_node, -1558 | &index, &exists -1559 | ); -1560 | if (!exists) { -1561 | array_insert(&subgraph->nodes, index, predecessor_node); -1562 | array_push(&next_nodes, predecessor_node); -1563 | } -1564 | } -1565 | } -1566 | } -1567 | } - | -1568 | #ifdef DEBUG_ANALYZE_QUERY -1569 | printf("\nSubgraphs:\n"); -1570 | for (unsigned i = 0; i < subgraphs.size; i++) { -1571 | AnalysisSubgraph *subgraph = array_get(&subgraphs, i); -1572 | printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol)); -1573 | for (unsigned j = 0; j < subgraph->start_states.size; j++) { -1574 | printf( -1575 | " {state: %u}\n", -1576 | *array_get(&subgraph->start_states, j) -1577 | ); -1578 | } -1579 | for (unsigned j = 0; j < subgraph->nodes.size; j++) { -1580 | AnalysisSubgraphNode *node = array_get(&subgraph->nodes, j); -1581 | printf( -1582 | " {state: %u, child_index: %u, production_id: %u, done: %d}\n", -1583 | node->state, node->child_index, node->production_id, node->done -1584 | ); -1585 | } -1586 | printf("\n"); -1587 | } -1588 | #endif - | -1589 | // For each non-terminal pattern, determine if the pattern can successfully match, -1590 | // and identify all of the possible children within the pattern where matching could fail. -1591 | QueryAnalysis analysis = query_analysis__new(); -1592 | for (unsigned i = 0; i < parent_step_indices.size; i++) { -1593 | uint16_t parent_step_index = *array_get(&parent_step_indices, i); -1594 | uint16_t parent_depth = array_get(&self->steps, parent_step_index)->depth; -1595 | TSSymbol parent_symbol = array_get(&self->steps, parent_step_index)->symbol; -1596 | if (parent_symbol == ts_builtin_sym_error) continue; - | -1597 | // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's -1598 | // root symbol is a terminal, then return an error. 
-1599 | unsigned subgraph_index, exists; -1600 | array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); -1601 | if (!exists) { -1602 | unsigned first_child_step_index = parent_step_index + 1; -1603 | uint32_t j, child_exists; -1604 | array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists); -1605 | ts_assert(child_exists); -1606 | *error_offset = array_get(&self->step_offsets, j)->byte_offset; -1607 | all_patterns_are_valid = false; -1608 | break; -1609 | } - | -1610 | // Initialize an analysis state at every parse state in the table where -1611 | // this parent symbol can occur. -1612 | AnalysisSubgraph *subgraph = array_get(&subgraphs, subgraph_index); -1613 | analysis_state_set__clear(&analysis.states, &analysis.state_pool); -1614 | analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); -1615 | for (unsigned j = 0; j < subgraph->start_states.size; j++) { -1616 | TSStateId parse_state = *array_get(&subgraph->start_states, j); -1617 | analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { -1618 | .step_index = parent_step_index + 1, -1619 | .stack = { -1620 | [0] = { -1621 | .parse_state = parse_state, -1622 | .parent_symbol = parent_symbol, -1623 | .child_index = 0, -1624 | .field_id = 0, -1625 | .done = false, -1626 | }, -1627 | }, -1628 | .depth = 1, -1629 | .root_symbol = parent_symbol, -1630 | })); -1631 | } - | -1632 | #ifdef DEBUG_ANALYZE_QUERY -1633 | printf( -1634 | "\nWalk states for %s:\n", -1635 | ts_language_symbol_name(self->language, (*array_get(&analysis.states, 0))->stack[0].parent_symbol) -1636 | ); -1637 | #endif - | -1638 | analysis.did_abort = false; -1639 | ts_query__perform_analysis(self, &subgraphs, &analysis); - | -1640 | // If this pattern could not be fully analyzed, then every step should -1641 | // be considered fallible. 
-1642 | if (analysis.did_abort) { -1643 | for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) { -1644 | QueryStep *step = array_get(&self->steps, j); -1645 | if ( -1646 | step->depth <= parent_depth || -1647 | step->depth == PATTERN_DONE_MARKER -1648 | ) break; -1649 | if (!step->is_dead_end) { -1650 | step->parent_pattern_guaranteed = false; -1651 | step->root_pattern_guaranteed = false; -1652 | } -1653 | } -1654 | continue; -1655 | } - | -1656 | // If this pattern cannot match, store the pattern index so that it can be -1657 | // returned to the caller. -1658 | if (analysis.finished_parent_symbols.size == 0) { -1659 | uint16_t impossible_step_index; -1660 | if (analysis.final_step_indices.size > 0) { -1661 | impossible_step_index = *array_back(&analysis.final_step_indices); -1662 | } else { -1663 | // If there isn't a final step, then that means the parent step itself is unreachable. -1664 | impossible_step_index = parent_step_index; -1665 | } -1666 | uint32_t j, impossible_exists; -1667 | array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists); -1668 | if (j >= self->step_offsets.size) j = self->step_offsets.size - 1; -1669 | *error_offset = array_get(&self->step_offsets, j)->byte_offset; -1670 | all_patterns_are_valid = false; -1671 | break; -1672 | } - | -1673 | // Mark as fallible any step where a match terminated. -1674 | // Later, this property will be propagated to all of the step's predecessors. 
-1675 | for (unsigned j = 0; j < analysis.final_step_indices.size; j++) { -1676 | uint32_t final_step_index = *array_get(&analysis.final_step_indices, j); -1677 | QueryStep *step = array_get(&self->steps, final_step_index); -1678 | if ( -1679 | step->depth != PATTERN_DONE_MARKER && -1680 | step->depth > parent_depth && -1681 | !step->is_dead_end -1682 | ) { -1683 | step->parent_pattern_guaranteed = false; -1684 | step->root_pattern_guaranteed = false; -1685 | } -1686 | } -1687 | } - | -1688 | // Mark as indefinite any step with captures that are used in predicates. -1689 | Array(uint16_t) predicate_capture_ids = array_new(); -1690 | for (unsigned i = 0; i < self->patterns.size; i++) { -1691 | QueryPattern *pattern = array_get(&self->patterns, i); - | -1692 | // Gather all of the captures that are used in predicates for this pattern. -1693 | array_clear(&predicate_capture_ids); -1694 | for ( -1695 | unsigned start = pattern->predicate_steps.offset, -1696 | end = start + pattern->predicate_steps.length, -1697 | j = start; j < end; j++ -1698 | ) { -1699 | TSQueryPredicateStep *step = array_get(&self->predicate_steps, j); -1700 | if (step->type == TSQueryPredicateStepTypeCapture) { -1701 | uint16_t value_id = step->value_id; -1702 | array_insert_sorted_by(&predicate_capture_ids, , value_id); -1703 | } -1704 | } - | -1705 | // Find all of the steps that have these captures. 
-1706 | for ( -1707 | unsigned start = pattern->steps.offset, -1708 | end = start + pattern->steps.length, -1709 | j = start; j < end; j++ -1710 | ) { -1711 | QueryStep *step = array_get(&self->steps, j); -1712 | for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) { -1713 | uint16_t capture_id = step->capture_ids[k]; -1714 | if (capture_id == NONE) break; -1715 | unsigned index, exists; -1716 | array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists); -1717 | if (exists) { -1718 | step->root_pattern_guaranteed = false; -1719 | break; -1720 | } -1721 | } -1722 | } -1723 | } - | -1724 | // Propagate fallibility. If a pattern is fallible at a given step, then it is -1725 | // fallible at all of its preceding steps. -1726 | bool done = self->steps.size == 0; -1727 | while (!done) { -1728 | done = true; -1729 | for (unsigned i = self->steps.size - 1; i > 0; i--) { -1730 | QueryStep *step = array_get(&self->steps, i); -1731 | if (step->depth == PATTERN_DONE_MARKER) continue; - | -1732 | // Determine if this step is definite or has definite alternatives. -1733 | bool parent_pattern_guaranteed = false; -1734 | for (;;) { -1735 | if (step->root_pattern_guaranteed) { -1736 | parent_pattern_guaranteed = true; -1737 | break; -1738 | } -1739 | if (step->alternative_index == NONE || step->alternative_index < i) { -1740 | break; -1741 | } -1742 | step = array_get(&self->steps, step->alternative_index); -1743 | } - | -1744 | // If not, mark its predecessor as indefinite. 
-1745 | if (!parent_pattern_guaranteed) { -1746 | QueryStep *prev_step = array_get(&self->steps, i - 1); -1747 | if ( -1748 | !prev_step->is_dead_end && -1749 | prev_step->depth != PATTERN_DONE_MARKER && -1750 | prev_step->root_pattern_guaranteed -1751 | ) { -1752 | prev_step->root_pattern_guaranteed = false; -1753 | done = false; -1754 | } -1755 | } -1756 | } -1757 | } - | -1758 | #ifdef DEBUG_ANALYZE_QUERY -1759 | printf("Steps:\n"); -1760 | for (unsigned i = 0; i < self->steps.size; i++) { -1761 | QueryStep *step = array_get(&self->steps, i); -1762 | if (step->depth == PATTERN_DONE_MARKER) { -1763 | printf(" %u: DONE\n", i); -1764 | } else { -1765 | printf( -1766 | " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n", -1767 | i, -1768 | (step->symbol == WILDCARD_SYMBOL) -1769 | ? "ANY" -1770 | : ts_language_symbol_name(self->language, step->symbol), -1771 | (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"), -1772 | step->depth, -1773 | step->parent_pattern_guaranteed, -1774 | step->root_pattern_guaranteed -1775 | ); -1776 | } -1777 | } -1778 | #endif - | -1779 | // Determine which repetition symbols in this language have the possibility -1780 | // of matching non-rooted patterns in this query. These repetition symbols -1781 | // prevent certain optimizations with range restrictions. 
-1782 | analysis.did_abort = false; -1783 | for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) { -1784 | uint16_t pattern_entry_index = *array_get(&non_rooted_pattern_start_steps, i); -1785 | PatternEntry *pattern_entry = array_get(&self->pattern_map, pattern_entry_index); - | -1786 | analysis_state_set__clear(&analysis.states, &analysis.state_pool); -1787 | analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool); -1788 | for (unsigned j = 0; j < subgraphs.size; j++) { -1789 | AnalysisSubgraph *subgraph = array_get(&subgraphs, j); -1790 | TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol); -1791 | if (metadata.visible || metadata.named) continue; - | -1792 | for (uint32_t k = 0; k < subgraph->start_states.size; k++) { -1793 | TSStateId parse_state = *array_get(&subgraph->start_states, k); -1794 | analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) { -1795 | .step_index = pattern_entry->step_index, -1796 | .stack = { -1797 | [0] = { -1798 | .parse_state = parse_state, -1799 | .parent_symbol = subgraph->symbol, -1800 | .child_index = 0, -1801 | .field_id = 0, -1802 | .done = false, -1803 | }, -1804 | }, -1805 | .root_symbol = subgraph->symbol, -1806 | .depth = 1, -1807 | })); -1808 | } -1809 | } - | -1810 | #ifdef DEBUG_ANALYZE_QUERY -1811 | printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index); -1812 | #endif - | -1813 | ts_query__perform_analysis( -1814 | self, -1815 | &subgraphs, -1816 | &analysis -1817 | ); - | -1818 | if (analysis.finished_parent_symbols.size > 0) { -1819 | array_get(&self->patterns, pattern_entry->pattern_index)->is_non_local = true; -1820 | } - | -1821 | for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) { -1822 | TSSymbol symbol = *array_get(&analysis.finished_parent_symbols, k); -1823 | array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol); -1824 | } -1825 | } - | -1826 
| #ifdef DEBUG_ANALYZE_QUERY -1827 | if (self->repeat_symbols_with_rootless_patterns.size > 0) { -1828 | printf("\nRepetition symbols with rootless patterns:\n"); -1829 | printf("aborted analysis: %d\n", analysis.did_abort); -1830 | for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) { -1831 | TSSymbol symbol = *array_get(&self->repeat_symbols_with_rootless_patterns, i); -1832 | printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol)); -1833 | } -1834 | printf("\n"); -1835 | } -1836 | #endif - | -1837 | // Cleanup -1838 | for (unsigned i = 0; i < subgraphs.size; i++) { -1839 | array_delete(&array_get(&subgraphs, i)->start_states); -1840 | array_delete(&array_get(&subgraphs, i)->nodes); -1841 | } -1842 | array_delete(&subgraphs); -1843 | query_analysis__delete(&analysis); -1844 | array_delete(&next_nodes); -1845 | array_delete(&predicate_capture_ids); -1846 | state_predecessor_map_delete(&predecessor_map); - | -1847 | supertype_cleanup: -1848 | array_delete(&non_rooted_pattern_start_steps); -1849 | array_delete(&parent_step_indices); - | -1850 | return all_patterns_are_valid; -1851 | } - | -1852 | static void ts_query__add_negated_fields( -1853 | TSQuery *self, -1854 | uint16_t step_index, -1855 | TSFieldId *field_ids, -1856 | uint16_t field_count -1857 | ) { -1858 | QueryStep *step = array_get(&self->steps, step_index); - | -1859 | // The negated field array stores a list of field lists, separated by zeros. -1860 | // Try to find the start index of an existing list that matches this new list. -1861 | bool failed_match = false; -1862 | unsigned match_count = 0; -1863 | unsigned start_i = 0; -1864 | for (unsigned i = 0; i < self->negated_fields.size; i++) { -1865 | TSFieldId existing_field_id = *array_get(&self->negated_fields, i); - | -1866 | // At each zero value, terminate the match attempt. If we've exactly -1867 | // matched the new field list, then reuse this index. 
Otherwise, -1868 | // start over the matching process. -1869 | if (existing_field_id == 0) { -1870 | if (match_count == field_count) { -1871 | step->negated_field_list_id = start_i; -1872 | return; -1873 | } else { -1874 | start_i = i + 1; -1875 | match_count = 0; -1876 | failed_match = false; -1877 | } -1878 | } - | -1879 | // If the existing list matches our new list so far, then advance -1880 | // to the next element of the new list. -1881 | else if ( -1882 | match_count < field_count && -1883 | existing_field_id == field_ids[match_count] && -1884 | !failed_match -1885 | ) { -1886 | match_count++; -1887 | } - | -1888 | // Otherwise, this existing list has failed to match. -1889 | else { -1890 | match_count = 0; -1891 | failed_match = true; -1892 | } -1893 | } - | -1894 | step->negated_field_list_id = self->negated_fields.size; -1895 | array_extend(&self->negated_fields, field_count, field_ids); -1896 | array_push(&self->negated_fields, 0); -1897 | } - | -1898 | static TSQueryError ts_query__parse_string_literal( -1899 | TSQuery *self, -1900 | Stream *stream -1901 | ) { -1902 | const char *string_start = stream->input; -1903 | if (stream->next != '"') return TSQueryErrorSyntax; -1904 | stream_advance(stream); -1905 | const char *prev_position = stream->input; - | -1906 | bool is_escaped = false; -1907 | array_clear(&self->string_buffer); -1908 | for (;;) { -1909 | if (is_escaped) { -1910 | is_escaped = false; -1911 | switch (stream->next) { -1912 | case 'n': -1913 | array_push(&self->string_buffer, '\n'); -1914 | break; -1915 | case 'r': -1916 | array_push(&self->string_buffer, '\r'); -1917 | break; -1918 | case 't': -1919 | array_push(&self->string_buffer, '\t'); -1920 | break; -1921 | case '0': -1922 | array_push(&self->string_buffer, '\0'); -1923 | break; -1924 | default: -1925 | array_extend(&self->string_buffer, stream->next_size, stream->input); -1926 | break; -1927 | } -1928 | prev_position = stream->input + stream->next_size; -1929 | } else { -1930 | if 
(stream->next == '\\') { -1931 | array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); -1932 | prev_position = stream->input + 1; -1933 | is_escaped = true; -1934 | } else if (stream->next == '"') { -1935 | array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position); -1936 | stream_advance(stream); -1937 | return TSQueryErrorNone; -1938 | } else if (stream->next == '\n') { -1939 | stream_reset(stream, string_start); -1940 | return TSQueryErrorSyntax; -1941 | } -1942 | } -1943 | if (!stream_advance(stream)) { -1944 | stream_reset(stream, string_start); -1945 | return TSQueryErrorSyntax; -1946 | } -1947 | } -1948 | } - | -1949 | // Parse a single predicate associated with a pattern, adding it to the -1950 | // query's internal `predicate_steps` array. Predicates are arbitrary -1951 | // S-expressions associated with a pattern which are meant to be handled at -1952 | // a higher level of abstraction, such as the Rust/JavaScript bindings. They -1953 | // can contain '@'-prefixed capture names, double-quoted strings, and bare -1954 | // symbols, which also represent strings. -1955 | static TSQueryError ts_query__parse_predicate( -1956 | TSQuery *self, -1957 | Stream *stream -1958 | ) { -1959 | if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; -1960 | const char *predicate_name = stream->input; -1961 | stream_scan_identifier(stream); -1962 | if (stream->next != '?' 
&& stream->next != '!') { -1963 | return TSQueryErrorSyntax; -1964 | } -1965 | stream_advance(stream); -1966 | uint32_t length = (uint32_t)(stream->input - predicate_name); -1967 | uint16_t id = symbol_table_insert_name( -1968 | &self->predicate_values, -1969 | predicate_name, -1970 | length -1971 | ); -1972 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { -1973 | .type = TSQueryPredicateStepTypeString, -1974 | .value_id = id, -1975 | })); -1976 | stream_skip_whitespace(stream); - | -1977 | for (;;) { -1978 | if (stream->next == ')') { -1979 | stream_advance(stream); -1980 | stream_skip_whitespace(stream); -1981 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { -1982 | .type = TSQueryPredicateStepTypeDone, -1983 | .value_id = 0, -1984 | })); -1985 | break; -1986 | } - | -1987 | // Parse an '@'-prefixed capture name -1988 | else if (stream->next == '@') { -1989 | stream_advance(stream); - | -1990 | // Parse the capture name -1991 | if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; -1992 | const char *capture_name = stream->input; -1993 | stream_scan_identifier(stream); -1994 | uint32_t capture_length = (uint32_t)(stream->input - capture_name); - | -1995 | // Add the capture id to the first step of the pattern -1996 | int capture_id = symbol_table_id_for_name( -1997 | &self->captures, -1998 | capture_name, -1999 | capture_length -2000 | ); -2001 | if (capture_id == -1) { -2002 | stream_reset(stream, capture_name); -2003 | return TSQueryErrorCapture; -2004 | } - | -2005 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { -2006 | .type = TSQueryPredicateStepTypeCapture, -2007 | .value_id = capture_id, -2008 | })); -2009 | } - | -2010 | // Parse a string literal -2011 | else if (stream->next == '"') { -2012 | TSQueryError e = ts_query__parse_string_literal(self, stream); -2013 | if (e) return e; -2014 | uint16_t query_id = symbol_table_insert_name( -2015 | &self->predicate_values, -2016 | self->string_buffer.contents, 
-2017 | self->string_buffer.size -2018 | ); -2019 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { -2020 | .type = TSQueryPredicateStepTypeString, -2021 | .value_id = query_id, -2022 | })); -2023 | } - | -2024 | // Parse a bare symbol -2025 | else if (stream_is_ident_start(stream)) { -2026 | const char *symbol_start = stream->input; -2027 | stream_scan_identifier(stream); -2028 | uint32_t symbol_length = (uint32_t)(stream->input - symbol_start); -2029 | uint16_t query_id = symbol_table_insert_name( -2030 | &self->predicate_values, -2031 | symbol_start, -2032 | symbol_length -2033 | ); -2034 | array_push(&self->predicate_steps, ((TSQueryPredicateStep) { -2035 | .type = TSQueryPredicateStepTypeString, -2036 | .value_id = query_id, -2037 | })); -2038 | } - | -2039 | else { -2040 | return TSQueryErrorSyntax; -2041 | } - | -2042 | stream_skip_whitespace(stream); -2043 | } - | -2044 | return 0; -2045 | } - | -2046 | // Read one S-expression pattern from the stream, and incorporate it into -2047 | // the query's internal state machine representation. For nested patterns, -2048 | // this function calls itself recursively. -2049 | // -2050 | // The caller is responsible for passing in a dedicated CaptureQuantifiers. -2051 | // These should not be shared between different calls to ts_query__parse_pattern! -2052 | static TSQueryError ts_query__parse_pattern( -2053 | TSQuery *self, -2054 | Stream *stream, -2055 | uint32_t depth, -2056 | bool is_immediate, -2057 | CaptureQuantifiers *capture_quantifiers -2058 | ) { -2059 | if (stream->next == 0) return TSQueryErrorSyntax; -2060 | if (stream->next == ')' || stream->next == ']') return PARENT_DONE; - | -2061 | const uint32_t starting_step_index = self->steps.size; - | -2062 | // Store the byte offset of each step in the query. 
-2063 | if ( -2064 | self->step_offsets.size == 0 || -2065 | array_back(&self->step_offsets)->step_index != starting_step_index -2066 | ) { -2067 | array_push(&self->step_offsets, ((StepOffset) { -2068 | .step_index = starting_step_index, -2069 | .byte_offset = stream_offset(stream), -2070 | })); -2071 | } - | -2072 | // An open bracket is the start of an alternation. -2073 | if (stream->next == '[') { -2074 | stream_advance(stream); -2075 | stream_skip_whitespace(stream); - | -2076 | // Parse each branch, and add a placeholder step in between the branches. -2077 | Array(uint32_t) branch_step_indices = array_new(); -2078 | CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new(); -2079 | for (;;) { -2080 | uint32_t start_index = self->steps.size; -2081 | TSQueryError e = ts_query__parse_pattern( -2082 | self, -2083 | stream, -2084 | depth, -2085 | is_immediate, -2086 | &branch_capture_quantifiers -2087 | ); - | -2088 | if (e == PARENT_DONE) { -2089 | if (stream->next == ']' && branch_step_indices.size > 0) { -2090 | stream_advance(stream); -2091 | break; -2092 | } -2093 | e = TSQueryErrorSyntax; -2094 | } -2095 | if (e) { -2096 | capture_quantifiers_delete(&branch_capture_quantifiers); -2097 | array_delete(&branch_step_indices); -2098 | return e; -2099 | } - | -2100 | if (start_index == starting_step_index) { -2101 | capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers); -2102 | } else { -2103 | capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers); -2104 | } - | -2105 | array_push(&branch_step_indices, start_index); -2106 | array_push(&self->steps, query_step__new(0, depth, false)); -2107 | capture_quantifiers_clear(&branch_capture_quantifiers); -2108 | } -2109 | (void)array_pop(&self->steps); - | -2110 | // For all of the branches except for the last one, add the subsequent branch as an -2111 | // alternative, and link the end of the branch to the current end of the steps. 
-2112 | for (unsigned i = 0; i < branch_step_indices.size - 1; i++) { -2113 | uint32_t step_index = *array_get(&branch_step_indices, i); -2114 | uint32_t next_step_index = *array_get(&branch_step_indices, i + 1); -2115 | QueryStep *start_step = array_get(&self->steps, step_index); -2116 | QueryStep *end_step = array_get(&self->steps, next_step_index - 1); -2117 | start_step->alternative_index = next_step_index; -2118 | end_step->alternative_index = self->steps.size; -2119 | end_step->is_dead_end = true; -2120 | } - | -2121 | capture_quantifiers_delete(&branch_capture_quantifiers); -2122 | array_delete(&branch_step_indices); -2123 | } - | -2124 | // An open parenthesis can be the start of three possible constructs: -2125 | // * A grouped sequence -2126 | // * A predicate -2127 | // * A named node -2128 | else if (stream->next == '(') { -2129 | stream_advance(stream); -2130 | stream_skip_whitespace(stream); - | -2131 | // If this parenthesis is followed by a node, then it represents a grouped sequence. 
-2132 | if (stream->next == '(' || stream->next == '"' || stream->next == '[') { -2133 | bool child_is_immediate = is_immediate; -2134 | CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); -2135 | for (;;) { -2136 | if (stream->next == '.') { -2137 | child_is_immediate = true; -2138 | stream_advance(stream); -2139 | stream_skip_whitespace(stream); -2140 | } -2141 | TSQueryError e = ts_query__parse_pattern( -2142 | self, -2143 | stream, -2144 | depth, -2145 | child_is_immediate, -2146 | &child_capture_quantifiers -2147 | ); -2148 | if (e == PARENT_DONE) { -2149 | if (stream->next == ')') { -2150 | stream_advance(stream); -2151 | break; -2152 | } -2153 | e = TSQueryErrorSyntax; -2154 | } -2155 | if (e) { -2156 | capture_quantifiers_delete(&child_capture_quantifiers); -2157 | return e; -2158 | } - | -2159 | capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); -2160 | capture_quantifiers_clear(&child_capture_quantifiers); -2161 | child_is_immediate = false; -2162 | } - | -2163 | capture_quantifiers_delete(&child_capture_quantifiers); -2164 | } - | -2165 | // A dot/pound character indicates the start of a predicate. -2166 | else if (stream->next == '.' || stream->next == '#') { -2167 | stream_advance(stream); -2168 | return ts_query__parse_predicate(self, stream); -2169 | } - | -2170 | // Otherwise, this parenthesis is the start of a named node. 
-2171 | else { -2172 | TSSymbol symbol; -2173 | bool is_missing = false; -2174 | const char *node_name = stream->input; - | -2175 | // Parse a normal node name -2176 | if (stream_is_ident_start(stream)) { -2177 | stream_scan_identifier(stream); -2178 | uint32_t length = (uint32_t)(stream->input - node_name); - | -2179 | // Parse the wildcard symbol -2180 | if (length == 1 && node_name[0] == '_') { -2181 | symbol = WILDCARD_SYMBOL; -2182 | } else if (!strncmp(node_name, "MISSING", length)) { -2183 | is_missing = true; -2184 | stream_skip_whitespace(stream); - | -2185 | if (stream_is_ident_start(stream)) { -2186 | const char *missing_node_name = stream->input; -2187 | stream_scan_identifier(stream); -2188 | uint32_t missing_node_length = (uint32_t)(stream->input - missing_node_name); -2189 | symbol = ts_language_symbol_for_name( -2190 | self->language, -2191 | missing_node_name, -2192 | missing_node_length, -2193 | true -2194 | ); -2195 | if (!symbol) { -2196 | stream_reset(stream, missing_node_name); -2197 | return TSQueryErrorNodeType; -2198 | } -2199 | } - | -2200 | else if (stream->next == '"') { -2201 | const char *string_start = stream->input; -2202 | TSQueryError e = ts_query__parse_string_literal(self, stream); -2203 | if (e) return e; - | -2204 | symbol = ts_language_symbol_for_name( -2205 | self->language, -2206 | self->string_buffer.contents, -2207 | self->string_buffer.size, -2208 | false -2209 | ); -2210 | if (!symbol) { -2211 | stream_reset(stream, string_start + 1); -2212 | return TSQueryErrorNodeType; -2213 | } -2214 | } - | -2215 | else if (stream->next == ')') { -2216 | symbol = WILDCARD_SYMBOL; -2217 | } - | -2218 | else { -2219 | stream_reset(stream, stream->input); -2220 | return TSQueryErrorSyntax; -2221 | } -2222 | } - | -2223 | else { -2224 | symbol = ts_language_symbol_for_name( -2225 | self->language, -2226 | node_name, -2227 | length, -2228 | true -2229 | ); -2230 | if (!symbol) { -2231 | stream_reset(stream, node_name); -2232 | return 
TSQueryErrorNodeType; -2233 | } -2234 | } -2235 | } else { -2236 | return TSQueryErrorSyntax; -2237 | } - | -2238 | // Add a step for the node. -2239 | array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); -2240 | QueryStep *step = array_back(&self->steps); -2241 | if (ts_language_symbol_metadata(self->language, symbol).supertype) { -2242 | step->supertype_symbol = step->symbol; -2243 | step->symbol = WILDCARD_SYMBOL; -2244 | } -2245 | if (is_missing) { -2246 | step->is_missing = true; -2247 | } -2248 | if (symbol == WILDCARD_SYMBOL) { -2249 | step->is_named = true; -2250 | } - | -2251 | // Parse a supertype symbol -2252 | if (stream->next == '/') { -2253 | if (!step->supertype_symbol) { -2254 | stream_reset(stream, node_name - 1); // reset to the start of the node -2255 | return TSQueryErrorStructure; -2256 | } - | -2257 | stream_advance(stream); - | -2258 | const char *subtype_node_name = stream->input; - | -2259 | if (stream_is_ident_start(stream)) { // Named node -2260 | stream_scan_identifier(stream); -2261 | uint32_t length = (uint32_t)(stream->input - subtype_node_name); -2262 | step->symbol = ts_language_symbol_for_name( -2263 | self->language, -2264 | subtype_node_name, -2265 | length, -2266 | true -2267 | ); -2268 | } else if (stream->next == '"') { // Anonymous leaf node -2269 | TSQueryError e = ts_query__parse_string_literal(self, stream); -2270 | if (e) return e; -2271 | step->symbol = ts_language_symbol_for_name( -2272 | self->language, -2273 | self->string_buffer.contents, -2274 | self->string_buffer.size, -2275 | false -2276 | ); -2277 | } else { -2278 | return TSQueryErrorSyntax; -2279 | } - | -2280 | if (!step->symbol) { -2281 | stream_reset(stream, subtype_node_name); -2282 | return TSQueryErrorNodeType; -2283 | } - | -2284 | // Get all the possible subtypes for the given supertype, -2285 | // and check if the given subtype is valid. 
-2286 | if (self->language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { -2287 | uint32_t subtype_length; -2288 | const TSSymbol *subtypes = ts_language_subtypes( -2289 | self->language, -2290 | step->supertype_symbol, -2291 | &subtype_length -2292 | ); - | -2293 | bool subtype_is_valid = false; -2294 | for (uint32_t i = 0; i < subtype_length; i++) { -2295 | if (subtypes[i] == step->symbol) { -2296 | subtype_is_valid = true; -2297 | break; -2298 | } -2299 | } - | -2300 | // This subtype is not valid for the given supertype. -2301 | if (!subtype_is_valid) { -2302 | stream_reset(stream, node_name - 1); // reset to the start of the node -2303 | return TSQueryErrorStructure; -2304 | } -2305 | } -2306 | } - | -2307 | stream_skip_whitespace(stream); - | -2308 | // Parse the child patterns -2309 | bool child_is_immediate = false; -2310 | uint16_t last_child_step_index = 0; -2311 | uint16_t negated_field_count = 0; -2312 | TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT]; -2313 | CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); -2314 | for (;;) { -2315 | // Parse a negated field assertion -2316 | if (stream->next == '!') { -2317 | stream_advance(stream); -2318 | stream_skip_whitespace(stream); -2319 | if (!stream_is_ident_start(stream)) { -2320 | capture_quantifiers_delete(&child_capture_quantifiers); -2321 | return TSQueryErrorSyntax; -2322 | } -2323 | const char *field_name = stream->input; -2324 | stream_scan_identifier(stream); -2325 | uint32_t length = (uint32_t)(stream->input - field_name); -2326 | stream_skip_whitespace(stream); - | -2327 | TSFieldId field_id = ts_language_field_id_for_name( -2328 | self->language, -2329 | field_name, -2330 | length -2331 | ); -2332 | if (!field_id) { -2333 | stream->input = field_name; -2334 | capture_quantifiers_delete(&child_capture_quantifiers); -2335 | return TSQueryErrorField; -2336 | } - | -2337 | // Keep the field ids sorted. 
-2338 | if (negated_field_count < MAX_NEGATED_FIELD_COUNT) { -2339 | negated_field_ids[negated_field_count] = field_id; -2340 | negated_field_count++; -2341 | } - | -2342 | continue; -2343 | } - | -2344 | // Parse a sibling anchor -2345 | if (stream->next == '.') { -2346 | child_is_immediate = true; -2347 | stream_advance(stream); -2348 | stream_skip_whitespace(stream); -2349 | } - | -2350 | uint16_t step_index = self->steps.size; -2351 | TSQueryError e = ts_query__parse_pattern( -2352 | self, -2353 | stream, -2354 | depth + 1, -2355 | child_is_immediate, -2356 | &child_capture_quantifiers -2357 | ); -2358 | // In the event we only parsed a predicate, meaning no new steps were added, -2359 | // then subtract one so we're not indexing past the end of the array -2360 | if (step_index == self->steps.size) step_index--; -2361 | if (e == PARENT_DONE) { -2362 | if (stream->next == ')') { -2363 | if (child_is_immediate) { -2364 | if (last_child_step_index == 0) { -2365 | capture_quantifiers_delete(&child_capture_quantifiers); -2366 | return TSQueryErrorSyntax; -2367 | } -2368 | // Mark this step *and* its alternatives as the last child of the parent. 
-2369 | QueryStep *last_child_step = array_get(&self->steps, last_child_step_index); -2370 | last_child_step->is_last_child = true; -2371 | if ( -2372 | last_child_step->alternative_index != NONE && -2373 | last_child_step->alternative_index < self->steps.size -2374 | ) { -2375 | QueryStep *alternative_step = array_get(&self->steps, last_child_step->alternative_index); -2376 | alternative_step->is_last_child = true; -2377 | while ( -2378 | alternative_step->alternative_index != NONE && -2379 | alternative_step->alternative_index < self->steps.size -2380 | ) { -2381 | alternative_step = array_get(&self->steps, alternative_step->alternative_index); -2382 | alternative_step->is_last_child = true; -2383 | } -2384 | } -2385 | } - | -2386 | if (negated_field_count) { -2387 | ts_query__add_negated_fields( -2388 | self, -2389 | starting_step_index, -2390 | negated_field_ids, -2391 | negated_field_count -2392 | ); -2393 | } - | -2394 | stream_advance(stream); -2395 | break; -2396 | } -2397 | e = TSQueryErrorSyntax; -2398 | } -2399 | if (e) { -2400 | capture_quantifiers_delete(&child_capture_quantifiers); -2401 | return e; -2402 | } - | -2403 | capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers); - | -2404 | last_child_step_index = step_index; -2405 | child_is_immediate = false; -2406 | capture_quantifiers_clear(&child_capture_quantifiers); -2407 | } -2408 | capture_quantifiers_delete(&child_capture_quantifiers); -2409 | } -2410 | } - | -2411 | // Parse a wildcard pattern -2412 | else if (stream->next == '_') { -2413 | stream_advance(stream); -2414 | stream_skip_whitespace(stream); - | -2415 | // Add a step that matches any kind of node -2416 | array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); -2417 | } - | -2418 | // Parse a double-quoted anonymous leaf node expression -2419 | else if (stream->next == '"') { -2420 | const char *string_start = stream->input; -2421 | TSQueryError e = ts_query__parse_string_literal(self, 
stream); -2422 | if (e) return e; - | -2423 | // Add a step for the node -2424 | TSSymbol symbol = ts_language_symbol_for_name( -2425 | self->language, -2426 | self->string_buffer.contents, -2427 | self->string_buffer.size, -2428 | false -2429 | ); -2430 | if (!symbol) { -2431 | stream_reset(stream, string_start + 1); -2432 | return TSQueryErrorNodeType; -2433 | } -2434 | array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); -2435 | } - | -2436 | // Parse a field-prefixed pattern -2437 | else if (stream_is_ident_start(stream)) { -2438 | // Parse the field name -2439 | const char *field_name = stream->input; -2440 | stream_scan_identifier(stream); -2441 | uint32_t length = (uint32_t)(stream->input - field_name); -2442 | stream_skip_whitespace(stream); - | -2443 | if (stream->next != ':') { -2444 | stream_reset(stream, field_name); -2445 | return TSQueryErrorSyntax; -2446 | } -2447 | stream_advance(stream); -2448 | stream_skip_whitespace(stream); - | -2449 | // Parse the pattern -2450 | CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new(); -2451 | TSQueryError e = ts_query__parse_pattern( -2452 | self, -2453 | stream, -2454 | depth, -2455 | is_immediate, -2456 | &field_capture_quantifiers -2457 | ); -2458 | if (e) { -2459 | capture_quantifiers_delete(&field_capture_quantifiers); -2460 | if (e == PARENT_DONE) e = TSQueryErrorSyntax; -2461 | return e; -2462 | } - | -2463 | // Add the field name to the first step of the pattern -2464 | TSFieldId field_id = ts_language_field_id_for_name( -2465 | self->language, -2466 | field_name, -2467 | length -2468 | ); -2469 | if (!field_id) { -2470 | stream->input = field_name; -2471 | return TSQueryErrorField; -2472 | } - | -2473 | uint32_t step_index = starting_step_index; -2474 | QueryStep *step = array_get(&self->steps, step_index); -2475 | for (;;) { -2476 | step->field = field_id; -2477 | if ( -2478 | step->alternative_index != NONE && -2479 | step->alternative_index > step_index && -2480 
| step->alternative_index < self->steps.size -2481 | ) { -2482 | step_index = step->alternative_index; -2483 | step = array_get(&self->steps, step_index); -2484 | } else { -2485 | break; -2486 | } -2487 | } - | -2488 | capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers); -2489 | capture_quantifiers_delete(&field_capture_quantifiers); -2490 | } - | -2491 | else { -2492 | return TSQueryErrorSyntax; -2493 | } - | -2494 | stream_skip_whitespace(stream); - | -2495 | // Parse suffixes modifiers for this pattern -2496 | TSQuantifier quantifier = TSQuantifierOne; -2497 | for (;;) { -2498 | // Parse the one-or-more operator. -2499 | if (stream->next == '+') { -2500 | quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier); - | -2501 | stream_advance(stream); -2502 | stream_skip_whitespace(stream); - | -2503 | QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); -2504 | repeat_step.alternative_index = starting_step_index; -2505 | repeat_step.is_pass_through = true; -2506 | repeat_step.alternative_is_immediate = true; -2507 | array_push(&self->steps, repeat_step); -2508 | } - | -2509 | // Parse the zero-or-more repetition operator. -2510 | else if (stream->next == '*') { -2511 | quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier); - | -2512 | stream_advance(stream); -2513 | stream_skip_whitespace(stream); - | -2514 | QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false); -2515 | repeat_step.alternative_index = starting_step_index; -2516 | repeat_step.is_pass_through = true; -2517 | repeat_step.alternative_is_immediate = true; -2518 | array_push(&self->steps, repeat_step); - | -2519 | // Stop when `step->alternative_index` is `NONE` or it points to -2520 | // `repeat_step` or beyond. Note that having just been pushed, -2521 | // `repeat_step` occupies slot `self->steps.size - 1`. 
-2522 | QueryStep *step = array_get(&self->steps, starting_step_index); -2523 | while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) { -2524 | step = array_get(&self->steps, step->alternative_index); -2525 | } -2526 | step->alternative_index = self->steps.size; -2527 | } - | -2528 | // Parse the optional operator. -2529 | else if (stream->next == '?') { -2530 | quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier); - | -2531 | stream_advance(stream); -2532 | stream_skip_whitespace(stream); - | -2533 | QueryStep *step = array_get(&self->steps, starting_step_index); -2534 | while (step->alternative_index != NONE && step->alternative_index < self->steps.size) { -2535 | step = array_get(&self->steps, step->alternative_index); -2536 | } -2537 | step->alternative_index = self->steps.size; -2538 | } - | -2539 | // Parse an '@'-prefixed capture pattern -2540 | else if (stream->next == '@') { -2541 | stream_advance(stream); -2542 | if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; -2543 | const char *capture_name = stream->input; -2544 | stream_scan_identifier(stream); -2545 | uint32_t length = (uint32_t)(stream->input - capture_name); -2546 | stream_skip_whitespace(stream); - | -2547 | // Add the capture id to the first step of the pattern -2548 | uint16_t capture_id = symbol_table_insert_name( -2549 | &self->captures, -2550 | capture_name, -2551 | length -2552 | ); - | -2553 | // Add the capture quantifier -2554 | capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne); - | -2555 | uint32_t step_index = starting_step_index; -2556 | for (;;) { -2557 | QueryStep *step = array_get(&self->steps, step_index); -2558 | query_step__add_capture(step, capture_id); -2559 | if ( -2560 | step->alternative_index != NONE && -2561 | step->alternative_index > step_index && -2562 | step->alternative_index < self->steps.size -2563 | ) { -2564 | step_index = step->alternative_index; -2565 | } else { -2566 | 
break; -2567 | } -2568 | } -2569 | } - | -2570 | // No more suffix modifiers -2571 | else { -2572 | break; -2573 | } -2574 | } - | -2575 | capture_quantifiers_mul(capture_quantifiers, quantifier); - | -2576 | return 0; -2577 | } - | -2578 | TSQuery *ts_query_new( -2579 | const TSLanguage *language, -2580 | const char *source, -2581 | uint32_t source_len, -2582 | uint32_t *error_offset, -2583 | TSQueryError *error_type -2584 | ) { -2585 | if ( -2586 | !language || -2587 | language->abi_version > TREE_SITTER_LANGUAGE_VERSION || -2588 | language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION -2589 | ) { -2590 | *error_type = TSQueryErrorLanguage; -2591 | return NULL; -2592 | } - | -2593 | TSQuery *self = ts_malloc(sizeof(TSQuery)); -2594 | *self = (TSQuery) { -2595 | .steps = array_new(), -2596 | .pattern_map = array_new(), -2597 | .captures = symbol_table_new(), -2598 | .capture_quantifiers = array_new(), -2599 | .predicate_values = symbol_table_new(), -2600 | .predicate_steps = array_new(), -2601 | .patterns = array_new(), -2602 | .step_offsets = array_new(), -2603 | .string_buffer = array_new(), -2604 | .negated_fields = array_new(), -2605 | .repeat_symbols_with_rootless_patterns = array_new(), -2606 | .wildcard_root_pattern_count = 0, -2607 | .language = ts_language_copy(language), -2608 | }; - | -2609 | array_push(&self->negated_fields, 0); - | -2610 | // Parse all of the S-expressions in the given string. 
-2611 | Stream stream = stream_new(source, source_len); -2612 | stream_skip_whitespace(&stream); -2613 | while (stream.input < stream.end) { -2614 | uint32_t pattern_index = self->patterns.size; -2615 | uint32_t start_step_index = self->steps.size; -2616 | uint32_t start_predicate_step_index = self->predicate_steps.size; -2617 | array_push(&self->patterns, ((QueryPattern) { -2618 | .steps = (Slice) {.offset = start_step_index}, -2619 | .predicate_steps = (Slice) {.offset = start_predicate_step_index}, -2620 | .start_byte = stream_offset(&stream), -2621 | .is_non_local = false, -2622 | })); -2623 | CaptureQuantifiers capture_quantifiers = capture_quantifiers_new(); -2624 | *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers); -2625 | array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); - | -2626 | QueryPattern *pattern = array_back(&self->patterns); -2627 | pattern->steps.length = self->steps.size - start_step_index; -2628 | pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index; -2629 | pattern->end_byte = stream_offset(&stream); - | -2630 | // If any pattern could not be parsed, then report the error information -2631 | // and terminate. -2632 | if (*error_type) { -2633 | if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax; -2634 | *error_offset = stream_offset(&stream); -2635 | capture_quantifiers_delete(&capture_quantifiers); -2636 | ts_query_delete(self); -2637 | return NULL; -2638 | } - | -2639 | // Maintain a list of capture quantifiers for each pattern -2640 | array_push(&self->capture_quantifiers, capture_quantifiers); - | -2641 | // Maintain a map that can look up patterns for a given root symbol. 
-2642 | uint16_t wildcard_root_alternative_index = NONE; -2643 | for (;;) { -2644 | QueryStep *step = array_get(&self->steps, start_step_index); - | -2645 | // If a pattern has a wildcard at its root, but it has a non-wildcard child, -2646 | // then optimize the matching process by skipping matching the wildcard. -2647 | // Later, during the matching process, the query cursor will check that -2648 | // there is a parent node, and capture it if necessary. -2649 | if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) { -2650 | QueryStep *second_step = array_get(&self->steps, start_step_index + 1); -2651 | if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1 && !second_step->is_immediate) { -2652 | wildcard_root_alternative_index = step->alternative_index; -2653 | start_step_index += 1; -2654 | step = second_step; -2655 | } -2656 | } - | -2657 | // Determine whether the pattern has a single root node. This affects -2658 | // decisions about whether or not to start matching the pattern when -2659 | // a query cursor has a range restriction or when immediately within an -2660 | // error node. -2661 | uint32_t start_depth = step->depth; -2662 | bool is_rooted = start_depth == 0; -2663 | for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) { -2664 | QueryStep *child_step = array_get(&self->steps, step_index); -2665 | if (child_step->is_dead_end) break; -2666 | if (child_step->depth == start_depth) { -2667 | is_rooted = false; -2668 | break; -2669 | } -2670 | } - | -2671 | ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) { -2672 | .step_index = start_step_index, -2673 | .pattern_index = pattern_index, -2674 | .is_rooted = is_rooted -2675 | }); -2676 | if (step->symbol == WILDCARD_SYMBOL) { -2677 | self->wildcard_root_pattern_count++; -2678 | } - | -2679 | // If there are alternatives or options at the root of the pattern, -2680 | // then add multiple entries to the pattern map. 
-2681 | if (step->alternative_index != NONE) { -2682 | start_step_index = step->alternative_index; -2683 | } else if (wildcard_root_alternative_index != NONE) { -2684 | start_step_index = wildcard_root_alternative_index; -2685 | wildcard_root_alternative_index = NONE; -2686 | } else { -2687 | break; -2688 | } -2689 | } -2690 | } - | -2691 | if (!ts_query__analyze_patterns(self, error_offset)) { -2692 | *error_type = TSQueryErrorStructure; -2693 | ts_query_delete(self); -2694 | return NULL; -2695 | } - | -2696 | array_delete(&self->string_buffer); -2697 | return self; -2698 | } - | -2699 | void ts_query_delete(TSQuery *self) { -2700 | if (self) { -2701 | array_delete(&self->steps); -2702 | array_delete(&self->pattern_map); -2703 | array_delete(&self->predicate_steps); -2704 | array_delete(&self->patterns); -2705 | array_delete(&self->step_offsets); -2706 | array_delete(&self->string_buffer); -2707 | array_delete(&self->negated_fields); -2708 | array_delete(&self->repeat_symbols_with_rootless_patterns); -2709 | ts_language_delete(self->language); -2710 | symbol_table_delete(&self->captures); -2711 | symbol_table_delete(&self->predicate_values); -2712 | for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { -2713 | CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index); -2714 | capture_quantifiers_delete(capture_quantifiers); -2715 | } -2716 | array_delete(&self->capture_quantifiers); -2717 | ts_free(self); -2718 | } -2719 | } - | -2720 | uint32_t ts_query_pattern_count(const TSQuery *self) { -2721 | return self->patterns.size; -2722 | } - | -2723 | uint32_t ts_query_capture_count(const TSQuery *self) { -2724 | return self->captures.slices.size; -2725 | } - | -2726 | uint32_t ts_query_string_count(const TSQuery *self) { -2727 | return self->predicate_values.slices.size; -2728 | } - | -2729 | const char *ts_query_capture_name_for_id( -2730 | const TSQuery *self, -2731 | uint32_t index, -2732 | uint32_t *length -2733 
| ) { -2734 | return symbol_table_name_for_id(&self->captures, index, length); -2735 | } - | -2736 | TSQuantifier ts_query_capture_quantifier_for_id( -2737 | const TSQuery *self, -2738 | uint32_t pattern_index, -2739 | uint32_t capture_index -2740 | ) { -2741 | CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index); -2742 | return capture_quantifier_for_id(capture_quantifiers, capture_index); -2743 | } - | -2744 | const char *ts_query_string_value_for_id( -2745 | const TSQuery *self, -2746 | uint32_t index, -2747 | uint32_t *length -2748 | ) { -2749 | return symbol_table_name_for_id(&self->predicate_values, index, length); -2750 | } - | -2751 | const TSQueryPredicateStep *ts_query_predicates_for_pattern( -2752 | const TSQuery *self, -2753 | uint32_t pattern_index, -2754 | uint32_t *step_count -2755 | ) { -2756 | Slice slice = array_get(&self->patterns, pattern_index)->predicate_steps; -2757 | *step_count = slice.length; -2758 | if (slice.length == 0) return NULL; -2759 | return array_get(&self->predicate_steps, slice.offset); -2760 | } - | -2761 | uint32_t ts_query_start_byte_for_pattern( -2762 | const TSQuery *self, -2763 | uint32_t pattern_index -2764 | ) { -2765 | return array_get(&self->patterns, pattern_index)->start_byte; -2766 | } - | -2767 | uint32_t ts_query_end_byte_for_pattern( -2768 | const TSQuery *self, -2769 | uint32_t pattern_index -2770 | ) { -2771 | return array_get(&self->patterns, pattern_index)->end_byte; -2772 | } - | -2773 | bool ts_query_is_pattern_rooted( -2774 | const TSQuery *self, -2775 | uint32_t pattern_index -2776 | ) { -2777 | for (unsigned i = 0; i < self->pattern_map.size; i++) { -2778 | PatternEntry *entry = array_get(&self->pattern_map, i); -2779 | if (entry->pattern_index == pattern_index) { -2780 | if (!entry->is_rooted) return false; -2781 | } -2782 | } -2783 | return true; -2784 | } - | -2785 | bool ts_query_is_pattern_non_local( -2786 | const TSQuery *self, -2787 | uint32_t 
pattern_index -2788 | ) { -2789 | if (pattern_index < self->patterns.size) { -2790 | return array_get(&self->patterns, pattern_index)->is_non_local; -2791 | } else { -2792 | return false; -2793 | } -2794 | } - | -2795 | bool ts_query_is_pattern_guaranteed_at_step( -2796 | const TSQuery *self, -2797 | uint32_t byte_offset -2798 | ) { -2799 | uint32_t step_index = UINT32_MAX; -2800 | for (unsigned i = 0; i < self->step_offsets.size; i++) { -2801 | StepOffset *step_offset = array_get(&self->step_offsets, i); -2802 | if (step_offset->byte_offset > byte_offset) break; -2803 | step_index = step_offset->step_index; -2804 | } -2805 | if (step_index < self->steps.size) { -2806 | return array_get(&self->steps, step_index)->root_pattern_guaranteed; -2807 | } else { -2808 | return false; -2809 | } -2810 | } - | -2811 | bool ts_query__step_is_fallible( -2812 | const TSQuery *self, -2813 | uint16_t step_index -2814 | ) { -2815 | ts_assert((uint32_t)step_index + 1 < self->steps.size); -2816 | QueryStep *step = array_get(&self->steps, step_index); -2817 | QueryStep *next_step = array_get(&self->steps, step_index + 1); -2818 | return ( -2819 | next_step->depth != PATTERN_DONE_MARKER && -2820 | next_step->depth > step->depth && -2821 | (!next_step->parent_pattern_guaranteed || step->symbol == WILDCARD_SYMBOL) -2822 | ); -2823 | } - | -2824 | void ts_query_disable_capture( -2825 | TSQuery *self, -2826 | const char *name, -2827 | uint32_t length -2828 | ) { -2829 | // Remove capture information for any pattern step that previously -2830 | // captured with the given name. 
-2831 | int id = symbol_table_id_for_name(&self->captures, name, length); -2832 | if (id != -1) { -2833 | for (unsigned i = 0; i < self->steps.size; i++) { -2834 | QueryStep *step = array_get(&self->steps, i); -2835 | query_step__remove_capture(step, id); -2836 | } -2837 | } -2838 | } - | -2839 | void ts_query_disable_pattern( -2840 | TSQuery *self, -2841 | uint32_t pattern_index -2842 | ) { -2843 | // Remove the given pattern from the pattern map. Its steps will still -2844 | // be in the `steps` array, but they will never be read. -2845 | for (unsigned i = 0; i < self->pattern_map.size; i++) { -2846 | PatternEntry *pattern = array_get(&self->pattern_map, i); -2847 | if (pattern->pattern_index == pattern_index) { -2848 | array_erase(&self->pattern_map, i); -2849 | i--; -2850 | } -2851 | } -2852 | } - | -2853 | /*************** -2854 | * QueryCursor -2855 | ***************/ - | -2856 | TSQueryCursor *ts_query_cursor_new(void) { -2857 | TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor)); -2858 | *self = (TSQueryCursor) { -2859 | .did_exceed_match_limit = false, -2860 | .ascending = false, -2861 | .halted = false, -2862 | .states = array_new(), -2863 | .finished_states = array_new(), -2864 | .capture_list_pool = capture_list_pool_new(), -2865 | .start_byte = 0, -2866 | .end_byte = UINT32_MAX, -2867 | .start_point = {0, 0}, -2868 | .end_point = POINT_MAX, -2869 | .max_start_depth = UINT32_MAX, -2870 | .operation_count = 0, -2871 | }; -2872 | array_reserve(&self->states, 8); -2873 | array_reserve(&self->finished_states, 8); -2874 | return self; -2875 | } - | -2876 | void ts_query_cursor_delete(TSQueryCursor *self) { -2877 | array_delete(&self->states); -2878 | array_delete(&self->finished_states); -2879 | ts_tree_cursor_delete(&self->cursor); -2880 | capture_list_pool_delete(&self->capture_list_pool); -2881 | ts_free(self); -2882 | } - | -2883 | bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) { -2884 | return self->did_exceed_match_limit; 
-2885 | } - | -2886 | uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) { -2887 | return self->capture_list_pool.max_capture_list_count; -2888 | } - | -2889 | void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) { -2890 | self->capture_list_pool.max_capture_list_count = limit; -2891 | } - | -2892 | #ifdef DEBUG_EXECUTE_QUERY -2893 | #define LOG(...) fprintf(stderr, __VA_ARGS__) -2894 | #else -2895 | #define LOG(...) -2896 | #endif - | -2897 | void ts_query_cursor_exec( -2898 | TSQueryCursor *self, -2899 | const TSQuery *query, -2900 | TSNode node -2901 | ) { -2902 | if (query) { -2903 | LOG("query steps:\n"); -2904 | for (unsigned i = 0; i < query->steps.size; i++) { -2905 | QueryStep *step = array_get(&query->steps, i); -2906 | LOG(" %u: {", i); -2907 | if (step->depth == PATTERN_DONE_MARKER) { -2908 | LOG("DONE"); -2909 | } else if (step->is_dead_end) { -2910 | LOG("dead_end"); -2911 | } else if (step->is_pass_through) { -2912 | LOG("pass_through"); -2913 | } else if (step->symbol != WILDCARD_SYMBOL) { -2914 | LOG("symbol: %s", query->language->symbol_names[step->symbol]); -2915 | } else { -2916 | LOG("symbol: *"); -2917 | } -2918 | if (step->field) { -2919 | LOG(", field: %s", query->language->field_names[step->field]); -2920 | } -2921 | if (step->alternative_index != NONE) { -2922 | LOG(", alternative: %u", step->alternative_index); -2923 | } -2924 | LOG("},\n"); -2925 | } -2926 | } - | -2927 | array_clear(&self->states); -2928 | array_clear(&self->finished_states); -2929 | ts_tree_cursor_reset(&self->cursor, node); -2930 | capture_list_pool_reset(&self->capture_list_pool); -2931 | self->on_visible_node = true; -2932 | self->next_state_id = 0; -2933 | self->depth = 0; -2934 | self->ascending = false; -2935 | self->halted = false; -2936 | self->query = query; -2937 | self->did_exceed_match_limit = false; -2938 | self->operation_count = 0; -2939 | self->query_options = NULL; -2940 | self->query_state = (TSQueryCursorState) {0}; 
-2941 | } - | -2942 | void ts_query_cursor_exec_with_options( -2943 | TSQueryCursor *self, -2944 | const TSQuery *query, -2945 | TSNode node, -2946 | const TSQueryCursorOptions *query_options -2947 | ) { -2948 | ts_query_cursor_exec(self, query, node); -2949 | if (query_options) { -2950 | self->query_options = query_options; -2951 | self->query_state = (TSQueryCursorState) { -2952 | .payload = query_options->payload -2953 | }; -2954 | } -2955 | } - | -2956 | bool ts_query_cursor_set_byte_range( -2957 | TSQueryCursor *self, -2958 | uint32_t start_byte, -2959 | uint32_t end_byte -2960 | ) { -2961 | if (end_byte == 0) { -2962 | end_byte = UINT32_MAX; -2963 | } -2964 | if (start_byte > end_byte) { -2965 | return false; -2966 | } -2967 | self->start_byte = start_byte; -2968 | self->end_byte = end_byte; -2969 | return true; -2970 | } - | -2971 | bool ts_query_cursor_set_point_range( -2972 | TSQueryCursor *self, -2973 | TSPoint start_point, -2974 | TSPoint end_point -2975 | ) { -2976 | if (end_point.row == 0 && end_point.column == 0) { -2977 | end_point = POINT_MAX; -2978 | } -2979 | if (point_gt(start_point, end_point)) { -2980 | return false; -2981 | } -2982 | self->start_point = start_point; -2983 | self->end_point = end_point; -2984 | return true; -2985 | } - | -2986 | // Search through all of the in-progress states, and find the captured -2987 | // node that occurs earliest in the document. 
-2988 | static bool ts_query_cursor__first_in_progress_capture( -2989 | TSQueryCursor *self, -2990 | uint32_t *state_index, -2991 | uint32_t *byte_offset, -2992 | uint32_t *pattern_index, -2993 | bool *is_definite -2994 | ) { -2995 | bool result = false; -2996 | *state_index = UINT32_MAX; -2997 | *byte_offset = UINT32_MAX; -2998 | *pattern_index = UINT32_MAX; -2999 | for (unsigned i = 0; i < self->states.size; i++) { -3000 | QueryState *state = array_get(&self->states, i); -3001 | if (state->dead) continue; - | -3002 | const CaptureList *captures = capture_list_pool_get( -3003 | &self->capture_list_pool, -3004 | state->capture_list_id -3005 | ); -3006 | if (state->consumed_capture_count >= captures->size) { -3007 | continue; -3008 | } - | -3009 | TSNode node = array_get(captures, state->consumed_capture_count)->node; -3010 | if ( -3011 | ts_node_end_byte(node) <= self->start_byte || -3012 | point_lte(ts_node_end_point(node), self->start_point) -3013 | ) { -3014 | state->consumed_capture_count++; -3015 | i--; -3016 | continue; -3017 | } - | -3018 | uint32_t node_start_byte = ts_node_start_byte(node); -3019 | if ( -3020 | !result || -3021 | node_start_byte < *byte_offset || -3022 | (node_start_byte == *byte_offset && state->pattern_index < *pattern_index) -3023 | ) { -3024 | QueryStep *step = array_get(&self->query->steps, state->step_index); -3025 | if (is_definite) { -3026 | // We're being a bit conservative here by asserting that the following step -3027 | // is not immediate, because this capture might end up being discarded if the -3028 | // following symbol in the tree isn't the required symbol for this step. 
-3029 | *is_definite = step->root_pattern_guaranteed && !step->is_immediate; -3030 | } else if (step->root_pattern_guaranteed) { -3031 | continue; -3032 | } - | -3033 | result = true; -3034 | *state_index = i; -3035 | *byte_offset = node_start_byte; -3036 | *pattern_index = state->pattern_index; -3037 | } -3038 | } -3039 | return result; -3040 | } - | -3041 | // Determine which node is first in a depth-first traversal -3042 | int ts_query_cursor__compare_nodes(TSNode left, TSNode right) { -3043 | if (left.id != right.id) { -3044 | uint32_t left_start = ts_node_start_byte(left); -3045 | uint32_t right_start = ts_node_start_byte(right); -3046 | if (left_start < right_start) return -1; -3047 | if (left_start > right_start) return 1; -3048 | uint32_t left_node_count = ts_node_end_byte(left); -3049 | uint32_t right_node_count = ts_node_end_byte(right); -3050 | if (left_node_count > right_node_count) return -1; -3051 | if (left_node_count < right_node_count) return 1; -3052 | } -3053 | return 0; -3054 | } - | -3055 | // Determine if either state contains a superset of the other state's captures. 
-3056 | void ts_query_cursor__compare_captures( -3057 | TSQueryCursor *self, -3058 | QueryState *left_state, -3059 | QueryState *right_state, -3060 | bool *left_contains_right, -3061 | bool *right_contains_left -3062 | ) { -3063 | const CaptureList *left_captures = capture_list_pool_get( -3064 | &self->capture_list_pool, -3065 | left_state->capture_list_id -3066 | ); -3067 | const CaptureList *right_captures = capture_list_pool_get( -3068 | &self->capture_list_pool, -3069 | right_state->capture_list_id -3070 | ); -3071 | *left_contains_right = true; -3072 | *right_contains_left = true; -3073 | unsigned i = 0, j = 0; -3074 | for (;;) { -3075 | if (i < left_captures->size) { -3076 | if (j < right_captures->size) { -3077 | TSQueryCapture *left = array_get(left_captures, i); -3078 | TSQueryCapture *right = array_get(right_captures, j); -3079 | if (left->node.id == right->node.id && left->index == right->index) { -3080 | i++; -3081 | j++; -3082 | } else { -3083 | switch (ts_query_cursor__compare_nodes(left->node, right->node)) { -3084 | case -1: -3085 | *right_contains_left = false; -3086 | i++; -3087 | break; -3088 | case 1: -3089 | *left_contains_right = false; -3090 | j++; -3091 | break; -3092 | default: -3093 | *right_contains_left = false; -3094 | *left_contains_right = false; -3095 | i++; -3096 | j++; -3097 | break; -3098 | } -3099 | } -3100 | } else { -3101 | *right_contains_left = false; -3102 | break; -3103 | } -3104 | } else { -3105 | if (j < right_captures->size) { -3106 | *left_contains_right = false; -3107 | } -3108 | break; -3109 | } -3110 | } -3111 | } - | -3112 | static void ts_query_cursor__add_state( -3113 | TSQueryCursor *self, -3114 | const PatternEntry *pattern -3115 | ) { -3116 | QueryStep *step = array_get(&self->query->steps, pattern->step_index); -3117 | uint32_t start_depth = self->depth - step->depth; - | -3118 | // Keep the states array in ascending order of start_depth and pattern_index, -3119 | // so that it can be processed more 
efficiently elsewhere. Usually, there is -3120 | // no work to do here because of two facts: -3121 | // * States with lower start_depth are naturally added first due to the -3122 | // order in which nodes are visited. -3123 | // * Earlier patterns are naturally added first because of the ordering of the -3124 | // pattern_map data structure that's used to initiate matches. -3125 | // -3126 | // This loop is only needed in cases where two conditions hold: -3127 | // * A pattern consists of more than one sibling node, so that its states -3128 | // remain in progress after exiting the node that started the match. -3129 | // * The first node in the pattern matches against multiple nodes at the -3130 | // same depth. -3131 | // -3132 | // An example of this is the pattern '((comment)* (function))'. If multiple -3133 | // `comment` nodes appear in a row, then we may initiate a new state for this -3134 | // pattern while another state for the same pattern is already in progress. -3135 | // If there are multiple patterns like this in a query, then this loop will -3136 | // need to execute in order to keep the states ordered by pattern_index. -3137 | uint32_t index = self->states.size; -3138 | while (index > 0) { -3139 | QueryState *prev_state = array_get(&self->states, index - 1); -3140 | if (prev_state->start_depth < start_depth) break; -3141 | if (prev_state->start_depth == start_depth) { -3142 | // Avoid inserting an unnecessary duplicate state, which would be -3143 | // immediately pruned by the longest-match criteria. -3144 | if ( -3145 | prev_state->pattern_index == pattern->pattern_index && -3146 | prev_state->step_index == pattern->step_index -3147 | ) return; -3148 | if (prev_state->pattern_index <= pattern->pattern_index) break; -3149 | } -3150 | index--; -3151 | } - | -3152 | LOG( -3153 | " start state. 
pattern:%u, step:%u\n", -3154 | pattern->pattern_index, -3155 | pattern->step_index -3156 | ); -3157 | array_insert(&self->states, index, ((QueryState) { -3158 | .id = UINT32_MAX, -3159 | .capture_list_id = NONE, -3160 | .step_index = pattern->step_index, -3161 | .pattern_index = pattern->pattern_index, -3162 | .start_depth = start_depth, -3163 | .consumed_capture_count = 0, -3164 | .seeking_immediate_match = true, -3165 | .has_in_progress_alternatives = false, -3166 | .needs_parent = step->depth == 1, -3167 | .dead = false, -3168 | })); -3169 | } - | -3170 | // Acquire a capture list for this state. If there are no capture lists left in the -3171 | // pool, this will steal the capture list from another existing state, and mark that -3172 | // other state as 'dead'. -3173 | static CaptureList *ts_query_cursor__prepare_to_capture( -3174 | TSQueryCursor *self, -3175 | QueryState *state, -3176 | unsigned state_index_to_preserve -3177 | ) { -3178 | if (state->capture_list_id == NONE) { -3179 | state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool); - | -3180 | // If there are no capture lists left in the pool, then terminate whichever -3181 | // state has captured the earliest node in the document, and steal its -3182 | // capture list. -3183 | if (state->capture_list_id == NONE) { -3184 | self->did_exceed_match_limit = true; -3185 | uint32_t state_index, byte_offset, pattern_index; -3186 | if ( -3187 | ts_query_cursor__first_in_progress_capture( -3188 | self, -3189 | &state_index, -3190 | &byte_offset, -3191 | &pattern_index, -3192 | NULL -3193 | ) && -3194 | state_index != state_index_to_preserve -3195 | ) { -3196 | LOG( -3197 | " abandon state. 
index:%u, pattern:%u, offset:%u.\n", -3198 | state_index, pattern_index, byte_offset -3199 | ); -3200 | QueryState *other_state = array_get(&self->states, state_index); -3201 | state->capture_list_id = other_state->capture_list_id; -3202 | other_state->capture_list_id = NONE; -3203 | other_state->dead = true; -3204 | CaptureList *list = capture_list_pool_get_mut( -3205 | &self->capture_list_pool, -3206 | state->capture_list_id -3207 | ); -3208 | array_clear(list); -3209 | return list; -3210 | } else { -3211 | LOG(" ran out of capture lists"); -3212 | return NULL; -3213 | } -3214 | } -3215 | } -3216 | return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id); -3217 | } - | -3218 | static void ts_query_cursor__capture( -3219 | TSQueryCursor *self, -3220 | QueryState *state, -3221 | QueryStep *step, -3222 | TSNode node -3223 | ) { -3224 | if (state->dead) return; -3225 | CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX); -3226 | if (!capture_list) { -3227 | state->dead = true; -3228 | return; -3229 | } - | -3230 | for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { -3231 | uint16_t capture_id = step->capture_ids[j]; -3232 | if (step->capture_ids[j] == NONE) break; -3233 | array_push(capture_list, ((TSQueryCapture) { node, capture_id })); -3234 | LOG( -3235 | " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n", -3236 | ts_node_type(node), -3237 | state->pattern_index, -3238 | capture_id, -3239 | capture_list->size -3240 | ); -3241 | } -3242 | } - | -3243 | // Duplicate the given state and insert the newly-created state immediately after -3244 | // the given state in the `states` array. Ensures that the given state reference is -3245 | // still valid, even if the states array is reallocated. 
-3246 | static QueryState *ts_query_cursor__copy_state( -3247 | TSQueryCursor *self, -3248 | QueryState **state_ref -3249 | ) { -3250 | const QueryState *state = *state_ref; -3251 | uint32_t state_index = (uint32_t)(state - self->states.contents); -3252 | QueryState copy = *state; -3253 | copy.capture_list_id = NONE; - | -3254 | // If the state has captures, copy its capture list. -3255 | if (state->capture_list_id != NONE) { -3256 | CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, ©, state_index); -3257 | if (!new_captures) return NULL; -3258 | const CaptureList *old_captures = capture_list_pool_get( -3259 | &self->capture_list_pool, -3260 | state->capture_list_id -3261 | ); -3262 | array_push_all(new_captures, old_captures); -3263 | } - | -3264 | array_insert(&self->states, state_index + 1, copy); -3265 | *state_ref = array_get(&self->states, state_index); -3266 | return array_get(&self->states, state_index + 1); -3267 | } - | -3268 | static inline bool ts_query_cursor__should_descend( -3269 | TSQueryCursor *self, -3270 | bool node_intersects_range -3271 | ) { - | -3272 | if (node_intersects_range && self->depth < self->max_start_depth) { -3273 | return true; -3274 | } - | -3275 | // If there are in-progress matches whose remaining steps occur -3276 | // deeper in the tree, then descend. 
-3277 | for (unsigned i = 0; i < self->states.size; i++) { -3278 | QueryState *state = array_get(&self->states, i); -3279 | QueryStep *next_step = array_get(&self->query->steps, state->step_index); -3280 | if ( -3281 | next_step->depth != PATTERN_DONE_MARKER && -3282 | state->start_depth + next_step->depth > self->depth -3283 | ) { -3284 | return true; -3285 | } -3286 | } - | -3287 | if (self->depth >= self->max_start_depth) { -3288 | return false; -3289 | } - | -3290 | // If the current node is hidden, then a non-rooted pattern might match -3291 | // one if its roots inside of this node, and match another of its roots -3292 | // as part of a sibling node, so we may need to descend. -3293 | if (!self->on_visible_node) { -3294 | // Descending into a repetition node outside of the range can be -3295 | // expensive, because these nodes can have many visible children. -3296 | // Avoid descending into repetition nodes unless we have already -3297 | // determined that this query can match rootless patterns inside -3298 | // of this type of repetition node. -3299 | Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor); -3300 | if (ts_subtree_is_repetition(subtree)) { -3301 | bool exists; -3302 | uint32_t index; -3303 | array_search_sorted_by( -3304 | &self->query->repeat_symbols_with_rootless_patterns,, -3305 | ts_subtree_symbol(subtree), -3306 | &index, -3307 | &exists -3308 | ); -3309 | return exists; -3310 | } - | -3311 | return true; -3312 | } - | -3313 | return false; -3314 | } - | -3315 | // Walk the tree, processing patterns until at least one pattern finishes, -3316 | // If one or more patterns finish, return `true` and store their states in the -3317 | // `finished_states` array. Multiple patterns can finish on the same node. If -3318 | // there are no more matches, return `false`. 
-3319 | static inline bool ts_query_cursor__advance( -3320 | TSQueryCursor *self, -3321 | bool stop_on_definite_step -3322 | ) { -3323 | bool did_match = false; -3324 | for (;;) { -3325 | if (self->halted) { -3326 | while (self->states.size > 0) { -3327 | QueryState state = array_pop(&self->states); -3328 | capture_list_pool_release( -3329 | &self->capture_list_pool, -3330 | state.capture_list_id -3331 | ); -3332 | } -3333 | } - | -3334 | if (++self->operation_count == OP_COUNT_PER_QUERY_CALLBACK_CHECK) { -3335 | self->operation_count = 0; -3336 | } - | -3337 | if (self->query_options && self->query_options->progress_callback) { -3338 | self->query_state.current_byte_offset = ts_node_start_byte(ts_tree_cursor_current_node(&self->cursor)); -3339 | } -3340 | if ( -3341 | did_match || -3342 | self->halted || -3343 | ( -3344 | self->operation_count == 0 && -3345 | ( -3346 | (self->query_options && self->query_options->progress_callback && self->query_options->progress_callback(&self->query_state)) -3347 | ) -3348 | ) -3349 | ) { -3350 | return did_match; -3351 | } - | -3352 | // Exit the current node. -3353 | if (self->ascending) { -3354 | if (self->on_visible_node) { -3355 | LOG( -3356 | "leave node. depth:%u, type:%s\n", -3357 | self->depth, -3358 | ts_node_type(ts_tree_cursor_current_node(&self->cursor)) -3359 | ); - | -3360 | // After leaving a node, remove any states that cannot make further progress. -3361 | uint32_t deleted_count = 0; -3362 | for (unsigned i = 0, n = self->states.size; i < n; i++) { -3363 | QueryState *state = array_get(&self->states, i); -3364 | QueryStep *step = array_get(&self->query->steps, state->step_index); - | -3365 | // If a state completed its pattern inside of this node, but was deferred from finishing -3366 | // in order to search for longer matches, mark it as finished. 
-3367 | if ( -3368 | step->depth == PATTERN_DONE_MARKER && -3369 | (state->start_depth > self->depth || self->depth == 0) -3370 | ) { -3371 | LOG(" finish pattern %u\n", state->pattern_index); -3372 | array_push(&self->finished_states, *state); -3373 | did_match = true; -3374 | deleted_count++; -3375 | } - | -3376 | // If a state needed to match something within this node, then remove that state -3377 | // as it has failed to match. -3378 | else if ( -3379 | step->depth != PATTERN_DONE_MARKER && -3380 | (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth -3381 | ) { -3382 | LOG( -3383 | " failed to match. pattern:%u, step:%u\n", -3384 | state->pattern_index, -3385 | state->step_index -3386 | ); -3387 | capture_list_pool_release( -3388 | &self->capture_list_pool, -3389 | state->capture_list_id -3390 | ); -3391 | deleted_count++; -3392 | } - | -3393 | else if (deleted_count > 0) { -3394 | *array_get(&self->states, i - deleted_count) = *state; -3395 | } -3396 | } -3397 | self->states.size -= deleted_count; -3398 | } - | -3399 | // Leave this node by stepping to its next sibling or to its parent. -3400 | switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) { -3401 | case TreeCursorStepVisible: -3402 | if (!self->on_visible_node) { -3403 | self->depth++; -3404 | self->on_visible_node = true; -3405 | } -3406 | self->ascending = false; -3407 | break; -3408 | case TreeCursorStepHidden: -3409 | if (self->on_visible_node) { -3410 | self->depth--; -3411 | self->on_visible_node = false; -3412 | } -3413 | self->ascending = false; -3414 | break; -3415 | default: -3416 | if (ts_tree_cursor_goto_parent(&self->cursor)) { -3417 | self->depth--; -3418 | } else { -3419 | LOG("halt at root\n"); -3420 | self->halted = true; -3421 | } -3422 | } -3423 | } - | -3424 | // Enter a new node. -3425 | else { -3426 | // Get the properties of the current node. 
-3427 | TSNode node = ts_tree_cursor_current_node(&self->cursor); -3428 | TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor); - | -3429 | uint32_t start_byte = ts_node_start_byte(node); -3430 | uint32_t end_byte = ts_node_end_byte(node); -3431 | TSPoint start_point = ts_node_start_point(node); -3432 | TSPoint end_point = ts_node_end_point(node); -3433 | bool is_empty = start_byte == end_byte; - | -3434 | bool parent_precedes_range = !ts_node_is_null(parent_node) && ( -3435 | ts_node_end_byte(parent_node) <= self->start_byte || -3436 | point_lte(ts_node_end_point(parent_node), self->start_point) -3437 | ); -3438 | bool parent_follows_range = !ts_node_is_null(parent_node) && ( -3439 | ts_node_start_byte(parent_node) >= self->end_byte || -3440 | point_gte(ts_node_start_point(parent_node), self->end_point) -3441 | ); -3442 | bool node_precedes_range = -3443 | parent_precedes_range || -3444 | end_byte < self->start_byte || -3445 | point_lt(end_point, self->start_point) || -3446 | (!is_empty && end_byte == self->start_byte) || -3447 | (!is_empty && point_eq(end_point, self->start_point)); - | -3448 | bool node_follows_range = parent_follows_range || ( -3449 | start_byte >= self->end_byte || -3450 | point_gte(start_point, self->end_point) -3451 | ); -3452 | bool parent_intersects_range = !parent_precedes_range && !parent_follows_range; -3453 | bool node_intersects_range = !node_precedes_range && !node_follows_range; - | -3454 | if (self->on_visible_node) { -3455 | TSSymbol symbol = ts_node_symbol(node); -3456 | bool is_named = ts_node_is_named(node); -3457 | bool is_missing = ts_node_is_missing(node); -3458 | bool has_later_siblings; -3459 | bool has_later_named_siblings; -3460 | bool can_have_later_siblings_with_this_field; -3461 | TSFieldId field_id = 0; -3462 | TSSymbol supertypes[8] = {0}; -3463 | unsigned supertype_count = 8; -3464 | ts_tree_cursor_current_status( -3465 | &self->cursor, -3466 | &field_id, -3467 | &has_later_siblings, -3468 | 
&has_later_named_siblings, -3469 | &can_have_later_siblings_with_this_field, -3470 | supertypes, -3471 | &supertype_count -3472 | ); -3473 | LOG( -3474 | "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n", -3475 | self->depth, -3476 | ts_node_type(node), -3477 | ts_language_field_name_for_id(self->query->language, field_id), -3478 | ts_node_start_point(node).row, -3479 | self->states.size, -3480 | self->finished_states.size -3481 | ); - | -3482 | bool node_is_error = symbol == ts_builtin_sym_error; -3483 | bool parent_is_error = -3484 | !ts_node_is_null(parent_node) && -3485 | ts_node_symbol(parent_node) == ts_builtin_sym_error; - | -3486 | // Add new states for any patterns whose root node is a wildcard. -3487 | if (!node_is_error) { -3488 | for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { -3489 | PatternEntry *pattern = array_get(&self->query->pattern_map, i); - | -3490 | // If this node matches the first step of the pattern, then add a new -3491 | // state at the start of this pattern. -3492 | QueryStep *step = array_get(&self->query->steps, pattern->step_index); -3493 | uint32_t start_depth = self->depth - step->depth; -3494 | if ( -3495 | (pattern->is_rooted ? -3496 | node_intersects_range : -3497 | (parent_intersects_range && !parent_is_error)) && -3498 | (!step->field || field_id == step->field) && -3499 | (!step->supertype_symbol || supertype_count > 0) && -3500 | (start_depth <= self->max_start_depth) -3501 | ) { -3502 | ts_query_cursor__add_state(self, pattern); -3503 | } -3504 | } -3505 | } - | -3506 | // Add new states for any patterns whose root node matches this node. 
-3507 | unsigned i; -3508 | if (ts_query__pattern_map_search(self->query, symbol, &i)) { -3509 | PatternEntry *pattern = array_get(&self->query->pattern_map, i); - | -3510 | QueryStep *step = array_get(&self->query->steps, pattern->step_index); -3511 | uint32_t start_depth = self->depth - step->depth; -3512 | do { -3513 | // If this node matches the first step of the pattern, then add a new -3514 | // state at the start of this pattern. -3515 | if ( -3516 | (pattern->is_rooted ? -3517 | node_intersects_range : -3518 | (parent_intersects_range && !parent_is_error)) && -3519 | (!step->field || field_id == step->field) && -3520 | (start_depth <= self->max_start_depth) -3521 | ) { -3522 | ts_query_cursor__add_state(self, pattern); -3523 | } - | -3524 | // Advance to the next pattern whose root node matches this node. -3525 | i++; -3526 | if (i == self->query->pattern_map.size) break; -3527 | pattern = array_get(&self->query->pattern_map, i); -3528 | step = array_get(&self->query->steps, pattern->step_index); -3529 | } while (step->symbol == symbol); -3530 | } - | -3531 | // Update all of the in-progress states with current node. -3532 | for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) { -3533 | QueryState *state = array_get(&self->states, j); -3534 | QueryStep *step = array_get(&self->query->steps, state->step_index); -3535 | state->has_in_progress_alternatives = false; -3536 | copy_count = 0; - | -3537 | // Check that the node matches all of the criteria for the next -3538 | // step of the pattern. -3539 | if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; - | -3540 | // Determine if this node matches this step of the pattern, and also -3541 | // if this node can have later siblings that match this step of the -3542 | // pattern. 
-3543 | bool node_does_match = false; -3544 | if (step->symbol == WILDCARD_SYMBOL) { -3545 | if (step->is_missing) { -3546 | node_does_match = is_missing; -3547 | } else { -3548 | node_does_match = !node_is_error && (is_named || !step->is_named); -3549 | } -3550 | } else { -3551 | node_does_match = symbol == step->symbol && (!step->is_missing || is_missing); -3552 | } -3553 | bool later_sibling_can_match = has_later_siblings; -3554 | if ((step->is_immediate && is_named) || state->seeking_immediate_match) { -3555 | later_sibling_can_match = false; -3556 | } -3557 | if (step->is_last_child && has_later_named_siblings) { -3558 | node_does_match = false; -3559 | } -3560 | if (step->supertype_symbol) { -3561 | bool has_supertype = false; -3562 | for (unsigned k = 0; k < supertype_count; k++) { -3563 | if (supertypes[k] == step->supertype_symbol) { -3564 | has_supertype = true; -3565 | break; -3566 | } -3567 | } -3568 | if (!has_supertype) node_does_match = false; -3569 | } -3570 | if (step->field) { -3571 | if (step->field == field_id) { -3572 | if (!can_have_later_siblings_with_this_field) { -3573 | later_sibling_can_match = false; -3574 | } -3575 | } else { -3576 | node_does_match = false; -3577 | } -3578 | } - | -3579 | if (step->negated_field_list_id) { -3580 | TSFieldId *negated_field_ids = array_get(&self->query->negated_fields, step->negated_field_list_id); -3581 | for (;;) { -3582 | TSFieldId negated_field_id = *negated_field_ids; -3583 | if (negated_field_id) { -3584 | negated_field_ids++; -3585 | if (ts_node_child_by_field_id(node, negated_field_id).id) { -3586 | node_does_match = false; -3587 | break; -3588 | } -3589 | } else { -3590 | break; -3591 | } -3592 | } -3593 | } - | -3594 | // Remove states immediately if it is ever clear that they cannot match. -3595 | if (!node_does_match) { -3596 | if (!later_sibling_can_match) { -3597 | LOG( -3598 | " discard state. 
pattern:%u, step:%u\n", -3599 | state->pattern_index, -3600 | state->step_index -3601 | ); -3602 | capture_list_pool_release( -3603 | &self->capture_list_pool, -3604 | state->capture_list_id -3605 | ); -3606 | array_erase(&self->states, j); -3607 | j--; -3608 | } -3609 | continue; -3610 | } - | -3611 | // Some patterns can match their root node in multiple ways, capturing different -3612 | // children. If this pattern step could match later children within the same -3613 | // parent, then this query state cannot simply be updated in place. It must be -3614 | // split into two states: one that matches this node, and one which skips over -3615 | // this node, to preserve the possibility of matching later siblings. -3616 | if (later_sibling_can_match && ( -3617 | step->contains_captures || -3618 | ts_query__step_is_fallible(self->query, state->step_index) -3619 | )) { -3620 | if (ts_query_cursor__copy_state(self, &state)) { -3621 | LOG( -3622 | " split state for capture. pattern:%u, step:%u\n", -3623 | state->pattern_index, -3624 | state->step_index -3625 | ); -3626 | copy_count++; -3627 | } -3628 | } - | -3629 | // If this pattern started with a wildcard, such that the pattern map -3630 | // actually points to the *second* step of the pattern, then check -3631 | // that the node has a parent, and capture the parent node if necessary. 
-3632 | if (state->needs_parent) { -3633 | TSNode parent = ts_tree_cursor_parent_node(&self->cursor); -3634 | if (ts_node_is_null(parent)) { -3635 | LOG(" missing parent node\n"); -3636 | state->dead = true; -3637 | } else { -3638 | state->needs_parent = false; -3639 | QueryStep *skipped_wildcard_step = step; -3640 | do { -3641 | skipped_wildcard_step--; -3642 | } while ( -3643 | skipped_wildcard_step->is_dead_end || -3644 | skipped_wildcard_step->is_pass_through || -3645 | skipped_wildcard_step->depth > 0 -3646 | ); -3647 | if (skipped_wildcard_step->capture_ids[0] != NONE) { -3648 | LOG(" capture wildcard parent\n"); -3649 | ts_query_cursor__capture( -3650 | self, -3651 | state, -3652 | skipped_wildcard_step, -3653 | parent -3654 | ); -3655 | } -3656 | } -3657 | } - | -3658 | // If the current node is captured in this pattern, add it to the capture list. -3659 | if (step->capture_ids[0] != NONE) { -3660 | ts_query_cursor__capture(self, state, step, node); -3661 | } - | -3662 | if (state->dead) { -3663 | array_erase(&self->states, j); -3664 | j--; -3665 | continue; -3666 | } - | -3667 | // Advance this state to the next step of its pattern. -3668 | state->step_index++; -3669 | LOG( -3670 | " advance state. pattern:%u, step:%u\n", -3671 | state->pattern_index, -3672 | state->step_index -3673 | ); - | -3674 | QueryStep *next_step = array_get(&self->query->steps, state->step_index); - | -3675 | // For a given step, if the current symbol is the wildcard symbol, `_`, and it is **not** -3676 | // named, meaning it should capture anonymous nodes, **and** the next step is immediate, -3677 | // we reuse the `seeking_immediate_match` flag to indicate that we are looking for an -3678 | // immediate match due to an unnamed wildcard symbol. 
-3679 | // -3680 | // The reason for this is that typically, anchors will not consider anonymous nodes, -3681 | // but we're special casing the wildcard symbol to allow for any immediate matches, -3682 | // regardless of whether they are named or not. -3683 | if (step->symbol == WILDCARD_SYMBOL && !step->is_named && next_step->is_immediate) { -3684 | state->seeking_immediate_match = true; -3685 | } else { -3686 | state->seeking_immediate_match = false; -3687 | } - | -3688 | if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true; - | -3689 | // If this state's next step has an alternative step, then copy the state in order -3690 | // to pursue both alternatives. The alternative step itself may have an alternative, -3691 | // so this is an interactive process. -3692 | unsigned end_index = j + 1; -3693 | for (unsigned k = j; k < end_index; k++) { -3694 | QueryState *child_state = array_get(&self->states, k); -3695 | QueryStep *child_step = array_get(&self->query->steps, child_state->step_index); -3696 | if (child_step->alternative_index != NONE) { -3697 | // A "dead-end" step exists only to add a non-sequential jump into the step sequence, -3698 | // via its alternative index. When a state reaches a dead-end step, it jumps straight -3699 | // to the step's alternative. -3700 | if (child_step->is_dead_end) { -3701 | child_state->step_index = child_step->alternative_index; -3702 | k--; -3703 | continue; -3704 | } - | -3705 | // A "pass-through" step exists only to add a branch into the step sequence, -3706 | // via its alternative_index. When a state reaches a pass-through step, it splits -3707 | // in order to process the alternative step, and then it advances to the next step. -3708 | if (child_step->is_pass_through) { -3709 | child_state->step_index++; -3710 | k--; -3711 | } - | -3712 | QueryState *copy = ts_query_cursor__copy_state(self, &child_state); -3713 | if (copy) { -3714 | LOG( -3715 | " split state for branch. 
pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", -3716 | copy->pattern_index, -3717 | copy->step_index, -3718 | next_step->alternative_index, -3719 | next_step->alternative_is_immediate, -3720 | capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size -3721 | ); -3722 | end_index++; -3723 | copy_count++; -3724 | copy->step_index = child_step->alternative_index; -3725 | if (child_step->alternative_is_immediate) { -3726 | copy->seeking_immediate_match = true; -3727 | } -3728 | } -3729 | } -3730 | } -3731 | } - | -3732 | for (unsigned j = 0; j < self->states.size; j++) { -3733 | QueryState *state = array_get(&self->states, j); -3734 | if (state->dead) { -3735 | array_erase(&self->states, j); -3736 | j--; -3737 | continue; -3738 | } - | -3739 | // Enforce the longest-match criteria. When a query pattern contains optional or -3740 | // repeated nodes, this is necessary to avoid multiple redundant states, where -3741 | // one state has a strict subset of another state's captures. -3742 | bool did_remove = false; -3743 | for (unsigned k = j + 1; k < self->states.size; k++) { -3744 | QueryState *other_state = array_get(&self->states, k); - | -3745 | // Query states are kept in ascending order of start_depth and pattern_index. -3746 | // Since the longest-match criteria is only used for deduping matches of the same -3747 | // pattern and root node, we only need to perform pairwise comparisons within a -3748 | // small slice of the states array. 
-3749 | if ( -3750 | other_state->start_depth != state->start_depth || -3751 | other_state->pattern_index != state->pattern_index -3752 | ) break; - | -3753 | bool left_contains_right, right_contains_left; -3754 | ts_query_cursor__compare_captures( -3755 | self, -3756 | state, -3757 | other_state, -3758 | &left_contains_right, -3759 | &right_contains_left -3760 | ); -3761 | if (left_contains_right) { -3762 | if (state->step_index == other_state->step_index) { -3763 | LOG( -3764 | " drop shorter state. pattern: %u, step_index: %u\n", -3765 | state->pattern_index, -3766 | state->step_index -3767 | ); -3768 | capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); -3769 | array_erase(&self->states, k); -3770 | k--; -3771 | continue; -3772 | } -3773 | other_state->has_in_progress_alternatives = true; -3774 | } -3775 | if (right_contains_left) { -3776 | if (state->step_index == other_state->step_index) { -3777 | LOG( -3778 | " drop shorter state. pattern: %u, step_index: %u\n", -3779 | state->pattern_index, -3780 | state->step_index -3781 | ); -3782 | capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); -3783 | array_erase(&self->states, j); -3784 | j--; -3785 | did_remove = true; -3786 | break; -3787 | } -3788 | state->has_in_progress_alternatives = true; -3789 | } -3790 | } - | -3791 | // If the state is at the end of its pattern, remove it from the list -3792 | // of in-progress states and add it to the list of finished states. -3793 | if (!did_remove) { -3794 | LOG( -3795 | " keep state. 
pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n", -3796 | state->pattern_index, -3797 | state->start_depth, -3798 | state->step_index, -3799 | capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size -3800 | ); -3801 | QueryStep *next_step = array_get(&self->query->steps, state->step_index); -3802 | if (next_step->depth == PATTERN_DONE_MARKER) { -3803 | if (state->has_in_progress_alternatives) { -3804 | LOG(" defer finishing pattern %u\n", state->pattern_index); -3805 | } else { -3806 | LOG(" finish pattern %u\n", state->pattern_index); -3807 | array_push(&self->finished_states, *state); -3808 | array_erase(&self->states, (uint32_t)(state - self->states.contents)); -3809 | did_match = true; -3810 | j--; -3811 | } -3812 | } -3813 | } -3814 | } -3815 | } - | -3816 | if (ts_query_cursor__should_descend(self, node_intersects_range)) { -3817 | switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) { -3818 | case TreeCursorStepVisible: -3819 | self->depth++; -3820 | self->on_visible_node = true; -3821 | continue; -3822 | case TreeCursorStepHidden: -3823 | self->on_visible_node = false; -3824 | continue; -3825 | default: -3826 | break; -3827 | } -3828 | } - | -3829 | self->ascending = true; -3830 | } -3831 | } -3832 | } - | -3833 | bool ts_query_cursor_next_match( -3834 | TSQueryCursor *self, -3835 | TSQueryMatch *match -3836 | ) { -3837 | if (self->finished_states.size == 0) { -3838 | if (!ts_query_cursor__advance(self, false)) { -3839 | return false; -3840 | } -3841 | } - | -3842 | QueryState *state = array_get(&self->finished_states, 0); -3843 | if (state->id == UINT32_MAX) state->id = self->next_state_id++; -3844 | match->id = state->id; -3845 | match->pattern_index = state->pattern_index; -3846 | const CaptureList *captures = capture_list_pool_get( -3847 | &self->capture_list_pool, -3848 | state->capture_list_id -3849 | ); -3850 | match->captures = captures->contents; -3851 | match->capture_count = captures->size; 
-3852 | capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); -3853 | array_erase(&self->finished_states, 0); -3854 | return true; -3855 | } - | -3856 | void ts_query_cursor_remove_match( -3857 | TSQueryCursor *self, -3858 | uint32_t match_id -3859 | ) { -3860 | for (unsigned i = 0; i < self->finished_states.size; i++) { -3861 | const QueryState *state = array_get(&self->finished_states, i); -3862 | if (state->id == match_id) { -3863 | capture_list_pool_release( -3864 | &self->capture_list_pool, -3865 | state->capture_list_id -3866 | ); -3867 | array_erase(&self->finished_states, i); -3868 | return; -3869 | } -3870 | } - | -3871 | // Remove unfinished query states as well to prevent future -3872 | // captures for a match being removed. -3873 | for (unsigned i = 0; i < self->states.size; i++) { -3874 | const QueryState *state = array_get(&self->states, i); -3875 | if (state->id == match_id) { -3876 | capture_list_pool_release( -3877 | &self->capture_list_pool, -3878 | state->capture_list_id -3879 | ); -3880 | array_erase(&self->states, i); -3881 | return; -3882 | } -3883 | } -3884 | } - | -3885 | bool ts_query_cursor_next_capture( -3886 | TSQueryCursor *self, -3887 | TSQueryMatch *match, -3888 | uint32_t *capture_index -3889 | ) { -3890 | // The goal here is to return captures in order, even though they may not -3891 | // be discovered in order, because patterns can overlap. Search for matches -3892 | // until there is a finished capture that is before any unfinished capture. -3893 | for (;;) { -3894 | // First, find the earliest capture in an unfinished match. 
-3895 | uint32_t first_unfinished_capture_byte; -3896 | uint32_t first_unfinished_pattern_index; -3897 | uint32_t first_unfinished_state_index; -3898 | bool first_unfinished_state_is_definite = false; -3899 | bool found_unfinished_state = ts_query_cursor__first_in_progress_capture( -3900 | self, -3901 | &first_unfinished_state_index, -3902 | &first_unfinished_capture_byte, -3903 | &first_unfinished_pattern_index, -3904 | &first_unfinished_state_is_definite -3905 | ); - | -3906 | // Then find the earliest capture in a finished match. It must occur -3907 | // before the first capture in an *unfinished* match. -3908 | QueryState *first_finished_state = NULL; -3909 | uint32_t first_finished_capture_byte = first_unfinished_capture_byte; -3910 | uint32_t first_finished_pattern_index = first_unfinished_pattern_index; -3911 | for (unsigned i = 0; i < self->finished_states.size;) { -3912 | QueryState *state = array_get(&self->finished_states, i); -3913 | const CaptureList *captures = capture_list_pool_get( -3914 | &self->capture_list_pool, -3915 | state->capture_list_id -3916 | ); - | -3917 | // Remove states whose captures are all consumed. -3918 | if (state->consumed_capture_count >= captures->size) { -3919 | capture_list_pool_release( -3920 | &self->capture_list_pool, -3921 | state->capture_list_id -3922 | ); -3923 | array_erase(&self->finished_states, i); -3924 | continue; -3925 | } - | -3926 | TSNode node = array_get(captures, state->consumed_capture_count)->node; - | -3927 | bool node_precedes_range = ( -3928 | ts_node_end_byte(node) <= self->start_byte || -3929 | point_lte(ts_node_end_point(node), self->start_point) -3930 | ); -3931 | bool node_follows_range = ( -3932 | ts_node_start_byte(node) >= self->end_byte || -3933 | point_gte(ts_node_start_point(node), self->end_point) -3934 | ); -3935 | bool node_outside_of_range = node_precedes_range || node_follows_range; - | -3936 | // Skip captures that are outside of the cursor's range. 
-3937 | if (node_outside_of_range) { -3938 | state->consumed_capture_count++; -3939 | continue; -3940 | } - | -3941 | uint32_t node_start_byte = ts_node_start_byte(node); -3942 | if ( -3943 | node_start_byte < first_finished_capture_byte || -3944 | ( -3945 | node_start_byte == first_finished_capture_byte && -3946 | state->pattern_index < first_finished_pattern_index -3947 | ) -3948 | ) { -3949 | first_finished_state = state; -3950 | first_finished_capture_byte = node_start_byte; -3951 | first_finished_pattern_index = state->pattern_index; -3952 | } -3953 | i++; -3954 | } - | -3955 | // If there is finished capture that is clearly before any unfinished -3956 | // capture, then return its match, and its capture index. Internally -3957 | // record the fact that the capture has been 'consumed'. -3958 | QueryState *state; -3959 | if (first_finished_state) { -3960 | state = first_finished_state; -3961 | } else if (first_unfinished_state_is_definite) { -3962 | state = array_get(&self->states, first_unfinished_state_index); -3963 | } else { -3964 | state = NULL; -3965 | } - | -3966 | if (state) { -3967 | if (state->id == UINT32_MAX) state->id = self->next_state_id++; -3968 | match->id = state->id; -3969 | match->pattern_index = state->pattern_index; -3970 | const CaptureList *captures = capture_list_pool_get( -3971 | &self->capture_list_pool, -3972 | state->capture_list_id -3973 | ); -3974 | match->captures = captures->contents; -3975 | match->capture_count = captures->size; -3976 | *capture_index = state->consumed_capture_count; -3977 | state->consumed_capture_count++; -3978 | return true; -3979 | } - | -3980 | if (capture_list_pool_is_empty(&self->capture_list_pool) && found_unfinished_state) { -3981 | LOG( -3982 | " abandon state. 
index:%u, pattern:%u, offset:%u.\n", -3983 | first_unfinished_state_index, -3984 | first_unfinished_pattern_index, -3985 | first_unfinished_capture_byte -3986 | ); -3987 | capture_list_pool_release( -3988 | &self->capture_list_pool, -3989 | array_get(&self->states, first_unfinished_state_index)->capture_list_id -3990 | ); -3991 | array_erase(&self->states, first_unfinished_state_index); -3992 | } - | -3993 | // If there are no finished matches that are ready to be returned, then -3994 | // continue finding more matches. -3995 | if ( -3996 | !ts_query_cursor__advance(self, true) && -3997 | self->finished_states.size == 0 -3998 | ) return false; -3999 | } -4000 | } - | -4001 | void ts_query_cursor_set_max_start_depth( -4002 | TSQueryCursor *self, -4003 | uint32_t max_start_depth -4004 | ) { -4005 | self->max_start_depth = max_start_depth; -4006 | } - | -4007 | #undef LOG - - - --------------------------------------------------------------------------------- -/lib/src/reduce_action.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_REDUCE_ACTION_H_ - 2 | #define TREE_SITTER_REDUCE_ACTION_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include "./array.h" - 7 | #include "tree_sitter/api.h" - | - 8 | typedef struct { - 9 | uint32_t count; - 10 | TSSymbol symbol; - 11 | int dynamic_precedence; - 12 | unsigned short production_id; - 13 | } ReduceAction; - | - 14 | typedef Array(ReduceAction) ReduceActionSet; - | - 15 | static inline void ts_reduce_action_set_add(ReduceActionSet *self, - 16 | ReduceAction new_action) { - 17 | for (uint32_t i = 0; i < self->size; i++) { - 18 | ReduceAction action = self->contents[i]; - 19 | if (action.symbol == new_action.symbol && action.count == new_action.count) - 20 | return; - 21 | } - 22 | array_push(self, new_action); - 23 | } - | - 24 | #ifdef __cplusplus - 25 | } - 26 | #endif - | - 27 | #endif // TREE_SITTER_REDUCE_ACTION_H_ - - - 
--------------------------------------------------------------------------------- -/lib/src/reusable_node.h: --------------------------------------------------------------------------------- - 1 | #include "./subtree.h" - | - 2 | typedef struct { - 3 | Subtree tree; - 4 | uint32_t child_index; - 5 | uint32_t byte_offset; - 6 | } StackEntry; - | - 7 | typedef struct { - 8 | Array(StackEntry) stack; - 9 | Subtree last_external_token; - 10 | } ReusableNode; - | - 11 | static inline ReusableNode reusable_node_new(void) { - 12 | return (ReusableNode) {array_new(), NULL_SUBTREE}; - 13 | } - | - 14 | static inline void reusable_node_clear(ReusableNode *self) { - 15 | array_clear(&self->stack); - 16 | self->last_external_token = NULL_SUBTREE; - 17 | } - | - 18 | static inline Subtree reusable_node_tree(ReusableNode *self) { - 19 | return self->stack.size > 0 - 20 | ? self->stack.contents[self->stack.size - 1].tree - 21 | : NULL_SUBTREE; - 22 | } - | - 23 | static inline uint32_t reusable_node_byte_offset(ReusableNode *self) { - 24 | return self->stack.size > 0 - 25 | ? 
self->stack.contents[self->stack.size - 1].byte_offset - 26 | : UINT32_MAX; - 27 | } - | - 28 | static inline void reusable_node_delete(ReusableNode *self) { - 29 | array_delete(&self->stack); - 30 | } - | - 31 | static inline void reusable_node_advance(ReusableNode *self) { - 32 | StackEntry last_entry = *array_back(&self->stack); - 33 | uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree); - 34 | if (ts_subtree_has_external_tokens(last_entry.tree)) { - 35 | self->last_external_token = ts_subtree_last_external_token(last_entry.tree); - 36 | } - | - 37 | Subtree tree; - 38 | uint32_t next_index; - 39 | do { - 40 | StackEntry popped_entry = array_pop(&self->stack); - 41 | next_index = popped_entry.child_index + 1; - 42 | if (self->stack.size == 0) return; - 43 | tree = array_back(&self->stack)->tree; - 44 | } while (ts_subtree_child_count(tree) <= next_index); - | - 45 | array_push(&self->stack, ((StackEntry) { - 46 | .tree = ts_subtree_children(tree)[next_index], - 47 | .child_index = next_index, - 48 | .byte_offset = byte_offset, - 49 | })); - 50 | } - | - 51 | static inline bool reusable_node_descend(ReusableNode *self) { - 52 | StackEntry last_entry = *array_back(&self->stack); - 53 | if (ts_subtree_child_count(last_entry.tree) > 0) { - 54 | array_push(&self->stack, ((StackEntry) { - 55 | .tree = ts_subtree_children(last_entry.tree)[0], - 56 | .child_index = 0, - 57 | .byte_offset = last_entry.byte_offset, - 58 | })); - 59 | return true; - 60 | } else { - 61 | return false; - 62 | } - 63 | } - | - 64 | static inline void reusable_node_advance_past_leaf(ReusableNode *self) { - 65 | while (reusable_node_descend(self)) {} - 66 | reusable_node_advance(self); - 67 | } - | - 68 | static inline void reusable_node_reset(ReusableNode *self, Subtree tree) { - 69 | reusable_node_clear(self); - 70 | array_push(&self->stack, ((StackEntry) { - 71 | .tree = tree, - 72 | .child_index = 0, - 73 | .byte_offset = 0, - 74 | })); - | - 75 | // 
Never reuse the root node, because it has a non-standard internal structure - 76 | // due to transformations that are applied when it is accepted: adding the EOF - 77 | // child and any extra children. - 78 | if (!reusable_node_descend(self)) { - 79 | reusable_node_clear(self); - 80 | } - 81 | } - - - --------------------------------------------------------------------------------- -/lib/src/stack.c: --------------------------------------------------------------------------------- - 1 | #include "./alloc.h" - 2 | #include "./language.h" - 3 | #include "./subtree.h" - 4 | #include "./array.h" - 5 | #include "./stack.h" - 6 | #include "./length.h" - 7 | #include - 8 | #include - 9 | #include - | - 10 | #define MAX_LINK_COUNT 8 - 11 | #define MAX_NODE_POOL_SIZE 50 - 12 | #define MAX_ITERATOR_COUNT 64 - | - 13 | #if defined _WIN32 && !defined __GNUC__ - 14 | #define forceinline __forceinline - 15 | #else - 16 | #define forceinline static inline __attribute__((always_inline)) - 17 | #endif - | - 18 | typedef struct StackNode StackNode; - | - 19 | typedef struct { - 20 | StackNode *node; - 21 | Subtree subtree; - 22 | bool is_pending; - 23 | } StackLink; - | - 24 | struct StackNode { - 25 | TSStateId state; - 26 | Length position; - 27 | StackLink links[MAX_LINK_COUNT]; - 28 | short unsigned int link_count; - 29 | uint32_t ref_count; - 30 | unsigned error_cost; - 31 | unsigned node_count; - 32 | int dynamic_precedence; - 33 | }; - | - 34 | typedef struct { - 35 | StackNode *node; - 36 | SubtreeArray subtrees; - 37 | uint32_t subtree_count; - 38 | bool is_pending; - 39 | } StackIterator; - | - 40 | typedef Array(StackNode *) StackNodeArray; - | - 41 | typedef enum { - 42 | StackStatusActive, - 43 | StackStatusPaused, - 44 | StackStatusHalted, - 45 | } StackStatus; - | - 46 | typedef struct { - 47 | StackNode *node; - 48 | StackSummary *summary; - 49 | unsigned node_count_at_last_error; - 50 | Subtree last_external_token; - 51 | Subtree lookahead_when_paused; - 52 | 
StackStatus status; - 53 | } StackHead; - | - 54 | struct Stack { - 55 | Array(StackHead) heads; - 56 | StackSliceArray slices; - 57 | Array(StackIterator) iterators; - 58 | StackNodeArray node_pool; - 59 | StackNode *base_node; - 60 | SubtreePool *subtree_pool; - 61 | }; - | - 62 | typedef unsigned StackAction; - 63 | enum { - 64 | StackActionNone, - 65 | StackActionStop = 1, - 66 | StackActionPop = 2, - 67 | }; - | - 68 | typedef StackAction (*StackCallback)(void *, const StackIterator *); - | - 69 | static void stack_node_retain(StackNode *self) { - 70 | if (!self) - 71 | return; - 72 | ts_assert(self->ref_count > 0); - 73 | self->ref_count++; - 74 | ts_assert(self->ref_count != 0); - 75 | } - | - 76 | static void stack_node_release( - 77 | StackNode *self, - 78 | StackNodeArray *pool, - 79 | SubtreePool *subtree_pool - 80 | ) { - 81 | recur: - 82 | ts_assert(self->ref_count != 0); - 83 | self->ref_count--; - 84 | if (self->ref_count > 0) return; - | - 85 | StackNode *first_predecessor = NULL; - 86 | if (self->link_count > 0) { - 87 | for (unsigned i = self->link_count - 1; i > 0; i--) { - 88 | StackLink link = self->links[i]; - 89 | if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); - 90 | stack_node_release(link.node, pool, subtree_pool); - 91 | } - 92 | StackLink link = self->links[0]; - 93 | if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree); - 94 | first_predecessor = self->links[0].node; - 95 | } - | - 96 | if (pool->size < MAX_NODE_POOL_SIZE) { - 97 | array_push(pool, self); - 98 | } else { - 99 | ts_free(self); - 100 | } - | - 101 | if (first_predecessor) { - 102 | self = first_predecessor; - 103 | goto recur; - 104 | } - 105 | } - | - 106 | /// Get the number of nodes in the subtree, for the purpose of measuring - 107 | /// how much progress has been made by a given version of the stack. 
- 108 | static uint32_t stack__subtree_node_count(Subtree subtree) { - 109 | uint32_t count = ts_subtree_visible_descendant_count(subtree); - 110 | if (ts_subtree_visible(subtree)) count++; - | - 111 | // Count intermediate error nodes even though they are not visible, - 112 | // because a stack version's node count is used to check whether it - 113 | // has made any progress since the last time it encountered an error. - 114 | if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) count++; - | - 115 | return count; - 116 | } - | - 117 | static StackNode *stack_node_new( - 118 | StackNode *previous_node, - 119 | Subtree subtree, - 120 | bool is_pending, - 121 | TSStateId state, - 122 | StackNodeArray *pool - 123 | ) { - 124 | StackNode *node = pool->size > 0 - 125 | ? array_pop(pool) - 126 | : ts_malloc(sizeof(StackNode)); - 127 | *node = (StackNode) { - 128 | .ref_count = 1, - 129 | .link_count = 0, - 130 | .state = state - 131 | }; - | - 132 | if (previous_node) { - 133 | node->link_count = 1; - 134 | node->links[0] = (StackLink) { - 135 | .node = previous_node, - 136 | .subtree = subtree, - 137 | .is_pending = is_pending, - 138 | }; - | - 139 | node->position = previous_node->position; - 140 | node->error_cost = previous_node->error_cost; - 141 | node->dynamic_precedence = previous_node->dynamic_precedence; - 142 | node->node_count = previous_node->node_count; - | - 143 | if (subtree.ptr) { - 144 | node->error_cost += ts_subtree_error_cost(subtree); - 145 | node->position = length_add(node->position, ts_subtree_total_size(subtree)); - 146 | node->node_count += stack__subtree_node_count(subtree); - 147 | node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree); - 148 | } - 149 | } else { - 150 | node->position = length_zero(); - 151 | node->error_cost = 0; - 152 | } - | - 153 | return node; - 154 | } - | - 155 | static bool stack__subtree_is_equivalent(Subtree left, Subtree right) { - 156 | if (left.ptr == right.ptr) return true; - 157 | if 
(!left.ptr || !right.ptr) return false; - | - 158 | // Symbols must match - 159 | if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false; - | - 160 | // If both have errors, don't bother keeping both. - 161 | if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true; - | - 162 | return ( - 163 | ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes && - 164 | ts_subtree_size(left).bytes == ts_subtree_size(right).bytes && - 165 | ts_subtree_child_count(left) == ts_subtree_child_count(right) && - 166 | ts_subtree_extra(left) == ts_subtree_extra(right) && - 167 | ts_subtree_external_scanner_state_eq(left, right) - 168 | ); - 169 | } - | - 170 | static void stack_node_add_link( - 171 | StackNode *self, - 172 | StackLink link, - 173 | SubtreePool *subtree_pool - 174 | ) { - 175 | if (link.node == self) return; - | - 176 | for (int i = 0; i < self->link_count; i++) { - 177 | StackLink *existing_link = &self->links[i]; - 178 | if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) { - 179 | // In general, we preserve ambiguities until they are removed from the stack - 180 | // during a pop operation where multiple paths lead to the same node. But in - 181 | // the special case where two links directly connect the same pair of nodes, - 182 | // we can safely remove the ambiguity ahead of time without changing behavior. - 183 | if (existing_link->node == link.node) { - 184 | if ( - 185 | ts_subtree_dynamic_precedence(link.subtree) > - 186 | ts_subtree_dynamic_precedence(existing_link->subtree) - 187 | ) { - 188 | ts_subtree_retain(link.subtree); - 189 | ts_subtree_release(subtree_pool, existing_link->subtree); - 190 | existing_link->subtree = link.subtree; - 191 | self->dynamic_precedence = - 192 | link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree); - 193 | } - 194 | return; - 195 | } - | - 196 | // If the previous nodes are mergeable, merge them recursively. 
- 197 | if ( - 198 | existing_link->node->state == link.node->state && - 199 | existing_link->node->position.bytes == link.node->position.bytes && - 200 | existing_link->node->error_cost == link.node->error_cost - 201 | ) { - 202 | for (int j = 0; j < link.node->link_count; j++) { - 203 | stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); - 204 | } - 205 | int32_t dynamic_precedence = link.node->dynamic_precedence; - 206 | if (link.subtree.ptr) { - 207 | dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); - 208 | } - 209 | if (dynamic_precedence > self->dynamic_precedence) { - 210 | self->dynamic_precedence = dynamic_precedence; - 211 | } - 212 | return; - 213 | } - 214 | } - 215 | } - | - 216 | if (self->link_count == MAX_LINK_COUNT) return; - | - 217 | stack_node_retain(link.node); - 218 | unsigned node_count = link.node->node_count; - 219 | int dynamic_precedence = link.node->dynamic_precedence; - 220 | self->links[self->link_count++] = link; - | - 221 | if (link.subtree.ptr) { - 222 | ts_subtree_retain(link.subtree); - 223 | node_count += stack__subtree_node_count(link.subtree); - 224 | dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); - 225 | } - | - 226 | if (node_count > self->node_count) self->node_count = node_count; - 227 | if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence; - 228 | } - | - 229 | static void stack_head_delete( - 230 | StackHead *self, - 231 | StackNodeArray *pool, - 232 | SubtreePool *subtree_pool - 233 | ) { - 234 | if (self->node) { - 235 | if (self->last_external_token.ptr) { - 236 | ts_subtree_release(subtree_pool, self->last_external_token); - 237 | } - 238 | if (self->lookahead_when_paused.ptr) { - 239 | ts_subtree_release(subtree_pool, self->lookahead_when_paused); - 240 | } - 241 | if (self->summary) { - 242 | array_delete(self->summary); - 243 | ts_free(self->summary); - 244 | } - 245 | stack_node_release(self->node, pool, 
subtree_pool); - 246 | } - 247 | } - | - 248 | static StackVersion ts_stack__add_version( - 249 | Stack *self, - 250 | StackVersion original_version, - 251 | StackNode *node - 252 | ) { - 253 | StackHead head = { - 254 | .node = node, - 255 | .node_count_at_last_error = array_get(&self->heads, original_version)->node_count_at_last_error, - 256 | .last_external_token = array_get(&self->heads, original_version)->last_external_token, - 257 | .status = StackStatusActive, - 258 | .lookahead_when_paused = NULL_SUBTREE, - 259 | }; - 260 | array_push(&self->heads, head); - 261 | stack_node_retain(node); - 262 | if (head.last_external_token.ptr) ts_subtree_retain(head.last_external_token); - 263 | return (StackVersion)(self->heads.size - 1); - 264 | } - | - 265 | static void ts_stack__add_slice( - 266 | Stack *self, - 267 | StackVersion original_version, - 268 | StackNode *node, - 269 | SubtreeArray *subtrees - 270 | ) { - 271 | for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) { - 272 | StackVersion version = array_get(&self->slices, i)->version; - 273 | if (array_get(&self->heads, version)->node == node) { - 274 | StackSlice slice = {*subtrees, version}; - 275 | array_insert(&self->slices, i + 1, slice); - 276 | return; - 277 | } - 278 | } - | - 279 | StackVersion version = ts_stack__add_version(self, original_version, node); - 280 | StackSlice slice = { *subtrees, version }; - 281 | array_push(&self->slices, slice); - 282 | } - | - 283 | static StackSliceArray stack__iter( - 284 | Stack *self, - 285 | StackVersion version, - 286 | StackCallback callback, - 287 | void *payload, - 288 | int goal_subtree_count - 289 | ) { - 290 | array_clear(&self->slices); - 291 | array_clear(&self->iterators); - | - 292 | StackHead *head = array_get(&self->heads, version); - 293 | StackIterator new_iterator = { - 294 | .node = head->node, - 295 | .subtrees = array_new(), - 296 | .subtree_count = 0, - 297 | .is_pending = true, - 298 | }; - | - 299 | bool include_subtrees = false; - 
300 | if (goal_subtree_count >= 0) { - 301 | include_subtrees = true; - 302 | array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); - 303 | } - | - 304 | array_push(&self->iterators, new_iterator); - | - 305 | while (self->iterators.size > 0) { - 306 | for (uint32_t i = 0, size = self->iterators.size; i < size; i++) { - 307 | StackIterator *iterator = array_get(&self->iterators, i); - 308 | StackNode *node = iterator->node; - | - 309 | StackAction action = callback(payload, iterator); - 310 | bool should_pop = action & StackActionPop; - 311 | bool should_stop = action & StackActionStop || node->link_count == 0; - | - 312 | if (should_pop) { - 313 | SubtreeArray subtrees = iterator->subtrees; - 314 | if (!should_stop) { - 315 | ts_subtree_array_copy(subtrees, &subtrees); - 316 | } - 317 | ts_subtree_array_reverse(&subtrees); - 318 | ts_stack__add_slice( - 319 | self, - 320 | version, - 321 | node, - 322 | &subtrees - 323 | ); - 324 | } - | - 325 | if (should_stop) { - 326 | if (!should_pop) { - 327 | ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees); - 328 | } - 329 | array_erase(&self->iterators, i); - 330 | i--, size--; - 331 | continue; - 332 | } - | - 333 | for (uint32_t j = 1; j <= node->link_count; j++) { - 334 | StackIterator *next_iterator; - 335 | StackLink link; - 336 | if (j == node->link_count) { - 337 | link = node->links[0]; - 338 | next_iterator = array_get(&self->iterators, i); - 339 | } else { - 340 | if (self->iterators.size >= MAX_ITERATOR_COUNT) continue; - 341 | link = node->links[j]; - 342 | StackIterator current_iterator = *array_get(&self->iterators, i); - 343 | array_push(&self->iterators, current_iterator); - 344 | next_iterator = array_back(&self->iterators); - 345 | ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees); - 346 | } - | - 347 | next_iterator->node = link.node; - 348 | if (link.subtree.ptr) { - 349 | if (include_subtrees) { - 350 | 
array_push(&next_iterator->subtrees, link.subtree); - 351 | ts_subtree_retain(link.subtree); - 352 | } - | - 353 | if (!ts_subtree_extra(link.subtree)) { - 354 | next_iterator->subtree_count++; - 355 | if (!link.is_pending) { - 356 | next_iterator->is_pending = false; - 357 | } - 358 | } - 359 | } else { - 360 | next_iterator->subtree_count++; - 361 | next_iterator->is_pending = false; - 362 | } - 363 | } - 364 | } - 365 | } - | - 366 | return self->slices; - 367 | } - | - 368 | Stack *ts_stack_new(SubtreePool *subtree_pool) { - 369 | Stack *self = ts_calloc(1, sizeof(Stack)); - | - 370 | array_init(&self->heads); - 371 | array_init(&self->slices); - 372 | array_init(&self->iterators); - 373 | array_init(&self->node_pool); - 374 | array_reserve(&self->heads, 4); - 375 | array_reserve(&self->slices, 4); - 376 | array_reserve(&self->iterators, 4); - 377 | array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE); - | - 378 | self->subtree_pool = subtree_pool; - 379 | self->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &self->node_pool); - 380 | ts_stack_clear(self); - | - 381 | return self; - 382 | } - | - 383 | void ts_stack_delete(Stack *self) { - 384 | if (self->slices.contents) - 385 | array_delete(&self->slices); - 386 | if (self->iterators.contents) - 387 | array_delete(&self->iterators); - 388 | stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); - 389 | for (uint32_t i = 0; i < self->heads.size; i++) { - 390 | stack_head_delete(array_get(&self->heads, i), &self->node_pool, self->subtree_pool); - 391 | } - 392 | array_clear(&self->heads); - 393 | if (self->node_pool.contents) { - 394 | for (uint32_t i = 0; i < self->node_pool.size; i++) - 395 | ts_free(*array_get(&self->node_pool, i)); - 396 | array_delete(&self->node_pool); - 397 | } - 398 | array_delete(&self->heads); - 399 | ts_free(self); - 400 | } - | - 401 | uint32_t ts_stack_version_count(const Stack *self) { - 402 | return self->heads.size; - 403 | } - | - 404 | uint32_t 
ts_stack_halted_version_count(Stack *self) { - 405 | uint32_t count = 0; - 406 | for (uint32_t i = 0; i < self->heads.size; i++) { - 407 | StackHead *head = array_get(&self->heads, i); - 408 | if (head->status == StackStatusHalted) { - 409 | count++; - 410 | } - 411 | } - 412 | return count; - 413 | } - | - 414 | TSStateId ts_stack_state(const Stack *self, StackVersion version) { - 415 | return array_get(&self->heads, version)->node->state; - 416 | } - | - 417 | Length ts_stack_position(const Stack *self, StackVersion version) { - 418 | return array_get(&self->heads, version)->node->position; - 419 | } - | - 420 | Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) { - 421 | return array_get(&self->heads, version)->last_external_token; - 422 | } - | - 423 | void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) { - 424 | StackHead *head = array_get(&self->heads, version); - 425 | if (token.ptr) ts_subtree_retain(token); - 426 | if (head->last_external_token.ptr) ts_subtree_release(self->subtree_pool, head->last_external_token); - 427 | head->last_external_token = token; - 428 | } - | - 429 | unsigned ts_stack_error_cost(const Stack *self, StackVersion version) { - 430 | StackHead *head = array_get(&self->heads, version); - 431 | unsigned result = head->node->error_cost; - 432 | if ( - 433 | head->status == StackStatusPaused || - 434 | (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) { - 435 | result += ERROR_COST_PER_RECOVERY; - 436 | } - 437 | return result; - 438 | } - | - 439 | unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) { - 440 | StackHead *head = array_get(&self->heads, version); - 441 | if (head->node->node_count < head->node_count_at_last_error) { - 442 | head->node_count_at_last_error = head->node->node_count; - 443 | } - 444 | return head->node->node_count - head->node_count_at_last_error; - 445 | } - | - 446 | void ts_stack_push( - 447 
| Stack *self, - 448 | StackVersion version, - 449 | Subtree subtree, - 450 | bool pending, - 451 | TSStateId state - 452 | ) { - 453 | StackHead *head = array_get(&self->heads, version); - 454 | StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); - 455 | if (!subtree.ptr) head->node_count_at_last_error = new_node->node_count; - 456 | head->node = new_node; - 457 | } - | - 458 | forceinline StackAction pop_count_callback(void *payload, const StackIterator *iterator) { - 459 | unsigned *goal_subtree_count = payload; - 460 | if (iterator->subtree_count == *goal_subtree_count) { - 461 | return StackActionPop | StackActionStop; - 462 | } else { - 463 | return StackActionNone; - 464 | } - 465 | } - | - 466 | StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) { - 467 | return stack__iter(self, version, pop_count_callback, &count, (int)count); - 468 | } - | - | - 469 | forceinline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) { - 470 | (void)payload; - 471 | if (iterator->subtree_count >= 1) { - 472 | if (iterator->is_pending) { - 473 | return StackActionPop | StackActionStop; - 474 | } else { - 475 | return StackActionStop; - 476 | } - 477 | } else { - 478 | return StackActionNone; - 479 | } - 480 | } - | - 481 | StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) { - 482 | StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0); - 483 | if (pop.size > 0) { - 484 | ts_stack_renumber_version(self, array_get(&pop, 0)->version, version); - 485 | array_get(&pop, 0)->version = version; - 486 | } - 487 | return pop; - 488 | } - | - 489 | forceinline StackAction pop_error_callback(void *payload, const StackIterator *iterator) { - 490 | if (iterator->subtrees.size > 0) { - 491 | bool *found_error = payload; - 492 | if (!*found_error && ts_subtree_is_error(*array_get(&iterator->subtrees, 0))) { - 493 | *found_error = true; - 494 
| return StackActionPop | StackActionStop; - 495 | } else { - 496 | return StackActionStop; - 497 | } - 498 | } else { - 499 | return StackActionNone; - 500 | } - 501 | } - | - 502 | SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) { - 503 | StackNode *node = array_get(&self->heads, version)->node; - 504 | for (unsigned i = 0; i < node->link_count; i++) { - 505 | if (node->links[i].subtree.ptr && ts_subtree_is_error(node->links[i].subtree)) { - 506 | bool found_error = false; - 507 | StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1); - 508 | if (pop.size > 0) { - 509 | ts_assert(pop.size == 1); - 510 | ts_stack_renumber_version(self, array_get(&pop, 0)->version, version); - 511 | return array_get(&pop, 0)->subtrees; - 512 | } - 513 | break; - 514 | } - 515 | } - 516 | return (SubtreeArray) {.size = 0}; - 517 | } - | - 518 | forceinline StackAction pop_all_callback(void *payload, const StackIterator *iterator) { - 519 | (void)payload; - 520 | return iterator->node->link_count == 0 ? 
StackActionPop : StackActionNone; - 521 | } - | - 522 | StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) { - 523 | return stack__iter(self, version, pop_all_callback, NULL, 0); - 524 | } - | - 525 | typedef struct { - 526 | StackSummary *summary; - 527 | unsigned max_depth; - 528 | } SummarizeStackSession; - | - 529 | forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { - 530 | SummarizeStackSession *session = payload; - 531 | TSStateId state = iterator->node->state; - 532 | unsigned depth = iterator->subtree_count; - 533 | if (depth > session->max_depth) return StackActionStop; - 534 | for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { - 535 | StackSummaryEntry entry = *array_get(session->summary, i); - 536 | if (entry.depth < depth) break; - 537 | if (entry.depth == depth && entry.state == state) return StackActionNone; - 538 | } - 539 | array_push(session->summary, ((StackSummaryEntry) { - 540 | .position = iterator->node->position, - 541 | .depth = depth, - 542 | .state = state, - 543 | })); - 544 | return StackActionNone; - 545 | } - | - 546 | void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) { - 547 | SummarizeStackSession session = { - 548 | .summary = ts_malloc(sizeof(StackSummary)), - 549 | .max_depth = max_depth - 550 | }; - 551 | array_init(session.summary); - 552 | stack__iter(self, version, summarize_stack_callback, &session, -1); - 553 | StackHead *head = array_get(&self->heads, version); - 554 | if (head->summary) { - 555 | array_delete(head->summary); - 556 | ts_free(head->summary); - 557 | } - 558 | head->summary = session.summary; - 559 | } - | - 560 | StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) { - 561 | return array_get(&self->heads, version)->summary; - 562 | } - | - 563 | int ts_stack_dynamic_precedence(Stack *self, StackVersion version) { - 564 | return array_get(&self->heads, 
version)->node->dynamic_precedence; - 565 | } - | - 566 | bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) { - 567 | const StackHead *head = array_get(&self->heads, version); - 568 | const StackNode *node = head->node; - 569 | if (node->error_cost == 0) return true; - 570 | while (node) { - 571 | if (node->link_count > 0) { - 572 | Subtree subtree = node->links[0].subtree; - 573 | if (subtree.ptr) { - 574 | if (ts_subtree_total_bytes(subtree) > 0) { - 575 | return true; - 576 | } else if ( - 577 | node->node_count > head->node_count_at_last_error && - 578 | ts_subtree_error_cost(subtree) == 0 - 579 | ) { - 580 | node = node->links[0].node; - 581 | continue; - 582 | } - 583 | } - 584 | } - 585 | break; - 586 | } - 587 | return false; - 588 | } - | - 589 | void ts_stack_remove_version(Stack *self, StackVersion version) { - 590 | stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool); - 591 | array_erase(&self->heads, version); - 592 | } - | - 593 | void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) { - 594 | if (v1 == v2) return; - 595 | ts_assert(v2 < v1); - 596 | ts_assert((uint32_t)v1 < self->heads.size); - 597 | StackHead *source_head = array_get(&self->heads, v1); - 598 | StackHead *target_head = array_get(&self->heads, v2); - 599 | if (target_head->summary && !source_head->summary) { - 600 | source_head->summary = target_head->summary; - 601 | target_head->summary = NULL; - 602 | } - 603 | stack_head_delete(target_head, &self->node_pool, self->subtree_pool); - 604 | *target_head = *source_head; - 605 | array_erase(&self->heads, v1); - 606 | } - | - 607 | void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) { - 608 | StackHead temporary_head = *array_get(&self->heads, v1); - 609 | *array_get(&self->heads, v1) = *array_get(&self->heads, v2); - 610 | *array_get(&self->heads, v2) = temporary_head; - 611 | } - | - 612 | StackVersion 
ts_stack_copy_version(Stack *self, StackVersion version) { - 613 | ts_assert(version < self->heads.size); - 614 | StackHead version_head = *array_get(&self->heads, version); - 615 | array_push(&self->heads, version_head); - 616 | StackHead *head = array_back(&self->heads); - 617 | stack_node_retain(head->node); - 618 | if (head->last_external_token.ptr) ts_subtree_retain(head->last_external_token); - 619 | head->summary = NULL; - 620 | return self->heads.size - 1; - 621 | } - | - 622 | bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) { - 623 | if (!ts_stack_can_merge(self, version1, version2)) return false; - 624 | StackHead *head1 = array_get(&self->heads, version1); - 625 | StackHead *head2 = array_get(&self->heads, version2); - 626 | for (uint32_t i = 0; i < head2->node->link_count; i++) { - 627 | stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool); - 628 | } - 629 | if (head1->node->state == ERROR_STATE) { - 630 | head1->node_count_at_last_error = head1->node->node_count; - 631 | } - 632 | ts_stack_remove_version(self, version2); - 633 | return true; - 634 | } - | - 635 | bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) { - 636 | StackHead *head1 = array_get(&self->heads, version1); - 637 | StackHead *head2 = array_get(&self->heads, version2); - 638 | return - 639 | head1->status == StackStatusActive && - 640 | head2->status == StackStatusActive && - 641 | head1->node->state == head2->node->state && - 642 | head1->node->position.bytes == head2->node->position.bytes && - 643 | head1->node->error_cost == head2->node->error_cost && - 644 | ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token); - 645 | } - | - 646 | void ts_stack_halt(Stack *self, StackVersion version) { - 647 | array_get(&self->heads, version)->status = StackStatusHalted; - 648 | } - | - 649 | void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead) { - 
650 | StackHead *head = array_get(&self->heads, version); - 651 | head->status = StackStatusPaused; - 652 | head->lookahead_when_paused = lookahead; - 653 | head->node_count_at_last_error = head->node->node_count; - 654 | } - | - 655 | bool ts_stack_is_active(const Stack *self, StackVersion version) { - 656 | return array_get(&self->heads, version)->status == StackStatusActive; - 657 | } - | - 658 | bool ts_stack_is_halted(const Stack *self, StackVersion version) { - 659 | return array_get(&self->heads, version)->status == StackStatusHalted; - 660 | } - | - 661 | bool ts_stack_is_paused(const Stack *self, StackVersion version) { - 662 | return array_get(&self->heads, version)->status == StackStatusPaused; - 663 | } - | - 664 | Subtree ts_stack_resume(Stack *self, StackVersion version) { - 665 | StackHead *head = array_get(&self->heads, version); - 666 | ts_assert(head->status == StackStatusPaused); - 667 | Subtree result = head->lookahead_when_paused; - 668 | head->status = StackStatusActive; - 669 | head->lookahead_when_paused = NULL_SUBTREE; - 670 | return result; - 671 | } - | - 672 | void ts_stack_clear(Stack *self) { - 673 | stack_node_retain(self->base_node); - 674 | for (uint32_t i = 0; i < self->heads.size; i++) { - 675 | stack_head_delete(array_get(&self->heads, i), &self->node_pool, self->subtree_pool); - 676 | } - 677 | array_clear(&self->heads); - 678 | array_push(&self->heads, ((StackHead) { - 679 | .node = self->base_node, - 680 | .status = StackStatusActive, - 681 | .last_external_token = NULL_SUBTREE, - 682 | .lookahead_when_paused = NULL_SUBTREE, - 683 | })); - 684 | } - | - 685 | bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) { - 686 | array_reserve(&self->iterators, 32); - 687 | if (!f) f = stderr; - | - 688 | fprintf(f, "digraph stack {\n"); - 689 | fprintf(f, "rankdir=\"RL\";\n"); - 690 | fprintf(f, "edge [arrowhead=none]\n"); - | - 691 | Array(StackNode *) visited_nodes = array_new(); - | - 692 | 
array_clear(&self->iterators); - 693 | for (uint32_t i = 0; i < self->heads.size; i++) { - 694 | StackHead *head = array_get(&self->heads, i); - 695 | if (head->status == StackStatusHalted) continue; - | - 696 | fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i); - 697 | fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node); - | - 698 | if (head->status == StackStatusPaused) { - 699 | fprintf(f, "color=red "); - 700 | } - 701 | fprintf(f, - 702 | "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u", - 703 | i, - 704 | ts_stack_node_count_since_error(self, i), - 705 | ts_stack_error_cost(self, i) - 706 | ); - | - 707 | if (head->summary) { - 708 | fprintf(f, "\nsummary:"); - 709 | for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", array_get(head->summary, j)->state); - 710 | } - | - 711 | if (head->last_external_token.ptr) { - 712 | const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state; - 713 | const char *data = ts_external_scanner_state_data(state); - 714 | fprintf(f, "\nexternal_scanner_state:"); - 715 | for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]); - 716 | } - | - 717 | fprintf(f, "\"]\n"); - 718 | array_push(&self->iterators, ((StackIterator) { - 719 | .node = head->node - 720 | })); - 721 | } - | - 722 | bool all_iterators_done = false; - 723 | while (!all_iterators_done) { - 724 | all_iterators_done = true; - | - 725 | for (uint32_t i = 0; i < self->iterators.size; i++) { - 726 | StackIterator iterator = *array_get(&self->iterators, i); - 727 | StackNode *node = iterator.node; - | - 728 | for (uint32_t j = 0; j < visited_nodes.size; j++) { - 729 | if (*array_get(&visited_nodes, j) == node) { - 730 | node = NULL; - 731 | break; - 732 | } - 733 | } - | - 734 | if (!node) continue; - 735 | all_iterators_done = false; - | - 736 | fprintf(f, "node_%p [", (void *)node); - 737 | if (node->state == ERROR_STATE) { - 738 | fprintf(f, 
"label=\"?\""); - 739 | } else if ( - 740 | node->link_count == 1 && - 741 | node->links[0].subtree.ptr && - 742 | ts_subtree_extra(node->links[0].subtree) - 743 | ) { - 744 | fprintf(f, "shape=point margin=0 label=\"\""); - 745 | } else { - 746 | fprintf(f, "label=\"%d\"", node->state); - 747 | } - | - 748 | fprintf( - 749 | f, - 750 | " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n", - 751 | node->position.extent.row + 1, - 752 | node->position.extent.column, - 753 | node->node_count, - 754 | node->error_cost, - 755 | node->dynamic_precedence - 756 | ); - | - 757 | for (int j = 0; j < node->link_count; j++) { - 758 | StackLink link = node->links[j]; - 759 | fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node); - 760 | if (link.is_pending) fprintf(f, "style=dashed "); - 761 | if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray "); - | - 762 | if (!link.subtree.ptr) { - 763 | fprintf(f, "color=red"); - 764 | } else { - 765 | fprintf(f, "label=\""); - 766 | bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree); - 767 | if (quoted) fprintf(f, "'"); - 768 | ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree)); - 769 | if (quoted) fprintf(f, "'"); - 770 | fprintf(f, "\""); - 771 | fprintf( - 772 | f, - 773 | "labeltooltip=\"error_cost: %u\ndynamic_precedence: %" PRId32 "\"", - 774 | ts_subtree_error_cost(link.subtree), - 775 | ts_subtree_dynamic_precedence(link.subtree) - 776 | ); - 777 | } - | - 778 | fprintf(f, "];\n"); - | - 779 | StackIterator *next_iterator; - 780 | if (j == 0) { - 781 | next_iterator = array_get(&self->iterators, i); - 782 | } else { - 783 | array_push(&self->iterators, iterator); - 784 | next_iterator = array_back(&self->iterators); - 785 | } - 786 | next_iterator->node = link.node; - 787 | } - | - 788 | array_push(&visited_nodes, node); - 789 | } - 790 | } - | - 791 | fprintf(f, "}\n"); - | - 792 | 
array_delete(&visited_nodes); - 793 | return true; - 794 | } - | - 795 | #undef forceinline - - - --------------------------------------------------------------------------------- -/lib/src/stack.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_PARSE_STACK_H_ - 2 | #define TREE_SITTER_PARSE_STACK_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include "./array.h" - 7 | #include "./subtree.h" - 8 | #include - | - 9 | typedef struct Stack Stack; - | - 10 | typedef unsigned StackVersion; - 11 | #define STACK_VERSION_NONE ((StackVersion)-1) - | - 12 | typedef struct { - 13 | SubtreeArray subtrees; - 14 | StackVersion version; - 15 | } StackSlice; - 16 | typedef Array(StackSlice) StackSliceArray; - | - 17 | typedef struct { - 18 | Length position; - 19 | unsigned depth; - 20 | TSStateId state; - 21 | } StackSummaryEntry; - 22 | typedef Array(StackSummaryEntry) StackSummary; - | - 23 | // Create a stack. - 24 | Stack *ts_stack_new(SubtreePool *subtree_pool); - | - 25 | // Release the memory reserved for a given stack. - 26 | void ts_stack_delete(Stack *self); - | - 27 | // Get the stack's current number of versions. - 28 | uint32_t ts_stack_version_count(const Stack *self); - | - 29 | // Get the stack's current number of halted versions. - 30 | uint32_t ts_stack_halted_version_count(Stack *self); - | - 31 | // Get the state at the top of the given version of the stack. If the stack is - 32 | // empty, this returns the initial state, 0. - 33 | TSStateId ts_stack_state(const Stack *self, StackVersion version); - | - 34 | // Get the last external token associated with a given version of the stack. - 35 | Subtree ts_stack_last_external_token(const Stack *self, StackVersion version); - | - 36 | // Set the last external token associated with a given version of the stack. 
- 37 | void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token); - | - 38 | // Get the position of the given version of the stack within the document. - 39 | Length ts_stack_position(const Stack *, StackVersion); - | - 40 | // Push a tree and state onto the given version of the stack. - 41 | // - 42 | // This transfers ownership of the tree to the Stack. Callers that - 43 | // need to retain ownership of the tree for their own purposes should - 44 | // first retain the tree. - 45 | void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, bool pending, TSStateId state); - | - 46 | // Pop the given number of entries from the given version of the stack. This - 47 | // operation can increase the number of stack versions by revealing multiple - 48 | // versions which had previously been merged. It returns an array that - 49 | // specifies the index of each revealed version and the trees that were - 50 | // removed from that version. - 51 | StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count); - | - 52 | // Remove an error at the top of the given version of the stack. - 53 | SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version); - | - 54 | // Remove any pending trees from the top of the given version of the stack. - 55 | StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version); - | - 56 | // Remove all trees from the given version of the stack. - 57 | StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version); - | - 58 | // Get the maximum number of tree nodes reachable from this version of the stack - 59 | // since the last error was detected. 
- 60 | unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version); - | - 61 | int ts_stack_dynamic_precedence(Stack *self, StackVersion version); - | - 62 | bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version); - | - 63 | // Compute a summary of all the parse states near the top of the given - 64 | // version of the stack and store the summary for later retrieval. - 65 | void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth); - | - 66 | // Retrieve a summary of all the parse states near the top of the - 67 | // given version of the stack. - 68 | StackSummary *ts_stack_get_summary(Stack *self, StackVersion version); - | - 69 | // Get the total cost of all errors on the given version of the stack. - 70 | unsigned ts_stack_error_cost(const Stack *self, StackVersion version); - | - 71 | // Merge the given two stack versions if possible, returning true - 72 | // if they were successfully merged and false otherwise. - 73 | bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2); - | - 74 | // Determine whether the given two stack versions can be merged. 
- 75 | bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2); - | - 76 | Subtree ts_stack_resume(Stack *self, StackVersion version); - | - 77 | void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead); - | - 78 | void ts_stack_halt(Stack *self, StackVersion version); - | - 79 | bool ts_stack_is_active(const Stack *self, StackVersion version); - | - 80 | bool ts_stack_is_paused(const Stack *self, StackVersion version); - | - 81 | bool ts_stack_is_halted(const Stack *self, StackVersion version); - | - 82 | void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2); - | - 83 | void ts_stack_swap_versions(Stack *, StackVersion v1, StackVersion v2); - | - 84 | StackVersion ts_stack_copy_version(Stack *self, StackVersion version); - | - 85 | // Remove the given version from the stack. - 86 | void ts_stack_remove_version(Stack *self, StackVersion version); - | - 87 | void ts_stack_clear(Stack *self); - | - 88 | bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f); - | - 89 | #ifdef __cplusplus - 90 | } - 91 | #endif - | - 92 | #endif // TREE_SITTER_PARSE_STACK_H_ - - - --------------------------------------------------------------------------------- -/lib/src/subtree.c: --------------------------------------------------------------------------------- - 1 | #include - 2 | #include - 3 | #include - 4 | #include - 5 | #include - 6 | #include "./alloc.h" - 7 | #include "./array.h" - 8 | #include "./atomic.h" - 9 | #include "./subtree.h" - 10 | #include "./length.h" - 11 | #include "./language.h" - 12 | #include "./error_costs.h" - 13 | #include "./ts_assert.h" - 14 | #include - | - 15 | typedef struct { - 16 | Length start; - 17 | Length old_end; - 18 | Length new_end; - 19 | } Edit; - | - 20 | #define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX - 21 | #define TS_MAX_TREE_POOL_SIZE 32 - | - 22 | // ExternalScannerState - | - 23 | void ts_external_scanner_state_init(ExternalScannerState *self, 
const char *data, unsigned length) { - 24 | self->length = length; - 25 | if (length > sizeof(self->short_data)) { - 26 | self->long_data = ts_malloc(length); - 27 | memcpy(self->long_data, data, length); - 28 | } else { - 29 | memcpy(self->short_data, data, length); - 30 | } - 31 | } - | - 32 | ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) { - 33 | ExternalScannerState result = *self; - 34 | if (self->length > sizeof(self->short_data)) { - 35 | result.long_data = ts_malloc(self->length); - 36 | memcpy(result.long_data, self->long_data, self->length); - 37 | } - 38 | return result; - 39 | } - | - 40 | void ts_external_scanner_state_delete(ExternalScannerState *self) { - 41 | if (self->length > sizeof(self->short_data)) { - 42 | ts_free(self->long_data); - 43 | } - 44 | } - | - 45 | const char *ts_external_scanner_state_data(const ExternalScannerState *self) { - 46 | if (self->length > sizeof(self->short_data)) { - 47 | return self->long_data; - 48 | } else { - 49 | return self->short_data; - 50 | } - 51 | } - | - 52 | bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length) { - 53 | return - 54 | self->length == length && - 55 | memcmp(ts_external_scanner_state_data(self), buffer, length) == 0; - 56 | } - | - 57 | // SubtreeArray - | - 58 | void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) { - 59 | dest->size = self.size; - 60 | dest->capacity = self.capacity; - 61 | dest->contents = self.contents; - 62 | if (self.capacity > 0) { - 63 | dest->contents = ts_calloc(self.capacity, sizeof(Subtree)); - 64 | memcpy(dest->contents, self.contents, self.size * sizeof(Subtree)); - 65 | for (uint32_t i = 0; i < self.size; i++) { - 66 | ts_subtree_retain(*array_get(dest, i)); - 67 | } - 68 | } - 69 | } - | - 70 | void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self) { - 71 | for (uint32_t i = 0; i < self->size; i++) { - 72 | ts_subtree_release(pool, 
*array_get(self, i)); - 73 | } - 74 | array_clear(self); - 75 | } - | - 76 | void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) { - 77 | ts_subtree_array_clear(pool, self); - 78 | array_delete(self); - 79 | } - | - 80 | void ts_subtree_array_remove_trailing_extras( - 81 | SubtreeArray *self, - 82 | SubtreeArray *destination - 83 | ) { - 84 | array_clear(destination); - 85 | while (self->size > 0) { - 86 | Subtree last = *array_get(self, self->size - 1); - 87 | if (ts_subtree_extra(last)) { - 88 | self->size--; - 89 | array_push(destination, last); - 90 | } else { - 91 | break; - 92 | } - 93 | } - 94 | ts_subtree_array_reverse(destination); - 95 | } - | - 96 | void ts_subtree_array_reverse(SubtreeArray *self) { - 97 | for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) { - 98 | size_t reverse_index = self->size - 1 - i; - 99 | Subtree swap = *array_get(self, i); - 100 | *array_get(self, i) = *array_get(self, reverse_index); - 101 | *array_get(self, reverse_index) = swap; - 102 | } - 103 | } - | - 104 | // SubtreePool - | - 105 | SubtreePool ts_subtree_pool_new(uint32_t capacity) { - 106 | SubtreePool self = {array_new(), array_new()}; - 107 | array_reserve(&self.free_trees, capacity); - 108 | return self; - 109 | } - | - 110 | void ts_subtree_pool_delete(SubtreePool *self) { - 111 | if (self->free_trees.contents) { - 112 | for (unsigned i = 0; i < self->free_trees.size; i++) { - 113 | ts_free(array_get(&self->free_trees, i)->ptr); - 114 | } - 115 | array_delete(&self->free_trees); - 116 | } - 117 | if (self->tree_stack.contents) array_delete(&self->tree_stack); - 118 | } - | - 119 | static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) { - 120 | if (self->free_trees.size > 0) { - 121 | return array_pop(&self->free_trees).ptr; - 122 | } else { - 123 | return ts_malloc(sizeof(SubtreeHeapData)); - 124 | } - 125 | } - | - 126 | static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) { - 127 | if 
(self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) { - 128 | array_push(&self->free_trees, (MutableSubtree) {.ptr = tree}); - 129 | } else { - 130 | ts_free(tree); - 131 | } - 132 | } - | - 133 | // Subtree - | - 134 | static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) { - 135 | return - 136 | padding.bytes < TS_MAX_INLINE_TREE_LENGTH && - 137 | padding.extent.row < 16 && - 138 | padding.extent.column < TS_MAX_INLINE_TREE_LENGTH && - 139 | size.bytes < TS_MAX_INLINE_TREE_LENGTH && - 140 | size.extent.row == 0 && - 141 | size.extent.column < TS_MAX_INLINE_TREE_LENGTH && - 142 | lookahead_bytes < 16; - 143 | } - | - 144 | Subtree ts_subtree_new_leaf( - 145 | SubtreePool *pool, TSSymbol symbol, Length padding, Length size, - 146 | uint32_t lookahead_bytes, TSStateId parse_state, - 147 | bool has_external_tokens, bool depends_on_column, - 148 | bool is_keyword, const TSLanguage *language - 149 | ) { - 150 | TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - 151 | bool extra = symbol == ts_builtin_sym_end; - | - 152 | bool is_inline = ( - 153 | symbol <= UINT8_MAX && - 154 | !has_external_tokens && - 155 | ts_subtree_can_inline(padding, size, lookahead_bytes) - 156 | ); - | - 157 | if (is_inline) { - 158 | return (Subtree) {{ - 159 | .parse_state = parse_state, - 160 | .symbol = symbol, - 161 | .padding_bytes = padding.bytes, - 162 | .padding_rows = padding.extent.row, - 163 | .padding_columns = padding.extent.column, - 164 | .size_bytes = size.bytes, - 165 | .lookahead_bytes = lookahead_bytes, - 166 | .visible = metadata.visible, - 167 | .named = metadata.named, - 168 | .extra = extra, - 169 | .has_changes = false, - 170 | .is_missing = false, - 171 | .is_keyword = is_keyword, - 172 | .is_inline = true, - 173 | }}; - 174 | } else { - 175 | SubtreeHeapData *data = ts_subtree_pool_allocate(pool); - 176 | *data = (SubtreeHeapData) { - 177 | .ref_count = 1, - 178 | 
.padding = padding, - 179 | .size = size, - 180 | .lookahead_bytes = lookahead_bytes, - 181 | .error_cost = 0, - 182 | .child_count = 0, - 183 | .symbol = symbol, - 184 | .parse_state = parse_state, - 185 | .visible = metadata.visible, - 186 | .named = metadata.named, - 187 | .extra = extra, - 188 | .fragile_left = false, - 189 | .fragile_right = false, - 190 | .has_changes = false, - 191 | .has_external_tokens = has_external_tokens, - 192 | .has_external_scanner_state_change = false, - 193 | .depends_on_column = depends_on_column, - 194 | .is_missing = false, - 195 | .is_keyword = is_keyword, - 196 | {{.first_leaf = {.symbol = 0, .parse_state = 0}}} - 197 | }; - 198 | return (Subtree) {.ptr = data}; - 199 | } - 200 | } - | - 201 | void ts_subtree_set_symbol( - 202 | MutableSubtree *self, - 203 | TSSymbol symbol, - 204 | const TSLanguage *language - 205 | ) { - 206 | TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - 207 | if (self->data.is_inline) { - 208 | ts_assert(symbol < UINT8_MAX); - 209 | self->data.symbol = symbol; - 210 | self->data.named = metadata.named; - 211 | self->data.visible = metadata.visible; - 212 | } else { - 213 | self->ptr->symbol = symbol; - 214 | self->ptr->named = metadata.named; - 215 | self->ptr->visible = metadata.visible; - 216 | } - 217 | } - | - 218 | Subtree ts_subtree_new_error( - 219 | SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, - 220 | uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language - 221 | ) { - 222 | Subtree result = ts_subtree_new_leaf( - 223 | pool, ts_builtin_sym_error, padding, size, bytes_scanned, - 224 | parse_state, false, false, false, language - 225 | ); - 226 | SubtreeHeapData *data = (SubtreeHeapData *)result.ptr; - 227 | data->fragile_left = true; - 228 | data->fragile_right = true; - 229 | data->lookahead_char = lookahead_char; - 230 | return result; - 231 | } - | - 232 | // Clone a subtree. 
- 233 | MutableSubtree ts_subtree_clone(Subtree self) { - 234 | size_t alloc_size = ts_subtree_alloc_size(self.ptr->child_count); - 235 | Subtree *new_children = ts_malloc(alloc_size); - 236 | Subtree *old_children = ts_subtree_children(self); - 237 | memcpy(new_children, old_children, alloc_size); - 238 | SubtreeHeapData *result = (SubtreeHeapData *)&new_children[self.ptr->child_count]; - 239 | if (self.ptr->child_count > 0) { - 240 | for (uint32_t i = 0; i < self.ptr->child_count; i++) { - 241 | ts_subtree_retain(new_children[i]); - 242 | } - 243 | } else if (self.ptr->has_external_tokens) { - 244 | result->external_scanner_state = ts_external_scanner_state_copy( - 245 | &self.ptr->external_scanner_state - 246 | ); - 247 | } - 248 | result->ref_count = 1; - 249 | return (MutableSubtree) {.ptr = result}; - 250 | } - | - 251 | // Get mutable version of a subtree. - 252 | // - 253 | // This takes ownership of the subtree. If the subtree has only one owner, - 254 | // this will directly convert it into a mutable version. Otherwise, it will - 255 | // perform a copy. 
- 256 | MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) { - 257 | if (self.data.is_inline) return (MutableSubtree) {self.data}; - 258 | if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self); - 259 | MutableSubtree result = ts_subtree_clone(self); - 260 | ts_subtree_release(pool, self); - 261 | return result; - 262 | } - | - 263 | void ts_subtree_compress( - 264 | MutableSubtree self, - 265 | unsigned count, - 266 | const TSLanguage *language, - 267 | MutableSubtreeArray *stack - 268 | ) { - 269 | unsigned initial_stack_size = stack->size; - | - 270 | MutableSubtree tree = self; - 271 | TSSymbol symbol = tree.ptr->symbol; - 272 | for (unsigned i = 0; i < count; i++) { - 273 | if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break; - | - 274 | MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); - 275 | if ( - 276 | child.data.is_inline || - 277 | child.ptr->child_count < 2 || - 278 | child.ptr->ref_count > 1 || - 279 | child.ptr->symbol != symbol - 280 | ) break; - | - 281 | MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[0]); - 282 | if ( - 283 | grandchild.data.is_inline || - 284 | grandchild.ptr->child_count < 2 || - 285 | grandchild.ptr->ref_count > 1 || - 286 | grandchild.ptr->symbol != symbol - 287 | ) break; - | - 288 | ts_subtree_children(tree)[0] = ts_subtree_from_mut(grandchild); - 289 | ts_subtree_children(child)[0] = ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1]; - 290 | ts_subtree_children(grandchild)[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child); - 291 | array_push(stack, tree); - 292 | tree = grandchild; - 293 | } - | - 294 | while (stack->size > initial_stack_size) { - 295 | tree = array_pop(stack); - 296 | MutableSubtree child = ts_subtree_to_mut_unsafe(ts_subtree_children(tree)[0]); - 297 | MutableSubtree grandchild = ts_subtree_to_mut_unsafe(ts_subtree_children(child)[child.ptr->child_count - 1]); - 298 | 
ts_subtree_summarize_children(grandchild, language); - 299 | ts_subtree_summarize_children(child, language); - 300 | ts_subtree_summarize_children(tree, language); - 301 | } - 302 | } - | - 303 | // Assign all of the node's properties that depend on its children. - 304 | void ts_subtree_summarize_children( - 305 | MutableSubtree self, - 306 | const TSLanguage *language - 307 | ) { - 308 | ts_assert(!self.data.is_inline); - | - 309 | self.ptr->named_child_count = 0; - 310 | self.ptr->visible_child_count = 0; - 311 | self.ptr->error_cost = 0; - 312 | self.ptr->repeat_depth = 0; - 313 | self.ptr->visible_descendant_count = 0; - 314 | self.ptr->has_external_tokens = false; - 315 | self.ptr->depends_on_column = false; - 316 | self.ptr->has_external_scanner_state_change = false; - 317 | self.ptr->dynamic_precedence = 0; - | - 318 | uint32_t structural_index = 0; - 319 | const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); - 320 | uint32_t lookahead_end_byte = 0; - | - 321 | const Subtree *children = ts_subtree_children(self); - 322 | for (uint32_t i = 0; i < self.ptr->child_count; i++) { - 323 | Subtree child = children[i]; - | - 324 | if ( - 325 | self.ptr->size.extent.row == 0 && - 326 | ts_subtree_depends_on_column(child) - 327 | ) { - 328 | self.ptr->depends_on_column = true; - 329 | } - | - 330 | if (ts_subtree_has_external_scanner_state_change(child)) { - 331 | self.ptr->has_external_scanner_state_change = true; - 332 | } - | - 333 | if (i == 0) { - 334 | self.ptr->padding = ts_subtree_padding(child); - 335 | self.ptr->size = ts_subtree_size(child); - 336 | } else { - 337 | self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child)); - 338 | } - | - 339 | uint32_t child_lookahead_end_byte = - 340 | self.ptr->padding.bytes + - 341 | self.ptr->size.bytes + - 342 | ts_subtree_lookahead_bytes(child); - 343 | if (child_lookahead_end_byte > lookahead_end_byte) { - 344 | lookahead_end_byte = 
child_lookahead_end_byte; - 345 | } - | - 346 | if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) { - 347 | self.ptr->error_cost += ts_subtree_error_cost(child); - 348 | } - | - 349 | uint32_t grandchild_count = ts_subtree_child_count(child); - 350 | if ( - 351 | self.ptr->symbol == ts_builtin_sym_error || - 352 | self.ptr->symbol == ts_builtin_sym_error_repeat - 353 | ) { - 354 | if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) { - 355 | if (ts_subtree_visible(child)) { - 356 | self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; - 357 | } else if (grandchild_count > 0) { - 358 | self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; - 359 | } - 360 | } - 361 | } - | - 362 | self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child); - 363 | self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child); - | - 364 | if ( - 365 | !ts_subtree_extra(child) && - 366 | ts_subtree_symbol(child) != 0 && - 367 | alias_sequence && - 368 | alias_sequence[structural_index] != 0 - 369 | ) { - 370 | self.ptr->visible_descendant_count++; - 371 | self.ptr->visible_child_count++; - 372 | if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) { - 373 | self.ptr->named_child_count++; - 374 | } - 375 | } else if (ts_subtree_visible(child)) { - 376 | self.ptr->visible_descendant_count++; - 377 | self.ptr->visible_child_count++; - 378 | if (ts_subtree_named(child)) self.ptr->named_child_count++; - 379 | } else if (grandchild_count > 0) { - 380 | self.ptr->visible_child_count += child.ptr->visible_child_count; - 381 | self.ptr->named_child_count += child.ptr->named_child_count; - 382 | } - | - 383 | if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true; - | - 384 | if (ts_subtree_is_error(child)) { - 385 | self.ptr->fragile_left = self.ptr->fragile_right = true; - 386 | self.ptr->parse_state = TS_TREE_STATE_NONE; - 387 | } - 
| - 388 | if (!ts_subtree_extra(child)) structural_index++; - 389 | } - | - 390 | self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes; - | - 391 | if ( - 392 | self.ptr->symbol == ts_builtin_sym_error || - 393 | self.ptr->symbol == ts_builtin_sym_error_repeat - 394 | ) { - 395 | self.ptr->error_cost += - 396 | ERROR_COST_PER_RECOVERY + - 397 | ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + - 398 | ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row; - 399 | } - | - 400 | if (self.ptr->child_count > 0) { - 401 | Subtree first_child = children[0]; - 402 | Subtree last_child = children[self.ptr->child_count - 1]; - | - 403 | self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child); - 404 | self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child); - | - 405 | if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true; - 406 | if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true; - | - 407 | if ( - 408 | self.ptr->child_count >= 2 && - 409 | !self.ptr->visible && - 410 | !self.ptr->named && - 411 | ts_subtree_symbol(first_child) == self.ptr->symbol - 412 | ) { - 413 | if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) { - 414 | self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1; - 415 | } else { - 416 | self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1; - 417 | } - 418 | } - 419 | } - 420 | } - | - 421 | // Create a new parent node with the given children. - 422 | // - 423 | // This takes ownership of the children array. 
- 424 | MutableSubtree ts_subtree_new_node( - 425 | TSSymbol symbol, - 426 | SubtreeArray *children, - 427 | unsigned production_id, - 428 | const TSLanguage *language - 429 | ) { - 430 | TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); - 431 | bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; - | - 432 | // Allocate the node's data at the end of the array of children. - 433 | size_t new_byte_size = ts_subtree_alloc_size(children->size); - 434 | if (children->capacity * sizeof(Subtree) < new_byte_size) { - 435 | children->contents = ts_realloc(children->contents, new_byte_size); - 436 | children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree)); - 437 | } - 438 | SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size]; - | - 439 | *data = (SubtreeHeapData) { - 440 | .ref_count = 1, - 441 | .symbol = symbol, - 442 | .child_count = children->size, - 443 | .visible = metadata.visible, - 444 | .named = metadata.named, - 445 | .has_changes = false, - 446 | .has_external_scanner_state_change = false, - 447 | .fragile_left = fragile, - 448 | .fragile_right = fragile, - 449 | .is_keyword = false, - 450 | {{ - 451 | .visible_descendant_count = 0, - 452 | .production_id = production_id, - 453 | .first_leaf = {.symbol = 0, .parse_state = 0}, - 454 | }} - 455 | }; - 456 | MutableSubtree result = {.ptr = data}; - 457 | ts_subtree_summarize_children(result, language); - 458 | return result; - 459 | } - | - 460 | // Create a new error node containing the given children. - 461 | // - 462 | // This node is treated as 'extra'. Its children are prevented from having - 463 | // having any effect on the parse state. 
- 464 | Subtree ts_subtree_new_error_node( - 465 | SubtreeArray *children, - 466 | bool extra, - 467 | const TSLanguage *language - 468 | ) { - 469 | MutableSubtree result = ts_subtree_new_node( - 470 | ts_builtin_sym_error, children, 0, language - 471 | ); - 472 | result.ptr->extra = extra; - 473 | return ts_subtree_from_mut(result); - 474 | } - | - 475 | // Create a new 'missing leaf' node. - 476 | // - 477 | // This node is treated as 'extra'. Its children are prevented from having - 478 | // having any effect on the parse state. - 479 | Subtree ts_subtree_new_missing_leaf( - 480 | SubtreePool *pool, - 481 | TSSymbol symbol, - 482 | Length padding, - 483 | uint32_t lookahead_bytes, - 484 | const TSLanguage *language - 485 | ) { - 486 | Subtree result = ts_subtree_new_leaf( - 487 | pool, symbol, padding, length_zero(), lookahead_bytes, - 488 | 0, false, false, false, language - 489 | ); - 490 | if (result.data.is_inline) { - 491 | result.data.is_missing = true; - 492 | } else { - 493 | ((SubtreeHeapData *)result.ptr)->is_missing = true; - 494 | } - 495 | return result; - 496 | } - | - 497 | void ts_subtree_retain(Subtree self) { - 498 | if (self.data.is_inline) return; - 499 | ts_assert(self.ptr->ref_count > 0); - 500 | atomic_inc((volatile uint32_t *)&self.ptr->ref_count); - 501 | ts_assert(self.ptr->ref_count != 0); - 502 | } - | - 503 | void ts_subtree_release(SubtreePool *pool, Subtree self) { - 504 | if (self.data.is_inline) return; - 505 | array_clear(&pool->tree_stack); - | - 506 | ts_assert(self.ptr->ref_count > 0); - 507 | if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) { - 508 | array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self)); - 509 | } - | - 510 | while (pool->tree_stack.size > 0) { - 511 | MutableSubtree tree = array_pop(&pool->tree_stack); - 512 | if (tree.ptr->child_count > 0) { - 513 | Subtree *children = ts_subtree_children(tree); - 514 | for (uint32_t i = 0; i < tree.ptr->child_count; i++) { - 515 | Subtree child = 
children[i]; - 516 | if (child.data.is_inline) continue; - 517 | ts_assert(child.ptr->ref_count > 0); - 518 | if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) { - 519 | array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child)); - 520 | } - 521 | } - 522 | ts_free(children); - 523 | } else { - 524 | if (tree.ptr->has_external_tokens) { - 525 | ts_external_scanner_state_delete(&tree.ptr->external_scanner_state); - 526 | } - 527 | ts_subtree_pool_free(pool, tree.ptr); - 528 | } - 529 | } - 530 | } - | - 531 | int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) { - 532 | array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left)); - 533 | array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right)); - | - 534 | while (pool->tree_stack.size > 0) { - 535 | right = ts_subtree_from_mut(array_pop(&pool->tree_stack)); - 536 | left = ts_subtree_from_mut(array_pop(&pool->tree_stack)); - | - 537 | int result = 0; - 538 | if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) result = -1; - 539 | else if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) result = 1; - 540 | else if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) result = -1; - 541 | else if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) result = 1; - 542 | if (result != 0) { - 543 | array_clear(&pool->tree_stack); - 544 | return result; - 545 | } - | - 546 | for (uint32_t i = ts_subtree_child_count(left); i > 0; i--) { - 547 | Subtree left_child = ts_subtree_children(left)[i - 1]; - 548 | Subtree right_child = ts_subtree_children(right)[i - 1]; - 549 | array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left_child)); - 550 | array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right_child)); - 551 | } - 552 | } - | - 553 | return 0; - 554 | } - | - 555 | static inline void ts_subtree_set_has_changes(MutableSubtree *self) { - 556 | if (self->data.is_inline) { - 557 | self->data.has_changes = true; - 558 | } else { - 559 | 
self->ptr->has_changes = true; - 560 | } - 561 | } - | - 562 | Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool *pool) { - 563 | typedef struct { - 564 | Subtree *tree; - 565 | Edit edit; - 566 | } EditEntry; - | - 567 | Array(EditEntry) stack = array_new(); - 568 | array_push(&stack, ((EditEntry) { - 569 | .tree = &self, - 570 | .edit = (Edit) { - 571 | .start = {input_edit->start_byte, input_edit->start_point}, - 572 | .old_end = {input_edit->old_end_byte, input_edit->old_end_point}, - 573 | .new_end = {input_edit->new_end_byte, input_edit->new_end_point}, - 574 | }, - 575 | })); - | - 576 | while (stack.size) { - 577 | EditEntry entry = array_pop(&stack); - 578 | Edit edit = entry.edit; - 579 | bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes; - 580 | bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; - 581 | bool parent_depends_on_column = ts_subtree_depends_on_column(*entry.tree); - 582 | bool column_shifted = edit.new_end.extent.column != edit.old_end.extent.column; - | - 583 | Length size = ts_subtree_size(*entry.tree); - 584 | Length padding = ts_subtree_padding(*entry.tree); - 585 | Length total_size = length_add(padding, size); - 586 | uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree); - 587 | uint32_t end_byte = total_size.bytes + lookahead_bytes; - 588 | if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue; - | - 589 | // If the edit is entirely within the space before this subtree, then shift this - 590 | // subtree over according to the edit without changing its size. - 591 | if (edit.old_end.bytes <= padding.bytes) { - 592 | padding = length_add(edit.new_end, length_sub(padding, edit.old_end)); - 593 | } - | - 594 | // If the edit starts in the space before this subtree and extends into this subtree, - 595 | // shrink the subtree's content to compensate for the change in the space before it. 
- 596 | else if (edit.start.bytes < padding.bytes) { - 597 | size = length_saturating_sub(size, length_sub(edit.old_end, padding)); - 598 | padding = edit.new_end; - 599 | } - | - 600 | // If the edit is within this subtree, resize the subtree to reflect the edit. - 601 | else if ( - 602 | edit.start.bytes < total_size.bytes || - 603 | (edit.start.bytes == total_size.bytes && is_pure_insertion) - 604 | ) { - 605 | size = length_add( - 606 | length_sub(edit.new_end, padding), - 607 | length_saturating_sub(total_size, edit.old_end) - 608 | ); - 609 | } - | - 610 | MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree); - | - 611 | if (result.data.is_inline) { - 612 | if (ts_subtree_can_inline(padding, size, lookahead_bytes)) { - 613 | result.data.padding_bytes = padding.bytes; - 614 | result.data.padding_rows = padding.extent.row; - 615 | result.data.padding_columns = padding.extent.column; - 616 | result.data.size_bytes = size.bytes; - 617 | } else { - 618 | SubtreeHeapData *data = ts_subtree_pool_allocate(pool); - 619 | data->ref_count = 1; - 620 | data->padding = padding; - 621 | data->size = size; - 622 | data->lookahead_bytes = lookahead_bytes; - 623 | data->error_cost = 0; - 624 | data->child_count = 0; - 625 | data->symbol = result.data.symbol; - 626 | data->parse_state = result.data.parse_state; - 627 | data->visible = result.data.visible; - 628 | data->named = result.data.named; - 629 | data->extra = result.data.extra; - 630 | data->fragile_left = false; - 631 | data->fragile_right = false; - 632 | data->has_changes = false; - 633 | data->has_external_tokens = false; - 634 | data->depends_on_column = false; - 635 | data->is_missing = result.data.is_missing; - 636 | data->is_keyword = result.data.is_keyword; - 637 | result.ptr = data; - 638 | } - 639 | } else { - 640 | result.ptr->padding = padding; - 641 | result.ptr->size = size; - 642 | } - | - 643 | ts_subtree_set_has_changes(&result); - 644 | *entry.tree = ts_subtree_from_mut(result); - | - 645 | 
Length child_left, child_right = length_zero(); - 646 | for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) { - 647 | Subtree *child = &ts_subtree_children(*entry.tree)[i]; - 648 | Length child_size = ts_subtree_total_size(*child); - 649 | child_left = child_right; - 650 | child_right = length_add(child_left, child_size); - | - 651 | // If this child ends before the edit, it is not affected. - 652 | if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue; - | - 653 | // Keep editing child nodes until a node is reached that starts after the edit. - 654 | // Also, if this node's validity depends on its column position, then continue - 655 | // invalidating child nodes until reaching a line break. - 656 | if (( - 657 | (child_left.bytes > edit.old_end.bytes) || - 658 | (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0) - 659 | ) && ( - 660 | !parent_depends_on_column || - 661 | child_left.extent.row > padding.extent.row - 662 | ) && ( - 663 | !ts_subtree_depends_on_column(*child) || - 664 | !column_shifted || - 665 | child_left.extent.row > edit.old_end.extent.row - 666 | )) { - 667 | break; - 668 | } - | - 669 | // Transform edit into the child's coordinate space. - 670 | Edit child_edit = { - 671 | .start = length_saturating_sub(edit.start, child_left), - 672 | .old_end = length_saturating_sub(edit.old_end, child_left), - 673 | .new_end = length_saturating_sub(edit.new_end, child_left), - 674 | }; - | - 675 | // Interpret all inserted text as applying to the *first* child that touches the edit. - 676 | // Subsequent children are only never have any text inserted into them; they are only - 677 | // shrunk to compensate for the edit. 
- 678 | if ( - 679 | child_right.bytes > edit.start.bytes || - 680 | (child_right.bytes == edit.start.bytes && is_pure_insertion) - 681 | ) { - 682 | edit.new_end = edit.start; - 683 | } - | - 684 | // Children that occur before the edit are not reshaped by the edit. - 685 | else { - 686 | child_edit.old_end = child_edit.start; - 687 | child_edit.new_end = child_edit.start; - 688 | } - | - 689 | // Queue processing of this child's subtree. - 690 | array_push(&stack, ((EditEntry) { - 691 | .tree = child, - 692 | .edit = child_edit, - 693 | })); - 694 | } - 695 | } - | - 696 | array_delete(&stack); - 697 | return self; - 698 | } - | - 699 | Subtree ts_subtree_last_external_token(Subtree tree) { - 700 | if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE; - 701 | while (tree.ptr->child_count > 0) { - 702 | for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) { - 703 | Subtree child = ts_subtree_children(tree)[i]; - 704 | if (ts_subtree_has_external_tokens(child)) { - 705 | tree = child; - 706 | break; - 707 | } - 708 | } - 709 | } - 710 | return tree; - 711 | } - | - 712 | static size_t ts_subtree__write_char_to_string(char *str, size_t n, int32_t chr) { - 713 | if (chr == -1) - 714 | return snprintf(str, n, "INVALID"); - 715 | else if (chr == '\0') - 716 | return snprintf(str, n, "'\\0'"); - 717 | else if (chr == '\n') - 718 | return snprintf(str, n, "'\\n'"); - 719 | else if (chr == '\t') - 720 | return snprintf(str, n, "'\\t'"); - 721 | else if (chr == '\r') - 722 | return snprintf(str, n, "'\\r'"); - 723 | else if (0 < chr && chr < 128 && isprint(chr)) - 724 | return snprintf(str, n, "'%c'", chr); - 725 | else - 726 | return snprintf(str, n, "%d", chr); - 727 | } - | - 728 | static const char *const ROOT_FIELD = "__ROOT__"; - | - 729 | static size_t ts_subtree__write_to_string( - 730 | Subtree self, char *string, size_t limit, - 731 | const TSLanguage *language, bool include_all, - 732 | TSSymbol alias_symbol, bool alias_is_named, const char 
*field_name - 733 | ) { - 734 | if (!self.ptr) return snprintf(string, limit, "(NULL)"); - | - 735 | char *cursor = string; - 736 | char **writer = (limit > 1) ? &cursor : &string; - 737 | bool is_root = field_name == ROOT_FIELD; - 738 | bool is_visible = - 739 | include_all || - 740 | ts_subtree_missing(self) || - 741 | ( - 742 | alias_symbol - 743 | ? alias_is_named - 744 | : ts_subtree_visible(self) && ts_subtree_named(self) - 745 | ); - | - 746 | if (is_visible) { - 747 | if (!is_root) { - 748 | cursor += snprintf(*writer, limit, " "); - 749 | if (field_name) { - 750 | cursor += snprintf(*writer, limit, "%s: ", field_name); - 751 | } - 752 | } - | - 753 | if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) { - 754 | cursor += snprintf(*writer, limit, "(UNEXPECTED "); - 755 | cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char); - 756 | } else { - 757 | TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); - 758 | const char *symbol_name = ts_language_symbol_name(language, symbol); - 759 | if (ts_subtree_missing(self)) { - 760 | cursor += snprintf(*writer, limit, "(MISSING "); - 761 | if (alias_is_named || ts_subtree_named(self)) { - 762 | cursor += snprintf(*writer, limit, "%s", symbol_name); - 763 | } else { - 764 | cursor += snprintf(*writer, limit, "\"%s\"", symbol_name); - 765 | } - 766 | } else { - 767 | cursor += snprintf(*writer, limit, "(%s", symbol_name); - 768 | } - 769 | } - 770 | } else if (is_root) { - 771 | TSSymbol symbol = alias_symbol ? 
alias_symbol : ts_subtree_symbol(self); - 772 | const char *symbol_name = ts_language_symbol_name(language, symbol); - 773 | if (ts_subtree_child_count(self) > 0) { - 774 | cursor += snprintf(*writer, limit, "(%s", symbol_name); - 775 | } else if (ts_subtree_named(self)) { - 776 | cursor += snprintf(*writer, limit, "(%s)", symbol_name); - 777 | } else { - 778 | cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name); - 779 | } - 780 | } - | - 781 | if (ts_subtree_child_count(self)) { - 782 | const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); - 783 | const TSFieldMapEntry *field_map, *field_map_end; - 784 | ts_language_field_map( - 785 | language, - 786 | self.ptr->production_id, - 787 | &field_map, - 788 | &field_map_end - 789 | ); - | - 790 | uint32_t structural_child_index = 0; - 791 | for (uint32_t i = 0; i < self.ptr->child_count; i++) { - 792 | Subtree child = ts_subtree_children(self)[i]; - 793 | if (ts_subtree_extra(child)) { - 794 | cursor += ts_subtree__write_to_string( - 795 | child, *writer, limit, - 796 | language, include_all, - 797 | 0, false, NULL - 798 | ); - 799 | } else { - 800 | TSSymbol subtree_alias_symbol = alias_sequence - 801 | ? alias_sequence[structural_child_index] - 802 | : 0; - 803 | bool subtree_alias_is_named = subtree_alias_symbol - 804 | ? ts_language_symbol_metadata(language, subtree_alias_symbol).named - 805 | : false; - | - 806 | const char *child_field_name = is_visible ? 
NULL : field_name; - 807 | for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - 808 | if (!map->inherited && map->child_index == structural_child_index) { - 809 | child_field_name = language->field_names[map->field_id]; - 810 | break; - 811 | } - 812 | } - | - 813 | cursor += ts_subtree__write_to_string( - 814 | child, *writer, limit, - 815 | language, include_all, - 816 | subtree_alias_symbol, subtree_alias_is_named, child_field_name - 817 | ); - 818 | structural_child_index++; - 819 | } - 820 | } - 821 | } - | - 822 | if (is_visible) cursor += snprintf(*writer, limit, ")"); - | - 823 | return cursor - string; - 824 | } - | - 825 | char *ts_subtree_string( - 826 | Subtree self, - 827 | TSSymbol alias_symbol, - 828 | bool alias_is_named, - 829 | const TSLanguage *language, - 830 | bool include_all - 831 | ) { - 832 | char scratch_string[1]; - 833 | size_t size = ts_subtree__write_to_string( - 834 | self, scratch_string, 1, - 835 | language, include_all, - 836 | alias_symbol, alias_is_named, ROOT_FIELD - 837 | ) + 1; - 838 | char *result = ts_malloc(size * sizeof(char)); - 839 | ts_subtree__write_to_string( - 840 | self, result, size, - 841 | language, include_all, - 842 | alias_symbol, alias_is_named, ROOT_FIELD - 843 | ); - 844 | return result; - 845 | } - | - 846 | void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, - 847 | const TSLanguage *language, TSSymbol alias_symbol, - 848 | FILE *f) { - 849 | TSSymbol subtree_symbol = ts_subtree_symbol(*self); - 850 | TSSymbol symbol = alias_symbol ? 
alias_symbol : subtree_symbol; - 851 | uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self); - 852 | fprintf(f, "tree_%p [label=\"", (void *)self); - 853 | ts_language_write_symbol_as_dot_string(language, f, symbol); - 854 | fprintf(f, "\""); - | - 855 | if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext"); - 856 | if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray"); - 857 | if (ts_subtree_has_changes(*self)) fprintf(f, ", color=green, penwidth=2"); - | - 858 | fprintf(f, ", tooltip=\"" - 859 | "range: %u - %u\n" - 860 | "state: %d\n" - 861 | "error-cost: %u\n" - 862 | "has-changes: %u\n" - 863 | "depends-on-column: %u\n" - 864 | "descendant-count: %u\n" - 865 | "repeat-depth: %u\n" - 866 | "lookahead-bytes: %u", - 867 | start_offset, end_offset, - 868 | ts_subtree_parse_state(*self), - 869 | ts_subtree_error_cost(*self), - 870 | ts_subtree_has_changes(*self), - 871 | ts_subtree_depends_on_column(*self), - 872 | ts_subtree_visible_descendant_count(*self), - 873 | ts_subtree_repeat_depth(*self), - 874 | ts_subtree_lookahead_bytes(*self) - 875 | ); - | - 876 | if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) { - 877 | fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); - 878 | } - | - 879 | fprintf(f, "\"]\n"); - | - 880 | uint32_t child_start_offset = start_offset; - 881 | uint32_t child_info_offset = - 882 | language->max_alias_sequence_length * - 883 | ts_subtree_production_id(*self); - 884 | for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { - 885 | const Subtree *child = &ts_subtree_children(*self)[i]; - 886 | TSSymbol subtree_alias_symbol = 0; - 887 | if (!ts_subtree_extra(*child) && child_info_offset) { - 888 | subtree_alias_symbol = language->alias_sequences[child_info_offset]; - 889 | child_info_offset++; - 890 | } - 891 | ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f); - 892 | fprintf(f, 
"tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i); - 893 | child_start_offset += ts_subtree_total_bytes(*child); - 894 | } - 895 | } - | - 896 | void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) { - 897 | fprintf(f, "digraph tree {\n"); - 898 | fprintf(f, "edge [arrowhead=none]\n"); - 899 | ts_subtree__print_dot_graph(&self, 0, language, 0, f); - 900 | fprintf(f, "}\n"); - 901 | } - | - 902 | const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) { - 903 | static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0}; - 904 | if ( - 905 | self.ptr && - 906 | !self.data.is_inline && - 907 | self.ptr->has_external_tokens && - 908 | self.ptr->child_count == 0 - 909 | ) { - 910 | return &self.ptr->external_scanner_state; - 911 | } else { - 912 | return &empty_state; - 913 | } - 914 | } - | - 915 | bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) { - 916 | const ExternalScannerState *state_self = ts_subtree_external_scanner_state(self); - 917 | const ExternalScannerState *state_other = ts_subtree_external_scanner_state(other); - 918 | return ts_external_scanner_state_eq( - 919 | state_self, - 920 | ts_external_scanner_state_data(state_other), - 921 | state_other->length - 922 | ); - 923 | } - - - --------------------------------------------------------------------------------- -/lib/src/subtree.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_SUBTREE_H_ - 2 | #define TREE_SITTER_SUBTREE_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include - 7 | #include - 8 | #include - 9 | #include "./length.h" - 10 | #include "./array.h" - 11 | #include "./error_costs.h" - 12 | #include "./host.h" - 13 | #include "tree_sitter/api.h" - 14 | #include "./parser.h" - | - 15 | #define TS_TREE_STATE_NONE USHRT_MAX - 16 | #define NULL_SUBTREE ((Subtree) {.ptr = NULL}) - | - 17 | // The 
serialized state of an external scanner. - 18 | // - 19 | // Every time an external token subtree is created after a call to an - 20 | // external scanner, the scanner's `serialize` function is called to - 21 | // retrieve a serialized copy of its state. The bytes are then copied - 22 | // onto the subtree itself so that the scanner's state can later be - 23 | // restored using its `deserialize` function. - 24 | // - 25 | // Small byte arrays are stored inline, and long ones are allocated - 26 | // separately on the heap. - 27 | typedef struct { - 28 | union { - 29 | char *long_data; - 30 | char short_data[24]; - 31 | }; - 32 | uint32_t length; - 33 | } ExternalScannerState; - | - 34 | // A compact representation of a subtree. - 35 | // - 36 | // This representation is used for small leaf nodes that are not - 37 | // errors, and were not created by an external scanner. - 38 | // - 39 | // The idea behind the layout of this struct is that the `is_inline` - 40 | // bit will fall exactly into the same location as the least significant - 41 | // bit of the pointer in `Subtree` or `MutableSubtree`, respectively. - 42 | // Because of alignment, for any valid pointer this will be 0, giving - 43 | // us the opportunity to make use of this bit to signify whether to use - 44 | // the pointer or the inline struct. 
- 45 | typedef struct SubtreeInlineData SubtreeInlineData; - | - 46 | #define SUBTREE_BITS \ - 47 | bool visible : 1; \ - 48 | bool named : 1; \ - 49 | bool extra : 1; \ - 50 | bool has_changes : 1; \ - 51 | bool is_missing : 1; \ - 52 | bool is_keyword : 1; - | - 53 | #define SUBTREE_SIZE \ - 54 | uint8_t padding_columns; \ - 55 | uint8_t padding_rows : 4; \ - 56 | uint8_t lookahead_bytes : 4; \ - 57 | uint8_t padding_bytes; \ - 58 | uint8_t size_bytes; - | - 59 | #if TS_BIG_ENDIAN - 60 | #if TS_PTR_SIZE == 32 - | - 61 | struct SubtreeInlineData { - 62 | uint16_t parse_state; - 63 | uint8_t symbol; - 64 | SUBTREE_BITS - 65 | bool unused : 1; - 66 | bool is_inline : 1; - 67 | SUBTREE_SIZE - 68 | }; - | - 69 | #else - | - 70 | struct SubtreeInlineData { - 71 | SUBTREE_SIZE - 72 | uint16_t parse_state; - 73 | uint8_t symbol; - 74 | SUBTREE_BITS - 75 | bool unused : 1; - 76 | bool is_inline : 1; - 77 | }; - | - 78 | #endif - 79 | #else - | - 80 | struct SubtreeInlineData { - 81 | bool is_inline : 1; - 82 | SUBTREE_BITS - 83 | uint8_t symbol; - 84 | uint16_t parse_state; - 85 | SUBTREE_SIZE - 86 | }; - | - 87 | #endif - | - 88 | #undef SUBTREE_BITS - 89 | #undef SUBTREE_SIZE - | - 90 | // A heap-allocated representation of a subtree. - 91 | // - 92 | // This representation is used for parent nodes, external tokens, - 93 | // errors, and other leaf nodes whose data is too large to fit into - 94 | // the inline representation. 
- 95 | typedef struct { - 96 | volatile uint32_t ref_count; - 97 | Length padding; - 98 | Length size; - 99 | uint32_t lookahead_bytes; - 100 | uint32_t error_cost; - 101 | uint32_t child_count; - 102 | TSSymbol symbol; - 103 | TSStateId parse_state; - | - 104 | bool visible : 1; - 105 | bool named : 1; - 106 | bool extra : 1; - 107 | bool fragile_left : 1; - 108 | bool fragile_right : 1; - 109 | bool has_changes : 1; - 110 | bool has_external_tokens : 1; - 111 | bool has_external_scanner_state_change : 1; - 112 | bool depends_on_column: 1; - 113 | bool is_missing : 1; - 114 | bool is_keyword : 1; - | - 115 | union { - 116 | // Non-terminal subtrees (`child_count > 0`) - 117 | struct { - 118 | uint32_t visible_child_count; - 119 | uint32_t named_child_count; - 120 | uint32_t visible_descendant_count; - 121 | int32_t dynamic_precedence; - 122 | uint16_t repeat_depth; - 123 | uint16_t production_id; - 124 | struct { - 125 | TSSymbol symbol; - 126 | TSStateId parse_state; - 127 | } first_leaf; - 128 | }; - | - 129 | // External terminal subtrees (`child_count == 0 && has_external_tokens`) - 130 | ExternalScannerState external_scanner_state; - | - 131 | // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`) - 132 | int32_t lookahead_char; - 133 | }; - 134 | } SubtreeHeapData; - | - 135 | // The fundamental building block of a syntax tree. - 136 | typedef union { - 137 | SubtreeInlineData data; - 138 | const SubtreeHeapData *ptr; - 139 | } Subtree; - | - 140 | // Like Subtree, but mutable. 
- 141 | typedef union { - 142 | SubtreeInlineData data; - 143 | SubtreeHeapData *ptr; - 144 | } MutableSubtree; - | - 145 | typedef Array(Subtree) SubtreeArray; - 146 | typedef Array(MutableSubtree) MutableSubtreeArray; - | - 147 | typedef struct { - 148 | MutableSubtreeArray free_trees; - 149 | MutableSubtreeArray tree_stack; - 150 | } SubtreePool; - | - 151 | void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length); - 152 | const char *ts_external_scanner_state_data(const ExternalScannerState *self); - 153 | bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length); - 154 | void ts_external_scanner_state_delete(ExternalScannerState *self); - | - 155 | void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest); - 156 | void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self); - 157 | void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self); - 158 | void ts_subtree_array_remove_trailing_extras(SubtreeArray *self, SubtreeArray *destination); - 159 | void ts_subtree_array_reverse(SubtreeArray *self); - | - 160 | SubtreePool ts_subtree_pool_new(uint32_t capacity); - 161 | void ts_subtree_pool_delete(SubtreePool *self); - | - 162 | Subtree ts_subtree_new_leaf( - 163 | SubtreePool *pool, TSSymbol symbol, Length padding, Length size, - 164 | uint32_t lookahead_bytes, TSStateId parse_state, - 165 | bool has_external_tokens, bool depends_on_column, - 166 | bool is_keyword, const TSLanguage *language - 167 | ); - 168 | Subtree ts_subtree_new_error( - 169 | SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, - 170 | uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language - 171 | ); - 172 | MutableSubtree ts_subtree_new_node( - 173 | TSSymbol symbol, - 174 | SubtreeArray *chiildren, - 175 | unsigned production_id, - 176 | const TSLanguage *language - 177 | ); - 178 | Subtree ts_subtree_new_error_node( - 179 | SubtreeArray 
*children, - 180 | bool extra, - 181 | const TSLanguage * language - 182 | ); - 183 | Subtree ts_subtree_new_missing_leaf( - 184 | SubtreePool *pool, - 185 | TSSymbol symbol, - 186 | Length padding, - 187 | uint32_t lookahead_bytes, - 188 | const TSLanguage *language - 189 | ); - 190 | MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self); - 191 | void ts_subtree_retain(Subtree self); - 192 | void ts_subtree_release(SubtreePool *pool, Subtree self); - 193 | int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool); - 194 | void ts_subtree_set_symbol(MutableSubtree *self, TSSymbol symbol, const TSLanguage *language); - 195 | void ts_subtree_compress(MutableSubtree self, unsigned count, const TSLanguage *language, MutableSubtreeArray *stack); - 196 | void ts_subtree_summarize_children(MutableSubtree self, const TSLanguage *language); - 197 | Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool); - 198 | char *ts_subtree_string(Subtree self, TSSymbol alias_symbol, bool alias_is_named, const TSLanguage *language, bool include_all); - 199 | void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f); - 200 | Subtree ts_subtree_last_external_token(Subtree tree); - 201 | const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self); - 202 | bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other); - | - 203 | #define SUBTREE_GET(self, name) ((self).data.is_inline ? 
(self).data.name : (self).ptr->name) - | - 204 | static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); } - 205 | static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); } - 206 | static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); } - 207 | static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); } - 208 | static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); } - 209 | static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); } - 210 | static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); } - 211 | static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); } - 212 | static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); } - | - 213 | #undef SUBTREE_GET - | - 214 | // Get the size needed to store a heap-allocated subtree with the given - 215 | // number of children. - 216 | static inline size_t ts_subtree_alloc_size(uint32_t child_count) { - 217 | return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); - 218 | } - | - 219 | // Get a subtree's children, which are allocated immediately before the - 220 | // tree's own heap data. - 221 | #define ts_subtree_children(self) \ - 222 | ((self).data.is_inline ? 
NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) - | - 223 | static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) { - 224 | if (self->data.is_inline) { - 225 | self->data.extra = is_extra; - 226 | } else { - 227 | self->ptr->extra = is_extra; - 228 | } - 229 | } - | - 230 | static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) { - 231 | if (self.data.is_inline) return self.data.symbol; - 232 | if (self.ptr->child_count == 0) return self.ptr->symbol; - 233 | return self.ptr->first_leaf.symbol; - 234 | } - | - 235 | static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) { - 236 | if (self.data.is_inline) return self.data.parse_state; - 237 | if (self.ptr->child_count == 0) return self.ptr->parse_state; - 238 | return self.ptr->first_leaf.parse_state; - 239 | } - | - 240 | static inline Length ts_subtree_padding(Subtree self) { - 241 | if (self.data.is_inline) { - 242 | Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}}; - 243 | return result; - 244 | } else { - 245 | return self.ptr->padding; - 246 | } - 247 | } - | - 248 | static inline Length ts_subtree_size(Subtree self) { - 249 | if (self.data.is_inline) { - 250 | Length result = {self.data.size_bytes, {0, self.data.size_bytes}}; - 251 | return result; - 252 | } else { - 253 | return self.ptr->size; - 254 | } - 255 | } - | - 256 | static inline Length ts_subtree_total_size(Subtree self) { - 257 | return length_add(ts_subtree_padding(self), ts_subtree_size(self)); - 258 | } - | - 259 | static inline uint32_t ts_subtree_total_bytes(Subtree self) { - 260 | return ts_subtree_total_size(self).bytes; - 261 | } - | - 262 | static inline uint32_t ts_subtree_child_count(Subtree self) { - 263 | return self.data.is_inline ? 0 : self.ptr->child_count; - 264 | } - | - 265 | static inline uint32_t ts_subtree_repeat_depth(Subtree self) { - 266 | return self.data.is_inline ? 
0 : self.ptr->repeat_depth; - 267 | } - | - 268 | static inline uint32_t ts_subtree_is_repetition(Subtree self) { - 269 | return self.data.is_inline - 270 | ? 0 - 271 | : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0; - 272 | } - | - 273 | static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) { - 274 | return (self.data.is_inline || self.ptr->child_count == 0) - 275 | ? 0 - 276 | : self.ptr->visible_descendant_count; - 277 | } - | - 278 | static inline uint32_t ts_subtree_visible_child_count(Subtree self) { - 279 | if (ts_subtree_child_count(self) > 0) { - 280 | return self.ptr->visible_child_count; - 281 | } else { - 282 | return 0; - 283 | } - 284 | } - | - 285 | static inline uint32_t ts_subtree_error_cost(Subtree self) { - 286 | if (ts_subtree_missing(self)) { - 287 | return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; - 288 | } else { - 289 | return self.data.is_inline ? 0 : self.ptr->error_cost; - 290 | } - 291 | } - | - 292 | static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { - 293 | return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence; - 294 | } - | - 295 | static inline uint16_t ts_subtree_production_id(Subtree self) { - 296 | if (ts_subtree_child_count(self) > 0) { - 297 | return self.ptr->production_id; - 298 | } else { - 299 | return 0; - 300 | } - 301 | } - | - 302 | static inline bool ts_subtree_fragile_left(Subtree self) { - 303 | return self.data.is_inline ? false : self.ptr->fragile_left; - 304 | } - | - 305 | static inline bool ts_subtree_fragile_right(Subtree self) { - 306 | return self.data.is_inline ? false : self.ptr->fragile_right; - 307 | } - | - 308 | static inline bool ts_subtree_has_external_tokens(Subtree self) { - 309 | return self.data.is_inline ? false : self.ptr->has_external_tokens; - 310 | } - | - 311 | static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) { - 312 | return self.data.is_inline ? 
false : self.ptr->has_external_scanner_state_change; - 313 | } - | - 314 | static inline bool ts_subtree_depends_on_column(Subtree self) { - 315 | return self.data.is_inline ? false : self.ptr->depends_on_column; - 316 | } - | - 317 | static inline bool ts_subtree_is_fragile(Subtree self) { - 318 | return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right); - 319 | } - | - 320 | static inline bool ts_subtree_is_error(Subtree self) { - 321 | return ts_subtree_symbol(self) == ts_builtin_sym_error; - 322 | } - | - 323 | static inline bool ts_subtree_is_eof(Subtree self) { - 324 | return ts_subtree_symbol(self) == ts_builtin_sym_end; - 325 | } - | - 326 | static inline Subtree ts_subtree_from_mut(MutableSubtree self) { - 327 | Subtree result; - 328 | result.data = self.data; - 329 | return result; - 330 | } - | - 331 | static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) { - 332 | MutableSubtree result; - 333 | result.data = self.data; - 334 | return result; - 335 | } - | - 336 | #ifdef __cplusplus - 337 | } - 338 | #endif - | - 339 | #endif // TREE_SITTER_SUBTREE_H_ - - - --------------------------------------------------------------------------------- -/lib/src/tree_cursor.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/api.h" - 2 | #include "./tree_cursor.h" - 3 | #include "./language.h" - 4 | #include "./tree.h" - | - 5 | typedef struct { - 6 | Subtree parent; - 7 | const TSTree *tree; - 8 | Length position; - 9 | uint32_t child_index; - 10 | uint32_t structural_child_index; - 11 | uint32_t descendant_index; - 12 | const TSSymbol *alias_sequence; - 13 | } CursorChildIterator; - | - 14 | // CursorChildIterator - | - 15 | static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) { - 16 | TreeCursorEntry *entry = array_get(&self->stack, index); - 17 | if (index == 0 || ts_subtree_visible(*entry->subtree)) { - 18 | return true; 
- 19 | } else if (!ts_subtree_extra(*entry->subtree)) { - 20 | TreeCursorEntry *parent_entry = array_get(&self->stack, index - 1); - 21 | return ts_language_alias_at( - 22 | self->tree->language, - 23 | parent_entry->subtree->ptr->production_id, - 24 | entry->structural_child_index - 25 | ); - 26 | } else { - 27 | return false; - 28 | } - 29 | } - | - 30 | static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { - 31 | TreeCursorEntry *last_entry = array_back(&self->stack); - 32 | if (ts_subtree_child_count(*last_entry->subtree) == 0) { - 33 | return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL}; - 34 | } - 35 | const TSSymbol *alias_sequence = ts_language_alias_sequence( - 36 | self->tree->language, - 37 | last_entry->subtree->ptr->production_id - 38 | ); - | - 39 | uint32_t descendant_index = last_entry->descendant_index; - 40 | if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) { - 41 | descendant_index += 1; - 42 | } - | - 43 | return (CursorChildIterator) { - 44 | .tree = self->tree, - 45 | .parent = *last_entry->subtree, - 46 | .position = last_entry->position, - 47 | .child_index = 0, - 48 | .structural_child_index = 0, - 49 | .descendant_index = descendant_index, - 50 | .alias_sequence = alias_sequence, - 51 | }; - 52 | } - | - 53 | static inline bool ts_tree_cursor_child_iterator_next( - 54 | CursorChildIterator *self, - 55 | TreeCursorEntry *result, - 56 | bool *visible - 57 | ) { - 58 | if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; - 59 | const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - 60 | *result = (TreeCursorEntry) { - 61 | .subtree = child, - 62 | .position = self->position, - 63 | .child_index = self->child_index, - 64 | .structural_child_index = self->structural_child_index, - 65 | .descendant_index = self->descendant_index, - 66 | }; - 67 | *visible = ts_subtree_visible(*child); - 68 | bool 
extra = ts_subtree_extra(*child); - 69 | if (!extra) { - 70 | if (self->alias_sequence) { - 71 | *visible |= self->alias_sequence[self->structural_child_index]; - 72 | } - 73 | self->structural_child_index++; - 74 | } - | - 75 | self->descendant_index += ts_subtree_visible_descendant_count(*child); - 76 | if (*visible) { - 77 | self->descendant_index += 1; - 78 | } - | - 79 | self->position = length_add(self->position, ts_subtree_size(*child)); - 80 | self->child_index++; - | - 81 | if (self->child_index < self->parent.ptr->child_count) { - 82 | Subtree next_child = ts_subtree_children(self->parent)[self->child_index]; - 83 | self->position = length_add(self->position, ts_subtree_padding(next_child)); - 84 | } - | - 85 | return true; - 86 | } - | - 87 | // Return a position that, when `b` is added to it, yields `a`. This - 88 | // can only be computed if `b` has zero rows. Otherwise, this function - 89 | // returns `LENGTH_UNDEFINED`, and the caller needs to recompute - 90 | // the position some other way. 
- 91 | static inline Length length_backtrack(Length a, Length b) { - 92 | if (length_is_undefined(a) || b.extent.row != 0) { - 93 | return LENGTH_UNDEFINED; - 94 | } - | - 95 | Length result; - 96 | result.bytes = a.bytes - b.bytes; - 97 | result.extent.row = a.extent.row; - 98 | result.extent.column = a.extent.column - b.extent.column; - 99 | return result; - 100 | } - | - 101 | static inline bool ts_tree_cursor_child_iterator_previous( - 102 | CursorChildIterator *self, - 103 | TreeCursorEntry *result, - 104 | bool *visible - 105 | ) { - 106 | // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into - 107 | // account unsigned underflow - 108 | if (!self->parent.ptr || (int8_t)self->child_index == -1) return false; - 109 | const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; - 110 | *result = (TreeCursorEntry) { - 111 | .subtree = child, - 112 | .position = self->position, - 113 | .child_index = self->child_index, - 114 | .structural_child_index = self->structural_child_index, - 115 | }; - 116 | *visible = ts_subtree_visible(*child); - 117 | bool extra = ts_subtree_extra(*child); - | - 118 | self->position = length_backtrack(self->position, ts_subtree_padding(*child)); - 119 | self->child_index--; - | - 120 | if (!extra && self->alias_sequence) { - 121 | *visible |= self->alias_sequence[self->structural_child_index]; - 122 | if (self->structural_child_index > 0) { - 123 | self->structural_child_index--; - 124 | } - 125 | } - | - 126 | // unsigned can underflow so compare it to child_count - 127 | if (self->child_index < self->parent.ptr->child_count) { - 128 | Subtree previous_child = ts_subtree_children(self->parent)[self->child_index]; - 129 | Length size = ts_subtree_size(previous_child); - 130 | self->position = length_backtrack(self->position, size); - 131 | } - | - 132 | return true; - 133 | } - | - 134 | // TSTreeCursor - lifecycle - | - 135 | TSTreeCursor ts_tree_cursor_new(TSNode node) { - 136 | TSTreeCursor 
self = {NULL, NULL, {0, 0, 0}}; - 137 | ts_tree_cursor_init((TreeCursor *)&self, node); - 138 | return self; - 139 | } - | - 140 | void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) { - 141 | ts_tree_cursor_init((TreeCursor *)_self, node); - 142 | } - | - 143 | void ts_tree_cursor_init(TreeCursor *self, TSNode node) { - 144 | self->tree = node.tree; - 145 | self->root_alias_symbol = node.context[3]; - 146 | array_clear(&self->stack); - 147 | array_push(&self->stack, ((TreeCursorEntry) { - 148 | .subtree = (const Subtree *)node.id, - 149 | .position = { - 150 | ts_node_start_byte(node), - 151 | ts_node_start_point(node) - 152 | }, - 153 | .child_index = 0, - 154 | .structural_child_index = 0, - 155 | .descendant_index = 0, - 156 | })); - 157 | } - | - 158 | void ts_tree_cursor_delete(TSTreeCursor *_self) { - 159 | TreeCursor *self = (TreeCursor *)_self; - 160 | array_delete(&self->stack); - 161 | } - | - 162 | // TSTreeCursor - walking the tree - | - 163 | TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) { - 164 | TreeCursor *self = (TreeCursor *)_self; - 165 | bool visible; - 166 | TreeCursorEntry entry; - 167 | CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - 168 | while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - 169 | if (visible) { - 170 | array_push(&self->stack, entry); - 171 | return TreeCursorStepVisible; - 172 | } - 173 | if (ts_subtree_visible_child_count(*entry.subtree) > 0) { - 174 | array_push(&self->stack, entry); - 175 | return TreeCursorStepHidden; - 176 | } - 177 | } - 178 | return TreeCursorStepNone; - 179 | } - | - 180 | bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { - 181 | for (;;) { - 182 | switch (ts_tree_cursor_goto_first_child_internal(self)) { - 183 | case TreeCursorStepHidden: - 184 | continue; - 185 | case TreeCursorStepVisible: - 186 | return true; - 187 | default: - 188 | return false; - 189 | } - 190 | } - 191 | } - | - 192 | 
TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) { - 193 | TreeCursor *self = (TreeCursor *)_self; - 194 | bool visible; - 195 | TreeCursorEntry entry; - 196 | CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - 197 | if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) return TreeCursorStepNone; - | - 198 | TreeCursorEntry last_entry = {0}; - 199 | TreeCursorStep last_step = TreeCursorStepNone; - 200 | while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - 201 | if (visible) { - 202 | last_entry = entry; - 203 | last_step = TreeCursorStepVisible; - 204 | } - 205 | else if (ts_subtree_visible_child_count(*entry.subtree) > 0) { - 206 | last_entry = entry; - 207 | last_step = TreeCursorStepHidden; - 208 | } - 209 | } - 210 | if (last_entry.subtree) { - 211 | array_push(&self->stack, last_entry); - 212 | return last_step; - 213 | } - | - 214 | return TreeCursorStepNone; - 215 | } - | - 216 | bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) { - 217 | for (;;) { - 218 | switch (ts_tree_cursor_goto_last_child_internal(self)) { - 219 | case TreeCursorStepHidden: - 220 | continue; - 221 | case TreeCursorStepVisible: - 222 | return true; - 223 | default: - 224 | return false; - 225 | } - 226 | } - 227 | } - | - 228 | static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( - 229 | TSTreeCursor *_self, - 230 | uint32_t goal_byte, - 231 | TSPoint goal_point - 232 | ) { - 233 | TreeCursor *self = (TreeCursor *)_self; - 234 | uint32_t initial_size = self->stack.size; - 235 | uint32_t visible_child_index = 0; - | - 236 | bool did_descend; - 237 | do { - 238 | did_descend = false; - | - 239 | bool visible; - 240 | TreeCursorEntry entry; - 241 | CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - 242 | while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - 243 | Length entry_end = length_add(entry.position, 
ts_subtree_size(*entry.subtree)); - 244 | bool at_goal = entry_end.bytes > goal_byte && point_gt(entry_end.extent, goal_point); - 245 | uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree); - 246 | if (at_goal) { - 247 | if (visible) { - 248 | array_push(&self->stack, entry); - 249 | return visible_child_index; - 250 | } - 251 | if (visible_child_count > 0) { - 252 | array_push(&self->stack, entry); - 253 | did_descend = true; - 254 | break; - 255 | } - 256 | } else if (visible) { - 257 | visible_child_index++; - 258 | } else { - 259 | visible_child_index += visible_child_count; - 260 | } - 261 | } - 262 | } while (did_descend); - | - 263 | self->stack.size = initial_size; - 264 | return -1; - 265 | } - | - 266 | int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) { - 267 | return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO); - 268 | } - | - 269 | int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) { - 270 | return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point); - 271 | } - | - 272 | TreeCursorStep ts_tree_cursor_goto_sibling_internal( - 273 | TSTreeCursor *_self, - 274 | bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *) - 275 | ) { - 276 | TreeCursor *self = (TreeCursor *)_self; - 277 | uint32_t initial_size = self->stack.size; - | - 278 | while (self->stack.size > 1) { - 279 | TreeCursorEntry entry = array_pop(&self->stack); - 280 | CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - 281 | iterator.child_index = entry.child_index; - 282 | iterator.structural_child_index = entry.structural_child_index; - 283 | iterator.position = entry.position; - 284 | iterator.descendant_index = entry.descendant_index; - | - 285 | bool visible = false; - 286 | advance(&iterator, &entry, &visible); - 287 | if (visible && self->stack.size + 1 < initial_size) break; - | - 288 | while 
(advance(&iterator, &entry, &visible)) { - 289 | if (visible) { - 290 | array_push(&self->stack, entry); - 291 | return TreeCursorStepVisible; - 292 | } - | - 293 | if (ts_subtree_visible_child_count(*entry.subtree)) { - 294 | array_push(&self->stack, entry); - 295 | return TreeCursorStepHidden; - 296 | } - 297 | } - 298 | } - | - 299 | self->stack.size = initial_size; - 300 | return TreeCursorStepNone; - 301 | } - | - 302 | TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) { - 303 | return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next); - 304 | } - | - 305 | bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { - 306 | switch (ts_tree_cursor_goto_next_sibling_internal(self)) { - 307 | case TreeCursorStepHidden: - 308 | ts_tree_cursor_goto_first_child(self); - 309 | return true; - 310 | case TreeCursorStepVisible: - 311 | return true; - 312 | default: - 313 | return false; - 314 | } - 315 | } - | - 316 | TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) { - 317 | // since subtracting across row loses column information, we may have to - 318 | // restore it - 319 | TreeCursor *self = (TreeCursor *)_self; - | - 320 | // for that, save current position before traversing - 321 | TreeCursorStep step = ts_tree_cursor_goto_sibling_internal( - 322 | _self, ts_tree_cursor_child_iterator_previous); - 323 | if (step == TreeCursorStepNone) - 324 | return step; - | - 325 | // if length is already valid, there's no need to recompute it - 326 | if (!length_is_undefined(array_back(&self->stack)->position)) - 327 | return step; - | - 328 | // restore position from the parent node - 329 | const TreeCursorEntry *parent = array_get(&self->stack, self->stack.size - 2); - 330 | Length position = parent->position; - 331 | uint32_t child_index = array_back(&self->stack)->child_index; - 332 | const Subtree *children = ts_subtree_children((*(parent->subtree))); - | - 333 | if (child_index > 0) 
{ - 334 | // skip first child padding since its position should match the position of the parent - 335 | position = length_add(position, ts_subtree_size(children[0])); - 336 | for (uint32_t i = 1; i < child_index; ++i) { - 337 | position = length_add(position, ts_subtree_total_size(children[i])); - 338 | } - 339 | position = length_add(position, ts_subtree_padding(children[child_index])); - 340 | } - | - 341 | array_back(&self->stack)->position = position; - | - 342 | return step; - 343 | } - | - 344 | bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) { - 345 | switch (ts_tree_cursor_goto_previous_sibling_internal(self)) { - 346 | case TreeCursorStepHidden: - 347 | ts_tree_cursor_goto_last_child(self); - 348 | return true; - 349 | case TreeCursorStepVisible: - 350 | return true; - 351 | default: - 352 | return false; - 353 | } - 354 | } - | - 355 | bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { - 356 | TreeCursor *self = (TreeCursor *)_self; - 357 | for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { - 358 | if (ts_tree_cursor_is_entry_visible(self, i)) { - 359 | self->stack.size = i + 1; - 360 | return true; - 361 | } - 362 | } - 363 | return false; - 364 | } - | - 365 | void ts_tree_cursor_goto_descendant( - 366 | TSTreeCursor *_self, - 367 | uint32_t goal_descendant_index - 368 | ) { - 369 | TreeCursor *self = (TreeCursor *)_self; - | - 370 | // Ascend to the lowest ancestor that contains the goal node. - 371 | for (;;) { - 372 | uint32_t i = self->stack.size - 1; - 373 | TreeCursorEntry *entry = array_get(&self->stack, i); - 374 | uint32_t next_descendant_index = - 375 | entry->descendant_index + - 376 | (ts_tree_cursor_is_entry_visible(self, i) ? 
1 : 0) + - 377 | ts_subtree_visible_descendant_count(*entry->subtree); - 378 | if ( - 379 | (entry->descendant_index <= goal_descendant_index) && - 380 | (next_descendant_index > goal_descendant_index) - 381 | ) { - 382 | break; - 383 | } else if (self->stack.size <= 1) { - 384 | return; - 385 | } else { - 386 | self->stack.size--; - 387 | } - 388 | } - | - 389 | // Descend to the goal node. - 390 | bool did_descend = true; - 391 | do { - 392 | did_descend = false; - 393 | bool visible; - 394 | TreeCursorEntry entry; - 395 | CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); - 396 | if (iterator.descendant_index > goal_descendant_index) { - 397 | return; - 398 | } - | - 399 | while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { - 400 | if (iterator.descendant_index > goal_descendant_index) { - 401 | array_push(&self->stack, entry); - 402 | if (visible && entry.descendant_index == goal_descendant_index) { - 403 | return; - 404 | } else { - 405 | did_descend = true; - 406 | break; - 407 | } - 408 | } - 409 | } - 410 | } while (did_descend); - 411 | } - | - 412 | uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) { - 413 | const TreeCursor *self = (const TreeCursor *)_self; - 414 | TreeCursorEntry *last_entry = array_back(&self->stack); - 415 | return last_entry->descendant_index; - 416 | } - | - 417 | TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { - 418 | const TreeCursor *self = (const TreeCursor *)_self; - 419 | TreeCursorEntry *last_entry = array_back(&self->stack); - 420 | bool is_extra = ts_subtree_extra(*last_entry->subtree); - 421 | TSSymbol alias_symbol = is_extra ? 
0 : self->root_alias_symbol; - 422 | if (self->stack.size > 1 && !is_extra) { - 423 | TreeCursorEntry *parent_entry = array_get(&self->stack, self->stack.size - 2); - 424 | alias_symbol = ts_language_alias_at( - 425 | self->tree->language, - 426 | parent_entry->subtree->ptr->production_id, - 427 | last_entry->structural_child_index - 428 | ); - 429 | } - 430 | return ts_node_new( - 431 | self->tree, - 432 | last_entry->subtree, - 433 | last_entry->position, - 434 | alias_symbol - 435 | ); - 436 | } - | - 437 | // Private - Get various facts about the current node that are needed - 438 | // when executing tree queries. - 439 | void ts_tree_cursor_current_status( - 440 | const TSTreeCursor *_self, - 441 | TSFieldId *field_id, - 442 | bool *has_later_siblings, - 443 | bool *has_later_named_siblings, - 444 | bool *can_have_later_siblings_with_this_field, - 445 | TSSymbol *supertypes, - 446 | unsigned *supertype_count - 447 | ) { - 448 | const TreeCursor *self = (const TreeCursor *)_self; - 449 | unsigned max_supertypes = *supertype_count; - 450 | *field_id = 0; - 451 | *supertype_count = 0; - 452 | *has_later_siblings = false; - 453 | *has_later_named_siblings = false; - 454 | *can_have_later_siblings_with_this_field = false; - | - 455 | // Walk up the tree, visiting the current node and its invisible ancestors, - 456 | // because fields can refer to nodes through invisible *wrapper* nodes, - 457 | for (unsigned i = self->stack.size - 1; i > 0; i--) { - 458 | TreeCursorEntry *entry = array_get(&self->stack, i); - 459 | TreeCursorEntry *parent_entry = array_get(&self->stack, i - 1); - | - 460 | const TSSymbol *alias_sequence = ts_language_alias_sequence( - 461 | self->tree->language, - 462 | parent_entry->subtree->ptr->production_id - 463 | ); - | - 464 | #define subtree_symbol(subtree, structural_child_index) \ - 465 | (( \ - 466 | !ts_subtree_extra(subtree) && \ - 467 | alias_sequence && \ - 468 | alias_sequence[structural_child_index] \ - 469 | ) ? 
\ - 470 | alias_sequence[structural_child_index] : \ - 471 | ts_subtree_symbol(subtree)) - | - 472 | // Stop walking up when a visible ancestor is found. - 473 | TSSymbol entry_symbol = subtree_symbol( - 474 | *entry->subtree, - 475 | entry->structural_child_index - 476 | ); - 477 | TSSymbolMetadata entry_metadata = ts_language_symbol_metadata( - 478 | self->tree->language, - 479 | entry_symbol - 480 | ); - 481 | if (i != self->stack.size - 1 && entry_metadata.visible) break; - | - 482 | // Record any supertypes - 483 | if (entry_metadata.supertype && *supertype_count < max_supertypes) { - 484 | supertypes[*supertype_count] = entry_symbol; - 485 | (*supertype_count)++; - 486 | } - | - 487 | // Determine if the current node has later siblings. - 488 | if (!*has_later_siblings) { - 489 | unsigned sibling_count = parent_entry->subtree->ptr->child_count; - 490 | unsigned structural_child_index = entry->structural_child_index; - 491 | if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; - 492 | for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { - 493 | Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j]; - 494 | TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata( - 495 | self->tree->language, - 496 | subtree_symbol(sibling, structural_child_index) - 497 | ); - 498 | if (sibling_metadata.visible) { - 499 | *has_later_siblings = true; - 500 | if (*has_later_named_siblings) break; - 501 | if (sibling_metadata.named) { - 502 | *has_later_named_siblings = true; - 503 | break; - 504 | } - 505 | } else if (ts_subtree_visible_child_count(sibling) > 0) { - 506 | *has_later_siblings = true; - 507 | if (*has_later_named_siblings) break; - 508 | if (sibling.ptr->named_child_count > 0) { - 509 | *has_later_named_siblings = true; - 510 | break; - 511 | } - 512 | } - 513 | if (!ts_subtree_extra(sibling)) structural_child_index++; - 514 | } - 515 | } - | - 516 | #undef subtree_symbol - | - 517 | if 
(!ts_subtree_extra(*entry->subtree)) { - 518 | const TSFieldMapEntry *field_map, *field_map_end; - 519 | ts_language_field_map( - 520 | self->tree->language, - 521 | parent_entry->subtree->ptr->production_id, - 522 | &field_map, &field_map_end - 523 | ); - | - 524 | // Look for a field name associated with the current node. - 525 | if (!*field_id) { - 526 | for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - 527 | if (!map->inherited && map->child_index == entry->structural_child_index) { - 528 | *field_id = map->field_id; - 529 | break; - 530 | } - 531 | } - 532 | } - | - 533 | // Determine if the current node can have later siblings with the same field name. - 534 | if (*field_id) { - 535 | for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - 536 | if ( - 537 | map->field_id == *field_id && - 538 | map->child_index > entry->structural_child_index - 539 | ) { - 540 | *can_have_later_siblings_with_this_field = true; - 541 | break; - 542 | } - 543 | } - 544 | } - 545 | } - 546 | } - 547 | } - | - 548 | uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) { - 549 | const TreeCursor *self = (const TreeCursor *)_self; - 550 | uint32_t depth = 0; - 551 | for (unsigned i = 1; i < self->stack.size; i++) { - 552 | if (ts_tree_cursor_is_entry_visible(self, i)) { - 553 | depth++; - 554 | } - 555 | } - 556 | return depth; - 557 | } - | - 558 | TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) { - 559 | const TreeCursor *self = (const TreeCursor *)_self; - 560 | for (int i = (int)self->stack.size - 2; i >= 0; i--) { - 561 | TreeCursorEntry *entry = array_get(&self->stack, i); - 562 | bool is_visible = true; - 563 | TSSymbol alias_symbol = 0; - 564 | if (i > 0) { - 565 | TreeCursorEntry *parent_entry = array_get(&self->stack, i - 1); - 566 | alias_symbol = ts_language_alias_at( - 567 | self->tree->language, - 568 | parent_entry->subtree->ptr->production_id, - 569 | entry->structural_child_index - 570 | ); - 
571 | is_visible = (alias_symbol != 0) || ts_subtree_visible(*entry->subtree); - 572 | } - 573 | if (is_visible) { - 574 | return ts_node_new( - 575 | self->tree, - 576 | entry->subtree, - 577 | entry->position, - 578 | alias_symbol - 579 | ); - 580 | } - 581 | } - 582 | return ts_node_new(NULL, NULL, length_zero(), 0); - 583 | } - | - 584 | TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { - 585 | const TreeCursor *self = (const TreeCursor *)_self; - | - 586 | // Walk up the tree, visiting the current node and its invisible ancestors. - 587 | for (unsigned i = self->stack.size - 1; i > 0; i--) { - 588 | TreeCursorEntry *entry = array_get(&self->stack, i); - 589 | TreeCursorEntry *parent_entry = array_get(&self->stack, i - 1); - | - 590 | // Stop walking up when another visible node is found. - 591 | if ( - 592 | i != self->stack.size - 1 && - 593 | ts_tree_cursor_is_entry_visible(self, i) - 594 | ) break; - | - 595 | if (ts_subtree_extra(*entry->subtree)) break; - | - 596 | const TSFieldMapEntry *field_map, *field_map_end; - 597 | ts_language_field_map( - 598 | self->tree->language, - 599 | parent_entry->subtree->ptr->production_id, - 600 | &field_map, &field_map_end - 601 | ); - 602 | for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { - 603 | if (!map->inherited && map->child_index == entry->structural_child_index) { - 604 | return map->field_id; - 605 | } - 606 | } - 607 | } - 608 | return 0; - 609 | } - | - 610 | const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { - 611 | TSFieldId id = ts_tree_cursor_current_field_id(_self); - 612 | if (id) { - 613 | const TreeCursor *self = (const TreeCursor *)_self; - 614 | return self->tree->language->field_names[id]; - 615 | } else { - 616 | return NULL; - 617 | } - 618 | } - | - 619 | TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) { - 620 | const TreeCursor *cursor = (const TreeCursor *)_cursor; - 621 | TSTreeCursor res = {NULL, NULL, {0, 
0}}; - 622 | TreeCursor *copy = (TreeCursor *)&res; - 623 | copy->tree = cursor->tree; - 624 | copy->root_alias_symbol = cursor->root_alias_symbol; - 625 | array_init(©->stack); - 626 | array_push_all(©->stack, &cursor->stack); - 627 | return res; - 628 | } - | - 629 | void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) { - 630 | const TreeCursor *cursor = (const TreeCursor *)_src; - 631 | TreeCursor *copy = (TreeCursor *)_dst; - 632 | copy->tree = cursor->tree; - 633 | copy->root_alias_symbol = cursor->root_alias_symbol; - 634 | array_clear(©->stack); - 635 | array_push_all(©->stack, &cursor->stack); - 636 | } - - - --------------------------------------------------------------------------------- -/lib/src/tree_cursor.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_TREE_CURSOR_H_ - 2 | #define TREE_SITTER_TREE_CURSOR_H_ - | - 3 | #include "./subtree.h" - | - 4 | typedef struct { - 5 | const Subtree *subtree; - 6 | Length position; - 7 | uint32_t child_index; - 8 | uint32_t structural_child_index; - 9 | uint32_t descendant_index; - 10 | } TreeCursorEntry; - | - 11 | typedef struct { - 12 | const TSTree *tree; - 13 | Array(TreeCursorEntry) stack; - 14 | TSSymbol root_alias_symbol; - 15 | } TreeCursor; - | - 16 | typedef enum { - 17 | TreeCursorStepNone, - 18 | TreeCursorStepHidden, - 19 | TreeCursorStepVisible, - 20 | } TreeCursorStep; - | - 21 | void ts_tree_cursor_init(TreeCursor *self, TSNode node); - 22 | void ts_tree_cursor_current_status( - 23 | const TSTreeCursor *_self, - 24 | TSFieldId *field_id, - 25 | bool *has_later_siblings, - 26 | bool *has_later_named_siblings, - 27 | bool *can_have_later_siblings_with_this_field, - 28 | TSSymbol *supertypes, - 29 | unsigned *supertype_count - 30 | ); - | - 31 | TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self); - 32 | TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self); - | - 33 | 
static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) { - 34 | const TreeCursor *self = (const TreeCursor *)_self; - 35 | TreeCursorEntry *last_entry = array_back(&self->stack); - 36 | return *last_entry->subtree; - 37 | } - | - 38 | TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self); - | - 39 | #endif // TREE_SITTER_TREE_CURSOR_H_ - - - --------------------------------------------------------------------------------- -/lib/src/tree.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/api.h" - 2 | #include "./array.h" - 3 | #include "./get_changed_ranges.h" - 4 | #include "./length.h" - 5 | #include "./subtree.h" - 6 | #include "./tree_cursor.h" - 7 | #include "./tree.h" - | - 8 | TSTree *ts_tree_new( - 9 | Subtree root, const TSLanguage *language, - 10 | const TSRange *included_ranges, unsigned included_range_count - 11 | ) { - 12 | TSTree *result = ts_malloc(sizeof(TSTree)); - 13 | result->root = root; - 14 | result->language = ts_language_copy(language); - 15 | result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange)); - 16 | memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange)); - 17 | result->included_range_count = included_range_count; - 18 | return result; - 19 | } - | - 20 | TSTree *ts_tree_copy(const TSTree *self) { - 21 | ts_subtree_retain(self->root); - 22 | return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count); - 23 | } - | - 24 | void ts_tree_delete(TSTree *self) { - 25 | if (!self) return; - | - 26 | SubtreePool pool = ts_subtree_pool_new(0); - 27 | ts_subtree_release(&pool, self->root); - 28 | ts_subtree_pool_delete(&pool); - 29 | ts_language_delete(self->language); - 30 | ts_free(self->included_ranges); - 31 | ts_free(self); - 32 | } - | - 33 | TSNode ts_tree_root_node(const TSTree *self) { - 34 | return ts_node_new(self, &self->root, 
ts_subtree_padding(self->root), 0); - 35 | } - | - 36 | TSNode ts_tree_root_node_with_offset( - 37 | const TSTree *self, - 38 | uint32_t offset_bytes, - 39 | TSPoint offset_extent - 40 | ) { - 41 | Length offset = {offset_bytes, offset_extent}; - 42 | return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); - 43 | } - | - 44 | const TSLanguage *ts_tree_language(const TSTree *self) { - 45 | return self->language; - 46 | } - | - 47 | void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { - 48 | for (unsigned i = 0; i < self->included_range_count; i++) { - 49 | ts_range_edit(&self->included_ranges[i], edit); - 50 | } - | - 51 | SubtreePool pool = ts_subtree_pool_new(0); - 52 | self->root = ts_subtree_edit(self->root, edit, &pool); - 53 | ts_subtree_pool_delete(&pool); - 54 | } - | - 55 | TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) { - 56 | *length = self->included_range_count; - 57 | TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange)); - 58 | memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange)); - 59 | return ranges; - 60 | } - | - 61 | TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) { - 62 | TreeCursor cursor1 = {NULL, array_new(), 0}; - 63 | TreeCursor cursor2 = {NULL, array_new(), 0}; - 64 | ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); - 65 | ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree)); - | - 66 | TSRangeArray included_range_differences = array_new(); - 67 | ts_range_array_get_changed_ranges( - 68 | old_tree->included_ranges, old_tree->included_range_count, - 69 | new_tree->included_ranges, new_tree->included_range_count, - 70 | &included_range_differences - 71 | ); - | - 72 | TSRange *result; - 73 | *length = ts_subtree_get_changed_ranges( - 74 | &old_tree->root, &new_tree->root, &cursor1, &cursor2, - 75 | old_tree->language, &included_range_differences, &result - 
76 | ); - | - 77 | array_delete(&included_range_differences); - 78 | array_delete(&cursor1.stack); - 79 | array_delete(&cursor2.stack); - 80 | return result; - 81 | } - | - 82 | #ifdef _WIN32 - | - 83 | #include - 84 | #include - | - 85 | int _ts_dup(HANDLE handle) { - 86 | HANDLE dup_handle; - 87 | if (!DuplicateHandle( - 88 | GetCurrentProcess(), handle, - 89 | GetCurrentProcess(), &dup_handle, - 90 | 0, FALSE, DUPLICATE_SAME_ACCESS - 91 | )) return -1; - | - 92 | return _open_osfhandle((intptr_t)dup_handle, 0); - 93 | } - | - 94 | void ts_tree_print_dot_graph(const TSTree *self, int fd) { - 95 | FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a"); - 96 | ts_subtree_print_dot_graph(self->root, self->language, file); - 97 | fclose(file); - 98 | } - | - 99 | #elif !defined(__wasm__) // Wasm doesn't support dup - | - 100 | #include - | - 101 | int _ts_dup(int file_descriptor) { - 102 | return dup(file_descriptor); - 103 | } - | - 104 | void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) { - 105 | FILE *file = fdopen(_ts_dup(file_descriptor), "a"); - 106 | ts_subtree_print_dot_graph(self->root, self->language, file); - 107 | fclose(file); - 108 | } - | - 109 | #else - | - 110 | void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) { - 111 | (void)self; - 112 | (void)file_descriptor; - 113 | } - | - 114 | #endif - - - --------------------------------------------------------------------------------- -/lib/src/tree.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_TREE_H_ - 2 | #define TREE_SITTER_TREE_H_ - | - 3 | #include "./subtree.h" - | - 4 | #ifdef __cplusplus - 5 | extern "C" { - 6 | #endif - | - 7 | typedef struct { - 8 | const Subtree *child; - 9 | const Subtree *parent; - 10 | Length position; - 11 | TSSymbol alias_symbol; - 12 | } ParentCacheEntry; - | - 13 | struct TSTree { - 14 | Subtree root; - 15 | const TSLanguage *language; - 16 | TSRange 
*included_ranges; - 17 | unsigned included_range_count; - 18 | }; - | - 19 | TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count); - 20 | TSNode ts_node_new(const TSTree *tree, const Subtree *subtree, Length position, TSSymbol alias); - | - 21 | #ifdef __cplusplus - 22 | } - 23 | #endif - | - 24 | #endif // TREE_SITTER_TREE_H_ - - - --------------------------------------------------------------------------------- -/lib/src/ts_assert.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_ASSERT_H_ - 2 | #define TREE_SITTER_ASSERT_H_ - | - 3 | #ifdef NDEBUG - 4 | #define ts_assert(e) ((void)(e)) - 5 | #else - 6 | #include - 7 | #define ts_assert(e) assert(e) - 8 | #endif - | - 9 | #endif // TREE_SITTER_ASSERT_H_ - - - --------------------------------------------------------------------------------- -/lib/src/unicode.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_UNICODE_H_ - 2 | #define TREE_SITTER_UNICODE_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include - 7 | #include - | - 8 | #define U_EXPORT - 9 | #define U_EXPORT2 - 10 | #include "unicode/utf8.h" - 11 | #include "unicode/utf16.h" - 12 | #include "portable/endian.h" - | - 13 | #define U16_NEXT_LE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ - 14 | (c)=le16toh((s)[(i)++]); \ - 15 | if(U16_IS_LEAD(c)) { \ - 16 | uint16_t __c2; \ - 17 | if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ - 18 | ++(i); \ - 19 | (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ - 20 | } \ - 21 | } \ - 22 | } UPRV_BLOCK_MACRO_END - | - 23 | #define U16_NEXT_BE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ - 24 | (c)=be16toh((s)[(i)++]); \ - 25 | if(U16_IS_LEAD(c)) { \ - 26 | uint16_t __c2; \ - 27 | if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ - 28 | ++(i); \ - 29 | (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ - 30 | } \ - 
31 | } \ - 32 | } UPRV_BLOCK_MACRO_END - | - 33 | static const int32_t TS_DECODE_ERROR = U_SENTINEL; - | - 34 | static inline uint32_t ts_decode_utf8( - 35 | const uint8_t *string, - 36 | uint32_t length, - 37 | int32_t *code_point - 38 | ) { - 39 | uint32_t i = 0; - 40 | U8_NEXT(string, i, length, *code_point); - 41 | return i; - 42 | } - | - 43 | static inline uint32_t ts_decode_utf16_le( - 44 | const uint8_t *string, - 45 | uint32_t length, - 46 | int32_t *code_point - 47 | ) { - 48 | uint32_t i = 0; - 49 | U16_NEXT_LE(((uint16_t *)string), i, length, *code_point); - 50 | return i * 2; - 51 | } - | - 52 | static inline uint32_t ts_decode_utf16_be( - 53 | const uint8_t *string, - 54 | uint32_t length, - 55 | int32_t *code_point - 56 | ) { - 57 | uint32_t i = 0; - 58 | U16_NEXT_BE(((uint16_t *)string), i, length, *code_point); - 59 | return i * 2; - 60 | } - | - 61 | #ifdef __cplusplus - 62 | } - 63 | #endif - | - 64 | #endif // TREE_SITTER_UNICODE_H_ - - - --------------------------------------------------------------------------------- -/lib/src/unicode/ICU_SHA: --------------------------------------------------------------------------------- - 1 | 552b01f61127d30d6589aa4bf99468224979b661 - - - --------------------------------------------------------------------------------- -/lib/src/unicode/ptypes.h: --------------------------------------------------------------------------------- - 1 | // This file must exist in order for `utf8.h` and `utf16.h` to be used. - - - --------------------------------------------------------------------------------- -/lib/src/unicode/README.md: --------------------------------------------------------------------------------- - 1 | # ICU Parts - | - 2 | This directory contains a small subset of files from the Unicode organization's [ICU repository](https://github.com/unicode-org/icu). - | - 3 | ### License - | - 4 | The license for these files is contained in the `LICENSE` file within this directory. 
- | - 5 | ### Contents - | - 6 | * Source files taken from the [`icu4c/source/common/unicode`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c/source/common/unicode) directory: - 7 | * `utf8.h` - 8 | * `utf16.h` - 9 | * `umachine.h` - 10 | * Empty source files that are referenced by the above source files, but whose original contents in `libicu` are not needed: - 11 | * `ptypes.h` - 12 | * `urename.h` - 13 | * `utf.h` - 14 | * `ICU_SHA` - File containing the Git SHA of the commit in the `icu` repository from which the files were obtained. - 15 | * `LICENSE` - The license file from the [`icu4c`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c) directory of the `icu` repository. - 16 | * `README.md` - This text file. - | - 17 | ### Updating ICU - | - 18 | To incorporate changes from the upstream `icu` repository: - | - 19 | * Update `ICU_SHA` with the new Git SHA. - 20 | * Update `LICENSE` with the license text from the directory mentioned above. - 21 | * Update `utf8.h`, `utf16.h`, and `umachine.h` with their new contents in the `icu` repository. - - - --------------------------------------------------------------------------------- -/lib/src/unicode/umachine.h: --------------------------------------------------------------------------------- - 1 | // © 2016 and later: Unicode, Inc. and others. - 2 | // License & terms of use: http://www.unicode.org/copyright.html - 3 | /* - 4 | ****************************************************************************** - 5 | * - 6 | * Copyright (C) 1999-2015, International Business Machines - 7 | * Corporation and others. All Rights Reserved. - 8 | * - 9 | ****************************************************************************** - 10 | * file name: umachine.h - 11 | * encoding: UTF-8 - 12 | * tab size: 8 (not used) - 13 | * indentation:4 - 14 | * - 15 | * created on: 1999sep13 - 16 | * created by: Markus W. 
Scherer - 17 | * - 18 | * This file defines basic types and constants for ICU to be - 19 | * platform-independent. umachine.h and utf.h are included into - 20 | * utypes.h to provide all the general definitions for ICU. - 21 | * All of these definitions used to be in utypes.h before - 22 | * the UTF-handling macros made this unmaintainable. - 23 | */ - | - 24 | #ifndef __UMACHINE_H__ - 25 | #define __UMACHINE_H__ - | - | - 26 | /** - 27 | * \file - 28 | * \brief Basic types and constants for UTF - 29 | * - 30 | *

Basic types and constants for UTF

- 31 | * This file defines basic types and constants for utf.h to be - 32 | * platform-independent. umachine.h and utf.h are included into - 33 | * utypes.h to provide all the general definitions for ICU. - 34 | * All of these definitions used to be in utypes.h before - 35 | * the UTF-handling macros made this unmaintainable. - 36 | * - 37 | */ - 38 | /*==========================================================================*/ - 39 | /* Include platform-dependent definitions */ - 40 | /* which are contained in the platform-specific file platform.h */ - 41 | /*==========================================================================*/ - | - 42 | #include "unicode/ptypes.h" /* platform.h is included in ptypes.h */ - | - 43 | /* - 44 | * ANSI C headers: - 45 | * stddef.h defines wchar_t - 46 | */ - 47 | #include - | - 48 | /*==========================================================================*/ - 49 | /* For C wrappers, we use the symbol U_STABLE. */ - 50 | /* This works properly if the includer is C or C++. */ - 51 | /* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */ - 52 | /*==========================================================================*/ - | - 53 | /** - 54 | * \def U_CFUNC - 55 | * This is used in a declaration of a library private ICU C function. - 56 | * @stable ICU 2.4 - 57 | */ - | - 58 | /** - 59 | * \def U_CDECL_BEGIN - 60 | * This is used to begin a declaration of a library private ICU C API. 
- 61 | * @stable ICU 2.4 - 62 | */ - | - 63 | /** - 64 | * \def U_CDECL_END - 65 | * This is used to end a declaration of a library private ICU C API - 66 | * @stable ICU 2.4 - 67 | */ - | - 68 | #ifdef __cplusplus - 69 | # define U_CFUNC extern "C" - 70 | # define U_CDECL_BEGIN extern "C" { - 71 | # define U_CDECL_END } - 72 | #else - 73 | # define U_CFUNC extern - 74 | # define U_CDECL_BEGIN - 75 | # define U_CDECL_END - 76 | #endif - | - 77 | #ifndef U_ATTRIBUTE_DEPRECATED - 78 | /** - 79 | * \def U_ATTRIBUTE_DEPRECATED - 80 | * This is used for GCC specific attributes - 81 | * @internal - 82 | */ - 83 | #if U_GCC_MAJOR_MINOR >= 302 - 84 | # define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated)) - 85 | /** - 86 | * \def U_ATTRIBUTE_DEPRECATED - 87 | * This is used for Visual C++ specific attributes - 88 | * @internal - 89 | */ - 90 | #elif defined(_MSC_VER) && (_MSC_VER >= 1400) - 91 | # define U_ATTRIBUTE_DEPRECATED __declspec(deprecated) - 92 | #else - 93 | # define U_ATTRIBUTE_DEPRECATED - 94 | #endif - 95 | #endif - | - 96 | /** This is used to declare a function as a public ICU C API @stable ICU 2.0*/ - 97 | #define U_CAPI U_CFUNC U_EXPORT - 98 | /** This is used to declare a function as a stable public ICU C API*/ - 99 | #define U_STABLE U_CAPI - 100 | /** This is used to declare a function as a draft public ICU C API */ - 101 | #define U_DRAFT U_CAPI - 102 | /** This is used to declare a function as a deprecated public ICU C API */ - 103 | #define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED - 104 | /** This is used to declare a function as an obsolete public ICU C API */ - 105 | #define U_OBSOLETE U_CAPI - 106 | /** This is used to declare a function as an internal ICU C API */ - 107 | #define U_INTERNAL U_CAPI - | - 108 | /** - 109 | * \def U_OVERRIDE - 110 | * Defined to the C++11 "override" keyword if available. - 111 | * Denotes a class or member which is an override of the base class. 
- 112 | * May result in an error if it applied to something not an override. - 113 | * @internal - 114 | */ - 115 | #ifndef U_OVERRIDE - 116 | #define U_OVERRIDE override - 117 | #endif - | - 118 | /** - 119 | * \def U_FINAL - 120 | * Defined to the C++11 "final" keyword if available. - 121 | * Denotes a class or member which may not be overridden in subclasses. - 122 | * May result in an error if subclasses attempt to override. - 123 | * @internal - 124 | */ - 125 | #if !defined(U_FINAL) || defined(U_IN_DOXYGEN) - 126 | #define U_FINAL final - 127 | #endif - | - 128 | // Before ICU 65, function-like, multi-statement ICU macros were just defined as - 129 | // series of statements wrapped in { } blocks and the caller could choose to - 130 | // either treat them as if they were actual functions and end the invocation - 131 | // with a trailing ; creating an empty statement after the block or else omit - 132 | // this trailing ; using the knowledge that the macro would expand to { }. - 133 | // - 134 | // But doing so doesn't work well with macros that look like functions and - 135 | // compiler warnings about empty statements (ICU-20601) and ICU 65 therefore - 136 | // switches to the standard solution of wrapping such macros in do { } while. - 137 | // - 138 | // This will however break existing code that depends on being able to invoke - 139 | // these macros without a trailing ; so to be able to remain compatible with - 140 | // such code the wrapper is itself defined as macros so that it's possible to - 141 | // build ICU 65 and later with the old macro behaviour, like this: - 142 | // - 143 | // CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""' - 144 | // runConfigureICU ... - | - 145 | /** - 146 | * \def UPRV_BLOCK_MACRO_BEGIN - 147 | * Defined as the "do" keyword by default. 
- 148 | * @internal - 149 | */ - 150 | #ifndef UPRV_BLOCK_MACRO_BEGIN - 151 | #define UPRV_BLOCK_MACRO_BEGIN do - 152 | #endif - | - 153 | /** - 154 | * \def UPRV_BLOCK_MACRO_END - 155 | * Defined as "while (FALSE)" by default. - 156 | * @internal - 157 | */ - 158 | #ifndef UPRV_BLOCK_MACRO_END - 159 | #define UPRV_BLOCK_MACRO_END while (FALSE) - 160 | #endif - | - 161 | /*==========================================================================*/ - 162 | /* limits for int32_t etc., like in POSIX inttypes.h */ - 163 | /*==========================================================================*/ - | - 164 | #ifndef INT8_MIN - 165 | /** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */ - 166 | # define INT8_MIN ((int8_t)(-128)) - 167 | #endif - 168 | #ifndef INT16_MIN - 169 | /** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */ - 170 | # define INT16_MIN ((int16_t)(-32767-1)) - 171 | #endif - 172 | #ifndef INT32_MIN - 173 | /** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */ - 174 | # define INT32_MIN ((int32_t)(-2147483647-1)) - 175 | #endif - | - 176 | #ifndef INT8_MAX - 177 | /** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */ - 178 | # define INT8_MAX ((int8_t)(127)) - 179 | #endif - 180 | #ifndef INT16_MAX - 181 | /** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */ - 182 | # define INT16_MAX ((int16_t)(32767)) - 183 | #endif - 184 | #ifndef INT32_MAX - 185 | /** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */ - 186 | # define INT32_MAX ((int32_t)(2147483647)) - 187 | #endif - | - 188 | #ifndef UINT8_MAX - 189 | /** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */ - 190 | # define UINT8_MAX ((uint8_t)(255U)) - 191 | #endif - 192 | #ifndef UINT16_MAX - 193 | /** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */ - 194 | # define UINT16_MAX ((uint16_t)(65535U)) - 195 | #endif - 196 | 
#ifndef UINT32_MAX - 197 | /** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */ - 198 | # define UINT32_MAX ((uint32_t)(4294967295U)) - 199 | #endif - | - 200 | #if defined(U_INT64_T_UNAVAILABLE) - 201 | # error int64_t is required for decimal format and rule-based number format. - 202 | #else - 203 | # ifndef INT64_C - 204 | /** - 205 | * Provides a platform independent way to specify a signed 64-bit integer constant. - 206 | * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C - 207 | * @stable ICU 2.8 - 208 | */ - 209 | # define INT64_C(c) c ## LL - 210 | # endif - 211 | # ifndef UINT64_C - 212 | /** - 213 | * Provides a platform independent way to specify an unsigned 64-bit integer constant. - 214 | * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C - 215 | * @stable ICU 2.8 - 216 | */ - 217 | # define UINT64_C(c) c ## ULL - 218 | # endif - 219 | # ifndef U_INT64_MIN - 220 | /** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */ - 221 | # define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1)) - 222 | # endif - 223 | # ifndef U_INT64_MAX - 224 | /** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */ - 225 | # define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807))) - 226 | # endif - 227 | # ifndef U_UINT64_MAX - 228 | /** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */ - 229 | # define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615))) - 230 | # endif - 231 | #endif - | - 232 | /*==========================================================================*/ - 233 | /* Boolean data type */ - 234 | /*==========================================================================*/ - | - 235 | /** The ICU boolean type @stable ICU 2.0 */ - 236 | typedef int8_t UBool; - | - 237 | #ifndef TRUE - 238 | /** The TRUE value of a UBool @stable ICU 2.0 */ - 239 | # define TRUE 1 - 240 | #endif - 241 | 
#ifndef FALSE - 242 | /** The FALSE value of a UBool @stable ICU 2.0 */ - 243 | # define FALSE 0 - 244 | #endif - | - | - 245 | /*==========================================================================*/ - 246 | /* Unicode data types */ - 247 | /*==========================================================================*/ - | - 248 | /* wchar_t-related definitions -------------------------------------------- */ - | - 249 | /* - 250 | * \def U_WCHAR_IS_UTF16 - 251 | * Defined if wchar_t uses UTF-16. - 252 | * - 253 | * @stable ICU 2.0 - 254 | */ - 255 | /* - 256 | * \def U_WCHAR_IS_UTF32 - 257 | * Defined if wchar_t uses UTF-32. - 258 | * - 259 | * @stable ICU 2.0 - 260 | */ - 261 | #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) - 262 | # ifdef __STDC_ISO_10646__ - 263 | # if (U_SIZEOF_WCHAR_T==2) - 264 | # define U_WCHAR_IS_UTF16 - 265 | # elif (U_SIZEOF_WCHAR_T==4) - 266 | # define U_WCHAR_IS_UTF32 - 267 | # endif - 268 | # elif defined __UCS2__ - 269 | # if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2) - 270 | # define U_WCHAR_IS_UTF16 - 271 | # endif - 272 | # elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__)) - 273 | # if (U_SIZEOF_WCHAR_T==4) - 274 | # define U_WCHAR_IS_UTF32 - 275 | # endif - 276 | # elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED) - 277 | # define U_WCHAR_IS_UTF32 - 278 | # elif U_PLATFORM_HAS_WIN32_API - 279 | # define U_WCHAR_IS_UTF16 - 280 | # endif - 281 | #endif - | - 282 | /* UChar and UChar32 definitions -------------------------------------------- */ - | - 283 | /** Number of bytes in a UChar. 
@stable ICU 2.0 */ - 284 | #define U_SIZEOF_UCHAR 2 - | - 285 | /** - 286 | * \def U_CHAR16_IS_TYPEDEF - 287 | * If 1, then char16_t is a typedef and not a real type (yet) - 288 | * @internal - 289 | */ - 290 | #if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) - 291 | // for AIX, uchar.h needs to be included - 292 | # include - 293 | # define U_CHAR16_IS_TYPEDEF 1 - 294 | #elif defined(_MSC_VER) && (_MSC_VER < 1900) - 295 | // Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type, - 296 | // and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx - 297 | # define U_CHAR16_IS_TYPEDEF 1 - 298 | #else - 299 | # define U_CHAR16_IS_TYPEDEF 0 - 300 | #endif - | - | - 301 | /** - 302 | * \var UChar - 303 | * - 304 | * The base type for UTF-16 code units and pointers. - 305 | * Unsigned 16-bit integer. - 306 | * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar. - 307 | * - 308 | * UChar is configurable by defining the macro UCHAR_TYPE - 309 | * on the preprocessor or compiler command line: - 310 | * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. - 311 | * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.) - 312 | * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16. - 313 | * - 314 | * The default is UChar=char16_t. - 315 | * - 316 | * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type. - 317 | * - 318 | * In C, char16_t is a simple typedef of uint_least16_t. - 319 | * ICU requires uint_least16_t=uint16_t for data memory mapping. - 320 | * On macOS, char16_t is not available because the uchar.h standard header is missing. - 321 | * - 322 | * @stable ICU 4.4 - 323 | */ - | - 324 | #if 1 - 325 | // #if 1 is normal. UChar defaults to char16_t in C++. 
- 326 | // For configuration testing of UChar=uint16_t temporarily change this to #if 0. - 327 | // The intltest Makefile #defines UCHAR_TYPE=char16_t, - 328 | // so we only #define it to uint16_t if it is undefined so far. - 329 | #elif !defined(UCHAR_TYPE) - 330 | # define UCHAR_TYPE uint16_t - 331 | #endif - | - 332 | #if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ - 333 | defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) - 334 | // Inside the ICU library code, never configurable. - 335 | typedef char16_t UChar; - 336 | #elif defined(UCHAR_TYPE) - 337 | typedef UCHAR_TYPE UChar; - 338 | #elif defined(__cplusplus) - 339 | typedef char16_t UChar; - 340 | #else - 341 | typedef uint16_t UChar; - 342 | #endif - | - 343 | /** - 344 | * \var OldUChar - 345 | * Default ICU 58 definition of UChar. - 346 | * A base type for UTF-16 code units and pointers. - 347 | * Unsigned 16-bit integer. - 348 | * - 349 | * Define OldUChar to be wchar_t if that is 16 bits wide. - 350 | * If wchar_t is not 16 bits wide, then define UChar to be uint16_t. - 351 | * - 352 | * This makes the definition of OldUChar platform-dependent - 353 | * but allows direct string type compatibility with platforms with - 354 | * 16-bit wchar_t types. - 355 | * - 356 | * This is how UChar was defined in ICU 58, for transition convenience. - 357 | * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. - 358 | * The current UChar responds to UCHAR_TYPE but OldUChar does not. - 359 | * - 360 | * @stable ICU 59 - 361 | */ - 362 | #if U_SIZEOF_WCHAR_T==2 - 363 | typedef wchar_t OldUChar; - 364 | #elif defined(__CHAR16_TYPE__) - 365 | typedef __CHAR16_TYPE__ OldUChar; - 366 | #else - 367 | typedef uint16_t OldUChar; - 368 | #endif - | - 369 | /** - 370 | * Define UChar32 as a type for single Unicode code points. - 371 | * UChar32 is a signed 32-bit integer (same as int32_t). - 372 | * - 373 | * The Unicode code point range is 0..0x10ffff. 
- 374 | * All other values (negative or >=0x110000) are illegal as Unicode code points. - 375 | * They may be used as sentinel values to indicate "done", "error" - 376 | * or similar non-code point conditions. - 377 | * - 378 | * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined - 379 | * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) - 380 | * or else to be uint32_t. - 381 | * That is, the definition of UChar32 was platform-dependent. - 382 | * - 383 | * @see U_SENTINEL - 384 | * @stable ICU 2.4 - 385 | */ - 386 | typedef int32_t UChar32; - | - 387 | /** - 388 | * This value is intended for sentinel values for APIs that - 389 | * (take or) return single code points (UChar32). - 390 | * It is outside of the Unicode code point range 0..0x10ffff. - 391 | * - 392 | * For example, a "done" or "error" value in a new API - 393 | * could be indicated with U_SENTINEL. - 394 | * - 395 | * ICU APIs designed before ICU 2.4 usually define service-specific "done" - 396 | * values, mostly 0xffff. - 397 | * Those may need to be distinguished from - 398 | * actual U+ffff text contents by calling functions like - 399 | * CharacterIterator::hasNext() or UnicodeString::length(). - 400 | * - 401 | * @return -1 - 402 | * @see UChar32 - 403 | * @stable ICU 2.4 - 404 | */ - 405 | #define U_SENTINEL (-1) - | - 406 | #include "unicode/urename.h" - | - 407 | #endif - - - --------------------------------------------------------------------------------- -/lib/src/unicode/urename.h: --------------------------------------------------------------------------------- - 1 | // This file must exist in order for `utf8.h` and `utf16.h` to be used. - - - --------------------------------------------------------------------------------- -/lib/src/unicode/utf.h: --------------------------------------------------------------------------------- - 1 | // This file must exist in order for `utf8.h` and `utf16.h` to be used. 
- - - --------------------------------------------------------------------------------- -/lib/src/unicode/utf16.h: --------------------------------------------------------------------------------- - 1 | // © 2016 and later: Unicode, Inc. and others. - 2 | // License & terms of use: http://www.unicode.org/copyright.html - 3 | /* - 4 | ******************************************************************************* - 5 | * - 6 | * Copyright (C) 1999-2012, International Business Machines - 7 | * Corporation and others. All Rights Reserved. - 8 | * - 9 | ******************************************************************************* - 10 | * file name: utf16.h - 11 | * encoding: UTF-8 - 12 | * tab size: 8 (not used) - 13 | * indentation:4 - 14 | * - 15 | * created on: 1999sep09 - 16 | * created by: Markus W. Scherer - 17 | */ - | - 18 | /** - 19 | * \file - 20 | * \brief C API: 16-bit Unicode handling macros - 21 | * - 22 | * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. - 23 | * - 24 | * For more information see utf.h and the ICU User Guide Strings chapter - 25 | * (http://userguide.icu-project.org/strings). - 26 | * - 27 | * Usage: - 28 | * ICU coding guidelines for if() statements should be followed when using these macros. - 29 | * Compound statements (curly braces {}) must be used for if-else-while... - 30 | * bodies and all macro statements should be terminated with semicolon. - 31 | */ - | - 32 | #ifndef __UTF16_H__ - 33 | #define __UTF16_H__ - | - 34 | #include "unicode/umachine.h" - 35 | #ifndef __UTF_H__ - 36 | # include "unicode/utf.h" - 37 | #endif - | - 38 | /* single-code point definitions -------------------------------------------- */ - | - 39 | /** - 40 | * Does this code unit alone encode a code point (BMP, not a surrogate)? 
- 41 | * @param c 16-bit code unit - 42 | * @return TRUE or FALSE - 43 | * @stable ICU 2.4 - 44 | */ - 45 | #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) - | - 46 | /** - 47 | * Is this code unit a lead surrogate (U+d800..U+dbff)? - 48 | * @param c 16-bit code unit - 49 | * @return TRUE or FALSE - 50 | * @stable ICU 2.4 - 51 | */ - 52 | #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) - | - 53 | /** - 54 | * Is this code unit a trail surrogate (U+dc00..U+dfff)? - 55 | * @param c 16-bit code unit - 56 | * @return TRUE or FALSE - 57 | * @stable ICU 2.4 - 58 | */ - 59 | #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) - | - 60 | /** - 61 | * Is this code unit a surrogate (U+d800..U+dfff)? - 62 | * @param c 16-bit code unit - 63 | * @return TRUE or FALSE - 64 | * @stable ICU 2.4 - 65 | */ - 66 | #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) - | - 67 | /** - 68 | * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), - 69 | * is it a lead surrogate? - 70 | * @param c 16-bit code unit - 71 | * @return TRUE or FALSE - 72 | * @stable ICU 2.4 - 73 | */ - 74 | #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) - | - 75 | /** - 76 | * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), - 77 | * is it a trail surrogate? - 78 | * @param c 16-bit code unit - 79 | * @return TRUE or FALSE - 80 | * @stable ICU 4.2 - 81 | */ - 82 | #define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) - | - 83 | /** - 84 | * Helper constant for U16_GET_SUPPLEMENTARY. - 85 | * @internal - 86 | */ - 87 | #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) - | - 88 | /** - 89 | * Get a supplementary code point value (U+10000..U+10ffff) - 90 | * from its lead and trail surrogates. - 91 | * The result is undefined if the input values are not - 92 | * lead and trail surrogates. 
- 93 | * - 94 | * @param lead lead surrogate (U+d800..U+dbff) - 95 | * @param trail trail surrogate (U+dc00..U+dfff) - 96 | * @return supplementary code point (U+10000..U+10ffff) - 97 | * @stable ICU 2.4 - 98 | */ - 99 | #define U16_GET_SUPPLEMENTARY(lead, trail) \ - 100 | (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) - | - | - 101 | /** - 102 | * Get the lead surrogate (0xd800..0xdbff) for a - 103 | * supplementary code point (0x10000..0x10ffff). - 104 | * @param supplementary 32-bit code point (U+10000..U+10ffff) - 105 | * @return lead surrogate (U+d800..U+dbff) for supplementary - 106 | * @stable ICU 2.4 - 107 | */ - 108 | #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) - | - 109 | /** - 110 | * Get the trail surrogate (0xdc00..0xdfff) for a - 111 | * supplementary code point (0x10000..0x10ffff). - 112 | * @param supplementary 32-bit code point (U+10000..U+10ffff) - 113 | * @return trail surrogate (U+dc00..U+dfff) for supplementary - 114 | * @stable ICU 2.4 - 115 | */ - 116 | #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) - | - 117 | /** - 118 | * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) - 119 | * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). - 120 | * @param c 32-bit code point - 121 | * @return 1 or 2 - 122 | * @stable ICU 2.4 - 123 | */ - 124 | #define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) - | - 125 | /** - 126 | * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). - 127 | * @return 2 - 128 | * @stable ICU 2.4 - 129 | */ - 130 | #define U16_MAX_LENGTH 2 - | - 131 | /** - 132 | * Get a code point from a string at a random-access offset, - 133 | * without changing the offset. - 134 | * "Unsafe" macro, assumes well-formed UTF-16. 
- 135 | * - 136 | * The offset may point to either the lead or trail surrogate unit - 137 | * for a supplementary code point, in which case the macro will read - 138 | * the adjacent matching surrogate as well. - 139 | * The result is undefined if the offset points to a single, unpaired surrogate. - 140 | * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. - 141 | * - 142 | * @param s const UChar * string - 143 | * @param i string offset - 144 | * @param c output UChar32 variable - 145 | * @see U16_GET - 146 | * @stable ICU 2.4 - 147 | */ - 148 | #define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ - 149 | (c)=(s)[i]; \ - 150 | if(U16_IS_SURROGATE(c)) { \ - 151 | if(U16_IS_SURROGATE_LEAD(c)) { \ - 152 | (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \ - 153 | } else { \ - 154 | (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ - 155 | } \ - 156 | } \ - 157 | } UPRV_BLOCK_MACRO_END - | - 158 | /** - 159 | * Get a code point from a string at a random-access offset, - 160 | * without changing the offset. - 161 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - 162 | * - 163 | * The offset may point to either the lead or trail surrogate unit - 164 | * for a supplementary code point, in which case the macro will read - 165 | * the adjacent matching surrogate as well. - 166 | * - 167 | * The length can be negative for a NUL-terminated string. - 168 | * - 169 | * If the offset points to a single, unpaired surrogate, then - 170 | * c is set to that unpaired surrogate. - 171 | * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. 
- 172 | * - 173 | * @param s const UChar * string - 174 | * @param start starting string offset (usually 0) - 175 | * @param i string offset, must be start<=i(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ - 191 | (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ - 192 | } \ - 193 | } \ - 194 | } \ - 195 | } UPRV_BLOCK_MACRO_END - | - 196 | /** - 197 | * Get a code point from a string at a random-access offset, - 198 | * without changing the offset. - 199 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - 200 | * - 201 | * The offset may point to either the lead or trail surrogate unit - 202 | * for a supplementary code point, in which case the macro will read - 203 | * the adjacent matching surrogate as well. - 204 | * - 205 | * The length can be negative for a NUL-terminated string. - 206 | * - 207 | * If the offset points to a single, unpaired surrogate, then - 208 | * c is set to U+FFFD. - 209 | * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD. - 210 | * - 211 | * @param s const UChar * string - 212 | * @param start starting string offset (usually 0) - 213 | * @param i string offset, must be start<=i(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ - 231 | (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ - 232 | } else { \ - 233 | (c)=0xfffd; \ - 234 | } \ - 235 | } \ - 236 | } \ - 237 | } UPRV_BLOCK_MACRO_END - | - 238 | /* definitions with forward iteration --------------------------------------- */ - | - 239 | /** - 240 | * Get a code point from a string at a code point boundary offset, - 241 | * and advance the offset to the next code point boundary. - 242 | * (Post-incrementing forward iteration.) - 243 | * "Unsafe" macro, assumes well-formed UTF-16. - 244 | * - 245 | * The offset may point to the lead surrogate unit - 246 | * for a supplementary code point, in which case the macro will read - 247 | * the following trail surrogate as well. 
- 248 | * If the offset points to a trail surrogate, then that itself - 249 | * will be returned as the code point. - 250 | * The result is undefined if the offset points to a single, unpaired lead surrogate. - 251 | * - 252 | * @param s const UChar * string - 253 | * @param i string offset - 254 | * @param c output UChar32 variable - 255 | * @see U16_NEXT - 256 | * @stable ICU 2.4 - 257 | */ - 258 | #define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ - 259 | (c)=(s)[(i)++]; \ - 260 | if(U16_IS_LEAD(c)) { \ - 261 | (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ - 262 | } \ - 263 | } UPRV_BLOCK_MACRO_END - | - 264 | /** - 265 | * Get a code point from a string at a code point boundary offset, - 266 | * and advance the offset to the next code point boundary. - 267 | * (Post-incrementing forward iteration.) - 268 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - 269 | * - 270 | * The length can be negative for a NUL-terminated string. - 271 | * - 272 | * The offset may point to the lead surrogate unit - 273 | * for a supplementary code point, in which case the macro will read - 274 | * the following trail surrogate as well. - 275 | * If the offset points to a trail surrogate or - 276 | * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate. - 277 | * - 278 | * @param s const UChar * string - 279 | * @param i string offset, must be i>10)+0xd7c0); \ - 346 | (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ - 347 | } \ - 348 | } UPRV_BLOCK_MACRO_END - | - 349 | /** - 350 | * Append a code point to a string, overwriting 1 or 2 code units. - 351 | * The offset points to the current end of the string contents - 352 | * and is advanced (post-increment). - 353 | * "Safe" macro, checks for a valid code point. - 354 | * If a surrogate pair is written, checks for sufficient space in the string. - 355 | * If the code point is not valid or a trail surrogate does not fit, - 356 | * then isError is set to TRUE. 
- 357 | * - 358 | * @param s const UChar * string buffer - 359 | * @param i string offset, must be i>10)+0xd7c0); \ - 371 | (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ - 372 | } else /* c>0x10ffff or not enough space */ { \ - 373 | (isError)=TRUE; \ - 374 | } \ - 375 | } UPRV_BLOCK_MACRO_END - | - 376 | /** - 377 | * Advance the string offset from one code point boundary to the next. - 378 | * (Post-incrementing iteration.) - 379 | * "Unsafe" macro, assumes well-formed UTF-16. - 380 | * - 381 | * @param s const UChar * string - 382 | * @param i string offset - 383 | * @see U16_FWD_1 - 384 | * @stable ICU 2.4 - 385 | */ - 386 | #define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ - 387 | if(U16_IS_LEAD((s)[(i)++])) { \ - 388 | ++(i); \ - 389 | } \ - 390 | } UPRV_BLOCK_MACRO_END - | - 391 | /** - 392 | * Advance the string offset from one code point boundary to the next. - 393 | * (Post-incrementing iteration.) - 394 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - 395 | * - 396 | * The length can be negative for a NUL-terminated string. - 397 | * - 398 | * @param s const UChar * string - 399 | * @param i string offset, must be i0) { \ - 424 | U16_FWD_1_UNSAFE(s, i); \ - 425 | --__N; \ - 426 | } \ - 427 | } UPRV_BLOCK_MACRO_END - | - 428 | /** - 429 | * Advance the string offset from one code point boundary to the n-th next one, - 430 | * i.e., move forward by n code points. - 431 | * (Post-incrementing iteration.) - 432 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - 433 | * - 434 | * The length can be negative for a NUL-terminated string. 
- 435 | * - 436 | * @param s const UChar * string - 437 | * @param i int32_t string offset, must be i0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ - 446 | U16_FWD_1(s, i, length); \ - 447 | --__N; \ - 448 | } \ - 449 | } UPRV_BLOCK_MACRO_END - | - 450 | /** - 451 | * Adjust a random-access offset to a code point boundary - 452 | * at the start of a code point. - 453 | * If the offset points to the trail surrogate of a surrogate pair, - 454 | * then the offset is decremented. - 455 | * Otherwise, it is not modified. - 456 | * "Unsafe" macro, assumes well-formed UTF-16. - 457 | * - 458 | * @param s const UChar * string - 459 | * @param i string offset - 460 | * @see U16_SET_CP_START - 461 | * @stable ICU 2.4 - 462 | */ - 463 | #define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ - 464 | if(U16_IS_TRAIL((s)[i])) { \ - 465 | --(i); \ - 466 | } \ - 467 | } UPRV_BLOCK_MACRO_END - | - 468 | /** - 469 | * Adjust a random-access offset to a code point boundary - 470 | * at the start of a code point. - 471 | * If the offset points to the trail surrogate of a surrogate pair, - 472 | * then the offset is decremented. - 473 | * Otherwise, it is not modified. - 474 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - 475 | * - 476 | * @param s const UChar * string - 477 | * @param start starting string offset (usually 0) - 478 | * @param i string offset, must be start<=i - 479 | * @see U16_SET_CP_START_UNSAFE - 480 | * @stable ICU 2.4 - 481 | */ - 482 | #define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ - 483 | if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ - 484 | --(i); \ - 485 | } \ - 486 | } UPRV_BLOCK_MACRO_END - | - 487 | /* definitions with backward iteration -------------------------------------- */ - | - 488 | /** - 489 | * Move the string offset from one code point boundary to the previous one - 490 | * and get the code point between them. 
- 491 | * (Pre-decrementing backward iteration.) - 492 | * "Unsafe" macro, assumes well-formed UTF-16. - 493 | * - 494 | * The input offset may be the same as the string length. - 495 | * If the offset is behind a trail surrogate unit - 496 | * for a supplementary code point, then the macro will read - 497 | * the preceding lead surrogate as well. - 498 | * If the offset is behind a lead surrogate, then that itself - 499 | * will be returned as the code point. - 500 | * The result is undefined if the offset is behind a single, unpaired trail surrogate. - 501 | * - 502 | * @param s const UChar * string - 503 | * @param i string offset - 504 | * @param c output UChar32 variable - 505 | * @see U16_PREV - 506 | * @stable ICU 2.4 - 507 | */ - 508 | #define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ - 509 | (c)=(s)[--(i)]; \ - 510 | if(U16_IS_TRAIL(c)) { \ - 511 | (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ - 512 | } \ - 513 | } UPRV_BLOCK_MACRO_END - | - 514 | /** - 515 | * Move the string offset from one code point boundary to the previous one - 516 | * and get the code point between them. - 517 | * (Pre-decrementing backward iteration.) - 518 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - 519 | * - 520 | * The input offset may be the same as the string length. - 521 | * If the offset is behind a trail surrogate unit - 522 | * for a supplementary code point, then the macro will read - 523 | * the preceding lead surrogate as well. - 524 | * If the offset is behind a lead surrogate or behind a single, unpaired - 525 | * trail surrogate, then c is set to that unpaired surrogate. 
- 526 | * - 527 | * @param s const UChar * string - 528 | * @param start starting string offset (usually 0) - 529 | * @param i string offset, must be start(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ - 539 | --(i); \ - 540 | (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ - 541 | } \ - 542 | } \ - 543 | } UPRV_BLOCK_MACRO_END - | - 544 | /** - 545 | * Move the string offset from one code point boundary to the previous one - 546 | * and get the code point between them. - 547 | * (Pre-decrementing backward iteration.) - 548 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - 549 | * - 550 | * The input offset may be the same as the string length. - 551 | * If the offset is behind a trail surrogate unit - 552 | * for a supplementary code point, then the macro will read - 553 | * the preceding lead surrogate as well. - 554 | * If the offset is behind a lead surrogate or behind a single, unpaired - 555 | * trail surrogate, then c is set to U+FFFD. - 556 | * - 557 | * @param s const UChar * string - 558 | * @param start starting string offset (usually 0) - 559 | * @param i string offset, must be start(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ - 569 | --(i); \ - 570 | (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ - 571 | } else { \ - 572 | (c)=0xfffd; \ - 573 | } \ - 574 | } \ - 575 | } UPRV_BLOCK_MACRO_END - | - 576 | /** - 577 | * Move the string offset from one code point boundary to the previous one. - 578 | * (Pre-decrementing backward iteration.) - 579 | * The input offset may be the same as the string length. - 580 | * "Unsafe" macro, assumes well-formed UTF-16. 
- 581 | * - 582 | * @param s const UChar * string - 583 | * @param i string offset - 584 | * @see U16_BACK_1 - 585 | * @stable ICU 2.4 - 586 | */ - 587 | #define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ - 588 | if(U16_IS_TRAIL((s)[--(i)])) { \ - 589 | --(i); \ - 590 | } \ - 591 | } UPRV_BLOCK_MACRO_END - | - 592 | /** - 593 | * Move the string offset from one code point boundary to the previous one. - 594 | * (Pre-decrementing backward iteration.) - 595 | * The input offset may be the same as the string length. - 596 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - 597 | * - 598 | * @param s const UChar * string - 599 | * @param start starting string offset (usually 0) - 600 | * @param i string offset, must be start(start) && U16_IS_LEAD((s)[(i)-1])) { \ - 606 | --(i); \ - 607 | } \ - 608 | } UPRV_BLOCK_MACRO_END - | - 609 | /** - 610 | * Move the string offset from one code point boundary to the n-th one before it, - 611 | * i.e., move backward by n code points. - 612 | * (Pre-decrementing backward iteration.) - 613 | * The input offset may be the same as the string length. - 614 | * "Unsafe" macro, assumes well-formed UTF-16. - 615 | * - 616 | * @param s const UChar * string - 617 | * @param i string offset - 618 | * @param n number of code points to skip - 619 | * @see U16_BACK_N - 620 | * @stable ICU 2.4 - 621 | */ - 622 | #define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ - 623 | int32_t __N=(n); \ - 624 | while(__N>0) { \ - 625 | U16_BACK_1_UNSAFE(s, i); \ - 626 | --__N; \ - 627 | } \ - 628 | } UPRV_BLOCK_MACRO_END - | - 629 | /** - 630 | * Move the string offset from one code point boundary to the n-th one before it, - 631 | * i.e., move backward by n code points. - 632 | * (Pre-decrementing backward iteration.) - 633 | * The input offset may be the same as the string length. - 634 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 
- 635 | * - 636 | * @param s const UChar * string - 637 | * @param start start of string - 638 | * @param i string offset, must be start0 && (i)>(start)) { \ - 646 | U16_BACK_1(s, start, i); \ - 647 | --__N; \ - 648 | } \ - 649 | } UPRV_BLOCK_MACRO_END - | - 650 | /** - 651 | * Adjust a random-access offset to a code point boundary after a code point. - 652 | * If the offset is behind the lead surrogate of a surrogate pair, - 653 | * then the offset is incremented. - 654 | * Otherwise, it is not modified. - 655 | * The input offset may be the same as the string length. - 656 | * "Unsafe" macro, assumes well-formed UTF-16. - 657 | * - 658 | * @param s const UChar * string - 659 | * @param i string offset - 660 | * @see U16_SET_CP_LIMIT - 661 | * @stable ICU 2.4 - 662 | */ - 663 | #define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ - 664 | if(U16_IS_LEAD((s)[(i)-1])) { \ - 665 | ++(i); \ - 666 | } \ - 667 | } UPRV_BLOCK_MACRO_END - | - 668 | /** - 669 | * Adjust a random-access offset to a code point boundary after a code point. - 670 | * If the offset is behind the lead surrogate of a surrogate pair, - 671 | * then the offset is incremented. - 672 | * Otherwise, it is not modified. - 673 | * The input offset may be the same as the string length. - 674 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - 675 | * - 676 | * The length can be negative for a NUL-terminated string. 
- 677 | * - 678 | * @param s const UChar * string - 679 | * @param start int32_t starting string offset (usually 0) - 680 | * @param i int32_t string offset, start<=i<=length - 681 | * @param length int32_t string length - 682 | * @see U16_SET_CP_LIMIT_UNSAFE - 683 | * @stable ICU 2.4 - 684 | */ - 685 | #define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ - 686 | if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ - 687 | ++(i); \ - 688 | } \ - 689 | } UPRV_BLOCK_MACRO_END - | - 690 | #endif - - - --------------------------------------------------------------------------------- -/lib/src/unicode/utf8.h: --------------------------------------------------------------------------------- - 1 | // © 2016 and later: Unicode, Inc. and others. - 2 | // License & terms of use: http://www.unicode.org/copyright.html - 3 | /* - 4 | ******************************************************************************* - 5 | * - 6 | * Copyright (C) 1999-2015, International Business Machines - 7 | * Corporation and others. All Rights Reserved. - 8 | * - 9 | ******************************************************************************* - 10 | * file name: utf8.h - 11 | * encoding: UTF-8 - 12 | * tab size: 8 (not used) - 13 | * indentation:4 - 14 | * - 15 | * created on: 1999sep13 - 16 | * created by: Markus W. Scherer - 17 | */ - | - 18 | /** - 19 | * \file - 20 | * \brief C API: 8-bit Unicode handling macros - 21 | * - 22 | * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings. - 23 | * - 24 | * For more information see utf.h and the ICU User Guide Strings chapter - 25 | * (http://userguide.icu-project.org/strings). - 26 | * - 27 | * Usage: - 28 | * ICU coding guidelines for if() statements should be followed when using these macros. - 29 | * Compound statements (curly braces {}) must be used for if-else-while... 
- 30 | * bodies and all macro statements should be terminated with semicolon. - 31 | */ - | - 32 | #ifndef __UTF8_H__ - 33 | #define __UTF8_H__ - | - 34 | #include "unicode/umachine.h" - 35 | #ifndef __UTF_H__ - 36 | # include "unicode/utf.h" - 37 | #endif - | - 38 | /* internal definitions ----------------------------------------------------- */ - | - 39 | /** - 40 | * Counts the trail bytes for a UTF-8 lead byte. - 41 | * Returns 0 for 0..0xc1 as well as for 0xf5..0xff. - 42 | * leadByte might be evaluated multiple times. - 43 | * - 44 | * This is internal since it is not meant to be called directly by external clients; - 45 | * however it is called by public macros in this file and thus must remain stable. - 46 | * - 47 | * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. - 48 | * @internal - 49 | */ - 50 | #define U8_COUNT_TRAIL_BYTES(leadByte) \ - 51 | (U8_IS_LEAD(leadByte) ? \ - 52 | ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0) - | - 53 | /** - 54 | * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence. - 55 | * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff. - 56 | * leadByte might be evaluated multiple times. - 57 | * - 58 | * This is internal since it is not meant to be called directly by external clients; - 59 | * however it is called by public macros in this file and thus must remain stable. - 60 | * - 61 | * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. - 62 | * @internal - 63 | */ - 64 | #define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \ - 65 | (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)) - | - 66 | /** - 67 | * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. - 68 | * - 69 | * This is internal since it is not meant to be called directly by external clients; - 70 | * however it is called by public macros in this file and thus must remain stable. 
- 71 | * @internal - 72 | */ - 73 | #define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) - | - 74 | /** - 75 | * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1. - 76 | * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. - 77 | * Lead byte E0..EF bits 3..0 are used as byte index, - 78 | * first trail byte bits 7..5 are used as bit index into that byte. - 79 | * @see U8_IS_VALID_LEAD3_AND_T1 - 80 | * @internal - 81 | */ - 82 | #define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" - | - 83 | /** - 84 | * Internal 3-byte UTF-8 validity check. - 85 | * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence. - 86 | * @internal - 87 | */ - 88 | #define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5))) - | - 89 | /** - 90 | * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1. - 91 | * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. - 92 | * First trail byte bits 7..4 are used as byte index, - 93 | * lead byte F0..F4 bits 2..0 are used as bit index into that byte. - 94 | * @see U8_IS_VALID_LEAD4_AND_T1 - 95 | * @internal - 96 | */ - 97 | #define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" - | - 98 | /** - 99 | * Internal 4-byte UTF-8 validity check. - 100 | * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence. - 101 | * @internal - 102 | */ - 103 | #define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7))) - | - 104 | /** - 105 | * Function for handling "next code point" with error-checking. 
- 106 | * - 107 | * This is internal since it is not meant to be called directly by external clients; - 108 | * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this - 109 | * file and thus must remain stable, and should not be hidden when other internal - 110 | * functions are hidden (otherwise public macros would fail to compile). - 111 | * @internal - 112 | */ - 113 | U_STABLE UChar32 U_EXPORT2 - 114 | utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict); - | - 115 | /** - 116 | * Function for handling "append code point" with error-checking. - 117 | * - 118 | * This is internal since it is not meant to be called directly by external clients; - 119 | * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this - 120 | * file and thus must remain stable, and should not be hidden when other internal - 121 | * functions are hidden (otherwise public macros would fail to compile). - 122 | * @internal - 123 | */ - 124 | U_STABLE int32_t U_EXPORT2 - 125 | utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError); - | - 126 | /** - 127 | * Function for handling "previous code point" with error-checking. - 128 | * - 129 | * This is internal since it is not meant to be called directly by external clients; - 130 | * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this - 131 | * file and thus must remain stable, and should not be hidden when other internal - 132 | * functions are hidden (otherwise public macros would fail to compile). - 133 | * @internal - 134 | */ - 135 | U_STABLE UChar32 U_EXPORT2 - 136 | utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict); - | - 137 | /** - 138 | * Function for handling "skip backward one code point" with error-checking. 
- 139 | * - 140 | * This is internal since it is not meant to be called directly by external clients; - 141 | * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this - 142 | * file and thus must remain stable, and should not be hidden when other internal - 143 | * functions are hidden (otherwise public macros would fail to compile). - 144 | * @internal - 145 | */ - 146 | U_STABLE int32_t U_EXPORT2 - 147 | utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); - | - 148 | /* single-code point definitions -------------------------------------------- */ - | - 149 | /** - 150 | * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)? - 151 | * @param c 8-bit code unit (byte) - 152 | * @return TRUE or FALSE - 153 | * @stable ICU 2.4 - 154 | */ - 155 | #define U8_IS_SINGLE(c) (((c)&0x80)==0) - | - 156 | /** - 157 | * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4) - 158 | * @param c 8-bit code unit (byte) - 159 | * @return TRUE or FALSE - 160 | * @stable ICU 2.4 - 161 | */ - 162 | #define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32) - 163 | // 0x32=0xf4-0xc2 - | - 164 | /** - 165 | * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF) - 166 | * @param c 8-bit code unit (byte) - 167 | * @return TRUE or FALSE - 168 | * @stable ICU 2.4 - 169 | */ - 170 | #define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40) - | - 171 | /** - 172 | * How many code units (bytes) are used for the UTF-8 encoding - 173 | * of this Unicode code point? - 174 | * @param c 32-bit code point - 175 | * @return 1..4, or 0 if c is a surrogate or not a Unicode code point - 176 | * @stable ICU 2.4 - 177 | */ - 178 | #define U8_LENGTH(c) \ - 179 | ((uint32_t)(c)<=0x7f ? 1 : \ - 180 | ((uint32_t)(c)<=0x7ff ? 2 : \ - 181 | ((uint32_t)(c)<=0xd7ff ? 3 : \ - 182 | ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \ - 183 | ((uint32_t)(c)<=0xffff ? 
3 : 4)\ - 184 | ) \ - 185 | ) \ - 186 | ) \ - 187 | ) - | - 188 | /** - 189 | * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff). - 190 | * @return 4 - 191 | * @stable ICU 2.4 - 192 | */ - 193 | #define U8_MAX_LENGTH 4 - | - 194 | /** - 195 | * Get a code point from a string at a random-access offset, - 196 | * without changing the offset. - 197 | * The offset may point to either the lead byte or one of the trail bytes - 198 | * for a code point, in which case the macro will read all of the bytes - 199 | * for the code point. - 200 | * The result is undefined if the offset points to an illegal UTF-8 - 201 | * byte sequence. - 202 | * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. - 203 | * - 204 | * @param s const uint8_t * string - 205 | * @param i string offset - 206 | * @param c output UChar32 variable - 207 | * @see U8_GET - 208 | * @stable ICU 2.4 - 209 | */ - 210 | #define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ - 211 | int32_t _u8_get_unsafe_index=(int32_t)(i); \ - 212 | U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \ - 213 | U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \ - 214 | } UPRV_BLOCK_MACRO_END - | - 215 | /** - 216 | * Get a code point from a string at a random-access offset, - 217 | * without changing the offset. - 218 | * The offset may point to either the lead byte or one of the trail bytes - 219 | * for a code point, in which case the macro will read all of the bytes - 220 | * for the code point. - 221 | * - 222 | * The length can be negative for a NUL-terminated string. - 223 | * - 224 | * If the offset points to an illegal UTF-8 byte sequence, then - 225 | * c is set to a negative value. - 226 | * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. - 227 | * - 228 | * @param s const uint8_t * string - 229 | * @param start int32_t starting string offset - 230 | * @param i int32_t string offset, must be start<=i=0xe0 ? \ - 358 | ((c)<0xf0 ? 
/* U+0800..U+FFFF except surrogates */ \ - 359 | U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \ - 360 | (__t&=0x3f, 1) \ - 361 | : /* U+10000..U+10FFFF */ \ - 362 | ((c)-=0xf0)<=4 && \ - 363 | U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \ - 364 | ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \ - 365 | (__t=(s)[i]-0x80)<=0x3f) && \ - 366 | /* valid second-to-last trail byte */ \ - 367 | ((c)=((c)<<6)|__t, ++(i)!=(length)) \ - 368 | : /* U+0080..U+07FF */ \ - 369 | (c)>=0xc2 && ((c)&=0x1f, 1)) && \ - 370 | /* last trail byte */ \ - 371 | (__t=(s)[i]-0x80)<=0x3f && \ - 372 | ((c)=((c)<<6)|__t, ++(i), 1)) { \ - 373 | } else { \ - 374 | (c)=(sub); /* ill-formed*/ \ - 375 | } \ - 376 | } \ - 377 | } UPRV_BLOCK_MACRO_END - | - 378 | /** - 379 | * Append a code point to a string, overwriting 1 to 4 bytes. - 380 | * The offset points to the current end of the string contents - 381 | * and is advanced (post-increment). - 382 | * "Unsafe" macro, assumes a valid code point and sufficient space in the string. - 383 | * Otherwise, the result is undefined. - 384 | * - 385 | * @param s const uint8_t * string buffer - 386 | * @param i string offset - 387 | * @param c code point to append - 388 | * @see U8_APPEND - 389 | * @stable ICU 2.4 - 390 | */ - 391 | #define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ - 392 | uint32_t __uc=(c); \ - 393 | if(__uc<=0x7f) { \ - 394 | (s)[(i)++]=(uint8_t)__uc; \ - 395 | } else { \ - 396 | if(__uc<=0x7ff) { \ - 397 | (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ - 398 | } else { \ - 399 | if(__uc<=0xffff) { \ - 400 | (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ - 401 | } else { \ - 402 | (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ - 403 | (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ - 404 | } \ - 405 | (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ - 406 | } \ - 407 | (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ - 408 | } \ - 409 | } UPRV_BLOCK_MACRO_END - | - 410 | /** - 411 | * Append a code point to a string, overwriting 1 to 4 bytes. 
- 412 | * The offset points to the current end of the string contents - 413 | * and is advanced (post-increment). - 414 | * "Safe" macro, checks for a valid code point. - 415 | * If a non-ASCII code point is written, checks for sufficient space in the string. - 416 | * If the code point is not valid or trail bytes do not fit, - 417 | * then isError is set to TRUE. - 418 | * - 419 | * @param s const uint8_t * string buffer - 420 | * @param i int32_t string offset, must be i>6)|0xc0); \ - 433 | (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ - 434 | } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \ - 435 | (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ - 436 | (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ - 437 | (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ - 438 | } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \ - 439 | (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ - 440 | (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ - 441 | (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ - 442 | (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ - 443 | } else { \ - 444 | (isError)=TRUE; \ - 445 | } \ - 446 | } UPRV_BLOCK_MACRO_END - | - 447 | /** - 448 | * Advance the string offset from one code point boundary to the next. - 449 | * (Post-incrementing iteration.) - 450 | * "Unsafe" macro, assumes well-formed UTF-8. - 451 | * - 452 | * @param s const uint8_t * string - 453 | * @param i string offset - 454 | * @see U8_FWD_1 - 455 | * @stable ICU 2.4 - 456 | */ - 457 | #define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ - 458 | (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \ - 459 | } UPRV_BLOCK_MACRO_END - | - 460 | /** - 461 | * Advance the string offset from one code point boundary to the next. - 462 | * (Post-incrementing iteration.) - 463 | * "Safe" macro, checks for illegal sequences and for string boundaries. - 464 | * - 465 | * The length can be negative for a NUL-terminated string. 
- 466 | * - 467 | * @param s const uint8_t * string - 468 | * @param i int32_t string offset, must be i=0xf0 */ { \ - 487 | if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \ - 488 | ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \ - 489 | ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \ - 490 | ++(i); \ - 491 | } \ - 492 | } \ - 493 | } \ - 494 | } UPRV_BLOCK_MACRO_END - | - 495 | /** - 496 | * Advance the string offset from one code point boundary to the n-th next one, - 497 | * i.e., move forward by n code points. - 498 | * (Post-incrementing iteration.) - 499 | * "Unsafe" macro, assumes well-formed UTF-8. - 500 | * - 501 | * @param s const uint8_t * string - 502 | * @param i string offset - 503 | * @param n number of code points to skip - 504 | * @see U8_FWD_N - 505 | * @stable ICU 2.4 - 506 | */ - 507 | #define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ - 508 | int32_t __N=(n); \ - 509 | while(__N>0) { \ - 510 | U8_FWD_1_UNSAFE(s, i); \ - 511 | --__N; \ - 512 | } \ - 513 | } UPRV_BLOCK_MACRO_END - | - 514 | /** - 515 | * Advance the string offset from one code point boundary to the n-th next one, - 516 | * i.e., move forward by n code points. - 517 | * (Post-incrementing iteration.) - 518 | * "Safe" macro, checks for illegal sequences and for string boundaries. - 519 | * - 520 | * The length can be negative for a NUL-terminated string. - 521 | * - 522 | * @param s const uint8_t * string - 523 | * @param i int32_t string offset, must be i0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ - 532 | U8_FWD_1(s, i, length); \ - 533 | --__N; \ - 534 | } \ - 535 | } UPRV_BLOCK_MACRO_END - | - 536 | /** - 537 | * Adjust a random-access offset to a code point boundary - 538 | * at the start of a code point. - 539 | * If the offset points to a UTF-8 trail byte, - 540 | * then the offset is moved backward to the corresponding lead byte. - 541 | * Otherwise, it is not modified. - 542 | * "Unsafe" macro, assumes well-formed UTF-8. 
- 543 | * - 544 | * @param s const uint8_t * string - 545 | * @param i string offset - 546 | * @see U8_SET_CP_START - 547 | * @stable ICU 2.4 - 548 | */ - 549 | #define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ - 550 | while(U8_IS_TRAIL((s)[i])) { --(i); } \ - 551 | } UPRV_BLOCK_MACRO_END - | - 552 | /** - 553 | * Adjust a random-access offset to a code point boundary - 554 | * at the start of a code point. - 555 | * If the offset points to a UTF-8 trail byte, - 556 | * then the offset is moved backward to the corresponding lead byte. - 557 | * Otherwise, it is not modified. - 558 | * - 559 | * "Safe" macro, checks for illegal sequences and for string boundaries. - 560 | * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i]. - 561 | * - 562 | * @param s const uint8_t * string - 563 | * @param start int32_t starting string offset (usually 0) - 564 | * @param i int32_t string offset, must be start<=i - 565 | * @see U8_SET_CP_START_UNSAFE - 566 | * @see U8_TRUNCATE_IF_INCOMPLETE - 567 | * @stable ICU 2.4 - 568 | */ - 569 | #define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ - 570 | if(U8_IS_TRAIL((s)[(i)])) { \ - 571 | (i)=utf8_back1SafeBody(s, start, (i)); \ - 572 | } \ - 573 | } UPRV_BLOCK_MACRO_END - | - 574 | /** - 575 | * If the string ends with a UTF-8 byte sequence that is valid so far - 576 | * but incomplete, then reduce the length of the string to end before - 577 | * the lead byte of that incomplete sequence. - 578 | * For example, if the string ends with E1 80, the length is reduced by 2. - 579 | * - 580 | * In all other cases (the string ends with a complete sequence, or it is not - 581 | * possible for any further trail byte to extend the trailing sequence) - 582 | * the length remains unchanged. 
- 583 | * - 584 | * Useful for processing text split across multiple buffers - 585 | * (save the incomplete sequence for later) - 586 | * and for optimizing iteration - 587 | * (check for string length only once per character). - 588 | * - 589 | * "Safe" macro, checks for illegal sequences and for string boundaries. - 590 | * Unlike U8_SET_CP_START(), this macro never reads s[length]. - 591 | * - 592 | * (In UTF-16, simply check for U16_IS_LEAD(last code unit).) - 593 | * - 594 | * @param s const uint8_t * string - 595 | * @param start int32_t starting string offset (usually 0) - 596 | * @param length int32_t string length (usually start<=length) - 597 | * @see U8_SET_CP_START - 598 | * @stable ICU 61 - 599 | */ - 600 | #define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \ - 601 | if((length)>(start)) { \ - 602 | uint8_t __b1=s[(length)-1]; \ - 603 | if(U8_IS_SINGLE(__b1)) { \ - 604 | /* common ASCII character */ \ - 605 | } else if(U8_IS_LEAD(__b1)) { \ - 606 | --(length); \ - 607 | } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \ - 608 | uint8_t __b2=s[(length)-2]; \ - 609 | if(0xe0<=__b2 && __b2<=0xf4) { \ - 610 | if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \ - 611 | U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \ - 612 | (length)-=2; \ - 613 | } \ - 614 | } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \ - 615 | uint8_t __b3=s[(length)-3]; \ - 616 | if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \ - 617 | (length)-=3; \ - 618 | } \ - 619 | } \ - 620 | } \ - 621 | } \ - 622 | } UPRV_BLOCK_MACRO_END - | - 623 | /* definitions with backward iteration -------------------------------------- */ - | - 624 | /** - 625 | * Move the string offset from one code point boundary to the previous one - 626 | * and get the code point between them. - 627 | * (Pre-decrementing backward iteration.) - 628 | * "Unsafe" macro, assumes well-formed UTF-8. 
- 629 | * - 630 | * The input offset may be the same as the string length. - 631 | * If the offset is behind a multi-byte sequence, then the macro will read - 632 | * the whole sequence. - 633 | * If the offset is behind a lead byte, then that itself - 634 | * will be returned as the code point. - 635 | * The result is undefined if the offset is behind an illegal UTF-8 sequence. - 636 | * - 637 | * @param s const uint8_t * string - 638 | * @param i string offset - 639 | * @param c output UChar32 variable - 640 | * @see U8_PREV - 641 | * @stable ICU 2.4 - 642 | */ - 643 | #define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ - 644 | (c)=(uint8_t)(s)[--(i)]; \ - 645 | if(U8_IS_TRAIL(c)) { \ - 646 | uint8_t __b, __count=1, __shift=6; \ - 647 | \ - 648 | /* c is a trail byte */ \ - 649 | (c)&=0x3f; \ - 650 | for(;;) { \ - 651 | __b=(s)[--(i)]; \ - 652 | if(__b>=0xc0) { \ - 653 | U8_MASK_LEAD_BYTE(__b, __count); \ - 654 | (c)|=(UChar32)__b<<__shift; \ - 655 | break; \ - 656 | } else { \ - 657 | (c)|=(UChar32)(__b&0x3f)<<__shift; \ - 658 | ++__count; \ - 659 | __shift+=6; \ - 660 | } \ - 661 | } \ - 662 | } \ - 663 | } UPRV_BLOCK_MACRO_END - | - 664 | /** - 665 | * Move the string offset from one code point boundary to the previous one - 666 | * and get the code point between them. - 667 | * (Pre-decrementing backward iteration.) - 668 | * "Safe" macro, checks for illegal sequences and for string boundaries. - 669 | * - 670 | * The input offset may be the same as the string length. - 671 | * If the offset is behind a multi-byte sequence, then the macro will read - 672 | * the whole sequence. - 673 | * If the offset is behind a lead byte, then that itself - 674 | * will be returned as the code point. - 675 | * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value. 
- 676 | * - 677 | * @param s const uint8_t * string - 678 | * @param start int32_t starting string offset (usually 0) - 679 | * @param i int32_t string offset, must be start0) { \ - 767 | U8_BACK_1_UNSAFE(s, i); \ - 768 | --__N; \ - 769 | } \ - 770 | } UPRV_BLOCK_MACRO_END - | - 771 | /** - 772 | * Move the string offset from one code point boundary to the n-th one before it, - 773 | * i.e., move backward by n code points. - 774 | * (Pre-decrementing backward iteration.) - 775 | * The input offset may be the same as the string length. - 776 | * "Safe" macro, checks for illegal sequences and for string boundaries. - 777 | * - 778 | * @param s const uint8_t * string - 779 | * @param start int32_t index of the start of the string - 780 | * @param i int32_t string offset, must be start0 && (i)>(start)) { \ - 788 | U8_BACK_1(s, start, i); \ - 789 | --__N; \ - 790 | } \ - 791 | } UPRV_BLOCK_MACRO_END - | - 792 | /** - 793 | * Adjust a random-access offset to a code point boundary after a code point. - 794 | * If the offset is behind a partial multi-byte sequence, - 795 | * then the offset is incremented to behind the whole sequence. - 796 | * Otherwise, it is not modified. - 797 | * The input offset may be the same as the string length. - 798 | * "Unsafe" macro, assumes well-formed UTF-8. - 799 | * - 800 | * @param s const uint8_t * string - 801 | * @param i string offset - 802 | * @see U8_SET_CP_LIMIT - 803 | * @stable ICU 2.4 - 804 | */ - 805 | #define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ - 806 | U8_BACK_1_UNSAFE(s, i); \ - 807 | U8_FWD_1_UNSAFE(s, i); \ - 808 | } UPRV_BLOCK_MACRO_END - | - 809 | /** - 810 | * Adjust a random-access offset to a code point boundary after a code point. - 811 | * If the offset is behind a partial multi-byte sequence, - 812 | * then the offset is incremented to behind the whole sequence. - 813 | * Otherwise, it is not modified. - 814 | * The input offset may be the same as the string length. 
- 815 | * "Safe" macro, checks for illegal sequences and for string boundaries. - 816 | * - 817 | * The length can be negative for a NUL-terminated string. - 818 | * - 819 | * @param s const uint8_t * string - 820 | * @param start int32_t starting string offset (usually 0) - 821 | * @param i int32_t string offset, must be start<=i<=length - 822 | * @param length int32_t string length - 823 | * @see U8_SET_CP_LIMIT_UNSAFE - 824 | * @stable ICU 2.4 - 825 | */ - 826 | #define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ - 827 | if((start)<(i) && ((i)<(length) || (length)<0)) { \ - 828 | U8_BACK_1(s, start, i); \ - 829 | U8_FWD_1(s, i, length); \ - 830 | } \ - 831 | } UPRV_BLOCK_MACRO_END - | - 832 | #endif - - - --------------------------------------------------------------------------------- -/lib/src/wasm_store.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/api.h" - 2 | #include "./parser.h" - 3 | #include - | - 4 | #ifdef TREE_SITTER_FEATURE_WASM - | - 5 | #include "./alloc.h" - 6 | #include "./array.h" - 7 | #include "./atomic.h" - 8 | #include "./language.h" - 9 | #include "./lexer.h" - 10 | #include "./wasm/wasm-stdlib.h" - 11 | #include "./wasm_store.h" - | - 12 | #include - 13 | #include - 14 | #include - | - 15 | #ifdef _MSC_VER - 16 | #pragma warning(push) - 17 | #pragma warning(disable : 4100) - 18 | #elif defined(__GNUC__) || defined(__clang__) - 19 | #pragma GCC diagnostic push - 20 | #pragma GCC diagnostic ignored "-Wunused-parameter" - 21 | #endif - | - 22 | #define array_len(a) (sizeof(a) / sizeof(a[0])) - | - 23 | // The following symbols from the C and C++ standard libraries are available - 24 | // for external scanners to use. 
- 25 | const char *STDLIB_SYMBOLS[] = { - 26 | #include "./stdlib-symbols.txt" - 27 | }; - | - 28 | // The contents of the `dylink.0` custom section of a Wasm module, - 29 | // as specified by the current WebAssembly dynamic linking ABI proposal. - 30 | typedef struct { - 31 | uint32_t memory_size; - 32 | uint32_t memory_align; - 33 | uint32_t table_size; - 34 | uint32_t table_align; - 35 | } WasmDylinkInfo; - | - 36 | // WasmLanguageId - A pointer used to identify a language. This language id is - 37 | // reference-counted, so that its ownership can be shared between the language - 38 | // itself and the instances of the language that are held in Wasm stores. - 39 | typedef struct { - 40 | volatile uint32_t ref_count; - 41 | volatile uint32_t is_language_deleted; - 42 | } WasmLanguageId; - | - 43 | // LanguageWasmModule - Additional data associated with a Wasm-backed - 44 | // `TSLanguage`. This data is read-only and does not reference a particular - 45 | // Wasm store, so it can be shared by all users of a `TSLanguage`. A pointer to - 46 | // this is stored on the language itself. - 47 | typedef struct { - 48 | volatile uint32_t ref_count; - 49 | WasmLanguageId *language_id; - 50 | wasmtime_module_t *module; - 51 | const char *name; - 52 | char *symbol_name_buffer; - 53 | char *field_name_buffer; - 54 | WasmDylinkInfo dylink_info; - 55 | } LanguageWasmModule; - | - 56 | // LanguageWasmInstance - Additional data associated with an instantiation of - 57 | // a `TSLanguage` in a particular Wasm store. The Wasm store holds one of - 58 | // these structs for each language that it has instantiated. 
- 59 | typedef struct { - 60 | WasmLanguageId *language_id; - 61 | wasmtime_instance_t instance; - 62 | int32_t external_states_address; - 63 | int32_t lex_main_fn_index; - 64 | int32_t lex_keyword_fn_index; - 65 | int32_t scanner_create_fn_index; - 66 | int32_t scanner_destroy_fn_index; - 67 | int32_t scanner_serialize_fn_index; - 68 | int32_t scanner_deserialize_fn_index; - 69 | int32_t scanner_scan_fn_index; - 70 | } LanguageWasmInstance; - | - 71 | typedef struct { - 72 | uint32_t reset_heap; - 73 | uint32_t proc_exit; - 74 | uint32_t abort; - 75 | uint32_t assert_fail; - 76 | uint32_t notify_memory_growth; - 77 | uint32_t debug_message; - 78 | uint32_t at_exit; - 79 | uint32_t args_get; - 80 | uint32_t args_sizes_get; - 81 | } BuiltinFunctionIndices; - | - 82 | // TSWasmStore - A struct that allows a given `Parser` to use Wasm-backed - 83 | // languages. This struct is mutable, and can only be used by one parser at a - 84 | // time. - 85 | struct TSWasmStore { - 86 | wasm_engine_t *engine; - 87 | wasmtime_store_t *store; - 88 | wasmtime_table_t function_table; - 89 | wasmtime_memory_t memory; - 90 | TSLexer *current_lexer; - 91 | LanguageWasmInstance *current_instance; - 92 | Array(LanguageWasmInstance) language_instances; - 93 | uint32_t current_memory_offset; - 94 | uint32_t current_function_table_offset; - 95 | uint32_t *stdlib_fn_indices; - 96 | BuiltinFunctionIndices builtin_fn_indices; - 97 | wasmtime_global_t stack_pointer_global; - 98 | wasm_globaltype_t *const_i32_type; - 99 | bool has_error; - 100 | uint32_t lexer_address; - 101 | }; - | - 102 | typedef Array(char) StringData; - | - 103 | // LanguageInWasmMemory - The memory layout of a `TSLanguage` when compiled to - 104 | // wasm32. This is used to copy static language data out of the Wasm memory. 
- 105 | typedef struct { - 106 | uint32_t abi_version; - 107 | uint32_t symbol_count; - 108 | uint32_t alias_count; - 109 | uint32_t token_count; - 110 | uint32_t external_token_count; - 111 | uint32_t state_count; - 112 | uint32_t large_state_count; - 113 | uint32_t production_id_count; - 114 | uint32_t field_count; - 115 | uint16_t max_alias_sequence_length; - 116 | int32_t parse_table; - 117 | int32_t small_parse_table; - 118 | int32_t small_parse_table_map; - 119 | int32_t parse_actions; - 120 | int32_t symbol_names; - 121 | int32_t field_names; - 122 | int32_t field_map_slices; - 123 | int32_t field_map_entries; - 124 | int32_t symbol_metadata; - 125 | int32_t public_symbol_map; - 126 | int32_t alias_map; - 127 | int32_t alias_sequences; - 128 | int32_t lex_modes; - 129 | int32_t lex_fn; - 130 | int32_t keyword_lex_fn; - 131 | TSSymbol keyword_capture_token; - 132 | struct { - 133 | int32_t states; - 134 | int32_t symbol_map; - 135 | int32_t create; - 136 | int32_t destroy; - 137 | int32_t scan; - 138 | int32_t serialize; - 139 | int32_t deserialize; - 140 | } external_scanner; - 141 | int32_t primary_state_ids; - 142 | int32_t name; - 143 | int32_t reserved_words; - 144 | uint16_t max_reserved_word_set_size; - 145 | uint32_t supertype_count; - 146 | int32_t supertype_symbols; - 147 | int32_t supertype_map_slices; - 148 | int32_t supertype_map_entries; - 149 | TSLanguageMetadata metadata; - 150 | } LanguageInWasmMemory; - | - 151 | // LexerInWasmMemory - The memory layout of a `TSLexer` when compiled to wasm32. - 152 | // This is used to copy mutable lexing state in and out of the Wasm memory. 
- 153 | typedef struct { - 154 | int32_t lookahead; - 155 | TSSymbol result_symbol; - 156 | int32_t advance; - 157 | int32_t mark_end; - 158 | int32_t get_column; - 159 | int32_t is_at_included_range_start; - 160 | int32_t eof; - 161 | } LexerInWasmMemory; - | - 162 | // Linear memory layout: - 163 | // [ <-- stack | stdlib statics | lexer | language statics --> | serialization_buffer | heap --> ] - 164 | #define MAX_MEMORY_SIZE (128 * 1024 * 1024 / MEMORY_PAGE_SIZE) - | - 165 | /************************ - 166 | * WasmDylinkMemoryInfo - 167 | ***********************/ - | - 168 | static uint8_t read_u8(const uint8_t **p) { - 169 | return *(*p)++; - 170 | } - | - 171 | static inline uint64_t read_uleb128(const uint8_t **p, const uint8_t *end) { - 172 | uint64_t value = 0; - 173 | unsigned shift = 0; - 174 | do { - 175 | if (*p == end) return UINT64_MAX; - 176 | value += (uint64_t)(**p & 0x7f) << shift; - 177 | shift += 7; - 178 | } while (*((*p)++) >= 128); - 179 | return value; - 180 | } - | - 181 | static bool wasm_dylink_info__parse( - 182 | const uint8_t *bytes, - 183 | size_t length, - 184 | WasmDylinkInfo *info - 185 | ) { - 186 | const uint8_t WASM_MAGIC_NUMBER[4] = {0, 'a', 's', 'm'}; - 187 | const uint8_t WASM_VERSION[4] = {1, 0, 0, 0}; - 188 | const uint8_t WASM_CUSTOM_SECTION = 0x0; - 189 | const uint8_t WASM_DYLINK_MEM_INFO = 0x1; - | - 190 | const uint8_t *p = bytes; - 191 | const uint8_t *end = bytes + length; - | - 192 | if (length < 8) return false; - 193 | if (memcmp(p, WASM_MAGIC_NUMBER, 4) != 0) return false; - 194 | p += 4; - 195 | if (memcmp(p, WASM_VERSION, 4) != 0) return false; - 196 | p += 4; - | - 197 | while (p < end) { - 198 | uint8_t section_id = read_u8(&p); - 199 | uint32_t section_length = read_uleb128(&p, end); - 200 | const uint8_t *section_end = p + section_length; - 201 | if (section_end > end) return false; - | - 202 | if (section_id == WASM_CUSTOM_SECTION) { - 203 | uint32_t name_length = read_uleb128(&p, section_end); - 204 | 
const uint8_t *name_end = p + name_length; - 205 | if (name_end > section_end) return false; - | - 206 | if (name_length == 8 && memcmp(p, "dylink.0", 8) == 0) { - 207 | p = name_end; - 208 | while (p < section_end) { - 209 | uint8_t subsection_type = read_u8(&p); - 210 | uint32_t subsection_size = read_uleb128(&p, section_end); - 211 | const uint8_t *subsection_end = p + subsection_size; - 212 | if (subsection_end > section_end) return false; - 213 | if (subsection_type == WASM_DYLINK_MEM_INFO) { - 214 | info->memory_size = read_uleb128(&p, subsection_end); - 215 | info->memory_align = read_uleb128(&p, subsection_end); - 216 | info->table_size = read_uleb128(&p, subsection_end); - 217 | info->table_align = read_uleb128(&p, subsection_end); - 218 | return true; - 219 | } - 220 | p = subsection_end; - 221 | } - 222 | } - 223 | } - 224 | p = section_end; - 225 | } - 226 | return false; - 227 | } - | - 228 | /******************************************* - 229 | * Native callbacks exposed to Wasm modules - 230 | *******************************************/ - | - 231 | static wasm_trap_t *callback__abort( - 232 | void *env, - 233 | wasmtime_caller_t* caller, - 234 | wasmtime_val_raw_t *args_and_results, - 235 | size_t args_and_results_len - 236 | ) { - 237 | return wasmtime_trap_new("Wasm module called abort", 24); - 238 | } - | - 239 | static wasm_trap_t *callback__debug_message( - 240 | void *env, - 241 | wasmtime_caller_t* caller, - 242 | wasmtime_val_raw_t *args_and_results, - 243 | size_t args_and_results_len - 244 | ) { - 245 | wasmtime_context_t *context = wasmtime_caller_context(caller); - 246 | TSWasmStore *store = env; - 247 | ts_assert(args_and_results_len == 2); - 248 | uint32_t string_address = args_and_results[0].i32; - 249 | uint32_t value = args_and_results[1].i32; - 250 | uint8_t *memory = wasmtime_memory_data(context, &store->memory); - 251 | printf("DEBUG: %s %u\n", &memory[string_address], value); - 252 | return NULL; - 253 | } - | - 254 | static 
wasm_trap_t *callback__noop( - 255 | void *env, - 256 | wasmtime_caller_t* caller, - 257 | wasmtime_val_raw_t *args_and_results, - 258 | size_t args_and_results_len - 259 | ) { - 260 | return NULL; - 261 | } - | - 262 | static wasm_trap_t *callback__lexer_advance( - 263 | void *env, - 264 | wasmtime_caller_t* caller, - 265 | wasmtime_val_raw_t *args_and_results, - 266 | size_t args_and_results_len - 267 | ) { - 268 | wasmtime_context_t *context = wasmtime_caller_context(caller); - 269 | ts_assert(args_and_results_len == 2); - | - 270 | TSWasmStore *store = env; - 271 | TSLexer *lexer = store->current_lexer; - 272 | bool skip = args_and_results[1].i32; - 273 | lexer->advance(lexer, skip); - | - 274 | uint8_t *memory = wasmtime_memory_data(context, &store->memory); - 275 | memcpy(&memory[store->lexer_address], &lexer->lookahead, sizeof(lexer->lookahead)); - 276 | return NULL; - 277 | } - | - 278 | static wasm_trap_t *callback__lexer_mark_end( - 279 | void *env, - 280 | wasmtime_caller_t* caller, - 281 | wasmtime_val_raw_t *args_and_results, - 282 | size_t args_and_results_len - 283 | ) { - 284 | TSWasmStore *store = env; - 285 | TSLexer *lexer = store->current_lexer; - 286 | lexer->mark_end(lexer); - 287 | return NULL; - 288 | } - | - 289 | static wasm_trap_t *callback__lexer_get_column( - 290 | void *env, - 291 | wasmtime_caller_t* caller, - 292 | wasmtime_val_raw_t *args_and_results, - 293 | size_t args_and_results_len - 294 | ) { - 295 | TSWasmStore *store = env; - 296 | TSLexer *lexer = store->current_lexer; - 297 | uint32_t result = lexer->get_column(lexer); - 298 | args_and_results[0].i32 = result; - 299 | return NULL; - 300 | } - | - 301 | static wasm_trap_t *callback__lexer_is_at_included_range_start( - 302 | void *env, - 303 | wasmtime_caller_t* caller, - 304 | wasmtime_val_raw_t *args_and_results, - 305 | size_t args_and_results_len - 306 | ) { - 307 | TSWasmStore *store = env; - 308 | TSLexer *lexer = store->current_lexer; - 309 | bool result = 
lexer->is_at_included_range_start(lexer); - 310 | args_and_results[0].i32 = result; - 311 | return NULL; - 312 | } - | - 313 | static wasm_trap_t *callback__lexer_eof( - 314 | void *env, - 315 | wasmtime_caller_t* caller, - 316 | wasmtime_val_raw_t *args_and_results, - 317 | size_t args_and_results_len - 318 | ) { - 319 | TSWasmStore *store = env; - 320 | TSLexer *lexer = store->current_lexer; - 321 | bool result = lexer->eof(lexer); - 322 | args_and_results[0].i32 = result; - 323 | return NULL; - 324 | } - | - 325 | typedef struct { - 326 | uint32_t *storage_location; - 327 | wasmtime_func_unchecked_callback_t callback; - 328 | wasm_functype_t *type; - 329 | } FunctionDefinition; - | - 330 | static void *copy(const void *data, size_t size) { - 331 | void *result = ts_malloc(size); - 332 | memcpy(result, data, size); - 333 | return result; - 334 | } - | - 335 | static void *copy_unsized_static_array( - 336 | const uint8_t *data, - 337 | int32_t start_address, - 338 | const int32_t all_addresses[], - 339 | size_t address_count - 340 | ) { - 341 | int32_t end_address = 0; - 342 | for (unsigned i = 0; i < address_count; i++) { - 343 | if (all_addresses[i] > start_address) { - 344 | if (!end_address || all_addresses[i] < end_address) { - 345 | end_address = all_addresses[i]; - 346 | } - 347 | } - 348 | } - | - 349 | if (!end_address) return NULL; - 350 | size_t size = end_address - start_address; - 351 | void *result = ts_malloc(size); - 352 | memcpy(result, &data[start_address], size); - 353 | return result; - 354 | } - | - 355 | static void *copy_strings( - 356 | const uint8_t *data, - 357 | int32_t array_address, - 358 | size_t count, - 359 | StringData *string_data - 360 | ) { - 361 | const char **result = ts_malloc(count * sizeof(char *)); - 362 | for (unsigned i = 0; i < count; i++) { - 363 | int32_t address; - 364 | memcpy(&address, &data[array_address + i * sizeof(address)], sizeof(address)); - 365 | if (address == 0) { - 366 | result[i] = (const char *)-1; - 
367 | } else { - 368 | const uint8_t *string = &data[address]; - 369 | uint32_t len = strlen((const char *)string); - 370 | result[i] = (const char *)(uintptr_t)string_data->size; - 371 | array_extend(string_data, len + 1, string); - 372 | } - 373 | } - 374 | for (unsigned i = 0; i < count; i++) { - 375 | if (result[i] == (const char *)-1) { - 376 | result[i] = NULL; - 377 | } else { - 378 | result[i] = string_data->contents + (uintptr_t)result[i]; - 379 | } - 380 | } - 381 | return result; - 382 | } - | - 383 | static void *copy_string( - 384 | const uint8_t *data, - 385 | int32_t address - 386 | ) { - 387 | const char *string = (const char *)&data[address]; - 388 | size_t len = strlen(string); - 389 | char *result = ts_malloc(len + 1); - 390 | memcpy(result, string, len + 1); - 391 | return result; - 392 | } - | - 393 | static bool name_eq(const wasm_name_t *name, const char *string) { - 394 | return strncmp(string, name->data, name->size) == 0; - 395 | } - | - 396 | static inline wasm_functype_t* wasm_functype_new_4_0( - 397 | wasm_valtype_t* p1, - 398 | wasm_valtype_t* p2, - 399 | wasm_valtype_t* p3, - 400 | wasm_valtype_t* p4 - 401 | ) { - 402 | wasm_valtype_t* ps[4] = {p1, p2, p3, p4}; - 403 | wasm_valtype_vec_t params, results; - 404 | wasm_valtype_vec_new(¶ms, 4, ps); - 405 | wasm_valtype_vec_new_empty(&results); - 406 | return wasm_functype_new(¶ms, &results); - 407 | } - | - 408 | #define format(output, ...) 
\ - 409 | do { \ - 410 | size_t message_length = snprintf((char *)NULL, 0, __VA_ARGS__); \ - 411 | *output = ts_malloc(message_length + 1); \ - 412 | snprintf(*output, message_length + 1, __VA_ARGS__); \ - 413 | } while (0) - | - 414 | WasmLanguageId *language_id_new(void) { - 415 | WasmLanguageId *self = ts_malloc(sizeof(WasmLanguageId)); - 416 | self->is_language_deleted = false; - 417 | self->ref_count = 1; - 418 | return self; - 419 | } - | - 420 | WasmLanguageId *language_id_clone(WasmLanguageId *self) { - 421 | atomic_inc(&self->ref_count); - 422 | return self; - 423 | } - | - 424 | void language_id_delete(WasmLanguageId *self) { - 425 | if (atomic_dec(&self->ref_count) == 0) { - 426 | ts_free(self); - 427 | } - 428 | } - | - 429 | static wasmtime_extern_t get_builtin_extern( - 430 | wasmtime_table_t *table, - 431 | unsigned index - 432 | ) { - 433 | return (wasmtime_extern_t) { - 434 | .kind = WASMTIME_EXTERN_FUNC, - 435 | .of.func = (wasmtime_func_t) { - 436 | .store_id = table->store_id, - 437 | .__private = index - 438 | } - 439 | }; - 440 | } - | - 441 | static bool ts_wasm_store__provide_builtin_import( - 442 | TSWasmStore *self, - 443 | const wasm_name_t *import_name, - 444 | wasmtime_extern_t *import - 445 | ) { - 446 | wasmtime_error_t *error = NULL; - 447 | wasmtime_context_t *context = wasmtime_store_context(self->store); - | - 448 | // Dynamic linking parameters - 449 | if (name_eq(import_name, "__memory_base")) { - 450 | wasmtime_val_t value = WASM_I32_VAL(self->current_memory_offset); - 451 | wasmtime_global_t global; - 452 | error = wasmtime_global_new(context, self->const_i32_type, &value, &global); - 453 | ts_assert(!error); - 454 | *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = global}; - 455 | } else if (name_eq(import_name, "__table_base")) { - 456 | wasmtime_val_t value = WASM_I32_VAL(self->current_function_table_offset); - 457 | wasmtime_global_t global; - 458 | error = wasmtime_global_new(context, 
self->const_i32_type, &value, &global); - 459 | ts_assert(!error); - 460 | *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = global}; - 461 | } else if (name_eq(import_name, "__stack_pointer")) { - 462 | *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = self->stack_pointer_global}; - 463 | } else if (name_eq(import_name, "__indirect_function_table")) { - 464 | *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_TABLE, .of.table = self->function_table}; - 465 | } else if (name_eq(import_name, "memory")) { - 466 | *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_MEMORY, .of.memory = self->memory}; - 467 | } - | - 468 | // Builtin functions - 469 | else if (name_eq(import_name, "__assert_fail")) { - 470 | *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.assert_fail); - 471 | } else if (name_eq(import_name, "__cxa_atexit")) { - 472 | *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.at_exit); - 473 | } else if (name_eq(import_name, "args_get")) { - 474 | *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.args_get); - 475 | } else if (name_eq(import_name, "args_sizes_get")) { - 476 | *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.args_sizes_get); - 477 | } else if (name_eq(import_name, "abort")) { - 478 | *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.abort); - 479 | } else if (name_eq(import_name, "proc_exit")) { - 480 | *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.proc_exit); - 481 | } else if (name_eq(import_name, "emscripten_notify_memory_growth")) { - 482 | *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.notify_memory_growth); - 483 | } else if (name_eq(import_name, "tree_sitter_debug_message")) { - 484 | *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.debug_message); - 485 | } 
else { - 486 | return false; - 487 | } - | - 488 | return true; - 489 | } - | - 490 | static bool ts_wasm_store__call_module_initializer( - 491 | TSWasmStore *self, - 492 | const wasm_name_t *export_name, - 493 | wasmtime_extern_t *export, - 494 | wasm_trap_t **trap - 495 | ) { - 496 | if ( - 497 | name_eq(export_name, "_initialize") || - 498 | name_eq(export_name, "__wasm_apply_data_relocs") || - 499 | name_eq(export_name, "__wasm_call_ctors") - 500 | ) { - 501 | wasmtime_context_t *context = wasmtime_store_context(self->store); - 502 | wasmtime_func_t initialization_func = export->of.func; - 503 | wasmtime_error_t *error = wasmtime_func_call(context, &initialization_func, NULL, 0, NULL, 0, trap); - 504 | ts_assert(!error); - 505 | return true; - 506 | } else { - 507 | return false; - 508 | } - 509 | } - | - 510 | TSWasmStore *ts_wasm_store_new(TSWasmEngine *engine, TSWasmError *wasm_error) { - 511 | TSWasmStore *self = ts_calloc(1, sizeof(TSWasmStore)); - 512 | wasmtime_store_t *store = wasmtime_store_new(engine, self, NULL); - 513 | wasmtime_context_t *context = wasmtime_store_context(store); - 514 | wasmtime_error_t *error = NULL; - 515 | wasm_trap_t *trap = NULL; - 516 | wasm_message_t message = WASM_EMPTY_VEC; - 517 | wasm_exporttype_vec_t export_types = WASM_EMPTY_VEC; - 518 | wasm_importtype_vec_t import_types = WASM_EMPTY_VEC; - 519 | wasmtime_extern_t *imports = NULL; - 520 | wasmtime_module_t *stdlib_module = NULL; - 521 | wasm_memorytype_t *memory_type = NULL; - 522 | wasm_tabletype_t *table_type = NULL; - | - 523 | // Define functions called by scanners via function pointers on the lexer. 
- 524 | LexerInWasmMemory lexer = { - 525 | .lookahead = 0, - 526 | .result_symbol = 0, - 527 | }; - 528 | FunctionDefinition lexer_definitions[] = { - 529 | { - 530 | (uint32_t *)&lexer.advance, - 531 | callback__lexer_advance, - 532 | wasm_functype_new_2_0(wasm_valtype_new_i32(), wasm_valtype_new_i32()) - 533 | }, - 534 | { - 535 | (uint32_t *)&lexer.mark_end, - 536 | callback__lexer_mark_end, - 537 | wasm_functype_new_1_0(wasm_valtype_new_i32()) - 538 | }, - 539 | { - 540 | (uint32_t *)&lexer.get_column, - 541 | callback__lexer_get_column, - 542 | wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32()) - 543 | }, - 544 | { - 545 | (uint32_t *)&lexer.is_at_included_range_start, - 546 | callback__lexer_is_at_included_range_start, - 547 | wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32()) - 548 | }, - 549 | { - 550 | (uint32_t *)&lexer.eof, - 551 | callback__lexer_eof, - 552 | wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32()) - 553 | }, - 554 | }; - | - 555 | // Define builtin functions that can be imported by scanners. 
- 556 | BuiltinFunctionIndices builtin_fn_indices; - 557 | FunctionDefinition builtin_definitions[] = { - 558 | { - 559 | &builtin_fn_indices.proc_exit, - 560 | callback__abort, - 561 | wasm_functype_new_1_0(wasm_valtype_new_i32()) - 562 | }, - 563 | { - 564 | &builtin_fn_indices.abort, - 565 | callback__abort, - 566 | wasm_functype_new_0_0() - 567 | }, - 568 | { - 569 | &builtin_fn_indices.assert_fail, - 570 | callback__abort, - 571 | wasm_functype_new_4_0(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) - 572 | }, - 573 | { - 574 | &builtin_fn_indices.notify_memory_growth, - 575 | callback__noop, - 576 | wasm_functype_new_1_0(wasm_valtype_new_i32()) - 577 | }, - 578 | { - 579 | &builtin_fn_indices.debug_message, - 580 | callback__debug_message, - 581 | wasm_functype_new_2_0(wasm_valtype_new_i32(), wasm_valtype_new_i32()) - 582 | }, - 583 | { - 584 | &builtin_fn_indices.at_exit, - 585 | callback__noop, - 586 | wasm_functype_new_3_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) - 587 | }, - 588 | { - 589 | &builtin_fn_indices.args_get, - 590 | callback__noop, - 591 | wasm_functype_new_2_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) - 592 | }, - 593 | { - 594 | &builtin_fn_indices.args_sizes_get, - 595 | callback__noop, - 596 | wasm_functype_new_2_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) - 597 | }, - 598 | }; - | - 599 | // Create all of the Wasm functions. 
- 600 | unsigned builtin_definitions_len = array_len(builtin_definitions); - 601 | unsigned lexer_definitions_len = array_len(lexer_definitions); - 602 | for (unsigned i = 0; i < builtin_definitions_len; i++) { - 603 | FunctionDefinition *definition = &builtin_definitions[i]; - 604 | wasmtime_func_t func; - 605 | wasmtime_func_new_unchecked(context, definition->type, definition->callback, self, NULL, &func); - 606 | *definition->storage_location = func.__private; - 607 | wasm_functype_delete(definition->type); - 608 | } - 609 | for (unsigned i = 0; i < lexer_definitions_len; i++) { - 610 | FunctionDefinition *definition = &lexer_definitions[i]; - 611 | wasmtime_func_t func; - 612 | wasmtime_func_new_unchecked(context, definition->type, definition->callback, self, NULL, &func); - 613 | *definition->storage_location = func.__private; - 614 | wasm_functype_delete(definition->type); - 615 | } - | - 616 | // Compile the stdlib module. - 617 | error = wasmtime_module_new(engine, STDLIB_WASM, STDLIB_WASM_LEN, &stdlib_module); - 618 | if (error) { - 619 | wasmtime_error_message(error, &message); - 620 | wasm_error->kind = TSWasmErrorKindCompile; - 621 | format( - 622 | &wasm_error->message, - 623 | "failed to compile Wasm stdlib: %.*s", - 624 | (int)message.size, message.data - 625 | ); - 626 | goto error; - 627 | } - | - 628 | // Retrieve the stdlib module's imports. - 629 | wasmtime_module_imports(stdlib_module, &import_types); - | - 630 | // Find the initial number of memory pages needed by the stdlib. 
- 631 | const wasm_memorytype_t *stdlib_memory_type = NULL; - 632 | for (unsigned i = 0; i < import_types.size; i++) { - 633 | wasm_importtype_t *import_type = import_types.data[i]; - 634 | const wasm_name_t *import_name = wasm_importtype_name(import_type); - 635 | if (name_eq(import_name, "memory")) { - 636 | const wasm_externtype_t *type = wasm_importtype_type(import_type); - 637 | stdlib_memory_type = wasm_externtype_as_memorytype_const(type); - 638 | } - 639 | } - 640 | if (!stdlib_memory_type) { - 641 | wasm_error->kind = TSWasmErrorKindCompile; - 642 | format( - 643 | &wasm_error->message, - 644 | "Wasm stdlib is missing the 'memory' import" - 645 | ); - 646 | goto error; - 647 | } - | - 648 | // Initialize store's memory - 649 | uint64_t initial_memory_pages = wasmtime_memorytype_minimum(stdlib_memory_type); - 650 | wasm_limits_t memory_limits = {.min = initial_memory_pages, .max = MAX_MEMORY_SIZE}; - 651 | memory_type = wasm_memorytype_new(&memory_limits); - 652 | wasmtime_memory_t memory; - 653 | error = wasmtime_memory_new(context, memory_type, &memory); - 654 | if (error) { - 655 | wasmtime_error_message(error, &message); - 656 | wasm_error->kind = TSWasmErrorKindAllocate; - 657 | format( - 658 | &wasm_error->message, - 659 | "failed to allocate Wasm memory: %.*s", - 660 | (int)message.size, message.data - 661 | ); - 662 | goto error; - 663 | } - 664 | wasm_memorytype_delete(memory_type); - 665 | memory_type = NULL; - | - 666 | // Initialize store's function table - 667 | wasm_limits_t table_limits = {.min = 1, .max = wasm_limits_max_default}; - 668 | table_type = wasm_tabletype_new(wasm_valtype_new(WASM_FUNCREF), &table_limits); - 669 | wasmtime_val_t initializer = {.kind = WASMTIME_FUNCREF}; - 670 | wasmtime_table_t function_table; - 671 | error = wasmtime_table_new(context, table_type, &initializer, &function_table); - 672 | if (error) { - 673 | wasmtime_error_message(error, &message); - 674 | wasm_error->kind = TSWasmErrorKindAllocate; - 675 | 
format( - 676 | &wasm_error->message, - 677 | "failed to allocate Wasm table: %.*s", - 678 | (int)message.size, message.data - 679 | ); - 680 | goto error; - 681 | } - 682 | wasm_tabletype_delete(table_type); - 683 | table_type = NULL; - | - 684 | unsigned stdlib_symbols_len = array_len(STDLIB_SYMBOLS); - | - 685 | // Define globals for the stack and heap start addresses. - 686 | wasm_globaltype_t *const_i32_type = wasm_globaltype_new(wasm_valtype_new_i32(), WASM_CONST); - 687 | wasm_globaltype_t *var_i32_type = wasm_globaltype_new(wasm_valtype_new_i32(), WASM_VAR); - | - 688 | wasmtime_val_t stack_pointer_value = WASM_I32_VAL(0); - 689 | wasmtime_global_t stack_pointer_global; - 690 | error = wasmtime_global_new(context, var_i32_type, &stack_pointer_value, &stack_pointer_global); - 691 | wasm_globaltype_delete(var_i32_type); - 692 | ts_assert(!error); - | - 693 | *self = (TSWasmStore) { - 694 | .engine = wasmtime_engine_clone(engine), - 695 | .store = store, - 696 | .memory = memory, - 697 | .function_table = function_table, - 698 | .language_instances = array_new(), - 699 | .stdlib_fn_indices = ts_calloc(stdlib_symbols_len, sizeof(uint32_t)), - 700 | .builtin_fn_indices = builtin_fn_indices, - 701 | .stack_pointer_global = stack_pointer_global, - 702 | .current_memory_offset = 0, - 703 | .current_function_table_offset = 0, - 704 | .const_i32_type = const_i32_type, - 705 | }; - | - 706 | // Set up the imports for the stdlib module. 
- 707 | imports = ts_calloc(import_types.size, sizeof(wasmtime_extern_t)); - 708 | for (unsigned i = 0; i < import_types.size; i++) { - 709 | wasm_importtype_t *type = import_types.data[i]; - 710 | const wasm_name_t *import_name = wasm_importtype_name(type); - 711 | if (!ts_wasm_store__provide_builtin_import(self, import_name, &imports[i])) { - 712 | wasm_error->kind = TSWasmErrorKindInstantiate; - 713 | format( - 714 | &wasm_error->message, - 715 | "unexpected import in Wasm stdlib: %.*s\n", - 716 | (int)import_name->size, import_name->data - 717 | ); - 718 | goto error; - 719 | } - 720 | } - | - 721 | // Instantiate the stdlib module. - 722 | wasmtime_instance_t instance; - 723 | error = wasmtime_instance_new(context, stdlib_module, imports, import_types.size, &instance, &trap); - 724 | ts_free(imports); - 725 | imports = NULL; - 726 | if (error) { - 727 | wasmtime_error_message(error, &message); - 728 | wasm_error->kind = TSWasmErrorKindInstantiate; - 729 | format( - 730 | &wasm_error->message, - 731 | "failed to instantiate Wasm stdlib module: %.*s", - 732 | (int)message.size, message.data - 733 | ); - 734 | goto error; - 735 | } - 736 | if (trap) { - 737 | wasm_trap_message(trap, &message); - 738 | wasm_error->kind = TSWasmErrorKindInstantiate; - 739 | format( - 740 | &wasm_error->message, - 741 | "trapped when instantiating Wasm stdlib module: %.*s", - 742 | (int)message.size, message.data - 743 | ); - 744 | goto error; - 745 | } - 746 | wasm_importtype_vec_delete(&import_types); - | - 747 | // Process the stdlib module's exports. 
- 748 | for (unsigned i = 0; i < stdlib_symbols_len; i++) { - 749 | self->stdlib_fn_indices[i] = UINT32_MAX; - 750 | } - 751 | wasmtime_module_exports(stdlib_module, &export_types); - 752 | for (unsigned i = 0; i < export_types.size; i++) { - 753 | wasm_exporttype_t *export_type = export_types.data[i]; - 754 | const wasm_name_t *name = wasm_exporttype_name(export_type); - | - 755 | char *export_name; - 756 | size_t name_len; - 757 | wasmtime_extern_t export = {.kind = WASM_EXTERN_GLOBAL}; - 758 | bool exists = wasmtime_instance_export_nth(context, &instance, i, &export_name, &name_len, &export); - 759 | ts_assert(exists); - | - 760 | if (export.kind == WASMTIME_EXTERN_GLOBAL) { - 761 | if (name_eq(name, "__stack_pointer")) { - 762 | self->stack_pointer_global = export.of.global; - 763 | } - 764 | } - | - 765 | if (export.kind == WASMTIME_EXTERN_FUNC) { - 766 | if (ts_wasm_store__call_module_initializer(self, name, &export, &trap)) { - 767 | if (trap) { - 768 | wasm_trap_message(trap, &message); - 769 | wasm_error->kind = TSWasmErrorKindInstantiate; - 770 | format( - 771 | &wasm_error->message, - 772 | "trap when calling stdlib relocation function: %.*s\n", - 773 | (int)message.size, message.data - 774 | ); - 775 | goto error; - 776 | } - 777 | continue; - 778 | } - | - 779 | if (name_eq(name, "reset_heap")) { - 780 | self->builtin_fn_indices.reset_heap = export.of.func.__private; - 781 | continue; - 782 | } - | - 783 | for (unsigned j = 0; j < stdlib_symbols_len; j++) { - 784 | if (name_eq(name, STDLIB_SYMBOLS[j])) { - 785 | self->stdlib_fn_indices[j] = export.of.func.__private; - 786 | break; - 787 | } - 788 | } - 789 | } - 790 | } - | - 791 | if (self->builtin_fn_indices.reset_heap == UINT32_MAX) { - 792 | wasm_error->kind = TSWasmErrorKindInstantiate; - 793 | format( - 794 | &wasm_error->message, - 795 | "missing malloc reset function in Wasm stdlib" - 796 | ); - 797 | goto error; - 798 | } - | - 799 | for (unsigned i = 0; i < stdlib_symbols_len; i++) { - 800 | 
if (self->stdlib_fn_indices[i] == UINT32_MAX) { - 801 | wasm_error->kind = TSWasmErrorKindInstantiate; - 802 | format( - 803 | &wasm_error->message, - 804 | "missing exported symbol in Wasm stdlib: %s", - 805 | STDLIB_SYMBOLS[i] - 806 | ); - 807 | goto error; - 808 | } - 809 | } - | - 810 | wasm_exporttype_vec_delete(&export_types); - 811 | wasmtime_module_delete(stdlib_module); - | - 812 | // Add all of the lexer callback functions to the function table. Store their function table - 813 | // indices on the in-memory lexer. - 814 | uint64_t table_index; - 815 | error = wasmtime_table_grow(context, &function_table, lexer_definitions_len, &initializer, &table_index); - 816 | if (error) { - 817 | wasmtime_error_message(error, &message); - 818 | wasm_error->kind = TSWasmErrorKindAllocate; - 819 | format( - 820 | &wasm_error->message, - 821 | "failed to grow Wasm table to initial size: %.*s", - 822 | (int)message.size, message.data - 823 | ); - 824 | goto error; - 825 | } - 826 | for (unsigned i = 0; i < lexer_definitions_len; i++) { - 827 | FunctionDefinition *definition = &lexer_definitions[i]; - 828 | wasmtime_func_t func = {function_table.store_id, *definition->storage_location}; - 829 | wasmtime_val_t func_val = {.kind = WASMTIME_FUNCREF, .of.funcref = func}; - 830 | error = wasmtime_table_set(context, &function_table, table_index, &func_val); - 831 | ts_assert(!error); - 832 | *(int32_t *)(definition->storage_location) = table_index; - 833 | table_index++; - 834 | } - | - 835 | self->current_function_table_offset = table_index; - 836 | self->lexer_address = initial_memory_pages * MEMORY_PAGE_SIZE; - 837 | self->current_memory_offset = self->lexer_address + sizeof(LexerInWasmMemory); - | - 838 | // Grow the memory enough to hold the builtin lexer and serialization buffer. 
- 839 | uint32_t new_pages_needed = (self->current_memory_offset - self->lexer_address - 1) / MEMORY_PAGE_SIZE + 1; - 840 | uint64_t prev_memory_size; - 841 | wasmtime_memory_grow(context, &memory, new_pages_needed, &prev_memory_size); - | - 842 | uint8_t *memory_data = wasmtime_memory_data(context, &memory); - 843 | memcpy(&memory_data[self->lexer_address], &lexer, sizeof(lexer)); - 844 | return self; - | - 845 | error: - 846 | ts_free(self); - 847 | if (stdlib_module) wasmtime_module_delete(stdlib_module); - 848 | if (store) wasmtime_store_delete(store); - 849 | if (import_types.size) wasm_importtype_vec_delete(&import_types); - 850 | if (memory_type) wasm_memorytype_delete(memory_type); - 851 | if (table_type) wasm_tabletype_delete(table_type); - 852 | if (trap) wasm_trap_delete(trap); - 853 | if (error) wasmtime_error_delete(error); - 854 | if (message.size) wasm_byte_vec_delete(&message); - 855 | if (export_types.size) wasm_exporttype_vec_delete(&export_types); - 856 | if (imports) ts_free(imports); - 857 | return NULL; - 858 | } - | - 859 | void ts_wasm_store_delete(TSWasmStore *self) { - 860 | if (!self) return; - 861 | ts_free(self->stdlib_fn_indices); - 862 | wasm_globaltype_delete(self->const_i32_type); - 863 | wasmtime_store_delete(self->store); - 864 | wasm_engine_delete(self->engine); - 865 | for (unsigned i = 0; i < self->language_instances.size; i++) { - 866 | LanguageWasmInstance *instance = array_get(&self->language_instances, i); - 867 | language_id_delete(instance->language_id); - 868 | } - 869 | array_delete(&self->language_instances); - 870 | ts_free(self); - 871 | } - | - 872 | size_t ts_wasm_store_language_count(const TSWasmStore *self) { - 873 | size_t result = 0; - 874 | for (unsigned i = 0; i < self->language_instances.size; i++) { - 875 | const WasmLanguageId *id = array_get(&self->language_instances, i)->language_id; - 876 | if (!id->is_language_deleted) { - 877 | result++; - 878 | } - 879 | } - 880 | return result; - 881 | } - | - 882 | 
static uint32_t ts_wasm_store__heap_address(TSWasmStore *self) { - 883 | return self->current_memory_offset + TREE_SITTER_SERIALIZATION_BUFFER_SIZE; - 884 | } - | - 885 | static uint32_t ts_wasm_store__serialization_buffer_address(TSWasmStore *self) { - 886 | return self->current_memory_offset; - 887 | } - | - 888 | static bool ts_wasm_store__instantiate( - 889 | TSWasmStore *self, - 890 | wasmtime_module_t *module, - 891 | const char *language_name, - 892 | const WasmDylinkInfo *dylink_info, - 893 | wasmtime_instance_t *result, - 894 | int32_t *language_address, - 895 | char **error_message - 896 | ) { - 897 | wasmtime_error_t *error = NULL; - 898 | wasm_trap_t *trap = NULL; - 899 | wasm_message_t message = WASM_EMPTY_VEC; - 900 | char *language_function_name = NULL; - 901 | wasmtime_extern_t *imports = NULL; - 902 | wasmtime_context_t *context = wasmtime_store_context(self->store); - | - 903 | // Grow the function table to make room for the new functions. - 904 | wasmtime_val_t initializer = {.kind = WASMTIME_FUNCREF}; - 905 | uint64_t prev_table_size; - 906 | error = wasmtime_table_grow(context, &self->function_table, dylink_info->table_size, &initializer, &prev_table_size); - 907 | if (error) { - 908 | format(error_message, "invalid function table size %u", dylink_info->table_size); - 909 | goto error; - 910 | } - | - 911 | // Grow the memory to make room for the new data. 
- 912 | uint32_t needed_memory_size = ts_wasm_store__heap_address(self) + dylink_info->memory_size; - 913 | uint32_t current_memory_size = wasmtime_memory_data_size(context, &self->memory); - 914 | if (needed_memory_size > current_memory_size) { - 915 | uint32_t pages_to_grow = ( - 916 | needed_memory_size - current_memory_size + MEMORY_PAGE_SIZE - 1) / - 917 | MEMORY_PAGE_SIZE; - 918 | uint64_t prev_memory_size; - 919 | error = wasmtime_memory_grow(context, &self->memory, pages_to_grow, &prev_memory_size); - 920 | if (error) { - 921 | format(error_message, "invalid memory size %u", dylink_info->memory_size); - 922 | goto error; - 923 | } - 924 | } - | - 925 | // Construct the language function name as string. - 926 | format(&language_function_name, "tree_sitter_%s", language_name); - | - 927 | const uint64_t store_id = self->function_table.store_id; - | - 928 | // Build the imports list for the module. - 929 | wasm_importtype_vec_t import_types = WASM_EMPTY_VEC; - 930 | wasmtime_module_imports(module, &import_types); - 931 | imports = ts_calloc(import_types.size, sizeof(wasmtime_extern_t)); - | - 932 | for (unsigned i = 0; i < import_types.size; i++) { - 933 | const wasm_importtype_t *import_type = import_types.data[i]; - 934 | const wasm_name_t *import_name = wasm_importtype_name(import_type); - 935 | if (import_name->size == 0) { - 936 | format(error_message, "empty import name"); - 937 | goto error; - 938 | } - | - 939 | if (ts_wasm_store__provide_builtin_import(self, import_name, &imports[i])) { - 940 | continue; - 941 | } - | - 942 | bool defined_in_stdlib = false; - 943 | for (unsigned j = 0; j < array_len(STDLIB_SYMBOLS); j++) { - 944 | if (name_eq(import_name, STDLIB_SYMBOLS[j])) { - 945 | uint16_t address = self->stdlib_fn_indices[j]; - 946 | imports[i] = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_FUNC, .of.func = {store_id, address}}; - 947 | defined_in_stdlib = true; - 948 | break; - 949 | } - 950 | } - | - 951 | if (!defined_in_stdlib) { - 952 | 
format( - 953 | error_message, - 954 | "invalid import '%.*s'\n", - 955 | (int)import_name->size, import_name->data - 956 | ); - 957 | goto error; - 958 | } - 959 | } - | - 960 | wasmtime_instance_t instance; - 961 | error = wasmtime_instance_new(context, module, imports, import_types.size, &instance, &trap); - 962 | wasm_importtype_vec_delete(&import_types); - 963 | ts_free(imports); - 964 | imports = NULL; - 965 | if (error) { - 966 | wasmtime_error_message(error, &message); - 967 | format( - 968 | error_message, - 969 | "error instantiating Wasm module: %.*s\n", - 970 | (int)message.size, message.data - 971 | ); - 972 | goto error; - 973 | } - 974 | if (trap) { - 975 | wasm_trap_message(trap, &message); - 976 | format( - 977 | error_message, - 978 | "trap when instantiating Wasm module: %.*s\n", - 979 | (int)message.size, message.data - 980 | ); - 981 | goto error; - 982 | } - | - 983 | self->current_memory_offset += dylink_info->memory_size; - 984 | self->current_function_table_offset += dylink_info->table_size; - | - 985 | // Process the module's exports. - 986 | bool found_language = false; - 987 | wasmtime_extern_t language_extern; - 988 | wasm_exporttype_vec_t export_types = WASM_EMPTY_VEC; - 989 | wasmtime_module_exports(module, &export_types); - 990 | for (unsigned i = 0; i < export_types.size; i++) { - 991 | wasm_exporttype_t *export_type = export_types.data[i]; - 992 | const wasm_name_t *name = wasm_exporttype_name(export_type); - | - 993 | size_t name_len; - 994 | char *export_name; - 995 | wasmtime_extern_t export = {.kind = WASM_EXTERN_GLOBAL}; - 996 | bool exists = wasmtime_instance_export_nth(context, &instance, i, &export_name, &name_len, &export); - 997 | ts_assert(exists); - | - 998 | // If the module exports an initialization or data-relocation function, call it. 
- 999 | if (ts_wasm_store__call_module_initializer(self, name, &export, &trap)) { -1000 | if (trap) { -1001 | wasm_trap_message(trap, &message); -1002 | format( -1003 | error_message, -1004 | "trap when calling data relocation function: %.*s\n", -1005 | (int)message.size, message.data -1006 | ); -1007 | goto error; -1008 | } -1009 | } - | -1010 | // Find the main language function for the module. -1011 | else if (name_eq(name, language_function_name)) { -1012 | language_extern = export; -1013 | found_language = true; -1014 | } -1015 | } -1016 | wasm_exporttype_vec_delete(&export_types); - | -1017 | if (!found_language) { -1018 | format( -1019 | error_message, -1020 | "module did not contain language function: %s", -1021 | language_function_name -1022 | ); -1023 | goto error; -1024 | } - | -1025 | // Invoke the language function to get the static address of the language object. -1026 | wasmtime_func_t language_func = language_extern.of.func; -1027 | wasmtime_val_t language_address_val; -1028 | error = wasmtime_func_call(context, &language_func, NULL, 0, &language_address_val, 1, &trap); -1029 | ts_assert(!error); -1030 | if (trap) { -1031 | wasm_trap_message(trap, &message); -1032 | format( -1033 | error_message, -1034 | "trapped when calling language function: %s: %.*s\n", -1035 | language_function_name, (int)message.size, message.data -1036 | ); -1037 | goto error; -1038 | } - | -1039 | if (language_address_val.kind != WASMTIME_I32) { -1040 | format( -1041 | error_message, -1042 | "language function did not return an integer: %s\n", -1043 | language_function_name -1044 | ); -1045 | goto error; -1046 | } - | -1047 | ts_free(language_function_name); -1048 | *result = instance; -1049 | *language_address = language_address_val.of.i32; -1050 | return true; - | -1051 | error: -1052 | if (language_function_name) ts_free(language_function_name); -1053 | if (message.size) wasm_byte_vec_delete(&message); -1054 | if (error) wasmtime_error_delete(error); -1055 | if (trap) 
wasm_trap_delete(trap); -1056 | if (imports) ts_free(imports); -1057 | return false; -1058 | } - | -1059 | static bool ts_wasm_store__sentinel_lex_fn(TSLexer *_lexer, TSStateId state) { -1060 | return false; -1061 | } - | -1062 | const TSLanguage *ts_wasm_store_load_language( -1063 | TSWasmStore *self, -1064 | const char *language_name, -1065 | const char *wasm, -1066 | uint32_t wasm_len, -1067 | TSWasmError *wasm_error -1068 | ) { -1069 | WasmDylinkInfo dylink_info; -1070 | wasmtime_module_t *module = NULL; -1071 | wasmtime_error_t *error = NULL; -1072 | wasm_error->kind = TSWasmErrorKindNone; - | -1073 | if (!wasm_dylink_info__parse((const unsigned char *)wasm, wasm_len, &dylink_info)) { -1074 | wasm_error->kind = TSWasmErrorKindParse; -1075 | format(&wasm_error->message, "failed to parse dylink section of Wasm module"); -1076 | goto error; -1077 | } - | -1078 | // Compile the Wasm code. -1079 | error = wasmtime_module_new(self->engine, (const uint8_t *)wasm, wasm_len, &module); -1080 | if (error) { -1081 | wasm_message_t message; -1082 | wasmtime_error_message(error, &message); -1083 | wasm_error->kind = TSWasmErrorKindCompile; -1084 | format(&wasm_error->message, "error compiling Wasm module: %.*s", (int)message.size, message.data); -1085 | wasm_byte_vec_delete(&message); -1086 | goto error; -1087 | } - | -1088 | // Instantiate the module in this store. -1089 | wasmtime_instance_t instance; -1090 | int32_t language_address; -1091 | if (!ts_wasm_store__instantiate( -1092 | self, -1093 | module, -1094 | language_name, -1095 | &dylink_info, -1096 | &instance, -1097 | &language_address, -1098 | &wasm_error->message -1099 | )) { -1100 | wasm_error->kind = TSWasmErrorKindInstantiate; -1101 | goto error; -1102 | } - | -1103 | // Copy all of the static data out of the language object in Wasm memory, -1104 | // constructing a native language object. 
-1105 | LanguageInWasmMemory wasm_language; -1106 | wasmtime_context_t *context = wasmtime_store_context(self->store); -1107 | const uint8_t *memory = wasmtime_memory_data(context, &self->memory); -1108 | memcpy(&wasm_language, &memory[language_address], sizeof(LanguageInWasmMemory)); - | -1109 | bool has_supertypes = -1110 | wasm_language.abi_version > LANGUAGE_VERSION_WITH_RESERVED_WORDS && -1111 | wasm_language.supertype_count > 0; - | -1112 | int32_t addresses[] = { -1113 | wasm_language.parse_table, -1114 | wasm_language.small_parse_table, -1115 | wasm_language.small_parse_table_map, -1116 | wasm_language.parse_actions, -1117 | wasm_language.symbol_names, -1118 | wasm_language.field_names, -1119 | wasm_language.field_map_slices, -1120 | wasm_language.field_map_entries, -1121 | wasm_language.symbol_metadata, -1122 | wasm_language.public_symbol_map, -1123 | wasm_language.alias_map, -1124 | wasm_language.alias_sequences, -1125 | wasm_language.lex_modes, -1126 | wasm_language.lex_fn, -1127 | wasm_language.keyword_lex_fn, -1128 | wasm_language.primary_state_ids, -1129 | wasm_language.name, -1130 | wasm_language.reserved_words, -1131 | has_supertypes ? wasm_language.supertype_symbols : 0, -1132 | has_supertypes ? wasm_language.supertype_map_entries : 0, -1133 | has_supertypes ? wasm_language.supertype_map_slices : 0, -1134 | wasm_language.external_token_count > 0 ? wasm_language.external_scanner.states : 0, -1135 | wasm_language.external_token_count > 0 ? wasm_language.external_scanner.symbol_map : 0, -1136 | wasm_language.external_token_count > 0 ? wasm_language.external_scanner.create : 0, -1137 | wasm_language.external_token_count > 0 ? wasm_language.external_scanner.destroy : 0, -1138 | wasm_language.external_token_count > 0 ? wasm_language.external_scanner.scan : 0, -1139 | wasm_language.external_token_count > 0 ? wasm_language.external_scanner.serialize : 0, -1140 | wasm_language.external_token_count > 0 ? 
wasm_language.external_scanner.deserialize : 0, -1141 | language_address, -1142 | self->current_memory_offset, -1143 | }; -1144 | uint32_t address_count = array_len(addresses); - | -1145 | TSLanguage *language = ts_calloc(1, sizeof(TSLanguage)); -1146 | StringData symbol_name_buffer = array_new(); -1147 | StringData field_name_buffer = array_new(); - | -1148 | *language = (TSLanguage) { -1149 | .abi_version = wasm_language.abi_version, -1150 | .symbol_count = wasm_language.symbol_count, -1151 | .alias_count = wasm_language.alias_count, -1152 | .token_count = wasm_language.token_count, -1153 | .external_token_count = wasm_language.external_token_count, -1154 | .state_count = wasm_language.state_count, -1155 | .large_state_count = wasm_language.large_state_count, -1156 | .production_id_count = wasm_language.production_id_count, -1157 | .field_count = wasm_language.field_count, -1158 | .supertype_count = wasm_language.supertype_count, -1159 | .max_alias_sequence_length = wasm_language.max_alias_sequence_length, -1160 | .keyword_capture_token = wasm_language.keyword_capture_token, -1161 | .metadata = wasm_language.metadata, -1162 | .parse_table = copy( -1163 | &memory[wasm_language.parse_table], -1164 | wasm_language.large_state_count * wasm_language.symbol_count * sizeof(uint16_t) -1165 | ), -1166 | .parse_actions = copy_unsized_static_array( -1167 | memory, -1168 | wasm_language.parse_actions, -1169 | addresses, -1170 | address_count -1171 | ), -1172 | .symbol_names = copy_strings( -1173 | memory, -1174 | wasm_language.symbol_names, -1175 | wasm_language.symbol_count + wasm_language.alias_count, -1176 | &symbol_name_buffer -1177 | ), -1178 | .symbol_metadata = copy( -1179 | &memory[wasm_language.symbol_metadata], -1180 | (wasm_language.symbol_count + wasm_language.alias_count) * sizeof(TSSymbolMetadata) -1181 | ), -1182 | .public_symbol_map = copy( -1183 | &memory[wasm_language.public_symbol_map], -1184 | (wasm_language.symbol_count + wasm_language.alias_count) * 
sizeof(TSSymbol) -1185 | ), -1186 | .lex_modes = copy( -1187 | &memory[wasm_language.lex_modes], -1188 | wasm_language.state_count * sizeof(TSLexerMode) -1189 | ), -1190 | }; - | -1191 | if (language->field_count > 0 && language->production_id_count > 0) { -1192 | language->field_map_slices = copy( -1193 | &memory[wasm_language.field_map_slices], -1194 | wasm_language.production_id_count * sizeof(TSMapSlice) -1195 | ); - | -1196 | // Determine the number of field map entries by finding the greatest index -1197 | // in any of the slices. -1198 | uint32_t field_map_entry_count = 0; -1199 | for (uint32_t i = 0; i < wasm_language.production_id_count; i++) { -1200 | TSMapSlice slice = language->field_map_slices[i]; -1201 | uint32_t slice_end = slice.index + slice.length; -1202 | if (slice_end > field_map_entry_count) { -1203 | field_map_entry_count = slice_end; -1204 | } -1205 | } - | -1206 | language->field_map_entries = copy( -1207 | &memory[wasm_language.field_map_entries], -1208 | field_map_entry_count * sizeof(TSFieldMapEntry) -1209 | ); -1210 | language->field_names = copy_strings( -1211 | memory, -1212 | wasm_language.field_names, -1213 | wasm_language.field_count + 1, -1214 | &field_name_buffer -1215 | ); -1216 | } - | -1217 | if (has_supertypes) { -1218 | language->supertype_symbols = copy( -1219 | &memory[wasm_language.supertype_symbols], -1220 | wasm_language.supertype_count * sizeof(TSSymbol) -1221 | ); - | -1222 | // Determine the number of supertype map slices by finding the greatest -1223 | // supertype ID. 
-1224 | int largest_supertype = 0; -1225 | for (unsigned i = 0; i < language->supertype_count; i++) { -1226 | TSSymbol supertype = language->supertype_symbols[i]; -1227 | if (supertype > largest_supertype) { -1228 | largest_supertype = supertype; -1229 | } -1230 | } - | -1231 | language->supertype_map_slices = copy( -1232 | &memory[wasm_language.supertype_map_slices], -1233 | (largest_supertype + 1) * sizeof(TSMapSlice) -1234 | ); - | -1235 | TSSymbol last_supertype = language->supertype_symbols[language->supertype_count - 1]; -1236 | TSMapSlice last_slice = language->supertype_map_slices[last_supertype]; -1237 | uint32_t supertype_map_entry_count = last_slice.index + last_slice.length; - | -1238 | language->supertype_map_entries = copy( -1239 | &memory[wasm_language.supertype_map_entries], -1240 | supertype_map_entry_count * sizeof(char *) -1241 | ); -1242 | } - | -1243 | if (language->max_alias_sequence_length > 0 && language->production_id_count > 0) { -1244 | // The alias map contains symbols, alias counts, and aliases, terminated by a null symbol. 
-1245 | int32_t alias_map_size = 0; -1246 | for (;;) { -1247 | TSSymbol symbol; -1248 | memcpy(&symbol, &memory[wasm_language.alias_map + alias_map_size], sizeof(symbol)); -1249 | alias_map_size += sizeof(TSSymbol); -1250 | if (symbol == 0) break; -1251 | uint16_t value_count; -1252 | memcpy(&value_count, &memory[wasm_language.alias_map + alias_map_size], sizeof(value_count)); -1253 | alias_map_size += sizeof(uint16_t); -1254 | alias_map_size += value_count * sizeof(TSSymbol); -1255 | } -1256 | language->alias_map = copy( -1257 | &memory[wasm_language.alias_map], -1258 | alias_map_size -1259 | ); -1260 | language->alias_sequences = copy( -1261 | &memory[wasm_language.alias_sequences], -1262 | wasm_language.production_id_count * wasm_language.max_alias_sequence_length * sizeof(TSSymbol) -1263 | ); -1264 | } - | -1265 | if (language->state_count > language->large_state_count) { -1266 | uint32_t small_state_count = wasm_language.state_count - wasm_language.large_state_count; -1267 | language->small_parse_table_map = copy( -1268 | &memory[wasm_language.small_parse_table_map], -1269 | small_state_count * sizeof(uint32_t) -1270 | ); -1271 | language->small_parse_table = copy_unsized_static_array( -1272 | memory, -1273 | wasm_language.small_parse_table, -1274 | addresses, -1275 | address_count -1276 | ); -1277 | } - | -1278 | if (language->abi_version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { -1279 | language->primary_state_ids = copy( -1280 | &memory[wasm_language.primary_state_ids], -1281 | wasm_language.state_count * sizeof(TSStateId) -1282 | ); -1283 | } - | -1284 | if (language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { -1285 | language->name = copy_string(memory, wasm_language.name); -1286 | language->reserved_words = copy( -1287 | &memory[wasm_language.reserved_words], -1288 | wasm_language.max_reserved_word_set_size * sizeof(TSSymbol) -1289 | ); -1290 | language->max_reserved_word_set_size = wasm_language.max_reserved_word_set_size; -1291 | } - | 
-1292 | if (language->external_token_count > 0) { -1293 | language->external_scanner.symbol_map = copy( -1294 | &memory[wasm_language.external_scanner.symbol_map], -1295 | wasm_language.external_token_count * sizeof(TSSymbol) -1296 | ); -1297 | language->external_scanner.states = (void *)(uintptr_t)wasm_language.external_scanner.states; -1298 | } - | -1299 | unsigned name_len = strlen(language_name); -1300 | char *name = ts_malloc(name_len + 1); -1301 | memcpy(name, language_name, name_len); -1302 | name[name_len] = '\0'; - | -1303 | LanguageWasmModule *language_module = ts_malloc(sizeof(LanguageWasmModule)); -1304 | *language_module = (LanguageWasmModule) { -1305 | .language_id = language_id_new(), -1306 | .module = module, -1307 | .name = name, -1308 | .symbol_name_buffer = symbol_name_buffer.contents, -1309 | .field_name_buffer = field_name_buffer.contents, -1310 | .dylink_info = dylink_info, -1311 | .ref_count = 1, -1312 | }; - | -1313 | // The lex functions are not used for Wasm languages. Use those two fields -1314 | // to mark this language as Wasm-based and to store the language's -1315 | // Wasm-specific data. -1316 | language->lex_fn = ts_wasm_store__sentinel_lex_fn; -1317 | language->keyword_lex_fn = (bool (*)(TSLexer *, TSStateId))language_module; - | -1318 | // Clear out any instances of languages that have been deleted. -1319 | for (unsigned i = 0; i < self->language_instances.size; i++) { -1320 | WasmLanguageId *id = array_get(&self->language_instances, i)->language_id; -1321 | if (id->is_language_deleted) { -1322 | language_id_delete(id); -1323 | array_erase(&self->language_instances, i); -1324 | i--; -1325 | } -1326 | } - | -1327 | // Store this store's instance of this language module. 
-1328 | array_push(&self->language_instances, ((LanguageWasmInstance) { -1329 | .language_id = language_id_clone(language_module->language_id), -1330 | .instance = instance, -1331 | .external_states_address = wasm_language.external_scanner.states, -1332 | .lex_main_fn_index = wasm_language.lex_fn, -1333 | .lex_keyword_fn_index = wasm_language.keyword_lex_fn, -1334 | .scanner_create_fn_index = wasm_language.external_scanner.create, -1335 | .scanner_destroy_fn_index = wasm_language.external_scanner.destroy, -1336 | .scanner_serialize_fn_index = wasm_language.external_scanner.serialize, -1337 | .scanner_deserialize_fn_index = wasm_language.external_scanner.deserialize, -1338 | .scanner_scan_fn_index = wasm_language.external_scanner.scan, -1339 | })); - | -1340 | return language; - | -1341 | error: -1342 | if (module) wasmtime_module_delete(module); -1343 | return NULL; -1344 | } - | -1345 | bool ts_wasm_store_add_language( -1346 | TSWasmStore *self, -1347 | const TSLanguage *language, -1348 | uint32_t *index -1349 | ) { -1350 | wasmtime_context_t *context = wasmtime_store_context(self->store); -1351 | const LanguageWasmModule *language_module = (void *)language->keyword_lex_fn; - | -1352 | // Search for this store's instance of the language module. Also clear out any -1353 | // instances of languages that have been deleted. -1354 | bool exists = false; -1355 | for (unsigned i = 0; i < self->language_instances.size; i++) { -1356 | WasmLanguageId *id = array_get(&self->language_instances, i)->language_id; -1357 | if (id->is_language_deleted) { -1358 | language_id_delete(id); -1359 | array_erase(&self->language_instances, i); -1360 | i--; -1361 | } else if (id == language_module->language_id) { -1362 | exists = true; -1363 | *index = i; -1364 | } -1365 | } - | -1366 | // If the language module has not been instantiated in this store, then add -1367 | // it to this store. 
-1368 | if (!exists) { -1369 | *index = self->language_instances.size; -1370 | char *message; -1371 | wasmtime_instance_t instance; -1372 | int32_t language_address; -1373 | if (!ts_wasm_store__instantiate( -1374 | self, -1375 | language_module->module, -1376 | language_module->name, -1377 | &language_module->dylink_info, -1378 | &instance, -1379 | &language_address, -1380 | &message -1381 | )) { -1382 | ts_free(message); -1383 | return false; -1384 | } - | -1385 | LanguageInWasmMemory wasm_language; -1386 | const uint8_t *memory = wasmtime_memory_data(context, &self->memory); -1387 | memcpy(&wasm_language, &memory[language_address], sizeof(LanguageInWasmMemory)); -1388 | array_push(&self->language_instances, ((LanguageWasmInstance) { -1389 | .language_id = language_id_clone(language_module->language_id), -1390 | .instance = instance, -1391 | .external_states_address = wasm_language.external_scanner.states, -1392 | .lex_main_fn_index = wasm_language.lex_fn, -1393 | .lex_keyword_fn_index = wasm_language.keyword_lex_fn, -1394 | .scanner_create_fn_index = wasm_language.external_scanner.create, -1395 | .scanner_destroy_fn_index = wasm_language.external_scanner.destroy, -1396 | .scanner_serialize_fn_index = wasm_language.external_scanner.serialize, -1397 | .scanner_deserialize_fn_index = wasm_language.external_scanner.deserialize, -1398 | .scanner_scan_fn_index = wasm_language.external_scanner.scan, -1399 | })); -1400 | } - | -1401 | return true; -1402 | } - | -1403 | void ts_wasm_store_reset_heap(TSWasmStore *self) { -1404 | wasmtime_context_t *context = wasmtime_store_context(self->store); -1405 | wasmtime_func_t func = { -1406 | self->function_table.store_id, -1407 | self->builtin_fn_indices.reset_heap -1408 | }; -1409 | wasm_trap_t *trap = NULL; -1410 | wasmtime_val_t args[1] = { -1411 | {.of.i32 = ts_wasm_store__heap_address(self), .kind = WASMTIME_I32}, -1412 | }; - | -1413 | wasmtime_error_t *error = wasmtime_func_call(context, &func, args, 1, NULL, 0, &trap); 
-1414 | ts_assert(!error); -1415 | ts_assert(!trap); -1416 | } - | -1417 | bool ts_wasm_store_start(TSWasmStore *self, TSLexer *lexer, const TSLanguage *language) { -1418 | uint32_t instance_index; -1419 | if (!ts_wasm_store_add_language(self, language, &instance_index)) return false; -1420 | self->current_lexer = lexer; -1421 | self->current_instance = array_get(&self->language_instances, instance_index); -1422 | self->has_error = false; -1423 | ts_wasm_store_reset_heap(self); -1424 | return true; -1425 | } - | -1426 | void ts_wasm_store_reset(TSWasmStore *self) { -1427 | self->current_lexer = NULL; -1428 | self->current_instance = NULL; -1429 | self->has_error = false; -1430 | ts_wasm_store_reset_heap(self); -1431 | } - | -1432 | static void ts_wasm_store__call( -1433 | TSWasmStore *self, -1434 | int32_t function_index, -1435 | wasmtime_val_raw_t *args_and_results, -1436 | size_t args_and_results_len -1437 | ) { -1438 | wasmtime_context_t *context = wasmtime_store_context(self->store); -1439 | wasmtime_val_t value; -1440 | bool succeeded = wasmtime_table_get(context, &self->function_table, function_index, &value); -1441 | ts_assert(succeeded); -1442 | ts_assert(value.kind == WASMTIME_FUNCREF); -1443 | wasmtime_func_t func = value.of.funcref; - | -1444 | wasm_trap_t *trap = NULL; -1445 | wasmtime_error_t *error = wasmtime_func_call_unchecked(context, &func, args_and_results, args_and_results_len, &trap); -1446 | if (error) { -1447 | // wasm_message_t message; -1448 | // wasmtime_error_message(error, &message); -1449 | // fprintf( -1450 | // stderr, -1451 | // "error in Wasm module: %.*s\n", -1452 | // (int)message.size, message.data -1453 | // ); -1454 | wasmtime_error_delete(error); -1455 | self->has_error = true; -1456 | } else if (trap) { -1457 | // wasm_message_t message; -1458 | // wasm_trap_message(trap, &message); -1459 | // fprintf( -1460 | // stderr, -1461 | // "trap in Wasm module: %.*s\n", -1462 | // (int)message.size, message.data -1463 | // ); -1464 | 
wasm_trap_delete(trap); -1465 | self->has_error = true; -1466 | } -1467 | } - | -1468 | // The data fields of TSLexer, without the function pointers. -1469 | // -1470 | // This portion of the struct needs to be copied in and out -1471 | // of Wasm memory before and after calling a scan function. -1472 | typedef struct { -1473 | int32_t lookahead; -1474 | TSSymbol result_symbol; -1475 | } TSLexerDataPrefix; - | -1476 | static bool ts_wasm_store__call_lex_function(TSWasmStore *self, unsigned function_index, TSStateId state) { -1477 | wasmtime_context_t *context = wasmtime_store_context(self->store); -1478 | uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); -1479 | memcpy( -1480 | &memory_data[self->lexer_address], -1481 | self->current_lexer, -1482 | sizeof(TSLexerDataPrefix) -1483 | ); - | -1484 | wasmtime_val_raw_t args[2] = { -1485 | {.i32 = self->lexer_address}, -1486 | {.i32 = state}, -1487 | }; -1488 | ts_wasm_store__call(self, function_index, args, 2); -1489 | if (self->has_error) return false; -1490 | bool result = args[0].i32; - | -1491 | memcpy( -1492 | self->current_lexer, -1493 | &memory_data[self->lexer_address], -1494 | sizeof(TSLexerDataPrefix) -1495 | ); -1496 | return result; -1497 | } - | -1498 | bool ts_wasm_store_call_lex_main(TSWasmStore *self, TSStateId state) { -1499 | return ts_wasm_store__call_lex_function( -1500 | self, -1501 | self->current_instance->lex_main_fn_index, -1502 | state -1503 | ); -1504 | } - | -1505 | bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state) { -1506 | return ts_wasm_store__call_lex_function( -1507 | self, -1508 | self->current_instance->lex_keyword_fn_index, -1509 | state -1510 | ); -1511 | } - | -1512 | uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self) { -1513 | wasmtime_val_raw_t args[1] = {{.i32 = 0}}; -1514 | ts_wasm_store__call(self, self->current_instance->scanner_create_fn_index, args, 1); -1515 | if (self->has_error) return 0; -1516 | return args[0].i32; 
-1517 | } - | -1518 | void ts_wasm_store_call_scanner_destroy(TSWasmStore *self, uint32_t scanner_address) { -1519 | if (self->current_instance) { -1520 | wasmtime_val_raw_t args[1] = {{.i32 = scanner_address}}; -1521 | ts_wasm_store__call(self, self->current_instance->scanner_destroy_fn_index, args, 1); -1522 | } -1523 | } - | -1524 | bool ts_wasm_store_call_scanner_scan( -1525 | TSWasmStore *self, -1526 | uint32_t scanner_address, -1527 | uint32_t valid_tokens_ix -1528 | ) { -1529 | wasmtime_context_t *context = wasmtime_store_context(self->store); -1530 | uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); - | -1531 | memcpy( -1532 | &memory_data[self->lexer_address], -1533 | self->current_lexer, -1534 | sizeof(TSLexerDataPrefix) -1535 | ); - | -1536 | uint32_t valid_tokens_address = -1537 | self->current_instance->external_states_address + -1538 | (valid_tokens_ix * sizeof(bool)); -1539 | wasmtime_val_raw_t args[3] = { -1540 | {.i32 = scanner_address}, -1541 | {.i32 = self->lexer_address}, -1542 | {.i32 = valid_tokens_address} -1543 | }; -1544 | ts_wasm_store__call(self, self->current_instance->scanner_scan_fn_index, args, 3); -1545 | if (self->has_error) return false; - | -1546 | memcpy( -1547 | self->current_lexer, -1548 | &memory_data[self->lexer_address], -1549 | sizeof(TSLexerDataPrefix) -1550 | ); -1551 | return args[0].i32; -1552 | } - | -1553 | uint32_t ts_wasm_store_call_scanner_serialize( -1554 | TSWasmStore *self, -1555 | uint32_t scanner_address, -1556 | char *buffer -1557 | ) { -1558 | wasmtime_context_t *context = wasmtime_store_context(self->store); -1559 | uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); -1560 | uint32_t serialization_buffer_address = ts_wasm_store__serialization_buffer_address(self); - | -1561 | wasmtime_val_raw_t args[2] = { -1562 | {.i32 = scanner_address}, -1563 | {.i32 = serialization_buffer_address}, -1564 | }; -1565 | ts_wasm_store__call(self, 
self->current_instance->scanner_serialize_fn_index, args, 2); -1566 | if (self->has_error) return 0; - | -1567 | uint32_t length = args[0].i32; -1568 | if (length > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { -1569 | self->has_error = true; -1570 | return 0; -1571 | } - | -1572 | if (length > 0) { -1573 | memcpy( -1574 | ((Lexer *)self->current_lexer)->debug_buffer, -1575 | &memory_data[serialization_buffer_address], -1576 | length -1577 | ); -1578 | } -1579 | return length; -1580 | } - | -1581 | void ts_wasm_store_call_scanner_deserialize( -1582 | TSWasmStore *self, -1583 | uint32_t scanner_address, -1584 | const char *buffer, -1585 | unsigned length -1586 | ) { -1587 | wasmtime_context_t *context = wasmtime_store_context(self->store); -1588 | uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); -1589 | uint32_t serialization_buffer_address = ts_wasm_store__serialization_buffer_address(self); - | -1590 | if (length > 0) { -1591 | memcpy( -1592 | &memory_data[serialization_buffer_address], -1593 | buffer, -1594 | length -1595 | ); -1596 | } - | -1597 | wasmtime_val_raw_t args[3] = { -1598 | {.i32 = scanner_address}, -1599 | {.i32 = serialization_buffer_address}, -1600 | {.i32 = length}, -1601 | }; -1602 | ts_wasm_store__call(self, self->current_instance->scanner_deserialize_fn_index, args, 3); -1603 | } - | -1604 | bool ts_wasm_store_has_error(const TSWasmStore *self) { -1605 | return self->has_error; -1606 | } - | -1607 | bool ts_language_is_wasm(const TSLanguage *self) { -1608 | return self->lex_fn == ts_wasm_store__sentinel_lex_fn; -1609 | } - | -1610 | static inline LanguageWasmModule *ts_language__wasm_module(const TSLanguage *self) { -1611 | return (LanguageWasmModule *)self->keyword_lex_fn; -1612 | } - | -1613 | void ts_wasm_language_retain(const TSLanguage *self) { -1614 | LanguageWasmModule *module = ts_language__wasm_module(self); -1615 | ts_assert(module->ref_count > 0); -1616 | atomic_inc(&module->ref_count); -1617 | } - | -1618 | void 
ts_wasm_language_release(const TSLanguage *self) { -1619 | LanguageWasmModule *module = ts_language__wasm_module(self); -1620 | ts_assert(module->ref_count > 0); -1621 | if (atomic_dec(&module->ref_count) == 0) { -1622 | // Update the language id to reflect that the language is deleted. This allows any Wasm stores -1623 | // that hold Wasm instances for this language to delete those instances. -1624 | atomic_inc(&module->language_id->is_language_deleted); -1625 | language_id_delete(module->language_id); - | -1626 | ts_free((void *)module->field_name_buffer); -1627 | ts_free((void *)module->symbol_name_buffer); -1628 | ts_free((void *)module->name); -1629 | wasmtime_module_delete(module->module); -1630 | ts_free(module); - | -1631 | ts_free((void *)self->alias_map); -1632 | ts_free((void *)self->alias_sequences); -1633 | ts_free((void *)self->external_scanner.symbol_map); -1634 | ts_free((void *)self->field_map_entries); -1635 | ts_free((void *)self->field_map_slices); -1636 | ts_free((void *)self->supertype_symbols); -1637 | ts_free((void *)self->supertype_map_entries); -1638 | ts_free((void *)self->supertype_map_slices); -1639 | ts_free((void *)self->field_names); -1640 | ts_free((void *)self->lex_modes); -1641 | ts_free((void *)self->name); -1642 | ts_free((void *)self->reserved_words); -1643 | ts_free((void *)self->parse_actions); -1644 | ts_free((void *)self->parse_table); -1645 | ts_free((void *)self->primary_state_ids); -1646 | ts_free((void *)self->public_symbol_map); -1647 | ts_free((void *)self->small_parse_table); -1648 | ts_free((void *)self->small_parse_table_map); -1649 | ts_free((void *)self->symbol_metadata); -1650 | ts_free((void *)self->symbol_names); -1651 | ts_free((void *)self); -1652 | } -1653 | } - | -1654 | #ifdef _MSC_VER -1655 | #pragma warning(pop) -1656 | #elif defined(__GNUC__) || defined(__clang__) -1657 | #pragma GCC diagnostic pop -1658 | #endif - | -1659 | #else - | -1660 | // If the Wasm feature is not enabled, define dummy versions 
of all of the -1661 | // Wasm-related functions. - | -1662 | void ts_wasm_store_delete(TSWasmStore *self) { -1663 | (void)self; -1664 | } - | -1665 | bool ts_wasm_store_start( -1666 | TSWasmStore *self, -1667 | TSLexer *lexer, -1668 | const TSLanguage *language -1669 | ) { -1670 | (void)self; -1671 | (void)lexer; -1672 | (void)language; -1673 | return false; -1674 | } - | -1675 | void ts_wasm_store_reset(TSWasmStore *self) { -1676 | (void)self; -1677 | } - | -1678 | bool ts_wasm_store_call_lex_main(TSWasmStore *self, TSStateId state) { -1679 | (void)self; -1680 | (void)state; -1681 | return false; -1682 | } - | -1683 | bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state) { -1684 | (void)self; -1685 | (void)state; -1686 | return false; -1687 | } - | -1688 | uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self) { -1689 | (void)self; -1690 | return 0; -1691 | } - | -1692 | void ts_wasm_store_call_scanner_destroy( -1693 | TSWasmStore *self, -1694 | uint32_t scanner_address -1695 | ) { -1696 | (void)self; -1697 | (void)scanner_address; -1698 | } - | -1699 | bool ts_wasm_store_call_scanner_scan( -1700 | TSWasmStore *self, -1701 | uint32_t scanner_address, -1702 | uint32_t valid_tokens_ix -1703 | ) { -1704 | (void)self; -1705 | (void)scanner_address; -1706 | (void)valid_tokens_ix; -1707 | return false; -1708 | } - | -1709 | uint32_t ts_wasm_store_call_scanner_serialize( -1710 | TSWasmStore *self, -1711 | uint32_t scanner_address, -1712 | char *buffer -1713 | ) { -1714 | (void)self; -1715 | (void)scanner_address; -1716 | (void)buffer; -1717 | return 0; -1718 | } - | -1719 | void ts_wasm_store_call_scanner_deserialize( -1720 | TSWasmStore *self, -1721 | uint32_t scanner_address, -1722 | const char *buffer, -1723 | unsigned length -1724 | ) { -1725 | (void)self; -1726 | (void)scanner_address; -1727 | (void)buffer; -1728 | (void)length; -1729 | } - | -1730 | bool ts_wasm_store_has_error(const TSWasmStore *self) { -1731 | (void)self; -1732 | return 
false; -1733 | } - | -1734 | bool ts_language_is_wasm(const TSLanguage *self) { -1735 | (void)self; -1736 | return false; -1737 | } - | -1738 | void ts_wasm_language_retain(const TSLanguage *self) { -1739 | (void)self; -1740 | } - | -1741 | void ts_wasm_language_release(const TSLanguage *self) { -1742 | (void)self; -1743 | } - | -1744 | #endif - - - --------------------------------------------------------------------------------- -/lib/src/wasm_store.h: --------------------------------------------------------------------------------- - 1 | #ifndef TREE_SITTER_WASM_H_ - 2 | #define TREE_SITTER_WASM_H_ - | - 3 | #ifdef __cplusplus - 4 | extern "C" { - 5 | #endif - | - 6 | #include "tree_sitter/api.h" - 7 | #include "./parser.h" - | - 8 | bool ts_wasm_store_start(TSWasmStore *self, TSLexer *lexer, const TSLanguage *language); - 9 | void ts_wasm_store_reset(TSWasmStore *self); - 10 | bool ts_wasm_store_has_error(const TSWasmStore *self); - | - 11 | bool ts_wasm_store_call_lex_main(TSWasmStore *self, TSStateId state); - 12 | bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state); - | - 13 | uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self); - 14 | void ts_wasm_store_call_scanner_destroy(TSWasmStore *self, uint32_t scanner_address); - 15 | bool ts_wasm_store_call_scanner_scan(TSWasmStore *self, uint32_t scanner_address, uint32_t valid_tokens_ix); - 16 | uint32_t ts_wasm_store_call_scanner_serialize(TSWasmStore *self, uint32_t scanner_address, char *buffer); - 17 | void ts_wasm_store_call_scanner_deserialize(TSWasmStore *self, uint32_t scanner, const char *buffer, unsigned length); - | - 18 | void ts_wasm_language_retain(const TSLanguage *self); - 19 | void ts_wasm_language_release(const TSLanguage *self); - | - 20 | #ifdef __cplusplus - 21 | } - 22 | #endif - | - 23 | #endif // TREE_SITTER_WASM_H_ - - - --------------------------------------------------------------------------------- -/lib/src/wasm/stdlib-symbols.txt: 
--------------------------------------------------------------------------------- - 1 | "calloc", - 2 | "free", - 3 | "iswalnum", - 4 | "iswalpha", - 5 | "iswblank", - 6 | "iswdigit", - 7 | "iswlower", - 8 | "iswspace", - 9 | "iswupper", - 10 | "iswxdigit", - 11 | "malloc", - 12 | "memchr", - 13 | "memcmp", - 14 | "memcpy", - 15 | "memmove", - 16 | "memset", - 17 | "realloc", - 18 | "strcmp", - 19 | "strlen", - 20 | "strncat", - 21 | "strncmp", - 22 | "strncpy", - 23 | "towlower", - 24 | "towupper", - - - --------------------------------------------------------------------------------- -/lib/src/wasm/wasm-stdlib.h: --------------------------------------------------------------------------------- - 1 | unsigned char STDLIB_WASM[] = { - 2 | 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x1a, 0x05, 0x60, - 3 | 0x01, 0x7f, 0x01, 0x7f, 0x60, 0x03, 0x7f, 0x7f, 0x7f, 0x01, 0x7f, 0x60, - 4 | 0x02, 0x7f, 0x7f, 0x01, 0x7f, 0x60, 0x01, 0x7f, 0x00, 0x60, 0x00, 0x00, - 5 | 0x02, 0x7c, 0x04, 0x16, 0x77, 0x61, 0x73, 0x69, 0x5f, 0x73, 0x6e, 0x61, - 6 | 0x70, 0x73, 0x68, 0x6f, 0x74, 0x5f, 0x70, 0x72, 0x65, 0x76, 0x69, 0x65, - 7 | 0x77, 0x31, 0x08, 0x61, 0x72, 0x67, 0x73, 0x5f, 0x67, 0x65, 0x74, 0x00, - 8 | 0x02, 0x16, 0x77, 0x61, 0x73, 0x69, 0x5f, 0x73, 0x6e, 0x61, 0x70, 0x73, - 9 | 0x68, 0x6f, 0x74, 0x5f, 0x70, 0x72, 0x65, 0x76, 0x69, 0x65, 0x77, 0x31, - 10 | 0x0e, 0x61, 0x72, 0x67, 0x73, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x73, 0x5f, - 11 | 0x67, 0x65, 0x74, 0x00, 0x02, 0x16, 0x77, 0x61, 0x73, 0x69, 0x5f, 0x73, - 12 | 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x5f, 0x70, 0x72, 0x65, 0x76, - 13 | 0x69, 0x65, 0x77, 0x31, 0x09, 0x70, 0x72, 0x6f, 0x63, 0x5f, 0x65, 0x78, - 14 | 0x69, 0x74, 0x00, 0x03, 0x03, 0x65, 0x6e, 0x76, 0x06, 0x6d, 0x65, 0x6d, - 15 | 0x6f, 0x72, 0x79, 0x02, 0x00, 0x02, 0x03, 0x1f, 0x1e, 0x04, 0x04, 0x04, - 16 | 0x03, 0x00, 0x03, 0x02, 0x02, 0x03, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, - 17 | 0x02, 0x00, 0x02, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 
- 18 | 0x00, 0x00, 0x00, 0x06, 0x08, 0x01, 0x7f, 0x01, 0x41, 0x80, 0x80, 0x04, - 19 | 0x0b, 0x07, 0xad, 0x02, 0x1c, 0x11, 0x5f, 0x5f, 0x77, 0x61, 0x73, 0x6d, - 20 | 0x5f, 0x63, 0x61, 0x6c, 0x6c, 0x5f, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x00, - 21 | 0x03, 0x0f, 0x5f, 0x5f, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x5f, 0x70, 0x6f, - 22 | 0x69, 0x6e, 0x74, 0x65, 0x72, 0x03, 0x00, 0x06, 0x5f, 0x73, 0x74, 0x61, - 23 | 0x72, 0x74, 0x00, 0x05, 0x0a, 0x72, 0x65, 0x73, 0x65, 0x74, 0x5f, 0x68, - 24 | 0x65, 0x61, 0x70, 0x00, 0x06, 0x06, 0x6d, 0x61, 0x6c, 0x6c, 0x6f, 0x63, - 25 | 0x00, 0x07, 0x04, 0x66, 0x72, 0x65, 0x65, 0x00, 0x08, 0x06, 0x63, 0x61, - 26 | 0x6c, 0x6c, 0x6f, 0x63, 0x00, 0x09, 0x06, 0x6d, 0x65, 0x6d, 0x73, 0x65, - 27 | 0x74, 0x00, 0x0d, 0x07, 0x72, 0x65, 0x61, 0x6c, 0x6c, 0x6f, 0x63, 0x00, - 28 | 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x63, 0x70, 0x79, 0x00, 0x0c, 0x06, 0x73, - 29 | 0x74, 0x72, 0x6c, 0x65, 0x6e, 0x00, 0x0e, 0x08, 0x69, 0x73, 0x77, 0x61, - 30 | 0x6c, 0x6e, 0x75, 0x6d, 0x00, 0x20, 0x08, 0x69, 0x73, 0x77, 0x61, 0x6c, - 31 | 0x70, 0x68, 0x61, 0x00, 0x0f, 0x08, 0x69, 0x73, 0x77, 0x62, 0x6c, 0x61, - 32 | 0x6e, 0x6b, 0x00, 0x1a, 0x08, 0x69, 0x73, 0x77, 0x64, 0x69, 0x67, 0x69, - 33 | 0x74, 0x00, 0x1b, 0x08, 0x69, 0x73, 0x77, 0x6c, 0x6f, 0x77, 0x65, 0x72, - 34 | 0x00, 0x19, 0x08, 0x69, 0x73, 0x77, 0x73, 0x70, 0x61, 0x63, 0x65, 0x00, - 35 | 0x1f, 0x08, 0x69, 0x73, 0x77, 0x75, 0x70, 0x70, 0x65, 0x72, 0x00, 0x17, - 36 | 0x09, 0x69, 0x73, 0x77, 0x78, 0x64, 0x69, 0x67, 0x69, 0x74, 0x00, 0x1e, - 37 | 0x08, 0x74, 0x6f, 0x77, 0x6c, 0x6f, 0x77, 0x65, 0x72, 0x00, 0x13, 0x08, - 38 | 0x74, 0x6f, 0x77, 0x75, 0x70, 0x70, 0x65, 0x72, 0x00, 0x15, 0x06, 0x6d, - 39 | 0x65, 0x6d, 0x63, 0x68, 0x72, 0x00, 0x11, 0x06, 0x6d, 0x65, 0x6d, 0x63, - 40 | 0x6d, 0x70, 0x00, 0x10, 0x07, 0x6d, 0x65, 0x6d, 0x6d, 0x6f, 0x76, 0x65, - 41 | 0x00, 0x18, 0x06, 0x73, 0x74, 0x72, 0x63, 0x6d, 0x70, 0x00, 0x12, 0x07, - 42 | 0x73, 0x74, 0x72, 0x6e, 0x63, 0x61, 0x74, 0x00, 0x1c, 0x07, 0x73, 0x74, - 43 | 0x72, 0x6e, 0x63, 
0x6d, 0x70, 0x00, 0x16, 0x07, 0x73, 0x74, 0x72, 0x6e, - 44 | 0x63, 0x70, 0x79, 0x00, 0x1d, 0x08, 0x01, 0x04, 0x0c, 0x01, 0x01, 0x0a, - 45 | 0x8b, 0x28, 0x1e, 0x02, 0x00, 0x0b, 0x0d, 0x00, 0x41, 0xe8, 0xc2, 0x04, - 46 | 0x41, 0x00, 0x41, 0x14, 0xfc, 0x0b, 0x00, 0x0b, 0xa4, 0x01, 0x01, 0x03, - 47 | 0x7f, 0x41, 0xe8, 0xc2, 0x04, 0x28, 0x02, 0x00, 0x45, 0x04, 0x40, 0x41, - 48 | 0xe8, 0xc2, 0x04, 0x41, 0x01, 0x36, 0x02, 0x00, 0x23, 0x00, 0x41, 0x10, - 49 | 0x6b, 0x22, 0x00, 0x24, 0x00, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, 0x02, - 50 | 0x40, 0x20, 0x00, 0x41, 0x08, 0x6a, 0x20, 0x00, 0x41, 0x0c, 0x6a, 0x10, - 51 | 0x01, 0x41, 0xff, 0xff, 0x03, 0x71, 0x45, 0x04, 0x40, 0x20, 0x00, 0x28, - 52 | 0x02, 0x08, 0x41, 0x01, 0x6a, 0x22, 0x01, 0x45, 0x0d, 0x01, 0x20, 0x00, - 53 | 0x28, 0x02, 0x0c, 0x10, 0x07, 0x22, 0x02, 0x45, 0x0d, 0x02, 0x20, 0x01, - 54 | 0x41, 0x04, 0x10, 0x09, 0x22, 0x01, 0x45, 0x0d, 0x03, 0x20, 0x01, 0x20, - 55 | 0x02, 0x10, 0x00, 0x41, 0xff, 0xff, 0x03, 0x71, 0x0d, 0x04, 0x20, 0x00, - 56 | 0x28, 0x02, 0x08, 0x00, 0x0b, 0x41, 0xc7, 0x00, 0x10, 0x0b, 0x00, 0x0b, - 57 | 0x41, 0xc6, 0x00, 0x10, 0x0b, 0x00, 0x0b, 0x41, 0xc6, 0x00, 0x10, 0x0b, - 58 | 0x00, 0x0b, 0x20, 0x02, 0x10, 0x08, 0x41, 0xc6, 0x00, 0x10, 0x0b, 0x00, - 59 | 0x0b, 0x20, 0x02, 0x10, 0x08, 0x20, 0x01, 0x10, 0x08, 0x41, 0xc7, 0x00, - 60 | 0x10, 0x0b, 0x00, 0x0b, 0x00, 0x0b, 0x35, 0x01, 0x01, 0x7f, 0x41, 0xf0, - 61 | 0xc2, 0x04, 0x20, 0x00, 0x36, 0x02, 0x00, 0x41, 0xec, 0xc2, 0x04, 0x20, - 62 | 0x00, 0x36, 0x02, 0x00, 0x3f, 0x00, 0x21, 0x00, 0x20, 0x01, 0x41, 0xf8, - 63 | 0xc2, 0x04, 0x6a, 0x41, 0x00, 0x36, 0x02, 0x00, 0x20, 0x01, 0x41, 0xf4, - 64 | 0xc2, 0x04, 0x6a, 0x20, 0x00, 0x41, 0x10, 0x74, 0x36, 0x02, 0x00, 0x0b, - 65 | 0xd7, 0x01, 0x01, 0x04, 0x7f, 0x02, 0x40, 0x20, 0x00, 0x45, 0x0d, 0x00, - 66 | 0x02, 0x40, 0x41, 0xf8, 0xc2, 0x04, 0x28, 0x02, 0x00, 0x22, 0x01, 0x45, - 67 | 0x0d, 0x00, 0x02, 0x40, 0x20, 0x00, 0x20, 0x01, 0x28, 0x02, 0x00, 0x4d, - 68 | 0x04, 0x40, 0x20, 0x01, 0x21, 0x02, 0x0c, 
0x01, 0x0b, 0x03, 0x40, 0x20, - 69 | 0x01, 0x28, 0x02, 0x04, 0x22, 0x02, 0x45, 0x0d, 0x02, 0x20, 0x01, 0x21, - 70 | 0x03, 0x20, 0x02, 0x22, 0x01, 0x28, 0x02, 0x00, 0x20, 0x00, 0x49, 0x0d, - 71 | 0x00, 0x0b, 0x0b, 0x20, 0x03, 0x41, 0x04, 0x6a, 0x41, 0xf8, 0xc2, 0x04, - 72 | 0x20, 0x03, 0x1b, 0x20, 0x02, 0x28, 0x02, 0x04, 0x36, 0x02, 0x00, 0x20, - 73 | 0x02, 0x41, 0x08, 0x6a, 0x0f, 0x0b, 0x41, 0xf0, 0xc2, 0x04, 0x28, 0x02, - 74 | 0x00, 0x22, 0x01, 0x20, 0x00, 0x6a, 0x41, 0x0b, 0x6a, 0x41, 0x7c, 0x71, - 75 | 0x22, 0x02, 0x41, 0xf4, 0xc2, 0x04, 0x28, 0x02, 0x00, 0x4b, 0x04, 0x40, - 76 | 0x20, 0x02, 0x41, 0xec, 0xc2, 0x04, 0x28, 0x02, 0x00, 0x6b, 0x41, 0x80, - 77 | 0x80, 0x80, 0x02, 0x4a, 0x0d, 0x01, 0x20, 0x00, 0x41, 0x01, 0x6b, 0x41, - 78 | 0x10, 0x76, 0x41, 0x01, 0x6a, 0x40, 0x00, 0x41, 0x7f, 0x46, 0x0d, 0x01, - 79 | 0x41, 0xf4, 0xc2, 0x04, 0x3f, 0x00, 0x41, 0x10, 0x74, 0x36, 0x02, 0x00, - 80 | 0x41, 0xf0, 0xc2, 0x04, 0x28, 0x02, 0x00, 0x21, 0x01, 0x0b, 0x20, 0x01, - 81 | 0x20, 0x00, 0x36, 0x02, 0x00, 0x41, 0xf0, 0xc2, 0x04, 0x20, 0x02, 0x36, - 82 | 0x02, 0x00, 0x20, 0x01, 0x41, 0x08, 0x6a, 0x21, 0x04, 0x0b, 0x20, 0x04, - 83 | 0x0b, 0x41, 0x01, 0x02, 0x7f, 0x20, 0x00, 0x04, 0x40, 0x41, 0xf0, 0xc2, - 84 | 0x04, 0x22, 0x01, 0x28, 0x02, 0x00, 0x20, 0x00, 0x41, 0x08, 0x6b, 0x22, - 85 | 0x02, 0x28, 0x02, 0x00, 0x20, 0x00, 0x6a, 0x41, 0x03, 0x6a, 0x41, 0x7c, - 86 | 0x71, 0x47, 0x04, 0x40, 0x20, 0x00, 0x41, 0x04, 0x6b, 0x41, 0xf8, 0xc2, - 87 | 0x04, 0x22, 0x01, 0x28, 0x02, 0x00, 0x36, 0x02, 0x00, 0x0b, 0x20, 0x01, - 88 | 0x20, 0x02, 0x36, 0x02, 0x00, 0x0b, 0x0b, 0x11, 0x00, 0x20, 0x00, 0x20, - 89 | 0x01, 0x6c, 0x22, 0x00, 0x10, 0x07, 0x41, 0x00, 0x20, 0x00, 0x10, 0x0d, - 90 | 0x0b, 0x47, 0x01, 0x01, 0x7f, 0x02, 0x40, 0x20, 0x00, 0x45, 0x0d, 0x00, - 91 | 0x41, 0xf0, 0xc2, 0x04, 0x28, 0x02, 0x00, 0x20, 0x00, 0x41, 0x08, 0x6b, - 92 | 0x22, 0x02, 0x28, 0x02, 0x00, 0x20, 0x00, 0x6a, 0x41, 0x03, 0x6a, 0x41, - 93 | 0x7c, 0x71, 0x46, 0x04, 0x40, 0x41, 0xf0, 0xc2, 0x04, 0x20, 0x02, 
0x36, - 94 | 0x02, 0x00, 0x0c, 0x01, 0x0b, 0x20, 0x01, 0x10, 0x07, 0x20, 0x00, 0x20, - 95 | 0x02, 0x28, 0x02, 0x00, 0x10, 0x0c, 0x0f, 0x0b, 0x20, 0x01, 0x10, 0x07, - 96 | 0x0b, 0x07, 0x00, 0x20, 0x00, 0x10, 0x02, 0x00, 0x0b, 0xbe, 0x07, 0x01, - 97 | 0x04, 0x7f, 0x02, 0x40, 0x02, 0x7f, 0x02, 0x40, 0x20, 0x02, 0x41, 0x20, - 98 | 0x4d, 0x04, 0x40, 0x20, 0x01, 0x41, 0x03, 0x71, 0x45, 0x20, 0x02, 0x45, - 99 | 0x72, 0x0d, 0x01, 0x20, 0x00, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x3a, 0x00, - 100 | 0x00, 0x20, 0x00, 0x41, 0x01, 0x6a, 0x20, 0x01, 0x41, 0x01, 0x6a, 0x22, - 101 | 0x03, 0x41, 0x03, 0x71, 0x45, 0x20, 0x02, 0x41, 0x01, 0x6b, 0x22, 0x05, - 102 | 0x45, 0x72, 0x0d, 0x02, 0x1a, 0x20, 0x00, 0x20, 0x01, 0x2d, 0x00, 0x01, - 103 | 0x3a, 0x00, 0x01, 0x20, 0x00, 0x41, 0x02, 0x6a, 0x20, 0x01, 0x41, 0x02, - 104 | 0x6a, 0x22, 0x03, 0x41, 0x03, 0x71, 0x45, 0x20, 0x02, 0x41, 0x02, 0x6b, - 105 | 0x22, 0x05, 0x45, 0x72, 0x0d, 0x02, 0x1a, 0x20, 0x00, 0x20, 0x01, 0x2d, - 106 | 0x00, 0x02, 0x3a, 0x00, 0x02, 0x20, 0x00, 0x41, 0x03, 0x6a, 0x20, 0x01, - 107 | 0x41, 0x03, 0x6a, 0x22, 0x03, 0x41, 0x03, 0x71, 0x45, 0x20, 0x02, 0x41, - 108 | 0x03, 0x6b, 0x22, 0x05, 0x45, 0x72, 0x0d, 0x02, 0x1a, 0x20, 0x00, 0x20, - 109 | 0x01, 0x2d, 0x00, 0x03, 0x3a, 0x00, 0x03, 0x20, 0x02, 0x41, 0x04, 0x6b, - 110 | 0x21, 0x05, 0x20, 0x01, 0x41, 0x04, 0x6a, 0x21, 0x03, 0x20, 0x00, 0x41, - 111 | 0x04, 0x6a, 0x0c, 0x02, 0x0b, 0x20, 0x00, 0x20, 0x01, 0x20, 0x02, 0xfc, - 112 | 0x0a, 0x00, 0x00, 0x20, 0x00, 0x0f, 0x0b, 0x20, 0x02, 0x21, 0x05, 0x20, - 113 | 0x01, 0x21, 0x03, 0x20, 0x00, 0x0b, 0x22, 0x04, 0x41, 0x03, 0x71, 0x22, - 114 | 0x02, 0x45, 0x04, 0x40, 0x02, 0x40, 0x20, 0x05, 0x41, 0x10, 0x49, 0x04, - 115 | 0x40, 0x20, 0x05, 0x21, 0x02, 0x0c, 0x01, 0x0b, 0x20, 0x05, 0x41, 0x10, - 116 | 0x6b, 0x22, 0x02, 0x41, 0x10, 0x71, 0x45, 0x04, 0x40, 0x20, 0x04, 0x20, - 117 | 0x03, 0x29, 0x02, 0x00, 0x37, 0x02, 0x00, 0x20, 0x04, 0x20, 0x03, 0x29, - 118 | 0x02, 0x08, 0x37, 0x02, 0x08, 0x20, 0x04, 0x41, 0x10, 0x6a, 0x21, 0x04, 
- 119 | 0x20, 0x03, 0x41, 0x10, 0x6a, 0x21, 0x03, 0x20, 0x02, 0x21, 0x05, 0x0b, - 120 | 0x20, 0x02, 0x41, 0x10, 0x49, 0x0d, 0x00, 0x20, 0x05, 0x21, 0x02, 0x03, - 121 | 0x40, 0x20, 0x04, 0x20, 0x03, 0x29, 0x02, 0x00, 0x37, 0x02, 0x00, 0x20, - 122 | 0x04, 0x20, 0x03, 0x29, 0x02, 0x08, 0x37, 0x02, 0x08, 0x20, 0x04, 0x20, - 123 | 0x03, 0x29, 0x02, 0x10, 0x37, 0x02, 0x10, 0x20, 0x04, 0x20, 0x03, 0x29, - 124 | 0x02, 0x18, 0x37, 0x02, 0x18, 0x20, 0x04, 0x41, 0x20, 0x6a, 0x21, 0x04, - 125 | 0x20, 0x03, 0x41, 0x20, 0x6a, 0x21, 0x03, 0x20, 0x02, 0x41, 0x20, 0x6b, - 126 | 0x22, 0x02, 0x41, 0x0f, 0x4b, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x02, 0x41, - 127 | 0x08, 0x4f, 0x04, 0x40, 0x20, 0x04, 0x20, 0x03, 0x29, 0x02, 0x00, 0x37, - 128 | 0x02, 0x00, 0x20, 0x04, 0x41, 0x08, 0x6a, 0x21, 0x04, 0x20, 0x03, 0x41, - 129 | 0x08, 0x6a, 0x21, 0x03, 0x0b, 0x20, 0x02, 0x41, 0x04, 0x71, 0x04, 0x40, - 130 | 0x20, 0x04, 0x20, 0x03, 0x28, 0x02, 0x00, 0x36, 0x02, 0x00, 0x20, 0x04, - 131 | 0x41, 0x04, 0x6a, 0x21, 0x04, 0x20, 0x03, 0x41, 0x04, 0x6a, 0x21, 0x03, - 132 | 0x0b, 0x20, 0x02, 0x41, 0x02, 0x71, 0x04, 0x40, 0x20, 0x04, 0x20, 0x03, - 133 | 0x2f, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x20, 0x04, 0x41, 0x02, 0x6a, 0x21, - 134 | 0x04, 0x20, 0x03, 0x41, 0x02, 0x6a, 0x21, 0x03, 0x0b, 0x20, 0x02, 0x41, - 135 | 0x01, 0x71, 0x45, 0x0d, 0x01, 0x20, 0x04, 0x20, 0x03, 0x2d, 0x00, 0x00, - 136 | 0x3a, 0x00, 0x00, 0x20, 0x00, 0x0f, 0x0b, 0x02, 0x40, 0x02, 0x40, 0x02, - 137 | 0x7f, 0x02, 0x40, 0x20, 0x05, 0x41, 0x20, 0x4f, 0x04, 0x40, 0x20, 0x04, - 138 | 0x20, 0x03, 0x28, 0x02, 0x00, 0x22, 0x01, 0x3a, 0x00, 0x00, 0x02, 0x40, - 139 | 0x02, 0x40, 0x20, 0x02, 0x41, 0x02, 0x6b, 0x0e, 0x02, 0x00, 0x01, 0x03, - 140 | 0x0b, 0x20, 0x04, 0x20, 0x01, 0x41, 0x08, 0x76, 0x3a, 0x00, 0x01, 0x20, - 141 | 0x04, 0x20, 0x03, 0x41, 0x06, 0x6a, 0x29, 0x01, 0x00, 0x37, 0x02, 0x06, - 142 | 0x20, 0x04, 0x20, 0x03, 0x28, 0x02, 0x04, 0x41, 0x10, 0x74, 0x20, 0x01, - 143 | 0x41, 0x10, 0x76, 0x72, 0x36, 0x02, 0x02, 0x20, 0x03, 0x41, 0x12, 0x6a, 
- 144 | 0x21, 0x01, 0x41, 0x0e, 0x21, 0x06, 0x20, 0x03, 0x41, 0x0e, 0x6a, 0x28, - 145 | 0x01, 0x00, 0x21, 0x03, 0x41, 0x0e, 0x21, 0x05, 0x20, 0x04, 0x41, 0x12, - 146 | 0x6a, 0x0c, 0x03, 0x0b, 0x20, 0x04, 0x20, 0x03, 0x41, 0x05, 0x6a, 0x29, - 147 | 0x00, 0x00, 0x37, 0x02, 0x05, 0x20, 0x04, 0x20, 0x03, 0x28, 0x02, 0x04, - 148 | 0x41, 0x18, 0x74, 0x20, 0x01, 0x41, 0x08, 0x76, 0x72, 0x36, 0x02, 0x01, - 149 | 0x20, 0x03, 0x41, 0x11, 0x6a, 0x21, 0x01, 0x41, 0x0d, 0x21, 0x06, 0x20, - 150 | 0x03, 0x41, 0x0d, 0x6a, 0x28, 0x00, 0x00, 0x21, 0x03, 0x41, 0x0f, 0x21, - 151 | 0x05, 0x20, 0x04, 0x41, 0x11, 0x6a, 0x0c, 0x02, 0x0b, 0x02, 0x7f, 0x20, - 152 | 0x05, 0x41, 0x10, 0x49, 0x04, 0x40, 0x20, 0x04, 0x21, 0x02, 0x20, 0x03, - 153 | 0x0c, 0x01, 0x0b, 0x20, 0x04, 0x20, 0x03, 0x2d, 0x00, 0x00, 0x3a, 0x00, - 154 | 0x00, 0x20, 0x04, 0x20, 0x03, 0x28, 0x00, 0x01, 0x36, 0x00, 0x01, 0x20, - 155 | 0x04, 0x20, 0x03, 0x29, 0x00, 0x05, 0x37, 0x00, 0x05, 0x20, 0x04, 0x20, - 156 | 0x03, 0x2f, 0x00, 0x0d, 0x3b, 0x00, 0x0d, 0x20, 0x04, 0x20, 0x03, 0x2d, - 157 | 0x00, 0x0f, 0x3a, 0x00, 0x0f, 0x20, 0x04, 0x41, 0x10, 0x6a, 0x21, 0x02, - 158 | 0x20, 0x03, 0x41, 0x10, 0x6a, 0x0b, 0x21, 0x01, 0x20, 0x05, 0x41, 0x08, - 159 | 0x71, 0x0d, 0x02, 0x0c, 0x03, 0x0b, 0x20, 0x04, 0x20, 0x01, 0x41, 0x10, - 160 | 0x76, 0x3a, 0x00, 0x02, 0x20, 0x04, 0x20, 0x01, 0x41, 0x08, 0x76, 0x3a, - 161 | 0x00, 0x01, 0x20, 0x04, 0x20, 0x03, 0x41, 0x07, 0x6a, 0x29, 0x00, 0x00, - 162 | 0x37, 0x02, 0x07, 0x20, 0x04, 0x20, 0x03, 0x28, 0x02, 0x04, 0x41, 0x08, - 163 | 0x74, 0x20, 0x01, 0x41, 0x18, 0x76, 0x72, 0x36, 0x02, 0x03, 0x20, 0x03, - 164 | 0x41, 0x13, 0x6a, 0x21, 0x01, 0x41, 0x0f, 0x21, 0x06, 0x20, 0x03, 0x41, - 165 | 0x0f, 0x6a, 0x28, 0x00, 0x00, 0x21, 0x03, 0x41, 0x0d, 0x21, 0x05, 0x20, - 166 | 0x04, 0x41, 0x13, 0x6a, 0x0b, 0x21, 0x02, 0x20, 0x04, 0x20, 0x06, 0x6a, - 167 | 0x20, 0x03, 0x36, 0x02, 0x00, 0x0b, 0x20, 0x02, 0x20, 0x01, 0x29, 0x00, - 168 | 0x00, 0x37, 0x00, 0x00, 0x20, 0x02, 0x41, 0x08, 0x6a, 0x21, 0x02, 0x20, 
- 169 | 0x01, 0x41, 0x08, 0x6a, 0x21, 0x01, 0x0b, 0x20, 0x05, 0x41, 0x04, 0x71, - 170 | 0x04, 0x40, 0x20, 0x02, 0x20, 0x01, 0x28, 0x00, 0x00, 0x36, 0x00, 0x00, - 171 | 0x20, 0x02, 0x41, 0x04, 0x6a, 0x21, 0x02, 0x20, 0x01, 0x41, 0x04, 0x6a, - 172 | 0x21, 0x01, 0x0b, 0x20, 0x05, 0x41, 0x02, 0x71, 0x04, 0x40, 0x20, 0x02, - 173 | 0x20, 0x01, 0x2f, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x20, 0x02, 0x41, 0x02, - 174 | 0x6a, 0x21, 0x02, 0x20, 0x01, 0x41, 0x02, 0x6a, 0x21, 0x01, 0x0b, 0x20, - 175 | 0x05, 0x41, 0x01, 0x71, 0x45, 0x0d, 0x00, 0x20, 0x02, 0x20, 0x01, 0x2d, - 176 | 0x00, 0x00, 0x3a, 0x00, 0x00, 0x0b, 0x20, 0x00, 0x0b, 0x86, 0x03, 0x02, - 177 | 0x03, 0x7f, 0x01, 0x7e, 0x20, 0x02, 0x41, 0x21, 0x4f, 0x04, 0x40, 0x20, - 178 | 0x00, 0x20, 0x01, 0x20, 0x02, 0xfc, 0x0b, 0x00, 0x20, 0x00, 0x0f, 0x0b, - 179 | 0x02, 0x40, 0x20, 0x02, 0x45, 0x0d, 0x00, 0x20, 0x00, 0x20, 0x01, 0x3a, - 180 | 0x00, 0x00, 0x20, 0x00, 0x20, 0x02, 0x6a, 0x22, 0x03, 0x41, 0x01, 0x6b, - 181 | 0x20, 0x01, 0x3a, 0x00, 0x00, 0x20, 0x02, 0x41, 0x03, 0x49, 0x0d, 0x00, - 182 | 0x20, 0x00, 0x20, 0x01, 0x3a, 0x00, 0x02, 0x20, 0x00, 0x20, 0x01, 0x3a, - 183 | 0x00, 0x01, 0x20, 0x03, 0x41, 0x03, 0x6b, 0x20, 0x01, 0x3a, 0x00, 0x00, - 184 | 0x20, 0x03, 0x41, 0x02, 0x6b, 0x20, 0x01, 0x3a, 0x00, 0x00, 0x20, 0x02, - 185 | 0x41, 0x07, 0x49, 0x0d, 0x00, 0x20, 0x00, 0x20, 0x01, 0x3a, 0x00, 0x03, - 186 | 0x20, 0x03, 0x41, 0x04, 0x6b, 0x20, 0x01, 0x3a, 0x00, 0x00, 0x20, 0x02, - 187 | 0x41, 0x09, 0x49, 0x0d, 0x00, 0x20, 0x00, 0x41, 0x00, 0x20, 0x00, 0x6b, - 188 | 0x41, 0x03, 0x71, 0x22, 0x05, 0x6a, 0x22, 0x04, 0x20, 0x01, 0x41, 0xff, - 189 | 0x01, 0x71, 0x41, 0x81, 0x82, 0x84, 0x08, 0x6c, 0x22, 0x03, 0x36, 0x02, - 190 | 0x00, 0x20, 0x04, 0x20, 0x02, 0x20, 0x05, 0x6b, 0x41, 0x3c, 0x71, 0x22, - 191 | 0x02, 0x6a, 0x22, 0x01, 0x41, 0x04, 0x6b, 0x20, 0x03, 0x36, 0x02, 0x00, - 192 | 0x20, 0x02, 0x41, 0x09, 0x49, 0x0d, 0x00, 0x20, 0x04, 0x20, 0x03, 0x36, - 193 | 0x02, 0x08, 0x20, 0x04, 0x20, 0x03, 0x36, 0x02, 0x04, 0x20, 0x01, 0x41, 
- 194 | 0x08, 0x6b, 0x20, 0x03, 0x36, 0x02, 0x00, 0x20, 0x01, 0x41, 0x0c, 0x6b, - 195 | 0x20, 0x03, 0x36, 0x02, 0x00, 0x20, 0x02, 0x41, 0x19, 0x49, 0x0d, 0x00, - 196 | 0x20, 0x04, 0x20, 0x03, 0x36, 0x02, 0x18, 0x20, 0x04, 0x20, 0x03, 0x36, - 197 | 0x02, 0x14, 0x20, 0x04, 0x20, 0x03, 0x36, 0x02, 0x10, 0x20, 0x04, 0x20, - 198 | 0x03, 0x36, 0x02, 0x0c, 0x20, 0x01, 0x41, 0x10, 0x6b, 0x20, 0x03, 0x36, - 199 | 0x02, 0x00, 0x20, 0x01, 0x41, 0x14, 0x6b, 0x20, 0x03, 0x36, 0x02, 0x00, - 200 | 0x20, 0x01, 0x41, 0x18, 0x6b, 0x20, 0x03, 0x36, 0x02, 0x00, 0x20, 0x01, - 201 | 0x41, 0x1c, 0x6b, 0x20, 0x03, 0x36, 0x02, 0x00, 0x20, 0x02, 0x20, 0x04, - 202 | 0x41, 0x04, 0x71, 0x41, 0x18, 0x72, 0x22, 0x02, 0x6b, 0x22, 0x01, 0x41, - 203 | 0x20, 0x49, 0x0d, 0x00, 0x20, 0x03, 0xad, 0x42, 0x81, 0x80, 0x80, 0x80, - 204 | 0x10, 0x7e, 0x21, 0x06, 0x20, 0x02, 0x20, 0x04, 0x6a, 0x21, 0x02, 0x03, - 205 | 0x40, 0x20, 0x02, 0x20, 0x06, 0x37, 0x03, 0x18, 0x20, 0x02, 0x20, 0x06, - 206 | 0x37, 0x03, 0x10, 0x20, 0x02, 0x20, 0x06, 0x37, 0x03, 0x08, 0x20, 0x02, - 207 | 0x20, 0x06, 0x37, 0x03, 0x00, 0x20, 0x02, 0x41, 0x20, 0x6a, 0x21, 0x02, - 208 | 0x20, 0x01, 0x41, 0x20, 0x6b, 0x22, 0x01, 0x41, 0x1f, 0x4b, 0x0d, 0x00, - 209 | 0x0b, 0x0b, 0x20, 0x00, 0x0b, 0xc5, 0x01, 0x01, 0x03, 0x7f, 0x02, 0x40, - 210 | 0x02, 0x40, 0x20, 0x00, 0x22, 0x01, 0x41, 0x03, 0x71, 0x45, 0x0d, 0x00, - 211 | 0x20, 0x01, 0x2d, 0x00, 0x00, 0x45, 0x04, 0x40, 0x41, 0x00, 0x0f, 0x0b, - 212 | 0x20, 0x00, 0x41, 0x01, 0x6a, 0x22, 0x01, 0x41, 0x03, 0x71, 0x45, 0x0d, - 213 | 0x00, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x45, 0x0d, 0x01, 0x20, 0x00, 0x41, - 214 | 0x02, 0x6a, 0x22, 0x01, 0x41, 0x03, 0x71, 0x45, 0x0d, 0x00, 0x20, 0x01, - 215 | 0x2d, 0x00, 0x00, 0x45, 0x0d, 0x01, 0x20, 0x00, 0x41, 0x03, 0x6a, 0x22, - 216 | 0x01, 0x41, 0x03, 0x71, 0x45, 0x0d, 0x00, 0x20, 0x01, 0x2d, 0x00, 0x00, - 217 | 0x45, 0x0d, 0x01, 0x20, 0x00, 0x41, 0x04, 0x6a, 0x22, 0x01, 0x41, 0x03, - 218 | 0x71, 0x0d, 0x01, 0x0b, 0x20, 0x01, 0x41, 0x04, 0x6b, 0x21, 0x02, 0x20, 
- 219 | 0x01, 0x41, 0x05, 0x6b, 0x21, 0x01, 0x03, 0x40, 0x20, 0x01, 0x41, 0x04, - 220 | 0x6a, 0x21, 0x01, 0x41, 0x80, 0x82, 0x84, 0x08, 0x20, 0x02, 0x41, 0x04, - 221 | 0x6a, 0x22, 0x02, 0x28, 0x02, 0x00, 0x22, 0x03, 0x6b, 0x20, 0x03, 0x72, - 222 | 0x41, 0x80, 0x81, 0x82, 0x84, 0x78, 0x71, 0x41, 0x80, 0x81, 0x82, 0x84, - 223 | 0x78, 0x46, 0x0d, 0x00, 0x0b, 0x03, 0x40, 0x20, 0x01, 0x41, 0x01, 0x6a, - 224 | 0x21, 0x01, 0x20, 0x02, 0x2d, 0x00, 0x00, 0x20, 0x02, 0x41, 0x01, 0x6a, - 225 | 0x21, 0x02, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x01, 0x20, 0x00, 0x6b, 0x0b, - 226 | 0x3e, 0x00, 0x20, 0x00, 0x41, 0xff, 0xff, 0x07, 0x4d, 0x04, 0x40, 0x20, - 227 | 0x00, 0x41, 0x03, 0x76, 0x41, 0x1f, 0x71, 0x20, 0x00, 0x41, 0x08, 0x76, - 228 | 0x41, 0x80, 0x80, 0x04, 0x6a, 0x2d, 0x00, 0x00, 0x41, 0x05, 0x74, 0x72, - 229 | 0x41, 0x80, 0x80, 0x04, 0x6a, 0x2d, 0x00, 0x00, 0x20, 0x00, 0x41, 0x07, - 230 | 0x71, 0x76, 0x41, 0x01, 0x71, 0x0f, 0x0b, 0x20, 0x00, 0x41, 0xfe, 0xff, - 231 | 0x0b, 0x49, 0x0b, 0x43, 0x01, 0x03, 0x7f, 0x02, 0x40, 0x20, 0x02, 0x45, - 232 | 0x0d, 0x00, 0x03, 0x40, 0x20, 0x00, 0x2d, 0x00, 0x00, 0x22, 0x04, 0x20, - 233 | 0x01, 0x2d, 0x00, 0x00, 0x22, 0x05, 0x46, 0x04, 0x40, 0x20, 0x01, 0x41, - 234 | 0x01, 0x6a, 0x21, 0x01, 0x20, 0x00, 0x41, 0x01, 0x6a, 0x21, 0x00, 0x20, - 235 | 0x02, 0x41, 0x01, 0x6b, 0x22, 0x02, 0x0d, 0x01, 0x0c, 0x02, 0x0b, 0x0b, - 236 | 0x20, 0x04, 0x20, 0x05, 0x6b, 0x21, 0x03, 0x0b, 0x20, 0x03, 0x0b, 0xe9, - 237 | 0x02, 0x01, 0x03, 0x7f, 0x20, 0x02, 0x41, 0x00, 0x47, 0x21, 0x05, 0x02, - 238 | 0x40, 0x02, 0x40, 0x02, 0x40, 0x20, 0x00, 0x41, 0x03, 0x71, 0x45, 0x20, - 239 | 0x02, 0x45, 0x72, 0x45, 0x04, 0x40, 0x20, 0x00, 0x2d, 0x00, 0x00, 0x20, - 240 | 0x01, 0x41, 0xff, 0x01, 0x71, 0x46, 0x04, 0x40, 0x20, 0x00, 0x21, 0x03, - 241 | 0x20, 0x02, 0x21, 0x04, 0x0c, 0x03, 0x0b, 0x20, 0x02, 0x41, 0x01, 0x6b, - 242 | 0x22, 0x04, 0x41, 0x00, 0x47, 0x21, 0x05, 0x20, 0x00, 0x41, 0x01, 0x6a, - 243 | 0x22, 0x03, 0x41, 0x03, 0x71, 0x45, 0x20, 0x04, 0x45, 0x72, 0x0d, 0x01, 
- 244 | 0x20, 0x03, 0x2d, 0x00, 0x00, 0x20, 0x01, 0x41, 0xff, 0x01, 0x71, 0x46, - 245 | 0x0d, 0x02, 0x20, 0x02, 0x41, 0x02, 0x6b, 0x22, 0x04, 0x41, 0x00, 0x47, - 246 | 0x21, 0x05, 0x20, 0x00, 0x41, 0x02, 0x6a, 0x22, 0x03, 0x41, 0x03, 0x71, - 247 | 0x45, 0x20, 0x04, 0x45, 0x72, 0x0d, 0x01, 0x20, 0x03, 0x2d, 0x00, 0x00, - 248 | 0x20, 0x01, 0x41, 0xff, 0x01, 0x71, 0x46, 0x0d, 0x02, 0x20, 0x02, 0x41, - 249 | 0x03, 0x6b, 0x22, 0x04, 0x41, 0x00, 0x47, 0x21, 0x05, 0x20, 0x00, 0x41, - 250 | 0x03, 0x6a, 0x22, 0x03, 0x41, 0x03, 0x71, 0x45, 0x20, 0x04, 0x45, 0x72, - 251 | 0x0d, 0x01, 0x20, 0x03, 0x2d, 0x00, 0x00, 0x20, 0x01, 0x41, 0xff, 0x01, - 252 | 0x71, 0x46, 0x0d, 0x02, 0x20, 0x00, 0x41, 0x04, 0x6a, 0x21, 0x03, 0x20, - 253 | 0x02, 0x41, 0x04, 0x6b, 0x22, 0x04, 0x41, 0x00, 0x47, 0x21, 0x05, 0x0c, - 254 | 0x01, 0x0b, 0x20, 0x02, 0x21, 0x04, 0x20, 0x00, 0x21, 0x03, 0x0b, 0x20, - 255 | 0x05, 0x45, 0x0d, 0x01, 0x20, 0x01, 0x41, 0xff, 0x01, 0x71, 0x22, 0x00, - 256 | 0x20, 0x03, 0x2d, 0x00, 0x00, 0x46, 0x20, 0x04, 0x41, 0x04, 0x49, 0x72, - 257 | 0x45, 0x04, 0x40, 0x20, 0x00, 0x41, 0x81, 0x82, 0x84, 0x08, 0x6c, 0x21, - 258 | 0x00, 0x03, 0x40, 0x41, 0x80, 0x82, 0x84, 0x08, 0x20, 0x03, 0x28, 0x02, - 259 | 0x00, 0x20, 0x00, 0x73, 0x22, 0x02, 0x6b, 0x20, 0x02, 0x72, 0x41, 0x80, - 260 | 0x81, 0x82, 0x84, 0x78, 0x71, 0x41, 0x80, 0x81, 0x82, 0x84, 0x78, 0x47, - 261 | 0x0d, 0x02, 0x20, 0x03, 0x41, 0x04, 0x6a, 0x21, 0x03, 0x20, 0x04, 0x41, - 262 | 0x04, 0x6b, 0x22, 0x04, 0x41, 0x03, 0x4b, 0x0d, 0x00, 0x0b, 0x0b, 0x20, - 263 | 0x04, 0x45, 0x0d, 0x01, 0x0b, 0x20, 0x01, 0x41, 0xff, 0x01, 0x71, 0x21, - 264 | 0x00, 0x03, 0x40, 0x20, 0x00, 0x20, 0x03, 0x2d, 0x00, 0x00, 0x46, 0x04, - 265 | 0x40, 0x20, 0x03, 0x0f, 0x0b, 0x20, 0x03, 0x41, 0x01, 0x6a, 0x21, 0x03, - 266 | 0x20, 0x04, 0x41, 0x01, 0x6b, 0x22, 0x04, 0x0d, 0x00, 0x0b, 0x0b, 0x41, - 267 | 0x00, 0x0b, 0x58, 0x01, 0x02, 0x7f, 0x02, 0x40, 0x20, 0x00, 0x2d, 0x00, - 268 | 0x00, 0x22, 0x02, 0x45, 0x20, 0x02, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x22, 
- 269 | 0x03, 0x47, 0x72, 0x0d, 0x00, 0x20, 0x00, 0x41, 0x01, 0x6a, 0x21, 0x00, - 270 | 0x20, 0x01, 0x41, 0x01, 0x6a, 0x21, 0x01, 0x03, 0x40, 0x20, 0x01, 0x2d, - 271 | 0x00, 0x00, 0x21, 0x03, 0x20, 0x00, 0x2d, 0x00, 0x00, 0x22, 0x02, 0x45, - 272 | 0x0d, 0x01, 0x20, 0x00, 0x41, 0x01, 0x6a, 0x21, 0x00, 0x20, 0x01, 0x41, - 273 | 0x01, 0x6a, 0x21, 0x01, 0x20, 0x02, 0x20, 0x03, 0x46, 0x0d, 0x00, 0x0b, - 274 | 0x0b, 0x20, 0x02, 0x20, 0x03, 0x6b, 0x0b, 0x08, 0x00, 0x20, 0x00, 0x41, - 275 | 0x00, 0x10, 0x14, 0x0b, 0xa0, 0x02, 0x01, 0x07, 0x7f, 0x02, 0x40, 0x20, - 276 | 0x00, 0x41, 0xff, 0xff, 0x07, 0x4b, 0x0d, 0x00, 0x20, 0x00, 0x20, 0x00, - 277 | 0x41, 0xff, 0x01, 0x71, 0x22, 0x05, 0x41, 0x03, 0x6e, 0x22, 0x02, 0x41, - 278 | 0x03, 0x6c, 0x6b, 0x41, 0xff, 0x01, 0x71, 0x41, 0x02, 0x74, 0x41, 0xc0, - 279 | 0x9e, 0x04, 0x6a, 0x28, 0x02, 0x00, 0x20, 0x02, 0x20, 0x00, 0x41, 0x08, - 280 | 0x76, 0x22, 0x02, 0x41, 0xa0, 0xa9, 0x04, 0x6a, 0x2d, 0x00, 0x00, 0x41, - 281 | 0xd6, 0x00, 0x6c, 0x6a, 0x41, 0xa0, 0xa9, 0x04, 0x6a, 0x2d, 0x00, 0x00, - 282 | 0x6c, 0x41, 0x0b, 0x76, 0x41, 0x06, 0x70, 0x20, 0x02, 0x41, 0x90, 0xbe, - 283 | 0x04, 0x6a, 0x2d, 0x00, 0x00, 0x6a, 0x41, 0x02, 0x74, 0x41, 0xd0, 0x9e, - 284 | 0x04, 0x6a, 0x28, 0x02, 0x00, 0x22, 0x03, 0x41, 0x08, 0x75, 0x21, 0x02, - 285 | 0x20, 0x03, 0x41, 0xff, 0x01, 0x71, 0x22, 0x03, 0x41, 0x01, 0x4d, 0x04, - 286 | 0x40, 0x20, 0x02, 0x41, 0x00, 0x20, 0x01, 0x20, 0x03, 0x73, 0x6b, 0x71, - 287 | 0x20, 0x00, 0x6a, 0x0f, 0x0b, 0x20, 0x02, 0x41, 0xff, 0x01, 0x71, 0x22, - 288 | 0x03, 0x45, 0x0d, 0x00, 0x20, 0x02, 0x41, 0x08, 0x76, 0x21, 0x02, 0x03, - 289 | 0x40, 0x20, 0x03, 0x41, 0x01, 0x76, 0x22, 0x06, 0x20, 0x02, 0x6a, 0x22, - 290 | 0x04, 0x41, 0x01, 0x74, 0x41, 0x90, 0xa6, 0x04, 0x6a, 0x22, 0x07, 0x2d, - 291 | 0x00, 0x00, 0x22, 0x08, 0x20, 0x05, 0x46, 0x04, 0x40, 0x20, 0x07, 0x2d, - 292 | 0x00, 0x01, 0x41, 0x02, 0x74, 0x41, 0xd0, 0x9e, 0x04, 0x6a, 0x28, 0x02, - 293 | 0x00, 0x22, 0x02, 0x41, 0xff, 0x01, 0x71, 0x22, 0x03, 0x41, 0x01, 0x4d, 
- 294 | 0x04, 0x40, 0x41, 0x00, 0x20, 0x01, 0x20, 0x03, 0x73, 0x6b, 0x20, 0x02, - 295 | 0x41, 0x08, 0x75, 0x71, 0x20, 0x00, 0x6a, 0x0f, 0x0b, 0x41, 0x7f, 0x41, - 296 | 0x01, 0x20, 0x01, 0x1b, 0x20, 0x00, 0x6a, 0x0f, 0x0b, 0x20, 0x02, 0x20, - 297 | 0x04, 0x20, 0x05, 0x20, 0x08, 0x49, 0x22, 0x04, 0x1b, 0x21, 0x02, 0x20, - 298 | 0x06, 0x20, 0x03, 0x20, 0x06, 0x6b, 0x20, 0x04, 0x1b, 0x22, 0x03, 0x0d, - 299 | 0x00, 0x0b, 0x0b, 0x20, 0x00, 0x0b, 0x08, 0x00, 0x20, 0x00, 0x41, 0x01, - 300 | 0x10, 0x14, 0x0b, 0x75, 0x01, 0x02, 0x7f, 0x20, 0x02, 0x45, 0x04, 0x40, - 301 | 0x41, 0x00, 0x0f, 0x0b, 0x02, 0x40, 0x20, 0x00, 0x2d, 0x00, 0x00, 0x22, - 302 | 0x03, 0x45, 0x04, 0x40, 0x41, 0x00, 0x21, 0x03, 0x0c, 0x01, 0x0b, 0x20, - 303 | 0x00, 0x41, 0x01, 0x6a, 0x21, 0x00, 0x20, 0x02, 0x41, 0x01, 0x6b, 0x21, - 304 | 0x02, 0x02, 0x40, 0x03, 0x40, 0x20, 0x02, 0x45, 0x20, 0x03, 0x20, 0x01, - 305 | 0x2d, 0x00, 0x00, 0x22, 0x04, 0x47, 0x20, 0x04, 0x45, 0x72, 0x72, 0x0d, - 306 | 0x01, 0x20, 0x02, 0x41, 0x01, 0x6b, 0x21, 0x02, 0x20, 0x01, 0x41, 0x01, - 307 | 0x6a, 0x21, 0x01, 0x20, 0x00, 0x2d, 0x00, 0x00, 0x21, 0x03, 0x20, 0x00, - 308 | 0x41, 0x01, 0x6a, 0x21, 0x00, 0x20, 0x03, 0x0d, 0x00, 0x0b, 0x41, 0x00, - 309 | 0x21, 0x03, 0x0b, 0x0b, 0x20, 0x03, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x6b, - 310 | 0x0b, 0x09, 0x00, 0x20, 0x00, 0x10, 0x13, 0x20, 0x00, 0x47, 0x0b, 0xa1, - 311 | 0x09, 0x01, 0x04, 0x7f, 0x02, 0x40, 0x02, 0x40, 0x20, 0x02, 0x41, 0x21, - 312 | 0x49, 0x04, 0x40, 0x20, 0x00, 0x20, 0x01, 0x46, 0x0d, 0x02, 0x20, 0x01, - 313 | 0x20, 0x00, 0x20, 0x02, 0x6a, 0x22, 0x04, 0x6b, 0x41, 0x00, 0x20, 0x02, - 314 | 0x41, 0x01, 0x74, 0x6b, 0x4b, 0x0d, 0x01, 0x0b, 0x20, 0x00, 0x20, 0x01, - 315 | 0x20, 0x02, 0xfc, 0x0a, 0x00, 0x00, 0x0c, 0x01, 0x0b, 0x20, 0x00, 0x20, - 316 | 0x01, 0x73, 0x41, 0x03, 0x71, 0x21, 0x03, 0x02, 0x40, 0x02, 0x40, 0x20, - 317 | 0x00, 0x20, 0x01, 0x49, 0x04, 0x40, 0x20, 0x03, 0x04, 0x40, 0x20, 0x02, - 318 | 0x21, 0x04, 0x20, 0x00, 0x21, 0x03, 0x0c, 0x03, 0x0b, 0x20, 0x00, 0x41, 
- 319 | 0x03, 0x71, 0x45, 0x04, 0x40, 0x20, 0x02, 0x21, 0x04, 0x20, 0x00, 0x21, - 320 | 0x03, 0x0c, 0x02, 0x0b, 0x20, 0x02, 0x45, 0x0d, 0x03, 0x20, 0x00, 0x20, - 321 | 0x01, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x02, 0x41, 0x01, 0x6b, - 322 | 0x21, 0x04, 0x20, 0x00, 0x41, 0x01, 0x6a, 0x22, 0x03, 0x41, 0x03, 0x71, - 323 | 0x45, 0x04, 0x40, 0x20, 0x01, 0x41, 0x01, 0x6a, 0x21, 0x01, 0x0c, 0x02, - 324 | 0x0b, 0x20, 0x04, 0x45, 0x0d, 0x03, 0x20, 0x00, 0x20, 0x01, 0x2d, 0x00, - 325 | 0x01, 0x3a, 0x00, 0x01, 0x20, 0x02, 0x41, 0x02, 0x6b, 0x21, 0x04, 0x20, - 326 | 0x00, 0x41, 0x02, 0x6a, 0x22, 0x03, 0x41, 0x03, 0x71, 0x45, 0x04, 0x40, - 327 | 0x20, 0x01, 0x41, 0x02, 0x6a, 0x21, 0x01, 0x0c, 0x02, 0x0b, 0x20, 0x04, - 328 | 0x45, 0x0d, 0x03, 0x20, 0x00, 0x20, 0x01, 0x2d, 0x00, 0x02, 0x3a, 0x00, - 329 | 0x02, 0x20, 0x02, 0x41, 0x03, 0x6b, 0x21, 0x04, 0x20, 0x00, 0x41, 0x03, - 330 | 0x6a, 0x22, 0x03, 0x41, 0x03, 0x71, 0x45, 0x04, 0x40, 0x20, 0x01, 0x41, - 331 | 0x03, 0x6a, 0x21, 0x01, 0x0c, 0x02, 0x0b, 0x20, 0x04, 0x45, 0x0d, 0x03, - 332 | 0x20, 0x00, 0x20, 0x01, 0x2d, 0x00, 0x03, 0x3a, 0x00, 0x03, 0x20, 0x00, - 333 | 0x41, 0x04, 0x6a, 0x21, 0x03, 0x20, 0x01, 0x41, 0x04, 0x6a, 0x21, 0x01, - 334 | 0x20, 0x02, 0x41, 0x04, 0x6b, 0x21, 0x04, 0x0c, 0x01, 0x0b, 0x02, 0x40, - 335 | 0x20, 0x03, 0x0d, 0x00, 0x02, 0x40, 0x20, 0x04, 0x41, 0x03, 0x71, 0x45, - 336 | 0x0d, 0x00, 0x20, 0x02, 0x45, 0x0d, 0x04, 0x20, 0x00, 0x20, 0x02, 0x41, - 337 | 0x01, 0x6b, 0x22, 0x03, 0x6a, 0x22, 0x04, 0x20, 0x01, 0x20, 0x03, 0x6a, - 338 | 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x04, 0x41, 0x03, 0x71, 0x45, - 339 | 0x04, 0x40, 0x20, 0x03, 0x21, 0x02, 0x0c, 0x01, 0x0b, 0x20, 0x03, 0x45, - 340 | 0x0d, 0x04, 0x20, 0x00, 0x20, 0x02, 0x41, 0x02, 0x6b, 0x22, 0x03, 0x6a, - 341 | 0x22, 0x04, 0x20, 0x01, 0x20, 0x03, 0x6a, 0x2d, 0x00, 0x00, 0x3a, 0x00, - 342 | 0x00, 0x20, 0x04, 0x41, 0x03, 0x71, 0x45, 0x04, 0x40, 0x20, 0x03, 0x21, - 343 | 0x02, 0x0c, 0x01, 0x0b, 0x20, 0x03, 0x45, 0x0d, 0x04, 0x20, 0x00, 0x20, 
- 344 | 0x02, 0x41, 0x03, 0x6b, 0x22, 0x03, 0x6a, 0x22, 0x04, 0x20, 0x01, 0x20, - 345 | 0x03, 0x6a, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x04, 0x41, 0x03, - 346 | 0x71, 0x45, 0x04, 0x40, 0x20, 0x03, 0x21, 0x02, 0x0c, 0x01, 0x0b, 0x20, - 347 | 0x03, 0x45, 0x0d, 0x04, 0x20, 0x00, 0x20, 0x02, 0x41, 0x04, 0x6b, 0x22, - 348 | 0x02, 0x6a, 0x20, 0x01, 0x20, 0x02, 0x6a, 0x2d, 0x00, 0x00, 0x3a, 0x00, - 349 | 0x00, 0x0b, 0x20, 0x02, 0x41, 0x04, 0x49, 0x0d, 0x00, 0x20, 0x02, 0x41, - 350 | 0x04, 0x6b, 0x22, 0x04, 0x41, 0x02, 0x76, 0x41, 0x01, 0x6a, 0x41, 0x03, - 351 | 0x71, 0x22, 0x03, 0x04, 0x40, 0x20, 0x01, 0x41, 0x04, 0x6b, 0x21, 0x05, - 352 | 0x20, 0x00, 0x41, 0x04, 0x6b, 0x21, 0x06, 0x03, 0x40, 0x20, 0x02, 0x20, - 353 | 0x06, 0x6a, 0x20, 0x02, 0x20, 0x05, 0x6a, 0x28, 0x02, 0x00, 0x36, 0x02, - 354 | 0x00, 0x20, 0x02, 0x41, 0x04, 0x6b, 0x21, 0x02, 0x20, 0x03, 0x41, 0x01, - 355 | 0x6b, 0x22, 0x03, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x04, 0x41, 0x0c, 0x49, - 356 | 0x0d, 0x00, 0x20, 0x01, 0x41, 0x10, 0x6b, 0x21, 0x05, 0x20, 0x00, 0x41, - 357 | 0x10, 0x6b, 0x21, 0x06, 0x03, 0x40, 0x20, 0x02, 0x20, 0x06, 0x6a, 0x22, - 358 | 0x03, 0x41, 0x0c, 0x6a, 0x20, 0x02, 0x20, 0x05, 0x6a, 0x22, 0x04, 0x41, - 359 | 0x0c, 0x6a, 0x28, 0x02, 0x00, 0x36, 0x02, 0x00, 0x20, 0x03, 0x41, 0x08, - 360 | 0x6a, 0x20, 0x04, 0x41, 0x08, 0x6a, 0x28, 0x02, 0x00, 0x36, 0x02, 0x00, - 361 | 0x20, 0x03, 0x41, 0x04, 0x6a, 0x20, 0x04, 0x41, 0x04, 0x6a, 0x28, 0x02, - 362 | 0x00, 0x36, 0x02, 0x00, 0x20, 0x03, 0x20, 0x04, 0x28, 0x02, 0x00, 0x36, - 363 | 0x02, 0x00, 0x20, 0x02, 0x41, 0x10, 0x6b, 0x22, 0x02, 0x41, 0x03, 0x4b, - 364 | 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x02, 0x45, 0x0d, 0x02, 0x20, 0x02, 0x22, - 365 | 0x03, 0x41, 0x03, 0x71, 0x22, 0x05, 0x04, 0x40, 0x20, 0x01, 0x41, 0x01, - 366 | 0x6b, 0x21, 0x04, 0x20, 0x00, 0x41, 0x01, 0x6b, 0x21, 0x06, 0x03, 0x40, - 367 | 0x20, 0x03, 0x20, 0x06, 0x6a, 0x20, 0x03, 0x20, 0x04, 0x6a, 0x2d, 0x00, - 368 | 0x00, 0x3a, 0x00, 0x00, 0x20, 0x03, 0x41, 0x01, 0x6b, 0x21, 0x03, 0x20, 
- 369 | 0x05, 0x41, 0x01, 0x6b, 0x22, 0x05, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x02, - 370 | 0x41, 0x04, 0x49, 0x0d, 0x02, 0x20, 0x01, 0x41, 0x04, 0x6b, 0x21, 0x04, - 371 | 0x20, 0x00, 0x41, 0x04, 0x6b, 0x21, 0x05, 0x03, 0x40, 0x20, 0x03, 0x20, - 372 | 0x05, 0x6a, 0x22, 0x01, 0x41, 0x03, 0x6a, 0x20, 0x03, 0x20, 0x04, 0x6a, - 373 | 0x22, 0x02, 0x41, 0x03, 0x6a, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, - 374 | 0x01, 0x41, 0x02, 0x6a, 0x20, 0x02, 0x41, 0x02, 0x6a, 0x2d, 0x00, 0x00, - 375 | 0x3a, 0x00, 0x00, 0x20, 0x01, 0x41, 0x01, 0x6a, 0x20, 0x02, 0x41, 0x01, - 376 | 0x6a, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x01, 0x20, 0x02, 0x2d, - 377 | 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x03, 0x41, 0x04, 0x6b, 0x22, 0x03, - 378 | 0x0d, 0x00, 0x0b, 0x0c, 0x02, 0x0b, 0x20, 0x04, 0x41, 0x04, 0x49, 0x0d, - 379 | 0x00, 0x20, 0x04, 0x41, 0x04, 0x6b, 0x22, 0x05, 0x41, 0x02, 0x76, 0x41, - 380 | 0x01, 0x6a, 0x41, 0x07, 0x71, 0x22, 0x02, 0x04, 0x40, 0x20, 0x04, 0x20, - 381 | 0x02, 0x41, 0x02, 0x74, 0x6b, 0x21, 0x04, 0x03, 0x40, 0x20, 0x03, 0x20, - 382 | 0x01, 0x28, 0x02, 0x00, 0x36, 0x02, 0x00, 0x20, 0x01, 0x41, 0x04, 0x6a, - 383 | 0x21, 0x01, 0x20, 0x03, 0x41, 0x04, 0x6a, 0x21, 0x03, 0x20, 0x02, 0x41, - 384 | 0x01, 0x6b, 0x22, 0x02, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x05, 0x41, 0x1c, - 385 | 0x49, 0x0d, 0x00, 0x03, 0x40, 0x20, 0x03, 0x20, 0x01, 0x28, 0x02, 0x00, - 386 | 0x36, 0x02, 0x00, 0x20, 0x03, 0x20, 0x01, 0x28, 0x02, 0x04, 0x36, 0x02, - 387 | 0x04, 0x20, 0x03, 0x20, 0x01, 0x28, 0x02, 0x08, 0x36, 0x02, 0x08, 0x20, - 388 | 0x03, 0x20, 0x01, 0x28, 0x02, 0x0c, 0x36, 0x02, 0x0c, 0x20, 0x03, 0x20, - 389 | 0x01, 0x28, 0x02, 0x10, 0x36, 0x02, 0x10, 0x20, 0x03, 0x20, 0x01, 0x28, - 390 | 0x02, 0x14, 0x36, 0x02, 0x14, 0x20, 0x03, 0x20, 0x01, 0x28, 0x02, 0x18, - 391 | 0x36, 0x02, 0x18, 0x20, 0x03, 0x20, 0x01, 0x28, 0x02, 0x1c, 0x36, 0x02, - 392 | 0x1c, 0x20, 0x01, 0x41, 0x20, 0x6a, 0x21, 0x01, 0x20, 0x03, 0x41, 0x20, - 393 | 0x6a, 0x21, 0x03, 0x20, 0x04, 0x41, 0x20, 0x6b, 0x22, 0x04, 0x41, 0x03, 
- 394 | 0x4b, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x04, 0x45, 0x0d, 0x00, 0x02, 0x40, - 395 | 0x20, 0x04, 0x41, 0x07, 0x71, 0x22, 0x02, 0x45, 0x04, 0x40, 0x20, 0x04, - 396 | 0x21, 0x05, 0x0c, 0x01, 0x0b, 0x20, 0x04, 0x41, 0x78, 0x71, 0x21, 0x05, - 397 | 0x03, 0x40, 0x20, 0x03, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, - 398 | 0x20, 0x03, 0x41, 0x01, 0x6a, 0x21, 0x03, 0x20, 0x01, 0x41, 0x01, 0x6a, - 399 | 0x21, 0x01, 0x20, 0x02, 0x41, 0x01, 0x6b, 0x22, 0x02, 0x0d, 0x00, 0x0b, - 400 | 0x0b, 0x20, 0x04, 0x41, 0x08, 0x49, 0x0d, 0x00, 0x03, 0x40, 0x20, 0x03, - 401 | 0x20, 0x01, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x03, 0x20, 0x01, - 402 | 0x2d, 0x00, 0x01, 0x3a, 0x00, 0x01, 0x20, 0x03, 0x20, 0x01, 0x2d, 0x00, - 403 | 0x02, 0x3a, 0x00, 0x02, 0x20, 0x03, 0x20, 0x01, 0x2d, 0x00, 0x03, 0x3a, - 404 | 0x00, 0x03, 0x20, 0x03, 0x20, 0x01, 0x2d, 0x00, 0x04, 0x3a, 0x00, 0x04, - 405 | 0x20, 0x03, 0x20, 0x01, 0x2d, 0x00, 0x05, 0x3a, 0x00, 0x05, 0x20, 0x03, - 406 | 0x20, 0x01, 0x2d, 0x00, 0x06, 0x3a, 0x00, 0x06, 0x20, 0x03, 0x20, 0x01, - 407 | 0x2d, 0x00, 0x07, 0x3a, 0x00, 0x07, 0x20, 0x03, 0x41, 0x08, 0x6a, 0x21, - 408 | 0x03, 0x20, 0x01, 0x41, 0x08, 0x6a, 0x21, 0x01, 0x20, 0x05, 0x41, 0x08, - 409 | 0x6b, 0x22, 0x05, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x00, 0x0b, 0x09, 0x00, - 410 | 0x20, 0x00, 0x10, 0x15, 0x20, 0x00, 0x47, 0x0b, 0x0d, 0x00, 0x20, 0x00, - 411 | 0x41, 0x20, 0x46, 0x20, 0x00, 0x41, 0x09, 0x46, 0x72, 0x0b, 0x0a, 0x00, - 412 | 0x20, 0x00, 0x41, 0x30, 0x6b, 0x41, 0x0a, 0x49, 0x0b, 0x49, 0x01, 0x02, - 413 | 0x7f, 0x20, 0x00, 0x10, 0x0e, 0x20, 0x00, 0x6a, 0x21, 0x03, 0x02, 0x40, - 414 | 0x20, 0x02, 0x45, 0x0d, 0x00, 0x03, 0x40, 0x20, 0x01, 0x2d, 0x00, 0x00, - 415 | 0x22, 0x04, 0x45, 0x0d, 0x01, 0x20, 0x03, 0x20, 0x04, 0x3a, 0x00, 0x00, - 416 | 0x20, 0x03, 0x41, 0x01, 0x6a, 0x21, 0x03, 0x20, 0x01, 0x41, 0x01, 0x6a, - 417 | 0x21, 0x01, 0x20, 0x02, 0x41, 0x01, 0x6b, 0x22, 0x02, 0x0d, 0x00, 0x0b, - 418 | 0x0b, 0x20, 0x03, 0x41, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x00, 0x0b, 0xe6, 
- 419 | 0x03, 0x01, 0x04, 0x7f, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, - 420 | 0x20, 0x00, 0x20, 0x01, 0x22, 0x03, 0x73, 0x41, 0x03, 0x71, 0x04, 0x40, - 421 | 0x20, 0x00, 0x21, 0x04, 0x0c, 0x01, 0x0b, 0x20, 0x02, 0x41, 0x00, 0x47, - 422 | 0x21, 0x06, 0x02, 0x40, 0x20, 0x03, 0x41, 0x03, 0x71, 0x45, 0x04, 0x40, - 423 | 0x20, 0x00, 0x21, 0x04, 0x0c, 0x01, 0x0b, 0x20, 0x02, 0x45, 0x04, 0x40, - 424 | 0x20, 0x00, 0x21, 0x04, 0x0c, 0x01, 0x0b, 0x20, 0x00, 0x20, 0x03, 0x2d, - 425 | 0x00, 0x00, 0x22, 0x01, 0x3a, 0x00, 0x00, 0x20, 0x01, 0x45, 0x04, 0x40, - 426 | 0x20, 0x00, 0x21, 0x04, 0x20, 0x02, 0x21, 0x01, 0x0c, 0x05, 0x0b, 0x20, - 427 | 0x00, 0x41, 0x01, 0x6a, 0x21, 0x04, 0x20, 0x02, 0x41, 0x01, 0x6b, 0x22, - 428 | 0x01, 0x41, 0x00, 0x47, 0x21, 0x06, 0x20, 0x03, 0x41, 0x01, 0x6a, 0x22, - 429 | 0x05, 0x41, 0x03, 0x71, 0x45, 0x20, 0x01, 0x45, 0x72, 0x45, 0x04, 0x40, - 430 | 0x20, 0x04, 0x20, 0x05, 0x2d, 0x00, 0x00, 0x22, 0x05, 0x3a, 0x00, 0x00, - 431 | 0x20, 0x05, 0x45, 0x0d, 0x05, 0x20, 0x00, 0x41, 0x02, 0x6a, 0x21, 0x04, - 432 | 0x20, 0x02, 0x41, 0x02, 0x6b, 0x22, 0x01, 0x41, 0x00, 0x47, 0x21, 0x06, - 433 | 0x20, 0x03, 0x41, 0x02, 0x6a, 0x22, 0x05, 0x41, 0x03, 0x71, 0x45, 0x20, - 434 | 0x01, 0x45, 0x72, 0x45, 0x04, 0x40, 0x20, 0x04, 0x20, 0x05, 0x2d, 0x00, - 435 | 0x00, 0x22, 0x05, 0x3a, 0x00, 0x00, 0x20, 0x05, 0x45, 0x0d, 0x06, 0x20, - 436 | 0x00, 0x41, 0x03, 0x6a, 0x21, 0x04, 0x20, 0x02, 0x41, 0x03, 0x6b, 0x22, - 437 | 0x01, 0x41, 0x00, 0x47, 0x21, 0x06, 0x20, 0x03, 0x41, 0x03, 0x6a, 0x22, - 438 | 0x05, 0x41, 0x03, 0x71, 0x45, 0x20, 0x01, 0x45, 0x72, 0x45, 0x04, 0x40, - 439 | 0x20, 0x04, 0x20, 0x05, 0x2d, 0x00, 0x00, 0x22, 0x05, 0x3a, 0x00, 0x00, - 440 | 0x20, 0x05, 0x45, 0x0d, 0x07, 0x20, 0x00, 0x41, 0x04, 0x6a, 0x21, 0x04, - 441 | 0x20, 0x03, 0x41, 0x04, 0x6a, 0x21, 0x03, 0x20, 0x02, 0x41, 0x04, 0x6b, - 442 | 0x22, 0x02, 0x41, 0x00, 0x47, 0x21, 0x06, 0x0c, 0x03, 0x0b, 0x20, 0x05, - 443 | 0x21, 0x03, 0x20, 0x01, 0x21, 0x02, 0x0c, 0x02, 0x0b, 0x20, 0x05, 0x21, 
- 444 | 0x03, 0x20, 0x01, 0x21, 0x02, 0x0c, 0x01, 0x0b, 0x20, 0x05, 0x21, 0x03, - 445 | 0x20, 0x01, 0x21, 0x02, 0x0b, 0x20, 0x06, 0x45, 0x0d, 0x02, 0x20, 0x03, - 446 | 0x2d, 0x00, 0x00, 0x45, 0x04, 0x40, 0x20, 0x02, 0x21, 0x01, 0x0c, 0x04, - 447 | 0x0b, 0x20, 0x02, 0x41, 0x04, 0x49, 0x0d, 0x00, 0x03, 0x40, 0x41, 0x80, - 448 | 0x82, 0x84, 0x08, 0x20, 0x03, 0x28, 0x02, 0x00, 0x22, 0x01, 0x6b, 0x20, - 449 | 0x01, 0x72, 0x41, 0x80, 0x81, 0x82, 0x84, 0x78, 0x71, 0x41, 0x80, 0x81, - 450 | 0x82, 0x84, 0x78, 0x47, 0x0d, 0x02, 0x20, 0x04, 0x20, 0x01, 0x36, 0x02, - 451 | 0x00, 0x20, 0x04, 0x41, 0x04, 0x6a, 0x21, 0x04, 0x20, 0x03, 0x41, 0x04, - 452 | 0x6a, 0x21, 0x03, 0x20, 0x02, 0x41, 0x04, 0x6b, 0x22, 0x02, 0x41, 0x03, - 453 | 0x4b, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x02, 0x45, 0x0d, 0x01, 0x0b, 0x03, - 454 | 0x40, 0x20, 0x04, 0x20, 0x03, 0x2d, 0x00, 0x00, 0x22, 0x01, 0x3a, 0x00, - 455 | 0x00, 0x20, 0x01, 0x45, 0x04, 0x40, 0x20, 0x02, 0x21, 0x01, 0x0c, 0x03, - 456 | 0x0b, 0x20, 0x04, 0x41, 0x01, 0x6a, 0x21, 0x04, 0x20, 0x03, 0x41, 0x01, - 457 | 0x6a, 0x21, 0x03, 0x20, 0x02, 0x41, 0x01, 0x6b, 0x22, 0x02, 0x0d, 0x00, - 458 | 0x0b, 0x0b, 0x41, 0x00, 0x21, 0x01, 0x0b, 0x20, 0x04, 0x41, 0x00, 0x20, - 459 | 0x01, 0x10, 0x0d, 0x1a, 0x20, 0x00, 0x0b, 0x17, 0x00, 0x20, 0x00, 0x41, - 460 | 0x30, 0x6b, 0x41, 0x0a, 0x49, 0x20, 0x00, 0x41, 0x20, 0x72, 0x41, 0xe1, - 461 | 0x00, 0x6b, 0x41, 0x06, 0x49, 0x72, 0x0b, 0x67, 0x01, 0x02, 0x7f, 0x20, - 462 | 0x00, 0x45, 0x04, 0x40, 0x41, 0x00, 0x0f, 0x0b, 0x02, 0x7f, 0x20, 0x00, - 463 | 0x04, 0x40, 0x41, 0x8c, 0xc2, 0x04, 0x21, 0x01, 0x03, 0x40, 0x20, 0x01, - 464 | 0x41, 0x04, 0x6a, 0x22, 0x01, 0x28, 0x02, 0x00, 0x22, 0x02, 0x41, 0x00, - 465 | 0x20, 0x00, 0x20, 0x02, 0x47, 0x1b, 0x0d, 0x00, 0x0b, 0x20, 0x01, 0x41, - 466 | 0x00, 0x20, 0x02, 0x1b, 0x0c, 0x01, 0x0b, 0x41, 0x00, 0x21, 0x00, 0x03, - 467 | 0x40, 0x20, 0x00, 0x41, 0x90, 0xc2, 0x04, 0x6a, 0x20, 0x00, 0x41, 0x04, - 468 | 0x6a, 0x21, 0x00, 0x28, 0x02, 0x00, 0x0d, 0x00, 0x0b, 0x20, 0x00, 0x41, 
- 469 | 0x04, 0x6b, 0x41, 0x7c, 0x71, 0x41, 0x90, 0xc2, 0x04, 0x6a, 0x0b, 0x41, - 470 | 0x00, 0x47, 0x0b, 0x1d, 0x01, 0x01, 0x7f, 0x41, 0x01, 0x21, 0x01, 0x20, - 471 | 0x00, 0x41, 0x30, 0x6b, 0x41, 0x0a, 0x4f, 0x04, 0x7f, 0x20, 0x00, 0x10, - 472 | 0x0f, 0x41, 0x00, 0x47, 0x05, 0x20, 0x01, 0x0b, 0x0b, 0x0b, 0xf1, 0x42, - 473 | 0x01, 0x00, 0x41, 0x80, 0x80, 0x04, 0x0b, 0xe8, 0x42, 0x12, 0x11, 0x13, - 474 | 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 475 | 0x20, 0x21, 0x11, 0x22, 0x23, 0x24, 0x11, 0x25, 0x26, 0x27, 0x28, 0x29, - 476 | 0x2a, 0x2b, 0x2c, 0x11, 0x2d, 0x2e, 0x2f, 0x10, 0x10, 0x30, 0x10, 0x10, - 477 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x31, 0x32, 0x33, 0x10, 0x34, 0x35, 0x10, - 478 | 0x10, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 479 | 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 480 | 0x11, 0x11, 0x36, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 481 | 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 482 | 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 483 | 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 484 | 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 485 | 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 486 | 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 487 | 0x37, 0x11, 0x11, 0x11, 0x11, 0x38, 0x11, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, - 488 | 0x3e, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 489 | 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 490 | 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 491 | 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x3f, 0x10, 0x10, 0x10, - 492 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 493 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 
- 494 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x40, 0x41, 0x11, 0x42, 0x43, - 495 | 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x11, 0x4b, 0x4c, 0x4d, 0x4e, - 496 | 0x4f, 0x50, 0x51, 0x10, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, - 497 | 0x5a, 0x5b, 0x5c, 0x5d, 0x10, 0x5e, 0x5f, 0x60, 0x10, 0x11, 0x11, 0x11, - 498 | 0x61, 0x62, 0x63, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 499 | 0x10, 0x11, 0x11, 0x11, 0x11, 0x64, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 500 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x65, - 501 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 502 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 503 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x66, - 504 | 0x67, 0x10, 0x10, 0x68, 0x69, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 505 | 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, - 506 | 0x11, 0x11, 0x11, 0x11, 0x6a, 0x11, 0x11, 0x6b, 0x10, 0x10, 0x10, 0x10, - 507 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 508 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 509 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x6c, 0x6d, - 510 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x6e, 0x10, 0x10, - 511 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 512 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x6f, 0x70, 0x71, - 513 | 0x72, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x73, 0x74, 0x75, - 514 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x76, 0x77, 0x10, 0x10, 0x10, 0x10, 0x78, - 515 | 0x10, 0x10, 0x79, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, - 516 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 517 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 518 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
- 519 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 520 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 521 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, - 522 | 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, - 523 | 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x20, 0x04, 0xff, 0xff, 0x7f, - 524 | 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 525 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 526 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0xff, 0x03, 0x00, 0x1f, 0x50, 0x00, - 527 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, - 528 | 0x00, 0x00, 0x00, 0xdf, 0xbc, 0x40, 0xd7, 0xff, 0xff, 0xfb, 0xff, 0xff, - 529 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xff, 0xff, 0xff, 0xff, - 530 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 531 | 0xff, 0x03, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 532 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, - 533 | 0xff, 0xff, 0xff, 0x7f, 0x02, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, - 534 | 0x00, 0x00, 0x00, 0xff, 0xbf, 0xb6, 0x00, 0xff, 0xff, 0xff, 0x87, 0x07, - 535 | 0x00, 0x00, 0x00, 0xff, 0x07, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 536 | 0xfe, 0xff, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 537 | 0xff, 0xff, 0xff, 0xef, 0x1f, 0xfe, 0xe1, 0xff, 0x9f, 0x00, 0x00, 0xff, - 538 | 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, - 539 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03, 0x00, 0xff, 0xff, 0xff, - 540 | 0xff, 0xff, 0x07, 0x30, 0x04, 0xff, 0xff, 0xff, 0xfc, 0xff, 0x1f, 0x00, - 541 | 0x00, 0xff, 0xff, 0xff, 0x01, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, - 542 | 0x00, 0xff, 0xff, 0xdf, 0x3f, 0x00, 0x00, 0xf0, 0xff, 0xf8, 0x03, 0xff, - 543 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xff, 0xdf, 0xe1, 
- 544 | 0xff, 0xcf, 0xff, 0xfe, 0xff, 0xef, 0x9f, 0xf9, 0xff, 0xff, 0xfd, 0xc5, - 545 | 0xe3, 0x9f, 0x59, 0x80, 0xb0, 0xcf, 0xff, 0x03, 0x10, 0xee, 0x87, 0xf9, - 546 | 0xff, 0xff, 0xfd, 0x6d, 0xc3, 0x87, 0x19, 0x02, 0x5e, 0xc0, 0xff, 0x3f, - 547 | 0x00, 0xee, 0xbf, 0xfb, 0xff, 0xff, 0xfd, 0xed, 0xe3, 0xbf, 0x1b, 0x01, - 548 | 0x00, 0xcf, 0xff, 0x00, 0x1e, 0xee, 0x9f, 0xf9, 0xff, 0xff, 0xfd, 0xed, - 549 | 0xe3, 0x9f, 0x19, 0xc0, 0xb0, 0xcf, 0xff, 0x02, 0x00, 0xec, 0xc7, 0x3d, - 550 | 0xd6, 0x18, 0xc7, 0xff, 0xc3, 0xc7, 0x1d, 0x81, 0x00, 0xc0, 0xff, 0x00, - 551 | 0x00, 0xef, 0xdf, 0xfd, 0xff, 0xff, 0xfd, 0xff, 0xe3, 0xdf, 0x1d, 0x60, - 552 | 0x07, 0xcf, 0xff, 0x00, 0x00, 0xef, 0xdf, 0xfd, 0xff, 0xff, 0xfd, 0xef, - 553 | 0xe3, 0xdf, 0x1d, 0x60, 0x40, 0xcf, 0xff, 0x06, 0x00, 0xef, 0xdf, 0xfd, - 554 | 0xff, 0xff, 0xff, 0xff, 0xe7, 0xdf, 0x5d, 0xf0, 0x80, 0xcf, 0xff, 0x00, - 555 | 0xfc, 0xec, 0xff, 0x7f, 0xfc, 0xff, 0xff, 0xfb, 0x2f, 0x7f, 0x80, 0x5f, - 556 | 0xff, 0xc0, 0xff, 0x0c, 0x00, 0xfe, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xff, - 557 | 0x07, 0x3f, 0x20, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0xd6, 0xf7, 0xff, - 558 | 0xff, 0xaf, 0xff, 0xff, 0x3b, 0x5f, 0x20, 0xff, 0xf3, 0x00, 0x00, 0x00, - 559 | 0x00, 0x01, 0x00, 0x00, 0x00, 0xff, 0x03, 0x00, 0x00, 0xff, 0xfe, 0xff, - 560 | 0xff, 0xff, 0x1f, 0xfe, 0xff, 0x03, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, - 561 | 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 562 | 0xff, 0xff, 0xff, 0x7f, 0xf9, 0xff, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, - 563 | 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xff, 0xff, 0xbf, 0x20, 0xff, - 564 | 0xff, 0xff, 0xff, 0xff, 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 565 | 0xff, 0xff, 0x3d, 0x7f, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3d, 0xff, - 566 | 0xff, 0xff, 0xff, 0x3d, 0x7f, 0x3d, 0xff, 0x7f, 0xff, 0xff, 0xff, 0xff, - 567 | 0xff, 0xff, 0xff, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 568 | 0x07, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 
- 569 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0x3f, 0xfe, 0xff, 0xff, - 570 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 571 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 572 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 573 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9f, 0xff, 0xff, 0xfe, 0xff, 0xff, - 574 | 0x07, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7, 0xff, - 575 | 0x01, 0xff, 0xdf, 0x0f, 0x00, 0xff, 0xff, 0x0f, 0x00, 0xff, 0xff, 0x0f, - 576 | 0x00, 0xff, 0xdf, 0x0d, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xcf, - 577 | 0xff, 0xff, 0x01, 0x80, 0x10, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0xff, - 578 | 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 579 | 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0xff, 0xff, 0xff, 0xff, 0xff, - 580 | 0xff, 0xff, 0xff, 0x3f, 0x00, 0xff, 0xff, 0xff, 0x7f, 0xff, 0x0f, 0xff, - 581 | 0x01, 0xc0, 0xff, 0xff, 0xff, 0xff, 0x3f, 0x1f, 0x00, 0xff, 0xff, 0xff, - 582 | 0xff, 0xff, 0x0f, 0xff, 0xff, 0xff, 0x03, 0xff, 0x03, 0x00, 0x00, 0x00, - 583 | 0x00, 0xff, 0xff, 0xff, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 584 | 0x7f, 0xfe, 0xff, 0x1f, 0x00, 0xff, 0x03, 0xff, 0x03, 0x80, 0x00, 0x00, - 585 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 586 | 0xff, 0xff, 0xff, 0xef, 0xff, 0xef, 0x0f, 0xff, 0x03, 0x00, 0x00, 0x00, - 587 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, - 588 | 0xff, 0xbf, 0xff, 0x03, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, - 589 | 0x00, 0xff, 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0x01, 0xff, - 590 | 0xff, 0xff, 0xff, 0xff, 0xe7, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0x6f, - 591 | 0x04, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 592 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 593 | 0xff, 0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x1f, 0x00, 0xff, 0xff, 0x3f, 
- 594 | 0x3f, 0xff, 0xff, 0xff, 0xff, 0x3f, 0x3f, 0xff, 0xaa, 0xff, 0xff, 0xff, - 595 | 0x3f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xdf, 0x5f, 0xdc, 0x1f, 0xcf, - 596 | 0x0f, 0xff, 0x1f, 0xdc, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 597 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80, 0x00, 0x00, 0xff, - 598 | 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 599 | 0x00, 0x84, 0xfc, 0x2f, 0x3e, 0x50, 0xbd, 0xff, 0xf3, 0xe0, 0x43, 0x00, - 600 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 601 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 602 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 603 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0xff, - 604 | 0xff, 0xff, 0x03, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xff, - 605 | 0xff, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 606 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x78, 0x0c, - 607 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xbf, 0x20, 0xff, 0xff, 0xff, 0xff, 0xff, - 608 | 0xff, 0xff, 0x80, 0x00, 0x00, 0xff, 0xff, 0x7f, 0x00, 0x7f, 0x7f, 0x7f, - 609 | 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, - 610 | 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 611 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 612 | 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x00, 0x00, 0x00, 0xfe, 0x03, 0x3e, - 613 | 0x1f, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, - 614 | 0xe0, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 615 | 0xf7, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, - 616 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, 0x00, 0xff, 0xff, 0xff, - 617 | 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, - 618 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
- 619 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, 0x00, - 620 | 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 621 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 622 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, - 623 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 624 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, - 625 | 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0x1f, 0xff, - 626 | 0xff, 0xff, 0x0f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xf0, - 627 | 0x8f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 628 | 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0xfc, 0xff, 0xff, - 629 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf9, 0xff, - 630 | 0xff, 0xff, 0xff, 0xff, 0xff, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, - 631 | 0xff, 0xbf, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 632 | 0xff, 0xff, 0xff, 0x0f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 633 | 0xff, 0x2f, 0x00, 0xff, 0x03, 0x00, 0x00, 0xfc, 0xe8, 0xff, 0xff, 0xff, - 634 | 0xff, 0xff, 0x07, 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0xff, 0xff, 0xff, - 635 | 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf7, 0xff, 0x00, 0x80, 0xff, - 636 | 0x03, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, - 637 | 0x00, 0xff, 0x3f, 0xff, 0x03, 0xff, 0xff, 0x7f, 0xfc, 0xff, 0xff, 0xff, - 638 | 0xff, 0xff, 0xff, 0xff, 0x7f, 0x05, 0x00, 0x00, 0x38, 0xff, 0xff, 0x3c, - 639 | 0x00, 0x7e, 0x7e, 0x7e, 0x00, 0x7f, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, - 640 | 0xf7, 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 641 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0xff, 0x03, 0xff, 0xff, 0xff, - 642 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 643 | 0xff, 0xff, 0xff, 0xff, 0xff, 0x0f, 0x00, 0xff, 0xff, 0x7f, 0xf8, 0xff, 
- 644 | 0xff, 0xff, 0xff, 0xff, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 645 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xff, 0xff, 0xff, - 646 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00, - 647 | 0x00, 0x7f, 0x00, 0xf8, 0xe0, 0xff, 0xfd, 0x7f, 0x5f, 0xdb, 0xff, 0xff, - 648 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03, - 649 | 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 650 | 0xff, 0xff, 0xff, 0xff, 0x3f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, - 651 | 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, - 652 | 0x00, 0x00, 0x00, 0xff, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 653 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdf, 0xff, 0xff, 0xff, 0xff, - 654 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 655 | 0x1f, 0x00, 0x00, 0xff, 0x03, 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, - 656 | 0x07, 0xc0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 657 | 0x7f, 0xfc, 0xfc, 0xfc, 0x1c, 0x00, 0x00, 0x00, 0x00, 0xff, 0xef, 0xff, - 658 | 0xff, 0x7f, 0xff, 0xff, 0xb7, 0xff, 0x3f, 0xff, 0x3f, 0x00, 0x00, 0x00, - 659 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 660 | 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 661 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, - 662 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 663 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 664 | 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x1f, 0xff, 0xff, 0xff, - 665 | 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 666 | 0xff, 0x00, 0xe0, 0xff, 0xff, 0xff, 0x07, 0xff, 0xff, 0xff, 0xff, 0xff, - 667 | 0x07, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xff, 0xff, 0x0f, 0xff, 0x3e, - 668 | 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
- 669 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 670 | 0x3f, 0xff, 0x03, 0xff, 0xff, 0xff, 0xff, 0x0f, 0xff, 0xff, 0xff, 0xff, - 671 | 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, - 672 | 0xff, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 673 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 674 | 0xff, 0xff, 0xff, 0x7f, 0x00, 0xff, 0xff, 0x3f, 0x00, 0xff, 0x00, 0x00, - 675 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 676 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xbf, - 677 | 0x91, 0xff, 0xff, 0x3f, 0x00, 0xff, 0xff, 0x7f, 0x00, 0xff, 0xff, 0xff, - 678 | 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x37, - 679 | 0x00, 0xff, 0xff, 0x3f, 0x00, 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0x00, - 680 | 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 681 | 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6f, 0xf0, 0xef, - 682 | 0xfe, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 683 | 0x1f, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, 0xff, 0xfe, 0xff, - 684 | 0xff, 0x1f, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, - 685 | 0x00, 0xff, 0xff, 0x3f, 0x00, 0xff, 0xff, 0x07, 0x00, 0xff, 0xff, 0x03, - 686 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 687 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, - 688 | 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, - 689 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0xff, 0xff, 0xff, - 690 | 0xff, 0xff, 0x00, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 691 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 692 | 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x1f, 0x80, 0x00, 0xff, - 693 | 0xff, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
- 694 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x7f, - 695 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0x00, 0x00, - 696 | 0x00, 0xc0, 0xff, 0x00, 0x00, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 697 | 0x01, 0x00, 0x00, 0xff, 0xff, 0xff, 0x01, 0xff, 0x03, 0xff, 0xff, 0xff, - 698 | 0xff, 0xff, 0xff, 0xc7, 0xff, 0x70, 0x00, 0xff, 0xff, 0xff, 0xff, 0x47, - 699 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1e, 0x00, 0xff, - 700 | 0x17, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0x9f, - 701 | 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xbd, 0xff, - 702 | 0xbf, 0xff, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0xff, - 703 | 0x03, 0xef, 0x9f, 0xf9, 0xff, 0xff, 0xfd, 0xed, 0xe3, 0x9f, 0x19, 0x81, - 704 | 0xe0, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 705 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 706 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xbb, 0x07, 0xff, 0x83, 0x00, 0x00, 0x00, - 707 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb3, 0x00, 0xff, - 708 | 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 709 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 710 | 0xff, 0xff, 0xff, 0x3f, 0x7f, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, - 711 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0x11, 0x00, 0xff, - 712 | 0x03, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, - 713 | 0x01, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 714 | 0xe7, 0xff, 0x07, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 715 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 716 | 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 717 | 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 718 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03, 0x00, 
- 719 | 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 720 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xfc, 0xff, - 721 | 0xff, 0xff, 0xff, 0xff, 0xfc, 0x1a, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 722 | 0xff, 0xff, 0xff, 0xe7, 0x7f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, - 723 | 0xff, 0xff, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 724 | 0xff, 0xff, 0xff, 0xff, 0x01, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0x7f, - 725 | 0x7f, 0x01, 0x00, 0xff, 0x03, 0x00, 0x00, 0xfc, 0xff, 0xff, 0xff, 0xfc, - 726 | 0xff, 0xff, 0xfe, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 727 | 0x00, 0x7f, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xb4, 0xcb, 0x00, 0xff, - 728 | 0x03, 0xbf, 0xfd, 0xff, 0xff, 0xff, 0x7f, 0x7b, 0x01, 0xff, 0x03, 0x00, - 729 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 730 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 731 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 732 | 0x00, 0xff, 0xff, 0x7f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 733 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 734 | 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 735 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 736 | 0xff, 0xff, 0x7f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 737 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 738 | 0xff, 0xff, 0xff, 0xff, 0xff, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 739 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 740 | 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, - 741 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 742 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 743 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, 0x00, 
- 744 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 745 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 746 | 0xff, 0xff, 0xff, 0xff, 0x01, 0xff, 0xff, 0xff, 0x7f, 0xff, 0x03, 0x00, - 747 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, - 748 | 0xff, 0xff, 0x3f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, - 749 | 0x00, 0x0f, 0x00, 0xff, 0x03, 0xf8, 0xff, 0xff, 0xe0, 0xff, 0xff, 0x00, - 750 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 751 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 752 | 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 753 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 754 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x87, 0xff, 0xff, 0xff, 0xff, 0xff, - 755 | 0xff, 0xff, 0x80, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 756 | 0x00, 0x0b, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 757 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 758 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 759 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 760 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 761 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0xff, 0xff, 0xff, - 762 | 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0xf0, 0x00, 0xff, - 763 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 764 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 765 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 766 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 767 | 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 768 | 0xff, 0xff, 0x07, 0xff, 0x1f, 0xff, 0x01, 0xff, 0x43, 0x00, 0x00, 0x00, 
- 769 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 770 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff, 0xff, - 771 | 0xff, 0xff, 0xff, 0xff, 0xdf, 0x64, 0xde, 0xff, 0xeb, 0xef, 0xff, 0xff, - 772 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xe7, 0xdf, 0xdf, 0xff, 0xff, 0xff, - 773 | 0x7b, 0x5f, 0xfc, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 774 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 775 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 776 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, - 777 | 0xff, 0xfd, 0xff, 0xff, 0xf7, 0xff, 0xff, 0xff, 0xf7, 0xff, 0xff, 0xdf, - 778 | 0xff, 0xff, 0xff, 0xdf, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, - 779 | 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xf7, 0xcf, 0xff, - 780 | 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xf9, 0xdb, 0x07, 0x00, - 781 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 782 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 783 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x80, 0x3f, 0xff, 0x43, 0x00, - 784 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 785 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 786 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 787 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, - 788 | 0xff, 0xff, 0x0f, 0xff, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 789 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 790 | 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 791 | 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x8f, 0x08, 0xff, - 792 | 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 793 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xef, 0xff, 0xff, 
- 794 | 0xff, 0x96, 0xfe, 0xf7, 0x0a, 0x84, 0xea, 0x96, 0xaa, 0x96, 0xf7, 0xf7, - 795 | 0x5e, 0xff, 0xfb, 0xff, 0x0f, 0xee, 0xfb, 0xff, 0x0f, 0x00, 0x00, 0x00, - 796 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, - 797 | 0xff, 0xff, 0x03, 0xff, 0xff, 0xff, 0x03, 0xff, 0xff, 0xff, 0x03, 0x00, - 798 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 799 | 0x00, 0x00, 0x08, 0x00, 0x00, 0x56, 0x01, 0x00, 0x00, 0x39, 0x00, 0x00, - 800 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x20, 0x00, - 801 | 0x00, 0x00, 0xe0, 0xff, 0xff, 0x00, 0xbf, 0x1d, 0x00, 0x00, 0xe7, 0x02, - 802 | 0x00, 0x00, 0x79, 0x00, 0x00, 0x02, 0x24, 0x00, 0x00, 0x01, 0x01, 0x00, - 803 | 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, - 804 | 0x00, 0x00, 0xfe, 0xff, 0xff, 0x01, 0x39, 0xff, 0xff, 0x00, 0x18, 0xff, - 805 | 0xff, 0x01, 0x87, 0xff, 0xff, 0x00, 0xd4, 0xfe, 0xff, 0x00, 0xc3, 0x00, - 806 | 0x00, 0x01, 0xd2, 0x00, 0x00, 0x01, 0xce, 0x00, 0x00, 0x01, 0xcd, 0x00, - 807 | 0x00, 0x01, 0x4f, 0x00, 0x00, 0x01, 0xca, 0x00, 0x00, 0x01, 0xcb, 0x00, - 808 | 0x00, 0x01, 0xcf, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x01, 0xd3, 0x00, - 809 | 0x00, 0x01, 0xd1, 0x00, 0x00, 0x00, 0xa3, 0x00, 0x00, 0x01, 0xd5, 0x00, - 810 | 0x00, 0x00, 0x82, 0x00, 0x00, 0x01, 0xd6, 0x00, 0x00, 0x01, 0xda, 0x00, - 811 | 0x00, 0x01, 0xd9, 0x00, 0x00, 0x01, 0xdb, 0x00, 0x00, 0x00, 0x38, 0x00, - 812 | 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0xb1, 0xff, 0xff, 0x01, 0x9f, 0xff, - 813 | 0xff, 0x01, 0xc8, 0xff, 0xff, 0x02, 0x28, 0x24, 0x00, 0x00, 0x00, 0x00, - 814 | 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x33, 0xff, - 815 | 0xff, 0x00, 0x26, 0xff, 0xff, 0x01, 0x7e, 0xff, 0xff, 0x01, 0x2b, 0x2a, - 816 | 0x00, 0x01, 0x5d, 0xff, 0xff, 0x01, 0x28, 0x2a, 0x00, 0x00, 0x3f, 0x2a, - 817 | 0x00, 0x01, 0x3d, 0xff, 0xff, 0x01, 0x45, 0x00, 0x00, 0x01, 0x47, 0x00, - 818 | 0x00, 0x00, 0x1f, 0x2a, 0x00, 0x00, 0x1c, 0x2a, 0x00, 0x00, 0x1e, 0x2a, 
- 819 | 0x00, 0x00, 0x2e, 0xff, 0xff, 0x00, 0x32, 0xff, 0xff, 0x00, 0x36, 0xff, - 820 | 0xff, 0x00, 0x35, 0xff, 0xff, 0x00, 0x4f, 0xa5, 0x00, 0x00, 0x4b, 0xa5, - 821 | 0x00, 0x00, 0x31, 0xff, 0xff, 0x00, 0x28, 0xa5, 0x00, 0x00, 0x44, 0xa5, - 822 | 0x00, 0x00, 0x2f, 0xff, 0xff, 0x00, 0x2d, 0xff, 0xff, 0x00, 0xf7, 0x29, - 823 | 0x00, 0x00, 0x41, 0xa5, 0x00, 0x00, 0xfd, 0x29, 0x00, 0x00, 0x2b, 0xff, - 824 | 0xff, 0x00, 0x2a, 0xff, 0xff, 0x00, 0xe7, 0x29, 0x00, 0x00, 0x43, 0xa5, - 825 | 0x00, 0x00, 0x2a, 0xa5, 0x00, 0x00, 0xbb, 0xff, 0xff, 0x00, 0x27, 0xff, - 826 | 0xff, 0x00, 0xb9, 0xff, 0xff, 0x00, 0x25, 0xff, 0xff, 0x00, 0x15, 0xa5, - 827 | 0x00, 0x00, 0x12, 0xa5, 0x00, 0x02, 0x24, 0x4c, 0x00, 0x00, 0x00, 0x00, - 828 | 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0xe0, 0xff, 0xff, 0x01, 0x01, 0x00, - 829 | 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x54, 0x00, 0x00, 0x01, 0x74, 0x00, - 830 | 0x00, 0x01, 0x26, 0x00, 0x00, 0x01, 0x25, 0x00, 0x00, 0x01, 0x40, 0x00, - 831 | 0x00, 0x01, 0x3f, 0x00, 0x00, 0x00, 0xda, 0xff, 0xff, 0x00, 0xdb, 0xff, - 832 | 0xff, 0x00, 0xe1, 0xff, 0xff, 0x00, 0xc0, 0xff, 0xff, 0x00, 0xc1, 0xff, - 833 | 0xff, 0x01, 0x08, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0x00, 0xc7, 0xff, - 834 | 0xff, 0x00, 0xd1, 0xff, 0xff, 0x00, 0xca, 0xff, 0xff, 0x00, 0xf8, 0xff, - 835 | 0xff, 0x00, 0xaa, 0xff, 0xff, 0x00, 0xb0, 0xff, 0xff, 0x00, 0x07, 0x00, - 836 | 0x00, 0x00, 0x8c, 0xff, 0xff, 0x01, 0xc4, 0xff, 0xff, 0x00, 0xa0, 0xff, - 837 | 0xff, 0x01, 0xf9, 0xff, 0xff, 0x02, 0x1a, 0x70, 0x00, 0x01, 0x01, 0x00, - 838 | 0x00, 0x00, 0xff, 0xff, 0xff, 0x01, 0x20, 0x00, 0x00, 0x00, 0xe0, 0xff, - 839 | 0xff, 0x01, 0x50, 0x00, 0x00, 0x01, 0x0f, 0x00, 0x00, 0x00, 0xf1, 0xff, - 840 | 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x30, 0x00, 0x00, 0x00, 0xd0, 0xff, - 841 | 0xff, 0x01, 0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, - 842 | 0x00, 0x00, 0xc0, 0x0b, 0x00, 0x01, 0x60, 0x1c, 0x00, 0x00, 0x00, 0x00, - 843 | 0x00, 0x01, 0xd0, 0x97, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0xf8, 0xff, 
- 844 | 0xff, 0x02, 0x05, 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0xf4, - 845 | 0xff, 0x00, 0x9e, 0xe7, 0xff, 0x00, 0xc2, 0x89, 0x00, 0x00, 0xdb, 0xe7, - 846 | 0xff, 0x00, 0x92, 0xe7, 0xff, 0x00, 0x93, 0xe7, 0xff, 0x00, 0x9c, 0xe7, - 847 | 0xff, 0x00, 0x9d, 0xe7, 0xff, 0x00, 0xa4, 0xe7, 0xff, 0x00, 0x00, 0x00, - 848 | 0x00, 0x00, 0x38, 0x8a, 0x00, 0x00, 0x04, 0x8a, 0x00, 0x00, 0xe6, 0x0e, - 849 | 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, - 850 | 0x00, 0x00, 0xc5, 0xff, 0xff, 0x01, 0x41, 0xe2, 0xff, 0x02, 0x1d, 0x8f, - 851 | 0x00, 0x00, 0x08, 0x00, 0x00, 0x01, 0xf8, 0xff, 0xff, 0x00, 0x00, 0x00, - 852 | 0x00, 0x00, 0x56, 0x00, 0x00, 0x01, 0xaa, 0xff, 0xff, 0x00, 0x4a, 0x00, - 853 | 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x70, 0x00, - 854 | 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x01, 0xb6, 0xff, - 855 | 0xff, 0x01, 0xf7, 0xff, 0xff, 0x00, 0xdb, 0xe3, 0xff, 0x01, 0x9c, 0xff, - 856 | 0xff, 0x01, 0x90, 0xff, 0xff, 0x01, 0x80, 0xff, 0xff, 0x01, 0x82, 0xff, - 857 | 0xff, 0x02, 0x05, 0xac, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x10, 0x00, - 858 | 0x00, 0x00, 0xf0, 0xff, 0xff, 0x01, 0x1c, 0x00, 0x00, 0x01, 0x01, 0x00, - 859 | 0x00, 0x01, 0xa3, 0xe2, 0xff, 0x01, 0x41, 0xdf, 0xff, 0x01, 0xba, 0xdf, - 860 | 0xff, 0x00, 0xe4, 0xff, 0xff, 0x02, 0x0b, 0xb1, 0x00, 0x01, 0x01, 0x00, - 861 | 0x00, 0x00, 0xff, 0xff, 0xff, 0x01, 0x30, 0x00, 0x00, 0x00, 0xd0, 0xff, - 862 | 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x09, 0xd6, 0xff, 0x01, 0x1a, 0xf1, - 863 | 0xff, 0x01, 0x19, 0xd6, 0xff, 0x00, 0xd5, 0xd5, 0xff, 0x00, 0xd8, 0xd5, - 864 | 0xff, 0x01, 0xe4, 0xd5, 0xff, 0x01, 0x03, 0xd6, 0xff, 0x01, 0xe1, 0xd5, - 865 | 0xff, 0x01, 0xe2, 0xd5, 0xff, 0x01, 0xc1, 0xd5, 0xff, 0x00, 0x00, 0x00, - 866 | 0x00, 0x00, 0xa0, 0xe3, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, - 867 | 0x00, 0x00, 0xff, 0xff, 0xff, 0x02, 0x0c, 0xbc, 0x00, 0x00, 0x00, 0x00, - 868 | 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x01, 0xbc, 0x5a, 
- 869 | 0xff, 0x01, 0xa0, 0x03, 0x00, 0x01, 0xfc, 0x75, 0xff, 0x01, 0xd8, 0x5a, - 870 | 0xff, 0x00, 0x30, 0x00, 0x00, 0x01, 0xb1, 0x5a, 0xff, 0x01, 0xb5, 0x5a, - 871 | 0xff, 0x01, 0xbf, 0x5a, 0xff, 0x01, 0xee, 0x5a, 0xff, 0x01, 0xd6, 0x5a, - 872 | 0xff, 0x01, 0xeb, 0x5a, 0xff, 0x01, 0xd0, 0xff, 0xff, 0x01, 0xbd, 0x5a, - 873 | 0xff, 0x01, 0xc8, 0x75, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x68, - 874 | 0xff, 0x00, 0x60, 0xfc, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x20, 0x00, - 875 | 0x00, 0x00, 0xe0, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x28, 0x00, - 876 | 0x00, 0x00, 0xd8, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, - 877 | 0x00, 0x00, 0xc0, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x20, 0x00, - 878 | 0x00, 0x00, 0xe0, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x20, 0x00, - 879 | 0x00, 0x00, 0xe0, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x22, 0x00, - 880 | 0x00, 0x00, 0xde, 0xff, 0xff, 0x30, 0x0c, 0x31, 0x0d, 0x78, 0x0e, 0x7f, - 881 | 0x0f, 0x80, 0x10, 0x81, 0x11, 0x86, 0x12, 0x89, 0x13, 0x8a, 0x13, 0x8e, - 882 | 0x14, 0x8f, 0x15, 0x90, 0x16, 0x93, 0x13, 0x94, 0x17, 0x95, 0x18, 0x96, - 883 | 0x19, 0x97, 0x1a, 0x9a, 0x1b, 0x9c, 0x19, 0x9d, 0x1c, 0x9e, 0x1d, 0x9f, - 884 | 0x1e, 0xa6, 0x1f, 0xa9, 0x1f, 0xae, 0x1f, 0xb1, 0x20, 0xb2, 0x20, 0xb7, - 885 | 0x21, 0xbf, 0x22, 0xc5, 0x23, 0xc8, 0x23, 0xcb, 0x23, 0xdd, 0x24, 0xf2, - 886 | 0x23, 0xf6, 0x25, 0xf7, 0x26, 0x20, 0x2d, 0x3a, 0x2e, 0x3d, 0x2f, 0x3e, - 887 | 0x30, 0x3f, 0x31, 0x40, 0x31, 0x43, 0x32, 0x44, 0x33, 0x45, 0x34, 0x50, - 888 | 0x35, 0x51, 0x36, 0x52, 0x37, 0x53, 0x38, 0x54, 0x39, 0x59, 0x3a, 0x5b, - 889 | 0x3b, 0x5c, 0x3c, 0x61, 0x3d, 0x63, 0x3e, 0x65, 0x3f, 0x66, 0x40, 0x68, - 890 | 0x41, 0x69, 0x42, 0x6a, 0x40, 0x6b, 0x43, 0x6c, 0x44, 0x6f, 0x42, 0x71, - 891 | 0x45, 0x72, 0x46, 0x75, 0x47, 0x7d, 0x48, 0x82, 0x49, 0x87, 0x4a, 0x89, - 892 | 0x4b, 0x8a, 0x4c, 0x8b, 0x4c, 0x8c, 0x4d, 0x92, 0x4e, 0x9d, 0x4f, 0x9e, - 893 | 0x50, 0x45, 0x57, 0x7b, 0x1d, 0x7c, 0x1d, 0x7d, 0x1d, 0x7f, 0x58, 0x86, 
- 894 | 0x59, 0x88, 0x5a, 0x89, 0x5a, 0x8a, 0x5a, 0x8c, 0x5b, 0x8e, 0x5c, 0x8f, - 895 | 0x5c, 0xac, 0x5d, 0xad, 0x5e, 0xae, 0x5e, 0xaf, 0x5e, 0xc2, 0x5f, 0xcc, - 896 | 0x60, 0xcd, 0x61, 0xce, 0x61, 0xcf, 0x62, 0xd0, 0x63, 0xd1, 0x64, 0xd5, - 897 | 0x65, 0xd6, 0x66, 0xd7, 0x67, 0xf0, 0x68, 0xf1, 0x69, 0xf2, 0x6a, 0xf3, - 898 | 0x6b, 0xf4, 0x6c, 0xf5, 0x6d, 0xf9, 0x6e, 0xfd, 0x2d, 0xfe, 0x2d, 0xff, - 899 | 0x2d, 0x50, 0x69, 0x51, 0x69, 0x52, 0x69, 0x53, 0x69, 0x54, 0x69, 0x55, - 900 | 0x69, 0x56, 0x69, 0x57, 0x69, 0x58, 0x69, 0x59, 0x69, 0x5a, 0x69, 0x5b, - 901 | 0x69, 0x5c, 0x69, 0x5d, 0x69, 0x5e, 0x69, 0x5f, 0x69, 0x82, 0x00, 0x83, - 902 | 0x00, 0x84, 0x00, 0x85, 0x00, 0x86, 0x00, 0x87, 0x00, 0x88, 0x00, 0x89, - 903 | 0x00, 0xc0, 0x75, 0xcf, 0x76, 0x80, 0x89, 0x81, 0x8a, 0x82, 0x8b, 0x85, - 904 | 0x8c, 0x86, 0x8d, 0x70, 0x9d, 0x71, 0x9d, 0x76, 0x9e, 0x77, 0x9e, 0x78, - 905 | 0x9f, 0x79, 0x9f, 0x7a, 0xa0, 0x7b, 0xa0, 0x7c, 0xa1, 0x7d, 0xa1, 0xb3, - 906 | 0xa2, 0xba, 0xa3, 0xbb, 0xa3, 0xbc, 0xa4, 0xbe, 0xa5, 0xc3, 0xa2, 0xcc, - 907 | 0xa4, 0xda, 0xa6, 0xdb, 0xa6, 0xe5, 0x6a, 0xea, 0xa7, 0xeb, 0xa7, 0xec, - 908 | 0x6e, 0xf3, 0xa2, 0xf8, 0xa8, 0xf9, 0xa8, 0xfa, 0xa9, 0xfb, 0xa9, 0xfc, - 909 | 0xa4, 0x26, 0xb0, 0x2a, 0xb1, 0x2b, 0xb2, 0x4e, 0xb3, 0x84, 0x08, 0x62, - 910 | 0xba, 0x63, 0xbb, 0x64, 0xbc, 0x65, 0xbd, 0x66, 0xbe, 0x6d, 0xbf, 0x6e, - 911 | 0xc0, 0x6f, 0xc1, 0x70, 0xc2, 0x7e, 0xc3, 0x7f, 0xc3, 0x7d, 0xcf, 0x8d, - 912 | 0xd0, 0x94, 0xd1, 0xab, 0xd2, 0xac, 0xd3, 0xad, 0xd4, 0xb0, 0xd5, 0xb1, - 913 | 0xd6, 0xb2, 0xd7, 0xc4, 0xd8, 0xc5, 0xd9, 0xc6, 0xda, 0x07, 0x08, 0x09, - 914 | 0x0a, 0x0b, 0x0c, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 915 | 0x06, 0x0d, 0x06, 0x06, 0x0e, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 916 | 0x06, 0x0f, 0x10, 0x11, 0x12, 0x06, 0x13, 0x06, 0x06, 0x06, 0x06, 0x06, - 917 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x14, 0x15, 0x06, 0x06, 0x06, 0x06, 0x06, - 918 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 
- 919 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 920 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 921 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 922 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 923 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 924 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 925 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 926 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 927 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x16, 0x17, 0x06, 0x06, 0x06, - 928 | 0x18, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 929 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 930 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 931 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 932 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 933 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 934 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 935 | 0x19, 0x06, 0x06, 0x06, 0x06, 0x1a, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 936 | 0x06, 0x1b, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 937 | 0x06, 0x1c, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 938 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 939 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 940 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 941 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 942 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 943 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 
- 944 | 0x06, 0x06, 0x06, 0x1d, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 945 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 946 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 947 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 948 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 949 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 950 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 951 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 952 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 953 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 954 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1e, 0x06, 0x06, 0x06, 0x06, 0x06, - 955 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, - 956 | 0x06, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 957 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 958 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 959 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 960 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 961 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 962 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 963 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 964 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 965 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x2b, 0x2b, 0x2b, - 966 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x01, 0x00, 0x54, 0x56, 0x56, 0x56, 0x56, - 967 | 0x56, 0x56, 0x56, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 968 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 
- 969 | 0x00, 0x00, 0x00, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x07, 0x2b, - 970 | 0x2b, 0x5b, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x4a, 0x56, 0x56, - 971 | 0x05, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, - 972 | 0x50, 0x31, 0x50, 0x31, 0x50, 0x24, 0x50, 0x79, 0x31, 0x50, 0x31, 0x50, - 973 | 0x31, 0x38, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, - 974 | 0x50, 0x31, 0x50, 0x31, 0x50, 0x4e, 0x31, 0x02, 0x4e, 0x0d, 0x0d, 0x4e, - 975 | 0x03, 0x4e, 0x00, 0x24, 0x6e, 0x00, 0x4e, 0x31, 0x26, 0x6e, 0x51, 0x4e, - 976 | 0x24, 0x50, 0x4e, 0x39, 0x14, 0x81, 0x1b, 0x1d, 0x1d, 0x53, 0x31, 0x50, - 977 | 0x31, 0x50, 0x0d, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x1b, 0x53, 0x24, - 978 | 0x50, 0x31, 0x02, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, - 979 | 0x7b, 0x14, 0x79, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x2d, 0x2b, 0x49, 0x03, - 980 | 0x48, 0x03, 0x78, 0x5c, 0x7b, 0x14, 0x00, 0x96, 0x0a, 0x01, 0x2b, 0x28, - 981 | 0x06, 0x06, 0x00, 0x2a, 0x06, 0x2a, 0x2a, 0x2b, 0x07, 0xbb, 0xb5, 0x2b, - 982 | 0x1e, 0x00, 0x2b, 0x07, 0x2b, 0x2b, 0x2b, 0x01, 0x2b, 0x2b, 0x2b, 0x2b, - 983 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, - 984 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, - 985 | 0x2b, 0x2b, 0x2b, 0x2b, 0x01, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, - 986 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, - 987 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, - 988 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0xcd, 0x46, 0xcd, 0x2b, 0x00, 0x25, - 989 | 0x2b, 0x07, 0x01, 0x06, 0x01, 0x55, 0x56, 0x56, 0x56, 0x56, 0x56, 0x55, - 990 | 0x56, 0x56, 0x02, 0x24, 0x81, 0x81, 0x81, 0x81, 0x81, 0x15, 0x81, 0x81, - 991 | 0x81, 0x00, 0x00, 0x2b, 0x00, 0xb2, 0xd1, 0xb2, 0xd1, 0xb2, 0xd1, 0xb2, - 992 | 0xd1, 0x00, 0x00, 0xcd, 0xcc, 0x01, 0x00, 0xd7, 0xd7, 0xd7, 0xd7, 0xd7, - 993 | 0x83, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0xac, 
- 994 | 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0x1c, 0x00, 0x00, - 995 | 0x00, 0x00, 0x00, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, - 996 | 0x50, 0x31, 0x02, 0x00, 0x00, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, - 997 | 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x4e, - 998 | 0x31, 0x50, 0x31, 0x50, 0x4e, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, - 999 | 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x02, 0x87, 0xa6, 0x87, -1000 | 0xa6, 0x87, 0xa6, 0x87, 0xa6, 0x87, 0xa6, 0x87, 0xa6, 0x87, 0xa6, 0x87, -1001 | 0xa6, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1002 | 0x2b, 0x2b, 0x00, 0x00, 0x00, 0x54, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, -1003 | 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1004 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1005 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1006 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1007 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1008 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1009 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1010 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1011 | 0x00, 0x00, 0x00, 0x00, 0x54, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, -1012 | 0x56, 0x56, 0x56, 0x56, 0x56, 0x0c, 0x00, 0x0c, 0x2a, 0x2b, 0x2b, 0x2b, -1013 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x07, 0x2a, -1014 | 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1015 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1016 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1017 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1018 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 
-1019 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1020 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x56, 0x56, 0x6c, -1021 | 0x81, 0x15, 0x00, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1022 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1023 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1024 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x07, 0x6c, 0x03, -1025 | 0x41, 0x2b, 0x2b, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, -1026 | 0x56, 0x56, 0x56, 0x56, 0x56, 0x2c, 0x56, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1027 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1028 | 0x2b, 0x2b, 0x2b, 0x2b, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1029 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1030 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1031 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x6c, 0x00, -1032 | 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1033 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1034 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1035 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x25, 0x06, 0x25, 0x06, -1036 | 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, -1037 | 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, -1038 | 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, -1039 | 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x56, 0x7a, 0x9e, -1040 | 0x26, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, -1041 | 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, -1042 | 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x01, 0x2b, 0x2b, 0x4f, -1043 | 0x56, 0x56, 0x2c, 0x2b, 0x7f, 0x56, 0x56, 0x39, 0x2b, 0x2b, 0x55, 0x56, 
-1044 | 0x56, 0x2b, 0x2b, 0x4f, 0x56, 0x56, 0x2c, 0x2b, 0x7f, 0x56, 0x56, 0x81, -1045 | 0x37, 0x75, 0x5b, 0x7b, 0x5c, 0x2b, 0x2b, 0x4f, 0x56, 0x56, 0x02, 0xac, -1046 | 0x04, 0x00, 0x00, 0x39, 0x2b, 0x2b, 0x55, 0x56, 0x56, 0x2b, 0x2b, 0x4f, -1047 | 0x56, 0x56, 0x2c, 0x2b, 0x2b, 0x56, 0x56, 0x32, 0x13, 0x81, 0x57, 0x00, -1048 | 0x6f, 0x81, 0x7e, 0xc9, 0xd7, 0x7e, 0x2d, 0x81, 0x81, 0x0e, 0x7e, 0x39, -1049 | 0x7f, 0x6f, 0x57, 0x00, 0x81, 0x81, 0x7e, 0x15, 0x00, 0x7e, 0x03, 0x2b, -1050 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x07, -1051 | 0x2b, 0x24, 0x2b, 0x97, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1052 | 0x2b, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x56, 0x56, 0x56, 0x56, 0x56, -1053 | 0x80, 0x81, 0x81, 0x81, 0x81, 0x39, 0xbb, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, -1054 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1055 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1056 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1057 | 0x01, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, -1058 | 0x81, 0x81, 0x81, 0x81, 0xc9, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, -1059 | 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xd0, 0x0d, 0x00, 0x4e, -1060 | 0x31, 0x02, 0xb4, 0xc1, 0xc1, 0xd7, 0xd7, 0x24, 0x50, 0x31, 0x50, 0x31, -1061 | 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, -1062 | 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, -1063 | 0x50, 0x31, 0x50, 0x31, 0x50, 0xd7, 0xd7, 0x53, 0xc1, 0x47, 0xd4, 0xd7, -1064 | 0xd7, 0xd7, 0x05, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1065 | 0x2b, 0x2b, 0x2b, 0x07, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, -1066 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1067 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1068 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-1069 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1070 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1071 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1072 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1073 | 0x00, 0x00, 0x4e, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, -1074 | 0x50, 0x31, 0x50, 0x31, 0x50, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, -1075 | 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x00, 0x00, 0x00, -1076 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1077 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1078 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1079 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x79, 0x5c, 0x7b, 0x5c, 0x7b, 0x4f, -1080 | 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, -1081 | 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x2d, 0x2b, 0x2b, 0x79, -1082 | 0x14, 0x5c, 0x7b, 0x5c, 0x2d, 0x79, 0x2a, 0x5c, 0x27, 0x5c, 0x7b, 0x5c, -1083 | 0x7b, 0x5c, 0x7b, 0xa4, 0x00, 0x0a, 0xb4, 0x5c, 0x7b, 0x5c, 0x7b, 0x4f, -1084 | 0x03, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1085 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x01, 0x00, 0x00, 0x00, -1086 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1087 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1088 | 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x2b, -1089 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1090 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1091 | 0x2b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1092 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1093 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x2b, 
-1094 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x07, 0x00, 0x48, 0x56, 0x56, 0x56, -1095 | 0x56, 0x56, 0x56, 0x56, 0x56, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1096 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1097 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1098 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1099 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1100 | 0x00, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1101 | 0x2b, 0x2b, 0x55, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, -1102 | 0x56, 0x56, 0x56, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1103 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1104 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, -1105 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x07, -1106 | 0x00, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, -1107 | 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1108 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1109 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1110 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x2b, 0x2b, -1111 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1112 | 0x2b, 0x2b, 0x07, 0x00, 0x00, 0x00, 0x00, 0x56, 0x56, 0x56, 0x56, 0x56, -1113 | 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, -1114 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1115 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1116 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1117 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1118 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x2b, 
-1119 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x56, 0x56, 0x56, -1120 | 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x0e, 0x00, 0x00, 0x00, 0x00, -1121 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1122 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1123 | 0x00, 0x00, 0x00, 0x00, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, -1124 | 0x2b, 0x2b, 0x2b, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, -1125 | 0x56, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1126 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1127 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1128 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x2b, 0x2b, -1129 | 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x55, 0x56, 0x56, 0x56, -1130 | 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x0e, 0x00, 0x00, 0x00, 0x00, -1131 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1132 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1133 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1134 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1135 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1136 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x27, 0x51, 0x6f, 0x77, 0x00, -1137 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, -1138 | 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x83, 0x8e, 0x92, -1139 | 0x97, 0x00, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1140 | 0x00, 0xb4, 0xc4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1141 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1142 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1143 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-1144 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1145 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1146 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1147 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1148 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1149 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1150 | 0x00, 0x00, 0x00, 0xc6, 0xc9, 0x00, 0x00, 0x00, 0xdb, 0x00, 0x00, 0x00, -1151 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1152 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1153 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1154 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1155 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1156 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1157 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0x00, 0x00, 0x00, -1158 | 0x00, 0xe1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, -1159 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe7, 0x00, 0x00, -1160 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1161 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1162 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1163 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1164 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1165 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1166 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xea, -1167 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1168 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
-1169 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1170 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1171 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1172 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1173 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1174 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1175 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1176 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1177 | 0x00, 0x00, 0xed, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1178 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -1179 | 0x00, 0x20, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, -1180 | 0x00, 0x0d, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, -1181 | 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x01, 0x20, 0x00, -1182 | 0x00, 0x02, 0x20, 0x00, 0x00, 0x03, 0x20, 0x00, 0x00, 0x04, 0x20, 0x00, -1183 | 0x00, 0x05, 0x20, 0x00, 0x00, 0x06, 0x20, 0x00, 0x00, 0x08, 0x20, 0x00, -1184 | 0x00, 0x09, 0x20, 0x00, 0x00, 0x0a, 0x20, 0x00, 0x00, 0x28, 0x20, 0x00, -1185 | 0x00, 0x29, 0x20, 0x00, 0x00, 0x5f, 0x20, 0x00, 0x00, 0x00, 0x30, 0x00, -1186 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x01, 0x09, 0x70, 0x72, 0x6f, -1187 | 0x64, 0x75, 0x63, 0x65, 0x72, 0x73, 0x02, 0x08, 0x6c, 0x61, 0x6e, 0x67, -1188 | 0x75, 0x61, 0x67, 0x65, 0x01, 0x03, 0x43, 0x31, 0x31, 0x00, 0x0c, 0x70, -1189 | 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x65, 0x64, 0x2d, 0x62, 0x79, 0x01, -1190 | 0x05, 0x63, 0x6c, 0x61, 0x6e, 0x67, 0x5f, 0x31, 0x39, 0x2e, 0x31, 0x2e, -1191 | 0x35, 0x2d, 0x77, 0x61, 0x73, 0x69, 0x2d, 0x73, 0x64, 0x6b, 0x20, 0x28, -1192 | 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x67, 0x69, 0x74, 0x68, -1193 | 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6c, 0x6c, 0x76, 0x6d, 0x2f, 
-1194 | 0x6c, 0x6c, 0x76, 0x6d, 0x2d, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, -1195 | 0x20, 0x61, 0x62, 0x34, 0x62, 0x35, 0x61, 0x32, 0x64, 0x62, 0x35, 0x38, -1196 | 0x32, 0x39, 0x35, 0x38, 0x61, 0x66, 0x31, 0x65, 0x65, 0x33, 0x30, 0x38, -1197 | 0x61, 0x37, 0x39, 0x30, 0x63, 0x66, 0x64, 0x62, 0x34, 0x32, 0x62, 0x64, -1198 | 0x32, 0x34, 0x37, 0x32, 0x30, 0x29, 0x00, 0x67, 0x0f, 0x74, 0x61, 0x72, -1199 | 0x67, 0x65, 0x74, 0x5f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, -1200 | 0x06, 0x2b, 0x0f, 0x6d, 0x75, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x2d, 0x67, -1201 | 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x2b, 0x0b, 0x62, 0x75, 0x6c, 0x6b, -1202 | 0x2d, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x2b, 0x08, 0x73, 0x69, 0x67, -1203 | 0x6e, 0x2d, 0x65, 0x78, 0x74, 0x2b, 0x0f, 0x72, 0x65, 0x66, 0x65, 0x72, -1204 | 0x65, 0x6e, 0x63, 0x65, 0x2d, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2b, 0x0a, -1205 | 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2b, 0x0f, -1206 | 0x62, 0x75, 0x6c, 0x6b, 0x2d, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x2d, -1207 | 0x6f, 0x70, 0x74 -1208 | }; -1209 | unsigned int STDLIB_WASM_LEN = 14463; - - - --------------------------------------------------------------------------------- -/lib/tree-sitter.pc.in: --------------------------------------------------------------------------------- - 1 | prefix=@CMAKE_INSTALL_PREFIX@ - 2 | libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ - 3 | includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ - | - 4 | Name: tree-sitter - 5 | Description: @PROJECT_DESCRIPTION@ - 6 | URL: @PROJECT_HOMEPAGE_URL@ - 7 | Version: @PROJECT_VERSION@ - 8 | Libs: -L${libdir} -ltree-sitter - 9 | Cflags: -I${includedir} - - - --------------------------------------------------------------------------------- -/Package.swift: --------------------------------------------------------------------------------- - 1 | // swift-tools-version: 5.8 - 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 
- | - 3 | import PackageDescription - | - 4 | let package = Package( - 5 | name: "TreeSitter", - 6 | products: [ - 7 | // Products define the executables and libraries a package produces, and make them visible to other packages. - 8 | .library( - 9 | name: "TreeSitter", - 10 | targets: ["TreeSitter"]), - 11 | ], - 12 | targets: [ - 13 | .target(name: "TreeSitter", - 14 | path: "lib", - 15 | exclude: [ - 16 | "src/unicode/ICU_SHA", - 17 | "src/unicode/README.md", - 18 | "src/unicode/LICENSE", - 19 | "src/wasm/stdlib-symbols.txt", - 20 | "src/lib.c", - 21 | ], - 22 | sources: ["src"], - 23 | publicHeadersPath: "include", - 24 | cSettings: [ - 25 | .headerSearchPath("src"), - 26 | .define("_POSIX_C_SOURCE", to: "200112L"), - 27 | .define("_DEFAULT_SOURCE"), - 28 | .define("_DARWIN_C_SOURCE"), - 29 | ]), - 30 | ], - 31 | cLanguageStandard: .c11 - 32 | ) - - - --------------------------------------------------------------------------------- -/README.md: --------------------------------------------------------------------------------- - 1 | # tree-sitter - | - 2 | [![DOI](https://zenodo.org/badge/14164618.svg)](https://zenodo.org/badge/latestdoi/14164618) - 3 | [![discord][discord]](https://discord.gg/w7nTvsVJhm) - 4 | [![matrix][matrix]](https://matrix.to/#/#tree-sitter-chat:matrix.org) - | - 5 | Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. 
Tree-sitter aims to be: - | - 6 | - **General** enough to parse any programming language - 7 | - **Fast** enough to parse on every keystroke in a text editor - 8 | - **Robust** enough to provide useful results even in the presence of syntax errors - 9 | - **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application - | - 10 | ## Links - 11 | - [Documentation](https://tree-sitter.github.io) - 12 | - [Rust binding](lib/binding_rust/README.md) - 13 | - [Wasm binding](lib/binding_web/README.md) - 14 | - [Command-line interface](crates/cli/README.md) - | - 15 | [discord]: https://img.shields.io/discord/1063097320771698699?logo=discord&label=discord - 16 | [matrix]: https://img.shields.io/matrix/tree-sitter-chat%3Amatrix.org?logo=matrix&label=matrix - - - --------------------------------------------------------------------------------- -/test/fixtures/error_corpus/c_errors.txt: --------------------------------------------------------------------------------- - 1 | ======================================= - 2 | Statements with missing semicolons - 3 | ======================================= - | - 4 | int main() { - 5 | puts("hello") - 6 | puts("world") - 7 | } - | - 8 | --- - | - 9 | (translation_unit - 10 | (function_definition - 11 | (primitive_type) - 12 | (function_declarator (identifier) (parameter_list)) - 13 | (compound_statement - 14 | (expression_statement (call_expression (identifier) (argument_list (string_literal (string_content)))) (MISSING ";")) - 15 | (expression_statement (call_expression (identifier) (argument_list (string_literal (string_content)))) (MISSING ";"))))) - | - 16 | ============================================== - 17 | Top-level declarations with missing semicolons - 18 | ============================================== - | - 19 | int x - 20 | static int b - | - 21 | --- - | - 22 | (translation_unit - 23 | (declaration (primitive_type) (identifier) (MISSING ";")) - 24 | (declaration 
(storage_class_specifier) (primitive_type) (identifier) (MISSING ";"))) - | - 25 | ========================================== - 26 | Partial declaration lists inside ifdefs - 27 | ========================================== - | - 28 | #ifdef __cplusplus - 29 | extern "C" { - 30 | #endif - | - 31 | // ok - 32 | int b; - | - 33 | int c() { - 34 | return 5; - 35 | } - | - 36 | #ifdef __cplusplus - 37 | } - 38 | #endif - | - 39 | --- - | - 40 | (translation_unit - 41 | (preproc_ifdef (identifier) - 42 | (linkage_specification (string_literal (string_content)) (declaration_list - 43 | (preproc_call (preproc_directive)) - 44 | (comment) - 45 | (declaration (primitive_type) (identifier)) - 46 | (function_definition (primitive_type) (function_declarator (identifier) (parameter_list)) (compound_statement (return_statement (number_literal)))) - 47 | (preproc_ifdef (identifier) (MISSING "#endif")))))) - | - 48 | ========================================== - 49 | If statements with incomplete expressions - 50 | ========================================== - | - 51 | int main() { - 52 | if (a.) 
{ - 53 | b(); - 54 | c(); - | - 55 | if (*) d(); - 56 | } - 57 | } - | - 58 | --- - | - 59 | (translation_unit - 60 | (function_definition - 61 | (primitive_type) - 62 | (function_declarator (identifier) (parameter_list)) - 63 | (compound_statement - 64 | (if_statement - 65 | (parenthesized_expression (field_expression - 66 | (identifier) - 67 | (MISSING field_identifier))) - 68 | (compound_statement - 69 | (expression_statement (call_expression (identifier) (argument_list))) - 70 | (expression_statement (call_expression (identifier) (argument_list))) - 71 | (if_statement - 72 | (parenthesized_expression (pointer_expression (MISSING identifier))) - 73 | (expression_statement (call_expression (identifier) (argument_list))))))))) - | - 74 | ==================================== - 75 | Invalid characters in declarations - 76 | ==================================== - | - 77 | int main() { - 78 | int x; - 79 | int %$#@ - 80 | } - | - 81 | --- - | - 82 | (translation_unit - 83 | (function_definition - 84 | (primitive_type) - 85 | (function_declarator (identifier) (parameter_list)) - 86 | (compound_statement - 87 | (declaration (primitive_type) (identifier)) - 88 | (ERROR (primitive_type) (ERROR) (identifier) (UNEXPECTED '@'))))) - | - 89 | ========================================= - 90 | Extra values in parenthesized expressions - 91 | ========================================= - | - 92 | int main() { - 93 | int x = (123 123); - 94 | } - | - 95 | --- - | - 96 | (translation_unit - 97 | (function_definition - 98 | (primitive_type) - 99 | (function_declarator (identifier) (parameter_list)) - 100 | (compound_statement - 101 | (declaration (primitive_type) (init_declarator - 102 | (identifier) - 103 | (parenthesized_expression - 104 | (ERROR (number_literal)) - 105 | (number_literal))))))) - | - 106 | ======================================== - 107 | Extra identifiers in declarations - 108 | ======================================== - | - 109 | float x WTF; - 110 | int y = 5; - | 
- 111 | --- - | - 112 | (translation_unit - 113 | (declaration (primitive_type) (ERROR (identifier)) (identifier)) - 114 | (declaration (primitive_type) (init_declarator (identifier) (number_literal)))) - | - 115 | ========================================== - 116 | Declarations with missing variable names - 117 | ========================================== - | - 118 | int a() { - 119 | struct x = 1; - 120 | int = 2; - 121 | } - | - 122 | --- - | - 123 | (translation_unit - 124 | (function_definition - 125 | (primitive_type) - 126 | (function_declarator - 127 | (identifier) - 128 | (parameter_list)) - 129 | (compound_statement - 130 | (declaration - 131 | (struct_specifier (type_identifier)) - 132 | (init_declarator - 133 | (MISSING identifier) - 134 | (number_literal))) - 135 | (declaration - 136 | (primitive_type) - 137 | (init_declarator - 138 | (MISSING identifier) - 139 | (number_literal)))))) - - - --------------------------------------------------------------------------------- -/test/fixtures/error_corpus/javascript_errors.txt: --------------------------------------------------------------------------------- - 1 | =================================================== - 2 | Missing default values for function parameters - 3 | =================================================== - | - 4 | class A { - 5 | constructor (a, b = ) { - 6 | this.a = a - 7 | } - | - 8 | foo() {} - 9 | } - | - 10 | --- - | - 11 | (program - 12 | (class_declaration (identifier) (class_body - 13 | (method_definition - 14 | (property_identifier) - 15 | (formal_parameters (identifier) (identifier) (ERROR)) - 16 | (statement_block (expression_statement (assignment_expression (member_expression (this) (property_identifier)) (identifier))))) - 17 | (method_definition - 18 | (property_identifier) - 19 | (formal_parameters) - 20 | (statement_block))))) - | - 21 | =================================================== - 22 | Missing object-literal values - 23 | 
=================================================== - | - 24 | { - 25 | a: b, - 26 | c: - 27 | } - | - 28 | --- - | - 29 | (program (expression_statement (object - 30 | (pair (property_identifier) (identifier)) - 31 | (pair (property_identifier) (MISSING identifier))))) - | - 32 | =================================================== - 33 | Extra identifiers in expressions - 34 | =================================================== - | - 35 | if (a b) { - 36 | c d; - 37 | } - 38 | e f; - | - 39 | --- - | - 40 | (program - 41 | (if_statement - 42 | (parenthesized_expression - 43 | (identifier) - 44 | (ERROR (identifier))) - 45 | (statement_block - 46 | (ERROR (identifier)) - 47 | (expression_statement (identifier)))) - 48 | (expression_statement - 49 | (identifier) - 50 | (ERROR (identifier)))) - | - 51 | =================================================== - 52 | Extra complex literals in expressions - 53 | =================================================== - | - 54 | if ({a: 'b'} {c: 'd'}) { - 55 | x = function(a) { b; } function(c) { d; } - 56 | } - | - 57 | --- - | - 58 | (program - 59 | (if_statement - 60 | (parenthesized_expression - 61 | (ERROR (object (pair (property_identifier) (string (string_fragment))))) - 62 | (object (pair (property_identifier) (string (string_fragment))))) - 63 | (statement_block - 64 | (expression_statement - 65 | (assignment_expression - 66 | (identifier) - 67 | (function_expression (formal_parameters (identifier)) (statement_block (expression_statement (identifier))))) - 68 | (MISSING ";")) - 69 | (expression_statement - 70 | (function_expression (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))))) - | - 71 | =================================================== - 72 | Extra tokens at the end of the file - 73 | =================================================== - | - 74 | // skip the equals sign - 75 | a.b = - 76 | --- - | - 77 | (program - 78 | (comment) - 79 | (ERROR (member_expression 
(identifier) (property_identifier)))) - | - 80 | =================================================== - 81 | Errors after a sequence of function declarations - 82 | =================================================== - | - 83 | /* - 84 | * The JS grammar has an ambiguity such that these functions - 85 | * can be parsed either as function declarations or as - 86 | * function expressions. This ambiguity causes a lot of - 87 | * splitting and merging in the parse stack. When iterating - 88 | * the parse stack during an error repair, there would then - 89 | * be a very large number (> 2^16) of paths through the parse - 90 | * stack. - 91 | */ - 92 | function a() {} - 93 | function b() {} - 94 | function c() {} - 95 | function e() {} - 96 | function f() {} - 97 | function g() {} - 98 | function h() {} - 99 | function i() {} - | - 100 | var x = !!! - | - 101 | --- - | - 102 | (program - 103 | (comment) - 104 | (function_declaration (identifier) (formal_parameters) (statement_block)) - 105 | (function_declaration (identifier) (formal_parameters) (statement_block)) - 106 | (function_declaration (identifier) (formal_parameters) (statement_block)) - 107 | (function_declaration (identifier) (formal_parameters) (statement_block)) - 108 | (function_declaration (identifier) (formal_parameters) (statement_block)) - 109 | (function_declaration (identifier) (formal_parameters) (statement_block)) - 110 | (function_declaration (identifier) (formal_parameters) (statement_block)) - 111 | (function_declaration (identifier) (formal_parameters) (statement_block)) - 112 | (ERROR (identifier))) - | - 113 | ========================================================= - 114 | Errors inside of a template string substitution - 115 | ========================================================= - | - 116 | const a = `b c ${d += } f g` - 117 | const h = `i ${j(k} l` - | - 118 | --- - | - 119 | (program - 120 | (lexical_declaration - 121 | (variable_declarator - 122 | (identifier) - 123 | (template_string 
(string_fragment) (template_substitution - 124 | (augmented_assignment_expression (identifier) (MISSING identifier))) (string_fragment)))) - 125 | (lexical_declaration - 126 | (variable_declarator - 127 | (identifier) - 128 | (template_string (string_fragment) (template_substitution (call_expression - 129 | (identifier) - 130 | (arguments (identifier) (MISSING ")")))) (string_fragment))))) - | - 131 | ========================================================= - 132 | Long sequences of invalid tokens - 133 | ========================================================= - | - 134 | function main(x) { - 135 | console.log('a'); - 136 | what?????????????????????????????????????????????????? - 137 | console.log('b'); - 138 | return {}; - 139 | } - | - 140 | --- - | - 141 | (program - 142 | (function_declaration - 143 | (identifier) - 144 | (formal_parameters (identifier)) - 145 | (statement_block - 146 | (expression_statement - 147 | (call_expression - 148 | (member_expression (identifier) (property_identifier)) - 149 | (arguments (string (string_fragment))))) - 150 | (expression_statement - 151 | (binary_expression - 152 | (identifier) - 153 | (ERROR) - 154 | (call_expression - 155 | (member_expression (identifier) (property_identifier)) - 156 | (arguments (string (string_fragment)))))) - 157 | (return_statement (object))))) - - - --------------------------------------------------------------------------------- -/test/fixtures/error_corpus/json_errors.txt: --------------------------------------------------------------------------------- - 1 | ========================================== - 2 | top-level errors - 3 | ========================================== - | - 4 | [} - | - 5 | --- - | - 6 | (document - 7 | (ERROR)) - | - 8 | ========================================== - 9 | unexpected tokens - 10 | ========================================== - | - 11 | barf - | - 12 | --- - | - 13 | (document - 14 | (ERROR - 15 | (UNEXPECTED 'b'))) - | - 16 | 
========================================== - 17 | errors inside arrays - 18 | ========================================== - | - 19 | [1, , 2] - | - 20 | --- - | - 21 | (document - 22 | (array - 23 | (number) - 24 | (ERROR) - 25 | (number))) - | - 26 | ========================================== - 27 | errors inside objects - 28 | ========================================== - | - 29 | { "key1": 1, oops } - | - 30 | --- - | - 31 | (document - 32 | (object - 33 | (pair - 34 | (string - 35 | (string_content)) - 36 | (number)) - 37 | (ERROR - 38 | (UNEXPECTED 'o')))) - | - 39 | ========================================== - 40 | errors inside nested objects - 41 | ========================================== - | - 42 | { "key1": { "key2": 1, 2 }, "key3": 3 [ } - | - 43 | --- - | - 44 | (document - 45 | (object - 46 | (pair - 47 | (string - 48 | (string_content)) - 49 | (object - 50 | (pair - 51 | (string - 52 | (string_content)) - 53 | (number)) - 54 | (ERROR - 55 | (number)))) - 56 | (pair - 57 | (string - 58 | (string_content)) - 59 | (number)) - 60 | (ERROR))) - | - 61 | =============================== - 62 | incomplete tokens at EOF - 63 | ======================== - | - 64 | nul - 65 | --- - | - 66 | (document - 67 | (ERROR - 68 | (UNEXPECTED '\0'))) - - - --------------------------------------------------------------------------------- -/test/fixtures/error_corpus/python_errors.txt: --------------------------------------------------------------------------------- - 1 | ============================================= - 2 | incomplete condition in if statement - 3 | ============================================= - | - 4 | if a is: - 5 | print b - 6 | print c - 7 | print d - | - 8 | --- - | - 9 | (module - 10 | (if_statement - 11 | condition: (identifier) - 12 | (ERROR) - 13 | consequence: (block - 14 | (print_statement argument: (identifier)) - 15 | (print_statement argument: (identifier)))) - 16 | (print_statement argument: (identifier))) - | - 17 | 
========================================== - 18 | extra colon in function definition - 19 | ========================================== - | - 20 | def a():: - 21 | b - 22 | c - 23 | d - | - 24 | --- - | - 25 | (module - 26 | (function_definition - 27 | name: (identifier) - 28 | parameters: (parameters) - 29 | (ERROR) - 30 | body: (block - 31 | (expression_statement (identifier)) - 32 | (expression_statement (identifier)))) - 33 | (expression_statement (identifier))) - | - 34 | ======================================================== - 35 | stray if keyword in function definition - 36 | ======================================================== - | - 37 | def a(): - 38 | if - | - 39 | --- - | - 40 | (module - 41 | (function_definition - 42 | name: (identifier) - 43 | parameters: (parameters) - 44 | (ERROR) - 45 | body: (block))) - | - 46 | ======================================================== - 47 | incomplete if statement in function definition - 48 | ======================================================== - | - 49 | def a(): - 50 | if a - | - 51 | --- - | - 52 | (module - 53 | (function_definition - 54 | name: (identifier) - 55 | parameters: (parameters) - 56 | (ERROR (identifier)) - 57 | body: (block))) - | - 58 | ======================================================== - 59 | incomplete expression before triple-quoted string - 60 | ======================================================== - | - 61 | def a(): - 62 | b. 
- 63 | """ - 64 | c - 65 | """ - | - 66 | --- - | - 67 | (module - 68 | (function_definition - 69 | name: (identifier) - 70 | parameters: (parameters) - 71 | (ERROR (identifier)) - 72 | body: (block - 73 | (expression_statement (string - 74 | (string_start) - 75 | (string_content) - 76 | (string_end)))))) - | - 77 | =========================================== - 78 | incomplete definition in class definition - 79 | =========================================== - | - 80 | class A: - 81 | def - | - 82 | b - | - 83 | --- - | - 84 | (module - 85 | (class_definition - 86 | name: (identifier) - 87 | (ERROR) - 88 | body: (block)) - 89 | (expression_statement - 90 | (identifier))) - - - --------------------------------------------------------------------------------- -/test/fixtures/error_corpus/readme.md: --------------------------------------------------------------------------------- - 1 | The Error Corpus - 2 | ================ - | - 3 | This directory contains corpus tests that exercise error recovery in a variety of languages. - | - 4 | These corpus tests provide a simple way of asserting that error recoveries are "reasonable" in a variety of situations. But they are also somewhat *overspecified*. It isn't critical that error recovery behaves *exactly* as these tests specify, just that most of the syntax tree is preserved despite the error. - | - 5 | Sometimes these tests can start failing when changes are pushed to the parser repositories like `tree-sitter-ruby`, `tree-sitter-javascript`, etc. Usually, we just need to tweak the expected syntax tree. 
- - - --------------------------------------------------------------------------------- -/test/fixtures/error_corpus/ruby_errors.txt: --------------------------------------------------------------------------------- - 1 | ========================== - 2 | Heredocs with errors 2 - 3 | ========================== - | - 4 | joins <<~SQL - 5 | b - 6 | SQL - 7 | ) - 8 | c - | - 9 | --- - | - 10 | (program - 11 | (call - 12 | method: (identifier) - 13 | arguments: (argument_list - 14 | (heredoc_beginning))) - 15 | (heredoc_body - 16 | (heredoc_content) - 17 | (heredoc_end)) - 18 | (ERROR) - 19 | (identifier)) - - - --------------------------------------------------------------------------------- -/test/fixtures/fixtures.json: --------------------------------------------------------------------------------- - 1 | [ - 2 | ["bash","v0.25.0"], - 3 | ["c","v0.24.1"], - 4 | ["cpp","v0.23.4"], - 5 | ["embedded-template","v0.25.0"], - 6 | ["go","v0.25.0"], - 7 | ["html","v0.23.2"], - 8 | ["java","v0.23.5"], - 9 | ["javascript","v0.25.0"], - 10 | ["jsdoc","v0.23.2"], - 11 | ["json","v0.24.8"], - 12 | ["php","v0.24.2"], - 13 | ["python","v0.23.6"], - 14 | ["ruby","v0.23.1"], - 15 | ["rust","v0.24.0"], - 16 | ["typescript","v0.23.2"] - 17 | ] - - --------------------------------------------------------------------------------- -/test/fixtures/template_corpus/readme.md: --------------------------------------------------------------------------------- - 1 | The Template Corpus - 2 | =================== - | - 3 | This directory contains corpus tests that exercise parsing a set of disjoint ranges within a file. - | - 4 | Each of these input files contains source code surrounded by the delimiters `<%` and `%>`. The content outside of these delimiters is meant to be ignored. 
- - --------------------------------------------------------------------------------- -/test/fixtures/template_corpus/ruby_templates.txt: --------------------------------------------------------------------------------- - 1 | ============================== - 2 | Templates with errors - 3 | ============================== - | - 4 |
- 5 | <% if notice.present? %> - 6 |

<% notice %>

- 7 | <% end %> - 8 |
- 9 |

Foods

- 10 |
- 11 | <% link_to 'New food', new_food_path, class: "block font-medium" %> - 12 | <% link_to 'Search Database', database_foods_search_path, class: "block font-medium" %> - 13 |
- 14 |
- | - 15 | <% . render partial: "form", locals: { food: @new_food } %> - | - 16 | <% form_with url: "/search", method: :get do |form| %> - 17 | <% form.label :previous_query, 'Search previous foods:' %> - 18 | <% form.text_field :previous_query %> - 19 | <% form.submit "Search" %> - 20 | <% end %> - | - 21 |
- 22 | <% render @foods %> - 23 |
- 24 |
- | - 25 | --- - | - 26 | (program - 27 | (if - 28 | (call (identifier) (identifier)) - 29 | (then (identifier))) - 30 | (call - 31 | (identifier) - 32 | (argument_list - 33 | (string (string_content)) - 34 | (identifier) - 35 | (pair (hash_key_symbol) (string (string_content))))) - 36 | (call - 37 | (identifier) - 38 | (argument_list - 39 | (string (string_content)) - 40 | (identifier) - 41 | (pair (hash_key_symbol) (string (string_content))))) - 42 | (ERROR) - 43 | (call - 44 | (identifier) - 45 | (argument_list - 46 | (pair (hash_key_symbol) (string (string_content))) - 47 | (pair (hash_key_symbol) (hash (pair (hash_key_symbol) (instance_variable)))))) - 48 | (call - 49 | (identifier) - 50 | (argument_list - 51 | (pair (hash_key_symbol) (string (string_content))) - 52 | (pair (hash_key_symbol) (simple_symbol))) - 53 | (do_block - 54 | (block_parameters - 55 | (identifier)) - 56 | (body_statement - 57 | (call - 58 | (identifier) - 59 | (identifier) - 60 | (argument_list (simple_symbol) (string (string_content)))) - 61 | (call - 62 | (identifier) - 63 | (identifier) - 64 | (argument_list - 65 | (simple_symbol))) - 66 | (call - 67 | (identifier) - 68 | (identifier) - 69 | (argument_list (string (string_content))))))) - 70 | (call - 71 | (identifier) - 72 | (argument_list (instance_variable)))) - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/aliased_inlined_rules/corpus.txt: --------------------------------------------------------------------------------- - 1 | ========================= - 2 | OK - 3 | ========================= - | - 4 | a.b.c; - | - 5 | --- - | - 6 | (statement - 7 | (member_expression - 8 | (member_expression - 9 | (variable_name) - 10 | (property_name)) - 11 | (property_name))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/aliased_inlined_rules/grammar.js: 
--------------------------------------------------------------------------------- - 1 | // This grammar shows that `ALIAS` rules can *contain* a rule that is marked as `inline`. It also - 2 | // shows that you can alias a rule that would otherwise be anonymous, and it will then appear as a - 3 | // named node. - | - 4 | export default grammar({ - 5 | name: 'aliased_inlined_rules', - | - 6 | extras: $ => [/\s/], - | - 7 | inline: $ => [$.identifier], - | - 8 | rules: { - 9 | statement: $ => seq($._expression, ';'), - | - 10 | _expression: $ => choice( - 11 | $.member_expression, - 12 | alias($.identifier, $.variable_name), - 13 | ), - | - 14 | member_expression: $ => prec.left(1, seq( - 15 | $._expression, - 16 | '.', - 17 | alias($.identifier, $.property_name) - 18 | )), - | - 19 | identifier: $ => choice('a', 'b', 'c') - 20 | } - 21 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/aliased_rules/corpus.txt: --------------------------------------------------------------------------------- - 1 | ====================================== - 2 | Method calls - 3 | ====================================== - | - 4 | *a.b(c(d.e)); - | - 5 | --- - | - 6 | (statement - 7 | (star) - 8 | (call_expression - 9 | (member_expression - 10 | (variable_name) - 11 | (property_name)) - 12 | (call_expression - 13 | (variable_name) - 14 | (member_expression - 15 | (variable_name) - 16 | (property_name))))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/aliased_rules/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'aliased_rules', - | - 3 | extras: $ => [ - 4 | /\s/, - 5 | $.star, - 6 | ], - | - 7 | rules: { - 8 | statement: $ => seq($._expression, ';'), - | - 9 | _expression: $ => choice( - 10 | $.call_expression, - 11 | $.member_expression, - 12 | 
alias($.identifier, $.variable_name), - 13 | ), - | - 14 | call_expression: $ => prec.left(seq( - 15 | $._expression, - 16 | '(', - 17 | $._expression, - 18 | ')' - 19 | )), - | - 20 | member_expression: $ => prec.left(1, seq( - 21 | $._expression, - 22 | '.', - 23 | alias($.identifier, $.property_name) - 24 | )), - | - 25 | identifier: $ => /[a-z]+/, - | - 26 | // Tests for https://github.com/tree-sitter/tree-sitter/issues/1834 - 27 | // - 28 | // Even though the alias is unused, that issue causes all instances of - 29 | // the extra that appear in the tree to be renamed to `star_aliased`. - 30 | // - 31 | // Instead, this alias should have no effect because it is unused. - 32 | star: $ => '*', - 33 | unused: $ => alias($.star, $.star_aliased), - 34 | } - 35 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/aliased_token_rules/corpus.txt: --------------------------------------------------------------------------------- - 1 | ====================== - 2 | Aliased token rules - 3 | ====================== - | - 4 | abcde - | - 5 | --- - | - 6 | (expression (X) (Y)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/aliased_token_rules/grammar.js: --------------------------------------------------------------------------------- - 1 | // This grammar shows that `ALIAS` rules can be applied directly to `TOKEN` and `IMMEDIATE_TOKEN` - 2 | // rules. 
- | - 3 | export default grammar({ - 4 | name: 'aliased_token_rules', - | - 5 | extras: $ => [/\s/], - | - 6 | rules: { - 7 | expression: $ => seq( - 8 | 'a', - 9 | alias(token(seq('b', 'c')), $.X), - 10 | alias(token.immediate(seq('d', 'e')), $.Y), - 11 | ), - 12 | } - 13 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/aliased_unit_reductions/corpus.txt: --------------------------------------------------------------------------------- - 1 | ========================================== - 2 | Aliases on rules that are unit reductions - 3 | ========================================== - | - 4 | one two three four; - | - 5 | --- - | - 6 | (statement - 7 | (identifier) - 8 | (b_prime (identifier)) - 9 | (c_prime (identifier)) - 10 | (identifier)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/aliased_unit_reductions/grammar.js: --------------------------------------------------------------------------------- - 1 | // Normally, when there are invisible rules (rules whose names start with an `_`) that simply wrap - 2 | // another rule, there is an optimization at parser-generation time called *Unit Reduction - 3 | // Elimination* that avoids creating nodes for those rules at runtime. One case where this - 4 | // optimization must *not* be applied is when those invisible rules are going to be aliased within - 5 | // their parent rule. In that situation, eliminating the invisible node could cause the alias to be - 6 | // incorrectly applied to its child. - | - 7 | export default grammar({ - 8 | name: 'aliased_unit_reductions', - | - 9 | extras: $ => [/\s/], - | - 10 | rules: { - 11 | statement: $ => seq( - 12 | $._a, - | - 13 | // The `_b` rule is always aliased to `b_prime`, so it is internally treated - 14 | // as a simple alias. 
- 15 | alias($._b, $.b_prime), - | - 16 | // The `_c` rule is used without an alias in addition to being aliased to `c_prime`, - 17 | // so it is not a simple alias. - 18 | alias($._c, $.c_prime), - | - 19 | $._c, - 20 | ';' - 21 | ), - | - 22 | _a: $ => $._A, - 23 | _b: $ => $._B, - 24 | _c: $ => $._C, - 25 | _A: $ => $.identifier, - 26 | _B: $ => $.identifier, - 27 | _C: $ => $.identifier, - | - 28 | identifier: $ => /[a-z]+/, - 29 | } - 30 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/aliases_in_root/corpus.txt: --------------------------------------------------------------------------------- - 1 | ====================================== - 2 | Aliases within the root node - 3 | ====================================== - | - 4 | # this is a comment - 5 | foo foo - | - 6 | --- - | - 7 | (document - 8 | (comment) - 9 | (bar) - 10 | (foo)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/aliases_in_root/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'aliases_in_root', - | - 3 | extras: $ => [ - 4 | /\s/, - 5 | $.comment, - 6 | ], - | - 7 | rules: { - 8 | document: $ => seq( - 9 | alias($.foo, $.bar), - 10 | $.foo, - 11 | ), - | - 12 | foo: $ => "foo", - | - 13 | comment: $ => /#.*/ - 14 | } - 15 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/anonymous_error/corpus.txt: --------------------------------------------------------------------------------- - 1 | ====================== - 2 | A simple error literal - 3 | ====================== - | - 4 | ERROR - | - 5 | --- - | - 6 | (document) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/anonymous_error/grammar.js: 
--------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'anonymous_error', - 3 | rules: { - 4 | document: $ => repeat(choice('ok', 'ERROR')), - 5 | } - 6 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt: --------------------------------------------------------------------------------- - 1 | ================================================ - 2 | anonymous tokens defined with character classes - 3 | ================================================ - 4 | 1234 - 5 | --- - | - 6 | (first_rule) - | - 7 | ================================================= - 8 | anonymous tokens defined with LF escape sequence - 9 | ================================================= - | - | - 10 | --- - | - 11 | (first_rule) - | - 12 | ================================================= - 13 | anonymous tokens defined with CR escape sequence - 14 | ================================================= - 15 | - | - 16 | --- - | - 17 | (first_rule) - | - 18 | ================================================ - 19 | anonymous tokens with quotes - 20 | ================================================ - 21 | 'hello' - 22 | --- - | - 23 | (first_rule) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.js: --------------------------------------------------------------------------------- - 1 | // Every token in a grammar is given a name in the generated parser. Anonymous tokens (tokens - 2 | // specified directly in the body of some larger rule) are named according their content. So when - 3 | // tokens contains characters that aren't valid in a C string literal, we need to escape those - 4 | // characters. This grammar tests that this escaping works. 
The test is basically that the generated - 5 | // parser compiles successfully. - | - 6 | export default grammar({ - 7 | name: "anonymous_tokens_with_escaped_chars", - 8 | rules: { - 9 | first_rule: $ => choice( - 10 | "\n", - 11 | "\r\n", - 12 | "'hello'", - 13 | /\d+/, - 14 | ) - 15 | } - 16 | }) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/associativity_left/corpus.txt: --------------------------------------------------------------------------------- - 1 | =================== - 2 | chained operations - 3 | =================== - 4 | x+y+z - 5 | --- - 6 | (expression (math_operation - 7 | (expression (math_operation (expression (identifier)) (expression (identifier)))) - 8 | (expression (identifier)))) - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/associativity_left/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'associativity_left', - | - 3 | rules: { - 4 | expression: $ => choice( - 5 | $.math_operation, - 6 | $.identifier - 7 | ), - | - 8 | math_operation: $ => prec.left(seq( - 9 | $.expression, - 10 | '+', - 11 | $.expression, - 12 | )), - | - 13 | identifier: $ => /[a-z]+/, - 14 | } - 15 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/associativity_missing/expected_error.txt: --------------------------------------------------------------------------------- - 1 | Unresolved conflict for symbol sequence: - | - 2 | expression '+' expression • '+' … - | - 3 | Possible interpretations: - | - 4 | 1: (math_operation expression '+' expression) • '+' … - 5 | 2: expression '+' (math_operation expression • '+' expression) - | - 6 | Possible resolutions: - | - 7 | 1: Specify a left or right associativity in `math_operation` - 8 | 2: Add a conflict for these rules: 
`math_operation` - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/associativity_missing/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'associativity_missing', - | - 3 | rules: { - 4 | expression: $ => choice( - 5 | $.math_operation, - 6 | $.identifier - 7 | ), - | - 8 | math_operation: $ => seq( - 9 | $.expression, - 10 | '+', - 11 | $.expression, - 12 | ), - | - 13 | identifier: $ => /[a-z]+/, - 14 | } - 15 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/associativity_right/corpus.txt: --------------------------------------------------------------------------------- - 1 | =================== - 2 | chained operations - 3 | =================== - 4 | x+y+z - 5 | --- - 6 | (expression (math_operation - 7 | (expression (identifier)) - 8 | (expression (math_operation (expression (identifier)) (expression (identifier)))))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/associativity_right/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'associativity_right', - | - 3 | rules: { - 4 | expression: $ => choice( - 5 | $.math_operation, - 6 | $.identifier - 7 | ), - | - 8 | math_operation: $ => prec.right(seq( - 9 | $.expression, - 10 | '+', - 11 | $.expression, - 12 | )), - | - 13 | identifier: $ => /[a-z]+/, - 14 | } - 15 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt: --------------------------------------------------------------------------------- - 1 | Unresolved conflict for symbol sequence: - | - 2 | _program_start '[' identifier • ']' … - | - 3 | 
Possible interpretations: - | - 4 | 1: _program_start '[' (array_repeat1 identifier) • ']' … - 5 | 2: _program_start '[' (array_type_repeat1 identifier) • ']' … - | - 6 | Possible resolutions: - | - 7 | 1: Specify a higher precedence in `array_repeat1` than in the other rules. - 8 | 2: Specify a higher precedence in `array_type_repeat1` than in the other rules. - 9 | 3: Add a conflict for these rules: `array`, `array_type` - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/grammar.js: --------------------------------------------------------------------------------- - 1 | // This grammar is similar to the `conflict_in_repeat_rule` grammar, except that the conflict occurs - 2 | // after an external token is consumed. This tests that the logic for determining the repeat rule's - 3 | // "parent" rule works in the presence of external tokens. - | - 4 | export default grammar({ - 5 | name: 'conflict_in_repeat_rule_after_external_token', - | - 6 | externals: $ => [ - 7 | $._program_start, - 8 | ], - | - 9 | rules: { - 10 | statement: $ => choice( - 11 | seq($._program_start, $.array, ';'), - 12 | seq($._program_start, $.array_type, $.identifier, ';'), - 13 | ), - | - 14 | array: $ => seq( - 15 | '[', - 16 | repeat(choice($.identifier, '0')), - 17 | ']', - 18 | ), - | - 19 | array_type: $ => seq( - 20 | '[', - 21 | repeat(choice($.identifier, 'void')), - 22 | ']', - 23 | ), - | - 24 | identifier: $ => /[a-z]+/ - 25 | } - 26 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt: --------------------------------------------------------------------------------- - 1 | Unresolved conflict for symbol sequence: - | - 2 | '[' identifier • ']' … - | - 3 | Possible interpretations: - | - 4 | 1: '[' (array_repeat1 identifier) • ']' … - 5 | 2: '[' 
(array_type_repeat1 identifier) • ']' … - | - 6 | Possible resolutions: - | - 7 | 1: Specify a higher precedence in `array_repeat1` than in the other rules. - 8 | 2: Specify a higher precedence in `array_type_repeat1` than in the other rules. - 9 | 3: Add a conflict for these rules: `array`, `array_type` - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/conflict_in_repeat_rule/grammar.js: --------------------------------------------------------------------------------- - 1 | // This grammar has a conflict that involves *repeat rules*: auxiliary rules that are added by the - 2 | // parser generator in order to implement repetition. There is no way of referring to these rules in - 3 | // the grammar DSL, so these conflicts must be resolved by referring to their parent rules. - | - 4 | export default grammar({ - 5 | name: 'conflict_in_repeat_rule', - | - 6 | rules: { - 7 | statement: $ => choice( - 8 | seq($.array, ';'), - 9 | seq($.array_type, $.identifier, ';'), - 10 | ), - | - 11 | array: $ => seq( - 12 | '[', - 13 | repeat(choice($.identifier, '0')), - 14 | ']', - 15 | ), - | - 16 | array_type: $ => seq( - 17 | '[', - 18 | repeat(choice($.identifier, 'void')), - 19 | ']', - 20 | ), - | - 21 | identifier: $ => /[a-z]+/ - 22 | } - 23 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/conflicting_precedence/expected_error.txt: --------------------------------------------------------------------------------- - 1 | Unresolved conflict for symbol sequence: - | - 2 | expression '+' expression • '*' … - | - 3 | Possible interpretations: - | - 4 | 1: (sum expression '+' expression) • '*' … (precedence: 0, associativity: Left) - 5 | 2: expression '+' (other_thing expression • '*' '*') (precedence: -1, associativity: Left) - 6 | 3: expression '+' (product expression • '*' expression) (precedence: 1, associativity: Left) - | - 7 | Possible 
resolutions: - | - 8 | 1: Specify a higher precedence in `product` and `other_thing` than in the other rules. - 9 | 2: Specify a higher precedence in `sum` than in the other rules. - 10 | 3: Add a conflict for these rules: `sum`, `product`, `other_thing` - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/conflicting_precedence/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'conflicting_precedence', - | - 3 | rules: { - 4 | expression: $ => choice( - 5 | $.sum, - 6 | $.product, - 7 | $.other_thing, - 8 | ), - | - 9 | sum: $ => prec.left(0, seq($.expression, '+', $.expression)), - 10 | product: $ => prec.left(1, seq($.expression, '*', $.expression)), - 11 | other_thing: $ => prec.left(-1, seq($.expression, '*', '*')), - 12 | identifier: $ => /[a-zA-Z]+/ - 13 | } - 14 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/depends_on_column/corpus.txt: --------------------------------------------------------------------------------- - 1 | ================== - 2 | X is at odd column - 3 | ================== - | - 4 | x - | - 5 | --- - | - 6 | (x_is_at - 7 | (odd_column)) - | - 8 | =================== - 9 | X is at even column - 10 | =================== - | - 11 | x - | - 12 | --- - | - 13 | (x_is_at - 14 | (even_column)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/depends_on_column/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: "depends_on_column", - 3 | rules: { - 4 | x_is_at: ($) => seq(/[ \r\n]*/, choice($.odd_column, $.even_column), "x"), - 5 | }, - 6 | externals: ($) => [$.odd_column, $.even_column], - 7 | }); - - - 
--------------------------------------------------------------------------------- -/test/fixtures/test_grammars/depends_on_column/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/parser.h" - | - 2 | enum TokenType { ODD_COLUMN, EVEN_COLUMN }; - | - 3 | // The scanner is stateless - | - 4 | void *tree_sitter_depends_on_column_external_scanner_create() { - 5 | return NULL; - 6 | } - | - 7 | void tree_sitter_depends_on_column_external_scanner_destroy( - 8 | void *payload - 9 | ) { - 10 | // no-op - 11 | } - | - 12 | unsigned tree_sitter_depends_on_column_external_scanner_serialize( - 13 | void *payload, - 14 | char *buffer - 15 | ) { - 16 | return 0; - 17 | } - | - 18 | void tree_sitter_depends_on_column_external_scanner_deserialize( - 19 | void *payload, - 20 | const char *buffer, - 21 | unsigned length - 22 | ) { - 23 | // no-op - 24 | } - | - 25 | bool tree_sitter_depends_on_column_external_scanner_scan( - 26 | void *payload, - 27 | TSLexer *lexer, - 28 | const bool *valid_symbols - 29 | ) { - 30 | lexer->result_symbol = - 31 | lexer->get_column(lexer) % 2 ? 
ODD_COLUMN : EVEN_COLUMN; - 32 | return true; - 33 | } - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/dynamic_precedence/corpus.txt: --------------------------------------------------------------------------------- - 1 | =============================== - 2 | Declarations - 3 | =============================== - | - 4 | T * x - | - 5 | --- - | - 6 | (program (declaration - 7 | (type (identifier)) - 8 | (declarator (identifier)))) - | - 9 | =============================== - 10 | Expressions - 11 | =============================== - | - 12 | w * x * y - | - 13 | --- - | - 14 | (program (expression - 15 | (expression - 16 | (expression (identifier)) - 17 | (expression (identifier))) - 18 | (expression (identifier)))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/dynamic_precedence/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'dynamic_precedence', - | - 3 | extras: $ => [/\s/], - | - 4 | conflicts: $ => [[$.expression, $.type]], - | - 5 | rules: { - 6 | program: $ => choice( - 7 | $.declaration, - 8 | $.expression, - 9 | ), - | - 10 | expression: $ => choice( - 11 | prec.left(seq($.expression, '*', $.expression)), - 12 | $.identifier - 13 | ), - | - 14 | declaration: $ => seq( - 15 | $.type, - 16 | $.declarator, - 17 | ), - | - 18 | declarator: $ => choice( - 19 | prec.dynamic(1, seq('*', $.identifier)), - 20 | $.identifier, - 21 | ), - | - 22 | type: $ => $.identifier, - 23 | identifier: $ => /[a-z-A-Z]+/ - 24 | } - 25 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/dynamic_precedence/readme.md: --------------------------------------------------------------------------------- - 1 | This grammar contains a conflict that is resolved at runtime. 
The PREC_DYNAMIC rule is used to indicate that the `declarator` rule should be preferred to the `expression` rule at runtime. - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/epsilon_external_extra_tokens/corpus.txt: --------------------------------------------------------------------------------- - 1 | ========================== - 2 | A document - 3 | ========================== - | - 4 | a b - | - 5 | --- - | - 6 | (document) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/epsilon_external_extra_tokens/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'epsilon_external_extra_tokens', - | - 3 | extras: $ => [/\s/, $.comment], - | - 4 | externals: $ => [$.comment], - | - 5 | rules: { - 6 | document: $ => seq('a', 'b'), - 7 | } - 8 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/epsilon_external_extra_tokens/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/parser.h" - | - 2 | enum TokenType { - 3 | COMMENT - 4 | }; - | - 5 | void *tree_sitter_epsilon_external_extra_tokens_external_scanner_create(void) { - 6 | return NULL; - 7 | } - | - 8 | bool tree_sitter_epsilon_external_extra_tokens_external_scanner_scan( - 9 | void *payload, - 10 | TSLexer *lexer, - 11 | const bool *valid_symbols - 12 | ) { - 13 | lexer->result_symbol = COMMENT; - 14 | return true; - 15 | } - | - 16 | unsigned tree_sitter_epsilon_external_extra_tokens_external_scanner_serialize( - 17 | void *payload, - 18 | char *buffer - 19 | ) { - 20 | return 0; - 21 | } - | - 22 | void tree_sitter_epsilon_external_extra_tokens_external_scanner_deserialize( - 23 | void *payload, - 24 | const char *buffer, - 25 | unsigned length - 26 | ) {} - | 
- 27 | void tree_sitter_epsilon_external_extra_tokens_external_scanner_destroy(void *payload) {} - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/epsilon_external_tokens/corpus.txt: --------------------------------------------------------------------------------- - 1 | ========================== - 2 | A leading zero-width token - 3 | ========================== - | - 4 | hello - | - 5 | --- - | - 6 | (document (zero_width)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/epsilon_external_tokens/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'epsilon_external_tokens', - | - 3 | extras: $ => [/\s/], - 4 | externals: $ => [$.zero_width], - | - 5 | rules: { - 6 | document: $ => seq($.zero_width, 'hello'), - 7 | } - 8 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/epsilon_external_tokens/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/parser.h" - | - 2 | enum TokenType { - 3 | ZERO_WIDTH_TOKEN - 4 | }; - | - 5 | void *tree_sitter_epsilon_external_tokens_external_scanner_create() { - 6 | return NULL; - 7 | } - | - 8 | bool tree_sitter_epsilon_external_tokens_external_scanner_scan( - 9 | void *payload, - 10 | TSLexer *lexer, - 11 | const bool *valid_symbols - 12 | ) { - 13 | lexer->result_symbol = ZERO_WIDTH_TOKEN; - 14 | return true; - 15 | } - | - 16 | unsigned tree_sitter_epsilon_external_tokens_external_scanner_serialize( - 17 | void *payload, - 18 | char *buffer - 19 | ) { - 20 | return 0; - 21 | } - | - 22 | void tree_sitter_epsilon_external_tokens_external_scanner_deserialize( - 23 | void *payload, - 24 | const char *buffer, - 25 | unsigned length - 26 | ) {} - | - 27 | void 
tree_sitter_epsilon_external_tokens_external_scanner_destroy(void *payload) {} - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/epsilon_rules/expected_error.txt: --------------------------------------------------------------------------------- - 1 | The rule `rule_2` matches the empty string. - | - 2 | Tree-sitter does not support syntactic rules that match the empty string - 3 | unless they are used only as the grammar's start rule. - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/epsilon_rules/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'epsilon_rules', - | - 3 | rules: { - 4 | rule_1: $ => $.rule_2, - | - 5 | rule_2: $ => optional($.rule_3), - | - 6 | rule_3: $ => 'x' - 7 | } - 8 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_and_internal_anonymous_tokens/corpus.txt: --------------------------------------------------------------------------------- - 1 | ========================================= - 2 | single-line statements - internal tokens - 3 | ========================================= - | - 4 | a b - | - 5 | --- - | - 6 | (statement (variable) (variable)) - | - 7 | ========================================= - 8 | multi-line statements - internal tokens - 9 | ========================================= - | - 10 | a - 11 | b - | - 12 | --- - | - 13 | (statement (variable) (variable)) - | - 14 | ========================================= - 15 | single-line statements - external tokens - 16 | ========================================= - | - 17 | 'hello' 'world' - | - 18 | --- - | - 19 | (statement (string) (string)) - | - 20 | ========================================= - 21 | multi-line statements - external tokens - 22 | 
========================================= - | - 23 | 'hello' - 24 | 'world' - | - 25 | --- - | - 26 | (statement (string) (string)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_and_internal_anonymous_tokens/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'external_and_internal_anonymous_tokens', - | - 3 | externals: $ => [ - 4 | $.string, - 5 | '\n' - 6 | ], - | - 7 | extras: $ => [/\s/], - | - 8 | rules: { - 9 | statement: $ => seq( - 10 | $._expression, - 11 | $._expression, - 12 | '\n' - 13 | ), - | - 14 | _expression: $ => choice( - 15 | $.string, - 16 | $.variable, - 17 | $.number - 18 | ), - | - 19 | variable: $ => /[a-z]+/, - | - 20 | number: $ => /\d+/ - 21 | } - 22 | }) - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_and_internal_anonymous_tokens/readme.md: --------------------------------------------------------------------------------- - 1 | This grammar is just like the `external_and_internal_tokens` grammar, except that the shared external token is *anonymous*; it's specified as a string in the grammar. 
- - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_and_internal_anonymous_tokens/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/parser.h" - | - 2 | enum { - 3 | STRING, - 4 | LINE_BREAK - 5 | }; - | - 6 | void *tree_sitter_external_and_internal_anonymous_tokens_external_scanner_create() { - 7 | return NULL; - 8 | } - | - 9 | void tree_sitter_external_and_internal_anonymous_tokens_external_scanner_destroy( - 10 | void *payload - 11 | ) {} - | - 12 | unsigned tree_sitter_external_and_internal_anonymous_tokens_external_scanner_serialize( - 13 | void *payload, - 14 | char *buffer - 15 | ) { return 0; } - | - 16 | void tree_sitter_external_and_internal_anonymous_tokens_external_scanner_deserialize( - 17 | void *payload, - 18 | const char *buffer, - 19 | unsigned length - 20 | ) {} - | - 21 | bool tree_sitter_external_and_internal_anonymous_tokens_external_scanner_scan( - 22 | void *payload, - 23 | TSLexer *lexer, - 24 | const bool *valid_symbols - 25 | ) { - 26 | // If a line-break is a valid lookahead token, only skip spaces. - 27 | if (valid_symbols[LINE_BREAK]) { - 28 | while (lexer->lookahead == ' ' || lexer->lookahead == '\r') { - 29 | lexer->advance(lexer, true); - 30 | } - | - 31 | if (lexer->lookahead == '\n') { - 32 | lexer->advance(lexer, false); - 33 | lexer->result_symbol = LINE_BREAK; - 34 | return true; - 35 | } - 36 | } - | - 37 | // If a line-break is not a valid lookahead token, skip line breaks as well - 38 | // as spaces. 
- 39 | if (valid_symbols[STRING]) { - 40 | while (lexer->lookahead == ' ' || lexer->lookahead == '\r' || lexer->lookahead == '\n') { - 41 | lexer->advance(lexer, true); - 42 | } - | - 43 | if (lexer->lookahead == '\'') { - 44 | lexer->advance(lexer, false); - | - 45 | while (lexer->lookahead != '\'') { - 46 | lexer->advance(lexer, false); - 47 | } - | - 48 | lexer->advance(lexer, false); - 49 | lexer->result_symbol = STRING; - 50 | return true; - 51 | } - 52 | } - | - 53 | return false; - 54 | } - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_and_internal_tokens/corpus.txt: --------------------------------------------------------------------------------- - 1 | ========================================= - 2 | single-line statements - internal tokens - 3 | ========================================= - | - 4 | a b - | - 5 | --- - | - 6 | (statement (variable) (variable) (line_break)) - | - 7 | ========================================= - 8 | multi-line statements - internal tokens - 9 | ========================================= - | - 10 | a - 11 | b - | - 12 | --- - | - 13 | (statement (variable) (variable) (line_break)) - | - 14 | ========================================= - 15 | single-line statements - external tokens - 16 | ========================================= - | - 17 | 'hello' 'world' - | - 18 | --- - | - 19 | (statement (string) (string) (line_break)) - | - 20 | ========================================= - 21 | multi-line statements - external tokens - 22 | ========================================= - | - 23 | 'hello' - 24 | 'world' - | - 25 | --- - | - 26 | (statement (string) (string) (line_break)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_and_internal_tokens/grammar.js: --------------------------------------------------------------------------------- - 1 | // This grammar has an external scanner 
whose `scan` method needs to be able to check for the - 2 | // validity of an *internal* token. This is done by including the names of that internal token - 3 | // (`line_break`) in the grammar's `externals` field. - | - 4 | export default grammar({ - 5 | name: 'external_and_internal_tokens', - | - 6 | externals: $ => [ - 7 | $.string, - 8 | $.line_break, - 9 | ], - | - 10 | extras: $ => [/\s/], - | - 11 | rules: { - 12 | statement: $ => seq( - 13 | $._expression, - 14 | $._expression, - 15 | $.line_break, - 16 | ), - | - 17 | _expression: $ => choice( - 18 | $.string, - 19 | $.variable, - 20 | $.number, - 21 | ), - | - 22 | variable: $ => /[a-z]+/, - 23 | number: $ => /\d+/, - 24 | line_break: $ => '\n', - 25 | } - 26 | }); - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_and_internal_tokens/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/parser.h" - | - 2 | enum { - 3 | STRING, - 4 | LINE_BREAK - 5 | }; - | - 6 | void *tree_sitter_external_and_internal_tokens_external_scanner_create() { - 7 | return NULL; - 8 | } - | - 9 | void tree_sitter_external_and_internal_tokens_external_scanner_destroy(void *payload) {} - | - 10 | unsigned tree_sitter_external_and_internal_tokens_external_scanner_serialize( - 11 | void *payload, - 12 | char *buffer - 13 | ) { return 0; } - | - 14 | void tree_sitter_external_and_internal_tokens_external_scanner_deserialize( - 15 | void *payload, - 16 | const char *buffer, - 17 | unsigned length - 18 | ) {} - | - 19 | bool tree_sitter_external_and_internal_tokens_external_scanner_scan( - 20 | void *payload, - 21 | TSLexer *lexer, - 22 | const bool *valid_symbols - 23 | ) { - 24 | // If a line-break is a valid lookahead token, only skip spaces. 
- 25 | if (valid_symbols[LINE_BREAK]) { - 26 | while (lexer->lookahead == ' ' || lexer->lookahead == '\r') { - 27 | lexer->advance(lexer, true); - 28 | } - | - 29 | if (lexer->lookahead == '\n') { - 30 | lexer->advance(lexer, false); - 31 | lexer->result_symbol = LINE_BREAK; - 32 | return true; - 33 | } - 34 | } - | - 35 | // If a line-break is not a valid lookahead token, skip line breaks as well - 36 | // as spaces. - 37 | if (valid_symbols[STRING]) { - 38 | while (lexer->lookahead == ' ' || lexer->lookahead == '\r' || lexer->lookahead == '\n') { - 39 | lexer->advance(lexer, true); - 40 | } - | - 41 | if (lexer->lookahead == '\'') { - 42 | lexer->advance(lexer, false); - | - 43 | while (lexer->lookahead != '\'') { - 44 | lexer->advance(lexer, false); - 45 | } - | - 46 | lexer->advance(lexer, false); - 47 | lexer->result_symbol = STRING; - 48 | return true; - 49 | } - 50 | } - | - 51 | return false; - 52 | } - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_extra_tokens/corpus.txt: --------------------------------------------------------------------------------- - 1 | ======================== - 2 | extra external tokens - 3 | ======================== - | - 4 | x = # a comment - 5 | y - | - 6 | --- - | - 7 | (assignment (variable) (comment) (variable)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_extra_tokens/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: "external_extra_tokens", - | - 3 | externals: $ => [ - 4 | $.comment - 5 | ], - | - 6 | extras: $ => [/\s/, $.comment], - | - 7 | rules: { - 8 | assignment: $ => seq($.variable, '=', $.variable), - 9 | variable: $ => /[a-z]+/ - 10 | } - 11 | }) - - - --------------------------------------------------------------------------------- 
-/test/fixtures/test_grammars/external_extra_tokens/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/parser.h" - | - 2 | enum { - 3 | COMMENT, - 4 | }; - | - 5 | void *tree_sitter_external_extra_tokens_external_scanner_create() { - 6 | return NULL; - 7 | } - | - 8 | void tree_sitter_external_extra_tokens_external_scanner_destroy(void *payload) {} - | - 9 | unsigned tree_sitter_external_extra_tokens_external_scanner_serialize( - 10 | void *payload, - 11 | char *buffer - 12 | ) { return 0; } - | - 13 | void tree_sitter_external_extra_tokens_external_scanner_deserialize( - 14 | void *payload, - 15 | const char *buffer, - 16 | unsigned length - 17 | ) {} - | - 18 | bool tree_sitter_external_extra_tokens_external_scanner_scan( - 19 | void *payload, - 20 | TSLexer *lexer, - 21 | const bool *valid_symbols - 22 | ) { - 23 | while (lexer->lookahead == ' ') { - 24 | lexer->advance(lexer, true); - 25 | } - | - 26 | if (lexer->lookahead == '#') { - 27 | lexer->advance(lexer, false); - 28 | while (lexer->lookahead != '\n') { - 29 | lexer->advance(lexer, false); - 30 | } - | - 31 | lexer->result_symbol = COMMENT; - 32 | return true; - 33 | } - | - 34 | return false; - 35 | } - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_tokens/corpus.txt: --------------------------------------------------------------------------------- - 1 | ======================== - 2 | simple external tokens - 3 | ========================= - | - 4 | x + %(sup (external) scanner?) 
- | - 5 | --- - | - 6 | (expression (sum (expression (identifier)) (expression (string)))) - | - 7 | ================================== - 8 | external tokens that require state - 9 | ================================== - | - 10 | %{sup {} #{x + y} {} scanner?} - | - 11 | --- - | - 12 | (expression (string - 13 | (expression (sum - 14 | (expression (identifier)) - 15 | (expression (identifier)))))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_tokens/grammar.js: --------------------------------------------------------------------------------- - 1 | // This grammar uses an external scanner to match special string literals, - 2 | // that track the nesting depth of parentheses, similar to Ruby's percent - 3 | // string literals. - | - 4 | export default grammar({ - 5 | name: "external_tokens", - | - 6 | externals: $ => [ - 7 | $._percent_string, - 8 | $._percent_string_start, - 9 | $._percent_string_end, - 10 | ], - | - 11 | extras: $ => [/\s/], - | - 12 | rules: { - 13 | expression: $ => choice($.string, $.sum, $.identifier), - | - 14 | sum: $ => prec.left(seq($.expression, '+', $.expression)), - | - 15 | string: $ => choice($._percent_string, seq( - 16 | $._percent_string_start, - 17 | $.expression, - 18 | $._percent_string_end, - 19 | )), - | - 20 | identifier: $ => /[a-z]+/ - 21 | } - 22 | }) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_tokens/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/alloc.h" - 2 | #include "tree_sitter/parser.h" - | - 3 | enum { - 4 | percent_string, - 5 | percent_string_start, - 6 | percent_string_end - 7 | }; - | - 8 | typedef struct { - 9 | int32_t open_delimiter; - 10 | int32_t close_delimiter; - 11 | uint32_t depth; - 12 | } Scanner; - | - 13 | void *tree_sitter_external_tokens_external_scanner_create() { 
- 14 | Scanner *scanner = ts_malloc(sizeof(Scanner)); - 15 | *scanner = (Scanner) { - 16 | .open_delimiter = 0, - 17 | .close_delimiter = 0, - 18 | .depth = 0 - 19 | }; - 20 | return scanner; - 21 | } - | - 22 | void tree_sitter_external_tokens_external_scanner_destroy(void *payload) { - 23 | ts_free(payload); - 24 | } - | - 25 | unsigned tree_sitter_external_tokens_external_scanner_serialize( - 26 | void *payload, - 27 | char *buffer - 28 | ) { return 0; } - | - 29 | void tree_sitter_external_tokens_external_scanner_deserialize( - 30 | void *payload, - 31 | const char *buffer, - 32 | unsigned length - 33 | ) {} - | - 34 | bool tree_sitter_external_tokens_external_scanner_scan( - 35 | void *payload, TSLexer *lexer, const bool *valid_symbols) { - 36 | Scanner *scanner = payload; - | - 37 | if (valid_symbols[percent_string]) { - 38 | while (lexer->lookahead == ' ' || - 39 | lexer->lookahead == '\t' || - 40 | lexer->lookahead == '\n' || - 41 | lexer->lookahead == '\r') { - 42 | lexer->advance(lexer, true); - 43 | } - | - 44 | if (lexer->lookahead != '%') return false; - 45 | lexer->advance(lexer, false); - | - 46 | switch (lexer->lookahead) { - 47 | case '(': - 48 | scanner->open_delimiter = '('; - 49 | scanner->close_delimiter = ')'; - 50 | scanner->depth = 1; - 51 | break; - 52 | case '[': - 53 | scanner->open_delimiter = '['; - 54 | scanner->close_delimiter = ']'; - 55 | scanner->depth = 1; - 56 | break; - 57 | case '{': - 58 | scanner->open_delimiter = '{'; - 59 | scanner->close_delimiter = '}'; - 60 | scanner->depth = 1; - 61 | break; - 62 | default: - 63 | return false; - 64 | } - | - 65 | lexer->advance(lexer, false); - | - 66 | for (;;) { - 67 | if (scanner->depth == 0) { - 68 | lexer->log(lexer, "Found a percent string"); - 69 | lexer->result_symbol = percent_string; - 70 | return true; - 71 | } - | - 72 | if (lexer->lookahead == scanner->open_delimiter) { - 73 | scanner->depth++; - 74 | } else if (lexer->lookahead == scanner->close_delimiter) { - 75 | 
scanner->depth--; - 76 | } else if (lexer->lookahead == '#') { - 77 | lexer->advance(lexer, false); - 78 | if (lexer->lookahead == '{') { - 79 | lexer->advance(lexer, false); - 80 | lexer->result_symbol = percent_string_start; - 81 | return true; - 82 | } - 83 | } - | - 84 | lexer->advance(lexer, false); - 85 | } - 86 | } else if (valid_symbols[percent_string_end]) { - 87 | if (lexer->lookahead != '}') return false; - 88 | lexer->advance(lexer, false); - | - 89 | for (;;) { - 90 | if (scanner->depth == 0) { - 91 | lexer->result_symbol = percent_string_end; - 92 | return true; - 93 | } - | - 94 | if (lexer->lookahead == scanner->open_delimiter) { - 95 | scanner->depth++; - 96 | } else if (lexer->lookahead == scanner->close_delimiter) { - 97 | scanner->depth--; - 98 | } - | - 99 | lexer->advance(lexer, false); - 100 | } - 101 | } - | - 102 | return false; - 103 | } - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_unicode_column_alignment/corpus.txt: --------------------------------------------------------------------------------- - 1 | ======================== - 2 | Single list, no boxes - 3 | ======================== - | - 4 | - - 5 | - - 6 | - - | - 7 | ---------------------- - | - 8 | (expression - 9 | (list - 10 | (list_item) - 11 | (list_item) - 12 | (list_item) - 13 | ) - 14 | ) - | - 15 | ======================== - 16 | Two lists, no boxes - 17 | ======================== - | - 18 | - - 19 | - - 20 | - - 21 | - - 22 | - - | - 23 | ---------------------- - | - 24 | (expression - 25 | (list - 26 | (list_item) - 27 | (list_item) - 28 | (list_item) - 29 | ) - 30 | (list - 31 | (list_item) - 32 | (list_item) - 33 | ) - 34 | ) - | - 35 | ======================== - 36 | List with boxes - 37 | ======================== - | - 38 | - - 39 | □- - 40 | - - | - 41 | ---------------------- - | - 42 | (expression - 43 | (list - 44 | (list_item) - 45 | (list_item) - 46 | (list_item) - 47 | ) - 48 | ) 
- | - 49 | ======================== - 50 | Multiple lists with boxes - 51 | ======================== - | - 52 | - - 53 | □ □- - 54 | □ - - 55 | □□□□□□- - 56 | □ □ □ - - 57 | - - 58 | □□□ - - 59 | □□□- - 60 | □ □- - | - 61 | ---------------------- - | - 62 | (expression - 63 | (list - 64 | (list_item) - 65 | (list_item) - 66 | (list_item) - 67 | ) - 68 | (list - 69 | (list_item) - 70 | (list_item) - 71 | (list_item) - 72 | (list_item) - 73 | ) - 74 | (list - 75 | (list_item) - 76 | (list_item) - 77 | ) - 78 | ) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_unicode_column_alignment/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: "external_unicode_column_alignment", - | - 3 | externals: $ => [ - 4 | $._start_list, - 5 | $.list_item, - 6 | $._end_list - 7 | ], - | - 8 | extras: $ => [/\s/, '□'], - | - 9 | rules: { - 10 | expression: $ => repeat($.list), - 11 | - 12 | list: $ => seq($._start_list, repeat1($.list_item), $._end_list) - 13 | } - 14 | }) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_unicode_column_alignment/README.md: --------------------------------------------------------------------------------- - 1 | This tests that `get_column` correctly counts codepoints since start of line. 
- - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/alloc.h" - 2 | #include "tree_sitter/parser.h" - | - 3 | #include - 4 | #include - | - 5 | enum { - 6 | LIST_START, - 7 | LIST_ITEM, - 8 | LIST_END - 9 | }; - | - 10 | typedef struct { - 11 | int32_t column; - 12 | } Scanner; - | - 13 | void *tree_sitter_external_unicode_column_alignment_external_scanner_create() { - 14 | Scanner *scanner = ts_malloc(sizeof(Scanner)); - 15 | *scanner = (Scanner){ - 16 | .column = -1 - 17 | }; - 18 | return scanner; - 19 | } - | - 20 | void tree_sitter_external_unicode_column_alignment_external_scanner_destroy(void *payload) { - 21 | ts_free(payload); - 22 | } - | - 23 | unsigned tree_sitter_external_unicode_column_alignment_external_scanner_serialize( - 24 | void *payload, - 25 | char *buffer - 26 | ) { - 27 | Scanner *scanner = payload; - 28 | unsigned copied = sizeof(int32_t); - 29 | memcpy(buffer, &(scanner->column), copied); - 30 | return copied; - 31 | } - | - 32 | void tree_sitter_external_unicode_column_alignment_external_scanner_deserialize( - 33 | void *payload, - 34 | const char *buffer, - 35 | unsigned length - 36 | ) { - 37 | Scanner *scanner = payload; - 38 | scanner->column = -1; - 39 | if (length > 0) { - 40 | memcpy(&(scanner->column), buffer, sizeof(int32_t)); - 41 | } - 42 | } - | - 43 | bool tree_sitter_external_unicode_column_alignment_external_scanner_scan( - 44 | void *payload, - 45 | TSLexer *lexer, - 46 | const bool *valid_symbols - 47 | ) { - 48 | Scanner *scanner = payload; - 49 | // U+25A1 is unicode codepoint □ - 50 | while (iswspace(lexer->lookahead) || 0x25A1 == lexer->lookahead) { - 51 | lexer->advance(lexer, true); - 52 | } - 53 | if ('-' == lexer->lookahead) { - 54 | const int32_t column = lexer->get_column(lexer); - 55 | if (-1 == 
scanner->column) { - 56 | lexer->result_symbol = LIST_START; - 57 | scanner->column = column; - 58 | return true; - 59 | } else { - 60 | if (column == scanner->column) { - 61 | lexer->result_symbol = LIST_ITEM; - 62 | lexer->advance(lexer, false); - 63 | return true; - 64 | } else { - 65 | lexer->result_symbol = LIST_END; - 66 | scanner->column = -1; - 67 | return true; - 68 | } - 69 | } - 70 | } - 71 | - 72 | if (lexer->eof(lexer) && -1 != scanner->column) { - 73 | lexer->result_symbol = LIST_END; - 74 | scanner->column = -1; - 75 | return true; - 76 | } - 77 | - 78 | return false; - 79 | } - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/corpus.txt: --------------------------------------------------------------------------------- - 1 | ===== - 2 | Extras - 3 | ===== - | - 4 | ; - 5 | %; - 6 | %foo:; - 7 | ; - 8 | bar: baz:; - 9 | ; - | - 10 | --- - | - 11 | (program - 12 | (statement) - 13 | (macro_statement (statement)) - 14 | (macro_statement (statement - 15 | (label_declaration (identifier)))) - 16 | (statement) - 17 | (statement - 18 | (label_declaration (identifier)) - 19 | (label_declaration (identifier))) - 20 | (statement)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/extra_non_terminals_with_shared_rules/grammar.js: --------------------------------------------------------------------------------- - 1 | // This grammar has a non-terminal extra rule `macro_statement` that contains - 2 | // child rules that are also used elsewhere in the grammar. 
- | - 3 | export default grammar({ - 4 | name: "extra_non_terminals_with_shared_rules", - | - 5 | extras: $ => [/\s+/, $.macro_statement], - | - 6 | rules: { - 7 | program: $ => repeat($.statement), - 8 | statement: $ => seq(repeat($.label_declaration), ';'), - 9 | macro_statement: $ => seq('%', $.statement), - 10 | label_declaration: $ => seq($.identifier, ':'), - 11 | identifier: $ => /[a-zA-Z]+/ - 12 | } - 13 | }) - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/extra_non_terminals/corpus.txt: --------------------------------------------------------------------------------- - 1 | ============== - 2 | No extras - 3 | ============== - | - 4 | a b c d - | - 5 | --- - | - 6 | (module) - | - 7 | ============== - 8 | Extras - 9 | ============== - | - 10 | a (one) b (two) (three) c d // e - | - 11 | --- - | - 12 | (module - 13 | (comment (paren_comment)) - 14 | (comment (paren_comment)) - 15 | (comment (paren_comment)) - 16 | (comment (line_comment))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/extra_non_terminals/grammar.js: --------------------------------------------------------------------------------- - 1 | // This grammar has an "extra" rule, `comment`, that is a non-terminal. 
- | - 2 | export default grammar({ - 3 | name: "extra_non_terminals", - | - 4 | extras: $ => [ - 5 | /\s/, - 6 | $.comment, - 7 | ], - | - 8 | rules: { - 9 | module: _ => seq('a', 'b', 'c', 'd'), - | - 10 | comment: $ => choice($.paren_comment, $.line_comment), - | - 11 | paren_comment: _ => token(seq('(', repeat(/[a-z]+/), ')')), - | - 12 | line_comment: _ => token(seq('//', /.*/)), - 13 | } - 14 | }) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/get_col_eof/corpus.txt: --------------------------------------------------------------------------------- -[EMPTY FILE] - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/get_col_eof/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: "get_col_eof", - | - 3 | externals: $ => [ - 4 | $.char - 5 | ], - | - 6 | rules: { - 7 | source_file: $ => repeat($.char), - 8 | } - 9 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/get_col_eof/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/parser.h" - | - 2 | enum TokenType { CHAR }; - | - 3 | void *tree_sitter_get_col_eof_external_scanner_create(void) { return NULL; } - | - 4 | void tree_sitter_get_col_eof_external_scanner_destroy(void *scanner) {} - | - 5 | unsigned tree_sitter_get_col_eof_external_scanner_serialize(void *scanner, - 6 | char *buffer) { - 7 | return 0; - 8 | } - | - 9 | void tree_sitter_get_col_eof_external_scanner_deserialize(void *scanner, - 10 | const char *buffer, - 11 | unsigned length) {} - | - 12 | bool tree_sitter_get_col_eof_external_scanner_scan(void *scanner, - 13 | TSLexer *lexer, - 14 | const bool *valid_symbols) { - 15 | if (lexer->eof(lexer)) { - 16 | return false; - 17 | } - | - 18 | if 
(valid_symbols[CHAR]) { - 19 | lexer->advance(lexer, false); - 20 | lexer->get_column(lexer); - 21 | lexer->result_symbol = CHAR; - 22 | lexer->mark_end(lexer); - 23 | return true; - 24 | } - | - 25 | return false; - 26 | } - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/get_col_should_hang_not_crash/corpus.txt: --------------------------------------------------------------------------------- -[EMPTY FILE] - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/get_col_should_hang_not_crash/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'get_col_should_hang_not_crash', - | - 3 | externals: $ => [ - 4 | $.test, - 5 | ], - | - 6 | rules: { - 7 | source_file: $ => seq( - 8 | $.test - 9 | ), - 10 | }, - 11 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/get_col_should_hang_not_crash/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/parser.h" - | - 2 | unsigned tree_sitter_get_col_should_hang_not_crash_external_scanner_serialize() { return 0; } - | - 3 | void tree_sitter_get_col_should_hang_not_crash_external_scanner_deserialize() {} - | - 4 | void *tree_sitter_get_col_should_hang_not_crash_external_scanner_create() { return NULL; } - | - 5 | void tree_sitter_get_col_should_hang_not_crash_external_scanner_destroy() {} - | - 6 | bool tree_sitter_get_col_should_hang_not_crash_external_scanner_scan(void *payload, TSLexer *lexer, - 7 | const bool *valid_symbols) { - 8 | while (true) { - 9 | lexer->advance(lexer, false); - 10 | lexer->get_column(lexer); - 11 | } - 12 | } - - - --------------------------------------------------------------------------------- 
-/test/fixtures/test_grammars/immediate_tokens/corpus.txt: --------------------------------------------------------------------------------- - 1 | =============================== - 2 | prefix expressions as arguments - 3 | =============================== - | - 4 | a ::b ::c - | - 5 | --- - | - 6 | (program - 7 | (call - 8 | (call - 9 | (identifier) - 10 | (prefix (identifier))) - 11 | (prefix (identifier)))) - | - 12 | =============================== - 13 | infix expressions - 14 | =============================== - | - 15 | a::b::c - | - 16 | --- - | - 17 | (program - 18 | (infix - 19 | (infix - 20 | (identifier) - 21 | (identifier)) - 22 | (identifier))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/immediate_tokens/grammar.js: --------------------------------------------------------------------------------- - 1 | // This grammar demonstrates the usage of the IMMEDIATE_TOKEN rule. It allows the parser to produce - 2 | // a different token based on whether or not there are `extras` preceding the token's main content. - 3 | // When there are *no* leading `extras`, an immediate token is preferred over a normal token which - 4 | // would otherwise match. 
- | - 5 | export default grammar({ - 6 | name: "immediate_tokens", - | - 7 | extras: $ => [/\s/], - | - 8 | rules: { - 9 | program: $ => $._expression, - | - 10 | _expression: $ => choice( - 11 | $.call, - 12 | $.infix, - 13 | $.prefix, - 14 | $.identifier, - 15 | ), - | - 16 | call: $ => prec.left(-1, seq( - 17 | $._expression, - 18 | $._expression, - 19 | )), - | - 20 | prefix: $ => seq( - 21 | '::', - 22 | $.identifier, - 23 | ), - | - 24 | infix: $ => seq( - 25 | $._expression, - 26 | token.immediate('::'), - 27 | $.identifier, - 28 | ), - | - 29 | identifier: $ => /[a-z]+/ - 30 | } - 31 | }) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/indirect_recursion_in_transitions/expected_error.txt: --------------------------------------------------------------------------------- - 1 | Grammar contains an indirectly recursive rule: type_expression -> _expression -> identifier_expression -> type_expression - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/indirect_recursion_in_transitions/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'indirect_recursive_in_single_symbol_transitions', - 3 | rules: { - 4 | source_file: $ => repeat($._statement), - | - 5 | _statement: $ => seq($.initialization_part, $.type_expression), - | - 6 | type_expression: $ => choice('int', $._expression), - | - 7 | initialization_part: $ => seq('=', $._expression), - | - 8 | _expression: $ => choice($.identifier_expression, $.type_expression), - | - 9 | identifier_expression: $ => choice(/[a-zA-Z_][a-zA-Z0-9_]*/, $.type_expression), - 10 | } - 11 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/inline_rules/corpus.txt: --------------------------------------------------------------------------------- - 
1 | ================================== - 2 | Expressions - 3 | ================================== - | - 4 | 1 + 2 * 3; - 5 | 4 * 5 + 6; - 6 | 7 * (8 + 9); - | - 7 | --- - | - 8 | (program - 9 | (statement (sum - 10 | (number) - 11 | (product (number) (number)))) - 12 | (statement (sum - 13 | (product (number) (number)) - 14 | (number))) - 15 | (statement (product - 16 | (number) - 17 | (parenthesized_expression (sum (number) (number)))))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/inline_rules/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: "inline_rules", - | - 3 | extras: $ => [/\s/], - | - 4 | inline: $ => [$.expression], - | - 5 | rules: { - 6 | program: $ => repeat1($.statement), - 7 | statement: $ => seq($.expression, ";"), - 8 | expression: $ => choice( - 9 | $.sum, - 10 | $.product, - 11 | $.number, - 12 | $.parenthesized_expression, - 13 | ), - 14 | parenthesized_expression: $ => seq("(", $.expression, ")"), - 15 | sum: $ => prec.left(seq($.expression, "+", $.expression)), - 16 | product: $ => prec.left(2, seq($.expression, "*", $.expression)), - 17 | number: $ => /\d+/, - 18 | } - 19 | }) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/inlined_aliased_rules/corpus.txt: --------------------------------------------------------------------------------- - 1 | ====================================== - 2 | Method calls - 3 | ====================================== - | - 4 | a.b(c(d.e)); - | - 5 | --- - | - 6 | (statement - 7 | (call_expression - 8 | (member_expression - 9 | (variable_name) - 10 | (property_name)) - 11 | (call_expression - 12 | (variable_name) - 13 | (member_expression - 14 | (variable_name) - 15 | (property_name))))) - - - --------------------------------------------------------------------------------- 
-/test/fixtures/test_grammars/inlined_aliased_rules/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: "inlined_aliased_rules", - | - 3 | extras: $ => [/\s/], - | - 4 | inline: $ => [$.expression], - | - 5 | rules: { - 6 | statement: $ => seq($.expression, ";"), - | - 7 | expression: $ => - 8 | choice( - 9 | $.call_expression, - 10 | $.member_expression, - 11 | alias($.identifier, $.variable_name), - 12 | ), - | - 13 | call_expression: $ => prec.left(seq($.expression, "(", $.expression, ")")), - | - 14 | member_expression: $ => - 15 | prec.left( - 16 | 1, - 17 | seq($.expression, ".", alias($.identifier, $.property_name)), - 18 | ), - | - 19 | identifier: $ => /[a-z]+/, - 20 | }, - 21 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/inlined_aliased_rules/readme.md: --------------------------------------------------------------------------------- - 1 | This grammar shows that a rule marked as `inline` can *contain* a `ALIAS` rule. 
- - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/inverted_external_token/corpus.txt: --------------------------------------------------------------------------------- - 1 | ======================== - 2 | Expressions on one line - 3 | ========================= - | - 4 | a - 5 | b - 6 | .c - 7 | d - 8 | .e - 9 | .f - | - 10 | --- - | - 11 | (program - 12 | (statement (identifier) (line_break)) - 13 | (statement (member_expression (identifier) (identifier)) (line_break)) - 14 | (statement (member_expression (member_expression (identifier) (identifier)) (identifier)) (line_break))) - | - 15 | ===================================== - 16 | Line breaks followed by whitespace - 17 | ===================================== - | - 18 | a - 19 | b - 20 | c - | - 21 | --- - | - 22 | (program - 23 | (statement (identifier) (line_break)) - 24 | (statement (identifier) (line_break)) - 25 | (statement (identifier) (line_break))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/inverted_external_token/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: "inverted_external_token", - | - 3 | externals: $ => [$.line_break], - | - 4 | extras: $ => [/\s/], - | - 5 | rules: { - 6 | program: $ => repeat($.statement), - 7 | statement: $ => seq($._expression, $.line_break), - 8 | _expression: $ => choice($.identifier, $.member_expression), - 9 | member_expression: $ => prec.left(seq($._expression, ".", $.identifier)), - 10 | identifier: $ => /[a-z]+/, - 11 | }, - 12 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/inverted_external_token/readme.md: --------------------------------------------------------------------------------- - 1 | This language has an external scanner that calls 
`lexer->advance(lexer, true)` (in order to skip whitespace) *after* having called `lexer->mark_end(lexer)`. This tests an edge case in the parser's handling of token start and end positions. - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/inverted_external_token/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/parser.h" - | - 2 | enum { - 3 | LINE_BREAK - 4 | }; - | - 5 | void *tree_sitter_inverted_external_token_external_scanner_create() { return NULL; } - | - 6 | void tree_sitter_inverted_external_token_external_scanner_destroy(void *payload) {} - | - 7 | unsigned tree_sitter_inverted_external_token_external_scanner_serialize( - 8 | void *payload, - 9 | char *buffer - 10 | ) { return true; } - | - 11 | void tree_sitter_inverted_external_token_external_scanner_deserialize( - 12 | void *payload, - 13 | const char *buffer, - 14 | unsigned length - 15 | ) {} - | - 16 | bool tree_sitter_inverted_external_token_external_scanner_scan( - 17 | void *payload, - 18 | TSLexer *lexer, - 19 | const bool *valid_symbols - 20 | ) { - 21 | while (lexer->lookahead == ' ' || lexer->lookahead == '\r') { - 22 | lexer->advance(lexer, true); - 23 | } - | - 24 | if (lexer->lookahead == '\n') { - 25 | lexer->advance(lexer, false); - | - 26 | // Mark the end of the line break token. - 27 | lexer->mark_end(lexer); - | - 28 | // Skip whitespace *after* having marked the end. 
- 29 | while (lexer->lookahead == ' ' || lexer->lookahead == '\n' || lexer->lookahead == '\r') { - 30 | lexer->advance(lexer, true); - 31 | } - | - 32 | if (lexer->lookahead != '.') { - 33 | lexer->result_symbol = LINE_BREAK; - 34 | return true; - 35 | } - 36 | } - | - 37 | return false; - 38 | } - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/invisible_start_rule/expected_error.txt: --------------------------------------------------------------------------------- - 1 | A grammar's start rule must be visible. - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/invisible_start_rule/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: "invisible_start_rule", - 3 | rules: { - 4 | _value: $ => choice($.a, $.b), - 5 | a: $ => "a", - 6 | b: $ => "b", - 7 | }, - 8 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/corpus.txt: --------------------------------------------------------------------------------- - 1 | ======================== - 2 | regexes - 3 | ======================== - | - 4 | /a+/ - | - 5 | --- - | - 6 | (expression (regex)) - | - 7 | ======================== - 8 | conditionals - 9 | ======================== - | - 10 | (if (1) /a+/) - | - 11 | --- - | - 12 | (expression (parenthesized (expression (conditional - 13 | (parenthesized (expression (number))) - 14 | (expression (regex)))))) - | - 15 | ======================== - 16 | quotients - 17 | ======================== - | - 18 | ((1) / 2) - | - 19 | --- - | - 20 | (expression (parenthesized (expression (quotient - 21 | (expression (parenthesized (expression (number)))) - 22 | (expression (number)))))) - - - 
--------------------------------------------------------------------------------- -/test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'lexical_conflicts_due_to_state_merging', - | - 3 | rules: { - 4 | expression: $ => choice( - 5 | $.conditional, - 6 | $.quotient, - 7 | $.regex, - 8 | $.number, - 9 | $.parenthesized, - 10 | ), - | - 11 | conditional: $ => prec.left(1, seq( - 12 | 'if', - 13 | $.parenthesized, - 14 | $.expression - 15 | )), - | - 16 | quotient: $ => prec.left(seq( - 17 | $.expression, - 18 | '/', - 19 | $.expression - 20 | )), - | - 21 | regex: $ => /\/[^/\n]+\//, - | - 22 | number: $ => /\d+/, - | - 23 | parenthesized: $ => seq('(', $.expression, ')'), - 24 | }, - 25 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/readme.md: --------------------------------------------------------------------------------- - 1 | This grammar has two tokens, `regex` and `/`, which conflict: when a `/` character is encountered, the lexer can't tell if it is part of a `/` token or a `regex` by looking ahead only one character. But because these tokens are never valid in the same position, this doesn't cause any problem. - | - 2 | When merging similar parse states in order to reduce the size of the parse table, it is important that we avoid merging states in a way that causes these two tokens to both appear as valid lookahead symbols in a given state. 
- | - 3 | If we weren't careful, this grammar would cause that to happen, because a `regex` is valid in this state: - | - 4 | ``` - 5 | (if (1) /\w+/) - 6 | ^ - 7 | ``` - | - 8 | and a `/` is valid in this state: - | - | - 9 | ``` - 10 | ((1) / 2) - 11 | ^ - 12 | ``` - | - 13 | And these two states would otherwise be candidates for merging, because they both contain only the action `reduce(parenthesized, 3)`. - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/named_precedences/corpus.txt: --------------------------------------------------------------------------------- - 1 | ============= - 2 | Declarations - 3 | ============= - | - 4 | A||B c = d; - 5 | E.F g = h; - | - 6 | ============= - 7 | Expressions - 8 | ============= - | - 9 | a || b.c; - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/named_precedences/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'named_precedences', - | - 3 | conflicts: $ => [ - 4 | [$.expression, $.type], - 5 | [$.expression, $.nested_type], - 6 | ], - | - 7 | precedences: $ => [ - 8 | [$.member_expression, "and", "or"], - 9 | [$.nested_type, "type_intersection", "type_union"], - 10 | ], - | - 11 | rules: { - 12 | program: $ => repeat(choice( - 13 | $.expression_statement, - 14 | $.declaration_statement, - 15 | )), - | - 16 | expression_statement: $ => seq($.expression, ';'), - | - 17 | declaration_statement: $ => seq($.type, $.expression, ';'), - | - 18 | expression: $ => choice( - 19 | $.member_expression, - 20 | $.binary_expression, - 21 | $.identifier, - 22 | ), - | - 23 | member_expression: $ => seq($.expression, '.', $.identifier), - | - 24 | binary_expression: $ => choice( - 25 | prec.left('or', seq($.expression, '||', $.expression)), - 26 | prec.left('and', seq($.expression, '&&', $.expression)), - 27 | ), - 
| - 28 | type: $ => choice($.nested_type, $.binary_type, $.identifier), - | - 29 | nested_type: $ => seq($.identifier, '.', $.identifier), - | - 30 | binary_type: $ => choice( - 31 | prec.left('type_union', seq($.type, '||', $.type)), - 32 | prec.left('type_intersection', seq($.type, '&&', $.type)), - 33 | ), - | - 34 | identifier: $ => /[a-z]\w+/, - 35 | }, - 36 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/named_precedences/readme.txt: --------------------------------------------------------------------------------- - 1 | This grammar uses named precedences, which have a partial order specified via the grammar's `precedences` field. Named - 2 | precedences allow certain conflicts to be resolved statically without accidentally resolving *other* conflicts, which - 3 | are intended to be resolved dynamically. - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/named_rule_aliased_as_anonymous/corpus.txt: --------------------------------------------------------------------------------- - 1 | ================================================ - 2 | Named rules that are aliased as anonymous tokens - 3 | ================================================ - | - 4 | B C B - | - 5 | --- - | - 6 | (a (c) (b)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/named_rule_aliased_as_anonymous/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'named_rule_aliased_as_anonymous', - | - 3 | rules: { - 4 | a: $ => seq( - 5 | alias($.b, 'the-alias'), - 6 | $.c, - 7 | $.b, - 8 | ), - | - 9 | b: _ => 'B', - | - 10 | c: _ => 'C', - 11 | }, - 12 | }); - - - --------------------------------------------------------------------------------- 
-/test/fixtures/test_grammars/named_rule_aliased_as_anonymous/readme.md: --------------------------------------------------------------------------------- - 1 | This grammar checks that if a named node is aliased as an anonymous node (e.g. `alias($.foo, 'bar')`), then the rule will behave like an anonymous node. In particular, it will not show up in the tree's S-expression representation. - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/nested_inlined_rules/corpus.txt: --------------------------------------------------------------------------------- - 1 | ================================== - 2 | Statements - 3 | ================================== - | - 4 | return 1; - 5 | return 2; - | - 6 | --- - | - 7 | (program - 8 | (return_statement (number)) - 9 | (return_statement (number))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/nested_inlined_rules/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'nested_inlined_rules', - | - 3 | inline: $ => [ - 4 | $.top_level_item, - 5 | $.statement, - 6 | ], - | - 7 | rules: { - 8 | program: $ => repeat1($.top_level_item), - | - 9 | top_level_item: $ => choice($.statement, '!'), - | - 10 | statement: $ => choice($.expression_statement, $.return_statement), - | - 11 | return_statement: $ => seq('return', $.number, ';'), - | - 12 | expression_statement: $ => seq($.number, ';'), - | - 13 | number: _ => /\d+/, - 14 | }, - 15 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/nested_inlined_rules/readme.md: --------------------------------------------------------------------------------- - 1 | This grammar demonstrates that you can have an inlined rule that contains another inlined rule. 
- - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/next_sibling_from_zwt/corpus.txt: --------------------------------------------------------------------------------- - 1 | =========================== - 2 | missing c node - 3 | =========================== - | - 4 | abdef - | - 5 | --- - | - 6 | (source - 7 | (MISSING "c")) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/next_sibling_from_zwt/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: "next_sibling_from_zwt", - 3 | extras: $ => [ - 4 | /\s|\\\r?\n/, - 5 | ], - | - 6 | rules: { - 7 | source: $ => seq( - 8 | 'a', - 9 | $._bc, - 10 | 'd', - 11 | 'e', - 12 | 'f', - 13 | ), - | - 14 | _bc: $ => seq( - 15 | 'b', - 16 | 'c', - 17 | ), - 18 | } - 19 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/partially_resolved_conflict/expected_error.txt: --------------------------------------------------------------------------------- - 1 | Unresolved conflict for symbol sequence: - | - 2 | '!' expression • '<' … - | - 3 | Possible interpretations: - | - 4 | 1: (unary_a '!' expression) • '<' … (precedence: 2) - 5 | 2: (unary_b '!' expression) • '<' … (precedence: 2) - | - 6 | Possible resolutions: - | - 7 | 1: Specify a higher precedence in `unary_a` than in the other rules. - 8 | 2: Specify a higher precedence in `unary_b` than in the other rules. 
- 9 | 3: Add a conflict for these rules: `unary_a`, `unary_b` - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/partially_resolved_conflict/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'partially_resolved_conflict', - | - 3 | rules: { - 4 | expression: $ => choice($.binary, $.identifier), - | - 5 | unary_a: $ => prec(2, seq('!', $.expression)), - | - 6 | unary_b: $ => prec(2, seq('!', $.expression)), - | - 7 | binary: $ => seq( - 8 | choice($.unary_a, $.unary_b, $.expression), - 9 | '<', - 10 | $.expression, - 11 | ), - | - 12 | identifier: _ => /[a-z]+/, - 13 | }, - 14 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/partially_resolved_conflict/readme.txt: --------------------------------------------------------------------------------- - 1 | This grammar has a conflict with three possible actions: a shift in the middle of the `binary` rule and two reductions: one for `unary_a` and one for `unary_b`. Both `unary_a` and `unary_b` have a higher precedence than `binary`, therefore we can rule out the interpretation where a `binary` occurs *inside* of a `unary_a` or `unary_b`, so the error message (and suggested `conflict`) should not include that interpretation. 
- - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt: --------------------------------------------------------------------------------- - 1 | Unresolved conflict for symbol sequence: - | - 2 | identifier identifier • '{' … - | - 3 | Possible interpretations: - | - 4 | 1: identifier (expression identifier) • '{' … - 5 | 2: identifier (function_call identifier • block) (precedence: 0, associativity: Right) - | - 6 | Possible resolutions: - | - 7 | 1: Specify a higher precedence in `function_call` than in the other rules. - 8 | 2: Specify a higher precedence in `expression` than in the other rules. - 9 | 3: Specify a left or right associativity in `expression` - 10 | 4: Add a conflict for these rules: `expression`, `function_call` - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'precedence_on_single_child_missing', - | - 3 | rules: { - 4 | expression: $ => choice($.function_call, $.identifier), - | - 5 | function_call: $ => prec.right(choice( - 6 | seq($.identifier, $.expression), - 7 | seq($.identifier, $.block), - 8 | seq($.identifier, $.expression, $.block), - 9 | )), - | - 10 | block: $ => seq('{', $.expression, '}'), - | - 11 | identifier: _ => /[a-zA-Z]+/, - 12 | }, - 13 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_single_child_missing/readme.md: --------------------------------------------------------------------------------- - 1 | This language has function calls similar to Ruby's, with no parentheses required, and optional blocks. 
- | - 2 | There is a shift/reduce conflict here: - | - 3 | ``` - 4 | foo bar { baz } - 5 | ^ - 6 | ``` - | - 7 | The possible actions are: - | - 8 | 1. `reduce(expression, 1)` - `bar` is an expression being passed to the `foo` function. - 9 | 2. `shift` - `bar` is a function being called with the block `{ baz }` - | - 10 | The grammars `precedence_on_single_child_negative` and `precedence_on_single_child_positive` show possible resolutions to this conflict. - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_single_child_negative/corpus.txt: --------------------------------------------------------------------------------- - 1 | =========================== - 2 | function calls with blocks - 3 | =========================== - | - 4 | foo bar { baz } - | - 5 | --- - | - 6 | (expression (function_call - 7 | (identifier) - 8 | (expression (identifier)) - 9 | (block (expression (identifier))))) - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'precedence_on_single_child_negative', - | - 3 | rules: { - 4 | expression: $ => choice($.function_call, $.identifier), - | - 5 | function_call: $ => prec.right(-1, choice( - 6 | seq($.identifier, $.expression), - 7 | seq($.identifier, $.block), - 8 | seq($.identifier, $.expression, $.block), - 9 | )), - | - 10 | block: $ => seq('{', $.expression, '}'), - | - 11 | identifier: _ => /[a-zA-Z]+/, - 12 | }, - 13 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_single_child_negative/readme.md: --------------------------------------------------------------------------------- - 1 | This grammar resolves the conflict shown in the 
`precedence_on_single_child_missing` grammar by giving `function_call` a negative precedence. This causes reducing the `bar` variable to an expression to be preferred over shifting the `{` token as part of `function_call`. - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_single_child_positive/corpus.txt: --------------------------------------------------------------------------------- - 1 | =========================== - 2 | function calls with blocks - 3 | =========================== - | - 4 | foo bar { baz } - | - 5 | --- - | - 6 | (expression (function_call - 7 | (identifier) - 8 | (expression (function_call - 9 | (identifier) - 10 | (block (expression (identifier))))))) - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'precedence_on_single_child_positive', - | - 3 | rules: { - 4 | expression: $ => choice($.function_call, $.identifier), - | - 5 | function_call: $ => prec.right(1, choice( - 6 | seq($.identifier, $.expression), - 7 | seq($.identifier, $.block), - 8 | seq($.identifier, $.expression, $.block), - 9 | )), - | - 10 | block: $ => seq('{', $.expression, '}'), - | - 11 | identifier: _ => /[a-zA-X]+/, - 12 | }, - 13 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_single_child_positive/readme.md: --------------------------------------------------------------------------------- - 1 | This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a positive precedence. This causes shifting the `{` token as part of `function_call` to be preferred over reducing the `bar` variable to an expression. 
- - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_subsequence/corpus.txt: --------------------------------------------------------------------------------- - 1 | ========================================== - 2 | curly brace blocks with high precedence - 3 | ========================================== - | - 4 | a b {} - | - 5 | --- - | - 6 | (expression (function_call - 7 | (identifier) - 8 | (expression (function_call (identifier) (block))))) - | - 9 | ========================================== - 10 | do blocks with low precedence - 11 | ========================================== - | - 12 | a b do end - | - 13 | --- - | - 14 | (expression (function_call - 15 | (identifier) - 16 | (expression (identifier)) - 17 | (do_block))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_subsequence/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'precedence_on_subsequence', - | - 3 | rules: { - 4 | expression: $ => prec.left(choice( - 5 | $.function_call, - 6 | $.identifier, - 7 | $.scope_resolution, - 8 | )), - | - 9 | function_call: $ => choice( - 10 | seq($.identifier, $.expression), - 11 | prec(1, seq($.identifier, $.block)), - 12 | prec(-1, seq($.identifier, $.do_block)), - 13 | seq($.identifier, prec(1, seq($.expression, $.block))), - 14 | seq($.identifier, prec(-1, seq($.expression, $.do_block))), - 15 | ), - | - 16 | scope_resolution: $ => prec.left(1, choice( - 17 | seq($.expression, '::', $.expression), - 18 | seq('::', $.expression), - 19 | )), - | - 20 | block: _ => '{}', - | - 21 | do_block: _ => 'do end', - | - 22 | identifier: _ => /[a-zA-Z]+/, - 23 | }, - 24 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_token/corpus.txt: 
--------------------------------------------------------------------------------- - 1 | ========================================== - 2 | obvious tokens - 3 | ========================================== - | - 4 | // hi - 5 | /* hi */ - 6 | hi - 7 | / - 8 | "hi" - 9 | /hi/ - | - 10 | --- - | - 11 | (program - 12 | (comment) - 13 | (comment) - 14 | (identifier) - 15 | (slash) - 16 | (string) - 17 | (regex)) - | - 18 | ========================================== - 19 | strings starting with double slashes - 20 | ========================================== - | - 21 | /* - 22 | The lexer matches the string content correctly even though - 23 | a comment could match all the way until the end of the line, - 24 | because the string content token has a higher precedence - 25 | than the comment token. - 26 | */ - | - 27 | "//one\n//two" - | - 28 | --- - | - 29 | (program - 30 | (comment) - 31 | (string (escape_sequence))) - | - 32 | ========================================== - 33 | comments that resemble regexes - 34 | ========================================== - | - 35 | /* - 36 | The lexer matches this as a comment followed by an identifier - 37 | even though a regex token could match the entire thing, because - 38 | the comment token has a higher precedence than the regex token - 39 | */ - | - 40 | /* hello */ui - | - 41 | --- - | - 42 | (program - 43 | (comment) - 44 | (comment) - 45 | (identifier)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_token/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'precedence_on_token', - | - 3 | extras: $ => [ - 4 | /\s/, - 5 | $.comment, - 6 | ], - | - 7 | rules: { - 8 | program: $ => repeat(choice( - 9 | $.string, - 10 | $.regex, - 11 | $.identifier, - 12 | $.slash, - 13 | )), - | - 14 | comment: _ => token(prec(1, /\/\/.*|\/\*[^*]*\*\//)), - | - 15 | string: $ => 
seq( - 16 | '"', - 17 | repeat(choice( - 18 | token(prec(2, /[^\"\n\\]+/)), - 19 | $.escape_sequence, - 20 | )), - 21 | '"', - 22 | ), - | - 23 | escape_sequence: _ => /\\./, - | - 24 | regex: _ => /\/[^\/\n]+\/[a-z]*/, - | - 25 | identifier: _ => /[a-z]\w*/, - | - 26 | slash: _ => '/', - 27 | }, - 28 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/precedence_on_token/readme.md: --------------------------------------------------------------------------------- - 1 | This grammar shows the behavior of precedence used within a `TOKEN` rule. Tokens with higher precedence are preferred, even if they match a shorter string. - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/readme_grammar/corpus.txt: --------------------------------------------------------------------------------- - 1 | ================================== - 2 | the readme example - 3 | ================================== - | - 4 | a + b * c - | - 5 | --- - | - 6 | (expression (sum - 7 | (expression (variable)) - 8 | (expression (product - 9 | (expression (variable)) - 10 | (expression (variable)))))) - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/readme_grammar/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'readme_grammar', - | - 3 | // Things that can appear anywhere in the language, like comments - 4 | // and whitespace, are expressed as 'extras'. - 5 | extras: $ => [ - 6 | /\s/, - 7 | $.comment, - 8 | ], - | - 9 | rules: { - 10 | // The first rule listed in the grammar becomes the 'start rule'. 
- 11 | expression: $ => choice( - 12 | $.sum, - 13 | $.product, - 14 | $.number, - 15 | $.variable, - 16 | seq('(', $.expression, ')'), - 17 | ), - | - 18 | // Tokens like '+' and '*' are described directly within the - 19 | // grammar's rules, as opposed to in a separate lexer description. - 20 | sum: $ => prec.left(1, seq($.expression, '+', $.expression)), - | - 21 | // Ambiguities can be resolved at compile time by assigning precedence - 22 | // values to rule subtrees. - 23 | product: $ => prec.left(2, seq($.expression, '*', $.expression)), - | - 24 | // Tokens can be specified using ECMAScript regexps. - 25 | number: _ => /\d+/, - | - 26 | comment: _ => /#.*/, - | - 27 | variable: _ => new RustRegex('(?i:[a-z])\\w*'), - 28 | }, - 29 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/readme.md: --------------------------------------------------------------------------------- - 1 | These small grammars demonstrate specific features or test for certain specific regressions. - | - 2 | For some of them, compilation is expected to fail with a given error message. For others, the resulting parser is expected to produce certain trees. 
- - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/reserved_words/corpus.txt: --------------------------------------------------------------------------------- - 1 | ============== - 2 | Valid Code - 3 | ============== - | - 4 | if (a) { - 5 | var b = { - 6 | c: d, - 7 | e: f, - 8 | }; - 9 | while (g) { - 10 | h(); - 11 | } - 12 | } - | - 13 | --- - | - 14 | (program - 15 | (if_statement - 16 | (parenthesized_expression (identifier)) - 17 | (block - 18 | (var_declaration - 19 | (identifier) - 20 | (object - 21 | (pair (identifier) (identifier)) - 22 | (pair (identifier) (identifier)))) - 23 | (while_statement - 24 | (parenthesized_expression (identifier)) - 25 | (block (expression_statement (call_expression (identifier)))))))) - | - 26 | ================================================ - 27 | Error detected at globally-reserved word - 28 | ================================================ - | - 29 | var a = - | - 30 | if (something) { - 31 | c(); - 32 | } - | - 33 | --- - | - 34 | (program - 35 | (ERROR (identifier)) - 36 | (if_statement - 37 | (parenthesized_expression (identifier)) - 38 | (block - 39 | (expression_statement (call_expression (identifier)))))) - | - 40 | ================================================ - 41 | Object keys that are reserved in other contexts - 42 | ================================================ - | - 43 | var x = { - 44 | if: a, - 45 | while: b, - 46 | }; - | - 47 | --- - | - 48 | (program - 49 | (var_declaration - 50 | (identifier) - 51 | (object - 52 | (pair (identifier) (identifier)) - 53 | (pair (identifier) (identifier))))) - | - 54 | ================================================ - 55 | Error detected at context-specific reserved word - 56 | ================================================ - | - 57 | var x = { - 58 | var y = z; - | - 59 | --- - | - 60 | (program - 61 | (ERROR (identifier)) - | - 62 | ; Important - var declaration is still recognized, - 63 | ; 
because in this example grammar, `var` is a keyword - 64 | ; even within object literals. - 65 | (var_declaration - 66 | (identifier) - 67 | (identifier))) - | - 68 | ============================================= - 69 | Other tokens that overlap with keyword tokens - 70 | ============================================= - | - 71 | var a = /reserved-words-should-not-affect-this/; - 72 | var d = /if/; - | - 73 | --- - | - 74 | (program - 75 | (var_declaration - 76 | (identifier) - 77 | (regex (regex_pattern))) - 78 | (var_declaration - 79 | (identifier) - 80 | (regex (regex_pattern)))) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/reserved_words/grammar.js: --------------------------------------------------------------------------------- - 1 | const RESERVED_NAMES = ["if", "while", "var"]; - 2 | const RESERVED_PROPERTY_NAMES = ["var"]; - | - 3 | export default grammar({ - 4 | name: "reserved_words", - | - 5 | reserved: { - 6 | global: $ => RESERVED_NAMES, - 7 | property: $ => RESERVED_PROPERTY_NAMES, - 8 | }, - | - 9 | word: $ => $.identifier, - | - 10 | rules: { - 11 | program: $ => repeat($._statement), - | - 12 | block: $ => seq("{", repeat($._statement), "}"), - | - 13 | _statement: $ => choice( - 14 | $.var_declaration, - 15 | $.if_statement, - 16 | $.while_statement, - 17 | $.expression_statement, - 18 | ), - | - 19 | var_declaration: $ => seq("var", $.identifier, "=", $._expression, ";"), - | - 20 | if_statement: $ => seq("if", $.parenthesized_expression, $.block), - | - 21 | while_statement: $ => seq("while", $.parenthesized_expression, $.block), - | - 22 | expression_statement: $ => seq($._expression, ";"), - | - 23 | _expression: $ => choice( - 24 | $.identifier, - 25 | $.parenthesized_expression, - 26 | $.call_expression, - 27 | $.member_expression, - 28 | $.object, - 29 | $.regex, - 30 | ), - | - 31 | parenthesized_expression: $ => seq("(", $._expression, ")"), - | - 32 | 
member_expression: $ => seq($._expression, ".", $.identifier), - | - 33 | call_expression: $ => seq($._expression, "(", repeat(seq($._expression, ",")), ")"), - | - 34 | object: $ => seq("{", repeat(seq(choice($.pair, $.getter), ",")), "}"), - | - 35 | regex: $ => seq('/', $.regex_pattern, '/'), - | - 36 | regex_pattern: $ => token(prec(-1, /[^/\n]+/)), - | - 37 | pair: $ => seq(reserved('property', $.identifier), ":", $._expression), - | - 38 | getter: $ => seq( - 39 | "get", - 40 | reserved('property', $.identifier), - 41 | "(", - 42 | ")", - 43 | $.block, - 44 | ), - | - 45 | identifier: $ => /[a-z_]\w*/, - 46 | }, - 47 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/start_rule_is_blank/corpus.txt: --------------------------------------------------------------------------------- - 1 | ======================== - 2 | the empty string - 3 | ======================= - | - 4 | --- - | - 5 | (first_rule) - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/start_rule_is_blank/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'start_rule_is_blank', - | - 3 | rules: { - 4 | first_rule: _ => blank(), - 5 | }, - 6 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/start_rule_is_token/corpus.txt: --------------------------------------------------------------------------------- - 1 | =========================== - 2 | the single token - 3 | ========================== - 4 | the-value - 5 | --- - 6 | (first_rule) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/start_rule_is_token/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 
'start_rule_is_token', - | - 3 | rules: { - 4 | first_rule: _ => 'the-value', - 5 | }, - 6 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/unicode_classes/corpus.txt: --------------------------------------------------------------------------------- - 1 | =============== - 2 | Uppercase words - 3 | =============== - | - 4 | Δბㄱ Ψ Ɓƀ Ƒ Ɣ Śřř - | - 5 | --- - | - 6 | (program - 7 | (upper) (upper) (upper) (upper) (upper) (upper)) - | - 8 | ================ - 9 | Lowercase words - 10 | ================ - | - 11 | śś ťť ßß - | - 12 | --- - | - 13 | (program - 14 | (lower) (lower) (lower)) - | - 15 | ================ - 16 | Math symbols - 17 | ================ - | - 18 | ≺ ≼ ≠ ≝ ⨔∑ - | - 19 | --- - | - 20 | (program - 21 | (math_sym) (math_sym) (math_sym) (math_sym) (math_sym)) - | - 22 | ================================ - 23 | Letterlike numeric characters - 24 | ================================ - | - 25 | ᛯ Ⅵ 〩 - | - 26 | --- - | - 27 | (program - 28 | (letter_number) (letter_number) (letter_number)) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/unicode_classes/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'unicode_classes', - | - 3 | rules: { - 4 | program: $ => repeat(choice( - 5 | $.lower, - 6 | $.upper, - 7 | $.math_sym, - 8 | $.letter_number, - 9 | )), - | - 10 | lower: _ => /\p{Ll}\p{L}*/, - | - 11 | upper: _ => /\p{Lu}\p{L}*/, - | - 12 | math_sym: _ => /\p{Sm}+/, - | - 13 | letter_number: _ => /\p{Letter_Number}/, - 14 | }, - 15 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/unused_rules/corpus.txt: --------------------------------------------------------------------------------- - 1 | ========================= - 2 | the language - 3 | 
========================= - | - 4 | E F I J - | - 5 | --- - | - 6 | (a (d (e) (f)) (h (i) (j))) - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/unused_rules/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'unused_rules', - | - 3 | rules: { - 4 | a: $ => seq($.d, $.h), - | - 5 | b: _ => 'B', - | - 6 | c: _ => 'C', - | - 7 | d: $ => seq($.e, $.f), - | - 8 | e: _ => 'E', - | - 9 | f: _ => 'F', - | - 10 | g: _ => 'G', - | - 11 | h: $ => seq($.i, $.j), - | - 12 | i: _ => 'I', - | - 13 | j: _ => 'J', - | - 14 | k: _ => 'K', - 15 | }, - 16 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/unused_rules/readme.md: --------------------------------------------------------------------------------- - 1 | The generated parsers use the grammar's token count to distinguish between terminal and non-terminal symbols. When the grammar has unused tokens, these tokens don't appear in the parser, so they need to be omitted from the token count. 
- - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/uses_current_column/corpus.txt: --------------------------------------------------------------------------------- - 1 | =============== - 2 | Simple blocks - 3 | =============== - | - 4 | do a - 5 | e - 6 | f - | - 7 | --- - | - 8 | (block - 9 | (do_expression (block - 10 | (identifier) - 11 | (identifier))) - 12 | (identifier)) - | - 13 | ===================== - 14 | Nested blocks - 15 | ===================== - | - 16 | a = do b - 17 | c + do e - 18 | f - 19 | g - 20 | h - 21 | i - | - 22 | --- - | - 23 | (block - 24 | (binary_expression - 25 | (identifier) - 26 | (do_expression (block - 27 | (identifier) - 28 | (binary_expression - 29 | (identifier) - 30 | (do_expression (block - 31 | (identifier) - 32 | (identifier) - 33 | (identifier)))) - 34 | (identifier)))) - 35 | (identifier)) - | - 36 | =============================== - 37 | Blocks with leading newlines - 38 | =============================== - | - 39 | do - | - | - 40 | a = b - 41 | do - 42 | c - 43 | d - 44 | e - 45 | f - | - 46 | --- - | - 47 | (block - 48 | (do_expression (block - 49 | (binary_expression (identifier) (identifier)) - 50 | (do_expression (block - 51 | (identifier) - 52 | (identifier))) - 53 | (identifier) - 54 | (identifier)))) - | - 55 | ===================== - 56 | Unterminated blocks - 57 | ===================== - | - 58 | do - 59 | --- - | - 60 | (ERROR) - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/uses_current_column/grammar.js: --------------------------------------------------------------------------------- - 1 | export default grammar({ - 2 | name: 'uses_current_column', - | - 3 | externals: $ => [ - 4 | $._indent, - 5 | $._dedent, - 6 | $._newline, - 7 | ], - | - 8 | rules: { - 9 | block: $ => repeat1($._statement), - | - 10 | _statement: $ => seq($._expression, $._newline), - | - 11 | 
_expression: $ => choice( - 12 | $.do_expression, - 13 | $.binary_expression, - 14 | $.identifier, - 15 | ), - | - 16 | do_expression: $ => seq( - 17 | 'do', - 18 | $._indent, - 19 | $.block, - 20 | $._dedent, - 21 | ), - | - 22 | binary_expression: $ => prec.left(1, seq( - 23 | $._expression, - 24 | choice('=', '+', '-'), - 25 | $._expression, - 26 | )), - | - 27 | identifier: _ => /\w+/, - 28 | }, - 29 | }); - - - --------------------------------------------------------------------------------- -/test/fixtures/test_grammars/uses_current_column/scanner.c: --------------------------------------------------------------------------------- - 1 | #include "tree_sitter/alloc.h" - 2 | #include "tree_sitter/parser.h" - | - 3 | #include - 4 | #include - | - 5 | enum TokenType { - 6 | INDENT, - 7 | DEDENT, - 8 | NEWLINE, - 9 | }; - | - 10 | typedef struct { - 11 | uint8_t queued_dedent_count; - 12 | uint8_t indent_count; - 13 | int8_t indents[32]; - 14 | } Scanner; - | - 15 | void *tree_sitter_uses_current_column_external_scanner_create() { - 16 | Scanner *self = ts_malloc(sizeof(Scanner)); - 17 | self->queued_dedent_count = 0; - 18 | self->indent_count = 1; - 19 | self->indents[0] = 0; - 20 | return (void *)self; - 21 | } - | - 22 | void tree_sitter_uses_current_column_external_scanner_destroy(void *payload) { - 23 | ts_free(payload); - 24 | } - | - 25 | unsigned tree_sitter_uses_current_column_external_scanner_serialize( - 26 | void *payload, - 27 | char *buffer - 28 | ) { - 29 | Scanner *self = (Scanner *)payload; - 30 | buffer[0] = self->queued_dedent_count; - 31 | for (unsigned i = 0; i < self->indent_count; i++) { - 32 | buffer[i + 1] = self->indents[i]; - 33 | } - 34 | return self->indent_count + 1; - 35 | } - | - 36 | void tree_sitter_uses_current_column_external_scanner_deserialize( - 37 | void *payload, - 38 | const char *buffer, - 39 | unsigned length - 40 | ) { - 41 | Scanner *self = (Scanner *)payload; - 42 | if (length > 0) { - 43 | self->queued_dedent_count = 
buffer[0]; - 44 | self->indent_count = length - 1; - 45 | for (unsigned i = 0; i < self->indent_count; i++) { - 46 | self->indents[i] = buffer[i + 1]; - 47 | } - 48 | } else { - 49 | self->queued_dedent_count = 0; - 50 | self->indent_count = 1; - 51 | self->indents[0] = 0; - 52 | } - 53 | } - | - 54 | bool tree_sitter_uses_current_column_external_scanner_scan( - 55 | void *payload, - 56 | TSLexer *lexer, - 57 | const bool *valid_symbols - 58 | ) { - 59 | Scanner *self = (Scanner *)payload; - 60 | lexer->mark_end(lexer); - | - 61 | // If dedents were found in a previous run, and are valid now, - 62 | // then return a dedent. - 63 | if (self->queued_dedent_count > 0 && valid_symbols[DEDENT]) { - 64 | lexer->result_symbol = DEDENT; - 65 | self->queued_dedent_count--; - 66 | return true; - 67 | } - | - 68 | // If an indent is valid, then add an entry to the indent stack - 69 | // for the current column, and return an indent. - 70 | if (valid_symbols[INDENT]) { - 71 | while (iswspace(lexer->lookahead)) { - 72 | lexer->advance(lexer, false); - 73 | } - 74 | uint32_t column = lexer->get_column(lexer); - 75 | if (column > self->indents[self->indent_count - 1]) { - 76 | self->indents[self->indent_count++] = column - 2; - 77 | lexer->result_symbol = INDENT; - 78 | return true; - 79 | } else { - 80 | return false; - 81 | } - 82 | } - | - 83 | // If at the end of a statement, then get the current indent - 84 | // level and pop some number of entries off of the indent stack. 
- 85 | if (valid_symbols[NEWLINE] || valid_symbols[DEDENT]) { - 86 | while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') { - 87 | lexer->advance(lexer, false); - 88 | } - | - 89 | if (lexer->lookahead == '\n') { - 90 | lexer->advance(lexer, false); - | - 91 | uint32_t next_column = 0; - 92 | for (;;) { - 93 | if (lexer->lookahead == ' ') { - 94 | next_column++; - 95 | lexer->advance(lexer, false); - 96 | } else if (lexer->lookahead == '\n') { - 97 | next_column = 0; - 98 | lexer->advance(lexer, false); - 99 | } else { - 100 | break; - 101 | } - 102 | } - | - 103 | unsigned dedent_count = 0; - 104 | while (next_column < self->indents[self->indent_count - 1]) { - 105 | dedent_count++; - 106 | self->indent_count--; - 107 | } - | - 108 | if (dedent_count > 0 && valid_symbols[DEDENT]) { - 109 | lexer->result_symbol = DEDENT; - 110 | return true; - 111 | } else if (valid_symbols[NEWLINE]) { - 112 | self->queued_dedent_count += dedent_count; - 113 | lexer->result_symbol = NEWLINE; - 114 | return true; - 115 | } - 116 | } - 117 | } - | - 118 | return false; - 119 | } diff --git a/evals/README.md b/evals/README.md new file mode 100644 index 000000000..78a6f670e --- /dev/null +++ b/evals/README.md @@ -0,0 +1,977 @@ +# cgr evaluation harness + +Scores the knowledge graph that `code-graph-rag` (cgr) builds against ground truth, with no Memgraph required (an in-memory capturing ingestor drives `GraphUpdater(...).run(force=True)`). + +## L1 — structure (containment) + +Scores cgr's definition nodes and `DEFINES`/`DEFINES_METHOD` edges against a scope-aware Python `ast` oracle. + +```bash +uv run python -m evals.cli --target codebase_rag +``` + +Writes `evals/results/scores.csv` and `evals/results/diff.json`. Node identity join is `(kind, file, start_line)`. + +## L2 — module-call attribution (ast oracle) + +Scores whether cgr attributes the right calls to the *module* (caller side). 
A +call runs at module-load time -- and so belongs to the module -- iff it is a +top-level statement, a decorator, or a default-argument expression, i.e. it is +NOT inside a function body. The L3 execution trace cannot measure this: it +records the innermost *function* frame as the caller and drops `<module>` +frames, so module-level attribution is its structural blind spot. An `ast` +oracle fills it. + +```bash +uv run python -m evals.module_calls --target codebase_rag +``` + +How it works: + +- **Oracle** (`module_calls.oracle_module_calls`): walks each file's AST modelling + import-time execution. A call counts when it runs at module load: top-level + statements, list/set/dict comprehensions (eager), decorators, argument + defaults, and -- only when the file does not `from __future__ import + annotations` -- argument/return annotations. It does NOT count function/method + bodies, lambda bodies, or generator expressions (deferred until called or + consumed). Class bodies stay at module scope. It collects the simple name of + every such call whose callee is first-party (a name defined in the target), + excluding dunders. +- **cgr side** (`module_calls.cgr_module_calls`): every `CALLS` edge whose caller + is a `Module` node, keyed by `(module_file, callee_simple_name)`; a constructor + call resolved to a `Class.__init__` *method* is credited to `Class` (a bare + first-party function named `__init__` is left as a filtered dunder). +- **Score**: precision/recall over `(module_file, callee_simple_name)` edges. + +The exact-attribution guarantee is covered by `test_eval_module_calls.py` +(precision == recall == 1.0 on a controlled fixture: a top-level call, a +default-argument call, a `__main__` call, and a nested call that must NOT be +module-attributed). 
+ +On the whole `codebase_rag` target the metric is a lower bound that surfaces two +real, separate cgr gaps (not attribution errors): + +- **Recall** is bounded by constructor calls to first-party classes with no + explicit `__init__` (NamedTuple/dataclass/pydantic) -- cgr has no method node + to point the call at, so no edge is emitted. Closing this needs constructor + calls to target the class node (tracked with the dead-code Class work). +- **Precision** is bounded by the trie suffix-match fallback occasionally + resolving a module-level call to an unrelated first-party name. + +## L3 — CALLS recall (execution-traced) + +Measures whether cgr's static `CALLS` graph contains the call edges that actually fire at runtime. + +```bash +uv run python -m evals.l3 +``` + +How it works: + +- **Static side** (`cgr_graph.extract_cgr_calls`): builds cgr's graph over the target package (default `codebase_rag`) and collects every `CALLS` edge. +- **Traced side** (`calls_trace.trace_calls`): runs cgr indexing a small fixture (`evals/results/l3_workspace/fixture/`, written by `_write_fixture`) under `sys.settrace`, recording every `(caller, callee)` where both are first-party functions in the target. This is a dynamic trace of *cgr's own code* executing — the fixture's only job is to drive cgr through diverse code paths. +- **Recall** = `|traced ∩ static| / |traced|`. `missed = traced − static` is written to `evals/results/calls_diff.json`. Two scopes are reported: *all calls* and *explicit* (excluding dunder callees). + +Because the ground truth is an execution trace, recall is a sound lower bound: it can only credit cgr for call sites the fixture actually exercises. Enriching the fixture (more Python constructs, more languages) widens coverage and is the intended way to harden the metric. + +### Decorator-wrapper normalization + +When a function is wrapped by a `functools.wraps` decorator (e.g. 
cgr's `@recursion_guard`), calling it dispatches at runtime through the decorator's generic inner `wrapper`, so a naive trace records two edges: + +``` +caller -> recursion_guard.decorator.wrapper # the generic wrapper frame +recursion_guard.decorator.wrapper -> the_real_method # wrapper calling func(...) +``` + +cgr's static graph instead "sees through" the decorator and records the single logical edge `caller -> the_real_method`, which is what a reader of the graph wants — the recycled `wrapper` is plumbing, not a meaningful call-graph node. + +To keep the trace and the static graph in agreement, `calls_trace._frame_qn` attributes a `wrapper` frame to the function it wraps (recovered from the wrapper's closed-over callable, following any `__wrapped__` chain). This turns `caller -> wrapper` into `caller -> the_real_method` and collapses `wrapper -> the_real_method` into a self-edge (which the tracer already drops). The decision is **normalize in the eval**, not model wrappers in cgr, so cgr's graph stays free of generic wrapper nodes. + +Covered by `codebase_rag/tests/test_l3_decorator_normalization.py`. + +## Retrieval — graph vs grep (file-level call localization) + +Answers the question raised in issue #424: does graph-augmented retrieval find +the code that calls a symbol better than plain grep? This is the retrieval layer +decoupled from any LLM, which is the measurement the GitLab GKG evaluation +([work item #224](https://gitlab.com/gitlab-org/rust/knowledge-graph/-/work_items/224)) +flagged as out of scope. (That work item, contrary to a widely repeated claim, +contains no "8% over grep" figure; its headline was an agentic SWE-bench-Lite +pass rate of roughly 6 to 7 of 23 issues. This benchmark measures retrieval +quality directly instead.) + +```bash +uv run python -m evals.retrieval --target codebase_rag +``` + +The task: for every first-party symbol `S`, find the files that call `S`. 
The +comparison unit is a file-level call edge `(caller_file, callee_simple_name)`, +which mirrors the GKG "did it open the right file" localization signal. Three +conditions are scored against one Python `ast` oracle over the same file and +first-party symbol universe: + +- **graph** (`retrieval.cgr_call_edges`): every cgr `CALLS`/`INSTANTIATES` edge, + reduced to its caller node's file and the callee's simple name (a constructor + resolved to `Class.__init__` is credited to `Class`, as in L2). +- **grep_name** (`retrieval.grep_call_edges`, `GrepMode.NAME`): ripgrep for the + bare symbol token `\b(name)\b`, the first thing a user reaches for. +- **grep_call** (`GrepMode.CALL`): ripgrep for the symbol followed by a paren + `\b(name)\s*\(`, a call-tuned pattern. +- **Oracle** (`retrieval.oracle_call_edges`): every `ast.Call` whose callee + simple name is first-party and non-dunder, attributed to its file. + +Requires `rg` (ripgrep) on `PATH`; `evals.retrieval` exits cleanly if it is +missing. Writes `evals/results/retrieval_scores.csv` and +`evals/results/retrieval_diff.json`. The thesis and grep's two failure modes (a +bare reference or import counts as a hit, and a definition site `def S(` is +indistinguishable from a call) are pinned by +`codebase_rag/tests/test_retrieval_eval.py`. + +Both grep conditions reach recall 1.0 by construction: the oracle is itself +name-based, so any called name is present textually and grep cannot miss it. The +entire story is therefore precision, which is exactly where the resolved graph +wins. Graph recall below 1.0 reflects the few call edges cgr does not resolve; +graph false positives are call edges cgr emits that the pure-`ast` notion of a +call does not see (worth a look, but a small fraction). 
+ +## Incremental update — incremental vs clean re-index + +Answers a correctness question the other layers cannot: after cgr re-indexes only +the files that changed, does the resulting graph still equal a clean full +re-index of the same tree? Incremental indexing is where a knowledge graph +silently rots, so the clean re-index is the oracle and any divergence is a real +bug. + +```bash +uv run python -m evals.incremental --target codebase_rag --sample 25 +``` + +The probe is a semantically neutral edit: a trailing comment is appended to one +file, changing its hash (so cgr treats it as modified) without changing its AST +(so a clean re-index of the edited tree is identical to the original). For each +sampled file the harness indexes a fresh copy, applies the neutral edit, runs an +incremental update, then compares the mutated graph node for node and edge for +edge against a clean forced re-index of the identical on-disk state. + +The comparison runs against a faithful in-memory store (`cgr_graph._StatefulIngestor`) +that implements the exact delete and fetch Cypher the incremental updater issues +(`DETACH DELETE` of a changed file's `Module` subtree, file and folder deletes, +orphan-external pruning, and the prune path queries), so deletions take real +effect rather than being mocked away. The store's semantics are pinned by +`codebase_rag/tests/test_incremental_eval.py`; the same suite also pins the +runner's requirement to purge any pre-existing hash cache copied from the source +tree, without which the baseline index would skip every file. + +What it surfaced and drove a fix for: +[issue #532](https://github.com/vitali87/code-graph-rag/issues/532). Editing a +file `DETACH`-deletes its `Module` subtree, including the reference edges incident +on its functions. 
The eval showed the loss was broader than the issue recorded: +**inbound** `CALLS`/`IMPORTS`/`INSTANTIATES` from unchanged callers were deleted +and never rebuilt (the callers are not reprocessed), and a fresh incremental run +also rebuilt the function registry from changed files only, so even the changed +file's **outbound** calls to symbols defined in unchanged files were dropped. The +fix, verified by this eval, has two parts: + +- **Inbound** edges are captured before deletion and restored verbatim (rather + than re-resolved, which would diverge: cgr resolution is context-sensitive). +- **Outbound** resolution rehydrates the function registry from the persisted + graph so calls into unchanged files resolve again. + +Residual divergence is confined to the changed file's own calls resolved through +type inference / protocol dispatch (e.g. `self.x.method()`), which need the full +cross-file type context that a single-file reprocess does not rebuild; this is +documented as a deeper follow-on, not a regression. Writes +`evals/results/incremental_scores.csv` and `evals/results/incremental_diff.json`. + +Inbound capture is intentionally scoped to re-indexed files (changed, **not** new +or deleted), because a re-indexed file keeps its module qualified name, so the +restore target still exists after reprocessing. Moved or renamed files are not +captured by design: the old path is deleted and the new path is new, so an +unchanged caller's import of the old name no longer resolves, exactly as in a +clean re-index, and dropping that now-dangling edge is correct. Restoring edges +for a vanished module qn would instead fabricate a phantom module node, so the +scoping is the safe choice rather than a gap. A transparently re-exported rename +(old name still resolves) is the one narrow case left to a clean re-index. + +## Import resolution — internal vs external classification + +The structural L1 above grades internal `IMPORTS` edges by their resolved target +file. 
It does not check how cgr classifies the *other* imports: stdlib and +third-party. This eval does, against an `ast` plus filesystem oracle, to surface +internal/external misclassification (the shape of +[issue #498](https://github.com/vitali87/code-graph-rag/issues/498)). + +```bash +uv run python -m evals.import_resolution --target codebase_rag +``` + +The comparison unit is `(importing_file, top_level_package, is_external)`. Both +sides reduce an import to its top-level package name the same way (`import +numpy.linalg` and `from numpy import x` both reduce to `numpy`), and both decide +internal versus external by whether that top level is the project package, so the +oracle is independent of cgr's own resolver. cgr models an external import as a +`Module` node flagged `is_external=True` linked by `IMPORTS` (it does not emit +`ExternalPackage`/`DEPENDS_ON_EXTERNAL` for code-level imports), so the eval reads +the flag off each `IMPORTS` target. `from __future__ import ...` is a compiler +directive rather than a dependency and is excluded on both sides (a calibration +the tests pin). Writes `evals/results/imports_scores.csv` and +`evals/results/imports_diff.json`; the oracle and the misclassification signal +are pinned by `codebase_rag/tests/test_import_resolution_eval.py`. + +## Inheritance — resolved INHERITS and OVERRIDES + +The structural L1 grades `INHERITS` by the base's simple *name*. This eval grades +two deeper things against an `ast` oracle: that cgr resolves a base to the correct +first-party class qualified name (`INHERITS` target), and that method overrides +are attributed to the right base class (`OVERRIDES`). + +```bash +uv run python -m evals.inheritance --target codebase_rag +``` + +The oracle resolves a base only when it is unambiguous: defined in the same module +or imported via `from <module> import <Base>`. Attribute bases (`pkg.Base`), +star-imported, and external bases are skipped and counted, never guessed. 
Two +deliberate scope limits keep the oracle honest rather than noisy: + +- **INHERITS** is graded only for top-level classes (the universe the oracle + enumerates); cgr edges whose subclass is a class nested inside a function are + not graded against an oracle that never saw them. +- **OVERRIDES** is graded only for single-inheritance classes, where "which base + does method `m` override" is unambiguous. Multi-base mixin classes are excluded + on both sides, because the answer there is decided by the MRO, which this ast + oracle does not model. + +Writes `evals/results/inheritance_scores.csv` and +`evals/results/inheritance_diff.json`; pinned by +`codebase_rag/tests/test_inheritance_eval.py`. + +## Instantiation — file-level INSTANTIATES + +The retrieval eval folds class instantiation into its `CALLS` localization (a +constructor call resolves to `Class.__init__`, credited to the class). This eval +grades cgr's `INSTANTIATES` edges on their own, so a constructor-resolution +regression is visible separately from ordinary calls. + +```bash +uv run python -m evals.instantiation --target codebase_rag +``` + +The unit is `(caller_file, class_simple_name)`. The oracle counts every `ast.Call` +whose callee simple name is a first-party class, **excluding bare-name calls whose +name is rebound in that file by a non-first-party import** (`from ext import +Config; Config()` names the external `Config`, not a same-named first-party +class). cgr contributes its `INSTANTIATES` edges reduced to the caller's file and +the class simple name. Writes `evals/results/instantiation_scores.csv` and +`evals/results/instantiation_diff.json`; pinned by +`codebase_rag/tests/test_instantiation_eval.py`. 
+ +Making the oracle import-aware surfaced a cgr precision bug: a constructor whose +name was explicitly imported from an external module (`from evals.types_defs +import GraphData`, with `evals` outside the indexed project) was resolved by the +simple-name trie fallback to a same-named first-party class +(`codebase_rag.types_defs.GraphData`), emitting a wrong `INSTANTIATES` edge. Fixed +in `call_resolver.py` (`_is_external_import` suppresses the trie fallback for a +bare name bound to a genuinely external import; first-party imports, prefixed or +bare, are unaffected). On `codebase_rag`: precision rose from 0.976 to 1.000 +(9 false edges removed), recall 0.997. The one remaining miss is a class defined +in a test method and instantiated from inside a nested class's method (a closure +over an enclosing-function scope), a known resolution gap left documented rather +than scoped away. + +## Dead code — reachability over the captured graph + +cgr's `dead-code` command reports functions/methods unreachable from any entry +point. It runs as a Cypher reachability query against the database, which the +deterministic in-memory harness cannot execute, so this eval faithfully +re-implements that query's reachability over the captured graph and grades it on +controlled fixtures whose dead set is known by construction. + +```bash +uv run python -m evals.dead_code --target codebase_rag # informational report +``` + +Roots are project functions that are decorated with an entry-point decorator +(`@app.route` and friends), exported, named as an entry point, reached by a +`Module` via `CALLS` (a module-level call), or in a test file when tests are +included; everything reachable from a root via `CALLS` (plus `INSTANTIATES` / +`INHERITS` with classes) is live, and the rest is dead. 
The reachability is +unit-tested on hand-built graphs, so when it is run over a cgr-built graph from a +fixture with a known dead set, a mismatch indicts cgr's `CALLS` graph (a missing +edge would flag a live function as dead) rather than the scorer. The graded eval +is the fixture suite `codebase_rag/tests/test_dead_code_eval.py`; the CLI's +corpus mode is informational only, because a real repo has no independent dead-code +oracle (true reachability needs the very call graph under test). On `codebase_rag` +it currently reports 4450 unreachable functions/methods (tests excluded). + +## Cross-project — resolution across top-level packages (monorepo) + +Every other eval runs on a single top-level package (`codebase_rag`), so none +checks the case cgr is built for: a monorepo with several top-level packages where +one references another. This eval extracts cgr's `CALLS` and `IMPORTS` edges whose +endpoints live in *different* top-level packages and grades them on synthetic +multi-package fixtures whose cross-package edges are known by construction +(`codebase_rag/tests/test_cross_project_eval.py`). It confirms that +`pkg_b.use.run()` calling `pkg_a.core.shared()`, and `pkg_b`/`pkg_c` importing +`pkg_a` modules, resolve across the package boundary, while intra-package edges +are correctly excluded. cgr resolves all of these; the eval stands as a +regression guard for monorepo cross-package resolution. + +## Static calls — function-level direct-call recall + +Grades cgr's `CALLS` graph at function granularity against an `ast` oracle that +resolves only the calls a reader can resolve without type inference: a bare-name +call (`foo()`) whose target is a first-party function reached via a `from ... +import foo` or a same-module top-level def. Each becomes a `(caller_qn, +callee_qn)` edge. 
Method / attribute / dynamic calls need cgr's type inference and +are out of the oracle's scope, so only **recall** is graded (cgr resolving more +than static analysis can is expected, not a false positive). The oracle uses ast +import resolution, not cgr's function-registry trie, so it is independent. + +```bash +uv run python -m evals.static_calls --target codebase_rag +``` + +Decorator applications (`@deco(...)`) are excluded (they are not calls the +decorated function makes), and the oracle attributes a call to its real enclosing +function qn including nested scopes (`Class.method.nested`). + +**This eval caught a real cgr bug.** A call inside a function nested in a method +was emitted with a caller qn that dropped the method (`Class.nested` instead of +`Class.method.nested`, matching no node), and was also over-attributed to the +enclosing method. After the root-cause fix in `call_processor.py` +(`_class_member_qn_and_label` + `_calls_owned_by`), recall on `codebase_rag` is +4434/4434 = 1.0. Pinned by `codebase_rag/tests/test_static_calls_eval.py` and the +regression `codebase_rag/tests/test_nested_method_call_qn.py`. + +## Multi-language retrieval (Go) — Go CALLS vs `go/ast` + +The retrieval benchmark above is Python-only. This extends file-level call +localization to a second language: for each first-party Go symbol, which files +call it. cgr's Go `CALLS` edges, reduced to `(caller_file, callee_simple_name)`, +are graded against call sites extracted by Go's own `go/ast` (the same oracle +program as the Go L1 structure eval, extended to emit `CallExpr` callees), over +the same first-party name universe. The oracle uses Go's standard parser, fully +independent of cgr's tree-sitter Go frontend, so this measures cgr's cross-file +Go call resolution against ground truth. + +```bash +uv run python -m evals.go_retrieval --target /path/to/go/repo +``` + +Requires the `go` toolchain on `PATH`; the eval exits cleanly if it is missing. 
+The oracle counts a call by its callee simple name (a bare `foo()` or the selector +tail of `x.Method()` / `pkg.Func()`), keeping only callees that are declared +first-party functions/methods, exactly mirroring the Python retrieval oracle. +Pinned by `codebase_rag/tests/test_go_retrieval_eval.py`, where cgr's Go call +graph matches the `go/ast` oracle on the fixture. The same harness shape +generalizes to the other native-oracle languages (Rust, TypeScript, Java) by +teaching each oracle to emit call sites. + +Running this on a real stdlib package (`encoding/json` via `GOROOT`) instead of +the fixture first surfaced two Go call-graph bugs, then drove their fix to a +clean result. (1) Receiver methods got a receiver-dropping caller qn that bound +to no node; fixed by `_go_receiver_method_caller`. (2) Go receiver dispatch +(`d.method()`, a method call on a receiver, parameter, or composite-literal +local) resolved to no callee at all, because Go calls were not typed and the Go +`selector_expression` callee node was never read by `_get_call_target_name`. +Fixed by adding Go to the typed-language set, a Go local-variable type inference +engine (`parsers/go/type_inference.py`) that maps receivers/parameters/`:=` +locals to their type, and reading the selector callee name. On `encoding/json`, +precision/recall went from 1.0/0.55 to 1.0/1.0 (110/110 call edges, zero false +positives). + +## Multi-language retrieval (Rust) — Rust CALLS vs `syn` + +The same harness applied to Rust: for each first-party Rust symbol, which files +call it. cgr's Rust `CALLS` edges, reduced to `(caller_file, callee_simple_name)`, +are graded against call sites extracted by `syn` (the de-facto Rust parser, the +same oracle as the Rust L1 structure eval, extended to emit `ExprCall` path +callees and `ExprMethodCall` method idents), over the same first-party name +universe. `syn` is independent of cgr's tree-sitter Rust frontend. 
+ +```bash +uv run python -m evals.rust_retrieval --target /path/to/rust/repo +``` + +Requires the `cargo` toolchain on `PATH`; the eval exits cleanly if it is missing. +Pinned by `codebase_rag/tests/test_rust_retrieval_eval.py`, where cgr's Rust call +graph matches the `syn` oracle on the fixture. + +Running it on a real stdlib module (`core::str`, via the `rust-src` component +under the rustup sysroot) surfaced two cgr bugs and drove the fix from +precision/recall 0.91/0.65 to 0.94/0.95. (1) A method in a generic impl block +(`impl<'a> Thing for Chars<'a>`) was attributed to a caller qn that carried the +impl's generics (`crate.lib.Chars<'a>.go`), but the method node is registered on +the bare type (`crate.lib.Chars.go`), so every such `CALLS` edge had a dangling +caller and was dropped; fixed by routing `_get_rust_impl_class_name` through the +same `rs_utils.extract_impl_target` the definition pass uses (recovered 44 of 58 +missing edges). (2) A regression from the externally-imported-name fix: +`_is_external_import` mistook Rust relative imports (`use super::b::helper`, whose +recorded target is the `::`-separated `super::b::helper`) for external symbols and +suppressed the trie fallback, dropping the call; fixed by restricting that guard +to dotted absolute-path imports (Python/Java form), leaving `::`-path and relative +imports to the trie. Pinned by `codebase_rag/tests/test_rust_impl_method_call_qn.py`. +The remaining gap is field/generic method dispatch (`self.field.method()`, +`Pattern` trait calls) needing deeper Rust type inference, plus oracle +undercount inside macro bodies (`write!` expands to `.fmt()` calls `syn` does not +see), documented rather than scoped away. + +## Multi-language retrieval (Java) — Java CALLS vs the JDK Compiler Tree API + +The same harness applied to Java: for each first-party Java symbol, which files +call it. 
cgr's Java `CALLS` edges, reduced to `(caller_file, callee_simple_name)`, +are graded against method-invocation sites extracted by `javac` (the JDK Compiler +Tree API, the same oracle as the Java L1 structure eval, extended to emit the +trailing identifier of each `MethodInvocationTree`), over the same first-party +name universe. `javac` is independent of cgr's tree-sitter Java frontend. + +```bash +uv run python -m evals.java_retrieval --target /path/to/java/repo +``` + +Requires the `javac`/`java` toolchain on `PATH`; the eval exits cleanly if it is +missing. Pinned by `codebase_rag/tests/test_java_retrieval_eval.py`, where cgr's +Java call graph matches the `javac` oracle on the fixture. + +Running it on a real stdlib package (`java.util`, 349 files from the JDK +`src.zip`) surfaced three cgr bugs and drove recall from 0 (every Java call +dropped) to 0.52 at precision 1.0 (zero false positives). (1) The definition pass +registers a Java method node with its parameter signature (`Class.name(args)` — +Java overloads), but the call pass built the enclosing-method caller qn without +it (`Class.name`), so every Java method's `CALLS` from-endpoint matched no node +and the edge would not attach in Memgraph; fixed by routing the caller qn through +the same signature build the definition pass uses +(`codebase_rag/tests/test_java_call_caller_qn.py`). (2) `find_package_start_index` +returns `None` for any project not under a recognized `src/main/java` layout, so +`_build_fqn_lookup_map` left the simple-name to module map empty and all +cross-file resolution (instance dispatch in sibling files) silently failed; fixed +by falling back to the segment after the project root for flat/non-standard +layouts. (3) A static call on a bare class-name receiver in a sibling file +(`T.make()`, same package, no import) never resolved because the receiver-type +lookup only checked the current module and explicit imports; fixed with a +same-package class-name fallback in `_resolve_java_object_type`. 
The remaining gap +is interface/abstract dispatch (the name-based oracle counts a call whenever the +callee name is any declared first-party method, but cgr emits an edge only when +the concrete receiver type is statically knowable) and deep receiver-type +inference (iterator/functional-interface/generic element types), documented rather +than scoped away. + +## Multi-language retrieval (TypeScript) — TS CALLS vs the TypeScript compiler API + +The same harness applied to TypeScript: for each first-party TS symbol, which +files call it. cgr's TS `CALLS` edges, reduced to `(caller_file, +callee_simple_name)`, are graded against call sites extracted by the TypeScript +compiler API (the same oracle as the TS L1 structure eval, extended to emit the +callee identifier of each `CallExpression` — bare identifier or property-access +trailing name), over the same first-party name universe. `tsc` is independent of +cgr's tree-sitter TS frontend. The oracle also now names arrow / function +expressions by their binding (`const foo = () => …` → `foo`), matching how cgr +names them, so they enter the declared-name universe. + +```bash +uv run python -m evals.ts_retrieval --target /path/to/ts/repo +``` + +Requires `node`/`npm` (the `typescript` dependency installs on first run); the +eval exits cleanly if node is missing. Pinned by +`codebase_rag/tests/test_ts_retrieval_eval.py`, where cgr's TS call graph matches +the `tsc` oracle on the fixture. + +Running it on a real library (`zod`, 88 non-test files from `packages/zod/src/v4`) +surfaced two cgr bugs and drove recall from 0.45 to 0.75 at precision 1.0 (zero +false positives). 
(1) **Exported-function duplication:** the definition pass +already ingests an exported function / const-arrow at its natural qn, but the +ES6-export pass (`ingest_exported_function`) re-registered it, minting a spurious +`qn@line` duplicate node (`register_unique_qn` collision) onto which call +resolution then bound — so callers of the natural node were invisible and ~half of +all TS call edges carried a mangled `name@line` callee. In an ESM codebase where +most functions are exported this polluted the whole graph; fixed by skipping the +export-pass registration when the natural qn already exists +(`codebase_rag/tests/test_ts_export_no_duplicate.py`). (2) **Arrow-body calls +unresolved:** the call pass derived a caller name with `child_by_field_name("name")`, +which is empty for an arrow / function expression, so it skipped the whole +`const f = () => …` body and never emitted its calls — and modern TS is mostly +arrow functions. Fixed by recovering the binding name for the `variable_declarator` +form so the body's calls attribute to the same qn the definition pass registered +(`codebase_rag/tests/test_ts_arrow_caller_calls.py`). A follow-up fix also models +**class-field arrow members** (`class T { helper = () => … }`): the definition +pass dropped them (the arrow has no `name` field, so `ingest_method` returned +early) so no `T.helper` node existed and its body's calls were lost. The binding +name is now taken from the enclosing field definition in both the definition and +call passes, modelling it as `T.helper` like a normal method +(`codebase_rag/tests/test_ts_class_field_arrow.py`). The remaining gap is calls +inside anonymous (returned/inline) arrows, method dispatch on typed receivers, and +the name-based oracle's over-count of common method names (`error` on a receiver +cgr cannot type) — documented rather than scoped away. 
+ +## Multi-language retrieval (PHP) — PHP CALLS vs `php-parser` + +The same harness applied to PHP: for each first-party PHP symbol, which files +call it. cgr's PHP `CALLS` edges, reduced to `(caller_file, callee_simple_name)`, +are graded against call sites extracted by `php-parser` (the same oracle as the +PHP L1 structure eval, extended to emit the callee of each `call` — a bare +function name, or the trailing member of a `$obj->m()` / `Class::m()` lookup — +and to carry real declaration names), over the same first-party name universe. +`php-parser` is a pure-JS PHP parser, independent of cgr's tree-sitter PHP frontend. + +```bash +uv run python -m evals.php_retrieval --target /path/to/php/repo +``` + +Requires `node`/`npm` (the `php-parser` dependency installs on first run; no PHP +runtime needed). Pinned by `codebase_rag/tests/test_php_retrieval_eval.py`, where +cgr's PHP call graph matches the `php-parser` oracle on the fixture. + +Running it on a real library (`monolog`, 121 files from `src/Monolog`) surfaced +one cgr bug and drove recall from 0.97 to 0.99 at precision 1.0 (zero false +positives). **Plain function calls were never resolved:** a PHP +`function_call_expression` carries its callee as a `name` node under the +`function` field, a type `_get_call_target_name` did not handle, so no callee name +was extracted and the edge was dropped — only method (`$obj->m()`) and static +(`Class::m()`) calls, which expose a `name` field directly, resolved. cgr's strong +OOP-PHP score had masked the gap (monolog is almost entirely methods). Fixed by +adding the PHP `name` type to the callee match arm +(`codebase_rag/tests/test_php_function_call.py`). The remaining gap is the +name-based oracle's over-count: monolog declares first-party methods named +`substr`/`reset`/`fwrite` (e.g. 
a UTF-8-aware `Utils::substr`), so a bare call to +the PHP builtin of the same name is counted as first-party — cgr's simple-name +trie fallback links it to the same-named first-party symbol, which the file+name +metric credits. Documented rather than scoped away. + +## Multi-language retrieval (Lua) — Lua CALLS vs `luaparse` + +The same harness applied to Lua: for each first-party Lua function, which files +call it. cgr's Lua `CALLS` edges, reduced to `(caller_file, callee_simple_name)`, +are graded against call sites extracted by `luaparse` (the same oracle as the Lua +L1 structure eval, extended to emit the callee of each call — a bare name, or the +trailing member of a `t.f()` / `t:m()` lookup — and to carry real declaration +names, including function expressions named after their assignment target), over +the same first-party name universe. `luaparse` is a pure-JS Lua parser, +independent of cgr's tree-sitter Lua frontend. + +```bash +uv run python -m evals.lua_retrieval --target /path/to/lua/repo +``` + +Requires `node`/`npm` (the `luaparse` dependency installs on first run; no Lua +runtime needed). Pinned by `codebase_rag/tests/test_lua_retrieval_eval.py`, where +cgr's Lua call graph matches the `luaparse` oracle on the fixture. + +Running it on a real library (`penlight`, 39 files from `lua/pl`) surfaced one cgr +bug and held precision at 1.0 (zero false positives), recall 0.93. **Calls inside +function expressions were never attributed:** a function bound to a variable or +table field (`M.runner = function() ... end`) has no name field, so the call pass +could not derive the caller qn and skipped the whole body — the same family as the +JS/TS arrow-caller gap. The definition pass already names these after their +assignment target (`lua_utils.extract_assigned_name`); the fix reuses that in the +call pass so the body's calls attribute to the right node +(`test_cgr_resolves_function_expression_body_calls`). 
The remaining recall gap is +colon-method dispatch and the name-based oracle's over-count: a colon method +`function List:append(i)` gets cgr qn `List:append` (simple name `List:append`, not +`append`), and a call `obj:append(x)` on a receiver whose type cgr cannot +statically infer (e.g. `res[klass]:append(val)`) cannot resolve, while the oracle +credits any call whose simple name is a declared first-party function. Documented +rather than scoped away. + +## Multi-language retrieval (C) — C CALLS vs `libclang` + +The same harness applied to C: for each first-party C function, which files call +it. cgr's C `CALLS` edges, reduced to `(caller_file, callee_simple_name)`, are +graded against call sites extracted by `libclang`, over the same first-party name +universe. cgr parses C with tree-sitter by default (`CPP_FRONTEND=libclang` is +off), so `libclang` — a full C front end — is an independent oracle. C has no +overloading, so a simple name is unambiguous. + +```bash +uv run python -m evals.c_retrieval --target /path/to/c/repo +``` + +Requires `libclang` (the `clang.cindex` Python bindings). No `compile_commands.json` +is needed: each `.c` file is parsed directly, with the SDK sysroot +(`xcrun --show-sdk-path`) and clang's builtin-header directory +(`clang -print-resource-dir`) added so system and compiler headers resolve. A +translation unit that still emits an error diagnostic (a missing build-generated +header such as a configured `config.h`) has a possibly-truncated AST, so the oracle +**abstains** on it: that file is left out of the covered set and the cgr side is +held to the same files (the count of graded files is logged, never silently +dropped). Pinned by `codebase_rag/tests/test_c_retrieval_eval.py`, where cgr's C +call graph matches the `libclang` oracle on a header-free fixture. + +Running it on a real project (`jq`, 18 of 21 `src/*.c` files parsed cleanly; the +other three need build-generated headers) gives precision **0.98**, recall +**0.93**, F1 0.96. 
Every diff entry was classified against the source; the tail +splits into two causes. + +**Most of it is the C preprocessor gap** (inherent — cgr's tree-sitter front end +does not evaluate `#if`/macros, while `libclang` does), confirmed in both +directions: + +- All 11 false positives are calls cgr emits inside inactive conditional branches + that `libclang` correctly compiles out: `jvp_strtod` / `tsd_dtoa_context_get` in + `jv.c` under `#ifdef USE_DECNUM`, `jv_parser_new` in `jq_test.c` under + `#ifdef HAVE_PTHREAD`, `jv_mem_calloc` in `jv_print.c` under `#ifdef WIN32`, + `yysymbol_name` in `parser.c` under `#if YYDEBUG` (`YYDEBUG` is `0`). cgr makes + **zero misresolutions** — each is correct code that is simply dead. +- Most false negatives are calls that exist only after macro expansion, which cgr + cannot see: the `jv_object_foreach` macro (`jv.h`) expanding to `jv_object_iter*` + calls, the `token()` / `check_done()` macros in `jv_parse.c`, flex's input macros, + and bison's `api.prefix {jq_yy}` rename (`libclang` records `jq_yylex` after the + `#define`; cgr resolves the same call under the textual name `yylex`). + +**The rest is a genuine cgr limitation, not the preprocessor:** in the +bison-generated `parser.c`, tree-sitter produces 20 `ERROR` nodes, and the normal +`yyerror` definition (a plain `void yyerror(...)` at `parser.c:406`) falls inside +one. cgr therefore never registers `yyerror` as a function, so every call to it is +unresolved. cgr silently drops real definitions that land inside a tree-sitter +parse-error region on machine-generated code. The trigger was reduced to a +`tree-sitter-c` grammar bug (a block comment inside a `#define` body breaks the +parse and drops the following declaration, present through the latest 0.24.2); +tracked in issue #555 for an upstream report. It is rooted in the grammar, not in +cgr's resolution logic, so it is reported here, not hidden. 
+ +## Multi-language retrieval (C++) — C++ CALLS vs `libclang` + +The same harness applied to C++: for each first-party C++ function or member +function, which files call it. cgr's C++ `CALLS` edges, reduced to +`(caller_file, callee_simple_name)`, are graded against call sites extracted by +`libclang`, over the same first-party name universe (free functions, function +templates, and member functions; constructors/destructors are excluded because +cgr models object creation as `INSTANTIATES`). cgr parses C++ with tree-sitter by +default (`CPP_FRONTEND=libclang` is off), so `libclang` is an independent oracle. +Overloads collapse under the `(file, simple-name)` metric, so they need no +disambiguation. + +```bash +uv run python -m evals.cpp_retrieval --target /path/to/cpp/repo --define LEVELDB_PLATFORM_POSIX=1 +``` + +Requires `libclang`. C++ standard headers must be parsed by a `libclang` whose +clang version matches the active SDK's `libc++`; the bundled pip wheel's older +clang cannot, so the oracle prefers a system `libclang` +(`/Library/Developer/CommandLineTools/usr/lib/libclang.dylib` on macOS) and pins +it before the first parse. No `compile_commands.json` is needed: each source is +parsed directly with the SDK sysroot, the SDK's `libc++` headers (which must +precede clang's builtin resource headers), and every first-party header directory +added as an include path. A build normally supplies platform macros (e.g. +`LEVELDB_PLATFORM_POSIX`); pass them with `--define`. A translation unit that still +emits an error diagnostic **abstains** (left out of the covered set; the cgr side +is held to the same files, the graded count logged). To avoid crediting or +penalizing calls whose simple name merely collides with a first-party symbol, the +oracle grades a call only when `libclang` resolves its callee to a **first-party +declaration** (`child.referenced`), so a `std::string::size()` call is never +counted as a first-party `size` edge. 
Pinned by +`codebase_rag/tests/test_cpp_retrieval_eval.py`, where cgr's C++ call graph matches +the `libclang` oracle on a header-free namespaced fixture. + +Running it on a real project (`leveldb`, 40 of 42 core sources parsed cleanly; the +other two are Windows-only or need gmock) gives precision **0.99**, recall +**0.83**, F1 **0.90** — recall up from **0.54** before the namespace fix below, and +precision lifted from 0.96 by the receiver type inference chain (next section). + +**The dominant gap was a real cgr bug: the call pass dropped the namespace from +the caller qn.** The definition pass binds a C++ free function or class inside a +`namespace` to a namespaced qualified name (`module.ns.fn`, `module.ns.Class`), +but the call pass built the enclosing caller's qn without the namespace +(`module.fn`, `module.Class.method`). Every such `CALLS` edge's source therefore +pointed at a node that does not exist (904 of 1227 C++ call sources dangled on +`leveldb`, all of it in `namespace leveldb`), so the call never attached. The fix +routes both the free-function and class qns through the same +`cpp_utils.build_qualified_name` the definition pass uses, so caller and node qns +always agree (RED test `test_cpp_namespace_call_caller_qn.py`). Dangling sources +fell to 251 and recall rose 0.54 → 0.82. 
+ +**Three follow-on cgr fixes then landed on top of the namespace fix, building out +C++ receiver type inference:** (a) C++ joined the typed-language set with an engine +(`parsers/cpp/type_inference.py`) mapping parameters and local variables — including +reference declarators, multi-variable declarations, and drop-on-conflict lexical +scope handling — to their class types, so `obj->method()` resolves to the method on +the receiver's class instead of by bare name; (b) a member call whose receiver has a +known external type (a `std::string`) skips the name-only trie fallback instead of +mis-binding to a same-named first-party method (`call_resolver._receiver_type_is_external`); +(c) member **fields** are captured per class at ingestion (`build_field_type_map`, +stored in `class_field_types` keyed by class qn) and merged into the receiver map by +the caller's enclosing class, so `mutex_.Lock()` in an out-of-line method resolves to +the field's type even though the field is declared in a different file (the header). +Together these lifted precision **0.96 → 0.99** (false positives 30 → 10) on `leveldb`. + +The remaining tail is documented, not scoped away: + +- **Operator overloads** (`operator=` ×25, `operator[]`, `operator==`/`!=`, 23% of + the misses): `libclang` records `a = b` and `a[i]` as calls to the overloaded + operator methods, while cgr models them as `builtin.cpp.*` operator calls — a + metric difference, not a misresolution. +- **tree-sitter-cpp parse corruption in complex files (the dominant residual FPs).** + Traced to a `tree-sitter-cpp` grammar limitation, not a cgr resolution bug. A + preprocessor conditional inside a construct — the reduced minimal trigger is a + constructor member-initializer list interrupted by `#if`/`#endif`: + + ```cpp + class A { public: A(int n) : + #if !defined(NDEBUG) + x_(n), + #endif + y_(n) {} int x_; int y_; }; + ``` + + emits `ERROR` nodes (4 vs 0 without the `#if`). 
In real files this cascades: + `util/env_posix.cc` (24 `ERROR` nodes) mis-nests every sibling class under the + first (`Limiter.PosixEnv`), turns `struct ::flock` references into phantom classes, + and degrades out-of-line methods like `DBImpl::BuildBatchGroup` into free functions + that lose member-field inference — which is what produces the residual `begin`/`end` + and `Schedule`/`SetReadOnly*` false positives. This exact trigger is already reported + upstream as [tree-sitter-cpp #297](https://github.com/tree-sitter/tree-sitter-cpp/issues/297) + ("ifdef not supported inside constructor member initializer list"); it is a sibling of + the tree-sitter-c grammar bug tracked in #555. It is an **emergent cascade** — every + isolated small reproduction (clean out-of-line methods, same-basename `.h`/`.cc`, an + unrelated `#if` error in the same file) resolves correctly, so there is no minimally + reproducible cgr-side fix; the fix is upstream (grammar) or the libclang frontend. +- **The libclang frontend trades this precision gain for recall.** Running the same + `leveldb` benchmark with `CPP_FRONTEND=libclang` (a `compile_commands.json` from + CMake) parses the corrupt files correctly — `env_posix` classes nest properly and the + FP count drops to 1 (precision **0.9975**) — but recall falls to **0.46** (F1 0.63): the + frontend emits far fewer `CALLS` edges than the tree-sitter resolver, a diffuse gap + (implicit operators, compile-DB parse-context differences from the oracle's + per-file parse, and header-inline caller-file attribution). So the default + tree-sitter path remains the higher-F1 choice; the libclang frontend is the correct + frontend for parse fidelity but needs its own call-edge recall work before it is a + drop-in improvement. + +## Semantic search — query to function relevance + +cgr's semantic search embeds each function's source and retrieves by cosine +similarity to a query embedding. 
This grades that relevance directly: for +controlled fixtures whose natural-language query maps unambiguously to one +function, does cgr's embedder rank that function in the top `k`? + +It uses cgr's own embedder over function source extracted from the captured graph +(the same text cgr embeds), computes the ranking, and scores recall@k against +curated `query -> function` cases (e.g. "read and parse a json file" should +retrieve `load_json_file`, not `send_email` or `compute_sales_tax`). This tests +the embedding-and-ranking pipeline that decides relevance; the Qdrant ANN layer +only approximates the same cosine ranking, so it is out of the loop here. + +Requires the `semantic` extra (embedding model); the eval is skipped when it is +absent. Pinned by `codebase_rag/tests/test_semantic_search_eval.py`, where cgr +reaches recall@3 = 1.0 on the fixture. The relevance set is curated and +deliberately clear-cut: this is a regression guard that the pipeline retrieves +obviously-relevant code, not a broad relevance benchmark (which would need a large +human-judged dataset). + +## L1 (Go) — structure against a native `go/ast` oracle + +The Python L1 above grades cgr against a Python `ast` oracle. To grade other languages with *independent* ground truth, each language is checked against its own standard-library parser rather than against cgr's own tree-sitter output. The first such oracle is Go. + +```bash +uv run python -m evals.go_l1 --target /path/to/go/repo --project-name myrepo +``` + +How it works: + +- **Oracle** (`evals/oracles/go_ast.go`): a small Go program that walks the target with the standard library's `go/parser` + `go/ast` and emits one JSON record per declaration (function-local type declarations included, via `ast.Inspect`, since cgr captures those too). The `kind` field already uses cgr's `NodeLabel` vocabulary (`Function`, `Method`, `Class`, `Interface`, `Type`), so records join cgr's nodes directly on `(kind, file, start_line)`. 
Mapping: `func` → `Function`, `func` with a receiver → `Method`, `type … struct` → `Class`, `type … interface` → `Interface`, any other `type …` (defined types and aliases) → `Type`. Requires the `go` toolchain on `PATH`; `evals.go_l1` exits cleanly if it is missing. +- **cgr side** (`cgr_graph.extract_cgr_go_nodes`): builds cgr's graph over the target and keeps the Go (`.go`) definition nodes. +- **Fair file set**: `run_go_oracle` drops oracle records under any directory in cgr's `IGNORE_PATTERNS` (e.g. `bin`, `vendor`, `build`), so the oracle grades exactly the files cgr indexes — single source of truth, no drift. +- **Score**: per-kind precision/recall/F1 via `score.score_node_kinds`, written to `evals/results/go_scores.csv` and `evals/results/go_diff.json`. + +Validated on `apache/thrift` (1604 cgr Go nodes vs 1604 oracle nodes — exact): + +| label | tp | fp | fn | precision | recall | +|---|---|---|---|---|---| +| Function | 535 | 0 | 0 | 1.0000 | 1.0000 | +| Method | 907 | 0 | 0 | 1.0000 | 1.0000 | +| Class | 106 | 0 | 0 | 1.0000 | 1.0000 | +| Interface | 30 | 0 | 0 | 1.0000 | 1.0000 | +| Type | 26 | 0 | 0 | 1.0000 | 1.0000 | + +Both gaps the oracle originally exposed are fixed: Go `type` declarations (struct/interface/defined-type) are captured (see `codebase_rag/tests/test_go_type_declarations.py`), and Go receiver methods are now `Method` nodes qualified by their receiver type with a `DEFINES_METHOD` edge from it (see `codebase_rag/tests/test_go_receiver_methods.py`), rather than being mislabelled `Function`. + +## L1 (Rust) — structure against a native `syn` oracle + +The second native oracle is Rust, checked against `syn` (the de-facto standard Rust parser). + +```bash +uv run python -m evals.rust_l1 --target /path/to/rust/repo --project-name myrepo +``` + +- **Oracle** (`evals/oracles/rs_oracle/`): a small Rust program that parses every `.rs` file with `syn` and emits one JSON record per declaration, in cgr's `NodeLabel` vocabulary. 
A `syn::visit::Visit` walk recurses into function bodies (function-local defs), `impl`/`trait` associated types, and closures (which cgr models as anonymous `Function` nodes), so the comparison is apples-to-apples. Mapping: `struct` → `Class`, `enum` → `Enum`, `union` → `Union`, `trait` → `Interface` (+ its methods → `Method`), `type` (incl. associated types) → `Type`, `fn`/closure → `Function`, `impl` method → `Method`. Requires the `cargo` toolchain (`proc-macro2`'s `span-locations` feature gives real line numbers); `evals.rust_l1` exits cleanly if it is missing. +- **cgr side** (`cgr_graph.extract_cgr_rust_nodes`), **score** (`score.score_node_kinds`), output to `rs_scores.csv` / `rs_diff.json`. + +Validated on `apache/thrift`'s `lib/rs` (758 cgr Rust nodes vs 758 oracle nodes — exact, all kinds 1.0). The oracle surfaced one cgr gap, now fixed: methods in an `impl Trait for <primitive>` block (e.g. `impl From<T> for u8`) were dropped because the `primitive_type` impl target was unhandled (see `codebase_rag/tests/test_rust_impl_primitive_target.py`). + +## L1 (TypeScript) — structure against the TypeScript compiler API + +The third native oracle is TypeScript, checked against the TypeScript compiler API. + +```bash +uv run python -m evals.ts_l1 --target /path/to/ts/repo --project-name myrepo +``` + +- **Oracle** (`evals/oracles/ts_oracle/`): a Node script that parses every `.ts`/`.tsx` file (`.d.ts` excluded) with the TypeScript compiler API and emits one JSON record per declaration, in cgr's `NodeLabel` vocabulary. Mapping, matching how cgr models TypeScript: `class` → `Class`, `interface` → `Interface`, `enum` → `Enum`, `type` → `Type`, `namespace`/`module` → `Class` (a class-like container), `function` → `Function` (or `Method` inside a namespace/class), arrow functions and function expressions → `Function` (cgr captures every one, like a Rust closure), `method`/`constructor` → `Method`. 
Requires `node`/`npm` (the `typescript` dependency is installed on first run; `package-lock.json` is committed and `node_modules/` is gitignored). `evals.ts_l1` exits cleanly if node is missing. +- **cgr side** (`cgr_graph.extract_cgr_ts_nodes`), **score** (`score.score_node_kinds`), output to `ts_scores.csv` / `ts_diff.json`. + +Validated on `apache/thrift`'s TypeScript (`lib/nodets`, `lib/ts`): 136 cgr nodes vs 136 oracle nodes — exact, all kinds 1.0. No cgr gap found. + +## L1 (JavaScript) — structure against the TypeScript compiler API + +The same compiler-API oracle parses JavaScript too (the TypeScript compiler accepts JS), so JavaScript reuses `evals/oracles/ts_oracle/` over `.js`/`.jsx`. + +```bash +uv run python -m evals.js_l1 --target /path/to/js/repo --project-name myrepo +``` + +Same mapping as TypeScript, with two JS-specific points matching cgr: object-literal shorthand methods are modelled as standalone `Function`s (not `Method`s), and every arrow function / function expression is a `Function`. Output to `js_scores.csv` / `js_diff.json`. + +Validated on `apache/thrift`'s JavaScript (`lib/js`, `lib/nodejs`): 1087 cgr nodes vs 1087 oracle nodes — exact, all kinds 1.0. No cgr gap found. + +## L1 (Java) — structure against the JDK Compiler Tree API + +The sixth native oracle is Java, checked against the JDK's own parser (`com.sun.source` / `javax.tools`). + +```bash +uv run python -m evals.java_l1 --target /path/to/java/repo --project-name myrepo +``` + +- **Oracle** (`evals/oracles/java_oracle/Oracle.java`): parses every `.java` file with the JDK Compiler Tree API (`task.parse()` only parses, so missing dependencies are fine) and emits one JSON record per declaration. Mapping, matching how cgr models Java: `class` → `Class`, `interface` → `Interface` (+ its method signatures → `Method`), annotation type (`@interface`) → `Class`, `enum` → `Enum`, method/constructor → `Method`. A method declared inside an **anonymous class** (e.g. 
`new Runnable() { public void run() {...} }`) is modelled as a standalone `Function` — the same way cgr treats it (and JS object-literal methods); the oracle replicates cgr's rule (a member is a `Method` only when its nearest enclosing named class precedes any enclosing method/lambda body). Requires `javac`/`java`; the oracle is compiled on first run (the `.class` is gitignored, the source committed). `evals.java_l1` exits cleanly if the JDK is missing. +- **cgr side** (`cgr_graph.extract_cgr_java_nodes`), **score** (`score.score_node_kinds`), output to `java_scores.csv` / `java_diff.json`. + +Validated on `apache/thrift`'s `lib/java`: 2861 cgr nodes vs 2861 oracle nodes — exact, all kinds 1.0 (including the 103 anonymous-class methods graded as `Function`). No cgr gap found. + +## L1 (Lua) — structure against a `luaparse` oracle + +The seventh native oracle is Lua, checked against `luaparse`. + +```bash +uv run python -m evals.lua_l1 --target /path/to/lua/repo --project-name myrepo +``` + +- **Oracle** (`evals/oracles/lua_oracle/`): a Node script that parses every `.lua` file with `luaparse` (`luaVersion: "5.3"`, so bitwise operators / integer division parse) and emits a `Function` record per function declaration/expression. Lua has no classes, so cgr models every function — global, `local`, table (`t.f`), method (`t:m`), and anonymous function expressions — as a `Function`. Requires `node`/`npm` (the `luaparse` dependency installs on first run; `package-lock.json` committed, `node_modules/` gitignored). +- **cgr side** (`cgr_graph.extract_cgr_lua_nodes`), **score** (`score.score_node_kinds`), output to `lua_scores.csv` / `lua_diff.json`. + +Validated on `apache/thrift`'s Lua (`lib/lua`, `test/lua`): 376 cgr nodes vs 376 oracle nodes — exact, 1.0. No cgr gap found. + +## L1 (PHP) — structure against a `php-parser` oracle + +The eighth native oracle is PHP, checked against `php-parser` (a pure-JS PHP parser, so no `php` binary is needed). 
+ +```bash +uv run python -m evals.php_l1 --target /path/to/php/repo --project-name myrepo +``` + +- **Oracle** (`evals/oracles/php_oracle/`): a Node script that parses every `.php` file with `php-parser` and emits one record per declaration. Mapping, matching cgr: `class` → `Class`, `interface` → `Interface` (+ methods → `Method`), `trait` → `Class` (+ methods → `Method`), `enum` → `Enum`, `function` → `Function`, closure / arrow `fn` → `Function`. Methods of an **anonymous class** (`new class {...}`) are `Function`s (like Java/JS object-literal members), and a declaration's line is its first attribute (`#[Attr]`) line when present — both matching cgr's node span. Requires `node`/`npm` (the `php-parser` dependency installs on first run; `package-lock.json` committed, `node_modules/` gitignored). +- **cgr side** (`cgr_graph.extract_cgr_php_nodes`), **score** (`score.score_node_kinds`), output to `php_scores.csv` / `php_diff.json`. + +Validated on `apache/thrift`'s PHP (`lib/php`): 1295 cgr nodes vs 1295 oracle nodes — exact, all kinds 1.0. No cgr gap found. + +## Latest results (target: `codebase_rag`) + +Committed snapshots live in `evals/results/` — `scores.csv` (L1), `diff.json` (L1 per-label missing/extra), `calls_diff.json` (L3 missed edges). Regenerate with the commands above. 
+ +### L1 — structure (`uv run python -m evals.cli`) + +| category | label | tp | fp | fn | precision | recall | f1 | +|---|---|---|---|---|---|---|---| +| node | Module | 417 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| node | Class | 926 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| node | Function | 1955 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| node | Method | 3919 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| edge | DEFINES | 2742 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| edge | DEFINES_METHOD | 3919 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| edge | INHERITS | 153 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| edge | IMPORTS | 1274 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | + +Span (end_line) accuracy on matched defs: 6800/6800 exact. + +### L3 — CALLS recall (`uv run python -m evals.l3`) + +| scope | traced | captured | missed | recall | +|---|---|---|---|---| +| all calls | 634 | 634 | 0 | 1.0000 | +| explicit (no dunders) | 580 | 580 | 0 | 1.0000 | + +The L3 fixture exercises rich Python plus all 11 supported languages; recall is a sound lower bound over the cgr code paths that fixture drives. These numbers are for the Python `codebase_rag` target — graded multi-language recall (JS/Rust/Go/Java/C/C++/Lua/PHP/Scala) is future work pending a SCIP-based oracle. + +### Retrieval — graph vs grep (`uv run python -m evals.retrieval`) + +| category | label | tp | fp | fn | precision | recall | f1 | +|---|---|---|---|---|---|---|---| +| retrieval | graph | 3217 | 587 | 37 | 0.8457 | 0.9886 | 0.9116 | +| retrieval | grep_name | 3254 | 10591 | 0 | 0.2350 | 1.0000 | 0.3806 | +| retrieval | grep_call | 3254 | 5638 | 0 | 0.3659 | 1.0000 | 0.5358 | + +The resolved graph more than doubles the precision of even the call-tuned grep +(0.85 versus 0.37) at near-perfect recall, for an F1 of 0.91 versus 0.54: a gain +of roughly 0.38 absolute (about 70% relative) over the strongest grep baseline. +Bare-name grep, the common first attempt, scores far worse (F1 0.38). 
This is +the decoupled retrieval result behind the intuition that a structural graph +beats text search for code navigation. + +### Incremental update — incremental vs clean re-index (`uv run python -m evals.incremental`) + +Over a 25-file neutral-edit sample on `codebase_rag`, **after the #532 fix** +(micro-averaged across probes; clean re-index is the oracle, so `fn` is edges the +incremental graph dropped and `fp` is stale edges it kept): + +| category | label | tp | fp | fn | precision | recall | f1 | +|---|---|---|---|---|---|---|---| +| edge | CALLS | 333010 | 63 | 740 | 0.9998 | 0.9978 | 0.9988 | +| edge | IMPORTS | 82995 | 7 | 5 | 0.9999 | 0.9999 | 0.9999 | +| edge | INSTANTIATES | 25525 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| edge | DEFINES / DEFINES_METHOD / CONTAINS_* / INHERITS / OVERRIDES | (all) | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| node | all kinds | (all) | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | + +**10 of 25** edits now reproduce a clean re-index exactly (up from 3 before the +fix), `INSTANTIATES` is perfect, and `IMPORTS` is all but perfect. For reference, +before the fix the same sample showed CALLS `fp`/`fn` of 4/3318, IMPORTS 7/599, +INSTANTIATES 0/414, and only 3/25 clean-equivalent: the fix cut CALLS divergence +by roughly 75% and IMPORTS by 98%. The residual is the changed file's own calls +resolved through type inference / protocol dispatch (the `fp`/`fn` are mostly the +same call resolved to the protocol method incrementally versus the concrete +implementation in a clean pass), which needs full cross-file type context to +close (see the methodology note above). Tracked under +[issue #532](https://github.com/vitali87/code-graph-rag/issues/532). 
+ +### Import resolution — internal vs external (`uv run python -m evals.import_resolution`) + +| category | label | tp | fp | fn | precision | recall | f1 | +|---|---|---|---|---|---|---|---| +| edge | imports-all | 1986 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| edge | imports-internal | 462 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| edge | imports-external | 1524 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | + +A clean negative result: on `codebase_rag`, cgr classifies every import correctly, +internal and external alike. This rules out #498-style misclassification on this +corpus and stands as a regression guard. (The first run reported 247 missing +externals; investigation showed they were all `from __future__ import ...`, an +oracle over-count now corrected rather than a cgr bug.) + +### Inheritance — resolved INHERITS and OVERRIDES (`uv run python -m evals.inheritance`) + +| category | label | tp | fp | fn | precision | recall | f1 | +|---|---|---|---|---|---|---|---| +| edge | inherits-resolved | 31 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | +| edge | overrides | 57 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | + +Another clean negative result within the graded scope (top-level classes; +single-inheritance for overrides): cgr resolves every base to the correct +first-party class and attributes every single-inheritance override to the right +base. The first run showed minor `fp`/`fn`; investigation traced all of them to +oracle scope (a class nested in a test method, and multi-base mixin classes whose +override attribution is MRO-decided), not cgr defects, so the scope was tightened +rather than the discrepancies reported. 
+ +### Instantiation — file-level INSTANTIATES (`uv run python -m evals.instantiation`) + +| category | label | tp | fp | fn | precision | recall | f1 | +|---|---|---|---|---|---|---|---| +| edge | instantiates | 378 | 0 | 0 | 1.0000 | 1.0000 | 1.0000 | + +cgr localizes every constructor call exactly on `codebase_rag`: the +`INSTANTIATES` set and the ast oracle's constructor-call set are identical. + +### Next step: agentic resolved-rate (out of scope here) + +The above isolates retrieval. The complementary end-to-end measurement is GKG's +own design: hold one agent and model fixed and vary only the tools (graph tools +versus grep), then report SWE-bench-style resolved rate over real issues. That +needs an LLM, a container harness, and many runs, so it is tracked separately +rather than run inside this deterministic harness. diff --git a/evals/__init__.py b/evals/__init__.py new file mode 100644 index 000000000..2a41b33c2 --- /dev/null +++ b/evals/__init__.py @@ -0,0 +1,5 @@ +from .ast_oracle import extract_oracle_graph +from .cgr_graph import extract_cgr_graph +from .score import score + +__all__ = ["extract_cgr_graph", "extract_oracle_graph", "score"] diff --git a/evals/ast_oracle.py b/evals/ast_oracle.py new file mode 100644 index 000000000..aac365052 --- /dev/null +++ b/evals/ast_oracle.py @@ -0,0 +1,168 @@ +import ast +from collections.abc import Iterator +from pathlib import Path + +from loguru import logger + +from codebase_rag import constants as cs + +from . import constants as ec +from . 
import logs as ls +from .types_defs import DefNode, EdgeKey, GraphData, NameEdge, NodeKey + +_MODULE = cs.NodeLabel.MODULE.value +_CLASS = cs.NodeLabel.CLASS.value +_FUNCTION = cs.NodeLabel.FUNCTION.value +_METHOD = cs.NodeLabel.METHOD.value +_DEFINES = cs.RelationshipType.DEFINES.value +_DEFINES_METHOD = cs.RelationshipType.DEFINES_METHOD.value +_INHERITS = cs.RelationshipType.INHERITS.value +_IMPORTS = cs.RelationshipType.IMPORTS.value + + +def extract_oracle_graph(target: Path, project_name: str) -> GraphData: + nodes: dict[NodeKey, DefNode] = {} + edges: set[EdgeKey] = set() + name_edges: set[NameEdge] = set() + + parsed: list[tuple[str, ast.Module]] = [] + module_index: dict[str, str] = {} + for path in _iter_py_files(target): + rel = path.relative_to(target).as_posix() + try: + tree = ast.parse(path.read_text(encoding="utf-8")) + except (SyntaxError, UnicodeDecodeError, ValueError) as error: + logger.warning(ls.ORACLE_PARSE_FAILED.format(path=rel, error=error)) + continue + parsed.append((rel, tree)) + module_index[_module_dotted(rel, project_name)] = rel + + for rel, tree in parsed: + module_key = NodeKey(_MODULE, rel, ec.MODULE_START_LINE) + nodes[module_key] = DefNode(module_key, Path(rel).stem, 0) + _walk_scope(tree.body, _MODULE, module_key, rel, nodes, edges, name_edges) + for target_file in _import_targets(tree, rel, module_index, project_name): + name_edges.add(NameEdge(_IMPORTS, module_key, target_file)) + + return GraphData(nodes=nodes, edges=edges, name_edges=name_edges) + + +def _module_dotted(rel: str, project_name: str) -> str: + parts = list(Path(rel).with_suffix("").parts) + if parts and parts[-1] == ec.INIT_STEM: + parts = parts[:-1] + return cs.SEPARATOR_DOT.join([project_name, *parts]) + + +def _from_base_parts(node: ast.ImportFrom, pkg_parts: list[str]) -> list[str] | None: + if node.level == 0: + return node.module.split(cs.SEPARATOR_DOT) if node.module else None + keep = len(pkg_parts) - (node.level - 1) + if keep < 0: + return None + 
parts = pkg_parts[:keep] + if node.module: + parts = parts + node.module.split(cs.SEPARATOR_DOT) + return parts + + +def _import_targets( + tree: ast.Module, rel: str, module_index: dict[str, str], project_name: str +) -> set[str]: + pkg_parts = [project_name, *Path(rel).parent.parts] + targets: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + if alias.name in module_index: + targets.add(module_index[alias.name]) + elif isinstance(node, ast.ImportFrom): + base_parts = _from_base_parts(node, pkg_parts) + if base_parts is None: + continue + base_dotted = cs.SEPARATOR_DOT.join(base_parts) + for alias in node.names: + if alias.name == "*": + if base_dotted in module_index: + targets.add(module_index[base_dotted]) + continue + sub = cs.SEPARATOR_DOT.join([*base_parts, alias.name]) + if sub in module_index: + targets.add(module_index[sub]) + elif base_dotted in module_index: + targets.add(module_index[base_dotted]) + return targets + + +def _base_name(expr: ast.expr) -> str | None: + if isinstance(expr, ast.Name): + return expr.id + if isinstance(expr, ast.Attribute): + return expr.attr + if isinstance(expr, ast.Subscript): + return _base_name(expr.value) + return None + + +def _iter_py_files(target: Path) -> Iterator[Path]: + for path in target.rglob(f"*{ec.PY_SUFFIX}"): + parts = path.relative_to(target).parts + if set(parts) & ec.IGNORE_DIRS: + continue + if any(part.endswith(ec.EGG_INFO_SUFFIX) for part in parts): + continue + yield path + + +def _end_line(node: ast.stmt) -> int: + end = node.end_lineno + return end if end is not None else node.lineno + + +def _child_stmts(node: ast.stmt) -> list[ast.stmt]: + out: list[ast.stmt] = [] + for _field, value in ast.iter_fields(node): + for item in value if isinstance(value, list) else [value]: + if isinstance(item, ast.stmt): + out.append(item) + elif isinstance(item, ast.ExceptHandler | ast.match_case): + # (H) except handlers and match cases are not ast.stmt 
but hold + # (H) statement bodies that may define functions/classes. + out.extend(s for s in item.body if isinstance(s, ast.stmt)) + return out + + +def _walk_scope( + stmts: list[ast.stmt], + scope_kind: str, + scope_key: NodeKey, + rel: str, + nodes: dict[NodeKey, DefNode], + edges: set[EdgeKey], + name_edges: set[NameEdge], +) -> None: + for node in stmts: + if isinstance(node, ast.ClassDef): + key = NodeKey(_CLASS, rel, node.lineno) + nodes[key] = DefNode(key, node.name, _end_line(node)) + if scope_kind == _MODULE: + edges.add(EdgeKey(_DEFINES, scope_key, key)) + for base in node.bases: + if base_name := _base_name(base): + name_edges.add(NameEdge(_INHERITS, key, base_name)) + _walk_scope(node.body, _CLASS, key, rel, nodes, edges, name_edges) + elif isinstance(node, ast.FunctionDef | ast.AsyncFunctionDef): + if scope_kind == _CLASS: + key = NodeKey(_METHOD, rel, node.lineno) + nodes[key] = DefNode(key, node.name, _end_line(node)) + edges.add(EdgeKey(_DEFINES_METHOD, scope_key, key)) + else: + key = NodeKey(_FUNCTION, rel, node.lineno) + nodes[key] = DefNode(key, node.name, _end_line(node)) + if scope_kind == _MODULE: + edges.add(EdgeKey(_DEFINES, scope_key, key)) + _walk_scope(node.body, _FUNCTION, key, rel, nodes, edges, name_edges) + else: + _walk_scope( + _child_stmts(node), scope_kind, scope_key, rel, nodes, edges, name_edges + ) diff --git a/evals/c_retrieval.py b/evals/c_retrieval.py new file mode 100644 index 000000000..14cfd2444 --- /dev/null +++ b/evals/c_retrieval.py @@ -0,0 +1,118 @@ +# (H) Multi-language retrieval (C). Extends the file-level call-localization +# (H) benchmark to C: for each first-party C function, which files call it. +# (H) cgr's C CALLS edges (reduced to (caller_file, callee_simple_name)) are +# (H) graded against call sites extracted by libclang, over the same first-party +# (H) name universe. 
libclang resolves the true translation-unit call graph, +# (H) independent of cgr's tree-sitter C frontend (cgr parses C with tree-sitter +# (H) by default; CPP_FRONTEND=libclang is off), so this measures cgr's cross-file +# (H) C call resolution against ground truth (mirrors evals/lua_retrieval.py). +from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger + +from codebase_rag import constants as cs + +from . import constants as ec +from . import logs as ls +from .cgr_graph import _capture +from .oracles import cpp_available, run_c_call_oracle +from .score import _prf +from .structure_report import render, write_outputs +from .types_defs import DiffBucket, LocationStats, ScoreResult, ScoreRow + +console_target = Path(ec.C_DEFAULT_TARGET) + +_CALLS = cs.RelationshipType.CALLS.value +_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0) + +CallEdge = tuple[str, str] + + +def oracle_c_call_edges( + target: Path, +) -> tuple[set[CallEdge], frozenset[str], frozenset[str]]: + return run_c_call_oracle(target) + + +def cgr_c_call_edges( + target: Path, project: str, declared: frozenset[str], covered: frozenset[str] +) -> set[CallEdge]: + ingestor = _capture(target, project) + caller_path: dict[tuple[str, str], str] = { + (str(label), str(uid)): str(props[cs.KEY_PATH]) + for (label, uid), props in ingestor.nodes.items() + if props.get(cs.KEY_PATH) and str(props[cs.KEY_PATH]).endswith(ec.C_SUFFIXES) + } + edges: set[CallEdge] = set() + for from_label, from_val, rel_type, _to_label, to_val in ingestor.rels: + if rel_type != _CALLS: + continue + path = caller_path.get((str(from_label), str(from_val))) + # (H) Grade only files the oracle parsed cleanly (its authoritative set). 
+ if path is None or path not in covered: + continue + name = str(to_val).split(cs.SEPARATOR_DOT)[-1] + if name in declared: + edges.add((path, name)) + return edges + + +def _edge_repr(edge: CallEdge) -> str: + return ec.C_CALL_EDGE_REPR.format(file=edge[0], name=edge[1]) + + +def score_c_retrieval(cgr: set[CallEdge], oracle: set[CallEdge]) -> ScoreResult: + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + row = _prf(ec.Category.RETRIEVAL.value, ec.C_RETRIEVAL_LABEL, cgr, oracle) + if row is not None: + rows.append(row) + diff[ec.C_RETRIEVAL_DIFF_PREFIX + ec.C_RETRIEVAL_LABEL] = DiffBucket( + missing=[_edge_repr(e) for e in sorted(oracle - cgr)], + extra=[_edge_repr(e) for e in sorted(cgr - oracle)], + ) + return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff) + + +def main( + target: Annotated[ + Path, typer.Option(help="Directory of C sources to evaluate call retrieval.") + ] = console_target, + project_name: Annotated[ + str, typer.Option(help="cgr project name; defaults to target dir name.") + ] = "", + out_dir: Annotated[ + Path, + typer.Option(help="Directory for c_retrieval_scores.csv and diff json."), + ] = Path(ec.DEFAULT_OUT_DIR), +) -> None: + if not cpp_available(): + logger.error(ls.C_ORACLE_MISSING) + raise typer.Exit(code=1) + + target = target.resolve() + project = project_name or target.name + + logger.info(ls.C_RETRIEVAL_ORACLE.format(target=target)) + oracle, declared, covered = oracle_c_call_edges(target) + logger.success(ls.C_RETRIEVAL_ORACLE_DONE.format(count=len(oracle))) + logger.info(ls.C_RETRIEVAL_COVERED.format(count=len(covered))) + + logger.info(ls.C_RETRIEVAL_CGR.format(target=target, project=project)) + cgr = cgr_c_call_edges(target, project, declared, covered) + logger.success(ls.C_RETRIEVAL_CGR_DONE.format(count=len(cgr))) + + result = score_c_retrieval(cgr, oracle) + write_outputs( + result, + out_dir, + ec.C_RETRIEVAL_SCORES_FILENAME, + ec.C_RETRIEVAL_DIFF_FILENAME, + ) + render(result, 
ec.C_RETRIEVAL_TITLE) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/calls_trace.py b/evals/calls_trace.py new file mode 100644 index 000000000..2d9483b07 --- /dev/null +++ b/evals/calls_trace.py @@ -0,0 +1,109 @@ +import inspect +import sys +from collections.abc import Callable +from pathlib import Path +from types import CodeType, FrameType + +from . import constants as ec + +# (H) Substrings of co_qualname that mark compiler-generated scopes; _code_qn returns +# (H) None for any code object whose qualname contains one of them. Every entry must +# (H) be non-empty: an empty string is a substring of every qualname and would make +# (H) _code_qn reject every frame. +_SYNTHETIC_QUALNAME_MARKERS = ( + "<lambda>", + "<listcomp>", + "<setcomp>", + "<dictcomp>", + "<genexpr>", + "<module>", +) +# (H) Segment CPython inserts before a nested definition's name ("outer.<locals>.inner"); +# (H) _code_qn strips it so the qualname reads "outer.inner". +_LOCALS_SEGMENT = "<locals>." + +# (H) functools.wraps decorator wrappers: the inner function is named "wrapper" and +# (H) closes over the wrapped callable under one of these free-variable names. cgr +# (H) resolves a call to a decorated function as a call to the function itself (it sees +# (H) through the decorator), so the trace must attribute the generic wrapper frame to +# (H) the function it wraps; otherwise calls would be credited to the recycled wrapper +# (H) node. See evals/README.md ("Decorator-wrapper normalization"). 
+_WRAPPER_CODE_NAME = "wrapper" +_WRAPPED_FREE_VARS = ("func", "fn", "wrapped", "method", "f") + + +def _code_qn(code: CodeType, target: Path, project_name: str) -> str | None: + try: + file = Path(code.co_filename).resolve() + except (OSError, ValueError): + return None + try: + rel = file.relative_to(target) + except ValueError: + return None + if not file.name.endswith(ec.PY_SUFFIX): + return None + + qualname = code.co_qualname + if any(marker in qualname for marker in _SYNTHETIC_QUALNAME_MARKERS): + return None + qualname = qualname.replace(_LOCALS_SEGMENT, "") + + parts = list(rel.with_suffix("").parts) + if parts and parts[-1] == ec.INIT_STEM: + parts = parts[:-1] + module_dotted = ec.SEP.join([project_name, *parts]) + return ec.SEP.join([module_dotted, qualname]) + + +def _wrapped_code(frame: FrameType) -> CodeType | None: + # (H) Recover the wrapped function's code from a @wraps wrapper frame via its + # (H) closed-over callable, following any __wrapped__ chain to the real function. 
+ code = frame.f_code + if code.co_name != _WRAPPER_CODE_NAME: + return None + for name in _WRAPPED_FREE_VARS: + if name not in code.co_freevars: + continue + candidate = frame.f_locals.get(name) + if not callable(candidate): + continue + unwrapped = inspect.unwrap(candidate) + wrapped_code = getattr(unwrapped, "__code__", None) or getattr( + getattr(unwrapped, "__func__", None), "__code__", None + ) + if isinstance(wrapped_code, CodeType): + return wrapped_code + return None + + +def _frame_qn(frame: FrameType, target: Path, project_name: str) -> str | None: + if (wrapped := _wrapped_code(frame)) is not None and ( + qn := _code_qn(wrapped, target, project_name) + ) is not None: + return qn + return _code_qn(frame.f_code, target, project_name) + + +def trace_calls( + workload: Callable[[], None], target: Path, project_name: str +) -> set[tuple[str, str]]: + target = target.resolve() + edges: set[tuple[str, str]] = set() + + def tracer(frame: FrameType, event: str, arg: object) -> None: + if event != ec.TRACE_CALL_EVENT: + return None + caller = frame.f_back + if caller is None: + return None + callee_qn = _frame_qn(frame, target, project_name) + if callee_qn is None: + return None + caller_qn = _frame_qn(caller, target, project_name) + if caller_qn is None or caller_qn == callee_qn: + return None + edges.add((caller_qn, callee_qn)) + return None + + previous = sys.gettrace() + sys.settrace(tracer) + try: + workload() + finally: + sys.settrace(previous) + return edges diff --git a/evals/cgr_graph.py b/evals/cgr_graph.py new file mode 100644 index 000000000..7d6650475 --- /dev/null +++ b/evals/cgr_graph.py @@ -0,0 +1,601 @@ +from pathlib import Path + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +from . 
# (H) File: evals/cgr_graph.py (new file in this diff).
from pathlib import Path

from codebase_rag import constants as cs
from codebase_rag.graph_updater import GraphUpdater
from codebase_rag.parser_loader import load_parsers
from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow

from . import constants as ec
from .types_defs import DefNode, EdgeKey, GraphData, NameEdge, NodeKey

# (H) (from_label, from_value, rel_type, to_label, to_value)
_RelTuple = tuple[str, PropertyValue, str, str, PropertyValue]
# (H) (label, unique-constraint value)
_NodeId = tuple[str, PropertyValue]


class _CapturingIngestor:
    # (H) Write-only ingestor: records every node and relationship it is handed
    # (H) and answers every query with an empty result.
    def __init__(self) -> None:
        self.nodes: dict[_NodeId, PropertyDict] = {}
        self.rels: list[_RelTuple] = []

    def ensure_node_batch(self, label: str, properties: PropertyDict) -> None:
        # (H) Nodes are keyed by the label's unique-constraint property; a later
        # (H) write for the same key replaces the stored (copied) properties.
        unique_key = cs.NODE_UNIQUE_CONSTRAINTS[label]
        uid = properties[unique_key]
        self.nodes[(str(label), uid)] = dict(properties)

    def ensure_relationship_batch(
        self,
        from_spec: tuple[str, str, PropertyValue],
        rel_type: str,
        to_spec: tuple[str, str, PropertyValue],
        properties: PropertyDict | None = None,
    ) -> None:
        # (H) Only labels and key values are kept; the key names and the
        # (H) relationship properties are not recorded.
        src_label, _, src_val = from_spec
        dst_label, _, dst_val = to_spec
        record = (str(src_label), src_val, str(rel_type), str(dst_label), dst_val)
        self.rels.append(record)

    def flush_all(self) -> None:
        return None

    def fetch_all(
        self, query: str, params: PropertyDict | None = None
    ) -> list[ResultRow]:
        return []

    def execute_write(self, query: str, params: PropertyDict | None = None) -> None:
        return None


_MODULE_LABEL = cs.NodeLabel.MODULE.value
_FILE_LABEL = cs.NodeLabel.FILE.value
_FOLDER_LABEL = cs.NodeLabel.FOLDER.value
# (H) Containment relationships followed when deleting a module's subtree.
_DEFINES_RELS = frozenset(
    {
        cs.RelationshipType.DEFINES.value,
        cs.RelationshipType.DEFINES_METHOD.value,
    }
)
# (H) Node labels reported by the "all definition qualified names" query.
_DEFINITION_LABELS = frozenset(
    {
        cs.NodeLabel.FUNCTION.value,
        cs.NodeLabel.METHOD.value,
        cs.NodeLabel.CLASS.value,
        cs.NodeLabel.INTERFACE.value,
        cs.NodeLabel.ENUM.value,
        cs.NodeLabel.TYPE.value,
        cs.NodeLabel.UNION.value,
    }
)
# (H) Relationship types reported by the inbound-edges query.
_INBOUND_DEPENDENT_RELS = frozenset(
    {
        cs.RelationshipType.CALLS.value,
        cs.RelationshipType.INSTANTIATES.value,
        cs.RelationshipType.IMPORTS.value,
        cs.RelationshipType.INHERITS.value,
        cs.RelationshipType.OVERRIDES.value,
    }
)


def _text(value: PropertyValue) -> str | None:
    # (H) path / qualified_name / absolute_path are always textual; narrow the
    # (H) general PropertyValue (which includes list[str]) so the row matches the
    # (H) ResultValue shape the prune query consumer expects.
    if isinstance(value, str):
        return value
    return None
always textual; narrow the + # (H) general PropertyValue (which includes list[str]) so the row matches the + # (H) ResultValue shape the prune query consumer expects. + return value if isinstance(value, str) else None + + +class _StatefulIngestor: + # (H) A faithful in-memory stand-in for the persistent graph store. Unlike + # (H) _CapturingIngestor it implements the QueryProtocol delete/fetch Cypher + # (H) the incremental updater issues, so a graph mutated by an incremental run + # (H) can be compared against a clean re-index. Only the exact queries cgr + # (H) emits are emulated (matched by identity), nothing more. + def __init__(self) -> None: + self.nodes: dict[_NodeId, PropertyDict] = {} + self.edges: set[_RelTuple] = set() + + def ensure_node_batch(self, label: str, properties: PropertyDict) -> None: + uid = properties[cs.NODE_UNIQUE_CONSTRAINTS[label]] + self.nodes[(str(label), uid)] = dict(properties) + + def ensure_relationship_batch( + self, + from_spec: tuple[str, str, PropertyValue], + rel_type: str, + to_spec: tuple[str, str, PropertyValue], + properties: PropertyDict | None = None, + ) -> None: + from_label, _from_key, from_val = from_spec + to_label, _to_key, to_val = to_spec + self.edges.add( + (str(from_label), from_val, str(rel_type), str(to_label), to_val) + ) + + def flush_all(self) -> None: + return None + + def fetch_all( + self, query: str, params: PropertyDict | None = None + ) -> list[ResultRow]: + match query: + case cs.CYPHER_ALL_FILE_PATHS: + return self._path_rows(_FILE_LABEL) + case cs.CYPHER_ALL_FOLDER_PATHS: + return self._path_rows(_FOLDER_LABEL) + case cs.CYPHER_INBOUND_EDGES: + raw_paths = params.get(cs.CYPHER_PARAM_PATHS) if params else None + changed: set[str] = ( + set(raw_paths) if isinstance(raw_paths, list) else set() + ) + inbound: list[ResultRow] = [] + for from_label, from_val, rel_type, to_label, to_val in self.edges: + if rel_type not in _INBOUND_DEPENDENT_RELS: + continue + target = self.nodes.get((to_label, to_val)) 
+ caller = self.nodes.get((from_label, from_val)) + if target is None or caller is None: + continue + caller_path = caller.get(cs.KEY_PATH) + if target.get(cs.KEY_PATH) not in changed or caller_path in changed: + continue + inbound.append( + { + cs.KEY_CALLER_LABEL: from_label, + cs.KEY_CALLER_QN: _text(from_val), + cs.KEY_REL: rel_type, + cs.KEY_TARGET_LABEL: to_label, + cs.KEY_TARGET_QN: _text(to_val), + } + ) + return inbound + case cs.CYPHER_ALL_DEFINITION_QNS: + defs: list[ResultRow] = [] + for (label, uid), props in self.nodes.items(): + if label not in _DEFINITION_LABELS: + continue + qn = props.get(cs.KEY_QUALIFIED_NAME, uid) + row: ResultRow = { + cs.KEY_QUALIFIED_NAME: _text(qn), + cs.KEY_LABEL: label, + } + defs.append(row) + return defs + case cs.CYPHER_ALL_MODULE_PATHS_INTERNAL: + rows: list[ResultRow] = [] + for (label, _uid), props in self.nodes.items(): + if label != _MODULE_LABEL or props.get(cs.KEY_IS_EXTERNAL) is True: + continue + row: ResultRow = { + cs.KEY_PATH: _text(props.get(cs.KEY_PATH)), + cs.KEY_QUALIFIED_NAME: _text(props.get(cs.KEY_QUALIFIED_NAME)), + } + rows.append(row) + return rows + case _: + return [] + + def execute_write(self, query: str, params: PropertyDict | None = None) -> None: + path = params.get(cs.KEY_PATH) if params else None + match query: + case cs.CYPHER_DELETE_MODULE: + self._delete_module_subtree(path) + case cs.CYPHER_DELETE_FILE: + self._detach_delete(self._nodes_at_path(_FILE_LABEL, path)) + case cs.CYPHER_DELETE_FOLDER: + self._detach_delete(self._nodes_at_path(_FOLDER_LABEL, path)) + case cs.CYPHER_DELETE_ORPHAN_EXTERNAL_MODULES: + self._delete_orphan_external_modules() + case _: + return None + + def _path_rows(self, label: str) -> list[ResultRow]: + rows: list[ResultRow] = [] + for (node_label, _uid), props in self.nodes.items(): + if node_label != label: + continue + row: ResultRow = { + cs.KEY_PATH: _text(props.get(cs.KEY_PATH)), + cs.KEY_ABSOLUTE_PATH: _text(props.get(cs.KEY_ABSOLUTE_PATH)), + } + 
rows.append(row) + return rows + + def _nodes_at_path(self, label: str, path: PropertyValue) -> set[_NodeId]: + return { + (node_label, uid) + for (node_label, uid), props in self.nodes.items() + if node_label == label and props.get(cs.KEY_PATH) == path + } + + def _delete_module_subtree(self, path: PropertyValue) -> None: + doomed: set[_NodeId] = set() + frontier = list(self._nodes_at_path(_MODULE_LABEL, path)) + while frontier: + node = frontier.pop() + if node in doomed: + continue + doomed.add(node) + for from_label, from_val, rel_type, to_label, to_val in self.edges: + if rel_type in _DEFINES_RELS and (from_label, from_val) == node: + child = (to_label, to_val) + if child not in doomed: + frontier.append(child) + self._detach_delete(doomed) + + def _delete_orphan_external_modules(self) -> None: + incoming = {(to_label, to_val) for _f, _v, _r, to_label, to_val in self.edges} + doomed = { + (label, uid) + for (label, uid), props in self.nodes.items() + if label == _MODULE_LABEL + and props.get(cs.KEY_IS_EXTERNAL) is True + and (label, uid) not in incoming + } + self._detach_delete(doomed) + + def _detach_delete(self, doomed: set[_NodeId]) -> None: + if not doomed: + return + for node in doomed: + self.nodes.pop(node, None) + self.edges = { + edge + for edge in self.edges + if (edge[0], edge[1]) not in doomed and (edge[3], edge[4]) not in doomed + } + + +def _capture(target: Path, project_name: str) -> _CapturingIngestor: + parsers, queries = load_parsers() + ingestor = _CapturingIngestor() + GraphUpdater( + ingestor=ingestor, + repo_path=target, + parsers=parsers, + queries=queries, + project_name=project_name, + ).run(force=True) + return ingestor + + +def extract_cgr_graph(target: Path, project_name: str) -> GraphData: + return _to_graph_data(_capture(target, project_name), project_name) + + +def extract_cgr_calls(target: Path, project_name: str) -> set[tuple[str, str]]: + ingestor = _capture(target, project_name) + calls_value = 
def _lang_node_key(
    label: str, props: PropertyDict, suffix: str | tuple[str, ...]
) -> NodeKey | None:
    # (H) Key a node by (label, file, start line). None when the node has no
    # (H) path, the path does not end with `suffix`, or it has no numeric start.
    raw_path = props.get(cs.KEY_PATH)
    if raw_path is None:
        return None
    file = str(raw_path)
    start = props.get(cs.KEY_START_LINE)
    if file.endswith(suffix) and isinstance(start, int | float):
        return NodeKey(label, file, int(start))
    return None


def extract_cgr_lang_nodes(
    target: Path,
    project_name: str,
    suffix: str | tuple[str, ...],
    kind_values: frozenset[str],
) -> dict[NodeKey, DefNode]:
    # (H) Capture the graph for `target` and keep the nodes whose label is in
    # (H) `kind_values` and whose file matches `suffix`. end_line defaults to 0
    # (H) when the node carries no numeric end line.
    captured = _capture(target, project_name)
    found: dict[NodeKey, DefNode] = {}
    for (label, _uid), props in captured.nodes.items():
        key = _lang_node_key(label, props, suffix) if label in kind_values else None
        if key is None:
            continue
        end = props.get(cs.KEY_END_LINE)
        found[key] = DefNode(
            key,
            str(props.get(cs.KEY_NAME, "")),
            int(end) if isinstance(end, int | float) else 0,
        )
    return found


def _lang_endpoint_key(
    label: str,
    props: PropertyDict,
    suffix: str | tuple[str, ...],
    exclude_suffix: str | None = None,
) -> NodeKey | None:
    # (H) Resolve any node (incl. the per-file Module, which carries no
    # (H) start_line) to a NodeKey so containment edges can join on it. cgr keys
    # (H) module-level DEFINES parents at the module node; mirror the ast oracle
    # (H) by placing the module at MODULE_START_LINE.
    raw_path = props.get(cs.KEY_PATH)
    if raw_path is None:
        return None
    file = str(raw_path)
    if not file.endswith(suffix):
        return None
    if exclude_suffix is not None and file.endswith(exclude_suffix):
        return None
    start = props.get(cs.KEY_START_LINE)
    # (H) A numeric start line always wins. For a Module that is an inline module
    # (H) (Rust `mod`) carrying its declaration line, which keeps it distinct from
    # (H) the file module so nested containment can join.
    if isinstance(start, int | float):
        return NodeKey(label, file, int(start))
    # (H) Only the per-file module may lack a start line.
    if label == cs.NodeLabel.MODULE.value:
        return NodeKey(label, file, ec.MODULE_START_LINE)
    return None


def extract_cgr_lang_graph(
    target: Path,
    project_name: str,
    suffix: str | tuple[str, ...],
    kind_values: frozenset[str],
    exclude_suffix: str | None = None,
) -> GraphData:
    # (H) Capture the graph for `target` and project it onto NodeKeys: scored
    # (H) nodes, scored containment edges, and inheritance edges by base name.
    captured = _capture(target, project_name)
    nodes: dict[NodeKey, DefNode] = {}
    by_uid: dict[_NodeId, NodeKey] = {}
    for (label, uid), props in captured.nodes.items():
        endpoint = _lang_endpoint_key(label, props, suffix, exclude_suffix)
        if endpoint is None:
            continue
        # (H) Every resolvable node can be an edge endpoint; only the scored
        # (H) kinds are reported as nodes.
        by_uid[(label, uid)] = endpoint
        if label in kind_values:
            end = props.get(cs.KEY_END_LINE)
            end_line = int(end) if isinstance(end, int | float) else 0
            nodes[endpoint] = DefNode(
                endpoint, str(props.get(cs.KEY_NAME, "")), end_line
            )

    edges: set[EdgeKey] = set()
    name_edges: set[NameEdge] = set()
    for from_label, from_val, rel_type, to_label, to_val in captured.rels:
        if rel_type in ec.SCORED_EDGE_TYPE_VALUES:
            parent = by_uid.get((from_label, from_val))
            child = by_uid.get((to_label, to_val))
            if parent is None or child is None:
                continue
            edges.add(EdgeKey(rel_type, parent, child))
        elif rel_type in ec.INHERITANCE_NAME_EDGE_TYPE_VALUES:
            # (H) Inheritance is graded by the base's SIMPLE NAME (cgr's to-value
            # (H) is the resolved base qn, or the bare name when unresolved).
            source = by_uid.get((from_label, from_val))
            if source is None:
                continue
            # (H) Base simple name: cgr's resolved target may be a dotted qn
            # (H) (`module.Base`) or a Rust path (`std::io::Read`), so split on
            # (H) both `.` and `::`.
            dotted = str(to_val).replace(cs.SEPARATOR_DOUBLE_COLON, cs.SEPARATOR_DOT)
            base_name = dotted.rsplit(cs.SEPARATOR_DOT, 1)[-1]
            name_edges.add(NameEdge(rel_type, source, base_name))
    return GraphData(nodes=nodes, edges=edges, name_edges=name_edges)


def restrict_to_files(graph: GraphData, files: set[str]) -> GraphData:
    # (H) Scope a graph to a file universe. A compile_commands.json oracle only
    # (H) "sees" files its compiled TUs reach, while cgr indexes the whole tree
    # (H) (bundled test deps, uncompiled sources). Grading cgr's out-of-universe
    # (H) nodes against that oracle is meaningless, so restrict cgr to the files
    # (H) the oracle actually parsed before scoring. Drops only false positives:
    # (H) no oracle node lives outside its own universe, so recall is untouched.
    kept_nodes = {key: node for key, node in graph.nodes.items() if key.file in files}
    kept_edges = {
        edge
        for edge in graph.edges
        if edge.parent.file in files and edge.child.file in files
    }
    kept_name_edges = {
        name_edge for name_edge in graph.name_edges if name_edge.source.file in files
    }
    return GraphData(nodes=kept_nodes, edges=kept_edges, name_edges=kept_name_edges)
# (H) Per-language entry points: each binds a file suffix (or suffix tuple) and
# (H) the set of scored node kinds for that language.
def extract_cgr_cpp_nodes(target: Path, project_name: str) -> dict[NodeKey, DefNode]:
    return extract_cgr_lang_nodes(
        target,
        project_name,
        suffix=ec.CPP_SUFFIXES,
        kind_values=ec.CPP_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_cpp_graph(target: Path, project_name: str) -> GraphData:
    return extract_cgr_lang_graph(
        target,
        project_name,
        suffix=ec.CPP_SUFFIXES,
        kind_values=ec.CPP_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_go_nodes(target: Path, project_name: str) -> dict[NodeKey, DefNode]:
    return extract_cgr_lang_nodes(
        target,
        project_name,
        suffix=ec.GO_SUFFIX,
        kind_values=ec.GO_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_go_graph(target: Path, project_name: str) -> GraphData:
    return extract_cgr_lang_graph(
        target,
        project_name,
        suffix=ec.GO_SUFFIX,
        kind_values=ec.GO_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_rust_nodes(target: Path, project_name: str) -> dict[NodeKey, DefNode]:
    return extract_cgr_lang_nodes(
        target,
        project_name,
        suffix=ec.RS_SUFFIX,
        kind_values=ec.RS_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_rust_graph(target: Path, project_name: str) -> GraphData:
    return extract_cgr_lang_graph(
        target,
        project_name,
        suffix=ec.RS_SUFFIX,
        kind_values=ec.RS_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_lua_nodes(target: Path, project_name: str) -> dict[NodeKey, DefNode]:
    return extract_cgr_lang_nodes(
        target,
        project_name,
        suffix=ec.LUA_SUFFIX,
        kind_values=ec.LUA_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_lua_graph(target: Path, project_name: str) -> GraphData:
    return extract_cgr_lang_graph(
        target,
        project_name,
        suffix=ec.LUA_SUFFIX,
        kind_values=ec.LUA_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_php_nodes(target: Path, project_name: str) -> dict[NodeKey, DefNode]:
    return extract_cgr_lang_nodes(
        target,
        project_name,
        suffix=ec.PHP_SUFFIX,
        kind_values=ec.PHP_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_php_graph(target: Path, project_name: str) -> GraphData:
    return extract_cgr_lang_graph(
        target,
        project_name,
        suffix=ec.PHP_SUFFIX,
        kind_values=ec.PHP_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_java_nodes(target: Path, project_name: str) -> dict[NodeKey, DefNode]:
    return extract_cgr_lang_nodes(
        target,
        project_name,
        suffix=ec.JAVA_SUFFIX,
        kind_values=ec.JAVA_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_java_graph(target: Path, project_name: str) -> GraphData:
    return extract_cgr_lang_graph(
        target,
        project_name,
        suffix=ec.JAVA_SUFFIX,
        kind_values=ec.JAVA_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_js_nodes(target: Path, project_name: str) -> dict[NodeKey, DefNode]:
    return extract_cgr_lang_nodes(
        target,
        project_name,
        suffix=ec.JS_SUFFIXES,
        kind_values=ec.JS_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_js_graph(target: Path, project_name: str) -> GraphData:
    return extract_cgr_lang_graph(
        target,
        project_name,
        suffix=ec.JS_SUFFIXES,
        kind_values=ec.JS_SCORED_NODE_KIND_VALUES,
    )


def extract_cgr_ts_graph(target: Path, project_name: str) -> GraphData:
    return extract_cgr_lang_graph(
        target,
        project_name,
        suffix=ec.TS_SUFFIXES,
        kind_values=ec.TS_SCORED_NODE_KIND_VALUES,
        exclude_suffix=ec.TS_DTS_SUFFIX,
    )


def extract_cgr_ts_nodes(target: Path, project_name: str) -> dict[NodeKey, DefNode]:
    # (H) Scored TypeScript definition nodes keyed by (label, file, start line).
    captured = _capture(target, project_name)
    found: dict[NodeKey, DefNode] = {}
    for (label, _uid), props in captured.nodes.items():
        if label not in ec.TS_SCORED_NODE_KIND_VALUES:
            continue
        raw_path = props.get(cs.KEY_PATH)
        if raw_path is None:
            continue
        file = str(raw_path)
        # (H) Match the oracle: real .ts/.tsx sources, excluding .d.ts type stubs.
        is_ts_source = file.endswith(ec.TS_SUFFIXES) and not file.endswith(
            ec.TS_DTS_SUFFIX
        )
        start = props.get(cs.KEY_START_LINE)
        if not is_ts_source or not isinstance(start, int | float):
            continue
        key = NodeKey(label, file, int(start))
        end = props.get(cs.KEY_END_LINE)
        found[key] = DefNode(
            key,
            str(props.get(cs.KEY_NAME, "")),
            int(end) if isinstance(end, int | float) else 0,
        )
    return found
def _node_key(label: str, props: PropertyDict) -> NodeKey | None:
    # (H) Key a Python node by (label, file, start line); modules are placed at
    # (H) MODULE_START_LINE. None for non-.py paths or a missing start line.
    raw_path = props.get(cs.KEY_PATH)
    if raw_path is None:
        return None
    file = str(raw_path)
    if not file.endswith(ec.PY_SUFFIX):
        return None
    if label == cs.NodeLabel.MODULE.value:
        return NodeKey(label, file, ec.MODULE_START_LINE)
    start = props.get(cs.KEY_START_LINE)
    if isinstance(start, int | float):
        return NodeKey(label, file, int(start))
    return None


def _edge_allowed(rel_type: str, parent_kind: str) -> bool:
    # (H) DEFINES must hang off a Module; any other scored edge off a Class.
    if rel_type == cs.RelationshipType.DEFINES.value:
        required_parent = cs.NodeLabel.MODULE
    else:
        required_parent = cs.NodeLabel.CLASS
    return parent_kind == required_parent.value


def _internal_target_file(qn: str, internal_modules: dict[str, str]) -> str | None:
    # (H) Longest dotted prefix of `qn` that names an internal module wins.
    parts = qn.split(cs.SEPARATOR_DOT)
    for end in range(len(parts), 0, -1):
        candidate = cs.SEPARATOR_DOT.join(parts[:end])
        if candidate in internal_modules:
            return internal_modules[candidate]
    return None


def _to_graph_data(ingestor: _CapturingIngestor, project_name: str) -> GraphData:
    # (H) Project a captured graph onto scored Python nodes, containment edges,
    # (H) and name edges (INHERITS by base simple name, IMPORTS by target file).
    nodes: dict[NodeKey, DefNode] = {}
    by_uid: dict[_NodeId, NodeKey] = {}
    for (label, uid), props in ingestor.nodes.items():
        if label not in ec.SCORED_NODE_KIND_VALUES:
            continue
        key = _node_key(label, props)
        if key is None:
            continue
        end = props.get(cs.KEY_END_LINE)
        nodes[key] = DefNode(
            key,
            str(props.get(cs.KEY_NAME, "")),
            int(end) if isinstance(end, int | float) else 0,
        )
        by_uid[(label, uid)] = key

    edges: set[EdgeKey] = set()
    for from_label, from_val, rel_type, to_label, to_val in ingestor.rels:
        if rel_type not in ec.SCORED_EDGE_TYPE_VALUES:
            continue
        parent = by_uid.get((from_label, from_val))
        child = by_uid.get((to_label, to_val))
        if parent is None or child is None:
            continue
        if _edge_allowed(rel_type, parent.kind):
            edges.add(EdgeKey(rel_type, parent, child))

    prefix = project_name + cs.SEPARATOR_DOT
    # (H) Only real in-repo Python modules count as internal import targets. cgr
    # (H) also emits placeholder MODULE nodes for unresolved imports whose path is
    # (H) the dotted import name (e.g. "thrift.TTornado", "std.set"); requiring a
    # (H) .py path excludes those so IMPORTS is graded against real files only,
    # (H) consistent with the .py node filter and the ast oracle.
    internal_modules: dict[str, str] = {}
    for (label, uid), props in ingestor.nodes.items():
        if label != cs.NodeLabel.MODULE.value or not props.get(cs.KEY_PATH):
            continue
        module_path = str(props[cs.KEY_PATH])
        if not module_path.endswith(ec.PY_SUFFIX):
            continue
        module_qn = str(uid)
        if module_qn == project_name or module_qn.startswith(prefix):
            internal_modules[module_qn] = module_path

    inherits = cs.RelationshipType.INHERITS.value
    imports = cs.RelationshipType.IMPORTS.value
    name_edges: set[NameEdge] = set()
    for from_label, from_val, rel_type, _to_label, to_val in ingestor.rels:
        if rel_type not in ec.SCORED_NAME_EDGE_TYPE_VALUES:
            continue
        source = by_uid.get((from_label, from_val))
        if source is None:
            continue
        if rel_type == inherits:
            base_name = str(to_val).rsplit(cs.SEPARATOR_DOT, 1)[-1]
            name_edges.add(NameEdge(rel_type, source, base_name))
        elif rel_type == imports:
            target_path = _internal_target_file(str(to_val), internal_modules)
            if target_path is not None:
                name_edges.add(NameEdge(rel_type, source, target_path))

    return GraphData(nodes=nodes, edges=edges, name_edges=name_edges)
# (H) File: evals/cli.py (new file in this diff).
import csv
import json
from pathlib import Path
from typing import Annotated

import typer
from loguru import logger
from rich.console import Console
from rich.table import Table

from . import constants as ec
from . import logs as ls
from .ast_oracle import extract_oracle_graph
from .cgr_graph import extract_cgr_graph
from .score import score
from .types_defs import ScoreResult

console = Console()


def main(
    target: Annotated[
        Path, typer.Option(help="Directory to evaluate (cgr repo source).")
    ] = Path(ec.DEFAULT_TARGET),
    project_name: Annotated[
        str, typer.Option(help="cgr project name; defaults to target dir name.")
    ] = "",
    out_dir: Annotated[
        Path, typer.Option(help="Directory for scores.csv and diff.json.")
    ] = Path(ec.DEFAULT_OUT_DIR),
) -> None:
    # (H) Extract the cgr graph and the oracle graph for `target`, score one
    # (H) against the other, write the result files, and print the tables.
    target = target.resolve()
    project = project_name or target.name

    logger.info(ls.EXTRACTING_CGR.format(target=target, project=project))
    cgr_graph = extract_cgr_graph(target, project)
    cgr_summary = ls.CGR_GRAPH_DONE.format(
        nodes=len(cgr_graph.nodes), edges=len(cgr_graph.edges)
    )
    logger.success(cgr_summary)

    logger.info(ls.EXTRACTING_ORACLE.format(target=target))
    oracle_graph = extract_oracle_graph(target, project)
    oracle_summary = ls.ORACLE_GRAPH_DONE.format(
        nodes=len(oracle_graph.nodes), edges=len(oracle_graph.edges)
    )
    logger.success(oracle_summary)

    result = score(cgr_graph, oracle_graph)
    _write_outputs(result, out_dir)
    _render(result)


def _write_outputs(result: ScoreResult, out_dir: Path) -> None:
    # (H) Write the score rows as CSV and the diff as indented JSON under
    # (H) `out_dir`, creating the directory if needed.
    out_dir.mkdir(parents=True, exist_ok=True)

    scores_path = out_dir / ec.SCORES_FILENAME
    with scores_path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=list(ec.CSV_FIELDS))
        writer.writeheader()
        writer.writerows(result.rows)
    logger.success(ls.WROTE_SCORES.format(path=scores_path))

    diff_path = out_dir / ec.DIFF_FILENAME
    diff_path.write_text(json.dumps(result.diff, indent=2), encoding="utf-8")
    logger.success(ls.WROTE_DIFF.format(path=diff_path))


def _render(result: ScoreResult) -> None:
    # (H) Print the per-row score table, then the end_line accuracy table.
    table = Table(title="cgr L1 structure eval (Python)")
    for left_column in ("category", "label"):
        table.add_column(left_column)
    for right_column in ("tp", "fp", "fn", "precision", "recall", "f1"):
        table.add_column(right_column, justify="right")
    for row in result.rows:
        counts = [str(row[field]) for field in ("tp", "fp", "fn")]
        ratios = [f"{row[field]:.4f}" for field in ("precision", "recall", "f1")]
        table.add_row(row["category"], row["label"], *counts, *ratios)
    console.print(table)

    loc = result.location
    location_table = Table(title="span (end_line) accuracy on matched defs")
    for column in (
        "matched",
        "end_exact",
        "end_within_1",
        "mean_abs_delta",
        "max_abs_delta",
    ):
        location_table.add_column(column, justify="right")
    location_table.add_row(
        str(loc.matched),
        str(loc.end_exact),
        str(loc.end_within_one),
        f"{loc.mean_abs_delta:.4f}",
        str(loc.max_abs_delta),
    )
    console.print(location_table)


if __name__ == "__main__":
    typer.run(main)
table = Table(title="cgr L1 structure eval (Python)") + table.add_column("category") + table.add_column("label") + table.add_column("tp", justify="right") + table.add_column("fp", justify="right") + table.add_column("fn", justify="right") + table.add_column("precision", justify="right") + table.add_column("recall", justify="right") + table.add_column("f1", justify="right") + for row in result.rows: + table.add_row( + row["category"], + row["label"], + str(row["tp"]), + str(row["fp"]), + str(row["fn"]), + f"{row['precision']:.4f}", + f"{row['recall']:.4f}", + f"{row['f1']:.4f}", + ) + console.print(table) + + loc = result.location + location_table = Table(title="span (end_line) accuracy on matched defs") + location_table.add_column("matched", justify="right") + location_table.add_column("end_exact", justify="right") + location_table.add_column("end_within_1", justify="right") + location_table.add_column("mean_abs_delta", justify="right") + location_table.add_column("max_abs_delta", justify="right") + location_table.add_row( + str(loc.matched), + str(loc.end_exact), + str(loc.end_within_one), + f"{loc.mean_abs_delta:.4f}", + str(loc.max_abs_delta), + ) + console.print(location_table) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/constants.py b/evals/constants.py new file mode 100644 index 000000000..5fa054b01 --- /dev/null +++ b/evals/constants.py @@ -0,0 +1,559 @@ +from enum import StrEnum + +from codebase_rag import constants as cs + +PY_SUFFIX = ".py" +MODULE_START_LINE = 0 + +SCORED_NODE_KINDS: tuple[cs.NodeLabel, ...] = ( + cs.NodeLabel.MODULE, + cs.NodeLabel.CLASS, + cs.NodeLabel.FUNCTION, + cs.NodeLabel.METHOD, +) +SCORED_NODE_KIND_VALUES: frozenset[str] = frozenset(k.value for k in SCORED_NODE_KINDS) +# (H) Span (end_line) grading excludes Module: a module's end_line is the whole +# (H) file, which the ast oracle records as 0, so it is not a meaningful def span. +SPANNED_NODE_KINDS_TUPLE: tuple[cs.NodeLabel, ...] 
= ( + cs.NodeLabel.CLASS, + cs.NodeLabel.FUNCTION, + cs.NodeLabel.METHOD, +) +SPANNED_NODE_KINDS: frozenset[str] = frozenset( + k.value for k in SPANNED_NODE_KINDS_TUPLE +) + +SCORED_EDGE_TYPES: tuple[cs.RelationshipType, ...] = ( + cs.RelationshipType.DEFINES, + cs.RelationshipType.DEFINES_METHOD, +) +SCORED_EDGE_TYPE_VALUES: frozenset[str] = frozenset(e.value for e in SCORED_EDGE_TYPES) + +# (H) L2 dependency edges scored by name/path rather than node location: +# (H) INHERITS by base simple name; IMPORTS by in-repo target file path (internal +# (H) module dependency graph only; external targets are DEPENDS_ON_EXTERNAL). +SCORED_NAME_EDGE_TYPES: tuple[cs.RelationshipType, ...] = ( + cs.RelationshipType.INHERITS, + cs.RelationshipType.IMPORTS, +) +INIT_STEM = "__init__" +SEP = cs.SEPARATOR_DOT +TRACE_CALL_EVENT = "call" +L3_DIFF_FILENAME = "calls_diff.json" +L3_WORKSPACE = "l3_workspace" +SCORED_NAME_EDGE_TYPE_VALUES: frozenset[str] = frozenset( + e.value for e in SCORED_NAME_EDGE_TYPES +) +DIFF_NAME_EDGE_PREFIX = "name_edge:" +NAME_EDGE_REPR = "{rel} {sfile}:{sstart} -> {target}" + +IGNORE_DIRS: frozenset[str] = frozenset( + { + ".git", + ".venv", + "venv", + "__pycache__", + "build", + "dist", + "site", + "node_modules", + ".ruff_cache", + ".pytest_cache", + ".mypy_cache", + ".ty_cache", + } +) +EGG_INFO_SUFFIX = ".egg-info" + + +class Category(StrEnum): + NODE = "node" + EDGE = "edge" + SPAN = "span" + RETRIEVAL = "retrieval" + + +AGGREGATE_LABEL = "ALL" + +# (H) Span grading: among nodes matched by (kind, file, start), how often cgr's +# (H) end_line agrees with the oracle's. Surfaced as its own category so a wrong +# (H) node span is visible even when node identity is already 1.0. +DIFF_SPAN_PREFIX = "span:" +SPAN_REPR = "{kind} {file}:{start}-{end}" + +CSV_FIELDS: tuple[str, ...] 
= ( + "category", + "label", + "tp", + "fp", + "fn", + "precision", + "recall", + "f1", +) +LEFT_COLUMNS: frozenset[str] = frozenset({"category", "label"}) + +DEFAULT_TARGET = "codebase_rag" +DEFAULT_OUT_DIR = "evals/results" +SCORES_FILENAME = "scores.csv" +DIFF_FILENAME = "diff.json" + +NODE_REPR = "{kind} {file}:{start} {name}" +EDGE_REPR = "{rel} {pfile}:{pstart} -> {cfile}:{cstart}" +DIFF_NODE_PREFIX = "node:" +DIFF_EDGE_PREFIX = "edge:" + +ROUND_DIGITS = 4 + +# (H) Go structure eval: cgr nodes graded against the go/ast oracle +# (H) (evals/oracles/go_ast.go), joined on (kind, file, start_line). +GO_SUFFIX = ".go" +GO_SCORED_NODE_KINDS: tuple[cs.NodeLabel, ...] = ( + cs.NodeLabel.FUNCTION, + cs.NodeLabel.METHOD, + cs.NodeLabel.CLASS, + cs.NodeLabel.INTERFACE, + cs.NodeLabel.TYPE, +) +GO_SCORED_NODE_KIND_VALUES: frozenset[str] = frozenset( + k.value for k in GO_SCORED_NODE_KINDS +) +GO_ORACLE_DIRNAME = "oracles" +GO_ORACLE_GO_FILE = "go_ast.go" +GO_BIN = "go" +GO_RUN = "run" +GO_MODULE_ENV = "GO111MODULE" +GO_MODULE_OFF = "off" +GO_DEFAULT_TARGET = "." +GO_SCORES_FILENAME = "go_scores.csv" +GO_DIFF_FILENAME = "go_diff.json" + +# (H) Multi-language retrieval (Go): file-level call localization for a second +# (H) language, cgr's Go CALLS vs go/ast call sites over the same first-party name +# (H) universe. The go/ast oracle is independent of cgr's tree-sitter parser. +ORACLE_KEY_CALLS = "calls" +GO_RETRIEVAL_SCORES_FILENAME = "go_retrieval_scores.csv" +GO_RETRIEVAL_DIFF_FILENAME = "go_retrieval_diff.json" +GO_RETRIEVAL_DIFF_PREFIX = "go-retrieval:" +GO_RETRIEVAL_LABEL = "graph" +GO_RETRIEVAL_TITLE = "cgr multi-language retrieval: Go CALLS vs go/ast oracle" +GO_CALL_EDGE_REPR = "{file} -> {name}" + +RS_SUFFIX = ".rs" +RUST_DEFAULT_TARGET = "." 
+RUST_RETRIEVAL_SCORES_FILENAME = "rust_retrieval_scores.csv" +RUST_RETRIEVAL_DIFF_FILENAME = "rust_retrieval_diff.json" +RUST_RETRIEVAL_DIFF_PREFIX = "rust-retrieval:" +RUST_RETRIEVAL_LABEL = "graph" +RUST_RETRIEVAL_TITLE = "cgr multi-language retrieval: Rust CALLS vs syn oracle" +RUST_CALL_EDGE_REPR = "{file} -> {name}" + +JAVA_DEFAULT_TARGET = "." +JAVA_RETRIEVAL_SCORES_FILENAME = "java_retrieval_scores.csv" +JAVA_RETRIEVAL_DIFF_FILENAME = "java_retrieval_diff.json" +JAVA_RETRIEVAL_DIFF_PREFIX = "java-retrieval:" +JAVA_RETRIEVAL_LABEL = "graph" +JAVA_RETRIEVAL_TITLE = "cgr multi-language retrieval: Java CALLS vs javac oracle" +JAVA_CALL_EDGE_REPR = "{file} -> {name}" + +TS_DEFAULT_TARGET = "." +TS_RETRIEVAL_SCORES_FILENAME = "ts_retrieval_scores.csv" +TS_RETRIEVAL_DIFF_FILENAME = "ts_retrieval_diff.json" +TS_RETRIEVAL_DIFF_PREFIX = "ts-retrieval:" +TS_RETRIEVAL_LABEL = "graph" +TS_RETRIEVAL_TITLE = "cgr multi-language retrieval: TypeScript CALLS vs tsc oracle" +TS_CALL_EDGE_REPR = "{file} -> {name}" + +PHP_DEFAULT_TARGET = "." +PHP_RETRIEVAL_SCORES_FILENAME = "php_retrieval_scores.csv" +PHP_RETRIEVAL_DIFF_FILENAME = "php_retrieval_diff.json" +PHP_RETRIEVAL_DIFF_PREFIX = "php-retrieval:" +PHP_RETRIEVAL_LABEL = "graph" +PHP_RETRIEVAL_TITLE = "cgr multi-language retrieval: PHP CALLS vs php-parser oracle" +PHP_CALL_EDGE_REPR = "{file} -> {name}" + +LUA_DEFAULT_TARGET = "." +LUA_RETRIEVAL_SCORES_FILENAME = "lua_retrieval_scores.csv" +LUA_RETRIEVAL_DIFF_FILENAME = "lua_retrieval_diff.json" +LUA_RETRIEVAL_DIFF_PREFIX = "lua-retrieval:" +LUA_RETRIEVAL_LABEL = "graph" +LUA_RETRIEVAL_TITLE = "cgr multi-language retrieval: Lua CALLS vs luaparse oracle" +LUA_CALL_EDGE_REPR = "{file} -> {name}" + +C_DEFAULT_TARGET = "." +C_SOURCE_GLOB = "*.c" +C_HEADER_GLOB = "*.h" +C_SUFFIXES: tuple[str, ...] 
= (".c", ".h") +CLANG_INCLUDE_FLAG = "-I" +CLANG_C_STD = "-std=c11" +CLANG_ISYSROOT_FLAG = "-isysroot" +CLANG_ISYSTEM_FLAG = "-isystem" +CLANG_INCLUDE_DIR = "include" +CLANG_SEVERITY_ERROR = 3 +XCRUN_SDK_PATH_CMD: tuple[str, ...] = ("xcrun", "--show-sdk-path") +CLANG_RESOURCE_DIR_CMD: tuple[str, ...] = ("clang", "-print-resource-dir") +C_RETRIEVAL_SCORES_FILENAME = "c_retrieval_scores.csv" +C_RETRIEVAL_DIFF_FILENAME = "c_retrieval_diff.json" +C_RETRIEVAL_DIFF_PREFIX = "c-retrieval:" +C_RETRIEVAL_LABEL = "graph" +C_RETRIEVAL_TITLE = "cgr multi-language retrieval: C CALLS vs libclang oracle" +C_CALL_EDGE_REPR = "{file} -> {name}" + +CPP_SOURCE_GLOBS: tuple[str, ...] = ("*.cc", "*.cpp", "*.cxx") +CPP_HEADER_GLOBS: tuple[str, ...] = ("*.h", "*.hpp", "*.hh", "*.hxx") +CPP_SUFFIXES: tuple[str, ...] = (".cc", ".cpp", ".cxx", ".h", ".hpp", ".hh", ".hxx") +CLANG_CPP_STD = "-std=c++17" +CLANG_CPP_LANG_FLAG = "-x" +CLANG_CPP_LANG = "c++" +CLANG_DEFINE_FLAG = "-D" +# (H) Apple ships a libclang whose version matches the active macOS SDK's libc++, +# (H) which the pip `libclang` wheel does not; C++ standard headers need that match +# (H) to parse. Probed in order; first existing path wins, else the bundled default. +LIBCLANG_CANDIDATES: tuple[str, ...] = ( + "/Library/Developer/CommandLineTools/usr/lib/libclang.dylib", +) +# (H) libc++ headers live under /usr/include/c++/v1 and MUST precede the clang +# (H) builtin resource headers, else libc++'s finds the C first. +CLANG_LIBCXX_SUBPATH = "usr/include/c++/v1" +CPP_RETRIEVAL_SCORES_FILENAME = "cpp_retrieval_scores.csv" +CPP_RETRIEVAL_DIFF_FILENAME = "cpp_retrieval_diff.json" +CPP_RETRIEVAL_DIFF_PREFIX = "cpp-retrieval:" +CPP_RETRIEVAL_LABEL = "graph" +CPP_RETRIEVAL_TITLE = "cgr multi-language retrieval: C++ CALLS vs libclang oracle" +CPP_CALL_EDGE_REPR = "{file} -> {name}" + +# (H) Semantic-search relevance eval: does cgr's embedding ranking retrieve the +# (H) right function for a natural-language query? 
Uses cgr's own embedder over +# (H) function source extracted from the captured graph; graded as recall@k on +# (H) controlled fixtures whose query->function relevance is unambiguous. +SEMANTIC_TOP_K = 3 +SEMANTIC_SCORES_FILENAME = "semantic_scores.csv" +SEMANTIC_DIFF_FILENAME = "semantic_diff.json" +SEMANTIC_DIFF_PREFIX = "semantic:" +SEMANTIC_LABEL = "recall-at-k" +SEMANTIC_CASE_REPR = "{query} => {expected}" +SEMANTIC_TITLE = "cgr semantic-search eval: query->function recall@k" + +# (H) Static CALLS eval: function-level call recall. The oracle resolves only the +# (H) unambiguous direct calls (a bare-name call to a function reachable via a +# (H) first-party import or a same-module top-level def) to (caller_qn, callee_qn), +# (H) using ast import resolution rather than cgr's trie. Method / attribute / +# (H) dynamic calls need type inference and are out of the oracle's scope, so only +# (H) recall is graded: every statically-certain call must be in cgr's graph. +STATIC_CALLS_DEFAULT_TARGET = "codebase_rag" +STATIC_CALLS_SCORES_FILENAME = "static_calls_scores.csv" +STATIC_CALLS_DIFF_FILENAME = "static_calls_diff.json" +STATIC_CALLS_DIFF_PREFIX = "static-calls:" +STATIC_CALLS_LABEL = "direct-call-recall" +STATIC_CALL_EDGE_REPR = "{caller} -> {callee}" +STATIC_CALLS_TITLE = "cgr static-calls eval: function-level direct-call recall" +ORACLE_KEY_KIND = "kind" +ORACLE_KEY_FILE = "file" +ORACLE_KEY_LINE = "line" +ORACLE_KEY_END_LINE = "end_line" +ORACLE_KEY_NAME = "name" +# (H) Edge-payload keys: an oracle that grades containment edges emits a +# (H) {nodes: [...], edges: [...]} object, each edge carrying rel + parent/child +# (H) node references joined against cgr on (kind, file, line). 
+ORACLE_KEY_NODES = "nodes" +ORACLE_KEY_EDGES = "edges" +ORACLE_KEY_REL = "rel" +ORACLE_KEY_PARENT = "parent" +ORACLE_KEY_CHILD = "child" +# (H) Name-edge payload keys: an inheritance edge carries its source node ref and +# (H) the base type's SIMPLE NAME (cgr resolves bases by simple name, not qn). +ORACLE_KEY_NAME_EDGES = "name_edges" +ORACLE_KEY_SOURCE = "source" +ORACLE_KEY_TARGET_NAME = "target_name" + +# (H) Inheritance edges graded by base simple name: INHERITS (extends/superclass +# (H) and superinterface) and IMPLEMENTS (a class implementing an interface). +INHERITANCE_NAME_EDGE_TYPES: tuple[cs.RelationshipType, ...] = ( + cs.RelationshipType.INHERITS, + cs.RelationshipType.IMPLEMENTS, +) +INHERITANCE_NAME_EDGE_TYPE_VALUES: frozenset[str] = frozenset( + e.value for e in INHERITANCE_NAME_EDGE_TYPES +) + +# (H) Rust structure eval: cgr nodes graded against the syn oracle +# (H) (evals/oracles/rs_oracle), joined on (kind, file, start_line). +RS_SUFFIX = ".rs" +RS_SCORED_NODE_KINDS: tuple[cs.NodeLabel, ...] = ( + cs.NodeLabel.FUNCTION, + cs.NodeLabel.METHOD, + cs.NodeLabel.CLASS, + cs.NodeLabel.INTERFACE, + cs.NodeLabel.ENUM, + cs.NodeLabel.UNION, + cs.NodeLabel.TYPE, +) +RS_SCORED_NODE_KIND_VALUES: frozenset[str] = frozenset( + k.value for k in RS_SCORED_NODE_KINDS +) +RS_ORACLE_DIRNAME = "rs_oracle" +CARGO_BIN = "cargo" +CARGO_RUN = "run" +CARGO_RELEASE = "--release" +CARGO_MANIFEST = "--manifest-path" +CARGO_QUIET = "-q" +CARGO_ARG_SEP = "--" +RS_SCORES_FILENAME = "rs_scores.csv" +RS_DIFF_FILENAME = "rs_diff.json" + +# (H) TypeScript structure eval: cgr nodes graded against the TS-compiler-API +# (H) oracle (evals/oracles/ts_oracle), joined on (kind, file, start_line). +TS_SUFFIXES: tuple[str, ...] = (".ts", ".tsx") +TS_SCORED_NODE_KINDS: tuple[cs.NodeLabel, ...] 
= ( + cs.NodeLabel.FUNCTION, + cs.NodeLabel.METHOD, + cs.NodeLabel.CLASS, + cs.NodeLabel.INTERFACE, + cs.NodeLabel.ENUM, + cs.NodeLabel.TYPE, +) +TS_SCORED_NODE_KIND_VALUES: frozenset[str] = frozenset( + k.value for k in TS_SCORED_NODE_KINDS +) +TS_ORACLE_DIRNAME = "ts_oracle" +TS_ORACLE_SCRIPT = "ts_ast.js" +NODE_BIN = "node" +NPM_BIN = "npm" +NPM_INSTALL = "install" +NPM_FLAGS: tuple[str, ...] = ("--no-audit", "--no-fund") +NODE_MODULES_DIRNAME = "node_modules" +TS_DTS_SUFFIX = ".d.ts" +TS_SCORES_FILENAME = "ts_scores.csv" +TS_DIFF_FILENAME = "ts_diff.json" + +# (H) JavaScript structure eval: same TS-compiler-API oracle, run over .js/.jsx. +JS_SUFFIXES: tuple[str, ...] = (".js", ".jsx") +JS_SCORED_NODE_KINDS: tuple[cs.NodeLabel, ...] = ( + cs.NodeLabel.FUNCTION, + cs.NodeLabel.METHOD, + cs.NodeLabel.CLASS, +) +JS_SCORED_NODE_KIND_VALUES: frozenset[str] = frozenset( + k.value for k in JS_SCORED_NODE_KINDS +) +JS_SCORES_FILENAME = "js_scores.csv" +JS_DIFF_FILENAME = "js_diff.json" + +# (H) Java structure eval: cgr nodes graded against the JDK Compiler Tree API +# (H) oracle (evals/oracles/java_oracle/Oracle.java), joined on (kind, file, line). +JAVA_SUFFIX = ".java" +JAVA_SCORED_NODE_KINDS: tuple[cs.NodeLabel, ...] = ( + cs.NodeLabel.FUNCTION, + cs.NodeLabel.METHOD, + cs.NodeLabel.CLASS, + cs.NodeLabel.INTERFACE, + cs.NodeLabel.ENUM, +) +JAVA_SCORED_NODE_KIND_VALUES: frozenset[str] = frozenset( + k.value for k in JAVA_SCORED_NODE_KINDS +) +JAVA_ORACLE_DIRNAME = "java_oracle" +JAVA_ORACLE_SOURCE = "Oracle.java" +JAVA_ORACLE_CLASS = "Oracle" +JAVAC_BIN = "javac" +JAVA_BIN = "java" +JAVA_CP_FLAG = "-cp" +JAVA_SCORES_FILENAME = "java_scores.csv" +JAVA_DIFF_FILENAME = "java_diff.json" + +# (H) Lua structure eval: cgr nodes graded against a luaparse oracle. Lua has no +# (H) classes, so every function (global/local/table/method/anonymous) is Function. +LUA_SUFFIX = ".lua" +LUA_SCORED_NODE_KINDS: tuple[cs.NodeLabel, ...] 
# (H) C/C++ structure eval: cgr nodes graded against a libclang oracle driven by a
# (H) compile_commands.json, so includes and macros resolve to the true AST (which
# (H) tree-sitter cannot do). Joined on (kind, file, start_line).
# (H) NOTE(review): CPP_SUFFIXES is also assigned earlier in this module, next to
# (H) the C++ retrieval constants, as a 7-suffix tuple without ".c". This later
# (H) assignment rebinds the name, so code reading the module attribute after
# (H) import gets this 8-suffix tuple, ".c" included. Confirm the C++ retrieval
# (H) eval is meant to accept ".c" paths, or give one of the two tuples its own name.
CPP_SUFFIXES: tuple[str, ...] = (
    ".cpp",
    ".cc",
    ".cxx",
    ".c",
    ".hpp",
    ".hh",
    ".hxx",
    ".h",
)
# (H) Node labels graded for C/C++; the *_VALUES frozenset holds their string
# (H) values for membership tests.
CPP_SCORED_NODE_KINDS: tuple[cs.NodeLabel, ...] = (
    cs.NodeLabel.FUNCTION,
    cs.NodeLabel.METHOD,
    cs.NodeLabel.CLASS,
)
CPP_SCORED_NODE_KIND_VALUES: frozenset[str] = frozenset(
    k.value for k in CPP_SCORED_NODE_KINDS
)
CPP_COMPDB_FILENAME = "compile_commands.json"
CPP_SCORES_FILENAME = "cpp_scores.csv"
CPP_DIFF_FILENAME = "cpp_diff.json"
CPP_DEFAULT_TARGET = "."
class GrepMode(StrEnum):
    # (H) The two grep baselines the graph is compared against.
    # (H) NAME matches the bare symbol token anywhere (a user's first grep); CALL
    # (H) matches the symbol immediately followed by `(` (a call-tuned grep). Both
    # (H) still over-match: NAME on imports/aliases/comments, CALL on def sites.
    NAME = "name"
    CALL = "call"


class RetrievalCondition(StrEnum):
    # (H) The conditions the retrieval benchmark scores: cgr's graph plus one
    # (H) grep baseline per GrepMode. Per the benchmark note, all conditions are
    # (H) graded against the same Python ast oracle over the same universe.
    GRAPH = "graph"
    GREP_NAME = "grep_name"
    GREP_CALL = "grep_call"
+INCREMENTAL_DEFAULT_TARGET = "codebase_rag" +INCREMENTAL_SCORES_FILENAME = "incremental_scores.csv" +INCREMENTAL_DIFF_FILENAME = "incremental_diff.json" +INCREMENTAL_NODE_DIFF_PREFIX = "incremental-node:" +INCREMENTAL_EDGE_DIFF_PREFIX = "incremental-edge:" +INCREMENTAL_TITLE = "cgr incremental-update eval: incremental vs clean re-index" +INCREMENTAL_WORK_DIRNAME = "repo" +INCREMENTAL_TMP_PREFIX = "cgr-incremental-eval-" +NEUTRAL_EDIT_COMMENT = "\n# cgr-incremental-eval neutral edit\n" +INCREMENTAL_MTIME_BUMP = 10.0 +INCREMENTAL_DEFAULT_SAMPLE = 25 +INCREMENTAL_DIFF_SAMPLE_CAP = 50 +STATE_NODE_REPR = "{label} {uid}" +STATE_EDGE_REPR = "{rel} {fl}:{fv} -> {tl}:{tv}" + +# (H) Import-resolution eval: classify each module's imports by top-level package +# (H) as internal (first-party, resolves into the repo) or external (stdlib or +# (H) third-party), against an ast + filesystem oracle. Surfaces internal/external +# (H) misclassification (issue #498). Both sides reduce an import to its top-level +# (H) package name, a unit each computes independently, so the oracle is clean. +IMPORTS_DEFAULT_TARGET = "codebase_rag" +IMPORTS_SCORES_FILENAME = "imports_scores.csv" +IMPORTS_DIFF_FILENAME = "imports_diff.json" +IMPORTS_DIFF_PREFIX = "imports:" +IMPORTS_ALL_LABEL = "imports-all" +IMPORTS_INTERNAL_LABEL = "imports-internal" +IMPORTS_EXTERNAL_LABEL = "imports-external" +IMPORT_DEP_REPR = "{file} -> {top} (external={external})" +IMPORTS_TITLE = "cgr import-resolution eval: internal vs external classification" +# (H) `__future__` is a compiler directive, not a dependency; cgr ignores it, so +# (H) the oracle excludes it to avoid false external-import misses. 
+IMPORTS_IGNORED_TOPS: frozenset[str] = frozenset({"__future__"}) + +# (H) Inheritance eval: grade resolved INHERITS (subclass_qn -> base_qn) and +# (H) OVERRIDES (subclass_qn, base_qn, method) against an ast oracle that resolves +# (H) bases via same-module and from-import only, skipping ambiguous/attribute/ +# (H) external bases. Goes beyond L1, which checks INHERITS by base simple name. +INHERITANCE_DEFAULT_TARGET = "codebase_rag" +INHERITANCE_SCORES_FILENAME = "inheritance_scores.csv" +INHERITANCE_DIFF_FILENAME = "inheritance_diff.json" +INHERITANCE_DIFF_PREFIX = "inheritance:" +INHERITS_LABEL = "inherits-resolved" +OVERRIDES_LABEL = "overrides" +INHERITS_EDGE_REPR = "{sub} -> {base}" +OVERRIDES_EDGE_REPR = "{sub} -> {base} .{method}" +INHERITANCE_TITLE = "cgr inheritance eval: resolved INHERITS and OVERRIDES" +STAR_IMPORT = "*" +SEP_NUL = "\x00" + +# (H) Dead-code eval: reproduce cgr's reachability (build_dead_code_query) over the +# (H) captured graph and grade the reported unreachable set against controlled +# (H) fixtures whose dead functions are known by construction. Surfaces missing +# (H) CALLS edges (a live function wrongly flagged dead). The reachability is a +# (H) faithful re-implementation of the documented query, unit-tested on hand-built +# (H) graphs, so a fixture mismatch points at cgr's graph, not the scorer. +DEAD_CODE_DEFAULT_TARGET = "codebase_rag" +DEAD_CODE_SCORES_FILENAME = "dead_code_scores.csv" +DEAD_CODE_DIFF_FILENAME = "dead_code_diff.json" +DEAD_CODE_DIFF_PREFIX = "dead-code:" +DEAD_CODE_LABEL = "dead-code" +DEAD_CODE_TITLE = "cgr dead-code eval: reachability over the captured graph" +DECORATOR_AT = "@" +DECORATOR_CALL_OPEN = "(" + +# (H) Cross-project (monorepo) eval: does cgr resolve CALLS and IMPORTS across +# (H) top-level package boundaries? The single-package corpora the other evals use +# (H) never exercise this; cgr's headline is monorepo RAG. Graded on synthetic +# (H) multi-package fixtures with known cross-package edges. 
+CROSS_PROJECT_DIFF_PREFIX = "cross-project:" +CROSS_CALLS_LABEL = "cross-package-calls" +CROSS_IMPORTS_LABEL = "cross-package-imports" +CROSS_EDGE_REPR = "{src} -> {dst}" + +# (H) Instantiation eval: file-level constructor localization. For each first-party +# (H) class, which files instantiate it. cgr INSTANTIATES edges vs an ast oracle of +# (H) calls whose callee simple name is a first-party class. Isolates the +# (H) INSTANTIATES signal the retrieval eval folds into CALLS. +INSTANTIATION_DEFAULT_TARGET = "codebase_rag" +INSTANTIATION_SCORES_FILENAME = "instantiation_scores.csv" +INSTANTIATION_DIFF_FILENAME = "instantiation_diff.json" +INSTANTIATION_DIFF_PREFIX = "instantiation:" +INSTANTIATES_LABEL = "instantiates" +INSTANTIATION_EDGE_REPR = "{file} -> {cls}" +INSTANTIATION_TITLE = "cgr instantiation eval: file-level INSTANTIATES vs ast oracle" diff --git a/evals/cpp_l1.py b/evals/cpp_l1.py new file mode 100644 index 000000000..840bf3ff3 --- /dev/null +++ b/evals/cpp_l1.py @@ -0,0 +1,61 @@ +from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger + +from . import constants as ec +from . 
def main(
    target: Annotated[
        Path,
        typer.Option(help="Directory of C/C++ sources with a compile_commands.json."),
    ] = Path(ec.CPP_DEFAULT_TARGET),
    project_name: Annotated[
        str, typer.Option(help="cgr project name; defaults to target dir name.")
    ] = "",
    out_dir: Annotated[
        Path, typer.Option(help="Directory for cpp_scores.csv and cpp_diff.json.")
    ] = Path(ec.DEFAULT_OUT_DIR),
) -> None:
    # (H) CLI entry point for the C/C++ L1 structure eval: extract cgr's graph,
    # (H) run the libclang oracle, scope cgr to the oracle's files, score, report.
    # (H) Exits with code 1 when the oracle is unavailable or the target has no
    # (H) compile_commands.json. Kept as comments, not a docstring, so the Typer
    # (H) help text is unchanged.
    target = target.resolve()
    compdb = target / ec.CPP_COMPDB_FILENAME
    oracle_ready = cpp_available() and compdb.is_file()
    if not oracle_ready:
        message = ls.CPP_ORACLE_MISSING.format(
            compdb=ec.CPP_COMPDB_FILENAME, target=target
        )
        logger.error(message)
        raise typer.Exit(code=1)

    project = project_name if project_name else target.name

    logger.info(ls.CPP_EXTRACTING_CGR.format(target=target, project=project))
    cgr_graph = extract_cgr_cpp_graph(target, project)
    logger.success(ls.CPP_CGR_DONE.format(count=len(cgr_graph.nodes)))

    logger.info(ls.CPP_EXTRACTING_ORACLE.format(target=target))
    oracle_graph = run_cpp_oracle(target)
    logger.success(ls.CPP_ORACLE_DONE.format(count=len(oracle_graph.nodes)))

    # (H) The compile_commands.json defines the gradeable universe: the oracle
    # (H) only sees files its compiled TUs reach, so cgr is narrowed to those
    # (H) files before scoring. Otherwise cgr's whole-tree index (bundled test
    # (H) deps, uncompiled sources) would count as false positives against a
    # (H) partial oracle.
    oracle_files = {node_key.file for node_key in oracle_graph.nodes}
    scoped_graph = restrict_to_files(cgr_graph, oracle_files)
    logger.success(ls.CPP_CGR_SCOPED.format(count=len(scoped_graph.nodes)))

    scores = score_structure(
        scoped_graph,
        oracle_graph,
        ec.CPP_SCORED_NODE_KINDS,
        ec.SCORED_EDGE_TYPES,
        grade_spans=True,
    )
    write_outputs(scores, out_dir, ec.CPP_SCORES_FILENAME, ec.CPP_DIFF_FILENAME)
    render(scores, _TITLE)
def cgr_cpp_call_edges(
    target: Path, project: str, declared: frozenset[str], covered: frozenset[str]
) -> set[CallEdge]:
    # (H) Reduce cgr's CALLS relationships to (caller_file, callee_simple_name)
    # (H) pairs. An edge is kept only when the caller node has a path ending in
    # (H) one of ec.CPP_SUFFIXES, that path is in `covered`, and the callee's
    # (H) last dotted segment is in `declared`.
    captured = _capture(target, project)

    # (H) (label, uid) -> source path, for nodes that carry a C/C++ path.
    source_file_of: dict[tuple[str, str], str] = {}
    for (label, uid), props in captured.nodes.items():
        raw_path = props.get(cs.KEY_PATH)
        if not raw_path:
            continue
        file_path = str(raw_path)
        if file_path.endswith(ec.CPP_SUFFIXES):
            source_file_of[(str(label), str(uid))] = file_path

    found: set[CallEdge] = set()
    for src_label, src_uid, rel, _dst_label, dst_qn in captured.rels:
        if rel != _CALLS:
            continue
        caller_file = source_file_of.get((str(src_label), str(src_uid)))
        # (H) Grade only files the oracle parsed cleanly (its authoritative set).
        if caller_file is not None and caller_file in covered:
            callee = str(dst_qn).rsplit(cs.SEPARATOR_DOT, 1)[-1]
            if callee in declared:
                found.add((caller_file, callee))
    return found
NAME=1."), + ] = [], + out_dir: Annotated[ + Path, + typer.Option(help="Directory for cpp_retrieval_scores.csv and diff json."), + ] = Path(ec.DEFAULT_OUT_DIR), +) -> None: + if not cpp_available(): + logger.error(ls.CPP_RETRIEVAL_ORACLE_MISSING) + raise typer.Exit(code=1) + + target = target.resolve() + project = project_name or target.name + + logger.info(ls.CPP_RETRIEVAL_ORACLE.format(target=target)) + oracle, declared, covered = oracle_cpp_call_edges(target, tuple(define)) + logger.success(ls.CPP_RETRIEVAL_ORACLE_DONE.format(count=len(oracle))) + logger.info(ls.CPP_RETRIEVAL_COVERED.format(count=len(covered))) + + logger.info(ls.CPP_RETRIEVAL_CGR.format(target=target, project=project)) + cgr = cgr_cpp_call_edges(target, project, declared, covered) + logger.success(ls.CPP_RETRIEVAL_CGR_DONE.format(count=len(cgr))) + + result = score_cpp_retrieval(cgr, oracle) + write_outputs( + result, + out_dir, + ec.CPP_RETRIEVAL_SCORES_FILENAME, + ec.CPP_RETRIEVAL_DIFF_FILENAME, + ) + render(result, ec.CPP_RETRIEVAL_TITLE) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/cross_project.py b/evals/cross_project.py new file mode 100644 index 000000000..206288bd4 --- /dev/null +++ b/evals/cross_project.py @@ -0,0 +1,74 @@ +# (H) Cross-project (monorepo) eval. Every other eval runs on a single top-level +# (H) package, so none checks that cgr resolves references that cross top-level +# (H) package boundaries -- the monorepo case cgr is built for. This extracts +# (H) cgr's CALLS and IMPORTS edges whose endpoints live in different top-level +# (H) packages and grades them, on synthetic multi-package fixtures whose cross +# (H) edges are known by construction. +from pathlib import Path + +from codebase_rag import constants as cs + +from . 
def _top_package(qn: str, project: str) -> str | None:
    # (H) A qualified name has the shape project.top_package.rest; the top-level
    # (H) package is the segment right after the project root. Names with fewer
    # (H) than three segments (bare project-level modules) or a different root
    # (H) have no top-level package.
    segments = qn.split(cs.SEPARATOR_DOT)
    if len(segments) < 3 or segments[0] != project:
        return None
    return segments[1]


def cgr_cross_package(target: Path, project: str) -> tuple[set[Edge], set[Edge]]:
    # (H) Collect cgr's relationships whose two endpoints sit in different
    # (H) top-level packages. Returns (calls, imports): every cross-package CALLS
    # (H) edge, and the cross-package IMPORTS edges that run Module -> Module.
    captured = _capture(target, project)
    cross_calls: set[Edge] = set()
    cross_imports: set[Edge] = set()
    for src_label, src_uid, rel, dst_label, dst_uid in captured.rels:
        edge = (str(src_uid), str(dst_uid))
        src_pkg = _top_package(edge[0], project)
        dst_pkg = _top_package(edge[1], project)
        crosses = src_pkg is not None and dst_pkg is not None and src_pkg != dst_pkg
        if not crosses:
            continue
        if rel == _CALLS:
            cross_calls.add(edge)
            continue
        module_to_module = src_label == _MODULE and dst_label == _MODULE
        if rel == _IMPORTS and module_to_module:
            cross_imports.add(edge)
    return cross_calls, cross_imports
not None: + rows.append(row) + diff[ec.CROSS_PROJECT_DIFF_PREFIX + label] = _bucket(cgr_set, oracle_set) + return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff) diff --git a/evals/dead_code.py b/evals/dead_code.py new file mode 100644 index 000000000..7489a7a15 --- /dev/null +++ b/evals/dead_code.py @@ -0,0 +1,197 @@ +# (H) Dead-code eval. cgr's `dead-code` command reports functions/methods +# (H) unreachable from any entry point via a Cypher reachability query +# (H) (build_dead_code_query). The deterministic in-memory harness cannot run that +# (H) query against a database, so this faithfully re-implements its reachability +# (H) over the captured graph and grades the result on controlled fixtures whose +# (H) dead set is known by construction. The reachability is unit-tested on +# (H) hand-built graphs, so a fixture mismatch indicts cgr's CALLS graph (e.g. a +# (H) missing edge flagging a live function as dead), not the scorer. +import json +from collections import defaultdict +from pathlib import Path +from typing import Annotated, NamedTuple + +import typer +from loguru import logger + +from codebase_rag import constants as cs +from codebase_rag.types_defs import PropertyDict, PropertyValue + +from . import constants as ec +from . 
class DeadCodeConfig(NamedTuple):
    # (H) Switches for the in-memory dead-code reachability (dead_code_from_graph).
    # (H) include_tests: when False, a module-level call from a module whose path
    # (H) matches a test pattern does not root its target; when True, candidates
    # (H) whose own path matches a test pattern are themselves roots.
    include_tests: bool
    # (H) include_classes: also treat Class nodes as candidates and follow
    # (H) INSTANTIATES / INHERITS edges in addition to CALLS.
    include_classes: bool
    # (H) root_decorators: normalized decorator names (lowercased last dotted
    # (H) segment) that mark a candidate as a root.
    root_decorators: frozenset[str]
    # (H) entry_points: qualified-name suffixes; a candidate whose qn ends with
    # (H) one of them is a root.
    entry_points: tuple[str, ...]
    # (H) test_patterns: substrings tested against a node's or module's path.
    test_patterns: tuple[str, ...]
def dead_code_from_graph(
    nodes: dict[_NodeId, PropertyDict],
    rels: list[_RelTuple],
    project_prefix: str,
    config: DeadCodeConfig,
) -> set[str]:
    # (H) Return the qualified names of candidate nodes unreachable from any root.
    # (H) Candidates are Function/Method nodes (plus Class when include_classes)
    # (H) whose uid starts with project_prefix. Roots are candidates that are
    # (H) targeted by a module-level CALLS (or INSTANTIATES, with classes) edge,
    # (H) carry a root decorator, are exported, match an entry-point suffix, or
    # (H) live on a test path when include_tests is set. Liveness is the closure
    # (H) of the roots over the traversal relationships.
    with_classes = config.include_classes
    candidate_labels = (
        {_FUNCTION, _METHOD, _CLASS} if with_classes else {_FUNCTION, _METHOD}
    )
    walk_rels = {_CALLS, _INSTANTIATES, _INHERITS} if with_classes else {_CALLS}
    module_root_rels = {_CALLS, _INSTANTIATES} if with_classes else {_CALLS}

    def on_test_path(path: str) -> bool:
        return any(pattern in path for pattern in config.test_patterns)

    # (H) One pass over the nodes: module paths on one side, candidates on the other.
    candidates: set[str] = set()
    props_of: dict[str, PropertyDict] = {}
    path_of_module: dict[str, str] = {}
    for (label, uid), props in nodes.items():
        qn = str(uid)
        if label == _MODULE:
            path_of_module[qn] = str(props.get(cs.KEY_PATH, ""))
            continue
        if label in candidate_labels and qn.startswith(project_prefix):
            candidates.add(qn)
            props_of[qn] = props

    # (H) Roots reached directly from module scope; test modules only count
    # (H) when include_tests is on.
    roots: set[str] = set()
    for src_label, src_uid, rel, _dst_label, dst_uid in rels:
        if src_label == _MODULE and rel in module_root_rels:
            callee = str(dst_uid)
            if callee in candidates:
                from_test = on_test_path(path_of_module.get(str(src_uid), ""))
                if config.include_tests or not from_test:
                    roots.add(callee)

    def is_intrinsic_root(qn: str) -> bool:
        # (H) Rooted by the node's own properties rather than by an incoming edge.
        props = props_of[qn]
        return (
            _has_root_decorator(props, config.root_decorators)
            or props.get(cs.KEY_IS_EXPORTED) is True
            or any(qn.endswith(entry) for entry in config.entry_points)
            or (
                config.include_tests
                and on_test_path(str(props.get(cs.KEY_PATH, "")))
            )
        )

    roots |= {qn for qn in candidates if qn not in roots and is_intrinsic_root(qn)}

    successors: dict[str, set[str]] = {}
    for _src_label, src_uid, rel, _dst_label, dst_uid in rels:
        if rel in walk_rels:
            successors.setdefault(str(src_uid), set()).add(str(dst_uid))

    # (H) Expand layer by layer until no new node is reached.
    live = set(roots)
    frontier = set(roots)
    while frontier:
        reached: set[str] = set()
        for qn in frontier:
            reached |= successors.get(qn, set())
        frontier = reached - live
        live |= frontier

    return candidates - live
def main(
    target: Annotated[
        Path, typer.Option(help="Directory of Go sources to evaluate.")
    ] = Path(ec.GO_DEFAULT_TARGET),
    project_name: Annotated[
        str, typer.Option(help="cgr project name; defaults to target dir name.")
    ] = "",
    out_dir: Annotated[
        Path, typer.Option(help="Directory for go_scores.csv and go_diff.json.")
    ] = Path(ec.DEFAULT_OUT_DIR),
) -> None:
    # (H) CLI entry point for the Go L1 structure eval: extract cgr's graph, run
    # (H) the go/ast oracle, score the two and write/render the report. Exits
    # (H) with code 1 when the Go oracle is unavailable. Kept as comments, not a
    # (H) docstring, so the Typer help text is unchanged.
    oracle_ready = go_available()
    if not oracle_ready:
        logger.error(ls.GO_ORACLE_MISSING.format(binary=ec.GO_BIN))
        raise typer.Exit(code=1)

    target = target.resolve()
    project = project_name if project_name else target.name

    logger.info(ls.GO_EXTRACTING_CGR.format(target=target, project=project))
    cgr_graph = extract_cgr_go_graph(target, project)
    logger.success(ls.GO_CGR_DONE.format(count=len(cgr_graph.nodes)))

    logger.info(ls.GO_EXTRACTING_ORACLE.format(binary=ec.GO_BIN, target=target))
    oracle_graph = run_go_oracle(target)
    logger.success(ls.GO_ORACLE_DONE.format(count=len(oracle_graph.nodes)))

    scores = score_structure(
        cgr_graph,
        oracle_graph,
        ec.GO_SCORED_NODE_KINDS,
        ec.SCORED_EDGE_TYPES,
        grade_spans=True,
    )
    write_outputs(scores, out_dir, ec.GO_SCORES_FILENAME, ec.GO_DIFF_FILENAME)
    render(scores, _TITLE)
# (H) Multi-language retrieval (Go). Extends the file-level call-localization
# (H) benchmark to a second language: for each first-party Go symbol, which
# (H) files call it. cgr's Go CALLS edges, reduced to (caller file, callee
# (H) simple name), are graded against go/ast call sites over the same
# (H) first-party name universe. The oracle uses Go's own parser, independent
# (H) of cgr's tree-sitter frontend, so this measures cgr's cross-file Go call
# (H) resolution against ground truth.
from pathlib import Path
from typing import Annotated

import typer
from loguru import logger

from codebase_rag import constants as cs

from . import constants as ec
from . import logs as ls
from .cgr_graph import _capture
from .oracles import go_available, run_go_call_oracle
from .score import _prf
from .structure_report import render, write_outputs
from .types_defs import DiffBucket, LocationStats, ScoreResult, ScoreRow

console_target = Path(ec.GO_DEFAULT_TARGET)

_CALLS = cs.RelationshipType.CALLS.value
_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0)

CallEdge = tuple[str, str]


# (H) Oracle side: thin wrapper that returns run_go_call_oracle's result (the
# (H) call edges plus the set of declared names) unchanged.
def oracle_go_call_edges(target: Path) -> tuple[set[CallEdge], frozenset[str]]:
    return run_go_call_oracle(target)


# (H) cgr side: reduce every captured CALLS relationship to (caller file,
# (H) callee simple name). Only callers whose node carries a path ending in
# (H) ec.GO_SUFFIX are kept, and only callees whose simple name is in
# (H) `declared`, so both sides are graded over the same name universe.
def cgr_go_call_edges(
    target: Path, project: str, declared: frozenset[str]
) -> set[CallEdge]:
    ingestor = _capture(target, project)

    # (H) (label, uid) -> source file, for nodes that live in a Go file.
    file_of: dict[tuple[str, str], str] = {}
    for (label, uid), props in ingestor.nodes.items():
        raw_path = props.get(cs.KEY_PATH)
        if not raw_path:
            continue
        path_text = str(raw_path)
        if path_text.endswith(ec.GO_SUFFIX):
            file_of[(str(label), str(uid))] = path_text

    found: set[CallEdge] = set()
    for from_label, from_val, rel_type, _to_label, to_val in ingestor.rels:
        if rel_type == _CALLS:
            caller_file = file_of.get((str(from_label), str(from_val)))
            if caller_file is not None:
                # (H) Last dotted segment of the callee's qualified name.
                simple = str(to_val).rpartition(cs.SEPARATOR_DOT)[2]
                if simple in declared:
                    found.add((caller_file, simple))
    return found


# (H) Render one edge for the diff json via ec.GO_CALL_EDGE_REPR.
def _edge_repr(edge: CallEdge) -> str:
    caller_file, callee = edge
    return ec.GO_CALL_EDGE_REPR.format(file=caller_file, name=callee)


# (H) Score cgr's edges against the oracle's. When _prf yields no row the
# (H) result carries no rows and no diff; otherwise it carries that one row
# (H) and a diff bucket listing oracle-only (missing) and cgr-only (extra) edges.
def score_go_retrieval(cgr: set[CallEdge], oracle: set[CallEdge]) -> ScoreResult:
    row = _prf(ec.Category.RETRIEVAL.value, ec.GO_RETRIEVAL_LABEL, cgr, oracle)
    if row is None:
        return ScoreResult(rows=[], location=_EMPTY_LOCATION, diff={})

    rows: list[ScoreRow] = [row]
    bucket = DiffBucket(
        missing=list(map(_edge_repr, sorted(oracle - cgr))),
        extra=list(map(_edge_repr, sorted(cgr - oracle))),
    )
    diff: dict[str, DiffBucket] = {
        ec.GO_RETRIEVAL_DIFF_PREFIX + ec.GO_RETRIEVAL_LABEL: bucket
    }
    return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff)


# (H) CLI entry point: run the Go call oracle first (it supplies the declared
# (H) name universe), then extract cgr's edges, score, write the outputs and
# (H) render. Exits with code 1 when go_available() is false.
def main(
    target: Annotated[
        Path, typer.Option(help="Directory of Go sources to evaluate call retrieval.")
    ] = console_target,
    project_name: Annotated[
        str, typer.Option(help="cgr project name; defaults to target dir name.")
    ] = "",
    out_dir: Annotated[
        Path, typer.Option(help="Directory for go_retrieval_scores.csv and diff json.")
    ] = Path(ec.DEFAULT_OUT_DIR),
) -> None:
    oracle_ready = go_available()
    if not oracle_ready:
        logger.error(ls.GO_ORACLE_MISSING.format(binary=ec.GO_BIN))
        raise typer.Exit(code=1)

    root = target.resolve()
    project = project_name if project_name else root.name

    logger.info(ls.GO_RETRIEVAL_ORACLE.format(binary=ec.GO_BIN, target=root))
    oracle_edges, declared = oracle_go_call_edges(root)
    logger.success(ls.GO_RETRIEVAL_ORACLE_DONE.format(count=len(oracle_edges)))

    logger.info(ls.GO_RETRIEVAL_CGR.format(target=root, project=project))
    cgr_edges = cgr_go_call_edges(root, project, declared)
    logger.success(ls.GO_RETRIEVAL_CGR_DONE.format(count=len(cgr_edges)))

    scored = score_go_retrieval(cgr_edges, oracle_edges)
    write_outputs(
        scored, out_dir, ec.GO_RETRIEVAL_SCORES_FILENAME, ec.GO_RETRIEVAL_DIFF_FILENAME
    )
    render(scored, ec.GO_RETRIEVAL_TITLE)


if __name__ == "__main__":
    typer.run(main)
# (H) Import-resolution eval. Every import is reduced to its top-level package
# (H) and classified as internal (first-party, resolving into the repo) or
# (H) external (stdlib / third-party), and cgr is checked against an ast +
# (H) filesystem oracle. The comparison unit is (importing_file,
# (H) top_level_package, is_external): both sides reduce an import to its
# (H) top-level name the same way, so the oracle is independent of cgr. This
# (H) isolates internal/external misclassification (issue #498), which the
# (H) structural L1 IMPORTS grading (internal targets only, by resolved file)
# (H) does not see.
import ast
from pathlib import Path
from typing import Annotated

import typer
from loguru import logger

from codebase_rag import constants as cs

from . import constants as ec
from . import logs as ls
from .ast_oracle import _from_base_parts, _iter_py_files
from .cgr_graph import _capture
from .score import _prf
from .structure_report import render, write_outputs
from .types_defs import DiffBucket, LocationStats, ScoreResult, ScoreRow

console_target = Path(ec.IMPORTS_DEFAULT_TARGET)

_MODULE = cs.NodeLabel.MODULE.value
_IMPORTS = cs.RelationshipType.IMPORTS.value
_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0)

ImportDep = tuple[str, str, bool]


# (H) Oracle view of one parsed module: a (rel, top_level_name, is_external)
# (H) triple for every import whose top-level name is not listed in
# (H) ec.IMPORTS_IGNORED_TOPS. A name is external when it differs from `project`.
def _import_deps_for_module(tree: ast.Module, rel: str, project: str) -> set[ImportDep]:
    pkg_parts = [project, *Path(rel).parent.parts]
    tops: list[str] = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            tops.extend(
                alias.name.partition(cs.SEPARATOR_DOT)[0] for alias in node.names
            )
        elif isinstance(node, ast.ImportFrom):
            # (H) An empty base means a relative import that escapes the
            # (H) package root: it resolves to nothing the repo defines, so it
            # (H) is skipped rather than guessed at.
            resolved = _from_base_parts(node, pkg_parts)
            if resolved:
                tops.append(resolved[0])
    return {
        (rel, top, top != project)
        for top in tops
        if top not in ec.IMPORTS_IGNORED_TOPS
    }


# (H) Oracle over the whole target: parse every file from _iter_py_files and
# (H) union the per-module deps. Files that fail to read or parse are logged
# (H) with a warning and left out.
def oracle_import_deps(target: Path, project: str) -> set[ImportDep]:
    collected: set[ImportDep] = set()
    for source in _iter_py_files(target):
        rel = source.relative_to(target).as_posix()
        try:
            tree = ast.parse(source.read_text(encoding=cs.ENCODING_UTF8))
        except (SyntaxError, UnicodeDecodeError, ValueError) as error:
            logger.warning(ls.ORACLE_PARSE_FAILED.format(path=rel, error=error))
        else:
            collected.update(_import_deps_for_module(tree, rel, project))
    return collected


# (H) cgr side: turn captured Module -> IMPORTS relationships into the same
# (H) triples. The importing file is the path of an internal module node ending
# (H) in ec.PY_SUFFIX; the external flag is the target module node's
# (H) cs.KEY_IS_EXTERNAL property, and a target with no module node counts as
# (H) internal.
def cgr_import_deps(target: Path, project: str) -> set[ImportDep]:
    ingestor = _capture(target, project)

    external_flag: dict[str, bool] = {}
    source_of: dict[str, str] = {}
    for (label, uid), props in ingestor.nodes.items():
        if label == _MODULE:
            key = str(uid)
            flagged = props.get(cs.KEY_IS_EXTERNAL) is True
            external_flag[key] = flagged
            location = props.get(cs.KEY_PATH)
            if flagged or not location:
                continue
            if str(location).endswith(ec.PY_SUFFIX):
                source_of[key] = str(location)

    found: set[ImportDep] = set()
    for from_label, from_val, rel_type, _to_label, to_val in ingestor.rels:
        if rel_type == _IMPORTS and from_label == _MODULE:
            importer = source_of.get(str(from_val))
            if importer is not None:
                imported = str(to_val)
                top = imported.partition(cs.SEPARATOR_DOT)[0]
                found.add((importer, top, external_flag.get(imported, False)))
    return found


# (H) Render one dependency for the diff json via ec.IMPORT_DEP_REPR.
def _dep_repr(dep: ImportDep) -> str:
    importer, top, external = dep
    return ec.IMPORT_DEP_REPR.format(file=importer, top=top, external=external)


# (H) Score one labelled subset as an EDGE-category row (None when _prf
# (H) produces no row).
def _row(label: str, cgr: set[ImportDep], oracle: set[ImportDep]) -> ScoreRow | None:
    return _prf(ec.Category.EDGE.value, label, cgr, oracle)


# (H) Score three views in order: all imports, internal only, external only.
# (H) Each view that produces a row also gets a diff bucket of oracle-only
# (H) (missing) and cgr-only (extra) dependencies.
def score_import_deps(cgr: set[ImportDep], oracle: set[ImportDep]) -> ScoreResult:
    def only(deps: set[ImportDep], external: bool) -> set[ImportDep]:
        return {dep for dep in deps if bool(dep[2]) is external}

    views: tuple[tuple[str, set[ImportDep], set[ImportDep]], ...] = (
        (ec.IMPORTS_ALL_LABEL, cgr, oracle),
        (ec.IMPORTS_INTERNAL_LABEL, only(cgr, False), only(oracle, False)),
        (ec.IMPORTS_EXTERNAL_LABEL, only(cgr, True), only(oracle, True)),
    )

    rows: list[ScoreRow] = []
    diff: dict[str, DiffBucket] = {}
    for label, got, want in views:
        row = _row(label, got, want)
        if row is None:
            continue
        rows.append(row)
        diff[ec.IMPORTS_DIFF_PREFIX + label] = DiffBucket(
            missing=list(map(_dep_repr, sorted(want - got))),
            extra=list(map(_dep_repr, sorted(got - want))),
        )
    return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff)


# (H) CLI entry point: build the oracle deps, then cgr's, score them, write the
# (H) score/diff files into out_dir and render the result.
def main(
    target: Annotated[
        Path, typer.Option(help="cgr source to evaluate import resolution for.")
    ] = console_target,
    project_name: Annotated[
        str, typer.Option(help="cgr project name; defaults to target dir name.")
    ] = "",
    out_dir: Annotated[
        Path, typer.Option(help="Directory for imports_scores.csv and the diff json.")
    ] = Path(ec.DEFAULT_OUT_DIR),
) -> None:
    root = target.resolve()
    project = project_name if project_name else root.name
    logger.info(ls.IMPORTS_TARGET.format(target=root, project=project))

    oracle_deps = oracle_import_deps(root, project)
    logger.success(ls.IMPORTS_ORACLE_DONE.format(count=len(oracle_deps)))
    cgr_deps = cgr_import_deps(root, project)
    logger.success(ls.IMPORTS_CGR_DONE.format(count=len(cgr_deps)))

    scored = score_import_deps(cgr_deps, oracle_deps)
    write_outputs(scored, out_dir, ec.IMPORTS_SCORES_FILENAME, ec.IMPORTS_DIFF_FILENAME)
    render(scored, ec.IMPORTS_TITLE)


if __name__ == "__main__":
    typer.run(main)
# (H) Incremental-update correctness eval. cgr's incremental indexer re-parses
# (H) only changed files; the promise is that the resulting graph equals a clean
# (H) forced re-index of the same tree. This eval verifies that promise: index a
# (H) repo, apply a semantically neutral edit to one file (a trailing comment
# (H) that changes the hash but not the AST), run an incremental update, then
# (H) diff the mutated graph against a clean re-index of the identical on-disk
# (H) state. The clean re-index is the oracle, so any divergence is a real
# (H) incremental-update bug (e.g. dropped inbound CALLS, issue #532).
import os
import shutil
import tempfile
from pathlib import Path
from typing import Annotated

import typer
from loguru import logger
from tree_sitter import Parser

from codebase_rag import constants as cs
from codebase_rag.graph_updater import GraphUpdater
from codebase_rag.parser_loader import load_parsers
from codebase_rag.types_defs import LanguageQueries

from . import constants as ec
from . import logs as ls
from .ast_oracle import _iter_py_files
from .cgr_graph import _StatefulIngestor
from .score import _prf
from .structure_report import render, write_outputs
from .types_defs import DiffBucket, GraphState, LocationStats, ScoreResult, ScoreRow

console_target = Path(ec.INCREMENTAL_DEFAULT_TARGET)

_Parsers = dict[cs.SupportedLanguage, Parser]
_Queries = dict[cs.SupportedLanguage, LanguageQueries]
_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0)


# (H) Return the file bytes with ec.NEUTRAL_EDIT_COMMENT (encoded with
# (H) cs.ENCODING_UTF8) appended.
def neutral_edit(content: bytes) -> bytes:
    suffix = ec.NEUTRAL_EDIT_COMMENT.encode(cs.ENCODING_UTF8)
    return content + suffix


# (H) Freeze a store into a comparable GraphState: nodes as (label, str(uid))
# (H) and edges as 5-tuples with every component stringified.
def snapshot(store: _StatefulIngestor) -> GraphState:
    node_keys = {(label, str(uid)) for label, uid in store.nodes}
    edge_keys: set[tuple[str, str, str, str, str]] = set()
    for src_label, src_val, rel, dst_label, dst_val in store.edges:
        edge_keys.add(
            (str(src_label), str(src_val), str(rel), str(dst_label), str(dst_val))
        )
    return GraphState(nodes=frozenset(node_keys), edges=frozenset(edge_keys))


# (H) Delete every hash-cache and dir-mtime file found anywhere under `work`.
def _purge_index_state(work: Path) -> None:
    # (H) A copied tree may carry cgr's own hash/dir-mtime caches (the real
    # (H) codebase_rag source does). Left in place, a future-dated cache makes
    # (H) the baseline index skip every file, so all such state is removed
    # (H) before indexing.
    state_files = (cs.HASH_CACHE_FILENAME, cs.DIR_MTIMES_FILENAME)
    for filename in state_files:
        for leftover in work.rglob(filename):
            leftover.unlink()


# (H) Run one GraphUpdater pass over `repo` into `store`; `force` is handed
# (H) straight to GraphUpdater.run.
def _index(
    store: _StatefulIngestor,
    repo: Path,
    project: str,
    parsers: _Parsers,
    queries: _Queries,
    force: bool,
) -> None:
    updater = GraphUpdater(
        ingestor=store,
        repo_path=repo,
        parsers=parsers,
        queries=queries,
        project_name=project,
    )
    updater.run(force=force)


# (H) One probe: copy the repo into a fresh work dir, index it, append a
# (H) neutral comment to `target_rel`, re-index into the same store, then index
# (H) the identical on-disk state into a new store with force=True. Returns
# (H) (incremental state, clean state).
def run_neutral_edit_scenario(
    repo_src: Path,
    project: str,
    target_rel: str,
    parsers: _Parsers,
    queries: _Queries,
    work_root: Path,
) -> tuple[GraphState, GraphState]:
    work = work_root / ec.INCREMENTAL_WORK_DIRNAME
    # (H) Each probe starts from a pristine copy; drop the previous one first.
    if work.exists():
        shutil.rmtree(work)
    shutil.copytree(repo_src, work)
    _purge_index_state(work)

    live_store = _StatefulIngestor()
    _index(live_store, work, project, parsers, queries, force=False)

    # (H) The neutral edit must read as "changed": stamp the edited file with an
    # (H) mtime later than the hash cache's (cache mtime plus
    # (H) ec.INCREMENTAL_MTIME_BUMP) so the in-sync fast path and the per-file
    # (H) mtime gate both fire. The cache mtime is read before the file is
    # (H) rewritten, and utime runs after the write so it is not overwritten.
    cache_file = work / cs.HASH_CACHE_FILENAME
    bumped = cache_file.stat().st_mtime + ec.INCREMENTAL_MTIME_BUMP
    edited = work / target_rel
    original_bytes = edited.read_bytes()
    edited.write_bytes(neutral_edit(original_bytes))
    os.utime(edited, (bumped, bumped))

    _index(live_store, work, project, parsers, queries, force=False)
    incremental_state = snapshot(live_store)

    fresh_store = _StatefulIngestor()
    _index(fresh_store, work, project, parsers, queries, force=True)
    clean_state = snapshot(fresh_store)
    return incremental_state, clean_state


# (H) Rebuild a ScoreRow from summed counts. Precision, recall and f1 are 0.0
# (H) when their denominator is zero and are rounded to ec.ROUND_DIGITS.
def _recompute(category: str, label: str, tp: int, fp: int, fn: int) -> ScoreRow:
    predicted = tp + fp
    expected = tp + fn
    precision = tp / predicted if predicted else 0.0
    recall = tp / expected if expected else 0.0
    combined = precision + recall
    f1 = 2 * precision * recall / combined if combined else 0.0
    return ScoreRow(
        category=category,
        label=label,
        tp=tp,
        fp=fp,
        fn=fn,
        precision=round(precision, ec.ROUND_DIGITS),
        recall=round(recall, ec.ROUND_DIGITS),
        f1=round(f1, ec.ROUND_DIGITS),
    )


# (H) Render one node for the diff json via ec.STATE_NODE_REPR.
def _node_repr(node: tuple[str, str]) -> str:
    label, uid = node
    return ec.STATE_NODE_REPR.format(label=label, uid=uid)


# (H) Render one edge for the diff json via ec.STATE_EDGE_REPR.
def _edge_repr(edge: tuple[str, str, str, str, str]) -> str:
    from_label, from_val, rel, to_label, to_val = edge
    return ec.STATE_EDGE_REPR.format(
        rel=rel, fl=from_label, fv=from_val, tl=to_label, tv=to_val
    )


# (H) Grade the incremental graph against the clean one, one row per node label
# (H) and one per relationship type, each with a diff bucket.
def compare_states(incremental: GraphState, clean: GraphState) -> ScoreResult:
    # (H) clean is the oracle: missing = present in clean, absent from
    # (H) incremental (fn); stale = present in incremental, absent from clean
    # (H) (fp).
    rows: list[ScoreRow] = []
    diff: dict[str, DiffBucket] = {}

    node_labels = {node[0] for node in incremental.nodes | clean.nodes}
    for label in sorted(node_labels):
        got = {node for node in incremental.nodes if node[0] == label}
        want = {node for node in clean.nodes if node[0] == label}
        row = _prf(ec.Category.NODE.value, label, got, want)
        if row is None:
            continue
        rows.append(row)
        diff[ec.INCREMENTAL_NODE_DIFF_PREFIX + label] = DiffBucket(
            missing=list(map(_node_repr, sorted(want - got))),
            extra=list(map(_node_repr, sorted(got - want))),
        )

    rel_types = {edge[2] for edge in incremental.edges | clean.edges}
    for rel in sorted(rel_types):
        got_e = {edge for edge in incremental.edges if edge[2] == rel}
        want_e = {edge for edge in clean.edges if edge[2] == rel}
        row = _prf(ec.Category.EDGE.value, rel, got_e, want_e)
        if row is None:
            continue
        rows.append(row)
        diff[ec.INCREMENTAL_EDGE_DIFF_PREFIX + rel] = DiffBucket(
            missing=list(map(_edge_repr, sorted(want_e - got_e))),
            extra=list(map(_edge_repr, sorted(got_e - want_e))),
        )

    return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff)


# (H) Combine per-probe results: sum tp/fp/fn per (category, label) and
# (H) recompute the ratios; union each diff bucket's entries, de-duplicated,
# (H) sorted and truncated to ec.INCREMENTAL_DIFF_SAMPLE_CAP.
def _merge(results: list[ScoreResult]) -> ScoreResult:
    totals: dict[tuple[str, str], tuple[int, int, int]] = {}
    samples: dict[str, tuple[set[str], set[str]]] = {}
    for result in results:
        for row in result.rows:
            key = (row["category"], row["label"])
            seen = totals.get(key, (0, 0, 0))
            totals[key] = (
                seen[0] + row["tp"],
                seen[1] + row["fp"],
                seen[2] + row["fn"],
            )
        for bucket_key, bucket in result.diff.items():
            missing, extra = samples.setdefault(bucket_key, (set(), set()))
            missing.update(bucket["missing"])
            extra.update(bucket["extra"])

    rows: list[ScoreRow] = []
    for key in sorted(totals):
        category, label = key
        tp, fp, fn = totals[key]
        rows.append(_recompute(category, label, tp, fp, fn))

    cap = ec.INCREMENTAL_DIFF_SAMPLE_CAP
    capped: dict[str, DiffBucket] = {}
    for bucket_key, (missing, extra) in samples.items():
        capped[bucket_key] = DiffBucket(
            missing=sorted(missing)[:cap],
            extra=sorted(extra)[:cap],
        )
    return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=capped)


# (H) CLI entry point: probe the first `sample` python files in sorted order
# (H) (all of them when sample <= 0), run the neutral-edit scenario for each,
# (H) then merge, write and render the results. Exits with code 1 when the
# (H) target contains no python files.
def main(
    target: Annotated[
        Path, typer.Option(help="cgr source to evaluate incremental updates for.")
    ] = console_target,
    project_name: Annotated[
        str, typer.Option(help="cgr project name; defaults to target dir name.")
    ] = "",
    sample: Annotated[
        int, typer.Option(help="Number of python files to probe with a neutral edit.")
    ] = ec.INCREMENTAL_DEFAULT_SAMPLE,
    out_dir: Annotated[
        Path,
        typer.Option(help="Directory for incremental_scores.csv and the diff json."),
    ] = Path(ec.DEFAULT_OUT_DIR),
) -> None:
    root = target.resolve()
    project = project_name if project_name else root.name
    logger.info(ls.INCREMENTAL_TARGET.format(target=root, project=project))

    py_files = sorted(
        path.relative_to(root).as_posix() for path in _iter_py_files(root)
    )
    if not py_files:
        logger.error(ls.INCREMENTAL_NO_PY.format(target=root))
        raise typer.Exit(code=1)

    probes = py_files[:sample] if sample > 0 else py_files
    probe_count = len(probes)
    logger.info(ls.INCREMENTAL_SAMPLED.format(count=probe_count, total=len(py_files)))

    parsers, queries = load_parsers()
    results: list[ScoreResult] = []
    clean_equivalent = 0
    # (H) Work outside the repo tree: each probe copies the whole target, so a
    # (H) work dir under out_dir would pollute the repo and be scanned by hooks.
    work_root = Path(tempfile.mkdtemp(prefix=ec.INCREMENTAL_TMP_PREFIX))
    try:
        for index, rel in enumerate(probes, start=1):
            logger.info(
                ls.INCREMENTAL_PROBE.format(index=index, total=probe_count, path=rel)
            )
            incremental, clean = run_neutral_edit_scenario(
                root, project, rel, parsers, queries, work_root
            )
            matches = incremental == clean
            if matches:
                clean_equivalent += 1
            else:
                missing_edges = clean.edges - incremental.edges
                stale_edges = incremental.edges - clean.edges
                logger.warning(
                    ls.INCREMENTAL_PROBE_DIVERGED.format(
                        path=rel,
                        missing=len(missing_edges),
                        stale=len(stale_edges),
                    )
                )
            results.append(compare_states(incremental, clean))
    finally:
        # (H) Always remove the temporary work tree, even when a probe raises.
        shutil.rmtree(work_root, ignore_errors=True)

    logger.success(ls.INCREMENTAL_DONE.format(clean=clean_equivalent, total=probe_count))
    merged = _merge(results)
    write_outputs(
        merged, out_dir, ec.INCREMENTAL_SCORES_FILENAME, ec.INCREMENTAL_DIFF_FILENAME
    )
    render(merged, ec.INCREMENTAL_TITLE)


if __name__ == "__main__":
    typer.run(main)
# (H) Inheritance eval. Grades cgr's resolved INHERITS (subclass_qn -> base_qn)
# (H) and OVERRIDES (subclass_qn, base_qn, method) against an ast oracle. The L1
# (H) structure eval only checks INHERITS by the base's simple name; this checks
# (H) that cgr resolves the base to the correct first-party class and that method
# (H) overrides are attributed to the right base. The oracle resolves a base only
# (H) via same-module definitions and un-aliased `from <module> import <Name>`,
# (H) and skips attribute/aliased/ambiguous/external bases (counted, never
# (H) silently dropped), so it stays independent of cgr's resolver and never
# (H) invents an edge.
import ast
from pathlib import Path
from typing import Annotated, NamedTuple

import typer
from loguru import logger

from codebase_rag import constants as cs

from . import constants as ec
from . import logs as ls
from .ast_oracle import _from_base_parts, _iter_py_files, _module_dotted
from .cgr_graph import _capture
from .score import _prf
from .structure_report import render, write_outputs
from .types_defs import DiffBucket, LocationStats, ScoreResult, ScoreRow

console_target = Path(ec.INHERITANCE_DEFAULT_TARGET)

_CLASS = cs.NodeLabel.CLASS.value
_METHOD = cs.NodeLabel.METHOD.value
_INHERITS = cs.RelationshipType.INHERITS.value
_OVERRIDES = cs.RelationshipType.OVERRIDES.value
_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0)

InheritEdge = tuple[str, str]
OverrideEdge = tuple[str, str, str]


# (H) One top-level class as the oracle sees it: its qualified name, the dotted
# (H) module that defines it, the names of methods defined directly in its body,
# (H) and its raw base expressions.
class _ClassInfo(NamedTuple):
    qn: str
    module: str
    methods: frozenset[str]
    bases: tuple[ast.expr, ...]


class OracleResult(NamedTuple):
    inherits: set[InheritEdge]
    overrides: set[OverrideEdge]
    # (H) Universe of top-level classes the oracle understands; cgr edges whose
    # (H) subclass is outside it (e.g. a class nested in a function) are not graded.
    top_classes: frozenset[str]
    # (H) Subclasses eligible for OVERRIDES grading: top-level classes with
    # (H) exactly one base the oracle could resolve, so override attribution is
    # (H) unambiguous. Classes with several resolved bases (mixin/MRO) are
    # (H) excluded on both sides rather than guessed at.
    override_scope: frozenset[str]


class CgrResult(NamedTuple):
    inherits: set[InheritEdge]
    overrides: set[OverrideEdge]


# (H) Names of the sync and async functions defined directly in the class body.
def _method_names(node: ast.ClassDef) -> frozenset[str]:
    return frozenset(
        child.name
        for child in node.body
        if isinstance(child, ast.FunctionDef | ast.AsyncFunctionDef)
    )


# (H) Local name -> dotted source module, for `from <module> import <Name>`
# (H) statements whose base resolves under the project package (first-party).
# (H) Star imports and aliased imports are left out.
def _from_import_map(tree: ast.Module, rel: str, project: str) -> dict[str, str]:
    pkg_parts = [project, *Path(rel).parent.parts]
    mapping: dict[str, str] = {}
    for node in ast.walk(tree):
        if not isinstance(node, ast.ImportFrom):
            continue
        base_parts = _from_base_parts(node, pkg_parts)
        if not base_parts or base_parts[0] != project:
            continue
        source = cs.SEPARATOR_DOT.join(base_parts)
        for alias in node.names:
            if alias.name == ec.STAR_IMPORT:
                continue
            # (H) `from m import Base as B` binds B, but m only defines Base.
            # (H) The resolver looks a name up as "<source>.<local name>", so
            # (H) recording B -> m would look up m.B: never the imported class,
            # (H) and a wrong edge if m happens to define an unrelated B.
            # (H) Aliased bases are therefore left unresolved (and counted as
            # (H) skipped) instead of risking an invented edge.
            if alias.asname is not None and alias.asname != alias.name:
                continue
            mapping[alias.name] = source
    return mapping


# (H) Parse every python file under target and return (classes, import_maps).
# (H) classes maps qualified name -> _ClassInfo for top-level classes only.
# (H) import_maps is a flat dict keyed "<module>" + ec.SEP_NUL + "<name>" whose
# (H) value is the source module `name` was imported from inside `module`.
# (H) Files that fail to read or parse are logged with a warning and skipped.
def _collect(
    target: Path, project: str
) -> tuple[dict[str, _ClassInfo], dict[str, str]]:
    classes: dict[str, _ClassInfo] = {}
    per_module_imports: dict[str, dict[str, str]] = {}
    for path in _iter_py_files(target):
        rel = path.relative_to(target).as_posix()
        try:
            tree = ast.parse(path.read_text(encoding=cs.ENCODING_UTF8))
        except (SyntaxError, UnicodeDecodeError, ValueError) as error:
            logger.warning(ls.ORACLE_PARSE_FAILED.format(path=rel, error=error))
            continue
        module = _module_dotted(rel, project)
        per_module_imports[module] = _from_import_map(tree, rel, project)
        for node in tree.body:
            if isinstance(node, ast.ClassDef):
                qn = f"{module}{cs.SEPARATOR_DOT}{node.name}"
                classes[qn] = _ClassInfo(
                    qn=qn,
                    module=module,
                    methods=_method_names(node),
                    bases=tuple(node.bases),
                )
    # (H) Flatten the per-module maps into one dict so base resolution can look
    # (H) an imported name up in its own module's scope with a single key.
    import_maps: dict[str, str] = {
        f"{module}{ec.SEP_NUL}{name}": source
        for module, mapping in per_module_imports.items()
        for name, source in mapping.items()
    }
    return classes, import_maps


# (H) Resolve one base expression of `info` to a known class qualified name, or
# (H) None. Only bare names are handled: first a class of that name in the same
# (H) module, then a first-party from-import of that name.
def _resolve_base(
    base: ast.expr,
    info: _ClassInfo,
    classes: dict[str, _ClassInfo],
    import_maps: dict[str, str],
) -> str | None:
    if not isinstance(base, ast.Name):
        # (H) Attribute (pkg.Base) and other base forms are not resolved here.
        return None
    name = base.id
    same_module = f"{info.module}{cs.SEPARATOR_DOT}{name}"
    if same_module in classes:
        return same_module
    source = import_maps.get(f"{info.module}{ec.SEP_NUL}{name}")
    if source is not None:
        imported = f"{source}{cs.SEPARATOR_DOT}{name}"
        if imported in classes:
            return imported
    return None


# (H) Build the oracle's INHERITS and OVERRIDES edges. Every base that cannot
# (H) be resolved is counted and the total is logged; no edge is emitted for it.
def oracle_inheritance(target: Path, project: str) -> OracleResult:
    classes, import_maps = _collect(target, project)
    inherits: set[InheritEdge] = set()
    overrides: set[OverrideEdge] = set()
    override_scope: set[str] = set()
    skipped = 0
    for info in classes.values():
        resolved_bases: list[str] = []
        for base in info.bases:
            base_qn = _resolve_base(base, info, classes, import_maps)
            if base_qn is None:
                skipped += 1
                continue
            resolved_bases.append(base_qn)
            inherits.add((info.qn, base_qn))
        # (H) Grade overrides only for classes with exactly one resolved
        # (H) first-party base; with multiple bases the MRO decides which base a
        # (H) method overrides, a call this ast oracle does not model.
        # (H) NOTE(review): the count is of resolved bases, so a class with one
        # (H) resolved base plus skipped ones is still in scope; confirm that
        # (H) is the intended reading of "single-base".
        if len(resolved_bases) == 1:
            override_scope.add(info.qn)
            base_qn = resolved_bases[0]
            for method in info.methods & classes[base_qn].methods:
                overrides.add((info.qn, base_qn, method))
    logger.info(ls.INHERITANCE_SKIPPED_BASES.format(count=skipped))
    return OracleResult(
        inherits=inherits,
        overrides=overrides,
        top_classes=frozenset(classes),
        override_scope=frozenset(override_scope),
    )


# (H) cgr side: keep Class -> Class INHERITS edges and Method -> Method
# (H) OVERRIDES edges whose both endpoints belong to first-party classes, i.e.
# (H) class nodes carrying a path that ends in ec.PY_SUFFIX. An override is
# (H) reduced to (subclass_qn, base_qn, method) by splitting off the last dotted
# (H) segment of each method's qualified name.
def cgr_inheritance(target: Path, project: str) -> CgrResult:
    ingestor = _capture(target, project)
    first_party: set[str] = {
        str(uid)
        for (label, uid), props in ingestor.nodes.items()
        if label == _CLASS
        and props.get(cs.KEY_PATH)
        and str(props[cs.KEY_PATH]).endswith(ec.PY_SUFFIX)
    }
    inherits: set[InheritEdge] = set()
    overrides: set[OverrideEdge] = set()
    for from_label, from_val, rel_type, to_label, to_val in ingestor.rels:
        if rel_type == _INHERITS and from_label == _CLASS and to_label == _CLASS:
            if str(from_val) in first_party and str(to_val) in first_party:
                inherits.add((str(from_val), str(to_val)))
        elif rel_type == _OVERRIDES and from_label == _METHOD and to_label == _METHOD:
            sub, _sep, method = str(from_val).rpartition(cs.SEPARATOR_DOT)
            base, _sep2, _m = str(to_val).rpartition(cs.SEPARATOR_DOT)
            if sub in first_party and base in first_party:
                overrides.add((sub, base, method))
    return CgrResult(inherits=inherits, overrides=overrides)


# (H) Render one INHERITS edge for the diff json via ec.INHERITS_EDGE_REPR.
def _inherit_repr(edge: InheritEdge) -> str:
    return ec.INHERITS_EDGE_REPR.format(sub=edge[0], base=edge[1])


# (H) Render one OVERRIDES edge for the diff json via ec.OVERRIDES_EDGE_REPR.
def _override_repr(edge: OverrideEdge) -> str:
    return ec.OVERRIDES_EDGE_REPR.format(sub=edge[0], base=edge[1], method=edge[2])


# (H) Score cgr against the oracle, one row for INHERITS and one for OVERRIDES,
# (H) each with a diff bucket of oracle-only (missing) and cgr-only (extra) edges.
def score_inheritance(cgr: CgrResult, oracle: OracleResult) -> ScoreResult:
    # (H) Restrict cgr to the oracle's gradeable universe: subclasses the oracle
    # (H) understands (top-level) for INHERITS, and subclasses in override_scope
    # (H) for OVERRIDES. This drops nested-class and multi-base-MRO edges the
    # (H) oracle cannot adjudicate, rather than scoring cgr against an
    # (H) incomplete oracle.
    cgr_inh = {e for e in cgr.inherits if e[0] in oracle.top_classes}
    cgr_ovr = {e for e in cgr.overrides if e[0] in oracle.override_scope}
    oracle_inh = oracle.inherits
    oracle_ovr = oracle.overrides
    rows: list[ScoreRow] = []
    diff: dict[str, DiffBucket] = {}

    inh_row = _prf(ec.Category.EDGE.value, ec.INHERITS_LABEL, cgr_inh, oracle_inh)
    if inh_row is not None:
        rows.append(inh_row)
        diff[ec.INHERITANCE_DIFF_PREFIX + ec.INHERITS_LABEL] = DiffBucket(
            missing=[_inherit_repr(e) for e in sorted(oracle_inh - cgr_inh)],
            extra=[_inherit_repr(e) for e in sorted(cgr_inh - oracle_inh)],
        )

    ovr_row = _prf(ec.Category.EDGE.value, ec.OVERRIDES_LABEL, cgr_ovr, oracle_ovr)
    if ovr_row is not None:
        rows.append(ovr_row)
        diff[ec.INHERITANCE_DIFF_PREFIX + ec.OVERRIDES_LABEL] = DiffBucket(
            missing=[_override_repr(e) for e in sorted(oracle_ovr - cgr_ovr)],
            extra=[_override_repr(e) for e in sorted(cgr_ovr - oracle_ovr)],
        )

    return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff)


# (H) CLI entry point: build the oracle edges, then cgr's, score them, write the
# (H) score/diff files into out_dir and render the result.
def main(
    target: Annotated[
        Path, typer.Option(help="cgr source to evaluate inheritance for.")
    ] = console_target,
    project_name: Annotated[
        str, typer.Option(help="cgr project name; defaults to target dir name.")
    ] = "",
    out_dir: Annotated[
        Path, typer.Option(help="Directory for inheritance_scores.csv and diff json.")
    ] = Path(ec.DEFAULT_OUT_DIR),
) -> None:
    target = target.resolve()
    project = project_name or target.name
    logger.info(ls.INHERITANCE_TARGET.format(target=target, project=project))

    oracle = oracle_inheritance(target, project)
    logger.success(
        ls.INHERITANCE_ORACLE_DONE.format(
            inherits=len(oracle.inherits), overrides=len(oracle.overrides)
        )
    )
    cgr = cgr_inheritance(target, project)
    logger.success(
        ls.INHERITANCE_CGR_DONE.format(
            inherits=len(cgr.inherits), overrides=len(cgr.overrides)
        )
    )

    result = score_inheritance(cgr, oracle)
    write_outputs(
        result, out_dir, ec.INHERITANCE_SCORES_FILENAME, ec.INHERITANCE_DIFF_FILENAME
    )
    render(result, ec.INHERITANCE_TITLE)


if __name__ == "__main__":
    typer.run(main)
import logs as ls +from .ast_oracle import _from_base_parts +from .cgr_graph import _capture +from .module_calls import _callee_name, _is_dunder +from .retrieval import parse_py_trees +from .score import _prf +from .structure_report import render, write_outputs +from .types_defs import DiffBucket, LocationStats, ScoreResult, ScoreRow + +console_target = Path(ec.INSTANTIATION_DEFAULT_TARGET) + +_CLASS = cs.NodeLabel.CLASS.value +_INSTANTIATES = cs.RelationshipType.INSTANTIATES.value +_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0) + +InstantiationEdge = tuple[str, str] + + +def _class_names(trees: list[tuple[str, ast.Module]]) -> set[str]: + names: set[str] = set() + for _rel, tree in trees: + for node in ast.walk(tree): + if isinstance(node, ast.ClassDef) and not _is_dunder(node.name): + names.add(node.name) + return names + + +def _externally_shadowed_names(tree: ast.Module, rel: str, project: str) -> set[str]: + # (H) Names this file rebinds to a non-first-party import (a stdlib/third-party + # (H) `from ext import Name`, or any `import mod` that binds `mod`). A bare + # (H) `Name()` call on such a name is not a first-party instantiation, so the + # (H) oracle must not credit one against the shared simple-name class set, or + # (H) it would report a false missing edge and unfairly lower cgr recall. 
+ pkg_parts = [project, *Path(rel).parent.parts] + shadowed: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + shadowed.add(alias.asname or alias.name.split(cs.SEPARATOR_DOT, 1)[0]) + elif isinstance(node, ast.ImportFrom): + base_parts = _from_base_parts(node, pkg_parts) + if base_parts and base_parts[0] == project: + continue + for alias in node.names: + shadowed.add(alias.asname or alias.name) + return shadowed + + +def oracle_instantiations(target: Path, project: str) -> set[InstantiationEdge]: + trees, _files = parse_py_trees(target) + classes = _class_names(trees) + edges: set[InstantiationEdge] = set() + for rel, tree in trees: + shadowed = _externally_shadowed_names(tree, rel, project) + for node in ast.walk(tree): + if isinstance(node, ast.Call) and (name := _callee_name(node.func)): + # (H) A simple-name callee bound to an external import names that + # (H) import, not the first-party class; an attribute callee + # (H) (`mod.Cls()`) is qualified, so the shadow check applies only + # (H) to the bare-name form. 
+ if name in classes and not ( + isinstance(node.func, ast.Name) and name in shadowed + ): + edges.add((rel, name)) + return edges + + +def cgr_instantiations(target: Path, project: str) -> set[InstantiationEdge]: + ingestor = _capture(target, project) + caller_path: dict[tuple[str, str], str] = { + (str(label), str(uid)): str(props[cs.KEY_PATH]) + for (label, uid), props in ingestor.nodes.items() + if props.get(cs.KEY_PATH) and str(props[cs.KEY_PATH]).endswith(ec.PY_SUFFIX) + } + edges: set[InstantiationEdge] = set() + for from_label, from_val, rel_type, _to_label, to_val in ingestor.rels: + if rel_type != _INSTANTIATES: + continue + path = caller_path.get((str(from_label), str(from_val))) + if path is None: + continue + name = str(to_val).split(cs.SEPARATOR_DOT)[-1] + if not _is_dunder(name): + edges.add((path, name)) + return edges + + +def _edge_repr(edge: InstantiationEdge) -> str: + return ec.INSTANTIATION_EDGE_REPR.format(file=edge[0], cls=edge[1]) + + +def score_instantiations( + cgr: set[InstantiationEdge], oracle: set[InstantiationEdge] +) -> ScoreResult: + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + row = _prf(ec.Category.EDGE.value, ec.INSTANTIATES_LABEL, cgr, oracle) + if row is not None: + rows.append(row) + diff[ec.INSTANTIATION_DIFF_PREFIX + ec.INSTANTIATES_LABEL] = DiffBucket( + missing=[_edge_repr(e) for e in sorted(oracle - cgr)], + extra=[_edge_repr(e) for e in sorted(cgr - oracle)], + ) + return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff) + + +def main( + target: Annotated[ + Path, typer.Option(help="cgr source to evaluate instantiation for.") + ] = console_target, + project_name: Annotated[ + str, typer.Option(help="cgr project name; defaults to target dir name.") + ] = "", + out_dir: Annotated[ + Path, typer.Option(help="Directory for instantiation_scores.csv and diff json.") + ] = Path(ec.DEFAULT_OUT_DIR), +) -> None: + target = target.resolve() + project = project_name or target.name + 
def main(
    target: Annotated[
        Path, typer.Option(help="Directory of Java sources to evaluate.")
    ] = Path(ec.GO_DEFAULT_TARGET),
    project_name: Annotated[
        str, typer.Option(help="cgr project name; defaults to target dir name.")
    ] = "",
    out_dir: Annotated[
        Path, typer.Option(help="Directory for java_scores.csv and java_diff.json.")
    ] = Path(ec.DEFAULT_OUT_DIR),
) -> None:
    # (H) Grade cgr's Java structure graph against the Java oracle: build both
    # (H) graphs for `target`, score them, write the scores/diff files into
    # (H) `out_dir` and render the result table. Exits with code 1 when
    # (H) java_available() reports the oracle cannot run.
    # (H) NOTE(review): the default target is ec.GO_DEFAULT_TARGET although this
    # (H) eval grades Java sources; evals/java_retrieval.py defaults to
    # (H) ec.JAVA_DEFAULT_TARGET. Confirm which one is intended here.
    if not java_available():
        # (H) The effective JAVA_ORACLE_MISSING template in evals/logs.py has a
        # (H) {binary} placeholder; fill it so literal braces never reach the log.
        logger.error(ls.JAVA_ORACLE_MISSING.format(binary=ec.JAVA_BIN))
        raise typer.Exit(code=1)

    target = target.resolve()
    # (H) An empty --project-name falls back to the target directory's name.
    project = project_name or target.name

    logger.info(ls.JAVA_EXTRACTING_CGR.format(target=target, project=project))
    cgr = extract_cgr_java_graph(target, project)
    logger.success(ls.JAVA_CGR_DONE.format(count=len(cgr.nodes)))

    logger.info(ls.JAVA_EXTRACTING_ORACLE.format(binary=ec.JAVA_BIN, target=target))
    oracle = run_java_oracle(target)
    logger.success(ls.JAVA_ORACLE_DONE.format(count=len(oracle.nodes)))

    result = score_structure(
        cgr, oracle, ec.JAVA_SCORED_NODE_KINDS, ec.SCORED_EDGE_TYPES, grade_spans=True
    )
    write_outputs(result, out_dir, ec.JAVA_SCORES_FILENAME, ec.JAVA_DIFF_FILENAME)
    render(result, _TITLE)
def cgr_java_call_edges(
    target: Path, project: str, declared: frozenset[str]
) -> set[CallEdge]:
    # (H) Reduce cgr's CALLS relationships to (caller file, callee simple name)
    # (H) pairs, keeping only callers that live in a Java file and callees whose
    # (H) simple name is in `declared`.
    ingestor = _capture(target, project)
    # (H) Index nodes whose path ends with the Java suffix by the stringified
    # (H) (label, uid) pair, which is how relationship sources are looked up below.
    caller_path: dict[tuple[str, str], str] = {
        (str(label), str(uid)): str(props[cs.KEY_PATH])
        for (label, uid), props in ingestor.nodes.items()
        if props.get(cs.KEY_PATH) and str(props[cs.KEY_PATH]).endswith(ec.JAVA_SUFFIX)
    }
    edges: set[CallEdge] = set()
    for from_label, from_val, rel_type, _to_label, to_val in ingestor.rels:
        if rel_type != _CALLS:
            continue
        path = caller_path.get((str(from_label), str(from_val)))
        if path is None:
            continue
        # (H) A Java Method qn carries its parameter signature (Class.name(args)).
        # (H) Drop the signature BEFORE taking the last dotted component: the
        # (H) argument list may itself contain dots (qualified types, `...`
        # (H) varargs), which would otherwise be taken for the qn separator and
        # (H) yield a fragment of the signature instead of the callee name.
        qualified = str(to_val).split(cs.CHAR_PAREN_OPEN, 1)[0]
        name = qualified.split(cs.SEPARATOR_DOT)[-1]
        if name in declared:
            edges.add((path, name))
    return edges
def main(
    target: Annotated[
        Path, typer.Option(help="Directory of JavaScript sources to evaluate.")
    ] = Path(ec.GO_DEFAULT_TARGET),
    project_name: Annotated[
        str, typer.Option(help="cgr project name; defaults to target dir name.")
    ] = "",
    out_dir: Annotated[
        Path, typer.Option(help="Directory for js_scores.csv and js_diff.json.")
    ] = Path(ec.DEFAULT_OUT_DIR),
) -> None:
    # (H) Grade cgr's JavaScript structure graph against the JavaScript oracle:
    # (H) build both graphs for `target`, score them, write the scores/diff files
    # (H) into `out_dir` and render the result table. Exits with code 1 when
    # (H) typescript_available() reports the oracle cannot run.
    # (H) NOTE(review): the default target is ec.GO_DEFAULT_TARGET although this
    # (H) eval grades JavaScript sources. Confirm this is intended.
    if not typescript_available():
        # (H) The effective TS_ORACLE_MISSING template in evals/logs.py has a
        # (H) {binary} placeholder; fill it so literal braces never reach the log.
        logger.error(ls.TS_ORACLE_MISSING.format(binary=ec.NODE_BIN))
        raise typer.Exit(code=1)

    target = target.resolve()
    # (H) An empty --project-name falls back to the target directory's name.
    project = project_name or target.name

    logger.info(ls.JS_EXTRACTING_CGR.format(target=target, project=project))
    cgr = extract_cgr_js_graph(target, project)
    logger.success(ls.JS_CGR_DONE.format(count=len(cgr.nodes)))

    logger.info(ls.JS_EXTRACTING_ORACLE.format(binary=ec.NODE_BIN, target=target))
    oracle = run_javascript_oracle(target)
    logger.success(ls.JS_ORACLE_DONE.format(count=len(oracle.nodes)))

    result = score_structure(
        cgr, oracle, ec.JS_SCORED_NODE_KINDS, ec.SCORED_EDGE_TYPES, grade_spans=True
    )
    write_outputs(result, out_dir, ec.JS_SCORES_FILENAME, ec.JS_DIFF_FILENAME)
    render(result, _TITLE)
pathlib import Path +from typing import Annotated + +import typer +from loguru import logger +from rich.console import Console +from rich.table import Table + +from codebase_rag.graph_updater import GraphUpdater +from codebase_rag.parser_loader import load_parsers +from codebase_rag.types_defs import PropertyDict, PropertyValue, ResultRow + +from . import constants as ec +from . import logs as ls +from .calls_trace import trace_calls +from .cgr_graph import extract_cgr_calls + +console = Console() + +FIXTURE_A = """class Animal: + def speak(self) -> str: + return self.sound() + + def sound(self) -> str: + return "..." + + +class Dog(Animal): + def sound(self) -> str: + return "woof" + + +def make(kind: str) -> Animal: + return Dog() if kind == "dog" else Animal() +""" + +FIXTURE_B = """from .a import Animal, Dog, make + + +def greet(kind: str) -> str: + animal = make(kind) + return describe(animal) + + +def describe(animal: Animal) -> str: + return animal.speak() + + +def run() -> str: + d = Dog() + return d.speak() + greet("dog") +""" + + +FIXTURE_C = """import asyncio +from dataclasses import dataclass +from functools import wraps +from typing import Iterator + +from .a import Animal, Dog + + +def trace(fn): + @wraps(fn) + def wrapper(*args, **kwargs): + return fn(*args, **kwargs) + + return wrapper + + +@dataclass +class Counter: + total: int = 0 + + def add(self, value: int) -> int: + self.total += value + return self.total + + @property + def doubled(self) -> int: + return self.total * 2 + + @staticmethod + def zero() -> int: + return 0 + + @classmethod + def start(cls) -> "Counter": + return cls(total=cls.zero()) + + +class Shelter(Animal): + def __init__(self) -> None: + self.pets: list[Animal] = [] + + def admit(self, pet: Animal) -> None: + self.pets.append(pet) + + def noises(self) -> list[str]: + return [pet.sound() for pet in self.pets] + + def loud(self) -> dict[str, str]: + return {pet.sound(): pet.speak() for pet in self.pets} + + +@trace +def 
build_shelter(count: int) -> Shelter: + shelter = Shelter() + for _ in range(count): + shelter.admit(Dog()) + return shelter + + +def categorize(value: int) -> str: + match value: + case 0: + return Counter.zero.__name__ + case n if n > 0: + return "positive" + case _: + return "negative" + + +def stream(limit: int) -> Iterator[int]: + counter = Counter.start() + for i in range(limit): + yield counter.add(i) + + +async def gather(limit: int) -> int: + counter = Counter() + await asyncio.sleep(0) + return counter.add(limit) + + +def run_rich() -> int: + shelter = build_shelter(2) + total = sum(len(noise) for noise in shelter.noises()) + apply = lambda c: c.doubled + return total + apply(Counter.start()) +""" + + +FIXTURE_JS_UTIL = """export function greet(name) { + return "hi " + name; +} + + +export class Base { + speak() { + return this.sound(); + } + + sound() { + return "..."; + } +} +""" + +FIXTURE_JS_APP = """import { greet, Base } from "./util.js"; + + +class Dog extends Base { + sound() { + return "woof"; + } +} + + +function run() { + const d = new Dog(); + return d.speak() + greet("dog"); +} + + +const handler = () => run(); + +export { run, handler }; +""" + + +FIXTURE_TS_SHAPES = """export interface Shape { + area(): number; +} + + +export abstract class Base implements Shape { + abstract area(): number; + + describe(): string { + return `area=${this.area()}`; + } +} +""" + +FIXTURE_TS_MAIN = """import { Base, Shape } from "./shapes"; + + +class Square extends Base { + constructor(private side: number) { + super(); + } + + area(): number { + return this.side * this.side; + } +} + + +function total(shapes: Shape[]): number { + return shapes.reduce((acc, s) => acc + s.area(), 0); +} + + +function run(): string { + const sq = new Square(3); + return sq.describe() + total([sq]); +} + +export { run }; +""" + +FIXTURE_RS_SHAPES = """pub trait Shape { + fn area(&self) -> f64; +} + +pub struct Square { + pub side: f64, +} + +impl Square { + pub fn new(side: f64) 
-> Square { + Square { side } + } +} + +impl Shape for Square { + fn area(&self) -> f64 { + self.side * self.side + } +} + +pub fn describe(s: &dyn Shape) -> f64 { + s.area() +} +""" + +FIXTURE_RS_MAIN = """mod shapes; + +use shapes::{describe, Shape, Square}; + +fn run() -> f64 { + let sq = Square::new(3.0); + describe(&sq) + sq.area() +} + +fn main() { + run(); +} +""" + +FIXTURE_GO_MAIN = """package fixture + +type Shape interface { + Area() float64 +} + +type Square struct { + Side float64 +} + +func (s Square) Area() float64 { + return s.Side * s.Side +} + +func describe(s Shape) float64 { + return s.Area() +} + +func Run() float64 { + sq := Square{Side: 3.0} + return describe(sq) + sq.Area() +} +""" + + +FIXTURE_JAVA = """package fixture; + +interface Shape { + double area(); +} + +class Square implements Shape { + private double side; + + Square(double side) { + this.side = side; + } + + public double area() { + return this.side * this.side; + } +} + +public class Service { + double describe(Shape s) { + return s.area(); + } + + double run() { + Square sq = new Square(3.0); + return describe(sq) + sq.area(); + } +} +""" + +FIXTURE_C_HEADER = """int square(int x); +int compute(int n); +""" + +FIXTURE_C_SRC = """#include "calc.h" + +int square(int x) { + return x * x; +} + +int compute(int n) { + return square(n) + square(n + 1); +} +""" + +FIXTURE_CPP = """class Shape { +public: + virtual double area() const = 0; + double describe() const { return area(); } +}; + +class Square : public Shape { + double side; + +public: + Square(double s) : side(s) {} + double area() const override { return side * side; } +}; + +double run() { + Square sq(3.0); + return sq.describe() + sq.area(); +} +""" + +FIXTURE_LUA = """local M = {} + +function M.square(x) + return x * x +end + +function M.compute(n) + return M.square(n) + M.square(n + 1) +end + +return M +""" + +FIXTURE_PHP = """side = $side; + } + + public function area(): float { + return $this->side * $this->side; + } 
def _write_fixture(root: Path) -> None:
    # (H) Create `root/fixture` (with parents) as a Python package and write one
    # (H) source file per FIXTURE_* constant into it.
    pkg = root / "fixture"
    pkg.mkdir(parents=True, exist_ok=True)
    (pkg / "__init__.py").touch()
    # (H) (file name, contents) pairs, written in this order.
    sources = (
        ("a.py", FIXTURE_A),
        ("b.py", FIXTURE_B),
        ("c.py", FIXTURE_C),
        ("util.js", FIXTURE_JS_UTIL),
        ("app.js", FIXTURE_JS_APP),
        ("shapes.ts", FIXTURE_TS_SHAPES),
        ("main.ts", FIXTURE_TS_MAIN),
        ("shapes.rs", FIXTURE_RS_SHAPES),
        ("main.rs", FIXTURE_RS_MAIN),
        ("service.go", FIXTURE_GO_MAIN),
        ("Service.java", FIXTURE_JAVA),
        ("calc.h", FIXTURE_C_HEADER),
        ("calc.c", FIXTURE_C_SRC),
        ("shapes.cpp", FIXTURE_CPP),
        ("module.lua", FIXTURE_LUA),
        ("service.php", FIXTURE_PHP),
        ("Shapes.scala", FIXTURE_SCALA),
    )
    for filename, contents in sources:
        (pkg / filename).write_text(contents)
captured = edges & static_calls + recall = len(captured) / len(edges) if edges else 1.0 + table.add_row( + label, + str(len(edges)), + str(len(captured)), + str(len(edges) - len(captured)), + f"{recall:.4f}", + ) + console.print(table) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/logs.py b/evals/logs.py new file mode 100644 index 000000000..094421529 --- /dev/null +++ b/evals/logs.py @@ -0,0 +1,130 @@ +EXTRACTING_CGR = "Building cgr graph for {target} (project={project})" +CGR_GRAPH_DONE = "cgr graph: {nodes} python nodes, {edges} scored edges" +EXTRACTING_ORACLE = "Building ast oracle for {target}" +ORACLE_GRAPH_DONE = "ast oracle: {nodes} python nodes, {edges} scored edges" +WROTE_SCORES = "Wrote scores to {path}" +WROTE_DIFF = "Wrote diff to {path}" +ORACLE_PARSE_FAILED = "Skipped unparseable file {path}: {error}" +L3_STATIC = "Extracting cgr static CALLS for {target} (project={project})" +L3_STATIC_DONE = "cgr static CALLS: {count} edges" +L3_TRACING = "Tracing a workload through {target} to collect runtime call edges" +L3_TRACED_DONE = "traced runtime call edges (first-party): {count}" +GO_EXTRACTING_CGR = "Building cgr Go nodes for {target} (project={project})" +GO_CGR_DONE = "cgr Go nodes: {count}" +GO_EXTRACTING_ORACLE = "Running go/ast oracle ({binary}) over {target}" +GO_ORACLE_DONE = "go/ast oracle nodes: {count}" +GO_ORACLE_MISSING = "Go toolchain '{binary}' not found on PATH; cannot run the oracle" +CPP_EXTRACTING_CGR = "Building cgr C/C++ nodes for {target} (project={project})" +CPP_CGR_DONE = "cgr C/C++ nodes: {count}" +CPP_CGR_SCOPED = "cgr C/C++ nodes scoped to compiled universe: {count}" +CPP_EXTRACTING_ORACLE = "Running libclang oracle over {target} (compile_commands.json)" +CPP_ORACLE_DONE = "libclang oracle nodes: {count}" +CPP_ORACLE_MISSING = "libclang unavailable, or no {compdb} found in {target}" +RS_EXTRACTING_CGR = "Building cgr Rust nodes for {target} (project={project})" +RS_CGR_DONE = "cgr Rust nodes: {count}" 
# (H) Log-message templates; each is filled with str.format by the eval that
# (H) logs it. TS_/JAVA_/LUA_/PHP_ORACLE_MISSING are intentionally not assigned
# (H) in this run: this module assigns them further down with a '{binary}'
# (H) placeholder, and those later values are the ones importers observe, so an
# (H) earlier plain-text assignment here would be dead.

# (H) Rust structure eval.
RS_EXTRACTING_ORACLE = "Running syn oracle ({binary}) over {target}"
RS_ORACLE_DONE = "syn oracle nodes: {count}"
RS_ORACLE_MISSING = "Rust toolchain '{binary}' not found on PATH; cannot run the oracle"

# (H) TypeScript structure eval.
TS_EXTRACTING_CGR = "Building cgr TypeScript nodes for {target} (project={project})"
TS_CGR_DONE = "cgr TypeScript nodes: {count}"
TS_EXTRACTING_ORACLE = "Running TypeScript compiler oracle ({binary}) over {target}"
TS_ORACLE_DONE = "TypeScript oracle nodes: {count}"

# (H) JavaScript structure eval.
JS_EXTRACTING_CGR = "Building cgr JavaScript nodes for {target} (project={project})"
JS_CGR_DONE = "cgr JavaScript nodes: {count}"
JS_EXTRACTING_ORACLE = "Running TypeScript compiler oracle ({binary}) over {target}"
JS_ORACLE_DONE = "JavaScript oracle nodes: {count}"

# (H) Java structure eval.
JAVA_EXTRACTING_CGR = "Building cgr Java nodes for {target} (project={project})"
JAVA_CGR_DONE = "cgr Java nodes: {count}"
JAVA_EXTRACTING_ORACLE = "Running JDK Compiler Tree API oracle ({binary}) over {target}"
JAVA_ORACLE_DONE = "Java oracle nodes: {count}"

# (H) Lua structure eval.
LUA_EXTRACTING_CGR = "Building cgr Lua nodes for {target} (project={project})"
LUA_CGR_DONE = "cgr Lua nodes: {count}"
LUA_EXTRACTING_ORACLE = "Running luaparse oracle ({binary}) over {target}"
LUA_ORACLE_DONE = "luaparse oracle nodes: {count}"

# (H) PHP structure eval.
PHP_EXTRACTING_CGR = "Building cgr PHP nodes for {target} (project={project})"
PHP_CGR_DONE = "cgr PHP nodes: {count}"
PHP_EXTRACTING_ORACLE = "Running php-parser oracle ({binary}) over {target}"
PHP_ORACLE_DONE = "php-parser oracle nodes: {count}"

# (H) Python file-level call retrieval, including the ripgrep baseline.
RETRIEVAL_SYMBOLS = "first-party symbol universe: {count} names"
RETRIEVAL_EXTRACTING_ORACLE = "Building ast call oracle (file-level) for {target}"
RETRIEVAL_ORACLE_DONE = "oracle call edges (first-party): {count}"
RETRIEVAL_EXTRACTING_CGR = "Building cgr CALLS edges for {target} (project={project})"
RETRIEVAL_CGR_DONE = "cgr call edges (first-party): {count}"
RETRIEVAL_EXTRACTING_GREP = "Running ripgrep ({mode}) baseline over {target}"
RETRIEVAL_GREP_DONE = "grep ({mode}) call edges: {count}"
RETRIEVAL_RG_MISSING = (
    "ripgrep '{binary}' not found on PATH; cannot run the grep baseline"
)

# (H) Incremental-update eval.
INCREMENTAL_TARGET = "Incremental-update eval over {target} (project={project})"
INCREMENTAL_SAMPLED = "probing {count} of {total} python files with a neutral edit"
INCREMENTAL_NO_PY = "no python files found under {target}; nothing to probe"
INCREMENTAL_PROBE = "probe {index}/{total}: edit {path}"
INCREMENTAL_PROBE_DIVERGED = "DIVERGED on {path}: -{missing} edges, +{stale} edges"
INCREMENTAL_DONE = "clean-equivalent probes: {clean}/{total}"

# (H) Import-resolution eval.
IMPORTS_TARGET = "Import-resolution eval over {target} (project={project})"
IMPORTS_ORACLE_DONE = "oracle import deps: {count} (internal+external)"
IMPORTS_CGR_DONE = "cgr import deps: {count}"

# (H) Inheritance eval.
INHERITANCE_TARGET = "Inheritance eval over {target} (project={project})"
INHERITANCE_ORACLE_DONE = "oracle: {inherits} INHERITS, {overrides} OVERRIDES"
INHERITANCE_CGR_DONE = "cgr: {inherits} INHERITS, {overrides} OVERRIDES"
INHERITANCE_SKIPPED_BASES = "oracle skipped {count} unresolved/ambiguous base refs"

# (H) Instantiation eval.
INSTANTIATION_TARGET = "Instantiation eval over {target} (project={project})"
INSTANTIATION_ORACLE_DONE = "oracle constructor calls: {count}"
INSTANTIATION_CGR_DONE = "cgr INSTANTIATES edges: {count}"

# (H) Dead-code eval.
DEAD_CODE_TARGET = "Dead-code eval over {target} (project={project})"
DEAD_CODE_DONE = "cgr reports {count} unreachable functions/methods"

# (H) Go call retrieval.
GO_RETRIEVAL_ORACLE = "Running go/ast call oracle ({binary}) over {target}"
GO_RETRIEVAL_ORACLE_DONE = "go/ast first-party call edges: {count}"
GO_RETRIEVAL_CGR = "Building cgr Go CALLS edges for {target} (project={project})"
+GO_RETRIEVAL_CGR_DONE = "cgr Go call edges (first-party): {count}" +RUST_ORACLE_MISSING = "Rust toolchain '{binary}' not found on PATH; cannot run oracle" +RUST_RETRIEVAL_ORACLE = "Running syn call oracle ({binary}) over {target}" +RUST_RETRIEVAL_ORACLE_DONE = "syn first-party call edges: {count}" +RUST_RETRIEVAL_CGR = "Building cgr Rust CALLS edges for {target} (project={project})" +RUST_RETRIEVAL_CGR_DONE = "cgr Rust call edges (first-party): {count}" +JAVA_ORACLE_MISSING = "Java toolchain '{binary}' not found on PATH; cannot run oracle" +JAVA_RETRIEVAL_ORACLE = "Running javac call oracle ({binary}) over {target}" +JAVA_RETRIEVAL_ORACLE_DONE = "javac first-party call edges: {count}" +JAVA_RETRIEVAL_CGR = "Building cgr Java CALLS edges for {target} (project={project})" +JAVA_RETRIEVAL_CGR_DONE = "cgr Java call edges (first-party): {count}" +TS_ORACLE_MISSING = "Node toolchain '{binary}' not found on PATH; cannot run oracle" +TS_RETRIEVAL_ORACLE = "Running tsc call oracle ({binary}) over {target}" +TS_RETRIEVAL_ORACLE_DONE = "tsc first-party call edges: {count}" +TS_RETRIEVAL_CGR = ( + "Building cgr TypeScript CALLS edges for {target} (project={project})" +) +TS_RETRIEVAL_CGR_DONE = "cgr TypeScript call edges (first-party): {count}" +PHP_ORACLE_MISSING = "Node toolchain '{binary}' not found on PATH; cannot run oracle" +PHP_RETRIEVAL_ORACLE = "Running php-parser call oracle ({binary}) over {target}" +PHP_RETRIEVAL_ORACLE_DONE = "php-parser first-party call edges: {count}" +PHP_RETRIEVAL_CGR = "Building cgr PHP CALLS edges for {target} (project={project})" +PHP_RETRIEVAL_CGR_DONE = "cgr PHP call edges (first-party): {count}" +LUA_ORACLE_MISSING = "Node toolchain '{binary}' not found on PATH; cannot run oracle" +LUA_RETRIEVAL_ORACLE = "Running luaparse call oracle ({binary}) over {target}" +LUA_RETRIEVAL_ORACLE_DONE = "luaparse first-party call edges: {count}" +LUA_RETRIEVAL_CGR = "Building cgr Lua CALLS edges for {target} (project={project})" +LUA_RETRIEVAL_CGR_DONE 
= "cgr Lua call edges (first-party): {count}" +C_ORACLE_MISSING = "libclang (clang.cindex) not importable; cannot run the C oracle" +C_RETRIEVAL_ORACLE = "Running libclang call oracle over {target}" +C_RETRIEVAL_ORACLE_DONE = "libclang first-party call edges: {count}" +C_RETRIEVAL_COVERED = "cleanly-parsed C source files graded: {count}" +C_RETRIEVAL_CGR = "Building cgr C CALLS edges for {target} (project={project})" +C_RETRIEVAL_CGR_DONE = "cgr C call edges (first-party): {count}" +CPP_RETRIEVAL_ORACLE_MISSING = ( + "libclang (clang.cindex) not importable; cannot run the C++ oracle" +) +CPP_RETRIEVAL_ORACLE = "Running libclang C++ call oracle over {target}" +CPP_RETRIEVAL_ORACLE_DONE = "libclang first-party C++ call edges: {count}" +CPP_RETRIEVAL_COVERED = "cleanly-parsed C++ source files graded: {count}" +CPP_RETRIEVAL_CGR = "Building cgr C++ CALLS edges for {target} (project={project})" +CPP_RETRIEVAL_CGR_DONE = "cgr C++ call edges (first-party): {count}" +SEMANTIC_MISSING = "semantic dependencies not installed; cannot run semantic eval" +SEMANTIC_TARGET = "Semantic-search eval over {target} (project={project})" +SEMANTIC_DONE = "recall@{k}: {hits}/{total} queries retrieved the expected function" +STATIC_CALLS_TARGET = "Static-calls eval over {target} (project={project})" +STATIC_CALLS_ORACLE_DONE = "oracle direct first-party call edges: {count}" +STATIC_CALLS_CGR_DONE = "cgr CALLS edges: {count}" diff --git a/evals/lua_l1.py b/evals/lua_l1.py new file mode 100644 index 000000000..57af56320 --- /dev/null +++ b/evals/lua_l1.py @@ -0,0 +1,51 @@ +from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger + +from . import constants as ec +from . 
def main(
    target: Annotated[
        Path, typer.Option(help="Directory of Lua sources to evaluate.")
    ] = Path(ec.LUA_DEFAULT_TARGET),
    project_name: Annotated[
        str, typer.Option(help="cgr project name; defaults to target dir name.")
    ] = "",
    out_dir: Annotated[
        Path, typer.Option(help="Directory for lua_scores.csv and lua_diff.json.")
    ] = Path(ec.DEFAULT_OUT_DIR),
) -> None:
    # (H) L1 structure eval for Lua: build cgr's graph and the luaparse oracle
    # (H) graph over the same target, score the two, then write the score/diff
    # (H) files into out_dir and render the report. Exits with code 1 when the
    # (H) oracle toolchain is unavailable. The default target is the Lua corpus
    # (H) constant (the one evals/lua_retrieval.py also defaults to), not the Go
    # (H) corpus.
    if not lua_oracle_available():
        # (H) The template carries a {binary} placeholder. It has to be filled
        # (H) in here: logged without arguments, the raw template (placeholder
        # (H) included) is what would reach the user.
        logger.error(ls.LUA_ORACLE_MISSING.format(binary=ec.NODE_BIN))
        raise typer.Exit(code=1)

    target = target.resolve()
    # (H) an empty project name falls back to the target directory's name.
    project = project_name or target.name

    logger.info(ls.LUA_EXTRACTING_CGR.format(target=target, project=project))
    cgr = extract_cgr_lua_graph(target, project)
    logger.success(ls.LUA_CGR_DONE.format(count=len(cgr.nodes)))

    logger.info(ls.LUA_EXTRACTING_ORACLE.format(binary=ec.NODE_BIN, target=target))
    oracle = run_lua_oracle(target)
    logger.success(ls.LUA_ORACLE_DONE.format(count=len(oracle.nodes)))

    result = score_structure(
        cgr, oracle, ec.LUA_SCORED_NODE_KINDS, ec.SCORED_EDGE_TYPES, grade_spans=True
    )
    write_outputs(result, out_dir, ec.LUA_SCORES_FILENAME, ec.LUA_DIFF_FILENAME)
    render(result, _TITLE)
def cgr_lua_call_edges(
    target: Path, project: str, declared: frozenset[str]
) -> set[CallEdge]:
    # (H) Reduce cgr's CALLS relationships to (caller_file, callee_simple_name)
    # (H) pairs. Only callers whose node carries a path ending in the Lua suffix
    # (H) are kept, and only callees whose simple name is in `declared`.
    graph = _capture(target, project)

    # (H) Index every node that has a Lua path by its (label, id) identity.
    lua_file_of: dict[tuple[str, str], str] = {}
    for (label, uid), props in graph.nodes.items():
        raw_path = props.get(cs.KEY_PATH)
        if not raw_path:
            continue
        file_path = str(raw_path)
        if file_path.endswith(ec.LUA_SUFFIX):
            lua_file_of[(str(label), str(uid))] = file_path

    found: set[CallEdge] = set()
    for src_label, src_id, relation, _dst_label, dst_id in graph.rels:
        if relation != _CALLS:
            continue
        caller_file = lua_file_of.get((str(src_label), str(src_id)))
        if caller_file is None:
            continue
        # (H) the simple name is the last separator-delimited segment of the
        # (H) callee's identifier.
        callee = str(dst_id).rsplit(cs.SEPARATOR_DOT, 1)[-1]
        if callee in declared:
            found.add((caller_file, callee))
    return found
def main(
    target: Annotated[
        Path, typer.Option(help="Directory of Lua sources to evaluate call retrieval.")
    ] = console_target,
    project_name: Annotated[
        str, typer.Option(help="cgr project name; defaults to target dir name.")
    ] = "",
    out_dir: Annotated[
        Path,
        typer.Option(help="Directory for lua_retrieval_scores.csv and diff json."),
    ] = Path(ec.DEFAULT_OUT_DIR),
) -> None:
    # (H) Lua call-retrieval eval: collect the oracle's call edges and declared
    # (H) name universe, collect cgr's call edges restricted to that universe,
    # (H) score one against the other, then write and render the result.
    # (H) Exits with code 1 when the oracle toolchain is unavailable.
    oracle_ready = lua_oracle_available()
    if not oracle_ready:
        logger.error(ls.LUA_ORACLE_MISSING.format(binary=ec.NODE_BIN))
        raise typer.Exit(code=1)

    root = target.resolve()
    # (H) an empty project name falls back to the target directory's name.
    project = project_name if project_name else root.name

    logger.info(ls.LUA_RETRIEVAL_ORACLE.format(binary=ec.NODE_BIN, target=root))
    oracle_edges, declared_names = oracle_lua_call_edges(root)
    logger.success(ls.LUA_RETRIEVAL_ORACLE_DONE.format(count=len(oracle_edges)))

    logger.info(ls.LUA_RETRIEVAL_CGR.format(target=root, project=project))
    cgr_edges = cgr_lua_call_edges(root, project, declared_names)
    logger.success(ls.LUA_RETRIEVAL_CGR_DONE.format(count=len(cgr_edges)))

    scored = score_lua_retrieval(cgr_edges, oracle_edges)
    write_outputs(
        scored,
        out_dir,
        ec.LUA_RETRIEVAL_SCORES_FILENAME,
        ec.LUA_RETRIEVAL_DIFF_FILENAME,
    )
    render(scored, ec.LUA_RETRIEVAL_TITLE)
The L3 trace records the innermost function frame as the caller and +# (H) drops frames, so it is structurally blind to module-level call +# (H) attribution. This eval fills that gap with an AST oracle that models +# (H) import-time execution. Both sides are compared as (module_file, +# (H) callee_simple_name) name-edges, restricted to first-party callees and +# (H) excluding dunders, since cgr only emits first-party CALLS. +import ast +from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger +from rich.console import Console +from rich.table import Table + +from codebase_rag import constants as cs + +from . import constants as ec +from .ast_oracle import _iter_py_files +from .cgr_graph import _capture +from .types_defs import NameEdge, NodeKey + +console = Console() + +_CALLS = cs.RelationshipType.CALLS.value +_INSTANTIATES = cs.RelationshipType.INSTANTIATES.value + + +def _is_dunder(name: str) -> bool: + return name.startswith("__") and name.endswith("__") + + +def _callee_name(func: ast.expr) -> str | None: + if isinstance(func, ast.Name): + return func.id + if isinstance(func, ast.Attribute): + return func.attr + return None + + +def _has_future_annotations(tree: ast.Module) -> bool: + for node in tree.body: + if isinstance(node, ast.ImportFrom) and node.module == "__future__": + if any(alias.name == "annotations" for alias in node.names): + return True + return False + + +class _ModuleCallVisitor(ast.NodeVisitor): + # (H) Collect callee names of calls that execute at module-load time. A + # (H) function's decorators, argument defaults, and (unless postponed) + # (H) annotations run in the enclosing scope, so they are visited at the + # (H) current depth; only its body is function scope. Class bodies execute at + # (H) definition time, so they stay at the enclosing depth. 
Lambda bodies and + # (H) generator expressions are deferred (run when called/consumed), so their + # (H) calls are not import-time and are entered as a nested (function) scope. + def __init__(self, count_annotations: bool) -> None: + self.names: set[str] = set() + self._func_depth = 0 + self._count_annotations = count_annotations + + def visit_Call(self, node: ast.Call) -> None: + if self._func_depth == 0 and (name := _callee_name(node.func)): + self.names.add(name) + self.generic_visit(node) + + def _visit_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None: + for decorator in node.decorator_list: + if self._func_depth == 0: + # (H) a bare decorator `@task` is a Name (not a Call), so record + # (H) its callee name explicitly; applying it runs at module load. + target = ( + decorator.func if isinstance(decorator, ast.Call) else decorator + ) + if name := _callee_name(target): + self.names.add(name) + self.visit(decorator) + if self._count_annotations: + args = node.args + for arg in ( + *args.posonlyargs, + *args.args, + *args.kwonlyargs, + args.vararg, + args.kwarg, + ): + if arg is not None and arg.annotation is not None: + self.visit(arg.annotation) + if node.returns is not None: + self.visit(node.returns) + for default in (*node.args.defaults, *node.args.kw_defaults): + if default is not None: + self.visit(default) + self._func_depth += 1 + for stmt in node.body: + self.visit(stmt) + self._func_depth -= 1 + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + self._visit_function(node) + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + self._visit_function(node) + + def visit_ClassDef(self, node: ast.ClassDef) -> None: + # (H) a class decorator runs at definition (module-load) time too; the + # (H) class body stays at the current depth (eager at import). 
+ if self._func_depth == 0: + for decorator in node.decorator_list: + target = ( + decorator.func if isinstance(decorator, ast.Call) else decorator + ) + if name := _callee_name(target): + self.names.add(name) + self.generic_visit(node) + + def visit_Lambda(self, node: ast.Lambda) -> None: + for default in (*node.args.defaults, *node.args.kw_defaults): + if default is not None: + self.visit(default) + self._func_depth += 1 + self.visit(node.body) + self._func_depth -= 1 + + def visit_GeneratorExp(self, node: ast.GeneratorExp) -> None: + # (H) the outermost iterable is evaluated eagerly when the generator is + # (H) created (enclosing scope); the element, conditions, and any further + # (H) iterables are lazy (run during consumption). + if node.generators: + self.visit(node.generators[0].iter) + self._func_depth += 1 + self.visit(node.elt) + for index, comprehension in enumerate(node.generators): + if index > 0: + self.visit(comprehension.iter) + for condition in comprehension.ifs: + self.visit(condition) + self._func_depth -= 1 + + +def _first_party_names(trees: list[ast.Module]) -> set[str]: + names: set[str] = set() + for tree in trees: + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef): + names.add(node.name) + return names + + +def oracle_module_calls(target: Path, project_name: str) -> set[NameEdge]: + parsed: list[tuple[str, ast.Module]] = [] + for path in _iter_py_files(target): + rel = path.relative_to(target).as_posix() + try: + parsed.append((rel, ast.parse(path.read_text(encoding=cs.ENCODING_UTF8)))) + except (SyntaxError, UnicodeDecodeError, ValueError): + continue + first_party = _first_party_names([tree for _rel, tree in parsed]) + + edges: set[NameEdge] = set() + for rel, tree in parsed: + visitor = _ModuleCallVisitor( + count_annotations=not _has_future_annotations(tree) + ) + visitor.visit(tree) + module_key = NodeKey(cs.NodeLabel.MODULE.value, rel, ec.MODULE_START_LINE) + for name in 
visitor.names: + if name in first_party and not _is_dunder(name): + edges.add(NameEdge(_CALLS, module_key, name)) + return edges + + +def cgr_module_calls(target: Path, project_name: str) -> set[NameEdge]: + ingestor = _capture(target, project_name) + module_label = cs.NodeLabel.MODULE.value + module_paths: dict[str, str] = { + str(uid): str(props[cs.KEY_PATH]) + for (label, uid), props in ingestor.nodes.items() + if label == module_label + and props.get(cs.KEY_PATH) + and str(props[cs.KEY_PATH]).endswith(ec.PY_SUFFIX) + } + + method_label = cs.NodeLabel.METHOD.value + edges: set[NameEdge] = set() + for from_label, from_val, rel_type, to_label, to_val in ingestor.rels: + # (H) A module-scope construction `X()` is an INSTANTIATES edge to the + # (H) class node (callee is the class name directly); a function/method + # (H) call is a CALLS edge. The oracle records both as a bare callee name, + # (H) so credit both kinds of module-caller edge. + if rel_type not in (_CALLS, _INSTANTIATES) or from_label != module_label: + continue + path = module_paths.get(str(from_val)) + if path is None: + continue + segments = str(to_val).split(ec.SEP) + name = segments[-1] + # (H) A constructor call `X()` on a class WITH __init__ resolves to the + # (H) `X.__init__` METHOD via CALLS; the oracle sees the class name `X`, so + # (H) credit it to the class. A bare first-party FUNCTION named `__init__` + # (H) is left as a dunder (filtered below), not remapped to its segment. 
def score_module_calls(
    cgr: set[NameEdge], oracle: set[NameEdge]
) -> tuple[int, int, int, float, float]:
    # (H) Compare cgr's edge set with the oracle's and return
    # (H) (tp, fp, fn, precision, recall). A ratio whose denominator is zero
    # (H) (nothing predicted / nothing expected) is reported as 1.0.
    tp = len(cgr & oracle)
    # (H) everything in a set that is not shared is that side's error count.
    fp = len(cgr) - tp
    fn = len(oracle) - tp

    predicted = tp + fp
    expected = tp + fn
    if predicted:
        precision = tp / predicted
    else:
        precision = 1.0
    if expected:
        recall = tp / expected
    else:
        recall = 1.0
    return tp, fp, fn, precision, recall
def is_ignored(rel_file: str) -> bool:
    # (H) True when any directory component of `rel_file` (the file name itself
    # (H) is not considered) is one of cs.IGNORE_PATTERNS. This mirrors cgr's
    # (H) directory-component ignore (path_utils.should_skip_path) so an oracle
    # (H) grades the same file set cgr indexes.
    parent_dirs = PurePosixPath(rel_file).parent.parts
    return any(component in cs.IGNORE_PATTERNS for component in parent_dirs)
shutil +import subprocess +from pathlib import Path +from typing import TYPE_CHECKING + +from codebase_rag import constants as cs + +from .. import constants as ec +from ..types_defs import ( + GraphData, + OracleEdge, + OracleNameEdge, + OracleNodeRef, + OraclePayload, + OracleRecord, +) +from ._common import is_ignored, payload_to_graph + +if TYPE_CHECKING: + from clang.cindex import Cursor + +# (H) The libclang oracle is authoritative C/C++ ground truth: driven by a +# (H) compile_commands.json it resolves #includes and expands macros to the true +# (H) translation-unit AST, which tree-sitter (cgr's parser) cannot do. cgr's +# (H) C/C++ nodes are graded against it on (kind, file, start_line). + +_CLASS = cs.NodeLabel.CLASS.value +_FUNCTION = cs.NodeLabel.FUNCTION.value +_METHOD = cs.NodeLabel.METHOD.value +_MODULE = cs.NodeLabel.MODULE.value +_DEFINES = cs.RelationshipType.DEFINES.value +_DEFINES_METHOD = cs.RelationshipType.DEFINES_METHOD.value +_INHERITS = cs.RelationshipType.INHERITS.value +_BASE_SPECIFIER = "CXX_BASE_SPECIFIER" + +_NodeId = tuple[str, str, int] +_EdgeId = tuple[str, str, int, str, int] +_NameEdgeId = tuple[str, str, int, str] + +# (H) libclang CursorKind members are registered dynamically (not static class +# (H) attributes), so map by the kind's stable NAME string — exactly what +# (H) `cursor.kind.name` yields at runtime — instead of `ci.CursorKind.CLASS_DECL`. +_KIND_BY_NAME: dict[str, str] = { + "CLASS_DECL": _CLASS, + "STRUCT_DECL": _CLASS, + "CLASS_TEMPLATE": _CLASS, + "FUNCTION_DECL": _FUNCTION, + "FUNCTION_TEMPLATE": _FUNCTION, + "CXX_METHOD": _METHOD, + "CONSTRUCTOR": _METHOD, + "DESTRUCTOR": _METHOD, + "CONVERSION_FUNCTION": _METHOD, +} + + +_libclang_pinned = False + + +def _ensure_libclang() -> None: + # (H) Pin the libclang shared library BEFORE the first Index.create (libclang is + # (H) a global one-shot). 
Prefer a system libclang whose clang version matches the + # (H) active SDK's libc++ — required to parse C++ standard headers, which the + # (H) bundled pip wheel's older clang cannot. C parsing is unaffected by the + # (H) choice, so both the C and C++ oracles share one consistent toolchain. + global _libclang_pinned + if _libclang_pinned: + return + _libclang_pinned = True + # (H) clang is an optional dependency: if the bindings are absent this import + # (H) raises ModuleNotFoundError, so swallow it here and let cpp_available's own + # (H) try/except report the oracle as unavailable (returning False), rather than + # (H) letting the exception escape and break test collection / the CLI path. + try: + from clang.cindex import Config + except Exception: + return + + for candidate in ec.LIBCLANG_CANDIDATES: + if Path(candidate).exists(): + try: + Config.set_library_file(candidate) + return + except Exception: + # (H) libclang loading raises a wide, unpredictable range of errors + # (H) (arch mismatch, format errors, an already-loaded library); on + # (H) any, fall through to the next candidate, else the bundled + # (H) default the bindings load on their own. 
+ continue + + +def cpp_available() -> bool: + _ensure_libclang() + try: + import clang.cindex as ci + + ci.Index.create() + except Exception: + return False + return True + + +def _rel(path: str, root: Path) -> str | None: + try: + return Path(path).resolve().relative_to(root).as_posix() + except ValueError: + return None + + +def run_cpp_oracle(target: Path) -> GraphData: + _ensure_libclang() + import clang.cindex as ci + + root = target.resolve() + db = ci.CompilationDatabase.fromDirectory(str(root)) + index = ci.Index.create() + nodes: dict[_NodeId, OracleRecord] = {} + edges: dict[_EdgeId, OracleEdge] = {} + name_edges: dict[_NameEdgeId, OracleNameEdge] = {} + + for command in db.getAllCompileCommands(): + args = list(command.arguments)[1:] + try: + tu = index.parse(None, args=args) + except ci.TranslationUnitLoadError: + continue + _walk(tu.cursor, root, nodes, edges, name_edges) + + payload = OraclePayload( + nodes=list(nodes.values()), + edges=list(edges.values()), + name_edges=list(name_edges.values()), + ) + return payload_to_graph(payload) + + +def _walk( + cursor: Cursor, + root: Path, + nodes: dict[_NodeId, OracleRecord], + edges: dict[_EdgeId, OracleEdge], + name_edges: dict[_NameEdgeId, OracleNameEdge], +) -> None: + for child in cursor.get_children(): + _emit(child, root, nodes, edges, name_edges) + _walk(child, root, nodes, edges, name_edges) + + +def _base_simple_name(spelling: str) -> str: + # (H) Mirror cgr's base-name normalization (extract_cgr_lang_graph): collapse + # (H) `::` to `.` and take the last component, so the oracle and cgr agree on + # (H) the inheritance target spelling. 
def _emit(
    cursor: Cursor,
    root: Path,
    nodes: dict[_NodeId, OracleRecord],
    edges: dict[_EdgeId, OracleEdge],
    name_edges: dict[_NameEdgeId, OracleNameEdge],
) -> None:
    # (H) Record one definition cursor: its node, its containment edge, and
    # (H) (for classes) one INHERITS name-edge per base specifier. Cursors that
    # (H) are not definitions, have an unmapped kind, have no file, or live
    # (H) outside `root` are skipped. Existing entries are never overwritten.
    if not cursor.is_definition():
        return
    label = _KIND_BY_NAME.get(cursor.kind.name)
    if label is None:
        return
    source_file = cursor.location.file
    if source_file is None:
        return
    rel = _rel(source_file.name, root)
    if rel is None:
        return

    line = cursor.location.line
    node_id: _NodeId = (label, rel, line)
    if node_id not in nodes:
        nodes[node_id] = OracleRecord(
            kind=label,
            file=rel,
            line=line,
            name=cursor.spelling,
            end_line=cursor.extent.end.line,
        )

    if label == _METHOD:
        # (H) a method hangs off its semantic parent (keyed as a class) rather
        # (H) than the file's module; no edge is emitted when that parent has
        # (H) no file or sits outside `root`.
        owner = cursor.semantic_parent
        if owner is not None and owner.location.file is not None:
            owner_rel = _rel(owner.location.file.name, root)
            if owner_rel is not None:
                _add_edge(
                    edges,
                    _DEFINES_METHOD,
                    _CLASS,
                    owner_rel,
                    owner.location.line,
                    node_id,
                )
        return

    _add_edge(edges, _DEFINES, _MODULE, rel, ec.MODULE_START_LINE, node_id)
    if label != _CLASS:
        return
    for member in cursor.get_children():
        if member.kind.name != _BASE_SPECIFIER:
            continue
        base_name = _base_simple_name(member.type.spelling)
        inherit_id: _NameEdgeId = (_INHERITS, rel, line, base_name)
        if inherit_id in name_edges:
            continue
        name_edges[inherit_id] = OracleNameEdge(
            rel=_INHERITS,
            source=OracleNodeRef(kind=_CLASS, file=rel, line=line),
            target_name=base_name,
        )
+_C_DECL_KINDS = frozenset({_FUNCTION_DECL}) +_CPP_DECL_KINDS = frozenset({_FUNCTION_DECL, _FUNCTION_TEMPLATE, _CXX_METHOD}) + + +def _capture_path(command: tuple[str, ...]) -> str | None: + if shutil.which(command[0]) is None: + return None + try: + out = subprocess.run( + command, capture_output=True, text=True, check=True + ).stdout.strip() + except (subprocess.SubprocessError, OSError): + return None + return out or None + + +def _clang_system_args() -> list[str]: + # (H) Resolve the SDK system headers and clang's own builtin headers + # (H) (stdarg.h, stddef.h) so a translation unit parses fully without a + # (H) compile_commands.json. Best-effort and portable: each probe is skipped + # (H) when its tool is absent (e.g. no SDK on Linux, headers found on PATH). + args: list[str] = [] + if sdk := _capture_path(ec.XCRUN_SDK_PATH_CMD): + args.extend((ec.CLANG_ISYSROOT_FLAG, sdk)) + if resource := _capture_path(ec.CLANG_RESOURCE_DIR_CMD): + args.extend((ec.CLANG_ISYSTEM_FLAG, str(Path(resource) / ec.CLANG_INCLUDE_DIR))) + return args + + +def _c_include_args(root: Path) -> list[str]: + # (H) Every dir holding a header becomes an -I path so first-party #includes + # (H) resolve without a compile database. + dirs = {root} + for header in root.rglob(ec.C_HEADER_GLOB): + rel = _rel(str(header), root) + if rel is not None and not is_ignored(rel): + dirs.add(header.parent) + args: list[str] = [] + for directory in sorted(dirs): + args.extend((ec.CLANG_INCLUDE_FLAG, str(directory))) + return args + + +def _callee_is_first_party(call: Cursor, root: Path) -> bool: + # (H) libclang resolves a call to its callee declaration; grade the call only + # (H) when that declaration is itself first-party. Without this, a call whose + # (H) simple name collides with a first-party symbol (e.g. `std::string::size` + # (H) vs a project `size()`) would be counted as a first-party edge, understating + # (H) cgr recall against calls it correctly resolves as external/builtin. 
def _collect_decls_and_calls(
    cursor: Cursor,
    root: Path,
    declared: set[str],
    raw_calls: list[tuple[str, str]] | None,
    decl_kinds: frozenset[str],
    strict_callee: bool = False,
) -> None:
    # (H) Walk the AST below `cursor` in pre-order, adding the spelling of every
    # (H) definition whose kind is in `decl_kinds` to `declared` and appending
    # (H) (file, callee_spelling) for every named call expression to
    # (H) `raw_calls`. Passing raw_calls=None harvests definitions only (used
    # (H) for a translation unit that did not parse cleanly). With
    # (H) strict_callee, a call is kept only if _callee_is_first_party accepts
    # (H) it.
    # (H) An explicit stack replaces recursion; children are pushed reversed so
    # (H) nodes are still handled in source (pre-)order.
    pending = list(cursor.get_children())
    pending.reverse()
    while pending:
        node = pending.pop()
        origin = node.location.file
        rel = _rel(origin.name, root) if origin else None
        # (H) Prune subtrees that are not first-party (no file, outside root, or
        # (H) ignored): they are never graded and need not be walked.
        if rel is None or is_ignored(rel):
            continue
        kind_name = node.kind.name
        if kind_name in decl_kinds and node.is_definition():
            declared.add(node.spelling)
        elif raw_calls is not None and kind_name == _CALL_EXPR and node.spelling:
            if not strict_callee or _callee_is_first_party(node, root):
                raw_calls.append((rel, node.spelling))
        pending.extend(reversed(list(node.get_children())))
def _cpp_system_args() -> list[str]:
    # (H) C++ counterpart of _clang_system_args. When the SDK probe succeeds,
    # (H) emit the isysroot flag for it followed by an isystem entry for the
    # (H) libc++ directory inside that SDK; when the resource-dir probe
    # (H) succeeds, emit an isystem entry for clang's builtin include directory.
    # (H) The libc++ entry is deliberately emitted before the builtin one:
    # (H) header lookup follows flag order, and the parse fails if the builtin
    # (H) headers are found ahead of libc++'s.
    # (H) Each probe is optional; a missing tool simply contributes no flags.
    flags: list[str] = []

    sdk_root = _capture_path(ec.XCRUN_SDK_PATH_CMD)
    if sdk_root:
        libcxx_dir = Path(sdk_root) / ec.CLANG_LIBCXX_SUBPATH
        flags += [
            ec.CLANG_ISYSROOT_FLAG,
            sdk_root,
            ec.CLANG_ISYSTEM_FLAG,
            str(libcxx_dir),
        ]

    resource_dir = _capture_path(ec.CLANG_RESOURCE_DIR_CMD)
    if resource_dir:
        builtin_dir = Path(resource_dir) / ec.CLANG_INCLUDE_DIR
        flags += [ec.CLANG_ISYSTEM_FLAG, str(builtin_dir)]

    return flags
def run_cpp_call_oracle(
    target: Path,
    extra_defines: tuple[str, ...] = (),
) -> tuple[set[tuple[str, str]], frozenset[str], frozenset[str]]:
    # (H) Parses every non-ignored C++ source under target with libclang and
    # (H) returns three things: the (file, callee-name) call sites whose callee
    # (H) is a declared name, the declared name universe, and the set of files
    # (H) that parsed without an error-severity diagnostic. Declarations are
    # (H) collected from every translation unit that loads; call sites are only
    # (H) collected from the error-free ones. extra_defines become define flags
    # (H) placed ahead of the system and include arguments.
    _ensure_libclang()
    import clang.cindex as ci

    root = target.resolve()
    index = ci.Index.create()
    parse_args = [
        ec.CLANG_CPP_LANG_FLAG,
        ec.CLANG_CPP_LANG,
        ec.CLANG_CPP_STD,
        *(ec.CLANG_DEFINE_FLAG + macro for macro in extra_defines),
        *_cpp_system_args(),
        *_cpp_include_args(root),
    ]
    declared: set[str] = set()
    call_sites: list[tuple[str, str]] = []
    clean_files: set[str] = set()
    # (H) Lazy on purpose: each glob is expanded only when the loop reaches it.
    candidates = (
        path
        for pattern in ec.CPP_SOURCE_GLOBS
        for path in sorted(root.rglob(pattern))
    )
    for source in candidates:
        rel = _rel(str(source), root)
        if rel is None or is_ignored(rel):
            continue
        try:
            unit = index.parse(str(source), args=parse_args)
        except ci.TranslationUnitLoadError:
            continue
        has_error = any(
            diag.severity >= ec.CLANG_SEVERITY_ERROR for diag in unit.diagnostics
        )
        # (H) A unit with errors still contributes declarations but no calls.
        _collect_decls_and_calls(
            unit.cursor,
            root,
            declared,
            None if has_error else call_sites,
            _CPP_DECL_KINDS,
            strict_callee=True,
        )
        if not has_error:
            clean_files.add(rel)
    declared_names = frozenset(declared)
    covered_files = frozenset(clean_files)
    edges = {
        (caller, callee)
        for caller, callee in call_sites
        if callee in declared_names and caller in covered_files
    }
    return edges, declared_names, covered_files


def _add_edge(
    edges: dict[_EdgeId, OracleEdge],
    rel: str,
    pkind: str,
    pfile: str,
    pline: int,
    child: _NodeId,
) -> None:
    # (H) Registers one parent -> child edge, keyed by relationship plus both
    # (H) endpoints' (file, line). The first edge stored under a key wins; a
    # (H) repeat with the same key is ignored.
    ckind, cfile, cline = child
    key: _EdgeId = (rel, pfile, pline, cfile, cline)
    if key not in edges:
        parent_ref = OracleNodeRef(kind=pkind, file=pfile, line=pline)
        child_ref = OracleNodeRef(kind=ckind, file=cfile, line=cline)
        edges[key] = OracleEdge(rel=rel, parent=parent_ref, child=child_ref)
Node "kind" fields use +// cgr's NodeLabel vocabulary (Function, Method, Class, Interface, Type) and +// edges use cgr's RelationshipType vocabulary, so both join cgr's graph on +// (kind, file, line). +// +// Mapping (Go declaration -> cgr NodeLabel): +// +// func without receiver -> Function +// func with receiver -> Method +// type ... struct {} -> Class +// type ... interface {} -> Interface +// type ... (other) -> Type (defined types and aliases alike) +// +// Containment edges (matching how cgr models Go containment): +// +// DEFINES : Module(file, line 0) -> top-level Function / Class / Interface / Type +// DEFINES_METHOD : receiver type's node -> Method (cross-file within a package) +// +// cgr models a Go module per file, so a DEFINES parent is the file's module +// keyed at line 0. A receiver method's parent is the node of its receiver type, +// resolved package-wide (a method may sit in a different file than its type). +// +// Run: GO111MODULE=off go run go_ast.go +package main + +import ( + "encoding/json" + "go/ast" + "go/parser" + "go/token" + "os" + "path/filepath" + "strings" +) + +// Def is a single declaration record. Line is the identifier line (the node's +// start, matching cgr); EndLine is the line of the declaration's last token. +type Def struct { + Kind string `json:"kind"` + File string `json:"file"` + Line int `json:"line"` + EndLine int `json:"end_line"` + Name string `json:"name"` +} + +// NodeRef identifies an edge endpoint by (kind, file, line). +type NodeRef struct { + Kind string `json:"kind"` + File string `json:"file"` + Line int `json:"line"` +} + +// Edge is a containment relationship between two node references. +type Edge struct { + Rel string `json:"rel"` + Parent NodeRef `json:"parent"` + Child NodeRef `json:"child"` +} + +// Call is a call site: the file it appears in and the callee's simple name +// (the bare identifier, or the selector tail for x.Method() / pkg.Func()). 
+type Call struct { + File string `json:"file"` + Name string `json:"name"` +} + +// Payload is the oracle's stdout shape. +type Payload struct { + Nodes []Def `json:"nodes"` + Edges []Edge `json:"edges"` + Calls []Call `json:"calls"` +} + +// ignoredDirs are skipped during the walk; they never hold first-party sources. +var ignoredDirs = map[string]bool{ + ".git": true, + "vendor": true, + "node_modules": true, + "testdata": true, +} + +const ( + kindFunction = "Function" + kindMethod = "Method" + kindClass = "Class" + kindInterface = "Interface" + kindType = "Type" + kindModule = "Module" + relDefines = "DEFINES" + relDefinesMeth = "DEFINES_METHOD" + moduleLine = 0 + goSuffix = ".go" +) + +func typeSpecKind(spec *ast.TypeSpec) string { + switch spec.Type.(type) { + case *ast.StructType: + return kindClass + case *ast.InterfaceType: + return kindInterface + default: + return kindType + } +} + +// baseTypeName strips pointer and generic instantiation wrappers off a receiver +// type expression, leaving the bare type name (e.g. *Point[T] -> "Point"). +func baseTypeName(expr ast.Expr) string { + switch t := expr.(type) { + case *ast.StarExpr: + return baseTypeName(t.X) + case *ast.IndexExpr: + return baseTypeName(t.X) + case *ast.IndexListExpr: + return baseTypeName(t.X) + case *ast.Ident: + return t.Name + } + return "" +} + +func recvTypeName(recv *ast.FieldList) string { + if recv == nil || len(recv.List) == 0 { + return "" + } + return baseTypeName(recv.List[0].Type) +} + +// parsedFile bundles a parsed source with its location data for the two passes. +type parsedFile struct { + fset *token.FileSet + file *ast.File + rel string + dir string +} + +// collectNodes records every declaration (including function-local types) so the +// node set is an apples-to-apples ground truth for cgr's node capture. 
+func collectNodes(pf parsedFile, defs *[]Def) { + ast.Inspect(pf.file, func(n ast.Node) bool { + switch d := n.(type) { + case *ast.FuncDecl: + kind := kindFunction + if d.Recv != nil { + kind = kindMethod + } + line := pf.fset.Position(d.Name.Pos()).Line + end := pf.fset.Position(d.End()).Line + *defs = append(*defs, Def{kind, pf.rel, line, end, d.Name.Name}) + case *ast.TypeSpec: + line := pf.fset.Position(d.Name.Pos()).Line + end := pf.fset.Position(d.End()).Line + *defs = append(*defs, Def{typeSpecKind(d), pf.rel, line, end, d.Name.Name}) + } + return true + }) +} + +// typeKey scopes a type name to its package directory; methods resolve their +// receiver type within the same package, which Go keeps in one directory. +func typeKey(dir, name string) string { + return dir + "\x00" + name +} + +// collectTypes records each top-level type's node so receiver methods can later +// point DEFINES_METHOD at the right (kind, file, line). +func collectTypes(pf parsedFile, types map[string]Def) { + for _, decl := range pf.file.Decls { + gen, ok := decl.(*ast.GenDecl) + if !ok || gen.Tok != token.TYPE { + continue + } + for _, spec := range gen.Specs { + ts, ok := spec.(*ast.TypeSpec) + if !ok { + continue + } + line := pf.fset.Position(ts.Name.Pos()).Line + end := pf.fset.Position(ts.End()).Line + types[typeKey(pf.dir, ts.Name.Name)] = Def{typeSpecKind(ts), pf.rel, line, end, ts.Name.Name} + } + } +} + +// collectEdges emits DEFINES for top-level funcs/types and DEFINES_METHOD for +// receiver methods, mirroring cgr's per-file module containment. 
+func collectEdges(pf parsedFile, types map[string]Def, edges *[]Edge) { + module := NodeRef{kindModule, pf.rel, moduleLine} + for _, decl := range pf.file.Decls { + switch d := decl.(type) { + case *ast.FuncDecl: + line := pf.fset.Position(d.Name.Pos()).Line + if d.Recv == nil { + child := NodeRef{kindFunction, pf.rel, line} + *edges = append(*edges, Edge{relDefines, module, child}) + continue + } + owner, ok := types[typeKey(pf.dir, recvTypeName(d.Recv))] + if !ok { + continue + } + parent := NodeRef{owner.Kind, owner.File, owner.Line} + child := NodeRef{kindMethod, pf.rel, line} + *edges = append(*edges, Edge{relDefinesMeth, parent, child}) + case *ast.GenDecl: + if d.Tok != token.TYPE { + continue + } + for _, spec := range d.Specs { + ts, ok := spec.(*ast.TypeSpec) + if !ok { + continue + } + line := pf.fset.Position(ts.Name.Pos()).Line + child := NodeRef{typeSpecKind(ts), pf.rel, line} + *edges = append(*edges, Edge{relDefines, module, child}) + } + } + } +} + +// calleeName returns the simple name a call expression targets: the bare +// identifier for foo(), or the selector tail for x.Method() and pkg.Func(). +func calleeName(expr ast.Expr) string { + switch f := expr.(type) { + case *ast.Ident: + return f.Name + case *ast.SelectorExpr: + return f.Sel.Name + case *ast.IndexExpr: + return calleeName(f.X) + case *ast.IndexListExpr: + return calleeName(f.X) + } + return "" +} + +// collectCalls records every call site's (file, callee simple name). First-party +// filtering happens in the Python harness against the declared name set. 
+func collectCalls(pf parsedFile, calls *[]Call) { + ast.Inspect(pf.file, func(n ast.Node) bool { + if call, ok := n.(*ast.CallExpr); ok { + if name := calleeName(call.Fun); name != "" { + *calls = append(*calls, Call{pf.rel, name}) + } + } + return true + }) +} + +func main() { + root := os.Args[1] + var parsed []parsedFile + _ = filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return nil + } + if info.IsDir() { + if ignoredDirs[info.Name()] { + return filepath.SkipDir + } + return nil + } + if !strings.HasSuffix(path, goSuffix) { + return nil + } + fset := token.NewFileSet() + file, perr := parser.ParseFile(fset, path, nil, 0) + if perr != nil { + return nil + } + rel, rerr := filepath.Rel(root, path) + if rerr != nil { + rel = path + } + rel = filepath.ToSlash(rel) + parsed = append(parsed, parsedFile{fset, file, rel, filepath.ToSlash(filepath.Dir(rel))}) + return nil + }) + + types := map[string]Def{} + for _, pf := range parsed { + collectTypes(pf, types) + } + + defs := []Def{} + edges := []Edge{} + calls := []Call{} + for _, pf := range parsed { + collectNodes(pf, &defs) + collectEdges(pf, types, &edges) + collectCalls(pf, &calls) + } + _ = json.NewEncoder(os.Stdout).Encode(Payload{Nodes: defs, Edges: edges, Calls: calls}) +} diff --git a/evals/oracles/go_oracle.py b/evals/oracles/go_oracle.py new file mode 100644 index 000000000..3430b234b --- /dev/null +++ b/evals/oracles/go_oracle.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +import json +import os +import shutil +import subprocess +from pathlib import Path + +from codebase_rag import constants as cs + +from .. 
def go_available() -> bool:
    # (H) True when the Go binary named by ec.GO_BIN resolves on PATH.
    located = shutil.which(ec.GO_BIN)
    return located is not None


def _run_go_oracle_payload(target: Path) -> OraclePayload:
    # (H) Runs the Go oracle source against target and decodes its stdout as
    # (H) JSON. The child inherits the current environment with the module-mode
    # (H) variable overridden. A non-zero exit raises CalledProcessError
    # (H) (check=True); empty stdout decodes to an empty payload.
    command = [ec.GO_BIN, ec.GO_RUN, str(_ORACLE_GO), str(target)]
    child_env = dict(os.environ)
    child_env[ec.GO_MODULE_ENV] = ec.GO_MODULE_OFF
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        check=True,
        env=child_env,
    )
    raw = completed.stdout if completed.stdout else "{}"
    payload: OraclePayload = json.loads(raw)
    return payload


def run_go_oracle(target: Path) -> GraphData:
    # (H) Structure oracle: the raw payload converted into graph form.
    payload = _run_go_oracle_payload(target)
    return payload_to_graph(payload)


def run_go_call_oracle(target: Path) -> tuple[set[tuple[str, str]], frozenset[str]]:
    # (H) Returns (edges, declared). declared holds the names of every node
    # (H) whose kind is Function or Method. edges holds the (file, callee name)
    # (H) call sites whose callee is in declared and whose file is not ignored.
    payload = _run_go_oracle_payload(target)
    callable_names: set[str] = set()
    for record in payload.get(ec.ORACLE_KEY_NODES, []):
        if record.get(ec.ORACLE_KEY_KIND) in _CALLABLE_KINDS:
            callable_names.add(record[ec.ORACLE_KEY_NAME])
    declared = frozenset(callable_names)
    edges: set[tuple[str, str]] = set()
    for site in payload.get(ec.ORACLE_KEY_CALLS, []):
        # (H) Check the name first; the file is only read for first-party callees.
        if site[ec.ORACLE_KEY_NAME] not in declared:
            continue
        if is_ignored(site[ec.ORACLE_KEY_FILE]):
            continue
        edges.add((site[ec.ORACLE_KEY_FILE], site[ec.ORACLE_KEY_NAME]))
    return edges, declared
def java_available() -> bool:
    # (H) True only when both the Java compiler and the Java launcher resolve
    # (H) on PATH; the launcher is not probed if the compiler is missing.
    return all(
        shutil.which(binary) is not None for binary in (ec.JAVAC_BIN, ec.JAVA_BIN)
    )


def _ensure_compiled() -> None:
    # (H) Compiles the oracle source unless a class file exists that is at
    # (H) least as new as the source. Does nothing when the compiler is not on
    # (H) PATH. A failing compile raises CalledProcessError (check=True).
    up_to_date = (
        _CLASS.is_file() and _CLASS.stat().st_mtime >= _SOURCE.stat().st_mtime
    )
    if up_to_date:
        return
    compiler = shutil.which(ec.JAVAC_BIN)
    if compiler is not None:
        subprocess.run(
            [compiler, str(_SOURCE)],
            cwd=str(_ORACLE_DIR),
            capture_output=True,
            text=True,
            check=True,
        )


def _run_java_oracle_payload(target: Path) -> OraclePayload:
    # (H) Makes sure the oracle class is compiled, runs it against target and
    # (H) decodes its stdout as JSON. Without a Java launcher on PATH the result
    # (H) is an empty payload; empty stdout also decodes to an empty payload.
    _ensure_compiled()
    launcher = shutil.which(ec.JAVA_BIN)
    if launcher is None:
        return OraclePayload(nodes=[], edges=[], name_edges=[])
    command = [
        launcher,
        ec.JAVA_CP_FLAG,
        str(_ORACLE_DIR),
        ec.JAVA_ORACLE_CLASS,
        str(target),
    ]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    raw = completed.stdout if completed.stdout else "{}"
    payload: OraclePayload = json.loads(raw)
    return payload


def run_java_oracle(target: Path) -> GraphData:
    # (H) Structure oracle: the raw payload converted into graph form.
    payload = _run_java_oracle_payload(target)
    return payload_to_graph(payload)
def run_java_call_oracle(target: Path) -> tuple[set[tuple[str, str]], frozenset[str]]:
    # (H) Returns (edges, declared). declared holds the names of every node
    # (H) whose kind is Function or Method. edges holds the (file, callee name)
    # (H) call sites whose callee is in declared and whose file is not ignored.
    payload = _run_java_oracle_payload(target)
    callable_names: set[str] = set()
    for record in payload.get(ec.ORACLE_KEY_NODES, []):
        if record.get(ec.ORACLE_KEY_KIND) in _CALLABLE_KINDS:
            callable_names.add(record[ec.ORACLE_KEY_NAME])
    declared = frozenset(callable_names)
    edges: set[tuple[str, str]] = set()
    for site in payload.get(ec.ORACLE_KEY_CALLS, []):
        # (H) Check the name first; the file is only read for first-party callees.
        if site[ec.ORACLE_KEY_NAME] not in declared:
            continue
        if is_ignored(site[ec.ORACLE_KEY_FILE]):
            continue
        edges.add((site[ec.ORACLE_KEY_FILE], site[ec.ORACLE_KEY_NAME]))
    return edges, declared
// (H) Parses every .java file under a directory with the JDK Compiler Tree API
// (H) and prints one JSON payload {nodes, edges, name_edges, calls} on stdout.
// (H) Records are accumulated as pre-serialized JSON strings in static lists.
public class Oracle {
    // (H) Directory names whose subtrees the walk does not enter.
    static final Set<String> IGNORED =
        new HashSet<>(Arrays.asList(".git", "target", "build", "node_modules", "vendor"));
    static final List<String> recs = new ArrayList<>();
    static final List<String> edges = new ArrayList<>();
    static final List<String> nameEdges = new ArrayList<>();
    static final List<String> calls = new ArrayList<>();
    // (H) The line at which a file's Module node is keyed.
    static final long MODULE_LINE = 0;

    // (H) Escapes a string for use inside a JSON string literal: backslash,
    // (H) double quote, and every control character below U+0020 (which JSON
    // (H) forbids unescaped, and which a file path can legally contain).
    static String esc(String s) {
        StringBuilder out = new StringBuilder(s.length() + 8);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '\\':
                    out.append("\\\\");
                    break;
                case '"':
                    out.append("\\\"");
                    break;
                case '\n':
                    out.append("\\n");
                    break;
                case '\r':
                    out.append("\\r");
                    break;
                case '\t':
                    out.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        out.append(String.format("\\u%04x", (int) c));
                    } else {
                        out.append(c);
                    }
            }
        }
        return out.toString();
    }

    // (H) Simple name of an extends/implements type: everything from the first
    // (H) '<' is dropped, then everything up to the last '.', then whitespace
    // (H) is trimmed.
    static String simpleName(Object typeTree) {
        String s = typeTree.toString();
        int lt = s.indexOf('<');
        if (lt >= 0) {
            s = s.substring(0, lt);
        }
        int dot = s.lastIndexOf('.');
        if (dot >= 0) {
            s = s.substring(dot + 1);
        }
        return s.trim();
    }

    // (H) Records a name-resolved edge (INHERITS / IMPLEMENTS): the source node
    // (H) is identified by (kind, file, line), the target only by simple name.
    static void emitNameEdge(
            String rel, String file, String skind, long sline, String targetName) {
        nameEdges.add("{\"rel\":\"" + rel + "\",\"source\":{\"kind\":\"" + skind
            + "\",\"file\":\"" + esc(file) + "\",\"line\":" + sline
            + "},\"target_name\":\"" + esc(targetName) + "\"}");
    }

    // (H) Records a file-level call site: the caller's file and the callee's
    // (H) simple name.
    static void emitCall(String file, String name) {
        calls.add("{\"file\":\"" + esc(file) + "\",\"name\":\"" + esc(name) + "\"}");
    }

    // (H) Records one declaration node.
    static void emit(String kind, String file, long line, long endLine, String name) {
        recs.add("{\"kind\":\"" + kind + "\",\"file\":\"" + esc(file)
            + "\",\"line\":" + line + ",\"end_line\":" + endLine
            + ",\"name\":\"" + esc(name) + "\"}");
    }

    // (H) Records a containment edge; parent and child share the same file.
    static void emitEdge(
            String rel, String file, String pkind, long pline, String ckind, long cline) {
        edges.add("{\"rel\":\"" + rel + "\",\"parent\":{\"kind\":\"" + pkind
            + "\",\"file\":\"" + esc(file) + "\",\"line\":" + pline
            + "},\"child\":{\"kind\":\"" + ckind + "\",\"file\":\"" + esc(file)
            + "\",\"line\":" + cline + "}}");
    }

    // (H) Node kind of a type declaration: INTERFACE -> Interface, ENUM -> Enum,
    // (H) every other kind (including an annotation type) -> Class.
    static String classKind(ClassTree node) {
        switch (node.getKind()) {
            case INTERFACE:
                return "Interface";
            case ENUM:
                return "Enum";
            default:
                return "Class";
        }
    }

    // (H) Entry point: args[0] is the directory to scan. Prints the payload and
    // (H) returns; with no argument prints a usage line to stderr and exits 2.
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("usage: java -cp <oracle-dir> Oracle <dir>");
            System.exit(2);
        }
        Path root = Paths.get(args[0]).toAbsolutePath().normalize();
        List<Path> files = new ArrayList<>();
        // (H) The explicit <Path> type argument is what makes the two methods
        // (H) below override the visitor's hooks; on a raw SimpleFileVisitor
        // (H) they would be unrelated overloads that the walk never calls.
        Files.walkFileTree(root, new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult preVisitDirectory(Path d, BasicFileAttributes a) {
                Path name = d.getFileName();
                if (name != null && IGNORED.contains(name.toString())) {
                    return FileVisitResult.SKIP_SUBTREE;
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFile(Path f, BasicFileAttributes a) {
                if (f.toString().endsWith(".java")) {
                    files.add(f);
                }
                return FileVisitResult.CONTINUE;
            }
        });
        if (files.isEmpty()) {
            // (H) Same four keys as the non-empty payload, so consumers see one shape.
            System.out.print("{\"nodes\":[],\"edges\":[],\"name_edges\":[],\"calls\":[]}");
            return;
        }

        JavaCompiler compiler = ToolProvider.getSystemJavaCompiler();
        StandardJavaFileManager fm = compiler.getStandardFileManager(null, null, null);
        Iterable<? extends JavaFileObject> units = fm.getJavaFileObjectsFromPaths(files);
        // (H) Diagnostics are swallowed by the no-op listener: only parse()
        // (H) is invoked on the task below.
        JavacTask task = (JavacTask) compiler.getTask(null, fm, d -> {}, null, null, units);
        SourcePositions sp = Trees.instance(task).getSourcePositions();

        for (CompilationUnitTree unit : task.parse()) {
            Path abs = Paths.get(unit.getSourceFile().toUri());
            String rel = root.relativize(abs).toString().replace('\\', '/');
            LineMap lm = unit.getLineMap();
            new TreePathScanner<Void, Void>() {
                @Override
                public Void visitClass(ClassTree node, Void p) {
                    long pos = sp.getStartPosition(unit, node);
                    // (H) Anonymous classes have an empty name and emit no node.
                    if (pos >= 0 && node.getSimpleName().length() > 0) {
                        long line = lm.getLineNumber(pos);
                        long endLine = lm.getLineNumber(sp.getEndPosition(unit, node));
                        String kind = classKind(node);
                        emit(kind, rel, line, endLine, node.getSimpleName().toString());
                        // (H) Every named type, nested or not, hangs off the
                        // (H) file module (flat containment).
                        emitEdge("DEFINES", rel, "Module", MODULE_LINE, kind, line);
                        if (node.getExtendsClause() != null) {
                            emitNameEdge("INHERITS", rel, kind, line,
                                simpleName(node.getExtendsClause()));
                        }
                        // (H) For an interface the implements-clause entries are
                        // (H) emitted as INHERITS; for anything else, IMPLEMENTS.
                        String hrel = node.getKind() == Tree.Kind.INTERFACE
                            ? "INHERITS" : "IMPLEMENTS";
                        for (Tree it : node.getImplementsClause()) {
                            emitNameEdge(hrel, rel, kind, line, simpleName(it));
                        }
                    }
                    return super.visitClass(node, p);
                }

                @Override
                public Void visitMethod(MethodTree node, Void p) {
                    long pos = sp.getStartPosition(unit, node);
                    if (pos >= 0) {
                        // (H) Walk outwards: reaching a named class first makes
                        // (H) this a Method owned by that class; reaching a
                        // (H) method or lambda first (the anonymous-class case)
                        // (H) leaves it a standalone Function with no owner.
                        String kind = "Function";
                        ClassTree owner = null;
                        for (TreePath up = getCurrentPath().getParentPath();
                                up != null; up = up.getParentPath()) {
                            Tree t = up.getLeaf();
                            if (t instanceof ClassTree
                                    && ((ClassTree) t).getSimpleName().length() > 0) {
                                kind = "Method";
                                owner = (ClassTree) t;
                                break;
                            }
                            if (t instanceof MethodTree || t instanceof LambdaExpressionTree) {
                                break;
                            }
                        }
                        long line = lm.getLineNumber(pos);
                        long endLine = lm.getLineNumber(sp.getEndPosition(unit, node));
                        emit(kind, rel, line, endLine, node.getName().toString());
                        if (owner != null) {
                            long opos = sp.getStartPosition(unit, owner);
                            if (opos >= 0) {
                                emitEdge("DEFINES_METHOD", rel, classKind(owner),
                                    lm.getLineNumber(opos), "Method", line);
                            }
                        }
                    }
                    return super.visitMethod(node, p);
                }

                @Override
                public Void visitMethodInvocation(MethodInvocationTree node, Void p) {
                    // (H) Callee simple name: the trailing identifier of a
                    // (H) member select (obj.foo(), Type.bar()) or a bare
                    // (H) identifier (foo()). Other selector shapes emit nothing.
                    ExpressionTree sel = node.getMethodSelect();
                    String name = null;
                    if (sel instanceof MemberSelectTree) {
                        name = ((MemberSelectTree) sel).getIdentifier().toString();
                    } else if (sel instanceof IdentifierTree) {
                        name = ((IdentifierTree) sel).getName().toString();
                    }
                    if (name != null) {
                        emitCall(rel, name);
                    }
                    return super.visitMethodInvocation(node, p);
                }
            }.scan(unit, null);
        }
        System.out.print("{\"nodes\":[" + String.join(",", recs)
            + "],\"edges\":[" + String.join(",", edges)
            + "],\"name_edges\":[" + String.join(",", nameEdges)
            + "],\"calls\":[" + String.join(",", calls) + "]}");
    }
}
def lua_oracle_available() -> bool:
    # (H) True only when both the Node binary and npm resolve on PATH; npm is
    # (H) not probed if Node is missing.
    return all(
        shutil.which(binary) is not None for binary in (ec.NODE_BIN, ec.NPM_BIN)
    )


def _ensure_deps() -> None:
    # (H) Runs the npm install command in the oracle directory when its
    # (H) node_modules directory is absent. Does nothing when the directory
    # (H) already exists or npm is not on PATH. A failing install raises
    # (H) CalledProcessError (check=True).
    installer = None if _NODE_MODULES.is_dir() else shutil.which(ec.NPM_BIN)
    if installer is None:
        return
    install_cmd = [installer, ec.NPM_INSTALL]
    install_cmd.extend(ec.NPM_FLAGS)
    subprocess.run(
        install_cmd,
        cwd=str(_ORACLE_DIR),
        capture_output=True,
        text=True,
        check=True,
    )


def _run_lua_oracle_payload(target: Path) -> OraclePayload:
    # (H) Makes sure dependencies are installed, runs the oracle script under
    # (H) Node against target and decodes its stdout as JSON. Without Node on
    # (H) PATH the result is an empty payload; empty stdout also decodes to an
    # (H) empty payload.
    _ensure_deps()
    runtime = shutil.which(ec.NODE_BIN)
    if runtime is None:
        return OraclePayload(nodes=[], edges=[], name_edges=[])
    command = [runtime, str(_SCRIPT), str(target)]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    raw = completed.stdout if completed.stdout else "{}"
    payload: OraclePayload = json.loads(raw)
    return payload


def run_lua_oracle(target: Path) -> GraphData:
    # (H) Structure oracle: the raw payload converted into graph form.
    payload = _run_lua_oracle_payload(target)
    return payload_to_graph(payload)
def run_lua_call_oracle(target: Path) -> tuple[set[tuple[str, str]], frozenset[str]]:
    # (H) Returns (edges, declared). declared holds the names of every node
    # (H) whose kind is in _CALLABLE_KINDS. edges holds the (file, callee name)
    # (H) call sites whose callee is in declared and whose file is not ignored.
    payload = _run_lua_oracle_payload(target)
    callable_names: set[str] = set()
    for record in payload.get(ec.ORACLE_KEY_NODES, []):
        if record.get(ec.ORACLE_KEY_KIND) in _CALLABLE_KINDS:
            callable_names.add(record[ec.ORACLE_KEY_NAME])
    declared = frozenset(callable_names)
    edges: set[tuple[str, str]] = set()
    for site in payload.get(ec.ORACLE_KEY_CALLS, []):
        # (H) Check the name first; the file is only read for first-party callees.
        if site[ec.ORACLE_KEY_NAME] not in declared:
            continue
        if is_ignored(site[ec.ORACLE_KEY_FILE]):
            continue
        edges.add((site[ec.ORACLE_KEY_FILE], site[ec.ORACLE_KEY_NAME]))
    return edges, declared
// (H) Simple name of a luaparse name reference: an Identifier's own name, or
// (H) the trailing identifier of a MemberExpression (`t.f` / `t:m` -> f / m).
// (H) Anything else, including a missing reference, has no static name.
function refName(ref) {
  switch (ref && ref.type) {
    case "Identifier":
      return ref.name;
    case "MemberExpression":
      return ref.identifier ? ref.identifier.name : null;
    default:
      return null;
  }
}

// (H) Name of a function node: its own identifier when it has one, else the
// (H) binding recorded for it in assignedNames, else the anonymous marker.
function declName(node, assignedNames) {
  const own = refName(node.identifier);
  if (own) return own;
  const bound = assignedNames.get(node);
  return bound ? bound : ANONYMOUS;
}

// (H) Recursive AST visitor. Emits a Function node and a DEFINES edge for each
// (H) located FunctionDeclaration (parented to the enclosing function, else
// (H) the file module) and a call record for each call expression whose base
// (H) has a static simple name. The loc and range properties are never
// (H) descended into.
function walk(node, file, parentRef, assignedNames) {
  if (node === null || typeof node !== "object") return;
  if (Array.isArray(node)) {
    node.forEach((child) => walk(child, file, parentRef, assignedNames));
    return;
  }
  // (H) Before descending, remember which variable each function expression
  // (H) in this statement is bound to, so the function can be named later.
  const isBinding =
    node.type === "LocalStatement" || node.type === "AssignmentStatement";
  if (isBinding && Array.isArray(node.variables) && Array.isArray(node.init)) {
    node.init.forEach((value, i) => {
      if (!value || value.type !== "FunctionDeclaration") return;
      const bound = refName(node.variables[i]);
      if (bound) assignedNames.set(value, bound);
    });
  }
  // (H) Children inherit the current parent unless this node is itself a
  // (H) located function, in which case it becomes their lexical parent.
  let scope = parentRef;
  if (node.type === "FunctionDeclaration" && node.loc) {
    const line = node.loc.start.line;
    nodes.push({
      kind: "Function",
      file,
      line,
      end_line: node.loc.end.line,
      name: declName(node, assignedNames),
    });
    edges.push({
      rel: "DEFINES",
      parent: { kind: parentRef.kind, file, line: parentRef.line },
      child: { kind: "Function", file, line },
    });
    scope = { kind: "Function", line };
  } else if (
    node.type === "CallExpression" ||
    node.type === "StringCallExpression" ||
    node.type === "TableCallExpression"
  ) {
    const callee = refName(node.base);
    if (callee) calls.push({ file, name: callee });
  }
  for (const key of Object.keys(node)) {
    if (key !== "loc" && key !== "range") {
      walk(node[key], file, scope, assignedNames);
    }
  }
}
+ const ast = luaparse.parse(src, { + locations: true, + comments: false, + luaVersion: "5.3", + }); + const rel = path.relative(root, p).split(path.sep).join("/"); + walk(ast, rel, { kind: "Module", line: MODULE_LINE }, new Map()); + } catch (e) { + // skip files luaparse cannot parse + } + } + } +} + +const root = process.argv[2] || "."; +visitDir(root, root); +process.stdout.write(JSON.stringify({ nodes, edges, calls })); diff --git a/evals/oracles/lua_oracle/package-lock.json b/evals/oracles/lua_oracle/package-lock.json new file mode 100644 index 000000000..28f41d4d7 --- /dev/null +++ b/evals/oracles/lua_oracle/package-lock.json @@ -0,0 +1,27 @@ +{ + "name": "lua_oracle", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "lua_oracle", + "version": "0.1.0", + "dependencies": { + "luaparse": "^0.3.1" + }, + "bin": { + "lua_oracle": "lua_ast.js" + } + }, + "node_modules/luaparse": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/luaparse/-/luaparse-0.3.1.tgz", + "integrity": "sha512-b21h2bFEbtGXmVqguHogbyrMAA0wOHyp9u/rx+w6Yc9pW1t9YjhGUsp87lYcp7pFRqSWN/PhFkrdIqKEUzRjjQ==", + "license": "MIT", + "bin": { + "luaparse": "bin/luaparse" + } + } + } +} diff --git a/evals/oracles/lua_oracle/package.json b/evals/oracles/lua_oracle/package.json new file mode 100644 index 000000000..ed2aacbdd --- /dev/null +++ b/evals/oracles/lua_oracle/package.json @@ -0,0 +1,10 @@ +{ + "name": "lua_oracle", + "version": "0.1.0", + "private": true, + "description": "Authoritative Lua structure oracle for the cgr eval harness", + "bin": { "lua_oracle": "lua_ast.js" }, + "dependencies": { + "luaparse": "^0.3.1" + } +} diff --git a/evals/oracles/php_oracle.py b/evals/oracles/php_oracle.py new file mode 100644 index 000000000..4ec696e92 --- /dev/null +++ b/evals/oracles/php_oracle.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import json +import shutil +import subprocess +from pathlib import Path + +from 
codebase_rag import constants as cs + +from .. import constants as ec +from ..types_defs import GraphData, OraclePayload +from ._common import is_ignored, payload_to_graph + +_ORACLE_DIR = Path(__file__).parent / ec.PHP_ORACLE_DIRNAME +_SCRIPT = _ORACLE_DIR / ec.PHP_ORACLE_SCRIPT +_NODE_MODULES = _ORACLE_DIR / ec.NODE_MODULES_DIRNAME +_CALLABLE_KINDS = frozenset({cs.NodeLabel.FUNCTION.value, cs.NodeLabel.METHOD.value}) + + +def php_oracle_available() -> bool: + return ( + shutil.which(ec.NODE_BIN) is not None and shutil.which(ec.NPM_BIN) is not None + ) + + +def _ensure_deps() -> None: + if _NODE_MODULES.is_dir(): + return + npm = shutil.which(ec.NPM_BIN) + if npm is None: + return + subprocess.run( + [npm, ec.NPM_INSTALL, *ec.NPM_FLAGS], + cwd=str(_ORACLE_DIR), + capture_output=True, + text=True, + check=True, + ) + + +def _run_php_oracle_payload(target: Path) -> OraclePayload: + _ensure_deps() + node = shutil.which(ec.NODE_BIN) + if node is None: + return OraclePayload(nodes=[], edges=[], name_edges=[]) + proc = subprocess.run( + [node, str(_SCRIPT), str(target)], + capture_output=True, + text=True, + check=True, + ) + payload: OraclePayload = json.loads(proc.stdout or "{}") + return payload + + +def run_php_oracle(target: Path) -> GraphData: + return payload_to_graph(_run_php_oracle_payload(target)) + + +def run_php_call_oracle(target: Path) -> tuple[set[tuple[str, str]], frozenset[str]]: + # (H) File-level PHP call sites restricted to first-party callees (a callee + # (H) whose simple name is a declared Function/Method), with the declared name + # (H) universe so the cgr side can be held to the same set. Mirrors the Go, + # (H) Rust, Java, and TypeScript call oracles. 
+ payload = _run_php_oracle_payload(target) + declared = frozenset( + rec[ec.ORACLE_KEY_NAME] + for rec in payload.get(ec.ORACLE_KEY_NODES, []) + if rec.get(ec.ORACLE_KEY_KIND) in _CALLABLE_KINDS + ) + edges = { + (call[ec.ORACLE_KEY_FILE], call[ec.ORACLE_KEY_NAME]) + for call in payload.get(ec.ORACLE_KEY_CALLS, []) + if call[ec.ORACLE_KEY_NAME] in declared + and not is_ignored(call[ec.ORACLE_KEY_FILE]) + } + return edges, declared diff --git a/evals/oracles/php_oracle/package-lock.json b/evals/oracles/php_oracle/package-lock.json new file mode 100644 index 000000000..c040cfa11 --- /dev/null +++ b/evals/oracles/php_oracle/package-lock.json @@ -0,0 +1,24 @@ +{ + "name": "php_oracle", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "php_oracle", + "version": "0.1.0", + "dependencies": { + "php-parser": "^3.2.5" + }, + "bin": { + "php_oracle": "php_ast.js" + } + }, + "node_modules/php-parser": { + "version": "3.7.0", + "resolved": "https://registry.npmjs.org/php-parser/-/php-parser-3.7.0.tgz", + "integrity": "sha512-JRc1t78GZAEa+MuzVC5A5RJS1NDFTS4UnprUEu/NnsN9cyHbGZLUqghO9IQZUSCay62HYQiWd3PxyWAEF45zmA==", + "license": "BSD-3-Clause" + } + } +} diff --git a/evals/oracles/php_oracle/package.json b/evals/oracles/php_oracle/package.json new file mode 100644 index 000000000..7cd287fdd --- /dev/null +++ b/evals/oracles/php_oracle/package.json @@ -0,0 +1,10 @@ +{ + "name": "php_oracle", + "version": "0.1.0", + "private": true, + "description": "Authoritative PHP structure oracle for the cgr eval harness", + "bin": { "php_oracle": "php_ast.js" }, + "dependencies": { + "php-parser": "^3.2.5" + } +} diff --git a/evals/oracles/php_oracle/php_ast.js b/evals/oracles/php_oracle/php_ast.js new file mode 100644 index 000000000..2dde46745 --- /dev/null +++ b/evals/oracles/php_oracle/php_ast.js @@ -0,0 +1,260 @@ +// Authoritative PHP structure oracle for the cgr eval harness. 
//
// Parses every .php file with php-parser (a pure-JS PHP parser) and emits one
// JSON record per declaration, in cgr's NodeLabel vocabulary, joined on
// (kind, file, line).
//
// Mapping (PHP construct -> cgr NodeLabel), matching how cgr models PHP:
//
//   class                       -> Class
//   interface                   -> Interface  (+ its methods -> Method)
//   trait                       -> Class      (cgr models a trait as a Class)
//   enum                        -> Enum
//   method (in named type)      -> Method
//   method (in anonymous class) -> Function   (cgr models these as Functions)
//   function                    -> Function
//   closure / arrow fn          -> Function   (anonymous)
//
// A declaration's line is the line of its first attribute (`#[Attr]`) when
// present, matching cgr's node span; anonymous classes (`new class {...}`) get
// no Class node, like cgr.
//
// Containment edges (matching how cgr models PHP containment):
//
//   DEFINES        : the file module -> every named type and top-level function
//   DEFINES_METHOD : the enclosing named type -> Method
//
// cgr keeps type containment flat (the file module DEFINES every named type,
// keyed at line 0); a Method binds to its enclosing class/interface/trait/enum;
// a Function/closure binds to its nearest enclosing function, else the module.
// An anonymous-class member is a Function (no DEFINES_METHOD). Output is a
// {nodes, edges, name_edges, calls} payload; nodes and edges join cgr on
// (kind, file, line).
//
// Run: node php_ast.js [dir]   (dir defaults to ".")

const phpParser = require("php-parser");
const fs = require("fs");
const path = require("path");

// Directory names that are never descended into.
const IGNORED = new Set([".git", "node_modules", "vendor"]);
// Line used to key the per-file Module node.
const MODULE_LINE = 0;
// Accumulators for the four sections of the emitted payload.
const nodes = [];
const edges = [];
const nameEdges = [];
const calls = [];

// (H) Normalise a php-parser declaration name to a plain string. The name is
// (H) either an identifier object ({name:"foo"}) or a bare string depending on
// (H) node/version; anything else (missing, non-string `name`) is "anonymous".
function nameOf(n) {
  const fallback = "anonymous";
  if (!n) return fallback;
  const candidate = typeof n === "string" ? n : n.name;
  return typeof candidate === "string" ? candidate : fallback;
}

// (H) Record one declaration node; a falsy name is stored as "decl".
function emit(kind, file, line, endLine, name) {
  const label = name ? name : "decl";
  nodes.push({ kind, file, line, end_line: endLine, name: label });
}

// (H) The callee simple name of a php-parser `call`: a bare function name
// (H) (`foo()`, last namespace segment), or the trailing member of a method
// (H) (`$this->h()`) or static (`Bar::s()`) lookup. Dynamic callees (`$f()`,
// (H) `$obj->$m()`) yield null.
function callName(what) {
  if (!what) return null;
  switch (what.kind) {
    case "name": {
      if (!what.name) return null;
      const segments = what.name.split("\\");
      return segments[segments.length - 1];
    }
    case "propertylookup":
    case "nullsafepropertylookup":
    case "staticlookup": {
      // (H) Only a static identifier offset is a real callee name; a dynamic
      // (H) offset (`$obj->$m()`) is kind "variable" whose `name` is the
      // (H) variable identifier, which must not be emitted as a call edge.
      const member = what.offset;
      const isStaticMember =
        member && member.kind === "identifier" && typeof member.name === "string";
      return isStaticMember ? member.name : null;
    }
    default:
      return null;
  }
}

// (H) Record a containment edge between two same-file endpoints, each keyed by
// (H) (kind, file, line).
function emitEdge(rel, file, pkind, pline, ckind, cline) {
  const parent = { kind: pkind, file, line: pline };
  const child = { kind: ckind, file, line: cline };
  edges.push({ rel, parent, child });
}

// (H) Record an edge whose target is known only by simple name (e.g. a base
// (H) class), keyed on the source declaration.
function emitNameEdge(rel, file, skind, sline, targetName) {
  const source = { kind: skind, file, line: sline };
  nameEdges.push({ rel, source, target_name: targetName });
}

// (H) Simple name of a php-parser Name ref: its last namespace segment, matching
// (H) how cgr resolves bases by simple name (e.g. \App\Base -> Base). A missing
// (H) ref or name yields "".
function phpSimpleName(ref) {
  const qualified = (ref && ref.name) || "";
  const parts = qualified.split("\\");
  return parts[parts.length - 1];
}

// (H) Coerce an optional single-or-array value into an array ([] when falsy).
function asList(refs) {
  if (Array.isArray(refs)) return refs;
  return refs ? [refs] : [];
}

// (H) class extends -> INHERITS, implements -> IMPLEMENTS; interface extends
// (H) (an array) -> INHERITS (cgr models superinterfaces as inheritance).
+function emitInheritance(node, file, kind, line) { + const extendsRel = "INHERITS"; + for (const ref of asList(node.extends)) { + emitNameEdge(extendsRel, file, kind, line, phpSimpleName(ref)); + } + for (const ref of asList(node.implements)) { + emitNameEdge("IMPLEMENTS", file, kind, line, phpSimpleName(ref)); + } +} + +function declLine(node) { + let line = node.loc.start.line; + if (Array.isArray(node.attrGroups)) { + for (const g of node.attrGroups) { + if (g.loc && g.loc.start.line < line) line = g.loc.start.line; + } + } + return line; +} + +function isAnonymous(node) { + return node.isAnonymous === true || node.name === null; +} + +function walkChildren(node, file, ctx) { + for (const k of Object.keys(node)) { + if (k === "loc") continue; + walk(node[k], file, ctx); + } +} + +// ctx: { container, typeRef, funcRef } +// container: "module" | "class" | "anon" | "function" +// typeRef: enclosing named type {kind,line} (DEFINES_METHOD parent) +// funcRef: enclosing function {kind,line} (DEFINES parent for nested fns) +function defineFunctionEdge(file, ctx, kind, line) { + if (kind === "Method") { + if (ctx.typeRef) { + emitEdge("DEFINES_METHOD", file, ctx.typeRef.kind, ctx.typeRef.line, "Method", line); + } + } else { + const parent = ctx.funcRef || { kind: "Module", line: MODULE_LINE }; + emitEdge("DEFINES", file, parent.kind, parent.line, "Function", line); + } +} + +function walk(node, file, ctx) { + if (node === null || typeof node !== "object") return; + if (Array.isArray(node)) { + for (const c of node) walk(c, file, ctx); + return; + } + switch (node.kind) { + case "class": { + if (isAnonymous(node)) { + // (H) Anonymous class: no node; its methods are Functions bound to the + // (H) enclosing function/module, so keep funcRef and mark the container. 
+ walkChildren(node, file, { container: "anon", typeRef: null, funcRef: ctx.funcRef }); + } else { + const line = declLine(node); + emit("Class", file, line, node.loc.end.line, nameOf(node.name)); + emitEdge("DEFINES", file, "Module", MODULE_LINE, "Class", line); + emitInheritance(node, file, "Class", line); + walkChildren(node, file, { container: "class", typeRef: { kind: "Class", line }, funcRef: null }); + } + return; + } + case "interface": { + const line = declLine(node); + emit("Interface", file, line, node.loc.end.line, nameOf(node.name)); + emitEdge("DEFINES", file, "Module", MODULE_LINE, "Interface", line); + emitInheritance(node, file, "Interface", line); + walkChildren(node, file, { container: "class", typeRef: { kind: "Interface", line }, funcRef: null }); + return; + } + case "trait": { + const line = declLine(node); + emit("Class", file, line, node.loc.end.line, nameOf(node.name)); + emitEdge("DEFINES", file, "Module", MODULE_LINE, "Class", line); + walkChildren(node, file, { container: "class", typeRef: { kind: "Class", line }, funcRef: null }); + return; + } + case "enum": { + const line = declLine(node); + emit("Enum", file, line, node.loc.end.line, nameOf(node.name)); + emitEdge("DEFINES", file, "Module", MODULE_LINE, "Enum", line); + emitInheritance(node, file, "Enum", line); + walkChildren(node, file, { container: "class", typeRef: { kind: "Enum", line }, funcRef: null }); + return; + } + case "method": { + const kind = ctx.container === "anon" ? 
"Function" : "Method"; + const line = declLine(node); + emit(kind, file, line, node.loc.end.line, nameOf(node.name)); + defineFunctionEdge(file, ctx, kind, line); + walkChildren(node, file, { container: "function", typeRef: null, funcRef: { kind, line } }); + return; + } + case "function": { + const line = declLine(node); + emit("Function", file, line, node.loc.end.line, nameOf(node.name)); + defineFunctionEdge(file, ctx, "Function", line); + walkChildren(node, file, { container: "function", typeRef: null, funcRef: { kind: "Function", line } }); + return; + } + case "closure": + case "arrowfunc": { + const line = node.loc.start.line; + emit("Function", file, line, node.loc.end.line, "anonymous"); + defineFunctionEdge(file, ctx, "Function", line); + walkChildren(node, file, { container: "function", typeRef: null, funcRef: { kind: "Function", line } }); + return; + } + case "call": { + // (H) Emit the callee simple name; the Python side keeps only callees whose + // (H) name is a declared first-party Function/Method. Recurse for nested + // (H) calls in the arguments / receiver. 
+ const name = callName(node.what); + if (name) calls.push({ file, name }); + walkChildren(node, file, ctx); + return; + } + default: + walkChildren(node, file, ctx); + } +} + +function visitDir(dir, root, parser) { + for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { + const p = path.join(dir, entry.name); + if (entry.isDirectory()) { + if (!IGNORED.has(entry.name)) visitDir(p, root, parser); + } else if (entry.name.endsWith(".php")) { + try { + const ast = parser.parseCode(fs.readFileSync(p, "utf8")); + const rel = path.relative(root, p).split(path.sep).join("/"); + walk(ast, rel, { container: "module", typeRef: null, funcRef: null }); + } catch (e) { + // skip files php-parser cannot parse + } + } + } +} + +const root = process.argv[2] || "."; +const parser = new phpParser.Engine({ + parser: { extractDoc: false, suppressErrors: true }, + ast: { withPositions: true }, +}); +visitDir(root, root, parser); +process.stdout.write(JSON.stringify({ nodes, edges, name_edges: nameEdges, calls })); diff --git a/evals/oracles/rs_oracle/Cargo.lock b/evals/oracles/rs_oracle/Cargo.lock new file mode 100644 index 000000000..500aceee2 --- /dev/null +++ b/evals/oracles/rs_oracle/Cargo.lock @@ -0,0 +1,46 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rs_oracle" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" diff --git a/evals/oracles/rs_oracle/Cargo.toml b/evals/oracles/rs_oracle/Cargo.toml new file mode 100644 index 000000000..6381c7979 --- /dev/null +++ b/evals/oracles/rs_oracle/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "rs_oracle" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "rs_oracle" +path = "src/main.rs" + +[dependencies] +syn = { version = "2", features = ["full", "visit"] } +proc-macro2 = { version = "1", features = ["span-locations"] } diff --git a/evals/oracles/rs_oracle/src/main.rs b/evals/oracles/rs_oracle/src/main.rs new file mode 100644 index 000000000..43643a46e --- /dev/null +++ b/evals/oracles/rs_oracle/src/main.rs @@ -0,0 +1,520 @@ +// Authoritative Rust structure oracle for the cgr eval harness. +// +// Parses every .rs file under a directory with `syn` (the de-facto standard Rust +// parser) and emits a JSON payload {nodes, edges}. 
Node "kind" fields use cgr's +// NodeLabel vocabulary and edges use cgr's RelationshipType vocabulary, so both +// join cgr's graph on (kind, file, line). +// +// Mapping (Rust item -> cgr NodeLabel): +// +// struct -> Class +// enum -> Enum +// union -> Union +// trait -> Interface (its methods -> Method) +// type alias -> Type +// fn -> Function (free fns, including those nested in fn bodies) +// impl method -> Method +// +// Containment edges (matching how cgr models Rust containment): +// +// DEFINES : enclosing module -> item / nested module +// DEFINES_METHOD : the method's owner type (or trait) -> Method +// +// cgr models a Rust module per file (keyed at line 0) plus a Module node per +// inline `mod` (keyed at its declaration line). An item inside `mod inner` is +// DEFINEd by the inner module; an impl method binds to its target type resolved +// within the impl's enclosing module path (falling back to ancestor modules). +// +// The node walk uses `syn::visit::Visit` so function-local definitions and +// closures are captured too; edges use an explicit item recursion that tracks +// the enclosing module, which is what carries containment. 
+// +// Run: cargo run --release -- + +use std::collections::HashMap; +use std::env; +use std::fs; +use std::path::Path; +use syn::spanned::Spanned; +use syn::visit::Visit; + +const IGNORED_DIRS: [&str; 4] = [".git", "target", "vendor", "node_modules"]; + +const KIND_CLASS: &str = "Class"; +const KIND_ENUM: &str = "Enum"; +const KIND_UNION: &str = "Union"; +const KIND_INTERFACE: &str = "Interface"; +const KIND_TYPE: &str = "Type"; +const KIND_FUNCTION: &str = "Function"; +const KIND_METHOD: &str = "Method"; +const KIND_MODULE: &str = "Module"; +const REL_DEFINES: &str = "DEFINES"; +const REL_DEFINES_METHOD: &str = "DEFINES_METHOD"; +const REL_INHERITS: &str = "INHERITS"; +const REL_IMPLEMENTS: &str = "IMPLEMENTS"; +const MODULE_LINE: usize = 0; + +fn esc(s: &str) -> String { + s.replace('\\', "\\\\").replace('"', "\\\"") +} + +fn node_json(kind: &str, file: &str, line: usize, end_line: usize, name: &str) -> String { + format!( + "{{\"kind\":\"{}\",\"file\":\"{}\",\"line\":{},\"end_line\":{},\"name\":\"{}\"}}", + kind, + esc(file), + line, + end_line, + esc(name) + ) +} + +fn edge_json( + rel: &str, + file: &str, + pkind: &str, + pline: usize, + ckind: &str, + cline: usize, +) -> String { + format!( + "{{\"rel\":\"{}\",\"parent\":{{\"kind\":\"{}\",\"file\":\"{}\",\"line\":{}}},\"child\":{{\"kind\":\"{}\",\"file\":\"{}\",\"line\":{}}}}}", + rel, + pkind, + esc(file), + pline, + ckind, + esc(file), + cline + ) +} + +fn name_edge_json( + rel: &str, + file: &str, + skind: &str, + sline: usize, + target_name: &str, +) -> String { + format!( + "{{\"rel\":\"{}\",\"source\":{{\"kind\":\"{}\",\"file\":\"{}\",\"line\":{}}},\"target_name\":\"{}\"}}", + rel, + skind, + esc(file), + sline, + esc(target_name) + ) +} + +fn call_json(file: &str, name: &str) -> String { + format!("{{\"file\":\"{}\",\"name\":\"{}\"}}", esc(file), esc(name)) +} + +// (H) Last path segment of a trait reference (`a::b::Trait` / `Trait` -> Trait). 
+fn trait_path_name(path: &syn::Path) -> Option { + path.segments.last().map(|s| s.ident.to_string()) +} + +// ---- call-site collection ---- +// +// (H) Every call expression's (file, callee simple name): a path call's last +// (H) segment (`foo()`, `a::b::foo()`, `Type::assoc()` -> foo / assoc) and a +// (H) method call's method ident (`x.method()` -> method). Mirrors go_ast.go's +// (H) call oracle so cgr's Rust CALLS edges grade against an independent parser. + +struct CallCollector<'a> { + file: &'a str, + out: &'a mut Vec, +} + +impl<'ast, 'a> Visit<'ast> for CallCollector<'a> { + fn visit_expr_call(&mut self, node: &'ast syn::ExprCall) { + if let syn::Expr::Path(p) = &*node.func { + if let Some(seg) = p.path.segments.last() { + self.out.push(call_json(self.file, &seg.ident.to_string())); + } + } + syn::visit::visit_expr_call(self, node); + } + fn visit_expr_method_call(&mut self, node: &'ast syn::ExprMethodCall) { + self.out.push(call_json(self.file, &node.method.to_string())); + syn::visit::visit_expr_method_call(self, node); + } +} + +// ---- node collection (every declaration, including nested/closures) ---- + +struct NodeCollector<'a> { + file: &'a str, + out: &'a mut Vec, +} + +impl<'a> NodeCollector<'a> { + fn emit(&mut self, kind: &str, line: usize, end_line: usize, name: &str) { + self.out.push(node_json(kind, self.file, line, end_line, name)); + } +} + +impl<'ast, 'a> Visit<'ast> for NodeCollector<'a> { + fn visit_item_struct(&mut self, node: &'ast syn::ItemStruct) { + self.emit(KIND_CLASS, node.ident.span().start().line, node.span().end().line, &node.ident.to_string()); + syn::visit::visit_item_struct(self, node); + } + fn visit_item_enum(&mut self, node: &'ast syn::ItemEnum) { + self.emit(KIND_ENUM, node.ident.span().start().line, node.span().end().line, &node.ident.to_string()); + syn::visit::visit_item_enum(self, node); + } + fn visit_item_union(&mut self, node: &'ast syn::ItemUnion) { + self.emit(KIND_UNION, node.ident.span().start().line, 
node.span().end().line, &node.ident.to_string()); + syn::visit::visit_item_union(self, node); + } + fn visit_item_type(&mut self, node: &'ast syn::ItemType) { + self.emit(KIND_TYPE, node.ident.span().start().line, node.span().end().line, &node.ident.to_string()); + syn::visit::visit_item_type(self, node); + } + fn visit_impl_item_type(&mut self, node: &'ast syn::ImplItemType) { + self.emit(KIND_TYPE, node.ident.span().start().line, node.span().end().line, &node.ident.to_string()); + syn::visit::visit_impl_item_type(self, node); + } + fn visit_trait_item_type(&mut self, node: &'ast syn::TraitItemType) { + self.emit(KIND_TYPE, node.ident.span().start().line, node.span().end().line, &node.ident.to_string()); + syn::visit::visit_trait_item_type(self, node); + } + fn visit_expr_closure(&mut self, node: &'ast syn::ExprClosure) { + self.emit(KIND_FUNCTION, node.span().start().line, node.span().end().line, "closure"); + syn::visit::visit_expr_closure(self, node); + } + fn visit_item_trait(&mut self, node: &'ast syn::ItemTrait) { + self.emit(KIND_INTERFACE, node.ident.span().start().line, node.span().end().line, &node.ident.to_string()); + syn::visit::visit_item_trait(self, node); + } + fn visit_item_fn(&mut self, node: &'ast syn::ItemFn) { + self.emit(KIND_FUNCTION, node.sig.ident.span().start().line, node.span().end().line, &node.sig.ident.to_string()); + syn::visit::visit_item_fn(self, node); + } + fn visit_impl_item_fn(&mut self, node: &'ast syn::ImplItemFn) { + self.emit(KIND_METHOD, node.sig.ident.span().start().line, node.span().end().line, &node.sig.ident.to_string()); + syn::visit::visit_impl_item_fn(self, node); + } + fn visit_trait_item_fn(&mut self, node: &'ast syn::TraitItemFn) { + self.emit(KIND_METHOD, node.sig.ident.span().start().line, node.span().end().line, &node.sig.ident.to_string()); + syn::visit::visit_trait_item_fn(self, node); + } +} + +// ---- closure containment ---- +// +// (H) A closure is DEFINEd by the nearest enclosing function-like scope: a 
free +// (H) fn or another closure (Function), or an impl/trait method (Method); at item +// (H) scope it falls back to the enclosing module. This mirrors cgr, which routes +// (H) every closure through its free-function path and binds it to its lexical +// (H) parent. The walk keeps a stack of enclosing function-likes so nested +// (H) closures bind to the closure that contains them, not the outer method. + +struct ClosureEdges<'a> { + file: &'a str, + edges: &'a mut Vec, + stack: Vec<(&'static str, usize)>, + module_line: usize, +} + +impl<'ast, 'a> Visit<'ast> for ClosureEdges<'a> { + fn visit_item_mod(&mut self, node: &'ast syn::ItemMod) { + if node.content.is_some() { + let saved = self.module_line; + self.module_line = node.ident.span().start().line; + syn::visit::visit_item_mod(self, node); + self.module_line = saved; + } + } + fn visit_item_fn(&mut self, node: &'ast syn::ItemFn) { + self.stack.push((KIND_FUNCTION, node.sig.ident.span().start().line)); + syn::visit::visit_item_fn(self, node); + self.stack.pop(); + } + fn visit_impl_item_fn(&mut self, node: &'ast syn::ImplItemFn) { + self.stack.push((KIND_METHOD, node.sig.ident.span().start().line)); + syn::visit::visit_impl_item_fn(self, node); + self.stack.pop(); + } + fn visit_trait_item_fn(&mut self, node: &'ast syn::TraitItemFn) { + self.stack.push((KIND_METHOD, node.sig.ident.span().start().line)); + syn::visit::visit_trait_item_fn(self, node); + self.stack.pop(); + } + fn visit_expr_closure(&mut self, node: &'ast syn::ExprClosure) { + let cline = node.span().start().line; + let (pkind, pline) = self + .stack + .last() + .copied() + .unwrap_or((KIND_MODULE, self.module_line)); + self.edges.push(edge_json( + REL_DEFINES, self.file, pkind, pline, KIND_FUNCTION, cline, + )); + self.stack.push((KIND_FUNCTION, cline)); + syn::visit::visit_expr_closure(self, node); + self.stack.pop(); + } +} + +// ---- edge collection (containment) ---- + +fn type_table_key(modpath: &str, name: &str) -> String { + 
format!("{}\u{0}{}", modpath, name) +} + +// collect_types records each module-scoped type so an impl can resolve its +// target to the type's (kind, line). +fn collect_types(items: &[syn::Item], modpath: &str, table: &mut HashMap) { + for item in items { + match item { + syn::Item::Struct(s) => { + table.insert( + type_table_key(modpath, &s.ident.to_string()), + (KIND_CLASS.into(), s.ident.span().start().line), + ); + } + syn::Item::Enum(e) => { + table.insert( + type_table_key(modpath, &e.ident.to_string()), + (KIND_ENUM.into(), e.ident.span().start().line), + ); + } + syn::Item::Union(u) => { + table.insert( + type_table_key(modpath, &u.ident.to_string()), + (KIND_UNION.into(), u.ident.span().start().line), + ); + } + syn::Item::Type(t) => { + table.insert( + type_table_key(modpath, &t.ident.to_string()), + (KIND_TYPE.into(), t.ident.span().start().line), + ); + } + syn::Item::Trait(tr) => { + table.insert( + type_table_key(modpath, &tr.ident.to_string()), + (KIND_INTERFACE.into(), tr.ident.span().start().line), + ); + } + syn::Item::Mod(m) => { + if let Some((_, content)) = &m.content { + let child = child_modpath(modpath, &m.ident.to_string()); + collect_types(content, &child, table); + } + } + _ => {} + } + } +} + +fn child_modpath(modpath: &str, name: &str) -> String { + if modpath.is_empty() { + name.to_string() + } else { + format!("{}::{}", modpath, name) + } +} + +// resolve_type finds a type by name starting in modpath and walking outward to +// ancestor modules and the crate root (Rust name resolution is lexical). 
+fn resolve_type( + modpath: &str, + name: &str, + table: &HashMap, +) -> Option<(String, usize)> { + let mut parts: Vec<&str> = if modpath.is_empty() { + Vec::new() + } else { + modpath.split("::").collect() + }; + loop { + let mp = parts.join("::"); + if let Some(v) = table.get(&type_table_key(&mp, name)) { + return Some(v.clone()); + } + if parts.is_empty() { + break; + } + parts.pop(); + } + None +} + +// impl_target_name pulls the bare type name off an impl's self type. +fn impl_target_name(ty: &syn::Type) -> Option { + match ty { + syn::Type::Path(tp) => tp.path.segments.last().map(|s| s.ident.to_string()), + syn::Type::Reference(r) => impl_target_name(&r.elem), + _ => None, + } +} + +fn process_edges( + items: &[syn::Item], + file: &str, + module_line: usize, + modpath: &str, + table: &HashMap, + edges: &mut Vec, + name_edges: &mut Vec, +) { + for item in items { + match item { + syn::Item::Struct(s) => edges.push(edge_json( + REL_DEFINES, file, KIND_MODULE, module_line, KIND_CLASS, s.ident.span().start().line, + )), + syn::Item::Enum(e) => edges.push(edge_json( + REL_DEFINES, file, KIND_MODULE, module_line, KIND_ENUM, e.ident.span().start().line, + )), + syn::Item::Union(u) => edges.push(edge_json( + REL_DEFINES, file, KIND_MODULE, module_line, KIND_UNION, u.ident.span().start().line, + )), + syn::Item::Type(t) => edges.push(edge_json( + REL_DEFINES, file, KIND_MODULE, module_line, KIND_TYPE, t.ident.span().start().line, + )), + syn::Item::Fn(f) => edges.push(edge_json( + REL_DEFINES, file, KIND_MODULE, module_line, KIND_FUNCTION, f.sig.ident.span().start().line, + )), + syn::Item::Trait(tr) => { + let tline = tr.ident.span().start().line; + edges.push(edge_json( + REL_DEFINES, file, KIND_MODULE, module_line, KIND_INTERFACE, tline, + )); + // (H) Supertrait bounds (`trait Sub: Super`) -> Sub INHERITS Super. 
+ for bound in &tr.supertraits { + if let syn::TypeParamBound::Trait(tb) = bound { + if let Some(name) = trait_path_name(&tb.path) { + name_edges.push(name_edge_json( + REL_INHERITS, file, KIND_INTERFACE, tline, &name, + )); + } + } + } + for ti in &tr.items { + match ti { + syn::TraitItem::Fn(m) => edges.push(edge_json( + REL_DEFINES_METHOD, file, KIND_INTERFACE, tline, KIND_METHOD, + m.sig.ident.span().start().line, + )), + // (H) An associated type is a module-scoped Type declaration + // (H) in cgr's model (DEFINEd by the enclosing module). + syn::TraitItem::Type(t) => edges.push(edge_json( + REL_DEFINES, file, KIND_MODULE, module_line, KIND_TYPE, + t.ident.span().start().line, + )), + _ => {} + } + } + } + syn::Item::Impl(im) => { + let owner = impl_target_name(&im.self_ty) + .and_then(|name| resolve_type(modpath, &name, table)); + // (H) `impl Trait for Type` -> Type IMPLEMENTS Trait. + if let (Some((kind, tline)), Some((_, path, _))) = (&owner, &im.trait_) { + if let Some(name) = trait_path_name(path) { + name_edges.push(name_edge_json( + REL_IMPLEMENTS, file, kind, *tline, &name, + )); + } + } + for ii in &im.items { + match ii { + syn::ImplItem::Fn(m) => { + if let Some((kind, tline)) = &owner { + edges.push(edge_json( + REL_DEFINES_METHOD, file, kind, *tline, KIND_METHOD, + m.sig.ident.span().start().line, + )); + } + } + syn::ImplItem::Type(t) => edges.push(edge_json( + REL_DEFINES, file, KIND_MODULE, module_line, KIND_TYPE, + t.ident.span().start().line, + )), + _ => {} + } + } + } + syn::Item::Mod(m) => { + if let Some((_, content)) = &m.content { + let mline = m.ident.span().start().line; + edges.push(edge_json( + REL_DEFINES, file, KIND_MODULE, module_line, KIND_MODULE, mline, + )); + let child = child_modpath(modpath, &m.ident.to_string()); + process_edges(content, file, mline, &child, table, edges, name_edges); + } + } + _ => {} + } + } +} + +fn visit_dir( + dir: &Path, + root: &Path, + nodes: &mut Vec, + edges: &mut Vec, + name_edges: &mut Vec, + 
calls: &mut Vec, +) { + let entries = match fs::read_dir(dir) { + Ok(entries) => entries, + Err(_) => return, + }; + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + let name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + if !IGNORED_DIRS.contains(&name) { + visit_dir(&path, root, nodes, edges, name_edges, calls); + } + } else if path.extension().and_then(|e| e.to_str()) == Some("rs") { + if let Ok(src) = fs::read_to_string(&path) { + if let Ok(ast) = syn::parse_file(&src) { + let rel = path + .strip_prefix(root) + .unwrap_or(&path) + .to_string_lossy() + .replace('\\', "/"); + let mut collector = NodeCollector { file: &rel, out: nodes }; + collector.visit_file(&ast); + let mut table: HashMap = HashMap::new(); + collect_types(&ast.items, "", &mut table); + process_edges( + &ast.items, &rel, MODULE_LINE, "", &table, edges, name_edges, + ); + let mut closures = ClosureEdges { + file: &rel, + edges, + stack: Vec::new(), + module_line: MODULE_LINE, + }; + closures.visit_file(&ast); + let mut callcol = CallCollector { file: &rel, out: calls }; + callcol.visit_file(&ast); + } + } + } + } +} + +fn main() { + let root = env::args().nth(1).unwrap_or_else(|| ".".into()); + let root = Path::new(&root); + let mut nodes = Vec::new(); + let mut edges = Vec::new(); + let mut name_edges = Vec::new(); + let mut calls = Vec::new(); + visit_dir(root, root, &mut nodes, &mut edges, &mut name_edges, &mut calls); + println!( + "{{\"nodes\":[{}],\"edges\":[{}],\"name_edges\":[{}],\"calls\":[{}]}}", + nodes.join(","), + edges.join(","), + name_edges.join(","), + calls.join(",") + ); +} diff --git a/evals/oracles/rust_oracle.py b/evals/oracles/rust_oracle.py new file mode 100644 index 000000000..94ef3be72 --- /dev/null +++ b/evals/oracles/rust_oracle.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import json +import shutil +import subprocess +from pathlib import Path + +from codebase_rag import constants as cs + +from .. 
import constants as ec +from ..types_defs import GraphData, OraclePayload +from ._common import is_ignored, payload_to_graph + +_ORACLE_DIR = Path(__file__).parent / ec.RS_ORACLE_DIRNAME +_MANIFEST = _ORACLE_DIR / "Cargo.toml" +_CALLABLE_KINDS = frozenset({cs.NodeLabel.FUNCTION.value, cs.NodeLabel.METHOD.value}) + + +def rust_available() -> bool: + return shutil.which(ec.CARGO_BIN) is not None + + +def _run_rust_oracle_payload(target: Path) -> OraclePayload: + proc = subprocess.run( + [ + ec.CARGO_BIN, + ec.CARGO_RUN, + ec.CARGO_RELEASE, + ec.CARGO_QUIET, + ec.CARGO_MANIFEST, + str(_MANIFEST), + ec.CARGO_ARG_SEP, + str(target), + ], + capture_output=True, + text=True, + check=True, + ) + payload: OraclePayload = json.loads(proc.stdout or "{}") + return payload + + +def run_rust_oracle(target: Path) -> GraphData: + return payload_to_graph(_run_rust_oracle_payload(target)) + + +def run_rust_call_oracle(target: Path) -> tuple[set[tuple[str, str]], frozenset[str]]: + # (H) File-level Rust call sites restricted to first-party callees (a callee + # (H) whose simple name is a declared Function/Method), with the declared name + # (H) universe so the cgr side can be held to the same set. Mirrors the Go + # (H) call oracle (run_go_call_oracle). 
+ payload = _run_rust_oracle_payload(target) + declared = frozenset( + rec[ec.ORACLE_KEY_NAME] + for rec in payload.get(ec.ORACLE_KEY_NODES, []) + if rec.get(ec.ORACLE_KEY_KIND) in _CALLABLE_KINDS + ) + edges = { + (call[ec.ORACLE_KEY_FILE], call[ec.ORACLE_KEY_NAME]) + for call in payload.get(ec.ORACLE_KEY_CALLS, []) + if call[ec.ORACLE_KEY_NAME] in declared + and not is_ignored(call[ec.ORACLE_KEY_FILE]) + } + return edges, declared diff --git a/evals/oracles/ts_oracle/package-lock.json b/evals/oracles/ts_oracle/package-lock.json new file mode 100644 index 000000000..88e302198 --- /dev/null +++ b/evals/oracles/ts_oracle/package-lock.json @@ -0,0 +1,31 @@ +{ + "name": "ts_oracle", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "ts_oracle", + "version": "0.1.0", + "dependencies": { + "typescript": "^5.9.3" + }, + "bin": { + "ts_oracle": "ts_ast.js" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + } + } +} diff --git a/evals/oracles/ts_oracle/package.json b/evals/oracles/ts_oracle/package.json new file mode 100644 index 000000000..72554f0bc --- /dev/null +++ b/evals/oracles/ts_oracle/package.json @@ -0,0 +1,10 @@ +{ + "name": "ts_oracle", + "version": "0.1.0", + "private": true, + "description": "Authoritative TypeScript structure oracle for the cgr eval harness", + "bin": { "ts_oracle": "ts_ast.js" }, + "dependencies": { + "typescript": "^5.9.3" + } +} diff --git a/evals/oracles/ts_oracle/ts_ast.js b/evals/oracles/ts_oracle/ts_ast.js new file mode 100644 index 000000000..204e1c564 --- /dev/null +++ b/evals/oracles/ts_oracle/ts_ast.js @@ -0,0 +1,247 @@ +// Authoritative TypeScript 
structure oracle for the cgr eval harness. +// +// Parses every .ts/.tsx file under a directory with the TypeScript compiler API +// and emits one JSON record per declaration, in cgr's NodeLabel vocabulary, so +// records join cgr's graph on (kind, file, line). +// +// Mapping (TS construct -> cgr NodeLabel), matching how cgr models TypeScript: +// +// class -> Class +// interface -> Interface +// enum -> Enum +// type alias -> Type +// namespace / module -> Class (cgr treats it as a class container) +// function (top-level/in-fn) -> Function +// function (in namespace/class) -> Method +// const x = () => ... / fn expr -> Function (or Method inside a namespace) +// method / constructor -> Method +// +// Containment edges (matching how cgr models TypeScript containment): +// +// DEFINES : the file module -> every named type (class/interface/enum/ +// type alias/namespace, even when nested) and every Function +// DEFINES_METHOD : the enclosing class/namespace -> Method +// +// cgr keeps type containment flat (all types DEFINEd by the file module, keyed +// at line 0); a Method binds to its enclosing class/namespace; a Function binds +// to its nearest enclosing function, else the module. Output is a {nodes, edges, +// name_edges, calls} payload; nodes and edges join cgr on (kind, file, line). 
+// +// Run: node ts_ast.js + +const ts = require("typescript"); +const fs = require("fs"); +const path = require("path"); + +const IGNORED = new Set([".git", "node_modules", "vendor", "dist", "build", "out"]); +const MODULE_LINE = 0; +const nodes = []; +const edges = []; +const nameEdges = []; +const calls = []; + +function emit(kind, file, line, name, endLine) { + nodes.push({ kind, file, line, end_line: endLine, name }); +} + +function emitEdge(rel, file, pkind, pline, ckind, cline) { + edges.push({ + rel, + parent: { kind: pkind, file, line: pline }, + child: { kind: ckind, file, line: cline }, + }); +} + +function emitNameEdge(rel, file, skind, sline, targetName) { + nameEdges.push({ + rel, + source: { kind: skind, file, line: sline }, + target_name: targetName, + }); +} + +// (H) Simple name of an extends/implements entry: the base expression's last +// (H) identifier (type arguments live separately, so they're already excluded). +function heritageSimpleName(typeNode) { + let expr = typeNode.expression || typeNode; + while (expr && expr.name && expr.expression) { + expr = expr.name; // (H) a.b.Base -> Base + } + return expr && expr.text ? expr.text : expr.getText(); +} + +// (H) A class's extends -> INHERITS, implements -> IMPLEMENTS; an interface's +// (H) extends -> INHERITS (cgr models superinterfaces as inheritance). +function emitHeritage(node, sf, file, kind, line) { + if (!node.heritageClauses) return; + for (const clause of node.heritageClauses) { + const isExtends = clause.token === ts.SyntaxKind.ExtendsKeyword; + const rel = isExtends ? "INHERITS" : "IMPLEMENTS"; + for (const t of clause.types) { + emitNameEdge(rel, file, kind, line, heritageSimpleName(t)); + } + } +} + +function lineOf(sf, node) { + return sf.getLineAndCharacterOfPosition(node.getStart(sf)).line + 1; +} + +// (H) Last line of a node's full span (its end position), for span/end_line +// (H) grading against cgr's end_line. 
+function endLineOf(sf, node) { + return sf.getLineAndCharacterOfPosition(node.getEnd()).line + 1; +} + +function methodKind(container) { + return container === "namespace" || container === "class" ? "Method" : "Function"; +} + +// (H) The binding name of an arrow/function expression (`const foo = () => ...`, +// (H) `foo = () => ...` class property, `{ foo: () => ... }`), matching how cgr +// (H) names such a Function. Used so the call oracle's declared-name universe +// (H) includes these (cgr resolves `foo()` to them); falls back to "anonymous". +function bindingName(node) { + const p = node.parent; + if ( + p && + (ts.isVariableDeclaration(p) || + ts.isPropertyDeclaration(p) || + ts.isPropertyAssignment(p)) && + p.name && + ts.isIdentifier(p.name) + ) { + return p.name.text; + } + return "anonymous"; +} + +// ctx carries the file, the enclosing class/namespace ref (for Methods) and the +// enclosing function ref (for nested Functions). +function defineFunction(node, sf, file, container, ctx, kind, line) { + if (kind === "Method") { + if (ctx.typeRef) { + emitEdge("DEFINES_METHOD", file, ctx.typeRef.kind, ctx.typeRef.line, "Method", line); + } + } else { + const parent = ctx.funcRef || { kind: "Module", line: MODULE_LINE }; + emitEdge("DEFINES", file, parent.kind, parent.line, "Function", line); + } +} + +// container: "module" | "class" | "namespace" | "function" +function walk(node, sf, file, container, ctx) { + if (ts.isClassDeclaration(node) && node.name) { + const line = lineOf(sf, node); + emit("Class", file, line, node.name.text, endLineOf(sf, node)); + emitEdge("DEFINES", file, "Module", MODULE_LINE, "Class", line); + emitHeritage(node, sf, file, "Class", line); + const sub = { typeRef: { kind: "Class", line }, funcRef: null }; + node.members.forEach((m) => walk(m, sf, file, "class", sub)); + return; + } + if (ts.isInterfaceDeclaration(node) && node.name) { + const line = lineOf(sf, node); + emit("Interface", file, line, node.name.text, endLineOf(sf, 
node)); + emitEdge("DEFINES", file, "Module", MODULE_LINE, "Interface", line); + emitHeritage(node, sf, file, "Interface", line); + return; + } + if (ts.isEnumDeclaration(node) && node.name) { + const line = lineOf(sf, node); + emit("Enum", file, line, node.name.text, endLineOf(sf, node)); + emitEdge("DEFINES", file, "Module", MODULE_LINE, "Enum", line); + return; + } + if (ts.isTypeAliasDeclaration(node) && node.name) { + const line = lineOf(sf, node); + emit("Type", file, line, node.name.text, endLineOf(sf, node)); + emitEdge("DEFINES", file, "Module", MODULE_LINE, "Type", line); + return; + } + if (ts.isModuleDeclaration(node) && node.name) { + const line = lineOf(sf, node); + emit("Class", file, line, node.name.text || "", endLineOf(sf, node)); + emitEdge("DEFINES", file, "Module", MODULE_LINE, "Class", line); + const sub = { typeRef: { kind: "Class", line }, funcRef: null }; + if (node.body) node.body.forEachChild((c) => walk(c, sf, file, "namespace", sub)); + return; + } + if (ts.isFunctionDeclaration(node) && node.name) { + const kind = methodKind(container); + const line = lineOf(sf, node); + emit(kind, file, line, node.name.text, endLineOf(sf, node)); + defineFunction(node, sf, file, container, ctx, kind, line); + const sub = { typeRef: null, funcRef: { kind, line } }; + if (node.body) node.body.forEachChild((c) => walk(c, sf, file, "function", sub)); + return; + } + if (ts.isMethodDeclaration(node) || ts.isConstructorDeclaration(node)) { + const nm = ts.isConstructorDeclaration(node) + ? "constructor" + : node.name && ts.isIdentifier(node.name) + ? node.name.text + : node.name && node.name.text; + // (H) Class members are Methods; object-literal shorthand methods are modelled + // (H) by cgr as standalone Functions. + const kind = container === "class" ? 
"Method" : "Function"; + const line = lineOf(sf, node); + if (nm) { + emit(kind, file, line, nm, endLineOf(sf, node)); + defineFunction(node, sf, file, container, ctx, kind, line); + } + const sub = { typeRef: null, funcRef: { kind, line } }; + if (node.body) node.body.forEachChild((c) => walk(c, sf, file, "function", sub)); + return; + } + if (ts.isArrowFunction(node) || ts.isFunctionExpression(node)) { + // (H) cgr captures every arrow/function expression as a Function node (named + // by its variable when assigned, else anonymous), at the expression's own + // line. The name is irrelevant to the (kind, file, line) join. + const kind = methodKind(container); + const line = lineOf(sf, node); + emit(kind, file, line, bindingName(node), endLineOf(sf, node)); + defineFunction(node, sf, file, container, ctx, kind, line); + const sub = { typeRef: null, funcRef: { kind, line } }; + node.forEachChild((c) => walk(c, sf, file, "function", sub)); + return; + } + // (H) A call site: the callee simple name is a bare identifier (`foo()`, + // (H) same-scope or imported) or the trailing identifier of a property access + // (H) (`obj.foo()`, `Type.bar()`). The Python side keeps only callees whose + // (H) name is a declared first-party Function/Method, mirroring the Go/Rust/Java + // (H) call oracles. Do not return -- recurse so nested calls (`f(g())`) emit too. 
+ if (ts.isCallExpression(node)) { + const callee = node.expression; + if (ts.isIdentifier(callee)) { + calls.push({ file, name: callee.text }); + } else if (ts.isPropertyAccessExpression(callee) && ts.isIdentifier(callee.name)) { + calls.push({ file, name: callee.name.text }); + } + } + node.forEachChild((c) => walk(c, sf, file, container, ctx)); +} + +function hasExt(name, exts) { + return exts.some((e) => name.endsWith(e)) && !name.endsWith(".d.ts"); +} + +function visitDir(dir, root, exts) { + for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { + const p = path.join(dir, entry.name); + if (entry.isDirectory()) { + if (!IGNORED.has(entry.name)) visitDir(p, root, exts); + } else if (hasExt(entry.name, exts)) { + const src = fs.readFileSync(p, "utf8"); + const sf = ts.createSourceFile(p, src, ts.ScriptTarget.Latest, true); + const rel = path.relative(root, p).split(path.sep).join("/"); + const ctx = { typeRef: null, funcRef: null }; + sf.forEachChild((c) => walk(c, sf, rel, "module", ctx)); + } + } +} + +const root = process.argv[2] || "."; +const exts = process.argv.slice(3); +visitDir(root, root, exts.length ? exts : [".ts", ".tsx"]); +process.stdout.write(JSON.stringify({ nodes, edges, name_edges: nameEdges, calls })); diff --git a/evals/oracles/typescript_oracle.py b/evals/oracles/typescript_oracle.py new file mode 100644 index 000000000..5c8fa6aff --- /dev/null +++ b/evals/oracles/typescript_oracle.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +import json +import shutil +import subprocess +from pathlib import Path + +from codebase_rag import constants as cs + +from .. 
import constants as ec +from ..types_defs import GraphData, OraclePayload +from ._common import is_ignored, payload_to_graph + +_ORACLE_DIR = Path(__file__).parent / ec.TS_ORACLE_DIRNAME +_SCRIPT = _ORACLE_DIR / ec.TS_ORACLE_SCRIPT +_NODE_MODULES = _ORACLE_DIR / ec.NODE_MODULES_DIRNAME +_CALLABLE_KINDS = frozenset({cs.NodeLabel.FUNCTION.value, cs.NodeLabel.METHOD.value}) + + +def typescript_available() -> bool: + return ( + shutil.which(ec.NODE_BIN) is not None and shutil.which(ec.NPM_BIN) is not None + ) + + +def _ensure_deps() -> None: + if _NODE_MODULES.is_dir(): + return + npm = shutil.which(ec.NPM_BIN) + if npm is None: + return + subprocess.run( + [npm, ec.NPM_INSTALL, *ec.NPM_FLAGS], + cwd=str(_ORACLE_DIR), + capture_output=True, + text=True, + check=True, + ) + + +def _run_payload(target: Path, suffixes: tuple[str, ...]) -> OraclePayload: + _ensure_deps() + node = shutil.which(ec.NODE_BIN) + if node is None: + return OraclePayload(nodes=[], edges=[], name_edges=[]) + proc = subprocess.run( + [node, str(_SCRIPT), str(target), *suffixes], + capture_output=True, + text=True, + check=True, + ) + payload: OraclePayload = json.loads(proc.stdout or "{}") + return payload + + +def _run(target: Path, suffixes: tuple[str, ...]) -> GraphData: + return payload_to_graph(_run_payload(target, suffixes)) + + +def run_typescript_oracle(target: Path) -> GraphData: + return _run(target, ec.TS_SUFFIXES) + + +def run_javascript_oracle(target: Path) -> GraphData: + return _run(target, ec.JS_SUFFIXES) + + +def run_typescript_call_oracle( + target: Path, +) -> tuple[set[tuple[str, str]], frozenset[str]]: + # (H) File-level TypeScript call sites restricted to first-party callees (a + # (H) callee whose simple name is a declared Function/Method), with the declared + # (H) name universe so the cgr side can be held to the same set. Mirrors the Go, + # (H) Rust, and Java call oracles. 
+ payload = _run_payload(target, ec.TS_SUFFIXES) + declared = frozenset( + rec[ec.ORACLE_KEY_NAME] + for rec in payload.get(ec.ORACLE_KEY_NODES, []) + if rec.get(ec.ORACLE_KEY_KIND) in _CALLABLE_KINDS + ) + edges = { + (call[ec.ORACLE_KEY_FILE], call[ec.ORACLE_KEY_NAME]) + for call in payload.get(ec.ORACLE_KEY_CALLS, []) + if call[ec.ORACLE_KEY_NAME] in declared + and not is_ignored(call[ec.ORACLE_KEY_FILE]) + } + return edges, declared diff --git a/evals/php_l1.py b/evals/php_l1.py new file mode 100644 index 000000000..6114f14fa --- /dev/null +++ b/evals/php_l1.py @@ -0,0 +1,51 @@ +from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger + +from . import constants as ec +from . import logs as ls +from .cgr_graph import extract_cgr_php_graph +from .oracles import php_oracle_available, run_php_oracle +from .score import score_structure +from .structure_report import render, write_outputs + +_TITLE = "cgr L1 structure eval (PHP vs php-parser)" + + +def main( + target: Annotated[ + Path, typer.Option(help="Directory of PHP sources to evaluate.") + ] = Path(ec.PHP_DEFAULT_TARGET), + project_name: Annotated[ + str, typer.Option(help="cgr project name; defaults to target dir name.") + ] = "", + out_dir: Annotated[ + Path, typer.Option(help="Directory for php_scores.csv and php_diff.json.") + ] = Path(ec.DEFAULT_OUT_DIR), +) -> None: + if not php_oracle_available(): + logger.error(ls.PHP_ORACLE_MISSING.format(binary=ec.NODE_BIN)) + raise typer.Exit(code=1) + + target = target.resolve() + project = project_name or target.name + + logger.info(ls.PHP_EXTRACTING_CGR.format(target=target, project=project)) + cgr = extract_cgr_php_graph(target, project) + logger.success(ls.PHP_CGR_DONE.format(count=len(cgr.nodes))) + + logger.info(ls.PHP_EXTRACTING_ORACLE.format(binary=ec.NODE_BIN, target=target)) + oracle = run_php_oracle(target) + logger.success(ls.PHP_ORACLE_DONE.format(count=len(oracle.nodes))) + + result = score_structure( + cgr, oracle, 
ec.PHP_SCORED_NODE_KINDS, ec.SCORED_EDGE_TYPES, grade_spans=True + ) + write_outputs(result, out_dir, ec.PHP_SCORES_FILENAME, ec.PHP_DIFF_FILENAME) + render(result, _TITLE) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/php_retrieval.py b/evals/php_retrieval.py new file mode 100644 index 000000000..f779a9e5d --- /dev/null +++ b/evals/php_retrieval.py @@ -0,0 +1,113 @@ +# (H) Multi-language retrieval (PHP). Extends the file-level call-localization +# (H) benchmark to PHP: for each first-party PHP symbol, which files call it. +# (H) cgr's PHP CALLS edges (reduced to (caller_file, callee_simple_name)) are +# (H) graded against call sites extracted by php-parser, over the same first-party +# (H) name universe. php-parser is independent of cgr's tree-sitter PHP frontend, +# (H) so this measures cgr's cross-file PHP call resolution against ground truth +# (H) (mirrors evals/java_retrieval.py / ts_retrieval.py). +from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger + +from codebase_rag import constants as cs + +from . import constants as ec +from . 
import logs as ls +from .cgr_graph import _capture +from .oracles import php_oracle_available, run_php_call_oracle +from .score import _prf +from .structure_report import render, write_outputs +from .types_defs import DiffBucket, LocationStats, ScoreResult, ScoreRow + +console_target = Path(ec.PHP_DEFAULT_TARGET) + +_CALLS = cs.RelationshipType.CALLS.value +_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0) + +CallEdge = tuple[str, str] + + +def oracle_php_call_edges(target: Path) -> tuple[set[CallEdge], frozenset[str]]: + return run_php_call_oracle(target) + + +def cgr_php_call_edges( + target: Path, project: str, declared: frozenset[str] +) -> set[CallEdge]: + ingestor = _capture(target, project) + caller_path: dict[tuple[str, str], str] = { + (str(label), str(uid)): str(props[cs.KEY_PATH]) + for (label, uid), props in ingestor.nodes.items() + if props.get(cs.KEY_PATH) and str(props[cs.KEY_PATH]).endswith(ec.PHP_SUFFIX) + } + edges: set[CallEdge] = set() + for from_label, from_val, rel_type, _to_label, to_val in ingestor.rels: + if rel_type != _CALLS: + continue + path = caller_path.get((str(from_label), str(from_val))) + if path is None: + continue + name = str(to_val).split(cs.SEPARATOR_DOT)[-1] + if name in declared: + edges.add((path, name)) + return edges + + +def _edge_repr(edge: CallEdge) -> str: + return ec.PHP_CALL_EDGE_REPR.format(file=edge[0], name=edge[1]) + + +def score_php_retrieval(cgr: set[CallEdge], oracle: set[CallEdge]) -> ScoreResult: + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + row = _prf(ec.Category.RETRIEVAL.value, ec.PHP_RETRIEVAL_LABEL, cgr, oracle) + if row is not None: + rows.append(row) + diff[ec.PHP_RETRIEVAL_DIFF_PREFIX + ec.PHP_RETRIEVAL_LABEL] = DiffBucket( + missing=[_edge_repr(e) for e in sorted(oracle - cgr)], + extra=[_edge_repr(e) for e in sorted(cgr - oracle)], + ) + return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff) + + +def main( + target: Annotated[ + Path, typer.Option(help="Directory of 
PHP sources to evaluate call retrieval.") + ] = console_target, + project_name: Annotated[ + str, typer.Option(help="cgr project name; defaults to target dir name.") + ] = "", + out_dir: Annotated[ + Path, + typer.Option(help="Directory for php_retrieval_scores.csv and diff json."), + ] = Path(ec.DEFAULT_OUT_DIR), +) -> None: + if not php_oracle_available(): + logger.error(ls.PHP_ORACLE_MISSING.format(binary=ec.NODE_BIN)) + raise typer.Exit(code=1) + + target = target.resolve() + project = project_name or target.name + + logger.info(ls.PHP_RETRIEVAL_ORACLE.format(binary=ec.NODE_BIN, target=target)) + oracle, declared = oracle_php_call_edges(target) + logger.success(ls.PHP_RETRIEVAL_ORACLE_DONE.format(count=len(oracle))) + + logger.info(ls.PHP_RETRIEVAL_CGR.format(target=target, project=project)) + cgr = cgr_php_call_edges(target, project, declared) + logger.success(ls.PHP_RETRIEVAL_CGR_DONE.format(count=len(cgr))) + + result = score_php_retrieval(cgr, oracle) + write_outputs( + result, + out_dir, + ec.PHP_RETRIEVAL_SCORES_FILENAME, + ec.PHP_RETRIEVAL_DIFF_FILENAME, + ) + render(result, ec.PHP_RETRIEVAL_TITLE) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/results/calls_diff.json b/evals/results/calls_diff.json new file mode 100644 index 000000000..648da8ae9 --- /dev/null +++ b/evals/results/calls_diff.json @@ -0,0 +1,3 @@ +{ + "missing": [] +} diff --git a/evals/results/diff.json b/evals/results/diff.json new file mode 100644 index 000000000..25699abc4 --- /dev/null +++ b/evals/results/diff.json @@ -0,0 +1,34 @@ +{ + "node:Module": { + "missing": [], + "extra": [] + }, + "node:Class": { + "missing": [], + "extra": [] + }, + "node:Function": { + "missing": [], + "extra": [] + }, + "node:Method": { + "missing": [], + "extra": [] + }, + "edge:DEFINES": { + "missing": [], + "extra": [] + }, + "edge:DEFINES_METHOD": { + "missing": [], + "extra": [] + }, + "name_edge:INHERITS": { + "missing": [], + "extra": [] + }, + "name_edge:IMPORTS": { + 
"missing": [], + "extra": [] + } +} diff --git a/evals/results/imports_diff.json b/evals/results/imports_diff.json new file mode 100644 index 000000000..6ab6f4ed8 --- /dev/null +++ b/evals/results/imports_diff.json @@ -0,0 +1,14 @@ +{ + "imports:imports-all": { + "missing": [], + "extra": [] + }, + "imports:imports-internal": { + "missing": [], + "extra": [] + }, + "imports:imports-external": { + "missing": [], + "extra": [] + } +} diff --git a/evals/results/imports_scores.csv b/evals/results/imports_scores.csv new file mode 100644 index 000000000..cd55698e3 --- /dev/null +++ b/evals/results/imports_scores.csv @@ -0,0 +1,4 @@ +category,label,tp,fp,fn,precision,recall,f1 +edge,imports-all,1986,0,0,1.0,1.0,1.0 +edge,imports-internal,462,0,0,1.0,1.0,1.0 +edge,imports-external,1524,0,0,1.0,1.0,1.0 diff --git a/evals/results/incremental_diff.json b/evals/results/incremental_diff.json new file mode 100644 index 000000000..f1125bc24 --- /dev/null +++ b/evals/results/incremental_diff.json @@ -0,0 +1,194 @@ +{ + "incremental-node:Class": { + "missing": [], + "extra": [] + }, + "incremental-node:File": { + "missing": [], + "extra": [] + }, + "incremental-node:Folder": { + "missing": [], + "extra": [] + }, + "incremental-node:Function": { + "missing": [], + "extra": [] + }, + "incremental-node:Method": { + "missing": [], + "extra": [] + }, + "incremental-node:Module": { + "missing": [], + "extra": [] + }, + "incremental-node:Package": { + "missing": [], + "extra": [] + }, + "incremental-node:Project": { + "missing": [], + "extra": [] + }, + "incremental-edge:CALLS": { + "missing": [ + "CALLS Function:codebase_rag.cgr_state._save -> Method:codebase_rag.tests.conftest.MockNode.parent", + "CALLS Function:codebase_rag.cgr_state._save -> Method:codebase_rag.tests.conftest.MockNode.parent@58", + "CALLS Function:codebase_rag.cli._cleanup_project_embeddings -> Function:codebase_rag.vector_store.delete_project_embeddings@171", + "CALLS Function:codebase_rag.cli._maybe_start_stack -> 
Method:codebase_rag.stack.manager.StackManager.ensure_running", + "CALLS Function:codebase_rag.cli._sync_workspace -> Method:codebase_rag.types_defs.ColumnDescriptor.name", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.services.graph_service.MemgraphIngestor.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_abstract_method_override_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_callable_field_calls._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_chained_attribute_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_conditional_alias_call._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_constructor_call_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_duplicate_qn_definitions._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_external_package_name_collision._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_function_local_definitions._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_getattr_dispatch._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_graph_updater_incremental_rename.InMemoryGraph.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_higher_order_calls._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_inherits_attribute_base._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_instance_attr_type_inference._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> 
Method:codebase_rag.tests.test_interprocedural_callback_flow._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_local_alias_calls._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_local_alias_chain_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_nested_function_defines._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_operator_dispatch_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_property_getter_calls._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_property_return_type_chain._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_protocol_dispatch_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_protocol_impl_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_protocol_operator_dispatch._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_reexport_chain_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_relative_import_package_init._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_relative_import_root_level._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_sibling_mixin_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.tests.test_truthiness_dispatch_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.doctor -> Method:codebase_rag.types_defs.ColumnDescriptor.name", + "CALLS Function:codebase_rag.cli.start -> 
Method:codebase_rag.types_defs.ColumnDescriptor.name", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.services.graph_service.MemgraphIngestor.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_abstract_method_override_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_callable_field_calls._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_chained_attribute_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_conditional_alias_call._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_constructor_call_resolution._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_duplicate_qn_definitions._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_external_package_name_collision._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_function_local_definitions._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_getattr_dispatch._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_graph_updater_incremental_rename.InMemoryGraph.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_higher_order_calls._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_inherits_attribute_base._Capture.fetch_all", + "CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.tests.test_instance_attr_type_inference._Capture.fetch_all" + ], + "extra": [ + "CALLS Function:codebase_rag.cli._maybe_start_stack -> Function:codebase_rag.stack.manager.ensure_running", + "CALLS Function:codebase_rag.cli.dead_code -> Method:codebase_rag.services.QueryProtocol.fetch_all", + 
"CALLS Function:codebase_rag.cli.stats -> Method:codebase_rag.services.QueryProtocol.fetch_all", + "CALLS Function:codebase_rag.decorators.ensure_loaded.wrapper -> Method:codebase_rag.types_defs.LoadableProtocol._ensure_loaded", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater._capture_inbound_edges -> Method:codebase_rag.services.QueryProtocol.fetch_all", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater._delete_module_entities -> Method:codebase_rag.services.QueryProtocol.execute_write", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater._generate_semantic_embeddings -> Method:codebase_rag.services.QueryProtocol.fetch_all", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater._process_files -> Method:codebase_rag.services.IngestorProtocol.flush_all", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater._process_files -> Method:codebase_rag.services.QueryProtocol.execute_write", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater._prune_orphan_nodes -> Method:codebase_rag.services.QueryProtocol.execute_write", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater._prune_orphan_nodes -> Method:codebase_rag.services.QueryProtocol.fetch_all", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater._rehydrate_registry_from_graph -> Method:codebase_rag.services.QueryProtocol.fetch_all", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater._restore_inbound_edges -> Method:codebase_rag.services.IngestorProtocol.ensure_relationship_batch", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater.run -> Function:codebase_rag.parsers.class_ingest.method_override.process_all_method_overrides", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater.run -> Method:codebase_rag.services.IngestorProtocol.ensure_node_batch", + "CALLS Method:codebase_rag.graph_updater.GraphUpdater.run -> Method:codebase_rag.services.IngestorProtocol.flush_all", + "CALLS Method:codebase_rag.parsers.call_processor.CallProcessor._emit_operator_dunder -> 
Method:codebase_rag.services.IngestorProtocol.ensure_relationship_batch", + "CALLS Method:codebase_rag.parsers.call_processor.CallProcessor._ingest_decorator_calls -> Method:codebase_rag.services.IngestorProtocol.ensure_relationship_batch", + "CALLS Method:codebase_rag.parsers.call_processor.CallProcessor._ingest_function_calls -> Method:codebase_rag.services.IngestorProtocol.ensure_relationship_batch", + "CALLS Method:codebase_rag.parsers.call_processor.CallProcessor._ingest_property_accesses -> Method:codebase_rag.services.IngestorProtocol.ensure_relationship_batch", + "CALLS Method:codebase_rag.parsers.call_processor.CallProcessor.finalize_callable_param_flow -> Method:codebase_rag.services.IngestorProtocol.ensure_relationship_batch", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._follow_reexports -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._mro_method_qns -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._protocol_structural_implementers -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.find_with_prefix", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._redirect_protocol_method -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._redirect_protocol_method -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__getitem__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_chained_call -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_chained_call -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__getitem__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_imported_class_qn 
-> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_inherited_method -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_inherited_method -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__getitem__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_multi_part_call -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_multi_part_call -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__getitem__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_rust_class_qn -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.find_ending_with", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_rust_class_qn -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.get", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_self_attribute_call -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_self_attribute_call -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__getitem__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_self_sibling_method -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__getitem__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._resolve_self_sibling_method -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.is_abstract", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._try_method_on_class -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._try_method_on_class 
-> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__getitem__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._try_resolve_direct_import -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._try_resolve_direct_import -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__getitem__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._try_resolve_iife -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._try_resolve_iife -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__getitem__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._try_resolve_method -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._try_resolve_method -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__getitem__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._try_resolve_module_method -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._try_resolve_module_method -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__getitem__", + "CALLS Method:codebase_rag.parsers.call_resolver.CallResolver._try_resolve_same_module -> Method:codebase_rag.types_defs.FunctionRegistryTrieProtocol.__contains__" + ] + }, + "incremental-edge:CONTAINS_FILE": { + "missing": [], + "extra": [] + }, + "incremental-edge:CONTAINS_FOLDER": { + "missing": [], + "extra": [] + }, + "incremental-edge:CONTAINS_MODULE": { + "missing": [], + "extra": [] + }, + "incremental-edge:CONTAINS_PACKAGE": { + "missing": [], + "extra": [] + }, + "incremental-edge:DEFINES": { + "missing": [], + "extra": [] + }, + "incremental-edge:DEFINES_METHOD": { + "missing": [], + 
"extra": [] + }, + "incremental-edge:IMPORTS": { + "missing": [ + "IMPORTS Module:codebase_rag.graph_loader -> Module:codebase_rag.decorators", + "IMPORTS Module:codebase_rag.graph_updater -> Module:codebase_rag.embedder", + "IMPORTS Module:codebase_rag.mcp.server -> Module:codebase_rag.vector_store", + "IMPORTS Module:codebase_rag.mcp.tools -> Module:codebase_rag.parser_loader", + "IMPORTS Module:codebase_rag.mcp.tools -> Module:codebase_rag.vector_store" + ], + "extra": [ + "IMPORTS Module:codebase_rag.graph_loader -> Module:codebase_rag.decorators.ensure_loaded", + "IMPORTS Module:codebase_rag.graph_updater -> Module:codebase_rag.embedder.embed_code_batch", + "IMPORTS Module:codebase_rag.graph_updater -> Module:codebase_rag.embedder.get_embedding_cache", + "IMPORTS Module:codebase_rag.main -> Module:codebase_rag.config.load_cgrignore_patterns", + "IMPORTS Module:codebase_rag.mcp.server -> Module:codebase_rag.vector_store.close_qdrant_client", + "IMPORTS Module:codebase_rag.mcp.tools -> Module:codebase_rag.parser_loader.load_parsers", + "IMPORTS Module:codebase_rag.mcp.tools -> Module:codebase_rag.vector_store.delete_project_embeddings" + ] + }, + "incremental-edge:INHERITS": { + "missing": [], + "extra": [] + }, + "incremental-edge:INSTANTIATES": { + "missing": [], + "extra": [] + }, + "incremental-edge:OVERRIDES": { + "missing": [], + "extra": [] + } +} diff --git a/evals/results/incremental_scores.csv b/evals/results/incremental_scores.csv new file mode 100644 index 000000000..30ffbca75 --- /dev/null +++ b/evals/results/incremental_scores.csv @@ -0,0 +1,20 @@ +category,label,tp,fp,fn,precision,recall,f1 +edge,CALLS,333010,63,740,0.9998,0.9978,0.9988 +edge,CONTAINS_FILE,12175,0,0,1.0,1.0,1.0 +edge,CONTAINS_FOLDER,50,0,0,1.0,1.0,1.0 +edge,CONTAINS_MODULE,12150,0,0,1.0,1.0,1.0 +edge,CONTAINS_PACKAGE,475,0,0,1.0,1.0,1.0 +edge,DEFINES,77450,0,0,1.0,1.0,1.0 +edge,DEFINES_METHOD,100450,0,0,1.0,1.0,1.0 +edge,IMPORTS,82995,7,5,0.9999,0.9999,0.9999 
+edge,INHERITS,3925,0,0,1.0,1.0,1.0 +edge,INSTANTIATES,25525,0,0,1.0,1.0,1.0 +edge,OVERRIDES,1625,0,0,1.0,1.0,1.0 +node,Class,23550,0,0,1.0,1.0,1.0 +node,File,12175,0,0,1.0,1.0,1.0 +node,Folder,50,0,0,1.0,1.0,1.0 +node,Function,53900,0,0,1.0,1.0,1.0 +node,Method,100450,0,0,1.0,1.0,1.0 +node,Module,17125,0,0,1.0,1.0,1.0 +node,Package,475,0,0,1.0,1.0,1.0 +node,Project,25,0,0,1.0,1.0,1.0 diff --git a/evals/results/inheritance_diff.json b/evals/results/inheritance_diff.json new file mode 100644 index 000000000..b9c85a9d6 --- /dev/null +++ b/evals/results/inheritance_diff.json @@ -0,0 +1,10 @@ +{ + "inheritance:inherits-resolved": { + "missing": [], + "extra": [] + }, + "inheritance:overrides": { + "missing": [], + "extra": [] + } +} diff --git a/evals/results/inheritance_scores.csv b/evals/results/inheritance_scores.csv new file mode 100644 index 000000000..04e3c36a8 --- /dev/null +++ b/evals/results/inheritance_scores.csv @@ -0,0 +1,3 @@ +category,label,tp,fp,fn,precision,recall,f1 +edge,inherits-resolved,31,0,0,1.0,1.0,1.0 +edge,overrides,57,0,0,1.0,1.0,1.0 diff --git a/evals/results/instantiation_diff.json b/evals/results/instantiation_diff.json new file mode 100644 index 000000000..5eb6d0c88 --- /dev/null +++ b/evals/results/instantiation_diff.json @@ -0,0 +1,6 @@ +{ + "instantiation:instantiates": { + "missing": [], + "extra": [] + } +} diff --git a/evals/results/instantiation_scores.csv b/evals/results/instantiation_scores.csv new file mode 100644 index 000000000..25e7ee5e0 --- /dev/null +++ b/evals/results/instantiation_scores.csv @@ -0,0 +1,2 @@ +category,label,tp,fp,fn,precision,recall,f1 +edge,instantiates,378,0,0,1.0,1.0,1.0 diff --git a/evals/results/retrieval_diff.json b/evals/results/retrieval_diff.json new file mode 100644 index 000000000..77846d30c --- /dev/null +++ b/evals/results/retrieval_diff.json @@ -0,0 +1,16871 @@ +{ + "retrieval:graph": { + "missing": [ + "CALLS graph_updater.py:0 -> save", + "CALLS parser_loader.py:0 -> items", + "CALLS 
services/anthropic_token_counter.py:0 -> get", + "CALLS stack/manager.py:0 -> ensure_running", + "CALLS stack/manager.py:0 -> logs", + "CALLS tests/conftest.py:0 -> reset_mock", + "CALLS tests/integration/test_incremental_external_prune_e2e.py:0 -> run", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> run", + "CALLS tests/test_callable_field_calls.py:0 -> run", + "CALLS tests/test_chained_attribute_resolution.py:0 -> run", + "CALLS tests/test_conditional_alias_call.py:0 -> run", + "CALLS tests/test_constructor_call_resolution.py:0 -> run", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> run", + "CALLS tests/test_external_package_name_collision.py:0 -> run", + "CALLS tests/test_function_local_definitions.py:0 -> run", + "CALLS tests/test_getattr_dispatch.py:0 -> run", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> run", + "CALLS tests/test_higher_order_calls.py:0 -> run", + "CALLS tests/test_inherits_attribute_base.py:0 -> run", + "CALLS tests/test_instance_attr_type_inference.py:0 -> run", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> run", + "CALLS tests/test_java_field_access_chains.py:0 -> parse", + "CALLS tests/test_java_field_access_chains.py:0 -> run", + "CALLS tests/test_local_alias_calls.py:0 -> run", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> run", + "CALLS tests/test_nested_function_defines.py:0 -> run", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> run", + "CALLS tests/test_property_getter_calls.py:0 -> run", + "CALLS tests/test_property_return_type_chain.py:0 -> run", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> run", + "CALLS tests/test_protocol_impl_resolution.py:0 -> run", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> run", + "CALLS tests/test_reexport_chain_resolution.py:0 -> run", + "CALLS tests/test_relative_import_package_init.py:0 -> run", + "CALLS tests/test_relative_import_root_level.py:0 -> run", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> 
run", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> run" + ], + "extra": [ + "CALLS cgr_state.py:0 -> parent", + "CALLS cli.py:0 -> active_cypher_config", + "CALLS cli.py:0 -> active_orchestrator_config", + "CALLS cli.py:0 -> cli", + "CALLS cli.py:0 -> compose_file", + "CALLS cli.py:0 -> name", + "CALLS cli.py:0 -> value", + "CALLS config.py:0 -> ollama_endpoint", + "CALLS config.py:0 -> stack_home", + "CALLS constants.py:0 -> value", + "CALLS decorators.py:0 -> project_root", + "CALLS embedder.py:0 -> parent", + "CALLS graph_loader.py:0 -> ensure_loaded", + "CALLS graph_loader.py:0 -> metadata", + "CALLS graph_loader.py:0 -> nodes", + "CALLS graph_loader.py:0 -> relationships", + "CALLS graph_loader.py:0 -> type", + "CALLS graph_updater.py:0 -> call_processor", + "CALLS graph_updater.py:0 -> definition_processor", + "CALLS graph_updater.py:0 -> name", + "CALLS graph_updater.py:0 -> parent", + "CALLS graph_updater.py:0 -> structure_processor", + "CALLS graph_updater.py:0 -> type_inference", + "CALLS graph_updater.py:0 -> verify_stored_ids", + "CALLS language_spec.py:0 -> text", + "CALLS language_spec.py:0 -> type", + "CALLS main.py:0 -> active_cypher_config", + "CALLS main.py:0 -> active_orchestrator_config", + "CALLS main.py:0 -> name", + "CALLS main.py:0 -> ollama_endpoint", + "CALLS main.py:0 -> parent", + "CALLS main.py:0 -> set_cypher", + "CALLS main.py:0 -> stack_home", + "CALLS main.py:0 -> text", + "CALLS main.py:0 -> value", + "CALLS mcp/client.py:0 -> text", + "CALLS mcp/server.py:0 -> description", + "CALLS mcp/server.py:0 -> name", + "CALLS mcp/tools.py:0 -> description", + "CALLS mcp/tools.py:0 -> name", + "CALLS mcp/tools.py:0 -> project_root", + "CALLS mcp/tools.py:0 -> rag_agent", + "CALLS parsers/call_processor.py:0 -> children", + "CALLS parsers/call_processor.py:0 -> import_processor", + "CALLS parsers/call_processor.py:0 -> name", + "CALLS parsers/call_processor.py:0 -> parent", + "CALLS parsers/call_processor.py:0 -> 
resolve_builtin_call", + "CALLS parsers/call_processor.py:0 -> resolve_cpp_operator_call", + "CALLS parsers/call_processor.py:0 -> text", + "CALLS parsers/call_processor.py:0 -> type_inference", + "CALLS parsers/call_resolver.py:0 -> import_processor", + "CALLS parsers/call_resolver.py:0 -> java_type_inference", + "CALLS parsers/call_resolver.py:0 -> python_type_inference", + "CALLS parsers/call_resolver.py:0 -> text", + "CALLS parsers/call_resolver.py:0 -> type_inference", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> children", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> text", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> type", + "CALLS parsers/class_ingest/identity.py:0 -> _c_get_name", + "CALLS parsers/class_ingest/identity.py:0 -> _cpp_get_name", + "CALLS parsers/class_ingest/identity.py:0 -> _generic_get_name", + "CALLS parsers/class_ingest/identity.py:0 -> _js_get_name", + "CALLS parsers/class_ingest/identity.py:0 -> _python_get_name", + "CALLS parsers/class_ingest/identity.py:0 -> _rust_get_name", + "CALLS parsers/class_ingest/identity.py:0 -> children", + "CALLS parsers/class_ingest/identity.py:0 -> parent", + "CALLS parsers/class_ingest/identity.py:0 -> text", + "CALLS parsers/class_ingest/identity.py:0 -> type", + "CALLS parsers/class_ingest/mixin.py:0 -> import_processor", + "CALLS parsers/class_ingest/mixin.py:0 -> parent", + "CALLS parsers/class_ingest/mixin.py:0 -> text", + "CALLS parsers/class_ingest/mixin.py:0 -> type", + "CALLS parsers/class_ingest/mixin.py:0 -> value", + "CALLS parsers/class_ingest/node_type.py:0 -> children", + "CALLS parsers/class_ingest/node_type.py:0 -> text", + "CALLS parsers/class_ingest/node_type.py:0 -> type", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> _resolve_to_qn", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> children", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> text", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> type", + "CALLS 
parsers/class_ingest/relationships.py:0 -> type", + "CALLS parsers/class_ingest/utils.py:0 -> children", + "CALLS parsers/class_ingest/utils.py:0 -> text", + "CALLS parsers/class_ingest/utils.py:0 -> type", + "CALLS parsers/cpp/utils.py:0 -> children", + "CALLS parsers/cpp/utils.py:0 -> parent", + "CALLS parsers/cpp/utils.py:0 -> text", + "CALLS parsers/cpp/utils.py:0 -> type", + "CALLS parsers/cpp_frontend/constants.py:0 -> value", + "CALLS parsers/cpp_frontend/frontend.py:0 -> name", + "CALLS parsers/cpp_frontend/frontend.py:0 -> nodes", + "CALLS parsers/cpp_frontend/frontend.py:0 -> type", + "CALLS parsers/cpp_frontend/qn.py:0 -> name", + "CALLS parsers/cpp_frontend/qn.py:0 -> parent", + "CALLS parsers/definition_processor.py:0 -> JsTsModuleSystemMixin", + "CALLS parsers/definition_processor.py:0 -> children", + "CALLS parsers/definition_processor.py:0 -> import_processor", + "CALLS parsers/definition_processor.py:0 -> name", + "CALLS parsers/definition_processor.py:0 -> parent", + "CALLS parsers/definition_processor.py:0 -> text", + "CALLS parsers/definition_processor.py:0 -> type", + "CALLS parsers/dependency_parser.py:0 -> name", + "CALLS parsers/factory.py:0 -> definition_processor", + "CALLS parsers/factory.py:0 -> import_processor", + "CALLS parsers/factory.py:0 -> type_inference", + "CALLS parsers/function_ingest.py:0 -> _c_get_name", + "CALLS parsers/function_ingest.py:0 -> _cpp_get_name", + "CALLS parsers/function_ingest.py:0 -> _generic_get_name", + "CALLS parsers/function_ingest.py:0 -> _js_get_name", + "CALLS parsers/function_ingest.py:0 -> _python_get_name", + "CALLS parsers/function_ingest.py:0 -> _rust_get_name", + "CALLS parsers/function_ingest.py:0 -> children", + "CALLS parsers/function_ingest.py:0 -> name", + "CALLS parsers/function_ingest.py:0 -> parent", + "CALLS parsers/function_ingest.py:0 -> text", + "CALLS parsers/function_ingest.py:0 -> value", + "CALLS parsers/go/utils.py:0 -> children", + "CALLS parsers/go/utils.py:0 -> text", + 
"CALLS parsers/go/utils.py:0 -> type", + "CALLS parsers/handlers/base.py:0 -> parent", + "CALLS parsers/handlers/base.py:0 -> text", + "CALLS parsers/handlers/base.py:0 -> type", + "CALLS parsers/handlers/cpp.py:0 -> text", + "CALLS parsers/handlers/cpp.py:0 -> type", + "CALLS parsers/handlers/js_ts.py:0 -> children", + "CALLS parsers/handlers/js_ts.py:0 -> parent", + "CALLS parsers/handlers/js_ts.py:0 -> text", + "CALLS parsers/handlers/js_ts.py:0 -> type", + "CALLS parsers/handlers/lua.py:0 -> text", + "CALLS parsers/handlers/lua.py:0 -> type", + "CALLS parsers/handlers/php.py:0 -> children", + "CALLS parsers/handlers/php.py:0 -> parent", + "CALLS parsers/handlers/php.py:0 -> text", + "CALLS parsers/handlers/php.py:0 -> type", + "CALLS parsers/handlers/python.py:0 -> children", + "CALLS parsers/handlers/python.py:0 -> parent", + "CALLS parsers/handlers/python.py:0 -> type", + "CALLS parsers/handlers/rust.py:0 -> children", + "CALLS parsers/handlers/rust.py:0 -> type", + "CALLS parsers/import_processor.py:0 -> children", + "CALLS parsers/import_processor.py:0 -> text", + "CALLS parsers/import_processor.py:0 -> type", + "CALLS parsers/java/method_resolver.py:0 -> children", + "CALLS parsers/java/method_resolver.py:0 -> import_processor", + "CALLS parsers/java/method_resolver.py:0 -> type", + "CALLS parsers/java/type_inference.py:0 -> import_processor", + "CALLS parsers/java/type_inference.py:0 -> parent", + "CALLS parsers/java/type_inference.py:0 -> type", + "CALLS parsers/java/type_resolver.py:0 -> children", + "CALLS parsers/java/type_resolver.py:0 -> find_with_prefix", + "CALLS parsers/java/type_resolver.py:0 -> import_processor", + "CALLS parsers/java/type_resolver.py:0 -> type", + "CALLS parsers/java/utils.py:0 -> children", + "CALLS parsers/java/utils.py:0 -> parent", + "CALLS parsers/java/utils.py:0 -> type", + "CALLS parsers/java/variable_analyzer.py:0 -> children", + "CALLS parsers/java/variable_analyzer.py:0 -> parent", + "CALLS 
parsers/java/variable_analyzer.py:0 -> type", + "CALLS parsers/js_ts/ingest.py:0 -> children", + "CALLS parsers/js_ts/ingest.py:0 -> parent", + "CALLS parsers/js_ts/ingest.py:0 -> text", + "CALLS parsers/js_ts/ingest.py:0 -> type", + "CALLS parsers/js_ts/module_system.py:0 -> children", + "CALLS parsers/js_ts/module_system.py:0 -> import_processor", + "CALLS parsers/js_ts/module_system.py:0 -> text", + "CALLS parsers/js_ts/module_system.py:0 -> type", + "CALLS parsers/js_ts/type_inference.py:0 -> children", + "CALLS parsers/js_ts/type_inference.py:0 -> import_processor", + "CALLS parsers/js_ts/type_inference.py:0 -> text", + "CALLS parsers/js_ts/type_inference.py:0 -> type", + "CALLS parsers/js_ts/utils.py:0 -> children", + "CALLS parsers/js_ts/utils.py:0 -> text", + "CALLS parsers/js_ts/utils.py:0 -> type", + "CALLS parsers/lua/type_inference.py:0 -> children", + "CALLS parsers/lua/type_inference.py:0 -> import_processor", + "CALLS parsers/lua/type_inference.py:0 -> type", + "CALLS parsers/lua/utils.py:0 -> children", + "CALLS parsers/lua/utils.py:0 -> parent", + "CALLS parsers/lua/utils.py:0 -> type", + "CALLS parsers/py/ast_analyzer.py:0 -> children", + "CALLS parsers/py/ast_analyzer.py:0 -> find_method_in_ast", + "CALLS parsers/py/ast_analyzer.py:0 -> text", + "CALLS parsers/py/ast_analyzer.py:0 -> type", + "CALLS parsers/py/expression_analyzer.py:0 -> import_processor", + "CALLS parsers/py/expression_analyzer.py:0 -> text", + "CALLS parsers/py/expression_analyzer.py:0 -> type", + "CALLS parsers/py/type_inference.py:0 -> import_processor", + "CALLS parsers/py/variable_analyzer.py:0 -> children", + "CALLS parsers/py/variable_analyzer.py:0 -> import_processor", + "CALLS parsers/py/variable_analyzer.py:0 -> parent", + "CALLS parsers/py/variable_analyzer.py:0 -> text", + "CALLS parsers/py/variable_analyzer.py:0 -> type", + "CALLS parsers/rs/utils.py:0 -> children", + "CALLS parsers/rs/utils.py:0 -> parent", + "CALLS parsers/rs/utils.py:0 -> text", + "CALLS 
parsers/rs/utils.py:0 -> type", + "CALLS parsers/stdlib_extractor.py:0 -> stack_home", + "CALLS parsers/structure_processor.py:0 -> name", + "CALLS parsers/structure_processor.py:0 -> parent", + "CALLS parsers/type_inference.py:0 -> import_processor", + "CALLS parsers/type_inference.py:0 -> java_type_inference", + "CALLS parsers/type_inference.py:0 -> js_type_inference", + "CALLS parsers/type_inference.py:0 -> lua_type_inference", + "CALLS parsers/type_inference.py:0 -> python_type_inference", + "CALLS parsers/utils.py:0 -> _extract_decorators", + "CALLS parsers/utils.py:0 -> _get_docstring", + "CALLS parsers/utils.py:0 -> _is_export_inside_function", + "CALLS parsers/utils.py:0 -> _start_byte_key", + "CALLS parsers/utils.py:0 -> children", + "CALLS parsers/utils.py:0 -> nodes", + "CALLS parsers/utils.py:0 -> parent", + "CALLS parsers/utils.py:0 -> text", + "CALLS parsers/utils.py:0 -> type", + "CALLS parsers/utils.py:0 -> value", + "CALLS prompts.py:0 -> name", + "CALLS providers/base.py:0 -> ModelProvider", + "CALLS providers/base.py:0 -> ollama_endpoint", + "CALLS providers/litellm.py:0 -> ModelProvider", + "CALLS readme_sections.py:0 -> description", + "CALLS readme_sections.py:0 -> name", + "CALLS readme_sections.py:0 -> value", + "CALLS services/anthropic_token_counter.py:0 -> text", + "CALLS services/graph_service.py:0 -> description", + "CALLS services/graph_service.py:0 -> name", + "CALLS services/llm.py:0 -> active_cypher_config", + "CALLS services/llm.py:0 -> active_orchestrator_config", + "CALLS services/protobuf_service.py:0 -> nodes", + "CALLS services/protobuf_service.py:0 -> relationships", + "CALLS services/protobuf_service.py:0 -> type", + "CALLS stack/cli.py:0 -> compose_file", + "CALLS stack/cli.py:0 -> value", + "CALLS stack/manager.py:0 -> compose_file", + "CALLS tests/conftest.py:0 -> structure_processor", + "CALLS tests/integration/test_imports_e2e.py:0 -> name", + "CALLS tests/integration/test_node_label_e2e.py:0 -> value", + "CALLS 
tests/integration/test_tool_calling.py:0 -> active_orchestrator_config", + "CALLS tests/test_call_processor.py:0 -> children", + "CALLS tests/test_call_processor.py:0 -> type", + "CALLS tests/test_call_resolver.py:0 -> children", + "CALLS tests/test_call_resolver.py:0 -> python_type_inference", + "CALLS tests/test_call_resolver.py:0 -> type_inference", + "CALLS tests/test_cgr_instructions.py:0 -> active_orchestrator_config", + "CALLS tests/test_cgr_instructions.py:0 -> name", + "CALLS tests/test_cgrignore.py:0 -> name", + "CALLS tests/test_chained_attribute_resolution.py:0 -> parent", + "CALLS tests/test_check_no_docs.py:0 -> name", + "CALLS tests/test_class_ingest.py:0 -> name", + "CALLS tests/test_code_retrieval.py:0 -> description", + "CALLS tests/test_code_retrieval.py:0 -> project_root", + "CALLS tests/test_codebase_query.py:0 -> description", + "CALLS tests/test_complex_cross_file_calls.py:0 -> name", + "CALLS tests/test_conditional_alias_call.py:0 -> parent", + "CALLS tests/test_cpp_basic_syntax.py:0 -> name", + "CALLS tests/test_cpp_casting_operators.py:0 -> name", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> name", + "CALLS tests/test_cpp_concurrency.py:0 -> name", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> name", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> name", + "CALLS tests/test_cpp_coroutines.py:0 -> name", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> name", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> name", + "CALLS tests/test_cpp_error_handling.py:0 -> name", + "CALLS tests/test_cpp_friend_functions.py:0 -> name", + "CALLS tests/test_cpp_frontend_calls.py:0 -> name", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> name", + "CALLS tests/test_cpp_frontend_types.py:0 -> name", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> name", + "CALLS tests/test_cpp_line_numbers.py:0 -> _get_line_span", + "CALLS tests/test_cpp_memory_management.py:0 -> name", + "CALLS tests/test_cpp_modern_features.py:0 -> 
name", + "CALLS tests/test_cpp_modules.py:0 -> name", + "CALLS tests/test_cpp_move_semantics.py:0 -> name", + "CALLS tests/test_cpp_namespaces.py:0 -> name", + "CALLS tests/test_cpp_operators_overloading.py:0 -> name", + "CALLS tests/test_cpp_oracle.py:0 -> name", + "CALLS tests/test_cpp_oracle.py:0 -> nodes", + "CALLS tests/test_cpp_oracle.py:0 -> parent", + "CALLS tests/test_cpp_oracle.py:0 -> value", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> children", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> parent", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> type", + "CALLS tests/test_cpp_preprocessor.py:0 -> name", + "CALLS tests/test_cpp_ranges_views.py:0 -> name", + "CALLS tests/test_cpp_smart_pointers.py:0 -> name", + "CALLS tests/test_cpp_stl_usage.py:0 -> name", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> name", + "CALLS tests/test_cpp_templates.py:0 -> name", + "CALLS tests/test_cypher_validation.py:0 -> value", + "CALLS tests/test_decorators.py:0 -> value", + "CALLS tests/test_definition_processor.py:0 -> children", + "CALLS tests/test_definition_processor.py:0 -> type", + "CALLS tests/test_dependency_parser.py:0 -> name", + "CALLS tests/test_directory_lister.py:0 -> description", + "CALLS tests/test_directory_lister.py:0 -> project_root", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> nodes", + "CALLS tests/test_eval_imports_internal_modules.py:0 -> value", + "CALLS tests/test_eval_score_span.py:0 -> value", + "CALLS tests/test_exclude_patterns.py:0 -> parent", + "CALLS tests/test_file_editor.py:0 -> description", + "CALLS tests/test_file_editor.py:0 -> project_root", + "CALLS tests/test_file_editor.py:0 -> type", + "CALLS tests/test_file_reader.py:0 -> description", + "CALLS tests/test_file_reader.py:0 -> project_root", + "CALLS tests/test_file_writer.py:0 -> description", + "CALLS tests/test_file_writer.py:0 -> project_root", + "CALLS tests/test_fqn_resolver.py:0 -> children", + "CALLS tests/test_function_ingest.py:0 -> 
children", + "CALLS tests/test_function_ingest.py:0 -> name", + "CALLS tests/test_function_ingest.py:0 -> parent", + "CALLS tests/test_function_ingest.py:0 -> text", + "CALLS tests/test_function_ingest.py:0 -> type", + "CALLS tests/test_function_local_definitions.py:0 -> nodes", + "CALLS tests/test_getattr_dispatch.py:0 -> parent", + "CALLS tests/test_github_issues_integration.py:0 -> active_cypher_config", + "CALLS tests/test_github_issues_integration.py:0 -> active_orchestrator_config", + "CALLS tests/test_go_containment_oracle.py:0 -> name", + "CALLS tests/test_go_containment_oracle.py:0 -> value", + "CALLS tests/test_go_receiver_methods.py:0 -> name", + "CALLS tests/test_go_receiver_methods.py:0 -> value", + "CALLS tests/test_go_span_oracle.py:0 -> name", + "CALLS tests/test_go_structure_oracle.py:0 -> name", + "CALLS tests/test_go_structure_oracle.py:0 -> value", + "CALLS tests/test_graph_export_integration.py:0 -> name", + "CALLS tests/test_graph_export_integration.py:0 -> relationships", + "CALLS tests/test_graph_export_integration.py:0 -> type", + "CALLS tests/test_graph_loader.py:0 -> metadata", + "CALLS tests/test_graph_loader.py:0 -> name", + "CALLS tests/test_graph_loader.py:0 -> nodes", + "CALLS tests/test_graph_loader.py:0 -> relationships", + "CALLS tests/test_graph_loader.py:0 -> type", + "CALLS tests/test_graph_service.py:0 -> description", + "CALLS tests/test_graph_service.py:0 -> name", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> nodes", + "CALLS tests/test_graph_updater_integration.py:0 -> name", + "CALLS tests/test_handlers_unit.py:0 -> children", + "CALLS tests/test_handlers_unit.py:0 -> type", + "CALLS tests/test_import_parsing.py:0 -> children", + "CALLS tests/test_import_parsing.py:0 -> type", + "CALLS tests/test_inherits_attribute_base.py:0 -> nodes", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> parent", + "CALLS tests/test_java_advanced_oop.py:0 -> name", + "CALLS tests/test_java_collections_frameworks.py:0 
-> name", + "CALLS tests/test_java_comprehensive.py:0 -> name", + "CALLS tests/test_java_concurrency.py:0 -> name", + "CALLS tests/test_java_containment_oracle.py:0 -> name", + "CALLS tests/test_java_containment_oracle.py:0 -> value", + "CALLS tests/test_java_field_access_chains.py:0 -> children", + "CALLS tests/test_java_field_access_chains.py:0 -> type", + "CALLS tests/test_java_imports.py:0 -> name", + "CALLS tests/test_java_inheritance_edges.py:0 -> value", + "CALLS tests/test_java_inheritance_oracle.py:0 -> name", + "CALLS tests/test_java_inheritance_oracle.py:0 -> value", + "CALLS tests/test_java_label_name_collision.py:0 -> name", + "CALLS tests/test_java_label_name_collision.py:0 -> value", + "CALLS tests/test_java_method_calls.py:0 -> name", + "CALLS tests/test_java_modern_features.py:0 -> name", + "CALLS tests/test_java_modules.py:0 -> name", + "CALLS tests/test_java_nested_structures.py:0 -> name", + "CALLS tests/test_java_real_world.py:0 -> name", + "CALLS tests/test_java_reflection_annotations.py:0 -> name", + "CALLS tests/test_java_relationship_validation.py:0 -> name", + "CALLS tests/test_java_span_oracle.py:0 -> name", + "CALLS tests/test_java_streams_functional.py:0 -> name", + "CALLS tests/test_java_structure_oracle.py:0 -> name", + "CALLS tests/test_java_utils.py:0 -> parent", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> children", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> text", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> type", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> name", + "CALLS tests/test_javascript_async_patterns.py:0 -> name", + "CALLS tests/test_javascript_classes.py:0 -> name", + "CALLS tests/test_javascript_closures_scoping.py:0 -> name", + "CALLS tests/test_javascript_containment_oracle.py:0 -> name", + "CALLS tests/test_javascript_containment_oracle.py:0 -> value", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> name", + "CALLS 
tests/test_javascript_destructuring.py:0 -> name", + "CALLS tests/test_javascript_functions.py:0 -> name", + "CALLS tests/test_javascript_imports.py:0 -> name", + "CALLS tests/test_javascript_modules.py:0 -> name", + "CALLS tests/test_javascript_object_patterns.py:0 -> name", + "CALLS tests/test_javascript_prototypes.py:0 -> name", + "CALLS tests/test_javascript_span_oracle.py:0 -> name", + "CALLS tests/test_javascript_spread_rest.py:0 -> name", + "CALLS tests/test_javascript_structure_oracle.py:0 -> name", + "CALLS tests/test_javascript_this_binding.py:0 -> name", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> children", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> JsTsModuleSystemMixin", + "CALLS tests/test_js_ts_utils_integration.py:0 -> children", + "CALLS tests/test_js_ts_utils_integration.py:0 -> text", + "CALLS tests/test_js_ts_utils_integration.py:0 -> type", + "CALLS tests/test_js_utils.py:0 -> children", + "CALLS tests/test_js_utils.py:0 -> type", + "CALLS tests/test_language_node_coverage.py:0 -> name", + "CALLS tests/test_language_node_coverage.py:0 -> value", + "CALLS tests/test_language_tool_unit.py:0 -> name", + "CALLS tests/test_llm_service_unit.py:0 -> active_cypher_config", + "CALLS tests/test_llm_service_unit.py:0 -> active_orchestrator_config", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> parent", + "CALLS tests/test_lua_54_edge_cases.py:0 -> name", + "CALLS tests/test_lua_closures.py:0 -> name", + "CALLS tests/test_lua_containment_oracle.py:0 -> name", + "CALLS tests/test_lua_containment_oracle.py:0 -> value", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> name", + "CALLS tests/test_lua_edge_cases.py:0 -> name", + "CALLS tests/test_lua_error_handling.py:0 -> name", + "CALLS tests/test_lua_imports.py:0 -> name", + "CALLS tests/test_lua_imports_paths.py:0 -> name", + "CALLS tests/test_lua_metatables.py:0 -> name", + "CALLS tests/test_lua_modern_features.py:0 -> name", + "CALLS tests/test_lua_oop_patterns.py:0 -> name", 
+ "CALLS tests/test_lua_span_oracle.py:0 -> name", + "CALLS tests/test_lua_structure_oracle.py:0 -> name", + "CALLS tests/test_lua_structure_oracle.py:0 -> value", + "CALLS tests/test_lua_utils.py:0 -> children", + "CALLS tests/test_lua_utils.py:0 -> text", + "CALLS tests/test_lua_utils.py:0 -> type", + "CALLS tests/test_mcp_update_and_search.py:0 -> project_root", + "CALLS tests/test_mcp_update_and_search.py:0 -> rag_agent", + "CALLS tests/test_mcp_update_and_search.py:0 -> text", + "CALLS tests/test_mcp_write_file.py:0 -> parent", + "CALLS tests/test_memgraph_batching.py:0 -> description", + "CALLS tests/test_memgraph_batching.py:0 -> name", + "CALLS tests/test_model_switching.py:0 -> active_orchestrator_config", + "CALLS tests/test_model_switching.py:0 -> ollama_endpoint", + "CALLS tests/test_multiline_input_keybindings.py:0 -> value", + "CALLS tests/test_nested_function_defines.py:0 -> nodes", + "CALLS tests/test_nested_function_qualified_names.py:0 -> name", + "CALLS tests/test_node_relationship_coverage.py:0 -> description", + "CALLS tests/test_node_relationship_coverage.py:0 -> name", + "CALLS tests/test_node_relationship_coverage.py:0 -> value", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> parent", + "CALLS tests/test_oracle_nested_defs.py:0 -> name", + "CALLS tests/test_oracle_nested_defs.py:0 -> nodes", + "CALLS tests/test_php_containment_oracle.py:0 -> name", + "CALLS tests/test_php_containment_oracle.py:0 -> value", + "CALLS tests/test_php_imports.py:0 -> name", + "CALLS tests/test_php_inheritance_edges.py:0 -> value", + "CALLS tests/test_php_inheritance_oracle.py:0 -> name", + "CALLS tests/test_php_inheritance_oracle.py:0 -> value", + "CALLS tests/test_php_span_oracle.py:0 -> name", + "CALLS tests/test_php_structure_oracle.py:0 -> name", + "CALLS tests/test_processor_factory.py:0 -> call_processor", + "CALLS tests/test_processor_factory.py:0 -> definition_processor", + "CALLS tests/test_processor_factory.py:0 -> import_processor", + "CALLS 
tests/test_processor_factory.py:0 -> structure_processor", + "CALLS tests/test_processor_factory.py:0 -> type_inference", + "CALLS tests/test_project_name_flag.py:0 -> parent", + "CALLS tests/test_project_name_flag.py:0 -> structure_processor", + "CALLS tests/test_project_name_flag.py:0 -> type_inference", + "CALLS tests/test_protobuf_end_to_end.py:0 -> nodes", + "CALLS tests/test_protobuf_end_to_end.py:0 -> parent", + "CALLS tests/test_protobuf_end_to_end.py:0 -> relationships", + "CALLS tests/test_protobuf_service.py:0 -> name", + "CALLS tests/test_protobuf_service.py:0 -> nodes", + "CALLS tests/test_protobuf_service.py:0 -> relationships", + "CALLS tests/test_protobuf_service.py:0 -> type", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> parent", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> parent", + "CALLS tests/test_provider_classes.py:0 -> provider_name", + "CALLS tests/test_provider_configuration.py:0 -> active_cypher_config", + "CALLS tests/test_provider_configuration.py:0 -> active_orchestrator_config", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> children", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> text", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> type", + "CALLS tests/test_python_class_method_relationship.py:0 -> name", + "CALLS tests/test_python_context_managers.py:0 -> name", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> name", + "CALLS tests/test_python_decorators.py:0 -> name", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> name", + "CALLS tests/test_python_imports.py:0 -> name", + "CALLS tests/test_python_inheritance.py:0 -> name", + "CALLS tests/test_python_nested_functions.py:0 -> name", + "CALLS tests/test_python_relative_import_resolution.py:0 -> children", + "CALLS tests/test_python_relative_import_resolution.py:0 -> text", + "CALLS tests/test_python_relative_import_resolution.py:0 -> type", + "CALLS 
tests/test_python_return_type_inference.py:0 -> name", + "CALLS tests/test_python_span_oracle.py:0 -> name", + "CALLS tests/test_python_span_oracle.py:0 -> value", + "CALLS tests/test_python_standard_library_imports.py:0 -> text", + "CALLS tests/test_python_standard_library_imports.py:0 -> type", + "CALLS tests/test_realtime_debounce.py:0 -> name", + "CALLS tests/test_realtime_event_filtering.py:0 -> structure_processor", + "CALLS tests/test_realtime_updater.py:0 -> structure_processor", + "CALLS tests/test_reexport_chain_resolution.py:0 -> parent", + "CALLS tests/test_retrieval_eval.py:0 -> name", + "CALLS tests/test_retrieval_eval.py:0 -> value", + "CALLS tests/test_rust.py:0 -> name", + "CALLS tests/test_rust_advanced_types.py:0 -> method_calls", + "CALLS tests/test_rust_closure_containment_oracle.py:0 -> name", + "CALLS tests/test_rust_closure_containment_oracle.py:0 -> value", + "CALLS tests/test_rust_closure_method_defines.py:0 -> value", + "CALLS tests/test_rust_closures_functions.py:0 -> method_calls", + "CALLS tests/test_rust_collections_iterators.py:0 -> method_calls", + "CALLS tests/test_rust_concurrency_async.py:0 -> method_calls", + "CALLS tests/test_rust_containment_oracle.py:0 -> name", + "CALLS tests/test_rust_containment_oracle.py:0 -> value", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> name", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> method_calls", + "CALLS tests/test_rust_inheritance_edges.py:0 -> value", + "CALLS tests/test_rust_inheritance_oracle.py:0 -> name", + "CALLS tests/test_rust_inheritance_oracle.py:0 -> value", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> method_calls", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> method_calls", + "CALLS tests/test_rust_memory_management.py:0 -> method_calls", + "CALLS tests/test_rust_modules_visibility.py:0 -> method_calls", + "CALLS tests/test_rust_nested_module_containment.py:0 -> value", + "CALLS tests/test_rust_pattern_matching.py:0 -> method_calls", + 
"CALLS tests/test_rust_performance_optimization.py:0 -> method_calls", + "CALLS tests/test_rust_smart_pointers.py:0 -> method_calls", + "CALLS tests/test_rust_span_oracle.py:0 -> name", + "CALLS tests/test_rust_structs_enums.py:0 -> method_calls", + "CALLS tests/test_rust_structure_oracle.py:0 -> name", + "CALLS tests/test_rust_trait_method_containment.py:0 -> value", + "CALLS tests/test_rust_trait_objects.py:0 -> method_calls", + "CALLS tests/test_rust_traits_generics.py:0 -> method_calls", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> method_calls", + "CALLS tests/test_rust_utils.py:0 -> children", + "CALLS tests/test_rust_utils.py:0 -> name", + "CALLS tests/test_rust_utils.py:0 -> type", + "CALLS tests/test_rust_web_networking.py:0 -> method_calls", + "CALLS tests/test_semantic_search.py:0 -> name", + "CALLS tests/test_shell_command.py:0 -> description", + "CALLS tests/test_shell_command.py:0 -> name", + "CALLS tests/test_shell_command.py:0 -> project_root", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> parent", + "CALLS tests/test_slots_lazy_logger.py:0 -> ollama_endpoint", + "CALLS tests/test_stack_manager.py:0 -> compose_file", + "CALLS tests/test_stack_manager.py:0 -> value", + "CALLS tests/test_status_bar_config.py:0 -> active_cypher_config", + "CALLS tests/test_status_bar_config.py:0 -> active_orchestrator_config", + "CALLS tests/test_status_bar_config.py:0 -> stack_home", + "CALLS tests/test_status_bar_config.py:0 -> value", + "CALLS tests/test_structural_relationships.py:0 -> name", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> parent", + "CALLS tests/test_ts_closure_containment.py:0 -> value", + "CALLS tests/test_type_inference_iterative.py:0 -> java_type_inference", + "CALLS tests/test_type_inference_iterative.py:0 -> js_type_inference", + "CALLS tests/test_type_inference_iterative.py:0 -> lua_type_inference", + "CALLS tests/test_type_inference_iterative.py:0 -> python_type_inference", + "CALLS tests/test_type_inference_iterative.py:0 
-> text", + "CALLS tests/test_typescript_classes.py:0 -> name", + "CALLS tests/test_typescript_containment_oracle.py:0 -> name", + "CALLS tests/test_typescript_containment_oracle.py:0 -> value", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> name", + "CALLS tests/test_typescript_implements_edges.py:0 -> value", + "CALLS tests/test_typescript_inheritance_oracle.py:0 -> name", + "CALLS tests/test_typescript_inheritance_oracle.py:0 -> value", + "CALLS tests/test_typescript_span_oracle.py:0 -> name", + "CALLS tests/test_typescript_structure_oracle.py:0 -> name", + "CALLS tests/test_typescript_types.py:0 -> name", + "CALLS tests/test_unixcoder_unit.py:0 -> _MockIngestor", + "CALLS tests/test_workspaces.py:0 -> description", + "CALLS tests/test_workspaces.py:0 -> name", + "CALLS tools/code_retrieval.py:0 -> project_root", + "CALLS tools/directory_lister.py:0 -> project_root", + "CALLS tools/file_editor.py:0 -> project_root", + "CALLS tools/file_editor.py:0 -> text", + "CALLS tools/file_reader.py:0 -> project_root", + "CALLS tools/file_writer.py:0 -> parent", + "CALLS tools/file_writer.py:0 -> project_root", + "CALLS tools/health_checker.py:0 -> name", + "CALLS tools/language.py:0 -> name", + "CALLS tools/shell_command.py:0 -> async_timing_decorator", + "CALLS tools/shell_command.py:0 -> parent", + "CALLS tools/shell_command.py:0 -> project_root", + "CALLS utils/fqn_resolver.py:0 -> _c_get_name", + "CALLS utils/fqn_resolver.py:0 -> _cpp_get_name", + "CALLS utils/fqn_resolver.py:0 -> _generic_file_to_module", + "CALLS utils/fqn_resolver.py:0 -> _generic_get_name", + "CALLS utils/fqn_resolver.py:0 -> _js_file_to_module", + "CALLS utils/fqn_resolver.py:0 -> _js_get_name", + "CALLS utils/fqn_resolver.py:0 -> _php_file_to_module", + "CALLS utils/fqn_resolver.py:0 -> _python_file_to_module", + "CALLS utils/fqn_resolver.py:0 -> _python_get_name", + "CALLS utils/fqn_resolver.py:0 -> _rust_file_to_module", + "CALLS utils/fqn_resolver.py:0 -> _rust_get_name", + "CALLS 
utils/fqn_resolver.py:0 -> children", + "CALLS utils/fqn_resolver.py:0 -> parent", + "CALLS utils/fqn_resolver.py:0 -> type", + "CALLS utils/path_utils.py:0 -> name", + "CALLS utils/path_utils.py:0 -> parent", + "CALLS utils/rich_markdown.py:0 -> text", + "CALLS utils/source_extraction.py:0 -> mock_ast_extractor", + "CALLS workspaces/cli.py:0 -> description", + "CALLS workspaces/cli.py:0 -> name", + "CALLS workspaces/storage.py:0 -> name", + "CALLS workspaces/storage.py:0 -> parent" + ] + }, + "retrieval:grep_name": { + "missing": [], + "extra": [ + "CALLS cgr_state.py:0 -> parent", + "CALLS cgr_state.py:0 -> read_sync_timestamps", + "CALLS cgr_state.py:0 -> record_sync", + "CALLS cgr_state.py:0 -> save", + "CALLS cli.py:0 -> CLICommandName", + "CALLS cli.py:0 -> Color", + "CALLS cli.py:0 -> DeadCodeFormat", + "CALLS cli.py:0 -> MCPTransport", + "CALLS cli.py:0 -> MemgraphIngestor", + "CALLS cli.py:0 -> ModelRole", + "CALLS cli.py:0 -> QueryFormat", + "CALLS cli.py:0 -> StackError", + "CALLS cli.py:0 -> StackState", + "CALLS cli.py:0 -> StyleModifier", + "CALLS cli.py:0 -> WorkspaceConfig", + "CALLS cli.py:0 -> WorkspaceError", + "CALLS cli.py:0 -> _global_options", + "CALLS cli.py:0 -> _sync_workspace", + "CALLS cli.py:0 -> _version_callback", + "CALLS cli.py:0 -> active_cypher_config", + "CALLS cli.py:0 -> active_orchestrator_config", + "CALLS cli.py:0 -> agent", + "CALLS cli.py:0 -> ask_agent", + "CALLS cli.py:0 -> cli", + "CALLS cli.py:0 -> compose_file", + "CALLS cli.py:0 -> daemon_command", + "CALLS cli.py:0 -> dead_code", + "CALLS cli.py:0 -> decorator", + "CALLS cli.py:0 -> dim", + "CALLS cli.py:0 -> doctor", + "CALLS cli.py:0 -> export", + "CALLS cli.py:0 -> graph_file", + "CALLS cli.py:0 -> graph_loader_command", + "CALLS cli.py:0 -> graph_service", + "CALLS cli.py:0 -> graph_updater", + "CALLS cli.py:0 -> index", + "CALLS cli.py:0 -> language_command", + "CALLS cli.py:0 -> load", + "CALLS cli.py:0 -> load_cgr_instructions", + "CALLS cli.py:0 -> logs", + 
"CALLS cli.py:0 -> main", + "CALLS cli.py:0 -> mcp_server", + "CALLS cli.py:0 -> metadata", + "CALLS cli.py:0 -> name", + "CALLS cli.py:0 -> nodes", + "CALLS cli.py:0 -> optimize", + "CALLS cli.py:0 -> relationships", + "CALLS cli.py:0 -> repo", + "CALLS cli.py:0 -> start", + "CALLS cli.py:0 -> stats", + "CALLS cli.py:0 -> status_command", + "CALLS cli.py:0 -> stop_command", + "CALLS cli.py:0 -> type", + "CALLS cli.py:0 -> updater", + "CALLS cli.py:0 -> value", + "CALLS cli.py:0 -> workspace_command", + "CALLS cli_help.py:0 -> CLICommandName", + "CALLS cli_help.py:0 -> add", + "CALLS cli_help.py:0 -> agent", + "CALLS cli_help.py:0 -> decorator", + "CALLS cli_help.py:0 -> derive_project_name", + "CALLS cli_help.py:0 -> description", + "CALLS cli_help.py:0 -> doctor", + "CALLS cli_help.py:0 -> down", + "CALLS cli_help.py:0 -> export", + "CALLS cli_help.py:0 -> index", + "CALLS cli_help.py:0 -> loader", + "CALLS cli_help.py:0 -> logs", + "CALLS cli_help.py:0 -> name", + "CALLS cli_help.py:0 -> nodes", + "CALLS cli_help.py:0 -> optimize", + "CALLS cli_help.py:0 -> relationships", + "CALLS cli_help.py:0 -> repo", + "CALLS cli_help.py:0 -> run", + "CALLS cli_help.py:0 -> start", + "CALLS cli_help.py:0 -> stats", + "CALLS cli_help.py:0 -> status", + "CALLS cli_help.py:0 -> summary", + "CALLS cli_help.py:0 -> type", + "CALLS cli_help.py:0 -> up", + "CALLS cli_help.py:0 -> value", + "CALLS config.py:0 -> ApiKeyInfoEntry", + "CALLS config.py:0 -> CppFrontend", + "CALLS config.py:0 -> GoogleProviderType", + "CALLS config.py:0 -> ModelRole", + "CALLS config.py:0 -> Provider", + "CALLS config.py:0 -> active_cypher_config", + "CALLS config.py:0 -> active_orchestrator_config", + "CALLS config.py:0 -> export", + "CALLS config.py:0 -> keys", + "CALLS config.py:0 -> load_cgr_instructions", + "CALLS config.py:0 -> load_cgrignore_patterns", + "CALLS config.py:0 -> logs", + "CALLS config.py:0 -> name", + "CALLS config.py:0 -> ollama_endpoint", + "CALLS config.py:0 -> 
parse_model_string", + "CALLS config.py:0 -> repo_path", + "CALLS config.py:0 -> resolve_batch_size", + "CALLS config.py:0 -> set_cypher", + "CALLS config.py:0 -> set_orchestrator", + "CALLS config.py:0 -> status", + "CALLS config.py:0 -> to_update_kwargs", + "CALLS config.py:0 -> validate_api_key", + "CALLS constants.py:0 -> Architecture", + "CALLS constants.py:0 -> Color", + "CALLS constants.py:0 -> CppFrontend", + "CALLS constants.py:0 -> CppNodeType", + "CALLS constants.py:0 -> DeadCodeFormat", + "CALLS constants.py:0 -> Dependency", + "CALLS constants.py:0 -> DiffMarker", + "CALLS constants.py:0 -> EventType", + "CALLS constants.py:0 -> FileAction", + "CALLS constants.py:0 -> GoogleProviderType", + "CALLS constants.py:0 -> KeyBinding", + "CALLS constants.py:0 -> LanguageStatus", + "CALLS constants.py:0 -> MCPEnvVar", + "CALLS constants.py:0 -> MCPParamName", + "CALLS constants.py:0 -> MCPSchemaField", + "CALLS constants.py:0 -> MCPSchemaType", + "CALLS constants.py:0 -> MCPToolName", + "CALLS constants.py:0 -> MCPTransport", + "CALLS constants.py:0 -> ModelConfig", + "CALLS constants.py:0 -> ModelRole", + "CALLS constants.py:0 -> NodeLabel", + "CALLS constants.py:0 -> PermissionMode", + "CALLS constants.py:0 -> Provider", + "CALLS constants.py:0 -> QueryFormat", + "CALLS constants.py:0 -> RelationshipType", + "CALLS constants.py:0 -> StyleModifier", + "CALLS constants.py:0 -> SupportedLanguage", + "CALLS constants.py:0 -> TreeSitterModule", + "CALLS constants.py:0 -> UniXcoderMode", + "CALLS constants.py:0 -> UniqueKeyType", + "CALLS constants.py:0 -> _c_get_name", + "CALLS constants.py:0 -> _cpp_get_name", + "CALLS constants.py:0 -> _js_get_name", + "CALLS constants.py:0 -> _rust_get_name", + "CALLS constants.py:0 -> add", + "CALLS constants.py:0 -> agent", + "CALLS constants.py:0 -> analyze", + "CALLS constants.py:0 -> ask_agent", + "CALLS constants.py:0 -> child_by_field_name", + "CALLS constants.py:0 -> cli", + "CALLS constants.py:0 -> count_tokens", + 
"CALLS constants.py:0 -> decode", + "CALLS constants.py:0 -> decorator", + "CALLS constants.py:0 -> delete_project", + "CALLS constants.py:0 -> description", + "CALLS constants.py:0 -> dim", + "CALLS constants.py:0 -> done", + "CALLS constants.py:0 -> execute", + "CALLS constants.py:0 -> export", + "CALLS constants.py:0 -> extractor", + "CALLS constants.py:0 -> find_with_prefix", + "CALLS constants.py:0 -> get", + "CALLS constants.py:0 -> get_code_snippet", + "CALLS constants.py:0 -> index", + "CALLS constants.py:0 -> index_repository", + "CALLS constants.py:0 -> is_exported", + "CALLS constants.py:0 -> list_directory", + "CALLS constants.py:0 -> list_projects", + "CALLS constants.py:0 -> load", + "CALLS constants.py:0 -> loader", + "CALLS constants.py:0 -> main", + "CALLS constants.py:0 -> metadata", + "CALLS constants.py:0 -> name", + "CALLS constants.py:0 -> nodes", + "CALLS constants.py:0 -> parent", + "CALLS constants.py:0 -> parse", + "CALLS constants.py:0 -> process", + "CALLS constants.py:0 -> processor", + "CALLS constants.py:0 -> put", + "CALLS constants.py:0 -> query_code_graph", + "CALLS constants.py:0 -> read", + "CALLS constants.py:0 -> read_file", + "CALLS constants.py:0 -> relationships", + "CALLS constants.py:0 -> repo", + "CALLS constants.py:0 -> run", + "CALLS constants.py:0 -> semantic_search", + "CALLS constants.py:0 -> start", + "CALLS constants.py:0 -> status", + "CALLS constants.py:0 -> style", + "CALLS constants.py:0 -> submit", + "CALLS constants.py:0 -> summary", + "CALLS constants.py:0 -> surgical_replace_code", + "CALLS constants.py:0 -> text", + "CALLS constants.py:0 -> type", + "CALLS constants.py:0 -> up", + "CALLS constants.py:0 -> update_repository", + "CALLS constants.py:0 -> value", + "CALLS constants.py:0 -> wipe_database", + "CALLS constants.py:0 -> write_file", + "CALLS cypher_queries.py:0 -> build_constraint_query", + "CALLS cypher_queries.py:0 -> build_create_node_query", + "CALLS cypher_queries.py:0 -> 
build_create_relationship_query", + "CALLS cypher_queries.py:0 -> build_dead_code_query", + "CALLS cypher_queries.py:0 -> build_index_query", + "CALLS cypher_queries.py:0 -> build_merge_node_query", + "CALLS cypher_queries.py:0 -> build_merge_relationship_query", + "CALLS cypher_queries.py:0 -> build_nodes_by_ids_query", + "CALLS cypher_queries.py:0 -> decorator", + "CALLS cypher_queries.py:0 -> forward", + "CALLS cypher_queries.py:0 -> is_exported", + "CALLS cypher_queries.py:0 -> name", + "CALLS cypher_queries.py:0 -> start", + "CALLS cypher_queries.py:0 -> type", + "CALLS cypher_queries.py:0 -> wrap_with_unwind", + "CALLS decorators.py:0 -> LoadableProtocol", + "CALLS decorators.py:0 -> PathValidatorProtocol", + "CALLS decorators.py:0 -> async_timing_decorator", + "CALLS decorators.py:0 -> decorator", + "CALLS decorators.py:0 -> ensure_loaded", + "CALLS decorators.py:0 -> log_operation", + "CALLS decorators.py:0 -> logs", + "CALLS decorators.py:0 -> mcp_try_except", + "CALLS decorators.py:0 -> name", + "CALLS decorators.py:0 -> project_root", + "CALLS decorators.py:0 -> recursion_guard", + "CALLS decorators.py:0 -> start", + "CALLS decorators.py:0 -> timing_decorator", + "CALLS decorators.py:0 -> type", + "CALLS decorators.py:0 -> validate_project_path", + "CALLS decorators.py:0 -> wrapper", + "CALLS embedder.py:0 -> clear_embedding_cache", + "CALLS embedder.py:0 -> embed_code", + "CALLS embedder.py:0 -> embed_code_batch", + "CALLS embedder.py:0 -> logs", + "CALLS embedder.py:0 -> parent", + "CALLS embedder.py:0 -> save", + "CALLS embedder.py:0 -> start", + "CALLS exceptions.py:0 -> CypherGenerator", + "CALLS exceptions.py:0 -> Dependency", + "CALLS exceptions.py:0 -> LLMGenerationError", + "CALLS exceptions.py:0 -> Provider", + "CALLS exceptions.py:0 -> generate", + "CALLS exceptions.py:0 -> load", + "CALLS exceptions.py:0 -> name", + "CALLS exceptions.py:0 -> read", + "CALLS graph_loader.py:0 -> GraphData", + "CALLS graph_loader.py:0 -> GraphMetadata", + 
"CALLS graph_loader.py:0 -> _ensure_loaded", + "CALLS graph_loader.py:0 -> ensure_loaded", + "CALLS graph_loader.py:0 -> find_node_by_property", + "CALLS graph_loader.py:0 -> find_nodes_by_label", + "CALLS graph_loader.py:0 -> get_node_by_id", + "CALLS graph_loader.py:0 -> get_relationships_for_node", + "CALLS graph_loader.py:0 -> index", + "CALLS graph_loader.py:0 -> load_graph", + "CALLS graph_loader.py:0 -> loader", + "CALLS graph_loader.py:0 -> logs", + "CALLS graph_loader.py:0 -> metadata", + "CALLS graph_loader.py:0 -> nodes", + "CALLS graph_loader.py:0 -> relationships", + "CALLS graph_loader.py:0 -> summary", + "CALLS graph_loader.py:0 -> type", + "CALLS graph_loader.py:0 -> value", + "CALLS graph_updater.py:0 -> CppFrontend", + "CALLS graph_updater.py:0 -> GraphUpdater", + "CALLS graph_updater.py:0 -> IngestorProtocol", + "CALLS graph_updater.py:0 -> LanguageQueries", + "CALLS graph_updater.py:0 -> NodeType", + "CALLS graph_updater.py:0 -> QueryProtocol", + "CALLS graph_updater.py:0 -> SupportedLanguage", + "CALLS graph_updater.py:0 -> ast_extractor_func", + "CALLS graph_updater.py:0 -> call_processor", + "CALLS graph_updater.py:0 -> callable_params", + "CALLS graph_updater.py:0 -> definition_processor", + "CALLS graph_updater.py:0 -> description", + "CALLS graph_updater.py:0 -> done", + "CALLS graph_updater.py:0 -> factory", + "CALLS graph_updater.py:0 -> find_ending_with", + "CALLS graph_updater.py:0 -> find_with_prefix", + "CALLS graph_updater.py:0 -> find_with_prefix_and_suffix", + "CALLS graph_updater.py:0 -> is_abstract", + "CALLS graph_updater.py:0 -> is_property", + "CALLS graph_updater.py:0 -> logs", + "CALLS graph_updater.py:0 -> mark_abstract", + "CALLS graph_updater.py:0 -> mark_callable_params", + "CALLS graph_updater.py:0 -> mark_property", + "CALLS graph_updater.py:0 -> name", + "CALLS graph_updater.py:0 -> nodes", + "CALLS graph_updater.py:0 -> parent", + "CALLS graph_updater.py:0 -> property_names", + "CALLS graph_updater.py:0 -> 
register_unique_qn", + "CALLS graph_updater.py:0 -> repo_path", + "CALLS graph_updater.py:0 -> run", + "CALLS graph_updater.py:0 -> structure_processor", + "CALLS graph_updater.py:0 -> type", + "CALLS graph_updater.py:0 -> type_inference", + "CALLS graph_updater.py:0 -> value", + "CALLS graph_updater.py:0 -> variants", + "CALLS graph_updater.py:0 -> verify_stored_ids", + "CALLS language_spec.py:0 -> CppNodeType", + "CALLS language_spec.py:0 -> SupportedLanguage", + "CALLS language_spec.py:0 -> _c_get_name", + "CALLS language_spec.py:0 -> _cpp_get_name", + "CALLS language_spec.py:0 -> _generic_file_to_module", + "CALLS language_spec.py:0 -> _js_file_to_module", + "CALLS language_spec.py:0 -> _js_get_name", + "CALLS language_spec.py:0 -> _php_file_to_module", + "CALLS language_spec.py:0 -> _python_file_to_module", + "CALLS language_spec.py:0 -> _python_get_name", + "CALLS language_spec.py:0 -> _rust_file_to_module", + "CALLS language_spec.py:0 -> _rust_get_name", + "CALLS language_spec.py:0 -> get_language_for_extension", + "CALLS language_spec.py:0 -> get_language_spec", + "CALLS language_spec.py:0 -> name", + "CALLS language_spec.py:0 -> parent", + "CALLS language_spec.py:0 -> text", + "CALLS language_spec.py:0 -> type", + "CALLS language_spec.py:0 -> walk", + "CALLS logs.py:0 -> CodeRetriever", + "CALLS logs.py:0 -> CypherGenerator", + "CALLS logs.py:0 -> Dependency", + "CALLS logs.py:0 -> FileEditor", + "CALLS logs.py:0 -> FileReader", + "CALLS logs.py:0 -> FileWriter", + "CALLS logs.py:0 -> ProtobufFileIngestor", + "CALLS logs.py:0 -> Provider", + "CALLS logs.py:0 -> ShellCommander", + "CALLS logs.py:0 -> analyze", + "CALLS logs.py:0 -> ask_agent", + "CALLS logs.py:0 -> children", + "CALLS logs.py:0 -> class_qn", + "CALLS logs.py:0 -> clear", + "CALLS logs.py:0 -> done", + "CALLS logs.py:0 -> down", + "CALLS logs.py:0 -> export", + "CALLS logs.py:0 -> flush", + "CALLS logs.py:0 -> function_qn", + "CALLS logs.py:0 -> generate", + "CALLS logs.py:0 -> get", + 
"CALLS logs.py:0 -> get_code_snippet", + "CALLS logs.py:0 -> index", + "CALLS logs.py:0 -> infer", + "CALLS logs.py:0 -> keys", + "CALLS logs.py:0 -> list_directory", + "CALLS logs.py:0 -> load", + "CALLS logs.py:0 -> loader", + "CALLS logs.py:0 -> logs", + "CALLS logs.py:0 -> method_qn", + "CALLS logs.py:0 -> module_qn", + "CALLS logs.py:0 -> name", + "CALLS logs.py:0 -> nodes", + "CALLS logs.py:0 -> parent", + "CALLS logs.py:0 -> parse", + "CALLS logs.py:0 -> process", + "CALLS logs.py:0 -> processor", + "CALLS logs.py:0 -> query_code_graph", + "CALLS logs.py:0 -> read", + "CALLS logs.py:0 -> read_file", + "CALLS logs.py:0 -> relationships", + "CALLS logs.py:0 -> run", + "CALLS logs.py:0 -> save", + "CALLS logs.py:0 -> semantic_search", + "CALLS logs.py:0 -> start", + "CALLS logs.py:0 -> summary", + "CALLS logs.py:0 -> surgical_replace_code", + "CALLS logs.py:0 -> text", + "CALLS logs.py:0 -> type", + "CALLS logs.py:0 -> value", + "CALLS logs.py:0 -> write_file", + "CALLS main.py:0 -> AgentLoopUI", + "CALLS main.py:0 -> Color", + "CALLS main.py:0 -> DiffMarker", + "CALLS main.py:0 -> GraphData", + "CALLS main.py:0 -> KeyBinding", + "CALLS main.py:0 -> ModelRole", + "CALLS main.py:0 -> Provider", + "CALLS main.py:0 -> QueryFormat", + "CALLS main.py:0 -> QueryProtocol", + "CALLS main.py:0 -> StyleModifier", + "CALLS main.py:0 -> _interrupt", + "CALLS main.py:0 -> _rich_log_sink", + "CALLS main.py:0 -> _toggle", + "CALLS main.py:0 -> active_cypher_config", + "CALLS main.py:0 -> active_orchestrator_config", + "CALLS main.py:0 -> create_file", + "CALLS main.py:0 -> directory_lister", + "CALLS main.py:0 -> export_graph_to_file", + "CALLS main.py:0 -> file_editor", + "CALLS main.py:0 -> file_reader", + "CALLS main.py:0 -> file_writer", + "CALLS main.py:0 -> get_multiline_input", + "CALLS main.py:0 -> graph_service", + "CALLS main.py:0 -> keyboard_interrupt", + "CALLS main.py:0 -> load_cgr_instructions", + "CALLS main.py:0 -> logs", + "CALLS main.py:0 -> main_async", + 
"CALLS main.py:0 -> main_optimize_async", + "CALLS main.py:0 -> main_single_query", + "CALLS main.py:0 -> metadata", + "CALLS main.py:0 -> name", + "CALLS main.py:0 -> new_line", + "CALLS main.py:0 -> nodes", + "CALLS main.py:0 -> ollama_endpoint", + "CALLS main.py:0 -> on_input", + "CALLS main.py:0 -> parent", + "CALLS main.py:0 -> project_root", + "CALLS main.py:0 -> prompt_for_unignored_directories", + "CALLS main.py:0 -> provider_name", + "CALLS main.py:0 -> rag_agent", + "CALLS main.py:0 -> relationships", + "CALLS main.py:0 -> repo", + "CALLS main.py:0 -> repo_path", + "CALLS main.py:0 -> semantic_search", + "CALLS main.py:0 -> set_cypher", + "CALLS main.py:0 -> set_orchestrator", + "CALLS main.py:0 -> shell_commander", + "CALLS main.py:0 -> status", + "CALLS main.py:0 -> submit", + "CALLS main.py:0 -> submit_ctrl_e", + "CALLS main.py:0 -> text", + "CALLS main.py:0 -> toggle_permission_mode", + "CALLS main.py:0 -> value", + "CALLS mcp/__init__.py:0 -> serve_http", + "CALLS mcp/__init__.py:0 -> serve_stdio", + "CALLS mcp/client.py:0 -> MCPParamName", + "CALLS mcp/client.py:0 -> MCPToolName", + "CALLS mcp/client.py:0 -> agent", + "CALLS mcp/client.py:0 -> cli", + "CALLS mcp/client.py:0 -> main", + "CALLS mcp/client.py:0 -> read", + "CALLS mcp/client.py:0 -> text", + "CALLS mcp/server.py:0 -> MCPEnvVar", + "CALLS mcp/server.py:0 -> description", + "CALLS mcp/server.py:0 -> graph_service", + "CALLS mcp/server.py:0 -> lifespan", + "CALLS mcp/server.py:0 -> logs", + "CALLS mcp/server.py:0 -> name", + "CALLS mcp/server.py:0 -> project_root", + "CALLS mcp/server.py:0 -> repo_path", + "CALLS mcp/server.py:0 -> serve_http", + "CALLS mcp/server.py:0 -> text", + "CALLS mcp/server.py:0 -> type", + "CALLS mcp/tools.py:0 -> CypherGenerator", + "CALLS mcp/tools.py:0 -> MCPParamName", + "CALLS mcp/tools.py:0 -> MCPSchemaType", + "CALLS mcp/tools.py:0 -> MCPToolName", + "CALLS mcp/tools.py:0 -> MemgraphIngestor", + "CALLS mcp/tools.py:0 -> _delete_project_sync", + "CALLS 
mcp/tools.py:0 -> _index_repository_sync", + "CALLS mcp/tools.py:0 -> _update_repository_sync", + "CALLS mcp/tools.py:0 -> agent", + "CALLS mcp/tools.py:0 -> ask_agent", + "CALLS mcp/tools.py:0 -> clean_database", + "CALLS mcp/tools.py:0 -> create_mcp_tools_registry", + "CALLS mcp/tools.py:0 -> description", + "CALLS mcp/tools.py:0 -> directory_lister", + "CALLS mcp/tools.py:0 -> file_editor", + "CALLS mcp/tools.py:0 -> file_reader", + "CALLS mcp/tools.py:0 -> file_writer", + "CALLS mcp/tools.py:0 -> get_code_snippet", + "CALLS mcp/tools.py:0 -> get_tool_handler", + "CALLS mcp/tools.py:0 -> get_tool_schemas", + "CALLS mcp/tools.py:0 -> graph_service", + "CALLS mcp/tools.py:0 -> graph_updater", + "CALLS mcp/tools.py:0 -> handler", + "CALLS mcp/tools.py:0 -> index_repository", + "CALLS mcp/tools.py:0 -> list_directory", + "CALLS mcp/tools.py:0 -> logs", + "CALLS mcp/tools.py:0 -> metadata", + "CALLS mcp/tools.py:0 -> name", + "CALLS mcp/tools.py:0 -> project_root", + "CALLS mcp/tools.py:0 -> query_code_graph", + "CALLS mcp/tools.py:0 -> rag_agent", + "CALLS mcp/tools.py:0 -> read_file", + "CALLS mcp/tools.py:0 -> repo_path", + "CALLS mcp/tools.py:0 -> semantic_search", + "CALLS mcp/tools.py:0 -> shell_commander", + "CALLS mcp/tools.py:0 -> start", + "CALLS mcp/tools.py:0 -> summary", + "CALLS mcp/tools.py:0 -> surgical_replace_code", + "CALLS mcp/tools.py:0 -> type", + "CALLS mcp/tools.py:0 -> update_repository", + "CALLS mcp/tools.py:0 -> updater", + "CALLS mcp/tools.py:0 -> value", + "CALLS mcp/tools.py:0 -> wipe_database", + "CALLS mcp/tools.py:0 -> write_file", + "CALLS models.py:0 -> AppContext", + "CALLS models.py:0 -> Dependency", + "CALLS models.py:0 -> FQNSpec", + "CALLS models.py:0 -> GraphNode", + "CALLS models.py:0 -> GraphRelationship", + "CALLS models.py:0 -> LanguageSpec", + "CALLS models.py:0 -> MCPInputSchema", + "CALLS models.py:0 -> MethodModifiersAndAnnotations", + "CALLS models.py:0 -> PermissionMode", + "CALLS models.py:0 -> SessionState", + 
"CALLS models.py:0 -> SupportedLanguage", + "CALLS models.py:0 -> ToolMetadata", + "CALLS models.py:0 -> _default_console", + "CALLS models.py:0 -> cycle_permission_mode", + "CALLS models.py:0 -> description", + "CALLS models.py:0 -> handler", + "CALLS models.py:0 -> is_yolo", + "CALLS models.py:0 -> load_cgr_instructions", + "CALLS models.py:0 -> name", + "CALLS models.py:0 -> reset_cancelled", + "CALLS models.py:0 -> type", + "CALLS parser_loader.py:0 -> LanguageSpec", + "CALLS parser_loader.py:0 -> TreeSitterModule", + "CALLS parser_loader.py:0 -> load_parsers", + "CALLS parser_loader.py:0 -> loader", + "CALLS parser_loader.py:0 -> logs", + "CALLS parser_loader.py:0 -> text", + "CALLS parsers/__init__.py:0 -> CallProcessor", + "CALLS parsers/__init__.py:0 -> DefinitionProcessor", + "CALLS parsers/__init__.py:0 -> ImportProcessor", + "CALLS parsers/__init__.py:0 -> ProcessorFactory", + "CALLS parsers/__init__.py:0 -> StdlibExtractor", + "CALLS parsers/__init__.py:0 -> StructureProcessor", + "CALLS parsers/__init__.py:0 -> TypeInferenceEngine", + "CALLS parsers/__init__.py:0 -> call_processor", + "CALLS parsers/__init__.py:0 -> definition_processor", + "CALLS parsers/__init__.py:0 -> factory", + "CALLS parsers/__init__.py:0 -> import_processor", + "CALLS parsers/__init__.py:0 -> structure_processor", + "CALLS parsers/__init__.py:0 -> type_inference", + "CALLS parsers/call_processor.py:0 -> CallProcessor", + "CALLS parsers/call_processor.py:0 -> CppNodeType", + "CALLS parsers/call_processor.py:0 -> FQNSpec", + "CALLS parsers/call_processor.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/call_processor.py:0 -> ImportProcessor", + "CALLS parsers/call_processor.py:0 -> IngestorProtocol", + "CALLS parsers/call_processor.py:0 -> LanguageQueries", + "CALLS parsers/call_processor.py:0 -> LanguageSpec", + "CALLS parsers/call_processor.py:0 -> NodeLabel", + "CALLS parsers/call_processor.py:0 -> RelationshipType", + "CALLS parsers/call_processor.py:0 -> 
SupportedLanguage", + "CALLS parsers/call_processor.py:0 -> TypeInferenceEngine", + "CALLS parsers/call_processor.py:0 -> _python_get_name", + "CALLS parsers/call_processor.py:0 -> call_resolver", + "CALLS parsers/call_processor.py:0 -> children", + "CALLS parsers/call_processor.py:0 -> class_qn", + "CALLS parsers/call_processor.py:0 -> collect_callable_field_bindings", + "CALLS parsers/call_processor.py:0 -> decorator", + "CALLS parsers/call_processor.py:0 -> extractor", + "CALLS parsers/call_processor.py:0 -> finalize_callable_param_flow", + "CALLS parsers/call_processor.py:0 -> import_processor", + "CALLS parsers/call_processor.py:0 -> index", + "CALLS parsers/call_processor.py:0 -> load", + "CALLS parsers/call_processor.py:0 -> logs", + "CALLS parsers/call_processor.py:0 -> method_qn", + "CALLS parsers/call_processor.py:0 -> module_qn", + "CALLS parsers/call_processor.py:0 -> name", + "CALLS parsers/call_processor.py:0 -> nodes", + "CALLS parsers/call_processor.py:0 -> parent", + "CALLS parsers/call_processor.py:0 -> process_calls_in_file", + "CALLS parsers/call_processor.py:0 -> read", + "CALLS parsers/call_processor.py:0 -> repo_path", + "CALLS parsers/call_processor.py:0 -> resolve_builtin_call", + "CALLS parsers/call_processor.py:0 -> resolve_cpp_operator_call", + "CALLS parsers/call_processor.py:0 -> run", + "CALLS parsers/call_processor.py:0 -> start", + "CALLS parsers/call_processor.py:0 -> text", + "CALLS parsers/call_processor.py:0 -> type_inference", + "CALLS parsers/call_processor.py:0 -> value", + "CALLS parsers/call_processor.py:0 -> walk", + "CALLS parsers/call_resolver.py:0 -> CallResolver", + "CALLS parsers/call_resolver.py:0 -> ClassIngestMixin", + "CALLS parsers/call_resolver.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/call_resolver.py:0 -> ImportProcessor", + "CALLS parsers/call_resolver.py:0 -> NodeLabel", + "CALLS parsers/call_resolver.py:0 -> NodeType", + "CALLS parsers/call_resolver.py:0 -> TypeInferenceEngine", + "CALLS 
parsers/call_resolver.py:0 -> _calculate_import_distance", + "CALLS parsers/call_resolver.py:0 -> _get_separator", + "CALLS parsers/call_resolver.py:0 -> _has_separator", + "CALLS parsers/call_resolver.py:0 -> callable_field_targets", + "CALLS parsers/call_resolver.py:0 -> class_qn", + "CALLS parsers/call_resolver.py:0 -> export", + "CALLS parsers/call_resolver.py:0 -> import_processor", + "CALLS parsers/call_resolver.py:0 -> java_type_inference", + "CALLS parsers/call_resolver.py:0 -> logs", + "CALLS parsers/call_resolver.py:0 -> method_calls", + "CALLS parsers/call_resolver.py:0 -> method_qn", + "CALLS parsers/call_resolver.py:0 -> mixin", + "CALLS parsers/call_resolver.py:0 -> module_qn", + "CALLS parsers/call_resolver.py:0 -> name", + "CALLS parsers/call_resolver.py:0 -> operator_dunder_targets", + "CALLS parsers/call_resolver.py:0 -> protocol_dispatch_targets", + "CALLS parsers/call_resolver.py:0 -> python_type_inference", + "CALLS parsers/call_resolver.py:0 -> record_callable_field_binding", + "CALLS parsers/call_resolver.py:0 -> resolve_builtin_call", + "CALLS parsers/call_resolver.py:0 -> resolve_cpp_operator_call", + "CALLS parsers/call_resolver.py:0 -> resolve_function_call", + "CALLS parsers/call_resolver.py:0 -> text", + "CALLS parsers/call_resolver.py:0 -> type", + "CALLS parsers/call_resolver.py:0 -> type_inference", + "CALLS parsers/class_ingest/__init__.py:0 -> ClassIngestMixin", + "CALLS parsers/class_ingest/__init__.py:0 -> mixin", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> CppNodeType", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> IngestorProtocol", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> NodeLabel", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> RelationshipType", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> children", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> find_cpp_exported_classes", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> ingest_cpp_module_declarations", + "CALLS 
parsers/class_ingest/cpp_modules.py:0 -> logs", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> module_qn", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> repo_path", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> text", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> type", + "CALLS parsers/class_ingest/identity.py:0 -> CppNodeType", + "CALLS parsers/class_ingest/identity.py:0 -> LanguageSpec", + "CALLS parsers/class_ingest/identity.py:0 -> SupportedLanguage", + "CALLS parsers/class_ingest/identity.py:0 -> children", + "CALLS parsers/class_ingest/identity.py:0 -> class_qn", + "CALLS parsers/class_ingest/identity.py:0 -> module_qn", + "CALLS parsers/class_ingest/identity.py:0 -> name", + "CALLS parsers/class_ingest/identity.py:0 -> parent", + "CALLS parsers/class_ingest/identity.py:0 -> resolve_class_identity", + "CALLS parsers/class_ingest/identity.py:0 -> text", + "CALLS parsers/class_ingest/identity.py:0 -> type", + "CALLS parsers/class_ingest/method_override.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/class_ingest/method_override.py:0 -> IngestorProtocol", + "CALLS parsers/class_ingest/method_override.py:0 -> NodeLabel", + "CALLS parsers/class_ingest/method_override.py:0 -> NodeType", + "CALLS parsers/class_ingest/method_override.py:0 -> RelationshipType", + "CALLS parsers/class_ingest/method_override.py:0 -> class_qn", + "CALLS parsers/class_ingest/method_override.py:0 -> logs", + "CALLS parsers/class_ingest/method_override.py:0 -> method_qn", + "CALLS parsers/class_ingest/method_override.py:0 -> process_all_method_overrides", + "CALLS parsers/class_ingest/mixin.py:0 -> ClassIngestMixin", + "CALLS parsers/class_ingest/mixin.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/class_ingest/mixin.py:0 -> ImportProcessor", + "CALLS parsers/class_ingest/mixin.py:0 -> IngestorProtocol", + "CALLS parsers/class_ingest/mixin.py:0 -> LanguageQueries", + "CALLS parsers/class_ingest/mixin.py:0 -> LanguageSpec", + "CALLS 
parsers/class_ingest/mixin.py:0 -> RelationshipType", + "CALLS parsers/class_ingest/mixin.py:0 -> SupportedLanguage", + "CALLS parsers/class_ingest/mixin.py:0 -> _extract_cpp_base_class_name", + "CALLS parsers/class_ingest/mixin.py:0 -> _get_node_type_for_inheritance", + "CALLS parsers/class_ingest/mixin.py:0 -> _ingest_classes_and_methods", + "CALLS parsers/class_ingest/mixin.py:0 -> _ingest_cpp_module_declarations", + "CALLS parsers/class_ingest/mixin.py:0 -> class_qn", + "CALLS parsers/class_ingest/mixin.py:0 -> import_processor", + "CALLS parsers/class_ingest/mixin.py:0 -> is_exported", + "CALLS parsers/class_ingest/mixin.py:0 -> logs", + "CALLS parsers/class_ingest/mixin.py:0 -> module_qn", + "CALLS parsers/class_ingest/mixin.py:0 -> name", + "CALLS parsers/class_ingest/mixin.py:0 -> parent", + "CALLS parsers/class_ingest/mixin.py:0 -> relationships", + "CALLS parsers/class_ingest/mixin.py:0 -> repo_path", + "CALLS parsers/class_ingest/mixin.py:0 -> text", + "CALLS parsers/class_ingest/mixin.py:0 -> type", + "CALLS parsers/class_ingest/mixin.py:0 -> value", + "CALLS parsers/class_ingest/node_type.py:0 -> CppNodeType", + "CALLS parsers/class_ingest/node_type.py:0 -> NodeType", + "CALLS parsers/class_ingest/node_type.py:0 -> SupportedLanguage", + "CALLS parsers/class_ingest/node_type.py:0 -> children", + "CALLS parsers/class_ingest/node_type.py:0 -> class_qn", + "CALLS parsers/class_ingest/node_type.py:0 -> determine_node_type", + "CALLS parsers/class_ingest/node_type.py:0 -> logs", + "CALLS parsers/class_ingest/node_type.py:0 -> name", + "CALLS parsers/class_ingest/node_type.py:0 -> text", + "CALLS parsers/class_ingest/node_type.py:0 -> type", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> CppNodeType", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> ImportProcessor", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> children", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> extract_implemented_interfaces", + "CALLS 
parsers/class_ingest/parent_extraction.py:0 -> extract_parent_classes", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> import_processor", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> logs", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> module_qn", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> name", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> nodes", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> text", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> type", + "CALLS parsers/class_ingest/relationships.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/class_ingest/relationships.py:0 -> ImportProcessor", + "CALLS parsers/class_ingest/relationships.py:0 -> IngestorProtocol", + "CALLS parsers/class_ingest/relationships.py:0 -> NodeLabel", + "CALLS parsers/class_ingest/relationships.py:0 -> NodeType", + "CALLS parsers/class_ingest/relationships.py:0 -> RelationshipType", + "CALLS parsers/class_ingest/relationships.py:0 -> SupportedLanguage", + "CALLS parsers/class_ingest/relationships.py:0 -> class_qn", + "CALLS parsers/class_ingest/relationships.py:0 -> create_class_relationships", + "CALLS parsers/class_ingest/relationships.py:0 -> import_processor", + "CALLS parsers/class_ingest/relationships.py:0 -> is_exported", + "CALLS parsers/class_ingest/relationships.py:0 -> module_qn", + "CALLS parsers/class_ingest/relationships.py:0 -> type", + "CALLS parsers/class_ingest/utils.py:0 -> children", + "CALLS parsers/class_ingest/utils.py:0 -> decode_node_stripped", + "CALLS parsers/class_ingest/utils.py:0 -> find_child_by_type", + "CALLS parsers/class_ingest/utils.py:0 -> text", + "CALLS parsers/class_ingest/utils.py:0 -> type", + "CALLS parsers/cpp/utils.py:0 -> CppNodeType", + "CALLS parsers/cpp/utils.py:0 -> add", + "CALLS parsers/cpp/utils.py:0 -> build_qualified_name", + "CALLS parsers/cpp/utils.py:0 -> children", + "CALLS parsers/cpp/utils.py:0 -> extract_class_name_from_out_of_class_method", + 
"CALLS parsers/cpp/utils.py:0 -> extract_exported_class_name", + "CALLS parsers/cpp/utils.py:0 -> is_exported", + "CALLS parsers/cpp/utils.py:0 -> is_out_of_class_method_definition", + "CALLS parsers/cpp/utils.py:0 -> module_qn", + "CALLS parsers/cpp/utils.py:0 -> name", + "CALLS parsers/cpp/utils.py:0 -> parent", + "CALLS parsers/cpp/utils.py:0 -> text", + "CALLS parsers/cpp/utils.py:0 -> type", + "CALLS parsers/cpp_frontend/__init__.py:0 -> CppQnResolver", + "CALLS parsers/cpp_frontend/__init__.py:0 -> build_module_qn_map", + "CALLS parsers/cpp_frontend/__init__.py:0 -> cpp_frontend_available", + "CALLS parsers/cpp_frontend/__init__.py:0 -> find_compile_commands", + "CALLS parsers/cpp_frontend/__init__.py:0 -> run_cpp_frontend", + "CALLS parsers/cpp_frontend/constants.py:0 -> NodeLabel", + "CALLS parsers/cpp_frontend/constants.py:0 -> name", + "CALLS parsers/cpp_frontend/constants.py:0 -> parent", + "CALLS parsers/cpp_frontend/constants.py:0 -> type", + "CALLS parsers/cpp_frontend/constants.py:0 -> value", + "CALLS parsers/cpp_frontend/frontend.py:0 -> DefinitionProcessor", + "CALLS parsers/cpp_frontend/frontend.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/cpp_frontend/frontend.py:0 -> GraphUpdater", + "CALLS parsers/cpp_frontend/frontend.py:0 -> IngestorProtocol", + "CALLS parsers/cpp_frontend/frontend.py:0 -> NodeLabel", + "CALLS parsers/cpp_frontend/frontend.py:0 -> RelationshipType", + "CALLS parsers/cpp_frontend/frontend.py:0 -> cpp_frontend_available", + "CALLS parsers/cpp_frontend/frontend.py:0 -> find_compile_commands", + "CALLS parsers/cpp_frontend/frontend.py:0 -> forward", + "CALLS parsers/cpp_frontend/frontend.py:0 -> index", + "CALLS parsers/cpp_frontend/frontend.py:0 -> name", + "CALLS parsers/cpp_frontend/frontend.py:0 -> nodes", + "CALLS parsers/cpp_frontend/frontend.py:0 -> parent", + "CALLS parsers/cpp_frontend/frontend.py:0 -> repo", + "CALLS parsers/cpp_frontend/frontend.py:0 -> repo_path", + "CALLS 
parsers/cpp_frontend/frontend.py:0 -> run_cpp_frontend", + "CALLS parsers/cpp_frontend/frontend.py:0 -> start", + "CALLS parsers/cpp_frontend/frontend.py:0 -> type", + "CALLS parsers/cpp_frontend/frontend.py:0 -> up", + "CALLS parsers/cpp_frontend/qn.py:0 -> CppQnResolver", + "CALLS parsers/cpp_frontend/qn.py:0 -> DefinitionProcessor", + "CALLS parsers/cpp_frontend/qn.py:0 -> GraphUpdater", + "CALLS parsers/cpp_frontend/qn.py:0 -> _collect_eligible_files", + "CALLS parsers/cpp_frontend/qn.py:0 -> _disambiguate_module_qn", + "CALLS parsers/cpp_frontend/qn.py:0 -> _module_qn", + "CALLS parsers/cpp_frontend/qn.py:0 -> build_qualified_name", + "CALLS parsers/cpp_frontend/qn.py:0 -> down", + "CALLS parsers/cpp_frontend/qn.py:0 -> extract_destructor_name", + "CALLS parsers/cpp_frontend/qn.py:0 -> extract_operator_name", + "CALLS parsers/cpp_frontend/qn.py:0 -> function_qn", + "CALLS parsers/cpp_frontend/qn.py:0 -> keys", + "CALLS parsers/cpp_frontend/qn.py:0 -> method_qn", + "CALLS parsers/cpp_frontend/qn.py:0 -> name", + "CALLS parsers/cpp_frontend/qn.py:0 -> parent", + "CALLS parsers/cpp_frontend/qn.py:0 -> repo_path", + "CALLS parsers/cpp_frontend/qn.py:0 -> type_qn", + "CALLS parsers/definition_processor.py:0 -> ClassIngestMixin", + "CALLS parsers/definition_processor.py:0 -> DefinitionProcessor", + "CALLS parsers/definition_processor.py:0 -> FunctionIngestMixin", + "CALLS parsers/definition_processor.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/definition_processor.py:0 -> ImportProcessor", + "CALLS parsers/definition_processor.py:0 -> IngestorProtocol", + "CALLS parsers/definition_processor.py:0 -> JsTsIngestMixin", + "CALLS parsers/definition_processor.py:0 -> LanguageHandler", + "CALLS parsers/definition_processor.py:0 -> LanguageQueries", + "CALLS parsers/definition_processor.py:0 -> NodeLabel", + "CALLS parsers/definition_processor.py:0 -> RelationshipType", + "CALLS parsers/definition_processor.py:0 -> SupportedLanguage", + "CALLS 
parsers/definition_processor.py:0 -> _extract_decorators", + "CALLS parsers/definition_processor.py:0 -> _get_docstring", + "CALLS parsers/definition_processor.py:0 -> children", + "CALLS parsers/definition_processor.py:0 -> import_processor", + "CALLS parsers/definition_processor.py:0 -> logs", + "CALLS parsers/definition_processor.py:0 -> module_qn", + "CALLS parsers/definition_processor.py:0 -> name", + "CALLS parsers/definition_processor.py:0 -> nodes", + "CALLS parsers/definition_processor.py:0 -> parent", + "CALLS parsers/definition_processor.py:0 -> process_dependencies", + "CALLS parsers/definition_processor.py:0 -> process_file", + "CALLS parsers/definition_processor.py:0 -> repo_path", + "CALLS parsers/definition_processor.py:0 -> text", + "CALLS parsers/definition_processor.py:0 -> type", + "CALLS parsers/dependency_parser.py:0 -> DependencyParser", + "CALLS parsers/dependency_parser.py:0 -> logs", + "CALLS parsers/dependency_parser.py:0 -> name", + "CALLS parsers/dependency_parser.py:0 -> parse_dependencies", + "CALLS parsers/factory.py:0 -> ASTCacheProtocol", + "CALLS parsers/factory.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/factory.py:0 -> IngestorProtocol", + "CALLS parsers/factory.py:0 -> LanguageQueries", + "CALLS parsers/factory.py:0 -> ProcessorFactory", + "CALLS parsers/factory.py:0 -> SupportedLanguage", + "CALLS parsers/factory.py:0 -> call_processor", + "CALLS parsers/factory.py:0 -> definition_processor", + "CALLS parsers/factory.py:0 -> import_processor", + "CALLS parsers/factory.py:0 -> repo_path", + "CALLS parsers/factory.py:0 -> structure_processor", + "CALLS parsers/factory.py:0 -> type_inference", + "CALLS parsers/function_ingest.py:0 -> FunctionIngestMixin", + "CALLS parsers/function_ingest.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/function_ingest.py:0 -> IngestorProtocol", + "CALLS parsers/function_ingest.py:0 -> LanguageHandler", + "CALLS parsers/function_ingest.py:0 -> LanguageQueries", + "CALLS 
parsers/function_ingest.py:0 -> LanguageSpec", + "CALLS parsers/function_ingest.py:0 -> NodeType", + "CALLS parsers/function_ingest.py:0 -> RelationshipType", + "CALLS parsers/function_ingest.py:0 -> SupportedLanguage", + "CALLS parsers/function_ingest.py:0 -> _ingest_all_functions", + "CALLS parsers/function_ingest.py:0 -> children", + "CALLS parsers/function_ingest.py:0 -> class_qn", + "CALLS parsers/function_ingest.py:0 -> logs", + "CALLS parsers/function_ingest.py:0 -> method_qn", + "CALLS parsers/function_ingest.py:0 -> module_qn", + "CALLS parsers/function_ingest.py:0 -> name", + "CALLS parsers/function_ingest.py:0 -> parent", + "CALLS parsers/function_ingest.py:0 -> repo_path", + "CALLS parsers/function_ingest.py:0 -> resolve_deferred_cpp_methods", + "CALLS parsers/function_ingest.py:0 -> resolve_deferred_go_methods", + "CALLS parsers/function_ingest.py:0 -> text", + "CALLS parsers/function_ingest.py:0 -> up", + "CALLS parsers/function_ingest.py:0 -> value", + "CALLS parsers/go/__init__.py:0 -> extract_receiver_type_name", + "CALLS parsers/go/__init__.py:0 -> is_receiver_method", + "CALLS parsers/go/utils.py:0 -> children", + "CALLS parsers/go/utils.py:0 -> down", + "CALLS parsers/go/utils.py:0 -> extract_receiver_type_name", + "CALLS parsers/go/utils.py:0 -> is_receiver_method", + "CALLS parsers/go/utils.py:0 -> name", + "CALLS parsers/go/utils.py:0 -> text", + "CALLS parsers/go/utils.py:0 -> type", + "CALLS parsers/handlers/__init__.py:0 -> BaseLanguageHandler", + "CALLS parsers/handlers/__init__.py:0 -> LanguageHandler", + "CALLS parsers/handlers/__init__.py:0 -> get_handler", + "CALLS parsers/handlers/base.py:0 -> BaseLanguageHandler", + "CALLS parsers/handlers/base.py:0 -> LanguageSpec", + "CALLS parsers/handlers/base.py:0 -> build_function_qualified_name", + "CALLS parsers/handlers/base.py:0 -> build_method_qualified_name", + "CALLS parsers/handlers/base.py:0 -> build_nested_function_qn", + "CALLS parsers/handlers/base.py:0 -> class_qn", + "CALLS 
parsers/handlers/base.py:0 -> extract_base_class_name", + "CALLS parsers/handlers/base.py:0 -> extract_decorators", + "CALLS parsers/handlers/base.py:0 -> extract_function_name", + "CALLS parsers/handlers/base.py:0 -> extract_impl_target", + "CALLS parsers/handlers/base.py:0 -> is_class_method", + "CALLS parsers/handlers/base.py:0 -> is_export_inside_function", + "CALLS parsers/handlers/base.py:0 -> is_function_exported", + "CALLS parsers/handlers/base.py:0 -> is_inside_method_with_object_literals", + "CALLS parsers/handlers/base.py:0 -> module_qn", + "CALLS parsers/handlers/base.py:0 -> name", + "CALLS parsers/handlers/base.py:0 -> parent", + "CALLS parsers/handlers/base.py:0 -> repo_path", + "CALLS parsers/handlers/base.py:0 -> should_process_as_impl_block", + "CALLS parsers/handlers/base.py:0 -> text", + "CALLS parsers/handlers/base.py:0 -> type", + "CALLS parsers/handlers/cpp.py:0 -> BaseLanguageHandler", + "CALLS parsers/handlers/cpp.py:0 -> CppHandler", + "CALLS parsers/handlers/cpp.py:0 -> LanguageSpec", + "CALLS parsers/handlers/cpp.py:0 -> SupportedLanguage", + "CALLS parsers/handlers/cpp.py:0 -> build_function_qualified_name", + "CALLS parsers/handlers/cpp.py:0 -> extract_base_class_name", + "CALLS parsers/handlers/cpp.py:0 -> is_function_exported", + "CALLS parsers/handlers/cpp.py:0 -> module_qn", + "CALLS parsers/handlers/cpp.py:0 -> repo_path", + "CALLS parsers/handlers/cpp.py:0 -> text", + "CALLS parsers/handlers/cpp.py:0 -> type", + "CALLS parsers/handlers/java.py:0 -> BaseLanguageHandler", + "CALLS parsers/handlers/java.py:0 -> JavaHandler", + "CALLS parsers/handlers/java.py:0 -> build_method_qualified_name", + "CALLS parsers/handlers/java.py:0 -> class_qn", + "CALLS parsers/handlers/java.py:0 -> extract_decorators", + "CALLS parsers/handlers/js_ts.py:0 -> BaseLanguageHandler", + "CALLS parsers/handlers/js_ts.py:0 -> JsTsHandler", + "CALLS parsers/handlers/js_ts.py:0 -> LanguageSpec", + "CALLS parsers/handlers/js_ts.py:0 -> 
build_nested_function_qn", + "CALLS parsers/handlers/js_ts.py:0 -> children", + "CALLS parsers/handlers/js_ts.py:0 -> extract_decorators", + "CALLS parsers/handlers/js_ts.py:0 -> is_class_method", + "CALLS parsers/handlers/js_ts.py:0 -> is_export_inside_function", + "CALLS parsers/handlers/js_ts.py:0 -> module_qn", + "CALLS parsers/handlers/js_ts.py:0 -> name", + "CALLS parsers/handlers/js_ts.py:0 -> parent", + "CALLS parsers/handlers/js_ts.py:0 -> text", + "CALLS parsers/handlers/js_ts.py:0 -> type", + "CALLS parsers/handlers/lua.py:0 -> BaseLanguageHandler", + "CALLS parsers/handlers/lua.py:0 -> LuaHandler", + "CALLS parsers/handlers/lua.py:0 -> extract_function_name", + "CALLS parsers/handlers/lua.py:0 -> text", + "CALLS parsers/handlers/lua.py:0 -> type", + "CALLS parsers/handlers/php.py:0 -> BaseLanguageHandler", + "CALLS parsers/handlers/php.py:0 -> PhpHandler", + "CALLS parsers/handlers/php.py:0 -> children", + "CALLS parsers/handlers/php.py:0 -> extract_decorators", + "CALLS parsers/handlers/php.py:0 -> extract_function_name", + "CALLS parsers/handlers/php.py:0 -> is_class_method", + "CALLS parsers/handlers/php.py:0 -> is_function_exported", + "CALLS parsers/handlers/php.py:0 -> parent", + "CALLS parsers/handlers/php.py:0 -> text", + "CALLS parsers/handlers/php.py:0 -> type", + "CALLS parsers/handlers/protocol.py:0 -> LanguageHandler", + "CALLS parsers/handlers/protocol.py:0 -> LanguageSpec", + "CALLS parsers/handlers/protocol.py:0 -> build_function_qualified_name", + "CALLS parsers/handlers/protocol.py:0 -> build_method_qualified_name", + "CALLS parsers/handlers/protocol.py:0 -> build_nested_function_qn", + "CALLS parsers/handlers/protocol.py:0 -> class_qn", + "CALLS parsers/handlers/protocol.py:0 -> extract_base_class_name", + "CALLS parsers/handlers/protocol.py:0 -> extract_decorators", + "CALLS parsers/handlers/protocol.py:0 -> extract_function_name", + "CALLS parsers/handlers/protocol.py:0 -> extract_impl_target", + "CALLS 
parsers/handlers/protocol.py:0 -> is_class_method", + "CALLS parsers/handlers/protocol.py:0 -> is_export_inside_function", + "CALLS parsers/handlers/protocol.py:0 -> is_function_exported", + "CALLS parsers/handlers/protocol.py:0 -> is_inside_method_with_object_literals", + "CALLS parsers/handlers/protocol.py:0 -> module_qn", + "CALLS parsers/handlers/protocol.py:0 -> repo_path", + "CALLS parsers/handlers/protocol.py:0 -> should_process_as_impl_block", + "CALLS parsers/handlers/python.py:0 -> BaseLanguageHandler", + "CALLS parsers/handlers/python.py:0 -> PythonHandler", + "CALLS parsers/handlers/python.py:0 -> children", + "CALLS parsers/handlers/python.py:0 -> extract_decorators", + "CALLS parsers/handlers/python.py:0 -> parent", + "CALLS parsers/handlers/python.py:0 -> type", + "CALLS parsers/handlers/registry.py:0 -> BaseLanguageHandler", + "CALLS parsers/handlers/registry.py:0 -> CppHandler", + "CALLS parsers/handlers/registry.py:0 -> JavaHandler", + "CALLS parsers/handlers/registry.py:0 -> JsTsHandler", + "CALLS parsers/handlers/registry.py:0 -> LanguageHandler", + "CALLS parsers/handlers/registry.py:0 -> LuaHandler", + "CALLS parsers/handlers/registry.py:0 -> PhpHandler", + "CALLS parsers/handlers/registry.py:0 -> PythonHandler", + "CALLS parsers/handlers/registry.py:0 -> RustHandler", + "CALLS parsers/handlers/registry.py:0 -> SupportedLanguage", + "CALLS parsers/handlers/registry.py:0 -> get_handler", + "CALLS parsers/handlers/registry.py:0 -> type", + "CALLS parsers/handlers/rust.py:0 -> BaseLanguageHandler", + "CALLS parsers/handlers/rust.py:0 -> LanguageSpec", + "CALLS parsers/handlers/rust.py:0 -> RustHandler", + "CALLS parsers/handlers/rust.py:0 -> SupportedLanguage", + "CALLS parsers/handlers/rust.py:0 -> build_function_qualified_name", + "CALLS parsers/handlers/rust.py:0 -> children", + "CALLS parsers/handlers/rust.py:0 -> extract_decorators", + "CALLS parsers/handlers/rust.py:0 -> module_qn", + "CALLS parsers/handlers/rust.py:0 -> repo_path", + 
"CALLS parsers/handlers/rust.py:0 -> should_process_as_impl_block", + "CALLS parsers/handlers/rust.py:0 -> type", + "CALLS parsers/import_processor.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/import_processor.py:0 -> ImportProcessor", + "CALLS parsers/import_processor.py:0 -> IngestorProtocol", + "CALLS parsers/import_processor.py:0 -> LanguageQueries", + "CALLS parsers/import_processor.py:0 -> LanguageSpec", + "CALLS parsers/import_processor.py:0 -> NodeLabel", + "CALLS parsers/import_processor.py:0 -> RelationshipType", + "CALLS parsers/import_processor.py:0 -> StdlibCacheStats", + "CALLS parsers/import_processor.py:0 -> SupportedLanguage", + "CALLS parsers/import_processor.py:0 -> children", + "CALLS parsers/import_processor.py:0 -> logs", + "CALLS parsers/import_processor.py:0 -> module_qn", + "CALLS parsers/import_processor.py:0 -> name", + "CALLS parsers/import_processor.py:0 -> nodes", + "CALLS parsers/import_processor.py:0 -> parse_imports", + "CALLS parsers/import_processor.py:0 -> repo", + "CALLS parsers/import_processor.py:0 -> repo_path", + "CALLS parsers/import_processor.py:0 -> text", + "CALLS parsers/import_processor.py:0 -> type", + "CALLS parsers/java/__init__.py:0 -> JavaMethodResolverMixin", + "CALLS parsers/java/__init__.py:0 -> JavaTypeInferenceEngine", + "CALLS parsers/java/__init__.py:0 -> JavaTypeResolverMixin", + "CALLS parsers/java/__init__.py:0 -> JavaVariableAnalyzerMixin", + "CALLS parsers/java/__init__.py:0 -> type_inference", + "CALLS parsers/java/method_resolver.py:0 -> ASTCacheProtocol", + "CALLS parsers/java/method_resolver.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/java/method_resolver.py:0 -> ImportProcessor", + "CALLS parsers/java/method_resolver.py:0 -> JavaMethodResolverMixin", + "CALLS parsers/java/method_resolver.py:0 -> NodeType", + "CALLS parsers/java/method_resolver.py:0 -> _do_resolve_java_method_call", + "CALLS parsers/java/method_resolver.py:0 -> _resolve_java_method_return_type", + "CALLS 
parsers/java/method_resolver.py:0 -> children", + "CALLS parsers/java/method_resolver.py:0 -> class_qn", + "CALLS parsers/java/method_resolver.py:0 -> engine", + "CALLS parsers/java/method_resolver.py:0 -> import_processor", + "CALLS parsers/java/method_resolver.py:0 -> key_func", + "CALLS parsers/java/method_resolver.py:0 -> logs", + "CALLS parsers/java/method_resolver.py:0 -> module_qn", + "CALLS parsers/java/method_resolver.py:0 -> name", + "CALLS parsers/java/method_resolver.py:0 -> parent", + "CALLS parsers/java/method_resolver.py:0 -> start", + "CALLS parsers/java/method_resolver.py:0 -> type", + "CALLS parsers/java/method_resolver.py:0 -> walk", + "CALLS parsers/java/type_inference.py:0 -> ASTCacheProtocol", + "CALLS parsers/java/type_inference.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/java/type_inference.py:0 -> ImportProcessor", + "CALLS parsers/java/type_inference.py:0 -> JavaMethodResolverMixin", + "CALLS parsers/java/type_inference.py:0 -> JavaTypeInferenceEngine", + "CALLS parsers/java/type_inference.py:0 -> JavaTypeResolverMixin", + "CALLS parsers/java/type_inference.py:0 -> JavaVariableAnalyzerMixin", + "CALLS parsers/java/type_inference.py:0 -> LanguageQueries", + "CALLS parsers/java/type_inference.py:0 -> SupportedLanguage", + "CALLS parsers/java/type_inference.py:0 -> _find_containing_java_class", + "CALLS parsers/java/type_inference.py:0 -> build_variable_type_map", + "CALLS parsers/java/type_inference.py:0 -> factory", + "CALLS parsers/java/type_inference.py:0 -> import_processor", + "CALLS parsers/java/type_inference.py:0 -> logs", + "CALLS parsers/java/type_inference.py:0 -> module_qn", + "CALLS parsers/java/type_inference.py:0 -> parent", + "CALLS parsers/java/type_inference.py:0 -> repo_path", + "CALLS parsers/java/type_inference.py:0 -> resolve_java_method_call", + "CALLS parsers/java/type_inference.py:0 -> type", + "CALLS parsers/java/type_inference.py:0 -> value", + "CALLS parsers/java/type_resolver.py:0 -> 
ASTCacheProtocol", + "CALLS parsers/java/type_resolver.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/java/type_resolver.py:0 -> ImportProcessor", + "CALLS parsers/java/type_resolver.py:0 -> JavaTypeResolverMixin", + "CALLS parsers/java/type_resolver.py:0 -> NodeType", + "CALLS parsers/java/type_resolver.py:0 -> _find_registry_entries_under", + "CALLS parsers/java/type_resolver.py:0 -> _get_current_class_name", + "CALLS parsers/java/type_resolver.py:0 -> _get_implemented_interfaces", + "CALLS parsers/java/type_resolver.py:0 -> _get_superclass_name", + "CALLS parsers/java/type_resolver.py:0 -> _rank_module_candidates", + "CALLS parsers/java/type_resolver.py:0 -> children", + "CALLS parsers/java/type_resolver.py:0 -> class_qn", + "CALLS parsers/java/type_resolver.py:0 -> import_processor", + "CALLS parsers/java/type_resolver.py:0 -> module_qn", + "CALLS parsers/java/type_resolver.py:0 -> type", + "CALLS parsers/java/utils.py:0 -> ASTCacheProtocol", + "CALLS parsers/java/utils.py:0 -> build_qualified_name", + "CALLS parsers/java/utils.py:0 -> children", + "CALLS parsers/java/utils.py:0 -> class_qn", + "CALLS parsers/java/utils.py:0 -> extract_annotation_info", + "CALLS parsers/java/utils.py:0 -> extract_class_info", + "CALLS parsers/java/utils.py:0 -> extract_field_info", + "CALLS parsers/java/utils.py:0 -> extract_import_path", + "CALLS parsers/java/utils.py:0 -> extract_method_call_info", + "CALLS parsers/java/utils.py:0 -> extract_method_info", + "CALLS parsers/java/utils.py:0 -> extract_package_name", + "CALLS parsers/java/utils.py:0 -> find_package_start_index", + "CALLS parsers/java/utils.py:0 -> get_class_context_from_qn", + "CALLS parsers/java/utils.py:0 -> get_java_visibility", + "CALLS parsers/java/utils.py:0 -> is_main_method", + "CALLS parsers/java/utils.py:0 -> module_qn", + "CALLS parsers/java/utils.py:0 -> name", + "CALLS parsers/java/utils.py:0 -> parent", + "CALLS parsers/java/utils.py:0 -> type", + "CALLS parsers/java/utils.py:0 -> wrapper", 
+ "CALLS parsers/java/variable_analyzer.py:0 -> ASTCacheProtocol", + "CALLS parsers/java/variable_analyzer.py:0 -> JavaVariableAnalyzerMixin", + "CALLS parsers/java/variable_analyzer.py:0 -> _collect_all_variable_types", + "CALLS parsers/java/variable_analyzer.py:0 -> _find_field_type_in_class", + "CALLS parsers/java/variable_analyzer.py:0 -> children", + "CALLS parsers/java/variable_analyzer.py:0 -> class_qn", + "CALLS parsers/java/variable_analyzer.py:0 -> infer", + "CALLS parsers/java/variable_analyzer.py:0 -> logs", + "CALLS parsers/java/variable_analyzer.py:0 -> module_qn", + "CALLS parsers/java/variable_analyzer.py:0 -> name", + "CALLS parsers/java/variable_analyzer.py:0 -> parent", + "CALLS parsers/java/variable_analyzer.py:0 -> type", + "CALLS parsers/java/variable_analyzer.py:0 -> up", + "CALLS parsers/js_ts/__init__.py:0 -> JsTypeInferenceEngine", + "CALLS parsers/js_ts/__init__.py:0 -> type_inference", + "CALLS parsers/js_ts/ingest.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/js_ts/ingest.py:0 -> ImportProcessor", + "CALLS parsers/js_ts/ingest.py:0 -> IngestorProtocol", + "CALLS parsers/js_ts/ingest.py:0 -> JsTsIngestMixin", + "CALLS parsers/js_ts/ingest.py:0 -> JsTsModuleSystemMixin", + "CALLS parsers/js_ts/ingest.py:0 -> LanguageHandler", + "CALLS parsers/js_ts/ingest.py:0 -> LanguageQueries", + "CALLS parsers/js_ts/ingest.py:0 -> LanguageSpec", + "CALLS parsers/js_ts/ingest.py:0 -> NodeLabel", + "CALLS parsers/js_ts/ingest.py:0 -> NodeType", + "CALLS parsers/js_ts/ingest.py:0 -> RelationshipType", + "CALLS parsers/js_ts/ingest.py:0 -> SupportedLanguage", + "CALLS parsers/js_ts/ingest.py:0 -> _build_nested_qualified_name", + "CALLS parsers/js_ts/ingest.py:0 -> _ingest_assignment_arrow_functions", + "CALLS parsers/js_ts/ingest.py:0 -> _ingest_object_literal_methods", + "CALLS parsers/js_ts/ingest.py:0 -> _ingest_prototype_inheritance", + "CALLS parsers/js_ts/ingest.py:0 -> _is_export_inside_function", + "CALLS parsers/js_ts/ingest.py:0 -> 
_is_method_in_class", + "CALLS parsers/js_ts/ingest.py:0 -> _is_static_method_in_class", + "CALLS parsers/js_ts/ingest.py:0 -> children", + "CALLS parsers/js_ts/ingest.py:0 -> function_qn", + "CALLS parsers/js_ts/ingest.py:0 -> import_processor", + "CALLS parsers/js_ts/ingest.py:0 -> logs", + "CALLS parsers/js_ts/ingest.py:0 -> method_qn", + "CALLS parsers/js_ts/ingest.py:0 -> module_qn", + "CALLS parsers/js_ts/ingest.py:0 -> name", + "CALLS parsers/js_ts/ingest.py:0 -> parent", + "CALLS parsers/js_ts/ingest.py:0 -> repo_path", + "CALLS parsers/js_ts/ingest.py:0 -> text", + "CALLS parsers/js_ts/ingest.py:0 -> type", + "CALLS parsers/js_ts/module_system.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/js_ts/module_system.py:0 -> ImportProcessor", + "CALLS parsers/js_ts/module_system.py:0 -> IngestorProtocol", + "CALLS parsers/js_ts/module_system.py:0 -> JsTsModuleSystemMixin", + "CALLS parsers/js_ts/module_system.py:0 -> LanguageQueries", + "CALLS parsers/js_ts/module_system.py:0 -> NodeLabel", + "CALLS parsers/js_ts/module_system.py:0 -> RelationshipType", + "CALLS parsers/js_ts/module_system.py:0 -> SupportedLanguage", + "CALLS parsers/js_ts/module_system.py:0 -> _get_docstring", + "CALLS parsers/js_ts/module_system.py:0 -> _ingest_commonjs_exports", + "CALLS parsers/js_ts/module_system.py:0 -> _ingest_es6_exports", + "CALLS parsers/js_ts/module_system.py:0 -> _ingest_missing_import_patterns", + "CALLS parsers/js_ts/module_system.py:0 -> _is_export_inside_function", + "CALLS parsers/js_ts/module_system.py:0 -> children", + "CALLS parsers/js_ts/module_system.py:0 -> import_processor", + "CALLS parsers/js_ts/module_system.py:0 -> logs", + "CALLS parsers/js_ts/module_system.py:0 -> module_qn", + "CALLS parsers/js_ts/module_system.py:0 -> name", + "CALLS parsers/js_ts/module_system.py:0 -> repo_path", + "CALLS parsers/js_ts/module_system.py:0 -> text", + "CALLS parsers/js_ts/module_system.py:0 -> type", + "CALLS parsers/js_ts/type_inference.py:0 -> 
FunctionRegistryTrieProtocol", + "CALLS parsers/js_ts/type_inference.py:0 -> ImportProcessor", + "CALLS parsers/js_ts/type_inference.py:0 -> JsTypeInferenceEngine", + "CALLS parsers/js_ts/type_inference.py:0 -> LanguageQueries", + "CALLS parsers/js_ts/type_inference.py:0 -> NodeType", + "CALLS parsers/js_ts/type_inference.py:0 -> SupportedLanguage", + "CALLS parsers/js_ts/type_inference.py:0 -> build_local_variable_type_map", + "CALLS parsers/js_ts/type_inference.py:0 -> children", + "CALLS parsers/js_ts/type_inference.py:0 -> class_qn", + "CALLS parsers/js_ts/type_inference.py:0 -> import_processor", + "CALLS parsers/js_ts/type_inference.py:0 -> logs", + "CALLS parsers/js_ts/type_inference.py:0 -> method_qn", + "CALLS parsers/js_ts/type_inference.py:0 -> module_qn", + "CALLS parsers/js_ts/type_inference.py:0 -> name", + "CALLS parsers/js_ts/type_inference.py:0 -> text", + "CALLS parsers/js_ts/type_inference.py:0 -> type", + "CALLS parsers/js_ts/type_inference.py:0 -> value", + "CALLS parsers/js_ts/utils.py:0 -> LanguageQueries", + "CALLS parsers/js_ts/utils.py:0 -> SupportedLanguage", + "CALLS parsers/js_ts/utils.py:0 -> analyze_return_expression", + "CALLS parsers/js_ts/utils.py:0 -> children", + "CALLS parsers/js_ts/utils.py:0 -> extract_method_call", + "CALLS parsers/js_ts/utils.py:0 -> find_method_in_ast", + "CALLS parsers/js_ts/utils.py:0 -> find_return_statements", + "CALLS parsers/js_ts/utils.py:0 -> get_js_ts_language_obj", + "CALLS parsers/js_ts/utils.py:0 -> method_qn", + "CALLS parsers/js_ts/utils.py:0 -> text", + "CALLS parsers/js_ts/utils.py:0 -> type", + "CALLS parsers/lua/__init__.py:0 -> LuaTypeInferenceEngine", + "CALLS parsers/lua/__init__.py:0 -> type_inference", + "CALLS parsers/lua/type_inference.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/lua/type_inference.py:0 -> ImportProcessor", + "CALLS parsers/lua/type_inference.py:0 -> LuaTypeInferenceEngine", + "CALLS parsers/lua/type_inference.py:0 -> TreeSitterNodeProtocol", + "CALLS 
parsers/lua/type_inference.py:0 -> build_local_variable_type_map", + "CALLS parsers/lua/type_inference.py:0 -> children", + "CALLS parsers/lua/type_inference.py:0 -> class_qn", + "CALLS parsers/lua/type_inference.py:0 -> import_processor", + "CALLS parsers/lua/type_inference.py:0 -> logs", + "CALLS parsers/lua/type_inference.py:0 -> module_qn", + "CALLS parsers/lua/type_inference.py:0 -> type", + "CALLS parsers/lua/utils.py:0 -> children", + "CALLS parsers/lua/utils.py:0 -> extract_assigned_name", + "CALLS parsers/lua/utils.py:0 -> extract_pcall_second_identifier", + "CALLS parsers/lua/utils.py:0 -> parent", + "CALLS parsers/lua/utils.py:0 -> type", + "CALLS parsers/lua/utils.py:0 -> value", + "CALLS parsers/py/__init__.py:0 -> PythonAstAnalyzerMixin", + "CALLS parsers/py/__init__.py:0 -> PythonExpressionAnalyzerMixin", + "CALLS parsers/py/__init__.py:0 -> PythonTypeInferenceEngine", + "CALLS parsers/py/__init__.py:0 -> PythonVariableAnalyzerMixin", + "CALLS parsers/py/__init__.py:0 -> resolve_class_name", + "CALLS parsers/py/__init__.py:0 -> type_inference", + "CALLS parsers/py/ast_analyzer.py:0 -> ASTCacheProtocol", + "CALLS parsers/py/ast_analyzer.py:0 -> JsTypeInferenceEngine", + "CALLS parsers/py/ast_analyzer.py:0 -> LanguageQueries", + "CALLS parsers/py/ast_analyzer.py:0 -> PythonAstAnalyzerMixin", + "CALLS parsers/py/ast_analyzer.py:0 -> SupportedLanguage", + "CALLS parsers/py/ast_analyzer.py:0 -> _AstAnalyzerDeps", + "CALLS parsers/py/ast_analyzer.py:0 -> _analyze_method_return_statements", + "CALLS parsers/py/ast_analyzer.py:0 -> _find_class_node", + "CALLS parsers/py/ast_analyzer.py:0 -> _infer_type_from_expression", + "CALLS parsers/py/ast_analyzer.py:0 -> _traverse_for_assignments", + "CALLS parsers/py/ast_analyzer.py:0 -> _traverse_single_pass", + "CALLS parsers/py/ast_analyzer.py:0 -> children", + "CALLS parsers/py/ast_analyzer.py:0 -> class_qn", + "CALLS parsers/py/ast_analyzer.py:0 -> factory", + "CALLS parsers/py/ast_analyzer.py:0 -> 
find_method_in_ast", + "CALLS parsers/py/ast_analyzer.py:0 -> logs", + "CALLS parsers/py/ast_analyzer.py:0 -> method_qn", + "CALLS parsers/py/ast_analyzer.py:0 -> module_qn", + "CALLS parsers/py/ast_analyzer.py:0 -> name", + "CALLS parsers/py/ast_analyzer.py:0 -> read", + "CALLS parsers/py/ast_analyzer.py:0 -> text", + "CALLS parsers/py/ast_analyzer.py:0 -> type", + "CALLS parsers/py/ast_analyzer.py:0 -> type_inference", + "CALLS parsers/py/expression_analyzer.py:0 -> ASTCacheProtocol", + "CALLS parsers/py/expression_analyzer.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/py/expression_analyzer.py:0 -> ImportProcessor", + "CALLS parsers/py/expression_analyzer.py:0 -> NodeType", + "CALLS parsers/py/expression_analyzer.py:0 -> PythonExpressionAnalyzerMixin", + "CALLS parsers/py/expression_analyzer.py:0 -> SupportedLanguage", + "CALLS parsers/py/expression_analyzer.py:0 -> _ExpressionAnalyzerDeps", + "CALLS parsers/py/expression_analyzer.py:0 -> _infer_expression_return_type", + "CALLS parsers/py/expression_analyzer.py:0 -> _infer_type_from_expression_complex", + "CALLS parsers/py/expression_analyzer.py:0 -> build_local_variable_type_map", + "CALLS parsers/py/expression_analyzer.py:0 -> class_qn", + "CALLS parsers/py/expression_analyzer.py:0 -> factory", + "CALLS parsers/py/expression_analyzer.py:0 -> import_processor", + "CALLS parsers/py/expression_analyzer.py:0 -> key_func", + "CALLS parsers/py/expression_analyzer.py:0 -> logs", + "CALLS parsers/py/expression_analyzer.py:0 -> method_qn", + "CALLS parsers/py/expression_analyzer.py:0 -> module_qn", + "CALLS parsers/py/expression_analyzer.py:0 -> text", + "CALLS parsers/py/expression_analyzer.py:0 -> type", + "CALLS parsers/py/type_inference.py:0 -> ASTCacheProtocol", + "CALLS parsers/py/type_inference.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/py/type_inference.py:0 -> ImportProcessor", + "CALLS parsers/py/type_inference.py:0 -> JsTypeInferenceEngine", + "CALLS parsers/py/type_inference.py:0 -> 
LanguageQueries", + "CALLS parsers/py/type_inference.py:0 -> PythonAstAnalyzerMixin", + "CALLS parsers/py/type_inference.py:0 -> PythonExpressionAnalyzerMixin", + "CALLS parsers/py/type_inference.py:0 -> PythonTypeInferenceEngine", + "CALLS parsers/py/type_inference.py:0 -> PythonVariableAnalyzerMixin", + "CALLS parsers/py/type_inference.py:0 -> SupportedLanguage", + "CALLS parsers/py/type_inference.py:0 -> build_local_variable_type_map", + "CALLS parsers/py/type_inference.py:0 -> factory", + "CALLS parsers/py/type_inference.py:0 -> import_processor", + "CALLS parsers/py/type_inference.py:0 -> logs", + "CALLS parsers/py/type_inference.py:0 -> module_qn", + "CALLS parsers/py/type_inference.py:0 -> repo_path", + "CALLS parsers/py/type_inference.py:0 -> type", + "CALLS parsers/py/utils.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/py/utils.py:0 -> ImportProcessor", + "CALLS parsers/py/utils.py:0 -> import_processor", + "CALLS parsers/py/utils.py:0 -> module_qn", + "CALLS parsers/py/utils.py:0 -> resolve_class_name", + "CALLS parsers/py/variable_analyzer.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/py/variable_analyzer.py:0 -> ImportProcessor", + "CALLS parsers/py/variable_analyzer.py:0 -> LanguageHandler", + "CALLS parsers/py/variable_analyzer.py:0 -> NodeType", + "CALLS parsers/py/variable_analyzer.py:0 -> PythonVariableAnalyzerMixin", + "CALLS parsers/py/variable_analyzer.py:0 -> SupportedLanguage", + "CALLS parsers/py/variable_analyzer.py:0 -> _VariableAnalyzerDeps", + "CALLS parsers/py/variable_analyzer.py:0 -> _analyze_comprehension", + "CALLS parsers/py/variable_analyzer.py:0 -> _analyze_for_loop", + "CALLS parsers/py/variable_analyzer.py:0 -> _collect_local_aliases", + "CALLS parsers/py/variable_analyzer.py:0 -> _expand_chained_attribute_types", + "CALLS parsers/py/variable_analyzer.py:0 -> _infer_class_annotation_types", + "CALLS parsers/py/variable_analyzer.py:0 -> _infer_instance_attributes_from_init", + "CALLS 
parsers/py/variable_analyzer.py:0 -> _infer_instance_variable_types_from_assignments", + "CALLS parsers/py/variable_analyzer.py:0 -> _infer_property_return_types", + "CALLS parsers/py/variable_analyzer.py:0 -> children", + "CALLS parsers/py/variable_analyzer.py:0 -> class_qn", + "CALLS parsers/py/variable_analyzer.py:0 -> factory", + "CALLS parsers/py/variable_analyzer.py:0 -> forward", + "CALLS parsers/py/variable_analyzer.py:0 -> import_processor", + "CALLS parsers/py/variable_analyzer.py:0 -> infer", + "CALLS parsers/py/variable_analyzer.py:0 -> logs", + "CALLS parsers/py/variable_analyzer.py:0 -> module_qn", + "CALLS parsers/py/variable_analyzer.py:0 -> name", + "CALLS parsers/py/variable_analyzer.py:0 -> parent", + "CALLS parsers/py/variable_analyzer.py:0 -> text", + "CALLS parsers/py/variable_analyzer.py:0 -> type", + "CALLS parsers/rs/utils.py:0 -> build_module_path", + "CALLS parsers/rs/utils.py:0 -> children", + "CALLS parsers/rs/utils.py:0 -> extract_impl_trait", + "CALLS parsers/rs/utils.py:0 -> extract_use_imports", + "CALLS parsers/rs/utils.py:0 -> name", + "CALLS parsers/rs/utils.py:0 -> parent", + "CALLS parsers/rs/utils.py:0 -> text", + "CALLS parsers/rs/utils.py:0 -> type", + "CALLS parsers/stdlib_extractor.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/stdlib_extractor.py:0 -> StdlibExtractor", + "CALLS parsers/stdlib_extractor.py:0 -> SupportedLanguage", + "CALLS parsers/stdlib_extractor.py:0 -> clear_stdlib_cache", + "CALLS parsers/stdlib_extractor.py:0 -> extract_module_path", + "CALLS parsers/stdlib_extractor.py:0 -> flush_stdlib_cache", + "CALLS parsers/stdlib_extractor.py:0 -> get_stdlib_cache_stats", + "CALLS parsers/stdlib_extractor.py:0 -> load_persistent_cache", + "CALLS parsers/stdlib_extractor.py:0 -> logs", + "CALLS parsers/stdlib_extractor.py:0 -> main", + "CALLS parsers/stdlib_extractor.py:0 -> name", + "CALLS parsers/stdlib_extractor.py:0 -> process", + "CALLS parsers/stdlib_extractor.py:0 -> repo_path", + "CALLS 
parsers/stdlib_extractor.py:0 -> text", + "CALLS parsers/stdlib_extractor.py:0 -> type", + "CALLS parsers/structure_processor.py:0 -> IngestorProtocol", + "CALLS parsers/structure_processor.py:0 -> LanguageQueries", + "CALLS parsers/structure_processor.py:0 -> NodeLabel", + "CALLS parsers/structure_processor.py:0 -> RelationshipType", + "CALLS parsers/structure_processor.py:0 -> StructureProcessor", + "CALLS parsers/structure_processor.py:0 -> SupportedLanguage", + "CALLS parsers/structure_processor.py:0 -> identify_structure", + "CALLS parsers/structure_processor.py:0 -> logs", + "CALLS parsers/structure_processor.py:0 -> name", + "CALLS parsers/structure_processor.py:0 -> parent", + "CALLS parsers/structure_processor.py:0 -> process_generic_file", + "CALLS parsers/structure_processor.py:0 -> repo_path", + "CALLS parsers/type_inference.py:0 -> ASTCacheProtocol", + "CALLS parsers/type_inference.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/type_inference.py:0 -> ImportProcessor", + "CALLS parsers/type_inference.py:0 -> LanguageQueries", + "CALLS parsers/type_inference.py:0 -> SupportedLanguage", + "CALLS parsers/type_inference.py:0 -> TypeInferenceEngine", + "CALLS parsers/type_inference.py:0 -> _build_java_variable_type_map", + "CALLS parsers/type_inference.py:0 -> _find_method_ast_node", + "CALLS parsers/type_inference.py:0 -> _resolve_class_name", + "CALLS parsers/type_inference.py:0 -> factory", + "CALLS parsers/type_inference.py:0 -> import_processor", + "CALLS parsers/type_inference.py:0 -> java_type_inference", + "CALLS parsers/type_inference.py:0 -> js_type_inference", + "CALLS parsers/type_inference.py:0 -> lua_type_inference", + "CALLS parsers/type_inference.py:0 -> module_qn", + "CALLS parsers/type_inference.py:0 -> python_type_inference", + "CALLS parsers/type_inference.py:0 -> repo_path", + "CALLS parsers/utils.py:0 -> FunctionRegistryTrieProtocol", + "CALLS parsers/utils.py:0 -> IngestorProtocol", + "CALLS parsers/utils.py:0 -> 
LanguageQueries", + "CALLS parsers/utils.py:0 -> LanguageSpec", + "CALLS parsers/utils.py:0 -> NodeType", + "CALLS parsers/utils.py:0 -> RelationshipType", + "CALLS parsers/utils.py:0 -> SupportedLanguage", + "CALLS parsers/utils.py:0 -> TreeSitterNodeProtocol", + "CALLS parsers/utils.py:0 -> _start_byte_key", + "CALLS parsers/utils.py:0 -> children", + "CALLS parsers/utils.py:0 -> decorator", + "CALLS parsers/utils.py:0 -> function_qn", + "CALLS parsers/utils.py:0 -> get_cached_query", + "CALLS parsers/utils.py:0 -> get_function_captures", + "CALLS parsers/utils.py:0 -> get_query_cursor", + "CALLS parsers/utils.py:0 -> index", + "CALLS parsers/utils.py:0 -> ingest_exported_function", + "CALLS parsers/utils.py:0 -> ingest_method", + "CALLS parsers/utils.py:0 -> is_method_node", + "CALLS parsers/utils.py:0 -> logs", + "CALLS parsers/utils.py:0 -> method_qn", + "CALLS parsers/utils.py:0 -> module_qn", + "CALLS parsers/utils.py:0 -> name", + "CALLS parsers/utils.py:0 -> nodes", + "CALLS parsers/utils.py:0 -> parent", + "CALLS parsers/utils.py:0 -> process", + "CALLS parsers/utils.py:0 -> repo_path", + "CALLS parsers/utils.py:0 -> safe_decode_with_fallback", + "CALLS parsers/utils.py:0 -> text", + "CALLS parsers/utils.py:0 -> type", + "CALLS parsers/utils.py:0 -> up", + "CALLS parsers/utils.py:0 -> value", + "CALLS prompts.py:0 -> add", + "CALLS prompts.py:0 -> analyze", + "CALLS prompts.py:0 -> build_rag_orchestrator_prompt", + "CALLS prompts.py:0 -> clear", + "CALLS prompts.py:0 -> create_file", + "CALLS prompts.py:0 -> create_new_file", + "CALLS prompts.py:0 -> done", + "CALLS prompts.py:0 -> edit_file", + "CALLS prompts.py:0 -> engine", + "CALLS prompts.py:0 -> index", + "CALLS prompts.py:0 -> items", + "CALLS prompts.py:0 -> list_projects", + "CALLS prompts.py:0 -> main", + "CALLS prompts.py:0 -> name", + "CALLS prompts.py:0 -> nodes", + "CALLS prompts.py:0 -> process", + "CALLS prompts.py:0 -> query_codebase_knowledge_graph", + "CALLS prompts.py:0 -> 
query_graph", + "CALLS prompts.py:0 -> read", + "CALLS prompts.py:0 -> read_file", + "CALLS prompts.py:0 -> read_file_content", + "CALLS prompts.py:0 -> relationships", + "CALLS prompts.py:0 -> replace_code_surgically", + "CALLS prompts.py:0 -> run", + "CALLS prompts.py:0 -> semantic_code_search", + "CALLS prompts.py:0 -> semantic_search", + "CALLS prompts.py:0 -> start", + "CALLS prompts.py:0 -> type", + "CALLS providers/base.py:0 -> AnthropicProvider", + "CALLS providers/base.py:0 -> AzureOpenAIProvider", + "CALLS providers/base.py:0 -> GoogleProvider", + "CALLS providers/base.py:0 -> GoogleProviderType", + "CALLS providers/base.py:0 -> LiteLLMProvider", + "CALLS providers/base.py:0 -> ModelConfig", + "CALLS providers/base.py:0 -> ModelProvider", + "CALLS providers/base.py:0 -> OllamaProvider", + "CALLS providers/base.py:0 -> OpenAIProvider", + "CALLS providers/base.py:0 -> Provider", + "CALLS providers/base.py:0 -> check_litellm_proxy_running", + "CALLS providers/base.py:0 -> create_model", + "CALLS providers/base.py:0 -> get_provider_from_config", + "CALLS providers/base.py:0 -> list_providers", + "CALLS providers/base.py:0 -> logs", + "CALLS providers/base.py:0 -> name", + "CALLS providers/base.py:0 -> ollama_endpoint", + "CALLS providers/base.py:0 -> parse", + "CALLS providers/base.py:0 -> provider_name", + "CALLS providers/base.py:0 -> register_provider", + "CALLS providers/base.py:0 -> type", + "CALLS providers/litellm.py:0 -> LiteLLMProvider", + "CALLS providers/litellm.py:0 -> ModelProvider", + "CALLS providers/litellm.py:0 -> Provider", + "CALLS providers/litellm.py:0 -> create_model", + "CALLS providers/litellm.py:0 -> provider_name", + "CALLS readme_sections.py:0 -> LanguageStatus", + "CALLS readme_sections.py:0 -> SupportedLanguage", + "CALLS readme_sections.py:0 -> description", + "CALLS readme_sections.py:0 -> generate_all_sections", + "CALLS readme_sections.py:0 -> name", + "CALLS readme_sections.py:0 -> parent", + "CALLS readme_sections.py:0 -> 
project_root", + "CALLS readme_sections.py:0 -> status", + "CALLS readme_sections.py:0 -> summary", + "CALLS readme_sections.py:0 -> value", + "CALLS schema_builder.py:0 -> NodeSchema", + "CALLS schema_builder.py:0 -> RelationshipSchema", + "CALLS schemas.py:0 -> CodeSnippet", + "CALLS schemas.py:0 -> EditResult", + "CALLS schemas.py:0 -> FileCreationResult", + "CALLS schemas.py:0 -> FileReadResult", + "CALLS schemas.py:0 -> HealthCheckResult", + "CALLS schemas.py:0 -> QueryGraphData", + "CALLS schemas.py:0 -> ShellCommandResult", + "CALLS schemas.py:0 -> _format_results", + "CALLS schemas.py:0 -> _set_success_on_error", + "CALLS schemas.py:0 -> name", + "CALLS schemas.py:0 -> summary", + "CALLS services/__init__.py:0 -> IngestorProtocol", + "CALLS services/__init__.py:0 -> QueryProtocol", + "CALLS services/__init__.py:0 -> ensure_node_batch", + "CALLS services/__init__.py:0 -> ensure_relationship_batch", + "CALLS services/__init__.py:0 -> execute_write", + "CALLS services/__init__.py:0 -> fetch_all", + "CALLS services/__init__.py:0 -> flush_all", + "CALLS services/anthropic_token_counter.py:0 -> count_anthropic_context", + "CALLS services/anthropic_token_counter.py:0 -> name", + "CALLS services/anthropic_token_counter.py:0 -> text", + "CALLS services/anthropic_token_counter.py:0 -> type", + "CALLS services/anthropic_token_counter.py:0 -> value", + "CALLS services/graph_service.py:0 -> CursorProtocol", + "CALLS services/graph_service.py:0 -> MemgraphIngestor", + "CALLS services/graph_service.py:0 -> _flush_node_group_with_own_conn", + "CALLS services/graph_service.py:0 -> _flush_rel_group_with_own_conn", + "CALLS services/graph_service.py:0 -> build_create_node_query", + "CALLS services/graph_service.py:0 -> build_create_relationship_query", + "CALLS services/graph_service.py:0 -> build_merge_node_query", + "CALLS services/graph_service.py:0 -> build_merge_relationship_query", + "CALLS services/graph_service.py:0 -> clean_database", + "CALLS 
services/graph_service.py:0 -> delete_project", + "CALLS services/graph_service.py:0 -> description", + "CALLS services/graph_service.py:0 -> ensure_constraints", + "CALLS services/graph_service.py:0 -> ensure_node_batch", + "CALLS services/graph_service.py:0 -> ensure_relationship_batch", + "CALLS services/graph_service.py:0 -> execute_write", + "CALLS services/graph_service.py:0 -> export_graph_to_dict", + "CALLS services/graph_service.py:0 -> flush", + "CALLS services/graph_service.py:0 -> index", + "CALLS services/graph_service.py:0 -> list_projects", + "CALLS services/graph_service.py:0 -> logs", + "CALLS services/graph_service.py:0 -> metadata", + "CALLS services/graph_service.py:0 -> name", + "CALLS services/graph_service.py:0 -> nodes", + "CALLS services/graph_service.py:0 -> relationships", + "CALLS services/graph_service.py:0 -> type", + "CALLS services/llm.py:0 -> CypherGenerator", + "CALLS services/llm.py:0 -> ModelConfig", + "CALLS services/llm.py:0 -> Provider", + "CALLS services/llm.py:0 -> active_cypher_config", + "CALLS services/llm.py:0 -> active_orchestrator_config", + "CALLS services/llm.py:0 -> agent", + "CALLS services/llm.py:0 -> create_rag_orchestrator", + "CALLS services/llm.py:0 -> generate", + "CALLS services/llm.py:0 -> logs", + "CALLS services/llm.py:0 -> name", + "CALLS services/llm.py:0 -> project_root", + "CALLS services/llm.py:0 -> start", + "CALLS services/protobuf_service.py:0 -> ProtobufFileIngestor", + "CALLS services/protobuf_service.py:0 -> RelationshipType", + "CALLS services/protobuf_service.py:0 -> ensure_node_batch", + "CALLS services/protobuf_service.py:0 -> ensure_relationship_batch", + "CALLS services/protobuf_service.py:0 -> flush_all", + "CALLS services/protobuf_service.py:0 -> index", + "CALLS services/protobuf_service.py:0 -> logs", + "CALLS services/protobuf_service.py:0 -> nodes", + "CALLS services/protobuf_service.py:0 -> relationships", + "CALLS services/protobuf_service.py:0 -> type", + "CALLS 
services/protobuf_service.py:0 -> value", + "CALLS stack/__init__.py:0 -> StackManager", + "CALLS stack/__init__.py:0 -> StackStatus", + "CALLS stack/__init__.py:0 -> daemon_down", + "CALLS stack/__init__.py:0 -> daemon_logs", + "CALLS stack/__init__.py:0 -> daemon_restart", + "CALLS stack/__init__.py:0 -> daemon_status", + "CALLS stack/__init__.py:0 -> daemon_up", + "CALLS stack/__init__.py:0 -> ensure_running", + "CALLS stack/cli.py:0 -> StackError", + "CALLS stack/cli.py:0 -> cli", + "CALLS stack/cli.py:0 -> compose_file", + "CALLS stack/cli.py:0 -> down_cmd", + "CALLS stack/cli.py:0 -> logs_cmd", + "CALLS stack/cli.py:0 -> restart_cmd", + "CALLS stack/cli.py:0 -> status_cmd", + "CALLS stack/cli.py:0 -> up", + "CALLS stack/cli.py:0 -> up_cmd", + "CALLS stack/cli.py:0 -> value", + "CALLS stack/constants.py:0 -> StackState", + "CALLS stack/constants.py:0 -> down", + "CALLS stack/constants.py:0 -> up", + "CALLS stack/health.py:0 -> status", + "CALLS stack/health.py:0 -> wait_for_memgraph", + "CALLS stack/health.py:0 -> wait_for_qdrant", + "CALLS stack/manager.py:0 -> StackState", + "CALLS stack/manager.py:0 -> compose_file", + "CALLS stack/manager.py:0 -> daemon_down", + "CALLS stack/manager.py:0 -> daemon_logs", + "CALLS stack/manager.py:0 -> daemon_restart", + "CALLS stack/manager.py:0 -> daemon_status", + "CALLS stack/manager.py:0 -> daemon_up", + "CALLS stack/manager.py:0 -> parent", + "CALLS stack/manager.py:0 -> text", + "CALLS tests/conftest.py:0 -> NodeProtocol", + "CALLS tests/conftest.py:0 -> _disable_stack_autostart", + "CALLS tests/conftest.py:0 -> _isolate_cgr_home", + "CALLS tests/conftest.py:0 -> _maybe_start_stack", + "CALLS tests/conftest.py:0 -> child_by_field_name", + "CALLS tests/conftest.py:0 -> children", + "CALLS tests/conftest.py:0 -> cleanup_qdrant_client", + "CALLS tests/conftest.py:0 -> cli", + "CALLS tests/conftest.py:0 -> create_mock_node", + "CALLS tests/conftest.py:0 -> definition_processor", + "CALLS tests/conftest.py:0 -> 
ensure_node_batch", + "CALLS tests/conftest.py:0 -> ensure_relationship_batch", + "CALLS tests/conftest.py:0 -> execute_write", + "CALLS tests/conftest.py:0 -> factory", + "CALLS tests/conftest.py:0 -> fetch_all", + "CALLS tests/conftest.py:0 -> flush_all", + "CALLS tests/conftest.py:0 -> get_node_names", + "CALLS tests/conftest.py:0 -> get_relationships", + "CALLS tests/conftest.py:0 -> graph_updater", + "CALLS tests/conftest.py:0 -> method_calls", + "CALLS tests/conftest.py:0 -> mock_ingestor", + "CALLS tests/conftest.py:0 -> mock_updater", + "CALLS tests/conftest.py:0 -> name", + "CALLS tests/conftest.py:0 -> nodes", + "CALLS tests/conftest.py:0 -> parent", + "CALLS tests/conftest.py:0 -> process_file", + "CALLS tests/conftest.py:0 -> relationships", + "CALLS tests/conftest.py:0 -> repo_path", + "CALLS tests/conftest.py:0 -> run_updater", + "CALLS tests/conftest.py:0 -> structure_processor", + "CALLS tests/conftest.py:0 -> temp_repo", + "CALLS tests/conftest.py:0 -> text", + "CALLS tests/conftest.py:0 -> type", + "CALLS tests/conftest.py:0 -> up", + "CALLS tests/conftest.py:0 -> updater", + "CALLS tests/conftest.py:0 -> value", + "CALLS tests/fuzz_test_parsers.py:0 -> fuzz_language_spec", + "CALLS tests/integration/conftest.py:0 -> graph_service", + "CALLS tests/integration/conftest.py:0 -> memgraph_connection", + "CALLS tests/integration/conftest.py:0 -> memgraph_container", + "CALLS tests/integration/conftest.py:0 -> memgraph_ingestor", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> QueryGraphData", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> TestQueryResultStructure", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> TestQueryToolEndToEnd", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> TestQueryToolWithVariousInputs", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> add", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> anyio_backend", + "CALLS 
tests/integration/test_codebase_query_integration.py:0 -> fetch_all", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> generate", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> generate_query", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> main", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> mock_cypher_gen_realistic", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> mock_ingestor_with_sample_data", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> name", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> silent_console", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> summary", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_complete_query_flow", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_about_classes", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_about_functions", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_handles_database_error_gracefully", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_handles_llm_error_gracefully", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_with_empty_results", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_with_unicode_characters", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_result_has_required_fields", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_result_preserves_data_types", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> type", + "CALLS tests/integration/test_cypher_queries.py:0 -> MemgraphIngestor", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestBuildConstraintQueryUnit", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestBuildDeadCodeQueryIntegration", + "CALLS 
tests/integration/test_cypher_queries.py:0 -> TestBuildDeadCodeQueryUnit", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestBuildMergeNodeQueryIntegration", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestBuildMergeNodeQueryUnit", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestBuildMergeRelationshipQueryIntegration", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestBuildMergeRelationshipQueryUnit", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestBuildNodesByIdsQueryIntegration", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestBuildNodesByIdsQueryUnit", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestCypherDeleteAllIntegration", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestCypherExportNodesIntegration", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestCypherExportRelationshipsIntegration", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestCypherFindByQualifiedNameIntegration", + "CALLS tests/integration/test_cypher_queries.py:0 -> TestCypherGetFunctionSourceLocationIntegration", + "CALLS tests/integration/test_cypher_queries.py:0 -> decorator", + "CALLS tests/integration/test_cypher_queries.py:0 -> export", + "CALLS tests/integration/test_cypher_queries.py:0 -> graph_service", + "CALLS tests/integration/test_cypher_queries.py:0 -> handler", + "CALLS tests/integration/test_cypher_queries.py:0 -> is_exported", + "CALLS tests/integration/test_cypher_queries.py:0 -> load", + "CALLS tests/integration/test_cypher_queries.py:0 -> main", + "CALLS tests/integration/test_cypher_queries.py:0 -> memgraph_ingestor", + "CALLS tests/integration/test_cypher_queries.py:0 -> name", + "CALLS tests/integration/test_cypher_queries.py:0 -> start", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_class_candidates_when_classes_included", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_creates_calls_relationship_with_properties", + "CALLS 
tests/integration/test_cypher_queries.py:0 -> test_creates_relationship_between_nodes", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_deletes_all_nodes", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_exclude_tests_omits_test_function_roots", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_excluding_tests_reports_orphan_and_test_only_code", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_exports_multiple_nodes", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_exports_node_with_labels_and_properties", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_exports_relationship_with_type", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_fetches_nodes_by_ids", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_file_node_query", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_file_path_constraint", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_finds_function_by_qualified_name", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_function_calls_function_with_props", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_function_node_query", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_function_qualified_name_constraint", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_gets_source_location_by_node_id", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_include_classes_adds_class_candidates", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_include_tests_references_test_patterns", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_merge_creates_new_node", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_merge_updates_existing_node", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_module_defines_function_no_props", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_module_load_callee_is_a_root", + "CALLS tests/integration/test_cypher_queries.py:0 -> 
test_module_load_callees_are_roots", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_multiple_node_ids", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_reports_only_the_orphan_with_tests_included", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_returns_empty_for_nonexistent_ids", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_returns_empty_for_nonexistent_name", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_returns_row_shape", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_single_node_id", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_subclass_only_base_is_reported_when_subclass_is_unreachable", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_test_module_call_is_not_a_root_when_excluding_tests", + "CALLS tests/integration/test_cypher_queries.py:0 -> type", + "CALLS tests/integration/test_imports_e2e.py:0 -> MemgraphIngestor", + "CALLS tests/integration/test_imports_e2e.py:0 -> TestCppImportsRelationships", + "CALLS tests/integration/test_imports_e2e.py:0 -> TestGoImportsRelationships", + "CALLS tests/integration/test_imports_e2e.py:0 -> TestJavaImportsRelationships", + "CALLS tests/integration/test_imports_e2e.py:0 -> TestJsImportsRelationships", + "CALLS tests/integration/test_imports_e2e.py:0 -> TestLuaImportsRelationships", + "CALLS tests/integration/test_imports_e2e.py:0 -> TestPythonImportsRelationships", + "CALLS tests/integration/test_imports_e2e.py:0 -> TestRustImportsRelationships", + "CALLS tests/integration/test_imports_e2e.py:0 -> TestTsImportsRelationships", + "CALLS tests/integration/test_imports_e2e.py:0 -> cpp_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> export", + "CALLS tests/integration/test_imports_e2e.py:0 -> go_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> graph_service", + "CALLS tests/integration/test_imports_e2e.py:0 -> graph_updater", + "CALLS tests/integration/test_imports_e2e.py:0 -> 
items", + "CALLS tests/integration/test_imports_e2e.py:0 -> java_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> js_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> lua_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> main", + "CALLS tests/integration/test_imports_e2e.py:0 -> memgraph_ingestor", + "CALLS tests/integration/test_imports_e2e.py:0 -> name", + "CALLS tests/integration/test_imports_e2e.py:0 -> nodes", + "CALLS tests/integration/test_imports_e2e.py:0 -> python_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> relationships", + "CALLS tests/integration/test_imports_e2e.py:0 -> repo_path", + "CALLS tests/integration/test_imports_e2e.py:0 -> rust_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_external_import_creates_module_node", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_external_import_creates_relationship", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_external_include_creates_module_node", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_external_require_creates_module_node", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_internal_import_creates_relationship", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_internal_include_creates_relationship", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_internal_require_creates_relationship", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_stdlib_import_creates_module_node", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_stdlib_import_creates_relationship", + "CALLS tests/integration/test_imports_e2e.py:0 -> ts_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> updater", + "CALLS tests/integration/test_incremental_external_prune_e2e.py:0 -> MemgraphIngestor", + "CALLS tests/integration/test_incremental_external_prune_e2e.py:0 -> graph_service", + "CALLS 
tests/integration/test_incremental_external_prune_e2e.py:0 -> graph_updater", + "CALLS tests/integration/test_incremental_external_prune_e2e.py:0 -> index", + "CALLS tests/integration/test_incremental_external_prune_e2e.py:0 -> memgraph_ingestor", + "CALLS tests/integration/test_incremental_external_prune_e2e.py:0 -> name", + "CALLS tests/integration/test_incremental_external_prune_e2e.py:0 -> nodes", + "CALLS tests/integration/test_incremental_external_prune_e2e.py:0 -> repo_path", + "CALLS tests/integration/test_incremental_external_prune_e2e.py:0 -> test_incremental_rebuild_prunes_orphaned_external_module", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> TestMCPToolsIntegration", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> TestToolConsistency", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> add", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> anyio_backend", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> fetch_all", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> generate", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> mcp_registry", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> mock_cypher_gen", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> mock_generate", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> mock_ingestor", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> name", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> project_root", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> sample_file", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> start", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> temp_test_repo", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> test_all_tools_have_consistent_takes_ctx", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> test_get_code_snippet_actual_behavior", + "CALLS 
tests/integration/test_mcp_tools_integration.py:0 -> test_list_directory_works", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> test_query_code_graph_works", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> test_read_file_works", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> type", + "CALLS tests/integration/test_multi_project_integration.py:0 -> MemgraphIngestor", + "CALLS tests/integration/test_multi_project_integration.py:0 -> TestCleanDatabase", + "CALLS tests/integration/test_multi_project_integration.py:0 -> TestDeleteProject", + "CALLS tests/integration/test_multi_project_integration.py:0 -> TestListProjects", + "CALLS tests/integration/test_multi_project_integration.py:0 -> TestMultiProjectIsolation", + "CALLS tests/integration/test_multi_project_integration.py:0 -> graph_service", + "CALLS tests/integration/test_multi_project_integration.py:0 -> graph_updater", + "CALLS tests/integration/test_multi_project_integration.py:0 -> main", + "CALLS tests/integration/test_multi_project_integration.py:0 -> memgraph_ingestor", + "CALLS tests/integration/test_multi_project_integration.py:0 -> name", + "CALLS tests/integration/test_multi_project_integration.py:0 -> nodes", + "CALLS tests/integration/test_multi_project_integration.py:0 -> project1_path", + "CALLS tests/integration/test_multi_project_integration.py:0 -> project2_path", + "CALLS tests/integration/test_multi_project_integration.py:0 -> repo_path", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_clean_database_removes_all_projects", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_delete_nonexistent_project_no_error", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_delete_project_preserves_other_projects", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_delete_project_removes_all_project_nodes", + "CALLS tests/integration/test_multi_project_integration.py:0 -> 
test_delete_project_removes_files_and_folders", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_list_projects_after_indexing", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_list_projects_empty_database", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_list_projects_multiple", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_projects_have_separate_namespaces", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_reindex_only_affects_target_project", + "CALLS tests/integration/test_multi_project_integration.py:0 -> updater", + "CALLS tests/integration/test_node_label_e2e.py:0 -> MemgraphIngestor", + "CALLS tests/integration/test_node_label_e2e.py:0 -> NodeLabel", + "CALLS tests/integration/test_node_label_e2e.py:0 -> TestCppNodeLabels", + "CALLS tests/integration/test_node_label_e2e.py:0 -> TestGoNodeLabels", + "CALLS tests/integration/test_node_label_e2e.py:0 -> TestJavaNodeLabels", + "CALLS tests/integration/test_node_label_e2e.py:0 -> TestJavaScriptNodeLabels", + "CALLS tests/integration/test_node_label_e2e.py:0 -> TestLuaNodeLabels", + "CALLS tests/integration/test_node_label_e2e.py:0 -> TestPhpNodeLabels", + "CALLS tests/integration/test_node_label_e2e.py:0 -> TestPythonNodeLabels", + "CALLS tests/integration/test_node_label_e2e.py:0 -> TestRustNodeLabels", + "CALLS tests/integration/test_node_label_e2e.py:0 -> TestScalaNodeLabels", + "CALLS tests/integration/test_node_label_e2e.py:0 -> TestTypeScriptNodeLabels", + "CALLS tests/integration/test_node_label_e2e.py:0 -> cpp_module_impl_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> cpp_module_interface_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> cpp_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> export", + "CALLS tests/integration/test_node_label_e2e.py:0 -> go_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> graph_service", + "CALLS 
tests/integration/test_node_label_e2e.py:0 -> graph_updater", + "CALLS tests/integration/test_node_label_e2e.py:0 -> java_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> javascript_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> lua_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> main", + "CALLS tests/integration/test_node_label_e2e.py:0 -> memgraph_ingestor", + "CALLS tests/integration/test_node_label_e2e.py:0 -> name", + "CALLS tests/integration/test_node_label_e2e.py:0 -> php_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> python_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> repo_path", + "CALLS tests/integration/test_node_label_e2e.py:0 -> rust_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> scala_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> status", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_cpp_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_cpp_creates_enum_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_cpp_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_cpp_creates_module_implementation_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_cpp_creates_module_interface_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_cpp_creates_union_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_go_creates_class_nodes_for_structs", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_go_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_go_creates_interface_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_java_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_java_creates_enum_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_java_creates_interface_nodes", + "CALLS 
tests/integration/test_node_label_e2e.py:0 -> test_javascript_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_javascript_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_language_has_defines", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_lua_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_php_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_php_creates_enum_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_php_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_php_creates_interface_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_python_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_python_creates_defines_relationships", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_python_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_python_creates_inherits_relationships", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_python_creates_method_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_rust_creates_class_nodes_for_structs", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_rust_creates_enum_nodes_for_enums", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_rust_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_rust_creates_interface_nodes_for_traits", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_scala_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_scala_creates_interface_nodes_for_traits", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_typescript_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_typescript_creates_enum_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> 
test_typescript_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_typescript_creates_interface_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_typescript_creates_type_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> type", + "CALLS tests/integration/test_node_label_e2e.py:0 -> typescript_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> updater", + "CALLS tests/integration/test_node_label_e2e.py:0 -> value", + "CALLS tests/integration/test_shell_command_integration.py:0 -> TestPipedCommandIntegration", + "CALLS tests/integration/test_shell_command_integration.py:0 -> TestShellCommandErrorHandling", + "CALLS tests/integration/test_shell_command_integration.py:0 -> TestShellCommandGitIntegration", + "CALLS tests/integration/test_shell_command_integration.py:0 -> TestShellCommandIntegration", + "CALLS tests/integration/test_shell_command_integration.py:0 -> TestShellCommandToolIntegration", + "CALLS tests/integration/test_shell_command_integration.py:0 -> anyio_backend", + "CALLS tests/integration/test_shell_command_integration.py:0 -> name", + "CALLS tests/integration/test_shell_command_integration.py:0 -> shell_commander", + "CALLS tests/integration/test_shell_command_integration.py:0 -> status", + "CALLS tests/integration/test_shell_command_integration.py:0 -> temp_test_repo", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_and_operator", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_cat_pipe_cut", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_cat_pipe_rg", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_cat_reads_file_content", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_command_with_nonexistent_file", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_cp_copies_file", + "CALLS tests/integration/test_shell_command_integration.py:0 -> 
test_disallowed_command_in_pipe_rejected", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_echo_outputs_text", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_echo_pipe_wc", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_find_locates_files", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_find_pipe_rg_pipe_wc", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_find_pipe_wc", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_git_init_and_status", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_git_status_without_repo", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_invalid_command_arguments", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_ls_lists_files", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_ls_pipe_head", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_ls_pipe_sort", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_ls_with_flags", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_mkdir_creates_directory", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_mv_moves_file", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_pwd_shows_working_directory", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_rg_searches_content", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_rm_removes_file", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_semicolon_operator", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_subshell_rejected", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_tool_executes_read_only_command_without_approval", + "CALLS tests/integration/test_shell_command_integration.py:0 -> 
test_tool_executes_write_command_with_approval", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_tool_requires_approval_for_write_command", + "CALLS tests/integration/test_tool_calling.py:0 -> TestToolCallingIntegration", + "CALLS tests/integration/test_tool_calling.py:0 -> active_orchestrator_config", + "CALLS tests/integration/test_tool_calling.py:0 -> agent", + "CALLS tests/integration/test_tool_calling.py:0 -> list_directory", + "CALLS tests/integration/test_tool_calling.py:0 -> main", + "CALLS tests/integration/test_tool_calling.py:0 -> name", + "CALLS tests/integration/test_tool_calling.py:0 -> query_graph", + "CALLS tests/integration/test_tool_calling.py:0 -> rag_agent", + "CALLS tests/integration/test_tool_calling.py:0 -> read", + "CALLS tests/integration/test_tool_calling.py:0 -> read_file", + "CALLS tests/integration/test_tool_calling.py:0 -> relationships", + "CALLS tests/integration/test_tool_calling.py:0 -> semantic_search", + "CALLS tests/integration/test_tool_calling.py:0 -> test_hybrid_search_completes", + "CALLS tests/integration/test_tool_calling.py:0 -> test_parallel_tool_calls_all_execute", + "CALLS tests/integration/test_tool_calling.py:0 -> tracker", + "CALLS tests/integration/test_tool_calling.py:0 -> tracking_tools", + "CALLS tests/test_absolute_path.py:0 -> NodeLabel", + "CALLS tests/test_absolute_path.py:0 -> SupportedLanguage", + "CALLS tests/test_absolute_path.py:0 -> TestAbsolutePathOnNodes", + "CALLS tests/test_absolute_path.py:0 -> TestCppModuleAbsolutePath", + "CALLS tests/test_absolute_path.py:0 -> TestTypeScriptAbsolutePath", + "CALLS tests/test_absolute_path.py:0 -> add", + "CALLS tests/test_absolute_path.py:0 -> cpp_module_project", + "CALLS tests/test_absolute_path.py:0 -> export", + "CALLS tests/test_absolute_path.py:0 -> mock_ingestor", + "CALLS tests/test_absolute_path.py:0 -> my_method", + "CALLS tests/test_absolute_path.py:0 -> nodes", + "CALLS tests/test_absolute_path.py:0 -> parsers_and_queries", + 
"CALLS tests/test_absolute_path.py:0 -> python_project", + "CALLS tests/test_absolute_path.py:0 -> temp_repo", + "CALLS tests/test_absolute_path.py:0 -> test_absolute_path_is_posix_format", + "CALLS tests/test_absolute_path.py:0 -> test_absolute_path_matches_resolved_file", + "CALLS tests/test_absolute_path.py:0 -> test_class_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_enum_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_file_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_folder_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_function_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_interface_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_method_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_module_implementation_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_module_interface_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_module_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_package_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_project_node_has_no_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> ts_project", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> RelationshipType", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> TestAbstractMethodOverrideResolution", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> engine", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> execute_write", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> fetch_all", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> flush_all", + 
"CALLS tests/test_abstract_method_override_resolution.py:0 -> graph_updater", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> mixin", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> name", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> parse", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> read", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> repo_path", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> test_abstract_stub_is_not_the_call_target", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> test_self_call_resolves_to_concrete_sibling_not_abstract_stub", + "CALLS tests/test_anthropic_token_counter.py:0 -> semantic_search", + "CALLS tests/test_anthropic_token_counter.py:0 -> test_injects_placeholder_when_only_system_prompt_present", + "CALLS tests/test_anthropic_token_counter.py:0 -> test_retry_prompt_with_tool_name_becomes_tool_result_error_block", + "CALLS tests/test_anthropic_token_counter.py:0 -> test_retry_prompt_without_tool_name_becomes_text_block", + "CALLS tests/test_anthropic_token_counter.py:0 -> test_returns_zero_when_no_messages_and_no_system_prompt", + "CALLS tests/test_anthropic_token_counter.py:0 -> text", + "CALLS tests/test_anthropic_token_counter.py:0 -> type", + "CALLS tests/test_build_binary.py:0 -> TestBuildPackageArgs", + "CALLS tests/test_build_binary.py:0 -> TestGetTreesitterPackages", + "CALLS tests/test_build_binary.py:0 -> load", + "CALLS tests/test_build_binary.py:0 -> name", + "CALLS tests/test_build_binary.py:0 -> test_all_options_combined", + "CALLS tests/test_build_binary.py:0 -> test_collect_all_only", + "CALLS tests/test_build_binary.py:0 -> test_collect_data_only", + "CALLS tests/test_build_binary.py:0 -> test_extracts_treesitter_packages_from_pyproject", + "CALLS tests/test_build_binary.py:0 -> test_filters_non_treesitter_packages", + "CALLS tests/test_build_binary.py:0 -> test_handles_different_version_specifiers", 
+ "CALLS tests/test_build_binary.py:0 -> test_hidden_import_only", + "CALLS tests/test_build_binary.py:0 -> test_no_options_returns_empty_list", + "CALLS tests/test_build_binary.py:0 -> test_returns_empty_list_when_no_optional_dependencies", + "CALLS tests/test_build_binary.py:0 -> test_returns_empty_list_when_no_treesitter_extra", + "CALLS tests/test_c_language.py:0 -> Color", + "CALLS tests/test_c_language.py:0 -> NodeLabel", + "CALLS tests/test_c_language.py:0 -> RelationshipType", + "CALLS tests/test_c_language.py:0 -> TestCCallsRelationships", + "CALLS tests/test_c_language.py:0 -> TestCDefinesRelationships", + "CALLS tests/test_c_language.py:0 -> TestCEnumNodes", + "CALLS tests/test_c_language.py:0 -> TestCFileAndModuleNodes", + "CALLS tests/test_c_language.py:0 -> TestCFunctionNodes", + "CALLS tests/test_c_language.py:0 -> TestCImportsRelationships", + "CALLS tests/test_c_language.py:0 -> TestCPackageDetection", + "CALLS tests/test_c_language.py:0 -> TestCQualifiedNames", + "CALLS tests/test_c_language.py:0 -> TestCStructNodes", + "CALLS tests/test_c_language.py:0 -> TestCUnionNodes", + "CALLS tests/test_c_language.py:0 -> add", + "CALLS tests/test_c_language.py:0 -> c_project", + "CALLS tests/test_c_language.py:0 -> c_subdir_project", + "CALLS tests/test_c_language.py:0 -> main", + "CALLS tests/test_c_language.py:0 -> mock_ingestor", + "CALLS tests/test_c_language.py:0 -> name", + "CALLS tests/test_c_language.py:0 -> run", + "CALLS tests/test_c_language.py:0 -> temp_repo", + "CALLS tests/test_c_language.py:0 -> test_c_file_nodes_created", + "CALLS tests/test_c_language.py:0 -> test_c_module_nodes_created", + "CALLS tests/test_c_language.py:0 -> test_cmakelists_creates_package", + "CALLS tests/test_c_language.py:0 -> test_enum_detected", + "CALLS tests/test_c_language.py:0 -> test_function_call_detected", + "CALLS tests/test_c_language.py:0 -> test_function_qualified_name_format", + "CALLS tests/test_c_language.py:0 -> 
test_function_qualified_name_has_project", + "CALLS tests/test_c_language.py:0 -> test_function_with_parameters", + "CALLS tests/test_c_language.py:0 -> test_header_file_node_created", + "CALLS tests/test_c_language.py:0 -> test_include_creates_external_module", + "CALLS tests/test_c_language.py:0 -> test_include_utils_h_module_exists", + "CALLS tests/test_c_language.py:0 -> test_main_calls_greet", + "CALLS tests/test_c_language.py:0 -> test_main_function_detected", + "CALLS tests/test_c_language.py:0 -> test_main_module_defines_add", + "CALLS tests/test_c_language.py:0 -> test_makefile_creates_package", + "CALLS tests/test_c_language.py:0 -> test_module_defines_functions", + "CALLS tests/test_c_language.py:0 -> test_multiple_calls_from_main", + "CALLS tests/test_c_language.py:0 -> test_pointer_return_function_detected", + "CALLS tests/test_c_language.py:0 -> test_simple_function_detected", + "CALLS tests/test_c_language.py:0 -> test_struct_detected", + "CALLS tests/test_c_language.py:0 -> test_struct_has_qualified_name", + "CALLS tests/test_c_language.py:0 -> test_subdirectory_with_makefile_is_package", + "CALLS tests/test_c_language.py:0 -> test_union_detected", + "CALLS tests/test_c_language.py:0 -> test_void_function_detected", + "CALLS tests/test_call_processor.py:0 -> CallProcessor", + "CALLS tests/test_call_processor.py:0 -> LanguageQueries", + "CALLS tests/test_call_processor.py:0 -> NodeLabel", + "CALLS tests/test_call_processor.py:0 -> NodeType", + "CALLS tests/test_call_processor.py:0 -> SupportedLanguage", + "CALLS tests/test_call_processor.py:0 -> TestBuildNestedQualifiedName", + "CALLS tests/test_call_processor.py:0 -> TestCalculateImportDistance", + "CALLS tests/test_call_processor.py:0 -> TestCallProcessorSlots", + "CALLS tests/test_call_processor.py:0 -> TestCollectAllCallNodes", + "CALLS tests/test_call_processor.py:0 -> TestCombinedQueryCompilationExceptionPaths", + "CALLS tests/test_call_processor.py:0 -> TestFilterCallsInNode", + "CALLS 
tests/test_call_processor.py:0 -> TestGetCallTargetName", + "CALLS tests/test_call_processor.py:0 -> TestGetIifeTargetName", + "CALLS tests/test_call_processor.py:0 -> TestGetNodeName", + "CALLS tests/test_call_processor.py:0 -> TestGetRustImplClassName", + "CALLS tests/test_call_processor.py:0 -> TestIngestFunctionCallsWithoutCallNodes", + "CALLS tests/test_call_processor.py:0 -> TestIsMethod", + "CALLS tests/test_call_processor.py:0 -> TestIsMethodChain", + "CALLS tests/test_call_processor.py:0 -> TestProcessCallsInClassesWithoutCombined", + "CALLS tests/test_call_processor.py:0 -> TestProcessCallsInFileErrorHandling", + "CALLS tests/test_call_processor.py:0 -> TestProcessCallsInFileWithoutCache", + "CALLS tests/test_call_processor.py:0 -> TestProcessCallsInFunctionsWithoutCombined", + "CALLS tests/test_call_processor.py:0 -> TestProcessMethodsInClassWithoutSortedFuncNodes", + "CALLS tests/test_call_processor.py:0 -> TestResolveBuiltinCall", + "CALLS tests/test_call_processor.py:0 -> TestResolveChainedCall", + "CALLS tests/test_call_processor.py:0 -> TestResolveClassQnFromType", + "CALLS tests/test_call_processor.py:0 -> TestResolveCppOperatorCall", + "CALLS tests/test_call_processor.py:0 -> TestResolveFunctionCall", + "CALLS tests/test_call_processor.py:0 -> TestResolveInheritedMethod", + "CALLS tests/test_call_processor.py:0 -> TestResolveSuperCall", + "CALLS tests/test_call_processor.py:0 -> TestTryResolveMethod", + "CALLS tests/test_call_processor.py:0 -> call_processor", + "CALLS tests/test_call_processor.py:0 -> children", + "CALLS tests/test_call_processor.py:0 -> factory", + "CALLS tests/test_call_processor.py:0 -> graph_updater", + "CALLS tests/test_call_processor.py:0 -> import_processor", + "CALLS tests/test_call_processor.py:0 -> keys", + "CALLS tests/test_call_processor.py:0 -> mock_ingestor", + "CALLS tests/test_call_processor.py:0 -> my_method", + "CALLS tests/test_call_processor.py:0 -> name", + "CALLS tests/test_call_processor.py:0 -> 
parsers_and_queries", + "CALLS tests/test_call_processor.py:0 -> patched_query", + "CALLS tests/test_call_processor.py:0 -> processor", + "CALLS tests/test_call_processor.py:0 -> processor_with_imports", + "CALLS tests/test_call_processor.py:0 -> processor_with_inheritance", + "CALLS tests/test_call_processor.py:0 -> processor_with_methods", + "CALLS tests/test_call_processor.py:0 -> processor_with_registry", + "CALLS tests/test_call_processor.py:0 -> processor_with_types", + "CALLS tests/test_call_processor.py:0 -> repo_path", + "CALLS tests/test_call_processor.py:0 -> save", + "CALLS tests/test_call_processor.py:0 -> temp_repo", + "CALLS tests/test_call_processor.py:0 -> test_apply_method", + "CALLS tests/test_call_processor.py:0 -> test_arrow_function_iife", + "CALLS tests/test_call_processor.py:0 -> test_attribute_call", + "CALLS tests/test_call_processor.py:0 -> test_bind_method", + "CALLS tests/test_call_processor.py:0 -> test_builtin_operator_equal", + "CALLS tests/test_call_processor.py:0 -> test_builtin_operator_plus", + "CALLS tests/test_call_processor.py:0 -> test_call_method", + "CALLS tests/test_call_processor.py:0 -> test_chained_attribute_call", + "CALLS tests/test_call_processor.py:0 -> test_chained_calls_is_chain", + "CALLS tests/test_call_processor.py:0 -> test_combined_func_class_query_exception_sets_none", + "CALLS tests/test_call_processor.py:0 -> test_common_prefix_reduces_distance", + "CALLS tests/test_call_processor.py:0 -> test_continues_after_error_in_single_file", + "CALLS tests/test_call_processor.py:0 -> test_cpp_binary_expression_minus", + "CALLS tests/test_call_processor.py:0 -> test_cpp_binary_expression_plus", + "CALLS tests/test_call_processor.py:0 -> test_cpp_unary_expression", + "CALLS tests/test_call_processor.py:0 -> test_cpp_update_expression", + "CALLS tests/test_call_processor.py:0 -> test_custom_operator_from_registry", + "CALLS tests/test_call_processor.py:0 -> test_distant_module_higher_distance", + "CALLS 
tests/test_call_processor.py:0 -> test_empty_string_not_chain", + "CALLS tests/test_call_processor.py:0 -> test_fallback_to_local_resolution", + "CALLS tests/test_call_processor.py:0 -> test_filters_calls_within_container", + "CALLS tests/test_call_processor.py:0 -> test_finds_method_in_grandparent", + "CALLS tests/test_call_processor.py:0 -> test_finds_method_in_parent", + "CALLS tests/test_call_processor.py:0 -> test_function_expression_iife", + "CALLS tests/test_call_processor.py:0 -> test_function_in_class_is_method", + "CALLS tests/test_call_processor.py:0 -> test_gets_field_by_custom_field_name", + "CALLS tests/test_call_processor.py:0 -> test_gets_name_from_class_def", + "CALLS tests/test_call_processor.py:0 -> test_gets_name_from_function_def", + "CALLS tests/test_call_processor.py:0 -> test_has_slots", + "CALLS tests/test_call_processor.py:0 -> test_identifier_call", + "CALLS tests/test_call_processor.py:0 -> test_iife_function_resolution", + "CALLS tests/test_call_processor.py:0 -> test_java_chained_method_invocation", + "CALLS tests/test_call_processor.py:0 -> test_java_method_invocation_with_object", + "CALLS tests/test_call_processor.py:0 -> test_java_method_invocation_without_object", + "CALLS tests/test_call_processor.py:0 -> test_js_builtin_pattern_json_parse", + "CALLS tests/test_call_processor.py:0 -> test_js_builtin_pattern_object_keys", + "CALLS tests/test_call_processor.py:0 -> test_logs_error_on_processing_failure", + "CALLS tests/test_call_processor.py:0 -> test_member_expression_js", + "CALLS tests/test_call_processor.py:0 -> test_method_not_found_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_method_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_method_with_parens_is_chain", + "CALLS tests/test_call_processor.py:0 -> test_nested_function", + "CALLS tests/test_call_processor.py:0 -> test_nested_function_is_not_method", + "CALLS tests/test_call_processor.py:0 -> test_no_dots_not_chain", + "CALLS 
tests/test_call_processor.py:0 -> test_no_function_child_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_no_instance_dict", + "CALLS tests/test_call_processor.py:0 -> test_non_builtin_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_non_iife_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_non_operator_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_process_calls_with_empty_combined_captures", + "CALLS tests/test_call_processor.py:0 -> test_process_calls_without_func_class_captures_cache", + "CALLS tests/test_call_processor.py:0 -> test_prototype_apply", + "CALLS tests/test_call_processor.py:0 -> test_prototype_call", + "CALLS tests/test_call_processor.py:0 -> test_rejects_arbitrary_attribute", + "CALLS tests/test_call_processor.py:0 -> test_resolves_direct_method", + "CALLS tests/test_call_processor.py:0 -> test_resolves_from_import_map", + "CALLS tests/test_call_processor.py:0 -> test_resolves_imported_function", + "CALLS tests/test_call_processor.py:0 -> test_resolves_inherited_method", + "CALLS tests/test_call_processor.py:0 -> test_resolves_local_variable_method_call", + "CALLS tests/test_call_processor.py:0 -> test_resolves_method_on_imported_class", + "CALLS tests/test_call_processor.py:0 -> test_resolves_same_module_function", + "CALLS tests/test_call_processor.py:0 -> test_returns_call_nodes_for_code_with_calls", + "CALLS tests/test_call_processor.py:0 -> test_returns_dotted_type_as_is", + "CALLS tests/test_call_processor.py:0 -> test_returns_empty_when_no_calls_query", + "CALLS tests/test_call_processor.py:0 -> test_returns_none_for_chain_without_type_info", + "CALLS tests/test_call_processor.py:0 -> test_returns_none_for_no_name", + "CALLS tests/test_call_processor.py:0 -> test_returns_none_for_non_chained_expression", + "CALLS tests/test_call_processor.py:0 -> test_returns_none_for_unknown_function", + "CALLS tests/test_call_processor.py:0 -> test_returns_none_for_unknown_method", + "CALLS 
tests/test_call_processor.py:0 -> test_returns_none_for_unresolvable_chain", + "CALLS tests/test_call_processor.py:0 -> test_rust_impl_fallback_to_children", + "CALLS tests/test_call_processor.py:0 -> test_same_module_distance_zero", + "CALLS tests/test_call_processor.py:0 -> test_sibling_module_distance_one", + "CALLS tests/test_call_processor.py:0 -> test_simple_method_not_chain", + "CALLS tests/test_call_processor.py:0 -> test_slot_attributes_accessible", + "CALLS tests/test_call_processor.py:0 -> test_super_calls_constructor", + "CALLS tests/test_call_processor.py:0 -> test_super_dot_method", + "CALLS tests/test_call_processor.py:0 -> test_super_inherited_from_grandparent", + "CALLS tests/test_call_processor.py:0 -> test_super_method_not_found_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_super_no_class_context_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_super_unknown_class_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_top_level_function", + "CALLS tests/test_call_processor.py:0 -> test_top_level_function_is_not_method", + "CALLS tests/test_call_processor.py:0 -> test_unknown_class_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_unknown_method_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_without_call_nodes", + "CALLS tests/test_call_processor.py:0 -> test_without_call_nodes_and_no_query", + "CALLS tests/test_call_processor.py:0 -> test_without_combined_captures", + "CALLS tests/test_call_processor.py:0 -> test_without_combined_captures_no_functions", + "CALLS tests/test_call_processor.py:0 -> test_without_sorted_func_nodes", + "CALLS tests/test_call_processor.py:0 -> text", + "CALLS tests/test_call_processor.py:0 -> type", + "CALLS tests/test_call_processor.py:0 -> updater", + "CALLS tests/test_call_processor_integration.py:0 -> LanguageQueries", + "CALLS tests/test_call_processor_integration.py:0 -> NodeLabel", + "CALLS tests/test_call_processor_integration.py:0 -> 
RelationshipType", + "CALLS tests/test_call_processor_integration.py:0 -> SupportedLanguage", + "CALLS tests/test_call_processor_integration.py:0 -> TestModuleCallsClassFiltered", + "CALLS tests/test_call_processor_integration.py:0 -> TestProcessCallsEdgeCases", + "CALLS tests/test_call_processor_integration.py:0 -> TestProcessCallsInFileCpp", + "CALLS tests/test_call_processor_integration.py:0 -> TestProcessCallsInFileJava", + "CALLS tests/test_call_processor_integration.py:0 -> TestProcessCallsInFileJavaScript", + "CALLS tests/test_call_processor_integration.py:0 -> TestProcessCallsInFilePython", + "CALLS tests/test_call_processor_integration.py:0 -> TestProcessCallsInFileRust", + "CALLS tests/test_call_processor_integration.py:0 -> TestProcessCallsInFileTypeScript", + "CALLS tests/test_call_processor_integration.py:0 -> add", + "CALLS tests/test_call_processor_integration.py:0 -> ensure_relationship_batch", + "CALLS tests/test_call_processor_integration.py:0 -> factory", + "CALLS tests/test_call_processor_integration.py:0 -> graph_updater", + "CALLS tests/test_call_processor_integration.py:0 -> keys", + "CALLS tests/test_call_processor_integration.py:0 -> main", + "CALLS tests/test_call_processor_integration.py:0 -> mock_ingestor", + "CALLS tests/test_call_processor_integration.py:0 -> name", + "CALLS tests/test_call_processor_integration.py:0 -> parse", + "CALLS tests/test_call_processor_integration.py:0 -> parsers_and_queries", + "CALLS tests/test_call_processor_integration.py:0 -> process", + "CALLS tests/test_call_processor_integration.py:0 -> repo_path", + "CALLS tests/test_call_processor_integration.py:0 -> temp_repo", + "CALLS tests/test_call_processor_integration.py:0 -> test_function_does_not_call_class_python", + "CALLS tests/test_call_processor_integration.py:0 -> test_handles_chained_method_calls", + "CALLS tests/test_call_processor_integration.py:0 -> test_handles_empty_file", + "CALLS tests/test_call_processor_integration.py:0 -> 
test_handles_file_with_only_imports", + "CALLS tests/test_call_processor_integration.py:0 -> test_handles_init_py_module_qn", + "CALLS tests/test_call_processor_integration.py:0 -> test_handles_nested_function_calls", + "CALLS tests/test_call_processor_integration.py:0 -> test_module_does_not_call_class_python", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_builtin_calls_js", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_class_method_calls_ts", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_function_calls_cpp", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_function_calls_in_file", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_function_calls_js", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_function_calls_rust", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_function_calls_ts", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_impl_method_calls_rust", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_imported_function_calls", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_method_calls_cpp", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_method_calls_in_class", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_method_calls_js", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_method_invocation_java", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_module_level_calls", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_same_class_method_calls_java", + "CALLS tests/test_call_processor_integration.py:0 -> updater", + "CALLS tests/test_call_processor_integration.py:0 -> value", + "CALLS tests/test_call_resolver.py:0 -> CallProcessor", + "CALLS tests/test_call_resolver.py:0 -> NodeType", + "CALLS tests/test_call_resolver.py:0 -> 
RelationshipType", + "CALLS tests/test_call_resolver.py:0 -> SupportedLanguage", + "CALLS tests/test_call_resolver.py:0 -> TestCalculateImportDistance", + "CALLS tests/test_call_resolver.py:0 -> TestChainedMethodPattern", + "CALLS tests/test_call_resolver.py:0 -> TestDequeBfs", + "CALLS tests/test_call_resolver.py:0 -> TestDeterministicFileOrder", + "CALLS tests/test_call_resolver.py:0 -> TestDeterministicResolution", + "CALLS tests/test_call_resolver.py:0 -> TestGetSeparator", + "CALLS tests/test_call_resolver.py:0 -> TestHasSeparator", + "CALLS tests/test_call_resolver.py:0 -> TestIsMethodChain", + "CALLS tests/test_call_resolver.py:0 -> TestIsSuperCall", + "CALLS tests/test_call_resolver.py:0 -> TestResolveChainedCall", + "CALLS tests/test_call_resolver.py:0 -> TestResolveClassQnFromType", + "CALLS tests/test_call_resolver.py:0 -> TestResolveFunctionCallIntegration", + "CALLS tests/test_call_resolver.py:0 -> TestResolveImportedClassQn", + "CALLS tests/test_call_resolver.py:0 -> TestResolveInheritedMethod", + "CALLS tests/test_call_resolver.py:0 -> TestResolveJavaMethodCall", + "CALLS tests/test_call_resolver.py:0 -> TestResolveMultiPartCall", + "CALLS tests/test_call_resolver.py:0 -> TestResolveRustClassQn", + "CALLS tests/test_call_resolver.py:0 -> TestResolveSelfAttributeCall", + "CALLS tests/test_call_resolver.py:0 -> TestResolveSuperCall", + "CALLS tests/test_call_resolver.py:0 -> TestResolveTwoPartCall", + "CALLS tests/test_call_resolver.py:0 -> TestSeparatorPattern", + "CALLS tests/test_call_resolver.py:0 -> TestTryMethodOnClass", + "CALLS tests/test_call_resolver.py:0 -> TestTryResolveDirectImport", + "CALLS tests/test_call_resolver.py:0 -> TestTryResolveIife", + "CALLS tests/test_call_resolver.py:0 -> TestTryResolveModuleMethod", + "CALLS tests/test_call_resolver.py:0 -> TestTryResolveQualifiedCall", + "CALLS tests/test_call_resolver.py:0 -> TestTryResolveSameModule", + "CALLS tests/test_call_resolver.py:0 -> TestTryResolveViaImport", + "CALLS 
tests/test_call_resolver.py:0 -> TestTryResolveViaImports", + "CALLS tests/test_call_resolver.py:0 -> TestTryResolveViaLocalType", + "CALLS tests/test_call_resolver.py:0 -> TestTryResolveViaTrie", + "CALLS tests/test_call_resolver.py:0 -> TestTryResolveWildcardImports", + "CALLS tests/test_call_resolver.py:0 -> TestTryWildcardQns", + "CALLS tests/test_call_resolver.py:0 -> call_processor", + "CALLS tests/test_call_resolver.py:0 -> call_resolver", + "CALLS tests/test_call_resolver.py:0 -> children", + "CALLS tests/test_call_resolver.py:0 -> close", + "CALLS tests/test_call_resolver.py:0 -> ensure_relationship_batch", + "CALLS tests/test_call_resolver.py:0 -> execute", + "CALLS tests/test_call_resolver.py:0 -> factory", + "CALLS tests/test_call_resolver.py:0 -> find_ending_with", + "CALLS tests/test_call_resolver.py:0 -> find_with_prefix", + "CALLS tests/test_call_resolver.py:0 -> graph_updater", + "CALLS tests/test_call_resolver.py:0 -> handler", + "CALLS tests/test_call_resolver.py:0 -> import_processor", + "CALLS tests/test_call_resolver.py:0 -> insert", + "CALLS tests/test_call_resolver.py:0 -> is_abstract", + "CALLS tests/test_call_resolver.py:0 -> is_property", + "CALLS tests/test_call_resolver.py:0 -> main", + "CALLS tests/test_call_resolver.py:0 -> mark_abstract", + "CALLS tests/test_call_resolver.py:0 -> mark_property", + "CALLS tests/test_call_resolver.py:0 -> mock_ast_cache", + "CALLS tests/test_call_resolver.py:0 -> mock_function_registry", + "CALLS tests/test_call_resolver.py:0 -> mock_import_processor", + "CALLS tests/test_call_resolver.py:0 -> mock_ingestor", + "CALLS tests/test_call_resolver.py:0 -> mock_type_inference", + "CALLS tests/test_call_resolver.py:0 -> name", + "CALLS tests/test_call_resolver.py:0 -> process", + "CALLS tests/test_call_resolver.py:0 -> processor", + "CALLS tests/test_call_resolver.py:0 -> property_names", + "CALLS tests/test_call_resolver.py:0 -> python_type_inference", + "CALLS tests/test_call_resolver.py:0 -> 
register_unique_qn", + "CALLS tests/test_call_resolver.py:0 -> repo_path", + "CALLS tests/test_call_resolver.py:0 -> save", + "CALLS tests/test_call_resolver.py:0 -> start", + "CALLS tests/test_call_resolver.py:0 -> temp_repo", + "CALLS tests/test_call_resolver.py:0 -> test_bfs_finds_deep_ancestor_method", + "CALLS tests/test_call_resolver.py:0 -> test_bfs_no_infinite_loop_on_cycle", + "CALLS tests/test_call_resolver.py:0 -> test_bfs_order_prefers_closer_parent", + "CALLS tests/test_call_resolver.py:0 -> test_chained_calls_is_chain", + "CALLS tests/test_call_resolver.py:0 -> test_cpp_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_distant_module_higher_distance", + "CALLS tests/test_call_resolver.py:0 -> test_dot_separated_qn", + "CALLS tests/test_call_resolver.py:0 -> test_dot_separator", + "CALLS tests/test_call_resolver.py:0 -> test_double_colon_separated_qn", + "CALLS tests/test_call_resolver.py:0 -> test_double_colon_separator", + "CALLS tests/test_call_resolver.py:0 -> test_eligible_files_are_sorted", + "CALLS tests/test_call_resolver.py:0 -> test_empty_string_not_chain", + "CALLS tests/test_call_resolver.py:0 -> test_falls_back_to_class_name_resolution", + "CALLS tests/test_call_resolver.py:0 -> test_falls_back_to_trie", + "CALLS tests/test_call_resolver.py:0 -> test_finds_method_in_grandparent", + "CALLS tests/test_call_resolver.py:0 -> test_finds_method_in_parent", + "CALLS tests/test_call_resolver.py:0 -> test_go_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_graph_output_deterministic_across_runs", + "CALLS tests/test_call_resolver.py:0 -> test_handles_diamond_inheritance", + "CALLS tests/test_call_resolver.py:0 -> test_handles_qualified_call_name", + "CALLS tests/test_call_resolver.py:0 -> test_java_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_javascript_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_last_element_matches_function_name", + "CALLS tests/test_call_resolver.py:0 -> 
test_lua_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_matches_deeply_chained", + "CALLS tests/test_call_resolver.py:0 -> test_matches_final_method", + "CALLS tests/test_call_resolver.py:0 -> test_method_with_parens_is_chain", + "CALLS tests/test_call_resolver.py:0 -> test_no_dots_not_chain", + "CALLS tests/test_call_resolver.py:0 -> test_no_match_on_parenthesized_suffix", + "CALLS tests/test_call_resolver.py:0 -> test_no_separator", + "CALLS tests/test_call_resolver.py:0 -> test_no_separator_returns_single_element", + "CALLS tests/test_call_resolver.py:0 -> test_regular_call_not_super", + "CALLS tests/test_call_resolver.py:0 -> test_resolve_function_call_deterministic_across_runs", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_chained_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_direct_import", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_from_import_map", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_iife_arrow_prefix", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_iife_function_prefix", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_iife_priority", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_imported_class_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_imported_function", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_imported_multi_part_call", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_inherited_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_inherited_self_attribute_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_java_method_call", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_js_builtin_type", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_local_variable_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_method_on_class", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_module_method", + "CALLS tests/test_call_resolver.py:0 -> 
test_resolves_module_method_fallback", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_rust_class_qn", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_same_module_function", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_self_attribute_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_static_method_via_import", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_super_call", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_via_local_type", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_via_trie_match", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_wildcard_import", + "CALLS tests/test_call_resolver.py:0 -> test_returns_class_qn_for_matching_method", + "CALLS tests/test_call_resolver.py:0 -> test_returns_colon_over_dot", + "CALLS tests/test_call_resolver.py:0 -> test_returns_dot_as_default", + "CALLS tests/test_call_resolver.py:0 -> test_returns_dot_for_no_separator", + "CALLS tests/test_call_resolver.py:0 -> test_returns_dotted_type_as_is", + "CALLS tests/test_call_resolver.py:0 -> test_returns_double_colon_first", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_empty_call_name", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_no_local_type", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_no_local_types", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_no_match", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_no_separator", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_no_wildcard_match", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_non_iife", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unimported", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_call", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_class", + "CALLS 
tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_function", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_method", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_module", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_object", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unregistered_iife", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unregistered_import", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unresolvable_chain", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unresolved_java_call", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_without_class_context", + "CALLS tests/test_call_resolver.py:0 -> test_returns_original_for_no_match", + "CALLS tests/test_call_resolver.py:0 -> test_rust_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_same_module_distance_zero", + "CALLS tests/test_call_resolver.py:0 -> test_self_attribute_call", + "CALLS tests/test_call_resolver.py:0 -> test_sibling_module_distance_low", + "CALLS tests/test_call_resolver.py:0 -> test_simple_method_not_chain", + "CALLS tests/test_call_resolver.py:0 -> test_single_colon_separator", + "CALLS tests/test_call_resolver.py:0 -> test_skips_non_wildcard_imports", + "CALLS tests/test_call_resolver.py:0 -> test_splits_on_colon", + "CALLS tests/test_call_resolver.py:0 -> test_splits_on_dot", + "CALLS tests/test_call_resolver.py:0 -> test_splits_on_double_colon", + "CALLS tests/test_call_resolver.py:0 -> test_super_constructor_call", + "CALLS tests/test_call_resolver.py:0 -> test_super_dot_method", + "CALLS tests/test_call_resolver.py:0 -> test_super_keyword_alone", + "CALLS tests/test_call_resolver.py:0 -> test_super_method_call", + "CALLS tests/test_call_resolver.py:0 -> test_super_parens_method", + "CALLS tests/test_call_resolver.py:0 -> test_trie_many_candidates_deterministic", + "CALLS tests/test_call_resolver.py:0 
-> test_trie_tiebreak_by_qualified_name", + "CALLS tests/test_call_resolver.py:0 -> test_trie_tiebreak_distance_still_wins", + "CALLS tests/test_call_resolver.py:0 -> test_trie_tiebreak_picks_lexicographic_first", + "CALLS tests/test_call_resolver.py:0 -> test_two_part_call", + "CALLS tests/test_call_resolver.py:0 -> test_typescript_deterministic", + "CALLS tests/test_call_resolver.py:0 -> type_inference", + "CALLS tests/test_call_resolver.py:0 -> updater", + "CALLS tests/test_call_resolver.py:0 -> variants", + "CALLS tests/test_callable_field_calls.py:0 -> FQNSpec", + "CALLS tests/test_callable_field_calls.py:0 -> RelationshipType", + "CALLS tests/test_callable_field_calls.py:0 -> TestCallableFieldCalls", + "CALLS tests/test_callable_field_calls.py:0 -> ensure_node_batch", + "CALLS tests/test_callable_field_calls.py:0 -> ensure_relationship_batch", + "CALLS tests/test_callable_field_calls.py:0 -> execute_write", + "CALLS tests/test_callable_field_calls.py:0 -> fetch_all", + "CALLS tests/test_callable_field_calls.py:0 -> flush_all", + "CALLS tests/test_callable_field_calls.py:0 -> get", + "CALLS tests/test_callable_field_calls.py:0 -> graph_updater", + "CALLS tests/test_callable_field_calls.py:0 -> name", + "CALLS tests/test_callable_field_calls.py:0 -> repo_path", + "CALLS tests/test_callable_field_calls.py:0 -> test_ambiguous_field_name_not_resolved", + "CALLS tests/test_callable_field_calls.py:0 -> test_resolves_to_all_bound_functions", + "CALLS tests/test_callable_field_calls.py:0 -> test_resolves_to_first_bound_function", + "CALLS tests/test_callable_field_calls.py:0 -> type", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> main", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> read_file", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> run", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> test_appends_synthetic_return_for_each_orphan_tool_call", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> 
test_ignores_non_tool_call_parts_in_response", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> test_noop_when_history_empty", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> test_noop_when_last_message_is_request", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> test_noop_when_response_has_no_tool_calls", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> text", + "CALLS tests/test_cgr_instructions.py:0 -> active_orchestrator_config", + "CALLS tests/test_cgr_instructions.py:0 -> agent", + "CALLS tests/test_cgr_instructions.py:0 -> create_model", + "CALLS tests/test_cgr_instructions.py:0 -> get_provider_from_config", + "CALLS tests/test_cgr_instructions.py:0 -> isolated_global", + "CALLS tests/test_cgr_instructions.py:0 -> mock_open", + "CALLS tests/test_cgr_instructions.py:0 -> mock_settings", + "CALLS tests/test_cgr_instructions.py:0 -> name", + "CALLS tests/test_cgr_instructions.py:0 -> project_root", + "CALLS tests/test_cgr_instructions.py:0 -> read", + "CALLS tests/test_cgr_instructions.py:0 -> repo", + "CALLS tests/test_cgr_instructions.py:0 -> temp_repo", + "CALLS tests/test_cgr_instructions.py:0 -> test_create_rag_orchestrator_reads_global_instructions", + "CALLS tests/test_cgr_instructions.py:0 -> test_create_rag_orchestrator_reads_project_instructions", + "CALLS tests/test_cgr_instructions.py:0 -> test_create_rag_orchestrator_skips_instructions_when_disabled", + "CALLS tests/test_cgr_instructions.py:0 -> test_loads_global_only_when_repo_path_none", + "CALLS tests/test_cgr_instructions.py:0 -> test_loads_instructions_when_repo_file_present", + "CALLS tests/test_cgr_instructions.py:0 -> test_merges_global_and_repo", + "CALLS tests/test_cgr_instructions.py:0 -> test_orchestrator_prompt_appends_project_instructions", + "CALLS tests/test_cgr_instructions.py:0 -> test_orchestrator_prompt_unchanged_without_instructions", + "CALLS tests/test_cgr_instructions.py:0 -> test_returns_none_on_read_error", + "CALLS 
tests/test_cgr_instructions.py:0 -> test_returns_none_when_file_empty", + "CALLS tests/test_cgr_instructions.py:0 -> test_returns_none_when_no_file", + "CALLS tests/test_cgr_shim.py:0 -> CypherGenerator", + "CALLS tests/test_cgr_shim.py:0 -> GraphLoader", + "CALLS tests/test_cgr_shim.py:0 -> MemgraphIngestor", + "CALLS tests/test_cgr_shim.py:0 -> TestCgrShimExports", + "CALLS tests/test_cgr_shim.py:0 -> embed_code", + "CALLS tests/test_cgr_shim.py:0 -> graph_service", + "CALLS tests/test_cgr_shim.py:0 -> load_graph", + "CALLS tests/test_cgr_shim.py:0 -> name", + "CALLS tests/test_cgr_shim.py:0 -> test_all_matches_module_exports", + "CALLS tests/test_cgr_shim.py:0 -> test_all_symbols_importable", + "CALLS tests/test_cgr_shim.py:0 -> test_cypher_generator_is_canonical_class", + "CALLS tests/test_cgr_shim.py:0 -> test_embed_code_is_canonical_function", + "CALLS tests/test_cgr_shim.py:0 -> test_graph_loader_is_canonical_class", + "CALLS tests/test_cgr_shim.py:0 -> test_load_graph_is_canonical_function", + "CALLS tests/test_cgr_shim.py:0 -> test_memgraph_ingestor_is_canonical_class", + "CALLS tests/test_cgr_shim.py:0 -> test_settings_is_canonical_instance", + "CALLS tests/test_cgr_state_and_status.py:0 -> StackManager", + "CALLS tests/test_cgr_state_and_status.py:0 -> StackState", + "CALLS tests/test_cgr_state_and_status.py:0 -> TestRecordSync", + "CALLS tests/test_cgr_state_and_status.py:0 -> TestStatusCommand", + "CALLS tests/test_cgr_state_and_status.py:0 -> TestStopCommand", + "CALLS tests/test_cgr_state_and_status.py:0 -> _temp_home", + "CALLS tests/test_cgr_state_and_status.py:0 -> cli", + "CALLS tests/test_cgr_state_and_status.py:0 -> compose_file", + "CALLS tests/test_cgr_state_and_status.py:0 -> down", + "CALLS tests/test_cgr_state_and_status.py:0 -> runner", + "CALLS tests/test_cgr_state_and_status.py:0 -> status", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_read_when_no_state_returns_empty", + "CALLS tests/test_cgr_state_and_status.py:0 -> 
test_record_sync_creates_file", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_record_sync_multiple_projects", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_record_sync_updates_existing", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_status_lists_recorded_projects", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_status_runs_clean", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_stop_invokes_daemon_down", + "CALLS tests/test_cgrignore.py:0 -> GraphUpdater", + "CALLS tests/test_cgrignore.py:0 -> ProtobufFileIngestor", + "CALLS tests/test_cgrignore.py:0 -> TestCgrignoreIntegration", + "CALLS tests/test_cgrignore.py:0 -> TestCgrignoreLoadedWithoutInteractiveSetup", + "CALLS tests/test_cgrignore.py:0 -> TestNegationIntegration", + "CALLS tests/test_cgrignore.py:0 -> TestNegationSyntax", + "CALLS tests/test_cgrignore.py:0 -> cli", + "CALLS tests/test_cgrignore.py:0 -> connect_memgraph", + "CALLS tests/test_cgrignore.py:0 -> index", + "CALLS tests/test_cgrignore.py:0 -> load_parsers", + "CALLS tests/test_cgrignore.py:0 -> main", + "CALLS tests/test_cgrignore.py:0 -> mock_ingestor", + "CALLS tests/test_cgrignore.py:0 -> mock_memgraph_connect", + "CALLS tests/test_cgrignore.py:0 -> mock_open", + "CALLS tests/test_cgrignore.py:0 -> name", + "CALLS tests/test_cgrignore.py:0 -> read", + "CALLS tests/test_cgrignore.py:0 -> repo", + "CALLS tests/test_cgrignore.py:0 -> runner", + "CALLS tests/test_cgrignore.py:0 -> start", + "CALLS tests/test_cgrignore.py:0 -> temp_repo", + "CALLS tests/test_cgrignore.py:0 -> test_cgrignore_alone_triggers_prompt", + "CALLS tests/test_cgrignore.py:0 -> test_cgrignore_deduplicates_with_detected", + "CALLS tests/test_cgrignore.py:0 -> test_cgrignore_merged_with_cli_excludes", + "CALLS tests/test_cgrignore.py:0 -> test_cgrignore_only_returns_without_prompt_when_empty", + "CALLS tests/test_cgrignore.py:0 -> test_cgrignore_patterns_included_in_candidates", + "CALLS tests/test_cgrignore.py:0 -> 
test_handles_duplicates", + "CALLS tests/test_cgrignore.py:0 -> test_ignores_comments_and_blank_lines", + "CALLS tests/test_cgrignore.py:0 -> test_index_loads_cgrignore_without_interactive_setup", + "CALLS tests/test_cgrignore.py:0 -> test_loads_exclude_patterns_from_file", + "CALLS tests/test_cgrignore.py:0 -> test_mixed_exclude_and_negation", + "CALLS tests/test_cgrignore.py:0 -> test_negation_strips_leading_whitespace", + "CALLS tests/test_cgrignore.py:0 -> test_negation_strips_whitespace_after_exclamation", + "CALLS tests/test_cgrignore.py:0 -> test_parses_negation_patterns", + "CALLS tests/test_cgrignore.py:0 -> test_returns_cgrignore_patterns_type", + "CALLS tests/test_cgrignore.py:0 -> test_returns_empty_if_cgrignore_is_a_directory", + "CALLS tests/test_cgrignore.py:0 -> test_returns_empty_on_read_error", + "CALLS tests/test_cgrignore.py:0 -> test_returns_empty_when_no_file", + "CALLS tests/test_cgrignore.py:0 -> test_start_does_not_prompt_without_interactive_setup", + "CALLS tests/test_cgrignore.py:0 -> test_start_loads_cgrignore_without_interactive_setup", + "CALLS tests/test_cgrignore.py:0 -> test_start_merges_cli_excludes_with_cgrignore", + "CALLS tests/test_cgrignore.py:0 -> test_strips_whitespace", + "CALLS tests/test_cgrignore.py:0 -> test_unignore_included_when_user_selects_all", + "CALLS tests/test_cgrignore.py:0 -> test_unignore_included_when_user_selects_none", + "CALLS tests/test_cgrignore.py:0 -> test_unignore_merged_with_user_selection", + "CALLS tests/test_cgrignore.py:0 -> test_unignore_only_returns_without_prompt", + "CALLS tests/test_chained_attribute_resolution.py:0 -> DefinitionProcessor", + "CALLS tests/test_chained_attribute_resolution.py:0 -> ProcessorFactory", + "CALLS tests/test_chained_attribute_resolution.py:0 -> RelationshipType", + "CALLS tests/test_chained_attribute_resolution.py:0 -> TestChainedAttributeResolution", + "CALLS tests/test_chained_attribute_resolution.py:0 -> definition_processor", + "CALLS 
tests/test_chained_attribute_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_chained_attribute_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_chained_attribute_resolution.py:0 -> execute_write", + "CALLS tests/test_chained_attribute_resolution.py:0 -> export", + "CALLS tests/test_chained_attribute_resolution.py:0 -> factory", + "CALLS tests/test_chained_attribute_resolution.py:0 -> fetch_all", + "CALLS tests/test_chained_attribute_resolution.py:0 -> flush_all", + "CALLS tests/test_chained_attribute_resolution.py:0 -> graph_updater", + "CALLS tests/test_chained_attribute_resolution.py:0 -> mixin", + "CALLS tests/test_chained_attribute_resolution.py:0 -> name", + "CALLS tests/test_chained_attribute_resolution.py:0 -> parent", + "CALLS tests/test_chained_attribute_resolution.py:0 -> process_all_method_overrides", + "CALLS tests/test_chained_attribute_resolution.py:0 -> repo_path", + "CALLS tests/test_chained_attribute_resolution.py:0 -> runner", + "CALLS tests/test_chained_attribute_resolution.py:0 -> test_does_not_resolve_to_module_level_function", + "CALLS tests/test_chained_attribute_resolution.py:0 -> test_three_level_chain_resolves_to_inherited_mixin_method", + "CALLS tests/test_check_no_docs.py:0 -> TestCheckFile", + "CALLS tests/test_check_no_docs.py:0 -> TestCheckModuleDocstring", + "CALLS tests/test_check_no_docs.py:0 -> TestFindCommentStart", + "CALLS tests/test_check_no_docs.py:0 -> TestHasAllowedMarker", + "CALLS tests/test_check_no_docs.py:0 -> name", + "CALLS tests/test_check_no_docs.py:0 -> test_comment_after_string_with_hash", + "CALLS tests/test_check_no_docs.py:0 -> test_comment_at_start", + "CALLS tests/test_check_no_docs.py:0 -> test_double_quote_docstring", + "CALLS tests/test_check_no_docs.py:0 -> test_empty_lines_before_code", + "CALLS tests/test_check_no_docs.py:0 -> test_empty_lines_before_docstring", + "CALLS tests/test_check_no_docs.py:0 -> test_empty_string", + "CALLS tests/test_check_no_docs.py:0 -> 
test_escaped_quote_in_string", + "CALLS tests/test_check_no_docs.py:0 -> test_file_with_allowed_comment", + "CALLS tests/test_check_no_docs.py:0 -> test_file_with_disallowed_comment", + "CALLS tests/test_check_no_docs.py:0 -> test_file_with_module_docstring_detected", + "CALLS tests/test_check_no_docs.py:0 -> test_file_with_no_comments", + "CALLS tests/test_check_no_docs.py:0 -> test_h_marker", + "CALLS tests/test_check_no_docs.py:0 -> test_hash_in_double_quoted_string", + "CALLS tests/test_check_no_docs.py:0 -> test_hash_in_single_quoted_string", + "CALLS tests/test_check_no_docs.py:0 -> test_mixed_quotes", + "CALLS tests/test_check_no_docs.py:0 -> test_multiline_string_not_treated_as_comment", + "CALLS tests/test_check_no_docs.py:0 -> test_multiple_errors_reported", + "CALLS tests/test_check_no_docs.py:0 -> test_multiple_strings", + "CALLS tests/test_check_no_docs.py:0 -> test_no_comment", + "CALLS tests/test_check_no_docs.py:0 -> test_no_docstring", + "CALLS tests/test_check_no_docs.py:0 -> test_no_marker", + "CALLS tests/test_check_no_docs.py:0 -> test_noqa_comment_allowed", + "CALLS tests/test_check_no_docs.py:0 -> test_noqa_marker", + "CALLS tests/test_check_no_docs.py:0 -> test_partial_match_not_allowed", + "CALLS tests/test_check_no_docs.py:0 -> test_protoc_marker", + "CALLS tests/test_check_no_docs.py:0 -> test_pyright_marker", + "CALLS tests/test_check_no_docs.py:0 -> test_shebang_and_module_docstring_detected", + "CALLS tests/test_check_no_docs.py:0 -> test_shebang_then_code", + "CALLS tests/test_check_no_docs.py:0 -> test_shebang_then_docstring", + "CALLS tests/test_check_no_docs.py:0 -> test_simple_comment", + "CALLS tests/test_check_no_docs.py:0 -> test_single_quote_docstring", + "CALLS tests/test_check_no_docs.py:0 -> test_ty_marker", + "CALLS tests/test_check_no_docs.py:0 -> test_type_ignore_comment_allowed", + "CALLS tests/test_check_no_docs.py:0 -> test_type_marker", + "CALLS tests/test_check_no_docs.py:0 -> type", + "CALLS 
tests/test_class_ingest.py:0 -> NodeType", + "CALLS tests/test_class_ingest.py:0 -> TestExtractCppBaseClassName", + "CALLS tests/test_class_ingest.py:0 -> TestGetNodeTypeForInheritance", + "CALLS tests/test_class_ingest.py:0 -> TestIngestClassesAndMethodsWithoutCombinedCaptures", + "CALLS tests/test_class_ingest.py:0 -> TestIngestRustImplMethodsWithoutSortedFuncNodes", + "CALLS tests/test_class_ingest.py:0 -> TestResolveToQn", + "CALLS tests/test_class_ingest.py:0 -> abstract_class_project", + "CALLS tests/test_class_ingest.py:0 -> add", + "CALLS tests/test_class_ingest.py:0 -> circular_inheritance_project", + "CALLS tests/test_class_ingest.py:0 -> comments_only_project", + "CALLS tests/test_class_ingest.py:0 -> cpp_template_class_project", + "CALLS tests/test_class_ingest.py:0 -> deeply_nested_class_project", + "CALLS tests/test_class_ingest.py:0 -> definition_processor", + "CALLS tests/test_class_ingest.py:0 -> empty_file_project", + "CALLS tests/test_class_ingest.py:0 -> ensure_node_batch", + "CALLS tests/test_class_ingest.py:0 -> ensure_relationship_batch", + "CALLS tests/test_class_ingest.py:0 -> factory", + "CALLS tests/test_class_ingest.py:0 -> go_struct_project", + "CALLS tests/test_class_ingest.py:0 -> graph_updater", + "CALLS tests/test_class_ingest.py:0 -> import_processor", + "CALLS tests/test_class_ingest.py:0 -> index", + "CALLS tests/test_class_ingest.py:0 -> inline_module_project", + "CALLS tests/test_class_ingest.py:0 -> items", + "CALLS tests/test_class_ingest.py:0 -> java_interface_project", + "CALLS tests/test_class_ingest.py:0 -> js_class_expression_project", + "CALLS tests/test_class_ingest.py:0 -> main", + "CALLS tests/test_class_ingest.py:0 -> method_a", + "CALLS tests/test_class_ingest.py:0 -> method_b", + "CALLS tests/test_class_ingest.py:0 -> method_override_project", + "CALLS tests/test_class_ingest.py:0 -> mixin_instance", + "CALLS tests/test_class_ingest.py:0 -> mock_ingestor", + "CALLS tests/test_class_ingest.py:0 -> 
multiple_inheritance_project", + "CALLS tests/test_class_ingest.py:0 -> name", + "CALLS tests/test_class_ingest.py:0 -> nested_class_project", + "CALLS tests/test_class_ingest.py:0 -> nodes", + "CALLS tests/test_class_ingest.py:0 -> process", + "CALLS tests/test_class_ingest.py:0 -> python_class_project", + "CALLS tests/test_class_ingest.py:0 -> read", + "CALLS tests/test_class_ingest.py:0 -> relationships", + "CALLS tests/test_class_ingest.py:0 -> repo_path", + "CALLS tests/test_class_ingest.py:0 -> rust_impl_project", + "CALLS tests/test_class_ingest.py:0 -> special_characters_project", + "CALLS tests/test_class_ingest.py:0 -> temp_repo", + "CALLS tests/test_class_ingest.py:0 -> test_abstract_method_overrides", + "CALLS tests/test_class_ingest.py:0 -> test_circular_inheritance_does_not_crash", + "CALLS tests/test_class_ingest.py:0 -> test_classes_ingested_without_combined_captures", + "CALLS tests/test_class_ingest.py:0 -> test_comments_only_file_does_not_crash", + "CALLS tests/test_class_ingest.py:0 -> test_cpp_template_class_methods", + "CALLS tests/test_class_ingest.py:0 -> test_cpp_template_inheritance", + "CALLS tests/test_class_ingest.py:0 -> test_deeply_nested_classes_are_ingested", + "CALLS tests/test_class_ingest.py:0 -> test_empty_file_does_not_crash", + "CALLS tests/test_class_ingest.py:0 -> test_extracts_last_namespace_component", + "CALLS tests/test_class_ingest.py:0 -> test_extracts_simple_class_name", + "CALLS tests/test_class_ingest.py:0 -> test_go_embedded_interface", + "CALLS tests/test_class_ingest.py:0 -> test_go_interface_nodes_created", + "CALLS tests/test_class_ingest.py:0 -> test_go_struct_methods_are_ingested", + "CALLS tests/test_class_ingest.py:0 -> test_go_struct_nodes_created", + "CALLS tests/test_class_ingest.py:0 -> test_handles_deeply_nested_namespace", + "CALLS tests/test_class_ingest.py:0 -> test_handles_namespaced_template", + "CALLS tests/test_class_ingest.py:0 -> test_java_interface_nodes_created", + "CALLS 
tests/test_class_ingest.py:0 -> test_java_multiple_interface_implementation", + "CALLS tests/test_class_ingest.py:0 -> test_java_single_interface_implementation", + "CALLS tests/test_class_ingest.py:0 -> test_js_class_expression_inheritance", + "CALLS tests/test_class_ingest.py:0 -> test_js_class_expression_methods", + "CALLS tests/test_class_ingest.py:0 -> test_method_override_chain", + "CALLS tests/test_class_ingest.py:0 -> test_method_override_skips_non_overriding_methods", + "CALLS tests/test_class_ingest.py:0 -> test_multiple_inheritance_creates_all_relationships", + "CALLS tests/test_class_ingest.py:0 -> test_nested_class_method_qualified_names", + "CALLS tests/test_class_ingest.py:0 -> test_nested_class_qualified_names", + "CALLS tests/test_class_ingest.py:0 -> test_non_abstract_method_override", + "CALLS tests/test_class_ingest.py:0 -> test_resolves_imported_name", + "CALLS tests/test_class_ingest.py:0 -> test_returns_class_for_known_class", + "CALLS tests/test_class_ingest.py:0 -> test_returns_class_for_unknown", + "CALLS tests/test_class_ingest.py:0 -> test_returns_interface_for_known_interface", + "CALLS tests/test_class_ingest.py:0 -> test_returns_qualified_name_for_unknown", + "CALLS tests/test_class_ingest.py:0 -> test_rust_impl_method_calls", + "CALLS tests/test_class_ingest.py:0 -> test_rust_impl_methods_are_ingested", + "CALLS tests/test_class_ingest.py:0 -> test_rust_impl_methods_ingested", + "CALLS tests/test_class_ingest.py:0 -> test_rust_inline_modules_are_ingested", + "CALLS tests/test_class_ingest.py:0 -> test_special_character_names_are_handled", + "CALLS tests/test_class_ingest.py:0 -> test_strips_nested_template_parameters", + "CALLS tests/test_class_ingest.py:0 -> test_strips_template_parameters", + "CALLS tests/test_class_ingest.py:0 -> test_typescript_interface_implementation", + "CALLS tests/test_class_ingest.py:0 -> test_typescript_mixin_inheritance", + "CALLS tests/test_class_ingest.py:0 -> test_uses_module_qn_as_prefix", + "CALLS 
tests/test_class_ingest.py:0 -> type", + "CALLS tests/test_class_ingest.py:0 -> typescript_mixin_project", + "CALLS tests/test_class_ingest.py:0 -> up", + "CALLS tests/test_class_ingest.py:0 -> updater", + "CALLS tests/test_class_ingest.py:0 -> value", + "CALLS tests/test_classless_constructor_calls.py:0 -> NodeLabel", + "CALLS tests/test_classless_constructor_calls.py:0 -> RelationshipType", + "CALLS tests/test_classless_constructor_calls.py:0 -> TestConstructionEdges", + "CALLS tests/test_classless_constructor_calls.py:0 -> ensure_relationship_batch", + "CALLS tests/test_classless_constructor_calls.py:0 -> mock_ingestor", + "CALLS tests/test_classless_constructor_calls.py:0 -> temp_repo", + "CALLS tests/test_classless_constructor_calls.py:0 -> test_class_with_init_emits_both_instantiates_and_init_call", + "CALLS tests/test_classless_constructor_calls.py:0 -> test_dataclass_construction_emits_instantiates_not_calls", + "CALLS tests/test_classless_constructor_calls.py:0 -> type", + "CALLS tests/test_cli_autosync.py:0 -> _run_graph_sync", + "CALLS tests/test_cli_autosync.py:0 -> _update_and_validate_models", + "CALLS tests/test_cli_autosync.py:0 -> agent", + "CALLS tests/test_cli_autosync.py:0 -> cli", + "CALLS tests/test_cli_autosync.py:0 -> connect_memgraph", + "CALLS tests/test_cli_autosync.py:0 -> main_async", + "CALLS tests/test_cli_autosync.py:0 -> main_single_query", + "CALLS tests/test_cli_autosync.py:0 -> mock_agent_loops", + "CALLS tests/test_cli_autosync.py:0 -> mock_ingestor", + "CALLS tests/test_cli_autosync.py:0 -> mock_memgraph_connect", + "CALLS tests/test_cli_autosync.py:0 -> mock_sync_path", + "CALLS tests/test_cli_autosync.py:0 -> mock_validate_models", + "CALLS tests/test_cli_autosync.py:0 -> name", + "CALLS tests/test_cli_autosync.py:0 -> repo", + "CALLS tests/test_cli_autosync.py:0 -> run", + "CALLS tests/test_cli_autosync.py:0 -> runner", + "CALLS tests/test_cli_autosync.py:0 -> start", + "CALLS tests/test_cli_autosync.py:0 -> 
test_start_auto_sync_respects_explicit_project_name", + "CALLS tests/test_cli_autosync.py:0 -> test_start_auto_sync_uses_derived_project_name_when_none_provided", + "CALLS tests/test_cli_autosync.py:0 -> test_start_clean_without_update_graph_does_not_sync", + "CALLS tests/test_cli_autosync.py:0 -> test_start_default_triggers_auto_sync", + "CALLS tests/test_cli_autosync.py:0 -> test_start_no_sync_skips_auto_sync", + "CALLS tests/test_cli_autosync.py:0 -> test_start_update_graph_uses_sync_helper", + "CALLS tests/test_cli_clean.py:0 -> GraphUpdater", + "CALLS tests/test_cli_clean.py:0 -> TestCleanWithUpdateGraph", + "CALLS tests/test_cli_clean.py:0 -> TestCleanWithoutUpdateGraph", + "CALLS tests/test_cli_clean.py:0 -> _update_and_validate_models", + "CALLS tests/test_cli_clean.py:0 -> clean_database", + "CALLS tests/test_cli_clean.py:0 -> cli", + "CALLS tests/test_cli_clean.py:0 -> connect_memgraph", + "CALLS tests/test_cli_clean.py:0 -> load_cgrignore_patterns", + "CALLS tests/test_cli_clean.py:0 -> load_parsers", + "CALLS tests/test_cli_clean.py:0 -> mock_ingestor", + "CALLS tests/test_cli_clean.py:0 -> mock_memgraph_connect", + "CALLS tests/test_cli_clean.py:0 -> mock_updater", + "CALLS tests/test_cli_clean.py:0 -> repo", + "CALLS tests/test_cli_clean.py:0 -> runner", + "CALLS tests/test_cli_clean.py:0 -> start", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_deletes_hash_cache", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_does_not_invoke_graph_updater", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_no_cache_file_still_succeeds", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_shows_clean_done_message", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_skips_model_validation", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_wipes_database", + "CALLS tests/test_cli_clean.py:0 -> test_clean_with_update_calls_clean_database", + "CALLS tests/test_cli_clean.py:0 -> test_clean_with_update_deletes_hash_cache", + "CALLS 
tests/test_cli_clean.py:0 -> test_update_without_clean_preserves_hash_cache", + "CALLS tests/test_cli_delete_project.py:0 -> clean_database", + "CALLS tests/test_cli_delete_project.py:0 -> cli", + "CALLS tests/test_cli_delete_project.py:0 -> connect_memgraph", + "CALLS tests/test_cli_delete_project.py:0 -> delete_project", + "CALLS tests/test_cli_delete_project.py:0 -> delete_project_embeddings", + "CALLS tests/test_cli_delete_project.py:0 -> fetch_all", + "CALLS tests/test_cli_delete_project.py:0 -> list_projects", + "CALLS tests/test_cli_delete_project.py:0 -> mock_ingestor", + "CALLS tests/test_cli_delete_project.py:0 -> mock_memgraph_connect", + "CALLS tests/test_cli_delete_project.py:0 -> name", + "CALLS tests/test_cli_delete_project.py:0 -> repo", + "CALLS tests/test_cli_delete_project.py:0 -> runner", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_calls_ingestor_delete_project", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_cleans_embeddings_with_node_ids", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_does_not_wipe_other_projects", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_fails_when_project_missing", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_rejects_blank_name", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_removes_hash_cache_when_repo_path_given", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_shows_success_message", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_without_repo_path_leaves_unrelated_hash_caches", + "CALLS tests/test_cli_delete_project.py:0 -> text", + "CALLS tests/test_cli_repo_path_validation.py:0 -> TestIndexRepoPathValidation", + "CALLS tests/test_cli_repo_path_validation.py:0 -> TestStartRepoPathValidation", + "CALLS tests/test_cli_repo_path_validation.py:0 -> _maybe_start_stack", + "CALLS tests/test_cli_repo_path_validation.py:0 -> cli", + "CALLS 
tests/test_cli_repo_path_validation.py:0 -> connect_memgraph", + "CALLS tests/test_cli_repo_path_validation.py:0 -> index", + "CALLS tests/test_cli_repo_path_validation.py:0 -> mock_ingestor", + "CALLS tests/test_cli_repo_path_validation.py:0 -> mock_memgraph_connect", + "CALLS tests/test_cli_repo_path_validation.py:0 -> repo", + "CALLS tests/test_cli_repo_path_validation.py:0 -> runner", + "CALLS tests/test_cli_repo_path_validation.py:0 -> start", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_file_path_exits_with_error", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_git_dir_does_not_warn", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_git_file_worktree_does_not_warn", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_index_nonexistent_path_exits_with_error", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_nonexistent_path_exits_with_error", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_valid_non_git_dir_warns_but_proceeds", + "CALLS tests/test_cli_smoke.py:0 -> cli", + "CALLS tests/test_cli_smoke.py:0 -> metadata", + "CALLS tests/test_cli_smoke.py:0 -> parent", + "CALLS tests/test_cli_smoke.py:0 -> test_help_command_works", + "CALLS tests/test_cli_smoke.py:0 -> test_import_cli_module", + "CALLS tests/test_cli_smoke.py:0 -> test_version_flag", + "CALLS tests/test_cli_smoke.py:0 -> text", + "CALLS tests/test_code_retrieval.py:0 -> TestCodeRetrieverInit", + "CALLS tests/test_code_retrieval.py:0 -> TestCreateCodeRetrievalTool", + "CALLS tests/test_code_retrieval.py:0 -> TestFindCodeSnippet", + "CALLS tests/test_code_retrieval.py:0 -> description", + "CALLS tests/test_code_retrieval.py:0 -> fetch_all", + "CALLS tests/test_code_retrieval.py:0 -> mock_ingestor", + "CALLS tests/test_code_retrieval.py:0 -> name", + "CALLS tests/test_code_retrieval.py:0 -> project_root", + "CALLS tests/test_code_retrieval.py:0 -> retriever", + "CALLS tests/test_code_retrieval.py:0 -> start", + "CALLS 
tests/test_code_retrieval.py:0 -> test_creates_tool_with_description", + "CALLS tests/test_code_retrieval.py:0 -> test_handles_ingestor_error", + "CALLS tests/test_code_retrieval.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_code_retrieval.py:0 -> test_init_stores_ingestor", + "CALLS tests/test_code_retrieval.py:0 -> test_returns_not_found_when_missing_end_line", + "CALLS tests/test_code_retrieval.py:0 -> test_returns_not_found_when_missing_path", + "CALLS tests/test_code_retrieval.py:0 -> test_returns_not_found_when_missing_start_line", + "CALLS tests/test_code_retrieval.py:0 -> test_returns_not_found_when_no_results", + "CALLS tests/test_code_retrieval.py:0 -> test_tool_calls_retriever", + "CALLS tests/test_code_retrieval.py:0 -> test_uses_cypher_query_constant", + "CALLS tests/test_codebase_query.py:0 -> TestCreateQueryTool", + "CALLS tests/test_codebase_query.py:0 -> TestQueryCodebaseKnowledgeGraph", + "CALLS tests/test_codebase_query.py:0 -> TestQueryResultFormatting", + "CALLS tests/test_codebase_query.py:0 -> TestQueryWithVariousDataTypes", + "CALLS tests/test_codebase_query.py:0 -> anyio_backend", + "CALLS tests/test_codebase_query.py:0 -> description", + "CALLS tests/test_codebase_query.py:0 -> fetch_all", + "CALLS tests/test_codebase_query.py:0 -> generate", + "CALLS tests/test_codebase_query.py:0 -> mock_console", + "CALLS tests/test_codebase_query.py:0 -> mock_cypher_gen", + "CALLS tests/test_codebase_query.py:0 -> mock_ingestor", + "CALLS tests/test_codebase_query.py:0 -> name", + "CALLS tests/test_codebase_query.py:0 -> summary", + "CALLS tests/test_codebase_query.py:0 -> test_creates_default_console", + "CALLS tests/test_codebase_query.py:0 -> test_creates_tool_instance", + "CALLS tests/test_codebase_query.py:0 -> test_database_error_handled", + "CALLS tests/test_codebase_query.py:0 -> test_default_console_writes_to_stderr", + "CALLS tests/test_codebase_query.py:0 -> test_empty_results_returns_zero_count", + "CALLS 
tests/test_codebase_query.py:0 -> test_handles_boolean_values", + "CALLS tests/test_codebase_query.py:0 -> test_handles_none_values", + "CALLS tests/test_codebase_query.py:0 -> test_handles_numeric_values", + "CALLS tests/test_codebase_query.py:0 -> test_llm_generation_error_handled", + "CALLS tests/test_codebase_query.py:0 -> test_query_calls_cypher_generator", + "CALLS tests/test_codebase_query.py:0 -> test_query_calls_ingestor_fetch_all", + "CALLS tests/test_codebase_query.py:0 -> test_query_timeout_handled", + "CALLS tests/test_codebase_query.py:0 -> test_result_contains_query_used", + "CALLS tests/test_codebase_query.py:0 -> test_result_summary_contains_count", + "CALLS tests/test_codebase_query.py:0 -> test_successful_query_returns_results", + "CALLS tests/test_codebase_query.py:0 -> test_tool_has_description", + "CALLS tests/test_codebase_query.py:0 -> test_uses_provided_console", + "CALLS tests/test_codebase_query.py:0 -> type", + "CALLS tests/test_complex_cross_file_calls.py:0 -> GraphUpdater", + "CALLS tests/test_complex_cross_file_calls.py:0 -> complex_project", + "CALLS tests/test_complex_cross_file_calls.py:0 -> main", + "CALLS tests/test_complex_cross_file_calls.py:0 -> mock_ingestor", + "CALLS tests/test_complex_cross_file_calls.py:0 -> name", + "CALLS tests/test_complex_cross_file_calls.py:0 -> process", + "CALLS tests/test_complex_cross_file_calls.py:0 -> processor", + "CALLS tests/test_complex_cross_file_calls.py:0 -> temp_repo", + "CALLS tests/test_complex_cross_file_calls.py:0 -> test_complex_cross_file_function_calls", + "CALLS tests/test_complex_cross_file_calls.py:0 -> test_cross_file_calls_with_short_names", + "CALLS tests/test_complex_cross_file_calls.py:0 -> up", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> FileReader", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> close", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> get", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> 
insert", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> main", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> mock_ingestor", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> parse", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> process", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> read", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> temp_repo", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_all_languages_stdlib_consistency", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_cpp_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_go_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_java_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_javascript_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_lua_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_python_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_rust_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_typescript_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> text", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> value", + "CALLS tests/test_conditional_alias_call.py:0 -> CallProcessor", + "CALLS tests/test_conditional_alias_call.py:0 -> RelationshipType", + "CALLS tests/test_conditional_alias_call.py:0 -> TestConditionalAliasCall", + "CALLS tests/test_conditional_alias_call.py:0 -> _ingest_function_calls", + "CALLS tests/test_conditional_alias_call.py:0 -> ensure_node_batch", + "CALLS tests/test_conditional_alias_call.py:0 -> ensure_relationship_batch", + "CALLS tests/test_conditional_alias_call.py:0 -> execute_write", + "CALLS 
tests/test_conditional_alias_call.py:0 -> fetch_all", + "CALLS tests/test_conditional_alias_call.py:0 -> flush_all", + "CALLS tests/test_conditional_alias_call.py:0 -> graph_updater", + "CALLS tests/test_conditional_alias_call.py:0 -> parent", + "CALLS tests/test_conditional_alias_call.py:0 -> repo_path", + "CALLS tests/test_conditional_alias_call.py:0 -> resolve_builtin_call", + "CALLS tests/test_conditional_alias_call.py:0 -> test_conditional_bound_method_alias_resolves", + "CALLS tests/test_conditional_alias_call.py:0 -> value", + "CALLS tests/test_config_validation.py:0 -> GoogleProviderType", + "CALLS tests/test_config_validation.py:0 -> Provider", + "CALLS tests/test_config_validation.py:0 -> TestFormatMissingApiKeyErrors", + "CALLS tests/test_config_validation.py:0 -> TestValidateApiKey", + "CALLS tests/test_config_validation.py:0 -> keys", + "CALLS tests/test_config_validation.py:0 -> test_case_insensitive_lookup", + "CALLS tests/test_config_validation.py:0 -> test_default_role_omits_role_from_message", + "CALLS tests/test_config_validation.py:0 -> test_google_gla_requires_api_key", + "CALLS tests/test_config_validation.py:0 -> test_google_vertex_skips_validation", + "CALLS tests/test_config_validation.py:0 -> test_invalid_api_key_raises", + "CALLS tests/test_config_validation.py:0 -> test_known_provider_anthropic", + "CALLS tests/test_config_validation.py:0 -> test_known_provider_openai", + "CALLS tests/test_config_validation.py:0 -> test_local_providers_skip_validation", + "CALLS tests/test_config_validation.py:0 -> test_role_appears_in_message", + "CALLS tests/test_config_validation.py:0 -> test_role_forwarded_to_error_message", + "CALLS tests/test_config_validation.py:0 -> test_unknown_provider_generic_message", + "CALLS tests/test_config_validation.py:0 -> test_valid_api_key_passes", + "CALLS tests/test_constructor_call_resolution.py:0 -> RelationshipType", + "CALLS tests/test_constructor_call_resolution.py:0 -> TestConstructorCallResolution", + "CALLS 
tests/test_constructor_call_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_constructor_call_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_constructor_call_resolution.py:0 -> execute_write", + "CALLS tests/test_constructor_call_resolution.py:0 -> fetch_all", + "CALLS tests/test_constructor_call_resolution.py:0 -> flush_all", + "CALLS tests/test_constructor_call_resolution.py:0 -> graph_updater", + "CALLS tests/test_constructor_call_resolution.py:0 -> repo_path", + "CALLS tests/test_constructor_call_resolution.py:0 -> test_instantiation_calls_init", + "CALLS tests/test_constructor_call_resolution.py:0 -> test_instantiation_without_init_is_not_dropped_to_class", + "CALLS tests/test_cpp_attributes.py:0 -> cpp_attributes_project", + "CALLS tests/test_cpp_attributes.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_attributes.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_attributes.py:0 -> optimize", + "CALLS tests/test_cpp_attributes.py:0 -> status", + "CALLS tests/test_cpp_attributes.py:0 -> style", + "CALLS tests/test_cpp_attributes.py:0 -> temp_repo", + "CALLS tests/test_cpp_attributes.py:0 -> test_attribute_combinations_and_edge_cases", + "CALLS tests/test_cpp_attributes.py:0 -> test_compiler_specific_attributes", + "CALLS tests/test_cpp_attributes.py:0 -> test_cpp_attributes_comprehensive", + "CALLS tests/test_cpp_attributes.py:0 -> test_standard_attributes", + "CALLS tests/test_cpp_attributes.py:0 -> text", + "CALLS tests/test_cpp_attributes.py:0 -> type", + "CALLS tests/test_cpp_attributes.py:0 -> value", + "CALLS tests/test_cpp_basic_syntax.py:0 -> Color", + "CALLS tests/test_cpp_basic_syntax.py:0 -> add", + "CALLS tests/test_cpp_basic_syntax.py:0 -> cpp_basic_project", + "CALLS tests/test_cpp_basic_syntax.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_basic_syntax.py:0 -> main", + "CALLS tests/test_cpp_basic_syntax.py:0 -> method_calls", + "CALLS tests/test_cpp_basic_syntax.py:0 -> mock_ingestor", + 
"CALLS tests/test_cpp_basic_syntax.py:0 -> name", + "CALLS tests/test_cpp_basic_syntax.py:0 -> relationships", + "CALLS tests/test_cpp_basic_syntax.py:0 -> start", + "CALLS tests/test_cpp_basic_syntax.py:0 -> status", + "CALLS tests/test_cpp_basic_syntax.py:0 -> temp_repo", + "CALLS tests/test_cpp_basic_syntax.py:0 -> test_basic_class_declarations", + "CALLS tests/test_cpp_basic_syntax.py:0 -> test_basic_function_declarations", + "CALLS tests/test_cpp_basic_syntax.py:0 -> test_basic_member_functions", + "CALLS tests/test_cpp_basic_syntax.py:0 -> test_basic_namespaces", + "CALLS tests/test_cpp_basic_syntax.py:0 -> test_cpp_basic_comprehensive", + "CALLS tests/test_cpp_casting_operators.py:0 -> Color", + "CALLS tests/test_cpp_casting_operators.py:0 -> GraphNode", + "CALLS tests/test_cpp_casting_operators.py:0 -> cpp_casting_project", + "CALLS tests/test_cpp_casting_operators.py:0 -> get", + "CALLS tests/test_cpp_casting_operators.py:0 -> is_abstract", + "CALLS tests/test_cpp_casting_operators.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_casting_operators.py:0 -> name", + "CALLS tests/test_cpp_casting_operators.py:0 -> nodes", + "CALLS tests/test_cpp_casting_operators.py:0 -> process", + "CALLS tests/test_cpp_casting_operators.py:0 -> relationships", + "CALLS tests/test_cpp_casting_operators.py:0 -> style", + "CALLS tests/test_cpp_casting_operators.py:0 -> temp_repo", + "CALLS tests/test_cpp_casting_operators.py:0 -> test_cpp_casting_comprehensive", + "CALLS tests/test_cpp_casting_operators.py:0 -> test_dynamic_cast_examples", + "CALLS tests/test_cpp_casting_operators.py:0 -> test_other_cast_operators", + "CALLS tests/test_cpp_casting_operators.py:0 -> test_static_cast_examples", + "CALLS tests/test_cpp_casting_operators.py:0 -> type", + "CALLS tests/test_cpp_casting_operators.py:0 -> value", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> Color", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> cpp_inheritance_project", + "CALLS 
tests/test_cpp_classes_inheritance.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> get", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> name", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> parent", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> process", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> relationships", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> run", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> start", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> style", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> temp_repo", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> test_abstract_classes_and_interfaces", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> test_cpp_inheritance_comprehensive", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> test_cpp_inheritance_edge_cases", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> test_multiple_inheritance", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> test_single_inheritance", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> type", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> value", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> walk", + "CALLS tests/test_cpp_comprehensive.py:0 -> ProcessorFactory", + "CALLS tests/test_cpp_comprehensive.py:0 -> add", + "CALLS tests/test_cpp_comprehensive.py:0 -> cpp_comprehensive_project", + "CALLS tests/test_cpp_comprehensive.py:0 -> engine", + "CALLS tests/test_cpp_comprehensive.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_comprehensive.py:0 -> execute", + "CALLS tests/test_cpp_comprehensive.py:0 -> forward", + "CALLS tests/test_cpp_comprehensive.py:0 -> get", + "CALLS tests/test_cpp_comprehensive.py:0 -> main", + "CALLS tests/test_cpp_comprehensive.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_comprehensive.py:0 -> name", + "CALLS tests/test_cpp_comprehensive.py:0 -> process", 
+ "CALLS tests/test_cpp_comprehensive.py:0 -> processor", + "CALLS tests/test_cpp_comprehensive.py:0 -> relationships", + "CALLS tests/test_cpp_comprehensive.py:0 -> temp_repo", + "CALLS tests/test_cpp_comprehensive.py:0 -> test_comprehensive_cpp_features", + "CALLS tests/test_cpp_comprehensive.py:0 -> test_cpp_comprehensive_complete", + "CALLS tests/test_cpp_comprehensive.py:0 -> test_real_world_cpp_scenario", + "CALLS tests/test_cpp_comprehensive.py:0 -> text", + "CALLS tests/test_cpp_comprehensive.py:0 -> type", + "CALLS tests/test_cpp_comprehensive.py:0 -> value", + "CALLS tests/test_cpp_concepts.py:0 -> cpp_concepts_project", + "CALLS tests/test_cpp_concepts.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_concepts.py:0 -> factory", + "CALLS tests/test_cpp_concepts.py:0 -> get", + "CALLS tests/test_cpp_concepts.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_concepts.py:0 -> process", + "CALLS tests/test_cpp_concepts.py:0 -> processor", + "CALLS tests/test_cpp_concepts.py:0 -> relationships", + "CALLS tests/test_cpp_concepts.py:0 -> temp_repo", + "CALLS tests/test_cpp_concepts.py:0 -> test_advanced_concept_patterns", + "CALLS tests/test_cpp_concepts.py:0 -> test_concept_composition_and_specialization", + "CALLS tests/test_cpp_concepts.py:0 -> test_concept_definitions_and_constraints", + "CALLS tests/test_cpp_concepts.py:0 -> type", + "CALLS tests/test_cpp_concepts.py:0 -> value", + "CALLS tests/test_cpp_concepts.py:0 -> wrapper", + "CALLS tests/test_cpp_concurrency.py:0 -> NodeType", + "CALLS tests/test_cpp_concurrency.py:0 -> add", + "CALLS tests/test_cpp_concurrency.py:0 -> clear", + "CALLS tests/test_cpp_concurrency.py:0 -> cpp_concurrency_project", + "CALLS tests/test_cpp_concurrency.py:0 -> done", + "CALLS tests/test_cpp_concurrency.py:0 -> ensure_node_batch", + "CALLS tests/test_cpp_concurrency.py:0 -> forward", + "CALLS tests/test_cpp_concurrency.py:0 -> get", + "CALLS tests/test_cpp_concurrency.py:0 -> index", + "CALLS 
tests/test_cpp_concurrency.py:0 -> insert", + "CALLS tests/test_cpp_concurrency.py:0 -> load", + "CALLS tests/test_cpp_concurrency.py:0 -> method_calls", + "CALLS tests/test_cpp_concurrency.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_concurrency.py:0 -> name", + "CALLS tests/test_cpp_concurrency.py:0 -> parent", + "CALLS tests/test_cpp_concurrency.py:0 -> process", + "CALLS tests/test_cpp_concurrency.py:0 -> read", + "CALLS tests/test_cpp_concurrency.py:0 -> relationships", + "CALLS tests/test_cpp_concurrency.py:0 -> run", + "CALLS tests/test_cpp_concurrency.py:0 -> start", + "CALLS tests/test_cpp_concurrency.py:0 -> submit", + "CALLS tests/test_cpp_concurrency.py:0 -> temp_repo", + "CALLS tests/test_cpp_concurrency.py:0 -> test_atomics_and_memory_ordering", + "CALLS tests/test_cpp_concurrency.py:0 -> test_condition_variables_and_futures", + "CALLS tests/test_cpp_concurrency.py:0 -> test_cpp_concurrency_comprehensive", + "CALLS tests/test_cpp_concurrency.py:0 -> test_mutex_and_locks", + "CALLS tests/test_cpp_concurrency.py:0 -> test_thread_basics", + "CALLS tests/test_cpp_concurrency.py:0 -> value", + "CALLS tests/test_cpp_concurrency.py:0 -> wrapper", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> cpp_constexpr_project", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> factory", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> forward", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> index", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> name", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> nodes", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> relationships", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> temp_repo", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> test_basic_constexpr", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> test_constexpr_if_and_templates", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 
-> test_cpp_constexpr_comprehensive", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> type", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> up", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> value", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> close", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> cpp_constructor_project", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> factory", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> flush", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> main", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> name", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> relationships", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> start", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> temp_repo", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> test_basic_constructors_destructors", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> test_constructor_destructor_complete", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> test_cpp_constructor_destructor_comprehensive", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> test_raii_patterns", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> test_special_member_functions", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> up", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> value", + "CALLS tests/test_cpp_coroutines.py:0 -> cpp_coroutines_project", + "CALLS tests/test_cpp_coroutines.py:0 -> done", + "CALLS tests/test_cpp_coroutines.py:0 -> down", + "CALLS tests/test_cpp_coroutines.py:0 -> get", + "CALLS tests/test_cpp_coroutines.py:0 -> get_data", + "CALLS tests/test_cpp_coroutines.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_coroutines.py:0 -> name", + "CALLS tests/test_cpp_coroutines.py:0 -> operation", + "CALLS tests/test_cpp_coroutines.py:0 -> relationships", + "CALLS 
tests/test_cpp_coroutines.py:0 -> start", + "CALLS tests/test_cpp_coroutines.py:0 -> style", + "CALLS tests/test_cpp_coroutines.py:0 -> temp_repo", + "CALLS tests/test_cpp_coroutines.py:0 -> test_async_await_coroutines", + "CALLS tests/test_cpp_coroutines.py:0 -> test_basic_generator_coroutines", + "CALLS tests/test_cpp_coroutines.py:0 -> test_custom_coroutine_types", + "CALLS tests/test_cpp_coroutines.py:0 -> up", + "CALLS tests/test_cpp_coroutines.py:0 -> value", + "CALLS tests/test_cpp_coroutines.py:0 -> wrapper", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> class_qn", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> cpp_cross_file_project", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> method_qn", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> name", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> nodes", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> start", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> temp_repo", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_cross_file_constructor_destructor", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_header_source_method_resolution", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_multiple_source_files_one_class", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_nested_namespace_cross_file", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_no_orphan_methods_across_files", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_same_file_out_of_class_still_works", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> clear", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> cpp_singleton_project", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> load", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> main", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> name", + "CALLS 
tests/test_cpp_cross_file_singleton.py:0 -> nodes", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> save", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> start", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> temp_repo", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> test_cpp_singleton_pattern_cross_file_calls", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> up", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> value", + "CALLS tests/test_cpp_crosslang_qn_collision.py:0 -> NodeLabel", + "CALLS tests/test_cpp_crosslang_qn_collision.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_crosslang_qn_collision.py:0 -> name", + "CALLS tests/test_cpp_crosslang_qn_collision.py:0 -> nodes", + "CALLS tests/test_cpp_crosslang_qn_collision.py:0 -> render", + "CALLS tests/test_cpp_crosslang_qn_collision.py:0 -> repo", + "CALLS tests/test_cpp_crosslang_qn_collision.py:0 -> temp_repo", + "CALLS tests/test_cpp_crosslang_qn_collision.py:0 -> test_cpp_method_does_not_steal_python_method_qn", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> GraphNode", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> cpp_designated_consteval_project", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> factory", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> forward", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> get", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> index", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> metadata", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> name", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> nodes", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> operation", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> processor", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> relationships", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> sink", 
+ "CALLS tests/test_cpp_designated_init_consteval.py:0 -> start", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> temp_repo", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> test_comprehensive_modern_cpp_complete", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> test_consteval_immediate_functions", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> test_designated_initializers", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> test_lambda_init_captures", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> type", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> value", + "CALLS tests/test_cpp_error_handling.py:0 -> cpp_error_handling_project", + "CALLS tests/test_cpp_error_handling.py:0 -> get", + "CALLS tests/test_cpp_error_handling.py:0 -> index", + "CALLS tests/test_cpp_error_handling.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_error_handling.py:0 -> name", + "CALLS tests/test_cpp_error_handling.py:0 -> process", + "CALLS tests/test_cpp_error_handling.py:0 -> processor", + "CALLS tests/test_cpp_error_handling.py:0 -> relationships", + "CALLS tests/test_cpp_error_handling.py:0 -> save", + "CALLS tests/test_cpp_error_handling.py:0 -> status", + "CALLS tests/test_cpp_error_handling.py:0 -> temp_repo", + "CALLS tests/test_cpp_error_handling.py:0 -> test_basic_exception_handling", + "CALLS tests/test_cpp_error_handling.py:0 -> test_cpp_error_handling_comprehensive", + "CALLS tests/test_cpp_error_handling.py:0 -> test_raii_patterns", + "CALLS tests/test_cpp_error_handling.py:0 -> up", + "CALLS tests/test_cpp_error_handling.py:0 -> value", + "CALLS tests/test_cpp_error_handling.py:0 -> wrapper", + "CALLS tests/test_cpp_format_spaceship.py:0 -> cpp_format_spaceship_project", + "CALLS tests/test_cpp_format_spaceship.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_format_spaceship.py:0 -> name", + "CALLS tests/test_cpp_format_spaceship.py:0 -> parse", + "CALLS tests/test_cpp_format_spaceship.py:0 -> 
temp_repo", + "CALLS tests/test_cpp_format_spaceship.py:0 -> test_format_library_basics", + "CALLS tests/test_cpp_format_spaceship.py:0 -> test_format_spaceship_complete", + "CALLS tests/test_cpp_format_spaceship.py:0 -> test_format_spaceship_integration", + "CALLS tests/test_cpp_format_spaceship.py:0 -> test_spaceship_operator", + "CALLS tests/test_cpp_format_spaceship.py:0 -> value", + "CALLS tests/test_cpp_friend_functions.py:0 -> Analyzer", + "CALLS tests/test_cpp_friend_functions.py:0 -> GraphNode", + "CALLS tests/test_cpp_friend_functions.py:0 -> add", + "CALLS tests/test_cpp_friend_functions.py:0 -> cpp_friend_project", + "CALLS tests/test_cpp_friend_functions.py:0 -> insert", + "CALLS tests/test_cpp_friend_functions.py:0 -> items", + "CALLS tests/test_cpp_friend_functions.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_friend_functions.py:0 -> name", + "CALLS tests/test_cpp_friend_functions.py:0 -> nodes", + "CALLS tests/test_cpp_friend_functions.py:0 -> relationships", + "CALLS tests/test_cpp_friend_functions.py:0 -> temp_repo", + "CALLS tests/test_cpp_friend_functions.py:0 -> test_cpp_friend_comprehensive", + "CALLS tests/test_cpp_friend_functions.py:0 -> test_friend_functions", + "CALLS tests/test_cpp_friend_functions.py:0 -> test_friend_templates", + "CALLS tests/test_cpp_friend_functions.py:0 -> type", + "CALLS tests/test_cpp_frontend_calls.py:0 -> add", + "CALLS tests/test_cpp_frontend_calls.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_frontend_calls.py:0 -> name", + "CALLS tests/test_cpp_frontend_calls.py:0 -> temp_repo", + "CALLS tests/test_cpp_frontend_calls.py:0 -> test_method_calls_free_function", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> name", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> parse", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> read", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> run", + "CALLS 
tests/test_cpp_frontend_qn_parity.py:0 -> temp_repo", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> test_frontend_emits_inheritance_and_operator", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> test_frontend_qns_match_tree_sitter", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> test_frontend_recovers_macro_mangled_class", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> type", + "CALLS tests/test_cpp_frontend_types.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_frontend_types.py:0 -> name", + "CALLS tests/test_cpp_frontend_types.py:0 -> nodes", + "CALLS tests/test_cpp_frontend_types.py:0 -> temp_repo", + "CALLS tests/test_cpp_frontend_types.py:0 -> test_frontend_emits_type_aliases", + "CALLS tests/test_cpp_frontend_types.py:0 -> type", + "CALLS tests/test_cpp_frontend_wiring.py:0 -> CppFrontend", + "CALLS tests/test_cpp_frontend_wiring.py:0 -> graph_updater", + "CALLS tests/test_cpp_frontend_wiring.py:0 -> parse", + "CALLS tests/test_cpp_frontend_wiring.py:0 -> temp_repo", + "CALLS tests/test_cpp_frontend_wiring.py:0 -> test_default_treesitter_does_not_recover_macro_class", + "CALLS tests/test_cpp_frontend_wiring.py:0 -> test_libclang_frontend_recovers_macro_class", + "CALLS tests/test_cpp_includes.py:0 -> add", + "CALLS tests/test_cpp_includes.py:0 -> cpp_includes_project", + "CALLS tests/test_cpp_includes.py:0 -> export", + "CALLS tests/test_cpp_includes.py:0 -> items", + "CALLS tests/test_cpp_includes.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_includes.py:0 -> name", + "CALLS tests/test_cpp_includes.py:0 -> process", + "CALLS tests/test_cpp_includes.py:0 -> processor", + "CALLS tests/test_cpp_includes.py:0 -> relationships", + "CALLS tests/test_cpp_includes.py:0 -> status", + "CALLS tests/test_cpp_includes.py:0 -> style", + "CALLS tests/test_cpp_includes.py:0 -> temp_repo", + "CALLS tests/test_cpp_includes.py:0 -> test_conditional_includes", + "CALLS tests/test_cpp_includes.py:0 -> test_cpp20_module_import_syntax", + "CALLS 
tests/test_cpp_includes.py:0 -> test_cpp20_module_partition_imports", + "CALLS tests/test_cpp_includes.py:0 -> test_cpp_includes_comprehensive", + "CALLS tests/test_cpp_includes.py:0 -> test_include_guards_and_pragma_once", + "CALLS tests/test_cpp_includes.py:0 -> test_local_header_includes", + "CALLS tests/test_cpp_includes.py:0 -> test_standard_library_includes", + "CALLS tests/test_cpp_includes.py:0 -> test_system_vs_local_includes", + "CALLS tests/test_cpp_includes.py:0 -> value", + "CALLS tests/test_cpp_lambda_captures.py:0 -> cpp_lambda_project", + "CALLS tests/test_cpp_lambda_captures.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_lambda_captures.py:0 -> factory", + "CALLS tests/test_cpp_lambda_captures.py:0 -> get", + "CALLS tests/test_cpp_lambda_captures.py:0 -> index", + "CALLS tests/test_cpp_lambda_captures.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_lambda_captures.py:0 -> name", + "CALLS tests/test_cpp_lambda_captures.py:0 -> operation", + "CALLS tests/test_cpp_lambda_captures.py:0 -> processor", + "CALLS tests/test_cpp_lambda_captures.py:0 -> relationships", + "CALLS tests/test_cpp_lambda_captures.py:0 -> start", + "CALLS tests/test_cpp_lambda_captures.py:0 -> temp_repo", + "CALLS tests/test_cpp_lambda_captures.py:0 -> test_basic_lambda_captures", + "CALLS tests/test_cpp_lambda_captures.py:0 -> test_generalized_captures", + "CALLS tests/test_cpp_lambda_captures.py:0 -> test_lambda_validation_complete", + "CALLS tests/test_cpp_lambda_captures.py:0 -> text", + "CALLS tests/test_cpp_lambda_captures.py:0 -> value", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> add", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> cpp_lambdas_project", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> factory", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> get", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> handler", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> 
name", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> operation", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> processor", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> relationships", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> run", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> start", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> temp_repo", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> test_async_functional_patterns", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> test_basic_lambdas", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> test_cpp_lambdas_comprehensive", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> type", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> value", + "CALLS tests/test_cpp_line_numbers.py:0 -> TestConstAndStaticMethodLineNumbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> TestDeclarationOnlyMethods", + "CALLS tests/test_cpp_line_numbers.py:0 -> TestInlineMethodLineNumbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> TestIssue194OutOfClassLineNumbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> TestMixedInlineAndOutOfClassLineNumbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> TestNamespacedClassOutOfClassLineNumbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> TestNestedClassOutOfClassLineNumbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> TestOperatorOverloadingLineNumbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> TestStructMethodLineNumbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> TestTemplateOutOfClassLineNumbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> _get_line_span", + "CALLS tests/test_cpp_line_numbers.py:0 -> add", + "CALLS tests/test_cpp_line_numbers.py:0 -> cpp_line_numbers_project", + "CALLS tests/test_cpp_line_numbers.py:0 -> index", + "CALLS tests/test_cpp_line_numbers.py:0 -> method_calls", + "CALLS tests/test_cpp_line_numbers.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_line_numbers.py:0 -> name", + "CALLS tests/test_cpp_line_numbers.py:0 
-> start", + "CALLS tests/test_cpp_line_numbers.py:0 -> temp_repo", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_const_method_has_correct_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_constructor_out_of_class_has_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_declaration_only_methods_have_declaration_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_deeply_nested_namespace_has_correct_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_destructor_out_of_class_has_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_inline_method_has_correct_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_mixed_methods_have_correct_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_multiline_inline_method_has_correct_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_multiple_out_of_class_methods_have_correct_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_namespaced_class_method_has_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_nested_class_method_has_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_operator_methods_have_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_simple_out_of_class_method_has_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_static_method_has_correct_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_struct_out_of_class_method_has_definition_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_template_method_has_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> value", + "CALLS tests/test_cpp_memory_management.py:0 -> add", + "CALLS tests/test_cpp_memory_management.py:0 -> children", + "CALLS tests/test_cpp_memory_management.py:0 -> cpp_memory_project", + "CALLS tests/test_cpp_memory_management.py:0 -> forward", + "CALLS tests/test_cpp_memory_management.py:0 -> get", + "CALLS 
tests/test_cpp_memory_management.py:0 -> index", + "CALLS tests/test_cpp_memory_management.py:0 -> items", + "CALLS tests/test_cpp_memory_management.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_memory_management.py:0 -> name", + "CALLS tests/test_cpp_memory_management.py:0 -> operation", + "CALLS tests/test_cpp_memory_management.py:0 -> parent", + "CALLS tests/test_cpp_memory_management.py:0 -> relationships", + "CALLS tests/test_cpp_memory_management.py:0 -> temp_repo", + "CALLS tests/test_cpp_memory_management.py:0 -> test_cpp_memory_management_comprehensive", + "CALLS tests/test_cpp_memory_management.py:0 -> test_move_semantics", + "CALLS tests/test_cpp_memory_management.py:0 -> test_smart_pointers", + "CALLS tests/test_cpp_memory_management.py:0 -> up", + "CALLS tests/test_cpp_memory_management.py:0 -> value", + "CALLS tests/test_cpp_modern_features.py:0 -> add", + "CALLS tests/test_cpp_modern_features.py:0 -> cpp_modern_project", + "CALLS tests/test_cpp_modern_features.py:0 -> forward", + "CALLS tests/test_cpp_modern_features.py:0 -> get", + "CALLS tests/test_cpp_modern_features.py:0 -> index", + "CALLS tests/test_cpp_modern_features.py:0 -> main", + "CALLS tests/test_cpp_modern_features.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_modern_features.py:0 -> name", + "CALLS tests/test_cpp_modern_features.py:0 -> process", + "CALLS tests/test_cpp_modern_features.py:0 -> relationships", + "CALLS tests/test_cpp_modern_features.py:0 -> start", + "CALLS tests/test_cpp_modern_features.py:0 -> style", + "CALLS tests/test_cpp_modern_features.py:0 -> temp_repo", + "CALLS tests/test_cpp_modern_features.py:0 -> test_auto_keyword_type_deduction", + "CALLS tests/test_cpp_modern_features.py:0 -> test_cpp_modern_comprehensive", + "CALLS tests/test_cpp_modern_features.py:0 -> test_lambda_expressions", + "CALLS tests/test_cpp_modern_features.py:0 -> test_smart_pointers_move_semantics", + "CALLS tests/test_cpp_modern_features.py:0 -> test_structured_bindings_ranges", + 
"CALLS tests/test_cpp_modern_features.py:0 -> test_variadic_templates_constexpr", + "CALLS tests/test_cpp_modern_features.py:0 -> text", + "CALLS tests/test_cpp_modern_features.py:0 -> type", + "CALLS tests/test_cpp_modern_features.py:0 -> value", + "CALLS tests/test_cpp_modules.py:0 -> add", + "CALLS tests/test_cpp_modules.py:0 -> clear", + "CALLS tests/test_cpp_modules.py:0 -> cpp_modules_project", + "CALLS tests/test_cpp_modules.py:0 -> export", + "CALLS tests/test_cpp_modules.py:0 -> get", + "CALLS tests/test_cpp_modules.py:0 -> index", + "CALLS tests/test_cpp_modules.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_modules.py:0 -> name", + "CALLS tests/test_cpp_modules.py:0 -> process", + "CALLS tests/test_cpp_modules.py:0 -> processor", + "CALLS tests/test_cpp_modules.py:0 -> temp_repo", + "CALLS tests/test_cpp_modules.py:0 -> test_basic_module_interface", + "CALLS tests/test_cpp_modules.py:0 -> test_module_imports_usage", + "CALLS tests/test_cpp_modules.py:0 -> test_module_partitions", + "CALLS tests/test_cpp_modules.py:0 -> type", + "CALLS tests/test_cpp_modules.py:0 -> value", + "CALLS tests/test_cpp_move_semantics.py:0 -> NodeType", + "CALLS tests/test_cpp_move_semantics.py:0 -> clear", + "CALLS tests/test_cpp_move_semantics.py:0 -> cpp_move_semantics_project", + "CALLS tests/test_cpp_move_semantics.py:0 -> ensure_node_batch", + "CALLS tests/test_cpp_move_semantics.py:0 -> factory", + "CALLS tests/test_cpp_move_semantics.py:0 -> forward", + "CALLS tests/test_cpp_move_semantics.py:0 -> get", + "CALLS tests/test_cpp_move_semantics.py:0 -> index", + "CALLS tests/test_cpp_move_semantics.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_move_semantics.py:0 -> name", + "CALLS tests/test_cpp_move_semantics.py:0 -> operation", + "CALLS tests/test_cpp_move_semantics.py:0 -> process", + "CALLS tests/test_cpp_move_semantics.py:0 -> processor", + "CALLS tests/test_cpp_move_semantics.py:0 -> relationships", + "CALLS tests/test_cpp_move_semantics.py:0 -> sink", + "CALLS 
tests/test_cpp_move_semantics.py:0 -> temp_repo", + "CALLS tests/test_cpp_move_semantics.py:0 -> test_basic_move_semantics", + "CALLS tests/test_cpp_move_semantics.py:0 -> test_cpp_move_semantics_comprehensive", + "CALLS tests/test_cpp_move_semantics.py:0 -> test_move_optimization_patterns", + "CALLS tests/test_cpp_move_semantics.py:0 -> test_perfect_forwarding", + "CALLS tests/test_cpp_move_semantics.py:0 -> type", + "CALLS tests/test_cpp_move_semantics.py:0 -> value", + "CALLS tests/test_cpp_move_semantics.py:0 -> wrapper", + "CALLS tests/test_cpp_namespaces.py:0 -> Color", + "CALLS tests/test_cpp_namespaces.py:0 -> add", + "CALLS tests/test_cpp_namespaces.py:0 -> cpp_namespaces_project", + "CALLS tests/test_cpp_namespaces.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_namespaces.py:0 -> name", + "CALLS tests/test_cpp_namespaces.py:0 -> process", + "CALLS tests/test_cpp_namespaces.py:0 -> processor", + "CALLS tests/test_cpp_namespaces.py:0 -> relationships", + "CALLS tests/test_cpp_namespaces.py:0 -> style", + "CALLS tests/test_cpp_namespaces.py:0 -> temp_repo", + "CALLS tests/test_cpp_namespaces.py:0 -> test_anonymous_namespaces", + "CALLS tests/test_cpp_namespaces.py:0 -> test_basic_namespaces", + "CALLS tests/test_cpp_namespaces.py:0 -> test_cpp_namespaces_comprehensive", + "CALLS tests/test_cpp_namespaces.py:0 -> test_using_directives", + "CALLS tests/test_cpp_namespaces.py:0 -> text", + "CALLS tests/test_cpp_namespaces.py:0 -> value", + "CALLS tests/test_cpp_operators_overloading.py:0 -> cpp_operators_project", + "CALLS tests/test_cpp_operators_overloading.py:0 -> index", + "CALLS tests/test_cpp_operators_overloading.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_operators_overloading.py:0 -> name", + "CALLS tests/test_cpp_operators_overloading.py:0 -> relationships", + "CALLS tests/test_cpp_operators_overloading.py:0 -> temp_repo", + "CALLS tests/test_cpp_operators_overloading.py:0 -> test_arithmetic_operators", + "CALLS 
tests/test_cpp_operators_overloading.py:0 -> test_comparison_operators", + "CALLS tests/test_cpp_operators_overloading.py:0 -> test_cpp_operators_comprehensive", + "CALLS tests/test_cpp_operators_overloading.py:0 -> test_stream_function_call_operators", + "CALLS tests/test_cpp_operators_overloading.py:0 -> test_subscript_increment_operators", + "CALLS tests/test_cpp_operators_overloading.py:0 -> value", + "CALLS tests/test_cpp_oracle.py:0 -> NodeLabel", + "CALLS tests/test_cpp_oracle.py:0 -> RelationshipType", + "CALLS tests/test_cpp_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_cpp_oracle.py:0 -> name", + "CALLS tests/test_cpp_oracle.py:0 -> nodes", + "CALLS tests/test_cpp_oracle.py:0 -> parent", + "CALLS tests/test_cpp_oracle.py:0 -> test_cgr_matches_libclang_oracle_on_cpp_structure", + "CALLS tests/test_cpp_oracle.py:0 -> test_libclang_oracle_emits_inherits_edges", + "CALLS tests/test_cpp_oracle.py:0 -> test_restrict_to_files_scopes_graph_to_universe", + "CALLS tests/test_cpp_oracle.py:0 -> value", + "CALLS tests/test_cpp_out_of_class_method_calls.py:0 -> add", + "CALLS tests/test_cpp_out_of_class_method_calls.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_out_of_class_method_calls.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_out_of_class_method_calls.py:0 -> temp_repo", + "CALLS tests/test_cpp_out_of_class_method_calls.py:0 -> test_out_of_class_method_call_attributed_to_method_qn", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> cpp_out_of_class_project", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> index", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> method_calls", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> temp_repo", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_deeply_nested_qualified_identifier", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_mixed_inline_and_out_of_class_methods", + "CALLS 
tests/test_cpp_out_of_class_methods.py:0 -> test_nested_namespace_out_of_class_methods", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_out_of_class_constructor_destructor", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_out_of_class_operator_overloading", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_simple_out_of_class_method_definitions", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_template_out_of_class_methods", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> SupportedLanguage", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> TestExtractClassNameFromOutOfClassMethod", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> TestExtractFunctionNameForOutOfClass", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> TestGetInnerFunctionNode", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> TestIsOutOfClassMethodDefinition", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> add", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> children", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> cpp_parser", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> parent", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_constructor_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_destructor_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_inline_method_not_out_of_class", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_lambda_not_out_of_class", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_namespaced_function_not_out_of_class", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_nested_class_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_nested_class_out_of_class_method", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_operator_plus_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_returns_inner_function_for_template", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_returns_none_for_inline_method", + "CALLS 
tests/test_cpp_out_of_class_utils.py:0 -> test_returns_none_for_standalone_function", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_returns_same_node_for_function_definition", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_simple_class_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_simple_method_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_simple_out_of_class_method", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_standalone_function_not_out_of_class", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_struct_class_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_template_class_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_template_method_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_template_out_of_class_method", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> type", + "CALLS tests/test_cpp_preprocessor.py:0 -> cpp_preprocessor_project", + "CALLS tests/test_cpp_preprocessor.py:0 -> generate", + "CALLS tests/test_cpp_preprocessor.py:0 -> get", + "CALLS tests/test_cpp_preprocessor.py:0 -> index", + "CALLS tests/test_cpp_preprocessor.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_preprocessor.py:0 -> name", + "CALLS tests/test_cpp_preprocessor.py:0 -> operation", + "CALLS tests/test_cpp_preprocessor.py:0 -> optimize", + "CALLS tests/test_cpp_preprocessor.py:0 -> processor", + "CALLS tests/test_cpp_preprocessor.py:0 -> relationships", + "CALLS tests/test_cpp_preprocessor.py:0 -> start", + "CALLS tests/test_cpp_preprocessor.py:0 -> status", + "CALLS tests/test_cpp_preprocessor.py:0 -> temp_repo", + "CALLS tests/test_cpp_preprocessor.py:0 -> test_conditional_compilation", + "CALLS tests/test_cpp_preprocessor.py:0 -> test_cpp_preprocessor_comprehensive", + "CALLS tests/test_cpp_preprocessor.py:0 -> test_define_macros", + "CALLS tests/test_cpp_preprocessor.py:0 -> test_pragma_directives", + "CALLS tests/test_cpp_preprocessor.py:0 -> type", + "CALLS 
tests/test_cpp_preprocessor.py:0 -> value", + "CALLS tests/test_cpp_ranges_views.py:0 -> GraphNode", + "CALLS tests/test_cpp_ranges_views.py:0 -> cpp_ranges_project", + "CALLS tests/test_cpp_ranges_views.py:0 -> forward", + "CALLS tests/test_cpp_ranges_views.py:0 -> get", + "CALLS tests/test_cpp_ranges_views.py:0 -> keys", + "CALLS tests/test_cpp_ranges_views.py:0 -> main", + "CALLS tests/test_cpp_ranges_views.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_ranges_views.py:0 -> name", + "CALLS tests/test_cpp_ranges_views.py:0 -> nodes", + "CALLS tests/test_cpp_ranges_views.py:0 -> operation", + "CALLS tests/test_cpp_ranges_views.py:0 -> relationships", + "CALLS tests/test_cpp_ranges_views.py:0 -> start", + "CALLS tests/test_cpp_ranges_views.py:0 -> temp_repo", + "CALLS tests/test_cpp_ranges_views.py:0 -> test_basic_ranges_algorithms", + "CALLS tests/test_cpp_ranges_views.py:0 -> test_range_pipelines_graph_processing", + "CALLS tests/test_cpp_ranges_views.py:0 -> test_views_and_adaptors", + "CALLS tests/test_cpp_ranges_views.py:0 -> tokenize", + "CALLS tests/test_cpp_ranges_views.py:0 -> type", + "CALLS tests/test_cpp_smart_pointers.py:0 -> GraphNode", + "CALLS tests/test_cpp_smart_pointers.py:0 -> NodeType", + "CALLS tests/test_cpp_smart_pointers.py:0 -> add", + "CALLS tests/test_cpp_smart_pointers.py:0 -> clear", + "CALLS tests/test_cpp_smart_pointers.py:0 -> cpp_smart_pointers_project", + "CALLS tests/test_cpp_smart_pointers.py:0 -> ensure_node_batch", + "CALLS tests/test_cpp_smart_pointers.py:0 -> factory", + "CALLS tests/test_cpp_smart_pointers.py:0 -> forward", + "CALLS tests/test_cpp_smart_pointers.py:0 -> get", + "CALLS tests/test_cpp_smart_pointers.py:0 -> index", + "CALLS tests/test_cpp_smart_pointers.py:0 -> insert", + "CALLS tests/test_cpp_smart_pointers.py:0 -> items", + "CALLS tests/test_cpp_smart_pointers.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_smart_pointers.py:0 -> name", + "CALLS tests/test_cpp_smart_pointers.py:0 -> nodes", + "CALLS 
tests/test_cpp_smart_pointers.py:0 -> parent", + "CALLS tests/test_cpp_smart_pointers.py:0 -> process", + "CALLS tests/test_cpp_smart_pointers.py:0 -> put", + "CALLS tests/test_cpp_smart_pointers.py:0 -> read", + "CALLS tests/test_cpp_smart_pointers.py:0 -> relationships", + "CALLS tests/test_cpp_smart_pointers.py:0 -> temp_repo", + "CALLS tests/test_cpp_smart_pointers.py:0 -> test_cpp_smart_pointers_comprehensive", + "CALLS tests/test_cpp_smart_pointers.py:0 -> test_shared_ptr_patterns", + "CALLS tests/test_cpp_smart_pointers.py:0 -> test_unique_ptr_patterns", + "CALLS tests/test_cpp_smart_pointers.py:0 -> test_weak_ptr_and_advanced_patterns", + "CALLS tests/test_cpp_smart_pointers.py:0 -> type", + "CALLS tests/test_cpp_smart_pointers.py:0 -> up", + "CALLS tests/test_cpp_smart_pointers.py:0 -> value", + "CALLS tests/test_cpp_smart_pointers.py:0 -> wrapper", + "CALLS tests/test_cpp_stl_usage.py:0 -> cpp_stl_project", + "CALLS tests/test_cpp_stl_usage.py:0 -> forward", + "CALLS tests/test_cpp_stl_usage.py:0 -> generate", + "CALLS tests/test_cpp_stl_usage.py:0 -> insert", + "CALLS tests/test_cpp_stl_usage.py:0 -> keys", + "CALLS tests/test_cpp_stl_usage.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_stl_usage.py:0 -> name", + "CALLS tests/test_cpp_stl_usage.py:0 -> nodes", + "CALLS tests/test_cpp_stl_usage.py:0 -> operation", + "CALLS tests/test_cpp_stl_usage.py:0 -> read", + "CALLS tests/test_cpp_stl_usage.py:0 -> relationships", + "CALLS tests/test_cpp_stl_usage.py:0 -> temp_repo", + "CALLS tests/test_cpp_stl_usage.py:0 -> test_cpp_stl_comprehensive", + "CALLS tests/test_cpp_stl_usage.py:0 -> test_stl_algorithms", + "CALLS tests/test_cpp_stl_usage.py:0 -> test_stl_containers", + "CALLS tests/test_cpp_stl_usage.py:0 -> test_stl_iterators_functors", + "CALLS tests/test_cpp_stl_usage.py:0 -> value", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> GraphMetadata", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> add", + "CALLS 
tests/test_cpp_template_metaprogramming.py:0 -> cpp_metaprogramming_project", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> index", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> insert", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> metadata", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> name", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> nodes", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> process", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> relationships", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> temp_repo", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> test_advanced_metaprogramming", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> test_basic_metaprogramming", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> test_cpp_metaprogramming_comprehensive", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> text", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> type", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> value", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> wrapper", + "CALLS tests/test_cpp_templates.py:0 -> add", + "CALLS tests/test_cpp_templates.py:0 -> clear", + "CALLS tests/test_cpp_templates.py:0 -> cpp_templates_project", + "CALLS tests/test_cpp_templates.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_templates.py:0 -> forward", + "CALLS tests/test_cpp_templates.py:0 -> get", + "CALLS tests/test_cpp_templates.py:0 -> index", + "CALLS tests/test_cpp_templates.py:0 -> insert", + "CALLS tests/test_cpp_templates.py:0 -> mock_ingestor", + "CALLS tests/test_cpp_templates.py:0 -> name", + "CALLS tests/test_cpp_templates.py:0 -> relationships", + "CALLS tests/test_cpp_templates.py:0 -> style", + "CALLS tests/test_cpp_templates.py:0 -> temp_repo", + "CALLS tests/test_cpp_templates.py:0 -> test_class_templates", + "CALLS 
tests/test_cpp_templates.py:0 -> test_cpp_templates_comprehensive", + "CALLS tests/test_cpp_templates.py:0 -> test_function_templates", + "CALLS tests/test_cpp_templates.py:0 -> test_template_metaprogramming", + "CALLS tests/test_cpp_templates.py:0 -> type", + "CALLS tests/test_cpp_templates.py:0 -> value", + "CALLS tests/test_cpp_templates.py:0 -> wrapper", + "CALLS tests/test_cypher_validation.py:0 -> LLMGenerationError", + "CALLS tests/test_cypher_validation.py:0 -> TestBuildKeywordPattern", + "CALLS tests/test_cypher_validation.py:0 -> TestValidateCallProcedures", + "CALLS tests/test_cypher_validation.py:0 -> TestValidateCypherReadOnly", + "CALLS tests/test_cypher_validation.py:0 -> TestValidateNoUnboundedPaths", + "CALLS tests/test_cypher_validation.py:0 -> get", + "CALLS tests/test_cypher_validation.py:0 -> load", + "CALLS tests/test_cypher_validation.py:0 -> name", + "CALLS tests/test_cypher_validation.py:0 -> nodes", + "CALLS tests/test_cypher_validation.py:0 -> read", + "CALLS tests/test_cypher_validation.py:0 -> start", + "CALLS tests/test_cypher_validation.py:0 -> test_all_dangerous_keywords_produce_valid_patterns", + "CALLS tests/test_cypher_validation.py:0 -> test_allowed_procedure_passes", + "CALLS tests/test_cypher_validation.py:0 -> test_bounded_or_no_varlen_passes", + "CALLS tests/test_cypher_validation.py:0 -> test_call_is_case_insensitive", + "CALLS tests/test_cypher_validation.py:0 -> test_call_no_longer_in_keyword_blocklist", + "CALLS tests/test_cypher_validation.py:0 -> test_case_insensitive", + "CALLS tests/test_cypher_validation.py:0 -> test_disallowed_procedure_rejected", + "CALLS tests/test_cypher_validation.py:0 -> test_does_not_flag_created_at", + "CALLS tests/test_cypher_validation.py:0 -> test_does_not_flag_reset", + "CALLS tests/test_cypher_validation.py:0 -> test_does_not_flag_substring_matches", + "CALLS tests/test_cypher_validation.py:0 -> test_error_includes_keyword_and_query", + "CALLS tests/test_cypher_validation.py:0 -> 
test_error_includes_procedure_name", + "CALLS tests/test_cypher_validation.py:0 -> test_error_includes_query", + "CALLS tests/test_cypher_validation.py:0 -> test_multi_word_allows_block_comment_between_parts", + "CALLS tests/test_cypher_validation.py:0 -> test_multi_word_allows_single_line_comment_between_parts", + "CALLS tests/test_cypher_validation.py:0 -> test_multi_word_allows_whitespace_between_parts", + "CALLS tests/test_cypher_validation.py:0 -> test_multi_word_has_dotall_flag", + "CALLS tests/test_cypher_validation.py:0 -> test_multi_word_respects_word_boundaries", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_all_dangerous_keywords", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_block_comment_bypass", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_create", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_create_constraint", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_create_index", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_delete", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_detach_delete", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_drop", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_foreach", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_load_csv", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_merge", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_multiline_block_comment_bypass", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_remove", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_set", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_single_line_comment_bypass", + "CALLS tests/test_cypher_validation.py:0 -> test_returns_compiled_pattern", + "CALLS tests/test_cypher_validation.py:0 -> test_safe_match_query_passes", + "CALLS tests/test_cypher_validation.py:0 -> test_safe_match_with_where_passes", + "CALLS tests/test_cypher_validation.py:0 -> 
test_safe_optional_match_passes", + "CALLS tests/test_cypher_validation.py:0 -> test_single_word_is_case_sensitive_on_input", + "CALLS tests/test_cypher_validation.py:0 -> test_single_word_uses_word_boundaries", + "CALLS tests/test_cypher_validation.py:0 -> test_unbounded_varlen_rejected", + "CALLS tests/test_cypher_validation.py:0 -> value", + "CALLS tests/test_dead_code_command.py:0 -> TestDeadCodeCommand", + "CALLS tests/test_dead_code_command.py:0 -> _params", + "CALLS tests/test_dead_code_command.py:0 -> cli", + "CALLS tests/test_dead_code_command.py:0 -> connect_memgraph", + "CALLS tests/test_dead_code_command.py:0 -> dead_rows", + "CALLS tests/test_dead_code_command.py:0 -> decorator", + "CALLS tests/test_dead_code_command.py:0 -> fetch_all", + "CALLS tests/test_dead_code_command.py:0 -> list_projects", + "CALLS tests/test_dead_code_command.py:0 -> load", + "CALLS tests/test_dead_code_command.py:0 -> main", + "CALLS tests/test_dead_code_command.py:0 -> mock_ingestor", + "CALLS tests/test_dead_code_command.py:0 -> name", + "CALLS tests/test_dead_code_command.py:0 -> run", + "CALLS tests/test_dead_code_command.py:0 -> runner", + "CALLS tests/test_dead_code_command.py:0 -> test_classes_flag_includes_class_candidates", + "CALLS tests/test_dead_code_command.py:0 -> test_classes_off_by_default", + "CALLS tests/test_dead_code_command.py:0 -> test_decorator_root_extends_defaults", + "CALLS tests/test_dead_code_command.py:0 -> test_entry_point_forwarded_to_query", + "CALLS tests/test_dead_code_command.py:0 -> test_errors_when_no_projects", + "CALLS tests/test_dead_code_command.py:0 -> test_errors_when_project_ambiguous", + "CALLS tests/test_dead_code_command.py:0 -> test_explicit_project_name_used", + "CALLS tests/test_dead_code_command.py:0 -> test_fail_on_found_exits_one_when_dead_code", + "CALLS tests/test_dead_code_command.py:0 -> test_fail_on_found_exits_zero_when_clean", + "CALLS tests/test_dead_code_command.py:0 -> test_handles_connection_error", + "CALLS 
tests/test_dead_code_command.py:0 -> test_include_tests_default_passes_test_patterns", + "CALLS tests/test_dead_code_command.py:0 -> test_json_format_emits_qualified_names", + "CALLS tests/test_dead_code_command.py:0 -> test_lists_orphans_in_table", + "CALLS tests/test_dead_code_command.py:0 -> test_no_include_tests_omits_test_patterns", + "CALLS tests/test_dead_code_command.py:0 -> test_writes_json_to_output_file", + "CALLS tests/test_dead_code_command.py:0 -> test_writes_table_to_output_file", + "CALLS tests/test_decorator_call_edges.py:0 -> NodeLabel", + "CALLS tests/test_decorator_call_edges.py:0 -> RelationshipType", + "CALLS tests/test_decorator_call_edges.py:0 -> TestDecoratorCallEdges", + "CALLS tests/test_decorator_call_edges.py:0 -> decorator", + "CALLS tests/test_decorator_call_edges.py:0 -> ensure_relationship_batch", + "CALLS tests/test_decorator_call_edges.py:0 -> handler", + "CALLS tests/test_decorator_call_edges.py:0 -> load", + "CALLS tests/test_decorator_call_edges.py:0 -> mock_ingestor", + "CALLS tests/test_decorator_call_edges.py:0 -> name", + "CALLS tests/test_decorator_call_edges.py:0 -> temp_repo", + "CALLS tests/test_decorator_call_edges.py:0 -> test_alias_decorator_resolves_to_first_party", + "CALLS tests/test_decorator_call_edges.py:0 -> test_bare_decorator_emits_module_call", + "CALLS tests/test_decorator_call_edges.py:0 -> test_call_decorator_emits_module_call", + "CALLS tests/test_decorator_call_edges.py:0 -> test_class_decorator_emits_module_call", + "CALLS tests/test_decorator_call_edges.py:0 -> test_decorator_on_nested_function_not_module_attributed", + "CALLS tests/test_decorator_call_edges.py:0 -> test_undecorated_function_has_no_decorator_edge", + "CALLS tests/test_decorators.py:0 -> TestAsyncTimingDecorator", + "CALLS tests/test_decorators.py:0 -> TestEnsureLoaded", + "CALLS tests/test_decorators.py:0 -> TestLogOperation", + "CALLS tests/test_decorators.py:0 -> TestMcpTryExcept", + "CALLS tests/test_decorators.py:0 -> 
TestRecursionGuard", + "CALLS tests/test_decorators.py:0 -> TestTimingDecorator", + "CALLS tests/test_decorators.py:0 -> TestValidateProjectPath", + "CALLS tests/test_decorators.py:0 -> _ensure_loaded", + "CALLS tests/test_decorators.py:0 -> async_timing_decorator", + "CALLS tests/test_decorators.py:0 -> done", + "CALLS tests/test_decorators.py:0 -> ensure_loaded", + "CALLS tests/test_decorators.py:0 -> error_factory", + "CALLS tests/test_decorators.py:0 -> key_func", + "CALLS tests/test_decorators.py:0 -> loader", + "CALLS tests/test_decorators.py:0 -> module_qn", + "CALLS tests/test_decorators.py:0 -> my_method", + "CALLS tests/test_decorators.py:0 -> named_async_function", + "CALLS tests/test_decorators.py:0 -> named_function", + "CALLS tests/test_decorators.py:0 -> named_handler", + "CALLS tests/test_decorators.py:0 -> named_op", + "CALLS tests/test_decorators.py:0 -> project_root", + "CALLS tests/test_decorators.py:0 -> status", + "CALLS tests/test_decorators.py:0 -> test_allows_different_keys", + "CALLS tests/test_decorators.py:0 -> test_allows_valid_path_within_project", + "CALLS tests/test_decorators.py:0 -> test_calls_ensure_loaded_before_method", + "CALLS tests/test_decorators.py:0 -> test_clears_guard_after_completion", + "CALLS tests/test_decorators.py:0 -> test_clears_guard_on_exception", + "CALLS tests/test_decorators.py:0 -> test_handles_exceptions", + "CALLS tests/test_decorators.py:0 -> test_handles_keyword_arguments_in_guarded_function", + "CALLS tests/test_decorators.py:0 -> test_handles_path_not_first_positional_arg", + "CALLS tests/test_decorators.py:0 -> test_key_func_receives_kwargs_correctly", + "CALLS tests/test_decorators.py:0 -> test_logs_end_even_on_success", + "CALLS tests/test_decorators.py:0 -> test_logs_start_and_end_messages", + "CALLS tests/test_decorators.py:0 -> test_logs_timing_info", + "CALLS tests/test_decorators.py:0 -> test_passes_arguments_correctly", + "CALLS tests/test_decorators.py:0 -> test_preserves_function_metadata", 
+ "CALLS tests/test_decorators.py:0 -> test_prevents_recursive_calls", + "CALLS tests/test_decorators.py:0 -> test_recursion_guard_with_mixed_positional_and_keyword_args", + "CALLS tests/test_decorators.py:0 -> test_rejects_non_string_path", + "CALLS tests/test_decorators.py:0 -> test_rejects_path_outside_project", + "CALLS tests/test_decorators.py:0 -> test_reraises_cancelled_error", + "CALLS tests/test_decorators.py:0 -> test_reraises_keyboard_interrupt", + "CALLS tests/test_decorators.py:0 -> test_reraises_system_exit", + "CALLS tests/test_decorators.py:0 -> test_returns_correct_result", + "CALLS tests/test_decorators.py:0 -> test_returns_error_on_exception", + "CALLS tests/test_decorators.py:0 -> test_returns_result_on_success", + "CALLS tests/test_decorators.py:0 -> test_separate_guard_names", + "CALLS tests/test_decorators.py:0 -> test_shared_guard_name", + "CALLS tests/test_decorators.py:0 -> test_works_with_dict_error_factory", + "CALLS tests/test_decorators.py:0 -> test_works_with_property", + "CALLS tests/test_decorators.py:0 -> timing_decorator", + "CALLS tests/test_decorators.py:0 -> type", + "CALLS tests/test_decorators.py:0 -> value", + "CALLS tests/test_definition_processor.py:0 -> DefinitionProcessor", + "CALLS tests/test_definition_processor.py:0 -> SupportedLanguage", + "CALLS tests/test_definition_processor.py:0 -> TestAddDependency", + "CALLS tests/test_definition_processor.py:0 -> TestExtractDecorators", + "CALLS tests/test_definition_processor.py:0 -> TestGetDocstring", + "CALLS tests/test_definition_processor.py:0 -> TestProcessDependencies", + "CALLS tests/test_definition_processor.py:0 -> TestProcessFile", + "CALLS tests/test_definition_processor.py:0 -> TestProcessFileRust", + "CALLS tests/test_definition_processor.py:0 -> children", + "CALLS tests/test_definition_processor.py:0 -> decorator", + "CALLS tests/test_definition_processor.py:0 -> definition_processor", + "CALLS tests/test_definition_processor.py:0 -> ensure_node_batch", + 
"CALLS tests/test_definition_processor.py:0 -> ensure_relationship_batch", + "CALLS tests/test_definition_processor.py:0 -> factory", + "CALLS tests/test_definition_processor.py:0 -> graph_updater", + "CALLS tests/test_definition_processor.py:0 -> handler", + "CALLS tests/test_definition_processor.py:0 -> mock_ingestor", + "CALLS tests/test_definition_processor.py:0 -> name", + "CALLS tests/test_definition_processor.py:0 -> processor", + "CALLS tests/test_definition_processor.py:0 -> py_parser", + "CALLS tests/test_definition_processor.py:0 -> repo", + "CALLS tests/test_definition_processor.py:0 -> repo_path", + "CALLS tests/test_definition_processor.py:0 -> temp_repo", + "CALLS tests/test_definition_processor.py:0 -> test_add_dependency_creates_node_and_relationship", + "CALLS tests/test_definition_processor.py:0 -> test_add_dependency_skips_empty_name", + "CALLS tests/test_definition_processor.py:0 -> test_add_dependency_skips_php", + "CALLS tests/test_definition_processor.py:0 -> test_add_dependency_skips_python", + "CALLS tests/test_definition_processor.py:0 -> test_add_dependency_with_empty_version_spec", + "CALLS tests/test_definition_processor.py:0 -> test_add_dependency_with_properties", + "CALLS tests/test_definition_processor.py:0 -> test_builtin_decorators", + "CALLS tests/test_definition_processor.py:0 -> test_cargo_toml_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_class_decorator", + "CALLS tests/test_definition_processor.py:0 -> test_class_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_composer_json_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_csproj_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_decorator_with_arguments", + "CALLS tests/test_definition_processor.py:0 -> test_dotted_decorator", + "CALLS tests/test_definition_processor.py:0 -> test_double_quoted_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_empty_function_body", + "CALLS 
tests/test_definition_processor.py:0 -> test_gemfile_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_go_mod_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_multiline_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_multiple_decorators", + "CALLS tests/test_definition_processor.py:0 -> test_no_decorators", + "CALLS tests/test_definition_processor.py:0 -> test_no_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_package_json_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_calls_ingest_methods", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_creates_contains_module_relationship_to_folder", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_creates_contains_module_relationship_to_package", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_creates_contains_module_relationship_to_project", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_creates_module_node", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_empty_file", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_init_py_uses_parent_qn", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_mod_rs_uses_parent_qn", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_nested_init_py", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_registers_module_qn_to_file_path", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_unsupported_language_returns_none", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_with_syntax_error_still_processes", + "CALLS tests/test_definition_processor.py:0 -> test_pyproject_toml_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_requirements_txt_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_single_decorator", + "CALLS tests/test_definition_processor.py:0 -> 
test_single_quoted_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_triple_double_quoted_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_triple_single_quoted_docstring", + "CALLS tests/test_definition_processor.py:0 -> text", + "CALLS tests/test_definition_processor.py:0 -> type", + "CALLS tests/test_definition_processor.py:0 -> updater", + "CALLS tests/test_dependency_parser.py:0 -> TestCargoTomlParser", + "CALLS tests/test_dependency_parser.py:0 -> TestComposerJsonParser", + "CALLS tests/test_dependency_parser.py:0 -> TestCsprojParser", + "CALLS tests/test_dependency_parser.py:0 -> TestExtractPep508PackageName", + "CALLS tests/test_dependency_parser.py:0 -> TestGemfileParser", + "CALLS tests/test_dependency_parser.py:0 -> TestGoModParser", + "CALLS tests/test_dependency_parser.py:0 -> TestPackageJsonParser", + "CALLS tests/test_dependency_parser.py:0 -> TestParseDependencies", + "CALLS tests/test_dependency_parser.py:0 -> TestPyProjectTomlParser", + "CALLS tests/test_dependency_parser.py:0 -> TestRequirementsTxtParser", + "CALLS tests/test_dependency_parser.py:0 -> index", + "CALLS tests/test_dependency_parser.py:0 -> name", + "CALLS tests/test_dependency_parser.py:0 -> test_all_dependency_types", + "CALLS tests/test_dependency_parser.py:0 -> test_both_dep_types", + "CALLS tests/test_dependency_parser.py:0 -> test_both_project_and_poetry", + "CALLS tests/test_dependency_parser.py:0 -> test_both_require_types", + "CALLS tests/test_dependency_parser.py:0 -> test_cargo_toml", + "CALLS tests/test_dependency_parser.py:0 -> test_cargo_toml_case_insensitive", + "CALLS tests/test_dependency_parser.py:0 -> test_case_insensitive_matching", + "CALLS tests/test_dependency_parser.py:0 -> test_comments_ignored", + "CALLS tests/test_dependency_parser.py:0 -> test_comments_in_require_block", + "CALLS tests/test_dependency_parser.py:0 -> test_compatible_release", + "CALLS tests/test_dependency_parser.py:0 -> test_complex_dependencies", + "CALLS 
tests/test_dependency_parser.py:0 -> test_composer_json", + "CALLS tests/test_dependency_parser.py:0 -> test_conditional_item_groups", + "CALLS tests/test_dependency_parser.py:0 -> test_csproj", + "CALLS tests/test_dependency_parser.py:0 -> test_csproj_suffix_matching", + "CALLS tests/test_dependency_parser.py:0 -> test_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_dependency_without_version", + "CALLS tests/test_dependency_parser.py:0 -> test_dev_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_empty_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_empty_file", + "CALLS tests/test_dependency_parser.py:0 -> test_empty_lines_ignored", + "CALLS tests/test_dependency_parser.py:0 -> test_empty_project", + "CALLS tests/test_dependency_parser.py:0 -> test_empty_string", + "CALLS tests/test_dependency_parser.py:0 -> test_exact_version", + "CALLS tests/test_dependency_parser.py:0 -> test_gem_with_version", + "CALLS tests/test_dependency_parser.py:0 -> test_gem_without_version", + "CALLS tests/test_dependency_parser.py:0 -> test_gemfile", + "CALLS tests/test_dependency_parser.py:0 -> test_go_mod", + "CALLS tests/test_dependency_parser.py:0 -> test_group_blocks", + "CALLS tests/test_dependency_parser.py:0 -> test_include_lines_ignored", + "CALLS tests/test_dependency_parser.py:0 -> test_indirect_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_invalid_json", + "CALLS tests/test_dependency_parser.py:0 -> test_invalid_toml", + "CALLS tests/test_dependency_parser.py:0 -> test_invalid_xml", + "CALLS tests/test_dependency_parser.py:0 -> test_leading_whitespace", + "CALLS tests/test_dependency_parser.py:0 -> test_multiple_require_blocks", + "CALLS tests/test_dependency_parser.py:0 -> test_nonexistent_file", + "CALLS tests/test_dependency_parser.py:0 -> test_optional_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_package_json", + "CALLS tests/test_dependency_parser.py:0 -> 
test_package_references", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_complex_version", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_dots", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_extras", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_extras_no_version", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_hyphen", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_multiple_extras", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_underscore", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_version_specifier", + "CALLS tests/test_dependency_parser.py:0 -> test_package_without_version", + "CALLS tests/test_dependency_parser.py:0 -> test_peer_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_php_excluded", + "CALLS tests/test_dependency_parser.py:0 -> test_poetry_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_project_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_pyproject_toml", + "CALLS tests/test_dependency_parser.py:0 -> test_require_block", + "CALLS tests/test_dependency_parser.py:0 -> test_require_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_require_dev", + "CALLS tests/test_dependency_parser.py:0 -> test_requirements_txt", + "CALLS tests/test_dependency_parser.py:0 -> test_scoped_package", + "CALLS tests/test_dependency_parser.py:0 -> test_simple_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_simple_package_name", + "CALLS tests/test_dependency_parser.py:0 -> test_simple_requirements", + "CALLS tests/test_dependency_parser.py:0 -> test_single_quoted_gem", + "CALLS tests/test_dependency_parser.py:0 -> test_single_require_line", + "CALLS tests/test_dependency_parser.py:0 -> test_source_line_ignored", + "CALLS tests/test_dependency_parser.py:0 -> test_unknown_file_type", + "CALLS tests/test_dependency_parser.py:0 -> 
test_whitespace_only", + "CALLS tests/test_diff_autowrap.py:0 -> TestAlreadyFenced", + "CALLS tests/test_diff_autowrap.py:0 -> TestNoDiff", + "CALLS tests/test_diff_autowrap.py:0 -> TestWrappingUnfencedDiff", + "CALLS tests/test_diff_autowrap.py:0 -> main", + "CALLS tests/test_diff_autowrap.py:0 -> test_already_fenced_diff_not_double_wrapped", + "CALLS tests/test_diff_autowrap.py:0 -> test_diff_followed_by_explanation_text", + "CALLS tests/test_diff_autowrap.py:0 -> test_fenced_with_other_language_not_rewrapped", + "CALLS tests/test_diff_autowrap.py:0 -> test_full_git_diff_gets_fenced_as_diff", + "CALLS tests/test_diff_autowrap.py:0 -> test_plain_text_unchanged", + "CALLS tests/test_diff_autowrap.py:0 -> test_preamble_before_diff_preserved", + "CALLS tests/test_diff_autowrap.py:0 -> test_text_without_diff_marker_unchanged", + "CALLS tests/test_diff_autowrap.py:0 -> text", + "CALLS tests/test_directory_lister.py:0 -> TestCreateDirectoryListerTool", + "CALLS tests/test_directory_lister.py:0 -> TestDirectoryListerInit", + "CALLS tests/test_directory_lister.py:0 -> TestGetSafePath", + "CALLS tests/test_directory_lister.py:0 -> TestListDirectoryContents", + "CALLS tests/test_directory_lister.py:0 -> description", + "CALLS tests/test_directory_lister.py:0 -> directory_lister", + "CALLS tests/test_directory_lister.py:0 -> project_root", + "CALLS tests/test_directory_lister.py:0 -> sample_directory_structure", + "CALLS tests/test_directory_lister.py:0 -> temp_project_root", + "CALLS tests/test_directory_lister.py:0 -> test_creates_tool_instance", + "CALLS tests/test_directory_lister.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_directory_lister.py:0 -> test_init_with_relative_path", + "CALLS tests/test_directory_lister.py:0 -> test_list_directory_returns_error_for_absolute_path_outside_root", + "CALLS tests/test_directory_lister.py:0 -> test_list_directory_returns_error_for_path_outside_root", + "CALLS tests/test_directory_lister.py:0 -> 
test_list_empty_directory", + "CALLS tests/test_directory_lister.py:0 -> test_list_file_instead_of_directory", + "CALLS tests/test_directory_lister.py:0 -> test_list_nested_directory", + "CALLS tests/test_directory_lister.py:0 -> test_list_nonexistent_directory", + "CALLS tests/test_directory_lister.py:0 -> test_list_root_directory", + "CALLS tests/test_directory_lister.py:0 -> test_list_subdirectory", + "CALLS tests/test_directory_lister.py:0 -> test_list_with_absolute_path_within_root", + "CALLS tests/test_directory_lister.py:0 -> test_list_with_hidden_files", + "CALLS tests/test_directory_lister.py:0 -> test_list_with_special_characters", + "CALLS tests/test_directory_lister.py:0 -> test_safe_path_rejects_absolute_path_outside_root", + "CALLS tests/test_directory_lister.py:0 -> test_safe_path_rejects_path_outside_root", + "CALLS tests/test_directory_lister.py:0 -> test_safe_path_with_absolute_path_within_root", + "CALLS tests/test_directory_lister.py:0 -> test_safe_path_with_relative_path", + "CALLS tests/test_directory_lister.py:0 -> test_tool_function_returns_contents", + "CALLS tests/test_directory_lister.py:0 -> test_tool_has_description", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> NodeLabel", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> RelationshipType", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> TestDuplicateQualifiedNameClasses", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> TestDuplicateQualifiedNameDefinitions", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> TestDuplicateQualifiedNameMethodsInOneClass", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> ensure_node_batch", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> ensure_relationship_batch", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> execute_write", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> fetch_all", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> flush_all", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> 
graph_updater", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> name", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> nodes", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> render", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> repo_path", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> test_both_branch_classes_become_distinct_nodes", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> test_both_branch_definitions_become_distinct_nodes", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> test_both_branch_methods_in_one_class_survive", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> test_call_links_to_both_duplicate_definitions", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> test_methods_of_both_branch_classes_survive", + "CALLS tests/test_embedder.py:0 -> UniXcoder", + "CALLS tests/test_embedder.py:0 -> add", + "CALLS tests/test_embedder.py:0 -> mock_unixcoder", + "CALLS tests/test_embedder.py:0 -> reset_cache", + "CALLS tests/test_embedder.py:0 -> reset_model_cache", + "CALLS tests/test_embedder.py:0 -> side_effect_forward", + "CALLS tests/test_embedder.py:0 -> side_effect_tokenize", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_cache_hit", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_empty_list", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_partial_cache", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_populates_cache", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_raises_without_dependencies", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_respects_batch_size", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_returns_correct_count", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_uses_padding", + "CALLS tests/test_embedder.py:0 -> test_embed_code_calls_tokenize", + "CALLS tests/test_embedder.py:0 -> test_embed_code_integration", + "CALLS tests/test_embedder.py:0 -> test_embed_code_populates_cache", + "CALLS 
tests/test_embedder.py:0 -> test_embed_code_raises_without_dependencies", + "CALLS tests/test_embedder.py:0 -> test_embed_code_returns_768_dimensional_vector", + "CALLS tests/test_embedder.py:0 -> test_embed_code_uses_cache", + "CALLS tests/test_embedder.py:0 -> test_embed_code_uses_default_max_length", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_clear", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_different_content_different_key", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_get_many", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_len", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_load_corrupt_file", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_load_no_path", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_load_nonexistent_path", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_miss_returns_none", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_overwrite", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_persistence_roundtrip", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_put_and_get", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_put_many", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_save_and_load", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_save_no_path", + "CALLS tests/test_embedder.py:0 -> test_embedding_default_batch_size_at_least_64", + "CALLS tests/test_embedder.py:0 -> test_get_model_does_not_use_cuda_when_unavailable", + "CALLS tests/test_embedder.py:0 -> test_get_model_is_cached", + "CALLS tests/test_embedder.py:0 -> test_get_model_moves_to_mps_when_available", + "CALLS tests/test_embedder.py:0 -> test_get_model_uses_cuda_when_available", + "CALLS tests/test_embedder.py:0 -> test_select_device_falls_back_to_cpu", + "CALLS tests/test_embedder.py:0 -> test_select_device_prefers_cuda", + "CALLS tests/test_embedder.py:0 -> test_select_device_uses_mps_when_cuda_unavailable", + "CALLS 
tests/test_embedder.py:0 -> test_similar_code_has_similar_embeddings", + "CALLS tests/test_embedder.py:0 -> tokenize", + "CALLS tests/test_eval_imports_internal_modules.py:0 -> NodeLabel", + "CALLS tests/test_eval_imports_internal_modules.py:0 -> RelationshipType", + "CALLS tests/test_eval_imports_internal_modules.py:0 -> name", + "CALLS tests/test_eval_imports_internal_modules.py:0 -> nodes", + "CALLS tests/test_eval_imports_internal_modules.py:0 -> repo", + "CALLS tests/test_eval_imports_internal_modules.py:0 -> test_import_placeholder_module_not_scored_as_internal", + "CALLS tests/test_eval_imports_internal_modules.py:0 -> value", + "CALLS tests/test_eval_module_calls.py:0 -> TestModuleCallEval", + "CALLS tests/test_eval_module_calls.py:0 -> decorator", + "CALLS tests/test_eval_module_calls.py:0 -> load", + "CALLS tests/test_eval_module_calls.py:0 -> main", + "CALLS tests/test_eval_module_calls.py:0 -> test_annotation_not_counted_with_future_import", + "CALLS tests/test_eval_module_calls.py:0 -> test_cgr_matches_oracle_module_calls", + "CALLS tests/test_eval_module_calls.py:0 -> test_class_decorator_is_module_attributed", + "CALLS tests/test_eval_module_calls.py:0 -> test_classless_module_construction_credited_via_instantiates", + "CALLS tests/test_eval_module_calls.py:0 -> test_generator_expression_call_is_deferred", + "CALLS tests/test_eval_module_calls.py:0 -> test_generator_outermost_iterable_is_eager", + "CALLS tests/test_eval_module_calls.py:0 -> test_lambda_body_call_is_deferred", + "CALLS tests/test_eval_module_calls.py:0 -> test_list_comprehension_call_is_module_attributed", + "CALLS tests/test_eval_module_calls.py:0 -> test_nested_call_is_not_module_attributed", + "CALLS tests/test_eval_module_calls.py:0 -> test_oracle_counts_only_definition_time_calls", + "CALLS tests/test_eval_module_calls.py:0 -> test_return_annotation_counted_without_future_import", + "CALLS tests/test_eval_score_span.py:0 -> NodeLabel", + "CALLS tests/test_eval_score_span.py:0 -> 
nodes", + "CALLS tests/test_eval_score_span.py:0 -> start", + "CALLS tests/test_eval_score_span.py:0 -> test_span_end_line_mismatch_is_penalized_and_surfaced", + "CALLS tests/test_eval_score_span.py:0 -> test_span_exact_match_scores_perfect", + "CALLS tests/test_eval_score_span.py:0 -> test_span_only_grades_co_identified_nodes", + "CALLS tests/test_eval_score_span.py:0 -> value", + "CALLS tests/test_exclude_patterns.py:0 -> TestDetectExcludableDirectories", + "CALLS tests/test_exclude_patterns.py:0 -> TestDirectoryVsFileBehavior", + "CALLS tests/test_exclude_patterns.py:0 -> TestExcludePathsEdgeCases", + "CALLS tests/test_exclude_patterns.py:0 -> TestGetGroupingKey", + "CALLS tests/test_exclude_patterns.py:0 -> TestGroupPathsByPattern", + "CALLS tests/test_exclude_patterns.py:0 -> TestIgnorePatterns", + "CALLS tests/test_exclude_patterns.py:0 -> TestIgnoreSuffixesInteraction", + "CALLS tests/test_exclude_patterns.py:0 -> TestPromptExcludeDirectories", + "CALLS tests/test_exclude_patterns.py:0 -> TestShouldSkipPath", + "CALLS tests/test_exclude_patterns.py:0 -> TestUnignoreExcludeInteraction", + "CALLS tests/test_exclude_patterns.py:0 -> TestUnignorePathsEdgeCases", + "CALLS tests/test_exclude_patterns.py:0 -> index", + "CALLS tests/test_exclude_patterns.py:0 -> main", + "CALLS tests/test_exclude_patterns.py:0 -> parent", + "CALLS tests/test_exclude_patterns.py:0 -> repo", + "CALLS tests/test_exclude_patterns.py:0 -> test_cli_excludes_without_pattern_match", + "CALLS tests/test_exclude_patterns.py:0 -> test_codebase_with_nested_pycache_groups_correctly", + "CALLS tests/test_exclude_patterns.py:0 -> test_custom_exclude_pattern_is_applied", + "CALLS tests/test_exclude_patterns.py:0 -> test_deep_nested_pattern_returns_first_match", + "CALLS tests/test_exclude_patterns.py:0 -> test_deeply_nested_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_detects_matching_patterns_at_root", + "CALLS tests/test_exclude_patterns.py:0 -> 
test_detects_multiple_git_directories", + "CALLS tests/test_exclude_patterns.py:0 -> test_detects_nested_matching_patterns_with_full_path", + "CALLS tests/test_exclude_patterns.py:0 -> test_detects_site_packages_at_root", + "CALLS tests/test_exclude_patterns.py:0 -> test_does_not_match_partial_directory_names", + "CALLS tests/test_exclude_patterns.py:0 -> test_does_not_skip_normal_path", + "CALLS tests/test_exclude_patterns.py:0 -> test_empty_paths_returns_empty_groups", + "CALLS tests/test_exclude_patterns.py:0 -> test_empty_repo_returns_empty", + "CALLS tests/test_exclude_patterns.py:0 -> test_empty_repo_returns_empty_set", + "CALLS tests/test_exclude_patterns.py:0 -> test_empty_unignore_paths_does_not_skip", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_does_not_match_partial_name", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_multiple_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_nested_path_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_path_based_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_path_pattern_does_not_affect_other_paths", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_paths_adds_to_default_skip", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_specific_file_by_path", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_specific_file_does_not_affect_siblings", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_takes_precedence_over_unignore", + "CALLS tests/test_exclude_patterns.py:0 -> test_groups_by_matching_pattern_not_parent_directory", + "CALLS tests/test_exclude_patterns.py:0 -> test_groups_nested_paths_under_first_matching_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_groups_single_level_paths", + "CALLS tests/test_exclude_patterns.py:0 -> test_ignores_files", + "CALLS tests/test_exclude_patterns.py:0 -> test_mixed_root_and_nested_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> 
test_multiple_patterns_in_path_returns_first", + "CALLS tests/test_exclude_patterns.py:0 -> test_multiple_unignore_paths", + "CALLS tests/test_exclude_patterns.py:0 -> test_nested_path_returns_first_matching_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_no_matching_pattern_returns_first_component", + "CALLS tests/test_exclude_patterns.py:0 -> test_no_matching_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_pattern_must_be_exact_match", + "CALLS tests/test_exclude_patterns.py:0 -> test_prompt_all_keeps_everything", + "CALLS tests/test_exclude_patterns.py:0 -> test_prompt_expand_then_select_from_group", + "CALLS tests/test_exclude_patterns.py:0 -> test_prompt_none_keeps_nothing", + "CALLS tests/test_exclude_patterns.py:0 -> test_prompt_number_keeps_entire_group", + "CALLS tests/test_exclude_patterns.py:0 -> test_prompt_with_cli_excludes", + "CALLS tests/test_exclude_patterns.py:0 -> test_real_world_scenario_with_venv_and_pycache", + "CALLS tests/test_exclude_patterns.py:0 -> test_root_level_file", + "CALLS tests/test_exclude_patterns.py:0 -> test_root_level_pattern_returns_itself", + "CALLS tests/test_exclude_patterns.py:0 -> test_similar_names_not_matching_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_site_packages_in_ignore_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_skip_directory_in_exclude", + "CALLS tests/test_exclude_patterns.py:0 -> test_skips_nested_ignore_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_skips_path_matching_ignore_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_sorts_paths_within_group", + "CALLS tests/test_exclude_patterns.py:0 -> test_stops_at_first_matching_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_suffix_checked_before_exclude", + "CALLS tests/test_exclude_patterns.py:0 -> test_suffix_checked_before_include", + "CALLS tests/test_exclude_patterns.py:0 -> test_unignore_directory_path", + "CALLS tests/test_exclude_patterns.py:0 -> 
test_unignore_exact_file_path", + "CALLS tests/test_exclude_patterns.py:0 -> test_unignore_parent_unignores_children", + "CALLS tests/test_exclude_patterns.py:0 -> test_unignore_paths_overrides_default_skip", + "CALLS tests/test_exclude_patterns.py:0 -> test_venv_patterns_in_ignore_patterns", + "CALLS tests/test_external_package_name_collision.py:0 -> RelationshipType", + "CALLS tests/test_external_package_name_collision.py:0 -> TestExternalPackageNameCollision", + "CALLS tests/test_external_package_name_collision.py:0 -> ensure_node_batch", + "CALLS tests/test_external_package_name_collision.py:0 -> ensure_relationship_batch", + "CALLS tests/test_external_package_name_collision.py:0 -> execute_write", + "CALLS tests/test_external_package_name_collision.py:0 -> fetch_all", + "CALLS tests/test_external_package_name_collision.py:0 -> flush_all", + "CALLS tests/test_external_package_name_collision.py:0 -> graph_updater", + "CALLS tests/test_external_package_name_collision.py:0 -> name", + "CALLS tests/test_external_package_name_collision.py:0 -> repo_path", + "CALLS tests/test_external_package_name_collision.py:0 -> test_bare_absolute_import_is_external_not_internal", + "CALLS tests/test_external_package_name_collision.py:0 -> test_relative_import_to_subpackage_still_internal", + "CALLS tests/test_file_editor.py:0 -> TestApplyPatchToFile", + "CALLS tests/test_file_editor.py:0 -> TestCreateFileEditorTool", + "CALLS tests/test_file_editor.py:0 -> TestEditFile", + "CALLS tests/test_file_editor.py:0 -> TestEditResult", + "CALLS tests/test_file_editor.py:0 -> TestFileEditorInit", + "CALLS tests/test_file_editor.py:0 -> TestGetAst", + "CALLS tests/test_file_editor.py:0 -> TestGetDiff", + "CALLS tests/test_file_editor.py:0 -> TestGetFunctionSourceCode", + "CALLS tests/test_file_editor.py:0 -> TestGetParser", + "CALLS tests/test_file_editor.py:0 -> TestReplaceCodeBlock", + "CALLS tests/test_file_editor.py:0 -> anyio_backend", + "CALLS tests/test_file_editor.py:0 -> 
description", + "CALLS tests/test_file_editor.py:0 -> file_editor", + "CALLS tests/test_file_editor.py:0 -> project_root", + "CALLS tests/test_file_editor.py:0 -> sample_js_file", + "CALLS tests/test_file_editor.py:0 -> sample_python_file", + "CALLS tests/test_file_editor.py:0 -> temp_project_root", + "CALLS tests/test_file_editor.py:0 -> test_apply_valid_patch", + "CALLS tests/test_file_editor.py:0 -> test_creates_tool_instance", + "CALLS tests/test_file_editor.py:0 -> test_edit_directory_fails", + "CALLS tests/test_file_editor.py:0 -> test_edit_existing_file", + "CALLS tests/test_file_editor.py:0 -> test_edit_file_outside_root", + "CALLS tests/test_file_editor.py:0 -> test_edit_nonexistent_file", + "CALLS tests/test_file_editor.py:0 -> test_error_result", + "CALLS tests/test_file_editor.py:0 -> test_get_ast_for_javascript_file", + "CALLS tests/test_file_editor.py:0 -> test_get_ast_for_python_file", + "CALLS tests/test_file_editor.py:0 -> test_get_diff_nonexistent_function", + "CALLS tests/test_file_editor.py:0 -> test_get_diff_shows_changes", + "CALLS tests/test_file_editor.py:0 -> test_get_function_source_by_name", + "CALLS tests/test_file_editor.py:0 -> test_get_function_source_by_qualified_name", + "CALLS tests/test_file_editor.py:0 -> test_get_nonexistent_function", + "CALLS tests/test_file_editor.py:0 -> test_get_parser_for_javascript", + "CALLS tests/test_file_editor.py:0 -> test_get_parser_for_python", + "CALLS tests/test_file_editor.py:0 -> test_get_parser_for_unknown_extension", + "CALLS tests/test_file_editor.py:0 -> test_init_creates_dmp_instance", + "CALLS tests/test_file_editor.py:0 -> test_init_loads_parsers", + "CALLS tests/test_file_editor.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_file_editor.py:0 -> test_replace_block_file_not_found", + "CALLS tests/test_file_editor.py:0 -> test_replace_block_outside_root", + "CALLS tests/test_file_editor.py:0 -> test_replace_existing_block", + "CALLS tests/test_file_editor.py:0 -> 
test_replace_identical_content", + "CALLS tests/test_file_editor.py:0 -> test_replace_nonexistent_block", + "CALLS tests/test_file_editor.py:0 -> test_success_result", + "CALLS tests/test_file_editor.py:0 -> test_tool_function_replaces_code", + "CALLS tests/test_file_editor.py:0 -> test_tool_function_returns_failure_message", + "CALLS tests/test_file_editor.py:0 -> test_tool_has_description", + "CALLS tests/test_file_editor.py:0 -> test_tool_requires_approval", + "CALLS tests/test_file_editor.py:0 -> type", + "CALLS tests/test_file_reader.py:0 -> TestCreateFileReaderTool", + "CALLS tests/test_file_reader.py:0 -> TestFileReadResult", + "CALLS tests/test_file_reader.py:0 -> TestFileReaderInit", + "CALLS tests/test_file_reader.py:0 -> TestReadFile", + "CALLS tests/test_file_reader.py:0 -> anyio_backend", + "CALLS tests/test_file_reader.py:0 -> description", + "CALLS tests/test_file_reader.py:0 -> file_reader", + "CALLS tests/test_file_reader.py:0 -> project_root", + "CALLS tests/test_file_reader.py:0 -> read", + "CALLS tests/test_file_reader.py:0 -> sample_python_file", + "CALLS tests/test_file_reader.py:0 -> sample_text_file", + "CALLS tests/test_file_reader.py:0 -> temp_project_root", + "CALLS tests/test_file_reader.py:0 -> test_binary_extensions_set", + "CALLS tests/test_file_reader.py:0 -> test_creates_tool_instance", + "CALLS tests/test_file_reader.py:0 -> test_error_result", + "CALLS tests/test_file_reader.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_file_reader.py:0 -> test_init_with_relative_path", + "CALLS tests/test_file_reader.py:0 -> test_read_binary_pdf_file", + "CALLS tests/test_file_reader.py:0 -> test_read_binary_png_file", + "CALLS tests/test_file_reader.py:0 -> test_read_directory_returns_error", + "CALLS tests/test_file_reader.py:0 -> test_read_empty_file", + "CALLS tests/test_file_reader.py:0 -> test_read_existing_text_file", + "CALLS tests/test_file_reader.py:0 -> test_read_file_in_subdirectory", + "CALLS 
tests/test_file_reader.py:0 -> test_read_file_outside_root", + "CALLS tests/test_file_reader.py:0 -> test_read_file_with_unicode", + "CALLS tests/test_file_reader.py:0 -> test_read_nonexistent_file", + "CALLS tests/test_file_reader.py:0 -> test_read_python_file", + "CALLS tests/test_file_reader.py:0 -> test_success_result", + "CALLS tests/test_file_reader.py:0 -> test_tool_function_returns_content", + "CALLS tests/test_file_reader.py:0 -> test_tool_function_returns_error_string", + "CALLS tests/test_file_reader.py:0 -> test_tool_has_description", + "CALLS tests/test_file_writer.py:0 -> TestCreateFile", + "CALLS tests/test_file_writer.py:0 -> TestCreateFileWriterTool", + "CALLS tests/test_file_writer.py:0 -> TestFileCreationResult", + "CALLS tests/test_file_writer.py:0 -> TestFileWriterInit", + "CALLS tests/test_file_writer.py:0 -> anyio_backend", + "CALLS tests/test_file_writer.py:0 -> description", + "CALLS tests/test_file_writer.py:0 -> file_writer", + "CALLS tests/test_file_writer.py:0 -> project_root", + "CALLS tests/test_file_writer.py:0 -> temp_project_root", + "CALLS tests/test_file_writer.py:0 -> test_create_empty_file", + "CALLS tests/test_file_writer.py:0 -> test_create_file_in_subdirectory", + "CALLS tests/test_file_writer.py:0 -> test_create_file_multiline_content", + "CALLS tests/test_file_writer.py:0 -> test_create_file_outside_root", + "CALLS tests/test_file_writer.py:0 -> test_create_file_with_special_characters_in_name", + "CALLS tests/test_file_writer.py:0 -> test_create_file_with_unicode_content", + "CALLS tests/test_file_writer.py:0 -> test_create_new_file", + "CALLS tests/test_file_writer.py:0 -> test_creates_tool_instance", + "CALLS tests/test_file_writer.py:0 -> test_error_result", + "CALLS tests/test_file_writer.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_file_writer.py:0 -> test_init_with_relative_path", + "CALLS tests/test_file_writer.py:0 -> test_overwrite_existing_file", + "CALLS tests/test_file_writer.py:0 -> 
test_success_result", + "CALLS tests/test_file_writer.py:0 -> test_tool_function_creates_file", + "CALLS tests/test_file_writer.py:0 -> test_tool_has_description", + "CALLS tests/test_file_writer.py:0 -> test_tool_requires_approval", + "CALLS tests/test_fqn_resolver.py:0 -> SupportedLanguage", + "CALLS tests/test_fqn_resolver.py:0 -> TestExtractFunctionFqns", + "CALLS tests/test_fqn_resolver.py:0 -> TestFindFunctionSourceByFqn", + "CALLS tests/test_fqn_resolver.py:0 -> TestResolveFqnFromAst", + "CALLS tests/test_fqn_resolver.py:0 -> children", + "CALLS tests/test_fqn_resolver.py:0 -> method_a", + "CALLS tests/test_fqn_resolver.py:0 -> method_b", + "CALLS tests/test_fqn_resolver.py:0 -> my_method", + "CALLS tests/test_fqn_resolver.py:0 -> repo", + "CALLS tests/test_fqn_resolver.py:0 -> test_deeply_nested", + "CALLS tests/test_fqn_resolver.py:0 -> test_empty_tree_returns_empty_list", + "CALLS tests/test_fqn_resolver.py:0 -> test_empty_tree_returns_none", + "CALLS tests/test_fqn_resolver.py:0 -> test_extracts_from_multiple_classes", + "CALLS tests/test_fqn_resolver.py:0 -> test_extracts_multiple_functions", + "CALLS tests/test_fqn_resolver.py:0 -> test_extracts_nested_methods", + "CALLS tests/test_fqn_resolver.py:0 -> test_extracts_single_function", + "CALLS tests/test_fqn_resolver.py:0 -> test_finds_matching_function", + "CALLS tests/test_fqn_resolver.py:0 -> test_finds_nested_method", + "CALLS tests/test_fqn_resolver.py:0 -> test_init_file_excluded_from_path", + "CALLS tests/test_fqn_resolver.py:0 -> test_lambda_returns_none", + "CALLS tests/test_fqn_resolver.py:0 -> test_nested_in_class", + "CALLS tests/test_fqn_resolver.py:0 -> test_returns_none_when_not_found", + "CALLS tests/test_fqn_resolver.py:0 -> test_simple_function", + "CALLS tests/test_fqn_resolver.py:0 -> test_skips_lambdas", + "CALLS tests/test_function_ingest.py:0 -> DefinitionProcessor", + "CALLS tests/test_function_ingest.py:0 -> SupportedLanguage", + "CALLS tests/test_function_ingest.py:0 -> 
TestBuildFunctionProps", + "CALLS tests/test_function_ingest.py:0 -> TestBuildNestedQualifiedName", + "CALLS tests/test_function_ingest.py:0 -> TestCollectAncestorPathParts", + "CALLS tests/test_function_ingest.py:0 -> TestDetermineFunctionParent", + "CALLS tests/test_function_ingest.py:0 -> TestExtractFunctionName", + "CALLS tests/test_function_ingest.py:0 -> TestExtractNodeName", + "CALLS tests/test_function_ingest.py:0 -> TestFormatNestedQn", + "CALLS tests/test_function_ingest.py:0 -> TestFunctionResolution", + "CALLS tests/test_function_ingest.py:0 -> TestGenerateAnonymousFunctionName", + "CALLS tests/test_function_ingest.py:0 -> TestIntegrationFunctionIngestion", + "CALLS tests/test_function_ingest.py:0 -> TestIsMethod", + "CALLS tests/test_function_ingest.py:0 -> TestRustFunctionQualifiedName", + "CALLS tests/test_function_ingest.py:0 -> children", + "CALLS tests/test_function_ingest.py:0 -> definition_processor", + "CALLS tests/test_function_ingest.py:0 -> factory", + "CALLS tests/test_function_ingest.py:0 -> graph_updater", + "CALLS tests/test_function_ingest.py:0 -> is_exported", + "CALLS tests/test_function_ingest.py:0 -> javascript_functions_project", + "CALLS tests/test_function_ingest.py:0 -> main", + "CALLS tests/test_function_ingest.py:0 -> mock_ingestor", + "CALLS tests/test_function_ingest.py:0 -> my_method", + "CALLS tests/test_function_ingest.py:0 -> name", + "CALLS tests/test_function_ingest.py:0 -> parent", + "CALLS tests/test_function_ingest.py:0 -> parsers_and_queries", + "CALLS tests/test_function_ingest.py:0 -> python_functions_project", + "CALLS tests/test_function_ingest.py:0 -> repo_path", + "CALLS tests/test_function_ingest.py:0 -> temp_repo", + "CALLS tests/test_function_ingest.py:0 -> test_anonymous_function_returns_none", + "CALLS tests/test_function_ingest.py:0 -> test_basic_function_props", + "CALLS tests/test_function_ingest.py:0 -> test_class_with_name", + "CALLS tests/test_function_ingest.py:0 -> test_deeply_nested_function", + 
"CALLS tests/test_function_ingest.py:0 -> test_empty_path_parts", + "CALLS tests/test_function_ingest.py:0 -> test_exported_function_props", + "CALLS tests/test_function_ingest.py:0 -> test_function_inside_class", + "CALLS tests/test_function_ingest.py:0 -> test_function_not_in_class", + "CALLS tests/test_function_ingest.py:0 -> test_function_with_name", + "CALLS tests/test_function_ingest.py:0 -> test_iife_arrow", + "CALLS tests/test_function_ingest.py:0 -> test_iife_parenthesized", + "CALLS tests/test_function_ingest.py:0 -> test_immutability", + "CALLS tests/test_function_ingest.py:0 -> test_javascript_arrow_function_with_variable", + "CALLS tests/test_function_ingest.py:0 -> test_javascript_functions_ingested", + "CALLS tests/test_function_ingest.py:0 -> test_method_in_class_returns_none", + "CALLS tests/test_function_ingest.py:0 -> test_multiple_function_ancestors", + "CALLS tests/test_function_ingest.py:0 -> test_named_function", + "CALLS tests/test_function_ingest.py:0 -> test_named_tuple_fields", + "CALLS tests/test_function_ingest.py:0 -> test_nested_function", + "CALLS tests/test_function_ingest.py:0 -> test_nested_function_in_method", + "CALLS tests/test_function_ingest.py:0 -> test_no_ancestors", + "CALLS tests/test_function_ingest.py:0 -> test_one_function_ancestor", + "CALLS tests/test_function_ingest.py:0 -> test_regular_anonymous", + "CALLS tests/test_function_ingest.py:0 -> test_rust_function_in_mod", + "CALLS tests/test_function_ingest.py:0 -> test_single_path_part", + "CALLS tests/test_function_ingest.py:0 -> test_top_level_function", + "CALLS tests/test_function_ingest.py:0 -> test_top_level_functions_ingested", + "CALLS tests/test_function_ingest.py:0 -> test_top_level_rust_function", + "CALLS tests/test_function_ingest.py:0 -> test_with_path_parts", + "CALLS tests/test_function_ingest.py:0 -> text", + "CALLS tests/test_function_ingest.py:0 -> type", + "CALLS tests/test_function_ingest.py:0 -> updater", + "CALLS tests/test_function_ingest.py:0 
-> wrapper", + "CALLS tests/test_function_local_definitions.py:0 -> NodeLabel", + "CALLS tests/test_function_local_definitions.py:0 -> RelationshipType", + "CALLS tests/test_function_local_definitions.py:0 -> TestFunctionLocalDefinitions", + "CALLS tests/test_function_local_definitions.py:0 -> ensure_node_batch", + "CALLS tests/test_function_local_definitions.py:0 -> ensure_relationship_batch", + "CALLS tests/test_function_local_definitions.py:0 -> execute_write", + "CALLS tests/test_function_local_definitions.py:0 -> fetch_all", + "CALLS tests/test_function_local_definitions.py:0 -> flush_all", + "CALLS tests/test_function_local_definitions.py:0 -> graph_updater", + "CALLS tests/test_function_local_definitions.py:0 -> nodes", + "CALLS tests/test_function_local_definitions.py:0 -> repo_path", + "CALLS tests/test_function_local_definitions.py:0 -> test_default_captures_local_class_methods", + "CALLS tests/test_function_local_definitions.py:0 -> test_flag_off_skips_local_class_methods", + "CALLS tests/test_getattr_dispatch.py:0 -> FunctionRegistryTrie", + "CALLS tests/test_getattr_dispatch.py:0 -> JavaTypeResolverMixin", + "CALLS tests/test_getattr_dispatch.py:0 -> RelationshipType", + "CALLS tests/test_getattr_dispatch.py:0 -> TestGetattrDispatch", + "CALLS tests/test_getattr_dispatch.py:0 -> _find_registry_entries_under", + "CALLS tests/test_getattr_dispatch.py:0 -> ensure_node_batch", + "CALLS tests/test_getattr_dispatch.py:0 -> ensure_relationship_batch", + "CALLS tests/test_getattr_dispatch.py:0 -> execute_write", + "CALLS tests/test_getattr_dispatch.py:0 -> fetch_all", + "CALLS tests/test_getattr_dispatch.py:0 -> find_with_prefix", + "CALLS tests/test_getattr_dispatch.py:0 -> flush_all", + "CALLS tests/test_getattr_dispatch.py:0 -> graph_updater", + "CALLS tests/test_getattr_dispatch.py:0 -> name", + "CALLS tests/test_getattr_dispatch.py:0 -> parent", + "CALLS tests/test_getattr_dispatch.py:0 -> repo_path", + "CALLS tests/test_getattr_dispatch.py:0 -> 
test_getattr_with_constant_name_resolves", + "CALLS tests/test_getattr_dispatch.py:0 -> test_getattr_with_string_literal_resolves", + "CALLS tests/test_getattr_dispatch.py:0 -> value", + "CALLS tests/test_github_issues_integration.py:0 -> GoogleProviderType", + "CALLS tests/test_github_issues_integration.py:0 -> TestGitHubIssuesIntegration", + "CALLS tests/test_github_issues_integration.py:0 -> active_cypher_config", + "CALLS tests/test_github_issues_integration.py:0 -> active_orchestrator_config", + "CALLS tests/test_github_issues_integration.py:0 -> name", + "CALLS tests/test_github_issues_integration.py:0 -> test_cli_override_real_scenario", + "CALLS tests/test_github_issues_integration.py:0 -> test_custom_model_names_with_colons_parsing", + "CALLS tests/test_github_issues_integration.py:0 -> test_env_file_ollama_configuration_respected", + "CALLS tests/test_github_issues_integration.py:0 -> test_google_gla_without_api_key_raises", + "CALLS tests/test_github_issues_integration.py:0 -> test_mixed_provider_real_world_scenario", + "CALLS tests/test_github_issues_integration.py:0 -> test_openai_compatible_endpoints", + "CALLS tests/test_github_issues_integration.py:0 -> test_reasoning_model_thinking_budget", + "CALLS tests/test_github_issues_integration.py:0 -> test_vertex_ai_enterprise_scenario", + "CALLS tests/test_github_issues_integration.py:0 -> test_vertex_ai_skips_api_key_validation", + "CALLS tests/test_github_issues_integration.py:0 -> test_vertex_ai_with_google_api_key_env_does_not_error", + "CALLS tests/test_github_issues_integration.py:0 -> up", + "CALLS tests/test_go_containment_oracle.py:0 -> RelationshipType", + "CALLS tests/test_go_containment_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_go_containment_oracle.py:0 -> name", + "CALLS tests/test_go_containment_oracle.py:0 -> test_cgr_matches_go_oracle_on_containment_edges", + "CALLS tests/test_go_containment_oracle.py:0 -> type", + "CALLS tests/test_go_containment_oracle.py:0 -> value", + 
"CALLS tests/test_go_receiver_methods.py:0 -> NodeLabel", + "CALLS tests/test_go_receiver_methods.py:0 -> RelationshipType", + "CALLS tests/test_go_receiver_methods.py:0 -> go_crossfile_project", + "CALLS tests/test_go_receiver_methods.py:0 -> go_methods_project", + "CALLS tests/test_go_receiver_methods.py:0 -> mock_ingestor", + "CALLS tests/test_go_receiver_methods.py:0 -> name", + "CALLS tests/test_go_receiver_methods.py:0 -> temp_repo", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_crossfile_method_binds_to_declaring_type", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_defined_type_receiver_method_is_method_node", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_free_function_not_a_method", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_method_defined_by_receiver_type", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_pointer_receiver_method_is_method_node", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_value_receiver_method_is_method_node", + "CALLS tests/test_go_receiver_methods.py:0 -> type", + "CALLS tests/test_go_receiver_methods.py:0 -> value", + "CALLS tests/test_go_span_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_go_span_oracle.py:0 -> name", + "CALLS tests/test_go_span_oracle.py:0 -> start", + "CALLS tests/test_go_span_oracle.py:0 -> test_cgr_matches_go_oracle_on_node_spans", + "CALLS tests/test_go_span_oracle.py:0 -> type", + "CALLS tests/test_go_structure_oracle.py:0 -> NodeLabel", + "CALLS tests/test_go_structure_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_go_structure_oracle.py:0 -> name", + "CALLS tests/test_go_structure_oracle.py:0 -> nodes", + "CALLS tests/test_go_structure_oracle.py:0 -> test_cgr_matches_oracle_on_type_declarations", + "CALLS tests/test_go_structure_oracle.py:0 -> test_oracle_labels_go_declarations", + "CALLS tests/test_go_structure_oracle.py:0 -> type", + "CALLS tests/test_go_structure_oracle.py:0 -> value", + "CALLS tests/test_go_type_declarations.py:0 
-> NodeLabel", + "CALLS tests/test_go_type_declarations.py:0 -> go_types_project", + "CALLS tests/test_go_type_declarations.py:0 -> mock_ingestor", + "CALLS tests/test_go_type_declarations.py:0 -> nodes", + "CALLS tests/test_go_type_declarations.py:0 -> temp_repo", + "CALLS tests/test_go_type_declarations.py:0 -> test_go_interface_captured_as_interface", + "CALLS tests/test_go_type_declarations.py:0 -> test_go_struct_captured_as_class", + "CALLS tests/test_go_type_declarations.py:0 -> test_go_type_alias_captured_as_type", + "CALLS tests/test_go_type_declarations.py:0 -> type", + "CALLS tests/test_graph_export_integration.py:0 -> TestGraphExportIntegration", + "CALLS tests/test_graph_export_integration.py:0 -> add", + "CALLS tests/test_graph_export_integration.py:0 -> ensure_node_batch", + "CALLS tests/test_graph_export_integration.py:0 -> ensure_relationship_batch", + "CALLS tests/test_graph_export_integration.py:0 -> loader", + "CALLS tests/test_graph_export_integration.py:0 -> main", + "CALLS tests/test_graph_export_integration.py:0 -> metadata", + "CALLS tests/test_graph_export_integration.py:0 -> mock_ingestor", + "CALLS tests/test_graph_export_integration.py:0 -> name", + "CALLS tests/test_graph_export_integration.py:0 -> nodes", + "CALLS tests/test_graph_export_integration.py:0 -> relationships", + "CALLS tests/test_graph_export_integration.py:0 -> temp_repo", + "CALLS tests/test_graph_export_integration.py:0 -> test_exported_json_structure_is_valid", + "CALLS tests/test_graph_export_integration.py:0 -> test_function_call_relationship_exports", + "CALLS tests/test_graph_export_integration.py:0 -> test_module_defines_relationship_exports", + "CALLS tests/test_graph_export_integration.py:0 -> test_python_class_with_methods_exports_correctly", + "CALLS tests/test_graph_export_integration.py:0 -> test_simple_python_function_exports_correctly", + "CALLS tests/test_graph_export_integration.py:0 -> type", + "CALLS tests/test_graph_loader.py:0 -> GraphData", + "CALLS 
tests/test_graph_loader.py:0 -> TestGraphLoaderLoad", + "CALLS tests/test_graph_loader.py:0 -> TestGraphLoaderNodeLookup", + "CALLS tests/test_graph_loader.py:0 -> TestGraphLoaderRelationshipLookup", + "CALLS tests/test_graph_loader.py:0 -> TestGraphLoaderSummary", + "CALLS tests/test_graph_loader.py:0 -> TestLoadGraphFunction", + "CALLS tests/test_graph_loader.py:0 -> graph_file", + "CALLS tests/test_graph_loader.py:0 -> loader", + "CALLS tests/test_graph_loader.py:0 -> metadata", + "CALLS tests/test_graph_loader.py:0 -> name", + "CALLS tests/test_graph_loader.py:0 -> nodes", + "CALLS tests/test_graph_loader.py:0 -> relationships", + "CALLS tests/test_graph_loader.py:0 -> test_find_node_by_property", + "CALLS tests/test_graph_loader.py:0 -> test_find_node_by_property_multiple_matches", + "CALLS tests/test_graph_loader.py:0 -> test_find_node_by_property_not_found", + "CALLS tests/test_graph_loader.py:0 -> test_find_nodes_by_label", + "CALLS tests/test_graph_loader.py:0 -> test_find_nodes_by_label_empty", + "CALLS tests/test_graph_loader.py:0 -> test_get_incoming_relationships", + "CALLS tests/test_graph_loader.py:0 -> test_get_incoming_relationships_empty", + "CALLS tests/test_graph_loader.py:0 -> test_get_node_by_id", + "CALLS tests/test_graph_loader.py:0 -> test_get_node_by_id_not_found", + "CALLS tests/test_graph_loader.py:0 -> test_get_outgoing_relationships", + "CALLS tests/test_graph_loader.py:0 -> test_get_outgoing_relationships_empty", + "CALLS tests/test_graph_loader.py:0 -> test_get_relationships_for_node", + "CALLS tests/test_graph_loader.py:0 -> test_lazy_loading", + "CALLS tests/test_graph_loader.py:0 -> test_load_file_not_found_raises", + "CALLS tests/test_graph_loader.py:0 -> test_load_graph_returns_loaded_loader", + "CALLS tests/test_graph_loader.py:0 -> test_load_parses_metadata", + "CALLS tests/test_graph_loader.py:0 -> test_load_parses_nodes", + "CALLS tests/test_graph_loader.py:0 -> test_load_parses_relationships", + "CALLS 
tests/test_graph_loader.py:0 -> test_relationship_properties", + "CALLS tests/test_graph_loader.py:0 -> test_summary_includes_metadata", + "CALLS tests/test_graph_loader.py:0 -> test_summary_node_labels", + "CALLS tests/test_graph_loader.py:0 -> test_summary_relationship_types", + "CALLS tests/test_graph_loader.py:0 -> test_summary_total_nodes", + "CALLS tests/test_graph_loader.py:0 -> test_summary_total_relationships", + "CALLS tests/test_graph_loader.py:0 -> type", + "CALLS tests/test_graph_service.py:0 -> TestCleanDatabase", + "CALLS tests/test_graph_service.py:0 -> TestContextManager", + "CALLS tests/test_graph_service.py:0 -> TestCreateMode", + "CALLS tests/test_graph_service.py:0 -> TestCursorToResults", + "CALLS tests/test_graph_service.py:0 -> TestCypherCreateQueries", + "CALLS tests/test_graph_service.py:0 -> TestEnsureConstraints", + "CALLS tests/test_graph_service.py:0 -> TestExecuteBatchOn", + "CALLS tests/test_graph_service.py:0 -> TestExecuteQuery", + "CALLS tests/test_graph_service.py:0 -> TestExportGraphToDict", + "CALLS tests/test_graph_service.py:0 -> TestFetchAllAndExecuteWrite", + "CALLS tests/test_graph_service.py:0 -> TestFlushAll", + "CALLS tests/test_graph_service.py:0 -> TestFlushNodesEdgeCases", + "CALLS tests/test_graph_service.py:0 -> TestGetCurrentTimestamp", + "CALLS tests/test_graph_service.py:0 -> TestMemgraphIngestorInit", + "CALLS tests/test_graph_service.py:0 -> TestPreGroupedRelBuffer", + "CALLS tests/test_graph_service.py:0 -> TestSlots", + "CALLS tests/test_graph_service.py:0 -> capture_query", + "CALLS tests/test_graph_service.py:0 -> close", + "CALLS tests/test_graph_service.py:0 -> description", + "CALLS tests/test_graph_service.py:0 -> execute", + "CALLS tests/test_graph_service.py:0 -> fail_then_succeed", + "CALLS tests/test_graph_service.py:0 -> fetchall", + "CALLS tests/test_graph_service.py:0 -> graph_service", + "CALLS tests/test_graph_service.py:0 -> metadata", + "CALLS tests/test_graph_service.py:0 -> 
mock_fetch_all", + "CALLS tests/test_graph_service.py:0 -> name", + "CALLS tests/test_graph_service.py:0 -> nodes", + "CALLS tests/test_graph_service.py:0 -> relationships", + "CALLS tests/test_graph_service.py:0 -> test_build_create_node_query", + "CALLS tests/test_graph_service.py:0 -> test_build_create_relationship_query", + "CALLS tests/test_graph_service.py:0 -> test_build_create_relationship_query_with_props", + "CALLS tests/test_graph_service.py:0 -> test_build_merge_node_query_unchanged", + "CALLS tests/test_graph_service.py:0 -> test_build_merge_relationship_query_unchanged", + "CALLS tests/test_graph_service.py:0 -> test_calls_flush_nodes_and_flush_relationships", + "CALLS tests/test_graph_service.py:0 -> test_closes_cursor_on_exception", + "CALLS tests/test_graph_service.py:0 -> test_closes_cursor_on_success", + "CALLS tests/test_graph_service.py:0 -> test_continues_on_constraint_error", + "CALLS tests/test_graph_service.py:0 -> test_converts_rows_to_dicts", + "CALLS tests/test_graph_service.py:0 -> test_counts_nodes_and_relationships", + "CALLS tests/test_graph_service.py:0 -> test_creates_constraint_for_each_node_type", + "CALLS tests/test_graph_service.py:0 -> test_default_use_merge_is_true", + "CALLS tests/test_graph_service.py:0 -> test_enter_connects_to_memgraph", + "CALLS tests/test_graph_service.py:0 -> test_enter_omits_auth_when_not_provided", + "CALLS tests/test_graph_service.py:0 -> test_enter_passes_auth_when_provided", + "CALLS tests/test_graph_service.py:0 -> test_execute_write_delegates_to_execute_query", + "CALLS tests/test_graph_service.py:0 -> test_executes_delete_query", + "CALLS tests/test_graph_service.py:0 -> test_executes_query_and_returns_results", + "CALLS tests/test_graph_service.py:0 -> test_exit_flushes_and_closes_connection", + "CALLS tests/test_graph_service.py:0 -> test_exit_handles_none_connection", + "CALLS tests/test_graph_service.py:0 -> test_exit_logs_error_on_exception", + "CALLS tests/test_graph_service.py:0 -> 
test_fetch_all_delegates_to_execute_query", + "CALLS tests/test_graph_service.py:0 -> test_fetch_all_preserves_existing_memory_limit", + "CALLS tests/test_graph_service.py:0 -> test_flush_nodes_uses_create_query_when_merge_disabled", + "CALLS tests/test_graph_service.py:0 -> test_flush_nodes_uses_merge_query_by_default", + "CALLS tests/test_graph_service.py:0 -> test_flush_relationships_uses_create_query_when_merge_disabled", + "CALLS tests/test_graph_service.py:0 -> test_flush_relationships_uses_merge_query_by_default", + "CALLS tests/test_graph_service.py:0 -> test_handles_empty_buffer", + "CALLS tests/test_graph_service.py:0 -> test_handles_empty_result_set", + "CALLS tests/test_graph_service.py:0 -> test_handles_single_row", + "CALLS tests/test_graph_service.py:0 -> test_has_slots", + "CALLS tests/test_graph_service.py:0 -> test_init_conn_is_none", + "CALLS tests/test_graph_service.py:0 -> test_init_creates_empty_buffers", + "CALLS tests/test_graph_service.py:0 -> test_init_defaults_auth_to_none", + "CALLS tests/test_graph_service.py:0 -> test_init_normalizes_empty_strings_to_none", + "CALLS tests/test_graph_service.py:0 -> test_init_normalizes_whitespace_only_to_none", + "CALLS tests/test_graph_service.py:0 -> test_init_raises_for_empty_password_with_valid_username", + "CALLS tests/test_graph_service.py:0 -> test_init_raises_for_negative_batch_size", + "CALLS tests/test_graph_service.py:0 -> test_init_raises_for_password_without_username", + "CALLS tests/test_graph_service.py:0 -> test_init_raises_for_username_without_password", + "CALLS tests/test_graph_service.py:0 -> test_init_raises_for_zero_batch_size", + "CALLS tests/test_graph_service.py:0 -> test_init_sets_custom_batch_size", + "CALLS tests/test_graph_service.py:0 -> test_init_sets_default_batch_size", + "CALLS tests/test_graph_service.py:0 -> test_init_sets_host_and_port", + "CALLS tests/test_graph_service.py:0 -> test_init_stores_auth_credentials", + "CALLS tests/test_graph_service.py:0 -> 
test_init_strips_whitespace_from_credentials", + "CALLS tests/test_graph_service.py:0 -> test_no_dict", + "CALLS tests/test_graph_service.py:0 -> test_passes_params_to_query", + "CALLS tests/test_graph_service.py:0 -> test_processes_valid_nodes_and_skips_invalid", + "CALLS tests/test_graph_service.py:0 -> test_raises_when_not_connected", + "CALLS tests/test_graph_service.py:0 -> test_rel_groups_cleared_after_flush", + "CALLS tests/test_graph_service.py:0 -> test_rel_groups_correct_batch_row_values", + "CALLS tests/test_graph_service.py:0 -> test_rel_groups_empty_on_init", + "CALLS tests/test_graph_service.py:0 -> test_rel_groups_groups_by_pattern", + "CALLS tests/test_graph_service.py:0 -> test_rel_groups_populated_on_ensure", + "CALLS tests/test_graph_service.py:0 -> test_returns_early_when_params_empty", + "CALLS tests/test_graph_service.py:0 -> test_returns_empty_list_when_no_description", + "CALLS tests/test_graph_service.py:0 -> test_returns_graph_data_structure", + "CALLS tests/test_graph_service.py:0 -> test_returns_iso_format_timestamp", + "CALLS tests/test_graph_service.py:0 -> test_skips_nodes_missing_id_property", + "CALLS tests/test_graph_service.py:0 -> test_skips_nodes_with_unknown_label", + "CALLS tests/test_graph_service.py:0 -> test_suppresses_already_exists_errors_in_logs", + "CALLS tests/test_graph_service.py:0 -> test_use_merge_false", + "CALLS tests/test_graph_service.py:0 -> test_wraps_query_with_unwind", + "CALLS tests/test_graph_service.py:0 -> value", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> _execute_batch_with_return_on", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> graph_service", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> log_messages", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> logs", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> mock_execute_batch", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> nodes", + "CALLS 
tests/test_graph_service_calls_failure_logging.py:0 -> relationships", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> sink", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> test_calls_failure_logging_multiple_batches", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> test_calls_failure_logging_single_batch", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> test_calls_success_no_failure_logging", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> test_non_calls_relationships_no_failure_logging", + "CALLS tests/test_graph_updater_embeddings.py:0 -> MemgraphIngestor", + "CALLS tests/test_graph_updater_embeddings.py:0 -> TestBatchedEmbeddingDispatch", + "CALLS tests/test_graph_updater_embeddings.py:0 -> TestCypherQueryEmbeddingsStructure", + "CALLS tests/test_graph_updater_embeddings.py:0 -> TestGenerateSemanticEmbeddings", + "CALLS tests/test_graph_updater_embeddings.py:0 -> _fake_embed_batch", + "CALLS tests/test_graph_updater_embeddings.py:0 -> embed_code_batch", + "CALLS tests/test_graph_updater_embeddings.py:0 -> execute_write", + "CALLS tests/test_graph_updater_embeddings.py:0 -> fetch_all", + "CALLS tests/test_graph_updater_embeddings.py:0 -> graph_service", + "CALLS tests/test_graph_updater_embeddings.py:0 -> graph_updater", + "CALLS tests/test_graph_updater_embeddings.py:0 -> has_semantic_dependencies", + "CALLS tests/test_graph_updater_embeddings.py:0 -> name", + "CALLS tests/test_graph_updater_embeddings.py:0 -> query_ingestor", + "CALLS tests/test_graph_updater_embeddings.py:0 -> repo_path", + "CALLS tests/test_graph_updater_embeddings.py:0 -> store_embedding_batch", + "CALLS tests/test_graph_updater_embeddings.py:0 -> temp_repo", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_contains_starts_with_project_name", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_counts_embedded_functions", + "CALLS tests/test_graph_updater_embeddings.py:0 -> 
test_dispatches_single_batch_call_for_multiple_snippets", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_dot_concatenation_is_parenthesized", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_embeds_valid_function_with_source", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_handles_embed_failure_gracefully", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_no_bare_starts_with_plus", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_passes_project_name_without_trailing_dot", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_returns_early_on_empty_results", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_returns_required_columns", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_skips_row_with_missing_source_info", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_skips_unparseable_rows", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_skips_when_no_semantic_dependencies", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_uses_cypher_query_embeddings_constant", + "CALLS tests/test_graph_updater_embeddings.py:0 -> updater_with_query", + "CALLS tests/test_graph_updater_embeddings.py:0 -> verify_stored_ids", + "CALLS tests/test_graph_updater_incremental.py:0 -> TestFastPathInSync", + "CALLS tests/test_graph_updater_incremental.py:0 -> TestHashCacheIO", + "CALLS tests/test_graph_updater_incremental.py:0 -> TestHashFile", + "CALLS tests/test_graph_updater_incremental.py:0 -> TestIncrementalUpdates", + "CALLS tests/test_graph_updater_incremental.py:0 -> TestSlots", + "CALLS tests/test_graph_updater_incremental.py:0 -> _process_function_calls", + "CALLS tests/test_graph_updater_incremental.py:0 -> _process_single_file", + "CALLS tests/test_graph_updater_incremental.py:0 -> graph_updater", + "CALLS tests/test_graph_updater_incremental.py:0 -> mock_ingestor", + "CALLS tests/test_graph_updater_incremental.py:0 -> py_project", + "CALLS tests/test_graph_updater_incremental.py:0 -> 
remove_file_from_state", + "CALLS tests/test_graph_updater_incremental.py:0 -> repo_path", + "CALLS tests/test_graph_updater_incremental.py:0 -> temp_repo", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_bounded_ast_cache_has_slots", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_broken_symlink_does_not_crash_indexing", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_cache_file_is_valid_json", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_changed_file_disables_fast_path", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_changed_file_is_reparsed", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_deleted_file_disables_fast_path", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_deleted_file_removed_from_hash_cache", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_deleted_file_removed_from_state", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_different_content_different_hash", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_force_bypasses_cache", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_force_bypasses_fast_path", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_function_registry_trie_has_slots", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_hash_cache_file_created_after_run", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_hash_returns_hex_string", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_hash_with_bytes_returns_none_for_broken_symlink", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_hash_with_bytes_returns_none_for_missing_file", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_load_corrupted_returns_empty", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_load_nonexistent_returns_empty", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_new_file_disables_fast_path", + "CALLS tests/test_graph_updater_incremental.py:0 -> 
test_new_file_is_processed", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_no_hash_cache_disables_fast_path", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_same_content_same_hash", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_save_and_load_cache", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_save_creates_parent_dirs", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_second_run_skips_all_passes", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_unchanged_file_is_skipped", + "CALLS tests/test_graph_updater_incremental.py:0 -> type", + "CALLS tests/test_graph_updater_incremental.py:0 -> updater", + "CALLS tests/test_graph_updater_incremental.py:0 -> value", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> IngestorProtocol", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> NodeLabel", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> QueryProtocol", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> RelationshipType", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> TestIncrementalRenameStaleEntities", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> ensure_node_batch", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> ensure_relationship_batch", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> execute_write", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> fetch_all", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> flush_all", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> graph_updater", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> nodes", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> repo_path", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> test_incremental_rename_matches_full_rebuild", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> updater", + "CALLS tests/test_graph_updater_integration.py:0 -> 
GraphUpdater", + "CALLS tests/test_graph_updater_integration.py:0 -> main", + "CALLS tests/test_graph_updater_integration.py:0 -> mock_ingestor", + "CALLS tests/test_graph_updater_integration.py:0 -> name", + "CALLS tests/test_graph_updater_integration.py:0 -> relationships", + "CALLS tests/test_graph_updater_integration.py:0 -> temp_project", + "CALLS tests/test_graph_updater_integration.py:0 -> temp_repo", + "CALLS tests/test_graph_updater_integration.py:0 -> test_function_call_relationships_are_created", + "CALLS tests/test_graph_updater_integration.py:0 -> up", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> EmbeddingQueryResult", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> TestParseEmbeddingResult", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> graph_updater", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> mock_ingestor", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> repo_path", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> temp_repo", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_empty_dict_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_end_line_not_int_becomes_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_missing_node_id_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_missing_qualified_name_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_node_id_not_int_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_none_values_for_required_fields_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_path_not_str_becomes_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_qualified_name_not_str_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_result_is_embedding_query_result_type", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> 
test_start_line_not_int_becomes_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_valid_input_all_fields", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_valid_input_required_fields_only", + "CALLS tests/test_graph_updater_pruning.py:0 -> TestCypherDeleteModuleQuery", + "CALLS tests/test_graph_updater_pruning.py:0 -> TestDeletedFileInProcessFiles", + "CALLS tests/test_graph_updater_pruning.py:0 -> TestPruneOrphanNodes", + "CALLS tests/test_graph_updater_pruning.py:0 -> _process_files", + "CALLS tests/test_graph_updater_pruning.py:0 -> cli", + "CALLS tests/test_graph_updater_pruning.py:0 -> execute_write", + "CALLS tests/test_graph_updater_pruning.py:0 -> fetch_all", + "CALLS tests/test_graph_updater_pruning.py:0 -> graph_updater", + "CALLS tests/test_graph_updater_pruning.py:0 -> main", + "CALLS tests/test_graph_updater_pruning.py:0 -> mock_ingestor", + "CALLS tests/test_graph_updater_pruning.py:0 -> name", + "CALLS tests/test_graph_updater_pruning.py:0 -> py_project", + "CALLS tests/test_graph_updater_pruning.py:0 -> repo_path", + "CALLS tests/test_graph_updater_pruning.py:0 -> temp_repo", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_deleted_file_triggers_cypher_delete", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_no_deletes_when_no_files_removed", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_handles_empty_graph", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_handles_none_path_gracefully", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_multiple_orphans_across_types", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_no_orphans_skips_deletes", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_removes_orphan_external_module_nodes", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_removes_orphan_module_nodes", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_skips_inline_module_synthetic_paths", + "CALLS 
tests/test_graph_updater_pruning.py:0 -> test_prune_skips_other_projects", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_query_constrains_traversal_to_containment_edges", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_query_does_not_traverse_calls_edges", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_run_calls_prune", + "CALLS tests/test_graph_updater_pruning.py:0 -> updater", + "CALLS tests/test_handler_integration.py:0 -> CppHandler", + "CALLS tests/test_handler_integration.py:0 -> JavaHandler", + "CALLS tests/test_handler_integration.py:0 -> JsTsHandler", + "CALLS tests/test_handler_integration.py:0 -> LuaHandler", + "CALLS tests/test_handler_integration.py:0 -> PythonHandler", + "CALLS tests/test_handler_integration.py:0 -> RustHandler", + "CALLS tests/test_handler_integration.py:0 -> SupportedLanguage", + "CALLS tests/test_handler_integration.py:0 -> TestCppHandlerIntegration", + "CALLS tests/test_handler_integration.py:0 -> TestHandlerDelegationInPipeline", + "CALLS tests/test_handler_integration.py:0 -> TestJavaHandlerIntegration", + "CALLS tests/test_handler_integration.py:0 -> TestJsTsHandlerIntegration", + "CALLS tests/test_handler_integration.py:0 -> TestLuaHandlerIntegration", + "CALLS tests/test_handler_integration.py:0 -> TestRustHandlerIntegration", + "CALLS tests/test_handler_integration.py:0 -> add", + "CALLS tests/test_handler_integration.py:0 -> export", + "CALLS tests/test_handler_integration.py:0 -> graph_updater", + "CALLS tests/test_handler_integration.py:0 -> handler", + "CALLS tests/test_handler_integration.py:0 -> mock_ingestor", + "CALLS tests/test_handler_integration.py:0 -> name", + "CALLS tests/test_handler_integration.py:0 -> process", + "CALLS tests/test_handler_integration.py:0 -> repo_path", + "CALLS tests/test_handler_integration.py:0 -> temp_repo", + "CALLS tests/test_handler_integration.py:0 -> test_assigned_function_names_extracted", + "CALLS tests/test_handler_integration.py:0 -> 
test_class_is_ingested", + "CALLS tests/test_handler_integration.py:0 -> test_class_is_ingested_with_methods", + "CALLS tests/test_handler_integration.py:0 -> test_cpp_handler_used_for_cpp_files", + "CALLS tests/test_handler_integration.py:0 -> test_dot_index_function_names_extracted", + "CALLS tests/test_handler_integration.py:0 -> test_exports_inside_functions_skipped", + "CALLS tests/test_handler_integration.py:0 -> test_handler_switches_per_file_language", + "CALLS tests/test_handler_integration.py:0 -> test_java_handler_used_for_java_files", + "CALLS tests/test_handler_integration.py:0 -> test_js_handler_used_for_javascript_files", + "CALLS tests/test_handler_integration.py:0 -> test_lambda_functions_get_generated_names", + "CALLS tests/test_handler_integration.py:0 -> test_lua_handler_used_for_lua_files", + "CALLS tests/test_handler_integration.py:0 -> test_namespaced_functions_have_full_qn", + "CALLS tests/test_handler_integration.py:0 -> test_object_literal_methods_ingested", + "CALLS tests/test_handler_integration.py:0 -> test_python_handler_used_for_python_files", + "CALLS tests/test_handler_integration.py:0 -> test_rust_handler_used_for_rust_files", + "CALLS tests/test_handler_integration.py:0 -> test_standalone_functions_ingested", + "CALLS tests/test_handler_integration.py:0 -> test_struct_is_ingested", + "CALLS tests/test_handler_integration.py:0 -> test_template_base_class_names_extracted", + "CALLS tests/test_handler_integration.py:0 -> test_ts_handler_used_for_typescript_files", + "CALLS tests/test_handler_integration.py:0 -> updater", + "CALLS tests/test_handler_integration.py:0 -> wrapper", + "CALLS tests/test_handler_registry.py:0 -> BaseLanguageHandler", + "CALLS tests/test_handler_registry.py:0 -> CppHandler", + "CALLS tests/test_handler_registry.py:0 -> JavaHandler", + "CALLS tests/test_handler_registry.py:0 -> JsTsHandler", + "CALLS tests/test_handler_registry.py:0 -> LuaHandler", + "CALLS tests/test_handler_registry.py:0 -> PhpHandler", + 
"CALLS tests/test_handler_registry.py:0 -> PythonHandler", + "CALLS tests/test_handler_registry.py:0 -> RustHandler", + "CALLS tests/test_handler_registry.py:0 -> SupportedLanguage", + "CALLS tests/test_handler_registry.py:0 -> TestGetHandler", + "CALLS tests/test_handler_registry.py:0 -> TestHandlerCaching", + "CALLS tests/test_handler_registry.py:0 -> TestHandlerInheritance", + "CALLS tests/test_handler_registry.py:0 -> TestHandlerProtocol", + "CALLS tests/test_handler_registry.py:0 -> build_function_qualified_name", + "CALLS tests/test_handler_registry.py:0 -> build_method_qualified_name", + "CALLS tests/test_handler_registry.py:0 -> build_nested_function_qn", + "CALLS tests/test_handler_registry.py:0 -> extract_base_class_name", + "CALLS tests/test_handler_registry.py:0 -> extract_decorators", + "CALLS tests/test_handler_registry.py:0 -> extract_function_name", + "CALLS tests/test_handler_registry.py:0 -> extract_impl_target", + "CALLS tests/test_handler_registry.py:0 -> handler", + "CALLS tests/test_handler_registry.py:0 -> is_class_method", + "CALLS tests/test_handler_registry.py:0 -> is_export_inside_function", + "CALLS tests/test_handler_registry.py:0 -> is_function_exported", + "CALLS tests/test_handler_registry.py:0 -> is_inside_method_with_object_literals", + "CALLS tests/test_handler_registry.py:0 -> should_process_as_impl_block", + "CALLS tests/test_handler_registry.py:0 -> test_cpp_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_different_instances_for_different_languages", + "CALLS tests/test_handler_registry.py:0 -> test_handler_has_all_protocol_methods", + "CALLS tests/test_handler_registry.py:0 -> test_handler_methods_are_callable", + "CALLS tests/test_handler_registry.py:0 -> test_java_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_js_and_ts_share_same_handler_type", + "CALLS tests/test_handler_registry.py:0 -> test_jsts_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> 
test_lua_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_php_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_python_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_returns_base_handler_for_c", + "CALLS tests/test_handler_registry.py:0 -> test_returns_base_handler_for_go", + "CALLS tests/test_handler_registry.py:0 -> test_returns_cpp_handler_for_cpp", + "CALLS tests/test_handler_registry.py:0 -> test_returns_java_handler_for_java", + "CALLS tests/test_handler_registry.py:0 -> test_returns_jsts_handler_for_javascript", + "CALLS tests/test_handler_registry.py:0 -> test_returns_jsts_handler_for_typescript", + "CALLS tests/test_handler_registry.py:0 -> test_returns_lua_handler_for_lua", + "CALLS tests/test_handler_registry.py:0 -> test_returns_php_handler_for_php", + "CALLS tests/test_handler_registry.py:0 -> test_returns_python_handler_for_python", + "CALLS tests/test_handler_registry.py:0 -> test_returns_rust_handler_for_rust", + "CALLS tests/test_handler_registry.py:0 -> test_rust_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_same_instance_returned_for_same_language", + "CALLS tests/test_handlers_unit.py:0 -> SupportedLanguage", + "CALLS tests/test_handlers_unit.py:0 -> TestBaseLanguageHandler", + "CALLS tests/test_handlers_unit.py:0 -> TestCppHandler", + "CALLS tests/test_handlers_unit.py:0 -> TestJavaHandler", + "CALLS tests/test_handlers_unit.py:0 -> TestJsTsHandler", + "CALLS tests/test_handlers_unit.py:0 -> TestJsTsHandlerTypeScriptDecorators", + "CALLS tests/test_handlers_unit.py:0 -> TestLuaHandler", + "CALLS tests/test_handlers_unit.py:0 -> TestPhpHandler", + "CALLS tests/test_handlers_unit.py:0 -> TestPythonHandler", + "CALLS tests/test_handlers_unit.py:0 -> TestRustHandler", + "CALLS tests/test_handlers_unit.py:0 -> children", + "CALLS tests/test_handlers_unit.py:0 -> class_qn", + "CALLS tests/test_handlers_unit.py:0 -> cpp_parser", + "CALLS 
tests/test_handlers_unit.py:0 -> dead_code", + "CALLS tests/test_handlers_unit.py:0 -> decorator", + "CALLS tests/test_handlers_unit.py:0 -> export", + "CALLS tests/test_handlers_unit.py:0 -> handler", + "CALLS tests/test_handlers_unit.py:0 -> index", + "CALLS tests/test_handlers_unit.py:0 -> java_parser", + "CALLS tests/test_handlers_unit.py:0 -> js_parser", + "CALLS tests/test_handlers_unit.py:0 -> lua_parser", + "CALLS tests/test_handlers_unit.py:0 -> main", + "CALLS tests/test_handlers_unit.py:0 -> module_qn", + "CALLS tests/test_handlers_unit.py:0 -> php_parser", + "CALLS tests/test_handlers_unit.py:0 -> process", + "CALLS tests/test_handlers_unit.py:0 -> python_parser", + "CALLS tests/test_handlers_unit.py:0 -> repo", + "CALLS tests/test_handlers_unit.py:0 -> repo_path", + "CALLS tests/test_handlers_unit.py:0 -> rust_parser", + "CALLS tests/test_handlers_unit.py:0 -> test_build_function_qualified_name_simple", + "CALLS tests/test_handlers_unit.py:0 -> test_build_method_qualified_name_overloaded_methods", + "CALLS tests/test_handlers_unit.py:0 -> test_build_method_qualified_name_simple", + "CALLS tests/test_handlers_unit.py:0 -> test_build_method_qualified_name_with_params", + "CALLS tests/test_handlers_unit.py:0 -> test_build_method_qualified_name_without_params", + "CALLS tests/test_handlers_unit.py:0 -> test_build_nested_function_qn_skips_class_without_object_literals", + "CALLS tests/test_handlers_unit.py:0 -> test_build_nested_function_qn_stops_at_class", + "CALLS tests/test_handlers_unit.py:0 -> test_build_nested_function_qn_with_class_and_object_literals", + "CALLS tests/test_handlers_unit.py:0 -> test_build_nested_function_qn_with_parent_functions", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_base_class_name_simple_identifier", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_base_class_name_template_type", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_base_class_name_with_text", + "CALLS tests/test_handlers_unit.py:0 -> 
test_extract_base_class_name_without_text_returns_none", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_call_decorator", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_class_annotation", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_class_decorator", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_dataclass_with_options", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_decorator_with_args", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_decorator_with_call", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_dotted_decorator", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_function_attribute", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_inner_attribute", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_member_expression", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_multiple", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_multiple_annotations", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_multiple_attributes", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_multiple_decorators", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_no_annotations", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_no_attributes", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_no_decorators", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_on_function_definition", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_parameterized_annotation", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_php8_attribute", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_returns_empty_for_undecorated", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_returns_empty_list", + "CALLS tests/test_handlers_unit.py:0 
-> test_extract_decorators_simple_identifier", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_single_annotation", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_single_attribute", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_single_decorator", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_with_args", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_anonymous_function", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_anonymous_returns_none", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_arrow_function", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_arrow_in_callback", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_arrow_in_variable_declarator", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_assigned_to_dot_index", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_assigned_to_identifier", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_from_function_definition", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_from_method_declaration", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_lambda_expression", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_regular_function", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_with_name_field", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_without_name_returns_none", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_impl_target_returns_none", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_impl_target_struct", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_impl_target_trait_for_struct", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_at_module_level", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_in_class_body", + "CALLS 
tests/test_handlers_unit.py:0 -> test_is_class_method_inside_class", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_inside_interface", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_inside_trait", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_outside_class", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_returns_false", + "CALLS tests/test_handlers_unit.py:0 -> test_is_export_inside_function_at_module_level", + "CALLS tests/test_handlers_unit.py:0 -> test_is_export_inside_function_nested", + "CALLS tests/test_handlers_unit.py:0 -> test_is_export_inside_function_returns_false", + "CALLS tests/test_handlers_unit.py:0 -> test_is_function_exported_private_method", + "CALLS tests/test_handlers_unit.py:0 -> test_is_function_exported_public_method", + "CALLS tests/test_handlers_unit.py:0 -> test_is_function_exported_returns_false", + "CALLS tests/test_handlers_unit.py:0 -> test_is_function_exported_standalone_function", + "CALLS tests/test_handlers_unit.py:0 -> test_is_function_exported_without_export", + "CALLS tests/test_handlers_unit.py:0 -> test_is_inside_method_with_object_literals_nested_in_method", + "CALLS tests/test_handlers_unit.py:0 -> test_is_inside_method_with_object_literals_returns_false", + "CALLS tests/test_handlers_unit.py:0 -> test_is_inside_method_with_object_literals_standalone_object", + "CALLS tests/test_handlers_unit.py:0 -> test_is_inside_method_with_object_literals_stops_at_class_body", + "CALLS tests/test_handlers_unit.py:0 -> test_should_process_as_impl_block_returns_false", + "CALLS tests/test_handlers_unit.py:0 -> test_should_process_as_impl_block_with_impl_item", + "CALLS tests/test_handlers_unit.py:0 -> test_should_process_as_impl_block_with_other_node", + "CALLS tests/test_handlers_unit.py:0 -> text", + "CALLS tests/test_handlers_unit.py:0 -> ts_parser", + "CALLS tests/test_handlers_unit.py:0 -> type", + "CALLS tests/test_handlers_unit.py:0 -> value", + "CALLS 
tests/test_higher_order_calls.py:0 -> RelationshipType", + "CALLS tests/test_higher_order_calls.py:0 -> TestHigherOrderCalls", + "CALLS tests/test_higher_order_calls.py:0 -> _start_byte_key", + "CALLS tests/test_higher_order_calls.py:0 -> ensure_node_batch", + "CALLS tests/test_higher_order_calls.py:0 -> ensure_relationship_batch", + "CALLS tests/test_higher_order_calls.py:0 -> execute_write", + "CALLS tests/test_higher_order_calls.py:0 -> fetch_all", + "CALLS tests/test_higher_order_calls.py:0 -> flush_all", + "CALLS tests/test_higher_order_calls.py:0 -> graph_updater", + "CALLS tests/test_higher_order_calls.py:0 -> ingest_method", + "CALLS tests/test_higher_order_calls.py:0 -> items", + "CALLS tests/test_higher_order_calls.py:0 -> name", + "CALLS tests/test_higher_order_calls.py:0 -> repo_path", + "CALLS tests/test_higher_order_calls.py:0 -> start", + "CALLS tests/test_higher_order_calls.py:0 -> test_callable_parameter_prefers_module_function_over_sibling_method", + "CALLS tests/test_higher_order_calls.py:0 -> test_callable_parameter_resolves_to_argument_at_call_site", + "CALLS tests/test_higher_order_calls.py:0 -> test_callback_attributed_to_invoking_callee_not_caller", + "CALLS tests/test_higher_order_calls.py:0 -> test_normal_call_edge_to_callee_still_present", + "CALLS tests/test_higher_order_calls.py:0 -> test_sorted_key_attributed_to_enclosing_function", + "CALLS tests/test_higher_order_calls.py:0 -> value", + "CALLS tests/test_import_distance_calculation.py:0 -> NodeType", + "CALLS tests/test_import_distance_calculation.py:0 -> TestImportDistanceCalculation", + "CALLS tests/test_import_distance_calculation.py:0 -> call_processor", + "CALLS tests/test_import_distance_calculation.py:0 -> factory", + "CALLS tests/test_import_distance_calculation.py:0 -> function_qn", + "CALLS tests/test_import_distance_calculation.py:0 -> get", + "CALLS tests/test_import_distance_calculation.py:0 -> graph_updater", + "CALLS tests/test_import_distance_calculation.py:0 -> 
method_qn", + "CALLS tests/test_import_distance_calculation.py:0 -> mock_ingestor", + "CALLS tests/test_import_distance_calculation.py:0 -> mock_updater", + "CALLS tests/test_import_distance_calculation.py:0 -> repo", + "CALLS tests/test_import_distance_calculation.py:0 -> repo_path", + "CALLS tests/test_import_distance_calculation.py:0 -> test_edge_case_missing_from_registry", + "CALLS tests/test_import_distance_calculation.py:0 -> test_function_vs_method_distance_difference", + "CALLS tests/test_import_distance_calculation.py:0 -> test_method_detection_correctness", + "CALLS tests/test_import_distance_calculation.py:0 -> test_non_sibling_modules_no_bonus", + "CALLS tests/test_import_distance_calculation.py:0 -> test_same_module_candidates", + "CALLS tests/test_import_distance_calculation.py:0 -> test_sibling_module_bonus_for_functions", + "CALLS tests/test_import_distance_calculation.py:0 -> test_sibling_module_bonus_for_methods", + "CALLS tests/test_import_distance_calculation.py:0 -> updater", + "CALLS tests/test_import_parsing.py:0 -> NodeLabel", + "CALLS tests/test_import_parsing.py:0 -> NodeType", + "CALLS tests/test_import_parsing.py:0 -> SupportedLanguage", + "CALLS tests/test_import_parsing.py:0 -> TestExternalModuleNodeCreation", + "CALLS tests/test_import_parsing.py:0 -> TestImportParsing", + "CALLS tests/test_import_parsing.py:0 -> TestImportProcessorCacheUtilities", + "CALLS tests/test_import_parsing.py:0 -> TestIsLocalModuleCache", + "CALLS tests/test_import_parsing.py:0 -> TestJsInternalModuleResolution", + "CALLS tests/test_import_parsing.py:0 -> TestProjectPrefixMatching", + "CALLS tests/test_import_parsing.py:0 -> TestRustCrateResolution", + "CALLS tests/test_import_parsing.py:0 -> _is_local_java_import_cached", + "CALLS tests/test_import_parsing.py:0 -> _is_local_module_cached", + "CALLS tests/test_import_parsing.py:0 -> _parse_generic_imports", + "CALLS tests/test_import_parsing.py:0 -> _parse_go_imports", + "CALLS 
tests/test_import_parsing.py:0 -> _parse_java_imports", + "CALLS tests/test_import_parsing.py:0 -> _parse_js_ts_imports", + "CALLS tests/test_import_parsing.py:0 -> _parse_python_imports", + "CALLS tests/test_import_parsing.py:0 -> _parse_rust_imports", + "CALLS tests/test_import_parsing.py:0 -> _resolve_relative_import", + "CALLS tests/test_import_parsing.py:0 -> call_processor", + "CALLS tests/test_import_parsing.py:0 -> capture_node", + "CALLS tests/test_import_parsing.py:0 -> children", + "CALLS tests/test_import_parsing.py:0 -> ensure_node_batch", + "CALLS tests/test_import_parsing.py:0 -> ensure_relationship_batch", + "CALLS tests/test_import_parsing.py:0 -> factory", + "CALLS tests/test_import_parsing.py:0 -> graph_updater", + "CALLS tests/test_import_parsing.py:0 -> import_processor", + "CALLS tests/test_import_parsing.py:0 -> index", + "CALLS tests/test_import_parsing.py:0 -> main", + "CALLS tests/test_import_parsing.py:0 -> mock_ingestor", + "CALLS tests/test_import_parsing.py:0 -> module_qn", + "CALLS tests/test_import_parsing.py:0 -> name", + "CALLS tests/test_import_parsing.py:0 -> parent", + "CALLS tests/test_import_parsing.py:0 -> process", + "CALLS tests/test_import_parsing.py:0 -> processor", + "CALLS tests/test_import_parsing.py:0 -> repo_path", + "CALLS tests/test_import_parsing.py:0 -> stats", + "CALLS tests/test_import_parsing.py:0 -> test_cache_stats_after_clear", + "CALLS tests/test_import_parsing.py:0 -> test_clear_stdlib_cache_does_not_raise", + "CALLS tests/test_import_parsing.py:0 -> test_crate_import_from_flat_module_resolves_correctly", + "CALLS tests/test_import_parsing.py:0 -> test_crate_import_from_nested_module_resolves_to_crate_root", + "CALLS tests/test_import_parsing.py:0 -> test_external_module_name_uses_module_path_not_local_alias", + "CALLS tests/test_import_parsing.py:0 -> test_flush_stdlib_cache_does_not_raise", + "CALLS tests/test_import_parsing.py:0 -> test_function_registry_integration", + "CALLS 
tests/test_import_parsing.py:0 -> test_get_stdlib_cache_stats_returns_dict", + "CALLS tests/test_import_parsing.py:0 -> test_import_mapping_functionality", + "CALLS tests/test_import_parsing.py:0 -> test_import_processing_doesnt_crash", + "CALLS tests/test_import_parsing.py:0 -> test_internal_import_matched_with_dot_separator", + "CALLS tests/test_import_parsing.py:0 -> test_is_local_java_import_cache_hits", + "CALLS tests/test_import_parsing.py:0 -> test_is_local_module_cache_hits_on_repeated_calls", + "CALLS tests/test_import_parsing.py:0 -> test_is_local_module_cache_returns_correct_result", + "CALLS tests/test_import_parsing.py:0 -> test_is_local_module_detects_directory", + "CALLS tests/test_import_parsing.py:0 -> test_is_local_module_detects_py_file", + "CALLS tests/test_import_parsing.py:0 -> test_language_specific_import_methods", + "CALLS tests/test_import_parsing.py:0 -> test_python_alias_import_parsing", + "CALLS tests/test_import_parsing.py:0 -> test_python_import_parsing", + "CALLS tests/test_import_parsing.py:0 -> test_relative_import_resolution", + "CALLS tests/test_import_parsing.py:0 -> test_resolves_directory_with_index_file", + "CALLS tests/test_import_parsing.py:0 -> test_resolves_directory_with_index_js", + "CALLS tests/test_import_parsing.py:0 -> test_resolves_file_with_extension", + "CALLS tests/test_import_parsing.py:0 -> test_returns_full_name_when_no_match", + "CALLS tests/test_import_parsing.py:0 -> test_rust_external_module_name_uses_module_path", + "CALLS tests/test_import_parsing.py:0 -> test_rust_external_module_node_created", + "CALLS tests/test_import_parsing.py:0 -> test_separate_instances_have_independent_caches", + "CALLS tests/test_import_parsing.py:0 -> test_similar_prefix_not_matched_without_dot", + "CALLS tests/test_import_parsing.py:0 -> type", + "CALLS tests/test_import_parsing.py:0 -> updater", + "CALLS tests/test_inherits_attribute_base.py:0 -> RelationshipType", + "CALLS tests/test_inherits_attribute_base.py:0 -> 
TestInheritsAttributeBase", + "CALLS tests/test_inherits_attribute_base.py:0 -> UniXcoder", + "CALLS tests/test_inherits_attribute_base.py:0 -> ensure_node_batch", + "CALLS tests/test_inherits_attribute_base.py:0 -> ensure_relationship_batch", + "CALLS tests/test_inherits_attribute_base.py:0 -> execute_write", + "CALLS tests/test_inherits_attribute_base.py:0 -> fetch_all", + "CALLS tests/test_inherits_attribute_base.py:0 -> flush_all", + "CALLS tests/test_inherits_attribute_base.py:0 -> graph_updater", + "CALLS tests/test_inherits_attribute_base.py:0 -> name", + "CALLS tests/test_inherits_attribute_base.py:0 -> nodes", + "CALLS tests/test_inherits_attribute_base.py:0 -> repo_path", + "CALLS tests/test_inherits_attribute_base.py:0 -> style", + "CALLS tests/test_inherits_attribute_base.py:0 -> test_attribute_base_class_creates_inherits_edge", + "CALLS tests/test_instance_attr_type_inference.py:0 -> RelationshipType", + "CALLS tests/test_instance_attr_type_inference.py:0 -> TestInstanceAttrTypeInference", + "CALLS tests/test_instance_attr_type_inference.py:0 -> ensure_node_batch", + "CALLS tests/test_instance_attr_type_inference.py:0 -> ensure_relationship_batch", + "CALLS tests/test_instance_attr_type_inference.py:0 -> execute_write", + "CALLS tests/test_instance_attr_type_inference.py:0 -> fetch_all", + "CALLS tests/test_instance_attr_type_inference.py:0 -> flush_all", + "CALLS tests/test_instance_attr_type_inference.py:0 -> graph_updater", + "CALLS tests/test_instance_attr_type_inference.py:0 -> name", + "CALLS tests/test_instance_attr_type_inference.py:0 -> repo_path", + "CALLS tests/test_instance_attr_type_inference.py:0 -> status", + "CALLS tests/test_instance_attr_type_inference.py:0 -> test_ambiguous_method_does_not_resolve_to_module_function", + "CALLS tests/test_instance_attr_type_inference.py:0 -> test_method_call_resolves_via_init_attribute_type", + "CALLS tests/test_instance_attr_type_inference.py:0 -> 
test_property_access_not_resolved_to_module_function", + "CALLS tests/test_instance_attr_type_inference.py:0 -> test_property_access_resolves_via_init_attribute_type", + "CALLS tests/test_instance_attr_type_inference.py:0 -> type", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> RelationshipType", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> TestInterproceduralCallbackFlow", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> _resolve_to_qn", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> ensure_node_batch", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> ensure_relationship_batch", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> execute_write", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> extract_implemented_interfaces", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> extract_java_interface_names", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> fetch_all", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> flush_all", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> graph_updater", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> name", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> parent", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> repo_path", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> test_callback_propagates_through_passthrough_param", + "CALLS tests/test_java_advanced_oop.py:0 -> NodeType", + "CALLS tests/test_java_advanced_oop.py:0 -> add", + "CALLS tests/test_java_advanced_oop.py:0 -> children", + "CALLS tests/test_java_advanced_oop.py:0 -> ensure_node_batch", + "CALLS tests/test_java_advanced_oop.py:0 -> execute", + "CALLS tests/test_java_advanced_oop.py:0 -> flush", + "CALLS tests/test_java_advanced_oop.py:0 -> get", + "CALLS tests/test_java_advanced_oop.py:0 -> java_advanced_oop_project", + "CALLS tests/test_java_advanced_oop.py:0 -> main", + "CALLS tests/test_java_advanced_oop.py:0 -> 
method_calls", + "CALLS tests/test_java_advanced_oop.py:0 -> mock_ingestor", + "CALLS tests/test_java_advanced_oop.py:0 -> name", + "CALLS tests/test_java_advanced_oop.py:0 -> parent", + "CALLS tests/test_java_advanced_oop.py:0 -> process", + "CALLS tests/test_java_advanced_oop.py:0 -> processor", + "CALLS tests/test_java_advanced_oop.py:0 -> put", + "CALLS tests/test_java_advanced_oop.py:0 -> render", + "CALLS tests/test_java_advanced_oop.py:0 -> run", + "CALLS tests/test_java_advanced_oop.py:0 -> save", + "CALLS tests/test_java_advanced_oop.py:0 -> status", + "CALLS tests/test_java_advanced_oop.py:0 -> temp_repo", + "CALLS tests/test_java_advanced_oop.py:0 -> test_abstract_classes_with_partial_implementation", + "CALLS tests/test_java_advanced_oop.py:0 -> test_advanced_inner_class_scenarios", + "CALLS tests/test_java_advanced_oop.py:0 -> test_annotation_processing_complex", + "CALLS tests/test_java_advanced_oop.py:0 -> test_complex_generics_with_wildcards", + "CALLS tests/test_java_advanced_oop.py:0 -> test_complex_static_initialization", + "CALLS tests/test_java_advanced_oop.py:0 -> test_covariant_return_types", + "CALLS tests/test_java_advanced_oop.py:0 -> test_diamond_problem_resolution", + "CALLS tests/test_java_advanced_oop.py:0 -> test_generic_type_erasure_scenarios", + "CALLS tests/test_java_advanced_oop.py:0 -> test_method_overloading_variations", + "CALLS tests/test_java_advanced_oop.py:0 -> test_method_overriding_edge_cases", + "CALLS tests/test_java_advanced_oop.py:0 -> test_multiple_interface_inheritance", + "CALLS tests/test_java_advanced_oop.py:0 -> test_nested_generic_bounds", + "CALLS tests/test_java_advanced_oop.py:0 -> type", + "CALLS tests/test_java_advanced_oop.py:0 -> value", + "CALLS tests/test_java_collections_frameworks.py:0 -> Color", + "CALLS tests/test_java_collections_frameworks.py:0 -> NodeType", + "CALLS tests/test_java_collections_frameworks.py:0 -> add", + "CALLS tests/test_java_collections_frameworks.py:0 -> clear", + "CALLS 
tests/test_java_collections_frameworks.py:0 -> ensure_node_batch", + "CALLS tests/test_java_collections_frameworks.py:0 -> get", + "CALLS tests/test_java_collections_frameworks.py:0 -> index", + "CALLS tests/test_java_collections_frameworks.py:0 -> items", + "CALLS tests/test_java_collections_frameworks.py:0 -> java_collections_project", + "CALLS tests/test_java_collections_frameworks.py:0 -> keys", + "CALLS tests/test_java_collections_frameworks.py:0 -> main", + "CALLS tests/test_java_collections_frameworks.py:0 -> mock_ingestor", + "CALLS tests/test_java_collections_frameworks.py:0 -> name", + "CALLS tests/test_java_collections_frameworks.py:0 -> operation", + "CALLS tests/test_java_collections_frameworks.py:0 -> put", + "CALLS tests/test_java_collections_frameworks.py:0 -> snapshot", + "CALLS tests/test_java_collections_frameworks.py:0 -> start", + "CALLS tests/test_java_collections_frameworks.py:0 -> stats", + "CALLS tests/test_java_collections_frameworks.py:0 -> temp_repo", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_basic_collection_implementations", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_custom_collection_implementations", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_iterator_patterns_enhanced_for", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_map_operations_key_value_handling", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_set_operations_uniqueness", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_stream_api_integration_collections", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_thread_safe_collections", + "CALLS tests/test_java_collections_frameworks.py:0 -> value", + "CALLS tests/test_java_collections_frameworks.py:0 -> variants", + "CALLS tests/test_java_complex_relationships.py:0 -> add", + "CALLS tests/test_java_complex_relationships.py:0 -> decorator", + "CALLS tests/test_java_complex_relationships.py:0 -> engine", + "CALLS 
tests/test_java_complex_relationships.py:0 -> execute", + "CALLS tests/test_java_complex_relationships.py:0 -> factory", + "CALLS tests/test_java_complex_relationships.py:0 -> items", + "CALLS tests/test_java_complex_relationships.py:0 -> java_complex_project", + "CALLS tests/test_java_complex_relationships.py:0 -> main", + "CALLS tests/test_java_complex_relationships.py:0 -> mock_ingestor", + "CALLS tests/test_java_complex_relationships.py:0 -> name", + "CALLS tests/test_java_complex_relationships.py:0 -> relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> start", + "CALLS tests/test_java_complex_relationships.py:0 -> temp_repo", + "CALLS tests/test_java_complex_relationships.py:0 -> test_builder_pattern_relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> test_command_pattern_relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> test_decorator_pattern_relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> test_factory_pattern_relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> test_observer_pattern_relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> test_strategy_pattern_relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> type", + "CALLS tests/test_java_complex_relationships.py:0 -> up", + "CALLS tests/test_java_comprehensive.py:0 -> Color", + "CALLS tests/test_java_comprehensive.py:0 -> FileReader", + "CALLS tests/test_java_comprehensive.py:0 -> FileWriter", + "CALLS tests/test_java_comprehensive.py:0 -> NodeType", + "CALLS tests/test_java_comprehensive.py:0 -> add", + "CALLS tests/test_java_comprehensive.py:0 -> clear", + "CALLS tests/test_java_comprehensive.py:0 -> ensure_node_batch", + "CALLS tests/test_java_comprehensive.py:0 -> items", + "CALLS tests/test_java_comprehensive.py:0 -> java_project", + "CALLS tests/test_java_comprehensive.py:0 -> main", + "CALLS tests/test_java_comprehensive.py:0 -> mock_ingestor", + "CALLS 
tests/test_java_comprehensive.py:0 -> name", + "CALLS tests/test_java_comprehensive.py:0 -> process", + "CALLS tests/test_java_comprehensive.py:0 -> restart", + "CALLS tests/test_java_comprehensive.py:0 -> run", + "CALLS tests/test_java_comprehensive.py:0 -> start", + "CALLS tests/test_java_comprehensive.py:0 -> temp_repo", + "CALLS tests/test_java_comprehensive.py:0 -> test_basic_java_classes", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_enums_and_annotations", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_exception_handling", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_generics_and_collections", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_inner_classes", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_lambda_expressions", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_static_and_final", + "CALLS tests/test_java_comprehensive.py:0 -> value", + "CALLS tests/test_java_concurrency.py:0 -> add", + "CALLS tests/test_java_concurrency.py:0 -> clear", + "CALLS tests/test_java_concurrency.py:0 -> description", + "CALLS tests/test_java_concurrency.py:0 -> execute", + "CALLS tests/test_java_concurrency.py:0 -> get", + "CALLS tests/test_java_concurrency.py:0 -> items", + "CALLS tests/test_java_concurrency.py:0 -> java_concurrency_project", + "CALLS tests/test_java_concurrency.py:0 -> main", + "CALLS tests/test_java_concurrency.py:0 -> mock_ingestor", + "CALLS tests/test_java_concurrency.py:0 -> name", + "CALLS tests/test_java_concurrency.py:0 -> operation", + "CALLS tests/test_java_concurrency.py:0 -> process", + "CALLS tests/test_java_concurrency.py:0 -> put", + "CALLS tests/test_java_concurrency.py:0 -> read", + "CALLS tests/test_java_concurrency.py:0 -> snapshot", + "CALLS tests/test_java_concurrency.py:0 -> start", + "CALLS tests/test_java_concurrency.py:0 -> stats", + "CALLS tests/test_java_concurrency.py:0 -> status", + "CALLS tests/test_java_concurrency.py:0 -> submit", + "CALLS 
tests/test_java_concurrency.py:0 -> temp_repo", + "CALLS tests/test_java_concurrency.py:0 -> test_completable_future_patterns", + "CALLS tests/test_java_concurrency.py:0 -> test_concurrent_collections", + "CALLS tests/test_java_concurrency.py:0 -> test_executor_service_patterns", + "CALLS tests/test_java_concurrency.py:0 -> test_locks_and_conditions", + "CALLS tests/test_java_concurrency.py:0 -> test_synchronized_methods_blocks", + "CALLS tests/test_java_concurrency.py:0 -> test_volatile_fields", + "CALLS tests/test_java_concurrency.py:0 -> up", + "CALLS tests/test_java_concurrency.py:0 -> value", + "CALLS tests/test_java_containment_oracle.py:0 -> Color", + "CALLS tests/test_java_containment_oracle.py:0 -> RelationshipType", + "CALLS tests/test_java_containment_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_java_containment_oracle.py:0 -> name", + "CALLS tests/test_java_containment_oracle.py:0 -> test_cgr_matches_jdk_oracle_on_containment_edges", + "CALLS tests/test_java_containment_oracle.py:0 -> type", + "CALLS tests/test_java_containment_oracle.py:0 -> value", + "CALLS tests/test_java_edge_cases.py:0 -> NodeType", + "CALLS tests/test_java_edge_cases.py:0 -> add", + "CALLS tests/test_java_edge_cases.py:0 -> children", + "CALLS tests/test_java_edge_cases.py:0 -> description", + "CALLS tests/test_java_edge_cases.py:0 -> ensure_node_batch", + "CALLS tests/test_java_edge_cases.py:0 -> get", + "CALLS tests/test_java_edge_cases.py:0 -> index", + "CALLS tests/test_java_edge_cases.py:0 -> java_edge_cases_project", + "CALLS tests/test_java_edge_cases.py:0 -> main", + "CALLS tests/test_java_edge_cases.py:0 -> mock_ingestor", + "CALLS tests/test_java_edge_cases.py:0 -> name", + "CALLS tests/test_java_edge_cases.py:0 -> parse", + "CALLS tests/test_java_edge_cases.py:0 -> process", + "CALLS tests/test_java_edge_cases.py:0 -> read", + "CALLS tests/test_java_edge_cases.py:0 -> relationships", + "CALLS tests/test_java_edge_cases.py:0 -> run", + "CALLS 
tests/test_java_edge_cases.py:0 -> start", + "CALLS tests/test_java_edge_cases.py:0 -> temp_repo", + "CALLS tests/test_java_edge_cases.py:0 -> test_annotation_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_boundary_value_literals", + "CALLS tests/test_java_edge_cases.py:0 -> test_comment_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_deeply_nested_generics", + "CALLS tests/test_java_edge_cases.py:0 -> test_empty_classes_and_interfaces", + "CALLS tests/test_java_edge_cases.py:0 -> test_generic_variance_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_long_qualified_names", + "CALLS tests/test_java_edge_cases.py:0 -> test_malformed_but_valid_syntax", + "CALLS tests/test_java_edge_cases.py:0 -> test_modifier_combinations_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_operator_and_expression_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_package_and_import_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_parsing_edge_cases_syntax", + "CALLS tests/test_java_edge_cases.py:0 -> test_single_line_vs_multiline_constructs", + "CALLS tests/test_java_edge_cases.py:0 -> test_unicode_identifiers", + "CALLS tests/test_java_edge_cases.py:0 -> test_whitespace_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> text", + "CALLS tests/test_java_edge_cases.py:0 -> type", + "CALLS tests/test_java_edge_cases.py:0 -> value", + "CALLS tests/test_java_field_access_chains.py:0 -> children", + "CALLS tests/test_java_field_access_chains.py:0 -> engine", + "CALLS tests/test_java_field_access_chains.py:0 -> graph_updater", + "CALLS tests/test_java_field_access_chains.py:0 -> main", + "CALLS tests/test_java_field_access_chains.py:0 -> mock_ingestor", + "CALLS tests/test_java_field_access_chains.py:0 -> name", + "CALLS tests/test_java_field_access_chains.py:0 -> repo_path", + "CALLS tests/test_java_field_access_chains.py:0 -> start", + "CALLS tests/test_java_field_access_chains.py:0 -> temp_repo", + "CALLS 
tests/test_java_field_access_chains.py:0 -> test_direct_super_field_chain_method_call_multiclass", + "CALLS tests/test_java_field_access_chains.py:0 -> test_direct_this_field_chain_method_call_multiclass", + "CALLS tests/test_java_field_access_chains.py:0 -> test_generic_scoped_superclass_extraction", + "CALLS tests/test_java_field_access_chains.py:0 -> test_inherited_field_chain_via_nested_superclass", + "CALLS tests/test_java_field_access_chains.py:0 -> test_inherited_field_chain_via_object", + "CALLS tests/test_java_field_access_chains.py:0 -> test_inherited_field_chain_via_this", + "CALLS tests/test_java_field_access_chains.py:0 -> test_mixed_field_access_then_method_resolves", + "CALLS tests/test_java_field_access_chains.py:0 -> test_multilevel_field_access_then_method_resolves", + "CALLS tests/test_java_field_access_chains.py:0 -> test_nested_field_access_type_inference_via_var", + "CALLS tests/test_java_field_access_chains.py:0 -> test_scoped_superclass_extraction_keeps_actual_class", + "CALLS tests/test_java_field_access_chains.py:0 -> test_super_rooted_chain_with_nested_superclass", + "CALLS tests/test_java_field_access_chains.py:0 -> test_super_rooted_nested_field_access_via_var", + "CALLS tests/test_java_field_access_chains.py:0 -> test_this_rooted_nested_field_access_via_var", + "CALLS tests/test_java_field_access_chains.py:0 -> type", + "CALLS tests/test_java_imports.py:0 -> FileReader", + "CALLS tests/test_java_imports.py:0 -> FileWriter", + "CALLS tests/test_java_imports.py:0 -> add", + "CALLS tests/test_java_imports.py:0 -> factory", + "CALLS tests/test_java_imports.py:0 -> get", + "CALLS tests/test_java_imports.py:0 -> graph_updater", + "CALLS tests/test_java_imports.py:0 -> import_processor", + "CALLS tests/test_java_imports.py:0 -> java_imports_project", + "CALLS tests/test_java_imports.py:0 -> main", + "CALLS tests/test_java_imports.py:0 -> mock_ingestor", + "CALLS tests/test_java_imports.py:0 -> module_qn", + "CALLS tests/test_java_imports.py:0 
-> name", + "CALLS tests/test_java_imports.py:0 -> repo_path", + "CALLS tests/test_java_imports.py:0 -> temp_repo", + "CALLS tests/test_java_imports.py:0 -> test_basic_java_imports", + "CALLS tests/test_java_imports.py:0 -> test_package_local_imports", + "CALLS tests/test_java_imports.py:0 -> test_qualified_names_without_imports", + "CALLS tests/test_java_imports.py:0 -> test_static_imports", + "CALLS tests/test_java_imports.py:0 -> test_wildcard_imports", + "CALLS tests/test_java_imports.py:0 -> updater", + "CALLS tests/test_java_imports.py:0 -> wrapper", + "CALLS tests/test_java_inheritance_edges.py:0 -> Color", + "CALLS tests/test_java_inheritance_edges.py:0 -> RelationshipType", + "CALLS tests/test_java_inheritance_edges.py:0 -> mock_ingestor", + "CALLS tests/test_java_inheritance_edges.py:0 -> temp_repo", + "CALLS tests/test_java_inheritance_edges.py:0 -> test_java_inheritance_and_implements_edges", + "CALLS tests/test_java_inheritance_edges.py:0 -> type", + "CALLS tests/test_java_inheritance_edges.py:0 -> value", + "CALLS tests/test_java_inheritance_oracle.py:0 -> Color", + "CALLS tests/test_java_inheritance_oracle.py:0 -> RelationshipType", + "CALLS tests/test_java_inheritance_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_java_inheritance_oracle.py:0 -> name", + "CALLS tests/test_java_inheritance_oracle.py:0 -> test_cgr_matches_jdk_oracle_on_inheritance_edges", + "CALLS tests/test_java_inheritance_oracle.py:0 -> value", + "CALLS tests/test_java_label_name_collision.py:0 -> NodeLabel", + "CALLS tests/test_java_label_name_collision.py:0 -> NodeType", + "CALLS tests/test_java_label_name_collision.py:0 -> done", + "CALLS tests/test_java_label_name_collision.py:0 -> execute", + "CALLS tests/test_java_label_name_collision.py:0 -> java_label_collision_project", + "CALLS tests/test_java_label_name_collision.py:0 -> load", + "CALLS tests/test_java_label_name_collision.py:0 -> main", + "CALLS tests/test_java_label_name_collision.py:0 -> mock_ingestor", + 
"CALLS tests/test_java_label_name_collision.py:0 -> name", + "CALLS tests/test_java_label_name_collision.py:0 -> nodes", + "CALLS tests/test_java_label_name_collision.py:0 -> run", + "CALLS tests/test_java_label_name_collision.py:0 -> temp_repo", + "CALLS tests/test_java_label_name_collision.py:0 -> test_all_node_labels_have_constraints", + "CALLS tests/test_java_label_name_collision.py:0 -> test_class_implementing_interface_named_interface", + "CALLS tests/test_java_label_name_collision.py:0 -> test_class_named_class_ingested_as_class_node", + "CALLS tests/test_java_label_name_collision.py:0 -> test_enum_named_enum_has_defines_relationship", + "CALLS tests/test_java_label_name_collision.py:0 -> test_enum_named_enum_ingested_as_enum_node", + "CALLS tests/test_java_label_name_collision.py:0 -> test_interface_and_enum_labels_have_constraints", + "CALLS tests/test_java_label_name_collision.py:0 -> test_interface_named_interface_has_defines_relationship", + "CALLS tests/test_java_label_name_collision.py:0 -> test_interface_named_interface_ingested_as_interface_node", + "CALLS tests/test_java_label_name_collision.py:0 -> test_multiple_label_colliding_names", + "CALLS tests/test_java_label_name_collision.py:0 -> value", + "CALLS tests/test_java_method_calls.py:0 -> NodeType", + "CALLS tests/test_java_method_calls.py:0 -> add", + "CALLS tests/test_java_method_calls.py:0 -> clear", + "CALLS tests/test_java_method_calls.py:0 -> ensure_node_batch", + "CALLS tests/test_java_method_calls.py:0 -> factory", + "CALLS tests/test_java_method_calls.py:0 -> get", + "CALLS tests/test_java_method_calls.py:0 -> graph_updater", + "CALLS tests/test_java_method_calls.py:0 -> import_processor", + "CALLS tests/test_java_method_calls.py:0 -> java_methods_project", + "CALLS tests/test_java_method_calls.py:0 -> keys", + "CALLS tests/test_java_method_calls.py:0 -> main", + "CALLS tests/test_java_method_calls.py:0 -> mock_ingestor", + "CALLS tests/test_java_method_calls.py:0 -> name", + "CALLS 
tests/test_java_method_calls.py:0 -> process", + "CALLS tests/test_java_method_calls.py:0 -> put", + "CALLS tests/test_java_method_calls.py:0 -> relationships", + "CALLS tests/test_java_method_calls.py:0 -> repo_path", + "CALLS tests/test_java_method_calls.py:0 -> temp_repo", + "CALLS tests/test_java_method_calls.py:0 -> test_basic_method_calls", + "CALLS tests/test_java_method_calls.py:0 -> test_cross_file_method_calls_with_imports", + "CALLS tests/test_java_method_calls.py:0 -> test_fully_qualified_static_method_calls", + "CALLS tests/test_java_method_calls.py:0 -> test_generic_method_calls", + "CALLS tests/test_java_method_calls.py:0 -> test_inheritance_and_polymorphism", + "CALLS tests/test_java_method_calls.py:0 -> test_interface_method_calls", + "CALLS tests/test_java_method_calls.py:0 -> type", + "CALLS tests/test_java_method_calls.py:0 -> updater", + "CALLS tests/test_java_method_calls.py:0 -> value", + "CALLS tests/test_java_method_calls.py:0 -> wrapper", + "CALLS tests/test_java_modern_features.py:0 -> NodeType", + "CALLS tests/test_java_modern_features.py:0 -> ensure_node_batch", + "CALLS tests/test_java_modern_features.py:0 -> get", + "CALLS tests/test_java_modern_features.py:0 -> java_modern_project", + "CALLS tests/test_java_modern_features.py:0 -> main", + "CALLS tests/test_java_modern_features.py:0 -> mock_ingestor", + "CALLS tests/test_java_modern_features.py:0 -> name", + "CALLS tests/test_java_modern_features.py:0 -> put", + "CALLS tests/test_java_modern_features.py:0 -> run", + "CALLS tests/test_java_modern_features.py:0 -> temp_repo", + "CALLS tests/test_java_modern_features.py:0 -> test_java_instanceof_patterns", + "CALLS tests/test_java_modern_features.py:0 -> test_java_records", + "CALLS tests/test_java_modern_features.py:0 -> test_java_sealed_classes", + "CALLS tests/test_java_modern_features.py:0 -> test_java_switch_expressions", + "CALLS tests/test_java_modern_features.py:0 -> test_java_text_blocks", + "CALLS 
tests/test_java_modern_features.py:0 -> test_java_var_keyword", + "CALLS tests/test_java_modern_features.py:0 -> text", + "CALLS tests/test_java_modern_features.py:0 -> type", + "CALLS tests/test_java_modern_features.py:0 -> value", + "CALLS tests/test_java_modules.py:0 -> Dependency", + "CALLS tests/test_java_modules.py:0 -> NodeType", + "CALLS tests/test_java_modules.py:0 -> Provider", + "CALLS tests/test_java_modules.py:0 -> add", + "CALLS tests/test_java_modules.py:0 -> analyze", + "CALLS tests/test_java_modules.py:0 -> clear", + "CALLS tests/test_java_modules.py:0 -> done", + "CALLS tests/test_java_modules.py:0 -> ensure_node_batch", + "CALLS tests/test_java_modules.py:0 -> export", + "CALLS tests/test_java_modules.py:0 -> get", + "CALLS tests/test_java_modules.py:0 -> java_modules_project", + "CALLS tests/test_java_modules.py:0 -> load", + "CALLS tests/test_java_modules.py:0 -> mock_ingestor", + "CALLS tests/test_java_modules.py:0 -> name", + "CALLS tests/test_java_modules.py:0 -> process", + "CALLS tests/test_java_modules.py:0 -> processor", + "CALLS tests/test_java_modules.py:0 -> put", + "CALLS tests/test_java_modules.py:0 -> read", + "CALLS tests/test_java_modules.py:0 -> save", + "CALLS tests/test_java_modules.py:0 -> temp_repo", + "CALLS tests/test_java_modules.py:0 -> test_modular_application_structure", + "CALLS tests/test_java_modules.py:0 -> test_module_info_declarations", + "CALLS tests/test_java_modules.py:0 -> test_module_layer_and_configuration", + "CALLS tests/test_java_modules.py:0 -> test_service_provider_interface", + "CALLS tests/test_java_modules.py:0 -> text", + "CALLS tests/test_java_modules.py:0 -> value", + "CALLS tests/test_java_name_collision.py:0 -> java_collision_project", + "CALLS tests/test_java_name_collision.py:0 -> main", + "CALLS tests/test_java_name_collision.py:0 -> mock_ingestor", + "CALLS tests/test_java_name_collision.py:0 -> name", + "CALLS tests/test_java_name_collision.py:0 -> temp_repo", + "CALLS 
tests/test_java_name_collision.py:0 -> test_name_collision_prefers_explicit_import", + "CALLS tests/test_java_name_collision.py:0 -> test_name_collision_prefers_same_package", + "CALLS tests/test_java_nested_structures.py:0 -> add", + "CALLS tests/test_java_nested_structures.py:0 -> clear", + "CALLS tests/test_java_nested_structures.py:0 -> done", + "CALLS tests/test_java_nested_structures.py:0 -> get", + "CALLS tests/test_java_nested_structures.py:0 -> index", + "CALLS tests/test_java_nested_structures.py:0 -> items", + "CALLS tests/test_java_nested_structures.py:0 -> java_nested_project", + "CALLS tests/test_java_nested_structures.py:0 -> main", + "CALLS tests/test_java_nested_structures.py:0 -> metadata", + "CALLS tests/test_java_nested_structures.py:0 -> mock_ingestor", + "CALLS tests/test_java_nested_structures.py:0 -> name", + "CALLS tests/test_java_nested_structures.py:0 -> operation", + "CALLS tests/test_java_nested_structures.py:0 -> parse", + "CALLS tests/test_java_nested_structures.py:0 -> process", + "CALLS tests/test_java_nested_structures.py:0 -> processor", + "CALLS tests/test_java_nested_structures.py:0 -> put", + "CALLS tests/test_java_nested_structures.py:0 -> run", + "CALLS tests/test_java_nested_structures.py:0 -> start", + "CALLS tests/test_java_nested_structures.py:0 -> temp_repo", + "CALLS tests/test_java_nested_structures.py:0 -> test_anonymous_classes_complex", + "CALLS tests/test_java_nested_structures.py:0 -> test_builder_pattern_nested", + "CALLS tests/test_java_nested_structures.py:0 -> test_deeply_nested_classes", + "CALLS tests/test_java_nested_structures.py:0 -> test_lambda_edge_cases", + "CALLS tests/test_java_nested_structures.py:0 -> test_local_classes_in_methods", + "CALLS tests/test_java_nested_structures.py:0 -> test_visitor_pattern_nested", + "CALLS tests/test_java_nested_structures.py:0 -> text", + "CALLS tests/test_java_nested_structures.py:0 -> type", + "CALLS tests/test_java_nested_structures.py:0 -> value", + "CALLS 
tests/test_java_real_world.py:0 -> NodeType", + "CALLS tests/test_java_real_world.py:0 -> add", + "CALLS tests/test_java_real_world.py:0 -> engine", + "CALLS tests/test_java_real_world.py:0 -> ensure_node_batch", + "CALLS tests/test_java_real_world.py:0 -> execute", + "CALLS tests/test_java_real_world.py:0 -> factory", + "CALLS tests/test_java_real_world.py:0 -> get", + "CALLS tests/test_java_real_world.py:0 -> java_real_world_project", + "CALLS tests/test_java_real_world.py:0 -> main", + "CALLS tests/test_java_real_world.py:0 -> mock_ingestor", + "CALLS tests/test_java_real_world.py:0 -> name", + "CALLS tests/test_java_real_world.py:0 -> parse", + "CALLS tests/test_java_real_world.py:0 -> put", + "CALLS tests/test_java_real_world.py:0 -> render", + "CALLS tests/test_java_real_world.py:0 -> save", + "CALLS tests/test_java_real_world.py:0 -> start", + "CALLS tests/test_java_real_world.py:0 -> style", + "CALLS tests/test_java_real_world.py:0 -> temp_repo", + "CALLS tests/test_java_real_world.py:0 -> test_builder_observer_patterns", + "CALLS tests/test_java_real_world.py:0 -> test_configuration_classes", + "CALLS tests/test_java_real_world.py:0 -> test_dao_repository_patterns", + "CALLS tests/test_java_real_world.py:0 -> test_design_patterns_singleton_factory", + "CALLS tests/test_java_real_world.py:0 -> test_spring_framework_annotations", + "CALLS tests/test_java_real_world.py:0 -> test_utility_helper_classes", + "CALLS tests/test_java_real_world.py:0 -> text", + "CALLS tests/test_java_real_world.py:0 -> type", + "CALLS tests/test_java_real_world.py:0 -> value", + "CALLS tests/test_java_reflection_annotations.py:0 -> description", + "CALLS tests/test_java_reflection_annotations.py:0 -> get", + "CALLS tests/test_java_reflection_annotations.py:0 -> handler", + "CALLS tests/test_java_reflection_annotations.py:0 -> java_reflection_project", + "CALLS tests/test_java_reflection_annotations.py:0 -> main", + "CALLS tests/test_java_reflection_annotations.py:0 -> 
mock_ingestor", + "CALLS tests/test_java_reflection_annotations.py:0 -> name", + "CALLS tests/test_java_reflection_annotations.py:0 -> operation", + "CALLS tests/test_java_reflection_annotations.py:0 -> process", + "CALLS tests/test_java_reflection_annotations.py:0 -> processor", + "CALLS tests/test_java_reflection_annotations.py:0 -> put", + "CALLS tests/test_java_reflection_annotations.py:0 -> read", + "CALLS tests/test_java_reflection_annotations.py:0 -> save", + "CALLS tests/test_java_reflection_annotations.py:0 -> status", + "CALLS tests/test_java_reflection_annotations.py:0 -> temp_repo", + "CALLS tests/test_java_reflection_annotations.py:0 -> test_annotation_processing", + "CALLS tests/test_java_reflection_annotations.py:0 -> test_custom_annotations", + "CALLS tests/test_java_reflection_annotations.py:0 -> test_meta_annotations_inheritance", + "CALLS tests/test_java_reflection_annotations.py:0 -> test_reflection_api_usage", + "CALLS tests/test_java_reflection_annotations.py:0 -> type", + "CALLS tests/test_java_reflection_annotations.py:0 -> value", + "CALLS tests/test_java_relationship_validation.py:0 -> add", + "CALLS tests/test_java_relationship_validation.py:0 -> description", + "CALLS tests/test_java_relationship_validation.py:0 -> engine", + "CALLS tests/test_java_relationship_validation.py:0 -> get", + "CALLS tests/test_java_relationship_validation.py:0 -> java_relationships_project", + "CALLS tests/test_java_relationship_validation.py:0 -> main", + "CALLS tests/test_java_relationship_validation.py:0 -> mock_ingestor", + "CALLS tests/test_java_relationship_validation.py:0 -> name", + "CALLS tests/test_java_relationship_validation.py:0 -> nodes", + "CALLS tests/test_java_relationship_validation.py:0 -> relationships", + "CALLS tests/test_java_relationship_validation.py:0 -> run", + "CALLS tests/test_java_relationship_validation.py:0 -> save", + "CALLS tests/test_java_relationship_validation.py:0 -> start", + "CALLS 
tests/test_java_relationship_validation.py:0 -> summary", + "CALLS tests/test_java_relationship_validation.py:0 -> temp_repo", + "CALLS tests/test_java_relationship_validation.py:0 -> test_composition_and_aggregation_relationships", + "CALLS tests/test_java_relationship_validation.py:0 -> test_cross_package_relationships", + "CALLS tests/test_java_relationship_validation.py:0 -> test_dependency_injection_relationships", + "CALLS tests/test_java_relationship_validation.py:0 -> test_inner_class_relationships", + "CALLS tests/test_java_relationship_validation.py:0 -> test_method_overriding_relationships", + "CALLS tests/test_java_relationship_validation.py:0 -> test_static_method_and_field_relationships", + "CALLS tests/test_java_span_oracle.py:0 -> Color", + "CALLS tests/test_java_span_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_java_span_oracle.py:0 -> name", + "CALLS tests/test_java_span_oracle.py:0 -> start", + "CALLS tests/test_java_span_oracle.py:0 -> test_cgr_matches_jdk_oracle_on_node_spans", + "CALLS tests/test_java_span_oracle.py:0 -> value", + "CALLS tests/test_java_streams_functional.py:0 -> NodeType", + "CALLS tests/test_java_streams_functional.py:0 -> add", + "CALLS tests/test_java_streams_functional.py:0 -> ensure_node_batch", + "CALLS tests/test_java_streams_functional.py:0 -> generate", + "CALLS tests/test_java_streams_functional.py:0 -> get", + "CALLS tests/test_java_streams_functional.py:0 -> items", + "CALLS tests/test_java_streams_functional.py:0 -> java_streams_project", + "CALLS tests/test_java_streams_functional.py:0 -> keys", + "CALLS tests/test_java_streams_functional.py:0 -> main", + "CALLS tests/test_java_streams_functional.py:0 -> mock_ingestor", + "CALLS tests/test_java_streams_functional.py:0 -> name", + "CALLS tests/test_java_streams_functional.py:0 -> process", + "CALLS tests/test_java_streams_functional.py:0 -> processor", + "CALLS tests/test_java_streams_functional.py:0 -> stats", + "CALLS 
tests/test_java_streams_functional.py:0 -> temp_repo", + "CALLS tests/test_java_streams_functional.py:0 -> test_functional_interfaces", + "CALLS tests/test_java_streams_functional.py:0 -> test_method_references_patterns", + "CALLS tests/test_java_streams_functional.py:0 -> test_optional_patterns", + "CALLS tests/test_java_streams_functional.py:0 -> test_stream_operations", + "CALLS tests/test_java_streams_functional.py:0 -> text", + "CALLS tests/test_java_streams_functional.py:0 -> type", + "CALLS tests/test_java_streams_functional.py:0 -> value", + "CALLS tests/test_java_streams_functional.py:0 -> wrapper", + "CALLS tests/test_java_structure_oracle.py:0 -> Color", + "CALLS tests/test_java_structure_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_java_structure_oracle.py:0 -> name", + "CALLS tests/test_java_structure_oracle.py:0 -> nodes", + "CALLS tests/test_java_structure_oracle.py:0 -> run", + "CALLS tests/test_java_structure_oracle.py:0 -> test_cgr_matches_jdk_oracle_on_java_structure", + "CALLS tests/test_java_type_inference_unit.py:0 -> ImportProcessor", + "CALLS tests/test_java_type_inference_unit.py:0 -> NodeType", + "CALLS tests/test_java_type_inference_unit.py:0 -> TestJavaMethodResolverMixin", + "CALLS tests/test_java_type_inference_unit.py:0 -> TestJavaTypeInferenceEngineIntegration", + "CALLS tests/test_java_type_inference_unit.py:0 -> TestJavaTypeResolverAstMethods", + "CALLS tests/test_java_type_inference_unit.py:0 -> TestJavaTypeResolverMixin", + "CALLS tests/test_java_type_inference_unit.py:0 -> TestJavaVariableAnalyzerMixin", + "CALLS tests/test_java_type_inference_unit.py:0 -> children", + "CALLS tests/test_java_type_inference_unit.py:0 -> factory", + "CALLS tests/test_java_type_inference_unit.py:0 -> find_with_prefix", + "CALLS tests/test_java_type_inference_unit.py:0 -> import_processor", + "CALLS tests/test_java_type_inference_unit.py:0 -> items", + "CALLS tests/test_java_type_inference_unit.py:0 -> main", + "CALLS 
tests/test_java_type_inference_unit.py:0 -> mock_ast_cache", + "CALLS tests/test_java_type_inference_unit.py:0 -> mock_function_registry", + "CALLS tests/test_java_type_inference_unit.py:0 -> mock_import_processor", + "CALLS tests/test_java_type_inference_unit.py:0 -> name", + "CALLS tests/test_java_type_inference_unit.py:0 -> process", + "CALLS tests/test_java_type_inference_unit.py:0 -> processor", + "CALLS tests/test_java_type_inference_unit.py:0 -> repo", + "CALLS tests/test_java_type_inference_unit.py:0 -> repo_path", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_build_fqn_lookup_map", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_calculate_module_distance", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_collect_candidate_modules", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_parent_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_parent_class_not_found", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_registry_entries_under_fallback_to_items", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_registry_entries_under_with_prefix", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_superclass_using_ast_class_not_found", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_superclass_using_ast_nested_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_current_class_name_enum", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_current_class_name_found", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_current_class_name_interface", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_current_class_name_no_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_current_class_name_short_module", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_implemented_interfaces_no_interfaces", + "CALLS tests/test_java_type_inference_unit.py:0 -> 
test_get_implemented_interfaces_short_qualified_name", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_implemented_interfaces_single_interface", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_superclass_name_file_not_in_cache", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_superclass_name_module_not_found", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_superclass_name_no_superclass", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_superclass_name_short_qualified_name", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_superclass_name_with_valid_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_heuristic_method_return_type_boolean", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_heuristic_method_return_type_create_pattern", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_heuristic_method_return_type_getter", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_heuristic_method_return_type_unknown", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_is_matching_method", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_lookup_variable_type_caching", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_lookup_variable_type_cycle_detection", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_lookup_variable_type_empty_module", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_lookup_variable_type_empty_var_name", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_module_qn_to_java_fqn", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_rank_module_candidates_empty", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_rank_module_candidates_no_current_module", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_rank_module_candidates_prefers_closer_package", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_rank_module_candidates_prefers_exact_match", + "CALLS 
tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_from_import", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_from_local_vars", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_same_package_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_super_reference", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_this_reference", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_unknown", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_array_types", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_empty_returns_object", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_from_import_mapping", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_fully_qualified", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_generic_types", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_primitive_types", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_same_package_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_wrapper_types", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_static_or_local_method", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_static_or_local_method_not_found", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_traverse_for_class_declarations_mixed_types", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_traverse_for_class_declarations_multiple_classes", + "CALLS tests/test_java_type_inference_unit.py:0 -> type_inference", + "CALLS tests/test_java_type_inference_unit.py:0 -> type_inference_engine", + "CALLS tests/test_java_type_resolver_integration.py:0 -> 
TestJavaTypeResolverWithRealParsing", + "CALLS tests/test_java_type_resolver_integration.py:0 -> add", + "CALLS tests/test_java_type_resolver_integration.py:0 -> find_with_prefix", + "CALLS tests/test_java_type_resolver_integration.py:0 -> import_processor", + "CALLS tests/test_java_type_resolver_integration.py:0 -> items", + "CALLS tests/test_java_type_resolver_integration.py:0 -> java_parser", + "CALLS tests/test_java_type_resolver_integration.py:0 -> mock_ast_cache", + "CALLS tests/test_java_type_resolver_integration.py:0 -> mock_function_registry", + "CALLS tests/test_java_type_resolver_integration.py:0 -> name", + "CALLS tests/test_java_type_resolver_integration.py:0 -> process", + "CALLS tests/test_java_type_resolver_integration.py:0 -> repo", + "CALLS tests/test_java_type_resolver_integration.py:0 -> repo_path", + "CALLS tests/test_java_type_resolver_integration.py:0 -> run", + "CALLS tests/test_java_type_resolver_integration.py:0 -> save", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_class_with_extends_and_implements", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_current_class_name_class", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_current_class_name_enum", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_current_class_name_interface", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_implemented_interfaces_multiple", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_implemented_interfaces_none", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_implemented_interfaces_single", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_superclass_name_generic_extends", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_superclass_name_no_extends", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_superclass_name_with_real_ast", + "CALLS 
tests/test_java_type_resolver_integration.py:0 -> test_nested_class", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_traverse_for_class_declarations_multiple_in_file", + "CALLS tests/test_java_type_resolver_integration.py:0 -> type_inference", + "CALLS tests/test_java_type_resolver_integration.py:0 -> type_inference_engine", + "CALLS tests/test_java_type_resolver_integration.py:0 -> value", + "CALLS tests/test_java_utils.py:0 -> TestBuildJavaQualifiedName", + "CALLS tests/test_java_utils.py:0 -> TestExtractJavaAnnotationInfo", + "CALLS tests/test_java_utils.py:0 -> TestExtractJavaClassInfo", + "CALLS tests/test_java_utils.py:0 -> TestExtractJavaFieldInfo", + "CALLS tests/test_java_utils.py:0 -> TestExtractJavaImportPath", + "CALLS tests/test_java_utils.py:0 -> TestExtractJavaMethodCallInfo", + "CALLS tests/test_java_utils.py:0 -> TestExtractJavaMethodInfo", + "CALLS tests/test_java_utils.py:0 -> TestExtractJavaPackageName", + "CALLS tests/test_java_utils.py:0 -> TestFindJavaPackageStartIndex", + "CALLS tests/test_java_utils.py:0 -> TestGetJavaVisibility", + "CALLS tests/test_java_utils.py:0 -> TestIsJavaMainMethod", + "CALLS tests/test_java_utils.py:0 -> add", + "CALLS tests/test_java_utils.py:0 -> children", + "CALLS tests/test_java_utils.py:0 -> main", + "CALLS tests/test_java_utils.py:0 -> name", + "CALLS tests/test_java_utils.py:0 -> parent", + "CALLS tests/test_java_utils.py:0 -> process", + "CALLS tests/test_java_utils.py:0 -> run", + "CALLS tests/test_java_utils.py:0 -> test_annotation_type_declaration", + "CALLS tests/test_java_utils.py:0 -> test_annotation_with_arguments", + "CALLS tests/test_java_utils.py:0 -> test_class_with_generic_superclass", + "CALLS tests/test_java_utils.py:0 -> test_class_with_modifiers", + "CALLS tests/test_java_utils.py:0 -> test_class_with_superclass", + "CALLS tests/test_java_utils.py:0 -> test_class_with_type_parameters", + "CALLS tests/test_java_utils.py:0 -> test_constructor", + "CALLS 
tests/test_java_utils.py:0 -> test_empty_import_declaration", + "CALLS tests/test_java_utils.py:0 -> test_empty_package_declaration", + "CALLS tests/test_java_utils.py:0 -> test_empty_parts", + "CALLS tests/test_java_utils.py:0 -> test_empty_path", + "CALLS tests/test_java_utils.py:0 -> test_enum_declaration", + "CALLS tests/test_java_utils.py:0 -> test_exclude_classes", + "CALLS tests/test_java_utils.py:0 -> test_field_with_annotation", + "CALLS tests/test_java_utils.py:0 -> test_field_with_modifiers", + "CALLS tests/test_java_utils.py:0 -> test_include_methods", + "CALLS tests/test_java_utils.py:0 -> test_interface_declaration", + "CALLS tests/test_java_utils.py:0 -> test_invalid_node_type", + "CALLS tests/test_java_utils.py:0 -> test_java_at_start", + "CALLS tests/test_java_utils.py:0 -> test_kotlin_layout", + "CALLS tests/test_java_utils.py:0 -> test_method_call_on_object", + "CALLS tests/test_java_utils.py:0 -> test_method_call_on_super", + "CALLS tests/test_java_utils.py:0 -> test_method_call_on_this", + "CALLS tests/test_java_utils.py:0 -> test_method_call_with_arguments", + "CALLS tests/test_java_utils.py:0 -> test_method_with_modifiers_and_annotations", + "CALLS tests/test_java_utils.py:0 -> test_method_with_parameters", + "CALLS tests/test_java_utils.py:0 -> test_method_with_varargs", + "CALLS tests/test_java_utils.py:0 -> test_nested_class", + "CALLS tests/test_java_utils.py:0 -> test_no_package_structure", + "CALLS tests/test_java_utils.py:0 -> test_non_standard_layout_with_main", + "CALLS tests/test_java_utils.py:0 -> test_not_main_missing_public", + "CALLS tests/test_java_utils.py:0 -> test_not_main_missing_static", + "CALLS tests/test_java_utils.py:0 -> test_not_main_not_void", + "CALLS tests/test_java_utils.py:0 -> test_not_main_wrong_name", + "CALLS tests/test_java_utils.py:0 -> test_package_private_visibility", + "CALLS tests/test_java_utils.py:0 -> test_private_visibility", + "CALLS tests/test_java_utils.py:0 -> test_protected_visibility", + 
"CALLS tests/test_java_utils.py:0 -> test_public_visibility", + "CALLS tests/test_java_utils.py:0 -> test_record_declaration", + "CALLS tests/test_java_utils.py:0 -> test_regular_import", + "CALLS tests/test_java_utils.py:0 -> test_scala_layout", + "CALLS tests/test_java_utils.py:0 -> test_scoped_identifier_package", + "CALLS tests/test_java_utils.py:0 -> test_simple_annotation", + "CALLS tests/test_java_utils.py:0 -> test_simple_class", + "CALLS tests/test_java_utils.py:0 -> test_simple_field", + "CALLS tests/test_java_utils.py:0 -> test_simple_identifier_import", + "CALLS tests/test_java_utils.py:0 -> test_simple_identifier_package", + "CALLS tests/test_java_utils.py:0 -> test_simple_method", + "CALLS tests/test_java_utils.py:0 -> test_simple_method_call", + "CALLS tests/test_java_utils.py:0 -> test_simple_src_layout", + "CALLS tests/test_java_utils.py:0 -> test_standard_maven_layout", + "CALLS tests/test_java_utils.py:0 -> test_static_import", + "CALLS tests/test_java_utils.py:0 -> test_test_folder_layout", + "CALLS tests/test_java_utils.py:0 -> test_valid_main_method_with_array", + "CALLS tests/test_java_utils.py:0 -> test_valid_main_method_with_varargs", + "CALLS tests/test_java_utils.py:0 -> test_wildcard_import", + "CALLS tests/test_java_utils.py:0 -> type", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> TestAssignmentAnalysisWithRealParsing", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> TestClassFieldAnalysisWithRealParsing", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> TestComplexScenariosWithRealParsing", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> TestEdgeCasesWithRealParsing", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> TestEnhancedForLoopAnalysisWithRealParsing", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> TestLocalVariableAnalysisWithRealParsing", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> 
TestParameterAnalysisWithRealParsing", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> TestTypeInferenceWithRealParsing", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> children", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> done", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> engine", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> find_with_prefix", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> import_processor", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> items", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> java_parser", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> mock_ast_cache", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> mock_function_registry", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> name", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> process", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> repo", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> repo_path", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_abstract_method", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_array_type_declaration", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_assignment_with_literal_value", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_chained_assignments", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_class_fields_accessible_in_method", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_constructor_parameters", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_empty_method_body", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_enhanced_for_loop_with_array", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_enhanced_for_loop_with_list", + "CALLS 
tests/test_java_variable_analyzer_integration.py:0 -> test_enhanced_for_with_custom_type", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_generic_method", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_infer_type_from_literals", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_infer_type_from_new_expression", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_interface_method", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_lambda_expression_context", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_local_variable_with_object_creation", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_method_with_all_variable_types", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_method_with_multiple_parameters", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_method_with_single_parameter", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_method_with_varargs_parameter", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_multiple_declarators_same_type", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_nested_classes_variable_resolution", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_nested_enhanced_for_loops", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_record_constructor", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_simple_assignment_in_constructor", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_simple_local_variable_declaration", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_static_fields", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_static_method_variables", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_switch_expression_variables", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> 
test_try_catch_variable_declarations", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> text", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> type", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> type_inference", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> value", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> ImportProcessor", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestAnalyzeJavaClassFields", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestAnalyzeJavaConstructorAssignments", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestAnalyzeJavaEnhancedForLoops", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestAnalyzeJavaLocalVariables", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestAnalyzeJavaParameters", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestBuildVariableTypeMap", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestCollectAllVariableTypes", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestDoVariableTypeLookup", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestExtractJavaVariableReference", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestFindFieldTypeInClass", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestInferJavaTypeFromExpression", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> TestLookupJavaFieldType", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> children", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> engine", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> find_with_prefix", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> import_processor", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> items", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> mock_ast_cache", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> mock_function_registry", + "CALLS tests/test_java_variable_analyzer_unit.py:0 
-> mock_import_processor", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> name", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> processor", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> repo", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> repo_path", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_array_creation_expression", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_assignment_expression_inferred", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_assignment_with_field_access", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_builds_map_successfully", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_class_field_extracted", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_collects_from_all_sources", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_decimal_floating_point_literal", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_enhanced_for_with_child_variable_declarator", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_enhanced_for_with_type_and_name", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_false_literal", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_field_access", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_field_access_missing_parts", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_finds_field_in_class", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_formal_parameter_missing_name", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_formal_parameter_with_type", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_identifier", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_integer_literal", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_local_variable_declaration", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_local_variable_missing_type", + "CALLS 
tests/test_java_variable_analyzer_unit.py:0 -> test_local_variable_with_object_creation_value", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_multiple_formal_parameters", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_nested_assignments", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_nested_enhanced_for_loops", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_nested_local_variables", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_no_containing_class", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_no_parameters_node", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_object_creation_expression", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_empty_on_no_variables", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_for_empty_class_type", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_for_empty_field_name", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_for_short_module_qn", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_when_class_not_found", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_when_field_not_found", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_when_file_not_in_ast_cache", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_when_file_not_in_cache", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_when_module_not_in_path_map", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_spread_parameter", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_string_literal", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_true_literal", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_unknown_expression_type", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_unknown_node_type", + "CALLS 
tests/test_java_variable_analyzer_unit.py:0 -> type_inference", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> value", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> add", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> factory", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> get", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> java_loom_project", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> main", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> mock_ingestor", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> name", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> operation", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> run", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> start", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> submit", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> temp_repo", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> test_scoped_values", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> test_structured_concurrency", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> test_virtual_threads_basics", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> value", + "CALLS tests/test_javascript_async_patterns.py:0 -> children", + "CALLS tests/test_javascript_async_patterns.py:0 -> done", + "CALLS tests/test_javascript_async_patterns.py:0 -> export", + "CALLS tests/test_javascript_async_patterns.py:0 -> index", + "CALLS tests/test_javascript_async_patterns.py:0 -> items", + "CALLS tests/test_javascript_async_patterns.py:0 -> javascript_async_project", + "CALLS tests/test_javascript_async_patterns.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_async_patterns.py:0 -> name", + "CALLS tests/test_javascript_async_patterns.py:0 -> process", + "CALLS tests/test_javascript_async_patterns.py:0 -> relationships", + "CALLS tests/test_javascript_async_patterns.py:0 -> save", + "CALLS tests/test_javascript_async_patterns.py:0 -> start", + "CALLS 
tests/test_javascript_async_patterns.py:0 -> status", + "CALLS tests/test_javascript_async_patterns.py:0 -> style", + "CALLS tests/test_javascript_async_patterns.py:0 -> temp_repo", + "CALLS tests/test_javascript_async_patterns.py:0 -> test_async_await_patterns", + "CALLS tests/test_javascript_async_patterns.py:0 -> test_async_comprehensive", + "CALLS tests/test_javascript_async_patterns.py:0 -> test_callback_patterns", + "CALLS tests/test_javascript_async_patterns.py:0 -> test_generator_patterns", + "CALLS tests/test_javascript_async_patterns.py:0 -> test_promise_patterns", + "CALLS tests/test_javascript_async_patterns.py:0 -> text", + "CALLS tests/test_javascript_async_patterns.py:0 -> value", + "CALLS tests/test_javascript_classes.py:0 -> add", + "CALLS tests/test_javascript_classes.py:0 -> clear", + "CALLS tests/test_javascript_classes.py:0 -> ensure_relationship_batch", + "CALLS tests/test_javascript_classes.py:0 -> export", + "CALLS tests/test_javascript_classes.py:0 -> factory", + "CALLS tests/test_javascript_classes.py:0 -> get", + "CALLS tests/test_javascript_classes.py:0 -> javascript_classes_project", + "CALLS tests/test_javascript_classes.py:0 -> mixin", + "CALLS tests/test_javascript_classes.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_classes.py:0 -> name", + "CALLS tests/test_javascript_classes.py:0 -> parent", + "CALLS tests/test_javascript_classes.py:0 -> parse", + "CALLS tests/test_javascript_classes.py:0 -> processor", + "CALLS tests/test_javascript_classes.py:0 -> read", + "CALLS tests/test_javascript_classes.py:0 -> relationships", + "CALLS tests/test_javascript_classes.py:0 -> retriever", + "CALLS tests/test_javascript_classes.py:0 -> start", + "CALLS tests/test_javascript_classes.py:0 -> temp_repo", + "CALLS tests/test_javascript_classes.py:0 -> test_basic_class_declarations", + "CALLS tests/test_javascript_classes.py:0 -> test_class_comprehensive", + "CALLS tests/test_javascript_classes.py:0 -> test_class_expressions_and_mixins", + 
"CALLS tests/test_javascript_classes.py:0 -> test_class_inheritance", + "CALLS tests/test_javascript_classes.py:0 -> test_private_fields_and_methods", + "CALLS tests/test_javascript_classes.py:0 -> test_static_methods_and_properties", + "CALLS tests/test_javascript_classes.py:0 -> type", + "CALLS tests/test_javascript_classes.py:0 -> value", + "CALLS tests/test_javascript_closures_scoping.py:0 -> add", + "CALLS tests/test_javascript_closures_scoping.py:0 -> engine", + "CALLS tests/test_javascript_closures_scoping.py:0 -> export", + "CALLS tests/test_javascript_closures_scoping.py:0 -> get", + "CALLS tests/test_javascript_closures_scoping.py:0 -> index", + "CALLS tests/test_javascript_closures_scoping.py:0 -> javascript_closures_project", + "CALLS tests/test_javascript_closures_scoping.py:0 -> logs", + "CALLS tests/test_javascript_closures_scoping.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_closures_scoping.py:0 -> name", + "CALLS tests/test_javascript_closures_scoping.py:0 -> process", + "CALLS tests/test_javascript_closures_scoping.py:0 -> relationships", + "CALLS tests/test_javascript_closures_scoping.py:0 -> start", + "CALLS tests/test_javascript_closures_scoping.py:0 -> temp_repo", + "CALLS tests/test_javascript_closures_scoping.py:0 -> test_basic_closures", + "CALLS tests/test_javascript_closures_scoping.py:0 -> test_closures_comprehensive", + "CALLS tests/test_javascript_closures_scoping.py:0 -> test_hoisting_behavior", + "CALLS tests/test_javascript_closures_scoping.py:0 -> test_module_patterns_iife", + "CALLS tests/test_javascript_closures_scoping.py:0 -> test_variable_scoping", + "CALLS tests/test_javascript_closures_scoping.py:0 -> text", + "CALLS tests/test_javascript_closures_scoping.py:0 -> value", + "CALLS tests/test_javascript_containment_oracle.py:0 -> RelationshipType", + "CALLS tests/test_javascript_containment_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_javascript_containment_oracle.py:0 -> export", + "CALLS 
tests/test_javascript_containment_oracle.py:0 -> name", + "CALLS tests/test_javascript_containment_oracle.py:0 -> run", + "CALLS tests/test_javascript_containment_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_js_containment_edges", + "CALLS tests/test_javascript_containment_oracle.py:0 -> value", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> js_singleton_project", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> load", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> main", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> name", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> save", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> start", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> temp_repo", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> test_js_singleton_pattern_cross_file_calls", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> up", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> value", + "CALLS tests/test_javascript_destructuring.py:0 -> export", + "CALLS tests/test_javascript_destructuring.py:0 -> get", + "CALLS tests/test_javascript_destructuring.py:0 -> handler", + "CALLS tests/test_javascript_destructuring.py:0 -> items", + "CALLS tests/test_javascript_destructuring.py:0 -> javascript_destructuring_project", + "CALLS tests/test_javascript_destructuring.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_destructuring.py:0 -> name", + "CALLS tests/test_javascript_destructuring.py:0 -> process", + "CALLS tests/test_javascript_destructuring.py:0 -> processor", + "CALLS tests/test_javascript_destructuring.py:0 -> relationships", + "CALLS tests/test_javascript_destructuring.py:0 -> start", + "CALLS tests/test_javascript_destructuring.py:0 -> stats", + "CALLS tests/test_javascript_destructuring.py:0 -> status", + "CALLS tests/test_javascript_destructuring.py:0 -> 
temp_repo", + "CALLS tests/test_javascript_destructuring.py:0 -> test_array_destructuring", + "CALLS tests/test_javascript_destructuring.py:0 -> test_destructuring_comprehensive", + "CALLS tests/test_javascript_destructuring.py:0 -> test_destructuring_with_imports", + "CALLS tests/test_javascript_destructuring.py:0 -> test_object_destructuring", + "CALLS tests/test_javascript_destructuring.py:0 -> test_parameter_destructuring", + "CALLS tests/test_javascript_destructuring.py:0 -> type", + "CALLS tests/test_javascript_destructuring.py:0 -> value", + "CALLS tests/test_javascript_error_handling.py:0 -> add", + "CALLS tests/test_javascript_error_handling.py:0 -> clear", + "CALLS tests/test_javascript_error_handling.py:0 -> ensure_node_batch", + "CALLS tests/test_javascript_error_handling.py:0 -> ensure_relationship_batch", + "CALLS tests/test_javascript_error_handling.py:0 -> execute", + "CALLS tests/test_javascript_error_handling.py:0 -> export", + "CALLS tests/test_javascript_error_handling.py:0 -> factory", + "CALLS tests/test_javascript_error_handling.py:0 -> handler", + "CALLS tests/test_javascript_error_handling.py:0 -> index", + "CALLS tests/test_javascript_error_handling.py:0 -> items", + "CALLS tests/test_javascript_error_handling.py:0 -> javascript_error_handling_project", + "CALLS tests/test_javascript_error_handling.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_error_handling.py:0 -> name", + "CALLS tests/test_javascript_error_handling.py:0 -> nodes", + "CALLS tests/test_javascript_error_handling.py:0 -> operation", + "CALLS tests/test_javascript_error_handling.py:0 -> parse", + "CALLS tests/test_javascript_error_handling.py:0 -> process", + "CALLS tests/test_javascript_error_handling.py:0 -> processor", + "CALLS tests/test_javascript_error_handling.py:0 -> relationships", + "CALLS tests/test_javascript_error_handling.py:0 -> status", + "CALLS tests/test_javascript_error_handling.py:0 -> temp_repo", + "CALLS 
tests/test_javascript_error_handling.py:0 -> test_async_error_handling", + "CALLS tests/test_javascript_error_handling.py:0 -> test_custom_error_classes", + "CALLS tests/test_javascript_error_handling.py:0 -> test_error_handling_comprehensive", + "CALLS tests/test_javascript_error_handling.py:0 -> test_try_catch_finally_blocks", + "CALLS tests/test_javascript_error_handling.py:0 -> text", + "CALLS tests/test_javascript_error_handling.py:0 -> type", + "CALLS tests/test_javascript_error_handling.py:0 -> up", + "CALLS tests/test_javascript_error_handling.py:0 -> value", + "CALLS tests/test_javascript_error_handling.py:0 -> wrapper", + "CALLS tests/test_javascript_functions.py:0 -> add", + "CALLS tests/test_javascript_functions.py:0 -> export", + "CALLS tests/test_javascript_functions.py:0 -> get", + "CALLS tests/test_javascript_functions.py:0 -> items", + "CALLS tests/test_javascript_functions.py:0 -> javascript_functions_project", + "CALLS tests/test_javascript_functions.py:0 -> method_calls", + "CALLS tests/test_javascript_functions.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_functions.py:0 -> name", + "CALLS tests/test_javascript_functions.py:0 -> operation", + "CALLS tests/test_javascript_functions.py:0 -> process", + "CALLS tests/test_javascript_functions.py:0 -> processor", + "CALLS tests/test_javascript_functions.py:0 -> relationships", + "CALLS tests/test_javascript_functions.py:0 -> start", + "CALLS tests/test_javascript_functions.py:0 -> temp_repo", + "CALLS tests/test_javascript_functions.py:0 -> test_arrow_functions", + "CALLS tests/test_javascript_functions.py:0 -> test_async_functions", + "CALLS tests/test_javascript_functions.py:0 -> test_function_comprehensive", + "CALLS tests/test_javascript_functions.py:0 -> test_function_declarations", + "CALLS tests/test_javascript_functions.py:0 -> test_higher_order_functions", + "CALLS tests/test_javascript_functions.py:0 -> test_immediately_invoked_function_expressions", + "CALLS 
tests/test_javascript_functions.py:0 -> test_method_definitions", + "CALLS tests/test_javascript_functions.py:0 -> text", + "CALLS tests/test_javascript_functions.py:0 -> value", + "CALLS tests/test_javascript_imports.py:0 -> _ingest_missing_import_patterns", + "CALLS tests/test_javascript_imports.py:0 -> add", + "CALLS tests/test_javascript_imports.py:0 -> export", + "CALLS tests/test_javascript_imports.py:0 -> get", + "CALLS tests/test_javascript_imports.py:0 -> index", + "CALLS tests/test_javascript_imports.py:0 -> javascript_imports_project", + "CALLS tests/test_javascript_imports.py:0 -> load", + "CALLS tests/test_javascript_imports.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_imports.py:0 -> name", + "CALLS tests/test_javascript_imports.py:0 -> parse", + "CALLS tests/test_javascript_imports.py:0 -> processor", + "CALLS tests/test_javascript_imports.py:0 -> read", + "CALLS tests/test_javascript_imports.py:0 -> relationships", + "CALLS tests/test_javascript_imports.py:0 -> temp_repo", + "CALLS tests/test_javascript_imports.py:0 -> test_absolute_package_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_aliased_re_export_import_mapping", + "CALLS tests/test_javascript_imports.py:0 -> test_commonjs_aliased_destructuring", + "CALLS tests/test_javascript_imports.py:0 -> test_commonjs_multiple_destructured_variables_regression", + "CALLS tests/test_javascript_imports.py:0 -> test_commonjs_require_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_dynamic_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_es6_default_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_es6_named_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_es6_namespace_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_import_error_handling", + "CALLS tests/test_javascript_imports.py:0 -> test_import_relationships_comprehensive", + "CALLS tests/test_javascript_imports.py:0 -> test_mixed_import_patterns", + "CALLS 
tests/test_javascript_imports.py:0 -> test_relative_path_resolution", + "CALLS tests/test_javascript_imports.py:0 -> up", + "CALLS tests/test_javascript_modules.py:0 -> add", + "CALLS tests/test_javascript_modules.py:0 -> children", + "CALLS tests/test_javascript_modules.py:0 -> export", + "CALLS tests/test_javascript_modules.py:0 -> factory", + "CALLS tests/test_javascript_modules.py:0 -> get", + "CALLS tests/test_javascript_modules.py:0 -> handler", + "CALLS tests/test_javascript_modules.py:0 -> items", + "CALLS tests/test_javascript_modules.py:0 -> javascript_modules_project", + "CALLS tests/test_javascript_modules.py:0 -> main", + "CALLS tests/test_javascript_modules.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_modules.py:0 -> name", + "CALLS tests/test_javascript_modules.py:0 -> nodes", + "CALLS tests/test_javascript_modules.py:0 -> process", + "CALLS tests/test_javascript_modules.py:0 -> put", + "CALLS tests/test_javascript_modules.py:0 -> relationships", + "CALLS tests/test_javascript_modules.py:0 -> render", + "CALLS tests/test_javascript_modules.py:0 -> temp_repo", + "CALLS tests/test_javascript_modules.py:0 -> test_aliased_re_exports", + "CALLS tests/test_javascript_modules.py:0 -> test_circular_dependencies", + "CALLS tests/test_javascript_modules.py:0 -> test_commonjs_module_exports", + "CALLS tests/test_javascript_modules.py:0 -> test_dynamic_exports", + "CALLS tests/test_javascript_modules.py:0 -> test_es6_export_patterns", + "CALLS tests/test_javascript_modules.py:0 -> test_mixed_module_systems", + "CALLS tests/test_javascript_modules.py:0 -> test_module_comprehensive", + "CALLS tests/test_javascript_modules.py:0 -> type", + "CALLS tests/test_javascript_modules.py:0 -> value", + "CALLS tests/test_javascript_object_patterns.py:0 -> add", + "CALLS tests/test_javascript_object_patterns.py:0 -> clear", + "CALLS tests/test_javascript_object_patterns.py:0 -> engine", + "CALLS tests/test_javascript_object_patterns.py:0 -> ensure_node_batch", + 
"CALLS tests/test_javascript_object_patterns.py:0 -> ensure_relationship_batch", + "CALLS tests/test_javascript_object_patterns.py:0 -> export", + "CALLS tests/test_javascript_object_patterns.py:0 -> factory", + "CALLS tests/test_javascript_object_patterns.py:0 -> index", + "CALLS tests/test_javascript_object_patterns.py:0 -> javascript_object_patterns_project", + "CALLS tests/test_javascript_object_patterns.py:0 -> load", + "CALLS tests/test_javascript_object_patterns.py:0 -> mixin", + "CALLS tests/test_javascript_object_patterns.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_object_patterns.py:0 -> name", + "CALLS tests/test_javascript_object_patterns.py:0 -> nodes", + "CALLS tests/test_javascript_object_patterns.py:0 -> parent", + "CALLS tests/test_javascript_object_patterns.py:0 -> parse", + "CALLS tests/test_javascript_object_patterns.py:0 -> process", + "CALLS tests/test_javascript_object_patterns.py:0 -> put", + "CALLS tests/test_javascript_object_patterns.py:0 -> relationships", + "CALLS tests/test_javascript_object_patterns.py:0 -> restart", + "CALLS tests/test_javascript_object_patterns.py:0 -> run", + "CALLS tests/test_javascript_object_patterns.py:0 -> start", + "CALLS tests/test_javascript_object_patterns.py:0 -> status", + "CALLS tests/test_javascript_object_patterns.py:0 -> temp_repo", + "CALLS tests/test_javascript_object_patterns.py:0 -> test_constructor_patterns", + "CALLS tests/test_javascript_object_patterns.py:0 -> test_factory_functions", + "CALLS tests/test_javascript_object_patterns.py:0 -> test_object_composition", + "CALLS tests/test_javascript_object_patterns.py:0 -> test_object_literals", + "CALLS tests/test_javascript_object_patterns.py:0 -> test_object_patterns_comprehensive", + "CALLS tests/test_javascript_object_patterns.py:0 -> type", + "CALLS tests/test_javascript_object_patterns.py:0 -> value", + "CALLS tests/test_javascript_object_patterns.py:0 -> walk", + "CALLS tests/test_javascript_path_resolution.py:0 -> 
TestJavaScriptPathResolution", + "CALLS tests/test_javascript_path_resolution.py:0 -> engine", + "CALLS tests/test_javascript_path_resolution.py:0 -> factory", + "CALLS tests/test_javascript_path_resolution.py:0 -> graph_updater", + "CALLS tests/test_javascript_path_resolution.py:0 -> import_processor", + "CALLS tests/test_javascript_path_resolution.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_path_resolution.py:0 -> parent", + "CALLS tests/test_javascript_path_resolution.py:0 -> repo_path", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_absolute_imports", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_complex_relative_paths", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_deeply_nested_modules", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_edge_cases", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_parent_directory_imports", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_same_directory_imports", + "CALLS tests/test_javascript_prototypes.py:0 -> NodeType", + "CALLS tests/test_javascript_prototypes.py:0 -> add", + "CALLS tests/test_javascript_prototypes.py:0 -> ensure_node_batch", + "CALLS tests/test_javascript_prototypes.py:0 -> ensure_relationship_batch", + "CALLS tests/test_javascript_prototypes.py:0 -> factory", + "CALLS tests/test_javascript_prototypes.py:0 -> get", + "CALLS tests/test_javascript_prototypes.py:0 -> handler", + "CALLS tests/test_javascript_prototypes.py:0 -> index", + "CALLS tests/test_javascript_prototypes.py:0 -> items", + "CALLS tests/test_javascript_prototypes.py:0 -> javascript_prototypes_project", + "CALLS tests/test_javascript_prototypes.py:0 -> keys", + "CALLS tests/test_javascript_prototypes.py:0 -> mixin", + "CALLS tests/test_javascript_prototypes.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_prototypes.py:0 -> name", + "CALLS tests/test_javascript_prototypes.py:0 -> parent", + "CALLS tests/test_javascript_prototypes.py:0 -> process", + 
"CALLS tests/test_javascript_prototypes.py:0 -> relationships", + "CALLS tests/test_javascript_prototypes.py:0 -> save", + "CALLS tests/test_javascript_prototypes.py:0 -> start", + "CALLS tests/test_javascript_prototypes.py:0 -> status", + "CALLS tests/test_javascript_prototypes.py:0 -> temp_repo", + "CALLS tests/test_javascript_prototypes.py:0 -> test_constructor_functions_and_prototypes", + "CALLS tests/test_javascript_prototypes.py:0 -> test_object_create_patterns", + "CALLS tests/test_javascript_prototypes.py:0 -> test_prototype_chain_and_method_resolution", + "CALLS tests/test_javascript_prototypes.py:0 -> test_prototype_comprehensive", + "CALLS tests/test_javascript_prototypes.py:0 -> test_prototype_mixins_and_composition", + "CALLS tests/test_javascript_prototypes.py:0 -> test_prototype_patterns_edge_cases", + "CALLS tests/test_javascript_prototypes.py:0 -> type", + "CALLS tests/test_javascript_prototypes.py:0 -> up", + "CALLS tests/test_javascript_prototypes.py:0 -> value", + "CALLS tests/test_javascript_span_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_javascript_span_oracle.py:0 -> main", + "CALLS tests/test_javascript_span_oracle.py:0 -> name", + "CALLS tests/test_javascript_span_oracle.py:0 -> run", + "CALLS tests/test_javascript_span_oracle.py:0 -> start", + "CALLS tests/test_javascript_span_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_javascript_node_spans", + "CALLS tests/test_javascript_spread_rest.py:0 -> add", + "CALLS tests/test_javascript_spread_rest.py:0 -> children", + "CALLS tests/test_javascript_spread_rest.py:0 -> ensure_relationship_batch", + "CALLS tests/test_javascript_spread_rest.py:0 -> export", + "CALLS tests/test_javascript_spread_rest.py:0 -> handler", + "CALLS tests/test_javascript_spread_rest.py:0 -> index", + "CALLS tests/test_javascript_spread_rest.py:0 -> items", + "CALLS tests/test_javascript_spread_rest.py:0 -> javascript_spread_rest_project", + "CALLS tests/test_javascript_spread_rest.py:0 -> keys", + "CALLS 
tests/test_javascript_spread_rest.py:0 -> main", + "CALLS tests/test_javascript_spread_rest.py:0 -> metadata", + "CALLS tests/test_javascript_spread_rest.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_spread_rest.py:0 -> name", + "CALLS tests/test_javascript_spread_rest.py:0 -> operation", + "CALLS tests/test_javascript_spread_rest.py:0 -> process", + "CALLS tests/test_javascript_spread_rest.py:0 -> processor", + "CALLS tests/test_javascript_spread_rest.py:0 -> start", + "CALLS tests/test_javascript_spread_rest.py:0 -> temp_repo", + "CALLS tests/test_javascript_spread_rest.py:0 -> test_destructuring_with_spread_rest", + "CALLS tests/test_javascript_spread_rest.py:0 -> test_rest_parameters", + "CALLS tests/test_javascript_spread_rest.py:0 -> test_spread_in_arrays", + "CALLS tests/test_javascript_spread_rest.py:0 -> test_spread_in_objects", + "CALLS tests/test_javascript_spread_rest.py:0 -> test_spread_rest_comprehensive", + "CALLS tests/test_javascript_spread_rest.py:0 -> type", + "CALLS tests/test_javascript_spread_rest.py:0 -> value", + "CALLS tests/test_javascript_structure_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_javascript_structure_oracle.py:0 -> name", + "CALLS tests/test_javascript_structure_oracle.py:0 -> nodes", + "CALLS tests/test_javascript_structure_oracle.py:0 -> run", + "CALLS tests/test_javascript_structure_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_javascript_structure", + "CALLS tests/test_javascript_this_binding.py:0 -> ensure_relationship_batch", + "CALLS tests/test_javascript_this_binding.py:0 -> export", + "CALLS tests/test_javascript_this_binding.py:0 -> handler", + "CALLS tests/test_javascript_this_binding.py:0 -> index", + "CALLS tests/test_javascript_this_binding.py:0 -> javascript_this_project", + "CALLS tests/test_javascript_this_binding.py:0 -> method_calls", + "CALLS tests/test_javascript_this_binding.py:0 -> mock_ingestor", + "CALLS tests/test_javascript_this_binding.py:0 -> name", + "CALLS 
tests/test_javascript_this_binding.py:0 -> processor", + "CALLS tests/test_javascript_this_binding.py:0 -> render", + "CALLS tests/test_javascript_this_binding.py:0 -> start", + "CALLS tests/test_javascript_this_binding.py:0 -> temp_repo", + "CALLS tests/test_javascript_this_binding.py:0 -> test_arrow_functions_lexical_this", + "CALLS tests/test_javascript_this_binding.py:0 -> test_bind_call_apply_methods", + "CALLS tests/test_javascript_this_binding.py:0 -> test_this_comprehensive", + "CALLS tests/test_javascript_this_binding.py:0 -> test_this_in_callbacks_and_events", + "CALLS tests/test_javascript_this_binding.py:0 -> test_this_in_different_contexts", + "CALLS tests/test_javascript_this_binding.py:0 -> value", + "CALLS tests/test_javascript_this_binding.py:0 -> wrapper", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> TestArrowFunctionIngestion", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> TestFindObjectNameForMethod", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> TestIsClassMethod", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> TestIsExportInsideFunction", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> TestIsInsideMethodWithObjectLiterals", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> TestIsMethodInClass", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> TestIsStaticMethodInClass", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> TestObjectLiteralMethodIngestion", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> TestPrototypeMethodIngestion", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> add", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> children", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> definition_processor", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> ensure_node_batch", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> ensure_relationship_batch", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> export", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> graph_updater", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> 
handler", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> js_parser", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> mixin", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> mock_ingestor", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> name", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> repo_path", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> temp_js_project", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> temp_repo", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_arrow_functions_in_objects_are_ingested", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_assignment_arrow_functions_are_ingested", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_class_method_returns_true", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_export_at_module_level_returns_false", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_export_inside_function_returns_true", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_finds_object_name_from_variable_declarator", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_instance_method_returns_false", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_method_in_class_returns_true", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_non_method_returns_false", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_object_in_class_method_returns_true", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_object_literal_methods_are_ingested", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_object_method_returns_false", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_prototype_inheritance_creates_relationship", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_prototype_methods_are_ingested", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_returns_none_for_anonymous_object", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_standalone_function_returns_false", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_standalone_object_returns_false", + "CALLS 
tests/test_js_ts_ingest_helpers.py:0 -> test_static_method_returns_true", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> updater", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> value", + "CALLS tests/test_js_ts_module_system.py:0 -> TestAsyncExports", + "CALLS tests/test_js_ts_module_system.py:0 -> TestCommonJSDestructuringImports", + "CALLS tests/test_js_ts_module_system.py:0 -> TestCommonJSExports", + "CALLS tests/test_js_ts_module_system.py:0 -> TestCommonJSObjectExports", + "CALLS tests/test_js_ts_module_system.py:0 -> TestDeepNestedModulePaths", + "CALLS tests/test_js_ts_module_system.py:0 -> TestES6Exports", + "CALLS tests/test_js_ts_module_system.py:0 -> TestEmptyAndMinimalFiles", + "CALLS tests/test_js_ts_module_system.py:0 -> TestExportConstFunctionExpression", + "CALLS tests/test_js_ts_module_system.py:0 -> TestIIFEPatterns", + "CALLS tests/test_js_ts_module_system.py:0 -> TestLargeFileWithManyExports", + "CALLS tests/test_js_ts_module_system.py:0 -> TestMixedModuleSystems", + "CALLS tests/test_js_ts_module_system.py:0 -> TestMultipleDestructuredImports", + "CALLS tests/test_js_ts_module_system.py:0 -> TestNestedRequires", + "CALLS tests/test_js_ts_module_system.py:0 -> TestSpecialCharactersInExports", + "CALLS tests/test_js_ts_module_system.py:0 -> TestTypeScriptDecorators", + "CALLS tests/test_js_ts_module_system.py:0 -> TestTypeScriptInterfaces", + "CALLS tests/test_js_ts_module_system.py:0 -> TestTypeScriptModules", + "CALLS tests/test_js_ts_module_system.py:0 -> add", + "CALLS tests/test_js_ts_module_system.py:0 -> ensure_node_batch", + "CALLS tests/test_js_ts_module_system.py:0 -> ensure_relationship_batch", + "CALLS tests/test_js_ts_module_system.py:0 -> export", + "CALLS tests/test_js_ts_module_system.py:0 -> handler", + "CALLS tests/test_js_ts_module_system.py:0 -> mock_ingestor", + "CALLS tests/test_js_ts_module_system.py:0 -> name", + "CALLS tests/test_js_ts_module_system.py:0 -> process", + "CALLS tests/test_js_ts_module_system.py:0 -> 
read", + "CALLS tests/test_js_ts_module_system.py:0 -> start", + "CALLS tests/test_js_ts_module_system.py:0 -> temp_js_project", + "CALLS tests/test_js_ts_module_system.py:0 -> temp_repo", + "CALLS tests/test_js_ts_module_system.py:0 -> temp_ts_project", + "CALLS tests/test_js_ts_module_system.py:0 -> test_aliased_destructured_require", + "CALLS tests/test_js_ts_module_system.py:0 -> test_async_function_export", + "CALLS tests/test_js_ts_module_system.py:0 -> test_deeply_nested_require_paths", + "CALLS tests/test_js_ts_module_system.py:0 -> test_destructured_require_creates_import_relationship", + "CALLS tests/test_js_ts_module_system.py:0 -> test_empty_file", + "CALLS tests/test_js_ts_module_system.py:0 -> test_export_const_arrow_function", + "CALLS tests/test_js_ts_module_system.py:0 -> test_export_const_function_expression", + "CALLS tests/test_js_ts_module_system.py:0 -> test_export_function_declaration", + "CALLS tests/test_js_ts_module_system.py:0 -> test_export_generator_function", + "CALLS tests/test_js_ts_module_system.py:0 -> test_exports_dot_function_is_ingested", + "CALLS tests/test_js_ts_module_system.py:0 -> test_exports_with_special_names", + "CALLS tests/test_js_ts_module_system.py:0 -> test_file_with_both_commonjs_and_es6_patterns", + "CALLS tests/test_js_ts_module_system.py:0 -> test_file_with_many_exports", + "CALLS tests/test_js_ts_module_system.py:0 -> test_file_with_only_comments", + "CALLS tests/test_js_ts_module_system.py:0 -> test_file_with_only_imports", + "CALLS tests/test_js_ts_module_system.py:0 -> test_iife_with_exports", + "CALLS tests/test_js_ts_module_system.py:0 -> test_mixed_destructured_and_default_require", + "CALLS tests/test_js_ts_module_system.py:0 -> test_module_exports_dot_function_is_ingested", + "CALLS tests/test_js_ts_module_system.py:0 -> test_module_exports_object_with_methods", + "CALLS tests/test_js_ts_module_system.py:0 -> test_multiple_destructured_from_same_module", + "CALLS tests/test_js_ts_module_system.py:0 -> 
test_require_in_function_scope", + "CALLS tests/test_js_ts_module_system.py:0 -> test_typescript_async_export_function", + "CALLS tests/test_js_ts_module_system.py:0 -> test_typescript_class_with_decorators", + "CALLS tests/test_js_ts_module_system.py:0 -> test_typescript_class_with_methods", + "CALLS tests/test_js_ts_module_system.py:0 -> test_typescript_export_function", + "CALLS tests/test_js_ts_module_system.py:0 -> test_typescript_with_interfaces_and_types", + "CALLS tests/test_js_ts_module_system.py:0 -> text", + "CALLS tests/test_js_ts_module_system.py:0 -> type", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> JsTsModuleSystemMixin", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> NodeLabel", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> RelationshipType", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> SupportedLanguage", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> TestEdgeCases", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> TestIngestCommonjsExports", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> TestIngestEs6Exports", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> TestIngestMissingImportPatterns", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> TestProcessCommonjsImport", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> TestProcessVariableDeclaratorForCommonjs", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> _get_docstring", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> _is_export_inside_function", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> _resolve_js_module_path", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> children", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> ensure_node_batch", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> ensure_relationship_batch", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> import_processor", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> mixin", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> 
mock_function_registry", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> mock_import_processor", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> mock_ingestor", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> mock_language_queries", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> processor", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> repo", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> repo_path", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_creates_module_node_and_relationship", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_handles_query_errors_gracefully", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_handles_resolution_error_gracefully", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_missing_arguments_in_require_call", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_missing_function_field_in_call_expression", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_missing_name_field_in_declarator", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_missing_value_field_in_declarator", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_non_string_module_argument", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_pair_pattern_with_wrong_key_type", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_processes_aliased_destructuring", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_processes_simple_destructuring", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_duplicate_imports", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_empty_object_pattern", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_non_js_ts_languages", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_non_object_pattern_name", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_non_require_call", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_when_no_language_obj", + "CALLS 
tests/test_js_ts_utils_integration.py:0 -> TestAnalyzeReturnExpressionIntegration", + "CALLS tests/test_js_ts_utils_integration.py:0 -> TestExtractClassQnIntegration", + "CALLS tests/test_js_ts_utils_integration.py:0 -> TestExtractConstructorNameIntegration", + "CALLS tests/test_js_ts_utils_integration.py:0 -> TestExtractMethodCallIntegration", + "CALLS tests/test_js_ts_utils_integration.py:0 -> TestFindMethodInAstCacheOwnerTracking", + "CALLS tests/test_js_ts_utils_integration.py:0 -> TestFindMethodInAstIntegration", + "CALLS tests/test_js_ts_utils_integration.py:0 -> TestFindMethodInClassBodyIntegration", + "CALLS tests/test_js_ts_utils_integration.py:0 -> TestFindReturnStatementsIntegration", + "CALLS tests/test_js_ts_utils_integration.py:0 -> TestFindReturnStatementsWithLanguageObj", + "CALLS tests/test_js_ts_utils_integration.py:0 -> TestTypeScriptIntegration", + "CALLS tests/test_js_ts_utils_integration.py:0 -> add", + "CALLS tests/test_js_ts_utils_integration.py:0 -> children", + "CALLS tests/test_js_ts_utils_integration.py:0 -> factory", + "CALLS tests/test_js_ts_utils_integration.py:0 -> items", + "CALLS tests/test_js_ts_utils_integration.py:0 -> js_parser", + "CALLS tests/test_js_ts_utils_integration.py:0 -> name", + "CALLS tests/test_js_ts_utils_integration.py:0 -> process", + "CALLS tests/test_js_ts_utils_integration.py:0 -> sample_js_project", + "CALLS tests/test_js_ts_utils_integration.py:0 -> sample_ts_project", + "CALLS tests/test_js_ts_utils_integration.py:0 -> save", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_builder_pattern_returns_this", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_cache_hit_returns_correct_result", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_cache_invalidates_on_new_root_node", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_cache_miss_returns_none", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_chained_method_calls_in_singleton", + "CALLS 
tests/test_js_ts_utils_integration.py:0 -> test_deeply_nested_qn", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_extract_from_factory_returns", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_extract_from_method_qn", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_extract_from_singleton_pattern", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_factory_returns_new_instance", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_fallback_without_language_obj", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_all_builder_methods", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_generic_class_methods", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_methods_in_inheritance_hierarchy", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_repository_methods", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_singleton_methods", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_static_factory_methods", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_method_calls_in_factory", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_multiple_returns_in_conditional", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_nested_class_interactions", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_nonexistent_method_returns_none", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_return_types_in_typescript", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_returns_in_factory_method", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_single_part_returns_none", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_single_return_in_simple_method", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_singleton_getInstance_returns_static_instance", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_with_language_obj", + "CALLS tests/test_js_ts_utils_integration.py:0 -> text", + "CALLS 
tests/test_js_ts_utils_integration.py:0 -> ts_parser", + "CALLS tests/test_js_ts_utils_integration.py:0 -> ts_project", + "CALLS tests/test_js_ts_utils_integration.py:0 -> type", + "CALLS tests/test_js_ts_utils_integration.py:0 -> value", + "CALLS tests/test_js_type_inference_integration.py:0 -> ImportProcessor", + "CALLS tests/test_js_type_inference_integration.py:0 -> NodeType", + "CALLS tests/test_js_type_inference_integration.py:0 -> SupportedLanguage", + "CALLS tests/test_js_type_inference_integration.py:0 -> TestJsTypeInferenceEdgeCases", + "CALLS tests/test_js_type_inference_integration.py:0 -> TestJsTypeInferenceWithRealParsing", + "CALLS tests/test_js_type_inference_integration.py:0 -> TestTsTypeInferenceWithRealParsing", + "CALLS tests/test_js_type_inference_integration.py:0 -> execute", + "CALLS tests/test_js_type_inference_integration.py:0 -> get", + "CALLS tests/test_js_type_inference_integration.py:0 -> handler", + "CALLS tests/test_js_type_inference_integration.py:0 -> import_processor", + "CALLS tests/test_js_type_inference_integration.py:0 -> items", + "CALLS tests/test_js_type_inference_integration.py:0 -> js_parser", + "CALLS tests/test_js_type_inference_integration.py:0 -> js_type_engine", + "CALLS tests/test_js_type_inference_integration.py:0 -> main", + "CALLS tests/test_js_type_inference_integration.py:0 -> mock_find_method_ast_node", + "CALLS tests/test_js_type_inference_integration.py:0 -> mock_function_registry", + "CALLS tests/test_js_type_inference_integration.py:0 -> mock_import_processor", + "CALLS tests/test_js_type_inference_integration.py:0 -> name", + "CALLS tests/test_js_type_inference_integration.py:0 -> process", + "CALLS tests/test_js_type_inference_integration.py:0 -> processor", + "CALLS tests/test_js_type_inference_integration.py:0 -> run", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_array_literal_not_inferred", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_arrow_function_body", + "CALLS 
tests/test_js_type_inference_integration.py:0 -> test_async_function_with_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_conditional_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_deeply_nested_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_destructuring_not_inferred", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_function_call_assignment", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_let_declaration_with_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_loop_with_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_multiple_variable_declarations", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_nested_in_class_method", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_nested_in_function", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_new_expression_with_arguments", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_number_literal_not_inferred", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_object_literal_not_inferred", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_resolves_imported_class", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_resolves_local_class", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_simple_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_string_literal_not_inferred", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_try_catch_with_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_typescript_generic_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_typescript_interface_implementation", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_typescript_multiple_declarations_in_class", + "CALLS 
tests/test_js_type_inference_integration.py:0 -> test_typescript_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_var_declaration_with_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> ts_parser", + "CALLS tests/test_js_type_inference_integration.py:0 -> type", + "CALLS tests/test_js_type_inference_integration.py:0 -> type_inference", + "CALLS tests/test_js_type_inference_unit.py:0 -> ImportProcessor", + "CALLS tests/test_js_type_inference_unit.py:0 -> MockNode", + "CALLS tests/test_js_type_inference_unit.py:0 -> NodeType", + "CALLS tests/test_js_type_inference_unit.py:0 -> SupportedLanguage", + "CALLS tests/test_js_type_inference_unit.py:0 -> TestAnalyzeReturnStatements", + "CALLS tests/test_js_type_inference_unit.py:0 -> TestBuildLocalVariableTypeMap", + "CALLS tests/test_js_type_inference_unit.py:0 -> TestGetDeclaratorsViaQueryException", + "CALLS tests/test_js_type_inference_unit.py:0 -> TestGetLanguageObj", + "CALLS tests/test_js_type_inference_unit.py:0 -> TestInferJsMethodReturnType", + "CALLS tests/test_js_type_inference_unit.py:0 -> TestInferJsVariableTypeFromValue", + "CALLS tests/test_js_type_inference_unit.py:0 -> TestResolveJsClassName", + "CALLS tests/test_js_type_inference_unit.py:0 -> children", + "CALLS tests/test_js_type_inference_unit.py:0 -> create_call_expression_with_member", + "CALLS tests/test_js_type_inference_unit.py:0 -> engine", + "CALLS tests/test_js_type_inference_unit.py:0 -> import_processor", + "CALLS tests/test_js_type_inference_unit.py:0 -> js_type_engine", + "CALLS tests/test_js_type_inference_unit.py:0 -> main", + "CALLS tests/test_js_type_inference_unit.py:0 -> mock_find_method_ast_node", + "CALLS tests/test_js_type_inference_unit.py:0 -> mock_function_registry", + "CALLS tests/test_js_type_inference_unit.py:0 -> mock_import_processor", + "CALLS tests/test_js_type_inference_unit.py:0 -> name", + "CALLS tests/test_js_type_inference_unit.py:0 -> processor", + "CALLS 
tests/test_js_type_inference_unit.py:0 -> test_call_expression_with_identifier_returns_func_name", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_empty_method_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_empty_node_returns_empty_dict", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_exception_in_query_continues_to_next_language", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_import_takes_precedence_over_local", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_invalid_method_call_format_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_method_ast_not_found_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_multiple_variable_declarators", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_nested_variable_declarator", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_new_expression_resolves_class_qn", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_new_expression_returns_class_name", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_resolve_local_class_in_registry", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_resolve_returns_none_when_not_found", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_resolve_via_import_mapping_checks_full_class_qn", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_resolve_via_import_mapping_returns_imported_qn", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_return_with_no_expression_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_returns_language_when_available", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_returns_none_when_queries_is_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_too_many_parts_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_unrecognized_node_type_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_unresolved_class_returns_none", + "CALLS 
tests/test_js_type_inference_unit.py:0 -> test_variable_declarator_with_function_call", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_variable_declarator_with_new_expression", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_variable_with_uninferrable_value_is_skipped", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_variable_without_value_is_skipped", + "CALLS tests/test_js_type_inference_unit.py:0 -> type", + "CALLS tests/test_js_type_inference_unit.py:0 -> type_inference", + "CALLS tests/test_js_type_inference_unit.py:0 -> value", + "CALLS tests/test_js_utils.py:0 -> TestAnalyzeJsReturnExpression", + "CALLS tests/test_js_utils.py:0 -> TestExtractJsConstructorName", + "CALLS tests/test_js_utils.py:0 -> TestExtractJsMethodCall", + "CALLS tests/test_js_utils.py:0 -> TestFindJsMethodInAst", + "CALLS tests/test_js_utils.py:0 -> TestFindJsMethodInClassBody", + "CALLS tests/test_js_utils.py:0 -> TestFindJsReturnStatements", + "CALLS tests/test_js_utils.py:0 -> children", + "CALLS tests/test_js_utils.py:0 -> js_parser", + "CALLS tests/test_js_utils.py:0 -> test_chained_method_call", + "CALLS tests/test_js_utils.py:0 -> test_empty_return", + "CALLS tests/test_js_utils.py:0 -> test_finds_constructor", + "CALLS tests/test_js_utils.py:0 -> test_finds_existing_method", + "CALLS tests/test_js_utils.py:0 -> test_finds_method_in_class", + "CALLS tests/test_js_utils.py:0 -> test_finds_method_in_nested_structure", + "CALLS tests/test_js_utils.py:0 -> test_finds_multiple_returns", + "CALLS tests/test_js_utils.py:0 -> test_finds_nested_returns", + "CALLS tests/test_js_utils.py:0 -> test_finds_single_return", + "CALLS tests/test_js_utils.py:0 -> test_finds_static_method", + "CALLS tests/test_js_utils.py:0 -> test_multiple_classes_finds_correct_one", + "CALLS tests/test_js_utils.py:0 -> test_multiple_methods_finds_correct_one", + "CALLS tests/test_js_utils.py:0 -> test_nested_object_access", + "CALLS tests/test_js_utils.py:0 -> test_new_date", + "CALLS 
tests/test_js_utils.py:0 -> test_new_with_arguments", + "CALLS tests/test_js_utils.py:0 -> test_new_with_member_expression", + "CALLS tests/test_js_utils.py:0 -> test_no_returns_empty_list", + "CALLS tests/test_js_utils.py:0 -> test_non_member_expression_returns_none", + "CALLS tests/test_js_utils.py:0 -> test_non_new_expression_returns_none", + "CALLS tests/test_js_utils.py:0 -> test_property_access_without_call", + "CALLS tests/test_js_utils.py:0 -> test_return_class_property", + "CALLS tests/test_js_utils.py:0 -> test_return_literal_returns_none", + "CALLS tests/test_js_utils.py:0 -> test_return_member_with_different_class_name", + "CALLS tests/test_js_utils.py:0 -> test_return_new_expression", + "CALLS tests/test_js_utils.py:0 -> test_return_this", + "CALLS tests/test_js_utils.py:0 -> test_return_this_property", + "CALLS tests/test_js_utils.py:0 -> test_return_unrelated_expression", + "CALLS tests/test_js_utils.py:0 -> test_returns_none_for_nonexistent_class", + "CALLS tests/test_js_utils.py:0 -> test_returns_none_for_nonexistent_method", + "CALLS tests/test_js_utils.py:0 -> test_short_qualified_name", + "CALLS tests/test_js_utils.py:0 -> test_simple_method_call", + "CALLS tests/test_js_utils.py:0 -> test_simple_new_expression", + "CALLS tests/test_js_utils.py:0 -> type", + "CALLS tests/test_js_utils.py:0 -> value", + "CALLS tests/test_l3_decorator_normalization.py:0 -> TestDecoratorWrapperNormalization", + "CALLS tests/test_l3_decorator_normalization.py:0 -> decorator", + "CALLS tests/test_l3_decorator_normalization.py:0 -> loader", + "CALLS tests/test_l3_decorator_normalization.py:0 -> test_call_attributed_to_wrapped_function_not_wrapper", + "CALLS tests/test_l3_decorator_normalization.py:0 -> test_no_generic_wrapper_node_appears", + "CALLS tests/test_l3_decorator_normalization.py:0 -> test_wrapped_function_body_calls_are_preserved", + "CALLS tests/test_l3_decorator_normalization.py:0 -> wrapper", + "CALLS tests/test_language_node_coverage.py:0 -> NodeLabel", 
+ "CALLS tests/test_language_node_coverage.py:0 -> NodeType", + "CALLS tests/test_language_node_coverage.py:0 -> SupportedLanguage", + "CALLS tests/test_language_node_coverage.py:0 -> TestAllExtensionsHaveLanguage", + "CALLS tests/test_language_node_coverage.py:0 -> TestConstraintsKeyFormat", + "CALLS tests/test_language_node_coverage.py:0 -> TestExtensionToLanguageMapping", + "CALLS tests/test_language_node_coverage.py:0 -> TestLanguageMetadataComplete", + "CALLS tests/test_language_node_coverage.py:0 -> TestLanguageSpecHasRequiredFields", + "CALLS tests/test_language_node_coverage.py:0 -> TestLanguageSpecsComplete", + "CALLS tests/test_language_node_coverage.py:0 -> TestNodeLabelStringValues", + "CALLS tests/test_language_node_coverage.py:0 -> TestNodeTypeStringValues", + "CALLS tests/test_language_node_coverage.py:0 -> TestNodeTypesForLanguages", + "CALLS tests/test_language_node_coverage.py:0 -> TestSupportedLanguageCoverage", + "CALLS tests/test_language_node_coverage.py:0 -> UniqueKeyType", + "CALLS tests/test_language_node_coverage.py:0 -> metadata", + "CALLS tests/test_language_node_coverage.py:0 -> name", + "CALLS tests/test_language_node_coverage.py:0 -> start", + "CALLS tests/test_language_node_coverage.py:0 -> status", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_constraint_keys_are_pascal_case", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_constraint_keys_are_strings", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_constraint_values_are_strings", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_constraint_values_are_valid_property_names", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_extensions_map_to_correct_language", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_node_types_have_constraints", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_has_file_extensions", + "CALLS tests/test_language_node_coverage.py:0 -> 
test_each_language_has_language_spec", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_has_metadata", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_has_status", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_spec_has_call_node_types", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_spec_has_class_node_types", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_spec_has_function_node_types", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_spec_has_module_node_types", + "CALLS tests/test_language_node_coverage.py:0 -> test_extension_maps_to_language", + "CALLS tests/test_language_node_coverage.py:0 -> test_language_spec_has_correct_extensions", + "CALLS tests/test_language_node_coverage.py:0 -> test_node_label_value_is_pascal_case", + "CALLS tests/test_language_node_coverage.py:0 -> test_node_type_value_is_pascal_case", + "CALLS tests/test_language_node_coverage.py:0 -> type", + "CALLS tests/test_language_node_coverage.py:0 -> value", + "CALLS tests/test_language_tool_unit.py:0 -> TestCategorizeNodeTypes", + "CALLS tests/test_language_tool_unit.py:0 -> TestExtractSemanticCategories", + "CALLS tests/test_language_tool_unit.py:0 -> TestFindNodeTypesPath", + "CALLS tests/test_language_tool_unit.py:0 -> TestLanguageInfo", + "CALLS tests/test_language_tool_unit.py:0 -> TestNodeCategories", + "CALLS tests/test_language_tool_unit.py:0 -> TestParseTreeSitterJson", + "CALLS tests/test_language_tool_unit.py:0 -> name", + "CALLS tests/test_language_tool_unit.py:0 -> test_adds_dot_prefix_to_extensions", + "CALLS tests/test_language_tool_unit.py:0 -> test_adds_root_nodes_to_modules", + "CALLS tests/test_language_tool_unit.py:0 -> test_categorizes_classes", + "CALLS tests/test_language_tool_unit.py:0 -> test_categorizes_functions", + "CALLS tests/test_language_tool_unit.py:0 -> test_categorizes_modules", + "CALLS tests/test_language_tool_unit.py:0 -> 
test_deduplicates_results", + "CALLS tests/test_language_tool_unit.py:0 -> test_deduplicates_subtypes", + "CALLS tests/test_language_tool_unit.py:0 -> test_empty_input", + "CALLS tests/test_language_tool_unit.py:0 -> test_empty_lists", + "CALLS tests/test_language_tool_unit.py:0 -> test_excludes_call_from_functions", + "CALLS tests/test_language_tool_unit.py:0 -> test_extracts_subtypes", + "CALLS tests/test_language_tool_unit.py:0 -> test_finds_in_language_subdirectory", + "CALLS tests/test_language_tool_unit.py:0 -> test_finds_in_src_directory", + "CALLS tests/test_language_tool_unit.py:0 -> test_finds_with_underscore_language_name", + "CALLS tests/test_language_tool_unit.py:0 -> test_immutable", + "CALLS tests/test_language_tool_unit.py:0 -> test_namedtuple_fields", + "CALLS tests/test_language_tool_unit.py:0 -> test_nodes_without_subtypes", + "CALLS tests/test_language_tool_unit.py:0 -> test_parses_valid_config", + "CALLS tests/test_language_tool_unit.py:0 -> test_preserves_existing_dot_prefix", + "CALLS tests/test_language_tool_unit.py:0 -> test_returns_none_for_empty_grammars", + "CALLS tests/test_language_tool_unit.py:0 -> test_returns_none_for_missing_file", + "CALLS tests/test_language_tool_unit.py:0 -> test_returns_none_for_missing_grammars_key", + "CALLS tests/test_language_tool_unit.py:0 -> test_returns_none_when_not_found", + "CALLS tests/test_language_tool_unit.py:0 -> test_uses_provided_language_name", + "CALLS tests/test_language_tool_unit.py:0 -> type", + "CALLS tests/test_llm_service_unit.py:0 -> LLMGenerationError", + "CALLS tests/test_llm_service_unit.py:0 -> Provider", + "CALLS tests/test_llm_service_unit.py:0 -> TestCleanCypherResponse", + "CALLS tests/test_llm_service_unit.py:0 -> TestCreateRagOrchestrator", + "CALLS tests/test_llm_service_unit.py:0 -> TestCypherGenerator", + "CALLS tests/test_llm_service_unit.py:0 -> TestCypherGeneratorGenerate", + "CALLS tests/test_llm_service_unit.py:0 -> active_cypher_config", + "CALLS 
tests/test_llm_service_unit.py:0 -> active_orchestrator_config", + "CALLS tests/test_llm_service_unit.py:0 -> agent", + "CALLS tests/test_llm_service_unit.py:0 -> build_rag_orchestrator_prompt", + "CALLS tests/test_llm_service_unit.py:0 -> create_model", + "CALLS tests/test_llm_service_unit.py:0 -> get_provider_from_config", + "CALLS tests/test_llm_service_unit.py:0 -> main", + "CALLS tests/test_llm_service_unit.py:0 -> mock_settings", + "CALLS tests/test_llm_service_unit.py:0 -> name", + "CALLS tests/test_llm_service_unit.py:0 -> nodes", + "CALLS tests/test_llm_service_unit.py:0 -> run", + "CALLS tests/test_llm_service_unit.py:0 -> test_adds_semicolon_if_missing", + "CALLS tests/test_llm_service_unit.py:0 -> test_creates_agent_with_tools", + "CALLS tests/test_llm_service_unit.py:0 -> test_generate_raises_on_agent_error", + "CALLS tests/test_llm_service_unit.py:0 -> test_generate_raises_on_invalid_output", + "CALLS tests/test_llm_service_unit.py:0 -> test_generate_returns_cleaned_query", + "CALLS tests/test_llm_service_unit.py:0 -> test_handles_complex_query", + "CALLS tests/test_llm_service_unit.py:0 -> test_handles_multiline_query", + "CALLS tests/test_llm_service_unit.py:0 -> test_init_creates_agent", + "CALLS tests/test_llm_service_unit.py:0 -> test_init_raises_on_error", + "CALLS tests/test_llm_service_unit.py:0 -> test_keeps_existing_semicolon", + "CALLS tests/test_llm_service_unit.py:0 -> test_raises_on_error", + "CALLS tests/test_llm_service_unit.py:0 -> test_removes_backticks", + "CALLS tests/test_llm_service_unit.py:0 -> test_removes_cypher_prefix", + "CALLS tests/test_llm_service_unit.py:0 -> test_removes_leading_whitespace", + "CALLS tests/test_llm_service_unit.py:0 -> test_removes_trailing_whitespace", + "CALLS tests/test_llm_service_unit.py:0 -> test_uses_local_prompt_for_ollama", + "CALLS tests/test_llm_service_unit.py:0 -> type", + "CALLS tests/test_local_alias_calls.py:0 -> RelationshipType", + "CALLS tests/test_local_alias_calls.py:0 -> 
TestLocalAliasCalls", + "CALLS tests/test_local_alias_calls.py:0 -> ensure_node_batch", + "CALLS tests/test_local_alias_calls.py:0 -> ensure_relationship_batch", + "CALLS tests/test_local_alias_calls.py:0 -> execute_write", + "CALLS tests/test_local_alias_calls.py:0 -> fetch_all", + "CALLS tests/test_local_alias_calls.py:0 -> flush_all", + "CALLS tests/test_local_alias_calls.py:0 -> graph_updater", + "CALLS tests/test_local_alias_calls.py:0 -> name", + "CALLS tests/test_local_alias_calls.py:0 -> repo_path", + "CALLS tests/test_local_alias_calls.py:0 -> test_alias_to_module_function_is_a_call", + "CALLS tests/test_local_alias_calls.py:0 -> test_alias_to_self_method_is_a_call", + "CALLS tests/test_local_alias_calls.py:0 -> test_direct_call_unaffected", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> CallProcessor", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> FunctionRegistryTrie", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> RelationshipType", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> TestLocalAliasChainResolution", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> _ingest_function_calls", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> execute_write", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> fetch_all", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> flush_all", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> graph_updater", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> parent", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> repo_path", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> test_local_alias_attribute_chain_dispatches_to_dunder", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> type", + "CALLS tests/test_lua_54_edge_cases.py:0 -> Color", + "CALLS 
tests/test_lua_54_edge_cases.py:0 -> NodeType", + "CALLS tests/test_lua_54_edge_cases.py:0 -> close", + "CALLS tests/test_lua_54_edge_cases.py:0 -> ensure_node_batch", + "CALLS tests/test_lua_54_edge_cases.py:0 -> insert", + "CALLS tests/test_lua_54_edge_cases.py:0 -> main", + "CALLS tests/test_lua_54_edge_cases.py:0 -> mock_ingestor", + "CALLS tests/test_lua_54_edge_cases.py:0 -> name", + "CALLS tests/test_lua_54_edge_cases.py:0 -> process", + "CALLS tests/test_lua_54_edge_cases.py:0 -> read", + "CALLS tests/test_lua_54_edge_cases.py:0 -> start", + "CALLS tests/test_lua_54_edge_cases.py:0 -> temp_repo", + "CALLS tests/test_lua_54_edge_cases.py:0 -> test_lua_54_bitwise_operators", + "CALLS tests/test_lua_54_edge_cases.py:0 -> test_lua_54_goto_labels", + "CALLS tests/test_lua_54_edge_cases.py:0 -> test_lua_54_utf8_library", + "CALLS tests/test_lua_54_edge_cases.py:0 -> text", + "CALLS tests/test_lua_54_edge_cases.py:0 -> type", + "CALLS tests/test_lua_54_edge_cases.py:0 -> value", + "CALLS tests/test_lua_closures.py:0 -> NodeType", + "CALLS tests/test_lua_closures.py:0 -> add", + "CALLS tests/test_lua_closures.py:0 -> decorator", + "CALLS tests/test_lua_closures.py:0 -> ensure_node_batch", + "CALLS tests/test_lua_closures.py:0 -> execute", + "CALLS tests/test_lua_closures.py:0 -> get", + "CALLS tests/test_lua_closures.py:0 -> index", + "CALLS tests/test_lua_closures.py:0 -> insert", + "CALLS tests/test_lua_closures.py:0 -> main", + "CALLS tests/test_lua_closures.py:0 -> mock_ingestor", + "CALLS tests/test_lua_closures.py:0 -> name", + "CALLS tests/test_lua_closures.py:0 -> temp_repo", + "CALLS tests/test_lua_closures.py:0 -> test_lua_advanced_closures", + "CALLS tests/test_lua_closures.py:0 -> test_lua_basic_closures", + "CALLS tests/test_lua_closures.py:0 -> test_lua_event_system_closures", + "CALLS tests/test_lua_closures.py:0 -> test_lua_functional_programming", + "CALLS tests/test_lua_closures.py:0 -> value", + "CALLS tests/test_lua_complex_scenarios.py:0 -> 
close", + "CALLS tests/test_lua_complex_scenarios.py:0 -> decode", + "CALLS tests/test_lua_complex_scenarios.py:0 -> engine", + "CALLS tests/test_lua_complex_scenarios.py:0 -> execute", + "CALLS tests/test_lua_complex_scenarios.py:0 -> get", + "CALLS tests/test_lua_complex_scenarios.py:0 -> handler", + "CALLS tests/test_lua_complex_scenarios.py:0 -> insert", + "CALLS tests/test_lua_complex_scenarios.py:0 -> items", + "CALLS tests/test_lua_complex_scenarios.py:0 -> keys", + "CALLS tests/test_lua_complex_scenarios.py:0 -> load", + "CALLS tests/test_lua_complex_scenarios.py:0 -> main", + "CALLS tests/test_lua_complex_scenarios.py:0 -> mock_ingestor", + "CALLS tests/test_lua_complex_scenarios.py:0 -> name", + "CALLS tests/test_lua_complex_scenarios.py:0 -> parse", + "CALLS tests/test_lua_complex_scenarios.py:0 -> read", + "CALLS tests/test_lua_complex_scenarios.py:0 -> relationships", + "CALLS tests/test_lua_complex_scenarios.py:0 -> render", + "CALLS tests/test_lua_complex_scenarios.py:0 -> sink", + "CALLS tests/test_lua_complex_scenarios.py:0 -> start", + "CALLS tests/test_lua_complex_scenarios.py:0 -> stats", + "CALLS tests/test_lua_complex_scenarios.py:0 -> status", + "CALLS tests/test_lua_complex_scenarios.py:0 -> temp_repo", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_configuration_management_scenario", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_data_processing_pipeline", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_database_orm_scenario", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_game_engine_scenario", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_microservice_architecture", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_web_framework_scenario", + "CALLS tests/test_lua_complex_scenarios.py:0 -> type", + "CALLS tests/test_lua_complex_scenarios.py:0 -> value", + "CALLS tests/test_lua_comprehensive.py:0 -> close", + "CALLS tests/test_lua_comprehensive.py:0 -> create_model", + "CALLS 
tests/test_lua_comprehensive.py:0 -> decode", + "CALLS tests/test_lua_comprehensive.py:0 -> engine", + "CALLS tests/test_lua_comprehensive.py:0 -> execute", + "CALLS tests/test_lua_comprehensive.py:0 -> factory", + "CALLS tests/test_lua_comprehensive.py:0 -> get", + "CALLS tests/test_lua_comprehensive.py:0 -> handler", + "CALLS tests/test_lua_comprehensive.py:0 -> index", + "CALLS tests/test_lua_comprehensive.py:0 -> insert", + "CALLS tests/test_lua_comprehensive.py:0 -> keys", + "CALLS tests/test_lua_comprehensive.py:0 -> load", + "CALLS tests/test_lua_comprehensive.py:0 -> loader", + "CALLS tests/test_lua_comprehensive.py:0 -> mock_ingestor", + "CALLS tests/test_lua_comprehensive.py:0 -> name", + "CALLS tests/test_lua_comprehensive.py:0 -> put", + "CALLS tests/test_lua_comprehensive.py:0 -> read", + "CALLS tests/test_lua_comprehensive.py:0 -> read_file", + "CALLS tests/test_lua_comprehensive.py:0 -> render", + "CALLS tests/test_lua_comprehensive.py:0 -> save", + "CALLS tests/test_lua_comprehensive.py:0 -> stats", + "CALLS tests/test_lua_comprehensive.py:0 -> status", + "CALLS tests/test_lua_comprehensive.py:0 -> temp_repo", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_binary_tree", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_database_orm", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_environment_management", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_factory_pattern", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_file_operations", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_final_comprehensive_check", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_hash_table", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_json_serialization", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_linked_list", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_memory_management", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_module_system", + "CALLS tests/test_lua_comprehensive.py:0 -> 
test_lua_observer_pattern", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_pcall_patterns", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_performance_utils", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_search_algorithms", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_sorting_algorithms", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_strategy_pattern", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_string_interpolation", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_string_patterns", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_table_iteration", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_table_operations", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_template_engine", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_web_framework", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_xpcall_patterns", + "CALLS tests/test_lua_comprehensive.py:0 -> text", + "CALLS tests/test_lua_comprehensive.py:0 -> type", + "CALLS tests/test_lua_comprehensive.py:0 -> value", + "CALLS tests/test_lua_comprehensive.py:0 -> write_file", + "CALLS tests/test_lua_containment_oracle.py:0 -> RelationshipType", + "CALLS tests/test_lua_containment_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_lua_containment_oracle.py:0 -> name", + "CALLS tests/test_lua_containment_oracle.py:0 -> test_cgr_matches_luaparse_oracle_on_containment_edges", + "CALLS tests/test_lua_containment_oracle.py:0 -> value", + "CALLS tests/test_lua_coroutines.py:0 -> done", + "CALLS tests/test_lua_coroutines.py:0 -> handler", + "CALLS tests/test_lua_coroutines.py:0 -> insert", + "CALLS tests/test_lua_coroutines.py:0 -> main", + "CALLS tests/test_lua_coroutines.py:0 -> mock_ingestor", + "CALLS tests/test_lua_coroutines.py:0 -> name", + "CALLS tests/test_lua_coroutines.py:0 -> relationships", + "CALLS tests/test_lua_coroutines.py:0 -> run", + "CALLS tests/test_lua_coroutines.py:0 -> start", + "CALLS 
tests/test_lua_coroutines.py:0 -> status", + "CALLS tests/test_lua_coroutines.py:0 -> temp_repo", + "CALLS tests/test_lua_coroutines.py:0 -> test_lua_async_patterns", + "CALLS tests/test_lua_coroutines.py:0 -> test_lua_basic_coroutines", + "CALLS tests/test_lua_coroutines.py:0 -> test_lua_coroutine_scheduler", + "CALLS tests/test_lua_coroutines.py:0 -> test_lua_generator_patterns", + "CALLS tests/test_lua_coroutines.py:0 -> test_lua_state_machines", + "CALLS tests/test_lua_coroutines.py:0 -> value", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> load", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> lua_singleton_project", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> main", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> mock_ingestor", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> name", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> save", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> start", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> temp_repo", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> test_lua_singleton_pattern_cross_file_calls", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> up", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> value", + "CALLS tests/test_lua_edge_cases.py:0 -> factory", + "CALLS tests/test_lua_edge_cases.py:0 -> import_processor", + "CALLS tests/test_lua_edge_cases.py:0 -> main", + "CALLS tests/test_lua_edge_cases.py:0 -> mock_ingestor", + "CALLS tests/test_lua_edge_cases.py:0 -> module_qn", + "CALLS tests/test_lua_edge_cases.py:0 -> name", + "CALLS tests/test_lua_edge_cases.py:0 -> temp_repo", + "CALLS tests/test_lua_edge_cases.py:0 -> test_lua_require_edge_cases", + "CALLS tests/test_lua_edge_cases.py:0 -> type", + "CALLS tests/test_lua_edge_cases.py:0 -> updater", + "CALLS tests/test_lua_environment.py:0 -> close", + "CALLS tests/test_lua_environment.py:0 -> insert", + "CALLS tests/test_lua_environment.py:0 -> load", + "CALLS tests/test_lua_environment.py:0 -> 
mock_ingestor", + "CALLS tests/test_lua_environment.py:0 -> name", + "CALLS tests/test_lua_environment.py:0 -> read", + "CALLS tests/test_lua_environment.py:0 -> relationships", + "CALLS tests/test_lua_environment.py:0 -> temp_repo", + "CALLS tests/test_lua_environment.py:0 -> test_dynamic_code_execution", + "CALLS tests/test_lua_environment.py:0 -> test_environment_manipulation", + "CALLS tests/test_lua_environment.py:0 -> test_global_environment_access", + "CALLS tests/test_lua_environment.py:0 -> test_global_variable_management", + "CALLS tests/test_lua_environment.py:0 -> test_module_environment_patterns", + "CALLS tests/test_lua_environment.py:0 -> type", + "CALLS tests/test_lua_environment.py:0 -> up", + "CALLS tests/test_lua_environment.py:0 -> value", + "CALLS tests/test_lua_error_handling.py:0 -> NodeType", + "CALLS tests/test_lua_error_handling.py:0 -> close", + "CALLS tests/test_lua_error_handling.py:0 -> down", + "CALLS tests/test_lua_error_handling.py:0 -> ensure_node_batch", + "CALLS tests/test_lua_error_handling.py:0 -> execute", + "CALLS tests/test_lua_error_handling.py:0 -> handler", + "CALLS tests/test_lua_error_handling.py:0 -> index", + "CALLS tests/test_lua_error_handling.py:0 -> insert", + "CALLS tests/test_lua_error_handling.py:0 -> items", + "CALLS tests/test_lua_error_handling.py:0 -> load", + "CALLS tests/test_lua_error_handling.py:0 -> main", + "CALLS tests/test_lua_error_handling.py:0 -> mock_ingestor", + "CALLS tests/test_lua_error_handling.py:0 -> name", + "CALLS tests/test_lua_error_handling.py:0 -> operation", + "CALLS tests/test_lua_error_handling.py:0 -> processor", + "CALLS tests/test_lua_error_handling.py:0 -> read", + "CALLS tests/test_lua_error_handling.py:0 -> read_file", + "CALLS tests/test_lua_error_handling.py:0 -> start", + "CALLS tests/test_lua_error_handling.py:0 -> stats", + "CALLS tests/test_lua_error_handling.py:0 -> status", + "CALLS tests/test_lua_error_handling.py:0 -> temp_repo", + "CALLS 
tests/test_lua_error_handling.py:0 -> test_lua_debug_library", + "CALLS tests/test_lua_error_handling.py:0 -> test_lua_error_recovery", + "CALLS tests/test_lua_error_handling.py:0 -> test_lua_exception_patterns", + "CALLS tests/test_lua_error_handling.py:0 -> test_lua_pcall_xpcall_patterns", + "CALLS tests/test_lua_error_handling.py:0 -> type", + "CALLS tests/test_lua_error_handling.py:0 -> value", + "CALLS tests/test_lua_error_handling.py:0 -> wrapper", + "CALLS tests/test_lua_file_io.py:0 -> close", + "CALLS tests/test_lua_file_io.py:0 -> flush", + "CALLS tests/test_lua_file_io.py:0 -> insert", + "CALLS tests/test_lua_file_io.py:0 -> mock_ingestor", + "CALLS tests/test_lua_file_io.py:0 -> name", + "CALLS tests/test_lua_file_io.py:0 -> read", + "CALLS tests/test_lua_file_io.py:0 -> read_file", + "CALLS tests/test_lua_file_io.py:0 -> relationships", + "CALLS tests/test_lua_file_io.py:0 -> start", + "CALLS tests/test_lua_file_io.py:0 -> temp_repo", + "CALLS tests/test_lua_file_io.py:0 -> test_binary_file_operations", + "CALLS tests/test_lua_file_io.py:0 -> test_file_operations", + "CALLS tests/test_lua_file_io.py:0 -> test_file_positioning_and_info", + "CALLS tests/test_lua_file_io.py:0 -> test_file_reading_modes", + "CALLS tests/test_lua_file_io.py:0 -> test_serialization_patterns", + "CALLS tests/test_lua_file_io.py:0 -> type", + "CALLS tests/test_lua_file_io.py:0 -> value", + "CALLS tests/test_lua_file_io.py:0 -> write_file", + "CALLS tests/test_lua_functions.py:0 -> NodeType", + "CALLS tests/test_lua_functions.py:0 -> ensure_node_batch", + "CALLS tests/test_lua_functions.py:0 -> graph_updater", + "CALLS tests/test_lua_functions.py:0 -> main", + "CALLS tests/test_lua_functions.py:0 -> mock_ingestor", + "CALLS tests/test_lua_functions.py:0 -> repo_path", + "CALLS tests/test_lua_functions.py:0 -> temp_repo", + "CALLS tests/test_lua_functions.py:0 -> test_lua_function_discovery", + "CALLS tests/test_lua_functions.py:0 -> updater", + "CALLS 
tests/test_lua_functions_methods.py:0 -> add", + "CALLS tests/test_lua_functions_methods.py:0 -> main", + "CALLS tests/test_lua_functions_methods.py:0 -> mock_ingestor", + "CALLS tests/test_lua_functions_methods.py:0 -> temp_repo", + "CALLS tests/test_lua_functions_methods.py:0 -> test_lua_function_and_method_calls", + "CALLS tests/test_lua_imports.py:0 -> factory", + "CALLS tests/test_lua_imports.py:0 -> graph_updater", + "CALLS tests/test_lua_imports.py:0 -> import_processor", + "CALLS tests/test_lua_imports.py:0 -> main", + "CALLS tests/test_lua_imports.py:0 -> mock_ingestor", + "CALLS tests/test_lua_imports.py:0 -> module_qn", + "CALLS tests/test_lua_imports.py:0 -> name", + "CALLS tests/test_lua_imports.py:0 -> repo_path", + "CALLS tests/test_lua_imports.py:0 -> style", + "CALLS tests/test_lua_imports.py:0 -> temp_repo", + "CALLS tests/test_lua_imports.py:0 -> test_lua_pcall_require_pattern", + "CALLS tests/test_lua_imports.py:0 -> test_lua_require_imports", + "CALLS tests/test_lua_imports.py:0 -> test_lua_stdlib_detection", + "CALLS tests/test_lua_imports.py:0 -> type", + "CALLS tests/test_lua_imports.py:0 -> updater", + "CALLS tests/test_lua_imports_paths.py:0 -> ensure_relationship_batch", + "CALLS tests/test_lua_imports_paths.py:0 -> factory", + "CALLS tests/test_lua_imports_paths.py:0 -> import_processor", + "CALLS tests/test_lua_imports_paths.py:0 -> main", + "CALLS tests/test_lua_imports_paths.py:0 -> mock_ingestor", + "CALLS tests/test_lua_imports_paths.py:0 -> module_qn", + "CALLS tests/test_lua_imports_paths.py:0 -> name", + "CALLS tests/test_lua_imports_paths.py:0 -> temp_repo", + "CALLS tests/test_lua_imports_paths.py:0 -> test_lua_imports_paths", + "CALLS tests/test_lua_imports_paths.py:0 -> type", + "CALLS tests/test_lua_imports_paths.py:0 -> updater", + "CALLS tests/test_lua_metatables.py:0 -> NodeType", + "CALLS tests/test_lua_metatables.py:0 -> add", + "CALLS tests/test_lua_metatables.py:0 -> ensure_node_batch", + "CALLS 
tests/test_lua_metatables.py:0 -> factory", + "CALLS tests/test_lua_metatables.py:0 -> get", + "CALLS tests/test_lua_metatables.py:0 -> insert", + "CALLS tests/test_lua_metatables.py:0 -> items", + "CALLS tests/test_lua_metatables.py:0 -> keys", + "CALLS tests/test_lua_metatables.py:0 -> main", + "CALLS tests/test_lua_metatables.py:0 -> mock_ingestor", + "CALLS tests/test_lua_metatables.py:0 -> name", + "CALLS tests/test_lua_metatables.py:0 -> temp_repo", + "CALLS tests/test_lua_metatables.py:0 -> test_lua_arithmetic_metamethods", + "CALLS tests/test_lua_metatables.py:0 -> test_lua_call_metamethod", + "CALLS tests/test_lua_metatables.py:0 -> test_lua_comparison_metamethods", + "CALLS tests/test_lua_metatables.py:0 -> test_lua_index_metamethods", + "CALLS tests/test_lua_metatables.py:0 -> test_lua_weak_references", + "CALLS tests/test_lua_metatables.py:0 -> type", + "CALLS tests/test_lua_metatables.py:0 -> value", + "CALLS tests/test_lua_modern_features.py:0 -> NodeType", + "CALLS tests/test_lua_modern_features.py:0 -> close", + "CALLS tests/test_lua_modern_features.py:0 -> ensure_node_batch", + "CALLS tests/test_lua_modern_features.py:0 -> index", + "CALLS tests/test_lua_modern_features.py:0 -> insert", + "CALLS tests/test_lua_modern_features.py:0 -> items", + "CALLS tests/test_lua_modern_features.py:0 -> logs", + "CALLS tests/test_lua_modern_features.py:0 -> main", + "CALLS tests/test_lua_modern_features.py:0 -> mock_ingestor", + "CALLS tests/test_lua_modern_features.py:0 -> name", + "CALLS tests/test_lua_modern_features.py:0 -> processor", + "CALLS tests/test_lua_modern_features.py:0 -> read", + "CALLS tests/test_lua_modern_features.py:0 -> status", + "CALLS tests/test_lua_modern_features.py:0 -> temp_repo", + "CALLS tests/test_lua_modern_features.py:0 -> test_lua_54_attributes_syntax", + "CALLS tests/test_lua_modern_features.py:0 -> test_lua_54_enhanced_metamethods", + "CALLS tests/test_lua_modern_features.py:0 -> test_lua_54_enhanced_stdlib", + "CALLS 
tests/test_lua_modern_features.py:0 -> test_lua_54_numerical_for_loops", + "CALLS tests/test_lua_modern_features.py:0 -> type", + "CALLS tests/test_lua_modern_features.py:0 -> up", + "CALLS tests/test_lua_modern_features.py:0 -> value", + "CALLS tests/test_lua_oop_patterns.py:0 -> NodeType", + "CALLS tests/test_lua_oop_patterns.py:0 -> add", + "CALLS tests/test_lua_oop_patterns.py:0 -> ensure_node_batch", + "CALLS tests/test_lua_oop_patterns.py:0 -> factory", + "CALLS tests/test_lua_oop_patterns.py:0 -> import_processor", + "CALLS tests/test_lua_oop_patterns.py:0 -> insert", + "CALLS tests/test_lua_oop_patterns.py:0 -> main", + "CALLS tests/test_lua_oop_patterns.py:0 -> mixin", + "CALLS tests/test_lua_oop_patterns.py:0 -> mock_ingestor", + "CALLS tests/test_lua_oop_patterns.py:0 -> name", + "CALLS tests/test_lua_oop_patterns.py:0 -> parent", + "CALLS tests/test_lua_oop_patterns.py:0 -> parse", + "CALLS tests/test_lua_oop_patterns.py:0 -> relationships", + "CALLS tests/test_lua_oop_patterns.py:0 -> temp_repo", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_class_pattern_basic", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_inheritance_pattern", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_mixin_pattern", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_module_pattern", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_prototype_pattern", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_singleton_pattern", + "CALLS tests/test_lua_oop_patterns.py:0 -> type", + "CALLS tests/test_lua_oop_patterns.py:0 -> updater", + "CALLS tests/test_lua_oop_patterns.py:0 -> value", + "CALLS tests/test_lua_span_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_lua_span_oracle.py:0 -> handler", + "CALLS tests/test_lua_span_oracle.py:0 -> name", + "CALLS tests/test_lua_span_oracle.py:0 -> start", + "CALLS tests/test_lua_span_oracle.py:0 -> test_cgr_matches_luaparse_oracle_on_node_spans", + "CALLS tests/test_lua_stdlib.py:0 -> close", + "CALLS 
tests/test_lua_stdlib.py:0 -> execute", + "CALLS tests/test_lua_stdlib.py:0 -> flush", + "CALLS tests/test_lua_stdlib.py:0 -> index", + "CALLS tests/test_lua_stdlib.py:0 -> insert", + "CALLS tests/test_lua_stdlib.py:0 -> loader", + "CALLS tests/test_lua_stdlib.py:0 -> mock_ingestor", + "CALLS tests/test_lua_stdlib.py:0 -> operation", + "CALLS tests/test_lua_stdlib.py:0 -> process", + "CALLS tests/test_lua_stdlib.py:0 -> read", + "CALLS tests/test_lua_stdlib.py:0 -> relationships", + "CALLS tests/test_lua_stdlib.py:0 -> temp_repo", + "CALLS tests/test_lua_stdlib.py:0 -> test_builtin_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_debug_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_io_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_math_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_os_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_package_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_string_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_table_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> text", + "CALLS tests/test_lua_stdlib.py:0 -> type", + "CALLS tests/test_lua_stdlib.py:0 -> up", + "CALLS tests/test_lua_stdlib.py:0 -> value", + "CALLS tests/test_lua_stdlib.py:0 -> wrapper", + "CALLS tests/test_lua_string_patterns.py:0 -> insert", + "CALLS tests/test_lua_string_patterns.py:0 -> mock_ingestor", + "CALLS tests/test_lua_string_patterns.py:0 -> name", + "CALLS tests/test_lua_string_patterns.py:0 -> relationships", + "CALLS tests/test_lua_string_patterns.py:0 -> start", + "CALLS tests/test_lua_string_patterns.py:0 -> temp_repo", + "CALLS tests/test_lua_string_patterns.py:0 -> test_complex_pattern_operations", + "CALLS tests/test_lua_string_patterns.py:0 -> test_string_manipulation_functions", + "CALLS tests/test_lua_string_patterns.py:0 -> test_string_pattern_matching", + "CALLS tests/test_lua_string_patterns.py:0 -> test_unicode_and_encoding", + "CALLS 
tests/test_lua_string_patterns.py:0 -> text", + "CALLS tests/test_lua_string_patterns.py:0 -> wrapper", + "CALLS tests/test_lua_structure_oracle.py:0 -> NodeLabel", + "CALLS tests/test_lua_structure_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_lua_structure_oracle.py:0 -> name", + "CALLS tests/test_lua_structure_oracle.py:0 -> nodes", + "CALLS tests/test_lua_structure_oracle.py:0 -> test_cgr_matches_luaparse_oracle_on_lua_structure", + "CALLS tests/test_lua_structure_oracle.py:0 -> value", + "CALLS tests/test_lua_table_manipulation.py:0 -> index", + "CALLS tests/test_lua_table_manipulation.py:0 -> insert", + "CALLS tests/test_lua_table_manipulation.py:0 -> mock_ingestor", + "CALLS tests/test_lua_table_manipulation.py:0 -> name", + "CALLS tests/test_lua_table_manipulation.py:0 -> relationships", + "CALLS tests/test_lua_table_manipulation.py:0 -> temp_repo", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_construction_and_access", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_iteration_patterns", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_metatable_operations", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_modification_functions", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_serialization", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_utility_functions", + "CALLS tests/test_lua_table_manipulation.py:0 -> text", + "CALLS tests/test_lua_table_manipulation.py:0 -> type", + "CALLS tests/test_lua_table_manipulation.py:0 -> value", + "CALLS tests/test_lua_type_inference_integration.py:0 -> ImportProcessor", + "CALLS tests/test_lua_type_inference_integration.py:0 -> NodeType", + "CALLS tests/test_lua_type_inference_integration.py:0 -> SupportedLanguage", + "CALLS tests/test_lua_type_inference_integration.py:0 -> TestLuaTypeInferenceComplexScenarios", + "CALLS tests/test_lua_type_inference_integration.py:0 -> TestLuaTypeInferenceWithRealParsing", + "CALLS 
tests/test_lua_type_inference_integration.py:0 -> find_with_prefix", + "CALLS tests/test_lua_type_inference_integration.py:0 -> handler", + "CALLS tests/test_lua_type_inference_integration.py:0 -> import_processor", + "CALLS tests/test_lua_type_inference_integration.py:0 -> load", + "CALLS tests/test_lua_type_inference_integration.py:0 -> lua_parser", + "CALLS tests/test_lua_type_inference_integration.py:0 -> lua_type_engine", + "CALLS tests/test_lua_type_inference_integration.py:0 -> main", + "CALLS tests/test_lua_type_inference_integration.py:0 -> mock_function_registry", + "CALLS tests/test_lua_type_inference_integration.py:0 -> mock_import_processor", + "CALLS tests/test_lua_type_inference_integration.py:0 -> name", + "CALLS tests/test_lua_type_inference_integration.py:0 -> processor", + "CALLS tests/test_lua_type_inference_integration.py:0 -> render", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_chained_method_call_only_first_part", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_class_resolved_via_method_prefix", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_class_with_module_table_pattern", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_empty_code", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_global_variable_not_tracked", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_mixed_resolvable_and_unresolvable", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_multiple_variable_declarations", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_nested_function_with_variable_declarations", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_only_comments", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_simple_variable_declaration_with_method_call", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_unicode_identifier", + "CALLS tests/test_lua_type_inference_integration.py:0 -> 
test_unresolvable_class_skipped", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_declaration_without_method_call", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_in_for_loop", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_in_if_block", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_with_imported_class", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_with_regular_function_call", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_with_string_value", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_with_table_constructor", + "CALLS tests/test_lua_type_inference_integration.py:0 -> type_inference", + "CALLS tests/test_lua_type_inference_unit.py:0 -> ImportProcessor", + "CALLS tests/test_lua_type_inference_unit.py:0 -> MockNode", + "CALLS tests/test_lua_type_inference_unit.py:0 -> NodeType", + "CALLS tests/test_lua_type_inference_unit.py:0 -> TestBuildLuaLocalVariableTypeMap", + "CALLS tests/test_lua_type_inference_unit.py:0 -> TestBuildLuaLocalVariableTypeMapEdgeCases", + "CALLS tests/test_lua_type_inference_unit.py:0 -> TestInferLuaVariableTypeFromValue", + "CALLS tests/test_lua_type_inference_unit.py:0 -> TestInferLuaVariableTypeFromValueEdgeCases", + "CALLS tests/test_lua_type_inference_unit.py:0 -> TestLuaTypeInferenceEdgeCases", + "CALLS tests/test_lua_type_inference_unit.py:0 -> TestResolveLuaClassName", + "CALLS tests/test_lua_type_inference_unit.py:0 -> children", + "CALLS tests/test_lua_type_inference_unit.py:0 -> find_with_prefix", + "CALLS tests/test_lua_type_inference_unit.py:0 -> import_processor", + "CALLS tests/test_lua_type_inference_unit.py:0 -> lua_type_engine", + "CALLS tests/test_lua_type_inference_unit.py:0 -> main", + "CALLS tests/test_lua_type_inference_unit.py:0 -> mock_function_registry", + "CALLS tests/test_lua_type_inference_unit.py:0 -> mock_import_processor", + 
"CALLS tests/test_lua_type_inference_unit.py:0 -> processor", + "CALLS tests/test_lua_type_inference_unit.py:0 -> run", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_assignment_with_empty_expression_list", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_assignment_with_empty_variable_list", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_build_map_empty_for_non_matching_nodes", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_build_map_skips_unresolvable_types", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_build_map_with_imported_class", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_build_map_with_nested_declarations", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_build_map_with_single_variable", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_class_name_with_special_characters", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_deeply_nested_variable_declaration", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_empty_module_qn", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_function_call_without_method_index_expression", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_infer_from_method_call", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_infer_returns_none_for_function_call_without_method_index", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_infer_returns_none_for_non_function_call", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_infer_returns_none_when_class_not_resolved", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_method_index_with_empty_class_name", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_method_index_with_empty_method_name", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_method_index_with_non_identifier_children", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_method_index_with_only_class_identifier", + "CALLS tests/test_lua_type_inference_unit.py:0 -> 
test_method_prefix_matching_colon_separator", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_method_prefix_with_dot_method", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_more_variables_than_values", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_non_function_call_in_expression_list", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_resolve_import_takes_precedence", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_resolve_returns_none_when_not_found", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_resolve_via_function_registry_direct", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_resolve_via_import_mapping", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_resolve_via_method_prefix_matching", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_resolve_with_no_import_mapping_for_module", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_variable_declaration_without_assignment", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_variable_with_empty_text", + "CALLS tests/test_lua_type_inference_unit.py:0 -> type_inference", + "CALLS tests/test_lua_utils.py:0 -> SupportedLanguage", + "CALLS tests/test_lua_utils.py:0 -> TestEdgeCases", + "CALLS tests/test_lua_utils.py:0 -> TestExtractAssignedName", + "CALLS tests/test_lua_utils.py:0 -> TestExtractPcallSecondIdentifier", + "CALLS tests/test_lua_utils.py:0 -> TestFindAncestorStatement", + "CALLS tests/test_lua_utils.py:0 -> children", + "CALLS tests/test_lua_utils.py:0 -> handler", + "CALLS tests/test_lua_utils.py:0 -> lua_parser", + "CALLS tests/test_lua_utils.py:0 -> test_assignment_without_local", + "CALLS tests/test_lua_utils.py:0 -> test_basic_pcall_require", + "CALLS tests/test_lua_utils.py:0 -> test_complex_expression_assignment", + "CALLS tests/test_lua_utils.py:0 -> test_deeply_nested_assignment", + "CALLS tests/test_lua_utils.py:0 -> test_dot_index_accepted_with_custom_types", + "CALLS tests/test_lua_utils.py:0 -> 
test_dot_index_expression_rejected", + "CALLS tests/test_lua_utils.py:0 -> test_empty_function_body", + "CALLS tests/test_lua_utils.py:0 -> test_finds_assignment_statement", + "CALLS tests/test_lua_utils.py:0 -> test_finds_expression_statement", + "CALLS tests/test_lua_utils.py:0 -> test_finds_for_statement", + "CALLS tests/test_lua_utils.py:0 -> test_finds_if_statement", + "CALLS tests/test_lua_utils.py:0 -> test_finds_local_statement", + "CALLS tests/test_lua_utils.py:0 -> test_function_as_argument", + "CALLS tests/test_lua_utils.py:0 -> test_function_call_not_a_statement", + "CALLS tests/test_lua_utils.py:0 -> test_method_syntax_function", + "CALLS tests/test_lua_utils.py:0 -> test_multiline_assignment", + "CALLS tests/test_lua_utils.py:0 -> test_multiple_assignment_first_value", + "CALLS tests/test_lua_utils.py:0 -> test_multiple_assignment_second_value", + "CALLS tests/test_lua_utils.py:0 -> test_nested_function_in_table", + "CALLS tests/test_lua_utils.py:0 -> test_nested_in_function", + "CALLS tests/test_lua_utils.py:0 -> test_nested_pcall", + "CALLS tests/test_lua_utils.py:0 -> test_no_assignment_context", + "CALLS tests/test_lua_utils.py:0 -> test_no_statement_ancestor", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_in_if_block", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_not_in_assignment", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_single_return_value", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_three_return_values", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_with_different_names", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_with_non_identifier_target", + "CALLS tests/test_lua_utils.py:0 -> test_return_statement_function", + "CALLS tests/test_lua_utils.py:0 -> test_simple_assignment", + "CALLS tests/test_lua_utils.py:0 -> test_unicode_identifier", + "CALLS tests/test_lua_utils.py:0 -> test_xpcall_pattern", + "CALLS tests/test_lua_utils.py:0 -> text", + "CALLS tests/test_lua_utils.py:0 -> type", + "CALLS 
tests/test_mcp_get_code_snippet.py:0 -> TestGetCodeSnippetBasic", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> TestGetCodeSnippetEdgeCases", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> TestGetCodeSnippetErrorHandling", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> TestGetCodeSnippetIntegration", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> TestGetCodeSnippetNotFound", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> add", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> anyio_backend", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> mcp_registry", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> mock_cypher_gen", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> mock_ingestor", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> name", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> project_root", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> sample_file", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> temp_project_root", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_class_snippet", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_function_snippet", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_malformed_qualified_name", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_method_snippet", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_multiple_snippets_sequentially", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_nonexistent_function", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_snippet_from_nested_module", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_snippet_tool_returns_none", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_snippet_verifies_qualified_name_passed", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_snippet_with_exception", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_snippet_with_no_docstring", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_snippet_with_unicode", + "CALLS 
tests/test_mcp_get_code_snippet.py:0 -> type", + "CALLS tests/test_mcp_list_directory.py:0 -> TestListDirectoryBasic", + "CALLS tests/test_mcp_list_directory.py:0 -> TestListDirectoryEdgeCases", + "CALLS tests/test_mcp_list_directory.py:0 -> TestListDirectoryOutput", + "CALLS tests/test_mcp_list_directory.py:0 -> TestListDirectoryPathHandling", + "CALLS tests/test_mcp_list_directory.py:0 -> anyio_backend", + "CALLS tests/test_mcp_list_directory.py:0 -> mcp_registry", + "CALLS tests/test_mcp_list_directory.py:0 -> mock_cypher_gen", + "CALLS tests/test_mcp_list_directory.py:0 -> mock_ingestor", + "CALLS tests/test_mcp_list_directory.py:0 -> project_root", + "CALLS tests/test_mcp_list_directory.py:0 -> sample_directory_structure", + "CALLS tests/test_mcp_list_directory.py:0 -> temp_project_root", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_directory_with_hidden_files", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_directory_with_special_characters", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_empty_directory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_file_instead_of_directory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_nested_directory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_nonexistent_directory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_prevents_directory_traversal", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_root_directory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_subdirectory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_with_absolute_path", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_with_relative_path", + "CALLS tests/test_mcp_list_directory.py:0 -> test_output_contains_only_names_not_paths", + "CALLS tests/test_mcp_list_directory.py:0 -> test_output_is_newline_separated", + "CALLS tests/test_mcp_query_and_index.py:0 -> GraphUpdater", + "CALLS tests/test_mcp_query_and_index.py:0 -> TestDeleteProject", + 
"CALLS tests/test_mcp_query_and_index.py:0 -> TestIndexRepository", + "CALLS tests/test_mcp_query_and_index.py:0 -> TestIndexRepositoryConstraintsAndFlush", + "CALLS tests/test_mcp_query_and_index.py:0 -> TestListProjects", + "CALLS tests/test_mcp_query_and_index.py:0 -> TestQueryAndIndexIntegration", + "CALLS tests/test_mcp_query_and_index.py:0 -> TestQueryCodeGraph", + "CALLS tests/test_mcp_query_and_index.py:0 -> TestWipeDatabase", + "CALLS tests/test_mcp_query_and_index.py:0 -> _MockIngestor", + "CALLS tests/test_mcp_query_and_index.py:0 -> add", + "CALLS tests/test_mcp_query_and_index.py:0 -> anyio_backend", + "CALLS tests/test_mcp_query_and_index.py:0 -> clean_database", + "CALLS tests/test_mcp_query_and_index.py:0 -> ensure_constraints", + "CALLS tests/test_mcp_query_and_index.py:0 -> flush_all", + "CALLS tests/test_mcp_query_and_index.py:0 -> index", + "CALLS tests/test_mcp_query_and_index.py:0 -> main", + "CALLS tests/test_mcp_query_and_index.py:0 -> mcp_registry", + "CALLS tests/test_mcp_query_and_index.py:0 -> mock_cypher_gen", + "CALLS tests/test_mcp_query_and_index.py:0 -> mock_delete", + "CALLS tests/test_mcp_query_and_index.py:0 -> mock_ingestor", + "CALLS tests/test_mcp_query_and_index.py:0 -> mock_run", + "CALLS tests/test_mcp_query_and_index.py:0 -> mock_updater", + "CALLS tests/test_mcp_query_and_index.py:0 -> name", + "CALLS tests/test_mcp_query_and_index.py:0 -> nodes", + "CALLS tests/test_mcp_query_and_index.py:0 -> project_root", + "CALLS tests/test_mcp_query_and_index.py:0 -> relationships", + "CALLS tests/test_mcp_query_and_index.py:0 -> repo", + "CALLS tests/test_mcp_query_and_index.py:0 -> repo_path", + "CALLS tests/test_mcp_query_and_index.py:0 -> run", + "CALLS tests/test_mcp_query_and_index.py:0 -> sample_file", + "CALLS tests/test_mcp_query_and_index.py:0 -> summary", + "CALLS tests/test_mcp_query_and_index.py:0 -> temp_project_root", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_delete_project_error", + "CALLS 
tests/test_mcp_query_and_index.py:0 -> test_delete_project_not_found", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_delete_project_success", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_and_query_workflow", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_ensures_constraints_and_flushes_around_run", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_clears_project_data_first", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_creates_graph_updater", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_deletes_project_before_updater_runs", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_handles_errors", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_multiple_times", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_success", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_with_empty_directory", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_list_projects_empty", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_list_projects_error", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_list_projects_success", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_after_index", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_error_handling", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_finds_classes", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_finds_function_calls", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_finds_functions", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_handles_unicode", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_verifies_parameter_passed", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_with_complex_natural_language", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_with_no_results", + "CALLS tests/test_mcp_query_and_index.py:0 -> 
test_sequential_index_only_clears_own_project_data", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_update_ensures_constraints_and_flushes_around_run", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_wipe_database_confirmed", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_wipe_database_error", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_wipe_database_not_confirmed", + "CALLS tests/test_mcp_query_and_index.py:0 -> type", + "CALLS tests/test_mcp_query_and_index.py:0 -> updater", + "CALLS tests/test_mcp_read_file.py:0 -> TestReadFileEdgeCases", + "CALLS tests/test_mcp_read_file.py:0 -> TestReadFileLargeFiles", + "CALLS tests/test_mcp_read_file.py:0 -> TestReadFileWithPagination", + "CALLS tests/test_mcp_read_file.py:0 -> TestReadFileWithoutPagination", + "CALLS tests/test_mcp_read_file.py:0 -> anyio_backend", + "CALLS tests/test_mcp_read_file.py:0 -> large_file", + "CALLS tests/test_mcp_read_file.py:0 -> load", + "CALLS tests/test_mcp_read_file.py:0 -> mcp_registry", + "CALLS tests/test_mcp_read_file.py:0 -> mock_cypher_gen", + "CALLS tests/test_mcp_read_file.py:0 -> mock_ingestor", + "CALLS tests/test_mcp_read_file.py:0 -> project_root", + "CALLS tests/test_mcp_read_file.py:0 -> read", + "CALLS tests/test_mcp_read_file.py:0 -> sample_file", + "CALLS tests/test_mcp_read_file.py:0 -> temp_project_root", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_empty_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_file_with_unicode", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_full_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_last_lines_of_large_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_middle_of_large_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_nonexistent_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_offset_beyond_file_length", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_single_line_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_with_limit_only", + 
"CALLS tests/test_mcp_read_file.py:0 -> test_read_with_offset_and_limit", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_with_offset_only", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_zero_offset", + "CALLS tests/test_mcp_server.py:0 -> TestGetProjectRoot", + "CALLS tests/test_mcp_server.py:0 -> TestServeStdioShutdown", + "CALLS tests/test_mcp_server.py:0 -> TestServiceLifecycle", + "CALLS tests/test_mcp_server.py:0 -> clear", + "CALLS tests/test_mcp_server.py:0 -> close_qdrant_client", + "CALLS tests/test_mcp_server.py:0 -> create_server", + "CALLS tests/test_mcp_server.py:0 -> fake_stdio", + "CALLS tests/test_mcp_server.py:0 -> mock_ingestor", + "CALLS tests/test_mcp_server.py:0 -> mock_settings", + "CALLS tests/test_mcp_server.py:0 -> name", + "CALLS tests/test_mcp_server.py:0 -> parent", + "CALLS tests/test_mcp_server.py:0 -> run", + "CALLS tests/test_mcp_server.py:0 -> test_defaults_to_cwd_when_empty_string", + "CALLS tests/test_mcp_server.py:0 -> test_defaults_to_cwd_when_not_configured", + "CALLS tests/test_mcp_server.py:0 -> test_defaults_to_cwd_without_error", + "CALLS tests/test_mcp_server.py:0 -> test_env_var_takes_priority_over_settings", + "CALLS tests/test_mcp_server.py:0 -> test_handles_symlinks", + "CALLS tests/test_mcp_server.py:0 -> test_raises_error_when_path_does_not_exist", + "CALLS tests/test_mcp_server.py:0 -> test_raises_error_when_path_is_file", + "CALLS tests/test_mcp_server.py:0 -> test_resolves_relative_paths", + "CALLS tests/test_mcp_server.py:0 -> test_serve_stdio_closes_qdrant_client_on_shutdown", + "CALLS tests/test_mcp_server.py:0 -> test_service_lifecycle_closes_qdrant_on_exception", + "CALLS tests/test_mcp_server.py:0 -> test_service_lifecycle_closes_qdrant_on_exit", + "CALLS tests/test_mcp_server.py:0 -> test_uses_environment_variable_when_set", + "CALLS tests/test_mcp_server.py:0 -> test_uses_settings_when_env_not_set", + "CALLS tests/test_mcp_server.py:0 -> test_works_with_actual_cwd", + "CALLS 
tests/test_mcp_surgical_replace.py:0 -> TestSurgicalReplaceBasic", + "CALLS tests/test_mcp_surgical_replace.py:0 -> TestSurgicalReplaceEdgeCases", + "CALLS tests/test_mcp_surgical_replace.py:0 -> TestSurgicalReplaceErrorHandling", + "CALLS tests/test_mcp_surgical_replace.py:0 -> TestSurgicalReplaceIntegration", + "CALLS tests/test_mcp_surgical_replace.py:0 -> TestSurgicalReplacePathHandling", + "CALLS tests/test_mcp_surgical_replace.py:0 -> add", + "CALLS tests/test_mcp_surgical_replace.py:0 -> anyio_backend", + "CALLS tests/test_mcp_surgical_replace.py:0 -> mcp_registry", + "CALLS tests/test_mcp_surgical_replace.py:0 -> mock_cypher_gen", + "CALLS tests/test_mcp_surgical_replace.py:0 -> mock_ingestor", + "CALLS tests/test_mcp_surgical_replace.py:0 -> project_root", + "CALLS tests/test_mcp_surgical_replace.py:0 -> read", + "CALLS tests/test_mcp_surgical_replace.py:0 -> sample_file", + "CALLS tests/test_mcp_surgical_replace.py:0 -> style", + "CALLS tests/test_mcp_surgical_replace.py:0 -> temp_project_root", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_multiple_replacements_in_sequence", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_code_not_found", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_different_file_types", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_function_implementation", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_in_subdirectory", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_method_implementation", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_multiline_block", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_nonexistent_file", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_preserves_whitespace", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_prevents_directory_traversal", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_readonly_file", + "CALLS tests/test_mcp_surgical_replace.py:0 -> 
test_replace_verifies_parameters_passed", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_with_empty_replacement", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_with_exact_match", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_with_exception", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_with_unicode", + "CALLS tests/test_mcp_surgical_replace.py:0 -> value", + "CALLS tests/test_mcp_tools_helpers.py:0 -> MCPToolsRegistry", + "CALLS tests/test_mcp_tools_helpers.py:0 -> TestCleanupProjectEmbeddings", + "CALLS tests/test_mcp_tools_helpers.py:0 -> TestGetProjectNodeIds", + "CALLS tests/test_mcp_tools_helpers.py:0 -> delete_project_embeddings", + "CALLS tests/test_mcp_tools_helpers.py:0 -> fetch_all", + "CALLS tests/test_mcp_tools_helpers.py:0 -> mock_delete", + "CALLS tests/test_mcp_tools_helpers.py:0 -> mock_ingestor", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_calls_delete_with_empty_list_when_no_nodes", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_calls_delete_with_node_ids", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_filters_non_integer_ids", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_returns_empty_when_no_rows", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_returns_integer_ids", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_skips_rows_missing_key", + "CALLS tests/test_mcp_update_and_search.py:0 -> GraphUpdater", + "CALLS tests/test_mcp_update_and_search.py:0 -> MCPParamName", + "CALLS tests/test_mcp_update_and_search.py:0 -> MCPToolName", + "CALLS tests/test_mcp_update_and_search.py:0 -> TestAskAgent", + "CALLS tests/test_mcp_update_and_search.py:0 -> TestMCPClient", + "CALLS tests/test_mcp_update_and_search.py:0 -> TestMainSingleQuery", + "CALLS tests/test_mcp_update_and_search.py:0 -> TestRagAgentProperty", + "CALLS tests/test_mcp_update_and_search.py:0 -> TestSemanticSearchRegistration", + "CALLS tests/test_mcp_update_and_search.py:0 -> TestToolDescriptions", + 
"CALLS tests/test_mcp_update_and_search.py:0 -> TestUpdateRepository", + "CALLS tests/test_mcp_update_and_search.py:0 -> _initialize_services_and_agent", + "CALLS tests/test_mcp_update_and_search.py:0 -> _setup_common_initialization", + "CALLS tests/test_mcp_update_and_search.py:0 -> add", + "CALLS tests/test_mcp_update_and_search.py:0 -> agent", + "CALLS tests/test_mcp_update_and_search.py:0 -> anyio_backend", + "CALLS tests/test_mcp_update_and_search.py:0 -> call_tool", + "CALLS tests/test_mcp_update_and_search.py:0 -> clean_database", + "CALLS tests/test_mcp_update_and_search.py:0 -> connect_memgraph", + "CALLS tests/test_mcp_update_and_search.py:0 -> create_get_function_source_tool", + "CALLS tests/test_mcp_update_and_search.py:0 -> create_rag_orchestrator", + "CALLS tests/test_mcp_update_and_search.py:0 -> create_semantic_search_tool", + "CALLS tests/test_mcp_update_and_search.py:0 -> delete_project", + "CALLS tests/test_mcp_update_and_search.py:0 -> has_semantic_dependencies", + "CALLS tests/test_mcp_update_and_search.py:0 -> main", + "CALLS tests/test_mcp_update_and_search.py:0 -> mcp_registry", + "CALLS tests/test_mcp_update_and_search.py:0 -> mock_cypher_gen", + "CALLS tests/test_mcp_update_and_search.py:0 -> mock_ingestor", + "CALLS tests/test_mcp_update_and_search.py:0 -> mock_open", + "CALLS tests/test_mcp_update_and_search.py:0 -> mock_updater", + "CALLS tests/test_mcp_update_and_search.py:0 -> parse", + "CALLS tests/test_mcp_update_and_search.py:0 -> project_root", + "CALLS tests/test_mcp_update_and_search.py:0 -> rag_agent", + "CALLS tests/test_mcp_update_and_search.py:0 -> run", + "CALLS tests/test_mcp_update_and_search.py:0 -> sample_file", + "CALLS tests/test_mcp_update_and_search.py:0 -> temp_project_root", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_ask_agent_error", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_ask_agent_in_tool_map", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_ask_agent_registered", + "CALLS 
tests/test_mcp_update_and_search.py:0 -> test_ask_agent_success", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_client_uses_constants", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_index_repository_warns_about_project_clear", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_main_single_query_prints_output", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_main_single_query_routes_logs_to_stderr", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_query_mcp_server_is_callable", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_query_mcp_server_opens_devnull", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_query_with_errlog_empty_response", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_query_with_errlog_is_async", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_query_with_errlog_json_response", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_query_with_errlog_non_json_response", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_rag_agent_caches_after_first_access", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_rag_agent_includes_function_source_tool", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_rag_agent_includes_semantic_search_when_available", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_rag_agent_lazy_init", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_rag_agent_setter_allows_mock", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_semantic_search_calls_tool", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_semantic_search_in_tool_map", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_semantic_search_not_registered_without_deps", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_semantic_search_registered_with_deps", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_update_repository_error", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_update_repository_in_tool_map", + "CALLS tests/test_mcp_update_and_search.py:0 
-> test_update_repository_no_wipe", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_update_repository_registered", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_update_repository_success", + "CALLS tests/test_mcp_update_and_search.py:0 -> text", + "CALLS tests/test_mcp_write_file.py:0 -> TestWriteFileBasic", + "CALLS tests/test_mcp_write_file.py:0 -> TestWriteFileContent", + "CALLS tests/test_mcp_write_file.py:0 -> TestWriteFileErrorHandling", + "CALLS tests/test_mcp_write_file.py:0 -> TestWriteFilePathHandling", + "CALLS tests/test_mcp_write_file.py:0 -> anyio_backend", + "CALLS tests/test_mcp_write_file.py:0 -> mcp_registry", + "CALLS tests/test_mcp_write_file.py:0 -> mock_cypher_gen", + "CALLS tests/test_mcp_write_file.py:0 -> mock_ingestor", + "CALLS tests/test_mcp_write_file.py:0 -> name", + "CALLS tests/test_mcp_write_file.py:0 -> parent", + "CALLS tests/test_mcp_write_file.py:0 -> project_root", + "CALLS tests/test_mcp_write_file.py:0 -> read", + "CALLS tests/test_mcp_write_file.py:0 -> temp_project_root", + "CALLS tests/test_mcp_write_file.py:0 -> test_overwrite_existing_file", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_empty_file", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_file_in_subdirectory", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_json_content", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_multiline_content", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_new_file", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_prevents_directory_traversal", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_python_code", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_to_readonly_directory", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_unicode_content", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_very_long_content", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_with_relative_path", + "CALLS tests/test_mcp_write_file.py:0 -> 
test_write_with_spaces_in_filename", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_with_special_characters_in_filename", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_with_various_file_extensions", + "CALLS tests/test_mcp_write_file.py:0 -> value", + "CALLS tests/test_memgraph_batching.py:0 -> close", + "CALLS tests/test_memgraph_batching.py:0 -> description", + "CALLS tests/test_memgraph_batching.py:0 -> execute", + "CALLS tests/test_memgraph_batching.py:0 -> fetchall", + "CALLS tests/test_memgraph_batching.py:0 -> flush_nodes", + "CALLS tests/test_memgraph_batching.py:0 -> graph_service", + "CALLS tests/test_memgraph_batching.py:0 -> name", + "CALLS tests/test_memgraph_batching.py:0 -> test_node_batch_flushes_when_threshold_reached", + "CALLS tests/test_memgraph_batching.py:0 -> test_node_batch_preserves_per_row_properties", + "CALLS tests/test_memgraph_batching.py:0 -> test_relationship_batch_flushes_after_threshold_and_respects_node_flush", + "CALLS tests/test_memory_limit.py:0 -> TestApplyMemoryLimit", + "CALLS tests/test_memory_limit.py:0 -> graph_service", + "CALLS tests/test_memory_limit.py:0 -> test_appends_hint_to_simple_query", + "CALLS tests/test_memory_limit.py:0 -> test_appends_hint_when_no_trailing_semicolon", + "CALLS tests/test_memory_limit.py:0 -> test_handles_multiline_query", + "CALLS tests/test_memory_limit.py:0 -> test_handles_trailing_whitespace", + "CALLS tests/test_memory_limit.py:0 -> test_handles_whitespace_before_semicolon", + "CALLS tests/test_memory_limit.py:0 -> test_preserves_existing_hint", + "CALLS tests/test_memory_limit.py:0 -> test_preserves_existing_hint_case_insensitive", + "CALLS tests/test_memory_limit.py:0 -> test_uses_configured_megabytes", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> NodeLabel", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> RelationshipType", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> SupportedLanguage", + "CALLS 
tests/test_method_calls_caller_attribution.py:0 -> TestCppMethodCallerAttribution", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> TestJavaMethodCallerAttribution", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> TestJavaScriptMethodCallerAttribution", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> TestPhpMethodCallerAttribution", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> TestPythonMethodCallerAttribution", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> TestRustMethodCallerAttribution", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> TestTypeScriptMethodCallerAttribution", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> _get_function_caller_calls", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> _get_module_caller_calls", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> _load", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> add", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> execute", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> handler", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> method_calls", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> mock_ingestor", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> parse", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> process", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> repo", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> run", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> save", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> temp_repo", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_class_method_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_const_method_calls", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_constructor_body_calls_method", + "CALLS 
tests/test_method_calls_caller_attribution.py:0 -> test_constructor_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_deeply_nested_call_chain", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_dunder_init_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_impl_method_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_method_calling_another_via_this_pointer", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_method_calling_free_function_has_method_caller", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_method_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_method_with_parameters", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_multiple_classes_in_one_file", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_multiple_impl_methods", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_multiple_methods_calling_each_other", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_multiple_methods_with_types", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_simple_class_method_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_static_method_calls", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_struct_method_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_virtual_method_calls", + "CALLS tests/test_model_switching.py:0 -> TestAgentLoopUserPromptOnResume", + "CALLS tests/test_model_switching.py:0 -> TestCommandConstants", + "CALLS tests/test_model_switching.py:0 -> TestCreateModelFromString", + "CALLS tests/test_model_switching.py:0 -> TestHandleModelCommand", + "CALLS tests/test_model_switching.py:0 -> TestModelCommandEdgeCases", + "CALLS tests/test_model_switching.py:0 -> TestModelHelpCommand", + "CALLS tests/test_model_switching.py:0 -> 
TestModelOverrideInAgentLoop", + "CALLS tests/test_model_switching.py:0 -> TestMultipleModelSwitches", + "CALLS tests/test_model_switching.py:0 -> _process_tool_approvals", + "CALLS tests/test_model_switching.py:0 -> _refresh_context_tokens", + "CALLS tests/test_model_switching.py:0 -> _thinking_with_status_bar", + "CALLS tests/test_model_switching.py:0 -> active_orchestrator_config", + "CALLS tests/test_model_switching.py:0 -> add", + "CALLS tests/test_model_switching.py:0 -> create_file", + "CALLS tests/test_model_switching.py:0 -> create_model", + "CALLS tests/test_model_switching.py:0 -> done", + "CALLS tests/test_model_switching.py:0 -> get_provider_from_config", + "CALLS tests/test_model_switching.py:0 -> log_session_event", + "CALLS tests/test_model_switching.py:0 -> main", + "CALLS tests/test_model_switching.py:0 -> mock_console", + "CALLS tests/test_model_switching.py:0 -> mock_settings", + "CALLS tests/test_model_switching.py:0 -> name", + "CALLS tests/test_model_switching.py:0 -> ollama_endpoint", + "CALLS tests/test_model_switching.py:0 -> parse_model_string", + "CALLS tests/test_model_switching.py:0 -> run", + "CALLS tests/test_model_switching.py:0 -> status", + "CALLS tests/test_model_switching.py:0 -> test_assertion_error_is_caught", + "CALLS tests/test_model_switching.py:0 -> test_deferred_results_passed_only_after_approval", + "CALLS tests/test_model_switching.py:0 -> test_empty_model_id_raises_error", + "CALLS tests/test_model_switching.py:0 -> test_empty_provider_raises_error", + "CALLS tests/test_model_switching.py:0 -> test_help_command", + "CALLS tests/test_model_switching.py:0 -> test_invalid_provider_raises_error", + "CALLS tests/test_model_switching.py:0 -> test_missing_colon_raises_format_error", + "CALLS tests/test_model_switching.py:0 -> test_model_command_prefix", + "CALLS tests/test_model_switching.py:0 -> test_model_creation_error_shows_error_message", + "CALLS tests/test_model_switching.py:0 -> test_model_help_case_insensitive", + 
"CALLS tests/test_model_switching.py:0 -> test_model_help_preserves_current_model", + "CALLS tests/test_model_switching.py:0 -> test_model_help_shows_usage", + "CALLS tests/test_model_switching.py:0 -> test_model_override_none_by_default", + "CALLS tests/test_model_switching.py:0 -> test_model_override_passed_to_agent_run", + "CALLS tests/test_model_switching.py:0 -> test_multimodal_user_prompt_not_resent_after_approval", + "CALLS tests/test_model_switching.py:0 -> test_multiple_switches_in_sequence", + "CALLS tests/test_model_switching.py:0 -> test_ollama_provider_uses_local_endpoint", + "CALLS tests/test_model_switching.py:0 -> test_preserves_previous_model_on_show", + "CALLS tests/test_model_switching.py:0 -> test_same_provider_uses_current_config", + "CALLS tests/test_model_switching.py:0 -> test_show_current_model_when_no_argument", + "CALLS tests/test_model_switching.py:0 -> test_show_current_model_with_trailing_space", + "CALLS tests/test_model_switching.py:0 -> test_show_default_model_when_no_override", + "CALLS tests/test_model_switching.py:0 -> test_switch_model_with_extra_whitespace", + "CALLS tests/test_model_switching.py:0 -> test_switch_then_show_preserves_model", + "CALLS tests/test_model_switching.py:0 -> test_switch_to_new_model", + "CALLS tests/test_model_switching.py:0 -> test_ui_messages_exist", + "CALLS tests/test_model_switching.py:0 -> test_ui_model_current_format", + "CALLS tests/test_model_switching.py:0 -> test_ui_model_switched_format", + "CALLS tests/test_model_switching.py:0 -> test_user_prompt_not_resent_across_multiple_deferred_rounds", + "CALLS tests/test_model_switching.py:0 -> test_user_prompt_not_resent_after_deferred_tool_approval", + "CALLS tests/test_model_switching.py:0 -> test_user_prompt_passed_on_first_call_when_no_deferred", + "CALLS tests/test_model_switching.py:0 -> test_value_error_is_caught", + "CALLS tests/test_model_switching.py:0 -> test_whitespace_around_colon_is_stripped", + "CALLS 
tests/test_module_call_attribution.py:0 -> NodeLabel", + "CALLS tests/test_module_call_attribution.py:0 -> RelationshipType", + "CALLS tests/test_module_call_attribution.py:0 -> TestModuleCallAttribution", + "CALLS tests/test_module_call_attribution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_module_call_attribution.py:0 -> load", + "CALLS tests/test_module_call_attribution.py:0 -> main", + "CALLS tests/test_module_call_attribution.py:0 -> mock_ingestor", + "CALLS tests/test_module_call_attribution.py:0 -> temp_repo", + "CALLS tests/test_module_call_attribution.py:0 -> test_bare_module_level_call_attributed_to_module", + "CALLS tests/test_module_call_attribution.py:0 -> test_cpp_file_scope_initializer_call_attributed_to_module", + "CALLS tests/test_module_call_attribution.py:0 -> test_default_argument_call_attributed_to_module", + "CALLS tests/test_module_call_attribution.py:0 -> test_nested_call_not_attributed_to_module", + "CALLS tests/test_module_call_attribution.py:0 -> test_top_level_call_is_attributed_to_module", + "CALLS tests/test_module_qn_language_collision.py:0 -> NodeLabel", + "CALLS tests/test_module_qn_language_collision.py:0 -> mock_ingestor", + "CALLS tests/test_module_qn_language_collision.py:0 -> name", + "CALLS tests/test_module_qn_language_collision.py:0 -> nodes", + "CALLS tests/test_module_qn_language_collision.py:0 -> register_unique_qn", + "CALLS tests/test_module_qn_language_collision.py:0 -> temp_repo", + "CALLS tests/test_module_qn_language_collision.py:0 -> test_same_stem_files_get_distinct_module_qns", + "CALLS tests/test_module_qn_language_collision.py:0 -> test_same_stem_methods_do_not_collide", + "CALLS tests/test_module_qn_language_collision.py:0 -> up", + "CALLS tests/test_multi_project.py:0 -> TestPromptActiveProjectsBlock", + "CALLS tests/test_multi_project.py:0 -> TestResolveActiveProjects", + "CALLS tests/test_multi_project.py:0 -> _run_graph_sync", + "CALLS tests/test_multi_project.py:0 -> 
_update_and_validate_models", + "CALLS tests/test_multi_project.py:0 -> agent", + "CALLS tests/test_multi_project.py:0 -> cli", + "CALLS tests/test_multi_project.py:0 -> connect_memgraph", + "CALLS tests/test_multi_project.py:0 -> list_projects", + "CALLS tests/test_multi_project.py:0 -> main_single_query", + "CALLS tests/test_multi_project.py:0 -> mock_ingestor", + "CALLS tests/test_multi_project.py:0 -> mock_memgraph_connect", + "CALLS tests/test_multi_project.py:0 -> mock_sync_path", + "CALLS tests/test_multi_project.py:0 -> mock_validate_models", + "CALLS tests/test_multi_project.py:0 -> name", + "CALLS tests/test_multi_project.py:0 -> repo", + "CALLS tests/test_multi_project.py:0 -> runner", + "CALLS tests/test_multi_project.py:0 -> start", + "CALLS tests/test_multi_project.py:0 -> test_all_empty_falls_back_to_default", + "CALLS tests/test_multi_project.py:0 -> test_drops_empty_entries", + "CALLS tests/test_multi_project.py:0 -> test_multiple_projects_comma_separated", + "CALLS tests/test_multi_project.py:0 -> test_multiple_projects_lists_all", + "CALLS tests/test_multi_project.py:0 -> test_no_projects_lists_list_projects_hint", + "CALLS tests/test_multi_project.py:0 -> test_returns_default_for_empty_string", + "CALLS tests/test_multi_project.py:0 -> test_returns_default_when_no_projects_flag", + "CALLS tests/test_multi_project.py:0 -> test_single_project_in_flag", + "CALLS tests/test_multi_project.py:0 -> test_single_project_mentions_starts_with", + "CALLS tests/test_multi_project.py:0 -> test_start_default_projects_uses_derived_name", + "CALLS tests/test_multi_project.py:0 -> test_start_passes_projects_to_single_query", + "CALLS tests/test_multi_project.py:0 -> test_strips_whitespace", + "CALLS tests/test_multilang_import_parsing.py:0 -> factory", + "CALLS tests/test_multilang_import_parsing.py:0 -> graph_updater", + "CALLS tests/test_multilang_import_parsing.py:0 -> import_processor", + "CALLS tests/test_multilang_import_parsing.py:0 -> main", + "CALLS 
tests/test_multilang_import_parsing.py:0 -> mock_ingestor", + "CALLS tests/test_multilang_import_parsing.py:0 -> name", + "CALLS tests/test_multilang_import_parsing.py:0 -> repo_path", + "CALLS tests/test_multilang_import_parsing.py:0 -> test_go_import_parsing", + "CALLS tests/test_multilang_import_parsing.py:0 -> test_java_import_parsing", + "CALLS tests/test_multilang_import_parsing.py:0 -> test_javascript_import_parsing", + "CALLS tests/test_multilang_import_parsing.py:0 -> test_rust_complex_import_patterns", + "CALLS tests/test_multilang_import_parsing.py:0 -> test_rust_import_parsing", + "CALLS tests/test_multilang_import_parsing.py:0 -> updater", + "CALLS tests/test_multiline_input_keybindings.py:0 -> KeyBinding", + "CALLS tests/test_multiline_input_keybindings.py:0 -> main", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_ctrl_c_raises_keyboard_interrupt", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_ctrl_e_submits_after_multiline_with_enter", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_ctrl_e_submits_buffer", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_ctrl_j_submits_after_multiline_with_enter", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_ctrl_j_submits_buffer", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_hint_mentions_both_submit_shortcuts", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_keybinding_enum_has_submit_shortcuts", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_result_is_stripped", + "CALLS tests/test_multiline_input_keybindings.py:0 -> text", + "CALLS tests/test_multiline_input_keybindings.py:0 -> value", + "CALLS tests/test_nested_function_defines.py:0 -> NodeLabel", + "CALLS tests/test_nested_function_defines.py:0 -> RelationshipType", + "CALLS tests/test_nested_function_defines.py:0 -> TestNestedClassDefines", + "CALLS tests/test_nested_function_defines.py:0 -> TestNestedFunctionDefines", + "CALLS 
tests/test_nested_function_defines.py:0 -> dfs", + "CALLS tests/test_nested_function_defines.py:0 -> ensure_node_batch", + "CALLS tests/test_nested_function_defines.py:0 -> ensure_relationship_batch", + "CALLS tests/test_nested_function_defines.py:0 -> execute_write", + "CALLS tests/test_nested_function_defines.py:0 -> fetch_all", + "CALLS tests/test_nested_function_defines.py:0 -> flush_all", + "CALLS tests/test_nested_function_defines.py:0 -> graph_updater", + "CALLS tests/test_nested_function_defines.py:0 -> nodes", + "CALLS tests/test_nested_function_defines.py:0 -> repo_path", + "CALLS tests/test_nested_function_defines.py:0 -> test_class_in_method_defined_by_method", + "CALLS tests/test_nested_function_defines.py:0 -> test_function_in_function_defined_by_function", + "CALLS tests/test_nested_function_defines.py:0 -> test_function_in_method_defined_by_method", + "CALLS tests/test_nested_function_qualified_names.py:0 -> description", + "CALLS tests/test_nested_function_qualified_names.py:0 -> execute", + "CALLS tests/test_nested_function_qualified_names.py:0 -> export", + "CALLS tests/test_nested_function_qualified_names.py:0 -> get", + "CALLS tests/test_nested_function_qualified_names.py:0 -> items", + "CALLS tests/test_nested_function_qualified_names.py:0 -> loader", + "CALLS tests/test_nested_function_qualified_names.py:0 -> mock_ingestor", + "CALLS tests/test_nested_function_qualified_names.py:0 -> name", + "CALLS tests/test_nested_function_qualified_names.py:0 -> nested_functions_project", + "CALLS tests/test_nested_function_qualified_names.py:0 -> process", + "CALLS tests/test_nested_function_qualified_names.py:0 -> processor", + "CALLS tests/test_nested_function_qualified_names.py:0 -> temp_repo", + "CALLS tests/test_nested_function_qualified_names.py:0 -> test_arrow_functions_in_constructors", + "CALLS tests/test_nested_function_qualified_names.py:0 -> test_commonjs_exports_in_functions", + "CALLS tests/test_nested_function_qualified_names.py:0 -> 
test_export_functions_in_modules", + "CALLS tests/test_nested_function_qualified_names.py:0 -> test_object_methods_inside_functions", + "CALLS tests/test_nested_function_qualified_names.py:0 -> text", + "CALLS tests/test_nested_function_qualified_names.py:0 -> value", + "CALLS tests/test_node_relationship_coverage.py:0 -> NodeLabel", + "CALLS tests/test_node_relationship_coverage.py:0 -> NodeType", + "CALLS tests/test_node_relationship_coverage.py:0 -> RelationshipType", + "CALLS tests/test_node_relationship_coverage.py:0 -> TestEnsureConstraintsForAllLabels", + "CALLS tests/test_node_relationship_coverage.py:0 -> TestFlushNodesForAllNodeLabels", + "CALLS tests/test_node_relationship_coverage.py:0 -> TestFlushRelationshipsForAllTypes", + "CALLS tests/test_node_relationship_coverage.py:0 -> TestImportTimeValidation", + "CALLS tests/test_node_relationship_coverage.py:0 -> TestNodeBufferFlushWithMissingKey", + "CALLS tests/test_node_relationship_coverage.py:0 -> TestNodeLabelConstraintConsistency", + "CALLS tests/test_node_relationship_coverage.py:0 -> TestNodeLabelCoverage", + "CALLS tests/test_node_relationship_coverage.py:0 -> TestNodeLabelEnumCompleteness", + "CALLS tests/test_node_relationship_coverage.py:0 -> TestRelationshipTypeCompleteness", + "CALLS tests/test_node_relationship_coverage.py:0 -> TestUniqueKeyPropertyNames", + "CALLS tests/test_node_relationship_coverage.py:0 -> UniqueKeyType", + "CALLS tests/test_node_relationship_coverage.py:0 -> _execute_query", + "CALLS tests/test_node_relationship_coverage.py:0 -> capture_query", + "CALLS tests/test_node_relationship_coverage.py:0 -> description", + "CALLS tests/test_node_relationship_coverage.py:0 -> execute", + "CALLS tests/test_node_relationship_coverage.py:0 -> fetchall", + "CALLS tests/test_node_relationship_coverage.py:0 -> flush", + "CALLS tests/test_node_relationship_coverage.py:0 -> graph_service", + "CALLS tests/test_node_relationship_coverage.py:0 -> index", + "CALLS 
tests/test_node_relationship_coverage.py:0 -> name", + "CALLS tests/test_node_relationship_coverage.py:0 -> nodes", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_all_node_labels_have_unique_key_mapping", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_all_node_labels_in_constraints", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_all_node_types_in_constraints", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_each_node_label_can_be_flushed", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_each_node_label_has_constraint", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_each_node_type_can_be_flushed", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_each_node_type_has_constraint", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_each_relationship_type_can_be_flushed", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_ensure_constraints_creates_all_constraints", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_ensure_constraints_creates_all_indexes", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_import_time_validation_catches_missing_keys", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_name_unique_key_uses_correct_property", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_node_label_count_matches_constraints_count", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_node_type_is_subset_of_node_label", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_node_unique_constraints_derived_from_single_source", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_node_without_unique_key_is_skipped_not_crashed", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_path_unique_key_uses_correct_property", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_qualified_name_unique_key_uses_correct_property", + "CALLS tests/test_node_relationship_coverage.py:0 -> 
test_relationship_type_values_match_names", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_relationship_types_are_uppercase", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_unique_key_types_are_valid", + "CALLS tests/test_node_relationship_coverage.py:0 -> type", + "CALLS tests/test_node_relationship_coverage.py:0 -> value", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> RelationshipType", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> TestOperatorDispatchResolution", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> execute_write", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> fetch_all", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> flush_all", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> graph_updater", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> parent", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> repo_path", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> test_builtin_container_produces_no_dunder_edge", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> test_contains_operator_dispatches_to_dunder", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> test_len_dispatches_to_dunder", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> test_subscript_read_dispatches_to_getitem", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> test_subscript_write_dispatches_to_setitem", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> type", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> value", + "CALLS tests/test_oracle_nested_defs.py:0 -> handler", + "CALLS tests/test_oracle_nested_defs.py:0 -> name", + "CALLS tests/test_oracle_nested_defs.py:0 -> nodes", + "CALLS tests/test_oracle_nested_defs.py:0 -> test_oracle_captures_function_in_except_handler", + 
"CALLS tests/test_oracle_nested_defs.py:0 -> test_oracle_captures_function_in_match_case", + "CALLS tests/test_oracle_nested_defs.py:0 -> value", + "CALLS tests/test_permission_mode.py:0 -> PermissionMode", + "CALLS tests/test_permission_mode.py:0 -> TestSessionPermissionMode", + "CALLS tests/test_permission_mode.py:0 -> test_cycle_toggles_back_to_normal", + "CALLS tests/test_permission_mode.py:0 -> test_cycle_toggles_to_yolo", + "CALLS tests/test_permission_mode.py:0 -> test_default_mode_is_normal", + "CALLS tests/test_php_containment_oracle.py:0 -> RelationshipType", + "CALLS tests/test_php_containment_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_php_containment_oracle.py:0 -> name", + "CALLS tests/test_php_containment_oracle.py:0 -> test_cgr_matches_php_parser_oracle_on_containment_edges", + "CALLS tests/test_php_containment_oracle.py:0 -> type", + "CALLS tests/test_php_containment_oracle.py:0 -> value", + "CALLS tests/test_php_functions.py:0 -> NodeType", + "CALLS tests/test_php_functions.py:0 -> add", + "CALLS tests/test_php_functions.py:0 -> ensure_node_batch", + "CALLS tests/test_php_functions.py:0 -> graph_updater", + "CALLS tests/test_php_functions.py:0 -> main", + "CALLS tests/test_php_functions.py:0 -> mock_ingestor", + "CALLS tests/test_php_functions.py:0 -> repo_path", + "CALLS tests/test_php_functions.py:0 -> temp_repo", + "CALLS tests/test_php_functions.py:0 -> test_php_class_discovery", + "CALLS tests/test_php_functions.py:0 -> test_php_function_discovery", + "CALLS tests/test_php_functions.py:0 -> test_php_method_calls", + "CALLS tests/test_php_functions.py:0 -> updater", + "CALLS tests/test_php_functions.py:0 -> value", + "CALLS tests/test_php_imports.py:0 -> factory", + "CALLS tests/test_php_imports.py:0 -> graph_updater", + "CALLS tests/test_php_imports.py:0 -> import_processor", + "CALLS tests/test_php_imports.py:0 -> index", + "CALLS tests/test_php_imports.py:0 -> mock_ingestor", + "CALLS tests/test_php_imports.py:0 -> name", + 
"CALLS tests/test_php_imports.py:0 -> repo_path", + "CALLS tests/test_php_imports.py:0 -> temp_repo", + "CALLS tests/test_php_imports.py:0 -> test_php_multiple_use_statements", + "CALLS tests/test_php_imports.py:0 -> test_php_use_statement_import", + "CALLS tests/test_php_imports.py:0 -> updater", + "CALLS tests/test_php_inheritance_edges.py:0 -> RelationshipType", + "CALLS tests/test_php_inheritance_edges.py:0 -> extractor", + "CALLS tests/test_php_inheritance_edges.py:0 -> mock_ingestor", + "CALLS tests/test_php_inheritance_edges.py:0 -> name", + "CALLS tests/test_php_inheritance_edges.py:0 -> nodes", + "CALLS tests/test_php_inheritance_edges.py:0 -> parent", + "CALLS tests/test_php_inheritance_edges.py:0 -> read", + "CALLS tests/test_php_inheritance_edges.py:0 -> temp_repo", + "CALLS tests/test_php_inheritance_edges.py:0 -> test_php_inheritance_and_implements_edges", + "CALLS tests/test_php_inheritance_edges.py:0 -> value", + "CALLS tests/test_php_inheritance_oracle.py:0 -> RelationshipType", + "CALLS tests/test_php_inheritance_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_php_inheritance_oracle.py:0 -> name", + "CALLS tests/test_php_inheritance_oracle.py:0 -> test_cgr_matches_php_parser_oracle_on_inheritance_edges", + "CALLS tests/test_php_inheritance_oracle.py:0 -> value", + "CALLS tests/test_php_span_oracle.py:0 -> Color", + "CALLS tests/test_php_span_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_php_span_oracle.py:0 -> name", + "CALLS tests/test_php_span_oracle.py:0 -> start", + "CALLS tests/test_php_span_oracle.py:0 -> test_cgr_matches_php_parser_oracle_on_node_spans", + "CALLS tests/test_php_structure_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_php_structure_oracle.py:0 -> name", + "CALLS tests/test_php_structure_oracle.py:0 -> nodes", + "CALLS tests/test_php_structure_oracle.py:0 -> test_cgr_matches_php_parser_oracle_on_php_structure", + "CALLS tests/test_processor_factory.py:0 -> CallProcessor", + "CALLS 
tests/test_processor_factory.py:0 -> DefinitionProcessor", + "CALLS tests/test_processor_factory.py:0 -> ImportProcessor", + "CALLS tests/test_processor_factory.py:0 -> NodeType", + "CALLS tests/test_processor_factory.py:0 -> ProcessorFactory", + "CALLS tests/test_processor_factory.py:0 -> StructureProcessor", + "CALLS tests/test_processor_factory.py:0 -> TestDependencyInjection", + "CALLS tests/test_processor_factory.py:0 -> TestDependencyOrdering", + "CALLS tests/test_processor_factory.py:0 -> TestLazyInitialization", + "CALLS tests/test_processor_factory.py:0 -> TestProcessorTypes", + "CALLS tests/test_processor_factory.py:0 -> TestSharedState", + "CALLS tests/test_processor_factory.py:0 -> TestSingletonBehavior", + "CALLS tests/test_processor_factory.py:0 -> TypeInferenceEngine", + "CALLS tests/test_processor_factory.py:0 -> call_processor", + "CALLS tests/test_processor_factory.py:0 -> definition_processor", + "CALLS tests/test_processor_factory.py:0 -> engine", + "CALLS tests/test_processor_factory.py:0 -> factory", + "CALLS tests/test_processor_factory.py:0 -> graph_updater", + "CALLS tests/test_processor_factory.py:0 -> import_processor", + "CALLS tests/test_processor_factory.py:0 -> mock_ingestor", + "CALLS tests/test_processor_factory.py:0 -> processor", + "CALLS tests/test_processor_factory.py:0 -> repo_path", + "CALLS tests/test_processor_factory.py:0 -> structure_processor", + "CALLS tests/test_processor_factory.py:0 -> temp_repo", + "CALLS tests/test_processor_factory.py:0 -> test_accessing_call_processor_initializes_definition_processor", + "CALLS tests/test_processor_factory.py:0 -> test_accessing_call_processor_initializes_import_processor", + "CALLS tests/test_processor_factory.py:0 -> test_accessing_call_processor_initializes_type_inference", + "CALLS tests/test_processor_factory.py:0 -> test_accessing_definition_processor_initializes_import_processor", + "CALLS tests/test_processor_factory.py:0 -> 
test_accessing_type_inference_initializes_definition_processor", + "CALLS tests/test_processor_factory.py:0 -> test_accessing_type_inference_initializes_import_processor", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_initialized_on_first_access", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_is_correct_type", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_not_initialized_on_factory_creation", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_class_inheritance_from_definition_processor", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_function_registry", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_import_processor", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_project_name", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_repo_path", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_type_inference", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_returns_same_instance", + "CALLS tests/test_processor_factory.py:0 -> test_class_inheritance_is_shared_between_type_inference_and_call_processor", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_initialized_on_first_access", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_is_correct_type", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_not_initialized_on_factory_creation", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_receives_function_registry", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_receives_import_processor", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_receives_project_name", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_receives_repo_path", + "CALLS tests/test_processor_factory.py:0 -> 
test_definition_processor_receives_simple_name_lookup", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_returns_same_instance", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_shares_module_qn_to_file_path", + "CALLS tests/test_processor_factory.py:0 -> test_function_registry_is_shared_across_processors", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_initialized_on_first_access", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_is_correct_type", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_not_initialized_on_factory_creation", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_receives_function_registry", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_receives_project_name", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_receives_repo_path", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_returns_same_instance", + "CALLS tests/test_processor_factory.py:0 -> test_module_qn_to_file_path_is_shared_dict", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_initialized_on_first_access", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_is_correct_type", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_not_initialized_on_factory_creation", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_receives_project_name", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_receives_queries", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_receives_repo_path", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_returns_same_instance", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_initialized_on_first_access", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_is_correct_type", + "CALLS tests/test_processor_factory.py:0 
-> test_type_inference_not_initialized_on_factory_creation", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_ast_cache", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_class_inheritance_from_definition_processor", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_function_registry", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_import_processor", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_project_name", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_queries", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_repo_path", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_simple_name_lookup", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_returns_same_instance", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_shares_module_qn_to_file_path", + "CALLS tests/test_processor_factory.py:0 -> type_inference", + "CALLS tests/test_processor_factory.py:0 -> updater", + "CALLS tests/test_project_name_flag.py:0 -> TestDefaultProjectName", + "CALLS tests/test_project_name_flag.py:0 -> TestEdgeCases", + "CALLS tests/test_project_name_flag.py:0 -> TestExplicitProjectName", + "CALLS tests/test_project_name_flag.py:0 -> TestFactoryPropagation", + "CALLS tests/test_project_name_flag.py:0 -> TestQualifiedNameIntegration", + "CALLS tests/test_project_name_flag.py:0 -> call_processor", + "CALLS tests/test_project_name_flag.py:0 -> definition_processor", + "CALLS tests/test_project_name_flag.py:0 -> factory", + "CALLS tests/test_project_name_flag.py:0 -> graph_updater", + "CALLS tests/test_project_name_flag.py:0 -> import_processor", + "CALLS tests/test_project_name_flag.py:0 -> main", + "CALLS tests/test_project_name_flag.py:0 -> mock_ingestor", + "CALLS tests/test_project_name_flag.py:0 -> name", + "CALLS 
tests/test_project_name_flag.py:0 -> parent", + "CALLS tests/test_project_name_flag.py:0 -> parsers_and_queries", + "CALLS tests/test_project_name_flag.py:0 -> rel_path", + "CALLS tests/test_project_name_flag.py:0 -> repo_path", + "CALLS tests/test_project_name_flag.py:0 -> structure_processor", + "CALLS tests/test_project_name_flag.py:0 -> temp_repo", + "CALLS tests/test_project_name_flag.py:0 -> test_call_processor_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_class_qualified_names_use_override", + "CALLS tests/test_project_name_flag.py:0 -> test_default_empty_string_uses_directory_name", + "CALLS tests/test_project_name_flag.py:0 -> test_default_none_uses_directory_name", + "CALLS tests/test_project_name_flag.py:0 -> test_default_qualified_names_use_directory", + "CALLS tests/test_project_name_flag.py:0 -> test_default_uses_directory_name", + "CALLS tests/test_project_name_flag.py:0 -> test_default_whitespace_only_uses_directory_name", + "CALLS tests/test_project_name_flag.py:0 -> test_definition_processor_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_factory_default_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_factory_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_function_qualified_names_use_override", + "CALLS tests/test_project_name_flag.py:0 -> test_generic_dir_name_main", + "CALLS tests/test_project_name_flag.py:0 -> test_generic_dir_name_src", + "CALLS tests/test_project_name_flag.py:0 -> test_import_processor_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_module_qualified_names_use_override", + "CALLS tests/test_project_name_flag.py:0 -> test_nested_same_name_parent", + "CALLS tests/test_project_name_flag.py:0 -> test_override_simple", + "CALLS tests/test_project_name_flag.py:0 -> test_override_vs_default_different_names", + "CALLS tests/test_project_name_flag.py:0 -> test_override_with_dots", + "CALLS 
tests/test_project_name_flag.py:0 -> test_override_with_hyphens", + "CALLS tests/test_project_name_flag.py:0 -> test_package_qualified_names_use_override", + "CALLS tests/test_project_name_flag.py:0 -> test_structure_processor_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_type_inference_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_version_named_directory", + "CALLS tests/test_project_name_flag.py:0 -> type_inference", + "CALLS tests/test_project_name_flag.py:0 -> updater", + "CALLS tests/test_project_naming.py:0 -> name", + "CALLS tests/test_project_naming.py:0 -> repo", + "CALLS tests/test_project_naming.py:0 -> test_derive_project_name_disambiguates_same_basename", + "CALLS tests/test_project_naming.py:0 -> test_derive_project_name_fallback_for_root", + "CALLS tests/test_project_naming.py:0 -> test_derive_project_name_includes_basename", + "CALLS tests/test_project_naming.py:0 -> test_derive_project_name_is_stable", + "CALLS tests/test_project_naming.py:0 -> test_derive_project_name_slugifies_special_chars", + "CALLS tests/test_project_naming.py:0 -> test_resolve_repo_path_dot_falls_back_to_cwd", + "CALLS tests/test_project_naming.py:0 -> test_resolve_repo_path_empty_falls_back_to_cwd", + "CALLS tests/test_project_naming.py:0 -> test_resolve_repo_path_explicit_wins", + "CALLS tests/test_project_naming.py:0 -> test_resolve_repo_path_uses_target_default", + "CALLS tests/test_property_getter_calls.py:0 -> RelationshipType", + "CALLS tests/test_property_getter_calls.py:0 -> TestPropertyGetterCalls", + "CALLS tests/test_property_getter_calls.py:0 -> ensure_node_batch", + "CALLS tests/test_property_getter_calls.py:0 -> ensure_relationship_batch", + "CALLS tests/test_property_getter_calls.py:0 -> execute_write", + "CALLS tests/test_property_getter_calls.py:0 -> fetch_all", + "CALLS tests/test_property_getter_calls.py:0 -> flush_all", + "CALLS tests/test_property_getter_calls.py:0 -> graph_updater", + "CALLS 
tests/test_property_getter_calls.py:0 -> repo_path", + "CALLS tests/test_property_getter_calls.py:0 -> status", + "CALLS tests/test_property_getter_calls.py:0 -> test_property_access_only_emits_the_getter_edge", + "CALLS tests/test_property_getter_calls.py:0 -> test_property_access_via_self_is_a_call", + "CALLS tests/test_property_getter_calls.py:0 -> test_property_access_via_typed_param_is_a_call", + "CALLS tests/test_property_getter_calls.py:0 -> test_regular_method_call_is_unaffected", + "CALLS tests/test_property_return_type_chain.py:0 -> RelationshipType", + "CALLS tests/test_property_return_type_chain.py:0 -> TestPropertyReturnTypeChain", + "CALLS tests/test_property_return_type_chain.py:0 -> ensure_node_batch", + "CALLS tests/test_property_return_type_chain.py:0 -> ensure_relationship_batch", + "CALLS tests/test_property_return_type_chain.py:0 -> execute_write", + "CALLS tests/test_property_return_type_chain.py:0 -> fetch_all", + "CALLS tests/test_property_return_type_chain.py:0 -> flush_all", + "CALLS tests/test_property_return_type_chain.py:0 -> graph_updater", + "CALLS tests/test_property_return_type_chain.py:0 -> name", + "CALLS tests/test_property_return_type_chain.py:0 -> repo_path", + "CALLS tests/test_property_return_type_chain.py:0 -> test_chained_call_through_property_resolves_to_return_type_class", + "CALLS tests/test_property_return_type_chain.py:0 -> test_does_not_resolve_to_same_class_method_of_same_name", + "CALLS tests/test_property_return_type_chain.py:0 -> type", + "CALLS tests/test_protobuf_end_to_end.py:0 -> add", + "CALLS tests/test_protobuf_end_to_end.py:0 -> graph_updater", + "CALLS tests/test_protobuf_end_to_end.py:0 -> index", + "CALLS tests/test_protobuf_end_to_end.py:0 -> name", + "CALLS tests/test_protobuf_end_to_end.py:0 -> nodes", + "CALLS tests/test_protobuf_end_to_end.py:0 -> parent", + "CALLS tests/test_protobuf_end_to_end.py:0 -> relationships", + "CALLS tests/test_protobuf_end_to_end.py:0 -> 
test_comprehensive_pipeline_produces_valid_artifact_joint", + "CALLS tests/test_protobuf_end_to_end.py:0 -> test_comprehensive_pipeline_produces_valid_artifacts_split_index", + "CALLS tests/test_protobuf_end_to_end.py:0 -> updater", + "CALLS tests/test_protobuf_service.py:0 -> NodeLabel", + "CALLS tests/test_protobuf_service.py:0 -> NodeType", + "CALLS tests/test_protobuf_service.py:0 -> RelationshipType", + "CALLS tests/test_protobuf_service.py:0 -> index", + "CALLS tests/test_protobuf_service.py:0 -> is_exported", + "CALLS tests/test_protobuf_service.py:0 -> name", + "CALLS tests/test_protobuf_service.py:0 -> nodes", + "CALLS tests/test_protobuf_service.py:0 -> relationships", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_node_batch_no_message_class_logs_warning", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_node_batch_no_oneof_mapping_logs_warning", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_dedup", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_dedup_with_properties_merge", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_invalid_empty_source", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_invalid_empty_target", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_none_values", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_unknown_rel_type", + "CALLS tests/test_protobuf_service.py:0 -> test_protobuf_ingestor_joint_serialization_and_deserialization", + "CALLS tests/test_protobuf_service.py:0 -> test_protobuf_ingestor_split_index_serialization_and_deserialization", + "CALLS tests/test_protobuf_service.py:0 -> type", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> DefinitionProcessor", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> LanguageHandler", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> PythonHandler", + "CALLS 
tests/test_protocol_dispatch_resolution.py:0 -> RelationshipType", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> TestProtocolDispatchResolution", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> _extract_decorators", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> execute_write", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> extract_decorators", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> fetch_all", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> flush_all", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> get_handler", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> graph_updater", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> handler", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> parent", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> repo_path", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> test_dispatches_to_all_conformers", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> test_dispatches_to_concrete_conformer", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> test_does_not_emit_protocol_stub_edge", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> type", + "CALLS tests/test_protocol_impl_resolution.py:0 -> FunctionRegistryTrie", + "CALLS tests/test_protocol_impl_resolution.py:0 -> FunctionRegistryTrieProtocol", + "CALLS tests/test_protocol_impl_resolution.py:0 -> RelationshipType", + "CALLS tests/test_protocol_impl_resolution.py:0 -> TestProtocolImplResolution", + "CALLS tests/test_protocol_impl_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_protocol_impl_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_protocol_impl_resolution.py:0 -> execute_write", + "CALLS tests/test_protocol_impl_resolution.py:0 -> fetch_all", + "CALLS 
tests/test_protocol_impl_resolution.py:0 -> flush_all", + "CALLS tests/test_protocol_impl_resolution.py:0 -> get", + "CALLS tests/test_protocol_impl_resolution.py:0 -> graph_updater", + "CALLS tests/test_protocol_impl_resolution.py:0 -> repo_path", + "CALLS tests/test_protocol_impl_resolution.py:0 -> test_does_not_resolve_to_protocol_stub", + "CALLS tests/test_protocol_impl_resolution.py:0 -> test_naming_convention_disambiguates_from_other_conformer", + "CALLS tests/test_protocol_impl_resolution.py:0 -> test_protocol_typed_call_resolves_to_concrete_implementer", + "CALLS tests/test_protocol_impl_resolution.py:0 -> type", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> RelationshipType", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> TestProtocolOperatorDispatch", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> ensure_node_batch", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> ensure_relationship_batch", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> execute_write", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> fetch_all", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> flush_all", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> graph_updater", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> name", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> parent", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> repo_path", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> snapshot", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> test_dunder_only_on_implementer_resolves", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> test_protocol_stub_not_emitted", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> test_subscript_and_membership_reach_structural_conformer", + "CALLS tests/test_provider_classes.py:0 -> CustomProvider", + "CALLS tests/test_provider_classes.py:0 -> GoogleProviderType", + "CALLS tests/test_provider_classes.py:0 -> ModelProvider", + "CALLS 
tests/test_provider_classes.py:0 -> Provider", + "CALLS tests/test_provider_classes.py:0 -> TestAnthropicProvider", + "CALLS tests/test_provider_classes.py:0 -> TestAzureOpenAIProvider", + "CALLS tests/test_provider_classes.py:0 -> TestGoogleProvider", + "CALLS tests/test_provider_classes.py:0 -> TestLiteLLMProvider", + "CALLS tests/test_provider_classes.py:0 -> TestModelCreation", + "CALLS tests/test_provider_classes.py:0 -> TestOllamaProvider", + "CALLS tests/test_provider_classes.py:0 -> TestOpenAIProvider", + "CALLS tests/test_provider_classes.py:0 -> TestProviderRegistry", + "CALLS tests/test_provider_classes.py:0 -> get", + "CALLS tests/test_provider_classes.py:0 -> mock_settings", + "CALLS tests/test_provider_classes.py:0 -> provider_name", + "CALLS tests/test_provider_classes.py:0 -> test_anthropic_api_key_from_env", + "CALLS tests/test_provider_classes.py:0 -> test_anthropic_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_anthropic_model_creation", + "CALLS tests/test_provider_classes.py:0 -> test_anthropic_model_enables_prompt_caching", + "CALLS tests/test_provider_classes.py:0 -> test_anthropic_validation_error", + "CALLS tests/test_provider_classes.py:0 -> test_azure_api_key_from_env", + "CALLS tests/test_provider_classes.py:0 -> test_azure_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_azure_model_creation", + "CALLS tests/test_provider_classes.py:0 -> test_azure_validation_error_no_endpoint", + "CALLS tests/test_provider_classes.py:0 -> test_azure_validation_error_no_key", + "CALLS tests/test_provider_classes.py:0 -> test_get_invalid_provider", + "CALLS tests/test_provider_classes.py:0 -> test_get_litellm_provider", + "CALLS tests/test_provider_classes.py:0 -> test_get_valid_providers", + "CALLS tests/test_provider_classes.py:0 -> test_google_gla_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_google_gla_validation_error", + "CALLS tests/test_provider_classes.py:0 -> 
test_google_model_creation_with_thinking_budget", + "CALLS tests/test_provider_classes.py:0 -> test_google_model_creation_without_thinking_budget", + "CALLS tests/test_provider_classes.py:0 -> test_google_thinking_budget", + "CALLS tests/test_provider_classes.py:0 -> test_google_vertex_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_google_vertex_validation_error", + "CALLS tests/test_provider_classes.py:0 -> test_list_providers", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_default_endpoint", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_model_creation", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_no_endpoint_validation_error", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_validation_connection_error", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_validation_fallback_to_models_endpoint", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_validation_server_not_running", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_validation_success", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_custom_endpoint", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_model_creation", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_validation_connection_error", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_validation_server_not_running", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_validation_success", + "CALLS tests/test_provider_classes.py:0 -> test_openai_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_openai_custom_endpoint", + "CALLS tests/test_provider_classes.py:0 -> test_openai_model_creation", + "CALLS tests/test_provider_classes.py:0 -> test_openai_validation_error", + "CALLS tests/test_provider_classes.py:0 -> test_register_custom_provider", + "CALLS 
tests/test_provider_configuration.py:0 -> TestProviderConfiguration", + "CALLS tests/test_provider_configuration.py:0 -> active_cypher_config", + "CALLS tests/test_provider_configuration.py:0 -> active_orchestrator_config", + "CALLS tests/test_provider_configuration.py:0 -> clear", + "CALLS tests/test_provider_configuration.py:0 -> test_bare_model_name_parsing", + "CALLS tests/test_provider_configuration.py:0 -> test_batch_size_validation", + "CALLS tests/test_provider_configuration.py:0 -> test_custom_model_names_with_colons", + "CALLS tests/test_provider_configuration.py:0 -> test_default_fallback_behavior", + "CALLS tests/test_provider_configuration.py:0 -> test_explicit_provider_configuration_from_env", + "CALLS tests/test_provider_configuration.py:0 -> test_google_vertex_ai_configuration", + "CALLS tests/test_provider_configuration.py:0 -> test_mixed_provider_configuration", + "CALLS tests/test_provider_configuration.py:0 -> test_ollama_env_vars_respected_not_ignored", + "CALLS tests/test_provider_configuration.py:0 -> test_openai_custom_endpoint", + "CALLS tests/test_provider_configuration.py:0 -> test_runtime_provider_override", + "CALLS tests/test_provider_configuration.py:0 -> test_thinking_budget_configuration", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> ImportProcessor", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> SupportedLanguage", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> TestComplexScenariosWithRealParsing", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> TestEdgeCasesWithRealParsing", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> TestForLoopAnalysisWithRealParsing", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> TestListComprehensionAnalysisWithRealParsing", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> TestParameterAnalysisWithRealParsing", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> 
TestSelfAssignmentAnalysisWithRealParsing", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> TestTraverseSinglePassWithQueries", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> add", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> children", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> decorator", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> engine", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> engine_with_queries", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> find_with_prefix", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> import_processor", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> items", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> mock_ast_cache", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> mock_function_registry", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> name", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> process", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> processor", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> python_parser", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> repo", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> repo_path", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_async_function_parameters", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_classmethod_with_cls_parameter", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_decorated_function", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_empty_function_body", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_for_loop_with_list_literal", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_for_loop_with_range", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_for_loop_with_tuple_unpacking", + "CALLS 
tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_args_kwargs", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_complex_type_hints", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_default_values", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_only_docstring", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_typed_parameters", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_walrus_operator", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_generator_function", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_init_with_self_assignments", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_lambda_in_function", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_list_comprehension_variable", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_method_accessing_instance_vars", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_method_with_all_variable_types", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_method_with_self_parameter", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_nested_for_loops", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_nested_list_comprehension", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_traverse_with_query_path", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_traverse_with_query_path_caches_return_stmts", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> text", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> type", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> type_inference", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> value", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> ImportProcessor", + "CALLS 
tests/test_py_variable_analyzer_unit.py:0 -> NodeType", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> TestAnalyzeForClause", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> TestCalculateMatchScore", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> TestCollectAvailableClasses", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> TestExtractVariableName", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> TestFindBestClassMatch", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> TestInferListElementType", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> TestInferMethodReturnElementType", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> TestInferVariableElementType", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> TestProcessParameter", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> TestProcessTypedParameter", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> children", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> engine", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> find_with_prefix", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> import_processor", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> items", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> mock_ast_cache", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> mock_function_registry", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> mock_import_processor", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> processor", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> repo", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> repo_path", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_collects_classes_from_registry", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_collects_imported_classes", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_contains_match_returns_scaled_score", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_exact_match_case_insensitive", + 
"CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_exact_match_returns_100", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_extracts_class_from_call_in_list", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_extracts_identifier", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_extracts_loop_variable_type", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_extracts_typed_parameter", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_falls_back_to_method_return", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_finds_exact_match", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_finds_suffix_match", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_matches_all_prefix", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_matches_plural_suffix", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_no_match_returns_zero", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_prefers_exact_over_suffix", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_empty_for_no_classes", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_known_type", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_none_for_empty_list", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_none_for_empty_text", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_none_for_no_match", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_none_for_non_collection_name", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_none_for_non_identifier", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_routes_identifier_to_untyped", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_routes_typed_default_parameter", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_routes_typed_parameter", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_skips_list_type", + "CALLS 
tests/test_py_variable_analyzer_unit.py:0 -> test_skips_lowercase_function", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_skips_missing_left_node", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_skips_missing_name", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_skips_missing_type", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_suffix_match_class_ends_with_param", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_suffix_match_param_ends_with_class", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> text", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> type_inference", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> NodeType", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> TestCallResolutionFallback", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> call_processor", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> factory", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> graph_updater", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> main", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> mock_ingestor", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> mock_updater", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> repo", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> repo_path", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> test_fallback_chooses_closest_candidate", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> test_fallback_with_mixed_function_types", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> test_fallback_with_no_candidates", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> test_fallback_with_single_candidate", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> test_same_module_resolution_bypasses_fallback", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> updater", + "CALLS 
tests/test_python_class_method_relationship.py:0 -> GraphUpdater", + "CALLS tests/test_python_class_method_relationship.py:0 -> class_qn", + "CALLS tests/test_python_class_method_relationship.py:0 -> main", + "CALLS tests/test_python_class_method_relationship.py:0 -> method_qn", + "CALLS tests/test_python_class_method_relationship.py:0 -> mock_ingestor", + "CALLS tests/test_python_class_method_relationship.py:0 -> my_method", + "CALLS tests/test_python_class_method_relationship.py:0 -> name", + "CALLS tests/test_python_class_method_relationship.py:0 -> relationships", + "CALLS tests/test_python_class_method_relationship.py:0 -> temp_project", + "CALLS tests/test_python_class_method_relationship.py:0 -> temp_repo", + "CALLS tests/test_python_class_method_relationship.py:0 -> test_defines_method_relationship_is_created", + "CALLS tests/test_python_class_method_relationship.py:0 -> up", + "CALLS tests/test_python_context_managers.py:0 -> context_manager_project", + "CALLS tests/test_python_context_managers.py:0 -> decorator", + "CALLS tests/test_python_context_managers.py:0 -> ensure_relationship_batch", + "CALLS tests/test_python_context_managers.py:0 -> execute", + "CALLS tests/test_python_context_managers.py:0 -> function_qn", + "CALLS tests/test_python_context_managers.py:0 -> graph_updater", + "CALLS tests/test_python_context_managers.py:0 -> mock_ingestor", + "CALLS tests/test_python_context_managers.py:0 -> name", + "CALLS tests/test_python_context_managers.py:0 -> process", + "CALLS tests/test_python_context_managers.py:0 -> process_file", + "CALLS tests/test_python_context_managers.py:0 -> processor", + "CALLS tests/test_python_context_managers.py:0 -> read", + "CALLS tests/test_python_context_managers.py:0 -> repo_path", + "CALLS tests/test_python_context_managers.py:0 -> save", + "CALLS tests/test_python_context_managers.py:0 -> temp_repo", + "CALLS tests/test_python_context_managers.py:0 -> test_async_context_manager_parsing", + "CALLS 
tests/test_python_context_managers.py:0 -> test_context_manager_function_calls", + "CALLS tests/test_python_context_managers.py:0 -> test_context_manager_function_definitions", + "CALLS tests/test_python_context_managers.py:0 -> test_context_manager_in_control_structures", + "CALLS tests/test_python_context_managers.py:0 -> test_custom_context_manager_class", + "CALLS tests/test_python_context_managers.py:0 -> test_decorated_context_manager_function", + "CALLS tests/test_python_context_managers.py:0 -> up", + "CALLS tests/test_python_context_managers.py:0 -> updater", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> clear", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> deep_hierarchy_project", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> get", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> handler", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> load", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> main", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> mock_ingestor", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> name", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> parent", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> processor", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> run", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> save", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> singleton_project", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> start", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> temp_repo", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> test_chained_cross_file_calls", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> test_deep_package_hierarchy_cross_file_calls", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> 
test_singleton_pattern_cross_file_calls", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> up", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> value", + "CALLS tests/test_python_decorators.py:0 -> class_qn", + "CALLS tests/test_python_decorators.py:0 -> decorator", + "CALLS tests/test_python_decorators.py:0 -> decorator_project", + "CALLS tests/test_python_decorators.py:0 -> graph_updater", + "CALLS tests/test_python_decorators.py:0 -> method_calls", + "CALLS tests/test_python_decorators.py:0 -> method_qn", + "CALLS tests/test_python_decorators.py:0 -> mock_ingestor", + "CALLS tests/test_python_decorators.py:0 -> name", + "CALLS tests/test_python_decorators.py:0 -> repo_path", + "CALLS tests/test_python_decorators.py:0 -> test_class_decorators", + "CALLS tests/test_python_decorators.py:0 -> test_decorator_with_complex_arguments", + "CALLS tests/test_python_decorators.py:0 -> test_empty_decorators_for_undecorated_functions", + "CALLS tests/test_python_decorators.py:0 -> test_method_decorators", + "CALLS tests/test_python_decorators.py:0 -> test_nested_function_decorators", + "CALLS tests/test_python_decorators.py:0 -> test_simple_function_decorators", + "CALLS tests/test_python_decorators.py:0 -> timing_decorator", + "CALLS tests/test_python_decorators.py:0 -> updater", + "CALLS tests/test_python_decorators.py:0 -> value", + "CALLS tests/test_python_decorators.py:0 -> wrapper", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> GraphUpdater", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> NodeType", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> class_method_project", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> main", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> method_calls", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> mock_ingestor", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> name", + "CALLS 
tests/test_python_imported_class_method_calls.py:0 -> process", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> temp_repo", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> test_cross_file_object_method_chaining", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> test_imported_class_method_calls_are_detected", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> up", + "CALLS tests/test_python_imports.py:0 -> load", + "CALLS tests/test_python_imports.py:0 -> mock_ingestor", + "CALLS tests/test_python_imports.py:0 -> name", + "CALLS tests/test_python_imports.py:0 -> parent", + "CALLS tests/test_python_imports.py:0 -> parse", + "CALLS tests/test_python_imports.py:0 -> python_imports_project", + "CALLS tests/test_python_imports.py:0 -> relationships", + "CALLS tests/test_python_imports.py:0 -> render", + "CALLS tests/test_python_imports.py:0 -> temp_repo", + "CALLS tests/test_python_imports.py:0 -> test_complex_import_patterns", + "CALLS tests/test_python_imports.py:0 -> test_import_aliases_and_renaming", + "CALLS tests/test_python_imports.py:0 -> test_import_error_handling", + "CALLS tests/test_python_imports.py:0 -> test_import_relationships_comprehensive", + "CALLS tests/test_python_imports.py:0 -> test_relative_imports", + "CALLS tests/test_python_imports.py:0 -> test_standard_library_imports", + "CALLS tests/test_python_imports.py:0 -> test_third_party_framework_imports", + "CALLS tests/test_python_imports.py:0 -> text", + "CALLS tests/test_python_inheritance.py:0 -> engine", + "CALLS tests/test_python_inheritance.py:0 -> ensure_relationship_batch", + "CALLS tests/test_python_inheritance.py:0 -> graph_updater", + "CALLS tests/test_python_inheritance.py:0 -> inheritance_project", + "CALLS tests/test_python_inheritance.py:0 -> mock_ingestor", + "CALLS tests/test_python_inheritance.py:0 -> mro_diamond_project", + "CALLS tests/test_python_inheritance.py:0 -> name", + "CALLS tests/test_python_inheritance.py:0 -> 
parent", + "CALLS tests/test_python_inheritance.py:0 -> relationships", + "CALLS tests/test_python_inheritance.py:0 -> repo_path", + "CALLS tests/test_python_inheritance.py:0 -> test_asymmetric_diamond_mro", + "CALLS tests/test_python_inheritance.py:0 -> test_complex_multiple_inheritance_mro", + "CALLS tests/test_python_inheritance.py:0 -> test_deep_diamond_chain_mro", + "CALLS tests/test_python_inheritance.py:0 -> test_deep_inheritance_chain", + "CALLS tests/test_python_inheritance.py:0 -> test_diamond_inheritance_mro_basic", + "CALLS tests/test_python_inheritance.py:0 -> test_diamond_inheritance_mro_override_at_point", + "CALLS tests/test_python_inheritance.py:0 -> test_inheritance_relationships_are_created", + "CALLS tests/test_python_inheritance.py:0 -> test_inherited_method_calls_are_resolved", + "CALLS tests/test_python_inheritance.py:0 -> test_method_overrides_are_detected", + "CALLS tests/test_python_inheritance.py:0 -> test_mro_nearest_override_selection", + "CALLS tests/test_python_inheritance.py:0 -> test_multiple_inheritance_is_handled", + "CALLS tests/test_python_inheritance.py:0 -> test_super_calls_are_tracked", + "CALLS tests/test_python_inheritance.py:0 -> updater", + "CALLS tests/test_python_nested_functions.py:0 -> decorator", + "CALLS tests/test_python_nested_functions.py:0 -> ensure_relationship_batch", + "CALLS tests/test_python_nested_functions.py:0 -> graph_updater", + "CALLS tests/test_python_nested_functions.py:0 -> main", + "CALLS tests/test_python_nested_functions.py:0 -> mock_ingestor", + "CALLS tests/test_python_nested_functions.py:0 -> name", + "CALLS tests/test_python_nested_functions.py:0 -> nested_functions_project", + "CALLS tests/test_python_nested_functions.py:0 -> parent", + "CALLS tests/test_python_nested_functions.py:0 -> relationships", + "CALLS tests/test_python_nested_functions.py:0 -> repo_path", + "CALLS tests/test_python_nested_functions.py:0 -> temp_repo", + "CALLS tests/test_python_nested_functions.py:0 -> 
test_function_calls_are_tracked", + "CALLS tests/test_python_nested_functions.py:0 -> test_function_in_class_method", + "CALLS tests/test_python_nested_functions.py:0 -> test_nested_function_definitions_are_created", + "CALLS tests/test_python_nested_functions.py:0 -> test_nested_function_in_staticmethod_not_ingested_as_method", + "CALLS tests/test_python_nested_functions.py:0 -> test_nested_function_parent_child_relationships", + "CALLS tests/test_python_nested_functions.py:0 -> up", + "CALLS tests/test_python_nested_functions.py:0 -> updater", + "CALLS tests/test_python_nested_functions.py:0 -> wrapper", + "CALLS tests/test_python_real_world.py:0 -> add", + "CALLS tests/test_python_real_world.py:0 -> ensure_relationship_batch", + "CALLS tests/test_python_real_world.py:0 -> execute", + "CALLS tests/test_python_real_world.py:0 -> export", + "CALLS tests/test_python_real_world.py:0 -> get", + "CALLS tests/test_python_real_world.py:0 -> index", + "CALLS tests/test_python_real_world.py:0 -> items", + "CALLS tests/test_python_real_world.py:0 -> main", + "CALLS tests/test_python_real_world.py:0 -> metadata", + "CALLS tests/test_python_real_world.py:0 -> mock_ingestor", + "CALLS tests/test_python_real_world.py:0 -> name", + "CALLS tests/test_python_real_world.py:0 -> nodes", + "CALLS tests/test_python_real_world.py:0 -> put", + "CALLS tests/test_python_real_world.py:0 -> relationships", + "CALLS tests/test_python_real_world.py:0 -> render", + "CALLS tests/test_python_real_world.py:0 -> status", + "CALLS tests/test_python_real_world.py:0 -> temp_repo", + "CALLS tests/test_python_real_world.py:0 -> test_api_service_calls", + "CALLS tests/test_python_real_world.py:0 -> test_cross_language_api_structure", + "CALLS tests/test_python_real_world.py:0 -> test_flask_controller_imports", + "CALLS tests/test_python_real_world.py:0 -> test_flask_no_calls_to_class_nodes", + "CALLS tests/test_python_real_world.py:0 -> test_flask_route_controller_calls", + "CALLS 
tests/test_python_real_world.py:0 -> test_schema_inheritance_detection", + "CALLS tests/test_python_real_world.py:0 -> test_typescript_hook_usage", + "CALLS tests/test_python_real_world.py:0 -> test_typescript_structure_detection", + "CALLS tests/test_python_real_world.py:0 -> text", + "CALLS tests/test_python_real_world.py:0 -> todo_app_project", + "CALLS tests/test_python_real_world.py:0 -> type", + "CALLS tests/test_python_real_world.py:0 -> up", + "CALLS tests/test_python_relative_import_resolution.py:0 -> TestRelativeImportResolution", + "CALLS tests/test_python_relative_import_resolution.py:0 -> children", + "CALLS tests/test_python_relative_import_resolution.py:0 -> factory", + "CALLS tests/test_python_relative_import_resolution.py:0 -> graph_updater", + "CALLS tests/test_python_relative_import_resolution.py:0 -> import_processor", + "CALLS tests/test_python_relative_import_resolution.py:0 -> mock_ingestor", + "CALLS tests/test_python_relative_import_resolution.py:0 -> mock_updater", + "CALLS tests/test_python_relative_import_resolution.py:0 -> module_qn", + "CALLS tests/test_python_relative_import_resolution.py:0 -> name", + "CALLS tests/test_python_relative_import_resolution.py:0 -> parent", + "CALLS tests/test_python_relative_import_resolution.py:0 -> repo", + "CALLS tests/test_python_relative_import_resolution.py:0 -> repo_path", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_double_dot_relative_import", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_relative_import_complex_module_path", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_relative_import_edge_case_shallow_module", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_relative_import_to_package_root", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_relative_import_without_module_name", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_single_dot_relative_import", + "CALLS 
tests/test_python_relative_import_resolution.py:0 -> test_triple_dot_relative_import", + "CALLS tests/test_python_relative_import_resolution.py:0 -> text", + "CALLS tests/test_python_relative_import_resolution.py:0 -> type", + "CALLS tests/test_python_relative_import_resolution.py:0 -> up", + "CALLS tests/test_python_relative_import_resolution.py:0 -> updater", + "CALLS tests/test_python_return_type_inference.py:0 -> NodeType", + "CALLS tests/test_python_return_type_inference.py:0 -> ensure_relationship_batch", + "CALLS tests/test_python_return_type_inference.py:0 -> factory", + "CALLS tests/test_python_return_type_inference.py:0 -> graph_updater", + "CALLS tests/test_python_return_type_inference.py:0 -> method_calls", + "CALLS tests/test_python_return_type_inference.py:0 -> mock_ingestor", + "CALLS tests/test_python_return_type_inference.py:0 -> name", + "CALLS tests/test_python_return_type_inference.py:0 -> processor", + "CALLS tests/test_python_return_type_inference.py:0 -> repo_path", + "CALLS tests/test_python_return_type_inference.py:0 -> return_type_project", + "CALLS tests/test_python_return_type_inference.py:0 -> status", + "CALLS tests/test_python_return_type_inference.py:0 -> test_basic_return_type_inference", + "CALLS tests/test_python_return_type_inference.py:0 -> test_fluent_interface_return_types", + "CALLS tests/test_python_return_type_inference.py:0 -> test_loop_variable_return_types", + "CALLS tests/test_python_return_type_inference.py:0 -> test_nested_return_type_inference", + "CALLS tests/test_python_return_type_inference.py:0 -> test_service_method_return_types", + "CALLS tests/test_python_return_type_inference.py:0 -> type", + "CALLS tests/test_python_return_type_inference.py:0 -> updater", + "CALLS tests/test_python_span_oracle.py:0 -> name", + "CALLS tests/test_python_span_oracle.py:0 -> start", + "CALLS tests/test_python_span_oracle.py:0 -> test_cgr_matches_ast_oracle_on_python_node_spans", + "CALLS tests/test_python_span_oracle.py:0 -> 
value", + "CALLS tests/test_python_standard_library_imports.py:0 -> TestStandardLibraryImports", + "CALLS tests/test_python_standard_library_imports.py:0 -> child_by_field_name", + "CALLS tests/test_python_standard_library_imports.py:0 -> factory", + "CALLS tests/test_python_standard_library_imports.py:0 -> graph_updater", + "CALLS tests/test_python_standard_library_imports.py:0 -> import_processor", + "CALLS tests/test_python_standard_library_imports.py:0 -> main", + "CALLS tests/test_python_standard_library_imports.py:0 -> mock_ingestor", + "CALLS tests/test_python_standard_library_imports.py:0 -> mock_updater", + "CALLS tests/test_python_standard_library_imports.py:0 -> module_qn", + "CALLS tests/test_python_standard_library_imports.py:0 -> name", + "CALLS tests/test_python_standard_library_imports.py:0 -> repo_path", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_aliased_import_local_module", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_aliased_import_standard_library", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_already_prefixed_imports_unchanged", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_local_file_imports_are_prefixed", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_local_module_imports_are_prefixed", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_nested_local_module_imports", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_regular_import_dotted_local_module", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_regular_import_local_module", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_regular_import_standard_library", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_standard_library_imports_not_prefixed", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_third_party_imports_not_prefixed", + "CALLS tests/test_python_standard_library_imports.py:0 -> text", + 
"CALLS tests/test_python_standard_library_imports.py:0 -> type", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> NodeType", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> TestWildcardImportResolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> call_processor", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> factory", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> graph_updater", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> import_processor", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> mock_ingestor", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> mock_updater", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> module_qn", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> repo", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> repo_path", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_cpp_using_namespace_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_exact_import_priority_over_wildcard", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_fallback_still_works_after_wildcard_check", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_go_no_wildcard_imports", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_java_wildcard_import_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_javascript_namespace_import_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_multiple_wildcard_imports", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_python_wildcard_import_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_rust_wildcard_import_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_scala_wildcard_import_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> 
test_wildcard_with_no_matching_function", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> updater", + "CALLS tests/test_query_truncation.py:0 -> TestQueryTruncation", + "CALLS tests/test_query_truncation.py:0 -> fetch_all", + "CALLS tests/test_query_truncation.py:0 -> generate", + "CALLS tests/test_query_truncation.py:0 -> mock_cypher_gen", + "CALLS tests/test_query_truncation.py:0 -> mock_ingestor", + "CALLS tests/test_query_truncation.py:0 -> mock_settings", + "CALLS tests/test_query_truncation.py:0 -> name", + "CALLS tests/test_query_truncation.py:0 -> nodes", + "CALLS tests/test_query_truncation.py:0 -> summary", + "CALLS tests/test_query_truncation.py:0 -> test_no_truncation_when_within_limits", + "CALLS tests/test_query_truncation.py:0 -> test_row_cap_truncation", + "CALLS tests/test_query_truncation.py:0 -> test_token_truncation", + "CALLS tests/test_realtime_debounce.py:0 -> QueryProtocol", + "CALLS tests/test_realtime_debounce.py:0 -> TestCodeChangeEventHandlerDebounce", + "CALLS tests/test_realtime_debounce.py:0 -> TestDebounceIntegration", + "CALLS tests/test_realtime_debounce.py:0 -> TestDebounceValidation", + "CALLS tests/test_realtime_debounce.py:0 -> _patch_ignore", + "CALLS tests/test_realtime_debounce.py:0 -> _process_function_calls", + "CALLS tests/test_realtime_debounce.py:0 -> definition_processor", + "CALLS tests/test_realtime_debounce.py:0 -> ensure_node_batch", + "CALLS tests/test_realtime_debounce.py:0 -> ensure_relationship_batch", + "CALLS tests/test_realtime_debounce.py:0 -> execute_write", + "CALLS tests/test_realtime_debounce.py:0 -> factory", + "CALLS tests/test_realtime_debounce.py:0 -> fetch_all", + "CALLS tests/test_realtime_debounce.py:0 -> flush_all", + "CALLS tests/test_realtime_debounce.py:0 -> handler", + "CALLS tests/test_realtime_debounce.py:0 -> main", + "CALLS tests/test_realtime_debounce.py:0 -> mock_ingestor", + "CALLS tests/test_realtime_debounce.py:0 -> mock_updater", + "CALLS 
tests/test_realtime_debounce.py:0 -> name", + "CALLS tests/test_realtime_debounce.py:0 -> process", + "CALLS tests/test_realtime_debounce.py:0 -> process_file", + "CALLS tests/test_realtime_debounce.py:0 -> remove_file_from_state", + "CALLS tests/test_realtime_debounce.py:0 -> repo_path", + "CALLS tests/test_realtime_debounce.py:0 -> sample_file", + "CALLS tests/test_realtime_debounce.py:0 -> save", + "CALLS tests/test_realtime_debounce.py:0 -> send_events", + "CALLS tests/test_realtime_debounce.py:0 -> test_created_event_triggers_debounce", + "CALLS tests/test_realtime_debounce.py:0 -> test_debounce_batches_rapid_events", + "CALLS tests/test_realtime_debounce.py:0 -> test_deleted_event_triggers_debounce", + "CALLS tests/test_realtime_debounce.py:0 -> test_different_files_tracked_separately", + "CALLS tests/test_realtime_debounce.py:0 -> test_dispatch_ignores_directories", + "CALLS tests/test_realtime_debounce.py:0 -> test_handler_initialization_with_debounce", + "CALLS tests/test_realtime_debounce.py:0 -> test_handler_initialization_without_debounce", + "CALLS tests/test_realtime_debounce.py:0 -> test_handler_uses_default_constants", + "CALLS tests/test_realtime_debounce.py:0 -> test_is_relevant_filters_ignored_patterns", + "CALLS tests/test_realtime_debounce.py:0 -> test_max_wait_forces_update", + "CALLS tests/test_realtime_debounce.py:0 -> test_no_debounce_processes_immediately", + "CALLS tests/test_realtime_debounce.py:0 -> test_realistic_rapid_save_scenario", + "CALLS tests/test_realtime_debounce.py:0 -> test_single_edit_after_quiet_period", + "CALLS tests/test_realtime_debounce.py:0 -> test_thread_safety_concurrent_events", + "CALLS tests/test_realtime_debounce.py:0 -> test_timer_cleanup_after_processing", + "CALLS tests/test_realtime_debounce.py:0 -> test_validate_non_negative_float_accepts_positive", + "CALLS tests/test_realtime_debounce.py:0 -> test_validate_non_negative_float_accepts_zero", + "CALLS tests/test_realtime_debounce.py:0 -> 
test_validate_non_negative_float_rejects_negative", + "CALLS tests/test_realtime_debounce.py:0 -> type", + "CALLS tests/test_realtime_debounce.py:0 -> up", + "CALLS tests/test_realtime_debounce.py:0 -> updater", + "CALLS tests/test_realtime_event_filtering.py:0 -> QueryProtocol", + "CALLS tests/test_realtime_event_filtering.py:0 -> TestCypherDeleteFileQuery", + "CALLS tests/test_realtime_event_filtering.py:0 -> TestEventFiltering", + "CALLS tests/test_realtime_event_filtering.py:0 -> TestMixedEventSequences", + "CALLS tests/test_realtime_event_filtering.py:0 -> TestNonCodeFileHandling", + "CALLS tests/test_realtime_event_filtering.py:0 -> _AnyProtocol", + "CALLS tests/test_realtime_event_filtering.py:0 -> _bypass_protocol_check", + "CALLS tests/test_realtime_event_filtering.py:0 -> definition_processor", + "CALLS tests/test_realtime_event_filtering.py:0 -> execute_write", + "CALLS tests/test_realtime_event_filtering.py:0 -> factory", + "CALLS tests/test_realtime_event_filtering.py:0 -> flush_all", + "CALLS tests/test_realtime_event_filtering.py:0 -> handler", + "CALLS tests/test_realtime_event_filtering.py:0 -> mock_updater", + "CALLS tests/test_realtime_event_filtering.py:0 -> parse", + "CALLS tests/test_realtime_event_filtering.py:0 -> process_file", + "CALLS tests/test_realtime_event_filtering.py:0 -> process_generic_file", + "CALLS tests/test_realtime_event_filtering.py:0 -> structure_processor", + "CALLS tests/test_realtime_event_filtering.py:0 -> temp_repo", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_access_event_is_ignored", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_closed_no_write_event_is_ignored", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_created_event_is_processed", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_delete_file_only_targets_specific_path", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_deleted_event_is_processed", + "CALLS tests/test_realtime_event_filtering.py:0 -> 
test_json_file_creates_file_node", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_markdown_file_creates_file_node", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_modified_event_is_processed", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_multiple_files_changed", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_non_code_file_deletion_removes_file_node", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_non_code_file_has_no_module_node", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_opened_event_is_ignored", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_rapid_create_modify_delete", + "CALLS tests/test_realtime_event_filtering.py:0 -> text", + "CALLS tests/test_realtime_updater.py:0 -> QueryProtocol", + "CALLS tests/test_realtime_updater.py:0 -> _AnyProtocol", + "CALLS tests/test_realtime_updater.py:0 -> _bypass_protocol_check", + "CALLS tests/test_realtime_updater.py:0 -> definition_processor", + "CALLS tests/test_realtime_updater.py:0 -> event_handler", + "CALLS tests/test_realtime_updater.py:0 -> execute_write", + "CALLS tests/test_realtime_updater.py:0 -> factory", + "CALLS tests/test_realtime_updater.py:0 -> flush_all", + "CALLS tests/test_realtime_updater.py:0 -> handler", + "CALLS tests/test_realtime_updater.py:0 -> mock_updater", + "CALLS tests/test_realtime_updater.py:0 -> nodes", + "CALLS tests/test_realtime_updater.py:0 -> process_file", + "CALLS tests/test_realtime_updater.py:0 -> process_generic_file", + "CALLS tests/test_realtime_updater.py:0 -> structure_processor", + "CALLS tests/test_realtime_updater.py:0 -> temp_repo", + "CALLS tests/test_realtime_updater.py:0 -> test_directory_creation_is_ignored", + "CALLS tests/test_realtime_updater.py:0 -> test_file_creation_flow", + "CALLS tests/test_realtime_updater.py:0 -> test_file_deletion_flow", + "CALLS tests/test_realtime_updater.py:0 -> test_file_modification_flow", + "CALLS tests/test_realtime_updater.py:0 -> 
test_irrelevant_files_are_ignored", + "CALLS tests/test_realtime_updater.py:0 -> test_non_code_files_create_file_nodes", + "CALLS tests/test_reconcile_embeddings.py:0 -> MemgraphIngestor", + "CALLS tests/test_reconcile_embeddings.py:0 -> TestReconcileEmbeddings", + "CALLS tests/test_reconcile_embeddings.py:0 -> fetch_all", + "CALLS tests/test_reconcile_embeddings.py:0 -> graph_service", + "CALLS tests/test_reconcile_embeddings.py:0 -> graph_updater", + "CALLS tests/test_reconcile_embeddings.py:0 -> log_messages", + "CALLS tests/test_reconcile_embeddings.py:0 -> repo_path", + "CALLS tests/test_reconcile_embeddings.py:0 -> temp_repo", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_handles_verify_fn_exception", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_logs_ok_when_all_found", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_logs_warning_when_ids_missing", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_noop_when_expected_empty", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_sample_ids_in_warning", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_sample_limited_to_ten", + "CALLS tests/test_reconcile_embeddings.py:0 -> updater", + "CALLS tests/test_reexport_chain_resolution.py:0 -> PythonTypeInferenceEngine", + "CALLS tests/test_reexport_chain_resolution.py:0 -> RelationshipType", + "CALLS tests/test_reexport_chain_resolution.py:0 -> TestReexportChainResolution", + "CALLS tests/test_reexport_chain_resolution.py:0 -> TypeInferenceEngine", + "CALLS tests/test_reexport_chain_resolution.py:0 -> build_local_variable_type_map", + "CALLS tests/test_reexport_chain_resolution.py:0 -> engine", + "CALLS tests/test_reexport_chain_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_reexport_chain_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_reexport_chain_resolution.py:0 -> execute_write", + "CALLS tests/test_reexport_chain_resolution.py:0 -> export", + "CALLS tests/test_reexport_chain_resolution.py:0 -> 
fetch_all", + "CALLS tests/test_reexport_chain_resolution.py:0 -> flush_all", + "CALLS tests/test_reexport_chain_resolution.py:0 -> graph_updater", + "CALLS tests/test_reexport_chain_resolution.py:0 -> name", + "CALLS tests/test_reexport_chain_resolution.py:0 -> parent", + "CALLS tests/test_reexport_chain_resolution.py:0 -> python_type_inference", + "CALLS tests/test_reexport_chain_resolution.py:0 -> repo_path", + "CALLS tests/test_reexport_chain_resolution.py:0 -> test_does_not_collapse_to_caller_same_named_method", + "CALLS tests/test_reexport_chain_resolution.py:0 -> test_property_typed_by_reexport_resolves_to_real_class", + "CALLS tests/test_relative_import_package_init.py:0 -> RelationshipType", + "CALLS tests/test_relative_import_package_init.py:0 -> TestRelativeImportPackageInit", + "CALLS tests/test_relative_import_package_init.py:0 -> ensure_node_batch", + "CALLS tests/test_relative_import_package_init.py:0 -> ensure_relationship_batch", + "CALLS tests/test_relative_import_package_init.py:0 -> execute_write", + "CALLS tests/test_relative_import_package_init.py:0 -> fetch_all", + "CALLS tests/test_relative_import_package_init.py:0 -> flush_all", + "CALLS tests/test_relative_import_package_init.py:0 -> graph_updater", + "CALLS tests/test_relative_import_package_init.py:0 -> name", + "CALLS tests/test_relative_import_package_init.py:0 -> parent", + "CALLS tests/test_relative_import_package_init.py:0 -> repo_path", + "CALLS tests/test_relative_import_package_init.py:0 -> test_from_dot_import_in_package_init_targets_own_submodule", + "CALLS tests/test_relative_import_root_level.py:0 -> RelationshipType", + "CALLS tests/test_relative_import_root_level.py:0 -> TestRelativeImportRootLevel", + "CALLS tests/test_relative_import_root_level.py:0 -> cli", + "CALLS tests/test_relative_import_root_level.py:0 -> ensure_node_batch", + "CALLS tests/test_relative_import_root_level.py:0 -> ensure_relationship_batch", + "CALLS tests/test_relative_import_root_level.py:0 -> 
execute_write", + "CALLS tests/test_relative_import_root_level.py:0 -> fetch_all", + "CALLS tests/test_relative_import_root_level.py:0 -> flush_all", + "CALLS tests/test_relative_import_root_level.py:0 -> graph_updater", + "CALLS tests/test_relative_import_root_level.py:0 -> name", + "CALLS tests/test_relative_import_root_level.py:0 -> repo_path", + "CALLS tests/test_relative_import_root_level.py:0 -> test_from_dot_import_submodule_at_root", + "CALLS tests/test_retrieval_eval.py:0 -> NodeLabel", + "CALLS tests/test_retrieval_eval.py:0 -> RelationshipType", + "CALLS tests/test_retrieval_eval.py:0 -> name", + "CALLS tests/test_retrieval_eval.py:0 -> repo", + "CALLS tests/test_retrieval_eval.py:0 -> run", + "CALLS tests/test_retrieval_eval.py:0 -> test_cgr_call_edges_smoke", + "CALLS tests/test_retrieval_eval.py:0 -> test_grep_call_excludes_bare_reference_but_flags_def_site", + "CALLS tests/test_retrieval_eval.py:0 -> test_grep_name_overincludes_vs_oracle", + "CALLS tests/test_retrieval_eval.py:0 -> test_grep_preserves_colon_in_path", + "CALLS tests/test_retrieval_eval.py:0 -> test_oracle_captures_first_party_calls", + "CALLS tests/test_retrieval_eval.py:0 -> test_score_retrieval_computes_prf", + "CALLS tests/test_retrieval_eval.py:0 -> value", + "CALLS tests/test_rust.py:0 -> Color", + "CALLS tests/test_rust.py:0 -> add", + "CALLS tests/test_rust.py:0 -> dead_code", + "CALLS tests/test_rust.py:0 -> export", + "CALLS tests/test_rust.py:0 -> get", + "CALLS tests/test_rust.py:0 -> index", + "CALLS tests/test_rust.py:0 -> insert", + "CALLS tests/test_rust.py:0 -> items", + "CALLS tests/test_rust.py:0 -> keys", + "CALLS tests/test_rust.py:0 -> main", + "CALLS tests/test_rust.py:0 -> metadata", + "CALLS tests/test_rust.py:0 -> mock_ingestor", + "CALLS tests/test_rust.py:0 -> name", + "CALLS tests/test_rust.py:0 -> parse", + "CALLS tests/test_rust.py:0 -> process", + "CALLS tests/test_rust.py:0 -> processor", + "CALLS tests/test_rust.py:0 -> read", + "CALLS 
tests/test_rust.py:0 -> read_file_content", + "CALLS tests/test_rust.py:0 -> relationships", + "CALLS tests/test_rust.py:0 -> repo", + "CALLS tests/test_rust.py:0 -> rust_project", + "CALLS tests/test_rust.py:0 -> save", + "CALLS tests/test_rust.py:0 -> temp_repo", + "CALLS tests/test_rust.py:0 -> test_basic_rust_functions", + "CALLS tests/test_rust.py:0 -> test_rust_advanced_edge_cases", + "CALLS tests/test_rust.py:0 -> test_rust_closures_and_lambdas", + "CALLS tests/test_rust.py:0 -> test_rust_comprehensive_integration", + "CALLS tests/test_rust.py:0 -> test_rust_error_handling", + "CALLS tests/test_rust.py:0 -> test_rust_generics_and_lifetimes", + "CALLS tests/test_rust.py:0 -> test_rust_imports_and_use_statements", + "CALLS tests/test_rust.py:0 -> test_rust_macros", + "CALLS tests/test_rust.py:0 -> test_rust_modules_and_crates", + "CALLS tests/test_rust.py:0 -> test_rust_pattern_matching", + "CALLS tests/test_rust.py:0 -> test_rust_structs_enums_unions", + "CALLS tests/test_rust.py:0 -> test_rust_traits_and_implementations", + "CALLS tests/test_rust.py:0 -> text", + "CALLS tests/test_rust.py:0 -> type", + "CALLS tests/test_rust.py:0 -> value", + "CALLS tests/test_rust.py:0 -> variants", + "CALLS tests/test_rust_advanced_types.py:0 -> close", + "CALLS tests/test_rust_advanced_types.py:0 -> execute", + "CALLS tests/test_rust_advanced_types.py:0 -> get", + "CALLS tests/test_rust_advanced_types.py:0 -> method_calls", + "CALLS tests/test_rust_advanced_types.py:0 -> mock_ingestor", + "CALLS tests/test_rust_advanced_types.py:0 -> name", + "CALLS tests/test_rust_advanced_types.py:0 -> process", + "CALLS tests/test_rust_advanced_types.py:0 -> processor", + "CALLS tests/test_rust_advanced_types.py:0 -> rust_advanced_types_project", + "CALLS tests/test_rust_advanced_types.py:0 -> temp_repo", + "CALLS tests/test_rust_advanced_types.py:0 -> test_advanced_associated_types", + "CALLS tests/test_rust_advanced_types.py:0 -> test_const_generics_advanced", + "CALLS 
tests/test_rust_advanced_types.py:0 -> test_higher_ranked_trait_bounds", + "CALLS tests/test_rust_advanced_types.py:0 -> test_phantom_types_and_markers", + "CALLS tests/test_rust_advanced_types.py:0 -> test_type_level_programming", + "CALLS tests/test_rust_advanced_types.py:0 -> type", + "CALLS tests/test_rust_advanced_types.py:0 -> value", + "CALLS tests/test_rust_call_recall.py:0 -> RelationshipType", + "CALLS tests/test_rust_call_recall.py:0 -> TestRustMacroCalls", + "CALLS tests/test_rust_call_recall.py:0 -> TestRustTurbofishCalls", + "CALLS tests/test_rust_call_recall.py:0 -> ensure_relationship_batch", + "CALLS tests/test_rust_call_recall.py:0 -> mock_ingestor", + "CALLS tests/test_rust_call_recall.py:0 -> name", + "CALLS tests/test_rust_call_recall.py:0 -> temp_repo", + "CALLS tests/test_rust_call_recall.py:0 -> test_bare_identifier_in_macro_is_not_a_call", + "CALLS tests/test_rust_call_recall.py:0 -> test_call_inside_macro_is_captured", + "CALLS tests/test_rust_call_recall.py:0 -> test_struct_literal_in_macro_is_not_a_call", + "CALLS tests/test_rust_call_recall.py:0 -> test_turbofish_call_is_captured", + "CALLS tests/test_rust_call_recall.py:0 -> value", + "CALLS tests/test_rust_closure_containment_oracle.py:0 -> RelationshipType", + "CALLS tests/test_rust_closure_containment_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_rust_closure_containment_oracle.py:0 -> name", + "CALLS tests/test_rust_closure_containment_oracle.py:0 -> run", + "CALLS tests/test_rust_closure_containment_oracle.py:0 -> test_cgr_matches_syn_oracle_on_closure_containment", + "CALLS tests/test_rust_closure_containment_oracle.py:0 -> value", + "CALLS tests/test_rust_closure_method_defines.py:0 -> NodeLabel", + "CALLS tests/test_rust_closure_method_defines.py:0 -> RelationshipType", + "CALLS tests/test_rust_closure_method_defines.py:0 -> mock_ingestor", + "CALLS tests/test_rust_closure_method_defines.py:0 -> parent", + "CALLS tests/test_rust_closure_method_defines.py:0 -> read", + 
"CALLS tests/test_rust_closure_method_defines.py:0 -> run", + "CALLS tests/test_rust_closure_method_defines.py:0 -> temp_repo", + "CALLS tests/test_rust_closure_method_defines.py:0 -> test_rust_closure_in_impl_method_defined_by_method", + "CALLS tests/test_rust_closure_method_defines.py:0 -> type", + "CALLS tests/test_rust_closure_method_defines.py:0 -> value", + "CALLS tests/test_rust_closure_method_defines.py:0 -> walk", + "CALLS tests/test_rust_closures_functions.py:0 -> add", + "CALLS tests/test_rust_closures_functions.py:0 -> get", + "CALLS tests/test_rust_closures_functions.py:0 -> handler", + "CALLS tests/test_rust_closures_functions.py:0 -> insert", + "CALLS tests/test_rust_closures_functions.py:0 -> items", + "CALLS tests/test_rust_closures_functions.py:0 -> method_calls", + "CALLS tests/test_rust_closures_functions.py:0 -> mock_ingestor", + "CALLS tests/test_rust_closures_functions.py:0 -> name", + "CALLS tests/test_rust_closures_functions.py:0 -> operation", + "CALLS tests/test_rust_closures_functions.py:0 -> process", + "CALLS tests/test_rust_closures_functions.py:0 -> processor", + "CALLS tests/test_rust_closures_functions.py:0 -> rust_closures_project", + "CALLS tests/test_rust_closures_functions.py:0 -> start", + "CALLS tests/test_rust_closures_functions.py:0 -> temp_repo", + "CALLS tests/test_rust_closures_functions.py:0 -> test_async_closures_and_futures", + "CALLS tests/test_rust_closures_functions.py:0 -> test_basic_closures_and_captures", + "CALLS tests/test_rust_closures_functions.py:0 -> test_function_pointers_and_types", + "CALLS tests/test_rust_closures_functions.py:0 -> test_higher_order_functions", + "CALLS tests/test_rust_closures_functions.py:0 -> type", + "CALLS tests/test_rust_closures_functions.py:0 -> value", + "CALLS tests/test_rust_collections_iterators.py:0 -> add", + "CALLS tests/test_rust_collections_iterators.py:0 -> get", + "CALLS tests/test_rust_collections_iterators.py:0 -> index", + "CALLS 
tests/test_rust_collections_iterators.py:0 -> insert", + "CALLS tests/test_rust_collections_iterators.py:0 -> keys", + "CALLS tests/test_rust_collections_iterators.py:0 -> method_calls", + "CALLS tests/test_rust_collections_iterators.py:0 -> mock_ingestor", + "CALLS tests/test_rust_collections_iterators.py:0 -> name", + "CALLS tests/test_rust_collections_iterators.py:0 -> operation", + "CALLS tests/test_rust_collections_iterators.py:0 -> parse", + "CALLS tests/test_rust_collections_iterators.py:0 -> rust_collections_project", + "CALLS tests/test_rust_collections_iterators.py:0 -> start", + "CALLS tests/test_rust_collections_iterators.py:0 -> temp_repo", + "CALLS tests/test_rust_collections_iterators.py:0 -> test_functional_programming", + "CALLS tests/test_rust_collections_iterators.py:0 -> test_hashmap_operations", + "CALLS tests/test_rust_collections_iterators.py:0 -> test_iterator_patterns", + "CALLS tests/test_rust_collections_iterators.py:0 -> test_other_collections", + "CALLS tests/test_rust_collections_iterators.py:0 -> test_vector_operations", + "CALLS tests/test_rust_collections_iterators.py:0 -> text", + "CALLS tests/test_rust_collections_iterators.py:0 -> type", + "CALLS tests/test_rust_collections_iterators.py:0 -> value", + "CALLS tests/test_rust_concurrency_async.py:0 -> execute", + "CALLS tests/test_rust_concurrency_async.py:0 -> insert", + "CALLS tests/test_rust_concurrency_async.py:0 -> load", + "CALLS tests/test_rust_concurrency_async.py:0 -> main", + "CALLS tests/test_rust_concurrency_async.py:0 -> method_calls", + "CALLS tests/test_rust_concurrency_async.py:0 -> mock_ingestor", + "CALLS tests/test_rust_concurrency_async.py:0 -> name", + "CALLS tests/test_rust_concurrency_async.py:0 -> operation", + "CALLS tests/test_rust_concurrency_async.py:0 -> read", + "CALLS tests/test_rust_concurrency_async.py:0 -> rust_concurrency_project", + "CALLS tests/test_rust_concurrency_async.py:0 -> start", + "CALLS tests/test_rust_concurrency_async.py:0 -> 
temp_repo", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_async_await_basics", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_atomic_operations", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_basic_threads", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_message_passing_channels", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_parallel_computing", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_shared_state_mutex", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_tokio_async_runtime", + "CALLS tests/test_rust_concurrency_async.py:0 -> type", + "CALLS tests/test_rust_concurrency_async.py:0 -> value", + "CALLS tests/test_rust_containment_oracle.py:0 -> RelationshipType", + "CALLS tests/test_rust_containment_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_rust_containment_oracle.py:0 -> name", + "CALLS tests/test_rust_containment_oracle.py:0 -> test_cgr_matches_syn_oracle_on_containment_edges", + "CALLS tests/test_rust_containment_oracle.py:0 -> type", + "CALLS tests/test_rust_containment_oracle.py:0 -> value", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> clear", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> get", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> insert", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> load", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> main", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> mock_ingestor", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> name", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> rust_singleton_project", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> save", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> start", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> temp_repo", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> test_rust_singleton_pattern_cross_file_calls", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> up", + "CALLS 
tests/test_rust_cross_file_singleton.py:0 -> value", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> down", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> get", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> handler", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> index", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> insert", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> items", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> load", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> method_calls", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> mock_ingestor", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> name", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> operation", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> parse", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> read", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> rust_error_project", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> temp_repo", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> test_custom_error_types", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> test_error_handling_patterns", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> test_error_propagation", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> test_panic_handling", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> test_result_option_basics", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> type", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> value", + "CALLS tests/test_rust_impl_primitive_target.py:0 -> NodeLabel", + "CALLS tests/test_rust_impl_primitive_target.py:0 -> extract_impl_target", + "CALLS tests/test_rust_impl_primitive_target.py:0 -> mock_ingestor", + "CALLS tests/test_rust_impl_primitive_target.py:0 -> temp_repo", + "CALLS tests/test_rust_impl_primitive_target.py:0 -> test_rust_method_on_primitive_impl_target_is_captured", 
+ "CALLS tests/test_rust_impl_primitive_target.py:0 -> value", + "CALLS tests/test_rust_inheritance_edges.py:0 -> RelationshipType", + "CALLS tests/test_rust_inheritance_edges.py:0 -> mock_ingestor", + "CALLS tests/test_rust_inheritance_edges.py:0 -> temp_repo", + "CALLS tests/test_rust_inheritance_edges.py:0 -> test_rust_impl_and_supertrait_edges", + "CALLS tests/test_rust_inheritance_edges.py:0 -> value", + "CALLS tests/test_rust_inheritance_oracle.py:0 -> RelationshipType", + "CALLS tests/test_rust_inheritance_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_rust_inheritance_oracle.py:0 -> name", + "CALLS tests/test_rust_inheritance_oracle.py:0 -> test_cgr_matches_syn_oracle_on_inheritance_edges", + "CALLS tests/test_rust_inheritance_oracle.py:0 -> value", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> get", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> get_data", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> index", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> insert", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> items", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> metadata", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> method_calls", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> mock_ingestor", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> name", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> nodes", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> process", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> processor", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> relationships", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> rust_lifetimes_project", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> temp_repo", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> test_borrowing_edge_cases", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> test_complex_lifetime_relationships", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> test_lifetime_elision_rules", + "CALLS 
tests/test_rust_lifetimes_advanced.py:0 -> test_lifetime_variance_and_subtyping", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> text", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> type", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> updater", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> value", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> wrapper", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> get", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> handler", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> insert", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> items", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> main", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> method_calls", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> mock_ingestor", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> name", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> parse", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> rust_macros_project", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> start", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> summary", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> temp_repo", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> test_advanced_macro_patterns", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> test_declarative_macros_basic", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> test_derive_macros_custom", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> test_macro_usage_patterns", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> test_procedural_macros", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> text", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> type", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> value", + "CALLS tests/test_rust_memory_management.py:0 -> children", + "CALLS tests/test_rust_memory_management.py:0 -> main", + "CALLS 
tests/test_rust_memory_management.py:0 -> method_calls", + "CALLS tests/test_rust_memory_management.py:0 -> mock_ingestor", + "CALLS tests/test_rust_memory_management.py:0 -> name", + "CALLS tests/test_rust_memory_management.py:0 -> parent", + "CALLS tests/test_rust_memory_management.py:0 -> rust_memory_project", + "CALLS tests/test_rust_memory_management.py:0 -> temp_repo", + "CALLS tests/test_rust_memory_management.py:0 -> test_drop_trait_cleanup", + "CALLS tests/test_rust_memory_management.py:0 -> test_lifetimes_explicit", + "CALLS tests/test_rust_memory_management.py:0 -> test_memory_layout_optimization", + "CALLS tests/test_rust_memory_management.py:0 -> test_ownership_borrowing_basic", + "CALLS tests/test_rust_memory_management.py:0 -> test_reference_counting", + "CALLS tests/test_rust_memory_management.py:0 -> test_smart_pointers", + "CALLS tests/test_rust_memory_management.py:0 -> test_unsafe_code_patterns", + "CALLS tests/test_rust_memory_management.py:0 -> value", + "CALLS tests/test_rust_modules_visibility.py:0 -> Architecture", + "CALLS tests/test_rust_modules_visibility.py:0 -> add", + "CALLS tests/test_rust_modules_visibility.py:0 -> dead_code", + "CALLS tests/test_rust_modules_visibility.py:0 -> execute", + "CALLS tests/test_rust_modules_visibility.py:0 -> export", + "CALLS tests/test_rust_modules_visibility.py:0 -> insert", + "CALLS tests/test_rust_modules_visibility.py:0 -> items", + "CALLS tests/test_rust_modules_visibility.py:0 -> method_calls", + "CALLS tests/test_rust_modules_visibility.py:0 -> mock_ingestor", + "CALLS tests/test_rust_modules_visibility.py:0 -> name", + "CALLS tests/test_rust_modules_visibility.py:0 -> parent", + "CALLS tests/test_rust_modules_visibility.py:0 -> process", + "CALLS tests/test_rust_modules_visibility.py:0 -> processor", + "CALLS tests/test_rust_modules_visibility.py:0 -> read_file", + "CALLS tests/test_rust_modules_visibility.py:0 -> rust_modules_project", + "CALLS tests/test_rust_modules_visibility.py:0 -> 
status", + "CALLS tests/test_rust_modules_visibility.py:0 -> temp_repo", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_advanced_visibility_patterns", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_basic_module_declarations", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_complex_use_statements", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_conditional_compilation", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_module_attributes_and_cfg", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_module_path_resolution", + "CALLS tests/test_rust_modules_visibility.py:0 -> type", + "CALLS tests/test_rust_modules_visibility.py:0 -> up", + "CALLS tests/test_rust_modules_visibility.py:0 -> value", + "CALLS tests/test_rust_modules_visibility.py:0 -> variants", + "CALLS tests/test_rust_nested_module_containment.py:0 -> NodeLabel", + "CALLS tests/test_rust_nested_module_containment.py:0 -> RelationshipType", + "CALLS tests/test_rust_nested_module_containment.py:0 -> items", + "CALLS tests/test_rust_nested_module_containment.py:0 -> mock_ingestor", + "CALLS tests/test_rust_nested_module_containment.py:0 -> parent", + "CALLS tests/test_rust_nested_module_containment.py:0 -> temp_repo", + "CALLS tests/test_rust_nested_module_containment.py:0 -> test_rust_impl_method_in_module_binds_to_nested_type", + "CALLS tests/test_rust_nested_module_containment.py:0 -> test_rust_nested_module_is_module_nested", + "CALLS tests/test_rust_nested_module_containment.py:0 -> type", + "CALLS tests/test_rust_nested_module_containment.py:0 -> value", + "CALLS tests/test_rust_node_type.py:0 -> Color", + "CALLS tests/test_rust_node_type.py:0 -> NodeType", + "CALLS tests/test_rust_node_type.py:0 -> SupportedLanguage", + "CALLS tests/test_rust_node_type.py:0 -> mock_ingestor", + "CALLS tests/test_rust_node_type.py:0 -> rust_node_type_project", + "CALLS tests/test_rust_node_type.py:0 -> temp_repo", + "CALLS tests/test_rust_node_type.py:0 -> 
test_determine_node_type_rust", + "CALLS tests/test_rust_node_type.py:0 -> test_rust_enum_label", + "CALLS tests/test_rust_node_type.py:0 -> test_rust_struct_label", + "CALLS tests/test_rust_node_type.py:0 -> test_rust_trait_label", + "CALLS tests/test_rust_node_type.py:0 -> test_rust_type_alias_label", + "CALLS tests/test_rust_node_type.py:0 -> test_rust_union_label", + "CALLS tests/test_rust_node_type.py:0 -> type", + "CALLS tests/test_rust_pattern_matching.py:0 -> Color", + "CALLS tests/test_rust_pattern_matching.py:0 -> get", + "CALLS tests/test_rust_pattern_matching.py:0 -> insert", + "CALLS tests/test_rust_pattern_matching.py:0 -> items", + "CALLS tests/test_rust_pattern_matching.py:0 -> metadata", + "CALLS tests/test_rust_pattern_matching.py:0 -> method_calls", + "CALLS tests/test_rust_pattern_matching.py:0 -> mock_ingestor", + "CALLS tests/test_rust_pattern_matching.py:0 -> name", + "CALLS tests/test_rust_pattern_matching.py:0 -> parse", + "CALLS tests/test_rust_pattern_matching.py:0 -> rust_pattern_project", + "CALLS tests/test_rust_pattern_matching.py:0 -> status", + "CALLS tests/test_rust_pattern_matching.py:0 -> temp_repo", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_advanced_if_let_while_let", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_advanced_macro_patterns", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_destructuring_patterns", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_exhaustive_enum_matching", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_macro_pattern_matching", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_pattern_guards_and_ranges", + "CALLS tests/test_rust_pattern_matching.py:0 -> text", + "CALLS tests/test_rust_pattern_matching.py:0 -> type", + "CALLS tests/test_rust_pattern_matching.py:0 -> value", + "CALLS tests/test_rust_pattern_matching.py:0 -> variants", + "CALLS tests/test_rust_performance_optimization.py:0 -> add", + "CALLS tests/test_rust_performance_optimization.py:0 -> 
get", + "CALLS tests/test_rust_performance_optimization.py:0 -> index", + "CALLS tests/test_rust_performance_optimization.py:0 -> insert", + "CALLS tests/test_rust_performance_optimization.py:0 -> items", + "CALLS tests/test_rust_performance_optimization.py:0 -> load", + "CALLS tests/test_rust_performance_optimization.py:0 -> method_calls", + "CALLS tests/test_rust_performance_optimization.py:0 -> mock_ingestor", + "CALLS tests/test_rust_performance_optimization.py:0 -> name", + "CALLS tests/test_rust_performance_optimization.py:0 -> run", + "CALLS tests/test_rust_performance_optimization.py:0 -> rust_performance_project", + "CALLS tests/test_rust_performance_optimization.py:0 -> start", + "CALLS tests/test_rust_performance_optimization.py:0 -> temp_repo", + "CALLS tests/test_rust_performance_optimization.py:0 -> test_benchmarking_patterns", + "CALLS tests/test_rust_performance_optimization.py:0 -> test_memory_optimization", + "CALLS tests/test_rust_performance_optimization.py:0 -> test_parallel_processing_rayon", + "CALLS tests/test_rust_performance_optimization.py:0 -> test_profiling_optimization_tools", + "CALLS tests/test_rust_performance_optimization.py:0 -> test_simd_vectorization", + "CALLS tests/test_rust_performance_optimization.py:0 -> tracker", + "CALLS tests/test_rust_performance_optimization.py:0 -> value", + "CALLS tests/test_rust_smart_pointers.py:0 -> GraphNode", + "CALLS tests/test_rust_smart_pointers.py:0 -> children", + "CALLS tests/test_rust_smart_pointers.py:0 -> clear", + "CALLS tests/test_rust_smart_pointers.py:0 -> description", + "CALLS tests/test_rust_smart_pointers.py:0 -> done", + "CALLS tests/test_rust_smart_pointers.py:0 -> get", + "CALLS tests/test_rust_smart_pointers.py:0 -> insert", + "CALLS tests/test_rust_smart_pointers.py:0 -> items", + "CALLS tests/test_rust_smart_pointers.py:0 -> keys", + "CALLS tests/test_rust_smart_pointers.py:0 -> metadata", + "CALLS tests/test_rust_smart_pointers.py:0 -> method_calls", + "CALLS 
tests/test_rust_smart_pointers.py:0 -> mock_ingestor", + "CALLS tests/test_rust_smart_pointers.py:0 -> name", + "CALLS tests/test_rust_smart_pointers.py:0 -> parent", + "CALLS tests/test_rust_smart_pointers.py:0 -> process", + "CALLS tests/test_rust_smart_pointers.py:0 -> processor", + "CALLS tests/test_rust_smart_pointers.py:0 -> read", + "CALLS tests/test_rust_smart_pointers.py:0 -> run", + "CALLS tests/test_rust_smart_pointers.py:0 -> rust_smart_pointers_project", + "CALLS tests/test_rust_smart_pointers.py:0 -> start", + "CALLS tests/test_rust_smart_pointers.py:0 -> temp_repo", + "CALLS tests/test_rust_smart_pointers.py:0 -> test_arc_atomic_reference_counting", + "CALLS tests/test_rust_smart_pointers.py:0 -> test_box_pointer_patterns", + "CALLS tests/test_rust_smart_pointers.py:0 -> test_custom_smart_pointers", + "CALLS tests/test_rust_smart_pointers.py:0 -> test_rc_reference_counting", + "CALLS tests/test_rust_smart_pointers.py:0 -> test_refcell_interior_mutability", + "CALLS tests/test_rust_smart_pointers.py:0 -> type", + "CALLS tests/test_rust_smart_pointers.py:0 -> up", + "CALLS tests/test_rust_smart_pointers.py:0 -> value", + "CALLS tests/test_rust_span_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_rust_span_oracle.py:0 -> name", + "CALLS tests/test_rust_span_oracle.py:0 -> start", + "CALLS tests/test_rust_span_oracle.py:0 -> test_cgr_matches_syn_oracle_on_node_spans", + "CALLS tests/test_rust_structs_enums.py:0 -> Color", + "CALLS tests/test_rust_structs_enums.py:0 -> children", + "CALLS tests/test_rust_structs_enums.py:0 -> get", + "CALLS tests/test_rust_structs_enums.py:0 -> insert", + "CALLS tests/test_rust_structs_enums.py:0 -> items", + "CALLS tests/test_rust_structs_enums.py:0 -> metadata", + "CALLS tests/test_rust_structs_enums.py:0 -> method_calls", + "CALLS tests/test_rust_structs_enums.py:0 -> mock_ingestor", + "CALLS tests/test_rust_structs_enums.py:0 -> name", + "CALLS tests/test_rust_structs_enums.py:0 -> parent", + "CALLS 
tests/test_rust_structs_enums.py:0 -> relationships", + "CALLS tests/test_rust_structs_enums.py:0 -> rust_structs_project", + "CALLS tests/test_rust_structs_enums.py:0 -> temp_repo", + "CALLS tests/test_rust_structs_enums.py:0 -> test_basic_struct_definitions", + "CALLS tests/test_rust_structs_enums.py:0 -> test_complex_struct_relationships", + "CALLS tests/test_rust_structs_enums.py:0 -> test_enum_definitions_and_variants", + "CALLS tests/test_rust_structs_enums.py:0 -> test_enum_pattern_matching_advanced", + "CALLS tests/test_rust_structs_enums.py:0 -> test_pattern_matching_destructuring", + "CALLS tests/test_rust_structs_enums.py:0 -> test_struct_derive_attributes", + "CALLS tests/test_rust_structs_enums.py:0 -> text", + "CALLS tests/test_rust_structs_enums.py:0 -> type", + "CALLS tests/test_rust_structs_enums.py:0 -> value", + "CALLS tests/test_rust_structs_enums.py:0 -> variants", + "CALLS tests/test_rust_structure_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_rust_structure_oracle.py:0 -> name", + "CALLS tests/test_rust_structure_oracle.py:0 -> nodes", + "CALLS tests/test_rust_structure_oracle.py:0 -> test_cgr_matches_syn_oracle_on_rust_structure", + "CALLS tests/test_rust_structure_oracle.py:0 -> type", + "CALLS tests/test_rust_trait_method_containment.py:0 -> NodeLabel", + "CALLS tests/test_rust_trait_method_containment.py:0 -> RelationshipType", + "CALLS tests/test_rust_trait_method_containment.py:0 -> mock_ingestor", + "CALLS tests/test_rust_trait_method_containment.py:0 -> parent", + "CALLS tests/test_rust_trait_method_containment.py:0 -> temp_repo", + "CALLS tests/test_rust_trait_method_containment.py:0 -> test_rust_trait_method_defined_by_interface_node", + "CALLS tests/test_rust_trait_method_containment.py:0 -> value", + "CALLS tests/test_rust_trait_objects.py:0 -> ProcessorFactory", + "CALLS tests/test_rust_trait_objects.py:0 -> analyze", + "CALLS tests/test_rust_trait_objects.py:0 -> execute", + "CALLS tests/test_rust_trait_objects.py:0 -> 
factory", + "CALLS tests/test_rust_trait_objects.py:0 -> get", + "CALLS tests/test_rust_trait_objects.py:0 -> get_data", + "CALLS tests/test_rust_trait_objects.py:0 -> get_summary", + "CALLS tests/test_rust_trait_objects.py:0 -> handler", + "CALLS tests/test_rust_trait_objects.py:0 -> index", + "CALLS tests/test_rust_trait_objects.py:0 -> insert", + "CALLS tests/test_rust_trait_objects.py:0 -> method_calls", + "CALLS tests/test_rust_trait_objects.py:0 -> mock_ingestor", + "CALLS tests/test_rust_trait_objects.py:0 -> name", + "CALLS tests/test_rust_trait_objects.py:0 -> operation", + "CALLS tests/test_rust_trait_objects.py:0 -> parse", + "CALLS tests/test_rust_trait_objects.py:0 -> process", + "CALLS tests/test_rust_trait_objects.py:0 -> processor", + "CALLS tests/test_rust_trait_objects.py:0 -> render", + "CALLS tests/test_rust_trait_objects.py:0 -> rust_trait_objects_project", + "CALLS tests/test_rust_trait_objects.py:0 -> start", + "CALLS tests/test_rust_trait_objects.py:0 -> summary", + "CALLS tests/test_rust_trait_objects.py:0 -> temp_repo", + "CALLS tests/test_rust_trait_objects.py:0 -> test_advanced_trait_object_patterns", + "CALLS tests/test_rust_trait_objects.py:0 -> test_basic_trait_objects", + "CALLS tests/test_rust_trait_objects.py:0 -> test_dynamic_dispatch_performance", + "CALLS tests/test_rust_trait_objects.py:0 -> test_object_safety_patterns", + "CALLS tests/test_rust_trait_objects.py:0 -> type", + "CALLS tests/test_rust_trait_objects.py:0 -> value", + "CALLS tests/test_rust_traits_generics.py:0 -> add", + "CALLS tests/test_rust_traits_generics.py:0 -> children", + "CALLS tests/test_rust_traits_generics.py:0 -> clear", + "CALLS tests/test_rust_traits_generics.py:0 -> factory", + "CALLS tests/test_rust_traits_generics.py:0 -> get", + "CALLS tests/test_rust_traits_generics.py:0 -> insert", + "CALLS tests/test_rust_traits_generics.py:0 -> items", + "CALLS tests/test_rust_traits_generics.py:0 -> load", + "CALLS tests/test_rust_traits_generics.py:0 -> 
metadata", + "CALLS tests/test_rust_traits_generics.py:0 -> method_calls", + "CALLS tests/test_rust_traits_generics.py:0 -> mock_ingestor", + "CALLS tests/test_rust_traits_generics.py:0 -> name", + "CALLS tests/test_rust_traits_generics.py:0 -> parse", + "CALLS tests/test_rust_traits_generics.py:0 -> process", + "CALLS tests/test_rust_traits_generics.py:0 -> processor", + "CALLS tests/test_rust_traits_generics.py:0 -> relationships", + "CALLS tests/test_rust_traits_generics.py:0 -> rust_traits_project", + "CALLS tests/test_rust_traits_generics.py:0 -> temp_repo", + "CALLS tests/test_rust_traits_generics.py:0 -> test_associated_types_and_constants", + "CALLS tests/test_rust_traits_generics.py:0 -> test_basic_trait_definitions", + "CALLS tests/test_rust_traits_generics.py:0 -> test_generic_types_and_constraints", + "CALLS tests/test_rust_traits_generics.py:0 -> test_higher_ranked_trait_bounds", + "CALLS tests/test_rust_traits_generics.py:0 -> test_trait_objects_and_dynamic_dispatch", + "CALLS tests/test_rust_traits_generics.py:0 -> type", + "CALLS tests/test_rust_traits_generics.py:0 -> value", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> Color", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> add", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> get", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> index", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> load", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> method_calls", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> mock_ingestor", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> name", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> optimize", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> read", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> rust_unsafe_project", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> temp_repo", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> test_extern_c_functions", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> test_inline_assembly", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> test_raw_pointers_and_dereferencing", + "CALLS 
tests/test_rust_unsafe_ffi.py:0 -> test_unsafe_traits_and_implementations", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> test_unsafe_unions_and_transmute", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> type", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> value", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> wrapper", + "CALLS tests/test_rust_utils.py:0 -> TestBuildModulePath", + "CALLS tests/test_rust_utils.py:0 -> TestExtractImplTarget", + "CALLS tests/test_rust_utils.py:0 -> TestExtractUseImports", + "CALLS tests/test_rust_utils.py:0 -> TestExtractUseImportsEdgeCases", + "CALLS tests/test_rust_utils.py:0 -> TestRustImportsIntegration", + "CALLS tests/test_rust_utils.py:0 -> children", + "CALLS tests/test_rust_utils.py:0 -> ensure_node_batch", + "CALLS tests/test_rust_utils.py:0 -> factory", + "CALLS tests/test_rust_utils.py:0 -> graph_updater", + "CALLS tests/test_rust_utils.py:0 -> import_processor", + "CALLS tests/test_rust_utils.py:0 -> main", + "CALLS tests/test_rust_utils.py:0 -> method_calls", + "CALLS tests/test_rust_utils.py:0 -> mock_ingestor", + "CALLS tests/test_rust_utils.py:0 -> name", + "CALLS tests/test_rust_utils.py:0 -> parent", + "CALLS tests/test_rust_utils.py:0 -> process", + "CALLS tests/test_rust_utils.py:0 -> repo_path", + "CALLS tests/test_rust_utils.py:0 -> temp_repo", + "CALLS tests/test_rust_utils.py:0 -> test_aliased_import", + "CALLS tests/test_rust_utils.py:0 -> test_aliased_imports_tracked", + "CALLS tests/test_rust_utils.py:0 -> test_complex_nested_imports_integration", + "CALLS tests/test_rust_utils.py:0 -> test_crate_import", + "CALLS tests/test_rust_utils.py:0 -> test_deeply_nested_path", + "CALLS tests/test_rust_utils.py:0 -> test_empty_group", + "CALLS tests/test_rust_utils.py:0 -> test_function_in_module", + "CALLS tests/test_rust_utils.py:0 -> test_function_in_nested_modules", + "CALLS tests/test_rust_utils.py:0 -> test_function_with_class_node_types", + "CALLS tests/test_rust_utils.py:0 -> test_grouped_imports", + "CALLS 
tests/test_rust_utils.py:0 -> test_impl_methods_have_correct_qualified_names", + "CALLS tests/test_rust_utils.py:0 -> test_impl_scoped_type", + "CALLS tests/test_rust_utils.py:0 -> test_impl_trait_for_type", + "CALLS tests/test_rust_utils.py:0 -> test_impl_with_generic", + "CALLS tests/test_rust_utils.py:0 -> test_imports_create_relationships", + "CALLS tests/test_rust_utils.py:0 -> test_method_in_impl_with_target", + "CALLS tests/test_rust_utils.py:0 -> test_method_in_impl_without_target", + "CALLS tests/test_rust_utils.py:0 -> test_mixed_self_and_items_in_group", + "CALLS tests/test_rust_utils.py:0 -> test_multiple_nested_groups", + "CALLS tests/test_rust_utils.py:0 -> test_nested_grouped_imports", + "CALLS tests/test_rust_utils.py:0 -> test_non_impl_node_returns_none", + "CALLS tests/test_rust_utils.py:0 -> test_non_use_node_returns_empty", + "CALLS tests/test_rust_utils.py:0 -> test_self_alias_in_group", + "CALLS tests/test_rust_utils.py:0 -> test_self_import", + "CALLS tests/test_rust_utils.py:0 -> test_simple_impl", + "CALLS tests/test_rust_utils.py:0 -> test_simple_import", + "CALLS tests/test_rust_utils.py:0 -> test_super_import", + "CALLS tests/test_rust_utils.py:0 -> test_super_super_import", + "CALLS tests/test_rust_utils.py:0 -> test_top_level_function", + "CALLS tests/test_rust_utils.py:0 -> test_wildcard_import", + "CALLS tests/test_rust_utils.py:0 -> test_wildcard_imports_tracked", + "CALLS tests/test_rust_utils.py:0 -> type", + "CALLS tests/test_rust_utils.py:0 -> updater", + "CALLS tests/test_rust_web_networking.py:0 -> down", + "CALLS tests/test_rust_web_networking.py:0 -> execute", + "CALLS tests/test_rust_web_networking.py:0 -> get", + "CALLS tests/test_rust_web_networking.py:0 -> handler", + "CALLS tests/test_rust_web_networking.py:0 -> insert", + "CALLS tests/test_rust_web_networking.py:0 -> keys", + "CALLS tests/test_rust_web_networking.py:0 -> metadata", + "CALLS tests/test_rust_web_networking.py:0 -> method_calls", + "CALLS 
tests/test_rust_web_networking.py:0 -> mock_ingestor", + "CALLS tests/test_rust_web_networking.py:0 -> name", + "CALLS tests/test_rust_web_networking.py:0 -> put", + "CALLS tests/test_rust_web_networking.py:0 -> run", + "CALLS tests/test_rust_web_networking.py:0 -> rust_web_project", + "CALLS tests/test_rust_web_networking.py:0 -> save", + "CALLS tests/test_rust_web_networking.py:0 -> start", + "CALLS tests/test_rust_web_networking.py:0 -> stats", + "CALLS tests/test_rust_web_networking.py:0 -> status", + "CALLS tests/test_rust_web_networking.py:0 -> temp_repo", + "CALLS tests/test_rust_web_networking.py:0 -> test_database_orm_patterns", + "CALLS tests/test_rust_web_networking.py:0 -> test_http_client_requests", + "CALLS tests/test_rust_web_networking.py:0 -> test_json_api_serialization", + "CALLS tests/test_rust_web_networking.py:0 -> test_web_server_axum", + "CALLS tests/test_rust_web_networking.py:0 -> test_websockets_realtime", + "CALLS tests/test_rust_web_networking.py:0 -> text", + "CALLS tests/test_rust_web_networking.py:0 -> type", + "CALLS tests/test_rust_web_networking.py:0 -> up", + "CALLS tests/test_rust_web_networking.py:0 -> value", + "CALLS tests/test_semantic_search.py:0 -> AgenticToolName", + "CALLS tests/test_semantic_search.py:0 -> MemgraphIngestor", + "CALLS tests/test_semantic_search.py:0 -> _execute_query", + "CALLS tests/test_semantic_search.py:0 -> embed_code", + "CALLS tests/test_semantic_search.py:0 -> extract_source_lines", + "CALLS tests/test_semantic_search.py:0 -> graph_service", + "CALLS tests/test_semantic_search.py:0 -> mock_embed_code", + "CALLS tests/test_semantic_search.py:0 -> mock_ingestor", + "CALLS tests/test_semantic_search.py:0 -> mock_search_embeddings", + "CALLS tests/test_semantic_search.py:0 -> name", + "CALLS tests/test_semantic_search.py:0 -> search_embeddings", + "CALLS tests/test_semantic_search.py:0 -> semantic_search", + "CALLS tests/test_semantic_search.py:0 -> test_create_get_function_source_tool_returns_tool", 
+ "CALLS tests/test_semantic_search.py:0 -> test_create_semantic_search_tool_returns_tool", + "CALLS tests/test_semantic_search.py:0 -> test_get_function_source_code_handles_exception", + "CALLS tests/test_semantic_search.py:0 -> test_get_function_source_code_returns_none_on_invalid_location", + "CALLS tests/test_semantic_search.py:0 -> test_get_function_source_code_returns_none_when_not_found", + "CALLS tests/test_semantic_search.py:0 -> test_get_function_source_code_returns_source", + "CALLS tests/test_semantic_search.py:0 -> test_get_function_source_tool_handles_not_found", + "CALLS tests/test_semantic_search.py:0 -> test_get_function_source_tool_returns_source", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_calls_embed_code_with_query", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_handles_exception", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_passes_top_k_to_search", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_preserves_score_order", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_returns_empty_when_no_matches", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_returns_empty_without_dependencies", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_returns_formatted_results", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_search_tool_formats_results", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_search_tool_handles_no_results", + "CALLS tests/test_semantic_search.py:0 -> type", + "CALLS tests/test_semantic_search.py:0 -> validate_source_location", + "CALLS tests/test_shell_command.py:0 -> AgenticToolName", + "CALLS tests/test_shell_command.py:0 -> TestAwkSedXargsIntegration", + "CALLS tests/test_shell_command.py:0 -> TestAwkSedXargsPatterns", + "CALLS tests/test_shell_command.py:0 -> TestBlockedCommands", + "CALLS tests/test_shell_command.py:0 -> TestCommandAllowlist", + "CALLS 
tests/test_shell_command.py:0 -> TestCreateShellCommandTool", + "CALLS tests/test_shell_command.py:0 -> TestDangerousRmFlags", + "CALLS tests/test_shell_command.py:0 -> TestDangerousRmPath", + "CALLS tests/test_shell_command.py:0 -> TestHasRedirectOperators", + "CALLS tests/test_shell_command.py:0 -> TestHasSubshell", + "CALLS tests/test_shell_command.py:0 -> TestIsDangerousCommand", + "CALLS tests/test_shell_command.py:0 -> TestParseCommandEdgeCases", + "CALLS tests/test_shell_command.py:0 -> TestPipedCommandApproval", + "CALLS tests/test_shell_command.py:0 -> TestPipedCommandExecution", + "CALLS tests/test_shell_command.py:0 -> TestPipelinePatterns", + "CALLS tests/test_shell_command.py:0 -> TestQuoteAwareSubshellDetection", + "CALLS tests/test_shell_command.py:0 -> TestRequiresApproval", + "CALLS tests/test_shell_command.py:0 -> TestRequiresApprovalWithRedirects", + "CALLS tests/test_shell_command.py:0 -> TestSecurityIntegration", + "CALLS tests/test_shell_command.py:0 -> TestSegmentPatterns", + "CALLS tests/test_shell_command.py:0 -> TestSeparateRmFlags", + "CALLS tests/test_shell_command.py:0 -> TestShellCommanderExecute", + "CALLS tests/test_shell_command.py:0 -> TestShellCommanderInit", + "CALLS tests/test_shell_command.py:0 -> TestShellOperators", + "CALLS tests/test_shell_command.py:0 -> TestToolApprovalBehavior", + "CALLS tests/test_shell_command.py:0 -> TestValidateSegment", + "CALLS tests/test_shell_command.py:0 -> TestYoloMode", + "CALLS tests/test_shell_command.py:0 -> anyio_backend", + "CALLS tests/test_shell_command.py:0 -> description", + "CALLS tests/test_shell_command.py:0 -> is_yolo", + "CALLS tests/test_shell_command.py:0 -> name", + "CALLS tests/test_shell_command.py:0 -> project_root", + "CALLS tests/test_shell_command.py:0 -> run", + "CALLS tests/test_shell_command.py:0 -> shell_commander", + "CALLS tests/test_shell_command.py:0 -> status", + "CALLS tests/test_shell_command.py:0 -> temp_project_root", + "CALLS tests/test_shell_command.py:0 
-> test_absolute_system_dir", + "CALLS tests/test_shell_command.py:0 -> test_all_read_only_no_approval", + "CALLS tests/test_shell_command.py:0 -> test_and_operator", + "CALLS tests/test_shell_command.py:0 -> test_and_operator_short_circuit", + "CALLS tests/test_shell_command.py:0 -> test_append_redirect", + "CALLS tests/test_shell_command.py:0 -> test_append_redirect_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_awk_getline_detected", + "CALLS tests/test_shell_command.py:0 -> test_awk_getline_rejected", + "CALLS tests/test_shell_command.py:0 -> test_awk_system_call_detected", + "CALLS tests/test_shell_command.py:0 -> test_awk_system_rejected", + "CALLS tests/test_shell_command.py:0 -> test_backtick_subshell_rejected", + "CALLS tests/test_shell_command.py:0 -> test_backtick_substitution", + "CALLS tests/test_shell_command.py:0 -> test_blocked_command_execution", + "CALLS tests/test_shell_command.py:0 -> test_bypass_allowlist_skips_allowlist_error", + "CALLS tests/test_shell_command.py:0 -> test_bypass_allowlist_still_blocks_dangerous_rm", + "CALLS tests/test_shell_command.py:0 -> test_chmod_777_root", + "CALLS tests/test_shell_command.py:0 -> test_combined_flags_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_command_not_in_allowlist", + "CALLS tests/test_shell_command.py:0 -> test_command_substitution", + "CALLS tests/test_shell_command.py:0 -> test_common_commands_in_allowlist", + "CALLS tests/test_shell_command.py:0 -> test_creates_tool_instance", + "CALLS tests/test_shell_command.py:0 -> test_dangerous_command", + "CALLS tests/test_shell_command.py:0 -> test_dangerous_command_as_second_in_pipe", + "CALLS tests/test_shell_command.py:0 -> test_dangerous_command_in_pipe_rejected", + "CALLS tests/test_shell_command.py:0 -> test_dangerous_pattern_in_pipeline", + "CALLS tests/test_shell_command.py:0 -> test_dd_to_device", + "CALLS tests/test_shell_command.py:0 -> test_destructive_commands_blocked", + "CALLS 
tests/test_shell_command.py:0 -> test_disk_operations_blocked", + "CALLS tests/test_shell_command.py:0 -> test_dollar_in_variable", + "CALLS tests/test_shell_command.py:0 -> test_dot_dot_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_double_quoted_subshell_rejected", + "CALLS tests/test_shell_command.py:0 -> test_empty_command", + "CALLS tests/test_shell_command.py:0 -> test_empty_segment", + "CALLS tests/test_shell_command.py:0 -> test_escaped_operators_in_quotes", + "CALLS tests/test_shell_command.py:0 -> test_escaped_quote_bypass_detected", + "CALLS tests/test_shell_command.py:0 -> test_execute_cat_command", + "CALLS tests/test_shell_command.py:0 -> test_execute_command_not_in_allowlist", + "CALLS tests/test_shell_command.py:0 -> test_execute_command_with_stderr", + "CALLS tests/test_shell_command.py:0 -> test_execute_dangerous_command_rejected", + "CALLS tests/test_shell_command.py:0 -> test_execute_echo_command", + "CALLS tests/test_shell_command.py:0 -> test_execute_empty_command", + "CALLS tests/test_shell_command.py:0 -> test_execute_grep_suggests_rg", + "CALLS tests/test_shell_command.py:0 -> test_execute_ls_command", + "CALLS tests/test_shell_command.py:0 -> test_execute_pwd_command", + "CALLS tests/test_shell_command.py:0 -> test_find_with_wc", + "CALLS tests/test_shell_command.py:0 -> test_flags_with_other_options", + "CALLS tests/test_shell_command.py:0 -> test_heredoc", + "CALLS tests/test_shell_command.py:0 -> test_heredoc_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_init_custom_timeout", + "CALLS tests/test_shell_command.py:0 -> test_init_default_timeout", + "CALLS tests/test_shell_command.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_shell_command.py:0 -> test_input_redirect", + "CALLS tests/test_shell_command.py:0 -> test_input_redirect_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_invalid_command_requires_approval", + "CALLS tests/test_shell_command.py:0 -> 
test_invalid_syntax", + "CALLS tests/test_shell_command.py:0 -> test_invalid_syntax_rejected", + "CALLS tests/test_shell_command.py:0 -> test_kernel_module_commands_blocked", + "CALLS tests/test_shell_command.py:0 -> test_leading_operator", + "CALLS tests/test_shell_command.py:0 -> test_mixed_quote_styles", + "CALLS tests/test_shell_command.py:0 -> test_multiple_dangerous_commands_all_rejected", + "CALLS tests/test_shell_command.py:0 -> test_multiple_operators_in_sequence", + "CALLS tests/test_shell_command.py:0 -> test_no_redirect", + "CALLS tests/test_shell_command.py:0 -> test_no_subshell", + "CALLS tests/test_shell_command.py:0 -> test_non_rm_commands_not_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_or_operator", + "CALLS tests/test_shell_command.py:0 -> test_or_operator_short_circuit", + "CALLS tests/test_shell_command.py:0 -> test_other_commands_not_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_output_redirect", + "CALLS tests/test_shell_command.py:0 -> test_output_redirect_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_path_outside_project", + "CALLS tests/test_shell_command.py:0 -> test_pipe_in_single_quotes", + "CALLS tests/test_shell_command.py:0 -> test_pipe_with_disallowed_command", + "CALLS tests/test_shell_command.py:0 -> test_python_os_import_detected", + "CALLS tests/test_shell_command.py:0 -> test_read_only_command_no_approval_needed", + "CALLS tests/test_shell_command.py:0 -> test_read_only_commands_no_approval", + "CALLS tests/test_shell_command.py:0 -> test_read_only_with_args_no_approval", + "CALLS tests/test_shell_command.py:0 -> test_read_only_without_redirect_no_approval", + "CALLS tests/test_shell_command.py:0 -> test_relative_path_bypass_blocked", + "CALLS tests/test_shell_command.py:0 -> test_relative_path_to_system_dir", + "CALLS tests/test_shell_command.py:0 -> test_remote_script_execution", + "CALLS tests/test_shell_command.py:0 -> test_rg_in_pipeline", + "CALLS 
tests/test_shell_command.py:0 -> test_rm_fr_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_rm_outside_project_blocked", + "CALLS tests/test_shell_command.py:0 -> test_rm_rf_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_rm_rf_is_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_rm_system_directory", + "CALLS tests/test_shell_command.py:0 -> test_rm_without_force_not_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_rm_without_rf_is_not_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_root_directory", + "CALLS tests/test_shell_command.py:0 -> test_safe_awk_allowed", + "CALLS tests/test_shell_command.py:0 -> test_safe_awk_not_flagged", + "CALLS tests/test_shell_command.py:0 -> test_safe_commands_not_blocked", + "CALLS tests/test_shell_command.py:0 -> test_safe_git_subcommands_no_approval", + "CALLS tests/test_shell_command.py:0 -> test_safe_path_inside_project", + "CALLS tests/test_shell_command.py:0 -> test_safe_pipeline_not_flagged", + "CALLS tests/test_shell_command.py:0 -> test_safe_sed_allowed", + "CALLS tests/test_shell_command.py:0 -> test_safe_sed_not_flagged", + "CALLS tests/test_shell_command.py:0 -> test_safe_segment_not_flagged", + "CALLS tests/test_shell_command.py:0 -> test_safe_xargs_allowed", + "CALLS tests/test_shell_command.py:0 -> test_safe_xargs_not_flagged", + "CALLS tests/test_shell_command.py:0 -> test_sed_execute_alternate_delimiters", + "CALLS tests/test_shell_command.py:0 -> test_sed_execute_flag_any_position", + "CALLS tests/test_shell_command.py:0 -> test_sed_execute_flag_detected", + "CALLS tests/test_shell_command.py:0 -> test_sed_execute_rejected", + "CALLS tests/test_shell_command.py:0 -> test_semicolon_operator", + "CALLS tests/test_shell_command.py:0 -> test_separate_r_f_flags", + "CALLS tests/test_shell_command.py:0 -> test_simple_pipe", + "CALLS tests/test_shell_command.py:0 -> test_single_quoted_subshell_pattern_allowed", + "CALLS tests/test_shell_command.py:0 -> 
test_subshell_in_double_quotes_detected", + "CALLS tests/test_shell_command.py:0 -> test_subshell_in_single_quotes_not_detected", + "CALLS tests/test_shell_command.py:0 -> test_subshell_outside_quotes_detected", + "CALLS tests/test_shell_command.py:0 -> test_subshell_rejected", + "CALLS tests/test_shell_command.py:0 -> test_system_control_blocked", + "CALLS tests/test_shell_command.py:0 -> test_tool_has_correct_name", + "CALLS tests/test_shell_command.py:0 -> test_tool_has_description", + "CALLS tests/test_shell_command.py:0 -> test_trailing_and", + "CALLS tests/test_shell_command.py:0 -> test_trailing_pipe", + "CALLS tests/test_shell_command.py:0 -> test_unsafe_git_subcommands_require_approval", + "CALLS tests/test_shell_command.py:0 -> test_valid_command", + "CALLS tests/test_shell_command.py:0 -> test_wildcard_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_write_command_in_pipe_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_write_command_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_write_command_with_approval", + "CALLS tests/test_shell_command.py:0 -> test_write_commands_require_approval", + "CALLS tests/test_shell_command.py:0 -> test_xargs_chmod_detected", + "CALLS tests/test_shell_command.py:0 -> test_xargs_chmod_rejected", + "CALLS tests/test_shell_command.py:0 -> test_xargs_rm_detected", + "CALLS tests/test_shell_command.py:0 -> test_xargs_rm_rejected", + "CALLS tests/test_shell_command.py:0 -> test_yolo_runs_non_allowlist_command", + "CALLS tests/test_shell_command.py:0 -> test_yolo_skips_approval_for_write_command", + "CALLS tests/test_shell_command.py:0 -> test_yolo_still_blocks_dangerous_rm_rf", + "CALLS tests/test_shell_command.py:0 -> text", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> PythonAstAnalyzerMixin", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> PythonTypeInferenceEngine", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> PythonVariableAnalyzerMixin", + "CALLS 
tests/test_sibling_mixin_resolution.py:0 -> RelationshipType", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> TestSiblingMixinResolution", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> _infer_instance_variable_types_from_assignments", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> _traverse_single_pass", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> engine", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> execute_write", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> fetch_all", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> flush_all", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> graph_updater", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> name", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> parent", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> repo_path", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> test_does_not_resolve_to_decoy_class", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> test_self_call_resolves_to_sibling_mixin_method", + "CALLS tests/test_single_file_repo_path.py:0 -> cpp_single_file", + "CALLS tests/test_single_file_repo_path.py:0 -> graph_updater", + "CALLS tests/test_single_file_repo_path.py:0 -> main", + "CALLS tests/test_single_file_repo_path.py:0 -> mock_ingestor", + "CALLS tests/test_single_file_repo_path.py:0 -> name", + "CALLS tests/test_single_file_repo_path.py:0 -> ran_single_file_updater", + "CALLS tests/test_single_file_repo_path.py:0 -> repo_path", + "CALLS tests/test_single_file_repo_path.py:0 -> temp_repo", + "CALLS tests/test_single_file_repo_path.py:0 -> test_directory_repo_path_still_works", + "CALLS tests/test_single_file_repo_path.py:0 -> test_single_file_repo_path_out_of_class_methods", + "CALLS tests/test_single_file_repo_path.py:0 -> test_single_file_repo_path_produces_graph", + "CALLS 
tests/test_single_file_repo_path.py:0 -> test_single_file_repo_path_static_functions", + "CALLS tests/test_single_file_repo_path.py:0 -> updater", + "CALLS tests/test_single_query_output_format.py:0 -> QueryFormat", + "CALLS tests/test_single_query_output_format.py:0 -> _initialize_services_and_agent", + "CALLS tests/test_single_query_output_format.py:0 -> _setup_common_initialization", + "CALLS tests/test_single_query_output_format.py:0 -> agent", + "CALLS tests/test_single_query_output_format.py:0 -> cli", + "CALLS tests/test_single_query_output_format.py:0 -> connect_memgraph", + "CALLS tests/test_single_query_output_format.py:0 -> main", + "CALLS tests/test_single_query_output_format.py:0 -> mock_agent_stack", + "CALLS tests/test_single_query_output_format.py:0 -> repo", + "CALLS tests/test_single_query_output_format.py:0 -> run", + "CALLS tests/test_single_query_output_format.py:0 -> runner", + "CALLS tests/test_single_query_output_format.py:0 -> start", + "CALLS tests/test_single_query_output_format.py:0 -> test_default_format_prints_plain_text", + "CALLS tests/test_single_query_output_format.py:0 -> test_json_format_preserves_non_ascii", + "CALLS tests/test_single_query_output_format.py:0 -> test_json_format_without_ask_agent_exits_with_error", + "CALLS tests/test_single_query_output_format.py:0 -> test_json_format_wraps_query_and_response", + "CALLS tests/test_slots_and_optimizations.py:0 -> BaseLanguageHandler", + "CALLS tests/test_slots_and_optimizations.py:0 -> CargoTomlParser", + "CALLS tests/test_slots_and_optimizations.py:0 -> ComposerJsonParser", + "CALLS tests/test_slots_and_optimizations.py:0 -> CppHandler", + "CALLS tests/test_slots_and_optimizations.py:0 -> CsprojParser", + "CALLS tests/test_slots_and_optimizations.py:0 -> DependencyParser", + "CALLS tests/test_slots_and_optimizations.py:0 -> GemfileParser", + "CALLS tests/test_slots_and_optimizations.py:0 -> GoModParser", + "CALLS tests/test_slots_and_optimizations.py:0 -> JavaHandler", + "CALLS 
tests/test_slots_and_optimizations.py:0 -> JsTsHandler", + "CALLS tests/test_slots_and_optimizations.py:0 -> LanguageHandler", + "CALLS tests/test_slots_and_optimizations.py:0 -> LuaHandler", + "CALLS tests/test_slots_and_optimizations.py:0 -> PackageJsonParser", + "CALLS tests/test_slots_and_optimizations.py:0 -> PyProjectTomlParser", + "CALLS tests/test_slots_and_optimizations.py:0 -> PythonHandler", + "CALLS tests/test_slots_and_optimizations.py:0 -> RequirementsTxtParser", + "CALLS tests/test_slots_and_optimizations.py:0 -> RustHandler", + "CALLS tests/test_slots_and_optimizations.py:0 -> TestCachedDecodeBytes", + "CALLS tests/test_slots_and_optimizations.py:0 -> TestDependencyParserSlots", + "CALLS tests/test_slots_and_optimizations.py:0 -> TestHandlerSlots", + "CALLS tests/test_slots_and_optimizations.py:0 -> TestStdlibExtractorSlots", + "CALLS tests/test_slots_and_optimizations.py:0 -> extractor", + "CALLS tests/test_slots_and_optimizations.py:0 -> repo_path", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_cache_maxsize", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_decode_bytes", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_decode_caches", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_handler_has_slots", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_handler_no_instance_dict", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_has_slots", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_no_instance_dict", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_parser_has_slots", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_parser_no_instance_dict", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_protocol_has_slots", + "CALLS tests/test_slots_and_optimizations.py:0 -> type", + "CALLS tests/test_slots_lazy_logger.py:0 -> CypherGenerator", + "CALLS tests/test_slots_lazy_logger.py:0 -> FileEditor", + "CALLS tests/test_slots_lazy_logger.py:0 -> GraphLoader", + "CALLS 
tests/test_slots_lazy_logger.py:0 -> ModelProvider", + "CALLS tests/test_slots_lazy_logger.py:0 -> TestLazyLoggerFormat", + "CALLS tests/test_slots_lazy_logger.py:0 -> TestProviderSlotsInheritance", + "CALLS tests/test_slots_lazy_logger.py:0 -> TestSlotsBlockDict", + "CALLS tests/test_slots_lazy_logger.py:0 -> TestSlotsPresence", + "CALLS tests/test_slots_lazy_logger.py:0 -> TestSlotsRejectArbitraryAttrs", + "CALLS tests/test_slots_lazy_logger.py:0 -> agent", + "CALLS tests/test_slots_lazy_logger.py:0 -> directory_lister", + "CALLS tests/test_slots_lazy_logger.py:0 -> file_editor", + "CALLS tests/test_slots_lazy_logger.py:0 -> file_reader", + "CALLS tests/test_slots_lazy_logger.py:0 -> file_writer", + "CALLS tests/test_slots_lazy_logger.py:0 -> is_yolo", + "CALLS tests/test_slots_lazy_logger.py:0 -> mock_ingestor", + "CALLS tests/test_slots_lazy_logger.py:0 -> mock_settings", + "CALLS tests/test_slots_lazy_logger.py:0 -> ollama_endpoint", + "CALLS tests/test_slots_lazy_logger.py:0 -> parent", + "CALLS tests/test_slots_lazy_logger.py:0 -> project_root", + "CALLS tests/test_slots_lazy_logger.py:0 -> rel_path", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_class_has_slots", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_code_retriever_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_command_group_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_command_group_rejects_attr", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_directory_lister_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_directory_lister_rejects_attr", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_file_reader_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_file_writer_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_google_provider_inherits_config_slot", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_google_provider_instance_has_all_attrs", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_graph_loader_has_slots", + "CALLS 
tests/test_slots_lazy_logger.py:0 -> test_health_checker_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_health_checker_rejects_attr", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_no_eager_debug_format", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_ollama_provider_inherits_config_slot", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_ollama_provider_instance_has_all_attrs", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_openai_provider_inherits_config_slot", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_openai_provider_instance_has_all_attrs", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_shell_commander_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_shell_commander_rejects_attr", + "CALLS tests/test_slots_lazy_logger.py:0 -> type", + "CALLS tests/test_source_extraction.py:0 -> TestExtractSourceLines", + "CALLS tests/test_source_extraction.py:0 -> TestExtractSourceWithFallback", + "CALLS tests/test_source_extraction.py:0 -> TestValidateSourceLocation", + "CALLS tests/test_source_extraction.py:0 -> mock_ast_extractor", + "CALLS tests/test_source_extraction.py:0 -> name", + "CALLS tests/test_source_extraction.py:0 -> test_clamps_end_line_returns_partial_content", + "CALLS tests/test_source_extraction.py:0 -> test_clamps_when_end_exceeds_file_length", + "CALLS tests/test_source_extraction.py:0 -> test_converts_string_to_path", + "CALLS tests/test_source_extraction.py:0 -> test_counts_blank_lines", + "CALLS tests/test_source_extraction.py:0 -> test_extracts_across_blank_lines", + "CALLS tests/test_source_extraction.py:0 -> test_extracts_all_lines", + "CALLS tests/test_source_extraction.py:0 -> test_extracts_multiple_lines", + "CALLS tests/test_source_extraction.py:0 -> test_extracts_single_line", + "CALLS tests/test_source_extraction.py:0 -> test_falls_back_to_lines_when_ast_extractor_raises", + "CALLS tests/test_source_extraction.py:0 -> test_falls_back_to_lines_when_ast_extractor_returns_none", + "CALLS 
tests/test_source_extraction.py:0 -> test_handles_empty_file", + "CALLS tests/test_source_extraction.py:0 -> test_handles_empty_string_path", + "CALLS tests/test_source_extraction.py:0 -> test_handles_windows_style_path", + "CALLS tests/test_source_extraction.py:0 -> test_line_count_matches_with_many_blank_lines", + "CALLS tests/test_source_extraction.py:0 -> test_preserves_indentation", + "CALLS tests/test_source_extraction.py:0 -> test_preserves_internal_blank_lines", + "CALLS tests/test_source_extraction.py:0 -> test_returns_false_when_all_are_none", + "CALLS tests/test_source_extraction.py:0 -> test_returns_false_when_end_line_is_none", + "CALLS tests/test_source_extraction.py:0 -> test_returns_false_when_file_path_is_none", + "CALLS tests/test_source_extraction.py:0 -> test_returns_false_when_start_line_is_none", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_for_negative_start_line", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_for_nonexistent_file", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_for_start_greater_than_end", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_for_zero_end_line", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_for_zero_start_line", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_when_start_exceeds_file_length", + "CALLS tests/test_source_extraction.py:0 -> test_returns_true_for_valid_location", + "CALLS tests/test_source_extraction.py:0 -> test_skips_ast_when_extractor_is_none", + "CALLS tests/test_source_extraction.py:0 -> test_skips_ast_when_qualified_name_is_none", + "CALLS tests/test_source_extraction.py:0 -> test_strips_trailing_whitespace", + "CALLS tests/test_source_extraction.py:0 -> test_uses_ast_extractor_when_provided", + "CALLS tests/test_source_extraction.py:0 -> test_uses_line_extraction_when_no_ast_extractor", + "CALLS tests/test_stack_manager.py:0 -> StackError", + "CALLS tests/test_stack_manager.py:0 -> 
StackState", + "CALLS tests/test_stack_manager.py:0 -> compose_file", + "CALLS tests/test_stack_manager.py:0 -> down", + "CALLS tests/test_stack_manager.py:0 -> fake_run", + "CALLS tests/test_stack_manager.py:0 -> fake_up", + "CALLS tests/test_stack_manager.py:0 -> memgraph_check", + "CALLS tests/test_stack_manager.py:0 -> qdrant_check", + "CALLS tests/test_stack_manager.py:0 -> run", + "CALLS tests/test_stack_manager.py:0 -> stack_home", + "CALLS tests/test_stack_manager.py:0 -> test_check_docker_raises_when_compose_missing", + "CALLS tests/test_stack_manager.py:0 -> test_check_docker_raises_when_daemon_down", + "CALLS tests/test_stack_manager.py:0 -> test_check_docker_raises_when_docker_not_on_path", + "CALLS tests/test_stack_manager.py:0 -> test_compose_cmd_uses_project_and_file", + "CALLS tests/test_stack_manager.py:0 -> test_ensure_compose_file_copies_when_missing", + "CALLS tests/test_stack_manager.py:0 -> test_ensure_compose_file_preserves_existing", + "CALLS tests/test_stack_manager.py:0 -> test_ensure_compose_file_raises_when_source_missing", + "CALLS tests/test_stack_manager.py:0 -> test_ensure_running_skips_docker_when_already_up", + "CALLS tests/test_stack_manager.py:0 -> test_ensure_running_starts_when_stopped", + "CALLS tests/test_stack_manager.py:0 -> test_status_returns_partial_when_only_memgraph_reachable", + "CALLS tests/test_stack_manager.py:0 -> test_status_returns_running_when_both_reachable", + "CALLS tests/test_stack_manager.py:0 -> test_status_returns_stopped_when_nothing_reachable", + "CALLS tests/test_stack_manager.py:0 -> test_up_propagates_failure", + "CALLS tests/test_stack_manager.py:0 -> value", + "CALLS tests/test_stack_manager.py:0 -> wait_for_memgraph", + "CALLS tests/test_stack_manager.py:0 -> wait_for_qdrant", + "CALLS tests/test_stack_manager.py:0 -> wait_healthy", + "CALLS tests/test_stats_command.py:0 -> TestStatsCommand", + "CALLS tests/test_stats_command.py:0 -> cli", + "CALLS tests/test_stats_command.py:0 -> 
connect_memgraph", + "CALLS tests/test_stats_command.py:0 -> fetch_all", + "CALLS tests/test_stats_command.py:0 -> mock_ingestor", + "CALLS tests/test_stats_command.py:0 -> mock_node_results", + "CALLS tests/test_stats_command.py:0 -> mock_rel_results", + "CALLS tests/test_stats_command.py:0 -> runner", + "CALLS tests/test_stats_command.py:0 -> stats", + "CALLS tests/test_stats_command.py:0 -> test_stats_displays_node_table", + "CALLS tests/test_stats_command.py:0 -> test_stats_displays_relationship_table", + "CALLS tests/test_stats_command.py:0 -> test_stats_displays_totals", + "CALLS tests/test_stats_command.py:0 -> test_stats_handles_connection_error", + "CALLS tests/test_stats_command.py:0 -> test_stats_handles_empty_graph", + "CALLS tests/test_stats_command.py:0 -> test_stats_handles_empty_labels", + "CALLS tests/test_stats_command.py:0 -> test_stats_handles_multi_label_nodes", + "CALLS tests/test_stats_command.py:0 -> type", + "CALLS tests/test_status_bar_config.py:0 -> _terminal_columns", + "CALLS tests/test_status_bar_config.py:0 -> active_cypher_config", + "CALLS tests/test_status_bar_config.py:0 -> active_orchestrator_config", + "CALLS tests/test_status_bar_config.py:0 -> fake_run", + "CALLS tests/test_status_bar_config.py:0 -> load_cgr_instructions", + "CALLS tests/test_status_bar_config.py:0 -> main", + "CALLS tests/test_status_bar_config.py:0 -> mock_settings", + "CALLS tests/test_status_bar_config.py:0 -> repo", + "CALLS tests/test_status_bar_config.py:0 -> reset_session", + "CALLS tests/test_status_bar_config.py:0 -> run", + "CALLS tests/test_status_bar_config.py:0 -> test_abbreviated_repo_handles_none", + "CALLS tests/test_status_bar_config.py:0 -> test_abbreviated_repo_keeps_absolute_for_outside_paths", + "CALLS tests/test_status_bar_config.py:0 -> test_abbreviated_repo_uses_tilde_for_home_paths", + "CALLS tests/test_status_bar_config.py:0 -> test_branch_appears_after_repo_when_inline", + "CALLS tests/test_status_bar_config.py:0 -> 
test_config_segments_always_shows_both_models", + "CALLS tests/test_status_bar_config.py:0 -> test_config_segments_reflects_session_flags", + "CALLS tests/test_status_bar_config.py:0 -> test_config_segments_shows_distinct_models", + "CALLS tests/test_status_bar_config.py:0 -> test_config_status_html_includes_model_and_repo", + "CALLS tests/test_status_bar_config.py:0 -> test_git_state_returns_none_when_target_missing", + "CALLS tests/test_status_bar_config.py:0 -> test_git_state_returns_none_without_target_repo", + "CALLS tests/test_status_bar_config.py:0 -> test_git_state_uses_target_repo_cwd", + "CALLS tests/test_status_bar_config.py:0 -> test_rich_status_bar_inlines_config_when_wide", + "CALLS tests/test_status_bar_config.py:0 -> test_rich_status_bar_wraps_config_when_narrow", + "CALLS tests/test_status_bar_config.py:0 -> test_status_bar_html_inlines_config_when_wide", + "CALLS tests/test_status_bar_config.py:0 -> test_status_bar_html_places_branch_after_repo_when_inline", + "CALLS tests/test_status_bar_config.py:0 -> test_status_bar_html_wraps_config_when_narrow", + "CALLS tests/test_status_bar_config.py:0 -> value", + "CALLS tests/test_stdlib_extractor.py:0 -> SupportedLanguage", + "CALLS tests/test_stdlib_extractor.py:0 -> TestCacheHelpers", + "CALLS tests/test_stdlib_extractor.py:0 -> TestCachePersistence", + "CALLS tests/test_stdlib_extractor.py:0 -> TestCachePersistenceErrorHandling", + "CALLS tests/test_stdlib_extractor.py:0 -> TestEdgeCases", + "CALLS tests/test_stdlib_extractor.py:0 -> TestGetStdlibCacheStats", + "CALLS tests/test_stdlib_extractor.py:0 -> TestGoExtractorWithMockedSubprocess", + "CALLS tests/test_stdlib_extractor.py:0 -> TestJavaExtractorWithMockedSubprocess", + "CALLS tests/test_stdlib_extractor.py:0 -> TestJsExtractorWithMockedNode", + "CALLS tests/test_stdlib_extractor.py:0 -> TestLuaExtractorWithMockedSubprocess", + "CALLS tests/test_stdlib_extractor.py:0 -> TestPythonExtractorEdgeCases", + "CALLS tests/test_stdlib_extractor.py:0 -> 
TestStdlibExtractorExtractModulePath", + "CALLS tests/test_stdlib_extractor.py:0 -> TestStdlibExtractorWithMockedSubprocesses", + "CALLS tests/test_stdlib_extractor.py:0 -> TestToolAvailability", + "CALLS tests/test_stdlib_extractor.py:0 -> extractor", + "CALLS tests/test_stdlib_extractor.py:0 -> extractor_with_registry", + "CALLS tests/test_stdlib_extractor.py:0 -> get", + "CALLS tests/test_stdlib_extractor.py:0 -> insert", + "CALLS tests/test_stdlib_extractor.py:0 -> mock_run", + "CALLS tests/test_stdlib_extractor.py:0 -> parent", + "CALLS tests/test_stdlib_extractor.py:0 -> reset_caches", + "CALLS tests/test_stdlib_extractor.py:0 -> run", + "CALLS tests/test_stdlib_extractor.py:0 -> stats", + "CALLS tests/test_stdlib_extractor.py:0 -> test_cache_stdlib_result_creates_entry", + "CALLS tests/test_stdlib_extractor.py:0 -> test_cache_ttl_expiration", + "CALLS tests/test_stdlib_extractor.py:0 -> test_clear_stdlib_cache", + "CALLS tests/test_stdlib_extractor.py:0 -> test_clear_stdlib_cache_handles_unlink_error", + "CALLS tests/test_stdlib_extractor.py:0 -> test_cpp_non_std_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_cpp_std_namespace_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_deeply_nested_path", + "CALLS tests/test_stdlib_extractor.py:0 -> test_empty_string", + "CALLS tests/test_stdlib_extractor.py:0 -> test_flush_stdlib_cache_calls_save", + "CALLS tests/test_stdlib_extractor.py:0 -> test_function_registry_entity_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_function_registry_none_handling", + "CALLS tests/test_stdlib_extractor.py:0 -> test_get_cached_stdlib_result_returns_cached_value", + "CALLS tests/test_stdlib_extractor.py:0 -> test_get_cached_stdlib_result_returns_none_for_missing", + "CALLS tests/test_stdlib_extractor.py:0 -> test_go_extractor_fallback_on_go_list_failure", + "CALLS tests/test_stdlib_extractor.py:0 -> test_go_extractor_fallback_on_timeout", + "CALLS tests/test_stdlib_extractor.py:0 -> 
test_go_extractor_lowercase_entity_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_go_extractor_returns_package_on_successful_introspection", + "CALLS tests/test_stdlib_extractor.py:0 -> test_go_single_part_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_go_uppercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_is_tool_available_caches_result", + "CALLS tests/test_stdlib_extractor.py:0 -> test_is_tool_available_returns_false_on_file_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_is_tool_available_returns_false_on_timeout", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_exception_suffix", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_extractor_builder_suffix", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_extractor_error_suffix", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_extractor_fallback_on_compile_failure", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_extractor_fallback_on_file_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_extractor_fallback_on_timeout", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_uppercase_class", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_extractor_fallback_on_entity_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_extractor_fallback_on_json_decode_error", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_extractor_fallback_on_timeout", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_extractor_returns_module_on_successful_introspection", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_stdlib_lowercase_entity_without_node", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_stdlib_uppercase_entity_without_node", + "CALLS tests/test_stdlib_extractor.py:0 -> test_load_persistent_cache_handles_json_decode_error", + "CALLS tests/test_stdlib_extractor.py:0 -> test_load_persistent_cache_handles_missing_file", + "CALLS tests/test_stdlib_extractor.py:0 -> 
test_lua_entity_in_stdlib_set", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_extractor_fallback_on_entity_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_extractor_fallback_on_lua_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_extractor_fallback_on_timeout", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_extractor_returns_module_on_successful_introspection", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_extractor_stdlib_module_in_set", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_stdlib_module_uppercase", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_uppercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_entity_is_module_not_class", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_entity_not_found_in_module", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_lowercase_entity_with_import_failure", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_single_part_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_stdlib_lowercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_stdlib_uppercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_uppercase_entity_with_import_failure", + "CALLS tests/test_stdlib_extractor.py:0 -> test_returns_correct_stats", + "CALLS tests/test_stdlib_extractor.py:0 -> test_returns_module_for_registered_class", + "CALLS tests/test_stdlib_extractor.py:0 -> test_returns_module_for_registered_function", + "CALLS tests/test_stdlib_extractor.py:0 -> test_rust_deeply_nested", + "CALLS tests/test_stdlib_extractor.py:0 -> test_rust_single_part_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_rust_stdlib_all_uppercase", + "CALLS tests/test_stdlib_extractor.py:0 -> test_rust_stdlib_uppercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_save_and_load_persistent_cache", + "CALLS tests/test_stdlib_extractor.py:0 -> 
test_save_persistent_cache_handles_os_error", + "CALLS tests/test_stdlib_extractor.py:0 -> test_scala_lowercase_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_scala_uppercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_single_part_path", + "CALLS tests/test_stdlib_extractor.py:0 -> test_ts_lowercase_strips_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_ts_uses_js_extraction_uppercase", + "CALLS tests/test_structural_relationships.py:0 -> complex_project", + "CALLS tests/test_structural_relationships.py:0 -> dependency_project", + "CALLS tests/test_structural_relationships.py:0 -> ensure_relationship_batch", + "CALLS tests/test_structural_relationships.py:0 -> export", + "CALLS tests/test_structural_relationships.py:0 -> graph_updater", + "CALLS tests/test_structural_relationships.py:0 -> index", + "CALLS tests/test_structural_relationships.py:0 -> main", + "CALLS tests/test_structural_relationships.py:0 -> mock_ingestor", + "CALLS tests/test_structural_relationships.py:0 -> name", + "CALLS tests/test_structural_relationships.py:0 -> relationships", + "CALLS tests/test_structural_relationships.py:0 -> repo_path", + "CALLS tests/test_structural_relationships.py:0 -> style", + "CALLS tests/test_structural_relationships.py:0 -> test_contains_file_relationships", + "CALLS tests/test_structural_relationships.py:0 -> test_contains_folder_relationships", + "CALLS tests/test_structural_relationships.py:0 -> test_contains_package_relationships", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_cargo_toml", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_composer_json", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_csproj", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_gemfile", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_go_mod", + "CALLS 
tests/test_structural_relationships.py:0 -> test_depends_on_external_package_json", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_pyproject_toml", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_python_requirements", + "CALLS tests/test_structural_relationships.py:0 -> test_edge_cases_empty_folders_and_special_files", + "CALLS tests/test_structural_relationships.py:0 -> test_mixed_structure_and_dependencies", + "CALLS tests/test_structural_relationships.py:0 -> type", + "CALLS tests/test_structural_relationships.py:0 -> updater", + "CALLS tests/test_structural_relationships.py:0 -> value", + "CALLS tests/test_structure_processor.py:0 -> SupportedLanguage", + "CALLS tests/test_structure_processor.py:0 -> TestIdentifyStructure", + "CALLS tests/test_structure_processor.py:0 -> TestMultipleLanguages", + "CALLS tests/test_structure_processor.py:0 -> TestProcessGenericFile", + "CALLS tests/test_structure_processor.py:0 -> TestStructureProcessorSlots", + "CALLS tests/test_structure_processor.py:0 -> ensure_node_batch", + "CALLS tests/test_structure_processor.py:0 -> ensure_relationship_batch", + "CALLS tests/test_structure_processor.py:0 -> mock_ingestor", + "CALLS tests/test_structure_processor.py:0 -> mock_language_queries", + "CALLS tests/test_structure_processor.py:0 -> name", + "CALLS tests/test_structure_processor.py:0 -> parent", + "CALLS tests/test_structure_processor.py:0 -> processor", + "CALLS tests/test_structure_processor.py:0 -> repo_path", + "CALLS tests/test_structure_processor.py:0 -> structure_processor", + "CALLS tests/test_structure_processor.py:0 -> temp_repo", + "CALLS tests/test_structure_processor.py:0 -> test_directory_with_init_py_identified_as_package", + "CALLS tests/test_structure_processor.py:0 -> test_directory_without_init_py_identified_as_folder", + "CALLS tests/test_structure_processor.py:0 -> test_empty_repo_creates_no_nodes", + "CALLS tests/test_structure_processor.py:0 -> 
test_file_at_root", + "CALLS tests/test_structure_processor.py:0 -> test_file_extension_extracted", + "CALLS tests/test_structure_processor.py:0 -> test_file_in_folder", + "CALLS tests/test_structure_processor.py:0 -> test_file_in_package", + "CALLS tests/test_structure_processor.py:0 -> test_file_without_extension", + "CALLS tests/test_structure_processor.py:0 -> test_folder_inside_package", + "CALLS tests/test_structure_processor.py:0 -> test_folder_parent_relationship_to_project", + "CALLS tests/test_structure_processor.py:0 -> test_has_slots", + "CALLS tests/test_structure_processor.py:0 -> test_ignored_directories_are_skipped", + "CALLS tests/test_structure_processor.py:0 -> test_multiple_package_indicators", + "CALLS tests/test_structure_processor.py:0 -> test_nested_ignored_directory_skipped", + "CALLS tests/test_structure_processor.py:0 -> test_nested_package_parent_relationship", + "CALLS tests/test_structure_processor.py:0 -> test_nested_packages", + "CALLS tests/test_structure_processor.py:0 -> test_no_instance_dict", + "CALLS tests/test_structure_processor.py:0 -> test_package_inside_folder", + "CALLS tests/test_structure_processor.py:0 -> test_package_parent_relationship_to_project", + "CALLS tests/test_structure_processor.py:0 -> test_rejects_arbitrary_attribute", + "CALLS tests/test_structure_processor.py:0 -> test_slot_attributes_accessible", + "CALLS tests/test_structure_processor.py:0 -> test_structural_elements_populated", + "CALLS tests/test_thenews_cross_file_calls.py:0 -> graph_updater", + "CALLS tests/test_thenews_cross_file_calls.py:0 -> handler", + "CALLS tests/test_thenews_cross_file_calls.py:0 -> main", + "CALLS tests/test_thenews_cross_file_calls.py:0 -> mock_ingestor", + "CALLS tests/test_thenews_cross_file_calls.py:0 -> name", + "CALLS tests/test_thenews_cross_file_calls.py:0 -> relationships", + "CALLS tests/test_thenews_cross_file_calls.py:0 -> repo_path", + "CALLS tests/test_thenews_cross_file_calls.py:0 -> start", + "CALLS 
tests/test_thenews_cross_file_calls.py:0 -> test_thenews_cross_file_method_calls_with_singleton_pattern", + "CALLS tests/test_thenews_cross_file_calls.py:0 -> updater", + "CALLS tests/test_token_utils.py:0 -> TestCountTokens", + "CALLS tests/test_token_utils.py:0 -> TestTruncateResultsByTokens", + "CALLS tests/test_token_utils.py:0 -> name", + "CALLS tests/test_token_utils.py:0 -> test_empty_results", + "CALLS tests/test_token_utils.py:0 -> test_empty_string", + "CALLS tests/test_token_utils.py:0 -> test_longer_string_has_more_tokens", + "CALLS tests/test_token_utils.py:0 -> test_preserves_row_order", + "CALLS tests/test_token_utils.py:0 -> test_results_exceed_limit", + "CALLS tests/test_token_utils.py:0 -> test_results_within_limit", + "CALLS tests/test_token_utils.py:0 -> test_simple_string", + "CALLS tests/test_token_utils.py:0 -> test_single_large_row_still_included", + "CALLS tests/test_token_utils.py:0 -> test_token_count_accuracy", + "CALLS tests/test_trie_optimization.py:0 -> NodeType", + "CALLS tests/test_trie_optimization.py:0 -> TestTrieOptimization", + "CALLS tests/test_trie_optimization.py:0 -> call_processor", + "CALLS tests/test_trie_optimization.py:0 -> class_qn", + "CALLS tests/test_trie_optimization.py:0 -> factory", + "CALLS tests/test_trie_optimization.py:0 -> graph_updater", + "CALLS tests/test_trie_optimization.py:0 -> graph_updater_with_trie", + "CALLS tests/test_trie_optimization.py:0 -> method_qn", + "CALLS tests/test_trie_optimization.py:0 -> mock_ingestor", + "CALLS tests/test_trie_optimization.py:0 -> process", + "CALLS tests/test_trie_optimization.py:0 -> repo_path", + "CALLS tests/test_trie_optimization.py:0 -> test_function_registry_trie_basic_operations", + "CALLS tests/test_trie_optimization.py:0 -> test_function_resolution_with_trie", + "CALLS tests/test_trie_optimization.py:0 -> test_trie_compatibility_with_existing_code", + "CALLS tests/test_trie_optimization.py:0 -> test_trie_performance_optimization", + "CALLS 
tests/test_trie_optimization.py:0 -> test_trie_prefix_and_suffix_search", + "CALLS tests/test_trie_optimization.py:0 -> updater", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> FunctionRegistryTrie", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> RelationshipType", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> TestTruthinessDispatchResolution", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> execute_write", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> fetch_all", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> flush_all", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> graph_updater", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> parent", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> repo_path", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> test_bool_takes_precedence_over_len", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> test_boolean_operator_operand_dispatches_to_len", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> test_if_truthiness_dispatches_to_len", + "CALLS tests/test_ts_closure_containment.py:0 -> RelationshipType", + "CALLS tests/test_ts_closure_containment.py:0 -> export", + "CALLS tests/test_ts_closure_containment.py:0 -> mock_ingestor", + "CALLS tests/test_ts_closure_containment.py:0 -> parent", + "CALLS tests/test_ts_closure_containment.py:0 -> temp_repo", + "CALLS tests/test_ts_closure_containment.py:0 -> test_function_in_anonymous_callback_defined_by_callback", + "CALLS tests/test_ts_closure_containment.py:0 -> value", + "CALLS tests/test_type_inference_iterative.py:0 -> JavaTypeInferenceEngine", + "CALLS tests/test_type_inference_iterative.py:0 -> JsTypeInferenceEngine", + "CALLS tests/test_type_inference_iterative.py:0 -> 
LuaTypeInferenceEngine", + "CALLS tests/test_type_inference_iterative.py:0 -> PythonTypeInferenceEngine", + "CALLS tests/test_type_inference_iterative.py:0 -> SupportedLanguage", + "CALLS tests/test_type_inference_iterative.py:0 -> TestBuildJavaVariableTypeMap", + "CALLS tests/test_type_inference_iterative.py:0 -> TestBuildLocalVariableTypeMapDispatch", + "CALLS tests/test_type_inference_iterative.py:0 -> TestLazyPropertyInitialization", + "CALLS tests/test_type_inference_iterative.py:0 -> TestResolveClassName", + "CALLS tests/test_type_inference_iterative.py:0 -> _infer_type_from_expression", + "CALLS tests/test_type_inference_iterative.py:0 -> build_variable_type_map", + "CALLS tests/test_type_inference_iterative.py:0 -> child_by_field_name", + "CALLS tests/test_type_inference_iterative.py:0 -> children", + "CALLS tests/test_type_inference_iterative.py:0 -> engine", + "CALLS tests/test_type_inference_iterative.py:0 -> import_processor", + "CALLS tests/test_type_inference_iterative.py:0 -> index", + "CALLS tests/test_type_inference_iterative.py:0 -> java_type_inference", + "CALLS tests/test_type_inference_iterative.py:0 -> js_type_inference", + "CALLS tests/test_type_inference_iterative.py:0 -> lua_type_inference", + "CALLS tests/test_type_inference_iterative.py:0 -> mock_function_registry", + "CALLS tests/test_type_inference_iterative.py:0 -> mock_import_processor", + "CALLS tests/test_type_inference_iterative.py:0 -> mock_node", + "CALLS tests/test_type_inference_iterative.py:0 -> name", + "CALLS tests/test_type_inference_iterative.py:0 -> python_type_inference", + "CALLS tests/test_type_inference_iterative.py:0 -> repo_path", + "CALLS tests/test_type_inference_iterative.py:0 -> test_analyze_self_assignments_handles_deep_tree_without_recursion_error", + "CALLS tests/test_type_inference_iterative.py:0 -> test_delegates_to_java_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> test_delegates_to_resolve_class_name_function", + "CALLS 
tests/test_type_inference_iterative.py:0 -> test_dispatches_to_java_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> test_dispatches_to_js_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> test_dispatches_to_lua_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> test_dispatches_to_python_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> test_dispatches_to_ts_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> test_find_return_statements_handles_deep_tree_without_recursion_error", + "CALLS tests/test_type_inference_iterative.py:0 -> test_java_type_inference_lazy_init", + "CALLS tests/test_type_inference_iterative.py:0 -> test_js_type_inference_lazy_init", + "CALLS tests/test_type_inference_iterative.py:0 -> test_lua_type_inference_lazy_init", + "CALLS tests/test_type_inference_iterative.py:0 -> test_python_type_inference_lazy_init", + "CALLS tests/test_type_inference_iterative.py:0 -> test_returns_empty_dict_for_unsupported_language", + "CALLS tests/test_type_inference_iterative.py:0 -> test_returns_none_when_class_not_found", + "CALLS tests/test_type_inference_iterative.py:0 -> text", + "CALLS tests/test_type_inference_iterative.py:0 -> type_inference", + "CALLS tests/test_type_inference_iterative.py:0 -> value", + "CALLS tests/test_typescript_advanced_types.py:0 -> AppConfig", + "CALLS tests/test_typescript_advanced_types.py:0 -> add", + "CALLS tests/test_typescript_advanced_types.py:0 -> ensure_relationship_batch", + "CALLS tests/test_typescript_advanced_types.py:0 -> export", + "CALLS tests/test_typescript_advanced_types.py:0 -> get", + "CALLS tests/test_typescript_advanced_types.py:0 -> handler", + "CALLS tests/test_typescript_advanced_types.py:0 -> index", + "CALLS tests/test_typescript_advanced_types.py:0 -> infer", + "CALLS tests/test_typescript_advanced_types.py:0 -> items", + "CALLS tests/test_typescript_advanced_types.py:0 -> keys", + "CALLS tests/test_typescript_advanced_types.py:0 -> 
mock_ingestor", + "CALLS tests/test_typescript_advanced_types.py:0 -> name", + "CALLS tests/test_typescript_advanced_types.py:0 -> process", + "CALLS tests/test_typescript_advanced_types.py:0 -> processor", + "CALLS tests/test_typescript_advanced_types.py:0 -> put", + "CALLS tests/test_typescript_advanced_types.py:0 -> read", + "CALLS tests/test_typescript_advanced_types.py:0 -> temp_repo", + "CALLS tests/test_typescript_advanced_types.py:0 -> test_conditional_types", + "CALLS tests/test_typescript_advanced_types.py:0 -> test_generic_types", + "CALLS tests/test_typescript_advanced_types.py:0 -> test_template_literal_types", + "CALLS tests/test_typescript_advanced_types.py:0 -> test_typescript_advanced_types_comprehensive", + "CALLS tests/test_typescript_advanced_types.py:0 -> test_utility_types", + "CALLS tests/test_typescript_advanced_types.py:0 -> type", + "CALLS tests/test_typescript_advanced_types.py:0 -> typescript_advanced_types_project", + "CALLS tests/test_typescript_advanced_types.py:0 -> value", + "CALLS tests/test_typescript_classes.py:0 -> add", + "CALLS tests/test_typescript_classes.py:0 -> done", + "CALLS tests/test_typescript_classes.py:0 -> engine", + "CALLS tests/test_typescript_classes.py:0 -> ensure_relationship_batch", + "CALLS tests/test_typescript_classes.py:0 -> export", + "CALLS tests/test_typescript_classes.py:0 -> factory", + "CALLS tests/test_typescript_classes.py:0 -> get", + "CALLS tests/test_typescript_classes.py:0 -> index", + "CALLS tests/test_typescript_classes.py:0 -> items", + "CALLS tests/test_typescript_classes.py:0 -> keys", + "CALLS tests/test_typescript_classes.py:0 -> metadata", + "CALLS tests/test_typescript_classes.py:0 -> method_calls", + "CALLS tests/test_typescript_classes.py:0 -> mock_ingestor", + "CALLS tests/test_typescript_classes.py:0 -> name", + "CALLS tests/test_typescript_classes.py:0 -> operation", + "CALLS tests/test_typescript_classes.py:0 -> parent", + "CALLS tests/test_typescript_classes.py:0 -> process", + 
"CALLS tests/test_typescript_classes.py:0 -> processor", + "CALLS tests/test_typescript_classes.py:0 -> read", + "CALLS tests/test_typescript_classes.py:0 -> relationships", + "CALLS tests/test_typescript_classes.py:0 -> repo", + "CALLS tests/test_typescript_classes.py:0 -> save", + "CALLS tests/test_typescript_classes.py:0 -> start", + "CALLS tests/test_typescript_classes.py:0 -> status", + "CALLS tests/test_typescript_classes.py:0 -> temp_repo", + "CALLS tests/test_typescript_classes.py:0 -> test_abstract_classes", + "CALLS tests/test_typescript_classes.py:0 -> test_access_modifiers", + "CALLS tests/test_typescript_classes.py:0 -> test_parameter_properties", + "CALLS tests/test_typescript_classes.py:0 -> test_typescript_class_comprehensive", + "CALLS tests/test_typescript_classes.py:0 -> type", + "CALLS tests/test_typescript_classes.py:0 -> typescript_classes_project", + "CALLS tests/test_typescript_classes.py:0 -> value", + "CALLS tests/test_typescript_containment_oracle.py:0 -> Color", + "CALLS tests/test_typescript_containment_oracle.py:0 -> RelationshipType", + "CALLS tests/test_typescript_containment_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_typescript_containment_oracle.py:0 -> export", + "CALLS tests/test_typescript_containment_oracle.py:0 -> name", + "CALLS tests/test_typescript_containment_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_containment_edges", + "CALLS tests/test_typescript_containment_oracle.py:0 -> type", + "CALLS tests/test_typescript_containment_oracle.py:0 -> value", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> export", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> load", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> main", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> mock_ingestor", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> name", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> save", + "CALLS 
tests/test_typescript_cross_file_singleton.py:0 -> start", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> temp_repo", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> test_ts_singleton_pattern_cross_file_calls", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> ts_singleton_project", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> up", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> value", + "CALLS tests/test_typescript_declaration_files.py:0 -> NodeType", + "CALLS tests/test_typescript_declaration_files.py:0 -> description", + "CALLS tests/test_typescript_declaration_files.py:0 -> ensure_node_batch", + "CALLS tests/test_typescript_declaration_files.py:0 -> ensure_relationship_batch", + "CALLS tests/test_typescript_declaration_files.py:0 -> export", + "CALLS tests/test_typescript_declaration_files.py:0 -> handler", + "CALLS tests/test_typescript_declaration_files.py:0 -> index", + "CALLS tests/test_typescript_declaration_files.py:0 -> items", + "CALLS tests/test_typescript_declaration_files.py:0 -> keys", + "CALLS tests/test_typescript_declaration_files.py:0 -> main", + "CALLS tests/test_typescript_declaration_files.py:0 -> mock_ingestor", + "CALLS tests/test_typescript_declaration_files.py:0 -> name", + "CALLS tests/test_typescript_declaration_files.py:0 -> nodes", + "CALLS tests/test_typescript_declaration_files.py:0 -> parse", + "CALLS tests/test_typescript_declaration_files.py:0 -> process", + "CALLS tests/test_typescript_declaration_files.py:0 -> processor", + "CALLS tests/test_typescript_declaration_files.py:0 -> put", + "CALLS tests/test_typescript_declaration_files.py:0 -> start", + "CALLS tests/test_typescript_declaration_files.py:0 -> stats", + "CALLS tests/test_typescript_declaration_files.py:0 -> status", + "CALLS tests/test_typescript_declaration_files.py:0 -> temp_repo", + "CALLS tests/test_typescript_declaration_files.py:0 -> test_ambient_declarations", + "CALLS 
tests/test_typescript_declaration_files.py:0 -> test_global_augmentations", + "CALLS tests/test_typescript_declaration_files.py:0 -> test_module_declarations", + "CALLS tests/test_typescript_declaration_files.py:0 -> test_typescript_declarations_comprehensive", + "CALLS tests/test_typescript_declaration_files.py:0 -> text", + "CALLS tests/test_typescript_declaration_files.py:0 -> type", + "CALLS tests/test_typescript_declaration_files.py:0 -> typescript_declarations_project", + "CALLS tests/test_typescript_declaration_files.py:0 -> value", + "CALLS tests/test_typescript_decorators.py:0 -> Dependency", + "CALLS tests/test_typescript_decorators.py:0 -> clear", + "CALLS tests/test_typescript_decorators.py:0 -> decorator", + "CALLS tests/test_typescript_decorators.py:0 -> ensure_relationship_batch", + "CALLS tests/test_typescript_decorators.py:0 -> execute", + "CALLS tests/test_typescript_decorators.py:0 -> export", + "CALLS tests/test_typescript_decorators.py:0 -> factory", + "CALLS tests/test_typescript_decorators.py:0 -> get", + "CALLS tests/test_typescript_decorators.py:0 -> index", + "CALLS tests/test_typescript_decorators.py:0 -> items", + "CALLS tests/test_typescript_decorators.py:0 -> keys", + "CALLS tests/test_typescript_decorators.py:0 -> metadata", + "CALLS tests/test_typescript_decorators.py:0 -> method_calls", + "CALLS tests/test_typescript_decorators.py:0 -> mock_ingestor", + "CALLS tests/test_typescript_decorators.py:0 -> name", + "CALLS tests/test_typescript_decorators.py:0 -> operation", + "CALLS tests/test_typescript_decorators.py:0 -> process", + "CALLS tests/test_typescript_decorators.py:0 -> processor", + "CALLS tests/test_typescript_decorators.py:0 -> read", + "CALLS tests/test_typescript_decorators.py:0 -> relationships", + "CALLS tests/test_typescript_decorators.py:0 -> render", + "CALLS tests/test_typescript_decorators.py:0 -> save", + "CALLS tests/test_typescript_decorators.py:0 -> start", + "CALLS tests/test_typescript_decorators.py:0 -> 
temp_repo", + "CALLS tests/test_typescript_decorators.py:0 -> test_class_decorators", + "CALLS tests/test_typescript_decorators.py:0 -> test_method_decorators", + "CALLS tests/test_typescript_decorators.py:0 -> test_parameter_decorators", + "CALLS tests/test_typescript_decorators.py:0 -> test_property_decorators", + "CALLS tests/test_typescript_decorators.py:0 -> test_typescript_decorators_comprehensive", + "CALLS tests/test_typescript_decorators.py:0 -> type", + "CALLS tests/test_typescript_decorators.py:0 -> typescript_decorators_project", + "CALLS tests/test_typescript_decorators.py:0 -> value", + "CALLS tests/test_typescript_enums.py:0 -> Color", + "CALLS tests/test_typescript_enums.py:0 -> EventType", + "CALLS tests/test_typescript_enums.py:0 -> down", + "CALLS tests/test_typescript_enums.py:0 -> ensure_relationship_batch", + "CALLS tests/test_typescript_enums.py:0 -> export", + "CALLS tests/test_typescript_enums.py:0 -> get", + "CALLS tests/test_typescript_enums.py:0 -> handler", + "CALLS tests/test_typescript_enums.py:0 -> keys", + "CALLS tests/test_typescript_enums.py:0 -> load", + "CALLS tests/test_typescript_enums.py:0 -> mock_ingestor", + "CALLS tests/test_typescript_enums.py:0 -> name", + "CALLS tests/test_typescript_enums.py:0 -> operation", + "CALLS tests/test_typescript_enums.py:0 -> process", + "CALLS tests/test_typescript_enums.py:0 -> put", + "CALLS tests/test_typescript_enums.py:0 -> status", + "CALLS tests/test_typescript_enums.py:0 -> style", + "CALLS tests/test_typescript_enums.py:0 -> submit", + "CALLS tests/test_typescript_enums.py:0 -> temp_repo", + "CALLS tests/test_typescript_enums.py:0 -> test_const_enums", + "CALLS tests/test_typescript_enums.py:0 -> test_enum_comprehensive", + "CALLS tests/test_typescript_enums.py:0 -> test_numeric_enums", + "CALLS tests/test_typescript_enums.py:0 -> test_string_enums", + "CALLS tests/test_typescript_enums.py:0 -> text", + "CALLS tests/test_typescript_enums.py:0 -> type", + "CALLS 
tests/test_typescript_enums.py:0 -> typescript_enums_project", + "CALLS tests/test_typescript_enums.py:0 -> up", + "CALLS tests/test_typescript_enums.py:0 -> value", + "CALLS tests/test_typescript_implements_edges.py:0 -> RelationshipType", + "CALLS tests/test_typescript_implements_edges.py:0 -> export", + "CALLS tests/test_typescript_implements_edges.py:0 -> mock_ingestor", + "CALLS tests/test_typescript_implements_edges.py:0 -> temp_repo", + "CALLS tests/test_typescript_implements_edges.py:0 -> test_typescript_class_implements_edges", + "CALLS tests/test_typescript_implements_edges.py:0 -> value", + "CALLS tests/test_typescript_inheritance_oracle.py:0 -> RelationshipType", + "CALLS tests/test_typescript_inheritance_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_typescript_inheritance_oracle.py:0 -> export", + "CALLS tests/test_typescript_inheritance_oracle.py:0 -> name", + "CALLS tests/test_typescript_inheritance_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_inheritance_edges", + "CALLS tests/test_typescript_inheritance_oracle.py:0 -> value", + "CALLS tests/test_typescript_namespace_qn.py:0 -> NodeLabel", + "CALLS tests/test_typescript_namespace_qn.py:0 -> export", + "CALLS tests/test_typescript_namespace_qn.py:0 -> mock_ingestor", + "CALLS tests/test_typescript_namespace_qn.py:0 -> name", + "CALLS tests/test_typescript_namespace_qn.py:0 -> nodes", + "CALLS tests/test_typescript_namespace_qn.py:0 -> temp_repo", + "CALLS tests/test_typescript_namespace_qn.py:0 -> test_typescript_namespace_class_qn_includes_namespace", + "CALLS tests/test_typescript_namespace_qn.py:0 -> type", + "CALLS tests/test_typescript_namespace_qn.py:0 -> walk", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> NodeType", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> add", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> clear", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> ensure_node_batch", + "CALLS 
tests/test_typescript_namespaces_modules.py:0 -> ensure_relationship_batch", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> export", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> index", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> items", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> mock_ingestor", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> name", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> nodes", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> parse", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> process", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> start", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> status", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> style", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> temp_repo", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> test_module_patterns", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> test_namespace_declarations", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> test_namespace_merging", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> test_typescript_namespaces_comprehensive", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> typescript_namespaces_project", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> value", + "CALLS tests/test_typescript_span_oracle.py:0 -> Color", + "CALLS tests/test_typescript_span_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_typescript_span_oracle.py:0 -> export", + "CALLS tests/test_typescript_span_oracle.py:0 -> main", + "CALLS tests/test_typescript_span_oracle.py:0 -> name", + "CALLS tests/test_typescript_span_oracle.py:0 -> start", + "CALLS tests/test_typescript_span_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_node_spans", + "CALLS tests/test_typescript_span_oracle.py:0 -> type", + "CALLS tests/test_typescript_structure_oracle.py:0 -> Color", + "CALLS 
tests/test_typescript_structure_oracle.py:0 -> SupportedLanguage", + "CALLS tests/test_typescript_structure_oracle.py:0 -> export", + "CALLS tests/test_typescript_structure_oracle.py:0 -> name", + "CALLS tests/test_typescript_structure_oracle.py:0 -> nodes", + "CALLS tests/test_typescript_structure_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_typescript_structure", + "CALLS tests/test_typescript_structure_oracle.py:0 -> type", + "CALLS tests/test_typescript_types.py:0 -> add", + "CALLS tests/test_typescript_types.py:0 -> clear", + "CALLS tests/test_typescript_types.py:0 -> description", + "CALLS tests/test_typescript_types.py:0 -> ensure_node_batch", + "CALLS tests/test_typescript_types.py:0 -> ensure_relationship_batch", + "CALLS tests/test_typescript_types.py:0 -> export", + "CALLS tests/test_typescript_types.py:0 -> handler", + "CALLS tests/test_typescript_types.py:0 -> infer", + "CALLS tests/test_typescript_types.py:0 -> items", + "CALLS tests/test_typescript_types.py:0 -> keys", + "CALLS tests/test_typescript_types.py:0 -> mock_ingestor", + "CALLS tests/test_typescript_types.py:0 -> name", + "CALLS tests/test_typescript_types.py:0 -> read", + "CALLS tests/test_typescript_types.py:0 -> relationships", + "CALLS tests/test_typescript_types.py:0 -> save", + "CALLS tests/test_typescript_types.py:0 -> status", + "CALLS tests/test_typescript_types.py:0 -> submit", + "CALLS tests/test_typescript_types.py:0 -> temp_repo", + "CALLS tests/test_typescript_types.py:0 -> test_basic_type_annotations", + "CALLS tests/test_typescript_types.py:0 -> test_generic_types", + "CALLS tests/test_typescript_types.py:0 -> test_interfaces_and_type_aliases", + "CALLS tests/test_typescript_types.py:0 -> test_type_comprehensive", + "CALLS tests/test_typescript_types.py:0 -> test_utility_types", + "CALLS tests/test_typescript_types.py:0 -> type", + "CALLS tests/test_typescript_types.py:0 -> typescript_types_project", + "CALLS tests/test_typescript_types.py:0 -> value", + "CALLS 
tests/test_unixcoder_unit.py:0 -> TestBeamAdvance", + "CALLS tests/test_unixcoder_unit.py:0 -> TestBeamBuildTargetTokens", + "CALLS tests/test_unixcoder_unit.py:0 -> TestBeamDone", + "CALLS tests/test_unixcoder_unit.py:0 -> TestBeamGetCurrentOrigin", + "CALLS tests/test_unixcoder_unit.py:0 -> TestBeamGetCurrentState", + "CALLS tests/test_unixcoder_unit.py:0 -> TestBeamGetFinal", + "CALLS tests/test_unixcoder_unit.py:0 -> TestBeamGetHyp", + "CALLS tests/test_unixcoder_unit.py:0 -> TestBeamInit", + "CALLS tests/test_unixcoder_unit.py:0 -> TestForwardAttentionMask", + "CALLS tests/test_unixcoder_unit.py:0 -> UniXcoder", + "CALLS tests/test_unixcoder_unit.py:0 -> fake_model", + "CALLS tests/test_unixcoder_unit.py:0 -> test_adds_current_state_if_empty_finished", + "CALLS tests/test_unixcoder_unit.py:0 -> test_attention_mask_is_4d", + "CALLS tests/test_unixcoder_unit.py:0 -> test_builds_tokens_until_eos", + "CALLS tests/test_unixcoder_unit.py:0 -> test_constructs_hypothesis_path", + "CALLS tests/test_unixcoder_unit.py:0 -> test_done_when_eos_top_and_enough_finished", + "CALLS tests/test_unixcoder_unit.py:0 -> test_first_step_uses_first_beam", + "CALLS tests/test_unixcoder_unit.py:0 -> test_handles_no_eos", + "CALLS tests/test_unixcoder_unit.py:0 -> test_initializes_empty_prevKs", + "CALLS tests/test_unixcoder_unit.py:0 -> test_initializes_finished_empty", + "CALLS tests/test_unixcoder_unit.py:0 -> test_initializes_nextYs_with_zeros", + "CALLS tests/test_unixcoder_unit.py:0 -> test_initializes_scores_to_zero", + "CALLS tests/test_unixcoder_unit.py:0 -> test_initializes_with_correct_size", + "CALLS tests/test_unixcoder_unit.py:0 -> test_marks_eos_in_finished", + "CALLS tests/test_unixcoder_unit.py:0 -> test_not_done_initially", + "CALLS tests/test_unixcoder_unit.py:0 -> test_not_done_when_not_enough_finished", + "CALLS tests/test_unixcoder_unit.py:0 -> test_not_done_when_not_eos_top", + "CALLS tests/test_unixcoder_unit.py:0 -> test_returns_batch_shaped_tensor", + "CALLS 
tests/test_unixcoder_unit.py:0 -> test_returns_finished_sorted_by_score", + "CALLS tests/test_unixcoder_unit.py:0 -> test_returns_last_nextYs", + "CALLS tests/test_unixcoder_unit.py:0 -> test_returns_last_prevKs", + "CALLS tests/test_unixcoder_unit.py:0 -> test_subsequent_steps_combine_scores", + "CALLS tests/test_vector_store.py:0 -> integration_client", + "CALLS tests/test_vector_store.py:0 -> mock_qdrant_client", + "CALLS tests/test_vector_store.py:0 -> reset_global_client", + "CALLS tests/test_vector_store.py:0 -> temp_qdrant_path", + "CALLS tests/test_vector_store.py:0 -> test_empty_search_returns_empty_list", + "CALLS tests/test_vector_store.py:0 -> test_get_qdrant_client_logs_and_reraises_on_lock_error", + "CALLS tests/test_vector_store.py:0 -> test_get_qdrant_client_uses_path_when_url_unset", + "CALLS tests/test_vector_store.py:0 -> test_get_qdrant_client_uses_url_when_set", + "CALLS tests/test_vector_store.py:0 -> test_search_embeddings_calls_query_points", + "CALLS tests/test_vector_store.py:0 -> test_search_embeddings_default_top_k", + "CALLS tests/test_vector_store.py:0 -> test_search_embeddings_filters_null_payloads", + "CALLS tests/test_vector_store.py:0 -> test_search_embeddings_handles_exception", + "CALLS tests/test_vector_store.py:0 -> test_store_and_search_roundtrip", + "CALLS tests/test_vector_store.py:0 -> test_store_embedding_calls_upsert", + "CALLS tests/test_vector_store.py:0 -> test_store_embedding_handles_exception", + "CALLS tests/test_vector_store.py:0 -> test_upsert_updates_existing", + "CALLS tests/test_vector_store_batch.py:0 -> TestDeleteProjectEmbeddings", + "CALLS tests/test_vector_store_batch.py:0 -> TestStoreEmbeddingBatch", + "CALLS tests/test_vector_store_batch.py:0 -> TestUpsertWithRetry", + "CALLS tests/test_vector_store_batch.py:0 -> TestVerifyStoredIds", + "CALLS tests/test_vector_store_batch.py:0 -> get_qdrant_client", + "CALLS tests/test_vector_store_batch.py:0 -> test_batches_large_id_sets", + "CALLS 
tests/test_vector_store_batch.py:0 -> test_builds_correct_point_structs", + "CALLS tests/test_vector_store_batch.py:0 -> test_deletes_given_ids", + "CALLS tests/test_vector_store_batch.py:0 -> test_exponential_backoff_delays", + "CALLS tests/test_vector_store_batch.py:0 -> test_handles_exception_gracefully", + "CALLS tests/test_vector_store_batch.py:0 -> test_noop_on_empty_ids", + "CALLS tests/test_vector_store_batch.py:0 -> test_raises_after_exhausting_retries", + "CALLS tests/test_vector_store_batch.py:0 -> test_raises_on_exception", + "CALLS tests/test_vector_store_batch.py:0 -> test_retries_on_failure_then_succeeds", + "CALLS tests/test_vector_store_batch.py:0 -> test_retrieve_called_with_correct_params", + "CALLS tests/test_vector_store_batch.py:0 -> test_returns_count_on_success", + "CALLS tests/test_vector_store_batch.py:0 -> test_returns_empty_for_empty_input", + "CALLS tests/test_vector_store_batch.py:0 -> test_returns_found_ids", + "CALLS tests/test_vector_store_batch.py:0 -> test_returns_zero_on_empty", + "CALLS tests/test_vector_store_batch.py:0 -> test_returns_zero_on_failure", + "CALLS tests/test_vector_store_batch.py:0 -> test_succeeds_on_first_attempt", + "CALLS tests/test_workspaces.py:0 -> TestCli", + "CALLS tests/test_workspaces.py:0 -> TestStorage", + "CALLS tests/test_workspaces.py:0 -> WorkspaceError", + "CALLS tests/test_workspaces.py:0 -> _run_graph_sync", + "CALLS tests/test_workspaces.py:0 -> _temp_home", + "CALLS tests/test_workspaces.py:0 -> _update_and_validate_models", + "CALLS tests/test_workspaces.py:0 -> add", + "CALLS tests/test_workspaces.py:0 -> agent", + "CALLS tests/test_workspaces.py:0 -> cli", + "CALLS tests/test_workspaces.py:0 -> connect_memgraph", + "CALLS tests/test_workspaces.py:0 -> description", + "CALLS tests/test_workspaces.py:0 -> main_single_query", + "CALLS tests/test_workspaces.py:0 -> mock_ingestor", + "CALLS tests/test_workspaces.py:0 -> mock_memgraph_connect", + "CALLS tests/test_workspaces.py:0 -> 
mock_validate_models", + "CALLS tests/test_workspaces.py:0 -> name", + "CALLS tests/test_workspaces.py:0 -> repo", + "CALLS tests/test_workspaces.py:0 -> runner", + "CALLS tests/test_workspaces.py:0 -> start", + "CALLS tests/test_workspaces.py:0 -> test_add_repo_derives_project_name", + "CALLS tests/test_workspaces.py:0 -> test_add_repo_duplicate", + "CALLS tests/test_workspaces.py:0 -> test_add_repo_missing_path", + "CALLS tests/test_workspaces.py:0 -> test_add_repo_with_explicit_project_name", + "CALLS tests/test_workspaces.py:0 -> test_create_duplicate_raises", + "CALLS tests/test_workspaces.py:0 -> test_create_then_load", + "CALLS tests/test_workspaces.py:0 -> test_create_with_force_overwrites", + "CALLS tests/test_workspaces.py:0 -> test_delete", + "CALLS tests/test_workspaces.py:0 -> test_delete_missing_raises", + "CALLS tests/test_workspaces.py:0 -> test_list_empty", + "CALLS tests/test_workspaces.py:0 -> test_list_sorted", + "CALLS tests/test_workspaces.py:0 -> test_load_missing_raises", + "CALLS tests/test_workspaces.py:0 -> test_remove_repo", + "CALLS tests/test_workspaces.py:0 -> test_remove_repo_not_in_workspace", + "CALLS tests/test_workspaces.py:0 -> test_start_with_unknown_workspace_errors", + "CALLS tests/test_workspaces.py:0 -> test_start_with_workspace_passes_all_projects", + "CALLS tests/test_workspaces.py:0 -> test_workspace_add_remove_repo_via_cli", + "CALLS tests/test_workspaces.py:0 -> test_workspace_create_list_show_delete", + "CALLS tests/test_workspaces.py:0 -> test_workspace_list_empty", + "CALLS tests/test_workspaces.py:0 -> test_workspace_model_project_names", + "CALLS tool_errors.py:0 -> export", + "CALLS tool_errors.py:0 -> operation", + "CALLS tool_errors.py:0 -> read", + "CALLS tool_errors.py:0 -> text", + "CALLS tool_errors.py:0 -> value", + "CALLS tool_errors.py:0 -> wrapper", + "CALLS tools/code_retrieval.py:0 -> AgenticToolName", + "CALLS tools/code_retrieval.py:0 -> CodeRetriever", + "CALLS tools/code_retrieval.py:0 -> 
QueryProtocol", + "CALLS tools/code_retrieval.py:0 -> create_code_retrieval_tool", + "CALLS tools/code_retrieval.py:0 -> description", + "CALLS tools/code_retrieval.py:0 -> fetch_all", + "CALLS tools/code_retrieval.py:0 -> get_code_snippet", + "CALLS tools/code_retrieval.py:0 -> logs", + "CALLS tools/code_retrieval.py:0 -> name", + "CALLS tools/code_retrieval.py:0 -> project_root", + "CALLS tools/code_retrieval.py:0 -> start", + "CALLS tools/codebase_query.py:0 -> AgenticToolName", + "CALLS tools/codebase_query.py:0 -> CypherGenerator", + "CALLS tools/codebase_query.py:0 -> LLMGenerationError", + "CALLS tools/codebase_query.py:0 -> QueryProtocol", + "CALLS tools/codebase_query.py:0 -> create_query_tool", + "CALLS tools/codebase_query.py:0 -> description", + "CALLS tools/codebase_query.py:0 -> fetch_all", + "CALLS tools/codebase_query.py:0 -> logs", + "CALLS tools/codebase_query.py:0 -> name", + "CALLS tools/codebase_query.py:0 -> query_codebase_knowledge_graph", + "CALLS tools/codebase_query.py:0 -> summary", + "CALLS tools/codebase_query.py:0 -> value", + "CALLS tools/directory_lister.py:0 -> AgenticToolName", + "CALLS tools/directory_lister.py:0 -> DirectoryLister", + "CALLS tools/directory_lister.py:0 -> create_directory_lister_tool", + "CALLS tools/directory_lister.py:0 -> description", + "CALLS tools/directory_lister.py:0 -> directory_lister", + "CALLS tools/directory_lister.py:0 -> list_directory_contents", + "CALLS tools/directory_lister.py:0 -> logs", + "CALLS tools/directory_lister.py:0 -> name", + "CALLS tools/directory_lister.py:0 -> project_root", + "CALLS tools/file_editor.py:0 -> AgenticToolName", + "CALLS tools/file_editor.py:0 -> FileAction", + "CALLS tools/file_editor.py:0 -> FileEditor", + "CALLS tools/file_editor.py:0 -> FunctionMatch", + "CALLS tools/file_editor.py:0 -> apply_patch_to_file", + "CALLS tools/file_editor.py:0 -> children", + "CALLS tools/file_editor.py:0 -> create_file_editor_tool", + "CALLS tools/file_editor.py:0 -> description", 
+ "CALLS tools/file_editor.py:0 -> edit_file", + "CALLS tools/file_editor.py:0 -> file_editor", + "CALLS tools/file_editor.py:0 -> get_diff", + "CALLS tools/file_editor.py:0 -> logs", + "CALLS tools/file_editor.py:0 -> name", + "CALLS tools/file_editor.py:0 -> project_root", + "CALLS tools/file_editor.py:0 -> replace_code_surgically", + "CALLS tools/file_editor.py:0 -> text", + "CALLS tools/file_editor.py:0 -> type", + "CALLS tools/file_reader.py:0 -> AgenticToolName", + "CALLS tools/file_reader.py:0 -> FileReader", + "CALLS tools/file_reader.py:0 -> create_file_reader_tool", + "CALLS tools/file_reader.py:0 -> description", + "CALLS tools/file_reader.py:0 -> file_reader", + "CALLS tools/file_reader.py:0 -> logs", + "CALLS tools/file_reader.py:0 -> name", + "CALLS tools/file_reader.py:0 -> project_root", + "CALLS tools/file_reader.py:0 -> read_file_content", + "CALLS tools/file_writer.py:0 -> AgenticToolName", + "CALLS tools/file_writer.py:0 -> FileWriter", + "CALLS tools/file_writer.py:0 -> create_file_writer_tool", + "CALLS tools/file_writer.py:0 -> create_new_file", + "CALLS tools/file_writer.py:0 -> description", + "CALLS tools/file_writer.py:0 -> file_writer", + "CALLS tools/file_writer.py:0 -> logs", + "CALLS tools/file_writer.py:0 -> name", + "CALLS tools/file_writer.py:0 -> parent", + "CALLS tools/file_writer.py:0 -> project_root", + "CALLS tools/health_checker.py:0 -> HealthChecker", + "CALLS tools/health_checker.py:0 -> get_summary", + "CALLS tools/health_checker.py:0 -> name", + "CALLS tools/health_checker.py:0 -> run_all_checks", + "CALLS tools/health_checker.py:0 -> text", + "CALLS tools/health_checker.py:0 -> value", + "CALLS tools/language.py:0 -> Color", + "CALLS tools/language.py:0 -> add_grammar", + "CALLS tools/language.py:0 -> cleanup_orphaned_modules", + "CALLS tools/language.py:0 -> index", + "CALLS tools/language.py:0 -> list_languages", + "CALLS tools/language.py:0 -> name", + "CALLS tools/language.py:0 -> nodes", + "CALLS tools/language.py:0 
-> remove_language", + "CALLS tools/language.py:0 -> text", + "CALLS tools/language.py:0 -> type", + "CALLS tools/language.py:0 -> value", + "CALLS tools/semantic_search.py:0 -> AgenticToolName", + "CALLS tools/semantic_search.py:0 -> create_get_function_source_tool", + "CALLS tools/semantic_search.py:0 -> create_semantic_search_tool", + "CALLS tools/semantic_search.py:0 -> description", + "CALLS tools/semantic_search.py:0 -> get_function_source_by_id", + "CALLS tools/semantic_search.py:0 -> graph_service", + "CALLS tools/semantic_search.py:0 -> logs", + "CALLS tools/semantic_search.py:0 -> name", + "CALLS tools/semantic_search.py:0 -> semantic_search_functions", + "CALLS tools/semantic_search.py:0 -> type", + "CALLS tools/shell_command.py:0 -> AgenticToolName", + "CALLS tools/shell_command.py:0 -> ShellCommander", + "CALLS tools/shell_command.py:0 -> async_timing_decorator", + "CALLS tools/shell_command.py:0 -> create_shell_command_tool", + "CALLS tools/shell_command.py:0 -> description", + "CALLS tools/shell_command.py:0 -> logs", + "CALLS tools/shell_command.py:0 -> metadata", + "CALLS tools/shell_command.py:0 -> name", + "CALLS tools/shell_command.py:0 -> parent", + "CALLS tools/shell_command.py:0 -> project_root", + "CALLS tools/shell_command.py:0 -> run_shell_command", + "CALLS tools/shell_command.py:0 -> shell_commander", + "CALLS tools/shell_command.py:0 -> start", + "CALLS tools/tool_descriptions.py:0 -> AgenticToolName", + "CALLS tools/tool_descriptions.py:0 -> MCPToolName", + "CALLS tools/tool_descriptions.py:0 -> agent", + "CALLS tools/tool_descriptions.py:0 -> analyze", + "CALLS tools/tool_descriptions.py:0 -> create_file", + "CALLS tools/tool_descriptions.py:0 -> delete_project", + "CALLS tools/tool_descriptions.py:0 -> get_code_snippet", + "CALLS tools/tool_descriptions.py:0 -> insert", + "CALLS tools/tool_descriptions.py:0 -> list_directory", + "CALLS tools/tool_descriptions.py:0 -> list_projects", + "CALLS tools/tool_descriptions.py:0 -> name", + 
"CALLS tools/tool_descriptions.py:0 -> nodes", + "CALLS tools/tool_descriptions.py:0 -> operation", + "CALLS tools/tool_descriptions.py:0 -> query_graph", + "CALLS tools/tool_descriptions.py:0 -> read", + "CALLS tools/tool_descriptions.py:0 -> read_file", + "CALLS tools/tool_descriptions.py:0 -> relationships", + "CALLS tools/tool_descriptions.py:0 -> run", + "CALLS tools/tool_descriptions.py:0 -> semantic_search", + "CALLS tools/tool_descriptions.py:0 -> start", + "CALLS tools/tool_descriptions.py:0 -> text", + "CALLS tools/tool_descriptions.py:0 -> update_repository", + "CALLS types_defs.py:0 -> ASTCacheProtocol", + "CALLS types_defs.py:0 -> BatchWrapper", + "CALLS types_defs.py:0 -> CancelledResult", + "CALLS types_defs.py:0 -> CgrignorePatterns", + "CALLS types_defs.py:0 -> CodeSnippetResultDict", + "CALLS types_defs.py:0 -> ColumnDescriptor", + "CALLS types_defs.py:0 -> ConfirmationToolNames", + "CALLS types_defs.py:0 -> CreateFileArgs", + "CALLS types_defs.py:0 -> CursorProtocol", + "CALLS types_defs.py:0 -> DeadCodeRow", + "CALLS types_defs.py:0 -> DeleteProjectErrorResult", + "CALLS types_defs.py:0 -> DeleteProjectSuccessResult", + "CALLS types_defs.py:0 -> EmbeddingQueryResult", + "CALLS types_defs.py:0 -> FunctionMatch", + "CALLS types_defs.py:0 -> FunctionNodeProps", + "CALLS types_defs.py:0 -> FunctionRegistryTrieProtocol", + "CALLS types_defs.py:0 -> GraphData", + "CALLS types_defs.py:0 -> GraphMetadata", + "CALLS types_defs.py:0 -> GraphSummary", + "CALLS types_defs.py:0 -> JavaAnnotationInfo", + "CALLS types_defs.py:0 -> JavaClassInfo", + "CALLS types_defs.py:0 -> JavaFieldInfo", + "CALLS types_defs.py:0 -> JavaMethodCallInfo", + "CALLS types_defs.py:0 -> JavaMethodInfo", + "CALLS types_defs.py:0 -> LanguageImport", + "CALLS types_defs.py:0 -> LanguageQueries", + "CALLS types_defs.py:0 -> LanguageSpec", + "CALLS types_defs.py:0 -> ListProjectsErrorResult", + "CALLS types_defs.py:0 -> ListProjectsSuccessResult", + "CALLS types_defs.py:0 -> 
LoadableProtocol", + "CALLS types_defs.py:0 -> MCPInputSchema", + "CALLS types_defs.py:0 -> MCPInputSchemaProperty", + "CALLS types_defs.py:0 -> MCPToolSchema", + "CALLS types_defs.py:0 -> ModelConfigKwargs", + "CALLS types_defs.py:0 -> NodeBatchRow", + "CALLS types_defs.py:0 -> NodeData", + "CALLS types_defs.py:0 -> NodeLabel", + "CALLS types_defs.py:0 -> NodeType", + "CALLS types_defs.py:0 -> PathValidatorProtocol", + "CALLS types_defs.py:0 -> QueryJsonOutput", + "CALLS types_defs.py:0 -> QueryResultDict", + "CALLS types_defs.py:0 -> RawToolArgs", + "CALLS types_defs.py:0 -> RelBatchRow", + "CALLS types_defs.py:0 -> RelationshipData", + "CALLS types_defs.py:0 -> RelationshipType", + "CALLS types_defs.py:0 -> ReplaceCodeArgs", + "CALLS types_defs.py:0 -> SemanticSearchResult", + "CALLS types_defs.py:0 -> ShellCommandArgs", + "CALLS types_defs.py:0 -> SupportedLanguage", + "CALLS types_defs.py:0 -> ToolNames", + "CALLS types_defs.py:0 -> TreeSitterNodeProtocol", + "CALLS types_defs.py:0 -> _ensure_loaded", + "CALLS types_defs.py:0 -> callable_params", + "CALLS types_defs.py:0 -> children", + "CALLS types_defs.py:0 -> close", + "CALLS types_defs.py:0 -> create_file", + "CALLS types_defs.py:0 -> description", + "CALLS types_defs.py:0 -> edit_file", + "CALLS types_defs.py:0 -> execute", + "CALLS types_defs.py:0 -> fetchall", + "CALLS types_defs.py:0 -> find_ending_with", + "CALLS types_defs.py:0 -> find_with_prefix", + "CALLS types_defs.py:0 -> get", + "CALLS types_defs.py:0 -> is_abstract", + "CALLS types_defs.py:0 -> is_property", + "CALLS types_defs.py:0 -> items", + "CALLS types_defs.py:0 -> keys", + "CALLS types_defs.py:0 -> mark_abstract", + "CALLS types_defs.py:0 -> mark_callable_params", + "CALLS types_defs.py:0 -> mark_property", + "CALLS types_defs.py:0 -> metadata", + "CALLS types_defs.py:0 -> name", + "CALLS types_defs.py:0 -> nodes", + "CALLS types_defs.py:0 -> project_root", + "CALLS types_defs.py:0 -> property_names", + "CALLS types_defs.py:0 -> 
query_graph", + "CALLS types_defs.py:0 -> read_file", + "CALLS types_defs.py:0 -> register_unique_qn", + "CALLS types_defs.py:0 -> relationships", + "CALLS types_defs.py:0 -> semantic_search", + "CALLS types_defs.py:0 -> summary", + "CALLS types_defs.py:0 -> text", + "CALLS types_defs.py:0 -> type", + "CALLS types_defs.py:0 -> value", + "CALLS types_defs.py:0 -> variants", + "CALLS unixcoder.py:0 -> UniXcoder", + "CALLS unixcoder.py:0 -> UniXcoderMode", + "CALLS unixcoder.py:0 -> dim", + "CALLS unixcoder.py:0 -> forward", + "CALLS unixcoder.py:0 -> generate", + "CALLS unixcoder.py:0 -> text", + "CALLS utils/dependencies.py:0 -> check_dependencies", + "CALLS utils/dependencies.py:0 -> get_missing_dependencies", + "CALLS utils/dependencies.py:0 -> has_semantic_dependencies", + "CALLS utils/fqn_resolver.py:0 -> FQNSpec", + "CALLS utils/fqn_resolver.py:0 -> children", + "CALLS utils/fqn_resolver.py:0 -> extract_function_fqns", + "CALLS utils/fqn_resolver.py:0 -> find_function_source_by_fqn", + "CALLS utils/fqn_resolver.py:0 -> logs", + "CALLS utils/fqn_resolver.py:0 -> parent", + "CALLS utils/fqn_resolver.py:0 -> type", + "CALLS utils/path_utils.py:0 -> cached_resolve_posix", + "CALLS utils/path_utils.py:0 -> derive_project_name", + "CALLS utils/path_utils.py:0 -> name", + "CALLS utils/path_utils.py:0 -> parent", + "CALLS utils/path_utils.py:0 -> rel_path", + "CALLS utils/path_utils.py:0 -> repo", + "CALLS utils/path_utils.py:0 -> repo_path", + "CALLS utils/path_utils.py:0 -> resolve_repo_path", + "CALLS utils/path_utils.py:0 -> should_skip_path", + "CALLS utils/path_utils.py:0 -> should_skip_rel_file", + "CALLS utils/rich_markdown.py:0 -> LeftAlignedHeading", + "CALLS utils/rich_markdown.py:0 -> LeftAlignedMarkdown", + "CALLS utils/rich_markdown.py:0 -> style", + "CALLS utils/rich_markdown.py:0 -> text", + "CALLS utils/rich_markdown.py:0 -> type", + "CALLS utils/source_extraction.py:0 -> extract_source_with_fallback", + "CALLS utils/source_extraction.py:0 -> logs", + 
"CALLS utils/source_extraction.py:0 -> name", + "CALLS utils/source_extraction.py:0 -> start", + "CALLS utils/source_extraction.py:0 -> text", + "CALLS utils/source_extraction.py:0 -> validate_source_location", + "CALLS utils/token_utils.py:0 -> logs", + "CALLS utils/token_utils.py:0 -> text", + "CALLS utils/token_utils.py:0 -> truncate_results_by_tokens", + "CALLS vector_store.py:0 -> close_qdrant_client", + "CALLS vector_store.py:0 -> delete_project_embeddings", + "CALLS vector_store.py:0 -> logs", + "CALLS vector_store.py:0 -> search_embeddings", + "CALLS vector_store.py:0 -> store_embedding", + "CALLS vector_store.py:0 -> verify_stored_ids", + "CALLS workspaces/__init__.py:0 -> WorkspaceConfig", + "CALLS workspaces/__init__.py:0 -> WorkspaceError", + "CALLS workspaces/__init__.py:0 -> WorkspaceRepo", + "CALLS workspaces/__init__.py:0 -> add_repo", + "CALLS workspaces/__init__.py:0 -> create_workspace", + "CALLS workspaces/__init__.py:0 -> delete_workspace", + "CALLS workspaces/__init__.py:0 -> list_workspaces", + "CALLS workspaces/__init__.py:0 -> load_workspace", + "CALLS workspaces/__init__.py:0 -> remove_repo", + "CALLS workspaces/__init__.py:0 -> save_workspace", + "CALLS workspaces/__init__.py:0 -> workspace_path", + "CALLS workspaces/__init__.py:0 -> workspaces_dir", + "CALLS workspaces/cli.py:0 -> WorkspaceError", + "CALLS workspaces/cli.py:0 -> add", + "CALLS workspaces/cli.py:0 -> add_repo_cmd", + "CALLS workspaces/cli.py:0 -> cli", + "CALLS workspaces/cli.py:0 -> create_cmd", + "CALLS workspaces/cli.py:0 -> delete_cmd", + "CALLS workspaces/cli.py:0 -> description", + "CALLS workspaces/cli.py:0 -> list_cmd", + "CALLS workspaces/cli.py:0 -> name", + "CALLS workspaces/cli.py:0 -> remove_repo_cmd", + "CALLS workspaces/cli.py:0 -> repo", + "CALLS workspaces/cli.py:0 -> repo_path", + "CALLS workspaces/cli.py:0 -> show_cmd", + "CALLS workspaces/constants.py:0 -> name", + "CALLS workspaces/constants.py:0 -> operation", + "CALLS workspaces/constants.py:0 -> 
repo", + "CALLS workspaces/models.py:0 -> WorkspaceConfig", + "CALLS workspaces/models.py:0 -> WorkspaceRepo", + "CALLS workspaces/models.py:0 -> description", + "CALLS workspaces/models.py:0 -> find_repo", + "CALLS workspaces/models.py:0 -> name", + "CALLS workspaces/models.py:0 -> project_names", + "CALLS workspaces/models.py:0 -> repo", + "CALLS workspaces/storage.py:0 -> add_repo", + "CALLS workspaces/storage.py:0 -> create_workspace", + "CALLS workspaces/storage.py:0 -> delete_workspace", + "CALLS workspaces/storage.py:0 -> description", + "CALLS workspaces/storage.py:0 -> list_workspaces", + "CALLS workspaces/storage.py:0 -> name", + "CALLS workspaces/storage.py:0 -> parent", + "CALLS workspaces/storage.py:0 -> remove_repo", + "CALLS workspaces/storage.py:0 -> repo", + "CALLS workspaces/storage.py:0 -> repo_path" + ] + }, + "retrieval:grep_call": { + "missing": [], + "extra": [ + "CALLS cgr_state.py:0 -> read_sync_timestamps", + "CALLS cgr_state.py:0 -> record_sync", + "CALLS cli.py:0 -> _global_options", + "CALLS cli.py:0 -> _sync_workspace", + "CALLS cli.py:0 -> _version_callback", + "CALLS cli.py:0 -> daemon_command", + "CALLS cli.py:0 -> dead_code", + "CALLS cli.py:0 -> doctor", + "CALLS cli.py:0 -> export", + "CALLS cli.py:0 -> graph_loader_command", + "CALLS cli.py:0 -> index", + "CALLS cli.py:0 -> language_command", + "CALLS cli.py:0 -> mcp_server", + "CALLS cli.py:0 -> optimize", + "CALLS cli.py:0 -> start", + "CALLS cli.py:0 -> stats", + "CALLS cli.py:0 -> status_command", + "CALLS cli.py:0 -> stop_command", + "CALLS cli.py:0 -> workspace_command", + "CALLS cli_help.py:0 -> CLICommandName", + "CALLS cli_help.py:0 -> derive_project_name", + "CALLS cli_help.py:0 -> index", + "CALLS cli_help.py:0 -> optimize", + "CALLS cli_help.py:0 -> repo", + "CALLS config.py:0 -> ApiKeyInfoEntry", + "CALLS config.py:0 -> active_cypher_config", + "CALLS config.py:0 -> active_orchestrator_config", + "CALLS config.py:0 -> load_cgr_instructions", + "CALLS config.py:0 -> 
load_cgrignore_patterns", + "CALLS config.py:0 -> ollama_endpoint", + "CALLS config.py:0 -> parse_model_string", + "CALLS config.py:0 -> resolve_batch_size", + "CALLS config.py:0 -> set_cypher", + "CALLS config.py:0 -> set_orchestrator", + "CALLS config.py:0 -> to_update_kwargs", + "CALLS config.py:0 -> validate_api_key", + "CALLS constants.py:0 -> Architecture", + "CALLS constants.py:0 -> Color", + "CALLS constants.py:0 -> CppFrontend", + "CALLS constants.py:0 -> CppNodeType", + "CALLS constants.py:0 -> DeadCodeFormat", + "CALLS constants.py:0 -> EventType", + "CALLS constants.py:0 -> FileAction", + "CALLS constants.py:0 -> GoogleProviderType", + "CALLS constants.py:0 -> KeyBinding", + "CALLS constants.py:0 -> LanguageStatus", + "CALLS constants.py:0 -> MCPEnvVar", + "CALLS constants.py:0 -> MCPParamName", + "CALLS constants.py:0 -> MCPSchemaField", + "CALLS constants.py:0 -> MCPSchemaType", + "CALLS constants.py:0 -> MCPToolName", + "CALLS constants.py:0 -> MCPTransport", + "CALLS constants.py:0 -> ModelRole", + "CALLS constants.py:0 -> NodeLabel", + "CALLS constants.py:0 -> PermissionMode", + "CALLS constants.py:0 -> Provider", + "CALLS constants.py:0 -> QueryFormat", + "CALLS constants.py:0 -> RelationshipType", + "CALLS constants.py:0 -> StyleModifier", + "CALLS constants.py:0 -> SupportedLanguage", + "CALLS constants.py:0 -> TreeSitterModule", + "CALLS constants.py:0 -> UniXcoderMode", + "CALLS constants.py:0 -> UniqueKeyType", + "CALLS constants.py:0 -> name", + "CALLS cypher_queries.py:0 -> build_constraint_query", + "CALLS cypher_queries.py:0 -> build_create_node_query", + "CALLS cypher_queries.py:0 -> build_create_relationship_query", + "CALLS cypher_queries.py:0 -> build_dead_code_query", + "CALLS cypher_queries.py:0 -> build_index_query", + "CALLS cypher_queries.py:0 -> build_merge_node_query", + "CALLS cypher_queries.py:0 -> build_merge_relationship_query", + "CALLS cypher_queries.py:0 -> build_nodes_by_ids_query", + "CALLS cypher_queries.py:0 -> 
type", + "CALLS cypher_queries.py:0 -> wrap_with_unwind", + "CALLS decorators.py:0 -> decorator", + "CALLS decorators.py:0 -> wrapper", + "CALLS embedder.py:0 -> clear_embedding_cache", + "CALLS embedder.py:0 -> embed_code", + "CALLS embedder.py:0 -> embed_code_batch", + "CALLS embedder.py:0 -> save", + "CALLS exceptions.py:0 -> LLMGenerationError", + "CALLS graph_loader.py:0 -> _ensure_loaded", + "CALLS graph_loader.py:0 -> find_node_by_property", + "CALLS graph_loader.py:0 -> find_nodes_by_label", + "CALLS graph_loader.py:0 -> get_node_by_id", + "CALLS graph_loader.py:0 -> get_relationships_for_node", + "CALLS graph_loader.py:0 -> load_graph", + "CALLS graph_loader.py:0 -> metadata", + "CALLS graph_loader.py:0 -> nodes", + "CALLS graph_loader.py:0 -> relationships", + "CALLS graph_loader.py:0 -> summary", + "CALLS graph_updater.py:0 -> ast_extractor_func", + "CALLS graph_updater.py:0 -> callable_params", + "CALLS graph_updater.py:0 -> find_ending_with", + "CALLS graph_updater.py:0 -> find_with_prefix", + "CALLS graph_updater.py:0 -> find_with_prefix_and_suffix", + "CALLS graph_updater.py:0 -> is_abstract", + "CALLS graph_updater.py:0 -> is_property", + "CALLS graph_updater.py:0 -> mark_abstract", + "CALLS graph_updater.py:0 -> mark_callable_params", + "CALLS graph_updater.py:0 -> mark_property", + "CALLS graph_updater.py:0 -> property_names", + "CALLS graph_updater.py:0 -> register_unique_qn", + "CALLS graph_updater.py:0 -> run", + "CALLS graph_updater.py:0 -> variants", + "CALLS language_spec.py:0 -> _c_get_name", + "CALLS language_spec.py:0 -> _cpp_get_name", + "CALLS language_spec.py:0 -> _generic_file_to_module", + "CALLS language_spec.py:0 -> _js_file_to_module", + "CALLS language_spec.py:0 -> _js_get_name", + "CALLS language_spec.py:0 -> _php_file_to_module", + "CALLS language_spec.py:0 -> _python_file_to_module", + "CALLS language_spec.py:0 -> _python_get_name", + "CALLS language_spec.py:0 -> _rust_file_to_module", + "CALLS language_spec.py:0 -> 
_rust_get_name", + "CALLS language_spec.py:0 -> get_language_for_extension", + "CALLS language_spec.py:0 -> get_language_spec", + "CALLS logs.py:0 -> logs", + "CALLS logs.py:0 -> name", + "CALLS logs.py:0 -> relationships", + "CALLS logs.py:0 -> type", + "CALLS main.py:0 -> _interrupt", + "CALLS main.py:0 -> _rich_log_sink", + "CALLS main.py:0 -> _toggle", + "CALLS main.py:0 -> export_graph_to_file", + "CALLS main.py:0 -> get_multiline_input", + "CALLS main.py:0 -> keyboard_interrupt", + "CALLS main.py:0 -> main_async", + "CALLS main.py:0 -> main_optimize_async", + "CALLS main.py:0 -> main_single_query", + "CALLS main.py:0 -> new_line", + "CALLS main.py:0 -> on_input", + "CALLS main.py:0 -> prompt_for_unignored_directories", + "CALLS main.py:0 -> submit", + "CALLS main.py:0 -> submit_ctrl_e", + "CALLS main.py:0 -> toggle_permission_mode", + "CALLS mcp/client.py:0 -> main", + "CALLS mcp/server.py:0 -> lifespan", + "CALLS mcp/server.py:0 -> serve_http", + "CALLS mcp/tools.py:0 -> _delete_project_sync", + "CALLS mcp/tools.py:0 -> _index_repository_sync", + "CALLS mcp/tools.py:0 -> _update_repository_sync", + "CALLS mcp/tools.py:0 -> ask_agent", + "CALLS mcp/tools.py:0 -> create_mcp_tools_registry", + "CALLS mcp/tools.py:0 -> get_code_snippet", + "CALLS mcp/tools.py:0 -> get_tool_handler", + "CALLS mcp/tools.py:0 -> get_tool_schemas", + "CALLS mcp/tools.py:0 -> index_repository", + "CALLS mcp/tools.py:0 -> list_directory", + "CALLS mcp/tools.py:0 -> query_code_graph", + "CALLS mcp/tools.py:0 -> rag_agent", + "CALLS mcp/tools.py:0 -> read_file", + "CALLS mcp/tools.py:0 -> semantic_search", + "CALLS mcp/tools.py:0 -> surgical_replace_code", + "CALLS mcp/tools.py:0 -> update_repository", + "CALLS mcp/tools.py:0 -> wipe_database", + "CALLS mcp/tools.py:0 -> write_file", + "CALLS models.py:0 -> FQNSpec", + "CALLS models.py:0 -> _default_console", + "CALLS models.py:0 -> cycle_permission_mode", + "CALLS models.py:0 -> is_yolo", + "CALLS models.py:0 -> reset_cancelled", + 
"CALLS parser_loader.py:0 -> load_parsers", + "CALLS parsers/call_processor.py:0 -> FQNSpec", + "CALLS parsers/call_processor.py:0 -> collect_callable_field_bindings", + "CALLS parsers/call_processor.py:0 -> finalize_callable_param_flow", + "CALLS parsers/call_processor.py:0 -> name", + "CALLS parsers/call_processor.py:0 -> process_calls_in_file", + "CALLS parsers/call_resolver.py:0 -> _calculate_import_distance", + "CALLS parsers/call_resolver.py:0 -> _get_separator", + "CALLS parsers/call_resolver.py:0 -> _has_separator", + "CALLS parsers/call_resolver.py:0 -> callable_field_targets", + "CALLS parsers/call_resolver.py:0 -> export", + "CALLS parsers/call_resolver.py:0 -> operator_dunder_targets", + "CALLS parsers/call_resolver.py:0 -> protocol_dispatch_targets", + "CALLS parsers/call_resolver.py:0 -> record_callable_field_binding", + "CALLS parsers/call_resolver.py:0 -> resolve_builtin_call", + "CALLS parsers/call_resolver.py:0 -> resolve_cpp_operator_call", + "CALLS parsers/call_resolver.py:0 -> resolve_function_call", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> find_cpp_exported_classes", + "CALLS parsers/class_ingest/cpp_modules.py:0 -> ingest_cpp_module_declarations", + "CALLS parsers/class_ingest/identity.py:0 -> resolve_class_identity", + "CALLS parsers/class_ingest/method_override.py:0 -> process_all_method_overrides", + "CALLS parsers/class_ingest/mixin.py:0 -> _extract_cpp_base_class_name", + "CALLS parsers/class_ingest/mixin.py:0 -> _get_node_type_for_inheritance", + "CALLS parsers/class_ingest/mixin.py:0 -> _ingest_classes_and_methods", + "CALLS parsers/class_ingest/mixin.py:0 -> _ingest_cpp_module_declarations", + "CALLS parsers/class_ingest/node_type.py:0 -> determine_node_type", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> extract_implemented_interfaces", + "CALLS parsers/class_ingest/parent_extraction.py:0 -> extract_parent_classes", + "CALLS parsers/class_ingest/relationships.py:0 -> create_class_relationships", + "CALLS 
parsers/class_ingest/utils.py:0 -> decode_node_stripped", + "CALLS parsers/class_ingest/utils.py:0 -> find_child_by_type", + "CALLS parsers/cpp/utils.py:0 -> build_qualified_name", + "CALLS parsers/cpp/utils.py:0 -> extract_class_name_from_out_of_class_method", + "CALLS parsers/cpp/utils.py:0 -> extract_exported_class_name", + "CALLS parsers/cpp/utils.py:0 -> is_exported", + "CALLS parsers/cpp/utils.py:0 -> is_out_of_class_method_definition", + "CALLS parsers/cpp_frontend/frontend.py:0 -> cpp_frontend_available", + "CALLS parsers/cpp_frontend/frontend.py:0 -> find_compile_commands", + "CALLS parsers/cpp_frontend/frontend.py:0 -> repo", + "CALLS parsers/cpp_frontend/frontend.py:0 -> run_cpp_frontend", + "CALLS parsers/cpp_frontend/qn.py:0 -> function_qn", + "CALLS parsers/cpp_frontend/qn.py:0 -> method_qn", + "CALLS parsers/cpp_frontend/qn.py:0 -> type_qn", + "CALLS parsers/definition_processor.py:0 -> DefinitionProcessor", + "CALLS parsers/definition_processor.py:0 -> _extract_decorators", + "CALLS parsers/definition_processor.py:0 -> _get_docstring", + "CALLS parsers/definition_processor.py:0 -> process_dependencies", + "CALLS parsers/definition_processor.py:0 -> process_file", + "CALLS parsers/dependency_parser.py:0 -> parse_dependencies", + "CALLS parsers/factory.py:0 -> call_processor", + "CALLS parsers/factory.py:0 -> definition_processor", + "CALLS parsers/factory.py:0 -> import_processor", + "CALLS parsers/factory.py:0 -> structure_processor", + "CALLS parsers/factory.py:0 -> type_inference", + "CALLS parsers/function_ingest.py:0 -> _ingest_all_functions", + "CALLS parsers/function_ingest.py:0 -> resolve_deferred_cpp_methods", + "CALLS parsers/function_ingest.py:0 -> resolve_deferred_go_methods", + "CALLS parsers/go/utils.py:0 -> extract_receiver_type_name", + "CALLS parsers/go/utils.py:0 -> is_receiver_method", + "CALLS parsers/handlers/base.py:0 -> build_function_qualified_name", + "CALLS parsers/handlers/base.py:0 -> build_method_qualified_name", + "CALLS 
parsers/handlers/base.py:0 -> build_nested_function_qn", + "CALLS parsers/handlers/base.py:0 -> extract_base_class_name", + "CALLS parsers/handlers/base.py:0 -> extract_decorators", + "CALLS parsers/handlers/base.py:0 -> extract_function_name", + "CALLS parsers/handlers/base.py:0 -> extract_impl_target", + "CALLS parsers/handlers/base.py:0 -> is_class_method", + "CALLS parsers/handlers/base.py:0 -> is_export_inside_function", + "CALLS parsers/handlers/base.py:0 -> is_function_exported", + "CALLS parsers/handlers/base.py:0 -> is_inside_method_with_object_literals", + "CALLS parsers/handlers/base.py:0 -> should_process_as_impl_block", + "CALLS parsers/handlers/cpp.py:0 -> CppHandler", + "CALLS parsers/handlers/cpp.py:0 -> build_function_qualified_name", + "CALLS parsers/handlers/cpp.py:0 -> extract_base_class_name", + "CALLS parsers/handlers/cpp.py:0 -> is_function_exported", + "CALLS parsers/handlers/java.py:0 -> JavaHandler", + "CALLS parsers/handlers/java.py:0 -> build_method_qualified_name", + "CALLS parsers/handlers/java.py:0 -> extract_decorators", + "CALLS parsers/handlers/js_ts.py:0 -> JsTsHandler", + "CALLS parsers/handlers/js_ts.py:0 -> build_nested_function_qn", + "CALLS parsers/handlers/js_ts.py:0 -> extract_decorators", + "CALLS parsers/handlers/js_ts.py:0 -> is_class_method", + "CALLS parsers/handlers/js_ts.py:0 -> is_export_inside_function", + "CALLS parsers/handlers/lua.py:0 -> LuaHandler", + "CALLS parsers/handlers/lua.py:0 -> extract_function_name", + "CALLS parsers/handlers/php.py:0 -> PhpHandler", + "CALLS parsers/handlers/php.py:0 -> extract_decorators", + "CALLS parsers/handlers/php.py:0 -> extract_function_name", + "CALLS parsers/handlers/php.py:0 -> is_class_method", + "CALLS parsers/handlers/php.py:0 -> is_function_exported", + "CALLS parsers/handlers/protocol.py:0 -> LanguageHandler", + "CALLS parsers/handlers/protocol.py:0 -> build_function_qualified_name", + "CALLS parsers/handlers/protocol.py:0 -> build_method_qualified_name", + "CALLS 
parsers/handlers/protocol.py:0 -> build_nested_function_qn", + "CALLS parsers/handlers/protocol.py:0 -> extract_base_class_name", + "CALLS parsers/handlers/protocol.py:0 -> extract_decorators", + "CALLS parsers/handlers/protocol.py:0 -> extract_function_name", + "CALLS parsers/handlers/protocol.py:0 -> extract_impl_target", + "CALLS parsers/handlers/protocol.py:0 -> is_class_method", + "CALLS parsers/handlers/protocol.py:0 -> is_export_inside_function", + "CALLS parsers/handlers/protocol.py:0 -> is_function_exported", + "CALLS parsers/handlers/protocol.py:0 -> is_inside_method_with_object_literals", + "CALLS parsers/handlers/protocol.py:0 -> should_process_as_impl_block", + "CALLS parsers/handlers/python.py:0 -> PythonHandler", + "CALLS parsers/handlers/python.py:0 -> extract_decorators", + "CALLS parsers/handlers/registry.py:0 -> get_handler", + "CALLS parsers/handlers/rust.py:0 -> RustHandler", + "CALLS parsers/handlers/rust.py:0 -> build_function_qualified_name", + "CALLS parsers/handlers/rust.py:0 -> extract_decorators", + "CALLS parsers/handlers/rust.py:0 -> should_process_as_impl_block", + "CALLS parsers/import_processor.py:0 -> name", + "CALLS parsers/import_processor.py:0 -> nodes", + "CALLS parsers/import_processor.py:0 -> parse_imports", + "CALLS parsers/java/method_resolver.py:0 -> _do_resolve_java_method_call", + "CALLS parsers/java/method_resolver.py:0 -> _resolve_java_method_return_type", + "CALLS parsers/java/method_resolver.py:0 -> start", + "CALLS parsers/java/type_inference.py:0 -> JavaTypeInferenceEngine", + "CALLS parsers/java/type_inference.py:0 -> _find_containing_java_class", + "CALLS parsers/java/type_inference.py:0 -> build_variable_type_map", + "CALLS parsers/java/type_inference.py:0 -> resolve_java_method_call", + "CALLS parsers/java/type_resolver.py:0 -> _find_registry_entries_under", + "CALLS parsers/java/type_resolver.py:0 -> _get_current_class_name", + "CALLS parsers/java/type_resolver.py:0 -> _get_implemented_interfaces", + "CALLS 
parsers/java/type_resolver.py:0 -> _get_superclass_name", + "CALLS parsers/java/type_resolver.py:0 -> _rank_module_candidates", + "CALLS parsers/java/utils.py:0 -> build_qualified_name", + "CALLS parsers/java/utils.py:0 -> extract_annotation_info", + "CALLS parsers/java/utils.py:0 -> extract_class_info", + "CALLS parsers/java/utils.py:0 -> extract_field_info", + "CALLS parsers/java/utils.py:0 -> extract_import_path", + "CALLS parsers/java/utils.py:0 -> extract_method_call_info", + "CALLS parsers/java/utils.py:0 -> extract_method_info", + "CALLS parsers/java/utils.py:0 -> extract_package_name", + "CALLS parsers/java/utils.py:0 -> find_package_start_index", + "CALLS parsers/java/utils.py:0 -> get_class_context_from_qn", + "CALLS parsers/java/utils.py:0 -> get_java_visibility", + "CALLS parsers/java/utils.py:0 -> is_main_method", + "CALLS parsers/java/variable_analyzer.py:0 -> _collect_all_variable_types", + "CALLS parsers/java/variable_analyzer.py:0 -> _find_field_type_in_class", + "CALLS parsers/js_ts/ingest.py:0 -> JsTsIngestMixin", + "CALLS parsers/js_ts/ingest.py:0 -> _build_nested_qualified_name", + "CALLS parsers/js_ts/ingest.py:0 -> _ingest_assignment_arrow_functions", + "CALLS parsers/js_ts/ingest.py:0 -> _ingest_object_literal_methods", + "CALLS parsers/js_ts/ingest.py:0 -> _ingest_prototype_inheritance", + "CALLS parsers/js_ts/ingest.py:0 -> _is_export_inside_function", + "CALLS parsers/js_ts/ingest.py:0 -> _is_method_in_class", + "CALLS parsers/js_ts/ingest.py:0 -> _is_static_method_in_class", + "CALLS parsers/js_ts/module_system.py:0 -> _get_docstring", + "CALLS parsers/js_ts/module_system.py:0 -> _ingest_commonjs_exports", + "CALLS parsers/js_ts/module_system.py:0 -> _ingest_es6_exports", + "CALLS parsers/js_ts/module_system.py:0 -> _ingest_missing_import_patterns", + "CALLS parsers/js_ts/module_system.py:0 -> _is_export_inside_function", + "CALLS parsers/js_ts/type_inference.py:0 -> build_local_variable_type_map", + "CALLS parsers/js_ts/utils.py:0 -> 
analyze_return_expression", + "CALLS parsers/js_ts/utils.py:0 -> extract_method_call", + "CALLS parsers/js_ts/utils.py:0 -> find_method_in_ast", + "CALLS parsers/js_ts/utils.py:0 -> find_return_statements", + "CALLS parsers/js_ts/utils.py:0 -> get_js_ts_language_obj", + "CALLS parsers/lua/type_inference.py:0 -> build_local_variable_type_map", + "CALLS parsers/lua/utils.py:0 -> extract_assigned_name", + "CALLS parsers/lua/utils.py:0 -> extract_pcall_second_identifier", + "CALLS parsers/py/ast_analyzer.py:0 -> PythonAstAnalyzerMixin", + "CALLS parsers/py/ast_analyzer.py:0 -> _AstAnalyzerDeps", + "CALLS parsers/py/ast_analyzer.py:0 -> _analyze_method_return_statements", + "CALLS parsers/py/ast_analyzer.py:0 -> _find_class_node", + "CALLS parsers/py/ast_analyzer.py:0 -> _infer_type_from_expression", + "CALLS parsers/py/ast_analyzer.py:0 -> _traverse_for_assignments", + "CALLS parsers/py/ast_analyzer.py:0 -> _traverse_single_pass", + "CALLS parsers/py/expression_analyzer.py:0 -> PythonExpressionAnalyzerMixin", + "CALLS parsers/py/expression_analyzer.py:0 -> _ExpressionAnalyzerDeps", + "CALLS parsers/py/expression_analyzer.py:0 -> _infer_expression_return_type", + "CALLS parsers/py/expression_analyzer.py:0 -> _infer_type_from_expression_complex", + "CALLS parsers/py/expression_analyzer.py:0 -> build_local_variable_type_map", + "CALLS parsers/py/type_inference.py:0 -> PythonTypeInferenceEngine", + "CALLS parsers/py/type_inference.py:0 -> build_local_variable_type_map", + "CALLS parsers/py/utils.py:0 -> resolve_class_name", + "CALLS parsers/py/variable_analyzer.py:0 -> PythonVariableAnalyzerMixin", + "CALLS parsers/py/variable_analyzer.py:0 -> _VariableAnalyzerDeps", + "CALLS parsers/py/variable_analyzer.py:0 -> _analyze_comprehension", + "CALLS parsers/py/variable_analyzer.py:0 -> _analyze_for_loop", + "CALLS parsers/py/variable_analyzer.py:0 -> _collect_local_aliases", + "CALLS parsers/py/variable_analyzer.py:0 -> _expand_chained_attribute_types", + "CALLS 
parsers/py/variable_analyzer.py:0 -> _infer_class_annotation_types", + "CALLS parsers/py/variable_analyzer.py:0 -> _infer_instance_attributes_from_init", + "CALLS parsers/py/variable_analyzer.py:0 -> _infer_instance_variable_types_from_assignments", + "CALLS parsers/py/variable_analyzer.py:0 -> _infer_property_return_types", + "CALLS parsers/py/variable_analyzer.py:0 -> name", + "CALLS parsers/py/variable_analyzer.py:0 -> type", + "CALLS parsers/rs/utils.py:0 -> build_module_path", + "CALLS parsers/rs/utils.py:0 -> extract_impl_trait", + "CALLS parsers/rs/utils.py:0 -> extract_use_imports", + "CALLS parsers/rs/utils.py:0 -> name", + "CALLS parsers/stdlib_extractor.py:0 -> clear_stdlib_cache", + "CALLS parsers/stdlib_extractor.py:0 -> extract_module_path", + "CALLS parsers/stdlib_extractor.py:0 -> flush_stdlib_cache", + "CALLS parsers/stdlib_extractor.py:0 -> get_stdlib_cache_stats", + "CALLS parsers/stdlib_extractor.py:0 -> load_persistent_cache", + "CALLS parsers/stdlib_extractor.py:0 -> main", + "CALLS parsers/stdlib_extractor.py:0 -> type", + "CALLS parsers/structure_processor.py:0 -> identify_structure", + "CALLS parsers/structure_processor.py:0 -> process_generic_file", + "CALLS parsers/type_inference.py:0 -> _build_java_variable_type_map", + "CALLS parsers/type_inference.py:0 -> _resolve_class_name", + "CALLS parsers/type_inference.py:0 -> java_type_inference", + "CALLS parsers/type_inference.py:0 -> js_type_inference", + "CALLS parsers/type_inference.py:0 -> lua_type_inference", + "CALLS parsers/type_inference.py:0 -> python_type_inference", + "CALLS parsers/utils.py:0 -> _start_byte_key", + "CALLS parsers/utils.py:0 -> get_cached_query", + "CALLS parsers/utils.py:0 -> get_function_captures", + "CALLS parsers/utils.py:0 -> get_query_cursor", + "CALLS parsers/utils.py:0 -> ingest_exported_function", + "CALLS parsers/utils.py:0 -> ingest_method", + "CALLS parsers/utils.py:0 -> is_method_node", + "CALLS parsers/utils.py:0 -> safe_decode_with_fallback", + "CALLS 
prompts.py:0 -> build_rag_orchestrator_prompt", + "CALLS prompts.py:0 -> main", + "CALLS prompts.py:0 -> name", + "CALLS prompts.py:0 -> nodes", + "CALLS prompts.py:0 -> run", + "CALLS prompts.py:0 -> type", + "CALLS providers/base.py:0 -> AnthropicProvider", + "CALLS providers/base.py:0 -> AzureOpenAIProvider", + "CALLS providers/base.py:0 -> GoogleProvider", + "CALLS providers/base.py:0 -> ModelProvider", + "CALLS providers/base.py:0 -> OllamaProvider", + "CALLS providers/base.py:0 -> OpenAIProvider", + "CALLS providers/base.py:0 -> check_litellm_proxy_running", + "CALLS providers/base.py:0 -> create_model", + "CALLS providers/base.py:0 -> get_provider_from_config", + "CALLS providers/base.py:0 -> list_providers", + "CALLS providers/base.py:0 -> provider_name", + "CALLS providers/base.py:0 -> register_provider", + "CALLS providers/litellm.py:0 -> LiteLLMProvider", + "CALLS providers/litellm.py:0 -> create_model", + "CALLS providers/litellm.py:0 -> provider_name", + "CALLS readme_sections.py:0 -> generate_all_sections", + "CALLS schemas.py:0 -> CodeSnippet", + "CALLS schemas.py:0 -> EditResult", + "CALLS schemas.py:0 -> FileCreationResult", + "CALLS schemas.py:0 -> FileReadResult", + "CALLS schemas.py:0 -> HealthCheckResult", + "CALLS schemas.py:0 -> QueryGraphData", + "CALLS schemas.py:0 -> ShellCommandResult", + "CALLS schemas.py:0 -> _format_results", + "CALLS schemas.py:0 -> _set_success_on_error", + "CALLS services/__init__.py:0 -> IngestorProtocol", + "CALLS services/__init__.py:0 -> QueryProtocol", + "CALLS services/__init__.py:0 -> ensure_node_batch", + "CALLS services/__init__.py:0 -> ensure_relationship_batch", + "CALLS services/__init__.py:0 -> execute_write", + "CALLS services/__init__.py:0 -> fetch_all", + "CALLS services/__init__.py:0 -> flush_all", + "CALLS services/anthropic_token_counter.py:0 -> count_anthropic_context", + "CALLS services/graph_service.py:0 -> _flush_node_group_with_own_conn", + "CALLS services/graph_service.py:0 -> 
_flush_rel_group_with_own_conn", + "CALLS services/graph_service.py:0 -> clean_database", + "CALLS services/graph_service.py:0 -> delete_project", + "CALLS services/graph_service.py:0 -> ensure_constraints", + "CALLS services/graph_service.py:0 -> ensure_node_batch", + "CALLS services/graph_service.py:0 -> ensure_relationship_batch", + "CALLS services/graph_service.py:0 -> execute_write", + "CALLS services/graph_service.py:0 -> export_graph_to_dict", + "CALLS services/graph_service.py:0 -> list_projects", + "CALLS services/llm.py:0 -> create_rag_orchestrator", + "CALLS services/llm.py:0 -> generate", + "CALLS services/protobuf_service.py:0 -> ensure_node_batch", + "CALLS services/protobuf_service.py:0 -> ensure_relationship_batch", + "CALLS services/protobuf_service.py:0 -> flush_all", + "CALLS stack/cli.py:0 -> cli", + "CALLS stack/cli.py:0 -> down_cmd", + "CALLS stack/cli.py:0 -> logs_cmd", + "CALLS stack/cli.py:0 -> restart_cmd", + "CALLS stack/cli.py:0 -> status_cmd", + "CALLS stack/cli.py:0 -> up_cmd", + "CALLS stack/constants.py:0 -> StackState", + "CALLS stack/health.py:0 -> wait_for_memgraph", + "CALLS stack/health.py:0 -> wait_for_qdrant", + "CALLS stack/manager.py:0 -> compose_file", + "CALLS stack/manager.py:0 -> daemon_down", + "CALLS stack/manager.py:0 -> daemon_logs", + "CALLS stack/manager.py:0 -> daemon_restart", + "CALLS stack/manager.py:0 -> daemon_status", + "CALLS stack/manager.py:0 -> daemon_up", + "CALLS tests/conftest.py:0 -> NodeProtocol", + "CALLS tests/conftest.py:0 -> _disable_stack_autostart", + "CALLS tests/conftest.py:0 -> _isolate_cgr_home", + "CALLS tests/conftest.py:0 -> child_by_field_name", + "CALLS tests/conftest.py:0 -> children", + "CALLS tests/conftest.py:0 -> cleanup_qdrant_client", + "CALLS tests/conftest.py:0 -> create_mock_node", + "CALLS tests/conftest.py:0 -> get_node_names", + "CALLS tests/conftest.py:0 -> get_relationships", + "CALLS tests/conftest.py:0 -> method_calls", + "CALLS tests/conftest.py:0 -> mock_ingestor", 
+ "CALLS tests/conftest.py:0 -> mock_updater", + "CALLS tests/conftest.py:0 -> parent", + "CALLS tests/conftest.py:0 -> run_updater", + "CALLS tests/conftest.py:0 -> temp_repo", + "CALLS tests/conftest.py:0 -> text", + "CALLS tests/conftest.py:0 -> type", + "CALLS tests/fuzz_test_parsers.py:0 -> fuzz_language_spec", + "CALLS tests/integration/conftest.py:0 -> memgraph_connection", + "CALLS tests/integration/conftest.py:0 -> memgraph_container", + "CALLS tests/integration/conftest.py:0 -> memgraph_ingestor", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> anyio_backend", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> generate_query", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> mock_cypher_gen_realistic", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> mock_ingestor_with_sample_data", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> silent_console", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_complete_query_flow", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_about_classes", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_about_functions", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_handles_database_error_gracefully", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_handles_llm_error_gracefully", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_with_empty_results", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_query_with_unicode_characters", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_result_has_required_fields", + "CALLS tests/integration/test_codebase_query_integration.py:0 -> test_result_preserves_data_types", + "CALLS tests/integration/test_cypher_queries.py:0 -> main", + "CALLS tests/integration/test_cypher_queries.py:0 -> 
test_class_candidates_when_classes_included", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_creates_calls_relationship_with_properties", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_creates_relationship_between_nodes", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_deletes_all_nodes", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_exclude_tests_omits_test_function_roots", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_excluding_tests_reports_orphan_and_test_only_code", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_exports_multiple_nodes", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_exports_node_with_labels_and_properties", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_exports_relationship_with_type", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_fetches_nodes_by_ids", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_file_node_query", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_file_path_constraint", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_finds_function_by_qualified_name", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_function_calls_function_with_props", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_function_node_query", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_function_qualified_name_constraint", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_gets_source_location_by_node_id", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_include_classes_adds_class_candidates", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_include_tests_references_test_patterns", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_merge_creates_new_node", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_merge_updates_existing_node", + "CALLS tests/integration/test_cypher_queries.py:0 -> 
test_module_defines_function_no_props", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_module_load_callee_is_a_root", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_module_load_callees_are_roots", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_multiple_node_ids", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_reports_only_the_orphan_with_tests_included", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_returns_empty_for_nonexistent_ids", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_returns_empty_for_nonexistent_name", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_returns_row_shape", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_single_node_id", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_subclass_only_base_is_reported_when_subclass_is_unreachable", + "CALLS tests/integration/test_cypher_queries.py:0 -> test_test_module_call_is_not_a_root_when_excluding_tests", + "CALLS tests/integration/test_imports_e2e.py:0 -> cpp_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> go_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> java_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> js_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> lua_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> main", + "CALLS tests/integration/test_imports_e2e.py:0 -> python_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> rust_imports_project", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_external_import_creates_module_node", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_external_import_creates_relationship", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_external_include_creates_module_node", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_external_require_creates_module_node", + "CALLS 
tests/integration/test_imports_e2e.py:0 -> test_internal_import_creates_relationship", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_internal_include_creates_relationship", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_internal_require_creates_relationship", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_stdlib_import_creates_module_node", + "CALLS tests/integration/test_imports_e2e.py:0 -> test_stdlib_import_creates_relationship", + "CALLS tests/integration/test_imports_e2e.py:0 -> ts_imports_project", + "CALLS tests/integration/test_incremental_external_prune_e2e.py:0 -> test_incremental_rebuild_prunes_orphaned_external_module", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> add", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> anyio_backend", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> mcp_registry", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> mock_generate", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> temp_test_repo", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> test_all_tools_have_consistent_takes_ctx", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> test_get_code_snippet_actual_behavior", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> test_list_directory_works", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> test_query_code_graph_works", + "CALLS tests/integration/test_mcp_tools_integration.py:0 -> test_read_file_works", + "CALLS tests/integration/test_multi_project_integration.py:0 -> project1_path", + "CALLS tests/integration/test_multi_project_integration.py:0 -> project2_path", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_clean_database_removes_all_projects", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_delete_nonexistent_project_no_error", + "CALLS tests/integration/test_multi_project_integration.py:0 -> 
test_delete_project_preserves_other_projects", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_delete_project_removes_all_project_nodes", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_delete_project_removes_files_and_folders", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_list_projects_after_indexing", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_list_projects_empty_database", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_list_projects_multiple", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_projects_have_separate_namespaces", + "CALLS tests/integration/test_multi_project_integration.py:0 -> test_reindex_only_affects_target_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> cpp_module_impl_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> cpp_module_interface_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> cpp_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> go_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> java_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> javascript_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> lua_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> main", + "CALLS tests/integration/test_node_label_e2e.py:0 -> php_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> python_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> rust_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> scala_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_cpp_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_cpp_creates_enum_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_cpp_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> 
test_cpp_creates_module_implementation_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_cpp_creates_module_interface_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_cpp_creates_union_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_go_creates_class_nodes_for_structs", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_go_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_go_creates_interface_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_java_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_java_creates_enum_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_java_creates_interface_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_javascript_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_javascript_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_language_has_defines", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_lua_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_php_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_php_creates_enum_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_php_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_php_creates_interface_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_python_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_python_creates_defines_relationships", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_python_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_python_creates_inherits_relationships", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_python_creates_method_nodes", + "CALLS 
tests/integration/test_node_label_e2e.py:0 -> test_rust_creates_class_nodes_for_structs", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_rust_creates_enum_nodes_for_enums", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_rust_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_rust_creates_interface_nodes_for_traits", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_scala_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_scala_creates_interface_nodes_for_traits", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_typescript_creates_class_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_typescript_creates_enum_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_typescript_creates_function_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_typescript_creates_interface_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> test_typescript_creates_type_nodes", + "CALLS tests/integration/test_node_label_e2e.py:0 -> type", + "CALLS tests/integration/test_node_label_e2e.py:0 -> typescript_project", + "CALLS tests/integration/test_node_label_e2e.py:0 -> value", + "CALLS tests/integration/test_shell_command_integration.py:0 -> anyio_backend", + "CALLS tests/integration/test_shell_command_integration.py:0 -> shell_commander", + "CALLS tests/integration/test_shell_command_integration.py:0 -> temp_test_repo", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_and_operator", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_cat_pipe_cut", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_cat_pipe_rg", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_cat_reads_file_content", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_command_with_nonexistent_file", + "CALLS 
tests/integration/test_shell_command_integration.py:0 -> test_cp_copies_file", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_disallowed_command_in_pipe_rejected", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_echo_outputs_text", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_echo_pipe_wc", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_find_locates_files", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_find_pipe_rg_pipe_wc", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_find_pipe_wc", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_git_init_and_status", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_git_status_without_repo", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_invalid_command_arguments", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_ls_lists_files", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_ls_pipe_head", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_ls_pipe_sort", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_ls_with_flags", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_mkdir_creates_directory", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_mv_moves_file", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_pwd_shows_working_directory", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_rg_searches_content", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_rm_removes_file", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_semicolon_operator", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_subshell_rejected", + "CALLS tests/integration/test_shell_command_integration.py:0 -> 
test_tool_executes_read_only_command_without_approval", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_tool_executes_write_command_with_approval", + "CALLS tests/integration/test_shell_command_integration.py:0 -> test_tool_requires_approval_for_write_command", + "CALLS tests/integration/test_tool_calling.py:0 -> agent", + "CALLS tests/integration/test_tool_calling.py:0 -> list_directory", + "CALLS tests/integration/test_tool_calling.py:0 -> main", + "CALLS tests/integration/test_tool_calling.py:0 -> query_graph", + "CALLS tests/integration/test_tool_calling.py:0 -> read_file", + "CALLS tests/integration/test_tool_calling.py:0 -> semantic_search", + "CALLS tests/integration/test_tool_calling.py:0 -> test_hybrid_search_completes", + "CALLS tests/integration/test_tool_calling.py:0 -> test_parallel_tool_calls_all_execute", + "CALLS tests/integration/test_tool_calling.py:0 -> tracker", + "CALLS tests/integration/test_tool_calling.py:0 -> tracking_tools", + "CALLS tests/test_absolute_path.py:0 -> add", + "CALLS tests/test_absolute_path.py:0 -> cpp_module_project", + "CALLS tests/test_absolute_path.py:0 -> my_method", + "CALLS tests/test_absolute_path.py:0 -> parsers_and_queries", + "CALLS tests/test_absolute_path.py:0 -> python_project", + "CALLS tests/test_absolute_path.py:0 -> test_absolute_path_is_posix_format", + "CALLS tests/test_absolute_path.py:0 -> test_absolute_path_matches_resolved_file", + "CALLS tests/test_absolute_path.py:0 -> test_class_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_enum_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_file_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_folder_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_function_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_interface_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> 
test_method_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_module_implementation_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_module_interface_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_module_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_package_nodes_have_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> test_project_node_has_no_absolute_path", + "CALLS tests/test_absolute_path.py:0 -> ts_project", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> execute_write", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> fetch_all", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> flush_all", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> parse", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> read", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> test_abstract_stub_is_not_the_call_target", + "CALLS tests/test_abstract_method_override_resolution.py:0 -> test_self_call_resolves_to_concrete_sibling_not_abstract_stub", + "CALLS tests/test_anthropic_token_counter.py:0 -> test_injects_placeholder_when_only_system_prompt_present", + "CALLS tests/test_anthropic_token_counter.py:0 -> test_retry_prompt_with_tool_name_becomes_tool_result_error_block", + "CALLS tests/test_anthropic_token_counter.py:0 -> test_retry_prompt_without_tool_name_becomes_text_block", + "CALLS tests/test_anthropic_token_counter.py:0 -> test_returns_zero_when_no_messages_and_no_system_prompt", + "CALLS tests/test_build_binary.py:0 -> test_all_options_combined", + "CALLS tests/test_build_binary.py:0 -> test_collect_all_only", + "CALLS tests/test_build_binary.py:0 -> test_collect_data_only", + "CALLS 
tests/test_build_binary.py:0 -> test_extracts_treesitter_packages_from_pyproject", + "CALLS tests/test_build_binary.py:0 -> test_filters_non_treesitter_packages", + "CALLS tests/test_build_binary.py:0 -> test_handles_different_version_specifiers", + "CALLS tests/test_build_binary.py:0 -> test_hidden_import_only", + "CALLS tests/test_build_binary.py:0 -> test_no_options_returns_empty_list", + "CALLS tests/test_build_binary.py:0 -> test_returns_empty_list_when_no_optional_dependencies", + "CALLS tests/test_build_binary.py:0 -> test_returns_empty_list_when_no_treesitter_extra", + "CALLS tests/test_c_language.py:0 -> add", + "CALLS tests/test_c_language.py:0 -> c_project", + "CALLS tests/test_c_language.py:0 -> c_subdir_project", + "CALLS tests/test_c_language.py:0 -> main", + "CALLS tests/test_c_language.py:0 -> run", + "CALLS tests/test_c_language.py:0 -> test_c_file_nodes_created", + "CALLS tests/test_c_language.py:0 -> test_c_module_nodes_created", + "CALLS tests/test_c_language.py:0 -> test_cmakelists_creates_package", + "CALLS tests/test_c_language.py:0 -> test_enum_detected", + "CALLS tests/test_c_language.py:0 -> test_function_call_detected", + "CALLS tests/test_c_language.py:0 -> test_function_qualified_name_format", + "CALLS tests/test_c_language.py:0 -> test_function_qualified_name_has_project", + "CALLS tests/test_c_language.py:0 -> test_function_with_parameters", + "CALLS tests/test_c_language.py:0 -> test_header_file_node_created", + "CALLS tests/test_c_language.py:0 -> test_include_creates_external_module", + "CALLS tests/test_c_language.py:0 -> test_include_utils_h_module_exists", + "CALLS tests/test_c_language.py:0 -> test_main_calls_greet", + "CALLS tests/test_c_language.py:0 -> test_main_function_detected", + "CALLS tests/test_c_language.py:0 -> test_main_module_defines_add", + "CALLS tests/test_c_language.py:0 -> test_makefile_creates_package", + "CALLS tests/test_c_language.py:0 -> test_module_defines_functions", + "CALLS tests/test_c_language.py:0 
-> test_multiple_calls_from_main", + "CALLS tests/test_c_language.py:0 -> test_pointer_return_function_detected", + "CALLS tests/test_c_language.py:0 -> test_simple_function_detected", + "CALLS tests/test_c_language.py:0 -> test_struct_detected", + "CALLS tests/test_c_language.py:0 -> test_struct_has_qualified_name", + "CALLS tests/test_c_language.py:0 -> test_subdirectory_with_makefile_is_package", + "CALLS tests/test_c_language.py:0 -> test_union_detected", + "CALLS tests/test_c_language.py:0 -> test_void_function_detected", + "CALLS tests/test_call_processor.py:0 -> call_processor", + "CALLS tests/test_call_processor.py:0 -> my_method", + "CALLS tests/test_call_processor.py:0 -> parsers_and_queries", + "CALLS tests/test_call_processor.py:0 -> patched_query", + "CALLS tests/test_call_processor.py:0 -> processor_with_imports", + "CALLS tests/test_call_processor.py:0 -> processor_with_inheritance", + "CALLS tests/test_call_processor.py:0 -> processor_with_methods", + "CALLS tests/test_call_processor.py:0 -> processor_with_registry", + "CALLS tests/test_call_processor.py:0 -> processor_with_types", + "CALLS tests/test_call_processor.py:0 -> test_apply_method", + "CALLS tests/test_call_processor.py:0 -> test_arrow_function_iife", + "CALLS tests/test_call_processor.py:0 -> test_attribute_call", + "CALLS tests/test_call_processor.py:0 -> test_bind_method", + "CALLS tests/test_call_processor.py:0 -> test_builtin_operator_equal", + "CALLS tests/test_call_processor.py:0 -> test_builtin_operator_plus", + "CALLS tests/test_call_processor.py:0 -> test_call_method", + "CALLS tests/test_call_processor.py:0 -> test_chained_attribute_call", + "CALLS tests/test_call_processor.py:0 -> test_chained_calls_is_chain", + "CALLS tests/test_call_processor.py:0 -> test_combined_func_class_query_exception_sets_none", + "CALLS tests/test_call_processor.py:0 -> test_common_prefix_reduces_distance", + "CALLS tests/test_call_processor.py:0 -> test_continues_after_error_in_single_file", + 
"CALLS tests/test_call_processor.py:0 -> test_cpp_binary_expression_minus", + "CALLS tests/test_call_processor.py:0 -> test_cpp_binary_expression_plus", + "CALLS tests/test_call_processor.py:0 -> test_cpp_unary_expression", + "CALLS tests/test_call_processor.py:0 -> test_cpp_update_expression", + "CALLS tests/test_call_processor.py:0 -> test_custom_operator_from_registry", + "CALLS tests/test_call_processor.py:0 -> test_distant_module_higher_distance", + "CALLS tests/test_call_processor.py:0 -> test_empty_string_not_chain", + "CALLS tests/test_call_processor.py:0 -> test_fallback_to_local_resolution", + "CALLS tests/test_call_processor.py:0 -> test_filters_calls_within_container", + "CALLS tests/test_call_processor.py:0 -> test_finds_method_in_grandparent", + "CALLS tests/test_call_processor.py:0 -> test_finds_method_in_parent", + "CALLS tests/test_call_processor.py:0 -> test_function_expression_iife", + "CALLS tests/test_call_processor.py:0 -> test_function_in_class_is_method", + "CALLS tests/test_call_processor.py:0 -> test_gets_field_by_custom_field_name", + "CALLS tests/test_call_processor.py:0 -> test_gets_name_from_class_def", + "CALLS tests/test_call_processor.py:0 -> test_gets_name_from_function_def", + "CALLS tests/test_call_processor.py:0 -> test_has_slots", + "CALLS tests/test_call_processor.py:0 -> test_identifier_call", + "CALLS tests/test_call_processor.py:0 -> test_iife_function_resolution", + "CALLS tests/test_call_processor.py:0 -> test_java_chained_method_invocation", + "CALLS tests/test_call_processor.py:0 -> test_java_method_invocation_with_object", + "CALLS tests/test_call_processor.py:0 -> test_java_method_invocation_without_object", + "CALLS tests/test_call_processor.py:0 -> test_js_builtin_pattern_json_parse", + "CALLS tests/test_call_processor.py:0 -> test_js_builtin_pattern_object_keys", + "CALLS tests/test_call_processor.py:0 -> test_logs_error_on_processing_failure", + "CALLS tests/test_call_processor.py:0 -> test_member_expression_js", 
+ "CALLS tests/test_call_processor.py:0 -> test_method_not_found_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_method_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_method_with_parens_is_chain", + "CALLS tests/test_call_processor.py:0 -> test_nested_function", + "CALLS tests/test_call_processor.py:0 -> test_nested_function_is_not_method", + "CALLS tests/test_call_processor.py:0 -> test_no_dots_not_chain", + "CALLS tests/test_call_processor.py:0 -> test_no_function_child_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_no_instance_dict", + "CALLS tests/test_call_processor.py:0 -> test_non_builtin_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_non_iife_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_non_operator_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_process_calls_with_empty_combined_captures", + "CALLS tests/test_call_processor.py:0 -> test_process_calls_without_func_class_captures_cache", + "CALLS tests/test_call_processor.py:0 -> test_prototype_apply", + "CALLS tests/test_call_processor.py:0 -> test_prototype_call", + "CALLS tests/test_call_processor.py:0 -> test_rejects_arbitrary_attribute", + "CALLS tests/test_call_processor.py:0 -> test_resolves_direct_method", + "CALLS tests/test_call_processor.py:0 -> test_resolves_from_import_map", + "CALLS tests/test_call_processor.py:0 -> test_resolves_imported_function", + "CALLS tests/test_call_processor.py:0 -> test_resolves_inherited_method", + "CALLS tests/test_call_processor.py:0 -> test_resolves_local_variable_method_call", + "CALLS tests/test_call_processor.py:0 -> test_resolves_method_on_imported_class", + "CALLS tests/test_call_processor.py:0 -> test_resolves_same_module_function", + "CALLS tests/test_call_processor.py:0 -> test_returns_call_nodes_for_code_with_calls", + "CALLS tests/test_call_processor.py:0 -> test_returns_dotted_type_as_is", + "CALLS tests/test_call_processor.py:0 -> 
test_returns_empty_when_no_calls_query", + "CALLS tests/test_call_processor.py:0 -> test_returns_none_for_chain_without_type_info", + "CALLS tests/test_call_processor.py:0 -> test_returns_none_for_no_name", + "CALLS tests/test_call_processor.py:0 -> test_returns_none_for_non_chained_expression", + "CALLS tests/test_call_processor.py:0 -> test_returns_none_for_unknown_function", + "CALLS tests/test_call_processor.py:0 -> test_returns_none_for_unknown_method", + "CALLS tests/test_call_processor.py:0 -> test_returns_none_for_unresolvable_chain", + "CALLS tests/test_call_processor.py:0 -> test_rust_impl_fallback_to_children", + "CALLS tests/test_call_processor.py:0 -> test_same_module_distance_zero", + "CALLS tests/test_call_processor.py:0 -> test_sibling_module_distance_one", + "CALLS tests/test_call_processor.py:0 -> test_simple_method_not_chain", + "CALLS tests/test_call_processor.py:0 -> test_slot_attributes_accessible", + "CALLS tests/test_call_processor.py:0 -> test_super_calls_constructor", + "CALLS tests/test_call_processor.py:0 -> test_super_dot_method", + "CALLS tests/test_call_processor.py:0 -> test_super_inherited_from_grandparent", + "CALLS tests/test_call_processor.py:0 -> test_super_method_not_found_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_super_no_class_context_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_super_unknown_class_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_top_level_function", + "CALLS tests/test_call_processor.py:0 -> test_top_level_function_is_not_method", + "CALLS tests/test_call_processor.py:0 -> test_unknown_class_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_unknown_method_returns_none", + "CALLS tests/test_call_processor.py:0 -> test_without_call_nodes", + "CALLS tests/test_call_processor.py:0 -> test_without_call_nodes_and_no_query", + "CALLS tests/test_call_processor.py:0 -> test_without_combined_captures", + "CALLS tests/test_call_processor.py:0 -> 
test_without_combined_captures_no_functions", + "CALLS tests/test_call_processor.py:0 -> test_without_sorted_func_nodes", + "CALLS tests/test_call_processor_integration.py:0 -> add", + "CALLS tests/test_call_processor_integration.py:0 -> factory", + "CALLS tests/test_call_processor_integration.py:0 -> keys", + "CALLS tests/test_call_processor_integration.py:0 -> main", + "CALLS tests/test_call_processor_integration.py:0 -> parse", + "CALLS tests/test_call_processor_integration.py:0 -> parsers_and_queries", + "CALLS tests/test_call_processor_integration.py:0 -> process", + "CALLS tests/test_call_processor_integration.py:0 -> test_function_does_not_call_class_python", + "CALLS tests/test_call_processor_integration.py:0 -> test_handles_chained_method_calls", + "CALLS tests/test_call_processor_integration.py:0 -> test_handles_empty_file", + "CALLS tests/test_call_processor_integration.py:0 -> test_handles_file_with_only_imports", + "CALLS tests/test_call_processor_integration.py:0 -> test_handles_init_py_module_qn", + "CALLS tests/test_call_processor_integration.py:0 -> test_handles_nested_function_calls", + "CALLS tests/test_call_processor_integration.py:0 -> test_module_does_not_call_class_python", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_builtin_calls_js", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_class_method_calls_ts", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_function_calls_cpp", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_function_calls_in_file", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_function_calls_js", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_function_calls_rust", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_function_calls_ts", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_impl_method_calls_rust", + "CALLS 
tests/test_call_processor_integration.py:0 -> test_processes_imported_function_calls", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_method_calls_cpp", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_method_calls_in_class", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_method_calls_js", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_method_invocation_java", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_module_level_calls", + "CALLS tests/test_call_processor_integration.py:0 -> test_processes_same_class_method_calls_java", + "CALLS tests/test_call_resolver.py:0 -> call_processor", + "CALLS tests/test_call_resolver.py:0 -> call_resolver", + "CALLS tests/test_call_resolver.py:0 -> execute", + "CALLS tests/test_call_resolver.py:0 -> find_ending_with", + "CALLS tests/test_call_resolver.py:0 -> find_with_prefix", + "CALLS tests/test_call_resolver.py:0 -> is_abstract", + "CALLS tests/test_call_resolver.py:0 -> is_property", + "CALLS tests/test_call_resolver.py:0 -> mark_abstract", + "CALLS tests/test_call_resolver.py:0 -> mark_property", + "CALLS tests/test_call_resolver.py:0 -> mock_ast_cache", + "CALLS tests/test_call_resolver.py:0 -> mock_function_registry", + "CALLS tests/test_call_resolver.py:0 -> mock_import_processor", + "CALLS tests/test_call_resolver.py:0 -> mock_type_inference", + "CALLS tests/test_call_resolver.py:0 -> process", + "CALLS tests/test_call_resolver.py:0 -> property_names", + "CALLS tests/test_call_resolver.py:0 -> register_unique_qn", + "CALLS tests/test_call_resolver.py:0 -> start", + "CALLS tests/test_call_resolver.py:0 -> test_bfs_finds_deep_ancestor_method", + "CALLS tests/test_call_resolver.py:0 -> test_bfs_no_infinite_loop_on_cycle", + "CALLS tests/test_call_resolver.py:0 -> test_bfs_order_prefers_closer_parent", + "CALLS tests/test_call_resolver.py:0 -> test_chained_calls_is_chain", + "CALLS tests/test_call_resolver.py:0 -> 
test_cpp_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_distant_module_higher_distance", + "CALLS tests/test_call_resolver.py:0 -> test_dot_separated_qn", + "CALLS tests/test_call_resolver.py:0 -> test_dot_separator", + "CALLS tests/test_call_resolver.py:0 -> test_double_colon_separated_qn", + "CALLS tests/test_call_resolver.py:0 -> test_double_colon_separator", + "CALLS tests/test_call_resolver.py:0 -> test_eligible_files_are_sorted", + "CALLS tests/test_call_resolver.py:0 -> test_empty_string_not_chain", + "CALLS tests/test_call_resolver.py:0 -> test_falls_back_to_class_name_resolution", + "CALLS tests/test_call_resolver.py:0 -> test_falls_back_to_trie", + "CALLS tests/test_call_resolver.py:0 -> test_finds_method_in_grandparent", + "CALLS tests/test_call_resolver.py:0 -> test_finds_method_in_parent", + "CALLS tests/test_call_resolver.py:0 -> test_go_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_graph_output_deterministic_across_runs", + "CALLS tests/test_call_resolver.py:0 -> test_handles_diamond_inheritance", + "CALLS tests/test_call_resolver.py:0 -> test_handles_qualified_call_name", + "CALLS tests/test_call_resolver.py:0 -> test_java_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_javascript_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_last_element_matches_function_name", + "CALLS tests/test_call_resolver.py:0 -> test_lua_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_matches_deeply_chained", + "CALLS tests/test_call_resolver.py:0 -> test_matches_final_method", + "CALLS tests/test_call_resolver.py:0 -> test_method_with_parens_is_chain", + "CALLS tests/test_call_resolver.py:0 -> test_no_dots_not_chain", + "CALLS tests/test_call_resolver.py:0 -> test_no_match_on_parenthesized_suffix", + "CALLS tests/test_call_resolver.py:0 -> test_no_separator", + "CALLS tests/test_call_resolver.py:0 -> test_no_separator_returns_single_element", + "CALLS tests/test_call_resolver.py:0 -> 
test_regular_call_not_super", + "CALLS tests/test_call_resolver.py:0 -> test_resolve_function_call_deterministic_across_runs", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_chained_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_direct_import", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_from_import_map", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_iife_arrow_prefix", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_iife_function_prefix", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_iife_priority", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_imported_class_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_imported_function", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_imported_multi_part_call", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_inherited_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_inherited_self_attribute_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_java_method_call", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_js_builtin_type", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_local_variable_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_method_on_class", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_module_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_module_method_fallback", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_rust_class_qn", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_same_module_function", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_self_attribute_method", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_static_method_via_import", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_super_call", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_via_local_type", + "CALLS tests/test_call_resolver.py:0 -> test_resolves_via_trie_match", + "CALLS 
tests/test_call_resolver.py:0 -> test_resolves_wildcard_import", + "CALLS tests/test_call_resolver.py:0 -> test_returns_class_qn_for_matching_method", + "CALLS tests/test_call_resolver.py:0 -> test_returns_colon_over_dot", + "CALLS tests/test_call_resolver.py:0 -> test_returns_dot_as_default", + "CALLS tests/test_call_resolver.py:0 -> test_returns_dot_for_no_separator", + "CALLS tests/test_call_resolver.py:0 -> test_returns_dotted_type_as_is", + "CALLS tests/test_call_resolver.py:0 -> test_returns_double_colon_first", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_empty_call_name", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_no_local_type", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_no_local_types", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_no_match", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_no_separator", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_no_wildcard_match", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_non_iife", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unimported", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_call", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_class", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_function", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_method", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_module", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unknown_object", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unregistered_iife", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unregistered_import", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_for_unresolvable_chain", + "CALLS 
tests/test_call_resolver.py:0 -> test_returns_none_for_unresolved_java_call", + "CALLS tests/test_call_resolver.py:0 -> test_returns_none_without_class_context", + "CALLS tests/test_call_resolver.py:0 -> test_returns_original_for_no_match", + "CALLS tests/test_call_resolver.py:0 -> test_rust_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_same_module_distance_zero", + "CALLS tests/test_call_resolver.py:0 -> test_self_attribute_call", + "CALLS tests/test_call_resolver.py:0 -> test_sibling_module_distance_low", + "CALLS tests/test_call_resolver.py:0 -> test_simple_method_not_chain", + "CALLS tests/test_call_resolver.py:0 -> test_single_colon_separator", + "CALLS tests/test_call_resolver.py:0 -> test_skips_non_wildcard_imports", + "CALLS tests/test_call_resolver.py:0 -> test_splits_on_colon", + "CALLS tests/test_call_resolver.py:0 -> test_splits_on_dot", + "CALLS tests/test_call_resolver.py:0 -> test_splits_on_double_colon", + "CALLS tests/test_call_resolver.py:0 -> test_super_constructor_call", + "CALLS tests/test_call_resolver.py:0 -> test_super_dot_method", + "CALLS tests/test_call_resolver.py:0 -> test_super_keyword_alone", + "CALLS tests/test_call_resolver.py:0 -> test_super_method_call", + "CALLS tests/test_call_resolver.py:0 -> test_super_parens_method", + "CALLS tests/test_call_resolver.py:0 -> test_trie_many_candidates_deterministic", + "CALLS tests/test_call_resolver.py:0 -> test_trie_tiebreak_by_qualified_name", + "CALLS tests/test_call_resolver.py:0 -> test_trie_tiebreak_distance_still_wins", + "CALLS tests/test_call_resolver.py:0 -> test_trie_tiebreak_picks_lexicographic_first", + "CALLS tests/test_call_resolver.py:0 -> test_two_part_call", + "CALLS tests/test_call_resolver.py:0 -> test_typescript_deterministic", + "CALLS tests/test_call_resolver.py:0 -> variants", + "CALLS tests/test_callable_field_calls.py:0 -> ensure_node_batch", + "CALLS tests/test_callable_field_calls.py:0 -> ensure_relationship_batch", + "CALLS 
tests/test_callable_field_calls.py:0 -> execute_write", + "CALLS tests/test_callable_field_calls.py:0 -> fetch_all", + "CALLS tests/test_callable_field_calls.py:0 -> flush_all", + "CALLS tests/test_callable_field_calls.py:0 -> get", + "CALLS tests/test_callable_field_calls.py:0 -> test_ambiguous_field_name_not_resolved", + "CALLS tests/test_callable_field_calls.py:0 -> test_resolves_to_all_bound_functions", + "CALLS tests/test_callable_field_calls.py:0 -> test_resolves_to_first_bound_function", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> test_appends_synthetic_return_for_each_orphan_tool_call", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> test_ignores_non_tool_call_parts_in_response", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> test_noop_when_history_empty", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> test_noop_when_last_message_is_request", + "CALLS tests/test_cancel_orphaned_tool_calls.py:0 -> test_noop_when_response_has_no_tool_calls", + "CALLS tests/test_cgr_instructions.py:0 -> isolated_global", + "CALLS tests/test_cgr_instructions.py:0 -> mock_open", + "CALLS tests/test_cgr_instructions.py:0 -> test_create_rag_orchestrator_reads_global_instructions", + "CALLS tests/test_cgr_instructions.py:0 -> test_create_rag_orchestrator_reads_project_instructions", + "CALLS tests/test_cgr_instructions.py:0 -> test_create_rag_orchestrator_skips_instructions_when_disabled", + "CALLS tests/test_cgr_instructions.py:0 -> test_loads_global_only_when_repo_path_none", + "CALLS tests/test_cgr_instructions.py:0 -> test_loads_instructions_when_repo_file_present", + "CALLS tests/test_cgr_instructions.py:0 -> test_merges_global_and_repo", + "CALLS tests/test_cgr_instructions.py:0 -> test_orchestrator_prompt_appends_project_instructions", + "CALLS tests/test_cgr_instructions.py:0 -> test_orchestrator_prompt_unchanged_without_instructions", + "CALLS tests/test_cgr_instructions.py:0 -> test_returns_none_on_read_error", + "CALLS 
tests/test_cgr_instructions.py:0 -> test_returns_none_when_file_empty", + "CALLS tests/test_cgr_instructions.py:0 -> test_returns_none_when_no_file", + "CALLS tests/test_cgr_shim.py:0 -> test_all_matches_module_exports", + "CALLS tests/test_cgr_shim.py:0 -> test_all_symbols_importable", + "CALLS tests/test_cgr_shim.py:0 -> test_cypher_generator_is_canonical_class", + "CALLS tests/test_cgr_shim.py:0 -> test_embed_code_is_canonical_function", + "CALLS tests/test_cgr_shim.py:0 -> test_graph_loader_is_canonical_class", + "CALLS tests/test_cgr_shim.py:0 -> test_load_graph_is_canonical_function", + "CALLS tests/test_cgr_shim.py:0 -> test_memgraph_ingestor_is_canonical_class", + "CALLS tests/test_cgr_shim.py:0 -> test_settings_is_canonical_instance", + "CALLS tests/test_cgr_state_and_status.py:0 -> _temp_home", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_read_when_no_state_returns_empty", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_record_sync_creates_file", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_record_sync_multiple_projects", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_record_sync_updates_existing", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_status_lists_recorded_projects", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_status_runs_clean", + "CALLS tests/test_cgr_state_and_status.py:0 -> test_stop_invokes_daemon_down", + "CALLS tests/test_cgrignore.py:0 -> mock_memgraph_connect", + "CALLS tests/test_cgrignore.py:0 -> mock_open", + "CALLS tests/test_cgrignore.py:0 -> test_cgrignore_alone_triggers_prompt", + "CALLS tests/test_cgrignore.py:0 -> test_cgrignore_deduplicates_with_detected", + "CALLS tests/test_cgrignore.py:0 -> test_cgrignore_merged_with_cli_excludes", + "CALLS tests/test_cgrignore.py:0 -> test_cgrignore_only_returns_without_prompt_when_empty", + "CALLS tests/test_cgrignore.py:0 -> test_cgrignore_patterns_included_in_candidates", + "CALLS tests/test_cgrignore.py:0 -> test_handles_duplicates", + 
"CALLS tests/test_cgrignore.py:0 -> test_ignores_comments_and_blank_lines", + "CALLS tests/test_cgrignore.py:0 -> test_index_loads_cgrignore_without_interactive_setup", + "CALLS tests/test_cgrignore.py:0 -> test_loads_exclude_patterns_from_file", + "CALLS tests/test_cgrignore.py:0 -> test_mixed_exclude_and_negation", + "CALLS tests/test_cgrignore.py:0 -> test_negation_strips_leading_whitespace", + "CALLS tests/test_cgrignore.py:0 -> test_negation_strips_whitespace_after_exclamation", + "CALLS tests/test_cgrignore.py:0 -> test_parses_negation_patterns", + "CALLS tests/test_cgrignore.py:0 -> test_returns_cgrignore_patterns_type", + "CALLS tests/test_cgrignore.py:0 -> test_returns_empty_if_cgrignore_is_a_directory", + "CALLS tests/test_cgrignore.py:0 -> test_returns_empty_on_read_error", + "CALLS tests/test_cgrignore.py:0 -> test_returns_empty_when_no_file", + "CALLS tests/test_cgrignore.py:0 -> test_start_does_not_prompt_without_interactive_setup", + "CALLS tests/test_cgrignore.py:0 -> test_start_loads_cgrignore_without_interactive_setup", + "CALLS tests/test_cgrignore.py:0 -> test_start_merges_cli_excludes_with_cgrignore", + "CALLS tests/test_cgrignore.py:0 -> test_strips_whitespace", + "CALLS tests/test_cgrignore.py:0 -> test_unignore_included_when_user_selects_all", + "CALLS tests/test_cgrignore.py:0 -> test_unignore_included_when_user_selects_none", + "CALLS tests/test_cgrignore.py:0 -> test_unignore_merged_with_user_selection", + "CALLS tests/test_cgrignore.py:0 -> test_unignore_only_returns_without_prompt", + "CALLS tests/test_chained_attribute_resolution.py:0 -> definition_processor", + "CALLS tests/test_chained_attribute_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_chained_attribute_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_chained_attribute_resolution.py:0 -> execute_write", + "CALLS tests/test_chained_attribute_resolution.py:0 -> fetch_all", + "CALLS tests/test_chained_attribute_resolution.py:0 -> flush_all", + "CALLS 
tests/test_chained_attribute_resolution.py:0 -> process_all_method_overrides", + "CALLS tests/test_chained_attribute_resolution.py:0 -> test_does_not_resolve_to_module_level_function", + "CALLS tests/test_chained_attribute_resolution.py:0 -> test_three_level_chain_resolves_to_inherited_mixin_method", + "CALLS tests/test_check_no_docs.py:0 -> test_comment_after_string_with_hash", + "CALLS tests/test_check_no_docs.py:0 -> test_comment_at_start", + "CALLS tests/test_check_no_docs.py:0 -> test_double_quote_docstring", + "CALLS tests/test_check_no_docs.py:0 -> test_empty_lines_before_code", + "CALLS tests/test_check_no_docs.py:0 -> test_empty_lines_before_docstring", + "CALLS tests/test_check_no_docs.py:0 -> test_empty_string", + "CALLS tests/test_check_no_docs.py:0 -> test_escaped_quote_in_string", + "CALLS tests/test_check_no_docs.py:0 -> test_file_with_allowed_comment", + "CALLS tests/test_check_no_docs.py:0 -> test_file_with_disallowed_comment", + "CALLS tests/test_check_no_docs.py:0 -> test_file_with_module_docstring_detected", + "CALLS tests/test_check_no_docs.py:0 -> test_file_with_no_comments", + "CALLS tests/test_check_no_docs.py:0 -> test_h_marker", + "CALLS tests/test_check_no_docs.py:0 -> test_hash_in_double_quoted_string", + "CALLS tests/test_check_no_docs.py:0 -> test_hash_in_single_quoted_string", + "CALLS tests/test_check_no_docs.py:0 -> test_mixed_quotes", + "CALLS tests/test_check_no_docs.py:0 -> test_multiline_string_not_treated_as_comment", + "CALLS tests/test_check_no_docs.py:0 -> test_multiple_errors_reported", + "CALLS tests/test_check_no_docs.py:0 -> test_multiple_strings", + "CALLS tests/test_check_no_docs.py:0 -> test_no_comment", + "CALLS tests/test_check_no_docs.py:0 -> test_no_docstring", + "CALLS tests/test_check_no_docs.py:0 -> test_no_marker", + "CALLS tests/test_check_no_docs.py:0 -> test_noqa_comment_allowed", + "CALLS tests/test_check_no_docs.py:0 -> test_noqa_marker", + "CALLS tests/test_check_no_docs.py:0 -> 
test_partial_match_not_allowed", + "CALLS tests/test_check_no_docs.py:0 -> test_protoc_marker", + "CALLS tests/test_check_no_docs.py:0 -> test_pyright_marker", + "CALLS tests/test_check_no_docs.py:0 -> test_shebang_and_module_docstring_detected", + "CALLS tests/test_check_no_docs.py:0 -> test_shebang_then_code", + "CALLS tests/test_check_no_docs.py:0 -> test_shebang_then_docstring", + "CALLS tests/test_check_no_docs.py:0 -> test_simple_comment", + "CALLS tests/test_check_no_docs.py:0 -> test_single_quote_docstring", + "CALLS tests/test_check_no_docs.py:0 -> test_ty_marker", + "CALLS tests/test_check_no_docs.py:0 -> test_type_ignore_comment_allowed", + "CALLS tests/test_check_no_docs.py:0 -> test_type_marker", + "CALLS tests/test_class_ingest.py:0 -> abstract_class_project", + "CALLS tests/test_class_ingest.py:0 -> add", + "CALLS tests/test_class_ingest.py:0 -> circular_inheritance_project", + "CALLS tests/test_class_ingest.py:0 -> comments_only_project", + "CALLS tests/test_class_ingest.py:0 -> cpp_template_class_project", + "CALLS tests/test_class_ingest.py:0 -> deeply_nested_class_project", + "CALLS tests/test_class_ingest.py:0 -> empty_file_project", + "CALLS tests/test_class_ingest.py:0 -> go_struct_project", + "CALLS tests/test_class_ingest.py:0 -> inline_module_project", + "CALLS tests/test_class_ingest.py:0 -> java_interface_project", + "CALLS tests/test_class_ingest.py:0 -> js_class_expression_project", + "CALLS tests/test_class_ingest.py:0 -> main", + "CALLS tests/test_class_ingest.py:0 -> method_a", + "CALLS tests/test_class_ingest.py:0 -> method_b", + "CALLS tests/test_class_ingest.py:0 -> method_override_project", + "CALLS tests/test_class_ingest.py:0 -> mixin_instance", + "CALLS tests/test_class_ingest.py:0 -> multiple_inheritance_project", + "CALLS tests/test_class_ingest.py:0 -> nested_class_project", + "CALLS tests/test_class_ingest.py:0 -> process", + "CALLS tests/test_class_ingest.py:0 -> python_class_project", + "CALLS 
tests/test_class_ingest.py:0 -> rust_impl_project", + "CALLS tests/test_class_ingest.py:0 -> special_characters_project", + "CALLS tests/test_class_ingest.py:0 -> test_abstract_method_overrides", + "CALLS tests/test_class_ingest.py:0 -> test_circular_inheritance_does_not_crash", + "CALLS tests/test_class_ingest.py:0 -> test_classes_ingested_without_combined_captures", + "CALLS tests/test_class_ingest.py:0 -> test_comments_only_file_does_not_crash", + "CALLS tests/test_class_ingest.py:0 -> test_cpp_template_class_methods", + "CALLS tests/test_class_ingest.py:0 -> test_cpp_template_inheritance", + "CALLS tests/test_class_ingest.py:0 -> test_deeply_nested_classes_are_ingested", + "CALLS tests/test_class_ingest.py:0 -> test_empty_file_does_not_crash", + "CALLS tests/test_class_ingest.py:0 -> test_extracts_last_namespace_component", + "CALLS tests/test_class_ingest.py:0 -> test_extracts_simple_class_name", + "CALLS tests/test_class_ingest.py:0 -> test_go_embedded_interface", + "CALLS tests/test_class_ingest.py:0 -> test_go_interface_nodes_created", + "CALLS tests/test_class_ingest.py:0 -> test_go_struct_methods_are_ingested", + "CALLS tests/test_class_ingest.py:0 -> test_go_struct_nodes_created", + "CALLS tests/test_class_ingest.py:0 -> test_handles_deeply_nested_namespace", + "CALLS tests/test_class_ingest.py:0 -> test_handles_namespaced_template", + "CALLS tests/test_class_ingest.py:0 -> test_java_interface_nodes_created", + "CALLS tests/test_class_ingest.py:0 -> test_java_multiple_interface_implementation", + "CALLS tests/test_class_ingest.py:0 -> test_java_single_interface_implementation", + "CALLS tests/test_class_ingest.py:0 -> test_js_class_expression_inheritance", + "CALLS tests/test_class_ingest.py:0 -> test_js_class_expression_methods", + "CALLS tests/test_class_ingest.py:0 -> test_method_override_chain", + "CALLS tests/test_class_ingest.py:0 -> test_method_override_skips_non_overriding_methods", + "CALLS tests/test_class_ingest.py:0 -> 
test_multiple_inheritance_creates_all_relationships", + "CALLS tests/test_class_ingest.py:0 -> test_nested_class_method_qualified_names", + "CALLS tests/test_class_ingest.py:0 -> test_nested_class_qualified_names", + "CALLS tests/test_class_ingest.py:0 -> test_non_abstract_method_override", + "CALLS tests/test_class_ingest.py:0 -> test_resolves_imported_name", + "CALLS tests/test_class_ingest.py:0 -> test_returns_class_for_known_class", + "CALLS tests/test_class_ingest.py:0 -> test_returns_class_for_unknown", + "CALLS tests/test_class_ingest.py:0 -> test_returns_interface_for_known_interface", + "CALLS tests/test_class_ingest.py:0 -> test_returns_qualified_name_for_unknown", + "CALLS tests/test_class_ingest.py:0 -> test_rust_impl_method_calls", + "CALLS tests/test_class_ingest.py:0 -> test_rust_impl_methods_are_ingested", + "CALLS tests/test_class_ingest.py:0 -> test_rust_impl_methods_ingested", + "CALLS tests/test_class_ingest.py:0 -> test_rust_inline_modules_are_ingested", + "CALLS tests/test_class_ingest.py:0 -> test_special_character_names_are_handled", + "CALLS tests/test_class_ingest.py:0 -> test_strips_nested_template_parameters", + "CALLS tests/test_class_ingest.py:0 -> test_strips_template_parameters", + "CALLS tests/test_class_ingest.py:0 -> test_typescript_interface_implementation", + "CALLS tests/test_class_ingest.py:0 -> test_typescript_mixin_inheritance", + "CALLS tests/test_class_ingest.py:0 -> test_uses_module_qn_as_prefix", + "CALLS tests/test_class_ingest.py:0 -> typescript_mixin_project", + "CALLS tests/test_classless_constructor_calls.py:0 -> test_class_with_init_emits_both_instantiates_and_init_call", + "CALLS tests/test_classless_constructor_calls.py:0 -> test_dataclass_construction_emits_instantiates_not_calls", + "CALLS tests/test_cli_autosync.py:0 -> mock_agent_loops", + "CALLS tests/test_cli_autosync.py:0 -> mock_memgraph_connect", + "CALLS tests/test_cli_autosync.py:0 -> mock_sync_path", + "CALLS tests/test_cli_autosync.py:0 -> 
mock_validate_models", + "CALLS tests/test_cli_autosync.py:0 -> test_start_auto_sync_respects_explicit_project_name", + "CALLS tests/test_cli_autosync.py:0 -> test_start_auto_sync_uses_derived_project_name_when_none_provided", + "CALLS tests/test_cli_autosync.py:0 -> test_start_clean_without_update_graph_does_not_sync", + "CALLS tests/test_cli_autosync.py:0 -> test_start_default_triggers_auto_sync", + "CALLS tests/test_cli_autosync.py:0 -> test_start_no_sync_skips_auto_sync", + "CALLS tests/test_cli_autosync.py:0 -> test_start_update_graph_uses_sync_helper", + "CALLS tests/test_cli_clean.py:0 -> mock_memgraph_connect", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_deletes_hash_cache", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_does_not_invoke_graph_updater", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_no_cache_file_still_succeeds", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_shows_clean_done_message", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_skips_model_validation", + "CALLS tests/test_cli_clean.py:0 -> test_clean_alone_wipes_database", + "CALLS tests/test_cli_clean.py:0 -> test_clean_with_update_calls_clean_database", + "CALLS tests/test_cli_clean.py:0 -> test_clean_with_update_deletes_hash_cache", + "CALLS tests/test_cli_clean.py:0 -> test_update_without_clean_preserves_hash_cache", + "CALLS tests/test_cli_delete_project.py:0 -> mock_memgraph_connect", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_calls_ingestor_delete_project", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_cleans_embeddings_with_node_ids", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_does_not_wipe_other_projects", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_fails_when_project_missing", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_rejects_blank_name", + "CALLS tests/test_cli_delete_project.py:0 -> 
test_delete_project_removes_hash_cache_when_repo_path_given", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_shows_success_message", + "CALLS tests/test_cli_delete_project.py:0 -> test_delete_project_without_repo_path_leaves_unrelated_hash_caches", + "CALLS tests/test_cli_repo_path_validation.py:0 -> mock_memgraph_connect", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_file_path_exits_with_error", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_git_dir_does_not_warn", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_git_file_worktree_does_not_warn", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_index_nonexistent_path_exits_with_error", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_nonexistent_path_exits_with_error", + "CALLS tests/test_cli_repo_path_validation.py:0 -> test_valid_non_git_dir_warns_but_proceeds", + "CALLS tests/test_cli_smoke.py:0 -> test_help_command_works", + "CALLS tests/test_cli_smoke.py:0 -> test_import_cli_module", + "CALLS tests/test_cli_smoke.py:0 -> test_version_flag", + "CALLS tests/test_code_retrieval.py:0 -> mock_ingestor", + "CALLS tests/test_code_retrieval.py:0 -> retriever", + "CALLS tests/test_code_retrieval.py:0 -> test_creates_tool_with_description", + "CALLS tests/test_code_retrieval.py:0 -> test_handles_ingestor_error", + "CALLS tests/test_code_retrieval.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_code_retrieval.py:0 -> test_init_stores_ingestor", + "CALLS tests/test_code_retrieval.py:0 -> test_returns_not_found_when_missing_end_line", + "CALLS tests/test_code_retrieval.py:0 -> test_returns_not_found_when_missing_path", + "CALLS tests/test_code_retrieval.py:0 -> test_returns_not_found_when_missing_start_line", + "CALLS tests/test_code_retrieval.py:0 -> test_returns_not_found_when_no_results", + "CALLS tests/test_code_retrieval.py:0 -> test_tool_calls_retriever", + "CALLS tests/test_code_retrieval.py:0 -> test_uses_cypher_query_constant", + 
"CALLS tests/test_codebase_query.py:0 -> anyio_backend", + "CALLS tests/test_codebase_query.py:0 -> mock_console", + "CALLS tests/test_codebase_query.py:0 -> mock_cypher_gen", + "CALLS tests/test_codebase_query.py:0 -> mock_ingestor", + "CALLS tests/test_codebase_query.py:0 -> test_creates_default_console", + "CALLS tests/test_codebase_query.py:0 -> test_creates_tool_instance", + "CALLS tests/test_codebase_query.py:0 -> test_database_error_handled", + "CALLS tests/test_codebase_query.py:0 -> test_default_console_writes_to_stderr", + "CALLS tests/test_codebase_query.py:0 -> test_empty_results_returns_zero_count", + "CALLS tests/test_codebase_query.py:0 -> test_handles_boolean_values", + "CALLS tests/test_codebase_query.py:0 -> test_handles_none_values", + "CALLS tests/test_codebase_query.py:0 -> test_handles_numeric_values", + "CALLS tests/test_codebase_query.py:0 -> test_llm_generation_error_handled", + "CALLS tests/test_codebase_query.py:0 -> test_query_calls_cypher_generator", + "CALLS tests/test_codebase_query.py:0 -> test_query_calls_ingestor_fetch_all", + "CALLS tests/test_codebase_query.py:0 -> test_query_timeout_handled", + "CALLS tests/test_codebase_query.py:0 -> test_result_contains_query_used", + "CALLS tests/test_codebase_query.py:0 -> test_result_summary_contains_count", + "CALLS tests/test_codebase_query.py:0 -> test_successful_query_returns_results", + "CALLS tests/test_codebase_query.py:0 -> test_tool_has_description", + "CALLS tests/test_codebase_query.py:0 -> test_uses_provided_console", + "CALLS tests/test_complex_cross_file_calls.py:0 -> complex_project", + "CALLS tests/test_complex_cross_file_calls.py:0 -> main", + "CALLS tests/test_complex_cross_file_calls.py:0 -> process", + "CALLS tests/test_complex_cross_file_calls.py:0 -> test_complex_cross_file_function_calls", + "CALLS tests/test_complex_cross_file_calls.py:0 -> test_cross_file_calls_with_short_names", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> close", + "CALLS 
tests/test_comprehensive_stdlib_introspection.py:0 -> get", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> insert", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> main", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> read", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_all_languages_stdlib_consistency", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_cpp_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_go_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_java_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_javascript_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_lua_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_python_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_rust_stdlib_introspection", + "CALLS tests/test_comprehensive_stdlib_introspection.py:0 -> test_typescript_stdlib_introspection", + "CALLS tests/test_conditional_alias_call.py:0 -> ensure_node_batch", + "CALLS tests/test_conditional_alias_call.py:0 -> ensure_relationship_batch", + "CALLS tests/test_conditional_alias_call.py:0 -> execute_write", + "CALLS tests/test_conditional_alias_call.py:0 -> fetch_all", + "CALLS tests/test_conditional_alias_call.py:0 -> flush_all", + "CALLS tests/test_conditional_alias_call.py:0 -> test_conditional_bound_method_alias_resolves", + "CALLS tests/test_config_validation.py:0 -> test_case_insensitive_lookup", + "CALLS tests/test_config_validation.py:0 -> test_default_role_omits_role_from_message", + "CALLS tests/test_config_validation.py:0 -> test_google_gla_requires_api_key", + "CALLS tests/test_config_validation.py:0 -> test_google_vertex_skips_validation", + "CALLS tests/test_config_validation.py:0 -> 
test_invalid_api_key_raises", + "CALLS tests/test_config_validation.py:0 -> test_known_provider_anthropic", + "CALLS tests/test_config_validation.py:0 -> test_known_provider_openai", + "CALLS tests/test_config_validation.py:0 -> test_local_providers_skip_validation", + "CALLS tests/test_config_validation.py:0 -> test_role_appears_in_message", + "CALLS tests/test_config_validation.py:0 -> test_role_forwarded_to_error_message", + "CALLS tests/test_config_validation.py:0 -> test_unknown_provider_generic_message", + "CALLS tests/test_config_validation.py:0 -> test_valid_api_key_passes", + "CALLS tests/test_constructor_call_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_constructor_call_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_constructor_call_resolution.py:0 -> execute_write", + "CALLS tests/test_constructor_call_resolution.py:0 -> fetch_all", + "CALLS tests/test_constructor_call_resolution.py:0 -> flush_all", + "CALLS tests/test_constructor_call_resolution.py:0 -> test_instantiation_calls_init", + "CALLS tests/test_constructor_call_resolution.py:0 -> test_instantiation_without_init_is_not_dropped_to_class", + "CALLS tests/test_cpp_attributes.py:0 -> cpp_attributes_project", + "CALLS tests/test_cpp_attributes.py:0 -> test_attribute_combinations_and_edge_cases", + "CALLS tests/test_cpp_attributes.py:0 -> test_compiler_specific_attributes", + "CALLS tests/test_cpp_attributes.py:0 -> test_cpp_attributes_comprehensive", + "CALLS tests/test_cpp_attributes.py:0 -> test_standard_attributes", + "CALLS tests/test_cpp_basic_syntax.py:0 -> Color", + "CALLS tests/test_cpp_basic_syntax.py:0 -> add", + "CALLS tests/test_cpp_basic_syntax.py:0 -> cpp_basic_project", + "CALLS tests/test_cpp_basic_syntax.py:0 -> main", + "CALLS tests/test_cpp_basic_syntax.py:0 -> test_basic_class_declarations", + "CALLS tests/test_cpp_basic_syntax.py:0 -> test_basic_function_declarations", + "CALLS tests/test_cpp_basic_syntax.py:0 -> test_basic_member_functions", + 
"CALLS tests/test_cpp_basic_syntax.py:0 -> test_basic_namespaces", + "CALLS tests/test_cpp_basic_syntax.py:0 -> test_cpp_basic_comprehensive", + "CALLS tests/test_cpp_casting_operators.py:0 -> GraphNode", + "CALLS tests/test_cpp_casting_operators.py:0 -> cpp_casting_project", + "CALLS tests/test_cpp_casting_operators.py:0 -> get", + "CALLS tests/test_cpp_casting_operators.py:0 -> name", + "CALLS tests/test_cpp_casting_operators.py:0 -> process", + "CALLS tests/test_cpp_casting_operators.py:0 -> test_cpp_casting_comprehensive", + "CALLS tests/test_cpp_casting_operators.py:0 -> test_dynamic_cast_examples", + "CALLS tests/test_cpp_casting_operators.py:0 -> test_other_cast_operators", + "CALLS tests/test_cpp_casting_operators.py:0 -> test_static_cast_examples", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> cpp_inheritance_project", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> get", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> process", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> run", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> test_abstract_classes_and_interfaces", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> test_cpp_inheritance_comprehensive", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> test_cpp_inheritance_edge_cases", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> test_multiple_inheritance", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> test_single_inheritance", + "CALLS tests/test_cpp_classes_inheritance.py:0 -> walk", + "CALLS tests/test_cpp_comprehensive.py:0 -> add", + "CALLS tests/test_cpp_comprehensive.py:0 -> cpp_comprehensive_project", + "CALLS tests/test_cpp_comprehensive.py:0 -> execute", + "CALLS tests/test_cpp_comprehensive.py:0 -> get", + "CALLS tests/test_cpp_comprehensive.py:0 -> main", + "CALLS tests/test_cpp_comprehensive.py:0 -> name", + "CALLS tests/test_cpp_comprehensive.py:0 -> process", + "CALLS tests/test_cpp_comprehensive.py:0 -> test_comprehensive_cpp_features", + "CALLS 
tests/test_cpp_comprehensive.py:0 -> test_cpp_comprehensive_complete", + "CALLS tests/test_cpp_comprehensive.py:0 -> test_real_world_cpp_scenario", + "CALLS tests/test_cpp_concepts.py:0 -> cpp_concepts_project", + "CALLS tests/test_cpp_concepts.py:0 -> get", + "CALLS tests/test_cpp_concepts.py:0 -> process", + "CALLS tests/test_cpp_concepts.py:0 -> processor", + "CALLS tests/test_cpp_concepts.py:0 -> test_advanced_concept_patterns", + "CALLS tests/test_cpp_concepts.py:0 -> test_concept_composition_and_specialization", + "CALLS tests/test_cpp_concepts.py:0 -> test_concept_definitions_and_constraints", + "CALLS tests/test_cpp_concepts.py:0 -> wrapper", + "CALLS tests/test_cpp_concurrency.py:0 -> clear", + "CALLS tests/test_cpp_concurrency.py:0 -> cpp_concurrency_project", + "CALLS tests/test_cpp_concurrency.py:0 -> done", + "CALLS tests/test_cpp_concurrency.py:0 -> get", + "CALLS tests/test_cpp_concurrency.py:0 -> insert", + "CALLS tests/test_cpp_concurrency.py:0 -> load", + "CALLS tests/test_cpp_concurrency.py:0 -> process", + "CALLS tests/test_cpp_concurrency.py:0 -> read", + "CALLS tests/test_cpp_concurrency.py:0 -> run", + "CALLS tests/test_cpp_concurrency.py:0 -> start", + "CALLS tests/test_cpp_concurrency.py:0 -> submit", + "CALLS tests/test_cpp_concurrency.py:0 -> test_atomics_and_memory_ordering", + "CALLS tests/test_cpp_concurrency.py:0 -> test_condition_variables_and_futures", + "CALLS tests/test_cpp_concurrency.py:0 -> test_cpp_concurrency_comprehensive", + "CALLS tests/test_cpp_concurrency.py:0 -> test_mutex_and_locks", + "CALLS tests/test_cpp_concurrency.py:0 -> test_thread_basics", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> cpp_constexpr_project", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> test_basic_constexpr", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> test_constexpr_if_and_templates", + "CALLS tests/test_cpp_constexpr_compile_time.py:0 -> test_cpp_constexpr_comprehensive", + "CALLS 
tests/test_cpp_constructor_destructor.py:0 -> close", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> cpp_constructor_project", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> flush", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> test_basic_constructors_destructors", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> test_constructor_destructor_complete", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> test_cpp_constructor_destructor_comprehensive", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> test_raii_patterns", + "CALLS tests/test_cpp_constructor_destructor.py:0 -> test_special_member_functions", + "CALLS tests/test_cpp_coroutines.py:0 -> cpp_coroutines_project", + "CALLS tests/test_cpp_coroutines.py:0 -> done", + "CALLS tests/test_cpp_coroutines.py:0 -> get", + "CALLS tests/test_cpp_coroutines.py:0 -> get_data", + "CALLS tests/test_cpp_coroutines.py:0 -> start", + "CALLS tests/test_cpp_coroutines.py:0 -> test_async_await_coroutines", + "CALLS tests/test_cpp_coroutines.py:0 -> test_basic_generator_coroutines", + "CALLS tests/test_cpp_coroutines.py:0 -> test_custom_coroutine_types", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> cpp_cross_file_project", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> nodes", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> start", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_cross_file_constructor_destructor", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_header_source_method_resolution", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_multiple_source_files_one_class", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_nested_namespace_cross_file", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_no_orphan_methods_across_files", + "CALLS tests/test_cpp_cross_file_methods.py:0 -> test_same_file_out_of_class_still_works", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> clear", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> 
cpp_singleton_project", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> load", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> main", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> save", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> start", + "CALLS tests/test_cpp_cross_file_singleton.py:0 -> test_cpp_singleton_pattern_cross_file_calls", + "CALLS tests/test_cpp_crosslang_qn_collision.py:0 -> render", + "CALLS tests/test_cpp_crosslang_qn_collision.py:0 -> test_cpp_method_does_not_steal_python_method_qn", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> cpp_designated_consteval_project", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> name", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> operation", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> processor", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> test_comprehensive_modern_cpp_complete", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> test_consteval_immediate_functions", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> test_designated_initializers", + "CALLS tests/test_cpp_designated_init_consteval.py:0 -> test_lambda_init_captures", + "CALLS tests/test_cpp_error_handling.py:0 -> cpp_error_handling_project", + "CALLS tests/test_cpp_error_handling.py:0 -> get", + "CALLS tests/test_cpp_error_handling.py:0 -> processor", + "CALLS tests/test_cpp_error_handling.py:0 -> test_basic_exception_handling", + "CALLS tests/test_cpp_error_handling.py:0 -> test_cpp_error_handling_comprehensive", + "CALLS tests/test_cpp_error_handling.py:0 -> test_raii_patterns", + "CALLS tests/test_cpp_format_spaceship.py:0 -> cpp_format_spaceship_project", + "CALLS tests/test_cpp_format_spaceship.py:0 -> parse", + "CALLS tests/test_cpp_format_spaceship.py:0 -> test_format_library_basics", + "CALLS tests/test_cpp_format_spaceship.py:0 -> test_format_spaceship_complete", + "CALLS tests/test_cpp_format_spaceship.py:0 -> 
test_format_spaceship_integration", + "CALLS tests/test_cpp_format_spaceship.py:0 -> test_spaceship_operator", + "CALLS tests/test_cpp_format_spaceship.py:0 -> value", + "CALLS tests/test_cpp_friend_functions.py:0 -> GraphNode", + "CALLS tests/test_cpp_friend_functions.py:0 -> add", + "CALLS tests/test_cpp_friend_functions.py:0 -> cpp_friend_project", + "CALLS tests/test_cpp_friend_functions.py:0 -> insert", + "CALLS tests/test_cpp_friend_functions.py:0 -> name", + "CALLS tests/test_cpp_friend_functions.py:0 -> test_cpp_friend_comprehensive", + "CALLS tests/test_cpp_friend_functions.py:0 -> test_friend_functions", + "CALLS tests/test_cpp_friend_functions.py:0 -> test_friend_templates", + "CALLS tests/test_cpp_frontend_calls.py:0 -> add", + "CALLS tests/test_cpp_frontend_calls.py:0 -> test_method_calls_free_function", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> run", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> test_frontend_emits_inheritance_and_operator", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> test_frontend_qns_match_tree_sitter", + "CALLS tests/test_cpp_frontend_qn_parity.py:0 -> test_frontend_recovers_macro_mangled_class", + "CALLS tests/test_cpp_frontend_types.py:0 -> test_frontend_emits_type_aliases", + "CALLS tests/test_cpp_frontend_wiring.py:0 -> test_default_treesitter_does_not_recover_macro_class", + "CALLS tests/test_cpp_frontend_wiring.py:0 -> test_libclang_frontend_recovers_macro_class", + "CALLS tests/test_cpp_includes.py:0 -> add", + "CALLS tests/test_cpp_includes.py:0 -> cpp_includes_project", + "CALLS tests/test_cpp_includes.py:0 -> process", + "CALLS tests/test_cpp_includes.py:0 -> test_conditional_includes", + "CALLS tests/test_cpp_includes.py:0 -> test_cpp20_module_import_syntax", + "CALLS tests/test_cpp_includes.py:0 -> test_cpp20_module_partition_imports", + "CALLS tests/test_cpp_includes.py:0 -> test_cpp_includes_comprehensive", + "CALLS tests/test_cpp_includes.py:0 -> test_include_guards_and_pragma_once", + "CALLS 
tests/test_cpp_includes.py:0 -> test_local_header_includes", + "CALLS tests/test_cpp_includes.py:0 -> test_standard_library_includes", + "CALLS tests/test_cpp_includes.py:0 -> test_system_vs_local_includes", + "CALLS tests/test_cpp_lambda_captures.py:0 -> cpp_lambda_project", + "CALLS tests/test_cpp_lambda_captures.py:0 -> factory", + "CALLS tests/test_cpp_lambda_captures.py:0 -> operation", + "CALLS tests/test_cpp_lambda_captures.py:0 -> processor", + "CALLS tests/test_cpp_lambda_captures.py:0 -> test_basic_lambda_captures", + "CALLS tests/test_cpp_lambda_captures.py:0 -> test_generalized_captures", + "CALLS tests/test_cpp_lambda_captures.py:0 -> test_lambda_validation_complete", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> cpp_lambdas_project", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> get", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> handler", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> name", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> processor", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> test_async_functional_patterns", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> test_basic_lambdas", + "CALLS tests/test_cpp_lambdas_functional.py:0 -> test_cpp_lambdas_comprehensive", + "CALLS tests/test_cpp_line_numbers.py:0 -> _get_line_span", + "CALLS tests/test_cpp_line_numbers.py:0 -> add", + "CALLS tests/test_cpp_line_numbers.py:0 -> cpp_line_numbers_project", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_const_method_has_correct_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_constructor_out_of_class_has_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_declaration_only_methods_have_declaration_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_deeply_nested_namespace_has_correct_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_destructor_out_of_class_has_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> 
test_inline_method_has_correct_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_mixed_methods_have_correct_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_multiline_inline_method_has_correct_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_multiple_out_of_class_methods_have_correct_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_namespaced_class_method_has_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_nested_class_method_has_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_operator_methods_have_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_simple_out_of_class_method_has_definition_line_numbers", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_static_method_has_correct_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_struct_out_of_class_method_has_definition_lines", + "CALLS tests/test_cpp_line_numbers.py:0 -> test_template_method_has_definition_line_numbers", + "CALLS tests/test_cpp_memory_management.py:0 -> cpp_memory_project", + "CALLS tests/test_cpp_memory_management.py:0 -> get", + "CALLS tests/test_cpp_memory_management.py:0 -> name", + "CALLS tests/test_cpp_memory_management.py:0 -> test_cpp_memory_management_comprehensive", + "CALLS tests/test_cpp_memory_management.py:0 -> test_move_semantics", + "CALLS tests/test_cpp_memory_management.py:0 -> test_smart_pointers", + "CALLS tests/test_cpp_memory_management.py:0 -> value", + "CALLS tests/test_cpp_modern_features.py:0 -> add", + "CALLS tests/test_cpp_modern_features.py:0 -> cpp_modern_project", + "CALLS tests/test_cpp_modern_features.py:0 -> get", + "CALLS tests/test_cpp_modern_features.py:0 -> index", + "CALLS tests/test_cpp_modern_features.py:0 -> main", + "CALLS tests/test_cpp_modern_features.py:0 -> process", + "CALLS tests/test_cpp_modern_features.py:0 -> test_auto_keyword_type_deduction", + "CALLS tests/test_cpp_modern_features.py:0 -> 
test_cpp_modern_comprehensive", + "CALLS tests/test_cpp_modern_features.py:0 -> test_lambda_expressions", + "CALLS tests/test_cpp_modern_features.py:0 -> test_smart_pointers_move_semantics", + "CALLS tests/test_cpp_modern_features.py:0 -> test_structured_bindings_ranges", + "CALLS tests/test_cpp_modern_features.py:0 -> test_variadic_templates_constexpr", + "CALLS tests/test_cpp_modules.py:0 -> add", + "CALLS tests/test_cpp_modules.py:0 -> clear", + "CALLS tests/test_cpp_modules.py:0 -> cpp_modules_project", + "CALLS tests/test_cpp_modules.py:0 -> get", + "CALLS tests/test_cpp_modules.py:0 -> process", + "CALLS tests/test_cpp_modules.py:0 -> test_basic_module_interface", + "CALLS tests/test_cpp_modules.py:0 -> test_module_imports_usage", + "CALLS tests/test_cpp_modules.py:0 -> test_module_partitions", + "CALLS tests/test_cpp_move_semantics.py:0 -> clear", + "CALLS tests/test_cpp_move_semantics.py:0 -> cpp_move_semantics_project", + "CALLS tests/test_cpp_move_semantics.py:0 -> factory", + "CALLS tests/test_cpp_move_semantics.py:0 -> get", + "CALLS tests/test_cpp_move_semantics.py:0 -> name", + "CALLS tests/test_cpp_move_semantics.py:0 -> process", + "CALLS tests/test_cpp_move_semantics.py:0 -> processor", + "CALLS tests/test_cpp_move_semantics.py:0 -> test_basic_move_semantics", + "CALLS tests/test_cpp_move_semantics.py:0 -> test_cpp_move_semantics_comprehensive", + "CALLS tests/test_cpp_move_semantics.py:0 -> test_move_optimization_patterns", + "CALLS tests/test_cpp_move_semantics.py:0 -> test_perfect_forwarding", + "CALLS tests/test_cpp_move_semantics.py:0 -> value", + "CALLS tests/test_cpp_move_semantics.py:0 -> wrapper", + "CALLS tests/test_cpp_namespaces.py:0 -> Color", + "CALLS tests/test_cpp_namespaces.py:0 -> add", + "CALLS tests/test_cpp_namespaces.py:0 -> cpp_namespaces_project", + "CALLS tests/test_cpp_namespaces.py:0 -> process", + "CALLS tests/test_cpp_namespaces.py:0 -> test_anonymous_namespaces", + "CALLS tests/test_cpp_namespaces.py:0 -> 
test_basic_namespaces", + "CALLS tests/test_cpp_namespaces.py:0 -> test_cpp_namespaces_comprehensive", + "CALLS tests/test_cpp_namespaces.py:0 -> test_using_directives", + "CALLS tests/test_cpp_operators_overloading.py:0 -> cpp_operators_project", + "CALLS tests/test_cpp_operators_overloading.py:0 -> name", + "CALLS tests/test_cpp_operators_overloading.py:0 -> test_arithmetic_operators", + "CALLS tests/test_cpp_operators_overloading.py:0 -> test_comparison_operators", + "CALLS tests/test_cpp_operators_overloading.py:0 -> test_cpp_operators_comprehensive", + "CALLS tests/test_cpp_operators_overloading.py:0 -> test_stream_function_call_operators", + "CALLS tests/test_cpp_operators_overloading.py:0 -> test_subscript_increment_operators", + "CALLS tests/test_cpp_operators_overloading.py:0 -> value", + "CALLS tests/test_cpp_oracle.py:0 -> test_cgr_matches_libclang_oracle_on_cpp_structure", + "CALLS tests/test_cpp_oracle.py:0 -> test_libclang_oracle_emits_inherits_edges", + "CALLS tests/test_cpp_oracle.py:0 -> test_restrict_to_files_scopes_graph_to_universe", + "CALLS tests/test_cpp_out_of_class_method_calls.py:0 -> add", + "CALLS tests/test_cpp_out_of_class_method_calls.py:0 -> ensure_relationship_batch", + "CALLS tests/test_cpp_out_of_class_method_calls.py:0 -> test_out_of_class_method_call_attributed_to_method_qn", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> cpp_out_of_class_project", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_deeply_nested_qualified_identifier", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_mixed_inline_and_out_of_class_methods", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_nested_namespace_out_of_class_methods", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_out_of_class_constructor_destructor", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_out_of_class_operator_overloading", + "CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_simple_out_of_class_method_definitions", + 
"CALLS tests/test_cpp_out_of_class_methods.py:0 -> test_template_out_of_class_methods", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> add", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> cpp_parser", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_constructor_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_destructor_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_inline_method_not_out_of_class", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_lambda_not_out_of_class", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_namespaced_function_not_out_of_class", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_nested_class_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_nested_class_out_of_class_method", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_operator_plus_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_returns_inner_function_for_template", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_returns_none_for_inline_method", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_returns_none_for_standalone_function", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_returns_same_node_for_function_definition", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_simple_class_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_simple_method_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_simple_out_of_class_method", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_standalone_function_not_out_of_class", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_struct_class_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_template_class_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_template_method_name", + "CALLS tests/test_cpp_out_of_class_utils.py:0 -> test_template_out_of_class_method", + "CALLS tests/test_cpp_preprocessor.py:0 -> cpp_preprocessor_project", + "CALLS 
tests/test_cpp_preprocessor.py:0 -> name", + "CALLS tests/test_cpp_preprocessor.py:0 -> optimize", + "CALLS tests/test_cpp_preprocessor.py:0 -> processor", + "CALLS tests/test_cpp_preprocessor.py:0 -> test_conditional_compilation", + "CALLS tests/test_cpp_preprocessor.py:0 -> test_cpp_preprocessor_comprehensive", + "CALLS tests/test_cpp_preprocessor.py:0 -> test_define_macros", + "CALLS tests/test_cpp_preprocessor.py:0 -> test_pragma_directives", + "CALLS tests/test_cpp_ranges_views.py:0 -> GraphNode", + "CALLS tests/test_cpp_ranges_views.py:0 -> cpp_ranges_project", + "CALLS tests/test_cpp_ranges_views.py:0 -> name", + "CALLS tests/test_cpp_ranges_views.py:0 -> nodes", + "CALLS tests/test_cpp_ranges_views.py:0 -> test_basic_ranges_algorithms", + "CALLS tests/test_cpp_ranges_views.py:0 -> test_range_pipelines_graph_processing", + "CALLS tests/test_cpp_ranges_views.py:0 -> test_views_and_adaptors", + "CALLS tests/test_cpp_ranges_views.py:0 -> type", + "CALLS tests/test_cpp_smart_pointers.py:0 -> GraphNode", + "CALLS tests/test_cpp_smart_pointers.py:0 -> add", + "CALLS tests/test_cpp_smart_pointers.py:0 -> clear", + "CALLS tests/test_cpp_smart_pointers.py:0 -> cpp_smart_pointers_project", + "CALLS tests/test_cpp_smart_pointers.py:0 -> get", + "CALLS tests/test_cpp_smart_pointers.py:0 -> insert", + "CALLS tests/test_cpp_smart_pointers.py:0 -> items", + "CALLS tests/test_cpp_smart_pointers.py:0 -> name", + "CALLS tests/test_cpp_smart_pointers.py:0 -> process", + "CALLS tests/test_cpp_smart_pointers.py:0 -> put", + "CALLS tests/test_cpp_smart_pointers.py:0 -> read", + "CALLS tests/test_cpp_smart_pointers.py:0 -> test_cpp_smart_pointers_comprehensive", + "CALLS tests/test_cpp_smart_pointers.py:0 -> test_shared_ptr_patterns", + "CALLS tests/test_cpp_smart_pointers.py:0 -> test_unique_ptr_patterns", + "CALLS tests/test_cpp_smart_pointers.py:0 -> test_weak_ptr_and_advanced_patterns", + "CALLS tests/test_cpp_smart_pointers.py:0 -> wrapper", + "CALLS 
tests/test_cpp_stl_usage.py:0 -> cpp_stl_project", + "CALLS tests/test_cpp_stl_usage.py:0 -> generate", + "CALLS tests/test_cpp_stl_usage.py:0 -> insert", + "CALLS tests/test_cpp_stl_usage.py:0 -> test_cpp_stl_comprehensive", + "CALLS tests/test_cpp_stl_usage.py:0 -> test_stl_algorithms", + "CALLS tests/test_cpp_stl_usage.py:0 -> test_stl_containers", + "CALLS tests/test_cpp_stl_usage.py:0 -> test_stl_iterators_functors", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> add", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> cpp_metaprogramming_project", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> insert", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> process", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> test_advanced_metaprogramming", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> test_basic_metaprogramming", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> test_cpp_metaprogramming_comprehensive", + "CALLS tests/test_cpp_template_metaprogramming.py:0 -> value", + "CALLS tests/test_cpp_templates.py:0 -> add", + "CALLS tests/test_cpp_templates.py:0 -> clear", + "CALLS tests/test_cpp_templates.py:0 -> cpp_templates_project", + "CALLS tests/test_cpp_templates.py:0 -> get", + "CALLS tests/test_cpp_templates.py:0 -> insert", + "CALLS tests/test_cpp_templates.py:0 -> test_class_templates", + "CALLS tests/test_cpp_templates.py:0 -> test_cpp_templates_comprehensive", + "CALLS tests/test_cpp_templates.py:0 -> test_function_templates", + "CALLS tests/test_cpp_templates.py:0 -> test_template_metaprogramming", + "CALLS tests/test_cypher_validation.py:0 -> get", + "CALLS tests/test_cypher_validation.py:0 -> load", + "CALLS tests/test_cypher_validation.py:0 -> nodes", + "CALLS tests/test_cypher_validation.py:0 -> test_all_dangerous_keywords_produce_valid_patterns", + "CALLS tests/test_cypher_validation.py:0 -> test_allowed_procedure_passes", + "CALLS tests/test_cypher_validation.py:0 -> 
test_bounded_or_no_varlen_passes", + "CALLS tests/test_cypher_validation.py:0 -> test_call_is_case_insensitive", + "CALLS tests/test_cypher_validation.py:0 -> test_call_no_longer_in_keyword_blocklist", + "CALLS tests/test_cypher_validation.py:0 -> test_case_insensitive", + "CALLS tests/test_cypher_validation.py:0 -> test_disallowed_procedure_rejected", + "CALLS tests/test_cypher_validation.py:0 -> test_does_not_flag_created_at", + "CALLS tests/test_cypher_validation.py:0 -> test_does_not_flag_reset", + "CALLS tests/test_cypher_validation.py:0 -> test_does_not_flag_substring_matches", + "CALLS tests/test_cypher_validation.py:0 -> test_error_includes_keyword_and_query", + "CALLS tests/test_cypher_validation.py:0 -> test_error_includes_procedure_name", + "CALLS tests/test_cypher_validation.py:0 -> test_error_includes_query", + "CALLS tests/test_cypher_validation.py:0 -> test_multi_word_allows_block_comment_between_parts", + "CALLS tests/test_cypher_validation.py:0 -> test_multi_word_allows_single_line_comment_between_parts", + "CALLS tests/test_cypher_validation.py:0 -> test_multi_word_allows_whitespace_between_parts", + "CALLS tests/test_cypher_validation.py:0 -> test_multi_word_has_dotall_flag", + "CALLS tests/test_cypher_validation.py:0 -> test_multi_word_respects_word_boundaries", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_all_dangerous_keywords", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_block_comment_bypass", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_create", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_create_constraint", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_create_index", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_delete", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_detach_delete", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_drop", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_foreach", + "CALLS 
tests/test_cypher_validation.py:0 -> test_rejects_load_csv", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_merge", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_multiline_block_comment_bypass", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_remove", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_set", + "CALLS tests/test_cypher_validation.py:0 -> test_rejects_single_line_comment_bypass", + "CALLS tests/test_cypher_validation.py:0 -> test_returns_compiled_pattern", + "CALLS tests/test_cypher_validation.py:0 -> test_safe_match_query_passes", + "CALLS tests/test_cypher_validation.py:0 -> test_safe_match_with_where_passes", + "CALLS tests/test_cypher_validation.py:0 -> test_safe_optional_match_passes", + "CALLS tests/test_cypher_validation.py:0 -> test_single_word_is_case_sensitive_on_input", + "CALLS tests/test_cypher_validation.py:0 -> test_single_word_uses_word_boundaries", + "CALLS tests/test_cypher_validation.py:0 -> test_unbounded_varlen_rejected", + "CALLS tests/test_dead_code_command.py:0 -> dead_rows", + "CALLS tests/test_dead_code_command.py:0 -> runner", + "CALLS tests/test_dead_code_command.py:0 -> test_classes_flag_includes_class_candidates", + "CALLS tests/test_dead_code_command.py:0 -> test_classes_off_by_default", + "CALLS tests/test_dead_code_command.py:0 -> test_decorator_root_extends_defaults", + "CALLS tests/test_dead_code_command.py:0 -> test_entry_point_forwarded_to_query", + "CALLS tests/test_dead_code_command.py:0 -> test_errors_when_no_projects", + "CALLS tests/test_dead_code_command.py:0 -> test_errors_when_project_ambiguous", + "CALLS tests/test_dead_code_command.py:0 -> test_explicit_project_name_used", + "CALLS tests/test_dead_code_command.py:0 -> test_fail_on_found_exits_one_when_dead_code", + "CALLS tests/test_dead_code_command.py:0 -> test_fail_on_found_exits_zero_when_clean", + "CALLS tests/test_dead_code_command.py:0 -> test_handles_connection_error", + "CALLS 
tests/test_dead_code_command.py:0 -> test_include_tests_default_passes_test_patterns", + "CALLS tests/test_dead_code_command.py:0 -> test_json_format_emits_qualified_names", + "CALLS tests/test_dead_code_command.py:0 -> test_lists_orphans_in_table", + "CALLS tests/test_dead_code_command.py:0 -> test_no_include_tests_omits_test_patterns", + "CALLS tests/test_dead_code_command.py:0 -> test_writes_json_to_output_file", + "CALLS tests/test_dead_code_command.py:0 -> test_writes_table_to_output_file", + "CALLS tests/test_decorator_call_edges.py:0 -> handler", + "CALLS tests/test_decorator_call_edges.py:0 -> test_alias_decorator_resolves_to_first_party", + "CALLS tests/test_decorator_call_edges.py:0 -> test_bare_decorator_emits_module_call", + "CALLS tests/test_decorator_call_edges.py:0 -> test_call_decorator_emits_module_call", + "CALLS tests/test_decorator_call_edges.py:0 -> test_class_decorator_emits_module_call", + "CALLS tests/test_decorator_call_edges.py:0 -> test_decorator_on_nested_function_not_module_attributed", + "CALLS tests/test_decorator_call_edges.py:0 -> test_undecorated_function_has_no_decorator_edge", + "CALLS tests/test_decorators.py:0 -> _ensure_loaded", + "CALLS tests/test_decorators.py:0 -> error_factory", + "CALLS tests/test_decorators.py:0 -> key_func", + "CALLS tests/test_decorators.py:0 -> my_method", + "CALLS tests/test_decorators.py:0 -> named_async_function", + "CALLS tests/test_decorators.py:0 -> named_function", + "CALLS tests/test_decorators.py:0 -> named_handler", + "CALLS tests/test_decorators.py:0 -> named_op", + "CALLS tests/test_decorators.py:0 -> test_allows_different_keys", + "CALLS tests/test_decorators.py:0 -> test_allows_valid_path_within_project", + "CALLS tests/test_decorators.py:0 -> test_calls_ensure_loaded_before_method", + "CALLS tests/test_decorators.py:0 -> test_clears_guard_after_completion", + "CALLS tests/test_decorators.py:0 -> test_clears_guard_on_exception", + "CALLS tests/test_decorators.py:0 -> 
test_handles_exceptions", + "CALLS tests/test_decorators.py:0 -> test_handles_keyword_arguments_in_guarded_function", + "CALLS tests/test_decorators.py:0 -> test_handles_path_not_first_positional_arg", + "CALLS tests/test_decorators.py:0 -> test_key_func_receives_kwargs_correctly", + "CALLS tests/test_decorators.py:0 -> test_logs_end_even_on_success", + "CALLS tests/test_decorators.py:0 -> test_logs_start_and_end_messages", + "CALLS tests/test_decorators.py:0 -> test_logs_timing_info", + "CALLS tests/test_decorators.py:0 -> test_passes_arguments_correctly", + "CALLS tests/test_decorators.py:0 -> test_preserves_function_metadata", + "CALLS tests/test_decorators.py:0 -> test_prevents_recursive_calls", + "CALLS tests/test_decorators.py:0 -> test_recursion_guard_with_mixed_positional_and_keyword_args", + "CALLS tests/test_decorators.py:0 -> test_rejects_non_string_path", + "CALLS tests/test_decorators.py:0 -> test_rejects_path_outside_project", + "CALLS tests/test_decorators.py:0 -> test_reraises_cancelled_error", + "CALLS tests/test_decorators.py:0 -> test_reraises_keyboard_interrupt", + "CALLS tests/test_decorators.py:0 -> test_reraises_system_exit", + "CALLS tests/test_decorators.py:0 -> test_returns_correct_result", + "CALLS tests/test_decorators.py:0 -> test_returns_error_on_exception", + "CALLS tests/test_decorators.py:0 -> test_returns_result_on_success", + "CALLS tests/test_decorators.py:0 -> test_separate_guard_names", + "CALLS tests/test_decorators.py:0 -> test_shared_guard_name", + "CALLS tests/test_decorators.py:0 -> test_works_with_dict_error_factory", + "CALLS tests/test_decorators.py:0 -> test_works_with_property", + "CALLS tests/test_decorators.py:0 -> value", + "CALLS tests/test_definition_processor.py:0 -> definition_processor", + "CALLS tests/test_definition_processor.py:0 -> py_parser", + "CALLS tests/test_definition_processor.py:0 -> test_add_dependency_creates_node_and_relationship", + "CALLS tests/test_definition_processor.py:0 -> 
test_add_dependency_skips_empty_name", + "CALLS tests/test_definition_processor.py:0 -> test_add_dependency_skips_php", + "CALLS tests/test_definition_processor.py:0 -> test_add_dependency_skips_python", + "CALLS tests/test_definition_processor.py:0 -> test_add_dependency_with_empty_version_spec", + "CALLS tests/test_definition_processor.py:0 -> test_add_dependency_with_properties", + "CALLS tests/test_definition_processor.py:0 -> test_builtin_decorators", + "CALLS tests/test_definition_processor.py:0 -> test_cargo_toml_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_class_decorator", + "CALLS tests/test_definition_processor.py:0 -> test_class_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_composer_json_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_csproj_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_decorator_with_arguments", + "CALLS tests/test_definition_processor.py:0 -> test_dotted_decorator", + "CALLS tests/test_definition_processor.py:0 -> test_double_quoted_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_empty_function_body", + "CALLS tests/test_definition_processor.py:0 -> test_gemfile_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_go_mod_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_multiline_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_multiple_decorators", + "CALLS tests/test_definition_processor.py:0 -> test_no_decorators", + "CALLS tests/test_definition_processor.py:0 -> test_no_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_package_json_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_calls_ingest_methods", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_creates_contains_module_relationship_to_folder", + "CALLS tests/test_definition_processor.py:0 -> 
test_process_file_creates_contains_module_relationship_to_package", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_creates_contains_module_relationship_to_project", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_creates_module_node", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_empty_file", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_init_py_uses_parent_qn", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_mod_rs_uses_parent_qn", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_nested_init_py", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_registers_module_qn_to_file_path", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_unsupported_language_returns_none", + "CALLS tests/test_definition_processor.py:0 -> test_process_file_with_syntax_error_still_processes", + "CALLS tests/test_definition_processor.py:0 -> test_pyproject_toml_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_requirements_txt_dependencies", + "CALLS tests/test_definition_processor.py:0 -> test_single_decorator", + "CALLS tests/test_definition_processor.py:0 -> test_single_quoted_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_triple_double_quoted_docstring", + "CALLS tests/test_definition_processor.py:0 -> test_triple_single_quoted_docstring", + "CALLS tests/test_dependency_parser.py:0 -> test_all_dependency_types", + "CALLS tests/test_dependency_parser.py:0 -> test_both_dep_types", + "CALLS tests/test_dependency_parser.py:0 -> test_both_project_and_poetry", + "CALLS tests/test_dependency_parser.py:0 -> test_both_require_types", + "CALLS tests/test_dependency_parser.py:0 -> test_cargo_toml", + "CALLS tests/test_dependency_parser.py:0 -> test_cargo_toml_case_insensitive", + "CALLS tests/test_dependency_parser.py:0 -> test_case_insensitive_matching", + "CALLS tests/test_dependency_parser.py:0 -> 
test_comments_ignored", + "CALLS tests/test_dependency_parser.py:0 -> test_comments_in_require_block", + "CALLS tests/test_dependency_parser.py:0 -> test_compatible_release", + "CALLS tests/test_dependency_parser.py:0 -> test_complex_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_composer_json", + "CALLS tests/test_dependency_parser.py:0 -> test_conditional_item_groups", + "CALLS tests/test_dependency_parser.py:0 -> test_csproj", + "CALLS tests/test_dependency_parser.py:0 -> test_csproj_suffix_matching", + "CALLS tests/test_dependency_parser.py:0 -> test_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_dependency_without_version", + "CALLS tests/test_dependency_parser.py:0 -> test_dev_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_empty_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_empty_file", + "CALLS tests/test_dependency_parser.py:0 -> test_empty_lines_ignored", + "CALLS tests/test_dependency_parser.py:0 -> test_empty_project", + "CALLS tests/test_dependency_parser.py:0 -> test_empty_string", + "CALLS tests/test_dependency_parser.py:0 -> test_exact_version", + "CALLS tests/test_dependency_parser.py:0 -> test_gem_with_version", + "CALLS tests/test_dependency_parser.py:0 -> test_gem_without_version", + "CALLS tests/test_dependency_parser.py:0 -> test_gemfile", + "CALLS tests/test_dependency_parser.py:0 -> test_go_mod", + "CALLS tests/test_dependency_parser.py:0 -> test_group_blocks", + "CALLS tests/test_dependency_parser.py:0 -> test_include_lines_ignored", + "CALLS tests/test_dependency_parser.py:0 -> test_indirect_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_invalid_json", + "CALLS tests/test_dependency_parser.py:0 -> test_invalid_toml", + "CALLS tests/test_dependency_parser.py:0 -> test_invalid_xml", + "CALLS tests/test_dependency_parser.py:0 -> test_leading_whitespace", + "CALLS tests/test_dependency_parser.py:0 -> test_multiple_require_blocks", + "CALLS 
tests/test_dependency_parser.py:0 -> test_nonexistent_file", + "CALLS tests/test_dependency_parser.py:0 -> test_optional_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_package_json", + "CALLS tests/test_dependency_parser.py:0 -> test_package_references", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_complex_version", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_dots", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_extras", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_extras_no_version", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_hyphen", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_multiple_extras", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_underscore", + "CALLS tests/test_dependency_parser.py:0 -> test_package_with_version_specifier", + "CALLS tests/test_dependency_parser.py:0 -> test_package_without_version", + "CALLS tests/test_dependency_parser.py:0 -> test_peer_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_php_excluded", + "CALLS tests/test_dependency_parser.py:0 -> test_poetry_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_project_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_pyproject_toml", + "CALLS tests/test_dependency_parser.py:0 -> test_require_block", + "CALLS tests/test_dependency_parser.py:0 -> test_require_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_require_dev", + "CALLS tests/test_dependency_parser.py:0 -> test_requirements_txt", + "CALLS tests/test_dependency_parser.py:0 -> test_scoped_package", + "CALLS tests/test_dependency_parser.py:0 -> test_simple_dependencies", + "CALLS tests/test_dependency_parser.py:0 -> test_simple_package_name", + "CALLS tests/test_dependency_parser.py:0 -> test_simple_requirements", + "CALLS tests/test_dependency_parser.py:0 -> test_single_quoted_gem", + "CALLS 
tests/test_dependency_parser.py:0 -> test_single_require_line", + "CALLS tests/test_dependency_parser.py:0 -> test_source_line_ignored", + "CALLS tests/test_dependency_parser.py:0 -> test_unknown_file_type", + "CALLS tests/test_dependency_parser.py:0 -> test_whitespace_only", + "CALLS tests/test_diff_autowrap.py:0 -> test_already_fenced_diff_not_double_wrapped", + "CALLS tests/test_diff_autowrap.py:0 -> test_diff_followed_by_explanation_text", + "CALLS tests/test_diff_autowrap.py:0 -> test_fenced_with_other_language_not_rewrapped", + "CALLS tests/test_diff_autowrap.py:0 -> test_full_git_diff_gets_fenced_as_diff", + "CALLS tests/test_diff_autowrap.py:0 -> test_plain_text_unchanged", + "CALLS tests/test_diff_autowrap.py:0 -> test_preamble_before_diff_preserved", + "CALLS tests/test_diff_autowrap.py:0 -> test_text_without_diff_marker_unchanged", + "CALLS tests/test_directory_lister.py:0 -> directory_lister", + "CALLS tests/test_directory_lister.py:0 -> sample_directory_structure", + "CALLS tests/test_directory_lister.py:0 -> temp_project_root", + "CALLS tests/test_directory_lister.py:0 -> test_creates_tool_instance", + "CALLS tests/test_directory_lister.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_directory_lister.py:0 -> test_init_with_relative_path", + "CALLS tests/test_directory_lister.py:0 -> test_list_directory_returns_error_for_absolute_path_outside_root", + "CALLS tests/test_directory_lister.py:0 -> test_list_directory_returns_error_for_path_outside_root", + "CALLS tests/test_directory_lister.py:0 -> test_list_empty_directory", + "CALLS tests/test_directory_lister.py:0 -> test_list_file_instead_of_directory", + "CALLS tests/test_directory_lister.py:0 -> test_list_nested_directory", + "CALLS tests/test_directory_lister.py:0 -> test_list_nonexistent_directory", + "CALLS tests/test_directory_lister.py:0 -> test_list_root_directory", + "CALLS tests/test_directory_lister.py:0 -> test_list_subdirectory", + "CALLS tests/test_directory_lister.py:0 -> 
test_list_with_absolute_path_within_root", + "CALLS tests/test_directory_lister.py:0 -> test_list_with_hidden_files", + "CALLS tests/test_directory_lister.py:0 -> test_list_with_special_characters", + "CALLS tests/test_directory_lister.py:0 -> test_safe_path_rejects_absolute_path_outside_root", + "CALLS tests/test_directory_lister.py:0 -> test_safe_path_rejects_path_outside_root", + "CALLS tests/test_directory_lister.py:0 -> test_safe_path_with_absolute_path_within_root", + "CALLS tests/test_directory_lister.py:0 -> test_safe_path_with_relative_path", + "CALLS tests/test_directory_lister.py:0 -> test_tool_function_returns_contents", + "CALLS tests/test_directory_lister.py:0 -> test_tool_has_description", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> ensure_node_batch", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> ensure_relationship_batch", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> execute_write", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> fetch_all", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> flush_all", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> render", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> test_both_branch_classes_become_distinct_nodes", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> test_both_branch_definitions_become_distinct_nodes", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> test_both_branch_methods_in_one_class_survive", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> test_call_links_to_both_duplicate_definitions", + "CALLS tests/test_duplicate_qn_definitions.py:0 -> test_methods_of_both_branch_classes_survive", + "CALLS tests/test_embedder.py:0 -> add", + "CALLS tests/test_embedder.py:0 -> mock_unixcoder", + "CALLS tests/test_embedder.py:0 -> reset_cache", + "CALLS tests/test_embedder.py:0 -> reset_model_cache", + "CALLS tests/test_embedder.py:0 -> side_effect_forward", + "CALLS tests/test_embedder.py:0 -> side_effect_tokenize", + "CALLS tests/test_embedder.py:0 -> 
test_embed_code_batch_cache_hit", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_empty_list", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_partial_cache", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_populates_cache", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_raises_without_dependencies", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_respects_batch_size", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_returns_correct_count", + "CALLS tests/test_embedder.py:0 -> test_embed_code_batch_uses_padding", + "CALLS tests/test_embedder.py:0 -> test_embed_code_calls_tokenize", + "CALLS tests/test_embedder.py:0 -> test_embed_code_integration", + "CALLS tests/test_embedder.py:0 -> test_embed_code_populates_cache", + "CALLS tests/test_embedder.py:0 -> test_embed_code_raises_without_dependencies", + "CALLS tests/test_embedder.py:0 -> test_embed_code_returns_768_dimensional_vector", + "CALLS tests/test_embedder.py:0 -> test_embed_code_uses_cache", + "CALLS tests/test_embedder.py:0 -> test_embed_code_uses_default_max_length", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_clear", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_different_content_different_key", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_get_many", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_len", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_load_corrupt_file", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_load_no_path", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_load_nonexistent_path", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_miss_returns_none", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_overwrite", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_persistence_roundtrip", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_put_and_get", + "CALLS tests/test_embedder.py:0 -> 
test_embedding_cache_put_many", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_save_and_load", + "CALLS tests/test_embedder.py:0 -> test_embedding_cache_save_no_path", + "CALLS tests/test_embedder.py:0 -> test_embedding_default_batch_size_at_least_64", + "CALLS tests/test_embedder.py:0 -> test_get_model_does_not_use_cuda_when_unavailable", + "CALLS tests/test_embedder.py:0 -> test_get_model_is_cached", + "CALLS tests/test_embedder.py:0 -> test_get_model_moves_to_mps_when_available", + "CALLS tests/test_embedder.py:0 -> test_get_model_uses_cuda_when_available", + "CALLS tests/test_embedder.py:0 -> test_select_device_falls_back_to_cpu", + "CALLS tests/test_embedder.py:0 -> test_select_device_prefers_cuda", + "CALLS tests/test_embedder.py:0 -> test_select_device_uses_mps_when_cuda_unavailable", + "CALLS tests/test_embedder.py:0 -> test_similar_code_has_similar_embeddings", + "CALLS tests/test_eval_imports_internal_modules.py:0 -> name", + "CALLS tests/test_eval_imports_internal_modules.py:0 -> test_import_placeholder_module_not_scored_as_internal", + "CALLS tests/test_eval_module_calls.py:0 -> load", + "CALLS tests/test_eval_module_calls.py:0 -> main", + "CALLS tests/test_eval_module_calls.py:0 -> test_annotation_not_counted_with_future_import", + "CALLS tests/test_eval_module_calls.py:0 -> test_cgr_matches_oracle_module_calls", + "CALLS tests/test_eval_module_calls.py:0 -> test_class_decorator_is_module_attributed", + "CALLS tests/test_eval_module_calls.py:0 -> test_classless_module_construction_credited_via_instantiates", + "CALLS tests/test_eval_module_calls.py:0 -> test_generator_expression_call_is_deferred", + "CALLS tests/test_eval_module_calls.py:0 -> test_generator_outermost_iterable_is_eager", + "CALLS tests/test_eval_module_calls.py:0 -> test_lambda_body_call_is_deferred", + "CALLS tests/test_eval_module_calls.py:0 -> test_list_comprehension_call_is_module_attributed", + "CALLS tests/test_eval_module_calls.py:0 -> 
test_nested_call_is_not_module_attributed", + "CALLS tests/test_eval_module_calls.py:0 -> test_oracle_counts_only_definition_time_calls", + "CALLS tests/test_eval_module_calls.py:0 -> test_return_annotation_counted_without_future_import", + "CALLS tests/test_eval_score_span.py:0 -> test_span_end_line_mismatch_is_penalized_and_surfaced", + "CALLS tests/test_eval_score_span.py:0 -> test_span_exact_match_scores_perfect", + "CALLS tests/test_eval_score_span.py:0 -> test_span_only_grades_co_identified_nodes", + "CALLS tests/test_exclude_patterns.py:0 -> test_cli_excludes_without_pattern_match", + "CALLS tests/test_exclude_patterns.py:0 -> test_codebase_with_nested_pycache_groups_correctly", + "CALLS tests/test_exclude_patterns.py:0 -> test_custom_exclude_pattern_is_applied", + "CALLS tests/test_exclude_patterns.py:0 -> test_deep_nested_pattern_returns_first_match", + "CALLS tests/test_exclude_patterns.py:0 -> test_deeply_nested_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_detects_matching_patterns_at_root", + "CALLS tests/test_exclude_patterns.py:0 -> test_detects_multiple_git_directories", + "CALLS tests/test_exclude_patterns.py:0 -> test_detects_nested_matching_patterns_with_full_path", + "CALLS tests/test_exclude_patterns.py:0 -> test_detects_site_packages_at_root", + "CALLS tests/test_exclude_patterns.py:0 -> test_does_not_match_partial_directory_names", + "CALLS tests/test_exclude_patterns.py:0 -> test_does_not_skip_normal_path", + "CALLS tests/test_exclude_patterns.py:0 -> test_empty_paths_returns_empty_groups", + "CALLS tests/test_exclude_patterns.py:0 -> test_empty_repo_returns_empty", + "CALLS tests/test_exclude_patterns.py:0 -> test_empty_repo_returns_empty_set", + "CALLS tests/test_exclude_patterns.py:0 -> test_empty_unignore_paths_does_not_skip", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_does_not_match_partial_name", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_multiple_patterns", + "CALLS 
tests/test_exclude_patterns.py:0 -> test_exclude_nested_path_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_path_based_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_path_pattern_does_not_affect_other_paths", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_paths_adds_to_default_skip", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_specific_file_by_path", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_specific_file_does_not_affect_siblings", + "CALLS tests/test_exclude_patterns.py:0 -> test_exclude_takes_precedence_over_unignore", + "CALLS tests/test_exclude_patterns.py:0 -> test_groups_by_matching_pattern_not_parent_directory", + "CALLS tests/test_exclude_patterns.py:0 -> test_groups_nested_paths_under_first_matching_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_groups_single_level_paths", + "CALLS tests/test_exclude_patterns.py:0 -> test_ignores_files", + "CALLS tests/test_exclude_patterns.py:0 -> test_mixed_root_and_nested_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_multiple_patterns_in_path_returns_first", + "CALLS tests/test_exclude_patterns.py:0 -> test_multiple_unignore_paths", + "CALLS tests/test_exclude_patterns.py:0 -> test_nested_path_returns_first_matching_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_no_matching_pattern_returns_first_component", + "CALLS tests/test_exclude_patterns.py:0 -> test_no_matching_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_pattern_must_be_exact_match", + "CALLS tests/test_exclude_patterns.py:0 -> test_prompt_all_keeps_everything", + "CALLS tests/test_exclude_patterns.py:0 -> test_prompt_expand_then_select_from_group", + "CALLS tests/test_exclude_patterns.py:0 -> test_prompt_none_keeps_nothing", + "CALLS tests/test_exclude_patterns.py:0 -> test_prompt_number_keeps_entire_group", + "CALLS tests/test_exclude_patterns.py:0 -> test_prompt_with_cli_excludes", + "CALLS 
tests/test_exclude_patterns.py:0 -> test_real_world_scenario_with_venv_and_pycache", + "CALLS tests/test_exclude_patterns.py:0 -> test_root_level_file", + "CALLS tests/test_exclude_patterns.py:0 -> test_root_level_pattern_returns_itself", + "CALLS tests/test_exclude_patterns.py:0 -> test_similar_names_not_matching_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_site_packages_in_ignore_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_skip_directory_in_exclude", + "CALLS tests/test_exclude_patterns.py:0 -> test_skips_nested_ignore_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_skips_path_matching_ignore_patterns", + "CALLS tests/test_exclude_patterns.py:0 -> test_sorts_paths_within_group", + "CALLS tests/test_exclude_patterns.py:0 -> test_stops_at_first_matching_pattern", + "CALLS tests/test_exclude_patterns.py:0 -> test_suffix_checked_before_exclude", + "CALLS tests/test_exclude_patterns.py:0 -> test_suffix_checked_before_include", + "CALLS tests/test_exclude_patterns.py:0 -> test_unignore_directory_path", + "CALLS tests/test_exclude_patterns.py:0 -> test_unignore_exact_file_path", + "CALLS tests/test_exclude_patterns.py:0 -> test_unignore_parent_unignores_children", + "CALLS tests/test_exclude_patterns.py:0 -> test_unignore_paths_overrides_default_skip", + "CALLS tests/test_exclude_patterns.py:0 -> test_venv_patterns_in_ignore_patterns", + "CALLS tests/test_external_package_name_collision.py:0 -> ensure_node_batch", + "CALLS tests/test_external_package_name_collision.py:0 -> ensure_relationship_batch", + "CALLS tests/test_external_package_name_collision.py:0 -> execute_write", + "CALLS tests/test_external_package_name_collision.py:0 -> fetch_all", + "CALLS tests/test_external_package_name_collision.py:0 -> flush_all", + "CALLS tests/test_external_package_name_collision.py:0 -> test_bare_absolute_import_is_external_not_internal", + "CALLS tests/test_external_package_name_collision.py:0 -> 
test_relative_import_to_subpackage_still_internal", + "CALLS tests/test_file_editor.py:0 -> anyio_backend", + "CALLS tests/test_file_editor.py:0 -> file_editor", + "CALLS tests/test_file_editor.py:0 -> sample_js_file", + "CALLS tests/test_file_editor.py:0 -> sample_python_file", + "CALLS tests/test_file_editor.py:0 -> temp_project_root", + "CALLS tests/test_file_editor.py:0 -> test_apply_valid_patch", + "CALLS tests/test_file_editor.py:0 -> test_creates_tool_instance", + "CALLS tests/test_file_editor.py:0 -> test_edit_directory_fails", + "CALLS tests/test_file_editor.py:0 -> test_edit_existing_file", + "CALLS tests/test_file_editor.py:0 -> test_edit_file_outside_root", + "CALLS tests/test_file_editor.py:0 -> test_edit_nonexistent_file", + "CALLS tests/test_file_editor.py:0 -> test_error_result", + "CALLS tests/test_file_editor.py:0 -> test_get_ast_for_javascript_file", + "CALLS tests/test_file_editor.py:0 -> test_get_ast_for_python_file", + "CALLS tests/test_file_editor.py:0 -> test_get_diff_nonexistent_function", + "CALLS tests/test_file_editor.py:0 -> test_get_diff_shows_changes", + "CALLS tests/test_file_editor.py:0 -> test_get_function_source_by_name", + "CALLS tests/test_file_editor.py:0 -> test_get_function_source_by_qualified_name", + "CALLS tests/test_file_editor.py:0 -> test_get_nonexistent_function", + "CALLS tests/test_file_editor.py:0 -> test_get_parser_for_javascript", + "CALLS tests/test_file_editor.py:0 -> test_get_parser_for_python", + "CALLS tests/test_file_editor.py:0 -> test_get_parser_for_unknown_extension", + "CALLS tests/test_file_editor.py:0 -> test_init_creates_dmp_instance", + "CALLS tests/test_file_editor.py:0 -> test_init_loads_parsers", + "CALLS tests/test_file_editor.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_file_editor.py:0 -> test_replace_block_file_not_found", + "CALLS tests/test_file_editor.py:0 -> test_replace_block_outside_root", + "CALLS tests/test_file_editor.py:0 -> test_replace_existing_block", + "CALLS 
tests/test_file_editor.py:0 -> test_replace_identical_content", + "CALLS tests/test_file_editor.py:0 -> test_replace_nonexistent_block", + "CALLS tests/test_file_editor.py:0 -> test_success_result", + "CALLS tests/test_file_editor.py:0 -> test_tool_function_replaces_code", + "CALLS tests/test_file_editor.py:0 -> test_tool_function_returns_failure_message", + "CALLS tests/test_file_editor.py:0 -> test_tool_has_description", + "CALLS tests/test_file_editor.py:0 -> test_tool_requires_approval", + "CALLS tests/test_file_reader.py:0 -> anyio_backend", + "CALLS tests/test_file_reader.py:0 -> file_reader", + "CALLS tests/test_file_reader.py:0 -> sample_python_file", + "CALLS tests/test_file_reader.py:0 -> sample_text_file", + "CALLS tests/test_file_reader.py:0 -> temp_project_root", + "CALLS tests/test_file_reader.py:0 -> test_binary_extensions_set", + "CALLS tests/test_file_reader.py:0 -> test_creates_tool_instance", + "CALLS tests/test_file_reader.py:0 -> test_error_result", + "CALLS tests/test_file_reader.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_file_reader.py:0 -> test_init_with_relative_path", + "CALLS tests/test_file_reader.py:0 -> test_read_binary_pdf_file", + "CALLS tests/test_file_reader.py:0 -> test_read_binary_png_file", + "CALLS tests/test_file_reader.py:0 -> test_read_directory_returns_error", + "CALLS tests/test_file_reader.py:0 -> test_read_empty_file", + "CALLS tests/test_file_reader.py:0 -> test_read_existing_text_file", + "CALLS tests/test_file_reader.py:0 -> test_read_file_in_subdirectory", + "CALLS tests/test_file_reader.py:0 -> test_read_file_outside_root", + "CALLS tests/test_file_reader.py:0 -> test_read_file_with_unicode", + "CALLS tests/test_file_reader.py:0 -> test_read_nonexistent_file", + "CALLS tests/test_file_reader.py:0 -> test_read_python_file", + "CALLS tests/test_file_reader.py:0 -> test_success_result", + "CALLS tests/test_file_reader.py:0 -> test_tool_function_returns_content", + "CALLS tests/test_file_reader.py:0 
-> test_tool_function_returns_error_string", + "CALLS tests/test_file_reader.py:0 -> test_tool_has_description", + "CALLS tests/test_file_writer.py:0 -> anyio_backend", + "CALLS tests/test_file_writer.py:0 -> file_writer", + "CALLS tests/test_file_writer.py:0 -> temp_project_root", + "CALLS tests/test_file_writer.py:0 -> test_create_empty_file", + "CALLS tests/test_file_writer.py:0 -> test_create_file_in_subdirectory", + "CALLS tests/test_file_writer.py:0 -> test_create_file_multiline_content", + "CALLS tests/test_file_writer.py:0 -> test_create_file_outside_root", + "CALLS tests/test_file_writer.py:0 -> test_create_file_with_special_characters_in_name", + "CALLS tests/test_file_writer.py:0 -> test_create_file_with_unicode_content", + "CALLS tests/test_file_writer.py:0 -> test_create_new_file", + "CALLS tests/test_file_writer.py:0 -> test_creates_tool_instance", + "CALLS tests/test_file_writer.py:0 -> test_error_result", + "CALLS tests/test_file_writer.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_file_writer.py:0 -> test_init_with_relative_path", + "CALLS tests/test_file_writer.py:0 -> test_overwrite_existing_file", + "CALLS tests/test_file_writer.py:0 -> test_success_result", + "CALLS tests/test_file_writer.py:0 -> test_tool_function_creates_file", + "CALLS tests/test_file_writer.py:0 -> test_tool_has_description", + "CALLS tests/test_file_writer.py:0 -> test_tool_requires_approval", + "CALLS tests/test_fqn_resolver.py:0 -> method_a", + "CALLS tests/test_fqn_resolver.py:0 -> method_b", + "CALLS tests/test_fqn_resolver.py:0 -> my_method", + "CALLS tests/test_fqn_resolver.py:0 -> test_deeply_nested", + "CALLS tests/test_fqn_resolver.py:0 -> test_empty_tree_returns_empty_list", + "CALLS tests/test_fqn_resolver.py:0 -> test_empty_tree_returns_none", + "CALLS tests/test_fqn_resolver.py:0 -> test_extracts_from_multiple_classes", + "CALLS tests/test_fqn_resolver.py:0 -> test_extracts_multiple_functions", + "CALLS tests/test_fqn_resolver.py:0 -> 
test_extracts_nested_methods", + "CALLS tests/test_fqn_resolver.py:0 -> test_extracts_single_function", + "CALLS tests/test_fqn_resolver.py:0 -> test_finds_matching_function", + "CALLS tests/test_fqn_resolver.py:0 -> test_finds_nested_method", + "CALLS tests/test_fqn_resolver.py:0 -> test_init_file_excluded_from_path", + "CALLS tests/test_fqn_resolver.py:0 -> test_lambda_returns_none", + "CALLS tests/test_fqn_resolver.py:0 -> test_nested_in_class", + "CALLS tests/test_fqn_resolver.py:0 -> test_returns_none_when_not_found", + "CALLS tests/test_fqn_resolver.py:0 -> test_simple_function", + "CALLS tests/test_fqn_resolver.py:0 -> test_skips_lambdas", + "CALLS tests/test_function_ingest.py:0 -> definition_processor", + "CALLS tests/test_function_ingest.py:0 -> javascript_functions_project", + "CALLS tests/test_function_ingest.py:0 -> my_method", + "CALLS tests/test_function_ingest.py:0 -> parsers_and_queries", + "CALLS tests/test_function_ingest.py:0 -> python_functions_project", + "CALLS tests/test_function_ingest.py:0 -> test_anonymous_function_returns_none", + "CALLS tests/test_function_ingest.py:0 -> test_basic_function_props", + "CALLS tests/test_function_ingest.py:0 -> test_class_with_name", + "CALLS tests/test_function_ingest.py:0 -> test_deeply_nested_function", + "CALLS tests/test_function_ingest.py:0 -> test_empty_path_parts", + "CALLS tests/test_function_ingest.py:0 -> test_exported_function_props", + "CALLS tests/test_function_ingest.py:0 -> test_function_inside_class", + "CALLS tests/test_function_ingest.py:0 -> test_function_not_in_class", + "CALLS tests/test_function_ingest.py:0 -> test_function_with_name", + "CALLS tests/test_function_ingest.py:0 -> test_iife_arrow", + "CALLS tests/test_function_ingest.py:0 -> test_iife_parenthesized", + "CALLS tests/test_function_ingest.py:0 -> test_immutability", + "CALLS tests/test_function_ingest.py:0 -> test_javascript_arrow_function_with_variable", + "CALLS tests/test_function_ingest.py:0 -> 
test_javascript_functions_ingested", + "CALLS tests/test_function_ingest.py:0 -> test_method_in_class_returns_none", + "CALLS tests/test_function_ingest.py:0 -> test_multiple_function_ancestors", + "CALLS tests/test_function_ingest.py:0 -> test_named_function", + "CALLS tests/test_function_ingest.py:0 -> test_named_tuple_fields", + "CALLS tests/test_function_ingest.py:0 -> test_nested_function", + "CALLS tests/test_function_ingest.py:0 -> test_nested_function_in_method", + "CALLS tests/test_function_ingest.py:0 -> test_no_ancestors", + "CALLS tests/test_function_ingest.py:0 -> test_one_function_ancestor", + "CALLS tests/test_function_ingest.py:0 -> test_regular_anonymous", + "CALLS tests/test_function_ingest.py:0 -> test_rust_function_in_mod", + "CALLS tests/test_function_ingest.py:0 -> test_single_path_part", + "CALLS tests/test_function_ingest.py:0 -> test_top_level_function", + "CALLS tests/test_function_ingest.py:0 -> test_top_level_functions_ingested", + "CALLS tests/test_function_ingest.py:0 -> test_top_level_rust_function", + "CALLS tests/test_function_ingest.py:0 -> test_with_path_parts", + "CALLS tests/test_function_local_definitions.py:0 -> ensure_node_batch", + "CALLS tests/test_function_local_definitions.py:0 -> ensure_relationship_batch", + "CALLS tests/test_function_local_definitions.py:0 -> execute_write", + "CALLS tests/test_function_local_definitions.py:0 -> fetch_all", + "CALLS tests/test_function_local_definitions.py:0 -> flush_all", + "CALLS tests/test_function_local_definitions.py:0 -> test_default_captures_local_class_methods", + "CALLS tests/test_function_local_definitions.py:0 -> test_flag_off_skips_local_class_methods", + "CALLS tests/test_getattr_dispatch.py:0 -> ensure_node_batch", + "CALLS tests/test_getattr_dispatch.py:0 -> ensure_relationship_batch", + "CALLS tests/test_getattr_dispatch.py:0 -> execute_write", + "CALLS tests/test_getattr_dispatch.py:0 -> fetch_all", + "CALLS tests/test_getattr_dispatch.py:0 -> flush_all", + "CALLS 
tests/test_getattr_dispatch.py:0 -> test_getattr_with_constant_name_resolves", + "CALLS tests/test_getattr_dispatch.py:0 -> test_getattr_with_string_literal_resolves", + "CALLS tests/test_github_issues_integration.py:0 -> test_cli_override_real_scenario", + "CALLS tests/test_github_issues_integration.py:0 -> test_custom_model_names_with_colons_parsing", + "CALLS tests/test_github_issues_integration.py:0 -> test_env_file_ollama_configuration_respected", + "CALLS tests/test_github_issues_integration.py:0 -> test_google_gla_without_api_key_raises", + "CALLS tests/test_github_issues_integration.py:0 -> test_mixed_provider_real_world_scenario", + "CALLS tests/test_github_issues_integration.py:0 -> test_openai_compatible_endpoints", + "CALLS tests/test_github_issues_integration.py:0 -> test_reasoning_model_thinking_budget", + "CALLS tests/test_github_issues_integration.py:0 -> test_vertex_ai_enterprise_scenario", + "CALLS tests/test_github_issues_integration.py:0 -> test_vertex_ai_skips_api_key_validation", + "CALLS tests/test_github_issues_integration.py:0 -> test_vertex_ai_with_google_api_key_env_does_not_error", + "CALLS tests/test_go_containment_oracle.py:0 -> test_cgr_matches_go_oracle_on_containment_edges", + "CALLS tests/test_go_receiver_methods.py:0 -> go_crossfile_project", + "CALLS tests/test_go_receiver_methods.py:0 -> go_methods_project", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_crossfile_method_binds_to_declaring_type", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_defined_type_receiver_method_is_method_node", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_free_function_not_a_method", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_method_defined_by_receiver_type", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_pointer_receiver_method_is_method_node", + "CALLS tests/test_go_receiver_methods.py:0 -> test_go_value_receiver_method_is_method_node", + "CALLS tests/test_go_span_oracle.py:0 -> 
test_cgr_matches_go_oracle_on_node_spans", + "CALLS tests/test_go_span_oracle.py:0 -> type", + "CALLS tests/test_go_structure_oracle.py:0 -> test_cgr_matches_oracle_on_type_declarations", + "CALLS tests/test_go_structure_oracle.py:0 -> test_oracle_labels_go_declarations", + "CALLS tests/test_go_type_declarations.py:0 -> go_types_project", + "CALLS tests/test_go_type_declarations.py:0 -> test_go_interface_captured_as_interface", + "CALLS tests/test_go_type_declarations.py:0 -> test_go_struct_captured_as_class", + "CALLS tests/test_go_type_declarations.py:0 -> test_go_type_alias_captured_as_type", + "CALLS tests/test_go_type_declarations.py:0 -> type", + "CALLS tests/test_graph_export_integration.py:0 -> add", + "CALLS tests/test_graph_export_integration.py:0 -> main", + "CALLS tests/test_graph_export_integration.py:0 -> test_exported_json_structure_is_valid", + "CALLS tests/test_graph_export_integration.py:0 -> test_function_call_relationship_exports", + "CALLS tests/test_graph_export_integration.py:0 -> test_module_defines_relationship_exports", + "CALLS tests/test_graph_export_integration.py:0 -> test_python_class_with_methods_exports_correctly", + "CALLS tests/test_graph_export_integration.py:0 -> test_simple_python_function_exports_correctly", + "CALLS tests/test_graph_loader.py:0 -> graph_file", + "CALLS tests/test_graph_loader.py:0 -> loader", + "CALLS tests/test_graph_loader.py:0 -> test_find_node_by_property", + "CALLS tests/test_graph_loader.py:0 -> test_find_node_by_property_multiple_matches", + "CALLS tests/test_graph_loader.py:0 -> test_find_node_by_property_not_found", + "CALLS tests/test_graph_loader.py:0 -> test_find_nodes_by_label", + "CALLS tests/test_graph_loader.py:0 -> test_find_nodes_by_label_empty", + "CALLS tests/test_graph_loader.py:0 -> test_get_incoming_relationships", + "CALLS tests/test_graph_loader.py:0 -> test_get_incoming_relationships_empty", + "CALLS tests/test_graph_loader.py:0 -> test_get_node_by_id", + "CALLS 
tests/test_graph_loader.py:0 -> test_get_node_by_id_not_found", + "CALLS tests/test_graph_loader.py:0 -> test_get_outgoing_relationships", + "CALLS tests/test_graph_loader.py:0 -> test_get_outgoing_relationships_empty", + "CALLS tests/test_graph_loader.py:0 -> test_get_relationships_for_node", + "CALLS tests/test_graph_loader.py:0 -> test_lazy_loading", + "CALLS tests/test_graph_loader.py:0 -> test_load_file_not_found_raises", + "CALLS tests/test_graph_loader.py:0 -> test_load_graph_returns_loaded_loader", + "CALLS tests/test_graph_loader.py:0 -> test_load_parses_metadata", + "CALLS tests/test_graph_loader.py:0 -> test_load_parses_nodes", + "CALLS tests/test_graph_loader.py:0 -> test_load_parses_relationships", + "CALLS tests/test_graph_loader.py:0 -> test_relationship_properties", + "CALLS tests/test_graph_loader.py:0 -> test_summary_includes_metadata", + "CALLS tests/test_graph_loader.py:0 -> test_summary_node_labels", + "CALLS tests/test_graph_loader.py:0 -> test_summary_relationship_types", + "CALLS tests/test_graph_loader.py:0 -> test_summary_total_nodes", + "CALLS tests/test_graph_loader.py:0 -> test_summary_total_relationships", + "CALLS tests/test_graph_service.py:0 -> capture_query", + "CALLS tests/test_graph_service.py:0 -> fail_then_succeed", + "CALLS tests/test_graph_service.py:0 -> mock_fetch_all", + "CALLS tests/test_graph_service.py:0 -> test_build_create_node_query", + "CALLS tests/test_graph_service.py:0 -> test_build_create_relationship_query", + "CALLS tests/test_graph_service.py:0 -> test_build_create_relationship_query_with_props", + "CALLS tests/test_graph_service.py:0 -> test_build_merge_node_query_unchanged", + "CALLS tests/test_graph_service.py:0 -> test_build_merge_relationship_query_unchanged", + "CALLS tests/test_graph_service.py:0 -> test_calls_flush_nodes_and_flush_relationships", + "CALLS tests/test_graph_service.py:0 -> test_closes_cursor_on_exception", + "CALLS tests/test_graph_service.py:0 -> test_closes_cursor_on_success", + 
"CALLS tests/test_graph_service.py:0 -> test_continues_on_constraint_error", + "CALLS tests/test_graph_service.py:0 -> test_converts_rows_to_dicts", + "CALLS tests/test_graph_service.py:0 -> test_counts_nodes_and_relationships", + "CALLS tests/test_graph_service.py:0 -> test_creates_constraint_for_each_node_type", + "CALLS tests/test_graph_service.py:0 -> test_default_use_merge_is_true", + "CALLS tests/test_graph_service.py:0 -> test_enter_connects_to_memgraph", + "CALLS tests/test_graph_service.py:0 -> test_enter_omits_auth_when_not_provided", + "CALLS tests/test_graph_service.py:0 -> test_enter_passes_auth_when_provided", + "CALLS tests/test_graph_service.py:0 -> test_execute_write_delegates_to_execute_query", + "CALLS tests/test_graph_service.py:0 -> test_executes_delete_query", + "CALLS tests/test_graph_service.py:0 -> test_executes_query_and_returns_results", + "CALLS tests/test_graph_service.py:0 -> test_exit_flushes_and_closes_connection", + "CALLS tests/test_graph_service.py:0 -> test_exit_handles_none_connection", + "CALLS tests/test_graph_service.py:0 -> test_exit_logs_error_on_exception", + "CALLS tests/test_graph_service.py:0 -> test_fetch_all_delegates_to_execute_query", + "CALLS tests/test_graph_service.py:0 -> test_fetch_all_preserves_existing_memory_limit", + "CALLS tests/test_graph_service.py:0 -> test_flush_nodes_uses_create_query_when_merge_disabled", + "CALLS tests/test_graph_service.py:0 -> test_flush_nodes_uses_merge_query_by_default", + "CALLS tests/test_graph_service.py:0 -> test_flush_relationships_uses_create_query_when_merge_disabled", + "CALLS tests/test_graph_service.py:0 -> test_flush_relationships_uses_merge_query_by_default", + "CALLS tests/test_graph_service.py:0 -> test_handles_empty_buffer", + "CALLS tests/test_graph_service.py:0 -> test_handles_empty_result_set", + "CALLS tests/test_graph_service.py:0 -> test_handles_single_row", + "CALLS tests/test_graph_service.py:0 -> test_has_slots", + "CALLS tests/test_graph_service.py:0 -> 
test_init_conn_is_none", + "CALLS tests/test_graph_service.py:0 -> test_init_creates_empty_buffers", + "CALLS tests/test_graph_service.py:0 -> test_init_defaults_auth_to_none", + "CALLS tests/test_graph_service.py:0 -> test_init_normalizes_empty_strings_to_none", + "CALLS tests/test_graph_service.py:0 -> test_init_normalizes_whitespace_only_to_none", + "CALLS tests/test_graph_service.py:0 -> test_init_raises_for_empty_password_with_valid_username", + "CALLS tests/test_graph_service.py:0 -> test_init_raises_for_negative_batch_size", + "CALLS tests/test_graph_service.py:0 -> test_init_raises_for_password_without_username", + "CALLS tests/test_graph_service.py:0 -> test_init_raises_for_username_without_password", + "CALLS tests/test_graph_service.py:0 -> test_init_raises_for_zero_batch_size", + "CALLS tests/test_graph_service.py:0 -> test_init_sets_custom_batch_size", + "CALLS tests/test_graph_service.py:0 -> test_init_sets_default_batch_size", + "CALLS tests/test_graph_service.py:0 -> test_init_sets_host_and_port", + "CALLS tests/test_graph_service.py:0 -> test_init_stores_auth_credentials", + "CALLS tests/test_graph_service.py:0 -> test_init_strips_whitespace_from_credentials", + "CALLS tests/test_graph_service.py:0 -> test_no_dict", + "CALLS tests/test_graph_service.py:0 -> test_passes_params_to_query", + "CALLS tests/test_graph_service.py:0 -> test_processes_valid_nodes_and_skips_invalid", + "CALLS tests/test_graph_service.py:0 -> test_raises_when_not_connected", + "CALLS tests/test_graph_service.py:0 -> test_rel_groups_cleared_after_flush", + "CALLS tests/test_graph_service.py:0 -> test_rel_groups_correct_batch_row_values", + "CALLS tests/test_graph_service.py:0 -> test_rel_groups_empty_on_init", + "CALLS tests/test_graph_service.py:0 -> test_rel_groups_groups_by_pattern", + "CALLS tests/test_graph_service.py:0 -> test_rel_groups_populated_on_ensure", + "CALLS tests/test_graph_service.py:0 -> test_returns_early_when_params_empty", + "CALLS 
tests/test_graph_service.py:0 -> test_returns_empty_list_when_no_description", + "CALLS tests/test_graph_service.py:0 -> test_returns_graph_data_structure", + "CALLS tests/test_graph_service.py:0 -> test_returns_iso_format_timestamp", + "CALLS tests/test_graph_service.py:0 -> test_skips_nodes_missing_id_property", + "CALLS tests/test_graph_service.py:0 -> test_skips_nodes_with_unknown_label", + "CALLS tests/test_graph_service.py:0 -> test_suppresses_already_exists_errors_in_logs", + "CALLS tests/test_graph_service.py:0 -> test_use_merge_false", + "CALLS tests/test_graph_service.py:0 -> test_wraps_query_with_unwind", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> graph_service", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> log_messages", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> mock_execute_batch", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> sink", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> test_calls_failure_logging_multiple_batches", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> test_calls_failure_logging_single_batch", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> test_calls_success_no_failure_logging", + "CALLS tests/test_graph_service_calls_failure_logging.py:0 -> test_non_calls_relationships_no_failure_logging", + "CALLS tests/test_graph_updater_embeddings.py:0 -> _fake_embed_batch", + "CALLS tests/test_graph_updater_embeddings.py:0 -> query_ingestor", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_contains_starts_with_project_name", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_counts_embedded_functions", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_dispatches_single_batch_call_for_multiple_snippets", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_dot_concatenation_is_parenthesized", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_embeds_valid_function_with_source", + 
"CALLS tests/test_graph_updater_embeddings.py:0 -> test_handles_embed_failure_gracefully", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_no_bare_starts_with_plus", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_passes_project_name_without_trailing_dot", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_returns_early_on_empty_results", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_returns_required_columns", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_skips_row_with_missing_source_info", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_skips_unparseable_rows", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_skips_when_no_semantic_dependencies", + "CALLS tests/test_graph_updater_embeddings.py:0 -> test_uses_cypher_query_embeddings_constant", + "CALLS tests/test_graph_updater_embeddings.py:0 -> updater_with_query", + "CALLS tests/test_graph_updater_incremental.py:0 -> py_project", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_bounded_ast_cache_has_slots", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_broken_symlink_does_not_crash_indexing", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_cache_file_is_valid_json", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_changed_file_disables_fast_path", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_changed_file_is_reparsed", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_deleted_file_disables_fast_path", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_deleted_file_removed_from_hash_cache", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_deleted_file_removed_from_state", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_different_content_different_hash", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_force_bypasses_cache", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_force_bypasses_fast_path", + "CALLS 
tests/test_graph_updater_incremental.py:0 -> test_function_registry_trie_has_slots", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_hash_cache_file_created_after_run", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_hash_returns_hex_string", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_hash_with_bytes_returns_none_for_broken_symlink", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_hash_with_bytes_returns_none_for_missing_file", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_load_corrupted_returns_empty", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_load_nonexistent_returns_empty", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_new_file_disables_fast_path", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_new_file_is_processed", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_no_hash_cache_disables_fast_path", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_same_content_same_hash", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_save_and_load_cache", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_save_creates_parent_dirs", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_second_run_skips_all_passes", + "CALLS tests/test_graph_updater_incremental.py:0 -> test_unchanged_file_is_skipped", + "CALLS tests/test_graph_updater_incremental.py:0 -> updater", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> ensure_node_batch", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> ensure_relationship_batch", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> execute_write", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> fetch_all", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> flush_all", + "CALLS tests/test_graph_updater_incremental_rename.py:0 -> test_incremental_rename_matches_full_rebuild", + "CALLS tests/test_graph_updater_integration.py:0 -> temp_project", + "CALLS 
tests/test_graph_updater_integration.py:0 -> test_function_call_relationships_are_created", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> graph_updater", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_empty_dict_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_end_line_not_int_becomes_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_missing_node_id_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_missing_qualified_name_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_node_id_not_int_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_none_values_for_required_fields_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_path_not_str_becomes_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_qualified_name_not_str_returns_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_result_is_embedding_query_result_type", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_start_line_not_int_becomes_none", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_valid_input_all_fields", + "CALLS tests/test_graph_updater_parse_embedding.py:0 -> test_valid_input_required_fields_only", + "CALLS tests/test_graph_updater_pruning.py:0 -> py_project", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_deleted_file_triggers_cypher_delete", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_no_deletes_when_no_files_removed", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_handles_empty_graph", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_handles_none_path_gracefully", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_multiple_orphans_across_types", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_no_orphans_skips_deletes", + "CALLS tests/test_graph_updater_pruning.py:0 -> 
test_prune_removes_orphan_external_module_nodes", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_removes_orphan_module_nodes", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_skips_inline_module_synthetic_paths", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_prune_skips_other_projects", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_query_constrains_traversal_to_containment_edges", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_query_does_not_traverse_calls_edges", + "CALLS tests/test_graph_updater_pruning.py:0 -> test_run_calls_prune", + "CALLS tests/test_graph_updater_pruning.py:0 -> updater", + "CALLS tests/test_handler_integration.py:0 -> add", + "CALLS tests/test_handler_integration.py:0 -> process", + "CALLS tests/test_handler_integration.py:0 -> test_assigned_function_names_extracted", + "CALLS tests/test_handler_integration.py:0 -> test_class_is_ingested", + "CALLS tests/test_handler_integration.py:0 -> test_class_is_ingested_with_methods", + "CALLS tests/test_handler_integration.py:0 -> test_cpp_handler_used_for_cpp_files", + "CALLS tests/test_handler_integration.py:0 -> test_dot_index_function_names_extracted", + "CALLS tests/test_handler_integration.py:0 -> test_exports_inside_functions_skipped", + "CALLS tests/test_handler_integration.py:0 -> test_handler_switches_per_file_language", + "CALLS tests/test_handler_integration.py:0 -> test_java_handler_used_for_java_files", + "CALLS tests/test_handler_integration.py:0 -> test_js_handler_used_for_javascript_files", + "CALLS tests/test_handler_integration.py:0 -> test_lambda_functions_get_generated_names", + "CALLS tests/test_handler_integration.py:0 -> test_lua_handler_used_for_lua_files", + "CALLS tests/test_handler_integration.py:0 -> test_namespaced_functions_have_full_qn", + "CALLS tests/test_handler_integration.py:0 -> test_object_literal_methods_ingested", + "CALLS tests/test_handler_integration.py:0 -> test_python_handler_used_for_python_files", + 
"CALLS tests/test_handler_integration.py:0 -> test_rust_handler_used_for_rust_files", + "CALLS tests/test_handler_integration.py:0 -> test_standalone_functions_ingested", + "CALLS tests/test_handler_integration.py:0 -> test_struct_is_ingested", + "CALLS tests/test_handler_integration.py:0 -> test_template_base_class_names_extracted", + "CALLS tests/test_handler_integration.py:0 -> test_ts_handler_used_for_typescript_files", + "CALLS tests/test_handler_integration.py:0 -> wrapper", + "CALLS tests/test_handler_registry.py:0 -> test_cpp_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_different_instances_for_different_languages", + "CALLS tests/test_handler_registry.py:0 -> test_handler_has_all_protocol_methods", + "CALLS tests/test_handler_registry.py:0 -> test_handler_methods_are_callable", + "CALLS tests/test_handler_registry.py:0 -> test_java_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_js_and_ts_share_same_handler_type", + "CALLS tests/test_handler_registry.py:0 -> test_jsts_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_lua_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_php_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_python_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_returns_base_handler_for_c", + "CALLS tests/test_handler_registry.py:0 -> test_returns_base_handler_for_go", + "CALLS tests/test_handler_registry.py:0 -> test_returns_cpp_handler_for_cpp", + "CALLS tests/test_handler_registry.py:0 -> test_returns_java_handler_for_java", + "CALLS tests/test_handler_registry.py:0 -> test_returns_jsts_handler_for_javascript", + "CALLS tests/test_handler_registry.py:0 -> test_returns_jsts_handler_for_typescript", + "CALLS tests/test_handler_registry.py:0 -> test_returns_lua_handler_for_lua", + "CALLS tests/test_handler_registry.py:0 -> test_returns_php_handler_for_php", + "CALLS tests/test_handler_registry.py:0 
-> test_returns_python_handler_for_python", + "CALLS tests/test_handler_registry.py:0 -> test_returns_rust_handler_for_rust", + "CALLS tests/test_handler_registry.py:0 -> test_rust_handler_extends_base", + "CALLS tests/test_handler_registry.py:0 -> test_same_instance_returned_for_same_language", + "CALLS tests/test_handlers_unit.py:0 -> cpp_parser", + "CALLS tests/test_handlers_unit.py:0 -> index", + "CALLS tests/test_handlers_unit.py:0 -> java_parser", + "CALLS tests/test_handlers_unit.py:0 -> js_parser", + "CALLS tests/test_handlers_unit.py:0 -> lua_parser", + "CALLS tests/test_handlers_unit.py:0 -> php_parser", + "CALLS tests/test_handlers_unit.py:0 -> process", + "CALLS tests/test_handlers_unit.py:0 -> python_parser", + "CALLS tests/test_handlers_unit.py:0 -> rust_parser", + "CALLS tests/test_handlers_unit.py:0 -> test_build_function_qualified_name_simple", + "CALLS tests/test_handlers_unit.py:0 -> test_build_method_qualified_name_overloaded_methods", + "CALLS tests/test_handlers_unit.py:0 -> test_build_method_qualified_name_simple", + "CALLS tests/test_handlers_unit.py:0 -> test_build_method_qualified_name_with_params", + "CALLS tests/test_handlers_unit.py:0 -> test_build_method_qualified_name_without_params", + "CALLS tests/test_handlers_unit.py:0 -> test_build_nested_function_qn_skips_class_without_object_literals", + "CALLS tests/test_handlers_unit.py:0 -> test_build_nested_function_qn_stops_at_class", + "CALLS tests/test_handlers_unit.py:0 -> test_build_nested_function_qn_with_class_and_object_literals", + "CALLS tests/test_handlers_unit.py:0 -> test_build_nested_function_qn_with_parent_functions", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_base_class_name_simple_identifier", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_base_class_name_template_type", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_base_class_name_with_text", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_base_class_name_without_text_returns_none", + 
"CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_call_decorator", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_class_annotation", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_class_decorator", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_dataclass_with_options", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_decorator_with_args", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_decorator_with_call", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_dotted_decorator", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_function_attribute", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_inner_attribute", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_member_expression", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_multiple", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_multiple_annotations", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_multiple_attributes", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_multiple_decorators", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_no_annotations", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_no_attributes", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_no_decorators", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_on_function_definition", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_parameterized_annotation", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_php8_attribute", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_returns_empty_for_undecorated", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_returns_empty_list", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_simple_identifier", + "CALLS 
tests/test_handlers_unit.py:0 -> test_extract_decorators_single_annotation", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_single_attribute", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_single_decorator", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_decorators_with_args", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_anonymous_function", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_anonymous_returns_none", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_arrow_function", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_arrow_in_callback", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_arrow_in_variable_declarator", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_assigned_to_dot_index", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_assigned_to_identifier", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_from_function_definition", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_from_method_declaration", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_lambda_expression", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_regular_function", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_with_name_field", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_function_name_without_name_returns_none", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_impl_target_returns_none", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_impl_target_struct", + "CALLS tests/test_handlers_unit.py:0 -> test_extract_impl_target_trait_for_struct", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_at_module_level", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_in_class_body", + "CALLS tests/test_handlers_unit.py:0 -> 
test_is_class_method_inside_class", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_inside_interface", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_inside_trait", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_outside_class", + "CALLS tests/test_handlers_unit.py:0 -> test_is_class_method_returns_false", + "CALLS tests/test_handlers_unit.py:0 -> test_is_export_inside_function_at_module_level", + "CALLS tests/test_handlers_unit.py:0 -> test_is_export_inside_function_nested", + "CALLS tests/test_handlers_unit.py:0 -> test_is_export_inside_function_returns_false", + "CALLS tests/test_handlers_unit.py:0 -> test_is_function_exported_private_method", + "CALLS tests/test_handlers_unit.py:0 -> test_is_function_exported_public_method", + "CALLS tests/test_handlers_unit.py:0 -> test_is_function_exported_returns_false", + "CALLS tests/test_handlers_unit.py:0 -> test_is_function_exported_standalone_function", + "CALLS tests/test_handlers_unit.py:0 -> test_is_function_exported_without_export", + "CALLS tests/test_handlers_unit.py:0 -> test_is_inside_method_with_object_literals_nested_in_method", + "CALLS tests/test_handlers_unit.py:0 -> test_is_inside_method_with_object_literals_returns_false", + "CALLS tests/test_handlers_unit.py:0 -> test_is_inside_method_with_object_literals_standalone_object", + "CALLS tests/test_handlers_unit.py:0 -> test_is_inside_method_with_object_literals_stops_at_class_body", + "CALLS tests/test_handlers_unit.py:0 -> test_should_process_as_impl_block_returns_false", + "CALLS tests/test_handlers_unit.py:0 -> test_should_process_as_impl_block_with_impl_item", + "CALLS tests/test_handlers_unit.py:0 -> test_should_process_as_impl_block_with_other_node", + "CALLS tests/test_handlers_unit.py:0 -> ts_parser", + "CALLS tests/test_higher_order_calls.py:0 -> ensure_node_batch", + "CALLS tests/test_higher_order_calls.py:0 -> ensure_relationship_batch", + "CALLS tests/test_higher_order_calls.py:0 -> execute_write", 
+ "CALLS tests/test_higher_order_calls.py:0 -> fetch_all", + "CALLS tests/test_higher_order_calls.py:0 -> flush_all", + "CALLS tests/test_higher_order_calls.py:0 -> name", + "CALLS tests/test_higher_order_calls.py:0 -> test_callable_parameter_prefers_module_function_over_sibling_method", + "CALLS tests/test_higher_order_calls.py:0 -> test_callable_parameter_resolves_to_argument_at_call_site", + "CALLS tests/test_higher_order_calls.py:0 -> test_callback_attributed_to_invoking_callee_not_caller", + "CALLS tests/test_higher_order_calls.py:0 -> test_normal_call_edge_to_callee_still_present", + "CALLS tests/test_higher_order_calls.py:0 -> test_sorted_key_attributed_to_enclosing_function", + "CALLS tests/test_import_distance_calculation.py:0 -> mock_updater", + "CALLS tests/test_import_distance_calculation.py:0 -> test_edge_case_missing_from_registry", + "CALLS tests/test_import_distance_calculation.py:0 -> test_function_vs_method_distance_difference", + "CALLS tests/test_import_distance_calculation.py:0 -> test_method_detection_correctness", + "CALLS tests/test_import_distance_calculation.py:0 -> test_non_sibling_modules_no_bonus", + "CALLS tests/test_import_distance_calculation.py:0 -> test_same_module_candidates", + "CALLS tests/test_import_distance_calculation.py:0 -> test_sibling_module_bonus_for_functions", + "CALLS tests/test_import_distance_calculation.py:0 -> test_sibling_module_bonus_for_methods", + "CALLS tests/test_import_parsing.py:0 -> capture_node", + "CALLS tests/test_import_parsing.py:0 -> graph_updater", + "CALLS tests/test_import_parsing.py:0 -> import_processor", + "CALLS tests/test_import_parsing.py:0 -> mock_ingestor", + "CALLS tests/test_import_parsing.py:0 -> test_cache_stats_after_clear", + "CALLS tests/test_import_parsing.py:0 -> test_clear_stdlib_cache_does_not_raise", + "CALLS tests/test_import_parsing.py:0 -> test_crate_import_from_flat_module_resolves_correctly", + "CALLS tests/test_import_parsing.py:0 -> 
test_crate_import_from_nested_module_resolves_to_crate_root", + "CALLS tests/test_import_parsing.py:0 -> test_external_module_name_uses_module_path_not_local_alias", + "CALLS tests/test_import_parsing.py:0 -> test_flush_stdlib_cache_does_not_raise", + "CALLS tests/test_import_parsing.py:0 -> test_function_registry_integration", + "CALLS tests/test_import_parsing.py:0 -> test_get_stdlib_cache_stats_returns_dict", + "CALLS tests/test_import_parsing.py:0 -> test_import_mapping_functionality", + "CALLS tests/test_import_parsing.py:0 -> test_import_processing_doesnt_crash", + "CALLS tests/test_import_parsing.py:0 -> test_internal_import_matched_with_dot_separator", + "CALLS tests/test_import_parsing.py:0 -> test_is_local_java_import_cache_hits", + "CALLS tests/test_import_parsing.py:0 -> test_is_local_module_cache_hits_on_repeated_calls", + "CALLS tests/test_import_parsing.py:0 -> test_is_local_module_cache_returns_correct_result", + "CALLS tests/test_import_parsing.py:0 -> test_is_local_module_detects_directory", + "CALLS tests/test_import_parsing.py:0 -> test_is_local_module_detects_py_file", + "CALLS tests/test_import_parsing.py:0 -> test_language_specific_import_methods", + "CALLS tests/test_import_parsing.py:0 -> test_python_alias_import_parsing", + "CALLS tests/test_import_parsing.py:0 -> test_python_import_parsing", + "CALLS tests/test_import_parsing.py:0 -> test_relative_import_resolution", + "CALLS tests/test_import_parsing.py:0 -> test_resolves_directory_with_index_file", + "CALLS tests/test_import_parsing.py:0 -> test_resolves_directory_with_index_js", + "CALLS tests/test_import_parsing.py:0 -> test_resolves_file_with_extension", + "CALLS tests/test_import_parsing.py:0 -> test_returns_full_name_when_no_match", + "CALLS tests/test_import_parsing.py:0 -> test_rust_external_module_name_uses_module_path", + "CALLS tests/test_import_parsing.py:0 -> test_rust_external_module_node_created", + "CALLS tests/test_import_parsing.py:0 -> 
test_separate_instances_have_independent_caches", + "CALLS tests/test_import_parsing.py:0 -> test_similar_prefix_not_matched_without_dot", + "CALLS tests/test_inherits_attribute_base.py:0 -> UniXcoder", + "CALLS tests/test_inherits_attribute_base.py:0 -> ensure_node_batch", + "CALLS tests/test_inherits_attribute_base.py:0 -> ensure_relationship_batch", + "CALLS tests/test_inherits_attribute_base.py:0 -> execute_write", + "CALLS tests/test_inherits_attribute_base.py:0 -> fetch_all", + "CALLS tests/test_inherits_attribute_base.py:0 -> flush_all", + "CALLS tests/test_inherits_attribute_base.py:0 -> test_attribute_base_class_creates_inherits_edge", + "CALLS tests/test_instance_attr_type_inference.py:0 -> ensure_node_batch", + "CALLS tests/test_instance_attr_type_inference.py:0 -> ensure_relationship_batch", + "CALLS tests/test_instance_attr_type_inference.py:0 -> execute_write", + "CALLS tests/test_instance_attr_type_inference.py:0 -> fetch_all", + "CALLS tests/test_instance_attr_type_inference.py:0 -> flush_all", + "CALLS tests/test_instance_attr_type_inference.py:0 -> status", + "CALLS tests/test_instance_attr_type_inference.py:0 -> test_ambiguous_method_does_not_resolve_to_module_function", + "CALLS tests/test_instance_attr_type_inference.py:0 -> test_method_call_resolves_via_init_attribute_type", + "CALLS tests/test_instance_attr_type_inference.py:0 -> test_property_access_not_resolved_to_module_function", + "CALLS tests/test_instance_attr_type_inference.py:0 -> test_property_access_resolves_via_init_attribute_type", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> ensure_node_batch", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> ensure_relationship_batch", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> execute_write", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> fetch_all", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> flush_all", + "CALLS tests/test_interprocedural_callback_flow.py:0 -> 
test_callback_propagates_through_passthrough_param", + "CALLS tests/test_java_advanced_oop.py:0 -> add", + "CALLS tests/test_java_advanced_oop.py:0 -> execute", + "CALLS tests/test_java_advanced_oop.py:0 -> flush", + "CALLS tests/test_java_advanced_oop.py:0 -> get", + "CALLS tests/test_java_advanced_oop.py:0 -> java_advanced_oop_project", + "CALLS tests/test_java_advanced_oop.py:0 -> process", + "CALLS tests/test_java_advanced_oop.py:0 -> put", + "CALLS tests/test_java_advanced_oop.py:0 -> render", + "CALLS tests/test_java_advanced_oop.py:0 -> run", + "CALLS tests/test_java_advanced_oop.py:0 -> save", + "CALLS tests/test_java_advanced_oop.py:0 -> test_abstract_classes_with_partial_implementation", + "CALLS tests/test_java_advanced_oop.py:0 -> test_advanced_inner_class_scenarios", + "CALLS tests/test_java_advanced_oop.py:0 -> test_annotation_processing_complex", + "CALLS tests/test_java_advanced_oop.py:0 -> test_complex_generics_with_wildcards", + "CALLS tests/test_java_advanced_oop.py:0 -> test_complex_static_initialization", + "CALLS tests/test_java_advanced_oop.py:0 -> test_covariant_return_types", + "CALLS tests/test_java_advanced_oop.py:0 -> test_diamond_problem_resolution", + "CALLS tests/test_java_advanced_oop.py:0 -> test_generic_type_erasure_scenarios", + "CALLS tests/test_java_advanced_oop.py:0 -> test_method_overloading_variations", + "CALLS tests/test_java_advanced_oop.py:0 -> test_method_overriding_edge_cases", + "CALLS tests/test_java_advanced_oop.py:0 -> test_multiple_interface_inheritance", + "CALLS tests/test_java_advanced_oop.py:0 -> test_nested_generic_bounds", + "CALLS tests/test_java_advanced_oop.py:0 -> value", + "CALLS tests/test_java_collections_frameworks.py:0 -> add", + "CALLS tests/test_java_collections_frameworks.py:0 -> clear", + "CALLS tests/test_java_collections_frameworks.py:0 -> get", + "CALLS tests/test_java_collections_frameworks.py:0 -> java_collections_project", + "CALLS tests/test_java_collections_frameworks.py:0 -> operation", 
+ "CALLS tests/test_java_collections_frameworks.py:0 -> put", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_basic_collection_implementations", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_custom_collection_implementations", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_iterator_patterns_enhanced_for", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_map_operations_key_value_handling", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_set_operations_uniqueness", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_stream_api_integration_collections", + "CALLS tests/test_java_collections_frameworks.py:0 -> test_thread_safe_collections", + "CALLS tests/test_java_complex_relationships.py:0 -> add", + "CALLS tests/test_java_complex_relationships.py:0 -> execute", + "CALLS tests/test_java_complex_relationships.py:0 -> java_complex_project", + "CALLS tests/test_java_complex_relationships.py:0 -> start", + "CALLS tests/test_java_complex_relationships.py:0 -> test_builder_pattern_relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> test_command_pattern_relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> test_decorator_pattern_relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> test_factory_pattern_relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> test_observer_pattern_relationships", + "CALLS tests/test_java_complex_relationships.py:0 -> test_strategy_pattern_relationships", + "CALLS tests/test_java_comprehensive.py:0 -> FileReader", + "CALLS tests/test_java_comprehensive.py:0 -> FileWriter", + "CALLS tests/test_java_comprehensive.py:0 -> add", + "CALLS tests/test_java_comprehensive.py:0 -> clear", + "CALLS tests/test_java_comprehensive.py:0 -> java_project", + "CALLS tests/test_java_comprehensive.py:0 -> process", + "CALLS tests/test_java_comprehensive.py:0 -> restart", + "CALLS tests/test_java_comprehensive.py:0 -> run", 
+ "CALLS tests/test_java_comprehensive.py:0 -> start", + "CALLS tests/test_java_comprehensive.py:0 -> test_basic_java_classes", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_enums_and_annotations", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_exception_handling", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_generics_and_collections", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_inner_classes", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_lambda_expressions", + "CALLS tests/test_java_comprehensive.py:0 -> test_java_static_and_final", + "CALLS tests/test_java_comprehensive.py:0 -> value", + "CALLS tests/test_java_concurrency.py:0 -> add", + "CALLS tests/test_java_concurrency.py:0 -> clear", + "CALLS tests/test_java_concurrency.py:0 -> execute", + "CALLS tests/test_java_concurrency.py:0 -> get", + "CALLS tests/test_java_concurrency.py:0 -> java_concurrency_project", + "CALLS tests/test_java_concurrency.py:0 -> put", + "CALLS tests/test_java_concurrency.py:0 -> start", + "CALLS tests/test_java_concurrency.py:0 -> submit", + "CALLS tests/test_java_concurrency.py:0 -> test_completable_future_patterns", + "CALLS tests/test_java_concurrency.py:0 -> test_concurrent_collections", + "CALLS tests/test_java_concurrency.py:0 -> test_executor_service_patterns", + "CALLS tests/test_java_concurrency.py:0 -> test_locks_and_conditions", + "CALLS tests/test_java_concurrency.py:0 -> test_synchronized_methods_blocks", + "CALLS tests/test_java_concurrency.py:0 -> test_volatile_fields", + "CALLS tests/test_java_containment_oracle.py:0 -> test_cgr_matches_jdk_oracle_on_containment_edges", + "CALLS tests/test_java_edge_cases.py:0 -> add", + "CALLS tests/test_java_edge_cases.py:0 -> description", + "CALLS tests/test_java_edge_cases.py:0 -> get", + "CALLS tests/test_java_edge_cases.py:0 -> java_edge_cases_project", + "CALLS tests/test_java_edge_cases.py:0 -> name", + "CALLS tests/test_java_edge_cases.py:0 -> process", + "CALLS 
tests/test_java_edge_cases.py:0 -> run", + "CALLS tests/test_java_edge_cases.py:0 -> test_annotation_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_boundary_value_literals", + "CALLS tests/test_java_edge_cases.py:0 -> test_comment_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_deeply_nested_generics", + "CALLS tests/test_java_edge_cases.py:0 -> test_empty_classes_and_interfaces", + "CALLS tests/test_java_edge_cases.py:0 -> test_generic_variance_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_long_qualified_names", + "CALLS tests/test_java_edge_cases.py:0 -> test_malformed_but_valid_syntax", + "CALLS tests/test_java_edge_cases.py:0 -> test_modifier_combinations_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_operator_and_expression_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_package_and_import_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> test_parsing_edge_cases_syntax", + "CALLS tests/test_java_edge_cases.py:0 -> test_single_line_vs_multiline_constructs", + "CALLS tests/test_java_edge_cases.py:0 -> test_unicode_identifiers", + "CALLS tests/test_java_edge_cases.py:0 -> test_whitespace_edge_cases", + "CALLS tests/test_java_edge_cases.py:0 -> value", + "CALLS tests/test_java_field_access_chains.py:0 -> main", + "CALLS tests/test_java_field_access_chains.py:0 -> start", + "CALLS tests/test_java_field_access_chains.py:0 -> test_direct_super_field_chain_method_call_multiclass", + "CALLS tests/test_java_field_access_chains.py:0 -> test_direct_this_field_chain_method_call_multiclass", + "CALLS tests/test_java_field_access_chains.py:0 -> test_generic_scoped_superclass_extraction", + "CALLS tests/test_java_field_access_chains.py:0 -> test_inherited_field_chain_via_nested_superclass", + "CALLS tests/test_java_field_access_chains.py:0 -> test_inherited_field_chain_via_object", + "CALLS tests/test_java_field_access_chains.py:0 -> test_inherited_field_chain_via_this", + "CALLS 
tests/test_java_field_access_chains.py:0 -> test_mixed_field_access_then_method_resolves", + "CALLS tests/test_java_field_access_chains.py:0 -> test_multilevel_field_access_then_method_resolves", + "CALLS tests/test_java_field_access_chains.py:0 -> test_nested_field_access_type_inference_via_var", + "CALLS tests/test_java_field_access_chains.py:0 -> test_scoped_superclass_extraction_keeps_actual_class", + "CALLS tests/test_java_field_access_chains.py:0 -> test_super_rooted_chain_with_nested_superclass", + "CALLS tests/test_java_field_access_chains.py:0 -> test_super_rooted_nested_field_access_via_var", + "CALLS tests/test_java_field_access_chains.py:0 -> test_this_rooted_nested_field_access_via_var", + "CALLS tests/test_java_imports.py:0 -> FileReader", + "CALLS tests/test_java_imports.py:0 -> FileWriter", + "CALLS tests/test_java_imports.py:0 -> add", + "CALLS tests/test_java_imports.py:0 -> get", + "CALLS tests/test_java_imports.py:0 -> java_imports_project", + "CALLS tests/test_java_imports.py:0 -> test_basic_java_imports", + "CALLS tests/test_java_imports.py:0 -> test_package_local_imports", + "CALLS tests/test_java_imports.py:0 -> test_qualified_names_without_imports", + "CALLS tests/test_java_imports.py:0 -> test_static_imports", + "CALLS tests/test_java_imports.py:0 -> test_wildcard_imports", + "CALLS tests/test_java_inheritance_edges.py:0 -> test_java_inheritance_and_implements_edges", + "CALLS tests/test_java_inheritance_oracle.py:0 -> test_cgr_matches_jdk_oracle_on_inheritance_edges", + "CALLS tests/test_java_label_name_collision.py:0 -> execute", + "CALLS tests/test_java_label_name_collision.py:0 -> java_label_collision_project", + "CALLS tests/test_java_label_name_collision.py:0 -> load", + "CALLS tests/test_java_label_name_collision.py:0 -> run", + "CALLS tests/test_java_label_name_collision.py:0 -> test_all_node_labels_have_constraints", + "CALLS tests/test_java_label_name_collision.py:0 -> test_class_implementing_interface_named_interface", + "CALLS 
tests/test_java_label_name_collision.py:0 -> test_class_named_class_ingested_as_class_node", + "CALLS tests/test_java_label_name_collision.py:0 -> test_enum_named_enum_has_defines_relationship", + "CALLS tests/test_java_label_name_collision.py:0 -> test_enum_named_enum_ingested_as_enum_node", + "CALLS tests/test_java_label_name_collision.py:0 -> test_interface_and_enum_labels_have_constraints", + "CALLS tests/test_java_label_name_collision.py:0 -> test_interface_named_interface_has_defines_relationship", + "CALLS tests/test_java_label_name_collision.py:0 -> test_interface_named_interface_ingested_as_interface_node", + "CALLS tests/test_java_label_name_collision.py:0 -> test_multiple_label_colliding_names", + "CALLS tests/test_java_method_calls.py:0 -> add", + "CALLS tests/test_java_method_calls.py:0 -> clear", + "CALLS tests/test_java_method_calls.py:0 -> get", + "CALLS tests/test_java_method_calls.py:0 -> java_methods_project", + "CALLS tests/test_java_method_calls.py:0 -> process", + "CALLS tests/test_java_method_calls.py:0 -> put", + "CALLS tests/test_java_method_calls.py:0 -> test_basic_method_calls", + "CALLS tests/test_java_method_calls.py:0 -> test_cross_file_method_calls_with_imports", + "CALLS tests/test_java_method_calls.py:0 -> test_fully_qualified_static_method_calls", + "CALLS tests/test_java_method_calls.py:0 -> test_generic_method_calls", + "CALLS tests/test_java_method_calls.py:0 -> test_inheritance_and_polymorphism", + "CALLS tests/test_java_method_calls.py:0 -> test_interface_method_calls", + "CALLS tests/test_java_modern_features.py:0 -> get", + "CALLS tests/test_java_modern_features.py:0 -> java_modern_project", + "CALLS tests/test_java_modern_features.py:0 -> put", + "CALLS tests/test_java_modern_features.py:0 -> run", + "CALLS tests/test_java_modern_features.py:0 -> test_java_instanceof_patterns", + "CALLS tests/test_java_modern_features.py:0 -> test_java_records", + "CALLS tests/test_java_modern_features.py:0 -> test_java_sealed_classes", + 
"CALLS tests/test_java_modern_features.py:0 -> test_java_switch_expressions", + "CALLS tests/test_java_modern_features.py:0 -> test_java_text_blocks", + "CALLS tests/test_java_modern_features.py:0 -> test_java_var_keyword", + "CALLS tests/test_java_modules.py:0 -> add", + "CALLS tests/test_java_modules.py:0 -> clear", + "CALLS tests/test_java_modules.py:0 -> get", + "CALLS tests/test_java_modules.py:0 -> java_modules_project", + "CALLS tests/test_java_modules.py:0 -> load", + "CALLS tests/test_java_modules.py:0 -> name", + "CALLS tests/test_java_modules.py:0 -> process", + "CALLS tests/test_java_modules.py:0 -> put", + "CALLS tests/test_java_modules.py:0 -> save", + "CALLS tests/test_java_modules.py:0 -> test_modular_application_structure", + "CALLS tests/test_java_modules.py:0 -> test_module_info_declarations", + "CALLS tests/test_java_modules.py:0 -> test_module_layer_and_configuration", + "CALLS tests/test_java_modules.py:0 -> test_service_provider_interface", + "CALLS tests/test_java_name_collision.py:0 -> java_collision_project", + "CALLS tests/test_java_name_collision.py:0 -> test_name_collision_prefers_explicit_import", + "CALLS tests/test_java_name_collision.py:0 -> test_name_collision_prefers_same_package", + "CALLS tests/test_java_nested_structures.py:0 -> add", + "CALLS tests/test_java_nested_structures.py:0 -> clear", + "CALLS tests/test_java_nested_structures.py:0 -> done", + "CALLS tests/test_java_nested_structures.py:0 -> get", + "CALLS tests/test_java_nested_structures.py:0 -> java_nested_project", + "CALLS tests/test_java_nested_structures.py:0 -> metadata", + "CALLS tests/test_java_nested_structures.py:0 -> name", + "CALLS tests/test_java_nested_structures.py:0 -> process", + "CALLS tests/test_java_nested_structures.py:0 -> put", + "CALLS tests/test_java_nested_structures.py:0 -> run", + "CALLS tests/test_java_nested_structures.py:0 -> test_anonymous_classes_complex", + "CALLS tests/test_java_nested_structures.py:0 -> test_builder_pattern_nested", 
+ "CALLS tests/test_java_nested_structures.py:0 -> test_deeply_nested_classes", + "CALLS tests/test_java_nested_structures.py:0 -> test_lambda_edge_cases", + "CALLS tests/test_java_nested_structures.py:0 -> test_local_classes_in_methods", + "CALLS tests/test_java_nested_structures.py:0 -> test_visitor_pattern_nested", + "CALLS tests/test_java_nested_structures.py:0 -> value", + "CALLS tests/test_java_real_world.py:0 -> add", + "CALLS tests/test_java_real_world.py:0 -> execute", + "CALLS tests/test_java_real_world.py:0 -> get", + "CALLS tests/test_java_real_world.py:0 -> java_real_world_project", + "CALLS tests/test_java_real_world.py:0 -> parse", + "CALLS tests/test_java_real_world.py:0 -> put", + "CALLS tests/test_java_real_world.py:0 -> render", + "CALLS tests/test_java_real_world.py:0 -> save", + "CALLS tests/test_java_real_world.py:0 -> start", + "CALLS tests/test_java_real_world.py:0 -> test_builder_observer_patterns", + "CALLS tests/test_java_real_world.py:0 -> test_configuration_classes", + "CALLS tests/test_java_real_world.py:0 -> test_dao_repository_patterns", + "CALLS tests/test_java_real_world.py:0 -> test_design_patterns_singleton_factory", + "CALLS tests/test_java_real_world.py:0 -> test_spring_framework_annotations", + "CALLS tests/test_java_real_world.py:0 -> test_utility_helper_classes", + "CALLS tests/test_java_reflection_annotations.py:0 -> description", + "CALLS tests/test_java_reflection_annotations.py:0 -> get", + "CALLS tests/test_java_reflection_annotations.py:0 -> java_reflection_project", + "CALLS tests/test_java_reflection_annotations.py:0 -> name", + "CALLS tests/test_java_reflection_annotations.py:0 -> operation", + "CALLS tests/test_java_reflection_annotations.py:0 -> process", + "CALLS tests/test_java_reflection_annotations.py:0 -> put", + "CALLS tests/test_java_reflection_annotations.py:0 -> save", + "CALLS tests/test_java_reflection_annotations.py:0 -> status", + "CALLS tests/test_java_reflection_annotations.py:0 -> 
test_annotation_processing", + "CALLS tests/test_java_reflection_annotations.py:0 -> test_custom_annotations", + "CALLS tests/test_java_reflection_annotations.py:0 -> test_meta_annotations_inheritance", + "CALLS tests/test_java_reflection_annotations.py:0 -> test_reflection_api_usage", + "CALLS tests/test_java_reflection_annotations.py:0 -> value", + "CALLS tests/test_java_relationship_validation.py:0 -> add", + "CALLS tests/test_java_relationship_validation.py:0 -> java_relationships_project", + "CALLS tests/test_java_relationship_validation.py:0 -> main", + "CALLS tests/test_java_relationship_validation.py:0 -> run", + "CALLS tests/test_java_relationship_validation.py:0 -> save", + "CALLS tests/test_java_relationship_validation.py:0 -> start", + "CALLS tests/test_java_relationship_validation.py:0 -> test_composition_and_aggregation_relationships", + "CALLS tests/test_java_relationship_validation.py:0 -> test_cross_package_relationships", + "CALLS tests/test_java_relationship_validation.py:0 -> test_dependency_injection_relationships", + "CALLS tests/test_java_relationship_validation.py:0 -> test_inner_class_relationships", + "CALLS tests/test_java_relationship_validation.py:0 -> test_method_overriding_relationships", + "CALLS tests/test_java_relationship_validation.py:0 -> test_static_method_and_field_relationships", + "CALLS tests/test_java_span_oracle.py:0 -> test_cgr_matches_jdk_oracle_on_node_spans", + "CALLS tests/test_java_span_oracle.py:0 -> value", + "CALLS tests/test_java_streams_functional.py:0 -> add", + "CALLS tests/test_java_streams_functional.py:0 -> generate", + "CALLS tests/test_java_streams_functional.py:0 -> get", + "CALLS tests/test_java_streams_functional.py:0 -> java_streams_project", + "CALLS tests/test_java_streams_functional.py:0 -> process", + "CALLS tests/test_java_streams_functional.py:0 -> test_functional_interfaces", + "CALLS tests/test_java_streams_functional.py:0 -> test_method_references_patterns", + "CALLS 
tests/test_java_streams_functional.py:0 -> test_optional_patterns", + "CALLS tests/test_java_streams_functional.py:0 -> test_stream_operations", + "CALLS tests/test_java_structure_oracle.py:0 -> run", + "CALLS tests/test_java_structure_oracle.py:0 -> test_cgr_matches_jdk_oracle_on_java_structure", + "CALLS tests/test_java_type_inference_unit.py:0 -> mock_ast_cache", + "CALLS tests/test_java_type_inference_unit.py:0 -> mock_function_registry", + "CALLS tests/test_java_type_inference_unit.py:0 -> mock_import_processor", + "CALLS tests/test_java_type_inference_unit.py:0 -> process", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_build_fqn_lookup_map", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_calculate_module_distance", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_collect_candidate_modules", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_parent_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_parent_class_not_found", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_registry_entries_under_fallback_to_items", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_registry_entries_under_with_prefix", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_superclass_using_ast_class_not_found", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_find_superclass_using_ast_nested_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_current_class_name_enum", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_current_class_name_found", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_current_class_name_interface", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_current_class_name_no_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_current_class_name_short_module", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_implemented_interfaces_no_interfaces", + "CALLS 
tests/test_java_type_inference_unit.py:0 -> test_get_implemented_interfaces_short_qualified_name", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_implemented_interfaces_single_interface", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_superclass_name_file_not_in_cache", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_superclass_name_module_not_found", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_superclass_name_no_superclass", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_superclass_name_short_qualified_name", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_get_superclass_name_with_valid_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_heuristic_method_return_type_boolean", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_heuristic_method_return_type_create_pattern", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_heuristic_method_return_type_getter", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_heuristic_method_return_type_unknown", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_is_matching_method", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_lookup_variable_type_caching", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_lookup_variable_type_cycle_detection", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_lookup_variable_type_empty_module", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_lookup_variable_type_empty_var_name", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_module_qn_to_java_fqn", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_rank_module_candidates_empty", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_rank_module_candidates_no_current_module", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_rank_module_candidates_prefers_closer_package", + "CALLS tests/test_java_type_inference_unit.py:0 -> 
test_rank_module_candidates_prefers_exact_match", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_from_import", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_from_local_vars", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_same_package_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_super_reference", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_this_reference", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_object_type_unknown", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_array_types", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_empty_returns_object", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_from_import_mapping", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_fully_qualified", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_generic_types", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_primitive_types", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_same_package_class", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_java_type_name_wrapper_types", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_static_or_local_method", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_resolve_static_or_local_method_not_found", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_traverse_for_class_declarations_mixed_types", + "CALLS tests/test_java_type_inference_unit.py:0 -> test_traverse_for_class_declarations_multiple_classes", + "CALLS tests/test_java_type_inference_unit.py:0 -> type_inference_engine", + "CALLS tests/test_java_type_resolver_integration.py:0 -> add", + "CALLS 
tests/test_java_type_resolver_integration.py:0 -> import_processor", + "CALLS tests/test_java_type_resolver_integration.py:0 -> java_parser", + "CALLS tests/test_java_type_resolver_integration.py:0 -> mock_ast_cache", + "CALLS tests/test_java_type_resolver_integration.py:0 -> mock_function_registry", + "CALLS tests/test_java_type_resolver_integration.py:0 -> process", + "CALLS tests/test_java_type_resolver_integration.py:0 -> run", + "CALLS tests/test_java_type_resolver_integration.py:0 -> save", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_class_with_extends_and_implements", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_current_class_name_class", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_current_class_name_enum", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_current_class_name_interface", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_implemented_interfaces_multiple", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_implemented_interfaces_none", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_implemented_interfaces_single", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_superclass_name_generic_extends", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_superclass_name_no_extends", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_get_superclass_name_with_real_ast", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_nested_class", + "CALLS tests/test_java_type_resolver_integration.py:0 -> test_traverse_for_class_declarations_multiple_in_file", + "CALLS tests/test_java_type_resolver_integration.py:0 -> type_inference_engine", + "CALLS tests/test_java_utils.py:0 -> test_annotation_type_declaration", + "CALLS tests/test_java_utils.py:0 -> test_annotation_with_arguments", + "CALLS tests/test_java_utils.py:0 -> test_class_with_generic_superclass", + "CALLS 
tests/test_java_utils.py:0 -> test_class_with_modifiers", + "CALLS tests/test_java_utils.py:0 -> test_class_with_superclass", + "CALLS tests/test_java_utils.py:0 -> test_class_with_type_parameters", + "CALLS tests/test_java_utils.py:0 -> test_constructor", + "CALLS tests/test_java_utils.py:0 -> test_empty_import_declaration", + "CALLS tests/test_java_utils.py:0 -> test_empty_package_declaration", + "CALLS tests/test_java_utils.py:0 -> test_empty_parts", + "CALLS tests/test_java_utils.py:0 -> test_empty_path", + "CALLS tests/test_java_utils.py:0 -> test_enum_declaration", + "CALLS tests/test_java_utils.py:0 -> test_exclude_classes", + "CALLS tests/test_java_utils.py:0 -> test_field_with_annotation", + "CALLS tests/test_java_utils.py:0 -> test_field_with_modifiers", + "CALLS tests/test_java_utils.py:0 -> test_include_methods", + "CALLS tests/test_java_utils.py:0 -> test_interface_declaration", + "CALLS tests/test_java_utils.py:0 -> test_invalid_node_type", + "CALLS tests/test_java_utils.py:0 -> test_java_at_start", + "CALLS tests/test_java_utils.py:0 -> test_kotlin_layout", + "CALLS tests/test_java_utils.py:0 -> test_method_call_on_object", + "CALLS tests/test_java_utils.py:0 -> test_method_call_on_super", + "CALLS tests/test_java_utils.py:0 -> test_method_call_on_this", + "CALLS tests/test_java_utils.py:0 -> test_method_call_with_arguments", + "CALLS tests/test_java_utils.py:0 -> test_method_with_modifiers_and_annotations", + "CALLS tests/test_java_utils.py:0 -> test_method_with_parameters", + "CALLS tests/test_java_utils.py:0 -> test_method_with_varargs", + "CALLS tests/test_java_utils.py:0 -> test_nested_class", + "CALLS tests/test_java_utils.py:0 -> test_no_package_structure", + "CALLS tests/test_java_utils.py:0 -> test_non_standard_layout_with_main", + "CALLS tests/test_java_utils.py:0 -> test_not_main_missing_public", + "CALLS tests/test_java_utils.py:0 -> test_not_main_missing_static", + "CALLS tests/test_java_utils.py:0 -> test_not_main_not_void", + "CALLS 
tests/test_java_utils.py:0 -> test_not_main_wrong_name", + "CALLS tests/test_java_utils.py:0 -> test_package_private_visibility", + "CALLS tests/test_java_utils.py:0 -> test_private_visibility", + "CALLS tests/test_java_utils.py:0 -> test_protected_visibility", + "CALLS tests/test_java_utils.py:0 -> test_public_visibility", + "CALLS tests/test_java_utils.py:0 -> test_record_declaration", + "CALLS tests/test_java_utils.py:0 -> test_regular_import", + "CALLS tests/test_java_utils.py:0 -> test_scala_layout", + "CALLS tests/test_java_utils.py:0 -> test_scoped_identifier_package", + "CALLS tests/test_java_utils.py:0 -> test_simple_annotation", + "CALLS tests/test_java_utils.py:0 -> test_simple_class", + "CALLS tests/test_java_utils.py:0 -> test_simple_field", + "CALLS tests/test_java_utils.py:0 -> test_simple_identifier_import", + "CALLS tests/test_java_utils.py:0 -> test_simple_identifier_package", + "CALLS tests/test_java_utils.py:0 -> test_simple_method", + "CALLS tests/test_java_utils.py:0 -> test_simple_method_call", + "CALLS tests/test_java_utils.py:0 -> test_simple_src_layout", + "CALLS tests/test_java_utils.py:0 -> test_standard_maven_layout", + "CALLS tests/test_java_utils.py:0 -> test_static_import", + "CALLS tests/test_java_utils.py:0 -> test_test_folder_layout", + "CALLS tests/test_java_utils.py:0 -> test_valid_main_method_with_array", + "CALLS tests/test_java_utils.py:0 -> test_valid_main_method_with_varargs", + "CALLS tests/test_java_utils.py:0 -> test_wildcard_import", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> engine", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> import_processor", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> java_parser", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> mock_ast_cache", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> mock_function_registry", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> process", + "CALLS 
tests/test_java_variable_analyzer_integration.py:0 -> test_abstract_method", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_array_type_declaration", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_assignment_with_literal_value", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_chained_assignments", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_class_fields_accessible_in_method", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_constructor_parameters", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_empty_method_body", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_enhanced_for_loop_with_array", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_enhanced_for_loop_with_list", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_enhanced_for_with_custom_type", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_generic_method", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_infer_type_from_literals", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_infer_type_from_new_expression", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_interface_method", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_lambda_expression_context", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_local_variable_with_object_creation", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_method_with_all_variable_types", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_method_with_multiple_parameters", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_method_with_single_parameter", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_method_with_varargs_parameter", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> 
test_multiple_declarators_same_type", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_nested_classes_variable_resolution", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_nested_enhanced_for_loops", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_record_constructor", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_simple_assignment_in_constructor", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_simple_local_variable_declaration", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_static_fields", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_static_method_variables", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_switch_expression_variables", + "CALLS tests/test_java_variable_analyzer_integration.py:0 -> test_try_catch_variable_declarations", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> engine", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> mock_ast_cache", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> mock_function_registry", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> mock_import_processor", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_array_creation_expression", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_assignment_expression_inferred", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_assignment_with_field_access", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_builds_map_successfully", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_class_field_extracted", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_collects_from_all_sources", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_decimal_floating_point_literal", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_enhanced_for_with_child_variable_declarator", + "CALLS 
tests/test_java_variable_analyzer_unit.py:0 -> test_enhanced_for_with_type_and_name", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_false_literal", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_field_access", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_field_access_missing_parts", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_finds_field_in_class", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_formal_parameter_missing_name", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_formal_parameter_with_type", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_identifier", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_integer_literal", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_local_variable_declaration", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_local_variable_missing_type", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_local_variable_with_object_creation_value", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_multiple_formal_parameters", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_nested_assignments", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_nested_enhanced_for_loops", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_nested_local_variables", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_no_containing_class", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_no_parameters_node", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_object_creation_expression", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_empty_on_no_variables", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_for_empty_class_type", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_for_empty_field_name", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> 
test_returns_none_for_short_module_qn", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_when_class_not_found", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_when_field_not_found", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_when_file_not_in_ast_cache", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_when_file_not_in_cache", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_returns_none_when_module_not_in_path_map", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_spread_parameter", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_string_literal", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_true_literal", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_unknown_expression_type", + "CALLS tests/test_java_variable_analyzer_unit.py:0 -> test_unknown_node_type", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> add", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> factory", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> get", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> java_loom_project", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> name", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> run", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> start", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> submit", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> test_scoped_values", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> test_structured_concurrency", + "CALLS tests/test_java_virtual_threads_loom.py:0 -> test_virtual_threads_basics", + "CALLS tests/test_javascript_async_patterns.py:0 -> javascript_async_project", + "CALLS tests/test_javascript_async_patterns.py:0 -> process", + "CALLS tests/test_javascript_async_patterns.py:0 -> test_async_await_patterns", + "CALLS tests/test_javascript_async_patterns.py:0 -> test_async_comprehensive", + 
"CALLS tests/test_javascript_async_patterns.py:0 -> test_callback_patterns", + "CALLS tests/test_javascript_async_patterns.py:0 -> test_generator_patterns", + "CALLS tests/test_javascript_async_patterns.py:0 -> test_promise_patterns", + "CALLS tests/test_javascript_classes.py:0 -> add", + "CALLS tests/test_javascript_classes.py:0 -> clear", + "CALLS tests/test_javascript_classes.py:0 -> javascript_classes_project", + "CALLS tests/test_javascript_classes.py:0 -> parse", + "CALLS tests/test_javascript_classes.py:0 -> start", + "CALLS tests/test_javascript_classes.py:0 -> test_basic_class_declarations", + "CALLS tests/test_javascript_classes.py:0 -> test_class_comprehensive", + "CALLS tests/test_javascript_classes.py:0 -> test_class_expressions_and_mixins", + "CALLS tests/test_javascript_classes.py:0 -> test_class_inheritance", + "CALLS tests/test_javascript_classes.py:0 -> test_private_fields_and_methods", + "CALLS tests/test_javascript_classes.py:0 -> test_static_methods_and_properties", + "CALLS tests/test_javascript_closures_scoping.py:0 -> add", + "CALLS tests/test_javascript_closures_scoping.py:0 -> get", + "CALLS tests/test_javascript_closures_scoping.py:0 -> javascript_closures_project", + "CALLS tests/test_javascript_closures_scoping.py:0 -> name", + "CALLS tests/test_javascript_closures_scoping.py:0 -> process", + "CALLS tests/test_javascript_closures_scoping.py:0 -> test_basic_closures", + "CALLS tests/test_javascript_closures_scoping.py:0 -> test_closures_comprehensive", + "CALLS tests/test_javascript_closures_scoping.py:0 -> test_hoisting_behavior", + "CALLS tests/test_javascript_closures_scoping.py:0 -> test_module_patterns_iife", + "CALLS tests/test_javascript_closures_scoping.py:0 -> test_variable_scoping", + "CALLS tests/test_javascript_containment_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_js_containment_edges", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> js_singleton_project", + "CALLS 
tests/test_javascript_cross_file_singleton.py:0 -> load", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> main", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> save", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> start", + "CALLS tests/test_javascript_cross_file_singleton.py:0 -> test_js_singleton_pattern_cross_file_calls", + "CALLS tests/test_javascript_destructuring.py:0 -> get", + "CALLS tests/test_javascript_destructuring.py:0 -> javascript_destructuring_project", + "CALLS tests/test_javascript_destructuring.py:0 -> process", + "CALLS tests/test_javascript_destructuring.py:0 -> processor", + "CALLS tests/test_javascript_destructuring.py:0 -> test_array_destructuring", + "CALLS tests/test_javascript_destructuring.py:0 -> test_destructuring_comprehensive", + "CALLS tests/test_javascript_destructuring.py:0 -> test_destructuring_with_imports", + "CALLS tests/test_javascript_destructuring.py:0 -> test_object_destructuring", + "CALLS tests/test_javascript_destructuring.py:0 -> test_parameter_destructuring", + "CALLS tests/test_javascript_error_handling.py:0 -> add", + "CALLS tests/test_javascript_error_handling.py:0 -> clear", + "CALLS tests/test_javascript_error_handling.py:0 -> execute", + "CALLS tests/test_javascript_error_handling.py:0 -> javascript_error_handling_project", + "CALLS tests/test_javascript_error_handling.py:0 -> operation", + "CALLS tests/test_javascript_error_handling.py:0 -> parse", + "CALLS tests/test_javascript_error_handling.py:0 -> process", + "CALLS tests/test_javascript_error_handling.py:0 -> test_async_error_handling", + "CALLS tests/test_javascript_error_handling.py:0 -> test_custom_error_classes", + "CALLS tests/test_javascript_error_handling.py:0 -> test_error_handling_comprehensive", + "CALLS tests/test_javascript_error_handling.py:0 -> test_try_catch_finally_blocks", + "CALLS tests/test_javascript_functions.py:0 -> add", + "CALLS tests/test_javascript_functions.py:0 -> get", + "CALLS 
tests/test_javascript_functions.py:0 -> javascript_functions_project", + "CALLS tests/test_javascript_functions.py:0 -> operation", + "CALLS tests/test_javascript_functions.py:0 -> processor", + "CALLS tests/test_javascript_functions.py:0 -> test_arrow_functions", + "CALLS tests/test_javascript_functions.py:0 -> test_async_functions", + "CALLS tests/test_javascript_functions.py:0 -> test_function_comprehensive", + "CALLS tests/test_javascript_functions.py:0 -> test_function_declarations", + "CALLS tests/test_javascript_functions.py:0 -> test_higher_order_functions", + "CALLS tests/test_javascript_functions.py:0 -> test_immediately_invoked_function_expressions", + "CALLS tests/test_javascript_functions.py:0 -> test_method_definitions", + "CALLS tests/test_javascript_functions.py:0 -> text", + "CALLS tests/test_javascript_imports.py:0 -> add", + "CALLS tests/test_javascript_imports.py:0 -> get", + "CALLS tests/test_javascript_imports.py:0 -> javascript_imports_project", + "CALLS tests/test_javascript_imports.py:0 -> load", + "CALLS tests/test_javascript_imports.py:0 -> processor", + "CALLS tests/test_javascript_imports.py:0 -> test_absolute_package_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_aliased_re_export_import_mapping", + "CALLS tests/test_javascript_imports.py:0 -> test_commonjs_aliased_destructuring", + "CALLS tests/test_javascript_imports.py:0 -> test_commonjs_multiple_destructured_variables_regression", + "CALLS tests/test_javascript_imports.py:0 -> test_commonjs_require_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_dynamic_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_es6_default_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_es6_named_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_es6_namespace_imports", + "CALLS tests/test_javascript_imports.py:0 -> test_import_error_handling", + "CALLS tests/test_javascript_imports.py:0 -> test_import_relationships_comprehensive", + "CALLS 
tests/test_javascript_imports.py:0 -> test_mixed_import_patterns", + "CALLS tests/test_javascript_imports.py:0 -> test_relative_path_resolution", + "CALLS tests/test_javascript_modules.py:0 -> add", + "CALLS tests/test_javascript_modules.py:0 -> export", + "CALLS tests/test_javascript_modules.py:0 -> factory", + "CALLS tests/test_javascript_modules.py:0 -> get", + "CALLS tests/test_javascript_modules.py:0 -> javascript_modules_project", + "CALLS tests/test_javascript_modules.py:0 -> render", + "CALLS tests/test_javascript_modules.py:0 -> test_aliased_re_exports", + "CALLS tests/test_javascript_modules.py:0 -> test_circular_dependencies", + "CALLS tests/test_javascript_modules.py:0 -> test_commonjs_module_exports", + "CALLS tests/test_javascript_modules.py:0 -> test_dynamic_exports", + "CALLS tests/test_javascript_modules.py:0 -> test_es6_export_patterns", + "CALLS tests/test_javascript_modules.py:0 -> test_mixed_module_systems", + "CALLS tests/test_javascript_modules.py:0 -> test_module_comprehensive", + "CALLS tests/test_javascript_object_patterns.py:0 -> add", + "CALLS tests/test_javascript_object_patterns.py:0 -> clear", + "CALLS tests/test_javascript_object_patterns.py:0 -> javascript_object_patterns_project", + "CALLS tests/test_javascript_object_patterns.py:0 -> parse", + "CALLS tests/test_javascript_object_patterns.py:0 -> process", + "CALLS tests/test_javascript_object_patterns.py:0 -> put", + "CALLS tests/test_javascript_object_patterns.py:0 -> restart", + "CALLS tests/test_javascript_object_patterns.py:0 -> run", + "CALLS tests/test_javascript_object_patterns.py:0 -> start", + "CALLS tests/test_javascript_object_patterns.py:0 -> test_constructor_patterns", + "CALLS tests/test_javascript_object_patterns.py:0 -> test_factory_functions", + "CALLS tests/test_javascript_object_patterns.py:0 -> test_object_composition", + "CALLS tests/test_javascript_object_patterns.py:0 -> test_object_literals", + "CALLS tests/test_javascript_object_patterns.py:0 -> 
test_object_patterns_comprehensive", + "CALLS tests/test_javascript_object_patterns.py:0 -> value", + "CALLS tests/test_javascript_object_patterns.py:0 -> walk", + "CALLS tests/test_javascript_path_resolution.py:0 -> graph_updater", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_absolute_imports", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_complex_relative_paths", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_deeply_nested_modules", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_edge_cases", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_parent_directory_imports", + "CALLS tests/test_javascript_path_resolution.py:0 -> test_same_directory_imports", + "CALLS tests/test_javascript_prototypes.py:0 -> add", + "CALLS tests/test_javascript_prototypes.py:0 -> get", + "CALLS tests/test_javascript_prototypes.py:0 -> javascript_prototypes_project", + "CALLS tests/test_javascript_prototypes.py:0 -> keys", + "CALLS tests/test_javascript_prototypes.py:0 -> mixin", + "CALLS tests/test_javascript_prototypes.py:0 -> save", + "CALLS tests/test_javascript_prototypes.py:0 -> start", + "CALLS tests/test_javascript_prototypes.py:0 -> test_constructor_functions_and_prototypes", + "CALLS tests/test_javascript_prototypes.py:0 -> test_object_create_patterns", + "CALLS tests/test_javascript_prototypes.py:0 -> test_prototype_chain_and_method_resolution", + "CALLS tests/test_javascript_prototypes.py:0 -> test_prototype_comprehensive", + "CALLS tests/test_javascript_prototypes.py:0 -> test_prototype_mixins_and_composition", + "CALLS tests/test_javascript_prototypes.py:0 -> test_prototype_patterns_edge_cases", + "CALLS tests/test_javascript_span_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_javascript_node_spans", + "CALLS tests/test_javascript_spread_rest.py:0 -> add", + "CALLS tests/test_javascript_spread_rest.py:0 -> handler", + "CALLS tests/test_javascript_spread_rest.py:0 -> javascript_spread_rest_project", + "CALLS 
tests/test_javascript_spread_rest.py:0 -> keys", + "CALLS tests/test_javascript_spread_rest.py:0 -> process", + "CALLS tests/test_javascript_spread_rest.py:0 -> processor", + "CALLS tests/test_javascript_spread_rest.py:0 -> test_destructuring_with_spread_rest", + "CALLS tests/test_javascript_spread_rest.py:0 -> test_rest_parameters", + "CALLS tests/test_javascript_spread_rest.py:0 -> test_spread_in_arrays", + "CALLS tests/test_javascript_spread_rest.py:0 -> test_spread_in_objects", + "CALLS tests/test_javascript_spread_rest.py:0 -> test_spread_rest_comprehensive", + "CALLS tests/test_javascript_structure_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_javascript_structure", + "CALLS tests/test_javascript_this_binding.py:0 -> javascript_this_project", + "CALLS tests/test_javascript_this_binding.py:0 -> render", + "CALLS tests/test_javascript_this_binding.py:0 -> test_arrow_functions_lexical_this", + "CALLS tests/test_javascript_this_binding.py:0 -> test_bind_call_apply_methods", + "CALLS tests/test_javascript_this_binding.py:0 -> test_this_comprehensive", + "CALLS tests/test_javascript_this_binding.py:0 -> test_this_in_callbacks_and_events", + "CALLS tests/test_javascript_this_binding.py:0 -> test_this_in_different_contexts", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> add", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> definition_processor", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> js_parser", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> temp_js_project", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_arrow_functions_in_objects_are_ingested", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_assignment_arrow_functions_are_ingested", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_class_method_returns_true", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_export_at_module_level_returns_false", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_export_inside_function_returns_true", + "CALLS 
tests/test_js_ts_ingest_helpers.py:0 -> test_finds_object_name_from_variable_declarator", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_instance_method_returns_false", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_method_in_class_returns_true", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_non_method_returns_false", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_object_in_class_method_returns_true", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_object_literal_methods_are_ingested", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_object_method_returns_false", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_prototype_inheritance_creates_relationship", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_prototype_methods_are_ingested", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_returns_none_for_anonymous_object", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_standalone_function_returns_false", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_standalone_object_returns_false", + "CALLS tests/test_js_ts_ingest_helpers.py:0 -> test_static_method_returns_true", + "CALLS tests/test_js_ts_module_system.py:0 -> process", + "CALLS tests/test_js_ts_module_system.py:0 -> read", + "CALLS tests/test_js_ts_module_system.py:0 -> temp_js_project", + "CALLS tests/test_js_ts_module_system.py:0 -> temp_ts_project", + "CALLS tests/test_js_ts_module_system.py:0 -> test_aliased_destructured_require", + "CALLS tests/test_js_ts_module_system.py:0 -> test_async_function_export", + "CALLS tests/test_js_ts_module_system.py:0 -> test_deeply_nested_require_paths", + "CALLS tests/test_js_ts_module_system.py:0 -> test_destructured_require_creates_import_relationship", + "CALLS tests/test_js_ts_module_system.py:0 -> test_empty_file", + "CALLS tests/test_js_ts_module_system.py:0 -> test_export_const_arrow_function", + "CALLS tests/test_js_ts_module_system.py:0 -> test_export_const_function_expression", + "CALLS 
tests/test_js_ts_module_system.py:0 -> test_export_function_declaration", + "CALLS tests/test_js_ts_module_system.py:0 -> test_export_generator_function", + "CALLS tests/test_js_ts_module_system.py:0 -> test_exports_dot_function_is_ingested", + "CALLS tests/test_js_ts_module_system.py:0 -> test_exports_with_special_names", + "CALLS tests/test_js_ts_module_system.py:0 -> test_file_with_both_commonjs_and_es6_patterns", + "CALLS tests/test_js_ts_module_system.py:0 -> test_file_with_many_exports", + "CALLS tests/test_js_ts_module_system.py:0 -> test_file_with_only_comments", + "CALLS tests/test_js_ts_module_system.py:0 -> test_file_with_only_imports", + "CALLS tests/test_js_ts_module_system.py:0 -> test_iife_with_exports", + "CALLS tests/test_js_ts_module_system.py:0 -> test_mixed_destructured_and_default_require", + "CALLS tests/test_js_ts_module_system.py:0 -> test_module_exports_dot_function_is_ingested", + "CALLS tests/test_js_ts_module_system.py:0 -> test_module_exports_object_with_methods", + "CALLS tests/test_js_ts_module_system.py:0 -> test_multiple_destructured_from_same_module", + "CALLS tests/test_js_ts_module_system.py:0 -> test_require_in_function_scope", + "CALLS tests/test_js_ts_module_system.py:0 -> test_typescript_async_export_function", + "CALLS tests/test_js_ts_module_system.py:0 -> test_typescript_class_with_decorators", + "CALLS tests/test_js_ts_module_system.py:0 -> test_typescript_class_with_methods", + "CALLS tests/test_js_ts_module_system.py:0 -> test_typescript_export_function", + "CALLS tests/test_js_ts_module_system.py:0 -> test_typescript_with_interfaces_and_types", + "CALLS tests/test_js_ts_module_system.py:0 -> text", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> mixin", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> mock_function_registry", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> mock_import_processor", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> mock_ingestor", + "CALLS 
tests/test_js_ts_module_system_unit.py:0 -> mock_language_queries", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_creates_module_node_and_relationship", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_handles_query_errors_gracefully", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_handles_resolution_error_gracefully", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_missing_arguments_in_require_call", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_missing_function_field_in_call_expression", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_missing_name_field_in_declarator", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_missing_value_field_in_declarator", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_non_string_module_argument", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_pair_pattern_with_wrong_key_type", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_processes_aliased_destructuring", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_processes_simple_destructuring", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_duplicate_imports", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_empty_object_pattern", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_non_js_ts_languages", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_non_object_pattern_name", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_non_require_call", + "CALLS tests/test_js_ts_module_system_unit.py:0 -> test_skips_when_no_language_obj", + "CALLS tests/test_js_ts_utils_integration.py:0 -> add", + "CALLS tests/test_js_ts_utils_integration.py:0 -> js_parser", + "CALLS tests/test_js_ts_utils_integration.py:0 -> process", + "CALLS tests/test_js_ts_utils_integration.py:0 -> sample_js_project", + "CALLS tests/test_js_ts_utils_integration.py:0 -> sample_ts_project", + "CALLS tests/test_js_ts_utils_integration.py:0 -> 
save", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_builder_pattern_returns_this", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_cache_hit_returns_correct_result", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_cache_invalidates_on_new_root_node", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_cache_miss_returns_none", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_chained_method_calls_in_singleton", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_deeply_nested_qn", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_extract_from_factory_returns", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_extract_from_method_qn", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_extract_from_singleton_pattern", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_factory_returns_new_instance", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_fallback_without_language_obj", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_all_builder_methods", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_generic_class_methods", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_methods_in_inheritance_hierarchy", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_repository_methods", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_singleton_methods", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_find_static_factory_methods", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_method_calls_in_factory", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_multiple_returns_in_conditional", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_nested_class_interactions", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_nonexistent_method_returns_none", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_return_types_in_typescript", + "CALLS tests/test_js_ts_utils_integration.py:0 -> 
test_returns_in_factory_method", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_single_part_returns_none", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_single_return_in_simple_method", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_singleton_getInstance_returns_static_instance", + "CALLS tests/test_js_ts_utils_integration.py:0 -> test_with_language_obj", + "CALLS tests/test_js_ts_utils_integration.py:0 -> ts_parser", + "CALLS tests/test_js_type_inference_integration.py:0 -> execute", + "CALLS tests/test_js_type_inference_integration.py:0 -> get", + "CALLS tests/test_js_type_inference_integration.py:0 -> js_parser", + "CALLS tests/test_js_type_inference_integration.py:0 -> js_type_engine", + "CALLS tests/test_js_type_inference_integration.py:0 -> mock_find_method_ast_node", + "CALLS tests/test_js_type_inference_integration.py:0 -> mock_function_registry", + "CALLS tests/test_js_type_inference_integration.py:0 -> mock_import_processor", + "CALLS tests/test_js_type_inference_integration.py:0 -> process", + "CALLS tests/test_js_type_inference_integration.py:0 -> run", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_array_literal_not_inferred", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_arrow_function_body", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_async_function_with_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_conditional_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_deeply_nested_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_destructuring_not_inferred", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_function_call_assignment", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_let_declaration_with_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_loop_with_new_expression", + "CALLS 
tests/test_js_type_inference_integration.py:0 -> test_multiple_variable_declarations", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_nested_in_class_method", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_nested_in_function", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_new_expression_with_arguments", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_number_literal_not_inferred", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_object_literal_not_inferred", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_resolves_imported_class", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_resolves_local_class", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_simple_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_string_literal_not_inferred", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_try_catch_with_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_typescript_generic_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_typescript_interface_implementation", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_typescript_multiple_declarations_in_class", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_typescript_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> test_var_declaration_with_new_expression", + "CALLS tests/test_js_type_inference_integration.py:0 -> ts_parser", + "CALLS tests/test_js_type_inference_unit.py:0 -> create_call_expression_with_member", + "CALLS tests/test_js_type_inference_unit.py:0 -> js_type_engine", + "CALLS tests/test_js_type_inference_unit.py:0 -> mock_find_method_ast_node", + "CALLS tests/test_js_type_inference_unit.py:0 -> mock_function_registry", + "CALLS tests/test_js_type_inference_unit.py:0 -> mock_import_processor", + "CALLS tests/test_js_type_inference_unit.py:0 -> 
test_call_expression_with_identifier_returns_func_name", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_empty_method_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_empty_node_returns_empty_dict", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_exception_in_query_continues_to_next_language", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_import_takes_precedence_over_local", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_invalid_method_call_format_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_method_ast_not_found_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_multiple_variable_declarators", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_nested_variable_declarator", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_new_expression_resolves_class_qn", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_new_expression_returns_class_name", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_resolve_local_class_in_registry", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_resolve_returns_none_when_not_found", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_resolve_via_import_mapping_checks_full_class_qn", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_resolve_via_import_mapping_returns_imported_qn", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_return_with_no_expression_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_returns_language_when_available", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_returns_none_when_queries_is_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_too_many_parts_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_unrecognized_node_type_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_unresolved_class_returns_none", + "CALLS tests/test_js_type_inference_unit.py:0 -> 
test_variable_declarator_with_function_call", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_variable_declarator_with_new_expression", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_variable_with_uninferrable_value_is_skipped", + "CALLS tests/test_js_type_inference_unit.py:0 -> test_variable_without_value_is_skipped", + "CALLS tests/test_js_utils.py:0 -> js_parser", + "CALLS tests/test_js_utils.py:0 -> test_chained_method_call", + "CALLS tests/test_js_utils.py:0 -> test_empty_return", + "CALLS tests/test_js_utils.py:0 -> test_finds_constructor", + "CALLS tests/test_js_utils.py:0 -> test_finds_existing_method", + "CALLS tests/test_js_utils.py:0 -> test_finds_method_in_class", + "CALLS tests/test_js_utils.py:0 -> test_finds_method_in_nested_structure", + "CALLS tests/test_js_utils.py:0 -> test_finds_multiple_returns", + "CALLS tests/test_js_utils.py:0 -> test_finds_nested_returns", + "CALLS tests/test_js_utils.py:0 -> test_finds_single_return", + "CALLS tests/test_js_utils.py:0 -> test_finds_static_method", + "CALLS tests/test_js_utils.py:0 -> test_multiple_classes_finds_correct_one", + "CALLS tests/test_js_utils.py:0 -> test_multiple_methods_finds_correct_one", + "CALLS tests/test_js_utils.py:0 -> test_nested_object_access", + "CALLS tests/test_js_utils.py:0 -> test_new_date", + "CALLS tests/test_js_utils.py:0 -> test_new_with_arguments", + "CALLS tests/test_js_utils.py:0 -> test_new_with_member_expression", + "CALLS tests/test_js_utils.py:0 -> test_no_returns_empty_list", + "CALLS tests/test_js_utils.py:0 -> test_non_member_expression_returns_none", + "CALLS tests/test_js_utils.py:0 -> test_non_new_expression_returns_none", + "CALLS tests/test_js_utils.py:0 -> test_property_access_without_call", + "CALLS tests/test_js_utils.py:0 -> test_return_class_property", + "CALLS tests/test_js_utils.py:0 -> test_return_literal_returns_none", + "CALLS tests/test_js_utils.py:0 -> test_return_member_with_different_class_name", + "CALLS tests/test_js_utils.py:0 
-> test_return_new_expression", + "CALLS tests/test_js_utils.py:0 -> test_return_this", + "CALLS tests/test_js_utils.py:0 -> test_return_this_property", + "CALLS tests/test_js_utils.py:0 -> test_return_unrelated_expression", + "CALLS tests/test_js_utils.py:0 -> test_returns_none_for_nonexistent_class", + "CALLS tests/test_js_utils.py:0 -> test_returns_none_for_nonexistent_method", + "CALLS tests/test_js_utils.py:0 -> test_short_qualified_name", + "CALLS tests/test_js_utils.py:0 -> test_simple_method_call", + "CALLS tests/test_js_utils.py:0 -> test_simple_new_expression", + "CALLS tests/test_l3_decorator_normalization.py:0 -> test_call_attributed_to_wrapped_function_not_wrapper", + "CALLS tests/test_l3_decorator_normalization.py:0 -> test_no_generic_wrapper_node_appears", + "CALLS tests/test_l3_decorator_normalization.py:0 -> test_wrapped_function_body_calls_are_preserved", + "CALLS tests/test_l3_decorator_normalization.py:0 -> wrapper", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_constraint_keys_are_pascal_case", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_constraint_keys_are_strings", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_constraint_values_are_strings", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_constraint_values_are_valid_property_names", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_extensions_map_to_correct_language", + "CALLS tests/test_language_node_coverage.py:0 -> test_all_node_types_have_constraints", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_has_file_extensions", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_has_language_spec", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_has_metadata", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_has_status", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_spec_has_call_node_types", + "CALLS 
tests/test_language_node_coverage.py:0 -> test_each_language_spec_has_class_node_types", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_spec_has_function_node_types", + "CALLS tests/test_language_node_coverage.py:0 -> test_each_language_spec_has_module_node_types", + "CALLS tests/test_language_node_coverage.py:0 -> test_extension_maps_to_language", + "CALLS tests/test_language_node_coverage.py:0 -> test_language_spec_has_correct_extensions", + "CALLS tests/test_language_node_coverage.py:0 -> test_node_label_value_is_pascal_case", + "CALLS tests/test_language_node_coverage.py:0 -> test_node_type_value_is_pascal_case", + "CALLS tests/test_language_tool_unit.py:0 -> test_adds_dot_prefix_to_extensions", + "CALLS tests/test_language_tool_unit.py:0 -> test_adds_root_nodes_to_modules", + "CALLS tests/test_language_tool_unit.py:0 -> test_categorizes_classes", + "CALLS tests/test_language_tool_unit.py:0 -> test_categorizes_functions", + "CALLS tests/test_language_tool_unit.py:0 -> test_categorizes_modules", + "CALLS tests/test_language_tool_unit.py:0 -> test_deduplicates_results", + "CALLS tests/test_language_tool_unit.py:0 -> test_deduplicates_subtypes", + "CALLS tests/test_language_tool_unit.py:0 -> test_empty_input", + "CALLS tests/test_language_tool_unit.py:0 -> test_empty_lists", + "CALLS tests/test_language_tool_unit.py:0 -> test_excludes_call_from_functions", + "CALLS tests/test_language_tool_unit.py:0 -> test_extracts_subtypes", + "CALLS tests/test_language_tool_unit.py:0 -> test_finds_in_language_subdirectory", + "CALLS tests/test_language_tool_unit.py:0 -> test_finds_in_src_directory", + "CALLS tests/test_language_tool_unit.py:0 -> test_finds_with_underscore_language_name", + "CALLS tests/test_language_tool_unit.py:0 -> test_immutable", + "CALLS tests/test_language_tool_unit.py:0 -> test_namedtuple_fields", + "CALLS tests/test_language_tool_unit.py:0 -> test_nodes_without_subtypes", + "CALLS tests/test_language_tool_unit.py:0 -> 
test_parses_valid_config", + "CALLS tests/test_language_tool_unit.py:0 -> test_preserves_existing_dot_prefix", + "CALLS tests/test_language_tool_unit.py:0 -> test_returns_none_for_empty_grammars", + "CALLS tests/test_language_tool_unit.py:0 -> test_returns_none_for_missing_file", + "CALLS tests/test_language_tool_unit.py:0 -> test_returns_none_for_missing_grammars_key", + "CALLS tests/test_language_tool_unit.py:0 -> test_returns_none_when_not_found", + "CALLS tests/test_language_tool_unit.py:0 -> test_uses_provided_language_name", + "CALLS tests/test_llm_service_unit.py:0 -> test_adds_semicolon_if_missing", + "CALLS tests/test_llm_service_unit.py:0 -> test_creates_agent_with_tools", + "CALLS tests/test_llm_service_unit.py:0 -> test_generate_raises_on_agent_error", + "CALLS tests/test_llm_service_unit.py:0 -> test_generate_raises_on_invalid_output", + "CALLS tests/test_llm_service_unit.py:0 -> test_generate_returns_cleaned_query", + "CALLS tests/test_llm_service_unit.py:0 -> test_handles_complex_query", + "CALLS tests/test_llm_service_unit.py:0 -> test_handles_multiline_query", + "CALLS tests/test_llm_service_unit.py:0 -> test_init_creates_agent", + "CALLS tests/test_llm_service_unit.py:0 -> test_init_raises_on_error", + "CALLS tests/test_llm_service_unit.py:0 -> test_keeps_existing_semicolon", + "CALLS tests/test_llm_service_unit.py:0 -> test_raises_on_error", + "CALLS tests/test_llm_service_unit.py:0 -> test_removes_backticks", + "CALLS tests/test_llm_service_unit.py:0 -> test_removes_cypher_prefix", + "CALLS tests/test_llm_service_unit.py:0 -> test_removes_leading_whitespace", + "CALLS tests/test_llm_service_unit.py:0 -> test_removes_trailing_whitespace", + "CALLS tests/test_llm_service_unit.py:0 -> test_uses_local_prompt_for_ollama", + "CALLS tests/test_local_alias_calls.py:0 -> ensure_node_batch", + "CALLS tests/test_local_alias_calls.py:0 -> ensure_relationship_batch", + "CALLS tests/test_local_alias_calls.py:0 -> execute_write", + "CALLS 
tests/test_local_alias_calls.py:0 -> fetch_all", + "CALLS tests/test_local_alias_calls.py:0 -> flush_all", + "CALLS tests/test_local_alias_calls.py:0 -> test_alias_to_module_function_is_a_call", + "CALLS tests/test_local_alias_calls.py:0 -> test_alias_to_self_method_is_a_call", + "CALLS tests/test_local_alias_calls.py:0 -> test_direct_call_unaffected", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> execute_write", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> fetch_all", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> flush_all", + "CALLS tests/test_local_alias_chain_resolution.py:0 -> test_local_alias_attribute_chain_dispatches_to_dunder", + "CALLS tests/test_lua_54_edge_cases.py:0 -> close", + "CALLS tests/test_lua_54_edge_cases.py:0 -> insert", + "CALLS tests/test_lua_54_edge_cases.py:0 -> read", + "CALLS tests/test_lua_54_edge_cases.py:0 -> test_lua_54_bitwise_operators", + "CALLS tests/test_lua_54_edge_cases.py:0 -> test_lua_54_goto_labels", + "CALLS tests/test_lua_54_edge_cases.py:0 -> test_lua_54_utf8_library", + "CALLS tests/test_lua_54_edge_cases.py:0 -> type", + "CALLS tests/test_lua_closures.py:0 -> add", + "CALLS tests/test_lua_closures.py:0 -> execute", + "CALLS tests/test_lua_closures.py:0 -> get", + "CALLS tests/test_lua_closures.py:0 -> insert", + "CALLS tests/test_lua_closures.py:0 -> test_lua_advanced_closures", + "CALLS tests/test_lua_closures.py:0 -> test_lua_basic_closures", + "CALLS tests/test_lua_closures.py:0 -> test_lua_event_system_closures", + "CALLS tests/test_lua_closures.py:0 -> test_lua_functional_programming", + "CALLS tests/test_lua_complex_scenarios.py:0 -> close", + "CALLS tests/test_lua_complex_scenarios.py:0 -> decode", + "CALLS tests/test_lua_complex_scenarios.py:0 -> execute", + "CALLS tests/test_lua_complex_scenarios.py:0 -> get", + 
"CALLS tests/test_lua_complex_scenarios.py:0 -> handler", + "CALLS tests/test_lua_complex_scenarios.py:0 -> insert", + "CALLS tests/test_lua_complex_scenarios.py:0 -> parse", + "CALLS tests/test_lua_complex_scenarios.py:0 -> read", + "CALLS tests/test_lua_complex_scenarios.py:0 -> render", + "CALLS tests/test_lua_complex_scenarios.py:0 -> start", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_configuration_management_scenario", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_data_processing_pipeline", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_database_orm_scenario", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_game_engine_scenario", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_microservice_architecture", + "CALLS tests/test_lua_complex_scenarios.py:0 -> test_web_framework_scenario", + "CALLS tests/test_lua_complex_scenarios.py:0 -> type", + "CALLS tests/test_lua_comprehensive.py:0 -> close", + "CALLS tests/test_lua_comprehensive.py:0 -> create_model", + "CALLS tests/test_lua_comprehensive.py:0 -> decode", + "CALLS tests/test_lua_comprehensive.py:0 -> execute", + "CALLS tests/test_lua_comprehensive.py:0 -> get", + "CALLS tests/test_lua_comprehensive.py:0 -> handler", + "CALLS tests/test_lua_comprehensive.py:0 -> insert", + "CALLS tests/test_lua_comprehensive.py:0 -> keys", + "CALLS tests/test_lua_comprehensive.py:0 -> load", + "CALLS tests/test_lua_comprehensive.py:0 -> put", + "CALLS tests/test_lua_comprehensive.py:0 -> read", + "CALLS tests/test_lua_comprehensive.py:0 -> read_file", + "CALLS tests/test_lua_comprehensive.py:0 -> render", + "CALLS tests/test_lua_comprehensive.py:0 -> save", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_binary_tree", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_database_orm", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_environment_management", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_factory_pattern", + "CALLS tests/test_lua_comprehensive.py:0 
-> test_lua_file_operations", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_final_comprehensive_check", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_hash_table", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_json_serialization", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_linked_list", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_memory_management", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_module_system", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_observer_pattern", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_pcall_patterns", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_performance_utils", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_search_algorithms", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_sorting_algorithms", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_strategy_pattern", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_string_interpolation", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_string_patterns", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_table_iteration", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_table_operations", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_template_engine", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_web_framework", + "CALLS tests/test_lua_comprehensive.py:0 -> test_lua_xpcall_patterns", + "CALLS tests/test_lua_comprehensive.py:0 -> type", + "CALLS tests/test_lua_comprehensive.py:0 -> write_file", + "CALLS tests/test_lua_containment_oracle.py:0 -> test_cgr_matches_luaparse_oracle_on_containment_edges", + "CALLS tests/test_lua_coroutines.py:0 -> insert", + "CALLS tests/test_lua_coroutines.py:0 -> run", + "CALLS tests/test_lua_coroutines.py:0 -> status", + "CALLS tests/test_lua_coroutines.py:0 -> test_lua_async_patterns", + "CALLS tests/test_lua_coroutines.py:0 -> test_lua_basic_coroutines", + "CALLS tests/test_lua_coroutines.py:0 -> 
test_lua_coroutine_scheduler", + "CALLS tests/test_lua_coroutines.py:0 -> test_lua_generator_patterns", + "CALLS tests/test_lua_coroutines.py:0 -> test_lua_state_machines", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> load", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> lua_singleton_project", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> main", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> save", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> start", + "CALLS tests/test_lua_cross_file_singleton.py:0 -> test_lua_singleton_pattern_cross_file_calls", + "CALLS tests/test_lua_edge_cases.py:0 -> test_lua_require_edge_cases", + "CALLS tests/test_lua_environment.py:0 -> close", + "CALLS tests/test_lua_environment.py:0 -> insert", + "CALLS tests/test_lua_environment.py:0 -> read", + "CALLS tests/test_lua_environment.py:0 -> test_dynamic_code_execution", + "CALLS tests/test_lua_environment.py:0 -> test_environment_manipulation", + "CALLS tests/test_lua_environment.py:0 -> test_global_environment_access", + "CALLS tests/test_lua_environment.py:0 -> test_global_variable_management", + "CALLS tests/test_lua_environment.py:0 -> test_module_environment_patterns", + "CALLS tests/test_lua_environment.py:0 -> type", + "CALLS tests/test_lua_error_handling.py:0 -> close", + "CALLS tests/test_lua_error_handling.py:0 -> execute", + "CALLS tests/test_lua_error_handling.py:0 -> insert", + "CALLS tests/test_lua_error_handling.py:0 -> operation", + "CALLS tests/test_lua_error_handling.py:0 -> read", + "CALLS tests/test_lua_error_handling.py:0 -> read_file", + "CALLS tests/test_lua_error_handling.py:0 -> start", + "CALLS tests/test_lua_error_handling.py:0 -> status", + "CALLS tests/test_lua_error_handling.py:0 -> test_lua_debug_library", + "CALLS tests/test_lua_error_handling.py:0 -> test_lua_error_recovery", + "CALLS tests/test_lua_error_handling.py:0 -> test_lua_exception_patterns", + "CALLS tests/test_lua_error_handling.py:0 -> 
test_lua_pcall_xpcall_patterns", + "CALLS tests/test_lua_error_handling.py:0 -> type", + "CALLS tests/test_lua_file_io.py:0 -> close", + "CALLS tests/test_lua_file_io.py:0 -> flush", + "CALLS tests/test_lua_file_io.py:0 -> insert", + "CALLS tests/test_lua_file_io.py:0 -> read", + "CALLS tests/test_lua_file_io.py:0 -> read_file", + "CALLS tests/test_lua_file_io.py:0 -> test_binary_file_operations", + "CALLS tests/test_lua_file_io.py:0 -> test_file_operations", + "CALLS tests/test_lua_file_io.py:0 -> test_file_positioning_and_info", + "CALLS tests/test_lua_file_io.py:0 -> test_file_reading_modes", + "CALLS tests/test_lua_file_io.py:0 -> test_serialization_patterns", + "CALLS tests/test_lua_file_io.py:0 -> type", + "CALLS tests/test_lua_file_io.py:0 -> write_file", + "CALLS tests/test_lua_functions.py:0 -> test_lua_function_discovery", + "CALLS tests/test_lua_functions_methods.py:0 -> add", + "CALLS tests/test_lua_functions_methods.py:0 -> test_lua_function_and_method_calls", + "CALLS tests/test_lua_imports.py:0 -> test_lua_pcall_require_pattern", + "CALLS tests/test_lua_imports.py:0 -> test_lua_require_imports", + "CALLS tests/test_lua_imports.py:0 -> test_lua_stdlib_detection", + "CALLS tests/test_lua_imports_paths.py:0 -> test_lua_imports_paths", + "CALLS tests/test_lua_metatables.py:0 -> get", + "CALLS tests/test_lua_metatables.py:0 -> insert", + "CALLS tests/test_lua_metatables.py:0 -> items", + "CALLS tests/test_lua_metatables.py:0 -> test_lua_arithmetic_metamethods", + "CALLS tests/test_lua_metatables.py:0 -> test_lua_call_metamethod", + "CALLS tests/test_lua_metatables.py:0 -> test_lua_comparison_metamethods", + "CALLS tests/test_lua_metatables.py:0 -> test_lua_index_metamethods", + "CALLS tests/test_lua_metatables.py:0 -> test_lua_weak_references", + "CALLS tests/test_lua_metatables.py:0 -> type", + "CALLS tests/test_lua_modern_features.py:0 -> close", + "CALLS tests/test_lua_modern_features.py:0 -> insert", + "CALLS tests/test_lua_modern_features.py:0 -> 
read", + "CALLS tests/test_lua_modern_features.py:0 -> status", + "CALLS tests/test_lua_modern_features.py:0 -> test_lua_54_attributes_syntax", + "CALLS tests/test_lua_modern_features.py:0 -> test_lua_54_enhanced_metamethods", + "CALLS tests/test_lua_modern_features.py:0 -> test_lua_54_enhanced_stdlib", + "CALLS tests/test_lua_modern_features.py:0 -> test_lua_54_numerical_for_loops", + "CALLS tests/test_lua_modern_features.py:0 -> type", + "CALLS tests/test_lua_oop_patterns.py:0 -> add", + "CALLS tests/test_lua_oop_patterns.py:0 -> insert", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_class_pattern_basic", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_inheritance_pattern", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_mixin_pattern", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_module_pattern", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_prototype_pattern", + "CALLS tests/test_lua_oop_patterns.py:0 -> test_lua_singleton_pattern", + "CALLS tests/test_lua_oop_patterns.py:0 -> type", + "CALLS tests/test_lua_span_oracle.py:0 -> handler", + "CALLS tests/test_lua_span_oracle.py:0 -> test_cgr_matches_luaparse_oracle_on_node_spans", + "CALLS tests/test_lua_stdlib.py:0 -> close", + "CALLS tests/test_lua_stdlib.py:0 -> execute", + "CALLS tests/test_lua_stdlib.py:0 -> flush", + "CALLS tests/test_lua_stdlib.py:0 -> insert", + "CALLS tests/test_lua_stdlib.py:0 -> read", + "CALLS tests/test_lua_stdlib.py:0 -> test_builtin_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_debug_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_io_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_math_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_os_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_package_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_string_module_functions", + "CALLS tests/test_lua_stdlib.py:0 -> test_table_module_functions", + "CALLS 
tests/test_lua_stdlib.py:0 -> type", + "CALLS tests/test_lua_string_patterns.py:0 -> insert", + "CALLS tests/test_lua_string_patterns.py:0 -> test_complex_pattern_operations", + "CALLS tests/test_lua_string_patterns.py:0 -> test_string_manipulation_functions", + "CALLS tests/test_lua_string_patterns.py:0 -> test_string_pattern_matching", + "CALLS tests/test_lua_string_patterns.py:0 -> test_unicode_and_encoding", + "CALLS tests/test_lua_structure_oracle.py:0 -> test_cgr_matches_luaparse_oracle_on_lua_structure", + "CALLS tests/test_lua_table_manipulation.py:0 -> insert", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_construction_and_access", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_iteration_patterns", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_metatable_operations", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_modification_functions", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_serialization", + "CALLS tests/test_lua_table_manipulation.py:0 -> test_table_utility_functions", + "CALLS tests/test_lua_table_manipulation.py:0 -> type", + "CALLS tests/test_lua_type_inference_integration.py:0 -> load", + "CALLS tests/test_lua_type_inference_integration.py:0 -> lua_parser", + "CALLS tests/test_lua_type_inference_integration.py:0 -> lua_type_engine", + "CALLS tests/test_lua_type_inference_integration.py:0 -> mock_function_registry", + "CALLS tests/test_lua_type_inference_integration.py:0 -> mock_import_processor", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_chained_method_call_only_first_part", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_class_resolved_via_method_prefix", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_class_with_module_table_pattern", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_empty_code", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_global_variable_not_tracked", + "CALLS 
tests/test_lua_type_inference_integration.py:0 -> test_mixed_resolvable_and_unresolvable", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_multiple_variable_declarations", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_nested_function_with_variable_declarations", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_only_comments", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_simple_variable_declaration_with_method_call", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_unicode_identifier", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_unresolvable_class_skipped", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_declaration_without_method_call", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_in_for_loop", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_in_if_block", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_with_imported_class", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_with_regular_function_call", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_with_string_value", + "CALLS tests/test_lua_type_inference_integration.py:0 -> test_variable_with_table_constructor", + "CALLS tests/test_lua_type_inference_unit.py:0 -> lua_type_engine", + "CALLS tests/test_lua_type_inference_unit.py:0 -> mock_function_registry", + "CALLS tests/test_lua_type_inference_unit.py:0 -> mock_import_processor", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_assignment_with_empty_expression_list", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_assignment_with_empty_variable_list", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_build_map_empty_for_non_matching_nodes", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_build_map_skips_unresolvable_types", + "CALLS tests/test_lua_type_inference_unit.py:0 -> 
test_build_map_with_imported_class", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_build_map_with_nested_declarations", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_build_map_with_single_variable", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_class_name_with_special_characters", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_deeply_nested_variable_declaration", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_empty_module_qn", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_function_call_without_method_index_expression", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_infer_from_method_call", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_infer_returns_none_for_function_call_without_method_index", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_infer_returns_none_for_non_function_call", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_infer_returns_none_when_class_not_resolved", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_method_index_with_empty_class_name", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_method_index_with_empty_method_name", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_method_index_with_non_identifier_children", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_method_index_with_only_class_identifier", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_method_prefix_matching_colon_separator", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_method_prefix_with_dot_method", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_more_variables_than_values", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_non_function_call_in_expression_list", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_resolve_import_takes_precedence", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_resolve_returns_none_when_not_found", + "CALLS tests/test_lua_type_inference_unit.py:0 -> 
test_resolve_via_function_registry_direct", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_resolve_via_import_mapping", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_resolve_via_method_prefix_matching", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_resolve_with_no_import_mapping_for_module", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_variable_declaration_without_assignment", + "CALLS tests/test_lua_type_inference_unit.py:0 -> test_variable_with_empty_text", + "CALLS tests/test_lua_utils.py:0 -> lua_parser", + "CALLS tests/test_lua_utils.py:0 -> test_assignment_without_local", + "CALLS tests/test_lua_utils.py:0 -> test_basic_pcall_require", + "CALLS tests/test_lua_utils.py:0 -> test_complex_expression_assignment", + "CALLS tests/test_lua_utils.py:0 -> test_deeply_nested_assignment", + "CALLS tests/test_lua_utils.py:0 -> test_dot_index_accepted_with_custom_types", + "CALLS tests/test_lua_utils.py:0 -> test_dot_index_expression_rejected", + "CALLS tests/test_lua_utils.py:0 -> test_empty_function_body", + "CALLS tests/test_lua_utils.py:0 -> test_finds_assignment_statement", + "CALLS tests/test_lua_utils.py:0 -> test_finds_expression_statement", + "CALLS tests/test_lua_utils.py:0 -> test_finds_for_statement", + "CALLS tests/test_lua_utils.py:0 -> test_finds_if_statement", + "CALLS tests/test_lua_utils.py:0 -> test_finds_local_statement", + "CALLS tests/test_lua_utils.py:0 -> test_function_as_argument", + "CALLS tests/test_lua_utils.py:0 -> test_function_call_not_a_statement", + "CALLS tests/test_lua_utils.py:0 -> test_method_syntax_function", + "CALLS tests/test_lua_utils.py:0 -> test_multiline_assignment", + "CALLS tests/test_lua_utils.py:0 -> test_multiple_assignment_first_value", + "CALLS tests/test_lua_utils.py:0 -> test_multiple_assignment_second_value", + "CALLS tests/test_lua_utils.py:0 -> test_nested_function_in_table", + "CALLS tests/test_lua_utils.py:0 -> test_nested_in_function", + "CALLS tests/test_lua_utils.py:0 
-> test_nested_pcall", + "CALLS tests/test_lua_utils.py:0 -> test_no_assignment_context", + "CALLS tests/test_lua_utils.py:0 -> test_no_statement_ancestor", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_in_if_block", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_not_in_assignment", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_single_return_value", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_three_return_values", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_with_different_names", + "CALLS tests/test_lua_utils.py:0 -> test_pcall_with_non_identifier_target", + "CALLS tests/test_lua_utils.py:0 -> test_return_statement_function", + "CALLS tests/test_lua_utils.py:0 -> test_simple_assignment", + "CALLS tests/test_lua_utils.py:0 -> test_unicode_identifier", + "CALLS tests/test_lua_utils.py:0 -> test_xpcall_pattern", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> add", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> anyio_backend", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> mcp_registry", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> temp_project_root", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_class_snippet", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_function_snippet", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_malformed_qualified_name", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_method_snippet", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_multiple_snippets_sequentially", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_nonexistent_function", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_snippet_from_nested_module", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_snippet_tool_returns_none", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_snippet_verifies_qualified_name_passed", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_snippet_with_exception", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> 
test_get_snippet_with_no_docstring", + "CALLS tests/test_mcp_get_code_snippet.py:0 -> test_get_snippet_with_unicode", + "CALLS tests/test_mcp_list_directory.py:0 -> anyio_backend", + "CALLS tests/test_mcp_list_directory.py:0 -> mcp_registry", + "CALLS tests/test_mcp_list_directory.py:0 -> sample_directory_structure", + "CALLS tests/test_mcp_list_directory.py:0 -> temp_project_root", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_directory_with_hidden_files", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_directory_with_special_characters", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_empty_directory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_file_instead_of_directory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_nested_directory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_nonexistent_directory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_prevents_directory_traversal", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_root_directory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_subdirectory", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_with_absolute_path", + "CALLS tests/test_mcp_list_directory.py:0 -> test_list_with_relative_path", + "CALLS tests/test_mcp_list_directory.py:0 -> test_output_contains_only_names_not_paths", + "CALLS tests/test_mcp_list_directory.py:0 -> test_output_is_newline_separated", + "CALLS tests/test_mcp_query_and_index.py:0 -> add", + "CALLS tests/test_mcp_query_and_index.py:0 -> anyio_backend", + "CALLS tests/test_mcp_query_and_index.py:0 -> ensure_constraints", + "CALLS tests/test_mcp_query_and_index.py:0 -> mcp_registry", + "CALLS tests/test_mcp_query_and_index.py:0 -> mock_delete", + "CALLS tests/test_mcp_query_and_index.py:0 -> mock_run", + "CALLS tests/test_mcp_query_and_index.py:0 -> run", + "CALLS tests/test_mcp_query_and_index.py:0 -> temp_project_root", + "CALLS tests/test_mcp_query_and_index.py:0 -> 
test_delete_project_error", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_delete_project_not_found", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_delete_project_success", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_and_query_workflow", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_ensures_constraints_and_flushes_around_run", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_clears_project_data_first", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_creates_graph_updater", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_deletes_project_before_updater_runs", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_handles_errors", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_multiple_times", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_success", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_index_repository_with_empty_directory", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_list_projects_empty", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_list_projects_error", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_list_projects_success", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_after_index", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_error_handling", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_finds_classes", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_finds_function_calls", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_finds_functions", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_handles_unicode", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_verifies_parameter_passed", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_with_complex_natural_language", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_query_with_no_results", + "CALLS 
tests/test_mcp_query_and_index.py:0 -> test_sequential_index_only_clears_own_project_data", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_update_ensures_constraints_and_flushes_around_run", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_wipe_database_confirmed", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_wipe_database_error", + "CALLS tests/test_mcp_query_and_index.py:0 -> test_wipe_database_not_confirmed", + "CALLS tests/test_mcp_read_file.py:0 -> anyio_backend", + "CALLS tests/test_mcp_read_file.py:0 -> large_file", + "CALLS tests/test_mcp_read_file.py:0 -> mcp_registry", + "CALLS tests/test_mcp_read_file.py:0 -> sample_file", + "CALLS tests/test_mcp_read_file.py:0 -> temp_project_root", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_empty_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_file_with_unicode", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_full_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_last_lines_of_large_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_middle_of_large_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_nonexistent_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_offset_beyond_file_length", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_single_line_file", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_with_limit_only", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_with_offset_and_limit", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_with_offset_only", + "CALLS tests/test_mcp_read_file.py:0 -> test_read_zero_offset", + "CALLS tests/test_mcp_server.py:0 -> fake_stdio", + "CALLS tests/test_mcp_server.py:0 -> test_defaults_to_cwd_when_empty_string", + "CALLS tests/test_mcp_server.py:0 -> test_defaults_to_cwd_when_not_configured", + "CALLS tests/test_mcp_server.py:0 -> test_defaults_to_cwd_without_error", + "CALLS tests/test_mcp_server.py:0 -> test_env_var_takes_priority_over_settings", + "CALLS tests/test_mcp_server.py:0 -> 
test_handles_symlinks", + "CALLS tests/test_mcp_server.py:0 -> test_raises_error_when_path_does_not_exist", + "CALLS tests/test_mcp_server.py:0 -> test_raises_error_when_path_is_file", + "CALLS tests/test_mcp_server.py:0 -> test_resolves_relative_paths", + "CALLS tests/test_mcp_server.py:0 -> test_serve_stdio_closes_qdrant_client_on_shutdown", + "CALLS tests/test_mcp_server.py:0 -> test_service_lifecycle_closes_qdrant_on_exception", + "CALLS tests/test_mcp_server.py:0 -> test_service_lifecycle_closes_qdrant_on_exit", + "CALLS tests/test_mcp_server.py:0 -> test_uses_environment_variable_when_set", + "CALLS tests/test_mcp_server.py:0 -> test_uses_settings_when_env_not_set", + "CALLS tests/test_mcp_server.py:0 -> test_works_with_actual_cwd", + "CALLS tests/test_mcp_surgical_replace.py:0 -> add", + "CALLS tests/test_mcp_surgical_replace.py:0 -> anyio_backend", + "CALLS tests/test_mcp_surgical_replace.py:0 -> mcp_registry", + "CALLS tests/test_mcp_surgical_replace.py:0 -> temp_project_root", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_multiple_replacements_in_sequence", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_code_not_found", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_different_file_types", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_function_implementation", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_in_subdirectory", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_method_implementation", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_multiline_block", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_nonexistent_file", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_preserves_whitespace", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_prevents_directory_traversal", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_readonly_file", + "CALLS tests/test_mcp_surgical_replace.py:0 -> 
test_replace_verifies_parameters_passed", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_with_empty_replacement", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_with_exact_match", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_with_exception", + "CALLS tests/test_mcp_surgical_replace.py:0 -> test_replace_with_unicode", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_calls_delete_with_empty_list_when_no_nodes", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_calls_delete_with_node_ids", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_filters_non_integer_ids", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_returns_empty_when_no_rows", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_returns_integer_ids", + "CALLS tests/test_mcp_tools_helpers.py:0 -> test_skips_rows_missing_key", + "CALLS tests/test_mcp_update_and_search.py:0 -> anyio_backend", + "CALLS tests/test_mcp_update_and_search.py:0 -> main", + "CALLS tests/test_mcp_update_and_search.py:0 -> mcp_registry", + "CALLS tests/test_mcp_update_and_search.py:0 -> temp_project_root", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_ask_agent_error", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_ask_agent_in_tool_map", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_ask_agent_registered", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_ask_agent_success", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_client_uses_constants", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_index_repository_warns_about_project_clear", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_main_single_query_prints_output", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_main_single_query_routes_logs_to_stderr", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_query_mcp_server_is_callable", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_query_mcp_server_opens_devnull", + "CALLS tests/test_mcp_update_and_search.py:0 -> 
test_query_with_errlog_empty_response", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_query_with_errlog_is_async", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_query_with_errlog_json_response", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_query_with_errlog_non_json_response", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_rag_agent_caches_after_first_access", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_rag_agent_includes_function_source_tool", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_rag_agent_includes_semantic_search_when_available", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_rag_agent_lazy_init", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_rag_agent_setter_allows_mock", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_semantic_search_calls_tool", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_semantic_search_in_tool_map", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_semantic_search_not_registered_without_deps", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_semantic_search_registered_with_deps", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_update_repository_error", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_update_repository_in_tool_map", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_update_repository_no_wipe", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_update_repository_registered", + "CALLS tests/test_mcp_update_and_search.py:0 -> test_update_repository_success", + "CALLS tests/test_mcp_write_file.py:0 -> anyio_backend", + "CALLS tests/test_mcp_write_file.py:0 -> mcp_registry", + "CALLS tests/test_mcp_write_file.py:0 -> temp_project_root", + "CALLS tests/test_mcp_write_file.py:0 -> test_overwrite_existing_file", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_empty_file", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_file_in_subdirectory", + "CALLS tests/test_mcp_write_file.py:0 -> 
test_write_json_content", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_multiline_content", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_new_file", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_prevents_directory_traversal", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_python_code", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_to_readonly_directory", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_unicode_content", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_very_long_content", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_with_relative_path", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_with_spaces_in_filename", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_with_special_characters_in_filename", + "CALLS tests/test_mcp_write_file.py:0 -> test_write_with_various_file_extensions", + "CALLS tests/test_memgraph_batching.py:0 -> test_node_batch_flushes_when_threshold_reached", + "CALLS tests/test_memgraph_batching.py:0 -> test_node_batch_preserves_per_row_properties", + "CALLS tests/test_memgraph_batching.py:0 -> test_relationship_batch_flushes_after_threshold_and_respects_node_flush", + "CALLS tests/test_memory_limit.py:0 -> test_appends_hint_to_simple_query", + "CALLS tests/test_memory_limit.py:0 -> test_appends_hint_when_no_trailing_semicolon", + "CALLS tests/test_memory_limit.py:0 -> test_handles_multiline_query", + "CALLS tests/test_memory_limit.py:0 -> test_handles_trailing_whitespace", + "CALLS tests/test_memory_limit.py:0 -> test_handles_whitespace_before_semicolon", + "CALLS tests/test_memory_limit.py:0 -> test_preserves_existing_hint", + "CALLS tests/test_memory_limit.py:0 -> test_preserves_existing_hint_case_insensitive", + "CALLS tests/test_memory_limit.py:0 -> test_uses_configured_megabytes", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> _get_function_caller_calls", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> _get_module_caller_calls", + "CALLS 
tests/test_method_calls_caller_attribution.py:0 -> _load", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> add", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> execute", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> parse", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> process", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> run", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> save", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_class_method_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_const_method_calls", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_constructor_body_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_constructor_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_deeply_nested_call_chain", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_dunder_init_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_impl_method_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_method_calling_another_via_this_pointer", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_method_calling_free_function_has_method_caller", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_method_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_method_with_parameters", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_multiple_classes_in_one_file", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_multiple_impl_methods", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_multiple_methods_calling_each_other", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_multiple_methods_with_types", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_simple_class_method_calls_method", + "CALLS 
tests/test_method_calls_caller_attribution.py:0 -> test_static_method_calls", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_struct_method_calls_method", + "CALLS tests/test_method_calls_caller_attribution.py:0 -> test_virtual_method_calls", + "CALLS tests/test_model_switching.py:0 -> mock_console", + "CALLS tests/test_model_switching.py:0 -> mock_settings", + "CALLS tests/test_model_switching.py:0 -> test_assertion_error_is_caught", + "CALLS tests/test_model_switching.py:0 -> test_deferred_results_passed_only_after_approval", + "CALLS tests/test_model_switching.py:0 -> test_empty_model_id_raises_error", + "CALLS tests/test_model_switching.py:0 -> test_empty_provider_raises_error", + "CALLS tests/test_model_switching.py:0 -> test_help_command", + "CALLS tests/test_model_switching.py:0 -> test_invalid_provider_raises_error", + "CALLS tests/test_model_switching.py:0 -> test_missing_colon_raises_format_error", + "CALLS tests/test_model_switching.py:0 -> test_model_command_prefix", + "CALLS tests/test_model_switching.py:0 -> test_model_creation_error_shows_error_message", + "CALLS tests/test_model_switching.py:0 -> test_model_help_case_insensitive", + "CALLS tests/test_model_switching.py:0 -> test_model_help_preserves_current_model", + "CALLS tests/test_model_switching.py:0 -> test_model_help_shows_usage", + "CALLS tests/test_model_switching.py:0 -> test_model_override_none_by_default", + "CALLS tests/test_model_switching.py:0 -> test_model_override_passed_to_agent_run", + "CALLS tests/test_model_switching.py:0 -> test_multimodal_user_prompt_not_resent_after_approval", + "CALLS tests/test_model_switching.py:0 -> test_multiple_switches_in_sequence", + "CALLS tests/test_model_switching.py:0 -> test_ollama_provider_uses_local_endpoint", + "CALLS tests/test_model_switching.py:0 -> test_preserves_previous_model_on_show", + "CALLS tests/test_model_switching.py:0 -> test_same_provider_uses_current_config", + "CALLS tests/test_model_switching.py:0 -> 
test_show_current_model_when_no_argument", + "CALLS tests/test_model_switching.py:0 -> test_show_current_model_with_trailing_space", + "CALLS tests/test_model_switching.py:0 -> test_show_default_model_when_no_override", + "CALLS tests/test_model_switching.py:0 -> test_switch_model_with_extra_whitespace", + "CALLS tests/test_model_switching.py:0 -> test_switch_then_show_preserves_model", + "CALLS tests/test_model_switching.py:0 -> test_switch_to_new_model", + "CALLS tests/test_model_switching.py:0 -> test_ui_messages_exist", + "CALLS tests/test_model_switching.py:0 -> test_ui_model_current_format", + "CALLS tests/test_model_switching.py:0 -> test_ui_model_switched_format", + "CALLS tests/test_model_switching.py:0 -> test_user_prompt_not_resent_across_multiple_deferred_rounds", + "CALLS tests/test_model_switching.py:0 -> test_user_prompt_not_resent_after_deferred_tool_approval", + "CALLS tests/test_model_switching.py:0 -> test_user_prompt_passed_on_first_call_when_no_deferred", + "CALLS tests/test_model_switching.py:0 -> test_value_error_is_caught", + "CALLS tests/test_model_switching.py:0 -> test_whitespace_around_colon_is_stripped", + "CALLS tests/test_module_call_attribution.py:0 -> load", + "CALLS tests/test_module_call_attribution.py:0 -> main", + "CALLS tests/test_module_call_attribution.py:0 -> test_bare_module_level_call_attributed_to_module", + "CALLS tests/test_module_call_attribution.py:0 -> test_cpp_file_scope_initializer_call_attributed_to_module", + "CALLS tests/test_module_call_attribution.py:0 -> test_default_argument_call_attributed_to_module", + "CALLS tests/test_module_call_attribution.py:0 -> test_nested_call_not_attributed_to_module", + "CALLS tests/test_module_call_attribution.py:0 -> test_top_level_call_is_attributed_to_module", + "CALLS tests/test_module_qn_language_collision.py:0 -> test_same_stem_files_get_distinct_module_qns", + "CALLS tests/test_module_qn_language_collision.py:0 -> test_same_stem_methods_do_not_collide", + "CALLS 
tests/test_multi_project.py:0 -> mock_memgraph_connect", + "CALLS tests/test_multi_project.py:0 -> mock_sync_path", + "CALLS tests/test_multi_project.py:0 -> mock_validate_models", + "CALLS tests/test_multi_project.py:0 -> test_all_empty_falls_back_to_default", + "CALLS tests/test_multi_project.py:0 -> test_drops_empty_entries", + "CALLS tests/test_multi_project.py:0 -> test_multiple_projects_comma_separated", + "CALLS tests/test_multi_project.py:0 -> test_multiple_projects_lists_all", + "CALLS tests/test_multi_project.py:0 -> test_no_projects_lists_list_projects_hint", + "CALLS tests/test_multi_project.py:0 -> test_returns_default_for_empty_string", + "CALLS tests/test_multi_project.py:0 -> test_returns_default_when_no_projects_flag", + "CALLS tests/test_multi_project.py:0 -> test_single_project_in_flag", + "CALLS tests/test_multi_project.py:0 -> test_single_project_mentions_starts_with", + "CALLS tests/test_multi_project.py:0 -> test_start_default_projects_uses_derived_name", + "CALLS tests/test_multi_project.py:0 -> test_start_passes_projects_to_single_query", + "CALLS tests/test_multi_project.py:0 -> test_strips_whitespace", + "CALLS tests/test_multilang_import_parsing.py:0 -> main", + "CALLS tests/test_multilang_import_parsing.py:0 -> test_go_import_parsing", + "CALLS tests/test_multilang_import_parsing.py:0 -> test_java_import_parsing", + "CALLS tests/test_multilang_import_parsing.py:0 -> test_javascript_import_parsing", + "CALLS tests/test_multilang_import_parsing.py:0 -> test_rust_complex_import_patterns", + "CALLS tests/test_multilang_import_parsing.py:0 -> test_rust_import_parsing", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_ctrl_c_raises_keyboard_interrupt", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_ctrl_e_submits_after_multiline_with_enter", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_ctrl_e_submits_buffer", + "CALLS tests/test_multiline_input_keybindings.py:0 -> 
test_ctrl_j_submits_after_multiline_with_enter", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_ctrl_j_submits_buffer", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_hint_mentions_both_submit_shortcuts", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_keybinding_enum_has_submit_shortcuts", + "CALLS tests/test_multiline_input_keybindings.py:0 -> test_result_is_stripped", + "CALLS tests/test_nested_function_defines.py:0 -> dfs", + "CALLS tests/test_nested_function_defines.py:0 -> ensure_node_batch", + "CALLS tests/test_nested_function_defines.py:0 -> ensure_relationship_batch", + "CALLS tests/test_nested_function_defines.py:0 -> execute_write", + "CALLS tests/test_nested_function_defines.py:0 -> fetch_all", + "CALLS tests/test_nested_function_defines.py:0 -> flush_all", + "CALLS tests/test_nested_function_defines.py:0 -> test_class_in_method_defined_by_method", + "CALLS tests/test_nested_function_defines.py:0 -> test_function_in_function_defined_by_function", + "CALLS tests/test_nested_function_defines.py:0 -> test_function_in_method_defined_by_method", + "CALLS tests/test_nested_function_qualified_names.py:0 -> execute", + "CALLS tests/test_nested_function_qualified_names.py:0 -> loader", + "CALLS tests/test_nested_function_qualified_names.py:0 -> nested_functions_project", + "CALLS tests/test_nested_function_qualified_names.py:0 -> test_arrow_functions_in_constructors", + "CALLS tests/test_nested_function_qualified_names.py:0 -> test_commonjs_exports_in_functions", + "CALLS tests/test_nested_function_qualified_names.py:0 -> test_export_functions_in_modules", + "CALLS tests/test_nested_function_qualified_names.py:0 -> test_object_methods_inside_functions", + "CALLS tests/test_node_relationship_coverage.py:0 -> NodeLabel", + "CALLS tests/test_node_relationship_coverage.py:0 -> UniqueKeyType", + "CALLS tests/test_node_relationship_coverage.py:0 -> capture_query", + "CALLS tests/test_node_relationship_coverage.py:0 -> 
test_all_node_labels_have_unique_key_mapping", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_all_node_labels_in_constraints", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_all_node_types_in_constraints", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_each_node_label_can_be_flushed", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_each_node_label_has_constraint", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_each_node_type_can_be_flushed", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_each_node_type_has_constraint", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_each_relationship_type_can_be_flushed", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_ensure_constraints_creates_all_constraints", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_ensure_constraints_creates_all_indexes", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_import_time_validation_catches_missing_keys", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_name_unique_key_uses_correct_property", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_node_label_count_matches_constraints_count", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_node_type_is_subset_of_node_label", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_node_unique_constraints_derived_from_single_source", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_node_without_unique_key_is_skipped_not_crashed", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_path_unique_key_uses_correct_property", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_qualified_name_unique_key_uses_correct_property", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_relationship_type_values_match_names", + "CALLS tests/test_node_relationship_coverage.py:0 -> test_relationship_types_are_uppercase", + "CALLS tests/test_node_relationship_coverage.py:0 -> 
test_unique_key_types_are_valid", + "CALLS tests/test_node_relationship_coverage.py:0 -> value", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> execute_write", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> fetch_all", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> flush_all", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> test_builtin_container_produces_no_dunder_edge", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> test_contains_operator_dispatches_to_dunder", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> test_len_dispatches_to_dunder", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> test_subscript_read_dispatches_to_getitem", + "CALLS tests/test_operator_dispatch_resolution.py:0 -> test_subscript_write_dispatches_to_setitem", + "CALLS tests/test_oracle_nested_defs.py:0 -> test_oracle_captures_function_in_except_handler", + "CALLS tests/test_oracle_nested_defs.py:0 -> test_oracle_captures_function_in_match_case", + "CALLS tests/test_permission_mode.py:0 -> test_cycle_toggles_back_to_normal", + "CALLS tests/test_permission_mode.py:0 -> test_cycle_toggles_to_yolo", + "CALLS tests/test_permission_mode.py:0 -> test_default_mode_is_normal", + "CALLS tests/test_php_containment_oracle.py:0 -> test_cgr_matches_php_parser_oracle_on_containment_edges", + "CALLS tests/test_php_functions.py:0 -> add", + "CALLS tests/test_php_functions.py:0 -> main", + "CALLS tests/test_php_functions.py:0 -> test_php_class_discovery", + "CALLS tests/test_php_functions.py:0 -> test_php_function_discovery", + "CALLS tests/test_php_functions.py:0 -> test_php_method_calls", + "CALLS tests/test_php_imports.py:0 -> index", + "CALLS tests/test_php_imports.py:0 -> test_php_multiple_use_statements", + "CALLS tests/test_php_imports.py:0 -> test_php_use_statement_import", + 
"CALLS tests/test_php_inheritance_edges.py:0 -> test_php_inheritance_and_implements_edges", + "CALLS tests/test_php_inheritance_oracle.py:0 -> test_cgr_matches_php_parser_oracle_on_inheritance_edges", + "CALLS tests/test_php_span_oracle.py:0 -> test_cgr_matches_php_parser_oracle_on_node_spans", + "CALLS tests/test_php_structure_oracle.py:0 -> test_cgr_matches_php_parser_oracle_on_php_structure", + "CALLS tests/test_processor_factory.py:0 -> factory", + "CALLS tests/test_processor_factory.py:0 -> test_accessing_call_processor_initializes_definition_processor", + "CALLS tests/test_processor_factory.py:0 -> test_accessing_call_processor_initializes_import_processor", + "CALLS tests/test_processor_factory.py:0 -> test_accessing_call_processor_initializes_type_inference", + "CALLS tests/test_processor_factory.py:0 -> test_accessing_definition_processor_initializes_import_processor", + "CALLS tests/test_processor_factory.py:0 -> test_accessing_type_inference_initializes_definition_processor", + "CALLS tests/test_processor_factory.py:0 -> test_accessing_type_inference_initializes_import_processor", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_initialized_on_first_access", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_is_correct_type", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_not_initialized_on_factory_creation", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_class_inheritance_from_definition_processor", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_function_registry", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_import_processor", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_project_name", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_repo_path", + "CALLS tests/test_processor_factory.py:0 -> test_call_processor_receives_type_inference", + "CALLS 
tests/test_processor_factory.py:0 -> test_call_processor_returns_same_instance", + "CALLS tests/test_processor_factory.py:0 -> test_class_inheritance_is_shared_between_type_inference_and_call_processor", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_initialized_on_first_access", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_is_correct_type", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_not_initialized_on_factory_creation", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_receives_function_registry", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_receives_import_processor", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_receives_project_name", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_receives_repo_path", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_receives_simple_name_lookup", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_returns_same_instance", + "CALLS tests/test_processor_factory.py:0 -> test_definition_processor_shares_module_qn_to_file_path", + "CALLS tests/test_processor_factory.py:0 -> test_function_registry_is_shared_across_processors", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_initialized_on_first_access", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_is_correct_type", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_not_initialized_on_factory_creation", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_receives_function_registry", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_receives_project_name", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_receives_repo_path", + "CALLS tests/test_processor_factory.py:0 -> test_import_processor_returns_same_instance", + "CALLS tests/test_processor_factory.py:0 
-> test_module_qn_to_file_path_is_shared_dict", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_initialized_on_first_access", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_is_correct_type", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_not_initialized_on_factory_creation", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_receives_project_name", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_receives_queries", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_receives_repo_path", + "CALLS tests/test_processor_factory.py:0 -> test_structure_processor_returns_same_instance", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_initialized_on_first_access", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_is_correct_type", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_not_initialized_on_factory_creation", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_ast_cache", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_class_inheritance_from_definition_processor", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_function_registry", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_import_processor", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_project_name", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_queries", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_repo_path", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_receives_simple_name_lookup", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_returns_same_instance", + "CALLS tests/test_processor_factory.py:0 -> test_type_inference_shares_module_qn_to_file_path", + "CALLS tests/test_project_name_flag.py:0 -> main", 
+ "CALLS tests/test_project_name_flag.py:0 -> parsers_and_queries", + "CALLS tests/test_project_name_flag.py:0 -> test_call_processor_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_class_qualified_names_use_override", + "CALLS tests/test_project_name_flag.py:0 -> test_default_empty_string_uses_directory_name", + "CALLS tests/test_project_name_flag.py:0 -> test_default_none_uses_directory_name", + "CALLS tests/test_project_name_flag.py:0 -> test_default_qualified_names_use_directory", + "CALLS tests/test_project_name_flag.py:0 -> test_default_uses_directory_name", + "CALLS tests/test_project_name_flag.py:0 -> test_default_whitespace_only_uses_directory_name", + "CALLS tests/test_project_name_flag.py:0 -> test_definition_processor_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_factory_default_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_factory_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_function_qualified_names_use_override", + "CALLS tests/test_project_name_flag.py:0 -> test_generic_dir_name_main", + "CALLS tests/test_project_name_flag.py:0 -> test_generic_dir_name_src", + "CALLS tests/test_project_name_flag.py:0 -> test_import_processor_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_module_qualified_names_use_override", + "CALLS tests/test_project_name_flag.py:0 -> test_nested_same_name_parent", + "CALLS tests/test_project_name_flag.py:0 -> test_override_simple", + "CALLS tests/test_project_name_flag.py:0 -> test_override_vs_default_different_names", + "CALLS tests/test_project_name_flag.py:0 -> test_override_with_dots", + "CALLS tests/test_project_name_flag.py:0 -> test_override_with_hyphens", + "CALLS tests/test_project_name_flag.py:0 -> test_package_qualified_names_use_override", + "CALLS tests/test_project_name_flag.py:0 -> test_structure_processor_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> 
test_type_inference_receives_project_name", + "CALLS tests/test_project_name_flag.py:0 -> test_version_named_directory", + "CALLS tests/test_project_naming.py:0 -> repo", + "CALLS tests/test_project_naming.py:0 -> test_derive_project_name_disambiguates_same_basename", + "CALLS tests/test_project_naming.py:0 -> test_derive_project_name_fallback_for_root", + "CALLS tests/test_project_naming.py:0 -> test_derive_project_name_includes_basename", + "CALLS tests/test_project_naming.py:0 -> test_derive_project_name_is_stable", + "CALLS tests/test_project_naming.py:0 -> test_derive_project_name_slugifies_special_chars", + "CALLS tests/test_project_naming.py:0 -> test_resolve_repo_path_dot_falls_back_to_cwd", + "CALLS tests/test_project_naming.py:0 -> test_resolve_repo_path_empty_falls_back_to_cwd", + "CALLS tests/test_project_naming.py:0 -> test_resolve_repo_path_explicit_wins", + "CALLS tests/test_project_naming.py:0 -> test_resolve_repo_path_uses_target_default", + "CALLS tests/test_property_getter_calls.py:0 -> ensure_node_batch", + "CALLS tests/test_property_getter_calls.py:0 -> ensure_relationship_batch", + "CALLS tests/test_property_getter_calls.py:0 -> execute_write", + "CALLS tests/test_property_getter_calls.py:0 -> fetch_all", + "CALLS tests/test_property_getter_calls.py:0 -> flush_all", + "CALLS tests/test_property_getter_calls.py:0 -> status", + "CALLS tests/test_property_getter_calls.py:0 -> test_property_access_only_emits_the_getter_edge", + "CALLS tests/test_property_getter_calls.py:0 -> test_property_access_via_self_is_a_call", + "CALLS tests/test_property_getter_calls.py:0 -> test_property_access_via_typed_param_is_a_call", + "CALLS tests/test_property_getter_calls.py:0 -> test_regular_method_call_is_unaffected", + "CALLS tests/test_property_return_type_chain.py:0 -> ensure_node_batch", + "CALLS tests/test_property_return_type_chain.py:0 -> ensure_relationship_batch", + "CALLS tests/test_property_return_type_chain.py:0 -> execute_write", + "CALLS 
tests/test_property_return_type_chain.py:0 -> fetch_all", + "CALLS tests/test_property_return_type_chain.py:0 -> flush_all", + "CALLS tests/test_property_return_type_chain.py:0 -> test_chained_call_through_property_resolves_to_return_type_class", + "CALLS tests/test_property_return_type_chain.py:0 -> test_does_not_resolve_to_same_class_method_of_same_name", + "CALLS tests/test_protobuf_end_to_end.py:0 -> add", + "CALLS tests/test_protobuf_end_to_end.py:0 -> test_comprehensive_pipeline_produces_valid_artifact_joint", + "CALLS tests/test_protobuf_end_to_end.py:0 -> test_comprehensive_pipeline_produces_valid_artifacts_split_index", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_node_batch_no_message_class_logs_warning", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_node_batch_no_oneof_mapping_logs_warning", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_dedup", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_dedup_with_properties_merge", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_invalid_empty_source", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_invalid_empty_target", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_none_values", + "CALLS tests/test_protobuf_service.py:0 -> test_ensure_relationship_batch_unknown_rel_type", + "CALLS tests/test_protobuf_service.py:0 -> test_protobuf_ingestor_joint_serialization_and_deserialization", + "CALLS tests/test_protobuf_service.py:0 -> test_protobuf_ingestor_split_index_serialization_and_deserialization", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> LanguageHandler", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> execute_write", + "CALLS tests/test_protocol_dispatch_resolution.py:0 
-> extract_decorators", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> fetch_all", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> flush_all", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> get_handler", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> test_dispatches_to_all_conformers", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> test_dispatches_to_concrete_conformer", + "CALLS tests/test_protocol_dispatch_resolution.py:0 -> test_does_not_emit_protocol_stub_edge", + "CALLS tests/test_protocol_impl_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_protocol_impl_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_protocol_impl_resolution.py:0 -> execute_write", + "CALLS tests/test_protocol_impl_resolution.py:0 -> fetch_all", + "CALLS tests/test_protocol_impl_resolution.py:0 -> flush_all", + "CALLS tests/test_protocol_impl_resolution.py:0 -> get", + "CALLS tests/test_protocol_impl_resolution.py:0 -> test_does_not_resolve_to_protocol_stub", + "CALLS tests/test_protocol_impl_resolution.py:0 -> test_naming_convention_disambiguates_from_other_conformer", + "CALLS tests/test_protocol_impl_resolution.py:0 -> test_protocol_typed_call_resolves_to_concrete_implementer", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> ensure_node_batch", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> ensure_relationship_batch", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> execute_write", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> fetch_all", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> flush_all", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> snapshot", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> test_dunder_only_on_implementer_resolves", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> test_protocol_stub_not_emitted", + "CALLS tests/test_protocol_operator_dispatch.py:0 -> test_subscript_and_membership_reach_structural_conformer", + "CALLS 
tests/test_provider_classes.py:0 -> CustomProvider", + "CALLS tests/test_provider_classes.py:0 -> provider_name", + "CALLS tests/test_provider_classes.py:0 -> test_anthropic_api_key_from_env", + "CALLS tests/test_provider_classes.py:0 -> test_anthropic_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_anthropic_model_creation", + "CALLS tests/test_provider_classes.py:0 -> test_anthropic_model_enables_prompt_caching", + "CALLS tests/test_provider_classes.py:0 -> test_anthropic_validation_error", + "CALLS tests/test_provider_classes.py:0 -> test_azure_api_key_from_env", + "CALLS tests/test_provider_classes.py:0 -> test_azure_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_azure_model_creation", + "CALLS tests/test_provider_classes.py:0 -> test_azure_validation_error_no_endpoint", + "CALLS tests/test_provider_classes.py:0 -> test_azure_validation_error_no_key", + "CALLS tests/test_provider_classes.py:0 -> test_get_invalid_provider", + "CALLS tests/test_provider_classes.py:0 -> test_get_litellm_provider", + "CALLS tests/test_provider_classes.py:0 -> test_get_valid_providers", + "CALLS tests/test_provider_classes.py:0 -> test_google_gla_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_google_gla_validation_error", + "CALLS tests/test_provider_classes.py:0 -> test_google_model_creation_with_thinking_budget", + "CALLS tests/test_provider_classes.py:0 -> test_google_model_creation_without_thinking_budget", + "CALLS tests/test_provider_classes.py:0 -> test_google_thinking_budget", + "CALLS tests/test_provider_classes.py:0 -> test_google_vertex_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_google_vertex_validation_error", + "CALLS tests/test_provider_classes.py:0 -> test_list_providers", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_default_endpoint", + "CALLS tests/test_provider_classes.py:0 -> 
test_litellm_model_creation", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_no_endpoint_validation_error", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_validation_connection_error", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_validation_fallback_to_models_endpoint", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_validation_server_not_running", + "CALLS tests/test_provider_classes.py:0 -> test_litellm_validation_success", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_custom_endpoint", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_model_creation", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_validation_connection_error", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_validation_server_not_running", + "CALLS tests/test_provider_classes.py:0 -> test_ollama_validation_success", + "CALLS tests/test_provider_classes.py:0 -> test_openai_configuration", + "CALLS tests/test_provider_classes.py:0 -> test_openai_custom_endpoint", + "CALLS tests/test_provider_classes.py:0 -> test_openai_model_creation", + "CALLS tests/test_provider_classes.py:0 -> test_openai_validation_error", + "CALLS tests/test_provider_classes.py:0 -> test_register_custom_provider", + "CALLS tests/test_provider_configuration.py:0 -> test_bare_model_name_parsing", + "CALLS tests/test_provider_configuration.py:0 -> test_batch_size_validation", + "CALLS tests/test_provider_configuration.py:0 -> test_custom_model_names_with_colons", + "CALLS tests/test_provider_configuration.py:0 -> test_default_fallback_behavior", + "CALLS tests/test_provider_configuration.py:0 -> test_explicit_provider_configuration_from_env", + "CALLS tests/test_provider_configuration.py:0 -> test_google_vertex_ai_configuration", + "CALLS tests/test_provider_configuration.py:0 -> test_mixed_provider_configuration", + "CALLS tests/test_provider_configuration.py:0 -> 
test_ollama_env_vars_respected_not_ignored", + "CALLS tests/test_provider_configuration.py:0 -> test_openai_custom_endpoint", + "CALLS tests/test_provider_configuration.py:0 -> test_runtime_provider_override", + "CALLS tests/test_provider_configuration.py:0 -> test_thinking_budget_configuration", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> add", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> engine", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> engine_with_queries", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> import_processor", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> mock_ast_cache", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> mock_function_registry", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> process", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> python_parser", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_async_function_parameters", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_classmethod_with_cls_parameter", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_decorated_function", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_empty_function_body", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_for_loop_with_list_literal", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_for_loop_with_range", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_for_loop_with_tuple_unpacking", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_args_kwargs", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_complex_type_hints", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_default_values", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_only_docstring", + "CALLS 
tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_typed_parameters", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_function_with_walrus_operator", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_generator_function", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_init_with_self_assignments", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_lambda_in_function", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_list_comprehension_variable", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_method_accessing_instance_vars", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_method_with_all_variable_types", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_method_with_self_parameter", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_nested_for_loops", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_nested_list_comprehension", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_traverse_with_query_path", + "CALLS tests/test_py_variable_analyzer_integration.py:0 -> test_traverse_with_query_path_caches_return_stmts", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> engine", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> mock_ast_cache", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> mock_function_registry", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> mock_import_processor", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_collects_classes_from_registry", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_collects_imported_classes", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_contains_match_returns_scaled_score", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_exact_match_case_insensitive", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_exact_match_returns_100", + "CALLS 
tests/test_py_variable_analyzer_unit.py:0 -> test_extracts_class_from_call_in_list", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_extracts_identifier", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_extracts_loop_variable_type", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_extracts_typed_parameter", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_falls_back_to_method_return", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_finds_exact_match", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_finds_suffix_match", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_matches_all_prefix", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_matches_plural_suffix", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_no_match_returns_zero", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_prefers_exact_over_suffix", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_empty_for_no_classes", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_known_type", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_none_for_empty_list", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_none_for_empty_text", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_none_for_no_match", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_none_for_non_collection_name", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_returns_none_for_non_identifier", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_routes_identifier_to_untyped", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_routes_typed_default_parameter", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_routes_typed_parameter", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_skips_list_type", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_skips_lowercase_function", + "CALLS 
tests/test_py_variable_analyzer_unit.py:0 -> test_skips_missing_left_node", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_skips_missing_name", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_skips_missing_type", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_suffix_match_class_ends_with_param", + "CALLS tests/test_py_variable_analyzer_unit.py:0 -> test_suffix_match_param_ends_with_class", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> mock_updater", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> test_fallback_chooses_closest_candidate", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> test_fallback_with_mixed_function_types", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> test_fallback_with_no_candidates", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> test_fallback_with_single_candidate", + "CALLS tests/test_python_call_resolution_fallback.py:0 -> test_same_module_resolution_bypasses_fallback", + "CALLS tests/test_python_class_method_relationship.py:0 -> my_method", + "CALLS tests/test_python_class_method_relationship.py:0 -> temp_project", + "CALLS tests/test_python_class_method_relationship.py:0 -> test_defines_method_relationship_is_created", + "CALLS tests/test_python_context_managers.py:0 -> context_manager_project", + "CALLS tests/test_python_context_managers.py:0 -> execute", + "CALLS tests/test_python_context_managers.py:0 -> process", + "CALLS tests/test_python_context_managers.py:0 -> process_file", + "CALLS tests/test_python_context_managers.py:0 -> read", + "CALLS tests/test_python_context_managers.py:0 -> save", + "CALLS tests/test_python_context_managers.py:0 -> test_async_context_manager_parsing", + "CALLS tests/test_python_context_managers.py:0 -> test_context_manager_function_calls", + "CALLS tests/test_python_context_managers.py:0 -> test_context_manager_function_definitions", + "CALLS tests/test_python_context_managers.py:0 -> 
test_context_manager_in_control_structures", + "CALLS tests/test_python_context_managers.py:0 -> test_custom_context_manager_class", + "CALLS tests/test_python_context_managers.py:0 -> test_decorated_context_manager_function", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> clear", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> deep_hierarchy_project", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> get", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> load", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> main", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> run", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> save", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> singleton_project", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> start", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> test_chained_cross_file_calls", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> test_deep_package_hierarchy_cross_file_calls", + "CALLS tests/test_python_cross_file_advanced_patterns.py:0 -> test_singleton_pattern_cross_file_calls", + "CALLS tests/test_python_decorators.py:0 -> decorator", + "CALLS tests/test_python_decorators.py:0 -> decorator_project", + "CALLS tests/test_python_decorators.py:0 -> test_class_decorators", + "CALLS tests/test_python_decorators.py:0 -> test_decorator_with_complex_arguments", + "CALLS tests/test_python_decorators.py:0 -> test_empty_decorators_for_undecorated_functions", + "CALLS tests/test_python_decorators.py:0 -> test_method_decorators", + "CALLS tests/test_python_decorators.py:0 -> test_nested_function_decorators", + "CALLS tests/test_python_decorators.py:0 -> test_simple_function_decorators", + "CALLS tests/test_python_decorators.py:0 -> timing_decorator", + "CALLS tests/test_python_decorators.py:0 -> value", + "CALLS tests/test_python_decorators.py:0 -> wrapper", + 
"CALLS tests/test_python_imported_class_method_calls.py:0 -> class_method_project", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> main", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> test_cross_file_object_method_chaining", + "CALLS tests/test_python_imported_class_method_calls.py:0 -> test_imported_class_method_calls_are_detected", + "CALLS tests/test_python_imports.py:0 -> python_imports_project", + "CALLS tests/test_python_imports.py:0 -> test_complex_import_patterns", + "CALLS tests/test_python_imports.py:0 -> test_import_aliases_and_renaming", + "CALLS tests/test_python_imports.py:0 -> test_import_error_handling", + "CALLS tests/test_python_imports.py:0 -> test_import_relationships_comprehensive", + "CALLS tests/test_python_imports.py:0 -> test_relative_imports", + "CALLS tests/test_python_imports.py:0 -> test_standard_library_imports", + "CALLS tests/test_python_imports.py:0 -> test_third_party_framework_imports", + "CALLS tests/test_python_inheritance.py:0 -> inheritance_project", + "CALLS tests/test_python_inheritance.py:0 -> mro_diamond_project", + "CALLS tests/test_python_inheritance.py:0 -> test_asymmetric_diamond_mro", + "CALLS tests/test_python_inheritance.py:0 -> test_complex_multiple_inheritance_mro", + "CALLS tests/test_python_inheritance.py:0 -> test_deep_diamond_chain_mro", + "CALLS tests/test_python_inheritance.py:0 -> test_deep_inheritance_chain", + "CALLS tests/test_python_inheritance.py:0 -> test_diamond_inheritance_mro_basic", + "CALLS tests/test_python_inheritance.py:0 -> test_diamond_inheritance_mro_override_at_point", + "CALLS tests/test_python_inheritance.py:0 -> test_inheritance_relationships_are_created", + "CALLS tests/test_python_inheritance.py:0 -> test_inherited_method_calls_are_resolved", + "CALLS tests/test_python_inheritance.py:0 -> test_method_overrides_are_detected", + "CALLS tests/test_python_inheritance.py:0 -> test_mro_nearest_override_selection", + "CALLS tests/test_python_inheritance.py:0 
-> test_multiple_inheritance_is_handled", + "CALLS tests/test_python_inheritance.py:0 -> test_super_calls_are_tracked", + "CALLS tests/test_python_nested_functions.py:0 -> decorator", + "CALLS tests/test_python_nested_functions.py:0 -> main", + "CALLS tests/test_python_nested_functions.py:0 -> nested_functions_project", + "CALLS tests/test_python_nested_functions.py:0 -> test_function_calls_are_tracked", + "CALLS tests/test_python_nested_functions.py:0 -> test_function_in_class_method", + "CALLS tests/test_python_nested_functions.py:0 -> test_nested_function_definitions_are_created", + "CALLS tests/test_python_nested_functions.py:0 -> test_nested_function_in_staticmethod_not_ingested_as_method", + "CALLS tests/test_python_nested_functions.py:0 -> test_nested_function_parent_child_relationships", + "CALLS tests/test_python_nested_functions.py:0 -> wrapper", + "CALLS tests/test_python_real_world.py:0 -> add", + "CALLS tests/test_python_real_world.py:0 -> execute", + "CALLS tests/test_python_real_world.py:0 -> get", + "CALLS tests/test_python_real_world.py:0 -> put", + "CALLS tests/test_python_real_world.py:0 -> render", + "CALLS tests/test_python_real_world.py:0 -> test_api_service_calls", + "CALLS tests/test_python_real_world.py:0 -> test_cross_language_api_structure", + "CALLS tests/test_python_real_world.py:0 -> test_flask_controller_imports", + "CALLS tests/test_python_real_world.py:0 -> test_flask_no_calls_to_class_nodes", + "CALLS tests/test_python_real_world.py:0 -> test_flask_route_controller_calls", + "CALLS tests/test_python_real_world.py:0 -> test_schema_inheritance_detection", + "CALLS tests/test_python_real_world.py:0 -> test_typescript_hook_usage", + "CALLS tests/test_python_real_world.py:0 -> test_typescript_structure_detection", + "CALLS tests/test_python_real_world.py:0 -> todo_app_project", + "CALLS tests/test_python_relative_import_resolution.py:0 -> mock_updater", + "CALLS tests/test_python_relative_import_resolution.py:0 -> name", + "CALLS 
tests/test_python_relative_import_resolution.py:0 -> test_double_dot_relative_import", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_relative_import_complex_module_path", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_relative_import_edge_case_shallow_module", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_relative_import_to_package_root", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_relative_import_without_module_name", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_single_dot_relative_import", + "CALLS tests/test_python_relative_import_resolution.py:0 -> test_triple_dot_relative_import", + "CALLS tests/test_python_return_type_inference.py:0 -> return_type_project", + "CALLS tests/test_python_return_type_inference.py:0 -> test_basic_return_type_inference", + "CALLS tests/test_python_return_type_inference.py:0 -> test_fluent_interface_return_types", + "CALLS tests/test_python_return_type_inference.py:0 -> test_loop_variable_return_types", + "CALLS tests/test_python_return_type_inference.py:0 -> test_nested_return_type_inference", + "CALLS tests/test_python_return_type_inference.py:0 -> test_service_method_return_types", + "CALLS tests/test_python_span_oracle.py:0 -> test_cgr_matches_ast_oracle_on_python_node_spans", + "CALLS tests/test_python_standard_library_imports.py:0 -> mock_updater", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_aliased_import_local_module", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_aliased_import_standard_library", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_already_prefixed_imports_unchanged", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_local_file_imports_are_prefixed", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_local_module_imports_are_prefixed", + "CALLS tests/test_python_standard_library_imports.py:0 -> 
test_nested_local_module_imports", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_regular_import_dotted_local_module", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_regular_import_local_module", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_regular_import_standard_library", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_standard_library_imports_not_prefixed", + "CALLS tests/test_python_standard_library_imports.py:0 -> test_third_party_imports_not_prefixed", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> mock_updater", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_cpp_using_namespace_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_exact_import_priority_over_wildcard", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_fallback_still_works_after_wildcard_check", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_go_no_wildcard_imports", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_java_wildcard_import_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_javascript_namespace_import_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_multiple_wildcard_imports", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_python_wildcard_import_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_rust_wildcard_import_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_scala_wildcard_import_resolution", + "CALLS tests/test_python_wildcard_import_resolution.py:0 -> test_wildcard_with_no_matching_function", + "CALLS tests/test_query_truncation.py:0 -> mock_cypher_gen", + "CALLS tests/test_query_truncation.py:0 -> mock_ingestor", + "CALLS tests/test_query_truncation.py:0 -> test_no_truncation_when_within_limits", + "CALLS tests/test_query_truncation.py:0 -> 
test_row_cap_truncation", + "CALLS tests/test_query_truncation.py:0 -> test_token_truncation", + "CALLS tests/test_realtime_debounce.py:0 -> _patch_ignore", + "CALLS tests/test_realtime_debounce.py:0 -> mock_ingestor", + "CALLS tests/test_realtime_debounce.py:0 -> mock_updater", + "CALLS tests/test_realtime_debounce.py:0 -> sample_file", + "CALLS tests/test_realtime_debounce.py:0 -> send_events", + "CALLS tests/test_realtime_debounce.py:0 -> test_created_event_triggers_debounce", + "CALLS tests/test_realtime_debounce.py:0 -> test_debounce_batches_rapid_events", + "CALLS tests/test_realtime_debounce.py:0 -> test_deleted_event_triggers_debounce", + "CALLS tests/test_realtime_debounce.py:0 -> test_different_files_tracked_separately", + "CALLS tests/test_realtime_debounce.py:0 -> test_dispatch_ignores_directories", + "CALLS tests/test_realtime_debounce.py:0 -> test_handler_initialization_with_debounce", + "CALLS tests/test_realtime_debounce.py:0 -> test_handler_initialization_without_debounce", + "CALLS tests/test_realtime_debounce.py:0 -> test_handler_uses_default_constants", + "CALLS tests/test_realtime_debounce.py:0 -> test_is_relevant_filters_ignored_patterns", + "CALLS tests/test_realtime_debounce.py:0 -> test_max_wait_forces_update", + "CALLS tests/test_realtime_debounce.py:0 -> test_no_debounce_processes_immediately", + "CALLS tests/test_realtime_debounce.py:0 -> test_realistic_rapid_save_scenario", + "CALLS tests/test_realtime_debounce.py:0 -> test_single_edit_after_quiet_period", + "CALLS tests/test_realtime_debounce.py:0 -> test_thread_safety_concurrent_events", + "CALLS tests/test_realtime_debounce.py:0 -> test_timer_cleanup_after_processing", + "CALLS tests/test_realtime_debounce.py:0 -> test_validate_non_negative_float_accepts_positive", + "CALLS tests/test_realtime_debounce.py:0 -> test_validate_non_negative_float_accepts_zero", + "CALLS tests/test_realtime_debounce.py:0 -> test_validate_non_negative_float_rejects_negative", + "CALLS 
tests/test_realtime_event_filtering.py:0 -> _AnyProtocol", + "CALLS tests/test_realtime_event_filtering.py:0 -> _bypass_protocol_check", + "CALLS tests/test_realtime_event_filtering.py:0 -> handler", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_access_event_is_ignored", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_closed_no_write_event_is_ignored", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_created_event_is_processed", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_delete_file_only_targets_specific_path", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_deleted_event_is_processed", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_json_file_creates_file_node", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_markdown_file_creates_file_node", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_modified_event_is_processed", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_multiple_files_changed", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_non_code_file_deletion_removes_file_node", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_non_code_file_has_no_module_node", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_opened_event_is_ignored", + "CALLS tests/test_realtime_event_filtering.py:0 -> test_rapid_create_modify_delete", + "CALLS tests/test_realtime_updater.py:0 -> _AnyProtocol", + "CALLS tests/test_realtime_updater.py:0 -> _bypass_protocol_check", + "CALLS tests/test_realtime_updater.py:0 -> event_handler", + "CALLS tests/test_realtime_updater.py:0 -> test_directory_creation_is_ignored", + "CALLS tests/test_realtime_updater.py:0 -> test_file_creation_flow", + "CALLS tests/test_realtime_updater.py:0 -> test_file_deletion_flow", + "CALLS tests/test_realtime_updater.py:0 -> test_file_modification_flow", + "CALLS tests/test_realtime_updater.py:0 -> test_irrelevant_files_are_ignored", + "CALLS tests/test_realtime_updater.py:0 -> 
test_non_code_files_create_file_nodes", + "CALLS tests/test_reconcile_embeddings.py:0 -> log_messages", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_handles_verify_fn_exception", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_logs_ok_when_all_found", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_logs_warning_when_ids_missing", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_noop_when_expected_empty", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_sample_ids_in_warning", + "CALLS tests/test_reconcile_embeddings.py:0 -> test_sample_limited_to_ten", + "CALLS tests/test_reconcile_embeddings.py:0 -> updater", + "CALLS tests/test_reexport_chain_resolution.py:0 -> build_local_variable_type_map", + "CALLS tests/test_reexport_chain_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_reexport_chain_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_reexport_chain_resolution.py:0 -> execute_write", + "CALLS tests/test_reexport_chain_resolution.py:0 -> export", + "CALLS tests/test_reexport_chain_resolution.py:0 -> fetch_all", + "CALLS tests/test_reexport_chain_resolution.py:0 -> flush_all", + "CALLS tests/test_reexport_chain_resolution.py:0 -> test_does_not_collapse_to_caller_same_named_method", + "CALLS tests/test_reexport_chain_resolution.py:0 -> test_property_typed_by_reexport_resolves_to_real_class", + "CALLS tests/test_relative_import_package_init.py:0 -> ensure_node_batch", + "CALLS tests/test_relative_import_package_init.py:0 -> ensure_relationship_batch", + "CALLS tests/test_relative_import_package_init.py:0 -> execute_write", + "CALLS tests/test_relative_import_package_init.py:0 -> fetch_all", + "CALLS tests/test_relative_import_package_init.py:0 -> flush_all", + "CALLS tests/test_relative_import_package_init.py:0 -> test_from_dot_import_in_package_init_targets_own_submodule", + "CALLS tests/test_relative_import_root_level.py:0 -> ensure_node_batch", + "CALLS tests/test_relative_import_root_level.py:0 -> 
ensure_relationship_batch", + "CALLS tests/test_relative_import_root_level.py:0 -> execute_write", + "CALLS tests/test_relative_import_root_level.py:0 -> fetch_all", + "CALLS tests/test_relative_import_root_level.py:0 -> flush_all", + "CALLS tests/test_relative_import_root_level.py:0 -> test_from_dot_import_submodule_at_root", + "CALLS tests/test_retrieval_eval.py:0 -> repo", + "CALLS tests/test_retrieval_eval.py:0 -> run", + "CALLS tests/test_retrieval_eval.py:0 -> test_cgr_call_edges_smoke", + "CALLS tests/test_retrieval_eval.py:0 -> test_grep_call_excludes_bare_reference_but_flags_def_site", + "CALLS tests/test_retrieval_eval.py:0 -> test_grep_name_overincludes_vs_oracle", + "CALLS tests/test_retrieval_eval.py:0 -> test_grep_preserves_colon_in_path", + "CALLS tests/test_retrieval_eval.py:0 -> test_oracle_captures_first_party_calls", + "CALLS tests/test_retrieval_eval.py:0 -> test_score_retrieval_computes_prf", + "CALLS tests/test_rust.py:0 -> Color", + "CALLS tests/test_rust.py:0 -> add", + "CALLS tests/test_rust.py:0 -> get", + "CALLS tests/test_rust.py:0 -> insert", + "CALLS tests/test_rust.py:0 -> keys", + "CALLS tests/test_rust.py:0 -> main", + "CALLS tests/test_rust.py:0 -> parse", + "CALLS tests/test_rust.py:0 -> process", + "CALLS tests/test_rust.py:0 -> processor", + "CALLS tests/test_rust.py:0 -> read", + "CALLS tests/test_rust.py:0 -> read_file_content", + "CALLS tests/test_rust.py:0 -> rust_project", + "CALLS tests/test_rust.py:0 -> save", + "CALLS tests/test_rust.py:0 -> test_basic_rust_functions", + "CALLS tests/test_rust.py:0 -> test_rust_advanced_edge_cases", + "CALLS tests/test_rust.py:0 -> test_rust_closures_and_lambdas", + "CALLS tests/test_rust.py:0 -> test_rust_comprehensive_integration", + "CALLS tests/test_rust.py:0 -> test_rust_error_handling", + "CALLS tests/test_rust.py:0 -> test_rust_generics_and_lifetimes", + "CALLS tests/test_rust.py:0 -> test_rust_imports_and_use_statements", + "CALLS tests/test_rust.py:0 -> test_rust_macros", + 
"CALLS tests/test_rust.py:0 -> test_rust_modules_and_crates", + "CALLS tests/test_rust.py:0 -> test_rust_pattern_matching", + "CALLS tests/test_rust.py:0 -> test_rust_structs_enums_unions", + "CALLS tests/test_rust.py:0 -> test_rust_traits_and_implementations", + "CALLS tests/test_rust.py:0 -> value", + "CALLS tests/test_rust_advanced_types.py:0 -> close", + "CALLS tests/test_rust_advanced_types.py:0 -> execute", + "CALLS tests/test_rust_advanced_types.py:0 -> get", + "CALLS tests/test_rust_advanced_types.py:0 -> process", + "CALLS tests/test_rust_advanced_types.py:0 -> rust_advanced_types_project", + "CALLS tests/test_rust_advanced_types.py:0 -> test_advanced_associated_types", + "CALLS tests/test_rust_advanced_types.py:0 -> test_const_generics_advanced", + "CALLS tests/test_rust_advanced_types.py:0 -> test_higher_ranked_trait_bounds", + "CALLS tests/test_rust_advanced_types.py:0 -> test_phantom_types_and_markers", + "CALLS tests/test_rust_advanced_types.py:0 -> test_type_level_programming", + "CALLS tests/test_rust_call_recall.py:0 -> name", + "CALLS tests/test_rust_call_recall.py:0 -> test_bare_identifier_in_macro_is_not_a_call", + "CALLS tests/test_rust_call_recall.py:0 -> test_call_inside_macro_is_captured", + "CALLS tests/test_rust_call_recall.py:0 -> test_struct_literal_in_macro_is_not_a_call", + "CALLS tests/test_rust_call_recall.py:0 -> test_turbofish_call_is_captured", + "CALLS tests/test_rust_call_recall.py:0 -> value", + "CALLS tests/test_rust_closure_containment_oracle.py:0 -> run", + "CALLS tests/test_rust_closure_containment_oracle.py:0 -> test_cgr_matches_syn_oracle_on_closure_containment", + "CALLS tests/test_rust_closure_method_defines.py:0 -> run", + "CALLS tests/test_rust_closure_method_defines.py:0 -> test_rust_closure_in_impl_method_defined_by_method", + "CALLS tests/test_rust_closures_functions.py:0 -> add", + "CALLS tests/test_rust_closures_functions.py:0 -> get", + "CALLS tests/test_rust_closures_functions.py:0 -> handler", + "CALLS 
tests/test_rust_closures_functions.py:0 -> insert", + "CALLS tests/test_rust_closures_functions.py:0 -> operation", + "CALLS tests/test_rust_closures_functions.py:0 -> process", + "CALLS tests/test_rust_closures_functions.py:0 -> processor", + "CALLS tests/test_rust_closures_functions.py:0 -> rust_closures_project", + "CALLS tests/test_rust_closures_functions.py:0 -> test_async_closures_and_futures", + "CALLS tests/test_rust_closures_functions.py:0 -> test_basic_closures_and_captures", + "CALLS tests/test_rust_closures_functions.py:0 -> test_function_pointers_and_types", + "CALLS tests/test_rust_closures_functions.py:0 -> test_higher_order_functions", + "CALLS tests/test_rust_closures_functions.py:0 -> value", + "CALLS tests/test_rust_collections_iterators.py:0 -> get", + "CALLS tests/test_rust_collections_iterators.py:0 -> insert", + "CALLS tests/test_rust_collections_iterators.py:0 -> keys", + "CALLS tests/test_rust_collections_iterators.py:0 -> parse", + "CALLS tests/test_rust_collections_iterators.py:0 -> rust_collections_project", + "CALLS tests/test_rust_collections_iterators.py:0 -> test_functional_programming", + "CALLS tests/test_rust_collections_iterators.py:0 -> test_hashmap_operations", + "CALLS tests/test_rust_collections_iterators.py:0 -> test_iterator_patterns", + "CALLS tests/test_rust_collections_iterators.py:0 -> test_other_collections", + "CALLS tests/test_rust_collections_iterators.py:0 -> test_vector_operations", + "CALLS tests/test_rust_concurrency_async.py:0 -> insert", + "CALLS tests/test_rust_concurrency_async.py:0 -> load", + "CALLS tests/test_rust_concurrency_async.py:0 -> main", + "CALLS tests/test_rust_concurrency_async.py:0 -> read", + "CALLS tests/test_rust_concurrency_async.py:0 -> rust_concurrency_project", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_async_await_basics", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_atomic_operations", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_basic_threads", + 
"CALLS tests/test_rust_concurrency_async.py:0 -> test_message_passing_channels", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_parallel_computing", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_shared_state_mutex", + "CALLS tests/test_rust_concurrency_async.py:0 -> test_tokio_async_runtime", + "CALLS tests/test_rust_containment_oracle.py:0 -> test_cgr_matches_syn_oracle_on_containment_edges", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> clear", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> get", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> insert", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> load", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> main", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> rust_singleton_project", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> save", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> start", + "CALLS tests/test_rust_cross_file_singleton.py:0 -> test_rust_singleton_pattern_cross_file_calls", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> get", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> insert", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> load", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> operation", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> parse", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> rust_error_project", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> test_custom_error_types", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> test_error_handling_patterns", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> test_error_propagation", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> test_panic_handling", + "CALLS tests/test_rust_error_handling_advanced.py:0 -> test_result_option_basics", + "CALLS tests/test_rust_impl_primitive_target.py:0 -> test_rust_method_on_primitive_impl_target_is_captured", + "CALLS 
tests/test_rust_inheritance_edges.py:0 -> test_rust_impl_and_supertrait_edges", + "CALLS tests/test_rust_inheritance_oracle.py:0 -> test_cgr_matches_syn_oracle_on_inheritance_edges", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> get", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> get_data", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> insert", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> process", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> processor", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> rust_lifetimes_project", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> test_borrowing_edge_cases", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> test_complex_lifetime_relationships", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> test_lifetime_elision_rules", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> test_lifetime_variance_and_subtyping", + "CALLS tests/test_rust_lifetimes_advanced.py:0 -> updater", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> get", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> insert", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> name", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> parse", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> rust_macros_project", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> test_advanced_macro_patterns", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> test_declarative_macros_basic", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> test_derive_macros_custom", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> test_macro_usage_patterns", + "CALLS tests/test_rust_macros_metaprogramming.py:0 -> test_procedural_macros", + "CALLS tests/test_rust_memory_management.py:0 -> main", + "CALLS tests/test_rust_memory_management.py:0 -> rust_memory_project", + "CALLS tests/test_rust_memory_management.py:0 -> test_drop_trait_cleanup", + "CALLS tests/test_rust_memory_management.py:0 -> 
test_lifetimes_explicit", + "CALLS tests/test_rust_memory_management.py:0 -> test_memory_layout_optimization", + "CALLS tests/test_rust_memory_management.py:0 -> test_ownership_borrowing_basic", + "CALLS tests/test_rust_memory_management.py:0 -> test_reference_counting", + "CALLS tests/test_rust_memory_management.py:0 -> test_smart_pointers", + "CALLS tests/test_rust_memory_management.py:0 -> test_unsafe_code_patterns", + "CALLS tests/test_rust_modules_visibility.py:0 -> add", + "CALLS tests/test_rust_modules_visibility.py:0 -> execute", + "CALLS tests/test_rust_modules_visibility.py:0 -> insert", + "CALLS tests/test_rust_modules_visibility.py:0 -> process", + "CALLS tests/test_rust_modules_visibility.py:0 -> processor", + "CALLS tests/test_rust_modules_visibility.py:0 -> read_file", + "CALLS tests/test_rust_modules_visibility.py:0 -> rust_modules_project", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_advanced_visibility_patterns", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_basic_module_declarations", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_complex_use_statements", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_conditional_compilation", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_module_attributes_and_cfg", + "CALLS tests/test_rust_modules_visibility.py:0 -> test_module_path_resolution", + "CALLS tests/test_rust_nested_module_containment.py:0 -> test_rust_impl_method_in_module_binds_to_nested_type", + "CALLS tests/test_rust_nested_module_containment.py:0 -> test_rust_nested_module_is_module_nested", + "CALLS tests/test_rust_node_type.py:0 -> rust_node_type_project", + "CALLS tests/test_rust_node_type.py:0 -> test_determine_node_type_rust", + "CALLS tests/test_rust_node_type.py:0 -> test_rust_enum_label", + "CALLS tests/test_rust_node_type.py:0 -> test_rust_struct_label", + "CALLS tests/test_rust_node_type.py:0 -> test_rust_trait_label", + "CALLS tests/test_rust_node_type.py:0 -> 
test_rust_type_alias_label", + "CALLS tests/test_rust_node_type.py:0 -> test_rust_union_label", + "CALLS tests/test_rust_pattern_matching.py:0 -> get", + "CALLS tests/test_rust_pattern_matching.py:0 -> insert", + "CALLS tests/test_rust_pattern_matching.py:0 -> rust_pattern_project", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_advanced_if_let_while_let", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_advanced_macro_patterns", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_destructuring_patterns", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_exhaustive_enum_matching", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_macro_pattern_matching", + "CALLS tests/test_rust_pattern_matching.py:0 -> test_pattern_guards_and_ranges", + "CALLS tests/test_rust_performance_optimization.py:0 -> add", + "CALLS tests/test_rust_performance_optimization.py:0 -> get", + "CALLS tests/test_rust_performance_optimization.py:0 -> insert", + "CALLS tests/test_rust_performance_optimization.py:0 -> load", + "CALLS tests/test_rust_performance_optimization.py:0 -> run", + "CALLS tests/test_rust_performance_optimization.py:0 -> rust_performance_project", + "CALLS tests/test_rust_performance_optimization.py:0 -> test_benchmarking_patterns", + "CALLS tests/test_rust_performance_optimization.py:0 -> test_memory_optimization", + "CALLS tests/test_rust_performance_optimization.py:0 -> test_parallel_processing_rayon", + "CALLS tests/test_rust_performance_optimization.py:0 -> test_profiling_optimization_tools", + "CALLS tests/test_rust_performance_optimization.py:0 -> test_simd_vectorization", + "CALLS tests/test_rust_smart_pointers.py:0 -> clear", + "CALLS tests/test_rust_smart_pointers.py:0 -> get", + "CALLS tests/test_rust_smart_pointers.py:0 -> insert", + "CALLS tests/test_rust_smart_pointers.py:0 -> keys", + "CALLS tests/test_rust_smart_pointers.py:0 -> name", + "CALLS tests/test_rust_smart_pointers.py:0 -> process", + "CALLS 
tests/test_rust_smart_pointers.py:0 -> read", + "CALLS tests/test_rust_smart_pointers.py:0 -> rust_smart_pointers_project", + "CALLS tests/test_rust_smart_pointers.py:0 -> test_arc_atomic_reference_counting", + "CALLS tests/test_rust_smart_pointers.py:0 -> test_box_pointer_patterns", + "CALLS tests/test_rust_smart_pointers.py:0 -> test_custom_smart_pointers", + "CALLS tests/test_rust_smart_pointers.py:0 -> test_rc_reference_counting", + "CALLS tests/test_rust_smart_pointers.py:0 -> test_refcell_interior_mutability", + "CALLS tests/test_rust_span_oracle.py:0 -> test_cgr_matches_syn_oracle_on_node_spans", + "CALLS tests/test_rust_structs_enums.py:0 -> Color", + "CALLS tests/test_rust_structs_enums.py:0 -> get", + "CALLS tests/test_rust_structs_enums.py:0 -> insert", + "CALLS tests/test_rust_structs_enums.py:0 -> rust_structs_project", + "CALLS tests/test_rust_structs_enums.py:0 -> test_basic_struct_definitions", + "CALLS tests/test_rust_structs_enums.py:0 -> test_complex_struct_relationships", + "CALLS tests/test_rust_structs_enums.py:0 -> test_enum_definitions_and_variants", + "CALLS tests/test_rust_structs_enums.py:0 -> test_enum_pattern_matching_advanced", + "CALLS tests/test_rust_structs_enums.py:0 -> test_pattern_matching_destructuring", + "CALLS tests/test_rust_structs_enums.py:0 -> test_struct_derive_attributes", + "CALLS tests/test_rust_structure_oracle.py:0 -> test_cgr_matches_syn_oracle_on_rust_structure", + "CALLS tests/test_rust_trait_method_containment.py:0 -> test_rust_trait_method_defined_by_interface_node", + "CALLS tests/test_rust_trait_objects.py:0 -> analyze", + "CALLS tests/test_rust_trait_objects.py:0 -> execute", + "CALLS tests/test_rust_trait_objects.py:0 -> get", + "CALLS tests/test_rust_trait_objects.py:0 -> get_data", + "CALLS tests/test_rust_trait_objects.py:0 -> get_summary", + "CALLS tests/test_rust_trait_objects.py:0 -> insert", + "CALLS tests/test_rust_trait_objects.py:0 -> name", + "CALLS tests/test_rust_trait_objects.py:0 -> process", 
+ "CALLS tests/test_rust_trait_objects.py:0 -> render", + "CALLS tests/test_rust_trait_objects.py:0 -> rust_trait_objects_project", + "CALLS tests/test_rust_trait_objects.py:0 -> test_advanced_trait_object_patterns", + "CALLS tests/test_rust_trait_objects.py:0 -> test_basic_trait_objects", + "CALLS tests/test_rust_trait_objects.py:0 -> test_dynamic_dispatch_performance", + "CALLS tests/test_rust_trait_objects.py:0 -> test_object_safety_patterns", + "CALLS tests/test_rust_traits_generics.py:0 -> add", + "CALLS tests/test_rust_traits_generics.py:0 -> clear", + "CALLS tests/test_rust_traits_generics.py:0 -> get", + "CALLS tests/test_rust_traits_generics.py:0 -> insert", + "CALLS tests/test_rust_traits_generics.py:0 -> load", + "CALLS tests/test_rust_traits_generics.py:0 -> name", + "CALLS tests/test_rust_traits_generics.py:0 -> parse", + "CALLS tests/test_rust_traits_generics.py:0 -> process", + "CALLS tests/test_rust_traits_generics.py:0 -> processor", + "CALLS tests/test_rust_traits_generics.py:0 -> rust_traits_project", + "CALLS tests/test_rust_traits_generics.py:0 -> test_associated_types_and_constants", + "CALLS tests/test_rust_traits_generics.py:0 -> test_basic_trait_definitions", + "CALLS tests/test_rust_traits_generics.py:0 -> test_generic_types_and_constraints", + "CALLS tests/test_rust_traits_generics.py:0 -> test_higher_ranked_trait_bounds", + "CALLS tests/test_rust_traits_generics.py:0 -> test_trait_objects_and_dynamic_dispatch", + "CALLS tests/test_rust_traits_generics.py:0 -> value", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> add", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> get", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> load", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> rust_unsafe_project", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> test_extern_c_functions", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> test_inline_assembly", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> test_raw_pointers_and_dereferencing", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> 
test_unsafe_traits_and_implementations", + "CALLS tests/test_rust_unsafe_ffi.py:0 -> test_unsafe_unions_and_transmute", + "CALLS tests/test_rust_utils.py:0 -> main", + "CALLS tests/test_rust_utils.py:0 -> process", + "CALLS tests/test_rust_utils.py:0 -> test_aliased_import", + "CALLS tests/test_rust_utils.py:0 -> test_aliased_imports_tracked", + "CALLS tests/test_rust_utils.py:0 -> test_complex_nested_imports_integration", + "CALLS tests/test_rust_utils.py:0 -> test_crate_import", + "CALLS tests/test_rust_utils.py:0 -> test_deeply_nested_path", + "CALLS tests/test_rust_utils.py:0 -> test_empty_group", + "CALLS tests/test_rust_utils.py:0 -> test_function_in_module", + "CALLS tests/test_rust_utils.py:0 -> test_function_in_nested_modules", + "CALLS tests/test_rust_utils.py:0 -> test_function_with_class_node_types", + "CALLS tests/test_rust_utils.py:0 -> test_grouped_imports", + "CALLS tests/test_rust_utils.py:0 -> test_impl_methods_have_correct_qualified_names", + "CALLS tests/test_rust_utils.py:0 -> test_impl_scoped_type", + "CALLS tests/test_rust_utils.py:0 -> test_impl_trait_for_type", + "CALLS tests/test_rust_utils.py:0 -> test_impl_with_generic", + "CALLS tests/test_rust_utils.py:0 -> test_imports_create_relationships", + "CALLS tests/test_rust_utils.py:0 -> test_method_in_impl_with_target", + "CALLS tests/test_rust_utils.py:0 -> test_method_in_impl_without_target", + "CALLS tests/test_rust_utils.py:0 -> test_mixed_self_and_items_in_group", + "CALLS tests/test_rust_utils.py:0 -> test_multiple_nested_groups", + "CALLS tests/test_rust_utils.py:0 -> test_nested_grouped_imports", + "CALLS tests/test_rust_utils.py:0 -> test_non_impl_node_returns_none", + "CALLS tests/test_rust_utils.py:0 -> test_non_use_node_returns_empty", + "CALLS tests/test_rust_utils.py:0 -> test_self_alias_in_group", + "CALLS tests/test_rust_utils.py:0 -> test_self_import", + "CALLS tests/test_rust_utils.py:0 -> test_simple_impl", + "CALLS tests/test_rust_utils.py:0 -> test_simple_import", + 
"CALLS tests/test_rust_utils.py:0 -> test_super_import", + "CALLS tests/test_rust_utils.py:0 -> test_super_super_import", + "CALLS tests/test_rust_utils.py:0 -> test_top_level_function", + "CALLS tests/test_rust_utils.py:0 -> test_wildcard_import", + "CALLS tests/test_rust_utils.py:0 -> test_wildcard_imports_tracked", + "CALLS tests/test_rust_web_networking.py:0 -> execute", + "CALLS tests/test_rust_web_networking.py:0 -> get", + "CALLS tests/test_rust_web_networking.py:0 -> insert", + "CALLS tests/test_rust_web_networking.py:0 -> keys", + "CALLS tests/test_rust_web_networking.py:0 -> put", + "CALLS tests/test_rust_web_networking.py:0 -> run", + "CALLS tests/test_rust_web_networking.py:0 -> rust_web_project", + "CALLS tests/test_rust_web_networking.py:0 -> save", + "CALLS tests/test_rust_web_networking.py:0 -> status", + "CALLS tests/test_rust_web_networking.py:0 -> test_database_orm_patterns", + "CALLS tests/test_rust_web_networking.py:0 -> test_http_client_requests", + "CALLS tests/test_rust_web_networking.py:0 -> test_json_api_serialization", + "CALLS tests/test_rust_web_networking.py:0 -> test_web_server_axum", + "CALLS tests/test_rust_web_networking.py:0 -> test_websockets_realtime", + "CALLS tests/test_rust_web_networking.py:0 -> text", + "CALLS tests/test_semantic_search.py:0 -> mock_embed_code", + "CALLS tests/test_semantic_search.py:0 -> mock_ingestor", + "CALLS tests/test_semantic_search.py:0 -> mock_search_embeddings", + "CALLS tests/test_semantic_search.py:0 -> test_create_get_function_source_tool_returns_tool", + "CALLS tests/test_semantic_search.py:0 -> test_create_semantic_search_tool_returns_tool", + "CALLS tests/test_semantic_search.py:0 -> test_get_function_source_code_handles_exception", + "CALLS tests/test_semantic_search.py:0 -> test_get_function_source_code_returns_none_on_invalid_location", + "CALLS tests/test_semantic_search.py:0 -> test_get_function_source_code_returns_none_when_not_found", + "CALLS tests/test_semantic_search.py:0 -> 
test_get_function_source_code_returns_source", + "CALLS tests/test_semantic_search.py:0 -> test_get_function_source_tool_handles_not_found", + "CALLS tests/test_semantic_search.py:0 -> test_get_function_source_tool_returns_source", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_calls_embed_code_with_query", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_handles_exception", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_passes_top_k_to_search", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_preserves_score_order", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_returns_empty_when_no_matches", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_returns_empty_without_dependencies", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_code_search_returns_formatted_results", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_search_tool_formats_results", + "CALLS tests/test_semantic_search.py:0 -> test_semantic_search_tool_handles_no_results", + "CALLS tests/test_shell_command.py:0 -> anyio_backend", + "CALLS tests/test_shell_command.py:0 -> shell_commander", + "CALLS tests/test_shell_command.py:0 -> temp_project_root", + "CALLS tests/test_shell_command.py:0 -> test_absolute_system_dir", + "CALLS tests/test_shell_command.py:0 -> test_all_read_only_no_approval", + "CALLS tests/test_shell_command.py:0 -> test_and_operator", + "CALLS tests/test_shell_command.py:0 -> test_and_operator_short_circuit", + "CALLS tests/test_shell_command.py:0 -> test_append_redirect", + "CALLS tests/test_shell_command.py:0 -> test_append_redirect_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_awk_getline_detected", + "CALLS tests/test_shell_command.py:0 -> test_awk_getline_rejected", + "CALLS tests/test_shell_command.py:0 -> test_awk_system_call_detected", + "CALLS tests/test_shell_command.py:0 -> test_awk_system_rejected", + "CALLS 
tests/test_shell_command.py:0 -> test_backtick_subshell_rejected", + "CALLS tests/test_shell_command.py:0 -> test_backtick_substitution", + "CALLS tests/test_shell_command.py:0 -> test_blocked_command_execution", + "CALLS tests/test_shell_command.py:0 -> test_bypass_allowlist_skips_allowlist_error", + "CALLS tests/test_shell_command.py:0 -> test_bypass_allowlist_still_blocks_dangerous_rm", + "CALLS tests/test_shell_command.py:0 -> test_chmod_777_root", + "CALLS tests/test_shell_command.py:0 -> test_combined_flags_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_command_not_in_allowlist", + "CALLS tests/test_shell_command.py:0 -> test_command_substitution", + "CALLS tests/test_shell_command.py:0 -> test_common_commands_in_allowlist", + "CALLS tests/test_shell_command.py:0 -> test_creates_tool_instance", + "CALLS tests/test_shell_command.py:0 -> test_dangerous_command", + "CALLS tests/test_shell_command.py:0 -> test_dangerous_command_as_second_in_pipe", + "CALLS tests/test_shell_command.py:0 -> test_dangerous_command_in_pipe_rejected", + "CALLS tests/test_shell_command.py:0 -> test_dangerous_pattern_in_pipeline", + "CALLS tests/test_shell_command.py:0 -> test_dd_to_device", + "CALLS tests/test_shell_command.py:0 -> test_destructive_commands_blocked", + "CALLS tests/test_shell_command.py:0 -> test_disk_operations_blocked", + "CALLS tests/test_shell_command.py:0 -> test_dollar_in_variable", + "CALLS tests/test_shell_command.py:0 -> test_dot_dot_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_double_quoted_subshell_rejected", + "CALLS tests/test_shell_command.py:0 -> test_empty_command", + "CALLS tests/test_shell_command.py:0 -> test_empty_segment", + "CALLS tests/test_shell_command.py:0 -> test_escaped_operators_in_quotes", + "CALLS tests/test_shell_command.py:0 -> test_escaped_quote_bypass_detected", + "CALLS tests/test_shell_command.py:0 -> test_execute_cat_command", + "CALLS tests/test_shell_command.py:0 -> 
test_execute_command_not_in_allowlist", + "CALLS tests/test_shell_command.py:0 -> test_execute_command_with_stderr", + "CALLS tests/test_shell_command.py:0 -> test_execute_dangerous_command_rejected", + "CALLS tests/test_shell_command.py:0 -> test_execute_echo_command", + "CALLS tests/test_shell_command.py:0 -> test_execute_empty_command", + "CALLS tests/test_shell_command.py:0 -> test_execute_grep_suggests_rg", + "CALLS tests/test_shell_command.py:0 -> test_execute_ls_command", + "CALLS tests/test_shell_command.py:0 -> test_execute_pwd_command", + "CALLS tests/test_shell_command.py:0 -> test_find_with_wc", + "CALLS tests/test_shell_command.py:0 -> test_flags_with_other_options", + "CALLS tests/test_shell_command.py:0 -> test_heredoc", + "CALLS tests/test_shell_command.py:0 -> test_heredoc_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_init_custom_timeout", + "CALLS tests/test_shell_command.py:0 -> test_init_default_timeout", + "CALLS tests/test_shell_command.py:0 -> test_init_resolves_project_root", + "CALLS tests/test_shell_command.py:0 -> test_input_redirect", + "CALLS tests/test_shell_command.py:0 -> test_input_redirect_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_invalid_command_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_invalid_syntax", + "CALLS tests/test_shell_command.py:0 -> test_invalid_syntax_rejected", + "CALLS tests/test_shell_command.py:0 -> test_kernel_module_commands_blocked", + "CALLS tests/test_shell_command.py:0 -> test_leading_operator", + "CALLS tests/test_shell_command.py:0 -> test_mixed_quote_styles", + "CALLS tests/test_shell_command.py:0 -> test_multiple_dangerous_commands_all_rejected", + "CALLS tests/test_shell_command.py:0 -> test_multiple_operators_in_sequence", + "CALLS tests/test_shell_command.py:0 -> test_no_redirect", + "CALLS tests/test_shell_command.py:0 -> test_no_subshell", + "CALLS tests/test_shell_command.py:0 -> test_non_rm_commands_not_dangerous", + "CALLS 
tests/test_shell_command.py:0 -> test_or_operator", + "CALLS tests/test_shell_command.py:0 -> test_or_operator_short_circuit", + "CALLS tests/test_shell_command.py:0 -> test_other_commands_not_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_output_redirect", + "CALLS tests/test_shell_command.py:0 -> test_output_redirect_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_path_outside_project", + "CALLS tests/test_shell_command.py:0 -> test_pipe_in_single_quotes", + "CALLS tests/test_shell_command.py:0 -> test_pipe_with_disallowed_command", + "CALLS tests/test_shell_command.py:0 -> test_python_os_import_detected", + "CALLS tests/test_shell_command.py:0 -> test_read_only_command_no_approval_needed", + "CALLS tests/test_shell_command.py:0 -> test_read_only_commands_no_approval", + "CALLS tests/test_shell_command.py:0 -> test_read_only_with_args_no_approval", + "CALLS tests/test_shell_command.py:0 -> test_read_only_without_redirect_no_approval", + "CALLS tests/test_shell_command.py:0 -> test_relative_path_bypass_blocked", + "CALLS tests/test_shell_command.py:0 -> test_relative_path_to_system_dir", + "CALLS tests/test_shell_command.py:0 -> test_remote_script_execution", + "CALLS tests/test_shell_command.py:0 -> test_rg_in_pipeline", + "CALLS tests/test_shell_command.py:0 -> test_rm_fr_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_rm_outside_project_blocked", + "CALLS tests/test_shell_command.py:0 -> test_rm_rf_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_rm_rf_is_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_rm_system_directory", + "CALLS tests/test_shell_command.py:0 -> test_rm_without_force_not_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_rm_without_rf_is_not_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_root_directory", + "CALLS tests/test_shell_command.py:0 -> test_safe_awk_allowed", + "CALLS tests/test_shell_command.py:0 -> test_safe_awk_not_flagged", + "CALLS 
tests/test_shell_command.py:0 -> test_safe_commands_not_blocked", + "CALLS tests/test_shell_command.py:0 -> test_safe_git_subcommands_no_approval", + "CALLS tests/test_shell_command.py:0 -> test_safe_path_inside_project", + "CALLS tests/test_shell_command.py:0 -> test_safe_pipeline_not_flagged", + "CALLS tests/test_shell_command.py:0 -> test_safe_sed_allowed", + "CALLS tests/test_shell_command.py:0 -> test_safe_sed_not_flagged", + "CALLS tests/test_shell_command.py:0 -> test_safe_segment_not_flagged", + "CALLS tests/test_shell_command.py:0 -> test_safe_xargs_allowed", + "CALLS tests/test_shell_command.py:0 -> test_safe_xargs_not_flagged", + "CALLS tests/test_shell_command.py:0 -> test_sed_execute_alternate_delimiters", + "CALLS tests/test_shell_command.py:0 -> test_sed_execute_flag_any_position", + "CALLS tests/test_shell_command.py:0 -> test_sed_execute_flag_detected", + "CALLS tests/test_shell_command.py:0 -> test_sed_execute_rejected", + "CALLS tests/test_shell_command.py:0 -> test_semicolon_operator", + "CALLS tests/test_shell_command.py:0 -> test_separate_r_f_flags", + "CALLS tests/test_shell_command.py:0 -> test_simple_pipe", + "CALLS tests/test_shell_command.py:0 -> test_single_quoted_subshell_pattern_allowed", + "CALLS tests/test_shell_command.py:0 -> test_subshell_in_double_quotes_detected", + "CALLS tests/test_shell_command.py:0 -> test_subshell_in_single_quotes_not_detected", + "CALLS tests/test_shell_command.py:0 -> test_subshell_outside_quotes_detected", + "CALLS tests/test_shell_command.py:0 -> test_subshell_rejected", + "CALLS tests/test_shell_command.py:0 -> test_system_control_blocked", + "CALLS tests/test_shell_command.py:0 -> test_tool_has_correct_name", + "CALLS tests/test_shell_command.py:0 -> test_tool_has_description", + "CALLS tests/test_shell_command.py:0 -> test_trailing_and", + "CALLS tests/test_shell_command.py:0 -> test_trailing_pipe", + "CALLS tests/test_shell_command.py:0 -> test_unsafe_git_subcommands_require_approval", + "CALLS 
tests/test_shell_command.py:0 -> test_valid_command", + "CALLS tests/test_shell_command.py:0 -> test_wildcard_dangerous", + "CALLS tests/test_shell_command.py:0 -> test_write_command_in_pipe_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_write_command_requires_approval", + "CALLS tests/test_shell_command.py:0 -> test_write_command_with_approval", + "CALLS tests/test_shell_command.py:0 -> test_write_commands_require_approval", + "CALLS tests/test_shell_command.py:0 -> test_xargs_chmod_detected", + "CALLS tests/test_shell_command.py:0 -> test_xargs_chmod_rejected", + "CALLS tests/test_shell_command.py:0 -> test_xargs_rm_detected", + "CALLS tests/test_shell_command.py:0 -> test_xargs_rm_rejected", + "CALLS tests/test_shell_command.py:0 -> test_yolo_runs_non_allowlist_command", + "CALLS tests/test_shell_command.py:0 -> test_yolo_skips_approval_for_write_command", + "CALLS tests/test_shell_command.py:0 -> test_yolo_still_blocks_dangerous_rm_rf", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> _infer_instance_variable_types_from_assignments", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> execute_write", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> fetch_all", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> flush_all", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> name", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> test_does_not_resolve_to_decoy_class", + "CALLS tests/test_sibling_mixin_resolution.py:0 -> test_self_call_resolves_to_sibling_mixin_method", + "CALLS tests/test_single_file_repo_path.py:0 -> cpp_single_file", + "CALLS tests/test_single_file_repo_path.py:0 -> main", + "CALLS tests/test_single_file_repo_path.py:0 -> ran_single_file_updater", + "CALLS tests/test_single_file_repo_path.py:0 -> test_directory_repo_path_still_works", + "CALLS 
tests/test_single_file_repo_path.py:0 -> test_single_file_repo_path_out_of_class_methods", + "CALLS tests/test_single_file_repo_path.py:0 -> test_single_file_repo_path_produces_graph", + "CALLS tests/test_single_file_repo_path.py:0 -> test_single_file_repo_path_static_functions", + "CALLS tests/test_single_query_output_format.py:0 -> mock_agent_stack", + "CALLS tests/test_single_query_output_format.py:0 -> test_default_format_prints_plain_text", + "CALLS tests/test_single_query_output_format.py:0 -> test_json_format_preserves_non_ascii", + "CALLS tests/test_single_query_output_format.py:0 -> test_json_format_without_ask_agent_exits_with_error", + "CALLS tests/test_single_query_output_format.py:0 -> test_json_format_wraps_query_and_response", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_cache_maxsize", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_decode_bytes", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_decode_caches", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_handler_has_slots", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_handler_no_instance_dict", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_has_slots", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_no_instance_dict", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_parser_has_slots", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_parser_no_instance_dict", + "CALLS tests/test_slots_and_optimizations.py:0 -> test_protocol_has_slots", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_class_has_slots", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_code_retriever_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_command_group_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_command_group_rejects_attr", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_directory_lister_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_directory_lister_rejects_attr", + "CALLS 
tests/test_slots_lazy_logger.py:0 -> test_file_reader_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_file_writer_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_google_provider_inherits_config_slot", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_google_provider_instance_has_all_attrs", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_graph_loader_has_slots", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_health_checker_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_health_checker_rejects_attr", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_no_eager_debug_format", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_ollama_provider_inherits_config_slot", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_ollama_provider_instance_has_all_attrs", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_openai_provider_inherits_config_slot", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_openai_provider_instance_has_all_attrs", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_shell_commander_no_dict", + "CALLS tests/test_slots_lazy_logger.py:0 -> test_shell_commander_rejects_attr", + "CALLS tests/test_source_extraction.py:0 -> mock_ast_extractor", + "CALLS tests/test_source_extraction.py:0 -> test_clamps_end_line_returns_partial_content", + "CALLS tests/test_source_extraction.py:0 -> test_clamps_when_end_exceeds_file_length", + "CALLS tests/test_source_extraction.py:0 -> test_converts_string_to_path", + "CALLS tests/test_source_extraction.py:0 -> test_counts_blank_lines", + "CALLS tests/test_source_extraction.py:0 -> test_extracts_across_blank_lines", + "CALLS tests/test_source_extraction.py:0 -> test_extracts_all_lines", + "CALLS tests/test_source_extraction.py:0 -> test_extracts_multiple_lines", + "CALLS tests/test_source_extraction.py:0 -> test_extracts_single_line", + "CALLS tests/test_source_extraction.py:0 -> test_falls_back_to_lines_when_ast_extractor_raises", + "CALLS tests/test_source_extraction.py:0 -> 
test_falls_back_to_lines_when_ast_extractor_returns_none", + "CALLS tests/test_source_extraction.py:0 -> test_handles_empty_file", + "CALLS tests/test_source_extraction.py:0 -> test_handles_empty_string_path", + "CALLS tests/test_source_extraction.py:0 -> test_handles_windows_style_path", + "CALLS tests/test_source_extraction.py:0 -> test_line_count_matches_with_many_blank_lines", + "CALLS tests/test_source_extraction.py:0 -> test_preserves_indentation", + "CALLS tests/test_source_extraction.py:0 -> test_preserves_internal_blank_lines", + "CALLS tests/test_source_extraction.py:0 -> test_returns_false_when_all_are_none", + "CALLS tests/test_source_extraction.py:0 -> test_returns_false_when_end_line_is_none", + "CALLS tests/test_source_extraction.py:0 -> test_returns_false_when_file_path_is_none", + "CALLS tests/test_source_extraction.py:0 -> test_returns_false_when_start_line_is_none", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_for_negative_start_line", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_for_nonexistent_file", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_for_start_greater_than_end", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_for_zero_end_line", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_for_zero_start_line", + "CALLS tests/test_source_extraction.py:0 -> test_returns_none_when_start_exceeds_file_length", + "CALLS tests/test_source_extraction.py:0 -> test_returns_true_for_valid_location", + "CALLS tests/test_source_extraction.py:0 -> test_skips_ast_when_extractor_is_none", + "CALLS tests/test_source_extraction.py:0 -> test_skips_ast_when_qualified_name_is_none", + "CALLS tests/test_source_extraction.py:0 -> test_strips_trailing_whitespace", + "CALLS tests/test_source_extraction.py:0 -> test_uses_ast_extractor_when_provided", + "CALLS tests/test_source_extraction.py:0 -> test_uses_line_extraction_when_no_ast_extractor", + "CALLS tests/test_stack_manager.py:0 
-> fake_run", + "CALLS tests/test_stack_manager.py:0 -> fake_up", + "CALLS tests/test_stack_manager.py:0 -> memgraph_check", + "CALLS tests/test_stack_manager.py:0 -> qdrant_check", + "CALLS tests/test_stack_manager.py:0 -> stack_home", + "CALLS tests/test_stack_manager.py:0 -> test_check_docker_raises_when_compose_missing", + "CALLS tests/test_stack_manager.py:0 -> test_check_docker_raises_when_daemon_down", + "CALLS tests/test_stack_manager.py:0 -> test_check_docker_raises_when_docker_not_on_path", + "CALLS tests/test_stack_manager.py:0 -> test_compose_cmd_uses_project_and_file", + "CALLS tests/test_stack_manager.py:0 -> test_ensure_compose_file_copies_when_missing", + "CALLS tests/test_stack_manager.py:0 -> test_ensure_compose_file_preserves_existing", + "CALLS tests/test_stack_manager.py:0 -> test_ensure_compose_file_raises_when_source_missing", + "CALLS tests/test_stack_manager.py:0 -> test_ensure_running_skips_docker_when_already_up", + "CALLS tests/test_stack_manager.py:0 -> test_ensure_running_starts_when_stopped", + "CALLS tests/test_stack_manager.py:0 -> test_status_returns_partial_when_only_memgraph_reachable", + "CALLS tests/test_stack_manager.py:0 -> test_status_returns_running_when_both_reachable", + "CALLS tests/test_stack_manager.py:0 -> test_status_returns_stopped_when_nothing_reachable", + "CALLS tests/test_stack_manager.py:0 -> test_up_propagates_failure", + "CALLS tests/test_stats_command.py:0 -> mock_node_results", + "CALLS tests/test_stats_command.py:0 -> mock_rel_results", + "CALLS tests/test_stats_command.py:0 -> runner", + "CALLS tests/test_stats_command.py:0 -> test_stats_displays_node_table", + "CALLS tests/test_stats_command.py:0 -> test_stats_displays_relationship_table", + "CALLS tests/test_stats_command.py:0 -> test_stats_displays_totals", + "CALLS tests/test_stats_command.py:0 -> test_stats_handles_connection_error", + "CALLS tests/test_stats_command.py:0 -> test_stats_handles_empty_graph", + "CALLS tests/test_stats_command.py:0 -> 
test_stats_handles_empty_labels", + "CALLS tests/test_stats_command.py:0 -> test_stats_handles_multi_label_nodes", + "CALLS tests/test_status_bar_config.py:0 -> fake_run", + "CALLS tests/test_status_bar_config.py:0 -> reset_session", + "CALLS tests/test_status_bar_config.py:0 -> test_abbreviated_repo_handles_none", + "CALLS tests/test_status_bar_config.py:0 -> test_abbreviated_repo_keeps_absolute_for_outside_paths", + "CALLS tests/test_status_bar_config.py:0 -> test_abbreviated_repo_uses_tilde_for_home_paths", + "CALLS tests/test_status_bar_config.py:0 -> test_branch_appears_after_repo_when_inline", + "CALLS tests/test_status_bar_config.py:0 -> test_config_segments_always_shows_both_models", + "CALLS tests/test_status_bar_config.py:0 -> test_config_segments_reflects_session_flags", + "CALLS tests/test_status_bar_config.py:0 -> test_config_segments_shows_distinct_models", + "CALLS tests/test_status_bar_config.py:0 -> test_config_status_html_includes_model_and_repo", + "CALLS tests/test_status_bar_config.py:0 -> test_git_state_returns_none_when_target_missing", + "CALLS tests/test_status_bar_config.py:0 -> test_git_state_returns_none_without_target_repo", + "CALLS tests/test_status_bar_config.py:0 -> test_git_state_uses_target_repo_cwd", + "CALLS tests/test_status_bar_config.py:0 -> test_rich_status_bar_inlines_config_when_wide", + "CALLS tests/test_status_bar_config.py:0 -> test_rich_status_bar_wraps_config_when_narrow", + "CALLS tests/test_status_bar_config.py:0 -> test_status_bar_html_inlines_config_when_wide", + "CALLS tests/test_status_bar_config.py:0 -> test_status_bar_html_places_branch_after_repo_when_inline", + "CALLS tests/test_status_bar_config.py:0 -> test_status_bar_html_wraps_config_when_narrow", + "CALLS tests/test_stdlib_extractor.py:0 -> extractor", + "CALLS tests/test_stdlib_extractor.py:0 -> extractor_with_registry", + "CALLS tests/test_stdlib_extractor.py:0 -> reset_caches", + "CALLS tests/test_stdlib_extractor.py:0 -> 
test_cache_stdlib_result_creates_entry", + "CALLS tests/test_stdlib_extractor.py:0 -> test_cache_ttl_expiration", + "CALLS tests/test_stdlib_extractor.py:0 -> test_clear_stdlib_cache", + "CALLS tests/test_stdlib_extractor.py:0 -> test_clear_stdlib_cache_handles_unlink_error", + "CALLS tests/test_stdlib_extractor.py:0 -> test_cpp_non_std_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_cpp_std_namespace_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_deeply_nested_path", + "CALLS tests/test_stdlib_extractor.py:0 -> test_empty_string", + "CALLS tests/test_stdlib_extractor.py:0 -> test_flush_stdlib_cache_calls_save", + "CALLS tests/test_stdlib_extractor.py:0 -> test_function_registry_entity_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_function_registry_none_handling", + "CALLS tests/test_stdlib_extractor.py:0 -> test_get_cached_stdlib_result_returns_cached_value", + "CALLS tests/test_stdlib_extractor.py:0 -> test_get_cached_stdlib_result_returns_none_for_missing", + "CALLS tests/test_stdlib_extractor.py:0 -> test_go_extractor_fallback_on_go_list_failure", + "CALLS tests/test_stdlib_extractor.py:0 -> test_go_extractor_fallback_on_timeout", + "CALLS tests/test_stdlib_extractor.py:0 -> test_go_extractor_lowercase_entity_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_go_extractor_returns_package_on_successful_introspection", + "CALLS tests/test_stdlib_extractor.py:0 -> test_go_single_part_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_go_uppercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_is_tool_available_caches_result", + "CALLS tests/test_stdlib_extractor.py:0 -> test_is_tool_available_returns_false_on_file_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_is_tool_available_returns_false_on_timeout", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_exception_suffix", + "CALLS tests/test_stdlib_extractor.py:0 -> 
test_java_extractor_builder_suffix", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_extractor_error_suffix", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_extractor_fallback_on_compile_failure", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_extractor_fallback_on_file_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_extractor_fallback_on_timeout", + "CALLS tests/test_stdlib_extractor.py:0 -> test_java_uppercase_class", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_extractor_fallback_on_entity_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_extractor_fallback_on_json_decode_error", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_extractor_fallback_on_timeout", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_extractor_returns_module_on_successful_introspection", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_stdlib_lowercase_entity_without_node", + "CALLS tests/test_stdlib_extractor.py:0 -> test_js_stdlib_uppercase_entity_without_node", + "CALLS tests/test_stdlib_extractor.py:0 -> test_load_persistent_cache_handles_json_decode_error", + "CALLS tests/test_stdlib_extractor.py:0 -> test_load_persistent_cache_handles_missing_file", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_entity_in_stdlib_set", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_extractor_fallback_on_entity_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_extractor_fallback_on_lua_not_found", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_extractor_fallback_on_timeout", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_extractor_returns_module_on_successful_introspection", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_extractor_stdlib_module_in_set", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_stdlib_module_uppercase", + "CALLS tests/test_stdlib_extractor.py:0 -> test_lua_uppercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> 
test_python_entity_is_module_not_class", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_entity_not_found_in_module", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_lowercase_entity_with_import_failure", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_single_part_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_stdlib_lowercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_stdlib_uppercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_python_uppercase_entity_with_import_failure", + "CALLS tests/test_stdlib_extractor.py:0 -> test_returns_correct_stats", + "CALLS tests/test_stdlib_extractor.py:0 -> test_returns_module_for_registered_class", + "CALLS tests/test_stdlib_extractor.py:0 -> test_returns_module_for_registered_function", + "CALLS tests/test_stdlib_extractor.py:0 -> test_rust_deeply_nested", + "CALLS tests/test_stdlib_extractor.py:0 -> test_rust_single_part_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_rust_stdlib_all_uppercase", + "CALLS tests/test_stdlib_extractor.py:0 -> test_rust_stdlib_uppercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_save_and_load_persistent_cache", + "CALLS tests/test_stdlib_extractor.py:0 -> test_save_persistent_cache_handles_os_error", + "CALLS tests/test_stdlib_extractor.py:0 -> test_scala_lowercase_returns_unchanged", + "CALLS tests/test_stdlib_extractor.py:0 -> test_scala_uppercase_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_single_part_path", + "CALLS tests/test_stdlib_extractor.py:0 -> test_ts_lowercase_strips_entity", + "CALLS tests/test_stdlib_extractor.py:0 -> test_ts_uses_js_extraction_uppercase", + "CALLS tests/test_structural_relationships.py:0 -> complex_project", + "CALLS tests/test_structural_relationships.py:0 -> dependency_project", + "CALLS tests/test_structural_relationships.py:0 -> main", + "CALLS tests/test_structural_relationships.py:0 -> 
test_contains_file_relationships", + "CALLS tests/test_structural_relationships.py:0 -> test_contains_folder_relationships", + "CALLS tests/test_structural_relationships.py:0 -> test_contains_package_relationships", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_cargo_toml", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_composer_json", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_csproj", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_gemfile", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_go_mod", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_package_json", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_pyproject_toml", + "CALLS tests/test_structural_relationships.py:0 -> test_depends_on_external_python_requirements", + "CALLS tests/test_structural_relationships.py:0 -> test_edge_cases_empty_folders_and_special_files", + "CALLS tests/test_structural_relationships.py:0 -> test_mixed_structure_and_dependencies", + "CALLS tests/test_structure_processor.py:0 -> mock_language_queries", + "CALLS tests/test_structure_processor.py:0 -> processor", + "CALLS tests/test_structure_processor.py:0 -> test_directory_with_init_py_identified_as_package", + "CALLS tests/test_structure_processor.py:0 -> test_directory_without_init_py_identified_as_folder", + "CALLS tests/test_structure_processor.py:0 -> test_empty_repo_creates_no_nodes", + "CALLS tests/test_structure_processor.py:0 -> test_file_at_root", + "CALLS tests/test_structure_processor.py:0 -> test_file_extension_extracted", + "CALLS tests/test_structure_processor.py:0 -> test_file_in_folder", + "CALLS tests/test_structure_processor.py:0 -> test_file_in_package", + "CALLS tests/test_structure_processor.py:0 -> test_file_without_extension", + "CALLS tests/test_structure_processor.py:0 -> 
test_folder_inside_package", + "CALLS tests/test_structure_processor.py:0 -> test_folder_parent_relationship_to_project", + "CALLS tests/test_structure_processor.py:0 -> test_has_slots", + "CALLS tests/test_structure_processor.py:0 -> test_ignored_directories_are_skipped", + "CALLS tests/test_structure_processor.py:0 -> test_multiple_package_indicators", + "CALLS tests/test_structure_processor.py:0 -> test_nested_ignored_directory_skipped", + "CALLS tests/test_structure_processor.py:0 -> test_nested_package_parent_relationship", + "CALLS tests/test_structure_processor.py:0 -> test_nested_packages", + "CALLS tests/test_structure_processor.py:0 -> test_no_instance_dict", + "CALLS tests/test_structure_processor.py:0 -> test_package_inside_folder", + "CALLS tests/test_structure_processor.py:0 -> test_package_parent_relationship_to_project", + "CALLS tests/test_structure_processor.py:0 -> test_rejects_arbitrary_attribute", + "CALLS tests/test_structure_processor.py:0 -> test_slot_attributes_accessible", + "CALLS tests/test_structure_processor.py:0 -> test_structural_elements_populated", + "CALLS tests/test_thenews_cross_file_calls.py:0 -> start", + "CALLS tests/test_thenews_cross_file_calls.py:0 -> test_thenews_cross_file_method_calls_with_singleton_pattern", + "CALLS tests/test_token_utils.py:0 -> test_empty_results", + "CALLS tests/test_token_utils.py:0 -> test_empty_string", + "CALLS tests/test_token_utils.py:0 -> test_longer_string_has_more_tokens", + "CALLS tests/test_token_utils.py:0 -> test_preserves_row_order", + "CALLS tests/test_token_utils.py:0 -> test_results_exceed_limit", + "CALLS tests/test_token_utils.py:0 -> test_results_within_limit", + "CALLS tests/test_token_utils.py:0 -> test_simple_string", + "CALLS tests/test_token_utils.py:0 -> test_single_large_row_still_included", + "CALLS tests/test_token_utils.py:0 -> test_token_count_accuracy", + "CALLS tests/test_trie_optimization.py:0 -> graph_updater_with_trie", + "CALLS tests/test_trie_optimization.py:0 
-> test_function_registry_trie_basic_operations", + "CALLS tests/test_trie_optimization.py:0 -> test_function_resolution_with_trie", + "CALLS tests/test_trie_optimization.py:0 -> test_trie_compatibility_with_existing_code", + "CALLS tests/test_trie_optimization.py:0 -> test_trie_performance_optimization", + "CALLS tests/test_trie_optimization.py:0 -> test_trie_prefix_and_suffix_search", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> ensure_node_batch", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> ensure_relationship_batch", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> execute_write", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> fetch_all", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> flush_all", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> test_bool_takes_precedence_over_len", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> test_boolean_operator_operand_dispatches_to_len", + "CALLS tests/test_truthiness_dispatch_resolution.py:0 -> test_if_truthiness_dispatches_to_len", + "CALLS tests/test_ts_closure_containment.py:0 -> test_function_in_anonymous_callback_defined_by_callback", + "CALLS tests/test_type_inference_iterative.py:0 -> child_by_field_name", + "CALLS tests/test_type_inference_iterative.py:0 -> children", + "CALLS tests/test_type_inference_iterative.py:0 -> engine", + "CALLS tests/test_type_inference_iterative.py:0 -> mock_node", + "CALLS tests/test_type_inference_iterative.py:0 -> test_analyze_self_assignments_handles_deep_tree_without_recursion_error", + "CALLS tests/test_type_inference_iterative.py:0 -> test_delegates_to_java_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> test_delegates_to_resolve_class_name_function", + "CALLS tests/test_type_inference_iterative.py:0 -> test_dispatches_to_java_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> test_dispatches_to_js_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> 
test_dispatches_to_lua_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> test_dispatches_to_python_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> test_dispatches_to_ts_engine", + "CALLS tests/test_type_inference_iterative.py:0 -> test_find_return_statements_handles_deep_tree_without_recursion_error", + "CALLS tests/test_type_inference_iterative.py:0 -> test_java_type_inference_lazy_init", + "CALLS tests/test_type_inference_iterative.py:0 -> test_js_type_inference_lazy_init", + "CALLS tests/test_type_inference_iterative.py:0 -> test_lua_type_inference_lazy_init", + "CALLS tests/test_type_inference_iterative.py:0 -> test_python_type_inference_lazy_init", + "CALLS tests/test_type_inference_iterative.py:0 -> test_returns_empty_dict_for_unsupported_language", + "CALLS tests/test_type_inference_iterative.py:0 -> test_returns_none_when_class_not_found", + "CALLS tests/test_typescript_advanced_types.py:0 -> add", + "CALLS tests/test_typescript_advanced_types.py:0 -> get", + "CALLS tests/test_typescript_advanced_types.py:0 -> handler", + "CALLS tests/test_typescript_advanced_types.py:0 -> keys", + "CALLS tests/test_typescript_advanced_types.py:0 -> process", + "CALLS tests/test_typescript_advanced_types.py:0 -> put", + "CALLS tests/test_typescript_advanced_types.py:0 -> test_conditional_types", + "CALLS tests/test_typescript_advanced_types.py:0 -> test_generic_types", + "CALLS tests/test_typescript_advanced_types.py:0 -> test_template_literal_types", + "CALLS tests/test_typescript_advanced_types.py:0 -> test_typescript_advanced_types_comprehensive", + "CALLS tests/test_typescript_advanced_types.py:0 -> test_utility_types", + "CALLS tests/test_typescript_advanced_types.py:0 -> typescript_advanced_types_project", + "CALLS tests/test_typescript_classes.py:0 -> add", + "CALLS tests/test_typescript_classes.py:0 -> get", + "CALLS tests/test_typescript_classes.py:0 -> keys", + "CALLS tests/test_typescript_classes.py:0 -> process", + "CALLS 
tests/test_typescript_classes.py:0 -> processor", + "CALLS tests/test_typescript_classes.py:0 -> save", + "CALLS tests/test_typescript_classes.py:0 -> start", + "CALLS tests/test_typescript_classes.py:0 -> status", + "CALLS tests/test_typescript_classes.py:0 -> test_abstract_classes", + "CALLS tests/test_typescript_classes.py:0 -> test_access_modifiers", + "CALLS tests/test_typescript_classes.py:0 -> test_parameter_properties", + "CALLS tests/test_typescript_classes.py:0 -> test_typescript_class_comprehensive", + "CALLS tests/test_typescript_classes.py:0 -> typescript_classes_project", + "CALLS tests/test_typescript_containment_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_containment_edges", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> load", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> main", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> save", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> start", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> test_ts_singleton_pattern_cross_file_calls", + "CALLS tests/test_typescript_cross_file_singleton.py:0 -> ts_singleton_project", + "CALLS tests/test_typescript_declaration_files.py:0 -> keys", + "CALLS tests/test_typescript_declaration_files.py:0 -> parse", + "CALLS tests/test_typescript_declaration_files.py:0 -> process", + "CALLS tests/test_typescript_declaration_files.py:0 -> start", + "CALLS tests/test_typescript_declaration_files.py:0 -> test_ambient_declarations", + "CALLS tests/test_typescript_declaration_files.py:0 -> test_global_augmentations", + "CALLS tests/test_typescript_declaration_files.py:0 -> test_module_declarations", + "CALLS tests/test_typescript_declaration_files.py:0 -> test_typescript_declarations_comprehensive", + "CALLS tests/test_typescript_declaration_files.py:0 -> typescript_declarations_project", + "CALLS tests/test_typescript_decorators.py:0 -> clear", + "CALLS tests/test_typescript_decorators.py:0 -> decorator", + "CALLS 
tests/test_typescript_decorators.py:0 -> get", + "CALLS tests/test_typescript_decorators.py:0 -> keys", + "CALLS tests/test_typescript_decorators.py:0 -> metadata", + "CALLS tests/test_typescript_decorators.py:0 -> save", + "CALLS tests/test_typescript_decorators.py:0 -> test_class_decorators", + "CALLS tests/test_typescript_decorators.py:0 -> test_method_decorators", + "CALLS tests/test_typescript_decorators.py:0 -> test_parameter_decorators", + "CALLS tests/test_typescript_decorators.py:0 -> test_property_decorators", + "CALLS tests/test_typescript_decorators.py:0 -> test_typescript_decorators_comprehensive", + "CALLS tests/test_typescript_decorators.py:0 -> typescript_decorators_project", + "CALLS tests/test_typescript_enums.py:0 -> get", + "CALLS tests/test_typescript_enums.py:0 -> keys", + "CALLS tests/test_typescript_enums.py:0 -> process", + "CALLS tests/test_typescript_enums.py:0 -> put", + "CALLS tests/test_typescript_enums.py:0 -> test_const_enums", + "CALLS tests/test_typescript_enums.py:0 -> test_enum_comprehensive", + "CALLS tests/test_typescript_enums.py:0 -> test_numeric_enums", + "CALLS tests/test_typescript_enums.py:0 -> test_string_enums", + "CALLS tests/test_typescript_enums.py:0 -> typescript_enums_project", + "CALLS tests/test_typescript_implements_edges.py:0 -> test_typescript_class_implements_edges", + "CALLS tests/test_typescript_inheritance_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_inheritance_edges", + "CALLS tests/test_typescript_namespace_qn.py:0 -> name", + "CALLS tests/test_typescript_namespace_qn.py:0 -> test_typescript_namespace_class_qn_includes_namespace", + "CALLS tests/test_typescript_namespace_qn.py:0 -> type", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> add", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> clear", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> items", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> parse", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> 
start", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> test_module_patterns", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> test_namespace_declarations", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> test_namespace_merging", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> test_typescript_namespaces_comprehensive", + "CALLS tests/test_typescript_namespaces_modules.py:0 -> typescript_namespaces_project", + "CALLS tests/test_typescript_span_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_node_spans", + "CALLS tests/test_typescript_structure_oracle.py:0 -> test_cgr_matches_tsc_oracle_on_typescript_structure", + "CALLS tests/test_typescript_types.py:0 -> add", + "CALLS tests/test_typescript_types.py:0 -> clear", + "CALLS tests/test_typescript_types.py:0 -> save", + "CALLS tests/test_typescript_types.py:0 -> test_basic_type_annotations", + "CALLS tests/test_typescript_types.py:0 -> test_generic_types", + "CALLS tests/test_typescript_types.py:0 -> test_interfaces_and_type_aliases", + "CALLS tests/test_typescript_types.py:0 -> test_type_comprehensive", + "CALLS tests/test_typescript_types.py:0 -> test_utility_types", + "CALLS tests/test_typescript_types.py:0 -> typescript_types_project", + "CALLS tests/test_typescript_types.py:0 -> value", + "CALLS tests/test_unixcoder_unit.py:0 -> fake_model", + "CALLS tests/test_unixcoder_unit.py:0 -> test_adds_current_state_if_empty_finished", + "CALLS tests/test_unixcoder_unit.py:0 -> test_attention_mask_is_4d", + "CALLS tests/test_unixcoder_unit.py:0 -> test_builds_tokens_until_eos", + "CALLS tests/test_unixcoder_unit.py:0 -> test_constructs_hypothesis_path", + "CALLS tests/test_unixcoder_unit.py:0 -> test_done_when_eos_top_and_enough_finished", + "CALLS tests/test_unixcoder_unit.py:0 -> test_first_step_uses_first_beam", + "CALLS tests/test_unixcoder_unit.py:0 -> test_handles_no_eos", + "CALLS tests/test_unixcoder_unit.py:0 -> test_initializes_empty_prevKs", + "CALLS 
tests/test_unixcoder_unit.py:0 -> test_initializes_finished_empty", + "CALLS tests/test_unixcoder_unit.py:0 -> test_initializes_nextYs_with_zeros", + "CALLS tests/test_unixcoder_unit.py:0 -> test_initializes_scores_to_zero", + "CALLS tests/test_unixcoder_unit.py:0 -> test_initializes_with_correct_size", + "CALLS tests/test_unixcoder_unit.py:0 -> test_marks_eos_in_finished", + "CALLS tests/test_unixcoder_unit.py:0 -> test_not_done_initially", + "CALLS tests/test_unixcoder_unit.py:0 -> test_not_done_when_not_enough_finished", + "CALLS tests/test_unixcoder_unit.py:0 -> test_not_done_when_not_eos_top", + "CALLS tests/test_unixcoder_unit.py:0 -> test_returns_batch_shaped_tensor", + "CALLS tests/test_unixcoder_unit.py:0 -> test_returns_finished_sorted_by_score", + "CALLS tests/test_unixcoder_unit.py:0 -> test_returns_last_nextYs", + "CALLS tests/test_unixcoder_unit.py:0 -> test_returns_last_prevKs", + "CALLS tests/test_unixcoder_unit.py:0 -> test_subsequent_steps_combine_scores", + "CALLS tests/test_vector_store.py:0 -> integration_client", + "CALLS tests/test_vector_store.py:0 -> mock_qdrant_client", + "CALLS tests/test_vector_store.py:0 -> reset_global_client", + "CALLS tests/test_vector_store.py:0 -> temp_qdrant_path", + "CALLS tests/test_vector_store.py:0 -> test_empty_search_returns_empty_list", + "CALLS tests/test_vector_store.py:0 -> test_get_qdrant_client_logs_and_reraises_on_lock_error", + "CALLS tests/test_vector_store.py:0 -> test_get_qdrant_client_uses_path_when_url_unset", + "CALLS tests/test_vector_store.py:0 -> test_get_qdrant_client_uses_url_when_set", + "CALLS tests/test_vector_store.py:0 -> test_search_embeddings_calls_query_points", + "CALLS tests/test_vector_store.py:0 -> test_search_embeddings_default_top_k", + "CALLS tests/test_vector_store.py:0 -> test_search_embeddings_filters_null_payloads", + "CALLS tests/test_vector_store.py:0 -> test_search_embeddings_handles_exception", + "CALLS tests/test_vector_store.py:0 -> 
test_store_and_search_roundtrip", + "CALLS tests/test_vector_store.py:0 -> test_store_embedding_calls_upsert", + "CALLS tests/test_vector_store.py:0 -> test_store_embedding_handles_exception", + "CALLS tests/test_vector_store.py:0 -> test_upsert_updates_existing", + "CALLS tests/test_vector_store_batch.py:0 -> test_batches_large_id_sets", + "CALLS tests/test_vector_store_batch.py:0 -> test_builds_correct_point_structs", + "CALLS tests/test_vector_store_batch.py:0 -> test_deletes_given_ids", + "CALLS tests/test_vector_store_batch.py:0 -> test_exponential_backoff_delays", + "CALLS tests/test_vector_store_batch.py:0 -> test_handles_exception_gracefully", + "CALLS tests/test_vector_store_batch.py:0 -> test_noop_on_empty_ids", + "CALLS tests/test_vector_store_batch.py:0 -> test_raises_after_exhausting_retries", + "CALLS tests/test_vector_store_batch.py:0 -> test_raises_on_exception", + "CALLS tests/test_vector_store_batch.py:0 -> test_retries_on_failure_then_succeeds", + "CALLS tests/test_vector_store_batch.py:0 -> test_retrieve_called_with_correct_params", + "CALLS tests/test_vector_store_batch.py:0 -> test_returns_count_on_success", + "CALLS tests/test_vector_store_batch.py:0 -> test_returns_empty_for_empty_input", + "CALLS tests/test_vector_store_batch.py:0 -> test_returns_found_ids", + "CALLS tests/test_vector_store_batch.py:0 -> test_returns_zero_on_empty", + "CALLS tests/test_vector_store_batch.py:0 -> test_returns_zero_on_failure", + "CALLS tests/test_vector_store_batch.py:0 -> test_succeeds_on_first_attempt", + "CALLS tests/test_workspaces.py:0 -> _temp_home", + "CALLS tests/test_workspaces.py:0 -> mock_memgraph_connect", + "CALLS tests/test_workspaces.py:0 -> mock_validate_models", + "CALLS tests/test_workspaces.py:0 -> test_add_repo_derives_project_name", + "CALLS tests/test_workspaces.py:0 -> test_add_repo_duplicate", + "CALLS tests/test_workspaces.py:0 -> test_add_repo_missing_path", + "CALLS tests/test_workspaces.py:0 -> 
test_add_repo_with_explicit_project_name", + "CALLS tests/test_workspaces.py:0 -> test_create_duplicate_raises", + "CALLS tests/test_workspaces.py:0 -> test_create_then_load", + "CALLS tests/test_workspaces.py:0 -> test_create_with_force_overwrites", + "CALLS tests/test_workspaces.py:0 -> test_delete", + "CALLS tests/test_workspaces.py:0 -> test_delete_missing_raises", + "CALLS tests/test_workspaces.py:0 -> test_list_empty", + "CALLS tests/test_workspaces.py:0 -> test_list_sorted", + "CALLS tests/test_workspaces.py:0 -> test_load_missing_raises", + "CALLS tests/test_workspaces.py:0 -> test_remove_repo", + "CALLS tests/test_workspaces.py:0 -> test_remove_repo_not_in_workspace", + "CALLS tests/test_workspaces.py:0 -> test_start_with_unknown_workspace_errors", + "CALLS tests/test_workspaces.py:0 -> test_start_with_workspace_passes_all_projects", + "CALLS tests/test_workspaces.py:0 -> test_workspace_add_remove_repo_via_cli", + "CALLS tests/test_workspaces.py:0 -> test_workspace_create_list_show_delete", + "CALLS tests/test_workspaces.py:0 -> test_workspace_list_empty", + "CALLS tests/test_workspaces.py:0 -> test_workspace_model_project_names", + "CALLS tools/code_retrieval.py:0 -> create_code_retrieval_tool", + "CALLS tools/code_retrieval.py:0 -> get_code_snippet", + "CALLS tools/codebase_query.py:0 -> create_query_tool", + "CALLS tools/codebase_query.py:0 -> query_codebase_knowledge_graph", + "CALLS tools/directory_lister.py:0 -> create_directory_lister_tool", + "CALLS tools/directory_lister.py:0 -> list_directory_contents", + "CALLS tools/file_editor.py:0 -> apply_patch_to_file", + "CALLS tools/file_editor.py:0 -> create_file_editor_tool", + "CALLS tools/file_editor.py:0 -> edit_file", + "CALLS tools/file_editor.py:0 -> get_diff", + "CALLS tools/file_editor.py:0 -> replace_code_surgically", + "CALLS tools/file_reader.py:0 -> create_file_reader_tool", + "CALLS tools/file_reader.py:0 -> read_file_content", + "CALLS tools/file_writer.py:0 -> create_file_writer_tool", + 
"CALLS tools/file_writer.py:0 -> create_new_file", + "CALLS tools/health_checker.py:0 -> get_summary", + "CALLS tools/health_checker.py:0 -> run_all_checks", + "CALLS tools/language.py:0 -> add_grammar", + "CALLS tools/language.py:0 -> cleanup_orphaned_modules", + "CALLS tools/language.py:0 -> list_languages", + "CALLS tools/language.py:0 -> remove_language", + "CALLS tools/semantic_search.py:0 -> create_get_function_source_tool", + "CALLS tools/semantic_search.py:0 -> create_semantic_search_tool", + "CALLS tools/semantic_search.py:0 -> get_function_source_by_id", + "CALLS tools/semantic_search.py:0 -> semantic_search_functions", + "CALLS tools/shell_command.py:0 -> create_shell_command_tool", + "CALLS tools/shell_command.py:0 -> run_shell_command", + "CALLS tools/tool_descriptions.py:0 -> AgenticToolName", + "CALLS tools/tool_descriptions.py:0 -> name", + "CALLS tools/tool_descriptions.py:0 -> read", + "CALLS types_defs.py:0 -> ASTCacheProtocol", + "CALLS types_defs.py:0 -> BatchWrapper", + "CALLS types_defs.py:0 -> CancelledResult", + "CALLS types_defs.py:0 -> CgrignorePatterns", + "CALLS types_defs.py:0 -> CodeSnippetResultDict", + "CALLS types_defs.py:0 -> ColumnDescriptor", + "CALLS types_defs.py:0 -> ConfirmationToolNames", + "CALLS types_defs.py:0 -> CreateFileArgs", + "CALLS types_defs.py:0 -> CursorProtocol", + "CALLS types_defs.py:0 -> DeadCodeRow", + "CALLS types_defs.py:0 -> DeleteProjectErrorResult", + "CALLS types_defs.py:0 -> DeleteProjectSuccessResult", + "CALLS types_defs.py:0 -> EmbeddingQueryResult", + "CALLS types_defs.py:0 -> FunctionMatch", + "CALLS types_defs.py:0 -> FunctionNodeProps", + "CALLS types_defs.py:0 -> FunctionRegistryTrieProtocol", + "CALLS types_defs.py:0 -> GraphData", + "CALLS types_defs.py:0 -> GraphMetadata", + "CALLS types_defs.py:0 -> GraphSummary", + "CALLS types_defs.py:0 -> JavaAnnotationInfo", + "CALLS types_defs.py:0 -> JavaClassInfo", + "CALLS types_defs.py:0 -> JavaFieldInfo", + "CALLS types_defs.py:0 -> 
JavaMethodCallInfo", + "CALLS types_defs.py:0 -> JavaMethodInfo", + "CALLS types_defs.py:0 -> LanguageImport", + "CALLS types_defs.py:0 -> LanguageQueries", + "CALLS types_defs.py:0 -> ListProjectsErrorResult", + "CALLS types_defs.py:0 -> ListProjectsSuccessResult", + "CALLS types_defs.py:0 -> LoadableProtocol", + "CALLS types_defs.py:0 -> MCPInputSchema", + "CALLS types_defs.py:0 -> MCPInputSchemaProperty", + "CALLS types_defs.py:0 -> MCPToolSchema", + "CALLS types_defs.py:0 -> ModelConfigKwargs", + "CALLS types_defs.py:0 -> NodeBatchRow", + "CALLS types_defs.py:0 -> NodeData", + "CALLS types_defs.py:0 -> NodeType", + "CALLS types_defs.py:0 -> PathValidatorProtocol", + "CALLS types_defs.py:0 -> QueryJsonOutput", + "CALLS types_defs.py:0 -> QueryResultDict", + "CALLS types_defs.py:0 -> RelBatchRow", + "CALLS types_defs.py:0 -> RelationshipData", + "CALLS types_defs.py:0 -> ReplaceCodeArgs", + "CALLS types_defs.py:0 -> SemanticSearchResult", + "CALLS types_defs.py:0 -> ShellCommandArgs", + "CALLS types_defs.py:0 -> ToolNames", + "CALLS types_defs.py:0 -> TreeSitterNodeProtocol", + "CALLS types_defs.py:0 -> _ensure_loaded", + "CALLS types_defs.py:0 -> callable_params", + "CALLS types_defs.py:0 -> children", + "CALLS types_defs.py:0 -> close", + "CALLS types_defs.py:0 -> description", + "CALLS types_defs.py:0 -> execute", + "CALLS types_defs.py:0 -> fetchall", + "CALLS types_defs.py:0 -> find_ending_with", + "CALLS types_defs.py:0 -> find_with_prefix", + "CALLS types_defs.py:0 -> get", + "CALLS types_defs.py:0 -> is_abstract", + "CALLS types_defs.py:0 -> is_property", + "CALLS types_defs.py:0 -> items", + "CALLS types_defs.py:0 -> keys", + "CALLS types_defs.py:0 -> mark_abstract", + "CALLS types_defs.py:0 -> mark_callable_params", + "CALLS types_defs.py:0 -> mark_property", + "CALLS types_defs.py:0 -> name", + "CALLS types_defs.py:0 -> project_root", + "CALLS types_defs.py:0 -> property_names", + "CALLS types_defs.py:0 -> register_unique_qn", + "CALLS types_defs.py:0 
-> text", + "CALLS types_defs.py:0 -> type", + "CALLS types_defs.py:0 -> variants", + "CALLS unixcoder.py:0 -> UniXcoder", + "CALLS unixcoder.py:0 -> forward", + "CALLS unixcoder.py:0 -> generate", + "CALLS utils/dependencies.py:0 -> check_dependencies", + "CALLS utils/dependencies.py:0 -> get_missing_dependencies", + "CALLS utils/dependencies.py:0 -> has_semantic_dependencies", + "CALLS utils/fqn_resolver.py:0 -> extract_function_fqns", + "CALLS utils/fqn_resolver.py:0 -> find_function_source_by_fqn", + "CALLS utils/path_utils.py:0 -> cached_resolve_posix", + "CALLS utils/path_utils.py:0 -> derive_project_name", + "CALLS utils/path_utils.py:0 -> resolve_repo_path", + "CALLS utils/path_utils.py:0 -> should_skip_path", + "CALLS utils/path_utils.py:0 -> should_skip_rel_file", + "CALLS utils/rich_markdown.py:0 -> LeftAlignedHeading", + "CALLS utils/rich_markdown.py:0 -> LeftAlignedMarkdown", + "CALLS utils/source_extraction.py:0 -> extract_source_with_fallback", + "CALLS utils/source_extraction.py:0 -> validate_source_location", + "CALLS utils/token_utils.py:0 -> truncate_results_by_tokens", + "CALLS vector_store.py:0 -> close_qdrant_client", + "CALLS vector_store.py:0 -> delete_project_embeddings", + "CALLS vector_store.py:0 -> search_embeddings", + "CALLS vector_store.py:0 -> store_embedding", + "CALLS vector_store.py:0 -> verify_stored_ids", + "CALLS workspaces/cli.py:0 -> add_repo_cmd", + "CALLS workspaces/cli.py:0 -> cli", + "CALLS workspaces/cli.py:0 -> create_cmd", + "CALLS workspaces/cli.py:0 -> delete_cmd", + "CALLS workspaces/cli.py:0 -> list_cmd", + "CALLS workspaces/cli.py:0 -> remove_repo_cmd", + "CALLS workspaces/cli.py:0 -> show_cmd", + "CALLS workspaces/constants.py:0 -> repo", + "CALLS workspaces/models.py:0 -> WorkspaceConfig", + "CALLS workspaces/models.py:0 -> WorkspaceRepo", + "CALLS workspaces/models.py:0 -> find_repo", + "CALLS workspaces/models.py:0 -> project_names", + "CALLS workspaces/storage.py:0 -> add_repo", + "CALLS 
workspaces/storage.py:0 -> create_workspace", + "CALLS workspaces/storage.py:0 -> delete_workspace", + "CALLS workspaces/storage.py:0 -> list_workspaces", + "CALLS workspaces/storage.py:0 -> remove_repo" + ] + } +} diff --git a/evals/results/retrieval_scores.csv b/evals/results/retrieval_scores.csv new file mode 100644 index 000000000..20955ec2b --- /dev/null +++ b/evals/results/retrieval_scores.csv @@ -0,0 +1,4 @@ +category,label,tp,fp,fn,precision,recall,f1 +retrieval,graph,3217,587,37,0.8457,0.9886,0.9116 +retrieval,grep_name,3254,10591,0,0.235,1.0,0.3806 +retrieval,grep_call,3254,5638,0,0.3659,1.0,0.5358 diff --git a/evals/results/scores.csv b/evals/results/scores.csv new file mode 100644 index 000000000..b5c3f7ff6 --- /dev/null +++ b/evals/results/scores.csv @@ -0,0 +1,11 @@ +category,label,tp,fp,fn,precision,recall,f1 +node,Module,417,0,0,1.0,1.0,1.0 +node,Class,926,0,0,1.0,1.0,1.0 +node,Function,1955,0,0,1.0,1.0,1.0 +node,Method,3919,0,0,1.0,1.0,1.0 +node,ALL,7217,0,0,1.0,1.0,1.0 +edge,DEFINES,2742,0,0,1.0,1.0,1.0 +edge,DEFINES_METHOD,3919,0,0,1.0,1.0,1.0 +edge,ALL,6661,0,0,1.0,1.0,1.0 +edge,INHERITS,153,0,0,1.0,1.0,1.0 +edge,IMPORTS,1274,0,0,1.0,1.0,1.0 diff --git a/evals/results/static_calls_diff.json b/evals/results/static_calls_diff.json new file mode 100644 index 000000000..3088bb278 --- /dev/null +++ b/evals/results/static_calls_diff.json @@ -0,0 +1,6 @@ +{ + "static-calls:direct-call-recall": { + "missing": [], + "extra": [] + } +} diff --git a/evals/results/static_calls_scores.csv b/evals/results/static_calls_scores.csv new file mode 100644 index 000000000..4ad208351 --- /dev/null +++ b/evals/results/static_calls_scores.csv @@ -0,0 +1,2 @@ +category,label,tp,fp,fn,precision,recall,f1 +edge,direct-call-recall,4434,0,0,1.0,1.0,1.0 diff --git a/evals/retrieval.py b/evals/retrieval.py new file mode 100644 index 000000000..fefb0dd08 --- /dev/null +++ b/evals/retrieval.py @@ -0,0 +1,233 @@ +# (H) Retrieval benchmark: graph-augmented call localization vs 
grep. For every +# (H) first-party symbol S, the task is to find the files that call S. The graph +# (H) condition uses cgr's resolved CALLS/INSTANTIATES edges; the grep conditions +# (H) use ripgrep (bare-name and call-tuned patterns). All three are scored +# (H) against the same Python ast oracle over the same file and symbol universe, +# (H) as a set of (caller_file, callee_simple_name) name-edges restricted to +# (H) first-party, non-dunder callees -- the set cgr can emit. This isolates +# (H) retrieval quality (does the graph beat grep) from any LLM in the loop, the +# (H) decoupled measurement the GitLab GKG eval flagged as out of scope. +import ast +import re +import shutil +import subprocess +from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger + +from codebase_rag import constants as cs + +from . import constants as ec +from . import logs as ls +from .ast_oracle import _iter_py_files +from .cgr_graph import _capture +from .module_calls import _callee_name, _first_party_names, _is_dunder +from .score import _name_edge_bucket, _prf +from .structure_report import render, write_outputs +from .types_defs import ( + DiffBucket, + LocationStats, + NameEdge, + NodeKey, + ScoreResult, + ScoreRow, +) + +console_target = Path(ec.RETRIEVAL_DEFAULT_TARGET) + +_CALLS = cs.RelationshipType.CALLS.value +_INSTANTIATES = cs.RelationshipType.INSTANTIATES.value +_MODULE = cs.NodeLabel.MODULE.value +_METHOD = cs.NodeLabel.METHOD.value +_IDENTIFIER = re.compile(ec.IDENTIFIER_PATTERN) +_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0) + + +def parse_py_trees(target: Path) -> tuple[list[tuple[str, ast.Module]], set[str]]: + trees: list[tuple[str, ast.Module]] = [] + files: set[str] = set() + for path in _iter_py_files(target): + rel = path.relative_to(target).as_posix() + files.add(rel) + try: + trees.append((rel, ast.parse(path.read_text(encoding=cs.ENCODING_UTF8)))) + except (SyntaxError, UnicodeDecodeError, ValueError) as error: + 
logger.warning(ls.ORACLE_PARSE_FAILED.format(path=rel, error=error)) + return trees, files + + +def first_party_symbols(trees: list[tuple[str, ast.Module]]) -> set[str]: + names = _first_party_names([tree for _rel, tree in trees]) + return {name for name in names if not _is_dunder(name)} + + +def _edge(file: str, name: str) -> NameEdge: + return NameEdge(_CALLS, NodeKey(_MODULE, file, ec.MODULE_START_LINE), name) + + +def oracle_call_edges( + trees: list[tuple[str, ast.Module]], first_party: set[str] +) -> set[NameEdge]: + edges: set[NameEdge] = set() + for rel, tree in trees: + for node in ast.walk(tree): + if isinstance(node, ast.Call) and (name := _callee_name(node.func)): + if name in first_party: + edges.add(_edge(rel, name)) + return edges + + +def cgr_call_edges( + target: Path, project_name: str, first_party: set[str] +) -> set[NameEdge]: + ingestor = _capture(target, project_name) + caller_path: dict[tuple[str, str], str] = { + (str(label), str(uid)): str(props[cs.KEY_PATH]) + for (label, uid), props in ingestor.nodes.items() + if props.get(cs.KEY_PATH) and str(props[cs.KEY_PATH]).endswith(ec.PY_SUFFIX) + } + + edges: set[NameEdge] = set() + for from_label, from_val, rel_type, to_label, to_val in ingestor.rels: + if rel_type not in (_CALLS, _INSTANTIATES): + continue + path = caller_path.get((from_label, str(from_val))) + if path is None: + continue + segments = str(to_val).split(ec.SEP) + name = segments[-1] + # (H) A constructor call resolves via CALLS to `X.__init__` (a METHOD); the + # (H) oracle sees the class name `X`, so credit the class, matching L2. 
+ if name == ec.INIT_STEM and to_label == _METHOD and len(segments) >= 2: + name = segments[-2] + if _is_dunder(name) or name not in first_party: + continue + edges.add(_edge(path, name)) + return edges + + +def _grep_patterns(first_party: set[str], mode: ec.GrepMode) -> str: + template = ( + ec.GREP_CALL_TEMPLATE if mode == ec.GrepMode.CALL else ec.GREP_NAME_TEMPLATE + ) + return ec.PATTERN_SEP.join( + template.format(name=re.escape(name)) for name in sorted(first_party) + ) + + +def grep_call_edges( + target: Path, first_party: set[str], files: set[str], mode: ec.GrepMode +) -> set[NameEdge]: + if not first_party: + return set() + completed = subprocess.run( + [ + ec.RG_BIN, + ec.RG_ONLY_MATCHING, + ec.RG_WITH_FILENAME, + ec.RG_NO_LINE_NUMBER, + ec.RG_NO_HEADING, + ec.RG_NULL, + ec.RG_GLOB_FLAG, + ec.RG_PY_GLOB, + ec.RG_PATTERN_FILE_FLAG, + ec.RG_STDIN, + ec.RG_SEARCH_PATH, + ], + cwd=target, + input=_grep_patterns(first_party, mode), + capture_output=True, + text=True, + check=False, + ) + if completed.returncode not in ec.RG_OK_RETURNCODES: + logger.warning(completed.stderr.strip()) + return set() + + edges: set[NameEdge] = set() + for line in completed.stdout.splitlines(): + path_text, sep, matched = line.partition(ec.RG_NULL_SEP) + if not sep: + continue + # (H) Path(...).as_posix() strips the leading ./ and folds Windows + # (H) backslashes to the forward-slash form parse_py_trees keys files on. 
+ rel = Path(path_text).as_posix() + if rel not in files: + continue + token = _IDENTIFIER.match(matched) + if token is not None and token.group(0) in first_party: + edges.add(_edge(rel, token.group(0))) + return edges + + +def score_retrieval( + conditions: list[tuple[str, set[NameEdge]]], oracle: set[NameEdge] +) -> ScoreResult: + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + for label, retrieved in conditions: + row = _prf(ec.Category.RETRIEVAL.value, label, retrieved, oracle) + if row is not None: + rows.append(row) + diff[ec.RETRIEVAL_DIFF_PREFIX + label] = _name_edge_bucket(retrieved, oracle) + return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff) + + +def main( + target: Annotated[ + Path, typer.Option(help="cgr source to evaluate call retrieval for.") + ] = console_target, + project_name: Annotated[ + str, typer.Option(help="cgr project name; defaults to target dir name.") + ] = "", + out_dir: Annotated[ + Path, + typer.Option( + help="Directory for retrieval_scores.csv and retrieval_diff.json." 
+ ), + ] = Path(ec.DEFAULT_OUT_DIR), +) -> None: + if shutil.which(ec.RG_BIN) is None: + logger.error(ls.RETRIEVAL_RG_MISSING.format(binary=ec.RG_BIN)) + raise typer.Exit(code=1) + + target = target.resolve() + project = project_name or target.name + + trees, files = parse_py_trees(target) + first_party = first_party_symbols(trees) + logger.info(ls.RETRIEVAL_SYMBOLS.format(count=len(first_party))) + + logger.info(ls.RETRIEVAL_EXTRACTING_ORACLE.format(target=target)) + oracle = oracle_call_edges(trees, first_party) + logger.success(ls.RETRIEVAL_ORACLE_DONE.format(count=len(oracle))) + + logger.info(ls.RETRIEVAL_EXTRACTING_CGR.format(target=target, project=project)) + graph = cgr_call_edges(target, project, first_party) + logger.success(ls.RETRIEVAL_CGR_DONE.format(count=len(graph))) + + conditions: list[tuple[str, set[NameEdge]]] = [ + (ec.RetrievalCondition.GRAPH.value, graph) + ] + for mode, label in ( + (ec.GrepMode.NAME, ec.RetrievalCondition.GREP_NAME.value), + (ec.GrepMode.CALL, ec.RetrievalCondition.GREP_CALL.value), + ): + logger.info(ls.RETRIEVAL_EXTRACTING_GREP.format(mode=mode.value, target=target)) + grep_edges = grep_call_edges(target, first_party, files, mode) + logger.success( + ls.RETRIEVAL_GREP_DONE.format(mode=mode.value, count=len(grep_edges)) + ) + conditions.append((label, grep_edges)) + + result = score_retrieval(conditions, oracle) + write_outputs( + result, out_dir, ec.RETRIEVAL_SCORES_FILENAME, ec.RETRIEVAL_DIFF_FILENAME + ) + render(result, ec.RETRIEVAL_TITLE) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/rust_l1.py b/evals/rust_l1.py new file mode 100644 index 000000000..bc9b981ff --- /dev/null +++ b/evals/rust_l1.py @@ -0,0 +1,51 @@ +from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger + +from . import constants as ec +from . 
import logs as ls +from .cgr_graph import extract_cgr_rust_graph +from .oracles import run_rust_oracle, rust_available +from .score import score_structure +from .structure_report import render, write_outputs + +_TITLE = "cgr L1 structure eval (Rust vs syn)" + + +def main( + target: Annotated[ + Path, typer.Option(help="Directory of Rust sources to evaluate.") + ] = Path(ec.GO_DEFAULT_TARGET), + project_name: Annotated[ + str, typer.Option(help="cgr project name; defaults to target dir name.") + ] = "", + out_dir: Annotated[ + Path, typer.Option(help="Directory for rs_scores.csv and rs_diff.json.") + ] = Path(ec.DEFAULT_OUT_DIR), +) -> None: + if not rust_available(): + logger.error(ls.RS_ORACLE_MISSING.format(binary=ec.CARGO_BIN)) + raise typer.Exit(code=1) + + target = target.resolve() + project = project_name or target.name + + logger.info(ls.RS_EXTRACTING_CGR.format(target=target, project=project)) + cgr = extract_cgr_rust_graph(target, project) + logger.success(ls.RS_CGR_DONE.format(count=len(cgr.nodes))) + + logger.info(ls.RS_EXTRACTING_ORACLE.format(binary=ec.CARGO_BIN, target=target)) + oracle = run_rust_oracle(target) + logger.success(ls.RS_ORACLE_DONE.format(count=len(oracle.nodes))) + + result = score_structure( + cgr, oracle, ec.RS_SCORED_NODE_KINDS, ec.SCORED_EDGE_TYPES, grade_spans=True + ) + write_outputs(result, out_dir, ec.RS_SCORES_FILENAME, ec.RS_DIFF_FILENAME) + render(result, _TITLE) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/rust_retrieval.py b/evals/rust_retrieval.py new file mode 100644 index 000000000..29b3a7ac0 --- /dev/null +++ b/evals/rust_retrieval.py @@ -0,0 +1,113 @@ +# (H) Multi-language retrieval (Rust). Extends the file-level call-localization +# (H) benchmark to Rust: for each first-party Rust symbol, which files call it. +# (H) cgr's Rust CALLS edges (reduced to caller file + callee simple name) are +# (H) graded against syn call sites over the same first-party name universe. 
+# (H) The oracle uses Rust's own parser (syn), independent of cgr's tree-sitter +# (H) frontend, so this measures cgr's cross-file Rust call resolution against +# (H) ground truth (mirrors evals/go_retrieval.py). +from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger + +from codebase_rag import constants as cs + +from . import constants as ec +from . import logs as ls +from .cgr_graph import _capture +from .oracles import run_rust_call_oracle, rust_available +from .score import _prf +from .structure_report import render, write_outputs +from .types_defs import DiffBucket, LocationStats, ScoreResult, ScoreRow + +console_target = Path(ec.RUST_DEFAULT_TARGET) + +_CALLS = cs.RelationshipType.CALLS.value +_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0) + +CallEdge = tuple[str, str] + + +def oracle_rust_call_edges(target: Path) -> tuple[set[CallEdge], frozenset[str]]: + return run_rust_call_oracle(target) + + +def cgr_rust_call_edges( + target: Path, project: str, declared: frozenset[str] +) -> set[CallEdge]: + ingestor = _capture(target, project) + caller_path: dict[tuple[str, str], str] = { + (str(label), str(uid)): str(props[cs.KEY_PATH]) + for (label, uid), props in ingestor.nodes.items() + if props.get(cs.KEY_PATH) and str(props[cs.KEY_PATH]).endswith(ec.RS_SUFFIX) + } + edges: set[CallEdge] = set() + for from_label, from_val, rel_type, _to_label, to_val in ingestor.rels: + if rel_type != _CALLS: + continue + path = caller_path.get((str(from_label), str(from_val))) + if path is None: + continue + name = str(to_val).split(cs.SEPARATOR_DOT)[-1] + if name in declared: + edges.add((path, name)) + return edges + + +def _edge_repr(edge: CallEdge) -> str: + return ec.RUST_CALL_EDGE_REPR.format(file=edge[0], name=edge[1]) + + +def score_rust_retrieval(cgr: set[CallEdge], oracle: set[CallEdge]) -> ScoreResult: + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + row = _prf(ec.Category.RETRIEVAL.value, 
ec.RUST_RETRIEVAL_LABEL, cgr, oracle) + if row is not None: + rows.append(row) + diff[ec.RUST_RETRIEVAL_DIFF_PREFIX + ec.RUST_RETRIEVAL_LABEL] = DiffBucket( + missing=[_edge_repr(e) for e in sorted(oracle - cgr)], + extra=[_edge_repr(e) for e in sorted(cgr - oracle)], + ) + return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff) + + +def main( + target: Annotated[ + Path, typer.Option(help="Directory of Rust sources to evaluate call retrieval.") + ] = console_target, + project_name: Annotated[ + str, typer.Option(help="cgr project name; defaults to target dir name.") + ] = "", + out_dir: Annotated[ + Path, + typer.Option(help="Directory for rust_retrieval_scores.csv and diff json."), + ] = Path(ec.DEFAULT_OUT_DIR), +) -> None: + if not rust_available(): + logger.error(ls.RUST_ORACLE_MISSING.format(binary=ec.CARGO_BIN)) + raise typer.Exit(code=1) + + target = target.resolve() + project = project_name or target.name + + logger.info(ls.RUST_RETRIEVAL_ORACLE.format(binary=ec.CARGO_BIN, target=target)) + oracle, declared = oracle_rust_call_edges(target) + logger.success(ls.RUST_RETRIEVAL_ORACLE_DONE.format(count=len(oracle))) + + logger.info(ls.RUST_RETRIEVAL_CGR.format(target=target, project=project)) + cgr = cgr_rust_call_edges(target, project, declared) + logger.success(ls.RUST_RETRIEVAL_CGR_DONE.format(count=len(cgr))) + + result = score_rust_retrieval(cgr, oracle) + write_outputs( + result, + out_dir, + ec.RUST_RETRIEVAL_SCORES_FILENAME, + ec.RUST_RETRIEVAL_DIFF_FILENAME, + ) + render(result, ec.RUST_RETRIEVAL_TITLE) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/score.py b/evals/score.py new file mode 100644 index 000000000..12f7f985e --- /dev/null +++ b/evals/score.py @@ -0,0 +1,319 @@ +from statistics import fmean +from typing import TypeVar + +from codebase_rag import constants as cs + +from . 
import constants as ec +from .types_defs import ( + DiffBucket, + EdgeKey, + GraphData, + LocationStats, + NameEdge, + NodeKey, + ScoreResult, + ScoreRow, +) + +T = TypeVar("T") + + +def score(cgr: GraphData, oracle: GraphData) -> ScoreResult: + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + + cgr_nodes_all: set[NodeKey] = set() + oracle_nodes_all: set[NodeKey] = set() + for kind in ec.SCORED_NODE_KINDS: + cgr_set = {k for k in cgr.nodes if k.kind == kind.value} + oracle_set = {k for k in oracle.nodes if k.kind == kind.value} + cgr_nodes_all |= cgr_set + oracle_nodes_all |= oracle_set + row = _prf(ec.Category.NODE.value, kind.value, cgr_set, oracle_set) + if row is not None: + rows.append(row) + diff[ec.DIFF_NODE_PREFIX + kind.value] = _node_bucket( + cgr_set, oracle_set, cgr, oracle + ) + node_aggregate = _prf( + ec.Category.NODE.value, ec.AGGREGATE_LABEL, cgr_nodes_all, oracle_nodes_all + ) + if node_aggregate is not None: + rows.append(node_aggregate) + + cgr_edges_all: set[EdgeKey] = set() + oracle_edges_all: set[EdgeKey] = set() + for edge_type in ec.SCORED_EDGE_TYPES: + cgr_set_e = {e for e in cgr.edges if e.rel_type == edge_type.value} + oracle_set_e = {e for e in oracle.edges if e.rel_type == edge_type.value} + cgr_edges_all |= cgr_set_e + oracle_edges_all |= oracle_set_e + row = _prf(ec.Category.EDGE.value, edge_type.value, cgr_set_e, oracle_set_e) + if row is not None: + rows.append(row) + diff[ec.DIFF_EDGE_PREFIX + edge_type.value] = _edge_bucket( + cgr_set_e, oracle_set_e + ) + edge_aggregate = _prf( + ec.Category.EDGE.value, ec.AGGREGATE_LABEL, cgr_edges_all, oracle_edges_all + ) + if edge_aggregate is not None: + rows.append(edge_aggregate) + + for name_edge_type in ec.SCORED_NAME_EDGE_TYPES: + cgr_set_n = {e for e in cgr.name_edges if e.rel_type == name_edge_type.value} + oracle_set_n = { + e for e in oracle.name_edges if e.rel_type == name_edge_type.value + } + row = _prf( + ec.Category.EDGE.value, name_edge_type.value, cgr_set_n, 
oracle_set_n + ) + if row is not None: + rows.append(row) + diff[ec.DIFF_NAME_EDGE_PREFIX + name_edge_type.value] = _name_edge_bucket( + cgr_set_n, oracle_set_n + ) + + # (H) The Python ast oracle records real end_lineno, so spans are graded like + # (H) the native-oracle languages (Class/Function/Method; Module is excluded). + span_result = score_span(cgr, oracle, ec.SPANNED_NODE_KINDS_TUPLE) + rows.extend(span_result.rows) + diff.update(span_result.diff) + + return ScoreResult(rows=rows, location=_location_stats(cgr, oracle), diff=diff) + + +def score_node_kinds( + cgr: GraphData, oracle: GraphData, kinds: tuple[cs.NodeLabel, ...] +) -> ScoreResult: + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + cgr_all: set[NodeKey] = set() + oracle_all: set[NodeKey] = set() + for kind in kinds: + cgr_set = {k for k in cgr.nodes if k.kind == kind.value} + oracle_set = {k for k in oracle.nodes if k.kind == kind.value} + cgr_all |= cgr_set + oracle_all |= oracle_set + row = _prf(ec.Category.NODE.value, kind.value, cgr_set, oracle_set) + if row is not None: + rows.append(row) + diff[ec.DIFF_NODE_PREFIX + kind.value] = _node_bucket( + cgr_set, oracle_set, cgr, oracle + ) + aggregate = _prf(ec.Category.NODE.value, ec.AGGREGATE_LABEL, cgr_all, oracle_all) + if aggregate is not None: + rows.append(aggregate) + return ScoreResult(rows=rows, location=LocationStats(0, 0, 0, 0.0, 0), diff=diff) + + +def score_edge_types( + cgr: GraphData, oracle: GraphData, edge_types: tuple[cs.RelationshipType, ...] 
+) -> ScoreResult: + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + cgr_all: set[EdgeKey] = set() + oracle_all: set[EdgeKey] = set() + for edge_type in edge_types: + cgr_set = {e for e in cgr.edges if e.rel_type == edge_type.value} + oracle_set = {e for e in oracle.edges if e.rel_type == edge_type.value} + cgr_all |= cgr_set + oracle_all |= oracle_set + row = _prf(ec.Category.EDGE.value, edge_type.value, cgr_set, oracle_set) + if row is not None: + rows.append(row) + diff[ec.DIFF_EDGE_PREFIX + edge_type.value] = _edge_bucket( + cgr_set, oracle_set + ) + aggregate = _prf(ec.Category.EDGE.value, ec.AGGREGATE_LABEL, cgr_all, oracle_all) + if aggregate is not None: + rows.append(aggregate) + return ScoreResult(rows=rows, location=LocationStats(0, 0, 0, 0.0, 0), diff=diff) + + +def score_name_edge_types( + cgr: GraphData, + oracle: GraphData, + name_edge_types: tuple[cs.RelationshipType, ...], +) -> ScoreResult: + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + cgr_all: set[NameEdge] = set() + oracle_all: set[NameEdge] = set() + for edge_type in name_edge_types: + cgr_set = {e for e in cgr.name_edges if e.rel_type == edge_type.value} + oracle_set = {e for e in oracle.name_edges if e.rel_type == edge_type.value} + cgr_all |= cgr_set + oracle_all |= oracle_set + row = _prf(ec.Category.EDGE.value, edge_type.value, cgr_set, oracle_set) + if row is not None: + rows.append(row) + diff[ec.DIFF_NAME_EDGE_PREFIX + edge_type.value] = _name_edge_bucket( + cgr_set, oracle_set + ) + aggregate = _prf(ec.Category.EDGE.value, ec.AGGREGATE_LABEL, cgr_all, oracle_all) + if aggregate is not None: + rows.append(aggregate) + return ScoreResult(rows=rows, location=LocationStats(0, 0, 0, 0.0, 0), diff=diff) + + +_SpanKey = tuple[str, str, int, int] + + +def score_span( + cgr: GraphData, oracle: GraphData, kinds: tuple[cs.NodeLabel, ...] 
+) -> ScoreResult: + # (H) Grade node SPANS (end_line) only on nodes both sides identify by + # (H) (kind, file, start), so an end_line disagreement is not masked by, nor + # (H) conflated with, a node-identity miss. Restricted to the shared key set, + # (H) fp and fn each count one end_line mismatch (precision == recall). + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + cgr_all: set[_SpanKey] = set() + oracle_all: set[_SpanKey] = set() + shared = cgr.nodes.keys() & oracle.nodes.keys() + for kind in kinds: + keys = {k for k in shared if k.kind == kind.value} + cgr_set = {(k.kind, k.file, k.start_line, cgr.nodes[k].end_line) for k in keys} + oracle_set = { + (k.kind, k.file, k.start_line, oracle.nodes[k].end_line) for k in keys + } + cgr_all |= cgr_set + oracle_all |= oracle_set + row = _prf(ec.Category.SPAN.value, kind.value, cgr_set, oracle_set) + if row is not None: + rows.append(row) + diff[ec.DIFF_SPAN_PREFIX + kind.value] = _span_bucket(cgr_set, oracle_set) + aggregate = _prf(ec.Category.SPAN.value, ec.AGGREGATE_LABEL, cgr_all, oracle_all) + if aggregate is not None: + rows.append(aggregate) + return ScoreResult(rows=rows, location=LocationStats(0, 0, 0, 0.0, 0), diff=diff) + + +def _fmt_span(span: _SpanKey) -> str: + kind, file, start, end = span + return ec.SPAN_REPR.format(kind=kind, file=file, start=start, end=end) + + +def _span_bucket(cgr_set: set[_SpanKey], oracle_set: set[_SpanKey]) -> DiffBucket: + missing = [_fmt_span(s) for s in sorted(oracle_set - cgr_set)] + extra = [_fmt_span(s) for s in sorted(cgr_set - oracle_set)] + return DiffBucket(missing=missing, extra=extra) + + +def score_structure( + cgr: GraphData, + oracle: GraphData, + node_kinds: tuple[cs.NodeLabel, ...], + edge_types: tuple[cs.RelationshipType, ...], + grade_spans: bool = False, +) -> ScoreResult: + node_result = score_node_kinds(cgr, oracle, node_kinds) + edge_result = score_edge_types(cgr, oracle, edge_types) + # (H) Inheritance name-edges only produce rows when a 
side has them, so this + # (H) is a no-op for languages without inheritance (Go, Lua). + name_result = score_name_edge_types(cgr, oracle, ec.INHERITANCE_NAME_EDGE_TYPES) + # (H) Spans are opt-in per language: only oracles that emit end_line can grade + # (H) them, else every multi-line node reads as a mismatch against the start. + span_result = ( + score_span(cgr, oracle, node_kinds) + if grade_spans + else ScoreResult(rows=[], location=LocationStats(0, 0, 0, 0.0, 0), diff={}) + ) + return ScoreResult( + rows=node_result.rows + edge_result.rows + name_result.rows + span_result.rows, + location=node_result.location, + diff={ + **node_result.diff, + **edge_result.diff, + **name_result.diff, + **span_result.diff, + }, + ) + + +def _fmt_name_edge(edge: NameEdge) -> str: + return ec.NAME_EDGE_REPR.format( + rel=edge.rel_type, + sfile=edge.source.file, + sstart=edge.source.start_line, + target=edge.target_name, + ) + + +def _name_edge_bucket(cgr_set: set[NameEdge], oracle_set: set[NameEdge]) -> DiffBucket: + missing = [_fmt_name_edge(e) for e in sorted(oracle_set - cgr_set)] + extra = [_fmt_name_edge(e) for e in sorted(cgr_set - oracle_set)] + return DiffBucket(missing=missing, extra=extra) + + +def _prf(category: str, label: str, cgr: set[T], oracle: set[T]) -> ScoreRow | None: + tp = len(cgr & oracle) + fp = len(cgr - oracle) + fn = len(oracle - cgr) + if tp + fp + fn == 0: + return None + precision = tp / (tp + fp) if tp + fp else 0.0 + recall = tp / (tp + fn) if tp + fn else 0.0 + f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0 + return ScoreRow( + category=category, + label=label, + tp=tp, + fp=fp, + fn=fn, + precision=round(precision, ec.ROUND_DIGITS), + recall=round(recall, ec.ROUND_DIGITS), + f1=round(f1, ec.ROUND_DIGITS), + ) + + +def _fmt_node(key: NodeKey, name: str) -> str: + return ec.NODE_REPR.format( + kind=key.kind, file=key.file, start=key.start_line, name=name + ) + + +def _fmt_edge(edge: EdgeKey) -> str: + return 
ec.EDGE_REPR.format( + rel=edge.rel_type, + pfile=edge.parent.file, + pstart=edge.parent.start_line, + cfile=edge.child.file, + cstart=edge.child.start_line, + ) + + +def _node_bucket( + cgr_set: set[NodeKey], + oracle_set: set[NodeKey], + cgr: GraphData, + oracle: GraphData, +) -> DiffBucket: + missing = [_fmt_node(k, oracle.nodes[k].name) for k in sorted(oracle_set - cgr_set)] + extra = [_fmt_node(k, cgr.nodes[k].name) for k in sorted(cgr_set - oracle_set)] + return DiffBucket(missing=missing, extra=extra) + + +def _edge_bucket(cgr_set: set[EdgeKey], oracle_set: set[EdgeKey]) -> DiffBucket: + missing = [_fmt_edge(e) for e in sorted(oracle_set - cgr_set)] + extra = [_fmt_edge(e) for e in sorted(cgr_set - oracle_set)] + return DiffBucket(missing=missing, extra=extra) + + +def _location_stats(cgr: GraphData, oracle: GraphData) -> LocationStats: + shared = [ + k + for k in cgr.nodes.keys() & oracle.nodes.keys() + if k.kind in ec.SPANNED_NODE_KINDS + ] + deltas = [abs(cgr.nodes[k].end_line - oracle.nodes[k].end_line) for k in shared] + if not deltas: + return LocationStats(0, 0, 0, 0.0, 0) + return LocationStats( + matched=len(deltas), + end_exact=sum(1 for d in deltas if d == 0), + end_within_one=sum(1 for d in deltas if d <= 1), + mean_abs_delta=round(fmean(deltas), ec.ROUND_DIGITS), + max_abs_delta=max(deltas), + ) diff --git a/evals/semantic_search.py b/evals/semantic_search.py new file mode 100644 index 000000000..2798fa430 --- /dev/null +++ b/evals/semantic_search.py @@ -0,0 +1,107 @@ +# (H) Semantic-search relevance eval. cgr's semantic search embeds each function's +# (H) source and retrieves by cosine similarity to a query embedding. This grades +# (H) that relevance directly: for controlled fixtures whose natural-language query +# (H) maps unambiguously to one function, does cgr's embedder rank that function in +# (H) the top k? 
It uses cgr's own embedder over function source extracted from the +# (H) captured graph, so it tests cgr's embedding + ranking pipeline (the part that +# (H) decides relevance); the Qdrant ANN layer only approximates this same ranking. +from pathlib import Path +from typing import NamedTuple + +from codebase_rag import constants as cs + +from . import constants as ec +from .cgr_graph import _capture +from .score import _prf +from .types_defs import DiffBucket, LocationStats, ScoreResult, ScoreRow + +_FUNCTION = cs.NodeLabel.FUNCTION.value +_METHOD = cs.NodeLabel.METHOD.value +_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0) + + +class SemanticCase(NamedTuple): + query: str + expected_qn: str + + +def _cosine(a: list[float], b: list[float]) -> float: + dot = sum(x * y for x, y in zip(a, b, strict=False)) + na = sum(x * x for x in a) ** 0.5 + nb = sum(y * y for y in b) ** 0.5 + return dot / (na * nb) if na and nb else 0.0 + + +def function_snippets(target: Path, project: str) -> dict[str, str]: + # (H) The source of every first-party function/method, keyed by qualified name, + # (H) read from the captured node's file and span -- the same text cgr embeds. 
+ ingestor = _capture(target, project) + snippets: dict[str, str] = {} + for (label, uid), props in ingestor.nodes.items(): + if label not in (_FUNCTION, _METHOD): + continue + rel = props.get(cs.KEY_PATH) + raw_start = props.get(cs.KEY_START_LINE) + if not rel or not isinstance(raw_start, int | float): + continue + path = target / str(rel) + if not path.is_file(): + continue + start = int(raw_start) + raw_end = props.get(cs.KEY_END_LINE) + end = int(raw_end) if isinstance(raw_end, int | float) else start + lines = path.read_text(encoding=cs.ENCODING_UTF8).splitlines() + if start >= 1: + snippets[str(uid)] = "\n".join(lines[start - 1 : end]) + return snippets + + +def cgr_semantic_ranking( + target: Path, project: str, queries: list[str], top_k: int +) -> dict[str, list[str]]: + from codebase_rag.embedder import embed_code_batch + + snippets = function_snippets(target, project) + qns = list(snippets) + snippet_vecs = embed_code_batch([snippets[qn] for qn in qns]) + query_vecs = embed_code_batch(queries) + + ranking: dict[str, list[str]] = {} + for query, query_vec in zip(queries, query_vecs, strict=False): + scored = sorted( + ( + (qn, _cosine(query_vec, vec)) + for qn, vec in zip(qns, snippet_vecs, strict=False) + ), + key=lambda pair: pair[1], + reverse=True, + ) + ranking[query] = [qn for qn, _score in scored[:top_k]] + return ranking + + +def score_semantic( + cases: list[SemanticCase], ranking: dict[str, list[str]] +) -> ScoreResult: + # (H) recall@k: a case is a hit when its expected function is in the query's + # (H) top-k. Modelled as a set of satisfied cases vs all cases, so precision is + # (H) 1.0 by construction and the headline number is recall. 
+ oracle = {(case.query, case.expected_qn) for case in cases} + hits = { + (case.query, case.expected_qn) + for case in cases + if case.expected_qn in ranking.get(case.query, []) + } + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + row = _prf(ec.Category.RETRIEVAL.value, ec.SEMANTIC_LABEL, hits, oracle) + if row is not None: + rows.append(row) + diff[ec.SEMANTIC_DIFF_PREFIX + ec.SEMANTIC_LABEL] = DiffBucket( + missing=[ + ec.SEMANTIC_CASE_REPR.format(query=q, expected=e) + for q, e in sorted(oracle - hits) + ], + extra=[], + ) + return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff) diff --git a/evals/static_calls.py b/evals/static_calls.py new file mode 100644 index 000000000..ebc8866a1 --- /dev/null +++ b/evals/static_calls.py @@ -0,0 +1,200 @@ +# (H) Static CALLS eval. Function-level call recall against an ast oracle that +# (H) resolves only the calls a reader can resolve without type inference: a bare +# (H) name call (foo()) whose target is a first-party function reached via a +# (H) `from ... import foo` or a same-module top-level def. Each becomes a +# (H) (caller_qn, callee_qn) edge. Method / attribute / dynamic calls need cgr's +# (H) type inference and are out of scope, so only RECALL is graded: every +# (H) statically-certain call must appear in cgr's CALLS graph (cgr resolving more +# (H) than the oracle is expected, not a false positive). Independent of cgr's +# (H) resolver -- it uses ast import resolution, not the function-registry trie. +import ast +from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger + +from codebase_rag import constants as cs + +from . import constants as ec +from . 
import logs as ls +from .ast_oracle import _from_base_parts, _iter_py_files, _module_dotted +from .cgr_graph import _capture +from .score import _prf +from .structure_report import render, write_outputs +from .types_defs import DiffBucket, LocationStats, ScoreResult, ScoreRow + +console_target = Path(ec.STATIC_CALLS_DEFAULT_TARGET) + +_CALLS = cs.RelationshipType.CALLS.value +_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0) +_SCOPE_NODES = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef) +_FUNC_NODES = (ast.FunctionDef, ast.AsyncFunctionDef) + +CallEdge = tuple[str, str] + + +def _parents(tree: ast.Module) -> dict[ast.AST, ast.AST]: + parents: dict[ast.AST, ast.AST] = {} + for node in ast.walk(tree): + for child in ast.iter_child_nodes(node): + parents[child] = node + return parents + + +def _node_qn(node: ast.AST, module: str, parents: dict[ast.AST, ast.AST]) -> str: + parts: list[str] = [] + cur: ast.AST | None = node + while cur is not None and not isinstance(cur, ast.Module): + if isinstance(cur, _SCOPE_NODES): + parts.append(cur.name) + cur = parents.get(cur) + return cs.SEPARATOR_DOT.join([module, *reversed(parts)]) + + +def _enclosing_function( + node: ast.AST, parents: dict[ast.AST, ast.AST] +) -> ast.AST | None: + cur = parents.get(node) + while cur is not None and not isinstance(cur, ast.Module): + if isinstance(cur, _FUNC_NODES): + return cur + cur = parents.get(cur) + return None + + +def _decorator_calls(tree: ast.Module) -> set[ast.Call]: + # (H) Calls that live inside a decorator expression (@deco(...)). These are + # (H) decorator applications, not calls the decorated function makes, so cgr + # (H) emits no CALLS edge for them and the oracle must exclude them. 
+ calls: set[ast.Call] = set() + for node in ast.walk(tree): + if isinstance(node, _SCOPE_NODES): + for decorator in node.decorator_list: + for inner in ast.walk(decorator): + if isinstance(inner, ast.Call): + calls.add(inner) + return calls + + +def _import_map(tree: ast.Module, rel: str, project: str) -> dict[str, str]: + # (H) local name -> resolved target qn for `from ... import name`. + pkg_parts = [project, *Path(rel).parent.parts] + mapping: dict[str, str] = {} + for node in ast.walk(tree): + if not isinstance(node, ast.ImportFrom): + continue + base_parts = _from_base_parts(node, pkg_parts) + if not base_parts or base_parts[0] != project: + continue + for alias in node.names: + if alias.name != ec.STAR_IMPORT: + target = cs.SEPARATOR_DOT.join([*base_parts, alias.name]) + mapping[alias.asname or alias.name] = target + return mapping + + +def oracle_static_calls(target: Path, project: str) -> set[CallEdge]: + parsed: list[tuple[str, ast.Module]] = [] + defined: set[str] = set() + for path in _iter_py_files(target): + rel = path.relative_to(target).as_posix() + try: + tree = ast.parse(path.read_text(encoding=cs.ENCODING_UTF8)) + except (SyntaxError, UnicodeDecodeError, ValueError) as error: + logger.warning(ls.ORACLE_PARSE_FAILED.format(path=rel, error=error)) + continue + parsed.append((rel, tree)) + module = _module_dotted(rel, project) + parents = _parents(tree) + for node in ast.walk(tree): + if isinstance(node, _FUNC_NODES): + defined.add(_node_qn(node, module, parents)) + + edges: set[CallEdge] = set() + for rel, tree in parsed: + module = _module_dotted(rel, project) + parents = _parents(tree) + imports = _import_map(tree, rel, project) + decorator_calls = _decorator_calls(tree) + for node in ast.walk(tree): + if not isinstance(node, ast.Call) or not isinstance(node.func, ast.Name): + continue + if node in decorator_calls: + continue + enclosing = _enclosing_function(node, parents) + if enclosing is None: + continue + name = node.func.id + candidates = ( + 
imports.get(name), + cs.SEPARATOR_DOT.join([module, name]), + ) + callee = next((qn for qn in candidates if qn and qn in defined), None) + if callee is not None: + edges.add((_node_qn(enclosing, module, parents), callee)) + return edges + + +def cgr_static_calls(target: Path, project: str) -> set[CallEdge]: + ingestor = _capture(target, project) + return { + (str(from_val), str(to_val)) + for _fl, from_val, rel_type, _tl, to_val in ingestor.rels + if rel_type == _CALLS + } + + +def _edge_repr(edge: CallEdge) -> str: + return ec.STATIC_CALL_EDGE_REPR.format(caller=edge[0], callee=edge[1]) + + +def score_static_calls(cgr: set[CallEdge], oracle: set[CallEdge]) -> ScoreResult: + # (H) Recall only: hits are oracle edges cgr also has. cgr's extra edges + # (H) (method / type-inferred calls) are expected, not false positives, so + # (H) precision is not graded here. + hits = oracle & cgr + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + row = _prf(ec.Category.EDGE.value, ec.STATIC_CALLS_LABEL, hits, oracle) + if row is not None: + rows.append(row) + diff[ec.STATIC_CALLS_DIFF_PREFIX + ec.STATIC_CALLS_LABEL] = DiffBucket( + missing=[_edge_repr(e) for e in sorted(oracle - cgr)], + extra=[], + ) + return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff) + + +def main( + target: Annotated[ + Path, typer.Option(help="cgr source to evaluate static call recall for.") + ] = console_target, + project_name: Annotated[ + str, typer.Option(help="cgr project name; defaults to target dir name.") + ] = "", + out_dir: Annotated[ + Path, typer.Option(help="Directory for static_calls_scores.csv and diff json.") + ] = Path(ec.DEFAULT_OUT_DIR), +) -> None: + target = target.resolve() + project = project_name or target.name + logger.info(ls.STATIC_CALLS_TARGET.format(target=target, project=project)) + + oracle = oracle_static_calls(target, project) + logger.success(ls.STATIC_CALLS_ORACLE_DONE.format(count=len(oracle))) + cgr = cgr_static_calls(target, project) + 
logger.success(ls.STATIC_CALLS_CGR_DONE.format(count=len(cgr))) + + result = score_static_calls(cgr, oracle) + write_outputs( + result, + out_dir, + ec.STATIC_CALLS_SCORES_FILENAME, + ec.STATIC_CALLS_DIFF_FILENAME, + ) + render(result, ec.STATIC_CALLS_TITLE) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/structure_report.py b/evals/structure_report.py new file mode 100644 index 000000000..526396e55 --- /dev/null +++ b/evals/structure_report.py @@ -0,0 +1,49 @@ +import csv +import json +from pathlib import Path + +from loguru import logger +from rich.console import Console +from rich.table import Table + +from . import constants as ec +from . import logs as ls +from .types_defs import ScoreResult + +_console = Console() + + +def write_outputs( + result: ScoreResult, out_dir: Path, scores_filename: str, diff_filename: str +) -> None: + out_dir.mkdir(parents=True, exist_ok=True) + scores_path = out_dir / scores_filename + with scores_path.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter(handle, fieldnames=list(ec.CSV_FIELDS)) + writer.writeheader() + for row in result.rows: + writer.writerow(row) + logger.success(ls.WROTE_SCORES.format(path=scores_path)) + + diff_path = out_dir / diff_filename + diff_path.write_text(json.dumps(result.diff, indent=2), encoding="utf-8") + logger.success(ls.WROTE_DIFF.format(path=diff_path)) + + +def render(result: ScoreResult, title: str) -> None: + table = Table(title=title) + for column in ec.CSV_FIELDS: + justify = "left" if column in ec.LEFT_COLUMNS else "right" + table.add_column(column, justify=justify) + for row in result.rows: + table.add_row( + row["category"], + row["label"], + str(row["tp"]), + str(row["fp"]), + str(row["fn"]), + f"{row['precision']:.4f}", + f"{row['recall']:.4f}", + f"{row['f1']:.4f}", + ) + _console.print(table) diff --git a/evals/ts_l1.py b/evals/ts_l1.py new file mode 100644 index 000000000..5b710ca4a --- /dev/null +++ b/evals/ts_l1.py @@ -0,0 +1,51 @@ 
+from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger + +from . import constants as ec +from . import logs as ls +from .cgr_graph import extract_cgr_ts_graph +from .oracles import run_typescript_oracle, typescript_available +from .score import score_structure +from .structure_report import render, write_outputs + +_TITLE = "cgr L1 structure eval (TypeScript vs tsc)" + + +def main( + target: Annotated[ + Path, typer.Option(help="Directory of TypeScript sources to evaluate.") + ] = Path(ec.TS_DEFAULT_TARGET), + project_name: Annotated[ + str, typer.Option(help="cgr project name; defaults to target dir name.") + ] = "", + out_dir: Annotated[ + Path, typer.Option(help="Directory for ts_scores.csv and ts_diff.json.") + ] = Path(ec.DEFAULT_OUT_DIR), +) -> None: + if not typescript_available(): + logger.error(ls.TS_ORACLE_MISSING.format(binary=ec.NODE_BIN)) + raise typer.Exit(code=1) + + target = target.resolve() + project = project_name or target.name + + logger.info(ls.TS_EXTRACTING_CGR.format(target=target, project=project)) + cgr = extract_cgr_ts_graph(target, project) + logger.success(ls.TS_CGR_DONE.format(count=len(cgr.nodes))) + + logger.info(ls.TS_EXTRACTING_ORACLE.format(binary=ec.NODE_BIN, target=target)) + oracle = run_typescript_oracle(target) + logger.success(ls.TS_ORACLE_DONE.format(count=len(oracle.nodes))) + + result = score_structure( + cgr, oracle, ec.TS_SCORED_NODE_KINDS, ec.SCORED_EDGE_TYPES, grade_spans=True + ) + write_outputs(result, out_dir, ec.TS_SCORES_FILENAME, ec.TS_DIFF_FILENAME) + render(result, _TITLE) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/ts_retrieval.py b/evals/ts_retrieval.py new file mode 100644 index 000000000..be822354a --- /dev/null +++ b/evals/ts_retrieval.py @@ -0,0 +1,113 @@ +# (H) Multi-language retrieval (TypeScript). Extends the file-level +# (H) call-localization benchmark to TypeScript: for each first-party TS symbol, +# (H) which files call it. 
cgr's TS CALLS edges (reduced to (caller_file, +# (H) callee_simple_name)) are graded against call sites extracted by the +# (H) TypeScript compiler API (tsc), over the same first-party name universe. +# (H) tsc is independent of cgr's tree-sitter TS frontend, so this measures cgr's +# (H) cross-file TS call resolution against ground truth (mirrors java_retrieval.py). +from pathlib import Path +from typing import Annotated + +import typer +from loguru import logger + +from codebase_rag import constants as cs + +from . import constants as ec +from . import logs as ls +from .cgr_graph import _capture +from .oracles import run_typescript_call_oracle, typescript_available +from .score import _prf +from .structure_report import render, write_outputs +from .types_defs import DiffBucket, LocationStats, ScoreResult, ScoreRow + +console_target = Path(ec.TS_DEFAULT_TARGET) + +_CALLS = cs.RelationshipType.CALLS.value +_EMPTY_LOCATION = LocationStats(0, 0, 0, 0.0, 0) + +CallEdge = tuple[str, str] + + +def oracle_ts_call_edges(target: Path) -> tuple[set[CallEdge], frozenset[str]]: + return run_typescript_call_oracle(target) + + +def cgr_ts_call_edges( + target: Path, project: str, declared: frozenset[str] +) -> set[CallEdge]: + ingestor = _capture(target, project) + caller_path: dict[tuple[str, str], str] = { + (str(label), str(uid)): str(props[cs.KEY_PATH]) + for (label, uid), props in ingestor.nodes.items() + if props.get(cs.KEY_PATH) and str(props[cs.KEY_PATH]).endswith(ec.TS_SUFFIXES) + } + edges: set[CallEdge] = set() + for from_label, from_val, rel_type, _to_label, to_val in ingestor.rels: + if rel_type != _CALLS: + continue + path = caller_path.get((str(from_label), str(from_val))) + if path is None: + continue + name = str(to_val).split(cs.SEPARATOR_DOT)[-1] + if name in declared: + edges.add((path, name)) + return edges + + +def _edge_repr(edge: CallEdge) -> str: + return ec.TS_CALL_EDGE_REPR.format(file=edge[0], name=edge[1]) + + +def score_ts_retrieval(cgr: 
set[CallEdge], oracle: set[CallEdge]) -> ScoreResult: + rows: list[ScoreRow] = [] + diff: dict[str, DiffBucket] = {} + row = _prf(ec.Category.RETRIEVAL.value, ec.TS_RETRIEVAL_LABEL, cgr, oracle) + if row is not None: + rows.append(row) + diff[ec.TS_RETRIEVAL_DIFF_PREFIX + ec.TS_RETRIEVAL_LABEL] = DiffBucket( + missing=[_edge_repr(e) for e in sorted(oracle - cgr)], + extra=[_edge_repr(e) for e in sorted(cgr - oracle)], + ) + return ScoreResult(rows=rows, location=_EMPTY_LOCATION, diff=diff) + + +def main( + target: Annotated[ + Path, typer.Option(help="Directory of TypeScript sources to evaluate.") + ] = console_target, + project_name: Annotated[ + str, typer.Option(help="cgr project name; defaults to target dir name.") + ] = "", + out_dir: Annotated[ + Path, + typer.Option(help="Directory for ts_retrieval_scores.csv and diff json."), + ] = Path(ec.DEFAULT_OUT_DIR), +) -> None: + if not typescript_available(): + logger.error(ls.TS_ORACLE_MISSING.format(binary=ec.NODE_BIN)) + raise typer.Exit(code=1) + + target = target.resolve() + project = project_name or target.name + + logger.info(ls.TS_RETRIEVAL_ORACLE.format(binary=ec.NODE_BIN, target=target)) + oracle, declared = oracle_ts_call_edges(target) + logger.success(ls.TS_RETRIEVAL_ORACLE_DONE.format(count=len(oracle))) + + logger.info(ls.TS_RETRIEVAL_CGR.format(target=target, project=project)) + cgr = cgr_ts_call_edges(target, project, declared) + logger.success(ls.TS_RETRIEVAL_CGR_DONE.format(count=len(cgr))) + + result = score_ts_retrieval(cgr, oracle) + write_outputs( + result, + out_dir, + ec.TS_RETRIEVAL_SCORES_FILENAME, + ec.TS_RETRIEVAL_DIFF_FILENAME, + ) + render(result, ec.TS_RETRIEVAL_TITLE) + + +if __name__ == "__main__": + typer.run(main) diff --git a/evals/types_defs.py b/evals/types_defs.py new file mode 100644 index 000000000..421a99a07 --- /dev/null +++ b/evals/types_defs.py @@ -0,0 +1,112 @@ +from typing import NamedTuple, NotRequired, TypedDict + + +class NodeKey(NamedTuple): + kind: str + file: str 
+ start_line: int + + +class DefNode(NamedTuple): + key: NodeKey + name: str + end_line: int + + +class EdgeKey(NamedTuple): + rel_type: str + parent: NodeKey + child: NodeKey + + +class NameEdge(NamedTuple): + rel_type: str + source: NodeKey + target_name: str + + +class GraphData(NamedTuple): + nodes: dict[NodeKey, DefNode] + edges: set[EdgeKey] + name_edges: set[NameEdge] + + +class GraphState(NamedTuple): + # (H) A flat, comparable snapshot of a whole captured graph: node identities + # (H) (label, unique-id) and directed edges (from_label, from_id, rel, + # (H) to_label, to_id). Used to diff an incremental update against a clean + # (H) re-index, where the clean index is the oracle. + nodes: frozenset[tuple[str, str]] + edges: frozenset[tuple[str, str, str, str, str]] + + +class ScoreRow(TypedDict): + category: str + label: str + tp: int + fp: int + fn: int + precision: float + recall: float + f1: float + + +class LocationStats(NamedTuple): + matched: int + end_exact: int + end_within_one: int + mean_abs_delta: float + max_abs_delta: int + + +class DiffBucket(TypedDict): + missing: list[str] + extra: list[str] + + +class ScoreResult(NamedTuple): + rows: list[ScoreRow] + location: LocationStats + diff: dict[str, DiffBucket] + + +class OracleRecord(TypedDict): + kind: str + file: str + line: int + name: str + # (H) Optional so oracles that have not yet adopted span emission keep working + # (H) (records_to_nodes falls back to the start line). 
+ end_line: NotRequired[int] + + +class OracleNodeRef(TypedDict): + kind: str + file: str + line: int + + +class OracleEdge(TypedDict): + rel: str + parent: OracleNodeRef + child: OracleNodeRef + + +class OracleNameEdge(TypedDict): + rel: str + source: OracleNodeRef + target_name: str + + +class OracleCall(TypedDict): + file: str + name: str + + +class OraclePayload(TypedDict): + nodes: list[OracleRecord] + edges: list[OracleEdge] + name_edges: list[OracleNameEdge] + # (H) Optional: only the call-aware oracles (Go multi-language retrieval) emit + # (H) call sites; structure-only oracles omit this key. + calls: NotRequired[list[OracleCall]] diff --git a/funding.json b/funding.json new file mode 100644 index 000000000..baa0c096c --- /dev/null +++ b/funding.json @@ -0,0 +1,108 @@ +{ + "$schema": "https://fundingjson.org/schema/v1.1.0.json", + "version": "v1.1.0", + "entity": { + "type": "individual", + "role": "owner", + "name": "Vitali Avagyan", + "email": "eheva87@gmail.com", + "description": "Creator and maintainer of Code-Graph-RAG, an open source tool for AI-powered codebase understanding via knowledge graphs.", + "webpageUrl": { + "url": "https://code-graph-rag.com" + } + }, + "projects": [ + { + "guid": "code-graph-rag", + "name": "Code-Graph-RAG", + "description": "An open source retrieval-augmented generation system that analyzes multi-language codebases using Tree-sitter, builds comprehensive knowledge graphs, and enables natural language querying and editing of codebase structure and relationships. 
Supports 11 programming languages with a unified graph schema and functions as an MCP server for AI assistant integration.", + "webpageUrl": { + "url": "https://code-graph-rag.com" + }, + "repositoryUrl": { + "url": "https://github.com/vitali87/code-graph-rag" + }, + "licenses": [ + "spdx:MIT" + ], + "tags": [ + "rag", + "knowledge-graph", + "code-analysis", + "tree-sitter", + "mcp-server", + "developer-tools", + "ai", + "graph-database", + "semantic-search", + "python" + ] + } + ], + "funding": { + "channels": [ + { + "guid": "github-sponsors", + "type": "payment-provider", + "address": "https://github.com/sponsors/vitali87", + "description": "GitHub Sponsors" + }, + { + "guid": "buy-me-a-coffee", + "type": "payment-provider", + "address": "https://buymeacoffee.com/vitali87", + "description": "Buy Me a Coffee" + } + ], + "plans": [ + { + "guid": "one-time-any", + "status": "active", + "name": "One-time donation", + "description": "Support Code-Graph-RAG development with a one-time contribution of any amount.", + "amount": 0, + "currency": "USD", + "frequency": "one-time", + "channels": [ + "github-sponsors", + "buy-me-a-coffee" + ] + }, + { + "guid": "monthly-supporter", + "status": "active", + "name": "Monthly supporter", + "description": "Recurring monthly support for ongoing development, security maintenance, and new language support.", + "amount": 0, + "currency": "USD", + "frequency": "monthly", + "channels": [ + "github-sponsors", + "buy-me-a-coffee" + ] + }, + { + "guid": "annual-sponsor", + "status": "active", + "name": "Annual sponsor", + "description": "Yearly sponsorship for sustained development of Code-Graph-RAG as open infrastructure for AI-powered codebase understanding.", + "amount": 25000, + "currency": "USD", + "frequency": "yearly", + "channels": [ + "github-sponsors" + ] + } + ], + "history": [ + { + "year": 2025, + "income": 0, + "expenses": 0, + "taxes": 0, + "currency": "USD", + "description": "Project launched in 2025. 
No external funding received." + } + ] + } +} diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 000000000..fccedaccb --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,121 @@ +site_name: Code-Graph-RAG +site_url: https://docs.code-graph-rag.com +site_description: >- + Graph-based RAG system that parses multi-language codebases with Tree-sitter, + builds knowledge graphs, and enables natural language querying, editing, + and optimization. +site_author: Vitali Avagyan + +repo_name: vitali87/code-graph-rag +repo_url: https://github.com/vitali87/code-graph-rag +edit_uri: edit/main/docs/ + +copyright: Copyright © 2024 Vitali Avagyan + +theme: + name: material + custom_dir: docs/overrides + logo: assets/logo-icon.png + favicon: assets/favicon.png + font: + text: Inter + code: JetBrains Mono + palette: + - scheme: slate + primary: custom + accent: custom + toggle: + icon: material/brightness-4 + name: Switch to light mode + - scheme: default + primary: custom + accent: custom + toggle: + icon: material/brightness-7 + name: Switch to dark mode + features: + - navigation.instant + - navigation.tracking + - navigation.tabs + - navigation.sections + - navigation.expand + - navigation.top + - search.suggest + - search.highlight + - content.code.copy + - content.code.annotate + - content.tabs.link + - toc.follow + icon: + repo: fontawesome/brands/github + +plugins: + - search + - minify: + minify_html: true + +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.superfences + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.tabbed: + alternate_style: true + - pymdownx.snippets + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - tables + - attr_list + - md_in_html + - toc: + permalink: true + +nav: + - Home: index.md + - Getting Started: + - Installation: 
getting-started/installation.md + - Configuration: getting-started/configuration.md + - Quick Start: getting-started/quickstart.md + - User Guide: + - CLI Reference: guide/cli-reference.md + - Interactive Querying: guide/interactive-querying.md + - Code Optimization: guide/code-optimization.md + - Graph Export: guide/graph-export.md + - Real-Time Updates: guide/realtime-updates.md + - MCP Server: guide/mcp-server.md + - Python SDK: + - Overview: sdk/overview.md + - Graph Loader: sdk/graph-loader.md + - Cypher Generator: sdk/cypher-generator.md + - Semantic Search: sdk/semantic-search.md + - Architecture: + - Overview: architecture/overview.md + - Graph Schema: architecture/graph-schema.md + - Language Support: architecture/language-support.md + - Advanced: + - Adding Languages: advanced/adding-languages.md + - Ignore Patterns: advanced/ignore-patterns.md + - Building Binaries: advanced/building-binaries.md + - Troubleshooting: advanced/troubleshooting.md + - Contributing: contributing.md + +# Internal analysis artifacts kept in the repo but not published to the doc site nav. +not_in_nav: | + /reports/ + /TODO.md + +extra_css: + - stylesheets/extra.css + +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/vitali87/code-graph-rag + - icon: fontawesome/brands/python + link: https://pypi.org/project/code-graph-rag/ + generator: false diff --git a/optimize/memory_profile.py b/optimize/memory_profile.py new file mode 100644 index 000000000..eaf98c2e3 --- /dev/null +++ b/optimize/memory_profile.py @@ -0,0 +1,665 @@ +"""Memory allocation profiler for code-graph-rag. + +Profiles the main data structures and parsing pipeline using tracemalloc. +Does NOT require external services (Memgraph, Qdrant). 
+""" + +import gc +import json +import sys +import tracemalloc +from collections import OrderedDict, defaultdict +from pathlib import Path +from textwrap import dedent + +PROJECT_ROOT = Path(__file__).resolve().parent.parent + +sys.path.insert(0, str(PROJECT_ROOT)) + + +def format_bytes(size: int) -> str: + for unit in ("B", "KiB", "MiB", "GiB"): + if abs(size) < 1024: + return f"{size:.1f} {unit}" + size /= 1024 # type: ignore[assignment] + return f"{size:.1f} TiB" + + +def snapshot_diff(label: str, snap1: tracemalloc.Snapshot, snap2: tracemalloc.Snapshot, top_n: int = 15) -> dict: + stats = snap2.compare_to(snap1, "lineno") + total_diff = sum(s.size_diff for s in stats if s.size_diff > 0) + result = { + "label": label, + "total_new_alloc": total_diff, + "total_new_alloc_human": format_bytes(total_diff), + "top_allocators": [], + } + for stat in stats[:top_n]: + if stat.size_diff > 0: + result["top_allocators"].append({ + "file": str(stat.traceback), + "size_diff": stat.size_diff, + "size_diff_human": format_bytes(stat.size_diff), + "count_diff": stat.count_diff, + }) + return result + + +def measure_object_sizes() -> dict: + """Measure sizes of core Python data structures used in the codebase.""" + results = {} + + # 1. 
FunctionRegistryTrie: dict + trie node overhead + from codebase_rag.graph_updater import FunctionRegistryTrie + + trie = FunctionRegistryTrie() + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + + for i in range(10_000): + qn = f"project.module_{i // 100}.class_{i // 10}.func_{i}" + trie.insert(qn, "Function") + + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["FunctionRegistryTrie_10k_insert"] = snapshot_diff( + "FunctionRegistryTrie: insert 10k qualified names", snap_before, snap_after + ) + results["FunctionRegistryTrie_10k_insert"]["entries_size"] = sys.getsizeof(trie._entries) + results["FunctionRegistryTrie_10k_insert"]["entry_count"] = len(trie._entries) + + # Measure trie overhead vs flat dict + flat_dict = {} + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + for i in range(10_000): + qn = f"project.module_{i // 100}.class_{i // 10}.func_{i}" + flat_dict[qn] = "Function" + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["flat_dict_10k_baseline"] = snapshot_diff( + "Flat dict: 10k entries baseline", snap_before, snap_after + ) + + # 2. SimpleNameLookup: defaultdict[str, set[str]] + simple_lookup: defaultdict[str, set[str]] = defaultdict(set) + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + for i in range(10_000): + simple_name = f"func_{i % 500}" + qn = f"project.module_{i // 100}.class_{i // 10}.{simple_name}" + simple_lookup[simple_name].add(qn) + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["SimpleNameLookup_10k"] = snapshot_diff( + "SimpleNameLookup: 10k entries, 500 unique names", snap_before, snap_after + ) + + # 3. 
BoundedASTCache with OrderedDict + from codebase_rag.graph_updater import BoundedASTCache + + cache = BoundedASTCache(max_entries=5000, max_memory_mb=512) + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + + # Simulate storing mock entries (can't use real AST nodes without tree-sitter parsing) + for i in range(1000): + key = Path(f"/fake/path/module_{i}.py") + # Use a placeholder tuple since we can't create real AST nodes without parsing + cache.cache[key] = (None, "python") # type: ignore + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["BoundedASTCache_1k_entries"] = snapshot_diff( + "BoundedASTCache (OrderedDict): 1k entries", snap_before, snap_after + ) + + # 4. node_buffer in MemgraphIngestor pattern + node_buffer: list[tuple[str, dict[str, str | int | float | bool | list[str] | None]]] = [] + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + for i in range(5000): + node_buffer.append(( + "Function", + { + "qualified_name": f"project.mod_{i // 50}.cls_{i // 10}.fn_{i}", + "name": f"fn_{i}", + "start_line": i * 10, + "end_line": i * 10 + 15, + "path": f"src/mod_{i // 50}/cls_{i // 10}.py", + }, + )) + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["node_buffer_5k"] = snapshot_diff( + "node_buffer: 5k buffered nodes", snap_before, snap_after + ) + + # 5. 
_rel_groups in MemgraphIngestor pattern + rel_groups: defaultdict[tuple, list[dict]] = defaultdict(list) + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + for i in range(10_000): + pattern = ("Function", "qualified_name", "CALLS", "Function", "qualified_name") + rel_groups[pattern].append({ + "from_val": f"project.mod.fn_{i}", + "to_val": f"project.mod.fn_{i + 1}", + "props": {}, + }) + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["rel_groups_10k"] = snapshot_diff( + "rel_groups: 10k buffered relationships", snap_before, snap_after + ) + + # 6. import_mapping pattern + import_mapping: dict[str, dict[str, str]] = {} + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + for i in range(2000): + module_qn = f"project.module_{i}" + imports = {} + for j in range(20): + imports[f"import_{j}"] = f"external.package_{j}.symbol_{j}" + import_mapping[module_qn] = imports + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["import_mapping_2k_modules"] = snapshot_diff( + "import_mapping: 2k modules x 20 imports each", snap_before, snap_after + ) + + # 7. 
class_inheritance pattern + class_inheritance: dict[str, list[str]] = {} + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + for i in range(3000): + class_qn = f"project.module_{i // 30}.Class_{i}" + parents = [f"project.module_{i // 30}.BaseClass_{j}" for j in range(3)] + class_inheritance[class_qn] = parents + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["class_inheritance_3k"] = snapshot_diff( + "class_inheritance: 3k classes x 3 parents", snap_before, snap_after + ) + + return results + + +def measure_tree_sitter_parsing() -> dict: + """Profile memory during tree-sitter parsing of actual Python files.""" + results = {} + + try: + from tree_sitter import Language, Parser + import tree_sitter_python + + py_language = Language(tree_sitter_python.language()) + parser = Parser(py_language) + except Exception as e: + return {"error": f"tree-sitter setup failed: {e}"} + + # Find Python files in the project itself + py_files = sorted(PROJECT_ROOT.glob("codebase_rag/**/*.py")) + if not py_files: + return {"error": "No Python files found"} + + # Profile parsing all project files + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + + trees = [] + total_bytes_parsed = 0 + for f in py_files: + try: + source = f.read_bytes() + total_bytes_parsed += len(source) + tree = parser.parse(source) + trees.append((f, tree)) + except Exception: + pass + + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["parse_all_project_files"] = snapshot_diff( + f"Parse {len(trees)} Python files ({format_bytes(total_bytes_parsed)} source)", + snap_before, snap_after + ) + results["parse_all_project_files"]["file_count"] = len(trees) + results["parse_all_project_files"]["source_bytes"] = total_bytes_parsed + + # Profile AST node retention + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + + root_nodes = [tree.root_node for _, tree in trees] + + 
gc.collect() + snap_after = tracemalloc.take_snapshot() + results["ast_node_retention"] = snapshot_diff( + f"Retaining {len(root_nodes)} AST root nodes", snap_before, snap_after + ) + + # Profile what happens when we walk AST nodes (simulating function extraction) + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + + all_function_nodes = [] + for root in root_nodes: + stack = [root] + while stack: + node = stack.pop() + if node.type in ("function_definition", "class_definition"): + all_function_nodes.append(node) + stack.extend(node.children) + + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["ast_walk_function_extraction"] = snapshot_diff( + f"Walking ASTs, collected {len(all_function_nodes)} function/class nodes", + snap_before, snap_after, + ) + results["ast_walk_function_extraction"]["function_class_count"] = len(all_function_nodes) + + # Cleanup + del trees, root_nodes, all_function_nodes + + return results + + +def measure_graph_loader_json() -> dict: + """Profile GraphLoader JSON loading and indexing with synthetic data.""" + results = {} + + # Create synthetic graph JSON + nodes = [] + relationships = [] + for i in range(5000): + nodes.append({ + "node_id": i, + "labels": ["Function"], + "properties": { + "qualified_name": f"project.module_{i // 50}.class_{i // 10}.func_{i}", + "name": f"func_{i}", + "start_line": i * 10, + "end_line": i * 10 + 15, + "path": f"src/module_{i // 50}/class_{i // 10}.py", + }, + }) + for i in range(8000): + relationships.append({ + "from_id": i % 5000, + "to_id": (i + 1) % 5000, + "type": "CALLS", + "properties": {}, + }) + + graph_data = { + "nodes": nodes, + "relationships": relationships, + "metadata": { + "total_nodes": len(nodes), + "total_relationships": len(relationships), + "exported_at": "2024-01-01T00:00:00Z", + }, + } + + # Write temp file + tmp_path = PROJECT_ROOT / "optimize" / "_tmp_graph.json" + with open(tmp_path, "w") as f: + json.dump(graph_data, f) + + 
try: + from codebase_rag.graph_loader import GraphLoader + + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + + loader = GraphLoader(str(tmp_path)) + loader.load() + + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["graph_loader_5k_nodes_8k_rels"] = snapshot_diff( + "GraphLoader: load 5k nodes + 8k relationships from JSON", + snap_before, snap_after, + ) + + # Measure index building + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + + loader._build_property_index("qualified_name") + + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["graph_loader_property_index"] = snapshot_diff( + "GraphLoader: build property index on qualified_name", + snap_before, snap_after, + ) + + except Exception as e: + results["error"] = str(e) + finally: + tmp_path.unlink(missing_ok=True) + + return results + + +def measure_embedding_cache() -> dict: + """Profile EmbeddingCache with simulated embeddings.""" + results = {} + + try: + from codebase_rag.embedder import EmbeddingCache + + cache = EmbeddingCache() + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + + # Simulate 2k embeddings, each 768-dim float vector + for i in range(2000): + content = f"def function_{i}(x, y): return x + y + {i}" + embedding = [float(j) / 768.0 for j in range(768)] + cache.put(content, embedding) + + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["embedding_cache_2k_768dim"] = snapshot_diff( + "EmbeddingCache: 2k entries x 768-dim embeddings", + snap_before, snap_after, + ) + results["embedding_cache_2k_768dim"]["cache_dict_size"] = sys.getsizeof(cache._cache) + results["embedding_cache_2k_768dim"]["entry_count"] = len(cache) + + except Exception as e: + results["error"] = str(e) + + return results + + +def measure_gc_pressure() -> dict: + """Measure GC pressure by tracking collections during workload simulation.""" + results = {} + + 
gc.collect() + gc_stats_before = gc.get_stats() + gc.disable() + + # Simulate a typical file processing workload creating many temporary objects + temp_objects_created = 0 + for i in range(1000): + # Simulate tree-sitter query results (lists of tuples, dicts) + captures = {"function": [f"node_{j}" for j in range(20)]} + for func_name in captures["function"]: + # Simulate qualified name construction (many string concatenations) + parts = ["project", f"module_{i}", f"class_{i // 10}", func_name] + qn = ".".join(parts) + # Simulate property dict construction + props = { + "qualified_name": qn, + "name": func_name, + "start_line": i * 10, + "end_line": i * 10 + 15, + } + temp_objects_created += 1 + del props + + gc.enable() + gc.collect() + gc_stats_after = gc.get_stats() + + results["gc_pressure_simulation"] = { + "label": "GC pressure during simulated file processing (1k files x 20 funcs)", + "temp_objects_created": temp_objects_created, + "gc_gen0_before": gc_stats_before[0], + "gc_gen0_after": gc_stats_after[0], + "gc_gen1_before": gc_stats_before[1], + "gc_gen1_after": gc_stats_after[1], + "gc_gen2_before": gc_stats_before[2], + "gc_gen2_after": gc_stats_after[2], + } + + return results + + +def measure_string_duplication() -> dict: + """Estimate memory wasted on duplicated strings in typical data structures.""" + results = {} + + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + + # Simulate how property dicts repeat the same key strings thousands of times + all_dicts: list[dict] = [] + for i in range(5000): + d = { + "qualified_name": f"project.mod_{i // 50}.cls_{i // 10}.fn_{i}", + "name": f"fn_{i}", + "start_line": i * 10, + "end_line": i * 10 + 15, + "path": f"src/mod_{i // 50}/cls_{i // 10}.py", + } + all_dicts.append(d) + + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["property_dict_duplication_5k"] = snapshot_diff( + "5k property dicts with repeated key strings", snap_before, snap_after + ) + + # 
Compare: same data using tuples (no key duplication) + gc.collect() + tracemalloc.clear_traces() + snap_before = tracemalloc.take_snapshot() + + all_tuples: list[tuple] = [] + for i in range(5000): + t = ( + f"project.mod_{i // 50}.cls_{i // 10}.fn_{i}", + f"fn_{i}", + i * 10, + i * 10 + 15, + f"src/mod_{i // 50}/cls_{i // 10}.py", + ) + all_tuples.append(t) + + gc.collect() + snap_after = tracemalloc.take_snapshot() + results["property_tuple_alternative_5k"] = snapshot_diff( + "5k tuples (no key duplication) as alternative", snap_before, snap_after + ) + + return results + + +def measure_peak_usage_full_pipeline() -> dict: + """Simulate the full pipeline memory envelope. + + This exercises the complete data structure lifecycle: + 1. Build FunctionRegistryTrie + 2. Build import mappings + 3. Build class inheritance + 4. Buffer nodes and relationships + 5. Measure peak + """ + results = {} + + gc.collect() + tracemalloc.clear_traces() + snap_baseline = tracemalloc.take_snapshot() + + # Phase 1: Build FunctionRegistryTrie + from codebase_rag.graph_updater import FunctionRegistryTrie + + simple_name_lookup: defaultdict[str, set[str]] = defaultdict(set) + trie = FunctionRegistryTrie(simple_name_lookup=simple_name_lookup) + + for i in range(15_000): + simple_name = f"func_{i % 1000}" + qn = f"project.module_{i // 150}.class_{i // 15}.{simple_name}" + trie.insert(qn, "Function") + simple_name_lookup[simple_name].add(qn) + + gc.collect() + snap_phase1 = tracemalloc.take_snapshot() + results["phase1_trie_15k"] = snapshot_diff( + "Phase 1: FunctionRegistryTrie + SimpleNameLookup (15k entries)", + snap_baseline, snap_phase1, + ) + + # Phase 2: Import mappings + import_mapping: dict[str, dict[str, str]] = {} + for i in range(1500): + module_qn = f"project.module_{i}" + imports = {f"sym_{j}": f"ext.pkg_{j}.sym_{j}" for j in range(25)} + import_mapping[module_qn] = imports + + gc.collect() + snap_phase2 = tracemalloc.take_snapshot() + results["phase2_imports_1500_modules"] = 
snapshot_diff( + "Phase 2: import_mapping (1500 modules x 25 imports)", + snap_phase1, snap_phase2, + ) + + # Phase 3: Class inheritance + class_inheritance: dict[str, list[str]] = {} + for i in range(5000): + class_qn = f"project.module_{i // 50}.Class_{i}" + parents = [f"project.module_{i // 50}.Base_{j}" for j in range(2)] + class_inheritance[class_qn] = parents + + gc.collect() + snap_phase3 = tracemalloc.take_snapshot() + results["phase3_inheritance_5k"] = snapshot_diff( + "Phase 3: class_inheritance (5k classes x 2 parents)", + snap_phase2, snap_phase3, + ) + + # Phase 4: Node + relationship buffers + node_buffer: list[tuple[str, dict]] = [] + for i in range(10_000): + node_buffer.append(( + "Function", + { + "qualified_name": f"project.mod_{i // 100}.cls_{i // 10}.fn_{i}", + "name": f"fn_{i}", + "start_line": i * 5, + "end_line": i * 5 + 10, + }, + )) + + rel_groups: defaultdict[tuple, list[dict]] = defaultdict(list) + for i in range(20_000): + pattern = ("Function", "qualified_name", "CALLS", "Function", "qualified_name") + rel_groups[pattern].append({ + "from_val": f"project.mod.fn_{i}", + "to_val": f"project.mod.fn_{i + 1}", + "props": {}, + }) + + gc.collect() + snap_phase4 = tracemalloc.take_snapshot() + results["phase4_buffers_10k_nodes_20k_rels"] = snapshot_diff( + "Phase 4: node_buffer (10k) + rel_groups (20k)", + snap_phase3, snap_phase4, + ) + + # Total from baseline + results["total_pipeline_memory"] = snapshot_diff( + "TOTAL: Full pipeline memory (all phases combined)", + snap_baseline, snap_phase4, + ) + + # Peak usage + current, peak = tracemalloc.get_traced_memory() + results["peak_traced_memory"] = { + "current": current, + "current_human": format_bytes(current), + "peak": peak, + "peak_human": format_bytes(peak), + } + + return results + + +def main() -> None: + tracemalloc.start(25) # 25 frames for stack traces + + all_results: dict[str, dict] = {} + + print("=" * 70) + print("MEMORY ALLOCATION PROFILING REPORT") + print("=" * 70) + + 
print("\n[1/7] Measuring core data structure sizes...") + all_results["data_structures"] = measure_object_sizes() + + print("[2/7] Profiling tree-sitter parsing...") + all_results["tree_sitter"] = measure_tree_sitter_parsing() + + print("[3/7] Profiling GraphLoader JSON loading...") + all_results["graph_loader"] = measure_graph_loader_json() + + print("[4/7] Profiling EmbeddingCache...") + all_results["embedding_cache"] = measure_embedding_cache() + + print("[5/7] Measuring GC pressure...") + all_results["gc_pressure"] = measure_gc_pressure() + + print("[6/7] Measuring string duplication overhead...") + all_results["string_duplication"] = measure_string_duplication() + + print("[7/7] Measuring peak usage in full pipeline simulation...") + all_results["full_pipeline"] = measure_peak_usage_full_pipeline() + + tracemalloc.stop() + + # Print summary report + print("\n" + "=" * 70) + print("RESULTS SUMMARY") + print("=" * 70) + + for section_name, section_data in all_results.items(): + print(f"\n--- {section_name.upper()} ---") + for key, value in section_data.items(): + if isinstance(value, dict) and "label" in value: + total = value.get("total_new_alloc_human", value.get("peak_human", "N/A")) + print(f" {value['label']}") + print(f" Total new allocation: {total}") + if "top_allocators" in value: + for i, alloc in enumerate(value["top_allocators"][:5]): + print(f" [{i+1}] {alloc['size_diff_human']} ({alloc['count_diff']} objects) - {alloc['file'][:80]}") + elif isinstance(value, dict) and "current_human" in value: + print(f" Current traced: {value['current_human']}") + print(f" Peak traced: {value['peak_human']}") + elif isinstance(value, dict) and "temp_objects_created" in value: + print(f" {value['label']}") + print(f" Temp objects created: {value['temp_objects_created']}") + for gen in range(3): + before = value[f"gc_gen{gen}_before"] + after = value[f"gc_gen{gen}_after"] + print(f" Gen{gen}: collections {before['collections']} -> {after['collections']}, collected 
{before['collected']} -> {after['collected']}") + + # Save detailed JSON + output_path = PROJECT_ROOT / "optimize" / "memory_profile_results.json" + with open(output_path, "w") as f: + json.dump(all_results, f, indent=2, default=str) + print(f"\nDetailed results saved to: {output_path}") + + +if __name__ == "__main__": + main() diff --git a/optimize/memory_profile_results.json b/optimize/memory_profile_results.json new file mode 100644 index 000000000..f8cb642db --- /dev/null +++ b/optimize/memory_profile_results.json @@ -0,0 +1,1482 @@ +{ + "data_structures": { + "FunctionRegistryTrie_10k_insert": { + "label": "FunctionRegistryTrie: insert 10k qualified names", + "total_new_alloc": 3681520, + "total_new_alloc_human": "3.5 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:56", + "size_diff": 1079880, + "size_diff_human": "1.0 MiB", + "count_diff": 8999 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:51", + "size_diff": 1062648, + "size_diff_human": "1.0 MiB", + "count_diff": 13203 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:61", + "size_diff": 776790, + "size_diff_human": "758.6 KiB", + "count_diff": 10000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:46", + "size_diff": 553818, + "size_diff_human": "540.8 KiB", + "count_diff": 11101 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:44", + "size_diff": 207672, + "size_diff_human": "202.8 KiB", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 312, + "size_diff_human": "312.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 
312, + "size_diff_human": "312.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:60", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ], + "entries_size": 207616, + "entry_count": 10000 + }, + "flat_dict_10k_baseline": { + "label": "Flat dict: 10k entries baseline", + "total_new_alloc": 985022, + "total_new_alloc_human": "961.9 KiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:78", + "size_diff": 776790, + "size_diff_human": "758.6 KiB", + "count_diff": 10000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:79", + "size_diff": 207552, + "size_diff_human": "202.7 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 296, + "size_diff_human": "296.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 296, + "size_diff_human": "296.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:77", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ] + }, + "SimpleNameLookup_10k": { + "label": "SimpleNameLookup: 10k entries, 500 unique names", + "total_new_alloc": 1935779, + "total_new_alloc_human": "1.8 MiB", + "top_allocators": [ + { + "file": 
"/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:94", + "size_diff": 1144992, + "size_diff_human": "1.1 MiB", + "count_diff": 1001 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:93", + "size_diff": 765700, + "size_diff_human": "747.8 KiB", + "count_diff": 10000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:92", + "size_diff": 24439, + "size_diff_human": "23.9 KiB", + "count_diff": 501 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 280, + "size_diff_human": "280.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 280, + "size_diff_human": "280.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:91", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ] + }, + "BoundedASTCache_1k_entries": { + "label": "BoundedASTCache (OrderedDict): 1k entries", + "total_new_alloc": 585087, + "total_new_alloc_human": "571.4 KiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/pathlib.py:404", + "size_diff": 141935, + "size_diff_human": "138.6 KiB", + "count_diff": 3001 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/pathlib.py:1167", + "size_diff": 104000, + "size_diff_human": "101.6 KiB", + "count_diff": 1000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:113", + 
"size_diff": 85272, + "size_diff_human": "83.3 KiB", + "count_diff": 1002 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:111", + "size_diff": 64890, + "size_diff_human": "63.4 KiB", + "count_diff": 1000 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/pathlib.py:432", + "size_diff": 64890, + "size_diff_human": "63.4 KiB", + "count_diff": 1000 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/pathlib.py:359", + "size_diff": 55944, + "size_diff_human": "54.6 KiB", + "count_diff": 999 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/pathlib.py:528", + "size_diff": 35540, + "size_diff_human": "34.7 KiB", + "count_diff": 1000 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/pathlib.py:377", + "size_diff": 32000, + "size_diff_human": "31.2 KiB", + "count_diff": 1000 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 264, + "size_diff_human": "264.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 264, + "size_diff_human": "264.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:110", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ] + }, + "node_buffer_5k": { + "label": "node_buffer: 5k buffered nodes", + "total_new_alloc": 2460116, + "total_new_alloc_human": "2.3 MiB", + 
"top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:128", + "size_diff": 920000, + "size_diff_human": "898.4 KiB", + "count_diff": 10000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:129", + "size_diff": 352290, + "size_diff_human": "344.0 KiB", + "count_diff": 5000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:126", + "size_diff": 321600, + "size_diff_human": "314.1 KiB", + "count_diff": 4997 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:133", + "size_diff": 308400, + "size_diff_human": "301.2 KiB", + "count_diff": 5000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:130", + "size_diff": 238890, + "size_diff_human": "233.3 KiB", + "count_diff": 5000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:132", + "size_diff": 159200, + "size_diff_human": "155.5 KiB", + "count_diff": 4975 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:131", + "size_diff": 159168, + "size_diff_human": "155.4 KiB", + "count_diff": 4974 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 240, + "size_diff_human": "240.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 240, + "size_diff_human": "240.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:125", + "size_diff": 32, + 
"size_diff_human": "32.0 B", + "count_diff": 1 + } + ] + }, + "rel_groups_10k": { + "label": "rel_groups: 10k buffered relationships", + "total_new_alloc": 3763656, + "total_new_alloc_human": "3.6 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:149", + "size_diff": 1925336, + "size_diff_human": "1.8 MiB", + "count_diff": 20003 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:152", + "size_diff": 640000, + "size_diff_human": "625.0 KiB", + "count_diff": 10000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:151", + "size_diff": 598894, + "size_diff_human": "584.9 KiB", + "count_diff": 10000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:150", + "size_diff": 598890, + "size_diff_human": "584.9 KiB", + "count_diff": 10000 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 224, + "size_diff_human": "224.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 224, + "size_diff_human": "224.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:147", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ] + }, + "import_mapping_2k_modules": { + "label": "import_mapping: 2k modules x 20 imports each", + "total_new_alloc": 5839298, + "total_new_alloc_human": "5.6 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:169", + 
"size_diff": 5540000, + "size_diff_human": "5.3 MiB", + "count_diff": 82000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:167", + "size_diff": 128000, + "size_diff_human": "125.0 KiB", + "count_diff": 2000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:166", + "size_diff": 118890, + "size_diff_human": "116.1 KiB", + "count_diff": 2000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:170", + "size_diff": 51904, + "size_diff_human": "50.7 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 208, + "size_diff_human": "208.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 208, + "size_diff_human": "208.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:165", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ] + }, + "class_inheritance_3k": { + "label": "class_inheritance: 3k classes x 3 parents", + "total_new_alloc": 1202898, + "total_new_alloc_human": "1.1 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:184", + "size_diff": 893044, + "size_diff_human": "872.1 KiB", + "count_diff": 14999 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:183", + "size_diff": 205590, + "size_diff_human": "200.8 KiB", + "count_diff": 3000 + }, + { + "file": 
"/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:185", + "size_diff": 103792, + "size_diff_human": "101.4 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 192, + "size_diff_human": "192.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 192, + "size_diff_human": "192.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:182", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ] + } + }, + "tree_sitter": { + "parse_all_project_files": { + "label": "Parse 343 Python files (5.4 MiB source)", + "total_new_alloc": 88243514, + "total_new_alloc_human": "84.2 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:224", + "size_diff": 82541776, + "size_diff_human": "78.7 MiB", + "count_diff": 903039 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/pathlib.py:1020", + "size_diff": 5679234, + "size_diff_human": "5.4 MiB", + "count_diff": 337 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:225", + "size_diff": 22024, + "size_diff_human": "21.5 KiB", + "count_diff": 344 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 168, + "size_diff_human": "168.0 B", + "count_diff": 2 + }, + { + "file": 
"/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 168, + "size_diff_human": "168.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:218", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:223", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ], + "file_count": 343, + "source_bytes": 5668113 + }, + "ast_node_retention": { + "label": "Retaining 343 AST root nodes", + "total_new_alloc": 25128, + "total_new_alloc_human": "24.5 KiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:243", + "size_diff": 24768, + "size_diff_human": "24.2 KiB", + "count_diff": 344 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 152, + "size_diff_human": "152.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 152, + "size_diff_human": "152.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + } + ] + }, + "ast_walk_function_extraction": { + "label": "Walking ASTs, collected 5578 function/class nodes", + "total_new_alloc": 91566344, + "total_new_alloc_human": "87.3 MiB", + "top_allocators": [ + { + "file": 
"/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:263", + "size_diff": 91518856, + "size_diff_human": "87.3 MiB", + "count_diff": 1673834 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:262", + "size_diff": 47104, + "size_diff_human": "46.0 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 136, + "size_diff_human": "136.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 136, + "size_diff_human": "136.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:258", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + } + ], + "function_class_count": 5578 + } + }, + "graph_loader": { + "graph_loader_5k_nodes_8k_rels": { + "label": "GraphLoader: load 5k nodes + 8k relationships from JSON", + "total_new_alloc": 9476802, + "total_new_alloc_human": "9.0 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/json/decoder.py:353", + "size_diff": 6787632, + "size_diff_human": "6.5 MiB", + "count_diff": 111693 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_loader.py:74", + "size_diff": 770760, + "size_diff_human": "752.7 KiB", + "count_diff": 16000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_loader.py:83", + "size_diff": 587480, + "size_diff_human": "573.7 KiB", + "count_diff": 10001 + }, + { + "file": 
"/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_loader.py:82", + "size_diff": 587480, + "size_diff_human": "573.7 KiB", + "count_diff": 10001 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_loader.py:61", + "size_diff": 443080, + "size_diff_human": "432.7 KiB", + "count_diff": 10001 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_loader.py:68", + "size_diff": 147480, + "size_diff_human": "144.0 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_loader.py:80", + "size_diff": 67168, + "size_diff_human": "65.6 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_loader.py:70", + "size_diff": 41880, + "size_diff_human": "40.9 KiB", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_loader.py:66", + "size_diff": 41824, + "size_diff_human": "40.8 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/.venv/lib/python3.12/site-packages/loguru/_logger.py:2003", + "size_diff": 200, + "size_diff_human": "200.0 B", + "count_diff": 4 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 120, + "size_diff_human": "120.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 120, + "size_diff_human": "120.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/pathlib.py:404", + "size_diff": 120, + "size_diff_human": "120.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_loader.py:52", + "size_diff": 120, + "size_diff_human": "120.0 B", + 
"count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/.venv/lib/python3.12/site-packages/loguru/_handler.py:120", + "size_diff": 120, + "size_diff_human": "120.0 B", + "count_diff": 1 + } + ] + }, + "graph_loader_property_index": { + "label": "GraphLoader: build property index on qualified_name", + "total_new_alloc": 544224, + "total_new_alloc_human": "531.5 KiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_loader.py:99", + "size_diff": 440120, + "size_diff_human": "429.8 KiB", + "count_diff": 10001 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_loader.py:100", + "size_diff": 103856, + "size_diff_human": "101.4 KiB", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 96, + "size_diff_human": "96.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 96, + "size_diff_human": "96.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + } + ] + } + }, + "embedding_cache": { + "embedding_cache_2k_768dim": { + "label": "EmbeddingCache: 2k entries x 768-dim embeddings", + "total_new_alloc": 50998237, + "total_new_alloc_human": "48.6 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:375", + "size_diff": 50736000, + "size_diff_human": "48.4 MiB", + "count_diff": 1540000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/embedder.py:26", + "size_diff": 210000, + "size_diff_human": "205.1 KiB", + "count_diff": 2000 + }, + { + "file": 
"/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/embedder.py:32", + "size_diff": 51904, + "size_diff_human": "50.7 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:374", + "size_diff": 85, + "size_diff_human": "85.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 80, + "size_diff_human": "80.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 80, + "size_diff_human": "80.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:373", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ], + "cache_dict_size": 51968, + "entry_count": 2000 + } + }, + "gc_pressure": { + "gc_pressure_simulation": { + "label": "GC pressure during simulated file processing (1k files x 20 funcs)", + "temp_objects_created": 20000, + "gc_gen0_before": { + "collections": 1785, + "collected": 8016, + "uncollectable": 0 + }, + "gc_gen0_after": { + "collections": 1785, + "collected": 8016, + "uncollectable": 0 + }, + "gc_gen1_before": { + "collections": 155, + "collected": 1262, + "uncollectable": 0 + }, + "gc_gen1_after": { + "collections": 155, + "collected": 1262, + "uncollectable": 0 + }, + "gc_gen2_before": { + "collections": 40, + "collected": 279, + "uncollectable": 0 + }, + "gc_gen2_after": { + "collections": 41, + "collected": 279, + "uncollectable": 0 + } + } + }, + "string_duplication": { + "property_dict_duplication_5k": { + "label": "5k property dicts with repeated key strings", + 
"total_new_alloc": 2180068, + "total_new_alloc_human": "2.1 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:449", + "size_diff": 920000, + "size_diff_human": "898.4 KiB", + "count_diff": 10000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:450", + "size_diff": 352290, + "size_diff_human": "344.0 KiB", + "count_diff": 5000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:454", + "size_diff": 308400, + "size_diff_human": "301.2 KiB", + "count_diff": 5000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:451", + "size_diff": 238890, + "size_diff_human": "233.3 KiB", + "count_diff": 5000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:453", + "size_diff": 159200, + "size_diff_human": "155.5 KiB", + "count_diff": 4975 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:452", + "size_diff": 159168, + "size_diff_human": "155.4 KiB", + "count_diff": 4974 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:456", + "size_diff": 41824, + "size_diff_human": "40.8 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 80, + "size_diff_human": "80.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 72, + "size_diff_human": "72.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": 
"/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:447", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:448", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ] + }, + "property_tuple_alternative_5k": { + "label": "5k tuples (no key duplication) as alternative", + "total_new_alloc": 1660012, + "total_new_alloc_human": "1.6 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:471", + "size_diff": 400000, + "size_diff_human": "390.6 KiB", + "count_diff": 5000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:472", + "size_diff": 352290, + "size_diff_human": "344.0 KiB", + "count_diff": 5000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:476", + "size_diff": 308400, + "size_diff_human": "301.2 KiB", + "count_diff": 5000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:473", + "size_diff": 238890, + "size_diff_human": "233.3 KiB", + "count_diff": 5000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:475", + "size_diff": 159200, + "size_diff_human": "155.5 KiB", + "count_diff": 4975 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:474", + "size_diff": 159168, + "size_diff_human": "155.4 KiB", + "count_diff": 4974 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:478", + "size_diff": 41824, + "size_diff_human": "40.8 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 80, + "size_diff_human": "80.0 B", + "count_diff": 2 + }, + { + "file": 
"/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 72, + "size_diff_human": "72.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:470", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ] + } + }, + "full_pipeline": { + "phase1_trie_15k": { + "label": "Phase 1: FunctionRegistryTrie + SimpleNameLookup (15k entries)", + "total_new_alloc": 6411617, + "total_new_alloc_human": "6.1 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:56", + "size_diff": 1679760, + "size_diff_human": "1.6 MiB", + "count_diff": 13998 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:51", + "size_diff": 1574648, + "size_diff_human": "1.5 MiB", + "count_diff": 18203 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:513", + "size_diff": 1150200, + "size_diff_human": "1.1 MiB", + "count_diff": 15000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:46", + "size_diff": 788278, + "size_diff_human": "769.8 KiB", + "count_diff": 16101 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:515", + "size_diff": 754088, + "size_diff_human": "736.4 KiB", + "count_diff": 2002 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:44", + "size_diff": 415088, + "size_diff_human": "405.4 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:512", + "size_diff": 48939, + "size_diff_human": "47.8 KiB", + 
"count_diff": 1001 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:509", + "size_diff": 176, + "size_diff_human": "176.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 80, + "size_diff_human": "80.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 72, + "size_diff_human": "72.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:508", + "size_diff": 72, + "size_diff_human": "72.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:40", + "size_diff": 64, + "size_diff_human": "64.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:39", + "size_diff": 64, + "size_diff_human": "64.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:511", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ] + }, + "phase2_imports_1500_modules": { + "label": "Phase 2: import_mapping (1500 modules x 25 imports)", + "total_new_alloc": 5287898, + "total_new_alloc_human": "5.0 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:528", + "size_diff": 5140500, + "size_diff_human": "4.9 MiB", + "count_diff": 78000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:527", + "size_diff": 88890, + "size_diff_human": "86.8 KiB", + 
"count_diff": 1500 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:529", + "size_diff": 51904, + "size_diff_human": "50.7 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:39", + "size_diff": 2888, + "size_diff_human": "2.8 KiB", + "count_diff": 31 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:173", + "size_diff": 1872, + "size_diff_human": "1.8 KiB", + "count_diff": 15 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:23", + "size_diff": 768, + "size_diff_human": "768.0 B", + "count_diff": 16 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:503", + "size_diff": 192, + "size_diff_human": "192.0 B", + "count_diff": 6 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:502", + "size_diff": 192, + "size_diff_human": "192.0 B", + "count_diff": 6 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:31", + "size_diff": 184, + "size_diff_human": "184.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:519", + "size_diff": 120, + "size_diff_human": "120.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 80, + "size_diff_human": "80.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 72, + "size_diff_human": "72.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:525", + "size_diff": 
64, + "size_diff_human": "64.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:35", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + } + ] + }, + "phase3_inheritance_5k": { + "label": "Phase 3: class_inheritance (5k classes x 2 parents)", + "total_new_alloc": 1542592, + "total_new_alloc_human": "1.5 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:542", + "size_diff": 1089000, + "size_diff_human": "1.0 MiB", + "count_diff": 20000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:541", + "size_diff": 343390, + "size_diff_human": "335.3 KiB", + "count_diff": 5000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:543", + "size_diff": 103792, + "size_diff_human": "101.4 KiB", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:39", + "size_diff": 2888, + "size_diff_human": "2.8 KiB", + "count_diff": 31 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:173", + "size_diff": 1961, + "size_diff_human": "1.9 KiB", + "count_diff": 15 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:23", + "size_diff": 765, + "size_diff_human": "765.0 B", + "count_diff": 16 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:31", + "size_diff": 184, + "size_diff_human": "184.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:502", + 
"size_diff": 160, + "size_diff_human": "160.0 B", + "count_diff": 5 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:560", + "size_diff": 80, + "size_diff_human": "80.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:423", + "size_diff": 72, + "size_diff_human": "72.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:503", + "size_diff": 64, + "size_diff_human": "64.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:539", + "size_diff": 64, + "size_diff_human": "64.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:558", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:35", + "size_diff": 56, + "size_diff_human": "56.0 B", + "count_diff": 1 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:540", + "size_diff": 32, + "size_diff_human": "32.0 B", + "count_diff": 1 + } + ] + }, + "phase4_buffers_10k_nodes_20k_rels": { + "label": "Phase 4: node_buffer (10k) + rel_groups (20k)", + "total_new_alloc": 11864970, + "total_new_alloc_human": "11.3 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:568", + "size_diff": 3853176, + "size_diff_human": "3.7 MiB", + "count_diff": 40003 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:557", + "size_diff": 1840000, + "size_diff_human": "1.8 MiB", + "count_diff": 20000 + }, + { + "file": 
"/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:571", + "size_diff": 1280000, + "size_diff_human": "1.2 MiB", + "count_diff": 20000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:570", + "size_diff": 1208894, + "size_diff_human": "1.2 MiB", + "count_diff": 20000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:569", + "size_diff": 1208890, + "size_diff_human": "1.2 MiB", + "count_diff": 20000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:558", + "size_diff": 706790, + "size_diff_human": "690.2 KiB", + "count_diff": 10000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:555", + "size_diff": 645120, + "size_diff_human": "630.0 KiB", + "count_diff": 10001 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:559", + "size_diff": 478890, + "size_diff_human": "467.7 KiB", + "count_diff": 10000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:561", + "size_diff": 318400, + "size_diff_human": "310.9 KiB", + "count_diff": 9950 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:560", + "size_diff": 318336, + "size_diff_human": "310.9 KiB", + "count_diff": 9948 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:39", + "size_diff": 2888, + "size_diff_human": "2.8 KiB", + "count_diff": 31 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:173", + "size_diff": 1961, + "size_diff_human": "1.9 KiB", + "count_diff": 15 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:23", + "size_diff": 765, + "size_diff_human": "765.0 B", + "count_diff": 16 + }, + { + "file": 
"/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:31", + "size_diff": 184, + "size_diff_human": "184.0 B", + "count_diff": 2 + }, + { + "file": "/Users/vitaliavagyan/.local/share/uv/python/cpython-3.12.2-macos-aarch64-none/lib/python3.12/tracemalloc.py:126", + "size_diff": 96, + "size_diff_human": "96.0 B", + "count_diff": 3 + } + ] + }, + "total_pipeline_memory": { + "label": "TOTAL: Full pipeline memory (all phases combined)", + "total_new_alloc": 25106981, + "total_new_alloc_human": "23.9 MiB", + "top_allocators": [ + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:528", + "size_diff": 5140500, + "size_diff_human": "4.9 MiB", + "count_diff": 78000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:568", + "size_diff": 3853176, + "size_diff_human": "3.7 MiB", + "count_diff": 40003 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:557", + "size_diff": 1840000, + "size_diff_human": "1.8 MiB", + "count_diff": 20000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:56", + "size_diff": 1679760, + "size_diff_human": "1.6 MiB", + "count_diff": 13998 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:51", + "size_diff": 1574648, + "size_diff_human": "1.5 MiB", + "count_diff": 18203 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:571", + "size_diff": 1280000, + "size_diff_human": "1.2 MiB", + "count_diff": 20000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:570", + "size_diff": 1208894, + "size_diff_human": "1.2 MiB", + "count_diff": 20000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:569", + "size_diff": 1208890, + "size_diff_human": "1.2 MiB", + "count_diff": 20000 + }, + { + "file": 
"/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:513", + "size_diff": 1150200, + "size_diff_human": "1.1 MiB", + "count_diff": 15000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:542", + "size_diff": 1089000, + "size_diff_human": "1.0 MiB", + "count_diff": 20000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/codebase_rag/graph_updater.py:46", + "size_diff": 788278, + "size_diff_human": "769.8 KiB", + "count_diff": 16101 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:515", + "size_diff": 754088, + "size_diff_human": "736.4 KiB", + "count_diff": 2002 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:558", + "size_diff": 706790, + "size_diff_human": "690.2 KiB", + "count_diff": 10000 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:555", + "size_diff": 645120, + "size_diff_human": "630.0 KiB", + "count_diff": 10001 + }, + { + "file": "/Users/vitaliavagyan/Documents/code-graph-rag/optimize/memory_profile.py:559", + "size_diff": 478890, + "size_diff_human": "467.7 KiB", + "count_diff": 10000 + } + ] + }, + "peak_traced_memory": { + "current": 25128953, + "current_human": "24.0 MiB", + "peak": 25135561, + "peak_human": "24.0 MiB" + } + } +} diff --git a/optimize/profile_io.py b/optimize/profile_io.py new file mode 100644 index 000000000..c71d98ecd --- /dev/null +++ b/optimize/profile_io.py @@ -0,0 +1,431 @@ +import hashlib +import json +import statistics +import sys +import time +from collections import defaultdict +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from codebase_rag import constants as cs +from codebase_rag.graph_updater import _hash_file, _load_hash_cache, _save_hash_cache +from codebase_rag.parser_loader import load_parsers +from codebase_rag.parsers.utils import safe_decode_with_fallback 
+from codebase_rag.services.protobuf_service import ProtobufFileIngestor +from codebase_rag.utils.path_utils import should_skip_path + + +REPO_PATH = Path(__file__).resolve().parent.parent +RUNS = 5 + + +def benchmark(func, *args, runs=RUNS, label=""): + times = [] + result = None + for _ in range(runs): + start = time.perf_counter() + result = func(*args) + elapsed = time.perf_counter() - start + times.append(elapsed) + avg = statistics.mean(times) + std = statistics.stdev(times) if len(times) > 1 else 0.0 + med = statistics.median(times) + return { + "label": label, + "avg_ms": avg * 1000, + "median_ms": med * 1000, + "std_ms": std * 1000, + "min_ms": min(times) * 1000, + "max_ms": max(times) * 1000, + "runs": runs, + "result": result, + } + + +def collect_py_files(): + files = [] + for f in REPO_PATH.rglob("*.py"): + if not should_skip_path(f, REPO_PATH): + files.append(f) + return files + + +def profile_file_hashing(files): + print("\n=== FILE HASHING (SHA-256) ===") + results = [] + total_bytes = 0 + for f in files: + total_bytes += f.stat().st_size + + def hash_all(): + for f in files: + _hash_file(f) + + r = benchmark(hash_all, label=f"hash {len(files)} files ({total_bytes/1024:.0f} KB)") + results.append(r) + print(f" {r['label']}: avg={r['avg_ms']:.2f}ms, median={r['median_ms']:.2f}ms, std={r['std_ms']:.2f}ms") + + per_file_ms = r['avg_ms'] / len(files) if files else 0 + print(f" Per file average: {per_file_ms:.3f}ms") + print(f" Throughput: {total_bytes / (r['avg_ms']/1000) / 1024 / 1024:.1f} MB/s") + + single_sizes = [(f, f.stat().st_size) for f in files] + single_sizes.sort(key=lambda x: x[1], reverse=True) + for f, sz in single_sizes[:5]: + r2 = benchmark(_hash_file, f, runs=10, label=f"hash {f.relative_to(REPO_PATH)} ({sz}B)") + results.append(r2) + print(f" {r2['label']}: avg={r2['avg_ms']:.3f}ms") + + return results + + +def profile_file_reading(files): + print("\n=== FILE READING (read_bytes + parse) ===") + results = [] + + def read_all_bytes(): + 
for f in files: + f.read_bytes() + + total_bytes = sum(f.stat().st_size for f in files) + r = benchmark(read_all_bytes, label=f"read_bytes {len(files)} files ({total_bytes/1024:.0f} KB)") + results.append(r) + print(f" {r['label']}: avg={r['avg_ms']:.2f}ms, median={r['median_ms']:.2f}ms") + print(f" Throughput: {total_bytes / (r['avg_ms']/1000) / 1024 / 1024:.1f} MB/s") + + def read_all_text(): + for f in files: + f.read_text(encoding="utf-8") + + r2 = benchmark(read_all_text, label=f"read_text {len(files)} files") + results.append(r2) + print(f" {r2['label']}: avg={r2['avg_ms']:.2f}ms, median={r2['median_ms']:.2f}ms") + + return results + + +def profile_tree_sitter_parsing(files): + print("\n=== TREE-SITTER PARSING ===") + results = [] + parsers, queries = load_parsers() + py_parser = parsers.get(cs.SupportedLanguage.PYTHON) + if not py_parser: + print(" Python parser not available, skipping") + return results + + py_files = [f for f in files if f.suffix == ".py"] + file_bytes = [(f, f.read_bytes()) for f in py_files] + + def parse_all(): + for f, src in file_bytes: + py_parser.parse(src) + + r = benchmark(parse_all, label=f"parse {len(py_files)} Python files") + results.append(r) + print(f" {r['label']}: avg={r['avg_ms']:.2f}ms, median={r['median_ms']:.2f}ms") + per_file_ms = r['avg_ms'] / len(py_files) if py_files else 0 + print(f" Per file average: {per_file_ms:.3f}ms") + + file_bytes_sorted = sorted(file_bytes, key=lambda x: len(x[1]), reverse=True) + for f, src in file_bytes_sorted[:5]: + r2 = benchmark(py_parser.parse, src, runs=10, + label=f"parse {f.relative_to(REPO_PATH)} ({len(src)}B)") + results.append(r2) + print(f" {r2['label']}: avg={r2['avg_ms']:.3f}ms") + + return results + + +def profile_json_serialization(): + print("\n=== JSON SERIALIZATION ===") + results = [] + + small = {"key": "value", "num": 42, "arr": [1, 2, 3]} + r = benchmark(json.dumps, small, runs=1000, label="json.dumps small dict") + results.append(r) + print(f" {r['label']}: 
avg={r['avg_ms']:.4f}ms") + + medium_nodes = [ + {"node_id": i, "labels": ["Function"], "properties": {"name": f"func_{i}", "path": f"src/mod_{i//10}.py", "start_line": i*10, "end_line": i*10+5}} + for i in range(1000) + ] + medium_rels = [ + {"from_id": i, "to_id": (i+1) % 1000, "type": "CALLS", "properties": {}} + for i in range(2000) + ] + medium = {"nodes": medium_nodes, "relationships": medium_rels, "metadata": {"total_nodes": 1000, "total_relationships": 2000}} + + r2 = benchmark(json.dumps, medium, runs=5, label=f"json.dumps graph (1K nodes, 2K rels, {len(json.dumps(medium))/1024:.0f}KB)") + results.append(r2) + print(f" {r2['label']}: avg={r2['avg_ms']:.2f}ms") + + json_str = json.dumps(medium) + r3 = benchmark(json.loads, json_str, runs=5, label=f"json.loads graph ({len(json_str)/1024:.0f}KB)") + results.append(r3) + print(f" {r3['label']}: avg={r3['avg_ms']:.2f}ms") + + large_nodes = medium_nodes * 10 + large_rels = medium_rels * 10 + large = {"nodes": large_nodes, "relationships": large_rels, "metadata": {"total_nodes": 10000, "total_relationships": 20000}} + large_json = json.dumps(large) + r4 = benchmark(json.dumps, large, runs=3, label=f"json.dumps large graph (10K nodes, 20K rels, {len(large_json)/1024:.0f}KB)") + results.append(r4) + print(f" {r4['label']}: avg={r4['avg_ms']:.2f}ms") + + r5 = benchmark(json.loads, large_json, runs=3, label=f"json.loads large graph ({len(large_json)/1024:.0f}KB)") + results.append(r5) + print(f" {r5['label']}: avg={r5['avg_ms']:.2f}ms") + + with_indent = lambda d: json.dumps(d, indent=2, ensure_ascii=False) + r6 = benchmark(with_indent, large, runs=3, label=f"json.dumps large graph (indent=2)") + results.append(r6) + print(f" {r6['label']}: avg={r6['avg_ms']:.2f}ms") + + return results + + +def profile_protobuf_serialization(): + print("\n=== PROTOBUF SERIALIZATION ===") + results = [] + try: + import codec.schema_pb2 as pb + except ImportError: + print(" protobuf schema not available, skipping") + return results + + 
import tempfile, shutil + tmp_dir = Path(tempfile.mkdtemp()) + try: + ingestor = ProtobufFileIngestor(output_path=str(tmp_dir)) + + for i in range(100): + ingestor.ensure_node_batch("Function", { + "qualified_name": f"project.mod.func_{i}", + "name": f"func_{i}", + "path": f"src/mod.py", + "start_line": i * 10, + "end_line": i * 10 + 5, + }) + for i in range(200): + ingestor.ensure_relationship_batch( + ("Function", "qualified_name", f"project.mod.func_{i % 100}"), + "CALLS", + ("Function", "qualified_name", f"project.mod.func_{(i+1) % 100}"), + ) + + def flush_protobuf(): + ingestor.flush_all() + + r = benchmark(flush_protobuf, runs=5, label="protobuf flush (100 nodes, 200 rels)") + results.append(r) + print(f" {r['label']}: avg={r['avg_ms']:.2f}ms") + + index_file = tmp_dir / "graph_code_index.pb" + if index_file.exists(): + size = index_file.stat().st_size + print(f" Output size: {size} bytes") + + def read_protobuf(): + idx = pb.GraphCodeIndex() + idx.ParseFromString(index_file.read_bytes()) + return idx + + r2 = benchmark(read_protobuf, runs=10, label=f"protobuf parse ({size}B)") + results.append(r2) + print(f" {r2['label']}: avg={r2['avg_ms']:.3f}ms") + + for node_path in tmp_dir.iterdir(): + if node_path.suffix == ".pb": + sz = node_path.stat().st_size + print(f" Protobuf file: {node_path.name} ({sz} bytes)") + + finally: + shutil.rmtree(tmp_dir) + + return results + + +def profile_hash_cache_io(): + print("\n=== HASH CACHE I/O ===") + results = [] + + import tempfile + tmp = Path(tempfile.mkdtemp()) + try: + cache_data = {f"path/to/file_{i}.py": hashlib.sha256(f"content_{i}".encode()).hexdigest() for i in range(1000)} + cache_path = tmp / ".file_hashes.json" + + r = benchmark(_save_hash_cache, cache_path, cache_data, runs=5, label=f"save hash cache ({len(cache_data)} entries)") + results.append(r) + print(f" {r['label']}: avg={r['avg_ms']:.2f}ms, size={cache_path.stat().st_size/1024:.1f}KB") + + r2 = benchmark(_load_hash_cache, cache_path, runs=5, 
label=f"load hash cache ({len(cache_data)} entries)") + results.append(r2) + print(f" {r2['label']}: avg={r2['avg_ms']:.2f}ms") + finally: + import shutil + shutil.rmtree(tmp) + + return results + + +def profile_file_traversal(): + print("\n=== FILESYSTEM TRAVERSAL ===") + results = [] + + def rglob_all(): + return list(REPO_PATH.rglob("*")) + + r = benchmark(rglob_all, runs=5, label="rglob('*') entire repo") + results.append(r) + all_paths = r['result'] + print(f" {r['label']}: avg={r['avg_ms']:.2f}ms, found {len(all_paths)} paths") + + def rglob_with_filter(): + eligible = [] + for f in REPO_PATH.rglob("*"): + if f.is_file() and not should_skip_path(f, REPO_PATH): + eligible.append(f) + return eligible + + r2 = benchmark(rglob_with_filter, runs=5, label="rglob + should_skip_path filter") + results.append(r2) + eligible = r2['result'] + print(f" {r2['label']}: avg={r2['avg_ms']:.2f}ms, eligible {len(eligible)} files") + + overhead_ms = r2['avg_ms'] - r['avg_ms'] + print(f" Filter overhead: {overhead_ms:.2f}ms") + + return results + + +def profile_source_extraction(): + print("\n=== SOURCE EXTRACTION ===") + results = [] + from codebase_rag.utils.source_extraction import extract_source_lines + + py_files = [f for f in REPO_PATH.rglob("*.py") + if not should_skip_path(f, REPO_PATH) and f.stat().st_size > 100] + if not py_files: + print(" No Python files found") + return results + + target = py_files[0] + line_count = len(target.read_text().splitlines()) + + def extract_50_lines(): + return extract_source_lines(target, 1, min(50, line_count)) + + r = benchmark(extract_50_lines, runs=20, label=f"extract 50 lines from {target.relative_to(REPO_PATH)}") + results.append(r) + print(f" {r['label']}: avg={r['avg_ms']:.3f}ms") + + def extract_all_files_10_lines(): + for f in py_files[:50]: + extract_source_lines(f, 1, 10) + + r2 = benchmark(extract_all_files_10_lines, runs=5, label=f"extract 10 lines from {min(50, len(py_files))} files") + results.append(r2) + print(f" 
{r2['label']}: avg={r2['avg_ms']:.2f}ms") + + return results + + +def profile_embedding_cache_io(): + print("\n=== EMBEDDING CACHE I/O ===") + results = [] + import tempfile + + from codebase_rag.embedder import EmbeddingCache + + tmp = Path(tempfile.mkdtemp()) + try: + cache = EmbeddingCache(path=tmp / "embedding_cache.json") + for i in range(500): + cache.put(f"def func_{i}(): pass", [float(j) / 768 for j in range(768)]) + + def save_cache(): + cache.save() + + r = benchmark(save_cache, runs=5, label=f"save embedding cache ({len(cache)} entries, 768-dim)") + results.append(r) + size = (tmp / "embedding_cache.json").stat().st_size + print(f" {r['label']}: avg={r['avg_ms']:.2f}ms, size={size/1024/1024:.2f}MB") + + def load_cache(): + new_cache = EmbeddingCache(path=tmp / "embedding_cache.json") + new_cache.load() + return new_cache + + r2 = benchmark(load_cache, runs=5, label=f"load embedding cache ({size/1024/1024:.2f}MB)") + results.append(r2) + print(f" {r2['label']}: avg={r2['avg_ms']:.2f}ms") + print(f" Throughput: {size / (r2['avg_ms']/1000) / 1024 / 1024:.1f} MB/s") + finally: + import shutil + shutil.rmtree(tmp) + + return results + + +def profile_directory_structure(): + print("\n=== DIRECTORY STRUCTURE IDENTIFICATION ===") + results = [] + from codebase_rag.language_spec import LANGUAGE_SPECS + + package_indicators = set() + for spec in LANGUAGE_SPECS.values(): + package_indicators.update(spec.package_indicators) + + def identify_packages(): + dirs = set() + for p in REPO_PATH.rglob("*"): + if p.is_dir() and not should_skip_path(p, REPO_PATH): + dirs.add(p) + packages = 0 + for d in dirs: + for indicator in package_indicators: + if (d / indicator).exists(): + packages += 1 + break + return packages + + r = benchmark(identify_packages, runs=5, label="identify package structure") + results.append(r) + print(f" {r['label']}: avg={r['avg_ms']:.2f}ms, packages={r['result']}") + + return results + + +def main(): + print("=" * 70) + print("I/O AND SERIALIZATION 
LATENCY PROFILE") + print(f"Repo: {REPO_PATH}") + print("=" * 70) + + all_results = [] + files = collect_py_files() + print(f"\nPython files for profiling: {len(files)}") + + all_results.extend(profile_file_traversal()) + all_results.extend(profile_file_reading(files)) + all_results.extend(profile_file_hashing(files)) + all_results.extend(profile_tree_sitter_parsing(files)) + all_results.extend(profile_source_extraction()) + all_results.extend(profile_json_serialization()) + all_results.extend(profile_protobuf_serialization()) + all_results.extend(profile_hash_cache_io()) + all_results.extend(profile_embedding_cache_io()) + all_results.extend(profile_directory_structure()) + + print("\n" + "=" * 70) + print("RANKED SUMMARY (by avg wall-clock time)") + print("=" * 70) + ranked = sorted(all_results, key=lambda x: x['avg_ms'], reverse=True) + for i, r in enumerate(ranked, 1): + print(f" {i:2d}. [{r['avg_ms']:10.2f}ms] {r['label']}") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 12160521b..fa8464872 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,28 +1,65 @@ [project] name = "code-graph-rag" -version = "0.0.60" +version = "0.0.187" description = "The ultimate RAG for your monorepo. 
Query, understand, and edit multi-language codebases with the power of AI and knowledge graphs" -readme = "README.md" +readme = "PYPI_README.md" requires-python = ">=3.12" +license = "MIT" +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + "Operating System :: OS Independent", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Code Generators", + "Topic :: Software Development :: Libraries :: Python Modules", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", +] +keywords = [ + "rag", + "retrieval-augmented-generation", + "knowledge-graph", + "code-analysis", + "tree-sitter", + "mcp", + "mcp-server", + "llm", + "graph-database", + "semantic-search", + "codebase", + "memgraph", + "developer-tools", + "monorepo", +] dependencies = [ "loguru>=0.7.3", - "mcp>=1.21.1", - "pydantic-ai>=1.27.0", - "pydantic-settings>=2.0.0", - "pymgclient>=1.4.0", - "python-dotenv>=1.1.0", + "mcp>=1.25.0", + "pydantic-ai>=1.102.0", + "pydantic-settings>=2.12.0", + "pymgclient>=1.5.1", + "python-dotenv>=1.2.1", + "tiktoken>=0.12.0", "toml>=0.10.2", - "tree-sitter-python>=0.23.6", - "tree-sitter==0.25.0", + "tree-sitter-python>=0.25.0", + "tree-sitter==0.25.2", "watchdog>=6.0.0", - "typer>=0.12.5", - "rich>=13.7.1", - "prompt-toolkit>=3.0.0", + "typer>=0.21.1", + "rich>=14.2.0", + "prompt-toolkit>=3.0.52", "diff-match-patch>=20241021", - "click>=8.0.0", - "protobuf>=5.27.0", + "click>=8.3.1", + "protobuf>=6.33.5", "defusedxml>=0.7.1", - "huggingface-hub[hf-xet]>=0.36.0", + "huggingface-hub[hf-xet]>=1.7.2", + # TODO: remove once pydantic-ai is upgraded to a release whose code and + # metadata agree on the griffe package. 
pydantic-ai-slim 1.102.0 imports + # `griffe` at runtime but only declares the renamed `griffelib`, so a clean + # `uv sync` omits griffe and leaves codebase_rag unimportable; declare it + # explicitly to keep the environment reproducible. + "griffe>=1.0,<2", ] [project.scripts] @@ -32,8 +69,12 @@ cgr = "codebase_rag.cli:app" [tool.uv] package = true -[tool.setuptools] -packages = ["codebase_rag", "codec"] +[tool.setuptools.packages.find] +include = ["codebase_rag*", "codec*", "cgr*"] +exclude = ["*.tests", "*.tests.*"] + +[tool.setuptools.package-data] +codebase_rag = ["docker-compose.yaml"] [project.optional-dependencies] test = [ @@ -42,6 +83,7 @@ test = [ "pytest-cov>=4.0.0", "pytest-xdist>=3.8.0", "testcontainers>=4.9.0", + "libclang>=18.1.1", ] treesitter-full = [ @@ -52,8 +94,10 @@ treesitter-full = [ "tree-sitter-go>=0.23.4", "tree-sitter-scala>=0.24.0", "tree-sitter-java>=0.23.5", + "tree-sitter-c>=0.24.1", "tree-sitter-cpp>=0.23.0", "tree-sitter-lua>=0.0.19", + "tree-sitter-php>=0.24.1", ] semantic = [ @@ -65,7 +109,7 @@ semantic = [ [tool.ruff] line-length = 88 target-version = "py312" -exclude = ["codec/"] +exclude = ["codec/", "benchmarks/", "optimize/"] [tool.ruff.lint] select = ["E", "F", "W", "I", "UP", "PL", "T201"] @@ -83,6 +127,7 @@ ignore = [ [tool.ruff.lint.per-file-ignores] "**/tests/**" = ["T201"] +"benchmarks/**" = ["T201"] [tool.ruff.format] quote-style = "double" @@ -91,7 +136,7 @@ quote-style = "double" python-version = "3.12" [tool.ty.src] -exclude = ["codebase_rag/tests/test_cypher_queries.py", "codebase_rag/tests/test_code_retrieval.py", "codebase_rag/tests/test_call_resolver.py"] +exclude = ["codebase_rag/tests/test_cypher_queries.py", "codebase_rag/tests/test_code_retrieval.py", "codebase_rag/tests/test_call_resolver.py", "benchmarks/", "optimize/"] [tool.pytest.ini_options] asyncio_mode = "auto" @@ -113,6 +158,7 @@ dev = [ "pre-commit>=4.2.0", "pyinstaller>=6.14.1", "pylint>=4.0.4", + "pytest>=9.0.2", "radon>=6.0.1", "ruff>=0.5.5", 
"semgrep>=1.79.0", @@ -121,7 +167,15 @@ dev = [ "types-toml>=0.10.8.20240310", "vulture>=2.14", ] +docs = [ + "mkdocs>=1.6.1,<2", + "mkdocs-material>=9.7.3", + "mkdocs-minify-plugin>=0.8.0", +] +fuzz = [ + "atheris>=2.3.0", +] [tool.bandit] -exclude_dirs = ["codebase_rag/tests", "scripts"] +exclude_dirs = ["codebase_rag/tests", "scripts", "benchmarks", "optimize"] skips = ["B101"] diff --git a/realtime_updater.py b/realtime_updater.py index 4fd95d5bc..f3bc21f65 100644 --- a/realtime_updater.py +++ b/realtime_updater.py @@ -1,4 +1,5 @@ import sys +import threading import time from pathlib import Path from typing import Annotated @@ -14,7 +15,10 @@ from codebase_rag.config import settings from codebase_rag.constants import ( CYPHER_DELETE_CALLS, + CYPHER_DELETE_FILE, CYPHER_DELETE_MODULE, + DEFAULT_DEBOUNCE_SECONDS, + DEFAULT_MAX_WAIT_SECONDS, IGNORE_PATTERNS, IGNORE_SUFFIXES, KEY_PATH, @@ -32,11 +36,47 @@ class CodeChangeEventHandler(FileSystemEventHandler): - def __init__(self, updater: GraphUpdater): + """ + Handles file system events with debouncing to prevent redundant graph updates. + + The handler implements a hybrid debounce strategy: + - Debounce: Waits for a quiet period after the last change before processing + - Max wait: Ensures updates happen within a maximum time window, even during + continuous editing + + This prevents the graph update process from running repeatedly when a file + is saved multiple times in quick succession (common during active development). 
+ """ + + def __init__( + self, + updater: GraphUpdater, + debounce_seconds: float = DEFAULT_DEBOUNCE_SECONDS, + max_wait_seconds: float = DEFAULT_MAX_WAIT_SECONDS, + ): self.updater = updater self.ignore_patterns = IGNORE_PATTERNS self.ignore_suffixes = IGNORE_SUFFIXES - logger.info(logs.WATCHER_ACTIVE) + + # (H) Debounce configuration + self.debounce_seconds = debounce_seconds + self.max_wait_seconds = max_wait_seconds + self.debounce_enabled = debounce_seconds > 0 + + # (H) Thread-safe state for tracking pending changes + self.timers: dict[str, threading.Timer] = {} + self.first_event_time: dict[str, float] = {} + self.pending_events: dict[str, FileSystemEvent] = {} + self.lock = threading.Lock() + + if self.debounce_enabled: + logger.info( + logs.WATCHER_DEBOUNCE_ACTIVE.format( + debounce=debounce_seconds, max_wait=max_wait_seconds + ) + ) + else: + logger.info(logs.WATCHER_ACTIVE) def _is_relevant(self, path_str: str) -> bool: path = Path(path_str) @@ -65,6 +105,99 @@ def dispatch(self, event: FileSystemEvent) -> None: if event.is_directory or not self._is_relevant(src_path): return + if not self.debounce_enabled: + # (H) No debouncing - process immediately (legacy behavior) + self._process_change(event) + return + + # (H) Debounced processing with hybrid approach + path = Path(src_path) + relative_path_str = str(path.relative_to(self.updater.repo_path)) + current_time = time.time() + + with self.lock: + # (H) Track the first event time for max-wait calculation + if relative_path_str not in self.first_event_time: + self.first_event_time[relative_path_str] = current_time + logger.info( + logs.CHANGE_DEBOUNCING.format( + event_type=event.event_type, + name=path.name, + debounce=self.debounce_seconds, + ) + ) + + # (H) Always store the latest event for this file + self.pending_events[relative_path_str] = event + + # (H) Cancel any existing timer for this file + if relative_path_str in self.timers: + self.timers[relative_path_str].cancel() + 
logger.debug(logs.DEBOUNCE_RESET.format(path=relative_path_str)) + + # (H) Check if max wait time has been exceeded + time_since_first = current_time - self.first_event_time[relative_path_str] + + if time_since_first >= self.max_wait_seconds: + # (H) Max wait exceeded - process immediately + logger.info( + logs.DEBOUNCE_MAX_WAIT.format( + max_wait=self.max_wait_seconds, path=relative_path_str + ) + ) + self._schedule_immediate_processing(relative_path_str) + else: + # (H) Schedule debounced processing + remaining_wait = self.max_wait_seconds - time_since_first + effective_delay = min(self.debounce_seconds, remaining_wait) + timer = threading.Timer( + effective_delay, + self._process_debounced_change, + args=[relative_path_str], + ) + timer.daemon = True + self.timers[relative_path_str] = timer + timer.start() + + logger.debug( + logs.DEBOUNCE_SCHEDULED.format( + path=relative_path_str, + debounce=self.debounce_seconds, + remaining=f"{remaining_wait:.1f}", + ) + ) + + def _schedule_immediate_processing(self, relative_path_str: str) -> None: + """Process a file change immediately (called when max wait is exceeded).""" + # (H) Use a zero-delay timer to process in the timer thread + timer = threading.Timer( + 0, self._process_debounced_change, args=[relative_path_str] + ) + timer.daemon = True + self.timers[relative_path_str] = timer + timer.start() + + def _process_debounced_change(self, relative_path_str: str) -> None: + """Process a debounced file change after the timer fires.""" + with self.lock: + # (H) Retrieve and clear pending state for this file + event = self.pending_events.pop(relative_path_str, None) + self.first_event_time.pop(relative_path_str, None) + self.timers.pop(relative_path_str, None) + + if event is None: + logger.warning(logs.DEBOUNCE_NO_EVENT.format(path=relative_path_str)) + return + + logger.info(logs.DEBOUNCE_PROCESSING.format(path=relative_path_str)) + self._process_change(event) + + def _process_change(self, event: FileSystemEvent) -> 
None: + """Execute the actual graph update for a file change.""" + src_path = event.src_path + if isinstance(src_path, bytes): + src_path = src_path.decode() + ingestor = self.updater.ingestor if not isinstance(ingestor, QueryProtocol): logger.warning(logs.WATCHER_SKIP_NO_QUERY) @@ -73,18 +206,31 @@ def dispatch(self, event: FileSystemEvent) -> None: path = Path(src_path) relative_path_str = str(path.relative_to(self.updater.repo_path)) + # (H) Only process events that actually change file content + # (H) Skip read-only events like "opened", "closed_no_write" that don't modify the file + relevant_events = { + EventType.MODIFIED, + EventType.CREATED, + EventType.DELETED, # (H) watchdog deletion event + } + if event.event_type not in relevant_events: + return + logger.warning( logs.CHANGE_DETECTED.format(event_type=event.event_type, path=path) ) - # (H) Step 1 + # (H) Step 1: Delete existing nodes for this file path + # (H) Delete Module node and its children (for code files) ingestor.execute_write(CYPHER_DELETE_MODULE, {KEY_PATH: relative_path_str}) + # (H) Delete File node (for all files including non-code like .md, .json) + ingestor.execute_write(CYPHER_DELETE_FILE, {KEY_PATH: relative_path_str}) logger.debug(logs.DELETION_QUERY.format(path=relative_path_str)) - # (H) Step 2 + # (H) Step 2: Clear in-memory state self.updater.remove_file_from_state(path) - # (H) Step 3 + # (H) Step 3: Re-parse code files and create File nodes for ALL files if event.event_type in (EventType.MODIFIED, EventType.CREATED): lang_config = get_language_spec(path.suffix) if ( @@ -101,18 +247,28 @@ def dispatch(self, event: FileSystemEvent) -> None: root_node, language = result self.updater.ast_cache[path] = (root_node, language) + # (H) Create File node for ALL files (code and non-code like .md, .json, etc.) 
+ self.updater.factory.structure_processor.process_generic_file( + path, path.name + ) + # (H) Step 4 logger.info(logs.RECALC_CALLS) ingestor.execute_write(CYPHER_DELETE_CALLS) self.updater._process_function_calls() - # (H) Step 5 + # (H) Step 5: Flush changes to database self.updater.ingestor.flush_all() logger.success(logs.GRAPH_UPDATED.format(name=path.name)) def start_watcher( - repo_path: str, host: str, port: int, batch_size: int | None = None + repo_path: str, + host: str, + port: int, + batch_size: int | None = None, + debounce_seconds: float = DEFAULT_DEBOUNCE_SECONDS, + max_wait_seconds: float = DEFAULT_MAX_WAIT_SECONDS, ) -> None: repo_path_obj = Path(repo_path).resolve() parsers, queries = load_parsers() @@ -123,11 +279,27 @@ def start_watcher( host=host, port=port, batch_size=effective_batch_size, + username=settings.MEMGRAPH_USERNAME, + password=settings.MEMGRAPH_PASSWORD, ) as ingestor: - _run_watcher_loop(ingestor, repo_path_obj, parsers, queries) + _run_watcher_loop( + ingestor, + repo_path_obj, + parsers, + queries, + debounce_seconds, + max_wait_seconds, + ) -def _run_watcher_loop(ingestor, repo_path_obj, parsers, queries): +def _run_watcher_loop( + ingestor, + repo_path_obj, + parsers, + queries, + debounce_seconds: float, + max_wait_seconds: float, +): updater = GraphUpdater(ingestor, repo_path_obj, parsers, queries) # (H) Initial full scan builds the complete context for real-time updates @@ -135,7 +307,11 @@ def _run_watcher_loop(ingestor, repo_path_obj, parsers, queries): updater.run() logger.success(logs.INITIAL_SCAN_DONE) - event_handler = CodeChangeEventHandler(updater) + event_handler = CodeChangeEventHandler( + updater, + debounce_seconds=debounce_seconds, + max_wait_seconds=max_wait_seconds, + ) observer = Observer() observer.schedule(event_handler, str(repo_path_obj), recursive=True) observer.start() @@ -157,6 +333,12 @@ def _validate_positive_int(value: int | None) -> int | None: return value +def _validate_non_negative_float(value: 
float) -> float: + if value < 0: + raise typer.BadParameter(te.INVALID_NON_NEGATIVE_FLOAT.format(value=value)) + return value + + def main( repo_path: Annotated[str, typer.Argument(help=ch.HELP_REPO_PATH_WATCH)], host: Annotated[ @@ -172,11 +354,62 @@ def main( callback=_validate_positive_int, ), ] = None, + debounce: Annotated[ + float, + typer.Option( + "--debounce", + "-d", + help=ch.HELP_DEBOUNCE, + callback=_validate_non_negative_float, + ), + ] = DEFAULT_DEBOUNCE_SECONDS, + max_wait: Annotated[ + float, + typer.Option( + "--max-wait", + "-m", + help=ch.HELP_MAX_WAIT, + callback=_validate_non_negative_float, + ), + ] = DEFAULT_MAX_WAIT_SECONDS, ) -> None: + """ + Watch a repository for file changes and update the knowledge graph in real-time. + + The watcher uses a hybrid debouncing strategy to efficiently handle rapid file saves: + + - DEBOUNCE: After a file change, waits for a quiet period before processing. + This batches rapid saves into a single update. + + - MAX_WAIT: Ensures updates happen within a maximum time window, even during + continuous editing. Prevents indefinite delays. 
+ + Examples: + + # Default settings (5s debounce, 30s max wait) + python realtime_updater.py /path/to/repo + + # More aggressive batching for background monitoring + python realtime_updater.py /path/to/repo --debounce 10 --max-wait 60 + + # Quick feedback for demos + python realtime_updater.py /path/to/repo --debounce 2 --max-wait 10 + + # Disable debouncing (legacy behavior) + python realtime_updater.py /path/to/repo --debounce 0 + """ logger.remove() logger.add(sys.stdout, format=REALTIME_LOGGER_FORMAT, level=LOG_LEVEL_INFO) logger.info(logs.LOGGER_CONFIGURED) - start_watcher(repo_path, host, port, batch_size) + + # (H) Validate max_wait is greater than debounce when both are enabled + if debounce > 0 and max_wait > 0 and max_wait < debounce: + logger.warning( + logs.DEBOUNCE_MAX_WAIT_ADJUSTED.format(max_wait=max_wait, debounce=debounce) + ) + max_wait = debounce + + start_watcher(repo_path, host, port, batch_size, debounce, max_wait) if __name__ == "__main__": diff --git a/scripts/hooks/generate_readme.py b/scripts/hooks/generate_readme.py index 88394ff55..51d6bbeec 100644 --- a/scripts/hooks/generate_readme.py +++ b/scripts/hooks/generate_readme.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import hashlib import subprocess import sys from pathlib import Path @@ -6,6 +7,8 @@ repo_root = Path(__file__).parent.parent.parent readme_path = repo_root / "README.md" +before = hashlib.sha256(readme_path.read_bytes()).hexdigest() + result = subprocess.run( ["uv", "run", "python", "scripts/generate_readme.py"], check=False, @@ -18,5 +21,9 @@ sys.stderr.write(result.stderr) sys.exit(result.returncode) -subprocess.run(["git", "add", "README.md"], cwd=repo_root, check=True) +after = hashlib.sha256(readme_path.read_bytes()).hexdigest() + +if before != after: + subprocess.run(["git", "add", "README.md"], cwd=repo_root, check=True) + sys.exit(1) sys.exit(0) diff --git a/scripts/release.sh b/scripts/release.sh new file mode 100755 index 000000000..eea3f351a --- /dev/null +++ 
b/scripts/release.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Local release: sync server.json to the pyproject version, then build, verify, +# and publish that version to PyPI and create the matching git tag and GitHub +# Release. Use this when the GitHub Actions publish workflow is unavailable +# (e.g. billing disabled). +# +# Credentials: twine prompts for a PyPI token (username __token__). To avoid the +# prompt, export TWINE_USERNAME=__token__ and TWINE_PASSWORD=pypi-... or set up +# ~/.pypirc beforehand. + +VERSION=$(grep -E '^version = ' pyproject.toml | sed 's/version = "\(.*\)"/\1/') +TAG="v${VERSION}" + +echo "==> Releasing ${TAG}" + +if [ -n "$(git status --porcelain)" ]; then + echo "Error: working tree is not clean. Commit or stash changes first." >&2 + exit 1 +fi + +if git rev-parse "${TAG}" >/dev/null 2>&1; then + echo "Error: tag ${TAG} already exists. Bump the version in pyproject.toml first." >&2 + exit 1 +fi + +echo "==> Syncing server.json to ${VERSION}" +perl -i -pe 's/"version": "[^"]*"/"version": "'"${VERSION}"'"/g' server.json +if [ -n "$(git status --porcelain server.json)" ]; then + git commit -m "chore: sync server.json version to ${VERSION}" server.json +fi + +echo "==> Building distributions" +rm -rf dist/ +uv build + +echo "==> Checking distributions" +uvx twine check dist/* + +echo "==> Uploading to PyPI" +uvx twine upload dist/* + +echo "==> Tagging and creating GitHub Release" +git tag "${TAG}" +git push origin "${TAG}" +# Note: this fires the publish.yml workflow, which will fail harmlessly while +# Actions billing is unavailable. PyPI is already published by the step above. 
+gh release create "${TAG}" --generate-notes --target main + +echo "==> Released ${TAG} at https://pypi.org/project/code-graph-rag/${VERSION}/" diff --git a/server.json b/server.json new file mode 100644 index 000000000..049872e39 --- /dev/null +++ b/server.json @@ -0,0 +1,78 @@ +{ + "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json", + "name": "io.github.vitali87/code-graph-rag", + "title": "Code-Graph-RAG", + "description": "Graph-based RAG system for multi-language codebases. Parse, index, query, and edit code using knowledge graphs and natural language.", + "websiteUrl": "https://code-graph-rag.com", + "repository": { + "url": "https://github.com/vitali87/code-graph-rag", + "source": "github" + }, + "version": "0.0.187", + "packages": [ + { + "registryType": "pypi", + "registryBaseUrl": "https://pypi.org", + "identifier": "code-graph-rag", + "version": "0.0.187", + "runtimeHint": "uvx", + "transport": { + "type": "stdio" + }, + "packageArguments": [ + { + "type": "positional", + "value": "mcp-server" + } + ], + "environmentVariables": [ + { + "name": "ORCHESTRATOR_PROVIDER", + "description": "LLM provider for the orchestrator agent (openai, anthropic, google, azure, cohere, ollama)", + "default": "anthropic" + }, + { + "name": "ORCHESTRATOR_MODEL", + "description": "Model name for the orchestrator agent", + "default": "claude-sonnet-4-20250514" + }, + { + "name": "ORCHESTRATOR_API_KEY", + "description": "API key for the orchestrator LLM provider", + "isRequired": true, + "isSecret": true + }, + { + "name": "CYPHER_PROVIDER", + "description": "LLM provider for Cypher query generation (openai, anthropic, google, azure, cohere, ollama)", + "default": "anthropic" + }, + { + "name": "CYPHER_MODEL", + "description": "Model name for Cypher query generation", + "default": "claude-sonnet-4-20250514" + }, + { + "name": "CYPHER_API_KEY", + "description": "API key for the Cypher LLM provider", + "isRequired": true, + "isSecret": true 
+ }, + { + "name": "MEMGRAPH_HOST", + "description": "Hostname of the Memgraph database", + "default": "localhost" + }, + { + "name": "MEMGRAPH_PORT", + "description": "Port of the Memgraph database", + "default": "7687" + }, + { + "name": "TARGET_REPO_PATH", + "description": "Path to the repository to analyze (auto-detected from working directory if not set)" + } + ] + } + ] +} diff --git a/sonar-project.properties b/sonar-project.properties new file mode 100644 index 000000000..796dc31c5 --- /dev/null +++ b/sonar-project.properties @@ -0,0 +1,13 @@ +sonar.projectKey=vitali87_code-graph-rag +sonar.organization=vitali87 +sonar.projectName=code-graph-rag + +sonar.sources=codebase_rag +sonar.tests=codebase_rag/tests +sonar.exclusions=**/__pycache__/**,**/*.pyc,codebase_rag/tests/** +sonar.security.exclusions=codebase_rag/tests/** + +sonar.python.version=3.12 +sonar.python.coverage.reportPaths=coverage.xml + +sonar.sourceEncoding=UTF-8 diff --git a/uv.lock b/uv.lock index aa1977b86..5b81e297c 100644 --- a/uv.lock +++ b/uv.lock @@ -19,6 +19,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/78/eb55fabaab41abc53f52c0918a9a8c0f747807e5306273f51120fd695957/ag_ui_protocol-0.1.10-py3-none-any.whl", hash = "sha256:c81e6981f30aabdf97a7ee312bfd4df0cd38e718d9fc10019c7d438128b93ab5", size = 7889, upload-time = "2025-11-06T15:17:15.325Z" }, ] +[[package]] +name = "aiofile" +version = "3.11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "caio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/41/2fea7e193e061ce54eacc3b7bc0e6a99e4fcff43c78cf0a76dd781ed8334/aiofile-3.11.1.tar.gz", hash = "sha256:1f91912c6643d2a4e49ca4ae3514f0bf3867ce948a36d99a6411b8f4755f4cf9", size = 19342, upload-time = "2026-05-16T08:18:33.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/cd/0d76dfc5de72bde52f55f53e925c7d152d9c7906634ec1e0cbc7e8d4ad93/aiofile-3.11.1-py3-none-any.whl", hash = 
"sha256:ce77d14ac07f77bc2b757834a5c129321f3f705c474593deed5ab209079a52c9", size = 20446, upload-time = "2026-05-16T08:18:32.051Z" }, +] + [[package]] name = "aiohappyeyeballs" version = "2.6.1" @@ -30,7 +42,7 @@ wheels = [ [[package]] name = "aiohttp" -version = "3.13.3" +version = "3.14.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, @@ -39,78 +51,93 @@ dependencies = [ { name = "frozenlist" }, { name = "multidict" }, { name = "propcache" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556, upload-time = "2026-01-03T17:33:05.204Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/be/4fc11f202955a69e0db803a12a062b8379c970c7c84f4882b6da17337cc1/aiohttp-3.13.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b903a4dfee7d347e2d87697d0713be59e0b87925be030c9178c5faa58ea58d5c", size = 739732, upload-time = "2026-01-03T17:30:14.23Z" }, - { url = "https://files.pythonhosted.org/packages/97/2c/621d5b851f94fa0bb7430d6089b3aa970a9d9b75196bc93bb624b0db237a/aiohttp-3.13.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a45530014d7a1e09f4a55f4f43097ba0fd155089372e105e4bff4ca76cb1b168", size = 494293, upload-time = "2026-01-03T17:30:15.96Z" }, - { url = "https://files.pythonhosted.org/packages/5d/43/4be01406b78e1be8320bb8316dc9c42dbab553d281c40364e0f862d5661c/aiohttp-3.13.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27234ef6d85c914f9efeb77ff616dbf4ad2380be0cda40b4db086ffc7ddd1b7d", size = 493533, upload-time = "2026-01-03T17:30:17.431Z" }, - { url = 
"https://files.pythonhosted.org/packages/8d/a8/5a35dc56a06a2c90d4742cbf35294396907027f80eea696637945a106f25/aiohttp-3.13.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d32764c6c9aafb7fb55366a224756387cd50bfa720f32b88e0e6fa45b27dcf29", size = 1737839, upload-time = "2026-01-03T17:30:19.422Z" }, - { url = "https://files.pythonhosted.org/packages/bf/62/4b9eeb331da56530bf2e198a297e5303e1c1ebdceeb00fe9b568a65c5a0c/aiohttp-3.13.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b1a6102b4d3ebc07dad44fbf07b45bb600300f15b552ddf1851b5390202ea2e3", size = 1703932, upload-time = "2026-01-03T17:30:21.756Z" }, - { url = "https://files.pythonhosted.org/packages/7c/f6/af16887b5d419e6a367095994c0b1332d154f647e7dc2bd50e61876e8e3d/aiohttp-3.13.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c014c7ea7fb775dd015b2d3137378b7be0249a448a1612268b5a90c2d81de04d", size = 1771906, upload-time = "2026-01-03T17:30:23.932Z" }, - { url = "https://files.pythonhosted.org/packages/ce/83/397c634b1bcc24292fa1e0c7822800f9f6569e32934bdeef09dae7992dfb/aiohttp-3.13.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2b8d8ddba8f95ba17582226f80e2de99c7a7948e66490ef8d947e272a93e9463", size = 1871020, upload-time = "2026-01-03T17:30:26Z" }, - { url = "https://files.pythonhosted.org/packages/86/f6/a62cbbf13f0ac80a70f71b1672feba90fdb21fd7abd8dbf25c0105fb6fa3/aiohttp-3.13.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ae8dd55c8e6c4257eae3a20fd2c8f41edaea5992ed67156642493b8daf3cecc", size = 1755181, upload-time = "2026-01-03T17:30:27.554Z" }, - { url = "https://files.pythonhosted.org/packages/0a/87/20a35ad487efdd3fba93d5843efdfaa62d2f1479eaafa7453398a44faf13/aiohttp-3.13.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:01ad2529d4b5035578f5081606a465f3b814c542882804e2e8cda61adf5c71bf", size = 1561794, upload-time = "2026-01-03T17:30:29.254Z" }, - { url = "https://files.pythonhosted.org/packages/de/95/8fd69a66682012f6716e1bc09ef8a1a2a91922c5725cb904689f112309c4/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bb4f7475e359992b580559e008c598091c45b5088f28614e855e42d39c2f1033", size = 1697900, upload-time = "2026-01-03T17:30:31.033Z" }, - { url = "https://files.pythonhosted.org/packages/e5/66/7b94b3b5ba70e955ff597672dad1691333080e37f50280178967aff68657/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c19b90316ad3b24c69cd78d5c9b4f3aa4497643685901185b65166293d36a00f", size = 1728239, upload-time = "2026-01-03T17:30:32.703Z" }, - { url = "https://files.pythonhosted.org/packages/47/71/6f72f77f9f7d74719692ab65a2a0252584bf8d5f301e2ecb4c0da734530a/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:96d604498a7c782cb15a51c406acaea70d8c027ee6b90c569baa6e7b93073679", size = 1740527, upload-time = "2026-01-03T17:30:34.695Z" }, - { url = "https://files.pythonhosted.org/packages/fa/b4/75ec16cbbd5c01bdaf4a05b19e103e78d7ce1ef7c80867eb0ace42ff4488/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:084911a532763e9d3dd95adf78a78f4096cd5f58cdc18e6fdbc1b58417a45423", size = 1554489, upload-time = "2026-01-03T17:30:36.864Z" }, - { url = "https://files.pythonhosted.org/packages/52/8f/bc518c0eea29f8406dcf7ed1f96c9b48e3bc3995a96159b3fc11f9e08321/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7a4a94eb787e606d0a09404b9c38c113d3b099d508021faa615d70a0131907ce", size = 1767852, upload-time = "2026-01-03T17:30:39.433Z" }, - { url = "https://files.pythonhosted.org/packages/9d/f2/a07a75173124f31f11ea6f863dc44e6f09afe2bca45dd4e64979490deab1/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:87797e645d9d8e222e04160ee32aa06bc5c163e8499f24db719e7852ec23093a", size = 1722379, upload-time = 
"2026-01-03T17:30:41.081Z" }, - { url = "https://files.pythonhosted.org/packages/3c/4a/1a3fee7c21350cac78e5c5cef711bac1b94feca07399f3d406972e2d8fcd/aiohttp-3.13.3-cp312-cp312-win32.whl", hash = "sha256:b04be762396457bef43f3597c991e192ee7da460a4953d7e647ee4b1c28e7046", size = 428253, upload-time = "2026-01-03T17:30:42.644Z" }, - { url = "https://files.pythonhosted.org/packages/d9/b7/76175c7cb4eb73d91ad63c34e29fc4f77c9386bba4a65b53ba8e05ee3c39/aiohttp-3.13.3-cp312-cp312-win_amd64.whl", hash = "sha256:e3531d63d3bdfa7e3ac5e9b27b2dd7ec9df3206a98e0b3445fa906f233264c57", size = 455407, upload-time = "2026-01-03T17:30:44.195Z" }, - { url = "https://files.pythonhosted.org/packages/97/8a/12ca489246ca1faaf5432844adbfce7ff2cc4997733e0af120869345643a/aiohttp-3.13.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5dff64413671b0d3e7d5918ea490bdccb97a4ad29b3f311ed423200b2203e01c", size = 734190, upload-time = "2026-01-03T17:30:45.832Z" }, - { url = "https://files.pythonhosted.org/packages/32/08/de43984c74ed1fca5c014808963cc83cb00d7bb06af228f132d33862ca76/aiohttp-3.13.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:87b9aab6d6ed88235aa2970294f496ff1a1f9adcd724d800e9b952395a80ffd9", size = 491783, upload-time = "2026-01-03T17:30:47.466Z" }, - { url = "https://files.pythonhosted.org/packages/17/f8/8dd2cf6112a5a76f81f81a5130c57ca829d101ad583ce57f889179accdda/aiohttp-3.13.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:425c126c0dc43861e22cb1c14ba4c8e45d09516d0a3ae0a3f7494b79f5f233a3", size = 490704, upload-time = "2026-01-03T17:30:49.373Z" }, - { url = "https://files.pythonhosted.org/packages/6d/40/a46b03ca03936f832bc7eaa47cfbb1ad012ba1be4790122ee4f4f8cba074/aiohttp-3.13.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f9120f7093c2a32d9647abcaf21e6ad275b4fbec5b55969f978b1a97c7c86bf", size = 1720652, upload-time = "2026-01-03T17:30:50.974Z" }, - { url = 
"https://files.pythonhosted.org/packages/f7/7e/917fe18e3607af92657e4285498f500dca797ff8c918bd7d90b05abf6c2a/aiohttp-3.13.3-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:697753042d57f4bf7122cab985bf15d0cef23c770864580f5af4f52023a56bd6", size = 1692014, upload-time = "2026-01-03T17:30:52.729Z" }, - { url = "https://files.pythonhosted.org/packages/71/b6/cefa4cbc00d315d68973b671cf105b21a609c12b82d52e5d0c9ae61d2a09/aiohttp-3.13.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6de499a1a44e7de70735d0b39f67c8f25eb3d91eb3103be99ca0fa882cdd987d", size = 1759777, upload-time = "2026-01-03T17:30:54.537Z" }, - { url = "https://files.pythonhosted.org/packages/fb/e3/e06ee07b45e59e6d81498b591fc589629be1553abb2a82ce33efe2a7b068/aiohttp-3.13.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:37239e9f9a7ea9ac5bf6b92b0260b01f8a22281996da609206a84df860bc1261", size = 1861276, upload-time = "2026-01-03T17:30:56.512Z" }, - { url = "https://files.pythonhosted.org/packages/7c/24/75d274228acf35ceeb2850b8ce04de9dd7355ff7a0b49d607ee60c29c518/aiohttp-3.13.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f76c1e3fe7d7c8afad7ed193f89a292e1999608170dcc9751a7462a87dfd5bc0", size = 1743131, upload-time = "2026-01-03T17:30:58.256Z" }, - { url = "https://files.pythonhosted.org/packages/04/98/3d21dde21889b17ca2eea54fdcff21b27b93f45b7bb94ca029c31ab59dc3/aiohttp-3.13.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fc290605db2a917f6e81b0e1e0796469871f5af381ce15c604a3c5c7e51cb730", size = 1556863, upload-time = "2026-01-03T17:31:00.445Z" }, - { url = "https://files.pythonhosted.org/packages/9e/84/da0c3ab1192eaf64782b03971ab4055b475d0db07b17eff925e8c93b3aa5/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:4021b51936308aeea0367b8f006dc999ca02bc118a0cc78c303f50a2ff6afb91", size = 1682793, upload-time = "2026-01-03T17:31:03.024Z" }, - { url = "https://files.pythonhosted.org/packages/ff/0f/5802ada182f575afa02cbd0ec5180d7e13a402afb7c2c03a9aa5e5d49060/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:49a03727c1bba9a97d3e93c9f93ca03a57300f484b6e935463099841261195d3", size = 1716676, upload-time = "2026-01-03T17:31:04.842Z" }, - { url = "https://files.pythonhosted.org/packages/3f/8c/714d53bd8b5a4560667f7bbbb06b20c2382f9c7847d198370ec6526af39c/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3d9908a48eb7416dc1f4524e69f1d32e5d90e3981e4e37eb0aa1cd18f9cfa2a4", size = 1733217, upload-time = "2026-01-03T17:31:06.868Z" }, - { url = "https://files.pythonhosted.org/packages/7d/79/e2176f46d2e963facea939f5be2d26368ce543622be6f00a12844d3c991f/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2712039939ec963c237286113c68dbad80a82a4281543f3abf766d9d73228998", size = 1552303, upload-time = "2026-01-03T17:31:08.958Z" }, - { url = "https://files.pythonhosted.org/packages/ab/6a/28ed4dea1759916090587d1fe57087b03e6c784a642b85ef48217b0277ae/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7bfdc049127717581866fa4708791220970ce291c23e28ccf3922c700740fdc0", size = 1763673, upload-time = "2026-01-03T17:31:10.676Z" }, - { url = "https://files.pythonhosted.org/packages/e8/35/4a3daeb8b9fab49240d21c04d50732313295e4bd813a465d840236dd0ce1/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8057c98e0c8472d8846b9c79f56766bcc57e3e8ac7bfd510482332366c56c591", size = 1721120, upload-time = "2026-01-03T17:31:12.575Z" }, - { url = "https://files.pythonhosted.org/packages/bc/9f/d643bb3c5fb99547323e635e251c609fbbc660d983144cfebec529e09264/aiohttp-3.13.3-cp313-cp313-win32.whl", hash = "sha256:1449ceddcdbcf2e0446957863af03ebaaa03f94c090f945411b61269e2cb5daf", size = 427383, upload-time = 
"2026-01-03T17:31:14.382Z" }, - { url = "https://files.pythonhosted.org/packages/4e/f1/ab0395f8a79933577cdd996dd2f9aa6014af9535f65dddcf88204682fe62/aiohttp-3.13.3-cp313-cp313-win_amd64.whl", hash = "sha256:693781c45a4033d31d4187d2436f5ac701e7bbfe5df40d917736108c1cc7436e", size = 453899, upload-time = "2026-01-03T17:31:15.958Z" }, - { url = "https://files.pythonhosted.org/packages/99/36/5b6514a9f5d66f4e2597e40dea2e3db271e023eb7a5d22defe96ba560996/aiohttp-3.13.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:ea37047c6b367fd4bd632bff8077449b8fa034b69e812a18e0132a00fae6e808", size = 737238, upload-time = "2026-01-03T17:31:17.909Z" }, - { url = "https://files.pythonhosted.org/packages/f7/49/459327f0d5bcd8c6c9ca69e60fdeebc3622861e696490d8674a6d0cb90a6/aiohttp-3.13.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6fc0e2337d1a4c3e6acafda6a78a39d4c14caea625124817420abceed36e2415", size = 492292, upload-time = "2026-01-03T17:31:19.919Z" }, - { url = "https://files.pythonhosted.org/packages/e8/0b/b97660c5fd05d3495b4eb27f2d0ef18dc1dc4eff7511a9bf371397ff0264/aiohttp-3.13.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c685f2d80bb67ca8c3837823ad76196b3694b0159d232206d1e461d3d434666f", size = 493021, upload-time = "2026-01-03T17:31:21.636Z" }, - { url = "https://files.pythonhosted.org/packages/54/d4/438efabdf74e30aeceb890c3290bbaa449780583b1270b00661126b8aae4/aiohttp-3.13.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48e377758516d262bde50c2584fc6c578af272559c409eecbdd2bae1601184d6", size = 1717263, upload-time = "2026-01-03T17:31:23.296Z" }, - { url = "https://files.pythonhosted.org/packages/71/f2/7bddc7fd612367d1459c5bcf598a9e8f7092d6580d98de0e057eb42697ad/aiohttp-3.13.3-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:34749271508078b261c4abb1767d42b8d0c0cc9449c73a4df494777dc55f0687", size = 1669107, upload-time = "2026-01-03T17:31:25.334Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/5a/1aeaecca40e22560f97610a329e0e5efef5e0b5afdf9f857f0d93839ab2e/aiohttp-3.13.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:82611aeec80eb144416956ec85b6ca45a64d76429c1ed46ae1b5f86c6e0c9a26", size = 1760196, upload-time = "2026-01-03T17:31:27.394Z" }, - { url = "https://files.pythonhosted.org/packages/f8/f8/0ff6992bea7bd560fc510ea1c815f87eedd745fe035589c71ce05612a19a/aiohttp-3.13.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2fff83cfc93f18f215896e3a190e8e5cb413ce01553901aca925176e7568963a", size = 1843591, upload-time = "2026-01-03T17:31:29.238Z" }, - { url = "https://files.pythonhosted.org/packages/e3/d1/e30e537a15f53485b61f5be525f2157da719819e8377298502aebac45536/aiohttp-3.13.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bbe7d4cecacb439e2e2a8a1a7b935c25b812af7a5fd26503a66dadf428e79ec1", size = 1720277, upload-time = "2026-01-03T17:31:31.053Z" }, - { url = "https://files.pythonhosted.org/packages/84/45/23f4c451d8192f553d38d838831ebbc156907ea6e05557f39563101b7717/aiohttp-3.13.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b928f30fe49574253644b1ca44b1b8adbd903aa0da4b9054a6c20fc7f4092a25", size = 1548575, upload-time = "2026-01-03T17:31:32.87Z" }, - { url = "https://files.pythonhosted.org/packages/6a/ed/0a42b127a43712eda7807e7892c083eadfaf8429ca8fb619662a530a3aab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7b5e8fe4de30df199155baaf64f2fcd604f4c678ed20910db8e2c66dc4b11603", size = 1679455, upload-time = "2026-01-03T17:31:34.76Z" }, - { url = "https://files.pythonhosted.org/packages/2e/b5/c05f0c2b4b4fe2c9d55e73b6d3ed4fd6c9dc2684b1d81cbdf77e7fad9adb/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:8542f41a62bcc58fc7f11cf7c90e0ec324ce44950003feb70640fc2a9092c32a", size = 1687417, upload-time = 
"2026-01-03T17:31:36.699Z" }, - { url = "https://files.pythonhosted.org/packages/c9/6b/915bc5dad66aef602b9e459b5a973529304d4e89ca86999d9d75d80cbd0b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:5e1d8c8b8f1d91cd08d8f4a3c2b067bfca6ec043d3ff36de0f3a715feeedf926", size = 1729968, upload-time = "2026-01-03T17:31:38.622Z" }, - { url = "https://files.pythonhosted.org/packages/11/3b/e84581290a9520024a08640b63d07673057aec5ca548177a82026187ba73/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:90455115e5da1c3c51ab619ac57f877da8fd6d73c05aacd125c5ae9819582aba", size = 1545690, upload-time = "2026-01-03T17:31:40.57Z" }, - { url = "https://files.pythonhosted.org/packages/f5/04/0c3655a566c43fd647c81b895dfe361b9f9ad6d58c19309d45cff52d6c3b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:042e9e0bcb5fba81886c8b4fbb9a09d6b8a00245fd8d88e4d989c1f96c74164c", size = 1746390, upload-time = "2026-01-03T17:31:42.857Z" }, - { url = "https://files.pythonhosted.org/packages/1f/53/71165b26978f719c3419381514c9690bd5980e764a09440a10bb816ea4ab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2eb752b102b12a76ca02dff751a801f028b4ffbbc478840b473597fc91a9ed43", size = 1702188, upload-time = "2026-01-03T17:31:44.984Z" }, - { url = "https://files.pythonhosted.org/packages/29/a7/cbe6c9e8e136314fa1980da388a59d2f35f35395948a08b6747baebb6aa6/aiohttp-3.13.3-cp314-cp314-win32.whl", hash = "sha256:b556c85915d8efaed322bf1bdae9486aa0f3f764195a0fb6ee962e5c71ef5ce1", size = 433126, upload-time = "2026-01-03T17:31:47.463Z" }, - { url = "https://files.pythonhosted.org/packages/de/56/982704adea7d3b16614fc5936014e9af85c0e34b58f9046655817f04306e/aiohttp-3.13.3-cp314-cp314-win_amd64.whl", hash = "sha256:9bf9f7a65e7aa20dd764151fb3d616c81088f91f8df39c3893a536e279b4b984", size = 459128, upload-time = "2026-01-03T17:31:49.2Z" }, - { url = 
"https://files.pythonhosted.org/packages/6c/2a/3c79b638a9c3d4658d345339d22070241ea341ed4e07b5ac60fb0f418003/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:05861afbbec40650d8a07ea324367cb93e9e8cc7762e04dd4405df99fa65159c", size = 769512, upload-time = "2026-01-03T17:31:51.134Z" }, - { url = "https://files.pythonhosted.org/packages/29/b9/3e5014d46c0ab0db8707e0ac2711ed28c4da0218c358a4e7c17bae0d8722/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2fc82186fadc4a8316768d61f3722c230e2c1dcab4200d52d2ebdf2482e47592", size = 506444, upload-time = "2026-01-03T17:31:52.85Z" }, - { url = "https://files.pythonhosted.org/packages/90/03/c1d4ef9a054e151cd7839cdc497f2638f00b93cbe8043983986630d7a80c/aiohttp-3.13.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0add0900ff220d1d5c5ebbf99ed88b0c1bbf87aa7e4262300ed1376a6b13414f", size = 510798, upload-time = "2026-01-03T17:31:54.91Z" }, - { url = "https://files.pythonhosted.org/packages/ea/76/8c1e5abbfe8e127c893fe7ead569148a4d5a799f7cf958d8c09f3eedf097/aiohttp-3.13.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:568f416a4072fbfae453dcf9a99194bbb8bdeab718e08ee13dfa2ba0e4bebf29", size = 1868835, upload-time = "2026-01-03T17:31:56.733Z" }, - { url = "https://files.pythonhosted.org/packages/8e/ac/984c5a6f74c363b01ff97adc96a3976d9c98940b8969a1881575b279ac5d/aiohttp-3.13.3-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:add1da70de90a2569c5e15249ff76a631ccacfe198375eead4aadf3b8dc849dc", size = 1720486, upload-time = "2026-01-03T17:31:58.65Z" }, - { url = "https://files.pythonhosted.org/packages/b2/9a/b7039c5f099c4eb632138728828b33428585031a1e658d693d41d07d89d1/aiohttp-3.13.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:10b47b7ba335d2e9b1239fa571131a87e2d8ec96b333e68b2a305e7a98b0bae2", size = 1847951, upload-time = 
"2026-01-03T17:32:00.989Z" }, - { url = "https://files.pythonhosted.org/packages/3c/02/3bec2b9a1ba3c19ff89a43a19324202b8eb187ca1e928d8bdac9bbdddebd/aiohttp-3.13.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3dd4dce1c718e38081c8f35f323209d4c1df7d4db4bab1b5c88a6b4d12b74587", size = 1941001, upload-time = "2026-01-03T17:32:03.122Z" }, - { url = "https://files.pythonhosted.org/packages/37/df/d879401cedeef27ac4717f6426c8c36c3091c6e9f08a9178cc87549c537f/aiohttp-3.13.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34bac00a67a812570d4a460447e1e9e06fae622946955f939051e7cc895cfab8", size = 1797246, upload-time = "2026-01-03T17:32:05.255Z" }, - { url = "https://files.pythonhosted.org/packages/8d/15/be122de1f67e6953add23335c8ece6d314ab67c8bebb3f181063010795a7/aiohttp-3.13.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a19884d2ee70b06d9204b2727a7b9f983d0c684c650254679e716b0b77920632", size = 1627131, upload-time = "2026-01-03T17:32:07.607Z" }, - { url = "https://files.pythonhosted.org/packages/12/12/70eedcac9134cfa3219ab7af31ea56bc877395b1ac30d65b1bc4b27d0438/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ca7f2bb6ba8348a3614c7918cc4bb73268c5ac2a207576b7afea19d3d9f64", size = 1795196, upload-time = "2026-01-03T17:32:09.59Z" }, - { url = "https://files.pythonhosted.org/packages/32/11/b30e1b1cd1f3054af86ebe60df96989c6a414dd87e27ad16950eee420bea/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:b0d95340658b9d2f11d9697f59b3814a9d3bb4b7a7c20b131df4bcef464037c0", size = 1782841, upload-time = "2026-01-03T17:32:11.445Z" }, - { url = "https://files.pythonhosted.org/packages/88/0d/d98a9367b38912384a17e287850f5695c528cff0f14f791ce8ee2e4f7796/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:a1e53262fd202e4b40b70c3aff944a8155059beedc8a89bba9dc1f9ef06a1b56", size = 1795193, upload-time 
= "2026-01-03T17:32:13.705Z" }, - { url = "https://files.pythonhosted.org/packages/43/a5/a2dfd1f5ff5581632c7f6a30e1744deda03808974f94f6534241ef60c751/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:d60ac9663f44168038586cab2157e122e46bdef09e9368b37f2d82d354c23f72", size = 1621979, upload-time = "2026-01-03T17:32:15.965Z" }, - { url = "https://files.pythonhosted.org/packages/fa/f0/12973c382ae7c1cccbc4417e129c5bf54c374dfb85af70893646e1f0e749/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:90751b8eed69435bac9ff4e3d2f6b3af1f57e37ecb0fbeee59c0174c9e2d41df", size = 1822193, upload-time = "2026-01-03T17:32:18.219Z" }, - { url = "https://files.pythonhosted.org/packages/3c/5f/24155e30ba7f8c96918af1350eb0663e2430aad9e001c0489d89cd708ab1/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fc353029f176fd2b3ec6cfc71be166aba1936fe5d73dd1992ce289ca6647a9aa", size = 1769801, upload-time = "2026-01-03T17:32:20.25Z" }, - { url = "https://files.pythonhosted.org/packages/eb/f8/7314031ff5c10e6ece114da79b338ec17eeff3a079e53151f7e9f43c4723/aiohttp-3.13.3-cp314-cp314t-win32.whl", hash = "sha256:2e41b18a58da1e474a057b3d35248d8320029f61d70a37629535b16a0c8f3767", size = 466523, upload-time = "2026-01-03T17:32:22.215Z" }, - { url = "https://files.pythonhosted.org/packages/b4/63/278a98c715ae467624eafe375542d8ba9b4383a016df8fdefe0ae28382a7/aiohttp-3.13.3-cp314-cp314t-win_amd64.whl", hash = "sha256:44531a36aa2264a1860089ffd4dce7baf875ee5a6079d5fb42e261c704ef7344", size = 499694, upload-time = "2026-01-03T17:32:24.546Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/82/78/8ea7308cac6934de8c74a14f3d5f65d1c89287426688be79538d0e5c013d/aiohttp-3.14.1.tar.gz", hash = "sha256:307f2cff90a764d329e77040603fa032db89c5c24fdad50c4c15334cba744035", size = 7955794, upload-time = "2026-06-07T21:09:35.529Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1d/21/151624b51cd92553d95424daf4bf19f19ce9be9002d19253e7e7ce67197b/aiohttp-3.14.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d35143e27778b4bb0fb189562d7f275bff79c62ab8e98459717c0ea617ff2480", size = 757402, upload-time = "2026-06-07T21:06:40.311Z" }, + { url = "https://files.pythonhosted.org/packages/c2/82/280619e0bd7bf2454987e19282616e84762255dd9c8468f62382e8c191f1/aiohttp-3.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bcfb80a2cc36fba2534e5e5b5264dc7ae6fcd9bf15256da3e53d2f499e6fa29d", size = 512310, upload-time = "2026-06-07T21:06:42.207Z" }, + { url = "https://files.pythonhosted.org/packages/55/b2/2aac325583aaa1353045f96dffa586d8a34e8322e14a7ba49cffeb103ab4/aiohttp-3.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27fd7c91e51729b4f7e1577865fa6d34c9adccbc39aabe9000285b48af9f0ec2", size = 512448, upload-time = "2026-06-07T21:06:43.813Z" }, + { url = "https://files.pythonhosted.org/packages/8a/72/a60607cb849faa8af8a356c9329ea2eb6f395d49e82cc82ccba1fd8deb8f/aiohttp-3.14.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:64c567bf9eaf664280116a8688f63016e6b32db2505908e2bdaca1b6438142f2", size = 1766854, upload-time = "2026-06-07T21:06:45.391Z" }, + { url = "https://files.pythonhosted.org/packages/b5/d3/d9fe1c9ec7557ab4d0d82bebaa728c6418f0b93295ec2f4ab015f7710cc7/aiohttp-3.14.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:f5e6ff2bdbb8f4cd3fbe41f99e25bbcd58e3bf9f13d3dd31a11e7917251cc77a", size = 1740884, upload-time = "2026-06-07T21:06:47.413Z" }, + { url = "https://files.pythonhosted.org/packages/c1/dc/f2cecfaf9337ba3e63f181500814ff502aa3d00d9c7ec93a9d23d10a27b2/aiohttp-3.14.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2f73e01dc37122325caf079982621262f96d74823c179038a82fddfc50359264", size = 1810034, upload-time = 
"2026-06-07T21:06:50.165Z" }, + { url = "https://files.pythonhosted.org/packages/66/d7/2ff65c5e65c0d7476daf7e15c032e0805e36811185b9623e3238ad6c763e/aiohttp-3.14.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bb2c0c80d431c0d03f2c7dbf125150fedd4f0de17366a7ca33f7ccb822391842", size = 1904054, upload-time = "2026-06-07T21:06:52.035Z" }, + { url = "https://files.pythonhosted.org/packages/20/9c/d445818389df371f56d141d881153ba23183c4735a03f7356ffb43f7757d/aiohttp-3.14.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e6fc1a85fa7194a1a7d19f44e8609180f4a8eb5fa4c7ed8b4355f080fad235c", size = 1790278, upload-time = "2026-06-07T21:06:54.049Z" }, + { url = "https://files.pythonhosted.org/packages/4d/aa/bf04cb4d865fc6101c2229a294ad744973b72e513fdc5a6b791e6983d72a/aiohttp-3.14.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:686b6c0d3911ec387b444ddf5dc62fb7f7c0a7d5186a7861626496a5ab4aff95", size = 1591795, upload-time = "2026-06-07T21:06:55.911Z" }, + { url = "https://files.pythonhosted.org/packages/dc/b4/4dac0038960427ba832f6609dfb4ea5437d7fd80c72001b9e48f834f428b/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c6fa4dc7ad6f8109c70bb1499e589f76b0b792baf39f9b017eb92c8a81d0a199", size = 1728397, upload-time = "2026-06-07T21:06:57.777Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f9/7cd4e8ad7aa3b75f17d56bb5498dd604a93d4e6eece822ba0568c413fff0/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:87a5eea1b2a5e21e1ebdbb33ad4165359189327e63fc4e4894693e7f821ac817", size = 1766504, upload-time = "2026-06-07T21:07:00.009Z" }, + { url = "https://files.pythonhosted.org/packages/f9/df/fc01d9fcad0f73fed3f3d361f1f94f975947b50dff82919f6dc2bf4316cc/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c1421eb01d4fd608d88cc8290211d177a58532b55ad94076fb349c5bf467f0a", size = 1777806, upload-time = 
"2026-06-07T21:07:02.064Z" }, + { url = "https://files.pythonhosted.org/packages/41/09/47e2d090bddcc8fb4ccb4c314aadc32d7c5d9bb55f50f6ad1c92fc15d501/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:34b257ec41345c1e8f2df68fa908a7952f5de932723871eb633ecbbff396c9a4", size = 1580707, upload-time = "2026-06-07T21:07:03.942Z" }, + { url = "https://files.pythonhosted.org/packages/3d/36/f1a4ce904ae0b6930cfe9afc96d0896f7ec1a620c400405d63783bb95a9c/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:de538791a80e5d862addbc183f70f0158ac9b9bb872bb147f1fd2a683691e087", size = 1798121, upload-time = "2026-06-07T21:07:05.987Z" }, + { url = "https://files.pythonhosted.org/packages/70/0a/e0075ce9ca0279ee1d4f0c0b85f54fea02ebc83c3007651a72bece658fec/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f71173be42d3241d428f760122febb748de0623f44308a6f120d0dd9ec572e3", size = 1767580, upload-time = "2026-06-07T21:07:07.873Z" }, + { url = "https://files.pythonhosted.org/packages/3e/61/a0c0a8f327a9c52095cdd8e312391b00d3ed64ab6c72bb5c33d8ec251cf7/aiohttp-3.14.1-cp312-cp312-win32.whl", hash = "sha256:ec8dc383ee57ea3e883477dcca3f11b65d58199f1080acaf4cd6ad9a99698be4", size = 452771, upload-time = "2026-06-07T21:07:09.669Z" }, + { url = "https://files.pythonhosted.org/packages/df/d9/ea367c75f16ac9c6cdc8febb25e8318fa21a2b1bc8d6514d4b2d890bface/aiohttp-3.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:2aa92c87868cd13674989f9ee83e5f9f7ea4237589b728048e1f0c8f6caa3271", size = 479873, upload-time = "2026-06-07T21:07:11.538Z" }, + { url = "https://files.pythonhosted.org/packages/03/64/8d96784a7851156db8a4c6c3f6f91042fdf39fb15a4cc38c8b3c14833c45/aiohttp-3.14.1-cp312-cp312-win_arm64.whl", hash = "sha256:2c840c90759922cb5e6dda94596e079a30fb5a5ba548e7e0dc00574703940847", size = 448073, upload-time = "2026-06-07T21:07:13.637Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/97/bd137012dd97e1649162b099135a80e1fd59aaa807b2430fc448d1029aff/aiohttp-3.14.1-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:b3a03285a7f9c7b016324574a6d92a1c895da6b978cb8f1deee3ac72bc6da178", size = 506882, upload-time = "2026-06-07T21:07:15.501Z" }, + { url = "https://files.pythonhosted.org/packages/ef/79/e5cc690e9d922a66887ceeaca53a8ffd5a7b0be3816142b7abc433742d89/aiohttp-3.14.1-cp313-cp313-android_21_x86_64.whl", hash = "sha256:2a73f487ab8ef5abbb24b7aa9b73e98eaba9e9e031804ff2416f02eca315ccaf", size = 515270, upload-time = "2026-06-07T21:07:17.53Z" }, + { url = "https://files.pythonhosted.org/packages/fe/22/a73ccbf9dbd6e26dda0b24d5fd5db7da92ee3383a79f47677ffb834c5c5b/aiohttp-3.14.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:915fbb7b41b115192259f8c9ae58f3ddc444d2b5579917270211858e606a4afd", size = 485841, upload-time = "2026-06-07T21:07:19.555Z" }, + { url = "https://files.pythonhosted.org/packages/3b/b9/57ed8eaf596321c2ad747bd480fb1700dbd7177c60dfc9e4c187f629662e/aiohttp-3.14.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:7fb4bdf95b0561a79f259f9d28fbc109728c5ee7f27aff6391f0ca703a329abe", size = 492088, upload-time = "2026-06-07T21:07:21.581Z" }, + { url = "https://files.pythonhosted.org/packages/78/c0/5ebe5270a7c140d7c6f79dcb018640225f14d406c149e4eec04a7d82fe71/aiohttp-3.14.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1b9748363260121d2927704f5d4fc498150669ca3ae93625986ee89c8f80dcd4", size = 501564, upload-time = "2026-06-07T21:07:23.388Z" }, + { url = "https://files.pythonhosted.org/packages/75/7f/8cdaa24fc7983865e0915153b96a9ac5bcdd3548d64c5a27d17cecccad2d/aiohttp-3.14.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:86a6dab78b0e43e2897a3bbe15745aa60dc5423ca437b7b0b164c069bf91b876", size = 751998, upload-time = "2026-06-07T21:07:25.046Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/f4/c4227aacfacc5cb0cc2d119b65301d177912a6842cd64e120c47af76064f/aiohttp-3.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4dfd6e47d3c44c2279907607f73a4240b88c69eb8b90da7e2441a8045dfd21da", size = 510918, upload-time = "2026-06-07T21:07:27.28Z" }, + { url = "https://files.pythonhosted.org/packages/ab/01/a2d5f96cd4e74424864d30bc0a7e44d0a12dacdcfa91b5b2d1bd3dca6bf3/aiohttp-3.14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:317acd9f8602858dc7d59679812c376c7f0b97bcbbf16e0d6237f54141d8a8a6", size = 508657, upload-time = "2026-06-07T21:07:29.252Z" }, + { url = "https://files.pythonhosted.org/packages/e8/ed/3c0fb5c500fdd8e7ebc10d1889c04384fffa1a9163eac1356088ca9da1b1/aiohttp-3.14.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd869c427324e5cb15195793de951295710db28be7d818247f3097b4ab5d4b96", size = 1757907, upload-time = "2026-06-07T21:07:31.03Z" }, + { url = "https://files.pythonhosted.org/packages/0b/ab/d4c924d9bd5be3050c226612413ce68cb54c70d2c31b661bfc8d9a5b6a70/aiohttp-3.14.1-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:93b032b5ec3255473c143627d21a69ac74ae12f7f33974cb587c564d11b1066f", size = 1737565, upload-time = "2026-06-07T21:07:33.031Z" }, + { url = "https://files.pythonhosted.org/packages/19/2a/37326821ff779084020cdc33224d20b19f42f4183a500ff92022a739eda7/aiohttp-3.14.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f234b4deb12f3ad59127e037bc57c40c21e45b45282df7d3a55a0f409f595296", size = 1799018, upload-time = "2026-06-07T21:07:35.003Z" }, + { url = "https://files.pythonhosted.org/packages/b3/4f/6e947ba73e4ce09070761c05ed3a8ceb7c21f5e46798671d8b2aac0e4626/aiohttp-3.14.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:9af6779bfb46abf124068327abcdf9ce95c9ef8287a3e8da76ccf2d0f16c28fa", size = 
1894416, upload-time = "2026-06-07T21:07:36.956Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6e/dbf1d0625dc711fb2851f4f3c3055c39ed58bae92082d8c627dbe6013736/aiohttp-3.14.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:faccab372e66bc76d5731525e7f1143c922271725b9d38c9f97edcc66266b451", size = 1783881, upload-time = "2026-06-07T21:07:39.063Z" }, + { url = "https://files.pythonhosted.org/packages/44/c2/5e25098a67268ed369483ae7d1a58bd0a13d03aab860d2a0e4a6eb25b046/aiohttp-3.14.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f380468b09d2a81633ee863b0ec5648d364bd17bb8ecfb8c2f387f7ac1faf42c", size = 1587572, upload-time = "2026-06-07T21:07:41.058Z" }, + { url = "https://files.pythonhosted.org/packages/2a/bd/cf9cee17e140f942a3de73e658a543aa8fbf35a5fc67a9d2538d52d77f0b/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:97e704dcd26271f5bda3fa07c3ce0fb76d6d3f8659f4baa1a24442cc9ba177ca", size = 1722137, upload-time = "2026-06-07T21:07:43.014Z" }, + { url = "https://files.pythonhosted.org/packages/89/6d/5684f8c59045c96f81a18cefbc1fbbd79d25b88f1c622f2a5c5c08fcb632/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:269b76ac5394092b95bc4a098f4fc6c191c083c3bd12775d1e30e663132f6a09", size = 1755953, upload-time = "2026-06-07T21:07:45.933Z" }, + { url = "https://files.pythonhosted.org/packages/a8/40/35caf3170f8359760740a7d9aa0fff2e344bef98e1d1186f5a0f6dec17e6/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:5c0b3e614340c889d575451696374c9d17affd54cd607ca0babed8f8c37b9397", size = 1766479, upload-time = "2026-06-07T21:07:48.047Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a1/b0c61e7a137f0d81de49a82023a6df73c3c16d6fefb0f8e4a93d21639002/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:5663ee9257cfa1add7253a7da3035a02f31b6600ec48261585e1800a81533080", size = 1580077, upload-time = 
"2026-06-07T21:07:50.069Z" }, + { url = "https://files.pythonhosted.org/packages/0b/41/194ea4623693009fcefebef7aef63c141754f153e9cd0d39d3b9e36c175c/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:603a2c834142172ffddc054067f5ec0ca65d57a0aa98a71bc81952573208e345", size = 1791688, upload-time = "2026-06-07T21:07:52.106Z" }, + { url = "https://files.pythonhosted.org/packages/ba/45/4de841f005cfe1fd63e2a2fe011262c515e2a62aa6994b15947e7d717ac9/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cb21957bb8aca671c1765e32f58164cf0c50e6bf41c0bbbd16da20732ecaf588", size = 1761094, upload-time = "2026-06-07T21:07:54.113Z" }, + { url = "https://files.pythonhosted.org/packages/e4/ae/dbce10533d3896d544d5053939ed75b7dc31a1b0973d959b1b5ae21028d6/aiohttp-3.14.1-cp313-cp313-win32.whl", hash = "sha256:e509a55f681e6158c20f70f102f9cf61fb20fbc382272bc6d94b7343f2582780", size = 452662, upload-time = "2026-06-07T21:07:56.06Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d9/0bf1a19362c32f06229da5e7ddfcec91f93474d6307f7a2d3135e9c674dc/aiohttp-3.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:1ac8531b638959718e18c2207fbfe297819875da46a740b29dfa29beba64355a", size = 479748, upload-time = "2026-06-07T21:07:58.319Z" }, + { url = "https://files.pythonhosted.org/packages/22/0a/62e7232dc9484fbec112ceb32efb6a624cc7994ec6e2b019286f17c4e8f2/aiohttp-3.14.1-cp313-cp313-win_arm64.whl", hash = "sha256:250d14af67f6b6a1a4a811049b1afa69d61d617fca6bf33149b3ab1a6dbcf7b8", size = 447723, upload-time = "2026-06-07T21:08:00.154Z" }, + { url = "https://files.pythonhosted.org/packages/c4/a1/5fafa04e1ca91ddb47608699d60649c1c6db3cf41c99e78fc4056f9513db/aiohttp-3.14.1-cp314-cp314-android_24_arm64_v8a.whl", hash = "sha256:7c106c26852ca1c2047c6b80384f17100b4e439af276f21ef3d4e2f450ae7e15", size = 508531, upload-time = "2026-06-07T21:08:02.093Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/2e/bfa02f699d87ffc86d5959270b28f1cb410add3ccaced8ed2e0b8a5238fc/aiohttp-3.14.1-cp314-cp314-android_24_x86_64.whl", hash = "sha256:20205f7f5ade7aaec9f4b500549bbc071b046453aed72f9c06dcab87896a83e8", size = 514718, upload-time = "2026-06-07T21:08:04.476Z" }, + { url = "https://files.pythonhosted.org/packages/85/a5/9594ad6289eebbc97d167c44213d557807f90e59115caad24de21ad2c3b1/aiohttp-3.14.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:62a759436b29e677181a9e76bab8b8f689a29cb9c535f45f7c48c9c830d3f8c3", size = 487918, upload-time = "2026-06-07T21:08:06.377Z" }, + { url = "https://files.pythonhosted.org/packages/b4/61/16a32c36c3c49edec122a3dc811f2057df2f94d3b14aa107c8017d981618/aiohttp-3.14.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:2964cbf553df4d7a57348da44d961d871895fc1ee4e8c322b2a95612c7b17fba", size = 494014, upload-time = "2026-06-07T21:08:08.263Z" }, + { url = "https://files.pythonhosted.org/packages/9b/89/3ebcf96ed99c05bec9c434aaac6963fd3cbab4a786ae739908a144d9ce44/aiohttp-3.14.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:237651caadc3a59badd39319c54642b5299e9cc98a3a194310e55d5bb9f5e397", size = 502398, upload-time = "2026-06-07T21:08:10.244Z" }, + { url = "https://files.pythonhosted.org/packages/fd/3d/b74870a0c2d40c355928cd5b96c7a11fa821b8a40fc41365e64479b151fb/aiohttp-3.14.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:896e12dfdbbab9d8f7e16d2b28c6769a60126fa92095d1ebf9473d02593a2448", size = 758018, upload-time = "2026-06-07T21:08:12.447Z" }, + { url = "https://files.pythonhosted.org/packages/d3/66/f42f5c984d99e49c6cff5f26f590750f2e2f7ef1fcfb99966ab5be1b632e/aiohttp-3.14.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d03f281ed22579314ba00821ce20115a7c0ac430660b4cc05704a3f818b3e004", size = 512462, upload-time = "2026-06-07T21:08:14.624Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/a7/248e1aebe0c7810b0271e021a0f2a5eb6e78a051885b3c9df49f42a5802d/aiohttp-3.14.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:07eabb979d236335fed927e137a928c9adfb7df3b9ec7aa31726f133a62be983", size = 512824, upload-time = "2026-06-07T21:08:16.572Z" }, + { url = "https://files.pythonhosted.org/packages/26/97/2aa0e5ba0727dc3bd5aaebb7ccbc510f7dfb7fb961ec87497cd496635ab1/aiohttp-3.14.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4fe1f1087cbadb280b5e1bb054a4f00d1423c74d6626c5e48400d871d34ecefe", size = 1749898, upload-time = "2026-06-07T21:08:18.635Z" }, + { url = "https://files.pythonhosted.org/packages/00/8d/e97f6c96c891d457c8479d92a514ba194d0412f981d72c70341ee18488ed/aiohttp-3.14.1-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:367a9314fdc79dab0fac96e216cb41dd73c85bdca85306ce8999118ba7e0f333", size = 1710114, upload-time = "2026-06-07T21:08:20.892Z" }, + { url = "https://files.pythonhosted.org/packages/6f/e6/aa8d7e863048c8fceb5cd6ce74017311cec3ead07847387e12265fb4444e/aiohttp-3.14.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a24f677ebe83749039e7bdf862ff0bbb16818ae4193d4ef96505e269375bcce0", size = 1802541, upload-time = "2026-06-07T21:08:23.044Z" }, + { url = "https://files.pythonhosted.org/packages/83/a8/72193137de57fda4ebfae4563182d082c8856e3b6e9871d0b46f028fb369/aiohttp-3.14.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c83afe0ba876be7e943d2e0ba645809ad441575d2840c895c21ee5de93b9377a", size = 1875776, upload-time = "2026-06-07T21:08:25.288Z" }, + { url = "https://files.pythonhosted.org/packages/a0/18/938441025db6769a3464596b2410af3afde0b21eb2f204c6f766f68af4bd/aiohttp-3.14.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:634e385930fb6d2d479cf3aa66515955863b77a5e3c2b5894ca259a25b308602", size = 1760329, upload-time = "2026-06-07T21:08:27.363Z" }, + { url = "https://files.pythonhosted.org/packages/60/29/bf2496b4065e76e09fe48015aaffe5ce161d8f089b06ac6982070f653076/aiohttp-3.14.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:eeea07c4397bbc57719c4eed8f9c284874d4f175f9b6d57f7a1546b976d455ca", size = 1587293, upload-time = "2026-06-07T21:08:29.805Z" }, + { url = "https://files.pythonhosted.org/packages/49/a2/2136674d52123b1354bd05dd5753c318db47dc0c927cc70b27bab3755456/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:335c0cc3e3545ce98dcb9cfcb836f40c3411f43fa03dab757597d80c89af8a35", size = 1714756, upload-time = "2026-06-07T21:08:32.094Z" }, + { url = "https://files.pythonhosted.org/packages/a7/b9/e5fd2e6f915503081c0f9b1e8540947037929c70c191da2e4d54b31a21a1/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:ae6be797afdef264e8a84864a85b196ca06045586481b3df8a967322fd2fa844", size = 1721052, upload-time = "2026-06-07T21:08:34.167Z" }, + { url = "https://files.pythonhosted.org/packages/63/5a/2833e324a2263e104e31e2e91bc5bbee81bc499afd32203faee048a883f0/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:8560b4d712474335d08907db7973f71912d3a9a8f1dee992ec06b5d2fe359496", size = 1766888, upload-time = "2026-06-07T21:08:36.95Z" }, + { url = "https://files.pythonhosted.org/packages/57/fa/dea6511870913162f3b2e8c42a7614eb203a4540b8c2da43e0bfb0548f3c/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7edd08e0a5deb1e8564a2fcd8f4561014a3f05252334671bbf55ddd47db0e5", size = 1581679, upload-time = "2026-06-07T21:08:39.292Z" }, + { url = "https://files.pythonhosted.org/packages/14/bd/3cf0d55e71784b33534e9710a67d382d900598b4787fbce6cc7317f8c42a/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:b6ff7fcee63287ae57b5df3e4f5957ce032122802509246dec1a5bcc55904c95", size = 
1782021, upload-time = "2026-06-07T21:08:41.407Z" }, + { url = "https://files.pythonhosted.org/packages/c1/af/14bb5843eccbe234f4dfb78ab73e549d99727247e62ae5d62cbd22eaf5b0/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6ffbb2f4ec1ceaff7e07d43922954da26b223d188bf30658e561b98e23089444", size = 1742574, upload-time = "2026-06-07T21:08:43.795Z" }, + { url = "https://files.pythonhosted.org/packages/f2/1e/fbeb7af9210a67ac0f9c9bec0f8f4568497924e33137a3d5b48e1cf85f3f/aiohttp-3.14.1-cp314-cp314-win32.whl", hash = "sha256:a9875b46d910cff3ea2f5962f9d266b465459fe634e22556ab9bd6fc1192eea0", size = 457773, upload-time = "2026-06-07T21:08:46.168Z" }, + { url = "https://files.pythonhosted.org/packages/f0/2b/13e8d741a9ec5db7d900c060554cf8352ab85e44e2a4469ebb9d377bda17/aiohttp-3.14.1-cp314-cp314-win_amd64.whl", hash = "sha256:af8b4b81a960eeaf1234971ac3cd0ba5901f3cd42eae42a46b4d089a8b492719", size = 485001, upload-time = "2026-06-07T21:08:48.401Z" }, + { url = "https://files.pythonhosted.org/packages/df/30/491acfa2c4d6c3ff59c49a14fc1b50be3241e25bbb0c84c09e2da4d11395/aiohttp-3.14.1-cp314-cp314-win_arm64.whl", hash = "sha256:cf4491381b1b57425c315a56a439251b1bdac07b2275f19a8c44bc57744532ec", size = 453809, upload-time = "2026-06-07T21:08:50.7Z" }, + { url = "https://files.pythonhosted.org/packages/34/e3/19dbe1a1f4cc6230eb9e314de7fe68053b0992f9302b27d12141a0b5db53/aiohttp-3.14.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:819c054312f1af92947e6a55883d1b66feefab11531a7fc45e0fb9b63880b5c2", size = 793320, upload-time = "2026-06-07T21:08:52.775Z" }, + { url = "https://files.pythonhosted.org/packages/7f/20/1b7182219ba1b108430d6e4dc53d25ae02dcfcf5a045b33af4e8c5167527/aiohttp-3.14.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10ee9c1753a8f706345b22496c79fbddb5be0599e0823f3738b1534058e25340", size = 529077, upload-time = "2026-06-07T21:08:55Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/c8/14ce60ec31a2e5f5274bb17d383a6f7a3aabca31ac04eee05585bbadab16/aiohttp-3.14.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1601cc37baf5750ccacae618ec2daf020769581695550e3b654a911f859c563d", size = 532476, upload-time = "2026-06-07T21:08:57.176Z" }, + { url = "https://files.pythonhosted.org/packages/7e/02/9ac85e081e53da2e061b02fa7758fe0a12d17b8ce2d1f5e6c7cb76730328/aiohttp-3.14.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4d6e0ac9da31c9c04c84e1c0182ad8d6df35965a85cae29cd71d089621b3ae94", size = 1922347, upload-time = "2026-06-07T21:08:59.563Z" }, + { url = "https://files.pythonhosted.org/packages/c0/3e/d3ba07a0ab38b5389e10bec4362d21e10a4f667cba2d79ba30837b3a5059/aiohttp-3.14.1-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9e8f2d660c350b3d0e259c7a7e3d9b7fc8b41210cbcc3d4a7076ff0a5e5c2fdc", size = 1786465, upload-time = "2026-06-07T21:09:01.909Z" }, + { url = "https://files.pythonhosted.org/packages/0b/cb/e2ee978a00cfb2df829704a69528b18154eba5939f45bc1efa8f33aee4c5/aiohttp-3.14.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4691802dda97be727f79d86818acaad7eb8e9252626a1d6b519fedbb92d5e251", size = 1909423, upload-time = "2026-06-07T21:09:04.357Z" }, + { url = "https://files.pythonhosted.org/packages/73/5d/1430334858b1022b58ae50399a918f0bd6fe8fa7fa183598d657ff61e040/aiohttp-3.14.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c389c482a7e9b9dc3ee2701ac46c4125297a3818875b9c305ddb603c04828fd1", size = 2001906, upload-time = "2026-06-07T21:09:06.722Z" }, + { url = "https://files.pythonhosted.org/packages/66/4e/560c7472d3d198a23aa5c8b19a5115bf6a9b77b7d3e4bb363da320430ad2/aiohttp-3.14.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:fc0cacab7ba4e56f0f81c82a98c09bed2f39c940107b03a34b168bdf7597edd3", size = 1877095, upload-time = "2026-06-07T21:09:09.011Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f1/4745806578d447db4a784a8591e2dae3afdfc2bcb96f8f81271b13df6543/aiohttp-3.14.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:979ed4717f59b8bb12e3963378fa285d93d367e15bcd66c721311826d3c44a6c", size = 1676222, upload-time = "2026-06-07T21:09:11.461Z" }, + { url = "https://files.pythonhosted.org/packages/6a/c9/48255813cca749a229ef0ab476004ec623728ad79a9c0840616f6c076325/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:38e1e7daaea81df51c952e18483f323d878499a1e2bfe564790e0f9701d6f203", size = 1842922, upload-time = "2026-06-07T21:09:14.118Z" }, + { url = "https://files.pythonhosted.org/packages/3d/c0/bbd054e2bee909f529523a5af3891052606af5143c09f5f183ec3b234676/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:4132e72c608fe9fecb8f409113567605915b83e9bdd3ea56538d2f9cd35002f1", size = 1825035, upload-time = "2026-06-07T21:09:16.447Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ae/90395d4376deceb74e09ec26b6adf7d2015a6f8802d6d84446af860fef04/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:eefd9cc9b6d4a2db5f00a26bc3e4f9acf71926a6ec557cd56c9c6f27c290b665", size = 1849512, upload-time = "2026-06-07T21:09:18.742Z" }, + { url = "https://files.pythonhosted.org/packages/93/bd/fb25f3049957553d4ce0ba6ae480aa2f592a6985497fca590837d16c1be0/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:b165790117eea512d7f3fb22f1f6dad3d55a7189571993eb015591c1401276d1", size = 1668571, upload-time = "2026-06-07T21:09:21.458Z" }, + { url = "https://files.pythonhosted.org/packages/3f/22/7f73303d64dd567ff3addca90b556690ed1233a47b8f55d242fb90af3681/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:ed09c7eb1c391271c2ed0314a51903e72a3acb653d5ccfc264cdf3ef11f8269d", size = 
1881159, upload-time = "2026-06-07T21:09:23.813Z" }, + { url = "https://files.pythonhosted.org/packages/44/be/0474c5a8b5640e1e4aa1923430a91f4151be82e511373fe764189b89aef5/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:99abd37084b82f5830c635fddd0b4993b9742a66eb746dacf433c8590e8f9e3c", size = 1841409, upload-time = "2026-06-07T21:09:26.207Z" }, + { url = "https://files.pythonhosted.org/packages/7b/3c/bb4a7cba26956cb3da4553cc2056cf67be5b5ff6e6d8fa4fbdff73bfb7ae/aiohttp-3.14.1-cp314-cp314t-win32.whl", hash = "sha256:47ddf841cdecc810749921d25606dee45857d12d2ad5ddb7b5bd7eab12e4b365", size = 494166, upload-time = "2026-06-07T21:09:28.505Z" }, + { url = "https://files.pythonhosted.org/packages/8a/84/ec80c2c1f66a952555a9f86df6b33af65108a6febfa0471b69013a12f807/aiohttp-3.14.1-cp314-cp314t-win_amd64.whl", hash = "sha256:5e78b522b7a6e27e0b25d19b247b75039ac4c94f99823e3c9e53ae1603a9f7e9", size = 530255, upload-time = "2026-06-07T21:09:30.843Z" }, + { url = "https://files.pythonhosted.org/packages/2a/71/6e22be134a4061ada85a92951b842f2657f17d926b727f3f94c56ae963d6/aiohttp-3.14.1-cp314-cp314t-win_arm64.whl", hash = "sha256:90d53f1609c29ccc2193945ef732428382a28f78d0456ae4d3daf0d48b74f0f6", size = 469640, upload-time = "2026-06-07T21:09:33.028Z" }, ] [[package]] @@ -146,7 +173,7 @@ wheels = [ [[package]] name = "anthropic" -version = "0.76.0" +version = "0.104.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -158,9 +185,9 @@ dependencies = [ { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6e/be/d11abafaa15d6304826438170f7574d750218f49a106c54424a40cef4494/anthropic-0.76.0.tar.gz", hash = "sha256:e0cae6a368986d5cf6df743dfbb1b9519e6a9eee9c6c942ad8121c0b34416ffe", size = 495483, upload-time = "2026-01-13T18:41:14.908Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/22/c7/7a655b948916f777354648ce979f68b94d5b8dbdb5f61fed1f37fad9378c/anthropic-0.104.1.tar.gz", hash = "sha256:17362b6c45f527afcc9b0fdf62011ffd359726ab2ebcb1978ea0cc41bd8d8d40", size = 850081, upload-time = "2026-05-22T15:36:57.432Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/70/7b0fd9c1a738f59d3babe2b4212031c34ab7d0fda4ffef15b58a55c5bcea/anthropic-0.76.0-py3-none-any.whl", hash = "sha256:81efa3113901192af2f0fe977d3ec73fdadb1e691586306c4256cd6d5ccc331c", size = 390309, upload-time = "2026-01-13T18:41:13.483Z" }, + { url = "https://files.pythonhosted.org/packages/b8/12/d9ab42790494d7c428391a46cd28492395566a6a8ccb138d681978594455/anthropic-0.104.1-py3-none-any.whl", hash = "sha256:35c8cb456f5a4405aafe1f10f03f6fcc54fa51fa8ec01d655cc4b437d120e9b7", size = 832996, upload-time = "2026-05-22T15:36:59.519Z" }, ] [[package]] @@ -194,6 +221,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/66/686ac4fc6ef48f5bacde625adac698f41d5316a9753c2b20bb0931c9d4e2/astroid-4.0.3-py3-none-any.whl", hash = "sha256:864a0a34af1bd70e1049ba1e61cee843a7252c826d97825fcee9b2fcbd9e1b14", size = 276443, upload-time = "2026-01-03T22:14:24.412Z" }, ] +[[package]] +name = "atheris" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/58/5965955898e16bee17c8379eae12194993bf641c4629016991248b862069/atheris-3.0.0.tar.gz", hash = "sha256:1f0929c7bc3040f3fe4102e557718734190cf2d7718bbb8e3ce6d3eb56ef5bb3", size = 373239, upload-time = "2025-11-24T23:54:02.15Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/8c/e9960b996e70e5f6a523670431166b2b238de52fef094955515dcf854da1/atheris-3.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:510e502c57b6dc615fb174066407af620d4c7f73cf08a782c86e7761bf12c4eb", size = 34907016, upload-time = "2025-11-24T23:53:56.535Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/48/df670f75f458cc7c1752a01a394fd59c830b08172dd59cf29d73f31050f9/atheris-3.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a402cdca8a650d1371050b1f9552eb4cdc488d2db64950d603c4560318365eac", size = 34858525, upload-time = "2025-11-24T23:53:59.925Z" }, +] + [[package]] name = "attrs" version = "25.4.0" @@ -205,14 +242,38 @@ wheels = [ [[package]] name = "authlib" -version = "1.6.6" +version = "1.7.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, + { name = "joserfc" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/98/7d93f30d029643c0275dbc0bd6d5a6f670661ee6c9a94d93af7ab4887600/authlib-1.7.2.tar.gz", hash = "sha256:2cea25fefcd4e7173bdf1372c0afc265c8034b23a8cd5dcb6a9164b826c64231", size = 176511, upload-time = "2026-05-06T08:10:23.116Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/95/adcb68e20c34162e9135f370d6e31737719c2b6f94bc953fe7ed1f10fe21/authlib-1.7.2-py2.py3-none-any.whl", hash = "sha256:3e1faedc9d87e7d56a164eca3ccb6ace0d61b94abe83e92242f8dc8bba9b4a9f", size = 259548, upload-time = "2026-05-06T08:10:21.436Z" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bb/9b/b1661026ff24bc641b76b78c5222d614776b0c085bcfdac9bd15a1cb4b35/authlib-1.6.6.tar.gz", hash = "sha256:45770e8e056d0f283451d9996fbb59b70d45722b45d854d58f32878d0a40c38e", size = 164894, upload-time = "2025-12-12T08:01:41.464Z" } + +[[package]] +name = "babel" +version = "2.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/b2/51899539b6ceeeb420d40ed3cd4b7a40519404f9baf3d4ac99dc413a834b/babel-2.18.0.tar.gz", hash = "sha256:b80b99a14bd085fcacfa15c9165f651fbb3406e66cc603abf11c5750937c992d", size = 9959554, upload-time = "2026-02-01T12:30:56.078Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/77/f5/21d2de20e8b8b0408f0681956ca2c69f1320a3848ac50e6e7f39c6159675/babel-2.18.0-py3-none-any.whl", hash = "sha256:e2b422b277c2b9a9630c1d7903c2a00d0830c409c59ac8cae9081c92f1aeba35", size = 10196845, upload-time = "2026-02-01T12:30:53.445Z" }, +] + +[[package]] +name = "backrefs" +version = "6.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/a6/e325ec73b638d3ede4421b5445d4a0b8b219481826cc079d510100af356c/backrefs-6.2.tar.gz", hash = "sha256:f44ff4d48808b243b6c0cdc6231e22195c32f77046018141556c66f8bab72a49", size = 7012303, upload-time = "2026-02-16T19:10:15.828Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/54/51/321e821856452f7386c4e9df866f196720b1ad0c5ea1623ea7399969ae3b/authlib-1.6.6-py2.py3-none-any.whl", hash = "sha256:7d9e9bc535c13974313a87f53e8430eb6ea3d1cf6ae4f6efcd793f2e949143fd", size = 244005, upload-time = "2025-12-12T08:01:40.209Z" }, + { url = "https://files.pythonhosted.org/packages/1b/39/3765df263e08a4df37f4f43cb5aa3c6c17a4bdd42ecfe841e04c26037171/backrefs-6.2-py310-none-any.whl", hash = "sha256:0fdc7b012420b6b144410342caeb8adc54c6866cf12064abc9bb211302e496f8", size = 381075, upload-time = "2026-02-16T19:10:04.322Z" }, + { url = "https://files.pythonhosted.org/packages/0f/f0/35240571e1b67ffb19dafb29ab34150b6f59f93f717b041082cdb1bfceb1/backrefs-6.2-py311-none-any.whl", hash = "sha256:08aa7fae530c6b2361d7bdcbda1a7c454e330cc9dbcd03f5c23205e430e5c3be", size = 392874, upload-time = "2026-02-16T19:10:06.314Z" }, + { url = "https://files.pythonhosted.org/packages/e3/63/77e8c9745b4d227cce9f5e0a6f68041278c5f9b18588b35905f5f19c1beb/backrefs-6.2-py312-none-any.whl", hash = "sha256:c3f4b9cb2af8cda0d87ab4f57800b57b95428488477be164dd2b47be54db0c90", size = 398787, upload-time = "2026-02-16T19:10:08.274Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/71/c754b1737ad99102e03fa3235acb6cb6d3ac9d6f596cbc3e5f236705abd8/backrefs-6.2-py313-none-any.whl", hash = "sha256:12df81596ab511f783b7d87c043ce26bc5b0288cf3bb03610fe76b8189282b2b", size = 400747, upload-time = "2026-02-16T19:10:09.791Z" }, + { url = "https://files.pythonhosted.org/packages/af/75/be12ba31a6eb20dccef2320cd8ccb3f7d9013b68ba4c70156259fee9e409/backrefs-6.2-py314-none-any.whl", hash = "sha256:e5f805ae09819caa1aa0623b4a83790e7028604aa2b8c73ba602c4454e665de7", size = 412602, upload-time = "2026-02-16T19:10:12.317Z" }, + { url = "https://files.pythonhosted.org/packages/21/f8/d02f650c47d05034dcd6f9c8cf94f39598b7a89c00ecda0ecb2911bc27e9/backrefs-6.2-py39-none-any.whl", hash = "sha256:664e33cd88c6840b7625b826ecf2555f32d491800900f5a541f772c485f7cda7", size = 381077, upload-time = "2026-02-16T19:10:13.74Z" }, ] [[package]] @@ -250,30 +311,30 @@ wheels = [ [[package]] name = "boto3" -version = "1.42.33" +version = "1.43.14" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore" }, { name = "jmespath" }, { name = "s3transfer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d4/c7/695a39a862140dd40637a3dc0020f4f645bb78c47f0d9195db76ed7e1da2/boto3-1.42.33.tar.gz", hash = "sha256:5da0d35dd82451d4520af63f8fcc722537597d7c790035e8b3a8fc53f032be3a", size = 112844, upload-time = "2026-01-22T20:29:15.817Z" } +sdist = { url = "https://files.pythonhosted.org/packages/79/4b/616367e871ce3f1cb3e8545a97736b6331b9fb081497f2d44c5b2aa6959d/boto3-1.43.14.tar.gz", hash = "sha256:5c0a994b3182061ee101812e721100717a4d664f9f4ceaf4a86b6d032ce9fc2d", size = 113142, upload-time = "2026-05-22T19:28:47.861Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/be/93/80aa0c9c5931e72252cbf46162f5b438f040f618bb941aa85bb591c62bc9/boto3-1.42.33-py3-none-any.whl", hash = "sha256:81db4a1ef08b3a69b2c5a879e7bd26ee43ca3fd5202cd320a2aaa4f5dd11182c", size = 140574, upload-time = "2026-01-22T20:29:13.531Z" 
}, + { url = "https://files.pythonhosted.org/packages/cb/00/59cb9329c18e2d3aa23062ceaa87d065f2e81e7d2931df24d64e9a7815aa/boto3-1.43.14-py3-none-any.whl", hash = "sha256:574335744656cfed0b362a0a0467aaf2eb2bf15526edcd02d31d3c661f4b09e4", size = 140536, upload-time = "2026-05-22T19:28:46.49Z" }, ] [[package]] name = "botocore" -version = "1.42.33" +version = "1.43.14" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jmespath" }, { name = "python-dateutil" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8f/ea/7bfe0902a228b4aa73106e704188189ab0e16e0a0e9598fa2b126ebfe759/botocore-1.42.33.tar.gz", hash = "sha256:ecf48db73605a592b6c7f8f29e517d9eb6cf0c7e004a1fdbd9c192afc7b42b03", size = 14903415, upload-time = "2026-01-22T20:29:04.293Z" } +sdist = { url = "https://files.pythonhosted.org/packages/78/3c/798d2f7deb118241930c7c6bcfb0b970d3f0245bf580700663199aeed2c3/botocore-1.43.14.tar.gz", hash = "sha256:b9e500737e43d2f147c9d4e23b54360335e77d4c0ba90a318f51b65e06cb8516", size = 15382604, upload-time = "2026-05-22T19:28:36.363Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2f/58/da9a094c8c2499a19c57f4aedca2d5fb2c88bfb9e2931d87af41309c4521/botocore-1.42.33-py3-none-any.whl", hash = "sha256:156a1ead55c38709730c543eb8085c36098b7baf272fedc67cc4a543ae4b4cf6", size = 14575729, upload-time = "2026-01-22T20:29:00.759Z" }, + { url = "https://files.pythonhosted.org/packages/27/7e/6e64821077cd2efc4aa51b7d638fb6d48e1c7c450201c529fbaf1de8bfd3/botocore-1.43.14-py3-none-any.whl", hash = "sha256:1f4a2a95ea78c10398e78431e98c1fe47adb54a7b10a32975144c1f541186658", size = 15061424, upload-time = "2026-05-22T19:28:32.682Z" }, ] [[package]] @@ -294,6 +355,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/fc/1d7b80d0eb7b714984ce40efc78859c022cd930e402f599d8ca9e39c78a4/cachetools-6.2.4-py3-none-any.whl", hash = "sha256:69a7a52634fed8b8bf6e24a050fb60bff1c9bd8f6d24572b99c32d4e71e62a51", size = 11551, 
upload-time = "2025-12-15T18:24:52.332Z" }, ] +[[package]] +name = "caio" +version = "0.9.25" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/92/88/b8527e1b00c1811db339a1df8bd1ae49d146fcea9d6a5c40e3a80aaeb38d/caio-0.9.25.tar.gz", hash = "sha256:16498e7f81d1d0f5a4c0ad3f2540e65fe25691376e0a5bd367f558067113ed10", size = 26781, upload-time = "2025-12-26T15:21:36.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d3/25/79c98ebe12df31548ba4eaf44db11b7cad6b3e7b4203718335620939083c/caio-0.9.25-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fb7ff95af4c31ad3f03179149aab61097a71fd85e05f89b4786de0359dffd044", size = 36983, upload-time = "2025-12-26T15:21:36.075Z" }, + { url = "https://files.pythonhosted.org/packages/a3/2b/21288691f16d479945968a0a4f2856818c1c5be56881d51d4dac9b255d26/caio-0.9.25-cp312-cp312-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:97084e4e30dfa598449d874c4d8e0c8d5ea17d2f752ef5e48e150ff9d240cd64", size = 82012, upload-time = "2025-12-26T15:22:20.983Z" }, + { url = "https://files.pythonhosted.org/packages/03/c4/8a1b580875303500a9c12b9e0af58cb82e47f5bcf888c2457742a138273c/caio-0.9.25-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:4fa69eba47e0f041b9d4f336e2ad40740681c43e686b18b191b6c5f4c5544bfb", size = 81502, upload-time = "2026-03-04T22:08:22.381Z" }, + { url = "https://files.pythonhosted.org/packages/d1/1c/0fe770b8ffc8362c48134d1592d653a81a3d8748d764bec33864db36319d/caio-0.9.25-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:6bebf6f079f1341d19f7386db9b8b1f07e8cc15ae13bfdaff573371ba0575d69", size = 80200, upload-time = "2026-03-04T22:08:23.382Z" }, + { url = "https://files.pythonhosted.org/packages/31/57/5e6ff127e6f62c9f15d989560435c642144aa4210882f9494204bc892305/caio-0.9.25-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:d6c2a3411af97762a2b03840c3cec2f7f728921ff8adda53d7ea2315a8563451", size = 36979, upload-time = "2025-12-26T15:21:35.484Z" }, + { url = "https://files.pythonhosted.org/packages/a3/9f/f21af50e72117eb528c422d4276cbac11fb941b1b812b182e0a9c70d19c5/caio-0.9.25-cp313-cp313-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0998210a4d5cd5cb565b32ccfe4e53d67303f868a76f212e002a8554692870e6", size = 81900, upload-time = "2025-12-26T15:22:21.919Z" }, + { url = "https://files.pythonhosted.org/packages/9c/12/c39ae2a4037cb10ad5eb3578eb4d5f8c1a2575c62bba675f3406b7ef0824/caio-0.9.25-cp313-cp313-manylinux_2_34_aarch64.whl", hash = "sha256:1a177d4777141b96f175fe2c37a3d96dec7911ed9ad5f02bac38aaa1c936611f", size = 81523, upload-time = "2026-03-04T22:08:25.187Z" }, + { url = "https://files.pythonhosted.org/packages/22/59/f8f2e950eb4f1a5a3883e198dca514b9d475415cb6cd7b78b9213a0dd45a/caio-0.9.25-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:9ed3cfb28c0e99fec5e208c934e5c157d0866aa9c32aa4dc5e9b6034af6286b7", size = 80243, upload-time = "2026-03-04T22:08:26.449Z" }, + { url = "https://files.pythonhosted.org/packages/69/ca/a08fdc7efdcc24e6a6131a93c85be1f204d41c58f474c42b0670af8c016b/caio-0.9.25-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:fab6078b9348e883c80a5e14b382e6ad6aabbc4429ca034e76e730cf464269db", size = 36978, upload-time = "2025-12-26T15:21:41.055Z" }, + { url = "https://files.pythonhosted.org/packages/5e/6c/d4d24f65e690213c097174d26eda6831f45f4734d9d036d81790a27e7b78/caio-0.9.25-cp314-cp314-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:44a6b58e52d488c75cfaa5ecaa404b2b41cc965e6c417e03251e868ecd5b6d77", size = 81832, upload-time = "2025-12-26T15:22:22.757Z" }, + { url = "https://files.pythonhosted.org/packages/87/a4/e534cf7d2d0e8d880e25dd61e8d921ffcfe15bd696734589826f5a2df727/caio-0.9.25-cp314-cp314-manylinux_2_34_aarch64.whl", hash = 
"sha256:628a630eb7fb22381dd8e3c8ab7f59e854b9c806639811fc3f4310c6bd711d79", size = 81565, upload-time = "2026-03-04T22:08:27.483Z" }, + { url = "https://files.pythonhosted.org/packages/3f/ed/bf81aeac1d290017e5e5ac3e880fd56ee15e50a6d0353986799d1bc5cfd5/caio-0.9.25-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:0ba16aa605ccb174665357fc729cf500679c2d94d5f1458a6f0d5ca48f2060a7", size = 80071, upload-time = "2026-03-04T22:08:28.751Z" }, + { url = "https://files.pythonhosted.org/packages/86/93/1f76c8d1bafe3b0614e06b2195784a3765bbf7b0a067661af9e2dd47fc33/caio-0.9.25-py3-none-any.whl", hash = "sha256:06c0bb02d6b929119b1cfbe1ca403c768b2013a369e2db46bfa2a5761cf82e40", size = 19087, upload-time = "2025-12-26T15:22:00.221Z" }, +] + [[package]] name = "certifi" version = "2026.1.4" @@ -450,23 +532,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/75/45/54bb2d8d4138964a94bef6e9afe48b0be4705ba66ac442ae7d8a8dc4ffef/click_option_group-0.5.9-py3-none-any.whl", hash = "sha256:ad2599248bd373e2e19bec5407967c3eec1d0d4fc4a5e77b08a0481e75991080", size = 11553, upload-time = "2025-10-09T09:38:00.066Z" }, ] -[[package]] -name = "cloudpickle" -version = "3.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = "sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" }, -] - [[package]] name = "code-graph-rag" -version = "0.0.58" +version = "0.0.187" source = { editable = "." 
} dependencies = [ { name = "click" }, { name = "defusedxml" }, { name = "diff-match-patch" }, + { name = "griffe" }, { name = "huggingface-hub", extra = ["hf-xet"] }, { name = "loguru" }, { name = "mcp" }, @@ -477,6 +551,7 @@ dependencies = [ { name = "pymgclient" }, { name = "python-dotenv" }, { name = "rich" }, + { name = "tiktoken" }, { name = "toml" }, { name = "tree-sitter" }, { name = "tree-sitter-python" }, @@ -491,6 +566,7 @@ semantic = [ { name = "transformers" }, ] test = [ + { name = "libclang" }, { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-cov" }, @@ -498,11 +574,13 @@ test = [ { name = "testcontainers" }, ] treesitter-full = [ + { name = "tree-sitter-c" }, { name = "tree-sitter-cpp" }, { name = "tree-sitter-go" }, { name = "tree-sitter-java" }, { name = "tree-sitter-javascript" }, { name = "tree-sitter-lua" }, + { name = "tree-sitter-php" }, { name = "tree-sitter-python" }, { name = "tree-sitter-rust" }, { name = "tree-sitter-scala" }, @@ -516,6 +594,7 @@ dev = [ { name = "pre-commit" }, { name = "pyinstaller" }, { name = "pylint" }, + { name = "pytest" }, { name = "radon" }, { name = "ruff" }, { name = "semgrep" }, @@ -524,43 +603,56 @@ dev = [ { name = "types-toml" }, { name = "vulture" }, ] +docs = [ + { name = "mkdocs" }, + { name = "mkdocs-material" }, + { name = "mkdocs-minify-plugin" }, +] +fuzz = [ + { name = "atheris" }, +] [package.metadata] requires-dist = [ - { name = "click", specifier = ">=8.0.0" }, + { name = "click", specifier = ">=8.3.1" }, { name = "defusedxml", specifier = ">=0.7.1" }, { name = "diff-match-patch", specifier = ">=20241021" }, - { name = "huggingface-hub", extras = ["hf-xet"], specifier = ">=0.36.0" }, + { name = "griffe", specifier = ">=1.0,<2" }, + { name = "huggingface-hub", extras = ["hf-xet"], specifier = ">=1.7.2" }, + { name = "libclang", marker = "extra == 'test'", specifier = ">=18.1.1" }, { name = "loguru", specifier = ">=0.7.3" }, - { name = "mcp", specifier = ">=1.21.1" }, - { name = 
"prompt-toolkit", specifier = ">=3.0.0" }, - { name = "protobuf", specifier = ">=5.27.0" }, - { name = "pydantic-ai", specifier = ">=1.27.0" }, - { name = "pydantic-settings", specifier = ">=2.0.0" }, - { name = "pymgclient", specifier = ">=1.4.0" }, + { name = "mcp", specifier = ">=1.25.0" }, + { name = "prompt-toolkit", specifier = ">=3.0.52" }, + { name = "protobuf", specifier = ">=6.33.5" }, + { name = "pydantic-ai", specifier = ">=1.102.0" }, + { name = "pydantic-settings", specifier = ">=2.12.0" }, + { name = "pymgclient", specifier = ">=1.5.1" }, { name = "pytest", marker = "extra == 'test'", specifier = ">=8.4.1" }, { name = "pytest-asyncio", marker = "extra == 'test'", specifier = ">=1.0.0" }, { name = "pytest-cov", marker = "extra == 'test'", specifier = ">=4.0.0" }, { name = "pytest-xdist", marker = "extra == 'test'", specifier = ">=3.8.0" }, - { name = "python-dotenv", specifier = ">=1.1.0" }, + { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "qdrant-client", marker = "extra == 'semantic'", specifier = ">=1.9.0" }, - { name = "rich", specifier = ">=13.7.1" }, + { name = "rich", specifier = ">=14.2.0" }, { name = "testcontainers", marker = "extra == 'test'", specifier = ">=4.9.0" }, + { name = "tiktoken", specifier = ">=0.12.0" }, { name = "toml", specifier = ">=0.10.2" }, { name = "torch", marker = "extra == 'semantic'", specifier = ">=2.6.0" }, { name = "transformers", marker = "extra == 'semantic'", specifier = ">=4.0.0" }, - { name = "tree-sitter", specifier = "==0.25.0" }, + { name = "tree-sitter", specifier = "==0.25.2" }, + { name = "tree-sitter-c", marker = "extra == 'treesitter-full'", specifier = ">=0.24.1" }, { name = "tree-sitter-cpp", marker = "extra == 'treesitter-full'", specifier = ">=0.23.0" }, { name = "tree-sitter-go", marker = "extra == 'treesitter-full'", specifier = ">=0.23.4" }, { name = "tree-sitter-java", marker = "extra == 'treesitter-full'", specifier = ">=0.23.5" }, { name = "tree-sitter-javascript", marker = 
"extra == 'treesitter-full'", specifier = ">=0.23.1" }, { name = "tree-sitter-lua", marker = "extra == 'treesitter-full'", specifier = ">=0.0.19" }, - { name = "tree-sitter-python", specifier = ">=0.23.6" }, + { name = "tree-sitter-php", marker = "extra == 'treesitter-full'", specifier = ">=0.24.1" }, + { name = "tree-sitter-python", specifier = ">=0.25.0" }, { name = "tree-sitter-python", marker = "extra == 'treesitter-full'", specifier = ">=0.23.6" }, { name = "tree-sitter-rust", marker = "extra == 'treesitter-full'", specifier = ">=0.24.0" }, { name = "tree-sitter-scala", marker = "extra == 'treesitter-full'", specifier = ">=0.24.0" }, { name = "tree-sitter-typescript", marker = "extra == 'treesitter-full'", specifier = ">=0.23.2" }, - { name = "typer", specifier = ">=0.12.5" }, + { name = "typer", specifier = ">=0.21.1" }, { name = "watchdog", specifier = ">=6.0.0" }, ] provides-extras = ["test", "treesitter-full", "semantic"] @@ -572,6 +664,7 @@ dev = [ { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pyinstaller", specifier = ">=6.14.1" }, { name = "pylint", specifier = ">=4.0.4" }, + { name = "pytest", specifier = ">=9.0.2" }, { name = "radon", specifier = ">=6.0.1" }, { name = "ruff", specifier = ">=0.5.5" }, { name = "semgrep", specifier = ">=1.79.0" }, @@ -580,10 +673,16 @@ dev = [ { name = "types-toml", specifier = ">=0.10.8.20240310" }, { name = "vulture", specifier = ">=2.14" }, ] +docs = [ + { name = "mkdocs", specifier = ">=1.6.1,<2" }, + { name = "mkdocs-material", specifier = ">=9.7.3" }, + { name = "mkdocs-minify-plugin", specifier = ">=0.8.0" }, +] +fuzz = [{ name = "atheris", specifier = ">=2.3.0" }] [[package]] name = "cohere" -version = "5.20.1" +version = "5.20.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "fastavro" }, @@ -595,9 +694,9 @@ dependencies = [ { name = "types-requests" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/4b/ed/bb02083654bdc089ae4ef1cd7691fd2233f1fd9f32bcbfacc80ff57d9775/cohere-5.20.1.tar.gz", hash = "sha256:50973f63d2c6138ff52ce37d8d6f78ccc539af4e8c43865e960d68e0bf835b6f", size = 180820, upload-time = "2025-12-18T16:39:50.975Z" } +sdist = { url = "https://files.pythonhosted.org/packages/44/0b/96e2b55a0114ed9d69b3154565f54b764e7530735426290b000f467f4c0f/cohere-5.20.7.tar.gz", hash = "sha256:997ed85fabb3a1e4a4c036fdb520382e7bfa670db48eb59a026803b6f7061dbb", size = 184986, upload-time = "2026-02-25T01:22:18.673Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/e3/94eb11ac3ebaaa3a6afb5d2ff23db95d58bc468ae538c388edf49f2f20b5/cohere-5.20.1-py3-none-any.whl", hash = "sha256:d230fd13d95ba92ae927fce3dd497599b169883afc7954fe29b39fb8d5df5fc7", size = 318973, upload-time = "2025-12-18T16:39:49.504Z" }, + { url = "https://files.pythonhosted.org/packages/9d/86/dc991a75e3b9c2007b90dbfaf7f36fdb2457c216f799e26ce0474faf0c1f/cohere-5.20.7-py3-none-any.whl", hash = "sha256:043fef2a12c30c07e9b2c1f0b869fd66ffd911f58d1492f87e901c4190a65914", size = 323389, upload-time = "2026-02-25T01:22:16.902Z" }, ] [[package]] @@ -685,81 +784,127 @@ wheels = [ [[package]] name = "cryptography" -version = "46.0.3" +version = "48.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9f/33/c00162f49c0e2fe8064a62cb92b93e50c74a72bc370ab92f86112b33ff62/cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1", size = 749258, upload-time = "2025-10-15T23:18:31.74Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/42/9c391dd801d6cf0d561b5890549d4b27bafcc53b39c31a817e69d87c625b/cryptography-46.0.3-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:109d4ddfadf17e8e7779c39f9b18111a09efb969a301a31e987416a0191ed93a", size = 7225004, 
upload-time = "2025-10-15T23:16:52.239Z" }, - { url = "https://files.pythonhosted.org/packages/1c/67/38769ca6b65f07461eb200e85fc1639b438bdc667be02cf7f2cd6a64601c/cryptography-46.0.3-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:09859af8466b69bc3c27bdf4f5d84a665e0f7ab5088412e9e2ec49758eca5cbc", size = 4296667, upload-time = "2025-10-15T23:16:54.369Z" }, - { url = "https://files.pythonhosted.org/packages/5c/49/498c86566a1d80e978b42f0d702795f69887005548c041636df6ae1ca64c/cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d", size = 4450807, upload-time = "2025-10-15T23:16:56.414Z" }, - { url = "https://files.pythonhosted.org/packages/4b/0a/863a3604112174c8624a2ac3c038662d9e59970c7f926acdcfaed8d61142/cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb", size = 4299615, upload-time = "2025-10-15T23:16:58.442Z" }, - { url = "https://files.pythonhosted.org/packages/64/02/b73a533f6b64a69f3cd3872acb6ebc12aef924d8d103133bb3ea750dc703/cryptography-46.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5bf0ed4490068a2e72ac03d786693adeb909981cc596425d09032d372bcc849", size = 4016800, upload-time = "2025-10-15T23:17:00.378Z" }, - { url = "https://files.pythonhosted.org/packages/25/d5/16e41afbfa450cde85a3b7ec599bebefaef16b5c6ba4ec49a3532336ed72/cryptography-46.0.3-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5ecfccd2329e37e9b7112a888e76d9feca2347f12f37918facbb893d7bb88ee8", size = 4984707, upload-time = "2025-10-15T23:17:01.98Z" }, - { url = "https://files.pythonhosted.org/packages/c9/56/e7e69b427c3878352c2fb9b450bd0e19ed552753491d39d7d0a2f5226d41/cryptography-46.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a2c0cd47381a3229c403062f764160d57d4d175e022c1df84e168c6251a22eec", size = 4482541, upload-time = 
"2025-10-15T23:17:04.078Z" }, - { url = "https://files.pythonhosted.org/packages/78/f6/50736d40d97e8483172f1bb6e698895b92a223dba513b0ca6f06b2365339/cryptography-46.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:549e234ff32571b1f4076ac269fcce7a808d3bf98b76c8dd560e42dbc66d7d91", size = 4299464, upload-time = "2025-10-15T23:17:05.483Z" }, - { url = "https://files.pythonhosted.org/packages/00/de/d8e26b1a855f19d9994a19c702fa2e93b0456beccbcfe437eda00e0701f2/cryptography-46.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:c0a7bb1a68a5d3471880e264621346c48665b3bf1c3759d682fc0864c540bd9e", size = 4950838, upload-time = "2025-10-15T23:17:07.425Z" }, - { url = "https://files.pythonhosted.org/packages/8f/29/798fc4ec461a1c9e9f735f2fc58741b0daae30688f41b2497dcbc9ed1355/cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926", size = 4481596, upload-time = "2025-10-15T23:17:09.343Z" }, - { url = "https://files.pythonhosted.org/packages/15/8d/03cd48b20a573adfff7652b76271078e3045b9f49387920e7f1f631d125e/cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71", size = 4426782, upload-time = "2025-10-15T23:17:11.22Z" }, - { url = "https://files.pythonhosted.org/packages/fa/b1/ebacbfe53317d55cf33165bda24c86523497a6881f339f9aae5c2e13e57b/cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac", size = 4698381, upload-time = "2025-10-15T23:17:12.829Z" }, - { url = "https://files.pythonhosted.org/packages/96/92/8a6a9525893325fc057a01f654d7efc2c64b9de90413adcf605a85744ff4/cryptography-46.0.3-cp311-abi3-win32.whl", hash = "sha256:f260d0d41e9b4da1ed1e0f1ce571f97fe370b152ab18778e9e8f67d6af432018", size = 3055988, upload-time = "2025-10-15T23:17:14.65Z" }, - { url = 
"https://files.pythonhosted.org/packages/7e/bf/80fbf45253ea585a1e492a6a17efcb93467701fa79e71550a430c5e60df0/cryptography-46.0.3-cp311-abi3-win_amd64.whl", hash = "sha256:a9a3008438615669153eb86b26b61e09993921ebdd75385ddd748702c5adfddb", size = 3514451, upload-time = "2025-10-15T23:17:16.142Z" }, - { url = "https://files.pythonhosted.org/packages/2e/af/9b302da4c87b0beb9db4e756386a7c6c5b8003cd0e742277888d352ae91d/cryptography-46.0.3-cp311-abi3-win_arm64.whl", hash = "sha256:5d7f93296ee28f68447397bf5198428c9aeeab45705a55d53a6343455dcb2c3c", size = 2928007, upload-time = "2025-10-15T23:17:18.04Z" }, - { url = "https://files.pythonhosted.org/packages/f5/e2/a510aa736755bffa9d2f75029c229111a1d02f8ecd5de03078f4c18d91a3/cryptography-46.0.3-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:00a5e7e87938e5ff9ff5447ab086a5706a957137e6e433841e9d24f38a065217", size = 7158012, upload-time = "2025-10-15T23:17:19.982Z" }, - { url = "https://files.pythonhosted.org/packages/73/dc/9aa866fbdbb95b02e7f9d086f1fccfeebf8953509b87e3f28fff927ff8a0/cryptography-46.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c8daeb2d2174beb4575b77482320303f3d39b8e81153da4f0fb08eb5fe86a6c5", size = 4288728, upload-time = "2025-10-15T23:17:21.527Z" }, - { url = "https://files.pythonhosted.org/packages/c5/fd/bc1daf8230eaa075184cbbf5f8cd00ba9db4fd32d63fb83da4671b72ed8a/cryptography-46.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39b6755623145ad5eff1dab323f4eae2a32a77a7abef2c5089a04a3d04366715", size = 4435078, upload-time = "2025-10-15T23:17:23.042Z" }, - { url = "https://files.pythonhosted.org/packages/82/98/d3bd5407ce4c60017f8ff9e63ffee4200ab3e23fe05b765cab805a7db008/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:db391fa7c66df6762ee3f00c95a89e6d428f4d60e7abc8328f4fe155b5ac6e54", size = 4293460, upload-time = "2025-10-15T23:17:24.885Z" }, - { url = 
"https://files.pythonhosted.org/packages/26/e9/e23e7900983c2b8af7a08098db406cf989d7f09caea7897e347598d4cd5b/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:78a97cf6a8839a48c49271cdcbd5cf37ca2c1d6b7fdd86cc864f302b5e9bf459", size = 3995237, upload-time = "2025-10-15T23:17:26.449Z" }, - { url = "https://files.pythonhosted.org/packages/91/15/af68c509d4a138cfe299d0d7ddb14afba15233223ebd933b4bbdbc7155d3/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:dfb781ff7eaa91a6f7fd41776ec37c5853c795d3b358d4896fdbb5df168af422", size = 4967344, upload-time = "2025-10-15T23:17:28.06Z" }, - { url = "https://files.pythonhosted.org/packages/ca/e3/8643d077c53868b681af077edf6b3cb58288b5423610f21c62aadcbe99f4/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6f61efb26e76c45c4a227835ddeae96d83624fb0d29eb5df5b96e14ed1a0afb7", size = 4466564, upload-time = "2025-10-15T23:17:29.665Z" }, - { url = "https://files.pythonhosted.org/packages/0e/43/c1e8726fa59c236ff477ff2b5dc071e54b21e5a1e51aa2cee1676f1c986f/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:23b1a8f26e43f47ceb6d6a43115f33a5a37d57df4ea0ca295b780ae8546e8044", size = 4292415, upload-time = "2025-10-15T23:17:31.686Z" }, - { url = "https://files.pythonhosted.org/packages/42/f9/2f8fefdb1aee8a8e3256a0568cffc4e6d517b256a2fe97a029b3f1b9fe7e/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b419ae593c86b87014b9be7396b385491ad7f320bde96826d0dd174459e54665", size = 4931457, upload-time = "2025-10-15T23:17:33.478Z" }, - { url = "https://files.pythonhosted.org/packages/79/30/9b54127a9a778ccd6d27c3da7563e9f2d341826075ceab89ae3b41bf5be2/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:50fc3343ac490c6b08c0cf0d704e881d0d660be923fd3076db3e932007e726e3", size = 4466074, upload-time = "2025-10-15T23:17:35.158Z" }, - { url = 
"https://files.pythonhosted.org/packages/ac/68/b4f4a10928e26c941b1b6a179143af9f4d27d88fe84a6a3c53592d2e76bf/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22d7e97932f511d6b0b04f2bfd818d73dcd5928db509460aaf48384778eb6d20", size = 4420569, upload-time = "2025-10-15T23:17:37.188Z" }, - { url = "https://files.pythonhosted.org/packages/a3/49/3746dab4c0d1979888f125226357d3262a6dd40e114ac29e3d2abdf1ec55/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d55f3dffadd674514ad19451161118fd010988540cee43d8bc20675e775925de", size = 4681941, upload-time = "2025-10-15T23:17:39.236Z" }, - { url = "https://files.pythonhosted.org/packages/fd/30/27654c1dbaf7e4a3531fa1fc77986d04aefa4d6d78259a62c9dc13d7ad36/cryptography-46.0.3-cp314-cp314t-win32.whl", hash = "sha256:8a6e050cb6164d3f830453754094c086ff2d0b2f3a897a1d9820f6139a1f0914", size = 3022339, upload-time = "2025-10-15T23:17:40.888Z" }, - { url = "https://files.pythonhosted.org/packages/f6/30/640f34ccd4d2a1bc88367b54b926b781b5a018d65f404d409aba76a84b1c/cryptography-46.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:760f83faa07f8b64e9c33fc963d790a2edb24efb479e3520c14a45741cd9b2db", size = 3494315, upload-time = "2025-10-15T23:17:42.769Z" }, - { url = "https://files.pythonhosted.org/packages/ba/8b/88cc7e3bd0a8e7b861f26981f7b820e1f46aa9d26cc482d0feba0ecb4919/cryptography-46.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:516ea134e703e9fe26bcd1277a4b59ad30586ea90c365a87781d7887a646fe21", size = 2919331, upload-time = "2025-10-15T23:17:44.468Z" }, - { url = "https://files.pythonhosted.org/packages/fd/23/45fe7f376a7df8daf6da3556603b36f53475a99ce4faacb6ba2cf3d82021/cryptography-46.0.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:cb3d760a6117f621261d662bccc8ef5bc32ca673e037c83fbe565324f5c46936", size = 7218248, upload-time = "2025-10-15T23:17:46.294Z" }, - { url = 
"https://files.pythonhosted.org/packages/27/32/b68d27471372737054cbd34c84981f9edbc24fe67ca225d389799614e27f/cryptography-46.0.3-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4b7387121ac7d15e550f5cb4a43aef2559ed759c35df7336c402bb8275ac9683", size = 4294089, upload-time = "2025-10-15T23:17:48.269Z" }, - { url = "https://files.pythonhosted.org/packages/26/42/fa8389d4478368743e24e61eea78846a0006caffaf72ea24a15159215a14/cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d", size = 4440029, upload-time = "2025-10-15T23:17:49.837Z" }, - { url = "https://files.pythonhosted.org/packages/5f/eb/f483db0ec5ac040824f269e93dd2bd8a21ecd1027e77ad7bdf6914f2fd80/cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0", size = 4297222, upload-time = "2025-10-15T23:17:51.357Z" }, - { url = "https://files.pythonhosted.org/packages/fd/cf/da9502c4e1912cb1da3807ea3618a6829bee8207456fbbeebc361ec38ba3/cryptography-46.0.3-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10ca84c4668d066a9878890047f03546f3ae0a6b8b39b697457b7757aaf18dbc", size = 4012280, upload-time = "2025-10-15T23:17:52.964Z" }, - { url = "https://files.pythonhosted.org/packages/6b/8f/9adb86b93330e0df8b3dcf03eae67c33ba89958fc2e03862ef1ac2b42465/cryptography-46.0.3-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:36e627112085bb3b81b19fed209c05ce2a52ee8b15d161b7c643a7d5a88491f3", size = 4978958, upload-time = "2025-10-15T23:17:54.965Z" }, - { url = "https://files.pythonhosted.org/packages/d1/a0/5fa77988289c34bdb9f913f5606ecc9ada1adb5ae870bd0d1054a7021cc4/cryptography-46.0.3-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1000713389b75c449a6e979ffc7dcc8ac90b437048766cef052d4d30b8220971", size = 4473714, upload-time = "2025-10-15T23:17:56.754Z" }, - { url = 
"https://files.pythonhosted.org/packages/14/e5/fc82d72a58d41c393697aa18c9abe5ae1214ff6f2a5c18ac470f92777895/cryptography-46.0.3-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:b02cf04496f6576afffef5ddd04a0cb7d49cf6be16a9059d793a30b035f6b6ac", size = 4296970, upload-time = "2025-10-15T23:17:58.588Z" }, - { url = "https://files.pythonhosted.org/packages/78/06/5663ed35438d0b09056973994f1aec467492b33bd31da36e468b01ec1097/cryptography-46.0.3-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:71e842ec9bc7abf543b47cf86b9a743baa95f4677d22baa4c7d5c69e49e9bc04", size = 4940236, upload-time = "2025-10-15T23:18:00.897Z" }, - { url = "https://files.pythonhosted.org/packages/fc/59/873633f3f2dcd8a053b8dd1d38f783043b5fce589c0f6988bf55ef57e43e/cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506", size = 4472642, upload-time = "2025-10-15T23:18:02.749Z" }, - { url = "https://files.pythonhosted.org/packages/3d/39/8e71f3930e40f6877737d6f69248cf74d4e34b886a3967d32f919cc50d3b/cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963", size = 4423126, upload-time = "2025-10-15T23:18:04.85Z" }, - { url = "https://files.pythonhosted.org/packages/cd/c7/f65027c2810e14c3e7268353b1681932b87e5a48e65505d8cc17c99e36ae/cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4", size = 4686573, upload-time = "2025-10-15T23:18:06.908Z" }, - { url = "https://files.pythonhosted.org/packages/0a/6e/1c8331ddf91ca4730ab3086a0f1be19c65510a33b5a441cb334e7a2d2560/cryptography-46.0.3-cp38-abi3-win32.whl", hash = "sha256:6276eb85ef938dc035d59b87c8a7dc559a232f954962520137529d77b18ff1df", size = 3036695, upload-time = "2025-10-15T23:18:08.672Z" }, - { url = 
"https://files.pythonhosted.org/packages/90/45/b0d691df20633eff80955a0fc7695ff9051ffce8b69741444bd9ed7bd0db/cryptography-46.0.3-cp38-abi3-win_amd64.whl", hash = "sha256:416260257577718c05135c55958b674000baef9a1c7d9e8f306ec60d71db850f", size = 3501720, upload-time = "2025-10-15T23:18:10.632Z" }, - { url = "https://files.pythonhosted.org/packages/e8/cb/2da4cc83f5edb9c3257d09e1e7ab7b23f049c7962cae8d842bbef0a9cec9/cryptography-46.0.3-cp38-abi3-win_arm64.whl", hash = "sha256:d89c3468de4cdc4f08a57e214384d0471911a3830fcdaf7a8cc587e42a866372", size = 2918740, upload-time = "2025-10-15T23:18:12.277Z" }, -] +sdist = { url = "https://files.pythonhosted.org/packages/12/45/870e7f4bef50e5f53b9f51d4428aee5290eedf58ba443f16b1ebb7ab8e66/cryptography-48.0.1.tar.gz", hash = "sha256:266f4ee051abb2f725b74ef8072b521ce1feacf685a3364fa6a6b45548db791a", size = 832989, upload-time = "2026-06-09T22:32:31.8Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/bc/ee4137cbbe105652c0ee4252792b78fc8e7afa4b8e61d9d5dc05a7f45731/cryptography-48.0.1-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:3e4a1a3232eef2e6c732827d5722db29a0cc8b27af2a4d865b094cf954be9ca1", size = 8008324, upload-time = "2026-06-09T22:31:00.702Z" }, + { url = "https://files.pythonhosted.org/packages/d5/85/6379d42181bfc713094f081360fc5784d6c816b599d45e7f082502d173ce/cryptography-48.0.1-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:32143b24adb918f078134e1e230f1eb8cc04886b92c28b5f0041aaf3e5699225", size = 4696243, upload-time = "2026-06-09T22:32:33.446Z" }, + { url = "https://files.pythonhosted.org/packages/9c/87/c85d147b53323c7eb4d850920c8901377323c2a0ff8d79c262d4fee89aa2/cryptography-48.0.1-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0d27a5696721ef7a672b8c810f6aded391058e0b9486e63e6d93baf765da691", size = 4713235, upload-time = "2026-06-09T22:31:40.141Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/58/67cbf8cf1ee7c54b439ca07bbecf8362c07afc11a3724fea70f745784add/cryptography-48.0.1-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eb86ce1af36fe65041b6db9a8bb064ee621a7e5fded0f80d475ec243477cd242", size = 4702323, upload-time = "2026-06-09T22:31:42.191Z" }, + { url = "https://files.pythonhosted.org/packages/89/c6/24266ac10c47f6cd2a865f4446062b466da1d1f10b27189eac00e61bf0c9/cryptography-48.0.1-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:b024e784ad6c077ee0147b35ea9cbfc1e34e1fd4c1dcca214c2794d73a12df08", size = 5300085, upload-time = "2026-06-09T22:31:58.703Z" }, + { url = "https://files.pythonhosted.org/packages/d2/bb/cc4b78784f97efc8c5874c2a9743708d172be6663024b34a0467885ae0c8/cryptography-48.0.1-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3752f2dbc8f07a30aad2932c986cea495b03bb554887828225da104f732852b6", size = 4746137, upload-time = "2026-06-09T22:31:31.01Z" }, + { url = "https://files.pythonhosted.org/packages/1f/52/0c44de3f5267f8fbe8e835138017522a333436166e406f0db9b9e6e3033f/cryptography-48.0.1-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:bd81490cd5801d755cf97bb68ac191f14b708470b1c7cf4580f669b9c9264cd8", size = 4333867, upload-time = "2026-06-09T22:32:28.096Z" }, + { url = "https://files.pythonhosted.org/packages/9a/2e/772d7adbfa931537bc401640b7cac9976bff689bda187833e5d63b428e49/cryptography-48.0.1-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:66fd0771e7b9c6dcd44cf1120690d2338d16d72795cf40cae2786a39eba65429", size = 4701805, upload-time = "2026-06-09T22:31:38.284Z" }, + { url = "https://files.pythonhosted.org/packages/f8/a3/b06844f303873493c963caf581c04df31c7035e0c1b0f02c4814d319ec80/cryptography-48.0.1-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:3fd2ca57062b241c856670b073487d2e86c4637937ca5601e48f97bf8e11fc8f", size = 5258461, upload-time = "2026-06-09T22:31:04.187Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/13/8b765e2e12b07c74941caadb9d1c8fdc006c4dfbf2b8f2d610519758954d/cryptography-48.0.1-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:0ee6ea481db1ab889cba043ec1eda17bb9c1ea79db6722f779c3667f9f70322f", size = 4745488, upload-time = "2026-06-09T22:32:30.07Z" }, + { url = "https://files.pythonhosted.org/packages/2e/aa/48972bce55049b32a94f4907eda4d75fa385aad8a39506cc2fc72196ecf0/cryptography-48.0.1-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f2ceef93cb096aa3c4cc4b5c94ca6131f9196d28c64d6111533402a9b2054d41", size = 4830256, upload-time = "2026-06-09T22:31:43.868Z" }, + { url = "https://files.pythonhosted.org/packages/47/a2/e5079a032fb85cf6005046ca92bbd78b0c82dad2b5751ab8c311659da06f/cryptography-48.0.1-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9bd3f92d76217892b15df84ca256c2c113d386fdda7a7d8691aeeced976507c6", size = 4979117, upload-time = "2026-06-09T22:31:05.845Z" }, + { url = "https://files.pythonhosted.org/packages/b7/a0/8f50cae9c74e718ed769d63ed5c74bd0ea830c9550a74629cebd1b9c7bc7/cryptography-48.0.1-cp311-abi3-win32.whl", hash = "sha256:b9a32b876490d66c8bcc9963ef220199569748434ab01a9d6aaeabf88e7f5158", size = 3304154, upload-time = "2026-06-09T22:32:16.845Z" }, + { url = "https://files.pythonhosted.org/packages/c5/69/0572c77dbace6fef72f33755bd52ea399c71367250d366237f8691826b9e/cryptography-48.0.1-cp311-abi3-win_amd64.whl", hash = "sha256:39489bfca54c7a1f6b297efcd8bc608ab92d16c4ca631b0cad4da46724588b24", size = 3817138, upload-time = "2026-06-09T22:32:00.388Z" }, + { url = "https://files.pythonhosted.org/packages/42/06/3e768b4c3bc78201583fa35a0e18f640dd782ff41afba88f8545481a8874/cryptography-48.0.1-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:f817adc181390bd54f2f700107a7419040fb7c1bdf2fc26f36551a06a68c3345", size = 7989830, upload-time = "2026-06-09T22:31:07.8Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/13/6476736484b94041110c8340a3eb63962fea4975baea8cb4a512adb44d4d/cryptography-48.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d5d30989c6917b478b5817902e85fddaea2261efa8648383d965381ccb9e1ac4", size = 4689201, upload-time = "2026-06-09T22:31:09.745Z" }, + { url = "https://files.pythonhosted.org/packages/79/62/65a87f34d2a431546e2509b85d55e8c90df86d668f6731da64d538512ac2/cryptography-48.0.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:df637c05205ea7c1d7fbcbe54bbfea648a52951155f997af13d895d0ecc96991", size = 4702822, upload-time = "2026-06-09T22:32:24.409Z" }, + { url = "https://files.pythonhosted.org/packages/7f/59/810b5204b0a9b10f4b6bc06bd551a8b609803cd931806bc3b71884b225e5/cryptography-48.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:869c3b8a53bfe27147832df48b32adadf558249d50e76cb3769d40e986b13265", size = 4694875, upload-time = "2026-06-09T22:32:08.737Z" }, + { url = "https://files.pythonhosted.org/packages/24/dc/d8ca05ffea724eec6d232ea6f18e74c269eb6bdfdcc9bfba689790d1325f/cryptography-48.0.1-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:e361afba8918070d376df76f408a4f67fec0ee9cff81a99e48fe9a233ef59e17", size = 5290385, upload-time = "2026-06-09T22:31:15.212Z" }, + { url = "https://files.pythonhosted.org/packages/03/8c/3be6cb4da181f5bb6c19cf560c2359d60644a6b5fc5b57854e528f47b296/cryptography-48.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:d069066deead00ac7f090be101be875a06855908f7ec004c27b8fefb4acfb411", size = 4737082, upload-time = "2026-06-09T22:32:22.66Z" }, + { url = "https://files.pythonhosted.org/packages/aa/f6/d5f60a5a1434dbfd949e227fd0065d194c7e6b6ac526b17f5c06152b8231/cryptography-48.0.1-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:09f73a725d582cef64b91281a322cd798d14a33b2b6f2b7ad9531dc336d84c02", size = 4325328, upload-time = "2026-06-09T22:32:10.777Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/b7/ba75dd947a14b6ad907b01ae8f6b5b348cdd1b48142f0063dee9e20c1d9d/cryptography-48.0.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:15254441469dd6bf027039453288e2072124f8b6603563f5d759e1c9b69273fa", size = 4694530, upload-time = "2026-06-09T22:31:53.105Z" }, + { url = "https://files.pythonhosted.org/packages/62/29/50d6b9e8aff12d8b67afaeb3569335e32dc83a5723e3bbded24fdac9f809/cryptography-48.0.1-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:8ace4507d1e6533c125f4fac754f8bb8b6a74c08e92179dabd7e16571a3efbf3", size = 5245046, upload-time = "2026-06-09T22:31:25.774Z" }, + { url = "https://files.pythonhosted.org/packages/9f/04/618f4115cfc0add0838c82507aa18a346089428da8653ad38b3ff36f5cb3/cryptography-48.0.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:b4e391975f038e66432328639620a4aff2d307513b004f1ca06d6225bced815c", size = 4736660, upload-time = "2026-06-09T22:32:12.676Z" }, + { url = "https://files.pythonhosted.org/packages/24/9c/06e062462a0de28a3b3911322eded4c16deb9f441b1b7575d3dc59488ab5/cryptography-48.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42fcd8e26fe555d9b3577a135f5091fefa0aa4e99129c23fb56787a1bd4ada72", size = 4822229, upload-time = "2026-06-09T22:31:17.062Z" }, + { url = "https://files.pythonhosted.org/packages/f4/be/0561971eaaee4b8a0e7d5113c536921063ab91aaf23278ac374eaf881e11/cryptography-48.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c1400da5e32a43253392277eac7490a60e497d810a63dd5608d71bbd7af507c9", size = 4966364, upload-time = "2026-06-09T22:31:32.842Z" }, + { url = "https://files.pythonhosted.org/packages/a4/27/728c77876f12b000820b69ae490f3c4083775e79e07827e9e60be07ad209/cryptography-48.0.1-cp314-cp314t-win32.whl", hash = "sha256:0df56b056bc17c1b7d6821dfa65216e62bd232d8ab05eb3db44e71d235651471", size = 3278498, upload-time = "2026-06-09T22:31:29.154Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/e3/79a612c6d7b1e6ee0edd43633d53035bec2cfb78c82b76f7864f39e36f34/cryptography-48.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:9de21387aa95e2a895823d0745b430bed4f33503ba9ab5e0b5311f33e37d66d2", size = 3798790, upload-time = "2026-06-09T22:31:56.697Z" }, + { url = "https://files.pythonhosted.org/packages/ca/6c/00fa2a95997164c8b2072ce327c23d4ab20809ccc323ea5fab91e53a4bba/cryptography-48.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:4fdc69f8e4316bcf0c8c8ec1f26f285d12e8142d88d96c876a59a03be3f6ae67", size = 7987408, upload-time = "2026-06-09T22:32:20.777Z" }, + { url = "https://files.pythonhosted.org/packages/b0/d9/45f309a7e4e5f3f8f121d6d3be9e94024a7726ec598d6e08ae04edb2f04d/cryptography-48.0.1-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48fe40804d4caa2288f24e70ca8c64c42dd826da0ad7e4f1b41b2128d679e6c8", size = 4690196, upload-time = "2026-06-09T22:31:54.74Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9f/a1bc8bcc798811b8527eb374bbccf30a3f3e806829d967118222bf1125eb/cryptography-48.0.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:86be3b1b0b6bf09482fb50a979c508d2950ed95f5621ec77f4e385962006b83a", size = 4696782, upload-time = "2026-06-09T22:31:45.615Z" }, + { url = "https://files.pythonhosted.org/packages/66/c2/81a4fb4e4373c500bb526bc337ac5719dd31dd15b970b84a238168c6aa08/cryptography-48.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:4ab0a343c807bbcd90c971cd1ecf072937cd01847a9e002bef88fb47ac6be577", size = 4696618, upload-time = "2026-06-09T22:31:11.564Z" }, + { url = "https://files.pythonhosted.org/packages/e5/0b/aa68b221dde92d09cb29a024ede17550ee21e77a404e59fc093c82bb51e1/cryptography-48.0.1-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:9621de99d2da096006b629979efd8ae7eb2d8b822488d0c89ee4000c306c59b1", size = 5289970, upload-time = "2026-06-09T22:31:20.368Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/13/fba657f958d2af66ea959a4ba01212632089249d34af1ae48054136344d7/cryptography-48.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:88c852a0ae366e262e5a1744b685e6a433dc8788dd2a277e418bf4904203609d", size = 4731873, upload-time = "2026-06-09T22:31:22.253Z" }, + { url = "https://files.pythonhosted.org/packages/4c/4c/9a964756d24a26b3e34dfcb16f961b89838786e6700b635b0d1e3adff4b6/cryptography-48.0.1-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:43c5835e2cb98c8733d86f57d6fc879b613f5c3478607281c3e36daffc6dd8a6", size = 4330804, upload-time = "2026-06-09T22:31:36.56Z" }, + { url = "https://files.pythonhosted.org/packages/4b/0f/a10f3a6eb12950a10e3a874070283aa2dd5875b2bfd15fad8a3e17b3f13e/cryptography-48.0.1-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:fe0180af5bf9236518a087e35bf2d9a347d5f5f51e63c579d683ddff424e3d46", size = 4696217, upload-time = "2026-06-09T22:31:13.351Z" }, + { url = "https://files.pythonhosted.org/packages/f3/6f/5cd12f951165ea73ef85266775d97e4c763b2474ccfd816dd69d3a18d6f8/cryptography-48.0.1-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:b7a2d1a937a738a881737cec135a38bb61470589b17515b9f73f571d0ae10401", size = 5245252, upload-time = "2026-06-09T22:32:02.193Z" }, + { url = "https://files.pythonhosted.org/packages/68/ab/8aaa12e4516ec4464033ab79b6f3b592bd5a92102467c4ace8a0d970203f/cryptography-48.0.1-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:b74ca3b8e5ecdd833bf6a002ca41b4793bb27fb8f1c06ffaf2643c9e9140e31b", size = 4731388, upload-time = "2026-06-09T22:32:04.019Z" }, + { url = "https://files.pythonhosted.org/packages/1b/24/50027ea4dca85ec1f40688f3c24fb32ccacd520583c9592c3cc95628e6fb/cryptography-48.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2c37f2461406063b417837f5f3daab668652acd82423efcd7f0a9f04be972de1", size = 4824186, upload-time = "2026-06-09T22:32:18.707Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/41/04cb5eb17085ade6f50cc611fb657df6a0f5885350de8764ece89c050197/cryptography-48.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:86fe77abb1bd87afb251d4d02ada7ecf53a32cee9b67d976abb2e45a13297475", size = 4964539, upload-time = "2026-06-09T22:31:18.793Z" }, + { url = "https://files.pythonhosted.org/packages/36/bf/ed70785c496e89d7e73b7cda2d21f2447fd6d4e821714b8d04ff217fed92/cryptography-48.0.1-cp39-abi3-win32.whl", hash = "sha256:6b2c0c3e6ccf3ade7750f836ef3ee36eea250cc467d45c256895573ac08cc6f1", size = 3282307, upload-time = "2026-06-09T22:30:53.162Z" }, + { url = "https://files.pythonhosted.org/packages/b3/ff/371ea7d252656ee1eb6d83eeeef3d1d0c6baf1d6497687d081ea03814670/cryptography-48.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:9a49ca6c81417f6a5edb50375a60cccdd70fa0a91a5211829dbea74eba94d2ac", size = 3793408, upload-time = "2026-06-09T22:32:15.191Z" }, +] + +[[package]] +name = "csscompressor" +version = "0.9.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/2a/8c3ac3d8bc94e6de8d7ae270bb5bc437b210bb9d6d9e46630c98f4abd20c/csscompressor-0.9.5.tar.gz", hash = "sha256:afa22badbcf3120a4f392e4d22f9fff485c044a1feda4a950ecc5eba9dd31a05", size = 237808, upload-time = "2017-11-26T21:13:08.238Z" } [[package]] name = "cuda-bindings" -version = "12.9.4" +version = "13.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cuda-pathfinder" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/c1/dabe88f52c3e3760d861401bb994df08f672ec893b8f7592dc91626adcf3/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fda147a344e8eaeca0c6ff113d2851ffca8f7dfc0a6c932374ee5c47caa649c8", size = 12151019, upload-time = "2025-10-21T14:51:43.167Z" }, - { url = 
"https://files.pythonhosted.org/packages/63/56/e465c31dc9111be3441a9ba7df1941fe98f4aa6e71e8788a3fb4534ce24d/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:32bdc5a76906be4c61eb98f546a6786c5773a881f3b166486449b5d141e4a39f", size = 11906628, upload-time = "2025-10-21T14:51:49.905Z" }, - { url = "https://files.pythonhosted.org/packages/a3/84/1e6be415e37478070aeeee5884c2022713c1ecc735e6d82d744de0252eee/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56e0043c457a99ac473ddc926fe0dc4046694d99caef633e92601ab52cbe17eb", size = 11925991, upload-time = "2025-10-21T14:51:56.535Z" }, - { url = "https://files.pythonhosted.org/packages/d1/af/6dfd8f2ed90b1d4719bc053ff8940e494640fe4212dc3dd72f383e4992da/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8b72ee72a9cc1b531db31eebaaee5c69a8ec3500e32c6933f2d3b15297b53686", size = 11922703, upload-time = "2025-10-21T14:52:03.585Z" }, - { url = "https://files.pythonhosted.org/packages/6c/19/90ac264acc00f6df8a49378eedec9fd2db3061bf9263bf9f39fd3d8377c3/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d80bffc357df9988dca279734bc9674c3934a654cab10cadeed27ce17d8635ee", size = 11924658, upload-time = "2025-10-21T14:52:10.411Z" }, + { url = "https://files.pythonhosted.org/packages/ce/67/5e7dba1ba576dd73da5dee894ca076ca5e959450dfff66d6d510a255d1f7/cuda_bindings-13.3.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7855c4868aabc0cfae28abbe83d56734bdfbd08f08fc234ac1912a12858bf49", size = 6025351, upload-time = "2026-05-29T23:11:49.685Z" }, + { url = "https://files.pythonhosted.org/packages/39/2a/6d2e9047d1fb243dbaa364b01e0297534b9ed7fd27dba1c9f361519cf69b/cuda_bindings-13.3.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e32d08f71ebcdf00f0f41eab2eb37e8da94c8ed411cc9f7f7a019ce6b34abe3a", 
size = 6657965, upload-time = "2026-05-29T23:11:52.227Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6e/2394f8163360f8391f8f1b7e72d300a82724edb81a7b7084c799fbd4c91f/cuda_bindings-13.3.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9efb21c1ee64981e184b9e0ba5eb3179e5ba3d4b51665a6cb52b8ef3d01a7cbf", size = 5920504, upload-time = "2026-05-29T23:11:56.883Z" }, + { url = "https://files.pythonhosted.org/packages/34/c2/ef9b6a63f7dc432712a462c816662e662e00d38caa9b861c8c2588195d03/cuda_bindings-13.3.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2732904099e0a4d4db774a5fc6d91ee95fae065b4d2ecabb4968c5fe2406c9d7", size = 6476660, upload-time = "2026-05-29T23:11:59.188Z" }, + { url = "https://files.pythonhosted.org/packages/b1/81/bff68ce829999c1e4209c761bbf903b1c06ec570416ddb25020864ad5907/cuda_bindings-13.3.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ab2f74ed65bfef4163ba07a8db16f1085e0729291db12a2423aff84ee8278b8", size = 6013639, upload-time = "2026-05-29T23:12:03.509Z" }, + { url = "https://files.pythonhosted.org/packages/d4/e0/c8a1f0c8f9ffdea4f5fe6dbab89b326cef4d85caf489dad39e209da89416/cuda_bindings-13.3.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:efd4c814d311ec08c981f6dded1dbe7d4b371067ee4f6c14cccec4bde9590f80", size = 6534419, upload-time = "2026-05-29T23:12:05.633Z" }, + { url = "https://files.pythonhosted.org/packages/52/b8/83b1f563925b290f2d11a01a77a84013ba56052fe3653a5bef3ccfbb43d6/cuda_bindings-13.3.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3c772dfff49681541d59630c90f858e173ac926b9c593a2b7123f2a1043cc76", size = 5809771, upload-time = "2026-05-29T23:12:10.422Z" }, + { url = "https://files.pythonhosted.org/packages/12/20/e79b4bfe98f075195afb6343d41c498f9dbd2d161d7021d4d28bceb83581/cuda_bindings-13.3.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:36febb7c1079d68a981dbbd8d5a67235b399802b82075c9388624719607e52b9", size = 6358584, upload-time = "2026-05-29T23:12:12.767Z" }, ] [[package]] name = "cuda-pathfinder" -version = "1.3.3" +version = "1.5.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/02/4dbe7568a42e46582248942f54dc64ad094769532adbe21e525e4edf7bc4/cuda_pathfinder-1.3.3-py3-none-any.whl", hash = "sha256:9984b664e404f7c134954a771be8775dfd6180ea1e1aef4a5a37d4be05d9bbb1", size = 27154, upload-time = "2025-12-04T22:35:08.996Z" }, + { url = "https://files.pythonhosted.org/packages/11/c8/26f2e4aae92f11522a96043892ba39a90eac610d5242523aa863212bc1c7/cuda_pathfinder-1.5.5-py3-none-any.whl", hash = "sha256:0228c023f95d1480f143ef5c8922d27a2ab052087a942e81dc289c9eb8f91689", size = 51671, upload-time = "2026-05-27T01:21:25.413Z" }, +] + +[[package]] +name = "cuda-toolkit" +version = "13.0.2" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/57/b2/453099f5f3b698d7d0eab38916aac44c7f76229f451709e2eb9db6615dcd/cuda_toolkit-13.0.2-py2.py3-none-any.whl", hash = "sha256:b198824cf2f54003f50d64ada3a0f184b42ca0846c1c94192fa269ecd97a66eb", size = 2364, upload-time = "2025-12-19T23:24:07.328Z" }, +] + +[package.optional-dependencies] +cudart = [ + { name = "nvidia-cuda-runtime", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cufft = [ + { name = "nvidia-cufft", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cufile = [ + { name = "nvidia-cufile", marker = "sys_platform == 'linux'" }, +] +cupti = [ + { name = "nvidia-cuda-cupti", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +curand = [ + { name = "nvidia-curand", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cusolver = [ + { name = "nvidia-cusolver", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +cusparse = [ + { name = 
"nvidia-cusparse", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +nvjitlink = [ + { name = "nvidia-nvjitlink", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +nvrtc = [ + { name = "nvidia-cuda-nvrtc", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] +nvtx = [ + { name = "nvidia-nvtx", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, ] [[package]] @@ -804,15 +949,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/77/dc8c558f7593132cf8fefec57c4f60c83b16941c574ac5f619abb3ae7933/dill-0.4.1-py3-none-any.whl", hash = "sha256:1e1ce33e978ae97fcfcff5638477032b801c46c7c65cf717f95fbc2248f79a9d", size = 120019, upload-time = "2026-01-19T02:36:55.663Z" }, ] -[[package]] -name = "diskcache" -version = "5.6.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916, upload-time = "2023-08-31T06:12:00.316Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550, upload-time = "2023-08-31T06:11:58.822Z" }, -] - [[package]] name = "distlib" version = "0.4.0" @@ -933,24 +1069,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/47/21867c2e5fd006c8d36a560df9e32cb4f1f566b20c5dd41f5f8a2124f7de/face-24.0.0-py3-none-any.whl", hash = "sha256:0e2c17b426fa4639a4e77d1de9580f74a98f4869ba4c7c8c175b810611622cd3", size = 54742, upload-time = "2024-11-02T05:24:24.939Z" }, ] -[[package]] -name = "fakeredis" -version = "2.33.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "redis" }, - { name = "sortedcontainers" }, -] 
-sdist = { url = "https://files.pythonhosted.org/packages/5f/f9/57464119936414d60697fcbd32f38909bb5688b616ae13de6e98384433e0/fakeredis-2.33.0.tar.gz", hash = "sha256:d7bc9a69d21df108a6451bbffee23b3eba432c21a654afc7ff2d295428ec5770", size = 175187, upload-time = "2025-12-16T19:45:52.269Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/78/a850fed8aeef96d4a99043c90b818b2ed5419cd5b24a4049fd7cfb9f1471/fakeredis-2.33.0-py3-none-any.whl", hash = "sha256:de535f3f9ccde1c56672ab2fdd6a8efbc4f2619fc2f1acc87b8737177d71c965", size = 119605, upload-time = "2025-12-16T19:45:51.08Z" }, -] - -[package.optional-dependencies] -lua = [ - { name = "lupa" }, -] - [[package]] name = "fastavro" version = "1.12.1" @@ -988,32 +1106,63 @@ wheels = [ [[package]] name = "fastmcp" -version = "2.14.4" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastmcp-slim", extra = ["client", "server"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3b/a9/5c5a01b6abd5346bf60b97cfd29e4a86661940c27dd562bfcda07fd03519/fastmcp-3.3.1.tar.gz", hash = "sha256:979362ea557de42a5f40342563c7e4b236bcc8e7cd192715f50030695d1a71cd", size = 28681699, upload-time = "2026-05-15T15:50:39.673Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/11/6b1bdada6ccfe647d615ae63f9106f8136aec17971e9361546af01c7d38e/fastmcp-3.3.1-py3-none-any.whl", hash = "sha256:862440c5c4d281363a5995eee59d77f0f7cac1f18869038729cecf03b02fc522", size = 7903, upload-time = "2026-05-15T15:50:36.424Z" }, +] + +[[package]] +name = "fastmcp-slim" +version = "3.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "platformdirs" }, + { name = "pydantic", extra = ["email"] }, + { name = "pydantic-settings" }, + { name = "python-dotenv" }, + { name = "rich" }, + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/d1/a0/627103e517e1d0d6f1eec633d5662d13e776f01b45ad188e4f5f7478b438/fastmcp_slim-3.3.1.tar.gz", hash = "sha256:0957835fc59452e143ab2f4b7836d2d2df9b2d9958408edc79ba8b56232b2a88", size = 567007, upload-time = "2026-05-15T15:50:10.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/ee/97047f4cc2d7b1d46670d08d8ad01a96e7a748cc01c0b4b351ad8eddbc7a/fastmcp_slim-3.3.1-py3-none-any.whl", hash = "sha256:6cf1c2d77e3adb0d409d6825ed6b0b2a999062973e00b8eea03bd48bf9b4c043", size = 738644, upload-time = "2026-05-15T15:50:08.336Z" }, +] + +[package.optional-dependencies] +client = [ + { name = "authlib" }, + { name = "exceptiongroup" }, + { name = "httpx" }, + { name = "mcp" }, + { name = "opentelemetry-api" }, + { name = "py-key-value-aio", extra = ["filetree", "keyring", "memory"] }, +] +server = [ { name = "authlib" }, { name = "cyclopts" }, { name = "exceptiongroup" }, + { name = "griffelib" }, { name = "httpx" }, { name = "jsonref" }, { name = "jsonschema-path" }, { name = "mcp" }, { name = "openapi-pydantic" }, + { name = "opentelemetry-api" }, { name = "packaging" }, - { name = "platformdirs" }, - { name = "py-key-value-aio", extra = ["disk", "keyring", "memory"] }, - { name = "pydantic", extra = ["email"] }, - { name = "pydocket" }, + { name = "py-key-value-aio", extra = ["filetree", "keyring", "memory"] }, { name = "pyperclip" }, - { name = "python-dotenv" }, - { name = "rich" }, + { name = "python-multipart" }, + { name = "pyyaml" }, + { name = "uncalled-for" }, { name = "uvicorn" }, + { name = "watchfiles" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fd/a9/a57d5e5629ebd4ef82b495a7f8e346ce29ef80cc86b15c8c40570701b94d/fastmcp-2.14.4.tar.gz", hash = "sha256:c01f19845c2adda0a70d59525c9193be64a6383014c8d40ce63345ac664053ff", size = 8302239, upload-time = "2026-01-22T17:29:37.024Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/3e/41/c4d407e2218fd60d84acb6cc5131d28ff876afecf325e3fd9d27b8318581/fastmcp-2.14.4-py3-none-any.whl", hash = "sha256:5858cff5e4c8ea8107f9bca2609d71d6256e0fce74495912f6e51625e466c49a", size = 417788, upload-time = "2026-01-22T17:29:35.159Z" }, -] [[package]] name = "filelock" @@ -1124,15 +1273,27 @@ wheels = [ [[package]] name = "genai-prices" -version = "0.0.51" +version = "0.0.61" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3d/22/427934ef8e7ed29c35afc274666b87fe01a3a27ec7ff102f5839ce4723c0/genai_prices-0.0.51.tar.gz", hash = "sha256:003da98172641c94d7516b0fd8cec5ecf2dbab64a884996c26cc194c5e0b592e", size = 58071, upload-time = "2026-01-13T12:49:11.872Z" } +sdist = { url = "https://files.pythonhosted.org/packages/65/71/0c76010eec75f4b3623d521044785c0977c14adabe1cac72b004349567fb/genai_prices-0.0.61.tar.gz", hash = "sha256:4b3bcfd49f174c05831b09f9ee36557d3648569e2f594af6c24b72031b3f0e52", size = 67806, upload-time = "2026-05-19T17:01:36.902Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4a/af/b11b80d02aaefc2fc6bfaabb3ae873439c90dc464b3a29eda51b969842b0/genai_prices-0.0.51-py3-none-any.whl", hash = "sha256:4e0f5892a7ec757d59f343c5dbf9675b0f9e8ed65f4fe26ac7df600e34788ca0", size = 60656, upload-time = "2026-01-13T12:49:12.867Z" }, + { url = "https://files.pythonhosted.org/packages/de/ec/b08dc2e834ca00fd8dfedcb17ae2e920667adaad617b45e32b7a3b146f24/genai_prices-0.0.61-py3-none-any.whl", hash = "sha256:d77142f61c13e69909ac19c8e44fd315fd65f3afd714e8d55e914fab0eaf47a2", size = 70853, upload-time = "2026-05-19T17:01:37.858Z" }, +] + +[[package]] +name = "ghp-import" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/d9/29/d40217cbe2f6b1359e00c6c307bb3fc876ba74068cbab3dde77f03ca0dc4/ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343", size = 10943, upload-time = "2022-05-02T15:47:16.11Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/ec/67fbef5d497f86283db54c22eec6f6140243aae73265799baaaa19cd17fb/ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619", size = 11034, upload-time = "2022-05-02T15:47:14.552Z" }, ] [[package]] @@ -1151,15 +1312,15 @@ wheels = [ [[package]] name = "google-auth" -version = "2.47.0" +version = "2.53.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "cryptography" }, { name = "pyasn1-modules" }, - { name = "rsa" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/60/3c/ec64b9a275ca22fa1cd3b6e77fefcf837b0732c890aa32d2bd21313d9b33/google_auth-2.47.0.tar.gz", hash = "sha256:833229070a9dfee1a353ae9877dcd2dec069a8281a4e72e72f77d4a70ff945da", size = 323719, upload-time = "2026-01-06T21:55:31.045Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c6/ad/ff781329bbbdc0974a098d996e89c9e1f7024262f9e3eec442fbb9ad1ac6/google_auth-2.53.0.tar.gz", hash = "sha256:e7e6aa16f6bee7b2b264830fd04f08087a1d5a836df516251a5d15327b246c9c", size = 335844, upload-time = "2026-05-15T20:53:07.928Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/db/18/79e9008530b79527e0d5f79e7eef08d3b179b7f851cfd3a2f27822fbdfa9/google_auth-2.47.0-py3-none-any.whl", hash = "sha256:c516d68336bfde7cf0da26aab674a36fedcf04b37ac4edd59c597178760c3498", size = 234867, upload-time = "2026-01-06T21:55:28.6Z" }, + { url = "https://files.pythonhosted.org/packages/4a/c9/db44165ba7c581268c6d46017ef63339110378305062830104fc7fa144cb/google_auth-2.53.0-py3-none-any.whl", hash = "sha256:6e7449917c599b35126a99ec268ec6880301f2fea41dce198fe8fd83ff642b68", size = 246071, 
upload-time = "2026-05-15T20:53:05.609Z" }, ] [package.optional-dependencies] @@ -1169,7 +1330,7 @@ requests = [ [[package]] name = "google-genai" -version = "1.60.0" +version = "2.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1183,9 +1344,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0a/3f/a753be0dcee352b7d63bc6d1ba14a72591d63b6391dac0cdff7ac168c530/google_genai-1.60.0.tar.gz", hash = "sha256:9768061775fddfaecfefb0d6d7a6cabefb3952ebd246cd5f65247151c07d33d1", size = 487721, upload-time = "2026-01-21T22:17:30.398Z" } +sdist = { url = "https://files.pythonhosted.org/packages/dd/ec/6e49f50f5c70588d97c6ed25e0b8c18828bf4d58895f397b53a7522168a1/google_genai-2.6.0.tar.gz", hash = "sha256:7d4f777234002f2e94be499dbdfb43b506a6aca9dbbec13e61d3dc6ce640ffa7", size = 554809, upload-time = "2026-05-22T01:34:33.581Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/31/e5/384b1f383917b5f0ae92e28f47bc27b16e3d26cd9bacb25e9f8ecab3c8fe/google_genai-1.60.0-py3-none-any.whl", hash = "sha256:967338378ffecebec19a8ed90cf8797b26818bacbefd7846a9280beb1099f7f3", size = 719431, upload-time = "2026-01-21T22:17:28.086Z" }, + { url = "https://files.pythonhosted.org/packages/b3/9e/e8ba4e58a9d5daf42343f3ea1cb0efb721eba36a1d6624e9873d039a5c1e/google_genai-2.6.0-py3-none-any.whl", hash = "sha256:272b6f6320f5d355735241ad441f972af095ec80dc10cb075cb430d96721648a", size = 821003, upload-time = "2026-05-22T01:34:31.55Z" }, ] [[package]] @@ -1212,6 +1373,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9c/83/3b1d03d36f224edded98e9affd0467630fc09d766c0e56fb1498cbb04a9b/griffe-1.15.0-py3-none-any.whl", hash = "sha256:6f6762661949411031f5fcda9593f586e6ce8340f0ba88921a0f2ef7a81eb9a3", size = 150705, upload-time = "2025-11-10T15:03:13.549Z" }, ] +[[package]] +name = "griffelib" +version = "2.0.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ad/06/eccbd311c9e2b3ca45dbc063b93134c57a1ccc7607c5e545264ad092c4a9/griffelib-2.0.0.tar.gz", hash = "sha256:e504d637a089f5cab9b5daf18f7645970509bf4f53eda8d79ed71cce8bd97934", size = 166312, upload-time = "2026-03-23T21:06:55.954Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/51/c936033e16d12b627ea334aaaaf42229c37620d0f15593456ab69ab48161/griffelib-2.0.0-py3-none-any.whl", hash = "sha256:01284878c966508b6d6f1dbff9b6fa607bc062d8261c5c7253cb285b06422a7f", size = 142004, upload-time = "2026-02-09T19:09:40.561Z" }, +] + [[package]] name = "groq" version = "1.0.0" @@ -1294,31 +1464,34 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020, upload-time = "2025-10-24T19:04:32.129Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/a5/85ef910a0aa034a2abcfadc360ab5ac6f6bc4e9112349bd40ca97551cff0/hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649", size = 2861870, upload-time = "2025-10-24T19:04:11.422Z" }, - { url = "https://files.pythonhosted.org/packages/ea/40/e2e0a7eb9a51fe8828ba2d47fe22a7e74914ea8a0db68a18c3aa7449c767/hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813", size = 2717584, upload-time = "2025-10-24T19:04:09.586Z" }, - { url = "https://files.pythonhosted.org/packages/a5/7d/daf7f8bc4594fdd59a8a596f9e3886133fdc68e675292218a5e4c1b7e834/hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc", size = 3315004, upload-time = "2025-10-24T19:04:00.314Z" }, - { url = "https://files.pythonhosted.org/packages/b1/ba/45ea2f605fbf6d81c8b21e4d970b168b18a53515923010c312c06cd83164/hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5", size = 3222636, upload-time = "2025-10-24T19:03:58.111Z" }, - { url = "https://files.pythonhosted.org/packages/4a/1d/04513e3cab8f29ab8c109d309ddd21a2705afab9d52f2ba1151e0c14f086/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f", size = 3408448, upload-time = "2025-10-24T19:04:20.951Z" }, - { url = "https://files.pythonhosted.org/packages/f0/7c/60a2756d7feec7387db3a1176c632357632fbe7849fce576c5559d4520c7/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832", size = 3503401, upload-time = "2025-10-24T19:04:22.549Z" }, - { url = "https://files.pythonhosted.org/packages/4e/64/48fffbd67fb418ab07451e4ce641a70de1c40c10a13e25325e24858ebe5a/hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382", size = 2900866, upload-time = "2025-10-24T19:04:33.461Z" }, - { url = "https://files.pythonhosted.org/packages/e2/51/f7e2caae42f80af886db414d4e9885fac959330509089f97cccb339c6b87/hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e", size = 2861861, upload-time = "2025-10-24T19:04:19.01Z" }, - { url = "https://files.pythonhosted.org/packages/6e/1d/a641a88b69994f9371bd347f1dd35e5d1e2e2460a2e350c8d5165fc62005/hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8", size = 2717699, upload-time = "2025-10-24T19:04:17.306Z" }, 
- { url = "https://files.pythonhosted.org/packages/df/e0/e5e9bba7d15f0318955f7ec3f4af13f92e773fbb368c0b8008a5acbcb12f/hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0", size = 3314885, upload-time = "2025-10-24T19:04:07.642Z" }, - { url = "https://files.pythonhosted.org/packages/21/90/b7fe5ff6f2b7b8cbdf1bd56145f863c90a5807d9758a549bf3d916aa4dec/hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090", size = 3221550, upload-time = "2025-10-24T19:04:05.55Z" }, - { url = "https://files.pythonhosted.org/packages/6f/cb/73f276f0a7ce46cc6a6ec7d6c7d61cbfe5f2e107123d9bbd0193c355f106/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a", size = 3408010, upload-time = "2025-10-24T19:04:28.598Z" }, - { url = "https://files.pythonhosted.org/packages/b8/1e/d642a12caa78171f4be64f7cd9c40e3ca5279d055d0873188a58c0f5fbb9/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f", size = 3503264, upload-time = "2025-10-24T19:04:30.397Z" }, - { url = "https://files.pythonhosted.org/packages/17/b5/33764714923fa1ff922770f7ed18c2daae034d21ae6e10dbf4347c854154/hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc", size = 2901071, upload-time = "2025-10-24T19:04:37.463Z" }, - { url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099, upload-time = "2025-10-24T19:04:15.366Z" }, - { url = 
"https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178, upload-time = "2025-10-24T19:04:13.695Z" }, - { url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214, upload-time = "2025-10-24T19:04:03.596Z" }, - { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054, upload-time = "2025-10-24T19:04:01.949Z" }, - { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812, upload-time = "2025-10-24T19:04:24.585Z" }, - { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920, upload-time = "2025-10-24T19:04:26.927Z" }, - { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, +version = "1.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/09/08/23c84a26716382c89151b5b447b4beb19e3345f3a93d3b73009a71a57ad3/hf_xet-1.4.2.tar.gz", hash = "sha256:b7457b6b482d9e0743bd116363239b1fa904a5e65deede350fbc0c4ea67c71ea", size = 672357, upload-time = "2026-03-13T06:58:51.077Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/06/e8cf74c3c48e5485c7acc5a990d0d8516cdfb5fdf80f799174f1287cc1b5/hf_xet-1.4.2-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ac8202ae1e664b2c15cdfc7298cbb25e80301ae596d602ef7870099a126fcad4", size = 3796125, upload-time = "2026-03-13T06:58:33.177Z" }, + { url = "https://files.pythonhosted.org/packages/66/d4/b73ebab01cbf60777323b7de9ef05550790451eb5172a220d6b9845385ec/hf_xet-1.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6d2f8ee39fa9fba9af929f8c0d0482f8ee6e209179ad14a909b6ad78ffcb7c81", size = 3555985, upload-time = "2026-03-13T06:58:31.797Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e7/ded6d1bd041c3f2bca9e913a0091adfe32371988e047dd3a68a2463c15a2/hf_xet-1.4.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4642a6cf249c09da8c1f87fe50b24b2a3450b235bf8adb55700b52f0ea6e2eb6", size = 4212085, upload-time = "2026-03-13T06:58:24.323Z" }, + { url = "https://files.pythonhosted.org/packages/97/c1/a0a44d1f98934f7bdf17f7a915b934f9fca44bb826628c553589900f6df8/hf_xet-1.4.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:769431385e746c92dc05492dde6f687d304584b89c33d79def8367ace06cb555", size = 3988266, upload-time = "2026-03-13T06:58:22.887Z" }, + { url = "https://files.pythonhosted.org/packages/7a/82/be713b439060e7d1f1d93543c8053d4ef2fe7e6922c5b31642eaa26f3c4b/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c9dd1c1bc4cc56168f81939b0e05b4c36dd2d28c13dc1364b17af89aa0082496", size = 4188513, upload-time = "2026-03-13T06:58:40.858Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/a6/cbd4188b22abd80ebd0edbb2b3e87f2633e958983519980815fb8314eae5/hf_xet-1.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:fca58a2ae4e6f6755cc971ac6fcdf777ea9284d7e540e350bb000813b9a3008d", size = 4428287, upload-time = "2026-03-13T06:58:42.601Z" }, + { url = "https://files.pythonhosted.org/packages/b2/4e/84e45b25e2e3e903ed3db68d7eafa96dae9a1d1f6d0e7fc85120347a852f/hf_xet-1.4.2-cp313-cp313t-win_amd64.whl", hash = "sha256:163aab46854ccae0ab6a786f8edecbbfbaa38fcaa0184db6feceebf7000c93c0", size = 3665574, upload-time = "2026-03-13T06:58:53.881Z" }, + { url = "https://files.pythonhosted.org/packages/ee/71/c5ac2b9a7ae39c14e91973035286e73911c31980fe44e7b1d03730c00adc/hf_xet-1.4.2-cp313-cp313t-win_arm64.whl", hash = "sha256:09b138422ecbe50fd0c84d4da5ff537d27d487d3607183cd10e3e53f05188e82", size = 3528760, upload-time = "2026-03-13T06:58:52.187Z" }, + { url = "https://files.pythonhosted.org/packages/1e/0f/fcd2504015eab26358d8f0f232a1aed6b8d363a011adef83fe130bff88f7/hf_xet-1.4.2-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:949dcf88b484bb9d9276ca83f6599e4aa03d493c08fc168c124ad10b2e6f75d7", size = 3796493, upload-time = "2026-03-13T06:58:39.267Z" }, + { url = "https://files.pythonhosted.org/packages/82/56/19c25105ff81731ca6d55a188b5de2aa99d7a2644c7aa9de1810d5d3b726/hf_xet-1.4.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:41659966020d59eb9559c57de2cde8128b706a26a64c60f0531fa2318f409418", size = 3555797, upload-time = "2026-03-13T06:58:37.546Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/8933c073186849b5e06762aa89847991d913d10a95d1603eb7f2c3834086/hf_xet-1.4.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c588e21d80010119458dd5d02a69093f0d115d84e3467efe71ffb2c67c19146", size = 4212127, upload-time = "2026-03-13T06:58:30.539Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/01/f89ebba4e369b4ed699dcb60d3152753870996f41c6d22d3d7cac01310e1/hf_xet-1.4.2-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a296744d771a8621ad1d50c098d7ab975d599800dae6d48528ba3944e5001ba0", size = 3987788, upload-time = "2026-03-13T06:58:29.139Z" }, + { url = "https://files.pythonhosted.org/packages/84/4d/8a53e5ffbc2cc33bbf755382ac1552c6d9af13f623ed125fe67cc3e6772f/hf_xet-1.4.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f563f7efe49588b7d0629d18d36f46d1658fe7e08dce3fa3d6526e1c98315e2d", size = 4188315, upload-time = "2026-03-13T06:58:48.017Z" }, + { url = "https://files.pythonhosted.org/packages/d1/b8/b7a1c1b5592254bd67050632ebbc1b42cc48588bf4757cb03c2ef87e704a/hf_xet-1.4.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5b2e0132c56d7ee1bf55bdb638c4b62e7106f6ac74f0b786fed499d5548c5570", size = 4428306, upload-time = "2026-03-13T06:58:49.502Z" }, + { url = "https://files.pythonhosted.org/packages/a0/0c/40779e45b20e11c7c5821a94135e0207080d6b3d76e7b78ccb413c6f839b/hf_xet-1.4.2-cp314-cp314t-win_amd64.whl", hash = "sha256:2f45c712c2fa1215713db10df6ac84b49d0e1c393465440e9cb1de73ecf7bbf6", size = 3665826, upload-time = "2026-03-13T06:58:59.88Z" }, + { url = "https://files.pythonhosted.org/packages/51/4c/e2688c8ad1760d7c30f7c429c79f35f825932581bc7c9ec811436d2f21a0/hf_xet-1.4.2-cp314-cp314t-win_arm64.whl", hash = "sha256:6d53df40616f7168abfccff100d232e9d460583b9d86fa4912c24845f192f2b8", size = 3529113, upload-time = "2026-03-13T06:58:58.491Z" }, + { url = "https://files.pythonhosted.org/packages/b4/86/b40b83a2ff03ef05c4478d2672b1fc2b9683ff870e2b25f4f3af240f2e7b/hf_xet-1.4.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:71f02d6e4cdd07f344f6844845d78518cc7186bd2bc52d37c3b73dc26a3b0bc5", size = 3800339, upload-time = "2026-03-13T06:58:36.245Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/2e/af4475c32b4378b0e92a587adb1aa3ec53e3450fd3e5fe0372a874531c00/hf_xet-1.4.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e9b38d876e94d4bdcf650778d6ebbaa791dd28de08db9736c43faff06ede1b5a", size = 3559664, upload-time = "2026-03-13T06:58:34.787Z" }, + { url = "https://files.pythonhosted.org/packages/3c/4c/781267da3188db679e601de18112021a5cb16506fe86b246e22c5401a9c4/hf_xet-1.4.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:77e8c180b7ef12d8a96739a4e1e558847002afe9ea63b6f6358b2271a8bdda1c", size = 4217422, upload-time = "2026-03-13T06:58:27.472Z" }, + { url = "https://files.pythonhosted.org/packages/68/47/d6cf4a39ecf6c7705f887a46f6ef5c8455b44ad9eb0d391aa7e8a2ff7fea/hf_xet-1.4.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c3b3c6a882016b94b6c210957502ff7877802d0dbda8ad142c8595db8b944271", size = 3992847, upload-time = "2026-03-13T06:58:25.989Z" }, + { url = "https://files.pythonhosted.org/packages/2d/ef/e80815061abff54697239803948abc665c6b1d237102c174f4f7a9a5ffc5/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9d9a634cc929cfbaf2e1a50c0e532ae8c78fa98618426769480c58501e8c8ac2", size = 4193843, upload-time = "2026-03-13T06:58:44.59Z" }, + { url = "https://files.pythonhosted.org/packages/54/75/07f6aa680575d9646c4167db6407c41340cbe2357f5654c4e72a1b01ca14/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6b0932eb8b10317ea78b7da6bab172b17be03bbcd7809383d8d5abd6a2233e04", size = 4432751, upload-time = "2026-03-13T06:58:46.533Z" }, + { url = "https://files.pythonhosted.org/packages/cd/71/193eabd7e7d4b903c4aa983a215509c6114915a5a237525ec562baddb868/hf_xet-1.4.2-cp37-abi3-win_amd64.whl", hash = "sha256:ad185719fb2e8ac26f88c8100562dbf9dbdcc3d9d2add00faa94b5f106aea53f", size = 3671149, upload-time = "2026-03-13T06:58:57.07Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/7e/ccf239da366b37ba7f0b36095450efae4a64980bdc7ec2f51354205fdf39/hf_xet-1.4.2-cp37-abi3-win_arm64.whl", hash = "sha256:32c012286b581f783653e718c1862aea5b9eb140631685bb0c5e7012c8719a87", size = 3533426, upload-time = "2026-03-13T06:58:55.46Z" }, ] [[package]] @@ -1330,6 +1503,14 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, ] +[[package]] +name = "htmlmin2" +version = "0.1.13" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/31/a76f4bfa885f93b8167cb4c85cf32b54d1f64384d0b897d45bc6d19b7b45/htmlmin2-0.1.13-py3-none-any.whl", hash = "sha256:75609f2a42e64f7ce57dbff28a39890363bde9e7e5885db633317efbdf8c79a2", size = 34486, upload-time = "2023-03-14T21:28:30.388Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -1374,30 +1555,28 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "0.36.0" +version = "1.7.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "fsspec" }, - { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, { name = "packaging" }, { name = "pyyaml" }, - { name = "requests" }, { name = "tqdm" }, + { name = "typer" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/98/63/4910c5fa9128fdadf6a9c5ac138e8b1b6cee4ca44bf7915bbfbce4e355ee/huggingface_hub-0.36.0.tar.gz", hash = 
"sha256:47b3f0e2539c39bf5cde015d63b72ec49baff67b6931c3d97f3f84532e2b8d25", size = 463358, upload-time = "2025-10-23T12:12:01.413Z" } +sdist = { url = "https://files.pythonhosted.org/packages/19/15/eafc1c57bf0f8afffb243dcd4c0cceb785e956acc17bba4d9bf2ae21fc9c/huggingface_hub-1.7.2.tar.gz", hash = "sha256:7f7e294e9bbb822e025bdb2ada025fa4344d978175a7f78e824d86e35f7ab43b", size = 724684, upload-time = "2026-03-20T10:36:08.767Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/bd/1a875e0d592d447cbc02805fd3fe0f497714d6a2583f59d14fa9ebad96eb/huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d", size = 566094, upload-time = "2025-10-23T12:11:59.557Z" }, + { url = "https://files.pythonhosted.org/packages/08/de/3ad061a05f74728927ded48c90b73521b9a9328c85d841bdefb30e01fb85/huggingface_hub-1.7.2-py3-none-any.whl", hash = "sha256:288f33a0a17b2a73a1359e2a5fd28d1becb2c121748c6173ab8643fb342c850e", size = 618036, upload-time = "2026-03-20T10:36:06.824Z" }, ] [package.optional-dependencies] hf-xet = [ { name = "hf-xet" }, ] -inference = [ - { name = "aiohttp" }, -] [[package]] name = "hyperframe" @@ -1419,11 +1598,11 @@ wheels = [ [[package]] name = "idna" -version = "3.11" +version = "3.15" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +sdist = { url = "https://files.pythonhosted.org/packages/82/77/7b3966d0b9d1d31a36ddf1746926a11dface89a83409bf1483f0237aa758/idna-3.15.tar.gz", hash = "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc", size = 199245, upload-time = "2026-05-12T22:45:57.011Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, + { url = "https://files.pythonhosted.org/packages/d2/23/408243171aa9aaba178d3e2559159c24c1171a641aa83b67bdd3394ead8e/idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", size = 72340, upload-time = "2026-05-12T22:45:55.733Z" }, ] [[package]] @@ -1447,15 +1626,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] -[[package]] -name = "invoke" -version = "2.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/de/bd/b461d3424a24c80490313fd77feeb666ca4f6a28c7e72713e3d9095719b4/invoke-2.2.1.tar.gz", hash = "sha256:515bf49b4a48932b79b024590348da22f39c4942dff991ad1fb8b8baea1be707", size = 304762, upload-time = "2025-10-11T00:36:35.172Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/4b/b99e37f88336009971405cbb7630610322ed6fbfa31e1d7ab3fbf3049a2d/invoke-2.2.1-py3-none-any.whl", hash = "sha256:2413bc441b376e5cd3f55bb5d364f973ad8bdd7bf87e53c79de3c11bf3feecc8", size = 160287, upload-time = "2025-10-11T00:36:33.703Z" }, -] - [[package]] name = "isort" version = "7.0.0" @@ -1596,6 +1766,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, ] +[[package]] +name = 
"joserfc" +version = "1.6.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1b/cb/52e479f20804904f5df20ac4539d292dcecd1287aaa33cba1d1def1d9d8e/joserfc-1.6.7.tar.gz", hash = "sha256:6999fe89457069ecacd8cc797c88a805f83054dd883333fa0409f74b46479fd7", size = 232158, upload-time = "2026-05-23T01:46:44.069Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/e4/bcf6718b5662894c6831f46296b73cd4b1a2e90c20b6d437e20c4997388c/joserfc-1.6.7-py3-none-any.whl", hash = "sha256:9e51e4a64840aa1734a058258e80a4480e2ff2d5686e480e7c92c954a92fbe05", size = 70603, upload-time = "2026-05-23T01:46:42.129Z" }, +] + +[[package]] +name = "jsmin" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/73/e01e4c5e11ad0494f4407a3f623ad4d87714909f50b17a06ed121034ff6e/jsmin-3.0.1.tar.gz", hash = "sha256:c0959a121ef94542e807a674142606f7e90214a2b3d1eb17300244bbb5cc2bfc", size = 13925, upload-time = "2022-01-16T20:35:59.13Z" } + +[[package]] +name = "jsonpath-python" +version = "1.1.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/18/4ca8742534a5993ff383f7602e325ce2d5d7cc93d72ac5e1cdedbea8a458/jsonpath_python-1.1.6.tar.gz", hash = "sha256:dded9932b4ec41fb8726e09c83afa4e6be618f938c2db287cc2a81723c639671", size = 88178, upload-time = "2026-05-07T01:26:34.482Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/8a/1270a6803bd821cbfcdda387eaa13cb41a7b1f7b9bd145979b3bfb9d6cb7/jsonpath_python-1.1.6-py3-none-any.whl", hash = "sha256:a1c50afd8d3fbbaf47a4873bc890dcb3c15da96f5c020327977d844d8731a2d4", size = 14453, upload-time = "2026-05-07T01:26:33.306Z" }, +] + [[package]] name = "jsonref" version = "1.1.0" @@ -1664,6 +1861,23 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" }, ] +[[package]] +name = "libclang" +version = "18.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6e/5c/ca35e19a4f142adffa27e3d652196b7362fa612243e2b916845d801454fc/libclang-18.1.1.tar.gz", hash = "sha256:a1214966d08d73d971287fc3ead8dfaf82eb07fb197680d8b3859dbbbbf78250", size = 39612, upload-time = "2024-03-17T16:04:37.434Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/49/f5e3e7e1419872b69f6f5e82ba56e33955a74bd537d8a1f5f1eff2f3668a/libclang-18.1.1-1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:0b2e143f0fac830156feb56f9231ff8338c20aecfe72b4ffe96f19e5a1dbb69a", size = 25836045, upload-time = "2024-06-30T17:40:31.646Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e5/fc61bbded91a8830ccce94c5294ecd6e88e496cc85f6704bf350c0634b70/libclang-18.1.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:6f14c3f194704e5d09769108f03185fce7acaf1d1ae4bbb2f30a72c2400cb7c5", size = 26502641, upload-time = "2024-03-18T15:52:26.722Z" }, + { url = "https://files.pythonhosted.org/packages/db/ed/1df62b44db2583375f6a8a5e2ca5432bbdc3edb477942b9b7c848c720055/libclang-18.1.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:83ce5045d101b669ac38e6da8e58765f12da2d3aafb3b9b98d88b286a60964d8", size = 26420207, upload-time = "2024-03-17T15:00:26.63Z" }, + { url = "https://files.pythonhosted.org/packages/1d/fc/716c1e62e512ef1c160e7984a73a5fc7df45166f2ff3f254e71c58076f7c/libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl", hash = "sha256:c533091d8a3bbf7460a00cb6c1a71da93bffe148f172c7d03b1c31fbf8aa2a0b", size = 24515943, upload-time = "2024-03-17T16:03:45.942Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/3d/f0ac1150280d8d20d059608cf2d5ff61b7c3b7f7bcf9c0f425ab92df769a/libclang-18.1.1-py2.py3-none-manylinux2014_aarch64.whl", hash = "sha256:54dda940a4a0491a9d1532bf071ea3ef26e6dbaf03b5000ed94dd7174e8f9592", size = 23784972, upload-time = "2024-03-17T16:12:47.677Z" }, + { url = "https://files.pythonhosted.org/packages/fe/2f/d920822c2b1ce9326a4c78c0c2b4aa3fde610c7ee9f631b600acb5376c26/libclang-18.1.1-py2.py3-none-manylinux2014_armv7l.whl", hash = "sha256:cf4a99b05376513717ab5d82a0db832c56ccea4fd61a69dbb7bccf2dfb207dbe", size = 20259606, upload-time = "2024-03-17T16:17:42.437Z" }, + { url = "https://files.pythonhosted.org/packages/2d/c2/de1db8c6d413597076a4259cea409b83459b2db997c003578affdd32bf66/libclang-18.1.1-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:69f8eb8f65c279e765ffd28aaa7e9e364c776c17618af8bff22a8df58677ff4f", size = 24921494, upload-time = "2024-03-17T16:14:20.132Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2d/3f480b1e1d31eb3d6de5e3ef641954e5c67430d5ac93b7fa7e07589576c7/libclang-18.1.1-py2.py3-none-win_amd64.whl", hash = "sha256:4dd2d3b82fab35e2bf9ca717d7b63ac990a3519c7e312f19fa8e86dcc712f7fb", size = 26415083, upload-time = "2024-03-17T16:42:21.703Z" }, + { url = "https://files.pythonhosted.org/packages/71/cf/e01dc4cc79779cd82d77888a88ae2fa424d93b445ad4f6c02bfc18335b70/libclang-18.1.1-py2.py3-none-win_arm64.whl", hash = "sha256:3f0e1f49f04d3cd198985fea0511576b0aee16f9ff0e0f0cad7f9c57ec3c20e8", size = 22361112, upload-time = "2024-03-17T16:42:59.565Z" }, +] + [[package]] name = "logfire" version = "4.19.0" @@ -1709,58 +1923,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, ] -[[package]] -name = "lupa" -version = "2.6" -source = { registry = 
"https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b8/1c/191c3e6ec6502e3dbe25a53e27f69a5daeac3e56de1f73c0138224171ead/lupa-2.6.tar.gz", hash = "sha256:9a770a6e89576be3447668d7ced312cd6fd41d3c13c2462c9dc2c2ab570e45d9", size = 7240282, upload-time = "2025-10-24T07:20:29.738Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/94/86/ce243390535c39d53ea17ccf0240815e6e457e413e40428a658ea4ee4b8d/lupa-2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:47ce718817ef1cc0c40d87c3d5ae56a800d61af00fbc0fad1ca9be12df2f3b56", size = 951707, upload-time = "2025-10-24T07:18:03.884Z" }, - { url = "https://files.pythonhosted.org/packages/86/85/cedea5e6cbeb54396fdcc55f6b741696f3f036d23cfaf986d50d680446da/lupa-2.6-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7aba985b15b101495aa4b07112cdc08baa0c545390d560ad5cfde2e9e34f4d58", size = 1916703, upload-time = "2025-10-24T07:18:05.6Z" }, - { url = "https://files.pythonhosted.org/packages/24/be/3d6b5f9a8588c01a4d88129284c726017b2089f3a3fd3ba8bd977292fea0/lupa-2.6-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:b766f62f95b2739f2248977d29b0722e589dcf4f0ccfa827ccbd29f0148bd2e5", size = 985152, upload-time = "2025-10-24T07:18:08.561Z" }, - { url = "https://files.pythonhosted.org/packages/eb/23/9f9a05beee5d5dce9deca4cb07c91c40a90541fc0a8e09db4ee670da550f/lupa-2.6-cp312-cp312-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:00a934c23331f94cb51760097ebfab14b005d55a6b30a2b480e3c53dd2fa290d", size = 1159599, upload-time = "2025-10-24T07:18:10.346Z" }, - { url = "https://files.pythonhosted.org/packages/40/4e/e7c0583083db9d7f1fd023800a9767d8e4391e8330d56c2373d890ac971b/lupa-2.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21de9f38bd475303e34a042b7081aabdf50bd9bafd36ce4faea2f90fd9f15c31", size = 1038686, upload-time = "2025-10-24T07:18:12.112Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/9f/5a4f7d959d4feba5e203ff0c31889e74d1ca3153122be4a46dca7d92bf7c/lupa-2.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf3bda96d3fc41237e964a69c23647d50d4e28421111360274d4799832c560e9", size = 2071956, upload-time = "2025-10-24T07:18:14.572Z" }, - { url = "https://files.pythonhosted.org/packages/92/34/2f4f13ca65d01169b1720176aedc4af17bc19ee834598c7292db232cb6dc/lupa-2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5a76ead245da54801a81053794aa3975f213221f6542d14ec4b859ee2e7e0323", size = 1057199, upload-time = "2025-10-24T07:18:16.379Z" }, - { url = "https://files.pythonhosted.org/packages/35/2a/5f7d2eebec6993b0dcd428e0184ad71afb06a45ba13e717f6501bfed1da3/lupa-2.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8dd0861741caa20886ddbda0a121d8e52fb9b5bb153d82fa9bba796962bf30e8", size = 1173693, upload-time = "2025-10-24T07:18:18.153Z" }, - { url = "https://files.pythonhosted.org/packages/e4/29/089b4d2f8e34417349af3904bb40bec40b65c8731f45e3fd8d497ca573e5/lupa-2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:239e63948b0b23023f81d9a19a395e768ed3da6a299f84e7963b8f813f6e3f9c", size = 2164394, upload-time = "2025-10-24T07:18:20.403Z" }, - { url = "https://files.pythonhosted.org/packages/f3/1b/79c17b23c921f81468a111cad843b076a17ef4b684c4a8dff32a7969c3f0/lupa-2.6-cp312-cp312-win32.whl", hash = "sha256:325894e1099499e7a6f9c351147661a2011887603c71086d36fe0f964d52d1ce", size = 1420647, upload-time = "2025-10-24T07:18:23.368Z" }, - { url = "https://files.pythonhosted.org/packages/b8/15/5121e68aad3584e26e1425a5c9a79cd898f8a152292059e128c206ee817c/lupa-2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c735a1ce8ee60edb0fe71d665f1e6b7c55c6021f1d340eb8c865952c602cd36f", size = 1688529, upload-time = "2025-10-24T07:18:25.523Z" }, - { url = 
"https://files.pythonhosted.org/packages/28/1d/21176b682ca5469001199d8b95fa1737e29957a3d185186e7a8b55345f2e/lupa-2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:663a6e58a0f60e7d212017d6678639ac8df0119bc13c2145029dcba084391310", size = 947232, upload-time = "2025-10-24T07:18:27.878Z" }, - { url = "https://files.pythonhosted.org/packages/ce/4c/d327befb684660ca13cf79cd1f1d604331808f9f1b6fb6bf57832f8edf80/lupa-2.6-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:d1f5afda5c20b1f3217a80e9bc1b77037f8a6eb11612fd3ada19065303c8f380", size = 1908625, upload-time = "2025-10-24T07:18:29.944Z" }, - { url = "https://files.pythonhosted.org/packages/66/8e/ad22b0a19454dfd08662237a84c792d6d420d36b061f239e084f29d1a4f3/lupa-2.6-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:26f2b3c085fe76e9119e48c1013c1cccdc1f51585d456858290475aa38e7089e", size = 981057, upload-time = "2025-10-24T07:18:31.553Z" }, - { url = "https://files.pythonhosted.org/packages/5c/48/74859073ab276bd0566c719f9ca0108b0cfc1956ca0d68678d117d47d155/lupa-2.6-cp313-cp313-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:60d2f902c7b96fb8ab98493dcff315e7bb4d0b44dc9dd76eb37de575025d5685", size = 1156227, upload-time = "2025-10-24T07:18:33.981Z" }, - { url = "https://files.pythonhosted.org/packages/09/6c/0e9ded061916877253c2266074060eb71ed99fb21d73c8c114a76725bce2/lupa-2.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a02d25dee3a3250967c36590128d9220ae02f2eda166a24279da0b481519cbff", size = 1035752, upload-time = "2025-10-24T07:18:36.32Z" }, - { url = "https://files.pythonhosted.org/packages/dd/ef/f8c32e454ef9f3fe909f6c7d57a39f950996c37a3deb7b391fec7903dab7/lupa-2.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6eae1ee16b886b8914ff292dbefbf2f48abfbdee94b33a88d1d5475e02423203", size = 2069009, upload-time = "2025-10-24T07:18:38.072Z" }, - { url = 
"https://files.pythonhosted.org/packages/53/dc/15b80c226a5225815a890ee1c11f07968e0aba7a852df41e8ae6fe285063/lupa-2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0edd5073a4ee74ab36f74fe61450148e6044f3952b8d21248581f3c5d1a58be", size = 1056301, upload-time = "2025-10-24T07:18:40.165Z" }, - { url = "https://files.pythonhosted.org/packages/31/14/2086c1425c985acfb30997a67e90c39457122df41324d3c179d6ee2292c6/lupa-2.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0c53ee9f22a8a17e7d4266ad48e86f43771951797042dd51d1494aaa4f5f3f0a", size = 1170673, upload-time = "2025-10-24T07:18:42.426Z" }, - { url = "https://files.pythonhosted.org/packages/10/e5/b216c054cf86576c0191bf9a9f05de6f7e8e07164897d95eea0078dca9b2/lupa-2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:de7c0f157a9064a400d828789191a96da7f4ce889969a588b87ec80de9b14772", size = 2162227, upload-time = "2025-10-24T07:18:46.112Z" }, - { url = "https://files.pythonhosted.org/packages/59/2f/33ecb5bedf4f3bc297ceacb7f016ff951331d352f58e7e791589609ea306/lupa-2.6-cp313-cp313-win32.whl", hash = "sha256:ee9523941ae0a87b5b703417720c5d78f72d2f5bc23883a2ea80a949a3ed9e75", size = 1419558, upload-time = "2025-10-24T07:18:48.371Z" }, - { url = "https://files.pythonhosted.org/packages/f9/b4/55e885834c847ea610e111d87b9ed4768f0afdaeebc00cd46810f25029f6/lupa-2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b1335a5835b0a25ebdbc75cf0bda195e54d133e4d994877ef025e218c2e59db9", size = 1683424, upload-time = "2025-10-24T07:18:50.976Z" }, - { url = "https://files.pythonhosted.org/packages/66/9d/d9427394e54d22a35d1139ef12e845fd700d4872a67a34db32516170b746/lupa-2.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:dcb6d0a3264873e1653bc188499f48c1fb4b41a779e315eba45256cfe7bc33c1", size = 953818, upload-time = "2025-10-24T07:18:53.378Z" }, - { url = "https://files.pythonhosted.org/packages/10/41/27bbe81953fb2f9ecfced5d9c99f85b37964cfaf6aa8453bb11283983721/lupa-2.6-cp314-cp314-macosx_11_0_universal2.whl", hash = 
"sha256:a37e01f2128f8c36106726cb9d360bac087d58c54b4522b033cc5691c584db18", size = 1915850, upload-time = "2025-10-24T07:18:55.259Z" }, - { url = "https://files.pythonhosted.org/packages/a3/98/f9ff60db84a75ba8725506bbf448fb085bc77868a021998ed2a66d920568/lupa-2.6-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:458bd7e9ff3c150b245b0fcfbb9bd2593d1152ea7f0a7b91c1d185846da033fe", size = 982344, upload-time = "2025-10-24T07:18:57.05Z" }, - { url = "https://files.pythonhosted.org/packages/41/f7/f39e0f1c055c3b887d86b404aaf0ca197b5edfd235a8b81b45b25bac7fc3/lupa-2.6-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:052ee82cac5206a02df77119c325339acbc09f5ce66967f66a2e12a0f3211cad", size = 1156543, upload-time = "2025-10-24T07:18:59.251Z" }, - { url = "https://files.pythonhosted.org/packages/9e/9c/59e6cffa0d672d662ae17bd7ac8ecd2c89c9449dee499e3eb13ca9cd10d9/lupa-2.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96594eca3c87dd07938009e95e591e43d554c1dbd0385be03c100367141db5a8", size = 1047974, upload-time = "2025-10-24T07:19:01.449Z" }, - { url = "https://files.pythonhosted.org/packages/23/c6/a04e9cef7c052717fcb28fb63b3824802488f688391895b618e39be0f684/lupa-2.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8faddd9d198688c8884091173a088a8e920ecc96cda2ffed576a23574c4b3f6", size = 2073458, upload-time = "2025-10-24T07:19:03.369Z" }, - { url = "https://files.pythonhosted.org/packages/e6/10/824173d10f38b51fc77785228f01411b6ca28826ce27404c7c912e0e442c/lupa-2.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:daebb3a6b58095c917e76ba727ab37b27477fb926957c825205fbda431552134", size = 1067683, upload-time = "2025-10-24T07:19:06.2Z" }, - { url = "https://files.pythonhosted.org/packages/b6/dc/9692fbcf3c924d9c4ece2d8d2f724451ac2e09af0bd2a782db1cef34e799/lupa-2.6-cp314-cp314-musllinux_1_2_i686.whl", hash = 
"sha256:f3154e68972befe0f81564e37d8142b5d5d79931a18309226a04ec92487d4ea3", size = 1171892, upload-time = "2025-10-24T07:19:08.544Z" }, - { url = "https://files.pythonhosted.org/packages/84/ff/e318b628d4643c278c96ab3ddea07fc36b075a57383c837f5b11e537ba9d/lupa-2.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e4dadf77b9fedc0bfa53417cc28dc2278a26d4cbd95c29f8927ad4d8fe0a7ef9", size = 2166641, upload-time = "2025-10-24T07:19:10.485Z" }, - { url = "https://files.pythonhosted.org/packages/12/f7/a6f9ec2806cf2d50826980cdb4b3cffc7691dc6f95e13cc728846d5cb793/lupa-2.6-cp314-cp314-win32.whl", hash = "sha256:cb34169c6fa3bab3e8ac58ca21b8a7102f6a94b6a5d08d3636312f3f02fafd8f", size = 1456857, upload-time = "2025-10-24T07:19:37.989Z" }, - { url = "https://files.pythonhosted.org/packages/c5/de/df71896f25bdc18360fdfa3b802cd7d57d7fede41a0e9724a4625b412c85/lupa-2.6-cp314-cp314-win_amd64.whl", hash = "sha256:b74f944fe46c421e25d0f8692aef1e842192f6f7f68034201382ac440ef9ea67", size = 1731191, upload-time = "2025-10-24T07:19:40.281Z" }, - { url = "https://files.pythonhosted.org/packages/47/3c/a1f23b01c54669465f5f4c4083107d496fbe6fb45998771420e9aadcf145/lupa-2.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0e21b716408a21ab65723f8841cf7f2f37a844b7a965eeabb785e27fca4099cf", size = 999343, upload-time = "2025-10-24T07:19:12.519Z" }, - { url = "https://files.pythonhosted.org/packages/c5/6d/501994291cb640bfa2ccf7f554be4e6914afa21c4026bd01bff9ca8aac57/lupa-2.6-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:589db872a141bfff828340079bbdf3e9a31f2689f4ca0d88f97d9e8c2eae6142", size = 2000730, upload-time = "2025-10-24T07:19:14.869Z" }, - { url = "https://files.pythonhosted.org/packages/53/a5/457ffb4f3f20469956c2d4c4842a7675e884efc895b2f23d126d23e126cc/lupa-2.6-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:cd852a91a4a9d4dcbb9a58100f820a75a425703ec3e3f049055f60b8533b7953", size = 1021553, upload-time = "2025-10-24T07:19:17.123Z" }, - { url = 
"https://files.pythonhosted.org/packages/51/6b/36bb5a5d0960f2a5c7c700e0819abb76fd9bf9c1d8a66e5106416d6e9b14/lupa-2.6-cp314-cp314t-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:0334753be028358922415ca97a64a3048e4ed155413fc4eaf87dd0a7e2752983", size = 1133275, upload-time = "2025-10-24T07:19:20.51Z" }, - { url = "https://files.pythonhosted.org/packages/19/86/202ff4429f663013f37d2229f6176ca9f83678a50257d70f61a0a97281bf/lupa-2.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:661d895cd38c87658a34780fac54a690ec036ead743e41b74c3fb81a9e65a6aa", size = 1038441, upload-time = "2025-10-24T07:19:22.509Z" }, - { url = "https://files.pythonhosted.org/packages/a7/42/d8125f8e420714e5b52e9c08d88b5329dfb02dcca731b4f21faaee6cc5b5/lupa-2.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6aa58454ccc13878cc177c62529a2056be734da16369e451987ff92784994ca7", size = 2058324, upload-time = "2025-10-24T07:19:24.979Z" }, - { url = "https://files.pythonhosted.org/packages/2b/2c/47bf8b84059876e877a339717ddb595a4a7b0e8740bacae78ba527562e1c/lupa-2.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1425017264e470c98022bba8cff5bd46d054a827f5df6b80274f9cc71dafd24f", size = 1060250, upload-time = "2025-10-24T07:19:27.262Z" }, - { url = "https://files.pythonhosted.org/packages/c2/06/d88add2b6406ca1bdec99d11a429222837ca6d03bea42ca75afa169a78cb/lupa-2.6-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:224af0532d216e3105f0a127410f12320f7c5f1aa0300bdf9646b8d9afb0048c", size = 1151126, upload-time = "2025-10-24T07:19:29.522Z" }, - { url = "https://files.pythonhosted.org/packages/b4/a0/89e6a024c3b4485b89ef86881c9d55e097e7cb0bdb74efb746f2fa6a9a76/lupa-2.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9abb98d5a8fd27c8285302e82199f0e56e463066f88f619d6594a450bf269d80", size = 2153693, upload-time = "2025-10-24T07:19:31.379Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/36/a0f007dc58fc1bbf51fb85dcc82fcb1f21b8c4261361de7dab0e3d8521ef/lupa-2.6-cp314-cp314t-win32.whl", hash = "sha256:1849efeba7a8f6fb8aa2c13790bee988fd242ae404bd459509640eeea3d1e291", size = 1590104, upload-time = "2025-10-24T07:19:33.514Z" }, - { url = "https://files.pythonhosted.org/packages/7d/5e/db903ce9cf82c48d6b91bf6d63ae4c8d0d17958939a4e04ba6b9f38b8643/lupa-2.6-cp314-cp314t-win_amd64.whl", hash = "sha256:fc1498d1a4fc028bc521c26d0fad4ca00ed63b952e32fb95949bda76a04bad52", size = 1913818, upload-time = "2025-10-24T07:19:36.039Z" }, -] - [[package]] name = "macholib" version = "1.16.4" @@ -1785,6 +1947,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/f0/834e479e47e499b6478e807fb57b31cc2db696c4db30557bb6f5aea4a90b/mando-0.7.1-py2.py3-none-any.whl", hash = "sha256:26ef1d70928b6057ee3ca12583d73c63e05c49de8972d620c278a7b206581a8a", size = 28149, upload-time = "2022-02-24T08:12:25.24Z" }, ] +[[package]] +name = "markdown" +version = "3.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2b/f4/69fa6ed85ae003c2378ffa8f6d2e3234662abd02c10d216c0ba96081a238/markdown-3.10.2.tar.gz", hash = "sha256:994d51325d25ad8aa7ce4ebaec003febcce822c3f8c911e3b17c52f7f589f950", size = 368805, upload-time = "2026-02-09T14:57:26.942Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/1f/77fa3081e4f66ca3576c896ae5d31c3002ac6607f9747d2e3aa49227e464/markdown-3.10.2-py3-none-any.whl", hash = "sha256:e91464b71ae3ee7afd3017d9f358ef0baf158fd9a298db92f1d4761133824c36", size = 108180, upload-time = "2026-02-09T14:57:25.787Z" }, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -1903,22 +2074,116 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, 
upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "mergedeep" +version = "1.3.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3a/41/580bb4006e3ed0361b8151a01d324fb03f420815446c7def45d02f74c270/mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8", size = 4661, upload-time = "2021-02-05T18:55:30.623Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/19/04f9b178c2d8a15b076c8b5140708fa6ffc5601fb6f1e975537072df5b2a/mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307", size = 6354, upload-time = "2021-02-05T18:55:29.583Z" }, +] + [[package]] name = "mistralai" -version = "1.9.11" +version = "2.4.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "eval-type-backport" }, { name = "httpx" }, - { name = "invoke" }, + { name = "jsonpath-python" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, { name = "pydantic" }, { name = "python-dateutil" }, - { name = "pyyaml" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/8d/d8b7af67a966b6f227024e1cb7287fc19901a434f87a5a391dcfe635d338/mistralai-1.9.11.tar.gz", hash = "sha256:3df9e403c31a756ec79e78df25ee73cea3eb15f86693773e16b16adaf59c9b8a", size = 208051, upload-time = "2025-10-02T15:53:40.473Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8e/3f/5624d57c5897c83c55d3e4c7dd4127de42ad14fd3183e26566cdc7dca1bf/mistralai-2.4.5.tar.gz", hash = "sha256:ef165bb004ec4423cbf19a440bf0983ca0c3fc92ab12a35ebca097bdf418e33a", size = 424611, upload-time = "2026-05-07T11:46:43.888Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/76/4ce12563aea5a76016f8643eff30ab731e6656c845e9e4d090ef10c7b925/mistralai-1.9.11-py3-none-any.whl", hash = 
"sha256:7a3dc2b8ef3fceaa3582220234261b5c4e3e03a972563b07afa150e44a25a6d3", size = 442796, upload-time = "2025-10-02T15:53:39.134Z" }, + { url = "https://files.pythonhosted.org/packages/1b/48/2c5c4f853dec32a625c1a3d23809b80cf2e135c3441fe1764f72910dfea9/mistralai-2.4.5-py3-none-any.whl", hash = "sha256:bf3b6550258ab16dec8547b90e9c18bebf9099f55b7fc25a884bf0bbeffced0f", size = 995999, upload-time = "2026-05-07T11:46:41.915Z" }, +] + +[[package]] +name = "mkdocs" +version = "1.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "ghp-import" }, + { name = "jinja2" }, + { name = "markdown" }, + { name = "markupsafe" }, + { name = "mergedeep" }, + { name = "mkdocs-get-deps" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "pyyaml" }, + { name = "pyyaml-env-tag" }, + { name = "watchdog" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/c6/bbd4f061bd16b378247f12953ffcb04786a618ce5e904b8c5a01a0309061/mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2", size = 3889159, upload-time = "2024-08-30T12:24:06.899Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/5b/dbc6a8cddc9cfa9c4971d59fb12bb8d42e161b7e7f8cc89e49137c5b279c/mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e", size = 3864451, upload-time = "2024-08-30T12:24:05.054Z" }, +] + +[[package]] +name = "mkdocs-get-deps" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mergedeep" }, + { name = "platformdirs" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/f5/ed29cd50067784976f25ed0ed6fcd3c2ce9eb90650aa3b2796ddf7b6870b/mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c", size = 10239, upload-time = 
"2023-11-20T17:51:09.981Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/d4/029f984e8d3f3b6b726bd33cafc473b75e9e44c0f7e80a5b29abc466bdea/mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134", size = 9521, upload-time = "2023-11-20T17:51:08.587Z" }, +] + +[[package]] +name = "mkdocs-material" +version = "9.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "babel" }, + { name = "backrefs" }, + { name = "colorama" }, + { name = "jinja2" }, + { name = "markdown" }, + { name = "mkdocs" }, + { name = "mkdocs-material-extensions" }, + { name = "paginate" }, + { name = "pygments" }, + { name = "pymdown-extensions" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8a/b4/f900fcb8e6f510241e334ca401eddcb61ed880fb6572f7f32e4228472ca1/mkdocs_material-9.7.3.tar.gz", hash = "sha256:e5f0a18319699da7e78c35e4a8df7e93537a888660f61a86bd773a7134798f22", size = 4097748, upload-time = "2026-02-24T12:06:22.646Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/1b/16ad0193079bb8a15aa1d2620813a9cd15b18de150a4ea1b2c607fb4c74d/mkdocs_material-9.7.3-py3-none-any.whl", hash = "sha256:37ebf7b4788c992203faf2e71900be3c197c70a4be9b0d72aed537b08a91dd9d", size = 9305078, upload-time = "2026-02-24T12:06:19.155Z" }, +] + +[[package]] +name = "mkdocs-material-extensions" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/79/9b/9b4c96d6593b2a541e1cb8b34899a6d021d208bb357042823d4d2cabdbe7/mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443", size = 11847, upload-time = "2023-11-22T19:09:45.208Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/54/662a4743aa81d9582ee9339d4ffa3c8fd40a4965e033d77b9da9774d3960/mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = 
"sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31", size = 8728, upload-time = "2023-11-22T19:09:43.465Z" }, +] + +[[package]] +name = "mkdocs-minify-plugin" +version = "0.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "csscompressor" }, + { name = "htmlmin2" }, + { name = "jsmin" }, + { name = "mkdocs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/52/67/fe4b77e7a8ae7628392e28b14122588beaf6078b53eb91c7ed000fd158ac/mkdocs-minify-plugin-0.8.0.tar.gz", hash = "sha256:bc11b78b8120d79e817308e2b11539d790d21445eb63df831e393f76e52e753d", size = 8366, upload-time = "2024-01-29T16:11:32.982Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/cd/2e8d0d92421916e2ea4ff97f10a544a9bd5588eb747556701c983581df13/mkdocs_minify_plugin-0.8.0-py3-none-any.whl", hash = "sha256:5fba1a3f7bd9a2142c9954a6559a57e946587b21f133165ece30ea145c66aee6", size = 6723, upload-time = "2024-01-29T16:11:31.851Z" }, ] [[package]] @@ -2049,14 +2314,14 @@ wheels = [ [[package]] name = "nexus-rpc" -version = "1.2.0" +version = "1.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/06/50/95d7bc91f900da5e22662c82d9bf0f72a4b01f2a552708bf2f43807707a1/nexus_rpc-1.2.0.tar.gz", hash = "sha256:b4ddaffa4d3996aaeadf49b80dfcdfbca48fe4cb616defaf3b3c5c2c8fc61890", size = 74142, upload-time = "2025-11-17T19:17:06.798Z" } +sdist = { url = "https://files.pythonhosted.org/packages/35/d5/cd1ffb202b76ebc1b33c1332a3416e55a39929006982adc2b1eb069aaa9b/nexus_rpc-1.4.0.tar.gz", hash = "sha256:3b8b373d4865671789cc43623e3dc0bcbf192562e40e13727e17f1c149050fba", size = 82367, upload-time = "2026-02-25T22:01:34.053Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/13/04/eaac430d0e6bf21265ae989427d37e94be5e41dc216879f1fbb6c5339942/nexus_rpc-1.2.0-py3-none-any.whl", hash = 
"sha256:977876f3af811ad1a09b2961d3d1ac9233bda43ff0febbb0c9906483b9d9f8a3", size = 28166, upload-time = "2025-11-17T19:17:05.64Z" }, + { url = "https://files.pythonhosted.org/packages/11/52/6327a5f4fda01207205038a106a99848a41c83e933cd23ea2cab3d2ebc6c/nexus_rpc-1.4.0-py3-none-any.whl", hash = "sha256:14c953d3519113f8ccec533a9efdb6b10c28afef75d11cdd6d422640c40b3a49", size = 29645, upload-time = "2026-02-25T22:01:33.122Z" }, ] [[package]] @@ -2130,142 +2395,160 @@ wheels = [ ] [[package]] -name = "nvidia-cublas-cu12" -version = "12.8.4.1" +name = "nvidia-cublas" +version = "13.1.1.3" source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cuda-nvrtc" }, +] wheels = [ - { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, + { url = "https://files.pythonhosted.org/packages/a7/a1/0bd24ee8c8d03adac032fd2909426a00c88f8c57961b1277ded97f91119f/nvidia_cublas-13.1.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b7a210458267ac818974c53038fbec2e969d5c99f305ab15c72522fa9f001dd5", size = 542848918, upload-time = "2026-04-08T18:46:22.985Z" }, + { url = "https://files.pythonhosted.org/packages/3b/cd/154ca20c38269e05eff77c1464e6c1da89f50a6390b565e9d82e06bc11e1/nvidia_cublas-13.1.1.3-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:37936a16db8fe4ac1f065c2139360608a543a09275cb1a1af612e08cfa065436", size = 423138758, upload-time = "2026-04-08T18:46:58.655Z" }, ] [[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.8.90" +name = "nvidia-cuda-cupti" +version = "13.0.85" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, + { url = "https://files.pythonhosted.org/packages/2a/2a/80353b103fc20ce05ef51e928daed4b6015db4aaa9162ed0997090fe2250/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_aarch64.whl", hash = "sha256:796bd679890ee55fb14a94629b698b6db54bcfd833d391d5e94017dd9d7d3151", size = 10310827, upload-time = "2025-09-04T08:26:42.012Z" }, + { url = "https://files.pythonhosted.org/packages/33/6d/737d164b4837a9bbd202f5ae3078975f0525a55730fe871d8ed4e3b952b0/nvidia_cuda_cupti-13.0.85-py3-none-manylinux_2_25_x86_64.whl", hash = "sha256:4eb01c08e859bf924d222250d2e8f8b8ff6d3db4721288cf35d14252a4d933c8", size = 10715597, upload-time = "2025-09-04T08:26:51.312Z" }, ] [[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = "12.8.93" +name = "nvidia-cuda-nvrtc" +version = "13.0.88" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, + { url = "https://files.pythonhosted.org/packages/c3/68/483a78f5e8f31b08fb1bb671559968c0ca3a065ac7acabfc7cee55214fd6/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:ad9b6d2ead2435f11cbb6868809d2adeeee302e9bb94bcf0539c7a40d80e8575", size = 90215200, upload-time = "2025-09-04T08:28:44.204Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/dc/6bb80850e0b7edd6588d560758f17e0550893a1feaf436807d64d2da040f/nvidia_cuda_nvrtc-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d27f20a0ca67a4bb34268a5e951033496c5b74870b868bacd046b1b8e0c3267b", size = 43015449, upload-time = "2025-09-04T08:28:20.239Z" }, ] [[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.8.90" +name = "nvidia-cuda-runtime" +version = "13.0.96" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, + { url = "https://files.pythonhosted.org/packages/87/4f/17d7b9b8e285199c58ce28e31b5c5bbaa4d8271af06a89b6405258245de2/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ef9bcbe90493a2b9d810e43d249adb3d02e98dd30200d86607d8d02687c43f55", size = 2261060, upload-time = "2025-10-09T08:55:15.78Z" }, + { url = "https://files.pythonhosted.org/packages/2e/24/d1558f3b68b1d26e706813b1d10aa1d785e4698c425af8db8edc3dced472/nvidia_cuda_runtime-13.0.96-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f82250d7782aa23b6cfe765ecc7db554bd3c2870c43f3d1821f1d18aebf0548", size = 2243632, upload-time = "2025-10-09T08:55:36.117Z" }, ] [[package]] -name = "nvidia-cudnn-cu12" -version = "9.10.2.21" +name = "nvidia-cudnn-cu13" +version = "9.20.0.48" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas" }, ] wheels = [ - { url = 
"https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, + { url = "https://files.pythonhosted.org/packages/56/c5/83384d846b2fd17c44bd499b36c75a45ed4f095fbbb2252294e89cea5c5c/nvidia_cudnn_cu13-9.20.0.48-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:e31454ae00094b0c55319d9d15b6fa2fc50a9e1c0f5c8c80fb75258234e731e1", size = 444574296, upload-time = "2026-03-09T19:28:27.751Z" }, + { url = "https://files.pythonhosted.org/packages/6e/5e/edb9c0ae051602c3ccaffe424256463636d639e27d7f302dde9975ef9e7a/nvidia_cudnn_cu13-9.20.0.48-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:0c45dd8eeb50b603f07995b1b300c62ffe6a1980482b82b3bcf94a4ca9d49304", size = 366173588, upload-time = "2026-03-09T19:29:34.474Z" }, ] [[package]] -name = "nvidia-cufft-cu12" -version = "11.3.3.83" +name = "nvidia-cufft" +version = "12.0.0.61" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, + { url = "https://files.pythonhosted.org/packages/8b/ae/f417a75c0259e85c1d2f83ca4e960289a5f814ed0cea74d18c353d3e989d/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2708c852ef8cd89d1d2068bdbece0aa188813a0c934db3779b9b1faa8442e5f5", size = 214053554, upload-time = "2025-09-04T08:31:38.196Z" }, + { url = 
"https://files.pythonhosted.org/packages/a8/2f/7b57e29836ea8714f81e9898409196f47d772d5ddedddf1592eadb8ab743/nvidia_cufft-12.0.0.61-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6c44f692dce8fd5ffd3e3df134b6cdb9c2f72d99cf40b62c32dde45eea9ddad3", size = 214085489, upload-time = "2025-09-04T08:31:56.044Z" }, ] [[package]] -name = "nvidia-cufile-cu12" -version = "1.13.1.3" +name = "nvidia-cufile" +version = "1.15.1.6" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, + { url = "https://files.pythonhosted.org/packages/3f/70/4f193de89a48b71714e74602ee14d04e4019ad36a5a9f20c425776e72cd6/nvidia_cufile-1.15.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08a3ecefae5a01c7f5117351c64f17c7c62efa5fffdbe24fc7d298da19cd0b44", size = 1223672, upload-time = "2025-09-04T08:32:22.779Z" }, + { url = "https://files.pythonhosted.org/packages/ab/73/cc4a14c9813a8a0d509417cf5f4bdaba76e924d58beb9864f5a7baceefbf/nvidia_cufile-1.15.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:bdc0deedc61f548bddf7733bdc216456c2fdb101d020e1ab4b88d232d5e2f6d1", size = 1136992, upload-time = "2025-09-04T08:32:14.119Z" }, ] [[package]] -name = "nvidia-curand-cu12" -version = "10.3.9.90" +name = "nvidia-curand" +version = "10.4.0.35" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = 
"2025-03-07T01:46:23.323Z" }, + { url = "https://files.pythonhosted.org/packages/1e/72/7c2ae24fb6b63a32e6ae5d241cc65263ea18d08802aaae087d9f013335a2/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:133df5a7509c3e292aaa2b477afd0194f06ce4ea24d714d616ff36439cee349a", size = 61962106, upload-time = "2025-08-04T10:21:41.128Z" }, + { url = "https://files.pythonhosted.org/packages/a5/9f/be0a41ca4a4917abf5cb9ae0daff1a6060cc5de950aec0396de9f3b52bc5/nvidia_curand-10.4.0.35-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:1aee33a5da6e1db083fe2b90082def8915f30f3248d5896bcec36a579d941bfc", size = 59544258, upload-time = "2025-08-04T10:22:03.992Z" }, ] [[package]] -name = "nvidia-cusolver-cu12" -version = "11.7.3.90" +name = "nvidia-cusolver" +version = "12.0.4.66" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas" }, + { name = "nvidia-cusparse" }, + { name = "nvidia-nvjitlink" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c3/b30c9e935fc01e3da443ec0116ed1b2a009bb867f5324d3f2d7e533e776b/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:02c2457eaa9e39de20f880f4bd8820e6a1cfb9f9a34f820eb12a155aa5bc92d2", size = 223467760, upload-time = "2025-09-04T08:33:04.222Z" }, + { url = "https://files.pythonhosted.org/packages/5f/67/cba3777620cdacb99102da4042883709c41c709f4b6323c10781a9c3aa34/nvidia_cusolver-12.0.4.66-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:0a759da5dea5c0ea10fd307de75cdeb59e7ea4fcb8add0924859b944babf1112", 
size = 200941980, upload-time = "2025-09-04T08:33:22.767Z" }, ] [[package]] -name = "nvidia-cusparse-cu12" -version = "12.5.8.93" +name = "nvidia-cusparse" +version = "12.6.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, + { url = "https://files.pythonhosted.org/packages/f8/94/5c26f33738ae35276672f12615a64bd008ed5be6d1ebcb23579285d960a9/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:80bcc4662f23f1054ee334a15c72b8940402975e0eab63178fc7e670aa59472c", size = 162155568, upload-time = "2025-09-04T08:33:42.864Z" }, + { url = "https://files.pythonhosted.org/packages/fa/18/623c77619c31d62efd55302939756966f3ecc8d724a14dab2b75f1508850/nvidia_cusparse-12.6.3.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b3c89c88d01ee0e477cb7f82ef60a11a4bcd57b6b87c33f789350b59759360b", size = 145942937, upload-time = "2025-09-04T08:33:58.029Z" }, ] [[package]] -name = "nvidia-cusparselt-cu12" -version = "0.7.1" +name = "nvidia-cusparselt-cu13" +version = "0.8.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/e1/cdc1797eadf82d3a9a575a19b33fdc871a97edbec42c00b5b5e914f4aff4/nvidia_cusparselt_cu13-0.8.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4dca476c50bf4780d46cd0bfbd82e2bc10a08e4fef7950917ce8d7578d22a23f", size = 221051344, upload-time = "2025-09-05T18:49:51.289Z" }, + { url = "https://files.pythonhosted.org/packages/34/7d/2661f2fb3ac4302f3a246f5fc030213ac60c1fe0bce84f9783dbd831dbb7/nvidia_cusparselt_cu13-0.8.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:786ce87568c303fadb5afcc7102d454cd3040d75f6f8626f5db460d1871f4dd0", size = 170148586, upload-time = "2025-09-05T18:50:50.248Z" }, ] [[package]] -name = "nvidia-nccl-cu12" -version = "2.27.5" +name = "nvidia-nccl-cu13" +version = "2.29.7" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, + { url = "https://files.pythonhosted.org/packages/72/0d/daf50d44177ee0cbc7ff0a0c91eb5ff676c82be42f9a970bc7597f440c3a/nvidia_nccl_cu13-2.29.7-py3-none-manylinux_2_18_aarch64.whl", hash = "sha256:674a12383e3c38a1bcccae7d4f3633b37852230b6047883cb2f4c2d1b36d9bf5", size = 206014712, upload-time = "2026-03-03T05:34:20.843Z" }, + { url = "https://files.pythonhosted.org/packages/67/f4/58e4e91b6919367c7aafb8e36fce9aad1a3047e536bf7e2fd560927d3a4c/nvidia_nccl_cu13-2.29.7-py3-none-manylinux_2_18_x86_64.whl", hash = "sha256:edd81538446786ec3b73972543e53bb43bcaf0bfc8ef76cb679fcc390ffe136d", size = 205976000, upload-time = "2026-03-03T05:36:24.472Z" }, ] [[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.8.93" +name = "nvidia-nvjitlink" +version = "13.0.88" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, + { url = "https://files.pythonhosted.org/packages/56/7a/123e033aaff487c77107195fa5a2b8686795ca537935a24efae476c41f05/nvidia_nvjitlink-13.0.88-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:13a74f429e23b921c1109976abefacc69835f2f433ebd323d3946e11d804e47b", size = 40713933, upload-time = "2025-09-04T08:35:43.553Z" }, + { url = "https://files.pythonhosted.org/packages/ab/2c/93c5250e64df4f894f1cbb397c6fd71f79813f9fd79d7cd61de3f97b3c2d/nvidia_nvjitlink-13.0.88-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e931536ccc7d467a98ba1d8b89ff7fa7f1fa3b13f2b0069118cd7f47bff07d0c", size = 38768748, upload-time = "2025-09-04T08:35:20.008Z" }, ] [[package]] -name = "nvidia-nvshmem-cu12" +name = "nvidia-nvshmem-cu13" version = "3.4.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" }, + { url = "https://files.pythonhosted.org/packages/dc/0f/05cc9c720236dcd2db9c1ab97fff629e96821be2e63103569da0c9b72f19/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dc2a197f38e5d0376ad52cd1a2a3617d3cdc150fd5966f4aee9bcebb1d68fe9", size = 60215947, upload-time = "2025-09-06T00:32:20.022Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/35/a9bf80a609e74e3b000fef598933235c908fcefcef9026042b8e6dfde2a9/nvidia_nvshmem_cu13-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:290f0a2ee94c9f3687a02502f3b9299a9f9fe826e6d0287ee18482e78d495b80", size = 60412546, upload-time = "2025-09-06T00:32:41.564Z" }, ] [[package]] -name = "nvidia-nvtx-cu12" -version = "12.8.90" +name = "nvidia-nvtx" +version = "13.0.85" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, + { url = "https://files.pythonhosted.org/packages/c2/f3/d86c845465a2723ad7e1e5c36dcd75ddb82898b3f53be47ebd429fb2fa5d/nvidia_nvtx-13.0.85-py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4936d1d6780fbe68db454f5e72a42ff64d1fd6397df9f363ae786930fd5c1cd4", size = 148047, upload-time = "2025-09-04T08:29:01.761Z" }, + { url = "https://files.pythonhosted.org/packages/a8/64/3708a90d1ebe202ffdeb7185f878a3c84d15c2b2c31858da2ce0583e2def/nvidia_nvtx-13.0.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb7780edb6b14107373c835bf8b72e7a178bac7367e23da7acb108f973f157a6", size = 148878, upload-time = "2025-09-04T08:28:53.627Z" }, ] [[package]] name = "openai" -version = "2.15.0" +version = "2.29.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2277,9 +2560,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/94/f4/4690ecb5d70023ce6bfcfeabfe717020f654bde59a775058ec6ac4692463/openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba", size = 
627383, upload-time = "2026-01-09T22:10:08.603Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b4/15/203d537e58986b5673e7f232453a2a2f110f22757b15921cbdeea392e520/openai-2.29.0.tar.gz", hash = "sha256:32d09eb2f661b38d3edd7d7e1a2943d1633f572596febe64c0cd370c86d52bec", size = 671128, upload-time = "2026-03-17T17:53:49.599Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/df/c306f7375d42bafb379934c2df4c2fa3964656c8c782bac75ee10c102818/openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3", size = 1067879, upload-time = "2026-01-09T22:10:06.446Z" }, + { url = "https://files.pythonhosted.org/packages/d0/b1/35b6f9c8cf9318e3dbb7146cc82dab4cf61182a8d5406fc9b50864362895/openai-2.29.0-py3-none-any.whl", hash = "sha256:b7c5de513c3286d17c5e29b92c4c98ceaf0d775244ac8159aeb1bddf840eb42a", size = 1141533, upload-time = "2026-03-17T17:53:47.348Z" }, ] [[package]] @@ -2337,20 +2620,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/f1/b27d3e2e003cd9a3592c43d099d2ed8d0a947c15281bf8463a256db0b46c/opentelemetry_exporter_otlp_proto_http-1.39.1-py3-none-any.whl", hash = "sha256:d9f5207183dd752a412c4cd564ca8875ececba13be6e9c6c370ffb752fd59985", size = 19641, upload-time = "2025-12-11T13:32:22.248Z" }, ] -[[package]] -name = "opentelemetry-exporter-prometheus" -version = "0.60b1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-api" }, - { name = "opentelemetry-sdk" }, - { name = "prometheus-client" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/14/39/7dafa6fff210737267bed35a8855b6ac7399b9e582b8cf1f25f842517012/opentelemetry_exporter_prometheus-0.60b1.tar.gz", hash = "sha256:a4011b46906323f71724649d301b4dc188aaa068852e814f4df38cc76eac616b", size = 14976, upload-time = "2025-12-11T13:32:42.944Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/9b/0d/4be6bf5477a3eb3d917d2f17d3c0b6720cd6cb97898444a61d43cc983f5c/opentelemetry_exporter_prometheus-0.60b1-py3-none-any.whl", hash = "sha256:49f59178de4f4590e3cef0b8b95cf6e071aae70e1f060566df5546fad773b8fd", size = 13019, upload-time = "2025-12-11T13:32:23.974Z" }, -] - [[package]] name = "opentelemetry-instrumentation" version = "0.60b1" @@ -2439,6 +2708,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "paginate" +version = "0.5.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/46/68dde5b6bc00c1296ec6466ab27dddede6aec9af1b99090e1107091b3b84/paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945", size = 19252, upload-time = "2024-08-25T14:17:24.139Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/96/04b8e52da071d28f5e21a805b19cb9390aa17a47462ac87f5e2696b9566d/paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591", size = 13746, upload-time = "2024-08-25T14:17:22.55Z" }, +] + [[package]] name = "pathable" version = "0.4.4" @@ -2449,12 +2727,12 @@ wheels = [ ] [[package]] -name = "pathvalidate" -version = "3.3.1" +name = "pathspec" +version = "1.0.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fa/2a/52a8da6fe965dea6192eb716b357558e103aea0a1e9a8352ad575a8406ca/pathvalidate-3.3.1.tar.gz", hash = "sha256:b18c07212bfead624345bb8e1d6141cdcf15a39736994ea0b94035ad2b1ba177", size = 63262, upload-time = "2025-06-15T09:07:20.736Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/fa/36/e27608899f9b8d4dff0617b2d9ab17ca5608956ca44461ac14ac48b44015/pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", size = 131200, upload-time = "2026-01-27T03:59:46.938Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/70/875f4a23bfc4731703a5835487d0d2fb999031bd415e7d17c0ae615c18b7/pathvalidate-3.3.1-py3-none-any.whl", hash = "sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f", size = 24305, upload-time = "2025-06-15T09:07:19.117Z" }, + { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" }, ] [[package]] @@ -2521,15 +2799,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload-time = "2025-12-16T21:14:32.409Z" }, ] -[[package]] -name = "prometheus-client" -version = "0.24.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f0/58/a794d23feb6b00fc0c72787d7e87d872a6730dd9ed7c7b3e954637d8f280/prometheus_client-0.24.1.tar.gz", hash = "sha256:7e0ced7fbbd40f7b84962d5d2ab6f17ef88a72504dcf7c0b40737b43b2a461f9", size = 85616, upload-time = "2026-01-14T15:26:26.965Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/74/c3/24a2f845e3917201628ecaba4f18bab4d18a337834c1df2a159ee9d22a42/prometheus_client-0.24.1-py3-none-any.whl", hash = "sha256:150db128af71a5c2482b36e588fc8a6b95e498750da4b17065947c16070f4055", size = 64057, upload-time = "2026-01-14T15:26:24.42Z" }, -] - [[package]] name = "prompt-toolkit" version = 
"3.0.52" @@ -2643,21 +2912,21 @@ wheels = [ [[package]] name = "py-key-value-aio" -version = "0.3.0" +version = "0.4.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "beartype" }, - { name = "py-key-value-shared" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/93/ce/3136b771dddf5ac905cc193b461eb67967cf3979688c6696e1f2cdcde7ea/py_key_value_aio-0.3.0.tar.gz", hash = "sha256:858e852fcf6d696d231266da66042d3355a7f9871650415feef9fca7a6cd4155", size = 50801, upload-time = "2025-11-17T16:50:04.711Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/3c/0397c072a38d4bc580994b42e0c90c5f44f679303489e4376289534735e5/py_key_value_aio-0.4.4.tar.gz", hash = "sha256:e3012e6243ed7cc09bb05457bd4d03b1ba5c2b1ca8700096b3927db79ffbbe55", size = 92300, upload-time = "2026-02-16T21:21:43.245Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/99/10/72f6f213b8f0bce36eff21fda0a13271834e9eeff7f9609b01afdc253c79/py_key_value_aio-0.3.0-py3-none-any.whl", hash = "sha256:1c781915766078bfd608daa769fefb97e65d1d73746a3dfb640460e322071b64", size = 96342, upload-time = "2025-11-17T16:50:03.801Z" }, + { url = "https://files.pythonhosted.org/packages/32/69/f1b537ee70b7def42d63124a539ed3026a11a3ffc3086947a1ca6e861868/py_key_value_aio-0.4.4-py3-none-any.whl", hash = "sha256:18e17564ecae61b987f909fc2cd41ee2012c84b4b1dcb8c055cf8b4bc1bf3f5d", size = 152291, upload-time = "2026-02-16T21:21:44.241Z" }, ] [package.optional-dependencies] -disk = [ - { name = "diskcache" }, - { name = "pathvalidate" }, +filetree = [ + { name = "aiofile" }, + { name = "anyio" }, ] keyring = [ { name = "keyring" }, @@ -2665,30 +2934,14 @@ keyring = [ memory = [ { name = "cachetools" }, ] -redis = [ - { name = "redis" }, -] - -[[package]] -name = "py-key-value-shared" -version = "0.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "beartype" }, - { name = "typing-extensions" }, -] -sdist = { 
url = "https://files.pythonhosted.org/packages/7b/e4/1971dfc4620a3a15b4579fe99e024f5edd6e0967a71154771a059daff4db/py_key_value_shared-0.3.0.tar.gz", hash = "sha256:8fdd786cf96c3e900102945f92aa1473138ebe960ef49da1c833790160c28a4b", size = 11666, upload-time = "2025-11-17T16:50:06.849Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/51/e4/b8b0a03ece72f47dce2307d36e1c34725b7223d209fc679315ffe6a4e2c3/py_key_value_shared-0.3.0-py3-none-any.whl", hash = "sha256:5b0efba7ebca08bb158b1e93afc2f07d30b8f40c2fc12ce24a4c0d84f42f9298", size = 19560, upload-time = "2025-11-17T16:50:05.954Z" }, -] [[package]] name = "pyasn1" -version = "0.6.2" +version = "0.6.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fe/b6/6e630dff89739fcd427e3f72b3d905ce0acb85a45d4ec3e2678718a3487f/pyasn1-0.6.2.tar.gz", hash = "sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b", size = 146586, upload-time = "2026-01-16T18:04:18.534Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/44/b5/a96872e5184f354da9c84ae119971a0a4c221fe9b27a4d94bd43f2596727/pyasn1-0.6.2-py3-none-any.whl", hash = "sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf", size = 83371, upload-time = "2026-01-16T18:04:17.174Z" }, + { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" }, ] [[package]] @@ -2734,32 +2987,32 @@ email = [ [[package]] name = "pydantic-ai" -version = "1.46.0" +version = 
"1.102.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pydantic-ai-slim", extra = ["ag-ui", "anthropic", "bedrock", "cli", "cohere", "evals", "fastmcp", "google", "groq", "huggingface", "logfire", "mcp", "mistral", "openai", "retries", "temporal", "ui", "vertexai", "xai"] }, + { name = "pydantic-ai-slim", extra = ["ag-ui", "anthropic", "bedrock", "cli", "cohere", "evals", "fastmcp", "google", "groq", "huggingface", "logfire", "mcp", "mistral", "openai", "retries", "spec", "temporal", "ui", "vertexai", "xai"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7d/e9/2917eabd9a8f408748e1e91b8d0a1bf695ca7d785f6b88efc3e4bba2fa94/pydantic_ai-1.46.0.tar.gz", hash = "sha256:e71c7d7c905da6f34b8759ad9f6914c31035fed5623ca5ac35096f9d738019cf", size = 11795, upload-time = "2026-01-23T00:07:15.786Z" } +sdist = { url = "https://files.pythonhosted.org/packages/61/a8/c6cecf03aea4ae75126069c6b0f988263d1cb18b97d6d0a6634f5e397b56/pydantic_ai-1.102.0.tar.gz", hash = "sha256:5def631d6e1c68b5e992c88da21b78377fe9262aeaf7f9ca09f67c100a9d3878", size = 17795, upload-time = "2026-05-23T01:14:30.493Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/9e/ff49bae2eeeb7f0afe0b8bfb49868f4e4e0f2d986be5f2f9883e09c3e09b/pydantic_ai-1.46.0-py3-none-any.whl", hash = "sha256:a9ac9413ae1e57d5f9ce563f6e46aceaaf9602540366e98363d08482e4ddc651", size = 7220, upload-time = "2026-01-23T00:07:08.263Z" }, + { url = "https://files.pythonhosted.org/packages/d1/57/de1ab45c2084cb2db886a09d93b005959134655f6ec348cf8a821a177b2f/pydantic_ai-1.102.0-py3-none-any.whl", hash = "sha256:bc38cf4936cf08fa3aaf9d34abf908fd73b47147768cdeb34ec3eaf43909aca8", size = 7587, upload-time = "2026-05-23T01:14:19.813Z" }, ] [[package]] name = "pydantic-ai-slim" -version = "1.46.0" +version = "1.102.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "genai-prices" }, - { name = "griffe" }, + { name = "griffelib" }, { name = "httpx" }, { name = 
"opentelemetry-api" }, { name = "pydantic" }, { name = "pydantic-graph" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c6/f3/c053fef7e4d55b7b28fea5d3a738e5e6fa15f227668faed53c76226ae79a/pydantic_ai_slim-1.46.0.tar.gz", hash = "sha256:8925bc2c54b6c1f5168142d703ecfdba65162d08dae9908bf583932fdf631d09", size = 393260, upload-time = "2026-01-23T00:07:18.831Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e2/3e/14980440e8f0532535e1fbe936fec5f8d8e7bc6cafa81f6f3c51b1884fe5/pydantic_ai_slim-1.102.0.tar.gz", hash = "sha256:0b8f2b70fa2b40efcbd09d341a346934fc4e46622ae281f858c6bfd3d0d3152b", size = 739988, upload-time = "2026-05-23T01:14:32.808Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/d8/640ccbd4d63021a7bd724571dfe92c5868e3890a1172e159b828c84c30dc/pydantic_ai_slim-1.46.0-py3-none-any.whl", hash = "sha256:2494ca9be6009a5e27db09fecb1ab49f0b569a6e7fcd2eda067262bcbd497856", size = 515335, upload-time = "2026-01-23T00:07:10.751Z" }, + { url = "https://files.pythonhosted.org/packages/b4/2e/089df86adaf904dd97a1b139d29fe728af0e41430d747f5b6315df3b0c1e/pydantic_ai_slim-1.102.0-py3-none-any.whl", hash = "sha256:f9fa9c3fb58a76f85522f78d1037d201b424de46d532263ed780b3730060449f", size = 919311, upload-time = "2026-05-23T01:14:23.464Z" }, ] [package.optional-dependencies] @@ -2777,6 +3030,7 @@ cli = [ { name = "argcomplete" }, { name = "prompt-toolkit" }, { name = "pyperclip" }, + { name = "pyyaml" }, { name = "rich" }, ] cohere = [ @@ -2795,13 +3049,13 @@ groq = [ { name = "groq" }, ] huggingface = [ - { name = "huggingface-hub", extra = ["inference"] }, + { name = "huggingface-hub" }, ] logfire = [ { name = "logfire", extra = ["httpx"] }, ] mcp = [ - { name = "mcp" }, + { name = "fastmcp-slim", extra = ["client"] }, ] mistral = [ { name = "mistralai" }, @@ -2813,6 +3067,10 @@ openai = [ retries = [ { name = "tenacity" }, ] +spec = [ + { name = "pydantic-handlebars" }, + { name = "pyyaml" }, +] 
temporal = [ { name = "temporalio" }, ] @@ -2900,7 +3158,7 @@ wheels = [ [[package]] name = "pydantic-evals" -version = "1.46.0" +version = "1.102.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2910,14 +3168,14 @@ dependencies = [ { name = "pyyaml" }, { name = "rich" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/da/ce/044bde6ba4f0da335d7f7955c58b86e45ba275b009b46cd61d5b53b62f06/pydantic_evals-1.46.0.tar.gz", hash = "sha256:66c52ad006d6fa7d05f563d667d20377a46edb54ef638c2b83c7660215560f76", size = 47173, upload-time = "2026-01-23T00:07:20.254Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2a/2a/2f0a18e170dc1db4b32120bea9e1162ef196c1f453db823878f5eaf7b8bb/pydantic_evals-1.102.0.tar.gz", hash = "sha256:711a6335d24a11c324e5a5c7758b12dfd77209f885ab2501d7eedb9dd5b75b18", size = 78557, upload-time = "2026-05-23T01:14:34.447Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/02/23cbcb3843b51bad4ecda57e2047fbbf82743e4bd29e694a17d366648470/pydantic_evals-1.46.0-py3-none-any.whl", hash = "sha256:6a7cdfd3bf5e5d99c76fb77e3d41897b9ef90c4ee300f937509cdbeaec8e16f9", size = 56346, upload-time = "2026-01-23T00:07:12.216Z" }, + { url = "https://files.pythonhosted.org/packages/e2/fd/2281c166b2c5cedab003b12bf8a630656cb5a9bbd552e4981ee190570d15/pydantic_evals-1.102.0-py3-none-any.whl", hash = "sha256:579edd6f7056d0fe52e03c7004377a0b9c42264c60a370258235fb0750fe20a2", size = 93529, upload-time = "2026-05-23T01:14:25.559Z" }, ] [[package]] name = "pydantic-graph" -version = "1.46.0" +version = "1.102.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, @@ -2925,46 +3183,35 @@ dependencies = [ { name = "pydantic" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b9/43/09cc322c1e7cf69e8f01fc6f09f7cd952b1fb49818cf2bee556f3b5fba07/pydantic_graph-1.46.0.tar.gz", hash = 
"sha256:ef0d316c95bdc37af20bdf3c343fb1caee2c8b536245d712c3ed46af0734319e", size = 58455, upload-time = "2026-01-23T00:07:21.125Z" } +sdist = { url = "https://files.pythonhosted.org/packages/51/37/4265a1a63eddf35a5aa621c9b2355525bdeae3eb59c3954b165fbfe31404/pydantic_graph-1.102.0.tar.gz", hash = "sha256:e285bd7115e4e92676eaf0a5e7e6faa64cda8c4819f67923a118c50666b909ab", size = 62584, upload-time = "2026-05-23T01:14:36.056Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/e9/058fd0001c2aed3675bc80d404c6171a753a4ff08bb570ec252848d6146d/pydantic_graph-1.46.0-py3-none-any.whl", hash = "sha256:cdbc609df49e2eeb9d0d4e43f87288b79ed9d021157ba639e71d862da4b71443", size = 72325, upload-time = "2026-01-23T00:07:13.807Z" }, + { url = "https://files.pythonhosted.org/packages/a4/49/5597c52d50114440047dd4ce4f6505e32ee336f43267639907d1a17648ee/pydantic_graph-1.102.0-py3-none-any.whl", hash = "sha256:b1a28314adc4abca4db02cf095d064782ec5712e0847ce7a6b79a3c84bf1fc01", size = 80100, upload-time = "2026-05-23T01:14:27.583Z" }, ] [[package]] -name = "pydantic-settings" -version = "2.12.0" +name = "pydantic-handlebars" +version = "0.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, - { name = "python-dotenv" }, - { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2d/a3/13b1f17648605d1872bbc6cc56f24d9a2f4151bbf0623b9f731282a061be/pydantic_handlebars-0.2.0.tar.gz", hash = "sha256:11ee67abddefcb624ede8c690bc0210248ac235a150d9423908a89630c9a4e98", size = 175652, upload-time = "2026-05-22T06:06:38.476Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, + { url = "https://files.pythonhosted.org/packages/4d/f1/a27154170818efe3cb38af1eb54e0f7fc155873bd3b54f39a672a918e6cb/pydantic_handlebars-0.2.0-py3-none-any.whl", hash = "sha256:e5accc8ed0dc1bd953daa2eea2c0ee1eab7a6a27029da2439abacdf4ed46a4ae", size = 49954, upload-time = "2026-05-22T06:06:37.034Z" }, ] [[package]] -name = "pydocket" -version = "0.16.6" +name = "pydantic-settings" +version = "2.14.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cloudpickle" }, - { name = "fakeredis", extra = ["lua"] }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-prometheus" }, - { name = "opentelemetry-instrumentation" }, - { name = "prometheus-client" }, - { name = "py-key-value-aio", extra = ["memory", "redis"] }, - { name = "python-json-logger" }, - { name = "redis" }, - { name = "rich" }, - { name = "typer" }, - { name = "typing-extensions" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/72/00/26befe5f58df7cd1aeda4a8d10bc7d1908ffd86b80fd995e57a2a7b3f7bd/pydocket-0.16.6.tar.gz", hash = "sha256:b96c96ad7692827214ed4ff25fcf941ec38371314db5dcc1ae792b3e9d3a0294", size = 299054, upload-time = "2026-01-09T22:09:15.405Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/b5/8f48e906c3e0205276e8bd8cb7512217a87b2685304d64be27cad5b3019f/pydantic_settings-2.14.2.tar.gz", hash = "sha256:c19dd64b19097f1de80184f0cc7b0272a13ae6e170cbf240a3e27e381ed14a5f", size = 237700, upload-time = "2026-06-19T13:44:56.324Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/0a/3f/7483e5a6dc6326b6e0c640619b5c5bd1d6e3c20e54d58f5fb86267cef00e/pydocket-0.16.6-py3-none-any.whl", hash = "sha256:683d21e2e846aa5106274e7d59210331b242d7fb0dce5b08d3b82065663ed183", size = 67697, upload-time = "2026-01-09T22:09:13.436Z" }, + { url = "https://files.pythonhosted.org/packages/77/c1/6e422f34e569cf8e18df68d1939c81c099d2b61e4f7d9621c8a77560799c/pydantic_settings-2.14.2-py3-none-any.whl", hash = "sha256:a20c97b37910b6550d5ea50fbcc2d4187defe58cd57070b73863d069419c9440", size = 61715, upload-time = "2026-06-19T13:44:55.02Z" }, ] [[package]] @@ -3019,11 +3266,11 @@ wheels = [ [[package]] name = "pyjwt" -version = "2.10.1" +version = "2.13.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/81/58d0ac84e1ef3a3843791d6954d94c0b33d526c75eeb1efbce9d0a4c4077/pyjwt-2.13.0.tar.gz", hash = "sha256:41571c89ca91598c79e8ef18a2d07367d4810fbbd6f637794879baf1b7703423", size = 107515, upload-time = "2026-05-21T19:54:36.618Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, + { url = "https://files.pythonhosted.org/packages/a3/5e/ecf12fdb62546d64385c158514e9b2b671f7832108ef2ecd2020ce0af2d1/pyjwt-2.13.0-py3-none-any.whl", hash = "sha256:66adcc2aff09b3f1bbd95fc1e1577df8ac8723c978552fd43304c8a290ac5728", size = 31274, upload-time = "2026-05-21T19:54:35.362Z" }, ] [package.optional-dependencies] @@ -3049,6 +3296,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/a6/92/d40f5d937517cc489ad848fc4414ecccc7592e4686b9071e09e64f5e378e/pylint-4.0.4-py3-none-any.whl", hash = "sha256:63e06a37d5922555ee2c20963eb42559918c20bd2b21244e4ef426e7c43b92e0", size = 536425, upload-time = "2025-11-30T13:29:02.53Z" }, ] +[[package]] +name = "pymdown-extensions" +version = "10.21.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/26/d1015444da4d952a1ca487a236b522eb979766f0295a0bd0c5fc089989a9/pymdown_extensions-10.21.3.tar.gz", hash = "sha256:72cfcf55f07aea0d4af2c4f11dd4e52466ddfb1bb819673146398e0bd3a77354", size = 854140, upload-time = "2026-05-13T12:57:32.267Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/85/545a951eecc270fcd688288c600017e2050a1aacb56c711d208586d3e470/pymdown_extensions-10.21.3-py3-none-any.whl", hash = "sha256:d7a5d08014fc571e80ca21dd6f854e31f94c489800350564d55d15b3c41e76b6", size = 269002, upload-time = "2026-05-13T12:57:30.296Z" }, +] + [[package]] name = "pymgclient" version = "1.5.1" @@ -3072,15 +3332,15 @@ wheels = [ [[package]] name = "pyopenssl" -version = "25.3.0" +version = "26.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/80/be/97b83a464498a79103036bc74d1038df4a7ef0e402cfaf4d5e113fb14759/pyopenssl-25.3.0.tar.gz", hash = "sha256:c981cb0a3fd84e8602d7afc209522773b94c1c2446a3c710a75b06fe1beae329", size = 184073, upload-time = "2025-09-17T00:32:21.037Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1a/51/27a5ad5f939d08f690a326ef9582cda7140555180db71695f6fb747d6a36/pyopenssl-26.2.0.tar.gz", hash = "sha256:8c6fcecd1183a7fc897548dfe388b0cdb7f37e018200d8409cf33959dbe35387", size = 182195, upload-time = "2026-05-04T23:06:09.72Z" } 
wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/81/ef2b1dfd1862567d573a4fdbc9f969067621764fbb74338496840a1d2977/pyopenssl-25.3.0-py3-none-any.whl", hash = "sha256:1fda6fc034d5e3d179d39e59c1895c9faeaf40a79de5fc4cbbfbe0d36f4a77b6", size = 57268, upload-time = "2025-09-17T00:32:19.474Z" }, + { url = "https://files.pythonhosted.org/packages/73/b8/a0e2790ae249d6f38c9f66de7a211621a7ab2650217bcd04e1262f578a56/pyopenssl-26.2.0-py3-none-any.whl", hash = "sha256:4f9d971bc5298b8bc1fab282803da04bf000c755d4ad9d99b52de2569ca19a70", size = 55823, upload-time = "2026-05-04T23:06:08.395Z" }, ] [[package]] @@ -3094,7 +3354,7 @@ wheels = [ [[package]] name = "pytest" -version = "9.0.2" +version = "9.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -3103,9 +3363,9 @@ dependencies = [ { name = "pluggy" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = 
"sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] [[package]] @@ -3162,29 +3422,20 @@ wheels = [ [[package]] name = "python-dotenv" -version = "1.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, -] - -[[package]] -name = "python-json-logger" -version = "4.0.0" +version = "1.2.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/29/bf/eca6a3d43db1dae7070f70e160ab20b807627ba953663ba07928cdd3dc58/python_json_logger-4.0.0.tar.gz", hash = "sha256:f58e68eb46e1faed27e0f574a55a0455eecd7b8a5b88b85a784519ba3cff047f", size = 17683, upload-time = "2025-10-06T04:15:18.984Z" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = "sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, ] [[package]] name = "python-multipart" -version = "0.0.22" +version = "0.0.31" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/94/01/979e98d542a70714b0cb2b6728ed0b7c46792b695e3eaec3e20711271ca3/python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58", size = 37612, upload-time = "2026-01-25T10:15:56.219Z" } +sdist = { url = "https://files.pythonhosted.org/packages/64/7e/9b35ad8f3d9ca680f7c87a88f19612fdd8da9796c4d3b46e560ac79dcc4a/python_multipart-0.0.31.tar.gz", hash = "sha256:fc631183bb13e56db3158a4909908dfb2e23565286744e798241e63750e5d680", size = 46689, upload-time = "2026-06-04T08:27:49.014Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" }, + { url = "https://files.pythonhosted.org/packages/5e/1e/7f7f299527a5a8ad90acd5f2f78dfa6c8495c6301a3205106ea68a84de96/python_multipart-0.0.31-py3-none-any.whl", hash = "sha256:8408153d68a9773291fc1da39a8b85a50044bddbabd2dd72e9229776b7b15e28", size = 29996, upload-time = "2026-06-04T08:27:47.804Z" }, ] [[package]] @@ -3258,9 +3509,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = 
"pyyaml-env-tag" +version = "1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/2e/79c822141bfd05a853236b504869ebc6b70159afc570e1d5a20641782eaa/pyyaml_env_tag-1.1.tar.gz", hash = "sha256:2eb38b75a2d21ee0475d6d97ec19c63287a7e140231e4214969d0eac923cd7ff", size = 5737, upload-time = "2025-05-13T15:24:01.64Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/11/432f32f8097b03e3cd5fe57e88efb685d964e2e5178a48ed61e841f7fdce/pyyaml_env_tag-1.1-py3-none-any.whl", hash = "sha256:17109e1a528561e32f026364712fee1264bc2ea6715120891174ed1b980d2e04", size = 4722, upload-time = "2025-05-13T15:23:59.629Z" }, +] + [[package]] name = "qdrant-client" -version = "1.16.2" +version = "1.18.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "grpcio" }, @@ -3271,9 +3534,9 @@ dependencies = [ { name = "pydantic" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ca/7d/3cd10e26ae97b35cf856ca1dc67576e42414ae39502c51165bb36bb1dff8/qdrant_client-1.16.2.tar.gz", hash = "sha256:ca4ef5f9be7b5eadeec89a085d96d5c723585a391eb8b2be8192919ab63185f0", size = 331112, upload-time = "2025-12-12T10:58:30.866Z" } +sdist = { url = "https://files.pythonhosted.org/packages/65/45/5b1bdd15a3c7730eefb9c113600829e20d689b82b5a23f9e07d107094004/qdrant_client-1.18.0.tar.gz", hash = "sha256:52e8ece1a7d40519801bf0b70713bfa0f6b7ae28c7275bbe0b0286fbed7f6db4", size = 352580, upload-time = "2026-05-11T14:12:38.702Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/08/13/8ce16f808297e16968269de44a14f4fef19b64d9766be1d6ba5ba78b579d/qdrant_client-1.16.2-py3-none-any.whl", hash = "sha256:442c7ef32ae0f005e88b5d3c0783c63d4912b97ae756eb5e052523be682f17d3", size = 377186, upload-time = "2025-12-12T10:58:29.282Z" }, + { url = 
"https://files.pythonhosted.org/packages/d6/10/c437bd2ac41ef30d3019063e6ce537dc111e9214473b337ee88f7fa6359a/qdrant_client-1.18.0-py3-none-any.whl", hash = "sha256:093aa8cf8a420ee3ad2a68b007e1378d7992b2600e0b53c193fc172674f659cd", size = 398126, upload-time = "2026-05-11T14:12:36.998Z" }, ] [[package]] @@ -3289,15 +3552,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/f7/d00d9b4a0313a6be3a3e0818e6375e15da6d7076f4ae47d1324e7ca986a1/radon-6.0.1-py2.py3-none-any.whl", hash = "sha256:632cc032364a6f8bb1010a2f6a12d0f14bc7e5ede76585ef29dc0cecf4cd8859", size = 52784, upload-time = "2023-03-26T06:24:33.949Z" }, ] -[[package]] -name = "redis" -version = "7.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/c8/983d5c6579a411d8a99bc5823cc5712768859b5ce2c8afe1a65b37832c81/redis-7.1.0.tar.gz", hash = "sha256:b1cc3cfa5a2cb9c2ab3ba700864fb0ad75617b41f01352ce5779dabf6d5f9c3c", size = 4796669, upload-time = "2025-11-19T15:54:39.961Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/89/f0/8956f8a86b20d7bb9d6ac0187cf4cd54d8065bc9a1a09eb8011d4d326596/redis-7.1.0-py3-none-any.whl", hash = "sha256:23c52b208f92b56103e17c5d06bdc1a6c2c0b3106583985a76a18f83b265de2b", size = 354159, upload-time = "2025-11-19T15:54:38.064Z" }, -] - [[package]] name = "referencing" version = "0.36.2" @@ -3402,7 +3656,7 @@ wheels = [ [[package]] name = "requests" -version = "2.32.5" +version = "2.33.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, @@ -3410,9 +3664,9 @@ dependencies = [ { name = "idna" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/34/64/8860370b167a9721e8956ae116825caff829224fbca0ca6e7bf8ddef8430/requests-2.33.0.tar.gz", hash = "sha256:c7ebc5e8b0f21837386ad0e1c8fe8b829fa5f544d8df3b2253bff14ef29d7652", size = 134232, upload-time = "2026-03-25T15:10:41.586Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, + { url = "https://files.pythonhosted.org/packages/56/5d/c814546c2333ceea4ba42262d8c4d55763003e767fa169adc693bd524478/requests-2.33.0-py3-none-any.whl", hash = "sha256:3324635456fa185245e24865e810cecec7b4caf933d7eb133dcde67d48cee69b", size = 65017, upload-time = "2026-03-25T15:10:40.382Z" }, ] [[package]] @@ -3522,18 +3776,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" }, ] -[[package]] -name = "rsa" -version = "4.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyasn1" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034, upload-time = "2025-04-16T09:51:18.218Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696, upload-time = "2025-04-16T09:51:17.142Z" }, -] - [[package]] name = "ruamel-yaml" version = 
"0.17.40" @@ -3612,14 +3854,14 @@ wheels = [ [[package]] name = "s3transfer" -version = "0.16.0" +version = "0.17.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9b/ec/7c692cde9125b77e84b307354d4fb705f98b8ccad59a036d5957ca75bfc3/s3transfer-0.17.0.tar.gz", hash = "sha256:9edeb6d1c3c2f89d6050348548834ad8289610d886e5bf7b7207728bd43ce33a", size = 155337, upload-time = "2026-04-29T22:07:36.33Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, + { url = "https://files.pythonhosted.org/packages/87/72/c6c32d2b657fa3dad1de340254e14390b1e334ce38268b7ad51abda3c8c2/s3transfer-0.17.0-py3-none-any.whl", hash = "sha256:ce3801712acf4ad3e89fb9990df97b4972e93f4b3b0004d214be5bce12814c20", size = 86811, upload-time = "2026-04-29T22:07:34.966Z" }, ] [[package]] @@ -3725,15 +3967,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] -[[package]] -name = "sortedcontainers" -version = "2.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", 
hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, -] - [[package]] name = "sse-starlette" version = "3.2.0" @@ -3749,15 +3982,15 @@ wheels = [ [[package]] name = "starlette" -version = "0.52.1" +version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/e3/7c1dc7381d9f8ab7d854328ebfa884e62cb3f3d8549ddfd37c7814f42afa/starlette-1.3.1.tar.gz", hash = "sha256:05d0213193f2fbaae60e2ecb593b4add4262ad4e46536b54abe36f11a71724e0", size = 2703240, upload-time = "2026-06-12T09:23:11.602Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, + { url = "https://files.pythonhosted.org/packages/ec/bb/2799cc2ede3ed41131f8975621e7213dfc7ef4acbbaadfa440f32500c370/starlette-1.3.1-py3-none-any.whl", hash = "sha256:c7372aae11c3c3f26a42df7bd626cec2f47d03483d261d369516a615a53714c6", size = 73632, upload-time = "2026-06-12T09:23:10.017Z" }, ] [[package]] @@ -3783,7 +4016,7 @@ 
wheels = [ [[package]] name = "temporalio" -version = "1.20.0" +version = "1.27.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nexus-rpc" }, @@ -3791,13 +4024,13 @@ dependencies = [ { name = "types-protobuf" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/21/db/7d5118d28b0918888e1ec98f56f659fdb006351e06d95f30f4274962a76f/temporalio-1.20.0.tar.gz", hash = "sha256:5a6a85b7d298b7359bffa30025f7deac83c74ac095a4c6952fbf06c249a2a67c", size = 1850498, upload-time = "2025-11-25T21:25:20.225Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/62/2bc1a9ad29382a3a99f088907ef2024a94420cfef340be1b33026c632828/temporalio-1.27.2.tar.gz", hash = "sha256:633bf2379492f3db1e887d1e64fdac00d9c2ddc3e9382b831d5af68256912e92", size = 2503041, upload-time = "2026-05-14T02:17:57.565Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/1b/e69052aa6003eafe595529485d9c62d1382dd5e671108f1bddf544fb6032/temporalio-1.20.0-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:fba70314b4068f8b1994bddfa0e2ad742483f0ae714d2ef52e63013ccfd7042e", size = 12061638, upload-time = "2025-11-25T21:24:57.918Z" }, - { url = "https://files.pythonhosted.org/packages/ae/3b/3e8c67ed7f23bedfa231c6ac29a7a9c12b89881da7694732270f3ecd6b0c/temporalio-1.20.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:ffc5bb6cabc6ae67f0bfba44de6a9c121603134ae18784a2ff3a7f230ad99080", size = 11562603, upload-time = "2025-11-25T21:25:01.721Z" }, - { url = "https://files.pythonhosted.org/packages/6d/be/ed0cc11702210522a79e09703267ebeca06eb45832b873a58de3ca76b9d0/temporalio-1.20.0-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1e80c1e4cdf88fa8277177f563edc91466fe4dc13c0322f26e55c76b6a219e6", size = 11824016, upload-time = "2025-11-25T21:25:06.771Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/97/09c5cafabc80139d97338a2bdd8ec22e08817dfd2949ab3e5b73565006eb/temporalio-1.20.0-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba92d909188930860c9d89ca6d7a753bc5a67e4e9eac6cea351477c967355eed", size = 12189521, upload-time = "2025-11-25T21:25:12.091Z" }, - { url = "https://files.pythonhosted.org/packages/11/23/5689c014a76aff3b744b3ee0d80815f63b1362637814f5fbb105244df09b/temporalio-1.20.0-cp310-abi3-win_amd64.whl", hash = "sha256:eacfd571b653e0a0f4aa6593f4d06fc628797898f0900d400e833a1f40cad03a", size = 12745027, upload-time = "2025-11-25T21:25:16.827Z" }, + { url = "https://files.pythonhosted.org/packages/64/85/9da14f9fbdfae95435d29353bb1c55891581ad6b23c86ca56e72d83035ed/temporalio-1.27.2-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:860f706380faafec8f183f9194d0883c8033a4211c5d19c2c962c45b06cf99e9", size = 14602829, upload-time = "2026-05-14T02:17:45.624Z" }, + { url = "https://files.pythonhosted.org/packages/24/51/b7437991e71eea082dc53222da11f064974917cd59063ba57e13e5895fbc/temporalio-1.27.2-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:a8dc0c680e351f3132809861888d8326dbd5030dd4e570663597e7d4768d9502", size = 13997680, upload-time = "2026-05-14T02:17:53.968Z" }, + { url = "https://files.pythonhosted.org/packages/8c/5d/358065040e6f0cedbf669acd333622999eec737ff868ca7829d727b77746/temporalio-1.27.2-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:805f3de4d193dec52e040e41dbfc9ab44be0206d2e81142ceefaf7b7208058d1", size = 14252199, upload-time = "2026-05-14T02:17:36.972Z" }, + { url = "https://files.pythonhosted.org/packages/72/8a/85d2eab07c3e23fc1124203e76857c69ab9b22d8ccebad0835e294edb754/temporalio-1.27.2-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5bc996cb501b8a918f50037ccee6facb05bb70984acada4c2a3e01f5e7957a38", size = 14779945, upload-time = "2026-05-14T02:18:05.513Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/81/c9b08609e2a92ecf62c97c59cabfa0608337c8d5cc9941eed5d9a7778840/temporalio-1.27.2-cp310-abi3-win_amd64.whl", hash = "sha256:62a84ae9a60c17932971e4ca3b0f3cd6f32f173b8183e759989376503fb95af6", size = 14981897, upload-time = "2026-05-14T02:17:27.333Z" }, ] [[package]] @@ -3927,57 +4160,42 @@ wheels = [ [[package]] name = "torch" -version = "2.10.0" +version = "2.12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cuda-bindings", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "cuda-bindings", marker = "sys_platform == 'linux'" }, + { name = "cuda-toolkit", extra = ["cudart", "cufft", "cufile", "cupti", "curand", "cusolver", "cusparse", "nvjitlink", "nvrtc", "nvtx"], marker = "sys_platform == 'linux'" }, { name = "filelock" }, { name = "fsspec" }, { name = "jinja2" }, { name = "networkx" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = 
"nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cublas", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu13", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu13", marker = "sys_platform == 'linux'" }, { name = "setuptools" }, { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "triton", marker = "sys_platform == 'linux'" }, { name = "typing-extensions" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/2f/0b295dd8d199ef71e6f176f576473d645d41357b7b8aa978cc6b042575df/torch-2.10.0-1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6abb224c2b6e9e27b592a1c0015c33a504b00a0e0938f1499f7f514e9b7bfb5c", size = 79498197, upload-time = "2026-02-06T17:37:27.627Z" }, - { url = "https://files.pythonhosted.org/packages/a4/1b/af5fccb50c341bd69dc016769503cb0857c1423fbe9343410dfeb65240f2/torch-2.10.0-1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7350f6652dfd761f11f9ecb590bfe95b573e2961f7a242eccb3c8e78348d26fe", size = 79498248, upload-time = "2026-02-06T17:37:31.982Z" }, - { url = "https://files.pythonhosted.org/packages/cc/af/758e242e9102e9988969b5e621d41f36b8f258bb4a099109b7a4b4b50ea4/torch-2.10.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5fd4117d89ffd47e3dcc71e71a22efac24828ad781c7e46aaaf56bf7f2796acf", size = 145996088, 
upload-time = "2026-01-21T16:24:44.171Z" }, - { url = "https://files.pythonhosted.org/packages/23/8e/3c74db5e53bff7ed9e34c8123e6a8bfef718b2450c35eefab85bb4a7e270/torch-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:787124e7db3b379d4f1ed54dd12ae7c741c16a4d29b49c0226a89bea50923ffb", size = 915711952, upload-time = "2026-01-21T16:23:53.503Z" }, - { url = "https://files.pythonhosted.org/packages/6e/01/624c4324ca01f66ae4c7cd1b74eb16fb52596dce66dbe51eff95ef9e7a4c/torch-2.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:2c66c61f44c5f903046cc696d088e21062644cbe541c7f1c4eaae88b2ad23547", size = 113757972, upload-time = "2026-01-21T16:24:39.516Z" }, - { url = "https://files.pythonhosted.org/packages/c9/5c/dee910b87c4d5c0fcb41b50839ae04df87c1cfc663cf1b5fca7ea565eeaa/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6d3707a61863d1c4d6ebba7be4ca320f42b869ee657e9b2c21c736bf17000294", size = 79498198, upload-time = "2026-01-21T16:24:34.704Z" }, - { url = "https://files.pythonhosted.org/packages/c9/6f/f2e91e34e3fcba2e3fc8d8f74e7d6c22e74e480bbd1db7bc8900fdf3e95c/torch-2.10.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5c4d217b14741e40776dd7074d9006fd28b8a97ef5654db959d8635b2fe5f29b", size = 146004247, upload-time = "2026-01-21T16:24:29.335Z" }, - { url = "https://files.pythonhosted.org/packages/98/fb/5160261aeb5e1ee12ee95fe599d0541f7c976c3701d607d8fc29e623229f/torch-2.10.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6b71486353fce0f9714ca0c9ef1c850a2ae766b409808acd58e9678a3edb7738", size = 915716445, upload-time = "2026-01-21T16:22:45.353Z" }, - { url = "https://files.pythonhosted.org/packages/6a/16/502fb1b41e6d868e8deb5b0e3ae926bbb36dab8ceb0d1b769b266ad7b0c3/torch-2.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:c2ee399c644dc92ef7bc0d4f7e74b5360c37cdbe7c5ba11318dda49ffac2bc57", size = 113757050, upload-time = "2026-01-21T16:24:19.204Z" }, - { url = 
"https://files.pythonhosted.org/packages/1a/0b/39929b148f4824bc3ad6f9f72a29d4ad865bcf7ebfc2fa67584773e083d2/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:3202429f58309b9fa96a614885eace4b7995729f44beb54d3e4a47773649d382", size = 79851305, upload-time = "2026-01-21T16:24:09.209Z" }, - { url = "https://files.pythonhosted.org/packages/d8/14/21fbce63bc452381ba5f74a2c0a959fdf5ad5803ccc0c654e752e0dbe91a/torch-2.10.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:aae1b29cd68e50a9397f5ee897b9c24742e9e306f88a807a27d617f07adb3bd8", size = 146005472, upload-time = "2026-01-21T16:22:29.022Z" }, - { url = "https://files.pythonhosted.org/packages/54/fd/b207d1c525cb570ef47f3e9f836b154685011fce11a2f444ba8a4084d042/torch-2.10.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6021db85958db2f07ec94e1bc77212721ba4920c12a18dc552d2ae36a3eb163f", size = 915612644, upload-time = "2026-01-21T16:21:47.019Z" }, - { url = "https://files.pythonhosted.org/packages/36/53/0197f868c75f1050b199fe58f9bf3bf3aecac9b4e85cc9c964383d745403/torch-2.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff43db38af76fda183156153983c9a096fc4c78d0cd1e07b14a2314c7f01c2c8", size = 113997015, upload-time = "2026-01-21T16:23:00.767Z" }, - { url = "https://files.pythonhosted.org/packages/0e/13/e76b4d9c160e89fff48bf16b449ea324bda84745d2ab30294c37c2434c0d/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:cdf2a523d699b70d613243211ecaac14fe9c5df8a0b0a9c02add60fb2a413e0f", size = 79498248, upload-time = "2026-01-21T16:23:09.315Z" }, - { url = "https://files.pythonhosted.org/packages/4f/93/716b5ac0155f1be70ed81bacc21269c3ece8dba0c249b9994094110bfc51/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:bf0d9ff448b0218e0433aeb198805192346c4fd659c852370d5cc245f602a06a", size = 79464992, upload-time = "2026-01-21T16:23:05.162Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/2b/51e663ff190c9d16d4a8271203b71bc73a16aa7619b9f271a69b9d4a936b/torch-2.10.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:233aed0659a2503b831d8a67e9da66a62c996204c0bba4f4c442ccc0c68a3f60", size = 146018567, upload-time = "2026-01-21T16:22:23.393Z" }, - { url = "https://files.pythonhosted.org/packages/5e/cd/4b95ef7f293b927c283db0b136c42be91c8ec6845c44de0238c8c23bdc80/torch-2.10.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:682497e16bdfa6efeec8cde66531bc8d1fbbbb4d8788ec6173c089ed3cc2bfe5", size = 915721646, upload-time = "2026-01-21T16:21:16.983Z" }, - { url = "https://files.pythonhosted.org/packages/56/97/078a007208f8056d88ae43198833469e61a0a355abc0b070edd2c085eb9a/torch-2.10.0-cp314-cp314-win_amd64.whl", hash = "sha256:6528f13d2a8593a1a412ea07a99812495bec07e9224c28b2a25c0a30c7da025c", size = 113752373, upload-time = "2026-01-21T16:22:13.471Z" }, - { url = "https://files.pythonhosted.org/packages/d8/94/71994e7d0d5238393df9732fdab607e37e2b56d26a746cb59fdb415f8966/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f5ab4ba32383061be0fb74bda772d470140a12c1c3b58a0cfbf3dae94d164c28", size = 79850324, upload-time = "2026-01-21T16:22:09.494Z" }, - { url = "https://files.pythonhosted.org/packages/e2/65/1a05346b418ea8ccd10360eef4b3e0ce688fba544e76edec26913a8d0ee0/torch-2.10.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:716b01a176c2a5659c98f6b01bf868244abdd896526f1c692712ab36dbaf9b63", size = 146006482, upload-time = "2026-01-21T16:22:18.42Z" }, - { url = "https://files.pythonhosted.org/packages/1d/b9/5f6f9d9e859fc3235f60578fa64f52c9c6e9b4327f0fe0defb6de5c0de31/torch-2.10.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:d8f5912ba938233f86361e891789595ff35ca4b4e2ac8fe3670895e5976731d6", size = 915613050, upload-time = "2026-01-21T16:20:49.035Z" }, - { url = 
"https://files.pythonhosted.org/packages/66/4d/35352043ee0eaffdeff154fad67cd4a31dbed7ff8e3be1cc4549717d6d51/torch-2.10.0-cp314-cp314t-win_amd64.whl", hash = "sha256:71283a373f0ee2c89e0f0d5f446039bdabe8dbc3c9ccf35f0f784908b0acd185", size = 113995816, upload-time = "2026-01-21T16:22:05.312Z" }, + { url = "https://files.pythonhosted.org/packages/f0/54/efb7ebca77970012b0cc21687a55d70eb2ba514b2c2b8e18d9fb1222f3be/torch-2.12.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:d2dd0f2c5f7ccbddaf34cade0deaf476808368f902b9cdb7f36a2ab42301bc0e", size = 87991951, upload-time = "2026-06-17T21:07:49.309Z" }, + { url = "https://files.pythonhosted.org/packages/1e/00/4210d76ca7424981f04033ebe7e48816ab83287a62538747a58825db770c/torch-2.12.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:2de4e19b88a481482c6c75291f2d6a52eda3ce51f311b29aa9b68499c830c07c", size = 426382721, upload-time = "2026-06-17T21:06:41.842Z" }, + { url = "https://files.pythonhosted.org/packages/76/1f/bc9f5a5aa569307076365f25afcebacb22e9c754b1bcfbaaa146627c7fda/torch-2.12.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:649e4ced014ba646f76f8cb9c9726735a6323eb321b7919f942790a923f90921", size = 532261322, upload-time = "2026-06-17T21:06:06.673Z" }, + { url = "https://files.pythonhosted.org/packages/9e/49/c549461daa008159d006a76a991fbc2f26fa8bac27a4030c858463dcb20f/torch-2.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:e86550597877fb272ddc52db2f85b82cb601ea7bd932576a0340152cae2200b3", size = 122988095, upload-time = "2026-06-17T21:07:44.9Z" }, + { url = "https://files.pythonhosted.org/packages/ff/4a/0300261818e1560d72cc160ac826005507e8b7ca0a35788b591436d05b4a/torch-2.12.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:c75e93173c700bccd6bfcc4a9d19ce242ab6dacd1f1781483027a16239b9e650", size = 87992358, upload-time = "2026-06-17T21:07:40.299Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/a7/874a5ca05e8f159211dca7921060f7057acc1adb26431e119fd150623efc/torch-2.12.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:fcb61ccd20784b62bdd78ec84238a5cfb383b4994902e03bac95505ab360884c", size = 426386134, upload-time = "2026-06-17T21:07:31.481Z" }, + { url = "https://files.pythonhosted.org/packages/e1/75/20bb8fe9c1ad6538cce8cd0391b51927ae5af0b17ed1eab44b8824465dc1/torch-2.12.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:f4afc8083dff08719edbea346644476e3cec0cf40ebe256be0ee5d5b7c7e8c0d", size = 532268019, upload-time = "2026-06-17T21:05:37.925Z" }, + { url = "https://files.pythonhosted.org/packages/d1/fa/824ddb662af55b2eabc0dbb7b57c7c0b1bcd93693754a2b8509ec4d16490/torch-2.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:f92609e3b3ce72f25e2eb780d043ced2480c1a86c47c852604fc7a9108648386", size = 122987777, upload-time = "2026-06-17T21:07:09.49Z" }, + { url = "https://files.pythonhosted.org/packages/63/b7/1b49fe7086ea36839cc80abc43174c43d0ab6f676c0891c871c162f44fe3/torch-2.12.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:e9b6f7d2dd66ea87a3ae620069d31335d594c06effb1a383bdd21cfe61e44ece", size = 88010025, upload-time = "2026-06-17T21:07:03.934Z" }, + { url = "https://files.pythonhosted.org/packages/d7/06/5b44063a6545036dcc680d2d303b137d9176cfb2cc1e1863e3ef94abeb52/torch-2.12.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:7973ccd3d2cd35c74449213f7bded199bec6c6247e705cbeda7407af79703d91", size = 426392891, upload-time = "2026-06-17T21:05:52.261Z" }, + { url = "https://files.pythonhosted.org/packages/f8/dd/c9ce9a4b0eb3c5bb92d9ea56766e2c22559f0b45171149188494edcce80f/torch-2.12.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:c64ac4aac16be5e296dcd912305605804b203333c690bf98c55bc09494ee92ad", size = 532272494, upload-time = "2026-06-17T21:06:22.72Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/7c/f3a601fc1b1f663ff269bfe553654e638651939aa6563e8daa7167c33098/torch-2.12.1-cp314-cp314-win_amd64.whl", hash = "sha256:f6dc4caf7eb4adb38a2d9f536b51db56310fdd1254e69a2d96767e1367c892b3", size = 122987254, upload-time = "2026-06-17T21:06:33.199Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/b8087556cf81ddd808dbeb34afb8396d7ae7a1694ab489f08b1a0004e7d0/torch-2.12.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:2afbb2bdaa8a95040e733f05492ddf133c3967c9b7ce0abd218d704b6cab437d", size = 88303173, upload-time = "2026-06-17T21:05:06.603Z" }, + { url = "https://files.pythonhosted.org/packages/4a/07/fe09d1699fbed2afa10ebc692ff2b99d113f2605b6748cea633989e2789a/torch-2.12.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:97eba061fcb042fed191400b15568990073d67eaacaa6ee9b7ca01dd8b790fe9", size = 426404009, upload-time = "2026-06-17T21:04:57.557Z" }, + { url = "https://files.pythonhosted.org/packages/2e/f7/0ce4f6c1962c60ded7270e0a9eb560fb615c92b89d332cf9e3dff36d5ecc/torch-2.12.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:3867b861391701012adb2df93360efb88494dca245a185e3bb7624495cfe3f33", size = 532184292, upload-time = "2026-06-17T21:05:17.526Z" }, + { url = "https://files.pythonhosted.org/packages/70/db/e384c12aba30320ca92aaaf557456cbcb26f04b4df307728bb8f019f5000/torch-2.12.1-cp314-cp314t-win_amd64.whl", hash = "sha256:dd15595f8fc764cffde8c6361a3beb6ef69a028c851b1b3e70e077f615980d4e", size = 123231142, upload-time = "2026-06-17T21:05:27.061Z" }, ] [[package]] @@ -3994,45 +4212,66 @@ wheels = [ [[package]] name = "transformers" -version = "4.57.6" +version = "5.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock" }, { name = "huggingface-hub" }, { name = "numpy" }, { name = "packaging" }, { name = "pyyaml" }, { name = "regex" }, - { name = "requests" }, { name = "safetensors" }, { name = "tokenizers" }, { name = "tqdm" }, + { name = "typer" }, ] -sdist = { 
url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/1a/70e830d53ecc96ce69cfa8de38f163712d2b43ac52fbd743f39f56025c31/transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557", size = 8830831, upload-time = "2026-03-04T17:41:46.119Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" }, + { url = "https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827, upload-time = "2026-03-04T17:41:42.722Z" }, ] [[package]] name = "tree-sitter" -version = "0.25.0" +version = "0.25.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/7c/0350cfc47faadc0d3cf7d8237a4e34032b3014ddf4a12ded9933e1648b55/tree-sitter-0.25.2.tar.gz", hash = "sha256:fe43c158555da46723b28b52e058ad444195afd1db3ca7720c59a254544e9c20", size = 177961, upload-time = "2025-09-25T17:37:59.751Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/9e/20c2a00a862f1c2897a436b17edb774e831b22218083b459d0d081c9db33/tree_sitter-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ddabfff809ffc983fc9963455ba1cecc90295803e06e140a4c83e94c1fa3d960", size = 146941, upload-time = "2025-09-25T17:37:34.813Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/04/8512e2062e652a1016e840ce36ba1cc33258b0dcc4e500d8089b4054afec/tree_sitter-0.25.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c0c0ab5f94938a23fe81928a21cc0fac44143133ccc4eb7eeb1b92f84748331c", size = 137699, upload-time = "2025-09-25T17:37:36.349Z" }, + { url = "https://files.pythonhosted.org/packages/47/8a/d48c0414db19307b0fb3bb10d76a3a0cbe275bb293f145ee7fba2abd668e/tree_sitter-0.25.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd12d80d91d4114ca097626eb82714618dcdfacd6a5e0955216c6485c350ef99", size = 607125, upload-time = "2025-09-25T17:37:37.725Z" }, + { url = "https://files.pythonhosted.org/packages/39/d1/b95f545e9fc5001b8a78636ef942a4e4e536580caa6a99e73dd0a02e87aa/tree_sitter-0.25.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b43a9e4c89d4d0839de27cd4d6902d33396de700e9ff4c5ab7631f277a85ead9", size = 635418, upload-time = "2025-09-25T17:37:38.922Z" }, + { url = "https://files.pythonhosted.org/packages/de/4d/b734bde3fb6f3513a010fa91f1f2875442cdc0382d6a949005cd84563d8f/tree_sitter-0.25.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbb1706407c0e451c4f8cc016fec27d72d4b211fdd3173320b1ada7a6c74c3ac", size = 631250, upload-time = "2025-09-25T17:37:40.039Z" }, + { url = "https://files.pythonhosted.org/packages/46/f2/5f654994f36d10c64d50a192239599fcae46677491c8dd53e7579c35a3e3/tree_sitter-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:6d0302550bbe4620a5dc7649517c4409d74ef18558276ce758419cf09e578897", size = 127156, upload-time = "2025-09-25T17:37:41.132Z" }, + { url = "https://files.pythonhosted.org/packages/67/23/148c468d410efcf0a9535272d81c258d840c27b34781d625f1f627e2e27d/tree_sitter-0.25.2-cp312-cp312-win_arm64.whl", hash = "sha256:0c8b6682cac77e37cfe5cf7ec388844957f48b7bd8d6321d0ca2d852994e10d5", size = 113984, upload-time = "2025-09-25T17:37:42.074Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/67/67492014ce32729b63d7ef318a19f9cfedd855d677de5773476caf771e96/tree_sitter-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0628671f0de69bb279558ef6b640bcfc97864fe0026d840f872728a86cd6b6cd", size = 146926, upload-time = "2025-09-25T17:37:43.041Z" }, + { url = "https://files.pythonhosted.org/packages/4e/9c/a278b15e6b263e86c5e301c82a60923fa7c59d44f78d7a110a89a413e640/tree_sitter-0.25.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f5ddcd3e291a749b62521f71fc953f66f5fd9743973fd6dd962b092773569601", size = 137712, upload-time = "2025-09-25T17:37:44.039Z" }, + { url = "https://files.pythonhosted.org/packages/54/9a/423bba15d2bf6473ba67846ba5244b988cd97a4b1ea2b146822162256794/tree_sitter-0.25.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd88fbb0f6c3a0f28f0a68d72df88e9755cf5215bae146f5a1bdc8362b772053", size = 607873, upload-time = "2025-09-25T17:37:45.477Z" }, + { url = "https://files.pythonhosted.org/packages/ed/4c/b430d2cb43f8badfb3a3fa9d6cd7c8247698187b5674008c9d67b2a90c8e/tree_sitter-0.25.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b878e296e63661c8e124177cc3084b041ba3f5936b43076d57c487822426f614", size = 636313, upload-time = "2025-09-25T17:37:46.68Z" }, + { url = "https://files.pythonhosted.org/packages/9d/27/5f97098dbba807331d666a0997662e82d066e84b17d92efab575d283822f/tree_sitter-0.25.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d77605e0d353ba3fe5627e5490f0fbfe44141bafa4478d88ef7954a61a848dae", size = 631370, upload-time = "2025-09-25T17:37:47.993Z" }, + { url = "https://files.pythonhosted.org/packages/d4/3c/87caaed663fabc35e18dc704cd0e9800a0ee2f22bd18b9cbe7c10799895d/tree_sitter-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:463c032bd02052d934daa5f45d183e0521ceb783c2548501cf034b0beba92c9b", size = 127157, upload-time = "2025-09-25T17:37:48.967Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/23/f8467b408b7988aff4ea40946a4bd1a2c1a73d17156a9d039bbaff1e2ceb/tree_sitter-0.25.2-cp313-cp313-win_arm64.whl", hash = "sha256:b3f63a1796886249bd22c559a5944d64d05d43f2be72961624278eff0dcc5cb8", size = 113975, upload-time = "2025-09-25T17:37:49.922Z" }, + { url = "https://files.pythonhosted.org/packages/07/e3/d9526ba71dfbbe4eba5e51d89432b4b333a49a1e70712aa5590cd22fc74f/tree_sitter-0.25.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:65d3c931013ea798b502782acab986bbf47ba2c452610ab0776cf4a8ef150fc0", size = 146776, upload-time = "2025-09-25T17:37:50.898Z" }, + { url = "https://files.pythonhosted.org/packages/42/97/4bd4ad97f85a23011dd8a535534bb1035c4e0bac1234d58f438e15cff51f/tree_sitter-0.25.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:bda059af9d621918efb813b22fb06b3fe00c3e94079c6143fcb2c565eb44cb87", size = 137732, upload-time = "2025-09-25T17:37:51.877Z" }, + { url = "https://files.pythonhosted.org/packages/b6/19/1e968aa0b1b567988ed522f836498a6a9529a74aab15f09dd9ac1e41f505/tree_sitter-0.25.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eac4e8e4c7060c75f395feec46421eb61212cb73998dbe004b7384724f3682ab", size = 609456, upload-time = "2025-09-25T17:37:52.925Z" }, + { url = "https://files.pythonhosted.org/packages/48/b6/cf08f4f20f4c9094006ef8828555484e842fc468827ad6e56011ab668dbd/tree_sitter-0.25.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:260586381b23be33b6191a07cea3d44ecbd6c01aa4c6b027a0439145fcbc3358", size = 636772, upload-time = "2025-09-25T17:37:54.647Z" }, + { url = "https://files.pythonhosted.org/packages/57/e2/d42d55bf56360987c32bc7b16adb06744e425670b823fb8a5786a1cea991/tree_sitter-0.25.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7d2ee1acbacebe50ba0f85fff1bc05e65d877958f00880f49f9b2af38dce1af0", size = 631522, upload-time = "2025-09-25T17:37:55.833Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/87/af9604ebe275a9345d88c3ace0cf2a1341aa3f8ef49dd9fc11662132df8a/tree_sitter-0.25.2-cp314-cp314-win_amd64.whl", hash = "sha256:4973b718fcadfb04e59e746abfbb0288694159c6aeecd2add59320c03368c721", size = 130864, upload-time = "2025-09-25T17:37:57.453Z" }, + { url = "https://files.pythonhosted.org/packages/a6/6e/e64621037357acb83d912276ffd30a859ef117f9c680f2e3cb955f47c680/tree_sitter-0.25.2-cp314-cp314-win_arm64.whl", hash = "sha256:b8d4429954a3beb3e844e2872610d2a4800ba4eb42bb1990c6a4b1949b18459f", size = 117470, upload-time = "2025-09-25T17:37:58.431Z" }, +] + +[[package]] +name = "tree-sitter-c" +version = "0.24.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/98/21/e952c3180f0fd83d09cee9e0bc29f67827c659cee45077ae06eb7d813cfc/tree-sitter-0.25.0.tar.gz", hash = "sha256:15c88775cf24db06677bafe62df058a6457d8a6dde67baa48dd3723b905e79a6", size = 177740, upload-time = "2025-07-20T13:17:48.886Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/f5/ba8cd08d717277551ade8537d3aa2a94b907c6c6e0fbcf4e4d8b1c747fa3/tree_sitter_c-0.24.1.tar.gz", hash = "sha256:7d2d0cda0b8dda428c81440c1e94367f9f13548eedca3f49768bde66b1422ad6", size = 228014, upload-time = "2025-05-24T17:32:58.384Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/75/36a4726a09aeb0477ca4a45aba4abf9705642b871539005ca91ddd68faa3/tree_sitter-0.25.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d9efacce0140ad74f97e027fb4ae693debff05f6246f3e024937f9500a0e874a", size = 147016, upload-time = "2025-07-20T13:17:33.921Z" }, - { url = "https://files.pythonhosted.org/packages/ff/5e/a549a21e459de94056cf48ca5e10e3774bc9b0460ffb3aec469a5f6001c0/tree_sitter-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:82b4a5535107d2b8feee085edcafa89858faa4e1a98e94cfe1740c0ca8c28d84", size = 140832, upload-time = "2025-07-20T13:17:34.82Z" }, - { url = 
"https://files.pythonhosted.org/packages/d7/ed/7cc29a309e5f5cc209902c93589d29a4faeb656c7eecc1abd86842633b8f/tree_sitter-0.25.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c613372545490dfba3b3e7d934fda1156e3d16b27c0335c65a92f2b4fa6af5da", size = 617875, upload-time = "2025-07-20T13:17:35.693Z" }, - { url = "https://files.pythonhosted.org/packages/76/fc/43a61a35f021429d905ce272be9a9ea6dad6fe2c849782c53bd083a935cf/tree_sitter-0.25.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:241a90c815a354594d3147012ce470cfc797695ab768e29198815e147ef3c165", size = 635857, upload-time = "2025-07-20T13:17:36.676Z" }, - { url = "https://files.pythonhosted.org/packages/9b/28/c9236c505e35b3aedb3c941a359a708c173cbedab8d843fec729bab81ed9/tree_sitter-0.25.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f0b01b5068f1888af223021ba461480df28c76f39893c8113aae2154a2b81fd", size = 632649, upload-time = "2025-07-20T13:17:37.56Z" }, - { url = "https://files.pythonhosted.org/packages/13/d3/5dff82a02646619545c4e7c9b9ec87bc126f1937760228fcf2e91f5079c7/tree_sitter-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:1807bd1dae1f50721d65b270e6ffa85de84234ae39f98f4da702db56c2627e23", size = 126785, upload-time = "2025-07-20T13:17:38.488Z" }, - { url = "https://files.pythonhosted.org/packages/71/61/4fffd405569d9c1551906766825da75a2d8f1c075be8994542d5d7ba7768/tree_sitter-0.25.0-cp312-cp312-win_arm64.whl", hash = "sha256:7848be6aeab5c1d62d649506d80d0e463727cb1bb55f423e88bf317db0be8d67", size = 113615, upload-time = "2025-07-20T13:17:39.965Z" }, - { url = "https://files.pythonhosted.org/packages/7a/fd/7578088dddec9b89b60d8dfea1901f3a5dff61b66d3c637c309b6209c8db/tree_sitter-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:689a19d51103f727a545ec9ba9cd377267445859838c38ec55d159dc57e82e8a", size = 147009, upload-time = "2025-07-20T13:17:41.038Z" }, - { url = 
"https://files.pythonhosted.org/packages/7a/3e/6e3dac18c119acf738174a19ce91d89b34f6ad1ca1c5dd57b245ae15c935/tree_sitter-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:86288b218ef958dcafe40030d6d70c99baffaf808bd81b49de160f9724fc0ba4", size = 140828, upload-time = "2025-07-20T13:17:42.023Z" }, - { url = "https://files.pythonhosted.org/packages/fa/21/94d26f5d488d85bf5201280f82ce7de374ce30ed5d5469e57623d64ead9a/tree_sitter-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5241610319177ee2f68b8e719bf1e1b309155e126d9cd567ff84f20878d7e5d0", size = 618600, upload-time = "2025-07-20T13:17:43.203Z" }, - { url = "https://files.pythonhosted.org/packages/67/74/e852445871c0a82bfa5e3d16541e0ce6775ef458d3a8f03ab3737c661832/tree_sitter-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ae1553d652a54926f80dc0a42fba07db110bb1a3ebaf47d1c4c64f8d44dd8207", size = 636691, upload-time = "2025-07-20T13:17:44.382Z" }, - { url = "https://files.pythonhosted.org/packages/87/67/759afe10e0018aa3ca3269df0257228b2df120e3956171a3667b133f3100/tree_sitter-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ccac581551407a73a519b872553973598b69d3d237ffaf32408fb38ecb775484", size = 632730, upload-time = "2025-07-20T13:17:45.687Z" }, - { url = "https://files.pythonhosted.org/packages/8d/42/24a80dafdb32f1f7d16e3236f2ba8a2bc7b0e5c2a19c7b45f874f0980e90/tree_sitter-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:d58e912869514ebb441b15c22a13a9c78f1b69be15f6a42b1d18e3f790e5d6ba", size = 126779, upload-time = "2025-07-20T13:17:46.943Z" }, - { url = "https://files.pythonhosted.org/packages/6f/2e/6af369e9d6deab9baaa60e2fa91acf82a68c63d835a2fe4f4265674ecc53/tree_sitter-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:a1b8302161fa8da52cfafcd7575fa7d5806a9608a0b51c7a1fe45bfe70b62d46", size = 113623, upload-time = "2025-07-20T13:17:47.718Z" }, + { url = 
"https://files.pythonhosted.org/packages/15/c7/c817be36306e457c2d36cc324789046390d9d8c555c38772429ffdb7d361/tree_sitter_c-0.24.1-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9c06ac26a1efdcc8b26a8a6970fbc6997c4071857359e5837d4c42892d45fe1e", size = 80940, upload-time = "2025-05-24T17:32:49.967Z" }, + { url = "https://files.pythonhosted.org/packages/7a/42/283909467290b24fdbc29bb32ee20e409a19a55002b43175d66d091ca1a4/tree_sitter_c-0.24.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:942bcd7cbecd810dcf7ca6f8f834391ebf0771a89479646d891ba4ca2fdfdc88", size = 86304, upload-time = "2025-05-24T17:32:51.271Z" }, + { url = "https://files.pythonhosted.org/packages/94/53/fb4f61d4e5f15ec3da85774a4df8e58d3b5b73036cf167f0203b4dd9d158/tree_sitter_c-0.24.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a74cfd7a11ca5a961fafd4d751892ee65acae667d2818968a6f079397d8d28c", size = 109996, upload-time = "2025-05-24T17:32:52.119Z" }, + { url = "https://files.pythonhosted.org/packages/5e/e8/fc541d34ee81c386c5453c2596c1763e8e9cd7cb0725f39d7dfa2276afa4/tree_sitter_c-0.24.1-cp310-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6a807705a3978911dc7ee26a7ad36dcfacb6adfc13c190d496660ec9bd66707", size = 98137, upload-time = "2025-05-24T17:32:53.361Z" }, + { url = "https://files.pythonhosted.org/packages/32/c6/d0563319cae0d5b5780a92e2806074b24afea2a07aa4c10599b899bda3ec/tree_sitter_c-0.24.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:789781afcb710df34144f7e2a20cd80e325114b9119e3956c6bd1dd2d365df98", size = 94148, upload-time = "2025-05-24T17:32:54.855Z" }, + { url = "https://files.pythonhosted.org/packages/50/5a/6361df7f3fa2310c53a0d26b4702a261c332da16fa9d801e381e3a86e25f/tree_sitter_c-0.24.1-cp310-abi3-win_amd64.whl", hash = "sha256:290bff0f9c79c966496ebae45042f77543e6e4aea725f40587a8611d566231a8", size = 84703, upload-time = "2025-05-24T17:32:56.084Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/6a/210a302e8025ac492cbaea58d3720d66b7d8034c5d747ac5e4d2d235aa25/tree_sitter_c-0.24.1-cp310-abi3-win_arm64.whl", hash = "sha256:d46bbda06f838c2dcb91daf767813671fd366b49ad84ff37db702129267b46e1", size = 82715, upload-time = "2025-05-24T17:32:57.248Z" }, ] [[package]] @@ -4113,6 +4352,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/ac/2615b858c9fc6c2f5458c6375c501392ef45c486e576985393521ca50971/tree_sitter_lua-0.4.1-cp310-abi3-win_arm64.whl", hash = "sha256:081577e4ca58f3b4f1856794f3e2f5a0955476b68a2a50baf85c9bb05b932738", size = 22752, upload-time = "2025-12-31T12:50:38.117Z" }, ] +[[package]] +name = "tree-sitter-php" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/c8/1a499038cb4036bea1d560ffbc807a6fb940261aa22296bd49a62ed8bcba/tree_sitter_php-0.24.1-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:d56e2dcf025450f84a2cdbf4b18a09e6cb88b92e9e6858e63de3d4133ab2e43e", size = 219550, upload-time = "2025-08-16T22:14:30.212Z" }, + { url = "https://files.pythonhosted.org/packages/ab/5e/b52f2599acb29f6899470f7137d3d491c752b88df3950fb7408aea57ddca/tree_sitter_php-0.24.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:29759c67d4c27a68c227ed82c0b7e4699617b1bd23757d50c081f81a12b4f80d", size = 229632, upload-time = "2025-08-16T22:14:31.85Z" }, + { url = "https://files.pythonhosted.org/packages/6b/58/ca290da45380bd6ba7c6b0b98cc5fc30325c32c7f14f0c93196a451b19c4/tree_sitter_php-0.24.1-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94b89832ac09f078eed2acd88598838bc51012224cbcebb916dbb6a37e74357e", size = 325351, upload-time = "2025-08-16T22:14:33Z" }, + { url = "https://files.pythonhosted.org/packages/9a/c6/fd863a7a779d0ab67688939eba0e08bff7b1ffe731288d3d3610df21217b/tree_sitter_php-0.24.1-cp310-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:7a1404a30f2972498ace040b0029738b8dac45d0a12932ccb8b605eb94bafbe4", size = 313021, upload-time = "2025-08-16T22:14:34.394Z" }, + { url = "https://files.pythonhosted.org/packages/48/ed/aace12f30c4f5474a9ad0e9da85c060174e3764342c9860974bb0feb02fc/tree_sitter_php-0.24.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3e96f61462a960c78e5389c7ba6c16c25e66b465c763b8e63ad66423326c2fa7", size = 305905, upload-time = "2025-08-16T22:14:35.846Z" }, + { url = "https://files.pythonhosted.org/packages/4e/c4/6c690c33b1ae9cae9505c0a2896f046fda174d72c46bdafce6aab3b2f2e7/tree_sitter_php-0.24.1-cp310-abi3-win_amd64.whl", hash = "sha256:1a1b65b72a8410d421f914ee13d38fd546a94d01cb834f69b27c78ba7589a5b5", size = 208014, upload-time = "2025-08-16T22:14:37.206Z" }, + { url = "https://files.pythonhosted.org/packages/7b/69/54c670d725c092b89e76ca6984582b6a768b128ac1859ed48141b124da1d/tree_sitter_php-0.24.1-cp310-abi3-win_arm64.whl", hash = "sha256:56a70c5ef1bddb15f220a479b2f2edf3042c764b6c443921fbd7ca9174d664e3", size = 206033, upload-time = "2025-08-16T22:14:38.632Z" }, +] + [[package]] name = "tree-sitter-python" version = "0.25.0" @@ -4176,14 +4429,17 @@ wheels = [ [[package]] name = "triton" -version = "3.6.0" +version = "3.7.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, - { url = "https://files.pythonhosted.org/packages/f9/0b/37d991d8c130ce81a8728ae3c25b6e60935838e9be1b58791f5997b24a54/triton-3.6.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c7f76c6e72d2ef08df639e3d0d30729112f47a56b0c81672edc05ee5116ac9", size = 188289450, upload-time = "2026-01-20T16:00:49.136Z" }, - { url = 
"https://files.pythonhosted.org/packages/35/f8/9c66bfc55361ec6d0e4040a0337fb5924ceb23de4648b8a81ae9d33b2b38/triton-3.6.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d002e07d7180fd65e622134fbd980c9a3d4211fb85224b56a0a0efbd422ab72f", size = 188400296, upload-time = "2026-01-20T16:00:56.042Z" }, - { url = "https://files.pythonhosted.org/packages/df/3d/9e7eee57b37c80cec63322c0231bb6da3cfe535a91d7a4d64896fcb89357/triton-3.6.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a17a5d5985f0ac494ed8a8e54568f092f7057ef60e1b0fa09d3fd1512064e803", size = 188273063, upload-time = "2026-01-20T16:01:07.278Z" }, - { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = "2026-01-20T16:01:14.236Z" }, + { url = "https://files.pythonhosted.org/packages/94/fa/f856e24deb462d5f18bd4b5a746957862ab9b6ee5834bda60605ec348366/triton-3.7.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9497f2e696ee368862a181a90b2dcc03ca978cc4f602abd67c7d81022a6988e1", size = 184692359, upload-time = "2026-06-17T20:03:48.288Z" }, + { url = "https://files.pythonhosted.org/packages/c4/6f/fb96d15db6f36d6eae4cafb998c2e0353bf59d7c4ea1662d7497f269134a/triton-3.7.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7e40869937a68206ec70d7f25bb7ec6433cb083f9135e1f36dbd318dc449a728", size = 197719725, upload-time = "2026-06-17T19:53:20.419Z" }, + { url = "https://files.pythonhosted.org/packages/00/42/c5089d4d9327fcd1e862c599cc2927f39418f84dd11a84cb2ccff9d4787a/triton-3.7.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cdbfc09d9ec58bc5e68321525653220de7515c199e7a8097a97c85e62b52cd0a", size = 184694629, upload-time = 
"2026-06-17T20:03:53.444Z" }, + { url = "https://files.pythonhosted.org/packages/07/42/2c3ac59253ae8892b6f307875263dd23dc875cdf732d3aea40d6d41fb7cb/triton-3.7.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:58c0e131da05134a2a4788ccbcc0c1105cf0f54c8e98f19e34cd465396dc15eb", size = 197729241, upload-time = "2026-06-17T19:53:27.801Z" }, + { url = "https://files.pythonhosted.org/packages/40/71/e01aa7ad573883ed9456f130226babdec70b005e098c4d6226a6238e761b/triton-3.7.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe4ea396a06171f1f1f58cbd39c70b09294398f7dd7c620939bab54ad6f934fa", size = 184705764, upload-time = "2026-06-17T20:03:59.064Z" }, + { url = "https://files.pythonhosted.org/packages/a4/09/5683146fda6a2b569deb78ccfd8fbfea8bfe55f726b081c0a6bb18dd6f28/triton-3.7.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2020153b08280415ec0da6607834e79166442147e78e144df06b508c75b186d2", size = 197729537, upload-time = "2026-06-17T19:53:35.516Z" }, + { url = "https://files.pythonhosted.org/packages/e9/f8/448220c3092019f9fdfab39ec47985968181d67da34b44f6a7f6280a5cbb/triton-3.7.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c58e4c61f0c73b5dba3b5d19b4a7093c32f90dc18b2a7f121a7c16ccd31107b7", size = 184814760, upload-time = "2026-06-17T20:04:04.984Z" }, + { url = "https://files.pythonhosted.org/packages/f0/ac/229b7d4589d2e5937310e72c6d46e89599d16a4a12b479ffa1499fee8eb8/triton-3.7.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10ba85fa2cca4a2fbdeb36bf1cb082f2c252bda55bf9fccd74f65ec5bc647e68", size = 197824404, upload-time = "2026-06-17T19:53:42.772Z" }, ] [[package]] @@ -4285,13 +4541,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = 
"sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, ] +[[package]] +name = "uncalled-for" +version = "0.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/82/345cc927f7fbdae6065e7768759932fcc827fc20b29b45dfbafa2f1f7da4/uncalled_for-0.3.2.tar.gz", hash = "sha256:89f5dbcd71e2b8f47c030b1fa302e6cce2ec795d1ac565eeb6525c5fe55cb8a2", size = 50032, upload-time = "2026-05-06T13:38:25.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/25/2c87754f3a9e692315f7b811244090e68f362979fc8886b3fbd2985a1d8c/uncalled_for-0.3.2-py3-none-any.whl", hash = "sha256:0ff60b142c7d1f8070bde9d42afaa70aedc77dcc10998c227687e9c15713418e", size = 11444, upload-time = "2026-05-06T13:38:24.025Z" }, +] + [[package]] name = "urllib3" -version = "2.6.3" +version = "2.7.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +sdist = { url = "https://files.pythonhosted.org/packages/53/0c/06f8b233b8fd13b9e5ee11424ef85419ba0d8ba0b3138bf360be2ff56953/urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", size = 433602, upload-time = "2026-05-07T16:13:18.596Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, + { url = "https://files.pythonhosted.org/packages/7f/3e/5db95bcf282c52709639744ca2a8b149baccf648e39c8cc87553df9eae0c/urllib3-2.7.0-py3-none-any.whl", hash 
= "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897", size = 131087, upload-time = "2026-05-07T16:13:17.151Z" }, ] [[package]] @@ -4354,6 +4619,92 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" }, ] +[[package]] +name = "watchfiles" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/41/5e1a4bb12aac5f1493fa1bdc11154eca3b258ca4eba65d39c473fe19d8e9/watchfiles-1.2.0.tar.gz", hash = "sha256:c995fba777f1ea992f090f9236e9284cf7a5d1a0130dd5a3d82c598cacd76838", size = 108252, upload-time = "2026-05-18T04:32:04.251Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/2f/e42c992d2afda3108ea1c02acecc991b9f31d05c14adc2a7cee9ee211fc4/watchfiles-1.2.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:bc13eb17538be00c874699dc0abe4ee2bc8d50bb1166a6b9e175ef3fd7eb8f26", size = 400115, upload-time = "2026-05-18T04:32:02.06Z" }, + { url = "https://files.pythonhosted.org/packages/5f/8f/6af2ea19065c91d8b0ea3516fdfc8c0d349f407e8e9fbf4e5a17360de8ad/watchfiles-1.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d95ddc1eb6914154253d239089900813f6a767e174b8e6a50e7fdacb7e4236c", size = 393659, upload-time = "2026-05-18T04:30:50.951Z" }, + { url = "https://files.pythonhosted.org/packages/13/01/b32a967c56fb3e3e5be3db52c3d3b87fa4513aa367d8ed1ad96d42952e5f/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f70d8b291ef6e88d19b1f297a6905ddb978888d9272b0d05e6f53309856bcfc", size = 453207, upload-time = "2026-05-18T04:31:04.231Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/98/97557a812180338cb1abd32e1cffcc4588f59b5f23e0cb006b2ba95ba64a/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:56d8641cf834c2836922899105bd3ce3d0dfc69291d52edf0b4d0436829b34c0", size = 459273, upload-time = "2026-05-18T04:31:50.377Z" }, + { url = "https://files.pythonhosted.org/packages/e8/a8/b4b08dcb7653b8087c6586f7ce649505900e866bbcfe40dc9587af02e686/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2581a94056e55d7d0a31a823ea92bf73749c489ca2285bfdc0fbe6b2bb49d50c", size = 489927, upload-time = "2026-05-18T04:31:42.485Z" }, + { url = "https://files.pythonhosted.org/packages/50/94/3dceea03545d2e5ddfd839f0ddd5e1cecbf1697b5a428d5ba11cef6af95d/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41bc1199f7523b3f82843c88cbb979180c949caef0342cf90968f178e5d49b01", size = 570476, upload-time = "2026-05-18T04:31:03.071Z" }, + { url = "https://files.pythonhosted.org/packages/cc/f2/d39a5450c3532092b91f81d274360e613c2371bc874a89c7a1a3c5e8d138/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7571e4464cb6e434958f867f7f730b8ab0b75e3f8e5eac0499168486ab3c33a8", size = 465650, upload-time = "2026-05-18T04:30:12.701Z" }, + { url = "https://files.pythonhosted.org/packages/22/24/ed72f68cbc1333ca9b9f2200aa048bb6658ae41709bc1caad4310f4bdffd/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e53a384f76b631c3ae5334ce6a52f0baa3a911eb94a4eac7f160079868b716d5", size = 456398, upload-time = "2026-05-18T04:30:13.784Z" }, + { url = "https://files.pythonhosted.org/packages/0d/64/982ef4a4e5bab5b6e5b6becc8cd5e732f6130a78b855f0abec6439a9a135/watchfiles-1.2.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:d20029a60a71a052a24c4db7673bc4de39ab89adbaccbfb5d67987c5d73f424d", size = 465140, upload-time = "2026-05-18T04:31:52.111Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/0c/95282abf4ed680b6096010bcfc30c5fa7a041fc5aa5a2ad17a2cc6c75bba/watchfiles-1.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2cb93af48550faf1cea04c303107c8b75833de7013e57ce27d3b8d21d8d0f58c", size = 630259, upload-time = "2026-05-18T04:31:25.676Z" }, + { url = "https://files.pythonhosted.org/packages/30/45/607c1de1530c4bdcf2cf1d1ecc2505ddba5d96bd43ba9f2b0e79876f850f/watchfiles-1.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2995c176de7692b86a2e4c58d9ec718f753150a979cb4a754e2b4ffa38e70906", size = 659859, upload-time = "2026-05-18T04:30:24.333Z" }, + { url = "https://files.pythonhosted.org/packages/fa/08/d9e2e0f9e8e6791d33aefc694ad7eefa7f901f63caff84a81ded38692f9c/watchfiles-1.2.0-cp312-cp312-win32.whl", hash = "sha256:7a2cffd17d27d2ecbb310c2b1d8174f222a5495b1a721894afa88ec11e25b898", size = 275480, upload-time = "2026-05-18T04:30:31.307Z" }, + { url = "https://files.pythonhosted.org/packages/1c/e6/9d42569c0102645cc8cea5d8c7d8a1e9d4ada2cb7f05f75e554b8aa2202a/watchfiles-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:f155b3a1b2a5fc89cdc70d47ee5d54e3b75e88efa34982028a35daef9ba00379", size = 288718, upload-time = "2026-05-18T04:32:10.745Z" }, + { url = "https://files.pythonhosted.org/packages/0a/26/88e0dc6ee3898169d7fa22bb6a69cabf2502d2ee25cb8c876d1262d204f8/watchfiles-1.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:8fa585ede612ee9f9e91b18bebf9ba11b9ae29a4e3a0d0cf6fca3e382133f0d5", size = 281026, upload-time = "2026-05-18T04:30:22.23Z" }, + { url = "https://files.pythonhosted.org/packages/d1/4d/70a7feced9f87e2ff26dba42667290f41694fc64646c67261fbb8cab5d5c/watchfiles-1.2.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:01ea8d66f0693b9b60a6541c8d10263091ca9a9060d242f3c1f3143f9aad2c98", size = 399730, upload-time = "2026-05-18T04:31:38.162Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/3a/0da302f2307aee316922806ebd5726c542cbd787c938271cf14a074c7daf/watchfiles-1.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ba0480b9a74af058f43b337e937a451e109295c420916d68ad24e3dc02f5e44", size = 392842, upload-time = "2026-05-18T04:30:27.051Z" }, + { url = "https://files.pythonhosted.org/packages/db/ef/d5bdb705c224dbc256aa0c1ec47bf4e61ec52558f2afb44a71a1fe4d7015/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f34e26a19f91f710c08e0183429f0d1d15df734e6bc78c31e77b9ea9c433658", size = 452989, upload-time = "2026-05-18T04:31:11.945Z" }, + { url = "https://files.pythonhosted.org/packages/71/29/5495f2c1661949ef7a35e4d71111d129cfe7606414a26887a919d0a55406/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b4e77f6a55f858504069abd35d336a637555c09bca453dde1ee1e5ada8a6a1fb", size = 458978, upload-time = "2026-05-18T04:30:52.606Z" }, + { url = "https://files.pythonhosted.org/packages/d5/8c/7f9c07c433811c2fffd93e13fdfb7135de9aab5f2ae41be08960fa0047dc/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0cb4d80e212f116474a545c21c912b445f16bb0cef9e6a73a498164223e14e2f", size = 490248, upload-time = "2026-05-18T04:31:36.003Z" }, + { url = "https://files.pythonhosted.org/packages/3c/11/d93632febc52fbc21be90231bb7c17fd5387f46c9076fd40a5f9c2ae6910/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b974946a10af379d425e2eef5b62f5c6ebeaccf91d45eaad6f5b27ecd4f91aa0", size = 571847, upload-time = "2026-05-18T04:31:10.862Z" }, + { url = "https://files.pythonhosted.org/packages/55/b4/383173e73aabb07ad1d9c7aa859d95437ac46a6d6a1e11005facda0c9d19/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86bc13c25a8d1fcd70b51d0ce7c9b65e90de5666fcbfd3e34957cc73ee19aeb5", size = 465974, upload-time = "2026-05-18T04:30:17.006Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/6c/89b1a230a78f57c52dd8893adb1f92f94411721b6ec12596c56d98c74356/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca148d73dea36c9763aaa351e4d7a51780ec1584217c45276f4fe8239c768b71", size = 454782, upload-time = "2026-05-18T04:30:35.656Z" }, + { url = "https://files.pythonhosted.org/packages/24/62/1732118367cfff0a9fce3bf62ff4bfded09ef5df21d9d446b858b3f70a96/watchfiles-1.2.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:c525543d91961c6955b2636b308569e84a1d1c5f5f2932041ab9ef46422f43e3", size = 465182, upload-time = "2026-05-18T04:30:20.846Z" }, + { url = "https://files.pythonhosted.org/packages/28/96/716f7e5f51339bf22963f3345f9f27d7f3b30e2eadc597e257c881dd3c53/watchfiles-1.2.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:a204794696ffb8f9b10fba6f7cb5216d42f3b2b71860ccac6b6e42f5f10973b0", size = 629841, upload-time = "2026-05-18T04:31:05.397Z" }, + { url = "https://files.pythonhosted.org/packages/4c/fe/c40783950fd771ccf66ab3ec2722d188a9af1c7f96c6e811f36e40c6e03f/watchfiles-1.2.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:10d86db20695afe7997ac9e1717637d6714a8d0220458c33f3d2061f54cec427", size = 658028, upload-time = "2026-05-18T04:31:48.22Z" }, + { url = "https://files.pythonhosted.org/packages/71/72/4508db1856d1d87fcbb3b63f4839bab1b5682cb0e8d224d122263c09654a/watchfiles-1.2.0-cp313-cp313-win32.whl", hash = "sha256:eb283ee99e21ad6443c8cdb06ac5b34b1308c329cbdf03fa02b445363714c799", size = 275183, upload-time = "2026-05-18T04:30:59.57Z" }, + { url = "https://files.pythonhosted.org/packages/f9/36/14b76ca57652e5cc5fd1c11f32a261292c08a0d19a00351013c2549cbfb2/watchfiles-1.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:a0f27f01bee51861392bb6b7c4fdb290b27d1eb194e9e28788d68102a0e898d9", size = 288059, upload-time = "2026-05-18T04:32:07.937Z" }, + { url = 
"https://files.pythonhosted.org/packages/1b/8d/0a85e395398d8d20fadfe5c5d32c726eee17a519e78fb356f2cf7531bffe/watchfiles-1.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:3651aa7058595e9cfb75d35dd5ada2bf9f48a5b8a0f3562821d3e210c507e077", size = 280186, upload-time = "2026-05-18T04:31:54.484Z" }, + { url = "https://files.pythonhosted.org/packages/37/68/36db056f1fdcc5f07302f56e631774d6835bcd6fa3ace402304621d5f9e5/watchfiles-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:faea288b6f0ab1902ef08f4ca6de005dccf856c4e0c4f21b8c5fce02d90a1b08", size = 399031, upload-time = "2026-05-18T04:30:44.576Z" }, + { url = "https://files.pythonhosted.org/packages/c1/64/01a9d6f66a82a5c101ce939274106cc72759d62427e153f01edd2b9f87c2/watchfiles-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:01859b11fd9fbca670f4d5da00fbac282cfea9bd67a2125d8b2833a3b5617ea9", size = 391205, upload-time = "2026-05-18T04:30:25.413Z" }, + { url = "https://files.pythonhosted.org/packages/84/2c/0a44fe058cb4bb7b8ede6b6670698bbb7c0400740e378d00022189b7b31d/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fff610d7bb2256a317bb1e96f0d7862c7aa8076733ee5df0fd41bbe76a24a4f4", size = 451892, upload-time = "2026-05-18T04:32:14.005Z" }, + { url = "https://files.pythonhosted.org/packages/67/a1/351e0d56cd35e6488b5c8b4fb11a809a5bc923e8fe8fed9faf8920be0c89/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b141a4891c995a039cd89e9a49e62df1dc8a559a5d1a6e4c7106d16c12777a55", size = 458867, upload-time = "2026-05-18T04:31:22.279Z" }, + { url = "https://files.pythonhosted.org/packages/d5/7d/9d09605187f1b838998624049fcf8bf47b73c1a3b76901fcac1782f62277/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f22943b7770483f6ea0721c6b11d022947a98eb0acae14694de034f4d0d38925", size = 490217, upload-time = "2026-05-18T04:31:43.657Z" }, + { url = 
"https://files.pythonhosted.org/packages/60/5d/a17a16eccb182f04188cd308ec24b1a71a9b5c4e7098269cf35d9fa56d02/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1bc6195825b7dcd217968bb1f801a60fd4c16e8eeab5bedc7fe917d7d5995ab4", size = 571458, upload-time = "2026-05-18T04:32:11.875Z" }, + { url = "https://files.pythonhosted.org/packages/d3/3d/4dd457062083ab1938e5dfd45032eb425cee2ac817287ca8ff4356183e5d/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4a4b147f5dca2a5d325a06a832fb43f345751adfbc63204aec30e0d9ca965a2", size = 464707, upload-time = "2026-05-18T04:30:43.492Z" }, + { url = "https://files.pythonhosted.org/packages/c6/71/ea8c57b128f5383de74d0c7d2d9c57ad7c9a65a930c451bd25d524b295b7/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4543579a9bdb0c9560039b4ffddbdb39545707659fbc430ce4c10f3f68d557f9", size = 454663, upload-time = "2026-05-18T04:30:16.061Z" }, + { url = "https://files.pythonhosted.org/packages/53/fd/2e812bf938406d7db351f0703ddd3fc6c061cf30d96153a77bc79a943a44/watchfiles-1.2.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:20aa0e708b920bde876a4aa82dc7dd6ebea228a63a67cda6632c2fc87b787efa", size = 463537, upload-time = "2026-05-18T04:31:44.9Z" }, + { url = "https://files.pythonhosted.org/packages/86/56/d17a7f1dd1bc3035f1072694a551301272f1739c2d8e319c927cb9e29b38/watchfiles-1.2.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:d413349d565dab74297f2a63e84a097936be69bf8f3b3801f27f380e32040f44", size = 629194, upload-time = "2026-05-18T04:31:14.141Z" }, + { url = "https://files.pythonhosted.org/packages/be/06/f1ff66bf5cae50aa4062779a0ecd0bbaf15e466195719074078947d9a17d/watchfiles-1.2.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f28b2725eb8cce327b9b3ab02415c853011dc55c95832fe90de6bc56f5315f72", size = 656194, upload-time = "2026-05-18T04:31:47.14Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/54/a9c7ea9a82a4ac65e7004c0a03920b5cdd2f9c3b678757d9cd425aa51d53/watchfiles-1.2.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:b8c8358484d5fa12ef34f05b7f4168eaf1932f408725ff6d023c33ec17bd79d4", size = 400205, upload-time = "2026-05-18T04:32:05.153Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5d/c9ab3534374a4a67450696905d6ef16a04405448b8dc52bd752ae50423d4/watchfiles-1.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9f04b092229ad2c50126dd3c922c8822e51e605993764a33058d4a791ab42281", size = 392508, upload-time = "2026-05-18T04:30:54.849Z" }, + { url = "https://files.pythonhosted.org/packages/26/ca/1ad30103535cf0cecd7b993e8d50edc5351b1820e38f2d22e3df58962feb/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a7ce236284f002a156f70add88efe5c70879cccbb658be0822c54b1306fc09d", size = 452448, upload-time = "2026-05-18T04:30:53.727Z" }, + { url = "https://files.pythonhosted.org/packages/37/a1/ceee2cdf2afbd715fa07758d39c9859513eae411b23196f7fd039e5feedd/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b9909cc2b48468b575eefa944919e1fe8a36c5849d5c7c168f80a8c1db69398e", size = 459605, upload-time = "2026-05-18T04:30:23.312Z" }, + { url = "https://files.pythonhosted.org/packages/e8/f6/421e30fd1cb3907a84ed92ab3f1983e37ba2dca015e9a894a048418417a2/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a37faaed405c67e28e6be45a1fa4f206ef5a2860f27c237db9fa30704c38242", size = 490757, upload-time = "2026-05-18T04:30:47.358Z" }, + { url = "https://files.pythonhosted.org/packages/41/b0/55ed1b97ed08be7bba6f9a541cac15f2a858e1d74d2b07b6da70a82aab00/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9649193aa27bd9ff2e80ff29bfaa93085496c7a3a377592823cc58b77ee88add", size = 568672, upload-time = "2026-05-18T04:30:38.915Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/cf/d8ae8a80dd7bafab395ea7681c10237311bbf34d37704a8c744e7cf31fc7/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e4ff8e37f99cf1da89e255e07c9c4b37c214038c4283707bdec308cb1b0ea1f", size = 464197, upload-time = "2026-05-18T04:30:09.914Z" }, + { url = "https://files.pythonhosted.org/packages/7c/8a/3076c496ca8dafe0e8cd03fcebdfc47be4b1174b4e5b24ff6e396e6b3af2/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:054dc20fd2e3132b4c3883b4a00d72fd6e1f56fdaf89fccd12e8057d74cd74d7", size = 453181, upload-time = "2026-05-18T04:30:14.829Z" }, + { url = "https://files.pythonhosted.org/packages/e5/10/9745e17c98e7b8a86454df0a3c7b5686bd650383f1e9f26e4ebcbd6cc0c0/watchfiles-1.2.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:e140ed30ebde76796b686e67c182cff10ea2fbab186fafd1560f74bb5a473a6e", size = 465109, upload-time = "2026-05-18T04:30:28.123Z" }, + { url = "https://files.pythonhosted.org/packages/8f/95/8ef4a95481d3e0cb52d62a06fa6e972e81424be2d9698b91a2fecca9904c/watchfiles-1.2.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:bb7e52ecf68ba46d22df23467b87cffeb2146908aa523ebfe803019618cfda06", size = 630653, upload-time = "2026-05-18T04:31:49.304Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e4/3b3bf36b0f829b50c6ebcb8d031583863c59f923d6a6af3d485e470d0fac/watchfiles-1.2.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:23282a321c8baf9b3a3c4afff673f9fe65eb7fdc2338d765ccad9d3d1916a5ba", size = 657838, upload-time = "2026-05-18T04:31:06.497Z" }, + { url = "https://files.pythonhosted.org/packages/21/b1/6cbbb50c1f3002ab568777d44aa21206dfb8807a840990c4037523b51812/watchfiles-1.2.0-cp314-cp314-win32.whl", hash = "sha256:c0db965c5f79aa49fe672d297cf1febc5ad149b658594944f49a54a2b96270a7", size = 275108, upload-time = "2026-05-18T04:30:06.891Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/45/190ce6db8dcb4536682cf75d3889ff1a27182a58cb519d343cb6d9ea63d8/watchfiles-1.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:71283b39fd17e5408eb123bd37aeecfd9d54c81fc184421943208aadb879d103", size = 288441, upload-time = "2026-05-18T04:32:12.901Z" }, + { url = "https://files.pythonhosted.org/packages/74/0d/3eae1c2313ab08378431d907c3f8095ecca00f3eda33111cf4f0f2591799/watchfiles-1.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:c5c19526f4e54a00f2666a6c0e9e40d582c09e865055ea7378bf0009aab857b3", size = 280684, upload-time = "2026-05-18T04:31:26.902Z" }, + { url = "https://files.pythonhosted.org/packages/b1/75/fb64e6c25d6b5ca636d03df34ffb1c6e9873303e76d27967e045f8df088f/watchfiles-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:d73a585accffa5ae39c17264c36ec3166d2fad7000c780f5ef83b2722afb9dd2", size = 398857, upload-time = "2026-05-18T04:32:17.108Z" }, + { url = "https://files.pythonhosted.org/packages/73/4e/9f7adf01754cbf81843722ccfec169d8f26c69778281a302855cecd2ee08/watchfiles-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ae99b14c5f21e026e0e9d96f40e07d8570ebee6cafd9d8fc318354606daa7a28", size = 392413, upload-time = "2026-05-18T04:31:07.911Z" }, + { url = "https://files.pythonhosted.org/packages/47/c8/bec626bcc2d69f44b9acb24ce7d60ed7b16b73628eea747fcbd169d8edda/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4429f3b105524a10b72c3a819b091c495d2811d419c1e1e8df773a5a5974f831", size = 452409, upload-time = "2026-05-18T04:31:20.142Z" }, + { url = "https://files.pythonhosted.org/packages/00/b7/b6362068e81e7c556d155a34c35d40ac3ef42d747b06d7f6e5bf58e359c2/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:43d818978d06062d9b22c4fab2ebe44cf5213d42dc8e62bda8c2760cfa2eeb33", size = 458827, upload-time = "2026-05-18T04:32:06.219Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/f8/9a813fa42afb1e0b4625e75f0479826644d3ee8dc287e093799bc01f390c/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b9f732dc58b2dbe69e464ccf8fff7a03b0dd0be439da4c0720d3558527d3d6b4", size = 490104, upload-time = "2026-05-18T04:31:56.034Z" }, + { url = "https://files.pythonhosted.org/packages/2f/bf/27dfb6094ca4c9aad21298b5525b6c53cb36121ee454331d05161e58d130/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8f200104103feb097de4cab8fe4f5dd18a2026934c7dea98c55a2f5fd6d5a33b", size = 571360, upload-time = "2026-05-18T04:31:57.133Z" }, + { url = "https://files.pythonhosted.org/packages/fb/39/44a096d67270ea93df91d33877dbe91fbda3aa4f8ec2edf799d93eda8736/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:63ac26eefbf4af1741247d6fb68b11c49a25b2f7413fbd318a83a12aaa9cf666", size = 464644, upload-time = "2026-05-18T04:30:57.33Z" }, + { url = "https://files.pythonhosted.org/packages/0e/80/c7472203bad6268e3ef1ad260739704847898938ad7ea8b63a5131f46b50/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c4997d4e4a55f0d02b6cde327322daf3a0400e5df6c6b15948994bf72497925", size = 454771, upload-time = "2026-05-18T04:30:48.736Z" }, + { url = "https://files.pythonhosted.org/packages/51/cf/3b10b268b4b7f0fc26e9debb5eef1998b515887840f444cd3ec80c688755/watchfiles-1.2.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:4c887eba18b7945ac73067a8b4a66f21cd46c2539b2bc68588f7be6c7eb6d26b", size = 463494, upload-time = "2026-05-18T04:31:33.826Z" }, + { url = "https://files.pythonhosted.org/packages/3d/3e/a4302545cd589262a0dc7d140e86f7688eba3f9c72776c27f7e23b8864c4/watchfiles-1.2.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:3416ff151bb6b5a8d8d11664974fbef4d9305b9b2957839ab5a270468fd8df30", size = 629383, upload-time = "2026-05-18T04:31:15.596Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/99/d5649df0a9a410d45b7c882304d0b790903ac9b6e8f2cfd12114e0c6b9f2/watchfiles-1.2.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:0e831a271c035d89789cffc386b6aa1375f39f1cd25eb7ca0997e4970d152fc5", size = 656093, upload-time = "2026-05-18T04:31:58.707Z" }, + { url = "https://files.pythonhosted.org/packages/92/b9/362702539275019a54dd2e94511b31a9b89c5f9e6a21966de7eb692549fc/watchfiles-1.2.0-cp315-cp315-macosx_10_12_x86_64.whl", hash = "sha256:37a6721cdf3f65dbb13aa9503510ccb4451603ac837e44d265d7992a597e1374", size = 400109, upload-time = "2026-05-18T04:31:16.879Z" }, + { url = "https://files.pythonhosted.org/packages/8f/75/71d5ba62db781e5587bded1d944c675374bc4aa37ff33d5018d98e8b6538/watchfiles-1.2.0-cp315-cp315-macosx_11_0_arm64.whl", hash = "sha256:2b37d10b5a63bd4d87e18472d80fa525bd670586fae62e5dd580452764879b65", size = 392167, upload-time = "2026-05-18T04:31:28.058Z" }, + { url = "https://files.pythonhosted.org/packages/3c/01/c66dd95d0423fe30d31820e2d1d5bda773764131bbb6ac0cb1cf303ac328/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a105bc2283f67e8fbec74253ec2d94925de92ed72c0393f1206bf326b7b7b69", size = 452372, upload-time = "2026-05-18T04:31:00.836Z" }, + { url = "https://files.pythonhosted.org/packages/91/15/2fe99557e72f85627c6a8eed50d889e8d101623e060a22ad75b875cb932d/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5327989a465505f05cfe06f04fa9d0c2fd5432bb243e10e6f012b1bdca3c8579", size = 459596, upload-time = "2026-05-18T04:31:34.96Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/d4acfa0023367428ed48351b3b9b267893037b6cadae55620c61c24bcfd4/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ecb47f183a8025b2aa18b546725c3657e542112ae9c0613a2af79b4fa8d04ad7", size = 490869, upload-time = "2026-05-18T04:31:59.923Z" }, + { url = 
"https://files.pythonhosted.org/packages/a4/5f/3164cbdce06c9fb95c4f7b9e2f9760b5e2797af43a9ecc317ef42a23a278/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8520a4ab0e37f770afc34459c4f8f7019e153f9124dc101c15538365875d1ab2", size = 571641, upload-time = "2026-05-18T04:32:00.948Z" }, + { url = "https://files.pythonhosted.org/packages/41/e6/85d3731c55e65cd7690f3f803d24c139588aaf863e4bf2148fe7a7fa1a19/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:71cd71740ed2c15211ebb237ced4e39a1cdf6f80566e5fe95428da1626f4fde6", size = 464444, upload-time = "2026-05-18T04:30:34.298Z" }, + { url = "https://files.pythonhosted.org/packages/f4/7d/562641012b8b09872742c3b8adf9629ec479fd78f8d68ae4a0c13da8add6/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f88af53d6ddaf72179ef613ddc905e6f4785f712b49b80b3bef9f3525e6194b4", size = 453593, upload-time = "2026-05-18T04:31:23.464Z" }, + { url = "https://files.pythonhosted.org/packages/56/fe/cb8ef3d6f929d14158fdaaad9925985b7310abc9384dcd4d82dd0016fb59/watchfiles-1.2.0-cp315-cp315-manylinux_2_31_riscv64.whl", hash = "sha256:cee9d5efd929efdac5f7e58f72b3376f676b64050a91c5b99a7094c5b2317488", size = 465096, upload-time = "2026-05-18T04:31:30.384Z" }, + { url = "https://files.pythonhosted.org/packages/25/91/80908e835e100527a9267147b08c0eee1fa6ab0ffec15edc04d1d44885f7/watchfiles-1.2.0-cp315-cp315-musllinux_1_1_aarch64.whl", hash = "sha256:b718bf356bbc15e559bd8ef41782b573b8ae0e3f177ab244b440568d7ea02cfb", size = 630638, upload-time = "2026-05-18T04:30:49.89Z" }, + { url = "https://files.pythonhosted.org/packages/46/4b/95ab2f256bb4af3cb2eb23b9317bda984ee6e0f11733a5c004a6c95b06e3/watchfiles-1.2.0-cp315-cp315-musllinux_1_1_x86_64.whl", hash = "sha256:922c0e019fe68b3ae392965a766b02a71ba1168c932cebc3733cd52c5fe5b377", size = 657684, upload-time = "2026-05-18T04:31:32.027Z" }, +] + [[package]] name = "wcmatch" version = "8.5.2" 
@@ -4466,10 +4817,11 @@ wheels = [ [[package]] name = "xai-sdk" -version = "1.5.0" +version = "1.12.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, + { name = "googleapis-common-protos" }, { name = "grpcio" }, { name = "opentelemetry-sdk" }, { name = "packaging" }, @@ -4477,9 +4829,9 @@ dependencies = [ { name = "pydantic" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b9/54/378c681c2c4512de78b49b65af1b7aaea0e0740dfa4a3389535e65422f70/xai_sdk-1.5.0.tar.gz", hash = "sha256:f88529d844f962fbb24464351a5962cc21a7d080e088bf656709ca7856270c8c", size = 349692, upload-time = "2025-12-05T03:27:36.93Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/21/b6683eeb797bac6dd46e55e9fbdb15c598b34fadd862120da4c09d1d01d0/xai_sdk-1.12.2.tar.gz", hash = "sha256:917d1887e6afdb49fff9f0dc6ae1bceede43a747365a406a3486af3e23509be4", size = 414440, upload-time = "2026-05-07T00:07:01.244Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/34/cd3681e5f786e37fb2dbb195fa3d5eb2a5e2be9b20d3abf01b40c9aba839/xai_sdk-1.5.0-py3-none-any.whl", hash = "sha256:4dc56bec2d67811c67030a50b42c4a1bc60f43947d4baaa840acf0aef246e816", size = 204314, upload-time = "2025-12-05T03:27:35.67Z" }, + { url = "https://files.pythonhosted.org/packages/99/b1/76da151f71a2dc9a65ef725ad4bac597a8d02da6618fb0474468a3355a34/xai_sdk-1.12.2-py3-none-any.whl", hash = "sha256:a3b4079f0629637009c5e3d58388f8c88591658dde31f202d5f5e8560fe6e120", size = 256654, upload-time = "2026-05-07T00:06:59.56Z" }, ] [[package]]