diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..04be9f36b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,23 @@ +.git +__pycache__ +*.py[oc] +.venv +.env +.envrc +.ruff_cache +.mypy_cache +.pytest_cache +.claude +.coverage +.DS_Store +build +dist +wheels +*.egg-info +docs +site +.github +.qdrant_code_embeddings +CLAUDE.md +AGENTS.md +PROJECT.md diff --git a/.env.example b/.env.example index dc518b501..9e449e031 100644 --- a/.env.example +++ b/.env.example @@ -60,6 +60,17 @@ # CYPHER_MODEL=gemini-2.5-flash # CYPHER_API_KEY=your-google-api-key +# Example 6: LiteLLM with custom provider +# ORCHESTRATOR_PROVIDER=litellm_proxy +# ORCHESTRATOR_MODEL=gpt-oss:120b +# ORCHESTRATOR_ENDPOINT=http://litellm:4000/v1 +# ORCHESTRATOR_API_KEY=sk-your-litellm-key + +# CYPHER_PROVIDER=litellm_proxy +# CYPHER_MODEL=openrouter/gpt-oss:120b +# CYPHER_ENDPOINT=http://litellm:4000/v1 +# CYPHER_API_KEY=sk-your-litellm-key + # Thinking budget for reasoning models (optional) # ORCHESTRATOR_THINKING_BUDGET=10000 # CYPHER_THINKING_BUDGET=5000 @@ -68,9 +79,20 @@ MEMGRAPH_HOST=localhost MEMGRAPH_PORT=7687 MEMGRAPH_HTTP_PORT=7444 +# Memgraph authentication credentials +# Leave MEMGRAPH_USERNAME empty (or omit it) if your Memgraph instance doesn't require authentication +# If authentication is enabled, provide both username and password +# Common defaults: username=neo4j, password=password (or your custom credentials) +MEMGRAPH_USERNAME= +MEMGRAPH_PASSWORD= LAB_PORT=3000 MEMGRAPH_BATCH_SIZE=1000 +# Qdrant settings +# Leave QDRANT_URL unset to use local file mode (only suitable below ~20k embeddings) +# For larger codebases, run the bundled docker-compose service and point at it: +# QDRANT_URL=http://localhost:6333 + # Repository settings TARGET_REPO_PATH=. 
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..49ff9c712 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @vitali87 diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..9b47f9561 --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying 
and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +eheva87@gmail.com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. 
No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. 
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index d5f29c336..163b5ae21 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -3,5 +3,4 @@ github: vitali87 buy_me_a_coffee: vitali87 -# Uncomment and add username when you set up Patreon: -# patreon: YOUR_USERNAME +patreon: vitali87 diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 4b6f8f59b..70c1f1023 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,10 +1,7 @@ -blank_issues_enabled: false +blank_issues_enabled: true contact_links: - - name: 💬 Discussions - url: https://github.com/vitali87/code-graph-rag/discussions - about: Ask questions and discuss ideas with the community - name: 📚 Documentation - url: https://github.com/vitali87/code-graph-rag#readme + url: https://github.com/vitali87/code-graph-rag about: Read the documentation and setup guides - name: 🎓 MCP Server Setup url: https://github.com/vitali87/code-graph-rag/blob/main/docs/claude-code-setup.md diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml index 47d83bcd9..154945398 100644 --- a/.github/ISSUE_TEMPLATE/question.yml +++ b/.github/ISSUE_TEMPLATE/question.yml @@ -6,7 +6,7 @@ body: - type: markdown attributes: value: | - Thank you for your question! For general discussions or open-ended questions, consider using [GitHub Discussions](https://github.com/vitali87/code-graph-rag/discussions). + Thank you for your question! Please keep questions concrete; for broader topics, prefer opening an [issue](https://github.com/vitali87/code-graph-rag/issues) with the `question` label. 
- type: textarea id: question diff --git a/.github/SECURITY.md b/.github/SECURITY.md new file mode 100644 index 000000000..77c1a62b4 --- /dev/null +++ b/.github/SECURITY.md @@ -0,0 +1,46 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +| ------- | ------------------ | +| 0.0.x | :white_check_mark: | + +As the project is in early development (pre-1.0), only the latest release receives security updates. Please ensure you are running the most recent version before reporting a vulnerability. + +## Reporting a Vulnerability + +**Please do not report security vulnerabilities through public issues, pull requests, or any other public channels.** + +Instead, please use GitHub's private vulnerability reporting: go to the [Security tab](https://github.com/vitali87/code-graph-rag/security/advisories/new) and click **Report a vulnerability**. This keeps the details confidential between you and the maintainers until a fix is available. + +When reporting, please include: + +- A description of the vulnerability and its potential impact +- Steps to reproduce or a proof of concept +- The version(s) affected +- Any suggested fix, if available + +## What to Expect + +- **Acknowledgement** within 72 hours of your report +- **Status update** within 7 days with an initial assessment +- **Resolution target** of 30 days for confirmed vulnerabilities, though critical issues will be prioritized for faster turnaround + +If the vulnerability is accepted, we will work on a fix, coordinate disclosure with you, and credit you in the release notes (unless you prefer to remain anonymous). + +If the vulnerability is declined, we will provide a clear explanation of why. + +## Scope + +This policy applies to the `code-graph-rag` Python package and its official repository. Third-party dependencies are outside the direct scope of this policy, though we use Dependabot to monitor and update them.
+ +## Security Measures in This Project + +- **Dependency scanning**: Dependabot is enabled for automated dependency updates +- **Secret scanning**: GitHub secret scanning is active on this repository +- **Branch protection**: The `main` branch requires pull request reviews before merging + +## Preferred Languages + +We accept security reports in English. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..a075b29ee --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + + - package-ecosystem: "docker" + directory: "/" + schedule: + interval: "weekly" + + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..8dc054f6c --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,38 @@ +## Summary + + + +- + +## Type of Change + + + +- [ ] Bug fix +- [ ] New feature +- [ ] Performance improvement +- [ ] Refactoring (no functional changes) +- [ ] Documentation +- [ ] CI/CD or tooling +- [ ] Dependencies + +## Related Issues + + + +## Test Plan + + + +- [ ] Unit tests pass (`make test-parallel` or `uv run pytest -n auto -m "not integration"`) +- [ ] New tests added +- [ ] Integration tests pass (`make test-integration`, requires Docker) +- [ ] Manual testing (describe below) + +## Checklist + +- [ ] PR title follows [Conventional Commits](https://www.conventionalcommits.org/) format +- [ ] All pre-commit checks pass (`make pre-commit`) +- [ ] No hardcoded strings in non-config/non-constants files +- [ ] No `# type: ignore`, `cast()`, `Any`, or `object` type hints +- [ ] No new comments or docstrings (code should be self-documenting) diff --git a/.github/workflows/build-binaries.yml b/.github/workflows/build-binaries.yml index c548d82ea..315cfa45a 100644 --- 
a/.github/workflows/build-binaries.yml +++ b/.github/workflows/build-binaries.yml @@ -8,10 +8,14 @@ on: release: types: [created] +permissions: read-all + jobs: build: name: Build ${{ matrix.platform }}-${{ matrix.arch }} runs-on: ${{ matrix.os }} + permissions: + contents: write timeout-minutes: 30 strategy: fail-fast: false @@ -32,18 +36,18 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 submodules: recursive - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.12" - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -66,7 +70,7 @@ jobs: fi - name: Upload binary artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: code-graph-rag-${{ matrix.platform }}-${{ matrix.arch }} path: dist/code-graph-rag-* @@ -75,7 +79,39 @@ jobs: - name: Upload to release if: startsWith(github.ref, 'refs/tags/v') - uses: softprops/action-gh-release@v2 + uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2 with: files: dist/code-graph-rag-* fail_on_unmatched_files: true + + sign-release: + name: Sign Release Artifacts + if: startsWith(github.ref, 'refs/tags/v') + needs: build + runs-on: ubuntu-latest + permissions: + contents: write + id-token: write + steps: + - name: Install cosign + uses: sigstore/cosign-installer@ba7bc0a3fef59531c69a25acd34668d6d3fe6f22 # v4.1.0 + + - name: Download all artifacts + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 + with: + path: artifacts + merge-multiple: true + + - name: Sign artifacts + shell: bash + run: | + for f in artifacts/*; do + [ -f "$f" ] || continue + cosign 
sign-blob --yes --bundle "${f}.sigstore.json" "$f" + done + + - name: Upload signatures to release + uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2 + with: + files: artifacts/*.sigstore.json + fail_on_unmatched_files: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 43b0cc8db..a7742b439 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,8 @@ on: branches: [main, master, develop] workflow_dispatch: +permissions: read-all + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -19,16 +21,16 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 with: enable-cache: true cache-dependency-glob: "uv.lock" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.12" @@ -51,16 +53,16 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 with: enable-cache: true cache-dependency-glob: "uv.lock" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.12" @@ -75,7 +77,7 @@ jobs: test-unit: name: Unit Tests (${{ matrix.os }}) runs-on: ${{ matrix.os }} - timeout-minutes: 15 + timeout-minutes: 20 strategy: fail-fast: false matrix: @@ -83,19 +85,19 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: recursive fetch-depth: 0 - name: Install uv - 
uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 with: enable-cache: true cache-dependency-glob: "uv.lock" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.12" @@ -103,13 +105,19 @@ jobs: run: | uv sync --extra treesitter-full --extra test --extra semantic --group dev - - name: Run unit tests (parallel) + - name: Run unit tests (parallel, with coverage) + if: matrix.os == 'macos-latest' run: | uv run pytest -n auto -m "not integration" --tb=short --cov=codebase_rag --cov-report=xml --cov-report=term + - name: Run unit tests (parallel, no coverage) + if: matrix.os != 'macos-latest' + run: | + uv run pytest -n auto -m "not integration" --tb=short + - name: Upload coverage to Codecov - if: always() && secrets.CODECOV_TOKEN != '' - uses: codecov/codecov-action@v4 + if: always() && matrix.os == 'macos-latest' + uses: codecov/codecov-action@1af58845a975a7985b0beb0cbe6fbbb71a41dbad # v5.5.3 with: files: ./coverage.xml flags: unit-${{ matrix.os }} @@ -123,7 +131,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: recursive fetch-depth: 0 @@ -133,7 +141,7 @@ jobs: docker run -d --name memgraph -p 7687:7687 memgraph/memgraph-platform:latest echo "Waiting for Memgraph to start..." for i in {1..30}; do - if docker exec memgraph echo "SELECT 1;" 2>/dev/null; then + if docker exec memgraph mgconsole --no-history -c "RETURN 1;" 2>/dev/null; then echo "Memgraph is ready!" 
break fi @@ -142,13 +150,13 @@ jobs: done - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 with: enable-cache: true cache-dependency-glob: "uv.lock" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.12" @@ -164,8 +172,8 @@ jobs: uv run pytest -m "integration" -v --tb=short --cov=codebase_rag --cov-report=xml --cov-report=term - name: Upload coverage to Codecov - if: always() && secrets.CODECOV_TOKEN != '' - uses: codecov/codecov-action@v4 + if: always() + uses: codecov/codecov-action@1af58845a975a7985b0beb0cbe6fbbb71a41dbad # v5.5.3 with: files: ./coverage.xml flags: integration-ubuntu-latest @@ -187,7 +195,7 @@ jobs: steps: - name: Check PR title format - uses: amannn/action-semantic-pull-request@v5 + uses: amannn/action-semantic-pull-request@48f256284bd46cdaab1048c3721360e808335d50 # v6.1.1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml index ecd3732f3..6c0c48ebf 100644 --- a/.github/workflows/claude-code-review.yml +++ b/.github/workflows/claude-code-review.yml @@ -10,6 +10,8 @@ on: - "*.py" - "pyproject.toml" +permissions: read-all + jobs: claude-review: name: AI Code Review @@ -26,13 +28,13 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 1 - name: Run Claude Code Review id: claude-review - uses: anthropics/claude-code-action@beta + uses: anthropics/claude-code-action@28f83620103c48a57093dcc2837eec89e036bb9f # beta with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 000000000..853e4df66 --- /dev/null +++ 
b/.github/workflows/docker-publish.yml @@ -0,0 +1,62 @@ +name: Docker Publish + +on: + push: + tags: + - 'v*' + workflow_dispatch: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +permissions: read-all + +jobs: + build-and-push: + runs-on: ubuntu-latest + timeout-minutes: 60 + permissions: + contents: read + packages: write + attestations: write + id-token: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3 + + - uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3 + + - uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # v5 + id: meta + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha + + - uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0 + id: push + with: + context: . 
+ platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + + - uses: actions/attest-build-provenance@96b4a1ef7235a096b17240c259729fdd70c83d45 # v2 + with: + subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + subject-digest: ${{ steps.push.outputs.digest }} + push-to-registry: true diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000..912c8eb02 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,58 @@ +name: Deploy Documentation + +on: + push: + branches: + - main + paths: + - "docs/**" + - "mkdocs.yml" + # (H) Rebuilds periodically so the GitHub repo widget (version, stars, forks) + # stays current; MkDocs Material fetches these stats at build time. + schedule: + - cron: "0 */6 * * *" + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: pages + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.12" + + - name: Install dependencies + run: uv sync --group docs + + - name: Build site + run: uv run mkdocs build --strict + + - uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4.0.0 + with: + path: site + + deploy: + needs: build + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - id: deployment + uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4 diff --git a/.github/workflows/label-sync.yml b/.github/workflows/label-sync.yml index ec787447e..40cc0e2c0 
100644 --- a/.github/workflows/label-sync.yml +++ b/.github/workflows/label-sync.yml @@ -9,9 +9,10 @@ on: - ".github/workflows/label-sync.yml" workflow_dispatch: schedule: - # Run weekly on Mondays at 00:00 UTC to ensure labels stay in sync - cron: "0 0 * * 1" +permissions: read-all + jobs: sync-labels: name: Sync Repository Labels @@ -22,10 +23,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Sync labels - uses: micnncim/action-label-syncer@v1 + uses: micnncim/action-label-syncer@3abd5ab72fda571e69fffd97bd4e0033dd5f495c # v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/.github/workflows/osv-scanner.yml b/.github/workflows/osv-scanner.yml new file mode 100644 index 000000000..5ac2a0a24 --- /dev/null +++ b/.github/workflows/osv-scanner.yml @@ -0,0 +1,50 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# A sample workflow which sets up periodic OSV-Scanner scanning for vulnerabilities, +# in addition to a PR check which fails if new vulnerabilities are introduced. 
+# +# For more examples and options, including how to ignore specific vulnerabilities, +# see https://google.github.io/osv-scanner/github-action/ + +name: OSV-Scanner + +on: + pull_request: + branches: [ "main" ] + merge_group: + branches: [ "main" ] + schedule: + - cron: '29 2 * * 4' + push: + branches: [ "main" ] + +permissions: read-all + +jobs: + scan-scheduled: + if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }} + uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@c5996e0193a3df57d695c1b8a1dec2a4c62e8730 # v2.3.3 + permissions: + actions: read + security-events: write + contents: read + with: + scan-args: |- + -r + --skip-git + ./ + scan-pr: + if: ${{ github.event_name == 'pull_request' || github.event_name == 'merge_group' }} + uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable-pr.yml@c5996e0193a3df57d695c1b8a1dec2a4c62e8730 # v2.3.3 + permissions: + actions: read + security-events: write + contents: read + with: + scan-args: |- + -r + --skip-git + ./ diff --git a/.github/workflows/poor-quality-management.yml b/.github/workflows/poor-quality-management.yml index df73ada89..657a86dae 100644 --- a/.github/workflows/poor-quality-management.yml +++ b/.github/workflows/poor-quality-management.yml @@ -4,9 +4,11 @@ on: pull_request_target: types: [labeled] schedule: - - cron: "0 9 * * *" # Daily at 9 AM UTC + - cron: "0 9 * * *" workflow_dispatch: +permissions: read-all + jobs: notify-poor-quality: name: Notify Poor Quality PR @@ -19,7 +21,7 @@ jobs: steps: - name: Add warning comment - uses: actions/github-script@v7 + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: script: | const message = `⚠️ **This PR has been marked as poor-quality.** @@ -73,7 +75,7 @@ jobs: steps: - name: Close PRs with poor-quality label older than 7 days - uses: actions/github-script@v7 + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: script: | const 
LABEL_NAME = 'poor-quality'; diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 000000000..1201a3a14 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,38 @@ +name: Publish to PyPI + +on: + release: + types: [published] + +permissions: read-all + +jobs: + publish: + name: Publish to PyPI + runs-on: ubuntu-latest + timeout-minutes: 10 + environment: pypi + permissions: + id-token: write + contents: read + + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Install uv + uses: astral-sh/setup-uv@e4db8464a088ece1b920f60402e813ea4de65b8f # v4 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.12" + + - name: Build package + run: uv build + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # release/v1 diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 000000000..08b117574 --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,78 @@ +# This workflow uses actions that are not certified by GitHub. They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecard supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '32 23 * * 2' + push: + branches: [ "main" ] + +# Declare default permissions as read only. 
+permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + # `publish_results: true` only works when run from the default branch. conditional can be removed if disabled. + if: github.event.repository.default_branch == github.ref_name || github.event_name == 'pull_request' + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # (Optional) Uncomment file_mode if you have a .gitattributes with files marked export-ignore + # file_mode: git + + # Upload the results as artifacts (optional). 
Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. + - name: "Upload artifact" + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: SARIF file + path: results.sarif + retention-days: 5 + + # Upload the results to GitHub's code scanning dashboard (optional). + # Commenting out will disable upload of results to your repo's Code Scanning dashboard + - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@38697555549f1db7851b81482ff19f1fa5c4fedc # v3 + with: + sarif_file: results.sarif diff --git a/.github/workflows/sonarcloud.yml b/.github/workflows/sonarcloud.yml new file mode 100644 index 000000000..123b16f0a --- /dev/null +++ b/.github/workflows/sonarcloud.yml @@ -0,0 +1,45 @@ +name: SonarCloud + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + sonarcloud: + name: SonarCloud Analysis + if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push' + runs-on: ubuntu-latest + timeout-minutes: 15 + + steps: + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 + + - name: Install uv + uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.12" + + - name: Install dependencies + run: uv sync --extra treesitter-full --extra test --extra semantic --group dev + + - name: Run tests with coverage + run: uv run pytest -n auto -m "not integration" --tb=short --cov=codebase_rag --cov-report=xml + + - name: SonarCloud Scan + uses: SonarSource/sonarqube-scan-action@fd88b7d7ccbaefd23d8f36f73b59db7a3d246602 # v6 + env: + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} diff --git 
a/.github/workflows/split-score.yml b/.github/workflows/split-score.yml new file mode 100644 index 000000000..7c65ac2e2 --- /dev/null +++ b/.github/workflows/split-score.yml @@ -0,0 +1,22 @@ +name: PR Split Score + +on: + pull_request: + branches: [main] + +permissions: + contents: read + pull-requests: write + +jobs: + score: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: pr-split score + uses: vitali87/pr-split@v1.0.0 + with: + max-loc: "400" diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml index 0940adcad..596a01ccd 100644 --- a/.github/workflows/version-bump.yml +++ b/.github/workflows/version-bump.yml @@ -16,6 +16,8 @@ on: - minor - major +permissions: read-all + jobs: bump-version: name: Auto Version Bump @@ -26,7 +28,7 @@ jobs: contents: write steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 2 token: ${{ secrets.GITHUB_TOKEN }} @@ -90,12 +92,17 @@ jobs: run: | sed -i 's/^version = ".*"/version = "${{ steps.bump_version.outputs.new }}"/' pyproject.toml + - name: Update server.json + if: steps.check_manual.outputs.skip == 'false' + run: | + sed -i 's/"version": "[^"]*"/"version": "${{ steps.bump_version.outputs.new }}"/g' server.json + - name: Commit version bump if: steps.check_manual.outputs.skip == 'false' run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git add pyproject.toml + git add pyproject.toml server.json git commit -m "chore: bump version to ${{ steps.bump_version.outputs.new }}" git push diff --git a/.gitignore b/.gitignore index 4b6211856..c44ce990d 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,19 @@ PROJECT.md .DS_Store .pypi_cache.json .omc +site/ + +# Eval harness scratch workspace (regenerated each run); result files are committed +evals/results/l3_workspace/ +# Rust 
oracle build artifacts (the source + Cargo.lock are committed) +evals/oracles/rs_oracle/target/ +# TypeScript oracle deps (the source + package-lock.json are committed) +evals/oracles/ts_oracle/node_modules/ +# Java oracle compiled classes (the source is committed) +evals/oracles/java_oracle/*.class +# Lua oracle deps (the source + package-lock.json are committed) +evals/oracles/lua_oracle/node_modules/ +# PHP oracle deps (the source + package-lock.json are committed) +evals/oracles/php_oracle/node_modules/ +.cgr-hash-cache.json +.cgr-dir-mtimes.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 92a09727a..12a7db5f0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,23 +5,24 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml + args: [--unsafe] - id: check-toml - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.12.2 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - exclude: ^codec/schema_pb2\.(py|pyi)$ + exclude: ^(codec/schema_pb2\.(py|pyi)$|benchmarks/|optimize/) - id: ruff-format - exclude: ^codec/schema_pb2\.(py|pyi)$ + exclude: ^(codec/schema_pb2\.(py|pyi)$|benchmarks/|optimize/) - repo: local hooks: - id: ty name: ty check - entry: uv run ty check --exclude codebase_rag/tests/ + entry: uv run ty check --exclude codebase_rag/tests/ --exclude benchmarks/ --exclude optimize/ --exclude codec/ --exclude grammars/ --exclude query_modules/ language: system types: [python] - exclude: ^codec/.*_pb2\.py$ + exclude: ^(codec/.*_pb2\.py$|benchmarks/|optimize/|grammars/|query_modules/) pass_filenames: false - repo: local hooks: @@ -30,7 +31,7 @@ repos: entry: uv run python scripts/check_no_docs.py language: system types: [python] - exclude: ^codec/schema_pb2\.py$ + exclude: ^(codec/schema_pb2\.py|benchmarks/|optimize/) - repo: local hooks: - id: generate-readme @@ -45,7 +46,7 @@ repos: - id: bandit args: ["-c", "pyproject.toml", "--severity-level", "high"] additional_dependencies: 
["bandit[toml]"] - exclude: ^(codebase_rag/tests/|scripts/) + exclude: ^(codebase_rag/tests/|scripts/|benchmarks/|optimize/) - repo: https://github.com/compilerla/conventional-pre-commit rev: v4.2.0 hooks: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cfc7c6d05..5fd788a9c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,7 @@ Thank you for your interest in contributing to Code Graph RAG! We welcome contri ## Getting Started -1. **Browse Issues**: Check out our [GitHub Issues](https://github.com/vitali87/code-graph-rag/issues) to find tasks that need work +1. **Browse Issues**: Check out our [issue tracker](https://github.com/vitali87/code-graph-rag/issues) to find tasks that need work - Look for issues labeled `good first issue` for beginner-friendly tasks - Issues labeled `help wanted` are open for community contributions 2. **Pick an Issue**: Choose an issue that interests you and matches your skill level diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..e965de91d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,51 @@ +FROM ghcr.io/astral-sh/uv:0.10@sha256:72ab0aeb448090480ccabb99fb5f52b0dc3c71923bffb5e2e26517a1c27b7fec AS uv + +FROM python:3.14-slim@sha256:fb83750094b46fd6b8adaa80f66e2302ecbe45d513f6cece637a841e1025b4ca AS builder + +COPY --from=uv /uv /uvx /bin/ + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + cmake build-essential libssl-dev zlib1g-dev libzstd-dev && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY pyproject.toml uv.lock ./ +RUN uv sync --frozen --no-dev --extra treesitter-full --no-install-project --no-binary-package pymgclient + +COPY . . 
+RUN uv sync --frozen --no-dev --extra treesitter-full --no-binary-package pymgclient + +FROM python:3.14-slim@sha256:fb83750094b46fd6b8adaa80f66e2302ecbe45d513f6cece637a841e1025b4ca + +RUN apt-get update && \ + apt-get install -y --no-install-recommends ripgrep libssl3 zlib1g libzstd1 && \ + rm -rf /var/lib/apt/lists/* + +RUN useradd --create-home appuser +USER appuser +WORKDIR /app + +COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv +COPY --from=builder --chown=appuser:appuser /app/codebase_rag /app/codebase_rag +COPY --from=builder --chown=appuser:appuser /app/codec /app/codec +COPY --from=builder --chown=appuser:appuser /app/cgr /app/cgr +COPY --from=builder --chown=appuser:appuser /app/pyproject.toml /app/pyproject.toml + +ENV PATH="/app/.venv/bin:$PATH" + +COPY --chmod=755 <<'EOF' /app/entrypoint.sh +#!/bin/sh +ARCH=$(uname -m) +case "$ARCH" in + x86_64) LIBDIR="/lib/x86_64-linux-gnu" ;; + aarch64) LIBDIR="/lib/aarch64-linux-gnu" ;; + *) LIBDIR="/lib" ;; +esac +export LD_PRELOAD="$LIBDIR/libz.so.1:$LIBDIR/libzstd.so.1" +exec code-graph-rag "$@" +EOF + +ENTRYPOINT ["/app/entrypoint.sh"] +CMD ["mcp-server"] diff --git a/LICENSE b/LICENSE index fd189113e..4765780e7 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) [2025] [Vitali Avagyan] +Copyright (c) 2025 Vitali Avagyan Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index 10c757dac..d8fa492d8 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help all install dev test test-parallel test-integration test-all test-parallel-all clean python build-grammars watch readme lint format typecheck check pre-commit +.PHONY: help all install dev test test-parallel test-integration test-all test-parallel-all clean python build-grammars watch readme lint format typecheck check pre-commit release PYTHON := uv run @@ -77,6 +77,9 @@ 
typecheck: ## Run type checking with ty check: lint typecheck test ## Run all checks: lint, typecheck, test +release: ## Build, verify, and publish the current pyproject version to PyPI, then tag and create a GitHub Release + ./scripts/release.sh + pre-commit: ## Run all pre-commit checks locally (comprehensive test before commit) @echo "Running pre-commit checks..." @echo "1. Formatting code..." diff --git a/PYPI_README.md b/PYPI_README.md new file mode 100644 index 000000000..93c74db31 --- /dev/null +++ b/PYPI_README.md @@ -0,0 +1,160 @@ +# Code-Graph-RAG + +A graph-based RAG system that parses multi-language codebases with Tree-sitter, builds knowledge graphs in Memgraph, and enables natural language querying, editing, and optimization. + +## Install + +```bash +pip install code-graph-rag +``` + +With all Tree-sitter grammars (Python, JS, TS, Rust, Go, Java, Scala, C++, Lua): + +```bash +pip install 'code-graph-rag[treesitter-full]' +``` + +With semantic code search (UniXcoder embeddings): + +```bash +pip install 'code-graph-rag[semantic]' +``` + +### Prerequisites + +- Python 3.12+ +- Docker (for Memgraph) +- `cmake` (for building pymgclient) +- `ripgrep` (`rg`) (for shell command text searching) + +## CLI Quick Start + +The package installs a `cgr` command. + +**Start Memgraph, parse a repo, and query it:** + +```bash +cgr daemon up # start Memgraph + Qdrant +cgr start --repo-path ./my-project \ + --update-graph --clean # parse & launch interactive chat +``` + +**Index to protobuf for offline use:** + +```bash +cgr index -o ./index-output --repo-path ./my-project +``` + +**Export knowledge graph to JSON:** + +```bash +cgr export -o graph.json +``` + +**AI-guided optimization:** + +```bash +cgr optimize python --repo-path ./my-project +``` + +**Run as an MCP server (for Claude Code):** + +```bash +cgr mcp-server +``` + +**Check your setup:** + +```bash +cgr doctor +``` + +## Python SDK + +The `cgr` package provides short imports for programmatic use. 
+ +### Load and query an exported graph + +```python +from cgr import load_graph + +graph = load_graph("graph.json") +print(graph.summary()) + +functions = graph.find_nodes_by_label("Function") +for fn in functions[:5]: + rels = graph.get_relationships_for_node(fn.node_id) + print(f"{fn.properties['name']}: {len(rels)} relationships") +``` + +### Query Memgraph with Cypher + +```python +from cgr import MemgraphIngestor + +with MemgraphIngestor(host="localhost", port=7687) as db: + rows = db.fetch_all("MATCH (f:Function) RETURN f.name LIMIT 10") + for row in rows: + print(row) +``` + +### Generate Cypher from natural language + +```python +import asyncio +from cgr import CypherGenerator + +async def main(): + gen = CypherGenerator() + cypher = await gen.generate("Find all classes that inherit from BaseModel") + print(cypher) + +asyncio.run(main()) +``` + +### Semantic code search + +Requires the `semantic` extra. + +```python +from cgr import embed_code + +embedding = embed_code("def authenticate(user, password): ...") +print(f"Embedding dimension: {len(embedding)}") +``` + +### Configuration + +```python +from cgr import settings + +settings.set_orchestrator("openai", "gpt-4o", api_key="sk-...") +settings.set_cypher("google", "gemini-2.5-flash", api_key="your-key") +``` + +## Environment Variables + +Configure via `.env` or environment variables: + +| Variable | Default | Description | +|----------|---------|-------------| +| `MEMGRAPH_HOST` | `localhost` | Memgraph hostname | +| `MEMGRAPH_PORT` | `7687` | Memgraph port | +| `ORCHESTRATOR_PROVIDER` | | Provider: `google`, `openai`, `ollama` | +| `ORCHESTRATOR_MODEL` | | Model ID (e.g. `gpt-4o`, `gemini-2.5-pro`) | +| `ORCHESTRATOR_API_KEY` | | API key for the provider (not needed for `ollama`) | +| `CYPHER_PROVIDER` | | Provider for Cypher generation | +| `CYPHER_MODEL` | | Model ID for Cypher generation (e.g. 
`codellama`, `gpt-4o-mini`) | +| `CYPHER_API_KEY` | | API key for Cypher provider (not needed for `ollama`) | +| `TARGET_REPO_PATH` | `.` | Default repository path | + +## Documentation + +Full documentation, architecture details, and contribution guide: +[docs.code-graph-rag.com](https://docs.code-graph-rag.com) + +## License + +MIT + + diff --git a/README.md b/README.md index 5ef87d4e0..fa36b17fa 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,54 @@
+
+
+
+
+
+
+
+