Neverdecel · Neverdecel · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,5 @@
+[flake8]
+# Black formats code to 88 columns; allow a little slack (up to 100) for prose in docstrings/comments.
+max-line-length = 100
+extend-ignore = E203, W503
+exclude = .git, __pycache__, build, dist, venv, .venv, env
diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
@@ -1,69 +1,52 @@
-name: CI Tests
+name: CI
 
 on:
   push:
-    branches: [ main, master, develop ]
+    branches: [main, master, develop]
   pull_request:
-    branches: [ main, master, develop ]
+    branches: [main, master, develop]
   schedule:
     - cron: "0 3 * * *"
 
 jobs:
-  test-imports:
-    runs-on: ubuntu-latest
-
-    steps:
-    - uses: actions/checkout@v4
-
-    - name: Set up Python 3.11
-      uses: actions/setup-python@v5
-      with:
-        python-version: '3.11'
-
-    - name: Cache pip dependencies
-      uses: actions/cache@v4
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
-
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install -r requirements.txt
-
-    - name: Test Import Structure
-      run: |
-        python -c "import coderag.config; print('✓ Config import successful')"
-        python -c "import coderag.embeddings; print('✓ Embeddings import successful')"
-        python -c "import coderag.index; print('✓ Index import successful')"
-        python -c "import coderag.search; print('✓ Search import successful')"
-        python -c "import coderag.monitor; print('✓ Monitor import successful')"
-      env:
-        OPENAI_API_KEY: dummy-key-for-testing
-
   quality-and-tests:
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.11", "3.12"]
+
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Python 3.11
+
+      - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v5
         with:
-          python-version: '3.11'
-      - name: Install dependencies
+          python-version: ${{ matrix.python-version }}
+
+      - name: Cache pip
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-py${{ matrix.python-version }}-pip-${{ hashFiles('pyproject.toml') }}
+          restore-keys: |
+            ${{ runner.os }}-py${{ matrix.python-version }}-pip-
+
+      - name: Install
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-      - name: Format check
+          pip install -e ".[dev,server,openai]"
+
+      - name: Format check (black, isort)
         run: |
           black --check .
           isort --check-only .
-      - name: Lint
-        run: flake8 . --max-line-length=88 --ignore=E203,W503
-      - name: Type check
-        run: mypy .
-      - name: Run tests
-        env:
-          PYTHONPATH: ${{ github.workspace }}
-        run: pytest -q
+
+      - name: Lint (flake8)
+        run: flake8 coderag tests
+
+      - name: Type check (mypy)
+        run: mypy coderag
+
+      - name: Tests (offline — no model downloads, no network)
+        run: pytest -m "not integration"
diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,13 @@
 __pycache__/
 *.py[cod]
 
+# Build artifacts
+*.egg-info/
+build/
+dist/
+.pytest_cache/
+.mypy_cache/
+
 # Ignore virtual environment directories
 .venv/
 env/
@@ -15,6 +22,10 @@ node_modules/
 
 # Ignore FAISS index file
 *.faiss
+*.faiss.kind
+
+# Ignore the CodeRAG store (SQLite db + index live here by default)
+.coderag/
 
 # Ignore Git directory
 .git/

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -20,7 +20,6 @@ repos:
     hooks:
       - id: flake8
         additional_dependencies: ["flake8-bugbear==24.4.26"]
-        args: ["--max-line-length=88", "--ignore=E203,W503"]
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v1.11.1
     hooks:

diff --git a/AGENTS.md b/AGENTS.md
@@ -1,37 +1,46 @@
 # Repository Guidelines
 
 ## Project Structure & Module Organization
-- `coderag/`: Core library (`config.py`, `embeddings.py`, `index.py`, `search.py`, `monitor.py`).
-- `app.py`: Streamlit UI. `main.py`: backend/indexer. `prompt_flow.py`: RAG orchestration.
-- `scripts/`: Utilities (e.g., `initialize_index.py`, `run_monitor.py`).
-- `tests/`: Minimal checks (e.g., `test_faiss.py`).
-- `example.env` → copy to `.env` for local secrets; CI lives in `.github/`.
+- `coderag/api.py`: The `CodeRAG` facade — the public entry point every surface routes through.
+- `coderag/config.py`, `coderag/types.py`: Immutable `Config` and shared dataclasses.
+- `coderag/embeddings/`: `EmbeddingProvider` protocol + `fastembed` (default), `openai`, `fake`.
+- `coderag/chunking/`: Symbol-aware chunking (`python_ast.py`, `treesitter.py`, line-window `base.py`).
+- `coderag/store/`: `sqlite_store.py` (source of truth + FTS5) and `vector_index.py` (FAISS Flat/IVF cache).
+- `coderag/retrieval/`: Hybrid dense + BM25 search fused with RRF.
+- `coderag/indexer.py`, `coderag/watch.py`: Incremental indexing and the debounced watcher.
+- `coderag/surfaces/`: `cli.py`, `http_api.py` (FastAPI), `streamlit_app.py` — thin adapters over the facade.
+- `tests/`: pytest suite (offline by default via the `fake` provider; real model behind `-m integration`).
+- `example.env` → copy to `.env`; CI lives in `.github/`.
 
 ## Build, Test, and Development Commands
 - Create env: `python -m venv venv && source venv/bin/activate`.
-- Install deps: `pip install -r requirements.txt`.
-- Run backend: `python main.py` (indexes and watches `WATCHED_DIR`).
-- Run UI: `streamlit run app.py`.
-- Quick test: `python tests/test_faiss.py` (FAISS round‑trip sanity check).
-- Quality suite: `pre-commit run --all-files` (black, isort, flake8, mypy, basics).
+- Install: `pip install -e ".[dev,server,openai]"` (available extras: `dev`, `server`, `ui`, `openai`).
+- Use it: `coderag index`, `coderag search "QUERY"`, `coderag watch`, `coderag serve`, `coderag ui`, `coderag status`.
+- Tests: `pytest -m "not integration"` (fast/offline) or `pytest -m integration` (real fastembed).
+- Quality: `black --check . && isort --check-only . && flake8 coderag tests && mypy coderag`.
 
 ## Coding Style & Naming Conventions
-- Formatting: Black (88 cols), isort profile "black"; run `black . && isort .`.
-- Linting: flake8 with `--ignore=E203,W503` to match Black.
-- Typing: mypy (py311 target; ignore missing imports OK). Prefer typed signatures and docstrings.
-- Indentation: 4 spaces. Names: `snake_case` for files/functions, `PascalCase` for classes, constants `UPPER_SNAKE`.
-- Imports: first‑party module is `coderag` (see `pyproject.toml`).
+- Formatting: Black (88-col code), isort profile "black". Linting: flake8, configured in `.flake8`, allows up to 100 cols to leave slack for prose in docstrings/comments.
+- Typing: mypy (py311 target). Prefer typed signatures and concise docstrings.
+- Indentation: 4 spaces. `snake_case` functions/files, `PascalCase` classes, `UPPER_SNAKE` constants.
+- First-party module is `coderag`; surfaces must stay thin — no engine logic in `surfaces/`.
+
+## Architecture Invariants
+- SQLite is the source of truth; the FAISS index is a rebuildable cache (`rebuild_from_store`).
+- `chunks.id` is the FAISS id and is `AUTOINCREMENT` (ids never reused).
+- Incremental indexing is delete-before-add (no duplicate/stale vectors); unchanged files are skipped via content hash.
+- Embedding dimension comes from the provider, not a constant; a model change triggers a rebuild.
 
 ## Testing Guidelines
-- Place tests in `tests/` as `test_*.py`. Keep unit tests deterministic; mock OpenAI calls where possible.
-- Run directly (`python tests/test_faiss.py`) or with pytest if available (`pytest -q`).
-- Ensure `.env` or env vars provide `OPENAI_API_KEY` for integration tests; avoid hitting rate limits in CI.
+- Place tests in `tests/` as `test_*.py`; keep them deterministic and offline (use the `fake` provider fixture).
+- Mark anything that downloads a model or hits the network with `@pytest.mark.integration` (deselected in CI).
+- Mock OpenAI; never call the network in default tests.
 
 ## Commit & Pull Request Guidelines
-- Use Conventional Commits seen in history: `feat:`, `fix:`, `docs:`, `ci:`, `refactor:`, `simplify:`.
-- Before pushing: `pre-commit run --all-files` and update docs when behavior changes.
-- PRs: clear description, linked issues, steps to validate; include screenshots/GIFs for UI changes; note config changes (`.env`).
+- Conventional Commits: `feat:`, `fix:`, `docs:`, `ci:`, `refactor:`, `test:`.
+- Before pushing: run the quality gate (the `Quality:` command under "Build, Test, and Development Commands") and update docs when behavior changes.
+- PRs: clear description, validation steps, screenshots/GIFs for UI changes, note config changes (`.env`).
 
 ## Security & Configuration Tips
-- Never commit secrets. Start with `cp example.env .env`; set `OPENAI_API_KEY`, `WATCHED_DIR`, `FAISS_INDEX_FILE`.
-- Avoid logging sensitive data. Regenerate the FAISS index if dimensions or models change (`python scripts/initialize_index.py`).
+- Never commit secrets. The default local provider needs no key; OpenAI is opt-in.
+- The index and database live in `CODERAG_STORE_DIR` (default `./.coderag/`, which is gitignored).