Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 45 additions & 8 deletions src/agentready/assessors/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,12 @@ def is_applicable(self, repository: Repository) -> bool:
"Go": ["go.mod"],
"Python": ["pyproject.toml", "setup.py", "setup.cfg"],
"JavaScript": ["package.json"],
"TypeScript": ["tsconfig.json"],
"TypeScript": ["package.json", "tsconfig.json"],
Comment thread
coderabbitai[bot] marked this conversation as resolved.
"Java": ["pom.xml", "build.gradle", "build.gradle.kts"],
"Rust": ["Cargo.toml"],
"Ruby": ["Gemfile"],
"PHP": ["composer.json"],
"C#": ["*.csproj", "*.sln"],
}

def _primary_language(
Expand All @@ -84,14 +89,51 @@ def _primary_language(
) -> str | None:
"""Return the primary programming language among candidates.

Uses file count as the base signal, but when counts are within 30%
First checks for root-level project manifests.
If exactly one language is detected by these manifest files, returns it immediately.

Otherwise, uses file count as the base signal, but when counts are within 30%
of each other, a root-level project manifest (go.mod, pyproject.toml,
package.json) acts as tiebreaker — the language whose manifest sits
at the repo root is treated as primary.

This handles repos like Go operators with a Python SDK subdirectory,
where Python may have slightly more files but Go owns the root.
"""

def has_manifest(lang: str) -> bool:
    """Return True when a root-level manifest registered for ``lang`` exists.

    Manifest names are read from ``self._LANG_ROOT_MANIFESTS``; a language
    without an entry has no manifests and yields False. Names containing
    ``*`` are treated as glob patterns relative to ``repository.path``;
    every other name is checked as a literal entry directly under it.
    """
    root = repository.path
    for manifest in self._LANG_ROOT_MANIFESTS.get(lang, []):
        if "*" in manifest:
            # Pull at most one hit from the glob iterator: only existence
            # matters, so there is no need to collect every match.
            if next(root.glob(manifest), None) is not None:
                return True
        elif (root / manifest).exists():
            return True
    return False

# First, check for project files in root
detected_by_manifest = [lang for lang in candidates if has_manifest(lang)]

# If exactly one language detected by manifests, return it
if len(detected_by_manifest) == 1:
return detected_by_manifest[0]

# Special handling for JavaScript/TypeScript (share package.json)
if set(detected_by_manifest) == {"JavaScript", "TypeScript"}:
# TypeScript projects have tsconfig.json - stronger signal than file count
if (repository.path / "tsconfig.json").exists():
return "TypeScript"
# Otherwise determine by file count
js_count = repository.languages.get("JavaScript", 0)
ts_count = repository.languages.get("TypeScript", 0)
if js_count > ts_count:
return "JavaScript"
elif ts_count > js_count:
return "TypeScript"

# Use file counts to detect primary language
lang_counts = {
lang: repository.languages.get(lang, 0)
for lang in candidates
Expand All @@ -112,12 +154,7 @@ def _primary_language(
}
if len(close_langs) > 1:
manifest_winners = [
lang
for lang in sorted(close_langs)
if any(
(repository.path / m).exists()
for m in self._LANG_ROOT_MANIFESTS.get(lang, [])
)
lang for lang in sorted(close_langs) if has_manifest(lang)
]
if len(manifest_winners) == 1:
return manifest_winners[0]
Expand Down
282 changes: 282 additions & 0 deletions tests/unit/test_assessors_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
"""Unit tests for BaseAssessor helper methods."""

from unittest.mock import patch

from agentready.assessors.base import BaseAssessor
from agentready.models.finding import Finding
from agentready.models.repository import Repository


class ConcreteAssessor(BaseAssessor):
    """Minimal BaseAssessor subclass so the base-class helpers can be tested."""

    @property
    def attribute_id(self) -> str:
        """Fixed attribute identifier used by this test double."""
        return "test_attribute"

    @property
    def tier(self) -> int:
        """Fixed tier value used by this test double."""
        return 1

    def assess(self, repository: Repository) -> Finding:
        """Return a passing finding for this attribute; ``repository`` is unused."""
        passing = Finding.create_pass(
            self.attribute_id,
            evidence="test",
            details="test",
        )
        return passing

Comment thread
mmorhun marked this conversation as resolved.

class TestPrimaryLanguage:
    """Exercise ``_primary_language()`` against temporary repository roots."""

    @staticmethod
    def _repo(root, languages, total_files, total_lines):
        """Build a Repository at ``root`` with ``__post_init__`` patched out.

        NOTE(review): the patch presumably skips construction-time checks
        that a bare temporary directory would not satisfy; confirm against
        the Repository model.
        """
        with patch.object(Repository, "__post_init__", lambda self: None):
            return Repository(
                path=root,
                name="test",
                url=None,
                branch="main",
                commit_hash="abc",
                languages=languages,
                total_files=total_files,
                total_lines=total_lines,
            )

    @staticmethod
    def _detect(repo, candidates):
        """Run ``_primary_language`` on a fresh ConcreteAssessor."""
        return ConcreteAssessor()._primary_language(repo, candidates)

    def test_go_manifest_detection(self, tmp_path):
        """A lone go.mod makes Go primary regardless of file counts."""
        (tmp_path / "go.mod").write_text("module test\n")
        # Python deliberately outnumbers Go ten to one.
        repo = self._repo(tmp_path, {"Go": 10, "Python": 100}, 110, 1000)

        detected = self._detect(repo, {"Go", "Python"})

        # The root manifest outweighs the raw file count.
        assert detected == "Go"

    def test_python_manifest_detection(self, tmp_path):
        """A lone pyproject.toml makes Python primary."""
        (tmp_path / "pyproject.toml").write_text("[project]\nname = 'test'\n")
        # JavaScript has twice as many files as Python.
        repo = self._repo(tmp_path, {"Python": 50, "JavaScript": 100}, 150, 1000)

        detected = self._detect(repo, {"Python", "JavaScript"})

        # pyproject.toml is the only manifest present, so Python wins.
        assert detected == "Python"

    def test_typescript_manifest_tsconfig_priority(self, tmp_path):
        """tsconfig.json selects TypeScript even with more JavaScript files."""
        for manifest in ("package.json", "tsconfig.json"):
            (tmp_path / manifest).write_text("{}")
        repo = self._repo(
            tmp_path, {"JavaScript": 200, "TypeScript": 50}, 250, 5000
        )

        detected = self._detect(repo, {"JavaScript", "TypeScript"})

        # tsconfig.json is treated as the stronger signal.
        assert detected == "TypeScript"

    def test_javascript_manifest_detection_without_tsconfig(self, tmp_path):
        """With only package.json, the larger JavaScript count decides."""
        (tmp_path / "package.json").write_text("{}")
        repo = self._repo(
            tmp_path, {"JavaScript": 200, "TypeScript": 50}, 250, 5000
        )

        detected = self._detect(repo, {"JavaScript", "TypeScript"})

        # No tsconfig.json, so file count settles it.
        assert detected == "JavaScript"

    def test_typescript_by_file_count(self, tmp_path):
        """With only package.json, the larger TypeScript count decides."""
        (tmp_path / "package.json").write_text("{}")
        repo = self._repo(
            tmp_path, {"JavaScript": 50, "TypeScript": 200}, 250, 5000
        )

        detected = self._detect(repo, {"JavaScript", "TypeScript"})

        assert detected == "TypeScript"

    def test_java_maven_detection(self, tmp_path):
        """pom.xml at the root identifies a Java project."""
        (tmp_path / "pom.xml").write_text("<project></project>")
        repo = self._repo(tmp_path, {"Java": 100}, 100, 2000)

        detected = self._detect(repo, {"Java", "Python"})

        assert detected == "Java"

    def test_java_gradle_detection(self, tmp_path):
        """build.gradle at the root identifies a Java project."""
        (tmp_path / "build.gradle").write_text("plugins {}")
        repo = self._repo(tmp_path, {"Java": 100}, 100, 2000)

        detected = self._detect(repo, {"Java", "Python"})

        assert detected == "Java"

    def test_rust_cargo_detection(self, tmp_path):
        """Cargo.toml at the root identifies a Rust project."""
        (tmp_path / "Cargo.toml").write_text("[package]\nname = 'test'\n")
        repo = self._repo(tmp_path, {"Rust": 100}, 100, 2000)

        detected = self._detect(repo, {"Rust"})

        assert detected == "Rust"

    def test_multiple_manifests_use_file_count(self, tmp_path):
        """Two competing manifests fall back to comparing file counts."""
        (tmp_path / "go.mod").write_text("module test\n")
        (tmp_path / "pyproject.toml").write_text("[project]\n")
        repo = self._repo(tmp_path, {"Go": 50, "Python": 150}, 200, 4000)

        detected = self._detect(repo, {"Go", "Python"})

        # Python has the larger file count.
        assert detected == "Python"

    def test_no_manifests_use_file_count(self, tmp_path):
        """Without any manifest the larger file count decides."""
        repo = self._repo(tmp_path, {"Python": 100, "JavaScript": 50}, 150, 3000)

        detected = self._detect(repo, {"Python", "JavaScript"})

        # Python has the larger file count.
        assert detected == "Python"

    def test_language_not_in_candidates(self, tmp_path):
        """A manifest for a language outside the candidates is ignored."""
        (tmp_path / "go.mod").write_text("module test\n")
        repo = self._repo(tmp_path, {"Python": 100}, 100, 2000)

        # Go is intentionally left out of the candidate set.
        detected = self._detect(repo, {"Python"})

        assert detected == "Python"

    def test_no_languages_returns_none(self, tmp_path):
        """An empty language map yields None."""
        repo = self._repo(tmp_path, {}, 0, 0)

        detected = self._detect(repo, {"Go", "Python"})

        assert detected is None
Loading
Loading