From 1ec43124e3ae8874ba35fcb69597f499aa4df364 Mon Sep 17 00:00:00 2001 From: Mykola Morhun Date: Fri, 15 May 2026 11:14:06 +0300 Subject: [PATCH 1/2] fix: detection of Golang project layout Signed-off-by: Mykola Morhun Assisted-by: Claude --- src/agentready/assessors/structure.py | 4 +- tests/unit/test_assessors_structure.py | 89 ++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/src/agentready/assessors/structure.py b/src/agentready/assessors/structure.py index d46d249d..9b8961cf 100644 --- a/src/agentready/assessors/structure.py +++ b/src/agentready/assessors/structure.py @@ -127,7 +127,9 @@ def assess(self, repository: Repository) -> Finding: Fix for #246, #305: Support multiple valid Python layouts """ - if self._primary_language(repository, {"Python", "Go"}) == "Go": + # Check for Go project first: go.mod presence is definitive, + # regardless of file counts (handles Go projects with Python scripts) + if self._find_go_module_roots(repository): return self._assess_go_layout(repository) # Check for tests directory (either tests/ or test/) diff --git a/tests/unit/test_assessors_structure.py b/tests/unit/test_assessors_structure.py index 91c99fe1..fdfadb02 100644 --- a/tests/unit/test_assessors_structure.py +++ b/tests/unit/test_assessors_structure.py @@ -1,5 +1,6 @@ """Tests for structure assessors.""" +import re from unittest.mock import MagicMock, patch import pytest @@ -833,6 +834,94 @@ def test_python_arsrc_bundled_with_package(self): assert len(content) > 0, "Python.arsrc is empty" assert "tests" in content, "Python.arsrc missing expected entry 'tests'" + # === Tests for Go project layout === + + def test_go_project_with_gomod_uses_go_assessment(self, tmp_path): + """Test that a Go project with go.mod uses Go-specific assessment.""" + git_dir = tmp_path / ".git" + git_dir.mkdir() + + # Create go.mod to indicate Go project + (tmp_path / "go.mod").write_text("module example.com/myproject\n") + + # Create standard Go directories + (tmp_path / "cmd").mkdir() + (tmp_path / "internal").mkdir() + + repo = Repository( + path=tmp_path, + name="go-project", + url=None, + branch="main", + commit_hash="abc123", + languages={"Go": 100}, + total_files=10, + total_lines=100, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + # Should use Go assessment + evidence_str = " ".join(finding.evidence) + assert "go.mod" in evidence_str + assert "cmd/" in evidence_str or "internal/" in evidence_str + + def test_go_project_with_python_scripts_uses_go_assessment(self, tmp_path): + """Test that Go project with Python scripts still uses Go assessment. + + This is the bug fix: even if a Go project has Python scripts (e.g., in + scripts/ directory) and Python has more file count, the presence of + go.mod should definitively mark it as a Go project. + """ + git_dir = tmp_path / ".git" + git_dir.mkdir() + + # Create go.mod to indicate Go project + (tmp_path / "go.mod").write_text("module example.com/myproject\n") + + # Create standard Go directories + (tmp_path / "cmd").mkdir() + (tmp_path / "internal").mkdir() + + # Create Python scripts directory (many Python files) + scripts_dir = tmp_path / "scripts" + scripts_dir.mkdir() + for i in range(10): + (scripts_dir / f"script{i}.py").write_text("# Python script\n") + + repo = Repository( + path=tmp_path, + name="go-project-with-scripts", + url=None, + branch="main", + commit_hash="abc123", + # Python has more files than Go in this scenario + languages={"Python": 150, "Go": 50}, + total_files=200, + total_lines=1000, + ) + + assessor = StandardLayoutAssessor() + finding = assessor.assess(repo) + + # Should STILL use Go assessment despite Python having more files + # because go.mod definitively indicates a Go project + evidence_str = " ".join(finding.evidence) + assert "go.mod" in evidence_str, "Should detect go.mod and use Go assessment" + assert ( + "cmd/" in evidence_str or "internal/" in evidence_str + ), "Should check Go directories, not Python" + # Should NOT check for Python src/ or tests/ directories + # If "src/" appears, it must be in a Go context (with go.mod, module, package) + if "src/" in evidence_str.lower(): + assert "source" not in evidence_str.lower() and ( + "go.mod" in evidence_str.lower() + or "module " in evidence_str.lower() + or re.search(r"\bpackage\s+\w+", evidence_str.lower()) + ), "If 'src/' appears, it must be in a clear Go context, not Python" + # Otherwise, evidence_str can omit "src/" entirely (which is expected for Go) + class TestIssuePRTemplatesAssessor: """Test IssuePRTemplatesAssessor.""" From d4295f2309bb768ccf0e92b3da7a52acf0e7cca7 Mon Sep 17 00:00:00 2001 From: Mykola Morhun Date: Wed, 20 May 2026 16:30:14 +0300 Subject: [PATCH 2/2] fix: rework _primary_language method Signed-off-by: Mykola Morhun Assisted-by: Claude --- src/agentready/assessors/base.py | 53 ++++- src/agentready/assessors/structure.py | 4 +- tests/unit/test_assessors_base.py | 282 +++++++++++++++++++++++++ tests/unit/test_assessors_structure.py | 58 ----- 4 files changed, 328 insertions(+), 69 deletions(-) create mode 100644 tests/unit/test_assessors_base.py diff --git a/src/agentready/assessors/base.py b/src/agentready/assessors/base.py index 0883671e..651ffe3c 100644 --- a/src/agentready/assessors/base.py +++ b/src/agentready/assessors/base.py @@ -74,7 +74,12 @@ def is_applicable(self, repository: Repository) -> bool: "Go": ["go.mod"], "Python": ["pyproject.toml", "setup.py", "setup.cfg"], "JavaScript": ["package.json"], - "TypeScript": ["tsconfig.json"], + "TypeScript": ["package.json", "tsconfig.json"], + "Java": ["pom.xml", "build.gradle", "build.gradle.kts"], + "Rust": ["Cargo.toml"], + "Ruby": ["Gemfile"], + "PHP": ["composer.json"], + "C#": ["*.csproj", "*.sln"], } def _primary_language( @@ -84,7 +89,10 @@ def _primary_language( ) -> str | None: """Return the primary programming language among candidates. - Uses file count as the base signal, but when counts are within 30% + First checks for root-level project manifests. + If exactly one language is detected by these manifest files, returns it immediately. + + Otherwise, uses file count as the base signal, but when counts are within 30% of each other, a root-level project manifest (go.mod, pyproject.toml, package.json) acts as tiebreaker — the language whose manifest sits at the repo root is treated as primary. @@ -92,6 +100,40 @@ def _primary_language( This handles repos like Go operators with a Python SDK subdirectory, where Python may have slightly more files but Go owns the root. """ + + def has_manifest(lang: str) -> bool: + """Check if language has root manifest file(s).""" + manifests = self._LANG_ROOT_MANIFESTS.get(lang, []) + for manifest in manifests: + if "*" in manifest: + if list(repository.path.glob(manifest)): + return True + else: + if (repository.path / manifest).exists(): + return True + return False + + # First, check for project files in root + detected_by_manifest = [lang for lang in candidates if has_manifest(lang)] + + # If exactly one language detected by manifests, return it + if len(detected_by_manifest) == 1: + return detected_by_manifest[0] + + # Special handling for JavaScript/TypeScript (share package.json) + if set(detected_by_manifest) == {"JavaScript", "TypeScript"}: + # TypeScript projects have tsconfig.json - stronger signal than file count + if (repository.path / "tsconfig.json").exists(): + return "TypeScript" + # Otherwise determine by file count + js_count = repository.languages.get("JavaScript", 0) + ts_count = repository.languages.get("TypeScript", 0) + if js_count > ts_count: + return "JavaScript" + elif ts_count > js_count: + return "TypeScript" + + # Use file counts to detect primary language lang_counts = { lang: repository.languages.get(lang, 0) for lang in candidates @@ -112,12 +154,7 @@ def _primary_language( } if len(close_langs) > 1: manifest_winners = [ - lang - for lang in sorted(close_langs) - if any( - (repository.path / m).exists() - for m in self._LANG_ROOT_MANIFESTS.get(lang, []) - ) + lang for lang in sorted(close_langs) if has_manifest(lang) ] if len(manifest_winners) == 1: return manifest_winners[0] diff --git a/src/agentready/assessors/structure.py b/src/agentready/assessors/structure.py index 9b8961cf..d46d249d 100644 --- a/src/agentready/assessors/structure.py +++ b/src/agentready/assessors/structure.py @@ -127,9 +127,7 @@ def assess(self, repository: Repository) -> Finding: Fix for #246, #305: Support multiple valid Python layouts """ - # Check for Go project first: go.mod presence is definitive, - # regardless of file counts (handles Go projects with Python scripts) - if self._find_go_module_roots(repository): + if self._primary_language(repository, {"Python", "Go"}) == "Go": return self._assess_go_layout(repository) # Check for tests directory (either tests/ or test/) diff --git a/tests/unit/test_assessors_base.py b/tests/unit/test_assessors_base.py new file mode 100644 index 00000000..2f059270 --- /dev/null +++ b/tests/unit/test_assessors_base.py @@ -0,0 +1,282 @@ +"""Unit tests for BaseAssessor helper methods.""" + +from unittest.mock import patch + +from agentready.assessors.base import BaseAssessor +from agentready.models.finding import Finding +from agentready.models.repository import Repository + + +class ConcreteAssessor(BaseAssessor): + """Concrete implementation for testing BaseAssessor methods.""" + + @property + def attribute_id(self) -> str: + return "test_attribute" + + @property + def tier(self) -> int: + return 1 + + def assess(self, repository: Repository) -> Finding: + return Finding.create_pass(self.attribute_id, evidence="test", details="test") + + +class TestPrimaryLanguage: + """Tests for _primary_language() method.""" + + def test_go_manifest_detection(self, tmp_path): + """Single language detected by manifest returns immediately.""" + (tmp_path / "go.mod").write_text("module test\n") + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={"Go": 10, "Python": 100}, # Python has more files + total_files=110, + total_lines=1000, + ) + + assessor = ConcreteAssessor() + result = assessor._primary_language(repo, {"Go", "Python"}) + + # Go should win despite having fewer files, because go.mod exists + assert result == "Go" + + def test_python_manifest_detection(self, tmp_path): + """Python project detected by pyproject.toml.""" + (tmp_path / "pyproject.toml").write_text("[project]\nname = 'test'\n") + + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={"Python": 50, "JavaScript": 100}, # JS has more files + total_files=150, + total_lines=1000, + ) + + assessor = ConcreteAssessor() + result = assessor._primary_language(repo, {"Python", "JavaScript"}) + + # Python should win because pyproject.toml exists + assert result == "Python" + + def test_typescript_manifest_tsconfig_priority(self, tmp_path): + """TypeScript detected when tsconfig.json exists.""" + (tmp_path / "package.json").write_text("{}") + (tmp_path / "tsconfig.json").write_text("{}") + + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={"JavaScript": 200, "TypeScript": 50}, # More JS files + total_files=250, + total_lines=5000, + ) + + assessor = ConcreteAssessor() + result = assessor._primary_language(repo, {"JavaScript", "TypeScript"}) + + # TypeScript should win because tsconfig.json exists + assert result == "TypeScript" + + def test_javascript_manifest_detection_without_tsconfig(self, tmp_path): + """JavaScript wins when no tsconfig.json and more JS files.""" + (tmp_path / "package.json").write_text("{}") + + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={"JavaScript": 200, "TypeScript": 50}, + total_files=250, + total_lines=5000, + ) + + assessor = ConcreteAssessor() + result = assessor._primary_language(repo, {"JavaScript", "TypeScript"}) + + # JavaScript should win by file count + assert result == "JavaScript" + + def test_typescript_by_file_count(self, tmp_path): + """TypeScript wins by file count when no tsconfig.json.""" + (tmp_path / "package.json").write_text("{}") + + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={"JavaScript": 50, "TypeScript": 200}, + total_files=250, + total_lines=5000, + ) + + assessor = ConcreteAssessor() + result = assessor._primary_language(repo, {"JavaScript", "TypeScript"}) + + assert result == "TypeScript" + + def test_java_maven_detection(self, tmp_path): + """Java project detected by pom.xml.""" + (tmp_path / "pom.xml").write_text("") + + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={"Java": 100}, + total_files=100, + total_lines=2000, + ) + + assessor = ConcreteAssessor() + result = assessor._primary_language(repo, {"Java", "Python"}) + + assert result == "Java" + + def test_java_gradle_detection(self, tmp_path): + """Java project detected by build.gradle.""" + (tmp_path / "build.gradle").write_text("plugins {}") + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={"Java": 100}, + total_files=100, + total_lines=2000, + ) + + assessor = ConcreteAssessor() + result = assessor._primary_language(repo, {"Java", "Python"}) + + assert result == "Java" + + def test_rust_cargo_detection(self, tmp_path): + """Rust project detected by Cargo.toml.""" + (tmp_path / "Cargo.toml").write_text("[package]\nname = 'test'\n") + + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={"Rust": 100}, + total_files=100, + total_lines=2000, + ) + + assessor = ConcreteAssessor() + result = assessor._primary_language(repo, {"Rust"}) + + assert result == "Rust" + + def test_multiple_manifests_use_file_count(self, tmp_path): + """When multiple languages have manifests, use file count.""" + (tmp_path / "go.mod").write_text("module test\n") + (tmp_path / "pyproject.toml").write_text("[project]\n") + + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={"Go": 50, "Python": 150}, + total_files=200, + total_lines=4000, + ) + + assessor = ConcreteAssessor() + result = assessor._primary_language(repo, {"Go", "Python"}) + + # Python has more files, so it should win + assert result == "Python" + + def test_no_manifests_use_file_count(self, tmp_path): + """Falls back to file count when no manifests.""" + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={"Python": 100, "JavaScript": 50}, + total_files=150, + total_lines=3000, + ) + + assessor = ConcreteAssessor() + result = assessor._primary_language(repo, {"Python", "JavaScript"}) + + # Python has more files + assert result == "Python" + + def test_language_not_in_candidates(self, tmp_path): + """Manifest for language not in candidates is ignored.""" + (tmp_path / "go.mod").write_text("module test\n") + + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={"Python": 100}, + total_files=100, + total_lines=2000, + ) + + assessor = ConcreteAssessor() + # Only ask about Python, not Go + result = assessor._primary_language(repo, {"Python"}) + + assert result == "Python" + + def test_no_languages_returns_none(self, tmp_path): + """Returns None when no languages present.""" + + with patch.object(Repository, "__post_init__", lambda self: None): + repo = Repository( + path=tmp_path, + name="test", + url=None, + branch="main", + commit_hash="abc", + languages={}, + total_files=0, + total_lines=0, + ) + + assessor = ConcreteAssessor() + result = assessor._primary_language(repo, {"Go", "Python"}) + + assert result is None diff --git a/tests/unit/test_assessors_structure.py b/tests/unit/test_assessors_structure.py index fdfadb02..cb6929f0 100644 --- a/tests/unit/test_assessors_structure.py +++ b/tests/unit/test_assessors_structure.py @@ -1,6 +1,5 @@ """Tests for structure assessors.""" -import re from unittest.mock import MagicMock, patch import pytest @@ -834,8 +833,6 @@ def test_python_arsrc_bundled_with_package(self): assert len(content) > 0, "Python.arsrc is empty" assert "tests" in content, "Python.arsrc missing expected entry 'tests'" - # === Tests for Go project layout === - def test_go_project_with_gomod_uses_go_assessment(self, tmp_path): """Test that a Go project with go.mod uses Go-specific assessment.""" git_dir = tmp_path / ".git" @@ -867,61 +864,6 @@ def test_go_project_with_gomod_uses_go_assessment(self, tmp_path): assert "go.mod" in evidence_str assert "cmd/" in evidence_str or "internal/" in evidence_str - def test_go_project_with_python_scripts_uses_go_assessment(self, tmp_path): - """Test that Go project with Python scripts still uses Go assessment. - - This is the bug fix: even if a Go project has Python scripts (e.g., in - scripts/ directory) and Python has more file count, the presence of - go.mod should definitively mark it as a Go project. - """ - git_dir = tmp_path / ".git" - git_dir.mkdir() - - # Create go.mod to indicate Go project - (tmp_path / "go.mod").write_text("module example.com/myproject\n") - - # Create standard Go directories - (tmp_path / "cmd").mkdir() - (tmp_path / "internal").mkdir() - - # Create Python scripts directory (many Python files) - scripts_dir = tmp_path / "scripts" - scripts_dir.mkdir() - for i in range(10): - (scripts_dir / f"script{i}.py").write_text("# Python script\n") - - repo = Repository( - path=tmp_path, - name="go-project-with-scripts", - url=None, - branch="main", - commit_hash="abc123", - # Python has more files than Go in this scenario - languages={"Python": 150, "Go": 50}, - total_files=200, - total_lines=1000, - ) - - assessor = StandardLayoutAssessor() - finding = assessor.assess(repo) - - # Should STILL use Go assessment despite Python having more files - # because go.mod definitively indicates a Go project - evidence_str = " ".join(finding.evidence) - assert "go.mod" in evidence_str, "Should detect go.mod and use Go assessment" - assert ( - "cmd/" in evidence_str or "internal/" in evidence_str - ), "Should check Go directories, not Python" - # Should NOT check for Python src/ or tests/ directories - # If "src/" appears, it must be in a Go context (with go.mod, module, package) - if "src/" in evidence_str.lower(): - assert "source" not in evidence_str.lower() and ( - "go.mod" in evidence_str.lower() - or "module " in evidence_str.lower() - or re.search(r"\bpackage\s+\w+", evidence_str.lower()) - ), "If 'src/' appears, it must be in a clear Go context, not Python" - # Otherwise, evidence_str can omit "src/" entirely (which is expected for Go) - class TestIssuePRTemplatesAssessor: """Test IssuePRTemplatesAssessor."""