just why not
All checks were successful
AI Codebase Quality Review / ai-codebase-review (push) Successful in 39s
All checks were successful
AI Codebase Quality Review / ai-codebase-review (push) Successful in 39s
This commit is contained in:
358
tools/ai-review/utils/ignore_patterns.py
Normal file
358
tools/ai-review/utils/ignore_patterns.py
Normal file
@@ -0,0 +1,358 @@
|
||||
"""AI Review Ignore Patterns
|
||||
|
||||
Provides .gitignore-style pattern matching for excluding files from AI review.
|
||||
Reads patterns from .ai-reviewignore files in the repository.
|
||||
"""
|
||||
|
||||
import fnmatch
import logging
import os
import re
from dataclasses import dataclass
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass
|
||||
class IgnoreRule:
|
||||
"""A single ignore rule."""
|
||||
|
||||
pattern: str
|
||||
negation: bool = False
|
||||
directory_only: bool = False
|
||||
anchored: bool = False
|
||||
regex: re.Pattern = None
|
||||
|
||||
def __post_init__(self):
|
||||
"""Compile the pattern to regex."""
|
||||
self.regex = self._compile_pattern()
|
||||
|
||||
def _compile_pattern(self) -> re.Pattern:
|
||||
"""Convert gitignore pattern to regex."""
|
||||
pattern = self.pattern
|
||||
|
||||
# Handle directory-only patterns
|
||||
if pattern.endswith("/"):
|
||||
pattern = pattern[:-1]
|
||||
self.directory_only = True
|
||||
|
||||
# Handle anchored patterns (starting with /)
|
||||
if pattern.startswith("/"):
|
||||
pattern = pattern[1:]
|
||||
self.anchored = True
|
||||
|
||||
# Escape special regex characters except * and ?
|
||||
regex_pattern = ""
|
||||
i = 0
|
||||
while i < len(pattern):
|
||||
c = pattern[i]
|
||||
if c == "*":
|
||||
if i + 1 < len(pattern) and pattern[i + 1] == "*":
|
||||
# ** matches everything including /
|
||||
if i + 2 < len(pattern) and pattern[i + 2] == "/":
|
||||
regex_pattern += "(?:.*/)?"
|
||||
i += 3
|
||||
continue
|
||||
else:
|
||||
regex_pattern += ".*"
|
||||
i += 2
|
||||
continue
|
||||
else:
|
||||
# * matches everything except /
|
||||
regex_pattern += "[^/]*"
|
||||
elif c == "?":
|
||||
regex_pattern += "[^/]"
|
||||
elif c == "[":
|
||||
# Character class
|
||||
j = i + 1
|
||||
if j < len(pattern) and pattern[j] == "!":
|
||||
regex_pattern += "[^"
|
||||
j += 1
|
||||
else:
|
||||
regex_pattern += "["
|
||||
while j < len(pattern) and pattern[j] != "]":
|
||||
regex_pattern += pattern[j]
|
||||
j += 1
|
||||
if j < len(pattern):
|
||||
regex_pattern += "]"
|
||||
i = j
|
||||
elif c in ".^$+{}|()":
|
||||
regex_pattern += "\\" + c
|
||||
else:
|
||||
regex_pattern += c
|
||||
i += 1
|
||||
|
||||
# Anchor pattern
|
||||
if self.anchored:
|
||||
regex_pattern = "^" + regex_pattern
|
||||
else:
|
||||
regex_pattern = "(?:^|/)" + regex_pattern
|
||||
|
||||
# Match to end or as directory prefix
|
||||
regex_pattern += "(?:$|/)"
|
||||
|
||||
return re.compile(regex_pattern)
|
||||
|
||||
def matches(self, path: str, is_directory: bool = False) -> bool:
|
||||
"""Check if a path matches this rule.
|
||||
|
||||
Args:
|
||||
path: Relative path to check.
|
||||
is_directory: Whether the path is a directory.
|
||||
|
||||
Returns:
|
||||
True if the path matches.
|
||||
"""
|
||||
if self.directory_only and not is_directory:
|
||||
return False
|
||||
|
||||
# Normalize path
|
||||
path = path.replace("\\", "/")
|
||||
if not path.startswith("/"):
|
||||
path = "/" + path
|
||||
|
||||
return bool(self.regex.search(path))
|
||||
|
||||
|
||||
class IgnorePatterns:
|
||||
"""Manages .ai-reviewignore patterns for a repository."""
|
||||
|
||||
DEFAULT_PATTERNS = [
|
||||
# Version control
|
||||
".git/",
|
||||
".svn/",
|
||||
".hg/",
|
||||
# Dependencies
|
||||
"node_modules/",
|
||||
"vendor/",
|
||||
"venv/",
|
||||
".venv/",
|
||||
"__pycache__/",
|
||||
"*.pyc",
|
||||
# Build outputs
|
||||
"dist/",
|
||||
"build/",
|
||||
"out/",
|
||||
"target/",
|
||||
"*.min.js",
|
||||
"*.min.css",
|
||||
"*.bundle.js",
|
||||
# IDE/Editor
|
||||
".idea/",
|
||||
".vscode/",
|
||||
"*.swp",
|
||||
"*.swo",
|
||||
# Generated files
|
||||
"*.lock",
|
||||
"package-lock.json",
|
||||
"yarn.lock",
|
||||
"poetry.lock",
|
||||
"Pipfile.lock",
|
||||
"Cargo.lock",
|
||||
"go.sum",
|
||||
# Binary files
|
||||
"*.exe",
|
||||
"*.dll",
|
||||
"*.so",
|
||||
"*.dylib",
|
||||
"*.bin",
|
||||
"*.o",
|
||||
"*.a",
|
||||
# Media files
|
||||
"*.png",
|
||||
"*.jpg",
|
||||
"*.jpeg",
|
||||
"*.gif",
|
||||
"*.svg",
|
||||
"*.ico",
|
||||
"*.mp3",
|
||||
"*.mp4",
|
||||
"*.wav",
|
||||
"*.pdf",
|
||||
# Archives
|
||||
"*.zip",
|
||||
"*.tar",
|
||||
"*.gz",
|
||||
"*.rar",
|
||||
"*.7z",
|
||||
# Large data files
|
||||
"*.csv",
|
||||
"*.json.gz",
|
||||
"*.sql",
|
||||
"*.sqlite",
|
||||
"*.db",
|
||||
]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
repo_root: str | None = None,
|
||||
ignore_file: str = ".ai-reviewignore",
|
||||
use_defaults: bool = True,
|
||||
):
|
||||
"""Initialize ignore patterns.
|
||||
|
||||
Args:
|
||||
repo_root: Repository root path.
|
||||
ignore_file: Name of ignore file to read.
|
||||
use_defaults: Whether to include default patterns.
|
||||
"""
|
||||
self.repo_root = repo_root or os.getcwd()
|
||||
self.ignore_file = ignore_file
|
||||
self.rules: list[IgnoreRule] = []
|
||||
|
||||
# Load default patterns
|
||||
if use_defaults:
|
||||
for pattern in self.DEFAULT_PATTERNS:
|
||||
self._add_pattern(pattern)
|
||||
|
||||
# Load patterns from ignore file
|
||||
self._load_ignore_file()
|
||||
|
||||
def _load_ignore_file(self):
|
||||
"""Load patterns from .ai-reviewignore file."""
|
||||
ignore_path = os.path.join(self.repo_root, self.ignore_file)
|
||||
|
||||
if not os.path.exists(ignore_path):
|
||||
return
|
||||
|
||||
try:
|
||||
with open(ignore_path) as f:
|
||||
for line in f:
|
||||
line = line.rstrip("\n\r")
|
||||
|
||||
# Skip empty lines and comments
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
|
||||
self._add_pattern(line)
|
||||
except Exception:
|
||||
pass # Ignore errors reading the file
|
||||
|
||||
def _add_pattern(self, pattern: str):
|
||||
"""Add a pattern to the rules list.
|
||||
|
||||
Args:
|
||||
pattern: Pattern string (gitignore format).
|
||||
"""
|
||||
# Check for negation
|
||||
negation = False
|
||||
if pattern.startswith("!"):
|
||||
negation = True
|
||||
pattern = pattern[1:]
|
||||
|
||||
if not pattern:
|
||||
return
|
||||
|
||||
self.rules.append(IgnoreRule(pattern=pattern, negation=negation))
|
||||
|
||||
def is_ignored(self, path: str, is_directory: bool = False) -> bool:
|
||||
"""Check if a path should be ignored.
|
||||
|
||||
Args:
|
||||
path: Relative path to check.
|
||||
is_directory: Whether the path is a directory.
|
||||
|
||||
Returns:
|
||||
True if the path should be ignored.
|
||||
"""
|
||||
# Normalize path
|
||||
path = path.replace("\\", "/").lstrip("/")
|
||||
|
||||
# Check each rule in order (later rules override earlier ones)
|
||||
ignored = False
|
||||
for rule in self.rules:
|
||||
if rule.matches(path, is_directory):
|
||||
ignored = not rule.negation
|
||||
|
||||
return ignored
|
||||
|
||||
def filter_paths(self, paths: list[str]) -> list[str]:
|
||||
"""Filter a list of paths, removing ignored ones.
|
||||
|
||||
Args:
|
||||
paths: List of relative paths.
|
||||
|
||||
Returns:
|
||||
Filtered list of paths.
|
||||
"""
|
||||
return [p for p in paths if not self.is_ignored(p)]
|
||||
|
||||
def filter_diff_files(self, files: list[dict]) -> list[dict]:
|
||||
"""Filter diff file objects, removing ignored ones.
|
||||
|
||||
Args:
|
||||
files: List of file dicts with 'filename' or 'path' key.
|
||||
|
||||
Returns:
|
||||
Filtered list of file dicts.
|
||||
"""
|
||||
result = []
|
||||
for f in files:
|
||||
path = f.get("filename") or f.get("path") or f.get("name", "")
|
||||
if not self.is_ignored(path):
|
||||
result.append(f)
|
||||
return result
|
||||
|
||||
def should_review_file(self, filename: str) -> bool:
|
||||
"""Check if a file should be reviewed.
|
||||
|
||||
Args:
|
||||
filename: File path to check.
|
||||
|
||||
Returns:
|
||||
True if the file should be reviewed.
|
||||
"""
|
||||
return not self.is_ignored(filename)
|
||||
|
||||
@classmethod
|
||||
def from_config(
|
||||
cls, config: dict, repo_root: str | None = None
|
||||
) -> "IgnorePatterns":
|
||||
"""Create IgnorePatterns from config.
|
||||
|
||||
Args:
|
||||
config: Configuration dictionary.
|
||||
repo_root: Repository root path.
|
||||
|
||||
Returns:
|
||||
IgnorePatterns instance.
|
||||
"""
|
||||
ignore_config = config.get("ignore", {})
|
||||
use_defaults = ignore_config.get("use_defaults", True)
|
||||
ignore_file = ignore_config.get("file", ".ai-reviewignore")
|
||||
|
||||
instance = cls(
|
||||
repo_root=repo_root,
|
||||
ignore_file=ignore_file,
|
||||
use_defaults=use_defaults,
|
||||
)
|
||||
|
||||
# Add any extra patterns from config
|
||||
extra_patterns = ignore_config.get("patterns", [])
|
||||
for pattern in extra_patterns:
|
||||
instance._add_pattern(pattern)
|
||||
|
||||
return instance
|
||||
|
||||
|
||||
def get_ignore_patterns(repo_root: str | None = None) -> IgnorePatterns:
    """Build the ignore-pattern set for a repository.

    Args:
        repo_root: Repository root path, passed through to IgnorePatterns.

    Returns:
        A newly constructed IgnorePatterns instance.
    """
    patterns = IgnorePatterns(repo_root=repo_root)
    return patterns
|
||||
|
||||
|
||||
def should_ignore_file(filename: str, repo_root: str | None = None) -> bool:
    """Report whether a single file is excluded from review.

    A fresh pattern set is obtained from get_ignore_patterns on every call.

    Args:
        filename: File path to check.
        repo_root: Repository root path.

    Returns:
        True if the file should be ignored.
    """
    matcher = get_ignore_patterns(repo_root)
    return matcher.is_ignored(filename)
|
||||
Reference in New Issue
Block a user