Files
GuardDen/tests/test_automod_security.py
latte 831eed8dbc
Some checks failed
CI/CD Pipeline / Code Quality Checks (push) Failing after 6m9s
CI/CD Pipeline / Security Scanning (push) Successful in 26s
CI/CD Pipeline / Tests (3.11) (push) Failing after 5m24s
CI/CD Pipeline / Tests (3.12) (push) Failing after 5m23s
CI/CD Pipeline / Build Docker Image (push) Has been skipped
CI/CD Pipeline / Deploy to Staging (push) Has been skipped
CI/CD Pipeline / Deploy to Production (push) Has been skipped
CI/CD Pipeline / Notification (push) Successful in 1s
quick commit
2026-01-17 20:24:43 +01:00

210 lines
8.0 KiB
Python

"""Tests for automod security improvements."""
import pytest
from guardden.services.automod import normalize_domain, URL_PATTERN
class TestDomainNormalization:
"""Test domain normalization security improvements."""
def test_normalize_domain_valid(self):
"""Test normalization of valid domains."""
test_cases = [
("example.com", "example.com"),
("www.example.com", "example.com"),
("http://example.com", "example.com"),
("https://www.example.com", "example.com"),
("EXAMPLE.COM", "example.com"),
("Example.Com", "example.com"),
]
for input_domain, expected in test_cases:
result = normalize_domain(input_domain)
assert result == expected
def test_normalize_domain_security_filters(self):
"""Test that malicious domains are filtered out."""
malicious_domains = [
"example.com\x00", # null byte
"example.com\n", # newline
"example.com\r", # carriage return
"example.com\t", # tab
"example.com\x01", # control character
"example com", # space in hostname
"", # empty string
" ", # space only
"a" * 2001, # excessively long
None, # None value
123, # non-string value
]
for malicious_domain in malicious_domains:
result = normalize_domain(malicious_domain)
assert result == "" # Should return empty string for invalid input
def test_normalize_domain_length_limits(self):
"""Test that domain length limits are enforced."""
# Test exactly at the limit
valid_long_domain = "a" * 249 + ".com" # 253 chars total (RFC limit)
result = normalize_domain(valid_long_domain)
assert result != "" # Should be valid
# Test over the limit
invalid_long_domain = "a" * 250 + ".com" # 254 chars total (over RFC limit)
result = normalize_domain(invalid_long_domain)
assert result == "" # Should be invalid
def test_normalize_domain_malformed_urls(self):
"""Test handling of malformed URLs."""
malformed_urls = [
"http://", # incomplete URL
"://example.com", # missing scheme
"http:///example.com", # extra slash
"http://example..com", # double dot
"http://.example.com", # leading dot
"http://example.com.", # trailing dot
"ftp://example.com", # non-http scheme (should still work)
]
for malformed_url in malformed_urls:
result = normalize_domain(malformed_url)
# Should either return valid domain or empty string
assert isinstance(result, str)
def test_normalize_domain_injection_attempts(self):
"""Test that domain normalization prevents injection."""
injection_attempts = [
"example.com'; DROP TABLE guilds; --",
"example.com UNION SELECT * FROM users",
"example.com\"><script>alert('xss')</script>",
"example.com\\x00\\x01\\x02",
"example.com\n\rmalicious",
]
for attempt in injection_attempts:
result = normalize_domain(attempt)
# Should either return a safe domain or empty string
if result:
assert "script" not in result
assert "DROP" not in result
assert "UNION" not in result
assert "\x00" not in result
assert "\n" not in result
assert "\r" not in result
class TestUrlPatternSecurity:
"""Test URL pattern security improvements."""
def test_url_pattern_matches_valid_urls(self):
"""Test that URL pattern matches legitimate URLs."""
valid_urls = [
"https://example.com",
"http://www.example.org",
"https://subdomain.example.net",
"http://example.io/path/to/resource",
"https://example.com/path?query=value",
"www.example.com",
"example.gg",
]
for url in valid_urls:
matches = URL_PATTERN.findall(url)
assert len(matches) >= 1, f"Failed to match valid URL: {url}"
def test_url_pattern_rejects_malicious_patterns(self):
"""Test that URL pattern doesn't match malicious patterns."""
# These should not be matched as URLs
non_urls = [
"javascript:alert('xss')",
"data:text/html,<script>alert('xss')</script>",
"file:///etc/passwd",
"ftp://anonymous@server",
"mailto:user@example.com",
]
for non_url in non_urls:
matches = URL_PATTERN.findall(non_url)
# Should not match these protocols
assert len(matches) == 0 or not any("javascript:" in match for match in matches)
def test_url_pattern_handles_edge_cases(self):
"""Test URL pattern with edge cases."""
edge_cases = [
"http://" + "a" * 300 + ".com", # very long domain
"https://example.com" + "a" * 2000, # very long path
"https://192.168.1.1", # IP address (should not match)
"https://[::1]", # IPv6 (should not match)
"https://ex-ample.com", # hyphenated domain
"https://example.123", # numeric TLD (should not match)
]
for edge_case in edge_cases:
matches = URL_PATTERN.findall(edge_case)
# Should handle gracefully (either match or not, but no crashes)
assert isinstance(matches, list)
class TestAutomodIntegration:
"""Test automod integration with security improvements."""
def test_url_processing_security(self):
"""Test that URL processing handles malicious input safely."""
from guardden.services.automod import detect_scam_links
# Mock allowlist and suspicious TLDs for testing
allowlist = ["trusted.com", "example.org"]
# Test with malicious URLs
malicious_content = [
"Check out this link: https://evil.tk/steal-your-data",
"Visit http://phishing.ml/discord-nitro-free",
"Go to https://scam" + "." * 100 + "tk", # excessive dots
"Link: https://example.com" + "x" * 5000, # excessively long
]
for content in malicious_content:
# Should not crash and should return appropriate result
result = detect_scam_links(content, allowlist)
assert result is None or hasattr(result, 'should_delete')
def test_domain_allowlist_security(self):
"""Test that domain allowlist checking is secure."""
from guardden.services.automod import is_allowed_domain
# Test with malicious allowlist entries
malicious_allowlist = {
"good.com",
"evil.com\x00", # null byte
"bad.com\n", # newline
"trusted.org",
}
test_domains = [
"good.com",
"evil.com",
"bad.com",
"trusted.org",
"unknown.com",
]
for domain in test_domains:
# Should not crash
result = is_allowed_domain(domain, malicious_allowlist)
assert isinstance(result, bool)
def test_regex_pattern_safety(self):
"""Test that regex patterns are processed safely."""
# This tests the circuit breaker functionality (when implemented)
malicious_patterns = [
"(.+)+", # catastrophic backtracking
"a" * 1000, # very long pattern
"(?:a|a)*", # another backtracking pattern
"[" + "a-z" * 100 + "]", # excessive character class
]
for pattern in malicious_patterns:
# Should not cause infinite loops or crashes
# This is a placeholder for when circuit breakers are implemented
assert len(pattern) > 0 # Just ensure we're testing something