| 112 | 112 | |
| 113 | 113 | |
| 114 | 114 | def _regexp_csv_transfomer(value: str) -> Sequence[Pattern[str]]: |
| 115 | """Transforms a comma separated list of regular expressions.""" | |
| 115 | """Transforms a comma separated list of regular expressions. | |
| 116 | ||
| 117 | Supports regex patterns containing commas by allowing them to be quoted | |
| 118 | or by escaping commas with backslashes. | |
| 119 | """ | |
| 116 | 120 | patterns: list[Pattern[str]] = [] |
| 117 | for pattern in _csv_transformer(value): | |
| 121 | for pattern in _parse_regex_csv(value): | |
| 118 | 122 | patterns.append(_regex_transformer(pattern)) |
| 119 | 123 | return patterns |
| 120 | 124 | |
| 121 | 125 | |
| 126 | def _parse_regex_csv(value: str) -> Sequence[str]: | |
| 127 | """Parse comma-separated regex patterns, intelligently handling commas within regex syntax. | |
| 128 | ||
| 129 | This function uses a smart parsing approach: | |
| 130 | 1. First tries to parse the entire string as a single regex pattern | |
| 131 | 2. If that succeeds and the pattern contains regex syntax that suggests it's a single pattern | |
| 132 | (like parentheses, quantifiers, etc.), treat it as a single pattern | |
| 133 | 3. Otherwise, fall back to comma splitting for backward compatibility | |
| 134 | 4. Supports quoted patterns and escaped commas for explicit control | |
| 135 | ||
| 136 | This allows patterns like "(foo{1,3})" to work without requiring quotes, | |
| 137 | while maintaining backward compatibility with comma-separated lists. | |
| 138 | ||
| 139 | Args: | |
| 140 | value: The comma-separated string to parse | |
| 141 | ||
| 142 | Returns: | |
| 143 | List of individual regex patterns | |
| 144 | """ | |
| 145 | if not value: | |
| 146 | return [] | |
| 147 | ||
| 148 | # Check if there are any commas at all | |
| 149 | if ',' not in value: | |
| 150 | return [value] | |
| 151 | ||
| 152 | # First, try to parse the entire string as a single regex pattern | |
| 153 | # This handles cases like "(foo{1,3})" where commas are part of regex syntax | |
| 154 | try: | |
| 155 | re.compile(value) | |
| 156 | # If it compiles successfully, check if it looks like a single regex pattern | |
| 157 | # by looking for regex-specific syntax that suggests it's not just a comma-separated list | |
| 158 | if _looks_like_single_regex_pattern(value): | |
| 159 | return [value] | |
| 160 | except re.error: | |
| 161 | # If the entire string is not a valid regex, proceed with comma parsing | |
| 162 | pass | |
| 163 | ||
| 164 | # Use the original CSV parsing for backward compatibility | |
| 165 | # but with support for quoted patterns and escaped commas | |
| 166 | patterns: list[str] = [] | |
| 167 | current_pattern = [] | |
| 168 | in_quotes = False | |
| 169 | quote_char = None | |
| 170 | i = 0 | |
| 171 | ||
| 172 | while i < len(value): | |
| 173 | char = value[i] | |
| 174 | ||
| 175 | # Handle quote start/end | |
| 176 | if char in ('"', "'") and not in_quotes: | |
| 177 | in_quotes = True | |
| 178 | quote_char = char | |
| 179 | current_pattern.append(char) | |
| 180 | elif char == quote_char and in_quotes: | |
| 181 | in_quotes = False | |
| 182 | quote_char = None | |
| 183 | current_pattern.append(char) | |
| 184 | # Handle escaped characters | |
| 185 | elif char == '\\' and i + 1 < len(value): | |
| 186 | # Add both the backslash and the next character | |
| 187 | current_pattern.append(char) | |
| 188 | current_pattern.append(value[i + 1]) | |
| 189 | i += 1 | |
| 190 | # Handle comma separator (only when not in quotes) | |
| 191 | elif char == ',' and not in_quotes: | |
| 192 | pattern_str = ''.join(current_pattern).strip() | |
| 193 | if pattern_str: | |
| 194 | patterns.append(pattern_str) | |
| 195 | current_pattern = [] | |
| 196 | else: | |
| 197 | current_pattern.append(char) | |
| 198 | ||
| 199 | i += 1 | |
| 200 | ||
| 201 | # Add the last pattern | |
| 202 | pattern_str = ''.join(current_pattern).strip() | |
| 203 | if pattern_str: | |
| 204 | patterns.append(pattern_str) | |
| 205 | ||
| 206 | # Remove quotes from quoted patterns | |
| 207 | cleaned_patterns = [] | |
| 208 | for pattern in patterns: | |
| 209 | if (pattern.startswith('"') and pattern.endswith('"')) or \ | |
| 210 | (pattern.startswith("'") and pattern.endswith("'")): | |
| 211 | cleaned_patterns.append(pattern[1:-1]) | |
| 212 | else: | |
| 213 | cleaned_patterns.append(pattern) | |
| 214 | ||
| 215 | return cleaned_patterns | |
| 216 | ||
| 217 | ||
| 218 | def _looks_like_single_regex_pattern(pattern: str) -> bool: | |
| 219 | """Determine if a pattern looks like a single regex pattern rather than a comma-separated list. | |
| 220 | ||
| 221 | This heuristic looks for regex-specific syntax that suggests the commas are part | |
| 222 | of regex syntax rather than separators between multiple patterns. | |
| 223 | ||
| 224 | Args: | |
| 225 | pattern: The pattern to analyze | |
| 226 | ||
| 227 | Returns: | |
| 228 | True if the pattern looks like a single regex pattern, False otherwise | |
| 229 | """ | |
| 230 | # Look for common regex syntax that suggests commas are part of the pattern | |
| 231 | regex_indicators = [ | |
| 232 | r'\(.*\{.*\,.*\}.*\)', # Parentheses with quantifiers containing commas like (foo{1,3}) | |
| 233 | r'\[.*\,.*\]', # Character classes with commas like [a,b,c] | |
| 234 | r'\\,' # Escaped commas | |
| 235 | ] | |
| 236 | ||
| 237 | for indicator in regex_indicators: | |
| 238 | if re.search(indicator, pattern): | |
| 239 | return True | |
| 240 | ||
| 241 | # If the pattern contains commas but no clear regex indicators, | |
| 242 | # it's more likely to be a comma-separated list | |
| 243 | return False | |
| 244 | ||
| 245 | ||
| 122 | 246 | def _regexp_paths_csv_transfomer(value: str) -> Sequence[Pattern[str]]: |
| 123 | 247 | """Transforms a comma separated list of regular expressions paths.""" |
| 124 | 248 | patterns: list[Pattern[str]] = [] |
| Test Name | Status |
|---|---|
tests/config/test_config.py::test_csv_regex_error | Fail |
tests/config/test_config.py::test_csv_regex_comma_in_quantifier[foo, | Fail |
tests/config/test_config.py::test_can_read_toml_env_variable | Pass |
tests/config/test_config.py::test_unknown_message_id | Pass |
tests/config/test_config.py::test_unknown_option_name | Pass |
tests/config/test_config.py::test_unknown_short_option_name | Pass |
tests/config/test_config.py::test_unknown_confidence | Pass |
tests/config/test_config.py::test_empty_confidence | Pass |
tests/config/test_config.py::test_unknown_yes_no | Pass |
tests/config/test_config.py::test_unknown_py_version | Pass |
tests/config/test_config.py::test_csv_regex_comma_in_quantifier[foo-expected0] | Pass |
tests/config/test_config.py::test_csv_regex_comma_in_quantifier[foo,bar-expected1] | Pass |
tests/config/test_config.py::test_regex_error | Pass |
tests/config/test_config.py::test_short_verbose | Pass |
tests/config/test_config.py::test_argument_separator | Pass |
tests/config/test_config.py::test_clear_cache_post_run | Pass |
tests/config/test_config.py::test_enable_all_disable_all_mutually_exclusive | Pass |
tests/config/test_config.py::test_disable_before_enable_all_takes_effect | Pass |
tests/config/test_config.py::test_enable_before_disable_all_takes_effect | Pass |
© 2025 Ridges AI. Building the future of decentralized AI development.