| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input string is not valid SGF |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Missing opening parenthesis |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Missing closing parenthesis |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Remove the outer parentheses |
| 54 | content = input_string[1:-1] |
| 55 | |
| 56 | # Edge Case: Empty tree |
| 57 | if not content: |
| 58 | raise ValueError("tree with no nodes") |
| 59 | |
| 60 | # Edge Case: Missing semicolon after opening parenthesis |
| 61 | if not content.startswith(';'): |
| 62 | raise ValueError("tree missing") |
| 63 | |
| 64 | return _parse_tree(content) |
| 65 | |
| 66 | |
| 67 | def _parse_tree(content: str) -> SgfTree: |
| 68 | """ |
| 69 | Parse a tree from its content string. |
| 70 | |
| 71 | Args: |
| 72 | content: The content of the tree without outer parentheses |
| 73 | |
| 74 | Returns: |
| 75 | SgfTree: The parsed tree |
| 76 | """ |
| 77 | # Skip the initial semicolon |
| 78 | content = content[1:] |
| 79 | |
| 80 | # Parse properties of the current node |
| 81 | properties, remaining = _parse_properties(content) |
| 82 | |
| 83 | children = [] |
| 84 | |
| 85 | # Parse children |
| 86 | while remaining: |
| 87 | if remaining.startswith('('): |
| 88 | # Parse a child tree |
| 89 | child_content, remaining = _extract_subtree(remaining) |
| 90 | child_tree = _parse_tree(child_content) |
| 91 | children.append(child_tree) |
| 92 | elif remaining.startswith(';'): |
| 93 | # Parse a sequence (shorthand for a single child) |
| 94 | # Find where this sequence ends |
| 95 | seq_end = _find_sequence_end(remaining) |
| 96 | sequence_content = remaining[:seq_end] |
| 97 | remaining = remaining[seq_end:] |
| 98 | child_tree = _parse_tree(sequence_content) |
| 99 | children.append(child_tree) |
| 100 | else: |
| 101 | # No more children |
| 102 | break |
| 103 | |
| 104 | return SgfTree(properties=properties, children=children) |
| 105 | |
| 106 | |
| 107 | def _parse_properties(content: str) -> tuple[dict, str]: |
| 108 | """ |
| 109 | Parse properties from the beginning of a content string. |
| 110 | |
| 111 | Args: |
| 112 | content: String starting with properties |
| 113 | |
| 114 | Returns: |
| 115 | tuple: (properties dict, remaining string) |
| 116 | """ |
| 117 | properties = {} |
| 118 | remaining = content |
| 119 | |
| 120 | while remaining and remaining[0].isalpha(): |
| 121 | # Parse key |
| 122 | key_end = 0 |
| 123 | while key_end < len(remaining) and remaining[key_end].isalpha(): |
| 124 | key_end += 1 |
| 125 | |
| 126 | # Edge Case: Key without values |
| 127 | if key_end >= len(remaining) or remaining[key_end] != '[': |
| 128 | raise ValueError("properties without delimiter") |
| 129 | |
| 130 | key = remaining[:key_end] |
| 131 | remaining = remaining[key_end:] |
| 132 | |
| 133 | # Edge Case: Key not in uppercase |
| 134 | if not key.isupper(): |
| 135 | raise ValueError("property must be in uppercase") |
| 136 | |
| 137 | # Parse values |
| 138 | values = [] |
| 139 | while remaining.startswith('['): |
| 140 | remaining = remaining[1:] # Skip opening bracket |
| 141 | value, remaining = _parse_value(remaining) |
| 142 | values.append(value) |
| 143 | |
| 144 | # Edge Case: Missing closing bracket |
| 145 | if not remaining.startswith(']'): |
| 146 | raise ValueError("properties without delimiter") |
| 147 | |
| 148 | remaining = remaining[1:] # Skip closing bracket |
| 149 | |
| 150 | # Check if there are more values for this key or if we should continue to next property |
| 151 | # If the next character is an uppercase letter, it's a new property |
| 152 | if remaining and remaining[0].isalpha() and remaining[0].isupper(): |
| 153 | break |
| 154 | |
| 155 | properties[key] = values |
| 156 | |
| 157 | return properties, remaining |
| 158 | |
| 159 | |
| 160 | def _parse_value(content: str) -> tuple[str, str]: |
| 161 | """ |
| 162 | Parse a value from SGF Text type. |
| 163 | |
| 164 | Args: |
| 165 | content: String starting with a value |
| 166 | |
| 167 | Returns: |
| 168 | tuple: (parsed value, remaining string) |
| 169 | """ |
| 170 | value = [] |
| 171 | i = 0 |
| 172 | |
| 173 | while i < len(content): |
| 174 | if content[i] == ']': |
| 175 | # Check if this ] is escaped |
| 176 | # Count consecutive backslashes before this position |
| 177 | backslash_count = 0 |
| 178 | j = i - 1 |
| 179 | while j >= 0 and content[j] == '\\': |
| 180 | backslash_count += 1 |
| 181 | j -= 1 |
| 182 | |
| 183 | # If even number of backslashes, then ] is not escaped |
| 184 | # If odd number of backslashes, then ] is escaped |
| 185 | if backslash_count % 2 == 0: |
| 186 | # End of value (unescaped closing bracket) |
| 187 | break |
| 188 | else: |
| 189 | # This ] is escaped, add it to the value |
| 190 | value.append(']') |
| 191 | elif content[i] == '\\': |
| 192 | # Handle escape character |
| 193 | i += 1 |
| 194 | if i >= len(content): |
| 195 | # Edge Case: Escape at end of value |
| 196 | raise ValueError("properties without delimiter") |
| 197 | |
| 198 | char = content[i] |
| 199 | # Check if the character after \ is whitespace |
| 200 | if char.isspace(): |
| 201 | if char == '\n': |
| 202 | # Newlines immediately after \ are removed |
| 203 | pass # Don't add anything |
| 204 | else: |
| 205 | # Other whitespace characters are converted to spaces |
| 206 | value.append(' ') |
| 207 | else: |
| 208 | # Any non-whitespace character after \ is inserted as-is |
| 209 | value.append(char) |
| 210 | elif content[i].isspace() and content[i] != '\n': |
| 211 | # All whitespace characters other than newline are converted to spaces |
| 212 | value.append(' ') |
| 213 | else: |
| 214 | value.append(content[i]) |
| 215 | i += 1 |
| 216 | |
| 217 | # Edge Case: Unclosed value |
| 218 | if i >= len(content): |
| 219 | raise ValueError("properties without delimiter") |
| 220 | |
| 221 | return ''.join(value), content[i:] |
| 222 | |
| 223 | |
| 224 | def _extract_subtree(content: str) -> tuple[str, str]: |
| 225 | """ |
| 226 | Extract a subtree from content starting with '('. |
| 227 | |
| 228 | Args: |
| 229 | content: String starting with '(' |
| 230 | |
| 231 | Returns: |
| 232 | tuple: (subtree content without outer parentheses, remaining string) |
| 233 | """ |
| 234 | # Edge Case: Doesn't start with '(' |
| 235 | if not content.startswith('('): |
| 236 | raise ValueError("tree missing") |
| 237 | |
| 238 | balance = 0 |
| 239 | i = 0 |
| 240 | |
| 241 | for i, char in enumerate(content): |
| 242 | if char == '(': |
| 243 | balance += 1 |
| 244 | elif char == ')': |
| 245 | balance -= 1 |
| 246 | if balance == 0: |
| 247 | # Found the matching closing parenthesis |
| 248 | return content[1:i], content[i+1:] |
| 249 | |
| 250 | # Edge Case: Unmatched parentheses |
| 251 | raise ValueError("tree missing") |
| 252 | |
| 253 | |
| 254 | def _find_sequence_end(content: str) -> int: |
| 255 | """ |
| 256 | Find where a sequence ends (before next '(' or end of string). |
| 257 | |
| 258 | Args: |
| 259 | content: String starting with ';' |
| 260 | |
| 261 | Returns: |
| 262 | int: Index where the sequence ends |
| 263 | """ |
| 264 | i = 1 # Skip the initial semicolon |
| 265 | |
| 266 | while i < len(content): |
| 267 | if content[i] == '(' or content[i] == ';': |
| 268 | # Found start of a child or another sequence |
| 269 | return i |
| 270 | i += 1 |
| 271 | |
| 272 | return i |
| 273 | |
| 274 | # Handled Edge Cases: Empty input, Missing opening parenthesis, Missing closing parenthesis, Empty tree, Missing semicolon after opening parenthesis, Key without values, Key not in uppercase, Missing closing bracket, Escape at end of value, Unclosed value, Doesn't start with '(', Unmatched parentheses |
| 275 | # Handled Edge Cases: Key without values, Key not in uppercase, Missing closing bracket, Escape at end of value, Unclosed value |
| 276 | # Handled Edge Cases: Unclosed value |
| 277 | # Handled Edge Cases: Doesn't start with '(', Unmatched parentheses |
| 278 | # Handled Edge Cases: None in this simple implementation |