| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is invalid according to SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Missing opening parenthesis |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Missing closing parenthesis |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Remove the outer parentheses |
| 54 | content = input_string[1:-1] |
| 55 | |
| 56 | # Edge Case: Empty tree with no nodes |
| 57 | if not content: |
| 58 | raise ValueError("tree with no nodes") |
| 59 | |
| 60 | # Edge Case: Missing semicolon after opening parenthesis |
| 61 | if not content.startswith(';'): |
| 62 | raise ValueError("tree missing") |
| 63 | |
| 64 | return _parse_tree(content) |
| 65 | |
| 66 | |
| 67 | def _parse_tree(content: str) -> SgfTree: |
| 68 | """ |
| 69 | Parse the content of a tree (without outer parentheses). |
| 70 | |
| 71 | Args: |
| 72 | content: The content of the tree |
| 73 | |
| 74 | Returns: |
| 75 | SgfTree: The parsed tree |
| 76 | """ |
| 77 | # Skip the initial semicolon |
| 78 | content = content[1:] |
| 79 | |
| 80 | # Parse properties of the current node |
| 81 | properties, remaining = _parse_properties(content) |
| 82 | |
| 83 | children = [] |
| 84 | |
| 85 | # Parse children |
| 86 | while remaining: |
| 87 | if remaining.startswith(';'): |
| 88 | # Parse a sequential child node |
| 89 | child_tree, remaining = _parse_sequential_node(remaining) |
| 90 | children.append(child_tree) |
| 91 | elif remaining.startswith('('): |
| 92 | # Parse a subtree (variation) |
| 93 | subtree, remaining = _parse_subtree(remaining) |
| 94 | children.append(subtree) |
| 95 | else: |
| 96 | # No more children |
| 97 | break |
| 98 | |
| 99 | return SgfTree(properties=properties, children=children) |
| 100 | |
| 101 | |
| 102 | def _parse_sequential_node(content: str) -> tuple[SgfTree, str]: |
| 103 | """ |
| 104 | Parse a sequential node (starting with semicolon) and return both the tree and remaining content. |
| 105 | |
| 106 | Args: |
| 107 | content: String starting with ';' |
| 108 | |
| 109 | Returns: |
| 110 | tuple: (parsed tree, remaining string) |
| 111 | """ |
| 112 | # Skip the initial semicolon |
| 113 | content = content[1:] |
| 114 | |
| 115 | # Parse properties of the current node |
| 116 | properties, remaining = _parse_properties(content) |
| 117 | |
| 118 | children = [] |
| 119 | |
| 120 | # Parse children |
| 121 | while remaining: |
| 122 | if remaining.startswith(';'): |
| 123 | # Parse a sequential child node |
| 124 | child_tree, remaining = _parse_sequential_node(remaining) |
| 125 | children.append(child_tree) |
| 126 | elif remaining.startswith('('): |
| 127 | # Parse a subtree (variation) |
| 128 | subtree, remaining = _parse_subtree(remaining) |
| 129 | children.append(subtree) |
| 130 | else: |
| 131 | # No more children |
| 132 | break |
| 133 | |
| 134 | return SgfTree(properties=properties, children=children), remaining |
| 135 | |
| 136 | |
| 137 | def _parse_properties(content: str) -> tuple[dict, str]: |
| 138 | """ |
| 139 | Parse properties from the content string. |
| 140 | |
| 141 | Args: |
| 142 | content: String containing properties |
| 143 | |
| 144 | Returns: |
| 145 | tuple: (properties dict, remaining string) |
| 146 | """ |
| 147 | properties = {} |
| 148 | remaining = content |
| 149 | |
| 150 | while remaining and remaining[0] not in '();': |
| 151 | # Parse key |
| 152 | key_end = 0 |
| 153 | while key_end < len(remaining) and remaining[key_end].isalpha() and remaining[key_end].isupper(): |
| 154 | key_end += 1 |
| 155 | |
| 156 | # Edge Case: Property key is not in uppercase |
| 157 | if key_end == 0: |
| 158 | raise ValueError("property must be in uppercase") |
| 159 | |
| 160 | key = remaining[:key_end] |
| 161 | remaining = remaining[key_end:] |
| 162 | |
| 163 | # Edge Case: Missing opening bracket for property value |
| 164 | if not remaining.startswith('['): |
| 165 | raise ValueError("properties without delimiter") |
| 166 | |
| 167 | # Parse all values for this key |
| 168 | values = [] |
| 169 | while remaining.startswith('['): |
| 170 | remaining = remaining[1:] # Skip opening bracket |
| 171 | value, remaining = _parse_value(remaining) |
| 172 | values.append(value) |
| 173 | |
| 174 | # Edge Case: Missing closing bracket |
| 175 | if not remaining.startswith(']'): |
| 176 | raise ValueError("properties without delimiter") |
| 177 | |
| 178 | remaining = remaining[1:] # Skip closing bracket |
| 179 | |
| 180 | properties[key] = values |
| 181 | |
| 182 | return properties, remaining |
| 183 | |
| 184 | |
| 185 | def _parse_value(content: str) -> tuple[str, str]: |
| 186 | """ |
| 187 | Parse a single property value according to SGF Text type rules. |
| 188 | |
| 189 | Args: |
| 190 | content: String containing the value (after opening bracket) |
| 191 | |
| 192 | Returns: |
| 193 | tuple: (parsed value, remaining string) |
| 194 | """ |
| 195 | value = [] |
| 196 | i = 0 |
| 197 | |
| 198 | # Parse until we find an unescaped closing bracket |
| 199 | while i < len(content): |
| 200 | if content[i] == ']': |
| 201 | # Count how many consecutive backslashes precede this bracket |
| 202 | backslash_count = 0 |
| 203 | j = i - 1 |
| 204 | while j >= 0 and content[j] == '\\': |
| 205 | backslash_count += 1 |
| 206 | j -= 1 |
| 207 | |
| 208 | # If even number of backslashes, the bracket is not escaped |
| 209 | # If odd number of backslashes, the bracket is escaped |
| 210 | if backslash_count % 2 == 0: |
| 211 | # Found an unescaped closing bracket |
| 212 | break |
| 213 | else: |
| 214 | # This bracket is escaped, treat it as a literal character |
| 215 | value.append(']') |
| 216 | i += 1 |
| 217 | continue |
| 218 | elif content[i] == '\\': # Escape character |
| 219 | i += 1 |
| 220 | if i >= len(content): |
| 221 | # Edge Case: Escape character at end of content |
| 222 | value.append('\\') |
| 223 | break |
| 224 | |
| 225 | char = content[i] |
| 226 | if char == '\n': |
| 227 | # Newline immediately after \ is removed |
| 228 | pass |
| 229 | elif char.isspace(): |
| 230 | # Whitespace after \ is converted to space, except \n which is removed |
| 231 | value.append(' ') |
| 232 | else: |
| 233 | # Any non-whitespace character after \ is inserted as-is |
| 234 | value.append(char) |
| 235 | elif content[i] == '\n': |
| 236 | # Newlines are kept as newlines |
| 237 | value.append('\n') |
| 238 | elif content[i].isspace() and content[i] != '\n': |
| 239 | # All whitespace other than newline is converted to space |
| 240 | value.append(' ') |
| 241 | else: |
| 242 | # Regular character |
| 243 | value.append(content[i]) |
| 244 | |
| 245 | i += 1 |
| 246 | |
| 247 | # Edge Case: Unclosed value (this should be caught earlier but just in case) |
| 248 | if i >= len(content): |
| 249 | raise ValueError("properties without delimiter") |
| 250 | |
| 251 | return ''.join(value), content[i:] |
| 252 | |
| 253 | |
| 254 | def _parse_subtree(content: str) -> tuple[SgfTree, str]: |
| 255 | """ |
| 256 | Parse a subtree enclosed in parentheses. |
| 257 | |
| 258 | Args: |
| 259 | content: String starting with '(' |
| 260 | |
| 261 | Returns: |
| 262 | tuple: (parsed subtree, remaining string) |
| 263 | """ |
| 264 | # Edge Case: Not starting with '(' |
| 265 | if not content.startswith('('): |
| 266 | raise ValueError("tree missing") |
| 267 | |
| 268 | # Find matching closing parenthesis |
| 269 | depth = 0 |
| 270 | i = 0 |
| 271 | |
| 272 | for i, char in enumerate(content): |
| 273 | if char == '(': |
| 274 | depth += 1 |
| 275 | elif char == ')': |
| 276 | depth -= 1 |
| 277 | if depth == 0: |
| 278 | # Found the matching closing parenthesis |
| 279 | subtree_content = content[1:i] # Exclude the parentheses |
| 280 | remaining = content[i+1:] |
| 281 | return _parse_tree(subtree_content), remaining |
| 282 | |
| 283 | # Edge Case: Unmatched parentheses |
| 284 | raise ValueError("tree missing") |
| 285 | |
| 286 | # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree with no nodes, missing semicolon after opening parenthesis, property key is not in uppercase, missing opening bracket for property value, missing closing bracket, escape character at end of content, unclosed value, not starting with '(', unmatched parentheses |