| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is invalid according to SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Input doesn't start with '(' |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Input doesn't end with ')' |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Remove the outer parentheses |
| 54 | content = input_string[1:-1] |
| 55 | |
| 56 | # Edge Case: Empty tree with no nodes |
| 57 | if not content: |
| 58 | raise ValueError("tree with no nodes") |
| 59 | |
| 60 | # Edge Case: Tree with no nodes (only semicolons) |
| 61 | if content.strip() == ';': |
| 62 | return SgfTree() |
| 63 | |
| 64 | return _parse_tree(content) |
| 65 | |
| 66 | |
| 67 | def _parse_tree(content: str) -> SgfTree: |
| 68 | """ |
| 69 | Parse the content of a tree (without the outer parentheses). |
| 70 | """ |
| 71 | # Edge Case: Content is empty |
| 72 | if not content: |
| 73 | raise ValueError("tree with no nodes") |
| 74 | |
| 75 | nodes, remainder = _parse_nodes(content) |
| 76 | |
| 77 | # Edge Case: No nodes parsed |
| 78 | if not nodes: |
| 79 | raise ValueError("tree with no nodes") |
| 80 | |
| 81 | # Convert the list of nodes to a tree structure |
| 82 | # The first node is the root, and each subsequent node is a child of the previous one |
| 83 | # until we reach a node with children, then we go depth-first |
| 84 | return _build_tree(nodes) |
| 85 | |
| 86 | |
| 87 | def _parse_nodes(content: str) -> tuple[list, str]: |
| 88 | """ |
| 89 | Parse a sequence of nodes from the content. |
| 90 | Returns a list of (properties, children) tuples and the remainder of the content. |
| 91 | """ |
| 92 | nodes = [] |
| 93 | |
| 94 | while content: |
| 95 | if content.startswith(';'): |
| 96 | # Parse a single node |
| 97 | node, content = _parse_node(content[1:]) |
| 98 | nodes.append(node) |
| 99 | elif content.startswith('('): |
| 100 | # This is the start of children for the last node |
| 101 | # Edge Case: No nodes before children |
| 102 | if not nodes: |
| 103 | raise ValueError("tree missing") |
| 104 | |
| 105 | children = [] |
| 106 | while content.startswith('('): |
| 107 | # Parse a child tree |
| 108 | child_content, content = _parse_parenthesized_content(content) |
| 109 | child_nodes, _ = _parse_nodes(child_content[1:-1]) |
| 110 | |
| 111 | # Edge Case: Child tree has no nodes |
| 112 | if not child_nodes: |
| 113 | raise ValueError("tree with no nodes") |
| 114 | |
| 115 | children.append(_build_tree(child_nodes)) |
| 116 | |
| 117 | # Add children to the last node |
| 118 | nodes[-1] = (nodes[-1][0], children) |
| 119 | else: |
| 120 | # Edge Case: Unexpected character |
| 121 | break |
| 122 | |
| 123 | return nodes, content |
| 124 | |
| 125 | |
| 126 | def _parse_node(content: str) -> tuple[tuple, str]: |
| 127 | """ |
| 128 | Parse a single node (properties only, no children). |
| 129 | Returns a (properties, []) tuple and the remainder of the content. |
| 130 | """ |
| 131 | properties = {} |
| 132 | |
| 133 | while content and content[0].isalpha(): |
| 134 | # Parse a property |
| 135 | key, content = _parse_property_key(content) |
| 136 | values, content = _parse_property_values(content) |
| 137 | |
| 138 | # Edge Case: Property already exists |
| 139 | if key in properties: |
| 140 | # According to SGF spec, each key can only occur once |
| 141 | # But the problem statement says "each key can only occur once but may have multiple values" |
| 142 | # So we should extend the existing values list |
| 143 | properties[key].extend(values) |
| 144 | else: |
| 145 | properties[key] = values |
| 146 | |
| 147 | return (properties, []), content |
| 148 | |
| 149 | |
| 150 | def _parse_property_key(content: str) -> tuple[str, str]: |
| 151 | """ |
| 152 | Parse a property key. |
| 153 | Returns the key and the remainder of the content. |
| 154 | """ |
| 155 | key = '' |
| 156 | i = 0 |
| 157 | |
| 158 | # Parse the key |
| 159 | while i < len(content) and content[i].isalpha(): |
| 160 | key += content[i] |
| 161 | i += 1 |
| 162 | |
| 163 | # Edge Case: Key is empty |
| 164 | if not key: |
| 165 | raise ValueError("properties without delimiter") |
| 166 | |
| 167 | # Edge Case: Key is not in uppercase |
| 168 | if not key.isupper(): |
| 169 | raise ValueError("property must be in uppercase") |
| 170 | |
| 171 | return key, content[i:] |
| 172 | |
| 173 | |
| 174 | def _parse_property_values(content: str) -> tuple[list[str], str]: |
| 175 | """ |
| 176 | Parse property values. |
| 177 | Returns a list of values and the remainder of the content. |
| 178 | """ |
| 179 | values = [] |
| 180 | |
| 181 | # Edge Case: No opening bracket |
| 182 | if not content.startswith('['): |
| 183 | raise ValueError("properties without delimiter") |
| 184 | |
| 185 | # Parse the first value |
| 186 | value, content = _parse_property_value(content[1:]) |
| 187 | values.append(value) |
| 188 | |
| 189 | # Edge Case: No closing bracket |
| 190 | if not content.startswith(']'): |
| 191 | raise ValueError("properties without delimiter") |
| 192 | |
| 193 | content = content[1:] |
| 194 | |
| 195 | # Parse additional values if they exist (consecutive bracketed values) |
| 196 | while content.startswith('['): |
| 197 | value, content = _parse_property_value(content[1:]) |
| 198 | values.append(value) |
| 199 | |
| 200 | # Edge Case: No closing bracket |
| 201 | if not content.startswith(']'): |
| 202 | raise ValueError("properties without delimiter") |
| 203 | |
| 204 | content = content[1:] |
| 205 | |
| 206 | return values, content |
| 207 | |
| 208 | |
| 209 | def _parse_property_value(content: str) -> tuple[str, str]: |
| 210 | """ |
| 211 | Parse a single property value. |
| 212 | Returns the value and the remainder of the content. |
| 213 | """ |
| 214 | value = '' |
| 215 | i = 0 |
| 216 | |
| 217 | while i < len(content): |
| 218 | if content[i] == ']': |
| 219 | # End of value |
| 220 | break |
| 221 | elif content[i] == '\\': |
| 222 | # Escape character |
| 223 | i += 1 |
| 224 | # Edge Case: Escape at end of content |
| 225 | if i >= len(content): |
| 226 | value += '\\' |
| 227 | break |
| 228 | |
| 229 | char = content[i] |
| 230 | if char == '\n': |
| 231 | # Newline escaped - remove if immediately after backslash |
| 232 | # Skip the newline character entirely |
| 233 | pass |
| 234 | elif char == ']': |
| 235 | # Escaped closing bracket - treat as literal and end value |
| 236 | value += ']' |
| 237 | i += 1 |
| 238 | break |
| 239 | else: |
| 240 | # Any character after backslash is inserted as-is |
| 241 | value += char |
| 242 | elif content[i] == '\n': |
| 243 | # Newline - keep as is |
| 244 | value += content[i] |
| 245 | elif content[i].isspace() and content[i] != '\n': |
| 246 | # All whitespace other than newline becomes space |
| 247 | value += ' ' |
| 248 | else: |
| 249 | # Regular character |
| 250 | value += content[i] |
| 251 | i += 1 |
| 252 | |
| 253 | return value, content[i:] |
| 254 | |
| 255 | |
| 256 | def _parse_parenthesized_content(content: str) -> tuple[str, str]: |
| 257 | """ |
| 258 | Parse content within parentheses, including nested parentheses. |
| 259 | Returns the content within the parentheses and the remainder. |
| 260 | """ |
| 261 | # Edge Case: Doesn't start with '(' |
| 262 | if not content.startswith('('): |
| 263 | raise ValueError("tree missing") |
| 264 | |
| 265 | level = 0 |
| 266 | i = 0 |
| 267 | |
| 268 | while i < len(content): |
| 269 | if content[i] == '(': |
| 270 | level += 1 |
| 271 | elif content[i] == ')': |
| 272 | level -= 1 |
| 273 | if level == 0: |
| 274 | return content[:i+1], content[i+1:] |
| 275 | i += 1 |
| 276 | |
| 277 | # Edge Case: Unmatched parentheses |
| 278 | raise ValueError("tree missing") |
| 279 | |
| 280 | |
| 281 | def _build_tree(nodes: list) -> SgfTree: |
| 282 | """ |
| 283 | Build a tree from a list of nodes. |
| 284 | Each node is a (properties, children) tuple. |
| 285 | """ |
| 286 | # Edge Case: Empty nodes list |
| 287 | if not nodes: |
| 288 | raise ValueError("tree with no nodes") |
| 289 | |
| 290 | # The first node is the root |
| 291 | root_properties, root_children = nodes[0] |
| 292 | |
| 293 | # If there are more nodes, they form a chain of children |
| 294 | if len(nodes) > 1: |
| 295 | # Create the chain from back to front |
| 296 | current_tree = SgfTree(properties=nodes[-1][0], children=nodes[-1][1]) |
| 297 | |
| 298 | for i in range(len(nodes) - 2, 0, -1): |
| 299 | current_tree = SgfTree(properties=nodes[i][0], children=[current_tree] + nodes[i][1]) |
| 300 | |
| 301 | root_children = [current_tree] + root_children |
| 302 | |
| 303 | return SgfTree(properties=root_properties, children=root_children) |
| 304 | |
| 305 | # Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree with no nodes (only semicolons), Content is empty, No nodes parsed, No nodes before children, Child tree has no nodes, Unexpected character, Key is not in uppercase, Key is empty, No opening bracket, No closing bracket, Escape at end of content, Doesn't start with '(', Unmatched parentheses, Empty nodes list |