| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is invalid SGF format |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Missing opening parenthesis |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Missing closing parenthesis |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Remove the outer parentheses |
| 54 | content = input_string[1:-1] |
| 55 | |
| 56 | # Edge Case: Empty tree |
| 57 | if not content: |
| 58 | raise ValueError("tree with no nodes") |
| 59 | |
| 60 | # Edge Case: Missing semicolon after opening parenthesis |
| 61 | if not content.startswith(';'): |
| 62 | raise ValueError("tree missing") |
| 63 | |
| 64 | return _parse_tree(content) |
| 65 | |
| 66 | |
| 67 | def _parse_tree(content: str) -> SgfTree: |
| 68 | """ |
| 69 | Parse a tree from SGF content. |
| 70 | |
| 71 | Args: |
| 72 | content: SGF content without outer parentheses |
| 73 | |
| 74 | Returns: |
| 75 | SgfTree: The parsed tree |
| 76 | """ |
| 77 | # Skip the initial semicolon |
| 78 | index = 1 |
| 79 | |
| 80 | # Parse properties of the current node |
| 81 | properties, index = _parse_properties(content, index) |
| 82 | |
| 83 | # Parse children |
| 84 | children = [] |
| 85 | |
| 86 | # Handle both variations (parentheses) and shorthand notation (semicolons) |
| 87 | while index < len(content): |
| 88 | if content[index] == '(': |
| 89 | # Parse variation - child in parentheses |
| 90 | paren_count = 1 |
| 91 | start = index + 1 |
| 92 | index += 1 |
| 93 | |
| 94 | while index < len(content) and paren_count > 0: |
| 95 | if content[index] == '(': |
| 96 | paren_count += 1 |
| 97 | elif content[index] == ')': |
| 98 | paren_count -= 1 |
| 99 | index += 1 |
| 100 | |
| 101 | # Edge Case: Unmatched parenthesis |
| 102 | if paren_count != 0: |
| 103 | raise ValueError("tree missing") |
| 104 | |
| 105 | # Parse the child tree |
| 106 | child_content = content[start:index-1] |
| 107 | child_tree = _parse_tree(child_content) |
| 108 | children.append(child_tree) |
| 109 | elif content[index] == ';': |
| 110 | # Parse shorthand notation - sequential node |
| 111 | index += 1 # Skip semicolon |
| 112 | |
| 113 | # Parse properties of the child node |
| 114 | child_properties, index = _parse_properties(content, index) |
| 115 | |
| 116 | # Create child tree and recursively parse its children |
| 117 | child_tree = SgfTree(child_properties, []) |
| 118 | |
| 119 | # Parse any children of this child node |
| 120 | while index < len(content): |
| 121 | if content[index] == '(': |
| 122 | # Parse variation for this child |
| 123 | paren_count = 1 |
| 124 | start = index + 1 |
| 125 | index += 1 |
| 126 | |
| 127 | while index < len(content) and paren_count > 0: |
| 128 | if content[index] == '(': |
| 129 | paren_count += 1 |
| 130 | elif content[index] == ')': |
| 131 | paren_count -= 1 |
| 132 | index += 1 |
| 133 | |
| 134 | if paren_count != 0: |
| 135 | raise ValueError("tree missing") |
| 136 | |
| 137 | grandchild_content = content[start:index-1] |
| 138 | grandchild_tree = _parse_tree(grandchild_content) |
| 139 | child_tree.children.append(grandchild_tree) |
| 140 | elif content[index] == ';': |
| 141 | # Another sequential node - this becomes a child of the current child |
| 142 | index += 1 |
| 143 | grandchild_properties, index = _parse_properties(content, index) |
| 144 | grandchild_tree = SgfTree(grandchild_properties, []) |
| 145 | child_tree.children.append(grandchild_tree) |
| 146 | else: |
| 147 | break |
| 148 | |
| 149 | children.append(child_tree) |
| 150 | else: |
| 151 | # Invalid character |
| 152 | break |
| 153 | |
| 154 | return SgfTree(properties, children) |
| 155 | |
| 156 | |
| 157 | def _parse_properties(content: str, index: int) -> tuple[dict, int]: |
| 158 | """ |
| 159 | Parse properties from SGF content starting at index. |
| 160 | |
| 161 | Args: |
| 162 | content: SGF content |
| 163 | index: Starting index |
| 164 | |
| 165 | Returns: |
| 166 | tuple: (properties dict, new index) |
| 167 | """ |
| 168 | properties = {} |
| 169 | |
| 170 | # Parse all properties in this node (stop at semicolon, opening parenthesis, or end) |
| 171 | while index < len(content) and content[index] not in ';(': |
| 172 | # Parse key |
| 173 | if not content[index].isalpha(): |
| 174 | # If we hit a non-alphabetic character that's not a delimiter, it's an error |
| 175 | if content[index] not in ')': |
| 176 | raise ValueError("properties without delimiter") |
| 177 | break |
| 178 | |
| 179 | key_start = index |
| 180 | while index < len(content) and content[index].isalpha(): |
| 181 | index += 1 |
| 182 | |
| 183 | key = content[key_start:index] |
| 184 | |
| 185 | # Edge Case: Property key must be uppercase |
| 186 | if not key.isupper(): |
| 187 | raise ValueError("property must be in uppercase") |
| 188 | |
| 189 | # Edge Case: Missing opening bracket |
| 190 | if index >= len(content) or content[index] != '[': |
| 191 | raise ValueError("properties without delimiter") |
| 192 | |
| 193 | # Parse values |
| 194 | values = [] |
| 195 | while index < len(content) and content[index] == '[': |
| 196 | index += 1 # Skip opening bracket |
| 197 | |
| 198 | # Parse value |
| 199 | value, index = _parse_value(content, index) |
| 200 | |
| 201 | # Edge Case: Missing closing bracket |
| 202 | if index >= len(content) or content[index] != ']': |
| 203 | raise ValueError("properties without delimiter") |
| 204 | |
| 205 | index += 1 # Skip closing bracket |
| 206 | values.append(value) |
| 207 | |
| 208 | # Edge Case: No values for property |
| 209 | if not values: |
| 210 | raise ValueError("properties without delimiter") |
| 211 | |
| 212 | properties[key] = values |
| 213 | |
| 214 | return properties, index |
| 215 | |
| 216 | |
| 217 | def _parse_value(content: str, index: int) -> tuple[str, int]: |
| 218 | """ |
| 219 | Parse a value according to SGF Text type rules. |
| 220 | |
| 221 | Args: |
| 222 | content: SGF content |
| 223 | index: Starting index |
| 224 | |
| 225 | Returns: |
| 226 | tuple: (parsed value, new index) |
| 227 | """ |
| 228 | value = [] |
| 229 | |
| 230 | while index < len(content) and content[index] != ']': |
| 231 | if content[index] == '\\': |
| 232 | # Handle escape character |
| 233 | index += 1 |
| 234 | |
| 235 | # Edge Case: Escape at end of content |
| 236 | if index >= len(content): |
| 237 | # Treat backslash at end as literal backslash |
| 238 | value.append('\\') |
| 239 | break |
| 240 | |
| 241 | char = content[index] |
| 242 | |
| 243 | # If next character is newline, remove the backslash and newline |
| 244 | if char == '\n': |
| 245 | # Remove the backslash-newline sequence |
| 246 | pass |
| 247 | else: |
| 248 | # Add the escaped character as-is |
| 249 | value.append(char) |
| 250 | |
| 251 | index += 1 |
| 252 | else: |
| 253 | char = content[index] |
| 254 | |
| 255 | # Convert whitespace (except newline) to spaces |
| 256 | if char != '\n' and char.isspace(): |
| 257 | value.append(' ') |
| 258 | else: |
| 259 | value.append(char) |
| 260 | |
| 261 | index += 1 |
| 262 | |
| 263 | return ''.join(value), index |
| 264 | |
| 265 | # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon, unmatched parenthesis, property key not uppercase, missing opening bracket, missing closing bracket, no values for property, escape at end of content |