| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input string is not valid SGF format |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Missing opening parenthesis |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Missing closing parenthesis |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Parse the content inside the outermost parentheses |
| 54 | content = input_string[1:-1] |
| 55 | |
| 56 | # Edge Case: Empty tree |
| 57 | if not content: |
| 58 | raise ValueError("tree with no nodes") |
| 59 | |
| 60 | # Edge Case: Missing semicolon after opening parenthesis |
| 61 | if not content.startswith(';'): |
| 62 | raise ValueError("tree missing") |
| 63 | |
| 64 | return _parse_tree(content) |
| 65 | |
| 66 | |
| 67 | def _parse_tree(content: str) -> SgfTree: |
| 68 | """ |
| 69 | Parse a tree from its content string. |
| 70 | |
| 71 | Args: |
| 72 | content: The content of the tree (without outer parentheses) |
| 73 | |
| 74 | Returns: |
| 75 | SgfTree: The parsed tree |
| 76 | """ |
| 77 | # Skip the initial semicolon |
| 78 | content = content[1:] |
| 79 | |
| 80 | # Parse the root node |
| 81 | properties, remaining = _parse_properties(content) |
| 82 | |
| 83 | children = [] |
| 84 | |
| 85 | # Parse children |
| 86 | if remaining: |
| 87 | if remaining.startswith('('): |
| 88 | # Parse variations (multiple children) |
| 89 | while remaining and remaining.startswith('('): |
| 90 | subtree_content, remaining = _parse_subtree(remaining) |
| 91 | child = _parse_tree(subtree_content) |
| 92 | children.append(child) |
| 93 | elif remaining.startswith(';'): |
| 94 | # Parse sequential nodes (chain of single children) |
| 95 | current_children = children |
| 96 | current_node = None |
| 97 | |
| 98 | while remaining and remaining.startswith(';'): |
| 99 | # Parse a sequential node |
| 100 | remaining = remaining[1:] # Skip semicolon |
| 101 | child_properties, remaining = _parse_properties(remaining) |
| 102 | new_node = SgfTree(properties=child_properties, children=[]) |
| 103 | |
| 104 | if current_node is None: |
| 105 | # First sequential node - add to root's children |
| 106 | current_children.append(new_node) |
| 107 | else: |
| 108 | # Subsequent sequential nodes - add as child of previous node |
| 109 | current_node.children.append(new_node) |
| 110 | |
| 111 | current_node = new_node |
| 112 | |
| 113 | return SgfTree(properties=properties, children=children) |
| 114 | |
| 115 | |
| 116 | def _parse_properties(content: str) -> tuple[dict, str]: |
| 117 | """ |
| 118 | Parse properties from the content string. |
| 119 | |
| 120 | Args: |
| 121 | content: String containing properties |
| 122 | |
| 123 | Returns: |
| 124 | tuple: (properties_dict, remaining_content) |
| 125 | """ |
| 126 | properties = {} |
| 127 | remaining = content |
| 128 | |
| 129 | while remaining and remaining[0].isalpha(): |
| 130 | # Parse property key |
| 131 | key_end = 0 |
| 132 | while key_end < len(remaining) and remaining[key_end].isalpha(): |
| 133 | key_end += 1 |
| 134 | |
| 135 | key = remaining[:key_end] |
| 136 | |
| 137 | # Edge Case: Property key not in uppercase |
| 138 | if not key.isupper(): |
| 139 | raise ValueError("property must be in uppercase") |
| 140 | |
| 141 | remaining = remaining[key_end:] |
| 142 | |
| 143 | # Edge Case: Missing opening bracket for property value |
| 144 | if not remaining.startswith('['): |
| 145 | raise ValueError("properties without delimiter") |
| 146 | |
| 147 | # Parse all values for this key |
| 148 | values = [] |
| 149 | while remaining.startswith('['): |
| 150 | remaining = remaining[1:] # Skip opening bracket |
| 151 | value_end = 0 |
| 152 | |
| 153 | # Find the closing bracket, handling escaped brackets |
| 154 | while value_end < len(remaining): |
| 155 | if remaining[value_end] == ']': |
| 156 | # Count consecutive backslashes before this bracket |
| 157 | backslash_count = 0 |
| 158 | pos = value_end - 1 |
| 159 | while pos >= 0 and remaining[pos] == '\\': |
| 160 | backslash_count += 1 |
| 161 | pos -= 1 |
| 162 | |
| 163 | # If even number of backslashes, bracket is not escaped |
| 164 | # If odd number of backslashes, bracket is escaped |
| 165 | if backslash_count % 2 == 0: |
| 166 | break |
| 167 | value_end += 1 |
| 168 | |
| 169 | # Edge Case: Missing closing bracket |
| 170 | if value_end >= len(remaining): |
| 171 | raise ValueError("properties without delimiter") |
| 172 | |
| 173 | value = remaining[:value_end] |
| 174 | values.append(_unescape_text(value)) |
| 175 | remaining = remaining[value_end+1:] # Skip closing bracket and move to next |
| 176 | |
| 177 | properties[key] = values |
| 178 | |
| 179 | return properties, remaining |
| 180 | |
| 181 | |
| 182 | def _parse_subtree(content: str) -> tuple[str, str]: |
| 183 | """ |
| 184 | Parse a subtree from its content string. |
| 185 | |
| 186 | Args: |
| 187 | content: String starting with '(' |
| 188 | |
| 189 | Returns: |
| 190 | tuple: (subtree_content, remaining_content) |
| 191 | """ |
| 192 | # Edge Case: Not starting with '(' |
| 193 | if not content.startswith('('): |
| 194 | raise ValueError("tree missing") |
| 195 | |
| 196 | balance = 0 |
| 197 | i = 0 |
| 198 | |
| 199 | while i < len(content): |
| 200 | if content[i] == '(': |
| 201 | balance += 1 |
| 202 | elif content[i] == ')': |
| 203 | balance -= 1 |
| 204 | # Edge Case: Found matching closing parenthesis |
| 205 | if balance == 0: |
| 206 | return content[1:i], content[i+1:] |
| 207 | i += 1 |
| 208 | |
| 209 | # Edge Case: Unmatched parentheses |
| 210 | raise ValueError("tree missing") |
| 211 | |
| 212 | |
| 213 | def _unescape_text(text: str) -> str: |
| 214 | """ |
| 215 | Unescape SGF text according to SGF specification. |
| 216 | |
| 217 | Args: |
| 218 | text: The escaped text |
| 219 | |
| 220 | Returns: |
| 221 | str: The unescaped text |
| 222 | """ |
| 223 | result = [] |
| 224 | i = 0 |
| 225 | |
| 226 | while i < len(text): |
| 227 | if text[i] == '\\': |
| 228 | i += 1 |
| 229 | # Edge Case: Backslash at end of string |
| 230 | if i >= len(text): |
| 231 | result.append('\\') |
| 232 | break |
| 233 | |
| 234 | char = text[i] |
| 235 | # Handle newline removal after backslash |
| 236 | if char == '\n': |
| 237 | # Newline is removed if immediately after backslash |
| 238 | pass |
| 239 | # Convert other whitespace to space |
| 240 | elif char in ' \t\r': |
| 241 | result.append(' ') |
| 242 | else: |
| 243 | # Insert non-whitespace characters as-is |
| 244 | result.append(char) |
| 245 | else: |
| 246 | char = text[i] |
| 247 | # Convert whitespace (except newline) to space |
| 248 | if char in ' \t\r': |
| 249 | result.append(' ') |
| 250 | else: |
| 251 | result.append(char) |
| 252 | i += 1 |
| 253 | |
| 254 | return ''.join(result) |
| 255 | # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon after opening parenthesis, property key not in uppercase, missing opening bracket for property value, missing closing bracket, not starting with '(', unmatched parentheses, backslash at end of string |
| 256 | # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon after opening parenthesis |
| 257 | # Handled Edge Cases: Property key not in uppercase, missing opening bracket for property value, missing closing bracket |
| 258 | # Handled Edge Cases: Not starting with '(', unmatched parentheses |
| 259 | # Handled Edge Cases: Backslash at end of string |