| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is invalid according to SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Missing opening parenthesis |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Missing closing parenthesis |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Remove the outer parentheses |
| 54 | content = input_string[1:-1] |
| 55 | |
| 56 | # Edge Case: Empty tree |
| 57 | if not content: |
| 58 | raise ValueError("tree with no nodes") |
| 59 | |
| 60 | # Edge Case: Missing semicolon after opening parenthesis |
| 61 | if not content.startswith(';'): |
| 62 | raise ValueError("tree missing") |
| 63 | |
| 64 | return _parse_tree(content) |
| 65 | |
| 66 | |
| 67 | def _parse_tree(content: str) -> SgfTree: |
| 68 | """ |
| 69 | Parse the content of a tree (without the outer parentheses). |
| 70 | |
| 71 | Args: |
| 72 | content: The content of the tree |
| 73 | |
| 74 | Returns: |
| 75 | SgfTree: The parsed tree |
| 76 | """ |
| 77 | # Parse properties of the root node (starting at index 0, which should be a semicolon) |
| 78 | properties, index = _parse_properties(content, 0) |
| 79 | |
| 80 | # Parse children |
| 81 | children = [] |
| 82 | while index < len(content): |
| 83 | if content[index] == '(': # Start of a child tree |
| 84 | # Find the matching closing parenthesis |
| 85 | paren_count = 1 |
| 86 | start = index |
| 87 | index += 1 |
| 88 | while index < len(content) and paren_count > 0: |
| 89 | if content[index] == '(': |
| 90 | paren_count += 1 |
| 91 | elif content[index] == ')': |
| 92 | paren_count -= 1 |
| 93 | index += 1 |
| 94 | |
| 95 | # Parse the child tree |
| 96 | child_content = content[start:index] |
| 97 | children.append(parse(child_content)) |
| 98 | elif content[index] == ';': |
| 99 | # This should be the start of another node in the same tree |
| 100 | # Parse its properties |
| 101 | child_properties, new_index = _parse_properties(content, index) |
| 102 | # Create a child node with these properties |
| 103 | # For consecutive nodes, we need to chain them as parent-child relationships |
| 104 | child_node = SgfTree(properties=child_properties, children=[]) |
| 105 | |
| 106 | # If we already have children, we need to append this as a child of the last child |
| 107 | # Otherwise, add it as a direct child |
| 108 | if children: |
| 109 | # Find the deepest child in the chain and add this as its child |
| 110 | last_child = children[-1] |
| 111 | while last_child.children: |
| 112 | last_child = last_child.children[-1] |
| 113 | last_child.children.append(child_node) |
| 114 | else: |
| 115 | children.append(child_node) |
| 116 | index = new_index |
| 117 | else: |
| 118 | # Skip any other characters |
| 119 | index += 1 |
| 120 | |
| 121 | return SgfTree(properties=properties, children=children) |
| 122 | |
| 123 | |
| 124 | def _parse_properties(content: str, index: int) -> tuple[dict, int]: |
| 125 | """ |
| 126 | Parse properties from the content starting at index. |
| 127 | |
| 128 | Args: |
| 129 | content: The content to parse |
| 130 | index: The starting index |
| 131 | |
| 132 | Returns: |
| 133 | tuple: A tuple of (properties dict, new index) |
| 134 | """ |
| 135 | properties = {} |
| 136 | |
| 137 | # Expect a semicolon at the start |
| 138 | if index >= len(content) or content[index] != ';': |
| 139 | return properties, index |
| 140 | |
| 141 | index += 1 # Skip the semicolon |
| 142 | |
| 143 | # Parse all properties for this node |
| 144 | while index < len(content) and content[index].isalpha(): |
| 145 | # Parse key |
| 146 | key_start = index |
| 147 | while index < len(content) and content[index].isalpha(): |
| 148 | index += 1 |
| 149 | key = content[key_start:index] |
| 150 | |
| 151 | # Edge Case: Property not in uppercase |
| 152 | if not key.isupper(): |
| 153 | raise ValueError("property must be in uppercase") |
| 154 | |
| 155 | # Parse values |
| 156 | values = [] |
| 157 | |
| 158 | # Edge Case: Missing opening bracket |
| 159 | if index >= len(content) or content[index] != '[': |
| 160 | raise ValueError("properties without delimiter") |
| 161 | |
| 162 | while index < len(content) and content[index] == '[': |
| 163 | index += 1 # Skip the opening bracket |
| 164 | |
| 165 | # Parse value |
| 166 | value, index = _parse_value(content, index) |
| 167 | |
| 168 | # Edge Case: Missing closing bracket |
| 169 | if index >= len(content) or content[index] != ']': |
| 170 | raise ValueError("properties without delimiter") |
| 171 | |
| 172 | index += 1 # Skip the closing bracket |
| 173 | values.append(value) |
| 174 | |
| 175 | properties[key] = values |
| 176 | |
| 177 | return properties, index |
| 178 | |
| 179 | |
| 180 | def _parse_value(content: str, index: int) -> tuple[str, int]: |
| 181 | """ |
| 182 | Parse a value from the content starting at index. |
| 183 | |
| 184 | Args: |
| 185 | content: The content to parse |
| 186 | index: The starting index |
| 187 | |
| 188 | Returns: |
| 189 | tuple: A tuple of (parsed value, new index) |
| 190 | """ |
| 191 | value = [] |
| 192 | |
| 193 | while index < len(content): |
| 194 | if content[index] == ']': # End of value |
| 195 | break |
| 196 | elif content[index] == '\\': # Escape character |
| 197 | index += 1 |
| 198 | if index < len(content): |
| 199 | # According to SGF spec: |
| 200 | # - Newlines are removed if they come immediately after a \ |
| 201 | # - All whitespace characters other than newline are converted to spaces |
| 202 | # - Any non-whitespace character after \ is inserted as-is |
| 203 | char = content[index] |
| 204 | if char == '\n': # Newline immediately after backslash |
| 205 | # Remove the newline (it's already processed by this point in Python) |
| 206 | pass |
| 207 | elif char.isspace(): # Other whitespace characters |
| 208 | value.append(' ') |
| 209 | else: # Non-whitespace character (including n, r, t) |
| 210 | value.append(char) |
| 211 | index += 1 |
| 212 | else: |
| 213 | # Edge Case: Escape character at end of content |
| 214 | value.append('\\') |
| 215 | else: |
| 216 | # Regular character |
| 217 | # According to SGF spec: |
| 218 | # - All whitespace characters other than newline are converted to spaces |
| 219 | char = content[index] |
| 220 | if char != '\n' and char.isspace(): |
| 221 | value.append(' ') |
| 222 | else: |
| 223 | value.append(char) |
| 224 | index += 1 |
| 225 | |
| 226 | return ''.join(value), index |
| 227 | |
| 228 | return ''.join(value), index |
| 229 | # Handled Edge Cases: Empty input, Missing opening parenthesis, Missing closing parenthesis, Empty tree, Missing semicolon after opening parenthesis, Property not in uppercase, Missing opening bracket, Missing closing bracket, Escape character at end of content |
| 230 | # Handled Edge Cases: Empty input, Missing opening parenthesis, Missing closing parenthesis, Empty tree, Missing semicolon after opening parenthesis |
| 231 | # Handled Edge Cases: Property not in uppercase, Missing opening bracket, Missing closing bracket |
| 232 | # Handled Edge Cases: Escape character at end of content |