| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | if not input_string: |
| 30 | # Edge Case: Empty input string |
| 31 | raise ValueError("tree missing") |
| 32 | |
| 33 | if not input_string.startswith('(') or not input_string.endswith(')'): |
| 34 | # Edge Case: Input does not start with '(' or end with ')' |
| 35 | raise ValueError("tree missing") |
| 36 | |
| 37 | # Remove the outer parentheses |
| 38 | content = input_string[1:-1] |
| 39 | |
| 40 | if not content: |
| 41 | # Edge Case: Empty tree with no nodes |
| 42 | raise ValueError("tree with no nodes") |
| 43 | |
| 44 | # Parse the tree |
| 45 | tree, remaining = _parse_tree(content) |
| 46 | |
| 47 | if remaining: |
| 48 | # Edge Case: Unexpected content after parsing the tree |
| 49 | raise ValueError("tree missing") |
| 50 | |
| 51 | return tree |
| 52 | |
| 53 | |
| 54 | def _parse_tree(content: str) -> tuple[SgfTree, str]: |
| 55 | if not content.startswith(';'): |
| 56 | # Edge Case: Node must start with ';' |
| 57 | raise ValueError("tree missing") |
| 58 | |
| 59 | # Parse the first node |
| 60 | node, remaining = _parse_node(content[1:]) # Skip the ';' |
| 61 | |
| 62 | children = [] |
| 63 | |
| 64 | # Parse any children - both variations (in parentheses) and single-child sequences |
| 65 | while remaining: |
| 66 | if remaining.startswith('('): |
| 67 | # Find the matching parenthesis for the child tree |
| 68 | balance = 1 |
| 69 | index = 1 # Start after the initial '(' |
| 70 | while index < len(remaining) and balance > 0: |
| 71 | if remaining[index] == '(': |
| 72 | balance += 1 |
| 73 | elif remaining[index] == ')': |
| 74 | balance -= 1 |
| 75 | index += 1 |
| 76 | |
| 77 | if balance != 0: |
| 78 | # Edge Case: Mismatched parentheses |
| 79 | raise ValueError("tree missing") |
| 80 | |
| 81 | # Parse the child tree |
| 82 | child_content = remaining[1:index-1] # Exclude the outer parentheses |
| 83 | child_tree, child_remaining = _parse_tree(child_content) |
| 84 | |
| 85 | if child_remaining: |
| 86 | # Edge Case: Unexpected content in child tree |
| 87 | raise ValueError("tree missing") |
| 88 | |
| 89 | children.append(child_tree) |
| 90 | remaining = remaining[index:] # Move past the parsed child |
| 91 | elif remaining.startswith(';'): |
| 92 | # Single-child sequence |
| 93 | child_node, child_remaining = _parse_node(remaining[1:]) # Skip the ';' |
| 94 | child_tree = SgfTree(properties=child_node, children=[]) |
| 95 | children.append(child_tree) |
| 96 | remaining = child_remaining |
| 97 | else: |
| 98 | # Unexpected content |
| 99 | break |
| 100 | |
| 101 | return SgfTree(properties=node, children=children), remaining |
| 102 | |
| 103 | |
| 104 | def _parse_node(content: str) -> tuple[dict, str]: |
| 105 | properties = {} |
| 106 | remaining = content |
| 107 | |
| 108 | while remaining and remaining[0] != ')' and remaining[0] != '(': |
| 109 | # Parse a property |
| 110 | prop, remaining = _parse_property(remaining) |
| 111 | key, values = prop |
| 112 | |
| 113 | if key in properties: |
| 114 | # Edge Case: Duplicate property key - accumulate values |
| 115 | properties[key].extend(values) |
| 116 | else: |
| 117 | properties[key] = values |
| 118 | |
| 119 | return properties, remaining |
| 120 | |
| 121 | |
| 122 | def _parse_property(content: str) -> tuple[tuple[str, list[str]], str]: |
| 123 | # Debug print |
| 124 | print(f"_parse_property called with content: {repr(content)}") |
| 125 | |
| 126 | # Parse the key |
| 127 | key_end = 0 |
| 128 | while key_end < len(content) and content[key_end].isalpha(): |
| 129 | key_end += 1 |
| 130 | |
| 131 | if key_end == 0: |
| 132 | # Edge Case: Property key is missing |
| 133 | print(f"Property key is missing in content: {repr(content)}") |
| 134 | raise ValueError("properties without delimiter") |
| 135 | |
| 136 | key = content[:key_end] |
| 137 | |
| 138 | if not key.isupper(): |
| 139 | # Edge Case: Property key is not in uppercase |
| 140 | raise ValueError("property must be in uppercase") |
| 141 | |
| 142 | remaining = content[key_end:] |
| 143 | |
| 144 | if not remaining.startswith('['): |
| 145 | # Edge Case: Property value delimiter '[' is missing |
| 146 | raise ValueError("properties without delimiter") |
| 147 | |
| 148 | # Parse the values |
| 149 | values = [] |
| 150 | remaining = remaining[1:] # Skip the '[' |
| 151 | |
| 152 | while True: |
| 153 | # Parse a single value |
| 154 | value, remaining = _parse_value(remaining) |
| 155 | values.append(value) |
| 156 | |
| 157 | if not remaining.startswith(']'): |
| 158 | # Edge Case: Property value delimiter ']' is missing |
| 159 | raise ValueError("properties without delimiter") |
| 160 | |
| 161 | remaining = remaining[1:] # Skip the ']' |
| 162 | |
| 163 | # Check if there are more values for this property |
| 164 | if not remaining.startswith('['): |
| 165 | break |
| 166 | |
| 167 | remaining = remaining[1:] # Skip the '[' |
| 168 | |
| 169 | return (key, values), remaining |
| 170 | |
| 171 | |
| 172 | def _parse_value(content: str) -> tuple[str, str]: |
| 173 | value = [] |
| 174 | i = 0 |
| 175 | |
| 176 | while i < len(content): |
| 177 | char = content[i] |
| 178 | |
| 179 | if char == '\\': |
| 180 | # Handle escape sequences |
| 181 | i += 1 |
| 182 | if i >= len(content): |
| 183 | # Edge Case: Escape character at end of value |
| 184 | raise ValueError("properties without delimiter") |
| 185 | |
| 186 | next_char = content[i] |
| 187 | |
| 188 | # According to SGF spec: |
| 189 | # - Any non-whitespace character after \ is inserted as-is |
| 190 | # - Any whitespace character after \ follows the normal rules |
| 191 | if next_char in ' \t\r\n': |
| 192 | # Whitespace after escape - follows normal whitespace rules |
| 193 | if next_char == '\n' and i > 0 and content[i-1] == '\\': |
| 194 | # Newline immediately after \ should be removed |
| 195 | pass |
| 196 | else: |
| 197 | # Apply normal whitespace rules |
| 198 | if next_char in ' \t\r': |
| 199 | value.append(' ') |
| 200 | else: # newline |
| 201 | value.append(next_char) |
| 202 | else: |
| 203 | # Any non-whitespace character after \ is inserted as-is |
| 204 | value.append(next_char) |
| 205 | elif char == ']': |
| 206 | # End of value |
| 207 | break |
| 208 | elif char in ' \t\r': |
| 209 | # Convert whitespace (except newline) to space |
| 210 | value.append(' ') |
| 211 | elif char == '\n': |
| 212 | # Newlines are kept unless immediately after \ |
| 213 | value.append(char) |
| 214 | else: |
| 215 | # Regular character |
| 216 | value.append(char) |
| 217 | |
| 218 | i += 1 |
| 219 | |
| 220 | if i >= len(content): |
| 221 | # Edge Case: End of content reached without finding ']' |
| 222 | raise ValueError("properties without delimiter") |
| 223 | |
| 224 | return ''.join(value), content[i:] |
| 225 | |
| 226 | # Handled Edge Cases: Empty input string, Input does not start with '(' or end with ')', Empty tree with no nodes, Unexpected content after parsing the tree, Node must start with ';', Mismatched parentheses, Unexpected content in child tree, Duplicate property key, Property key is missing, Property key is not in uppercase, Property value delimiter '[' is missing, Property value delimiter ']' is missing, Escape character at end of value, End of content reached without finding ']' |