| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string containing SGF formatted data |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree structure |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input string is malformed or doesn't follow SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Missing opening parenthesis |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Parse the tree and return the result |
| 50 | tree, remaining = _parse_tree(input_string) |
| 51 | |
| 52 | # Edge Case: Unexpected characters after the tree |
| 53 | if remaining: |
| 54 | raise ValueError("tree missing") |
| 55 | |
| 56 | return tree |
| 57 | |
| 58 | |
| 59 | def _parse_tree(s: str) -> tuple[SgfTree, str]: |
| 60 | """ |
| 61 | Parse a tree from the input string. |
| 62 | |
| 63 | Args: |
| 64 | s: The input string starting with '(' |
| 65 | |
| 66 | Returns: |
| 67 | tuple: A tuple containing the parsed SgfTree and the remaining string |
| 68 | |
| 69 | Raises: |
| 70 | ValueError: If the tree structure is invalid |
| 71 | """ |
| 72 | # Edge Case: Missing opening parenthesis |
| 73 | if not s.startswith('('): |
| 74 | raise ValueError("tree missing") |
| 75 | |
| 76 | s = s[1:] # Remove the opening parenthesis |
| 77 | |
| 78 | # Edge Case: Empty tree |
| 79 | if not s: |
| 80 | raise ValueError("tree with no nodes") |
| 81 | |
| 82 | # Parse the sequence of nodes |
| 83 | nodes = [] |
| 84 | while s and s[0] != ')': |
| 85 | node, s = _parse_node(s) |
| 86 | nodes.append(node) |
| 87 | |
| 88 | # Edge Case: No nodes in the tree |
| 89 | if not nodes: |
| 90 | raise ValueError("tree with no nodes") |
| 91 | |
| 92 | # Edge Case: Missing closing parenthesis |
| 93 | if not s: |
| 94 | raise ValueError("tree missing") |
| 95 | |
| 96 | s = s[1:] # Remove the closing parenthesis |
| 97 | |
| 98 | # Build the tree structure from the nodes |
| 99 | # The first node is the root, and each subsequent node is a child of the previous one |
| 100 | # unless we encounter variations |
| 101 | |
| 102 | # Handle variations - if there are multiple top-level nodes, they are variations |
| 103 | # But in a simple sequence, each node (except the first) is a child of the previous |
| 104 | |
| 105 | # For now, we'll build a simple chain |
| 106 | # The first node is the root |
| 107 | root = nodes[0] |
| 108 | current = root |
| 109 | |
| 110 | # Each subsequent node becomes a child of the previous one |
| 111 | for node in nodes[1:]: |
| 112 | current.children = [node] |
| 113 | current = node |
| 114 | |
| 115 | # Handle variations - if after the first node we have parentheses, they are variations |
| 116 | # This is handled in _parse_node when we encounter '(' |
| 117 | |
| 118 | return root, s |
| 119 | |
| 120 | |
| 121 | def _parse_node(s: str) -> tuple[SgfTree, str]: |
| 122 | """ |
| 123 | Parse a node from the input string. |
| 124 | |
| 125 | Args: |
| 126 | s: The input string starting with ';' or '(' |
| 127 | |
| 128 | Returns: |
| 129 | tuple: A tuple containing the parsed SgfTree node and the remaining string |
| 130 | """ |
| 131 | # Edge Case: Missing semicolon or opening parenthesis for a new node/variation |
| 132 | if not s or (s[0] != ';' and s[0] != '('): |
| 133 | raise ValueError("tree missing") |
| 134 | |
| 135 | # If it's a variation (starts with '('), parse it as a tree |
| 136 | if s[0] == '(': |
| 137 | tree, s = _parse_tree(s) |
| 138 | return tree, s |
| 139 | |
| 140 | s = s[1:] # Remove the semicolon |
| 141 | |
| 142 | properties = {} |
| 143 | children = [] |
| 144 | |
| 145 | # Parse properties |
| 146 | while s and s[0] != ')' and s[0] != ';' and s[0] != '(': |
| 147 | key, values, s = _parse_property(s) |
| 148 | |
| 149 | # Edge Case: Property not in uppercase |
| 150 | if not key.isupper(): |
| 151 | raise ValueError("property must be in uppercase") |
| 152 | |
| 153 | properties[key] = values |
| 154 | |
| 155 | # Parse children (variations) |
| 156 | while s and (s[0] == '(' or s[0] == ';'): |
| 157 | if s[0] == '(': |
| 158 | # This is a variation |
| 159 | child, s = _parse_tree(s) |
| 160 | children.append(child) |
| 161 | else: |
| 162 | # This is a sequential node |
| 163 | child, s = _parse_node(s) |
| 164 | children.append(child) |
| 165 | |
| 166 | return SgfTree(properties, children), s |
| 167 | |
| 168 | |
| 169 | def _parse_property(s: str) -> tuple[str, list[str], str]: |
| 170 | """ |
| 171 | Parse a property (key with one or more values) from the input string. |
| 172 | |
| 173 | Args: |
| 174 | s: The input string starting with a property key |
| 175 | |
| 176 | Returns: |
| 177 | tuple: A tuple containing the property key, list of values, and the remaining string |
| 178 | |
| 179 | Raises: |
| 180 | ValueError: If the property format is invalid |
| 181 | """ |
| 182 | # Parse the key (sequence of uppercase letters) |
| 183 | key = '' |
| 184 | i = 0 |
| 185 | |
| 186 | # Edge Case: Missing key |
| 187 | if not s or not s[0].isalpha() or not s[0].isupper(): |
| 188 | raise ValueError("property must be in uppercase") |
| 189 | |
| 190 | while i < len(s) and s[i].isalpha() and s[i].isupper(): |
| 191 | key += s[i] |
| 192 | i += 1 |
| 193 | |
| 194 | # Edge Case: Missing key |
| 195 | if not key: |
| 196 | raise ValueError("property must be in uppercase") |
| 197 | |
| 198 | values = [] |
| 199 | |
| 200 | # Parse values |
| 201 | while i < len(s) and s[i] == '[': |
| 202 | value, i = _parse_value(s, i) |
| 203 | values.append(value) |
| 204 | |
| 205 | # Edge Case: No values for the property |
| 206 | if not values: |
| 207 | raise ValueError("properties without delimiter") |
| 208 | |
| 209 | return key, values, s[i:] |
| 210 | |
| 211 | |
| 212 | def _parse_value(s: str, start: int) -> tuple[str, int]: |
| 213 | """ |
| 214 | Parse a value (text within brackets) from the input string. |
| 215 | |
| 216 | Args: |
| 217 | s: The input string |
| 218 | start: The starting position (should be at '[') |
| 219 | |
| 220 | Returns: |
| 221 | tuple: A tuple containing the parsed value and the position after the closing ']' |
| 222 | |
| 223 | Raises: |
| 224 | ValueError: If the value format is invalid |
| 225 | """ |
| 226 | # Edge Case: Missing opening bracket |
| 227 | if s[start] != '[': |
| 228 | raise ValueError("properties without delimiter") |
| 229 | |
| 230 | i = start + 1 |
| 231 | value = '' |
| 232 | |
| 233 | while i < len(s): |
| 234 | if s[i] == ']': |
| 235 | # Found closing bracket, end of value |
| 236 | break |
| 237 | elif s[i] == '\\': |
| 238 | # Handle escape sequences |
| 239 | i += 1 |
| 240 | # Edge Case: Escape at end of string |
| 241 | if i >= len(s): |
| 242 | raise ValueError("properties without delimiter") |
| 243 | |
| 244 | char = s[i] |
| 245 | |
| 246 | # If the next character is a newline immediately after backslash, remove it |
| 247 | if char == '\n': |
| 248 | # Newlines are removed if they come immediately after a `\` |
| 249 | pass |
| 250 | else: |
| 251 | # All other characters after \ are inserted as-is |
| 252 | # All whitespace characters other than newline are converted to spaces |
| 253 | if char.isspace() and char != '\n': |
| 254 | value += ' ' |
| 255 | else: |
| 256 | value += char |
| 257 | else: |
| 258 | # Regular character |
| 259 | char = s[i] |
| 260 | # All whitespace characters other than newline are converted to spaces |
| 261 | if char.isspace() and char != '\n': |
| 262 | value += ' ' |
| 263 | else: |
| 264 | value += char |
| 265 | i += 1 |
| 266 | |
| 267 | # Edge Case: Missing closing bracket |
| 268 | if i >= len(s) or s[i] != ']': |
| 269 | raise ValueError("properties without delimiter") |
| 270 | |
| 271 | return value, i + 1 |
| 272 | |
| 273 | # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, no nodes in tree, missing semicolon, property not in uppercase, missing key, no values for property, missing opening bracket, missing closing bracket, escape at end of string |