| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is malformed or doesn't follow SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Input doesn't start with '(' |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Parse the tree and get the result along with the remaining string |
| 50 | tree, remaining = _parse_tree(input_string) |
| 51 | |
| 52 | # Edge Case: There's remaining text after parsing the tree |
| 53 | if remaining: |
| 54 | raise ValueError("tree missing") |
| 55 | |
| 56 | return tree |
| 57 | |
| 58 | |
| 59 | def _parse_tree(s: str) -> tuple[SgfTree, str]: |
| 60 | """ |
| 61 | Parse a tree from the input string. |
| 62 | |
| 63 | Args: |
| 64 | s: Input string starting with '(' |
| 65 | |
| 66 | Returns: |
| 67 | tuple: (parsed SgfTree, remaining string) |
| 68 | """ |
| 69 | # Edge Case: Missing opening parenthesis |
| 70 | if not s.startswith('('): |
| 71 | raise ValueError("tree missing") |
| 72 | |
| 73 | s = s[1:] # Remove the opening '(' |
| 74 | |
| 75 | # Edge Case: Empty tree |
| 76 | if not s: |
| 77 | raise ValueError("tree with no nodes") |
| 78 | |
| 79 | # Parse the sequence of nodes |
| 80 | nodes = [] |
| 81 | while s and not s.startswith(')') and not s.startswith('('): |
| 82 | node, s = _parse_node(s) |
| 83 | nodes.append(node) |
| 84 | |
| 85 | # Edge Case: No nodes in tree |
| 86 | if not nodes: |
| 87 | raise ValueError("tree with no nodes") |
| 88 | |
| 89 | # Handle variations (children) |
| 90 | children = [] |
| 91 | while s and s.startswith('('): |
| 92 | child, s = _parse_tree(s) |
| 93 | children.append(child) |
| 94 | |
| 95 | # Edge Case: Missing closing parenthesis |
| 96 | if not s or not s.startswith(')'): |
| 97 | raise ValueError("tree missing") |
| 98 | |
| 99 | s = s[1:] # Remove the closing ')' |
| 100 | |
| 101 | # Build the tree structure - the first node is the root, others are its children chain |
| 102 | if not nodes: |
| 103 | raise ValueError("tree with no nodes") |
| 104 | |
| 105 | # Create the root node |
| 106 | root = nodes[0] |
| 107 | |
| 108 | # Chain the nodes - each node (except the last) has the next as its only child |
| 109 | current = root |
| 110 | for node in nodes[1:]: |
| 111 | current.children = [node] |
| 112 | current = node |
| 113 | |
| 114 | # Add variations to the root |
| 115 | if children: |
| 116 | root.children.extend(children) |
| 117 | |
| 118 | return root, s |
| 119 | |
| 120 | |
| 121 | def _parse_node(s: str) -> tuple[SgfTree, str]: |
| 122 | """ |
| 123 | Parse a single node from the input string. |
| 124 | |
| 125 | Args: |
| 126 | s: Input string starting with ';' |
| 127 | |
| 128 | Returns: |
| 129 | tuple: (parsed SgfTree node, remaining string) |
| 130 | """ |
| 131 | # Edge Case: Missing semicolon |
| 132 | if not s.startswith(';'): |
| 133 | raise ValueError("tree missing") |
| 134 | |
| 135 | s = s[1:] # Remove the semicolon |
| 136 | |
| 137 | properties = {} |
| 138 | |
| 139 | # Check if there are any properties |
| 140 | if s and s[0].isalpha(): |
| 141 | # If there's a character but it's not uppercase, it's an error |
| 142 | if not s[0].isupper(): |
| 143 | raise ValueError("property must be in uppercase") |
| 144 | |
| 145 | # Parse properties while we have uppercase letters |
| 146 | while s and s[0].isupper(): |
| 147 | key, values, s = _parse_property(s) |
| 148 | properties[key] = values |
| 149 | |
| 150 | # Edge Case: No properties in node (empty node) |
| 151 | if not properties and (not s or (s[0] != ')' and s[0] != '(' and s[0] != ';')): |
| 152 | # This is a bit tricky - we need to distinguish between an empty node and a node with children |
| 153 | # For now, let's check if we have a valid continuation |
| 154 | pass |
| 155 | |
| 156 | return SgfTree(properties=properties), s |
| 157 | |
| 158 | |
| 159 | def _parse_property(s: str) -> tuple[str, list[str], str]: |
| 160 | """ |
| 161 | Parse a property (key with one or more values) from the input string. |
| 162 | |
| 163 | Args: |
| 164 | s: Input string starting with an uppercase letter |
| 165 | |
| 166 | Returns: |
| 167 | tuple: (property key, list of property values, remaining string) |
| 168 | """ |
| 169 | # Edge Case: Property key is not uppercase |
| 170 | if not s or not s[0].isupper(): |
| 171 | raise ValueError("property must be in uppercase") |
| 172 | |
| 173 | # Parse the key (sequence of uppercase letters) |
| 174 | key_end = 0 |
| 175 | while key_end < len(s) and s[key_end].isupper(): |
| 176 | key_end += 1 |
| 177 | |
| 178 | key = s[:key_end] |
| 179 | s = s[key_end:] |
| 180 | |
| 181 | # Edge Case: Missing opening bracket for value |
| 182 | if not s or s[0] != '[': |
| 183 | raise ValueError("properties without delimiter") |
| 184 | |
| 185 | values = [] |
| 186 | |
| 187 | # Parse one or more values |
| 188 | while s and s.startswith('['): |
| 189 | value, s = _parse_value(s) |
| 190 | values.append(value) |
| 191 | |
| 192 | return key, values, s |
| 193 | |
| 194 | |
| 195 | def _parse_value(s: str) -> tuple[str, str]: |
| 196 | """ |
| 197 | Parse a single value from the input string. |
| 198 | |
| 199 | Args: |
| 200 | s: Input string starting with '[' |
| 201 | |
| 202 | Returns: |
| 203 | tuple: (parsed value, remaining string) |
| 204 | """ |
| 205 | # Edge Case: Missing opening bracket |
| 206 | if not s or s[0] != '[': |
| 207 | raise ValueError("properties without delimiter") |
| 208 | |
| 209 | s = s[1:] # Remove the opening '[' |
| 210 | |
| 211 | value = [] |
| 212 | i = 0 |
| 213 | |
| 214 | # Process characters until we find the closing bracket |
| 215 | while i < len(s): |
| 216 | char = s[i] |
| 217 | |
| 218 | if char == ']' and (i == 0 or s[i-1] != '\\'): |
| 219 | # Found the end of the value (only if not escaped) |
| 220 | break |
| 221 | elif char == '\\': |
| 222 | # Handle escape sequences |
| 223 | i += 1 |
| 224 | if i >= len(s): |
| 225 | # Edge Case: Escape character at end of string |
| 226 | raise ValueError("properties without delimiter") |
| 227 | |
| 228 | next_char = s[i] |
| 229 | if next_char == '\n': |
| 230 | # Newline immediately after backslash - remove it |
| 231 | pass |
| 232 | elif next_char in (' ', '\t'): |
| 233 | # Escaped whitespace - convert to space |
| 234 | value.append(' ') |
| 235 | else: |
| 236 | # Any other escaped character - insert as-is |
| 237 | value.append(next_char) |
| 238 | elif char in (' ', '\t'): |
| 239 | # Convert all whitespace (except newlines) to spaces |
| 240 | value.append(' ') |
| 241 | else: |
| 242 | # Regular character |
| 243 | value.append(char) |
| 244 | |
| 245 | i += 1 |
| 246 | |
| 247 | # Edge Case: Missing closing bracket |
| 248 | if i >= len(s) or s[i] != ']': |
| 249 | raise ValueError("properties without delimiter") |
| 250 | |
| 251 | # Join the value and return with the remaining string |
| 252 | return ''.join(value), s[i+1:] |
| 253 | |
| 254 | # Handled Edge Cases: Empty input, missing opening parenthesis, empty tree, no nodes in tree, missing closing parenthesis, missing semicolon, property key not uppercase, missing opening bracket for value, missing opening bracket, escape character at end of string, missing closing bracket |