| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is invalid according to SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Input doesn't start with '(' |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Input doesn't end with ')' |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Remove the outer parentheses |
| 54 | content = input_string[1:-1] |
| 55 | |
| 56 | # Edge Case: Empty tree with no nodes |
| 57 | if not content: |
| 58 | raise ValueError("tree with no nodes") |
| 59 | |
| 60 | # Edge Case: Tree missing semicolon |
| 61 | if not content.startswith(';'): |
| 62 | raise ValueError("tree missing") |
| 63 | |
| 64 | return _parse_tree(content) |
| 65 | |
| 66 | |
| 67 | def _parse_tree(content: str) -> SgfTree: |
| 68 | """ |
| 69 | Parse the content of a tree (without the outer parentheses). |
| 70 | |
| 71 | Args: |
| 72 | content: The content of the tree |
| 73 | |
| 74 | Returns: |
| 75 | SgfTree: The parsed tree |
| 76 | """ |
| 77 | # Skip the initial semicolon |
| 78 | content = content[1:] |
| 79 | |
| 80 | # Parse properties of the current node |
| 81 | properties, remaining = _parse_properties(content) |
| 82 | |
| 83 | children = [] |
| 84 | |
| 85 | # Parse children - can be either semicolon nodes or parenthesized variations |
| 86 | while remaining: |
| 87 | if remaining.startswith(';'): |
| 88 | # Parse a child node created by semicolon |
| 89 | child_tree = _parse_tree(remaining) |
| 90 | children.append(child_tree) |
| 91 | remaining = "" # All remaining content is consumed by the child |
| 92 | elif remaining.startswith('('): |
| 93 | # Parse a variation in parentheses |
| 94 | # Find the matching parenthesis for this child |
| 95 | balance = 1 |
| 96 | i = 1 |
| 97 | while i < len(remaining) and balance > 0: |
| 98 | if remaining[i] == '(': |
| 99 | balance += 1 |
| 100 | elif remaining[i] == ')': |
| 101 | balance -= 1 |
| 102 | i += 1 |
| 103 | |
| 104 | # Edge Case: Unmatched parenthesis |
| 105 | if balance != 0: |
| 106 | raise ValueError("tree missing") |
| 107 | |
| 108 | # Parse the child tree |
| 109 | child_content = remaining[1:i-1] |
| 110 | child_tree = _parse_tree(child_content) |
| 111 | children.append(child_tree) |
| 112 | |
| 113 | # Move to the next potential child |
| 114 | remaining = remaining[i:] |
| 115 | else: |
| 116 | # No more children |
| 117 | break |
| 118 | |
| 119 | return SgfTree(properties, children) |
| 120 | |
| 121 | |
| 122 | def _parse_properties(content: str) -> tuple[dict, str]: |
| 123 | """ |
| 124 | Parse properties from the content string. |
| 125 | |
| 126 | Args: |
| 127 | content: String containing properties |
| 128 | |
| 129 | Returns: |
| 130 | tuple: (properties dictionary, remaining string) |
| 131 | """ |
| 132 | properties = {} |
| 133 | remaining = content |
| 134 | |
| 135 | while remaining and remaining[0].isalpha(): |
| 136 | # Parse key |
| 137 | key_end = 0 |
| 138 | while key_end < len(remaining) and remaining[key_end].isalpha(): |
| 139 | key_end += 1 |
| 140 | |
| 141 | key = remaining[:key_end] |
| 142 | |
| 143 | # Edge Case: Property key is not uppercase |
| 144 | if key != key.upper(): |
| 145 | raise ValueError("property must be in uppercase") |
| 146 | |
| 147 | remaining = remaining[key_end:] |
| 148 | |
| 149 | # Edge Case: Missing opening bracket |
| 150 | if not remaining.startswith('['): |
| 151 | raise ValueError("properties without delimiter") |
| 152 | |
| 153 | # Parse values |
| 154 | values = [] |
| 155 | while remaining.startswith('['): |
| 156 | remaining = remaining[1:] # Skip opening bracket |
| 157 | |
| 158 | # Parse value |
| 159 | value, remaining = _parse_value(remaining) |
| 160 | |
| 161 | # Edge Case: Missing closing bracket |
| 162 | if not remaining.startswith(']'): |
| 163 | raise ValueError("properties without delimiter") |
| 164 | |
| 165 | remaining = remaining[1:] # Skip closing bracket |
| 166 | values.append(value) |
| 167 | |
| 168 | properties[key] = values |
| 169 | |
| 170 | return properties, remaining |
| 171 | |
| 172 | |
| 173 | def _parse_value(content: str) -> tuple[str, str]: |
| 174 | """ |
| 175 | Parse a value from the content string. |
| 176 | |
| 177 | Args: |
| 178 | content: String containing a value |
| 179 | |
| 180 | Returns: |
| 181 | tuple: (parsed value, remaining string) |
| 182 | """ |
| 183 | value = [] |
| 184 | i = 0 |
| 185 | |
| 186 | while i < len(content): |
| 187 | if content[i] == ']': |
| 188 | # End of value |
| 189 | break |
| 190 | |
| 191 | if content[i] == '\\': |
| 192 | # Handle escape character |
| 193 | i += 1 |
| 194 | # Edge Case: Escape at end of value |
| 195 | if i >= len(content): |
| 196 | raise ValueError("properties without delimiter") |
| 197 | |
| 198 | char = content[i] |
| 199 | |
| 200 | # If next character is newline, remove the backslash and newline |
| 201 | if char == '\n': |
| 202 | # Do nothing - remove the backslash and newline |
| 203 | pass |
| 204 | elif char.isspace(): |
| 205 | # Any whitespace character after \ follows the normal rules |
| 206 | # All whitespace characters other than newline are converted to spaces |
| 207 | value.append(' ') |
| 208 | else: |
| 209 | # Any non-whitespace character after \ is inserted as-is |
| 210 | value.append(char) |
| 211 | else: |
| 212 | char = content[i] |
| 213 | if char == '\n': |
| 214 | # Newlines remain as newlines |
| 215 | value.append(char) |
| 216 | elif char.isspace() and char != '\n': |
| 217 | # All whitespace characters other than newline are converted to spaces |
| 218 | value.append(' ') |
| 219 | else: |
| 220 | value.append(char) |
| 221 | i += 1 |
| 222 | |
| 223 | return ''.join(value), content[i:] |
| 224 | |
| 225 | # Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree missing semicolon, Unmatched parenthesis, Property key is not uppercase, Missing opening bracket, Missing closing bracket, Escape at end of value |