| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF (Smart Game Format) string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string containing SGF formatted data |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: A tree structure representing the parsed SGF data |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input string is invalid SGF format |
| 40 | """ |
| 41 | # Edge Case: Empty input or doesn't start with '(' |
| 42 | if not input_string or not input_string.startswith('('): |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Doesn't end with ')' |
| 46 | if not input_string.endswith(')'): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Remove the outer parentheses |
| 50 | content = input_string[1:-1] |
| 51 | |
| 52 | # Edge Case: Empty content after removing parentheses |
| 53 | if not content: |
| 54 | raise ValueError("tree with no nodes") |
| 55 | |
| 56 | # Parse the tree |
| 57 | tree, _ = _parse_tree(content, 0) |
| 58 | return tree |
| 59 | |
| 60 | |
| 61 | def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]: |
| 62 | """ |
| 63 | Parse a tree from the content string starting at the given index. |
| 64 | |
| 65 | Args: |
| 66 | content: The string content to parse |
| 67 | index: The starting index for parsing |
| 68 | |
| 69 | Returns: |
| 70 | tuple: A tuple containing the parsed SgfTree and the next index to parse |
| 71 | """ |
| 72 | # Edge Case: No semicolon to start node |
| 73 | if index >= len(content) or content[index] != ';': |
| 74 | raise ValueError("tree with no nodes") |
| 75 | |
| 76 | index += 1 # Skip the semicolon |
| 77 | |
| 78 | # Parse properties of the current node |
| 79 | properties = {} |
| 80 | while index < len(content) and content[index] not in ['(', ')', ';']: |
| 81 | prop, index = _parse_property(content, index) |
| 82 | key, values = prop |
| 83 | properties[key] = values |
| 84 | |
| 85 | # Parse children |
| 86 | children = [] |
| 87 | |
| 88 | # Handle children in parentheses |
| 89 | while index < len(content) and content[index] == '(': |
| 90 | # Parse a child tree |
| 91 | child, index = _parse_subtree(content, index) |
| 92 | children.append(child) |
| 93 | |
| 94 | # Handle sequential nodes (separated by semicolons) |
| 95 | while index < len(content) and content[index] == ';': |
| 96 | # Parse the next sequential node as a child |
| 97 | child, index = _parse_tree(content, index) |
| 98 | children.append(child) |
| 99 | |
| 100 | return SgfTree(properties, children), index |
| 101 | |
| 102 | |
| 103 | def _parse_subtree(content: str, index: int) -> tuple[SgfTree, int]: |
| 104 | """ |
| 105 | Parse a subtree from the content string starting at the given index. |
| 106 | |
| 107 | Args: |
| 108 | content: The string content to parse |
| 109 | index: The starting index for parsing |
| 110 | |
| 111 | Returns: |
| 112 | tuple: A tuple containing the parsed SgfTree and the next index to parse |
| 113 | """ |
| 114 | # Edge Case: No opening parenthesis |
| 115 | if index >= len(content) or content[index] != '(': |
| 116 | raise ValueError("tree missing") |
| 117 | |
| 118 | index += 1 # Skip the opening parenthesis |
| 119 | |
| 120 | # Parse the tree inside the parentheses |
| 121 | tree, index = _parse_tree(content, index) |
| 122 | |
| 123 | # Edge Case: No closing parenthesis |
| 124 | if index >= len(content) or content[index] != ')': |
| 125 | raise ValueError("tree missing") |
| 126 | |
| 127 | index += 1 # Skip the closing parenthesis |
| 128 | |
| 129 | return tree, index |
| 130 | |
| 131 | |
| 132 | def _parse_property(content: str, index: int) -> tuple[tuple[str, list[str]], int]: |
| 133 | """ |
| 134 | Parse a property from the content string starting at the given index. |
| 135 | |
| 136 | Args: |
| 137 | content: The string content to parse |
| 138 | index: The starting index for parsing |
| 139 | |
| 140 | Returns: |
| 141 | tuple: A tuple containing the parsed property (key, values) and the next index to parse |
| 142 | """ |
| 143 | # Parse the property key |
| 144 | key_start = index |
| 145 | while index < len(content) and content[index].isalpha(): |
| 146 | index += 1 |
| 147 | |
| 148 | key = content[key_start:index] |
| 149 | |
| 150 | # Edge Case: No key found |
| 151 | if index == key_start: |
| 152 | raise ValueError("properties without delimiter") |
| 153 | |
| 154 | # Edge Case: Property not in uppercase (takes precedence over delimiter error) |
| 155 | if not key.isupper(): |
| 156 | raise ValueError("property must be in uppercase") |
| 157 | |
| 158 | # Parse the property values |
| 159 | values = [] |
| 160 | |
| 161 | # Edge Case: No opening bracket after key |
| 162 | if index >= len(content) or content[index] != '[': |
| 163 | raise ValueError("properties without delimiter") |
| 164 | |
| 165 | while index < len(content) and content[index] == '[': |
| 166 | index += 1 # Skip the opening bracket |
| 167 | |
| 168 | # Parse the value |
| 169 | value, index = _parse_value(content, index) |
| 170 | values.append(value) |
| 171 | |
| 172 | # Edge Case: No closing bracket |
| 173 | if index >= len(content) or content[index] != ']': |
| 174 | raise ValueError("properties without delimiter") |
| 175 | |
| 176 | index += 1 # Skip the closing bracket |
| 177 | |
| 178 | return (key, values), index |
| 179 | |
| 180 | |
| 181 | def _parse_value(content: str, index: int) -> tuple[str, int]: |
| 182 | """ |
| 183 | Parse a value from the content string starting at the given index. |
| 184 | This handles SGF Text type escaping rules. |
| 185 | |
| 186 | Args: |
| 187 | content: The string content to parse |
| 188 | index: The starting index for parsing |
| 189 | |
| 190 | Returns: |
| 191 | tuple: A tuple containing the parsed value and the next index to parse |
| 192 | """ |
| 193 | value = [] |
| 194 | while index < len(content) and content[index] != ']': |
| 195 | if content[index] == '\\': |
| 196 | # Handle escape character |
| 197 | index += 1 |
| 198 | |
| 199 | # Edge Case: Escape at end of content |
| 200 | if index >= len(content): |
| 201 | # Treat backslash at end as literal backslash |
| 202 | value.append('\\') |
| 203 | break |
| 204 | |
| 205 | char = content[index] |
| 206 | |
| 207 | # If next character is newline, remove it and replace with space |
| 208 | if char == '\n': |
| 209 | # Newline immediately after \ is removed and replaced with space |
| 210 | value.append(' ') |
| 211 | elif char.isspace(): |
| 212 | # Other whitespace characters after \ are converted to spaces |
| 213 | value.append(' ') |
| 214 | else: |
| 215 | # Any non-whitespace character after \ is inserted as-is |
| 216 | value.append('\\') |
| 217 | value.append(char) |
| 218 | elif content[index] == '\n': |
| 219 | # Newlines remain as newlines (unless escaped) |
| 220 | value.append('\n') |
| 221 | elif content[index].isspace() and content[index] != '\n': |
| 222 | # All whitespace characters other than newline are converted to spaces |
| 223 | value.append(' ') |
| 224 | else: |
| 225 | # Regular character |
| 226 | value.append(content[index]) |
| 227 | index += 1 |
| 228 | |
| 229 | return ''.join(value), index |
| 230 | # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after parentheses removal, missing semicolon, property not in uppercase, no key found, no opening bracket after key, no closing bracket, escape at end of content |