| 26 | 26 | |
| 27 | 27 | |
def parse(input_string: str) -> SgfTree:
    """
    Convert SGF text into an SgfTree.

    Args:
        input_string: The complete SGF text, expected to be wrapped in one
            outer pair of parentheses.

    Returns:
        SgfTree: The root node produced by _parse_tree for the text between
        the outer parentheses.

    Raises:
        ValueError: "tree missing" when the input is empty, does not start
            with '(' or end with ')', or has text left over after the root
            tree; "tree with no nodes" when nothing sits between the outer
            parentheses. Errors raised while parsing the tree propagate.
    """
    # Both delimiter problems share one message, so they are checked together.
    wrapped = (
        input_string
        and input_string.startswith("(")
        and input_string.endswith(")")
    )
    if not wrapped:
        raise ValueError("tree missing")

    # Strip the outer '(' and ')'.
    body = input_string[1:-1]
    if not body:
        raise ValueError("tree with no nodes")

    root, leftover = _parse_tree(body)

    # Anything the tree parser did not consume is not valid at the top level.
    if leftover:
        raise ValueError("tree missing")

    return root
| 64 | |
| 65 | |
def _parse_tree(content: str) -> tuple[SgfTree, str]:
    """
    Parse one node and every child that directly follows it.

    Args:
        content: Text that must begin with the node marker ';'.

    Returns:
        tuple: (SgfTree for the node, text that was not consumed)

    Raises:
        ValueError: "tree with no nodes" when content does not begin with ';'.
            Errors from property or child parsing propagate.
    """
    if not content.startswith(";"):
        raise ValueError("tree with no nodes")

    # Skip the ';' and read this node's own properties.
    node_properties, rest = _parse_properties(content[1:])

    subtrees = []
    # A '(' opens a parenthesised variation; a ';' opens a sequential node,
    # which is stored as a child of the current node. Anything else (or the
    # end of the text) finishes this node.
    while rest.startswith(("(", ";")):
        parse_child = _parse_tree_in_parentheses if rest[0] == "(" else _parse_tree
        subtree, rest = parse_child(rest)
        subtrees.append(subtree)

    return SgfTree(properties=node_properties, children=subtrees), rest
| 102 | |
| 103 | |
def _parse_tree_in_parentheses(content: str) -> tuple[SgfTree, str]:
    """
    Parse a variation, i.e. a tree wrapped in its own '(' ... ')' pair.

    Args:
        content: Text that must begin with '('.

    Returns:
        tuple: (SgfTree for the variation, text following its closing ')')

    Raises:
        ValueError: "tree missing" when the opening '(' or the matching
            closing ')' is absent. Errors from _parse_tree propagate.
    """
    if not content.startswith("("):
        raise ValueError("tree missing")

    # Parse what follows the '('; the tree parser stops at the first
    # character that cannot start a child, which must be the closing ')'.
    subtree, tail = _parse_tree(content[1:])

    if not tail.startswith(")"):
        raise ValueError("tree missing")

    # Drop the ')' before handing the remainder back.
    return subtree, tail[1:]
| 126 | |
| 127 | |
def _parse_properties(content: str) -> tuple[dict, str]:
    """
    Read consecutive `KEY[value][value]...` properties from the front of content.

    Args:
        content: Text positioned just after a node's ';'.

    Returns:
        tuple: (dict mapping each key to its list of values, unconsumed text)

    Raises:
        ValueError: "property must be in uppercase" when a key differs from
            its uppercased form; "properties without delimiter" when a key is
            not followed by '[' or a value is not closed by ']'.
    """
    parsed = {}
    rest = content

    # Slicing with [:1] yields "" at end of text, and "".isalpha() is False,
    # so this also stops on empty input.
    while rest[:1].isalpha():
        # The key is the leading run of alphabetic characters.
        key_len = next(
            (pos for pos, ch in enumerate(rest) if not ch.isalpha()),
            len(rest),
        )
        key, rest = rest[:key_len], rest[key_len:]

        if key.upper() != key:
            raise ValueError("property must be in uppercase")

        # Every key needs at least one bracketed value.
        if rest[:1] != "[":
            raise ValueError("properties without delimiter")

        values = []
        while rest[:1] == "[":
            # Skip '[' and read up to (but not including) the closing ']'.
            text, rest = _parse_value(rest[1:])
            if rest[:1] != "]":
                raise ValueError("properties without delimiter")
            rest = rest[1:]
            values.append(text)

        parsed[key] = values

    return parsed, rest
| 173 | |
| 174 | |
| 175 | def _parse_value(content: str) -> tuple[str, str]: |
| 176 | """ |
| 177 | Parse a value from the content string, handling SGF text escaping rules. |
| 178 | |
| 179 | Args: |
| 180 | content: String content to parse value from |
| 181 | |
| 182 | Returns: |
| 183 | tuple: (parsed_value, remaining_content) |
| 184 | """ |
| 185 | result = [] |
| 186 | i = 0 |
| 187 | |
| 188 | while i < len(content): |
| 189 | char = content[i] |
| 190 | |
| 191 | if char == ']': |
| 192 | # End of value |
| 193 | break |
| 194 | elif char == '\\': |
| 195 | # Escape character |
| 196 | i += 1 |
| 197 | if i >= len(content): |
| 198 | # Edge Case: Escape character at end of content |
| 199 | result.append('\\') |
| 200 | break |
| 201 | |
| 202 | next_char = content[i] |
| 203 | if next_char == '\n': |
| 204 | # Newline immediately after \ is removed |
| 205 | # But if there's content before and after, add a space |
| 206 | if result and i + 1 < len(content) and content[i + 1] != ']': |
| 207 | result.append(' ') |
| 208 | elif next_char.isspace() and next_char != '\n': |
| 209 | # Other whitespace after \ is converted to space |
| 210 | result.append(' ') |
| 211 | else: |
| 212 | # Any non-whitespace character after \ is inserted as-is |
| 213 | result.append(next_char) |
| 214 | elif char.isspace() and char != '\n': |
| 215 | # All whitespace characters other than newline are converted to spaces |
| 216 | result.append(' ') |
| 217 | elif char == '\n': |
| 218 | # Newlines remain as newlines unless immediately after \ |
| 219 | result.append(char) |
| 220 | else: |
| 221 | result.append(char) |
| 222 | |
| 223 | i += 1 |
| 224 | |
| 225 | return ''.join(result), content[i:] |
| 226 | |
| 227 | # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after parentheses removal, missing semicolon, property key not uppercase, missing property delimiter, missing closing bracket for value, escape character at end of content |
| 228 | # Handled Edge Cases: Missing opening parenthesis for child tree, missing closing parenthesis for child tree, remaining content after parsing |
| 229 | # Handled Edge Cases: Property key not uppercase, missing property delimiter, missing closing bracket for value |
| 230 | # Handled Edge Cases: Escape character at end of content, special handling for \n, whitespace conversion rules, newlines removal after \ |