| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | if not input_string: |
| 30 | raise ValueError("tree missing") |
| 31 | |
| 32 | if not input_string.startswith('(') or not input_string.endswith(')'): |
| 33 | raise ValueError("tree missing") |
| 34 | |
| 35 | # Edge Case: Empty tree with just parentheses |
| 36 | if input_string == '()': |
| 37 | raise ValueError("tree with no nodes") |
| 38 | |
| 39 | # Remove the outer parentheses |
| 40 | content = input_string[1:-1] |
| 41 | |
| 42 | if not content: |
| 43 | raise ValueError("tree with no nodes") |
| 44 | |
| 45 | return _parse_tree(content) |
| 46 | |
| 47 | |
| 48 | def _parse_tree(content: str) -> SgfTree: |
| 49 | if not content.startswith(';'): |
| 50 | raise ValueError("tree missing") |
| 51 | |
| 52 | # Find the first node and any subsequent children |
| 53 | nodes = _split_nodes(content) |
| 54 | |
| 55 | if not nodes: |
| 56 | raise ValueError("tree with no nodes") |
| 57 | |
| 58 | # Parse the first node (root) |
| 59 | root_properties = _parse_properties(nodes[0]) |
| 60 | children = [] |
| 61 | |
| 62 | # Parse any children |
| 63 | for node in nodes[1:]: |
| 64 | if node.startswith('('): |
| 65 | # This is a subtree |
| 66 | children.append(_parse_tree(node[1:-1])) |
| 67 | else: |
| 68 | # This is a single node |
| 69 | child_properties = _parse_properties(node) |
| 70 | children.append(SgfTree(properties=child_properties)) |
| 71 | |
| 72 | return SgfTree(properties=root_properties, children=children) |
| 73 | |
| 74 | |
| 75 | def _split_nodes(content: str) -> list: |
| 76 | nodes = [] |
| 77 | i = 0 |
| 78 | content_len = len(content) |
| 79 | while i < content_len: |
| 80 | if content[i] == ';': |
| 81 | # Start of a node |
| 82 | start = i |
| 83 | i += 1 |
| 84 | # Read until we hit another ; or ( or end of string |
| 85 | while i < content_len and content[i] not in ';(': |
| 86 | if content[i] == '[': |
| 87 | # Skip over property values |
| 88 | i += 1 |
| 89 | bracket_count = 1 |
| 90 | while i < content_len and bracket_count > 0: |
| 91 | if content[i] == '\\': |
| 92 | i += 2 # Skip escaped character |
| 93 | elif content[i] == '[': |
| 94 | bracket_count += 1 |
| 95 | i += 1 |
| 96 | elif content[i] == ']': |
| 97 | bracket_count -= 1 |
| 98 | i += 1 |
| 99 | else: |
| 100 | i += 1 |
| 101 | else: |
| 102 | i += 1 |
| 103 | nodes.append(content[start:i]) |
| 104 | elif content[i] == '(': |
| 105 | # Start of a subtree |
| 106 | start = i |
| 107 | i += 1 |
| 108 | paren_count = 1 |
| 109 | while i < content_len and paren_count > 0: |
| 110 | if content[i] == '(': |
| 111 | paren_count += 1 |
| 112 | elif content[i] == ')': |
| 113 | paren_count -= 1 |
| 114 | i += 1 |
| 115 | nodes.append(content[start:i]) |
| 116 | else: |
| 117 | i += 1 |
| 118 | |
| 119 | # Edge Case: No nodes found |
| 120 | if not nodes: |
| 121 | raise ValueError("tree with no nodes") |
| 122 | |
| 123 | return nodes |
| 124 | |
| 125 | |
| 126 | def _parse_properties(node_content: str) -> dict: |
| 127 | if not node_content.startswith(';'): |
| 128 | raise ValueError("tree missing") |
| 129 | |
| 130 | properties = {} |
| 131 | content = node_content[1:] # Remove the leading semicolon |
| 132 | |
| 133 | i = 0 |
| 134 | content_len = len(content) |
| 135 | while i < content_len: |
| 136 | # Skip whitespace |
| 137 | if content[i].isspace(): |
| 138 | i += 1 |
| 139 | continue |
| 140 | |
| 141 | # Parse property key |
| 142 | key_start = i |
| 143 | while i < content_len and content[i].isalpha(): |
| 144 | i += 1 |
| 145 | |
| 146 | # Edge Case: Missing key |
| 147 | if i == key_start: |
| 148 | raise ValueError("properties without delimiter") |
| 149 | |
| 150 | key = content[key_start:i] |
| 151 | |
| 152 | # Edge Case: Key not in uppercase |
| 153 | if key != key.upper() or not key.isalpha(): |
| 154 | raise ValueError("property must be in uppercase") |
| 155 | |
| 156 | # Parse property values |
| 157 | values = [] |
| 158 | while i < content_len and content[i] == '[': |
| 159 | i += 1 # Skip opening bracket |
| 160 | value_start = i |
| 161 | bracket_count = 1 |
| 162 | value_chars = [] |
| 163 | |
| 164 | while i < content_len and bracket_count > 0: |
| 165 | if content[i] == '\\': |
| 166 | # Handle escape character - preserve backslash and next character |
| 167 | # Both will be processed by _process_text_value according to SGF rules |
| 168 | value_chars.append(content[i]) # Add backslash |
| 169 | i += 1 |
| 170 | if i < content_len: |
| 171 | value_chars.append(content[i]) # Add next character |
| 172 | i += 1 |
| 173 | else: |
| 174 | # Edge Case: Escape at end of string |
| 175 | raise ValueError("properties without delimiter") |
| 176 | elif content[i] == ']': |
| 177 | bracket_count -= 1 |
| 178 | if bracket_count > 0: |
| 179 | value_chars.append(content[i]) |
| 180 | i += 1 |
| 181 | elif content[i] == '[' and bracket_count == 1: |
| 182 | bracket_count -= 1 # This is the closing bracket for this value |
| 183 | else: |
| 184 | value_chars.append(content[i]) |
| 185 | i += 1 |
| 186 | |
| 187 | # Edge Case: Unclosed bracket |
| 188 | if bracket_count > 0: |
| 189 | raise ValueError("properties without delimiter") |
| 190 | |
| 191 | value = ''.join(value_chars) |
| 192 | # Process according to SGF text type rules |
| 193 | processed_value = _process_text_value(value) |
| 194 | values.append(processed_value) |
| 195 | |
| 196 | # Edge Case: No values for property |
| 197 | if not values: |
| 198 | raise ValueError("properties without delimiter") |
| 199 | |
| 200 | properties[key] = values |
| 201 | |
| 202 | return properties |
| 203 | |
| 204 | |
| 205 | def _process_text_value(value: str) -> str: |
| 206 | result = [] |
| 207 | i = 0 |
| 208 | value_len = len(value) |
| 209 | while i < value_len: |
| 210 | if value[i] == '\\': |
| 211 | # Edge Case: Backslash at end of value |
| 212 | if i + 1 >= value_len: |
| 213 | result.append('\\') |
| 214 | break |
| 215 | next_char = value[i+1] |
| 216 | # Handle newline removal after backslash |
| 217 | if next_char == '\n': |
| 218 | i += 2 # Skip backslash and newline |
| 219 | continue |
| 220 | # For other characters after backslash, apply whitespace conversion rules |
| 221 | if next_char != '\n' and next_char.isspace(): |
| 222 | # Whitespace other than newline is converted to space |
| 223 | result.append(' ') |
| 224 | else: |
| 225 | # Non-whitespace characters inserted as-is |
| 226 | result.append(next_char) |
| 227 | i += 2 |
| 228 | elif value[i] != '\n' and value[i].isspace(): |
| 229 | # All whitespace other than newline converted to space |
| 230 | result.append(' ') |
| 231 | i += 1 |
| 232 | else: |
| 233 | result.append(value[i]) |
| 234 | i += 1 |
| 235 | return ''.join(result) |
| 236 | |
| 237 | # Handled Edge Cases: Empty input string, Missing delimiters (brackets, semicolons, parentheses), Property keys not in uppercase, Malformed escape sequences in property values, Tree structure without any nodes, Invalid nesting of nodes or variations, Multiple values for a single property, Newline handling in text values with backslashes |