| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | if not input_string: |
| 30 | # Edge Case: Empty input string |
| 31 | raise ValueError("tree missing") |
| 32 | |
| 33 | if not input_string.startswith('(') or not input_string.endswith(')'): |
| 34 | # Edge Case: Missing parentheses at start or end |
| 35 | raise ValueError("tree missing") |
| 36 | |
| 37 | # Remove the outer parentheses |
| 38 | content = input_string[1:-1] |
| 39 | |
| 40 | if not content: |
| 41 | # Edge Case: Empty tree with no nodes |
| 42 | raise ValueError("tree with no nodes") |
| 43 | |
| 44 | # Parse the tree |
| 45 | tree, remaining = _parse_node(content) |
| 46 | |
| 47 | if remaining: |
| 48 | # Edge Case: Unexpected content after parsing |
| 49 | raise ValueError("tree missing") |
| 50 | |
| 51 | return tree |
| 52 | |
| 53 | |
| 54 | def _parse_node(content: str) -> tuple[SgfTree, str]: |
| 55 | if not content.startswith(';'): |
| 56 | # Edge Case: Node must start with semicolon |
| 57 | raise ValueError("tree missing") |
| 58 | |
| 59 | content = content[1:] # Remove the semicolon |
| 60 | properties = {} |
| 61 | children = [] |
| 62 | |
| 63 | # Parse properties |
| 64 | while content and content[0] != '(' and content[0] != ')' and content[0] != ';': |
| 65 | prop, content = _parse_property(content) |
| 66 | key, values = prop |
| 67 | |
| 68 | if key in properties: |
| 69 | # Edge Case: Duplicate property key |
| 70 | raise ValueError("properties without delimiter") |
| 71 | |
| 72 | properties[key] = values |
| 73 | |
| 74 | # Parse children |
| 75 | while content: |
| 76 | if content.startswith('('): |
| 77 | # Child node in parentheses |
| 78 | child, content = _parse_tree(content) |
| 79 | children.append(child) |
| 80 | elif content.startswith(';'): |
| 81 | # Sequential child node (shorthand notation) |
| 82 | child, content = _parse_node(content) |
| 83 | children.append(child) |
| 84 | else: |
| 85 | # Unexpected content |
| 86 | break |
| 87 | |
| 88 | return SgfTree(properties, children), content |
| 89 | |
| 90 | |
| 91 | def _parse_property(content: str) -> tuple[tuple[str, list[str]], str]: |
| 92 | # Parse key |
| 93 | key_end = 0 |
| 94 | while key_end < len(content) and content[key_end].isalpha() and content[key_end].isupper(): |
| 95 | key_end += 1 |
| 96 | |
| 97 | if key_end == 0: |
| 98 | # Edge Case: Property key must exist and be uppercase |
| 99 | raise ValueError("property must be in uppercase") |
| 100 | |
| 101 | key = content[:key_end] |
| 102 | content = content[key_end:] |
| 103 | |
| 104 | if not content.startswith('['): |
| 105 | # Edge Case: Property must have at least one value |
| 106 | raise ValueError("properties without delimiter") |
| 107 | |
| 108 | values = [] |
| 109 | |
| 110 | # Parse values |
| 111 | while content.startswith('['): |
| 112 | value, content = _parse_value(content[1:]) # Remove opening bracket |
| 113 | values.append(value) |
| 114 | |
| 115 | if not content.startswith(']'): |
| 116 | # Edge Case: Value must end with closing bracket |
| 117 | raise ValueError("properties without delimiter") |
| 118 | |
| 119 | content = content[1:] # Remove closing bracket |
| 120 | |
| 121 | return (key, values), content |
| 122 | |
| 123 | |
| 124 | def _parse_value(content: str) -> tuple[str, str]: |
| 125 | result = [] |
| 126 | i = 0 |
| 127 | |
| 128 | while i < len(content): |
| 129 | char = content[i] |
| 130 | |
| 131 | if char == ']': |
| 132 | # End of value |
| 133 | break |
| 134 | elif char == '\\': |
| 135 | # Handle escape character |
| 136 | i += 1 |
| 137 | if i >= len(content): |
| 138 | # Edge Case: Escape character at end of value |
| 139 | raise ValueError("properties without delimiter") |
| 140 | |
| 141 | next_char = content[i] |
| 142 | if next_char == 'n': |
| 143 | # Special case: \n should be treated as newline |
| 144 | result.append('\n') |
| 145 | elif next_char == 't': |
| 146 | # Special case: \t should be treated as tab |
| 147 | result.append('\t') |
| 148 | elif next_char == '\n': |
| 149 | # Newline immediately after backslash is removed |
| 150 | pass |
| 151 | elif next_char in ' \t': |
| 152 | # Whitespace after backslash converted to space |
| 153 | result.append(' ') |
| 154 | else: |
| 155 | # Any other character after backslash is inserted as-is |
| 156 | result.append(next_char) |
| 157 | elif char in ' \t': |
| 158 | # Convert whitespace to space |
| 159 | result.append(' ') |
| 160 | elif char == '\n': |
| 161 | # Keep newlines |
| 162 | result.append(char) |
| 163 | else: |
| 164 | # Regular character |
| 165 | result.append(char) |
| 166 | |
| 167 | i += 1 |
| 168 | |
| 169 | return ''.join(result), content[i:] |
| 170 | |
| 171 | |
| 172 | def _parse_tree(content: str) -> tuple[SgfTree, str]: |
| 173 | if not content.startswith('('): |
| 174 | # Edge Case: Tree must start with opening parenthesis |
| 175 | raise ValueError("tree missing") |
| 176 | |
| 177 | content = content[1:] # Remove opening parenthesis |
| 178 | |
| 179 | # Parse node |
| 180 | node, content = _parse_node(content) |
| 181 | |
| 182 | if not content.startswith(')'): |
| 183 | # Edge Case: Tree must end with closing parenthesis |
| 184 | raise ValueError("tree missing") |
| 185 | |
| 186 | content = content[1:] # Remove closing parenthesis |
| 187 | |
| 188 | return node, content |
| 189 | # Handled Edge Cases: Empty input string, Missing parentheses at start or end, Empty tree with no nodes, Unexpected content after parsing, Node must start with semicolon, Duplicate property key, Property key must exist and be uppercase, Property must have at least one value, Value must end with closing bracket, Escape character at end of value, Tree must start with opening parenthesis, Tree must end with closing parenthesis |