| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | if not input_string: |
| 30 | # Edge Case: Empty input string |
| 31 | raise ValueError("tree missing") |
| 32 | |
| 33 | if not input_string.startswith('('): |
| 34 | # Edge Case: Input does not start with '(' |
| 35 | raise ValueError("tree missing") |
| 36 | |
| 37 | tree, pos = _parse_tree(input_string, 1) |
| 38 | |
| 39 | if pos >= len(input_string) or input_string[pos] != ')': |
| 40 | # Edge Case: Tree not properly closed |
| 41 | raise ValueError("tree missing") |
| 42 | |
| 43 | # Check if there are extra characters after the closing parenthesis |
| 44 | if pos + 1 != len(input_string): |
| 45 | # Edge Case: Extra characters after the tree |
| 46 | raise ValueError("tree missing") |
| 47 | |
| 48 | return tree |
| 49 | |
| 50 | |
| 51 | def _parse_tree(s: str, pos: int) -> tuple[SgfTree, int]: |
| 52 | if pos >= len(s) or s[pos] != ';': |
| 53 | # Edge Case: Tree does not start with a node (semicolon) |
| 54 | raise ValueError("tree with no nodes") |
| 55 | |
| 56 | # Parse the first node |
| 57 | node, pos = _parse_node(s, pos + 1) |
| 58 | |
| 59 | # Parse any subsequent nodes in this tree (sequential nodes) |
| 60 | # In SGF, sequential nodes form a parent-child chain |
| 61 | current_tree = SgfTree(node, []) |
| 62 | while pos < len(s) and s[pos] == ';': |
| 63 | # Parse the next node as a child of the current node |
| 64 | child_node, pos = _parse_node(s, pos + 1) |
| 65 | # Create a tree with this node and any of its children |
| 66 | child_tree = SgfTree(child_node, []) |
| 67 | # Add this child to the current tree's children |
| 68 | current_tree.children.append(child_tree) |
| 69 | # Make this child the current tree for the next iteration |
| 70 | current_tree = child_tree |
| 71 | |
| 72 | # The children list should contain the children of the root node only |
| 73 | children = SgfTree(node, []).children if SgfTree(node, []).children else [] |
| 74 | |
| 75 | # Parse any child variations (subtrees in parentheses) |
| 76 | while pos < len(s) and s[pos] == '(': |
| 77 | child, pos = _parse_tree(s, pos + 1) |
| 78 | children.append(child) |
| 79 | if pos >= len(s) or s[pos] != ')': |
| 80 | # Edge Case: Missing closing parenthesis for child tree |
| 81 | raise ValueError("tree missing") |
| 82 | pos += 1 |
| 83 | |
| 84 | return SgfTree(node, children), pos |
| 85 | |
| 86 | # Parse any child variations (subtrees in parentheses) |
| 87 | while pos < len(s) and s[pos] == '(': |
| 88 | child, pos = _parse_tree(s, pos + 1) |
| 89 | children.append(child) |
| 90 | if pos >= len(s) or s[pos] != ')': |
| 91 | # Edge Case: Missing closing parenthesis for child tree |
| 92 | raise ValueError("tree missing") |
| 93 | pos += 1 |
| 94 | |
| 95 | return SgfTree(node, children), pos |
| 96 | |
| 97 | |
| 98 | # Handled Edge Cases: Empty input string, Input does not start with '(', Extra characters after the tree, Tree does not start with a node (semicolon), Missing closing parenthesis for child tree, Duplicate key in the same node, Property without any values, Empty key, Key is not in uppercase, Escape character at end of string, Unclosed value bracket |
| 99 | |
| 100 | |
| 101 | def _parse_node(s: str, pos: int) -> tuple[dict, int]: |
| 102 | properties = {} |
| 103 | |
| 104 | while pos < len(s) and s[pos] != ';' and s[pos] != ')' and s[pos] != '(': |
| 105 | key, pos = _parse_key(s, pos) |
| 106 | |
| 107 | if key in properties: |
| 108 | # Edge Case: Duplicate key in the same node |
| 109 | raise ValueError("properties without delimiter") |
| 110 | |
| 111 | values = [] |
| 112 | while pos < len(s) and s[pos] == '[': |
| 113 | value, pos = _parse_value(s, pos + 1) |
| 114 | values.append(value) |
| 115 | |
| 116 | if not values: |
| 117 | # Edge Case: Property without any values |
| 118 | raise ValueError("properties without delimiter") |
| 119 | |
| 120 | properties[key] = values |
| 121 | |
| 122 | return properties, pos |
| 123 | |
| 124 | |
| 125 | def _parse_key(s: str, pos: int) -> tuple[str, int]: |
| 126 | start = pos |
| 127 | |
| 128 | while pos < len(s) and s[pos].isalpha(): |
| 129 | pos += 1 |
| 130 | |
| 131 | key = s[start:pos] |
| 132 | |
| 133 | if not key: |
| 134 | # Edge Case: Empty key |
| 135 | raise ValueError("properties without delimiter") |
| 136 | |
| 137 | if not key.isupper(): |
| 138 | # Edge Case: Key is not in uppercase |
| 139 | raise ValueError("property must be in uppercase") |
| 140 | |
| 141 | return key, pos |
| 142 | |
| 143 | |
| 144 | def _parse_value(s: str, pos: int) -> tuple[str, int]: |
| 145 | value = [] |
| 146 | |
| 147 | while pos < len(s): |
| 148 | if s[pos] == '\\': |
| 149 | # Handle escape character |
| 150 | pos += 1 |
| 151 | if pos >= len(s): |
| 152 | # Edge Case: Escape character at end of string |
| 153 | raise ValueError("properties without delimiter") |
| 154 | |
| 155 | char = s[pos] |
| 156 | # In SGF, \n is literal backslash + n, not a newline escape sequence |
| 157 | # Any character after backslash is inserted as-is |
| 158 | value.append(char) |
| 159 | pos += 1 |
| 160 | elif s[pos] == ']': |
| 161 | # End of value |
| 162 | break |
| 163 | elif s[pos] == '\n': |
| 164 | # Regular newline |
| 165 | value.append(s[pos]) |
| 166 | pos += 1 |
| 167 | elif s[pos].isspace(): |
| 168 | # All whitespace other than newline becomes space |
| 169 | value.append(' ') |
| 170 | pos += 1 |
| 171 | else: |
| 172 | value.append(s[pos]) |
| 173 | pos += 1 |
| 174 | |
| 175 | if pos >= len(s): |
| 176 | # Edge Case: Unclosed value bracket |
| 177 | raise ValueError("properties without delimiter") |
| 178 | |
| 179 | # Skip the closing bracket |
| 180 | pos += 1 |
| 181 | |
| 182 | return ''.join(value), pos |
| 183 | |
| 184 | # Handled Edge Cases: Empty input string, Input does not start with '(', Extra characters after the tree, Tree does not start with a node (semicolon), Missing closing parenthesis for child tree, Duplicate key in the same node, Property without any values, Empty key, Key is not in uppercase, Escape character at end of string, Unclosed value bracket |