| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | if not input_string: |
| 30 | # Edge Case: Empty input string |
| 31 | raise ValueError("tree missing") |
| 32 | |
| 33 | if not input_string.startswith('('): |
| 34 | # Edge Case: Input does not start with '(' |
| 35 | raise ValueError("tree missing") |
| 36 | |
| 37 | # Parse the tree and get the result along with the remaining string |
| 38 | tree, remaining = _parse_tree(input_string) |
| 39 | |
| 40 | # Edge Case: There's content after the main tree |
| 41 | if remaining: |
| 42 | raise ValueError("tree missing") |
| 43 | |
| 44 | return tree |
| 45 | |
| 46 | |
| 47 | def _parse_tree(s: str) -> tuple[SgfTree, str]: |
| 48 | if not s.startswith('('): |
| 49 | # Edge Case: Tree doesn't start with '(' |
| 50 | raise ValueError("tree missing") |
| 51 | |
| 52 | # Move past the opening parenthesis |
| 53 | s = s[1:] |
| 54 | |
| 55 | # Edge Case: Empty tree with just parentheses |
| 56 | if not s: |
| 57 | raise ValueError("tree with no nodes") |
| 58 | |
| 59 | # Parse the sequence of nodes |
| 60 | nodes = [] |
| 61 | while s and not s.startswith(')') and not s.startswith('('): |
| 62 | node, s = _parse_node(s) |
| 63 | nodes.append(node) |
| 64 | |
| 65 | # Edge Case: No nodes in the tree |
| 66 | if not nodes: |
| 67 | raise ValueError("tree with no nodes") |
| 68 | |
| 69 | # Handle variations (children) |
| 70 | children = [] |
| 71 | while s and s.startswith('('): |
| 72 | child, s = _parse_tree(s) |
| 73 | children.append(child) |
| 74 | |
| 75 | # Move past the closing parenthesis |
| 76 | if not s or not s.startswith(')'): |
| 77 | # Edge Case: Missing closing parenthesis |
| 78 | raise ValueError("tree missing") |
| 79 | s = s[1:] |
| 80 | |
| 81 | # Construct the tree by chaining nodes |
| 82 | # The last node is the deepest, and we build up from there |
| 83 | current_tree = SgfTree(properties=nodes[-1].properties, children=children) |
| 84 | |
| 85 | # Iterate backwards through nodes to build parent structure |
| 86 | for i in range(len(nodes) - 2, -1, -1): |
| 87 | current_tree = SgfTree(properties=nodes[i].properties, children=[current_tree]) |
| 88 | |
| 89 | return current_tree, s |
| 90 | |
| 91 | |
| 92 | def _parse_node(s: str) -> tuple[SgfTree, str]: |
| 93 | if not s.startswith(';'): |
| 94 | # Edge Case: Node doesn't start with ';' |
| 95 | raise ValueError("properties without delimiter") |
| 96 | |
| 97 | # Move past the semicolon |
| 98 | s = s[1:] |
| 99 | |
| 100 | properties = {} |
| 101 | |
| 102 | # Parse properties while we have uppercase letters |
| 103 | while s and s[0].isupper(): |
| 104 | prop, s = _parse_property(s) |
| 105 | key, values = prop |
| 106 | |
| 107 | # Edge Case: Duplicate property key |
| 108 | if key in properties: |
| 109 | raise ValueError("properties without delimiter") |
| 110 | |
| 111 | properties[key] = values |
| 112 | |
| 113 | # Edge Case: If we have content that doesn't start with valid node/tree delimiters |
| 114 | if s and not (s.startswith(';') or s.startswith('(') or s.startswith(')')): |
| 115 | # Check if it starts with lowercase - that's a property error |
| 116 | if s[0].isalpha() and s[0].islower(): |
| 117 | raise ValueError("property must be in uppercase") |
| 118 | else: |
| 119 | raise ValueError("properties without delimiter") |
| 120 | |
| 121 | return SgfTree(properties=properties), s |
| 122 | |
| 123 | |
| 124 | def _parse_property(s: str) -> tuple[tuple[str, list[str]], str]: |
| 125 | # Parse the key (must be uppercase) |
| 126 | key = '' |
| 127 | # Check if the first character is uppercase |
| 128 | if not s or not s[0].isupper(): |
| 129 | raise ValueError("property must be in uppercase") |
| 130 | |
| 131 | while s and s[0].isupper(): |
| 132 | key += s[0] |
| 133 | s = s[1:] |
| 134 | |
| 135 | # Edge Case: Property key is empty (should not happen with the above check, but just in case) |
| 136 | if not key: |
| 137 | raise ValueError("properties without delimiter") |
| 138 | |
| 139 | # Edge Case: Missing opening bracket for value |
| 140 | if not s or s[0] != '[': |
| 141 | raise ValueError("properties without delimiter") |
| 142 | |
| 143 | values = [] |
| 144 | |
| 145 | # Parse all values for this key |
| 146 | while s and s.startswith('['): |
| 147 | value, s = _parse_value(s[1:]) # Skip the opening bracket |
| 148 | values.append(value) |
| 149 | |
| 150 | # Edge Case: Missing closing bracket |
| 151 | if not s or s[0] != ']': |
| 152 | raise ValueError("properties without delimiter") |
| 153 | s = s[1:] # Skip the closing bracket |
| 154 | |
| 155 | return (key, values), s |
| 156 | |
| 157 | |
| 158 | def _parse_value(s: str) -> tuple[str, str]: |
| 159 | result = '' |
| 160 | i = 0 |
| 161 | |
| 162 | while i < len(s): |
| 163 | char = s[i] |
| 164 | |
| 165 | if char == '\\': |
| 166 | # Look at the next character |
| 167 | i += 1 |
| 168 | if i >= len(s): |
| 169 | # Edge Case: Escape character at end of string |
| 170 | result += '\\' |
| 171 | break |
| 172 | |
| 173 | next_char = s[i] |
| 174 | |
| 175 | # If next character is newline, skip both \ and \n |
| 176 | if next_char == '\n': |
| 177 | # Edge Case: Backslash followed by newline - remove both |
| 178 | i += 1 |
| 179 | else: |
| 180 | # Edge Case: Any other character after backslash is added as-is |
| 181 | result += next_char |
| 182 | i += 1 |
| 183 | elif char == ']': |
| 184 | # End of value - stop at first unescaped closing bracket |
| 185 | break |
| 186 | elif char in (' ', '\t', '\r'): |
| 187 | # Edge Case: Whitespace characters (except newline) converted to space |
| 188 | result += ' ' |
| 189 | i += 1 |
| 190 | elif char == '\n': |
| 191 | # Edge Case: Newline characters preserved |
| 192 | result += char |
| 193 | i += 1 |
| 194 | else: |
| 195 | # Edge Case: Regular characters added as-is |
| 196 | result += char |
| 197 | i += 1 |
| 198 | |
| 199 | return result, s[i:] |
| 200 | |
| 201 | # Handled Edge Cases: Empty input string; Input does not start with '('; Tree doesn't start with '('; Empty tree with just parentheses; No nodes in the tree; Missing closing parenthesis; Node doesn't start with ';'; Property key is empty; Property not in uppercase; Missing opening bracket for value; Missing closing bracket; Escape character at end of string; Backslash followed by newline - remove both; Any other character after backslash is added as-is; Whitespace characters (except newline) converted to space; Newline characters preserved; Regular characters added as-is; Duplicate property key; Content after the main tree |