| 26 | 26 | |
| 27 | 27 | |
def parse(input_string: str) -> SgfTree:
    """Parse a complete SGF document into an ``SgfTree``.

    The input must be exactly one parenthesised tree whose first node is
    introduced by ``;``.

    Args:
        input_string: The raw SGF text, e.g. ``"(;A[b];C[d])"``.

    Returns:
        The tree rooted at the first node of the document.

    Raises:
        ValueError: ``"tree missing"`` if the input is empty, is not wrapped
            in parentheses, or does not begin with a node;
            ``"tree with no nodes"`` for ``"()"``;
            ``"unexpected content after tree"`` if text remains once the root
            node and its descendants have been parsed. Errors raised while
            parsing nodes propagate unchanged.
    """
    if not input_string:
        raise ValueError("tree missing")

    if not (input_string.startswith("(") and input_string.endswith(")")):
        raise ValueError("tree missing")

    # Strip the outer parentheses; what is left must be a node sequence.
    content = input_string[1:-1]

    if not content:
        raise ValueError("tree with no nodes")

    if not content.startswith(";"):
        raise ValueError("tree missing")

    tree, remainder = parse_node(content)

    # parse_node stops at the first character it does not understand.
    # Anything left over here would otherwise be dropped silently, hiding
    # malformed input (stray characters, unbalanced parentheses, ...).
    if remainder:
        raise ValueError("unexpected content after tree")

    return tree
| 51 | |
| 52 | |
def parse_node(content: str) -> tuple[SgfTree, str]:
    """Parse one node, plus everything hanging off it, from the front of *content*.

    A node is ``;`` followed by zero or more properties. It is then followed
    either by another node (which becomes its single child) or by zero or
    more parenthesised variations (each of which becomes a child).

    Args:
        content: Text that must begin with ``;``.

    Returns:
        A ``(tree, rest)`` pair where *rest* is the unconsumed tail of
        *content*.

    Raises:
        ValueError: ``"tree missing"`` if *content* does not start with
            ``;``; ``"property must be in uppercase"`` if a property key
            starts with a lowercase letter; ``"duplicate property key"`` if
            the same key appears twice in this node. Errors from
            ``parse_property`` and ``parse_tree`` propagate unchanged.
    """
    if not content.startswith(";"):
        raise ValueError("tree missing")

    rest = content[1:]  # drop the leading semicolon
    properties = {}

    # Properties run for as long as the next character is a letter.
    while rest and rest[0].isalpha():
        if not rest[0].isupper():
            raise ValueError("property must be in uppercase")

        (key, values), rest = parse_property(rest)

        if key in properties:
            # A repeated key is a distinct error from a lowercase key, so it
            # gets its own message instead of reusing the uppercase one.
            raise ValueError("duplicate property key")

        properties[key] = values

    if rest.startswith(";"):
        # A node directly following this one is its only child.
        child, rest = parse_node(rest)
        children = [child]
    else:
        # Otherwise every parenthesised subtree is a sibling variation.
        children = []
        while rest.startswith("("):
            child, rest = parse_tree(rest)
            children.append(child)

    return SgfTree(properties, children), rest
| 89 | |
| 90 | |
def parse_tree(content: str) -> tuple[SgfTree, str]:
    """Parse one parenthesised subtree from the front of *content*.

    Args:
        content: Text that must begin with ``(``.

    Returns:
        A ``(tree, rest)`` pair: the subtree's root node and the text that
        follows its closing ``)``.

    Raises:
        ValueError: ``"tree missing"`` if the opening or closing parenthesis
            is absent. Errors from ``parse_node`` propagate unchanged.
    """
    if content[:1] != "(":
        raise ValueError("tree missing")

    # Everything after the opening parenthesis starts with the root node.
    root, remainder = parse_node(content[1:])

    if remainder[:1] != ")":
        raise ValueError("tree missing")

    # Skip the closing parenthesis before handing the tail back.
    return root, remainder[1:]
| 105 | |
| 106 | |
def parse_property(content: str) -> tuple[tuple[str, list[str]], str]:
    """Parse one property (key plus one or more values) from the front of *content*.

    Args:
        content: Text beginning with the property key, e.g. ``"AB[x][y];..."``.

    Returns:
        ``((key, values), rest)`` where *values* holds the decoded text of
        each ``[...]`` group and *rest* is the unconsumed tail of *content*.

    Raises:
        ValueError: ``"property must be in uppercase"`` if the key is empty
            or contains any letter that is not uppercase (e.g. ``"Aa"``);
            ``"properties without delimiter"`` if the key is not followed by
            ``[``. Errors from ``parse_value`` propagate unchanged.
    """
    # Take the whole run of letters as the key so that a mixed-case key such
    # as "Aa" is reported as a casing error rather than as a missing "[".
    end = 0
    while end < len(content) and content[end].isalpha():
        end += 1
    key = content[:end]

    if not key or not all(ch.isupper() for ch in key):
        raise ValueError("property must be in uppercase")

    if not content.startswith("[", end):
        raise ValueError("properties without delimiter")

    rest = content[end:]
    values = []

    # A property may carry several consecutive bracketed values.
    while rest.startswith("["):
        value, rest = parse_value(rest[1:])  # skip the opening bracket
        values.append(value)

    return (key, values), rest
| 132 | |
| 133 | |
def parse_value(content: str) -> tuple[str, str]:
    """Decode one property value, starting just after its opening ``[``.

    Decoding rules:
      * ``]`` ends the value.
      * ``\\`` escapes the next character: an escaped newline is dropped,
        any other escaped whitespace becomes a single space, and every other
        escaped character (including ``]`` and ``\\``) is kept literally.
      * An unescaped newline is kept; any other unescaped whitespace becomes
        a single space.

    Args:
        content: Text immediately following the opening bracket.

    Returns:
        ``(value, rest)`` where *rest* is the text after the closing ``]``.

    Raises:
        ValueError: ``"properties without delimiter"`` if the text ends
            before an unescaped ``]`` is found, or ends right after a
            backslash.
    """
    # Collect pieces in a list and join once instead of growing a string
    # character by character.
    pieces = []
    length = len(content)
    i = 0

    while i < length:
        ch = content[i]

        if ch == "]":
            return "".join(pieces), content[i + 1:]

        if ch == "\\":
            i += 1
            if i >= length:
                # Dangling backslash: the value can never be closed.
                raise ValueError("properties without delimiter")

            escaped = content[i]
            if escaped == "\n":
                # Backslash-newline is a line continuation: emit nothing.
                pass
            elif escaped.isspace():
                pieces.append(" ")
            else:
                pieces.append(escaped)
        elif ch == "\n" or not ch.isspace():
            # Newlines and ordinary characters pass through unchanged.
            pieces.append(ch)
        else:
            # Tabs and other non-newline whitespace collapse to a space.
            pieces.append(" ")

        i += 1

    # Ran off the end without seeing the closing bracket.
    raise ValueError("properties without delimiter")
| 172 | |
| 173 | # Handled Edge Cases: Empty input string, Missing parentheses at start or end, Empty tree with no nodes, First node doesn't start with semicolon, Node doesn't start with semicolon, Duplicate property key in the same node, Tree doesn't start with parenthesis, Tree doesn't end with closing parenthesis, Property key is missing or not uppercase, Property has no values (missing opening bracket), Escape character at end of value, Reached end of content without finding closing bracket |