| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is malformed or doesn't conform to SGF format |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Missing opening parenthesis |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Missing closing parenthesis |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Remove the outer parentheses |
| 54 | content = input_string[1:-1] |
| 55 | |
| 56 | # Edge Case: Empty tree |
| 57 | if not content: |
| 58 | raise ValueError("tree with no nodes") |
| 59 | |
| 60 | # Edge Case: Missing semicolon after opening parenthesis |
| 61 | if not content.startswith(';'): |
| 62 | raise ValueError("tree missing") |
| 63 | |
| 64 | return _parse_tree(content) |
| 65 | |
| 66 | |
| 67 | def _parse_tree(content: str) -> SgfTree: |
| 68 | """ |
| 69 | Parse a tree from its content string. |
| 70 | |
| 71 | Args: |
| 72 | content: The content of a tree without outer parentheses |
| 73 | |
| 74 | Returns: |
| 75 | SgfTree: The parsed tree |
| 76 | """ |
| 77 | if not content or not content.startswith(';'): |
| 78 | raise ValueError("tree missing") |
| 79 | |
| 80 | # Parse the first node (properties) |
| 81 | index = 1 # Skip the initial semicolon |
| 82 | properties = {} |
| 83 | |
| 84 | # Parse properties of the current node |
| 85 | while index < len(content) and content[index] not in '();': |
| 86 | # Parse a property key |
| 87 | prop_start = index |
| 88 | |
| 89 | # Property key must be uppercase letters |
| 90 | while index < len(content) and content[index].isalpha(): |
| 91 | if not content[index].isupper(): |
| 92 | raise ValueError("property must be in uppercase") |
| 93 | index += 1 |
| 94 | |
| 95 | # Empty property key |
| 96 | if index == prop_start: |
| 97 | raise ValueError("properties without delimiter") |
| 98 | |
| 99 | key = content[prop_start:index] |
| 100 | |
| 101 | # Missing opening bracket |
| 102 | if index >= len(content) or content[index] != '[': |
| 103 | raise ValueError("properties without delimiter") |
| 104 | |
| 105 | # Parse values for this key |
| 106 | values = [] |
| 107 | while index < len(content) and content[index] == '[': |
| 108 | index += 1 # Skip opening bracket |
| 109 | value_chars = [] |
| 110 | |
| 111 | # Parse value, handling escapes according to SGF Text type rules |
| 112 | while index < len(content): |
| 113 | if content[index] == ']': |
| 114 | # End of value |
| 115 | index += 1 # Skip the closing bracket |
| 116 | break |
| 117 | elif content[index] == '\\': |
| 118 | # Handle escape character |
| 119 | index += 1 |
| 120 | if index >= len(content): |
| 121 | raise ValueError("properties without delimiter") |
| 122 | char = content[index] |
| 123 | # Newlines are removed if they come immediately after a backslash |
| 124 | if char == '\n': |
| 125 | # Newlines are removed if they come immediately after a backslash |
| 126 | pass # Just skip the newline |
| 127 | else: |
| 128 | # Any character after backslash is inserted as-is |
| 129 | value_chars.append(char) |
| 130 | else: |
| 131 | # Regular character - convert whitespace (except newlines) to spaces |
| 132 | char = content[index] |
| 133 | if char in ' \t\r': |
| 134 | value_chars.append(' ') |
| 135 | else: |
| 136 | value_chars.append(char) |
| 137 | index += 1 |
| 138 | |
| 139 | value = ''.join(value_chars) |
| 140 | values.append(value) |
| 141 | |
| 142 | # Add property to dict |
| 143 | properties[key] = values |
| 144 | |
| 145 | # Handle children and sequential nodes |
| 146 | children = [] |
| 147 | |
| 148 | # Handle sequential nodes (separated by semicolons) |
| 149 | if index < len(content) and content[index] == ';': |
| 150 | # Parse the next node as a child |
| 151 | remaining_content = content[index:] |
| 152 | child_tree = _parse_tree(remaining_content) |
| 153 | children.append(child_tree) |
| 154 | # Handle variations (parenthesized branches) |
| 155 | elif index < len(content) and content[index] == '(': |
| 156 | # Parse all variations |
| 157 | while index < len(content) and content[index] == '(': |
| 158 | # Find matching closing parenthesis |
| 159 | paren_count = 1 |
| 160 | start = index |
| 161 | index += 1 |
| 162 | |
| 163 | while index < len(content) and paren_count > 0: |
| 164 | if content[index] == '(': |
| 165 | paren_count += 1 |
| 166 | elif content[index] == ')': |
| 167 | paren_count -= 1 |
| 168 | index += 1 |
| 169 | |
| 170 | # Mismatched parentheses |
| 171 | if paren_count != 0: |
| 172 | raise ValueError("tree missing") |
| 173 | |
| 174 | # Parse child tree |
| 175 | child_content = content[start+1:index-1] |
| 176 | |
| 177 | # Empty child |
| 178 | if not child_content: |
| 179 | raise ValueError("tree with no nodes") |
| 180 | |
| 181 | # Missing semicolon in child |
| 182 | if not child_content.startswith(';'): |
| 183 | raise ValueError("tree missing") |
| 184 | |
| 185 | child_tree = _parse_tree(child_content) |
| 186 | children.append(child_tree) |
| 187 | |
| 188 | return SgfTree(properties, children) |
| 189 | # Handled Edge Cases: Empty input, Missing opening parenthesis, Missing closing parenthesis, Empty tree, Missing semicolon after opening parenthesis, Property key must be uppercase, Empty property key, Missing opening bracket, Missing closing bracket, Mismatched parentheses, Empty child, Missing semicolon in child |