| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input string is not valid SGF |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Missing opening parenthesis |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Missing closing parenthesis |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Remove outer parentheses |
| 54 | content = input_string[1:-1] |
| 55 | |
| 56 | # Edge Case: Empty tree |
| 57 | if not content: |
| 58 | raise ValueError("tree with no nodes") |
| 59 | |
| 60 | # Edge Case: Missing semicolon after opening parenthesis |
| 61 | if not content.startswith(';'): |
| 62 | raise ValueError("tree missing") |
| 63 | |
| 64 | return _parse_tree(content) |
| 65 | |
| 66 | |
| 67 | def _parse_tree(content: str) -> SgfTree: |
| 68 | """ |
| 69 | Parse a tree from its content string. |
| 70 | |
| 71 | Args: |
| 72 | content: The content of a tree without the outer parentheses |
| 73 | |
| 74 | Returns: |
| 75 | SgfTree: The parsed tree |
| 76 | """ |
| 77 | # Skip the initial semicolon |
| 78 | index = 1 |
| 79 | |
| 80 | # Parse properties of the current node |
| 81 | properties = {} |
| 82 | while index < len(content) and content[index] not in '();': |
| 83 | # Parse key |
| 84 | key_start = index |
| 85 | while index < len(content) and content[index].isupper(): |
| 86 | index += 1 |
| 87 | |
| 88 | # Edge Case: Property key is not in uppercase |
| 89 | if index == key_start: |
| 90 | raise ValueError("property must be in uppercase") |
| 91 | |
| 92 | key = content[key_start:index] |
| 93 | |
| 94 | # Edge Case: Property without delimiter |
| 95 | if index >= len(content) or content[index] != '[': |
| 96 | raise ValueError("properties without delimiter") |
| 97 | |
| 98 | # Parse values |
| 99 | values = [] |
| 100 | while index < len(content) and content[index] == '[': |
| 101 | index += 1 # Skip opening bracket |
| 102 | value_start = index |
| 103 | |
| 104 | # Parse value |
| 105 | value_chars = [] |
| 106 | while index < len(content): |
| 107 | if content[index] == ']': |
| 108 | # Found unescaped closing bracket, stop parsing |
| 109 | break |
| 110 | elif content[index] == '\\': |
| 111 | # Handle escape sequences |
| 112 | index += 1 |
| 113 | if index >= len(content): |
| 114 | raise ValueError("tree missing") |
| 115 | |
| 116 | # If next character is newline, skip both \ and \n |
| 117 | if content[index] == '\n': |
| 118 | index += 1 |
| 119 | else: |
| 120 | # For any other character after \, insert as-is |
| 121 | value_chars.append(content[index]) |
| 122 | index += 1 |
| 123 | else: |
| 124 | # Handle whitespace characters |
| 125 | if content[index] in ' \t\r': |
| 126 | value_chars.append(' ') |
| 127 | elif content[index] == '\n': |
| 128 | # Newlines are removed if they come immediately after a \ |
| 129 | # But we need to check if there was a \ before this \n |
| 130 | # In this case, just add the newline as it is not escaped |
| 131 | value_chars.append(content[index]) |
| 132 | else: |
| 133 | value_chars.append(content[index]) |
| 134 | index += 1 |
| 135 | |
| 136 | # Edge Case: Missing closing bracket |
| 137 | if index >= len(content) or content[index] != ']': |
| 138 | raise ValueError("properties without delimiter") |
| 139 | |
| 140 | # Join the characters to form the value |
| 141 | value = ''.join(value_chars) |
| 142 | values.append(value) |
| 143 | index += 1 # Skip closing bracket |
| 144 | |
| 145 | properties[key] = values |
| 146 | |
| 147 | # Parse children |
| 148 | children = [] |
| 149 | |
| 150 | # Handle sequential nodes (semicolon-separated) |
| 151 | if index < len(content) and content[index] == ';': |
| 152 | # Parse the next sequential node |
| 153 | child_content = content[index:] |
| 154 | child_tree = _parse_tree(child_content) |
| 155 | children.append(child_tree) |
| 156 | index = len(content) # Consume all remaining content |
| 157 | |
| 158 | # Handle variations (parenthesized children) |
| 159 | while index < len(content) and content[index] == '(': |
| 160 | # Find the matching closing parenthesis |
| 161 | paren_count = 1 |
| 162 | start = index |
| 163 | index += 1 |
| 164 | |
| 165 | while index < len(content) and paren_count > 0: |
| 166 | if content[index] == '(': |
| 167 | paren_count += 1 |
| 168 | elif content[index] == ')': |
| 169 | paren_count -= 1 |
| 170 | index += 1 |
| 171 | |
| 172 | # Edge Case: Mismatched parentheses |
| 173 | if paren_count != 0: |
| 174 | raise ValueError("tree missing") |
| 175 | |
| 176 | # Parse the child tree |
| 177 | child_content = content[start+1:index-1] |
| 178 | |
| 179 | # Edge Case: Empty child tree |
| 180 | if not child_content: |
| 181 | raise ValueError("tree with no nodes") |
| 182 | |
| 183 | # Edge Case: Missing semicolon in child tree |
| 184 | if not child_content.startswith(';'): |
| 185 | raise ValueError("tree missing") |
| 186 | |
| 187 | child_tree = _parse_tree(child_content) |
| 188 | children.append(child_tree) |
| 189 | |
| 190 | return SgfTree(properties, children) |
| 191 | # Handled Edge Cases: Empty input, Missing opening parenthesis, Missing closing parenthesis, Empty tree, Missing semicolon after opening parenthesis, Property key is not in uppercase, Property without delimiter, Missing closing bracket, Mismatched parentheses, Empty child tree, Missing semicolon in child tree |