| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is malformed or doesn't follow SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Input doesn't start with '(' |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Input doesn't end with ')' |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Parse the tree, starting after the initial '(' |
| 54 | tree, pos = _parse_tree(input_string, 1) |
| 55 | |
| 56 | # Edge Case: Parser didn't consume the entire string |
| 57 | if pos != len(input_string) - 1: # -1 because we expect the final ')' |
| 58 | raise ValueError("tree missing") |
| 59 | |
| 60 | return tree |
| 61 | |
| 62 | |
| 63 | def _parse_tree(s: str, pos: int) -> tuple[SgfTree, int]: |
| 64 | """ |
| 65 | Parse a tree from the SGF string starting at position pos. |
| 66 | |
| 67 | Args: |
| 68 | s: The SGF string |
| 69 | pos: The starting position in the string |
| 70 | |
| 71 | Returns: |
| 72 | tuple: (SgfTree, new position) |
| 73 | """ |
| 74 | # Edge Case: No semicolon after opening parenthesis |
| 75 | if pos >= len(s) or s[pos] != ';': |
| 76 | raise ValueError("tree with no nodes") |
| 77 | |
| 78 | pos += 1 # Skip the ';' |
| 79 | |
| 80 | # Parse the node properties |
| 81 | properties = {} |
| 82 | while pos < len(s) and s[pos].isalpha(): |
| 83 | key, values, pos = _parse_property(s, pos) |
| 84 | |
| 85 | # Edge Case: Property key is not uppercase |
| 86 | if not key.isupper(): |
| 87 | raise ValueError("property must be in uppercase") |
| 88 | |
| 89 | properties[key] = values |
| 90 | |
| 91 | # Parse children - handle both sequential nodes and variations |
| 92 | children = [] |
| 93 | |
| 94 | # First handle sequential nodes (separated by semicolons) |
| 95 | if pos < len(s) and s[pos] == ';': |
| 96 | # This is a sequential node, create a chain |
| 97 | child, pos = _parse_tree(s, pos) # Don't skip the ';' as _parse_tree expects it |
| 98 | children.append(child) |
| 99 | |
| 100 | # Then handle variations (nodes in parentheses) |
| 101 | while pos < len(s) and s[pos] == '(': |
| 102 | child, pos = _parse_tree(s, pos + 1) # Skip the '(' |
| 103 | children.append(child) |
| 104 | |
| 105 | # Skip the closing ')' |
| 106 | if pos < len(s) and s[pos] == ')': |
| 107 | pos += 1 |
| 108 | else: |
| 109 | raise ValueError("tree missing") |
| 110 | |
| 111 | return SgfTree(properties, children), pos |
| 112 | |
| 113 | |
| 114 | def _parse_property(s: str, pos: int) -> tuple[str, list[str], int]: |
| 115 | """ |
| 116 | Parse a property (key with one or more values) from the SGF string. |
| 117 | |
| 118 | Args: |
| 119 | s: The SGF string |
| 120 | pos: The starting position in the string |
| 121 | |
| 122 | Returns: |
| 123 | tuple: (key, list of values, new position) |
| 124 | """ |
| 125 | # Parse the key |
| 126 | key_start = pos |
| 127 | while pos < len(s) and s[pos].isalpha(): |
| 128 | pos += 1 |
| 129 | |
| 130 | # Edge Case: No key found |
| 131 | if pos == key_start: |
| 132 | raise ValueError("properties without delimiter") |
| 133 | |
| 134 | key = s[key_start:pos] |
| 135 | |
| 136 | # Edge Case: No values after key |
| 137 | if pos >= len(s) or s[pos] != '[': |
| 138 | raise ValueError("properties without delimiter") |
| 139 | |
| 140 | # Parse values |
| 141 | values = [] |
| 142 | while pos < len(s) and s[pos] == '[': |
| 143 | pos += 1 # Skip the '[' |
| 144 | |
| 145 | # Parse the value, handling escapes |
| 146 | value_chars = [] |
| 147 | while pos < len(s): |
| 148 | if s[pos] == '\\': |
| 149 | pos += 1 |
| 150 | if pos < len(s): |
| 151 | # Handle escaped characters according to SGF text rules |
| 152 | char = s[pos] |
| 153 | if char == '\n': |
| 154 | # Newlines immediately after \ are removed |
| 155 | pass # Don't add anything to value_chars |
| 156 | else: |
| 157 | # Any other character after \ is inserted as-is |
| 158 | value_chars.append(char) |
| 159 | pos += 1 # Increment position after handling escaped character |
| 160 | else: |
| 161 | # Edge Case: Escape at end of string |
| 162 | raise ValueError("properties without delimiter") |
| 163 | elif s[pos] == ']': |
| 164 | # End of value |
| 165 | break |
| 166 | elif s[pos].isalpha(): |
| 167 | # Start of new property - end current value |
| 168 | break |
| 169 | else: |
| 170 | value_chars.append(s[pos]) |
| 171 | pos += 1 |
| 172 | |
| 173 | # Check if we ended value parsing due to finding start of new property |
| 174 | # If so, we don't expect a closing bracket at current position |
| 175 | if not (pos < len(s) and s[pos].isalpha()): |
| 176 | # Edge Case: Unclosed bracket |
| 177 | if pos >= len(s) or s[pos] != ']': |
| 178 | raise ValueError("properties without delimiter") |
| 179 | # If we did end due to new property, we skip the bracket check |
| 180 | |
| 181 | # Process the value according to SGF text rules |
| 182 | value = _process_text_value(''.join(value_chars)) |
| 183 | values.append(value) |
| 184 | |
| 185 | # Only skip closing bracket if we actually found one |
| 186 | if pos < len(s) and s[pos] == ']': |
| 187 | pos += 1 # Skip the ']' |
| 188 | # If we ended due to new property, we don't skip anything |
| 189 | |
| 190 | return key, values, pos |
| 191 | |
| 192 | |
| 193 | def _process_text_value(value: str) -> str: |
| 194 | """ |
| 195 | Process a text value according to SGF text type rules. |
| 196 | |
| 197 | Args: |
| 198 | value: The raw text value (after escape processing) |
| 199 | |
| 200 | Returns: |
| 201 | str: The processed text value |
| 202 | """ |
| 203 | result = [] |
| 204 | i = 0 |
| 205 | while i < len(value): |
| 206 | char = value[i] |
| 207 | if char in ' \t\r': |
| 208 | # All whitespace characters other than newline are converted to spaces |
| 209 | result.append(' ') |
| 210 | elif char == '\n': |
| 211 | # Newlines remain as newlines |
| 212 | result.append(char) |
| 213 | else: |
| 214 | result.append(char) |
| 215 | i += 1 |
| 216 | |
| 217 | return ''.join(result) |
| 218 | |
| 219 | # Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', No semicolon after opening parenthesis, Property key is not uppercase, No key found, No values after key, Unclosed bracket, Escape at end of string, Parser didn't consume the entire string |
| 220 | # Handled Edge Cases: Property key is not uppercase, No key found, No values after key, Unclosed bracket |
| 221 | # Handled Edge Cases: Escape at end of string |