| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is invalid according to SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Input doesn't start with '(' |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Input doesn't end with ')' |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Remove the outer parentheses |
| 54 | content = input_string[1:-1] |
| 55 | |
| 56 | # Edge Case: Empty tree with no nodes |
| 57 | if not content: |
| 58 | raise ValueError("tree with no nodes") |
| 59 | |
| 60 | # Parse the tree |
| 61 | tree, _ = _parse_tree(content, 0) |
| 62 | return tree |
| 63 | |
| 64 | |
| 65 | def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]: |
| 66 | """ |
| 67 | Parse a tree from the content string starting at index. |
| 68 | |
| 69 | Args: |
| 70 | content: The SGF content string |
| 71 | index: The starting index to parse from |
| 72 | |
| 73 | Returns: |
| 74 | tuple: A tuple containing the parsed SgfTree and the next index to parse from |
| 75 | """ |
| 76 | # Edge Case: Missing semicolon at start of node |
| 77 | if index >= len(content) or content[index] != ';': |
| 78 | raise ValueError("tree missing") |
| 79 | |
| 80 | index += 1 # Skip the semicolon |
| 81 | |
| 82 | # Parse properties of the current node |
| 83 | properties = {} |
| 84 | while index < len(content) and content[index] not in '();': |
| 85 | # Parse key |
| 86 | key_start = index |
| 87 | while index < len(content) and content[index].isupper(): |
| 88 | index += 1 |
| 89 | |
| 90 | # Edge Case: Property key is not in uppercase |
| 91 | if index == key_start: |
| 92 | raise ValueError("property must be in uppercase") |
| 93 | |
| 94 | key = content[key_start:index] |
| 95 | |
| 96 | # Edge Case: Missing opening bracket for property value |
| 97 | if index >= len(content) or content[index] != '[': |
| 98 | raise ValueError("properties without delimiter") |
| 99 | |
| 100 | # Parse values |
| 101 | values = [] |
| 102 | while index < len(content) and content[index] == '[': |
| 103 | index += 1 # Skip the opening bracket |
| 104 | value, index = _parse_value(content, index) |
| 105 | |
| 106 | # Edge Case: Missing closing bracket for property value |
| 107 | if index >= len(content) or content[index] != ']': |
| 108 | raise ValueError("properties without delimiter") |
| 109 | |
| 110 | index += 1 # Skip the closing bracket |
| 111 | values.append(value) |
| 112 | |
| 113 | properties[key] = values |
| 114 | |
| 115 | # Parse children |
| 116 | children = [] |
| 117 | |
| 118 | # First, parse children in parentheses (variations) |
| 119 | while index < len(content) and content[index] == '(': |
| 120 | child, index = _parse_tree(content, index+1) # Skip the opening parenthesis |
| 121 | |
| 122 | # Edge Case: Missing closing parenthesis for child |
| 123 | if index >= len(content) or content[index] != ')': |
| 124 | raise ValueError("tree missing") |
| 125 | |
| 126 | index += 1 # Skip the closing parenthesis |
| 127 | children.append(child) |
| 128 | |
| 129 | |
| 130 | # Then, parse sequential nodes (single children without variations) |
| 131 | # For sequential nodes, we parse one node at a time |
| 132 | while index < len(content) and content[index] == ';': |
| 133 | # Parse the next sequential node as a child |
| 134 | child, index = _parse_tree(content, index) |
| 135 | |
| 136 | # Add the sequential node to the children |
| 137 | children.append(child) |
| 138 | |
| 139 | |
| 140 | return SgfTree(properties, children), index |
| 141 | |
| 142 | |
| 143 | def _parse_value(content: str, index: int) -> tuple[str, int]: |
| 144 | """ |
| 145 | Parse a value from the content string starting at index. |
| 146 | |
| 147 | Args: |
| 148 | content: The SGF content string |
| 149 | index: The starting index to parse from |
| 150 | |
| 151 | Returns: |
| 152 | tuple: A tuple containing the parsed value string and the next index to parse from |
| 153 | """ |
| 154 | result = [] |
| 155 | while index < len(content) and content[index] != ']': |
| 156 | if content[index] == '\\': |
| 157 | # Handle escape sequences |
| 158 | index += 1 |
| 159 | if index >= len(content): |
| 160 | # Edge Case: Escape character at end of content |
| 161 | raise ValueError("properties without delimiter") |
| 162 | |
| 163 | char = content[index] |
| 164 | if char == 'n' and (index+1 >= len(content) or content[index+1] == ']'): |
| 165 | # Special case: \n at end of value is treated as literal 'n' |
| 166 | result.append('n') |
| 167 | elif char == 't' and (index+1 >= len(content) or content[index+1] == ']'): |
| 168 | # Special case: \t at end of value is treated as literal 't' |
| 169 | result.append('t') |
| 170 | elif char in ' \n\r\t': |
| 171 | # Whitespace after backslash |
| 172 | if char == '\n': |
| 173 | # Newline is removed if it comes immediately after a backslash |
| 174 | pass |
| 175 | else: |
| 176 | # Other whitespace is converted to space |
| 177 | result.append(' ') |
| 178 | else: |
| 179 | # Any non-whitespace character after backslash is inserted as-is |
| 180 | result.append(char) |
| 181 | elif content[index] == '\n': |
| 182 | # Newlines remain as newlines |
| 183 | result.append('\n') |
| 184 | elif content[index] in ' \t\r': |
| 185 | # All whitespace characters other than newline are converted to spaces |
| 186 | result.append(' ') |
| 187 | else: |
| 188 | # Regular character |
| 189 | result.append(content[index]) |
| 190 | index += 1 |
| 191 | |
| 192 | return ''.join(result), index |
| 193 | # Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Missing semicolon at start of node, Property key is not in uppercase, Missing opening bracket for property value, Missing closing bracket for property value, Missing closing parenthesis for child, Escape character at end of content |