| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string in SGF format |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: The parsed SGF tree |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is invalid according to SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input |
| 42 | if not input_string: |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Input doesn't start with '(' |
| 46 | if not input_string.startswith('('): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Edge Case: Input doesn't end with ')' |
| 50 | if not input_string.endswith(')'): |
| 51 | raise ValueError("tree missing") |
| 52 | |
| 53 | # Remove the outer parentheses |
| 54 | content = input_string[1:-1] |
| 55 | |
| 56 | # Edge Case: Empty tree with no nodes |
| 57 | if not content: |
| 58 | raise ValueError("tree with no nodes") |
| 59 | |
| 60 | # Parse the tree |
| 61 | tree, _ = _parse_tree(content, 0) |
| 62 | return tree |
| 63 | |
| 64 | |
| 65 | def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]: |
| 66 | """ |
| 67 | Parse a tree from the content string starting at index. |
| 68 | |
| 69 | Args: |
| 70 | content: The SGF content string |
| 71 | index: The starting index to parse from |
| 72 | |
| 73 | Returns: |
| 74 | tuple: A tuple containing the parsed SgfTree and the next index to parse from |
| 75 | """ |
| 76 | # Edge Case: Missing semicolon at start of node |
| 77 | if index >= len(content) or content[index] != ';': |
| 78 | raise ValueError("tree missing") |
| 79 | |
| 80 | index += 1 # Skip the semicolon |
| 81 | |
| 82 | # Parse properties of the current node |
| 83 | properties = {} |
| 84 | while index < len(content) and content[index] not in '();': |
| 85 | # Parse key |
| 86 | key_start = index |
| 87 | while index < len(content) and content[index].isalpha(): |
| 88 | index += 1 |
| 89 | |
| 90 | # Edge Case: Property key is not in uppercase |
| 91 | key = content[key_start:index] |
| 92 | if not key.isupper(): |
| 93 | raise ValueError("property must be in uppercase") |
| 94 | |
| 95 | # Edge Case: Missing opening bracket for property value |
| 96 | if index >= len(content) or content[index] != '[': |
| 97 | raise ValueError("properties without delimiter") |
| 98 | |
| 99 | # Parse values |
| 100 | values = [] |
| 101 | while index < len(content) and content[index] == '[': |
| 102 | index += 1 # Skip opening bracket |
| 103 | value, index = _parse_value(content, index) |
| 104 | |
| 105 | # Edge Case: Missing closing bracket for property value |
| 106 | if index >= len(content) or content[index] != ']': |
| 107 | raise ValueError("properties without delimiter") |
| 108 | |
| 109 | index += 1 # Skip closing bracket |
| 110 | values.append(value) |
| 111 | |
| 112 | properties[key] = values |
| 113 | |
| 114 | # Parse children |
| 115 | children = [] |
| 116 | |
| 117 | # First, parse children in parentheses (variations) |
| 118 | while index < len(content) and content[index] == '(': |
| 119 | child, index = _parse_tree(content, index + 1) # Skip opening parenthesis |
| 120 | |
| 121 | # Edge Case: Missing closing parenthesis for child |
| 122 | if index >= len(content) or content[index] != ')': |
| 123 | raise ValueError("tree missing") |
| 124 | |
| 125 | children.append(child) |
| 126 | index += 1 # Skip closing parenthesis |
| 127 | |
| 128 | # Then, parse sequential nodes (single child chain) |
| 129 | while index < len(content) and content[index] == ';': |
| 130 | # Create a sequential child node |
| 131 | child, index = _parse_tree(content, index) |
| 132 | children.append(child) |
| 133 | |
| 134 | return SgfTree(properties, children), index |
| 135 | |
| 136 | |
| 137 | def _parse_value(content: str, index: int) -> tuple[str, int]: |
| 138 | """ |
| 139 | Parse a property value from the content string starting at index. |
| 140 | |
| 141 | Args: |
| 142 | content: The SGF content string |
| 143 | index: The starting index to parse from |
| 144 | |
| 145 | Returns: |
| 146 | tuple: A tuple containing the parsed value string and the next index to parse from |
| 147 | """ |
| 148 | result = [] |
| 149 | while index < len(content) and content[index] != ']': |
| 150 | if content[index] == '\\': |
| 151 | # Handle escape sequences |
| 152 | index += 1 |
| 153 | if index < len(content): |
| 154 | char = content[index] |
| 155 | if char == '\n': |
| 156 | # Newline immediately after \ is removed |
| 157 | index += 1 |
| 158 | # Skip any additional whitespace on the next line |
| 159 | while index < len(content) and content[index] in ' \t\r': |
| 160 | index += 1 |
| 161 | elif char in ' \t\r': |
| 162 | # Other whitespace after \ is converted to space |
| 163 | result.append(' ') |
| 164 | index += 1 |
| 165 | else: |
| 166 | # Any other character after \ is inserted as-is |
| 167 | result.append(char) |
| 168 | index += 1 |
| 169 | else: |
| 170 | # Edge Case: Escape character at end of value |
| 171 | raise ValueError("properties without delimiter") |
| 172 | else: |
| 173 | char = content[index] |
| 174 | if char in ' \t\r': |
| 175 | # All whitespace characters other than newline are converted to spaces |
| 176 | result.append(' ') |
| 177 | else: |
| 178 | # Newlines and other characters are kept as-is |
| 179 | result.append(char) |
| 180 | index += 1 |
| 181 | |
| 182 | return ''.join(result), index |
| 183 | |
| 184 | # Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Missing semicolon at start of node, Property key is not in uppercase, Missing opening bracket for property value, Missing closing bracket for property value, Missing closing parenthesis for child, Escape character at end of value |