| 26 | 26 | |
| 27 | 27 | |
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format, wrapped in one pair of
            parentheses, e.g. "(;A[B])".

    Returns:
        SgfTree: The parsed SGF tree.

    Raises:
        ValueError: "tree missing" if the input is empty, is not wrapped in
            parentheses, does not start with a node, or has content left
            over after the tree (unbalanced parentheses);
            "tree with no nodes" if the parentheses are empty. Errors
            raised while parsing nodes and properties propagate unchanged.
    """
    # Empty input, or input that is not wrapped in "(" ... ")".
    if (
        not input_string
        or not input_string.startswith('(')
        or not input_string.endswith(')')
    ):
        raise ValueError("tree missing")

    # Strip the outer parentheses; what remains must hold at least one node.
    content = input_string[1:-1]
    if not content:
        raise ValueError("tree with no nodes")

    tree, end = _parse_tree(content, 0)

    # The node parser stops at the first ')' it does not own. If that happens
    # before the end of the content, the parentheses are unbalanced and the
    # remainder would otherwise be dropped silently.
    if end != len(content):
        raise ValueError("tree missing")

    return tree
| 63 | |
| 64 | |
| 65 | def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]: |
| 66 | """ |
| 67 | Parse a tree from the content string starting at index. |
| 68 | |
| 69 | Args: |
| 70 | content: The SGF content string |
| 71 | index: The starting index to parse from |
| 72 | |
| 73 | Returns: |
| 74 | tuple: A tuple containing the parsed SgfTree and the next index to parse from |
| 75 | """ |
| 76 | # Edge Case: Missing semicolon at the beginning of a node |
| 77 | if index >= len(content) or content[index] != ';': |
| 78 | raise ValueError("tree missing") |
| 79 | |
| 80 | index += 1 # Skip the semicolon |
| 81 | |
| 82 | # Parse properties of the current node |
| 83 | properties = {} |
| 84 | while index < len(content) and content[index] not in '();': |
| 85 | # Parse key |
| 86 | key_start = index |
| 87 | while index < len(content) and content[index].isalpha() and content[index].isupper(): |
| 88 | index += 1 |
| 89 | |
| 90 | # Edge Case: Property key is not in uppercase |
| 91 | if index == key_start: |
| 92 | raise ValueError("property must be in uppercase") |
| 93 | |
| 94 | key = content[key_start:index] |
| 95 | |
| 96 | # Edge Case: Missing opening bracket for property value |
| 97 | if index >= len(content) or content[index] != '[': |
| 98 | raise ValueError("properties without delimiter") |
| 99 | |
| 100 | # Parse values |
| 101 | values = [] |
| 102 | while index < len(content) and content[index] == '[': |
| 103 | index += 1 # Skip the opening bracket |
| 104 | value, index = _parse_value(content, index) |
| 105 | values.append(value) |
| 106 | |
| 107 | # Edge Case: Missing closing bracket for property value |
| 108 | if index >= len(content) or content[index] != ']': |
| 109 | raise ValueError("properties without delimiter") |
| 110 | index += 1 # Skip the closing bracket |
| 111 | |
| 112 | properties[key] = values |
| 113 | |
| 114 | # Parse children |
| 115 | children = [] |
| 116 | |
| 117 | # Handle children in parentheses (variations) |
| 118 | while index < len(content) and content[index] == '(': |
| 119 | child, index = _parse_tree(content, index + 1) # Skip the opening parenthesis |
| 120 | children.append(child) |
| 121 | |
| 122 | # Skip the closing parenthesis |
| 123 | # Edge Case: Missing closing parenthesis for child |
| 124 | if index >= len(content) or content[index] != ')': |
| 125 | raise ValueError("tree missing") |
| 126 | index += 1 |
| 127 | |
| 128 | # Handle sequential semicolons (parent-child relationships) |
| 129 | while index < len(content) and content[index] == ';': |
| 130 | child, index = _parse_tree(content, index) # Start from the semicolon |
| 131 | children.append(child) |
| 132 | |
| 133 | return SgfTree(properties, children), index |
| 134 | |
| 135 | |
| 136 | def _parse_value(content: str, index: int) -> tuple[str, int]: |
| 137 | """ |
| 138 | Parse a value from the content string starting at index. |
| 139 | |
| 140 | Args: |
| 141 | content: The SGF content string |
| 142 | index: The starting index to parse from (should be pointing to the content inside brackets) |
| 143 | |
| 144 | Returns: |
| 145 | tuple: A tuple containing the parsed value string and the next index to parse from |
| 146 | """ |
| 147 | result = [] |
| 148 | while index < len(content): |
| 149 | if content[index] == ']': |
| 150 | # Found the end of the value |
| 151 | break |
| 152 | elif content[index] == '\\': |
| 153 | # Handle escape sequences |
| 154 | index += 1 |
| 155 | if index >= len(content): |
| 156 | # Edge Case: Escape character at end of content |
| 157 | result.append('\\') |
| 158 | break |
| 159 | |
| 160 | char = content[index] |
| 161 | if char == '\n': |
| 162 | # Edge Case: Newline escaped with backslash - remove the newline |
| 163 | # The newline is removed, so we don't append anything |
| 164 | pass |
| 165 | elif char.isspace(): |
| 166 | # Edge Case: Whitespace after backslash - convert to space |
| 167 | result.append(' ') |
| 168 | else: |
| 169 | # Edge Case: Non-whitespace character after backslash - insert as-is |
| 170 | result.append(char) |
| 171 | elif content[index] == '\n': |
| 172 | # Edge Case: Unescaped newline - keep as newline |
| 173 | result.append(content[index]) |
| 174 | elif content[index].isspace() and content[index] != '\n': |
| 175 | # Edge Case: Whitespace other than newline - convert to space |
| 176 | result.append(' ') |
| 177 | else: |
| 178 | # Edge Case: Regular character - insert as-is |
| 179 | result.append(content[index]) |
| 180 | index += 1 |
| 181 | |
| 182 | return ''.join(result), index |
| 183 | # Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Missing semicolon at the beginning of a node, Property key is not in uppercase, Missing opening bracket for property value, Missing closing bracket for property value, Missing closing parenthesis for child, Escape character at end of content, Newline escaped with backslash, Tab escaped with backslash, Whitespace after backslash, Non-whitespace character after backslash, Unescaped newline, Whitespace other than newline |
| 184 | # Handled Edge Cases: Property key is not in uppercase, Missing opening bracket for property value, Missing closing bracket for property value |
| 185 | # Handled Edge Cases: Missing closing parenthesis for child |
| 186 | # Handled Edge Cases: Escape character at end of content, Newline escaped with backslash - remove if immediately after \, Tab escaped with backslash - convert to space, Whitespace after backslash - convert to space, Non-whitespace character after backslash - insert as-is, Unescaped newline - keep as newline, Whitespace other than newline - convert to space, Regular character - insert as-is |