| 26 | 26 | |
| 27 | 27 | |
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format

    Returns:
        SgfTree: The parsed SGF tree

    Raises:
        ValueError: If the input is invalid according to SGF format rules.
            "tree missing" is raised when the input is empty, is not wrapped
            in '(' ... ')', or has unparsed text left after the tree;
            "tree with no nodes" is raised for "()". Errors raised by
            _parse_tree propagate unchanged.
    """
    # The whole input must be one parenthesised tree. startswith() is False
    # for the empty string, so this also covers empty input.
    if not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Strip the outer parentheses; what remains must hold at least one node.
    content = input_string[1:-1]
    if not content:
        raise ValueError("tree with no nodes")

    tree, end = _parse_tree(content, 0)

    # _parse_tree stops at the first character it cannot attach to the tree
    # (for example an unbalanced ')'). Leftover text means the input was
    # malformed, so reject it instead of silently returning a partial tree.
    if end != len(content):
        raise ValueError("tree missing")

    return tree
| 63 | |
| 64 | |
| 65 | def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]: |
| 66 | """ |
| 67 | Parse a tree from the content string starting at index. |
| 68 | |
| 69 | Args: |
| 70 | content: The SGF content string |
| 71 | index: The starting index to parse from |
| 72 | |
| 73 | Returns: |
| 74 | tuple: A tuple containing the parsed SgfTree and the next index to parse from |
| 75 | """ |
| 76 | # Edge Case: Missing semicolon at start of node |
| 77 | if index >= len(content) or content[index] != ';': |
| 78 | raise ValueError("tree missing") |
| 79 | |
| 80 | index += 1 # Skip the semicolon |
| 81 | |
| 82 | # Parse properties of the current node |
| 83 | properties = {} |
| 84 | while index < len(content) and content[index] not in '();': |
| 85 | # Parse key |
| 86 | key_start = index |
| 87 | while index < len(content) and content[index].isalpha() and content[index].isupper(): |
| 88 | index += 1 |
| 89 | |
| 90 | # Edge Case: Property key is not in uppercase or is empty |
| 91 | if index == key_start or (index < len(content) and content[index] not in '['): |
| 92 | # If we didn't parse any characters for the key, or if the next character is not '[', |
| 93 | # then we have an invalid property |
| 94 | if index == key_start: |
| 95 | raise ValueError("property must be in uppercase") |
| 96 | else: |
| 97 | # We parsed some characters but the next character is not '[' |
| 98 | # This means we have a malformed property |
| 99 | raise ValueError("properties without delimiter") |
| 100 | |
| 101 | key = content[key_start:index] |
| 102 | |
| 103 | # Edge Case: Missing opening bracket for property value |
| 104 | if index >= len(content) or content[index] != '[': |
| 105 | raise ValueError("properties without delimiter") |
| 106 | |
| 107 | # Parse all values for this key |
| 108 | values = [] |
| 109 | while index < len(content) and content[index] == '[': |
| 110 | index += 1 # Skip the opening bracket |
| 111 | value, index = _parse_value(content, index) |
| 112 | |
| 113 | # Edge Case: Missing closing bracket for property value |
| 114 | if index >= len(content) or content[index] != ']': |
| 115 | raise ValueError("properties without delimiter") |
| 116 | |
| 117 | index += 1 # Skip the closing bracket |
| 118 | values.append(value) |
| 119 | |
| 120 | properties[key] = values |
| 121 | |
| 122 | # Parse children |
| 123 | children = [] |
| 124 | # Handle variations (parentheses) |
| 125 | while index < len(content) and content[index] == '(': |
| 126 | child, index = _parse_tree(content, index + 1) # Skip the opening parenthesis |
| 127 | |
| 128 | # Edge Case: Missing closing parenthesis for child |
| 129 | if index >= len(content) or content[index] != ')': |
| 130 | raise ValueError("tree missing") |
| 131 | |
| 132 | index += 1 # Skip the closing parenthesis |
| 133 | children.append(child) |
| 134 | |
| 135 | # Handle sequential nodes (semicolon) |
| 136 | if index < len(content) and content[index] == ';': |
| 137 | child, index = _parse_tree(content, index) |
| 138 | children.append(child) |
| 139 | |
| 140 | return SgfTree(properties, children), index |
| 141 | |
| 142 | |
| 143 | def _parse_value(content: str, index: int) -> tuple[str, int]: |
| 144 | """ |
| 145 | Parse a property value according to SGF Text type rules. |
| 146 | |
| 147 | Args: |
| 148 | content: The SGF content string |
| 149 | index: The starting index to parse from |
| 150 | |
| 151 | Returns: |
| 152 | tuple: A tuple containing the parsed value string and the next index to parse from |
| 153 | """ |
| 154 | result = [] |
| 155 | while index < len(content) and content[index] != ']': |
| 156 | if content[index] == '\\': |
| 157 | # Handle escape character |
| 158 | index += 1 |
| 159 | if index >= len(content): |
| 160 | # Edge Case: Escape character at end of content |
| 161 | raise ValueError("properties without delimiter") |
| 162 | |
| 163 | char = content[index] |
| 164 | # Check if next character is a newline |
| 165 | if char == '\n': |
| 166 | # Newline immediately after backslash is removed |
| 167 | pass |
| 168 | elif char in ' \t\r': |
| 169 | # Whitespace after escape - converted to space |
| 170 | result.append(' ') |
| 171 | else: |
| 172 | # Any other character after escape is inserted as-is |
| 173 | result.append(char) |
| 174 | elif content[index] in ' \t\r': |
| 175 | # All whitespace characters other than newline are converted to spaces |
| 176 | result.append(' ') |
| 177 | else: |
| 178 | # Regular character |
| 179 | result.append(content[index]) |
| 180 | index += 1 |
| 181 | |
| 182 | return ''.join(result), index |
| 183 | |
| 184 | # Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Missing semicolon at start of node, Property key is not in uppercase, Missing opening bracket for property value, Missing closing bracket for property value, Missing closing parenthesis for child, Escape character at end of content |