| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF (Smart Game Format) string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string containing SGF formatted data |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: A tree structure representing the SGF data |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input string is invalid according to SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input or input that doesn't start with '(' |
| 42 | if not input_string or not input_string.startswith('('): |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Input that doesn't end with ')' |
| 46 | if not input_string.endswith(')'): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Remove the outer parentheses |
| 50 | content = input_string[1:-1] |
| 51 | |
| 52 | # Edge Case: Empty content after removing parentheses |
| 53 | if not content: |
| 54 | raise ValueError("tree with no nodes") |
| 55 | |
| 56 | # Parse the tree |
| 57 | tree, _ = _parse_node(content, 0) |
| 58 | return tree |
| 59 | |
| 60 | |
| 61 | def _parse_node(content: str, index: int) -> tuple[SgfTree, int]: |
| 62 | """ |
| 63 | Parse a node from the content string starting at the given index. |
| 64 | |
| 65 | Args: |
| 66 | content: The content string to parse |
| 67 | index: The starting index for parsing |
| 68 | |
| 69 | Returns: |
| 70 | tuple: A tuple containing the parsed SgfTree and the next index to parse |
| 71 | """ |
| 72 | # Edge Case: Content doesn't start with ';' |
| 73 | if index >= len(content) or content[index] != ';': |
| 74 | raise ValueError("tree with no nodes") |
| 75 | |
| 76 | index += 1 # Skip the ';' |
| 77 | |
| 78 | # Parse properties for this node |
| 79 | properties = {} |
| 80 | while index < len(content) and content[index].isalpha(): |
| 81 | # Parse key |
| 82 | key_start = index |
| 83 | while index < len(content) and content[index].isalpha(): |
| 84 | index += 1 |
| 85 | |
| 86 | # Edge Case: No key found |
| 87 | if index == key_start: |
| 88 | raise ValueError("properties without delimiter") |
| 89 | |
| 90 | key = content[key_start:index] |
| 91 | |
| 92 | # Edge Case: Key is not uppercase |
| 93 | if key != key.upper(): |
| 94 | raise ValueError("property must be in uppercase") |
| 95 | |
| 96 | # Parse values |
| 97 | values = [] |
| 98 | while index < len(content) and content[index] == '[': |
| 99 | index += 1 # Skip '[' |
| 100 | value_start = index |
| 101 | |
| 102 | # Parse value, handling escapes |
| 103 | while index < len(content) and content[index] != ']': |
| 104 | if content[index] == '\\': |
| 105 | index += 1 # Skip escape character |
| 106 | if index < len(content): |
| 107 | index += 1 # Skip escaped character |
| 108 | else: |
| 109 | index += 1 |
| 110 | |
| 111 | # Edge Case: Unclosed bracket |
| 112 | if index >= len(content) or content[index] != ']': |
| 113 | raise ValueError("properties without delimiter") |
| 114 | |
| 115 | value = content[value_start:index] |
| 116 | value = _unescape_value(value) |
| 117 | values.append(value) |
| 118 | index += 1 # Skip ']' |
| 119 | |
| 120 | # Edge Case: No values found for key |
| 121 | if not values: |
| 122 | raise ValueError("properties without delimiter") |
| 123 | |
| 124 | properties[key] = values |
| 125 | |
| 126 | # Parse children - either variations in parentheses or sequential nodes with semicolons |
| 127 | children = [] |
| 128 | |
| 129 | # Handle variations (nodes in parentheses) and sequential nodes (nodes with semicolons) |
| 130 | # We need to handle both in a loop because they can be mixed |
| 131 | while index < len(content) and content[index] in '(;': |
| 132 | if content[index] == '(': |
| 133 | # Parse the node inside the parentheses |
| 134 | child, index = _parse_node(content, index + 1) # Skip '(' |
| 135 | children.append(child) |
| 136 | # Skip ')' |
| 137 | if index >= len(content) or content[index] != ')': |
| 138 | raise ValueError("tree missing") |
| 139 | index += 1 |
| 140 | elif content[index] == ';': |
| 141 | # Parse the next sequential node |
| 142 | child, index = _parse_node(content, index) |
| 143 | children.append(child) |
| 144 | |
| 145 | return SgfTree(properties, children), index |
| 146 | |
| 147 | |
| 148 | def _unescape_value(value: str) -> str: |
| 149 | """ |
| 150 | Unescape an SGF value according to SGF text type rules. |
| 151 | |
| 152 | Args: |
| 153 | value: The escaped value string |
| 154 | |
| 155 | Returns: |
| 156 | str: The unescaped value string |
| 157 | """ |
| 158 | result = [] |
| 159 | i = 0 |
| 160 | while i < len(value): |
| 161 | if value[i] == '\\': |
| 162 | i += 1 |
| 163 | if i < len(value): |
| 164 | # If next character is newline, remove the backslash and newline |
| 165 | if value[i] == '\n': |
| 166 | i += 1 |
| 167 | continue |
| 168 | # Any non-whitespace character after \ is inserted as-is |
| 169 | # Any whitespace character after \ is converted according to rules |
| 170 | if value[i].isspace() and value[i] != '\n': |
| 171 | result.append(' ') |
| 172 | else: |
| 173 | result.append(value[i]) |
| 174 | i += 1 |
| 175 | else: |
| 176 | # Edge Case: Backslash at end of value |
| 177 | result.append('\\') |
| 178 | else: |
| 179 | # Convert all whitespace (except newline) to spaces |
| 180 | if value[i].isspace() and value[i] != '\n': |
| 181 | result.append(' ') |
| 182 | else: |
| 183 | result.append(value[i]) |
| 184 | i += 1 |
| 185 | |
| 186 | return ''.join(result) |
| 187 | # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after removing parentheses, content not starting with semicolon, no key found, key not uppercase, unclosed bracket, no values found for key, missing closing parenthesis for children, backslash at end of value |