| 26 | 26 | |
| 27 | 27 | |
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF (Smart Game Format) string and return a tree of properties.

    A tree is written as ``(`` + one or more nodes + zero or more
    parenthesised variations + ``)``.  Each node starts with ``;`` and holds
    properties of the form ``KEY[value][value]...``.  Consecutive nodes
    (``;A[1];B[2]``) form a chain: every node is the single child of the node
    before it, and the variations are attached to the last node of the chain.

    Property values follow the SGF "Text" rules:
      * a newline directly after a backslash is removed (soft line break);
      * any other newline is kept;
      * every other whitespace character becomes a single space;
      * any non-whitespace character after a backslash is taken literally
        (so ``\\]`` is ``]``, ``\\\\`` is a backslash, ``\\n`` is the letter n).

    Args:
        input_string: A string containing SGF formatted data

    Returns:
        SgfTree: A tree structure representing the parsed SGF data

    Raises:
        ValueError: If the input string is invalid according to SGF format
            rules.  Messages used: "tree missing" (no/unbalanced parentheses
            or trailing data), "tree with no nodes" (a "(" not followed by
            ";"), "property must be in uppercase", and
            "properties without delimiter" (missing "[" or unterminated value).
    """
    # Edge Case: Empty input or input that doesn't start with '('
    if not input_string or not input_string.startswith('('):
        raise ValueError("tree missing")

    # Edge Case: Input that starts with '(' but is not followed by a node
    if len(input_string) < 2 or input_string[1] != ';':
        raise ValueError("tree with no nodes")

    length = len(input_string)
    index = 1  # Start after the initial '('

    def parse_value() -> str:
        """Consume one '[...]' value at the cursor and return its decoded text."""
        nonlocal index
        index += 1  # Skip '['
        chars = []
        while index < length and input_string[index] != ']':
            char = input_string[index]
            if char == '\\':
                index += 1
                # Edge Case: Input ends right after the escape character
                if index >= length:
                    raise ValueError("properties without delimiter")
                char = input_string[index]
                if char == '\n':
                    # Escaped newline is a soft line break: drop it entirely
                    index += 1
                    continue
                # Any other escaped character is literal; escaped whitespace
                # still falls through to the whitespace normalisation below.
            if char != '\n' and char.isspace():
                char = ' '
            chars.append(char)
            index += 1

        # Edge Case: Unclosed value bracket
        if index >= length:
            raise ValueError("properties without delimiter")
        index += 1  # Skip ']'
        return ''.join(chars)

    def parse_properties() -> dict:
        """Consume all properties of one node (cursor is just past its ';')."""
        nonlocal index
        properties = {}
        while index < length and input_string[index] not in '();':
            key_start = index
            while index < length and input_string[index].isalpha():
                index += 1

            # Edge Case: Property key is empty or not entirely uppercase
            key = input_string[key_start:index]
            if not key.isupper():
                raise ValueError("property must be in uppercase")

            # Edge Case: No values for the property
            if index >= length or input_string[index] != '[':
                raise ValueError("properties without delimiter")

            values = []
            while index < length and input_string[index] == '[':
                values.append(parse_value())
            properties[key] = values
        return properties

    def parse_tree() -> SgfTree:
        """Consume a node sequence plus its variations (cursor is past '(')."""
        nonlocal index
        # Edge Case: Tree doesn't start with a node
        if index >= length or input_string[index] != ';':
            raise ValueError("tree with no nodes")

        # Collect the chain of ';' nodes iteratively so that long games do
        # not consume one recursion level per move.
        chain = []
        while index < length and input_string[index] == ';':
            index += 1  # Skip ';'
            chain.append(parse_properties())

        # Variations belong to the last node of the chain
        variations = []
        while index < length and input_string[index] == '(':
            index += 1  # Skip '('
            variations.append(parse_tree())
            # Edge Case: Variation not properly closed
            if index >= length or input_string[index] != ')':
                raise ValueError("tree missing")
            index += 1  # Skip ')'

        # Link the chain back to front: each node is the sole child of the
        # node that precedes it.
        tree = SgfTree(chain.pop(), variations)
        while chain:
            tree = SgfTree(chain.pop(), [tree])
        return tree

    # Start parsing with the root node
    root = parse_tree()

    # Edge Case: Root tree not properly closed
    if index >= length or input_string[index] != ')':
        raise ValueError("tree missing")

    index += 1  # Skip the final ')'

    # Edge Case: Extra characters after the final ')'
    if index < length:
        raise ValueError("tree missing")

    return root