| 26 | 26 | |
| 27 | 27 | |
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF (Smart Game Format) string and return a tree of properties.

    Grammar handled (no whitespace is allowed between tokens)::

        tree     := "(" node+ tree* ")"
        node     := ";" property*
        property := KEY value+          (KEY is one or more uppercase letters)
        value    := "[" text "]"

    A run of nodes inside one pair of parentheses forms a chain: every node
    is the only child of the node before it. The sub-trees that follow the
    run become the children of the last node in the run.

    Text values are processed as follows:
      * a backslash followed by a newline removes both characters;
      * a backslash followed by any other character yields that character
        (so ``\\]`` and ``\\\\`` give a literal ``]`` and backslash);
      * every whitespace character other than a newline becomes a single
        space, whether it was escaped or not.

    If the same key occurs twice in a node, the later occurrence replaces
    the earlier one.

    Args:
        input_string: A string containing SGF formatted data

    Returns:
        SgfTree: The root node of the parsed tree

    Raises:
        ValueError: With one of these messages:
            "tree missing" - empty input, no leading '(', a tree that is
                not closed by ')', or characters after the closing ')'.
            "tree with no nodes" - a '(' that is not followed by ';'.
            "property must be in uppercase" - a property key containing
                anything but uppercase letters.
            "properties without delimiter" - a key with no '[' value, or a
                value that is never closed by ']'.
    """
    text = input_string
    length = len(text)
    index = 0

    def at(char: str) -> bool:
        """Return True when the cursor is on `char` (False at end of input)."""
        return index < length and text[index] == char

    def parse_value() -> str:
        """Consume a value up to and including its closing ']'.

        The cursor must be just past the opening '['.
        """
        nonlocal index
        pieces = []
        while index < length:
            char = text[index]
            if char == ']':
                index += 1  # Skip ']'
                return "".join(pieces)
            if char == '\\':
                index += 1  # Skip the escape character itself
                if index >= length:
                    break  # Dangling backslash: the value is unterminated
                char = text[index]
                if char == '\n':
                    # Escaped newline is a soft line break: drop it entirely
                    index += 1
                    continue
            # Whitespace other than newline collapses to a space, escaped or not
            if char != '\n' and char.isspace():
                char = ' '
            pieces.append(char)
            index += 1
        raise ValueError("properties without delimiter")

    def parse_properties() -> dict:
        """Consume every `KEY[value]...` property of the current node."""
        nonlocal index
        properties = {}
        # Scan any letters (not just uppercase ones) so that a lowercase or
        # mixed-case key is reported as such instead of being mistaken for
        # the end of the node.
        while index < length and text[index].isalpha():
            key_start = index
            while index < length and text[index].isalpha():
                index += 1
            key = text[key_start:index]

            if not key.isupper():
                raise ValueError("property must be in uppercase")
            if not at('['):
                raise ValueError("properties without delimiter")

            values = []
            while at('['):
                index += 1  # Skip '['
                values.append(parse_value())
            properties[key] = values
        return properties

    def parse_tree() -> SgfTree:
        """Consume `node+ tree* ")"`; the cursor must be just past the '('."""
        nonlocal index
        if not at(';'):
            raise ValueError("tree with no nodes")

        # Collect the run of sequential nodes first; they are linked below.
        sequence = []
        while at(';'):
            index += 1  # Skip ';'
            sequence.append(parse_properties())

        # Variations: each parenthesised sub-tree hangs off the last node.
        variations = []
        while at('('):
            index += 1  # Skip '('
            variations.append(parse_tree())

        if not at(')'):
            raise ValueError("tree missing")
        index += 1  # Skip ')'

        # Link the chain from the tail backwards so that a long run of nodes
        # does not need one recursive call per node.
        tree = SgfTree(sequence[-1], variations)
        for properties in reversed(sequence[:-1]):
            tree = SgfTree(properties, [tree])
        return tree

    if not at('('):
        raise ValueError("tree missing")
    index = 1  # Start after the initial '('

    root = parse_tree()

    # Nothing may follow the ')' that closes the root tree
    if index < length:
        raise ValueError("tree missing")

    return root