| 26 | 26 | |
| 27 | 27 | |
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    A sequence of nodes (``;A[a];B[b]``) is represented as a chain in which
    each node is the single child of the node before it. Parenthesised
    variations become the children of the last node of the sequence.

    Args:
        input_string: A string in SGF format, e.g. ``"(;FF[4](;B[aa])(;B[ab]))"``

    Returns:
        SgfTree: The root node of the parsed SGF tree

    Raises:
        ValueError: If the input is malformed or doesn't conform to SGF format
    """
    # A tree is always wrapped in parentheses; this also rejects empty input.
    if (
        not input_string
        or not input_string.startswith('(')
        or not input_string.endswith(')')
    ):
        raise ValueError("tree missing")

    # Strip the outer parentheses; what remains must be a node sequence.
    content = input_string[1:-1]
    if not content:
        raise ValueError("tree with no nodes")

    tree, index = _parse_tree(content, 0)

    # Anything left over (stray ')', a second top-level tree, ...) means the
    # input was not one well-formed tree; do not silently drop it.
    if index != len(content):
        raise ValueError("unexpected content after tree")

    return tree


def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]:
    """
    Parse a tree (node sequence plus optional variations) from content.

    Args:
        content: The SGF content string (without the tree's own parentheses)
        index: Position of the ';' that starts the first node of the tree

    Returns:
        tuple: (SgfTree, next_index) where next_index is the position of the
        first character that does not belong to this tree

    Raises:
        ValueError: If no node starts at index or the tree is malformed
    """
    if index >= len(content) or content[index] != ';':
        raise ValueError("tree missing")

    # Collect the properties of every node in the ';'-separated sequence.
    # This is done iteratively so long games do not deepen the recursion.
    sequence = []
    while index < len(content) and content[index] == ';':
        properties, index = _parse_node_properties(content, index + 1)
        sequence.append(properties)

    # Variations following the sequence belong to its last node.
    children = []
    while index < len(content) and content[index] == '(':
        child, index = _parse_tree(content, index + 1)
        if index >= len(content) or content[index] != ')':
            raise ValueError("tree missing")
        index += 1  # Skip the closing parenthesis of the child tree
        children.append(child)

    # Build the chain from the last node back to the first: every earlier
    # node has exactly one child, the node that follows it.
    tree = SgfTree(sequence[-1], children)
    for properties in reversed(sequence[:-1]):
        tree = SgfTree(properties, [tree])

    return tree, index


def _parse_node_properties(content: str, index: int) -> tuple[dict[str, list[str]], int]:
    """Parse all ``KEY[value]...`` properties of one node starting at index."""
    properties = {}
    while index < len(content) and content[index] not in '();':
        key, index = _parse_property_key(content, index)

        # Every key must be followed by at least one bracketed value.
        if index >= len(content) or content[index] != '[':
            raise ValueError("properties without delimiter")

        values = []
        while index < len(content) and content[index] == '[':
            value, index = _parse_property_value(content, index + 1)
            values.append(value)

        properties[key] = values

    return properties, index


def _parse_property_key(content: str, index: int) -> tuple[str, int]:
    """Parse an all-uppercase property key starting at index."""
    start = index
    # Consume the whole run of letters so a mixed-case key such as "Aa" is
    # reported as a case error rather than as a missing delimiter.
    while index < len(content) and content[index].isalpha():
        index += 1

    key = content[start:index]
    if not key or not all(char.isupper() for char in key):
        raise ValueError("property must be in uppercase")

    return key, index


def _parse_property_value(content: str, index: int) -> tuple[str, int]:
    """
    Parse one property value; index points just past the opening '['.

    Applies the SGF text rules: a backslash inserts the next character
    verbatim, except that an escaped newline is removed and escaped
    whitespace becomes a space; unescaped whitespace other than newline
    becomes a space.

    Returns:
        tuple: (value, next_index) where next_index is just past the ']'
    """
    chars = []
    while index < len(content) and content[index] != ']':
        char = content[index]
        if char == '\\':
            index += 1
            if index >= len(content):
                raise ValueError("properties without delimiter")
            char = content[index]
            if char == '\n':
                # Soft line break: backslash-newline disappears entirely.
                pass
            elif char in ' \t\r':
                chars.append(' ')
            else:
                # Any other escaped character (including ']', '\\', 'n',
                # 't') is taken literally.
                chars.append(char)
        elif char in ' \t\r':
            chars.append(' ')
        else:
            # Regular character; unescaped newlines are preserved.
            chars.append(char)
        index += 1

    # The loop only stops early on ']'; reaching the end means it is missing.
    if index >= len(content):
        raise ValueError("properties without delimiter")

    return ''.join(chars), index + 1  # Skip closing bracket
| 166 | |
| 167 | # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree with no nodes, missing semicolon at start of node, property key is not uppercase, missing opening bracket for property value, missing closing bracket, missing closing parenthesis for child trees |