| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF (Smart Game Format) string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string containing SGF formatted data |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: A tree structure representing the SGF data |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is invalid according to SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input or input that doesn't start with '(' |
| 42 | if not input_string or not input_string.startswith('('): |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Input that doesn't end with ')' |
| 46 | if not input_string.endswith(')'): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Remove the outer parentheses |
| 50 | content = input_string[1:-1] |
| 51 | |
| 52 | # Edge Case: Empty content after removing parentheses |
| 53 | if not content: |
| 54 | raise ValueError("tree with no nodes") |
| 55 | |
| 56 | # Edge Case: Content that doesn't start with ';' |
| 57 | if not content.startswith(';'): |
| 58 | raise ValueError("tree missing") |
| 59 | |
| 60 | return _parse_tree(content) |
| 61 | |
| 62 | |
| 63 | def _parse_tree(content: str) -> SgfTree: |
| 64 | """ |
| 65 | Parse a tree from SGF content. |
| 66 | |
| 67 | Args: |
| 68 | content: SGF content starting with ';' |
| 69 | |
| 70 | Returns: |
| 71 | SgfTree: Parsed tree |
| 72 | """ |
| 73 | # Skip the initial ';' |
| 74 | i = 1 |
| 75 | |
| 76 | # Parse properties |
| 77 | properties = {} |
| 78 | while i < len(content) and content[i] != '(' and content[i] != ')': |
| 79 | # Parse property key |
| 80 | key_start = i |
| 81 | while i < len(content) and content[i].isupper(): |
| 82 | i += 1 |
| 83 | |
| 84 | # Edge Case: Property key is not in uppercase |
| 85 | if i == key_start: |
| 86 | raise ValueError("property must be in uppercase") |
| 87 | |
| 88 | key = content[key_start:i] |
| 89 | |
| 90 | # Edge Case: No delimiter after key |
| 91 | if i >= len(content) or content[i] != '[': |
| 92 | raise ValueError("properties without delimiter") |
| 93 | |
| 94 | # Parse property values |
| 95 | values = [] |
| 96 | while i < len(content) and content[i] == '[': |
| 97 | i += 1 # Skip '[' |
| 98 | value_start = i |
| 99 | |
| 100 | # Parse value, handling escapes |
| 101 | while i < len(content) and content[i] != ']': |
| 102 | if content[i] == '\\': |
| 103 | i += 2 # Skip escape and next character |
| 104 | else: |
| 105 | i += 1 |
| 106 | |
| 107 | # Edge Case: Unclosed value bracket |
| 108 | if i >= len(content): |
| 109 | raise ValueError("properties without delimiter") |
| 110 | |
| 111 | value = _unescape_text(content[value_start:i]) |
| 112 | values.append(value) |
| 113 | i += 1 # Skip ']' |
| 114 | |
| 115 | properties[key] = values |
| 116 | |
| 117 | # Parse children |
| 118 | children = [] |
| 119 | while i < len(content) and content[i] == '(': |
| 120 | # Find matching parenthesis for this child |
| 121 | paren_count = 1 |
| 122 | start = i + 1 |
| 123 | i += 1 |
| 124 | |
| 125 | while i < len(content) and paren_count > 0: |
| 126 | if content[i] == '(': |
| 127 | paren_count += 1 |
| 128 | elif content[i] == ')': |
| 129 | paren_count -= 1 |
| 130 | i += 1 |
| 131 | |
| 132 | # Edge Case: Unmatched parenthesis |
| 133 | if paren_count != 0: |
| 134 | raise ValueError("tree missing") |
| 135 | |
| 136 | # Parse the child tree |
| 137 | child_content = content[start:i-1] |
| 138 | child_tree = _parse_tree(child_content) |
| 139 | children.append(child_tree) |
| 140 | |
| 141 | return SgfTree(properties, children) |
| 142 | |
| 143 | |
| 144 | def _unescape_text(text: str) -> str: |
| 145 | """ |
| 146 | Unescape SGF text according to SGF specification. |
| 147 | |
| 148 | Args: |
| 149 | text: Escaped SGF text |
| 150 | |
| 151 | Returns: |
| 152 | str: Unescaped text |
| 153 | """ |
| 154 | result = [] |
| 155 | i = 0 |
| 156 | while i < len(text): |
| 157 | if text[i] == '\\': |
| 158 | i += 1 |
| 159 | if i < len(text): |
| 160 | # If next character is newline, remove both \ and newline |
| 161 | if text[i] == '\n': |
| 162 | i += 1 |
| 163 | continue |
| 164 | # Any character after \ is inserted as-is |
| 165 | result.append(text[i]) |
| 166 | i += 1 |
| 167 | else: |
| 168 | # Edge Case: Escape at end of string |
| 169 | result.append('\\') |
| 170 | break |
| 171 | else: |
| 172 | # Convert whitespace (except newline) to space |
| 173 | if text[i] != '\n' and text[i].isspace(): |
| 174 | result.append(' ') |
| 175 | else: |
| 176 | result.append(text[i]) |
| 177 | i += 1 |
| 178 | |
| 179 | return ''.join(result) |
| 180 | # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after removing parentheses, missing semicolon, property key not in uppercase, missing delimiter after key, unclosed value bracket, unmatched parenthesis, escape at end of string |
| 181 | # Handled Edge Cases: Property key not in uppercase, missing delimiter after key, unclosed value bracket |
| 182 | # Handled Edge Cases: Unmatched parenthesis, escape at end of string |