| 26 | 26 | |
| 27 | 27 | |
| 28 | 28 | def parse(input_string: str) -> SgfTree: |
| 29 | | pass |
| 29 | """ |
| 30 | Parse an SGF (Smart Game Format) string and return a tree structure of properties. |
| 31 | |
| 32 | Args: |
| 33 | input_string: A string containing SGF formatted data |
| 34 | |
| 35 | Returns: |
| 36 | SgfTree: A tree structure representing the SGF data |
| 37 | |
| 38 | Raises: |
| 39 | ValueError: If the input is malformed or doesn't conform to SGF format rules |
| 40 | """ |
| 41 | # Edge Case: Empty input or input that doesn't start with '(' |
| 42 | if not input_string or not input_string.startswith('('): |
| 43 | raise ValueError("tree missing") |
| 44 | |
| 45 | # Edge Case: Input that starts with '(' but doesn't end with ')' |
| 46 | if not input_string.endswith(')'): |
| 47 | raise ValueError("tree missing") |
| 48 | |
| 49 | # Parse the content inside the outermost parentheses |
| 50 | content = input_string[1:-1] |
| 51 | |
| 52 | # Edge Case: Empty content after removing parentheses |
| 53 | if not content: |
| 54 | raise ValueError("tree with no nodes") |
| 55 | |
| 56 | return _parse_tree(content) |
| 57 | |
| 58 | |
| 59 | def _parse_tree(content: str) -> SgfTree: |
| 60 | """ |
| 61 | Parse the content of a tree node. |
| 62 | |
| 63 | Args: |
| 64 | content: The content string to parse |
| 65 | |
| 66 | Returns: |
| 67 | SgfTree: A tree structure representing the parsed content |
| 68 | """ |
| 69 | # Edge Case: Content that doesn't start with ';' |
| 70 | if not content.startswith(';'): |
| 71 | raise ValueError("tree with no nodes") |
| 72 | |
| 73 | # Find the end of properties for this node |
| 74 | i = 1 # Skip the initial ';' |
| 75 | properties = {} |
| 76 | |
| 77 | # Parse properties |
| 78 | while i < len(content) and content[i].isalpha() and content[i].isupper(): |
| 79 | # Parse key |
| 80 | key_start = i |
| 81 | while i < len(content) and content[i].isalpha() and content[i].isupper(): |
| 82 | i += 1 |
| 83 | key = content[key_start:i] |
| 84 | |
| 85 | # Edge Case: Key without values |
| 86 | if i >= len(content) or content[i] != '[': |
| 87 | raise ValueError("properties without delimiter") |
| 88 | |
| 89 | # Parse values |
| 90 | values = [] |
| 91 | while i < len(content) and content[i] == '[': |
| 92 | i += 1 # Skip '[' |
| 93 | value_start = i |
| 94 | |
| 95 | # Parse value, handling escapes |
| 96 | while i < len(content) and content[i] != ']': |
| 97 | if content[i] == '\\': |
| 98 | i += 2 # Skip escape character and the next character |
| 99 | else: |
| 100 | i += 1 |
| 101 | |
| 102 | # Edge Case: Unclosed value bracket |
| 103 | if i >= len(content) or content[i] != ']': |
| 104 | raise ValueError("properties without delimiter") |
| 105 | |
| 106 | value = content[value_start:i] |
| 107 | values.append(_unescape_text(value)) |
| 108 | i += 1 # Skip ']' |
| 109 | |
| 110 | properties[key] = values |
| 111 | |
| 112 | # Parse children |
| 113 | children = [] |
| 114 | |
| 115 | # Handle variations |
| 116 | while i < len(content) and content[i] == '(': |
| 117 | # Find matching closing parenthesis |
| 118 | paren_count = 1 |
| 119 | start = i + 1 |
| 120 | i += 1 |
| 121 | |
| 122 | while i < len(content) and paren_count > 0: |
| 123 | if content[i] == '(': |
| 124 | paren_count += 1 |
| 125 | elif content[i] == ')': |
| 126 | paren_count -= 1 |
| 127 | i += 1 |
| 128 | |
| 129 | # Edge Case: Unmatched parenthesis |
| 130 | if paren_count != 0: |
| 131 | raise ValueError("tree missing") |
| 132 | |
| 133 | # Parse the child tree |
| 134 | child_content = content[start:i-1] |
| 135 | children.append(_parse_tree(child_content)) |
| 136 | |
| 137 | return SgfTree(properties, children) |
| 138 | |
| 139 | |
| 140 | def _unescape_text(text: str) -> str: |
| 141 | """ |
| 142 | Process escape sequences in SGF text according to SGF specification. |
| 143 | |
| 144 | Args: |
| 145 | text: The text to process |
| 146 | |
| 147 | Returns: |
| 148 | str: The processed text with escape sequences handled |
| 149 | """ |
| 150 | result = [] |
| 151 | i = 0 |
| 152 | |
| 153 | while i < len(text): |
| 154 | if text[i] == '\\': |
| 155 | i += 1 # Skip the escape character |
| 156 | |
| 157 | # Edge Case: Escape at end of string |
| 158 | if i >= len(text): |
| 159 | # According to SGF spec, this would be an invalid escape |
| 160 | # but we'll just ignore the trailing backslash |
| 161 | break |
| 162 | |
| 163 | char = text[i] |
| 164 | |
| 165 | # Handle newline removal after escape |
| 166 | if char == '\n': |
| 167 | # Newline is removed if it comes immediately after a backslash |
| 168 | pass |
| 169 | elif char.isspace(): |
| 170 | # Other whitespace characters are converted to spaces |
| 171 | result.append(' ') |
| 172 | else: |
| 173 | # Non-whitespace characters are inserted as-is |
| 174 | result.append(char) |
| 175 | else: |
| 176 | char = text[i] |
| 177 | if char != '\n' and char.isspace(): |
| 178 | # All whitespace characters other than newline are converted to spaces |
| 179 | result.append(' ') |
| 180 | else: |
| 181 | result.append(char) |
| 182 | i += 1 |
| 183 | |
| 184 | return ''.join(result) |
| 185 | |
| 186 | # Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after parentheses removal, content not starting with semicolon, key without values, unclosed value bracket, unmatched parenthesis, escape at end of string |