uv

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string containing SGF formatted data
34
35 Returns:
36 SgfTree: A tree structure representing the SGF data
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input or input that doesn't start with '('
42 if not input_string or not input_string.startswith('('):
43 raise ValueError("tree missing")
44
45 # Edge Case: Input that doesn't end with ')'
46 if not input_string.endswith(')'):
47 raise ValueError("tree missing")
48
49 # Remove the outer parentheses
50 content = input_string[1:-1]
51
52 # Edge Case: Empty content after removing parentheses
53 if not content:
54 raise ValueError("tree with no nodes")
55
56 # Edge Case: Content that doesn't start with ';'
57 if not content.startswith(';'):
58 raise ValueError("tree with no nodes")
59
60 return _parse_tree(content)
61
62
63def _parse_tree(content: str) -> SgfTree:
64 """
65 Parse a tree from SGF content.
66
67 Args:
68 content: SGF content string starting with ';'
69
70 Returns:
71 SgfTree: Parsed tree structure
72 """
73 # Skip the initial ';'
74 index = 1
75 properties = {}
76 children = []
77
78 # Parse properties
79 while index < len(content) and content[index] not in '();':
80 # Parse property key
81 key_start = index
82 while index < len(content) and content[index].isalpha() and content[index].isupper():
83 index += 1
84
85 # Edge Case: Property key is not in uppercase
86 if index == key_start:
87 raise ValueError("property must be in uppercase")
88
89 key = content[key_start:index]
90
91 # Edge Case: No delimiter after key
92 if index >= len(content) or content[index] != '[':
93 raise ValueError("properties without delimiter")
94
95 # Parse property values
96 values = []
97 while index < len(content) and content[index] == '[':
98 index += 1 # Skip '['
99 value_start = index
100
101 # Parse value, handling escapes
102 while index < len(content) and content[index] != ']':
103 if content[index] == '\\':
104 index += 2 # Skip escape and next character
105 else:
106 index += 1
107
108 # Edge Case: Unclosed value bracket
109 if index >= len(content):
110 raise ValueError("properties without delimiter")
111
112 value = _unescape_text(content[value_start:index])
113 values.append(value)
114 index += 1 # Skip ']'
115
116 properties[key] = values
117
118 # Parse children
119 while index < len(content) and content[index] in '();':
120 if content[index] == '(': # Start of a variation
121 # Find matching parenthesis
122 paren_count = 1
123 child_start = index
124 index += 1
125
126 while index < len(content) and paren_count > 0:
127 if content[index] == '(':
128 paren_count += 1
129 elif content[index] == ')':
130 paren_count -= 1
131 index += 1
132
133 # Edge Case: Unmatched parenthesis
134 if paren_count != 0:
135 raise ValueError("tree missing")
136
137 child_content = content[child_start:index]
138 children.append(_parse_tree(child_content[1:-1]))
139 elif content[index] == ';': # Direct child node
140 # Parse just this single node
141 child_start = index
142 index += 1
143
144 # Parse properties of this child node
145 while index < len(content) and content[index] not in '();':
146 # Parse key
147 key_start = index
148 while index < len(content) and content[index].isalpha() and content[index].isupper():
149 index += 1
150
151 # Edge Case: Property key is not in uppercase
152 if index == key_start:
153 raise ValueError("property must be in uppercase")
154
155 # Edge Case: No delimiter after key
156 if index >= len(content) or content[index] != '[':
157 raise ValueError("properties without delimiter")
158
159 # Parse values
160 while index < len(content) and content[index] == '[':
161 index += 1 # Skip '['
162 # Skip until ']'
163 while index < len(content) and content[index] != ']':
164 if content[index] == '\\':
165 index += 2
166 else:
167 index += 1
168 index += 1 # Skip ']'
169
170 child_content = content[child_start:index]
171 child_node = _parse_tree(child_content)
172
173 # If there are more nodes in sequence, handle them properly
174 if index < len(content) and content[index] == ';':
175 # For sequential nodes, we need to build a chain
176 # Parse the remaining nodes one by one
177 current_parent = child_node
178 while index < len(content) and content[index] == ';':
179 # Parse the next node
180 node_start = index
181 index += 1
182
183 # Parse properties of this node
184 while index < len(content) and content[index] not in '();':
185 # Parse key
186 key_start = index
187 while index < len(content) and content[index].isalpha() and content[index].isupper():
188 index += 1
189
190 # Edge Case: Property key is not in uppercase
191 if index == key_start:
192 raise ValueError("property must be in uppercase")
193
194 # Edge Case: No delimiter after key
195 if index >= len(content) or content[index] != '[':
196 raise ValueError("properties without delimiter")
197
198 # Parse values
199 while index < len(content) and content[index] == '[':
200 index += 1 # Skip '['
201 # Skip until ']'
202 while index < len(content) and content[index] != ']':
203 if content[index] == '\\':
204 index += 2
205 else:
206 index += 1
207 index += 1 # Skip ']'
208
209 node_content = content[node_start:index]
210 node = _parse_tree(node_content)
211 current_parent.children = [node]
212 current_parent = node
213
214 children.append(child_node)
215 else: # content[index] == ')'
216 break
217
218 return SgfTree(properties, children)
219
220
221def _unescape_text(text: str) -> str:
222 """
223 Process SGF text according to SGF text type rules.
224
225 Args:
226 text: Raw text from SGF property value
227
228 Returns:
229 str: Processed text with escape sequences handled
230 """
231 result = []
232 i = 0
233 while i < len(text):
234 if text[i] == '\\':
235 i += 1
236 if i < len(text):
237 # If next character is newline, remove both \ and newline
238 if text[i] == '\n':
239 i += 1
240 continue
241 # Any character after \ is inserted as-is
242 # Special case: if the character is ']', we need to double it in the output
243 if text[i] == ']':
244 result.append(']]')
245 else:
246 result.append(text[i])
247 i += 1
248 else:
249 # Trailing backslash - should not happen in valid SGF but we handle it
250 result.append('\\')
251 i += 1
252 else:
253 # Convert all whitespace (except newline) to spaces
254 if text[i] != '\n' and text[i].isspace():
255 result.append(' ')
256 else:
257 result.append(text[i])
258 i += 1
259
260 return ''.join(result)
261# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after parentheses removal, missing semicolon, property key not in uppercase, missing delimiter after key, unclosed value bracket, unmatched parenthesis, trailing backslash
Test NameStatus
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.