agent

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Input doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Input doesn't end with ')'
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Tree with no nodes (only semicolons)
61 if content.strip() == ';':
62 return SgfTree()
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without the outer parentheses).
70 """
71 # Edge Case: Content is empty
72 if not content:
73 raise ValueError("tree with no nodes")
74
75 nodes, remainder = _parse_nodes(content)
76
77 # Edge Case: No nodes parsed
78 if not nodes:
79 raise ValueError("tree with no nodes")
80
81 # Convert the list of nodes to a tree structure
82 # The first node is the root, and each subsequent node is a child of the previous one
83 # until we reach a node with children, then we go depth-first
84 return _build_tree(nodes)
85
86
87def _parse_nodes(content: str) -> tuple[list, str]:
88 """
89 Parse a sequence of nodes from the content.
90 Returns a list of (properties, children) tuples and the remainder of the content.
91 """
92 nodes = []
93
94 while content:
95 if content.startswith(';'):
96 # Parse a single node
97 node, content = _parse_node(content[1:])
98 nodes.append(node)
99 elif content.startswith('('):
100 # This is the start of children for the last node
101 # Edge Case: No nodes before children
102 if not nodes:
103 raise ValueError("tree missing")
104
105 children = []
106 while content.startswith('('):
107 # Parse a child tree
108 child_content, content = _parse_parenthesized_content(content)
109 child_nodes, _ = _parse_nodes(child_content[1:-1])
110
111 # Edge Case: Child tree has no nodes
112 if not child_nodes:
113 raise ValueError("tree with no nodes")
114
115 children.append(_build_tree(child_nodes))
116
117 # Add children to the last node
118 nodes[-1] = (nodes[-1][0], children)
119 else:
120 # Edge Case: Unexpected character
121 break
122
123 return nodes, content
124
125
126def _parse_node(content: str) -> tuple[tuple, str]:
127 """
128 Parse a single node (properties only, no children).
129 Returns a (properties, []) tuple and the remainder of the content.
130 """
131 properties = {}
132
133 while content and content[0].isalpha():
134 # Parse a property
135 key, content = _parse_property_key(content)
136 values, content = _parse_property_values(content)
137
138 # Edge Case: Property already exists
139 if key in properties:
140 # According to SGF spec, each key can only occur once
141 # But the problem statement says "each key can only occur once but may have multiple values"
142 # So we should extend the existing values list
143 properties[key].extend(values)
144 else:
145 properties[key] = values
146
147 return (properties, []), content
148
149
150def _parse_property_key(content: str) -> tuple[str, str]:
151 """
152 Parse a property key.
153 Returns the key and the remainder of the content.
154 """
155 key = ''
156 i = 0
157
158 # Parse the key
159 while i < len(content) and content[i].isalpha():
160 key += content[i]
161 i += 1
162
163 # Edge Case: Key is empty
164 if not key:
165 raise ValueError("properties without delimiter")
166
167 # Edge Case: Key is not in uppercase
168 if not key.isupper():
169 raise ValueError("property must be in uppercase")
170
171 return key, content[i:]
172
173
174def _parse_property_values(content: str) -> tuple[list[str], str]:
175 """
176 Parse property values.
177 Returns a list of values and the remainder of the content.
178 """
179 values = []
180
181 # Edge Case: No opening bracket
182 if not content.startswith('['):
183 raise ValueError("properties without delimiter")
184
185 # Parse the first value
186 value, content = _parse_property_value(content[1:])
187 values.append(value)
188
189 # Edge Case: No closing bracket
190 if not content.startswith(']'):
191 raise ValueError("properties without delimiter")
192
193 content = content[1:]
194
195 # Parse additional values if they exist (consecutive bracketed values)
196 while content.startswith('['):
197 value, content = _parse_property_value(content[1:])
198 values.append(value)
199
200 # Edge Case: No closing bracket
201 if not content.startswith(']'):
202 raise ValueError("properties without delimiter")
203
204 content = content[1:]
205
206 return values, content
207
208
209def _parse_property_value(content: str) -> tuple[str, str]:
210 """
211 Parse a single property value.
212 Returns the value and the remainder of the content.
213 """
214 value = ''
215 i = 0
216
217 while i < len(content):
218 if content[i] == ']':
219 # End of value
220 break
221 elif content[i] == '\\':
222 # Escape character
223 i += 1
224 # Edge Case: Escape at end of content
225 if i >= len(content):
226 value += '\\'
227 break
228
229 char = content[i]
230 if char == '\n':
231 # Newline escaped - remove if immediately after backslash
232 # Skip the newline character entirely
233 pass
234 elif char == ']':
235 # Escaped closing bracket - treat as literal and end value
236 value += ']'
237 i += 1
238 break
239 else:
240 # Any character after backslash is inserted as-is
241 value += char
242 elif content[i] == '\n':
243 # Newline - keep as is
244 value += content[i]
245 elif content[i].isspace() and content[i] != '\n':
246 # All whitespace other than newline becomes space
247 value += ' '
248 else:
249 # Regular character
250 value += content[i]
251 i += 1
252
253 return value, content[i:]
254
255
256def _parse_parenthesized_content(content: str) -> tuple[str, str]:
257 """
258 Parse content within parentheses, including nested parentheses.
259 Returns the content within the parentheses and the remainder.
260 """
261 # Edge Case: Doesn't start with '('
262 if not content.startswith('('):
263 raise ValueError("tree missing")
264
265 level = 0
266 i = 0
267
268 while i < len(content):
269 if content[i] == '(':
270 level += 1
271 elif content[i] == ')':
272 level -= 1
273 if level == 0:
274 return content[:i+1], content[i+1:]
275 i += 1
276
277 # Edge Case: Unmatched parentheses
278 raise ValueError("tree missing")
279
280
281def _build_tree(nodes: list) -> SgfTree:
282 """
283 Build a tree from a list of nodes.
284 Each node is a (properties, children) tuple.
285 """
286 # Edge Case: Empty nodes list
287 if not nodes:
288 raise ValueError("tree with no nodes")
289
290 # The first node is the root
291 root_properties, root_children = nodes[0]
292
293 # If there are more nodes, they form a chain of children
294 if len(nodes) > 1:
295 # Create the chain from back to front
296 current_tree = SgfTree(properties=nodes[-1][0], children=nodes[-1][1])
297
298 for i in range(len(nodes) - 2, 0, -1):
299 current_tree = SgfTree(properties=nodes[i][0], children=[current_tree] + nodes[i][1])
300
301 root_children = [current_tree] + root_children
302
303 return SgfTree(properties=root_properties, children=root_children)
304
305# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree with no nodes (only semicolons), Content is empty, No nodes parsed, No nodes before children, Child tree has no nodes, Unexpected character, Key is not in uppercase, Key is empty, No opening bracket, No closing bracket, Escape at end of content, Doesn't start with '(', Unmatched parentheses, Empty nodes list
Test NameStatus
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.