agent

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without outer parentheses).
70
71 Args:
72 content: The content of the tree
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Skip the initial semicolon
78 content = content[1:]
79
80 # Parse properties of the current node
81 properties, remaining = _parse_properties(content)
82
83 children = []
84
85 # Parse children
86 while remaining:
87 if remaining.startswith(';'):
88 # Parse a sequential child node
89 child_tree, remaining = _parse_sequential_node(remaining)
90 children.append(child_tree)
91 elif remaining.startswith('('):
92 # Parse a subtree (variation)
93 subtree, remaining = _parse_subtree(remaining)
94 children.append(subtree)
95 else:
96 # No more children
97 break
98
99 return SgfTree(properties=properties, children=children)
100
101
102def _parse_sequential_node(content: str) -> tuple[SgfTree, str]:
103 """
104 Parse a sequential node (starting with semicolon) and return both the tree and remaining content.
105
106 Args:
107 content: String starting with ';'
108
109 Returns:
110 tuple: (parsed tree, remaining string)
111 """
112 # Skip the initial semicolon
113 content = content[1:]
114
115 # Parse properties of the current node
116 properties, remaining = _parse_properties(content)
117
118 children = []
119
120 # Parse children
121 while remaining:
122 if remaining.startswith(';'):
123 # Parse a sequential child node
124 child_tree, remaining = _parse_sequential_node(remaining)
125 children.append(child_tree)
126 elif remaining.startswith('('):
127 # Parse a subtree (variation)
128 subtree, remaining = _parse_subtree(remaining)
129 children.append(subtree)
130 else:
131 # No more children
132 break
133
134 return SgfTree(properties=properties, children=children), remaining
135
136
137def _parse_properties(content: str) -> tuple[dict, str]:
138 """
139 Parse properties from the content string.
140
141 Args:
142 content: String containing properties
143
144 Returns:
145 tuple: (properties dict, remaining string)
146 """
147 properties = {}
148 remaining = content
149
150 while remaining and remaining[0] not in '();':
151 # Parse key
152 key_end = 0
153 while key_end < len(remaining) and remaining[key_end].isalpha() and remaining[key_end].isupper():
154 key_end += 1
155
156 # Edge Case: Property key is not in uppercase
157 if key_end == 0:
158 raise ValueError("property must be in uppercase")
159
160 key = remaining[:key_end]
161 remaining = remaining[key_end:]
162
163 # Edge Case: Missing opening bracket for property value
164 if not remaining.startswith('['):
165 raise ValueError("properties without delimiter")
166
167 # Parse all values for this key
168 values = []
169 while remaining.startswith('['):
170 remaining = remaining[1:] # Skip opening bracket
171 value, remaining = _parse_value(remaining)
172 values.append(value)
173
174 # Edge Case: Missing closing bracket
175 if not remaining.startswith(']'):
176 raise ValueError("properties without delimiter")
177
178 remaining = remaining[1:] # Skip closing bracket
179
180 properties[key] = values
181
182 return properties, remaining
183
184
185def _parse_value(content: str) -> tuple[str, str]:
186 """
187 Parse a single property value according to SGF Text type rules.
188
189 Args:
190 content: String containing the value (after opening bracket)
191
192 Returns:
193 tuple: (parsed value, remaining string)
194 """
195 value = []
196 i = 0
197
198 # Parse until we find an unescaped closing bracket
199 while i < len(content):
200 if content[i] == ']':
201 # Count how many consecutive backslashes precede this bracket
202 backslash_count = 0
203 j = i - 1
204 while j >= 0 and content[j] == '\\':
205 backslash_count += 1
206 j -= 1
207
208 # If even number of backslashes, the bracket is not escaped
209 # If odd number of backslashes, the bracket is escaped
210 if backslash_count % 2 == 0:
211 # Found an unescaped closing bracket
212 break
213 else:
214 # This bracket is escaped, treat it as a literal character
215 value.append(']')
216 i += 1
217 continue
218 elif content[i] == '\\': # Escape character
219 i += 1
220 if i >= len(content):
221 # Edge Case: Escape character at end of content
222 value.append('\\')
223 break
224
225 char = content[i]
226 if char == '\n':
227 # Newline immediately after \ is removed
228 pass
229 elif char.isspace():
230 # Whitespace after \ is converted to space, except \n which is removed
231 value.append(' ')
232 else:
233 # Any non-whitespace character after \ is inserted as-is
234 value.append(char)
235 elif content[i] == '\n':
236 # Newlines are kept as newlines
237 value.append('\n')
238 elif content[i].isspace() and content[i] != '\n':
239 # All whitespace other than newline is converted to space
240 value.append(' ')
241 else:
242 # Regular character
243 value.append(content[i])
244
245 i += 1
246
247 # Edge Case: Unclosed value (this should be caught earlier but just in case)
248 if i >= len(content):
249 raise ValueError("properties without delimiter")
250
251 return ''.join(value), content[i:]
252
253
254def _parse_subtree(content: str) -> tuple[SgfTree, str]:
255 """
256 Parse a subtree enclosed in parentheses.
257
258 Args:
259 content: String starting with '('
260
261 Returns:
262 tuple: (parsed subtree, remaining string)
263 """
264 # Edge Case: Not starting with '('
265 if not content.startswith('('):
266 raise ValueError("tree missing")
267
268 # Find matching closing parenthesis
269 depth = 0
270 i = 0
271
272 for i, char in enumerate(content):
273 if char == '(':
274 depth += 1
275 elif char == ')':
276 depth -= 1
277 if depth == 0:
278 # Found the matching closing parenthesis
279 subtree_content = content[1:i] # Exclude the parentheses
280 remaining = content[i+1:]
281 return _parse_tree(subtree_content), remaining
282
283 # Edge Case: Unmatched parentheses
284 raise ValueError("tree missing")
285
286# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree with no nodes, missing semicolon after opening parenthesis, property key is not in uppercase, missing opening bracket for property value, missing closing bracket, escape character at end of content, unclosed value, not starting with '(', unmatched parentheses
Test NameStatus
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.