viola

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF (Smart Game Format) string and return a tree structure of properties.
31
32 Args:
33 input_string: A string containing SGF formatted data
34
35 Returns:
36 SgfTree: A tree structure representing the SGF data
37
38 Raises:
39 ValueError: If the input is malformed according to SGF specifications
40 """
41 # Edge Case: Empty input should raise "tree with no nodes" per overlapping exception analysis
42 if not input_string:
43 raise ValueError("tree with no nodes")
44
45 # Edge Case: Input that doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Input that starts with '(' but doesn't end with ')'
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Edge Case: Input with just parentheses but no content
54 if len(input_string) <= 2:
55 raise ValueError("tree with no nodes")
56
57 # Remove the outer parentheses
58 content = input_string[1:-1]
59
60 # Edge Case: Content is empty after removing parentheses
61 if not content:
62 raise ValueError("tree with no nodes")
63
64 # Parse the content
65 return _parse_tree_content(content)
66
67
68def _parse_tree_content(content: str) -> SgfTree:
69 """
70 Parse the content inside the outer parentheses of an SGF string.
71
72 Args:
73 content: The content inside the outer parentheses
74
75 Returns:
76 SgfTree: A tree structure representing the SGF data
77 """
78 # Edge Case: Content that doesn't start with ';'
79 if not content.startswith(';'):
80 raise ValueError("tree with no nodes")
81
82 # Skip the first ';'
83 content = content[1:]
84
85 # Parse the first node
86 node_content, content = _split_node_content(content)
87 properties = _parse_node_properties(node_content)
88
89 children = []
90
91 # Parse any children
92 while content:
93 if content.startswith('('):
94 # Parse a subtree
95 subtree_content, content = _split_subtree_content(content)
96 children.append(_parse_tree_content(subtree_content[1:-1]))
97 else:
98 # Parse another node in the same sequence
99 node_content, content = _split_node_content(content)
100 node_properties = _parse_node_properties(node_content)
101 # Create a new tree node with these properties and any further children
102 new_node = SgfTree(properties=node_properties)
103 # Handle any children of this node
104 if content and content.startswith('('):
105 subtree_content, content = _split_subtree_content(content)
106 new_node.children = [_parse_tree_content(subtree_content[1:-1])]
107 children = [new_node]
108 break
109
110 return SgfTree(properties=properties, children=children)
111
112
113def _split_node_content(content: str) -> tuple[str, str]:
114 """
115 Split the content into the current node's content and the rest.
116
117 Args:
118 content: The remaining content to parse
119
120 Returns:
121 tuple: (current node content, remaining content)
122 """
123 # Find the end of this node - either end of string, ';' for next node, or '(' for child
124 i = 0
125 while i < len(content):
126 if content[i] in [';', '(']:
127 break
128 i += 1
129
130 return content[:i], content[i:]
131
132
133def _split_subtree_content(content: str) -> tuple[str, str]:
134 """
135 Split the content into a subtree and the rest.
136
137 Args:
138 content: The remaining content to parse, starting with '('
139
140 Returns:
141 tuple: (subtree content including parentheses, remaining content)
142 """
143 # Edge Case: Content doesn't start with '('
144 if not content.startswith('('):
145 raise ValueError("tree missing")
146
147 balance = 1
148 i = 1 # Skip the first '('
149
150 while i < len(content) and balance > 0:
151 if content[i] == '(':
152 balance += 1
153 elif content[i] == ')':
154 balance -= 1
155 i += 1
156
157 # Edge Case: Unbalanced parentheses
158 if balance != 0:
159 raise ValueError("tree missing")
160
161 return content[:i], content[i:]
162
163
164def _parse_node_properties(content: str) -> dict:
165 """
166 Parse the properties of a node.
167
168 Args:
169 content: The content of a node (without the leading ';')
170
171 Returns:
172 dict: A dictionary of properties
173 """
174 properties = {}
175 i = 0
176
177 while i < len(content):
178 # Skip any whitespace (though SGF spec says no optional whitespace)
179 while i < len(content) and content[i].isspace():
180 i += 1
181
182 if i >= len(content):
183 break
184
185 # Parse property key
186 key_start = i
187
188 # Edge Case: Property key that is not all uppercase
189 while i < len(content) and content[i].isalpha():
190 if not content[i].isupper():
191 raise ValueError("property must be in uppercase")
192 i += 1
193
194 # Edge Case: No key found
195 if i == key_start:
196 raise ValueError("properties without delimiter")
197
198 key = content[key_start:i]
199
200 # Edge Case: Key without value
201 if i >= len(content) or content[i] != '[':
202 raise ValueError("properties without delimiter")
203
204 # Parse property values
205 values = []
206 while i < len(content) and content[i] == '[':
207 i += 1 # Skip '['
208 value_start = i
209
210 # Parse value, handling escaping
211 while i < len(content):
212 if content[i] == ']':
213 # Found the end of the value
214 break
215 elif content[i] == '\\':
216 i += 1 # Skip escape character
217 if i < len(content):
218 i += 1 # Skip escaped character
219 else:
220 i += 1
221
222 # Edge Case: Unclosed value bracket
223 if i >= len(content):
224 raise ValueError("properties without delimiter")
225
226 value = content[value_start:i]
227 value = _unescape_value(value)
228 values.append(value)
229 i += 1 # Skip ']'
230
231 properties[key] = values
232
233 return properties
234
235 return properties
236
237
238def _unescape_value(value: str) -> str:
239 """
240 Unescape an SGF value according to SGF Text type rules.
241
242 Args:
243 value: The escaped value string
244
245 Returns:
246 str: The unescaped value string
247 """
248 result = []
249 i = 0
250
251 while i < len(value):
252 if value[i] == '\\':
253 i += 1 # Skip the escape character
254
255 # Edge Case: Escape at end of string
256 if i >= len(value):
257 # Treat backslash at end as literal backslash
258 result.append('\\')
259 break
260
261 char = value[i]
262
263 # Handle newline removal after backslash
264 if char == '\n':
265 # Newline character is removed if immediately after \
266 i += 1
267 continue
268 elif char in [' ', '\t', '\r', '\f', '\v']: # All whitespace except newline
269 # Whitespace after \ is converted to space
270 result.append(' ')
271 else:
272 # Any non-whitespace character after \ is inserted as-is
273 result.append(char)
274 elif value[i] in [' ', '\t', '\r', '\f', '\v']: # All whitespace except newline
275 # All whitespace characters other than newline are converted to spaces
276 result.append(' ')
277 else:
278 result.append(value[i])
279
280 i += 1
281
282 return ''.join(result)
283# Handled Edge Cases: Empty input, missing parentheses, content without nodes, missing semicolon, unbalanced parentheses, non-uppercase properties, missing delimiters, unclosed brackets, escape at end of string, newline handling, whitespace conversion
284# Handled Edge Cases: Empty input, missing closing parenthesis, content without nodes
285# Handled Edge Cases: Missing semicolon, content without nodes
286# Handled Edge Cases: Missing opening parenthesis, unbalanced parentheses
287# Handled Edge Cases: Missing key, non-uppercase key, missing opening bracket, unclosed bracket
288# Handled Edge Cases: Escape at end of string, newline handling, whitespace conversion
Test NameStatus
test_empty_input
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_two_nodes
Fail
test_all_lowercase_property
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_properties_without_delimiter
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.