couple

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF (Smart Game Format) string into a tree of properties.

    Args:
        input_string: The SGF text. It must begin with '(' and end with ')'.

    Returns:
        SgfTree: The tree that ``_parse_tree`` builds from the text found
            between the outer parentheses.

    Raises:
        ValueError: "tree missing" when the input is empty or is not wrapped
            in parentheses; "tree with no nodes" when nothing lies between
            the parentheses. Errors raised while parsing the inner content
            propagate unchanged.
    """
    # The docstring has to be the first statement of the body to be attached
    # to the function, so nothing (not even `pass`) may precede it.
    is_wrapped = (
        bool(input_string)
        and input_string.startswith('(')
        and input_string.endswith(')')
    )
    if not is_wrapped:
        raise ValueError("tree missing")

    # Drop the outer parentheses; "()" leaves nothing to parse.
    content = input_string[1:-1]
    if not content:
        raise ValueError("tree with no nodes")

    return _parse_tree(content)
57
58
59def _parse_tree(content: str) -> SgfTree:
60 """
61 Parse the content of a tree (without outer parentheses).
62 """
63 # Find the first semicolon to separate the root node from children
64 semicolon_index = content.find(';')
65
66 # Edge Case: No semicolon means no nodes
67 if semicolon_index == -1:
68 raise ValueError("tree with no nodes")
69
70 # Parse the root node properties
71 root_content = content[1:semicolon_index] # Skip the first semicolon
72 properties = _parse_properties(root_content)
73
74 # Parse children
75 children_content = content[semicolon_index+1:]
76 children = _parse_children(children_content)
77
78 return SgfTree(properties=properties, children=children)
79
80
81def _parse_properties(content: str) -> dict:
82 """
83 Parse properties from a string.
84 """
85 properties = {}
86 i = 0
87
88 while i < len(content):
89 # Skip whitespace
90 if content[i].isspace():
91 i += 1
92 continue
93
94 # Parse key (must be uppercase)
95 key_start = i
96 while i < len(content) and content[i].isalpha():
97 if not content[i].isupper():
98 raise ValueError("property must be in uppercase")
99 i += 1
100
101 # Edge Case: No key found
102 if i == key_start:
103 raise ValueError("properties without delimiter")
104
105 key = content[key_start:i]
106
107 # Edge Case: No opening bracket after key
108 if i >= len(content) or content[i] != '[':
109 raise ValueError("properties without delimiter")
110
111 i += 1 # Skip the opening bracket
112
113 # Parse all values for this key
114 values = []
115 while i < len(content) and content[i] == '[':
116 i += 1 # Skip the opening bracket
117 value_start = i
118
119 # Parse value, handling escapes
120 while i < len(content) and content[i] != ']':
121 if content[i] == '\\':
122 i += 2 # Skip escape and next character
123 else:
124 i += 1
125
126 # Edge Case: Unclosed bracket
127 if i >= len(content) or content[i] != ']':
128 raise ValueError("properties without delimiter")
129
130 value = content[value_start:i]
131 value = _unescape_value(value)
132 values.append(value)
133 i += 1 # Skip the closing bracket
134
135 properties[key] = values
136
137 return properties
138
139
140def _unescape_value(value: str) -> str:
141 """
142 Process escape sequences in a value according to SGF Text type rules.
143 """
144 result = []
145 i = 0
146
147 while i < len(value):
148 if value[i] == '\\':
149 i += 1
150 if i < len(value):
151 # If next character is newline, remove both \ and newline
152 if value[i] == '\n':
153 i += 1
154 continue
155 # If next character is whitespace, convert to space
156 elif value[i].isspace():
157 result.append(' ')
158 i += 1
159 # Otherwise, insert as-is
160 else:
161 result.append(value[i])
162 i += 1
163 else:
164 # Trailing backslash - should not happen in valid SGF but we handle it
165 result.append('\\')
166 else:
167 # Convert all whitespace (except newline) to spaces
168 if value[i] != '\n' and value[i].isspace():
169 result.append(' ')
170 else:
171 result.append(value[i])
172 i += 1
173
174 return ''.join(result)
175
176
177def _parse_children(content: str) -> list:
178 """
179 Parse children from a string.
180 """
181 children = []
182 i = 0
183
184 while i < len(content):
185 # Skip whitespace
186 if content[i].isspace():
187 i += 1
188 continue
189
190 # If we find an opening parenthesis, it's a subtree
191 if content[i] == '(':
192 # Find matching closing parenthesis
193 paren_count = 1
194 start = i
195 i += 1
196
197 while i < len(content) and paren_count > 0:
198 if content[i] == '(':
199 paren_count += 1
200 elif content[i] == ')':
201 paren_count -= 1
202 i += 1
203
204 # Edge Case: Unmatched parenthesis
205 if paren_count != 0:
206 raise ValueError("tree missing")
207
208 subtree_content = content[start:i]
209 children.append(parse(subtree_content))
210 else:
211 # It's a node without parentheses, parse as a sequence
212 # Find the next parenthesis or end of string
213 node_end = i
214 while node_end < len(content) and content[node_end] != '(':
215 node_end += 1
216
217 node_content = content[i:node_end]
218 # Create a tree node for this content
219 semicolon_index = node_content.find(';')
220
221 if semicolon_index == -1:
222 # Single node with no semicolon - just properties
223 properties = _parse_properties(node_content)
224 children.append(SgfTree(properties=properties))
225 else:
226 # Node with properties and possibly more children
227 prop_content = node_content[0:semicolon_index]
228 properties = _parse_properties(prop_content)
229 remaining_content = node_content[semicolon_index+1:]
230
231 # Create node with properties and parse any further children
232 child_node = SgfTree(properties=properties)
233
234 # If there are more characters, they represent a linear chain
235 if remaining_content:
236 # Recursively parse the remaining content as children
237 further_children = _parse_linear_chain(remaining_content)
238 if further_children:
239 # Connect the chain properly
240 current_node = child_node
241 for next_node in further_children:
242 current_node.children = [next_node]
243 current_node = next_node
244
245 children.append(child_node)
246
247 i = node_end
248
249 return children
250
251
252def _parse_linear_chain(content: str) -> list:
253 """
254 Parse a linear chain of nodes (e.g., ;B[aa];W[ab])
255 """
256 nodes = []
257 i = 0
258
259 while i < len(content):
260 # Skip whitespace
261 if content[i].isspace():
262 i += 1
263 continue
264
265 # Must start with semicolon
266 if content[i] != ';':
267 raise ValueError("tree missing")
268
269 i += 1
270
271 # Find the next semicolon or end
272 node_end = i
273 while node_end < len(content) and content[node_end] != ';':
274 node_end += 1
275
276 node_content = content[i:node_end]
277 properties = _parse_properties(node_content)
278 nodes.append(SgfTree(properties=properties))
279 i = node_end
280
281 return nodes
282# Handled Edge Cases: Empty input, missing parentheses, missing semicolons, missing brackets, unclosed brackets, unmatched parentheses, lowercase properties, properties without delimiters, empty content after parentheses removal, trailing backslash in values
Test Name | Status
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_multiple_properties
Fail
test_multiple_property_values
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_single_node_tree
Fail
test_two_child_trees
Fail
test_two_nodes
Fail
test_within_property_values_newlines_remain_as_newlines
Fail
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_properties_without_delimiter
Pass
test_tree_with_no_nodes
Pass
test_upper_and_lowercase_property
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.