let it go

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without the outer parentheses).
70
71 Args:
72 content: The content of the tree
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Skip the initial semicolon
78 index = 1
79
80 # Parse properties of the current node
81 properties, index = _parse_properties(content, index)
82
83 # Parse children
84 children = []
85 while index < len(content):
86 if content[index] == '(': # Start of a child tree (variation)
87 child_tree, consumed = _parse_subtree(content[index:])
88 children.append(child_tree)
89 index += consumed
90 elif content[index] == ';': # Start of a sequential child node
91 child_tree, consumed = _parse_sequential_node(content[index:])
92 children.append(child_tree)
93 index += consumed
94 else:
95 # This should not happen in a valid SGF
96 break
97
98 return SgfTree(properties=properties, children=children)
99
100
101def _parse_subtree(content: str) -> tuple[SgfTree, int]:
102 """
103 Parse a subtree (including the outer parentheses).
104
105 Args:
106 content: The content starting with '('
107
108 Returns:
109 tuple: (parsed tree, number of characters consumed)
110 """
111 # Find matching closing parenthesis
112 balance = 0
113 end_index = 0
114 for i, char in enumerate(content):
115 if char == '(':
116 # Edge Case: Properly handle nested parentheses
117 balance += 1
118 elif char == ')':
119 balance -= 1
120 if balance == 0:
121 end_index = i
122 break
123
124 # Edge Case: Unmatched parentheses
125 if balance != 0:
126 raise ValueError("tree missing")
127
128 # Parse the content inside the parentheses
129 inner_content = content[1:end_index]
130
131 # Edge Case: Empty subtree
132 if not inner_content:
133 raise ValueError("tree with no nodes")
134
135 # Edge Case: Missing semicolon
136 if not inner_content.startswith(';'):
137 raise ValueError("tree missing")
138
139 tree = _parse_tree(inner_content)
140 return tree, end_index + 1
141
142
143def _parse_sequential_node(content: str) -> tuple[SgfTree, int]:
144 """
145 Parse a sequential node (starting with semicolon, not in parentheses).
146
147 Args:
148 content: The content starting with ';'
149
150 Returns:
151 tuple: (parsed tree, number of characters consumed)
152 """
153 # Parse properties of this node
154 properties, index = _parse_properties(content, 1) # Start after the semicolon
155
156 # Parse children of this node
157 children = []
158 while index < len(content):
159 if content[index] == '(': # Start of a child tree (variation)
160 child_tree, consumed = _parse_subtree(content[index:])
161 children.append(child_tree)
162 index += consumed
163 elif content[index] == ';': # Start of a sequential child node
164 child_tree, consumed = _parse_sequential_node(content[index:])
165 children.append(child_tree)
166 index += consumed
167 else:
168 # End of this node
169 break
170
171 tree = SgfTree(properties=properties, children=children)
172 return tree, index
173
174
175def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
176 """
177 Parse properties from the content starting at start_index.
178
179 Args:
180 content: The content to parse
181 start_index: The index to start parsing from
182
183 Returns:
184 tuple: (properties dictionary, next index to parse)
185 """
186 properties = {}
187 index = start_index
188
189 while index < len(content) and content[index] not in '();':
190 # Parse key
191 key_start = index
192 while index < len(content) and content[index].isalpha():
193 index += 1
194
195 # Edge Case: Empty key (no alphabetic characters found)
196 if key_start == index:
197 break
198
199
200 # Edge Case: Property key must be uppercase
201 if not content[key_start:index].isupper():
202 raise ValueError("property must be in uppercase")
203
204 key = content[key_start:index]
205
206 # Edge Case: Property without values
207 if index >= len(content) or content[index] != '[':
208 raise ValueError("properties without delimiter")
209
210 # Parse values
211 values = []
212 while index < len(content) and content[index] == '[':
213 index += 1 # Skip opening bracket
214 value_start = index
215
216 # Parse value, handling escapes
217 value_chars = []
218 while index < len(content):
219 if content[index] == '\\':
220 # Add the backslash to the value
221 value_chars.append('\\')
222 index += 1
223 if index < len(content):
224 # Add the escaped character to the value
225 value_chars.append(content[index])
226 index += 1
227 elif content[index] == ']':
228 break
229 else:
230 value_chars.append(content[index])
231 index += 1
232
233 # Set the value and update the index
234 value = ''.join(value_chars)
235 value = _unescape_value(value)
236 values.append(value)
237 index += 1 # Skip closing bracket
238
239
240 # Edge Case: Property with no values
241 if not values:
242 raise ValueError("properties without delimiter")
243
244
245 properties[key] = values
246
247
248 return properties, index
249
250
251def _unescape_value(value: str) -> str:
252 """
253 Unescape an SGF value according to SGF text type rules.
254
255 Args:
256 value: The escaped value
257
258 Returns:
259 str: The unescaped value
260 """
261 result = []
262 i = 0
263 while i < len(value):
264 if value[i] == '\\':
265 i += 1
266 if i < len(value):
267 # Check if next character is newline
268 if value[i] == '\n':
269 # Edge Case: Backslash followed by newline removes the newline
270 pass # Don't add anything
271 else:
272 # Edge Case: Backslash followed by any other character adds that character
273 result.append(value[i])
274 i += 1
275 else:
276 # Edge Case: Trailing backslash
277 result.append('\\')
278 else:
279 char = value[i]
280 # Edge Case: Convert all whitespace (except newline) to space
281 if char != '\n' and char.isspace():
282 result.append(' ')
283 else:
284 result.append(char)
285 i += 1
286
287 return ''.join(result)
288# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree with no nodes, missing semicolon after opening parenthesis, properly handle nested parentheses, unmatched parentheses, empty subtree, missing semicolon, property key must be uppercase, property without values, unmatched bracket, property with no values, backslash followed by newline removes the newline, backslash followed by any other character adds that character, trailing backslash, convert all whitespace (except newline) to space
289# Handled Edge Cases: Property key must be uppercase, property without values, unmatched bracket, property with no values
290# Handled Edge Cases: Backslash followed by newline removes the newline, backslash followed by any other character adds that character, trailing backslash, convert all whitespace (except newline) to space
Test NameStatus
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.