let it go

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """Parse an SGF string and return a tree structure of properties.

    Only the outer shape of the input is validated here; the nodes,
    properties and variations inside the outer parentheses are handled by
    ``_parse_tree_content``.

    Args:
        input_string: A string in SGF format, e.g. ``"(;A[b])"``.

    Returns:
        SgfTree: The parsed SGF tree.

    Raises:
        ValueError: ``"tree missing"`` if the input is empty, is not wrapped
            in parentheses, or does not open its first node with ``;``;
            ``"tree with no nodes"`` if the parentheses enclose nothing.
            Errors raised while parsing the tree content propagate
            unchanged.
    """
    # Empty input, a missing "(" and a missing ")" all report the same error.
    if (
        not input_string
        or not input_string.startswith('(')
        or not input_string.endswith(')')
    ):
        raise ValueError("tree missing")

    # Drop the outer parentheses; what remains must begin with a node.
    content = input_string[1:-1]
    if not content:
        raise ValueError("tree with no nodes")
    if not content.startswith(';'):
        raise ValueError("tree missing")

    return _parse_tree_content(content)
65
66
def _find_subtree_end(content: str, start: int) -> int:
    """Return the index just past the ``)`` matching the ``(`` at ``start``.

    Parentheses inside ``[...]`` property values are ordinary text and are
    not counted; inside a value a backslash escapes the following character,
    so ``\\]`` does not close the value. If the parenthesis is never closed,
    ``len(content)`` is returned.
    """
    length = len(content)
    depth = 0
    in_value = False
    index = start
    while index < length:
        char = content[index]
        if in_value:
            if char == '\\':
                index += 1  # also step over the escaped character
            elif char == ']':
                in_value = False
        elif char == '[':
            in_value = True
        elif char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                return index + 1
        index += 1
    return length


def _parse_tree_content(content: str) -> SgfTree:
    """Parse the content inside the parentheses of an SGF tree.

    The first node's properties are read, then its children: a following
    ``;`` starts the next node of the sequence (which becomes a child and
    owns the rest of the content), and each parenthesised group is parsed
    as a child variation.

    Args:
        content: The content string without outer parentheses. It must
            start with the ``;`` that opens the first node.

    Returns:
        SgfTree: The parsed SGF tree.

    Raises:
        ValueError: Propagated from ``_parse_properties`` for malformed
            properties and from ``parse`` for malformed variations.
    """
    # Index 0 is the node's own semicolon; properties start right after it.
    properties, index = _parse_properties(content, 1)

    children = []
    length = len(content)
    while index < length:
        char = content[index]
        if char == ';':
            # The rest of the content is one chain of sequential nodes, so
            # the next node consumes everything that remains.
            children.append(_parse_tree_content(content[index:]))
            break
        if char == '(':
            # Variation: slice out the balanced "(...)" group, ignoring any
            # parentheses that appear inside property values.
            end = _find_subtree_end(content, index)
            children.append(parse(content[index:end]))
            index = end
        else:
            # Any other character between nodes/variations is skipped.
            index += 1

    return SgfTree(properties, children)
121
122
def _parse_sequential_node(sequential_content: str) -> SgfTree:
    """Parse a sequential node (content starting with a semicolon).

    A sequential node has exactly the same shape as the content of a tree:
    a ``;``, the node's properties, then either a further sequential node
    or parenthesised variations. The work is therefore delegated to
    ``_parse_tree_content`` instead of keeping a second copy of that loop.

    Args:
        sequential_content: Content starting with the node's semicolon.

    Returns:
        SgfTree: The parsed sequential node tree.

    Raises:
        ValueError: Propagated from ``_parse_tree_content``.
    """
    return _parse_tree_content(sequential_content)
170
171
172def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
173 """
174 Parse properties from the content string starting at start_index.
175
176 Args:
177 content: The content string
178 start_index: The index to start parsing from
179
180 Returns:
181 tuple: A tuple of (properties_dict, next_index)
182 """
183 properties = {}
184 index = start_index
185
186 # Edge Case: Parse properties while we have letters followed by [
187 # We need to check if the next character is a letter (could be uppercase or lowercase)
188 while index < len(content) and content[index].isalpha():
189 # Parse key (all consecutive letters)
190 key_start = index
191 while index < len(content) and content[index].isalpha():
192 index += 1
193
194 key = content[key_start:index]
195
196 # Validate that the key is not empty and consists only of uppercase letters
197 if not key or not all(c.isupper() for c in key):
198 raise ValueError("property must be in uppercase")
199
200 # Edge Case: Key must be followed by at least one value in brackets
201 if index >= len(content) or content[index] != '[':
202 raise ValueError("properties without delimiter")
203
204 # Parse all values for this key
205 values = []
206
207 # Edge Case: Parse multiple values for the same key
208 while index < len(content) and content[index] == '[':
209 index += 1 # Skip opening bracket
210 value_start = index
211
212 # Parse value, handling escaped characters
213 while index < len(content):
214 if content[index] == ']':
215 # Check if this bracket is escaped
216 # Count consecutive backslashes before this bracket
217 backslash_count = 0
218 check_index = index - 1
219 while check_index >= value_start and content[check_index] == '\\':
220 backslash_count += 1
221 check_index -= 1
222
223 # If odd number of backslashes, this bracket is escaped
224 if backslash_count % 2 == 1:
225 index += 1 # Skip the escaped bracket
226 else:
227 break # This is the real end of the value
228 else:
229 index += 1
230
231 # Edge Case: Missing closing bracket
232 if index >= len(content):
233 raise ValueError("properties without delimiter")
234
235 value = content[value_start:index]
236 # Process the value according to SGF text type rules
237 value = _process_sgf_text(value)
238 values.append(value)
239 index += 1 # Skip closing bracket
240
241 # Check if there's another value for the same property or a new property
242 # If the next character is not '[', we're done with this property
243 if index >= len(content) or content[index] != '[':
244 break
245
246 properties[key] = values
247
248 return properties, index
249
250
251def _process_sgf_text(text: str) -> str:
252 """
253 Process SGF text according to SGF Text type rules.
254
255 Args:
256 text: The raw text from SGF
257
258 Returns:
259 str: The processed text
260 """
261 result = []
262 i = 0
263
264 while i < len(text):
265 # Edge Case: Handle escape character
266 if text[i] == '\\':
267 i += 1
268 # Edge Case: Check if there's a next character
269 if i < len(text):
270 char = text[i]
271 # Edge Case: Newlines are removed if they come immediately after a \
272 if char == '\n':
273 pass # Remove the newline
274 # Edge Case: All whitespace characters other than newline are converted to spaces
275 elif char.isspace():
276 result.append(' ')
277 # Edge Case: Any non-whitespace character after \ is inserted as-is
278 else:
279 result.append(char)
280 # Edge Case: Handle escape at end of string
281 else:
282 result.append('\\')
283 else:
284 char = text[i]
285 # Edge Case: All whitespace characters other than newline are converted to spaces
286 if char != '\n' and char.isspace():
287 result.append(' ')
288 else:
289 result.append(char)
290 i += 1
291
292 return ''.join(result)
293
294# Handled Edge Cases: Empty input, Missing opening parenthesis, Missing closing parenthesis, Empty tree, Missing semicolon after opening parenthesis, Multiple variations - parse all child trees in parentheses, Parse properties while we have uppercase letters followed by [, Key must be followed by at least one value in brackets, Parse multiple values for the same key, Handle escaped characters, Missing closing bracket, Handle escape character, Check if there's a next character, Newlines are removed if they come immediately after a \, All whitespace characters other than newline are converted to spaces, Any non-whitespace character after \ is inserted as-is, Handle escape at end of string
Test Name | Status
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.