Immune

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string containing SGF formatted data
34
35 Returns:
36 SgfTree: A tree structure representing the SGF data
37
38 Raises:
39 ValueError: If the input is malformed or doesn't conform to SGF format
40 """
41 # Edge Case: Empty input or input that doesn't start with '('
42 if not input_string or not input_string.startswith('('):
43 raise ValueError("tree missing")
44
45 # Edge Case: Input that starts with '(' but doesn't end with ')'
46 if not input_string.endswith(')'):
47 raise ValueError("tree missing")
48
49 # Remove the outer parentheses
50 content = input_string[1:-1]
51
52 # Edge Case: Empty content after removing parentheses
53 if not content:
54 raise ValueError("tree with no nodes")
55
56 # Edge Case: Content that doesn't start with ';'
57 if not content.startswith(';'):
58 raise ValueError("tree missing")
59
60 return _parse_tree(content)
61
62
63def _parse_tree(content: str) -> SgfTree:
64 """
65 Parse a tree from SGF content.
66
67 Args:
68 content: SGF content string starting with ';'
69
70 Returns:
71 SgfTree: Parsed tree structure
72 """
73 # Skip the initial ';'
74 content = content[1:]
75
76 # Parse properties
77 properties, remaining = _parse_properties(content)
78
79 children = []
80
81 # Parse children
82 while remaining:
83 if remaining.startswith('('):
84 # Parse a child tree
85 child_tree, remaining = _parse_subtree(remaining)
86 children.append(child_tree)
87 else:
88 # No more children
89 break
90
91 return SgfTree(properties=properties, children=children)
92
93
94def _parse_subtree(content: str) -> tuple[SgfTree, str]:
95 """
96 Parse a subtree from content starting with '('.
97
98 Args:
99 content: Content starting with '('
100
101 Returns:
102 Tuple of (parsed tree, remaining content)
103 """
104 # Find matching closing parenthesis
105 depth = 0
106 end_index = -1
107
108 for i, char in enumerate(content):
109 if char == '(':
110 depth += 1
111 elif char == ')':
112 depth -= 1
113 if depth == 0:
114 end_index = i
115 break
116
117 # Edge Case: Unmatched parenthesis
118 if end_index == -1:
119 raise ValueError("tree missing")
120
121 # Extract subtree content
122 subtree_content = content[1:end_index]
123
124 # Edge Case: Empty subtree
125 if not subtree_content:
126 raise ValueError("tree with no nodes")
127
128 # Edge Case: Subtree doesn't start with ';'
129 if not subtree_content.startswith(';'):
130 raise ValueError("tree missing")
131
132 tree = _parse_tree(subtree_content)
133
134 return tree, content[end_index + 1:]
135
136
137def _parse_properties(content: str) -> tuple[dict, str]:
138 """
139 Parse properties from content.
140
141 Args:
142 content: Content to parse properties from
143
144 Returns:
145 Tuple of (properties dictionary, remaining content)
146 """
147 properties = {}
148 remaining = content
149
150 while remaining and remaining[0].isalpha() and remaining[0].isupper():
151 # Parse key
152 key_end = 0
153 while key_end < len(remaining) and remaining[key_end].isalpha() and remaining[key_end].isupper():
154 key_end += 1
155
156 # Edge Case: Key with no values
157 if key_end >= len(remaining) or remaining[key_end] != '[':
158 raise ValueError("properties without delimiter")
159
160 key = remaining[:key_end]
161 remaining = remaining[key_end:]
162
163 # Parse values
164 values = []
165 while remaining.startswith('['):
166 # Find matching closing bracket
167 value_end = -1
168 escape = False
169
170 for i, char in enumerate(remaining[1:], 1): # Start from index 1 to skip the opening '['
171 if escape:
172 escape = False
173 continue
174 if char == '\\':
175 escape = True
176 elif char == ']':
177 value_end = i
178 break
179
180 # Edge Case: Unmatched bracket
181 if value_end == -1:
182 raise ValueError("properties without delimiter")
183
184 # Extract and process value
185 value = remaining[1:value_end]
186 processed_value = _process_text_value(value)
187 values.append(processed_value)
188
189 # Move past this value
190 remaining = remaining[value_end + 1:]
191
192 # Edge Case: No values found for key
193 if not values:
194 raise ValueError("properties without delimiter")
195
196 properties[key] = values
197
198 return properties, remaining
199
200
201def _process_text_value(value: str) -> str:
202 """
203 Process an SGF text value according to SGF specification.
204
205 Args:
206 value: Raw text value from SGF
207
208 Returns:
209 Processed text value
210 """
211 result = []
212 i = 0
213
214 while i < len(value):
215 char = value[i]
216
217 if char == '\\':
218 # Handle escape sequence
219 i += 1
220 if i < len(value):
221 next_char = value[i]
222 if next_char == 'n':
223 # Special case: \n should be treated as literal 'n' if not followed by newline
224 result.append('n')
225 elif next_char == 't':
226 # Special case: \t should be treated as literal 't'
227 result.append('t')
228 elif next_char == '\\':
229 # Escaped backslash
230 result.append('\\')
231 elif next_char == ']':
232 # Escaped closing bracket
233 result.append(']')
234 elif next_char == 'r':
235 # Special case: \r should be treated as literal 'r'
236 result.append('r')
237 elif next_char.isspace():
238 # Whitespace after backslash
239 if next_char == '\n':
240 # If newline immediately follows backslash, remove it
241 pass
242 else:
243 # Other whitespace becomes space
244 result.append(' ')
245 else:
246 # Any other character after backslash is inserted as-is
247 result.append(next_char)
248 else:
249 # Backslash at end of string
250 result.append('\\')
251 elif char == '\n' and i > 0 and value[i-1] == '\\':
252 # Newline immediately after backslash should be removed
253 # This case is handled above
254 result.append(char)
255 elif char.isspace() and char != '\n':
256 # All whitespace other than newline becomes space
257 result.append(' ')
258 else:
259 result.append(char)
260
261 i += 1
262
263 # Join and handle newlines that come immediately after backslash
264 processed = ''.join(result)
265
266 # Remove newlines that come immediately after backslash
267 final_result = []
268 i = 0
269 while i < len(processed):
270 if processed[i] == '\\' and i + 1 < len(processed) and processed[i + 1] == '\n':
271 # Skip the newline
272 i += 2
273 else:
274 final_result.append(processed[i])
275 i += 1
276
277 return ''.join(final_result)
278
279# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after parentheses, missing semicolon, unmatched parenthesis, empty subtree, subtree not starting with semicolon, key with no values, unmatched bracket, no values for key
280# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis
281# Handled Edge Cases: Empty content after removing parentheses, content not starting with semicolon
282# Handled Edge Cases: Unmatched parenthesis, empty subtree, subtree not starting with semicolon
283# Handled Edge Cases: Key with no values, unmatched bracket, no values for key
Test NameStatus
test_all_lowercase_property
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_two_nodes
Fail
test_upper_and_lowercase_property
Fail
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_properties_without_delimiter
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.