Alone

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string containing SGF formatted data
34
35 Returns:
36 SgfTree: A tree structure representing the SGF data
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input or input that doesn't start with '('
42 if not input_string or not input_string.startswith('('):
43 raise ValueError("tree missing")
44
45 # Edge Case: Input that doesn't end with ')'
46 if not input_string.endswith(')'):
47 # Check if this might be a property without delimiter case
48 if '(' in input_string and ';' in input_string:
49 # Try to parse to see if it's a property without delimiter
50 try:
51 # Remove the outer parentheses
52 content = input_string[1:]
53 if content.startswith(';'):
54 # Try to parse the properties
55 node_end = _find_node_end(content, 1)
56 node_content = content[1:node_end]
57 _parse_properties(node_content)
58 except ValueError as e:
59 if "properties without delimiter" in str(e):
60 raise ValueError("properties without delimiter")
61 elif "property must be in uppercase" in str(e):
62 raise ValueError("property must be in uppercase")
63 raise ValueError("tree missing")
64
65 # Remove the outer parentheses
66 content = input_string[1:-1]
67
68 # Edge Case: Empty content after removing parentheses
69 if not content:
70 raise ValueError("tree with no nodes")
71
72 # Edge Case: Content that doesn't start with ';'
73 if not content.startswith(';'):
74 raise ValueError("tree missing")
75
76 return _parse_tree_content(content)
77
78
79def _parse_tree_content(content: str) -> SgfTree:
80 """
81 Parse the content of a tree (without the outer parentheses).
82
83 Args:
84 content: The content of the tree
85
86 Returns:
87 SgfTree: A tree structure representing the content
88 """
89 # Find the first node
90 if not content.startswith(';'):
91 raise ValueError("tree missing")
92
93 # Parse the first node
94 node_end = _find_node_end(content, 1)
95 node_content = content[1:node_end]
96
97 properties = _parse_properties(node_content)
98
99 # Parse children
100 children_content = content[node_end:]
101 children = _parse_children(children_content)
102
103 return SgfTree(properties, children)
104
105
106def _find_node_end(content: str, start: int) -> int:
107 """
108 Find the end position of a node in the content.
109
110 Args:
111 content: The content to search in
112 start: The start position
113
114 Returns:
115 int: The end position of the node
116 """
117 i = start
118 while i < len(content):
119 # If we encounter a ';' or '(' or ')', it's the end of the current node
120 if content[i] in [';', '(', ')']:
121 break
122 i += 1
123 return i
124
125
126def _parse_properties(content: str) -> dict:
127 """
128 Parse properties from a node content.
129
130 Args:
131 content: The content of a node
132
133 Returns:
134 dict: A dictionary of properties
135
136 Raises:
137 ValueError: If properties are malformed
138 """
139 properties = {}
140 i = 0
141
142 while i < len(content):
143 # Skip whitespace
144 if content[i].isspace():
145 i += 1
146 continue
147
148 # Parse key
149 key_start = i
150 while i < len(content) and content[i].isalpha():
151 i += 1
152
153 # Edge Case: No key found
154 if i == key_start:
155 raise ValueError("properties without delimiter")
156
157 key = content[key_start:i]
158
159 # Edge Case: Key is not uppercase
160 if key != key.upper():
161 raise ValueError("property must be in uppercase")
162
163 # Parse values
164 if i >= len(content) or content[i] != '[':
165 raise ValueError("properties without delimiter")
166
167 values = []
168 while i < len(content) and content[i] == '[':
169 i += 1 # Skip '['
170 value_start = i
171
172 # Parse value according to SGF Text type rules
173 value_chars = []
174 while i < len(content) and content[i] != ']':
175 if content[i] == '\\':
176 # Handle escape character
177 i += 1
178 if i < len(content):
179 if content[i] == '\n':
180 # Newlines are removed if they come immediately after a '\'
181 i += 1
182 else:
183 # Any character after '\' is inserted as-is
184 value_chars.append(content[i])
185 i += 1
186 else:
187 # Edge Case: Escape at end of content
188 raise ValueError("properties without delimiter")
189 else:
190 # All whitespace characters other than newline are converted to spaces
191 if content[i].isspace() and content[i] != '\n':
192 value_chars.append(' ')
193 else:
194 value_chars.append(content[i])
195 i += 1
196
197 # Edge Case: Unclosed bracket
198 if i >= len(content) or content[i] != ']':
199 raise ValueError("properties without delimiter")
200
201 value = ''.join(value_chars)
202 values.append(value)
203 i += 1 # Skip ']'
204
205 properties[key] = values
206
207 return properties
208
209
210def _parse_children(content: str) -> list:
211 """
212 Parse children from content after a node.
213
214 Args:
215 content: The content after a node
216
217 Returns:
218 list: A list of child SgfTree objects
219 """
220 children = []
221 i = 0
222
223 while i < len(content):
224 # Skip whitespace
225 if content[i].isspace():
226 i += 1
227 continue
228
229 if content[i] == '(':
230 # Find matching parenthesis
231 paren_count = 1
232 start = i + 1
233 i += 1
234
235 while i < len(content) and paren_count > 0:
236 if content[i] == '(':
237 paren_count += 1
238 elif content[i] == ')':
239 paren_count -= 1
240 i += 1
241
242 # Edge Case: Unmatched parenthesis
243 if paren_count != 0:
244 raise ValueError("tree missing")
245
246 # Parse the child tree
247 child_content = content[start:i-1]
248
249 # Edge Case: Empty child content
250 if not child_content:
251 raise ValueError("tree with no nodes")
252
253 # Edge Case: Child content doesn't start with ';'
254 if not child_content.startswith(';'):
255 raise ValueError("tree missing")
256
257 child = _parse_tree_content(';' + child_content)
258 children.append(child)
259 elif content[i] == ';':
260 # This is a sequential node, create a child with it and any of its children
261 node_end = _find_node_end(content, i+1)
262 node_content = content[i+1:node_end]
263
264 properties = _parse_properties(node_content)
265
266 # Parse any children of this sequential node
267 remaining_content = content[node_end:]
268 remaining_children = _parse_children(remaining_content)
269
270 # Create a single child node with the properties and any remaining children
271 child = SgfTree(properties, remaining_children)
272 children.append(child)
273
274 # We've processed the rest of the content, so we're done
275 break
276 else:
277 # Edge Case: Unexpected character
278 raise ValueError("tree missing")
279
280 return children
281# Handled Edge Cases: Empty input, missing parentheses, missing semicolon, empty content, invalid property format, lowercase properties, unclosed brackets, unmatched parentheses, empty child content, unexpected characters
282# Handled Edge Cases: Empty input, missing parentheses, missing semicolon, empty content after removing parentheses
283# Handled Edge Cases: No key found, key not uppercase, missing opening bracket, unclosed bracket, escape at end of content
284# Handled Edge Cases: Unmatched parenthesis, empty child content, child content not starting with semicolon, unexpected characters
Test NameStatus
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_two_child_trees
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.