corpus

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format

    Returns:
        SgfTree: The parsed SGF tree

    Raises:
        ValueError: "tree missing" when the input is empty, does not start
            with '(', or has text left over after the outermost tree.
            Errors raised while parsing the tree itself propagate unchanged.
    """
    # Empty input and input that does not open a tree share the same error.
    if not input_string or not input_string.startswith('('):
        raise ValueError("tree missing")

    # _parse_tree consumes exactly one parenthesised tree and hands back
    # whatever text follows it.
    tree, remaining = _parse_tree(input_string)

    # Anything after the closing ')' of the outermost tree is rejected.
    if remaining:
        raise ValueError("tree missing")

    return tree
57
58
def _parse_tree(s: str) -> tuple[SgfTree, str]:
    """
    Parse one parenthesised tree from the front of the input string.

    The accepted shape is '(' node, then any mix of:
      * variations ('(' ... ')') directly after the first node, which become
        children of the first node;
      * further ';' nodes (shorthand for a single-child chain), each nested
        under the previous one;
      * variations after such a chain, which become children of the last
        node of the chain (the SGF "sequence followed by game trees" form);
    and finally the closing ')'.

    Args:
        s: The input string starting with '('

    Returns:
        tuple: A tuple containing the parsed SgfTree and the remaining string

    Raises:
        ValueError: "tree missing" if the opening or closing parenthesis is
            absent; "tree with no nodes" if nothing follows the '('.
            Errors from parsing nodes propagate unchanged.
    """
    # Edge Case: Missing opening parenthesis
    if not s.startswith('('):
        raise ValueError("tree missing")

    s = s[1:]  # Remove the opening '('

    # Edge Case: Empty tree
    if not s:
        raise ValueError("tree with no nodes")

    # Parse the first node; node[0] is its property dict.
    node, s = _parse_node(s)

    children = []

    # Variations that directly follow the first node hang off that node.
    while s.startswith('('):
        child, s = _parse_tree(s)
        children.append(child)

    # Shorthand notation: every further ';' node is the only child of the
    # node before it. `tail` tracks the deepest node built so far.
    tail = None
    while s.startswith(';'):
        child_node, s = _parse_node(s)
        link = SgfTree(properties=child_node[0], children=[])
        if tail is None:
            children.append(link)
        else:
            tail.children.append(link)
        tail = link

    # Variations written after a node chain branch from the chain's last
    # node, not from the first node of this tree.
    while tail is not None and s.startswith('('):
        child, s = _parse_tree(s)
        tail.children.append(child)

    # Edge Case: Missing closing parenthesis
    if not s.startswith(')'):
        raise ValueError("tree missing")

    # Drop the closing ')' from the remaining text.
    return SgfTree(properties=node[0], children=children), s[1:]
126
127
128def _parse_node(s: str) -> tuple[tuple[dict, list], str]:
129 """
130 Parse a node from the input string.
131
132 Args:
133 s: The input string starting with ';'
134
135 Returns:
136 tuple: A tuple containing the parsed node (properties, children) and the remaining string
137 """
138 # Edge Case: Missing semicolon
139 if not s.startswith(';'):
140 raise ValueError("tree with no nodes")
141
142 s = s[1:] # Remove the ';'
143 properties = {}
144
145 # Parse properties while we have letters (validation happens in _parse_property)
146 while s and s[0].isalpha():
147 key, values, s = _parse_property(s)
148 properties[key] = values
149
150 return (properties, []), s
151
152
def _parse_property(s: str) -> tuple[str, list[str], str]:
    """
    Read one property (a key followed by one or more bracketed values).

    Args:
        s: The input string starting with a property key

    Returns:
        tuple: A tuple containing the key, list of values, and the remaining string

    Raises:
        ValueError: "property must be in uppercase" if the leading run of
            letters is empty or is not all uppercase; "properties without
            delimiter" if the key is not followed by '['.
    """
    # The key is the leading run of letters; find where that run stops.
    split_at = next(
        (idx for idx, ch in enumerate(s) if not ch.isalpha()),
        len(s),
    )
    key, rest = s[:split_at], s[split_at:]

    # An empty key and a key with lowercase letters share the same message.
    if not key or not key.isupper():
        raise ValueError("property must be in uppercase")

    # A key must be followed by at least one '[value]'.
    if not rest.startswith('['):
        raise ValueError("properties without delimiter")

    # Collect every consecutive '[...]' as a value for this key.
    values = []
    while rest.startswith('['):
        value, rest = _parse_value(rest[1:])  # hand over text after the '['
        values.append(value)

    return key, values, rest
191
192
193def _parse_value(s: str) -> tuple[str, str]:
194 """
195 Parse a value from the input string.
196
197 Args:
198 s: The input string starting with the value content
199
200 Returns:
201 tuple: A tuple containing the parsed value and the remaining string (after the closing ']')
202 """
203 result = []
204 i = 0
205
206 while i < len(s):
207 char = s[i]
208
209 if char == ']':
210 # End of value
211 break
212 elif char == '\\':
213 # Escape character
214 i += 1
215 if i >= len(s):
216 # Edge Case: Escape character at end of string
217 raise ValueError("properties without delimiter")
218
219 next_char = s[i]
220 if next_char == '\n':
221 # Newline immediately after backslash is removed
222 pass # Do nothing, skip the newline
223 elif next_char == ']':
224 # Backslash before closing bracket is treated as literal backslash
225 result.append('\\')
226 # Don't consume the ']', let the main loop handle it
227 i -= 1
228 else:
229 # Any other character after backslash is inserted as-is
230 result.append('\\') # Add the backslash
231 result.append(next_char) # Add the escaped character
232 elif char == '\n':
233 # Newlines remain as newlines unless immediately after backslash
234 result.append(char)
235 elif char.isspace() and char != '\n':
236 # All whitespace other than newline converted to space
237 result.append(' ')
238 else:
239 # Regular character
240 result.append(char)
241
242 i += 1
243
244 # Edge Case: Missing closing bracket
245 if i >= len(s) or s[i] != ']':
246 raise ValueError("properties without delimiter")
247
248 # Return the parsed value and the remaining string after the closing ']'
249 return ''.join(result), s[i+1:]
250
251# Handled Edge Cases: Empty input, missing opening parenthesis, empty tree, missing closing parenthesis, missing semicolon, key is empty, missing opening bracket, escape character at end of string, missing closing bracket
Test Name | Status
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.