goodagent

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is malformed or doesn't follow SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Input doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Parse the tree and get the result along with the remaining string
50 tree, remaining = _parse_tree(input_string)
51
52 # Edge Case: There's remaining text after parsing the tree
53 if remaining:
54 raise ValueError("tree missing")
55
56 return tree
57
58
59def _parse_tree(s: str) -> tuple[SgfTree, str]:
60 """
61 Parse a tree from the input string.
62
63 Args:
64 s: Input string starting with '('
65
66 Returns:
67 tuple: (parsed SgfTree, remaining string)
68 """
69 # Edge Case: Missing opening parenthesis
70 if not s.startswith('('):
71 raise ValueError("tree missing")
72
73 s = s[1:] # Remove the opening '('
74
75 # Edge Case: Empty tree
76 if not s:
77 raise ValueError("tree with no nodes")
78
79 # Parse the sequence of nodes
80 nodes = []
81 while s and not s.startswith(')') and not s.startswith('('):
82 node, s = _parse_node(s)
83 nodes.append(node)
84
85 # Edge Case: No nodes in tree
86 if not nodes:
87 raise ValueError("tree with no nodes")
88
89 # Handle variations (children)
90 children = []
91 while s and s.startswith('('):
92 child, s = _parse_tree(s)
93 children.append(child)
94
95 # Edge Case: Missing closing parenthesis
96 if not s or not s.startswith(')'):
97 raise ValueError("tree missing")
98
99 s = s[1:] # Remove the closing ')'
100
101 # Build the tree structure - the first node is the root, others are its children chain
102 if not nodes:
103 raise ValueError("tree with no nodes")
104
105 # Create the root node
106 root = nodes[0]
107
108 # Chain the nodes - each node (except the last) has the next as its only child
109 current = root
110 for node in nodes[1:]:
111 current.children = [node]
112 current = node
113
114 # Add variations to the root
115 if children:
116 root.children.extend(children)
117
118 return root, s
119
120
121def _parse_node(s: str) -> tuple[SgfTree, str]:
122 """
123 Parse a single node from the input string.
124
125 Args:
126 s: Input string starting with ';'
127
128 Returns:
129 tuple: (parsed SgfTree node, remaining string)
130 """
131 # Edge Case: Missing semicolon
132 if not s.startswith(';'):
133 raise ValueError("tree missing")
134
135 s = s[1:] # Remove the semicolon
136
137 properties = {}
138
139 # Check if there are any properties
140 if s and s[0].isalpha():
141 # If there's a character but it's not uppercase, it's an error
142 if not s[0].isupper():
143 raise ValueError("property must be in uppercase")
144
145 # Parse properties while we have uppercase letters
146 while s and s[0].isupper():
147 key, values, s = _parse_property(s)
148 properties[key] = values
149
150 # Edge Case: No properties in node (empty node)
151 if not properties and (not s or (s[0] != ')' and s[0] != '(' and s[0] != ';')):
152 # This is a bit tricky - we need to distinguish between an empty node and a node with children
153 # For now, let's check if we have a valid continuation
154 pass
155
156 return SgfTree(properties=properties), s
157
158
159def _parse_property(s: str) -> tuple[str, list[str], str]:
160 """
161 Parse a property (key with one or more values) from the input string.
162
163 Args:
164 s: Input string starting with an uppercase letter
165
166 Returns:
167 tuple: (property key, list of property values, remaining string)
168 """
169 # Edge Case: Property key is not uppercase
170 if not s or not s[0].isupper():
171 raise ValueError("property must be in uppercase")
172
173 # Parse the key (sequence of uppercase letters)
174 key_end = 0
175 while key_end < len(s) and s[key_end].isupper():
176 key_end += 1
177
178 key = s[:key_end]
179 s = s[key_end:]
180
181 # Edge Case: Missing opening bracket for value
182 if not s or s[0] != '[':
183 raise ValueError("properties without delimiter")
184
185 values = []
186
187 # Parse one or more values
188 while s and s.startswith('['):
189 value, s = _parse_value(s)
190 values.append(value)
191
192 return key, values, s
193
194
195def _parse_value(s: str) -> tuple[str, str]:
196 """
197 Parse a single value from the input string.
198
199 Args:
200 s: Input string starting with '['
201
202 Returns:
203 tuple: (parsed value, remaining string)
204 """
205 # Edge Case: Missing opening bracket
206 if not s or s[0] != '[':
207 raise ValueError("properties without delimiter")
208
209 s = s[1:] # Remove the opening '['
210
211 value = []
212 i = 0
213
214 # Process characters until we find the closing bracket
215 while i < len(s):
216 char = s[i]
217
218 if char == ']' and (i == 0 or s[i-1] != '\\'):
219 # Found the end of the value (only if not escaped)
220 break
221 elif char == '\\':
222 # Handle escape sequences
223 i += 1
224 if i >= len(s):
225 # Edge Case: Escape character at end of string
226 raise ValueError("properties without delimiter")
227
228 next_char = s[i]
229 if next_char == '\n':
230 # Newline immediately after backslash - remove it
231 pass
232 elif next_char in (' ', '\t'):
233 # Escaped whitespace - convert to space
234 value.append(' ')
235 else:
236 # Any other escaped character - insert as-is
237 value.append(next_char)
238 elif char in (' ', '\t'):
239 # Convert all whitespace (except newlines) to spaces
240 value.append(' ')
241 else:
242 # Regular character
243 value.append(char)
244
245 i += 1
246
247 # Edge Case: Missing closing bracket
248 if i >= len(s) or s[i] != ']':
249 raise ValueError("properties without delimiter")
250
251 # Join the value and return with the remaining string
252 return ''.join(value), s[i+1:]
253
254# Handled Edge Cases: Empty input, missing opening parenthesis, empty tree, no nodes in tree, missing closing parenthesis, missing semicolon, property key not uppercase, missing opening bracket for value, missing opening bracket, escape character at end of string, missing closing bracket
Test NameStatus
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.