goodagent

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string containing SGF formatted data
34
35 Returns:
36 SgfTree: The parsed SGF tree structure
37
38 Raises:
39 ValueError: If the input string is malformed or doesn't follow SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Parse the tree and return the result
50 tree, remaining = _parse_tree(input_string)
51
52 # Edge Case: Unexpected characters after the tree
53 if remaining:
54 raise ValueError("tree missing")
55
56 return tree
57
58
59def _parse_tree(s: str) -> tuple[SgfTree, str]:
60 """
61 Parse a tree from the input string.
62
63 Args:
64 s: The input string starting with '('
65
66 Returns:
67 tuple: A tuple containing the parsed SgfTree and the remaining string
68
69 Raises:
70 ValueError: If the tree structure is invalid
71 """
72 # Edge Case: Missing opening parenthesis
73 if not s.startswith('('):
74 raise ValueError("tree missing")
75
76 s = s[1:] # Remove the opening parenthesis
77
78 # Edge Case: Empty tree
79 if not s:
80 raise ValueError("tree with no nodes")
81
82 # Parse the sequence of nodes
83 nodes = []
84 while s and s[0] != ')':
85 node, s = _parse_node(s)
86 nodes.append(node)
87
88 # Edge Case: No nodes in the tree
89 if not nodes:
90 raise ValueError("tree with no nodes")
91
92 # Edge Case: Missing closing parenthesis
93 if not s:
94 raise ValueError("tree missing")
95
96 s = s[1:] # Remove the closing parenthesis
97
98 # Build the tree structure from the nodes
99 # The first node is the root, and each subsequent node is a child of the previous one
100 # unless we encounter variations
101
102 # Handle variations - if there are multiple top-level nodes, they are variations
103 # But in a simple sequence, each node (except the first) is a child of the previous
104
105 # For now, we'll build a simple chain
106 # The first node is the root
107 root = nodes[0]
108 current = root
109
110 # Each subsequent node becomes a child of the previous one
111 for node in nodes[1:]:
112 current.children = [node]
113 current = node
114
115 # Handle variations - if after the first node we have parentheses, they are variations
116 # This is handled in _parse_node when we encounter '('
117
118 return root, s
119
120
121def _parse_node(s: str) -> tuple[SgfTree, str]:
122 """
123 Parse a node from the input string.
124
125 Args:
126 s: The input string starting with ';' or '('
127
128 Returns:
129 tuple: A tuple containing the parsed SgfTree node and the remaining string
130 """
131 # Edge Case: Missing semicolon or opening parenthesis for a new node/variation
132 if not s or (s[0] != ';' and s[0] != '('):
133 raise ValueError("tree missing")
134
135 # If it's a variation (starts with '('), parse it as a tree
136 if s[0] == '(':
137 tree, s = _parse_tree(s)
138 return tree, s
139
140 s = s[1:] # Remove the semicolon
141
142 properties = {}
143 children = []
144
145 # Parse properties
146 while s and s[0] != ')' and s[0] != ';' and s[0] != '(':
147 key, values, s = _parse_property(s)
148
149 # Edge Case: Property not in uppercase
150 if not key.isupper():
151 raise ValueError("property must be in uppercase")
152
153 properties[key] = values
154
155 # Parse children (variations)
156 while s and (s[0] == '(' or s[0] == ';'):
157 if s[0] == '(':
158 # This is a variation
159 child, s = _parse_tree(s)
160 children.append(child)
161 else:
162 # This is a sequential node
163 child, s = _parse_node(s)
164 children.append(child)
165
166 return SgfTree(properties, children), s
167
168
169def _parse_property(s: str) -> tuple[str, list[str], str]:
170 """
171 Parse a property (key with one or more values) from the input string.
172
173 Args:
174 s: The input string starting with a property key
175
176 Returns:
177 tuple: A tuple containing the property key, list of values, and the remaining string
178
179 Raises:
180 ValueError: If the property format is invalid
181 """
182 # Parse the key (sequence of uppercase letters)
183 key = ''
184 i = 0
185
186 # Edge Case: Missing key
187 if not s or not s[0].isalpha() or not s[0].isupper():
188 raise ValueError("property must be in uppercase")
189
190 while i < len(s) and s[i].isalpha() and s[i].isupper():
191 key += s[i]
192 i += 1
193
194 # Edge Case: Missing key
195 if not key:
196 raise ValueError("property must be in uppercase")
197
198 values = []
199
200 # Parse values
201 while i < len(s) and s[i] == '[':
202 value, i = _parse_value(s, i)
203 values.append(value)
204
205 # Edge Case: No values for the property
206 if not values:
207 raise ValueError("properties without delimiter")
208
209 return key, values, s[i:]
210
211
212def _parse_value(s: str, start: int) -> tuple[str, int]:
213 """
214 Parse a value (text within brackets) from the input string.
215
216 Args:
217 s: The input string
218 start: The starting position (should be at '[')
219
220 Returns:
221 tuple: A tuple containing the parsed value and the position after the closing ']'
222
223 Raises:
224 ValueError: If the value format is invalid
225 """
226 # Edge Case: Missing opening bracket
227 if s[start] != '[':
228 raise ValueError("properties without delimiter")
229
230 i = start + 1
231 value = ''
232
233 while i < len(s):
234 if s[i] == ']':
235 # Found closing bracket, end of value
236 break
237 elif s[i] == '\\':
238 # Handle escape sequences
239 i += 1
240 # Edge Case: Escape at end of string
241 if i >= len(s):
242 raise ValueError("properties without delimiter")
243
244 char = s[i]
245
246 # If the next character is a newline immediately after backslash, remove it
247 if char == '\n':
248 # Newlines are removed if they come immediately after a `\`
249 pass
250 else:
251 # All other characters after \ are inserted as-is
252 # All whitespace characters other than newline are converted to spaces
253 if char.isspace() and char != '\n':
254 value += ' '
255 else:
256 value += char
257 else:
258 # Regular character
259 char = s[i]
260 # All whitespace characters other than newline are converted to spaces
261 if char.isspace() and char != '\n':
262 value += ' '
263 else:
264 value += char
265 i += 1
266
267 # Edge Case: Missing closing bracket
268 if i >= len(s) or s[i] != ']':
269 raise ValueError("properties without delimiter")
270
271 return value, i + 1
272
273# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, no nodes in tree, missing semicolon, property not in uppercase, missing key, no values for property, missing opening bracket, missing closing bracket, escape at end of string
Test NameStatus
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.