sourdough

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without outer parentheses).
70
71 Args:
72 content: The content of the tree
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Skip the initial semicolon
78 index = 1
79
80 # Parse properties of the current node
81 properties, index = _parse_properties(content, index)
82
83 # Parse children
84 children = []
85 while index < len(content):
86 if content[index] == '(': # Start of a child tree
87 child_tree, consumed = _parse_subtree(content[index:])
88 children.append(child_tree)
89 index += consumed
90 elif content[index] == ';': # Start of a sequential node
91 # Create a new node with the remaining content
92 remaining_content = content[index:]
93 child_tree, consumed = _parse_sequential_node(remaining_content)
94 children.append(child_tree)
95 index += consumed
96 break # Sequential nodes form a chain, so we only process one here
97 else:
98 # This shouldn't happen in a valid SGF
99 break
100
101 return SgfTree(properties=properties, children=children)
102
103
104def _parse_sequential_node(content: str) -> tuple[SgfTree, int]:
105 """
106 Parse a sequential node (starting with semicolon).
107
108 Args:
109 content: The content starting with ';'
110
111 Returns:
112 tuple: (parsed tree, number of characters consumed)
113 """
114 # Skip the semicolon
115 index = 1
116
117 # Parse properties of the current node
118 properties, index = _parse_properties(content, index)
119
120 # Parse children
121 children = []
122 consumed = index
123
124 # Check if there are more nodes or subtrees
125 if index < len(content):
126 if content[index] == '(': # Start of a child tree
127 child_tree, child_consumed = _parse_subtree(content[index:])
128 children.append(child_tree)
129 consumed += child_consumed
130 elif content[index] == ';': # Start of another sequential node
131 child_tree, child_consumed = _parse_sequential_node(content[index:])
132 children.append(child_tree)
133 consumed += child_consumed
134
135 return SgfTree(properties=properties, children=children), consumed
136
137
138def _parse_subtree(content: str) -> tuple[SgfTree, int]:
139 """
140 Parse a subtree (including its parentheses).
141
142 Args:
143 content: The content starting with '('
144
145 Returns:
146 tuple: (parsed tree, number of characters consumed)
147 """
148 # Find matching closing parenthesis
149 balance = 0
150 consumed = 0
151
152 for i, char in enumerate(content):
153 if char == '(':
154 balance += 1
155 elif char == ')':
156 balance -= 1
157 if balance == 0:
158 consumed = i + 1
159 break
160
161 # Edge Case: Mismatched parentheses
162 if balance != 0:
163 raise ValueError("tree missing")
164
165 # Parse the content inside the parentheses
166 inner_content = content[1:consumed-1]
167
168 # Edge Case: Empty subtree
169 if not inner_content:
170 raise ValueError("tree with no nodes")
171
172 # Edge Case: Missing semicolon
173 if not inner_content.startswith(';'):
174 raise ValueError("tree missing")
175
176 tree = _parse_tree(inner_content)
177 return tree, consumed
178
179
180def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
181 """
182 Parse properties from the content starting at start_index.
183
184 Args:
185 content: The content to parse
186 start_index: The index to start parsing from
187
188 Returns:
189 tuple: (properties dictionary, next index to parse)
190 """
191 properties = {}
192 index = start_index
193
194 while index < len(content) and content[index] not in '()':
195 # Parse key
196 key_start = index
197 while index < len(content) and content[index].isalpha():
198 index += 1
199
200 # Edge Case: Property key must be uppercase
201 key = content[key_start:index]
202 if not key.isupper():
203 raise ValueError("property must be in uppercase")
204
205 # Edge Case: Missing key
206 if not key:
207 raise ValueError("properties without delimiter")
208
209 # Parse values
210 values = []
211
212 # There must be at least one value
213 if index >= len(content) or content[index] != '[':
214 raise ValueError("properties without delimiter")
215
216 while index < len(content) and content[index] == '[':
217 index += 1 # Skip '['
218 value_start = index
219
220 # Parse value, handling escapes
221 while index < len(content) and content[index] != ']':
222 if content[index] == '\\':
223 index += 2 # Skip escape and next character
224 else:
225 index += 1
226
227 # Edge Case: Unclosed value bracket
228 if index >= len(content) or content[index] != ']':
229 raise ValueError("properties without delimiter")
230
231 value = content[value_start:index]
232 value = _unescape_value(value)
233 values.append(value)
234 index += 1 # Skip ']'
235
236 properties[key] = values
237
238 return properties, index
239
240
241def _unescape_value(value: str) -> str:
242 """
243 Process escape sequences in a value according to SGF text type rules.
244
245 Args:
246 value: The raw value string
247
248 Returns:
249 str: The processed value
250 """
251 result = []
252 i = 0
253 while i < len(value):
254 if value[i] == '\\':
255 i += 1
256 if i < len(value):
257 # Handle newline removal
258 if value[i] == '\n':
259 # Newline immediately after backslash - remove both
260 i += 1
261 elif value[i].isspace():
262 # Other whitespace after backslash becomes space
263 result.append(' ')
264 i += 1
265 elif value[i] == ']':
266 # Closing bracket - just insert the bracket
267 result.append(']')
268 i += 1
269 else:
270 # Any other character - becomes literal \ + character
271 result.append('\\' + value[i])
272 i += 1
273 else:
274 # Trailing backslash - keep as-is
275 result.append('\\')
276 else:
277 # All whitespace other than newline becomes space
278 if value[i] != '\n' and value[i].isspace():
279 result.append(' ')
280 else:
281 result.append(value[i])
282 i += 1
283
284 return ''.join(result)
285# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree with no nodes, missing semicolon after opening parenthesis, mismatched parentheses, empty subtree, missing semicolon in subtree, property key must be uppercase, missing key, missing delimiter for properties, unclosed value bracket
286# Handled Edge Cases: Property key must be uppercase, missing key, missing delimiter for properties, unclosed value bracket
287# Handled Edge Cases: Mismatched parentheses, empty subtree, missing semicolon
288# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree with no nodes, missing semicolon after opening parenthesis
Test NameStatus
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_two_nodes
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_properties_without_delimiter
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.