harrison

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Input doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Input doesn't end with ')'
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Tree doesn't start with ';'
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without outer parentheses).
70
71 Args:
72 content: The content of the tree
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Skip the initial ';'
78 index = 1
79
80 # Parse properties of the current node
81 properties, index = _parse_properties(content, index)
82
83 # Parse children
84 children = []
85 while index < len(content):
86 if content[index] == '(': # Start of a child tree
87 child_tree, consumed = _parse_subtree(content[index:])
88 children.append(child_tree)
89 index += consumed
90 else:
91 # If we have more content but it doesn't start with '(', it's a sequence
92 # Parse the next node as a child
93 child_tree, consumed = _parse_node_sequence(content[index:])
94 children.append(child_tree)
95 index += consumed
96
97 return SgfTree(properties=properties, children=children)
98
99
100def _parse_subtree(content: str) -> tuple[SgfTree, int]:
101 """
102 Parse a subtree enclosed in parentheses.
103
104 Args:
105 content: String starting with '('
106
107 Returns:
108 Tuple of (parsed tree, number of characters consumed)
109 """
110 # Edge Case: Missing opening parenthesis
111 if not content.startswith('('):
112 raise ValueError("tree missing")
113
114 # Find matching closing parenthesis
115 balance = 1
116 index = 1
117 while index < len(content) and balance > 0:
118 if content[index] == '(':
119 balance += 1
120 elif content[index] == ')':
121 balance -= 1
122 index += 1
123
124 # Edge Case: Unmatched parenthesis
125 if balance != 0:
126 raise ValueError("tree missing")
127
128 # Parse the content inside the parentheses
129 inner_content = content[1:index-1]
130
131 # Edge Case: Empty subtree
132 if not inner_content:
133 raise ValueError("tree with no nodes")
134
135 # Edge Case: Subtree doesn't start with ';'
136 if not inner_content.startswith(';'):
137 raise ValueError("tree missing")
138
139 tree = _parse_tree(inner_content)
140 return tree, index
141
142
143def _parse_node_sequence(content: str) -> tuple[SgfTree, int]:
144 """
145 Parse a sequence of nodes (shorthand notation).
146
147 Args:
148 content: String starting with ';'
149
150 Returns:
151 Tuple of (parsed tree, number of characters consumed)
152 """
153 # Edge Case: Missing semicolon
154 if not content.startswith(';'):
155 raise ValueError("tree missing")
156
157 # Parse properties of the current node
158 properties, index = _parse_properties(content, 1)
159
160 # If there are more nodes, recursively parse them as children
161 if index < len(content) and content[index] == ';':
162 # Parse the rest as a child node
163 child, consumed = _parse_node_sequence(content[index:])
164 return SgfTree(properties=properties, children=[child]), index + consumed
165 elif index < len(content) and content[index] == '(':
166 # Parse variations
167 children = []
168 while index < len(content) and content[index] == '(':
169 child, consumed = _parse_subtree(content[index:])
170 children.append(child)
171 index += consumed
172 return SgfTree(properties=properties, children=children), index
173 else:
174 # No more nodes, this is a leaf
175 return SgfTree(properties=properties), index
176
177
178def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
179 """
180 Parse properties from the content starting at start_index.
181
182 Args:
183 content: The content string
184 start_index: Index to start parsing from
185
186 Returns:
187 Tuple of (properties dictionary, index after parsing)
188 """
189 properties = {}
190 index = start_index
191
192 while index < len(content) and content[index].isalpha():
193 # Parse key
194 key_start = index
195 while index < len(content) and content[index].isalpha():
196 # Edge Case: Property key is not uppercase
197 if not content[index].isupper():
198 raise ValueError("property must be in uppercase")
199 index += 1
200
201 key = content[key_start:index]
202
203 # Edge Case: Missing opening bracket for property value
204 if index >= len(content) or content[index] != '[':
205 raise ValueError("properties without delimiter")
206
207 # Parse values
208 values = []
209 while index < len(content) and content[index] == '[':
210 index += 1 # Skip '['
211 value_start = index
212
213 # Parse value, handling escapes
214 while index < len(content) and content[index] != ']':
215 if content[index] == '\\':
216 # Include the backslash in the value and move to next character
217 # The escaped character will be processed in the next iteration
218 index += 1
219 else:
220 index += 1
221
222 # Edge Case: Unmatched bracket
223 if index >= len(content) or content[index] != ']':
224 raise ValueError("properties without delimiter")
225
226 value = content[value_start:index]
227 # Process the value according to SGF text type rules
228 # Process the value according to SGF text type rules
229 value = _process_text_value(value)
230 values.append(value)
231 index += 1 # Skip ']'
232
233 properties[key] = values
234
235 return properties, index
236
237
238def _process_text_value(value: str) -> str:
239 """
240 Process a text value according to SGF text type rules.
241
242 Args:
243 value: The raw value string
244
245 Returns:
246 Processed value string
247 """
248 result = []
249 i = 0
250 while i < len(value):
251 if value[i] == '\\':
252 # Handle escape sequences
253 i += 1
254 if i < len(value):
255 # If next character is newline, remove it
256 if value[i] == '\n':
257 i += 1 # Skip the newline
258 else:
259 # Any character after \ is inserted as-is
260 # (including whitespace characters, which are converted to spaces)
261 if value[i].isspace() and value[i] != '\n':
262 result.append(' ')
263 else:
264 result.append(value[i])
265 i += 1
266 else:
267 # Convert all whitespace characters other than newline to spaces
268 if value[i].isspace() and value[i] != '\n':
269 result.append(' ')
270 i += 1
271 else:
272 result.append(value[i])
273 i += 1
274
275 return ''.join(result)
276
277# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree doesn't start with ';', Missing opening parenthesis, Unmatched parenthesis, Empty subtree, Subtree doesn't start with ';', Missing semicolon, Property key is not uppercase, Missing opening bracket for property value, Unmatched bracket
Test NameStatus
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.