sn62

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format

    Returns:
        SgfTree: The parsed SGF tree

    Raises:
        ValueError: "tree missing" if the input is empty, is not wrapped in
            '(' ... ')', or its content does not start with ';';
            "tree with no nodes" if nothing sits between the parentheses.
            Errors raised while parsing the content propagate unchanged.
    """
    # A tree must be wrapped in one outer pair of parentheses. The empty
    # string fails the startswith check, so it needs no separate test.
    if not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Drop the outer parentheses; what remains must be at least one node.
    content = input_string[1:-1]
    if not content:
        raise ValueError("tree with no nodes")

    # Every node is introduced by ';'.
    if not content.startswith(';'):
        raise ValueError("tree missing")

    return _parse_tree(content)
62
63
def _parse_tree(content: str) -> SgfTree:
    """
    Parse the content of an SGF tree.

    The content is a node (';' followed by its properties) followed by any
    mix of parenthesised child trees and further ';' nodes. A ';' node
    becomes the last child of the node before it, and everything after it
    belongs to that new node.

    The node sequence is walked iteratively rather than by recursing on a
    fresh slice for every ';', so long sequences neither copy the remaining
    text repeatedly nor exhaust the recursion limit.

    Args:
        content: The content inside the outer parentheses; its first
            character is skipped as the node's leading ';'

    Returns:
        SgfTree: The parsed SGF tree

    Raises:
        ValueError: "tree missing" if a character other than '(' or ';'
            follows a node's properties. Errors from property and subtree
            parsing propagate unchanged.
    """
    # Index 1 skips the initial semicolon.
    properties, index = _parse_properties(content, 1)

    # (properties, children) for each node of the ';' sequence, ordered
    # from this tree's root down to the most recently opened node.
    sequence = [(properties, [])]
    size = len(content)

    while index < size:
        marker = content[index]
        if marker == '(':
            # A parenthesised child tree attaches to the current node.
            child_tree, index = _parse_subtree(content, index)
            sequence[-1][1].append(child_tree)
        elif marker == ';':
            # Shorthand child: the next node becomes the current one.
            properties, index = _parse_properties(content, index + 1)
            sequence.append((properties, []))
        else:
            # Unexpected character
            raise ValueError("tree missing")

    # Build from the deepest node upwards so that each ';' node ends up as
    # the last child of the node before it.
    tree = None
    for properties, children in reversed(sequence):
        if tree is not None:
            children.append(tree)
        tree = SgfTree(properties=properties, children=children)
    return tree
101
102
def _parse_subtree(content: str, start_index: int) -> tuple[SgfTree, int]:
    """
    Parse a subtree enclosed in parentheses.

    Parentheses inside ``[...]`` property values are ordinary text and are
    ignored while looking for the matching ')'. Inside a value a backslash
    escapes the next character, so an escaped ']' does not end the value.

    Args:
        content: The content to parse
        start_index: The index where the subtree starts (at the opening parenthesis)

    Returns:
        tuple: A tuple containing the parsed SgfTree and the index after the subtree

    Raises:
        ValueError: "tree missing" if ``content[start_index]`` is not '(',
            the parenthesis is never closed, or the subtree does not start
            with ';'; "tree with no nodes" if the subtree is empty;
            "properties without delimiter" if a property value is still
            open when the content ends.
    """
    # Edge Case: Not starting with '('
    if content[start_index] != '(':
        raise ValueError("tree missing")

    # Find the matching closing parenthesis, skipping property values.
    depth = 1
    index = start_index + 1
    size = len(content)
    in_value = False

    while index < size and depth > 0:
        char = content[index]
        if in_value:
            if char == '\\':
                index += 1  # also step over the escaped character
            elif char == ']':
                in_value = False
        elif char == '[':
            in_value = True
        elif char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
        index += 1

    # Edge Case: A property value that is never closed
    if in_value:
        raise ValueError("properties without delimiter")

    # Edge Case: Unmatched parentheses
    if depth > 0:
        raise ValueError("tree missing")

    # Extract the subtree content (without its enclosing parentheses)
    subtree_content = content[start_index + 1:index - 1]

    # Edge Case: Empty subtree
    if not subtree_content:
        raise ValueError("tree with no nodes")

    # Edge Case: Subtree not starting with ';'
    if not subtree_content.startswith(';'):
        raise ValueError("tree missing")

    return _parse_tree(subtree_content), index
149
150
def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
    """
    Parse properties from the content.

    Reads zero or more ``KEY[value][value]...`` groups starting at
    ``start_index`` and stops at the first '(', ')' or ';', or at the end
    of ``content``.

    Args:
        content: The content to parse
        start_index: The index to start parsing from

    Returns:
        tuple: A tuple containing the parsed properties dictionary (key ->
        list of unescaped values) and the index after the properties

    Raises:
        ValueError: "property must be in uppercase" if a key does not start
            with a letter or contains a letter that is not uppercase;
            "properties without delimiter" if a key is not followed by '['
            or a value is never closed by ']'.
    """
    properties = {}
    index = start_index
    size = len(content)

    while index < size and content[index] not in ('(', ')', ';'):
        # Read the whole run of letters as the key before validating it, so
        # a mixed-case key such as "Aa" is reported as a case error rather
        # than as a missing delimiter.
        key_start = index
        while index < size and content[index].isalpha():
            index += 1
        key = content[key_start:index]

        # Edge Case: Key missing or not entirely uppercase
        if not key or not all(char.isupper() for char in key):
            raise ValueError("property must be in uppercase")

        # Edge Case: Key with no value
        if index >= size or content[index] != '[':
            raise ValueError("properties without delimiter")

        # At least one value follows: the check above saw a '['.
        values = []
        while index < size and content[index] == '[':
            index += 1  # Skip the opening bracket
            value_start = index

            # Advance to the closing bracket; a backslash escapes the next
            # character, so an escaped ']' does not end the value.
            while index < size and content[index] != ']':
                index += 2 if content[index] == '\\' else 1

            # Edge Case: Unclosed value bracket
            if index >= size:
                raise ValueError("properties without delimiter")

            values.append(_unescape_text(content[value_start:index]))
            index += 1  # Skip the closing bracket

        properties[key] = values

    return properties, index
216
217
def _unescape_text(text: str) -> str:
    """
    Unescape SGF text according to SGF specifications.

    A backslash escapes the character after it: an escaped newline is
    dropped, any other escaped whitespace becomes a space, and every other
    escaped character is kept as-is. Outside escapes, whitespace other than
    a newline becomes a space and newlines are kept.

    Args:
        text: The text to unescape

    Returns:
        str: The unescaped text
    """
    pieces = []
    chars = iter(text)

    for char in chars:
        if char == '\\':
            # Pull the escaped character from the same iterator so the
            # outer loop never sees it.
            escaped = next(chars, None)
            if escaped is None:
                # Edge Case: Escape character at end of string
                pieces.append('\\')
            elif escaped == '\n':
                continue  # an escaped newline vanishes
            elif escaped.isspace():
                pieces.append(' ')
            else:
                pieces.append(escaped)
        elif char != '\n' and char.isspace():
            pieces.append(' ')
        else:
            pieces.append(char)

    return ''.join(pieces)
Test Name | Status
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.