seahawk

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Input doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Input doesn't end with ')'
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Tree doesn't start with ';'
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse a tree content (without outer parentheses).
70
71 Args:
72 content: The content of a tree without the outer parentheses
73
74 Returns:
75 SgfTree: The parsed SGF tree
76 """
77 # Skip the initial semicolon
78 index = 1
79
80 # Parse properties of the current node
81 properties, index = _parse_properties(content, index)
82
83 # Parse children
84 children = []
85 while index < len(content):
86 if content[index] == '(': # Start of a child tree
87 child_tree, consumed = _parse_subtree(content[index:])
88 children.append(child_tree)
89 index += consumed
90 else:
91 # If we encounter a semicolon, it's a shorthand for a single child
92 if content[index] == ';':
93 # Parse the rest as a single child
94 child_content = content[index:]
95 child_tree = _parse_tree(child_content)
96 children.append(child_tree)
97 break
98 else:
99 # Unexpected character
100 raise ValueError("tree missing")
101
102 return SgfTree(properties=properties, children=children)
103
104
105def _parse_subtree(content: str) -> tuple[SgfTree, int]:
106 """
107 Parse a subtree enclosed in parentheses.
108
109 Args:
110 content: The content starting with '('
111
112 Returns:
113 tuple[SgfTree, int]: The parsed tree and the number of characters consumed
114 """
115 # Edge Case: Doesn't start with '('
116 if not content.startswith('('):
117 raise ValueError("tree missing")
118
119 # Find matching closing parenthesis, taking into account nested parentheses
120 depth = 0
121 end_index = -1
122 for i, char in enumerate(content):
123 if char == '(':
124 depth += 1
125 elif char == ')':
126 depth -= 1
127 if depth == 0:
128 end_index = i
129 break
130
131 # Edge Case: No matching closing parenthesis
132 if end_index == -1:
133 raise ValueError("tree missing")
134
135 # Parse the content inside the parentheses
136 inner_content = content[1:end_index]
137
138 # Edge Case: Empty subtree
139 if not inner_content:
140 raise ValueError("tree with no nodes")
141
142 # Edge Case: Subtree doesn't start with ';'
143 if not inner_content.startswith(';'):
144 raise ValueError("tree missing")
145
146 tree = _parse_tree(inner_content)
147 return tree, end_index + 1
148
149
150def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
151 """
152 Parse properties from the content starting at start_index.
153
154 Args:
155 content: The content to parse
156 start_index: The index to start parsing from
157
158 Returns:
159 tuple[dict, int]: The parsed properties and the index after the last property
160 """
161 properties = {}
162 index = start_index
163
164 while index < len(content):
165 char = content[index]
166
167 # If we encounter a '(' or ';', we've finished parsing properties
168 if char in '(;':
169 break
170
171 # Parse property key
172 if not char.isalpha() or not char.isupper():
173 # Edge Case: Property key is not uppercase
174 raise ValueError("property must be in uppercase")
175
176 key_start = index
177 while index < len(content) and content[index].isalpha() and content[index].isupper():
178 index += 1
179
180 key = content[key_start:index]
181
182 # Edge Case: Key with no values
183 if index >= len(content) or content[index] != '[':
184 raise ValueError("properties without delimiter")
185
186 # Parse property values
187 values = []
188 while index < len(content) and content[index] == '[':
189 index += 1 # Skip '['
190 value_start = index
191
192 # Find matching ']'
193 # Note: We don't handle escaping here - that's done in _unescape_text
194 # We just need to find the matching closing bracket
195 bracket_count = 1 # We start after the opening '['
196 while index < len(content):
197 if content[index] == ']':
198 bracket_count -= 1
199 if bracket_count == 0:
200 break
201 elif content[index] == '[':
202 bracket_count += 1
203 index += 1
204 else:
205 # Edge Case: No matching ']'
206 raise ValueError("properties without delimiter")
207
208 # Extract value
209 value_raw = content[value_start:index]
210 value = _unescape_text(value_raw)
211 values.append(value)
212 index += 1 # Skip ']'
213
214 properties[key] = values
215
216 return properties, index
217
218
219def _unescape_text(text: str) -> str:
220 """
221 Unescape SGF text according to SGF specification.
222
223 Args:
224 text: The escaped text
225
226 Returns:
227 str: The unescaped text
228 """
229 result = []
230 i = 0
231 while i < len(text):
232 if text[i] == '\\':
233 i += 1
234 if i < len(text):
235 char = text[i]
236 if char == '\n':
237 # Escaped newline - remove it
238 pass
239 elif char.isspace():
240 # Any whitespace character after \ is inserted as-is
241 result.append(char)
242 else:
243 # Any non-whitespace character after \ is inserted as-is
244 result.append(char)
245 else:
246 # Backslash at end - treat as literal backslash
247 result.append('\\')
248 else:
249 char = text[i]
250 if char == '\n':
251 # Regular newline
252 result.append(char)
253 elif char.isspace() and char != '\n':
254 # All whitespace characters other than newline are converted to spaces
255 result.append(' ')
256 else:
257 result.append(char)
258 i += 1
259
260 return ''.join(result)
261# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree doesn't start with ';', Doesn't start with '(', No matching closing parenthesis, Empty subtree, Subtree doesn't start with ';', Property key is not uppercase, Key with no values, No matching ']'"
Test NameStatus
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.