A.2.1

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Turn an SGF (Smart Game Format) string into a tree of properties.

    Args:
        input_string: Text holding a single SGF game tree, e.g. "(;A[b])"

    Returns:
        SgfTree: The root node of the parsed tree

    Raises:
        ValueError: "tree missing" when the text is empty or is not wrapped
            in '(' ... ')'; "tree with no nodes" when the wrapped content
            does not begin with a ';' node marker
    """
    # The whole tree has to sit inside one pair of parentheses.
    is_wrapped = (
        bool(input_string)
        and input_string.startswith('(')
        and input_string.endswith(')')
    )
    if not is_wrapped:
        raise ValueError("tree missing")

    # Drop the enclosing '(' and ')'.
    inner = input_string[1:-1]

    # An empty body and a body that does not open with ';' are both
    # reported the same way, so a single prefix test covers them.
    if not inner.startswith(';'):
        raise ValueError("tree with no nodes")

    return _parse_tree(inner)
61
62
def _parse_tree(content: str) -> SgfTree:
    """
    Build a tree from SGF content whose first character is a node's ';'.

    Args:
        content: SGF content string starting with ';'

    Returns:
        SgfTree: The node described at the start of content, with every
            following parenthesised subtree, or the single following ';'
            node, attached as children
    """
    # Position 0 is the node's own ';', so property parsing starts at 1.
    properties, cursor = _parse_properties(content, 1)

    children = []
    total = len(content)

    while cursor < total:
        token = content[cursor]
        if token == '(':
            # A parenthesised variation becomes one child; continue after it.
            subtree, cursor = _parse_subtree(content, cursor)
            children.append(subtree)
        elif token == ';':
            # A bare ';' starts the next node in sequence; it becomes the
            # only further child and the recursive call consumes the rest.
            children.append(_parse_tree(content[cursor:]))
            break
        else:
            # Anything else is stepped over without raising.
            cursor += 1

    return SgfTree(properties=properties, children=children)
99
100
def _parse_subtree(content: str, start_index: int) -> tuple[SgfTree, int]:
    """
    Parse a subtree enclosed in parentheses.

    Parentheses that appear inside a bracketed property value are ordinary
    text and are ignored when looking for the subtree's closing ')'.

    Args:
        content: SGF content string
        start_index: Index where '(' is located

    Returns:
        Tuple of (parsed SgfTree, index just past the subtree's closing ')')

    Raises:
        ValueError: "tree missing" when start_index is not a '(' or the
            subtree is never closed; "tree with no nodes" when the '(' is
            not immediately followed by ';'
    """
    # Edge Case: No opening parenthesis at start_index
    if content[start_index] != '(':
        raise ValueError("tree missing")

    index = start_index + 1  # Skip '('

    # Edge Case: Premature end of content, or content after '(' that
    # doesn't start with ';'
    if index >= len(content) or content[index] != ';':
        raise ValueError("tree with no nodes")

    start_pos = index
    paren_count = 1
    in_value = False  # True while scanning between '[' and its closing ']'

    index += 1
    while index < len(content) and paren_count > 0:
        char = content[index]
        if in_value:
            if char == '\\':
                # The escaped character (possibly ']') is plain text.
                index += 1
            elif char == ']':
                in_value = False
        elif char == '[':
            in_value = True
        elif char == '(':
            paren_count += 1
        elif char == ')':
            paren_count -= 1
        index += 1

    # Edge Case: Unmatched parentheses
    if paren_count != 0:
        raise ValueError("tree missing")

    # index is one past the closing ')'; keep the leading ';', drop the ')'.
    subtree_content = content[start_pos:index - 1]

    return _parse_tree(subtree_content), index
149
150
def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
    """
    Parse the properties of one node from SGF content.

    A property is an all-uppercase key followed by one or more bracketed
    values. A value ends at the first ']' that is not preceded by a
    backslash; an unescaped '[' inside a value is ordinary text.

    Args:
        content: SGF content string
        start_index: Index to start parsing from

    Returns:
        Tuple of (properties dictionary mapping key to list of values,
        next index to process)

    Raises:
        ValueError: "property must be in uppercase" when a key starts with
            a non-uppercase character or contains a lowercase letter;
            "properties without delimiter" when a key is not followed by
            '[' or a value is never closed by ']'
    """
    properties = {}
    index = start_index
    length = len(content)

    while index < length:
        char = content[index]

        # ';', '(' and ')' all mean the properties section is over.
        if char in (';', '(', ')'):
            break

        if not char.isupper():
            raise ValueError("property must be in uppercase")

        key_start = index
        while index < length and content[index].isupper():
            index += 1

        # A lowercase letter directly after the uppercase run means the key
        # is mixed-case (e.g. "Aa"), which is a casing error rather than a
        # missing delimiter.
        if index < length and content[index].islower():
            raise ValueError("property must be in uppercase")

        key = content[key_start:index]

        # Edge Case: Property without values (no '[')
        if index >= length or content[index] != '[':
            raise ValueError("properties without delimiter")

        values = []
        while index < length and content[index] == '[':
            index += 1  # Skip '['
            value_start = index

            # Advance to the first unescaped ']'. A backslash consumes the
            # character after it, so an escaped ']' does not end the value.
            while index < length and content[index] != ']':
                index += 2 if content[index] == '\\' else 1

            # Edge Case: Value never closed
            if index >= length:
                raise ValueError("properties without delimiter")

            values.append(_unescape_text(content[value_start:index]))
            index += 1  # Skip ']'

        properties[key] = values

    return properties, index
218
219
def _unescape_text(text: str) -> str:
    """
    Process SGF text according to SGF specification.

    Rules applied, in order, to each character:
      * A backslash escapes the character after it; the backslash itself
        is dropped.
      * A newline directly after a backslash is removed entirely.
      * Any other newline is kept as a newline.
      * Every other whitespace character, escaped or not, becomes a space.
      * Everything else is copied through unchanged.

    Args:
        text: Raw text from SGF property value

    Returns:
        Processed text with escape sequences handled
    """
    result = []
    i = 0
    while i < len(text):
        char = text[i]

        if char == '\\':
            i += 1
            if i >= len(text):
                # Trailing backslash with nothing to escape: drop it.
                break
            char = text[i]
            if char == '\n':
                # Escaped newline is a soft line break and produces nothing.
                i += 1
                continue

        # Only newline survives as whitespace; tabs etc. collapse to a space.
        if char != '\n' and char.isspace():
            char = ' '
        result.append(char)
        i += 1

    return ''.join(result)
262# Handled Edge Cases: Empty input, missing parentheses, missing semicolon, invalid property format, unmatched brackets, property not in uppercase, missing delimiters, unmatched parentheses in subtrees, escape sequences in text
263# Handled Edge Cases: Empty input, missing parentheses, missing semicolon, invalid property format, unmatched brackets
264# Handled Edge Cases: No opening parenthesis, premature end of content, content after '(' doesn't start with ';', unmatched parentheses
265# Handled Edge Cases: Property without values, unmatched brackets, property not in uppercase, missing delimiters
266# Handled Edge Cases: Escape sequences in text according to SGF specification
Test Name | Status
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.