armor

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string containing SGF formatted data

    Returns:
        SgfTree: A tree structure representing the SGF data

    Raises:
        ValueError: "tree missing" when the input is empty, is not wrapped in
            '(' ... ')', or its content does not begin with ';';
            "tree with no nodes" when the parentheses enclose nothing.
            Errors raised while parsing the content propagate unchanged.
    """
    # Empty and whitespace-only inputs cannot start with '(' either, so a
    # single guard covers every "no enclosing tree" case.
    if not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Strip the outer parentheses; what remains is the node sequence.
    content = input_string[1:-1]

    if not content:
        raise ValueError("tree with no nodes")

    # Every node is introduced by ';'.
    if not content.startswith(';'):
        raise ValueError("tree missing")

    return _parse_tree(content)
65
66
def _parse_tree(content: str) -> SgfTree:
    """
    Build an SgfTree from tree content whose outer parentheses were removed.

    Args:
        content: Tree content beginning with the ';' of its first node

    Returns:
        SgfTree: The first node's properties together with its children

    Raises:
        ValueError: "tree missing" if something other than '(' or ';'
            follows the node's properties
    """
    # Position 0 holds the leading ';', so properties start at position 1.
    node_properties, cursor = _parse_properties(content, 1)

    subtrees = []
    end = len(content)
    while cursor < end:
        marker = content[cursor]

        if marker == '(':
            # A parenthesised variation: parse it and continue after it.
            subtree, cursor = _parse_subtree(content, cursor)
            subtrees.append(subtree)
            continue

        if marker != ';':
            # Neither a variation nor a following node.
            raise ValueError("tree missing")

        # A bare ';' starts the next node in the sequence; everything that
        # remains becomes the single child of this node.
        subtrees.append(_parse_tree(content[cursor:]))
        break

    return SgfTree(node_properties, subtrees)
103
104
def _parse_subtree(content: str, start_index: int) -> tuple[SgfTree, int]:
    """
    Parse a subtree enclosed in parentheses.

    Parentheses that appear inside a bracketed property value are literal
    text and are ignored when locating the subtree's closing parenthesis;
    a backslash inside a value escapes the character that follows it.

    Args:
        content: The content string
        start_index: The index where the subtree starts (at the '(')

    Returns:
        tuple: A tuple containing the parsed SgfTree and the index after the subtree

    Raises:
        ValueError: "tree missing" if the subtree does not start with '(',
            has no matching ')', or its content does not start with ';';
            "properties without delimiter" if a property value is still
            open when the content ends; "tree with no nodes" if the
            parentheses enclose nothing.
    """
    if content[start_index] != '(':
        raise ValueError("tree missing")

    depth = 1          # currently open parentheses, including the first one
    in_value = False   # True while scanning between '[' and its closing ']'
    index = start_index + 1
    length = len(content)

    while index < length and depth > 0:
        char = content[index]
        if in_value:
            if char == '\\':
                # Skip the escaped character so an escaped ']' does not
                # terminate the value.
                index += 1
            elif char == ']':
                in_value = False
        elif char == '[':
            in_value = True
        elif char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
        index += 1

    if in_value:
        # Same message the value parser uses for an unclosed bracket.
        raise ValueError("properties without delimiter")

    if depth > 0:
        raise ValueError("tree missing")

    # index is one past the matching ')'; drop both enclosing parentheses.
    subtree_content = content[start_index + 1:index - 1]

    if not subtree_content:
        raise ValueError("tree with no nodes")

    if not subtree_content.startswith(';'):
        raise ValueError("tree missing")

    return _parse_tree(subtree_content), index
149
150
def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
    """
    Read consecutive ``KEY[value]...`` properties starting at an index.

    Args:
        content: The content string
        start_index: The index to start parsing from

    Returns:
        tuple: The properties dictionary (key -> list of values) and the
        index of the first character that is not part of a property

    Raises:
        ValueError: "properties without delimiter" if no alphabetic key is
            found or a key has no bracketed value;
            "property must be in uppercase" if a key is not all uppercase
    """
    terminators = (';', '(', ')')
    total = len(content)
    parsed = {}
    pos = start_index

    # A node's properties end at the next node, a variation, or a close.
    while pos < total and content[pos] not in terminators:
        # Consume the run of letters that forms the key.
        name_end = pos
        while name_end < total and content[name_end].isalpha():
            name_end += 1

        if name_end == pos:
            raise ValueError("properties without delimiter")

        name = content[pos:name_end]
        if name.upper() != name or not name.isalpha():
            raise ValueError("property must be in uppercase")

        # Collect every bracketed value that directly follows the key.
        pos = name_end
        collected = []
        while pos < total and content[pos] == '[':
            text, pos = _parse_value(content, pos)
            collected.append(text)

        if not collected:
            raise ValueError("properties without delimiter")

        parsed[name] = collected

    return parsed, pos
200
201
def _parse_value(content: str, start_index: int) -> tuple[str, int]:
    """
    Parse a single property value enclosed in square brackets.

    Text rules applied while reading the value:
      - A backslash escapes the next character. A non-whitespace escaped
        character is inserted as-is, so ``\\n`` and ``\\t`` written as a
        backslash followed by a letter yield the letters ``n`` and ``t``.
      - A newline immediately after a backslash is removed.
      - Any other newline is kept.
      - Every other whitespace character, escaped or not, becomes a space.

    Args:
        content: The content string
        start_index: The index where the value starts (at the '[')

    Returns:
        tuple: A tuple containing the parsed value and the index after the closing ']'

    Raises:
        ValueError: "properties without delimiter" if the value does not
            start with '[', ends in a dangling backslash, or is never closed
    """
    if content[start_index] != '[':
        raise ValueError("properties without delimiter")

    length = len(content)
    index = start_index + 1  # Skip the '['
    value_chars = []

    while index < length and content[index] != ']':
        char = content[index]

        if char == '\\':
            # Look at the escaped character instead of the backslash.
            index += 1
            if index >= length:
                raise ValueError("properties without delimiter")
            char = content[index]
            if char == '\n':
                # Escaped newline: a soft line break, dropped entirely.
                index += 1
                continue

        # Remaining whitespace other than a newline collapses to a space;
        # everything else (including escaped ']' and '\\') is kept verbatim.
        if char != '\n' and char.isspace():
            char = ' '
        value_chars.append(char)
        index += 1

    # The loop only stops early on ']', so running off the end means the
    # bracket was never closed.
    if index >= length:
        raise ValueError("properties without delimiter")

    index += 1  # Skip the ']'

    return ''.join(value_chars), index
267
268# Handled Edge Cases: Empty input, input without outer parentheses, input without ending parenthesis, empty content after parentheses removal, content not starting with ';', unmatched parentheses, empty subtree, subtree not starting with ';', no key found, key not uppercase, no values found for a key, not starting with '[', escape at end of content, unclosed bracket
Test Name | Status
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.