A.2.1

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string containing SGF formatted data

    Returns:
        SgfTree: A tree structure representing the SGF data

    Raises:
        ValueError: "tree missing" when the input is empty, whitespace only,
            not wrapped in parentheses, or its content does not begin with
            ';'; "tree with no nodes" when the parentheses are empty. Errors
            raised while parsing the nodes propagate unchanged.
    """
    # Edge Case: Empty input or only whitespace
    if not input_string or input_string.isspace():
        raise ValueError("tree missing")

    # Edge Case: Not wrapped in '(' ... ')'
    if not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Remove the outer parentheses
    content = input_string[1:-1]

    # Edge Case: Empty content after removing parentheses
    if not content:
        raise ValueError("tree with no nodes")

    # Edge Case: Content doesn't start with ';' (a node without a tree)
    if not content.startswith(';'):
        raise ValueError("tree missing")

    return _parse_tree(content)
65
66
def _parse_tree(content: str) -> SgfTree:
    """
    Parse the content of a tree (without the outer parentheses).

    The content is a sequence of nodes, each opened by ';'. Every node
    after the first becomes the last child of the node before it, and any
    parenthesised subtree found after a node's properties becomes a child
    of that node.

    The sequence is walked in a single pass over the same string rather
    than by recursing on a fresh slice per node, so long sequences neither
    copy the remaining text repeatedly nor add a recursion level per node.

    Args:
        content: The content inside the outer parentheses; the first
            character is taken to be the ';' that opens the first node

    Returns:
        SgfTree: A tree structure representing the SGF data
    """
    length = len(content)

    # One (properties, children) pair per node, in document order.
    sequence = []
    index = 0

    while True:
        # content[index] is the ';' opening this node; parse what follows it.
        properties, index = _parse_properties(content, index + 1)

        children = []
        while index < length and content[index] != ';':
            if content[index] == '(':  # Start of a child tree
                child_tree, index = _parse_subtree(content, index)
                children.append(child_tree)
            else:
                # Any other character between subtrees is skipped
                index += 1

        sequence.append((properties, children))

        if index >= length:
            break

    # Build from the last node backwards so that each node receives the
    # node that follows it as its final child.
    tree = None
    for properties, children in reversed(sequence):
        if tree is not None:
            children.append(tree)
        tree = SgfTree(properties, children)

    return tree
103
104
def _parse_subtree(content: str, start_index: int) -> tuple[SgfTree, int]:
    """
    Parse a subtree starting from a given index.

    The matching ')' is located while skipping over property values:
    parentheses inside '[...]' are ordinary text and do not affect nesting,
    and a backslash inside a value escapes the next character (so an
    escaped ']' does not close the value).

    Args:
        content: The content to parse
        start_index: The index where the subtree starts (at '(')

    Returns:
        tuple: A tuple containing the parsed SgfTree and the index after the subtree

    Raises:
        ValueError: "tree missing" if the subtree does not start with '(',
            has no matching ')', or its content does not begin with ';';
            "properties without delimiter" if a property value is still
            open when the content ends; "tree with no nodes" if the
            parentheses are empty.
    """
    # Edge Case: Not starting with '('
    if content[start_index] != '(':
        raise ValueError("tree missing")

    # Find the matching closing parenthesis
    length = len(content)
    depth = 1
    index = start_index + 1
    in_value = False

    while index < length and depth > 0:
        char = content[index]
        if in_value:
            if char == '\\':
                # Skip the escaped character so an escaped ']' stays inside
                index += 1
            elif char == ']':
                in_value = False
        elif char == '[':
            in_value = True
        elif char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
        index += 1

    # Edge Case: Property value never closed
    if in_value:
        raise ValueError("properties without delimiter")

    # Edge Case: Unmatched parenthesis
    if depth > 0:
        raise ValueError("tree missing")

    # Extract the subtree content (between the matched parentheses)
    subtree_content = content[start_index + 1:index - 1]

    # Edge Case: Empty subtree
    if not subtree_content:
        raise ValueError("tree with no nodes")

    # Edge Case: Subtree doesn't start with ';'
    if not subtree_content.startswith(';'):
        raise ValueError("tree missing")

    # Parse the subtree
    tree = _parse_tree(subtree_content)

    return tree, index
150
151
152def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
153 """
154 Parse properties from the content starting at a given index.
155
156 Args:
157 content: The content to parse
158 start_index: The index to start parsing from
159
160 Returns:
161 tuple: A tuple containing the parsed properties dictionary and the index after the properties
162 """
163 properties = {}
164 index = start_index
165
166 while index < len(content) and content[index] not in ['(', ')']:
167 # If we encounter a ';' not at the start, it's the beginning of a new node
168 if content[index] == ';' and index > start_index:
169 break
170
171 # Skip semicolons at the start
172 if content[index] == ';':
173 index += 1
174 continue
175
176 # Parse key
177 key_start = index
178 while index < len(content) and content[index].isalpha():
179 if not content[index].isupper():
180 raise ValueError("property must be in uppercase")
181 index += 1
182
183 # Edge Case: No key found
184 if index == key_start:
185 raise ValueError("properties without delimiter")
186
187 key = content[key_start:index]
188
189 # Parse values
190 values = []
191
192 # Expect at least one value
193 if index >= len(content) or content[index] != '[':
194 raise ValueError("properties without delimiter")
195
196 while index < len(content) and content[index] == '[':
197 index += 1 # Skip '['
198 value_start = index
199
200 # Parse value
201 value_chars = []
202 while index < len(content) and content[index] != ']':
203 if content[index] == '\\':
204 # Handle escape sequences
205 index += 1
206 if index >= len(content):
207 raise ValueError("properties without delimiter")
208
209 # Add both the backslash and the escaped character
210 value_chars.append('\\')
211 value_chars.append(content[index])
212 else:
213 value_chars.append(content[index])
214 index += 1
215
216 # Edge Case: Unclosed bracket
217 if index >= len(content) or content[index] != ']':
218 raise ValueError("properties without delimiter")
219
220 # Process the value according to SGF Text type rules
221 value = _process_text_value(''.join(value_chars))
222 values.append(value)
223 index += 1 # Skip ']'
224
225 # Add property
226 properties[key] = values
227
228 return properties, index
229
230
231def _process_text_value(value: str) -> str:
232 """
233 Process a text value according to SGF Text type rules.
234
235 Args:
236 value: The raw text value
237
238 Returns:
239 str: The processed text value
240 """
241 result = []
242 i = 0
243
244 while i < len(value):
245 if value[i] == '\\':
246 i += 1
247 if i >= len(value):
248 # Trailing backslash - this should not happen in valid SGF but we handle it
249 result.append('\\')
250 break
251
252 char = value[i]
253 # If next character is newline, remove the backslash and newline and add a space
254 if char == '\n':
255 result.append(' ')
256 i += 1
257 continue
258 # Add the backslash and the character to preserve the escape sequence
259 result.append('\\')
260 result.append(char)
261 i += 1
262 elif value[i] == '\n':
263 # Newlines remain as newlines unless immediately after a backslash
264 result.append(value[i])
265 i += 1
266 elif value[i].isspace() and value[i] != '\n':
267 # All whitespace characters other than newline are converted to spaces
268 result.append(' ')
269 i += 1
270 else:
271 result.append(value[i])
272 i += 1
273
274 return ''.join(result)
275
276# Handled Edge Cases: Empty input, missing parentheses, missing semicolons,
277# unmatched parentheses, empty subtrees, missing brackets,
278# non-uppercase keys, trailing backslashes, invalid escape sequences
Test Name
Status
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.