bohdan

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input string is not valid SGF
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse a tree from SGF content.
70
71 Args:
72 content: SGF content without outer parentheses
73
74 Returns:
75 SgfTree: The parsed SGF tree
76 """
77 # Skip the initial semicolon
78 index = 1
79
80 # Parse properties of the current node
81 properties, index = _parse_properties(content, index)
82
83 children = []
84
85 # Parse children
86 while index < len(content):
87 if content[index] == '(': # Start of a child tree
88 child_tree, consumed = _parse_subtree(content[index:])
89 children.append(child_tree)
90 index += consumed
91 elif content[index] == ';': # Another node in sequence
92 # This is a shorthand - the next node is a child of the current node
93 child_tree, consumed = _parse_subtree('(' + content[index:] + ')')
94 children.append(child_tree)
95 break # The rest is handled by the child
96 else:
97 index += 1
98
99 return SgfTree(properties, children)
100
101
102def _parse_subtree(content: str) -> tuple[SgfTree, int]:
103 """
104 Parse a subtree from SGF content.
105
106 Args:
107 content: SGF content starting with '('
108
109 Returns:
110 tuple[SgfTree, int]: The parsed subtree and number of characters consumed
111 """
112 # Edge Case: Missing opening parenthesis
113 if not content.startswith('('):
114 raise ValueError("tree missing")
115
116 # Find matching closing parenthesis
117 depth = 1
118 index = 1
119
120 while index < len(content) and depth > 0:
121 if content[index] == '(':
122 depth += 1
123 elif content[index] == ')':
124 depth -= 1
125 index += 1
126
127 # Edge Case: Unmatched parenthesis
128 if depth > 0:
129 raise ValueError("tree missing")
130
131 # Parse the content inside the parentheses
132 inner_content = content[1:index-1]
133
134 # Edge Case: Empty subtree
135 if not inner_content:
136 raise ValueError("tree with no nodes")
137
138 # Edge Case: Missing semicolon
139 if not inner_content.startswith(';'):
140 raise ValueError("tree missing")
141
142 tree = _parse_tree(inner_content)
143
144 return tree, index
145
146
147def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
148 """
149 Parse properties from SGF content starting at the given index.
150
151 Args:
152 content: SGF content
153 start_index: Index to start parsing from
154
155 Returns:
156 tuple[dict, int]: The parsed properties and the next index
157 """
158 properties = {}
159 index = start_index
160
161 while index < len(content) and content[index] not in '();':
162 # Parse property key
163 key_start = index
164
165 # Edge Case: Key must be uppercase
166 while index < len(content) and content[index].isalpha():
167 if not content[index].isupper():
168 raise ValueError("property must be in uppercase")
169 index += 1
170
171 # Edge Case: Empty key
172 if index == key_start:
173 raise ValueError("properties without delimiter")
174
175 key = content[key_start:index]
176
177 # Edge Case: Missing opening bracket
178 if index >= len(content) or content[index] != '[':
179 raise ValueError("properties without delimiter")
180
181 values = []
182
183 # Parse all values for this key
184 while index < len(content) and content[index] == '[':
185 index += 1 # Skip opening bracket
186 value_start = index
187
188 # Parse value
189 while index < len(content):
190 if content[index] == ']':
191 # Check if this ] is escaped
192 backslash_count = 0
193 check_index = index - 1
194 while check_index >= value_start and content[check_index] == '\\':
195 backslash_count += 1
196 check_index -= 1
197 # If odd number of backslashes, this ] is escaped
198 if backslash_count % 2 == 1:
199 index += 1
200 continue
201 else:
202 break
203 else:
204 index += 1
205
206 # Edge Case: Missing closing bracket
207 if index >= len(content) or content[index] != ']':
208 raise ValueError("properties without delimiter")
209
210 value = _unescape_text(content[value_start:index])
211 values.append(value)
212 index += 1 # Skip closing bracket
213
214 properties[key] = values
215
216 return properties, index
217
218
219def _unescape_text(text: str) -> str:
220 """
221 Unescape SGF text according to SGF specification.
222
223 Args:
224 text: Escaped SGF text
225
226 Returns:
227 str: Unescaped text
228 """
229 result = []
230 index = 0
231
232 while index < len(text):
233 if text[index] == '\\':
234 index += 1
235 if index < len(text):
236 char = text[index]
237 # Newlines are removed if they come immediately after a \
238 if char == '\n':
239 pass # Remove the newline
240 # All whitespace characters other than newline are converted to spaces
241 elif char in ' \t\r':
242 result.append(' ')
243 # Any non-whitespace character after \ is inserted as-is
244 else:
245 result.append(char)
246 else:
247 char = text[index]
248 # All whitespace characters other than newline are converted to spaces
249 if char in ' \t\r':
250 result.append(' ')
251 else:
252 result.append(char)
253 index += 1
254
255 return ''.join(result)
256# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon after opening parenthesis, unmatched parenthesis, empty subtree, missing semicolon, key must be uppercase, empty key, missing opening bracket, missing closing bracket
257# Handled Edge Cases: Empty key, missing opening bracket, missing closing bracket
258# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon after opening parenthesis, unmatched parenthesis, empty subtree, missing semicolon, key must be uppercase
259# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon after opening parenthesis
Test NameStatus
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.