sn62

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Convert SGF (Smart Game Format) text into a tree of nodes.

    The text must be exactly one tree: it starts with '(', ends with ')',
    and the first character inside the parentheses is ';'.

    Args:
        input_string: Text holding one SGF tree.

    Returns:
        SgfTree: The tree built from the text between the outer parentheses.

    Raises:
        ValueError: "tree missing" when the input is empty or blank, lacks the
            enclosing parentheses, or has no ';' right after the opening
            parenthesis; "tree with no nodes" when the parentheses enclose
            nothing.
    """
    # Blank input and input without the "(" ... ")" wrapper are reported alike.
    is_blank = not input_string or input_string.isspace()
    if is_blank or not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Everything between the outermost parentheses.
    body = input_string[1:-1]

    if body == "":
        raise ValueError("tree with no nodes")

    # Every node is introduced by ';', so the body has to open with one.
    if body[0] != ';':
        raise ValueError("tree missing")

    return _parse_tree_content(body)
65
66
def _parse_tree_content(content: str) -> SgfTree:
    """
    Build a tree from node text that has already lost its outer parentheses.

    The first node's properties are read, then everything after them becomes
    that node's children: each parenthesised group is one child, and a bare
    ';' makes the remainder of the text a single child parsed recursively.

    Args:
        content: Tree text whose first character is the node's ';'.

    Returns:
        SgfTree: The node with its properties and children.

    Raises:
        ValueError: "tree missing" when something other than '(' or ';'
            follows the node's properties.
    """
    # Position 0 is the node's ';', so its properties begin at position 1.
    node_properties, pos = _parse_properties(content, 1)

    subtrees = []
    total = len(content)

    while pos < total:
        marker = content[pos]

        if marker == '(':
            # Explicit variation: parse it and jump past its closing ')'.
            subtree, width = _parse_subtree(content[pos:])
            subtrees.append(subtree)
            pos += width
            continue

        if marker != ';':
            # Neither a variation nor a following node.
            raise ValueError("tree missing")

        # A bare ';' is shorthand for a single child; the recursive call
        # consumes everything that is left.
        subtrees.append(_parse_tree_content(content[pos:]))
        break

    return SgfTree(properties=node_properties, children=subtrees)
105
106
def _parse_subtree(content: str) -> tuple[SgfTree, int]:
    """
    Parse a subtree enclosed in parentheses.

    The matching ')' is located by counting parentheses, but characters
    inside a bracketed property value are ignored: there '(' and ')' are
    ordinary text, and a backslash escapes the character after it (so an
    escaped ']' does not close the value).

    Args:
        content: String starting with '('; text after the matching ')' is
            left untouched.

    Returns:
        tuple: (SgfTree, number of characters consumed including both
        parentheses)

    Raises:
        ValueError: "tree missing" when content does not start with '(',
            when no matching ')' exists, or when the subtree does not start
            with ';'; "properties without delimiter" when the text ends
            inside an unclosed property value; "tree with no nodes" when the
            parentheses enclose nothing.
    """
    if not content.startswith('('):
        raise ValueError("tree missing")

    depth = 0
    in_value = False
    close_index = -1
    pos = 0
    length = len(content)

    while pos < length:
        char = content[pos]
        if in_value:
            if char == '\\':
                # Skip the escaped character so "\]" cannot end the value.
                pos += 1
            elif char == ']':
                in_value = False
        elif char == '[':
            in_value = True
        elif char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                close_index = pos
                break
        pos += 1

    if close_index < 0:
        if in_value:
            # The scan ran off the end while still inside "[...".
            raise ValueError("properties without delimiter")
        # No matching closing parenthesis
        raise ValueError("tree missing")

    # Parse the content inside the parentheses
    inner_content = content[1:close_index]

    # Edge Case: Empty subtree
    if not inner_content:
        raise ValueError("tree with no nodes")

    # Edge Case: Missing semicolon
    if not inner_content.startswith(';'):
        raise ValueError("tree missing")

    tree = _parse_tree_content(inner_content)
    return tree, close_index + 1
148
149
150def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
151 """
152 Parse properties from the content starting at start_index.
153
154 Args:
155 content: The string content to parse
156 start_index: The index to start parsing from
157
158 Returns:
159 tuple: (properties dictionary, next index to parse)
160 """
161 properties = {}
162 index = start_index
163
164 while index < len(content):
165 char = content[index]
166
167 # If we encounter a parenthesis or semicolon, we're done with properties
168 if char in '();':
169 break
170
171 # Parse property key
172 if not char.isalpha() or not char.isupper():
173 raise ValueError("property must be in uppercase")
174
175 key_start = index
176 while index < len(content) and content[index].isalpha() and content[index].isupper():
177 index += 1
178
179 key = content[key_start:index]
180
181 # Edge Case: Key with no values
182 if index >= len(content) or content[index] != '[':
183 raise ValueError("properties without delimiter")
184
185 # Parse values
186 values = []
187 while index < len(content) and content[index] == '[':
188 index += 1 # Skip opening bracket
189 value_start = index
190
191 # Parse value, handling escapes
192 while index < len(content) and content[index] != ']':
193 if content[index] == '\\':
194 index += 2 # Skip escape and next character
195 else:
196 index += 1
197
198 # Edge Case: Unclosed bracket
199 if index >= len(content):
200 raise ValueError("properties without delimiter")
201
202 value = content[value_start:index]
203 value = _unescape_text(value)
204 values.append(value)
205 index += 1 # Skip closing bracket
206
207 properties[key] = values
208
209 return properties, index
210
211
212def _unescape_text(text: str) -> str:
213 """
214 Process escape sequences in SGF text according to SGF specification.
215
216 Args:
217 text: The text to process
218
219 Returns:
220 str: The processed text
221 """
222 result = []
223 i = 0
224 while i < len(text):
225 if text[i] == '\\':
226 i += 1
227 if i < len(text):
228 # Check if next character is newline
229 if text[i] == '\n':
230 # Newline immediately after \ is converted to space
231 result.append(' ')
232 i += 1
233 else:
234 # Any character after \ follows the whitespace rules
235 # All whitespace other than newline is converted to space
236 if text[i].isspace():
237 result.append(' ')
238 else:
239 result.append(text[i])
240 i += 1
241 else:
242 # Convert all whitespace other than newline to spaces
243 if text[i].isspace() and text[i] != '\n':
244 result.append(' ')
245 else:
246 result.append(text[i])
247 i += 1
248
249 return ''.join(result)
# Handled Edge Cases: Empty input, missing parentheses, missing semicolon, empty content, invalid property keys, missing delimiters, unclosed brackets, escape sequences
# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after removing parentheses, missing semicolon after opening parenthesis
# Handled Edge Cases: Empty subtree, missing semicolon in subtree
# Handled Edge Cases: Key with no values, unclosed bracket
# Handled Edge Cases: Newlines after backslash, whitespace conversion, escape character handling
Test Name | Status
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.