great

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF (Smart Game Format) string and return a tree structure of properties.

    Args:
        input_string: A string containing SGF formatted data, e.g. "(;A[B])"

    Returns:
        SgfTree: A tree structure representing the SGF data

    Raises:
        ValueError: "tree missing" if the input is empty or is not wrapped in
            '(' ... ')'; "tree with no nodes" if the text inside the outer
            parentheses is empty or does not start with ';'
    """
    # Edge Case: Empty input, or input that is not wrapped in '(' ... ')'
    if (
        not input_string
        or not input_string.startswith('(')
        or not input_string.endswith(')')
    ):
        raise ValueError("tree missing")

    # Remove the outer parentheses
    content = input_string[1:-1]

    # Edge Case: Empty content, or content that doesn't start with ';'
    # (startswith is False for an empty string, so one check covers both)
    if not content.startswith(';'):
        raise ValueError("tree with no nodes")

    return _parse_tree_content(content)
61
62
def _find_node_end(content: str, start: int) -> int:
    """
    Return the index where the node starting before `start` ends.

    A node ends at the first ';' or '(' that is not inside a '[...]' value.
    Inside a value, a backslash escapes the following character, so an
    escaped ']' does not close the value. Returns len(content) when no
    such delimiter exists.
    """
    in_value = False
    i = start
    while i < len(content):
        ch = content[i]
        if in_value:
            if ch == '\\':
                i += 1  # Skip the escaped character as well
            elif ch == ']':
                in_value = False
        elif ch == '[':
            in_value = True
        elif ch == ';' or ch == '(':
            return i
        i += 1
    return len(content)


def _parse_tree_content(content: str) -> SgfTree:
    """
    Parse the content of a tree (without the outer parentheses).

    The content is a sequence of one or more ';'-introduced nodes, optionally
    followed by parenthesised variations. Each node in the sequence becomes
    the single child of the node before it; the variations become the
    children of the last node in the sequence.

    Args:
        content: The content of the tree. It is expected to begin with the
            root node's ';' (index 0 is skipped without being checked).

    Returns:
        SgfTree: A tree structure representing the content

    Raises:
        ValueError: Propagated from property or child parsing
    """
    # Collect the properties of every node in the leading ';' sequence.
    # Done iteratively so long node sequences do not hit the recursion limit.
    node_properties = []
    pos = 0
    while True:
        # pos points at the node's ';'; its properties run up to the next
        # ';' or '(' that sits outside a bracketed value.
        end = _find_node_end(content, pos + 1)
        node_properties.append(_parse_properties(content[pos + 1:end]))
        pos = end
        if pos >= len(content) or content[pos] != ';':
            break

    # Whatever remains starts with '(' and holds the variations of the last node
    variations = content[pos:]
    children = _parse_children(variations) if variations else []

    # Link the sequence from the last node back to the root
    tree = None
    for properties in reversed(node_properties):
        tree = SgfTree(properties, children)
        children = [tree]

    return tree
105
106
107def _parse_properties(content: str) -> dict:
108 """
109 Parse the properties of a node.
110
111 Args:
112 content: The content containing properties
113
114 Returns:
115 dict: A dictionary of properties
116
117 Raises:
118 ValueError: If properties are malformed
119 """
120 properties = {}
121 i = 0
122
123 while i < len(content):
124 # Skip whitespace
125 if content[i].isspace():
126 i += 1
127 continue
128
129 # Parse property key
130 key_start = i
131 while i < len(content) and content[i].isalpha():
132 i += 1
133
134 # Edge Case: No key found
135 if i == key_start:
136 raise ValueError("properties without delimiter")
137
138 key = content[key_start:i]
139
140 # Edge Case: Key is not uppercase
141 if not key.isupper():
142 raise ValueError("property must be in uppercase")
143
144 # Parse values
145 values = []
146
147 # Edge Case: Key not followed by '['
148 if i >= len(content) or content[i] != '[':
149 raise ValueError("properties without delimiter")
150
151 while i < len(content) and content[i] == '[':
152 i += 1 # Skip '['
153 value_start = i
154
155 # Parse value, handling escapes
156 while i < len(content) and content[i] != ']':
157 if content[i] == '\\':
158 i += 2 # Skip escape character and the escaped character
159 else:
160 i += 1
161
162 # Edge Case: Unclosed value bracket
163 if i >= len(content) or content[i] != ']':
164 raise ValueError("properties without delimiter")
165
166 value = content[value_start:i]
167 # Process the value according to SGF Text type rules
168 processed_value = _process_sgf_text(value)
169 values.append(processed_value)
170 i += 1 # Skip ']'
171
172 properties[key] = values
173
174 return properties
175
176
177def _process_sgf_text(text: str) -> str:
178 """
179 Process text according to SGF Text type rules.
180
181 Args:
182 text: Raw text from SGF value
183
184 Returns:
185 str: Processed text
186 """
187 result = []
188 i = 0
189
190 while i < len(text):
191 if text[i] == '\\':
192 # Handle escape sequences
193 i += 1 # Skip the backslash
194 if i < len(text):
195 # If next character is newline, remove it
196 if text[i] == '\n':
197 i += 1 # Skip the newline
198 else:
199 # Any non-whitespace character after \ is inserted as-is
200 # Any whitespace character after \ is converted per rules
201 if text[i].isspace() and text[i] != '\n':
202 result.append(' ')
203 else:
204 result.append(text[i])
205 i += 1
206 else:
207 # Convert whitespace (except newline) to spaces
208 if text[i].isspace() and text[i] != '\n':
209 result.append(' ')
210 else:
211 result.append(text[i])
212 i += 1
213
214 return ''.join(result)
215
216
217def _parse_children(content: str) -> list[SgfTree]:
218 """
219 Parse children of a node.
220
221 Args:
222 content: The content containing children
223
224 Returns:
225 list[SgfTree]: A list of child trees
226 """
227 children = []
228 i = 0
229
230 while i < len(content):
231 if content[i] == '(':
232 # Find the matching parenthesis
233 paren_count = 1
234 start = i
235 i += 1
236
237 while i < len(content) and paren_count > 0:
238 if content[i] == '(':
239 paren_count += 1
240 elif content[i] == ')':
241 paren_count -= 1
242 i += 1
243
244 # Edge Case: Unmatched parenthesis
245 if paren_count != 0:
246 raise ValueError("tree missing")
247
248 # Parse the subtree
249 subtree_content = content[start:i]
250 children.append(parse(subtree_content))
251 else:
252 # Skip other characters
253 i += 1
254
255 return children
256
# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis,
# empty content after removing parentheses, content not starting with semicolon,
# no key found, key not uppercase, key not followed by '[', unclosed value bracket,
# unmatched parenthesis
Test Name | Status
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_two_nodes
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_properties_without_delimiter
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.