weekends

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF (Smart Game Format) string and return a tree structure of properties.
31
32 Args:
33 input_string: A string containing SGF formatted data
34
35 Returns:
36 SgfTree: A tree structure representing the SGF data
37
38 Raises:
39 ValueError: If the input string is malformed or doesn't conform to SGF format
40 """
41 # Edge Case: Empty input string
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Input doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Input doesn't end with ')'
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Edge Case: Input is just '()'
54 if input_string == '()':
55 raise ValueError("tree with no nodes")
56
57 # Remove the outer parentheses
58 content = input_string[1:-1]
59
60 # Edge Case: Content is empty after removing parentheses
61 if not content:
62 raise ValueError("tree with no nodes")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without the outer parentheses).
70 """
71 # Edge Case: Content doesn't start with ';'
72 if not content.startswith(';'):
73 raise ValueError("tree missing")
74
75 i = 1 # Skip the initial ';'
76 properties = {}
77 children = []
78
79 # Parse properties
80 while i < len(content) and content[i] != '(' and content[i] != ')' and content[i] != ';':
81 # Parse property key
82 key_start = i
83 while i < len(content) and content[i].isalpha():
84 i += 1
85
86 # Edge Case: No key found
87 if i == key_start:
88 raise ValueError("properties without delimiter")
89
90 key = content[key_start:i]
91
92 # Edge Case: Key is not uppercase
93 if not key.isupper():
94 raise ValueError("property must be in uppercase")
95
96 # Parse property values
97 values = []
98
99 # Edge Case: No values after key
100 if i >= len(content) or content[i] != '[':
101 raise ValueError("properties without delimiter")
102
103 while i < len(content) and content[i] == '[':
104 i += 1 # Skip '['
105 value_start = i
106
107 # Parse value, handling escapes
108 while i < len(content) and content[i] != ']':
109 if content[i] == '[':
110 # Start of next value, end current value here
111 break
112 elif content[i] == '\\':
113 i += 1 # Skip the backslash
114 if i < len(content):
115 i += 1 # Skip the escaped character
116 else:
117 i += 1
118
119 # Edge Case: Unclosed value bracket (unless we broke early for next value)
120 if i >= len(content) or (content[i] != ']' and content[i] != '['):
121 raise ValueError("properties without delimiter")
122
123 # If we broke early for next value, don't advance past the '['
124 if i < len(content) and content[i] == '[':
125 # We're at the start of the next value, so don't skip ']'
126 pass
127 else:
128 # We're at the end of the current value, so skip ']'
129 i += 1 # Skip ']'
130
131 value = content[value_start:i]
132 value = _unescape_value(value)
133 values.append(value)
134 i += 1 # Skip ']'
135
136 properties[key] = values
137
138 # Handle sequential nodes (nodes separated by semicolons)
139 while i < len(content) and content[i] == ';':
140 # Find the end of this node (next semicolon or opening parenthesis or end of string)
141 node_start = i
142 i += 1 # Skip the semicolon
143
144 # Parse this node's content until we hit a semicolon, opening parenthesis, or end
145 while i < len(content) and content[i] != ';' and content[i] != '(':
146 # Parse property key
147 if i < len(content) and content[i].isalpha():
148 key_start = i
149 while i < len(content) and content[i].isalpha():
150 i += 1
151
152 key = content[key_start:i]
153
154 # Parse property values
155 while i < len(content) and content[i] == '[':
156 i += 1 # Skip '['
157 # Skip to closing bracket
158 while i < len(content) and content[i] != ']':
159 if content[i] == '[':
160 # Start of next value, end current value here
161 break
162 elif content[i] == '\\':
163 i += 1 # Skip the backslash
164 if i < len(content):
165 i += 1 # Skip the escaped character
166 else:
167 i += 1
168
169 # If we broke early for next value, don't advance past the '['
170 if i < len(content) and content[i] == '[':
171 # We're at the start of the next value, so don't skip ']'
172 pass
173 else:
174 # We're at the end of the current value, so skip ']'
175 if i < len(content) and content[i] == ']':
176 i += 1 # Skip ']'
177 else:
178 i += 1
179
180 # Parse this node as a child
181 node_content = content[node_start:i]
182 children.append(_parse_tree(node_content))
183
184 # Parse children in parentheses
185 while i < len(content) and content[i] == '(':
186 # Find matching closing parenthesis
187 paren_count = 1
188 start = i + 1
189 i += 1
190
191 while i < len(content) and paren_count > 0:
192 if content[i] == '(':
193 paren_count += 1
194 elif content[i] == ')':
195 paren_count -= 1
196 i += 1
197
198 # Edge Case: Unmatched parenthesis
199 if paren_count != 0:
200 raise ValueError("tree missing")
201
202 child_content = content[start:i-1]
203 children.append(_parse_tree(child_content))
204
205 return SgfTree(properties, children)
206
207
208def _unescape_value(value: str) -> str:
209 """
210 Process escape sequences in SGF text values according to SGF specification.
211 """
212 result = []
213 i = 0
214
215 while i < len(value):
216 if value[i] == '\\':
217 i += 1
218 # Edge Case: Escape at end of string
219 if i >= len(value):
220 # According to SGF spec, this should just be ignored
221 break
222
223 char = value[i]
224 # If next character after \ is a newline, remove the newline but keep the backslash
225 if char == '\n':
226 # Remove the newline if it immediately follows \, but keep the backslash
227 result.append('\\')
228 else:
229 # All whitespace other than newline becomes space
230 if char.isspace():
231 result.append(' ')
232 else:
233 # Insert non-whitespace character as-is
234 result.append(char)
235 i += 1
236 else:
237 char = value[i]
238 # All whitespace other than newline becomes space
239 if char != '\n' and char.isspace():
240 result.append(' ')
241 else:
242 result.append(char)
243 i += 1
244
245 return ''.join(result)
246# Handled Edge Cases: Empty input string, Input doesn't start with '(', Input doesn't end with ')', Input is just '()', Content is empty after removing parentheses, Content doesn't start with ';', No key found, Key is not uppercase, No values after key, Unclosed value bracket, Unmatched parenthesis, Escape at end of string
Test NameStatus
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_multiple_properties
Fail
test_multiple_property_values
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_single_node_tree
Fail
test_two_child_trees
Fail
test_two_nodes
Fail
test_within_property_values_newlines_remain_as_newlines
Fail
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_properties_without_delimiter
Pass
test_tree_with_no_nodes
Pass
test_upper_and_lowercase_property
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.