seahawk

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without outer parentheses).
70
71 Args:
72 content: The content of the tree
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Skip the initial semicolon
78 index = 1
79
80 # Parse properties of the current node
81 properties, index = _parse_properties(content, index)
82
83 # Parse children
84 children = []
85 while index < len(content):
86 if content[index] == '(': # Start of a child tree (variation)
87 # Find the matching closing parenthesis
88 paren_count = 1
89 start = index
90 index += 1
91 while index < len(content) and paren_count > 0:
92 if content[index] == '(':
93 paren_count += 1
94 elif content[index] == ')':
95 paren_count -= 1
96 index += 1
97
98 # Edge Case: Malformed parentheses
99 if paren_count != 0:
100 raise ValueError("tree missing")
101
102 # Parse the child tree
103 child_content = content[start:index]
104 children.append(parse(child_content))
105 elif content[index] == ';': # Start of a sequential child node
106 # Parse the sequential node and its children
107 child_node, index = _parse_sequential_node_from_position(content, index)
108 children.append(child_node)
109 else:
110 # Edge Case: Unexpected character
111 raise ValueError("tree missing")
112
113 return SgfTree(properties=properties, children=children)
114
115
116def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
117 """
118 Parse properties from the content starting at start_index.
119
120 Args:
121 content: The content to parse
122 start_index: The index to start parsing from
123
124 Returns:
125 tuple: A tuple of (properties_dict, next_index)
126 """
127 properties = {}
128 index = start_index
129
130 while index < len(content) and content[index] not in ['(', ')', ';']:
131 # Expecting a property in format Key[Value]
132 # Parse the key
133 key_start = index
134
135 # Edge Case: Key must be uppercase
136 if not content[index].isalpha() or not content[index].isupper():
137 raise ValueError("property must be in uppercase")
138
139 while index < len(content) and content[index].isalpha() and content[index].isupper():
140 index += 1
141
142 # Edge Case: Empty key
143 if index == key_start:
144 raise ValueError("properties without delimiter")
145
146 key = content[key_start:index]
147
148 # Edge Case: Missing opening bracket for value
149 if index >= len(content) or content[index] != '[':
150 raise ValueError("properties without delimiter")
151
152 # Parse all values for this key
153 values = []
154 while index < len(content) and content[index] == '[':
155 index += 1 # Skip the opening bracket
156 value_start = index
157
158 # Parse the value until closing bracket
159 while index < len(content) and content[index] != ']':
160 index += 1
161
162 # Edge Case: Missing closing bracket
163 if index >= len(content) or content[index] != ']':
164 raise ValueError("properties without delimiter")
165
166 value = content[value_start:index]
167 # Process the value according to SGF text type rules
168 value = _process_text_value(value)
169 values.append(value)
170 index += 1 # Skip the closing bracket
171
172 # Add the property
173 properties[key] = values
174
175 return properties, index
176
177
178def _parse_sequential_node_from_position(content: str, start_index: int) -> tuple[SgfTree, int]:
179 """
180 Parse a sequential node starting from a specific position.
181
182 Args:
183 content: The content to parse
184 start_index: The index where the semicolon is located
185
186 Returns:
187 tuple: (SgfTree node, new index position)
188 """
189 # Edge Case: Must start with semicolon
190 if content[start_index] != ';':
191 raise ValueError("tree missing")
192
193 # Skip the initial semicolon
194 index = start_index + 1
195
196 # Parse properties of the current node
197 properties, index = _parse_properties(content, index)
198
199 # Parse children (if any)
200 children = []
201 while index < len(content) and content[index] not in [')', '(']:
202 if content[index] == ';': # Start of a sequential child node
203 # Parse the next sequential node
204 child_node, index = _parse_sequential_node_from_position(content, index)
205 children.append(child_node)
206 else:
207 # Edge Case: Unexpected character
208 raise ValueError("tree missing")
209
210 # Handle variations (parenthesized children)
211 while index < len(content) and content[index] == '(': # Start of a child tree (variation)
212 # Find the matching closing parenthesis
213 paren_count = 1
214 start = index
215 index += 1
216 while index < len(content) and paren_count > 0:
217 if content[index] == '(':
218 paren_count += 1
219 elif content[index] == ')':
220 paren_count -= 1
221 index += 1
222
223 # Edge Case: Malformed parentheses
224 if paren_count != 0:
225 raise ValueError("tree missing")
226
227 # Parse the child tree
228 child_content = content[start:index]
229 children.append(parse(child_content))
230
231 return SgfTree(properties=properties, children=children), index
232
233
234def _process_text_value(value: str) -> str:
235 """
236 Process a text value according to SGF text type rules.
237
238 Args:
239 value: The raw value string
240
241 Returns:
242 str: The processed value
243 """
244 result = []
245 i = 0
246 while i < len(value):
247 if value[i] == '\\':
248 # Handle escape character
249 i += 1
250 if i < len(value):
251 if value[i] == '\n':
252 # Newline immediately after \ is removed
253 pass # Just skip both the \ and the \n
254 else:
255 # Any other character after \ is inserted as-is
256 # This includes whitespace characters which follow their normal rules
257 if value[i] in ' \t\r':
258 # All whitespace other than newline converted to space
259 result.append(' ')
260 else:
261 # Non-whitespace character inserted as-is
262 result.append(value[i])
263 else:
264 # Backslash at end of string
265 result.append('\\')
266 else:
267 # Handle regular characters
268 char = value[i]
269 if char == '\n':
270 # Newlines remain as newlines
271 result.append(char)
272 elif char in ' \t\r':
273 # All whitespace other than newline converted to space
274 result.append(' ')
275 else:
276 # All other characters inserted as-is
277 result.append(char)
278 i += 1
279
280 return ''.join(result)
281
282# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree with no nodes, missing semicolon after opening parenthesis, malformed parentheses, unexpected character, key must be uppercase, empty key, missing opening bracket for value, missing closing bracket, escape at end of string
Test NameStatus
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.