agent

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid SGF format
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Parse the content inside the outermost parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree with no nodes")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse a tree from its content string.
70
71 Args:
72 content: The content of a tree (without outer parentheses)
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Edge Case: Content doesn't start with semicolon
78 if not content.startswith(';'):
79 raise ValueError("tree with no nodes")
80
81 # Find the first node and any children
82 node_end = 1 # Start after the semicolon
83
84 # Parse properties of the first node
85 properties = {}
86 children = []
87 print(f"DEBUG: Parsing content: {repr(content)}")
88
89 # Parse properties
90 while node_end < len(content) and content[node_end] != '(' and content[node_end] != ';':
91 # Skip whitespace
92 while node_end < len(content) and content[node_end].isspace():
93 node_end += 1
94
95 # Check if we've reached the end of properties
96 if node_end >= len(content) or content[node_end] in '();':
97 break
98
99 # Parse a property
100 prop_start = node_end
101
102 # Find the property key (must be uppercase)
103 while node_end < len(content) and content[node_end].isalpha():
104 if not content[node_end].isupper():
105 raise ValueError("property must be in uppercase")
106 node_end += 1
107
108 # Edge Case: No property key found
109 if node_end == prop_start:
110 raise ValueError("properties without delimiter")
111
112 key = content[prop_start:node_end]
113
114 # Edge Case: Missing opening bracket
115 if node_end >= len(content) or content[node_end] != '[':
116 raise ValueError("properties without delimiter")
117
118 # Parse all values for this key
119 values = []
120 while node_end < len(content) and content[node_end] == '[':
121 node_end += 1 # Skip opening bracket
122 value_start = node_end
123
124 # Parse value, handling escapes
125 print(f"DEBUG: Starting value parsing from position {value_start}")
126 while node_end < len(content):
127 print(f"DEBUG: Checking character at position {node_end}: {repr(content[node_end])}")
128 if content[node_end] == ']':
129 # Check if this bracket is escaped
130 # Count consecutive backslashes immediately before this bracket
131 backslash_count = 0
132 check_pos = node_end - 1
133 while check_pos >= value_start and content[check_pos] == '\\':
134 backslash_count += 1
135 check_pos -= 1
136 print(f"DEBUG: Found ] at position {node_end}, backslash_count={backslash_count}, value_start={value_start}")
137 print(f"DEBUG: Characters before: {repr(content[value_start:node_end])}")
138 # If odd number of backslashes, this bracket is escaped
139 # But an escaped bracket can still serve as the closing bracket
140 print(f"DEBUG: Found real closing bracket")
141 break # This is the real closing bracket, whether escaped or not
142 else:
143 node_end += 1
144
145 # Edge Case: Missing closing bracket
146 if node_end >= len(content) or content[node_end] != ']':
147 raise ValueError("properties without delimiter")
148
149 value = _parse_value(content[value_start:node_end])
150 values.append(value)
151 node_end += 1 # Skip closing bracket
152
153 properties[key] = values
154 print(f"DEBUG: Added property {key} with values {values}")
155 print(f"DEBUG: node_end now at position {node_end}, remaining: {repr(content[node_end:])}")
156
157 # Parse children
158 while node_end < len(content) and content[node_end] == '(':
159 # Find the matching closing parenthesis
160 paren_count = 1
161 child_start = node_end
162 node_end += 1
163
164 while node_end < len(content) and paren_count > 0:
165 if content[node_end] == '(':
166 paren_count += 1
167 elif content[node_end] == ')':
168 paren_count -= 1
169 node_end += 1
170
171 # Edge Case: Mismatched parentheses
172 if paren_count != 0:
173 raise ValueError("tree missing")
174
175 # Parse the child tree
176 child_content = content[child_start+1:node_end-1]
177 child_tree = _parse_tree(child_content)
178 children.append(child_tree)
179
180 # Handle consecutive nodes (shorthand notation)
181 if node_end < len(content) and content[node_end] == ';':
182 # Parse the next node as a child
183 remaining_content = content[node_end:]
184 next_node = _parse_tree(remaining_content)
185 # If we already have children, this next node becomes a child of the last child
186 if children:
187 children[-1] = SgfTree(children[-1].properties, children[-1].children + [next_node])
188 else:
189 children.append(next_node)
190
191 return SgfTree(properties, children)
192
193
194def _parse_value(value_str: str) -> str:
195 """
196 Parse an SGF text value, handling escapes and whitespace.
197
198 Args:
199 value_str: The raw value string from between brackets
200
201 Returns:
202 str: The processed value
203 """
204 result = []
205 i = 0
206 while i < len(value_str):
207 if value_str[i] == '\\':
208 # Handle escape sequences
209 i += 1
210 if i < len(value_str):
211 char = value_str[i]
212 # According to SGF spec:
213 # - Newlines are removed if they come immediately after a '\\'
214 # - All whitespace characters other than newline are converted to spaces
215 # - '\\' is the escape character. Any non-whitespace character after '\\' is inserted as-is.
216 # - Any whitespace character after '\\' follows the above rules.
217
218 if char == '\n':
219 # Newline immediately after backslash is removed
220 i += 1
221 continue
222 elif char.isspace():
223 # Other whitespace after backslash becomes space
224 result.append(' ')
225 i += 1
226 else:
227 # Any non-whitespace character after backslash is inserted as-is
228 result.append(char)
229 i += 1
230 else:
231 # Trailing backslash
232 result.append('\\')
233 i += 1
234 else:
235 char = value_str[i]
236 if char == '\n':
237 # Regular newlines are preserved
238 result.append(char)
239 elif char.isspace():
240 # All other whitespace characters become spaces
241 result.append(' ')
242 else:
243 # Regular character
244 result.append(char)
245 i += 1
246
247 return ''.join(result)
248
249# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon after opening parenthesis, content doesn't start with semicolon, no property key found, missing opening bracket, missing closing bracket, mismatched parentheses, trailing backslash
250# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon after opening parenthesis, content doesn't start with semicolon, no property key found, missing opening bracket, missing closing bracket, mismatched parentheses
251# Handled Edge Cases: Backslash handling, whitespace conversion, newline handling
Test NameStatus
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.