xas

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input string is not valid SGF format
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Parse the content inside the outermost parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse a tree from its content string.
70
71 Args:
72 content: The content of the tree (without outer parentheses)
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Skip the initial semicolon
78 content = content[1:]
79
80 # Parse the root node
81 properties, remaining = _parse_properties(content)
82
83 children = []
84
85 # Parse children
86 if remaining:
87 if remaining.startswith('('):
88 # Parse variations (multiple children)
89 while remaining and remaining.startswith('('):
90 subtree_content, remaining = _parse_subtree(remaining)
91 child = _parse_tree(subtree_content)
92 children.append(child)
93 elif remaining.startswith(';'):
94 # Parse sequential nodes (chain of single children)
95 current_children = children
96 current_node = None
97
98 while remaining and remaining.startswith(';'):
99 # Parse a sequential node
100 remaining = remaining[1:] # Skip semicolon
101 child_properties, remaining = _parse_properties(remaining)
102 new_node = SgfTree(properties=child_properties, children=[])
103
104 if current_node is None:
105 # First sequential node - add to root's children
106 current_children.append(new_node)
107 else:
108 # Subsequent sequential nodes - add as child of previous node
109 current_node.children.append(new_node)
110
111 current_node = new_node
112
113 return SgfTree(properties=properties, children=children)
114
115
116def _parse_properties(content: str) -> tuple[dict, str]:
117 """
118 Parse properties from the content string.
119
120 Args:
121 content: String containing properties
122
123 Returns:
124 tuple: (properties_dict, remaining_content)
125 """
126 properties = {}
127 remaining = content
128
129 while remaining and remaining[0].isalpha():
130 # Parse property key
131 key_end = 0
132 while key_end < len(remaining) and remaining[key_end].isalpha():
133 key_end += 1
134
135 key = remaining[:key_end]
136
137 # Edge Case: Property key not in uppercase
138 if not key.isupper():
139 raise ValueError("property must be in uppercase")
140
141 remaining = remaining[key_end:]
142
143 # Edge Case: Missing opening bracket for property value
144 if not remaining.startswith('['):
145 raise ValueError("properties without delimiter")
146
147 # Parse all values for this key
148 values = []
149 while remaining.startswith('['):
150 remaining = remaining[1:] # Skip opening bracket
151 value_end = 0
152
153 # Find the closing bracket, handling escaped brackets
154 while value_end < len(remaining):
155 if remaining[value_end] == ']':
156 # Count consecutive backslashes before this bracket
157 backslash_count = 0
158 pos = value_end - 1
159 while pos >= 0 and remaining[pos] == '\\':
160 backslash_count += 1
161 pos -= 1
162
163 # If even number of backslashes, bracket is not escaped
164 # If odd number of backslashes, bracket is escaped
165 if backslash_count % 2 == 0:
166 break
167 value_end += 1
168
169 # Edge Case: Missing closing bracket
170 if value_end >= len(remaining):
171 raise ValueError("properties without delimiter")
172
173 value = remaining[:value_end]
174 values.append(_unescape_text(value))
175 remaining = remaining[value_end+1:] # Skip closing bracket and move to next
176
177 properties[key] = values
178
179 return properties, remaining
180
181
182def _parse_subtree(content: str) -> tuple[str, str]:
183 """
184 Parse a subtree from its content string.
185
186 Args:
187 content: String starting with '('
188
189 Returns:
190 tuple: (subtree_content, remaining_content)
191 """
192 # Edge Case: Not starting with '('
193 if not content.startswith('('):
194 raise ValueError("tree missing")
195
196 balance = 0
197 i = 0
198
199 while i < len(content):
200 if content[i] == '(':
201 balance += 1
202 elif content[i] == ')':
203 balance -= 1
204 # Edge Case: Found matching closing parenthesis
205 if balance == 0:
206 return content[1:i], content[i+1:]
207 i += 1
208
209 # Edge Case: Unmatched parentheses
210 raise ValueError("tree missing")
211
212
213def _unescape_text(text: str) -> str:
214 """
215 Unescape SGF text according to SGF specification.
216
217 Args:
218 text: The escaped text
219
220 Returns:
221 str: The unescaped text
222 """
223 result = []
224 i = 0
225
226 while i < len(text):
227 if text[i] == '\\':
228 i += 1
229 # Edge Case: Backslash at end of string
230 if i >= len(text):
231 result.append('\\')
232 break
233
234 char = text[i]
235 # Handle newline removal after backslash
236 if char == '\n':
237 # Newline is removed if immediately after backslash
238 pass
239 # Convert other whitespace to space
240 elif char in ' \t\r':
241 result.append(' ')
242 else:
243 # Insert non-whitespace characters as-is
244 result.append(char)
245 else:
246 char = text[i]
247 # Convert whitespace (except newline) to space
248 if char in ' \t\r':
249 result.append(' ')
250 else:
251 result.append(char)
252 i += 1
253
254 return ''.join(result)
255# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon after opening parenthesis, property key not in uppercase, missing opening bracket for property value, missing closing bracket, not starting with '(', unmatched parentheses, backslash at end of string
256# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon after opening parenthesis
257# Handled Edge Cases: Property key not in uppercase, missing opening bracket for property value, missing closing bracket
258# Handled Edge Cases: Not starting with '(', unmatched parentheses
259# Handled Edge Cases: Backslash at end of string
Test NameStatus
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.