unicode

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format, wrapped in one pair of
            parentheses.

    Returns:
        SgfTree: The parsed SGF tree.

    Raises:
        ValueError: "tree missing" if the input is empty or is not wrapped
            in parentheses; "tree with no nodes" if nothing but whitespace
            sits between the outer parentheses. Errors raised while parsing
            the inner content propagate unchanged.
    """
    # Empty input can never be a tree
    if not input_string:
        raise ValueError("tree missing")

    # A tree is always delimited by '(' ... ')'
    if not input_string.startswith('(') or not input_string.endswith(')'):
        raise ValueError("tree missing")

    # Drop the outer parentheses; what is left is the node sequence
    content = input_string[1:-1]

    # Covers both "()" and a whitespace-only body such as "(  )"
    if not content.strip():
        raise ValueError("tree with no nodes")

    return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without the outer parentheses).
70 """
71 # Edge Case: Content is empty
72 if not content:
73 raise ValueError("tree with no nodes")
74
75 nodes, remainder = _parse_nodes(content)
76
77 # Edge Case: No nodes parsed
78 if not nodes:
79 raise ValueError("tree with no nodes")
80
81 # Edge Case: Invalid tree structure - remaining content after parsing
82 if remainder.strip():
83 raise ValueError("tree missing")
84
85 # Convert the list of nodes to a tree structure
86 # The first node is the root, and each subsequent node is a child of the previous one
87 # unless there are variations (parentheses)
88 return _build_tree(nodes)
89
90
91def _parse_nodes(content: str) -> tuple[list, str]:
92 """
93 Parse a sequence of nodes from the content.
94 Returns a list of (properties, children) tuples and the remainder of the content.
95 """
96 nodes = []
97 i = 0
98
99 while i < len(content):
100 # Skip whitespace
101 if content[i].isspace():
102 i += 1
103 continue
104
105 # Parse a node
106 if content[i] == ';':
107 i += 1
108 properties = {}
109
110 # Parse properties
111 while i < len(content) and content[i].isalpha():
112 # Parse key
113 key_start = i
114 while i < len(content) and content[i].isalpha():
115 i += 1
116 key = content[key_start:i]
117
118 # Edge Case: Property key is not in uppercase
119 if not key.isupper():
120 raise ValueError("property must be in uppercase")
121
122 # Parse values
123 values = []
124 while i < len(content) and content[i] == '[':
125 i += 1 # Skip '['
126 value_start = i
127
128 # Parse value, handling escapes
129 while i < len(content) and content[i] != ']':
130 if content[i] == '\\':
131 i += 1 # Skip escape character but keep the next character
132 else:
133 i += 1
134
135 # Edge Case: Unclosed bracket
136 if i >= len(content) or content[i] != ']':
137 raise ValueError("properties without delimiter")
138
139 value = _parse_text_value(content[value_start:i])
140 values.append(value)
141 i += 1 # Skip ']'
142
143 # Edge Case: No values for property
144 if not values:
145 raise ValueError("properties without delimiter")
146
147 properties[key] = values
148
149 nodes.append((properties, []))
150
151 # Parse variations
152 elif content[i] == '(':
153 # If we have nodes, the variations are children of the last node
154 # Edge Case: Variation with no preceding node
155 if not nodes:
156 raise ValueError("tree missing")
157
158 i += 1 # Skip '('
159 variation_content_end = _find_matching_paren(content, i)
160
161 # Edge Case: Unclosed parenthesis
162 if variation_content_end == -1:
163 raise ValueError("tree missing")
164
165 variation_content = content[i:variation_content_end]
166 variation_tree = _parse_tree(variation_content)
167 nodes[-1][1].append(variation_tree) # Add as child of last node
168 i = variation_content_end + 1 # Move past the ')'
169
170 # If we encounter a ')' or end of content, we're done with this level
171 elif content[i] == ')':
172 break
173
174 else:
175 # Edge Case: Unexpected character
176 raise ValueError("tree missing")
177
178 return nodes, content[i:]
179
180
181def _parse_text_value(value: str) -> str:
182 """
183 Parse an SGF Text value, handling escapes and whitespace.
184 """
185 result = []
186 i = 0
187
188 while i < len(value):
189 if value[i] == '\\':
190 # Edge Case: Escape at end of string
191 if i + 1 >= len(value):
192 result.append('\\')
193 break
194
195 next_char = value[i + 1]
196
197 # If next character is a newline, remove the backslash and newline
198 if next_char == '\n':
199 i += 2 # Skip backslash and newline
200 continue
201
202 # For any other character after backslash, insert as-is
203 result.append(next_char)
204 i += 2
205 else:
206 char = value[i]
207 # Convert all whitespace (except newline) to spaces
208 if char != '\n' and char.isspace():
209 result.append(' ')
210 else:
211 result.append(char)
212 i += 1
213
214 return ''.join(result)
215
216
217def _find_matching_paren(content: str, start: int) -> int:
218 """
219 Find the matching parenthesis for the one at start-1.
220 Returns the index of the matching parenthesis, or -1 if not found.
221 """
222 level = 1
223 i = start
224
225 while i < len(content):
226 if content[i] == '(':
227 level += 1
228 elif content[i] == ')':
229 level -= 1
230 if level == 0:
231 return i
232 i += 1
233
234 return -1
235
236
237def _build_tree(nodes: list) -> SgfTree:
238 """
239 Build a tree from a list of (properties, children) tuples.
240 The first node is the root, and each subsequent node is a child of the previous one,
241 unless there are variations (children) which are handled separately.
242 """
243 # Edge Case: Empty nodes list
244 if not nodes:
245 raise ValueError("tree with no nodes")
246
247 # Create the root node
248 properties, children = nodes[0]
249 root = SgfTree(properties, children)
250
251 # Chain the remaining nodes as children
252 current = root
253 for properties, children in nodes[1:]:
254 new_node = SgfTree(properties, children)
255 current.children.append(new_node)
256 current = new_node
257
258 return root
259
# Handled edge cases: empty input, properties without delimiters, property keys not in uppercase, input not forming a tree, tree with no nodes, unclosed brackets, unclosed parentheses, unexpected characters, escape at end of string
Test Name | Status
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.