lexicon

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid SGF format
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse a tree from SGF content.
70
71 Args:
72 content: SGF content without outer parentheses
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Skip the initial semicolon
78 index = 1
79
80 # Parse properties of the current node
81 properties, index = _parse_properties(content, index)
82
83 # Parse children
84 children = []
85
86 # Handle both variations (parentheses) and shorthand notation (semicolons)
87 while index < len(content):
88 if content[index] == '(':
89 # Parse variation - child in parentheses
90 paren_count = 1
91 start = index + 1
92 index += 1
93
94 while index < len(content) and paren_count > 0:
95 if content[index] == '(':
96 paren_count += 1
97 elif content[index] == ')':
98 paren_count -= 1
99 index += 1
100
101 # Edge Case: Unmatched parenthesis
102 if paren_count != 0:
103 raise ValueError("tree missing")
104
105 # Parse the child tree
106 child_content = content[start:index-1]
107 child_tree = _parse_tree(child_content)
108 children.append(child_tree)
109 elif content[index] == ';':
110 # Parse shorthand notation - sequential node
111 index += 1 # Skip semicolon
112
113 # Parse properties of the child node
114 child_properties, index = _parse_properties(content, index)
115
116 # Create child tree and recursively parse its children
117 child_tree = SgfTree(child_properties, [])
118
119 # Parse any children of this child node
120 while index < len(content):
121 if content[index] == '(':
122 # Parse variation for this child
123 paren_count = 1
124 start = index + 1
125 index += 1
126
127 while index < len(content) and paren_count > 0:
128 if content[index] == '(':
129 paren_count += 1
130 elif content[index] == ')':
131 paren_count -= 1
132 index += 1
133
134 if paren_count != 0:
135 raise ValueError("tree missing")
136
137 grandchild_content = content[start:index-1]
138 grandchild_tree = _parse_tree(grandchild_content)
139 child_tree.children.append(grandchild_tree)
140 elif content[index] == ';':
141 # Another sequential node - this becomes a child of the current child
142 index += 1
143 grandchild_properties, index = _parse_properties(content, index)
144 grandchild_tree = SgfTree(grandchild_properties, [])
145 child_tree.children.append(grandchild_tree)
146 else:
147 break
148
149 children.append(child_tree)
150 else:
151 # Invalid character
152 break
153
154 return SgfTree(properties, children)
155
156
157def _parse_properties(content: str, index: int) -> tuple[dict, int]:
158 """
159 Parse properties from SGF content starting at index.
160
161 Args:
162 content: SGF content
163 index: Starting index
164
165 Returns:
166 tuple: (properties dict, new index)
167 """
168 properties = {}
169
170 # Parse all properties in this node (stop at semicolon, opening parenthesis, or end)
171 while index < len(content) and content[index] not in ';(':
172 # Parse key
173 if not content[index].isalpha():
174 # If we hit a non-alphabetic character that's not a delimiter, it's an error
175 if content[index] not in ')':
176 raise ValueError("properties without delimiter")
177 break
178
179 key_start = index
180 while index < len(content) and content[index].isalpha():
181 index += 1
182
183 key = content[key_start:index]
184
185 # Edge Case: Property key must be uppercase
186 if not key.isupper():
187 raise ValueError("property must be in uppercase")
188
189 # Edge Case: Missing opening bracket
190 if index >= len(content) or content[index] != '[':
191 raise ValueError("properties without delimiter")
192
193 # Parse values
194 values = []
195 while index < len(content) and content[index] == '[':
196 index += 1 # Skip opening bracket
197
198 # Parse value
199 value, index = _parse_value(content, index)
200
201 # Edge Case: Missing closing bracket
202 if index >= len(content) or content[index] != ']':
203 raise ValueError("properties without delimiter")
204
205 index += 1 # Skip closing bracket
206 values.append(value)
207
208 # Edge Case: No values for property
209 if not values:
210 raise ValueError("properties without delimiter")
211
212 properties[key] = values
213
214 return properties, index
215
216
217def _parse_value(content: str, index: int) -> tuple[str, int]:
218 """
219 Parse a value according to SGF Text type rules.
220
221 Args:
222 content: SGF content
223 index: Starting index
224
225 Returns:
226 tuple: (parsed value, new index)
227 """
228 value = []
229
230 while index < len(content) and content[index] != ']':
231 if content[index] == '\\':
232 # Handle escape character
233 index += 1
234
235 # Edge Case: Escape at end of content
236 if index >= len(content):
237 # Treat backslash at end as literal backslash
238 value.append('\\')
239 break
240
241 char = content[index]
242
243 # If next character is newline, remove the backslash and newline
244 if char == '\n':
245 # Remove the backslash-newline sequence
246 pass
247 else:
248 # Add the escaped character as-is
249 value.append(char)
250
251 index += 1
252 else:
253 char = content[index]
254
255 # Convert whitespace (except newline) to spaces
256 if char != '\n' and char.isspace():
257 value.append(' ')
258 else:
259 value.append(char)
260
261 index += 1
262
263 return ''.join(value), index
264
265# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon, unmatched parenthesis, property key not uppercase, missing opening bracket, missing closing bracket, no values for property, escape at end of content
Test NameStatus
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.