lexicon

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format, e.g. ``(;FF[4];B[aa])``.

    Returns:
        SgfTree: The parsed SGF tree

    Raises:
        ValueError: "tree missing" when the input is empty, is not wrapped
            in parentheses, or its content does not begin with ';';
            "tree with no nodes" when the parentheses enclose nothing.
            Errors raised while parsing nodes and properties propagate
            unchanged.
    """
    # An empty string fails startswith('('), so this single guard covers the
    # empty-input, missing-'(' and missing-')' cases alike.
    if not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Strip the outer parentheses; what remains must be a node sequence.
    content = input_string[1:-1]

    if not content:
        raise ValueError("tree with no nodes")

    # Every node is introduced by ';'.
    if not content.startswith(';'):
        raise ValueError("tree missing")

    return _parse_tree(content)
65
66
def _parse_tree(content: str) -> SgfTree:
    """
    Parse the content of a tree (without the outer parentheses).

    The content must start with ';'. The first node's properties are parsed,
    then its children: either parenthesised variations ``(...)`` or a single
    following node introduced by ';' (shorthand for a one-child chain).

    Args:
        content: The content of the tree, beginning with ';'

    Returns:
        SgfTree: The parsed tree

    Raises:
        ValueError: "tree missing" for an unclosed variation or a variation
            that does not start with ';'; "properties without delimiter"
            when a property value inside a variation is never closed.
    """
    # Skip the initial ';'
    index = 1
    length = len(content)

    # Parse properties of the current node
    properties, index = _parse_properties(content, index)

    children = []

    while index < length:
        marker = content[index]

        if marker == '(':
            # Variation: find the matching ')'. Parentheses inside a
            # bracketed property value are plain text and must not affect
            # the nesting depth, so track whether we are inside '[...]'
            # and honour backslash escapes there.
            index += 1
            start = index
            depth = 1
            in_value = False

            while index < length and depth > 0:
                ch = content[index]
                if in_value:
                    if ch == '\\':
                        index += 1  # Skip the escaped character as well
                    elif ch == ']':
                        in_value = False
                elif ch == '[':
                    in_value = True
                elif ch == '(':
                    depth += 1
                elif ch == ')':
                    depth -= 1
                index += 1

            if depth != 0:
                if in_value:
                    # Ran off the end inside an unterminated '[...]' value
                    raise ValueError("properties without delimiter")
                raise ValueError("tree missing")

            # index is one past the matching ')'
            subtree_content = content[start:index - 1]
            if not subtree_content.startswith(';'):
                raise ValueError("tree missing")
            children.append(_parse_tree(subtree_content))
        elif marker == ';':
            # Shorthand: the remainder of the content is a single child
            children.append(_parse_tree(content[index:]))
            break
        else:
            # Neither '(' nor ';': stop parsing children here
            break

    return SgfTree(properties, children)
123
124
def _parse_properties(content: str, index: int) -> tuple[dict, int]:
    """
    Parse properties from the content starting at index.

    A property is an all-uppercase alphabetic key followed by one or more
    bracketed values, e.g. ``AB[aa][bb]``. Parsing stops at the first
    character that is not alphabetic (or at the end of the content).

    Args:
        content: The content to parse
        index: The starting index

    Returns:
        tuple: A tuple of (properties_dict, next_index), where properties_dict
        maps each key to its list of processed values and next_index is the
        position of the first character not consumed.

    Raises:
        ValueError: "property must be in uppercase" when a key contains a
            non-uppercase letter; "properties without delimiter" when a key
            is not followed by '[' or a value is never closed by ']'.
    """
    properties = {}
    length = len(content)

    while index < length and content[index].isalpha():
        # Parse key
        key_start = index
        while index < length and content[index].isalpha():
            if not content[index].isupper():
                raise ValueError("property must be in uppercase")
            index += 1
        key = content[key_start:index]

        # A key must be followed by at least one '[value]'
        if index >= length or content[index] != '[':
            raise ValueError("properties without delimiter")

        # Parse every consecutive '[...]' value belonging to this key
        values = []
        while index < length and content[index] == '[':
            index += 1  # Skip '['
            value_start = index

            # Scan to the closing ']'; a backslash escapes the next character
            # so an escaped ']' does not terminate the value.
            while index < length and content[index] != ']':
                if content[index] == '\\':
                    index += 1  # Skip escape character
                    if index < length:
                        index += 1  # Include the escaped character
                else:
                    index += 1

            # Ran off the end without finding ']'
            if index >= length:
                raise ValueError("properties without delimiter")

            values.append(_process_text_value(content[value_start:index]))
            index += 1  # Skip ']'

        properties[key] = values

    return properties, index
215
216
def _process_text_value(value: str) -> str:
    """
    Process a text value according to SGF text type rules.

    Rules applied:
      * A backslash followed by a newline removes both characters.
      * A backslash followed by any other character yields that character,
        subject to the whitespace rule below (so an escaped tab becomes a
        space, while an escaped ']' or '\\' is kept literally).
      * Space, tab and carriage return become a single space, escaped or not.
      * Unescaped newlines are preserved.
      * A lone trailing backslash is kept as a backslash.

    Args:
        value: The raw text value

    Returns:
        str: The processed text value
    """
    non_newline_whitespace = (' ', '\t', '\r')
    result = []
    i = 0
    length = len(value)

    while i < length:
        char = value[i]

        if char == '\\':
            # Escape sequence at end of string: keep the backslash
            if i + 1 >= length:
                result.append('\\')
                break

            char = value[i + 1]
            i += 2

            # Escaped newline is a soft line break: drop it entirely
            if char == '\n':
                continue
        else:
            i += 1

        # Whitespace conversion applies to escaped characters too
        result.append(' ' if char in non_newline_whitespace else char)

    return ''.join(result)
256
257# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree doesn't start with ';', Unmatched parentheses, Malformed subtree, Property key is not uppercase, Property without values, Unmatched bracket, No values for property, Escape sequence at end of string
Test Name | Status
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.