xas

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Convert SGF text into its tree of nodes.

    Args:
        input_string: SGF text; the whole document must be wrapped in one
            pair of parentheses.

    Returns:
        SgfTree: The root node produced by ``_parse_tree``.

    Raises:
        ValueError: "tree missing" when the input is empty or is not
            enclosed in '(' ... ')'; "tree with no nodes" when the text
            between the parentheses does not begin with ';'. Errors raised
            while parsing nodes and properties propagate unchanged.
    """
    # An empty string, a missing '(' or a missing ')' are all reported the
    # same way: there is no tree to read.
    is_wrapped = (
        input_string
        and input_string.startswith('(')
        and input_string.endswith(')')
    )
    if not is_wrapped:
        raise ValueError("tree missing")

    # Drop the outer parentheses; what is left must open with a node marker.
    # An empty remainder fails the same test, so one check covers both cases.
    inner = input_string[1:-1]
    if not inner.startswith(';'):
        raise ValueError("tree with no nodes")

    # The index returned alongside the tree is not needed at the top level.
    root, _end = _parse_tree(inner)
    return root
73
74
def _parse_tree(content: str) -> tuple[SgfTree, int]:
    """
    Parse a tree from the content string.

    ``content`` is the text found *inside* a pair of parentheses: it starts
    with ';' and may run past the end of this tree (for a variation, the
    closing ')' and any following siblings are still part of the string).

    Args:
        content: The content string to parse; must begin with ';'.

    Returns:
        tuple: (SgfTree, index) where index is the position in ``content``
        at which parsing stopped - the character that ends this tree
        (normally its closing ')') or ``len(content)``.

    Raises:
        ValueError: "tree with no nodes" if ``content`` is empty or does not
            start with ';'. Errors from node/property parsing propagate.
    """
    # An empty string also fails startswith, so one check covers both cases.
    if not content.startswith(';'):
        raise ValueError("tree with no nodes")

    # Parse the first node, starting right after its semicolon.
    properties, index = _parse_node(content, 1)
    root = SgfTree(properties=properties, children=[])

    # Sequential nodes chain downwards from the most recent node; variations
    # are attached to whichever node is current when they appear.
    current_node = root
    length = len(content)
    while index < length:
        marker = content[index]
        if marker == '(':
            # Variation: recurse on everything after the '('.
            child, consumed = _parse_tree(content[index + 1:])
            current_node.children.append(child)
            # ``consumed`` is relative to the slice, so shift it back into
            # this string's coordinates. Using the parser's own position
            # (instead of counting raw parentheses) keeps '(' and ')' that
            # occur inside property values from being mistaken for
            # structure.
            index += 1 + consumed
            # Step over the variation's closing parenthesis when present;
            # an unterminated variation simply ends at the end of input.
            if index < length and content[index] == ')':
                index += 1
        elif marker == ';':
            # Sequential node: becomes the single child of the current node
            # and the attachment point for whatever follows.
            node_properties, index = _parse_node(content, index + 1)
            new_node = SgfTree(properties=node_properties, children=[])
            current_node.children.append(new_node)
            current_node = new_node
        else:
            # Anything else (normally this tree's own ')') ends the tree.
            break

    return root, index
128
129
def _parse_node(content: str, start_index: int) -> tuple[dict, int]:
    """
    Collect the properties of a single node.

    Args:
        content: The string being parsed.
        start_index: Position of the first character after the node's ';'.

    Returns:
        tuple: (properties, index) - a dict mapping each property key to its
        list of values, and the position where the node ended (a ')', '('
        or ';' character, or the end of ``content``).

    Raises:
        ValueError: "property must be in uppercase" if a parsed key is not
            uppercase. Errors from ``_parse_property`` propagate.
    """
    collected = {}
    cursor = start_index
    total = len(content)

    while cursor < total:
        # ')' closes the enclosing tree, '(' opens a variation and ';' starts
        # the next node - each of them terminates the current node.
        if content[cursor] in (')', '(', ';'):
            break

        (name, values), next_cursor = _parse_property(content, cursor)

        if not name.isupper():
            raise ValueError("property must be in uppercase")

        # A repeated key replaces the values stored earlier for that key.
        collected[name] = values
        cursor = next_cursor

    return collected, cursor
162
163
def _parse_property(content: str, start_index: int) -> tuple[tuple[str, list[str]], int]:
    """
    Read one property: an uppercase key followed by one or more [values].

    Args:
        content: The string being parsed.
        start_index: Position of the first character of the property key.

    Returns:
        tuple: ((key, values), index) where index is the position just past
        the last value's closing ']'.

    Raises:
        ValueError: "properties without delimiter" if no key is found, if a
            value is not closed by ']', or if the key has no values;
            "property must be in uppercase" if the key is not uppercase.
    """
    length = len(content)

    # The key is the run of alphabetic characters starting at start_index.
    cursor = start_index
    while cursor < length and content[cursor].isalpha():
        cursor += 1

    key = content[start_index:cursor]
    if not key:
        raise ValueError("properties without delimiter")
    if not key.isupper():
        raise ValueError("property must be in uppercase")

    # Every '[' immediately following the key (or a previous value) opens
    # another value belonging to this property.
    values = []
    while cursor < length and content[cursor] == '[':
        text, closing = _parse_value(content, cursor + 1)
        values.append(text)

        # The value parser should have stopped on the closing bracket.
        if closing >= length or content[closing] != ']':
            raise ValueError("properties without delimiter")
        cursor = closing + 1

    if not values:
        raise ValueError("properties without delimiter")

    return (key, values), cursor
207
208
209def _parse_value(content: str, start_index: int) -> tuple[str, int]:
210 """
211 Parse a value from the content string according to SGF Text type rules.
212
213 Args:
214 content: The content string to parse
215 start_index: The index to start parsing from
216
217 Returns:
218 tuple: (value_string, index) where index is the position after the parsed value
219 """
220 value = []
221 index = start_index
222
223 while index < len(content) and content[index] != ']':
224 char = content[index]
225
226 if char == '\\': # Escape character
227 index += 1
228 # Edge Case: Escape at end of content
229 if index >= len(content):
230 raise ValueError("properties without delimiter")
231
232 next_char = content[index]
233
234 # If next character is newline, skip both \ and newline
235 if next_char == '\n':
236 index += 1
237 continue
238 # If next character is whitespace, convert to space
239 elif next_char.isspace():
240 value.append(' ')
241 # Otherwise, add the character as-is
242 else:
243 value.append(next_char)
244 elif char == '\n': # Newline
245 # Check if previous character was backslash
246 if index > start_index and content[index-1] == '\\':
247 # Newline after backslash is removed
248 pass
249 else:
250 value.append(char)
251 elif char.isspace() and char != '\n': # Other whitespace
252 value.append(' ')
253 else:
254 value.append(char)
255
256 index += 1
257
258 # Edge Case: Unclosed value bracket
259 if index >= len(content) or content[index] != ']':
260 raise ValueError("properties without delimiter")
261
262 return ''.join(value), index
263
264# Handled Edge Cases: Empty input, missing parentheses, empty tree, missing semicolon,
265# property key not uppercase, no property key, no values for property,
266# escape at end of content, unclosed value bracket
267# Handled Edge Cases: Empty content, missing semicolon at start
268# Handled Edge Cases: Semicolon indicating new node, property key not uppercase
269# Handled Edge Cases: No property key, property key not uppercase, no values for property
270# Handled Edge Cases: Escape at end of content, newline after backslash, whitespace conversion, unclosed value bracket
Test Name | Status
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.