Kratos

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Input doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Input doesn't end with ')'
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Tree missing semicolon
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without the outer parentheses).
70
71 Args:
72 content: The content of the tree
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Skip the initial semicolon
78 index = 1
79
80 # Parse properties of the current node
81 properties, index = _parse_properties(content, index)
82
83 # Parse children
84 children = []
85 while index < len(content):
86 if content[index] == '(': # Start of a child tree
87 child_tree, consumed = _parse_subtree(content[index:])
88 children.append(child_tree)
89 index += consumed
90 else:
91 # If we encounter a semicolon, it's a shorthand for a child node
92 if content[index] == ';':
93 # Parse the remaining as a single child
94 child_content = content[index:]
95 child_tree = _parse_tree(child_content)
96 children.append(child_tree)
97 break
98 else:
99 # Unexpected character
100 raise ValueError("tree missing")
101
102 return SgfTree(properties=properties, children=children)
103
104
105def _parse_subtree(content: str) -> tuple[SgfTree, int]:
106 """
107 Parse a subtree enclosed in parentheses.
108
109 Args:
110 content: The content starting with '('
111
112 Returns:
113 tuple: (parsed tree, number of characters consumed)
114 """
115 # Edge Case: Empty subtree
116 if len(content) < 2 or content[0] != '(':
117 raise ValueError("tree missing")
118
119 # Find matching closing parenthesis
120 depth = 1
121 index = 1
122 while index < len(content) and depth > 0:
123 if content[index] == '(':
124 depth += 1
125 elif content[index] == ')':
126 depth -= 1
127 index += 1
128
129 # Edge Case: Unmatched parentheses
130 if depth != 0:
131 raise ValueError("tree missing")
132
133 # Parse the content inside the parentheses
134 inner_content = content[1:index-1]
135
136 # Edge Case: Empty subtree content
137 if not inner_content:
138 raise ValueError("tree with no nodes")
139
140 tree = _parse_tree(inner_content)
141
142 return tree, index
143
144
145def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
146 """
147 Parse properties from the content starting at start_index.
148
149 Args:
150 content: The content to parse
151 start_index: The index to start parsing from
152
153 Returns:
154 tuple: (properties dictionary, next index to parse)
155 """
156 properties = {}
157 index = start_index
158
159 while index < len(content):
160 char = content[index]
161
162 # If we encounter a parenthesis or semicolon, it means properties are done
163 if char in '();':
164 break
165
166 # Parse property key
167 key_start = index
168 while index < len(content) and content[index].isalpha():
169 index += 1
170
171 key = content[key_start:index]
172
173 # Edge Case: Empty key
174 if not key:
175 raise ValueError("properties without delimiter")
176
177 # Edge Case: Key not in uppercase
178 if not key.isupper():
179 raise ValueError("property must be in uppercase")
180
181 # Parse values
182 values = []
183
184 # There must be at least one value
185 if index >= len(content) or content[index] != '[':
186 raise ValueError("properties without delimiter")
187
188 while index < len(content) and content[index] == '[':
189 index += 1 # Skip '['
190 value_start = index
191
192 # Parse value until closing bracket
193 while index < len(content) and content[index] != ']':
194 index += 1
195
196 # Edge Case: Unclosed bracket
197 if index >= len(content) or content[index] != ']':
198 raise ValueError("properties without delimiter")
199
200 value = content[value_start:index]
201 value = _unescape_value(value)
202 values.append(value)
203 index += 1 # Skip ']'
204
205 properties[key] = values
206
207 return properties, index
208
209
210def _unescape_value(value: str) -> str:
211 """
212 Unescape an SGF value according to SGF text type rules.
213
214 Args:
215 value: The escaped value
216
217 Returns:
218 str: The unescaped value
219 """
220 result = []
221 index = 0
222
223 while index < len(value):
224 if value[index] == '\\':
225 # Look at the next character
226 if index + 1 < len(value):
227 next_char = value[index + 1]
228
229 # If next character is a newline, remove the backslash and newline
230 if next_char == '\n':
231 index += 2 # Skip backslash and newline
232 continue
233 elif next_char.isspace():
234 # For whitespace characters (except newline), convert to space
235 result.append(' ')
236 index += 2
237 else:
238 # For non-whitespace characters, preserve the backslash and insert the character
239 result.append('\\')
240 result.append(next_char)
241 index += 2
242 else:
243 # Backslash at end of string, just add the backslash
244 result.append('\\')
245 index += 1
246 else:
247 # Convert whitespace (except newline) to space
248 if value[index] != '\n' and value[index].isspace():
249 result.append(' ')
250 else:
251 result.append(value[index])
252 index += 1
253
254 return ''.join(result)
255
256# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty tree, missing semicolon, unmatched parentheses, empty subtree content, empty key, key not in uppercase, missing delimiter for properties, unclosed bracket
Test NameStatus
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.