unicode

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without the outer parentheses).
70
71 Args:
72 content: The content of the tree
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Skip the initial semicolon
78 index = 1
79
80 # Parse properties of the root node
81 properties, index = _parse_properties(content, index)
82
83 # Parse children
84 children = []
85 while index < len(content):
86 if content[index] == '(': # Start of a child tree
87 # Find the matching closing parenthesis
88 paren_count = 1
89 start = index
90 index += 1
91 while index < len(content) and paren_count > 0:
92 if content[index] == '(':
93 paren_count += 1
94 elif content[index] == ')':
95 paren_count -= 1
96 index += 1
97
98 # Parse the child tree
99 child_content = content[start:index]
100 children.append(parse(child_content))
101 elif content[index] == ';': # Single child shorthand
102 # Parse the child node starting from this semicolon
103 index += 1 # Skip the semicolon
104
105 # Parse properties of the child node
106 child_properties, index = _parse_properties(content, index)
107
108 # Create child tree with these properties
109 # For single child shorthand, we need to continue parsing children of this child
110 child_tree = SgfTree(child_properties)
111
112 # Parse any further children of this child node
113 temp_children = []
114 while index < len(content) and content[index] in '();':
115 if content[index] == '(': # Variation
116 # Find the matching closing parenthesis
117 paren_count = 1
118 start = index
119 index += 1
120 while index < len(content) and paren_count > 0:
121 if content[index] == '(':
122 paren_count += 1
123 elif content[index] == ')':
124 paren_count -= 1
125 index += 1
126
127 # Parse the child tree
128 child_content = content[start:index]
129 temp_children.append(parse(child_content))
130 elif content[index] == ';': # Another single child
131 # For single child shorthand, create a chain
132 index += 1 # Skip the semicolon
133 next_properties, index = _parse_properties(content, index)
134 next_child = SgfTree(next_properties)
135 temp_children.append(next_child)
136 else:
137 index += 1
138
139 if temp_children:
140 child_tree.children = temp_children
141
142 children.append(child_tree)
143 else:
144 index += 1
145
146 return SgfTree(properties, children)
147
148
149def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
150 """
151 Parse properties from the content starting at start_index.
152
153 Args:
154 content: The content to parse
155 start_index: The index to start parsing from
156
157 Returns:
158 tuple: A tuple containing the parsed properties dictionary and the next index
159 """
160 properties = {}
161 index = start_index
162
163 while index < len(content) and content[index] not in '();':
164 # Parse key
165 key_start = index
166 while index < len(content) and content[index].isalpha():
167 index += 1
168
169 key = content[key_start:index]
170
171 # Edge Case: Property key must be in uppercase
172 if not key.isupper():
173 raise ValueError("property must be in uppercase")
174
175 # Edge Case: Missing property values
176 if index >= len(content) or content[index] != '[':
177 raise ValueError("properties without delimiter")
178
179 # Parse values
180 values = []
181 while index < len(content) and content[index] == '[':
182 index += 1 # Skip opening bracket
183 value_start = index
184
185 # Find the closing bracket, taking into account escaped brackets
186 bracket_count = 1
187 while index < len(content) and bracket_count > 0:
188 if content[index] == '\\':
189 # Skip escape character and the next character
190 # The escaped character should not affect bracket counting
191 index += 2
192 elif content[index] == '[':
193 bracket_count += 1
194 index += 1
195 elif content[index] == ']':
196 bracket_count -= 1
197 index += 1
198 else:
199 index += 1
200
201 # Edge Case: Unmatched bracket
202 if bracket_count > 0:
203 raise ValueError("properties without delimiter")
204
205 value = content[value_start:index-1] # Exclude the closing bracket
206 values.append(_parse_text(value))
207
208 properties[key] = values
209
210 return properties, index
211
212
213def _parse_text(text: str) -> str:
214 """
215 Parse SGF Text type according to SGF specification.
216
217 Args:
218 text: The text to parse
219
220 Returns:
221 str: The parsed text
222 """
223 result = []
224 i = 0
225 while i < len(text):
226 if text[i] == '\\':
227 # Edge Case: Handle escape sequences
228 i += 1
229 if i < len(text):
230 char = text[i]
231 if char == 'n':
232 # \n is not a special escape sequence in SGF, it's just 'n'
233 result.append('n')
234 elif char == 't':
235 # \t is not a special escape sequence in SGF, it's just 't'
236 result.append('t')
237 elif char == '\\':
238 # Escaped backslash
239 result.append('\\')
240 elif char == ']':
241 # Escaped closing bracket
242 result.append(']')
243 elif char == '[':
244 # Escaped opening bracket
245 result.append('[')
246 elif char == ':':
247 # Escaped colon
248 result.append(':')
249 elif char.isspace():
250 # Whitespace after backslash
251 if char == '\n':
252 # Newlines are removed if they come immediately after a \
253 pass
254 else:
255 # Other whitespace characters are converted to spaces
256 result.append(' ')
257 else:
258 # Any other character after \ is inserted as-is
259 result.append(char)
260 i += 1
261 else:
262 # Edge Case: Trailing backslash
263 result.append('\\')
264 i += 1
265 else:
266 char = text[i]
267 if char == '\n':
268 # Newlines remain as newlines
269 result.append(char)
270 elif char.isspace() and char != '\n':
271 # All whitespace characters other than newline are converted to spaces
272 result.append(' ')
273 else:
274 result.append(char)
275 i += 1
276
277 return ''.join(result)
278# Handled Edge Cases: Empty input, Missing opening parenthesis, Missing closing parenthesis, Empty tree with no nodes, Missing semicolon after opening parenthesis, Property key must be in uppercase, Missing property values, Unmatched bracket, Handle escape sequences, Trailing backslash
279# Handled Edge Cases: Empty input, Missing opening parenthesis, Missing closing parenthesis, Empty tree with no nodes, Missing semicolon after opening parenthesis
280# Handled Edge Cases: Property key must be in uppercase, Missing property values, Unmatched bracket
281# Handled Edge Cases: Handle escape sequences, Trailing backslash
Test NameStatus
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.