uv

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF (Smart Game Format) string and return a tree structure of properties.
31
32 Args:
33 input_string: A string containing SGF formatted data
34
35 Returns:
36 SgfTree: A tree structure representing the parsed SGF data
37
38 Raises:
39 ValueError: If the input string is invalid SGF format
40 """
41 # Edge Case: Empty input or doesn't start with '('
42 if not input_string or not input_string.startswith('('):
43 raise ValueError("tree missing")
44
45 # Edge Case: Doesn't end with ')'
46 if not input_string.endswith(')'):
47 raise ValueError("tree missing")
48
49 # Remove the outer parentheses
50 content = input_string[1:-1]
51
52 # Edge Case: Empty content after removing parentheses
53 if not content:
54 raise ValueError("tree with no nodes")
55
56 # Parse the tree
57 tree, _ = _parse_tree(content, 0)
58 return tree
59
60
61def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]:
62 """
63 Parse a tree from the content string starting at the given index.
64
65 Args:
66 content: The string content to parse
67 index: The starting index for parsing
68
69 Returns:
70 tuple: A tuple containing the parsed SgfTree and the next index to parse
71 """
72 # Edge Case: No semicolon to start node
73 if index >= len(content) or content[index] != ';':
74 raise ValueError("tree with no nodes")
75
76 index += 1 # Skip the semicolon
77
78 # Parse properties of the current node
79 properties = {}
80 while index < len(content) and content[index] not in ['(', ')', ';']:
81 prop, index = _parse_property(content, index)
82 key, values = prop
83 properties[key] = values
84
85 # Parse children
86 children = []
87
88 # Handle children in parentheses
89 while index < len(content) and content[index] == '(':
90 # Parse a child tree
91 child, index = _parse_subtree(content, index)
92 children.append(child)
93
94 # Handle sequential nodes (separated by semicolons)
95 while index < len(content) and content[index] == ';':
96 # Parse the next sequential node as a child
97 child, index = _parse_tree(content, index)
98 children.append(child)
99
100 return SgfTree(properties, children), index
101
102
103def _parse_subtree(content: str, index: int) -> tuple[SgfTree, int]:
104 """
105 Parse a subtree from the content string starting at the given index.
106
107 Args:
108 content: The string content to parse
109 index: The starting index for parsing
110
111 Returns:
112 tuple: A tuple containing the parsed SgfTree and the next index to parse
113 """
114 # Edge Case: No opening parenthesis
115 if index >= len(content) or content[index] != '(':
116 raise ValueError("tree missing")
117
118 index += 1 # Skip the opening parenthesis
119
120 # Parse the tree inside the parentheses
121 tree, index = _parse_tree(content, index)
122
123 # Edge Case: No closing parenthesis
124 if index >= len(content) or content[index] != ')':
125 raise ValueError("tree missing")
126
127 index += 1 # Skip the closing parenthesis
128
129 return tree, index
130
131
132def _parse_property(content: str, index: int) -> tuple[tuple[str, list[str]], int]:
133 """
134 Parse a property from the content string starting at the given index.
135
136 Args:
137 content: The string content to parse
138 index: The starting index for parsing
139
140 Returns:
141 tuple: A tuple containing the parsed property (key, values) and the next index to parse
142 """
143 # Parse the property key
144 key_start = index
145 while index < len(content) and content[index].isalpha():
146 index += 1
147
148 key = content[key_start:index]
149
150 # Edge Case: No key found
151 if index == key_start:
152 raise ValueError("properties without delimiter")
153
154 # Edge Case: Property not in uppercase (takes precedence over delimiter error)
155 if not key.isupper():
156 raise ValueError("property must be in uppercase")
157
158 # Parse the property values
159 values = []
160
161 # Edge Case: No opening bracket after key
162 if index >= len(content) or content[index] != '[':
163 raise ValueError("properties without delimiter")
164
165 while index < len(content) and content[index] == '[':
166 index += 1 # Skip the opening bracket
167
168 # Parse the value
169 value, index = _parse_value(content, index)
170 values.append(value)
171
172 # Edge Case: No closing bracket
173 if index >= len(content) or content[index] != ']':
174 raise ValueError("properties without delimiter")
175
176 index += 1 # Skip the closing bracket
177
178 return (key, values), index
179
180
181def _parse_value(content: str, index: int) -> tuple[str, int]:
182 """
183 Parse a value from the content string starting at the given index.
184 This handles SGF Text type escaping rules.
185
186 Args:
187 content: The string content to parse
188 index: The starting index for parsing
189
190 Returns:
191 tuple: A tuple containing the parsed value and the next index to parse
192 """
193 value = []
194 while index < len(content) and content[index] != ']':
195 if content[index] == '\\':
196 # Handle escape character
197 index += 1
198
199 # Edge Case: Escape at end of content
200 if index >= len(content):
201 # Treat backslash at end as literal backslash
202 value.append('\\')
203 break
204
205 char = content[index]
206
207 # If next character is newline, remove it and replace with space
208 if char == '\n':
209 # Newline immediately after \ is removed and replaced with space
210 value.append(' ')
211 elif char.isspace():
212 # Other whitespace characters after \ are converted to spaces
213 value.append(' ')
214 else:
215 # Any non-whitespace character after \ is inserted as-is
216 value.append('\\')
217 value.append(char)
218 elif content[index] == '\n':
219 # Newlines remain as newlines (unless escaped)
220 value.append('\n')
221 elif content[index].isspace() and content[index] != '\n':
222 # All whitespace characters other than newline are converted to spaces
223 value.append(' ')
224 else:
225 # Regular character
226 value.append(content[index])
227 index += 1
228
229 return ''.join(value), index
230# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after parentheses removal, missing semicolon, property not in uppercase, no key found, no opening bracket after key, no closing bracket, escape at end of content
Test NameStatus
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.