chris

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input or input that doesn't start with '('
42 if not input_string or not input_string.startswith('('):
43 raise ValueError("tree missing")
44
45 # Edge Case: Input that doesn't end with ')'
46 if not input_string.endswith(')'):
47 raise ValueError("tree missing")
48
49 # Remove the outer parentheses
50 content = input_string[1:-1]
51
52 # Edge Case: Empty content after removing parentheses
53 if not content:
54 raise ValueError("tree with no nodes")
55
56 # Edge Case: Content that doesn't start with ';'
57 if not content.startswith(';'):
58 raise ValueError("tree missing")
59
60 return _parse_tree_content(content)
61
62
63def _parse_tree_content(content: str) -> SgfTree:
64 """
65 Parse the content of a tree (without the outer parentheses).
66
67 Args:
68 content: The content of the tree
69
70 Returns:
71 SgfTree: The parsed tree
72 """
73 # Find the first node
74 if not content.startswith(';'):
75 raise ValueError("tree missing")
76
77 # Parse the first node
78 node_end = _find_node_end(content, 1)
79 node_content = content[1:node_end]
80 properties = _parse_properties(node_content)
81
82 # Parse children
83 children_content = content[node_end:]
84 children = _parse_children(children_content)
85
86 return SgfTree(properties=properties, children=children)
87
88
89def _find_node_end(content: str, start: int) -> int:
90 """
91 Find the end of a node in the content.
92
93 Args:
94 content: The content to search in
95 start: The start position
96
97 Returns:
98 int: The position after the end of the node
99 """
100 i = start
101 while i < len(content):
102 # If we encounter a ';' or '(' or ')', it's the end of the current node
103 if content[i] in ';()':
104 break
105 i += 1
106 return i
107
108
109def _parse_properties(content: str) -> dict:
110 """
111 Parse properties from a node content.
112
113 Args:
114 content: The content of a node
115
116 Returns:
117 dict: The parsed properties
118 """
119 properties = {}
120 i = 0
121
122 while i < len(content):
123 # Skip whitespace
124 if content[i].isspace():
125 i += 1
126 continue
127
128 # Parse key
129 key_start = i
130 while i < len(content) and content[i].isalpha():
131 i += 1
132
133 # Edge Case: No key found
134 if i == key_start:
135 raise ValueError("properties without delimiter")
136
137 key = content[key_start:i]
138
139 # Edge Case: Key is not uppercase
140 if key != key.upper():
141 raise ValueError("property must be in uppercase")
142
143 # Parse values
144 values = []
145
146 # Edge Case: Key not followed by '['
147 if i >= len(content) or content[i] != '[':
148 raise ValueError("properties without delimiter")
149
150 while i < len(content) and content[i] == '[':
151 i += 1 # Skip '['
152 value_start = i
153
154 # Parse value
155 value_chars = []
156 while i < len(content) and content[i] != ']':
157 if content[i] == '\\':
158 # Handle escape character
159 i += 1
160 if i < len(content):
161 if content[i] == 'n' and i+1 < len(content) and content[i+1] == ']':
162 # Special case: \n] should be treated as \n (which is just n) followed by ]
163 value_chars.append('\\')
164 value_chars.append('n')
165 elif content[i] == '\n':
166 # Escaped newline - remove if immediately followed by \
167 # But according to spec, newlines are removed if they come immediately after \
168 pass # Skip the newline
169 else:
170 value_chars.append(content[i])
171 else:
172 # Edge Case: Escape character at end of input
173 raise ValueError("properties without delimiter")
174 else:
175 # Handle regular characters
176 # All whitespace other than newline is converted to spaces
177 if content[i] != '\n' and content[i].isspace():
178 value_chars.append(' ')
179 else:
180 value_chars.append(content[i])
181 i += 1
182
183 # Edge Case: Unclosed bracket
184 if i >= len(content) or content[i] != ']':
185 raise ValueError("properties without delimiter")
186
187 i += 1 # Skip ']'
188 values.append(''.join(value_chars))
189
190 properties[key] = values
191
192 return properties
193
194
195def _parse_children(content: str) -> list:
196 """
197 Parse children from content.
198
199 Args:
200 content: The content containing children
201
202 Returns:
203 list: The parsed children
204 """
205 children = []
206 i = 0
207
208 while i < len(content):
209 # Skip whitespace
210 if content[i].isspace():
211 i += 1
212 continue
213
214 # If we find '(', it's the start of a child
215 if content[i] == '(':
216 i += 1 # Skip '('
217
218 # Find matching ')'
219 paren_count = 1
220 child_start = i
221
222 while i < len(content) and paren_count > 0:
223 if content[i] == '(':
224 paren_count += 1
225 elif content[i] == ')':
226 paren_count -= 1
227 i += 1
228
229 # Edge Case: Unmatched parenthesis
230 if paren_count != 0:
231 raise ValueError("tree missing")
232
233 # Parse the child content
234 child_content = content[child_start:i-1]
235
236 # Edge Case: Empty child
237 if not child_content:
238 raise ValueError("tree with no nodes")
239
240 # Parse the child
241 child = _parse_tree_content(child_content)
242 children.append(child)
243 else:
244 # Edge Case: Unexpected character
245 raise ValueError("tree missing")
246
247 return children
248# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after parentheses removal, missing semicolon, no key found, key not uppercase, key not followed by '[', unclosed bracket, escape character at end of input, unmatched parenthesis, empty child, unexpected character
Test NameStatus
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_two_nodes
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_properties_without_delimiter
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.