fortitude

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string containing SGF formatted data
34
35 Returns:
36 SgfTree: A tree structure representing the SGF data
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input or input that doesn't start with '('
42 if not input_string or not input_string.startswith('('):
43 raise ValueError("tree missing")
44
45 # Edge Case: Input that starts with '(' but doesn't end with ')'
46 if not input_string.endswith(')'):
47 raise ValueError("tree missing")
48
49 # Remove the outer parentheses
50 content = input_string[1:-1]
51
52 # Edge Case: Empty content after removing parentheses
53 if not content:
54 raise ValueError("tree with no nodes")
55
56 # Edge Case: Content that doesn't start with ';'
57 if not content.startswith(';'):
58 raise ValueError("tree with no nodes")
59
60 return _parse_tree(content)
61
62
63def _parse_tree(content: str) -> SgfTree:
64 """
65 Parse the content of an SGF tree.
66
67 Args:
68 content: The content of the SGF tree (without outer parentheses)
69
70 Returns:
71 SgfTree: A tree structure representing the SGF data
72 """
73 # Skip the initial ';'
74 index = 1
75
76 # Parse properties of the current node
77 properties, index = _parse_properties(content, index)
78
79 # Parse children - can be either variations in parentheses or sequential nodes with semicolons
80 children = []
81
82 # Handle sequential nodes (with semicolons)
83 if index < len(content) and content[index] == ';':
84 # Create a chain of nodes for sequential semicolon-separated nodes
85 current_children = children
86 while index < len(content) and content[index] == ';':
87 # Parse the next node
88 node, index = _parse_node(content, index)
89 current_children.append(node)
90 # If this node has children, we need to continue chaining from its last child
91 if node.children:
92 # Find the deepest child in the chain
93 deepest = node
94 while deepest.children:
95 deepest = deepest.children[-1]
96 current_children = deepest.children
97
98 # Handle variations (in parentheses)
99 while index < len(content) and content[index] == '(':
100 # Find the matching closing parenthesis
101 paren_count = 1
102 start = index
103 index += 1
104
105 while index < len(content) and paren_count > 0:
106 if content[index] == '(':
107 paren_count += 1
108 elif content[index] == ')':
109 paren_count -= 1
110 index += 1
111
112 # Extract the subtree content (including parentheses)
113 subtree_content = content[start:index]
114
115 # Parse the subtree
116 child_tree = parse(subtree_content)
117 children.append(child_tree)
118
119 return SgfTree(properties, children)
120
121
122def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
123 """
124 Parse properties from the content starting at the given index.
125
126 Args:
127 content: The content to parse
128 start_index: The index to start parsing from
129
130 Returns:
131 tuple: A tuple containing the parsed properties dictionary and the next index
132 """
133 properties = {}
134 index = start_index
135
136 # Parse properties until we hit a ';' (new node), '(' (variation), or ')' (end)
137 while index < len(content) and content[index] not in [';', '(', ')']:
138 # Skip any whitespace (though problem states no optional whitespace)
139 if content[index].isspace():
140 index += 1
141 continue
142
143 # Parse property key
144 key_start = index
145
146 # Edge Case: Key that is not all uppercase
147 while index < len(content) and content[index].isalpha():
148 if not content[index].isupper():
149 raise ValueError("property must be in uppercase")
150 index += 1
151
152 # Edge Case: No key found
153 if index == key_start:
154 # If we're at a delimiter, we're done with properties
155 if index < len(content) and content[index] in [';', '(', ')']:
156 break
157 raise ValueError("properties without delimiter")
158
159 key = content[key_start:index]
160
161 # Parse property values
162 values = []
163
164 # Edge Case: Key not followed by '['
165 if index >= len(content) or content[index] != '[':
166 raise ValueError("properties without delimiter")
167
168 while index < len(content) and content[index] == '[':
169 index += 1 # Skip '['
170 value_start = index
171
172 # Parse value content, handling escaping
173 while index < len(content) and content[index] != ']':
174 if content[index] == '\\':
175 index += 1 # Skip escape character
176 # Make sure we don't go out of bounds
177 if index >= len(content):
178 break
179 index += 1
180
181 # Edge Case: Unclosed value bracket
182 if index >= len(content):
183 raise ValueError("properties without delimiter")
184
185 value_content = content[value_start:index]
186 value = _unescape_text(value_content)
187 values.append(value)
188 index += 1 # Skip ']'
189
190 properties[key] = values
191
192 return properties, index
193
194
195def _parse_node(content: str, start_index: int) -> tuple[SgfTree, int]:
196 """
197 Parse a single node starting with a semicolon.
198
199 Args:
200 content: The content to parse
201 start_index: The index to start parsing from (should point to a ';')
202
203 Returns:
204 tuple: A tuple containing the parsed SgfTree node and the next index
205 """
206 # Skip the semicolon
207 index = start_index + 1
208
209 # Parse properties of this node
210 properties, index = _parse_properties(content, index)
211
212 # Create the node
213 node = SgfTree(properties=properties, children=[])
214
215 # Handle any children of this node (sequential nodes with semicolons)
216 current_node = node
217 while index < len(content) and content[index] == ';':
218 # Parse the next node
219 child_node, index = _parse_node(content, index)
220 current_node.children.append(child_node)
221 current_node = child_node
222
223 return node, index
224
225def _unescape_text(text: str) -> str:
226 """
227 Unescape SGF text according to SGF specification.
228
229 Args:
230 text: The text to unescape
231
232 Returns:
233 str: The unescaped text
234 """
235 result = []
236 i = 0
237
238 while i < len(text):
239 if text[i] == '\\':
240 i += 1
241 # Edge Case: Escape at end of string
242 if i >= len(text):
243 result.append('\\')
244 break
245
246 char = text[i]
247 # If next character is 'n', convert to space (escaped newline)
248 if char == 'n':
249 result.append(' ')
250 # All whitespace characters other than newline are converted to spaces
251 elif char.isspace():
252 result.append(' ')
253 else:
254 # Any non-whitespace character after \ is inserted as-is
255 result.append(char)
256 else:
257 # Regular character
258 result.append(text[i])
259 i += 1
260
261 return ''.join(result)
262# Handled Edge Cases: Empty input, missing outer parentheses, empty content after removing parentheses, missing initial semicolon, key not in uppercase, missing key, key not followed by '[', unclosed value bracket, escape at end of string
263# Handled Edge Cases: Key not in uppercase, missing key, key not followed by '[', unclosed value bracket
264# Handled Edge Cases: Empty input or input that doesn't start with '(', input that starts with '(' but doesn't end with ')', empty content after removing parentheses, content that doesn't start with ';'
265# Handled Edge Cases: Escape at end of string, newline after escape, whitespace conversion, non-whitespace character after escape
Test NameStatus
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.