let it go

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF (Smart Game Format) string and return a tree of properties.

    Args:
        input_string: A string containing SGF formatted data.

    Returns:
        SgfTree: The tree built by ``_parse_tree`` from the text between the
        outermost parentheses.

    Raises:
        ValueError: "tree missing" when the input is empty or is not wrapped
            in a leading '(' and a trailing ')'; "tree with no nodes" when
            nothing lies between those parentheses. Errors raised by
            ``_parse_tree`` propagate unchanged.
    """
    # Falsy input (empty string, None) is rejected before any str method runs.
    if not input_string:
        raise ValueError("tree missing")

    # The whole game tree must be wrapped in one pair of parentheses.
    if not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Drop the outer parentheses; "()" leaves nothing to parse.
    content = input_string[1:-1]
    if not content:
        raise ValueError("tree with no nodes")

    return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without the outer parentheses).
70 """
71 # Edge Case: Content doesn't start with ';'
72 if not content.startswith(';'):
73 raise ValueError("tree missing")
74
75 # Find the first node and any subsequent variations
76 i = 1 # Skip the initial ';'
77
78 # Parse the first node's properties
79 properties, i = _parse_properties(content, i)
80
81 # Parse children (if any)
82 children = []
83
84 # Look for variations (subtrees in parentheses)
85 while i < len(content):
86 if content[i] == '(':
87 # Find matching closing parenthesis
88 paren_count = 1
89 start = i + 1
90 i += 1
91 while i < len(content) and paren_count > 0:
92 if content[i] == '(':
93 paren_count += 1
94 elif content[i] == ')':
95 paren_count -= 1
96 i += 1
97
98 # Edge Case: Unmatched parenthesis
99 if paren_count != 0:
100 raise ValueError("tree missing")
101
102 # Parse the subtree
103 subtree_content = content[start:i-1]
104 children.append(_parse_tree(subtree_content))
105 elif content[i] == ';':
106 # Another node in the same sequence
107 node_properties, i = _parse_properties(content, i + 1)
108 # Create a new node and make it a child of the last child (chain)
109 new_node = SgfTree(node_properties, [])
110 if children:
111 # Add to the last child's children to create a chain
112 last_child = children[-1]
113 while last_child.children:
114 last_child = last_child.children[-1]
115 last_child.children.append(new_node)
116 else:
117 # No existing children, add as direct child
118 children.append(new_node)
119 else:
120 # Unexpected character
121 raise ValueError("tree missing")
122
123 return SgfTree(properties, children)
124
125
126def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
127 """
128 Parse properties from a node.
129
130 Returns:
131 Tuple of (properties_dict, next_index)
132 """
133 properties = {}
134 i = start_index
135
136 # Parse properties while we're not at the end or a special character
137 while i < len(content) and content[i] not in '();':
138 # Parse property key
139 key_start = i
140
141 # Edge Case: Key is not uppercase
142 if not content[i].isalpha() or not content[i].isupper():
143 raise ValueError("property must be in uppercase")
144
145 while i < len(content) and content[i].isalpha() and content[i].isupper():
146 i += 1
147
148 # Edge Case: Key with no values
149 if i >= len(content) or content[i] != '[':
150 raise ValueError("properties without delimiter")
151
152 key = content[key_start:i]
153
154 # Parse values
155 values = []
156
157 while i < len(content) and content[i] == '[':
158 i += 1 # Skip '['
159 value_start = i
160
161 # Parse value, handling escapes
162 while i < len(content):
163 if content[i] == ']':
164 # Check if this ] is escaped by counting backslashes before it
165 backslash_count = 0
166 j = i - 1
167 while j >= value_start and content[j] == '\\':
168 backslash_count += 1
169 j -= 1
170 # If even number of backslashes, the ] is NOT escaped
171 if backslash_count % 2 == 0:
172 break # Found unescaped closing bracket
173 # If odd number of backslashes, the ] is escaped, continue parsing
174 i += 1
175
176 # Edge Case: Unmatched bracket
177 if i >= len(content):
178 raise ValueError("properties without delimiter")
179
180 value = _unescape_text(content[value_start:i])
181 values.append(value)
182 i += 1 # Skip ']'
183
184 # Edge Case: No values for a property
185 if not values:
186 raise ValueError("properties without delimiter")
187
188 properties[key] = values
189
190 return properties, i
191
192
193def _unescape_text(text: str) -> str:
194 """
195 Process escape sequences in SGF text according to SGF specification.
196 """
197 result = []
198 i = 0
199
200 while i < len(text):
201 if text[i] == '\\':
202 i += 1 # Skip the backslash
203
204 # Edge Case: Backslash at end of text
205 if i >= len(text):
206 # According to SGF spec, trailing backslash is just ignored
207 break
208
209 char = text[i]
210
211 # Handle newline removal after backslash
212 if char == '\n':
213 # Newline is removed if immediately after backslash
214 pass
215 else:
216 # All other characters after backslash are inserted as-is
217 # All whitespace (except newline) is converted to space
218 if char.isspace() and char != '\n':
219 result.append(' ')
220 else:
221 result.append(char)
222 else:
223 char = text[i]
224 # All whitespace characters other than newline are converted to spaces
225 if char.isspace() and char != '\n':
226 result.append(' ')
227 else:
228 result.append(char)
229 i += 1
230
231 return ''.join(result)
232
# Handled Edge Cases: Empty input string, Input doesn't start with '(', Input doesn't end with ')', Input is just parentheses with no content, Content is empty after removing parentheses, Content doesn't start with ';', Unmatched parenthesis, Key is not uppercase, Key with no values, Unmatched bracket, No values for a property, Backslash at end of text
Test Name | Status
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.