let it go

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF (Smart Game Format) string and return a tree structure of properties.

    Args:
        input_string: A string containing SGF formatted data

    Returns:
        SgfTree: A tree structure representing the SGF data

    Raises:
        ValueError: "tree missing" when the input is empty or is not wrapped
            in an opening '(' and a closing ')'; "tree with no nodes" when
            nothing lies between those parentheses. Errors raised while
            parsing the enclosed content propagate unchanged.
    """
    # Edge Case: empty input, or input not enclosed in parentheses
    if (
        not input_string
        or not input_string.startswith('(')
        or not input_string.endswith(')')
    ):
        raise ValueError("tree missing")

    # Remove the outermost parentheses
    content = input_string[1:-1]

    # Edge Case: nothing between the parentheses (the input was just '()')
    if not content:
        raise ValueError("tree with no nodes")

    return _parse_tree(content)
65
66
def _parse_tree(content: str) -> SgfTree:
    """
    Parse the content of a tree (without the outer parentheses).

    The content is a sequence of one or more ';'-introduced nodes followed by
    zero or more parenthesised subtrees (variations). Each node of the
    sequence becomes the single child of the node before it, and the
    variations become the children of the LAST node of the sequence.

    Args:
        content: The text between a tree's opening '(' and closing ')'

    Returns:
        SgfTree: The tree rooted at the first node of the sequence

    Raises:
        ValueError: "tree missing" when the content does not start with ';';
            "tree with no nodes" when something other than a subtree follows
            the first subtree. Errors from node and subtree parsing propagate.
    """
    # Edge Case: Content doesn't start with ';'
    if not content.startswith(';'):
        raise ValueError("tree missing")

    # Collect the properties of every node in the leading sequence
    sequence = []
    remaining = content
    while remaining.startswith(';'):
        node_properties, remaining = _parse_node(remaining[1:])
        sequence.append(node_properties)

    # Everything after the sequence must be parenthesised subtrees
    variations = []
    while remaining:
        # Edge Case: a node (or anything else) appearing after a subtree
        if not remaining.startswith('('):
            raise ValueError("tree with no nodes")
        subtree_content, remaining = _parse_subtree(remaining)
        variations.append(_parse_tree(subtree_content))

    # Build the chain from the tail backwards so the variations hang off the
    # last node of the sequence rather than off the root.
    tree = SgfTree(properties=sequence[-1], children=variations)
    for node_properties in reversed(sequence[:-1]):
        tree = SgfTree(properties=node_properties, children=[tree])
    return tree
105
106
107def _parse_subtree(content: str) -> tuple[str, str]:
108 """
109 Parse a subtree enclosed in parentheses and return its content and the remaining string.
110 """
111 # Edge Case: Content doesn't start with '('
112 if not content.startswith('('):
113 raise ValueError("tree missing")
114
115 # Find the matching closing parenthesis
116 depth = 1
117 i = 1 # Start after the opening parenthesis
118 while i < len(content) and depth > 0:
119 if content[i] == '(':
120 depth += 1
121 elif content[i] == ')':
122 depth -= 1
123 i += 1
124
125 # Edge Case: No matching closing parenthesis
126 if depth > 0:
127 raise ValueError("tree missing")
128
129 # Extract the subtree content (without the outer parentheses)
130 subtree_content = content[1:i-1]
131 remaining = content[i:]
132
133 return subtree_content, remaining
134
135
136def _parse_node(content: str) -> tuple[dict, str]:
137 """
138 Parse a node (starting with ';') and return its properties and the remaining string.
139 """
140 properties = {}
141 i = 0
142
143 # Parse properties
144 while i < len(content) and content[i] != '(' and content[i] != ';':
145 # Parse property key
146 key_start = i
147 while i < len(content) and content[i].isalpha():
148 i += 1
149
150 # Edge Case: No key found
151 if i == key_start:
152 raise ValueError("properties without delimiter")
153
154 key = content[key_start:i]
155
156 # Edge Case: Key is not uppercase
157 if key != key.upper() or not key.isalpha():
158 raise ValueError("property must be in uppercase")
159
160 # Parse property values
161 values = []
162 while i < len(content) and content[i] == '[':
163 i += 1 # Skip '['
164 value_start = i
165
166 # Parse value, handling escapes
167 while i < len(content) and content[i] != ']':
168 if content[i] == '\\':
169 i += 1 # Skip escape character but keep the escaped character
170 if i < len(content):
171 i += 1 # Skip the escaped character too
172 else:
173 i += 1
174
175 # Edge Case: Unclosed value bracket
176 if i >= len(content) or content[i] != ']':
177 raise ValueError("properties without delimiter")
178
179 value = content[value_start:i]
180 # Process the value according to SGF text type rules
181 value = _process_text_value(value)
182 values.append(value)
183 i += 1 # Skip ']'
184
185 # Edge Case: No values for the property
186 if not values:
187 raise ValueError("properties without delimiter")
188
189 properties[key] = values
190
191 remaining = content[i:]
192 return properties, remaining
193
194
195def _process_text_value(value: str) -> str:
196 """
197 Process a text value according to SGF text type rules.
198 """
199 result = []
200 i = 0
201 while i < len(value):
202 if value[i] == '\\':
203 # Handle escape sequences
204 i += 1 # Skip the escape character
205 if i < len(value):
206 # For SGF text type:
207 # - Newlines are removed if they come immediately after a \
208 # - All whitespace characters other than newline are converted to spaces
209 # - Any character after \ is inserted as-is
210
211 if value[i] == '\n':
212 # Remove the newline (it comes immediately after \)
213 i += 1
214 elif value[i].isspace() and value[i] != '\n':
215 # Convert other whitespace to space
216 result.append(' ')
217 i += 1
218 else:
219 # Insert the character as-is
220 result.append(value[i])
221 i += 1
222 else:
223 # Handle regular characters
224 if value[i] != '\n' and value[i].isspace():
225 # Convert whitespace (other than newlines) to space
226 result.append(' ')
227 else:
228 result.append(value[i])
229 i += 1
230
231 return ''.join(result)
# Handled Edge Cases: Empty input string, Input doesn't start with '(', Input doesn't end with ')', Input is just '()', Content is empty after removing parentheses, Content doesn't start with ';', No matching closing parenthesis, No key found, Key is not uppercase, Unclosed value bracket, No values for the property
# Handled Edge Cases: Empty input string, Input doesn't start with '(', Input doesn't end with ')', Input is just '()', Content is empty after removing parentheses, Content doesn't start with ';', No matching closing parenthesis, No key found, Key is not uppercase, Unclosed value bracket, No values for the property
# Handled Edge Cases: No matching closing parenthesis
# Handled Edge Cases: No key found, Key is not uppercase, Unclosed value bracket, No values for the property
Test Name | Status
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.