fishlium

Finished
2626
2727
def parse(input_string: str) -> "SgfTree":
    """
    Parse an SGF string and return a tree structure of properties.

    Only the outer envelope is validated here; the text between the outer
    parentheses is handed to ``_parse_tree_content`` for the actual parsing.

    Args:
        input_string: A string containing SGF formatted data

    Returns:
        SgfTree: The parsed SGF tree structure

    Raises:
        ValueError: "tree missing" if the input is empty, whitespace-only,
            not wrapped in '(' ... ')', or its content does not begin with
            ';'; "tree with no nodes" if the parentheses are empty.
            Errors raised while parsing the content propagate unchanged.
    """
    # Edge Case: Empty input or only whitespace
    if not input_string or input_string.isspace():
        raise ValueError("tree missing")

    # Edge Case: Not wrapped in a '(' ... ')' pair
    if not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Remove the outer parentheses
    content = input_string[1:-1]

    # Edge Case: Empty content after removing parentheses
    if not content:
        raise ValueError("tree with no nodes")

    # Edge Case: Content does not start with a node marker ';'
    if not content.startswith(';'):
        raise ValueError("tree missing")

    return _parse_tree_content(content)
65
66
def _parse_tree_content(content: str) -> SgfTree:
    """
    Parse the content inside the parentheses of an SGF string.

    The content is a sequence of ';'-introduced nodes optionally followed by
    parenthesised variations. Each node of the sequence becomes the single
    child of the node before it, and variations become children of the last
    node seen so far in the sequence.

    Args:
        content: The content inside the outer parentheses; callers pass a
            string whose first character is ';'

    Returns:
        SgfTree: The parsed SGF tree structure

    Raises:
        ValueError: "tree missing" if something other than ';' or '(' is
            found where a node or variation should start. Errors from
            ``_parse_properties`` and ``_extract_subtree`` propagate.
    """
    # Edge Case: Content is just ';'
    if content == ';':
        return SgfTree()

    # Parse the first node
    node_content, remaining = _split_node_content(content[1:])  # Skip the initial ';'
    properties = _parse_properties(node_content)

    children = []
    # List that receives the next node or variation: starts as the root's
    # children and moves down each time the main line gains a node, so that
    # variations attach to the end of the sequence rather than to the root.
    tail_children = children
    while remaining:
        if remaining.startswith('('):
            # Parse a variation and hang it off the current end of the line
            child_content, remaining = _extract_subtree(remaining)
            tail_children.append(_parse_tree_content(child_content))
        elif remaining.startswith(';'):
            # Parse the next node of the sequence
            node_content, remaining = _split_node_content(remaining[1:])  # Skip the ';'
            node = SgfTree(properties=_parse_properties(node_content))
            tail_children.append(node)
            tail_children = node.children
        else:
            # Edge Case: Unexpected character
            raise ValueError("tree missing")

    return SgfTree(properties=properties, children=children)
113
114
115def _split_node_content(content: str) -> tuple[str, str]:
116 """
117 Split the content into node properties and remaining content.
118
119 Args:
120 content: The content to split
121
122 Returns:
123 tuple: (node_properties, remaining_content)
124 """
125 i = 0
126 while i < len(content):
127 if content[i] in '(;':
128 return content[:i], content[i:]
129 i += 1
130 return content, ''
131
132
133def _extract_subtree(content: str) -> tuple[str, str]:
134 """
135 Extract a subtree from content that starts with '('.
136
137 Args:
138 content: Content starting with '('
139
140 Returns:
141 tuple: (subtree_content, remaining_content)
142
143 Raises:
144 ValueError: If the subtree is malformed
145 """
146 if not content.startswith('('):
147 raise ValueError("tree missing")
148
149 depth = 0
150 for i, char in enumerate(content):
151 if char == '(':
152 depth += 1
153 elif char == ')':
154 depth -= 1
155 if depth == 0:
156 return content[1:i], content[i+1:] # Exclude the parentheses
157
158 # Edge Case: Unmatched parentheses
159 raise ValueError("tree missing")
160
161
162def _parse_properties(content: str) -> dict:
163 """
164 Parse properties from a node content string.
165
166 Args:
167 content: String containing properties in format KEY[value1][value2]...
168
169 Returns:
170 dict: Dictionary of properties
171
172 Raises:
173 ValueError: If properties are malformed
174 """
175 properties = {}
176 i = 0
177
178 while i < len(content):
179 # Parse key
180 key_start = i
181 while i < len(content) and content[i].isalpha():
182 i += 1
183
184 # Edge Case: No key found
185 if i == key_start:
186 raise ValueError("properties without delimiter")
187
188 key = content[key_start:i]
189
190 # Edge Case: Key is not uppercase
191 if key != key.upper() or not key.isalpha():
192 raise ValueError("property must be in uppercase")
193
194 # Parse values
195 values = []
196 while i < len(content) and content[i] == '[':
197 i += 1 # Skip '['
198 value_start = i
199
200 # Parse value
201 value_chars = []
202 while i < len(content) and content[i] != ']':
203 if content[i] == '\\':
204 # Handle escape sequences
205 i += 1
206 if i < len(content):
207 if content[i] == 'n' or content[i] == 'r' or content[i] == 't':
208 # These are not special in SGF, just literal characters
209 value_chars.append('\\')
210 value_chars.append(content[i])
211 elif content[i] == '\n':
212 # Newline immediately after backslash is removed
213 pass
214 else:
215 value_chars.append(content[i])
216 else:
217 # Edge Case: Escape at end of string
218 raise ValueError("properties without delimiter")
219 else:
220 # Convert whitespace (except newline) to spaces
221 if content[i] != '\n' and content[i].isspace():
222 value_chars.append(' ')
223 else:
224 value_chars.append(content[i])
225 i += 1
226
227 # Edge Case: Unclosed bracket
228 if i >= len(content) or content[i] != ']':
229 raise ValueError("properties without delimiter")
230
231 value = ''.join(value_chars)
232 values.append(value)
233 i += 1 # Skip ']'
234
235 # Edge Case: No values for key
236 if not values:
237 raise ValueError("properties without delimiter")
238
239 properties[key] = values
240
241 return properties
242
# Handled Edge Cases: Empty input, missing parentheses, missing semicolon, unmatched parentheses,
# no key found, non-uppercase key, unclosed bracket, no values for key,
# escape at end of string
# Handled Edge Cases: Empty content after parentheses removal, content not starting with semicolon
# Handled Edge Cases: Content is just semicolon, unexpected character in tree content
# Handled Edge Cases: Missing delimiters, non-uppercase properties, missing tree structure, empty tree
Test Name | Status
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.