fistinz

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without the outer parentheses).
70
71 Args:
72 content: The content of the tree
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Parse properties of the root node (starting at index 0, which should be a semicolon)
78 properties, index = _parse_properties(content, 0)
79
80 # Parse children
81 children = []
82 while index < len(content):
83 if content[index] == '(': # Start of a child tree
84 # Find the matching closing parenthesis
85 paren_count = 1
86 start = index
87 index += 1
88 while index < len(content) and paren_count > 0:
89 if content[index] == '(':
90 paren_count += 1
91 elif content[index] == ')':
92 paren_count -= 1
93 index += 1
94
95 # Parse the child tree
96 child_content = content[start:index]
97 children.append(parse(child_content))
98 elif content[index] == ';':
99 # This should be the start of another node in the same tree
100 # Parse its properties
101 child_properties, new_index = _parse_properties(content, index)
102 # Create a child node with these properties
103 # For consecutive nodes, we need to chain them as parent-child relationships
104 child_node = SgfTree(properties=child_properties, children=[])
105
106 # If we already have children, we need to append this as a child of the last child
107 # Otherwise, add it as a direct child
108 if children:
109 # Find the deepest child in the chain and add this as its child
110 last_child = children[-1]
111 while last_child.children:
112 last_child = last_child.children[-1]
113 last_child.children.append(child_node)
114 else:
115 children.append(child_node)
116 index = new_index
117 else:
118 # Skip any other characters
119 index += 1
120
121 return SgfTree(properties=properties, children=children)
122
123
124def _parse_properties(content: str, index: int) -> tuple[dict, int]:
125 """
126 Parse properties from the content starting at index.
127
128 Args:
129 content: The content to parse
130 index: The starting index
131
132 Returns:
133 tuple: A tuple of (properties dict, new index)
134 """
135 properties = {}
136
137 # Expect a semicolon at the start
138 if index >= len(content) or content[index] != ';':
139 return properties, index
140
141 index += 1 # Skip the semicolon
142
143 # Parse all properties for this node
144 while index < len(content) and content[index].isalpha():
145 # Parse key
146 key_start = index
147 while index < len(content) and content[index].isalpha():
148 index += 1
149 key = content[key_start:index]
150
151 # Edge Case: Property not in uppercase
152 if not key.isupper():
153 raise ValueError("property must be in uppercase")
154
155 # Parse values
156 values = []
157
158 # Edge Case: Missing opening bracket
159 if index >= len(content) or content[index] != '[':
160 raise ValueError("properties without delimiter")
161
162 while index < len(content) and content[index] == '[':
163 index += 1 # Skip the opening bracket
164
165 # Parse value
166 value, index = _parse_value(content, index)
167
168 # Edge Case: Missing closing bracket
169 if index >= len(content) or content[index] != ']':
170 raise ValueError("properties without delimiter")
171
172 index += 1 # Skip the closing bracket
173 values.append(value)
174
175 properties[key] = values
176
177 return properties, index
178
179
180def _parse_value(content: str, index: int) -> tuple[str, int]:
181 """
182 Parse a value from the content starting at index.
183
184 Args:
185 content: The content to parse
186 index: The starting index
187
188 Returns:
189 tuple: A tuple of (parsed value, new index)
190 """
191 value = []
192
193 while index < len(content):
194 if content[index] == ']': # End of value
195 break
196 elif content[index] == '\\': # Escape character
197 index += 1
198 if index < len(content):
199 # According to SGF spec:
200 # - Newlines are removed if they come immediately after a \
201 # - All whitespace characters other than newline are converted to spaces
202 # - Any non-whitespace character after \ is inserted as-is
203 char = content[index]
204 if char == '\n': # Newline immediately after backslash
205 # Remove the newline (it's already processed by this point in Python)
206 pass
207 elif char.isspace(): # Other whitespace characters
208 value.append(' ')
209 else: # Non-whitespace character (including n, r, t)
210 value.append(char)
211 index += 1
212 else:
213 # Edge Case: Escape character at end of content
214 value.append('\\')
215 else:
216 # Regular character
217 # According to SGF spec:
218 # - All whitespace characters other than newline are converted to spaces
219 char = content[index]
220 if char != '\n' and char.isspace():
221 value.append(' ')
222 else:
223 value.append(char)
224 index += 1
225
226 return ''.join(value), index
227
228 return ''.join(value), index
229# Handled Edge Cases: Empty input, Missing opening parenthesis, Missing closing parenthesis, Empty tree, Missing semicolon after opening parenthesis, Property not in uppercase, Missing opening bracket, Missing closing bracket, Escape character at end of content
230# Handled Edge Cases: Empty input, Missing opening parenthesis, Missing closing parenthesis, Empty tree, Missing semicolon after opening parenthesis
231# Handled Edge Cases: Property not in uppercase, Missing opening bracket, Missing closing bracket
232# Handled Edge Cases: Escape character at end of content
Test NameStatus
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.