let it go

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format, i.e. a single tree wrapped in
            parentheses whose first node starts with ';'.

    Returns:
        SgfTree: The parsed SGF tree.

    Raises:
        ValueError: "tree missing" if the input is empty or is not wrapped
            in '(' ... ')'; "tree with no nodes" if the text between the
            outer parentheses does not start with ';'. Errors raised by
            _parse_tree while reading nodes propagate unchanged.
    """
    # The input must be one tree: "(" ... ")".
    if (
        not input_string
        or not input_string.startswith('(')
        or not input_string.endswith(')')
    ):
        raise ValueError("tree missing")

    # Strip the outer parentheses; what remains must begin with a node.
    # An empty remainder also fails this check, so "()" is rejected here.
    content = input_string[1:-1]
    if not content.startswith(';'):
        raise ValueError("tree with no nodes")

    return _parse_tree(content)
65
66
def _parse_tree(content: str) -> SgfTree:
    """
    Parse a tree from its content string.

    The content is one node (";" followed by zero or more properties),
    optionally followed by parenthesised child trees and/or a further
    ";"-introduced node. A following node is the shorthand for a single
    child and is parsed recursively from the remaining text.

    Args:
        content: The content of the tree without outer parentheses.

    Returns:
        SgfTree: The parsed tree.

    Raises:
        ValueError: "tree with no nodes" if content is empty or does not
            start with ';'; "properties without delimiter" if a property
            key is missing, has no '[' value, or a value is unterminated;
            "property must be in uppercase" for a key that is not all
            uppercase; "tree missing" for an unmatched '('.
    """
    if not content or content[0] != ';':
        raise ValueError("tree with no nodes")

    end = len(content)
    i = 1  # Skip the initial ';'
    properties = {}
    children = []

    # Properties of this node run until a child tree '(' or the next node ';'.
    while i < end and content[i] not in '(;':
        key_start = i
        while i < end and content[i].isalpha():
            i += 1

        # Anything that is not a letter where a key is expected is malformed.
        if i == key_start:
            raise ValueError("properties without delimiter")

        key = content[key_start:i]
        if not key.isupper():
            raise ValueError("property must be in uppercase")

        # A key must be followed by at least one bracketed value.
        if i >= end or content[i] != '[':
            raise ValueError("properties without delimiter")

        values = []
        while i < end and content[i] == '[':
            value, i = _read_property_value(content, i + 1)
            values.append(value)

        properties[key] = values

    # Each parenthesised group is one child tree (a variation).
    while i < end and content[i] == '(':
        close = _find_tree_end(content, i)
        children.append(_parse_tree(content[i + 1:close]))
        i = close + 1

    # Shorthand: a following ';' node is the single child of this node.
    if i < end and content[i] == ';':
        children.append(_parse_tree(content[i:]))

    return SgfTree(properties, children)


def _read_property_value(content: str, start: int) -> tuple:
    """
    Read one property value whose first character is at index start.

    Escapes are consumed left to right, so the first ']' reached outside an
    escape is always the terminator. Whitespace other than newline becomes
    a space whether or not it is escaped; a newline directly after a
    backslash is dropped; any other escaped character is kept verbatim.

    Args:
        content: The tree content being parsed.
        start: Index just past the opening '['.

    Returns:
        tuple: (decoded value, index just past the closing ']').

    Raises:
        ValueError: "properties without delimiter" if the value is not
            closed or the content ends right after a backslash.
    """
    blanks = ' \t\r'  # whitespace that is normalised to a single space
    chars = []
    end = len(content)
    i = start
    while i < end:
        ch = content[i]
        if ch == ']':
            return ''.join(chars), i + 1
        if ch == '\\':
            i += 1
            if i >= end:
                # Escape at end of string
                raise ValueError("properties without delimiter")
            ch = content[i]
            if ch == '\n':
                # Soft line break: backslash + newline vanishes entirely.
                i += 1
                continue
        chars.append(' ' if ch in blanks else ch)
        i += 1

    # Ran off the end without seeing the closing bracket.
    raise ValueError("properties without delimiter")


def _find_tree_end(content: str, open_index: int) -> int:
    """
    Return the index of the ')' matching the '(' at open_index.

    Parentheses inside bracketed property values (including after escaped
    characters) are ignored, because they are value text, not structure.

    Args:
        content: The tree content being parsed.
        open_index: Index of the opening '('.

    Returns:
        int: Index of the matching ')'.

    Raises:
        ValueError: "properties without delimiter" if the content ends
            inside an unclosed value; "tree missing" if the parenthesis is
            never closed.
    """
    depth = 0
    in_value = False
    end = len(content)
    i = open_index
    while i < end:
        ch = content[i]
        if in_value:
            if ch == '\\':
                i += 1  # the escaped character can never close the value
            elif ch == ']':
                in_value = False
        elif ch == '[':
            in_value = True
        elif ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0:
                return i
        i += 1

    if in_value:
        raise ValueError("properties without delimiter")
    raise ValueError("tree missing")
# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree doesn't start with ';', Content is empty, Content doesn't start with ';', Key is empty, Key is not uppercase, No values after key, Unclosed bracket, Escape at end of string, Unmatched parenthesis
Test Name | Status
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.