weekends

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF (Smart Game Format) string and return a tree of properties.

    Args:
        input_string: SGF text consisting of a single game tree: an opening
            '(', one or more nodes (each introduced by ';'), and a closing ')'.

    Returns:
        SgfTree: The root node of the parsed tree, as built by ``_parse_tree``.

    Raises:
        ValueError: "tree missing" if the input is empty, is not wrapped in
            '(' ... ')', or its content does not begin with ';';
            "tree with no nodes" if the parentheses enclose nothing.
            Any ValueError raised by ``_parse_tree`` propagates unchanged.
    """
    # An empty string fails startswith('(') as well, so this single guard
    # covers the empty, missing-'(' and missing-')' cases, which all report
    # the same error.
    if not (input_string.startswith('(') and input_string.endswith(')')):
        raise ValueError("tree missing")

    # Strip the outer parentheses; what remains must be the node sequence.
    content = input_string[1:-1]

    if not content:
        raise ValueError("tree with no nodes")

    if not content.startswith(';'):
        raise ValueError("tree missing")

    return _parse_tree(content)
65
66
def _parse_tree(content: str) -> SgfTree:
    """
    Parse the content of one game tree (outer parentheses already removed).

    The content is a sequence of one or more nodes, each introduced by ';',
    optionally followed by parenthesised variations. Consecutive nodes form a
    chain (each node is the only child of the node before it); the variations
    become the children of the last node in the sequence.

    Args:
        content: SGF content string starting with ';'

    Returns:
        SgfTree: Root node of the parsed tree

    Raises:
        ValueError: "tree missing" if the content does not start with ';', if
            parentheses are unbalanced, or if unparsed text remains;
            "tree with no nodes" for an empty variation "()";
            "property must be in uppercase" for a non-uppercase key letter;
            "properties without delimiter" for a missing key, a key without
            any [value], or an unterminated value.
    """
    tree, index = _parse_sequence(content, 0)

    # Anything left over is a stray ')' or text following a variation.
    if index != len(content):
        raise ValueError("tree missing")

    return tree


def _parse_sequence(content: str, index: int) -> "tuple[SgfTree, int]":
    """
    Parse a node sequence plus its trailing variations starting at ``index``.

    Returns:
        A ``(tree, next_index)`` pair; ``next_index`` is the position of the
        first character that does not belong to the sequence.
    """
    size = len(content)

    if index < size and content[index] == ')':
        raise ValueError("tree with no nodes")
    if index >= size or content[index] != ';':
        raise ValueError("tree missing")

    # Nodes of a sequence are collected iteratively so that a long game
    # record does not consume one recursion level per move.
    node_properties = []
    while index < size and content[index] == ';':
        properties, index = _parse_properties(content, index + 1)
        node_properties.append(properties)

    # Variations: each "( ... )" is a complete subtree of the last node.
    variations = []
    while index < size and content[index] == '(':
        child, index = _parse_sequence(content, index + 1)
        if index >= size or content[index] != ')':
            raise ValueError("tree missing")
        index += 1  # Skip ')'
        variations.append(child)

    # Build the chain from the last node backwards; the children list is
    # passed to the constructor complete rather than mutated afterwards.
    tree = SgfTree(node_properties[-1], variations)
    for properties in reversed(node_properties[:-1]):
        tree = SgfTree(properties, [tree])

    return tree, index


def _parse_properties(content: str, index: int) -> "tuple[dict, int]":
    """
    Parse the ``KEY[value][value]...`` properties of one node.

    Parsing stops at the first '(', ')' or ';' that is outside a value, or at
    the end of the content.

    Returns:
        A ``(properties, next_index)`` pair mapping each key to its list of
        decoded values. A repeated key replaces the earlier entry.
    """
    size = len(content)
    properties = {}

    while index < size and content[index] not in '();':
        # Property key: one or more uppercase letters.
        key_start = index
        while index < size and content[index].isalpha():
            if not content[index].isupper():
                raise ValueError("property must be in uppercase")
            index += 1

        if index == key_start:
            raise ValueError("properties without delimiter")

        key = content[key_start:index]

        # Property values: one or more bracketed texts.
        values = []
        while index < size and content[index] == '[':
            raw_value, index = _read_raw_value(content, index + 1)
            values.append(_parse_text_value(raw_value))

        if not values:
            raise ValueError("properties without delimiter")

        properties[key] = values

    return properties, index


def _read_raw_value(content: str, index: int) -> "tuple[str, int]":
    """
    Locate the end of a property value whose first character is at ``index``.

    Escape sequences are skipped over but left intact, so that decoding
    happens exactly once, in ``_parse_text_value``.

    Returns:
        A ``(raw_value, next_index)`` pair; ``next_index`` points just past
        the closing ']'.

    Raises:
        ValueError: "properties without delimiter" if no unescaped ']' exists.
    """
    size = len(content)
    start = index

    while index < size and content[index] != ']':
        # A backslash protects the following character, including ']'.
        index += 2 if content[index] == '\\' else 1

    if index >= size:
        raise ValueError("properties without delimiter")

    return content[start:index], index + 1


def _parse_text_value(value: str) -> str:
    """
    Decode a raw SGF text value, handling escape sequences and whitespace.

    Rules applied:
        * backslash + newline is removed entirely;
        * backslash + any other character yields that character, except that
          whitespace after a backslash is still normalised as below;
        * unescaped newlines are kept;
        * every other whitespace character (space, tab, form feed, vertical
          tab, carriage return) becomes a single space;
        * a lone backslash at the very end of the value is kept as-is.

    Args:
        value: Raw text value from SGF, with escape sequences still present

    Returns:
        str: Processed text value
    """
    blanks = ' \t\f\v\r'  # Whitespace other than newline
    result = []
    chars = iter(value)

    for char in chars:
        if char == '\\':
            char = next(chars, None)
            if char is None:
                # Escape at end of string: nothing to escape, keep it.
                result.append('\\')
                break
            if char == '\n':
                # Escaped newline is a soft line break: drop both characters.
                continue
        result.append(' ' if char in blanks else char)

    return ''.join(result)
231
# Handled Edge Cases: Empty input string, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree doesn't start with ';', Property key is empty, Property key is not in uppercase, Unclosed property value, Missing closing bracket, Property has no values, Unmatched parenthesis, Escape at end of string
Test Name | Status
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.