fistinz

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Input doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Input doesn't end with ')'
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Parse the tree
61 tree, _ = _parse_tree_with_sequential(content, 0)
62 return tree
63
64
65def _parse_tree_with_sequential(content: str, index: int) -> tuple[SgfTree, int]:
66 """
67 Parse a tree from the content string starting at index, handling sequential nodes.
68
69 Args:
70 content: The SGF content string
71 index: The starting index to parse from
72
73 Returns:
74 tuple: (SgfTree, new_index)
75 """
76 # Parse the first node
77 tree, index = _parse_tree(content, index)
78
79 # Parse sequential nodes (parent-child relationships)
80 while index < len(content) and content[index] == ';':
81 child, index = _parse_tree(content, index)
82 tree.children.append(child)
83
84 return tree, index
85
86
87def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]:
88 """
89 Parse a tree from the content string starting at index.
90
91 Args:
92 content: The SGF content string
93 index: The starting index to parse from
94
95 Returns:
96 tuple: (SgfTree, new_index)
97 """
98 # Edge Case: Missing semicolon at start of node
99 if index >= len(content) or content[index] != ';':
100 raise ValueError("tree missing")
101
102 index += 1 # Skip the semicolon
103
104 # Parse properties of the current node
105 properties = {}
106 while index < len(content) and content[index] not in '();':
107 # Parse key
108 key_start = index
109 while index < len(content) and content[index].isupper():
110 index += 1
111
112 # Edge Case: Key is not uppercase
113 if index == key_start:
114 raise ValueError("property must be in uppercase")
115
116 key = content[key_start:index]
117
118 # Edge Case: Missing opening bracket for property value
119 if index >= len(content) or content[index] != '[':
120 raise ValueError("properties without delimiter")
121
122 # Parse all values for this key
123 values = []
124 # Parse the first value
125 if index < len(content) and content[index] == '[':
126 index += 1 # Skip opening bracket
127
128 # Parse value, handling escapes
129 value_chars = []
130 while index < len(content):
131 if content[index] == ']':
132 # Found closing bracket, end of value
133 break
134 elif content[index] == '\\':
135 index += 1
136 if index >= len(content):
137 raise ValueError("properties without delimiter")
138 # In SGF, escape the next character as-is
139 value_chars.append(content[index])
140 else:
141 value_chars.append(content[index])
142 index += 1
143
144 # Edge Case: Missing closing bracket
145 if index >= len(content) or content[index] != ']':
146 raise ValueError("properties without delimiter")
147
148 value = ''.join(value_chars)
149 # Process value according to SGF text rules
150 processed_value = _process_text_value(value)
151 values.append(processed_value)
152 index += 1 # Skip closing bracket
153
154 # Parse additional values for the same key (only if they start with [)
155 while index < len(content) and content[index] == '[':
156 index += 1 # Skip opening bracket
157
158 # Parse value, handling escapes
159 value_chars = []
160 while index < len(content):
161 if content[index] == ']':
162 # Found closing bracket, end of value
163 break
164 elif content[index] == '\\':
165 index += 1
166 if index >= len(content):
167 raise ValueError("properties without delimiter")
168 # In SGF, escape the next character as-is
169 value_chars.append(content[index])
170 else:
171 value_chars.append(content[index])
172 index += 1
173
174 # Edge Case: Missing closing bracket
175 if index >= len(content) or content[index] != ']':
176 raise ValueError("properties without delimiter")
177
178 value = ''.join(value_chars)
179 # Process value according to SGF text rules
180 processed_value = _process_text_value(value)
181 values.append(processed_value)
182 index += 1 # Skip closing bracket
183
184 properties[key] = values
185
186 # Parse children
187 children = []
188 while index < len(content) and content[index] == '(':
189 child, index = _parse_tree(content, index + 1) # Skip opening parenthesis
190 children.append(child)
191 # Skip closing parenthesis
192 if index >= len(content) or content[index] != ')':
193 raise ValueError("tree missing")
194 index += 1
195
196 return SgfTree(properties, children), index
197
198
199def _process_text_value(value: str) -> str:
200 """
201 Process a text value according to SGF text rules.
202
203 Args:
204 value: The raw text value
205
206 Returns:
207 str: The processed text value
208 """
209 # Process according to SGF text rules:
210 # 1. Newlines are removed if they come immediately after a `\`
211 # 2. All whitespace characters other than newline are converted to spaces
212 # 3. `\` is the escape character - any character after `\` is inserted as-is
213
214 result = []
215 i = 0
216 while i < len(value):
217 if value[i] == '\\':
218 # This is an escape character
219 i += 1
220 if i < len(value):
221 # Check if the next character is a newline
222 if value[i] == '\n':
223 # Remove the newline (skip it)
224 i += 1
225 else:
226 # Insert the next character as-is
227 result.append(value[i])
228 i += 1
229 # If we're at the end, just ignore the trailing backslash
230 else:
231 # Non-escaped character
232 # Convert all whitespace (except newlines) to spaces
233 if value[i].isspace() and value[i] != '\n':
234 result.append(' ')
235 else:
236 result.append(value[i])
237 i += 1
238
239 return ''.join(result)
240# Handled Edge Cases: Empty input, missing parentheses, missing semicolon, empty tree, key not uppercase, missing brackets, trailing backslash
241# Handled Edge Cases: Missing semicolon, missing opening bracket, missing closing bracket, key not uppercase
242# Handled Edge Cases: Missing closing parenthesis, missing opening parenthesis
Test NameStatus
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.