Konoha

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string containing SGF formatted data
34
35 Returns:
36 SgfTree: A tree structure representing the SGF data
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input or only whitespace
42 if not input_string or input_string.isspace():
43 raise ValueError("tree missing")
44
45 # Edge Case: Missing opening parenthesis
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Missing closing parenthesis
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty content after removing parentheses
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Missing semicolon after opening parenthesis
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree_content(content)
65
66
67def _parse_tree_content(content: str) -> SgfTree:
68 """
69 Parse the content inside the parentheses of an SGF string.
70
71 Args:
72 content: The content inside the outer parentheses
73
74 Returns:
75 SgfTree: A tree structure representing the SGF data
76 """
77 # Skip the initial semicolon
78 index = 1
79
80 # Parse properties of the current node
81 properties, index = _parse_properties(content, index)
82
83 # Parse children nodes
84 children = []
85
86 # Edge Case: Handle multiple variations (children)
87 while index < len(content) and content[index] == '(':
88 # Find the matching closing parenthesis for this child
89 paren_count = 1
90 start = index + 1
91 index += 1
92
93 while index < len(content) and paren_count > 0:
94 if content[index] == '(':
95 paren_count += 1
96 elif content[index] == ')':
97 paren_count -= 1
98 index += 1
99
100 # Edge Case: Unmatched parentheses in child
101 if paren_count != 0:
102 raise ValueError("tree missing")
103
104 # Parse the child content
105 child_content = content[start:index-1]
106 child_tree = _parse_tree_content(child_content)
107 children.append(child_tree)
108
109 return SgfTree(properties, children)
110
111
112def _parse_properties(content: str, start_index: int) -> tuple[dict, int]:
113 """
114 Parse properties from the content string starting at the given index.
115
116 Args:
117 content: The content string
118 start_index: The index to start parsing from
119
120 Returns:
121 tuple: A tuple containing the properties dictionary and the next index
122 """
123 properties = {}
124 index = start_index
125
126 # Edge Case: Parse multiple properties in the same node
127 while index < len(content) and content[index].isalpha() and content[index].isupper():
128 # Parse key
129 key_start = index
130 while index < len(content) and content[index].isalpha() and content[index].isupper():
131 index += 1
132
133 key = content[key_start:index]
134
135 # Edge Case: Property key is not in uppercase
136 if not key.isupper():
137 raise ValueError("property must be in uppercase")
138
139 # Parse values
140 values = []
141
142 # Edge Case: Missing opening bracket for property value
143 if index >= len(content) or content[index] != '[':
144 raise ValueError("properties without delimiter")
145
146 # Edge Case: Parse multiple values for the same key
147 while index < len(content) and content[index] == '[':
148 index += 1 # Skip opening bracket
149 value_start = index
150
151 # Find the closing bracket
152 bracket_count = 1
153 while index < len(content) and bracket_count > 0:
154 if content[index] == '\\':
155 index += 2 # Skip escape character and the next character
156 elif content[index] == '[':
157 bracket_count += 1
158 index += 1
159 elif content[index] == ']':
160 bracket_count -= 1
161 index += 1
162 else:
163 index += 1
164
165 # Edge Case: Unmatched brackets
166 if bracket_count != 0:
167 raise ValueError("properties without delimiter")
168
169 value = content[value_start:index-1]
170 # Process the value according to SGF Text type rules
171 processed_value = _process_text_value(value)
172 values.append(processed_value)
173
174 properties[key] = values
175
176 return properties, index
177
178
179def _process_text_value(value: str) -> str:
180 """
181 Process a text value according to SGF Text type rules.
182
183 Args:
184 value: The raw text value
185
186 Returns:
187 str: The processed text value
188 """
189 result = []
190 i = 0
191
192 while i < len(value):
193 if value[i] == '\\':
194 # Edge Case: Handle escape sequences
195 i += 1
196 if i < len(value):
197 # According to SGF rules:
198 # - Newlines are removed if they come immediately after a backslash
199 # - All whitespace characters other than newline are converted to spaces
200 # - Any non-whitespace character after \ is inserted as-is
201 if value[i] == '\n':
202 # Newline immediately after backslash is removed
203 pass
204 elif value[i].isspace():
205 # Other whitespace characters are converted to spaces
206 result.append(' ')
207 else:
208 # Non-whitespace character is inserted as-is
209 result.append(value[i])
210 i += 1
211 else:
212 # Edge Case: Backslash at end of string
213 result.append('\\')
214 else:
215 # Edge Case: Convert non-newline whitespace to spaces
216 if value[i] != '\n' and value[i].isspace():
217 result.append(' ')
218 else:
219 result.append(value[i])
220 i += 1
221
222 return ''.join(result)
223# Handled Edge Cases: Empty input, missing delimiters, invalid tree structure, property format errors, escape sequences, whitespace handling
224# Handled Edge Cases: Unmatched parentheses, property parsing, multiple children
225# Handled Edge Cases: Property key validation, bracket matching, multiple values
226# Handled Edge Cases: Escape sequences, whitespace conversion, newline handling
Test NameStatus
test_all_lowercase_property
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_two_nodes
Fail
test_upper_and_lowercase_property
Fail
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_properties_without_delimiter
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.