Kratos

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Input doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Input doesn't end with ')'
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Edge Case: Tree missing semicolon
61 if not content.startswith(';'):
62 raise ValueError("tree missing")
63
64 return _parse_tree(content)
65
66
67def _parse_tree(content: str) -> SgfTree:
68 """
69 Parse the content of a tree (without the outer parentheses).
70
71 Args:
72 content: The content of the tree
73
74 Returns:
75 SgfTree: The parsed tree
76 """
77 # Skip the initial semicolon
78 content = content[1:]
79
80 # Parse properties of the current node
81 properties, remaining = _parse_properties(content)
82
83 children = []
84
85 # Parse children - can be either semicolon nodes or parenthesized variations
86 while remaining:
87 if remaining.startswith(';'):
88 # Parse a child node created by semicolon
89 child_tree = _parse_tree(remaining)
90 children.append(child_tree)
91 remaining = "" # All remaining content is consumed by the child
92 elif remaining.startswith('('):
93 # Parse a variation in parentheses
94 # Find the matching parenthesis for this child
95 balance = 1
96 i = 1
97 while i < len(remaining) and balance > 0:
98 if remaining[i] == '(':
99 balance += 1
100 elif remaining[i] == ')':
101 balance -= 1
102 i += 1
103
104 # Edge Case: Unmatched parenthesis
105 if balance != 0:
106 raise ValueError("tree missing")
107
108 # Parse the child tree
109 child_content = remaining[1:i-1]
110 child_tree = _parse_tree(child_content)
111 children.append(child_tree)
112
113 # Move to the next potential child
114 remaining = remaining[i:]
115 else:
116 # No more children
117 break
118
119 return SgfTree(properties, children)
120
121
122def _parse_properties(content: str) -> tuple[dict, str]:
123 """
124 Parse properties from the content string.
125
126 Args:
127 content: String containing properties
128
129 Returns:
130 tuple: (properties dictionary, remaining string)
131 """
132 properties = {}
133 remaining = content
134
135 while remaining and remaining[0].isalpha():
136 # Parse key
137 key_end = 0
138 while key_end < len(remaining) and remaining[key_end].isalpha():
139 key_end += 1
140
141 key = remaining[:key_end]
142
143 # Edge Case: Property key is not uppercase
144 if key != key.upper():
145 raise ValueError("property must be in uppercase")
146
147 remaining = remaining[key_end:]
148
149 # Edge Case: Missing opening bracket
150 if not remaining.startswith('['):
151 raise ValueError("properties without delimiter")
152
153 # Parse values
154 values = []
155 while remaining.startswith('['):
156 remaining = remaining[1:] # Skip opening bracket
157
158 # Parse value
159 value, remaining = _parse_value(remaining)
160
161 # Edge Case: Missing closing bracket
162 if not remaining.startswith(']'):
163 raise ValueError("properties without delimiter")
164
165 remaining = remaining[1:] # Skip closing bracket
166 values.append(value)
167
168 properties[key] = values
169
170 return properties, remaining
171
172
173def _parse_value(content: str) -> tuple[str, str]:
174 """
175 Parse a value from the content string.
176
177 Args:
178 content: String containing a value
179
180 Returns:
181 tuple: (parsed value, remaining string)
182 """
183 value = []
184 i = 0
185
186 while i < len(content):
187 if content[i] == ']':
188 # End of value
189 break
190
191 if content[i] == '\\':
192 # Handle escape character
193 i += 1
194 # Edge Case: Escape at end of value
195 if i >= len(content):
196 raise ValueError("properties without delimiter")
197
198 char = content[i]
199
200 # If next character is newline, remove the backslash and newline
201 if char == '\n':
202 # Do nothing - remove the backslash and newline
203 pass
204 elif char.isspace():
205 # Any whitespace character after \ follows the normal rules
206 # All whitespace characters other than newline are converted to spaces
207 value.append(' ')
208 else:
209 # Any non-whitespace character after \ is inserted as-is
210 value.append(char)
211 else:
212 char = content[i]
213 if char == '\n':
214 # Newlines remain as newlines
215 value.append(char)
216 elif char.isspace() and char != '\n':
217 # All whitespace characters other than newline are converted to spaces
218 value.append(' ')
219 else:
220 value.append(char)
221 i += 1
222
223 return ''.join(value), content[i:]
224
225# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, Tree missing semicolon, Unmatched parenthesis, Property key is not uppercase, Missing opening bracket, Missing closing bracket, Escape at end of value
Test NameStatus
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.