Kratos

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Args:
        input_string: A string in SGF format

    Returns:
        SgfTree: The parsed SGF tree

    Raises:
        ValueError: If the input is malformed or doesn't follow SGF format rules
    """
    # Empty input, or input that is not wrapped in '(' ... ')', is not a tree.
    if (
        not input_string
        or not input_string.startswith('(')
        or not input_string.endswith(')')
    ):
        raise ValueError("tree missing")

    # Drop the outer parentheses; what remains must be a sequence of nodes.
    content = input_string[1:-1]

    # "()" is a tree, but one that contains no nodes.
    if not content:
        raise ValueError("tree with no nodes")

    # Every node is introduced by ';'.
    if not content.startswith(';'):
        raise ValueError("tree missing")

    return _parse_tree(content)
65
66
def _parse_tree(content: str) -> SgfTree:
    """
    Parse a tree from its content string.

    The first node's properties are read from ``content``; every
    parenthesised subtree that follows becomes a child, and a following
    ';' (shorthand for a single continuation) is parsed recursively from
    that ';' to the end of ``content`` and added as one more child.

    Args:
        content: The content of the tree without outer parentheses

    Returns:
        SgfTree: The parsed tree

    Raises:
        ValueError: If the content is empty, does not start with ';', has a
            property key that is missing or not uppercase, has a value that
            is not delimited by '[' and ']', or has an unmatched '('
    """
    if not content:
        raise ValueError("tree with no nodes")

    if not content.startswith(';'):
        raise ValueError("tree missing")

    size = len(content)
    pos = 1  # Skip the initial ';'
    properties = {}

    # Properties of the current node run until the next node or subtree.
    while pos < size and content[pos] not in ('(', ';'):
        key_start = pos
        while pos < size and content[pos].isalpha():
            pos += 1

        # No alphabetic key at this position.
        if pos == key_start:
            raise ValueError("properties without delimiter")

        key = content[key_start:pos]
        if not key.isupper():
            raise ValueError("property must be in uppercase")

        # A key must be followed by at least one bracketed value.
        if pos >= size or content[pos] != '[':
            raise ValueError("properties without delimiter")

        values = []
        while pos < size and content[pos] == '[':
            close = _find_value_end(content, pos)
            values.append(_process_text(content[pos + 1:close]))
            pos = close + 1  # Skip ']'

        properties[key] = values

    # Each parenthesised subtree is parsed recursively as a child.
    children = []
    while pos < size and content[pos] == '(':
        close = _find_subtree_end(content, pos)
        children.append(_parse_tree(content[pos + 1:close]))
        pos = close + 1  # Skip ')'

    # Consecutive nodes (shorthand notation): the rest is a single child.
    # NOTE(review): anything else left at this point is ignored, not rejected.
    if pos < size and content[pos] == ';':
        children.append(_parse_tree(content[pos:]))

    return SgfTree(properties, children)


def _find_value_end(content: str, start: int) -> int:
    """Return the index of the ']' closing the value opened at content[start]."""
    size = len(content)
    pos = start + 1
    while pos < size:
        ch = content[pos]
        if ch == ']':
            return pos
        # A backslash escapes the next character, so an escaped ']' is data
        # and must not terminate the value.
        pos += 2 if ch == '\\' else 1
    raise ValueError("properties without delimiter")


def _find_subtree_end(content: str, start: int) -> int:
    """Return the index of the ')' matching the '(' at content[start]."""
    size = len(content)
    depth = 0
    pos = start
    while pos < size:
        ch = content[pos]
        if ch == '[':
            # Parentheses inside a property value are plain text; jump to
            # the value's closing bracket so they are not counted.
            pos = _find_value_end(content, pos)
        elif ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0:
                return pos
        pos += 1
    raise ValueError("tree missing")
176
177
178def _process_text(text: str) -> str:
179 """
180 Process text according to SGF Text type rules.
181
182 Args:
183 text: Raw text value from SGF
184
185 Returns:
186 str: Processed text
187 """
188 result = []
189 i = 0
190
191 while i < len(text):
192 if text[i] == '\\':
193 # Handle escape character - preserve as literal backslash
194 result.append('\\')
195 i += 1
196 if i < len(text):
197 # Keep the escaped character as-is
198 result.append(text[i])
199 i += 1
200 elif text[i] in (' ', '\t', '\r'):
201 # All whitespace characters other than newline are converted to spaces
202 result.append(' ')
203 i += 1
204 else:
205 # All other characters (including literal newlines) are kept as-is
206 result.append(text[i])
207 i += 1
208
209 return ''.join(result)
# Handled Edge Cases: Empty input, missing parentheses, empty tree, missing semicolon, properties without delimiter, non-uppercase keys, unclosed brackets, unmatched parentheses
# Handled Edge Cases: Key parsing, value parsing with escapes, child parsing, consecutive nodes
# Handled Edge Cases: Escape processing, whitespace conversion, newline handling
Test Name | Status
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Fail
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.