kale

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 if not input_string:
30 raise ValueError("tree missing")
31
32 if not input_string.startswith('(') or not input_string.endswith(')'):
33 raise ValueError("tree missing")
34
35 # Edge Case: Empty tree with just parentheses
36 if input_string == '()':
37 raise ValueError("tree with no nodes")
38
39 # Remove the outer parentheses
40 content = input_string[1:-1]
41
42 if not content:
43 raise ValueError("tree with no nodes")
44
45 return parse_tree(content)
46
47
48def parse_tree(content: str) -> SgfTree:
49 if not content:
50 raise ValueError("tree with no nodes")
51
52 # Edge Case: Malformed tree that doesn't start with a semicolon or parenthesis
53 if not content.startswith(';') and not content.startswith('('):
54 raise ValueError("tree missing")
55
56 nodes, remainder = parse_nodes(content)
57
58 # Edge Case: Extra content after parsing
59 if remainder:
60 raise ValueError("tree missing")
61
62 return nodes[0] if nodes else SgfTree()
63
64
65# Parse a sequence of nodes and return the first node in the chain
66# Parse a sequence of nodes and return the first node in the chain
67def parse_nodes(content: str) -> tuple[list[SgfTree], str]:
68 if not content:
69 return [], ""
70
71 # If this is a variation group (starts with parenthesis)
72 if content.startswith('('):
73 nodes = []
74 while content.startswith('('):
75 child_content, content = extract_parenthesized_content(content)
76 child_nodes, _ = parse_nodes(child_content[1:-1]) # Remove parentheses
77 # Only add the first node of each variation
78 if child_nodes:
79 nodes.append(child_nodes[0])
80 return nodes, content
81
82 # Parse sequential nodes (separated by semicolons)
83 nodes = []
84
85 while content.startswith(';'):
86 # Parse one node
87 node, content = parse_node(content[1:]) # Skip the semicolon
88 nodes.append(node)
89
90 # Link sequential nodes as a chain
91 # Each node (except the last) has the next node as its first child
92 for i in range(len(nodes) - 1):
93 nodes[i].children.insert(0, nodes[i + 1])
94
95 # Check for variations that belong to the first node in the sequence
96 if nodes:
97 children = []
98 while content.startswith('('):
99 child_content, content = extract_parenthesized_content(content)
100 child_nodes, _ = parse_nodes(child_content[1:-1]) # Remove parentheses
101 # Only add the first node of each variation
102 if child_nodes:
103 children.append(child_nodes[0])
104
105 # Add variations as children of the first node
106 if children:
107 nodes[0].children.extend(children)
108
109 return nodes, content
110
111
112def parse_node(content: str) -> tuple[SgfTree, str]:
113 properties = {}
114
115 while content:
116 # Skip optional semicolons that separate properties within the same node
117 if content.startswith(';'):
118 content = content[1:]
119 continue
120
121 # Check if we have a property (alphabetic characters followed by '[')
122 i = 0
123 while i < len(content) and content[i].isalpha():
124 i += 1
125
126 # If we have alphabetic characters followed by '[', it's a property
127 if i > 0 and i < len(content) and content[i] == '[':
128 # Parse a property
129 key, values, content = parse_property(content)
130
131 # Edge Case: Property key not in uppercase
132 if not key.isupper():
133 raise ValueError("property must be in uppercase")
134
135 properties[key] = values
136 else:
137 # No more properties in this node
138 # Stop at semicolon (next node), parenthesis (variation), or end of content
139 break
140
141 return SgfTree(properties), content
142
143
144def parse_property(content: str) -> tuple[str, list[str], str]:
145 # Parse the key
146 key = ""
147 i = 0
148 while i < len(content) and content[i].isalpha():
149 key += content[i]
150 i += 1
151
152 # Edge Case: Properties without delimiter (no key or no opening bracket)
153 if not key or i >= len(content) or content[i] != '[':
154 raise ValueError("properties without delimiter")
155
156 # Parse the values
157 values = []
158 i += 1 # Skip the opening bracket
159
160 while True:
161 value, i = parse_value(content, i)
162 values.append(value)
163
164 # Check if there's another value for the same key
165 if i < len(content) and content[i] == '[':
166 i += 1 # Skip the opening bracket
167 else:
168 break
169
170 return key, values, content[i:]
171
172
173def parse_value(content: str, start_index: int) -> tuple[str, int]:
174 value = ""
175 i = start_index
176
177 while i < len(content) and content[i] != ']':
178 if content[i] == '\\':
179 # Handle escape sequences
180 i += 1
181 if i < len(content):
182 # According to SGF spec, any character after \ is inserted as-is
183 # But whitespace other than newline is converted to space
184 char = content[i]
185 if char != '\n' and char.isspace():
186 value += ' '
187 else:
188 value += char
189 i += 1
190 else:
191 # Edge Case: Escape character at end of input
192 raise ValueError("properties without delimiter")
193 elif content[i] == '\n':
194 # Edge Case: Newline handling - if immediately after \, remove it
195 # But this case should be handled by the escape check above
196 # If we get here, it's a standalone newline
197 value += content[i]
198 i += 1
199 else:
200 # Convert other whitespace to spaces
201 if content[i] != '\n' and content[i].isspace():
202 value += ' '
203 else:
204 value += content[i]
205 i += 1
206
207 # Edge Case: Missing closing bracket
208 if i >= len(content) or content[i] != ']':
209 raise ValueError("properties without delimiter")
210
211 return value, i + 1 # Skip the closing bracket
212
213
214def extract_parenthesized_content(content: str) -> tuple[str, str]:
215 if not content.startswith('('):
216 raise ValueError("tree missing")
217
218 balance = 0
219 for i, char in enumerate(content):
220 if char == '(':
221 balance += 1
222 elif char == ')':
223 balance -= 1
224 if balance == 0:
225 return content[:i+1], content[i+1:]
226
227 # Edge Case: Unbalanced parentheses
228 raise ValueError("tree missing")
Test NameStatus
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Fail
test_parentheses_in_property_value_don_t_need_to_be_escaped
Fail
test_properties_without_delimiter
Fail
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Fail
test_two_nodes
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.