sn62

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input or doesn't start with '('
42 if not input_string or not input_string.startswith('('):
43 raise ValueError("tree missing")
44
45 # Edge Case: Doesn't end with ')'
46 if not input_string.endswith(')'):
47 raise ValueError("tree missing")
48
49 # Remove the outer parentheses
50 content = input_string[1:-1]
51
52 # Edge Case: Empty content after removing parentheses
53 if not content:
54 raise ValueError("tree with no nodes")
55
56 # Edge Case: Doesn't start with ';'
57 if not content.startswith(';'):
58 raise ValueError("tree with no nodes")
59
60 return _parse_tree(content)
61
62
63def _parse_tree(content: str) -> SgfTree:
64 """
65 Parse a tree from content string.
66
67 Args:
68 content: Content string starting with ';'
69
70 Returns:
71 SgfTree: The parsed tree
72 """
73 # Skip the initial ';'
74 index = 1
75
76 # Parse properties
77 properties = {}
78 while index < len(content) and content[index] != '(' and content[index] != ')':
79 # If we encounter a semicolon, it means we're starting a new node
80 # This current node should become the parent of the new node(s)
81 if content[index] == ';':
82 break
83
84 # Parse key
85 key_start = index
86 while index < len(content) and content[index].isalpha():
87 index += 1
88
89 # Edge Case: No key found
90 if key_start == index:
91 break
92
93 # Edge Case: Key is not all uppercase
94 key = content[key_start:index]
95 if not key.isupper():
96 raise ValueError("property must be in uppercase")
97
98 # Edge Case: Key exists but has no values
99 if index >= len(content) or content[index] != '[':
100 raise ValueError("properties without delimiter")
101
102 # Parse values
103 values = []
104 while index < len(content) and content[index] == '[':
105 index += 1 # Skip '['
106 value_start = index
107
108 # Parse value, handling escapes
109 while index < len(content) and content[index] != ']':
110 if content[index] == '\\':
111 index += 2 # Skip escape and next character
112 else:
113 index += 1
114
115 # Edge Case: Unclosed value bracket
116 if index >= len(content):
117 raise ValueError("properties without delimiter")
118
119 # Extract and process value
120 value = _process_text(content[value_start:index])
121 values.append(value)
122 index += 1 # Skip ']'
123
124 # Edge Case: No values found for key
125 if not values:
126 raise ValueError("properties without delimiter")
127
128 properties[key] = values
129
130 # Parse children
131 children = []
132
133 # First, check if we have a semicolon indicating a direct child node
134 if index < len(content) and content[index] == ';':
135 # Parse the rest of the content as a single child node
136 child_content = content[index:]
137 children.append(_parse_tree(child_content))
138 else:
139 # Parse variations in parentheses
140 while index < len(content) and content[index] == '(':
141 # Find matching parenthesis for this child
142 paren_count = 1
143 start = index
144 index += 1
145
146 while index < len(content) and paren_count > 0:
147 if content[index] == '(':
148 paren_count += 1
149 elif content[index] == ')':
150 paren_count -= 1
151 index += 1
152
153 # Edge Case: Mismatched parentheses
154 if paren_count != 0:
155 raise ValueError("tree missing")
156
157 # Parse the child tree
158 child_content = content[start+1:index-1]
159 children.append(_parse_tree(child_content))
160
161 # Edge Case: No properties and no children
162 if not properties and not children:
163 raise ValueError("tree with no nodes")
164
165 return SgfTree(properties, children)
166
167
168def _process_text(text: str) -> str:
169 """
170 Process SGF text according to SGF Text type rules.
171
172 Args:
173 text: Raw text from SGF property value
174
175 Returns:
176 str: Processed text
177 """
178 result = []
179 i = 0
180 while i < len(text):
181 if text[i] == '\\':
182 # Handle escape character
183 i += 1
184 if i < len(text):
185 # Check if next character is a newline
186 if text[i] == '\n':
187 # Newline immediately after \ is removed
188 i += 1
189 continue
190 else:
191 # Any character after \ is inserted as-is
192 result.append(text[i])
193 i += 1
194 else:
195 # Trailing backslash is just added
196 result.append('\\')
197 elif text[i] == '\n':
198 # Newline not preceded by \ is kept
199 result.append('\n')
200 i += 1
201 elif text[i] in (' ', '\t', '\r'):
202 # All whitespace other than newline becomes space
203 result.append(' ')
204 i += 1
205 else:
206 # All other characters added as-is
207 result.append(text[i])
208 i += 1
209
210 return ''.join(result)
211# Handled Edge Cases: Empty input, missing opening parenthesis, missing closing parenthesis, empty content after parentheses removal, missing semicolon, key not uppercase, missing delimiter for property values, unclosed value bracket, no values for key, mismatched parentheses
212# Handled Edge Cases: Escape character handling, newline handling, whitespace conversion
Test NameStatus
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_node_without_properties
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.