yahoo

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Input doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Input doesn't end with ')'
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Parse the tree
61 tree, _ = _parse_tree(content, 0)
62 return tree
63
64
65def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]:
66 """
67 Parse a tree from the content string starting at index.
68
69 Args:
70 content: The SGF content string
71 index: The starting index to parse from
72
73 Returns:
74 tuple: (SgfTree, new_index) The parsed tree and the index after parsing
75 """
76 # Edge Case: No semicolon to start node
77 if index >= len(content) or content[index] != ';':
78 raise ValueError("tree missing")
79
80 index += 1 # Skip the semicolon
81
82 # Parse properties of the current node
83 properties = {}
84 while index < len(content) and content[index] not in '();':
85 # Parse key
86 key_start = index
87 while index < len(content) and content[index].isalpha() and content[index].isupper():
88 index += 1
89
90 # Edge Case: Key is not uppercase
91 if index == key_start:
92 raise ValueError("property must be in uppercase")
93
94 key = content[key_start:index]
95
96 # Edge Case: No values for the property
97 if index >= len(content) or content[index] != '[':
98 raise ValueError("properties without delimiter")
99
100 # Parse values
101 values = []
102 while index < len(content) and content[index] == '[':
103 index += 1 # Skip '['
104 value_start = index
105
106 # Parse value, handling escapes
107 value_chars = []
108 while index < len(content) and content[index] != ']':
109 if content[index] == '\\':
110 index += 1
111 if index >= len(content):
112 raise ValueError("properties without delimiter")
113
114 char = content[index]
115 # According to SGF specification, when backslash is followed by newline,
116 # both backslash and newline are removed
117 if char == '\n':
118 # Skip both backslash and newline - they are both removed
119 pass
120 else:
121 # For other escaped characters, add the backslash and character
122 # The text processing will handle the rest
123 value_chars.append('\\')
124 value_chars.append(char)
125 else:
126 value_chars.append(content[index])
127 index += 1
128
129 # Edge Case: Unclosed bracket
130 if index >= len(content) or content[index] != ']':
131 raise ValueError("properties without delimiter")
132
133 # Process the value according to SGF text rules
134 value = _process_sgf_text(''.join(value_chars))
135 values.append(value)
136 index += 1 # Skip ']'
137
138 properties[key] = values
139
140 # Parse children
141 children = []
142
143 # Handle children in parentheses (variations)
144 while index < len(content) and content[index] == '(':
145 child, index = _parse_tree(content, index+1) # Skip '('
146
147 # Find the matching ')'
148 paren_count = 1
149 i = index
150 while i < len(content) and paren_count > 0:
151 if content[i] == '(':
152 paren_count += 1
153 elif content[i] == ')':
154 paren_count -= 1
155 i += 1
156
157 # Skip to after the closing parenthesis
158 if paren_count == 0:
159 index = i
160
161 children.append(child)
162
163 # Handle sequential nodes (without parentheses)
164 while index < len(content) and content[index] == ';':
165 child, index = _parse_tree(content, index)
166 children.append(child)
167
168 return SgfTree(properties, children), index
169
170
171def _process_sgf_text(text: str) -> str:
172 """
173 Process SGF text according to SGF specification.
174
175 Args:
176 text: Raw text from SGF property value
177
178 Returns:
179 str: Processed text
180 """
181 result = []
182 i = 0
183 while i < len(text):
184 if text[i] == '\\':
185 i += 1
186 if i >= len(text):
187 result.append('\\')
188 break
189
190 # Handle escaped characters according to SGF rules
191 char = text[i]
192 # If the character after backslash is a newline, remove both backslash and newline
193 if char == '\n':
194 # Skip the newline (it's removed)
195 pass
196 # For other whitespace characters after backslash, convert to space
197 elif char != '\n' and char.isspace():
198 result.append(' ')
199 # For non-whitespace characters after backslash, insert as-is
200 else:
201 result.append(char)
202 else:
203 # Convert whitespace (except newlines) to spaces
204 if text[i] != '\n' and text[i].isspace():
205 result.append(' ')
206 else:
207 result.append(text[i])
208 i += 1
209
210 return ''.join(result)
211
212# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, No semicolon to start node, Key is not uppercase, No values for the property, Unclosed bracket"
Test NameStatus
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.