harrison

Finished
2626
2727
2828def parse(input_string: str) -> SgfTree:
29 pass
29 """
30 Parse an SGF string and return a tree structure of properties.
31
32 Args:
33 input_string: A string in SGF format
34
35 Returns:
36 SgfTree: The parsed SGF tree
37
38 Raises:
39 ValueError: If the input is invalid according to SGF format rules
40 """
41 # Edge Case: Empty input
42 if not input_string:
43 raise ValueError("tree missing")
44
45 # Edge Case: Input doesn't start with '('
46 if not input_string.startswith('('):
47 raise ValueError("tree missing")
48
49 # Edge Case: Input doesn't end with ')'
50 if not input_string.endswith(')'):
51 raise ValueError("tree missing")
52
53 # Remove the outer parentheses
54 content = input_string[1:-1]
55
56 # Edge Case: Empty tree with no nodes
57 if not content:
58 raise ValueError("tree with no nodes")
59
60 # Parse the tree
61 tree, _ = _parse_tree(content, 0)
62 return tree
63
64
65def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]:
66 """
67 Parse a tree from the content string starting at index.
68
69 Args:
70 content: The SGF content string
71 index: The starting index to parse from
72
73 Returns:
74 tuple: (SgfTree, next_index)
75 """
76 # Edge Case: No semicolon to start a node
77 if index >= len(content) or content[index] != ';':
78 raise ValueError("tree missing")
79
80 index += 1 # Skip the semicolon
81
82 # Parse properties of the current node
83 properties = {}
84 while index < len(content) and content[index].isupper():
85 # Parse key
86 key_start = index
87 while index < len(content) and content[index].isupper():
88 index += 1
89
90 # Edge Case: Key is not in uppercase
91 if index == key_start:
92 raise ValueError("property must be in uppercase")
93
94 key = content[key_start:index]
95
96 # Edge Case: No values for the property
97 if index >= len(content) or content[index] != '[':
98 raise ValueError("properties without delimiter")
99
100 # Parse values
101 values = []
102 while index < len(content) and content[index] == '[':
103 index += 1 # Skip '['
104 value_chars = []
105
106 # Parse value, handling escapes
107 while index < len(content):
108 if content[index] == ']':
109 # Check if this bracket is escaped
110 if value_chars and value_chars[-1] == '\\':
111 # This bracket is escaped, add it and continue
112 value_chars.append(content[index])
113 index += 1
114 else:
115 # This is the real closing bracket
116 break
117 elif content[index] == '\\':
118 # Add the escape character and the next character
119 value_chars.append(content[index])
120 index += 1
121 if index < len(content):
122 value_chars.append(content[index])
123 index += 1
124 else:
125 value_chars.append(content[index])
126 index += 1
127
128 # Edge Case: Unclosed value bracket
129 if index >= len(content) or content[index] != ']':
130 raise ValueError("properties without delimiter")
131
132 value = ''.join(value_chars)
133 # Process the value according to SGF Text type rules
134 value = _process_text_value(value)
135 values.append(value)
136 index += 1 # Skip ']'
137
138 properties[key] = values
139
140 # Check if we have a non-uppercase character where we expect a property
141 if index < len(content) and not content[index].isupper() and content[index] not in ';()':
142 # This might be a lowercase property key
143 if content[index].isalpha():
144 raise ValueError("property must be in uppercase")
145
146 # Parse children
147 children = []
148
149 # Handle sequential nodes (separated by ;)
150 if index < len(content) and content[index] == ';':
151 child, index = _parse_tree(content, index)
152 children.append(child)
153
154 # Handle variations (nodes in parentheses)
155 while index < len(content) and content[index] == '(':
156 index += 1 # Skip '('
157 child, index = _parse_tree(content, index)
158 children.append(child)
159 # Skip ')'
160 if index < len(content) and content[index] == ')':
161 index += 1
162
163 return SgfTree(properties, children), index
164
165
166def _process_text_value(value: str) -> str:
167 """
168 Process a text value according to SGF Text type rules.
169
170 Args:
171 value: The raw text value
172
173 Returns:
174 str: The processed text value
175 """
176 result = []
177 i = 0
178 while i < len(value):
179 if value[i] == '\\':
180 # Edge Case: Escape at end of string
181 if i + 1 >= len(value):
182 result.append('\\')
183 break
184
185 next_char = value[i+1]
186 # If next character is newline, remove the backslash and newline
187 if next_char == 'n':
188 i += 2 # Skip backslash and newline
189 continue
190 else:
191 # Any non-whitespace character after \ is inserted as-is
192 # Any whitespace character after \ follows the above rules
193 if next_char.isspace() and next_char != 'n':
194 result.append(' ')
195 else:
196 result.append(next_char)
197 i += 2
198 else:
199 # All whitespace characters other than newline are converted to spaces
200 if value[i].isspace() and value[i] != '\n':
201 result.append(' ')
202 else:
203 result.append(value[i])
204 i += 1
205
206 return ''.join(result)
207# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, No semicolon to start a node, Key is not in uppercase, No values for the property, Unclosed value bracket, Escape at end of string
Test NameStatus
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Fail
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Fail
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Fail
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Fail
test_upper_and_lowercase_property
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.