rockstar

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Turn an SGF string into a tree of nodes and their properties.

    Args:
        input_string: Text in SGF format, wrapped in one pair of parentheses.

    Returns:
        SgfTree: The root of the parsed tree.

    Raises:
        ValueError: "tree missing" when the input is empty or is not wrapped
            in "(" ... ")"; "tree with no nodes" when nothing sits between
            the parentheses. Errors raised while parsing the nodes propagate
            unchanged.
    """
    # Empty input, a missing "(" and a missing ")" all report the same error.
    if not input_string or not (
        input_string.startswith('(') and input_string.endswith(')')
    ):
        raise ValueError("tree missing")

    # Drop the outer parentheses; what remains must hold at least one node.
    body = input_string[1:-1]
    if not body:
        raise ValueError("tree with no nodes")

    # Only the tree is needed here; the index where parsing stopped is unused.
    root, _stop = _parse_tree(body, 0)
    return root
63
64
def _parse_tree(content: str, index: int) -> tuple[SgfTree, int]:
    """
    Parse one node, and everything hanging below it, starting at ``index``.

    A node is a ";" followed by zero or more properties of the form
    ``KEY[value][value]...``. Children come from parenthesised variations
    ``(...)`` that directly follow the properties and/or from one following
    ";" node, which is parsed recursively as a single child.

    Args:
        content: The SGF text with the outermost parentheses already removed.
        index: Position in ``content`` where the node's ";" is expected.

    Returns:
        tuple: The parsed SgfTree and the index of the first character that
        was not consumed.

    Raises:
        ValueError: "tree with no nodes" if there is no ";" at ``index``;
            "property must be in uppercase" if a key is not all uppercase;
            "properties without delimiter" if a key has no "[" after it or a
            value is never closed by an unescaped "]".
    """
    if index >= len(content) or content[index] != ';':
        raise ValueError("tree with no nodes")
    index += 1  # step over the ";"

    properties = {}
    while index < len(content) and content[index].isalpha():
        # The key is the maximal run of letters.
        key_start = index
        while index < len(content) and content[index].isalpha():
            index += 1
        key = content[key_start:index]

        if not key.isupper():
            raise ValueError("property must be in uppercase")
        if index >= len(content) or content[index] != '[':
            raise ValueError("properties without delimiter")

        # One or more bracketed values may follow the key.
        values = []
        while index < len(content) and content[index] == '[':
            value_start = index + 1
            value_end = _find_value_end(content, value_start)
            values.append(_process_text(content[value_start:value_end]))
            index = value_end + 1  # step over the closing "]"

        # A repeated key replaces the values stored for it earlier.
        properties[key] = values

    children = []

    # Variations: each "(...)" holds a subtree that becomes one child.
    while index < len(content) and content[index] == '(':
        child, index = _parse_tree(content, index + 1)
        children.append(child)
        # The closing ")" is consumed when present; its absence is tolerated.
        if index < len(content) and content[index] == ')':
            index += 1

    # A following ";" starts the next node in sequence, stored as a child.
    if index < len(content) and content[index] == ';':
        child, index = _parse_tree(content, index)
        children.append(child)

    return SgfTree(properties, children), index


def _find_value_end(content: str, index: int) -> int:
    """
    Return the index of the unescaped "]" that closes a property value.

    Scanning starts at ``index``, the first character after the opening "[".
    A backslash always escapes the character after it, so both are skipped
    together; consequently any "]" reached by the scan is a real terminator
    and no look-behind for preceding backslashes is needed.

    Raises:
        ValueError: "properties without delimiter" if the content ends before
            an unescaped "]" is found (including a trailing backslash).
    """
    while index < len(content):
        char = content[index]
        if char == ']':
            return index
        index += 2 if char == '\\' else 1
    raise ValueError("properties without delimiter")
163
164
165def _process_text(text: str) -> str:
166 """
167 Process text according to SGF Text type rules.
168
169 Args:
170 text: The raw text to process
171
172 Returns:
173 str: The processed text
174 """
175 result = []
176 i = 0
177 while i < len(text):
178 if text[i] == '\\':
179 i += 1
180 if i < len(text):
181 # If next character is newline, remove the backslash and newline
182 if text[i] == '\n':
183 i += 1 # Skip newline
184 else:
185 # Insert the next character as-is
186 result.append(text[i])
187 i += 1
188 else:
189 # Edge Case: Backslash at end of text
190 result.append('\\')
191 i += 1
192 else:
193 # Convert whitespace (except newline) to space
194 if text[i] != '\n' and text[i].isspace():
195 result.append(' ')
196 else:
197 result.append(text[i])
198 i += 1
199
200 return ''.join(result)
201
202# Handled Edge Cases: Empty input, Input doesn't start with '(', Input doesn't end with ')', Empty tree with no nodes, No semicolon to start a node, Key is not in uppercase, No values for the property, Unclosed bracket, Backslash at end of text
Test Name | Status
test_escaped_tab_in_property_value_is_converted_to_space
Fail
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.