couple

Finished
2626
2727
def parse(input_string: str) -> SgfTree:
    """
    Parse an SGF string and return a tree structure of properties.

    Only the first game tree in the input is parsed; anything that follows
    its closing ')' is ignored.

    Args:
        input_string: A string containing SGF formatted data

    Returns:
        SgfTree: A tree structure representing the SGF data

    Raises:
        ValueError: "tree missing" if the input is empty, whitespace only,
            does not start with '(', or the root tree is not terminated by
            ')'. Other messages ("tree with no nodes", "property must be in
            uppercase", "properties without delimiter") propagate from the
            node and property parsing helpers.
    """
    # Empty and whitespace-only inputs can never start with '(', so a single
    # guard covers all "no tree at all" cases; the falsy check is kept first
    # so that a non-string falsy value still yields ValueError.
    if not input_string or not input_string.startswith('('):
        raise ValueError("tree missing")

    # Parse the root tree; index 1 is the character right after the '('.
    tree, end = _parse_tree(input_string, 1)

    # The root tree must be closed just like every child variation is.
    # Without this check a truncated input such as "(;A[b]" (or one with
    # unparsed residue before the ')') would be accepted silently.
    if end >= len(input_string) or input_string[end] != ')':
        raise ValueError("tree missing")

    return tree
52
53
def _parse_tree(s: str, index: int) -> tuple[SgfTree, int]:
    """
    Parse one node (and everything hanging off it) starting at ``index``.

    Args:
        s: The SGF string
        index: Position at which the node's leading ';' is expected

    Returns:
        tuple: The parsed SgfTree and the index of the first character that
            was not consumed (the enclosing ')' is left for the caller)

    Raises:
        ValueError: "tree with no nodes" if there is no ';' at ``index``;
            "tree missing" if a child variation is not closed by ')'
    """
    # A one-character slice is '' past the end of the string, so it doubles
    # as the bounds check.
    if s[index:index + 1] != ';':
        raise ValueError("tree with no nodes")

    # Properties start right after the semicolon.
    properties, index = _parse_properties(s, index + 1)
    node = SgfTree(properties, [])

    # Each parenthesised group is a variation and becomes one child.
    while s[index:index + 1] == '(':
        variation, index = _parse_tree(s, index + 1)
        node.children.append(variation)
        if s[index:index + 1] != ')':
            raise ValueError("tree missing")
        index += 1  # Step over the variation's closing ')'

    # A following ';' starts the next node in the sequence; it is stored as
    # the sole child of this node (replacing the children list).
    if s[index:index + 1] == ';':
        successor, index = _parse_tree(s, index)
        node.children = [successor]

    return node, index
93
94
95def _parse_properties(s: str, index: int) -> tuple[dict, int]:
96 """
97 Parse properties from the SGF string starting at the given index.
98
99 Args:
100 s: The SGF string
101 index: The starting index to parse from
102
103 Returns:
104 tuple: A tuple containing the parsed properties dictionary and the next index to parse
105 """
106 properties = {}
107
108 # Parse all properties for the current node
109 while index < len(s) and s[index].isalpha():
110 # Parse property key
111 key_start = index
112 while index < len(s) and s[index].isalpha():
113 index += 1
114
115 key = s[key_start:index]
116
117 # Edge Case: Property key is not uppercase
118 if key != key.upper():
119 raise ValueError("property must be in uppercase")
120
121 # Check if there's an opening bracket after the key
122 if index >= len(s) or s[index] != '[':
123 raise ValueError("properties without delimiter")
124
125 values = []
126
127 # Parse all values for this key
128 while index < len(s) and s[index] == '[':
129 index += 1 # Skip the opening '['
130 value_start = index
131
132 # Parse value content, handling escapes
133 while index < len(s) and s[index] != ']':
134 if s[index] == '\\':
135 index += 1 # Skip the escape character
136 if index >= len(s): # Handle escape at end of string
137 raise ValueError("properties without delimiter")
138 index += 1
139
140 # Edge Case: Unclosed value bracket
141 if index >= len(s) or s[index] != ']':
142 raise ValueError("properties without delimiter")
143
144 value = s[value_start:index]
145 # Process the value according to SGF text type rules
146 processed_value = _process_text_value(value)
147 values.append(processed_value)
148 index += 1 # Skip the closing ']'
149
150 properties[key] = values
151
152 # Continue to parse the next property if there is one
153 # No need to check for delimiter here as properties are naturally delimited by [brackets]
154
155 return properties, index
156
157
158def _process_text_value(value: str) -> str:
159 """
160 Process a text value according to SGF Text type rules.
161
162 Args:
163 value: The raw text value from SGF
164
165 Returns:
166 str: The processed text value
167 """
168 result = []
169 i = 0
170 while i < len(value):
171 if value[i] == '\\':
172 i += 1 # Move past the escape character
173 if i < len(value):
174 # Check if next character is a newline
175 if value[i] == '\n':
176 # Newlines are removed if they come immediately after a backslash
177 pass
178 else:
179 # Any non-whitespace character after \ is inserted as-is
180 # Any whitespace character after \ is converted to space
181 if value[i].isspace():
182 result.append(' ')
183 else:
184 result.append(value[i])
185 i += 1
186 else:
187 # All whitespace characters other than newline are converted to spaces
188 if value[i] != '\n' and value[i].isspace():
189 result.append(' ')
190 else:
191 result.append(value[i])
192 i += 1
193
194 return ''.join(result)
195
# Handled edge cases: empty input, input not starting with '(', missing semicolon after '(',
# property key not uppercase, unclosed value bracket, property with no values.
Test Name | Status
test_all_lowercase_property
Pass
test_empty_input
Pass
test_escaped_backslash_in_property_value_becomes_just_a_backslash
Pass
test_escaped_closing_bracket_within_property_value_becomes_just_a_closing_bracket
Pass
test_escaped_newline_in_property_value_is_converted_to_nothing_at_all
Pass
test_escaped_t_and_n_in_property_value_are_just_letters_not_whitespace
Pass
test_escaped_tab_in_property_value_is_converted_to_space
Pass
test_mixing_various_kinds_of_whitespace_and_escaped_characters_in_property_value
Pass
test_multiple_properties
Pass
test_multiple_property_values
Pass
test_node_without_properties
Pass
test_node_without_tree
Pass
test_opening_bracket_within_property_value_doesn_t_need_to_be_escaped
Pass
test_parentheses_in_property_value_don_t_need_to_be_escaped
Pass
test_properties_without_delimiter
Pass
test_semicolon_in_property_value_doesn_t_need_to_be_escaped
Pass
test_single_node_tree
Pass
test_tree_with_no_nodes
Pass
test_two_child_trees
Pass
test_two_nodes
Pass
test_upper_and_lowercase_property
Pass
test_within_property_values_newlines_remain_as_newlines
Pass
test_within_property_values_whitespace_characters_such_as_tab_are_converted_to_spaces
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.