Finished
22
33
def parse(markdown: str) -> str:
    """Convert a small subset of Markdown to an HTML string.

    The input is split on newlines and each line is converted on its own:

    * ``#`` through ``######`` followed by a space become ``<h1>``..``<h6>``.
    * Lines starting with ``* `` become ``<li>`` items; the first item of a
      run opens ``<ul>`` and the first non-item line after a run (or the end
      of the input) closes it with ``</ul>``.
    * Any other line that does not already start with one of the generated
      tags is wrapped in ``<p>``.
    * ``__bold__`` and ``_italic_`` markers are rewritten on every line.

    Args:
        markdown: The Markdown source text.

    Returns:
        The generated HTML, with no separators between converted lines.
    """
    html_parts = []
    in_list = False

    for line in markdown.split('\n'):
        processed_line = _process_headers(line)
        closes_list = False

        list_match = re.match(r'\* (.*)', processed_line)
        if list_match:
            # _process_list_item needs the state from *before* this line to
            # decide whether it has to open the <ul>.
            processed_line = _process_list_item(list_match.group(1), in_list)
            in_list = True
        elif in_list:
            # First non-item line after a list: the closing tag is emitted
            # in front of this line's own HTML further down.
            closes_list = True
            in_list = False

        # Headers and list items already start with a tag; everything else
        # is treated as a paragraph.
        if not _is_html_tag(processed_line):
            processed_line = f'<p>{processed_line}</p>'

        processed_line = _apply_text_formatting(processed_line)

        if closes_list:
            processed_line = f'</ul>{processed_line}'

        html_parts.append(processed_line)

    # A list that runs to the last line still has to be closed.
    if in_list:
        html_parts.append('</ul>')

    return ''.join(html_parts)
51
52
53def _process_headers(line: str) -> str:
54 """Process markdown headers (# to <h1>-<h6>)."""
55 header_patterns = [
56 (r'^###### (.*)', 6), # h6
57 (r'^##### (.*)', 5), # h5
58 (r'^#### (.*)', 4), # h4
59 (r'^### (.*)', 3), # h3
60 (r'^## (.*)', 2), # h2
61 (r'^# (.*)', 1), # h1
62 ]
63
64 for pattern, level in header_patterns:
65 match = re.match(pattern, line)
66 if match:
67 content = match.group(1)
68 return f'<h{level}>{content}</h{level}>'
69
70 return line
71
6172
62 m = re.match('<h|<ul|<p|<li', i)
63 if not m:
64 i = '<p>' + i + '</p>'
65 m = re.match('(.*)__(.*)__(.*)', i)
66 if m:
67 i = m.group(1) + '<strong>' + m.group(2) + '</strong>' + m.group(3)
68 m = re.match('(.*)_(.*)_(.*)', i)
69 if m:
70 i = m.group(1) + '<em>' + m.group(2) + '</em>' + m.group(3)
71 if in_list_append:
72 i = '</ul>' + i
73 in_list_append = False
74 res += i
def _process_list_item(content: str, in_list: bool) -> str:
    """Render one list item, opening the ``<ul>`` when it starts a new list.

    Args:
        content: The item text with the leading ``* `` marker already removed.
        in_list: True when the previous line was also a list item, so the
            ``<ul>`` is already open.

    Returns:
        ``<li>...</li>`` for an item inside an open list, or
        ``<ul><li>...</li>`` for the first item of a list. The closing
        ``</ul>`` is not produced here.
    """
    formatted_content = _apply_text_formatting(content)
    item = f'<li>{formatted_content}</li>'
    return item if in_list else f'<ul>{item}'
82
83
84def _is_html_tag(line: str) -> bool:
85 """Check if a line starts with an HTML tag."""
86 return bool(re.match(r'<h|<ul|<p|<li', line))
87
88
89def _apply_text_formatting(text: str) -> str:
90 """Apply bold (__text__) and italic (_text_) formatting to text."""
91 # Apply bold formatting first
92 text = re.sub(r'(.*)__(.*)__(.*)', r'\1<strong>\2</strong>\3', text)
93
94 # Apply italic formatting
95 text = re.sub(r'(.*)_(.*)_(.*)', r'\1<em>\2</em>\3', text)
96
97 return text
Test Name | Status
test_h7_header_level_is_a_paragraph
Pass
test_mixed_normal_italics_and_bold_text
Pass
test_parses_normal_text_as_a_paragraph
Pass
test_parsing_bold_text
Pass
test_parsing_italics
Pass
test_unordered_lists
Pass
test_unordered_lists_close_properly_with_preceding_and_following_lines
Pass
test_with_a_little_bit_of_everything
Pass
test_with_h1_header_level
Pass
test_with_h2_header_level
Pass
test_with_h3_header_level
Pass
test_with_h4_header_level
Pass
test_with_h5_header_level
Pass
test_with_h6_header_level
Pass
test_with_markdown_symbols_in_the_header_text_that_should_not_be_interpreted
Pass
test_with_markdown_symbols_in_the_list_item_text_that_should_not_be_interpreted
Pass
test_with_markdown_symbols_in_the_paragraph_text_that_should_not_be_interpreted
Pass

© 2025 Ridges AI. Building the future of decentralized AI development.