New Minimalist Text Editor
Implemented Basecamp's Trix WYSIWYG text editor
We were using an old version of django-prose-editor, and although it looked good in theory, simple and lightweight, it didn't work well in practice, and was impossible to upgrade to the latest version.
So I ended up switching to a custom implementation of Basecamp's Trix WYSIWYG text editor, which is also simple and lightweight but has been around and well maintained for about a decade (since at least 2015).
Issues that have been fixed
- Styles didn't match the real end result
- Keyboard shortcuts we expected to work didn't actually work
- For example, pressing tab or shift+tab to control the hierarchy of lists
- When pasted, styles would break and it would be impossible to recover with the WYSIWYG buttons
- Sometimes they would break even without pasting, just by clicking the buttons in a particular order
After implementing Trix, I had a bad surprise: the HTML it produces looks like this:
<div>
<!--block-->
First paragraph text...
<br><br>
Second paragraph text...
<br><br><br><br> <!-- Multiple Enters -->
Third paragraph text
</div>
Not only does pressing Enter not create a new block, the block is not even a <p>, but a <div>.
That might not be a big deal for a comment section or an internal system, but for a job board, accessibility and SEO are paramount.
I seriously considered moving to another editor, but in the end, the workaround was simple enough. I had gemini-2.5-pro write the cleaning method, which is called on save(), and had Claude Code implement and test it.
def _clean_trix_html(self, trix_html: str) -> str:
    """
    Convert Trix editor output into semantic ``<p>`` paragraphs.

    Trix wraps text in top-level ``<div>`` blocks and uses ``<br>`` tags
    for every Enter press. Each top-level ``<div>`` that holds only
    inline content is replaced by one or more ``<p>`` tags:

    * a single ``<br>`` is kept as a line break inside its paragraph;
    * a run of two or more consecutive ``<br>`` tags is a paragraph
      separator, and the whole run collapses into a single break.

    Top-level divs containing a ``div``, ``p``, ``ul``, ``ol``, ``li``,
    ``blockquote``, ``pre``, ``h1`` or ``h2`` are left untouched.
    Paragraph candidates (and therefore whole divs) that hold no text
    and no element other than ``<br>`` are dropped, so no empty
    ``<p></p>`` or ``<p><br/></p>`` is ever emitted.

    Args:
        trix_html: Raw HTML produced by the Trix editor. Falsy values
            (``""`` or ``None``) are accepted.

    Returns:
        The cleaned document as serialized by BeautifulSoup (so ``<br>``
        comes back as ``<br/>``), or ``""`` for falsy input.
    """
    if not trix_html:
        return ""

    soup = BeautifulSoup(trix_html, 'html.parser')
    block_level_tags = ['div', 'p', 'ul', 'ol', 'li', 'blockquote', 'pre', 'h1', 'h2']

    def is_blank(tag):
        """Return True if tag has no text and no element other than <br>."""
        if tag.get_text().strip():
            return False
        return all(child.name == 'br' for child in tag.find_all())

    def split_on_breaks(nodes):
        """Group nodes into paragraph candidates, splitting on runs of 2+ <br>."""
        groups = [[]]
        i = 0
        while i < len(nodes):
            is_br = nodes[i].name == 'br'
            followed_by_br = i + 1 < len(nodes) and nodes[i + 1].name == 'br'
            if is_br and followed_by_br:
                # Paragraph break: start a new group and swallow the whole
                # run so that 2, 3 or 10 Enters all yield one break.
                groups.append([])
                while i < len(nodes) and nodes[i].name == 'br':
                    i += 1
            else:
                # Text, inline tags and isolated <br> stay in the paragraph.
                groups[-1].append(nodes[i])
                i += 1
        return groups

    # Process each top-level div created by Trix
    for div in soup.find_all('div', recursive=False):
        # Heuristic: only process divs that look like paragraphs,
        # i.e., they don't contain other block-level elements.
        if div.find(block_level_tags):
            continue

        # Snapshot the children first: nodes are moved out of the div
        # below, which would otherwise disturb the iteration.
        new_paragraphs = []
        for group in split_on_breaks(list(div.contents)):
            if not group:
                continue
            paragraph = soup.new_tag('p')
            for node in group:
                paragraph.append(node.extract())
            # Same emptiness rule for every paragraph; a div made only of
            # whitespace/<br> therefore ends up with no paragraphs at all.
            if not is_blank(paragraph):
                new_paragraphs.append(paragraph)

        if new_paragraphs:
            # Replace the original div with the new paragraphs
            div.replace_with(*new_paragraphs)
        else:
            # Nothing worth keeping: remove the div entirely
            div.extract()

    return str(soup)
And the tests
class JobPostHTMLCleaningTestCase(TestCase):
    """
    Tests the _clean_trix_html method for converting Trix editor's
    div-based output to semantic paragraph tags.
    """

    def setUp(self):
        # We only need a model instance to call the method,
        # it doesn't need to be saved to the database.
        self.job_post = JobPost()

    def _assert_cleaned(self, trix_html, expected_html):
        """Assert that cleaning ``trix_html`` yields exactly ``expected_html``."""
        self.assertEqual(self.job_post._clean_trix_html(trix_html), expected_html)

    def test_simple_div_is_converted_to_p(self):
        """Test a single div with no breaks becomes a single p tag."""
        self._assert_cleaned(
            "<div>Simple paragraph content.</div>",
            "<p>Simple paragraph content.</p>",
        )

    def test_single_br_is_preserved_as_line_break(self):
        """Test that a single <br> is kept as a line break inside a <p> tag."""
        # Note: BeautifulSoup will self-close the <br> tag.
        self._assert_cleaned(
            "<div>First line.<br>Second line.</div>",
            "<p>First line.<br/>Second line.</p>",
        )

    def test_double_br_creates_new_paragraph(self):
        """Test that <br><br> correctly splits content into two <p> tags."""
        self._assert_cleaned(
            "<div>First paragraph.<br><br>Second paragraph.</div>",
            "<p>First paragraph.</p><p>Second paragraph.</p>",
        )

    def test_multiple_brs_create_single_paragraph_break(self):
        """Test that three or more <br> tags are treated as one paragraph break."""
        self._assert_cleaned(
            "<div>Paragraph one.<br/><br/><br/><br/>Paragraph two.</div>",
            "<p>Paragraph one.</p><p>Paragraph two.</p>",
        )

    def test_inline_formatting_is_preserved(self):
        """Test that inline tags like <strong> and <a> are preserved correctly."""
        self._assert_cleaned(
            '<div><strong>Bold text</strong> and <em>italic</em>.<br><br>Link to <a href="#">somewhere</a>.</div>',
            '<p><strong>Bold text</strong> and <em>italic</em>.</p><p>Link to <a href="#">somewhere</a>.</p>',
        )

    def test_multiple_trix_divs_are_processed(self):
        """Test that the function handles multiple top-level <div> blocks."""
        self._assert_cleaned(
            "<div>First block.</div><div>Second block, line 1.<br><br>Second block, line 2.</div>",
            "<p>First block.</p><p>Second block, line 1.</p><p>Second block, line 2.</p>",
        )

    def test_non_trix_block_elements_are_ignored(self):
        """Test that divs containing other block elements are not processed."""
        # The logic should skip the outer div because it contains another block (the nested div).
        # Therefore, the HTML should remain unchanged.
        self._assert_cleaned(
            "<div><div>Nested block</div></div><blockquote>Quote</blockquote>",
            "<div><div>Nested block</div></div><blockquote>Quote</blockquote>",
        )

    def test_empty_and_whitespace_divs_produce_no_output(self):
        """Test that empty or whitespace-only divs are effectively removed."""
        self._assert_cleaned(
            "<div>First paragraph.</div><div> </div><div><br><br></div><div>Second paragraph.</div>",
            "<p>First paragraph.</p><p>Second paragraph.</p>",
        )

    def test_empty_input_returns_empty_string(self):
        """Test that empty or None input returns empty string safely."""
        self._assert_cleaned("", "")
        self._assert_cleaned(None, "")

    def test_complex_real_world_example(self):
        """Test with the exact example from the user's issue description."""
        trix_html = ('<div>En <strong>Bodegas Pincerna</strong>, una bodega especializada en la '
                     'recuperación de variedades autóctonas de la D.O. León (Prieto Picudo y Albarín Blanco), '
                     'buscamos un/a <strong>Operario/a de Bodega y Campo</strong> para reforzar nuestro equipo.'
                     '<br><br>La persona seleccionada trabajará tanto en el viñedo como en la bodega, '
                     'participando en todas las fases del ciclo del vino y contribuyendo al crecimiento de un '
                     'proyecto con fuerte identidad y valores de calidad.<br><br><br><br><br>Paragraph 3</div>')
        cleaned_html = self.job_post._clean_trix_html(trix_html)

        # Should create exactly three paragraphs
        self.assertEqual(cleaned_html.count('<p>'), 3)
        self.assertIn('<p>En <strong>Bodegas Pincerna</strong>', cleaned_html)
        self.assertIn('<p>La persona seleccionada', cleaned_html)
        self.assertIn('<p>Paragraph 3</p>', cleaned_html)

        # Should not contain any div tags
        self.assertNotIn('<div>', cleaned_html)

        # Should not contain double br tags, in either serialization
        self.assertNotIn('<br><br>', cleaned_html)
        self.assertNotIn('<br/><br/>', cleaned_html)
I might have been losing clients with a WYSIWYG text editor that would sometimes break when users were typing elaborate job posts on my wine job board. So I changed to Basecamp’s Trix.
— Micael de Prado (@Micadep) August 30, 2025
It’s not super modern or anything, but it just works. pic.twitter.com/k1S7fazX0k