New Minimalist Text Editor (EmpleoVino)

We were using an old version of django-prose-editor, and although it looked good in theory, simple and lightweight, it didn't work well in practice, and was impossible to upgrade to the latest version.

So I ended up switching to a custom implementation of Basecamp's Trix WYSIWYG text editor, which is also simple and lightweight but has been around and well maintained for more than a decade (since at least 2015).

Issues that have been fixed

Styles didn't match the real end result
Keyboard shortcuts we expected to work didn't actually work
- For example, pressing Tab or Shift+Tab to control the nesting level of lists
When pasted, styles would break and it would be impossible to recover with the WYSIWYG buttons
Sometimes they would break even without pasting, just by clicking the buttons in a particular order

After implementing Trix, I had a bad surprise: the HTML it produces looks like this:

<div>
  <!--block-->
  First paragraph text...
  <br><br>
  Second paragraph text...
  <br><br><br><br> <!-- Multiple Enters -->
  Third paragraph text
</div>

Not only does pressing enter not create a new block, the block is not even a <p>, but a <div>.

That might not be a big deal for a comment section or an internal system, but for a job board, accessibility and SEO are paramount.

I seriously considered moving to another editor, but in the end, the workaround was simple enough. I had Gemini 2.5 Pro write the cleaning method, which is called on save(), and had Claude Code implement and test it.

    def _clean_trix_html(self, trix_html: str) -> str:
        """
        Parses HTML content from a Trix editor and converts its
        paragraph-like <div> tags into proper <p> tags.

        It handles single <br> tags as line breaks within a paragraph
        and double <br><br> tags as paragraph separators.
        """
        if not trix_html:
            return ""

        soup = BeautifulSoup(trix_html, 'html.parser')

        # Process each top-level div created by Trix
        for div in soup.find_all('div', recursive=False):
            # Heuristic: only process divs that look like paragraphs,
            # i.e., they don't contain other block-level elements.
            if div.find(['div', 'p', 'ul', 'ol', 'li', 'blockquote', 'pre', 'h1', 'h2']):
                continue

            # Check if the div is effectively empty (only whitespace and/or br tags)
            div_text = div.get_text().strip()
            only_br_tags = all(tag.name == 'br' for tag in div.find_all())
            if not div_text and only_br_tags:
                # Remove empty divs entirely
                div.extract()
                continue

            new_paragraphs = []
            current_paragraph_nodes = []
            nodes = list(div.contents)
            i = 0
            while i < len(nodes):
                node = nodes[i]
                next_node = nodes[i + 1] if (i + 1) < len(nodes) else None

                # Check for a paragraph break: two or more consecutive <br> tags
                is_paragraph_break = (node.name == 'br' and next_node and next_node.name == 'br')

                if is_paragraph_break:
                    # Finish the current paragraph if it has content
                    if any(str(n).strip() for n in current_paragraph_nodes):
                        p = soup.new_tag('p')
                        for content_node in current_paragraph_nodes:
                            p.append(content_node.extract())
                        new_paragraphs.append(p)
                    current_paragraph_nodes = []

                    # Skip all consecutive <br> tags that form this break
                    i += 1  # Move to the second <br>
                    while i < len(nodes) and nodes[i].name == 'br':
                        nodes[i].extract()  # Remove the <br> tag
                        i += 1
                else:
                    current_paragraph_nodes.append(node)
                    i += 1

            # Add any remaining content as the last paragraph
            if any(str(n).strip() for n in current_paragraph_nodes):
                p = soup.new_tag('p')
                for content_node in current_paragraph_nodes:
                    p.append(content_node.extract())
                new_paragraphs.append(p)

            # Replace the original div with the new paragraphs
            if new_paragraphs:
                div.replace_with(*new_paragraphs)
            else:
                # If no paragraphs were created, remove the div entirely
                div.extract()

        return str(soup)

And the tests

class JobPostHTMLCleaningTestCase(TestCase):
    """
    Tests the _clean_trix_html method for converting Trix editor's
    div-based output to semantic paragraph tags.

    Expected strings use BeautifulSoup's serialisation, so a <br> in the
    input is expected back as <br/>.
    """

    def setUp(self):
        # An unsaved model instance is enough to call the method;
        # nothing here needs to be written to the database.
        self.job_post = JobPost()

    def _assert_cleaned(self, trix_html, expected_html):
        """Run the cleaner on trix_html and compare with expected_html."""
        self.assertEqual(self.job_post._clean_trix_html(trix_html), expected_html)

    def test_simple_div_is_converted_to_p(self):
        """Test a single div with no breaks becomes a single p tag."""
        self._assert_cleaned(
            "<div>Simple paragraph content.</div>",
            "<p>Simple paragraph content.</p>",
        )

    def test_single_br_is_preserved_as_line_break(self):
        """Test that a single <br> is kept as a line break inside a <p> tag."""
        # Note: BeautifulSoup will self-close the <br> tag.
        self._assert_cleaned(
            "<div>First line.<br>Second line.</div>",
            "<p>First line.<br/>Second line.</p>",
        )

    def test_double_br_creates_new_paragraph(self):
        """Test that <br><br> correctly splits content into two <p> tags."""
        self._assert_cleaned(
            "<div>First paragraph.<br><br>Second paragraph.</div>",
            "<p>First paragraph.</p><p>Second paragraph.</p>",
        )

    def test_multiple_brs_create_single_paragraph_break(self):
        """Test that three or more <br> tags are treated as one paragraph break."""
        self._assert_cleaned(
            "<div>Paragraph one.<br/><br/><br/><br/>Paragraph two.</div>",
            "<p>Paragraph one.</p><p>Paragraph two.</p>",
        )

    def test_inline_formatting_is_preserved(self):
        """Test that inline tags like <strong> and <a> are preserved correctly."""
        self._assert_cleaned(
            '<div><strong>Bold text</strong> and <em>italic</em>.<br><br>Link to <a href="#">somewhere</a>.</div>',
            '<p><strong>Bold text</strong> and <em>italic</em>.</p><p>Link to <a href="#">somewhere</a>.</p>',
        )

    def test_multiple_trix_divs_are_processed(self):
        """Test that the function handles multiple top-level <div> blocks."""
        self._assert_cleaned(
            "<div>First block.</div><div>Second block, line 1.<br><br>Second block, line 2.</div>",
            "<p>First block.</p><p>Second block, line 1.</p><p>Second block, line 2.</p>",
        )

    def test_non_trix_block_elements_are_ignored(self):
        """Test that divs containing other block elements are not processed."""
        # The outer div contains another block (the nested div), so it must be
        # skipped and the HTML must come back unchanged.
        unchanged_html = "<div><div>Nested block</div></div><blockquote>Quote</blockquote>"
        self._assert_cleaned(unchanged_html, unchanged_html)

    def test_empty_and_whitespace_divs_produce_no_output(self):
        """Test that empty or whitespace-only divs are effectively removed."""
        self._assert_cleaned(
            "<div>First paragraph.</div><div>    </div><div><br><br></div><div>Second paragraph.</div>",
            "<p>First paragraph.</p><p>Second paragraph.</p>",
        )

    def test_empty_input_returns_empty_string(self):
        """Test that empty or None input returns empty string safely."""
        for empty_value in ("", None):
            # subTest reports which of the two inputs failed.
            with self.subTest(trix_html=empty_value):
                self._assert_cleaned(empty_value, "")

    def test_complex_real_world_example(self):
        """Test with the exact example from the user's issue description."""
        trix_html = ('<div>En <strong>Bodegas Pincerna</strong>, una bodega especializada en la '
                    'recuperación de variedades autóctonas de la D.O. León (Prieto Picudo y Albarín Blanco), '
                    'buscamos un/a <strong>Operario/a de Bodega y Campo</strong> para reforzar nuestro equipo.'
                    '<br><br>La persona seleccionada trabajará tanto en el viñedo como en la bodega, '
                    'participando en todas las fases del ciclo del vino y contribuyendo al crecimiento de un '
                    'proyecto con fuerte identidad y valores de calidad.<br><br><br><br><br>Paragraph 3</div>')

        cleaned_html = self.job_post._clean_trix_html(trix_html)

        # Should create exactly three paragraphs, in this order of content.
        self.assertEqual(cleaned_html.count('<p>'), 3)
        self.assertIn('<p>En <strong>Bodegas Pincerna</strong>', cleaned_html)
        self.assertIn('<p>La persona seleccionada', cleaned_html)
        self.assertIn('<p>Paragraph 3</p>', cleaned_html)

        # Should not contain any div tags (with or without attributes).
        self.assertNotIn('<div', cleaned_html)

        # Should not contain double br tags, in either serialisation.
        self.assertNotIn('<br><br>', cleaned_html)
        self.assertNotIn('<br/><br/>', cleaned_html)

I might have been losing clients with a WYSIWYG text editor that would sometimes break when users were typing elaborate job posts on my wine job board. So I changed to Basecamp’s Trix.

It’s not super modern or anything, but it just works. pic.twitter.com/k1S7fazX0k
— Micael de Prado (@Micadep) August 30, 2025

Issues that have been fixed

Recent Changes