Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: removed extra spaces from start and end of content #35647

Merged
merged 1 commit into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 35 additions & 4 deletions lms/djangoapps/discussion/rest_api/discussions_notifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
import re

from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, Tag
from django.conf import settings
from django.utils.text import Truncator

Expand Down Expand Up @@ -380,6 +380,30 @@ def remove_html_tags(text):
return re.sub(clean, '', text)


def strip_empty_tags(soup):
    """
    Strip starting and ending empty tags from the soup object

    A node is treated as empty when ``get_text(strip=True)`` yields no text,
    so whitespace-only strings and tags without any textual content are
    removed. Only the leading and trailing edges are trimmed; empty nodes
    that sit between two nodes with content are left in place.

    Arguments:
        soup: parsed BeautifulSoup object (or Tag); it is modified in place.

    Returns:
        The same ``soup`` object that was passed in.
    """
    def strip_tag(element, reverse=False):
        """
        Checks if element is empty and removes it

        Returns True when ``element`` itself was removed. When it has content
        and is a Tag, its children are trimmed from the leading edge (or the
        trailing edge when ``reverse`` is True) up to the first child that
        has content, and False is returned.
        """
        if not element.get_text(strip=True):
            element.extract()
            return True
        if isinstance(element, Tag):
            # Walk a snapshot: extract() removes the child from
            # element.contents, and iterating the live list would skip the
            # sibling that slides into the freed slot.
            children = list(element.contents)
            if reverse:
                children.reverse()
            for child in children:
                # Keep trimming from the same edge at every nesting depth.
                if not strip_tag(child, reverse=reverse):
                    break
        return False

    while soup.contents:
        # Repeat while either edge node was removed entirely; stop once both
        # the first and the last top-level nodes have content.
        if not (strip_tag(soup.contents[0]) or strip_tag(soup.contents[-1], reverse=True)):
            break
    return soup


def clean_thread_html_body(html_body):
"""
Get post body with tags removed and limited to 500 characters
Expand All @@ -401,6 +425,9 @@ def clean_thread_html_body(html_body):
for match in html_body.find_all(tag):
match.unwrap()

if not html_body.find():
return str(html_body)

# Replace tags that are not allowed in email
tags_to_update = [
{"source": "button", "target": "span"},
Expand All @@ -412,11 +439,15 @@ def clean_thread_html_body(html_body):
for tag_dict in tags_to_update:
for source_tag in html_body.find_all(tag_dict['source']):
target_tag = html_body.new_tag(tag_dict['target'], **source_tag.attrs)
if source_tag.string:
target_tag.string = source_tag.string
source_tag.replace_with(target_tag)
if source_tag.contents:
for content in list(source_tag.contents):
target_tag.append(content)
source_tag.insert_before(target_tag)
source_tag.extract()

for tag in html_body.find_all(True):
tag.attrs = {}
tag['style'] = 'margin: 0'

html_body = strip_empty_tags(html_body)
return str(html_body)
Original file line number Diff line number Diff line change
Expand Up @@ -179,15 +179,23 @@ def test_button_tag_replace(self):
"""
Tests that the clean_thread_html_body function replaces the button tag with span tag
"""
# Tests for button replacement tag with text
html_body = '<button class="abc">Button</button>'
expected_output = '<span style="margin: 0">Button</span>'
result = clean_thread_html_body(html_body)
self.assertEqual(result, expected_output)

# Tests button tag replacement without text
html_body = '<p><p>abc</p><button class="abc"></button><p>abc</p></p>'
expected_output = '<p style="margin: 0"><p style="margin: 0">abc</p>'\
'<span style="margin: 0"></span><p style="margin: 0">abc</p></p>'
result = clean_thread_html_body(html_body)
self.assertEqual(result, expected_output)

def test_button_tag_removal(self):
    """
    Tests button tag with no text is removed if at start or end
    """
    html_body = '<button class="abc"></button>'
    # The button is the only node and carries no text, so after cleaning
    # nothing should be left of it.
    expected_output = ''
    result = clean_thread_html_body(html_body)
    self.assertEqual(result, expected_output)

Expand All @@ -196,3 +204,11 @@ def test_attributes_removal_from_tag(self):
html_body = '<p class="abc" style="color:red" aria-disabled=true>Paragraph</p>'
result = clean_thread_html_body(html_body)
self.assertEqual(result, '<p style="margin: 0">Paragraph</p>')

def test_strip_empty_tags(self):
    """
    Tests that clean_thread_html_body drops empty tags found at the start and end of the content
    """
    expected_output = '<p style="margin: 0"><p style="margin: 0">content</p></p>'
    result = clean_thread_html_body('<div><p></p><p>content</p><p></p></div>')
    self.assertEqual(result, expected_output)
Loading