v1.2 - fix HTML content display issue
This commit is contained in:
parent
c3065028a3
commit
ab3bf98fa7
|
@ -36,6 +36,24 @@ def format_date(date_str):
|
||||||
logger.error(f"Error formatting date: {e}")
|
logger.error(f"Error formatting date: {e}")
|
||||||
return 'NoDate'
|
return 'NoDate'
|
||||||
|
|
||||||
|
def extract_email_content(email):
    """Extract the email body, prioritizing text/plain over text/html.

    For a multipart message every part is inspected (attachments are
    skipped): the first text/plain part is returned as-is; if there is no
    text/plain part, the first text/html part is converted with
    ``markdownify`` and returned. For a single-part message the message
    itself is treated the same way.

    Args:
        email: An ``email.message.Message``-like object.

    Returns:
        The body text as ``str``, or ``"No content available"`` when no
        usable text/plain or text/html payload is found.
    """

    def _decode_text(part):
        # get_payload(decode=True) returns None when there is no decodable
        # payload (e.g. a multipart container or an empty message).
        raw = part.get_payload(decode=True)
        if raw is None:
            return None
        # Honour the charset declared in Content-Type; fall back to UTF-8
        # when it is missing or unknown to Python's codec registry.
        charset = part.get_content_charset() or 'utf-8'
        try:
            return raw.decode(charset, errors='ignore')
        except LookupError:
            return raw.decode('utf-8', errors='ignore')

    if email.is_multipart():
        candidates = email.walk()
        skip_attachments = True
    else:
        # Single-part messages are taken at face value, regardless of
        # their Content-Disposition.
        candidates = [email]
        skip_attachments = False

    html_fallback = None
    for part in candidates:
        if skip_attachments and 'attachment' in str(part.get('Content-Disposition')):
            continue
        content_type = part.get_content_type()
        if content_type == 'text/plain':
            text = _decode_text(part)
            if text is not None:
                # Plain text wins immediately, even if an HTML part came first.
                return text
        elif content_type == 'text/html' and html_fallback is None:
            # Remember the first HTML part but keep scanning for plain text.
            html_fallback = _decode_text(part)

    if html_fallback is not None:
        return markdownify(html_fallback)
    return "No content available"
|
||||||
|
|
||||||
def save_email_as_markdown(email, index, output_subdir):
|
def save_email_as_markdown(email, index, output_subdir):
|
||||||
logger.info(f"Starting to process email {index + 1}")
|
logger.info(f"Starting to process email {index + 1}")
|
||||||
try:
|
try:
|
||||||
|
@ -52,16 +70,8 @@ def save_email_as_markdown(email, index, output_subdir):
|
||||||
filename = f"{sanitized_subject} - {sanitized_sender} - {sanitized_recipients} - {formatted_date}.md"
|
filename = f"{sanitized_subject} - {sanitized_sender} - {sanitized_recipients} - {formatted_date}.md"
|
||||||
filename = os.path.join(output_subdir, filename)
|
filename = os.path.join(output_subdir, filename)
|
||||||
|
|
||||||
# Handle potential None payload
|
# Extract email content
|
||||||
payload = email.get_payload(decode=True)
|
body_markdown = extract_email_content(email)
|
||||||
if payload is None:
|
|
||||||
body_markdown = "No content available"
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
body = payload.decode(errors='ignore')
|
|
||||||
body_markdown = markdownify(body)
|
|
||||||
except (UnicodeDecodeError, AttributeError) as e:
|
|
||||||
body_markdown = f"Error decoding content: {e}"
|
|
||||||
|
|
||||||
# Create a Markdown file for each email
|
# Create a Markdown file for each email
|
||||||
with open(filename, 'w', encoding='utf-8') as file:
|
with open(filename, 'w', encoding='utf-8') as file:
|
||||||
|
|
Loading…
Reference in New Issue