Skip to content

Commit

Permalink
Fixed parsing errors for forum posts that contained a copy of the signature separator in the signature
Browse files Browse the repository at this point in the history
  • Loading branch information
Galarzaa90 committed Apr 27, 2021
1 parent 04b8f7d commit 3aa4a12
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 8 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ Changelog
Due to this library relying on external content, older versions are not guaranteed to work.
Try to always use the latest version.

.. v4.1.2
4.1.2 (2021-04-27)
==================
- Fixed parsing errors for forum posts that contained a copy of the signature separator in the signature.

.. v4.1.1
4.1.1 (2021-04-19)
Expand Down
2 changes: 2 additions & 0 deletions tests/tests_client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import sys
import unittest.mock

import aiohttp
Expand Down Expand Up @@ -278,6 +279,7 @@ async def test_client_fetch_event_calendar_invalid_params(self):
await self.client.fetch_event_schedule(3)

@unittest.mock.patch("tibiapy.bazaar.AuctionDetails._parse_page_items")
@unittest.skipIf(sys.version_info < (3, 8, 0), "AsyncMock was implemented in 3.8")
async def test_client__fetch_all_pages_success(self, parse_page_items):
"""Testing internal method to fetch all pages of an auction item collection."""
paginator = tibiapy.ItemSummary(page=1, total_pages=5)
Expand Down
2 changes: 1 addition & 1 deletion tibiapy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '4.1.1'
__version__ = '4.1.2'
__author__ = 'Allan Galarza'

import logging
Expand Down
20 changes: 13 additions & 7 deletions tibiapy/forum.py
Original file line number Diff line number Diff line change
Expand Up @@ -1071,13 +1071,22 @@ def _parse_post_table(cls, post_table, offset=1):
character_info_container = post_table.find("div", attrs={"class": "PostCharacterText"})
post_author = ForumAuthor._parse_author_table(character_info_container)
content_container = post_table.find("div", attrs={"class": "PostText"})
content = content_container.encode_contents().decode()
title = None
signature = None
if signature_separator in content:
content, _ = content.split(signature_separator)
title_raw, content = content.split("<br/><br/>", 1)
emoticon = None
signature_container = post_table.find("td", attrs={"class": "ff_pagetext"})
if signature_container:
# Remove the signature's content from content container
signature_container.extract()
signature = signature_container.encode_contents().decode()
content = content_container.encode_contents().decode()
if signature_container:
# The signature separator will still be part of the content container, so we remove it
parts = content.split(signature_separator)
# This will handle the post containing another signature separator within the content
# We join back all the pieces except for the last one
content = signature_separator.join(parts[:-1])
title_raw, content = content.split("<br/><br/>", 1)
if title_raw:
title_html = bs4.BeautifulSoup(title_raw, 'lxml')
emoticon_img = title_html.find("img")
Expand All @@ -1086,9 +1095,6 @@ def _parse_post_table(cls, post_table, offset=1):
title_tag = title_html.find("b")
if title_tag:
title = title_tag.text
signature_container = post_table.find("td", attrs={"class": "ff_pagetext"})
if signature_container:
signature = signature_container.encode_contents().decode()
post_details = post_table.find('div', attrs={"class": "PostDetails"})
dates = post_dates_regex.findall(post_details.text)
edited_date = None
Expand Down

0 comments on commit 3aa4a12

Please sign in to comment.