Skip to content

Commit

Permalink
Merge pull request #521 from flathunters/revert-520-revert-516-main
Browse files Browse the repository at this point in the history
Revert "Revert "Fix for wggesucht crawler to only consider the desired listings""
  • Loading branch information
codders authored Jan 24, 2024
2 parents ac0ec3e + 610b047 commit 6677f4a
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions flathunter/crawler/wggesucht.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,16 @@ def parse_expose_element_to_details(row: Tag, crawler: str) -> Optional[Dict]:


 def liste_attribute_filter(element: Union[Tag, str]) -> bool:
-    """Return true for elements whose 'id' attribute starts with 'liste-'"""
+    """Return true for elements whose 'id' attribute starts with 'liste-'
+    and are not contained in the 'premium_user_extra_list' container"""
     if not isinstance(element, Tag):
         return False
-    if "id" not in element.attrs:
+    if not element.attrs or "id" not in element.attrs:
         return False
-    return element.attrs["id"].startswith('liste-')
+    if not element.parent or not element.parent.attrs or "class" not in element.parent.attrs:
+        return False
+    return element.attrs["id"].startswith('liste-') and \
+        'premium_user_extra_list' not in element.parent.attrs["class"]


class WgGesucht(Crawler):
Expand All @@ -175,7 +179,6 @@ def extract_data(self, soup: BeautifulSoup):
             e for e in findings
             if isinstance(e, Tag) and e.has_attr('class') and not 'display-none' in e['class']
         ]
-
         for row in existing_findings:
             details = parse_expose_element_to_details(row, self.get_name())
             if details is None:
Expand Down

0 comments on commit 6677f4a

Please sign in to comment.