From 71266a758b4c6fa8a601389f47a4bb82762afeaa Mon Sep 17 00:00:00 2001 From: Arthur Taylor Date: Tue, 17 Dec 2024 12:50:59 +0100 Subject: [PATCH] Handle flats with more than 5 rooms on immobiliare --- flathunter/crawler/immobiliare.py | 2 +- test/crawler/test_crawl_immobiliare.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/flathunter/crawler/immobiliare.py b/flathunter/crawler/immobiliare.py index 9abbb0ea..44f0c62a 100644 --- a/flathunter/crawler/immobiliare.py +++ b/flathunter/crawler/immobiliare.py @@ -55,7 +55,7 @@ def extract_data(self, soup): detail_texts = [ item.find("span").text.strip() for item in details_list ] room_counts = [ match.group(1) for text in detail_texts - if (match := re.match(r"(\d+) local[ie]", text)) is not None ] + if (match := re.match(r"(\d+)\+? local[ie]", text)) is not None ] if len(room_counts) > 0: rooms = room_counts[0] else: diff --git a/test/crawler/test_crawl_immobiliare.py b/test/crawler/test_crawl_immobiliare.py index 1707d9f2..836d3ef7 100644 --- a/test/crawler/test_crawl_immobiliare.py +++ b/test/crawler/test_crawl_immobiliare.py @@ -24,4 +24,6 @@ def test(self): self.assertTrue(entries[0]['url'].startswith( "https://www.immobiliare.it/annunci/"), u"URL should be an apartment link") for attr in ['title', 'price', 'size', 'rooms', 'address', 'image']: - self.assertIsNotNone(entries[0][attr], attr + " should be set") + self.assertIsNotNone( + entries[0][attr], attr + " should be set (" + entries[0]['url'] + ")" + )