Skip to content

Commit

Permalink
Handle restricted access documents gracefully
Browse files: browse the repository at this point in the history
  • Loading branch information
ruben-quilez committed Mar 6, 2024
1 parent 4b9957f commit 639af7e
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/etls/boja/scrapper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -148,6 +148,9 @@ def download_document(self, url: str) -> BOJAMetadataDocument:
texto_completo = ""
soup = HTTPRequester.get_soup(url)
try:
acceso_restringido = soup.find('h1', class_='title', string='Texto de acceso restringido')
if acceso_restringido:
return None
cuerpo = soup.find(id="cuerpo", class_="grid_11 contenidos_nivel3 boja_disposicion")
cabecera = soup.find(class_="punteado_izquierda cabecera_detalle_disposicion")
if not cabecera or not cuerpo:
Expand Down

0 comments on commit 639af7e

Please sign in to comment.