-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
pdsimage_downloader.py
executable file
·123 lines (100 loc) · 4.56 KB
/
pdsimage_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File name : pdsimage_downloader.py
# Author : Podalirius (@podalirius_)
# Date created : 2 Aug 2022
# To install requirements: python3 -m pip install requests bs4 lxml rich
import argparse
import sys
import requests
import os
from bs4 import BeautifulSoup
from rich import progress
# Force the urllib3 bundled with requests to behave as if IPv6 were
# unavailable. The original author disabled IPv6 because
# pdsimage2.wr.usgs.gov does not support it.
# NOTE: this is a process-wide switch set at import time, so it affects every
# request made through this module, not only those to that host.
requests.packages.urllib3.util.connection.HAS_IPV6 = False
def parseArgs():
    """
    Print the tool banner, then parse the command line.

    :return: argparse.Namespace with the attributes url, name_contains,
             output_dir and verbose
    """
    print("pdsimage downloader v1.1 - by @podalirius_\n")

    # Default destination: "./pdsimages_downloaded/" using the platform separator.
    default_output_dir = os.path.sep.join([".", "pdsimages_downloaded", ""])

    cli = argparse.ArgumentParser(
        description="A python script to filter by filename and download PDS images.",
    )
    cli.add_argument(
        "-u", "--url",
        required=True,
        default=None,
        help='URL of the PDS image archive.',
    )
    cli.add_argument(
        "-n", "--name-contains",
        type=str,
        default='',
        help='Filtering only files where the name contains this string.',
    )
    cli.add_argument(
        "-D", "--output-dir",
        default=default_output_dir,
        help='Output directory where the images will be stored.',
    )
    cli.add_argument(
        "-v", "--verbose",
        action="store_true",
        default=False,
        help='Verbose mode. (default: False)',
    )

    parsed = cli.parse_args()
    return parsed
def yesno_question(msg):
    """
    Ask a yes/no question on standard input until a valid answer is given.

    :param msg: question text; " (y/N) " is appended before prompting
    :return: True for 'y' / 'yes', False for 'n' / 'no' (case-insensitive,
             surrounding whitespace ignored)
    """
    prompt = msg + " (y/N) "
    answers = {"y": True, "yes": True, "n": False, "no": False}

    response = input(prompt).strip().lower()
    # The reply is lowercased above, so only lowercase spellings need to be
    # listed. The full words must be accepted too, since the error message
    # explicitly asks the user to answer 'yes' or 'no'.
    while response not in answers:
        print("[!] Invalid choice. Please answer 'yes' or 'no'.")
        response = input(prompt).strip().lower()
    return answers[response]
def extract_pdsdata_links(url):
    """
    Gets all the possible links to files from a pdsimage archive link

    :param url: URL of the archive index page to fetch
    :return: dict mapping the last path component of every link found on the
             page to a dict of {extension: absolute link}
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    from urllib.parse import urljoin

    pdsdata = {}
    print("[>] Sending request ... ", end="")
    sys.stdout.flush()
    r = requests.get(url, timeout=60*5)
    print("got response!")
    soup = BeautifulSoup(r.content, 'lxml')

    # Resolve hrefs against the URL that actually served the page (after any
    # redirect, e.g. a directory URL given without its trailing slash), the
    # same way a browser would.
    base_url = r.url or url

    for a in soup.find_all('a'):
        href = a.get('href')
        # Anchors without an href (named anchors) carry no link to follow.
        if href is None or href in ("../", "./"):
            continue
        # urljoin handles relative paths, absolute paths ("/dir/file") and
        # full URLs without producing doubled slashes or glued-together URLs.
        link = urljoin(base_url, href)
        filename = link.split('/')[-1]
        extension = filename.split('.')[-1]
        pdsdata.setdefault(filename, {})[extension] = link
    return pdsdata
def download_file(download_dir, download_url, filename, verbose=False):
    """
    Download one URL into download_dir while displaying a progress bar.

    :param download_dir: directory the file is written into
    :param download_url: URL of the file to download
    :param filename: ignored; the local file name is always the last path
                     component of download_url (parameter kept so existing
                     callers keep working)
    :param verbose: when True, print a debug line for links that are skipped
    :return: None
    """
    filename = download_url.split('/')[-1]
    if not filename:
        # A link ending in '/' (e.g. a sub-directory of the listing) has no
        # file name; opening download_dir itself for writing would fail.
        if verbose:
            print("[debug] Skipping '%s': not a file link." % download_url)
        return

    # Same upper bound as the index request, so a stalled server cannot hang
    # the whole run forever.
    timeout = 60*5

    r = requests.head(download_url, allow_redirects=True, timeout=timeout)
    if r.status_code != 200:
        print("[!] (HTTP %d) Could not find %s " % (r.status_code, download_url))
        return

    # Content-Length is optional (e.g. chunked responses); without it the
    # progress bar is shown without a known total instead of crashing.
    content_length = r.headers.get("Content-Length")
    if content_length is not None and content_length.isdigit():
        total = int(content_length)
    else:
        total = None

    target_file = os.path.join(download_dir, filename)
    csize = 1024*16

    # The context manager releases the streamed connection even on error.
    with requests.get(
        r.url,
        headers={"User-Agent": "Microsoft-Symbol-Server/10.0.10036.206"},
        stream=True,
        timeout=timeout,
    ) as pdb:
        if pdb.status_code != 200:
            # Do not save an error page under the image's file name.
            print("[!] (HTTP %d) Could not find %s " % (pdb.status_code, download_url))
            return
        with progress.Progress() as p, open(target_file, "wb") as f:
            progress_bar = p.add_task("[cyan]Downloading %s" % filename, total=total)
            for chunk in pdb.iter_content(chunk_size=csize):
                f.write(chunk)
                p.update(progress_bar, advance=len(chunk))
def main():
    """
    Script entry point: list the archive, filter by name, confirm, download.

    Reads the options from the command line, fetches the links found at the
    given URL, keeps the entries whose name contains the --name-contains
    string, asks for confirmation and then downloads every kept link into the
    output directory.
    """
    options = parseArgs()

    # Parsing page
    pdsdata = extract_pdsdata_links(options.url)
    print("[+] Detected %d PDS image files (couple of LBL and IMG)" % len(pdsdata))

    # Creating the output dir if it does not exist (exist_ok makes a separate
    # existence check unnecessary)
    os.makedirs(options.output_dir, exist_ok=True)

    # Filtering and downloading only what you need:
    keep_for_download = {}
    number_of_files = 0
    for filename, filelinks in pdsdata.items():
        if options.name_contains in filename:
            if options.verbose:
                print("[debug] Keeping '%s' for download." % filename)
            keep_for_download[filename] = filelinks
            number_of_files += len(filelinks)
        else:
            if options.verbose:
                print("[debug] Ignoring '%s' for download." % filename)

    if yesno_question("[?] Filter on filenames containing '%s' returned %d files. Download them ?" % (options.name_contains, number_of_files)):
        # Download the selected files
        for filename, filelinks in keep_for_download.items():
            for link in filelinks.values():
                # Forward the verbosity flag so the downloader honours -v too.
                download_file(options.output_dir, link, filename, verbose=options.verbose)
        print("[+] Downloaded %d files!" % number_of_files)
    else:
        print("[!] Download aborted.")


if __name__ == '__main__':
    main()