Skip to content

Commit

Permalink
Add user agent and window size + minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
F33RNI committed Nov 3, 2023
1 parent b41b0ae commit 1c8d131
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 12 deletions.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ pip install git+https://github.com/F33RNI/LMSDownloader.git

### Usage in terminal
```
lmsdownloader [-h] -l LOGIN -p PASSWORD -link LINK_TO_DOWNLOAD -path SAVE_TO [--login-link LOGIN_LINK] [--wait-between-pages WAIT_BETWEEN_PAGES] [--link-check-regex LINK_CHECK_REGEX] [--headless] [--no-logging-init]
usage: lmsdownloader [-h] -l LOGIN -p PASSWORD -link LINK_TO_DOWNLOAD -path SAVE_TO [--login-link LOGIN_LINK]
[--wait-between-pages WAIT_BETWEEN_PAGES] [--link-check-regex LINK_CHECK_REGEX]
[--user-agent USER_AGENT] [--window-size WINDOW_SIZE] [--headless] [--no-logging-init]
options:
-h, --help show this help message and exit
Expand All @@ -54,6 +56,10 @@ options:
how long to wait after going to next page
--link-check-regex LINK_CHECK_REGEX
regex expression to check link_to_download (replace to "^" to bypass link check)
--user-agent USER_AGENT
browser's user agent to prevent mobile version
--window-size WINDOW_SIZE
browser's window size
--headless specify to open Chrome in headless mode
--no-logging-init specify to bypass logging initialization
```
Expand Down Expand Up @@ -84,6 +90,8 @@ Params:
- `login_link` – Link to LMS login page
- `wait_between_pages` – How long to wait after going to next page
- `link_check_regex` – Regex used to validate link_to_download (set it to "^" to bypass the link check)
- `user_agent` – Browser's user agent (used to prevent the mobile version of the site from being served)
- `window_size` – Browser's window size as "width,height" (default: "960,1080")
- `headless` – Set True to open Chrome in headless mode

### LMSDownloader.download()
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

setup(
name="LMSDownloader",
version="1.0.1",
version="1.1.0",
license="Apache License 2.0",
author="Fern Lane (aka F3RNI)",
author_email="xxoinvizionxx@gmail.com",
Expand Down
28 changes: 23 additions & 5 deletions src/LMSDownloader/LMSDownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,21 @@
CONTENT_TYPE_SCORM_BOOK = 1
CONTENT_TYPE_H5P_PRESENTATION = 2

# Default user agent
USER_AGENT_DEFAULT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"

# Default regex to check link_to_download
LINK_CHECK_REGEX_DEFAULT = "^(http|https):\\/\\/online\\.mospolytech\\.ru\\/mod\\/(scorm|hvp)\\/view\\.php\\?id="


class LMSDownloader:
def __init__(self, lms_login: str, lms_password: str, link_to_download: str,
login_link: str = "https://online.mospolytech.ru/login/index.php",
wait_between_pages: float = 1.,
link_check_regex: str = "^(http|https):\\/\\/online\\.mospolytech\\.ru\\/mod\\/(scorm|hvp)\\/view\\.php\\?id=",
link_check_regex: str = LINK_CHECK_REGEX_DEFAULT,
user_agent: str = USER_AGENT_DEFAULT,
window_size: str = "960,1080",
headless: bool = True) -> None:
"""
Initializes LMSDownloader class (just copies fields)
Expand All @@ -67,7 +76,9 @@ def __init__(self, lms_login: str, lms_password: str, link_to_download: str,
:param link_to_download: LMS link to download
:param login_link: Link to LMS login page
:param wait_between_pages: How long to wait after going to next page
:param link_check_regex: Regex expression to check link_to_download (replace to "^" to bypass link check)
:param link_check_regex: Regex expression to check link_to_download, replace to "^" to bypass link check
:param user_agent: Browser's user agent to prevent mobile version
:param window_size: Default browser's window size
:param headless: Set True to open Chrome in headless mode
"""
self._lms_login = lms_login
Expand All @@ -76,6 +87,8 @@ def __init__(self, lms_login: str, lms_password: str, link_to_download: str,
self._login_link = login_link
self._wait_between_pages = wait_between_pages
self._link_check_regex = link_check_regex
self._user_agent = user_agent
self._window_size = window_size
self._headless = headless

self.browser = None
Expand Down Expand Up @@ -123,7 +136,7 @@ def download(self, save_to_directory: str = "") -> list[str]:

# SCORM
if self.browser.find_elements(By.ID, "scorm_object"):
# Open iframe's src
logging.info("Switching to the iframe")
iframe_src = self.browser.find_element(By.ID, "scorm_object").get_attribute("src")
if iframe_src.lower().startswith("http"):
self.browser.get(iframe_src)
Expand Down Expand Up @@ -162,6 +175,7 @@ def download(self, save_to_directory: str = "") -> list[str]:
# H5P
elif self.browser.find_elements(By.CLASS_NAME, "h5p-iframe"):
# Open iframe's src
logging.info("Switching to the iframe")
self.browser.switch_to.frame(self.browser.find_element(By.CLASS_NAME, "h5p-iframe"))

# Wait until loaded
Expand Down Expand Up @@ -333,8 +347,12 @@ def _start_browser(self) -> None:
logging.info("Starting browser{}... Please wait".format(" in headless mode" if self._headless else ""))
chrome_options = webdriver.ChromeOptions()
if self._headless:
chrome_options.add_argument("--headless=new")
# chrome_options.add_argument("--window-size=1920,1920")
chrome_options.add_argument("--headless=old")
chrome_options.add_argument(f"--user-agent={self._user_agent}")
chrome_options.add_argument(f"--window-size={self._window_size}")
# chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--ignore-ssl-errors=yes")
Expand Down
26 changes: 21 additions & 5 deletions src/LMSDownloader/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,21 @@ def main():
help="regex expression to check link_to_download (replace to \"^\" to bypass link check)",
type=str,
required=False,
default="^(http|https):\\/\\/online\\.mospolytech\\.ru\\/mod\\/(scorm|hvp)\\/view\\.php\\?id="
default=LMSDownloader.LINK_CHECK_REGEX_DEFAULT
)
parser.add_argument(
"--user-agent",
help="browser's user agent to prevent mobile version",
type=str,
required=False,
default=LMSDownloader.USER_AGENT_DEFAULT
)
parser.add_argument(
"--window-size",
help="browser's window size",
type=str,
required=False,
default="960,1080"
)
parser.add_argument(
"--headless",
Expand All @@ -117,10 +131,12 @@ def main():

# Initialize class
lms_downloader = LMSDownloader.LMSDownloader(args.login, args.password, args.link_to_download,
args.login_link,
args.wait_between_pages,
args.link_check_regex,
args.headless)
login_link=args.login_link,
wait_between_pages=args.wait_between_pages,
link_check_regex=args.link_check_regex,
user_agent=args.user_agent,
window_size=args.window_size,
headless=args.headless)

# Download
try:
Expand Down

0 comments on commit 1c8d131

Please sign in to comment.