Skip to content

Commit

Permalink
Merge pull request #11 from sirstudly/add_comet_support
Browse files Browse the repository at this point in the history
Adds Comet scraper support
  • Loading branch information
funkypenguin authored Oct 14, 2024
2 parents 098d07b + 18fbe89 commit e3ec7f2
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 2 deletions.
3 changes: 2 additions & 1 deletion scraper/services/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
from scraper.services import zilean
from scraper.services import torbox
from scraper.services import mediafusion
from scraper.services import comet

#define subclass method
def __subclasses__():
    """Return the scraper service modules this package exposes.

    Only the list that includes ``comet`` is returned. The earlier list
    without it was left in front of this one, which made the comet entry
    unreachable.
    """
    return [rarbg,x1337,jackett,prowlarr,orionoid,nyaa,torrentio,zilean,torbox,mediafusion,comet]

# Names of the scraper services that are switched on. The comet service
# appends its own name here from setup() and refuses to scrape unless
# 'comet' is present.
active = ['torrentio']
# NOTE(review): how this list is consumed is not visible in this hunk - confirm
# against the rest of the module before relying on it.
overwrite = []
Expand Down
183 changes: 183 additions & 0 deletions scraper/services/comet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# import modules
from ui.ui_print import *
import releases
import base64
import json
# Service name; setup() adds cls.name to scraper.services.active to enable this scraper
name = "comet"

# The values below are stored as strings and converted where they are used.
# get() passes int(request_timeout_sec) as the HTTP request timeout.
request_timeout_sec = "60"
# scrape() passes float(rate_limit_sec) to custom_session for both GET and POST limits.
rate_limit_sec = "10" # minimum number of seconds between requests
# scrape() takes the scheme and host from this URL, and _get_base64_config() takes
# the path segment in front of "manifest.json" as the encoded configuration.
manifest_json_url = "" # this is mandatory otherwise non-cached searches will fail without a valid debrid account


def request(func, *args):
    """Call ``func(*args)`` and return its response body parsed as JSON.

    JSON objects are converted to ``SimpleNamespace`` instances so callers
    can use attribute access (``parsed.streams``).

    Every failure is reported through ``ui_print`` and collapses to an empty
    list: a timeout, any other exception raised by ``func``, a response whose
    ``status_code`` is not 200, or a body that is not valid JSON.
    """
    def _body_text(resp):
        # Used only for log messages, so it must never raise: undecodable
        # bytes are replaced instead of aborting the error report itself.
        raw = getattr(resp, "content", b"")
        if isinstance(raw, (bytes, bytearray)):
            return raw.decode("utf-8", errors="replace")
        return str(raw)

    try:
        response = func(*args)
        if hasattr(response, "status_code") and response.status_code != 200:
            ui_print(f'[comet] error {str(response.status_code)}: failed response from comet. {_body_text(response)}')
            return []

    except requests.exceptions.Timeout:
        ui_print('[comet] error: request timed out.')
        return []
    except Exception as e:
        ui_print('[comet] error: ' + str(e))
        return []

    try:
        json_response = json.loads(response.content, object_hook=lambda d: SimpleNamespace(**d))
    except Exception as e:
        ui_print('[comet] error: unable to parse response:' + _body_text(response) + " " + str(e))
        return []
    return json_response


def get(session: requests.Session, url: str) -> requests.Response:
    """Fetch *url* with a GET on *session* and return the raw response.

    The configured ``request_timeout_sec`` is converted to an int and used
    as the request timeout. A message is sent to ``ui_print`` before and
    after the call, each with ``ui_settings.debug`` as the second argument.
    """
    ui_print(f"[comet] GET url: {url} ...", ui_settings.debug)
    timeout_seconds = int(request_timeout_sec)
    reply = session.get(url, timeout=timeout_seconds)
    ui_print("done", ui_settings.debug)
    return reply


def setup(cls, new=False):
    """Interactive configuration entry point for this scraper service.

    Collects every setting in ``settings_list`` whose ``cls`` is *cls*.

    * ``new=True``: runs ``setup()`` on the "Comet Scraper Parameters"
      setting and then enables the service.
    * ``new=False``: shows a numbered menu of the collected settings, lets
      the user edit one (which also enables the service) or go back with 0.

    When no setting belongs to *cls* the service is enabled right away.
    "Enabled" means ``cls.name`` is present in ``scraper.services.active``.
    """
    from settings import settings_list
    from scraper.services import active

    def enable_service():
        # Register the service once; repeated calls must not duplicate the name.
        if cls.name not in active:
            active.append(cls.name)

    own_settings = [
        entry
        for _category, group in settings_list
        for entry in group
        if entry.cls == cls
    ]
    if not own_settings:
        enable_service()

    if new:
        for entry in own_settings:
            if entry.name == "Comet Scraper Parameters":
                entry.setup()
                enable_service()
        return

    while True:
        print("0) Back")
        menu_keys = []
        for position, entry in enumerate(own_settings, start=1):
            print(str(position) + ') ' + entry.name)
            menu_keys.append(str(position))
        print()
        if not own_settings:
            print("Nothing to edit!")
            print()
            time.sleep(3)
            return
        choice = input("Choose an action: ")
        if choice in menu_keys:
            own_settings[int(choice) - 1].input()
            enable_service()
            return
        if choice == '0':
            return
        # Any other input redraws the menu.


def scrape(query, altquery):
    """Search comet for releases matching an IMDB-id based query.

    Returns an empty list when 'comet' is not an active service, when
    ``manifest_json_url`` does not look like a manifest URL, or when no
    ``tt<digits>`` IMDB id can be found in the query. Otherwise the query is
    classified as a show or a movie and handed to the matching
    ``scrape_imdb_*`` helper, whose result is returned.
    """
    from scraper.services import active
    if 'comet' not in active:
        return []

    manifest_match = regex.search(r"(https?:\/\/[^\/]+).*manifest.json", manifest_json_url, regex.I)
    if manifest_match is None:
        ui_print('[comet] error: the scraper parameters URL is not configured correctly: ' + manifest_json_url)
        return []
    base_url = manifest_match.group(1)

    # When altquery is the literal "(.*)", the plain query is used in its place.
    if altquery == "(.*)":
        altquery = query

    show_marker = regex.search(r'(S[0-9]|complete|S\?[0-9])', altquery, regex.I)
    media_type = "show" if show_marker else "movie"

    if media_type == "show":
        season_match = regex.search(r'(?<=S)([0-9]+)', altquery, regex.I)
        episode_match = regex.search(r'(?<=E)([0-9]+)', altquery, regex.I)
        s = season_match.group() if season_match else None
        e = episode_match.group() if episode_match else None
        # A missing or zero season/episode number falls back to 1.
        if s is None or int(s) == 0:
            s = 1
        if e is None or int(e) == 0:
            e = 1

    imdb_match = regex.search(r'(tt[0-9]+)', altquery, regex.I)
    if not imdb_match:
        ui_print('[comet] error: search missing IMDB ID for query: ' + query)
        return []
    imdb_id = imdb_match.group()

    ui_print(f'[comet]: searching for {media_type}s with ID={imdb_id}', ui_settings.debug)
    session = custom_session(get_rate_limit=float(rate_limit_sec), post_rate_limit=float(rate_limit_sec))
    if media_type == 'movie':
        return scrape_imdb_movie(session, base_url, _get_base64_config(), imdb_id)
    return scrape_imdb_series(session, base_url, _get_base64_config(), imdb_id, s, e)


def scrape_imdb_movie(session: requests.Session, base_url: str, base64_config: str, imdb_id: str) -> list:
    """Fetch the movie stream listing for *imdb_id* and return the collated releases."""
    stream_url = f'{base_url}/{base64_config}/stream/movie/{imdb_id}.json'
    payload = request(get, session, stream_url)
    return collate_releases_from_response(payload)


def scrape_imdb_series(session: requests.Session, base_url: str, base64_config: str, imdb_id: str, season: int = 1, episode: int = 1) -> list:
    """Fetch the stream listing for one episode of *imdb_id* and return the collated releases.

    The episode is addressed as ``<imdb_id>:<season>:<episode>`` in the URL.
    """
    season_part = str(season)
    episode_part = str(episode)
    stream_url = f'{base_url}/{base64_config}/stream/series/{imdb_id}:{season_part}:{episode_part}.json'
    payload = request(get, session, stream_url)
    return collate_releases_from_response(payload)


def collate_releases_from_response(response: "SimpleNamespace | list | None") -> list:
    """Turn a parsed comet stream listing into a list of release objects.

    *response* is whatever ``request()`` returned: the parsed JSON (an object
    with a ``streams`` attribute on success) or ``[]`` after a failure that
    ``request()`` has already logged.

    Returns one ``releases.release`` per stream that has a ``url`` containing
    a 40-character hex infohash. Streams that cannot be used are logged and
    skipped. If a stream's title reports an invalid comet config or account,
    collation stops and the releases gathered so far are returned.
    """
    scraped_releases = []
    if not hasattr(response, "streams"):
        # [] is request()'s failure sentinel and was already reported there,
        # so only genuinely unexpected payloads are logged here.
        if response is not None and response != []:
            ui_print('[comet] error: ' + repr(response))
        return scraped_releases

    streams = response.streams or []
    ui_print(f"[comet] found {str(len(streams))} streams", ui_settings.debug)

    # Compiled once for the whole listing instead of once per stream.
    infohash_pattern = regex.compile(r"(?!.*playback\/)[a-fA-F0-9]{40}")
    source_pattern = regex.compile(r'(?<=🔎 )(.*)(?=\n|$)')

    for result in streams:
        # A stream without a title must not abort the whole scrape.
        full_title = getattr(result, "title", None) or ""

        if full_title == "Invalid Comet config." or regex.search(r'(?<=Invalid )(.*)(?= account)', full_title):
            ui_print(f'[comet] error: {full_title}')
            return scraped_releases
        if not hasattr(result, "url"):
            ui_print(f'[comet]: error: Missing URL in result {full_title}')
            continue

        try:
            # Only the first line of the title is used as the release title.
            title = full_title.split("\n")[0]
            infohash_match = infohash_pattern.search(result.url)
            if infohash_match is None:
                ui_print(f'[comet]: error: infohash not found for title: {title}')
                continue
            infohash = infohash_match.group()

            size = int(result.torrentSize) / 1000000000 if hasattr(result, "torrentSize") else 0
            links = ['magnet:?xt=urn:btih:' + infohash + '&dn=&tr=']
            seeds = 0  # not available
            source_match = source_pattern.search(full_title)
            source = source_match.group() if source_match else "unknown"
            scraped_releases += [releases.release(
                '[comet: '+source+']', 'torrent', title, [], size, links, seeds)]
        except Exception as e:
            ui_print('[comet] stream parsing error: ' + str(e))
            continue
    return scraped_releases


# Retrieves the base64 configuration parameters from manifest_json_url
# If it isn't defined, then create a default profile without a debrid key
def _get_base64_config() -> str:
    """Return the base64 configuration segment to put into comet stream URLs.

    When ``manifest_json_url`` ends with ``manifest.json`` and its path has a
    segment in front of that file name, that segment is returned unchanged.
    In every other case, including a manifest URL with no configuration
    segment, a default profile without a debrid key is encoded and returned.
    """
    if manifest_json_url.endswith("manifest.json"):
        # Inspect only the path portion: drop "scheme://host" first so the
        # host name can never be mistaken for the configuration segment.
        remainder = manifest_json_url
        scheme_end = remainder.find("://")
        if scheme_end != -1:
            remainder = remainder[scheme_end + 3:].partition("/")[2]
        path_segments = [segment for segment in remainder.split("/") if segment]
        if len(path_segments) >= 2:
            return path_segments[-2]

    return base64.b64encode(json.dumps({
        "indexers": ["bitsearch","eztv","thepiratebay","therarbg","yts"],
        "maxResults": 0,
        "resolutions": ["All"],
        "languages": ["All"],
        "debridService": "realdebrid",
        "debridApiKey": "",
        "debridStreamProxyPassword": ""
    }).encode("utf-8")).decode("utf-8")
9 changes: 8 additions & 1 deletion settings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,9 @@ def setup(self):
while not working:
print(self.name + ' - current value: ' + str(getattr(self.cls, self.key)))
print()
if not self.help == '':
print(self.help)
print()
if self.oauth:
device_code, user_code = self.cls.oauth()
print(self.prompt + str(user_code))
Expand Down Expand Up @@ -388,7 +391,11 @@ def get(self):
setting('Mediafusion Scraper Parameters', 'Please enter a valid Mediafusion manifest URL: ', scraper.services.mediafusion, 'manifest_json_url', entry="parameter",
help='This setting lets you control the mediafusion scraping parameters. Visit "https://mediafusion.elfhosted.com/configure" and configure your settings. Please choose "Direct Torrent (Free)" as the debrid service. Click on "Share Manifest URL" and paste it here.',
hidden=True),
]
setting('Comet Request Timeout', 'Please enter the request timeout in seconds: ', scraper.services.comet, 'request_timeout_sec', hidden=True),
setting('Comet Rate Limit', 'Please enter the minimum number of seconds between requests: ', scraper.services.comet, 'rate_limit_sec', hidden=True),
setting('Comet Scraper Parameters', 'Please enter a valid Comet manifest URL: ', scraper.services.comet, 'manifest_json_url', entry="parameter",
help='This setting lets you control the comet scraping parameters. Visit "https://comet.elfhosted.com/configure" and configure your settings. Click on "Copy Link" and paste it here.', hidden=True),
]
],
['Debrid Services', [
setting('Debrid Services', [''], debrid.services, 'active', required=True, preflight=True, entry="service",
Expand Down

0 comments on commit e3ec7f2

Please sign in to comment.