Skip to content

Commit

Permalink
Refactored and added method for parsing attachments
Browse files Browse the repository at this point in the history
  • Loading branch information
Pseudorizer committed Jul 26, 2021
1 parent 14895f6 commit dbd90ee
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 4 deletions.
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "code-with-mosh-downloader",
"version": "0.0.2",
"version": "0.0.4",
"description": "Download courses from codewithmosh.com",
"main": ".webpack/main",
"scripts": {
Expand Down Expand Up @@ -88,6 +88,7 @@
"node-html-parser": "^4.1.0",
"node-loader": "^2.0.0",
"react-is": "^17.0.2",
"sanitize-filename": "^1.6.3",
"spectron": "^15.0.0",
"style-loader": "^3.2.1",
"styled-components": "^5.3.0",
Expand Down
40 changes: 37 additions & 3 deletions src/main/pageParser.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {DownloadQueueItemType} from 'Types/types';
import {getString} from 'Main/client';
import {HTMLElement} from 'node-html-parser';
import {ITypeParser, ParsedItem} from 'MainTypes/types';
import {ITypeParser, ParsedAttachment, ParsedItem} from 'MainTypes/types';

export async function parsePageFromUrl(url: string, type: DownloadQueueItemType) {
if (!type) {
Expand Down Expand Up @@ -94,6 +94,38 @@ export class CourseParser implements ITypeParser {
}

export class VideoParser implements ITypeParser {
/**
 * Reads the Wistia video id from a lecture page.
 *
 * @param html Parsed lecture page.
 * @returns The `data-wistia-id` attribute of the first
 * `.attachment-wistia-player` element, or `null` when the page contains no
 * such element.
 */
private static getVideo(html: HTMLElement) {
    const player = html.querySelector('.attachment-wistia-player');

    // No player element on the page: report that explicitly with null.
    if (!player) {
        return null;
    }

    return player.getAttribute('data-wistia-id');
}

/**
 * Collects the non-video attachments of a lecture page.
 *
 * Every `.lecture-attachment` element that is not a video attachment is
 * inspected. Text attachments yield the inner HTML of their
 * `.lecture-text-container`; file attachments yield the `href` of their
 * first link. Attachments of any other kind, or ones missing the expected
 * child element / `href`, are skipped.
 *
 * @param html Parsed lecture page.
 * @returns The attachments found, in document order (possibly empty).
 */
private static getAttachments(html: HTMLElement) {
    const attachmentElements = html.querySelectorAll('.lecture-attachment:not(.lecture-attachment-type-video)');

    const attachments: ParsedAttachment[] = [];

    attachmentElements.forEach(x => {
        // The attachment kind is a CSS class (the selector above already
        // treats `lecture-attachment-type-video` as one), so it has to be
        // tested through classList; hasAttribute() never matches a class
        // name, and a leading '.' belongs in selectors only.
        if (x.classList.contains('lecture-attachment-type-text')) {
            const textContainer = x.querySelector('.lecture-text-container');

            // Skip malformed text attachments instead of dereferencing null.
            if (textContainer) {
                attachments.push({
                    type: 'text',
                    data: textContainer.innerHTML
                });
            }
        } else if (x.classList.contains('lecture-attachment-type-file')) {
            const downloadLink = x.querySelector('a');
            const href = downloadLink ? downloadLink.getAttribute('href') : undefined;

            // A download attachment without a usable URL is of no use to
            // the caller, so it is left out.
            if (href) {
                attachments.push({
                    type: 'download',
                    data: href
                });
            }
        }
    });

    return attachments;
}

async parse(html: HTMLElement) {
const lectureId = html.querySelector('#lecture_heading').getAttribute('data-lecture-id');
const videoTitle = html.querySelector('#lecture_heading').textContent.trim().replace(/(\d+)(-)(.+)/gmi, '$1 $2$3');
Expand All @@ -103,7 +135,8 @@ export class VideoParser implements ITypeParser {
const initialCourseSectionHeading = courseSection.querySelector('.section-title').textContent.trim();
const courseSectionHeading = /(.+)\s\(\d+m\)/gmi.exec(initialCourseSectionHeading)[1];

const wistiaId = html.querySelector('.attachment-wistia-player').getAttribute('data-wistia-id');
const wistiaId = VideoParser.getVideo(html);
const attachments = VideoParser.getAttachments(html);

const courseTitle = html.querySelector('.course-sidebar-head > h2').textContent;

Expand All @@ -113,7 +146,8 @@ export class VideoParser implements ITypeParser {
extraData: {
courseTitle,
courseSectionHeading,
videoTitle
videoTitle,
attachments
}
}
] as ParsedItem[];
Expand Down
5 changes: 5 additions & 0 deletions src/main/types/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ export interface ITypeParser {
parse(html: HTMLElement): Promise<ParsedItem[]>
}

/**
 * A non-video attachment extracted from a lecture page.
 */
export type ParsedAttachment = {
// Kind of attachment; determines how `data` should be interpreted.
type: 'text' | 'download',
// Payload of the attachment. As produced by VideoParser.getAttachments this
// is the inner HTML of the text container for 'text' and the link's href
// for 'download'; it is typed `unknown`, so consumers must narrow it.
data: unknown
}

export type ParsedItem = {
nextUrl: string,
nextType?: DownloadQueueItemType,
Expand Down
19 changes: 19 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6135,6 +6135,13 @@ safe-buffer@>=5.1.0, safe-buffer@^5.0.1, safe-buffer@^5.1.0, safe-buffer@^5.1.2,
resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a"
integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==

sanitize-filename@^1.6.3:
version "1.6.3"
resolved "https://registry.yarnpkg.com/sanitize-filename/-/sanitize-filename-1.6.3.tgz#755ebd752045931977e30b2025d340d7c9090378"
integrity sha512-y/52Mcy7aw3gRm7IrcGDFx/bCk4AhRh2eI9luHOQM86nZsqwiRkkq2GekHXBBD+SmPidc8i2PqtYZl+pWJ8Oeg==
dependencies:
truncate-utf8-bytes "^1.0.0"

sax@^1.2.4:
version "1.2.4"
resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9"
Expand Down Expand Up @@ -6886,6 +6893,13 @@ trim-repeated@^1.0.0:
dependencies:
escape-string-regexp "^1.0.2"

truncate-utf8-bytes@^1.0.0:
version "1.0.2"
resolved "https://registry.yarnpkg.com/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz#405923909592d56f78a5818434b0b78489ca5f2b"
integrity sha1-QFkjkJWS1W94pYGENLC3hInKXys=
dependencies:
utf8-byte-length "^1.0.1"

ts-loader@^9.2.2:
version "9.2.3"
resolved "https://registry.yarnpkg.com/ts-loader/-/ts-loader-9.2.3.tgz#dc3b6362a4d4382493cd4f138d345f419656de68"
Expand Down Expand Up @@ -7078,6 +7092,11 @@ username@^5.1.0:
execa "^1.0.0"
mem "^4.3.0"

utf8-byte-length@^1.0.1:
version "1.0.4"
resolved "https://registry.yarnpkg.com/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz#f45f150c4c66eee968186505ab93fcbb8ad6bf61"
integrity sha1-9F8VDExm7uloGGUFq5P8u4rWv2E=

util-deprecate@^1.0.1, util-deprecate@~1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
Expand Down

0 comments on commit dbd90ee

Please sign in to comment.