-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.ts
108 lines (94 loc) · 2.69 KB
/
main.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import { existsSync, mkdirSync } from 'fs';
import { readFile, writeFile } from 'fs/promises';
import path from 'path';
import axios, { AxiosError } from 'axios';
import { JSDOM } from 'jsdom';
import { fileURLToPath } from 'url';
import { resolve } from 'path';
// ES modules do not provide the CommonJS `__filename` / `__dirname` globals,
// so both are derived from this module's URL. The `resolve(__dirname, ...)`
// calls in this file build their paths from the directory computed here.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/**
 * Downloads the body of `url` with axios.
 *
 * Request failures are logged instead of thrown, so callers receive
 * `undefined` when the page could not be fetched.
 *
 * @param url - Address of the page to download.
 * @returns The response body, or `undefined` when the request failed.
 */
async function fetchPage(url: string): Promise<string | undefined> {
  try {
    // NOTE(review): the body is typed as string here; confirm the target
    // endpoint never answers with a payload axios would parse into an object.
    const response = await axios.get<string>(url);
    return response.data;
  } catch (error: unknown) {
    if (axios.isAxiosError(error)) {
      // Prefer the URL axios actually requested; fall back to the argument
      // when the error carries no request config.
      console.error(`There was an error with ${error.config?.url ?? url}.`);
      console.error(error.toJSON());
    } else {
      // Not an axios error, so `toJSON()` may not exist; log the raw value.
      console.error(`There was an error with ${url}.`);
      console.error(error);
    }
    return undefined;
  }
}
/**
 * Builds the path of the file that caches the HTML of `url`.
 *
 * The file name is the base64 encoding of the URL. Standard base64 can
 * contain `/`, which would be read as a directory separator, so it is
 * replaced with `_` (a character base64 never produces, so names stay unique).
 */
function cacheFilePath(url: string): string {
  const key = Buffer.from(url).toString('base64').replace(/\//g, '_');
  return resolve(__dirname, '.cache', `${key}.html`);
}

/**
 * Returns the parsed DOM document for `url`, reading the HTML from the
 * on-disk cache when allowed and available, otherwise downloading it.
 *
 * @param url - Page to load.
 * @param ignoreCache - When `true`, the cache is neither read nor written.
 * @returns The document JSDOM builds from the HTML. If the download fails,
 *   `fetchPage` yields `undefined` and JSDOM is constructed from that.
 */
async function fetchFromWebOrCache(
  url: string,
  ignoreCache = false,
): Promise<Document> {
  // Create the cache folder next to this module (not in the current working
  // directory). `recursive: true` makes this a no-op when it already exists.
  mkdirSync(resolve(__dirname, '.cache'), { recursive: true });

  console.log(`Getting data for ${url}...`);
  const cacheFile = cacheFilePath(url);

  if (!ignoreCache && existsSync(cacheFile)) {
    console.log(`I read ${url} from cache`);
    const cachedHtml = await readFile(cacheFile, { encoding: 'utf8' });
    const cachedDom = new JSDOM(cachedHtml);
    return cachedDom.window.document;
  }

  console.log(`I fetched ${url} fresh`);
  const freshHtml = await fetchPage(url);
  if (!ignoreCache && freshHtml) {
    try {
      // Awaited so a failed write cannot surface later as an unhandled rejection.
      await writeFile(cacheFile, freshHtml, { encoding: 'utf8' });
    } catch (error: unknown) {
      // Caching is best-effort: a failed write must not discard the page.
      console.error(`Could not write cache file for ${url}.`);
      console.error(error);
    }
  }
  const freshDom = new JSDOM(freshHtml);
  // Prints the raw response body that was just downloaded.
  console.log(freshHtml);
  return freshDom.window.document;
}
/**
 * Collects the text of every `<div>` that is a direct child of a `<td>`.
 *
 * Each matched element's `id` is logged first; afterwards every element with
 * non-empty text content contributes one `{ rank }` entry, in document order.
 *
 * @param document - DOM document to scan.
 * @returns One `{ rank: text }` object per matched element whose text content
 *   is non-empty.
 */
function extractData(document: Document): { rank: string }[] {
  console.log('Extracting data...');
  const cells = Array.from(
    document.querySelectorAll<HTMLDivElement>('td > div'),
  );

  // Log all ids before collecting, keeping the original output order.
  for (const cell of cells) {
    console.log('a:', cell.id);
  }

  const returnData: { rank: string }[] = [];
  for (const cell of cells) {
    const text = cell.textContent;
    // Skips both `null` and the empty string.
    if (text) {
      returnData.push({ rank: text });
    }
  }
  return returnData;
}
/**
 * Serializes `data` as JSON into `data/<filename>.json` next to this module.
 *
 * @param filename - Base name of the output file, without the `.json` suffix.
 * @param data - Value passed to `JSON.stringify`.
 * @returns A promise that settles once the file has been written, so callers
 *   can await completion and observe write errors.
 */
async function saveData(filename: string, data: unknown): Promise<void> {
  console.log(`Saving data to ${filename}.json`);
  // Create the folder where the file is written (next to this module, not in
  // the current working directory). `recursive: true` tolerates an existing one.
  const dataDir = resolve(__dirname, 'data');
  mkdirSync(dataDir, { recursive: true });
  await writeFile(resolve(dataDir, `${filename}.json`), JSON.stringify(data), {
    encoding: 'utf8',
  });
}
/**
 * Script entry point: loads the puzzle page (bypassing the cache), extracts
 * the cell data and stores it as JSON.
 */
async function getData(): Promise<void> {
  const document = await fetchFromWebOrCache(
    'https://es.sudoku-online.net/get?difficulty=normal',
    true,
  );
  const data = extractData(document);
  // NOTE(review): the output name does not match the sudoku URL above;
  // presumably a leftover from an earlier scraper. Confirm before renaming.
  await saveData('hacker-news-links', data);
}

// Report any failure explicitly and mark the process as failed instead of
// leaving the promise rejection unhandled.
getData().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});