diff --git a/.gitignore b/.gitignore index bf21eaf..5a67457 100644 --- a/.gitignore +++ b/.gitignore @@ -76,4 +76,5 @@ typings/ # FuseBox cache .fusebox/ -out/ \ No newline at end of file +out/ +data.csv \ No newline at end of file diff --git a/index.ts b/index.ts index 9a1e2dd..b9d12a5 100644 --- a/index.ts +++ b/index.ts @@ -1,35 +1,147 @@ "use strict"; -const SITE_URL = "https://trendogate.com/"; -const NUMBER_OF_DAYS = 100; + +import { addDays } from 'date-fns'; +import cheerio from 'cheerio'; +import axios from 'axios'; +import fs from 'fs'; + +const BASE_URL = "https://trendogate.com/"; +const CSV_PATH = "./data.csv"; const USA_ID = 23424977; -// https://trendogate.com/placebydate/23424977/2015-04-01 +const NUMBER_OF_DAYS = 100; +const WAIT = 5000; // in ms -// Topic | Date | Position +// e.g https://trendogate.com/placebydate/23424977/2015-04-01 +// Topic | Position | Date -// SO: https://stackoverflow.com/questions/4345045/javascript-loop-between-date-ranges/14655646 const now: Date = new Date(); let tmp : Date = now; let days: Array = []; -for (let i = 0; i < 100; i++) { - tmp = new Date(tmp.getDate() - 1); +for (let i = 0; i < NUMBER_OF_DAYS; i++) { + tmp = addDays(tmp, -1); days.push(tmp); } - - -function getNewDateURL(date: Date): String { - let year: Number = date.getFullYear(); - let month: Number = date.getMonth(); - let day: Number = date.getDay(); - let monthFmt: String; - let dayFmt: String; - - if (month < 10) monthFmt = `0${month}`; - else monthFmt = `${month}`; +async function scrapeData() { + const csv = new CSVManager(CSV_PATH); + csv.createWriteStream(); - if (day < 10) dayFmt = `0${day}`; - else dayFmt = `${day}`; + for (let i = 0; i < days.length; i++) { + const url = getNewDateUrl(days[i]); + axios.get(url).then((res: any) => { - return `${SITE_URL}/placebydate/${USA_ID}/${year}-${monthFmt}-${dayFmt}`; -} \ No newline at end of file + if (res.status === 200) { + let trends: Array = handleHttpResponse(res.data, days[i]); + process.stdout.write(`URL: ${url}\n`); + writeToCsv(trends, csv); + + } + else process.stderr.write(`Server returned with a Status of: ${res.status}\n`); + }); + + await sleep(WAIT); + } +} + +function handleHttpResponse(html: string, day: Date): Array { + let trends: Array = []; + const $ = cheerio.load(html); + + $('div.panel > ul.list-group').children().each((i, child) => { + let term = child.firstChild.firstChild.data; + trends.push(new Trend(i + 1, term, day)); + }); + + return trends; +} + +// SO: https://stackoverflow.com/questions/14249506/how-can-i-wait-in-node-js-javascript-l-need-to-pause-for-a-period-of-time +function sleep(ms: number) { + return new Promise(res => { + setTimeout(res, ms); + }); +} + +function getNewDateUrl(date: Date): string { + const year: number = date.getFullYear(); + const month: number = date.getMonth(); + const day: number = date.getDay(); + let monthStr: string; + let dayStr: string; + + if (month < 10) monthStr = `0${month}`; + else monthStr = `${month}`; + + if (day < 10) dayStr = `0${day}`; + else dayStr = `${day}`; + + return `${BASE_URL}/placebydate/${USA_ID}/${year}-${monthStr}-${dayStr}`; +} + +function writeToCsv(trends: Array, csv: CSVManager) { + for (let i = 0; i < trends.length; i++) { + csv.write(trends[i].toCsv()); + } +} + +class CSVManager { + private stream: fs.WriteStream = null; + private path: string; + + constructor(path: string) { + this.path = path; + } + + public write(row: string) { + if (this.stream !== null) { + this.stream.write(row, err => { if (err) throw err; }); + } else process.stderr.write(`Unable to write to "${this.path}". Stream does not exist.`); + } + + public createWriteStream() { + if (fs.existsSync(this.path)) fs.closeSync(fs.openSync(this.path, "w")); + this.stream = fs.createWriteStream(this.path, { flags: "a" }); + } + + public closeWriteStream() { + this.stream.end(); + this.stream = null; + } + + public getPath() { + return this.path; + } +} + + +class Trend { + private name: string; + private date: Date; + private ranking: number; + + constructor(ranking: number, name: string, date: Date) { + this.name = name; + this.date = date; + this.ranking = ranking; + } + + public getName() { + return this.name; + } + + public getDate() { + return this.date; + } + + public getRanking() { + return this.ranking; + } + + public toCsv() { + return `${this.name}, ${this.ranking}, ${this.date}\n`; + } +} + + +scrapeData(); \ No newline at end of file diff --git a/package.json b/package.json index 4349a27..7ea51d5 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,17 @@ "main": "index.js", "repository": "ssh://gitea@git.paoda.moe:31059/paoda/trendogate_scraper.git", "author": "paoda ", + "scripts": { + "start": "node ./out/index.js" + }, "license": "MIT", - "private": true + "private": true, + "dependencies": { + "axios": "^0.19.2", + "cheerio": "^1.0.0-rc.3", + "date-fns": "^2.10.0" + }, + "devDependencies": { + "@types/cheerio": "^0.22.16" + } } diff --git a/tsconfig.json b/tsconfig.json index 7f3508a..3f75783 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -3,7 +3,8 @@ "target": "ES5", "module": "CommonJS", "outDir": "out", - "sourceMap": true + "sourceMap": true, + "esModuleInterop": true }, "compileOnSave": true } \ No newline at end of file diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 0000000..1547648 --- /dev/null +++ b/yarn.lock @@ -0,0 +1,199 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. +# yarn lockfile v1 + + +"@types/cheerio@^0.22.16": + version "0.22.16" + resolved "https://registry.yarnpkg.com/@types/cheerio/-/cheerio-0.22.16.tgz#c748a97b8a6f781b04bbda4a552e11b35bcc77e4" + integrity sha512-bSbnU/D4yzFdzLpp3+rcDj0aQQMIRUBNJU7azPxdqMpnexjUSvGJyDuOBQBHeOZh1mMKgsJm6Dy+LLh80Ew4tQ== + dependencies: + "@types/node" "*" + +"@types/node@*": + version "13.7.7" + resolved "https://registry.yarnpkg.com/@types/node/-/node-13.7.7.tgz#1628e6461ba8cc9b53196dfeaeec7b07fa6eea99" + integrity sha512-Uo4chgKbnPNlxQwoFmYIwctkQVkMMmsAoGGU4JKwLuvBefF0pCq4FybNSnfkfRCpC7ZW7kttcC/TrRtAJsvGtg== + +axios@^0.19.2: + version "0.19.2" + resolved "https://registry.yarnpkg.com/axios/-/axios-0.19.2.tgz#3ea36c5d8818d0d5f8a8a97a6d36b86cdc00cb27" + integrity sha512-fjgm5MvRHLhx+osE2xoekY70AhARk3a6hkN+3Io1jc00jtquGvxYlKlsFUhmUET0V5te6CcZI7lcv2Ym61mjHA== + dependencies: + follow-redirects "1.5.10" + +boolbase@~1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e" + integrity sha1-aN/1++YMUes3cl6p4+0xDcwed24= + +cheerio@^1.0.0-rc.3: + version "1.0.0-rc.3" + resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.3.tgz#094636d425b2e9c0f4eb91a46c05630c9a1a8bf6" + integrity sha512-0td5ijfUPuubwLUu0OBoe98gZj8C/AA+RW3v67GPlGOrvxWjZmBXiBCRU+I8VEiNyJzjth40POfHiz2RB3gImA== + dependencies: + css-select "~1.2.0" + dom-serializer "~0.1.1" + entities "~1.1.1" + htmlparser2 "^3.9.1" + lodash "^4.15.0" + parse5 "^3.0.1" + +css-select@~1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/css-select/-/css-select-1.2.0.tgz#2b3a110539c5355f1cd8d314623e870b121ec858" + integrity sha1-KzoRBTnFNV8c2NMUYj6HCxIeyFg= + dependencies: + boolbase "~1.0.0" + css-what "2.1" + domutils "1.5.1" + nth-check "~1.0.1" + +css-what@2.1: + version "2.1.3" + resolved "https://registry.yarnpkg.com/css-what/-/css-what-2.1.3.tgz#a6d7604573365fe74686c3f311c56513d88285f2" + integrity sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg== + +date-fns@^2.10.0: + version "2.10.0" + resolved "https://registry.yarnpkg.com/date-fns/-/date-fns-2.10.0.tgz#abd10604d8bafb0bcbd2ba2e9b0563b922ae4b6b" + integrity sha512-EhfEKevYGWhWlZbNeplfhIU/+N+x0iCIx7VzKlXma2EdQyznVlZhCptXUY+BegNpPW2kjdx15Rvq503YcXXrcA== + +debug@=3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/debug/-/debug-3.1.0.tgz#5bb5a0672628b64149566ba16819e61518c67261" + integrity sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g== + dependencies: + ms "2.0.0" + +dom-serializer@0: + version "0.2.2" + resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-0.2.2.tgz#1afb81f533717175d478655debc5e332d9f9bb51" + integrity sha512-2/xPb3ORsQ42nHYiSunXkDjPLBaEj/xTwUO4B7XCZQTRk7EBtTOPaygh10YAAh2OI1Qrp6NWfpAhzswj0ydt9g== + dependencies: + domelementtype "^2.0.1" + entities "^2.0.0" + +dom-serializer@~0.1.1: + version "0.1.1" + resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-0.1.1.tgz#1ec4059e284babed36eec2941d4a970a189ce7c0" + integrity sha512-l0IU0pPzLWSHBcieZbpOKgkIn3ts3vAh7ZuFyXNwJxJXk/c4Gwj9xaTJwIDVQCXawWD0qb3IzMGH5rglQaO0XA== + dependencies: + domelementtype "^1.3.0" + entities "^1.1.1" + +domelementtype@1, domelementtype@^1.3.0, domelementtype@^1.3.1: + version "1.3.1" + resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-1.3.1.tgz#d048c44b37b0d10a7f2a3d5fee3f4333d790481f" + integrity sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w== + +domelementtype@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.0.1.tgz#1f8bdfe91f5a78063274e803b4bdcedf6e94f94d" + integrity sha512-5HOHUDsYZWV8FGWN0Njbr/Rn7f/eWSQi1v7+HsUVwXgn8nWWlL64zKDkS0n8ZmQ3mlWOMuXOnR+7Nx/5tMO5AQ== + +domhandler@^2.3.0: + version "2.4.2" + resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-2.4.2.tgz#8805097e933d65e85546f726d60f5eb88b44f803" + integrity sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA== + dependencies: + domelementtype "1" + +domutils@1.5.1: + version "1.5.1" + resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.5.1.tgz#dcd8488a26f563d61079e48c9f7b7e32373682cf" + integrity sha1-3NhIiib1Y9YQeeSMn3t+Mjc2gs8= + dependencies: + dom-serializer "0" + domelementtype "1" + +domutils@^1.5.1: + version "1.7.0" + resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.7.0.tgz#56ea341e834e06e6748af7a1cb25da67ea9f8c2a" + integrity sha512-Lgd2XcJ/NjEw+7tFvfKxOzCYKZsdct5lczQ2ZaQY8Djz7pfAD3Gbp8ySJWtreII/vDlMVmxwa6pHmdxIYgttDg== + dependencies: + dom-serializer "0" + domelementtype "1" + +entities@^1.1.1, entities@~1.1.1: + version "1.1.2" + resolved "https://registry.yarnpkg.com/entities/-/entities-1.1.2.tgz#bdfa735299664dfafd34529ed4f8522a275fea56" + integrity sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w== + +entities@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/entities/-/entities-2.0.0.tgz#68d6084cab1b079767540d80e56a39b423e4abf4" + integrity sha512-D9f7V0JSRwIxlRI2mjMqufDrRDnx8p+eEOz7aUM9SuvF8gsBzra0/6tbjl1m8eQHrZlYj6PxqE00hZ1SAIKPLw== + +follow-redirects@1.5.10: + version "1.5.10" + resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.5.10.tgz#7b7a9f9aea2fdff36786a94ff643ed07f4ff5e2a" + integrity sha512-0V5l4Cizzvqt5D44aTXbFZz+FtyXV1vrDN6qrelxtfYQKW0KO0W2T/hkE8xvGa/540LkZlkaUjO4ailYTFtHVQ== + dependencies: + debug "=3.1.0" + +htmlparser2@^3.9.1: + version "3.10.1" + resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-3.10.1.tgz#bd679dc3f59897b6a34bb10749c855bb53a9392f" + integrity sha512-IgieNijUMbkDovyoKObU1DUhm1iwNYE/fuifEoEHfd1oZKZDaONBSkal7Y01shxsM49R4XaMdGez3WnF9UfiCQ== + dependencies: + domelementtype "^1.3.1" + domhandler "^2.3.0" + domutils "^1.5.1" + entities "^1.1.1" + inherits "^2.0.1" + readable-stream "^3.1.1" + +inherits@^2.0.1, inherits@^2.0.3: + version "2.0.4" + resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" + integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== + +lodash@^4.15.0: + version "4.17.15" + resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.15.tgz#b447f6670a0455bbfeedd11392eff330ea097548" + integrity sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A== + +ms@2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8" + integrity sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g= + +nth-check@~1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-1.0.2.tgz#b2bd295c37e3dd58a3bf0700376663ba4d9cf05c" + integrity sha512-WeBOdju8SnzPN5vTUJYxYUxLeXpCaVP5i5e0LF8fg7WORF2Wd7wFX/pk0tYZk7s8T+J7VLy0Da6J1+wCT0AtHg== + dependencies: + boolbase "~1.0.0" + +parse5@^3.0.1: + version "3.0.3" + resolved "https://registry.yarnpkg.com/parse5/-/parse5-3.0.3.tgz#042f792ffdd36851551cf4e9e066b3874ab45b5c" + integrity sha512-rgO9Zg5LLLkfJF9E6CCmXlSE4UVceloys8JrFqCcHloC3usd/kJCyPDwH2SOlzix2j3xaP9sUX3e8+kvkuleAA== + dependencies: + "@types/node" "*" + +readable-stream@^3.1.1: + version "3.6.0" + resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198" + integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA== + dependencies: + inherits "^2.0.3" + string_decoder "^1.1.1" + util-deprecate "^1.0.1" + +safe-buffer@~5.2.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.0.tgz#b74daec49b1148f88c64b68d49b1e815c1f2f519" + integrity sha512-fZEwUGbVl7kouZs1jCdMLdt95hdIv0ZeHg6L7qPeciMZhZ+/gdesW4wgTARkrFWEpspjEATAzUGPG8N2jJiwbg== + +string_decoder@^1.1.1: + version "1.3.0" + resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e" + integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA== + dependencies: + safe-buffer "~5.2.0" + +util-deprecate@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" + integrity sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=