diff --git a/index.ts b/index.ts index fbfd54c..8fea5ad 100644 --- a/index.ts +++ b/index.ts @@ -2,7 +2,7 @@ * @file Scrapes https://trendogate.com and creates a .csv of historical trending data * @author Rekai Musuka * @license MIT - * @version 2020.03.17 + * @version 2020.03.23 */ import { addDays, format } from 'date-fns'; @@ -13,7 +13,6 @@ import fs from 'fs'; const BASE_URL = "https://trendogate.com/"; const CSV_PATH = "./data.csv"; const USA_ID = 23424977; -const NUMBER_OF_DAYS = 200; const WAIT = 500; // in ms // e.g https://trendogate.com/placebydate/23424977/2015-04-01 @@ -28,32 +27,43 @@ while (goal >= tmp) { tmp = addDays(tmp, 7); } +/** + * Scrapes Data from trendogate. + * + * @return a Promise with no success value + */ async function scrapeData(): Promise { const csv = new CSVManager(CSV_PATH); - csv.createWriteStream(); + csv.createWriteStream(); // remains open until we're completely done with scraping for (let i = 0; i < days.length; i++) { - const url = getNewDateUrl(days[i]); - axios.get(url).then((res: any) => { + const url = getNewDateUrl(days[i]); // Construct a valid trendogate URL given a date object + axios.get(url).then((res: any) => { // GET the URL if (res.status === 200) { let trends: Array = handleHttpResponse(res.data, days[i]); process.stdout.write(`URL: ${url}\n`); - writeToCsv(trends, csv); + writeToCsv(trends, csv); // Write the list of trends to the spreadsheet } else process.stderr.write(`Server returned with a Status of: ${res.status}\n`); }); - await sleep(WAIT); + await sleep(WAIT); // prevents spamming the server. Decreases chances of us being IP banned. } } +/** + * Transforms trendogate HTTP Response into a list of trends from said response. + * @param html String of HTML data + * @param day A Date object representing the day the data from trendogate is from. 
+ * @return An Array of trends + */ function handleHttpResponse(html: string, day: Date): Array { let trends: Array = []; const $ = cheerio.load(html); - $('div.panel > ul.list-group').children().each((i, child) => { + $('div.panel > ul.list-group').children().each((i, child) => { // Query for getting the elements which house the trends let term = child.firstChild.firstChild.data; trends.push(new Trend(i + 1, term, day)); @@ -69,10 +79,23 @@ function sleep(ms: number): Promise { }); } +/** + * Creates a valid trendogate URL based on a provided Date Object + * @param date The day of the trends you want to query + */ function getNewDateUrl(date: Date): string { return `${BASE_URL}/placebydate/${USA_ID}/${format(date, "yyyy-MM-dd")}`; } +/** + * Iterates over a list of trends and writes a limited amount of them to a CSV file + * + * Some Constraints: + * * Number of trends per day + * * trend must be Valid Latin Extended character set (in UTF-8 of course) + * @param trends List of Trends + * @param csv CSV which will be written to + */ function writeToCsv(trends: Array, csv: CSVManager): void { for (let i = 0; i < trends.length; i++) { if (i == 20) break; // Should Only Write 20 per day hopefully. @@ -86,6 +109,9 @@ function writeToCsv(trends: Array, csv: CSVManager): void { } } +/** + * Manages a single CSV File + */ class CSVManager { private stream: fs.WriteStream = null; private path: string; @@ -94,50 +120,81 @@ class CSVManager { this.path = path; } + /** + * Writes plaintext to the CSV file + * @param row The line of text which will be written + */ public write(row: string): void { if (this.stream !== null) { this.stream.write(row, err => { if (err) throw err; }); } else process.stderr.write(`Unable to write to "${this.path}". Stream does not exist.`); } + /** + * Creates a write stream which must be closed later + * + * Also, method checks to see if file exists beforehand. If not, a new file is "touched". 
+ */ public createWriteStream(): void { if (!fs.existsSync(this.path)) fs.closeSync(fs.openSync(this.path, "w")); this.stream = fs.createWriteStream(this.path, { flags: "a" }); } + /** + * Closes an open write stream + */ public closeWriteStream(): void { this.stream.end(); this.stream = null; } + /** + * Gets the path of the CSV file + */ public getPath(): string { return this.path; } } +/** + * Represents a trend pulled from trendogate + */ class Trend { private name: string; private date: Date; - private ranking: number; + private ranking: number; // from 1 -> 50, a trend's ranking on a specific date constructor(ranking: number, name: string, date: Date) { - this.name = name.trim(); + this.name = name.trim(); // Cuts extra whitespace this.date = date; this.ranking = ranking; } + /** + * Gets the title of the trend + */ public getName(): string { return this.name; } + /** + * Gets the date the trend is from + */ public getDate(): Date { return this.date; } + /** + * Gets the trend's ranking on its day. + */ public getRanking(): number { return this.ranking; } + /** + * Returns a string which is constructed so that it can be + * appended to a CSV File + */ public toCsv(): string { return `${this.name}, ${this.ranking}, ${format(this.date, "yyyy-MM-dd")}\n`; } diff --git a/package.json b/package.json index 32cf998..1eadb66 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "trendogate_scraper", - "version": "2020.03.17", + "version": "2020.03.23", "description": "Scrapes information from https://trendogate.com/ for CSCI 1107", "main": "index.js", "repository": "ssh://gitea@git.paoda.moe:31059/paoda/trendogate_scraper.git",