141 lines
3.5 KiB
TypeScript
141 lines
3.5 KiB
TypeScript
"use strict";
|
|
|
|
import { addDays, format } from 'date-fns';
|
|
import cheerio from 'cheerio';
|
|
import axios from 'axios';
|
|
import fs from 'fs';
|
|
|
|
// Root address of the site that is scraped.
const BASE_URL = "https://trendogate.com/";
// File that receives the scraped rows.
const CSV_PATH = "./data.csv";
// Place id inserted into the page URL.
const USA_ID = 23424977;
// Number of past days to visit, counted back from yesterday.
const NUMBER_OF_DAYS = 200;
// Pause between two consecutive page requests, in milliseconds.
const WAIT = 500;

// Example page URL: https://trendogate.com/placebydate/23424977/2015-04-01
// CSV column order: Topic | Position | Date

const now: Date = new Date();
let tmp: Date = now;
let days: Array<Date> = [];

// Step backwards one day at a time until the list holds NUMBER_OF_DAYS
// entries; the first entry is yesterday and the last is the oldest day.
while (days.length < NUMBER_OF_DAYS) {
    tmp = addDays(tmp, -1);
    days.push(tmp);
}
|
|
|
|
/**
 * Downloads the trend page of every entry in `days` and appends the parsed
 * trends to the CSV file at `CSV_PATH`.
 *
 * Pages are requested strictly one after another: each response is awaited
 * and written before the `WAIT` pause and the next request, so rows reach the
 * file in the same order as `days`. A failed request is reported on stderr
 * and skipped instead of surfacing as an unhandled promise rejection. The
 * CSV stream is always closed once the loop is done.
 *
 * @returns A promise that settles after the last page has been processed.
 */
async function scrapeData(): Promise<void> {
    const csv = new CSVManager(CSV_PATH);
    csv.createWriteStream();

    try {
        for (const day of days) {
            const url = getNewDateUrl(day);

            try {
                const res = await axios.get(url);

                if (res.status === 200) {
                    const trends: Array<Trend> = handleHttpResponse(res.data, day);
                    process.stdout.write(`URL: ${url}\n`);
                    writeToCsv(trends, csv);
                } else {
                    process.stderr.write(`Server returned with a Status of: ${res.status}\n`);
                }
            } catch (err) {
                // With its default settings axios rejects on network errors
                // and on non-2xx statuses; report the failure and move on to
                // the next day rather than aborting the whole run.
                const reason = err instanceof Error ? err.message : String(err);
                process.stderr.write(`Request to ${url} failed: ${reason}\n`);
            }

            // Throttle so the remote server is not hit in a tight loop.
            await sleep(WAIT);
        }
    } finally {
        // Ending the stream lets buffered rows be flushed to disk.
        csv.closeWriteStream();
    }
}
|
|
|
|
/**
 * Extracts the trends listed in one downloaded page.
 *
 * Every child of `div.panel > ul.list-group` is treated as one trend whose
 * ranking is its 1-based position in the list. The trend text is read from
 * the first child of the item's first child (presumably a link wrapping a
 * text node; confirm against the live markup). Items that lack that nesting
 * or carry no text data are skipped instead of throwing, so one malformed
 * entry no longer discards the whole page.
 *
 * @param html Raw HTML of the page.
 * @param day  Date the page belongs to; stored on every returned trend.
 * @returns The trends found, in page order.
 */
function handleHttpResponse(html: string, day: Date): Array<Trend> {
    const trends: Array<Trend> = [];
    const $ = cheerio.load(html);

    $('div.panel > ul.list-group').children().each((i, child) => {
        const wrapper = child.firstChild;
        const textNode = wrapper ? wrapper.firstChild : null;
        const term = textNode ? textNode.data : undefined;

        // Returning undefined from the callback simply continues the iteration.
        if (typeof term !== 'string') return;

        trends.push(new Trend(i + 1, term, day));
    });

    return trends;
}
|
|
|
|
/**
 * Returns a promise that is resolved by a timer after `ms` milliseconds, so
 * that an async caller can pause with `await sleep(ms)`.
 *
 * Based on: https://stackoverflow.com/questions/14249506/how-can-i-wait-in-node-js-javascript-l-need-to-pause-for-a-period-of-time
 *
 * @param ms Delay handed to `setTimeout`, in milliseconds.
 */
function sleep(ms: number) {
    return new Promise(wake => void setTimeout(wake, ms));
}
|
|
|
|
/**
 * Builds the page URL for one day, in the form
 * `<base>/placebydate/<place id>/<yyyy-MM-dd>`.
 *
 * Trailing slashes are removed from `BASE_URL` before the path is appended,
 * so a base written as "https://host/" does not produce "https://host//...".
 *
 * @param date Day whose page is requested.
 * @returns The absolute URL of that day's page.
 */
function getNewDateUrl(date: Date): string {
    const root = BASE_URL.replace(/\/+$/, "");
    return `${root}/placebydate/${USA_ID}/${format(date, "yyyy-MM-dd")}`;
}
|
|
|
|
/**
 * Writes one day's trends to the CSV file.
 *
 * Only the first 20 entries of `trends` are considered. Of those, an entry
 * is written only when its whole name consists of basic Latin letters, the
 * Latin-1 letter ranges listed in the pattern, whitespace and `#`; entries
 * that fail the check are dropped and still count towards the 20.
 *
 * @param trends Trends of a single day, in ranking order.
 * @param csv    Manager that receives one CSV row per accepted trend.
 */
function writeToCsv(trends: Array<Trend>, csv: CSVManager) {
    const allowedName = /^([A-Za-z\u00C0-\u00D6\u00D8-\u00f6\u00f8-\u00ff\s\#]*)$/i;

    for (const entry of trends.slice(0, 20)) {
        if (!allowedName.test(entry.getName())) continue;
        csv.write(entry.toCsv());
    }
}
|
|
|
|
/**
 * Small wrapper around an append-mode file write stream for CSV rows.
 *
 * The stream is opened explicitly with `createWriteStream()` and released
 * with `closeWriteStream()`; both calls are safe to repeat.
 */
class CSVManager {
    // Null while no stream is open.
    private stream: fs.WriteStream | null = null;
    private path: string;

    /**
     * @param path Location of the CSV file; nothing is opened yet.
     */
    constructor(path: string) {
        this.path = path;
    }

    /**
     * Appends `row` verbatim to the file.
     *
     * When no stream is open, a message is written to stderr and the row is
     * dropped. A write error is thrown from the stream's completion callback,
     * which runs outside the caller's stack and so cannot be caught by a
     * try/catch around this call.
     *
     * @param row Text to append; the caller supplies the line terminator.
     */
    public write(row: string) {
        if (this.stream !== null) {
            this.stream.write(row, err => { if (err) throw err; });
        } else {
            process.stderr.write(`Unable to write to "${this.path}". Stream does not exist.\n`);
        }
    }

    /**
     * Opens the file for appending, creating it first when it is missing.
     * Does nothing when a stream is already open, so a repeated call does
     * not leak the earlier stream.
     */
    public createWriteStream() {
        if (this.stream !== null) return;

        // Creating the file synchronously makes a bad path fail right here.
        if (!fs.existsSync(this.path)) fs.closeSync(fs.openSync(this.path, "w"));
        this.stream = fs.createWriteStream(this.path, { flags: "a" });
    }

    /**
     * Ends the stream and forgets it. Does nothing when no stream is open.
     */
    public closeWriteStream() {
        if (this.stream === null) return;

        this.stream.end();
        this.stream = null;
    }

    /**
     * @returns The path given to the constructor.
     */
    public getPath() {
        return this.path;
    }
}
|
|
|
|
/**
 * One trending topic: its text, its ranking and the day it was listed.
 */
class Trend {
    private name: string;
    private date: Date;
    private ranking: number;

    /**
     * @param ranking Position of the topic in the day's list.
     * @param name    Topic text.
     * @param date    Day the topic was listed.
     */
    constructor(ranking: number, name: string, date: Date) {
        this.name = name;
        this.date = date;
        this.ranking = ranking;
    }

    /** @returns The topic text. */
    public getName() {
        return this.name;
    }

    /** @returns The day the topic was listed. */
    public getDate() {
        return this.date;
    }

    /** @returns The position of the topic in the day's list. */
    public getRanking() {
        return this.ranking;
    }

    /**
     * Makes a value safe to embed as a single CSV field.
     *
     * A value containing a comma, a double quote or a line break is wrapped
     * in double quotes with embedded quotes doubled; any other value is
     * returned unchanged.
     *
     * @param value Raw field text.
     * @returns The field text, quoted only when necessary.
     */
    public static escapeCsvField(value: string): string {
        if (!/[",\r\n]/.test(value)) return value;
        return `"${value.replace(/"/g, '""')}"`;
    }

    /**
     * Serialises the trend as one CSV row, `name, ranking, yyyy-MM-dd`,
     * terminated by a newline. The name is escaped so that a comma, quote
     * or line break inside it cannot split or corrupt the row.
     *
     * @returns The CSV row including its trailing newline.
     */
    public toCsv() {
        const nameField = Trend.escapeCsvField(this.name);
        return `${nameField}, ${this.ranking}, ${format(this.date, "yyyy-MM-dd")}\n`;
    }
}
|
|
|
|
// Entry point: start the scrape and report a failure instead of leaving the
// returned promise unhandled; a failed run exits with a non-zero status.
scrapeData().catch((err: unknown) => {
    const reason = err instanceof Error ? err.message : String(err);
    process.stderr.write(`Scraping aborted: ${reason}\n`);
    process.exitCode = 1;
});