From fc48ff89c2015757ba9936795b34201156e38b9e Mon Sep 17 00:00:00 2001 From: JoelHMikael Date: Sat, 18 Dec 2021 14:15:53 +0200 Subject: Food scraping Added food scraping from the RSS & implemented showing the result on the server --- parse.js | 5 ++++- scrape.js | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------- server.js | 49 +++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 101 insertions(+), 17 deletions(-) diff --git a/parse.js b/parse.js index 819b3ed..7c47423 100644 --- a/parse.js +++ b/parse.js @@ -94,6 +94,8 @@ function getToLineStartingWith(s, ss, start = 0) function findExpression(data, expr, start = 0) { + if (start == -1) + return -1; if (!(Number.isInteger(start) && (start >= 0))) throw new TypeError("Start must be a positive integer!"); while ((data.substring(start, start + expr.length) !== expr) && (start + expr.length < data.length)) @@ -105,7 +107,7 @@ function findExpression(data, expr, start = 0) function parseCluttered(s) { - return s.replaceAll(".", "").replaceAll(" ", "").toUpperCase(); + return s.replaceAll(".", "").replaceAll(" ", "").replaceAll("<", "(").replaceAll(">", ")").toUpperCase(); } function parseClasses(classData, DB) @@ -280,3 +282,4 @@ exports.indexType = getIndexType; exports.classes = parseClasses; exports.get = getShift; exports.cluttered = parseCluttered; +exports.find = findExpression; diff --git a/scrape.js b/scrape.js index 17d6169..45dd0b4 100644 --- a/scrape.js +++ b/scrape.js @@ -1,17 +1,67 @@ -const https = require("https"); +const https = require("https"); const parse = require("./parse.js"); const fs = require("fs"); const events = require("events"); async function urlOpen(path) { - return new Promise(resolve => + return new Promise((resolve, reject) => { let req = https.get(path, res => { res.on("data", resolve); }); - req.on("error", e => - { - console.error(e); - }); - req.end(); }); + req.on("error", e => + { + console.error(e); + }); + req.end(); } + +async function scrapeFood(url) +{ + let data = await urlOpen(url); + data = data.toString("utf-8"); + + let foodList = []; + const weekdays = ["su", "ma", "ti", "ke", "to", "pe", "la"]; + + let titleTags = ["", ""]; + let foodTags = [""]; + const getSpan = (data, tags, i = 0) => + { + return [ + parse.find(data, tags[0], i) + tags[0].length, + parse.find(data, tags[1], i) + ]; + } + let mainTitle = parse.find(data, titleTags[1]) + titleTags[1].length; + let titleSpan = getSpan(data, titleTags, mainTitle); + let foodSpan = getSpan(data, foodTags); + + while ( + (titleSpan[0] !== -1) + && (titleSpan[1] !== -1) + && (foodSpan[0] !== -1) + && (foodSpan[1] !== -1) + ) + { + let title = data.substring(titleSpan[0], titleSpan[1]); + let food = data.substring(foodSpan[0], foodSpan[1]); + + let weekdayIndex = weekdays.findIndex(val => { return val === title.substring(0, 2); }); + if (weekdayIndex !== -1) + foodList[weekdayIndex] = [title, food]; + + titleSpan = getSpan(data, titleTags, foodSpan[1]); + foodSpan = getSpan(data, foodTags, titleSpan[1]); + } + + return foodList; +} + +function getFoodLink(week) +{ + return `https://eruokalista.lohja.fi/AromieMenus/FI/Default/Lohja/Koulut/Rss.aspx?Id=97f76449-f57c-4217-aede-b5f9dbf2b41e&DateMode=${week}`; +} + +exports.food = scrapeFood; +exports.link = getFoodLink; diff --git a/server.js b/server.js index 662d319..ba176ef 100644 --- a/server.js +++ b/server.js @@ -2,6 +2,7 @@ const http = require("http"); const fs = require("fs"); const url = require("url"); const parse = require("./parse.js"); +const scrape = require("./scrape.js"); async function init() @@ -15,13 +16,24 @@ async function init() }; const errorPath = "./404/index.html"; - let shiftCont = await openFile("./shifts.txt"); + // await for needed things in async + let [shiftCont, classCont, foodsThisWeek, foodsNextWeek] = await Promise.all([ + openFile("./shifts.txt"), + openFile("./classes.txt"), + scrape.food(scrape.link(1)), + scrape.food(scrape.link(2)) + ]); + + // get the food shift "database" shiftCont = shiftCont.toString("utf-8").replaceAll("\r", ""); // \r because of the \r\n newline on windows which creates problems - let classCont = await openFile("./classes.txt"); classCont = classCont.toString("utf-8").replaceAll("\r", ""); let DB = parse.build(shiftCont); parse.classes(classCont, DB); + // get the food "database" + const foods = [foodsThisWeek, foodsNextWeek]; + + // server code async function server(req, res) { let q = url.parse(req.url, true); @@ -33,21 +45,21 @@ async function init() const args = { "path": path, "query": q.query, - "db": DB + "db": DB, + "foods": foods }; + if (typeof build[path] === "function") - { data = await build[path](args); - } else - { data = await build404(errorPath, q.pathname); - } + res.write(data); res.end(); } - http.createServer(server).listen(8080); + // start server + http.createServer(server).listen(80); } @@ -68,6 +80,8 @@ async function buildMain(args) { const path = args["path"]; const query = args["query"]; + console.log(query); + const foods = args["foods"]; let index; if (typeof query.index === "string") index = parse.cluttered(query.index); @@ -79,6 +93,7 @@ async function buildMain(args) const d = new Date(); let day = d.getDay(); + const actualDay = day; day = +((day === 0) || (day === 6)) + (+(!(day === 0) && !(day === 6)) * day); if ((typeof query.day === "string") && (parseInt(query.day).toString() === query.day) && (!isNaN(parseInt(query.day))) && (parseInt(query.day) > 0) && (parseInt(query.day) < 7)) day = parseInt(query.day); @@ -118,10 +133,26 @@ async function buildMain(args) res["shift"] = "Kurssilla/opettajalla/luokalla ei ole ruokailua päivällä tai kurssia ei ole olemassa!"; // get the day - res["day"] = ["su", "ma", "ti", "ke", "to", "pe", "la"][day]; + let weekdays = ["su", "ma", "ti", "ke", "to", "pe", "la"]; + res["day"] = weekdays[day]; if (res["shift"] === "") data_string = data_string.replace('
', '