diff options
author | JoelHMikael <joel.h.kronqvist@gmail.com> | 2021-12-18 14:15:53 +0200 |
---|---|---|
committer | JoelHMikael <joel.h.kronqvist@gmail.com> | 2021-12-18 14:15:53 +0200 |
commit | fc48ff89c2015757ba9936795b34201156e38b9e (patch) | |
tree | 977099545772551d020f781de5dd68948f0fd4b0 | |
parent | b12ab0480ebeacbfb0d6841cb8aca218a5bf2f5a (diff) | |
download | LYLLRuoka-fc48ff89c2015757ba9936795b34201156e38b9e.tar.gz LYLLRuoka-fc48ff89c2015757ba9936795b34201156e38b9e.zip |
Food scraping
Added food scraping from the RSS & implemented showing the result on the server
-rw-r--r-- | parse.js | 5 | ||||
-rw-r--r-- | scrape.js | 64 | ||||
-rw-r--r-- | server.js | 49 |
3 files changed, 101 insertions, 17 deletions
@@ -94,6 +94,8 @@ function getToLineStartingWith(s, ss, start = 0) function findExpression(data, expr, start = 0) { + if (start == -1) + return -1; if (!(Number.isInteger(start) && (start >= 0))) throw new TypeError("Start must be a positive integer!"); while ((data.substring(start, start + expr.length) !== expr) && (start + expr.length < data.length)) @@ -105,7 +107,7 @@ function findExpression(data, expr, start = 0) function parseCluttered(s) { - return s.replaceAll(".", "").replaceAll(" ", "").toUpperCase(); + return s.replaceAll(".", "").replaceAll(" ", "").replaceAll("<", "(").replaceAll(">", ")").toUpperCase(); } function parseClasses(classData, DB) @@ -280,3 +282,4 @@ exports.indexType = getIndexType; exports.classes = parseClasses; exports.get = getShift; exports.cluttered = parseCluttered; +exports.find = findExpression; @@ -1,17 +1,67 @@ -const https = require("https"); +const https = require("https"); const parse = require("./parse.js"); const fs = require("fs"); const events = require("events"); async function urlOpen(path) { - return new Promise(resolve => + return new Promise((resolve, reject) => { let req = https.get(path, res => { res.on("data", resolve); }); - req.on("error", e => - { - console.error(e); - }); - req.end(); }); + req.on("error", e => + { + console.error(e); + }); + req.end(); } + +async function scrapeFood(url) +{ + let data = await urlOpen(url); + data = data.toString("utf-8"); + + let foodList = []; + const weekdays = ["su", "ma", "ti", "ke", "to", "pe", "la"]; + + let titleTags = ["<title>", "</title>"]; + let foodTags = ["<![CDATA[", "]]>"]; + const getSpan = (data, tags, i = 0) => + { + return [ + parse.find(data, tags[0], i) + tags[0].length, + parse.find(data, tags[1], i) + ]; + } + let mainTitle = parse.find(data, titleTags[1]) + titleTags[1].length; + let titleSpan = getSpan(data, titleTags, mainTitle); + let foodSpan = getSpan(data, foodTags); + + while ( + (titleSpan[0] !== -1) + && (titleSpan[1] !== -1) + && (foodSpan[0] !== -1) + && (foodSpan[1] !== -1) + ) + { + let title = data.substring(titleSpan[0], titleSpan[1]); + let food = data.substring(foodSpan[0], foodSpan[1]); + + let weekdayIndex = weekdays.findIndex(val => { return val === title.substring(0, 2); }); + if (weekdayIndex !== -1) + foodList[weekdayIndex] = [title, food]; + + titleSpan = getSpan(data, titleTags, foodSpan[1]); + foodSpan = getSpan(data, foodTags, titleSpan[1]); + } + + return foodList; +} + +function getFoodLink(week) +{ + return `https://eruokalista.lohja.fi/AromieMenus/FI/Default/Lohja/Koulut/Rss.aspx?Id=97f76449-f57c-4217-aede-b5f9dbf2b41e&DateMode=${week}`; +} + +exports.food = scrapeFood; +exports.link = getFoodLink; @@ -2,6 +2,7 @@ const http = require("http"); const fs = require("fs"); const url = require("url"); const parse = require("./parse.js"); +const scrape = require("./scrape.js"); async function init() @@ -15,13 +16,24 @@ async function init() }; const errorPath = "./404/index.html"; - let shiftCont = await openFile("./shifts.txt"); + // await for needed things in async + let [shiftCont, classCont, foodsThisWeek, foodsNextWeek] = await Promise.all([ + openFile("./shifts.txt"), + openFile("./classes.txt"), + scrape.food(scrape.link(1)), + scrape.food(scrape.link(2)) + ]); + + // get the food shift "database" shiftCont = shiftCont.toString("utf-8").replaceAll("\r", ""); // \r because of the \r\n newline on windows which creates problems - let classCont = await openFile("./classes.txt"); classCont = classCont.toString("utf-8").replaceAll("\r", ""); let DB = parse.build(shiftCont); parse.classes(classCont, DB); + // get the food "database" + const foods = [foodsThisWeek, foodsNextWeek]; + + // server code async function server(req, res) { let q = url.parse(req.url, true); @@ -33,21 +45,21 @@ async function init() const args = { "path": path, "query": q.query, - "db": DB + "db": DB, + "foods": foods }; + if (typeof build[path] === "function") - { data = await build[path](args); - } else - { data = await build404(errorPath, q.pathname); - } + res.write(data); res.end(); } - http.createServer(server).listen(8080); + // start server + http.createServer(server).listen(80); } @@ -68,6 +80,8 @@ async function buildMain(args) { const path = args["path"]; const query = args["query"]; + console.log(query); + const foods = args["foods"]; let index; if (typeof query.index === "string") index = parse.cluttered(query.index); @@ -79,6 +93,7 @@ async function buildMain(args) const d = new Date(); let day = d.getDay(); + const actualDay = day; day = +((day === 0) || (day === 6)) + (+(!(day === 0) && !(day === 6)) * day); if ((typeof query.day === "string") && (parseInt(query.day).toString() === query.day) && (!isNaN(parseInt(query.day))) && (parseInt(query.day) > 0) && (parseInt(query.day) < 7)) day = parseInt(query.day); @@ -118,10 +133,26 @@ async function buildMain(args) res["shift"] = "Kurssilla/opettajalla/luokalla ei ole ruokailua päivällä tai kurssia ei ole olemassa!"; // get the day - res["day"] = ["su", "ma", "ti", "ke", "to", "pe", "la"][day]; + let weekdays = ["su", "ma", "ti", "ke", "to", "pe", "la"]; + res["day"] = weekdays[day]; if (res["shift"] === "") data_string = data_string.replace('<div id="shift-result" class="float-block">', '<div id="shift-result" class="float-block" style="display: none;">'); + // get the food + let food; + food = foods[ +(day < actualDay) ][day]; // test this out more + if (food !== undefined) + { + res["food-header"] = food[0]; + res["food"] = food[1]; + } + else + { + res["food-header"] = weekdays[day]; + res["food"] = "Päivälle ei löytynyt ruokaa"; + } + res["food-header"] = `Päivän ${res["food-header"]} kouluruoka:`; + data_string = build_replace(data_string, res); return data_string; |