aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoelHMikael <joel.h.kronqvist@gmail.com>2021-12-18 14:15:53 +0200
committerJoelHMikael <joel.h.kronqvist@gmail.com>2021-12-18 14:15:53 +0200
commitfc48ff89c2015757ba9936795b34201156e38b9e (patch)
tree977099545772551d020f781de5dd68948f0fd4b0
parentb12ab0480ebeacbfb0d6841cb8aca218a5bf2f5a (diff)
downloadLYLLRuoka-fc48ff89c2015757ba9936795b34201156e38b9e.tar.gz
LYLLRuoka-fc48ff89c2015757ba9936795b34201156e38b9e.zip
Food scraping
Added food scraping from the RSS & implemented showing the result on the server
-rw-r--r--parse.js5
-rw-r--r--scrape.js64
-rw-r--r--server.js49
3 files changed, 101 insertions, 17 deletions
diff --git a/parse.js b/parse.js
index 819b3ed..7c47423 100644
--- a/parse.js
+++ b/parse.js
@@ -94,6 +94,8 @@ function getToLineStartingWith(s, ss, start = 0)
function findExpression(data, expr, start = 0)
{
+ if (start == -1)
+ return -1;
if (!(Number.isInteger(start) && (start >= 0)))
throw new TypeError("Start must be a positive integer!");
while ((data.substring(start, start + expr.length) !== expr) && (start + expr.length < data.length))
@@ -105,7 +107,7 @@ function findExpression(data, expr, start = 0)
function parseCluttered(s)
{
- return s.replaceAll(".", "").replaceAll(" ", "").toUpperCase();
+ return s.replaceAll(".", "").replaceAll(" ", "").replaceAll("<", "(").replaceAll(">", ")").toUpperCase();
}
function parseClasses(classData, DB)
@@ -280,3 +282,4 @@ exports.indexType = getIndexType;
exports.classes = parseClasses;
exports.get = getShift;
exports.cluttered = parseCluttered;
+exports.find = findExpression;
diff --git a/scrape.js b/scrape.js
index 17d6169..45dd0b4 100644
--- a/scrape.js
+++ b/scrape.js
@@ -1,17 +1,67 @@
-const https = require("https");
+const https = require("https"); const parse = require("./parse.js"); const fs = require("fs"); const events = require("events");
async function urlOpen(path)
{
- return new Promise(resolve =>
+ return new Promise((resolve, reject) =>
{
let req = https.get(path, res =>
{
res.on("data", resolve);
});
- req.on("error", e =>
- {
- console.error(e);
- });
- req.end();
});
+ req.on("error", e =>
+ {
+ console.error(e);
+ });
+ req.end();
}
+
+async function scrapeFood(url)
+{
+ let data = await urlOpen(url);
+ data = data.toString("utf-8");
+
+ let foodList = [];
+ const weekdays = ["su", "ma", "ti", "ke", "to", "pe", "la"];
+
+ let titleTags = ["<title>", "</title>"];
+ let foodTags = ["<![CDATA[", "]]>"];
+ const getSpan = (data, tags, i = 0) =>
+ {
+ return [
+ parse.find(data, tags[0], i) + tags[0].length,
+ parse.find(data, tags[1], i)
+ ];
+ }
+ let mainTitle = parse.find(data, titleTags[1]) + titleTags[1].length;
+ let titleSpan = getSpan(data, titleTags, mainTitle);
+ let foodSpan = getSpan(data, foodTags);
+
+ while (
+ (titleSpan[0] !== -1)
+ && (titleSpan[1] !== -1)
+ && (foodSpan[0] !== -1)
+ && (foodSpan[1] !== -1)
+ )
+ {
+ let title = data.substring(titleSpan[0], titleSpan[1]);
+ let food = data.substring(foodSpan[0], foodSpan[1]);
+
+ let weekdayIndex = weekdays.findIndex(val => { return val === title.substring(0, 2); });
+ if (weekdayIndex !== -1)
+ foodList[weekdayIndex] = [title, food];
+
+ titleSpan = getSpan(data, titleTags, foodSpan[1]);
+ foodSpan = getSpan(data, foodTags, titleSpan[1]);
+ }
+
+ return foodList;
+}
+
+function getFoodLink(week)
+{
+ return `https://eruokalista.lohja.fi/AromieMenus/FI/Default/Lohja/Koulut/Rss.aspx?Id=97f76449-f57c-4217-aede-b5f9dbf2b41e&DateMode=${week}`;
+}
+
+exports.food = scrapeFood;
+exports.link = getFoodLink;
diff --git a/server.js b/server.js
index 662d319..ba176ef 100644
--- a/server.js
+++ b/server.js
@@ -2,6 +2,7 @@ const http = require("http");
const fs = require("fs");
const url = require("url");
const parse = require("./parse.js");
+const scrape = require("./scrape.js");
async function init()
@@ -15,13 +16,24 @@ async function init()
};
const errorPath = "./404/index.html";
- let shiftCont = await openFile("./shifts.txt");
+ // await for needed things in async
+ let [shiftCont, classCont, foodsThisWeek, foodsNextWeek] = await Promise.all([
+ openFile("./shifts.txt"),
+ openFile("./classes.txt"),
+ scrape.food(scrape.link(1)),
+ scrape.food(scrape.link(2))
+ ]);
+
+ // get the food shift "database"
shiftCont = shiftCont.toString("utf-8").replaceAll("\r", ""); // \r because of the \r\n newline on windows which creates problems
- let classCont = await openFile("./classes.txt");
classCont = classCont.toString("utf-8").replaceAll("\r", "");
let DB = parse.build(shiftCont);
parse.classes(classCont, DB);
+ // get the food "database"
+ const foods = [foodsThisWeek, foodsNextWeek];
+
+ // server code
async function server(req, res)
{
let q = url.parse(req.url, true);
@@ -33,21 +45,21 @@ async function init()
const args = {
"path": path,
"query": q.query,
- "db": DB
+ "db": DB,
+ "foods": foods
};
+
if (typeof build[path] === "function")
- {
data = await build[path](args);
- }
else
- {
data = await build404(errorPath, q.pathname);
- }
+
res.write(data);
res.end();
}
- http.createServer(server).listen(8080);
+ // start server
+ http.createServer(server).listen(80);
}
@@ -68,6 +80,8 @@ async function buildMain(args)
{
const path = args["path"];
const query = args["query"];
+ console.log(query);
+ const foods = args["foods"];
let index;
if (typeof query.index === "string")
index = parse.cluttered(query.index);
@@ -79,6 +93,7 @@ async function buildMain(args)
const d = new Date();
let day = d.getDay();
+ const actualDay = day;
day = +((day === 0) || (day === 6)) + (+(!(day === 0) && !(day === 6)) * day);
if ((typeof query.day === "string") && (parseInt(query.day).toString() === query.day) && (!isNaN(parseInt(query.day))) && (parseInt(query.day) > 0) && (parseInt(query.day) < 7))
day = parseInt(query.day);
@@ -118,10 +133,26 @@ async function buildMain(args)
res["shift"] = "Kurssilla/opettajalla/luokalla ei ole ruokailua päivällä tai kurssia ei ole olemassa!";
// get the day
- res["day"] = ["su", "ma", "ti", "ke", "to", "pe", "la"][day];
+ let weekdays = ["su", "ma", "ti", "ke", "to", "pe", "la"];
+ res["day"] = weekdays[day];
if (res["shift"] === "")
data_string = data_string.replace('<div id="shift-result" class="float-block">', '<div id="shift-result" class="float-block" style="display: none;">');
+ // get the food
+ let food;
+ food = foods[ +(day < actualDay) ][day]; // test this out more
+ if (food !== undefined)
+ {
+ res["food-header"] = food[0];
+ res["food"] = food[1];
+ }
+ else
+ {
+ res["food-header"] = weekdays[day];
+ res["food"] = "Päivälle ei löytynyt ruokaa";
+ }
+ res["food-header"] = `Päivän ${res["food-header"]} kouluruoka:`;
+
data_string = build_replace(data_string, res);
return data_string;