Skip to content

Commit

Permalink
Backend API added
Browse files Browse the repository at this point in the history
  • Loading branch information
KrKOo committed Mar 14, 2022
1 parent 921bcd3 commit 2296d53
Show file tree
Hide file tree
Showing 10 changed files with 178 additions and 47 deletions.
10 changes: 10 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
max_line_length = off
48 changes: 14 additions & 34 deletions main.js → extension/main.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


// Returns the ISO week of the date.
Date.prototype.getWeek = function () {
var date = new Date(this.getTime());
Expand All @@ -13,25 +11,11 @@ Date.prototype.getWeek = function () {
- 3 + (week1.getDay() + 6) % 7) / 7);
}

// Downloads the course detail page for `course` from www.fit.vut.cz and
// resolves to the response body parsed as an HTML document (via DOMParser).
// No error handling here: a failed fetch rejects the returned promise.
const getCoursePage = (course) => {
return fetch(`https://www.fit.vut.cz/study/course/${course}/.cs`)
.then(response => response.text())
.then(html => {
const parser = new DOMParser();
const doc = parser.parseFromString(html, 'text/html');
return doc;
})
}

const getLectureTitles = (course) => {
return getCoursePage(course)
.then(page => {
const planList = page.querySelectorAll('.b-detail__content ol li')
let lectureTitles = []
planList.forEach(planElement => {
lectureTitles.push(planElement.innerText);
})
return lectureTitles;
return fetch(`https://fitscrap.herokuapp.com/lecture-titles/${course}`)
.then(response => response.json())
.catch(e => {
console.error(e);
})
}

Expand All @@ -56,10 +40,10 @@ const getDateFromLectureString = (str) => {
}

const insertLectureNumbering = async (course) => {
const lectureTitles = await getLectureTitles(course) || [];

const lectureTitles = await getLectureTitles(course);

const lectureList = document.querySelector("ul");
// get the last <ul> on the page
const lectureList = Array.from(document.querySelectorAll("ul")).pop();
const lectures = lectureList.children;

let prevWeek = -1;
Expand All @@ -70,37 +54,33 @@ const insertLectureNumbering = async (course) => {
const splitDate = getDateFromLectureString(lectureText);

// (Y, M - 1, D) - JS counts months from 0
const date = new Date(splitDate[2], splitDate[1] - 1, splitDate[0])
const date = new Date(splitDate[2], splitDate[1] - 1, splitDate[0]);
const week = date.getWeek();

if (firstSchoolWeek === -1) {
firstSchoolWeek = week
firstSchoolWeek = week;
};

const schoolWeek = week - firstSchoolWeek + 1;

if (prevWeek !== week) {
if (prevWeek !== week && !isNaN(week)) {
let titleElement = document.createElement("h2")

titleElement.innerHTML = "Week " + schoolWeek + ((lectureTitles.length) ? " - " + lectureTitles[schoolWeek - 1] : "");
titleElement.innerHTML = "Week " + schoolWeek + ((lectureTitles[schoolWeek - 1]) ? " - " + lectureTitles[schoolWeek - 1] : "");

lecture.parentElement.insertBefore(titleElement, lecture);
prevWeek = week;
}
});
}



const pageNavigation = document.querySelectorAll('tbody tr:nth-child(3) td:nth-child(2) > a')
const pageNavigationLevel = pageNavigation.length

const pageNavigation = document.querySelectorAll('tbody tr:nth-child(3) td:nth-child(2) > a');
const pageNavigationLevel = pageNavigation.length;

if (pageNavigationLevel == 3) {
const courseName = document.querySelector('tbody tr:nth-child(3) td:nth-child(2) > b').innerText
const courseName = document.querySelector('tbody tr:nth-child(3) td:nth-child(2) > b').innerText;
const courseID = courseName.substring(0, courseName.indexOf(' '));

console.log(courseID);
insertLectureNumbering(courseID);
}

13 changes: 13 additions & 0 deletions extension/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"manifest_version": 2,
"version": "1.0.1",
"name": "VUT FIT video server addon",
"description": "VUT FIT video server addon for showing lecture titles and week numbering",
"content_scripts": [
{
"matches": ["https://video1.fit.vutbr.cz/*"],
"js": ["main.js"]
}
],
"permissions": ["*://fitscrap.herokuapp.com/*"]
}
13 changes: 0 additions & 13 deletions manifest.json

This file was deleted.

55 changes: 55 additions & 0 deletions server/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
*_cache.sqlite

# Editors
.vscode/
.idea/

# Vagrant
.vagrant/

# Mac/OSX
.DS_Store

# Windows
Thumbs.db

# Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Flask stuff:
instance/
.webassets-cache

# pyenv
.python-version

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
1 change: 1 addition & 0 deletions server/Procfile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
web: python server.py
Empty file added server/Scrapper/__init__.py
Empty file.
36 changes: 36 additions & 0 deletions server/Scrapper/scrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import requests
from bs4 import BeautifulSoup


def getAllCourseIDs():
    """Scrape the FIT course list and return the text of every course-ID cell.

    Downloads the course overview page and collects the text of the second
    cell of each row in the ``table#list`` table body.

    Returns:
        list[str]: Cell texts in page order; an empty list when the selector
        matches nothing.

    Raises:
        requests.exceptions.RequestException: If the download fails or does
            not complete within the timeout.
    """
    URL = "https://www.fit.vut.cz/study/courses/.cs"

    # Without a timeout ``requests`` waits indefinitely on a stalled
    # connection, which would block the calling web worker forever.
    page = requests.get(URL, timeout=10)
    soup = BeautifulSoup(page.content, "html.parser")
    courseTDs = soup.select("table#list tbody tr td:nth-child(2)")

    return [element.text for element in courseTDs]


def getLectureTitles(courseID):
    """Scrape the lecture outline titles for one course.

    Only IDs present in the list returned by ``getAllCourseIDs`` are
    requested from the upstream site; anything else is rejected up front.

    Args:
        courseID: Course identifier, compared verbatim against the entries
            returned by ``getAllCourseIDs``.

    Returns:
        ``None`` when ``courseID`` is not in the course list. Otherwise a
        list with the text of every ``<li>`` inside the first ``<div>`` that
        follows the "Osnova přednášek" paragraph; the list is empty when the
        page has no such paragraph or no ``<div>`` follows it.

    Raises:
        requests.exceptions.RequestException: If a download fails or does
            not complete within the timeout.
    """
    if courseID not in getAllCourseIDs():
        return None

    URL = "https://www.fit.vut.cz/study/course/{}/.cs".format(courseID)

    # Bound the wait so a stalled upstream connection cannot hang the caller.
    page = requests.get(URL, timeout=10)
    soup = BeautifulSoup(page.content, "html.parser")
    labelArray = soup.select('p:-soup-contains("Osnova přednášek")')

    if not labelArray:
        return []

    # find_next returns None when no <div> follows the label; treat that the
    # same as "no outline" instead of failing with AttributeError below.
    titleListContainer = labelArray[0].find_next("div")
    if titleListContainer is None:
        return []

    return [element.text for element in titleListContainer.find_all("li")]
21 changes: 21 additions & 0 deletions server/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
appdirs==1.4.4
attrs==21.4.0
beautifulsoup4==4.10.0
cattrs==1.10.0
certifi==2021.10.8
charset-normalizer==2.0.12
click==8.0.4
Flask==2.0.3
Flask-Cors==3.0.10
idna==3.3
itsdangerous==2.1.1
Jinja2==3.0.3
MarkupSafe==2.1.0
python-dotenv==0.19.2
requests==2.27.1
requests-cache==0.9.3
six==1.16.0
soupsieve==2.3.1
url-normalize==1.4.3
urllib3==1.26.8
Werkzeug==2.0.3
28 changes: 28 additions & 0 deletions server/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import os
from flask import Flask, jsonify
from flask_cors import CORS
from dotenv import load_dotenv
import requests_cache

from Scrapper.scrapper import getLectureTitles

# Load variables from a local .env file (if present) into the process
# environment before anything below reads it.
load_dotenv()
# Patch ``requests`` globally so HTTP responses are cached in a SQLite
# database named "requests_cache".
# NOTE(review): no ``expire_after`` is passed — confirm the library's default
# expiry is acceptable, otherwise scraped pages may be served stale.
requests_cache.install_cache("requests_cache", backend="sqlite")

# The WSGI application object; the route handlers below register on it.
app = Flask(__name__)
CORS(app) # enable CORS for all routes


@app.route("/")
def root():
    """Serve the landing page: a single HTML heading naming the service."""
    banner = "<h1>VUT FIT website scrapper.</h1>"
    return banner


@app.route("/lecture-titles/<courseID>")
def lectureTitles(courseID):
    """Return the lecture titles of ``courseID`` as a JSON array.

    Args:
        courseID: Course identifier taken from the URL path.

    Returns:
        A JSON response whose body is always an array of title strings; the
        array is empty when no titles are available for the course.
    """
    titles = getLectureTitles(courseID)
    # getLectureTitles yields None for a course it does not recognise, which
    # would serialise as JSON ``null``. Clients index the result directly, so
    # always answer with an array.
    return jsonify(titles if titles is not None else [])


if __name__ == "__main__":
    # Started directly as a script: take the bind address and port from the
    # HOST / PORT environment variables (either may be unset).
    app.run(
        host=os.environ.get("HOST"),
        port=os.environ.get("PORT"),
    )

0 comments on commit 2296d53

Please sign in to comment.