This repository has been archived by the owner on Jul 8, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
spider.py
190 lines (165 loc) · 6.47 KB
/
spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# Module metadata: author, copyright, license and contact address.
__author__ = "Sh-Zh-7"
__copyright__ = "Copyright (C) 2019 Sh-Zh-7"
__license__ = "MIT"
__email__ = "2431297348@qq.com"
import os
import json
import time
import getpass
import requests
import argparse
import warnings
from bs4 import BeautifulSoup as bs
# Silence everything reported through the warnings module
warnings.filterwarnings("ignore")
# Stop TF from printing any log output; set here, ahead of the project
# imports below (presumably one of them loads TensorFlow - TODO confirm)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from dao.query import Query
from connect import login
from connect.grade import GetGradePageContent
from captcha import other
from captcha.recognize import RecognizeCAPTCHA
from connect.helper import HTML2CSV, CaptchaException, OtherException
from dao.lesson import LessonArray
# Retry budget for the login loop in Main. The loop keeps going while its
# attempt counter (starting at 0) is <= this value, so up to
# max_try_time + 1 login attempts are made.
max_try_time = 2
def Core(args):
    """
    Run the query selected on the command line against the cached grade table.

    Reads ./grades_table.html (GBK encoded), builds a LessonArray from the rows
    matched by "body > table > tr", and then either prints a single course
    looked up by name (-C) or narrows the set by kind (-K) and/or year (-Y)
    and hands it to ShowLessonInfo. Most invocations end up in the second path.

    :param args: parsed command line namespace; C, K, Y and A are read here
    :raises SystemExit: with status 0 when the lookup by course name fails
    """
    with open("./grades_table.html", "r", encoding="GBK") as f:
        content = f.read()
    soup = bs(content, "html.parser")
    table_rows = soup.select("body > table > tr")
    lessons = LessonArray(table_rows)

    if args.C:
        try:
            lesson_name = args.C.strip()
            lesson = Query.SelectByCname(lessons, lesson_name)
            print(lesson)
        except Exception:
            # Only ordinary errors mean "not found"; Ctrl-C and SystemExit
            # must keep propagating instead of being reported as a bad query.
            print("未能查询到课程,请检查您的查询条件")
            # Same effect as the site-provided exit(0), but does not depend on
            # the `site` module having installed that helper.
            raise SystemExit(0)
    elif args.K or args.Y or args.A:
        # -A alone keeps every course; -K and -Y narrow the set cumulatively.
        if args.K:
            kind = args.K.strip()
            lessons = Query.SelectByKind(lessons, kind)
        if args.Y:
            year = args.Y.strip()
            lessons = Query.SelectByYear(lessons, year)
        ShowLessonInfo(lessons, args)
    else:
        print("不明确的组合!请查阅官方文档!https://github.com/Sh-Zh-7/WHU-bkjw-crawler")
def ShowLessonInfo(lessons, args):
    """
    Print the statistic requested on the command line for the chosen courses.

    The flags are checked in the order G, S, W, T, print and only the first
    one that is set is honoured. If none is set a hint is printed instead.
    Any ordinary error raised while computing or printing the value is
    reported as a failed query rather than propagated.

    :param lessons: the selected course collection; must provide GetGPA,
                    GetAverageScore, GetWeightedScore and GetTotalPoint
    :param args: namespace carrying the boolean flags G (GPA), S (average
                 score), W (credit-weighted average), T (total credits)
                 and print (dump the courses themselves)
    """
    try:
        if args.G:
            print(lessons.GetGPA())
        elif args.S:
            print(lessons.GetAverageScore())
        elif args.W:
            print(lessons.GetWeightedScore())
        elif args.T:
            print(lessons.GetTotalPoint())
        elif args.print:
            print(lessons)
        else:
            print("请输入您想查询的信息:GPA?平均分?加权平均分?总学分?")
    except Exception:
        # Deliberately not a bare `except:` so KeyboardInterrupt / SystemExit
        # are not mistaken for an empty query result.
        print("未能查询到课程,请检查您的查询条件")
def GetUsernameAndPwd():
    """
    Return the student id and password, prompting only on first use.

    When user_info.json is absent from the working directory the user is
    asked for both values (the password without echo) and they are saved to
    that file; otherwise they are loaded from it.

    :return: tuple (username, password)
    """
    # Credentials are requested interactively inside the process (the way
    # Linux tools do it) instead of being passed on the command line as the
    # mysql client allows.
    if not os.path.exists("user_info.json"):
        username = input("请输入您的学号: ")
        password = getpass.getpass("请输入您的密码: ")
        print()
        # Persist for later runs. The file holds a plaintext password, so it is
        # created with owner-only permissions (0o600) rather than the default
        # mode; on platforms without POSIX permissions the mode has no effect.
        user_info = {"username": username, "password": password}
        fd = os.open("user_info.json", os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            json.dump(user_info, f, indent=4)
    else:
        with open("user_info.json", "r") as f:
            user_info = json.load(f)
        username = user_info["username"]
        password = user_info["password"]
    return username, password
def GetCAPTCHA(session):
    """
    Fetch a CAPTCHA through the session and run it through the recognizer.

    :param session: the single session object shared by the whole run
    :return: tuple of (recognizer output for the fetched CAPTCHA, the cookie
             returned alongside it by other.GetCAPTCHA - per the original
             note, the cookie from the first visit to the academic system)
    """
    fetched = other.GetCAPTCHA(session)
    captcha_source, first_cookie = fetched
    return RecognizeCAPTCHA(captcha_source), first_cookie
def GetArgs():
    """
    Parse the command line arguments supplied by the user.

    Selection options: -A (all courses), -Y (one academic year), -C (course
    name), -K (course kind). Requested output, mutually exclusive: -S, -G,
    -W, -T, --print. Maintenance: --clear.

    :return: the argparse namespace; unset string options are "" and unset
             flags are False
    """
    parser = argparse.ArgumentParser(description="您的教务系统助手", epilog="Author: WHU CS 2018级 沈之豪")
    # Selection criteria
    parser.add_argument("-A", action="store_true", help="查询所有的课程")
    # The default is "" (not 0) so that -Y is always a str like -C and -K;
    # it stays falsy, which is what the callers test for.
    parser.add_argument("-Y", type=str, default="", help="查询某一学年的课程")
    parser.add_argument("-C", type=str, default="", help="按照课程名称查找")
    parser.add_argument("-K", type=str, default="",
                        choices=["GB", "GX", "ZB", "ZX", "B", "X"], help="按照课程的种类查找")
    # Requested output; at most one of these may be given
    requirement = parser.add_mutually_exclusive_group()
    requirement.add_argument("-S", action="store_true", help="查询课程的平均分")
    requirement.add_argument("-G", action="store_true", help="查询GPA")
    requirement.add_argument("-W", action="store_true", help="查询以学分为权的平均分")
    requirement.add_argument("-T", action="store_true", help="查询总学分")
    requirement.add_argument("--print", action="store_true", help="打印所有课程信息")
    # Miscellaneous
    parser.add_argument("--clear", action="store_true", help="删除所有本地存储")
    args = parser.parse_args()
    return args
def Main(args):
    """
    Program entry point: clear the local cache, or make sure a grade table is
    cached and then run the requested query.

    With --clear the three locally stored files are removed and nothing else
    happens. Otherwise, if ./grades_table.html is missing, the user is logged
    in (retrying on CAPTCHA failures), the grade page is downloaded, saved as
    GBK encoded HTML and passed to HTML2CSV; finally Core(args) is run.

    :param args: parsed command line namespace
    :raises SystemExit: with status 0 when login fails for good
    """
    if args.clear:
        for cached in ("./grades_table.html", "./user_info.json", "./grades_table.csv"):
            if os.path.exists(cached):
                os.remove(cached)
        return

    if not os.path.exists("./grades_table.html"):
        session = requests.session()
        username, password = GetUsernameAndPwd()
        # Login loop: retried only when the CAPTCHA was rejected
        success = False
        try_time = 0
        while not success and try_time <= max_try_time:
            try_time += 1
            try:
                captcha, cookie = GetCAPTCHA(session)
                login_cookie, csrf_token = login.Login(session, username, password, captcha, cookie)
                success = True
            except CaptchaException:
                print("验证码错误")
                print("重试中.....")
                time.sleep(5)
            except OtherException as e:
                print(e.msg)
                # Drop the stored credentials on a wrong username/password so
                # the next run prompts for them again.
                if e.msg == "用户名/密码错误" and os.path.exists("./user_info.json"):
                    os.remove("./user_info.json")
                raise SystemExit(0)
        # Decide on `success`, not on the attempt counter: the counter also
        # exceeds max_try_time when the last allowed attempt succeeds, which
        # used to abort a perfectly good login.
        if not success:
            print("\n您就是非洲人? 请重试")
            raise SystemExit(0)
        content = GetGradePageContent(session, login_cookie, csrf_token)
        with open("grades_table.html", "w", encoding="GBK") as f:
            f.write(content)
        HTML2CSV(content)
    Core(args)
if __name__ == "__main__":
    # Script entry: parse the command line, then hand the result to Main
    cli_args = GetArgs()
    Main(cli_args)