-
Notifications
You must be signed in to change notification settings - Fork 2
/
analyzesController.py
87 lines (74 loc) · 2.62 KB
/
analyzesController.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
'''Response parsers: extract fields from an HTTP response via xpath, json, re or css rules.'''
import re
from lxml import etree
from dtanys import XDict
import parsel
from .publictool import original, responsecoding
# `response` parsing controller
class analyze():
    '''`response` parsing controller.

    A namespace of static parsers. Each one takes a response object, a
    mapping of ``result key -> extraction rule`` and an optional
    post-processing callable, and returns a dict with the same keys.
    '''

    @staticmethod
    def xpath(response, analytic: dict, auxiliary=original):
        '''Extract values with XPath expressions.

        Args:
            response: response object; its decoded body is obtained through
                `responsecoding` and parsed with `lxml.etree.HTML`.
            analytic: mapping of result key -> XPath expression.
            auxiliary: callable applied to every matched node/value.

        Returns:
            dict: result key -> list of post-processed matches.
        '''
        tree = etree.HTML(responsecoding(response))
        return {
            key: [auxiliary(node) for node in tree.xpath(expression)]
            for key, expression in analytic.items()
        }

    @staticmethod
    def json(response, analytic: dict, auxiliary=original):
        '''Extract values from a JSON body with `dtanys.XDict` paths.

        The response encoding is set from `apparent_encoding` (falling back
        to UTF-8 when it is None) before decoding. The body is decoded once
        and the resulting object is shared by every path lookup.

        Args:
            response: response object exposing `apparent_encoding`,
                `encoding` and `json()`.
            analytic: mapping of result key -> XDict path.
            auxiliary: callable applied to each extracted value.

        Returns:
            dict: result key -> post-processed extracted value.
        '''
        if response.apparent_encoding is None:
            response.encoding = 'utf-8'
        else:
            response.encoding = response.apparent_encoding
        # Nothing to extract: skip decoding so an empty rule set never raises
        # on a non-JSON body.
        if not analytic:
            return {}
        # Decode once instead of re-parsing the whole body for every key.
        payload = response.json()
        return {
            key: auxiliary(XDict(payload, path).edict())
            for key, path in analytic.items()
        }

    @staticmethod
    def re(response, analytic: dict, auxiliary=original):
        '''Extract values with regular expressions.

        Each rule is either a regex (pattern string or compiled pattern),
        which is run with `re.findall` against the decoded body, or a
        callable, which is invoked with the lxml tree of the body and must
        return an iterable of matches. The callable form is kept for
        backward compatibility.

        Args:
            response: response object; its body is decoded through
                `responsecoding` (assumed to return text - confirm against
                `publictool`).
            analytic: mapping of result key -> regex or callable rule.
            auxiliary: callable applied to every match.

        Returns:
            dict: result key -> list of post-processed matches.
        '''
        text = responsecoding(response)
        tree = None  # built lazily; only callable rules need the parsed tree
        result = {}
        for key, rule in analytic.items():
            if callable(rule):
                if tree is None:
                    tree = etree.HTML(text)
                matches = rule(tree)
            else:
                # Inside a function body `re` resolves to the module-level
                # import, not to this static method (class scope is skipped).
                matches = re.findall(rule, text)
            result[key] = [auxiliary(match) for match in matches]
        return result

    @staticmethod
    def css(response, analytic: dict, auxiliary=original):
        '''Extract values with CSS selectors.

        Args:
            response: response object; its decoded body is obtained through
                `responsecoding` and wrapped in a `parsel.Selector`.
            analytic: mapping of result key -> CSS selector.
            auxiliary: callable applied to every extracted string.

        Returns:
            dict: result key -> list of post-processed extracted strings.
        '''
        selector = parsel.Selector(responsecoding(response))
        return {
            key: [auxiliary(item) for item in selector.css(query).extract()]
            for key, query in analytic.items()
        }