-
Notifications
You must be signed in to change notification settings - Fork 0
/
goal-scrape.py
73 lines (66 loc) · 1.87 KB
/
goal-scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import requests
from bs4 import BeautifulSoup
import os
import urllib
import re
import sys
def _no_advice_message(goal_one):
    """Build the fallback text printed when no advice can be produced."""
    return ("Go for it! Unfortunately, there is no advice I can give you on the goal '"
            + goal_one + "' for now. :/")


def _find_wikihow_link(search_html):
    """Return the first wikiHow URL embedded in *search_html*, or None.

    The link is taken as the text from the first occurrence of
    ``wikihow.com`` up to (not including) the next ``&``.
    """
    link_start = search_html.find('wikihow.com')
    if link_start == -1:
        return None
    link_end = search_html.find('&', link_start)
    if link_end == -1:
        return None
    link = 'https://' + search_html[link_start:link_end]
    # A double-encoded quote ("%2522") in the result link is turned back
    # into a literal quote character before the page is requested.
    return link.replace("%2522", "\"")


def _format_steps(page_html):
    """Extract a numbered step list from the HTML of an article page.

    Every ``<b class="whb">`` inside a ``<div class="step">`` becomes a
    numbered line; the text of each ``<li>`` found under the step's ``<ul>``
    elements is appended after it, separated by a space.  Returns an empty
    string when nothing matches.
    """
    soup = BeautifulSoup(page_html, "html.parser")
    pieces = []
    count = 1
    for step in soup.find_all("div", {'class': "step"}):
        for heading in step.find_all("b", {'class': "whb"}):
            text = heading.get_text()
            if len(text) == 1:
                # One-character headings are dropped from the tree (so they
                # do not reappear in the <li> text below) and do not use up
                # a step number.
                heading.decompose()
                continue
            pieces.append("\n" + str(count) + ". " + text)
            count += 1
        for bullet_list in step.find_all("ul"):
            for item in bullet_list.find_all("li"):
                pieces.append(" " + item.get_text())
    # Join once instead of growing a string inside the loops.
    return "".join(pieces)


def get_data(goal_one, timeout=10):
    """Look up wikiHow advice for a goal and print it as numbered steps.

    Searches Google for ``how to <goal_one> wikihow``, follows the first
    wikiHow link found in the result page, and prints the extracted steps
    followed by a blank separator.  A fallback message is printed instead
    when no link is found, when the page yields no steps, or when a network
    request fails or times out.

    Args:
        goal_one: The goal as typed by the user (a string).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None.  The result is written to standard output.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` is loaded.
    from urllib.parse import quote_plus

    fallback = _no_advice_message(goal_one)
    query = quote_plus('how to ' + goal_one + ' wikihow')
    url = 'http://www.google.com/search?q=' + query

    data = fallback
    try:
        link = _find_wikihow_link(requests.get(url, timeout=timeout).text)
        if link is not None:
            # An article without recognisable steps falls back to the
            # message rather than printing an empty result.
            data = _format_steps(requests.get(link, timeout=timeout).text) or fallback
    except requests.RequestException:
        # Connection problems should not kill the interactive loop.
        data = fallback

    print(data)
    print("\n")
def get_option():
    """Prompt for a menu choice until the user enters a valid one.

    The menu is shown again after every invalid answer.  Whitespace around
    the answer is ignored.

    Returns:
        The string ``'0'`` (quit) or ``'1'`` (name a goal).

    Raises:
        EOFError: If standard input is exhausted before a valid choice.
    """
    prompt = '[1] Name a goal you\'d like to accomplish\n[0] Quit\nPress 0 or 1: '
    while True:
        choice = input(prompt).strip()
        if choice in ('0', '1'):
            return choice
def get_string():
    """Ask the user for the name of a goal, retrying once on a failed read.

    Returns:
        The line the user entered, without the trailing newline.

    Raises:
        EOFError: If the retry also hits end of input.
    """
    try:
        return input('Name of the goal: ')
    except (EOFError, UnicodeDecodeError):
        # Only recover from read failures; KeyboardInterrupt and SystemExit
        # must propagate so Ctrl-C still ends the program.
        return input('Try again: ')
def main():
    """Run the interactive menu until the user chooses to quit.

    Each pass asks for a menu option: '0' prints a farewell and exits the
    process with status 0; '1' asks for a goal and prints advice for it.
    """
    while True:
        choice = get_option()
        if choice == '0':
            print('Have a nice day!')
            sys.exit(0)
        if choice == '1':
            get_data(get_string())
# Start the menu only when the file is executed as a script, so that
# importing the module does not block on input().
if __name__ == "__main__":
    main()