-
Notifications
You must be signed in to change notification settings - Fork 0
/
HW_2.sql
147 lines (111 loc) · 4.86 KB
/
HW_2.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#1. info on posts posted with the last 10 days on our data
#with out INDEXing ~~ 5 sec
SELECT id, owneruserid, title, commentcount
FROM posts
WHERE creaiondate >= (SELECT Date_sub(max(creaiondate), interval 10 day)
FROM posts);
#INDEXing...
#we don't INDEX id and owneruserid FROM posts becasue id is primary key and owneruserid is foreign key i.e already INDEXed.
CREATE INDEX ix_posts_t
ON posts(title,commentcount);
CREATE INDEX ix_posts_c
ON posts(creaiondate);
#with INDEX ~~ 0.02 sec
SELECT id, owneruserid, title, commentcount
FROM posts
WHERE creaiondate >= (SELECT Date_sub(max(creaiondate), interval 10 day)
FROM posts);
DROP INDEX ix_posts_c
ON posts;
DROP INDEX ix_posts_t
ON posts;
#2.Badge names of poster of a post commented on and voted on the same day
CREATE INDEX ix_comment
ON comments(creationDate,score);
CREATE INDEX ix_vote
ON votes(creationDate,votetypeid);
#Without temporary table and with INDEX ~~ 2.3 sec
SELECT badges.name, owneruserid
FROM comments, votes, posts, badges
WHERE Date(comments.creationDate) = votes.CreationDate AND comments.postid = votes.postid
AND comments.score > 10 AND votetypeid = 2 AND posts.owneruserid = badges.userid;
#With temporary table and INDEX ~~ 0.8 sec
CREATE TEMPORARY TABLE com AS
SELECT DISTINCT owneruserid
FROM comments, votes, posts
WHERE Date(comments.creationDate) = votes.CreationDate AND comments.score > 10 AND votetypeid = 2 AND posts.id = comments.postid;
-- DROP temporary table com;
SELECT userid, badges.name
FROM badges
WHERE userid IN
(SELECT owneruserid
FROM com);
DROP INDEX ix_vote
ON votes;
DROP INDEX ix_comment
ON comments;
#3. Original Query 7 ~~ 5.813 sec
SELECT count(id) AS `Number of posts`, Date(MAX(p.CreaionDate)) AS `Latest date`
FROM stats.posts p
WHERE Date(p.CreaionDate) = (SELECT Date(MAX(p.CreaionDate))
FROM stats.posts p);
CREATE INDEX ix_id_creaiondate
ON posts(id, CreaionDate);
#After INDEXing ~~ 0.187
SELECT count(id) AS `Number of posts`, Date(MAX(p.CreaionDate)) AS `Latest date`
FROM stats.posts p
WHERE Date(p.CreaionDate) = (SELECT Date(MAX(p.CreaionDate))
FROM stats.posts p);
DROP INDEX ix_id_creaiondate
ON posts;
#4. Original Query ~~ 6.105
SELECT DISTINCT b.Name AS `Badge Name`, b.userid AS `User Id`
FROM stats.badges b
WHERE b.UserId = (SELECT owneruserid
FROM stats.posts
WHERE favoritecount = (SELECT MAX(favoritecount)
FROM stats.posts));
#rewrite - replacing the unnecessary subquery with a JOIN ~~ 5.4
SELECT b.Name AS `Badge Name`, b.userid AS `User Id`
FROM stats.badges b JOIN stats.posts p ON b.UserId= p.OwnerUserId
WHERE p.favoritecount =
(SELECT MAX(favoritecount)
FROM stats.posts);
#After INDEXing with the re-written query ~~ 0.016
CREATE INDEX ix_favcount
ON posts(FavoriteCount);
SELECT b.Name AS `Badge Name`, b.userid AS `User Id`
FROM stats.badges b JOIN stats.posts p ON b.UserId= p.OwnerUserId
WHERE p.favoritecount =
(SELECT MAX(favoritecount)
FROM stats.posts);
DROP INDEX ix_favcount
ON posts;
# 5 and 6. Average comment count based on user's location, 25 locations which aren't null taken randomly
# 5. Average comment count based on user's location, 25 locations which aren't null taken randomly
# without INDEX ~ 3 sec
SELECT count(*) AS 'Number of Inactive Users'
FROM posts p
WHERE Date(p.LasActivityDate) <= (SELECT Date(Date_sub(MAX(LasActivityDate), interval 1 year))
FROM posts);
CREATE INDEX lasactivitydate ON posts(LasActivityDate);
#with INDEX ~ 0.05 sec
SELECT COUNT(*) AS 'Number of Inactive Users'
FROM posts p
WHERE Date(p.LasActivityDate) <= (SELECT Date(Date_sub(max(LasActivityDate), INTERVAL 1 YEAR))
FROM posts);
DROP INDEX lasactivitydate
ON posts;
#6. What is the title of the post that have been viewed most and includes 'bayesian' tag?
# before INDEXing ~ 1.234 sec
SELECT p.title AS `Title`, MAX(p.ViewCount) AS `Maximum view count`
FROM stats.posts p
WHERE Tags LIKE '%bayesian%';
CREATE INDEX ix_title
ON posts (ViewCount, title, tags);
DROP INDEX ix_title
ON posts;
# after adding INDEX; ~ 0.110 sec
SELECT p.title AS `Title`, MAX(p.ViewCount) AS `Maximum view count`
FROM stats.posts p
WHERE Tags LIKE '%bayesian%';