-
Notifications
You must be signed in to change notification settings - Fork 0
/
finalprojectreport.html
792 lines (698 loc) · 26.9 KB
/
finalprojectreport.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
<!DOCTYPE html>
<!-- lang declares the document language so screen readers and translation tools can pick the right rules -->
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<title>Project Report</title>
<script src="site_libs/header-attrs-2.16/header-attrs.js"></script>
<script src="site_libs/jquery-3.6.0/jquery-3.6.0.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/readable.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<style>h1 {font-size: 34px;}
h1.title {font-size: 38px;}
h2 {font-size: 30px;}
h3 {font-size: 24px;}
h4 {font-size: 18px;}
h5 {font-size: 16px;}
h6 {font-size: 12px;}
code {color: inherit; background-color: rgba(0, 0, 0, 0.04);}
pre:not([class]) { background-color: white }</style>
<script src="site_libs/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="site_libs/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="site_libs/tocify-1.9.1/jquery.tocify.js"></script>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<script src="site_libs/navigation-1.1/codefolding.js"></script>
<link href="site_libs/highlightjs-9.12.0/default.css" rel="stylesheet" />
<script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
<link href="site_libs/font-awesome-5.1.0/css/all.css" rel="stylesheet" />
<link href="site_libs/font-awesome-5.1.0/css/v4-shims.css" rel="stylesheet" />
<style type="text/css">
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<style type="text/css">code{white-space: pre;}</style>
<script type="text/javascript">
// Turn on highlight.js syntax highlighting, but only when the library was actually loaded.
(function () {
  if (!window.hljs) {
    return;
  }
  // Configure with an empty language list, then register the on-load highlighting hook.
  hljs.configure({languages: []});
  hljs.initHighlightingOnLoad();
  // When the document has already finished loading, also schedule an explicit
  // highlighting pass on the next tick (presumably because the on-load hook
  // would otherwise be registered too late to fire; confirm against highlight.js).
  var alreadyLoaded = document.readyState === "complete";
  if (alreadyLoaded) {
    window.setTimeout(function () {
      hljs.initHighlighting();
    }, 0);
  }
})();
</script>
<style type = "text/css">
/* Cap the width of the main content column and center it horizontally. */
.main-container {
max-width: 940px;
margin-left: auto;
margin-right: auto;
}
/* Never let an image grow wider than its containing element. */
img {
max-width:100%;
}
/* Space between the top of a tab pane and its content. */
.tabbed-pane {
padding-top: 12px;
}
/* Space below each element carrying the html-widget class. */
.html-widget {
margin-bottom: 20px;
}
/* Suppress the focus outline on code-folding buttons.
   NOTE(review): no replacement focus indicator is defined in this block; check keyboard accessibility. */
button.code-folding-btn:focus {
outline: none;
}
/* Force <summary> to render as a list item (presumably to keep the disclosure marker visible; confirm). */
summary {
display: list-item;
}
/* A paragraph that is the only child of a <summary> is laid out inline with it. */
details > summary > p:only-child {
display: inline;
}
/* Code nested inside <pre> gets no padding of its own. */
pre code {
padding: 0;
}
</style>
<style type="text/css">
/* Nested dropdown menus: the submenu item is the positioning context for its child menu. */
.dropdown-submenu {
position: relative;
}
/* Place the child menu flush with the top of its parent item, immediately to the right of it. */
.dropdown-submenu>.dropdown-menu {
top: 0;
left: 100%;
margin-top: -6px;
margin-left: -1px;
border-radius: 0 6px 6px 6px;
}
/* Reveal the child menu while the pointer hovers over the submenu item. */
.dropdown-submenu:hover>.dropdown-menu {
display: block;
}
/* Right-pointing arrow after the submenu link: a zero-size box whose only
   coloured border is the left one, with the remaining borders transparent. */
.dropdown-submenu>a:after {
display: block;
content: " ";
float: right;
width: 0;
height: 0;
border-color: transparent;
border-style: solid;
border-width: 5px 0 5px 5px;
border-left-color: #cccccc;
margin-top: 5px;
margin-right: -10px;
}
/* Change the arrow colour while the submenu item is hovered. */
.dropdown-submenu:hover>a:after {
border-left-color: #adb5bd;
}
/* pull-left variant: cancel the float on the submenu item itself. */
.dropdown-submenu.pull-left {
float: none;
}
/* pull-left variant: open the child menu to the left of the parent item instead of the right. */
.dropdown-submenu.pull-left>.dropdown-menu {
left: -100%;
margin-left: 10px;
border-radius: 6px 0 6px 6px;
}
</style>
<script type="text/javascript">
// manage active state of menu based on current page
$(document).ready(function () {
  // File name of the current page: everything after the last "/" in the path.
  // Declared with `var` so it stays local to this handler; the undeclared
  // assignment used previously created an implicit global (window.href).
  var pathname = window.location.pathname;
  var href = pathname.substring(pathname.lastIndexOf('/') + 1);
  // A path ending in "/" has no file name; treat it as the index page.
  if (href === "") {
    href = "index.html";
  }
  // active menu anchor
  var menuAnchor = $('a[href="' + href + '"]');
  // mark the anchor link active (and if it's in a dropdown, also mark that active)
  var dropdown = menuAnchor.closest('li.dropdown');
  if (window.bootstrap) { // Bootstrap 4+
    menuAnchor.addClass('active');
    dropdown.find('> .dropdown-toggle').addClass('active');
  } else { // Bootstrap 3
    menuAnchor.parent().addClass('active');
    dropdown.addClass('active');
  }
  // Navbar adjustments: measure the first navbar and add 15px of breathing room.
  var navHeight = $(".navbar").first().height() + 15;
  var style = document.createElement('style');
  var pt = "padding-top: " + navHeight + "px; ";
  var mt = "margin-top: -" + navHeight + "px; ";
  var css = "";
  // offset scroll position for anchor links (for fixed navbar):
  // each section heading h1..h6 gets padding-top and an equal negative margin-top.
  for (var i = 1; i <= 6; i++) {
    css += ".section h" + i + "{ " + pt + mt + "}\n";
  }
  // The body gets the same top padding plus a fixed bottom padding.
  style.innerHTML = "body {" + pt + "padding-bottom: 40px; }\n" + css;
  document.head.appendChild(style);
});
</script>
<!-- tabsets -->
<style type="text/css">
.tabset-dropdown > .nav-tabs {
display: inline-table;
max-height: 500px;
min-height: 44px;
overflow-y: auto;
border: 1px solid #ddd;
border-radius: 4px;
}
.tabset-dropdown > .nav-tabs > li.active:before {
content: "";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
content: "";
border: none;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open:before {
content: "";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs > li.active {
display: block;
}
.tabset-dropdown > .nav-tabs > li > a,
.tabset-dropdown > .nav-tabs > li > a:focus,
.tabset-dropdown > .nav-tabs > li > a:hover {
border: none;
display: inline-block;
border-radius: 4px;
background-color: transparent;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li {
display: block;
float: none;
}
.tabset-dropdown > .nav-tabs > li {
display: none;
}
</style>
<!-- code folding -->
<style type="text/css">
.code-folding-btn { margin-bottom: 4px; }
</style>
<style type="text/css">
/* Vertical spacing above and below the table-of-contents container. */
#TOC {
margin: 25px 0px 20px 0px;
}
/* Viewports up to 768px wide: the TOC is positioned relatively and spans the full width. */
@media (max-width: 768px) {
#TOC {
position: relative;
width: 100%;
}
}
/* Print layout: float the main content column to the right. */
@media print {
.toc-content {
/* see https://github.com/w3c/csswg-drafts/issues/4434 */
float: right;
}
}
/* Horizontal padding of the main content column that sits beside the TOC. */
.toc-content {
padding-left: 30px;
padding-right: 40px;
}
/* Maximum width of the main container when it is a div. */
div.main-container {
max-width: 1200px;
}
/* Default TOC sidebar size: a fifth of the width, capped at 260px wide and 85% high. */
div.tocify {
width: 20%;
max-width: 260px;
max-height: 85%;
}
/* Viewports from 768px to 991px wide: give the TOC a quarter of the width. */
@media (min-width: 768px) and (max-width: 991px) {
div.tocify {
width: 25%;
}
}
/* Viewports up to 767px wide: the TOC takes the full width and the width cap is lifted. */
@media (max-width: 767px) {
div.tocify {
width: 100%;
max-width: none;
}
}
/* Fixed line height for TOC lists and their entries. */
.tocify ul, .tocify li {
line-height: 20px;
}
/* Entries under a sub-header are rendered slightly smaller. */
.tocify-subheader .tocify-item {
font-size: 0.90em;
}
/* Square corners on TOC list-group items. */
.tocify .list-group-item {
border-radius: 0px;
}
</style>
</head>
<body>
<div class="container-fluid main-container">
<!-- setup 3col/9col grid for toc_float and main content -->
<div class="row">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>
<div class="toc-content col-xs-12 col-sm-8 col-md-9">
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-bs-toggle="collapse" data-target="#navbar" data-bs-target="#navbar">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="index.html">Home</a>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
Exploratory Analyses
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="exploratory_data_analysis.html">Geographic Analyses by Confederation</a>
</li>
<li>
<a href="exploratory_data_analysis_2.html">Geographic Analyses by Country</a>
</li>
<li>
<a href="exploratory_data_analysis_3.html">Predictors of World Cup Wins</a>
</li>
</ul>
</li>
<li>
<a href="modelbuilding.html">Model Building</a>
</li>
<li>
<a href="https://axdthb-jiawen-zhao.shinyapps.io/shiny-appp/">Prediction Tool</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-right">
<li>
<a href="finalprojectreport.html">Project Report</a>
</li>
<li>
<!-- Icon-only link: aria-label supplies the accessible name; the icon itself is hidden from assistive technology -->
<a href="https://youtu.be/28KKsYA7vMM" aria-label="YouTube video">
<span class="fa fa-youtube fa-lg" aria-hidden="true"></span>
</a>
</li>
<li>
<!-- Icon-only link: aria-label supplies the accessible name; the icon itself is hidden from assistive technology -->
<a href="https://github.com/mcmvp9/p8105_finalproject" aria-label="GitHub repository">
<span class="fa fa-github fa-lg" aria-hidden="true"></span>
</a>
</li>
</ul>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
<div id="header">
<div class="btn-group pull-right float-right">
<button type="button" class="btn btn-default btn-xs btn-secondary btn-sm dropdown-toggle" data-toggle="dropdown" data-bs-toggle="dropdown" aria-haspopup="true" aria-expanded="false"><span>Code</span> <span class="caret"></span></button>
<ul class="dropdown-menu dropdown-menu-right" style="min-width: 50px;">
<li><a id="rmd-show-all-code" href="#">Show All Code</a></li>
<li><a id="rmd-hide-all-code" href="#">Hide All Code</a></li>
</ul>
</div>
<h1 class="title toc-ignore">Project Report</h1>
</div>
<style type="text/css">
h1.title {
text-align: center;
}
</style>
<div id="project-motivation" class="section level2">
<h2>Project Motivation</h2>
<p>The Fédération Internationale de Football Association (FIFA) Men’s
World Cup is an international soccer competition that takes place every
4 years and is contested by 32 national soccer teams of member
nations.</p>
<p>This year, the tournament takes place in Qatar between Nov. 20 and
Dec. 18, and thousands will be watching their favorite teams/nations
play. The tournament boasts some of the world’s best soccer athletes
and nations.</p>
<p> </p>
<div id="related-work" class="section level4">
<h4><strong>Related Work</strong></h4>
<p>The following resources are a sample of what inspired this
project.</p>
<ol style="list-style-type: decimal">
<li>“An in-depth analysis for FIFA World Cups.” <em>Vasileios
Stavropoulos</em>, January 15, 2018. <a
href="https://statathlon.com/an-in-depth-analysis-for-world-cups/">Link.</a></li>
<li>“Map Of The World Cup.” <em>Derek Shin</em>, April 15, 2018. <a
href="https://junsooshin.github.io/worldcupmap/">Link.</a></li>
<li>“FIFA World Cup 2022 - Statistics &amp; Facts.” <em>Statista</em>,
2022. <a
href="https://www.statista.com/topics/9211/2022-fifa-world-cup/#topicOverview">Link.</a></li>
</ol>
<p> </p>
</div>
</div>
<div id="questions" class="section level2">
<h2>Questions</h2>
<p>To better understand this year’s World Cup, we’re interested in
understanding the factors that predict the number of games won by each
nation that has participated in previous World Cups. We conduct
exploratory analyses to examine the factors that influence winning games
in the World Cup. Does the number of times a country has participated in
the World Cup influence the number of games won? How do FIFA rankings
for 2022 factor into the number of games won overall in the World Cup?
How does participation affect the number of games won overall in the
World Cup? Winning games is the key to success in soccer. In the World
Cup, this ultimately leads to, arguably, the biggest sports trophy in the
world.</p>
<p>Our questions remained fairly stable throughout the development of
our project. Since the beginning, we were interested in exploring
predictors of success in the World Cup. However, the outcome of interest
changed from the number of goals scored per game to the number of World
Cup wins, as we chose to use summary-level data available at the country
level rather than the game level. Additionally, we were initially
interested in using data from the 2018 World Cup, but decided to use
data consolidated from many World Cup tournaments instead.</p>
<p> </p>
</div>
<div id="data" class="section level2">
<h2>Data</h2>
<div id="dataset-creation" class="section level3">
<h3>Dataset Creation</h3>
<p>Data was pulled from several sources on the web via scraping and
downloaded as .csv files to create the final dataset for the project.
Data from multiple sources was merged together to create the final
dataset which is called <strong>worldcup_final</strong> and was stored
in the data folder in our repository. The as.character() and
as.numeric() functions were used to ensure that all data was stored as
the correct type. Additionally, the str_replace() function was used to
edit country names so that the names were consistent throughout all
datasets before they were merged together using merge() to create the
final dataset.</p>
</div>
<div id="data-sources" class="section level3">
<h3><em>Data Sources</em></h3>
<ul>
<li>Overall Team Records in the World Cup: <a
href="https://en.wikipedia.org/wiki/FIFA_World_Cup_records_and_statistics">Wikipedia.</a></li>
<li>Official FIFA Rankings 2022: <a
href="https://www.2026worldcupnorthamerica.com/fifa-ranking/">2026 World
Cup North America.</a></li>
<li>FIFA Confederations: <a
href="https://www.kaggle.com/datasets/fivethirtyeight/fivethirtyeight-fifa-dataset">Kaggle.</a></li>
<li>Country-Level Geographic Data: <a
href="https://worldpopulationreview.com/countries">World Population
Review.</a></li>
<li>Top Goal Scorers Per Country: <a
href="https://en.wikipedia.org/wiki/List_of_top_international_men%27s_football_goal_scorers_by_country">Wikipedia.</a></li>
<li>Shapefile of World Countries (used to create geographic data
visualizations): <a
href="https://hub.arcgis.com/datasets/esri::world-countries-generalized/explore?location=-0.799744%2C0.000000%2C2.64">ArcGIS.</a></li>
</ul>
<p> </p>
</div>
</div>
<div id="variables-of-interest" class="section level2">
<h2><em>Variables of Interest</em></h2>
<div id="outcomes" class="section level5">
<h5><strong>Outcomes</strong></h5>
<ul>
<li><code>w</code>: Number of soccer games a country has won in the
World Cup</li>
<li><code>prop_w</code>: Proportion of games a country has won in the
World Cup. Calculated as the number of games a country has won in the
World Cup divided by the number of games a country has played in the
World Cup (<code>w</code> / <code>pld</code>)</li>
</ul>
</div>
<div id="candidate-predictors" class="section level5">
<h5><strong>Candidate Predictors</strong></h5>
<ul>
<li><code>part</code>: Number of times a country has participated in the
World Cup.</li>
<li><code>pld</code>: Number of games a country has played in the World
Cup.</li>
<li><code>d</code>: Number of soccer games a country has drawn in the
World Cup.</li>
<li><code>rank</code>: Country’s official 2022 FIFA ranking (top team
ranked as 1).</li>
<li><code>gf</code>: Number of goals a country has scored against
opponents in the World Cup.</li>
<li><code>ga</code>: Number of goals scored against a country in the
World Cup.</li>
<li><code>gf_per_game</code>: Average number of goals a country has
scored against opponents per game. Calculated as the total number of
goals a country has scored against opponents in the World Cup divided by
the number of games a country has played in the World Cup (gf /
pld).</li>
<li><code>ga_per_game</code>: Average number of goals scored against a
country per game. Calculated as the total number of goals scored against
a country in the World Cup divided by the number of games a country has
played in the World Cup (ga / pld).</li>
<li><code>player</code>: Name of top record goal scorer (includes active
and inactive players).</li>
<li><code>goals</code>: Number of goals scored by top record goal scorer
(includes active and inactive players).</li>
<li><code>confederation</code>: Country’s FIFA Confederation.</li>
<li><code>land_area_km</code>: Total area of the land-based portions of
a country’s geography (measured in square kilometers, km²).</li>
</ul>
</div>
</div>
<div id="exploratory-data-analysis" class="section level2">
<h2>Exploratory Data Analysis</h2>
<div id="exploring-world-cup-statistics-geographically-by-confederation"
class="section level3">
<h3>Exploring World Cup Statistics Geographically By Confederation</h3>
<p>To explore the geographic distribution of World Cup data at both the
Country and Confederation level, we created interactive choropleth maps
using the tmap package and plots using the plotly package. To create the
maps, a shapefile containing the geographic boundaries of all countries
that have participated in the World Cup was updated from ArcGIS and
merged with the final dataset. This merged shapefile is available in the
“geofiles” subfolder within the “data” folder.</p>
<div
id="national-teams-that-have-participated-in-the-world-cup-by-fifa-confederation"
class="section level4">
<h4><em>National Teams That Have Participated in the World Cup by FIFA
Confederation</em></h4>
<p>The purpose of this section is to allow visitors of the website to
visualize where all 79 countries that have participated in the World Cup
are located geographically. It also allows visitors to visualize the 6
FIFA Confederation regions to understand which countries belong to each
Confederation. Visitors can clearly see that most of the national teams
that have participated in the World Cup have been from the Union of
European Football Associations in Europe (UEFA) Confederation.</p>
</div>
<div id="number-of-participations-in-the-world-cup"
class="section level4">
<h4><em>Number of Participations in the World Cup</em></h4>
<p>This section shows an interactive choropleth world map of
Confederation with the number of games won and the number of times a
country has participated in the World Cup overlaid by country. This
allows visitors to see that national teams that have participated in the
most World Cup tournaments appear to be spatially clustered in the
CONMEBOL (South America) and UEFA (Europe) Confederations.</p>
</div>
</div>
<div id="exploring-world-cup-statistics-geographically-by-country"
class="section level3">
<h3>Exploring World Cup Statistics Geographically By Country</h3>
<p>This section shows interactive choropleth world maps of various
predictors including number of games won, FIFA ranking, goals scored by
top player, and goal difference at the country level. Additionally,
interactive plotly bar graphs were incorporated to allow the visitor to
visualize how all 79 countries compare to one another. These
visualizations allow the visitor to see that Brazil is the highest
ranked national team, followed by Belgium, Argentina, France, and
England. The highest ranked national teams appear to be spatially
clustered in the CONMEBOL (South America) and UEFA (Europe)
Confederations, while the lowest ranked teams are in the AFC (Asia and
Australia) and CAF (Africa) Confederations. Interestingly, even though
Brazil ranks the highest for most of the predictor variables in the
dataset, Brazil does not hold one of the top 7 slots for the country
with the most goals scored by their top record goal scorer.</p>
</div>
<div id="exploring-predictors-of-world-cup-wins" class="section level3">
<h3>Exploring Predictors of World Cup Wins</h3>
<p>To explore the factors that influence winning games in the World Cup,
we created interactive scatter plots using the plotly package. The plots
illustrate bivariable associations between the proportion of games won
and various predictors for each country that has participated in the
FIFA World Cup.</p>
<p>Additionally, to ensure comparability between all countries that have
participated in the FIFA World Cup, we created the following new
variables by mutating existing variables from the dataset:</p>
<ul>
<li><code>prop_w</code>. Proportion of games a country has won in the
World Cup. Calculated as the number of games a country has won in the
World Cup divided by the number of games a country has played in the
World Cup (“w” / “pld”).</li>
<li><code>gf_per_game</code>. Average number of goals a country has
scored against opponents (i.e. “goals for”) per game. Calculated as the
total number of goals a country has scored against opponents in the
World Cup divided by the number of games a country has played in the
World Cup (“gf” / “pld”).</li>
<li><code>ga_per_game</code>. Average number of goals scored against a
country per game. Calculated as the total number of goals scored against
a country in the World Cup divided by the number of games a country has
played in the World Cup (“ga” / “pld”).</li>
</ul>
<p>By mutating these variables as described, there is greater
comparability between countries that have played 100+ World Cup
games and those that have only participated in 1 tournament. Otherwise, a
country may have a higher value for “goals for” than another simply
because they played in more games, rather than because they actually
scored more goals per game (on average).</p>
<p>The visualizations illustrate a positive association between
percentage of games won and the following predictors:</p>
<ul>
<li><code>part</code>: Number of times a country has participated in the
World Cup.</li>
<li><code>pld</code>: Number of games a country has played in the World
Cup.</li>
<li><code>gf_per_game</code>: Average number of goals a country has
scored against opponents per game.</li>
</ul>
<p>Therefore, as a team’s number of World Cup participations, games
played, and average goals scored per game increase, their winning
percentage also increases. Countries in the UEFA and CONMEBOL
Confederations have the highest percentage of games won and the highest
values for the three predictor variables above. These include top teams
such as Brazil, Germany, Italy, Argentina, France, and the
Netherlands.</p>
<p>The visualizations illustrate a negative association between
proportion of games won and the following predictors:</p>
<ul>
<li><code>rank</code>: Country’s official 2022 FIFA ranking (top team
ranked as 1).</li>
<li><code>ga_per_game</code>: Average number of goals scored against a
country per game.</li>
</ul>
<p>Therefore, as a team’s FIFA rank and average goals scored against per
game increase, their winning percentage decreases. This is expected
due to the FIFA ranking scheme that assigns the #1 slot to the top
performing team and the fact that teams that are doing well and winning
a high proportion of their games are receiving fewer goals scored against
them than teams that are not doing as well. Top teams in the UEFA and
CONMEBOL Confederations have the highest percentage of games won and the
lowest (best) FIFA ranking and average number of goals scored against
them per game.</p>
</div>
</div>
<div id="statistical-analysis---regression-modeling"
class="section level2">
<h2>Statistical Analysis - Regression Modeling</h2>
<p>In the dataset, we had 14 variables and 79 observations. To check for
collinearity, we use correlation plots to explore the relationship
between each pair of numeric predictors. To only include the numeric
variables, we use the <code>select</code> function to exclude the
qualitative variables, such as player, country, and gd.</p>
<p>We tried different approaches to model selection, including stepwise
selection, forward and backward selection, and LASSO, with different
selection criteria, such as AIC, BIC, and p-values. After comparing the
RMSE, R-squared, and p-values, we decided to use the model from forward
selection with p-values. Our final model was chosen by forward selection,
where we started with only the intercept and added one predictor at a
time. The criterion we chose is the p-value, which means we selected
predictors to include based on their p-values, until no more predictors with
a p-value less than 0.05 could be added to the model.</p>
<p>Because two variables were on the borderline of our
threshold, alpha = 0.05, we fit three different models that included either
or both of these two variables. Then, we used cross validation to check
which model had the best performance. After comparing their RMSE, we arrived at
the final model: <code>w</code> = -1.554369 + 0.661219 × <code>pld</code>
- 0.622216 × <code>d</code> + 0.015920 × <code>rank</code> + 0.153794 × <code>gf</code>
- 0.225378 × <code>ga</code>.</p>
<p>After comparing the summaries of the three models, we found that the
model with the variables <code>pld</code>, <code>d</code>,
<code>rank</code>, <code>gf</code>, and <code>ga</code> has the lowest RMSE
and highest adjusted R<sup>2</sup> (about 99%), which means our model explains about 99%
of the variance in the outcome (wins). The most significant variable in
the model is <code>ga</code>, with a coefficient of -0.225378. That means if the number
of goals scored against a country in the World Cup (since 1990)
increases by 1 goal, the predicted number of wins decreases by
0.225378, holding other variables constant.</p>
</div>
<div id="discussion-of-results" class="section level2">
<h2>Discussion of Results</h2>
<p>There are many factors that can be used to predict the number of
games won by a country participating in the World Cup. From our
statistical model results, we found that an increase in the number of
games played, FIFA rank, and goals for increases a country’s predicted
number of games won in the World Cup, while an increase in the number of
games drawn and goals against decreases a country’s predicted number of
games won. Additionally, our exploratory analyses revealed that success
in the World Cup appears to be geographically clustered in particular
regions of the world. This information can be helpful for soccer fans to
predict the success of their favorite teams in future World Cup
tournaments.</p>
<p> </p>
</div>
</div>
</div>
</div>
<script>
// add bootstrap table styles to pandoc tables
// Give pandoc-generated tables the Bootstrap table look.
function bootstrapStylePandocTables() {
  // Start from the rows marked "odd" and walk up tr -> tbody -> table.
  var pandocTables = $('tr.odd').parent('tbody').parent('table');
  pandocTables.addClass('table table-condensed');
}
// Apply the table styling once the DOM is ready.
$(function () {
  bootstrapStylePandocTables();
});
</script>
<!-- tabsets -->
<script>
// Build the tab sets once the DOM is ready.
$(function () {
  window.buildTabsets("TOC");
});
// Clicking any tab of a dropdown-style tabset toggles the "open" class on its tab list.
$(function () {
  var dropdownTabs = $('.tabset-dropdown > .nav-tabs > li');
  dropdownTabs.on('click', function () {
    var tabList = $(this).parent();
    tabList.toggleClass('nav-tabs-open');
  });
});
</script>
<!-- code folding -->
<script>
// Initialize code folding once the DOM is ready.
$(document).ready(function () {
// "hide" === "show" compares two different string literals, so the argument is always false.
// NOTE(review): the flag presumably means "show code by default"; confirm in codefolding.js.
window.initializeCodeFolding("hide" === "show");
});
</script>
<script>
// Build the floating table of contents once the DOM is ready.
$(function () {
  // temporarily add toc-ignore selector to headers for the consistency with Pandoc
  $('.unlisted.unnumbered').addClass('toc-ignore');
  // move toc-ignore selectors from section div to header
  var ignoredSections = $('div.section.toc-ignore');
  ignoredSections.removeClass('toc-ignore');
  ignoredSections.children('h1,h2,h3,h4,h5').addClass('toc-ignore');
  // tocify settings, gathered in a single literal
  var tocSettings = {
    selectors: "h1,h2,h3",
    theme: "bootstrap3",
    context: '.toc-content',
    // Anchor hash for a heading: drop the listed punctuation, then turn whitespace into underscores.
    hashGenerator: function (text) {
      var stripped = text.replace(/[.\\/?&!#<>]/g, '');
      return stripped.replace(/\s/g, '_');
    },
    ignoreSelector: ".toc-ignore",
    scrollTo: 0,
    showAndHide: true,
    smoothScroll: true
  };
  // tocify (the widget handle is fetched but not used further in this block)
  var tocWidget = $("#TOC").tocify(tocSettings).data("toc-tocify");
});
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
  // Create the MathJax <script> element at runtime and attach it to <head>,
  // instead of referencing it through a static tag.
  var mathjaxTag = document.createElement("script");
  mathjaxTag.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  mathjaxTag.type = "text/javascript";
  var headElement = document.getElementsByTagName("head")[0];
  headElement.appendChild(mathjaxTag);
})();
</script>
</body>
</html>