-
Notifications
You must be signed in to change notification settings - Fork 0
/
dvc.yaml
152 lines (152 loc) · 5.69 KB
/
dvc.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
stages:
  # DVC pipeline: each stage runs one Python script. `deps` lists the script
  # and the inputs that trigger a re-run when they change; `outs` lists the
  # files the stage produces. Every `cmd` is a `>-` folded scalar, so the
  # lines below are joined with single spaces into one shell command.

  # Runs retrieve_cve_dates.py with data/cve_df_filename as input and
  # data/published_cve_df_filename as output.
  retrieve_cve_dates:
    cmd: >-
      python3 dependency_search/retrieve_cve_dates.py
      data/cve_df_filename data/published_cve_df_filename
    deps:
      - data/cve_df_filename
      - dependency_search/retrieve_cve_dates.py
    outs:
      - data/published_cve_df_filename

  # Runs retrieve_cve_info.py on the output of `retrieve_cve_dates` and
  # produces the per-CVE info parquet file.
  retrieve_cve_info:
    cmd: >-
      python3 cve_information/retrieve_cve_info.py
      data/published_cve_df_filename
      data/unique_cve_info-cvss_cwe_etc.parquet
    deps:
      - data/published_cve_df_filename
      - cve_information/retrieve_cve_info.py
    outs:
      - data/unique_cve_info-cvss_cwe_etc.parquet

  # Runs retrieve_metadata_from_WoC_mongodb.py on data/cve_df_filename and
  # produces the per-project info parquet file.
  # NOTE(review): the script name suggests it queries a MongoDB instance,
  # which is not (and cannot be) listed in `deps` - confirm that re-runs are
  # forced manually when that data source changes.
  retrieve_project_info:
    cmd: >-
      python3 projects_stats/retrieve_metadata_from_WoC_mongodb.py
      data/cve_df_filename
      data/unique_project_info.parquet
    deps:
      - data/cve_df_filename
      - projects_stats/retrieve_metadata_from_WoC_mongodb.py
    outs:
      - data/unique_project_info.parquet

  # Combines data/cve_df_filename with the output of `retrieve_cve_dates`
  # into data/commits_with_published_cve_df.
  merge_cve_dates:
    cmd: >-
      python3 dependency_search/merge_cve_df_with_published_cve_df.py
      data/cve_df_filename
      data/published_cve_df_filename
      data/commits_with_published_cve_df
    deps:
      - data/cve_df_filename
      - data/published_cve_df_filename
      - dependency_search/merge_cve_df_with_published_cve_df.py
    outs:
      - data/commits_with_published_cve_df

  # Runs add_cvss_ranking.py on the output of `retrieve_cve_info` and
  # produces the "ranking" variant of the CVE info parquet file.
  add_cvss_rankings:
    cmd: >-
      python3 cve_information/add_cvss_ranking.py
      data/unique_cve_info-cvss_cwe_etc.parquet
      data/unique_cve_info-cvss_cwe_ranking_etc.parquet
    deps:
      - cve_information/add_cvss_ranking.py
      - data/unique_cve_info-cvss_cwe_etc.parquet
    outs:
      - data/unique_cve_info-cvss_cwe_ranking_etc.parquet

  # Generates data/extension_to_language.json.
  extension_to_language:
    cmd: >-
      python3 dependency_search/prepare_extension_to_language_dict.py
      data/extension_to_language.json
    deps:
      - dependency_search/prepare_extension_to_language_dict.py
      # Remote (HTTPS) dependency; the URL points at the `master` branch, not
      # at a fixed revision, so upstream edits can invalidate this stage.
      - https://raw.githubusercontent.com/github/linguist/master/lib/linguist/languages.yml
    outs:
      # cache: false - the output is not stored in the DVC cache.
      - data/extension_to_language.json:
          cache: false

  # Runs find_programming_language_for_cve.py on the outputs of
  # `merge_cve_dates` and `extension_to_language`; produces data/combined_df.
  languages_for_cve:
    cmd: >-
      python3 dependency_search/find_programming_language_for_cve.py
      data/commits_with_published_cve_df
      data/extension_to_language.json
      data/combined_df
    deps:
      - data/commits_with_published_cve_df
      - data/extension_to_language.json
      - dependency_search/find_programming_language_for_cve.py
    outs:
      - data/combined_df

  # Generates data/language_to_class.json; the script is the only dependency.
  language_to_class:
    cmd: >-
      python3 dependency_search/prepare_language_to_class_dict.py
      data/language_to_class.json
    deps:
      - dependency_search/prepare_language_to_class_dict.py
    outs:
      # cache: false - the output is not stored in the DVC cache.
      - data/language_to_class.json:
          cache: false

  # Runs prepare_dep_df.py on data/cve_df_filename and produces data/dep_df.
  dependency_df:
    cmd: >-
      python3 dependency_search/prepare_dep_df.py
      data/cve_df_filename data/dep_df
    deps:
      - data/cve_df_filename
      - dependency_search/prepare_dep_df.py
    outs:
      - data/dep_df

  # Runs clean_data_before_cve_lifespan_calculation.py on the outputs of
  # `languages_for_cve`, `dependency_df` and `language_to_class`; produces
  # data/cleaned_cve_df.
  clean_combined_df:
    cmd: >-
      python3 dependency_search/clean_data_before_cve_lifespan_calculation.py
      data/combined_df
      data/dep_df
      data/language_to_class.json
      data/cleaned_cve_df
    deps:
      - data/combined_df
      - data/dep_df
      - data/language_to_class.json
      - dependency_search/clean_data_before_cve_lifespan_calculation.py
    outs:
      - data/cleaned_cve_df

  # Runs calculate_cve_lifespan_per_project.py on data/cleaned_cve_df and
  # data/language_to_class.json; produces two lifespan data files.
  compute_cve_lifespan:
    cmd: >-
      python3 dependency_search/calculate_cve_lifespan_per_project.py
      data/cleaned_cve_df
      data/language_to_class.json
      data/cve_lifespan_language_df
      data/cve_lifespan_df
    deps:
      - data/cleaned_cve_df
      - data/language_to_class.json
      - dependency_search/calculate_cve_lifespan_per_project.py
    outs:
      - data/cve_lifespan_language_df
      - data/cve_lifespan_df

  # Runs merge_cve_info_and_cve_lifespan.py on the ranked CVE info and both
  # lifespan files; produces one "*_and_cve_info_df" file per lifespan file.
  merge_cve_info_into_lifespan:
    cmd: >-
      python3 dependency_search/merge_cve_info_and_cve_lifespan.py
      data/unique_cve_info-cvss_cwe_ranking_etc.parquet
      data/cve_lifespan_language_df
      data/cve_lifespan_df
      data/cve_lifespan_and_cve_info_df
      data/cve_lifespan_language_and_cve_info_df
    deps:
      - data/cve_lifespan_language_df
      - data/cve_lifespan_df
      - data/unique_cve_info-cvss_cwe_ranking_etc.parquet
      - dependency_search/merge_cve_info_and_cve_lifespan.py
    outs:
      - data/cve_lifespan_and_cve_info_df
      - data/cve_lifespan_language_and_cve_info_df

  # NOTE: the following two "merge_project_info_*" stages may be moved before
  # the "merge_cve_info_into_lifespan" stage. As written, both consume that
  # stage's outputs.

  # Runs merge_with_project_metadata.py on data/cve_lifespan_and_cve_info_df
  # and the project info parquet; produces data/cve_survival_input_df.
  merge_project_info_into_lifespan:
    cmd: >-
      python3 projects_stats/merge_with_project_metadata.py
      data/cve_lifespan_and_cve_info_df data/unique_project_info.parquet
      data/cve_survival_input_df
    deps:
      - projects_stats/merge_with_project_metadata.py
      - data/unique_project_info.parquet
      - data/cve_lifespan_and_cve_info_df
    outs:
      - data/cve_survival_input_df

  # Same script as the previous stage, applied to the "language" variant;
  # produces data/cve_survival_input_most_used_language_df. No stage in this
  # file consumes that output.
  merge_project_info_into_lifespan_language:
    cmd: >-
      python3 projects_stats/merge_with_project_metadata.py
      data/cve_lifespan_language_and_cve_info_df data/unique_project_info.parquet
      data/cve_survival_input_most_used_language_df
    deps:
      - projects_stats/merge_with_project_metadata.py
      - data/unique_project_info.parquet
      - data/cve_lifespan_language_and_cve_info_df
    outs:
      - data/cve_survival_input_most_used_language_df

  # Runs cve_surv_analysis.py on data/cve_survival_input_df. All declared
  # metrics, plots and outs live under eval/ and are kept out of the DVC
  # cache (cache: false).
  evaluate-cve_surv:
    cmd: >-
      python3 analysis/cve_surv_analysis.py
      --eval-path=eval/ --path-prefix=''
      data/cve_survival_input_df
    # Parameter keys tracked by DVC; no file is named here, so they come from
    # DVC's default parameters file.
    params:
      - eval.confidence
      - eval.bootstrap_samples
      - eval.description
      - eval.cve_survival_analysis
    deps:
      - analysis/cve_surv_analysis.py
      - data/cve_survival_input_df
    # NOTE(review): DVC documents JSON, TOML and YAML as metrics file formats;
    # confirm the two .csv entries below behave as intended under `metrics`.
    metrics:
      - eval/cve_surv_metrics.json:
          cache: false
      - eval/cve_surv_group_metrics.csv:
          cache: false
      - eval/cve_surv_statistics.csv:
          cache: false
    plots:
      - eval/cve_survival_function.png:
          cache: false
    outs:
      - eval/cve_surv_params.yaml:
          cache: false