Skip to content

Commit

Permalink
Merge pull request #3262 from cal-itp/kim/ntd_switch_to_sql
Browse files Browse the repository at this point in the history
new sql models for service checks
  • Loading branch information
csuyat-dot committed Feb 28, 2024
2 parents 53581f6 + 05b1c71 commit d6e9041
Show file tree
Hide file tree
Showing 8 changed files with 478 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ with voms_rr20 as (
select organization,
fiscal_year,
AVG(VOMX) as rr20_voms
FROM {{ ref('int_ntd_rr20_service_alldata') }}
FROM {{ ref('int_ntd_rr20_service_1alldata') }}
GROUP BY organization, fiscal_year
),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ SELECT
Fare_Revenues / NULLIF(Annual_UPT,0) as fare_rev_per_trip,
Annual_VRM / NULLIF(Annual_VRH,0) as rev_speed,
Annual_UPT / NULLIF(Annual_VRH,0) as trips_per_hr
FROM {{ ref('int_ntd_rr20_service_alldata') }}
FROM {{ ref('int_ntd_rr20_service_1alldata') }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
-- Pivots the long-form RR-20 service metrics from
-- int_ntd_rr20_service_2ratioslong into wide form: one output row per
-- (organization, mode), with one column per metric per fiscal year,
-- named <metric prefix>_<year> (e.g. cph_2022, cph_2023, cph_2024).
--
-- Implementation notes:
--   * All metrics are pivoted in a single grouped pass using conditional
--     aggregation. This replaces nine separate PIVOT CTEs stitched together
--     with FULL OUTER JOINs that were all keyed on the first CTE; those joins
--     could not match rows whose organization or mode is NULL, and dropped the
--     keys of any row missing from the first CTE. GROUP BY keeps NULL keys
--     together in one group, so neither problem can occur here.
--   * AVG is kept as the aggregate to match the earlier PIVOT(AVG(...)) logic.
--     A CASE with no ELSE yields NULL for other years, and AVG ignores NULLs.
--   * To check another fiscal year, add it to check_years below (and remove
--     any year that is no longer wanted). Every metric picks it up.

{# Fiscal years that get their own output column for every metric. #}
{% set check_years = [2022, 2023, 2024] %}

{# (source column in longform, output column prefix), in output column order. #}
{% set metrics = [
    ('cost_per_hr', 'cph'),
    ('miles_per_veh', 'mpv'),
    ('fare_rev_per_trip', 'frpt'),
    ('rev_speed', 'rev_speed'),
    ('trips_per_hr', 'tph'),
    ('VOMX', 'voms'),
    ('Annual_VRM', 'vrm'),
    ('Annual_VRH', 'vrh'),
    ('Annual_UPT', 'upt')
] %}

with longform as (
    select
        organization,
        fiscal_year,
        mode,
        VOMX,
        Annual_VRM,
        Annual_VRH,
        Annual_UPT,
        cost_per_hr,
        miles_per_veh,
        fare_rev_per_trip,
        rev_speed,
        trips_per_hr
    from {{ ref('int_ntd_rr20_service_2ratioslong') }}
)

select
    organization,
    mode
    {%- for metric_col, prefix in metrics %}
    {%- for yr in check_years %}
    , avg(case when fiscal_year = {{ yr }} then {{ metric_col }} end) as {{ prefix }}_{{ yr }}
    {%- endfor %}
    {%- endfor %}
from longform
group by
    organization,
    mode
order by
    organization,
    mode
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,23 @@ models:
description: |
Setting up the RR-20 data for comparing totals, for operating and capital expenses, reported in different areas of the RR-20
For NTD validation error ID #s RR20F-001OA, RR20F-001C, RR20F-182
- name: int_ntd_rr20_service_alldata
- name: int_ntd_rr20_service_1alldata
description: |
Combines 2023 and 2022 data in preparation for doing NTD validation checks.
1st intermediate cleaning step for service data. Combines 2023 and 2022 data in preparation for doing NTD validation checks.
The 2022 data was *not* from the API and so formatted differently
We are *assuming* that data in 2024 and onwards will be the same format as 2023
If you get errors in 2024, check which columns may differ and read errors carefully.
- name: int_ntd_rr20_service_ratios
NOTE!!!! You must add the 2024 data, when it is available, with another CTE. Otherwise NO 2024 DATA WILL BE CHECKED.
- name: int_ntd_rr20_service_2ratioslong
description: |
makes ratios for validation checks
2nd intermediate cleaning step for service data. Calculates all needed NTD metrics that are ratios of two other values. E.g., cost per hour,
miles_per_veh,fare_rev_per_trip,rev_speed,trips_per_hr.
NOTE this only works because there is ONE row per org, year, mode.
If this ever changes or gets duplicated upstream, then these calculations will be off.
- name: int_ntd_rr20_service_3ratios_wide
description: |
3rd intermediate cleaning step for service data. Pivots data from the 2nd cleaning step (above) from longform to wide form.
NOTE!!!! A column for 2024 has already been added, but in 2025 and beyond you must go in and add a column for the year of interest
e.g., 2025, (and up to you to delete any columns you don't want, e.g. 2022)
config:
materialized: table
4 changes: 2 additions & 2 deletions warehouse/models/mart/ntd_validation/_mart_ntd_validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ version: 2
models:
- name: fct_ntd_rr20_service_checks
description: |
Runs validation checks on the RR-20 service data. Source data is int_ntd_rr20_service_ratios.
This model is still in python but should be converted to SQL as time allows.
Runs validation checks on the RR-20 service data. Source data is int_ntd_rr20_service_3ratios_wide.
NOTE: This model uses "this_year" and "last_year" as dynamic variables based on the date on which it is run.
- name: fct_ntd_a30_vomscheck
description: |
Runs various checks on VOMS data submitted to NTD, that are also in the file voms_inventory_check.py.
Expand Down
Loading

0 comments on commit d6e9041

Please sign in to comment.