Skip to content

Commit

Permalink
Improve GTFS chapter
Browse files Browse the repository at this point in the history
  • Loading branch information
estebanzimanyi committed Jul 3, 2024
1 parent b10b2b0 commit ca0763b
Showing 1 changed file with 44 additions and 59 deletions.
103 changes: 44 additions & 59 deletions docs/GTFS.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@
<para><varname>calendar_dates.txt</varname> define exceptions to the default service patterns defined in <varname>calendar.txt</varname>. There are two types of exceptions: 1 means that the service has been added for the specified date, and 2 means that the service has been removed for the specified date.</para>
</listitem>

<listitem>
<para><varname>route_types.txt</varname> contains transportation types used on routes, such as bus, metro, tramway, etc.</para>
</listitem>

<listitem>
<para><varname>routes.txt</varname> contains transit routes. A route is a group of trips that are displayed to riders as a single service.</para>
</listitem>
Expand Down Expand Up @@ -85,7 +81,6 @@ CREATE TABLE calendar (
end_date date NOT NULL,
CONSTRAINT calendar_pkey PRIMARY KEY (service_id)
);
CREATE INDEX calendar_service_id ON calendar (service_id);

CREATE TABLE exception_types (
exception_type int PRIMARY KEY,
Expand All @@ -97,19 +92,14 @@ CREATE TABLE calendar_dates (
date date NOT NULL,
exception_type int REFERENCES exception_types(exception_type)
);
CREATE INDEX calendar_dates_dateidx ON calendar_dates (date);

CREATE TABLE route_types (
route_type int PRIMARY KEY,
description text
);
CREATE INDEX calendar_dates_date_idx ON calendar_dates (date);

CREATE TABLE routes (
route_id text,
route_short_name text DEFAULT '',
route_long_name text DEFAULT '',
route_desc text DEFAULT '',
route_type int REFERENCES route_types(route_type),
route_type int,
route_url text,
route_color text,
route_text_color text,
Expand All @@ -120,17 +110,17 @@ CREATE TABLE shapes (
shape_id text NOT NULL,
shape_pt_lat double precision NOT NULL,
shape_pt_lon double precision NOT NULL,
shape_pt_sequence int NOT NULL
shape_pt_sequence int NOT NULL,
shape_dist_traveled float NOT NULL
);
CREATE INDEX shapes_shape_key ON shapes (shape_id);
CREATE INDEX shapes_shape_id_idx ON shapes (shape_id);

-- Create a table to store the shape geometries
CREATE TABLE shape_geoms (
shape_id text NOT NULL,
shape_geom geometry('LINESTRING', 4326),
shape_geom geometry('LINESTRING', 3857),
CONSTRAINT shape_geom_pkey PRIMARY KEY (shape_id)
);
CREATE INDEX shape_geoms_key ON shapes (shape_id);

CREATE TABLE location_types (
location_type int PRIMARY KEY,
Expand All @@ -146,9 +136,9 @@ CREATE TABLE stops (
stop_lon double precision,
zone_id text,
stop_url text,
location_type integer REFERENCES location_types(location_type),
location_type integer REFERENCES location_types(location_type),
parent_station integer,
stop_geom geometry('POINT', 4326),
stop_geom geometry('POINT', 3857),
platform_code text DEFAULT NULL,
CONSTRAINT stops_pkey PRIMARY KEY (stop_id)
);
Expand All @@ -160,7 +150,7 @@ CREATE TABLE pickup_dropoff_types (

CREATE TABLE stop_times (
trip_id text NOT NULL,
-- Check that casting to time interval works.
-- Check that casting to time interval works
arrival_time interval CHECK (arrival_time::interval = arrival_time::interval),
departure_time interval CHECK (departure_time::interval = departure_time::interval),
stop_id text,
Expand All @@ -183,7 +173,6 @@ CREATE TABLE trips (
shape_id text,
CONSTRAINT trips_pkey PRIMARY KEY (trip_id)
);
CREATE INDEX trips_trip_id ON trips (trip_id);

INSERT INTO exception_types (exception_type, description) VALUES
(1, 'service has been added'),
Expand Down Expand Up @@ -218,8 +207,6 @@ COPY trips(route_id,service_id,trip_id,trip_headsign,direction_id,block_id,shape
FROM '/home/gtfs_tutorial/trips.txt' DELIMITER ',' CSV HEADER;
COPY agency(agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone)
FROM '/home/gtfs_tutorial/agency.txt' DELIMITER ',' CSV HEADER;
COPY route_types(route_type,description)
FROM '/home/gtfs_tutorial/route_types.txt' DELIMITER ',' CSV HEADER;
COPY routes(route_id,route_short_name,route_long_name,route_desc,route_type,route_url,
route_color,route_text_color) FROM '/home/gtfs_tutorial/routes.txt' DELIMITER ','
CSV HEADER;
Expand All @@ -233,12 +220,12 @@ CSV HEADER;
<programlisting language="sql">
INSERT INTO shape_geoms
SELECT shape_id, ST_MakeLine(array_agg(
ST_SetSRID(ST_MakePoint(shape_pt_lon, shape_pt_lat),4326) ORDER BY shape_pt_sequence))
ST_Transform(ST_Point(shape_pt_lon, shape_pt_lat, 4326), 3857) ORDER BY shape_pt_sequence))
FROM shapes
GROUP BY shape_id;

UPDATE stops
SET stop_geom = ST_SetSRID(ST_MakePoint(stop_lon, stop_lat),4326);
SET stop_geom = ST_Transform(ST_Point(stop_lon, stop_lat, 4326), 3857);
</programlisting>
The visualization of the routes and stops in QGIS is given in <xref linkend="stib" />. In the figure, red lines correspond to the trajectories of vehicles, while orange points correspond to the location of stops.
</para>
Expand All @@ -253,21 +240,21 @@ SET stop_geom = ST_SetSRID(ST_MakePoint(stop_lon, stop_lat),4326);
<sect1>
<title>Transforming GTFS Data for MobilityDB</title>
<para>
We start by creating a table that contains couples of <varname>service_id</varname> and <varname>date</varname> defining the dates at which a service is provided.
We start by creating a table that contains couples of <varname>service_id</varname> and <varname>date</varname> defining the dates at which a service is provided.
<programlisting language="sql">
DROP TABLE IF EXISTS service_dates;
CREATE TABLE service_dates AS (
SELECT service_id, date_trunc('day', d)::date AS date
FROM calendar c, generate_series(start_date, end_date, '1 day'::interval) AS d
WHERE (
(monday = 1 AND extract(isodow FROM d) = 1) OR
(tuesday = 1 AND extract(isodow FROM d) = 2) OR
(wednesday = 1 AND extract(isodow FROM d) = 3) OR
(thursday = 1 AND extract(isodow FROM d) = 4) OR
(friday = 1 AND extract(isodow FROM d) = 5) OR
(saturday = 1 AND extract(isodow FROM d) = 6) OR
(sunday = 1 AND extract(isodow FROM d) = 7)
)
(monday = 1 AND extract(isodow FROM d) = 1) OR
(tuesday = 1 AND extract(isodow FROM d) = 2) OR
(wednesday = 1 AND extract(isodow FROM d) = 3) OR
(thursday = 1 AND extract(isodow FROM d) = 4) OR
(friday = 1 AND extract(isodow FROM d) = 5) OR
(saturday = 1 AND extract(isodow FROM d) = 6) OR
(sunday = 1 AND extract(isodow FROM d) = 7)
)
EXCEPT
SELECT service_id, date
FROM calendar_dates WHERE exception_type = 2
Expand Down Expand Up @@ -304,8 +291,8 @@ FROM trips t JOIN stop_times s ON t.trip_id = s.trip_id;

UPDATE trip_stops t
SET perc = CASE
WHEN stop_sequence = 1 then 0.0
WHEN stop_sequence = no_stops then 1.0
WHEN stop_sequence = 1 THEN 0.0
WHEN stop_sequence = no_stops THEN 1.0
ELSE ST_LineLocatePoint(g.shape_geom, s.stop_geom)
END
FROM shape_geoms g, stops s
Expand Down Expand Up @@ -359,7 +346,7 @@ SET seg_length = ST_Length(seg_geom), no_points = ST_NumPoints(seg_geom);
</para>

<para>
The geometry of a segment is a linestring containing multiple points. From the previous table we know at which time the trip arrived at the first point and at the last point of the segment. To determine at which time the trip arrived at the intermediate points of the segments, we create a table <varname>trip_points</varname> that contains all the points composing the geometry of a segment.
The geometry of a segment is a linestring containing multiple points. From table <varname>trip_stops</varname> we know at which time the trip arrived at the first point and at the last point of the segment. To determine at which time the trip arrived at the intermediate points of the segments, we create a table <varname>trip_points</varname> that contains all the points composing the geometry of a segment.
<programlisting language="sql">
DROP TABLE IF EXISTS trip_points;
CREATE TABLE trip_points (
Expand All @@ -377,29 +364,26 @@ INSERT INTO trip_points (trip_id, route_id, service_id, stop1_sequence,
point_sequence, point_geom, point_arrival_time)
WITH temp1 AS (
SELECT trip_id, route_id, service_id, stop1_sequence, stop2_sequence,
no_stops, stop1_arrival_time, stop2_arrival_time, seg_length,
(dp).path[1] AS point_sequence, no_points, (dp).geom as point_geom
FROM trip_segs, ST_DumpPoints(seg_geom) AS dp
),
no_stops, stop1_arrival_time, stop2_arrival_time, seg_length,
(dp).path[1] AS point_sequence, no_points, (dp).geom as point_geom
FROM trip_segs, ST_DumpPoints(seg_geom) AS dp ),
temp2 AS (
SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time,
stop2_arrival_time, seg_length, point_sequence, no_points, point_geom
FROM temp1
WHERE point_sequence &lt;&gt; no_points OR stop2_sequence = no_stops
),
SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time,
stop2_arrival_time, seg_length, point_sequence, no_points, point_geom
FROM temp1
WHERE point_sequence != no_points OR stop2_sequence = no_stops ),
temp3 AS (
SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time,
stop2_arrival_time, point_sequence, no_points, point_geom,
ST_Length(ST_MakeLine(array_agg(point_geom) OVER w)) / seg_length AS perc
FROM temp2 WINDOW w AS (PARTITION BY trip_id, service_id, stop1_sequence
ORDER BY point_sequence)
)
SELECT trip_id, route_id, service_id, stop1_sequence, stop1_arrival_time,
stop2_arrival_time, point_sequence, no_points, point_geom,
ST_Length(ST_MakeLine(array_agg(point_geom) OVER w)) / seg_length AS perc
FROM temp2 WINDOW w AS (PARTITION BY trip_id, service_id, stop1_sequence
ORDER BY point_sequence) )
SELECT trip_id, route_id, service_id, stop1_sequence, point_sequence, point_geom,
CASE
WHEN point_sequence = 1 then stop1_arrival_time
WHEN point_sequence = no_points then stop2_arrival_time
ELSE stop1_arrival_time + ((stop2_arrival_time - stop1_arrival_time) * perc)
END AS point_arrival_time
CASE
WHEN point_sequence = 1 THEN stop1_arrival_time
WHEN point_sequence = no_points THEN stop2_arrival_time
ELSE stop1_arrival_time + ((stop2_arrival_time - stop1_arrival_time) * perc)
END AS point_arrival_time
FROM temp3;
</programlisting>
In the temporary table <varname>temp1</varname> we use the function <varname>ST_DumpPoints</varname> to obtain the points composing the geometry of a segment. Nevertheless, this table contains duplicate points, that is, the last point of a segment is equal to the first point of the next one. In the temporary table <varname>temp2</varname> we filter out the last point of a segment unless it is the last segment of the trip. In the temporary table <varname>temp3</varname> we compute in the attribute <varname>perc</varname> the relative position of a point within a trip segment with window functions. For this we use the function <varname>ST_MakeLine</varname> to construct the subsegment from the first point of the segment to the current one, determine the length of the subsegment with function <varname>ST_Length</varname> and divide this length by the overall segment length. Finally, in the outer query we use the computed percentage to determine the arrival time to that point.
Expand Down Expand Up @@ -441,14 +425,15 @@ CREATE TABLE trips_mdb (
);

INSERT INTO trips_mdb(trip_id, service_id, route_id, date, trip)
SELECT trip_id, service_id, route_id, date, tgeompoint_seq(array_agg(tgeompoint_inst(point_geom, t) ORDER BY T))
SELECT trip_id, service_id, route_id, date, tgeompointSeq(array_agg(
tgeompoint(point_geom, t) ORDER BY T))
FROM trips_input
GROUP BY trip_id, service_id, route_id, date;

INSERT INTO trips_mdb(trip_id, service_id, route_id, date, trip)
SELECT trip_id, route_id, t.service_id, d.date,
shift(trip, make_interval(days => d.date - t.date))
FROM trips_mdb t JOIN service_dates d ON t.service_id = d.service_id AND t.date &lt;&gt; d.date;
shiftTime(trip, make_interval(days => d.date - t.date))
FROM trips_mdb t JOIN service_dates d ON t.service_id = d.service_id AND t.date != d.date;
</programlisting>
In the first <varname>INSERT</varname> statement we group the rows in the <varname>trips_input</varname> table by <varname>trip_id</varname> and <varname>date</varname> while keeping the <varname>route_id</varname> atribute, use the <varname>array_agg</varname> function to construct an array containing the temporal points composing the trip ordered by time, and compute the trip from this array using the function <varname>tgeompointseq</varname>. As explained above, table <varname>trips_input</varname> only contains the first date of a trip. In the second <varname>INSERT</varname> statement we add the trips for all the other dates with the function <varname>shift</varname>.
</para>
Expand Down

0 comments on commit ca0763b

Please sign in to comment.