Skip to content

Commit

Permalink
handle feeds with 0 agencies & routes.agency_id = null 🐛
Browse files Browse the repository at this point in the history
follow-up of 02d307b
  • Loading branch information
derhuerst committed Oct 31, 2024
1 parent bf78e07 commit 21d658e
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 17 deletions.
10 changes: 10 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const {Stringifier} = require('csv-stringify')
const formatters = require('./lib')
const getDependencies = require('./lib/deps')
const pkg = require('./package.json')
const {DEFAULT_AGENCY_ID} = require('./lib/agency')

const convertGtfsToSql = async function* (files, opt = {}) {
opt = {
Expand All @@ -24,6 +25,8 @@ const convertGtfsToSql = async function* (files, opt = {}) {
statsByAgencyIdAndRouteIdAndStopAndHour: 'none',
statsActiveTripsByHour: 'none',
schema: 'public',
// todo: find something more helpful than falling back to Etc/GMT!
defaultTimezone: new Intl.DateTimeFormat().resolvedOptions().timeZone || 'Etc/GMT',
postgraphile: false,
postgraphilePassword: process.env.POSTGRAPHILE_PGPASSWORD || null,
postgrest: false,
Expand Down Expand Up @@ -208,6 +211,7 @@ LANGUAGE sql;
const nrOfRowsByName = new Map()
const workingState = {
nrOfRowsByName,
insertDefaultAgency: false,
onlyAgencyId: null,
}

Expand All @@ -217,13 +221,19 @@ LANGUAGE sql;
// However, because we have to use left join instead of an inner join in tables referencing `agency`, this prevents the PostgreSQL query planner from doing some filter pushdowns, e.g.
// - when querying `arrivals_departures` by route, stop, date and t_departure/t_arrival
{
// Scan `agency`, reading at most 2 rows, and record the outcome in `workingState`:
// - exactly 1 row: `onlyAgencyId` is that row's `agency_id`
// - 2 or more rows: `onlyAgencyId` is `null`
// - 0 rows: `onlyAgencyId` is `DEFAULT_AGENCY_ID` and `insertDefaultAgency` is `true`
let agencies = 0
for await (const agency of await readCsv('agency')) {
// Tentatively remember this row's ID; it is reset below if a 2nd row shows up.
workingState.onlyAgencyId = agency.agency_id
if (++agencies >= 2) {
// There is no single agency, so there's no need to read any further rows.
workingState.onlyAgencyId = null
break
}
}
// We insert a mock agency in order to use an inner join in tables referencing `agency`.
if (agencies === 0) {
workingState.insertDefaultAgency = true
workingState.onlyAgencyId = DEFAULT_AGENCY_ID
}
}

for (const name of order) {
Expand Down
25 changes: 24 additions & 1 deletion lib/agency.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
'use strict'

const DEFAULT_AGENCY_ID = 'default-agency'

// https://gtfs.org/schedule/reference/#agencytxt
const beforeAll = (opt) => `\
CREATE TABLE "${opt.schema}".agency (
Expand Down Expand Up @@ -39,11 +41,32 @@ const formatAgencyRow = (a) => {
]
}

const afterAll = `\
/**
 * Builds the SQL that is emitted after all `agency` rows have been written.
 *
 * @param {object} opt - Options; reads `opt.schema` and `opt.defaultTimezone`.
 * @param {object} workingState - Shared state; reads `workingState.insertDefaultAgency`.
 * @returns {string} The `\.` terminator line, followed by an INSERT of the
 *   implicit default agency if `workingState.insertDefaultAgency` is truthy.
 */
const afterAll = (opt, workingState) => {
	// `\\.` emits a literal `\.` line – presumably the end-of-data marker of a
	// `COPY … FROM STDIN` started in `beforeAll` (not visible here; verify).
	let sql = `\
\\.
`

	if (workingState.insertDefaultAgency) {
		// The value ends up inside a single-quoted SQL string literal, so embedded
		// single quotes must be doubled to not terminate the literal prematurely.
		const timezone = String(opt.defaultTimezone).replace(/'/g, `''`)

		// Note: Within SQL string literals, a single quote is escaped by doubling it (`''`).
		// With PostgreSQL's default `standard_conforming_strings = on`, a backslash is
		// *not* an escape character, so `\'` would end the literal and break the query.
		sql += `\
INSERT INTO "${opt.schema}".agency (
agency_id,
agency_name,
agency_url,
agency_timezone
) VALUES (
'${DEFAULT_AGENCY_ID}',
'implicit default agency, the CSV file doesn''t contain one',
'http://example.org',
'${timezone}'
);
`
	}

	return sql
}

module.exports = {
DEFAULT_AGENCY_ID,
beforeAll,
formatRow: formatAgencyRow,
afterAll,
Expand Down
18 changes: 2 additions & 16 deletions lib/stop_times.js
Original file line number Diff line number Diff line change
Expand Up @@ -225,14 +225,7 @@ WITH stop_times_based AS NOT MATERIALIZED (
LEFT JOIN "${opt.schema}".stops stations ON stops.parent_station = stations.stop_id
JOIN "${opt.schema}".trips ON s.trip_id = trips.trip_id
JOIN "${opt.schema}".routes ON trips.route_id = routes.route_id
LEFT JOIN "${opt.schema}".agency ON (
-- The GTFS spec allows routes.agency_id to be NULL if there is exactly one agency in the feed.
-- Note: We implicitly rely on other parts of the code base to validate that agency has just one row!
-- It seems that GTFS has allowed this at least since 2016:
-- https://github.com/google/transit/blame/217e9bf/gtfs/spec/en/reference.md#L544-L554
routes.agency_id IS NULL -- match first (and only) agency
OR routes.agency_id = agency.agency_id -- match by ID
)
JOIN "${opt.schema}".agency ON routes.agency_id = agency.agency_id
JOIN "${opt.schema}".service_days ON trips.service_id = service_days.service_id
)
-- todo: this slows down slightly
Expand Down Expand Up @@ -465,14 +458,7 @@ WITH stop_times_based AS NOT MATERIALIZED (
) AS to_wheelchair_boarding
FROM "${opt.schema}".trips
LEFT JOIN "${opt.schema}".routes ON trips.route_id = routes.route_id
LEFT JOIN "${opt.schema}".agency ON (
-- The GTFS spec allows routes.agency_id to be NULL if there is exactly one agency in the feed.
-- Note: We implicitly rely on other parts of the code base to validate that agency has just one row!
-- It seems that GTFS has allowed this at least since 2016:
-- https://github.com/google/transit/blame/217e9bf/gtfs/spec/en/reference.md#L544-L554
routes.agency_id IS NULL -- match first (and only) agency
OR routes.agency_id = agency.agency_id -- match by ID
)
JOIN "${opt.schema}".agency ON routes.agency_id = agency.agency_id
LEFT JOIN "${opt.schema}".stop_times ON trips.trip_id = stop_times.trip_id
LEFT JOIN "${opt.schema}".stops from_stops ON stop_times.stop_id = from_stops.stop_id
LEFT JOIN "${opt.schema}".stops from_stations ON from_stops.parent_station = from_stations.stop_id
Expand Down

0 comments on commit 21d658e

Please sign in to comment.