Insert an implicit default agency when the `agency` CSV yields no rows.

- index.js: adds the `defaultTimezone` option (the runtime's resolved time
  zone, falling back to Etc/GMT) and, when no agency row was read, sets
  `workingState.insertDefaultAgency` and points `workingState.onlyAgencyId`
  at DEFAULT_AGENCY_ID.
- lib/agency.js: `afterAll` becomes a function of (opt, workingState); after
  the COPY terminator it appends an INSERT for the placeholder agency when
  `workingState.insertDefaultAgency` is set. The apostrophe in the
  agency_name literal is written as '' (SQL doubled quote); a backslash
  does not escape a quote in a regular PostgreSQL string constant.
- lib/stop_times.js: both queries now use a plain inner JOIN on
  routes.agency_id = agency.agency_id instead of the LEFT JOIN that also
  matched routes.agency_id IS NULL.

NOTE(review): the inner JOIN drops routes whose agency_id is NULL; this
presumably relies on routes.agency_id being filled from
`workingState.onlyAgencyId` elsewhere -- confirm, it is not part of this patch.
NOTE(review): `opt.defaultTimezone` is interpolated into the SQL unescaped.

diff --git a/index.js b/index.js
index 003194a..0a8a394 100644
--- a/index.js
+++ b/index.js
@@ -9,6 +9,7 @@ const {Stringifier} = require('csv-stringify')
 const formatters = require('./lib')
 const getDependencies = require('./lib/deps')
 const pkg = require('./package.json')
+const {DEFAULT_AGENCY_ID} = require('./lib/agency')
 
 const convertGtfsToSql = async function* (files, opt = {}) {
 	opt = {
@@ -24,6 +25,8 @@ const convertGtfsToSql = async function* (files, opt = {}) {
 		statsByAgencyIdAndRouteIdAndStopAndHour: 'none',
 		statsActiveTripsByHour: 'none',
 		schema: 'public',
+		// todo: find something more helpful than falling back to Etc/GMT!
+		defaultTimezone: new Intl.DateTimeFormat().resolvedOptions().timeZone || 'Etc/GMT',
 		postgraphile: false,
 		postgraphilePassword: process.env.POSTGRAPHILE_PGPASSWORD || null,
 		postgrest: false,
@@ -208,6 +211,7 @@ LANGUAGE sql;
 	const nrOfRowsByName = new Map()
 	const workingState = {
 		nrOfRowsByName,
+		insertDefaultAgency: false,
 		onlyAgencyId: null,
 	}
 
@@ -217,6 +221,7 @@ LANGUAGE sql;
 	// However, because we have to use left join instead of an inner join in tables referencing `agency`, this prevents the PostgreSQL query planner from doing some filter pushdowns, e.g.
 	// - when querying `arrivals_departures` by route, stop, date and t_departure/t_arrival
 	{
+		let agencies = 0
 		for await (const agency of await readCsv('agency')) {
 			workingState.onlyAgencyId = agency.agency_id
 			if (++agencies >= 2) {
@@ -224,6 +229,11 @@ LANGUAGE sql;
 				break
 			}
 		}
+		// We insert a mock agency in order to use an inner join in tables referencing `agency`.
+		if (agencies === 0) {
+			workingState.insertDefaultAgency = true
+			workingState.onlyAgencyId = DEFAULT_AGENCY_ID
+		}
 	}
 
 	for (const name of order) {
diff --git a/lib/agency.js b/lib/agency.js
index f9e3697..7cd2b03 100644
--- a/lib/agency.js
+++ b/lib/agency.js
@@ -1,5 +1,7 @@
 'use strict'
 
+const DEFAULT_AGENCY_ID = 'default-agency'
+
 // https://gtfs.org/schedule/reference/#agencytxt
 const beforeAll = (opt) => `\
 CREATE TABLE "${opt.schema}".agency (
@@ -39,11 +41,32 @@ const formatAgencyRow = (a) => {
 	]
 }
 
-const afterAll = `\
+const afterAll = (opt, workingState) => {
+	let sql = `\
 \\.
 `
 
+	if (workingState.insertDefaultAgency) {
+		sql += `\
+INSERT INTO "${opt.schema}".agency (
+	agency_id,
+	agency_name,
+	agency_url,
+	agency_timezone
+) VALUES (
+	'${DEFAULT_AGENCY_ID}',
+	'implicit default agency, the CSV file doesn''t contain one',
+	'http://example.org',
+	'${opt.defaultTimezone}'
+);
+`
+	}
+
+	return sql
+}
+
 module.exports = {
+	DEFAULT_AGENCY_ID,
 	beforeAll,
 	formatRow: formatAgencyRow,
 	afterAll,
diff --git a/lib/stop_times.js b/lib/stop_times.js
index dd1c37c..a2ad1c9 100644
--- a/lib/stop_times.js
+++ b/lib/stop_times.js
@@ -225,14 +225,7 @@ WITH stop_times_based AS NOT MATERIALIZED (
 	LEFT JOIN "${opt.schema}".stops stations ON stops.parent_station = stations.stop_id
 	JOIN "${opt.schema}".trips ON s.trip_id = trips.trip_id
 	JOIN "${opt.schema}".routes ON trips.route_id = routes.route_id
-	LEFT JOIN "${opt.schema}".agency ON (
-		-- The GTFS spec allows routes.agency_id to be NULL if there is exactly one agency in the feed.
-		-- Note: We implicitly rely on other parts of the code base to validate that agency has just one row!
-		-- It seems that GTFS has allowed this at least since 2016:
-		-- https://github.com/google/transit/blame/217e9bf/gtfs/spec/en/reference.md#L544-L554
-		routes.agency_id IS NULL -- match first (and only) agency
-		OR routes.agency_id = agency.agency_id -- match by ID
-	)
+	JOIN "${opt.schema}".agency ON routes.agency_id = agency.agency_id
 	JOIN "${opt.schema}".service_days ON trips.service_id = service_days.service_id
 )
 -- todo: this slows down slightly
@@ -465,14 +458,7 @@ WITH stop_times_based AS NOT MATERIALIZED (
 	) AS to_wheelchair_boarding
 	FROM "${opt.schema}".trips
 	LEFT JOIN "${opt.schema}".routes ON trips.route_id = routes.route_id
-	LEFT JOIN "${opt.schema}".agency ON (
-		-- The GTFS spec allows routes.agency_id to be NULL if there is exactly one agency in the feed.
-		-- Note: We implicitly rely on other parts of the code base to validate that agency has just one row!
-		-- It seems that GTFS has allowed this at least since 2016:
-		-- https://github.com/google/transit/blame/217e9bf/gtfs/spec/en/reference.md#L544-L554
-		routes.agency_id IS NULL -- match first (and only) agency
-		OR routes.agency_id = agency.agency_id -- match by ID
-	)
+	JOIN "${opt.schema}".agency ON routes.agency_id = agency.agency_id
 	LEFT JOIN "${opt.schema}".stop_times ON trips.trip_id = stop_times.trip_id
 	LEFT JOIN "${opt.schema}".stops from_stops ON stop_times.stop_id = from_stops.stop_id
 	LEFT JOIN "${opt.schema}".stops from_stations ON from_stops.parent_station = from_stations.stop_id