Skip to content

Commit

Permalink
DuckDB [todo]
Browse files Browse the repository at this point in the history
  • Loading branch information
derhuerst committed Jul 8, 2024
1 parent 5321ae0 commit 3329074
Show file tree
Hide file tree
Showing 38 changed files with 1,846 additions and 2,373 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ pnpm-debug.log
/shrinkwrap.yaml

/test/amtrak-gtfs-2021-10-06
/test/*.duckdb

/*.gtfs
/*.gtfs.zip
/*.gtfs.tar.gz
/*.gtfs.tar.zst

/*.duckdb
94 changes: 18 additions & 76 deletions cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,6 @@ const {
'lower-case-lang-codes': {
type: 'boolean',
},
'stops-location-index': {
type: 'boolean',
},
'stats-by-route-date': {
type: 'string',
},
Expand All @@ -59,21 +56,6 @@ const {
'schema': {
type: 'string',
},
'postgraphile': {
type: 'boolean',
},
'postgraphile-password': {
type: 'string',
},
'postgrest': {
type: 'boolean',
},
'postgrest-password': {
type: 'string',
},
'postgrest-query-cost-limit': {
type: 'string',
},
'import-metadata': {
type: 'boolean',
}
Expand All @@ -84,7 +66,7 @@ const {
if (flags.help) {
process.stdout.write(`
Usage:
gtfs-to-sql [options] [--] <gtfs-file> ...
import-gtfs-into-duckdb [options] [--] <path-to-duckdb> <gtfs-file> ...
Options:
--silent -s Don't show files being converted.
--require-dependencies -d Require files that the specified GTFS files depend
Expand All @@ -101,8 +83,6 @@ Options:
--routes-without-agency-id Don't require routes.txt items to have an agency_id.
--stops-without-level-id Don't require stops.txt items to have a level_id.
Default if levels.txt has not been provided.
--stops-location-index Create a spatial index on stops.stop_loc for efficient
queries by geolocation.
--lower-case-lang-codes Accept Language Codes (e.g. in feed_info.feed_lang)
with a different casing than the official BCP-47
language tags (as specified by the GTFS spec),
Expand All @@ -123,34 +103,18 @@ Options:
currently running trips over time, by hour.
Like --stats-by-route-date, this flag accepts
none, view & materialized-view.
--schema The schema to use for the database. Default: public
Even when importing into a schema other than \`public\`,
a function \`public.gtfs_via_postgres_import_version()\`
gets created, to ensure that multiple imports into the
same database are all made using the same version. See
also multiple-datasets.md in the docs.
--postgraphile Tweak generated SQL for PostGraphile usage.
https://www.graphile.org/postgraphile/
--postgraphile-password Password for the PostGraphile PostgreSQL user.
Default: $POSTGRAPHILE_PGPASSWORD, fallback random.
--postgrest Tweak generated SQL for PostgREST usage.
Please combine it with --schema.
https://postgrest.org/
--postgrest-password Password for the PostgREST PostgreSQL user \`web_anon\`.
Default: $POSTGREST_PGPASSWORD, fallback random.
--postgrest-query-cost-limit Define a cost limit [1] for queries executed by PostgREST
on behalf of a user. It is only enforced if
pg_plan_filter [2] is installed in the database!
Must be a positive float. Default: none
[1] https://www.postgresql.org/docs/14/using-explain.html
[2] https://github.com/pgexperts/pg_plan_filter
--schema The schema to use for the database. Default: main
May not contain \`.\`.
--import-metadata Create functions returning import metadata:
- gtfs_data_imported_at (timestamp with time zone)
- gtfs_via_postgres_version (text)
- gtfs_via_postgres_options (jsonb)
Notes:
If you just want to check if the GTFS data can be imported but don't care about the
resulting DuckDB database file, you can import into an in-memory database by specifying
\`:memory:\` as the <path-to-duckdb>.
Examples:
gtfs-to-sql some-gtfs/*.txt | sponge | psql -b # import into PostgreSQL
gtfs-to-sql -u -- some-gtfs/*.txt | gzip >gtfs.sql.gz # generate a gzipped SQL dump
import-gtfs-into-duckdb some-gtfs.duckdb some-gtfs/*.txt
[1] https://developers.google.com/transit/gtfs/reference/extended-route-types
[2] https://groups.google.com/g/gtfs-changes/c/keT5rTPS7Y0/m/71uMz2l6ke0J
Expand All @@ -164,11 +128,11 @@ if (flags.version) {
}

const {basename, extname} = require('path')
const {pipeline} = require('stream')
const convertGtfsToSql = require('./index')
const DataError = require('./lib/data-error')

const files = args.map((file) => {
const [pathToDb] = args

// Every CLI argument after the DuckDB path is a GTFS file; pair each
// path with a human-readable name (the file name minus its extension).
const files = args.slice(1).map((file) => ({
	name: basename(file, extname(file)),
	file,
}))
Expand All @@ -184,9 +148,7 @@ const opt = {
statsByRouteIdAndDate: flags['stats-by-route-date'] || 'none',
statsByAgencyIdAndRouteIdAndStopAndHour: flags['stats-by-agency-route-stop-hour'] || 'none',
statsActiveTripsByHour: flags['stats-active-trips-by-hour'] || 'none',
schema: flags['schema'] || 'public',
postgraphile: !!flags.postgraphile,
postgrest: !!flags.postgrest,
schema: flags['schema'] || 'main',
importMetadata: !!flags['import-metadata'],
}
if ('stops-without-level-id' in flags) {
Expand All @@ -195,31 +157,11 @@ if ('stops-without-level-id' in flags) {
// Only forward --lower-case-lang-codes if it was actually passed, so
// that the option's default (decided downstream) stays in effect otherwise.
if ('lower-case-lang-codes' in flags) {
	const acceptLowerCaseLangCodes = flags['lower-case-lang-codes']
	opt.lowerCaseLanguageCodes = acceptLowerCaseLangCodes
}
if ('postgraphile-password' in flags) {
opt.postgraphilePassword = flags['postgraphile-password']
}
if ('postgrest-password' in flags) {
opt.postgrestPassword = flags['postgrest-password']
}
// Parse & validate the optional --postgrest-query-cost-limit flag.
// It must be a finite, non-negative number; anything else aborts the CLI.
if ('postgrest-query-cost-limit' in flags) {
	const limit = parseFloat(flags['postgrest-query-cost-limit'])
	if (!Number.isFinite(limit) || limit < 0) {
		console.error('Invalid --postgrest-query-cost-limit value.')
		process.exit(1)
	}
	// Fix: this previously assigned to opt.lowerCaseLanguageCodes (a
	// copy-paste bug), silently clobbering an unrelated option with a
	// number and never actually setting the query cost limit.
	opt.postgrestQueryCostLimit = limit
}

pipeline(
convertGtfsToSql(files, opt),
process.stdout,
(err) => {
if (!err) return;
if (err instanceof DataError) {
console.error(String(err))
} else if (err.code !== 'EPIPE') {
console.error(err)
}
process.exit(1)
convertGtfsToSql(pathToDb, files, opt)
.catch((err) => {
if (err.code !== 'EPIPE') { // todo: check still necessary? we don't pipe anymore
console.error(err)
}
)
process.exit(1)
})
Loading

0 comments on commit 3329074

Please sign in to comment.