From ee0cceade749832a6f77637e66e3438e5cf75771 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonah=20Br=C3=BCchert?= Date: Sun, 5 Jan 2025 21:28:35 +0100 Subject: [PATCH] Improve trip display name heuristics --- src/loader/gtfs/trip.cc | 97 ++++++++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 30 deletions(-) diff --git a/src/loader/gtfs/trip.cc b/src/loader/gtfs/trip.cc index 6f7bd44a..72862ddb 100644 --- a/src/loader/gtfs/trip.cc +++ b/src/loader/gtfs/trip.cc @@ -190,44 +190,81 @@ void trip::interpolate() { std::string trip::display_name(timetable const& tt) const { auto const is_digit = [](char const x) { return x >= '0' && x <= '9'; }; - if (route_->clasz_ == clasz::kBus) { - return route_->short_name_.empty() ? "Bus " + short_name_ - : "Bus " + route_->short_name_; - } else if (route_->clasz_ == clasz::kTram) { - return route_->short_name_.empty() ? "Tram " + short_name_ - : "Tram " + route_->short_name_; - } + auto const is_letter = [](char const x) { + return (x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z'); + }; - auto const trip_name_is_number = - !short_name_.empty() && utl::all_of(short_name_, is_digit); - if (!route_->short_name_.starts_with("IC") && - route_->agency_ != provider_idx_t::invalid() && - tt.providers_[route_->agency_].long_name_ == "DB Fernverkehr AG") { - if (route_->clasz_ == clasz::kHighSpeed) { - return trip_name_is_number - ? fmt::format("ICE {}", utl::parse(short_name_)) - : fmt::format("ICE {}", route_->short_name_); - } else if (route_->clasz_ == clasz::kLongDistance) { - return trip_name_is_number - ? fmt::format("IC {}", utl::parse(short_name_)) - : fmt::format("IC {}", route_->short_name_); + // Depending on the class, there are different expectations from users + // if the special case doesn't work based on available data, fall back to + // generic code afterwards + switch (route_->clasz_) { + // Busses and Trams: We just care about type + number + case clasz::kBus: + return route_->short_name_.empty() ? "Bus " + short_name_ + : "Bus " + route_->short_name_; + case clasz::kTram: + return route_->short_name_.empty() ? "Tram " + short_name_ + : "Tram " + route_->short_name_; + // For subway / metro, only the line matters since it's frequent enough. + case clasz::kSubway: + case clasz::kMetro: + case clasz::kRegional: + case clasz::kRegionalFast: + if (!route_->short_name_.empty()) { + return route_->short_name_; + } + break; + // Deutsche Bahn heuristics to classify ICEs and ICs + case clasz::kHighSpeed: + case clasz::kLongDistance: { + auto const trip_name_is_number = + !short_name_.empty() && utl::all_of(short_name_, is_digit); + if (!route_->short_name_.starts_with("IC") && + route_->agency_ != provider_idx_t::invalid() && + tt.providers_[route_->agency_].long_name_ == "DB Fernverkehr AG") { + if (route_->clasz_ == clasz::kHighSpeed) { + return trip_name_is_number + ? fmt::format("ICE {}", utl::parse(short_name_)) + : fmt::format("ICE {}", route_->short_name_); + } else if (route_->clasz_ == clasz::kLongDistance) { + return trip_name_is_number + ? fmt::format("IC {}", utl::parse(short_name_)) + : fmt::format("IC {}", route_->short_name_); + } + } + + break; + } + default: { } } - auto const starts_with_letter_and_ends_with_number = - [](std::string_view line_id) { - return !(line_id.front() >= '0' && line_id.front() <= '9') && - (line_id.back() >= '0' && line_id.back() <= '9'); - }; + // route name looks like product name, trip name is number + auto const is_product_name = [=](std::string_view route) { + return utl::all_of(route, is_letter); + }; - if (starts_with_letter_and_ends_with_number(route_->short_name_)) { - return route_->short_name_; - } else if (trip_name_is_number) { + auto const is_trip_number = [=](std::string_view trip_name) { + return utl::all_of(trip_name, is_digit); + }; + + if (!route_->short_name_.empty() && is_product_name(route_->short_name_) && + !short_name_.empty() && is_trip_number(short_name_)) { return fmt::format("{} {}", route_->short_name_, utl::parse(short_name_)); - } else { - return fmt::format("{} {}", route_->short_name_, short_name_); } + + // prefer trip name over route short name, prefer route short name over route + // long name + if (!short_name_.empty()) { + return short_name_; + } + + if (!route_->short_name_.empty()) { + return route_->short_name_; + } + + return route_->long_name_; } clasz trip::get_clasz(timetable const& tt) const {