- Using Sphinx to document the project - Autodocumenting Python using numpy-style docstrings and Sphinx autodoc with napoleon
816 lines
34 KiB
Python
816 lines
34 KiB
Python
import pandas as pd
|
|
import os
|
|
|
|
|
|
|
|
class GTFS:
    """
    DataFrame based representation of a GTFS feed.

    Reads a folder of GTFS files (or takes ready-made DataFrames), checks the
    column names of every loaded file against the GTFS reference and writes
    the data back to a GTFS folder.

    Attributes
    ----------
    folder_path : str
        Path to the folder the data is read from and/or written to.
    agency, stops, routes, trips, stop_times, calendar, calendar_dates, fare_attributes, fare_rules, timeframes, fare_media, fare_products, fare_leg_rules, fare_transfer_rules, areas, stop_areas, networks, route_networks, shapes, frequencies, transfers, pathways, levels, location_groups, location_group_stops, locations_geojson, booking_rules, translations, feed_info, attributions : GTFSFile
        Objects representing the data of the corresponding .txt/.geojson
        files in the GTFS Reference.
    errors : list[str]
        Human readable messages collected by the last call to validate().

    Methods
    -------
    get_files():
        Return the attribute names of all GTFSFile members.
    get_fields(name):
        Return all fields present in the given GTFSFile member.
    export(path, dirname):
        Save all non-empty files as a folder of GTFS files.
    validate():
        Validate every non-empty GTFSFile member individually.
    validate_required_fields(df, required_fields, file_name):
        Check that a DataFrame contains all required fields.
    validate_optional_fields(df, optional_fields, file_name, required_fields):
        Check that a DataFrame contains no fields outside the allowed ones.
    validate_lat_lon(df, file_name, prefix):
        Check that latitude and longitude are given together and completely.
    """

    # Field names per file as (required, optional), keyed by attribute name.
    # Only unconditionally required fields are listed as required; fields the
    # reference marks "Conditionally Required/Forbidden" or "Recommended" are
    # listed as optional, because their conditions depend on row values or on
    # other files and cannot be decided from column names alone.
    # The key order is also the order in which validate() walks the files.
    # NOTE(review): transcribed from the GTFS Schedule Reference - confirm
    # against the reference revision this project targets.
    _FIELD_SPEC = {
        "agency": (
            ("agency_name", "agency_url", "agency_timezone"),
            ("agency_id", "agency_lang", "agency_phone", "agency_fare_url",
             "agency_email", "cemv_support"),
        ),
        "stops": (
            ("stop_id",),
            ("stop_code", "stop_name", "tts_stop_name", "stop_desc",
             "stop_lat", "stop_lon", "zone_id", "stop_url", "location_type",
             "parent_station", "stop_timezone", "wheelchair_boarding",
             "level_id", "platform_code", "stop_access"),
        ),
        "routes": (
            ("route_id", "route_type"),
            ("agency_id", "route_short_name", "route_long_name", "route_desc",
             "route_url", "route_color", "route_text_color",
             "route_sort_order", "continuous_pickup", "continuous_drop_off",
             "network_id", "cemv_support"),
        ),
        "trips": (
            ("route_id", "service_id", "trip_id"),
            ("trip_headsign", "trip_short_name", "direction_id", "block_id",
             "shape_id", "wheelchair_accessible", "bikes_allowed",
             "cars_allowed"),
        ),
        "stop_times": (
            ("trip_id", "stop_sequence"),
            ("arrival_time", "departure_time", "stop_id", "location_group_id",
             "location_id", "stop_headsign", "start_pickup_drop_off_window",
             "end_pickup_drop_off_window", "pickup_type", "drop_off_type",
             "continuous_pickup", "continuous_drop_off",
             "shape_dist_traveled", "timepoint", "pickup_booking_rule_id",
             "drop_off_booking_rule_id"),
        ),
        "calendar": (
            ("service_id", "monday", "tuesday", "wednesday", "thursday",
             "friday", "saturday", "sunday", "start_date", "end_date"),
            (),
        ),
        "calendar_dates": (
            ("service_id", "date", "exception_type"),
            (),
        ),
        "fare_attributes": (
            ("fare_id", "price", "currency_type", "payment_method",
             "transfers"),
            ("agency_id", "transfer_duration"),
        ),
        "fare_rules": (
            ("fare_id",),
            ("route_id", "origin_id", "destination_id", "contains_id"),
        ),
        "timeframes": (
            ("timeframe_group_id", "service_id"),
            ("start_time", "end_time"),
        ),
        "fare_media": (
            ("fare_media_id", "fare_media_type"),
            ("fare_media_name",),
        ),
        "fare_products": (
            ("fare_product_id", "amount", "currency"),
            ("fare_product_name", "rider_category_id", "fare_media_id"),
        ),
        "fare_leg_rules": (
            ("fare_product_id",),
            ("leg_group_id", "network_id", "from_area_id", "to_area_id",
             "from_timeframe_group_id", "to_timeframe_group_id",
             "rule_priority"),
        ),
        "fare_transfer_rules": (
            ("fare_transfer_type",),
            ("from_leg_group_id", "to_leg_group_id", "transfer_count",
             "duration_limit", "duration_limit_type", "fare_product_id"),
        ),
        "areas": (
            ("area_id",),
            ("area_name",),
        ),
        "stop_areas": (
            ("area_id", "stop_id"),
            (),
        ),
        "networks": (
            ("network_id",),
            ("network_name",),
        ),
        "route_networks": (
            ("network_id", "route_id"),
            (),
        ),
        "shapes": (
            ("shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"),
            ("shape_dist_traveled",),
        ),
        "frequencies": (
            ("trip_id", "start_time", "end_time", "headway_secs"),
            ("exact_times",),
        ),
        "transfers": (
            ("transfer_type",),
            ("from_stop_id", "to_stop_id", "from_route_id", "to_route_id",
             "from_trip_id", "to_trip_id", "min_transfer_time"),
        ),
        "pathways": (
            ("pathway_id", "from_stop_id", "to_stop_id", "pathway_mode",
             "is_bidirectional"),
            ("length", "traversal_time", "stair_count", "max_slope",
             "min_width", "signposted_as", "reversed_signposted_as"),
        ),
        "levels": (
            ("level_id", "level_index"),
            ("level_name",),
        ),
        "location_groups": (
            ("location_group_id",),
            ("location_group_name",),
        ),
        "location_group_stops": (
            ("location_group_id", "stop_id"),
            (),
        ),
        "locations_geojson": (
            ("type", "features"),
            (),
        ),
        "booking_rules": (
            ("booking_rule_id", "booking_type"),
            ("prior_notice_duration_min", "prior_notice_duration_max",
             "prior_notice_last_day", "prior_notice_last_time",
             "prior_notice_start_day", "prior_notice_start_time",
             "prior_notice_service_id", "message", "pickup_message",
             "drop_off_message", "phone_number", "info_url", "booking_url"),
        ),
        "translations": (
            ("table_name", "field_name", "language", "translation"),
            ("record_id", "record_sub_id", "field_value"),
        ),
        "feed_info": (
            ("feed_publisher_name", "feed_publisher_url", "feed_lang"),
            ("default_lang", "feed_start_date", "feed_end_date",
             "feed_version", "feed_contact_email", "feed_contact_url"),
        ),
        "attributions": (
            ("organization_name",),
            ("attribution_id", "agency_id", "route_id", "trip_id",
             "is_producer", "is_operator", "is_authority", "attribution_url",
             "attribution_email", "attribution_phone"),
        ),
    }

    def __init__(self, folder_path: str = "", dfs: dict[str, pd.DataFrame] = None):
        """
        Parameters
        ----------
        folder_path : str
            Path of the folder to read GTFS data from or potentially write it
            to when export() is called. Defaults to an empty str, in which
            case nothing is read from disk.
        dfs : dict[str, pd.DataFrame]
            DataFrames containing the data to be represented by this object as
            values, the corresponding GTFSFile.file_name as keys. When given
            (and non-empty) no file is read from folder_path; files without an
            entry stay empty.

        Notes
        -----
        The arguments are not validated. Missing or unreadable files simply
        yield empty DataFrames.
        """
        self.folder_path = folder_path
        self.agency = self.Agency(self.folder_path, dfs)
        self.stops = self.Stops(self.folder_path, dfs)
        self.routes = self.Routes(self.folder_path, dfs)
        self.trips = self.Trips(self.folder_path, dfs)
        self.stop_times = self.StopTimes(self.folder_path, dfs)
        self.calendar = self.Calendar(self.folder_path, dfs)
        self.calendar_dates = self.CalendarDates(self.folder_path, dfs)
        self.fare_attributes = self.FareAttributes(self.folder_path, dfs)
        self.fare_rules = self.FareRules(self.folder_path, dfs)
        self.timeframes = self.Timeframes(self.folder_path, dfs)
        self.fare_media = self.FareMedia(self.folder_path, dfs)
        self.fare_products = self.FareProducts(self.folder_path, dfs)
        self.fare_leg_rules = self.FareLegRules(self.folder_path, dfs)
        self.fare_transfer_rules = self.FareTransferRules(self.folder_path, dfs)
        self.areas = self.Areas(self.folder_path, dfs)
        self.stop_areas = self.StopAreas(self.folder_path, dfs)
        self.networks = self.Networks(self.folder_path, dfs)
        self.route_networks = self.RouteNetworks(self.folder_path, dfs)
        self.shapes = self.Shapes(self.folder_path, dfs)
        self.frequencies = self.Frequencies(self.folder_path, dfs)
        self.transfers = self.Transfers(self.folder_path, dfs)
        self.pathways = self.Pathways(self.folder_path, dfs)
        self.levels = self.Levels(self.folder_path, dfs)
        self.location_groups = self.LocationGroups(self.folder_path, dfs)
        self.location_group_stops = self.LocationGroupStops(self.folder_path, dfs)
        self.locations_geojson = self.LocationsGeojson(self.folder_path, dfs)
        self.booking_rules = self.BookingRules(self.folder_path, dfs)
        self.translations = self.Translations(self.folder_path, dfs)
        self.feed_info = self.FeedInfo(self.folder_path, dfs)
        self.attributions = self.Attributions(self.folder_path, dfs)
        self.errors = []

    class GTFSFile:
        """
        One file of a GTFS feed; all fields and values are stored as a DataFrame.

        Attributes
        ----------
        extension : str
            File extension without the dot; "txt" unless a subclass overrides it.
        file_name : str
            Extension-less name of the corresponding file from the GTFS Reference.
        folder_path : str
            Folder to read data from or potentially write it to.
        file_path : str
            folder_path joined with "<file_name>.<extension>".
        data : pd.DataFrame
            Content of the file, or an empty DataFrame if there is none.

        Methods
        -------
        load_data(dfs):
            Take the data from a dict of DataFrames if given, else read the file.
        """

        extension = "txt"

        def __init__(self, folder_path, file_name, dfs):
            """
            Parameters
            ----------
            folder_path : str
                Where to read GTFS files from or write them to.
            file_name : str
                Name of the file without its extension.
            dfs : dict[str, pd.DataFrame]
                If given, data is taken from the entry keyed by file_name.
                If not, data is read from file_path.
            """
            self.file_name = file_name
            self.folder_path = folder_path
            base_name = f"{file_name}.{self.extension}"
            # An empty folder_path must not turn into "/<file>" (filesystem root).
            self.file_path = os.path.join(folder_path, base_name) if folder_path else base_name
            self.data = self.load_data(dfs)

        def load_data(self, dfs=None):
            """
            Return the data for this file from a given DataFrame or from disk.

            Parameters
            ----------
            dfs : dict[str, pd.DataFrame]
                Dict of DataFrames mapped to file names. If given and
                non-empty, the entry for file_name is returned, or an empty
                DataFrame when there is no such key; the disk is not touched.

            Returns
            -------
            pd.DataFrame
                The loaded data; empty if no folder_path is set or the file
                is missing or unparsable as described in _read_file().
            """
            if dfs:
                return dfs.get(self.file_name, pd.DataFrame())
            if not self.folder_path:
                return pd.DataFrame()
            return self._read_file()

        def _read_file(self):
            """Parse file_path as CSV; a missing or empty file yields an empty DataFrame."""
            # NOTE(review): read_csv infers dtypes, so purely numeric IDs such
            # as "007" are parsed as integers - confirm this is acceptable.
            try:
                return pd.read_csv(self.file_path)
            except (FileNotFoundError, pd.errors.EmptyDataError):
                # EmptyDataError: zero-byte/blank files, e.g. from older exports.
                return pd.DataFrame()

    class Agency(GTFSFile):
        """Represents agency.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'agency', dfs)

    class Stops(GTFSFile):
        """Represents stops.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'stops', dfs)

    class Routes(GTFSFile):
        """Represents routes.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'routes', dfs)

    class Trips(GTFSFile):
        """Represents trips.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'trips', dfs)

    class StopTimes(GTFSFile):
        """Represents stop_times.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'stop_times', dfs)

    class Calendar(GTFSFile):
        """Represents calendar.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'calendar', dfs)

    class CalendarDates(GTFSFile):
        """Represents calendar_dates.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'calendar_dates', dfs)

    class FareAttributes(GTFSFile):
        """Represents fare_attributes.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_attributes', dfs)

    class FareRules(GTFSFile):
        """Represents fare_rules.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_rules', dfs)

    class Timeframes(GTFSFile):
        """Represents timeframes.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'timeframes', dfs)

    class FareMedia(GTFSFile):
        """Represents fare_media.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_media', dfs)

    class FareProducts(GTFSFile):
        """Represents fare_products.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_products', dfs)

    class FareLegRules(GTFSFile):
        """Represents fare_leg_rules.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_leg_rules', dfs)

    class FareTransferRules(GTFSFile):
        """Represents fare_transfer_rules.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_transfer_rules', dfs)

    class Areas(GTFSFile):
        """Represents areas.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'areas', dfs)

    class StopAreas(GTFSFile):
        """Represents stop_areas.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'stop_areas', dfs)

    class Networks(GTFSFile):
        """Represents networks.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'networks', dfs)

    class RouteNetworks(GTFSFile):
        """Represents route_networks.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'route_networks', dfs)

    class Shapes(GTFSFile):
        """Represents shapes.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'shapes', dfs)

    class Frequencies(GTFSFile):
        """Represents frequencies.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'frequencies', dfs)

    class Transfers(GTFSFile):
        """Represents transfers.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'transfers', dfs)

    class Pathways(GTFSFile):
        """Represents pathways.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'pathways', dfs)

    class Levels(GTFSFile):
        """Represents levels.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'levels', dfs)

    class LocationGroups(GTFSFile):
        """Represents location_groups.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'location_groups', dfs)

    class LocationGroupStops(GTFSFile):
        """Represents location_group_stops.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'location_group_stops', dfs)

    class LocationsGeojson(GTFSFile):
        """
        Represents locations.geojson from the GTFS reference.

        The file is parsed with pd.read_json instead of pd.read_csv. In a
        dict of DataFrames its key is "locations".
        """

        extension = "geojson"

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'locations', dfs)

        def _read_file(self):
            """Parse file_path as JSON; a missing or malformed file yields an empty DataFrame."""
            # Check existence first so read_json is only ever handed a real path.
            if not os.path.exists(self.file_path):
                return pd.DataFrame()
            try:
                return pd.read_json(self.file_path)
            except ValueError:
                return pd.DataFrame()

    class BookingRules(GTFSFile):
        """Represents booking_rules.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'booking_rules', dfs)

    class Translations(GTFSFile):
        """Represents translations.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'translations', dfs)

    class FeedInfo(GTFSFile):
        """Represents feed_info.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'feed_info', dfs)

    class Attributions(GTFSFile):
        """Represents attributions.txt from the GTFS reference."""

        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'attributions', dfs)

    def get_files(self):
        """
        Get the names of all GTFSFile members.

        Returns
        -------
        list[str]
            Alphabetically sorted names of all instance attributes holding a
            GTFSFile; use getattr(self, name) to obtain the object itself.
        """
        return sorted(
            name for name, value in vars(self).items()
            if isinstance(value, self.GTFSFile)
        )

    def get_fields(self, name):
        """
        Return all fields present in the data of the given file.

        Parameters
        ----------
        name : str
            Attribute name of the file, e.g. "stop_times" or "locations_geojson".

        Returns
        -------
        list
            Column labels of the file's DataFrame, or None if this object has
            no GTFSFile member called name.
        """
        file = getattr(self, name, None)
        if not isinstance(file, self.GTFSFile):
            return None
        return list(file.data.columns)

    def export(self, path = None, dirname = ""):
        """
        Save this object's data to files as specified by GTFS.

        Files whose DataFrame has no columns are not written, so that the
        exported folder can be read back without producing blank files.

        Parameters
        ----------
        path : str
            Parent directory where to save the files, defaults to the object's
            folder_path attribute.
        dirname : str
            If specified, subdirectory to create or use inside path. Default
            behaviour is to save directly to path.

        Raises
        ------
        FileNotFoundError
            If neither path nor folder_path names a directory.
        """
        base = path or self.folder_path
        if not base:
            raise FileNotFoundError("No export directory: pass path or set folder_path.")
        target = os.path.normpath(base)
        if dirname:
            target = os.path.join(target, dirname)
        # makedirs also creates missing parents and tolerates an existing folder.
        os.makedirs(target, exist_ok=True)
        for name in self.get_files():
            file = getattr(self, name)
            frame = file.data
            if len(frame.columns) == 0:
                continue
            # Name the file after the GTFS file, not the attribute, so that
            # locations_geojson ends up as locations.geojson.
            destination = os.path.join(target, f"{file.file_name}.{file.extension}")
            if file.extension == "geojson":
                # NOTE(review): to_json writes pandas' default column-oriented
                # layout, which read_json can load again but which is not a
                # GeoJSON FeatureCollection - confirm whether that is intended.
                frame.to_json(destination)
            else:
                frame.to_csv(destination, index=False)

    def validate(self):
        """
        Check this object's data for compliance with the GTFS reference.

        Resets self.errors, then runs validate_<name>() for every file whose
        DataFrame is not empty.

        Returns
        -------
        list[str]
            List of human readable error messages, also saved to self.errors,
            if any, else None.
        """
        self.errors = []
        for name in self._FIELD_SPEC:
            if not getattr(self, name).data.empty:
                getattr(self, f"validate_{name}")()
        return self.errors or None

    def _validate_file(self, name):
        """Run the required- and unexpected-field checks for the member called name."""
        required, optional = self._FIELD_SPEC[name]
        file = getattr(self, name)
        label = f"{file.file_name}.{file.extension}"
        self.validate_required_fields(file.data, required, label)
        self.validate_optional_fields(file.data, optional, label, required)

    def validate_agency(self):
        """Check Agency object for compliance with the GTFS reference."""
        self._validate_file("agency")

    def validate_stops(self):
        """Check Stops object for compliance with the GTFS reference."""
        self._validate_file("stops")
        self.validate_lat_lon(self.stops.data)

    def validate_routes(self):
        """Check Routes object for compliance with the GTFS reference."""
        self._validate_file("routes")

    def validate_trips(self):
        """Check Trips object for compliance with the GTFS reference."""
        self._validate_file("trips")

    def validate_stop_times(self):
        """Check StopTimes object for compliance with the GTFS reference."""
        self._validate_file("stop_times")

    def validate_calendar(self):
        """Check Calendar object for compliance with the GTFS reference."""
        self._validate_file("calendar")

    def validate_calendar_dates(self):
        """Check CalendarDates object for compliance with the GTFS reference."""
        self._validate_file("calendar_dates")

    def validate_fare_attributes(self):
        """Check FareAttributes object for compliance with the GTFS reference."""
        self._validate_file("fare_attributes")

    def validate_fare_rules(self):
        """Check FareRules object for compliance with the GTFS reference."""
        self._validate_file("fare_rules")

    def validate_timeframes(self):
        """Check Timeframes object for compliance with the GTFS reference."""
        self._validate_file("timeframes")

    def validate_fare_media(self):
        """Check FareMedia object for compliance with the GTFS reference."""
        self._validate_file("fare_media")

    def validate_fare_products(self):
        """Check FareProducts object for compliance with the GTFS reference."""
        self._validate_file("fare_products")

    def validate_fare_leg_rules(self):
        """Check FareLegRules object for compliance with the GTFS reference."""
        self._validate_file("fare_leg_rules")

    def validate_fare_transfer_rules(self):
        """Check FareTransferRules object for compliance with the GTFS reference."""
        self._validate_file("fare_transfer_rules")

    def validate_areas(self):
        """Check Areas object for compliance with the GTFS reference."""
        self._validate_file("areas")

    def validate_stop_areas(self):
        """Check StopAreas object for compliance with the GTFS reference."""
        self._validate_file("stop_areas")

    def validate_networks(self):
        """Check Networks object for compliance with the GTFS reference."""
        self._validate_file("networks")

    def validate_route_networks(self):
        """Check RouteNetworks object for compliance with the GTFS reference."""
        self._validate_file("route_networks")

    def validate_shapes(self):
        """Check Shapes object for compliance with the GTFS reference."""
        self._validate_file("shapes")

    def validate_frequencies(self):
        """Check Frequencies object for compliance with the GTFS reference."""
        self._validate_file("frequencies")

    def validate_transfers(self):
        """Check Transfers object for compliance with the GTFS reference."""
        self._validate_file("transfers")

    def validate_pathways(self):
        """Check Pathways object for compliance with the GTFS reference."""
        self._validate_file("pathways")

    def validate_levels(self):
        """Check Levels object for compliance with the GTFS reference."""
        self._validate_file("levels")

    def validate_location_groups(self):
        """Check LocationGroups object for compliance with the GTFS reference."""
        self._validate_file("location_groups")

    def validate_location_group_stops(self):
        """Check LocationGroupStops object for compliance with the GTFS reference."""
        self._validate_file("location_group_stops")

    def validate_locations_geojson(self):
        """Check LocationsGeojson object for compliance with the GTFS reference."""
        self._validate_file("locations_geojson")

    def validate_booking_rules(self):
        """Check BookingRules object for compliance with the GTFS reference."""
        self._validate_file("booking_rules")

    def validate_translations(self):
        """Check Translations object for compliance with the GTFS reference."""
        self._validate_file("translations")

    def validate_feed_info(self):
        """Check FeedInfo object for compliance with the GTFS reference."""
        self._validate_file("feed_info")

    def validate_attributions(self):
        """Check Attributions object for compliance with the GTFS reference."""
        self._validate_file("attributions")

    def validate_required_fields(self, df, required_fields, file_name):
        """
        Check if a DataFrame contains all required fields.

        Appends one message to self.errors if any field is missing.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame to be checked
        required_fields : list[str]
            list of field names to check for inclusion in df
        file_name : str
            Name to be printed in error message if validation fails
        """
        missing_fields = set(required_fields) - set(df.columns)
        if missing_fields:
            self.errors.append(f"{file_name} is missing required fields: {missing_fields}")

    def validate_optional_fields(self, df, optional_fields, file_name, required_fields=()):
        """
        Check if a DataFrame contains fields that are not allowed for its file.

        Appends one message to self.errors if any column is neither in
        optional_fields nor in required_fields.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame to be checked
        optional_fields : list[str]
            list of field names allowed in df
        file_name : str
            Name to be printed in error message if validation fails
        required_fields : list[str]
            Further allowed field names, kept separate so callers can pass the
            same list they give to validate_required_fields. Defaults to none.
        """
        allowed = set(optional_fields) | set(required_fields)
        unexpected_fields = set(df.columns) - allowed
        if unexpected_fields:
            self.errors.append(f"{file_name} has unexpected fields: {unexpected_fields}")

    def validate_lat_lon(self, df, file_name="stops.txt", prefix="stop"):
        """
        Check that a DataFrame has either no coordinates of a given prefix or complete ones.

        An error is recorded when only one of the two coordinate columns
        exists, or when both exist and any value in them is null.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame to be checked
        file_name : str
            Name to be printed in error message if validation fails
        prefix : str
            Prefix to be used for coordinate fields. Expands to {prefix}_lat and {prefix}_lon
        """
        lat, lon = f"{prefix}_lat", f"{prefix}_lon"
        has_lat, has_lon = lat in df.columns, lon in df.columns
        if has_lat != has_lon:
            present, absent = (lat, lon) if has_lat else (lon, lat)
            self.errors.append(f"{file_name} has {present} but is missing {absent}.")
        elif has_lat and df[[lat, lon]].isnull().any().any():
            self.errors.append(f"{file_name} has missing lat/lon values.")
|
|
|