import os
from typing import Optional

import pandas as pd


class GTFS:
    """In-memory view of a GTFS feed, one pandas DataFrame per feed file.

    Each feed file is exposed as an attribute (``feed.stops``,
    ``feed.stop_times``, ...) wrapping a ``GTFSFile`` whose ``data``
    attribute holds the table.  Files that are absent load as an empty
    DataFrame.

    Args:
        folder_path: Directory holding the ``*.txt`` files and
            ``locations.geojson``.
        dfs: Optional mapping of table name (``"stops"``, ``"trips"``, ...)
            to an already-loaded DataFrame.  When given and non-empty, the
            ``*.txt`` tables are taken from it instead of from disk; tables
            missing from the mapping are treated as absent.
    """

    # Attribute name -> (label used in messages, required columns, optional
    # columns).  ``None`` for the optional columns means "do not check for
    # unexpected columns" for that file.  Insertion order is the order in
    # which validate() runs the checks.
    # NOTE(review): these lists are kept exactly as they were; several
    # (notably the fares/areas/networks files) look out of step with the
    # published GTFS Schedule reference -- confirm before relying on them.
    _FIELD_SPECS = {
        "agency": (
            "agency.txt",
            ("agency_name", "agency_url", "agency_timezone"),
            ("agency_id", "agency_lang", "agency_phone", "agency_fare_url",
             "agency_email"),
        ),
        "stops": (
            "stops.txt",
            ("stop_id", "stop_name"),
            ("stop_code", "stop_desc", "stop_lat", "stop_lon", "zone_id",
             "stop_url", "location_type", "parent_station", "stop_timezone",
             "wheelchair_boarding", "level_id", "platform_code"),
        ),
        "routes": (
            "routes.txt",
            ("route_id", "route_short_name", "route_long_name", "route_type"),
            ("agency_id", "route_desc", "route_url", "route_color",
             "route_text_color", "route_sort_order", "continuous_pickup",
             "continuous_drop_off"),
        ),
        "trips": (
            "trips.txt",
            ("route_id", "service_id", "trip_id"),
            ("trip_headsign", "trip_short_name", "direction_id", "block_id",
             "shape_id", "wheelchair_accessible", "bikes_allowed"),
        ),
        "stop_times": (
            "stop_times.txt",
            ("trip_id", "arrival_time", "departure_time", "stop_id",
             "stop_sequence"),
            ("stop_headsign", "pickup_type", "drop_off_type",
             "shape_dist_traveled", "timepoint"),
        ),
        "calendar": (
            "calendar.txt",
            ("service_id", "monday", "tuesday", "wednesday", "thursday",
             "friday", "saturday", "sunday", "start_date", "end_date"),
            None,
        ),
        "calendar_dates": (
            "calendar_dates.txt",
            ("service_id", "date", "exception_type"),
            None,
        ),
        "fare_attributes": (
            "fare_attributes.txt",
            ("fare_id", "price", "currency_type", "payment_method",
             "transfers"),
            ("agency_id", "transfer_duration"),
        ),
        "fare_rules": (
            "fare_rules.txt",
            ("fare_id",),
            ("route_id", "origin_id", "destination_id", "contains_id"),
        ),
        "timeframes": (
            "timeframes.txt",
            ("timeframe_id", "start_time", "end_time"),
            ("timeframe_name", "timeframe_desc"),
        ),
        "fare_media": (
            "fare_media.txt",
            ("media_id", "media_name", "media_type"),
            ("media_desc",),
        ),
        "fare_products": (
            "fare_products.txt",
            ("product_id", "product_name", "product_type", "product_price",
             "currency"),
            ("product_desc",),
        ),
        "fare_leg_rules": (
            "fare_leg_rules.txt",
            ("leg_id", "from_stop_id", "to_stop_id"),
            ("leg_desc",),
        ),
        "fare_transfer_rules": (
            "fare_transfer_rules.txt",
            ("from_leg_id", "to_leg_id", "transfer_type"),
            ("transfer_time",),
        ),
        "areas": (
            "areas.txt",
            ("area_id", "area_name"),
            ("area_desc",),
        ),
        "stop_areas": (
            "stop_areas.txt",
            ("stop_id", "area_id"),
            (),
        ),
        "networks": (
            "networks.txt",
            ("network_id", "network_name"),
            ("network_desc",),
        ),
        "route_networks": (
            "route_networks.txt",
            ("route_id", "network_id"),
            (),
        ),
        "shapes": (
            "shapes.txt",
            ("shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"),
            ("shape_dist_traveled",),
        ),
        "frequencies": (
            "frequencies.txt",
            ("trip_id", "start_time", "end_time", "headway_secs"),
            ("exact_times",),
        ),
        "transfers": (
            "transfers.txt",
            ("from_stop_id", "to_stop_id", "transfer_type"),
            ("min_transfer_time",),
        ),
        "pathways": (
            "pathways.txt",
            ("pathway_id", "from_stop_id", "to_stop_id", "pathway_mode",
             "is_bidirectional"),
            ("length", "traversal_time", "stair_count", "max_slope",
             "min_width", "signposted_as", "reversed_signposted_as"),
        ),
        "levels": (
            "levels.txt",
            ("level_id", "level_index"),
            ("level_name",),
        ),
        "location_groups": (
            "location_groups.txt",
            ("location_group_id", "location_group_name"),
            ("location_group_desc",),
        ),
        "location_group_stops": (
            "location_group_stops.txt",
            ("location_group_id", "stop_id"),
            (),
        ),
        "locations_geojson": (
            "locations.geojson",
            ("type", "features"),
            (),
        ),
        "booking_rules": (
            "booking_rules.txt",
            ("booking_rule_id",),
            ("booking_rule_name", "booking_rule_desc"),
        ),
        "translations": (
            "translations.txt",
            ("table_name", "field_name", "language", "translation"),
            ("record_id", "record_sub_id", "field_value"),
        ),
        "feed_info": (
            "feed_info.txt",
            ("feed_publisher_name", "feed_publisher_url", "feed_lang"),
            ("feed_start_date", "feed_end_date", "feed_version"),
        ),
        "attributions": (
            "attributions.txt",
            ("attribution_id",),
            ("agency_id", "route_id", "trip_id", "organization_name",
             "is_producer", "is_operator", "is_authority", "attribution_url",
             "attribution_email", "attribution_phone"),
        ),
    }

    def __init__(self, folder_path: str, dfs: Optional[dict] = None):
        self.folder_path = folder_path
        self.agency = self.Agency(self.folder_path, dfs)
        self.stops = self.Stops(self.folder_path, dfs)
        self.routes = self.Routes(self.folder_path, dfs)
        self.trips = self.Trips(self.folder_path, dfs)
        self.stop_times = self.StopTimes(self.folder_path, dfs)
        self.calendar = self.Calendar(self.folder_path, dfs)
        self.calendar_dates = self.CalendarDates(self.folder_path, dfs)
        self.fare_attributes = self.FareAttributes(self.folder_path, dfs)
        self.fare_rules = self.FareRules(self.folder_path, dfs)
        self.timeframes = self.Timeframes(self.folder_path, dfs)
        self.fare_media = self.FareMedia(self.folder_path, dfs)
        self.fare_products = self.FareProducts(self.folder_path, dfs)
        self.fare_leg_rules = self.FareLegRules(self.folder_path, dfs)
        self.fare_transfer_rules = self.FareTransferRules(self.folder_path, dfs)
        self.areas = self.Areas(self.folder_path, dfs)
        self.stop_areas = self.StopAreas(self.folder_path, dfs)
        self.networks = self.Networks(self.folder_path, dfs)
        self.route_networks = self.RouteNetworks(self.folder_path, dfs)
        self.shapes = self.Shapes(self.folder_path, dfs)
        self.frequencies = self.Frequencies(self.folder_path, dfs)
        self.transfers = self.Transfers(self.folder_path, dfs)
        self.pathways = self.Pathways(self.folder_path, dfs)
        self.levels = self.Levels(self.folder_path, dfs)
        self.location_groups = self.LocationGroups(self.folder_path, dfs)
        self.location_group_stops = self.LocationGroupStops(self.folder_path, dfs)
        self.locations_geojson = self.LocationsGeojson(self.folder_path, dfs)
        self.booking_rules = self.BookingRules(self.folder_path, dfs)
        self.translations = self.Translations(self.folder_path, dfs)
        self.feed_info = self.FeedInfo(self.folder_path, dfs)
        self.attributions = self.Attributions(self.folder_path, dfs)
        # Messages collected by the most recent validate() run.
        self.errors = []

    class GTFSFile:
        """One feed table: remembers where it lives and holds its DataFrame."""

        def __init__(self, folder_path, file_name, dfs):
            self.file_name = file_name
            self.file_path = f"{folder_path}/{file_name}.txt"
            self.data = self.load_data(dfs)

        def load_data(self, dfs):
            """Return this table from ``dfs`` if given, otherwise from disk.

            A table that is missing from ``dfs``, missing on disk, or stored
            as an empty file is returned as an empty DataFrame.
            """
            if dfs:
                # A feed rarely contains every optional file, so a missing
                # key means "table absent" rather than an error.
                if self.file_name in dfs:
                    return dfs[self.file_name]
                return pd.DataFrame()
            try:
                return pd.read_csv(self.file_path)
            except (FileNotFoundError, pd.errors.EmptyDataError):
                return pd.DataFrame()

    class Agency(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'agency', dfs)

    class Stops(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'stops', dfs)

    class Routes(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'routes', dfs)

    class Trips(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'trips', dfs)

    class StopTimes(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'stop_times', dfs)

    class Calendar(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'calendar', dfs)

    class CalendarDates(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'calendar_dates', dfs)

    class FareAttributes(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_attributes', dfs)

    class FareRules(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_rules', dfs)

    class Timeframes(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'timeframes', dfs)

    class FareMedia(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_media', dfs)

    class FareProducts(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_products', dfs)

    class FareLegRules(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_leg_rules', dfs)

    class FareTransferRules(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'fare_transfer_rules', dfs)

    class Areas(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'areas', dfs)

    class StopAreas(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'stop_areas', dfs)

    class Networks(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'networks', dfs)

    class RouteNetworks(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'route_networks', dfs)

    class Shapes(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'shapes', dfs)

    class Frequencies(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'frequencies', dfs)

    class Transfers(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'transfers', dfs)

    class Pathways(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'pathways', dfs)

    class Levels(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'levels', dfs)

    class LocationGroups(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'location_groups', dfs)

    class LocationGroupStops(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'location_group_stops', dfs)

    class LocationsGeojson(GTFSFile):
        """``locations.geojson``: the one feed file that is JSON, not CSV.

        ``dfs`` is accepted for signature parity with the other tables but is
        not consulted; the file is always read from ``folder_path``.
        """

        def __init__(self, folder_path, dfs):
            # The base __init__ is bypassed on purpose: it would build a
            # ``.txt`` path and parse the file as CSV.
            self.file_name = 'locations'
            self.file_path = f"{folder_path}/locations.geojson"
            if os.path.exists(self.file_path):
                self.data = self.load_data()
            else:
                self.data = pd.DataFrame()

        def load_data(self, dfs=None):
            """Parse the file with ``pd.read_json``; empty frame if unparsable."""
            try:
                return pd.read_json(self.file_path)
            except ValueError:
                return pd.DataFrame()

    class BookingRules(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'booking_rules', dfs)

    class Translations(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'translations', dfs)

    class FeedInfo(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'feed_info', dfs)

    class Attributions(GTFSFile):
        def __init__(self, folder_path, dfs):
            super().__init__(folder_path, 'attributions', dfs)

    def get_files(self):
        """Return the sorted attribute names that hold a ``GTFSFile``."""
        return sorted(
            name for name, value in vars(self).items()
            if isinstance(value, self.GTFSFile)
        )

    def get_fields(self, name):
        """Return the column names of table ``name``, or ``None`` if unknown."""
        file = getattr(self, name, None)
        if not isinstance(file, self.GTFSFile):
            return None
        return list(file.data.columns)

    def export(self, path, dirname):
        """Write every loaded table into ``<path>/<dirname>``.

        The directory (and any missing parents) is created.  Tables that have
        no columns are skipped: they stand for files that were never part of
        the feed, and a placeholder file could not be parsed back on load.
        """
        out_dir = f"{os.path.normpath(path)}/{dirname}"
        os.makedirs(out_dir, exist_ok=True)
        for name in self.get_files():
            df = getattr(self, name).data
            if len(df.columns) == 0:
                continue
            if name == 'locations_geojson':
                # Same file name the loader reads, so an export can be
                # re-opened with GTFS(out_dir).
                # NOTE(review): DataFrame.to_json emits pandas'
                # column-oriented JSON, which may not be a GeoJSON document
                # other tools accept -- confirm.
                df.to_json(f"{out_dir}/locations.geojson")
            else:
                df.to_csv(f"{out_dir}/{name}.txt", index=False)

    def validate(self):
        """Run the column checks for every non-empty table.

        Returns:
            The list of error messages, or ``None`` when there are none.
            Each call starts from a clean list, so repeated calls do not
            accumulate duplicates.
        """
        self.errors = []
        for name in self._FIELD_SPECS:
            if not getattr(self, name).data.empty:
                # Dispatch through the public method so overrides are honored.
                getattr(self, f"validate_{name}")()
        return self.errors or None

    def _validate_table(self, name):
        """Apply the required/unexpected column checks configured for ``name``."""
        label, required_fields, optional_fields = self._FIELD_SPECS[name]
        df = getattr(self, name).data
        self.validate_required_fields(df, required_fields, label)
        if optional_fields is not None:
            self.validate_optional_fields(
                df, optional_fields, label, required_fields
            )

    def validate_agency(self):
        self._validate_table("agency")

    def validate_stops(self):
        self._validate_table("stops")
        self.validate_lat_lon(self.stops.data)

    def validate_routes(self):
        self._validate_table("routes")

    def validate_trips(self):
        self._validate_table("trips")

    def validate_stop_times(self):
        self._validate_table("stop_times")

    def validate_calendar(self):
        self._validate_table("calendar")

    def validate_calendar_dates(self):
        self._validate_table("calendar_dates")

    def validate_fare_attributes(self):
        self._validate_table("fare_attributes")

    def validate_fare_rules(self):
        self._validate_table("fare_rules")

    def validate_timeframes(self):
        self._validate_table("timeframes")

    def validate_fare_media(self):
        self._validate_table("fare_media")

    def validate_fare_products(self):
        self._validate_table("fare_products")

    def validate_fare_leg_rules(self):
        self._validate_table("fare_leg_rules")

    def validate_fare_transfer_rules(self):
        self._validate_table("fare_transfer_rules")

    def validate_areas(self):
        self._validate_table("areas")

    def validate_stop_areas(self):
        self._validate_table("stop_areas")

    def validate_networks(self):
        self._validate_table("networks")

    def validate_route_networks(self):
        self._validate_table("route_networks")

    def validate_shapes(self):
        self._validate_table("shapes")

    def validate_frequencies(self):
        self._validate_table("frequencies")

    def validate_transfers(self):
        self._validate_table("transfers")

    def validate_pathways(self):
        self._validate_table("pathways")

    def validate_levels(self):
        self._validate_table("levels")

    def validate_location_groups(self):
        self._validate_table("location_groups")

    def validate_location_group_stops(self):
        self._validate_table("location_group_stops")

    def validate_locations_geojson(self):
        self._validate_table("locations_geojson")

    def validate_booking_rules(self):
        self._validate_table("booking_rules")

    def validate_translations(self):
        self._validate_table("translations")

    def validate_feed_info(self):
        self._validate_table("feed_info")

    def validate_attributions(self):
        self._validate_table("attributions")

    @staticmethod
    def _format_fields(fields):
        """Render field names like a set literal, but in a stable order."""
        return "{" + ", ".join(repr(f) for f in sorted(fields, key=str)) + "}"

    def validate_required_fields(self, df, required_fields, file_name):
        """Record an error naming every required column absent from ``df``."""
        missing_fields = set(required_fields) - set(df.columns)
        if missing_fields:
            self.errors.append(
                f"{file_name} is missing required fields: "
                f"{self._format_fields(missing_fields)}"
            )

    def validate_optional_fields(self, df, optional_fields, file_name,
                                 required_fields=()):
        """Record an error naming columns that are neither optional nor required.

        Args:
            df: Table to inspect.
            optional_fields: Columns that may be present.
            file_name: Label used in the error message.
            required_fields: Columns that must be present; they are never
                reported as unexpected.
        """
        unexpected_fields = (
            set(df.columns) - set(optional_fields) - set(required_fields)
        )
        if unexpected_fields:
            self.errors.append(
                f"{file_name} has unexpected fields: "
                f"{self._format_fields(unexpected_fields)}"
            )

    def validate_lat_lon(self, df):
        """Record an error if either coordinate column contains a null."""
        if 'stop_lat' in df.columns and 'stop_lon' in df.columns:
            if df[['stop_lat', 'stop_lon']].isnull().any().any():
                self.errors.append("stops.txt has missing lat/lon values.")