diff --git a/TODO.md b/TODO.md index fdb57c7..5ea030b 100644 --- a/TODO.md +++ b/TODO.md @@ -6,10 +6,8 @@ - Add TODOs ## Backend -- Process GTFS files for im- and export - Serve data to views in an intuitive way. As an object of a custom class? - Fetch data to serve to views - Write data received from views - Implement views serve data to the templates - Handle requests corrrectly in views and urls -- Convert the data object <-> Pandas DataFrame diff --git a/transport_accessibility/pt_map/bridge.py b/transport_accessibility/pt_map/bridge.py index 8c2ee85..372105f 100644 --- a/transport_accessibility/pt_map/bridge.py +++ b/transport_accessibility/pt_map/bridge.py @@ -252,7 +252,7 @@ gtfs_schema = { def to_camel_case(s: str): return ''.join(word.capitalize() for word in s.split('_')) -def standardize_time(time_str): +def standardize_time(time_str: str): date_str = f"Jan 19, 1999 {time_str}" ntuple=email.utils.parsedate(date_str) timestamp=time.mktime(ntuple) @@ -263,7 +263,7 @@ def standardize_time(time_str): def is_NaN(v): return (isinstance(v, str) and v.lower() == "nan") or (isinstance(v, numbers.Number) and math.isnan(v)) -def stdz(v, m, f): +def stdz(v, m: django.db.models.Model, f: str): if m._meta.get_field(f).get_internal_type() == 'DateField': return str(v) if m._meta.get_field(f).get_internal_type() == 'TimeField': @@ -276,9 +276,6 @@ def gtfs_to_db(g: pt_map.gtfs.GTFS): name = to_camel_case(singularize(k)) m = getattr(pt_map.models, name) df = getattr(g, k).data - print("\n\n\n\n") - print(name) - print("#############################################################################################") if not df.empty: for _, row in df.iterrows(): defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])} @@ -288,4 +285,42 @@ def gtfs_to_db(g: pt_map.gtfs.GTFS): **kw_args, ) +reversed_file_mapping = { + "Agency": "agency", + "Stop": "stops", + "Route": "routes", + "Trip": "trips", + "StopTime": "stop_times", + "Calendar": "calendar", + "CalendarDate": "calendar_dates", + "FareAttribute": "fare_attributes", + "FareRule": "fare_rules", + "Timeframe": "timeframes", + "FareMedium": "fare_media", + "FareProduct": "fare_products", + "FareLegRule": "fare_leg_rules", + "FareTransferRule": "fare_transfer_rules", + "Area": "areas", + "StopArea": "stop_areas", + "Network": "networks", + "RouteNetwork": "route_networks", + "Shape": "shapes", + "Frequency": "frequencies", + "Transfer": "transfers", + "Pathway": "pathways", + "Level": "levels", + "LocationGroup": "location_groups", + "LocationGroupStop": "location_group_stops", + "LocationsGeojson": "locations.geojson", + "BookingRule": "booking_rules", + "Translation": "translations", + "FeedInfo": "feed_info", + "Attribution": "attributions" +} + +def db_to_gtfs(q: list[django.db.models.query.QuerySet], folder_path: str): + dfs = {reversed_file_mapping[m.model.__name__]: (pd.DataFrame(list(m.values())) if m else pd.DataFrame()) for m in q} + g = pt_map.gtfs.GTFS(folder_path, dfs) + g.validate() + return g diff --git a/transport_accessibility/pt_map/gtfs.py b/transport_accessibility/pt_map/gtfs.py index 99f573c..9ffc35d 100644 --- a/transport_accessibility/pt_map/gtfs.py +++ b/transport_accessibility/pt_map/gtfs.py @@ -2,153 +2,157 @@ import pandas as pd import os class GTFS: - def __init__(self, folder_path): + def __init__(self, folder_path: str, dfs: list[list] = None): self.folder_path = folder_path - self.agency = self.Agency(self.folder_path) - self.stops = self.Stops(self.folder_path) - self.routes = self.Routes(self.folder_path) - self.trips = self.Trips(self.folder_path) - self.stop_times = self.StopTimes(self.folder_path) - self.calendar = self.Calendar(self.folder_path) - self.calendar_dates = self.CalendarDates(self.folder_path) - self.fare_attributes = self.FareAttributes(self.folder_path) - self.fare_rules = self.FareRules(self.folder_path) - self.timeframes = self.Timeframes(self.folder_path) - self.fare_media = self.FareMedia(self.folder_path) - self.fare_products = self.FareProducts(self.folder_path) - self.fare_leg_rules = self.FareLegRules(self.folder_path) - self.fare_transfer_rules = self.FareTransferRules(self.folder_path) - self.areas = self.Areas(self.folder_path) - self.stop_areas = self.StopAreas(self.folder_path) - self.networks = self.Networks(self.folder_path) - self.route_networks = self.RouteNetworks(self.folder_path) - self.shapes = self.Shapes(self.folder_path) - self.frequencies = self.Frequencies(self.folder_path) - self.transfers = self.Transfers(self.folder_path) - self.pathways = self.Pathways(self.folder_path) - self.levels = self.Levels(self.folder_path) - self.location_groups = self.LocationGroups(self.folder_path) - self.location_group_stops = self.LocationGroupStops(self.folder_path) - self.locations_geojson = self.LocationsGeojson(self.folder_path) - self.booking_rules = self.BookingRules(self.folder_path) - self.translations = self.Translations(self.folder_path) - self.feed_info = self.FeedInfo(self.folder_path) - self.attributions = self.Attributions(self.folder_path) + self.agency = self.Agency(self.folder_path, dfs) + self.stops = self.Stops(self.folder_path, dfs) + self.routes = self.Routes(self.folder_path, dfs) + self.trips = self.Trips(self.folder_path, dfs) + self.stop_times = self.StopTimes(self.folder_path, dfs) + self.calendar = self.Calendar(self.folder_path, dfs) + self.calendar_dates = self.CalendarDates(self.folder_path, dfs) + self.fare_attributes = self.FareAttributes(self.folder_path, dfs) + self.fare_rules = self.FareRules(self.folder_path, dfs) + self.timeframes = self.Timeframes(self.folder_path, dfs) + self.fare_media = self.FareMedia(self.folder_path, dfs) + self.fare_products = self.FareProducts(self.folder_path, dfs) + self.fare_leg_rules = self.FareLegRules(self.folder_path, dfs) + self.fare_transfer_rules = self.FareTransferRules(self.folder_path, dfs) + self.areas = self.Areas(self.folder_path, dfs) + self.stop_areas = self.StopAreas(self.folder_path, dfs) + self.networks = self.Networks(self.folder_path, dfs) + self.route_networks = self.RouteNetworks(self.folder_path, dfs) + self.shapes = self.Shapes(self.folder_path, dfs) + self.frequencies = self.Frequencies(self.folder_path, dfs) + self.transfers = self.Transfers(self.folder_path, dfs) + self.pathways = self.Pathways(self.folder_path, dfs) + self.levels = self.Levels(self.folder_path, dfs) + self.location_groups = self.LocationGroups(self.folder_path, dfs) + self.location_group_stops = self.LocationGroupStops(self.folder_path, dfs) + self.locations_geojson = self.LocationsGeojson(self.folder_path, dfs) + self.booking_rules = self.BookingRules(self.folder_path, dfs) + self.translations = self.Translations(self.folder_path, dfs) + self.feed_info = self.FeedInfo(self.folder_path, dfs) + self.attributions = self.Attributions(self.folder_path, dfs) self.errors = [] class GTFSFile: - def __init__(self, folder_path, file_name): + def __init__(self, folder_path, file_name, dfs): + self.file_name = file_name self.file_path = f"{folder_path}/{file_name}.txt" - self.data = self.load_data() + self.data = self.load_data(dfs) - def load_data(self): - try: - return pd.read_csv(self.file_path) - except FileNotFoundError: - return pd.DataFrame() + def load_data(self, dfs): + if dfs: + return dfs[self.file_name] + else: + try: + return pd.read_csv(self.file_path) + except FileNotFoundError: + return pd.DataFrame() class Agency(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'agency') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'agency', dfs) class Stops(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'stops') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'stops', dfs) class Routes(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'routes') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'routes', dfs) class Trips(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'trips') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'trips', dfs) class StopTimes(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'stop_times') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'stop_times', dfs) class Calendar(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'calendar') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'calendar', dfs) class CalendarDates(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'calendar_dates') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'calendar_dates', dfs) class FareAttributes(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'fare_attributes') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'fare_attributes', dfs) class FareRules(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'fare_rules') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'fare_rules', dfs) class Timeframes(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'timeframes') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'timeframes', dfs) class FareMedia(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'fare_media') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'fare_media', dfs) class FareProducts(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'fare_products') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'fare_products', dfs) class FareLegRules(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'fare_leg_rules') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'fare_leg_rules', dfs) class FareTransferRules(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'fare_transfer_rules') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'fare_transfer_rules', dfs) class Areas(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'areas') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'areas', dfs) class StopAreas(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'stop_areas') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'stop_areas', dfs) class Networks(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'networks') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'networks', dfs) class RouteNetworks(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'route_networks') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'route_networks', dfs) class Shapes(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'shapes') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'shapes', dfs) class Frequencies(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'frequencies') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'frequencies', dfs) class Transfers(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'transfers') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'transfers', dfs) class Pathways(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'pathways') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'pathways', dfs) class Levels(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'levels') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'levels', dfs) class LocationGroups(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'location_groups') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'location_groups', dfs) class LocationGroupStops(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'location_group_stops') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'location_group_stops', dfs) class LocationsGeojson(GTFSFile): - def __init__(self, folder_path): + def __init__(self, folder_path, dfs): self.file_path = f"{folder_path}/locations.geojson" if os.path.exists(self.file_path): self.data = self.load_data() @@ -162,20 +166,20 @@ class GTFS: return pd.DataFrame() class BookingRules(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'booking_rules') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'booking_rules', dfs) class Translations(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'translations') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'translations', dfs) class FeedInfo(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'feed_info') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'feed_info', dfs) class Attributions(GTFSFile): - def __init__(self, folder_path): - super().__init__(folder_path, 'attributions') + def __init__(self, folder_path, dfs): + super().__init__(folder_path, 'attributions', dfs) def get_files(self): return [attr for attr in dir(self) if isinstance(getattr(self, attr), self.GTFSFile)] @@ -200,37 +204,66 @@ class GTFS: df.to_csv(fpath, index=False) def validate(self): - self.validate_agency() - self.validate_stops() - self.validate_routes() - self.validate_trips() - self.validate_stop_times() - self.validate_calendar() - self.validate_calendar_dates() - self.validate_fare_attributes() - self.validate_fare_rules() - self.validate_timeframes() - self.validate_fare_media() - self.validate_fare_products() - self.validate_fare_leg_rules() - self.validate_fare_transfer_rules() - self.validate_areas() - self.validate_stop_areas() - self.validate_networks() - self.validate_route_networks() - self.validate_shapes() - self.validate_frequencies() - self.validate_transfers() - self.validate_pathways() - self.validate_levels() - self.validate_location_groups() - self.validate_location_group_stops() - self.validate_locations_geojson() - self.validate_booking_rules() - self.validate_translations() - self.validate_feed_info() - self.validate_attributions() - self.validate_cross_references() + if not self.agency.data.empty: + self.validate_agency() + if not self.stops.data.empty: + self.validate_stops() + if not self.routes.data.empty: + self.validate_routes() + if not self.trips.data.empty: + self.validate_trips() + if not self.stop_times.data.empty: + self.validate_stop_times() + if not self.calendar.data.empty: + self.validate_calendar() + if not self.calendar_dates.data.empty: + self.validate_calendar_dates() + if not self.fare_attributes.data.empty: + self.validate_fare_attributes() + if not self.fare_rules.data.empty: + self.validate_fare_rules() + if not self.timeframes.data.empty: + self.validate_timeframes() + if not self.fare_media.data.empty: + self.validate_fare_media() + if not self.fare_products.data.empty: + self.validate_fare_products() + if not self.fare_leg_rules.data.empty: + self.validate_fare_leg_rules() + if not self.fare_transfer_rules.data.empty: + self.validate_fare_transfer_rules() + if not self.areas.data.empty: + self.validate_areas() + if not self.stop_areas.data.empty: + self.validate_stop_areas() + if not self.networks.data.empty: + self.validate_networks() + if not self.route_networks.data.empty: + self.validate_route_networks() + if not self.shapes.data.empty: + self.validate_shapes() + if not self.frequencies.data.empty: + self.validate_frequencies() + if not self.transfers.data.empty: + self.validate_transfers() + if not self.pathways.data.empty: + self.validate_pathways() + if not self.levels.data.empty: + self.validate_levels() + if not self.location_groups.data.empty: + self.validate_location_groups() + if not self.location_group_stops.data.empty: + self.validate_location_group_stops() + if not self.locations_geojson.data.empty: + self.validate_locations_geojson() + if not self.booking_rules.data.empty: + self.validate_booking_rules() + if not self.translations.data.empty: + self.validate_translations() + if not self.feed_info.data.empty: + self.validate_feed_info() + if not self.attributions.data.empty: + self.validate_attributions() if not self.errors: return None @@ -435,10 +468,3 @@ class GTFS: if df[['stop_lat', 'stop_lon']].isnull().any().any(): self.errors.append(f"stops.txt has missing lat/lon values.") - def validate_cross_references(self): - # Example: Validate that all stop_ids in stop_times.txt exist in stops.txt - if not self.stop_times.data.empty and not self.stops.data.empty: - invalid_stops = set(self.stop_times.data['stop_id']) - set(self.stops.data['stop_id']) - if invalid_stops: - self.errors.append(f"stop_times.txt has invalid stop_ids: {invalid_stops}") -