import datetime
import email.utils
import math
import numbers
import time

import django.db.models
import pandas as pd
from pattern.text.en import singularize

import pt_map.gtfs
import pt_map.models

# Columns handled for each GTFS table, keyed by the table name used as an
# attribute on the GTFS object. The first column of every list is the one
# gtfs_to_db() uses as the lookup key when upserting rows.
gtfs_schema = {
    "agency": [
        "agency_id", "agency_name", "agency_url", "agency_timezone",
        "agency_lang", "agency_phone", "agency_email", "agency_fare_url",
    ],
    "stops": [
        "stop_id", "stop_code", "stop_name", "stop_desc", "stop_lat",
        "stop_lon", "zone_id", "stop_url", "location_type", "parent_station",
        "stop_timezone", "wheelchair_boarding", "level_id", "platform_code",
    ],
    "routes": [
        "route_id", "agency_id", "route_short_name", "route_long_name",
        "route_desc", "route_type", "route_url", "route_color",
        "route_text_color", "route_sort_order", "continuous_pickup",
        "continuous_drop_off",
    ],
    "trips": [
        "trip_id", "route_id", "service_id", "trip_headsign",
        "trip_short_name", "direction_id", "block_id", "shape_id",
        "wheelchair_accessible", "bikes_allowed",
    ],
    "stop_times": [
        "trip_id", "arrival_time", "departure_time", "stop_id",
        "stop_sequence", "stop_headsign", "pickup_type", "drop_off_type",
        "shape_dist_traveled", "timepoint",
    ],
    "calendar": [
        "service_id", "monday", "tuesday", "wednesday", "thursday", "friday",
        "saturday", "sunday", "start_date", "end_date",
    ],
    "calendar_dates": ["service_id", "date", "exception_type"],
    "fare_attributes": [
        "fare_id", "price", "currency_type", "payment_method", "transfers",
        "transfer_duration",
    ],
    "fare_rules": [
        "fare_id", "route_id", "origin_id", "destination_id", "contains_id",
    ],
    "timeframes": [
        "timeframe_id", "start_time", "end_time", "headway_sec",
        "exact_times",
    ],
    "fare_media": ["media_id", "agency_id", "fare_id", "seat_type", "price"],
    "fare_products": [
        "product_id", "agency_id", "product_type", "fare_id", "product_name",
        "short_name", "description", "duration", "transfers",
    ],
    "fare_leg_rules": [
        "fare_id", "route_id", "origin_id", "destination_id", "contains_id",
    ],
    "fare_transfer_rules": [
        "from_fare_id", "to_fare_id", "transfer_type", "min_transfer_time",
    ],
    "areas": ["area_id", "area_name", "area_description"],
    "stop_areas": [
        "stop_area_id", "stop_id", "area_id", "location_type",
        "parent_station", "fare_zone_id",
    ],
    "networks": ["network_id", "network_name", "network_description"],
    "route_networks": ["route_id", "network_id"],
    "shapes": [
        "shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence",
        "shape_dist_traveled",
    ],
    "frequencies": [
        "trip_id", "start_time", "end_time", "headway_secs", "exact_times",
    ],
    "transfers": [
        "from_stop_id", "to_stop_id", "transfer_type", "min_transfer_time",
    ],
    "pathways": [
        "pathway_id", "from_stop_id", "to_stop_id", "pathway_mode",
        "is_bidirectional", "length", "traversal_time", "stair_count",
        "max_slope", "min_width", "signposted_as", "reversed_signposted_as",
    ],
    "levels": ["level_id", "level_index", "level_name"],
    "location_groups": ["location_group_id", "location_group_name"],
    "location_group_stops": ["location_group_id", "stop_id"],
    "locations_geojson": ["type", "features"],
    "booking_rules": [
        "rule_id", "stop_id", "rule_type", "booking_url", "admission_rules",
        "admission_requirements",
    ],
    "translations": ["table_name", "field_name", "language", "translation"],
    "feed_info": [
        "feed_publisher_name", "feed_publisher_url", "feed_lang",
        "default_lang", "feed_start_date", "feed_end_date", "feed_version",
        "feed_contact_email", "feed_contact_url",
    ],
    "attributions": ["attribution_id", "organization_name", "is_producer"],
}


def to_camel_case(s: str):
    """Convert a ``snake_case`` string to ``CamelCase`` (``stop_time`` -> ``StopTime``)."""
    return ''.join(word.capitalize() for word in s.split('_'))


def standardize_time(time_str: str):
    """Normalize a clock time to a zero-padded ``HH:MM:SS`` string.

    Values that overflow a day (for example ``25:30:00``) wrap around to the
    matching time of day (``01:30:00``).

    Args:
        time_str: The time to normalize, e.g. ``"8:05:00"``.

    Returns:
        The time formatted as ``HH:MM:SS``.

    Raises:
        ValueError: If ``time_str`` cannot be parsed as a time.
    """
    # Borrow the lenient RFC 2822 date parser by attaching a dummy date; only
    # the hour/minute/second fields of the result are used.
    parsed = email.utils.parsedate(f"Jan 19, 1999 {time_str}")
    if parsed is None:
        raise ValueError(f"unparseable time value: {time_str!r}")
    hours, minutes, seconds = parsed[3:6]
    # Do the wrap-around arithmetically instead of round-tripping through
    # time.mktime()/fromtimestamp(), which depends on the local time zone.
    seconds_of_day = (hours * 3600 + minutes * 60 + seconds) % 86400
    hours, remainder = divmod(seconds_of_day, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


def is_NaN(v):
    """Return True if ``v`` is a numeric NaN or the string ``"nan"`` (any case)."""
    if isinstance(v, str):
        return v.lower() == "nan"
    return isinstance(v, numbers.Number) and math.isnan(v)


def stdz(v, m: type[django.db.models.Model], f: str):
    """Standardize ``v`` according to the Django field type of ``m.f``.

    Args:
        v: Raw value read from a GTFS table.
        m: Django model whose ``_meta`` describes field ``f``.
        f: Name of the model field the value is destined for.

    Returns:
        ``str(v)`` for a ``DateField``, the result of ``standardize_time(v)``
        for a ``TimeField``, and ``v`` unchanged for every other field type.
    """
    internal_type = m._meta.get_field(f).get_internal_type()
    if internal_type == 'DateField':
        return str(v)
    if internal_type == 'TimeField':
        return standardize_time(v)
    return v


def _has_value(value):
    """Return True unless ``value`` is None, NaN or an empty string."""
    if value is None or is_NaN(value):
        return False
    return not (isinstance(value, str) and value == "")


def gtfs_to_db(g: pt_map.gtfs.GTFS):
    """Upsert every non-empty table of ``g`` into its Django model.

    For each table in ``gtfs_schema`` the model is looked up on
    ``pt_map.models`` under the CamelCase singular of the table name, and each
    row of the table's ``data`` frame is written with ``update_or_create``.
    Missing cells (None, NaN, empty string) are left out of the update.

    Args:
        g: GTFS object exposing one attribute per table, each with a ``data``
            DataFrame.
    """
    for table, columns in gtfs_schema.items():
        model = getattr(pt_map.models, to_camel_case(singularize(table)))
        df = getattr(g, table).data
        if df.empty:
            continue
        key_column = columns[0]
        for _, row in df.iterrows():
            defaults = {}
            for field in columns:
                value = row.get(field)
                # Explicit missing-value test rather than truthiness, so that
                # legitimate zeros (direction_id=0, monday=0, ...) are stored.
                if _has_value(value):
                    defaults[field] = stdz(value, model, field)
            # NOTE(review): rows are matched on the first schema column only.
            # For tables where that column repeats across rows (e.g.
            # stop_times keyed by trip_id) successive rows would hit the same
            # DB record - confirm against the model constraints.
            lookup = {key_column: row[key_column]}
            model.objects.update_or_create(defaults=defaults, **lookup)


# Django model class name -> GTFS file/table name, used when exporting.
reversed_file_mapping = {
    "Agency": "agency",
    "Stop": "stops",
    "Route": "routes",
    "Trip": "trips",
    "StopTime": "stop_times",
    "Calendar": "calendar",
    "CalendarDate": "calendar_dates",
    "FareAttribute": "fare_attributes",
    "FareRule": "fare_rules",
    "Timeframe": "timeframes",
    "FareMedium": "fare_media",
    "FareProduct": "fare_products",
    "FareLegRule": "fare_leg_rules",
    "FareTransferRule": "fare_transfer_rules",
    "Area": "areas",
    "StopArea": "stop_areas",
    "Network": "networks",
    "RouteNetwork": "route_networks",
    "Shape": "shapes",
    "Frequency": "frequencies",
    "Transfer": "transfers",
    "Pathway": "pathways",
    "Level": "levels",
    "LocationGroup": "location_groups",
    "LocationGroupStop": "location_group_stops",
    "LocationsGeojson": "locations.geojson",
    "BookingRule": "booking_rules",
    "Translation": "translations",
    "FeedInfo": "feed_info",
    "Attribution": "attributions",
}


def db_to_gtfs(q: list[django.db.models.query.QuerySet], folder_path: str):
    """Build a GTFS object from Django querysets and validate it.

    Args:
        q: Querysets, one per model to export; each model's class name must
            be a key of ``reversed_file_mapping``.
        folder_path: Passed through to the ``pt_map.gtfs.GTFS`` constructor.

    Returns:
        The constructed GTFS object, after ``validate()`` has been called.

    Raises:
        KeyError: If a queryset's model name is not in
            ``reversed_file_mapping``.
    """
    dfs = {}
    for queryset in q:
        table = reversed_file_mapping[queryset.model.__name__]
        # Fetch the rows once; testing the queryset for truth and then calling
        # .values() would hit the database twice.
        rows = list(queryset.values())
        dfs[table] = pd.DataFrame(rows) if rows else pd.DataFrame()
    g = pt_map.gtfs.GTFS(folder_path, dfs)
    g.validate()
    return g