From 9f7c36009676c69715f597c0c652a9445114aed1442429d8d6729c3a28f91794 Mon Sep 17 00:00:00 2001 From: Johannes Randerath Date: Tue, 11 Jun 2024 22:16:45 +0200 Subject: [PATCH] Refactored --- requirements.txt | 2 + transport_accessibility/pt_map/bridge.py | 303 +++--------------- transport_accessibility/pt_map/gtfs_schema.py | 8 + transport_accessibility/pt_map/views.py | 26 +- 4 files changed, 77 insertions(+), 262 deletions(-) diff --git a/requirements.txt b/requirements.txt index bfea8cc..c367d11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,6 +28,7 @@ jaraco.collections==5.0.1 jaraco.context==5.3.0 jaraco.functools==4.0.1 jaraco.text==3.12.0 +jedi==0.19.1 Jinja2==3.1.4 joblib==1.4.2 lxml==5.2.2 @@ -40,6 +41,7 @@ numpy==1.26.4 packaging==24.0 pandas==2.2.2 parsimonious==0.10.0 +parso==0.8.4 Pattern==3.6 pdfminer.six==20231228 platformdirs==4.2.2 diff --git a/transport_accessibility/pt_map/bridge.py b/transport_accessibility/pt_map/bridge.py index 220b0ce..1eeac0a 100644 --- a/transport_accessibility/pt_map/bridge.py +++ b/transport_accessibility/pt_map/bridge.py @@ -7,10 +7,16 @@ Contents -------- Constants --------- -gtfs_schema : dir{str,list[str]} - Maps GTFS file names (without filename extension) to fields described by the GTFS Reference reversed_file_mapping : dict(str,str) Map CamelCased filenames to '_'-separated +class_names : dict{str,str} + Map CamelCase, singularized class names to pluralized, snake_cased file names +primary_keys : dict{str, (str or None)} + For all pt_map.models, map primary keys if applicable +foreign_keys + For all pt_map.models, map foreign keys if any. Also ordered for model creation without foreign reference conflicts. +time_delta : int + Unix time for Jan 1, 2024. To be used to calculate time prefix strings. 
Functions --------- @@ -43,248 +49,10 @@ import time import datetime import django.db.models import time +from pt_map.gtfs_schema import gtfs_schema time_delta = int(datetime.datetime(2024,1,1).timestamp()) -gtfs_schema = { - "agency": [ - "agency_id", - "agency_name", - "agency_url", - "agency_timezone", - "agency_lang", - "agency_phone", - "agency_email", - "agency_fare_url" - ], - "stops": [ - "stop_id", - "stop_code", - "stop_name", - "stop_desc", - "stop_lat", - "stop_lon", - "zone_id", - "stop_url", - "location_type", - "parent_station", - "stop_timezone", - "wheelchair_boarding", - "level_id", - "platform_code" - ], - "routes": [ - "route_id", - "agency_id", - "route_short_name", - "route_long_name", - "route_desc", - "route_type", - "route_url", - "route_color", - "route_text_color", - "route_sort_order", - "continuous_pickup", - "continuous_drop_off" - ], - "trips": [ - "trip_id", - "route_id", - "service_id", - "trip_headsign", - "trip_short_name", - "direction_id", - "block_id", - "shape_id", - "wheelchair_accessible", - "bikes_allowed" - ], - "stop_times": [ - "trip_id", - "arrival_time", - "departure_time", - "stop_id", - "stop_sequence", - "stop_headsign", - "pickup_type", - "drop_off_type", - "shape_dist_traveled", - "timepoint" - ], - "calendar": [ - "service_id", - "monday", - "tuesday", - "wednesday", - "thursday", - "friday", - "saturday", - "sunday", - "start_date", - "end_date" - ], - "calendar_dates": [ - "service_id", - "date", - "exception_type" - ], - "fare_attributes": [ - "fare_id", - "price", - "currency_type", - "payment_method", - "transfers", - "transfer_duration" - ], - "fare_rules": [ - "fare_id", - "route_id", - "origin_id", - "destination_id", - "contains_id" - ], - "timeframes": [ - "timeframe_id", - "start_time", - "end_time", - "headway_sec", - "exact_times" - ], - "fare_media": [ - "media_id", - "agency_id", - "fare_id", - "seat_type", - "price" - ], - "fare_products": [ - "product_id", - "agency_id", - "product_type", - 
"fare_id", - "product_name", - "short_name", - "description", - "duration", - "transfers" - ], - "fare_leg_rules": [ - "fare_id", - "route_id", - "origin_id", - "destination_id", - "contains_id" - ], - "fare_transfer_rules": [ - "from_fare_id", - "to_fare_id", - "transfer_type", - "min_transfer_time" - ], - "areas": [ - "area_id", - "area_name", - "area_description" - ], - "stop_areas": [ - "stop_area_id", - "stop_id", - "area_id", - "location_type", - "parent_station", - "fare_zone_id" - ], - "networks": [ - "network_id", - "network_name", - "network_description" - ], - "route_networks": [ - "route_id", - "network_id" - ], - "shapes": [ - "shape_id", - "shape_pt_lat", - "shape_pt_lon", - "shape_pt_sequence", - "shape_dist_traveled" - ], - "frequencies": [ - "trip_id", - "start_time", - "end_time", - "headway_secs", - "exact_times" - ], - "transfers": [ - "from_stop_id", - "to_stop_id", - "transfer_type", - "min_transfer_time" - ], - "pathways": [ - "pathway_id", - "from_stop_id", - "to_stop_id", - "pathway_mode", - "is_bidirectional", - "length", - "traversal_time", - "stair_count", - "max_slope", - "min_width", - "signposted_as", - "reversed_signposted_as" - ], - "levels": [ - "level_id", - "level_index", - "level_name" - ], - "location_groups": [ - "location_group_id", - "location_group_name" - ], - "location_group_stops": [ - "location_group_id", - "stop_id" - ], - "locations_geojson": [ - "type", - "features" - ], - "booking_rules": [ - "rule_id", - "stop_id", - "rule_type", - "booking_url", - "admission_rules", - "admission_requirements" - ], - "translations": [ - "table_name", - "field_name", - "language", - "translation" - ], - "feed_info": [ - "feed_publisher_name", - "feed_publisher_url", - "feed_lang", - "default_lang", - "feed_start_date", - "feed_end_date", - "feed_version", - "feed_contact_email", - "feed_contact_url" - ], - "attributions": [ - "attribution_id", - "organization_name", - "is_producer" - ] -} primary_keys = { pt_map.models.Agency: 
"agency_id", @@ -432,6 +200,18 @@ def stdz(v, m: django.db.models.Model, f: str): return v def to_snake_case(name): + """ + Convert CamelCase to snake_case. + + Parameters + ---------- + name : str + str in CamelCase + + Returns + ------- + Str in snake_case + """ name = name[0].lower() + name[1:] for c in name[1:]: if c.isupper(): @@ -441,6 +221,20 @@ def to_snake_case(name): return name def unqfk(ts, fk): + """ + Primary keys of imported data and in the database are likely to overlap. To avoid this, the current time in seconds since Jan 1, 2024 is added as a prefix. + Foreign key references must know of this new key so they are processed in the same way. To make this possible, we use the same time in seconds for all objects. + + Parameters + ---------- + ts : str + time in seconds to be prepended + fk : primary or foreign key to be processed. + + Returns + ------- + Str with prefix + """ if not isinstance(fk, str): fk = str(int(fk)) return f"{ts}{fk}".strip() @@ -454,28 +248,23 @@ def gtfs_to_db(g: pt_map.gtfs.GTFS): g : gtfs.GTFS GTFS object to be saved to db """ - ts = str(int(time.time())-time_delta) + ts = str(int(time.time())-time_delta) # Prepend the current time in seconds since Jan 1, 2024 to ids to make them more or less unique for model in foreign_keys: - if model[0] in [pt_map.models.Calendar, pt_map.models.CalendarDate, ]: - continue m = model[0] - df = getattr(g, class_names[m.__name__]).data - if not df.empty: - v = gtfs_schema[class_names[m.__name__]] - for _, row in df.iterrows(): - for fk in model[1]: + df = getattr(g, class_names[m.__name__]).data # Extract dataframe for each model from gtfs.GTFS object + if not df.empty: # Only process GTFS files actually present + v = gtfs_schema[class_names[m.__name__]] # field names + for _, row in df.iterrows(): # the rows of the dataframe are the individual entries in the GTFS file and should be the individual instances of the db model + for fk in model[1]: # Map foreign_keys to objects of the foreign 
model if row.get(fk[1]): - row[fk[1]] = fk[0].objects.get(**{primary_keys[fk[0]]: unqfk(ts, row[fk[1]])}) - defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])} - print(model[0]) - if model[0] == pt_map.models.StopTime: - print(row) + row[fk[1]] = fk[0].objects.get(**{primary_keys[fk[0]]: unqfk(ts, row[fk[1]])}) + defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])} # dict of fields and values of current model object to create if primary_keys[m]: - row[primary_keys[m]] = unqfk(ts, row[primary_keys[m]]) + row[primary_keys[m]] = unqfk(ts, row[primary_keys[m]]) # primary_keys should be unique, use current time in seconds as a prefix defaults[primary_keys[m]] = row[primary_keys[m]] try: - m.objects.get(**{primary_keys[m]: row[primary_keys[m]]}) - except m.DoesNotExist: + m.objects.get(**{primary_keys[m]: row[primary_keys[m]]}) # Make sure there is no object with identical primary_key, exception is expected to be raised + except m.DoesNotExist: m.objects.update_or_create( defaults = defaults, diff --git a/transport_accessibility/pt_map/gtfs_schema.py b/transport_accessibility/pt_map/gtfs_schema.py index 8c47487..3d13af1 100644 --- a/transport_accessibility/pt_map/gtfs_schema.py +++ b/transport_accessibility/pt_map/gtfs_schema.py @@ -1,4 +1,11 @@ +""" +Make gtfs_schema constant available to modules in package without circular imports. 
+Constants +--------- +gtfs_schema : dict{str,list[str]} + Maps GTFS file names (without filename extension) to fields described by the GTFS Reference +""" gtfs_schema = { "agency": [ "agency_id", @@ -238,3 +245,4 @@ gtfs_schema = { "is_producer" ] } + diff --git a/transport_accessibility/pt_map/views.py b/transport_accessibility/pt_map/views.py index 5135748..dd01a17 100644 --- a/transport_accessibility/pt_map/views.py +++ b/transport_accessibility/pt_map/views.py @@ -15,12 +15,28 @@ from .forms import * import json from datetime import datetime -def print_r(r, s): - if not len(r): - print(s) - return r - def get_timetable(r, trips, stop_sequences): + """ + Given a pt_map.models.Route, calculate the timetable for all its stops. + + Parameters + ---------- + r : pt_map.models.Route + Route the timetable should be calculated for + trips : dict(str, list(pt_map.Trip)) + Dictionary mapping route_ids to the trips that travel on them + stop_sequences : dict(str, list(str)) + Dict mapping route_ids to lists of stop_ids they serve. Currently the first trip is taken as reference for stops and sequence. + + Returns + ------- + dict{"stop_sequence": list(str), "stop_times": dict(str, list(str))} + Dict containing two elements: + "stop_sequence" : list(str) + list of stop_ids the route serves + "stop_times" : dict(str, list(str)) + dict mapping stop_ids from stop_sequence to the time strings at which the route serves the stop + """ timetable = {"stop_sequence": stop_sequences[r.route_id]} sts = {} for stop in stop_sequences[r.route_id]: