Refactored

2024-06-11 22:16:45 +02:00 · 2024-06-11 22:16:45 +02:00 · 9f7c360096
commit 9f7c360096
parent 1dd906a87f
4 changed files with 77 additions and 262 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -28,6 +28,7 @@ jaraco.collections==5.0.1
 jaraco.context==5.3.0
 jaraco.functools==4.0.1
 jaraco.text==3.12.0
 jedi==0.19.1
 Jinja2==3.1.4
 joblib==1.4.2
 lxml==5.2.2
@ -40,6 +41,7 @@ numpy==1.26.4
 packaging==24.0
 pandas==2.2.2
 parsimonious==0.10.0
 parso==0.8.4
 Pattern==3.6
 pdfminer.six==20231228
 platformdirs==4.2.2
--- a/transport_accessibility/pt_map/bridge.py
+++ b/transport_accessibility/pt_map/bridge.py
@ -7,10 +7,16 @@ Contents
 --------
 Constants
 ---------
 gtfs_schema : dict{str,list[str]}
    Maps GTFS file names (without filename extension) to fields described by the GTFS Reference
 reversed_file_mapping : dict(str,str)
    Map CamelCased filenames to '_'-separated
 class_names : dict{str,str}
    Map CamelCase, singularized class names to pluralized, snake_cased file names
 primary_keys : dict{str, (str or None)}
    For all pt_map.models, map primary keys if applicable
 foreign_keys
    For all pt_map.models, map foreign keys if any. Also ordered for model creation without foreign reference conflicts.
 time_delta : int
    Unix time for Jan 1, 2024. To be used to calculate time prefix strings.
 Functions
 ---------
@ -43,248 +49,10 @@ import time
 import datetime
 import django.db.models
 import time
 from pt_map.gtfs_schema import gtfs_schema
 time_delta = int(datetime.datetime(2024,1,1).timestamp())
 gtfs_schema = {
    "agency": [
        "agency_id",
        "agency_name",
        "agency_url",
        "agency_timezone",
        "agency_lang",
        "agency_phone",
        "agency_email",
        "agency_fare_url"
    ],
    "stops": [
        "stop_id",
        "stop_code",
        "stop_name",
        "stop_desc",
        "stop_lat",
        "stop_lon",
        "zone_id",
        "stop_url",
        "location_type",
        "parent_station",
        "stop_timezone",
        "wheelchair_boarding",
        "level_id",
        "platform_code"
    ],
    "routes": [
        "route_id",
        "agency_id",
        "route_short_name",
        "route_long_name",
        "route_desc",
        "route_type",
        "route_url",
        "route_color",
        "route_text_color",
        "route_sort_order",
        "continuous_pickup",
        "continuous_drop_off"
    ],
    "trips": [
        "trip_id",
        "route_id",
        "service_id",
        "trip_headsign",
        "trip_short_name",
        "direction_id",
        "block_id",
        "shape_id",
        "wheelchair_accessible",
        "bikes_allowed"
    ],
    "stop_times": [
        "trip_id",
        "arrival_time",
        "departure_time",
        "stop_id",
        "stop_sequence",
        "stop_headsign",
        "pickup_type",
        "drop_off_type",
        "shape_dist_traveled",
        "timepoint"
    ],
    "calendar": [
        "service_id",
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
        "start_date",
        "end_date"
    ],
    "calendar_dates": [
        "service_id",
        "date",
        "exception_type"
    ],
    "fare_attributes": [
        "fare_id",
        "price",
        "currency_type",
        "payment_method",
        "transfers",
        "transfer_duration"
    ],
    "fare_rules": [
        "fare_id",
        "route_id",
        "origin_id",
        "destination_id",
        "contains_id"
    ],
    "timeframes": [
        "timeframe_id",
        "start_time",
        "end_time",
        "headway_sec",
        "exact_times"
    ],
    "fare_media": [
        "media_id",
        "agency_id",
        "fare_id",
        "seat_type",
        "price"
    ],
    "fare_products": [
        "product_id",
        "agency_id",
        "product_type",
        "fare_id",
        "product_name",
        "short_name",
        "description",
        "duration",
        "transfers"
    ],
    "fare_leg_rules": [
        "fare_id",
        "route_id",
        "origin_id",
        "destination_id",
        "contains_id"
    ],
    "fare_transfer_rules": [
        "from_fare_id",
        "to_fare_id",
        "transfer_type",
        "min_transfer_time"
    ],
    "areas": [
        "area_id",
        "area_name",
        "area_description"
    ],
    "stop_areas": [
        "stop_area_id",
        "stop_id",
        "area_id",
        "location_type",
        "parent_station",
        "fare_zone_id"
    ],
    "networks": [
        "network_id",
        "network_name",
        "network_description"
    ],
    "route_networks": [
        "route_id",
        "network_id"
    ],
    "shapes": [
        "shape_id",
        "shape_pt_lat",
        "shape_pt_lon",
        "shape_pt_sequence",
        "shape_dist_traveled"
    ],
    "frequencies": [
        "trip_id",
        "start_time",
        "end_time",
        "headway_secs",
        "exact_times"
    ],
    "transfers": [
        "from_stop_id",
            "to_stop_id",
        "transfer_type",
        "min_transfer_time"
    ],
    "pathways": [
        "pathway_id",
        "from_stop_id",
        "to_stop_id",
        "pathway_mode",
        "is_bidirectional",
        "length",
        "traversal_time",
        "stair_count",
        "max_slope",
        "min_width",
        "signposted_as",
        "reversed_signposted_as"
    ],
    "levels": [
        "level_id",
        "level_index",
        "level_name"
    ],
    "location_groups": [
        "location_group_id",
        "location_group_name"
    ],
    "location_group_stops": [
        "location_group_id",
        "stop_id"
    ],
    "locations_geojson": [
        "type",
        "features"
    ],
    "booking_rules": [
        "rule_id",
        "stop_id",
        "rule_type",
        "booking_url",
        "admission_rules",
        "admission_requirements"
    ],
    "translations": [
        "table_name",
        "field_name",
        "language",
        "translation"
    ],
    "feed_info": [
        "feed_publisher_name",
        "feed_publisher_url",
        "feed_lang",
        "default_lang",
        "feed_start_date",
        "feed_end_date",
        "feed_version",
        "feed_contact_email",
        "feed_contact_url"
    ],
    "attributions": [
        "attribution_id",
        "organization_name",
        "is_producer"
    ]
 }
 primary_keys = 	{ pt_map.models.Agency: "agency_id",
@ -432,6 +200,18 @@ def stdz(v, m: django.db.models.Model, f: str):
    return v
 def to_snake_case(name):
    """
    Convert CamelCase to snake_case.
    Parameters
    ----------
    name : str
        str in CamelCase
    Returns
    -------
    Str in snake_case
    """
    name = name[0].lower() + name[1:]
    for c in name[1:]:
        if c.isupper():
@ -441,6 +221,20 @@ def to_snake_case(name):
    return name
 def unqfk(ts, fk):
    """
    Build a prefixed key from a time prefix and a primary or foreign key.

    Keys of imported data are likely to collide with keys already stored in the database.
    To avoid this, the time in seconds since Jan 1, 2024 is prepended to every key.
    Foreign key references have to be rewritten in exactly the same way, so the same prefix must be used for all objects of one import.

    Parameters
    ----------
    ts : str
        time in seconds to be prepended
    fk : primary or foreign key to be processed. Values that are not str are converted with int() first and then turned into a str.

    Returns
    -------
    Str consisting of the prefix followed by the key, with leading and trailing whitespace removed
    """
    # Non-string keys are normalised through int() so that e.g. 45.0 becomes "45".
    key = fk if isinstance(fk, str) else str(int(fk))
    prefixed = f"{ts}{key}"
    return prefixed.strip()
@ -454,28 +248,23 @@ def gtfs_to_db(g: pt_map.gtfs.GTFS):
    g : gtfs.GTFS
        GTFS object to be saved to db
    """
-    ts = str(int(time.time())-time_delta)
+    ts = str(int(time.time())-time_delta) # Prepend the current time in seconds since Jan 1, 2024 to ids to make them more or less unique
    for model in foreign_keys:
        if model[0] in [pt_map.models.Calendar, pt_map.models.CalendarDate, ]:
            continue
        m = model[0]
-        df = getattr(g, class_names[m.__name__]).data
+        df = getattr(g, class_names[m.__name__]).data # Extract dataframe for each model from gtfs.GTFS object
-        if not df.empty:
+        if not df.empty: # Only process GTFS files actually present
-            v = gtfs_schema[class_names[m.__name__]]
+            v = gtfs_schema[class_names[m.__name__]] # field names
-            for _, row in df.iterrows():
+            for _, row in df.iterrows(): # the rows of the dataframe are the individual entries in the GTFS file and should be the individual instances of the db model
-                for fk in model[1]:
+                for fk in model[1]: # Map foreign_keys to objects of the foreign model
                    if row.get(fk[1]):
-                        row[fk[1]] = fk[0].objects.get(**{primary_keys[fk[0]]: unqfk(ts, row[fk[1]])})
+                        row[fk[1]] = fk[0].objects.get(**{primary_keys[fk[0]]: unqfk(ts, row[fk[1]])}) 
-                defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])}
+                defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])} # dict of fields and values of current model object to create
                print(model[0])
                if model[0] == pt_map.models.StopTime:
                    print(row)
                if primary_keys[m]:
-                    row[primary_keys[m]] = unqfk(ts, row[primary_keys[m]])
+                    row[primary_keys[m]] = unqfk(ts, row[primary_keys[m]]) # primary_keys should be unique, use current time in seconds as a prefix 
                    defaults[primary_keys[m]] = row[primary_keys[m]]
                    try:
-                        m.objects.get(**{primary_keys[m]: row[primary_keys[m]]})
+                        m.objects.get(**{primary_keys[m]: row[primary_keys[m]]}) # Make sure there is no object with identical primary_key, exception is expected to be risen
-                    except m.DoesNotExist:
+                    except m.DoesNotExist: 
                        m.objects.update_or_create(
                            defaults = defaults,
--- a/transport_accessibility/pt_map/gtfs_schema.py
+++ b/transport_accessibility/pt_map/gtfs_schema.py
@ -1,4 +1,11 @@
 """
 Make gtfs_schema constant available to modules in package without circular imports.
 Constants
 ---------
 gtfs_schema : dict{str,list[str]}
    Maps GTFS file names (without filename extension) to fields described by the GTFS Reference
 """
 gtfs_schema = {
    "agency": [
        "agency_id",
@ -238,3 +245,4 @@ gtfs_schema = {
        "is_producer"
    ]
 }
--- a/transport_accessibility/pt_map/views.py
+++ b/transport_accessibility/pt_map/views.py
@ -15,12 +15,28 @@ from .forms import *
 import json
 from datetime import datetime
 def print_r(r, s):
    """
    Return r unchanged, printing s first if r is empty.

    Parameters
    ----------
    r : any object supporting len()
        Value to be passed through
    s : object
        Message printed when len(r) is 0

    Returns
    -------
    The very same object r that was passed in
    """
    if len(r) > 0:
        return r
    # Only an empty r triggers the message.
    print(s)
    return r
 def get_timetable(r, trips, stop_sequences):
    """ 
    Given a pt_map.models.Route, calculate the timetable for all its stops.
    Parameters
    ----------
    r : pt_map.models.Route
        Route, the timetable should be calculated for
    trips : dict(str, list(pt_map.Trip))
        Dictionary mapping route_ids to the list of trips travelling on that route
    stop_sequences : dict(str, list(str))
        Dict mapping route_ids to lists of stop_ids they serve. Currently the first trip is taken as reference for stops and sequence.
    Returns
    -------
    dict{"stop_sequence": list(str), "stop_times": dict(str, list(str))}
        Dict containing two elements:
            "stop_sequence" : list(str)
                list of stop_ids the route serves
            "stop_times" : dict(str, list(str))
                dict mapping stop_ids from stop_sequence to time strings the route is serving the stop at
    """
    timetable = {"stop_sequence": stop_sequences[r.route_id]}
    sts = {}
    for stop in stop_sequences[r.route_id]: