Refactored
This commit is contained in:
parent
1dd906a87f
commit
9f7c360096
|
|
@ -28,6 +28,7 @@ jaraco.collections==5.0.1
|
||||||
jaraco.context==5.3.0
|
jaraco.context==5.3.0
|
||||||
jaraco.functools==4.0.1
|
jaraco.functools==4.0.1
|
||||||
jaraco.text==3.12.0
|
jaraco.text==3.12.0
|
||||||
|
jedi==0.19.1
|
||||||
Jinja2==3.1.4
|
Jinja2==3.1.4
|
||||||
joblib==1.4.2
|
joblib==1.4.2
|
||||||
lxml==5.2.2
|
lxml==5.2.2
|
||||||
|
|
@ -40,6 +41,7 @@ numpy==1.26.4
|
||||||
packaging==24.0
|
packaging==24.0
|
||||||
pandas==2.2.2
|
pandas==2.2.2
|
||||||
parsimonious==0.10.0
|
parsimonious==0.10.0
|
||||||
|
parso==0.8.4
|
||||||
Pattern==3.6
|
Pattern==3.6
|
||||||
pdfminer.six==20231228
|
pdfminer.six==20231228
|
||||||
platformdirs==4.2.2
|
platformdirs==4.2.2
|
||||||
|
|
|
||||||
|
|
@ -7,10 +7,16 @@ Contents
|
||||||
--------
|
--------
|
||||||
Constants
|
Constants
|
||||||
---------
|
---------
|
||||||
gtfs_schema : dir{str,list[str]}
|
|
||||||
Maps GTFS file names (without filename extension) to fields described by the GTFS Reference
|
|
||||||
reversed_file_mapping : dict(str,str)
|
reversed_file_mapping : dict(str,str)
|
||||||
Map CamelCased filenames to '_'-separated
|
Map CamelCased filenames to '_'-separated
|
||||||
|
class_names : dict{str,str}
|
||||||
|
Map CamelCase, singularized class names to pluralized, snake_cased file names
|
||||||
|
primary_keys : dict{str, (str or None)}
|
||||||
|
For all pt_map.models, map primary keys if applicable
|
||||||
|
foreign_keys
|
||||||
|
For all pt_map.models, map foreign keys if any. The entries are ordered so that the models can be created without foreign reference conflicts.
|
||||||
|
time_delta : int
|
||||||
|
Unix time for Jan 1, 2024. To be used to calculate time prefix strings.
|
||||||
|
|
||||||
Functions
|
Functions
|
||||||
---------
|
---------
|
||||||
|
|
@ -43,248 +49,10 @@ import time
|
||||||
import datetime
|
import datetime
|
||||||
import django.db.models
|
import django.db.models
|
||||||
import time
|
import time
|
||||||
|
from pt_map.gtfs_schema import gtfs_schema
|
||||||
|
|
||||||
time_delta = int(datetime.datetime(2024,1,1).timestamp())
|
time_delta = int(datetime.datetime(2024,1,1).timestamp())
|
||||||
|
|
||||||
gtfs_schema = {
|
|
||||||
"agency": [
|
|
||||||
"agency_id",
|
|
||||||
"agency_name",
|
|
||||||
"agency_url",
|
|
||||||
"agency_timezone",
|
|
||||||
"agency_lang",
|
|
||||||
"agency_phone",
|
|
||||||
"agency_email",
|
|
||||||
"agency_fare_url"
|
|
||||||
],
|
|
||||||
"stops": [
|
|
||||||
"stop_id",
|
|
||||||
"stop_code",
|
|
||||||
"stop_name",
|
|
||||||
"stop_desc",
|
|
||||||
"stop_lat",
|
|
||||||
"stop_lon",
|
|
||||||
"zone_id",
|
|
||||||
"stop_url",
|
|
||||||
"location_type",
|
|
||||||
"parent_station",
|
|
||||||
"stop_timezone",
|
|
||||||
"wheelchair_boarding",
|
|
||||||
"level_id",
|
|
||||||
"platform_code"
|
|
||||||
],
|
|
||||||
"routes": [
|
|
||||||
"route_id",
|
|
||||||
"agency_id",
|
|
||||||
"route_short_name",
|
|
||||||
"route_long_name",
|
|
||||||
"route_desc",
|
|
||||||
"route_type",
|
|
||||||
"route_url",
|
|
||||||
"route_color",
|
|
||||||
"route_text_color",
|
|
||||||
"route_sort_order",
|
|
||||||
"continuous_pickup",
|
|
||||||
"continuous_drop_off"
|
|
||||||
],
|
|
||||||
"trips": [
|
|
||||||
"trip_id",
|
|
||||||
"route_id",
|
|
||||||
"service_id",
|
|
||||||
"trip_headsign",
|
|
||||||
"trip_short_name",
|
|
||||||
"direction_id",
|
|
||||||
"block_id",
|
|
||||||
"shape_id",
|
|
||||||
"wheelchair_accessible",
|
|
||||||
"bikes_allowed"
|
|
||||||
],
|
|
||||||
"stop_times": [
|
|
||||||
"trip_id",
|
|
||||||
"arrival_time",
|
|
||||||
"departure_time",
|
|
||||||
"stop_id",
|
|
||||||
"stop_sequence",
|
|
||||||
"stop_headsign",
|
|
||||||
"pickup_type",
|
|
||||||
"drop_off_type",
|
|
||||||
"shape_dist_traveled",
|
|
||||||
"timepoint"
|
|
||||||
],
|
|
||||||
"calendar": [
|
|
||||||
"service_id",
|
|
||||||
"monday",
|
|
||||||
"tuesday",
|
|
||||||
"wednesday",
|
|
||||||
"thursday",
|
|
||||||
"friday",
|
|
||||||
"saturday",
|
|
||||||
"sunday",
|
|
||||||
"start_date",
|
|
||||||
"end_date"
|
|
||||||
],
|
|
||||||
"calendar_dates": [
|
|
||||||
"service_id",
|
|
||||||
"date",
|
|
||||||
"exception_type"
|
|
||||||
],
|
|
||||||
"fare_attributes": [
|
|
||||||
"fare_id",
|
|
||||||
"price",
|
|
||||||
"currency_type",
|
|
||||||
"payment_method",
|
|
||||||
"transfers",
|
|
||||||
"transfer_duration"
|
|
||||||
],
|
|
||||||
"fare_rules": [
|
|
||||||
"fare_id",
|
|
||||||
"route_id",
|
|
||||||
"origin_id",
|
|
||||||
"destination_id",
|
|
||||||
"contains_id"
|
|
||||||
],
|
|
||||||
"timeframes": [
|
|
||||||
"timeframe_id",
|
|
||||||
"start_time",
|
|
||||||
"end_time",
|
|
||||||
"headway_sec",
|
|
||||||
"exact_times"
|
|
||||||
],
|
|
||||||
"fare_media": [
|
|
||||||
"media_id",
|
|
||||||
"agency_id",
|
|
||||||
"fare_id",
|
|
||||||
"seat_type",
|
|
||||||
"price"
|
|
||||||
],
|
|
||||||
"fare_products": [
|
|
||||||
"product_id",
|
|
||||||
"agency_id",
|
|
||||||
"product_type",
|
|
||||||
"fare_id",
|
|
||||||
"product_name",
|
|
||||||
"short_name",
|
|
||||||
"description",
|
|
||||||
"duration",
|
|
||||||
"transfers"
|
|
||||||
],
|
|
||||||
"fare_leg_rules": [
|
|
||||||
"fare_id",
|
|
||||||
"route_id",
|
|
||||||
"origin_id",
|
|
||||||
"destination_id",
|
|
||||||
"contains_id"
|
|
||||||
],
|
|
||||||
"fare_transfer_rules": [
|
|
||||||
"from_fare_id",
|
|
||||||
"to_fare_id",
|
|
||||||
"transfer_type",
|
|
||||||
"min_transfer_time"
|
|
||||||
],
|
|
||||||
"areas": [
|
|
||||||
"area_id",
|
|
||||||
"area_name",
|
|
||||||
"area_description"
|
|
||||||
],
|
|
||||||
"stop_areas": [
|
|
||||||
"stop_area_id",
|
|
||||||
"stop_id",
|
|
||||||
"area_id",
|
|
||||||
"location_type",
|
|
||||||
"parent_station",
|
|
||||||
"fare_zone_id"
|
|
||||||
],
|
|
||||||
"networks": [
|
|
||||||
"network_id",
|
|
||||||
"network_name",
|
|
||||||
"network_description"
|
|
||||||
],
|
|
||||||
"route_networks": [
|
|
||||||
"route_id",
|
|
||||||
"network_id"
|
|
||||||
],
|
|
||||||
"shapes": [
|
|
||||||
"shape_id",
|
|
||||||
"shape_pt_lat",
|
|
||||||
"shape_pt_lon",
|
|
||||||
"shape_pt_sequence",
|
|
||||||
"shape_dist_traveled"
|
|
||||||
],
|
|
||||||
"frequencies": [
|
|
||||||
"trip_id",
|
|
||||||
"start_time",
|
|
||||||
"end_time",
|
|
||||||
"headway_secs",
|
|
||||||
"exact_times"
|
|
||||||
],
|
|
||||||
"transfers": [
|
|
||||||
"from_stop_id",
|
|
||||||
"to_stop_id",
|
|
||||||
"transfer_type",
|
|
||||||
"min_transfer_time"
|
|
||||||
],
|
|
||||||
"pathways": [
|
|
||||||
"pathway_id",
|
|
||||||
"from_stop_id",
|
|
||||||
"to_stop_id",
|
|
||||||
"pathway_mode",
|
|
||||||
"is_bidirectional",
|
|
||||||
"length",
|
|
||||||
"traversal_time",
|
|
||||||
"stair_count",
|
|
||||||
"max_slope",
|
|
||||||
"min_width",
|
|
||||||
"signposted_as",
|
|
||||||
"reversed_signposted_as"
|
|
||||||
],
|
|
||||||
"levels": [
|
|
||||||
"level_id",
|
|
||||||
"level_index",
|
|
||||||
"level_name"
|
|
||||||
],
|
|
||||||
"location_groups": [
|
|
||||||
"location_group_id",
|
|
||||||
"location_group_name"
|
|
||||||
],
|
|
||||||
"location_group_stops": [
|
|
||||||
"location_group_id",
|
|
||||||
"stop_id"
|
|
||||||
],
|
|
||||||
"locations_geojson": [
|
|
||||||
"type",
|
|
||||||
"features"
|
|
||||||
],
|
|
||||||
"booking_rules": [
|
|
||||||
"rule_id",
|
|
||||||
"stop_id",
|
|
||||||
"rule_type",
|
|
||||||
"booking_url",
|
|
||||||
"admission_rules",
|
|
||||||
"admission_requirements"
|
|
||||||
],
|
|
||||||
"translations": [
|
|
||||||
"table_name",
|
|
||||||
"field_name",
|
|
||||||
"language",
|
|
||||||
"translation"
|
|
||||||
],
|
|
||||||
"feed_info": [
|
|
||||||
"feed_publisher_name",
|
|
||||||
"feed_publisher_url",
|
|
||||||
"feed_lang",
|
|
||||||
"default_lang",
|
|
||||||
"feed_start_date",
|
|
||||||
"feed_end_date",
|
|
||||||
"feed_version",
|
|
||||||
"feed_contact_email",
|
|
||||||
"feed_contact_url"
|
|
||||||
],
|
|
||||||
"attributions": [
|
|
||||||
"attribution_id",
|
|
||||||
"organization_name",
|
|
||||||
"is_producer"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
primary_keys = { pt_map.models.Agency: "agency_id",
|
primary_keys = { pt_map.models.Agency: "agency_id",
|
||||||
|
|
@ -432,6 +200,18 @@ def stdz(v, m: django.db.models.Model, f: str):
|
||||||
return v
|
return v
|
||||||
|
|
||||||
def to_snake_case(name):
|
def to_snake_case(name):
|
||||||
|
"""
|
||||||
|
Convert CamelCase to snake_case.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
name : str
|
||||||
|
str in CamelCase
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Str in snake_case
|
||||||
|
"""
|
||||||
name = name[0].lower() + name[1:]
|
name = name[0].lower() + name[1:]
|
||||||
for c in name[1:]:
|
for c in name[1:]:
|
||||||
if c.isupper():
|
if c.isupper():
|
||||||
|
|
@ -441,6 +221,20 @@ def to_snake_case(name):
|
||||||
return name
|
return name
|
||||||
|
|
||||||
def unqfk(ts, fk):
|
def unqfk(ts, fk):
|
||||||
|
"""
|
||||||
|
Primary keys of imported data are likely to overlap with primary keys already in the database. To avoid this, the current time in seconds since Jan 1, 2024 is added as a prefix.
|
||||||
|
Foreign key references must know of this new key so they are processed in the same way. To make this possible, we use the same time in seconds for all objects.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
ts : str
|
||||||
|
time in seconds to be prepended
|
||||||
|
fk : primary or foreign key to be processed.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
Str with prefix
|
||||||
|
"""
|
||||||
if not isinstance(fk, str):
|
if not isinstance(fk, str):
|
||||||
fk = str(int(fk))
|
fk = str(int(fk))
|
||||||
return f"{ts}{fk}".strip()
|
return f"{ts}{fk}".strip()
|
||||||
|
|
@ -454,28 +248,23 @@ def gtfs_to_db(g: pt_map.gtfs.GTFS):
|
||||||
g : gtfs.GTFS
|
g : gtfs.GTFS
|
||||||
GTFS object to be saved to db
|
GTFS object to be saved to db
|
||||||
"""
|
"""
|
||||||
ts = str(int(time.time())-time_delta)
|
ts = str(int(time.time())-time_delta) # Prepend the current time in seconds since Jan 1, 2024 to ids to make them more or less unique
|
||||||
for model in foreign_keys:
|
for model in foreign_keys:
|
||||||
if model[0] in [pt_map.models.Calendar, pt_map.models.CalendarDate, ]:
|
|
||||||
continue
|
|
||||||
m = model[0]
|
m = model[0]
|
||||||
df = getattr(g, class_names[m.__name__]).data
|
df = getattr(g, class_names[m.__name__]).data # Extract dataframe for each model from gtfs.GTFS object
|
||||||
if not df.empty:
|
if not df.empty: # Only process GTFS files actually present
|
||||||
v = gtfs_schema[class_names[m.__name__]]
|
v = gtfs_schema[class_names[m.__name__]] # field names
|
||||||
for _, row in df.iterrows():
|
for _, row in df.iterrows(): # the rows of the dataframe are the individual entries in the GTFS file and should be the individual instances of the db model
|
||||||
for fk in model[1]:
|
for fk in model[1]: # Map foreign_keys to objects of the foreign model
|
||||||
if row.get(fk[1]):
|
if row.get(fk[1]):
|
||||||
row[fk[1]] = fk[0].objects.get(**{primary_keys[fk[0]]: unqfk(ts, row[fk[1]])})
|
row[fk[1]] = fk[0].objects.get(**{primary_keys[fk[0]]: unqfk(ts, row[fk[1]])})
|
||||||
defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])}
|
defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])} # dict of fields and values of current model object to create
|
||||||
print(model[0])
|
|
||||||
if model[0] == pt_map.models.StopTime:
|
|
||||||
print(row)
|
|
||||||
if primary_keys[m]:
|
if primary_keys[m]:
|
||||||
row[primary_keys[m]] = unqfk(ts, row[primary_keys[m]])
|
row[primary_keys[m]] = unqfk(ts, row[primary_keys[m]]) # primary_keys should be unique, use current time in seconds as a prefix
|
||||||
defaults[primary_keys[m]] = row[primary_keys[m]]
|
defaults[primary_keys[m]] = row[primary_keys[m]]
|
||||||
try:
|
try:
|
||||||
m.objects.get(**{primary_keys[m]: row[primary_keys[m]]})
|
m.objects.get(**{primary_keys[m]: row[primary_keys[m]]}) # Make sure there is no object with an identical primary_key; an exception is expected to be raised
|
||||||
except m.DoesNotExist:
|
except m.DoesNotExist:
|
||||||
|
|
||||||
m.objects.update_or_create(
|
m.objects.update_or_create(
|
||||||
defaults = defaults,
|
defaults = defaults,
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,11 @@
|
||||||
|
"""
|
||||||
|
Make the gtfs_schema constant available to the modules in this package without circular imports.
|
||||||
|
|
||||||
|
Constants
|
||||||
|
---------
|
||||||
|
gtfs_schema : dict{str,list[str]}
|
||||||
|
Maps GTFS file names (without filename extension) to fields described by the GTFS Reference
|
||||||
|
"""
|
||||||
gtfs_schema = {
|
gtfs_schema = {
|
||||||
"agency": [
|
"agency": [
|
||||||
"agency_id",
|
"agency_id",
|
||||||
|
|
@ -238,3 +245,4 @@ gtfs_schema = {
|
||||||
"is_producer"
|
"is_producer"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,12 +15,28 @@ from .forms import *
|
||||||
import json
|
import json
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
def print_r(r, s):
|
|
||||||
if not len(r):
|
|
||||||
print(s)
|
|
||||||
return r
|
|
||||||
|
|
||||||
def get_timetable(r, trips, stop_sequences):
|
def get_timetable(r, trips, stop_sequences):
|
||||||
|
"""
|
||||||
|
Given a pt_map.models.Route, calculate the timetable for all its stops.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
r : pt_map.models.Route
|
||||||
|
Route the timetable should be calculated for
|
||||||
|
trips : dict(str, list(pt_map.Trip))
|
||||||
|
Dictionary mapping route_ids to the list of trips that travel on that route
|
||||||
|
stop_sequences : dict(str, list(str))
|
||||||
|
Dict mapping route_ids to lists of stop_ids they serve. Currently the first trip is taken as reference for stops and sequence.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
dict{"stop_sequence": list(str), "stop_times": dict(str, list(str))}
|
||||||
|
Dict containing two elements:
|
||||||
|
"stop_sequence" : list(str)
|
||||||
|
list of stop_ids the route serves
|
||||||
|
"stop_times" : dict(str, list(str))
|
||||||
|
dict mapping each stop_id in stop_sequence to the time strings at which the route serves that stop
|
||||||
|
"""
|
||||||
timetable = {"stop_sequence": stop_sequences[r.route_id]}
|
timetable = {"stop_sequence": stop_sequences[r.route_id]}
|
||||||
sts = {}
|
sts = {}
|
||||||
for stop in stop_sequences[r.route_id]:
|
for stop in stop_sequences[r.route_id]:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user