transport-accessibility/transport_accessibility/pt_map/bridge.py

327 lines
7.5 KiB
Python

import pt_map.gtfs
import pt_map.models
import pandas as pd
from pattern.text.en import singularize
import math
import numbers
import email.utils
import time
import datetime
import django.db.models
# Column names of every GTFS table handled by this module, keyed by table
# name. Both the order of the tables and the order of the columns matter:
# gtfs_to_db walks the tables in this order and uses the first column of each
# list as the lookup key when writing rows to the database.
gtfs_schema = {
    "agency": [
        "agency_id", "agency_name", "agency_url", "agency_timezone",
        "agency_lang", "agency_phone", "agency_email", "agency_fare_url",
    ],
    "stops": [
        "stop_id", "stop_code", "stop_name", "stop_desc", "stop_lat",
        "stop_lon", "zone_id", "stop_url", "location_type", "parent_station",
        "stop_timezone", "wheelchair_boarding", "level_id", "platform_code",
    ],
    "routes": [
        "route_id", "agency_id", "route_short_name", "route_long_name",
        "route_desc", "route_type", "route_url", "route_color",
        "route_text_color", "route_sort_order", "continuous_pickup",
        "continuous_drop_off",
    ],
    "trips": [
        "trip_id", "route_id", "service_id", "trip_headsign",
        "trip_short_name", "direction_id", "block_id", "shape_id",
        "wheelchair_accessible", "bikes_allowed",
    ],
    "stop_times": [
        "trip_id", "arrival_time", "departure_time", "stop_id",
        "stop_sequence", "stop_headsign", "pickup_type", "drop_off_type",
        "shape_dist_traveled", "timepoint",
    ],
    "calendar": [
        "service_id", "monday", "tuesday", "wednesday", "thursday", "friday",
        "saturday", "sunday", "start_date", "end_date",
    ],
    "calendar_dates": ["service_id", "date", "exception_type"],
    "fare_attributes": [
        "fare_id", "price", "currency_type", "payment_method", "transfers",
        "transfer_duration",
    ],
    "fare_rules": [
        "fare_id", "route_id", "origin_id", "destination_id", "contains_id",
    ],
    "timeframes": [
        "timeframe_id", "start_time", "end_time", "headway_sec", "exact_times",
    ],
    "fare_media": ["media_id", "agency_id", "fare_id", "seat_type", "price"],
    "fare_products": [
        "product_id", "agency_id", "product_type", "fare_id", "product_name",
        "short_name", "description", "duration", "transfers",
    ],
    "fare_leg_rules": [
        "fare_id", "route_id", "origin_id", "destination_id", "contains_id",
    ],
    "fare_transfer_rules": [
        "from_fare_id", "to_fare_id", "transfer_type", "min_transfer_time",
    ],
    "areas": ["area_id", "area_name", "area_description"],
    "stop_areas": [
        "stop_area_id", "stop_id", "area_id", "location_type",
        "parent_station", "fare_zone_id",
    ],
    "networks": ["network_id", "network_name", "network_description"],
    "route_networks": ["route_id", "network_id"],
    "shapes": [
        "shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence",
        "shape_dist_traveled",
    ],
    "frequencies": [
        "trip_id", "start_time", "end_time", "headway_secs", "exact_times",
    ],
    "transfers": [
        "from_stop_id", "to_stop_id", "transfer_type", "min_transfer_time",
    ],
    "pathways": [
        "pathway_id", "from_stop_id", "to_stop_id", "pathway_mode",
        "is_bidirectional", "length", "traversal_time", "stair_count",
        "max_slope", "min_width", "signposted_as", "reversed_signposted_as",
    ],
    "levels": ["level_id", "level_index", "level_name"],
    "location_groups": ["location_group_id", "location_group_name"],
    "location_group_stops": ["location_group_id", "stop_id"],
    "locations_geojson": ["type", "features"],
    "booking_rules": [
        "rule_id", "stop_id", "rule_type", "booking_url", "admission_rules",
        "admission_requirements",
    ],
    "translations": ["table_name", "field_name", "language", "translation"],
    "feed_info": [
        "feed_publisher_name", "feed_publisher_url", "feed_lang",
        "default_lang", "feed_start_date", "feed_end_date", "feed_version",
        "feed_contact_email", "feed_contact_url",
    ],
    "attributions": ["attribution_id", "organization_name", "is_producer"],
}
def to_camel_case(s: str):
    """Turn a snake_case name into a capitalised name without underscores.

    The string is split on "_" and every piece goes through
    ``str.capitalize`` (first character upper-cased, the rest lower-cased)
    before the pieces are glued back together, e.g. "stop_time" -> "StopTime".
    """
    pieces = s.split("_")
    return "".join(map(str.capitalize, pieces))
def standardize_time(time_str: str):
    """Normalise a clock-time string to zero-padded ``HH:MM:SS``.

    Accepts ``H:MM``, ``HH:MM``, ``H:MM:SS`` and ``HH:MM:SS`` (surrounding
    whitespace is ignored). The value is reduced modulo 24 hours because the
    result is a time of day, so an hour of 24 or more (GTFS uses these for
    trips that run past midnight) wraps around: "25:30:00" -> "01:30:00".

    The computation is plain integer arithmetic, so the result does not
    depend on the local timezone of the process.

    Args:
        time_str: The time to normalise; non-string values are converted
            with ``str()`` first.

    Returns:
        The time of day formatted as ``HH:MM:SS``.

    Raises:
        ValueError: If the value is not made of two or three
            colon-separated integers.
    """
    parts = str(time_str).strip().split(":")
    if len(parts) not in (2, 3):
        raise ValueError(f"Invalid time value: {time_str!r}")
    try:
        numbers_ = [int(part) for part in parts]
    except ValueError:
        raise ValueError(f"Invalid time value: {time_str!r}") from None

    hours, minutes = numbers_[0], numbers_[1]
    seconds = numbers_[2] if len(numbers_) == 3 else 0

    # Fold everything into seconds since midnight and wrap at one day, which
    # also normalises out-of-range minutes/seconds.
    total = (hours * 3600 + minutes * 60 + seconds) % 86400
    hh, remainder = divmod(total, 3600)
    mm, ss = divmod(remainder, 60)
    return f"{hh:02d}:{mm:02d}:{ss:02d}"
def is_NaN(v):
    """Tell whether ``v`` represents a not-a-number value.

    Strings count as NaN when they spell "nan" in any letter case; numeric
    values (``numbers.Number`` instances) are checked with ``math.isnan``.
    Anything else, including ``None``, is reported as not NaN.
    """
    if isinstance(v, str):
        return v.lower() == "nan"
    if isinstance(v, numbers.Number):
        return math.isnan(v)
    return False
def stdz(v, m: django.db.models.Model, f: str):
    """Convert a raw value into the form used for model field ``f``.

    The field's internal type is looked up on model ``m``:

    * ``DateField`` values are returned as ``str(v)``;
    * ``TimeField`` values are passed through ``standardize_time``;
    * every other value is returned untouched.
    """
    field_type = m._meta.get_field(f).get_internal_type()
    if field_type == 'DateField':
        return str(v)
    if field_type == 'TimeField':
        return standardize_time(v)
    return v
def gtfs_to_db(g: pt_map.gtfs.GTFS):
    """Write every non-empty table of a GTFS feed into the database.

    For each table in ``gtfs_schema`` the matching model class is resolved
    from ``pt_map.models`` (singularised, capitalised table name) and each
    DataFrame row is stored with ``update_or_create``.

    Only cells that actually carry a value are written: ``None``, NaN and
    empty strings are skipped. Falsy-but-valid values such as ``0`` are
    kept, so zero-valued columns are stored instead of being silently
    dropped.

    NOTE(review): the first schema column alone is used as the lookup key.
    For tables whose first column is not unique per row this may update the
    same record repeatedly -- confirm against the model definitions.

    Args:
        g: Feed object exposing one attribute per table name, each with a
            ``data`` DataFrame.
    """
    for table, fields in gtfs_schema.items():
        model = getattr(pt_map.models, to_camel_case(singularize(table)))
        df = getattr(g, table).data
        if df.empty:
            continue

        key_field = fields[0]
        for _, row in df.iterrows():
            defaults = {}
            for field in fields:
                value = row.get(field)
                # Skip missing cells only; a plain truthiness test would
                # also discard legitimate zeros.
                if value is None or is_NaN(value):
                    continue
                if isinstance(value, str) and not value:
                    continue
                defaults[field] = stdz(value, model, field)

            model.objects.update_or_create(
                defaults=defaults,
                **{key_field: row[key_field]},
            )
# Model class name -> key under which that model's table is handed to the
# GTFS object built in db_to_gtfs.
reversed_file_mapping = {
    # Core schedule tables
    "Agency": "agency",
    "Stop": "stops",
    "Route": "routes",
    "Trip": "trips",
    "StopTime": "stop_times",
    "Calendar": "calendar",
    "CalendarDate": "calendar_dates",
    # Fares
    "FareAttribute": "fare_attributes",
    "FareRule": "fare_rules",
    "Timeframe": "timeframes",
    "FareMedium": "fare_media",
    "FareProduct": "fare_products",
    "FareLegRule": "fare_leg_rules",
    "FareTransferRule": "fare_transfer_rules",
    # Areas and networks
    "Area": "areas",
    "StopArea": "stop_areas",
    "Network": "networks",
    "RouteNetwork": "route_networks",
    # Geometry, frequencies and connections
    "Shape": "shapes",
    "Frequency": "frequencies",
    "Transfer": "transfers",
    "Pathway": "pathways",
    "Level": "levels",
    # Flexible services
    "LocationGroup": "location_groups",
    "LocationGroupStop": "location_group_stops",
    "LocationsGeojson": "locations.geojson",
    "BookingRule": "booking_rules",
    # Feed metadata
    "Translation": "translations",
    "FeedInfo": "feed_info",
    "Attribution": "attributions",
}
def db_to_gtfs(q: list[django.db.models.query.QuerySet], folder_path: str):
    """Build a GTFS object from database querysets.

    Each queryset is turned into a DataFrame of its ``values()`` rows and
    stored under the key that ``reversed_file_mapping`` assigns to the
    queryset's model class name. A queryset with no rows yields an empty
    DataFrame. The resulting GTFS object has ``validate()`` called on it
    before it is returned.

    Args:
        q: Querysets to export, one per model.
        folder_path: Passed through to ``pt_map.gtfs.GTFS``.

    Returns:
        The constructed ``pt_map.gtfs.GTFS`` instance.

    Raises:
        KeyError: If a queryset's model name is not in
            ``reversed_file_mapping``.
    """
    dfs = {}
    for queryset in q:
        key = reversed_file_mapping[queryset.model.__name__]
        # Fetch the rows once: testing the queryset's truthiness would run
        # the full query just to check for emptiness, and values() would
        # then hit the database a second time.
        rows = list(queryset.values())
        dfs[key] = pd.DataFrame(rows) if rows else pd.DataFrame()

    g = pt_map.gtfs.GTFS(folder_path, dfs)
    g.validate()
    return g