327 lines
7.5 KiB
Python
327 lines
7.5 KiB
Python
import pt_map.gtfs
|
|
import pt_map.models
|
|
import pandas as pd
|
|
from pattern.text.en import singularize
|
|
import math
|
|
import numbers
|
|
import email.utils
|
|
import time
|
|
import datetime
|
|
import django.db.models
|
|
|
|
# Column names handled for each GTFS table, keyed by table name.
# gtfs_to_db uses the first entry of every list as the lookup key when it
# upserts a row, so the order of the entries matters.
gtfs_schema = {
    "agency": [
        "agency_id",
        "agency_name",
        "agency_url",
        "agency_timezone",
        "agency_lang",
        "agency_phone",
        "agency_email",
        "agency_fare_url",
    ],
    "stops": [
        "stop_id",
        "stop_code",
        "stop_name",
        "stop_desc",
        "stop_lat",
        "stop_lon",
        "zone_id",
        "stop_url",
        "location_type",
        "parent_station",
        "stop_timezone",
        "wheelchair_boarding",
        "level_id",
        "platform_code",
    ],
    "routes": [
        "route_id",
        "agency_id",
        "route_short_name",
        "route_long_name",
        "route_desc",
        "route_type",
        "route_url",
        "route_color",
        "route_text_color",
        "route_sort_order",
        "continuous_pickup",
        "continuous_drop_off",
    ],
    "trips": [
        "trip_id",
        "route_id",
        "service_id",
        "trip_headsign",
        "trip_short_name",
        "direction_id",
        "block_id",
        "shape_id",
        "wheelchair_accessible",
        "bikes_allowed",
    ],
    "stop_times": [
        "trip_id",
        "arrival_time",
        "departure_time",
        "stop_id",
        "stop_sequence",
        "stop_headsign",
        "pickup_type",
        "drop_off_type",
        "shape_dist_traveled",
        "timepoint",
    ],
    "calendar": [
        "service_id",
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
        "start_date",
        "end_date",
    ],
    "calendar_dates": [
        "service_id",
        "date",
        "exception_type",
    ],
    "fare_attributes": [
        "fare_id",
        "price",
        "currency_type",
        "payment_method",
        "transfers",
        "transfer_duration",
    ],
    "fare_rules": [
        "fare_id",
        "route_id",
        "origin_id",
        "destination_id",
        "contains_id",
    ],
    "timeframes": [
        "timeframe_id",
        "start_time",
        "end_time",
        "headway_sec",
        "exact_times",
    ],
    "fare_media": [
        "media_id",
        "agency_id",
        "fare_id",
        "seat_type",
        "price",
    ],
    "fare_products": [
        "product_id",
        "agency_id",
        "product_type",
        "fare_id",
        "product_name",
        "short_name",
        "description",
        "duration",
        "transfers",
    ],
    "fare_leg_rules": [
        "fare_id",
        "route_id",
        "origin_id",
        "destination_id",
        "contains_id",
    ],
    "fare_transfer_rules": [
        "from_fare_id",
        "to_fare_id",
        "transfer_type",
        "min_transfer_time",
    ],
    "areas": [
        "area_id",
        "area_name",
        "area_description",
    ],
    "stop_areas": [
        "stop_area_id",
        "stop_id",
        "area_id",
        "location_type",
        "parent_station",
        "fare_zone_id",
    ],
    "networks": [
        "network_id",
        "network_name",
        "network_description",
    ],
    "route_networks": [
        "route_id",
        "network_id",
    ],
    "shapes": [
        "shape_id",
        "shape_pt_lat",
        "shape_pt_lon",
        "shape_pt_sequence",
        "shape_dist_traveled",
    ],
    "frequencies": [
        "trip_id",
        "start_time",
        "end_time",
        "headway_secs",
        "exact_times",
    ],
    "transfers": [
        "from_stop_id",
        "to_stop_id",
        "transfer_type",
        "min_transfer_time",
    ],
    "pathways": [
        "pathway_id",
        "from_stop_id",
        "to_stop_id",
        "pathway_mode",
        "is_bidirectional",
        "length",
        "traversal_time",
        "stair_count",
        "max_slope",
        "min_width",
        "signposted_as",
        "reversed_signposted_as",
    ],
    "levels": [
        "level_id",
        "level_index",
        "level_name",
    ],
    "location_groups": [
        "location_group_id",
        "location_group_name",
    ],
    "location_group_stops": [
        "location_group_id",
        "stop_id",
    ],
    "locations_geojson": [
        "type",
        "features",
    ],
    "booking_rules": [
        "rule_id",
        "stop_id",
        "rule_type",
        "booking_url",
        "admission_rules",
        "admission_requirements",
    ],
    "translations": [
        "table_name",
        "field_name",
        "language",
        "translation",
    ],
    "feed_info": [
        "feed_publisher_name",
        "feed_publisher_url",
        "feed_lang",
        "default_lang",
        "feed_start_date",
        "feed_end_date",
        "feed_version",
        "feed_contact_email",
        "feed_contact_url",
    ],
    "attributions": [
        "attribution_id",
        "organization_name",
        "is_producer",
    ],
}
|
|
|
|
def to_camel_case(s: str):
    """Join the underscore-separated parts of ``s`` with each part capitalised.

    Every part goes through ``str.capitalize`` (first character upper-cased,
    the rest lower-cased), so ``"stop_time"`` becomes ``"StopTime"``.
    """
    pieces = s.split('_')
    return ''.join(map(str.capitalize, pieces))
|
|
|
|
def standardize_time(time_str: str):
    """Normalise a clock-time string to zero-padded ``HH:MM:SS``.

    The value is appended to a fixed placeholder date and parsed with
    ``email.utils.parsedate``, so both ``H:MM`` and ``H:MM:SS`` are accepted.
    Hour values of 24 or more roll over past midnight, e.g. ``"25:30:00"``
    becomes ``"01:30:00"``.

    Args:
        time_str: the time of day to normalise.

    Returns:
        The time formatted with ``'%H:%M:%S'``.

    Raises:
        ValueError: if ``time_str`` cannot be parsed as a time of day.
    """
    parsed = email.utils.parsedate(f"Jan 19, 1999 {time_str}")
    if parsed is None:
        # parsedate signals failure by returning None; fail with a clear
        # message instead of an opaque error from a later conversion.
        raise ValueError(f"cannot parse time value {time_str!r}")

    year, month, day, hours, minutes, seconds = parsed[:6]
    # Carry out-of-range hours/minutes/seconds with plain timedelta
    # arithmetic. This keeps the rollover behaviour without converting
    # through a local-time epoch timestamp, so the result does not depend on
    # the host's timezone settings.
    moment = datetime.datetime(year, month, day) + datetime.timedelta(
        hours=hours, minutes=minutes, seconds=seconds
    )
    return moment.strftime('%H:%M:%S')
|
|
|
|
|
|
def is_NaN(v):
    """Report whether ``v`` represents a not-a-number value.

    Returns True for the text ``"nan"`` in any letter case and for any
    ``numbers.Number`` that ``math.isnan`` reports as NaN. Every other value
    yields False.
    """
    if isinstance(v, str):
        return v.lower() == "nan"
    if isinstance(v, numbers.Number):
        return math.isnan(v)
    return False
|
|
|
|
def stdz(v, m: django.db.models.Model, f: str):
    """Convert ``v`` to the form used for field ``f`` of model ``m``.

    The field's internal type, as reported by Django's model meta API,
    selects the conversion:

    * ``DateField``: the value is turned into a string with ``str``;
    * ``TimeField``: the value is passed through ``standardize_time``;
    * anything else: the value is returned unchanged.
    """
    field_kind = m._meta.get_field(f).get_internal_type()
    if field_kind == 'DateField':
        return str(v)
    elif field_kind == 'TimeField':
        return standardize_time(v)
    else:
        return v
|
|
|
|
|
|
def gtfs_to_db(g: pt_map.gtfs.GTFS):
    """Upsert every row of every table listed in ``gtfs_schema`` into the database.

    For each table name in ``gtfs_schema`` the model class is looked up on
    ``pt_map.models`` under the singularised, capitalised table name, and the
    rows of the matching attribute's ``.data`` frame on ``g`` are written one
    by one with ``update_or_create``. The first field listed for the table is
    the lookup key. All present values, converted with ``stdz``, are passed as
    ``defaults``.

    A value is treated as absent when it is ``None``, an empty string or NaN.
    Zero and ``False`` are real values (e.g. ``direction_id`` 0,
    ``route_type`` 0, a weekday flag of 0) and are stored.

    Args:
        g: the GTFS object whose tables are written to the database.
    """
    for table, fields in gtfs_schema.items():
        model = getattr(pt_map.models, to_camel_case(singularize(table)))
        frame = getattr(g, table).data
        if frame.empty:
            continue

        key_field = fields[0]
        # NOTE(review): only the first schema field is used as the lookup key.
        # For tables where that field repeats across rows (e.g. stop_times,
        # keyed here by trip_id) later rows update the same record. Confirm
        # this is intended for the models involved.
        for _, row in frame.iterrows():
            defaults = {}
            for field in fields:
                value = row.get(field)
                # Skip only genuinely missing values. A plain truthiness test
                # would also discard legitimate zeros.
                if value is None or is_NaN(value):
                    continue
                if isinstance(value, str) and not value:
                    continue
                defaults[field] = stdz(value, model, field)

            model.objects.update_or_create(
                defaults=defaults,
                **{key_field: row[key_field]},
            )
|
|
|
|
# Model class name -> GTFS table name; db_to_gtfs uses it to key the data
# frames it hands to pt_map.gtfs.GTFS.
# NOTE(review): "LocationsGeojson" maps to "locations.geojson" (with a dot),
# whereas gtfs_schema uses the key "locations_geojson" -- confirm which
# spelling pt_map.gtfs.GTFS expects.
reversed_file_mapping = {
    "Agency": "agency",
    "Stop": "stops",
    "Route": "routes",
    "Trip": "trips",
    "StopTime": "stop_times",
    "Calendar": "calendar",
    "CalendarDate": "calendar_dates",
    "FareAttribute": "fare_attributes",
    "FareRule": "fare_rules",
    "Timeframe": "timeframes",
    "FareMedium": "fare_media",
    "FareProduct": "fare_products",
    "FareLegRule": "fare_leg_rules",
    "FareTransferRule": "fare_transfer_rules",
    "Area": "areas",
    "StopArea": "stop_areas",
    "Network": "networks",
    "RouteNetwork": "route_networks",
    "Shape": "shapes",
    "Frequency": "frequencies",
    "Transfer": "transfers",
    "Pathway": "pathways",
    "Level": "levels",
    "LocationGroup": "location_groups",
    "LocationGroupStop": "location_group_stops",
    "LocationsGeojson": "locations.geojson",
    "BookingRule": "booking_rules",
    "Translation": "translations",
    "FeedInfo": "feed_info",
    "Attribution": "attributions",
}
|
|
|
|
|
|
def db_to_gtfs(q: list[django.db.models.query.QuerySet], folder_path: str):
    """Build and validate a ``pt_map.gtfs.GTFS`` object from Django querysets.

    Each queryset becomes one data frame, keyed by the table name that
    ``reversed_file_mapping`` gives for the queryset's model class name. A
    queryset without rows yields an empty frame.

    Args:
        q: the querysets to export, one per model.
        folder_path: passed through to ``pt_map.gtfs.GTFS`` as its first
            argument.

    Returns:
        The ``GTFS`` object, after its ``validate()`` method has been called.

    Raises:
        KeyError: if a queryset's model name is not in ``reversed_file_mapping``.
    """
    dfs = {}
    for queryset in q:
        table = reversed_file_mapping[queryset.model.__name__]
        # Fetch the rows exactly once. Truth-testing the queryset evaluates
        # it in full, and a following .values() call would hit the database
        # a second time for the same table.
        rows = list(queryset.values())
        dfs[table] = pd.DataFrame(rows) if rows else pd.DataFrame()

    g = pt_map.gtfs.GTFS(folder_path, dfs)
    g.validate()
    return g
|