Added export function to create GTFS files from a list of QuerySets

This commit is contained in:
Johannes Randerath 2024-06-03 00:50:02 +02:00
parent 3e8b2b2711
commit dcf1b47609
2 changed files with 201 additions and 140 deletions

View File

@ -252,7 +252,7 @@ gtfs_schema = {
def to_camel_case(s: str) -> str:
    """Convert a snake_case name to CamelCase.

    The input is split on underscores and every piece is passed through
    ``str.capitalize``, so the first character of each piece is upper-cased
    and the remainder is lower-cased (``"stop_times"`` -> ``"StopTimes"``).
    """
    pieces = s.split('_')
    return ''.join(piece.capitalize() for piece in pieces)
def standardize_time(time_str):
def standardize_time(time_str: str):
date_str = f"Jan 19, 1999 {time_str}"
ntuple=email.utils.parsedate(date_str)
timestamp=time.mktime(ntuple)
@ -263,7 +263,7 @@ def standardize_time(time_str):
def is_NaN(v) -> bool:
    """Tell whether ``v`` represents a missing "not a number" value.

    Two spellings are recognised: the text ``"nan"`` in any letter case, and
    any ``numbers.Number`` for which ``math.isnan`` is true. Every other
    value (including ``None``) is reported as not-NaN.
    """
    if isinstance(v, str):
        return v.lower() == "nan"
    if isinstance(v, numbers.Number):
        return math.isnan(v)
    return False
def stdz(v, m, f):
def stdz(v, m: django.db.models.Model, f: str):
if m._meta.get_field(f).get_internal_type() == 'DateField':
return str(v)
if m._meta.get_field(f).get_internal_type() == 'TimeField':
@ -276,9 +276,6 @@ def gtfs_to_db(g: pt_map.gtfs.GTFS):
name = to_camel_case(singularize(k))
m = getattr(pt_map.models, name)
df = getattr(g, k).data
print("\n\n\n\n")
print(name)
print("#############################################################################################")
if not df.empty:
for _, row in df.iterrows():
defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])}
@ -288,4 +285,42 @@ def gtfs_to_db(g: pt_map.gtfs.GTFS):
**kw_args,
)
# Maps a model class name (the key looked up via ``model.__name__`` in
# ``db_to_gtfs``) to the GTFS file name used as the key of the DataFrame mapping.
reversed_file_mapping = {
    "Agency": "agency",
    "Stop": "stops",
    "Route": "routes",
    "Trip": "trips",
    "StopTime": "stop_times",
    "Calendar": "calendar",
    "CalendarDate": "calendar_dates",
    "FareAttribute": "fare_attributes",
    "FareRule": "fare_rules",
    "Timeframe": "timeframes",
    "FareMedium": "fare_media",
    "FareProduct": "fare_products",
    "FareLegRule": "fare_leg_rules",
    "FareTransferRule": "fare_transfer_rules",
    "Area": "areas",
    "StopArea": "stop_areas",
    "Network": "networks",
    "RouteNetwork": "route_networks",
    "Shape": "shapes",
    "Frequency": "frequencies",
    "Transfer": "transfers",
    "Pathway": "pathways",
    "Level": "levels",
    "LocationGroup": "location_groups",
    "LocationGroupStop": "location_group_stops",
    "LocationsGeojson": "locations.geojson",
    "BookingRule": "booking_rules",
    "Translation": "translations",
    "FeedInfo": "feed_info",
    "Attribution": "attributions",
}
def db_to_gtfs(q: list[django.db.models.query.QuerySet], folder_path: str):
    """Build a ``GTFS`` object from database QuerySets and validate it.

    Args:
        q: QuerySets to export. Each one is stored under the GTFS file name
            that ``reversed_file_mapping`` gives for its model's class name.
        folder_path: Folder path handed to the ``GTFS`` constructor.

    Returns:
        The constructed ``GTFS`` object, after ``validate()`` has been called
        on it (the value returned by ``validate()`` is not used here).

    Raises:
        KeyError: If a QuerySet's model name is not in ``reversed_file_mapping``.
    """
    dfs = {}
    for queryset in q:
        file_name = reversed_file_mapping[queryset.model.__name__]
        # No truthiness test on the QuerySet: that would execute the query
        # once just to check for rows and ``values()`` would then hit the
        # database again. An empty row list already gives an empty DataFrame.
        dfs[file_name] = pd.DataFrame(list(queryset.values()))
    g = pt_map.gtfs.GTFS(folder_path, dfs)
    g.validate()
    return g

View File

@ -2,153 +2,157 @@ import pandas as pd
import os
class GTFS:
def __init__(self, folder_path: str, dfs: dict = None):
    """Create one wrapper object per GTFS file.

    Args:
        folder_path: Folder path passed to every file wrapper.
        dfs: Optional mapping that the wrappers index by file name to obtain
            their data. It is forwarded unchanged to every wrapper; when it
            is ``None`` (the default) the wrappers receive ``None``.
    """
    self.folder_path = folder_path
    self.agency = self.Agency(self.folder_path, dfs)
    self.stops = self.Stops(self.folder_path, dfs)
    self.routes = self.Routes(self.folder_path, dfs)
    self.trips = self.Trips(self.folder_path, dfs)
    self.stop_times = self.StopTimes(self.folder_path, dfs)
    self.calendar = self.Calendar(self.folder_path, dfs)
    self.calendar_dates = self.CalendarDates(self.folder_path, dfs)
    self.fare_attributes = self.FareAttributes(self.folder_path, dfs)
    self.fare_rules = self.FareRules(self.folder_path, dfs)
    self.timeframes = self.Timeframes(self.folder_path, dfs)
    self.fare_media = self.FareMedia(self.folder_path, dfs)
    self.fare_products = self.FareProducts(self.folder_path, dfs)
    self.fare_leg_rules = self.FareLegRules(self.folder_path, dfs)
    self.fare_transfer_rules = self.FareTransferRules(self.folder_path, dfs)
    self.areas = self.Areas(self.folder_path, dfs)
    self.stop_areas = self.StopAreas(self.folder_path, dfs)
    self.networks = self.Networks(self.folder_path, dfs)
    self.route_networks = self.RouteNetworks(self.folder_path, dfs)
    self.shapes = self.Shapes(self.folder_path, dfs)
    self.frequencies = self.Frequencies(self.folder_path, dfs)
    self.transfers = self.Transfers(self.folder_path, dfs)
    self.pathways = self.Pathways(self.folder_path, dfs)
    self.levels = self.Levels(self.folder_path, dfs)
    self.location_groups = self.LocationGroups(self.folder_path, dfs)
    self.location_group_stops = self.LocationGroupStops(self.folder_path, dfs)
    self.locations_geojson = self.LocationsGeojson(self.folder_path, dfs)
    self.booking_rules = self.BookingRules(self.folder_path, dfs)
    self.translations = self.Translations(self.folder_path, dfs)
    self.feed_info = self.FeedInfo(self.folder_path, dfs)
    self.attributions = self.Attributions(self.folder_path, dfs)
    # Messages appended by the validate_* methods.
    self.errors = []
class GTFSFile:
    """A single GTFS file whose contents are exposed as ``self.data``.

    The data comes either from an in-memory mapping or from the CSV file
    ``<folder_path>/<file_name>.txt``.
    """

    def __init__(self, folder_path, file_name, dfs=None):
        """Remember the file name and path, then load the data.

        Args:
            folder_path: Folder used to build ``file_path``.
            file_name: File name without the ``.txt`` extension; also the
                key looked up in ``dfs``.
            dfs: Optional mapping from file name to data. ``None`` (the
                default) means "read from disk".
        """
        self.file_name = file_name
        self.file_path = f"{folder_path}/{file_name}.txt"
        self.data = self.load_data(dfs)

    def load_data(self, dfs=None):
        """Return this file's data.

        A non-empty ``dfs`` takes precedence over the file on disk. If it has
        no entry for this file, an empty DataFrame is returned instead of
        raising ``KeyError``, so a mapping that covers only some files can be
        used. Without ``dfs`` the CSV at ``file_path`` is read; a missing
        file yields an empty DataFrame.
        """
        if dfs:
            if self.file_name in dfs:
                return dfs[self.file_name]
            return pd.DataFrame()
        try:
            return pd.read_csv(self.file_path)
        except FileNotFoundError:
            return pd.DataFrame()
class Agency(GTFSFile):
    """GTFS file wrapper bound to the ``agency`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'agency', dfs)
class Stops(GTFSFile):
    """GTFS file wrapper bound to the ``stops`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'stops', dfs)
class Routes(GTFSFile):
    """GTFS file wrapper bound to the ``routes`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'routes', dfs)
class Trips(GTFSFile):
    """GTFS file wrapper bound to the ``trips`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'trips', dfs)
class StopTimes(GTFSFile):
    """GTFS file wrapper bound to the ``stop_times`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'stop_times', dfs)
class Calendar(GTFSFile):
    """GTFS file wrapper bound to the ``calendar`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'calendar', dfs)
class CalendarDates(GTFSFile):
    """GTFS file wrapper bound to the ``calendar_dates`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'calendar_dates', dfs)
class FareAttributes(GTFSFile):
    """GTFS file wrapper bound to the ``fare_attributes`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'fare_attributes', dfs)
class FareRules(GTFSFile):
    """GTFS file wrapper bound to the ``fare_rules`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'fare_rules', dfs)
class Timeframes(GTFSFile):
    """GTFS file wrapper bound to the ``timeframes`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'timeframes', dfs)
class FareMedia(GTFSFile):
    """GTFS file wrapper bound to the ``fare_media`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'fare_media', dfs)
class FareProducts(GTFSFile):
    """GTFS file wrapper bound to the ``fare_products`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'fare_products', dfs)
class FareLegRules(GTFSFile):
    """GTFS file wrapper bound to the ``fare_leg_rules`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'fare_leg_rules', dfs)
class FareTransferRules(GTFSFile):
    """GTFS file wrapper bound to the ``fare_transfer_rules`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'fare_transfer_rules', dfs)
class Areas(GTFSFile):
    """GTFS file wrapper bound to the ``areas`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'areas', dfs)
class StopAreas(GTFSFile):
    """GTFS file wrapper bound to the ``stop_areas`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'stop_areas', dfs)
class Networks(GTFSFile):
    """GTFS file wrapper bound to the ``networks`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'networks', dfs)
class RouteNetworks(GTFSFile):
    """GTFS file wrapper bound to the ``route_networks`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'route_networks', dfs)
class Shapes(GTFSFile):
    """GTFS file wrapper bound to the ``shapes`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'shapes', dfs)
class Frequencies(GTFSFile):
    """GTFS file wrapper bound to the ``frequencies`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'frequencies', dfs)
class Transfers(GTFSFile):
    """GTFS file wrapper bound to the ``transfers`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'transfers', dfs)
class Pathways(GTFSFile):
    """GTFS file wrapper bound to the ``pathways`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'pathways', dfs)
class Levels(GTFSFile):
    """GTFS file wrapper bound to the ``levels`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'levels', dfs)
class LocationGroups(GTFSFile):
    """GTFS file wrapper bound to the ``location_groups`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'location_groups', dfs)
class LocationGroupStops(GTFSFile):
    """GTFS file wrapper bound to the ``location_group_stops`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'location_group_stops', dfs)
class LocationsGeojson(GTFSFile):
def __init__(self, folder_path):
def __init__(self, folder_path, dfs):
self.file_path = f"{folder_path}/locations.geojson"
if os.path.exists(self.file_path):
self.data = self.load_data()
@ -162,20 +166,20 @@ class GTFS:
return pd.DataFrame()
class BookingRules(GTFSFile):
    """GTFS file wrapper bound to the ``booking_rules`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'booking_rules', dfs)
class Translations(GTFSFile):
    """GTFS file wrapper bound to the ``translations`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'translations', dfs)
class FeedInfo(GTFSFile):
    """GTFS file wrapper bound to the ``feed_info`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'feed_info', dfs)
class Attributions(GTFSFile):
    """GTFS file wrapper bound to the ``attributions`` file name."""

    def __init__(self, folder_path, dfs=None):
        # ``dfs`` defaults to None so the one-argument call form keeps working.
        super().__init__(folder_path, 'attributions', dfs)
def get_files(self):
    """Return the names of all attributes that hold a ``GTFSFile`` instance.

    Every name reported by ``dir(self)`` is inspected, so the result follows
    ``dir()``'s alphabetical ordering. Instances of ``GTFSFile`` subclasses
    count as well, because the check uses ``isinstance``.
    """
    file_type = self.GTFSFile
    return [name for name in dir(self) if isinstance(getattr(self, name), file_type)]
@ -200,37 +204,66 @@ class GTFS:
df.to_csv(fpath, index=False)
def validate(self):
if not self.agency.data.empty:
self.validate_agency()
if not self.stops.data.empty:
self.validate_stops()
if not self.routes.data.empty:
self.validate_routes()
if not self.trips.data.empty:
self.validate_trips()
if not self.stop_times.data.empty:
self.validate_stop_times()
if not self.calendar.data.empty:
self.validate_calendar()
if not self.calendar_dates.data.empty:
self.validate_calendar_dates()
if not self.fare_attributes.data.empty:
self.validate_fare_attributes()
if not self.fare_rules.data.empty:
self.validate_fare_rules()
if not self.timeframes.data.empty:
self.validate_timeframes()
if not self.fare_media.data.empty:
self.validate_fare_media()
if not self.fare_products.data.empty:
self.validate_fare_products()
if not self.fare_leg_rules.data.empty:
self.validate_fare_leg_rules()
if not self.fare_transfer_rules.data.empty:
self.validate_fare_transfer_rules()
if not self.areas.data.empty:
self.validate_areas()
if not self.stop_areas.data.empty:
self.validate_stop_areas()
if not self.networks.data.empty:
self.validate_networks()
if not self.route_networks.data.empty:
self.validate_route_networks()
if not self.shapes.data.empty:
self.validate_shapes()
if not self.frequencies.data.empty:
self.validate_frequencies()
if not self.transfers.data.empty:
self.validate_transfers()
if not self.pathways.data.empty:
self.validate_pathways()
if not self.levels.data.empty:
self.validate_levels()
if not self.location_groups.data.empty:
self.validate_location_groups()
if not self.location_group_stops.data.empty:
self.validate_location_group_stops()
if not self.locations_geojson.data.empty:
self.validate_locations_geojson()
if not self.booking_rules.data.empty:
self.validate_booking_rules()
if not self.translations.data.empty:
self.validate_translations()
if not self.feed_info.data.empty:
self.validate_feed_info()
if not self.attributions.data.empty:
self.validate_attributions()
self.validate_cross_references()
if not self.errors:
return None
@ -435,10 +468,3 @@ class GTFS:
if df[['stop_lat', 'stop_lon']].isnull().any().any():
self.errors.append(f"stops.txt has missing lat/lon values.")
def validate_cross_references(self):
    """Check references between files and record problems in ``self.errors``.

    Currently one check is performed: every ``stop_id`` used in the
    stop_times data must also occur in the stops data. The check is skipped
    when either table is empty. Nothing is returned; a message listing the
    unknown ids is appended to ``self.errors`` when any are found.
    """
    stop_times = self.stop_times.data
    stops = self.stops.data
    if stop_times.empty or stops.empty:
        return
    invalid_stops = set(stop_times['stop_id']) - set(stops['stop_id'])
    if invalid_stops:
        self.errors.append(f"stop_times.txt has invalid stop_ids: {invalid_stops}")