Added export function to create GTFS files from a list of QuerySets

This commit is contained in:
Johannes Randerath 2024-06-03 00:50:02 +02:00
parent 3e8b2b2711
commit 71f8e765c3
3 changed files with 201 additions and 142 deletions

View File

@ -6,10 +6,8 @@
- Add TODOs - Add TODOs
## Backend ## Backend
- Process GTFS files for im- and export
- Serve data to views in an intuitive way. As an object of a custom class? - Serve data to views in an intuitive way. As an object of a custom class?
- Fetch data to serve to views - Fetch data to serve to views
- Write data received from views - Write data received from views
- Implement views that serve data to the templates - Implement views that serve data to the templates
- Handle requests correctly in views and urls - Handle requests correctly in views and urls
- Convert the data object <-> Pandas DataFrame

View File

@ -252,7 +252,7 @@ gtfs_schema = {
def to_camel_case(s: str):
    """Convert a snake_case string to CamelCase.

    The input is split on underscores, each piece is passed through
    ``str.capitalize`` (first character upper-cased, the rest lower-cased),
    and the pieces are concatenated without a separator.
    """
    pieces = s.split('_')
    return ''.join(map(str.capitalize, pieces))
def standardize_time(time_str): def standardize_time(time_str: str):
date_str = f"Jan 19, 1999 {time_str}" date_str = f"Jan 19, 1999 {time_str}"
ntuple=email.utils.parsedate(date_str) ntuple=email.utils.parsedate(date_str)
timestamp=time.mktime(ntuple) timestamp=time.mktime(ntuple)
@ -263,7 +263,7 @@ def standardize_time(time_str):
def is_NaN(v):
    """Return True when *v* represents a missing "not a number" value.

    Strings count as NaN when they spell "nan" in any letter case; numbers
    count as NaN when ``math.isnan`` says so. Every other value is not NaN.
    """
    if isinstance(v, str):
        return v.lower() == "nan"
    if isinstance(v, numbers.Number):
        return math.isnan(v)
    return False
def stdz(v, m, f): def stdz(v, m: django.db.models.Model, f: str):
if m._meta.get_field(f).get_internal_type() == 'DateField': if m._meta.get_field(f).get_internal_type() == 'DateField':
return str(v) return str(v)
if m._meta.get_field(f).get_internal_type() == 'TimeField': if m._meta.get_field(f).get_internal_type() == 'TimeField':
@ -276,9 +276,6 @@ def gtfs_to_db(g: pt_map.gtfs.GTFS):
name = to_camel_case(singularize(k)) name = to_camel_case(singularize(k))
m = getattr(pt_map.models, name) m = getattr(pt_map.models, name)
df = getattr(g, k).data df = getattr(g, k).data
print("\n\n\n\n")
print(name)
print("#############################################################################################")
if not df.empty: if not df.empty:
for _, row in df.iterrows(): for _, row in df.iterrows():
defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])} defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])}
@ -288,4 +285,42 @@ def gtfs_to_db(g: pt_map.gtfs.GTFS):
**kw_args, **kw_args,
) )
# Maps a model class name (singular, CamelCase) to the key under which the
# corresponding table is handed to the GTFS object (plural, snake_case).
reversed_file_mapping = dict(
    Agency="agency",
    Stop="stops",
    Route="routes",
    Trip="trips",
    StopTime="stop_times",
    Calendar="calendar",
    CalendarDate="calendar_dates",
    FareAttribute="fare_attributes",
    FareRule="fare_rules",
    Timeframe="timeframes",
    FareMedium="fare_media",
    FareProduct="fare_products",
    FareLegRule="fare_leg_rules",
    FareTransferRule="fare_transfer_rules",
    Area="areas",
    StopArea="stop_areas",
    Network="networks",
    RouteNetwork="route_networks",
    Shape="shapes",
    Frequency="frequencies",
    Transfer="transfers",
    Pathway="pathways",
    Level="levels",
    LocationGroup="location_groups",
    LocationGroupStop="location_group_stops",
    LocationsGeojson="locations.geojson",
    BookingRule="booking_rules",
    Translation="translations",
    FeedInfo="feed_info",
    Attribution="attributions",
)
def db_to_gtfs(q: list[django.db.models.query.QuerySet], folder_path: str):
    """Build a GTFS object from Django QuerySets and run its validation.

    Args:
        q: QuerySets to export. Each QuerySet's model class name must be a
            key of ``reversed_file_mapping``; if two QuerySets share a model,
            the later one wins.
        folder_path: Folder path handed to the GTFS object.

    Returns:
        The constructed ``pt_map.gtfs.GTFS`` instance, after ``validate()``
        has been called on it. The return value of ``validate()`` itself is
        not propagated.

    Raises:
        KeyError: If a QuerySet's model name is not in
            ``reversed_file_mapping``.
    """
    # Seed every known table with an empty frame. The GTFS object looks up
    # each table by name in the mapping it receives, so tables without a
    # QuerySet in ``q`` must still be present to avoid a KeyError there.
    dfs = {file_name: pd.DataFrame() for file_name in reversed_file_mapping.values()}
    for queryset in q:
        file_name = reversed_file_mapping[queryset.model.__name__]
        # Materialise the rows once. Truth-testing the QuerySet first would
        # evaluate it and then hit the database a second time for values().
        # An empty result list already produces an empty DataFrame.
        dfs[file_name] = pd.DataFrame(list(queryset.values()))
    g = pt_map.gtfs.GTFS(folder_path, dfs)
    g.validate()
    return g

View File

@ -2,153 +2,157 @@ import pandas as pd
import os import os
class GTFS: class GTFS:
def __init__(self, folder_path: str, dfs: dict[str, pd.DataFrame] = None):
    """Create one table wrapper per GTFS file.

    Args:
        folder_path: Folder path passed to every table wrapper.
        dfs: Optional in-memory data, passed unchanged to every table
            wrapper. GTFSFile.load_data indexes it by the table's base file
            name, so it is a mapping of name -> DataFrame. When it is None
            (or empty) the wrappers read from ``folder_path`` instead.
    """
    self.folder_path = folder_path
    self.agency = self.Agency(self.folder_path, dfs)
    self.stops = self.Stops(self.folder_path, dfs)
    self.routes = self.Routes(self.folder_path, dfs)
    self.trips = self.Trips(self.folder_path, dfs)
    self.stop_times = self.StopTimes(self.folder_path, dfs)
    self.calendar = self.Calendar(self.folder_path, dfs)
    self.calendar_dates = self.CalendarDates(self.folder_path, dfs)
    self.fare_attributes = self.FareAttributes(self.folder_path, dfs)
    self.fare_rules = self.FareRules(self.folder_path, dfs)
    self.timeframes = self.Timeframes(self.folder_path, dfs)
    self.fare_media = self.FareMedia(self.folder_path, dfs)
    self.fare_products = self.FareProducts(self.folder_path, dfs)
    self.fare_leg_rules = self.FareLegRules(self.folder_path, dfs)
    self.fare_transfer_rules = self.FareTransferRules(self.folder_path, dfs)
    self.areas = self.Areas(self.folder_path, dfs)
    self.stop_areas = self.StopAreas(self.folder_path, dfs)
    self.networks = self.Networks(self.folder_path, dfs)
    self.route_networks = self.RouteNetworks(self.folder_path, dfs)
    self.shapes = self.Shapes(self.folder_path, dfs)
    self.frequencies = self.Frequencies(self.folder_path, dfs)
    self.transfers = self.Transfers(self.folder_path, dfs)
    self.pathways = self.Pathways(self.folder_path, dfs)
    self.levels = self.Levels(self.folder_path, dfs)
    self.location_groups = self.LocationGroups(self.folder_path, dfs)
    self.location_group_stops = self.LocationGroupStops(self.folder_path, dfs)
    self.locations_geojson = self.LocationsGeojson(self.folder_path, dfs)
    self.booking_rules = self.BookingRules(self.folder_path, dfs)
    self.translations = self.Translations(self.folder_path, dfs)
    self.feed_info = self.FeedInfo(self.folder_path, dfs)
    self.attributions = self.Attributions(self.folder_path, dfs)
    # Validation messages are appended to this list by the validate_* methods.
    self.errors = []
class GTFSFile:
    """A single GTFS table held as a pandas DataFrame.

    Attributes are set in ``__init__``:
        file_name: Base name of the table (without the ``.txt`` extension).
        file_path: ``"<folder_path>/<file_name>.txt"``.
        data: The table contents as returned by ``load_data``.
    """

    def __init__(self, folder_path, file_name, dfs):
        """Record the table's name and path, then load its data.

        Args:
            folder_path: Folder that contains (or will contain) the file.
            file_name: Base name of the table, e.g. ``'stops'``.
            dfs: Optional mapping of table base name -> DataFrame; see
                ``load_data``.
        """
        self.file_name = file_name
        self.file_path = f"{folder_path}/{file_name}.txt"
        self.data = self.load_data(dfs)

    def load_data(self, dfs):
        """Return this table's DataFrame.

        When ``dfs`` is non-empty it is the only data source: the entry
        stored under ``self.file_name`` is returned, or an empty DataFrame
        if the mapping has no entry for this table. When ``dfs`` is None or
        empty, the table is read from ``self.file_path``; a missing file
        yields an empty DataFrame.
        """
        if dfs:
            # A mapping that covers only some tables is normal (e.g. a partial
            # export), so a missing entry means "no rows" rather than an error.
            frame = dfs.get(self.file_name)
            return pd.DataFrame() if frame is None else frame
        try:
            return pd.read_csv(self.file_path)
        except FileNotFoundError:
            return pd.DataFrame()
class Agency(GTFSFile):
    """GTFS table loaded under the base file name 'agency' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'agency', dfs)
class Stops(GTFSFile):
    """GTFS table loaded under the base file name 'stops' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'stops', dfs)
class Routes(GTFSFile):
    """GTFS table loaded under the base file name 'routes' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'routes', dfs)
class Trips(GTFSFile):
    """GTFS table loaded under the base file name 'trips' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'trips', dfs)
class StopTimes(GTFSFile):
    """GTFS table loaded under the base file name 'stop_times' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'stop_times', dfs)
class Calendar(GTFSFile):
    """GTFS table loaded under the base file name 'calendar' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'calendar', dfs)
class CalendarDates(GTFSFile):
    """GTFS table loaded under the base file name 'calendar_dates' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'calendar_dates', dfs)
class FareAttributes(GTFSFile):
    """GTFS table loaded under the base file name 'fare_attributes' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'fare_attributes', dfs)
class FareRules(GTFSFile):
    """GTFS table loaded under the base file name 'fare_rules' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'fare_rules', dfs)
class Timeframes(GTFSFile):
    """GTFS table loaded under the base file name 'timeframes' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'timeframes', dfs)
class FareMedia(GTFSFile):
    """GTFS table loaded under the base file name 'fare_media' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'fare_media', dfs)
class FareProducts(GTFSFile):
    """GTFS table loaded under the base file name 'fare_products' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'fare_products', dfs)
class FareLegRules(GTFSFile):
    """GTFS table loaded under the base file name 'fare_leg_rules' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'fare_leg_rules', dfs)
class FareTransferRules(GTFSFile):
    """GTFS table loaded under the base file name 'fare_transfer_rules' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'fare_transfer_rules', dfs)
class Areas(GTFSFile):
    """GTFS table loaded under the base file name 'areas' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'areas', dfs)
class StopAreas(GTFSFile):
    """GTFS table loaded under the base file name 'stop_areas' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'stop_areas', dfs)
class Networks(GTFSFile):
    """GTFS table loaded under the base file name 'networks' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'networks', dfs)
class RouteNetworks(GTFSFile):
    """GTFS table loaded under the base file name 'route_networks' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'route_networks', dfs)
class Shapes(GTFSFile):
    """GTFS table loaded under the base file name 'shapes' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'shapes', dfs)
class Frequencies(GTFSFile):
    """GTFS table loaded under the base file name 'frequencies' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'frequencies', dfs)
class Transfers(GTFSFile):
    """GTFS table loaded under the base file name 'transfers' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'transfers', dfs)
class Pathways(GTFSFile):
    """GTFS table loaded under the base file name 'pathways' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'pathways', dfs)
class Levels(GTFSFile):
    """GTFS table loaded under the base file name 'levels' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'levels', dfs)
class LocationGroups(GTFSFile):
    """GTFS table loaded under the base file name 'location_groups' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'location_groups', dfs)
class LocationGroupStops(GTFSFile):
    """GTFS table loaded under the base file name 'location_group_stops' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'location_group_stops', dfs)
class LocationsGeojson(GTFSFile): class LocationsGeojson(GTFSFile):
def __init__(self, folder_path): def __init__(self, folder_path, dfs):
self.file_path = f"{folder_path}/locations.geojson" self.file_path = f"{folder_path}/locations.geojson"
if os.path.exists(self.file_path): if os.path.exists(self.file_path):
self.data = self.load_data() self.data = self.load_data()
@ -162,20 +166,20 @@ class GTFS:
return pd.DataFrame() return pd.DataFrame()
class BookingRules(GTFSFile):
    """GTFS table loaded under the base file name 'booking_rules' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'booking_rules', dfs)
class Translations(GTFSFile):
    """GTFS table loaded under the base file name 'translations' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'translations', dfs)
class FeedInfo(GTFSFile):
    """GTFS table loaded under the base file name 'feed_info' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'feed_info', dfs)
class Attributions(GTFSFile):
    """GTFS table loaded under the base file name 'attributions' (see GTFSFile)."""

    def __init__(self, folder_path, dfs):
        # Loading is delegated to GTFSFile; only the table name is fixed here.
        super().__init__(folder_path, 'attributions', dfs)
def get_files(self): def get_files(self):
return [attr for attr in dir(self) if isinstance(getattr(self, attr), self.GTFSFile)] return [attr for attr in dir(self) if isinstance(getattr(self, attr), self.GTFSFile)]
@ -200,37 +204,66 @@ class GTFS:
df.to_csv(fpath, index=False) df.to_csv(fpath, index=False)
def validate(self): def validate(self):
if not self.agency.data.empty:
self.validate_agency() self.validate_agency()
if not self.stops.data.empty:
self.validate_stops() self.validate_stops()
if not self.routes.data.empty:
self.validate_routes() self.validate_routes()
if not self.trips.data.empty:
self.validate_trips() self.validate_trips()
if not self.stop_times.data.empty:
self.validate_stop_times() self.validate_stop_times()
if not self.calendar.data.empty:
self.validate_calendar() self.validate_calendar()
if not self.calendar_dates.data.empty:
self.validate_calendar_dates() self.validate_calendar_dates()
if not self.fare_attributes.data.empty:
self.validate_fare_attributes() self.validate_fare_attributes()
if not self.fare_rules.data.empty:
self.validate_fare_rules() self.validate_fare_rules()
if not self.timeframes.data.empty:
self.validate_timeframes() self.validate_timeframes()
if not self.fare_media.data.empty:
self.validate_fare_media() self.validate_fare_media()
if not self.fare_products.data.empty:
self.validate_fare_products() self.validate_fare_products()
if not self.fare_leg_rules.data.empty:
self.validate_fare_leg_rules() self.validate_fare_leg_rules()
if not self.fare_transfer_rules.data.empty:
self.validate_fare_transfer_rules() self.validate_fare_transfer_rules()
if not self.areas.data.empty:
self.validate_areas() self.validate_areas()
if not self.stop_areas.data.empty:
self.validate_stop_areas() self.validate_stop_areas()
if not self.networks.data.empty:
self.validate_networks() self.validate_networks()
if not self.route_networks.data.empty:
self.validate_route_networks() self.validate_route_networks()
if not self.shapes.data.empty:
self.validate_shapes() self.validate_shapes()
if not self.frequencies.data.empty:
self.validate_frequencies() self.validate_frequencies()
if not self.transfers.data.empty:
self.validate_transfers() self.validate_transfers()
if not self.pathways.data.empty:
self.validate_pathways() self.validate_pathways()
if not self.levels.data.empty:
self.validate_levels() self.validate_levels()
if not self.location_groups.data.empty:
self.validate_location_groups() self.validate_location_groups()
if not self.location_group_stops.data.empty:
self.validate_location_group_stops() self.validate_location_group_stops()
if not self.locations_geojson.data.empty:
self.validate_locations_geojson() self.validate_locations_geojson()
if not self.booking_rules.data.empty:
self.validate_booking_rules() self.validate_booking_rules()
if not self.translations.data.empty:
self.validate_translations() self.validate_translations()
if not self.feed_info.data.empty:
self.validate_feed_info() self.validate_feed_info()
if not self.attributions.data.empty:
self.validate_attributions() self.validate_attributions()
self.validate_cross_references()
if not self.errors: if not self.errors:
return None return None
@ -435,10 +468,3 @@ class GTFS:
if df[['stop_lat', 'stop_lon']].isnull().any().any(): if df[['stop_lat', 'stop_lon']].isnull().any().any():
self.errors.append(f"stops.txt has missing lat/lon values.") self.errors.append(f"stops.txt has missing lat/lon values.")
def validate_cross_references(self):
# Example: Validate that all stop_ids in stop_times.txt exist in stops.txt
if not self.stop_times.data.empty and not self.stops.data.empty:
invalid_stops = set(self.stop_times.data['stop_id']) - set(self.stops.data['stop_id'])
if invalid_stops:
self.errors.append(f"stop_times.txt has invalid stop_ids: {invalid_stops}")