From aeed247b265e1c827fa9ea1d6c992fd9c8691562f3dcd6a93a71c4f791de0127 Mon Sep 17 00:00:00 2001 From: Johannes Randerath Date: Sat, 1 Jun 2024 04:04:04 +0200 Subject: [PATCH] Added TODO List and a utility to convert a GTFS folder to a Pandas Dataframe and check the validity of the feed. --- TODO.md | 15 +++ bin/f2py | 8 ++ transport_accessibility/pt_map/gtfs.py | 149 +++++++++++++++++++++++++ 3 files changed, 172 insertions(+) create mode 100644 TODO.md create mode 100755 bin/f2py create mode 100644 transport_accessibility/pt_map/gtfs.py diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..fdb57c7 --- /dev/null +++ b/TODO.md @@ -0,0 +1,15 @@ +## General +- Decide on a license +- Decide how to license produced data, as the work of inserting the data and the data itself are not ours. Make the users agree on a FLOSS or CC license for the GTFS files produced from their data? What if someone doesn't own the data they upload? What if it's not free - How can we produce data in that case? ToS? + +## Frontend +- Add TODOs + +## Backend +- Process GTFS files for import and export +- Serve data to views in an intuitive way. As an object of a custom class? 
import os

import pandas as pd


class GTFS:
    """Load the text files of a GTFS feed folder and run basic validity checks.

    Every known GTFS file is exposed as an attribute (``agency``, ``stops``,
    ``routes``, ...). Each attribute wraps a pandas DataFrame in ``.data``;
    ``.data`` is ``None`` when the file does not exist in the folder.
    Problems found by :meth:`validate` are collected as human-readable
    strings in ``errors``.

    Args:
        folder_path: Directory containing the ``*.txt`` files of the feed.
    """

    # (column, label used in messages, absolute limit in degrees)
    _COORDINATE_CHECKS = (
        ("stop_lat", "latitude", 90),
        ("stop_lon", "longitude", 180),
    )

    def __init__(self, folder_path):
        self.folder_path = folder_path
        self.agency = self.Agency(self.folder_path)
        self.stops = self.Stops(self.folder_path)
        self.routes = self.Routes(self.folder_path)
        self.trips = self.Trips(self.folder_path)
        self.stop_times = self.StopTimes(self.folder_path)
        self.calendar = self.Calendar(self.folder_path)
        self.calendar_dates = self.CalendarDates(self.folder_path)
        self.fare_attributes = self.FareAttributes(self.folder_path)
        self.fare_rules = self.FareRules(self.folder_path)
        self.shapes = self.Shapes(self.folder_path)
        self.frequencies = self.Frequencies(self.folder_path)
        self.transfers = self.Transfers(self.folder_path)
        self.feed_info = self.FeedInfo(self.folder_path)
        self.errors = []

    class GTFSFile:
        """One ``<file_name>.txt`` file of the feed, loaded eagerly.

        Attributes set by the constructor:
            file_path: Path of the file inside ``folder_path``.
            data: DataFrame with the file contents, or ``None`` if the file
                does not exist.
        """

        def __init__(self, folder_path, file_name):
            self.file_path = os.path.join(folder_path, f"{file_name}.txt")
            self.data = self.load_data()

        def load_data(self):
            """Read the file into a DataFrame.

            Columns whose name ends in ``_id`` are read as strings. Letting
            pandas infer their dtype would turn ``"007"`` into ``7`` and
            could give the same ID different types in different files, which
            breaks cross-file comparisons.

            Returns:
                The DataFrame, an empty DataFrame for a file without any
                content, or ``None`` when the file does not exist.
            """
            try:
                # Header-only read to learn which ID columns are present.
                header = pd.read_csv(self.file_path, nrows=0)
            except FileNotFoundError:
                return None
            except pd.errors.EmptyDataError:
                # A zero-byte file has no columns; validation then reports
                # the missing required fields instead of crashing here.
                return pd.DataFrame()
            id_columns = {
                column: str
                for column in header.columns
                if str(column).endswith("_id")
            }
            return pd.read_csv(self.file_path, dtype=id_columns)

    class Agency(GTFSFile):
        """Contents of ``agency.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'agency')

    class Stops(GTFSFile):
        """Contents of ``stops.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'stops')

    class Routes(GTFSFile):
        """Contents of ``routes.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'routes')

    class Trips(GTFSFile):
        """Contents of ``trips.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'trips')

    class StopTimes(GTFSFile):
        """Contents of ``stop_times.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'stop_times')

    class Calendar(GTFSFile):
        """Contents of ``calendar.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'calendar')

    class CalendarDates(GTFSFile):
        """Contents of ``calendar_dates.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'calendar_dates')

    class FareAttributes(GTFSFile):
        """Contents of ``fare_attributes.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'fare_attributes')

    class FareRules(GTFSFile):
        """Contents of ``fare_rules.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'fare_rules')

    class Shapes(GTFSFile):
        """Contents of ``shapes.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'shapes')

    class Frequencies(GTFSFile):
        """Contents of ``frequencies.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'frequencies')

    class Transfers(GTFSFile):
        """Contents of ``transfers.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'transfers')

    class FeedInfo(GTFSFile):
        """Contents of ``feed_info.txt``."""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'feed_info')

    def validate(self):
        """Run all checks and collect the problems in ``self.errors``.

        The error list is rebuilt on every call, so validating the same
        object twice does not duplicate messages.

        Returns:
            ``None`` when no problem was found, otherwise the list of error
            messages (the same list object as ``self.errors``).
        """
        self.errors = []
        self.validate_agency()
        self.validate_stops()
        self.validate_routes()
        self.validate_trips()
        self.validate_stop_times()
        self.validate_calendar()
        self.validate_cross_references()

        if not self.errors:
            return None
        return self.errors

    def validate_agency(self):
        """Check that agency.txt exists and has its required columns."""
        required_fields = ["agency_id", "agency_name", "agency_url", "agency_timezone"]
        self.validate_required_fields(self.agency.data, required_fields, "agency.txt")

    def validate_stops(self):
        """Check required columns and the coordinate ranges of stops.txt."""
        required_fields = ["stop_id", "stop_name", "stop_lat", "stop_lon"]
        self.validate_required_fields(self.stops.data, required_fields, "stops.txt")
        self.validate_lat_lon(self.stops.data)

    def validate_routes(self):
        """Check that routes.txt exists and has its required columns."""
        required_fields = ["route_id", "route_short_name", "route_long_name", "route_type"]
        self.validate_required_fields(self.routes.data, required_fields, "routes.txt")

    def validate_trips(self):
        """Check that trips.txt exists and has its required columns."""
        required_fields = ["route_id", "service_id", "trip_id"]
        self.validate_required_fields(self.trips.data, required_fields, "trips.txt")

    def validate_stop_times(self):
        """Check that stop_times.txt exists and has its required columns."""
        required_fields = ["trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence"]
        self.validate_required_fields(self.stop_times.data, required_fields, "stop_times.txt")

    def validate_calendar(self):
        """Check the required columns of calendar.txt.

        A feed may define all of its service days in calendar_dates.txt, so
        a missing calendar.txt is only reported when calendar_dates.txt is
        missing as well.
        """
        if self.calendar.data is None and self.calendar_dates.data is not None:
            return
        required_fields = ["service_id", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday", "start_date", "end_date"]
        self.validate_required_fields(self.calendar.data, required_fields, "calendar.txt")

    def validate_required_fields(self, data, required_fields, filename):
        """Record an error for a missing file or for each missing column.

        Args:
            data: DataFrame of the file, or ``None`` if it was not found.
            required_fields: Column names that must be present.
            filename: File name used in the error messages.
        """
        if data is None:
            # load_data() returns None for absent files; report that instead
            # of failing on ``None.columns``.
            self.errors.append(f"Error: required file {filename} not found")
            return
        for field in required_fields:
            if field not in data.columns:
                self.errors.append(f"Error: {filename} missing required field: {field}")

    def validate_lat_lon(self, data):
        """Record an error for every stop coordinate outside its valid range.

        Latitudes must lie in [-90, 90] and longitudes in [-180, 180].
        Empty and non-numeric values are reported as invalid. A missing
        file or coordinate column is skipped here; the required-field check
        reports it.

        Args:
            data: DataFrame of stops.txt, or ``None`` if it was not found.
        """
        if data is None:
            return
        findings = []
        for rank, (column, label, limit) in enumerate(self._COORDINATE_CHECKS):
            if column not in data.columns:
                continue
            # Coerce so that text values become NaN instead of raising;
            # between() is False for NaN, so those rows count as invalid.
            numeric = pd.to_numeric(data[column], errors="coerce")
            out_of_range = ~numeric.between(-limit, limit)
            for position in out_of_range.to_numpy().nonzero()[0]:
                position = int(position)
                findings.append((
                    position,
                    rank,
                    f"Error: stops.txt invalid {label} at row "
                    f"{data.index[position]}: {data[column].iloc[position]}",
                ))
        # Report row by row, latitude before longitude within a row.
        findings.sort(key=lambda finding: finding[:2])
        self.errors.extend(message for _, _, message in findings)

    def validate_cross_references(self):
        """Check that IDs used in stop_times.txt are defined in their files.

        Verifies ``trip_id`` against trips.txt and ``stop_id`` against
        stops.txt. A check is skipped when one of the files or ID columns is
        missing; the required-field checks report those cases.
        """
        self._validate_reference("trip_id", self.trips.data, "trips.txt")
        self._validate_reference("stop_id", self.stops.data, "stops.txt")

    def _validate_reference(self, id_column, target_data, target_name):
        """Report values of ``id_column`` in stop_times.txt absent from the target file."""
        source_data = self.stop_times.data
        if source_data is None or target_data is None:
            return
        if id_column not in source_data.columns or id_column not in target_data.columns:
            return
        referenced = set(source_data[id_column].dropna())
        defined = set(target_data[id_column].dropna())
        # Sorted so the error order is stable between runs.
        for missing_id in sorted(referenced - defined, key=str):
            self.errors.append(
                f"Error: {id_column} {missing_id} in stop_times.txt does not exist in {target_name}"
            )