import pandas as pd


class GTFS:
    """Load the text files of a GTFS feed folder and run basic validation.

    Each supported GTFS file is exposed as an attribute (``agency``,
    ``stops``, ``routes``, ...) holding a :class:`GTFS.GTFSFile` whose
    ``data`` attribute is a ``pandas.DataFrame``, or ``None`` when the file
    does not exist in the folder.

    Call :meth:`validate` to collect human-readable error strings in
    ``self.errors``.
    """

    def __init__(self, folder_path):
        """Read every known GTFS file found under *folder_path*."""
        self.folder_path = folder_path
        self.errors = []
        self.agency = self.Agency(self.folder_path)
        self.stops = self.Stops(self.folder_path)
        self.routes = self.Routes(self.folder_path)
        self.trips = self.Trips(self.folder_path)
        self.stop_times = self.StopTimes(self.folder_path)
        self.calendar = self.Calendar(self.folder_path)
        self.calendar_dates = self.CalendarDates(self.folder_path)
        self.fare_attributes = self.FareAttributes(self.folder_path)
        self.fare_rules = self.FareRules(self.folder_path)
        self.shapes = self.Shapes(self.folder_path)
        self.frequencies = self.Frequencies(self.folder_path)
        self.transfers = self.Transfers(self.folder_path)
        self.feed_info = self.FeedInfo(self.folder_path)

    class GTFSFile:
        """One ``<file_name>.txt`` table of the feed, loaded eagerly."""

        def __init__(self, folder_path, file_name):
            self.file_path = f"{folder_path}/{file_name}.txt"
            self.data = self.load_data()

        def load_data(self):
            """Return the file parsed as a DataFrame, or ``None`` if absent."""
            try:
                return pd.read_csv(self.file_path)
            except FileNotFoundError:
                return None

    class Agency(GTFSFile):
        """agency.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'agency')

    class Stops(GTFSFile):
        """stops.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'stops')

    class Routes(GTFSFile):
        """routes.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'routes')

    class Trips(GTFSFile):
        """trips.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'trips')

    class StopTimes(GTFSFile):
        """stop_times.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'stop_times')

    class Calendar(GTFSFile):
        """calendar.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'calendar')

    class CalendarDates(GTFSFile):
        """calendar_dates.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'calendar_dates')

    class FareAttributes(GTFSFile):
        """fare_attributes.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'fare_attributes')

    class FareRules(GTFSFile):
        """fare_rules.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'fare_rules')

    class Shapes(GTFSFile):
        """shapes.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'shapes')

    class Frequencies(GTFSFile):
        """frequencies.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'frequencies')

    class Transfers(GTFSFile):
        """transfers.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'transfers')

    class FeedInfo(GTFSFile):
        """feed_info.txt"""

        def __init__(self, folder_path):
            super().__init__(folder_path, 'feed_info')

    def validate(self):
        """Run every check and return the collected errors.

        Returns:
            ``None`` when the feed passed all checks, otherwise the list of
            error strings (also available as ``self.errors``).
        """
        # Start from a clean slate so repeated calls do not duplicate errors.
        self.errors = []
        self.validate_agency()
        self.validate_stops()
        self.validate_routes()
        self.validate_trips()
        self.validate_stop_times()
        self.validate_calendar()
        self.validate_cross_references()
        return self.errors or None

    def validate_agency(self):
        """Check that agency.txt has its required columns."""
        required_fields = ["agency_id", "agency_name", "agency_url", "agency_timezone"]
        self.validate_required_fields(self.agency.data, required_fields, "agency.txt")

    def validate_stops(self):
        """Check stops.txt required columns and coordinate ranges."""
        required_fields = ["stop_id", "stop_name", "stop_lat", "stop_lon"]
        self.validate_required_fields(self.stops.data, required_fields, "stops.txt")
        self.validate_lat_lon(self.stops.data)

    def validate_routes(self):
        """Check that routes.txt has its required columns."""
        required_fields = ["route_id", "route_short_name", "route_long_name", "route_type"]
        self.validate_required_fields(self.routes.data, required_fields, "routes.txt")

    def validate_trips(self):
        """Check that trips.txt has its required columns."""
        required_fields = ["route_id", "service_id", "trip_id"]
        self.validate_required_fields(self.trips.data, required_fields, "trips.txt")

    def validate_stop_times(self):
        """Check that stop_times.txt has its required columns."""
        required_fields = ["trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence"]
        self.validate_required_fields(self.stop_times.data, required_fields, "stop_times.txt")

    def validate_calendar(self):
        """Check that calendar.txt has its required columns.

        calendar.txt may be omitted when calendar_dates.txt is supplied, so a
        missing calendar.txt is only reported when calendar_dates.txt is
        missing too.
        """
        if self.calendar.data is None and self.calendar_dates.data is not None:
            return
        required_fields = ["service_id", "monday", "tuesday", "wednesday", "thursday",
                           "friday", "saturday", "sunday", "start_date", "end_date"]
        self.validate_required_fields(self.calendar.data, required_fields, "calendar.txt")

    def validate_required_fields(self, data, required_fields, filename):
        """Append an error for each of *required_fields* absent from *data*.

        Args:
            data: DataFrame of the file, or ``None`` if the file was not found.
            required_fields: column names that must be present.
            filename: name used in the error messages (e.g. ``"stops.txt"``).
        """
        if data is None:
            # The file itself is absent; report that once instead of crashing
            # on ``None.columns``.
            self.errors.append(f"Error: {filename} is missing or could not be loaded")
            return
        present = set(data.columns)
        for field in required_fields:
            if field not in present:
                self.errors.append(f"Error: {filename} missing required field: {field}")

    @staticmethod
    def _out_of_range(data, column, low, high):
        """Return a boolean array marking rows whose *column* is outside [low, high].

        Missing or non-numeric values count as out of range. If the column is
        absent nothing is flagged (its absence is reported by the
        required-field check).
        """
        if column not in data.columns:
            return pd.Series(False, index=data.index, dtype=bool).to_numpy()
        numeric = pd.to_numeric(data[column], errors='coerce')
        return (~numeric.between(low, high)).to_numpy()

    def validate_lat_lon(self, data):
        """Append an error for every stop whose latitude/longitude is invalid.

        Args:
            data: stops DataFrame, or ``None`` (in which case nothing is done).
        """
        if data is None:
            return
        bad_lat = self._out_of_range(data, 'stop_lat', -90, 90)
        bad_lon = self._out_of_range(data, 'stop_lon', -180, 180)
        flagged = bad_lat | bad_lon
        # Only walk the offending rows; the range test above is vectorized.
        offenders = zip(data[flagged].iterrows(), bad_lat[flagged], bad_lon[flagged])
        for (index, row), lat_is_bad, lon_is_bad in offenders:
            if lat_is_bad:
                self.errors.append(f"Error: stops.txt invalid latitude at row {index}: {row['stop_lat']}")
            if lon_is_bad:
                self.errors.append(f"Error: stops.txt invalid longitude at row {index}: {row['stop_lon']}")

    @staticmethod
    def _column_values(gtfs_file, column):
        """Return the set of values in *column*, or ``None`` if unavailable."""
        data = gtfs_file.data
        if data is None or column not in data.columns:
            return None
        return set(data[column])

    def _check_stop_times_reference(self, column, parent_file, parent_name):
        """Report stop_times.txt values of *column* that are absent from *parent_file*."""
        referenced = self._column_values(self.stop_times, column)
        known = self._column_values(parent_file, column)
        if referenced is None or known is None:
            # A missing file/column is already reported by the field checks.
            return
        # Sort so the error order is stable from run to run.
        for value in sorted(referenced - known, key=str):
            self.errors.append(
                f"Error: {column} {value} in stop_times.txt does not exist in {parent_name}"
            )

    def validate_cross_references(self):
        """Check that ids used in stop_times.txt exist in their parent files."""
        # Validate that trip_ids in stop_times.txt exist in trips.txt
        self._check_stop_times_reference('trip_id', self.trips, "trips.txt")
        # Validate that stop_ids in stop_times.txt exist in stops.txt
        self._check_stop_times_reference('stop_id', self.stops, "stops.txt")