diff --git a/transport_accessibility/pt_map/bridge.py b/transport_accessibility/pt_map/bridge.py deleted file mode 100644 index 5747fe2..0000000 --- a/transport_accessibility/pt_map/bridge.py +++ /dev/null @@ -1,240 +0,0 @@ -""" -Bridge -====== -Bridge between Django with its models and database and gtfs.GTFS as intermediate object for File IO. - -Contents --------- -Constants ---------- -reversed_file_mapping : dict(str,str) - Map CamelCased filenames to '_'-separated -class_names : dict{str,str} - Map CamelCase, singularized class names to pluralized, snake_cased file names -primary_keys : dict{str, (str or None)} - For all pt_map.models, map primary keys if applicable -foreign_keys - For all pt_map.models, map foreign keys if any. Also ordered for model creation without foreign reference conflicts. -time_delta : int - Unix time for Jan 1, 2024. To be used to calculate time prefix strings. - -Functions ---------- -to_camel_case(s): - Converts '_'-separated str to CamelCase with capital first letter - -standardize_time(time_str): - Converts str in unicode time format to %H:%M:%S format with normalized 24 hour time - -is_NaN(v): - Checks if given variable is either a str expressing NaN or NaN as object - -stdz(v): - Standardize date and time formats - -gtfs_to_db(g): - Write an existing gtfs.GTFS object to the database using the GTFS compliant models - -db_to_gtfs(q, folder_path): - Convert list of query sets to gtfs.GTFS object and write to specified folder if validation for GTFS compliance passes. -""" -import pt_map.gtfs -import pt_map.models -import pandas as pd -from pattern.text.en import singularize, pluralize -import math -import numbers -import email.utils -import time -import datetime -import django.db.models -import time -from pt_map.gtfs_schema import gtfs_schema -from .class_names import * - - -def toCamelCase(s: str): - """ - Convert '_'-separated str to CamelCase with the first letter capitalized. - - Parameters - ---------- - s : str - '_'-separated string - - Returns - ------- - str - CamelCased str, first letter capitalized - """ - return ''.join(word.capitalize() for word in s.split('_')) - -def standardize_time(time_str: str): - """ - Convert time str to standardized %H:%M:%S format. - - Parameters - ---------- - time_str: str - str encoding time - - Returns - ------- - str in format '%H:%M:%S' - """ - date_str = f"Jan 19, 1999 {time_str}" - ntuple=email.utils.parsedate(date_str) - timestamp=time.mktime(ntuple) - date=datetime.datetime.fromtimestamp(timestamp) - return date.strftime('%H:%M:%S') - - -def is_NaN(v): - """ - Returns - ------- - True - If v is either a str representing NaN or NaN as an object - False - Otherwise - """ - return (isinstance(v, str) and v.lower() == "nan") or (isinstance(v, numbers.Number) and math.isnan(v)) - -def stdz(v, m: django.db.models.Model, f: str): - """ - If f is a time or date field, convert to a format our db can easily work with. - If f is a foreign key - - Parameters - ---------- - v : object - object to be standardized - m : django.db.models.Model - model to be written to - f : str - field name in question - - Returns - ------- - Converted str - If m.f is a DateField or a TimeField - Unchanged str - Otherwise - """ - if m._meta.get_field(f).get_internal_type() == 'DateField': - return str(v) - if m._meta.get_field(f).get_internal_type() == 'TimeField': - return standardize_time(v) - if m._meta.get_field(f).get_internal_type() == 'ForeignKey': - pass - return v - -def to_snake_case(name): - """ - Convert CamelCase to snake_case. 
-
-    Parameters
-    ----------
-    name : str
-        str in CamelCase
-
-    Returns
-    -------
-    str in snake_case
-    """
-    name = name[0].lower() + name[1:]
-    out = ""
-    for c in name:
-        out += f"_{c.lower()}" if c.isupper() else c
-    return out
-
-def unqfk(ts, fk):
-    """
-    Primary keys of imported data and in the database are likely to overlap. To avoid this, the current time in seconds since Jan 1, 2024 is added as a prefix.
-    Foreign key references must know of this new key so they are processed in the same way. To make this possible, we use the same time in seconds for all objects.
-
-    Parameters
-    ----------
-    ts : str
-        time in seconds to be prepended
-    fk : primary or foreign key to be processed.
-
-    Returns
-    -------
-    str with prefix
-    """
-    if not isinstance(fk, str):
-        fk = str(int(fk))
-    return f"{ts}{fk}".strip()
-
-def gtfs_to_db(g: pt_map.gtfs.GTFS):
-    """
-    Given a gtfs.GTFS object, write it GTFS-compliantly to the db by creating the corresponding models.
-
-    Parameters
-    ----------
-    g : gtfs.GTFS
-        GTFS object to be saved to db
-    """
-    feed_id = 0
-    #model = (pt_map.models.Trip, [(pt_map.models.Route, 'route_id'), (pt_map.models.Shape, 'shape_id'), ])
-    ts = str(int(time.time())-time_delta) # Prepend the current time in seconds since Jan 1, 2024 to ids to make them more or less unique
-    #if model == pt_map.models.Shape:
-    for model in foreign_keys:
-        m = model[0]
-        df = getattr(g, reversed_file_mapping[m.__name__]).data # Extract dataframe for each model from gtfs.GTFS object
-        if not df.empty: # Only process GTFS files actually present
-            if primary_keys[m] in df.keys() and not m == pt_map.models.FeedInfo:
-                #df[primary_keys[m]] = df[primary_keys[m]].astype(str)
-                df[primary_keys[m]] = [f"{feed_id.feed_id}_{pk}" for pk in df[primary_keys[m]]]
-            elif not m == pt_map.models.FeedInfo:
-                df[primary_keys[m]] = [f"{feed_id.feed_id}_{pk}" for pk in range(1, df.index.size + 1)]
-            v = gtfs_schema[reversed_file_mapping[m.__name__]] # field names
-            for _, row in df.iterrows(): # the rows of the dataframe are the individual entries in the GTFS file and should be the individual instances of the db model
-                for fk in model[1]: # Map foreign_keys to objects of the foreign model
-                    if row.get(fk[1]) and not is_NaN(row[fk[1]]):
-                        row[fk[1]] = fk[0].objects.get(**{primary_keys[fk[0]]: f"{feed_id.feed_id}_{row[fk[1]]}"})#unqfk(ts, row[fk[1]])})
-                defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])} # dict of fields and values of current model object to create
-                o = None
-                if not m == pt_map.models.FeedInfo:
-                    defaults["feed_info_id"] = feed_id
-                if primary_keys[m] in df.keys():
-                    #row[primary_keys[m]] = unqfk(ts, row[primary_keys[m]]) # primary_keys should be unique, use current time in seconds as a prefix
-                    #defaults[primary_keys[m]] = row[primary_keys[m]]
-                    try:
-                        o = m.objects.get(**{primary_keys[m]: row[primary_keys[m]]}) # Make sure there is no object with an identical primary key; the exception is expected to be raised
-                    except m.DoesNotExist:
-                        o, _ = m.objects.update_or_create(
-                            defaults = defaults,
-                            **{primary_keys[m]: row[primary_keys[m]]}
-                        )
-                else:
-                    o = m.objects.create(**defaults)
-                if m == pt_map.models.FeedInfo:
-                    feed_id = o
-
-
-
-def db_to_gtfs(q: list[django.db.models.query.QuerySet], folder_path: str = ""):
-    """
-    Convert a given list of query sets to a gtfs.GTFS object.
-
-    Parameters
-    ----------
-    q : list[django.db.models.query.QuerySet]
-        List of QuerySets containing the retrieved data to be converted
-    folder_path : str
-        path to be set as the result's folder_path
instance variable - - Returns - ------- - gtfs.GTFS - object containing the queried data - """ - dfs = {reversed_file_mapping[m.model.__name__]: (pd.DataFrame(list(m.values())) if m else pd.DataFrame()) for m in q} - dfs = {key: dfs[key].astype({col: pd.Timestamp for col in dfs[key].columns if isinstance(getattr(getattr(pt_map.models, {v:k for k,v in reversed_file_mapping.items()}[key]), col), django.db.models.DateField)}) for key in dfs.keys()} - g = pt_map.gtfs.GTFS(folder_path, dfs) - g.validate() - return g diff --git a/transport_accessibility/pt_map/gtfs.py b/transport_accessibility/pt_map/gtfs.py deleted file mode 100644 index 132300b..0000000 --- a/transport_accessibility/pt_map/gtfs.py +++ /dev/null @@ -1,816 +0,0 @@ -import pandas as pd -import os - - - -class GTFS: - """ - DataFrame based representation of the GTFS standard, able to read folder of GTFS files, validate a GTFS object for accordance with the standard and write its data to a GTFS folder. - - Attributes - ---------- - folder_path : str - Path to folder where the data is read from and/or to be written to - agency, stops, routes, trips, stop_times, calendar, calendar_dates, fare_attributes, fare_rules, timeframes, fare_media, fare_products, fare_leg_rules, fare_transfer_rules, areas, stop_areas, networks, route_networks, shapes, frequencies, transfers, pathways, levels, location_groups, location_group_stops, locations_geojson, booking_rules, translations, feed_info, attributions : GTFSFile - Objects representing the data in the corresponding .txt/.geojson files in the GTFS Reference. - errors: list[str] - Human readable messages explaining why a validation failed if it did. - - Methods - ------- - get_files(): - Return all member objects of type GTFSFile - get_fields(name): - Return all fields present in a given instance of a GTFSFile - export(path, dirname): - Save all GTFS data represented by the current instance as a folder of files corresponding to the GTFS Reference. - validate(): - For all GTFSFile member objects, validate if they individually conmply with GTFS. - validate_required_fields(df, required_fields): - Check if a DataFrame contains all required fields according to the GTFS reference for the file it represents. - validate_optional_fields(df, optional_fields): - Check if a DataFrame does not contain any unexpected fields, not compliant with the GTFS reference for the file it represents. - validate_lat_lon(df): - Check if a Stop is correctly mapped using geographical coordinates. - """ - - - def __init__(self, folder_path: str = "", dfs: dict[str, pd.DataFrame] = None): - """ - Parameters - ---------- - folder_path : str - Path of the folder to read GTFS data from or potentially write it to when export() is called. Defaults to an empty str. - dfs : dict[str : pd.DataFrame] - DataFrames containing the data to be represented by this object as values, corresponding GTFSFile.file_names as keys. 
- - Raises - ------ - TypeError - If neither folder_path nor dfs is provided - If folder_path is not a valid str or dfs is not a dict of DataFrames - ValueError - If folder_path is not a well formatted path - """ - self.folder_path = folder_path - self.agency = self.Agency(self.folder_path, dfs) - self.stops = self.Stops(self.folder_path, dfs) - self.routes = self.Routes(self.folder_path, dfs) - self.trips = self.Trips(self.folder_path, dfs) - self.stop_times = self.StopTimes(self.folder_path, dfs) - self.calendar = self.Calendar(self.folder_path, dfs) - self.calendar_dates = self.CalendarDates(self.folder_path, dfs) - self.fare_attributes = self.FareAttributes(self.folder_path, dfs) - self.fare_rules = self.FareRules(self.folder_path, dfs) - self.timeframes = self.Timeframes(self.folder_path, dfs) - self.fare_media = self.FareMedia(self.folder_path, dfs) - self.fare_products = self.FareProducts(self.folder_path, dfs) - self.fare_leg_rules = self.FareLegRules(self.folder_path, dfs) - self.fare_transfer_rules = self.FareTransferRules(self.folder_path, dfs) - self.areas = self.Areas(self.folder_path, dfs) - self.stop_areas = self.StopAreas(self.folder_path, dfs) - self.networks = self.Networks(self.folder_path, dfs) - self.route_networks = self.RouteNetworks(self.folder_path, dfs) - self.shapes = self.Shapes(self.folder_path, dfs) - self.frequencies = self.Frequencies(self.folder_path, dfs) - self.transfers = self.Transfers(self.folder_path, dfs) - self.pathways = self.Pathways(self.folder_path, dfs) - self.levels = self.Levels(self.folder_path, dfs) - self.location_groups = self.LocationGroups(self.folder_path, dfs) - self.location_group_stops = self.LocationGroupStops(self.folder_path, dfs) - self.locations_geojson = self.LocationsGeojson(self.folder_path, dfs) - self.booking_rules = self.BookingRules(self.folder_path, dfs) - self.translations = self.Translations(self.folder_path, dfs) - self.feed_info = self.FeedInfo(self.folder_path, dfs) - self.attributions = self.Attributions(self.folder_path, dfs) - self.errors = [] - - class GTFSFile: - """ - All given fields and their corresponding values are stored as a DataFrame. - - Attributes - ---------- - file_name : str - Extension-less name of the corresponding .txt file from the GTFS Reference - folder_path : str - Folder to read data from or potentially write it to - data : pd.DataFrame - All csv data from the corresponding .txt file represented as a Pandas DataFrame - - Methods - ------- - load_data(dfs): - Load data from list of DataFrames if given else read it from the corresponding .txt file in csv format. - """ - def __init__(self, folder_path, file_name, dfs): - """ - Parameters - ---------- - folder_path : str - Where to read GTFS files from or write it to - file_name : str - Name of the .txt file without the .txt Extension - dfs : dict[str, pd.DataFrame] - If given, data variable is set as corresponding DataFrame in this dict - If not, data is read from the csv - """ - self.file_name = file_name - self.file_path = f"{folder_path}/{file_name}.txt" - self.data = self.load_data(dfs) - - def load_data(self, dfs): - """ - Fill the data attribute with GTFS data either with a given DataFrame or from the corresponding csv - - Parameters - ---------- - dfs : dict[str, pd.DataFrame] - Dict of dataframes mapped to the corresponding file names. 
If given, the corresponding DataFrame is returned if the key exists else an empty DataFrame - """ - if dfs: - return dfs[self.file_name] if self.file_name in dfs.keys() else pd.DataFrame() - else: - try: - return pd.read_csv(self.file_path) - except FileNotFoundError: - return pd.DataFrame() - - class Agency(GTFSFile): - """ - Represents agency.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'agency', dfs) - - class Stops(GTFSFile): - """ - Represents stops.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'stops', dfs) - - class Routes(GTFSFile): - """ - Represents routes.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'routes', dfs) - - class Trips(GTFSFile): - """ - Represents trips.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'trips', dfs) - - class StopTimes(GTFSFile): - """ - Represents stop_times.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'stop_times', dfs) - - class Calendar(GTFSFile): - """ - Represents calendar.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'calendar', dfs) - - class CalendarDates(GTFSFile): - """ - Represents calendar_dates.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'calendar_dates', dfs) - - class FareAttributes(GTFSFile): - """ - Represents fare_attributes.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'fare_attributes', dfs) - - class FareRules(GTFSFile): - """ - Represents fare_rules.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'fare_rules', dfs) - - class Timeframes(GTFSFile): - """ - Represents timeframes.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'timeframes', dfs) - - class FareMedia(GTFSFile): - """ - Represents fare_media.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'fare_media', dfs) - - class FareProducts(GTFSFile): - """ - Represents fare_products.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'fare_products', dfs) - - class FareLegRules(GTFSFile): - """ - Represents fare_leg_rules.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'fare_leg_rules', dfs) - - class FareTransferRules(GTFSFile): - """ - Represents fare_transfer_rules.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'fare_transfer_rules', dfs) - - class Areas(GTFSFile): - """ - Represents areas.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'areas', dfs) - - class StopAreas(GTFSFile): - """ - Represents stop_areas.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'stop_areas', dfs) - - class Networks(GTFSFile): - """ - Represents networks.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'networks', dfs) - - class RouteNetworks(GTFSFile): - """ - Represents route_networks.txt from the GTFS reference - 
""" - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'route_networks', dfs) - - class Shapes(GTFSFile): - """ - Represents shapes.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'shapes', dfs) - - class Frequencies(GTFSFile): - """ - Represents frequencies.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'frequencies', dfs) - - class Transfers(GTFSFile): - """ - Represents transfers.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'transfers', dfs) - - class Pathways(GTFSFile): - """ - Represents pathways.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'pathways', dfs) - - class Levels(GTFSFile): - """ - Represents levels.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'levels', dfs) - - class LocationGroups(GTFSFile): - """ - Represents location_groups.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'location_groups', dfs) - - class LocationGroupStops(GTFSFile): - """ - Represents location_group_stops.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'location_group_stops', dfs) - - class LocationsGeojson(GTFSFile): - """ - Represents locations.geojson from the GTFS reference - """ - def __init__(self, folder_path, dfs): - self.file_path = f"{folder_path}/locations.geojson" - if os.path.exists(self.file_path): - self.data = self.load_data() - else: - self.data = pd.DataFrame() - - def load_data(self): - try: - return pd.read_json(self.file_path) - except ValueError: - return pd.DataFrame() - - class BookingRules(GTFSFile): - """ - Represents booking_rules.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'booking_rules', dfs) - - class Translations(GTFSFile): - """ - Represents translations.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'translations', dfs) - - class FeedInfo(GTFSFile): - """ - Represents feed_info.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'feed_info', dfs) - - class Attributions(GTFSFile): - """ - Represents attributions.txt from the GTFS reference - """ - def __init__(self, folder_path, dfs): - super().__init__(folder_path, 'attributions', dfs) - - def get_files(self): - """ - Get all GTFSFile object - - Returns - ------- - list[GTFSFile] - All member objects of type GTFSFile - """ - return [attr for attr in dir(self) if isinstance(getattr(self, attr), self.GTFSFile)] - - def get_fields(self, name): - """ - Given the name of a file specified in the GTFS specification, return all fields present in the data. - - Parameters - ---------- - name : str - name of a file as specified by GTFS (ommiting the .txt/.geojson extension) - - Returns - ------- - list[pd.core.indexes.range.RangeIndex] - a list of all the fields present in the datastream of the specified file - """ - file = getattr(self, name) - if not file: - return None - return list(file.data.columns) - - def export(self, path = None, dirname = ""): - """ - Save this object's data to files as specified by GTFS. 
- - Parameters - ---------- - path : str - parent directory where to save the files, defaults to the objects folder_path property - dirname : str - If specified, subdirectory to create or use inside path. Default behaviour is to save directly to path. - """ - if not path: - path = self.folder_path - else: - path = f"{os.path.normpath(path)}/{dirname}" - if not os.path.exists(path): - os.mkdir(path) - for name in self.get_files(): - df = getattr(self, name).data - df = df.astype({col: 'int8' for col in df.columns if df[col].dtype == 'bool'}) - fpath = f"{path}/{name}.txt" - if name == 'locations_geojson': - fpath = f"{path}/{name}.geojson" - df.to_json(fpath) - else: - df.to_csv(fpath, date_format='%Y%m%d', index=False) - - def validate(self): - """ - Check this object's data for compliance with the GTFS reference. Resets self.errors and stores human readable error messages to it. - - Returns - ------- - list[str] - List of human readable error messages, also saved to self.errors, if any, else None. - """ - self.error = [] - if not self.agency.data.empty: - self.validate_agency() - if not self.stops.data.empty: - self.validate_stops() - if not self.routes.data.empty: - self.validate_routes() - if not self.trips.data.empty: - self.validate_trips() - if not self.stop_times.data.empty: - self.validate_stop_times() - if not self.calendar.data.empty: - self.validate_calendar() - if not self.calendar_dates.data.empty: - self.validate_calendar_dates() - if not self.fare_attributes.data.empty: - self.validate_fare_attributes() - if not self.fare_rules.data.empty: - self.validate_fare_rules() - if not self.timeframes.data.empty: - self.validate_timeframes() - if not self.fare_media.data.empty: - self.validate_fare_media() - if not self.fare_products.data.empty: - self.validate_fare_products() - if not self.fare_leg_rules.data.empty: - self.validate_fare_leg_rules() - if not self.fare_transfer_rules.data.empty: - self.validate_fare_transfer_rules() - if not self.areas.data.empty: - self.validate_areas() - if not self.stop_areas.data.empty: - self.validate_stop_areas() - if not self.networks.data.empty: - self.validate_networks() - if not self.route_networks.data.empty: - self.validate_route_networks() - if not self.shapes.data.empty: - self.validate_shapes() - if not self.frequencies.data.empty: - self.validate_frequencies() - if not self.transfers.data.empty: - self.validate_transfers() - if not self.pathways.data.empty: - self.validate_pathways() - if not self.levels.data.empty: - self.validate_levels() - if not self.location_groups.data.empty: - self.validate_location_groups() - if not self.location_group_stops.data.empty: - self.validate_location_group_stops() - if not self.locations_geojson.data.empty: - self.validate_locations_geojson() - if not self.booking_rules.data.empty: - self.validate_booking_rules() - if not self.translations.data.empty: - self.validate_translations() - if not self.feed_info.data.empty: - self.validate_feed_info() - if not self.attributions.data.empty: - self.validate_attributions() - - if not self.errors: - return None - else: - return self.errors - - def validate_agency(self): - """ - Check Agency object for compliance with the GTFS reference. 
- """ - required_fields = ["agency_name", "agency_url", "agency_timezone"] - optional_fields = ["agency_id", "agency_lang", "agency_phone", "agency_fare_url", "agency_email"] - self.validate_required_fields(self.agency.data, required_fields, "agency.txt") - self.validate_optional_fields(self.agency.data, optional_fields, "agency.txt") - - def validate_stops(self): - """ - Check Stops object for compliance with the GTFS reference. - """ - required_fields = ["stop_id", "stop_name"] - optional_fields = ["stop_code", "stop_desc", "stop_lat", "stop_lon", "zone_id", "stop_url", - "location_type", "parent_station", "stop_timezone", "wheelchair_boarding", - "level_id", "platform_code"] - self.validate_required_fields(self.stops.data, required_fields, "stops.txt") - self.validate_optional_fields(self.stops.data, optional_fields, "stops.txt") - self.validate_lat_lon(self.stops.data) - - def validate_routes(self): - """ - Check Routes object for compliance with the GTFS reference. - """ - required_fields = ["route_id", "route_short_name", "route_long_name", "route_type"] - optional_fields = ["agency_id", "route_desc", "route_url", "route_color", "route_text_color", - "route_sort_order", "continuous_pickup", "continuous_drop_off"] - self.validate_required_fields(self.routes.data, required_fields, "routes.txt") - self.validate_optional_fields(self.routes.data, optional_fields, "routes.txt") - - def validate_trips(self): - """ - Check Trips object for compliance with the GTFS reference. - """ - required_fields = ["route_id", "service_id", "trip_id"] - optional_fields = ["trip_headsign", "trip_short_name", "direction_id", "block_id", "shape_id", - "wheelchair_accessible", "bikes_allowed"] - self.validate_required_fields(self.trips.data, required_fields, "trips.txt") - self.validate_optional_fields(self.trips.data, optional_fields, "trips.txt") - - def validate_stop_times(self): - """ - Check StopTimes object for compliance with the GTFS reference. - """ - required_fields = ["trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence"] - optional_fields = ["stop_headsign", "pickup_type", "drop_off_type", "shape_dist_traveled", - "timepoint"] - self.validate_required_fields(self.stop_times.data, required_fields, "stop_times.txt") - self.validate_optional_fields(self.stop_times.data, optional_fields, "stop_times.txt") - - def validate_calendar(self): - """ - Check Calendar object for compliance with the GTFS reference. - """ - required_fields = ["service_id", "monday", "tuesday", "wednesday", "thursday", "friday", - "saturday", "sunday", "start_date", "end_date"] - self.validate_required_fields(self.calendar.data, required_fields, "calendar.txt") - - def validate_calendar_dates(self): - """ - Check CalendarDates object for compliance with the GTFS reference. - """ - required_fields = ["service_id", "date", "exception_type"] - self.validate_required_fields(self.calendar_dates.data, required_fields, "calendar_dates.txt") - - def validate_fare_attributes(self): - """ - Check FareAttributes object for compliance with the GTFS reference. - """ - required_fields = ["fare_id", "price", "currency_type", "payment_method", "transfers"] - optional_fields = ["agency_id", "transfer_duration"] - self.validate_required_fields(self.fare_attributes.data, required_fields, "fare_attributes.txt") - self.validate_optional_fields(self.fare_attributes.data, optional_fields, "fare_attributes.txt") - - def validate_fare_rules(self): - """ - Check FareRules object for compliance with the GTFS reference. 
- """ - required_fields = ["fare_id"] - optional_fields = ["route_id", "origin_id", "destination_id", "contains_id"] - self.validate_required_fields(self.fare_rules.data, required_fields, "fare_rules.txt") - self.validate_optional_fields(self.fare_rules.data, optional_fields, "fare_rules.txt") - - def validate_timeframes(self): - """ - Check Timeframes object for compliance with the GTFS reference. - """ - required_fields = ["timeframe_id", "start_time", "end_time"] - optional_fields = ["timeframe_name", "timeframe_desc"] - self.validate_required_fields(self.timeframes.data, required_fields, "timeframes.txt") - self.validate_optional_fields(self.timeframes.data, optional_fields, "timeframes.txt") - - def validate_fare_media(self): - """ - Check FareMedia object for compliance with the GTFS reference. - """ - required_fields = ["media_id", "media_name", "media_type"] - optional_fields = ["media_desc"] - self.validate_required_fields(self.fare_media.data, required_fields, "fare_media.txt") - self.validate_optional_fields(self.fare_media.data, optional_fields, "fare_media.txt") - - def validate_fare_products(self): - """ - Check FareProducts object for compliance with the GTFS reference. - """ - required_fields = ["product_id", "product_name", "product_type", "product_price", "currency"] - optional_fields = ["product_desc"] - self.validate_required_fields(self.fare_products.data, required_fields, "fare_products.txt") - self.validate_optional_fields(self.fare_products.data, optional_fields, "fare_products.txt") - - def validate_fare_leg_rules(self): - """ - Check FareLegRules object for compliance with the GTFS reference. - """ - required_fields = ["leg_id", "from_stop_id", "to_stop_id"] - optional_fields = ["leg_desc"] - self.validate_required_fields(self.fare_leg_rules.data, required_fields, "fare_leg_rules.txt") - self.validate_optional_fields(self.fare_leg_rules.data, optional_fields, "fare_leg_rules.txt") - - def validate_fare_transfer_rules(self): - """ - Check FareTransferRules object for compliance with the GTFS reference. - """ - required_fields = ["from_leg_id", "to_leg_id", "transfer_type"] - optional_fields = ["transfer_time"] - self.validate_required_fields(self.fare_transfer_rules.data, required_fields, "fare_transfer_rules.txt") - self.validate_optional_fields(self.fare_transfer_rules.data, optional_fields, "fare_transfer_rules.txt") - - def validate_areas(self): - """ - Check Areas object for compliance with the GTFS reference. - """ - required_fields = ["area_id", "area_name"] - optional_fields = ["area_desc"] - self.validate_required_fields(self.areas.data, required_fields, "areas.txt") - self.validate_optional_fields(self.areas.data, optional_fields, "areas.txt") - - def validate_stop_areas(self): - """ - Check StopAreas object for compliance with the GTFS reference. - """ - required_fields = ["stop_id", "area_id"] - optional_fields = [] - self.validate_required_fields(self.stop_areas.data, required_fields, "stop_areas.txt") - self.validate_optional_fields(self.stop_areas.data, optional_fields, "stop_areas.txt") - - def validate_networks(self): - """ - Check Networks object for compliance with the GTFS reference. 
- """ - required_fields = ["network_id", "network_name"] - optional_fields = ["network_desc"] - self.validate_required_fields(self.networks.data, required_fields, "networks.txt") - self.validate_optional_fields(self.networks.data, optional_fields, "networks.txt") - - def validate_route_networks(self): - """ - Check RouteNetworks object for compliance with the GTFS reference. - """ - required_fields = ["route_id", "network_id"] - optional_fields = [] - self.validate_required_fields(self.route_networks.data, required_fields, "route_networks.txt") - self.validate_optional_fields(self.route_networks.data, optional_fields, "route_networks.txt") - - def validate_shapes(self): - """ - Check Shapes object for compliance with the GTFS reference. - """ - required_fields = ["shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"] - optional_fields = ["shape_dist_traveled"] - self.validate_required_fields(self.shapes.data, required_fields, "shapes.txt") - self.validate_optional_fields(self.shapes.data, optional_fields, "shapes.txt") - - def validate_frequencies(self): - """ - Check Frequencies object for compliance with the GTFS reference. - """ - required_fields = ["trip_id", "start_time", "end_time", "headway_secs"] - optional_fields = ["exact_times"] - self.validate_required_fields(self.frequencies.data, required_fields, "frequencies.txt") - self.validate_optional_fields(self.frequencies.data, optional_fields, "frequencies.txt") - - def validate_transfers(self): - """ - Check Transfers object for compliance with the GTFS reference. - """ - required_fields = ["from_stop_id", "to_stop_id", "transfer_type"] - optional_fields = ["min_transfer_time"] - self.validate_required_fields(self.transfers.data, required_fields, "transfers.txt") - self.validate_optional_fields(self.transfers.data, optional_fields, "transfers.txt") - - def validate_pathways(self): - """ - Check Pathways object for compliance with the GTFS reference. - """ - required_fields = ["pathway_id", "from_stop_id", "to_stop_id", "pathway_mode", "is_bidirectional"] - optional_fields = ["length", "traversal_time", "stair_count", "max_slope", "min_width", "signposted_as", "reversed_signposted_as"] - self.validate_required_fields(self.pathways.data, required_fields, "pathways.txt") - self.validate_optional_fields(self.pathways.data, optional_fields, "pathways.txt") - - def validate_levels(self): - """ - Check Levels object for compliance with the GTFS reference. - """ - required_fields = ["level_id", "level_index"] - optional_fields = ["level_name"] - self.validate_required_fields(self.levels.data, required_fields, "levels.txt") - self.validate_optional_fields(self.levels.data, optional_fields, "levels.txt") - - def validate_location_groups(self): - """ - Check Agency LocationGroups for compliance with the GTFS reference. - """ - required_fields = ["location_group_id", "location_group_name"] - optional_fields = ["location_group_desc"] - self.validate_required_fields(self.location_groups.data, required_fields, "location_groups.txt") - self.validate_optional_fields(self.location_groups.data, optional_fields, "location_groups.txt") - - def validate_location_group_stops(self): - """ - Check LocationGroupStops object for compliance with the GTFS reference. 
- """ - required_fields = ["location_group_id", "stop_id"] - optional_fields = [] - self.validate_required_fields(self.location_group_stops.data, required_fields, "location_group_stops.txt") - self.validate_optional_fields(self.location_group_stops.data, optional_fields, "location_group_stops.txt") - - def validate_locations_geojson(self): - """ - Check LocationsGeojson object for compliance with the GTFS reference. - """ - required_fields = ["type", "features"] - optional_fields = [] - self.validate_required_fields(self.locations_geojson.data, required_fields, "locations.geojson") - self.validate_optional_fields(self.locations_geojson.data, optional_fields, "locations.geojson") - - def validate_booking_rules(self): - """ - Check BookingRules object for compliance with the GTFS reference. - """ - required_fields = ["booking_rule_id"] - optional_fields = ["booking_rule_name", "booking_rule_desc"] - self.validate_required_fields(self.booking_rules.data, required_fields, "booking_rules.txt") - self.validate_optional_fields(self.booking_rules.data, optional_fields, "booking_rules.txt") - - def validate_translations(self): - """ - Check Translations object for compliance with the GTFS reference. - """ - required_fields = ["table_name", "field_name", "language", "translation"] - optional_fields = ["record_id", "record_sub_id", "field_value"] - self.validate_required_fields(self.translations.data, required_fields, "translations.txt") - self.validate_optional_fields(self.translations.data, optional_fields, "translations.txt") - - def validate_feed_info(self): - """ - Check FeedInfo object for compliance with the GTFS reference. - """ - required_fields = ["feed_publisher_name", "feed_publisher_url", "feed_lang"] - optional_fields = ["feed_start_date", "feed_end_date", "feed_version"] - self.validate_required_fields(self.feed_info.data, required_fields, "feed_info.txt") - self.validate_optional_fields(self.feed_info.data, optional_fields, "feed_info.txt") - - def validate_attributions(self): - """ - Check Attributions object for compliance with the GTFS reference. - """ - required_fields = ["attribution_id"] - optional_fields = ["agency_id", "route_id", "trip_id", "organization_name", "is_producer", "is_operator", "is_authority", "attribution_url", "attribution_email", "attribution_phone"] - self.validate_required_fields(self.attributions.data, required_fields, "attributions.txt") - self.validate_optional_fields(self.attributions.data, optional_fields, "attributions.txt") - - def validate_required_fields(self, df, required_fields, file_name): - """ - Check if a DataFrame contains all required fields according to the GTFS reference for the file it represents. - - Parameters - ---------- - df : pd.DataFrame - DataFrame to be checked - required_fields : list[str] - list of field names to check for inclusion in df - file_name : str - Name to be printed in error message if validation fails - """ - missing_fields = set(required_fields) - set(df.columns) - if missing_fields: - self.errors.append(f"{file_name} is missing required fields: {missing_fields}") - - def validate_optional_fields(self, df, optional_fields, file_name): - """ - Check if DataFrame contains fields not specified in the GTFS Reference. 
-
-        Parameters
-        ----------
-        df : pd.DataFrame
-            DataFrame to be checked
-        optional_fields : list[str]
-            list of field names allowed in df
-        file_name : str
-            Name to be printed in error message if validation fails
-
-        """
-        unexpected_fields = set(df.columns) - set(optional_fields) - set(df.columns)
-        if unexpected_fields:
-            self.errors.append(f"{file_name} has unexpected fields: {unexpected_fields}")
-
-    def validate_lat_lon(self, df, file_name="stops.txt", prefix="stop"):
-        """
-        Check that a DataFrame which provides {prefix}_lat and {prefix}_lon columns has no missing values in them.
-
-        Parameters
-        ----------
-        df : pd.DataFrame
-            DataFrame to be checked
-        file_name : str
-            Name to be printed in error message if validation fails
-        prefix : str
-            Prefix to be used for the coordinate fields. Expands to {prefix}_lat and {prefix}_lon
-        """
-        if f"{prefix}_lat" in df.columns and f"{prefix}_lon" in df.columns:
-            if df[[f"{prefix}_lat", f"{prefix}_lon"]].isnull().any().any():
-                self.errors.append(f"{file_name} has missing lat/lon values.")
-
diff --git a/transport_accessibility/test.py b/transport_accessibility/test.py
deleted file mode 100644
index 56efe13..0000000
--- a/transport_accessibility/test.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import pt_map.models
-import api.io
-import inspect
-#api.io.models_csv("/home/johannes/Downloads/torun")
-q = [m.objects.all() for _,m in inspect.getmembers(pt_map.models, inspect.isclass)]
-for s in q:
-    f = f"/home/johannes/Downloads/test3/{pt_map.class_names.case_swap[s.model._meta.object_name]}.txt"
-    with open(f, 'w') as file:
-        file.write(api.io.csv_queryset(s))
-quit()
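For reviewers of the removed validators, here is a minimal, self-contained sketch of the field-level check that GTFS.validate_required_fields and validate_optional_fields perform. It is not part of the deleted modules: the check_fields helper, the shortened stops.txt field lists, and the sample frame are illustrative assumptions, and it needs only pandas rather than the Django models.

import pandas as pd

# Illustrative subset of the stops.txt field lists used by the deleted validators.
REQUIRED = ["stop_id", "stop_name"]
OPTIONAL = ["stop_code", "stop_desc", "stop_lat", "stop_lon", "zone_id", "stop_url"]

def check_fields(df, required, optional, file_name):
    """Collect human readable error messages, mirroring GTFS.errors."""
    errors = []
    missing = set(required) - set(df.columns)  # required columns that are absent
    if missing:
        errors.append(f"{file_name} is missing required fields: {missing}")
    unexpected = set(df.columns) - set(required) - set(optional)  # columns the reference does not define
    if unexpected:
        errors.append(f"{file_name} has unexpected fields: {unexpected}")
    return errors

stops = pd.DataFrame({"stop_id": ["S1"], "stop_lat": [53.01], "stop_lon": [18.60], "colour": ["red"]})
print(check_fields(stops, REQUIRED, OPTIONAL, "stops.txt"))
# prints two messages: stop_name is missing, and colour is unexpected

Note that, unlike the removed validate_optional_fields, this sketch also subtracts the required fields when looking for unexpected columns, which is presumably what the stray `- set(df.columns)` term in the original was meant to do; fixing that in place would also require passing the required list through from each per-file validator.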