Deleted unnecessary files

This commit is contained in:
Johannes Randerath 2024-06-24 14:28:49 +02:00
parent f314bfb396
commit 6472673d0a
3 changed files with 0 additions and 1066 deletions

View File

@ -1,240 +0,0 @@
"""
Bridge
======
Bridge between Django with its models and database and gtfs.GTFS as intermediate object for File IO.
Contents
--------
Constants
---------
reversed_file_mapping : dict(str,str)
Map CamelCased filenames to '_'-separated
class_names : dict{str,str}
Map CamelCase, singularized class names to pluralized, snake_cased file names
primary_keys : dict{str, (str or None)}
For all pt_map.models, map primary keys if applicable
foreign_keys
For all pt_map.models, map foreign keys if any. Also ordered for model creation without foreign reference conflicts.
time_delta : int
Unix time for Jan 1, 2024. To be used to calculate time prefix strings.
Functions
---------
toCamelCase(s):
Converts '_'-separated str to CamelCase with capital first letter
standardize_time(time_str):
Converts str in unicode time format to %H:%M:%S format with normalized 24 hour time
is_NaN(v):
Checks if given variable is either a str expressing NaN or NaN as object
stdz(v, m, f):
Standardize date and time formats
gtfs_to_db(g):
Write an existing gtfs.GTFS object to the database using the GTFS compliant models
db_to_gtfs(q, folder_path):
Convert list of query sets to gtfs.GTFS object and write to specified folder if validation for GTFS compliance passes.
"""
import pt_map.gtfs
import pt_map.models
import pandas as pd
from pattern.text.en import singularize, pluralize
import math
import numbers
import email.utils
import time
import datetime
import django.db.models
import time
from pt_map.gtfs_schema import gtfs_schema
from .class_names import *
def toCamelCase(s: str):
    """
    Turn a '_'-separated str into CamelCase.

    Parameters
    ----------
    s : str
        '_'-separated string

    Returns
    -------
    str
        All '_'-delimited parts, each capitalized (first letter upper case, rest lower case), joined without separator
    """
    parts = s.split('_')
    return ''.join(map(str.capitalize, parts))
def standardize_time(time_str: str):
    """
    Bring a time str into the '%H:%M:%S' format.

    The time is appended to a fixed dummy date, parsed with email.utils.parsedate, turned into a local
    timestamp and formatted again. Only the time-of-day part of the result is returned.

    Parameters
    ----------
    time_str: str
        str encoding a time of day

    Returns
    -------
    str in format '%H:%M:%S'
    """
    # The date itself is irrelevant, it only makes the string parseable as a full date
    fields = email.utils.parsedate(f"Jan 19, 1999 {time_str}")
    moment = datetime.datetime.fromtimestamp(time.mktime(fields))
    return moment.strftime('%H:%M:%S')
def is_NaN(v):
    """
    Tell whether v stands for NaN.

    Returns
    -------
    True
        If v is a str spelling "nan" (in any letter case) or a number for which math.isnan holds
    False
        Otherwise
    """
    if isinstance(v, str):
        return v.lower() == "nan"
    if isinstance(v, numbers.Number):
        return math.isnan(v)
    return False
def stdz(v, m: django.db.models.Model, f: str):
    """
    Standardize a value depending on the internal type of the model field it is written to.

    Parameters
    ----------
    v : object
        object to be standardized
    m : django.db.models.Model
        model to be written to
    f : str
        field name in question

    Returns
    -------
    str(v)
        If m.f is a DateField
    standardize_time(v)
        If m.f is a TimeField
    v unchanged
        Otherwise, including foreign keys
    """
    # Look the field type up once instead of once per comparison
    kind = m._meta.get_field(f).get_internal_type()
    if kind == 'DateField':
        return str(v)
    elif kind == 'TimeField':
        return standardize_time(v)
    return v
def to_snake_case(name):
    """
    Convert CamelCase to snake_case.

    Every upper case letter after the first character starts a new '_'-separated word; all letters are lower cased.

    Parameters
    ----------
    name : str
        str in CamelCase

    Returns
    -------
    str
        name in snake_case; an empty str is returned unchanged
    """
    if not name:
        return name
    pieces = [name[0].lower()]
    for ch in name[1:]:
        if ch.isupper():
            # str objects are immutable, so the result is assembled from pieces
            pieces.append('_')
        pieces.append(ch.lower())
    return ''.join(pieces)
def unqfk(ts, fk):
    """
    Prefix a primary or foreign key with a time stamp.

    Keys of imported data are likely to overlap with keys already in the database. Prepending the same
    time stamp to a primary key and to every foreign key referencing it keeps the references intact.

    Parameters
    ----------
    ts : str
        time in seconds to be prepended
    fk : primary or foreign key to be processed. Anything that is not a str is converted via int() first.

    Returns
    -------
    str
        ts followed by the key, with surrounding whitespace stripped
    """
    suffix = fk if isinstance(fk, str) else str(int(fk))
    return f"{ts}{suffix}".strip()
def gtfs_to_db(g: pt_map.gtfs.GTFS):
    """
    Given a gtfs.GTFS object, write its data to the db by creating the corresponding models.

    Models are processed in the order given by foreign_keys. The primary key column of every non-FeedInfo
    DataFrame is rewritten in place to "<feed_id>_<key>" (or "<feed_id>_<running number>" if the file has
    no such column), and foreign key values are resolved with the same prefix.
    NOTE(review): this relies on FeedInfo being processed before any other non-empty file - confirm the
    ordering of foreign_keys.

    Parameters
    ----------
    g : gtfs.GTFS
        GTFS object to be saved to db
    """
    feed_id = 0  # replaced by the FeedInfo instance once a feed_info row has been written
    for model in foreign_keys:
        m = model[0]
        file_name = reversed_file_mapping[m.__name__]
        df = getattr(g, file_name).data  # Extract dataframe for each model from gtfs.GTFS object
        if df.empty:  # Only process GTFS files actually present
            continue
        is_feed_info = m == pt_map.models.FeedInfo
        pk_name = primary_keys[m]
        if not is_feed_info:
            if pk_name in df.keys():
                df[pk_name] = [f"{feed_id.feed_id}_{pk}" for pk in df[pk_name]]
            else:
                df[pk_name] = [f"{feed_id.feed_id}_{pk}" for pk in range(1, df.index.size + 1)]
        has_pk = pk_name in df.keys()
        fields = gtfs_schema[file_name]  # field names
        for _, row in df.iterrows():  # every row is one entry of the GTFS file and becomes one model instance
            for fk in model[1]:  # Map foreign keys to objects of the foreign model
                # Missing (NaN) references are skipped instead of being looked up as "<feed_id>_nan"
                if _has_value(row.get(fk[1])):
                    row[fk[1]] = fk[0].objects.get(**{primary_keys[fk[0]]: f"{feed_id.feed_id}_{row[fk[1]]}"})
            # 0 and False are legitimate GTFS values, so only truly missing cells are left out
            defaults = {field: stdz(row.get(field), m, field) for field in fields if _has_value(row.get(field))}
            if not is_feed_info:
                defaults["feed_info_id"] = feed_id
            if has_pk:
                lookup = {pk_name: row[pk_name]}
                try:
                    o = m.objects.get(**lookup)  # An already existing object is kept as is
                except m.DoesNotExist:
                    # update_or_create returns (object, created); only the object is of interest
                    o, _created = m.objects.update_or_create(defaults=defaults, **lookup)
            else:
                o = m.objects.create(**defaults)
            if is_feed_info:
                feed_id = o


def _has_value(v):
    """Tell whether a DataFrame cell carries data, i.e. is neither None, an empty str nor NaN."""
    if v is None:
        return False
    if isinstance(v, str) and not v:
        return False
    return not is_NaN(v)
def db_to_gtfs(q: list[django.db.models.query.QuerySet], folder_path: str = ""):
    """
    Convert given list of query sets to a gtfs.GTFS object.

    Columns belonging to a DateField of the queried model are converted to pandas datetimes. The
    resulting object is validated before it is returned; the validation result is not evaluated here.

    Parameters
    ----------
    q : list[django.db.models.query.QuerySet]
        List of QuerySets containing the retrieved data to be converted
    folder_path : str
        path to be set as the result's folder_path instance variable

    Returns
    -------
    gtfs.GTFS
        object containing the queried data
    """
    dfs = {}
    for qs in q:
        df = pd.DataFrame(list(qs.values())) if qs else pd.DataFrame()
        # Accessing a field on the model class yields a descriptor, not the Field itself,
        # so the field types are read from the model's _meta instead.
        date_columns = {
            field.attname
            for field in qs.model._meta.get_fields()
            if isinstance(field, django.db.models.DateField)
        }
        for col in df.columns:
            if col in date_columns:
                df[col] = pd.to_datetime(df[col])
        dfs[reversed_file_mapping[qs.model.__name__]] = df
    g = pt_map.gtfs.GTFS(folder_path, dfs)
    g.validate()
    return g

View File

@ -1,816 +0,0 @@
import pandas as pd
import os
class GTFS:
"""
DataFrame based representation of the GTFS standard, able to read folder of GTFS files, validate a GTFS object for accordance with the standard and write its data to a GTFS folder.
Attributes
----------
folder_path : str
Path to folder where the data is read from and/or to be written to
agency, stops, routes, trips, stop_times, calendar, calendar_dates, fare_attributes, fare_rules, timeframes, fare_media, fare_products, fare_leg_rules, fare_transfer_rules, areas, stop_areas, networks, route_networks, shapes, frequencies, transfers, pathways, levels, location_groups, location_group_stops, locations_geojson, booking_rules, translations, feed_info, attributions : GTFSFile
Objects representing the data in the corresponding .txt/.geojson files in the GTFS Reference.
errors: list[str]
Human readable messages explaining why a validation failed if it did.
Methods
-------
get_files():
Return all member objects of type GTFSFile
get_fields(name):
Return all fields present in a given instance of a GTFSFile
export(path, dirname):
Save all GTFS data represented by the current instance as a folder of files corresponding to the GTFS Reference.
validate():
For all GTFSFile member objects, validate if they individually comply with GTFS.
validate_required_fields(df, required_fields):
Check if a DataFrame contains all required fields according to the GTFS reference for the file it represents.
validate_optional_fields(df, optional_fields):
Check if a DataFrame does not contain any unexpected fields, not compliant with the GTFS reference for the file it represents.
validate_lat_lon(df):
Check if a Stop is correctly mapped using geographical coordinates.
"""
def __init__(self, folder_path: str = "", dfs: dict[str, pd.DataFrame] = None):
"""
Parameters
----------
folder_path : str
Path of the folder to read GTFS data from or potentially write it to when export() is called. Defaults to an empty str.
dfs : dict[str : pd.DataFrame]
DataFrames containing the data to be represented by this object as values, corresponding GTFSFile.file_names as keys.
Raises
------
TypeError
If neither folder_path nor dfs is provided
If folder_path is not a valid str or dfs is not a dict of DataFrames
ValueError
If folder_path is not a well formatted path
"""
self.folder_path = folder_path
self.agency = self.Agency(self.folder_path, dfs)
self.stops = self.Stops(self.folder_path, dfs)
self.routes = self.Routes(self.folder_path, dfs)
self.trips = self.Trips(self.folder_path, dfs)
self.stop_times = self.StopTimes(self.folder_path, dfs)
self.calendar = self.Calendar(self.folder_path, dfs)
self.calendar_dates = self.CalendarDates(self.folder_path, dfs)
self.fare_attributes = self.FareAttributes(self.folder_path, dfs)
self.fare_rules = self.FareRules(self.folder_path, dfs)
self.timeframes = self.Timeframes(self.folder_path, dfs)
self.fare_media = self.FareMedia(self.folder_path, dfs)
self.fare_products = self.FareProducts(self.folder_path, dfs)
self.fare_leg_rules = self.FareLegRules(self.folder_path, dfs)
self.fare_transfer_rules = self.FareTransferRules(self.folder_path, dfs)
self.areas = self.Areas(self.folder_path, dfs)
self.stop_areas = self.StopAreas(self.folder_path, dfs)
self.networks = self.Networks(self.folder_path, dfs)
self.route_networks = self.RouteNetworks(self.folder_path, dfs)
self.shapes = self.Shapes(self.folder_path, dfs)
self.frequencies = self.Frequencies(self.folder_path, dfs)
self.transfers = self.Transfers(self.folder_path, dfs)
self.pathways = self.Pathways(self.folder_path, dfs)
self.levels = self.Levels(self.folder_path, dfs)
self.location_groups = self.LocationGroups(self.folder_path, dfs)
self.location_group_stops = self.LocationGroupStops(self.folder_path, dfs)
self.locations_geojson = self.LocationsGeojson(self.folder_path, dfs)
self.booking_rules = self.BookingRules(self.folder_path, dfs)
self.translations = self.Translations(self.folder_path, dfs)
self.feed_info = self.FeedInfo(self.folder_path, dfs)
self.attributions = self.Attributions(self.folder_path, dfs)
self.errors = []
class GTFSFile:
    """
    All given fields and their corresponding values are stored as a DataFrame.

    Attributes
    ----------
    file_name : str
        Extension-less name of the corresponding .txt file from the GTFS Reference
    file_path : str
        Path of the .txt file inside the folder the data is read from
    data : pd.DataFrame
        All csv data from the corresponding .txt file represented as a Pandas DataFrame

    Methods
    -------
    load_data(dfs):
        Load data from dict of DataFrames if given else read it from the corresponding .txt file in csv format.
    """

    def __init__(self, folder_path, file_name, dfs):
        """
        Parameters
        ----------
        folder_path : str
            Where to read GTFS files from or write it to
        file_name : str
            Name of the .txt file without the .txt Extension
        dfs : dict[str, pd.DataFrame]
            If given, data variable is set as corresponding DataFrame in this dict
            If not, data is read from the csv
        """
        self.file_name = file_name
        self.file_path = f"{folder_path}/{file_name}.txt"
        self.data = self.load_data(dfs)

    def load_data(self, dfs):
        """
        Provide the GTFS data either from a given DataFrame or from the corresponding csv.

        Parameters
        ----------
        dfs : dict[str, pd.DataFrame]
            Dict of dataframes mapped to the corresponding file names. If given (and not empty), the
            DataFrame stored under file_name is returned if the key exists else an empty DataFrame

        Returns
        -------
        pd.DataFrame
            The selected or parsed data; empty if the file is missing or contains nothing to parse
        """
        if dfs:
            return dfs[self.file_name] if self.file_name in dfs.keys() else pd.DataFrame()
        try:
            return pd.read_csv(self.file_path)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # A missing file and a file without any columns both mean "no data"
            return pd.DataFrame()
class Agency(GTFSFile):
    """
    GTFSFile bound to agency.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='agency', dfs=dfs)
class Stops(GTFSFile):
    """
    GTFSFile bound to stops.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='stops', dfs=dfs)
class Routes(GTFSFile):
    """
    GTFSFile bound to routes.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='routes', dfs=dfs)
class Trips(GTFSFile):
    """
    GTFSFile bound to trips.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='trips', dfs=dfs)
class StopTimes(GTFSFile):
    """
    GTFSFile bound to stop_times.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='stop_times', dfs=dfs)
class Calendar(GTFSFile):
    """
    GTFSFile bound to calendar.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='calendar', dfs=dfs)
class CalendarDates(GTFSFile):
    """
    GTFSFile bound to calendar_dates.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='calendar_dates', dfs=dfs)
class FareAttributes(GTFSFile):
    """
    GTFSFile bound to fare_attributes.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='fare_attributes', dfs=dfs)
class FareRules(GTFSFile):
    """
    GTFSFile bound to fare_rules.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='fare_rules', dfs=dfs)
class Timeframes(GTFSFile):
    """
    GTFSFile bound to timeframes.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='timeframes', dfs=dfs)
class FareMedia(GTFSFile):
    """
    GTFSFile bound to fare_media.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='fare_media', dfs=dfs)
class FareProducts(GTFSFile):
    """
    GTFSFile bound to fare_products.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='fare_products', dfs=dfs)
class FareLegRules(GTFSFile):
    """
    GTFSFile bound to fare_leg_rules.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='fare_leg_rules', dfs=dfs)
class FareTransferRules(GTFSFile):
    """
    GTFSFile bound to fare_transfer_rules.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='fare_transfer_rules', dfs=dfs)
class Areas(GTFSFile):
    """
    GTFSFile bound to areas.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='areas', dfs=dfs)
class StopAreas(GTFSFile):
    """
    GTFSFile bound to stop_areas.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='stop_areas', dfs=dfs)
class Networks(GTFSFile):
    """
    GTFSFile bound to networks.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='networks', dfs=dfs)
class RouteNetworks(GTFSFile):
    """
    GTFSFile bound to route_networks.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='route_networks', dfs=dfs)
class Shapes(GTFSFile):
    """
    GTFSFile bound to shapes.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='shapes', dfs=dfs)
class Frequencies(GTFSFile):
    """
    GTFSFile bound to frequencies.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='frequencies', dfs=dfs)
class Transfers(GTFSFile):
    """
    GTFSFile bound to transfers.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='transfers', dfs=dfs)
class Pathways(GTFSFile):
    """
    GTFSFile bound to pathways.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='pathways', dfs=dfs)
class Levels(GTFSFile):
    """
    GTFSFile bound to levels.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='levels', dfs=dfs)
class LocationGroups(GTFSFile):
    """
    GTFSFile bound to location_groups.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='location_groups', dfs=dfs)
class LocationGroupStops(GTFSFile):
    """
    GTFSFile bound to location_group_stops.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='location_group_stops', dfs=dfs)
class LocationsGeojson(GTFSFile):
    """
    Represents locations.geojson from the GTFS reference

    The base class initializer is not called here: the data is only ever read from the file in
    folder_path and the dfs argument is not used.
    """

    def __init__(self, folder_path, dfs):
        self.file_path = f"{folder_path}/locations.geojson"
        # Without a file on disk there is nothing to parse
        self.data = self.load_data() if os.path.exists(self.file_path) else pd.DataFrame()

    def load_data(self):
        """
        Read file_path as JSON; if parsing raises a ValueError an empty DataFrame is returned instead.
        """
        try:
            frame = pd.read_json(self.file_path)
        except ValueError:
            frame = pd.DataFrame()
        return frame
class BookingRules(GTFSFile):
    """
    GTFSFile bound to booking_rules.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='booking_rules', dfs=dfs)
class Translations(GTFSFile):
    """
    GTFSFile bound to translations.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='translations', dfs=dfs)
class FeedInfo(GTFSFile):
    """
    GTFSFile bound to feed_info.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='feed_info', dfs=dfs)
class Attributions(GTFSFile):
    """
    GTFSFile bound to attributions.txt from the GTFS reference
    """

    def __init__(self, folder_path, dfs):
        # The file name is fixed; only the folder and the optional DataFrames vary
        super().__init__(folder_path=folder_path, file_name='attributions', dfs=dfs)
def get_files(self):
"""
Get all GTFSFile object
Returns
-------
list[GTFSFile]
All member objects of type GTFSFile
"""
return [attr for attr in dir(self) if isinstance(getattr(self, attr), self.GTFSFile)]
def get_fields(self, name):
"""
Given the name of a file specified in the GTFS specification, return all fields present in the data.
Parameters
----------
name : str
name of a file as specified by GTFS (ommiting the .txt/.geojson extension)
Returns
-------
list[pd.core.indexes.range.RangeIndex]
a list of all the fields present in the datastream of the specified file
"""
file = getattr(self, name)
if not file:
return None
return list(file.data.columns)
def export(self, path = None, dirname = ""):
"""
Save this object's data to files as specified by GTFS.
Parameters
----------
path : str
parent directory where to save the files, defaults to the objects folder_path property
dirname : str
If specified, subdirectory to create or use inside path. Default behaviour is to save directly to path.
"""
if not path:
path = self.folder_path
else:
path = f"{os.path.normpath(path)}/{dirname}"
if not os.path.exists(path):
os.mkdir(path)
for name in self.get_files():
df = getattr(self, name).data
df = df.astype({col: 'int8' for col in df.columns if df[col].dtype == 'bool'})
fpath = f"{path}/{name}.txt"
if name == 'locations_geojson':
fpath = f"{path}/{name}.geojson"
df.to_json(fpath)
else:
df.to_csv(fpath, date_format='%Y%m%d', index=False)
def validate(self):
"""
Check this object's data for compliance with the GTFS reference. Resets self.errors and stores human readable error messages to it.
Returns
-------
list[str]
List of human readable error messages, also saved to self.errors, if any, else None.
"""
self.error = []
if not self.agency.data.empty:
self.validate_agency()
if not self.stops.data.empty:
self.validate_stops()
if not self.routes.data.empty:
self.validate_routes()
if not self.trips.data.empty:
self.validate_trips()
if not self.stop_times.data.empty:
self.validate_stop_times()
if not self.calendar.data.empty:
self.validate_calendar()
if not self.calendar_dates.data.empty:
self.validate_calendar_dates()
if not self.fare_attributes.data.empty:
self.validate_fare_attributes()
if not self.fare_rules.data.empty:
self.validate_fare_rules()
if not self.timeframes.data.empty:
self.validate_timeframes()
if not self.fare_media.data.empty:
self.validate_fare_media()
if not self.fare_products.data.empty:
self.validate_fare_products()
if not self.fare_leg_rules.data.empty:
self.validate_fare_leg_rules()
if not self.fare_transfer_rules.data.empty:
self.validate_fare_transfer_rules()
if not self.areas.data.empty:
self.validate_areas()
if not self.stop_areas.data.empty:
self.validate_stop_areas()
if not self.networks.data.empty:
self.validate_networks()
if not self.route_networks.data.empty:
self.validate_route_networks()
if not self.shapes.data.empty:
self.validate_shapes()
if not self.frequencies.data.empty:
self.validate_frequencies()
if not self.transfers.data.empty:
self.validate_transfers()
if not self.pathways.data.empty:
self.validate_pathways()
if not self.levels.data.empty:
self.validate_levels()
if not self.location_groups.data.empty:
self.validate_location_groups()
if not self.location_group_stops.data.empty:
self.validate_location_group_stops()
if not self.locations_geojson.data.empty:
self.validate_locations_geojson()
if not self.booking_rules.data.empty:
self.validate_booking_rules()
if not self.translations.data.empty:
self.validate_translations()
if not self.feed_info.data.empty:
self.validate_feed_info()
if not self.attributions.data.empty:
self.validate_attributions()
if not self.errors:
return None
else:
return self.errors
def validate_agency(self):
    """
    Run the required and optional field checks on the agency data (agency.txt).
    """
    frame = self.agency.data
    source = "agency.txt"
    mandatory = ["agency_name", "agency_url", "agency_timezone"]
    permitted = ["agency_id", "agency_lang", "agency_phone", "agency_fare_url", "agency_email"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_stops(self):
    """
    Run the required and optional field checks and the coordinate check on the stops data (stops.txt).
    """
    frame = self.stops.data
    source = "stops.txt"
    mandatory = ["stop_id", "stop_name"]
    permitted = [
        "stop_code", "stop_desc", "stop_lat", "stop_lon", "zone_id", "stop_url",
        "location_type", "parent_station", "stop_timezone", "wheelchair_boarding",
        "level_id", "platform_code",
    ]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
    self.validate_lat_lon(frame)
def validate_routes(self):
    """
    Run the required and optional field checks on the routes data (routes.txt).
    """
    frame = self.routes.data
    source = "routes.txt"
    mandatory = ["route_id", "route_short_name", "route_long_name", "route_type"]
    permitted = [
        "agency_id", "route_desc", "route_url", "route_color", "route_text_color",
        "route_sort_order", "continuous_pickup", "continuous_drop_off",
    ]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_trips(self):
    """
    Run the required and optional field checks on the trips data (trips.txt).
    """
    frame = self.trips.data
    source = "trips.txt"
    mandatory = ["route_id", "service_id", "trip_id"]
    permitted = [
        "trip_headsign", "trip_short_name", "direction_id", "block_id", "shape_id",
        "wheelchair_accessible", "bikes_allowed",
    ]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_stop_times(self):
    """
    Run the required and optional field checks on the stop_times data (stop_times.txt).
    """
    frame = self.stop_times.data
    source = "stop_times.txt"
    mandatory = ["trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence"]
    permitted = [
        "stop_headsign", "pickup_type", "drop_off_type", "shape_dist_traveled",
        "timepoint",
    ]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_calendar(self):
    """
    Run the required field check on the calendar data (calendar.txt).
    """
    mandatory = [
        "service_id", "monday", "tuesday", "wednesday", "thursday", "friday",
        "saturday", "sunday", "start_date", "end_date",
    ]
    frame = self.calendar.data
    self.validate_required_fields(frame, mandatory, "calendar.txt")
def validate_calendar_dates(self):
    """
    Run the required field check on the calendar_dates data (calendar_dates.txt).
    """
    mandatory = ["service_id", "date", "exception_type"]
    frame = self.calendar_dates.data
    self.validate_required_fields(frame, mandatory, "calendar_dates.txt")
def validate_fare_attributes(self):
    """
    Run the required and optional field checks on the fare_attributes data (fare_attributes.txt).
    """
    frame = self.fare_attributes.data
    source = "fare_attributes.txt"
    mandatory = ["fare_id", "price", "currency_type", "payment_method", "transfers"]
    permitted = ["agency_id", "transfer_duration"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_fare_rules(self):
    """
    Run the required and optional field checks on the fare_rules data (fare_rules.txt).
    """
    frame = self.fare_rules.data
    source = "fare_rules.txt"
    mandatory = ["fare_id"]
    permitted = ["route_id", "origin_id", "destination_id", "contains_id"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_timeframes(self):
    """
    Run the required and optional field checks on the timeframes data (timeframes.txt).
    """
    frame = self.timeframes.data
    source = "timeframes.txt"
    mandatory = ["timeframe_id", "start_time", "end_time"]
    permitted = ["timeframe_name", "timeframe_desc"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_fare_media(self):
    """
    Run the required and optional field checks on the fare_media data (fare_media.txt).
    """
    frame = self.fare_media.data
    source = "fare_media.txt"
    mandatory = ["media_id", "media_name", "media_type"]
    permitted = ["media_desc"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_fare_products(self):
    """
    Run the required and optional field checks on the fare_products data (fare_products.txt).
    """
    frame = self.fare_products.data
    source = "fare_products.txt"
    mandatory = ["product_id", "product_name", "product_type", "product_price", "currency"]
    permitted = ["product_desc"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_fare_leg_rules(self):
    """
    Run the required and optional field checks on the fare_leg_rules data (fare_leg_rules.txt).
    """
    frame = self.fare_leg_rules.data
    source = "fare_leg_rules.txt"
    mandatory = ["leg_id", "from_stop_id", "to_stop_id"]
    permitted = ["leg_desc"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_fare_transfer_rules(self):
    """
    Run the required and optional field checks on the fare_transfer_rules data (fare_transfer_rules.txt).
    """
    frame = self.fare_transfer_rules.data
    source = "fare_transfer_rules.txt"
    mandatory = ["from_leg_id", "to_leg_id", "transfer_type"]
    permitted = ["transfer_time"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_areas(self):
    """
    Run the required and optional field checks on the areas data (areas.txt).
    """
    frame = self.areas.data
    source = "areas.txt"
    mandatory = ["area_id", "area_name"]
    permitted = ["area_desc"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_stop_areas(self):
    """
    Run the required and optional field checks on the stop_areas data (stop_areas.txt).
    """
    frame = self.stop_areas.data
    source = "stop_areas.txt"
    mandatory = ["stop_id", "area_id"]
    permitted = []
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_networks(self):
    """
    Run the required and optional field checks on the networks data (networks.txt).
    """
    frame = self.networks.data
    source = "networks.txt"
    mandatory = ["network_id", "network_name"]
    permitted = ["network_desc"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_route_networks(self):
    """
    Run the required and optional field checks on the route_networks data (route_networks.txt).
    """
    frame = self.route_networks.data
    source = "route_networks.txt"
    mandatory = ["route_id", "network_id"]
    permitted = []
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_shapes(self):
    """
    Run the required and optional field checks on the shapes data (shapes.txt).
    """
    frame = self.shapes.data
    source = "shapes.txt"
    mandatory = ["shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence"]
    permitted = ["shape_dist_traveled"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_frequencies(self):
    """
    Run the required and optional field checks on the frequencies data (frequencies.txt).
    """
    frame = self.frequencies.data
    source = "frequencies.txt"
    mandatory = ["trip_id", "start_time", "end_time", "headway_secs"]
    permitted = ["exact_times"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_transfers(self):
    """
    Run the required and optional field checks on the transfers data (transfers.txt).
    """
    frame = self.transfers.data
    source = "transfers.txt"
    mandatory = ["from_stop_id", "to_stop_id", "transfer_type"]
    permitted = ["min_transfer_time"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_pathways(self):
    """
    Run the required and optional field checks on the pathways data (pathways.txt).
    """
    frame = self.pathways.data
    source = "pathways.txt"
    mandatory = ["pathway_id", "from_stop_id", "to_stop_id", "pathway_mode", "is_bidirectional"]
    permitted = [
        "length", "traversal_time", "stair_count", "max_slope", "min_width",
        "signposted_as", "reversed_signposted_as",
    ]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_levels(self):
    """
    Run the required and optional field checks on the levels data (levels.txt).
    """
    frame = self.levels.data
    source = "levels.txt"
    mandatory = ["level_id", "level_index"]
    permitted = ["level_name"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_location_groups(self):
    """
    Run the required and optional field checks on the location_groups data (location_groups.txt).
    """
    frame = self.location_groups.data
    source = "location_groups.txt"
    mandatory = ["location_group_id", "location_group_name"]
    permitted = ["location_group_desc"]
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_location_group_stops(self):
    """
    Run the required and optional field checks on the location_group_stops data (location_group_stops.txt).
    """
    frame = self.location_group_stops.data
    source = "location_group_stops.txt"
    mandatory = ["location_group_id", "stop_id"]
    permitted = []
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_locations_geojson(self):
    """
    Run the required and optional field checks on the locations_geojson data (locations.geojson).
    """
    frame = self.locations_geojson.data
    source = "locations.geojson"
    mandatory = ["type", "features"]
    permitted = []
    self.validate_required_fields(frame, mandatory, source)
    self.validate_optional_fields(frame, permitted, source)
def validate_booking_rules(self):
    """
    Check BookingRules object for compliance with the GTFS reference.

    Hands ``self.booking_rules.data`` to the required-field and
    optional-field validators, labelled as "booking_rules.txt".

    The GTFS reference marks both ``booking_rule_id`` and ``booking_type`` as
    Required for booking_rules.txt, so both are checked here. The optional
    list names the remaining columns the reference defines for this file.
    """
    required_fields = ["booking_rule_id", "booking_type"]
    optional_fields = [
        # NOTE(review): the two names below are kept from the previous list
        # for backward compatibility; they do not appear to be part of the
        # published reference -- confirm whether the project relies on them.
        "booking_rule_name",
        "booking_rule_desc",
        # Columns defined by the GTFS reference for booking_rules.txt.
        "prior_notice_duration_min",
        "prior_notice_duration_max",
        "prior_notice_last_day",
        "prior_notice_last_time",
        "prior_notice_start_day",
        "prior_notice_start_time",
        "prior_notice_service_id",
        "message",
        "pickup_message",
        "drop_off_message",
        "phone_number",
        "info_url",
        "booking_url",
    ]
    self.validate_required_fields(self.booking_rules.data, required_fields, "booking_rules.txt")
    self.validate_optional_fields(self.booking_rules.data, optional_fields, "booking_rules.txt")
def validate_translations(self):
    """
    Validate the Translations table against the GTFS reference.

    Hands ``self.translations.data`` to the required-field and optional-field
    validators, labelled as "translations.txt" in any resulting error message.
    """
    table = self.translations.data
    label = "translations.txt"
    mandatory = ["table_name", "field_name", "language", "translation"]
    permitted = ["record_id", "record_sub_id", "field_value"]
    self.validate_required_fields(table, mandatory, label)
    self.validate_optional_fields(table, permitted, label)
def validate_feed_info(self):
    """
    Check FeedInfo object for compliance with the GTFS reference.

    Hands ``self.feed_info.data`` to the required-field and optional-field
    validators, labelled as "feed_info.txt".

    The optional list covers every optional column the GTFS reference
    defines for feed_info.txt, including ``default_lang``,
    ``feed_contact_email`` and ``feed_contact_url``, so that valid feeds
    using them are not treated as carrying unknown columns.
    """
    required_fields = ["feed_publisher_name", "feed_publisher_url", "feed_lang"]
    optional_fields = [
        "default_lang",
        "feed_start_date",
        "feed_end_date",
        "feed_version",
        "feed_contact_email",
        "feed_contact_url",
    ]
    self.validate_required_fields(self.feed_info.data, required_fields, "feed_info.txt")
    self.validate_optional_fields(self.feed_info.data, optional_fields, "feed_info.txt")
def validate_attributions(self):
    """
    Check Attributions object for compliance with the GTFS reference.

    Hands ``self.attributions.data`` to the required-field and optional-field
    validators, labelled as "attributions.txt".

    In the GTFS reference the only Required column of attributions.txt is
    ``organization_name``; ``attribution_id`` is Optional. The lists below
    follow that definition.
    """
    required_fields = ["organization_name"]
    optional_fields = [
        "attribution_id",
        "agency_id",
        "route_id",
        "trip_id",
        "is_producer",
        "is_operator",
        "is_authority",
        "attribution_url",
        "attribution_email",
        "attribution_phone",
    ]
    self.validate_required_fields(self.attributions.data, required_fields, "attributions.txt")
    self.validate_optional_fields(self.attributions.data, optional_fields, "attributions.txt")
def validate_required_fields(self, df, required_fields, file_name):
    """
    Record an error if a table lacks any of its required columns.

    Parameters
    ----------
    df : pd.DataFrame
        Table whose ``columns`` are inspected
    required_fields : list[str]
        Column names that have to be present in df
    file_name : str
        Name used in the error message appended to ``self.errors``
    """
    missing_fields = set(required_fields).difference(df.columns)
    if not missing_fields:
        return
    self.errors.append(f"{file_name} is missing required fields: {missing_fields}")
def validate_optional_fields(self, df, optional_fields, file_name, required_fields=None):
    """
    Check if DataFrame contains fields not specified in the GTFS Reference.

    A column is unexpected when it is neither an optional nor a required
    field of the file. The previous implementation subtracted
    ``set(df.columns)`` from itself, which always yields an empty set, so the
    check could never report anything.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to be checked
    optional_fields : list[str]
        list of optional field names allowed in df
    file_name : str
        Name to be printed in error message if validation fails
    required_fields : list[str], optional
        list of required field names, which are allowed in df as well.
        When omitted (None) nothing is reported: without the required names
        every required column would be mistaken for an unexpected one, so
        callers that have not been updated keep their previous behaviour.
    """
    if required_fields is None:
        # Legacy call signature: cannot tell required columns from unknown
        # ones, so stay silent exactly as the old code effectively did.
        return
    unexpected_fields = set(df.columns) - set(optional_fields) - set(required_fields)
    if unexpected_fields:
        # Sort (by string form, to tolerate non-str labels) so the message is
        # deterministic regardless of set iteration order.
        listed = sorted(unexpected_fields, key=str)
        self.errors.append(f"{file_name} has unexpected fields: {listed}")
def validate_lat_lon(self, df, file_name="stops.txt", prefix="stop"):
    """
    Check if a DataFrame contains either no coordinates of a given prefix or both latitude and longitude.

    Three situations are distinguished:

    * neither ``{prefix}_lat`` nor ``{prefix}_lon`` is a column: nothing to check;
    * exactly one of the two is a column: an error is recorded, because a
      latitude without a longitude (or vice versa) is not a usable coordinate;
    * both are columns: an error is recorded if any value in either column is null.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to be checked
    file_name : str
        Name to be printed in error message if validation fails
    prefix : str
        Prefix to be used for coordinate fields. Expands to {prefix}_lat and {prefix}_lon
    """
    lat_field = f"{prefix}_lat"
    lon_field = f"{prefix}_lon"
    has_lat = lat_field in df.columns
    has_lon = lon_field in df.columns

    if not has_lat and not has_lon:
        return

    if has_lat != has_lon:
        # Only one half of the coordinate pair exists as a column.
        present, absent = (lat_field, lon_field) if has_lat else (lon_field, lat_field)
        self.errors.append(f"{file_name} has {present} but is missing {absent}.")
        return

    if df[[lat_field, lon_field]].isnull().any().any():
        self.errors.append(f"{file_name} has missing lat/lon values.")

View File

@ -1,10 +0,0 @@
import pt_map.models
import api.io
import inspect
#api.io.models_csv("/home/johannes/Downloads/torun")
# Build one "all rows" queryset per class found in pt_map.models, then dump
# each of them as CSV text into its own file in the test3 folder.
querysets = [
    m.objects.all()
    for _, m in inspect.getmembers(pt_map.models, inspect.isclass)
]
for s in querysets:
    # File stem comes from the case_swap mapping, keyed by the model's class name.
    target_path = f"/home/johannes/Downloads/test3/{pt_map.class_names.case_swap[s.model._meta.object_name]}.txt"
    with open(target_path, 'w') as handle:
        handle.write(api.io.csv_queryset(s))
quit()