Merge branch 'data_feature' of gitea.randerath.eu:johannes/transport-accessibility into data_feature

This commit is contained in:
Jan Kiljanski 2024-06-11 20:54:32 +02:00
commit d0c2c98e17
3 changed files with 0 additions and 472 deletions

View File

@@ -1,173 +0,0 @@
import pandas as pd
import os
class GTFS:
    """In-memory view of a GTFS feed stored as ``<table>.txt`` files in one folder.

    On construction every known table is read into a pandas DataFrame and
    exposed as an attribute (``agency``, ``stops``, ``routes``, ...), each an
    instance of a :class:`GTFS.GTFSFile` subclass.  A table whose file is
    missing is represented by an empty DataFrame.  ``validate()`` collects
    human-readable error strings in ``self.errors``.
    """

    def __init__(self, folder_path):
        """Load every supported table from *folder_path*."""
        self.folder_path = folder_path
        self.agency = self.Agency(self.folder_path)
        self.stops = self.Stops(self.folder_path)
        self.routes = self.Routes(self.folder_path)
        self.trips = self.Trips(self.folder_path)
        self.stop_times = self.StopTimes(self.folder_path)
        self.calendar = self.Calendar(self.folder_path)
        self.calendar_dates = self.CalendarDates(self.folder_path)
        self.fare_attributes = self.FareAttributes(self.folder_path)
        self.fare_rules = self.FareRules(self.folder_path)
        self.shapes = self.Shapes(self.folder_path)
        self.frequencies = self.Frequencies(self.folder_path)
        self.transfers = self.Transfers(self.folder_path)
        self.feed_info = self.FeedInfo(self.folder_path)
        self.errors = []

    class GTFSFile:
        """One feed table backed by ``<folder_path>/<file_name>.txt``."""

        def __init__(self, folder_path, file_name):
            self.file_path = f"{folder_path}/{file_name}.txt"
            self.data = self.load_data()

        def load_data(self):
            """Return the file as a DataFrame, or an empty DataFrame if it is absent."""
            try:
                return pd.read_csv(self.file_path)
            except FileNotFoundError:
                return pd.DataFrame()

    class Agency(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'agency')

    class Stops(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'stops')

    class Routes(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'routes')

    class Trips(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'trips')

    class StopTimes(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'stop_times')

    class Calendar(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'calendar')

    class CalendarDates(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'calendar_dates')

    class FareAttributes(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'fare_attributes')

    class FareRules(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'fare_rules')

    class Shapes(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'shapes')

    class Frequencies(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'frequencies')

    class Transfers(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'transfers')

    class FeedInfo(GTFSFile):
        def __init__(self, folder_path):
            super().__init__(folder_path, 'feed_info')

    def get_files(self):
        """Return the attribute names that hold a table, in assignment order.

        Iterating ``vars(self)`` instead of a set difference over ``dir()``
        yields the same names but in a stable order.
        """
        return [
            name
            for name, value in vars(self).items()
            if isinstance(value, self.GTFSFile)
        ]

    def get_fields(self, name):
        """Return the instance attribute names of the table called *name*.

        Returns ``None`` when *name* is not a table attribute of this feed.
        The result lists attributes set on the table object (not inherited
        from ``GTFSFile``), sorted alphabetically.

        NOTE(review): the method name suggests the CSV column names may have
        been intended (``file.data.columns``); the original computation is
        kept -- confirm against callers.
        """
        # getattr needs a default, otherwise an unknown name raises instead
        # of reaching the None branch.
        file = getattr(self, name, None)
        if not isinstance(file, self.GTFSFile):
            return None
        # GTFSFile is nested in GTFS, so it is only reachable through the
        # class/instance from inside a method (a bare name is a NameError).
        return sorted(set(dir(file)) - set(dir(self.GTFSFile)))

    def export(self, path, dirname):
        """Write every loaded table to ``<path>/<dirname>/<table>.txt`` as CSV.

        The target directory (including missing parents) is created when
        needed.  Tables without any columns are skipped: they come from files
        that did not exist when the feed was loaded, so writing them would
        add files the source feed never had.
        """
        path = f"{os.path.normpath(path)}/{dirname}"
        os.makedirs(path, exist_ok=True)
        for name in self.get_files():
            df = getattr(self, name).data
            if len(df.columns) == 0:
                continue
            df.to_csv(f"{path}/{name}.txt", index=False)

    def validate(self):
        """Run all checks and return the list of error strings, or ``None`` if clean.

        ``self.errors`` is reset first so that calling ``validate()`` more
        than once does not accumulate duplicate messages.
        """
        self.errors = []
        self.validate_agency()
        self.validate_stops()
        self.validate_routes()
        self.validate_trips()
        self.validate_stop_times()
        self.validate_calendar()
        self.validate_cross_references()
        return self.errors or None

    def validate_agency(self):
        """Check the required columns of agency.txt."""
        required_fields = ["agency_id", "agency_name", "agency_url", "agency_timezone"]
        self.validate_required_fields(self.agency.data, required_fields, "agency.txt")

    def validate_stops(self):
        """Check the required columns and the coordinate ranges of stops.txt."""
        required_fields = ["stop_id", "stop_name", "stop_lat", "stop_lon"]
        self.validate_required_fields(self.stops.data, required_fields, "stops.txt")
        self.validate_lat_lon(self.stops.data)

    def validate_routes(self):
        """Check the required columns of routes.txt."""
        required_fields = ["route_id", "route_short_name", "route_long_name", "route_type"]
        self.validate_required_fields(self.routes.data, required_fields, "routes.txt")

    def validate_trips(self):
        """Check the required columns of trips.txt."""
        required_fields = ["route_id", "service_id", "trip_id"]
        self.validate_required_fields(self.trips.data, required_fields, "trips.txt")

    def validate_stop_times(self):
        """Check the required columns of stop_times.txt."""
        required_fields = ["trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence"]
        self.validate_required_fields(self.stop_times.data, required_fields, "stop_times.txt")

    def validate_calendar(self):
        """Check the required columns of calendar.txt."""
        required_fields = [
            "service_id", "monday", "tuesday", "wednesday", "thursday",
            "friday", "saturday", "sunday", "start_date", "end_date",
        ]
        self.validate_required_fields(self.calendar.data, required_fields, "calendar.txt")

    def validate_required_fields(self, data, required_fields, filename):
        """Record one error for every name in *required_fields* missing from *data*'s columns."""
        for field in required_fields:
            if field not in data.columns:
                self.errors.append(f"Error: {filename} missing required field: {field}")

    def validate_lat_lon(self, data):
        """Record an error for every stop coordinate outside its valid range.

        Latitude must lie in [-90, 90] and longitude in [-180, 180].  A
        missing column is skipped here (it is already reported by the
        required-field check); missing or non-numeric values are coerced to
        NaN, which fails the range test and is therefore reported.
        """
        columns = [
            (label, bound, data[column].tolist(),
             pd.to_numeric(data[column], errors="coerce").tolist())
            for column, label, bound in (
                ("stop_lat", "latitude", 90),
                ("stop_lon", "longitude", 180),
            )
            if column in data.columns
        ]
        # Row-major order keeps the latitude error of a row ahead of its
        # longitude error, as before.
        for position, index in enumerate(data.index):
            for label, bound, raw, numeric in columns:
                if not (-bound <= numeric[position] <= bound):
                    self.errors.append(
                        f"Error: stops.txt invalid {label} at row {index}: {raw[position]}"
                    )

    def validate_cross_references(self):
        """Record an error for every stop_times.txt id that has no parent row.

        ``trip_id`` values must exist in trips.txt and ``stop_id`` values in
        stops.txt.  A check is skipped when either side lacks the column
        (for example because the file is missing); that situation is already
        reported by the required-field checks and must not raise KeyError.
        """
        stop_times = self.stop_times.data
        references = (
            ("trip_id", self.trips.data, "trips.txt"),
            ("stop_id", self.stops.data, "stops.txt"),
        )
        for column, parent, parent_file in references:
            if column not in stop_times.columns or column not in parent.columns:
                continue
            for missing in set(stop_times[column]) - set(parent[column]):
                self.errors.append(
                    f"Error: {column} {missing} in stop_times.txt does not exist in {parent_file}"
                )

View File

@@ -1,137 +0,0 @@
from django.db import models
class Agency(models.Model):
    """Transit agency record; field names follow the columns of GTFS agency.txt."""

    # Surrogate numeric key generated by the database.
    agency_id = models.BigAutoField(primary_key=True)
    agency_name = models.CharField(max_length=250)
    agency_url = models.URLField()

    # Contact details; all three are mandatory in this model.
    agency_phone = models.CharField(max_length=15)
    agency_email = models.EmailField()
    agency_fare_url = models.URLField()
class Stop(models.Model):
    """Stop or station record; field names follow the columns of GTFS stops.txt."""

    # Surrogate numeric key generated by the database.
    stop_id = models.BigAutoField(primary_key=True)
    stop_code = models.CharField(max_length=50)
    stop_name = models.CharField(max_length=250)
    tts_stop_name = models.CharField(max_length=250)
    stop_desc = models.CharField(max_length=500)

    # Coordinates are decimal degrees (latitude -90..90, longitude -180..180);
    # an integer column would truncate them to whole degrees.
    stop_lat = models.FloatField()
    stop_lon = models.FloatField()

    # A fare zone groups stops, so the value must be allowed to repeat across
    # rows and to be absent; a unique, required column permitted at most one
    # stop per zone.
    zone_id = models.IntegerField(blank=True, null=True)

    stop_url = models.URLField(blank=True, null=True)
    location_type = models.IntegerField(blank=True, null=True)
    # Self-reference to the containing station; cleared if that station is deleted.
    parent_station = models.ForeignKey('self', on_delete=models.SET_NULL, blank=True, null=True)
    stop_timezone = models.CharField(max_length=255, blank=True, null=True)
    wheelchair_boarding = models.IntegerField(blank=True, null=True)
    level_id = models.CharField(max_length=255, blank=True, null=True)
    platform_code = models.CharField(max_length=255, blank=True, null=True)
class Route(models.Model):
    """Route record; field names follow the columns of GTFS routes.txt."""

    # Surrogate numeric key generated by the database.
    route_id = models.BigAutoField(primary_key=True)
    # Deleting an agency deletes its routes.
    agency = models.ForeignKey(Agency, on_delete=models.CASCADE)

    route_short_name = models.CharField(max_length=255)
    route_long_name = models.CharField(max_length=255)
    route_desc = models.TextField(blank=True, null=True)
    route_type = models.IntegerField()
    route_url = models.URLField(blank=True, null=True)

    # Six characters each -- presumably hex RGB without a leading '#'; verify.
    route_color = models.CharField(max_length=6, blank=True, null=True)
    route_text_color = models.CharField(max_length=6, blank=True, null=True)
class Trip(models.Model):
    """Trip record; field names follow the columns of GTFS trips.txt."""

    # Surrogate numeric key generated by the database.
    trip_id = models.BigAutoField(primary_key=True)
    # Deleting a route deletes its trips.
    route = models.ForeignKey(Route, on_delete=models.CASCADE)
    # Stored as plain text, not as a foreign key to Calendar.
    service_id = models.CharField(max_length=255)

    # Optional descriptive attributes.
    trip_headsign = models.CharField(max_length=255, blank=True, null=True)
    trip_short_name = models.CharField(max_length=255, blank=True, null=True)
    direction_id = models.IntegerField(blank=True, null=True)
    block_id = models.CharField(max_length=255, blank=True, null=True)
    shape_id = models.CharField(max_length=255, blank=True, null=True)
    wheelchair_accessible = models.IntegerField(blank=True, null=True)
    bikes_allowed = models.IntegerField(blank=True, null=True)
class StopTime(models.Model):
    """Scheduled call of a trip at a stop; fields follow GTFS stop_times.txt."""

    trip = models.ForeignKey(Trip, on_delete=models.CASCADE)
    # NOTE(review): GTFS permits times past 24:00:00 for trips running over
    # midnight; a TimeField cannot hold such values -- confirm this is acceptable.
    arrival_time = models.TimeField()
    departure_time = models.TimeField()
    stop = models.ForeignKey(Stop, on_delete=models.CASCADE)
    stop_sequence = models.IntegerField()

    # Optional attributes.
    stop_headsign = models.CharField(max_length=255, blank=True, null=True)
    pickup_type = models.IntegerField(blank=True, null=True)
    drop_off_type = models.IntegerField(blank=True, null=True)
    shape_dist_traveled = models.FloatField(blank=True, null=True)
    timepoint = models.IntegerField(blank=True, null=True)

    class Meta:
        # A trip may use each sequence number only once.
        unique_together = (('trip', 'stop_sequence'),)
class Calendar(models.Model):
    """Weekly service pattern; field names follow the columns of GTFS calendar.txt."""

    # Surrogate numeric key generated by the database.
    service_id = models.BigAutoField(primary_key=True)

    # One flag per weekday.
    monday = models.BooleanField()
    tuesday = models.BooleanField()
    wednesday = models.BooleanField()
    thursday = models.BooleanField()
    friday = models.BooleanField()
    saturday = models.BooleanField()
    sunday = models.BooleanField()

    # Date range of the pattern.
    start_date = models.DateField()
    end_date = models.DateField()
class CalendarDate(models.Model):
    """Dated exception to a Calendar service; fields follow GTFS calendar_dates.txt."""

    # Foreign key to the Calendar row; removed together with that row.
    service_id = models.ForeignKey(Calendar, on_delete=models.CASCADE)
    date = models.DateField()
    exception_type = models.IntegerField()

    class Meta:
        # At most one exception per service and date.
        unique_together = (('service_id', 'date'),)
class FareAttribute(models.Model):
    """Fare definition; field names follow the columns of GTFS fare_attributes.txt."""

    # Surrogate numeric key generated by the database.
    fare_id = models.BigAutoField(primary_key=True)
    price = models.FloatField()
    currency_type = models.CharField(max_length=255)
    payment_method = models.IntegerField()
    transfers = models.IntegerField()

    # Optional owning agency; deleting the agency deletes the fare.
    agency = models.ForeignKey(Agency, on_delete=models.CASCADE, blank=True, null=True)
    transfer_duration = models.IntegerField(blank=True, null=True)
class FareRule(models.Model):
    """Rule attaching a fare to a route and/or zones; fields follow GTFS fare_rules.txt."""

    fare = models.ForeignKey(FareAttribute, on_delete=models.CASCADE)
    route = models.ForeignKey(Route, on_delete=models.CASCADE, blank=True, null=True)

    # Optional zone identifiers, stored as plain text.
    origin_id = models.CharField(max_length=255, blank=True, null=True)
    destination_id = models.CharField(max_length=255, blank=True, null=True)
    contains_id = models.CharField(max_length=255, blank=True, null=True)
class Shape(models.Model):
    """Single point of a shape polyline; fields follow GTFS shapes.txt."""

    # Not a primary key: several rows (points) share one shape_id.
    shape_id = models.CharField(max_length=255)
    shape_pt_lat = models.FloatField()
    shape_pt_lon = models.FloatField()
    shape_pt_sequence = models.IntegerField()
    shape_dist_traveled = models.FloatField(blank=True, null=True)

    class Meta:
        # A shape may use each sequence number only once.
        unique_together = (('shape_id', 'shape_pt_sequence'),)
class Frequency(models.Model):
    """Headway-based service window of a trip; fields follow GTFS frequencies.txt."""

    trip = models.ForeignKey(Trip, on_delete=models.CASCADE)
    # NOTE(review): GTFS permits times past 24:00:00; a TimeField cannot
    # hold such values -- confirm this is acceptable.
    start_time = models.TimeField()
    end_time = models.TimeField()
    headway_secs = models.IntegerField()
    exact_times = models.IntegerField(blank=True, null=True)
class Transfer(models.Model):
    """Transfer rule between two stops; fields follow GTFS transfers.txt."""

    # Two foreign keys to Stop need distinct reverse accessor names.
    from_stop = models.ForeignKey(Stop, on_delete=models.CASCADE, related_name='transfers_from')
    to_stop = models.ForeignKey(Stop, on_delete=models.CASCADE, related_name='transfers_to')
    transfer_type = models.IntegerField()
    min_transfer_time = models.IntegerField(blank=True, null=True)

    class Meta:
        # At most one rule per ordered pair of stops.
        unique_together = (('from_stop', 'to_stop'),)
class FeedInfo(models.Model):
    """Feed metadata; field names follow the columns of GTFS feed_info.txt."""

    feed_publisher_name = models.CharField(max_length=255)
    feed_publisher_url = models.URLField()
    feed_lang = models.CharField(max_length=255)

    # Optional validity window and version label.
    feed_start_date = models.DateField(blank=True, null=True)
    feed_end_date = models.DateField(blank=True, null=True)
    feed_version = models.CharField(max_length=255, blank=True, null=True)

    # Surrogate numeric key generated by the database.
    feed_id = models.BigAutoField(primary_key=True)

View File

@@ -1,162 +0,0 @@
from django.db import models
class Agency(models.Model):
    """Transit agency record; field names follow the columns of GTFS agency.txt."""

    # The feed's own identifier is used as the primary key.
    agency_id = models.CharField(max_length=255, primary_key=True)
    agency_name = models.CharField(max_length=255)
    agency_url = models.URLField()
    agency_timezone = models.CharField(max_length=255)

    # Optional attributes.
    agency_lang = models.CharField(max_length=2, blank=True, null=True)
    agency_phone = models.CharField(max_length=50, blank=True, null=True)
    agency_fare_url = models.URLField(blank=True, null=True)
    agency_email = models.EmailField(blank=True, null=True)
class Stop(models.Model):
    """Stop or station record; field names follow the columns of GTFS stops.txt."""

    # The feed's own identifier is used as the primary key.
    stop_id = models.CharField(max_length=255, primary_key=True)
    stop_code = models.CharField(max_length=50, blank=True, null=True)
    stop_name = models.CharField(max_length=255)
    stop_desc = models.TextField(blank=True, null=True)

    # Position stored as floating-point numbers.
    stop_lat = models.FloatField()
    stop_lon = models.FloatField()

    # Optional attributes.
    zone_id = models.CharField(max_length=255, blank=True, null=True)
    stop_url = models.URLField(blank=True, null=True)
    location_type = models.IntegerField(blank=True, null=True)
    # Self-reference to the containing station; cleared if that station is deleted.
    parent_station = models.ForeignKey('self', on_delete=models.SET_NULL, blank=True, null=True)
    stop_timezone = models.CharField(max_length=255, blank=True, null=True)
    wheelchair_boarding = models.IntegerField(blank=True, null=True)
    # Plain text, not a foreign key to Level.
    level_id = models.CharField(max_length=255, blank=True, null=True)
    platform_code = models.CharField(max_length=50, blank=True, null=True)
class Route(models.Model):
    """Route record; field names follow the columns of GTFS routes.txt."""

    # The feed's own identifier is used as the primary key.
    route_id = models.CharField(max_length=255, primary_key=True)
    # Optional owning agency; deleting the agency deletes the route.
    agency = models.ForeignKey(Agency, on_delete=models.CASCADE, blank=True, null=True)

    route_short_name = models.CharField(max_length=50)
    route_long_name = models.CharField(max_length=255, blank=True, null=True)
    route_desc = models.TextField(blank=True, null=True)
    route_type = models.IntegerField(default=0)
    route_url = models.URLField(blank=True, null=True)

    # Six characters each -- presumably hex RGB without a leading '#'; verify.
    route_color = models.CharField(max_length=6, blank=True, null=True)
    route_text_color = models.CharField(max_length=6, blank=True, null=True)

    route_sort_order = models.IntegerField(blank=True, null=True)
    continuous_pickup = models.IntegerField(blank=True, null=True)
    continuous_drop_off = models.IntegerField(blank=True, null=True)
class Trip(models.Model):
    """Trip record; field names follow the columns of GTFS trips.txt."""

    # The feed's own identifier is used as the primary key.
    trip_id = models.CharField(max_length=255, primary_key=True)
    # Deleting a route deletes its trips.
    route = models.ForeignKey(Route, on_delete=models.CASCADE)
    # Stored as plain text, not as a foreign key to Calendar.
    service_id = models.CharField(max_length=255)

    # Optional descriptive attributes.
    trip_headsign = models.CharField(max_length=255, blank=True, null=True)
    trip_short_name = models.CharField(max_length=255, blank=True, null=True)
    direction_id = models.IntegerField(blank=True, null=True)
    block_id = models.CharField(max_length=255, blank=True, null=True)
    shape_id = models.CharField(max_length=255, blank=True, null=True)
    wheelchair_accessible = models.IntegerField(blank=True, null=True)
    bikes_allowed = models.IntegerField(blank=True, null=True)
class StopTime(models.Model):
    """Scheduled call of a trip at a stop; fields follow GTFS stop_times.txt."""

    trip = models.ForeignKey(Trip, on_delete=models.CASCADE)
    # Both times are optional in this model.
    # NOTE(review): GTFS permits times past 24:00:00 for trips running over
    # midnight; a TimeField cannot hold such values -- confirm this is acceptable.
    arrival_time = models.TimeField(blank=True, null=True)
    departure_time = models.TimeField(blank=True, null=True)
    stop = models.ForeignKey(Stop, on_delete=models.CASCADE)
    stop_sequence = models.IntegerField()

    # Optional attributes.
    stop_headsign = models.CharField(max_length=255, blank=True, null=True)
    pickup_type = models.IntegerField(blank=True, null=True)
    drop_off_type = models.IntegerField(blank=True, null=True)
    shape_dist_traveled = models.FloatField(blank=True, null=True)
    timepoint = models.IntegerField(blank=True, null=True)

    class Meta:
        # A trip may use each sequence number only once.
        unique_together = (('trip', 'stop_sequence'),)
class Calendar(models.Model):
    """Weekly service pattern; field names follow the columns of GTFS calendar.txt."""

    # The feed's own identifier is used as the primary key.
    service_id = models.CharField(max_length=255, primary_key=True)

    # One flag per weekday.
    monday = models.BooleanField()
    tuesday = models.BooleanField()
    wednesday = models.BooleanField()
    thursday = models.BooleanField()
    friday = models.BooleanField()
    saturday = models.BooleanField()
    sunday = models.BooleanField()

    # Date range of the pattern.
    start_date = models.DateField()
    end_date = models.DateField()
class CalendarDate(models.Model):
    """Dated service exception; fields follow GTFS calendar_dates.txt."""

    # Plain text rather than a foreign key, so no Calendar row is required.
    service_id = models.CharField(max_length=255)
    date = models.DateField()
    exception_type = models.IntegerField()

    class Meta:
        # At most one exception per service and date.
        unique_together = (('service_id', 'date'),)
class FareAttribute(models.Model):
    """Fare definition; field names follow the columns of GTFS fare_attributes.txt."""

    # The feed's own identifier is used as the primary key.
    fare_id = models.CharField(max_length=255, primary_key=True)
    price = models.FloatField()
    # Three characters -- presumably an ISO 4217 currency code; verify.
    currency_type = models.CharField(max_length=3)
    payment_method = models.IntegerField()
    transfers = models.IntegerField()

    # Optional owning agency; deleting the agency deletes the fare.
    agency = models.ForeignKey(Agency, on_delete=models.CASCADE, blank=True, null=True)
    transfer_duration = models.IntegerField(blank=True, null=True)
class FareRule(models.Model):
    """Rule attaching a fare to a route and/or zones; fields follow GTFS fare_rules.txt."""

    fare = models.ForeignKey(FareAttribute, on_delete=models.CASCADE)
    route = models.ForeignKey(Route, on_delete=models.CASCADE, blank=True, null=True)

    # Optional zone identifiers, stored as plain text.
    origin_id = models.CharField(max_length=255, blank=True, null=True)
    destination_id = models.CharField(max_length=255, blank=True, null=True)
    contains_id = models.CharField(max_length=255, blank=True, null=True)
class Shape(models.Model):
    """Single point of a shape polyline; fields follow GTFS shapes.txt."""

    # Not a primary key: several rows (points) share one shape_id.
    shape_id = models.CharField(max_length=255)
    shape_pt_lat = models.FloatField()
    shape_pt_lon = models.FloatField()
    shape_pt_sequence = models.IntegerField()
    shape_dist_traveled = models.FloatField(blank=True, null=True)

    class Meta:
        # A shape may use each sequence number only once.
        unique_together = (('shape_id', 'shape_pt_sequence'),)
class Frequency(models.Model):
    """Headway-based service window of a trip; fields follow GTFS frequencies.txt."""

    trip = models.ForeignKey(Trip, on_delete=models.CASCADE)
    # NOTE(review): GTFS permits times past 24:00:00; a TimeField cannot
    # hold such values -- confirm this is acceptable.
    start_time = models.TimeField()
    end_time = models.TimeField()
    headway_secs = models.IntegerField()
    exact_times = models.IntegerField(blank=True, null=True)
class Transfer(models.Model):
    """Transfer rule between two stops; fields follow GTFS transfers.txt."""

    # Two foreign keys to Stop need distinct reverse accessor names.
    from_stop = models.ForeignKey(Stop, on_delete=models.CASCADE, related_name='transfers_from')
    to_stop = models.ForeignKey(Stop, on_delete=models.CASCADE, related_name='transfers_to')
    transfer_type = models.IntegerField()
    min_transfer_time = models.IntegerField(blank=True, null=True)

    class Meta:
        # At most one rule per ordered pair of stops.
        unique_together = (('from_stop', 'to_stop'),)
class Pathway(models.Model):
    """Walkable link between two stops; fields follow GTFS pathways.txt."""

    # The feed's own identifier is used as the primary key.
    pathway_id = models.CharField(max_length=255, primary_key=True)
    # Two foreign keys to Stop need distinct reverse accessor names.
    from_stop = models.ForeignKey(Stop, on_delete=models.CASCADE, related_name='pathways_from')
    to_stop = models.ForeignKey(Stop, on_delete=models.CASCADE, related_name='pathways_to')
    pathway_mode = models.IntegerField()
    is_bidirectional = models.IntegerField()

    # Optional physical characteristics.
    length = models.FloatField(blank=True, null=True)
    traversal_time = models.IntegerField(blank=True, null=True)
    stair_count = models.IntegerField(blank=True, null=True)
    max_slope = models.FloatField(blank=True, null=True)
    min_width = models.FloatField(blank=True, null=True)

    # Optional signage text for each direction.
    signposted_as = models.CharField(max_length=255, blank=True, null=True)
    reversed_signposted_as = models.CharField(max_length=255, blank=True, null=True)
class Level(models.Model):
    """Level (floor) of a station; fields follow GTFS levels.txt."""

    # The feed's own identifier is used as the primary key.
    level_id = models.CharField(max_length=255, primary_key=True)
    # Float, so fractional indices can be stored.
    level_index = models.FloatField()
    level_name = models.CharField(max_length=255, blank=True, null=True)
class FeedInfo(models.Model):
    """Feed metadata; field names follow the columns of GTFS feed_info.txt."""

    feed_publisher_name = models.CharField(max_length=255)
    feed_publisher_url = models.URLField()
    feed_lang = models.CharField(max_length=255)
    default_lang = models.CharField(max_length=255, blank=True, null=True)

    # Optional validity window and version label.
    feed_start_date = models.DateField(blank=True, null=True)
    feed_end_date = models.DateField(blank=True, null=True)
    feed_version = models.CharField(max_length=255, blank=True, null=True)

    # Optional contact details.
    feed_contact_email = models.EmailField(blank=True, null=True)
    feed_contact_url = models.URLField(blank=True, null=True)

    # Surrogate numeric key generated by the database.
    feed_id = models.BigAutoField(primary_key=True)