# Changelog:
# - Ported the bridge that was using a custom GTFS class and Pandas DataFrames
#   to a native Django solution for an interface between db and csv (see api/io.py).
# - Fixed some issues regarding the compliance of the exported csv files with
#   the GTFS reference, i.e. now allowing times 24:00:00 <= t <= 24:59:59.
"""
|
|
Bridge
|
|
======
|
|
Bridge between Django with its models and database and gtfs.GTFS as intermediate object for File IO.
|
|
|
|
Contents
|
|
--------
|
|
Constants
|
|
---------
|
|
reversed_file_mapping : dict(str,str)
|
|
Map CamelCased filenames to '_'-separated
|
|
class_names : dict{str,str}
|
|
Map CamelCase, singularized class names to pluralized, snake_cased file names
|
|
primary_keys : dict{str, (str or None)}
|
|
For all pt_map.models, map primary keys if applicable
|
|
foreign_keys
|
|
For all pt_map.models, map foreign keys if any. Also ordered for model creation without foreign reference conflicts.
|
|
time_delta : int
|
|
Unix time for Jan 1, 2024. To be used to calculate time prefix strings.
|
|
|
|
Functions
|
|
---------
|
|
to_camel_case(s):
|
|
Converts '_'-separated str to CamelCase with capital first letter
|
|
|
|
standardize_time(time_str):
|
|
Converts str in unicode time format to %H:%M:%S format with normalized 24 hour time
|
|
|
|
is_NaN(v):
|
|
Checks if given variable is either a str expressing NaN or NaN as object
|
|
|
|
stdz(v):
|
|
Standardize date and time formats
|
|
|
|
gtfs_to_db(g):
|
|
Write an existing gtfs.GTFS object to the database using the GTFS compliant models
|
|
|
|
db_to_gtfs(q, folder_path):
|
|
Convert list of query sets to gtfs.GTFS object and write to specified folder if validation for GTFS compliance passes.
|
|
"""
|
|
import pt_map.gtfs
|
|
import pt_map.models
|
|
import pandas as pd
|
|
from pattern.text.en import singularize, pluralize
|
|
import math
|
|
import numbers
|
|
import email.utils
|
|
import time
|
|
import datetime
|
|
import django.db.models
|
|
import time
|
|
from pt_map.gtfs_schema import gtfs_schema
|
|
from .class_names import *
|
|
|
|
|
|
def toCamelCase(s: str):
    """
    Turn a '_'-separated string into CamelCase with a capitalized first letter.

    Parameters
    ----------
    s : str
        '_'-separated string, e.g. 'stop_times'

    Returns
    -------
    str
        CamelCased string with the first letter capitalized, e.g. 'StopTimes'
    """
    camel_parts = []
    for part in s.split('_'):
        camel_parts.append(part.capitalize())
    return ''.join(camel_parts)
def standardize_time(time_str: str):
    """
    Convert a time string to the standardized '%H:%M:%S' format.

    GTFS allows times of 24:00:00 and later to describe trips that run past
    midnight of the service day.  Such values are zero-padded and returned
    as-is instead of being fed through ``time.mktime``, which would wrap them
    into the next day (e.g. '24:30:00' -> '00:30:00') and break GTFS
    compliance of exported feeds.

    Parameters
    ----------
    time_str : str
        String encoding a time of day, e.g. '8:05:00', '24:30:00', '1:05 PM'.

    Returns
    -------
    str
        Time in '%H:%M:%S' format.

    Raises
    ------
    ValueError
        If time_str cannot be parsed as a time.
    """
    import re

    # Fast path: plain H:MM:SS values (any number of hours, per GTFS) are only
    # zero-padded, never normalized, so hours >= 24 survive the round trip.
    match = re.fullmatch(r'(\d{1,3}):(\d{1,2}):(\d{1,2})', time_str.strip())
    if match:
        h, m, s = (int(part) for part in match.groups())
        if m < 60 and s < 60:
            return f"{h:02d}:{m:02d}:{s:02d}"

    # Fallback: lenient parsing of other formats (e.g. '1:05 PM') via the
    # email date parser, anchored to an arbitrary fixed date.
    date_str = f"Jan 19, 1999 {time_str}"
    ntuple = email.utils.parsedate(date_str)
    if ntuple is None:
        # Previously this fell through to time.mktime(None) and crashed with a
        # TypeError; raise a descriptive error instead.
        raise ValueError(f"Unrecognized time format: {time_str!r}")
    timestamp = time.mktime(ntuple)
    date = datetime.datetime.fromtimestamp(timestamp)
    return date.strftime('%H:%M:%S')
def is_NaN(v):
    """
    Check whether a value represents NaN.

    Returns
    -------
    True
        If v is the string 'nan' (in any casing) or a numeric NaN
    False
        Otherwise
    """
    if isinstance(v, str):
        return v.lower() == "nan"
    if isinstance(v, numbers.Number):
        return math.isnan(v)
    return False
def stdz(v, m: django.db.models.Model, f: str):
    """
    Standardize date and time values for a given model field.

    Parameters
    ----------
    v : object
        Value to be standardized
    m : django.db.models.Model
        Model the value will be written to
    f : str
        Name of the field on m that v belongs to

    Returns
    -------
    Converted str
        If m's field f is a DateField or a TimeField
    Unchanged value
        Otherwise (ForeignKey values are deliberately passed through untouched)
    """
    internal_type = m._meta.get_field(f).get_internal_type()
    if internal_type == 'DateField':
        return str(v)
    if internal_type == 'TimeField':
        return standardize_time(v)
    # Everything else — including ForeignKey — is returned unchanged.
    return v
def to_snake_case(name):
    """
    Convert CamelCase to snake_case.

    Parameters
    ----------
    name : str
        str in CamelCase, e.g. 'StopTimes'

    Returns
    -------
    str
        snake_case version, e.g. 'stop_times'; empty input is returned unchanged.
    """
    # The previous implementation crashed on any uppercase character past the
    # first: it called str.insert (strings have no insert) with an undefined
    # index i, and discarded the result of c.lower().  Rebuilt with an output
    # list instead of in-place string mutation.
    if not name:
        return name
    chars = [name[0].lower()]
    for c in name[1:]:
        if c.isupper():
            chars.append('_')
            chars.append(c.lower())
        else:
            chars.append(c)
    return ''.join(chars)
def unqfk(ts, fk):
    """
    Prefix a primary or foreign key with a time string to make it unique.

    Primary keys of imported data and keys already in the database are likely
    to overlap.  To avoid this, the current time in seconds since Jan 1, 2024
    is added as a prefix.  Foreign key references must be rewritten with the
    same prefix, so the same time string is used for all objects of one import.

    Parameters
    ----------
    ts : str
        time in seconds to be prepended
    fk : primary or foreign key to be processed.

    Returns
    -------
    Str with prefix
    """
    key = fk if isinstance(fk, str) else str(int(fk))
    return f"{ts}{key}".strip()
def gtfs_to_db(g: pt_map.gtfs.GTFS):
    """
    Given a gtfs.GTFS object, write GTFS-compliantly to db by creating the correct models.

    Iterates over all models in foreign-key-safe order (foreign_keys), extracts
    the matching DataFrame from the GTFS object, rewrites primary keys with a
    feed prefix, resolves foreign key columns to model instances, and creates
    one model instance per DataFrame row.

    Parameters
    ----------
    g : gtfs.GTFS
        GTFS object to be saved to db
    """
    # Placeholder until the FeedInfo row is created below; afterwards this
    # holds the created FeedInfo instance whose feed_id prefixes all keys.
    # NOTE(review): feed_id.feed_id is dereferenced before any FeedInfo object
    # exists unless FeedInfo is the first entry in foreign_keys — presumably
    # the ordering guarantees that; confirm.
    feed_id = 0
    #model = (pt_map.models.Trip, [(pt_map.models.Route, 'route_id'), (pt_map.models.Shape, 'shape_id'), ])
    # NOTE(review): ts is currently unused — the unqfk(ts, ...) prefixing is
    # commented out below in favor of feed_id-based prefixes.
    ts = str(int(time.time())-time_delta) # Prepend the current time in seconds since Jan 1, 2024 to ids to make them more or less unique
    #if model == pt_map.models.Shape:
    # Each entry is (model_class, [(foreign_model_class, fk_field_name), ...]).
    for model in foreign_keys:
        m = model[0]
        df = getattr(g, reversed_file_mapping[m.__name__]).data # Extract dataframe for each model from gtfs.GTFS object
        if not df.empty: # Only process GTFS files actually present
            if primary_keys[m] in df.keys() and not m == pt_map.models.FeedInfo:
                #df[primary_keys[m]] = df[primary_keys[m]].astype(str)
                # Prefix existing primary keys with the feed id so rows from
                # different imports cannot collide.
                df[primary_keys[m]] = [f"{feed_id.feed_id}_{pk}" for pk in df[primary_keys[m]]]
            elif not m == pt_map.models.FeedInfo:
                # No primary key column in the file: synthesize sequential keys 1..n.
                df[primary_keys[m]] = [f"{feed_id.feed_id}_{pk}" for pk in range(1, df.index.size + 1)]
            v = gtfs_schema[reversed_file_mapping[m.__name__]] # field names
            for _, row in df.iterrows(): # the rows of the dataframe are the individual entries in the GTFS file and should be the individual instances of the db model
                for fk in model[1]: # Map foreign_keys to objects of the foreign model
                    if row.get(fk[1]):
                        # Replace the raw key with the referenced model instance,
                        # looked up under the same feed_id prefix applied above.
                        row[fk[1]] = fk[0].objects.get(**{primary_keys[fk[0]]: f"{feed_id.feed_id}_{row[fk[1]]}"})#unqfk(ts, row[fk[1]])})
                defaults = {field: stdz(row.get(field), m, field) for field in v if row.get(field) and not is_NaN(row[field])} # dict of fields and values of current model object to create
                o = None
                if not m == pt_map.models.FeedInfo:
                    # NOTE(review): a FeedInfo instance is assigned to the *_id
                    # attribute here — Django usually expects the raw pk on
                    # feed_info_id and the instance on feed_info; confirm the
                    # models declare this field accordingly.
                    defaults["feed_info_id"] = feed_id
                if primary_keys[m] in df.keys():
                    #row[primary_keys[m]] = unqfk(ts, row[primary_keys[m]]) # primary_keys should be unique, use current time in seconds as a prefix
                    #defaults[primary_keys[m]] = row[primary_keys[m]]
                    try:
                        o = m.objects.get(**{primary_keys[m]: row[primary_keys[m]]}) # Make sure there is no object with identical primary_key, exception is expected to be risen
                    except m.DoesNotExist:
                        # NOTE(review): update_or_create returns an (object,
                        # created) tuple, not the object — harmless here since o
                        # is only reused for FeedInfo, which takes create() below.
                        o = m.objects.update_or_create(
                            defaults = defaults,
                            **{primary_keys[m]: row[primary_keys[m]]}
                        )
                else:
                    o = m.objects.create(**defaults)
                if m == pt_map.models.FeedInfo:
                    # Remember the FeedInfo instance: all later models reference
                    # it and use feed_id.feed_id as their key prefix.
                    feed_id = o
def db_to_gtfs(q: list[django.db.models.query.QuerySet], folder_path: str = ""):
    """
    Convert given list of query sets to gtfs.GTFS object.

    Parameters
    ----------
    q : list[django.db.models.query.QuerySet]
        List of QuerySets containing the retrieved data to be converted
    folder_path : str
        path to be set as the result's folder_path instance variable

    Returns
    -------
    gtfs.GTFS
        object containing the queried data
    """
    # One DataFrame per queried model, keyed by its GTFS file name; empty
    # QuerySets become empty DataFrames so every expected key still exists.
    dfs = {reversed_file_mapping[m.model.__name__]: (pd.DataFrame(list(m.values())) if m else pd.DataFrame()) for m in q}
    # Cast date columns to pandas timestamps.
    # NOTE(review): getattr(model_class, col) on a Django model class yields a
    # DeferredAttribute descriptor, not the field instance, so the
    # isinstance(..., DateField) test presumably never matches and no column is
    # converted — verify, and consider model._meta.get_field(col) instead.
    dfs = {key: dfs[key].astype({col: pd.Timestamp for col in dfs[key].columns if isinstance(getattr(getattr(pt_map.models, {v:k for k,v in reversed_file_mapping.items()}[key]), col), django.db.models.DateField)}) for key in dfs.keys()}
    g = pt_map.gtfs.GTFS(folder_path, dfs)
    # Check the assembled feed for GTFS compliance before handing it back.
    g.validate()
    return g