Source code for lasso.transit

"""Transit-related classes to parse, compare, and write standard and cube transit files.

  Typical usage example:

    tn = CubeTransit.create_from_cube(CUBE_DIR)
    transit_change_list = tn.evaluate_differences(base_transit_network)

    cube_transit_net = StandardTransit.read_gtfs(BASE_TRANSIT_DIR)
    cube_transit_net.write_as_cube_lin(os.path.join(WRITE_DIR, "outfile.lin"))
"""
import os
import copy
import csv
import datetime, time
from typing import Any, Dict, Optional, Union

from lark import Lark, Transformer, v_args
from pandas import DataFrame

import pandas as pd
import partridge as ptg
import numpy as np

from network_wrangler import TransitNetwork

from .logger import WranglerLogger
from .parameters import Parameters

class CubeTransit(object):
    """Class for storing information about transit defined in Cube line files.

    Has the capability to:

     - Parse cube line file properties and shapes into python dictionaries
     - Compare line files and represent changes as Project Card dictionaries

    .. highlight:: python

    Typical usage example:
    ::

        tn = CubeTransit.create_from_cube(CUBE_DIR)
        transit_change_list = tn.evaluate_differences(base_transit_network)

    Attributes:
        lines (list): list of strings representing unique line names in
            the cube network.
        line_properties (dict): dictionary of line properties keyed by line
            name. Property values are stored in a dictionary by property name.
            These properties are directly read from the cube line files and
            haven't been translated to standard transit values.
        shapes (dict): dictionary of shapes keyed by line name. Shapes are
            stored as a pandas DataFrame of nodes with the following columns:

              - 'node_id' (int): positive integer of node id
              - 'node' (int): node number, with negative indicating a non-stop
              - 'stop' (boolean): indicates if it is a stop
              - 'order' (int): order within this shape
        program_type (str): Either PT or TRNBLD
        parameters (Parameters): Parameters instance that will be applied to
            this instance which includes information about time periods and
            variables.
        source_list (list): List of cube line file sources that have been
            read and added.
        diff_dict (dict): set by the constructor; its intended contents are
            not described in this class.
    """
def __init__(self, parameters: Union[Parameters, dict] = {}):
    """
    Constructor for CubeTransit.

    Args:
        parameters: dictionary of parameter settings (see Parameters class)
            or an instance of Parameters. The argument is only read, never
            mutated, so the shared ``{}`` default is harmless here.

    Raises:
        ValueError: if ``parameters`` is neither a dict nor a Parameters
            instance.
    """
    WranglerLogger.debug("Creating a new Cube Transit instance")

    self.lines = []
    self.line_properties = {}
    self.shapes = {}
    self.program_type = None

    # isinstance (rather than an exact type check) also accepts dict
    # subclasses such as OrderedDict.
    if isinstance(parameters, dict):
        self.parameters = Parameters(**parameters)
    elif isinstance(parameters, Parameters):
        # Rebuild from the attribute dict so this instance gets its own
        # Parameters object instead of aliasing the caller's.
        self.parameters = Parameters(**parameters.__dict__)
    else:
        msg = "Parameters should be a dict or instance of Parameters: found {} which is of type:{}".format(
            parameters, type(parameters)
        )
        WranglerLogger.error(msg)
        raise ValueError(msg)

    self.source_list = []

    # Previously this stored the typing alias ``Dict[str, Any]`` itself,
    # which is not a usable mapping; start from an empty dict instead.
    self.diff_dict = {}  # type: Dict[str, Any]
def add_cube(self, transit_source: str):
    """Reads a .lin file and adds it to existing TransitNetwork instance.

    The source is interpreted, in this order, as:

      1. a cube line string, when it contains ``NAME=``
      2. a path to a single line file
      3. a directory, in which case every ``*.LIN`` file in it is added
         by calling this method once per file

    Args:
        transit_source: a string or the directory of the cube line file
            to be parsed

    Raises:
        ValueError: if ``transit_source`` is none of the above, or if any
            parsed line name already exists in this instance.
    """
    parser = Lark(TRANSIT_LINE_FILE_GRAMMAR, debug="debug", parser="lalr")

    # Figure out what kind of transit source it is
    if "NAME=" in transit_source:
        WranglerLogger.debug("reading transit source as string")
        self.source_list.append("input_str")
        parse_tree = parser.parse(transit_source)
    elif os.path.isfile(transit_source):
        print("reading: {}".format(transit_source))
        with open(transit_source) as file:
            WranglerLogger.debug(
                "reading transit source: {}".format(transit_source)
            )
            self.source_list.append(transit_source)
            parse_tree = parser.parse(file.read())
    elif os.path.isdir(transit_source):
        import glob

        for lin_file in glob.glob(os.path.join(transit_source, "*.LIN")):
            self.add_cube(lin_file)
        return
    else:
        # The template used to be logged and raised without being formatted.
        msg = "{} not a valid transit line string, directory, or file".format(
            transit_source
        )
        WranglerLogger.error(msg)
        raise ValueError(msg)

    WranglerLogger.debug("finished parsing cube line file")
    # WranglerLogger.debug("--Parse Tree--\n {}".format(parse_tree.pretty()))
    transformed_tree_data = CubeTransformer().transform(parse_tree)
    # WranglerLogger.debug("--Transformed Parse Tree--\n {}".format(transformed_tree_data))

    _line_data = transformed_tree_data["lines"]

    line_properties_dict = {k: v["line_properties"] for k, v in _line_data.items()}
    line_shapes_dict = {k: v["line_shape"] for k, v in _line_data.items()}
    new_lines = list(line_properties_dict.keys())

    # Before adding lines, check to see if any are overlapping with
    # existing ones in the network
    overlapping_lines = set(new_lines) & set(self.lines)
    if overlapping_lines:
        # Argument order fixed: the overlap count comes first, then the
        # total number of new lines, matching the wording of the message.
        msg = "Overlapping lines found when adding from {}. \nSource files:\n{}\n{} Overlapping Lines of {} total new lines.\n-->{}".format(
            transit_source,
            "\n - ".join(self.source_list),
            len(overlapping_lines),
            len(new_lines),
            overlapping_lines,
        )
        print(msg)
        WranglerLogger.error(msg)
        raise ValueError(msg)

    self.program_type = transformed_tree_data.get("program_type", None)

    self.lines += new_lines
    self.line_properties.update(line_properties_dict)
    self.shapes.update(line_shapes_dict)

    # A placeholder was missing, so the added line names were never logged.
    WranglerLogger.debug("Added lines to CubeTransit: \n{}".format(new_lines))
@staticmethod
def create_from_cube(transit_source: str, parameters: Optional[dict] = {}):
    """
    Reads a cube .lin file and stores as TransitNetwork object.

    Args:
        transit_source: a string or the directory of the cube line file
            to be parsed
        parameters: dictionary of parameter settings passed to the
            CubeTransit constructor. ``None`` is treated the same as an
            empty dictionary.

    Returns:
        A ::CubeTransit object created from the transit_source.
    """
    # The signature advertises Optional[dict], but the constructor rejects
    # None with a ValueError; normalise it here so None means "defaults".
    tn = CubeTransit({} if parameters is None else parameters)
    tn.add_cube(transit_source)

    return tn
def evaluate_differences(self, base_transit):
    """
    1. Identifies what routes need to be updated, deleted, or added
    2. For routes being added or updated, identify if the time periods
        have changed or if there are multiples, and make duplicate lines if so
    3. Create project card dictionaries for each change.

    Note that this calls ``add_additional_time_periods`` and
    ``evaluate_route_property_differences``, both of which modify the
    property dictionaries they are given.

    Args:
        base_transit (CubeTransit): an instance of this class for the base condition

    Returns:
        A list of dictionaries containing project card changes
        required to evaluate the differences between the base network
        and this transit network instance.

    Raises:
        ValueError: if a line in the base network does not have exactly
            one time period.
    """
    # Identify what needs to be evaluated
    lines_to_update = [l for l in self.lines if l in base_transit.lines]
    lines_to_delete = [l for l in base_transit.lines if l not in self.lines]
    lines_to_add = [l for l in self.lines if l not in base_transit.lines]

    project_card_changes = []

    # Evaluate Property Updates
    for line in lines_to_update:
        WranglerLogger.debug(
            "Finding differences in time periods for: {}".format(line)
        )

        # Find any additional time periods that might need to add or delete.
        base_cube_time_period_numbers = (
            CubeTransit.get_time_period_numbers_from_cube_properties(
                base_transit.line_properties[line]
            )
        )

        # Explicit check instead of assert + bare except: asserts are
        # stripped under -O and a bare except hides unrelated errors.
        if len(base_cube_time_period_numbers) != 1:
            msg = "Base network line {} should only have one time period per route, but {} found".format(
                line, base_cube_time_period_numbers
            )
            WranglerLogger.error(msg)
            raise ValueError(msg)

        base_cube_time_period_number = base_cube_time_period_numbers[0]

        build_cube_time_period_numbers = (
            CubeTransit.get_time_period_numbers_from_cube_properties(
                self.line_properties[line]
            )
        )

        time_periods_to_add = [
            tp
            for tp in build_cube_time_period_numbers
            if tp not in base_cube_time_period_numbers
        ]

        for tp in time_periods_to_add:
            lines_to_add.append(self.add_additional_time_periods(tp, line))

        time_periods_to_delete = [
            tp
            for tp in base_cube_time_period_numbers
            if tp not in build_cube_time_period_numbers
        ]

        # One entry per removed time period, as before (the base has
        # exactly one period, so this appends at most once).
        for _tp in time_periods_to_delete:
            lines_to_delete.append(line)

        WranglerLogger.debug("Evaluating differences in: {}".format(line))
        updated_properties = self.evaluate_route_property_differences(
            self.line_properties[line],
            base_transit.line_properties[line],
            base_cube_time_period_number,
        )
        updated_shapes = CubeTransit.evaluate_route_shape_changes(
            self.shapes[line].node, base_transit.shapes[line].node
        )
        if updated_properties:
            update_prop_card_dict = self.create_update_route_card_dict(
                line, updated_properties
            )
            project_card_changes.append(update_prop_card_dict)

        if updated_shapes:
            update_shape_card_dict = self.create_update_route_card_dict(
                line, updated_shapes
            )
            project_card_changes.append(update_shape_card_dict)

    # Evaluate Deletions
    for line in lines_to_delete:
        delete_card_dict = self.create_delete_route_card_dict(
            line, base_transit.line_properties[line]
        )
        project_card_changes.append(delete_card_dict)

    # Evaluate Additions
    # First assess if need to add multiple routes if there are multiple
    # time periods. Iterate over a snapshot: the loop appends to
    # lines_to_add, and the lines it creates are already split per period.
    for line in list(lines_to_add):
        time_period_numbers = (
            CubeTransit.get_time_period_numbers_from_cube_properties(
                self.line_properties[line]
            )
        )
        for tp in time_period_numbers[1:]:
            lines_to_add.append(self.add_additional_time_periods(tp, line))

    for line in lines_to_add:
        add_card_dict = self.create_add_route_card_dict(line)
        project_card_changes.append(add_card_dict)

    return project_card_changes
def add_additional_time_periods(
    self, new_time_period_number: int, orig_line_name: str
):
    """
    Copies a route to another cube time period with appropriate
    values for time-period-specific properties.

    New properties are stored under the new name in:
      - ::self.shapes
      - ::self.line_properties

    ``self.lines`` is not modified here; the new name is only returned.
    The properties for the new time period are removed from the
    originating line.

    Args:
        new_time_period_number (int): cube time period number
        orig_line_name(str): name of the originating line, from which
            the new line will copy its properties.

    Returns:
        Line name with new time period.

    Raises:
        ValueError: if the constructed line name already exists in
            ``self.lines``.
    """
    WranglerLogger.debug(
        "adding time periods {} to line {}".format(
            new_time_period_number, orig_line_name
        )
    )

    (
        route_id,
        _init_time_period,
        agency_id,
        direction_id,
    ) = CubeTransit.unpack_route_name(orig_line_name)
    new_time_period_name = self.parameters.cube_time_periods[new_time_period_number]
    new_tp_line_name = CubeTransit.build_route_name(
        route_id=route_id,
        time_period=new_time_period_name,
        agency_id=agency_id,
        direction_id=direction_id,
    )

    # Explicit check instead of assert + bare except. The old error path
    # also called the misspelled "WrangerLogger", which raised NameError
    # instead of the intended ValueError.
    if new_tp_line_name in self.lines:
        msg = "Trying to add a new time period {} to line {}, but constructed name {} is already in line list.".format(
            new_time_period_number, orig_line_name, new_tp_line_name
        )
        WranglerLogger.error(msg)
        raise ValueError(msg)

    # copy to a new line and add it to list of lines to add
    self.line_properties[new_tp_line_name] = copy.deepcopy(
        self.line_properties[orig_line_name]
    )
    self.shapes[new_tp_line_name] = copy.deepcopy(self.shapes[orig_line_name])
    self.line_properties[new_tp_line_name]["NAME"] = new_tp_line_name

    # Remove entries that aren't for this time period from the new line's
    # properties list.
    # TODO: parameterize all time period specific variables
    this_time_period_properties_list = [
        p + "[" + str(new_time_period_number) + "]"
        for p in ["HEADWAY", "FREQ"]
    ]

    not_this_tp_properties_list = list(
        set(self.parameters.time_period_properties_list)
        - set(this_time_period_properties_list)
    )

    for k in not_this_tp_properties_list:
        self.line_properties[new_tp_line_name].pop(k, None)

    # Remove entries for time period from the original line's properties list.
    for k in this_time_period_properties_list:
        self.line_properties[orig_line_name].pop(k, None)

    WranglerLogger.debug(
        "Adding new time period {} for line {} as {}.".format(
            new_time_period_number, orig_line_name, new_tp_line_name
        )
    )
    return new_tp_line_name
def create_update_route_card_dict(self, line: str, updated_properties_dict: dict):
    """
    Builds the project card change dictionary that updates one line.

    Args:
        line: name of line that is being updated
        updated_properties_dict: attributes to update, expressed as
            'property': <property name>, 'set': <new property value>

    Returns:
        A project card change-formatted dictionary for the attribute update.
    """
    start_str, end_str = self.calculate_start_end_times(self.line_properties[line])

    # NOTE(review): this reads the line name as "..._d<direction>_s<shape>",
    # which differs from the "<agency>_<route>_<rtid>_<tp><dir>" pattern
    # documented on build_route_name -- confirm which convention applies.
    name_parts = line.split("_")
    facility = {
        "route_id": name_parts[1],
        "direction_id": int(name_parts[-2].strip("d\"")),
        "shape_id": name_parts[-1].strip("s\""),
        "start_time": start_str,
        "end_time": end_str,
    }

    update_card_dict = {
        "category": "Transit Service Property Change",
        "facility": facility,
        "properties": updated_properties_dict,
    }

    WranglerLogger.debug(
        "Updating {} route to changes:\n{}".format(line, str(update_card_dict))
    )
    return update_card_dict
def create_delete_route_card_dict(
    self, line: str, base_transit_line_properties_dict: dict
):
    """
    Builds the project card change dictionary that deletes one line.

    Args:
        line: name of line that is being deleted
        base_transit_line_properties_dict: cube-style attribute values of
            the base line, used to find its time periods and therefore its
            start and end times.

    Returns:
        A project card change-formatted dictionary for the route deletion.
    """
    start_str, end_str = self.calculate_start_end_times(
        base_transit_line_properties_dict
    )

    facility = {
        "route_id": line.split("_")[1],
        # direction is the final character once surrounding quotes are removed
        "direction_id": int(line.strip('"')[-1]),
        "start_time": start_str,
        "end_time": end_str,
    }
    delete_card_dict = {"category": "Delete Transit Service", "facility": facility}

    WranglerLogger.debug(
        "Deleting {} route to changes:\n{}".format(line, delete_card_dict)
    )
    return delete_card_dict
def create_add_route_card_dict(self, line: str):
    """
    Builds the project card change dictionary that adds a route, using the
    information stored in self.line_properties and self.shapes for the line.

    Args:
        line: name of line that is being added

    Returns:
        A project card change-formatted dictionary for the route addition.
    """
    line_properties = self.line_properties[line]
    start_time_str, end_time_str = self.calculate_start_end_times(line_properties)
    standard_properties = self.cube_properties_to_standard_properties(
        line_properties
    )

    routing_properties = {
        "property": "routing",
        "set": self.shapes[line]["node"].tolist(),
    }

    # NOTE(review): strip('_') only removes leading/trailing underscores, so
    # the two indexes below pick single characters of the name, not
    # underscore-separated fields -- confirm this is intended.
    trimmed_name = line.strip('_')
    facility = {
        "route_id": line.split("_")[1],
        "direction_id": int(trimmed_name[-2]),
        "start_time": start_time_str,
        "end_time": end_time_str,
        "agency_id": trimmed_name[0],
    }

    add_card_dict = {
        "category": "New Transit Service",
        "facility": facility,
        "properties": standard_properties + [routing_properties],
    }

    WranglerLogger.debug(
        "Adding {} route to changes:\n{}".format(line, add_card_dict)
    )
    return add_card_dict
@staticmethod
def get_time_period_numbers_from_cube_properties(properties_list: list):
    """
    Finds properties that are associated with time periods and
    returns the numbers in them.

    A property is time-period specific when its name contains a bracketed
    suffix, e.g. ``HEADWAY[2]``. The whole text between the first ``[`` and
    the next ``]`` is used, so multi-digit periods such as ``HEADWAY[10]``
    are returned intact.

    Args:
        properties_list (list): list of all property names. Any iterable
            of strings works, including a dict keyed by property name.

    Returns:
        list of strings of the time period numbers found, without
        duplicates, in order of first appearance.
    """
    time_periods_list = []
    for p in properties_list:
        open_idx = p.find("[")
        if open_idx == -1:
            continue
        # Only accept a closing bracket that comes after the opening one.
        close_idx = p.find("]", open_idx + 1)
        if close_idx == -1:
            continue
        tp_num = p[open_idx + 1 : close_idx].strip()
        # Empty brackets carry no time period.
        if tp_num and tp_num not in time_periods_list:
            time_periods_list.append(tp_num)
    return time_periods_list
@staticmethod
def build_route_name(
    route_id: str = "",
    time_period: str = "",
    agency_id: Union[str, int] = 0,
    direction_id: Union[str, int] = 1,
):
    """
    Create a route name by concatenating route, time period, agency, and direction

    Every part is converted with ``str()``, so numeric ids are accepted for
    all arguments, including ``route_id``.

    Args:
        route_id: i.e. 452-111
        time_period: i.e. pk
        direction_id: i.e. 1
        agency_id: i.e. 0

    Returns:
        constructed line_name i.e. "0_452-111_452_pk1"
    """
    # Convert once up front: the route prefix is taken from the text before
    # the first "-", which previously failed for non-string ids.
    route_id_str = str(route_id)
    route_prefix = route_id_str.split("-")[0]

    return "_".join(
        [
            str(agency_id),
            route_id_str,
            route_prefix,
            str(time_period) + str(direction_id),
        ]
    )
@staticmethod
def unpack_route_name(line_name: str):
    """
    Splits a route name into its route, time period, agency, and direction parts.

    Surrounding double quotes are removed first. The name must consist of
    exactly four underscore-separated fields; the last field holds the time
    period followed by a one-character direction.

    Args:
        line_name (str): i.e. "0_452-111_452_pk1"

    Returns:
        route_id (str): 452-111
        time_period (str): i.e. pk
        agency_id (str) : i.e. 0
        direction_id (str) : i.e. 1
    """
    unquoted = line_name.strip('"')

    # The third field repeats the route prefix and is not needed.
    agency_id, route_id, _, tp_and_direction = unquoted.split("_")

    time_period, direction_id = tp_and_direction[:-1], tp_and_direction[-1]

    return route_id, time_period, agency_id, direction_id
def calculate_start_end_times(self, line_properties_dict: dict):
    """
    Finds the earliest start and latest end over all time periods that
    appear in a line's properties.

    WARNING: Doesn't take care of discongruous time periods!!!!

    Args:
        line_properties_dict: dictionary of cube-flavor properties for a transit line

    Returns:
        Tuple of (start time, end time) formatted as "HH:MM" strings.
    """

    def _to_minutes(clock_str):
        # Only the first two ":"-separated fields (hours, minutes) are used.
        fields_hh = clock_str.split(":")[0]
        fields_mm = clock_str.split(":")[1]
        return (int(fields_hh) * 60) + int(fields_mm)

    # Start from an "empty" window so any real period narrows it.
    earliest_start_m = 24 * 60
    latest_end_m = 0 * 60

    WranglerLogger.debug(
        "parameters.time_period_properties_list: {}".format(
            self.parameters.time_period_properties_list
        )
    )
    tp_numbers = CubeTransit.get_time_period_numbers_from_cube_properties(
        line_properties_dict
    )
    WranglerLogger.debug("current_cube_time_period_numbers:{}".format(tp_numbers))

    for tp in tp_numbers:
        time_period_name = self.parameters.cube_time_periods[tp]
        WranglerLogger.debug("time_period_name:{}".format(time_period_name))
        period_start, period_end = self.parameters.time_period_to_time[
            time_period_name
        ]

        # change from "HH:MM" to integer number of minutes
        period_start_m = _to_minutes(period_start)
        period_end_m = _to_minutes(period_end)

        # find bounding start and end times
        earliest_start_m = min(earliest_start_m, period_start_m)
        latest_end_m = max(latest_end_m, period_end_m)

    # NOTE: a start later than the end is deliberately not treated as an
    # error here; the corresponding log-and-raise is disabled in this code.

    start_time_str = "{:02d}:{:02d}".format(*divmod(earliest_start_m, 60))
    end_time_str = "{:02d}:{:02d}".format(*divmod(latest_end_m, 60))
    return start_time_str, end_time_str
@staticmethod
def cube_properties_to_standard_properties(cube_properties_dict: dict):
    """
    Translates cube style properties into standard properties.

    Any property whose name contains "HEADWAY" or "FREQ" becomes
    ``headway_secs`` with its value multiplied by 60 (cube stores minutes,
    the standard format stores seconds). All other properties pass through
    unchanged.

    Args:
        cube_properties_dict: <cube style property name> : <property value>

    Returns:
        A list of dictionaries with values for `"property": <standard
        style property name>, "set" : <property value with correct units>`
    """
    headway_markers = ("HEADWAY", "FREQ")

    def _as_change_item(name, value):
        if any(marker in name for marker in headway_markers):
            return {"property": "headway_secs", "set": value * 60}
        return {"property": name, "set": value}

    return [
        _as_change_item(name, value)
        for name, value in cube_properties_dict.items()
    ]
def evaluate_route_property_differences(
    self,
    properties_build: dict,
    properties_base: dict,
    time_period_number: str,
    absolute: bool = True,
    validate_base: bool = False,
):
    """
    Checks if any values have been updated or added for a specific
    route and creates project card entries for each.

    Side effect: time-period-specific properties that do not belong to
    ``time_period_number`` are removed from BOTH input dictionaries.

    Args:
        properties_build: ::<property_name>: <property_value>
        properties_base: ::<property_name>: <property_value>
        time_period_number: time period to evaluate
        absolute: if True, will use `set` command rather than a change.
            If false, will automatically check the base value. Note that
            this only applies to the numeric values of frequency/headway
        validate_base: if True, will add the `existing` line in the project card

    Returns:
        transit_change_list (list): a list of dictionary values suitable for
        writing to a project card `{
            'property': <property_name>,
            'set': <set value>,
            'change': <change from existing value>,
            'existing': <existing value to check>,
        }`
    """

    # Remove time period specific values for things that aren't part of
    # the time period in question
    # TODO: parameterize all time period specific variables
    this_time_period_properties_list = [
        p + "[" + str(time_period_number) + "]"
        for p in ["HEADWAY", "FREQ"]
    ]

    not_this_tp_properties_list = list(
        set(self.parameters.time_period_properties_list)
        - set(this_time_period_properties_list)
    )

    for k in not_this_tp_properties_list:
        properties_build.pop(k, None)
        properties_base.pop(k, None)

    # Keep the BUILD value of every property that is new or changed.
    # The previous dict(set ^ set) form held both the build and the base
    # pair for a changed key and kept whichever the set yielded last, so
    # the card could "set" the old value. It also reported properties that
    # only exist in the base.
    difference_dict = {
        k: v
        for k, v in properties_build.items()
        if k not in properties_base or properties_base[k] != v
    }

    # Iterate through properties list to build difference project card list
    properties_list = []
    for k, v in difference_dict.items():
        change_item = {}
        # A newly added property has no base value to diff or validate.
        in_base = k in properties_base
        if any(i in k for i in ["HEADWAY", "FREQ"]):
            change_item["property"] = "headway_secs"
            if absolute or not in_base:
                # project cards are in secs, cube is in minutes
                change_item["set"] = v * 60
            else:
                change_item["change"] = (v - properties_base[k]) * 60
            if in_base and (validate_base or not absolute):
                change_item["existing"] = properties_base[k] * 60
        else:
            change_item["property"] = k
            change_item["set"] = v
            if validate_base and in_base:
                change_item["existing"] = properties_base[k]

        properties_list.append(change_item)

    WranglerLogger.debug(
        "Evaluated Route Changes: \n {})".format(
            "\n".join(map(str, properties_list))
        )
    )
    return properties_list
@staticmethod
def evaluate_route_shape_changes(shape_build: pd.Series, shape_base: pd.Series):
    """
    Compares two route shapes and returns a list of changes
    suitable for a project card.

    Both arguments must support ``.equals()`` and ``.tolist()``; the visible
    caller passes the ``node`` column of each shape. The change covers the
    span from the first differing node to the last differing node, widened
    by up to two nodes on each side.

    Args:
        shape_build: node sequence of the build-version of the route shape.
        shape_base: node sequence of the base-version of the route shape.

    Returns:
        None if the two shapes are equal; otherwise a list of shape changes
        formatted as project card-change dictionaries.
    """

    if shape_build.equals(shape_base):
        return None

    shape_change_list = []

    base_node_list = shape_base.tolist()
    build_node_list = shape_build.tolist()

    base_len = len(base_node_list)
    build_len = len(build_node_list)
    sort_len = max(base_len, build_len)

    start_pos = None
    end_pos = None

    # Scan forward for the first position where the two lists disagree,
    # or where the shorter list runs out.
    for i in range(sort_len):
        if i == base_len or i == build_len:
            start_pos = i - 1
            break
        if base_node_list[i] != build_node_list[i]:
            start_pos = i
            break

    # Scan backward (negative index j) for the last position where they
    # disagree, or where the shorter list runs out.
    j = -1
    for i in range(sort_len):
        if i == base_len or i == build_len:
            end_pos = j + 1
            break
        if base_node_list[j] != build_node_list[j]:
            end_pos = j
            break
        j -= 1

    if start_pos or end_pos:
        # Widen the window by two nodes each way where there is room;
        # None leaves that side of the slice open.
        slice_start = start_pos - 2 if start_pos > 1 else None
        slice_end = end_pos + 2 if end_pos < -2 else None

        existing = base_node_list[slice_start:slice_end]
        # Named new_nodes rather than "set" to avoid shadowing the builtin.
        new_nodes = build_node_list[slice_start:slice_end]

        shape_change_list.append(
            {"property": "routing", "existing": existing, "set": new_nodes}
        )

    return shape_change_list
class StandardTransit(object):
    """Holds a standard transit feed as a Partridge object and contains
    methods to manipulate and translate the GTFS data to MetCouncil's
    Cube Line files.

    .. highlight:: python

    Typical usage example:
    ::

        cube_transit_net = StandardTransit.read_gtfs(BASE_TRANSIT_DIR)
        cube_transit_net.write_as_cube_lin(os.path.join(WRITE_DIR, "outfile.lin"))

    Attributes:
        feed: Partridge Feed object containing read-only access to GTFS feed
        parameters (Parameters): Parameters instance containing information
            about time periods and variables.
    """
def __init__(self, ptg_feed, parameters: Union[Parameters, dict] = {}):
    """
    Constructor for StandardTransit.

    Args:
        ptg_feed: partridge feed object
        parameters: dictionary of parameter settings (see Parameters class)
            or an instance of Parameters. The argument is only read, never
            mutated.

    Raises:
        ValueError: if ``parameters`` is neither a dict nor a Parameters
            instance.
    """
    self.feed = ptg_feed

    # isinstance (rather than an exact type check) also accepts dict
    # subclasses such as OrderedDict.
    if isinstance(parameters, dict):
        self.parameters = Parameters(**parameters)
    elif isinstance(parameters, Parameters):
        # Rebuild from the attribute dict so this instance gets its own
        # Parameters object instead of aliasing the caller's.
        self.parameters = Parameters(**parameters.__dict__)
    else:
        msg = "Parameters should be a dict or instance of Parameters: found {} which is of type:{}".format(
            parameters, type(parameters)
        )
        WranglerLogger.error(msg)
        raise ValueError(msg)
@staticmethod
def fromTransitNetwork(
    transit_network_object: TransitNetwork, parameters: Union[Parameters, dict] = {}
):
    """
    Wraps the feed of a TransitNetwork in a StandardTransit instance.

    Args:
        transit_network_object: Reference to an instance of TransitNetwork.
        parameters: dictionary of parameter settings (see Parameters class)
            or an instance of Parameters. If not provided will use default
            parameters.

    Returns:
        StandardTransit
    """
    network_feed = transit_network_object.feed
    return StandardTransit(network_feed, parameters=parameters)
@staticmethod
def read_gtfs(gtfs_feed_dir: str, parameters: Union[Parameters, dict] = {}):
    """
    Loads the GTFS files in a directory with partridge and wraps the
    resulting feed in a StandardTransit instance.

    Args:
        gtfs_feed_dir: location of the GTFS files
        parameters: dictionary of parameter settings (see Parameters class)
            or an instance of Parameters. If not provided will use default
            parameters.

    Returns:
        StandardTransit instance
    """
    gtfs_feed = ptg.load_feed(gtfs_feed_dir)
    return StandardTransit(gtfs_feed, parameters=parameters)
def write_as_cube_lin(self, outpath: str = None):
    """
    Converts the gtfs feed to MetCouncil cube properties and writes the
    result as a cube line file.
    #MC

    Args:
        outpath: File location for output cube line file. When not given,
            "outtransit.lin" inside the parameters' scratch location is used.
    """
    if not outpath:
        outpath = os.path.join(self.parameters.scratch_location, "outtransit.lin")

    # route_properties_gtfs_to_cube is declared as a staticmethod that takes
    # the instance explicitly, hence the extra ``self`` argument.
    trip_cube_df = self.route_properties_gtfs_to_cube(self)

    # one formatted cube line entry per trip row
    trip_cube_df["LIN"] = trip_cube_df.apply(self.cube_format, axis=1)
    lin_entries = trip_cube_df["LIN"].tolist()
    file_text = "\n".join(lin_entries)

    with open(outpath, "w") as f:
        f.write(file_text)
@staticmethod
def route_properties_gtfs_to_cube(self):
    """
    Prepare gtfs for cube lin file.
    #MC

    Does the following operations:
    1. Combines route, frequency, and trip information
    2. Converts time of day to time periods
    3. Calculates cube route name from gtfs route name and properties
    4. Assigns a cube-appropriate mode number
    5. Assigns a cube-appropriate operator number

    Args:
        self: the StandardTransit instance. This is declared as a
            staticmethod, so callers pass the instance explicitly.

    Returns:
        trip_df (DataFrame): DataFrame of trips with cube-appropriate values for:
            - NAME
            - LONGNAME
            - ONEWAY
            - OPERATOR
            - MODE
            - HEADWAY
    """
    WranglerLogger.info(
        "Converting GTFS Standard Properties to MetCouncil's Cube Standard"
    )
    # agency_id (as a string) -> cube operator number
    metro_operator_dict = {
        "0": 3,
        "1": 3,
        "2": 3,
        "3": 4,
        "4": 2,
        "5": 5,
        "6": 8,
        "7": 1,
        "8": 1,
        "9": 10,
        "10": 3,
        "11": 9,
        "12": 3,
        "13": 4,
        "14": 4,
        "15": 3,
    }

    # The shapes table used to be copied here but was never used, so that
    # copy has been dropped.
    trip_df = self.feed.trips.copy()

    # Add information from: routes and frequencies to trips_df
    trip_df = pd.merge(trip_df, self.feed.routes, how="left", on="route_id")
    trip_df = pd.merge(trip_df, self.feed.frequencies, how="left", on="trip_id")

    trip_df["tod_name"] = trip_df.start_time.apply(self.time_to_cube_time_period)
    inv_cube_time_periods_map = {
        v: k for k, v in self.parameters.cube_time_periods.items()
    }
    trip_df["tod_num"] = trip_df.tod_name.map(inv_cube_time_periods_map)
    trip_df["tod_name"] = trip_df.tod_name.map(
        self.parameters.cube_time_periods_name
    )

    trip_df["NAME"] = trip_df.apply(
        lambda x: x.agency_id
        + "_"
        + x.route_id
        + "_"
        + x.route_short_name
        + "_"
        + x.tod_name
        + str(x.direction_id),
        axis=1,
    )

    trip_df["LONGNAME"] = trip_df["route_long_name"]
    # gtfs headways are in seconds, cube headways are in minutes
    trip_df["HEADWAY"] = (trip_df["headway_secs"] / 60).astype(int)
    trip_df["MODE"] = trip_df.apply(self.calculate_cube_mode, axis=1)
    trip_df["ONEWAY"] = "T"
    trip_df["OPERATOR"] = trip_df["agency_id"].map(metro_operator_dict)

    return trip_df
def calculate_cube_mode(self, row):
    """
    Picks the cube mode number for one route.
    #MC

    Rail modes come straight from the GTFS route_type variable:
    https://developers.google.com/transit/gtfs/reference

    ::
        # route_type : cube_mode
        route_type_to_cube_mode = {0: 8, # Tram, Streetcar, Light rail
                                   3: 0, # Bus; further disaggregated for cube
                                   2: 9} # Rail

    Buses (route_type 3) are split further using the route name and the
    numeric prefix of the route id:

     - 7 (Express) when "express" appears in the long name
     - 6 (Suburban Local) when the route id prefix is above 99
     - 5 (Urban Local) otherwise

    Args:
        row: A DataFrame row with route_type, route_long_name, and route_id

    Returns:
        cube mode number

    Raises:
        KeyError: if route_type is not one of 0, 2 or 3.
    """
    express_mode, suburban_local_mode, urban_local_mode = 7, 6, 5

    # route_type : cube_mode
    cube_mode_by_route_type = {
        0: 8,  # Tram, Streetcar, Light rail
        3: 0,  # Bus; further disaggregated for cube
        2: 9,  # Rail
    }

    cube_mode = cube_mode_by_route_type[row["route_type"]]

    # A non-zero mode is final; zero marks a bus that still needs a class.
    if cube_mode:
        return cube_mode

    if "express" in row["route_long_name"].lower():
        return express_mode

    route_number = int(row["route_id"].split("-")[0])
    if route_number > 99:
        return suburban_local_mode
    return urban_local_mode
def time_to_cube_time_period(
    self, start_time_secs: int, as_str: bool = True, verbose: bool = False
):
    """
    Converts seconds from midnight to the cube time period.

    The result is the time period with the latest start time that is not
    after the given time. Times earlier than every period start fall into
    the period that spans midnight (start later than end), or "NA" if
    there is none.

    Args:
        start_time_secs: start time for transit trip in seconds
            from midnight
        as_str: if True, returns the time period as a string,
            otherwise returns a numeric time period
        verbose: if True, logs the inputs and the selected time period

    Returns:
        this_tp_num: if as_str is False, returns the numeric
            time period
        this_tp: if as_str is True, returns the Cube time period
            name abbreviation

    Raises:
        ValueError: if as_str is False and the time period name has no
            number in parameters.cube_time_periods.
    """
    from .util import hhmmss_to_datetime, secs_to_datetime

    start_time_dt = secs_to_datetime(start_time_secs)

    # set initial time as the time that spans midnight
    this_tp = "NA"
    for tp_name, _times in self.parameters.time_period_to_time.items():
        _start_time, _end_time = _times
        _dt_start_time = hhmmss_to_datetime(_start_time)
        _dt_end_time = hhmmss_to_datetime(_end_time)
        if _dt_start_time > _dt_end_time:
            this_tp = tp_name
            break

    # Pick the period with the LATEST start that is not after the trip.
    # Previously the last matching entry in dict order won, which is only
    # right when the periods are listed chronologically. ">=" keeps the old
    # "later entry wins" result when two periods share a start time.
    latest_start = None
    for tp_name, _times in self.parameters.time_period_to_time.items():
        _start_time, _end_time = _times
        _dt_start_time = hhmmss_to_datetime(_start_time)
        if start_time_dt >= _dt_start_time and (
            latest_start is None or _dt_start_time >= latest_start
        ):
            latest_start = _dt_start_time
            this_tp = tp_name

    if verbose:
        WranglerLogger.debug(
            "Finding Cube Time Period from Start Time: \
                \n - start_time_sec: {} \
                \n - start_time_dt: {} \
                \n - this_tp: {}".format(
                start_time_secs, start_time_dt, this_tp
            )
        )

    if as_str:
        return this_tp

    name_to_num = {v: k for k, v in self.parameters.cube_time_periods.items()}
    this_tp_num = name_to_num.get(this_tp)

    # Compare against None so a legitimate falsy number (e.g. 0) is kept.
    if this_tp_num is None:
        msg = (
            "Cannot find time period number in {} for time period name: {}".format(
                name_to_num, this_tp
            )
        )
        WranglerLogger.error(msg)
        raise ValueError(msg)

    return this_tp_num
def shape_gtfs_to_dict_list(self, trip_id: str, shape_id: str, add_nntime: bool):
    """
    Build one record per shape node of a trip, flagging which nodes are stops.

    The shape's nodes are walked in ``shape_pt_sequence`` order.  A node is
    treated as a stop when its ``shape_model_node_id`` equals the
    ``model_node_id`` of any stop in the trip's stop_times.  This is the same
    node walk that shape_gtfs_to_cube() performs, but it returns records
    instead of a cube node string.

    TODO: eliminate this necessity by tagging the stop nodes in the shapes
    to begin with when the transit routing on the roadway network is first
    performed.

    Args:
        trip_id: trip_id of the trip in question
        shape_id: shape_id of the trip in question
        add_nntime: kept for signature compatibility with
            shape_gtfs_to_cube(); NNTIME is not part of the returned
            records, so this flag does not affect the result.

    Returns:
        list of dict records, one per shape node, with keys:
            shape_id
            shape_pt_sequence
            shape_model_node_id
            trip_id
            is_stop
        and, for stop nodes only:
            access         0, 1 or 2, derived from pickup_type / drop_off_type
            stop_sequence  1-based count of stop nodes met so far on the shape
    """

    def _access_type(pickup_type, drop_off_type):
        # pickup_type 1 -> 2, otherwise drop_off_type 1 -> 1, otherwise 0
        if pickup_type in (1, "1"):
            return 2
        if drop_off_type in (1, "1"):
            return 1
        return 0

    # stop times for this trip, in stop order
    # https://developers.google.com/transit/gtfs/reference#stop_timestxt
    stop_times = self.feed.stop_times
    trip_stop_times_df = (
        stop_times.loc[
            stop_times.trip_id == trip_id,
            ["trip_id", "stop_id", "stop_sequence", "pickup_type", "drop_off_type"],
        ]
        .sort_values(by="stop_sequence")
        .reset_index(drop=True)
    )

    # shape nodes for this trip, in shape order; only these columns are needed
    # https://developers.google.com/transit/gtfs/reference#shapestxt
    shapes = self.feed.shapes
    trip_node_df = (
        shapes.loc[
            shapes.shape_id == shape_id,
            ["shape_id", "shape_pt_sequence", "shape_model_node_id"],
        ]
        .sort_values(by="shape_pt_sequence")
        .reset_index(drop=True)
    )

    # attach the stop attributes (model_node_id in particular) to the stop times
    if "trip_id" in self.feed.stops.columns:
        stop_keys = ["trip_id", "stop_id"]
    else:
        stop_keys = "stop_id"
    trip_stop_times_df = pd.merge(
        trip_stop_times_df, self.feed.stops, how="left", on=stop_keys
    )

    trip_stop_times_df["model_node_id"] = pd.to_numeric(
        trip_stop_times_df["model_node_id"]
    ).astype(int)
    trip_node_df["shape_model_node_id"] = pd.to_numeric(
        trip_node_df["shape_model_node_id"]
    ).astype(int)

    # access code per stop node; when a node is served more than once the
    # first stop_times row (lowest stop_sequence) wins
    access_by_node = {}
    for node_id, pickup_type, drop_off_type in zip(
        trip_stop_times_df["model_node_id"],
        trip_stop_times_df["pickup_type"],
        trip_stop_times_df["drop_off_type"],
    ):
        if node_id not in access_by_node:
            access_by_node[node_id] = _access_type(pickup_type, drop_off_type)

    shape_stop_dict_list = []
    stop_seq = 0
    trip_node_list = trip_node_df["shape_model_node_id"].tolist()
    for node_idx, node_id in enumerate(trip_node_list):
        node_dict = trip_node_df.iloc[node_idx].to_dict()
        node_dict["trip_id"] = trip_id
        if node_id in access_by_node:
            # counts every stop node met, so a circular route that revisits
            # a stop gets a new stop_sequence each time
            stop_seq += 1
            node_dict["is_stop"] = True
            node_dict["access"] = access_by_node[node_id]
            node_dict["stop_sequence"] = stop_seq
        else:
            node_dict["is_stop"] = False
        shape_stop_dict_list.append(node_dict)

    return shape_stop_dict_list
def shape_gtfs_to_cube(self, row, add_nntime = False):
    """
    Creates the list of nodes for a route in cube line-file format.

    Shape nodes are walked in ``shape_pt_sequence`` order.  A node whose id
    matches the ``model_node_id`` of one of the trip's stops is written as a
    stop (positive number, optionally followed by ``NNTIME=`` and/or
    ``ACCESS=``); every other node is written as a non-stop (negative
    number).

    Args:
        row: DataFrame row with both shape_id and trip_id
        add_nntime: if True, stops after the first one get
            ``NNTIME=<minutes>`` when the departure-time difference to the
            previous stop_times row is positive.

    Returns:
        a string representation of the node list for a route in cube format.

    Raises:
        IndexError: if add_nntime is set, a stop node appears more than once
            in the trip's stop_times and none of those rows has the position
            (internal stop sequence) at which the shape reaches it.
    """

    def _access_type(pickup_type, drop_off_type):
        # pickup_type 1 -> 2, otherwise drop_off_type 1 -> 1, otherwise 0
        if pickup_type in (1, "1"):
            return 2
        if drop_off_type in (1, "1"):
            return 1
        return 0

    stop_times = self.feed.stop_times
    trip_stop_times_df = stop_times.loc[stop_times.trip_id == row.trip_id].copy()

    shapes = self.feed.shapes
    trip_node_df = shapes.loc[shapes.shape_id == row.shape_id].copy()
    trip_node_df = trip_node_df.sort_values(by=["shape_pt_sequence"])

    if "trip_id" in self.feed.stops.columns:
        stop_keys = ["trip_id", "stop_id"]
    else:
        stop_keys = "stop_id"
    trip_stop_times_df = pd.merge(
        trip_stop_times_df, self.feed.stops, how="left", on=stop_keys
    )

    trip_stop_times_df["model_node_id"] = pd.to_numeric(
        trip_stop_times_df["model_node_id"]
    ).astype(int)
    trip_node_list = (
        pd.to_numeric(trip_node_df["shape_model_node_id"]).astype(int).tolist()
    )
    # only used for membership tests, so a set is enough
    stop_node_ids = set(trip_stop_times_df["model_node_id"].tolist())

    trip_stop_times_df = trip_stop_times_df.sort_values(by=["stop_sequence"])
    # sometimes GTFS `stop_sequence` does not start with 1, e.g. SFMTA light rails
    trip_stop_times_df["internal_stop_sequence"] = range(1, 1 + len(trip_stop_times_df))

    # sometimes GTFS `departure_time` is not recorded for every stop, e.g. VTA light rails
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # the column selection, which does not update the frame under pandas
    # Copy-on-Write.
    departure_time = trip_stop_times_df["departure_time"].ffill().fillna(0)
    trip_stop_times_df["departure_time"] = departure_time
    # CUBE NNTIME takes 2 decimals; the first stop has no predecessor -> -1
    trip_stop_times_df["NNTIME"] = (departure_time.diff() / 60).round(2).fillna(-1)

    # ACCESS
    trip_stop_times_df["ACCESS"] = [
        _access_type(pickup_type, drop_off_type)
        for pickup_type, drop_off_type in zip(
            trip_stop_times_df["pickup_type"], trip_stop_times_df["drop_off_type"]
        )
    ]

    # node list
    last_idx = len(trip_node_list) - 1
    parts = []
    stop_seq = 0
    for node_idx, node_id in enumerate(trip_node_list):
        has_next = node_idx < last_idx

        if node_id not in stop_node_ids:
            parts.append("\n -%s" % (node_id))
            if has_next:
                parts.append(",")
            continue

        # in case a route stops at a stop more than once, e.g. circular route
        stop_seq += 1
        node_stop_times = trip_stop_times_df[
            trip_stop_times_df["model_node_id"] == node_id
        ]

        nntime = ""
        if add_nntime and stop_seq > 1:
            if len(node_stop_times) > 1:
                nntime_v = node_stop_times.loc[
                    node_stop_times["internal_stop_sequence"] == stop_seq, "NNTIME"
                ].iloc[0]
            else:
                nntime_v = node_stop_times["NNTIME"].iloc[0]
            if nntime_v > 0:
                nntime = ", NNTIME=%s" % (nntime_v)

        # uses the first stop_times row for this node, even when the node
        # is served more than once
        access_v = node_stop_times["ACCESS"].iloc[0]
        access = ", ACCESS=%s" % (access_v) if access_v > 0 else ""

        parts.append("\n %s%s%s" % (node_id, nntime, access))
        if has_next:
            parts.append(",")
            # after a node attribute the next node needs an explicit "N="
            if nntime or access:
                parts.append(" N=")

    node_list_str = "".join(parts)

    # remove NNTIME = 0
    node_list_str = node_list_str.replace(" NNTIME=0.0, N=", "")
    node_list_str = node_list_str.replace(" NNTIME=0.0,", "")

    return node_list_str
def cube_format(self, row):
    """
    Creates a string representing the route in cube line file notation.

    Args:
        row: row of a DataFrame representing a cube-formatted trip, with the
            attributes NAME, LONGNAME, tod_num, HEADWAY, MODE, ONEWAY and
            OPERATOR. The row is also handed to shape_gtfs_to_cube(), which
            supplies the text written after ``NODES=``.

    Returns:
        string representation of route in cube line file notation
    """
    line_template = (
        '\nLINE NAME="{name}",'
        '\n LONGNAME="{longname}",'
        "\n HEADWAY[{period}]={headway},"
        "\n MODE={mode},"
        "\n ONEWAY={oneway},"
        "\n OPERATOR={operator},"
        "\n NODES={nodes}"
    )
    return line_template.format(
        name=row.NAME,
        longname=row.LONGNAME,
        period=row.tod_num,
        headway=row.HEADWAY,
        mode=row.MODE,
        oneway=row.ONEWAY,
        operator=row.OPERATOR,
        # node list is produced with shape_gtfs_to_cube()'s default arguments
        nodes=self.shape_gtfs_to_cube(row),
    )
def shape_gtfs_to_emme(self, trip_row):
    """
    Creates transit segments for a trip in emme format.

    Shape nodes are walked in ``shape_pt_sequence`` order.  A node whose
    ``shape_model_node_id`` matches the ``model_node_id`` of one of the
    trip's stops is a stop; all other nodes get zero / empty stop attributes.

    Args:
        trip_row: DataFrame row with trip_id, shape_id, TM2_line_haul_name
            and line_id (line_id is read with ``trip_row['line_id']``).

    Returns:
        the trip's rows of the feed's shapes table, sorted by
        shape_pt_sequence, with these columns added:
            time_minutes      NNTIME for stops after the first one on rail /
                              ferry lines, otherwise 0
            allow_alightings  1 if passengers may alight at the node, else 0
            allow_boardings   1 if passengers may board at the node, else 0
            stop_name         stop name, "" for non-stop nodes
            line_id           trip_row['line_id']
            node_id           shape_model_node_id as int
            stop_order        shape_pt_sequence

    Raises:
        IndexError: if a rail / ferry stop node appears more than once in the
            trip's stop_times and none of those rows has the position
            (internal stop sequence) at which the shape reaches it.
    """
    stop_times = self.feed.stop_times
    trip_stop_times_df = stop_times.loc[
        stop_times.trip_id == trip_row.trip_id
    ].copy()

    shapes = self.feed.shapes
    trip_node_df = shapes.loc[shapes.shape_id == trip_row.shape_id].copy()
    trip_node_df = trip_node_df.sort_values(by=["shape_pt_sequence"])

    trip_stop_times_df = pd.merge(
        trip_stop_times_df, self.feed.stops, how="left", on="stop_id"
    )

    # only used for membership tests, so a set is enough
    stop_node_ids = set(trip_stop_times_df["model_node_id"].tolist())
    trip_node_list = trip_node_df["shape_model_node_id"].tolist()

    trip_stop_times_df = trip_stop_times_df.sort_values(by=["stop_sequence"])
    # sometimes GTFS `stop_sequence` does not start with 1, e.g. SFMTA light rails
    trip_stop_times_df["internal_stop_sequence"] = range(1, 1 + len(trip_stop_times_df))

    # sometimes GTFS `departure_time` is not recorded for every stop, e.g. VTA light rails
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # the column selection, which does not update the frame under pandas
    # Copy-on-Write.
    departure_time = trip_stop_times_df["departure_time"].ffill().fillna(0)
    trip_stop_times_df["departure_time"] = departure_time
    # CUBE NNTIME takes 2 decimals; the first stop has no predecessor -> -1
    trip_stop_times_df["NNTIME"] = (departure_time.diff() / 60).round(2).fillna(-1)

    # station-to-station times are only written for these line-haul modes
    add_nntime = trip_row.TM2_line_haul_name in [
        "Light rail", "Heavy rail", "Commuter rail", "Ferry service"
    ]

    # node list
    stop_seq = 0
    nntimes = []
    allow_alightings = []
    allow_boardings = []
    stop_names = []

    for node_id in trip_node_list:
        if node_id not in stop_node_ids:
            nntimes.append(0)
            allow_alightings.append(0)
            allow_boardings.append(0)
            stop_names.append("")
            continue

        # in case a route stops at a stop more than once, e.g. circular route
        stop_seq += 1
        node_stop_times = trip_stop_times_df[
            trip_stop_times_df["model_node_id"] == node_id
        ]

        if add_nntime and stop_seq > 1:
            if len(node_stop_times) > 1:
                nntime_v = node_stop_times.loc[
                    node_stop_times["internal_stop_sequence"] == stop_seq, "NNTIME"
                ].iloc[0]
            else:
                nntime_v = node_stop_times["NNTIME"].iloc[0]
            nntimes.append(nntime_v)
        else:
            nntimes.append(0)

        # The remaining attributes use the first stop_times row for this
        # node, even when the node is served more than once.
        # GTFS pickup_type 1 = "no pickup available": passengers cannot board.
        pickup_type = node_stop_times["pickup_type"].iloc[0]
        allow_boardings.append(0 if pickup_type in [1, "1"] else 1)

        # GTFS drop_off_type 1 = "no drop off available": passengers cannot alight.
        drop_off_type = node_stop_times["drop_off_type"].iloc[0]
        allow_alightings.append(0 if drop_off_type in [1, "1"] else 1)

        stop_names.append(node_stop_times["stop_name"].iloc[0])

    trip_node_df['time_minutes'] = nntimes
    trip_node_df['allow_alightings'] = allow_alightings
    trip_node_df['allow_boardings'] = allow_boardings
    trip_node_df['stop_name'] = stop_names
    trip_node_df['line_id'] = trip_row['line_id']
    trip_node_df['node_id'] = trip_node_df['shape_model_node_id'].astype(int)
    trip_node_df['stop_order'] = trip_node_df['shape_pt_sequence']

    return trip_node_df
def evaluate_differences(self, transit_changes):
    """
    Compare changes from the transit_changes dataframe with the standard
    transit network and return the project card changes in dictionary format.

    Only rows with operation 'C' whose line id exists in this network are
    considered.  Three kinds of change are collected per line: headway
    changes (object 'TRANSIT_LINE'), shape changes (object 'TRANSIT_SHAPE')
    and stop boarding / alighting changes (object 'TRANSIT_STOP').

    Args:
        transit_changes: DataFrame of changes.  Columns read here:
            element_id, object, operation, headway, new_itinerary, i_node,
            allow_alightings, allow_boardings.
            NOTE: a 'line_id' column is added to this DataFrame in place.

    Returns:
        list of project card dictionaries, one per changed line, each with
        the keys "category", "facility" and "properties".
    """
    # simple properties change
    trip_df = self.feed.trips.copy()

    mode_crosswalk = pd.read_csv(self.parameters.mode_crosswalk_file)
    mode_crosswalk.drop_duplicates(subset = ["agency_raw_name", "route_type", "is_express_bus"], inplace = True)

    trip_df = pd.merge(trip_df, self.feed.routes.drop("agency_raw_name", axis = 1), how="left", on="route_id")

    trip_df = pd.merge(trip_df, self.feed.frequencies, how="left", on="trip_id")

    trip_df["tod"] = trip_df.start_time.apply(self.time_to_cube_time_period, as_str = False)
    trip_df["tod_name"] = trip_df.start_time.apply(self.time_to_cube_time_period)

    trip_df["headway_minutes"] = (trip_df["headway_secs"] / 60).astype(int)

    trip_df = pd.merge(trip_df, self.feed.agency[["agency_name", "agency_raw_name", "agency_id"]], how = "left", on = ["agency_raw_name", "agency_id"])

    # identify express bus
    # moved this here from top since this StandardTransit shouldn't depend on mtc...
    from .mtc import _is_express_bus
    trip_df["is_express_bus"] = trip_df.apply(lambda x: _is_express_bus(x), axis = 1)
    trip_df.drop("agency_name", axis = 1 , inplace = True)

    trip_df = pd.merge(
        trip_df,
        mode_crosswalk.drop("agency_id", axis = 1),
        how = "left",
        on = ["agency_raw_name", "route_type", "is_express_bus"]
    )

    # line_id is <operator>_<route>_<time period>_d<direction>_s<shape>,
    # cut to 28 characters; the card facility below is parsed back out of it
    trip_df["line_id"] = trip_df.apply(
        lambda x: str(x.TM2_operator)
        + "_"
        + str(x.route_id)
        + "_"
        + x.tod_name
        + "_"
        + "d"
        + str(int(x.direction_id))
        + "_s"
        + x.shape_id,
        axis=1,
    )

    trip_df["line_id"] = trip_df["line_id"].str.slice(stop = 28)

    project_card_changes = []

    # lines updated
    # a TRANSIT_STOP element_id is the line id followed by three more
    # '-'-separated parts, which are dropped here
    transit_changes['line_id'] = transit_changes.apply(
        lambda x: '-'.join(x['element_id'].split('-')[:-3]) if
        x['object'] == 'TRANSIT_STOP' else
        x['element_id'],
        axis = 1
    )

    lines_updated_df = transit_changes[
        (transit_changes['operation'] == 'C') &
        (transit_changes['line_id'].isin(trip_df['line_id'].tolist()))
    ].copy()

    #########################
    # simple property changes
    #########################

    property_changes_df = lines_updated_df[
        lines_updated_df.object == 'TRANSIT_LINE'
    ].copy()

    property_attribute_list = ['headway_secs']

    for index, row in property_changes_df.iterrows():
        line_id = row['line_id']
        properties_list = []
        for c in property_attribute_list:
            # a fresh dict per attribute, so entries of properties_list never
            # share (and overwrite) one another
            change_item = {}
            existing_value = int(trip_df[
                trip_df['line_id'] == line_id
            ][c].iloc[0])

            change_item["existing"] = existing_value

            if c == 'headway_secs':
                # the change table carries headway in minutes
                change_item["set"] = row['headway'] * 60
            else:
                change_item["set"] = row[c]

            change_item["property"] = c

            properties_list.append(change_item)

        property_changes_df.loc[index, 'properties'] = properties_list

    ###############
    # shape changes
    ###############

    shape_changes_df = lines_updated_df[
        lines_updated_df.object.isin(['TRANSIT_SHAPE'])
    ].copy()

    for index, row in shape_changes_df.iterrows():
        line_id = row.line_id

        # get base shape
        trip_row = trip_df[trip_df.line_id == line_id].copy().squeeze()

        base_shape = self.shape_gtfs_to_emme(
            trip_row=trip_row
        )
        base_shape['shape_model_node_id'] = base_shape['shape_model_node_id'].astype(int)

        # compare against the build shape (row.new_itinerary)
        updated_shapes = CubeTransit.evaluate_route_shape_changes(
            shape_base = base_shape.shape_model_node_id,
            shape_build = pd.Series(row.new_itinerary)
        )
        updated_shapes[0]['property'] = 'shapes'
        shape_changes_df.loc[index, 'properties'] = updated_shapes

    ##############
    # stop changes
    ##############
    stop_changes_df = lines_updated_df[
        lines_updated_df.object.isin(['TRANSIT_STOP'])
    ].copy()

    stop_attribute_list = ['allow_alightings', 'allow_boardings']

    # keep the last recorded value per line and node
    stop_changes_df = stop_changes_df.groupby(
        ['line_id','i_node']
    )[stop_attribute_list].last().reset_index()

    stop_attribute_changes_df = pd.DataFrame()

    for attribute in stop_attribute_list:

        attribute_df = stop_changes_df.groupby(
            ['line_id', attribute]
        )['i_node'].apply(list).reset_index()
        # property is e.g. 'allow_boardings' when the flag is True and
        # 'no_boardings' otherwise; 'set' is the list of affected nodes
        attribute_df['properties'] = attribute_df.apply(
            lambda x: {
                'property' : attribute if x[attribute] == True else 'no_'+attribute.split('_')[-1],
                'set': x['i_node']},
            axis = 1
        )

        stop_attribute_changes_df = pd.concat(
            [stop_attribute_changes_df,
            attribute_df[['line_id', 'properties']]],
            sort = False,
            ignore_index = True
        )

    ##############
    # combine all transit changes
    ##############
    transit_changes_df = pd.concat(
        [
            property_changes_df,
            shape_changes_df,
            stop_attribute_changes_df
        ],
        sort = False,
        ignore_index = True
    )

    # groupby line_id
    transit_changes_df = transit_changes_df.groupby(
        ['line_id']
    )['properties'].apply(list).reset_index()

    # create change items by line_id
    for index, row in transit_changes_df.iterrows():
        line_id = row['line_id']
        line_id_parts = line_id.split("_")
        # third line_id part is the time period name
        base_times = self.parameters.time_period_to_time.get(line_id_parts[2])
        base_start_time_str = base_times[0]
        base_end_time_str = base_times[1]

        update_card_dict = {
            "category": "Transit Service Property Change",
            "facility": {
                "route_id": line_id_parts[1],
                "direction_id": int(line_id_parts[-2].strip("d\"")),
                "shape_id": line_id_parts[-1].strip("s\""),
                "start_time": base_start_time_str,
                "end_time": base_end_time_str
            },
            "properties": row['properties'],
        }

        project_card_changes.append(update_card_dict)

    return project_card_changes
class CubeTransformer(Transformer):
    """A lark-parsing Transformer which transforms the parse-tree to
    a dictionary.

    .. highlight:: python

    Typical usage example:
    ::

        transformed_tree_data = CubeTransformer().transform(parse_tree)

    Attributes:
        line_order (int): a dynamic counter to hold the order of the nodes within
            a route shape
        lines_list (list): a list of the line names
    """

    def __init__(self):
        self.line_order = 0
        self.lines_list = []

    def lines(self, line):
        """Collect the (name, data) pairs of all parsed lines into a dict."""
        # This MUST be a tuple because it returns to start in the tree
        lines = {k: v for k, v in line}
        return ("lines", lines)

    @v_args(inline=True)
    def program_type_line(self, PROGRAM_TYPE, whitespace=None):
        """Record the program type and return it as a (key, value) pair."""
        self.program_type = PROGRAM_TYPE.value

        # This MUST be a tuple because it returns to start in the tree
        return ("program_type", PROGRAM_TYPE.value)

    @v_args(inline=True)
    def line(self, lin_attributes, nodes):
        """Return (line name, {"line_properties": ..., "line_shape": ...})."""
        lin_name = lin_attributes["NAME"]

        # node numbering restarts with every line
        self.line_order = 0
        return (lin_name, {"line_properties": lin_attributes, "line_shape": nodes})

    @v_args(inline=True)
    def lin_attributes(self, *lin_attr):
        """Turn the (name, value) pairs of one line into a dict."""
        lin_attr = {k: v for (k, v) in lin_attr}
        return lin_attr

    @v_args(inline=True)
    def lin_attr(self, lin_attr_name, attr_value, SEMICOLON_COMMENT=None):
        """Return one line attribute as a (name, value) pair."""
        return lin_attr_name, attr_value

    def lin_attr_name(self, args):
        """Return the upper-cased attribute name.

        FREQ and HEADWAY keep their time period, e.g. ``HEADWAY[2]``.
        """
        attr_name = args[0].value.upper()
        if attr_name in ["FREQ", "HEADWAY"]:
            # args is: name, "[", TIME_PERIOD, "]"
            attr_name = attr_name + "[" + str(args[2]) + "]"
        return attr_name

    def attr_value(self, attr_value):
        """Return the value as int when it parses as one, else as its raw text."""
        try:
            return int(attr_value[0].value)
        except (ValueError, TypeError):
            # not an integer literal (e.g. a string, boolean or float)
            return attr_value[0].value

    def nodes(self, lin_node):
        """Assemble the node records of one line into a DataFrame."""
        lin_node = DataFrame(lin_node)
        return lin_node

    @v_args(inline=True)
    def lin_node(self, NODE_NUM, SEMICOLON_COMMENT=None, *lin_nodeattr):
        """Return one node record; a negative node number is a non-stop."""
        self.line_order += 1
        n = int(NODE_NUM.value)
        return {"node_id": abs(n), "node": n, "stop": n > 0, "order": self.line_order}

    # the children of the start rule are (key, value) tuples, so the whole
    # tree becomes a dict
    start = dict


TRANSIT_LINE_FILE_GRAMMAR = r"""

start             : program_type_line? lines
WHITESPACE        : /[ \t\r\n]/+
STRING            : /("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
SEMICOLON_COMMENT : /;[^\n]*/
BOOLEAN           : "T"i | "F"i
program_type_line : ";;<<" PROGRAM_TYPE ">><<LINE>>;;" WHITESPACE?
PROGRAM_TYPE      : "PT" | "TRNBUILD"

lines             : line*
line              : "LINE" lin_attributes nodes

lin_attributes    : lin_attr+
lin_attr          : lin_attr_name "=" attr_value "," SEMICOLON_COMMENT*
TIME_PERIOD       : "1".."5"
!lin_attr_name    : "allstops"i
                    | "color"i
                    | ("freq"i "[" TIME_PERIOD "]")
                    | ("headway"i "[" TIME_PERIOD "]")
                    | "mode"i
                    | "name"i
                    | "oneway"i
                    | "owner"i
                    | "runtime"i
                    | "timefac"i
                    | "xyspeed"i
                    | "longname"i
                    | "shortname"i
                    | ("usera1"i)
                    | ("usera2"i)
                    | "circular"i
                    | "vehicletype"i
                    | "operator"i
                    | "faresystem"i

attr_value        : BOOLEAN | STRING | SIGNED_INT | FLOAT

nodes             : lin_node+
lin_node          : ("N" | "NODES")? "="? NODE_NUM ","? SEMICOLON_COMMENT? lin_nodeattr*
NODE_NUM          : SIGNED_INT
lin_nodeattr      : lin_nodeattr_name "=" attr_value ","? SEMICOLON_COMMENT*
!lin_nodeattr_name : "access_c"i
                    | "access"i
                    | "delay"i
                    | "xyspeed"i
                    | "timefac"i
                    | "nntime"i
                    | "time"i

operator          : SEMICOLON_COMMENT* "OPERATOR" opmode_attr* SEMICOLON_COMMENT*
mode              : SEMICOLON_COMMENT* "MODE" opmode_attr* SEMICOLON_COMMENT*
opmode_attr       : ( (opmode_attr_name "=" attr_value) ","? )
opmode_attr_name  : "number" | "name" | "longname"

%import common.SIGNED_INT
%import common.FLOAT
%import common.WS
%ignore WS

"""