Source code for network_wrangler.transitnetwork

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import annotations

import copy
import os
import re
from typing import Optional, Union

import networkx as nx
import numpy as np
import pandas as pd
import partridge as ptg
from partridge.config import default_config

from .logger import WranglerLogger
from .utils import parse_time_spans
from .roadwaynetwork import RoadwayNetwork

SHAPES_FOREIGN_KEY = "shape_model_node_id"
STOPS_FOREIGN_KEY = "model_node_id"

ID_SCALAR = 100000000


class TransitNetwork(object):
    """
    Representation of a Transit Network.

    .. highlight:: python
    Typical usage example:
    ::

        import network_wrangler as wr
        stpaul = r'/home/jovyan/work/example/stpaul'
        tc = wr.TransitNetwork.read(feed_path=stpaul)

    Attributes:
        feed (DotDict): Partridge feed mapping dataframes.
        config (nx.DiGraph): Partridge config.
        road_net (RoadwayNetwork): Associated roadway network object.
        graph (nx.MultiDiGraph): Graph for associated roadway network object.
        feed_path (str): Where the feed was read in from.
        validated_frequencies (bool): The frequencies have been validated.
        validated_road_network_consistency (bool): The network has been validated
            against the road network.
        shapes_foreign_key (str): Foreign key between shapes dataframe and roadway
            network nodes.
        stops_foreign_key (str): Foreign key between stops dataframe and roadway
            network nodes.
        id_scalar (int): Scalar value added to create new stop and shape IDs when
            necessary.
        REQUIRED_FILES (list[str]): List of files that the transit network requires.
    """

    REQUIRED_FILES = [
        "agency.txt",
        "frequencies.txt",
        "routes.txt",
        "shapes.txt",
        "stop_times.txt",
        "stops.txt",
        "trips.txt",
    ]

    def __init__(
        self,
        feed: DotDict = None,
        config: nx.DiGraph = None,
        shapes_foreign_key: str = None,
        stops_foreign_key: str = None,
        id_scalar: int = None,
    ):
        """
        Constructor

        .. todo:: Make graph a reference to associated RoadwayNetwork's graph, not its own thing.
        """
        self.feed: DotDict = feed
        self.config: nx.DiGraph = config
        self.id_scalar = id_scalar
        self.shapes_foreign_key = shapes_foreign_key
        self.stops_foreign_key = stops_foreign_key

        self.road_net: RoadwayNetwork = None
        self.graph: nx.MultiDiGraph = None
        self.feed_path = None

        self.validated_frequencies = False
        self.validated_road_network_consistency = False

        if not self.validate_frequencies():
            raise ValueError(
                "Transit lines with non-positive frequencies exist in the network"
            )

    @staticmethod
    def empty() -> TransitNetwork:
        """
        Create an empty transit network instance using the default config.

        .. todo:: fill out this method
        """
        msg = "TransitNetwork.empty is not implemented."
        WranglerLogger.error(msg)
        raise NotImplementedError(msg)

    @staticmethod
    def read(
        feed_path: str,
        shapes_foreign_key: str = SHAPES_FOREIGN_KEY,
        stops_foreign_key: str = STOPS_FOREIGN_KEY,
        id_scalar: int = ID_SCALAR,
    ) -> TransitNetwork:
        """
        Read a GTFS feed from a folder and return a TransitNetwork object.

        Args:
            feed_path: where to read transit network files from.
            shapes_foreign_key: foreign key between shapes dataframe and roadway
                network nodes. Defaults to SHAPES_FOREIGN_KEY if not provided.
            stops_foreign_key: foreign key between stops dataframe and roadway
                network nodes. Defaults to STOPS_FOREIGN_KEY if not provided.
            id_scalar: scalar value added to create new stop and shape IDs when
                necessary. Defaults to ID_SCALAR if not provided.

        Returns:
            a TransitNetwork object.
        """
        config = default_config()
        feed = ptg.load_feed(feed_path, config=config)
        WranglerLogger.info("Read in transit feed from: {}".format(feed_path))

        updated_config = TransitNetwork.validate_feed(feed, config)

        # Read in each feed table so we can write over them later
        editable_feed = DotDict()
        for node in updated_config.nodes.keys():
            # Load (initiate Partridge's lazy load)
            editable_feed[node.replace(".txt", "")] = feed.get(node)

        transit_network = TransitNetwork(
            feed=editable_feed,
            config=updated_config,
            shapes_foreign_key=shapes_foreign_key,
            stops_foreign_key=stops_foreign_key,
            id_scalar=id_scalar,
        )
        transit_network.feed_path = feed_path

        # Fare tables are not part of the default Partridge config; read them directly
        fare_attributes_df = pd.read_csv(os.path.join(feed_path, "fare_attributes.txt"))
        fare_rules_df = pd.read_csv(os.path.join(feed_path, "fare_rules.txt"))
        transit_network.feed.fare_attributes = fare_attributes_df
        transit_network.feed.fare_rules = fare_rules_df

        return transit_network
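
    # Illustrative usage sketch for `read` (the feed path is hypothetical):
    #
    #   >>> from network_wrangler import TransitNetwork
    #   >>> net = TransitNetwork.read(feed_path="examples/stpaul")
    #   >>> net.feed.trips.head()
    #
    # The foreign keys and ID scalar default to the module constants
    # SHAPES_FOREIGN_KEY, STOPS_FOREIGN_KEY, and ID_SCALAR defined above.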

    @staticmethod
    def validate_feed(feed: DotDict, config: nx.DiGraph) -> nx.DiGraph:
        """
        Since Partridge lazily loads the df, load each file to make sure it
        actually works.

        Partridge uses a DiGraph from the networkx library to represent the
        relationships between GTFS files. Each file is a 'node', and the
        relationships between files are 'edges'.

        Args:
            feed: partridge feed
            config: partridge config

        Returns:
            the partridge config, updated to drop any empty files.
        """
        updated_config = copy.deepcopy(config)
        files_not_found = []
        for node in config.nodes.keys():
            n = feed.get(node)
            WranglerLogger.debug("...{}:\n{}".format(node, n[:10]))
            if n.shape[0] == 0:
                WranglerLogger.info(
                    "Removing {} from transit network config because file not found".format(
                        node
                    )
                )
                updated_config.remove_node(node)
                if node in TransitNetwork.REQUIRED_FILES:
                    files_not_found.append(node)

        if files_not_found:
            msg = "Required files not found or valid: {}".format(
                ",".join(files_not_found)
            )
            WranglerLogger.error(msg)
            raise AttributeError(msg)

        TransitNetwork.validate_network_keys(feed)

        return updated_config

    def validate_frequencies(self) -> bool:
        """
        Validates that there are no transit trips in the feed with zero or
        negative headways.

        Changes the state of the self.validated_frequencies boolean based on
        the outcome.

        Returns:
            boolean indicating if valid or not.
        """
        _valid = True
        zero_freq = self.feed.frequencies[self.feed.frequencies.headway_secs <= 0]

        if len(zero_freq.index) > 0:
            _valid = False
            msg = "Transit lines {} have non-positive frequencies".format(
                zero_freq.trip_id.to_list()
            )
            WranglerLogger.error(msg)

        self.validated_frequencies = True

        return _valid

    def validate_road_network_consistencies(self) -> bool:
        """
        Validates the transit network against the road network for both stops
        and shapes.

        Returns:
            boolean indicating if valid or not.
        """
        if self.road_net is None:
            raise ValueError(
                "RoadwayNetwork not set yet, see TransitNetwork.set_roadnet()"
            )
        valid = True

        valid_stops = self.validate_transit_stops()
        valid_shapes = self.validate_transit_shapes()

        self.validated_road_network_consistency = True

        if not valid_stops or not valid_shapes:
            valid = False
            raise ValueError("Transit network is not consistent with road network.")

        return valid

    def validate_transit_stops(self) -> bool:
        """
        Validates that all transit stops are part of the roadway network.

        Returns:
            boolean indicating if valid or not.
        """
        if self.road_net is None:
            raise ValueError(
                "RoadwayNetwork not set yet, see TransitNetwork.set_roadnet()"
            )

        stops = self.feed.stops
        nodes = self.road_net.nodes_df

        valid = True

        stop_ids = [int(s) for s in stops[self.stops_foreign_key].to_list()]
        node_ids = [int(n) for n in nodes[self.road_net.node_foreign_key].to_list()]

        if not set(stop_ids).issubset(node_ids):
            valid = False
            missing_stops = list(set(stop_ids) - set(node_ids))
            msg = "Not all transit stops are part of the roadway network. "
            msg += "Missing stops ({}) from the roadway nodes are {}.".format(
                self.stops_foreign_key, missing_stops
            )
            WranglerLogger.error(msg)

        return valid

    def validate_transit_shapes(self) -> bool:
        """
        Validates that all transit shapes are part of the roadway network.

        Returns:
            boolean indicating if valid or not.
        """
        if self.road_net is None:
            raise ValueError(
                "RoadwayNetwork not set yet, see TransitNetwork.set_roadnet()"
            )

        shapes_df = self.feed.shapes
        nodes_df = self.road_net.nodes_df
        links_df = self.road_net.links_df

        valid = True

        # Check that all the node IDs in the shapes exist in the network
        shape_ids = [int(s) for s in shapes_df[self.shapes_foreign_key].to_list()]
        node_ids = [int(n) for n in nodes_df[self.road_net.node_foreign_key].to_list()]

        if not set(shape_ids).issubset(node_ids):
            valid = False
            missing_shapes = list(set(shape_ids) - set(node_ids))
            msg = "Not all transit shapes are part of the roadway network. "
            msg += "Missing shapes ({}) from the roadway network are {}.".format(
                self.shapes_foreign_key, missing_shapes
            )
            WranglerLogger.error(msg)
            return valid

        # Check that all the links in the transit shapes exist in the network
        # and that transit is allowed on them
        shapes_df = shapes_df.astype({self.shapes_foreign_key: int})
        unique_shape_ids = shapes_df.shape_id.unique().tolist()

        for shape_id in unique_shape_ids:
            subset_shapes_df = shapes_df[shapes_df["shape_id"] == shape_id]
            subset_shapes_df = subset_shapes_df.sort_values(by=["shape_pt_sequence"])
            # Pair each shape point with the next one to form A-B links
            subset_shapes_df = subset_shapes_df.add_suffix("_1").join(
                subset_shapes_df.shift(-1).add_suffix("_2")
            )
            subset_shapes_df = subset_shapes_df.dropna()

            merged_df = subset_shapes_df.merge(
                links_df,
                how="left",
                left_on=[
                    self.shapes_foreign_key + "_1",
                    self.shapes_foreign_key + "_2",
                ],
                right_on=["A", "B"],
                indicator=True,
            )

            missing_links_df = merged_df.query('_merge == "left_only"')

            # There are shape links which do not exist in the roadway network
            if len(missing_links_df.index) > 0:
                valid = False
                msg = "There are links for shape id {} which are missing in the roadway network.".format(
                    shape_id
                )
                WranglerLogger.error(msg)

            transit_not_allowed_df = merged_df.query(
                '_merge == "both" & drive_access == 0 & bus_only == 0 & rail_only == 0'
            )

            # There are shape links where transit is not allowed
            if len(transit_not_allowed_df.index) > 0:
                valid = False
                msg = "There are links for shape id {} which do not allow transit in the roadway network.".format(
                    shape_id
                )
                WranglerLogger.error(msg)

        return valid

    @staticmethod
    def route_ids_in_routestxt(feed: DotDict) -> bool:
        """
        Wherever route_id occurs, make sure it is in routes.txt.

        Args:
            feed: partridge feed object

        Returns:
            Boolean indicating if feed is okay.
        """
        route_ids_routestxt = set(feed.routes.route_id.tolist())
        route_ids_referenced = set(feed.trips.route_id.tolist())

        missing_routes = route_ids_referenced - route_ids_routestxt

        if missing_routes:
            WranglerLogger.warning(
                "The following route_ids are referenced but missing from routes.txt: {}".format(
                    list(missing_routes)
                )
            )
            return False
        return True

    @staticmethod
    def trip_ids_in_tripstxt(feed: DotDict) -> bool:
        """
        Wherever trip_id occurs, make sure it is in trips.txt.

        Args:
            feed: partridge feed object

        Returns:
            Boolean indicating if feed is okay.
        """
        trip_ids_tripstxt = set(feed.trips.trip_id.tolist())
        trip_ids_referenced = set(
            feed.stop_times.trip_id.tolist() + feed.frequencies.trip_id.tolist()
        )

        missing_trips = trip_ids_referenced - trip_ids_tripstxt

        if missing_trips:
            WranglerLogger.warning(
                "The following trip_ids are referenced but missing from trips.txt: {}".format(
                    list(missing_trips)
                )
            )
            return False
        return True

    @staticmethod
    def shape_ids_in_shapestxt(feed: DotDict) -> bool:
        """
        Wherever shape_id occurs, make sure it is in shapes.txt.

        Args:
            feed: partridge feed object

        Returns:
            Boolean indicating if feed is okay.
        """
        shape_ids_shapestxt = set(feed.shapes.shape_id.tolist())
        shape_ids_referenced = set(feed.trips.shape_id.tolist())

        missing_shapes = shape_ids_referenced - shape_ids_shapestxt

        if missing_shapes:
            WranglerLogger.warning(
                "The following shape_ids from trips.txt are missing from shapes.txt: {}".format(
                    list(missing_shapes)
                )
            )
            return False
        return True

    @staticmethod
    def stop_ids_in_stopstxt(feed: DotDict) -> bool:
        """
        Wherever stop_id occurs, make sure it is in stops.txt.

        Args:
            feed: partridge feed object

        Returns:
            Boolean indicating if feed is okay.
        """
        stop_ids_stopstxt = set(feed.stops.stop_id.tolist())
        stop_ids_referenced = []

        # STOP_TIMES
        stop_ids_referenced.extend(feed.stop_times.stop_id.dropna().tolist())
        stop_ids_referenced.extend(feed.stops.parent_station.dropna().tolist())

        # TRANSFERS
        if feed.get("transfers.txt").shape[0] > 0:
            stop_ids_referenced.extend(feed.transfers.from_stop_id.dropna().tolist())
            stop_ids_referenced.extend(feed.transfers.to_stop_id.dropna().tolist())

        # PATHWAYS
        if feed.get("pathways.txt").shape[0] > 0:
            stop_ids_referenced.extend(feed.pathways.from_stop_id.dropna().tolist())
            stop_ids_referenced.extend(feed.pathways.to_stop_id.dropna().tolist())

        stop_ids_referenced = set(stop_ids_referenced)

        missing_stops = stop_ids_referenced - stop_ids_stopstxt

        if missing_stops:
            WranglerLogger.warning(
                "The following stop_ids are referenced but missing from stops.txt: {}".format(
                    list(missing_stops)
                )
            )
            return False
        return True

    @staticmethod
    def validate_network_keys(feed: DotDict) -> bool:
        """
        Validates that foreign keys are present in all connecting feed files.

        Args:
            feed: partridge feed object

        Returns:
            Boolean indicating if feed is okay.
        """
        result = True
        result = result and TransitNetwork.route_ids_in_routestxt(feed)
        result = result and TransitNetwork.trip_ids_in_tripstxt(feed)
        result = result and TransitNetwork.shape_ids_in_shapestxt(feed)
        result = result and TransitNetwork.stop_ids_in_stopstxt(feed)
        return result
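
    # Illustrative sketch of running the key checks above on a raw Partridge
    # feed (the feed path is hypothetical):
    #
    #   >>> import partridge as ptg
    #   >>> feed = ptg.load_feed("examples/stpaul", config=default_config())
    #   >>> TransitNetwork.validate_network_keys(feed)
    #   True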

    def set_roadnet(
        self,
        road_net: RoadwayNetwork,
        graph_shapes: bool = False,
        graph_stops: bool = False,
        validate_consistency: bool = True,
    ) -> None:
        """
        Associates a RoadwayNetwork with this transit network and optionally
        validates consistency between the two.

        Args:
            road_net: the RoadwayNetwork to associate.
            graph_shapes: if True, graph the shapes onto the roadway network.
            graph_stops: if True, graph the stops onto the roadway network.
            validate_consistency: if True, validate the transit network against
                the roadway network.
        """
        self.road_net: RoadwayNetwork = road_net
        self.graph: nx.MultiDiGraph = RoadwayNetwork.ox_graph(
            road_net.nodes_df, road_net.links_df
        )
        if graph_shapes:
            self._graph_shapes()
        if graph_stops:
            self._graph_stops()
        if validate_consistency:
            self.validate_road_network_consistencies()
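
    # Illustrative sketch of pairing a roadway network with a transit network
    # (file paths are hypothetical):
    #
    #   >>> from network_wrangler import RoadwayNetwork, TransitNetwork
    #   >>> road_net = RoadwayNetwork.read(
    #   ...     link_file="examples/stpaul/link.json",
    #   ...     node_file="examples/stpaul/node.geojson",
    #   ...     shape_file="examples/stpaul/shape.geojson",
    #   ... )
    #   >>> transit_net = TransitNetwork.read(feed_path="examples/stpaul")
    #   >>> transit_net.set_roadnet(road_net)  # runs consistency checks by default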

    def _graph_shapes(self) -> None:
        """
        .. todo:: Fill out this method.
        """
        existing_shapes = self.feed.shapes
        msg = "_graph_shapes() not implemented yet."
        WranglerLogger.error(msg)
        raise NotImplementedError(msg)
        # graphed_shapes = pd.DataFrame()
        # for shape_id in shapes:
        #     TODO traverse point by point, mapping shortest path on graph,
        #     then append to a list
        #     return total list of all link ids
        #     rebuild rows in shapes dataframe and add to graphed_shapes
        # make graphed_shapes a GeoDataFrame
        # self.feed.shapes = graphed_shapes

    def _graph_stops(self) -> None:
        """
        .. todo:: Fill out this method.
        """
        existing_stops = self.feed.stops
        msg = "_graph_stops() not implemented yet."
        WranglerLogger.error(msg)
        raise NotImplementedError(msg)
        # graphed_stops = pd.DataFrame()
        # for stop_id in stops:
        #     TODO
        # self.feed.stops = graphed_stops

    def write(self, path: str = ".", filename: str = None) -> None:
        """
        Writes the network to files in the transit network standard.

        Args:
            path: the path where the output will be saved
            filename: the name prefix of the transit files that will be generated
        """
        WranglerLogger.info("Writing transit to directory: {}".format(path))
        for node in self.config.nodes.keys():
            df = self.feed.get(node.replace(".txt", ""))
            if not df.empty:
                if filename:
                    outpath = os.path.join(path, filename + "_" + node)
                else:
                    outpath = os.path.join(path, node)
                WranglerLogger.debug("Writing file: {}".format(outpath))
                df.to_csv(outpath, index=False)
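
    # Illustrative sketch of writing the feed back out (paths and prefix are
    # hypothetical):
    #
    #   >>> net.write(path="outputs", filename="scenario_2030")
    #   # writes outputs/scenario_2030_trips.txt, outputs/scenario_2030_stops.txt, ...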

    @staticmethod
    def transit_net_to_gdf(transit: Union[TransitNetwork, pd.DataFrame]):
        """
        Returns a GeoDataFrame given a TransitNetwork or a valid shapes DataFrame.

        Args:
            transit: either a TransitNetwork or a shapes GeoDataFrame

        .. todo:: Make more sophisticated.
        """
        from partridge import geo

        if isinstance(transit, pd.DataFrame):
            shapes = transit
        else:
            shapes = transit.feed.shapes

        transit_gdf = geo.build_shapes(shapes)
        return transit_gdf
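
    # Illustrative sketch (assumes `net` is a TransitNetwork whose feed has a
    # shapes table with coordinates):
    #
    #   >>> gdf = TransitNetwork.transit_net_to_gdf(net)
    #   >>> gdf.plot()  # one LineString geometry per shape_id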

    def apply(self, project_card_dictionary: dict):
        """
        Wrapper method to apply a project to a transit network.

        Args:
            project_card_dictionary: a dictionary of the project card object
        """
        WranglerLogger.info(
            "Applying Project to Transit Network: {}".format(
                project_card_dictionary["project"]
            )
        )

        def _apply_individual_change(project_dictionary: dict):
            if (
                project_dictionary["category"].lower()
                == "transit service property change"
            ):
                self.apply_transit_feature_change(
                    self.select_transit_features(project_dictionary["facility"]),
                    project_dictionary["properties"],
                )
            elif project_dictionary["category"].lower() == "parallel managed lanes":
                # Grab the list of nodes in the facility from road_net.
                # It should be cached because managed lane projects are
                # processed by RoadwayNetwork first via
                # Scenario.apply_all_projects
                try:
                    managed_lane_nodes = self.road_net.selections(
                        self.road_net.build_selection_key(
                            project_dictionary["facility"]
                        )
                    )["route"]
                except ValueError:
                    WranglerLogger.error(
                        "RoadwayNetwork not set yet, see TransitNetwork.set_roadnet()"
                    )
                    raise

                # Reroute any transit using these nodes
                self.apply_transit_managed_lane(
                    self.select_transit_features_by_nodes(managed_lane_nodes),
                    managed_lane_nodes,
                    self.road_net.managed_lanes_node_id_scalar,
                )
            elif project_dictionary["category"].lower() == "add transit":
                self.apply_python_calculation(project_dictionary["pycode"])
            elif project_dictionary["category"].lower() == "roadway deletion":
                WranglerLogger.warning(
                    "Roadway Deletion not yet implemented in Transit; ignoring"
                )
            else:
                msg = "{} not implemented yet in TransitNetwork; can't apply.".format(
                    project_dictionary["category"]
                )
                WranglerLogger.error(msg)
                raise NotImplementedError(msg)

        if project_card_dictionary.get("changes"):
            for project_dictionary in project_card_dictionary["changes"]:
                _apply_individual_change(project_dictionary)
        else:
            _apply_individual_change(project_card_dictionary)
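
    # Illustrative sketch of a project card dictionary accepted by `apply`
    # (project name, route, and property values are hypothetical):
    #
    #   >>> card = {
    #   ...     "project": "Route 53 More Frequent Service",
    #   ...     "category": "Transit Service Property Change",
    #   ...     "facility": {"route_id": "53-111", "time": ["06:00", "09:00"]},
    #   ...     "properties": [{"property": "headway_secs", "set": 600}],
    #   ... }
    #   >>> net.apply(card)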

    def apply_python_calculation(
        self, pycode: str, in_place: bool = True
    ) -> Optional[TransitNetwork]:
        """
        Changes the transit network object by executing pycode.

        Args:
            pycode: python code which changes values in the transit network object
            in_place: update self or return a new transit network object
        """
        exec(pycode)

    def select_transit_features(self, selection: dict) -> pd.Series:
        """
        Combines multiple selections.

        Args:
            selection: selection dictionary

        Returns:
            trip identifiers: list of GTFS trip IDs in the selection
        """
        if selection.get("route"):
            trip_ids = pd.concat(
                [
                    self._select_transit_features(route_dictionary)
                    for route_dictionary in selection["route"]
                ]
            )
        else:
            trip_ids = self._select_transit_features(selection)

        return trip_ids

    def _select_transit_features(self, selection: dict) -> pd.Series:
        """
        Selects transit features that satisfy selection criteria.

        Args:
            selection: selection dictionary

        Returns:
            trip identifiers: list of GTFS trip IDs in the selection
        """
        trips = self.feed.trips
        routes = self.feed.routes
        freq = self.feed.frequencies

        # Turn selection's values into lists if they are not already
        for key in selection.keys():
            if type(selection[key]) not in [list, tuple]:
                selection[key] = [selection[key]]

        # Based on the key in selection, filter trips
        if "trip_id" in selection:
            trips = trips[trips.trip_id.isin(selection["trip_id"])]

        elif "route_id" in selection:
            trips = trips[trips.route_id.isin(selection["route_id"])]

        elif "route_short_name" in selection:
            routes = routes[routes.route_short_name.isin(selection["route_short_name"])]
            trips = trips[trips.route_id.isin(routes["route_id"])]

        elif "route_long_name" in selection:
            matches = []
            for sel in selection["route_long_name"]:
                for route_long_name in routes["route_long_name"]:
                    x = re.search(sel, route_long_name)
                    if x is not None:
                        matches.append(route_long_name)

            routes = routes[routes.route_long_name.isin(matches)]
            trips = trips[trips.route_id.isin(routes["route_id"])]

        else:
            msg = "Selection not supported: {}".format(list(selection.keys()))
            WranglerLogger.error(msg)
            raise ValueError(msg)

        # If a time key exists, filter trips using the frequency table
        if selection.get("time"):
            selection["time"] = parse_time_spans(selection["time"])
        elif selection.get("start_time") and selection.get("end_time"):
            selection["time"] = parse_time_spans(
                [selection["start_time"][0], selection["end_time"][0]]
            )

        if selection.get("time"):
            # Filter freq to trips in selection
            freq = freq[freq.trip_id.isin(trips["trip_id"])]
            freq = freq[freq.start_time == selection["time"][0]]
            freq = freq[freq.end_time == selection["time"][1]]

            # Filter trips table to those still in freq table
            trips = trips[trips.trip_id.isin(freq["trip_id"])]

        # If any other key exists, filter routes or trips accordingly
        for key in selection.keys():
            if key not in [
                "trip_id",
                "route_id",
                "route_short_name",
                "route_long_name",
                "time",
                "start_time",
                "end_time",
            ]:
                if key in trips:
                    trips = trips[trips[key].isin(selection[key])]
                elif key in routes:
                    routes = routes[routes[key].isin(selection[key])]
                    trips = trips[trips.route_id.isin(routes["route_id"])]
                else:
                    msg = "Selection not supported: {}".format(key)
                    WranglerLogger.error(msg)
                    raise ValueError(msg)

        # Check that there is at least one trip in the trips table or raise error
        if len(trips) < 1:
            msg = "Selection returned zero trips"
            WranglerLogger.error(msg)
            raise ValueError(msg)

        # Return pandas.Series of trip_ids
        return trips["trip_id"]
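
    # Illustrative selection sketches (route names and time spans are
    # hypothetical):
    #
    #   >>> net.select_transit_features({"route_id": "53-111"})
    #   >>> net.select_transit_features(
    #   ...     {"route_short_name": "53", "time": ["06:00", "09:00"]}
    #   ... )
    #   >>> net.select_transit_features({"route_long_name": "Express"})  # regex match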

    def select_transit_features_by_nodes(
        self, node_ids: list, require_all: bool = False
    ) -> pd.Series:
        """
        Selects transit features that use any one of a list of node_ids.

        Args:
            node_ids: list of node IDs (generally coming from nx.shortest_path)
            require_all: if True, the returned trip_ids must traverse all of
                the nodes (default = False)

        Returns:
            trip identifiers: list of GTFS trip IDs in the selection
        """
        # If require_all, the returned trip_ids must traverse all of the nodes
        # Else, filter any shapes that use any one of the nodes in node_ids
        if require_all:
            shape_ids = (
                self.feed.shapes.groupby("shape_id").filter(
                    lambda x: all(
                        i in x[self.shapes_foreign_key].tolist() for i in node_ids
                    )
                )
            ).shape_id.drop_duplicates()
        else:
            shape_ids = self.feed.shapes[
                self.feed.shapes[self.shapes_foreign_key].isin(node_ids)
            ].shape_id.drop_duplicates()

        # Return pandas.Series of trip_ids
        return self.feed.trips[self.feed.trips.shape_id.isin(shape_ids)].trip_id
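
    # Illustrative sketch (node IDs are hypothetical):
    #
    #   >>> trip_ids = net.select_transit_features_by_nodes([38543, 38544])
    #   >>> trip_ids = net.select_transit_features_by_nodes(
    #   ...     [38543, 38544], require_all=True
    #   ... )  # only trips whose shapes traverse every listed node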

    def check_network_connectivity(self, shapes_foreign_key: pd.Series) -> pd.Series:
        """
        Checks if a new shape contains any links that are not in the roadway
        network; any missing link is patched with a shortest path from the
        roadway graph.

        Args:
            shapes_foreign_key: series of roadway node IDs for the new routing

        Returns:
            series of roadway node IDs with any connectivity gaps filled in
        """
        # Build the A-B link list implied by the node sequence
        shape_links_df = pd.DataFrame(
            {
                "A": shapes_foreign_key.tolist()[:-1],
                "B": shapes_foreign_key.tolist()[1:],
            }
        )
        shape_links_df["A"] = shape_links_df["A"].astype(int)
        shape_links_df["B"] = shape_links_df["B"].astype(int)

        shape_links_df = pd.merge(
            shape_links_df,
            self.road_net.links_df[["A", "B", "model_link_id"]],
            how="left",
            on=["A", "B"],
        )

        missing_shape_links_df = shape_links_df[shape_links_df["model_link_id"].isnull()]

        if len(missing_shape_links_df) > 0:
            for index, row in missing_shape_links_df.iterrows():
                WranglerLogger.warning(
                    "Missing connection from node {} to node {} for the new routing, "
                    "finding complete path using default graph".format(
                        int(row.A), int(row.B)
                    )
                )
                complete_node_list = TransitNetwork.route_between_nodes(
                    self.graph, row.A, row.B
                )
                complete_node_list = pd.Series(
                    [str(int(i)) for i in complete_node_list]
                )
                WranglerLogger.info(
                    "Routing path from node {} to node {} for missing connection: {}.".format(
                        int(row.A), int(row.B), complete_node_list.tolist()
                    )
                )

                nodes = shapes_foreign_key.tolist()
                index_replacement_starts = [
                    i for i, d in enumerate(nodes) if d == str(int(row.A))
                ][0]
                index_replacement_ends = [
                    i for i, d in enumerate(nodes) if d == str(int(row.B))
                ][-1]
                shapes_foreign_key = pd.concat(
                    [
                        shapes_foreign_key.iloc[:index_replacement_starts],
                        complete_node_list,
                        shapes_foreign_key.iloc[index_replacement_ends + 1 :],
                    ],
                    ignore_index=True,
                    sort=False,
                )

        return shapes_foreign_key

    @staticmethod
    def route_between_nodes(graph, A, B) -> list:
        """
        Finds a complete path between two nodes when the new shape has a
        connectivity gap, using a length-weighted shortest path on the
        roadway graph.
        """
        node_list = nx.shortest_path(graph, A, B, weight="length")
        return node_list
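
    # Illustrative sketch (assumes set_roadnet() has built self.graph; the
    # node IDs are hypothetical):
    #
    #   >>> TransitNetwork.route_between_nodes(net.graph, 38543, 38560)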

    def apply_transit_feature_change(
        self, trip_ids: pd.Series, properties: list, in_place: bool = True
    ) -> Optional[TransitNetwork]:
        """
        Changes the transit attributes for the selected features based on the
        project card information passed.

        Args:
            trip_ids: all trip_ids to apply the change to
            properties: list of dictionaries of transit properties to change
            in_place: whether to apply changes in place or return a new network

        Returns:
            None
        """
        for i in properties:
            if i["property"] in ["headway_secs"]:
                self._apply_transit_feature_change_frequencies(trip_ids, i, in_place)

            elif i["property"] in ["routing"]:
                self._apply_transit_feature_change_routing(trip_ids, i, in_place)

            elif i["property"] in ["shapes"]:
                self._apply_transit_feature_change_shapes(trip_ids, i, in_place)

            elif i["property"] in [
                "no_alightings",
                "no_boardings",
                "allow_alightings",
                "allow_boardings",
            ]:
                self._apply_transit_feature_change_stops(trip_ids, i, in_place)
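
    # Illustrative property dictionaries for `apply_transit_feature_change`
    # (values are hypothetical; in routing properties, a negative node ID
    # marks a traversed node without a stop):
    #
    #   >>> net.apply_transit_feature_change(
    #   ...     trip_ids=net.select_transit_features({"route_id": "53-111"}),
    #   ...     properties=[{"property": "headway_secs", "set": 600}],
    #   ... )
    #   >>> net.apply_transit_feature_change(
    #   ...     trip_ids=net.select_transit_features({"route_id": "53-111"}),
    #   ...     properties=[{
    #   ...         "property": "routing",
    #   ...         "existing": [38543, -38544],
    #   ...         "set": [38543, -38545, 38546],
    #   ...     }],
    #   ... )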

    def _apply_transit_feature_change_routing(
        self, trip_ids: pd.Series, properties: dict, in_place: bool = True
    ) -> Optional[TransitNetwork]:
        shapes = self.feed.shapes.copy()
        stop_times = self.feed.stop_times.copy()
        stops = self.feed.stops.copy()

        # A negative sign in "set" indicates a traversed node without a stop
        # If any positive numbers, stops have changed
        stops_change = False
        if any(x > 0 for x in properties["set"]):
            # Simplify "set" and "existing" to only stops
            properties["set_stops"] = [str(i) for i in properties["set"] if i > 0]
            if properties.get("existing") is not None:
                properties["existing_stops"] = [
                    str(i) for i in properties["existing"] if i > 0
                ]
            stops_change = True

        # Convert ints to strings to match the feed's object dtypes
        properties["set_shapes"] = [str(abs(i)) for i in properties["set"]]
        if properties.get("existing") is not None:
            properties["existing_shapes"] = [
                str(abs(i)) for i in properties["existing"]
            ]

        # Replace shapes records
        trips = self.feed.trips  # create a pointer rather than a copy
        shape_ids = trips[trips["trip_id"].isin(trip_ids)].shape_id
        for shape_id in set(shape_ids):
            # Check if `shape_id` is used by trips that are not in
            # parameter `trip_ids`
            trips_using_shape_id = trips.loc[trips["shape_id"] == shape_id, ["trip_id"]]
            if not all(trips_using_shape_id.isin(trip_ids)["trip_id"]):
                # In this case, we need to create a new shape_id so as to leave
                # the trips not part of the query alone
                WranglerLogger.warning(
                    "Trips that were not in your query selection use the "
                    "same `shape_id` as trips that are in your query. Only "
                    "the trips' shape in your query will be changed."
                )
                old_shape_id = shape_id
                shape_id = str(int(shape_id) + self.id_scalar)
                if shape_id in shapes["shape_id"].tolist():
                    WranglerLogger.error("Cannot create a unique new shape_id.")
                dup_shape = shapes[shapes.shape_id == old_shape_id].copy()
                dup_shape["shape_id"] = shape_id
                shapes = pd.concat([shapes, dup_shape], ignore_index=True)

            # Pop the rows that match shape_id
            this_shape = shapes[shapes.shape_id == shape_id]

            # Make sure they are ordered by shape_pt_sequence
            this_shape = this_shape.sort_values(by=["shape_pt_sequence"])

            # Build a pd.DataFrame of new shape records
            new_shape_rows = pd.DataFrame(
                {
                    "shape_id": shape_id,
                    "shape_pt_lat": None,  # FIXME Populate from self.road_net?
                    "shape_pt_lon": None,  # FIXME
                    "shape_osm_node_id": None,  # FIXME
                    "shape_pt_sequence": None,
                    self.shapes_foreign_key: properties["set_shapes"],
                }
            )

            # Patch any connectivity gaps in the new routing
            check_new_shape_nodes = self.check_network_connectivity(
                new_shape_rows[self.shapes_foreign_key]
            )
            if len(check_new_shape_nodes) != len(new_shape_rows):
                new_shape_rows = pd.DataFrame(
                    {
                        "shape_id": shape_id,
                        "shape_pt_lat": None,  # FIXME Populate from self.road_net?
                        "shape_pt_lon": None,  # FIXME
                        "shape_osm_node_id": None,  # FIXME
                        "shape_pt_sequence": None,
                        self.shapes_foreign_key: check_new_shape_nodes,
                    }
                )
                properties["set_shapes"] = check_new_shape_nodes.tolist()

            # If "existing" is specified, replace only that segment
            # Else, replace the whole thing
            if properties.get("existing") is not None:
                # Match list
                nodes = this_shape[self.shapes_foreign_key].tolist()
                index_replacement_starts = [
                    i for i, d in enumerate(nodes)
                    if d == properties["existing_shapes"][0]
                ][0]
                index_replacement_ends = [
                    i for i, d in enumerate(nodes)
                    if d == properties["existing_shapes"][-1]
                ][-1]
                this_shape = pd.concat(
                    [
                        this_shape.iloc[:index_replacement_starts],
                        new_shape_rows,
                        this_shape.iloc[index_replacement_ends + 1 :],
                    ],
                    ignore_index=True,
                    sort=False,
                )
            else:
                this_shape = new_shape_rows

            # Renumber shape_pt_sequence
            this_shape["shape_pt_sequence"] = np.arange(len(this_shape))

            # Add rows back into shapes
            shapes = pd.concat(
                [shapes[shapes.shape_id != shape_id], this_shape],
                ignore_index=True,
                sort=False,
            )

        # Replace stop_times and stops records (if required)
        if stops_change:
            # If node IDs in properties["set_stops"] are not already
            # in stops.txt, create a new stop_id for them in stops
            existing_fk_ids = set(stops[self.stops_foreign_key].tolist())
            nodes_df = self.road_net.nodes_df.loc[:, [self.stops_foreign_key, "X", "Y"]]
            for fk_i in properties["set_stops"]:
                if fk_i not in existing_fk_ids:
                    WranglerLogger.info(
                        "Creating a new stop in stops.txt for node ID: {}".format(fk_i)
                    )
                    # Add new row to stops
                    new_stop_id = str(int(fk_i) + self.id_scalar)
                    if new_stop_id in stops["stop_id"].tolist():
                        WranglerLogger.error("Cannot create a unique new stop_id.")
                    stops.loc[
                        len(stops.index) + 1,
                        ["stop_id", "stop_lat", "stop_lon", self.stops_foreign_key],
                    ] = [
                        new_stop_id,
                        nodes_df.loc[nodes_df[self.stops_foreign_key] == int(fk_i), "Y"],
                        nodes_df.loc[nodes_df[self.stops_foreign_key] == int(fk_i), "X"],
                        fk_i,
                    ]

            # Loop through all the trip_ids
            for trip_id in trip_ids:
                # Pop the rows that match trip_id
                this_stoptime = stop_times[stop_times.trip_id == trip_id]

                # Merge on node IDs using stop_id (one node ID per stop_id)
                this_stoptime = this_stoptime.merge(
                    stops[["stop_id", self.stops_foreign_key]],
                    how="left",
                    on="stop_id",
                )

                # Make sure the stop_times are ordered by stop_sequence
                this_stoptime = this_stoptime.sort_values(by=["stop_sequence"])

                # Build a pd.DataFrame of new stop_time records from properties
                new_stoptime_rows = pd.DataFrame(
                    {
                        "trip_id": trip_id,
                        "arrival_time": None,
                        "departure_time": None,
                        "pickup_type": None,
                        "drop_off_type": None,
                        "stop_distance": None,
                        "timepoint": None,
                        "stop_is_skipped": None,
                        self.stops_foreign_key: properties["set_stops"],
                    }
                )

                # Merge on stop_id using node IDs (many stop_id per node ID)
                new_stoptime_rows = (
                    new_stoptime_rows.merge(
                        stops[["stop_id", self.stops_foreign_key]],
                        how="left",
                        on=self.stops_foreign_key,
                    )
                    .groupby([self.stops_foreign_key])
                    .head(1)
                )  # pick first

                # If "existing" is specified, replace only that segment
                # Else, replace the whole thing
                if properties.get("existing") is not None:
                    # Match list (remember stops are passed in with node IDs)
                    nodes = this_stoptime[self.stops_foreign_key].tolist()
                    index_replacement_starts = nodes.index(
                        properties["existing_stops"][0]
                    )
                    index_replacement_ends = nodes.index(
                        properties["existing_stops"][-1]
                    )
                    this_stoptime = pd.concat(
                        [
                            this_stoptime.iloc[:index_replacement_starts],
                            new_stoptime_rows,
                            this_stoptime.iloc[index_replacement_ends + 1 :],
                        ],
                        ignore_index=True,
                        sort=False,
                    )
                else:
                    this_stoptime = new_stoptime_rows

                # Remove node ID
                del this_stoptime[self.stops_foreign_key]

                # Renumber stop_sequence
                this_stoptime["stop_sequence"] = np.arange(len(this_stoptime))

                # Add rows back into stop_times
                stop_times = pd.concat(
                    [stop_times[stop_times.trip_id != trip_id], this_stoptime],
                    ignore_index=True,
                    sort=False,
                )

        # Replace self if in_place, else return a copy
        if in_place:
            self.feed.shapes = shapes
            self.feed.stops = stops
            self.feed.stop_times = stop_times
        else:
            updated_network = copy.deepcopy(self)
            updated_network.feed.shapes = shapes
            updated_network.feed.stops = stops
            updated_network.feed.stop_times = stop_times
            return updated_network

    def _apply_transit_feature_change_frequencies(
        self, trip_ids: pd.Series, properties: dict, in_place: bool = True
    ) -> Optional[TransitNetwork]:
        freq = self.feed.frequencies.copy()

        # Grab only those records matching trip_ids (aka selection)
        freq = freq[freq.trip_id.isin(trip_ids)]

        # Check all `existing` properties if given
        if properties.get("existing") is not None:
            if not all(freq.headway_secs == properties["existing"]):
                msg = "Existing does not match for at least 1 trip in:\n {}".format(
                    trip_ids.to_string()
                )
                WranglerLogger.error(msg)
                raise ValueError(msg)

        # Calculate build value
        if properties.get("set") is not None:
            build_value = properties["set"]
        else:
            build_value = [i + properties["change"] for i in freq.headway_secs]

        # Update self or return a new object
        q = self.feed.frequencies.trip_id.isin(freq["trip_id"])
        if in_place:
            self.feed.frequencies.loc[q, properties["property"]] = build_value
        else:
            updated_network = copy.deepcopy(self)
            updated_network.feed.frequencies.loc[q, properties["property"]] = build_value
            return updated_network
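
    # Illustrative headway property sketches (values are hypothetical): "set"
    # replaces the headway outright, "change" adjusts it by a delta, and
    # "existing" optionally asserts the current value before the change.
    #
    #   >>> props = {"property": "headway_secs", "existing": 900, "set": 600}
    #   >>> props = {"property": "headway_secs", "change": -120}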

    def apply_transit_managed_lane(
        self, trip_ids: pd.Series, node_ids: list, scalar: int, in_place: bool = True
    ) -> Optional[TransitNetwork]:
        # Traversed nodes without a stop should be negative integers
        all_stops = self.feed.stops[self.stops_foreign_key].tolist()
        node_ids = [int(x) if str(x) in all_stops else int(x) * -1 for x in node_ids]

        return self._apply_transit_feature_change_routing(
            trip_ids=trip_ids,
            properties={
                "existing": node_ids,
                "set": RoadwayNetwork.get_managed_lane_node_ids(node_ids, scalar),
            },
            in_place=in_place,
        )

    def _apply_transit_feature_change_shapes(
        self, trip_ids: pd.Series, properties: dict, in_place: bool = True
    ) -> Optional[TransitNetwork]:
        shapes = self.feed.shapes.copy()
        stop_times = self.feed.stop_times.copy()
        stops = self.feed.stops.copy()

        properties["set_shapes"] = [str(abs(i)) for i in properties["set"]]
        if properties.get("existing") is not None:
            properties["existing_shapes"] = [
                str(abs(i)) for i in properties["existing"]
            ]

        # Replace shapes records
        trips = self.feed.trips  # create a pointer rather than a copy
        shape_ids = trips[trips["trip_id"].isin(trip_ids)].shape_id
        for shape_id in set(shape_ids):
            # Check if `shape_id` is used by trips that are not in
            # parameter `trip_ids`
            trips_using_shape_id = trips.loc[trips["shape_id"] == shape_id, ["trip_id"]]
            if not all(trips_using_shape_id.isin(trip_ids)["trip_id"]):
                # In this case, we need to create a new shape_id so as to leave
                # the trips not part of the query alone
                WranglerLogger.warning(
                    "Trips that were not in your query selection use the "
                    "same `shape_id` as trips that are in your query. Only "
                    "the trips' shape in your query will be changed."
                )
                old_shape_id = shape_id
                shape_id = str(int(shape_id) + self.id_scalar)
                if shape_id in shapes["shape_id"].tolist():
                    WranglerLogger.error("Cannot create a unique new shape_id.")
                dup_shape = shapes[shapes.shape_id == old_shape_id].copy()
                dup_shape["shape_id"] = shape_id
                shapes = pd.concat([shapes, dup_shape], ignore_index=True)
                # Change the shape_id for the trip records in the selection
                trips.loc[trips["trip_id"].isin(trip_ids), "shape_id"] = shape_id

            # Pop the rows that match shape_id
            this_shape = shapes[shapes.shape_id == shape_id]

            # Make sure they are ordered by shape_pt_sequence
            this_shape = this_shape.sort_values(by=["shape_pt_sequence"])

            # Build a pd.DataFrame of new shape records
            new_shape_rows = pd.DataFrame(
                {
                    "shape_id": shape_id,
                    "shape_pt_lat": None,  # FIXME Populate from self.road_net?
                    "shape_pt_lon": None,  # FIXME
                    "shape_osm_node_id": None,  # FIXME
                    "shape_pt_sequence": None,
                    self.shapes_foreign_key: properties["set_shapes"],
                }
            )

            # Patch any connectivity gaps in the new routing
            check_new_shape_nodes = self.check_network_connectivity(
                new_shape_rows[self.shapes_foreign_key]
            )
            if len(check_new_shape_nodes) != len(new_shape_rows):
                new_shape_rows = pd.DataFrame(
                    {
                        "shape_id": shape_id,
                        "shape_pt_lat": None,  # FIXME Populate from self.road_net?
                        "shape_pt_lon": None,  # FIXME
                        "shape_osm_node_id": None,  # FIXME
                        "shape_pt_sequence": None,
                        self.shapes_foreign_key: check_new_shape_nodes,
                    }
                )
                properties["set_shapes"] = check_new_shape_nodes.tolist()

            # If "existing" is specified, replace only that segment
            # Else, replace the whole thing
            if properties.get("existing") is not None:
                # Match list
                nodes = this_shape[self.shapes_foreign_key].tolist()
                index_replacement_starts = [
                    i for i, d in enumerate(nodes)
                    if d == properties["existing_shapes"][0]
                ][0]
                index_replacement_ends = [
                    i for i, d in enumerate(nodes)
                    if d == properties["existing_shapes"][-1]
                ][-1]
                this_shape = pd.concat(
                    [
                        this_shape.iloc[:index_replacement_starts],
                        new_shape_rows,
                        this_shape.iloc[index_replacement_ends + 1 :],
                    ],
                    ignore_index=True,
                    sort=False,
                )
            else:
                this_shape = new_shape_rows

            # Renumber shape_pt_sequence
            this_shape["shape_pt_sequence"] = np.arange(len(this_shape))

            # Add rows back into shapes
            shapes = pd.concat(
                [shapes[shapes.shape_id != shape_id], this_shape],
                ignore_index=True,
                sort=False,
            )

        # Replace self if in_place, else return a copy
        if in_place:
            self.feed.shapes = shapes
        else:
            updated_network = copy.deepcopy(self)
            updated_network.feed.shapes = shapes
            return updated_network

    def _apply_transit_feature_change_stops(
        self, trip_ids: pd.Series, properties: dict, in_place: bool = True
    ) -> Optional[TransitNetwork]:
        shapes = self.feed.shapes.copy()
        stop_times = self.feed.stop_times.copy()
        stops = self.feed.stops.copy()
        trips = self.feed.trips.copy()

        nodes_df = self.road_net.nodes_df.loc[:, [self.stops_foreign_key, "X", "Y"]]

        for node_id in properties["set"]:
            # Check if this is a new stop node
            existing_stop_fk_ids = set(stops[self.stops_foreign_key].tolist())

            if str(node_id) not in existing_stop_fk_ids:
                WranglerLogger.info(
                    "Creating a new stop in stops.txt for node ID: {}".format(node_id)
                )
                # Add new row to stops
                new_stop_id = str(int(node_id) + self.id_scalar)
                if new_stop_id in stops["stop_id"].tolist():
                    WranglerLogger.error("Cannot create a unique new stop_id.")
                stops.loc[
                    len(stops.index) + 1,
                    ["stop_id", "stop_lat", "stop_lon", self.stops_foreign_key],
                ] = [
                    new_stop_id,
                    nodes_df.loc[nodes_df[self.stops_foreign_key] == int(node_id), "Y"],
                    nodes_df.loc[nodes_df[self.stops_foreign_key] == int(node_id), "X"],
                    str(node_id),
                ]
            else:
                WranglerLogger.info(
                    "Modifying existing stop in stop_times.txt for node ID: {}".format(
                        node_id
                    )
                )
                for trip_id in trip_ids:
                    # Pop the rows that match trip_id
                    this_stoptime = stop_times[stop_times.trip_id == trip_id].copy()

                    # Merge on node IDs using stop_id (one node ID per stop_id)
                    this_stoptime = this_stoptime.merge(
                        stops[["stop_id", self.stops_foreign_key]],
                        how="left",
                        on="stop_id",
                    )

                    stop_id = this_stoptime[
                        this_stoptime[self.stops_foreign_key] == str(node_id)
                    ]["stop_id"].iloc[0]

                    if properties["property"] == "allow_alightings":
                        stop_times.loc[
                            (stop_times["trip_id"] == trip_id)
                            & (stop_times["stop_id"] == stop_id),
                            "pickup_type",
                        ] = 0
                    if properties["property"] == "no_alightings":
                        stop_times.loc[
                            (stop_times["trip_id"] == trip_id)
                            & (stop_times["stop_id"] == stop_id),
                            "pickup_type",
                        ] = 1
                    if properties["property"] == "allow_boardings":
                        stop_times.loc[
                            (stop_times["trip_id"] == trip_id)
                            & (stop_times["stop_id"] == stop_id),
                            "drop_off_type",
                        ] = 0
                    if properties["property"] == "no_boardings":
                        stop_times.loc[
                            (stop_times["trip_id"] == trip_id)
                            & (stop_times["stop_id"] == stop_id),
                            "drop_off_type",
                        ] = 1

                # Existing stop handled; move on to the next node ID
                continue

            for trip_id in trip_ids:
                # Pop the rows that match trip_id
                this_stoptime = stop_times[stop_times.trip_id == trip_id].copy()

                # Merge on node IDs using stop_id (one node ID per stop_id)
                this_stoptime = this_stoptime.merge(
                    stops[["stop_id", self.stops_foreign_key]],
                    how="left",
                    on="stop_id",
                )

                # Make sure the stop_times are ordered by stop_sequence
                this_stoptime = this_stoptime.sort_values(by=["stop_sequence"])

                # Get the shape for this trip
                shape_id = trips[trips.trip_id == trip_id].shape_id.iloc[0]
                this_shape = shapes[shapes.shape_id == shape_id].copy()

                # Make sure the shapes are ordered by shape_pt_sequence
                this_shape = this_shape.sort_values(by=["shape_pt_sequence"])

                # Flag shape points that are stops (including the new one)
                this_shape["is_stop"] = np.where(
                    (
                        this_shape[self.shapes_foreign_key].isin(
                            this_stoptime[self.stops_foreign_key]
                        )
                    )
                    | (this_shape[self.shapes_foreign_key] == str(node_id)),
                    1,
                    0,
                )

                stops_on_this_shape = this_shape[this_shape.is_stop == 1].copy()
                stops_node_list = stops_on_this_shape[self.shapes_foreign_key].tolist()

                this_stop_index = stops_node_list.index(str(node_id))
                # The stop node ID before this stop
                previous_stop_node_id = stops_node_list[this_stop_index - 1]
                # The stop node ID after this stop
                next_stop_node_id = stops_node_list[this_stop_index + 1]

                stoptime_node_ids = this_stoptime[self.stops_foreign_key].tolist()
                index_replacement_starts = stoptime_node_ids.index(previous_stop_node_id)
                index_replacement_ends = stoptime_node_ids.index(next_stop_node_id)

                pickup_type = 0
                drop_off_type = 0
                if properties["property"] == "allow_alightings":
                    pickup_type = 0
                if properties["property"] == "no_alightings":
                    pickup_type = 1
                if properties["property"] == "allow_boardings":
                    drop_off_type = 0
                if properties["property"] == "no_boardings":
                    drop_off_type = 1

                # Build a pd.DataFrame of new stop_time records from properties
                new_stoptime_rows = pd.DataFrame(
                    {
                        "trip_id": trip_id,
                        "arrival_time": None,
                        "departure_time": None,
                        "pickup_type": pickup_type,
                        "drop_off_type": drop_off_type,
                        "stop_distance": None,
                        "timepoint": None,
                        "stop_is_skipped": None,
                        self.stops_foreign_key: [str(node_id)],
                    }
                )

                # Merge on stop_id using node IDs (many stop_id per node ID)
                new_stoptime_rows = (
                    new_stoptime_rows.merge(
                        stops[["stop_id", self.stops_foreign_key]],
                        how="left",
                        on=self.stops_foreign_key,
                    )
                    .groupby([self.stops_foreign_key])
                    .head(1)
                )  # pick first

                # Insert the new stop between its neighboring stops
                this_stoptime = pd.concat(
                    [
                        this_stoptime.iloc[: index_replacement_starts + 1],
                        new_stoptime_rows,
                        this_stoptime.iloc[index_replacement_ends:],
                    ],
                    ignore_index=True,
                    sort=False,
                )

                # Remove node ID
                del this_stoptime[self.stops_foreign_key]

                # Renumber stop_sequence
                this_stoptime["stop_sequence"] = np.arange(len(this_stoptime))

                # Add rows back into stop_times
                stop_times = pd.concat(
                    [stop_times[stop_times.trip_id != trip_id], this_stoptime],
                    ignore_index=True,
                    sort=False,
                )

        # Replace self if in_place, else return a copy
        if in_place:
            self.feed.shapes = shapes
            self.feed.stops = stops
            self.feed.stop_times = stop_times
        else:
            updated_network = copy.deepcopy(self)
            updated_network.feed.shapes = shapes
            updated_network.feed.stops = stops
            updated_network.feed.stop_times = stop_times
            return updated_network

class DotDict(dict):
    """
    dot.notation access to dictionary attributes

    Source: https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary
    """

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)
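
# Illustrative DotDict sketch: feed tables can be reached by attribute or key.
#
#   >>> d = DotDict({"stops": "a stops dataframe would go here"})
#   >>> d.stops == d["stops"]
#   True
#   >>> d.missing_table  # raises AttributeError instead of KeyError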