In [1]:
import os, glob
import fiona
import geopandas as gpd
import pandas as pd

### set up input/output folders

In [2]:
# Working data folder, addressed relative to the notebook's directory.
local_folder = '..\\data'

# Per-user Box folder for the project; the user name is read from the
# USERNAME environment variable.
box_folder = (
    'C:\\Users\\{}\\Box\\DataViz Projects\\Spatial Analysis and Mapping'
    '\\Active Transportation Plan\\Data'
).format(os.getenv('USERNAME'))

# Input folders.
geojson_folder = os.path.join(box_folder, 'geojson')
shst_match_folder = os.path.join(local_folder, 'shst_match')

# Output folders: one for the combined matched links, one for the unmatched.
all_matched_folder, all_unmatched_folder = (
    os.path.join(local_folder, 'shst_match_results', outcome)
    for outcome in ('matched', 'unmatched')
)

### functions

In [16]:
def read_shst_match(path, suffix, link_type):
    """
    Read all matched records regardless of the matching rules.

    Every file whose name matches ``suffix`` anywhere below
    ``<path>/<link_type>/`` is read and stacked into a single table. Duplicate
    matches are removed, bookkeeping columns from the matcher are dropped, and
    the result is exported as GeoJSON into ``all_matched_folder`` before being
    returned.

    Parameters
    ----------
    path : str
        Folder holding one sub-folder per link type.
    suffix : str
        File name of the match output to collect; glob wildcards are allowed.
    link_type : str
        'existing' or 'proposed'. Selects the sub-folder to search and the
        name of the exported file.

    Returns
    -------
    DataFrame
        An empty frame when nothing was read; otherwise the de-duplicated
        matches (with a ``source`` column giving each record's file, relative
        to ``path``).

    Raises
    ------
    ValueError
        If records were read but ``link_type`` is neither 'existing' nor
        'proposed', so no export name can be derived.
    """

    # Build the pattern with os.path.join so it is not tied to one path separator.
    pattern = os.path.join(path, link_type, '**', suffix)
    match_files = glob.glob(pattern, recursive = True)

    print('----------start reading shst matched data-------------')
    frames = []
    for match_file in match_files:
        print('reading shst matched data : ', match_file)
        new = gpd.read_file(match_file)
        # Keep track of which file (and therefore which rule folder) each record came from.
        new['source'] = os.path.relpath(match_file, path)
        frames.append(new)
    print('----------finished reading shst matched data-------------')

    # Concatenate once at the end rather than re-copying the table on every iteration.
    if frames:
        match_gdf = pd.concat(frames, ignore_index = True, sort = False)
    else:
        match_gdf = pd.DataFrame()

    if match_gdf.shape[0] == 0:
        print('no matched {} links'.format(link_type))
        return match_gdf

    match_gdf = match_gdf.rename(columns = {'shstFromIntersectionId': 'fromIntersectionId',
                                            'shstToIntersectionId'  : 'toIntersectionId'})

    # The same facility/segment pair can be matched under several rules; keep one copy.
    unique_match_gdf = match_gdf.drop_duplicates(
        subset = ['shstReferenceId', 'shstGeometryId', 'fromIntersectionId', 'toIntersectionId', 'pp_mtc_facility_id']).copy()

    unique_facility_id = unique_match_gdf.pp_mtc_facility_id.nunique()

    print('{} {} links matched to shst network'.format(unique_facility_id, link_type))

    unique_match_gdf = unique_match_gdf.drop(
        columns=['gisReferenceId', 'gisGeometryId', 'gisTotalSegments', 'gisSegmentIndex',
                 'gisFromIntersectionId', 'gisToIntersectionId',
                 'startSideOfStreet', 'endSideOfStreet', 'sideOfStreet',
                 'score', 'matchType'])

    # Derive the export name from the part of `suffix` that precedes the link-type tag.
    link_type_tags = {'existing': 'exst', 'proposed': 'ppsd'}
    if link_type not in link_type_tags:
        raise ValueError(
            "link_type must be 'existing' or 'proposed', got {!r}".format(link_type))
    tag = link_type_tags[link_type]
    export_file_name = suffix.split(tag)[0] + tag + '_matched.geojson'
    print('----------export all matched to {}'.format(export_file_name))

    # Export stays best-effort, but the reason for a failure is reported
    # instead of being silently discarded.
    try:
        unique_match_gdf.to_file(os.path.join(all_matched_folder, export_file_name),
                                 driver = "GeoJSON")
    except Exception as err:
        print('{} did not export'.format(export_file_name))
        print('export error: {!r}'.format(err))

    return unique_match_gdf

In [17]:
def read_shst_unmatch(path, suffix, link_type, final_match_rule_ls):
    """
    Read unmatched records.

    For every rule in ``final_match_rule_ls`` the files matching
    ``<path>/<link_type>/<rule>/<suffix>`` are read and stacked. Records are
    de-duplicated on ``mtc_facility_id``, exported as GeoJSON into
    ``all_unmatched_folder`` and returned.

    Parameters
    ----------
    path : str
        Folder holding one sub-folder per link type.
    suffix : str
        File name of the unmatched output to collect; glob wildcards are allowed.
    link_type : str
        'existing' or 'proposed'. Selects the sub-folder to search and the
        name of the exported file.
    final_match_rule_ls : list of str
        Names of the rule sub-folders whose unmatched output should be read.

    Returns
    -------
    DataFrame
        An empty frame when nothing was read; otherwise one row per
        ``mtc_facility_id``.

    Raises
    ------
    ValueError
        If records were read but ``link_type`` is neither 'existing' nor
        'proposed', so no export name can be derived.
    """

    # Print the banner once, so it pairs with the single "finished" banner below.
    print('----------start reading shst unmatched data-------------')
    frames = []
    for final_rule in final_match_rule_ls:
        # os.path.join keeps the pattern independent of the path separator.
        pattern = os.path.join(path, link_type, final_rule, suffix)
        for unmatched_file in glob.glob(pattern):
            print('reading shst unmatched data : ', unmatched_file)
            frames.append(gpd.read_file(unmatched_file))
    print('----------finished reading shst unmatched data-------------')

    # Concatenate once at the end rather than re-copying the table on every iteration.
    if frames:
        unmatched_gdf = pd.concat(frames, ignore_index = True, sort = False)
    else:
        unmatched_gdf = pd.DataFrame()

    if unmatched_gdf.shape[0] == 0:
        print('no unmatched {} links'.format(link_type))
        return unmatched_gdf

    # A facility may appear in the unmatched output of more than one rule/file.
    unmatched_gdf = unmatched_gdf.drop_duplicates(subset=['mtc_facility_id'])

    print('{} {} links failed to match'.format(unmatched_gdf.mtc_facility_id.nunique(), link_type))

    # Derive the export name from the part of `suffix` that precedes the link-type tag.
    link_type_tags = {'existing': 'exst', 'proposed': 'ppsd'}
    if link_type not in link_type_tags:
        raise ValueError(
            "link_type must be 'existing' or 'proposed', got {!r}".format(link_type))
    tag = link_type_tags[link_type]
    export_file_name = suffix.split(tag)[0] + tag + '_unmatched.geojson'
    # The message previously said "matched" here, a copy-paste slip.
    print('----------export all unmatched to {}'.format(export_file_name))

    # Export stays best-effort, but the reason for a failure is reported
    # instead of being silently discarded.
    try:
        unmatched_gdf.to_file(os.path.join(all_unmatched_folder, export_file_name),
                              driver = "GeoJSON")
    except Exception as err:
        print('{} did not export'.format(export_file_name))
        print('export error: {!r}'.format(err))

    return unmatched_gdf

### process all datasets

In [6]:
# Names of the bike-network datasets handled in this notebook.
# Entries the author tagged as "regional" keep that tag below.
data_list = [
    'actc_bike_network_epsg4326',
    'batc_bike_network_v2_epsg4326',        # regional
    'caltrans_d4_bike_network_epsg4326',    # regional
    'ccag_bike_network_epsg4326',
    'ccta_bike_network_epsg4326',
    'nvta_bike_network_epsg4326',
    'oakland_bike_network_epsg4326',
    'san_jose_bike_nw_epsg4326',
    'scta_bike_network_epsg4326',
    'sfcta_bike_network_epsg4326',
    'sta_bike_network_epsg4326',
    'tam_bike_network_epsg4326',
    'vta_bike_network_v2_epsg4326',         # regional
]

# Results per dataset, keyed by dataset name; filled in by the processing loops.
data_dict = {}

In [14]:
# Datasets whose match-output file names carry no numbered part
# (their suffixes below contain no wildcard).
for i in [
    'actc_bike_network_epsg4326',
    'ccag_bike_network_epsg4326',
    'ccta_bike_network_epsg4326',
    'nvta_bike_network_epsg4326',
    'oakland_bike_network_epsg4326',
    'san_jose_bike_nw_epsg4326',
    'scta_bike_network_epsg4326',
    'sfcta_bike_network_epsg4326',
    'sta_bike_network_epsg4326',
    'tam_bike_network_epsg4326',
]:
    print('process {}'.format(i))

    # File names of the four match outputs for this dataset.
    existing_matched_suffix = '{}_exst.out.matched.geojson'.format(i)
    existing_unmatched_suffix = '{}_exst.out.unmatched.geojson'.format(i)
    proposed_matched_suffix = '{}_ppsd.out.matched.geojson'.format(i)
    proposed_unmatched_suffix = '{}_ppsd.out.unmatched.geojson'.format(i)

    # Existing links: matched records from every rule folder, unmatched from car_rules_10m only.
    df1 = read_shst_match(shst_match_folder, existing_matched_suffix, 'existing')
    df2 = read_shst_unmatch(shst_match_folder, existing_unmatched_suffix, 'existing',
                            ['car_rules_10m'])

    # Proposed links: unmatched records are read from both car_rules_10m and ped_rules_10m.
    df3 = read_shst_match(shst_match_folder, proposed_matched_suffix, 'proposed')
    df4 = read_shst_unmatch(shst_match_folder, proposed_unmatched_suffix, 'proposed',
                            ['car_rules_10m', 'ped_rules_10m'])

    # Order: existing matched, existing unmatched, proposed matched, proposed unmatched.
    data_dict[i] = [df1, df2, df3, df4]

process sfcta_bike_network_epsg4326
----------start reading shst matched data-------------
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\sfcta_bike_network_epsg4326_exst.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_20m\sfcta_bike_network_epsg4326_exst.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_30m\sfcta_bike_network_epsg4326_exst.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_40m\sfcta_bike_network_epsg4326_exst.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_50m\sfcta_bike_network_epsg4326_exst.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\car_rules_10m\sfcta_bike_network_epsg4326_exst.out.matched.geojson
----------finished reading shst matched data-------------
5253 existing links matched to shst network
----------export all matched to sfcta_bike_network_epsg432

In [18]:
# Datasets whose match outputs are split into numbered files
# (e.g. ..._exst_1, ..._exst_10), hence the `*` wildcard in the suffixes below.
for i in [
    'batc_bike_network_v2_epsg4326',
    'caltrans_d4_bike_network_epsg4326',
    'vta_bike_network_v2_epsg4326',
]:
    print('process {}'.format(i))

    # Glob patterns covering all numbered parts of each match output.
    existing_matched_suffix = '{}_exst_*.out.matched.geojson'.format(i)
    existing_unmatched_suffix = '{}_exst_*.out.unmatched.geojson'.format(i)
    proposed_matched_suffix = '{}_ppsd_*.out.matched.geojson'.format(i)
    proposed_unmatched_suffix = '{}_ppsd_*.out.unmatched.geojson'.format(i)

    # Existing links: matched records from every rule folder, unmatched from car_rules_10m only.
    df1 = read_shst_match(shst_match_folder, existing_matched_suffix, 'existing')
    df2 = read_shst_unmatch(shst_match_folder, existing_unmatched_suffix, 'existing',
                            ['car_rules_10m'])

    # Proposed links: unmatched records are read from both car_rules_10m and ped_rules_10m.
    df3 = read_shst_match(shst_match_folder, proposed_matched_suffix, 'proposed')
    df4 = read_shst_unmatch(shst_match_folder, proposed_unmatched_suffix, 'proposed',
                            ['car_rules_10m', 'ped_rules_10m'])

    # Order: existing matched, existing unmatched, proposed matched, proposed unmatched.
    data_dict[i] = [df1, df2, df3, df4]

process batc_bike_network_v2_epsg4326
----------start reading shst matched data-------------
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\batc_bike_network_v2_epsg4326_exst_1.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\batc_bike_network_v2_epsg4326_exst_10.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\batc_bike_network_v2_epsg4326_exst_11.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\batc_bike_network_v2_epsg4326_exst_12.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\batc_bike_network_v2_epsg4326_exst_13.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\batc_bike_network_v2_epsg4326_exst_14.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\batc_bike_network_v2_epsg4326_exst_2.out.matched.geojson

----------finished reading shst matched data-------------
1814 existing links matched to shst network
----------export all matched to batc_bike_network_v2_epsg4326_exst_matched.geojson
----------start reading shst unmatched data-------------
reading shst unmatched data :  ..\data\shst_match\existing\car_rules_10m\batc_bike_network_v2_epsg4326_exst_1.out.unmatched.geojson
reading shst unmatched data :  ..\data\shst_match\existing\car_rules_10m\batc_bike_network_v2_epsg4326_exst_10.out.unmatched.geojson
reading shst unmatched data :  ..\data\shst_match\existing\car_rules_10m\batc_bike_network_v2_epsg4326_exst_11.out.unmatched.geojson
reading shst unmatched data :  ..\data\shst_match\existing\car_rules_10m\batc_bike_network_v2_epsg4326_exst_12.out.unmatched.geojson
reading shst unmatched data :  ..\data\shst_match\existing\car_rules_10m\batc_bike_network_v2_epsg4326_exst_13.out.unmatched.geojson
reading shst unmatched data :  ..\data\shst_match\existing\car_rules_10m\batc_bike_network_v2_

reading shst unmatched data :  ..\data\shst_match\proposed\ped_rules_10m\batc_bike_network_v2_epsg4326_ppsd_3.out.unmatched.geojson
reading shst unmatched data :  ..\data\shst_match\proposed\ped_rules_10m\batc_bike_network_v2_epsg4326_ppsd_4.out.unmatched.geojson
reading shst unmatched data :  ..\data\shst_match\proposed\ped_rules_10m\batc_bike_network_v2_epsg4326_ppsd_5.out.unmatched.geojson
reading shst unmatched data :  ..\data\shst_match\proposed\ped_rules_10m\batc_bike_network_v2_epsg4326_ppsd_6.out.unmatched.geojson
reading shst unmatched data :  ..\data\shst_match\proposed\ped_rules_10m\batc_bike_network_v2_epsg4326_ppsd_7.out.unmatched.geojson
reading shst unmatched data :  ..\data\shst_match\proposed\ped_rules_10m\batc_bike_network_v2_epsg4326_ppsd_8.out.unmatched.geojson
reading shst unmatched data :  ..\data\shst_match\proposed\ped_rules_10m\batc_bike_network_v2_epsg4326_ppsd_9.out.unmatched.geojson
----------finished reading shst unmatched data-------------
562 proposed lin

process vta_bike_network_v2_epsg4326
----------start reading shst matched data-------------
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\vta_bike_network_v2_epsg4326_exst_11.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\vta_bike_network_v2_epsg4326_exst_12.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\vta_bike_network_v2_epsg4326_exst_13.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_10m\vta_bike_network_v2_epsg4326_exst_6.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_20m\vta_bike_network_v2_epsg4326_exst_11.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_20m\vta_bike_network_v2_epsg4326_exst_12.out.matched.geojson
reading shst matched data :  ..\data\shst_match\existing\bike_rules_20m\vta_bike_network_v2_epsg4326_exst_13.out.matched.geojson
readin

In [20]:
# batc_bike_network_v2_epsg4326_ppsd_unmatched.geojson did not export
# fix batc dataset's encoding error and export

# Index 3 of each data_dict entry is the proposed-unmatched frame.
# Note this is the same object stored in data_dict, so the fix below
# is visible there as well.
df = data_dict['batc_bike_network_v2_epsg4326'][3]

# double check: 562 proposed links failed to match
print(df.mtc_facility_id.nunique())

print(df.primarytrailname.unique())
# Strip the undecodable character from the trail names. Only string values are
# touched, so a missing (None/NaN) name no longer raises AttributeError.
df['primarytrailname'] = df['primarytrailname'].apply(
    lambda x: x.replace('�', '') if isinstance(x, str) else x)
print(df.primarytrailname.unique())

print('export')
df.to_file(os.path.join(all_unmatched_folder, 'batc_bike_network_v2_epsg4326_ppsd_unmatched.geojson'), driver = "GeoJSON")

562
['Marsh Creek Regional Trail' 'Southern Pacific Railroad'
 'Great California Delta Trail' 'California Delta Trail'
 'Brushy Peak to Bethany Reservoir Trail' 'Bay Area Ridge Trail'
 'Crockett to Crockett Hills Regional Park' 'Waterfront Road Trail'
 'Iron Horse Trail' 'Martinez Shoreline to Pt. Edith'
 'San Francisco Bay Trail' 'Wildcat Creek Trail'
 'Walnut Creek Channel Extension' 'Delta de Anza Regional Trail'
 'Richmond Greenway' 'Lafayette-Moraga Trail' 'Mission Blvd'
 'Niles Canyon Trail' 'Niles reroute' 'East Bay Greenway'
 'Five Wounds Trail' 'Guadelupe River Trail'
 'Juan Bautista de Anza NHT Juan Bautista de Anza NHT na'
 'Juan Bautista de Anza NHT (bicycle route) Juan Bautista de Anza NHT  Guadalupe Sub-regional Trail'
 'Coyote Creek Trail'
 'Guadalupe Sub-regional Trail Guadalupe Sub-regional Trail na'
 'Upper Guadalupe Trail Connector Trail' 'Penitencia Creek Trail'
 'Los Gatos Creek Trail' 'Stevens Creek Blvd' 'Central Bikeway'
 'Lonus Street' 'CA Highway 17 Trail Cros