import sys
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime as dt
import tracc
from r5py import TransportNetwork, TravelTimeMatrixComputer, TransitMode, LegMode
from datetime import timedelta
import matplotlib.pyplot as plt
# r5py reads JVM options from sys.argv; cap the Java heap at 8 GB.
# NOTE(review): the original called sys.argv.append([...]), which inserts a
# *list* object into argv; extend() adds the two option strings individually,
# which is what an argument parser expects.
sys.argv.extend(["--max-memory", "8G"])
# Root folder holding all DemoLand input/output data.
data_folder = "/Users/azanchetta/OneDrive - The Alan Turing Institute/demoland_data"
# Ordnance Survey (OS) Greenspace data
# (using Tyne and Wear data for now, generated previously in QGis)
# greenspace_file = f"{data_folder}/raw/accessibility/OS Open Greenspace (GPKG) GB/data/opgrsp_gb.gpkg"
greenspace_sites_file = (
    f"{data_folder}/processed/accessibility/greenspace-sites_tynewear.gpkg"
)
accesspoints_file = f"{data_folder}/processed/accessibility/accessTOgs_tynewear.gpkg"
# OSM landuse data (Tyne and Wear data)
osm_landuse_file = f"{data_folder}/raw/OSM_tynewear/tyne-and-wear-latest-free.shp/gis_osm_landuse_a_free_1.shp"
# if needed for mapping purposes (?)
# needed in order to filter greenspace data within the regional boundaries
region_lads_file = f"{data_folder}/processed/authorities/LADs_tynewear.shp"
Appendix B — Green space data cleaning
Data: Ordnance Survey greenspace sites
Available layers: sites (areas, polygons), access points
Metric we want to create: from ttm matrix (OAs to GS entrances) retain one access point per park, IE consider only once every park that is reachable at least through one access point in 15 min. Then sum up all the areas of the reachable parks.
We first need to clean the OS data because there are overlapping features within features, IE for example a playground or tennis court could be enclosed in a public park.
Consideration: most of the time it is the two categories Playing Field
and Public Park Or Garden
that contain other categories within their boundaries. We could play with this. Other Sports Facility
also contains and is contained, but mostly is contained.
The hierarchy of the layers seems to be so:
Public Park Or Garden
contains
Playing Field
contains
Other Sport Facilities
contains
rest of categories.
Last idea: separate each category into a new gdf, then overlay.difference
with the order above.
Workflow
… ** 1 . filter out unwanted categories from the OS layer (allotments, golf course, bowling green)
NOTE: might need to get rid of these categories after the integration with OSM, or we’d re-introduce golf courses areas from OSM (in some cases they have internal areas classified as “forest”).
Though, we have to get rid of these before the overlay/dissolve, or we’d end up with categories we don’t want
2 . clean OS data: - filter the entrances (only the ones on the edges??) - overlay / dissolve / intersect to get rid of overlying polygons (ex sport pitch inside a park)
3 . add “missing” parks: prepare OSM data (entrances?)
** 4 . put together the last and OS
5 . get areas size
6 . run accessibility analysis (ready) - filter entrances (one per park) - assign metric as sum[park size]
NOTE OSM data is problematic:
it over-estimates green spaces:
have manually cross-checked parks with existing trails passing through them (using https://hiking.waymarkedtrails.org/) AND Google Street View to check entrances to parks/greens/ etc
Some of the parks are open, but some are closed (a non-comprehensive list is in some hand-written notes); for others the entrance point to the park is not given.
I don’t think it’s a good approximation to consider random points along the border as entrances. we could maybe try to generate entrances by intersecting with walking paths.
[from OSM landuse layer we consider the tags/fields: cemetery
, forest
, nature reserve
, park
, recreation ground
]
B.1 Variables definition and data import
- Data import
# - Data import
# Greenspace sites (polygons). NOTE(review): the layer name 'grenspace-...'
# looks misspelled — confirm it matches the actual layer name in the gpkg.
greenspace_sites = gpd.read_file(
    greenspace_sites_file, layer="grenspace-sites_tynewear"
)
greenspace_sites.head()
# Access points (entrances) to the greenspace sites.
accesspoints = gpd.read_file(accesspoints_file, layer="pointsaccessTOgs_tynewear")
accesspoints.head()
# for mapping:
region_lads = gpd.read_file(region_lads_file)
region_lads.head()
# selecting from greenspace layer only the relevant columns
greenspace_sites = greenspace_sites[["id", "function", "geometry"]]
greenspace_sites.head()
greenspace_sites.explore(column="function")
# viewing 'function' categories to work on later
greenspace_sites["function"].unique()
Working on OS data before understanding how to integrate OSM data
IE points 1, 2, 5, 6 from Workflow above
B.2 1. filter categories from OS data
we consider only open and not paid-for spaces
# We consider only open and not paid-for spaces, so drop allotments,
# golf courses and bowling greens.
greenspace_sites_select = greenspace_sites.query(
    "function!='Allotments Or Community Growing Spaces' & function!='Golf Course' & function!='Bowling Green'"
)
greenspace_sites_select.explore(column="function")
B.3 2. Clean OS data
A. dissolve the overlapping polygons
# Split the selection into the hierarchy levels described above:
# Public Park Or Garden > Playing Field > Other Sports Facility > the rest.
publicpark = greenspace_sites_select.query("function=='Public Park Or Garden'")
playingfield = greenspace_sites_select.query("function=='Playing Field'")
othersport = greenspace_sites_select.query("function=='Other Sports Facility'")
therest = greenspace_sites_select.query(
    "function!='Playing Field' & function!='Public Park Or Garden' & function!='Other Sports Facility'"
)
# find 'therest' not included in the upper categories
# we use sjoin to perform a spatial filter of 'therest' polygons contained in upper categories
# NOTE(review): the `op=` keyword is deprecated in recent geopandas in favour
# of `predicate=` — confirm which geopandas version is pinned.
join11 = gpd.sjoin(therest, othersport, op="within", how="inner")
join12 = gpd.sjoin(therest, playingfield, op="within", how="inner")
join13 = gpd.sjoin(therest, publicpark, op="within", how="inner")
# join13.columns = join13.columns.str.replace('index_', 'join3')
join13.head()
# generate list of the IDs of 'therest' contained in upper categories,
# in order to eliminate the corresponding polygons from the layer
list_for_diff11 = join11["id_left"].drop_duplicates().to_list()
# diff1 = therest.query('id'.isin('list_for_diff1')) # doesn't work
# diff1 = therest[therest['id'].isin('list_for_diff1')] # doesn't work
diff11 = therest[
    ~therest.id.isin(list_for_diff11)
]  # 1st difference layer; note the negation ~ to keep the polygons NOT included
list_for_diff12 = join12["id_left"].drop_duplicates().to_list()
diff12 = diff11[~diff11.id.isin(list_for_diff12)]  # 2nd difference layer
list_for_diff13 = join13["id_left"].drop_duplicates().to_list()
diff13 = diff12[
    ~diff12.id.isin(list_for_diff13)
]  # 3rd difference layer, this is for 'therest' categories
# we repeat the same operation for subsequent categories:
# find 'othersport' not included in the upper categories
join21 = gpd.sjoin(othersport, playingfield, op="within", how="inner")
join22 = gpd.sjoin(othersport, publicpark, op="within", how="inner")
list_for_diff21 = join21["id_left"].drop_duplicates().to_list()
diff21 = othersport[~othersport.id.isin(list_for_diff21)]
list_for_diff22 = join22["id_left"].drop_duplicates().to_list()
diff22 = diff21[~diff21.id.isin(list_for_diff22)]  # 'othersport' difference
# find 'playing fields' not included in the upper categories (and viceversa?)
join31 = gpd.sjoin(playingfield, publicpark, op="within", how="inner")
join32 = gpd.sjoin(
    publicpark, playingfield, op="within", how="inner"
)  ## check it is not empty ... it is empty, we do not use this join
list_for_diff31 = join31["id_left"].drop_duplicates().to_list()
diff31 = playingfield[
    ~playingfield.id.isin(list_for_diff31)
]  # 'playingfield' difference
# put together all the difference layers (this should produce the desired output)
together1 = pd.concat([diff13, diff22]).pipe(
    gpd.GeoDataFrame
)  # 'therest' + 'othersport' differences
together1.head()
together2 = pd.concat([together1, diff31]).pipe(
    gpd.GeoDataFrame
)  # last gdf + 'playingfield' difference
together_again = pd.concat([together2, publicpark]).pipe(
    gpd.GeoDataFrame
)  # last gdf + all the public parks
together_again.head()
# re-setting the CRS (British National Grid), as it was lost in the concat
together_again = together_again.set_crs("epsg:27700")
together_again.explore(
    column="function"
)  # any reason why it doesn't show the whole gdf?
B. Filter the entrances (only the ones on the edges)
We can, as a first measure, filter out from the entrances the ones to greenspaces that are not in the data set anymore.
The entrances that are left should only be located along the edges of the relevant polygons.
# Keep only entrances that refer to a site still present in the cleaned layer;
# the remaining entrances should lie along the edges of the retained polygons.
list_gs_id = together_again["id"].to_list()
accesspoints.head()
accesspoints_edge = accesspoints[accesspoints.refToGreenspaceSite.isin(list_gs_id)]
accesspoints_edge.explore()
# checking crs
together_again.crs  # epsg:27700
accesspoints_edge.crs  # epsg:4326 (wgs84)
accesspoints_edge_OSGB36 = accesspoints_edge.to_crs("epsg:27700")
# saving the two edited layers into a geopackage. first the sites
complete_gpkg_filename = (
    f"{data_folder}/processed/accessibility/greenspace_tynewear_edited.gpkg"
)
together_again.to_file(complete_gpkg_filename, layer="sites")
# adding the entrances to the same gpkg
accesspoints_edge_OSGB36.to_file(complete_gpkg_filename, layer="access_points")
B.4 5 . get areas size
# Polygon areas in m^2 (valid because the layer is in EPSG:27700, a metric CRS).
together_again["area_m2"] = together_again["geometry"].area
together_again.explore(column="area_m2", cmap="plasma", scheme="NaturalBreaks", k=10)
B.5 6. run accessibility analysis
# define variables path
oas_centroids_file = (
    f"{data_folder}/processed/OA_centroids_TyneWear.gpkg"  # used for population origin
)
oas_file = f"{data_folder}/processed/authorities/OA_TyneWear.gpkg"  # needed for visualisation purposes
osm_data_file = f"{data_folder}/raw/accessibility/tyne-and-wear-latest.osm.pbf"
gtfs_data_file = f"{data_folder}/raw/accessibility/itm_north_east_gtfs.zip"
# importing needed data
# origins (IE output areas, OAs)
origin_centroids = gpd.read_file(oas_centroids_file, layer="OA_centroids_TyneWear")
origin_centroids["id"] = origin_centroids[
    "OA11CD"
]  # Origin dataset must contain an 'id' column (r5py requirement)
origin_centroids.head()
oas_boundaries = gpd.read_file(oas_file, layer="OA_TyneWear")
oas_boundaries.crs
# checking crs are compatible with OSM and GTFS data (they must be WGS84)
origin_centroids.crs  # wgs84
accesspoints_edge.crs
origin_centroids_wgs84 = origin_centroids.to_crs("epsg:4326")
origin_centroids_wgs84.crs
oas_boundaries_wgs84 = oas_boundaries.to_crs("epsg:4326")
# generate transport network from OSM extract + GTFS feed
transport_network = TransportNetwork(osm_data_file, [gtfs_data_file])
# generate travel-time matrix (by foot only for now)
ttm_walking_OAtoGS = TravelTimeMatrixComputer(
    transport_network,
    origins=origin_centroids_wgs84,
    destinations=accesspoints_edge,  # WGS84 access points (see CRS check above)
    max_time=dt.timedelta(seconds=900),  # restricting travel to 15 min
    speed_walking=4.8,  # walking speed, presumably km/h (r5py convention) — confirm
    transport_modes=[LegMode.WALK],
)
ttm_walking_OAtoGS = ttm_walking_OAtoGS.compute_travel_times()
ttm_walking_OAtoGS.head()
# accessibility calculation with the tracc package
df_tracc = tracc.costs(ttm_walking_OAtoGS)
df_tracc.data.head()
# sanity check: longest travel time observed per origin
max_time = ttm_walking_OAtoGS.groupby("from_id")["travel_time"].max()
max_time.max()
# Computing impedance function based on a 15 minute travel time threshold.
# (tracc spells the method/parameters 'impedence'.)
df_tracc.impedence_calc(
    cost_column="travel_time",
    impedence_func="cumulative",
    impedence_func_params=15,  # minutes cap
    output_col_name="cum_15",
    prune_output=False,
)
df_tracc.data.head()
# keep only OA/entrance pairs reachable within the 15-minute threshold
df_tracc_15min = df_tracc.data[df_tracc.data.loc[:, "cum_15"] == 1]
df_tracc_15min.head()
A. filter entrances (one per park)
accesspoints.head()
# associate park ID to entrances
accesspoints_withPark = pd.merge(
    df_tracc_15min,
    accesspoints[["id", "refToGreenspaceSite"]],
    left_on="to_id",
    right_on="id",
    how="right",
)
accesspoints_withPark.head()
len(accesspoints_withPark.from_id.unique())  # 3726
len(accesspoints_withPark.refToGreenspaceSite.unique())  # 1171
# not relevant, but just to get an idea of the dimensions
# Keep, per (OA, park) pair, only the entrance with the shortest travel time,
# so each reachable park is counted once per origin.
oneaccess_perpark = accesspoints_withPark.sort_values("travel_time").drop_duplicates(
    ["from_id", "refToGreenspaceSite"]
)
oneaccess_perpark.head()
# oneaccess_perpark = accesspoints_withPark.groupby('from_id')['refToGreenspaceSite'].agg(['unique'])
# oneaccess_perpark.head()
B. assign metric as sum[park size]
oneaccess_perpark.describe()
oneaccess_perpark.columns
# generate df with area per site
oneaccess_perpark_witharea = pd.merge(
    oneaccess_perpark[["from_id", "refToGreenspaceSite"]],
    together_again[["id", "area_m2"]],
    left_on="refToGreenspaceSite",
    right_on="id",
)
oneaccess_perpark_witharea.head()
# generate df with one row per OA and the sum of the reachable sites' areas
OAs_metric = (
    oneaccess_perpark_witharea.groupby(["from_id"])["area_m2"].sum().reset_index()
)
OAs_metric.head()
OAs_metric.to_csv("../output/acc_walking_gs15_OAtoGS_tynewear.csv")
# plotting results on the OA boundaries
oas_boundaries_metric = oas_boundaries_wgs84.merge(
    OAs_metric, left_on="geo_code", right_on="from_id", how="right"
)
oas_boundaries_metric.explore(
    column="area_m2", cmap="plasma", scheme="NaturalBreaks", k=10
)