import pandas as pd
import geopandas as gpd
import numpy as np
Appendix P — Sample explanatory variables based on empirical data and custom scenario definition
This notebook contains code to generate explanatory variables for an output area based on 4 sliders:
- Level of urbanity
- Use
- Greenspace
- Job types
The data are either adapted from the originally observed values (if the level of urbanity does not change) or sampled from the distribution of the target signature type across Great Britain.
# Root folder with the project data; every input below is read relative to it.
data_folder = "/Users/martin/Library/CloudStorage/OneDrive-SharedLibraries-TheAlanTuringInstitute/Daniel Arribas-Bel - demoland_data"
Load the data informing the distributions.
# Median and interquartile-range tables used by the samplers; they are indexed
# as ``table.loc[signature_type, variable]``.
median_form = pd.read_parquet(f"{data_folder}/sampling/median_form.parquet")
iqr_form = pd.read_parquet(f"{data_folder}/sampling/iqr_form.parquet")
median_function = pd.read_parquet(f"{data_folder}/sampling/median_function.parquet")
iqr_function = pd.read_parquet(f"{data_folder}/sampling/iqr_function.parquet")

# Observed values per output area, indexed by OA code.
oa = (
    gpd.read_parquet(f"{data_folder}/processed/interpolated/all_oa.parquet")
    .set_index("geo_code")
    .rename(columns={"population_estimate": "population"})
)

# Lookup table exposing ``primary_type`` (the signature type) per OA code.
oa_key = pd.read_parquet(f"{data_folder}/sampling/oa_key.parquet")
Get OA areas for area-weighted variables.
# Area of every OA geometry (in the units of the layer's CRS), indexed by OA code.
oa_area = oa.area
Define a sampling method.
def _form(signature_type, variable, random_seed):
    """Sample a value of a form variable for a signature type.

    The value is drawn from a normal distribution centred on the median of
    ``variable`` for ``signature_type``. The spread (standard deviation) is
    defined as 1/5 of the interquartile range.

    Parameters
    ----------
    signature_type : hashable
        Row label of ``median_form`` / ``iqr_form``.
    variable : hashable
        Column label of ``median_form`` / ``iqr_form``.
    random_seed : int or None
        Seed passed to ``numpy.random.default_rng``. A new generator is
        created on every call.

    Returns
    -------
    float
        A single sampled value.
    """
    rng = np.random.default_rng(random_seed)
    return rng.normal(
        median_form.loc[signature_type, variable],
        iqr_form.loc[signature_type, variable] / 5,
    )
def _function(signature_type, variable, random_seed):
    """Sample a value of a function variable for a signature type.

    The value is drawn from a normal distribution centred on the median of
    ``variable`` for ``signature_type``. The spread (standard deviation) is
    defined as 1/5 of the interquartile range.

    Parameters
    ----------
    signature_type : hashable
        Row label of ``median_function`` / ``iqr_function``.
    variable : hashable
        Column label of ``median_function`` / ``iqr_function``.
    random_seed : int or None
        Seed passed to ``numpy.random.default_rng``. A new generator is
        created on every call.

    Returns
    -------
    float
        A single sampled value.
    """
    rng = np.random.default_rng(random_seed)
    return rng.normal(
        median_function.loc[signature_type, variable],
        iqr_function.loc[signature_type, variable] / 5,
    )
def _populations(defaults, index):
    """Balance residential and workplace population.

    Workplace population and residential population are treated 1:1 and
    are re-allocated based on the index. The proportion of workplace
    categories is not changed.

    Parameters
    ----------
    defaults : pandas.Series
        Values indexed by variable name. Must contain ``"population"`` and
        the eight workplace categories listed below.
    index : float
        Use index in a range -1...1. Negative values move that share of jobs
        to residential population; positive values move that share of
        residential population to jobs.

    Returns
    -------
    pandas.Series
        A modified copy of ``defaults``; the input is left untouched.

    Raises
    ------
    ValueError
        If ``index`` is outside of the -1...1 range.
    """
    if not -1 <= index <= 1:
        raise ValueError(f"use index must be in a range -1...1. {index} given.")

    jobs = [
        "A, B, D, E. Agriculture, energy and water",
        "C. Manufacturing",
        "F. Construction",
        "G, I. Distribution, hotels and restaurants",
        "H, J. Transport and communication",
        "K, L, M, N. Financial, real estate, professional and administrative activities",
        "O,P,Q. Public administration, education and health",
        "R, S, T, U. Other",
    ]

    # Work on a copy so the caller's Series is never modified in place.
    defaults = defaults.copy()

    n_jobs = defaults[jobs].sum()
    population = defaults["population"]

    # The number of people moved is a share of whichever group is shrinking.
    if index < 0:
        difference = index * n_jobs
    else:
        difference = index * population

    new_n_jobs = n_jobs + difference
    defaults["population"] = population - difference

    if n_jobs == 0:
        # No existing category mix to preserve; dividing by zero would turn
        # all job counts into NaN. Spread the new jobs evenly instead.
        defaults[jobs] = new_n_jobs / len(jobs)
    else:
        defaults[jobs] = defaults[jobs] * (new_n_jobs / n_jobs)
    return defaults
def _greenspace(defaults, index):
    """Allocate greenspace to OA.

    Allocate publicly accessible formal greenspace to OA. Defines a portion
    of OA that is covered by green urban areas. Realistic values are fairly
    low. The value affects populations and other land cover classes.

    Parameters
    ----------
    defaults : pandas.Series
        Values indexed by variable name. Must contain
        ``"Land cover [Green urban areas]"``.
    index : float
        Target share of the OA covered by green urban areas, in a range 0...1.

    Returns
    -------
    pandas.Series
        A new Series; the input is not modified.

    Raises
    ------
    ValueError
        If ``index`` is outside of the 0...1 range.
    """
    if not 0 <= index <= 1:
        raise ValueError(f"greenspace index must be in a range 0...1. {index} given.")

    greenspace_orig = defaults["Land cover [Green urban areas]"]
    newly_allocated_gs = index - greenspace_orig

    # Every value (population, jobs, land cover shares) shrinks by the share of
    # the area newly turned into greenspace; this creates a new Series.
    # NOTE(review): if land cover shares are meant to sum to 1, they only do so
    # after this step when the original greenspace share is 0 -- confirm.
    defaults = defaults * (1 - newly_allocated_gs)

    # The greenspace share itself is set to the requested value.
    defaults["Land cover [Green urban areas]"] = index
    return defaults
def _job_types(defaults, index):
    """Balance job types.

    Balance job types between manual and white collar workplace categories.
    Index represents the proportion of white collar jobs in an area. The
    total sum of FTEs is not changed.

    The service category is not affected under an assumption that both white
    and blue collar workers need the same amount of services to provide food etc.

    Parameters
    ----------
    defaults : pandas.Series
        Values indexed by variable name. Must contain the blue and white
        collar workplace categories listed below.
    index : float
        Target proportion of white collar jobs, in a range 0...1.

    Returns
    -------
    pandas.Series
        A modified copy of ``defaults``; the input is left untouched.

    Raises
    ------
    ValueError
        If ``index`` is outside of the 0...1 range.
    """
    if not 0 <= index <= 1:
        raise ValueError(f"job_types index must be in a range 0...1. {index} given.")

    blue = [
        "A, B, D, E. Agriculture, energy and water",
        "C. Manufacturing",
        "F. Construction",
        "H, J. Transport and communication",
    ]
    white = [
        "K, L, M, N. Financial, real estate, professional and administrative activities",
        "O,P,Q. Public administration, education and health",
    ]

    # Work on a copy so the caller's Series is never modified in place.
    defaults = defaults.copy()

    blue_collar = defaults[blue].sum()
    white_collar = defaults[white].sum()
    total = blue_collar + white_collar

    targets = (
        (blue, blue_collar, total * (1 - index)),
        (white, white_collar, total * index),
    )
    for categories, current, target in targets:
        if current == 0:
            # No existing mix to rescale; dividing by zero would yield NaN.
            # Spread the target evenly so the total is still preserved.
            defaults[categories] = target / len(categories)
        else:
            # Rescale while keeping the proportions within the group.
            defaults[categories] = defaults[categories] * (target / current)

    return defaults
def get_signature_values(
    oa_code: str,
    signature_type: str = None,
    use: float = 0,
    greenspace: float = None,
    job_types: float = None,
    random_seed: int = None,
):
    """Generate explanatory variables based on a scenario

    Generates values for explanatory variables based on empirical data derived
    from the Urban Grammar project and a scenario definition based on a
    Urban Grammar signature type, land use balance, greenspace allocation
    and a job type balance.

    If the target ``signature_type`` differs from the one already allocated
    to OA, the data is sampled from the distribution from the whole GB. If
    they are equal, the existing values measured in place are used. That allows
    playing with other variables without changing the form.

    Parameters
    ----------
    oa_code : string
        String representing the OA code, e.g. ``"E00042707"``.
    signature_type : string, optional
        String representing signature type. ``None`` (default) keeps the
        signature type already allocated to the OA. See below the possible
        options and their relationship to the level of urbanity.

            0: 'Wild countryside',
            1: 'Countryside agriculture',
            2: 'Urban buffer',
            3: 'Warehouse/Park land',
            4: 'Open sprawl',
            5: 'Disconnected suburbia',
            6: 'Accessible suburbia',
            7: 'Connected residential neighbourhoods',
            8: 'Dense residential neighbourhoods',
            9: 'Gridded residential quarters',
            10: 'Dense urban neighbourhoods',
            11: 'Local urbanity',
            12: 'Regional urbanity',
            13: 'Metropolitan urbanity',
            14: 'Concentrated urbanity',
            15: 'Hyper concentrated urbanity',

    use : float, optional
        Float in a range -1...1 reflecting the land use balance between
        fully residential (-1) and fully commercial (1). Defaults to 0,
        a value derived from signatures. For values < 0, we are allocating
        workplace population to residential population. For values > 0, we
        are allocating residential population to workplace population.
        Extremes are allowed but are not realistic, in most cases.
        ``None`` is treated like 0 (no change).
    greenspace : float, optional
        Float in a range 0...1 reflecting the amount of greenspace in the
        area. 0 represents no accessible greenspace, 1 represents whole
        area covered by a greenspace. This value will proportionally affect
        the amounts of jobs and population. ``None`` (default) keeps the
        existing value.
    job_types : float, optional
        Float in a range 0...1 reflecting the balance of job types in the
        area between entirely blue collar jobs (0) and entirely white collar
        jobs (1). ``None`` (default) keeps the existing balance.
    random_seed : int, optional
        Random seed. Each variable is sampled by a new generator created
        from this seed, so with a fixed seed the draws of individual
        variables are not independent of each other.

    Returns
    -------
    Series
        Form variables followed by function variables, named ``oa_code``.
    """
    orig_type = oa_key.primary_type[oa_code]

    if signature_type is not None and orig_type != signature_type:
        # The level of urbanity changes: sample both groups of variables from
        # the distributions of the target signature type. Absolute values keep
        # the sampled numbers non-negative.
        form = pd.Series(
            [_form(signature_type, var, random_seed) for var in median_form.columns],
            index=median_form.columns,
            name=oa_code,
        ).abs()

        defaults = pd.Series(
            [
                _function(signature_type, var, random_seed)
                for var in median_function.columns
            ],
            index=median_function.columns,
            name=oa_code,
        ).abs()

        # Sampled population and job counts are scaled by the area of the OA.
        area_weighted = [
            "population",
            "A, B, D, E. Agriculture, energy and water",
            "C. Manufacturing",
            "F. Construction",
            "G, I. Distribution, hotels and restaurants",
            "H, J. Transport and communication",
            "K, L, M, N. Financial, real estate, professional and administrative activities",
            "O,P,Q. Public administration, education and health",
            "R, S, T, U. Other",
        ]
        defaults[area_weighted] = defaults[area_weighted] * oa_area[oa_code]

    else:
        # Same level of urbanity: start from the values observed in place.
        form = oa.loc[oa_code][median_form.columns]
        defaults = oa.loc[oa_code][median_function.columns]

    # population
    if use is not None and use != 0:
        defaults = _populations(defaults, index=use)

    # greenspace; compared against None because 0 is a valid request
    if greenspace is not None:
        defaults = _greenspace(defaults, greenspace)

    # job types; compared against None because 0 is a valid request
    if job_types is not None:
        defaults = _job_types(defaults, job_types)

    return pd.concat([form, defaults])
Example:
Set the OA we are interested in.
# Code of the output area used in the examples below.
oa_code = "E00042271"
Check the signature type of the OA.
# Look up the signature type stored for this OA in the key table.
oa_key.primary_type[oa_code]
'Dense urban neighbourhoods'
This is the actual value with no changes.
# No scenario parameters: returns the values observed for the OA.
get_signature_values(
    oa_code,
)
sdbAre 836.43386
sdbCoA 10.849734
ssbCCo 0.342091
ssbCor 5.918075
ssbSqu 5.799881
ssbERI 0.8782
ssbCCM 28.454278
ssbCCD 3.243832
stbOri 14.042689
sdcAre 3387.82744
sscCCo 0.449487
sscERI 0.960133
sicCAR 0.245127
stbCeA 5.194842
mtbAli 4.3889
mtbNDi 20.755305
mtcWNe 0.028451
ltbIBD 21.166978
sdsSPW 28.213326
sdsSWD 2.899166
sdsSPO 0.447096
sdsLen 109.429581
sssLin 0.92334
ldsMSL 1645.009527
mtdDeg 2.850976
linP3W 0.810375
linP4W 0.108821
linPDE 0.080804
lcnClo 0.000002
ldsCDL 162.855545
xcnSCl 0.018812
linWID 0.035681
stbSAl 3.947726
sdsAre 17595.05196
sisBpM 0.056843
misCel 12.861005
ltcRea 49.85095
ldeAre 59270.775826
lseCCo 0.387218
lseERI 0.814681
lteOri 13.778403
lteWNB 0.031542
lieWCe 0.000269
population 391
A, B, D, E. Agriculture, energy and water 0.685492
C. Manufacturing 0.097322
F. Construction 7.038725
G, I. Distribution, hotels and restaurants 21.731618
H, J. Transport and communication 22.106873
K, L, M, N. Financial, real estate, professional and administrative activities 10.598732
O,P,Q. Public administration, education and health 74.328726
R, S, T, U. Other 9.858182
Land cover [Non-irrigated arable land] 0.0
Land cover [Industrial or commercial units] 0.509093
Land cover [Sport and leisure facilities] 0.0
Land cover [Green urban areas] 0.0
Land cover [Discontinuous urban fabric] 0.490907
Land cover [Pastures] 0.0
Land cover [Continuous urban fabric] 0.0
Name: E00042271, dtype: object
Stay within the same signature type and change only use.
- More residential
# Negative use index: half of the jobs are re-allocated to residents.
get_signature_values(oa_code, use=-0.5)
sdbAre 836.43386
sdbCoA 10.849734
ssbCCo 0.342091
ssbCor 5.918075
ssbSqu 5.799881
ssbERI 0.8782
ssbCCM 28.454278
ssbCCD 3.243832
stbOri 14.042689
sdcAre 3387.82744
sscCCo 0.449487
sscERI 0.960133
sicCAR 0.245127
stbCeA 5.194842
mtbAli 4.3889
mtbNDi 20.755305
mtcWNe 0.028451
ltbIBD 21.166978
sdsSPW 28.213326
sdsSWD 2.899166
sdsSPO 0.447096
sdsLen 109.429581
sssLin 0.92334
ldsMSL 1645.009527
mtdDeg 2.850976
linP3W 0.810375
linP4W 0.108821
linPDE 0.080804
lcnClo 0.000002
ldsCDL 162.855545
xcnSCl 0.018812
linWID 0.035681
stbSAl 3.947726
sdsAre 17595.05196
sisBpM 0.056843
misCel 12.861005
ltcRea 49.85095
ldeAre 59270.775826
lseCCo 0.387218
lseERI 0.814681
lteOri 13.778403
lteWNB 0.031542
lieWCe 0.000269
population 464.222835
A, B, D, E. Agriculture, energy and water 0.342746
C. Manufacturing 0.048661
F. Construction 3.519362
G, I. Distribution, hotels and restaurants 10.865809
H, J. Transport and communication 11.053436
K, L, M, N. Financial, real estate, professional and administrative activities 5.299366
O,P,Q. Public administration, education and health 37.164363
R, S, T, U. Other 4.929091
Land cover [Non-irrigated arable land] 0.0
Land cover [Industrial or commercial units] 0.509093
Land cover [Sport and leisure facilities] 0.0
Land cover [Green urban areas] 0.0
Land cover [Discontinuous urban fabric] 0.490907
Land cover [Pastures] 0.0
Land cover [Continuous urban fabric] 0.0
Name: E00042271, dtype: object
- Less residential, more jobs
# Positive use index: 40% of residents are re-allocated to jobs.
get_signature_values(oa_code, use=0.4)
sdbAre 836.43386
sdbCoA 10.849734
ssbCCo 0.342091
ssbCor 5.918075
ssbSqu 5.799881
ssbERI 0.8782
ssbCCM 28.454278
ssbCCD 3.243832
stbOri 14.042689
sdcAre 3387.82744
sscCCo 0.449487
sscERI 0.960133
sicCAR 0.245127
stbCeA 5.194842
mtbAli 4.3889
mtbNDi 20.755305
mtcWNe 0.028451
ltbIBD 21.166978
sdsSPW 28.213326
sdsSWD 2.899166
sdsSPO 0.447096
sdsLen 109.429581
sssLin 0.92334
ldsMSL 1645.009527
mtdDeg 2.850976
linP3W 0.810375
linP4W 0.108821
linPDE 0.080804
lcnClo 0.000002
ldsCDL 162.855545
xcnSCl 0.018812
linWID 0.035681
stbSAl 3.947726
sdsAre 17595.05196
sisBpM 0.056843
misCel 12.861005
ltcRea 49.85095
ldeAre 59270.775826
lseCCo 0.387218
lseERI 0.814681
lteOri 13.778403
lteWNB 0.031542
lieWCe 0.000269
population 234.6
A, B, D, E. Agriculture, energy and water 1.41758
C. Manufacturing 0.201259
F. Construction 14.555892
G, I. Distribution, hotels and restaurants 44.940396
H, J. Transport and communication 45.716413
K, L, M, N. Financial, real estate, professional and administrative activities 21.91789
O,P,Q. Public administration, education and health 153.709788
R, S, T, U. Other 20.386452
Land cover [Non-irrigated arable land] 0.0
Land cover [Industrial or commercial units] 0.509093
Land cover [Sport and leisure facilities] 0.0
Land cover [Green urban areas] 0.0
Land cover [Discontinuous urban fabric] 0.490907
Land cover [Pastures] 0.0
Land cover [Continuous urban fabric] 0.0
Name: E00042271, dtype: object
- Less residential (more jobs) and more greenspace.
Check current greenspace first.
# Read the current greenspace share from the unchanged values.
get_signature_values(
    oa_code,
)["Land cover [Green urban areas]"]
0.0
Nothing. Allocate 20% of the area to greenspace.
# Move 40% of residents to jobs and set the greenspace share to 0.2.
get_signature_values(oa_code, use=0.4, greenspace=0.2)
sdbAre 836.43386
sdbCoA 10.849734
ssbCCo 0.342091
ssbCor 5.918075
ssbSqu 5.799881
ssbERI 0.8782
ssbCCM 28.454278
ssbCCD 3.243832
stbOri 14.042689
sdcAre 3387.82744
sscCCo 0.449487
sscERI 0.960133
sicCAR 0.245127
stbCeA 5.194842
mtbAli 4.3889
mtbNDi 20.755305
mtcWNe 0.028451
ltbIBD 21.166978
sdsSPW 28.213326
sdsSWD 2.899166
sdsSPO 0.447096
sdsLen 109.429581
sssLin 0.92334
ldsMSL 1645.009527
mtdDeg 2.850976
linP3W 0.810375
linP4W 0.108821
linPDE 0.080804
lcnClo 0.000002
ldsCDL 162.855545
xcnSCl 0.018812
linWID 0.035681
stbSAl 3.947726
sdsAre 17595.05196
sisBpM 0.056843
misCel 12.861005
ltcRea 49.85095
ldeAre 59270.775826
lseCCo 0.387218
lseERI 0.814681
lteOri 13.778403
lteWNB 0.031542
lieWCe 0.000269
population 187.68
A, B, D, E. Agriculture, energy and water 1.134064
C. Manufacturing 0.161007
F. Construction 11.644714
G, I. Distribution, hotels and restaurants 35.952317
H, J. Transport and communication 36.57313
K, L, M, N. Financial, real estate, professional and administrative activities 17.534312
O,P,Q. Public administration, education and health 122.96783
R, S, T, U. Other 16.309162
Land cover [Non-irrigated arable land] 0.0
Land cover [Industrial or commercial units] 0.407275
Land cover [Sport and leisure facilities] 0.0
Land cover [Green urban areas] 0.2
Land cover [Discontinuous urban fabric] 0.392725
Land cover [Pastures] 0.0
Land cover [Continuous urban fabric] 0.0
Name: E00042271, dtype: object
Change job type allocation towards more blue collar jobs.
# job_types=0.2 targets 20% white collar and 80% blue collar jobs.
get_signature_values(
    oa_code,
    use=0.4,
    greenspace=0.2,
    job_types=0.2,
)
sdbAre 836.43386
sdbCoA 10.849734
ssbCCo 0.342091
ssbCor 5.918075
ssbSqu 5.799881
ssbERI 0.8782
ssbCCM 28.454278
ssbCCD 3.243832
stbOri 14.042689
sdcAre 3387.82744
sscCCo 0.449487
sscERI 0.960133
sicCAR 0.245127
stbCeA 5.194842
mtbAli 4.3889
mtbNDi 20.755305
mtcWNe 0.028451
ltbIBD 21.166978
sdsSPW 28.213326
sdsSWD 2.899166
sdsSPO 0.447096
sdsLen 109.429581
sssLin 0.92334
ldsMSL 1645.009527
mtdDeg 2.850976
linP3W 0.810375
linP4W 0.108821
linPDE 0.080804
lcnClo 0.000002
ldsCDL 162.855545
xcnSCl 0.018812
linWID 0.035681
stbSAl 3.947726
sdsAre 17595.05196
sisBpM 0.056843
misCel 12.861005
ltcRea 49.85095
ldeAre 59270.775826
lseCCo 0.387218
lseERI 0.814681
lteOri 13.778403
lteWNB 0.031542
lieWCe 0.000269
population 187.68
A, B, D, E. Agriculture, energy and water 3.481745
C. Manufacturing 0.494316
F. Construction 35.751011
G, I. Distribution, hotels and restaurants 35.952317
H, J. Transport and communication 112.284974
K, L, M, N. Financial, real estate, professional and administrative activities 4.74268
O,P,Q. Public administration, education and health 33.260332
R, S, T, U. Other 16.309162
Land cover [Non-irrigated arable land] 0.0
Land cover [Industrial or commercial units] 0.407275
Land cover [Sport and leisure facilities] 0.0
Land cover [Green urban areas] 0.2
Land cover [Discontinuous urban fabric] 0.392725
Land cover [Pastures] 0.0
Land cover [Continuous urban fabric] 0.0
Name: E00042271, dtype: object
Change the signature type (a proxy for a level of urbanity).
# A signature type different from the OA's own triggers sampling; no seed is
# given, so the sampled values differ between runs.
get_signature_values(
    oa_code,
    signature_type="Local urbanity",
    use=0.4,
    greenspace=0.2,
    job_types=0.2,
)
sdbAre 422.259877
sdbCoA 0.000000
ssbCCo 0.408337
ssbCor 3.769432
ssbSqu 0.403360
ssbERI 0.982311
ssbCCM 18.595822
ssbCCD 0.135386
stbOri 8.222212
sdcAre 1663.577623
sscCCo 0.438507
sscERI 0.984578
sicCAR 0.229900
stbCeA 1.659903
mtbAli 2.924640
mtbNDi 15.906085
mtcWNe 0.032347
ltbIBD 21.239290
sdsSPW 26.420297
sdsSWD 3.844587
sdsSPO 0.384913
sdsLen 114.906193
sssLin 0.991167
ldsMSL 1938.858230
mtdDeg 3.000000
linP3W 0.773400
linP4W 0.092012
linPDE 0.144101
lcnClo 0.000002
ldsCDL 210.710475
xcnSCl 0.001760
linWID 0.034503
stbSAl 3.207152
sdsAre 18131.928008
sisBpM 0.048301
misCel 15.195866
ltcRea 49.395492
ldeAre 19916.510706
lseCCo 0.347203
lseERI 0.865735
lteOri 19.688769
lteWNB 0.004753
lieWCe 0.000877
population 432.770089
A, B, D, E. Agriculture, energy and water 10.577324
C. Manufacturing 18.857491
F. Construction 96.518541
G, I. Distribution, hotels and restaurants 92.453344
H, J. Transport and communication 128.179567
K, L, M, N. Financial, real estate, professional and administrative activities 30.728056
O,P,Q. Public administration, education and health 32.805174
R, S, T, U. Other 24.494562
Land cover [Non-irrigated arable land] 0.000000
Land cover [Industrial or commercial units] 0.000000
Land cover [Sport and leisure facilities] 0.000000
Land cover [Green urban areas] 0.200000
Land cover [Discontinuous urban fabric] 0.614254
Land cover [Pastures] 0.000000
Land cover [Continuous urban fabric] 0.042641
Name: E00042271, dtype: float64