Appendix P — Sample explanatory variables based on empirical data and custom scenario definition

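This notebook contains code to generate explanatory variables for an output area (OA) based on four scenario sliders: the signature type (level of urbanity), the land use balance, the greenspace allocation and the job type balance.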

The data are either adapted from the original observed values (if the level of urbanity does not change) or sampled from the distribution of the target signature type across Great Britain.

import pandas as pd
import geopandas as gpd
import numpy as np
data_folder = "/Users/martin/Library/CloudStorage/OneDrive-SharedLibraries-TheAlanTuringInstitute/Daniel Arribas-Bel - demoland_data"

Load the data informing the distributions.

# Per-signature-type summary tables for the form variables. The sampling
# helpers look values up as ``.loc[signature_type, variable]``, i.e. rows are
# signature types and columns are variables.
median_form = pd.read_parquet(f"{data_folder}/sampling/median_form.parquet")
iqr_form = pd.read_parquet(f"{data_folder}/sampling/iqr_form.parquet")
# The same pair of tables for the function variables (population, jobs,
# land cover).
median_function = pd.read_parquet(f"{data_folder}/sampling/median_function.parquet")
iqr_function = pd.read_parquet(f"{data_folder}/sampling/iqr_function.parquet")
# Observed values per output area, indexed by OA code. The population column is
# renamed so that it can be addressed as "population" by the code below.
oa = (
    gpd.read_parquet(f"{data_folder}/processed/interpolated/all_oa.parquet")
    .set_index("geo_code")
    .rename(columns={"population_estimate": "population"})
)
# Lookup from OA code to its currently assigned signature type
# (read as ``oa_key.primary_type[oa_code]``).
oa_key = pd.read_parquet(f"{data_folder}/sampling/oa_key.parquet")

Get OA areas for area-weighted variables.

# Polygon area of every OA, indexed like ``oa`` (by OA code). It is used to
# multiply the sampled population and job values when the signature type changes.
# NOTE(review): units follow the CRS of ``oa`` - confirm it is a projected CRS.
oa_area = oa.area

Define a sampling method.

def _form(signature_type, variable, random_seed):
    """Sample a single form variable for a signature type

    One value is drawn from a normal distribution. Its location is the
    median of ``variable`` for ``signature_type`` taken from ``median_form``
    and its scale is one fifth of the matching interquartile range taken
    from ``iqr_form``.

    A new generator is built from ``random_seed`` on every call.
    """
    generator = np.random.default_rng(random_seed)
    location = median_form.loc[signature_type, variable]
    scale = iqr_form.loc[signature_type, variable] / 5
    return generator.normal(location, scale)


def _function(signature_type, variable, random_seed):
    """Sample a single function variable for a signature type

    The value comes from a normal distribution centred on the median of
    ``variable`` for ``signature_type`` (``median_function``) with a standard
    deviation equal to the interquartile range (``iqr_function``) divided
    by five.

    A new generator is built from ``random_seed`` on every call.
    """
    key = (signature_type, variable)
    sampler = np.random.default_rng(random_seed)
    centre = median_function.loc[key]
    spread = iqr_function.loc[key] / 5
    return sampler.normal(centre, spread)


def _populations(defaults, index):
    """Balance residential and workplace population

    Workplace population and residential population are treated 1:1 and
    are re-allocated based on the index. The proportion of workplace categories
    is not changed.
    """
    if not -1 <= index <= 1:
        raise ValueError(f"use index must be in a range -1...1. {index} given.")
    jobs = [
        "A, B, D, E. Agriculture, energy and water",
        "C. Manufacturing",
        "F. Construction",
        "G, I. Distribution, hotels and restaurants",
        "H, J. Transport and communication",
        "K, L, M, N. Financial, real estate, professional and administrative activities",
        "O,P,Q. Public administration, education and health",
        "R, S, T, U. Other",
    ]
    n_jobs = defaults[jobs].sum()
    if index < 0:
        difference = index * n_jobs
    else:
        difference = index * defaults.population
    new_n_jobs = n_jobs + difference
    defaults.population = defaults.population - difference
    multiplier = new_n_jobs / n_jobs
    defaults[jobs] = defaults[jobs] * multiplier
    return defaults


def _greenspace(defaults, index):
    """Allocate greenspace to OA

    Allocate publicly accessible formal greenspace to OA. Defines a portion
    of OA that is covered by gren urban areas. Realistic values are be fairly
    low. The value affects populations and other land cover classes.
    """
    if not 0 <= index <= 1:
        raise ValueError(f"greenspace index must be in a range 0...1. {index} given.")
    greenspace_orig = defaults["Land cover [Green urban areas]"]
    newly_allocated_gs = index - greenspace_orig
    defaults = defaults * (1 - newly_allocated_gs)
    defaults["Land cover [Green urban areas]"] = index
    return defaults


def _job_types(defaults, index):
    """Balance job types

    Balance job types between manual and white collar workplace categories.
    Index represents the proportion of white collar jobs in an area. The
    total sum of FTEs is not changed.

    The service category is not affected under an assumption that both white
    and blue collar workers need the same amount of services to provide food etc.
    """
    if not 0 <= index <= 1:
        raise ValueError(f"job_types index must be in a range 0...1. {index} given.")
    blue = [
        "A, B, D, E. Agriculture, energy and water",
        "C. Manufacturing",
        "F. Construction",
        "H, J. Transport and communication",
    ]
    white = [
        "K, L, M, N. Financial, real estate, professional and administrative activities",
        "O,P,Q. Public administration, education and health",
    ]
    blue_collar = defaults[blue].sum()
    white_collar = defaults[white].sum()
    total = blue_collar + white_collar
    orig_proportion = white_collar / total

    new_blue = total * (1 - index)
    new_white = total * index

    blue_diff = new_blue / blue_collar
    white_diff = new_white / white_collar

    defaults[blue] = defaults[blue] * blue_diff
    defaults[white] = defaults[white] * white_diff

    return defaults


def get_signature_values(
    oa_code: str,
    signature_type: str = None,
    use: float = 0,
    greenspace: float = None,
    job_types: float = None,
    random_seed: int = None,
):
    """Generate explanatory variables based on a scenario

    Generates values for explanatory variables based on empirical data derived
    from the Urban Grammar project and a scenario definition based on a
    Urban Grammar signature type, land use balance, greenspace allocation
    and a job type balance.

    If the target ``signature_type`` differs from the one already allocated
    to OA, the data is sampled from the distribution from the whole GB. If
    they are equal, the existing values measured in place are used. That allows
    playing with other variables without changing the form.

    Parameters
    ----------
    oa_code : string
        String representing the OA code, e.g. ``"E00042707"``.

    signature_type : string, optional
        String representing signature type. Defaults to None, which keeps the
        signature type already allocated to the OA. See below the possible
        options and their relationship to the level of urbanity.

            0: 'Wild countryside',
            1: 'Countryside agriculture',
            2: 'Urban buffer',
            3: 'Warehouse/Park land',
            4: 'Open sprawl',
            5: 'Disconnected suburbia',
            6: 'Accessible suburbia',
            7: 'Connected residential neighbourhoods',
            8: 'Dense residential neighbourhoods',
            9: 'Gridded residential quarters',
            10: 'Dense urban neighbourhoods',
            11: 'Local urbanity',
            12: 'Regional urbanity',
            13: 'Metropolitan urbanity',
            14: 'Concentrated urbanity',
            15: 'Hyper concentrated urbanity',

    use : float, optional
        Float in a range -1...1 reflecting the land use balance between
        fully residential (-1) and fully commercial (1). Defaults to 0,
        a value derived from signatures. For values < 0, we are allocating
        workplace population to residential population. For values > 0, we
        are allocating residential population to workplace population.
        Extremes are allowed but are not realistic, in most cases.
    greenspace : float, optional
        Float in a range 0...1 reflecting the amount of greenspace in the
        area. 0 represents no accessible greenspace, 1 represents whole
        area covered by a greenspace. This value will proportionally affect
        the amounts of jobs and population. Defaults to None, which leaves
        the greenspace allocation unchanged.
    job_types : float, optional
        Float in a range 0...1 reflecting the balance of job types in the
        area between entirely blue collar jobs (0) and entirely white collar
        jobs (1). Defaults to None, which leaves the job type balance
        unchanged.
    random_seed : int, optional
        Random seed

    Returns
    -------
    Series
        Form variables followed by function variables.

    Raises
    ------
    ValueError
        If ``use``, ``greenspace`` or ``job_types`` is outside of its range.
    """
    orig_type = oa_key.primary_type[oa_code]
    if signature_type is not None and orig_type != signature_type:
        # NOTE(review): every call to ``_form`` / ``_function`` builds its own
        # generator from ``random_seed``. With a fixed seed all variables are
        # therefore drawn with the same underlying standard normal deviate.
        # Left as is to keep seeded results stable - confirm this is intended.
        form = pd.Series(
            [_form(signature_type, var, random_seed) for var in median_form.columns],
            index=median_form.columns,
            name=oa_code,
        ).abs()

        defaults = pd.Series(
            [
                _function(signature_type, var, random_seed)
                for var in median_function.columns
            ],
            index=median_function.columns,
            name=oa_code,
        ).abs()

        # These variables are multiplied by the area of the OA after sampling.
        area_weighted = [
            "population",
            "A, B, D, E. Agriculture, energy and water",
            "C. Manufacturing",
            "F. Construction",
            "G, I. Distribution, hotels and restaurants",
            "H, J. Transport and communication",
            "K, L, M, N. Financial, real estate, professional and administrative activities",
            "O,P,Q. Public administration, education and health",
            "R, S, T, U. Other",
        ]
        defaults[area_weighted] = defaults[area_weighted] * oa_area[oa_code]

    else:
        # Signature type is not changing, use the values observed in place.
        form = oa.loc[oa_code][median_form.columns]
        defaults = oa.loc[oa_code][median_function.columns]

    # population
    if use != 0:
        defaults = _populations(defaults, index=use)

    # greenspace
    # Compare against None explicitly: 0 is a valid value (no accessible
    # greenspace) that a plain truthiness check would silently ignore.
    if greenspace is not None:
        defaults = _greenspace(defaults, greenspace)

    # job types
    # 0 is a valid value here as well (entirely blue collar jobs).
    if job_types is not None:
        defaults = _job_types(defaults, job_types)
    return pd.concat([form, defaults])

Example:

Set the OA we are interested in.

oa_code = "E00042271"

Check the signature type of the OA.

oa_key.primary_type[oa_code]
'Dense urban neighbourhoods'

This is the actual value with no changes.

get_signature_values(
    oa_code,
)
sdbAre                                                                               836.43386
sdbCoA                                                                               10.849734
ssbCCo                                                                                0.342091
ssbCor                                                                                5.918075
ssbSqu                                                                                5.799881
ssbERI                                                                                  0.8782
ssbCCM                                                                               28.454278
ssbCCD                                                                                3.243832
stbOri                                                                               14.042689
sdcAre                                                                              3387.82744
sscCCo                                                                                0.449487
sscERI                                                                                0.960133
sicCAR                                                                                0.245127
stbCeA                                                                                5.194842
mtbAli                                                                                  4.3889
mtbNDi                                                                               20.755305
mtcWNe                                                                                0.028451
ltbIBD                                                                               21.166978
sdsSPW                                                                               28.213326
sdsSWD                                                                                2.899166
sdsSPO                                                                                0.447096
sdsLen                                                                              109.429581
sssLin                                                                                 0.92334
ldsMSL                                                                             1645.009527
mtdDeg                                                                                2.850976
linP3W                                                                                0.810375
linP4W                                                                                0.108821
linPDE                                                                                0.080804
lcnClo                                                                                0.000002
ldsCDL                                                                              162.855545
xcnSCl                                                                                0.018812
linWID                                                                                0.035681
stbSAl                                                                                3.947726
sdsAre                                                                             17595.05196
sisBpM                                                                                0.056843
misCel                                                                               12.861005
ltcRea                                                                                49.85095
ldeAre                                                                            59270.775826
lseCCo                                                                                0.387218
lseERI                                                                                0.814681
lteOri                                                                               13.778403
lteWNB                                                                                0.031542
lieWCe                                                                                0.000269
population                                                                                 391
A, B, D, E. Agriculture, energy and water                                             0.685492
C. Manufacturing                                                                      0.097322
F. Construction                                                                       7.038725
G, I. Distribution, hotels and restaurants                                           21.731618
H, J. Transport and communication                                                    22.106873
K, L, M, N. Financial, real estate, professional and administrative activities       10.598732
O,P,Q. Public administration, education and health                                   74.328726
R, S, T, U. Other                                                                     9.858182
Land cover [Non-irrigated arable land]                                                     0.0
Land cover [Industrial or commercial units]                                           0.509093
Land cover [Sport and leisure facilities]                                                  0.0
Land cover [Green urban areas]                                                             0.0
Land cover [Discontinuous urban fabric]                                               0.490907
Land cover [Pastures]                                                                      0.0
Land cover [Continuous urban fabric]                                                       0.0
Name: E00042271, dtype: object

Stay within the same signature type and change only use.

  1. More residential
get_signature_values(oa_code, use=-0.5)
sdbAre                                                                               836.43386
sdbCoA                                                                               10.849734
ssbCCo                                                                                0.342091
ssbCor                                                                                5.918075
ssbSqu                                                                                5.799881
ssbERI                                                                                  0.8782
ssbCCM                                                                               28.454278
ssbCCD                                                                                3.243832
stbOri                                                                               14.042689
sdcAre                                                                              3387.82744
sscCCo                                                                                0.449487
sscERI                                                                                0.960133
sicCAR                                                                                0.245127
stbCeA                                                                                5.194842
mtbAli                                                                                  4.3889
mtbNDi                                                                               20.755305
mtcWNe                                                                                0.028451
ltbIBD                                                                               21.166978
sdsSPW                                                                               28.213326
sdsSWD                                                                                2.899166
sdsSPO                                                                                0.447096
sdsLen                                                                              109.429581
sssLin                                                                                 0.92334
ldsMSL                                                                             1645.009527
mtdDeg                                                                                2.850976
linP3W                                                                                0.810375
linP4W                                                                                0.108821
linPDE                                                                                0.080804
lcnClo                                                                                0.000002
ldsCDL                                                                              162.855545
xcnSCl                                                                                0.018812
linWID                                                                                0.035681
stbSAl                                                                                3.947726
sdsAre                                                                             17595.05196
sisBpM                                                                                0.056843
misCel                                                                               12.861005
ltcRea                                                                                49.85095
ldeAre                                                                            59270.775826
lseCCo                                                                                0.387218
lseERI                                                                                0.814681
lteOri                                                                               13.778403
lteWNB                                                                                0.031542
lieWCe                                                                                0.000269
population                                                                          464.222835
A, B, D, E. Agriculture, energy and water                                             0.342746
C. Manufacturing                                                                      0.048661
F. Construction                                                                       3.519362
G, I. Distribution, hotels and restaurants                                           10.865809
H, J. Transport and communication                                                    11.053436
K, L, M, N. Financial, real estate, professional and administrative activities        5.299366
O,P,Q. Public administration, education and health                                   37.164363
R, S, T, U. Other                                                                     4.929091
Land cover [Non-irrigated arable land]                                                     0.0
Land cover [Industrial or commercial units]                                           0.509093
Land cover [Sport and leisure facilities]                                                  0.0
Land cover [Green urban areas]                                                             0.0
Land cover [Discontinuous urban fabric]                                               0.490907
Land cover [Pastures]                                                                      0.0
Land cover [Continuous urban fabric]                                                       0.0
Name: E00042271, dtype: object
  2. Less residential, more jobs
get_signature_values(oa_code, use=0.4)
sdbAre                                                                               836.43386
sdbCoA                                                                               10.849734
ssbCCo                                                                                0.342091
ssbCor                                                                                5.918075
ssbSqu                                                                                5.799881
ssbERI                                                                                  0.8782
ssbCCM                                                                               28.454278
ssbCCD                                                                                3.243832
stbOri                                                                               14.042689
sdcAre                                                                              3387.82744
sscCCo                                                                                0.449487
sscERI                                                                                0.960133
sicCAR                                                                                0.245127
stbCeA                                                                                5.194842
mtbAli                                                                                  4.3889
mtbNDi                                                                               20.755305
mtcWNe                                                                                0.028451
ltbIBD                                                                               21.166978
sdsSPW                                                                               28.213326
sdsSWD                                                                                2.899166
sdsSPO                                                                                0.447096
sdsLen                                                                              109.429581
sssLin                                                                                 0.92334
ldsMSL                                                                             1645.009527
mtdDeg                                                                                2.850976
linP3W                                                                                0.810375
linP4W                                                                                0.108821
linPDE                                                                                0.080804
lcnClo                                                                                0.000002
ldsCDL                                                                              162.855545
xcnSCl                                                                                0.018812
linWID                                                                                0.035681
stbSAl                                                                                3.947726
sdsAre                                                                             17595.05196
sisBpM                                                                                0.056843
misCel                                                                               12.861005
ltcRea                                                                                49.85095
ldeAre                                                                            59270.775826
lseCCo                                                                                0.387218
lseERI                                                                                0.814681
lteOri                                                                               13.778403
lteWNB                                                                                0.031542
lieWCe                                                                                0.000269
population                                                                               234.6
A, B, D, E. Agriculture, energy and water                                              1.41758
C. Manufacturing                                                                      0.201259
F. Construction                                                                      14.555892
G, I. Distribution, hotels and restaurants                                           44.940396
H, J. Transport and communication                                                    45.716413
K, L, M, N. Financial, real estate, professional and administrative activities        21.91789
O,P,Q. Public administration, education and health                                  153.709788
R, S, T, U. Other                                                                    20.386452
Land cover [Non-irrigated arable land]                                                     0.0
Land cover [Industrial or commercial units]                                           0.509093
Land cover [Sport and leisure facilities]                                                  0.0
Land cover [Green urban areas]                                                             0.0
Land cover [Discontinuous urban fabric]                                               0.490907
Land cover [Pastures]                                                                      0.0
Land cover [Continuous urban fabric]                                                       0.0
Name: E00042271, dtype: object
  3. Less residential (more jobs) and more greenspace.

Check current greenspace first.

get_signature_values(
    oa_code,
)["Land cover [Green urban areas]"]
0.0

There is no greenspace at the moment. Allocate 20% of the area to it.

get_signature_values(oa_code, use=0.4, greenspace=0.2)
sdbAre                                                                               836.43386
sdbCoA                                                                               10.849734
ssbCCo                                                                                0.342091
ssbCor                                                                                5.918075
ssbSqu                                                                                5.799881
ssbERI                                                                                  0.8782
ssbCCM                                                                               28.454278
ssbCCD                                                                                3.243832
stbOri                                                                               14.042689
sdcAre                                                                              3387.82744
sscCCo                                                                                0.449487
sscERI                                                                                0.960133
sicCAR                                                                                0.245127
stbCeA                                                                                5.194842
mtbAli                                                                                  4.3889
mtbNDi                                                                               20.755305
mtcWNe                                                                                0.028451
ltbIBD                                                                               21.166978
sdsSPW                                                                               28.213326
sdsSWD                                                                                2.899166
sdsSPO                                                                                0.447096
sdsLen                                                                              109.429581
sssLin                                                                                 0.92334
ldsMSL                                                                             1645.009527
mtdDeg                                                                                2.850976
linP3W                                                                                0.810375
linP4W                                                                                0.108821
linPDE                                                                                0.080804
lcnClo                                                                                0.000002
ldsCDL                                                                              162.855545
xcnSCl                                                                                0.018812
linWID                                                                                0.035681
stbSAl                                                                                3.947726
sdsAre                                                                             17595.05196
sisBpM                                                                                0.056843
misCel                                                                               12.861005
ltcRea                                                                                49.85095
ldeAre                                                                            59270.775826
lseCCo                                                                                0.387218
lseERI                                                                                0.814681
lteOri                                                                               13.778403
lteWNB                                                                                0.031542
lieWCe                                                                                0.000269
population                                                                              187.68
A, B, D, E. Agriculture, energy and water                                             1.134064
C. Manufacturing                                                                      0.161007
F. Construction                                                                      11.644714
G, I. Distribution, hotels and restaurants                                           35.952317
H, J. Transport and communication                                                     36.57313
K, L, M, N. Financial, real estate, professional and administrative activities       17.534312
O,P,Q. Public administration, education and health                                   122.96783
R, S, T, U. Other                                                                    16.309162
Land cover [Non-irrigated arable land]                                                     0.0
Land cover [Industrial or commercial units]                                           0.407275
Land cover [Sport and leisure facilities]                                                  0.0
Land cover [Green urban areas]                                                             0.2
Land cover [Discontinuous urban fabric]                                               0.392725
Land cover [Pastures]                                                                      0.0
Land cover [Continuous urban fabric]                                                       0.0
Name: E00042271, dtype: object

Shift the job type allocation towards more blue-collar jobs (here `job_types=0.2`).

# Scenario run for `oa_code` with the job_types slider set to 0.2; the
# surrounding notebook text describes this as a shift towards blue collar jobs.
get_signature_values(oa_code, use=0.4, greenspace=0.2, job_types=0.2)
sdbAre                                                                               836.43386
sdbCoA                                                                               10.849734
ssbCCo                                                                                0.342091
ssbCor                                                                                5.918075
ssbSqu                                                                                5.799881
ssbERI                                                                                  0.8782
ssbCCM                                                                               28.454278
ssbCCD                                                                                3.243832
stbOri                                                                               14.042689
sdcAre                                                                              3387.82744
sscCCo                                                                                0.449487
sscERI                                                                                0.960133
sicCAR                                                                                0.245127
stbCeA                                                                                5.194842
mtbAli                                                                                  4.3889
mtbNDi                                                                               20.755305
mtcWNe                                                                                0.028451
ltbIBD                                                                               21.166978
sdsSPW                                                                               28.213326
sdsSWD                                                                                2.899166
sdsSPO                                                                                0.447096
sdsLen                                                                              109.429581
sssLin                                                                                 0.92334
ldsMSL                                                                             1645.009527
mtdDeg                                                                                2.850976
linP3W                                                                                0.810375
linP4W                                                                                0.108821
linPDE                                                                                0.080804
lcnClo                                                                                0.000002
ldsCDL                                                                              162.855545
xcnSCl                                                                                0.018812
linWID                                                                                0.035681
stbSAl                                                                                3.947726
sdsAre                                                                             17595.05196
sisBpM                                                                                0.056843
misCel                                                                               12.861005
ltcRea                                                                                49.85095
ldeAre                                                                            59270.775826
lseCCo                                                                                0.387218
lseERI                                                                                0.814681
lteOri                                                                               13.778403
lteWNB                                                                                0.031542
lieWCe                                                                                0.000269
population                                                                              187.68
A, B, D, E. Agriculture, energy and water                                             3.481745
C. Manufacturing                                                                      0.494316
F. Construction                                                                      35.751011
G, I. Distribution, hotels and restaurants                                           35.952317
H, J. Transport and communication                                                   112.284974
K, L, M, N. Financial, real estate, professional and administrative activities         4.74268
O,P,Q. Public administration, education and health                                   33.260332
R, S, T, U. Other                                                                    16.309162
Land cover [Non-irrigated arable land]                                                     0.0
Land cover [Industrial or commercial units]                                           0.407275
Land cover [Sport and leisure facilities]                                                  0.0
Land cover [Green urban areas]                                                             0.2
Land cover [Discontinuous urban fabric]                                               0.392725
Land cover [Pastures]                                                                      0.0
Land cover [Continuous urban fabric]                                                       0.0
Name: E00042271, dtype: object

Change the signature type (a proxy for the level of urbanity) by passing `signature_type` explicitly.

# Slider values use=0.4, greenspace=0.2, job_types=0.2 combined with an explicit
# signature_type, so values are generated for the "Local urbanity" type.
get_signature_values(
    oa_code, signature_type="Local urbanity", use=0.4, greenspace=0.2, job_types=0.2
)
sdbAre                                                                              422.259877
sdbCoA                                                                                0.000000
ssbCCo                                                                                0.408337
ssbCor                                                                                3.769432
ssbSqu                                                                                0.403360
ssbERI                                                                                0.982311
ssbCCM                                                                               18.595822
ssbCCD                                                                                0.135386
stbOri                                                                                8.222212
sdcAre                                                                             1663.577623
sscCCo                                                                                0.438507
sscERI                                                                                0.984578
sicCAR                                                                                0.229900
stbCeA                                                                                1.659903
mtbAli                                                                                2.924640
mtbNDi                                                                               15.906085
mtcWNe                                                                                0.032347
ltbIBD                                                                               21.239290
sdsSPW                                                                               26.420297
sdsSWD                                                                                3.844587
sdsSPO                                                                                0.384913
sdsLen                                                                              114.906193
sssLin                                                                                0.991167
ldsMSL                                                                             1938.858230
mtdDeg                                                                                3.000000
linP3W                                                                                0.773400
linP4W                                                                                0.092012
linPDE                                                                                0.144101
lcnClo                                                                                0.000002
ldsCDL                                                                              210.710475
xcnSCl                                                                                0.001760
linWID                                                                                0.034503
stbSAl                                                                                3.207152
sdsAre                                                                            18131.928008
sisBpM                                                                                0.048301
misCel                                                                               15.195866
ltcRea                                                                               49.395492
ldeAre                                                                            19916.510706
lseCCo                                                                                0.347203
lseERI                                                                                0.865735
lteOri                                                                               19.688769
lteWNB                                                                                0.004753
lieWCe                                                                                0.000877
population                                                                          432.770089
A, B, D, E. Agriculture, energy and water                                            10.577324
C. Manufacturing                                                                     18.857491
F. Construction                                                                      96.518541
G, I. Distribution, hotels and restaurants                                           92.453344
H, J. Transport and communication                                                   128.179567
K, L, M, N. Financial, real estate, professional and administrative activities       30.728056
O,P,Q. Public administration, education and health                                   32.805174
R, S, T, U. Other                                                                    24.494562
Land cover [Non-irrigated arable land]                                                0.000000
Land cover [Industrial or commercial units]                                           0.000000
Land cover [Sport and leisure facilities]                                             0.000000
Land cover [Green urban areas]                                                        0.200000
Land cover [Discontinuous urban fabric]                                               0.614254
Land cover [Pastures]                                                                 0.000000
Land cover [Continuous urban fabric]                                                  0.042641
Name: E00042271, dtype: float64