11  Subcriteria A2b

Using R with terra package

11.1 Updating the assessment workflow

The original workflow included a combination of two remote sensing products (Friedl et al. 2010; Hansen et al. 2013). The updated workflow is based on an updated version of one data source that spans more years and uses improved algorithms. We have two version of the workflow, one based on R and local copies of the remote sensing products (this chapter), and one based on Python and Earth Engine for cloud computing (see Section 12.1).

Differences between original and updated assessment workflows for subcriterion A2b.

Step 2019 2025 (R) 2025 (Python)
GIS data prep. GRASS GIS / gdal / R R / gdal / terra Earth Engine
Data source GFC v1.2 (2015) and MODIS LC v5.1 GFC v1.12 (2024) GFC v1.12 (2024)
Analysis R / glm R / glm / redlistr Python / Earth Engine
Timeframe 2000-2050 2000-2050 2000-2050

11.2 Set up

Load packages

We will run this analysis using the following packages in R.

suppressPackageStartupMessages({
    library(terra)
    library(sf)
    library(dplyr)
    library(tidyr)
    library(units)
    library(exactextractr)
    library(redlistr)
    })

Load data

We start by configuring the relative path to our data folder:

here::i_am("R/subcriterion-A2b.ipynb")
data_dir <- here::here("downloaded-data")
here() starts at /Users/z3529065/proyectos/Forests-Americas/RLE-example-dry-forest-guajira

We already downloaded the vector data for the distribution of the target macrogroup from OSF in Chapter 4, now we read the spatial data in vector format using the function vect from package terra:

M563 <- read_sf(here::here(data_dir,
  "1.A.1.Ei.563---Guajiran-Seasonal-Dry-Forest-1km.gpkg"))

We already downloaded and prepared the GFC data for estimates of tree cover in Chapter 7, now we read the spatial data in raster format using the function rast from package terra.

tif_files <- dir(
  here::here(data_dir,"GFC-2024-v1.12"), 
  recursive=TRUE, 
  pattern="*tif$",
  full.names=TRUE)

lossyear <- rast( grep("lossyear", tif_files, value=TRUE)) 
gain <- rast(grep("gain", tif_files, value=TRUE)) 
cover <- rast(grep("treecover2000", tif_files, value=TRUE)) 
cover24 <- rast(grep("treecover2024", tif_files, value=TRUE)) 

A quick check that this is indeed correct:

plot(cover)

Now make a couple of extra files for the calculations below:

loss <- (lossyear > 0) + 0
initial <- (cover > 0) + 0
                                          

Now we bundle these raster files together:

gfc <- c(cover, cover24, initial, gain, loss)
names(gfc) <- c("cover00", "cover24", "initial", "gain", "loss")

The data is in unprojected units, but we can calculate the resolution for each cell:

raster_resolution <- cellSize(loss)
                                          
raster_resolution
class       : SpatRaster 
size        : 41880, 67800, 1  (nrow, ncol, nlyr)
resolution  : 0.00025, 0.00025  (x, y)
extent      : -77.37, -60.42, 1.99, 12.46  (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326) 
source      : spat_102fafb106af_66298_P2mm2aa9P9QX7pk.tif 
varname     : GFC-2024-v1.12.M563.lossyear 
name        :     area 
min value   : 751.6660 
max value   : 768.8654 

11.3 Summarise changes

Processing these large raster files in R can be slow, but it is possible to speed up the calculations by using a grid and functions from library exactextractr:

rawgrid <- st_make_grid(M563, cellsize = .091)
grid <- st_sf(id = 1:length(rawgrid), geoms = rawgrid, 
        stringsAsFactors = FALSE)

We try with a cell size that is roughly 10 by 10 km in area:

range(set_units(st_area(grid),'km2'))
Units: [km^2]
[1]  99.99352 102.32401

Ok let’s run this while we get a cup of coffee or glass of water…

qry <- exact_extract(gfc, grid, fun="sum", append_cols = c("id"))
Cannot preload entire working area of 14178211740 cells with max_cells_in_memory = 3e+07. Raster values will be read for each feature individually.
  |======================================================================| 100%
res_gfc <- exact_extract(raster_resolution, grid, fun="mean", append_cols = c("id"),force_df=TRUE)
Cannot preload entire working area of 2835642348 cells with max_cells_in_memory = 3e+07. Raster values will be read for each feature individually.
  |======================================================================| 100%

Ok, we are back and we have some results, now we will try to summarise these per cell.

res_gfc <- res_gfc |> mutate(res=set_units(mean,'m2') |> set_units('km2'))
extent_estimates <- 
    tibble(qry) |> left_join(res_gfc,by='id') |>
        filter(sum.initial>0 | sum.gain>0) |>
        mutate(cells_2000=sum.initial,
               cells_2024=sum.initial + sum.gain - sum.loss,
               
               mean_2000 = (sum.cover00 / 100) / sum.initial,
               eco_extent_2000 = cells_2000 * res * mean_2000,
               eco_extent_2024 = cells_2024 * res * mean_2000,
               alt_extent_2000 = (sum.cover00/100) * res,
               alt_extent_2024 = (sum.cover24/100) * res
              )
head(extent_estimates)
A tibble: 6 × 15
id sum.cover00 sum.cover24 sum.initial sum.gain sum.loss mean res cells_2000 cells_2024 mean_2000 eco_extent_2000 eco_extent_2024 alt_extent_2000 alt_extent_2024
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <[km^2]> <dbl> <dbl> <dbl> <[km^2]> <[km^2]> <[km^2]> <[km^2]>
16 394338.00 368003.00 6402.000 9 367.00000 768.8402 0.0007688402 [km^2] 6402.000 6044.000 0.6159606 3.0318291 [km^2] 2.8622891 [km^2] 3.0318291 [km^2] 2.8293550 [km^2]
17 210969.00 187326.00 4873.000 1 404.00000 768.8402 0.0007688402 [km^2] 4873.000 4470.000 0.4329345 1.6220145 [km^2] 1.4878729 [km^2] 1.6220145 [km^2] 1.4402376 [km^2]
18 786993.69 704305.69 13946.496 59 1257.00000 768.8402 0.0007688402 [km^2] 13946.496 12748.496 0.5642949 6.0507239 [km^2] 5.5309685 [km^2] 6.0507239 [km^2] 5.4149853 [km^2]
19 69479.45 65038.04 1124.920 1 54.67097 768.8402 0.0007688402 [km^2] 1124.920 1071.249 0.6176392 0.5341859 [km^2] 0.5086994 [km^2] 0.5341859 [km^2] 0.5000386 [km^2]
205 336483.91 290286.06 5995.497 3 680.13544 768.7968 0.0007687968 [km^2] 5995.497 5318.361 0.5612278 2.5868776 [km^2] 2.2947139 [km^2] 2.5868776 [km^2] 2.2317100 [km^2]
206 873585.75 764134.00 16832.078 63 1703.53223 768.7968 0.0007687968 [km^2] 16832.078 15191.546 0.5190005 6.7160994 [km^2] 6.0615173 [km^2] 6.7160994 [km^2] 5.8746378 [km^2]
extent_estimates |> 
    summarise(
        raw2000=sum(alt_extent_2000),
        raw2024=sum(alt_extent_2024),
        eco2000=sum(eco_extent_2000, na.rm = TRUE), 
        eco2024=sum(eco_extent_2024, na.rm = TRUE))
A tibble: 1 × 4
raw2000 raw2024 eco2000 eco2024
<[km^2]> <[km^2]> <[km^2]> <[km^2]>
97390.84 [km^2] 85126.09 [km^2] 97390.84 [km^2] 84968.26 [km^2]

11.4 Calculate declines

calc_decline <- function(data,start,end) {
  stopifnot("eco_extent" %in% colnames(data),
            "years" %in% colnames(data),
            start<=nrow(data),
            end<=nrow(data))
  start_date <- pull(data[start,"years"])
  end_date <- pull(data[end,"years"])
  stopifnot(start_date<end_date)
  start_extent <- pull(data[start,"eco_extent"])
  end_extent <- pull(data[end,"eco_extent"])
  res <- dplyr::tibble(
    start_date,
    end_date,
    time_frame = end_date-start_date,
    decline = (units::set_units(1,'1') - 
                 (end_extent/start_extent)) %>% 
      units::set_units('%')
      )
  if ("error_extent" %in% colnames(data)) {
    start_error <- pull(data[start,"error_extent"])
    end_error <- pull(data[end,"error_extent"])
    res$error <- sqrt(((end_extent^2 * start_error^2) + (start_extent^2 * end_error^2)) / (start_extent^4)) %>% units::set_units('%')
  }
  return(res)
}
rslts <- extent_estimates |> 
    dplyr::select(id,eco_extent_2000,eco_extent_2024) |>
    pivot_longer(c(eco_extent_2000,eco_extent_2024), 
                 names_prefix = "eco_extent_",
                 names_transform = as.numeric,
                 names_to = "years", values_to="eco_extent") |>
    group_by(years) |>
    summarise(eco_extent=sum(eco_extent, na.rm = TRUE))
rslts
A tibble: 2 × 2
years eco_extent
<dbl> <[km^2]>
2000 97390.84 [km^2]
2024 84968.26 [km^2]
85126.09 / 97390.84 
0.874066698675153
DS <- getDeclineStats(drop_units(rslts$eco_extent[1]), drop_units(rslts$eco_extent[2]), 
                           year.t1 = rslts$years[1], year.t2 = rslts$years[2],
                                 methods = c('PRD', 'ARD', 'ARC'))
DS
A data.frame: 1 × 4
absolute.loss ARD PRD ARC
<dbl> <dbl> <dbl> <dbl>
12422.58 517.6075 0.5669467 -0.56856
y_future <- extrapolateEstimate(drop_units(rslts$eco_extent[1]),  
                    year.t1 = rslts$years[1], 
                    nYears = 50,
                                  PRD = DS$PRD,
                                  ARD = DS$ARD,
                                  ARC = DS$ARC) |>
    tibble()
y_future
A tibble: 1 × 4
forecast.year A.ARD.t3 A.PRD.t3 A.ARC.t3
<dbl> <dbl> <dbl> <dbl>
2050 71510.46 73292.05 73292.05
rslts <- bind_rows(
    rslts,
    {transmute(y_future,
        eco_extent=set_units(A.ARD.t3,'km2'), 
        years=forecast.year) },
    {transmute(y_future,
        eco_extent=set_units(A.PRD.t3,'km2'), 
        years=forecast.year) },
    {transmute(y_future,
        eco_extent=set_units(A.ARC.t3,'km2'), 
        years=forecast.year) })
calc_decline(rslts,1,2:5)
A tibble: 4 × 4
start_date end_date time_frame decline
<dbl> <dbl> <dbl> <[%]>
2000 2024 24 12.75539 [%]
2000 2050 50 26.57373 [%]
2000 2050 50 24.74441 [%]
2000 2050 50 24.74441 [%]
st_area(M563) |> set_units('km2')
253656.4 [km^2]
(st_area(M563) |> set_units('km2') / (105206342 + 54502069 + 233854))
0.001585924 [km^2]
97390.84 /1.5
64927.2266666667