09 Dask demo

09 Dask demo

UW Geospatial Data Analysis
CEE467/CEWA567
David Shean, Eric Gagliano, Quinn Brencher

from pathlib import Path
import xarray as xr
import os
import time
from dask.distributed import Client
# Create a Dask distributed client with default settings. The client repr
# displayed below reports the cluster type (distributed.LocalCluster) and the
# dashboard URL for monitoring tasks.
client = Client()  
client

Client

Client-ee37f65f-fd5b-11ef-aaa0-00155de26d4e

Connection method: Cluster object Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status

Cluster Info

# Directory holding the ERA5 demo data, located under the user's home directory.
era5_data_dir = f'{Path.home()}/gda_demo_data/era5_data'
# Full path to the NetCDF file opened by the cells below
# (presumably monthly global 2 m temperature anomalies, going by the file name
# and the `t2m` variable in the dataset repr -- confirm against the data source).
anom_fn = os.path.join(era5_data_dir, '1month_anomaly_Global_ea_2t.nc') 
def coarsen_and_polyfit(ds, coarsen_factor=2):
    """Wrap longitudes, block-average the grid, and fit a line along time.

    Parameters
    ----------
    ds
        Object with ``longitude``, ``latitude`` and ``time`` dimensions that
        supports ``assign_coords``, ``sortby``, ``coarsen`` and ``polyfit``
        (the notebook passes an ``xarray.Dataset``).
    coarsen_factor : int, default 2
        Window size, in grid cells, applied to both ``latitude`` and
        ``longitude`` when coarsening.

    Returns
    -------
    The result of ``polyfit(dim='time', deg=1)`` on the coarsened data,
    i.e. a degree-1 polynomial fit along ``time``.
    """
    # Map longitude values into [-180, 180) and put them back in ascending order.
    wrapped_longitude = ((ds.longitude + 180) % 360) - 180
    ds_wrapped = ds.assign_coords(longitude=wrapped_longitude).sortby('longitude')

    # Average over coarsen_factor x coarsen_factor windows; boundary='trim'
    # is passed so grid sizes that are not an exact multiple are accepted.
    windows = {'latitude': coarsen_factor, 'longitude': coarsen_factor}
    ds_block_mean = ds_wrapped.coarsen(boundary='trim', **windows).mean()

    # First-degree polynomial fit along the time dimension.
    return ds_block_mean.polyfit(dim='time', deg=1)
# Open the file with chunks=None, i.e. without requesting Dask chunking; the
# repr below shows `t2m` as a plain (non-dask) variable.
anom_nochunks_ds = xr.open_dataset(anom_fn, chunks=None)
anom_nochunks_ds
<xarray.Dataset> Size: 2GB
Dimensions:    (time: 517, latitude: 721, longitude: 1440)
Coordinates:
  * time       (time) datetime64[ns] 4kB 1979-01-01 1979-02-01 ... 2022-01-01
  * latitude   (latitude) float64 6kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0
  * longitude  (longitude) float64 12kB 0.0 0.25 0.5 0.75 ... 359.2 359.5 359.8
Data variables:
    t2m        (time, latitude, longitude) float32 2GB ...
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2022-02-28T07:59 GRIB to CDM+CF via cfgrib-0.9.1...
# Time the workflow on the dataset opened without chunks.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock intended for measuring elapsed time, so the result
# cannot be skewed by system clock adjustments during the run.
start = time.perf_counter()
coarsen_and_polyfit(anom_nochunks_ds, coarsen_factor=6)
end = time.perf_counter()
print(f"Time taken without chunks: {end-start:.2f} seconds")
Time taken without chunks: 11.90 seconds
# Drop the reference to the unchunked dataset (presumably so its memory can be
# reclaimed before the next run -- confirm intent).
anom_nochunks_ds = None
# Re-open the same file with chunks="auto"; the repr below shows `t2m` backed
# by a dask.array with automatically chosen chunk sizes.
anom_autochunks_ds = xr.open_dataset(anom_fn, chunks="auto")
anom_autochunks_ds
<xarray.Dataset> Size: 2GB
Dimensions:    (time: 517, latitude: 721, longitude: 1440)
Coordinates:
  * time       (time) datetime64[ns] 4kB 1979-01-01 1979-02-01 ... 2022-01-01
  * latitude   (latitude) float64 6kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0
  * longitude  (longitude) float64 12kB 0.0 0.25 0.5 0.75 ... 359.2 359.5 359.8
Data variables:
    t2m        (time, latitude, longitude) float32 2GB dask.array<chunksize=(205, 286, 571), meta=np.ndarray>
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2022-02-28T07:59 GRIB to CDM+CF via cfgrib-0.9.1...
# Time the same workflow on the Dask-chunked dataset. .compute() is called so
# the Dask-backed result is actually evaluated inside the timed region.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock intended for measuring elapsed time, so the result
# cannot be skewed by system clock adjustments during the run.
start = time.perf_counter()
coarsen_and_polyfit(anom_autochunks_ds, coarsen_factor=6).compute()
end = time.perf_counter()
print(f"Time taken with auto chunks: {end-start:.2f} seconds")
Time taken with auto chunks: 6.94 seconds