This notebook serves as supplementary material to the manuscript on low-level jets submitted to Wind Energy Science. The supplementary material consists of 6 notebooks:
In this notebook (3/6), we show how both ERA5 and the observations are biased.
# Xarray is our netCDF working horse
import xarray as xr
import numpy as np
import numpy as np
import matplotlib.pyplot as plt
plt.xkcd()
# Import code to read ECN and ERA5 data
import sys
sys.path.append('../../code/datasets')
import ecn
import era5
obs = ecn.load_platform_data()
obs_1hfreq = {}
obs_1hmean = {}
for name, ds in obs.items():
print(name)
# obs_1hfreq[name] = ds.resample(time='1h').asfreq() # ! This changes the order of the dimensions (see below)
obs_1hfreq[name] = xr.merge([ds.wspd.resample(time='1h').asfreq(), # this is also quite a bit faster
ds.wdir.resample(time='1h').asfreq()])
print('rolling')
obs_1hmean[name] = ds.wspd.rolling(time=5, center=True).mean().resample(time='1h').asfreq()
era_intp = {}
for name, ds in obs.items():
print(name)
# In case the dataset is prepared with NCO, we need to squeeze the lat and lon dimensions
ds_intp = xr.open_dataset('../../data/interim/ERA5platforms/'+name+'_interpolated.nc').transpose()
ds_intp['wspd'] = (ds_intp.u**2+ds_intp.v**2)**.5
ds_intp['wdir'] = (np.arctan2(-ds_intp.u, -ds_intp.v)*180/np.pi%360)
era_intp[name] = ds_intp.drop(['u', 'v'])
era_intp['MMIJ']
# names = [name for name in lljs_full.keys()]
# Use a predefined order to ensure consistency across paper figure colors with smaller datasets on top.
names = ['MMIJ', 'LEG', 'BWF1', 'BWF2', 'EPL', 'HKZA', 'HKZB', 'HKNA', 'HKNB', 'K13']
era_all = xr.concat([era_intp[name] for name in names], dim='platform').assign(platform=names)
obs_all_1hfreq = xr.concat([obs_1hfreq[name] for name in names], dim='platform').assign(platform=names)
obs_all_1hmean = xr.concat([obs_1hmean[name].rename('wspd').to_dataset() for name in names], dim='platform').assign(platform=names)
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
wspd_an = era_all.wspd
wspd_obs_1hmean = obs_all_1hmean.wspd
wspd_obs_1hfreq = obs_all_1hfreq.wspd
a, c1 = xr.align(wspd_obs_1hfreq, wspd_an)
b, c2 = xr.align(wspd_obs_1hmean, wspd_an)
axs[0].scatter(a.values.ravel(),c1.values.ravel(), s=0.5, label=name)
axs[1].scatter(b.values.ravel(),c2.values.ravel(), s=0.5, label=name)
for ax in axs:
ax.plot([0, 30], [0, 30], 'k-')
ax.set_xlabel('Observed wind speed (m/s)')
ax.set_ylabel('ERA5 wind speed (m/s)')
axs[0].set_title('Hourly instantaneous observations')
axs[1].set_title('Hourly averaged observations')
fig.subplots_adjust(wspace=0.5)
plt.show()
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
axs[0].hexbin(a.values.ravel(),c1.values.ravel(), cmap='binary')
axs[1].hexbin(b.values.ravel(),c2.values.ravel(), cmap='binary')
for ax in axs:
ax.plot([0, 30], [0, 30], 'k-')
ax.set_xlabel('Observed wind speed (m/s)')
ax.set_ylabel('ERA5 wind speed (m/s)')
ax.set_xlim(0, 30)
ax.set_ylim(0, 30)
axs[0].set_title('Hourly instantaneous observations')
axs[1].set_title('Hourly averaged observations')
fig.subplots_adjust(wspace=0.5)
plt.show()
plt.xkcd()
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
for name in names:
wspd_an = era_all.sel(platform=name).wspd # Note - I still have both the dataset and the dictionary to get this data
wspd_obs_1hmean = obs_1hmean[name]
wspd_obs_1hfreq = obs_1hfreq[name].wspd
# Subplot 1: hourly instantaneous
bias = (wspd_an-wspd_obs_1hfreq).mean()
stde = (wspd_an-wspd_obs_1hfreq).std()
axs[0].scatter(bias, stde, s=500, zorder=2)
axs[0].text(bias, stde, name, ha='center', va='top', fontsize=16)
# Subplot 2: hourly averages
bias = (wspd_an-wspd_obs_1hmean).mean()
stde = (wspd_an-wspd_obs_1hmean).std()
axs[1].scatter(bias, stde, s=500, zorder=3)
axs[1].text(bias, stde, name, ha='center', va='top', fontsize=16)
a, b = np.meshgrid(np.linspace(-0.55, 0, 5), np.linspace(1.15, 1.5, 5))
rmse = (a**2+b**2)**.5
levels = [1.2, 1.4]
for ax in axs:
cs = ax.contour(a, b, rmse, [1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.55], colors='lightgrey', zorder=1)
ax.clabel(cs, levels, fmt='RMSE=%.1f', colors='grey')
ax.yaxis.tick_right()
ax.yaxis.set_label_position("right")
ax.spines['left'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.set_xlabel('Wind speed bias (m/s)')
ax.set_ylabel('Wind speed stde (m/s)')
ax.set_xlim(-0.55, -0.05)
ax.set_ylim(1.17, 1.52)
axs[0].set_title('Hourly instantaneous observations')
axs[1].set_title('Hourly averaged observations')
fig.subplots_adjust(wspace=0.5)
fig.savefig('../../reports/figures/error_diagrams_wspd', bbox_inches='tight', dpi=200)
plt.show()
The differences between the two representations of the observations are small. The STDE is a bit smaller for the hourly averaged observations. Therefore - and because it makes sense from a theoretical point of view - we will continue to work with this version of the observations.
bias = {}
rmse = {}
stde = {}
for name, ds_obs in obs.items():
wspd_an = era_all.wspd.sel(platform=name)
wspd_obs_1hmean = obs_all_1hmean.wspd.sel(platform=name)
# wspd_obs_1hfreq = obs_all_1hfreq.wspd.sel(platform=name)
bias[name] = (wspd_an-wspd_obs_1hmean).groupby('time.hour').mean()
rmse[name] = (wspd_an-wspd_obs_1hmean).groupby('time.hour').std()
stde[name] = (rmse[name]**2+bias[name]**2)**.5
fig, ax = plt.subplots(figsize=(12,6))
fs = 16
for name, b in bias.items():
ax.scatter(b, stde[name], s=100, label=name)
ax.yaxis.tick_right()
ax.yaxis.set_label_position("right")
ax.spines['left'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.set_xlabel('BIAS', fontsize=fs)
ax.set_ylabel('STDE', fontsize=fs)
ax.set_title('error diagram grouped by hour of day')
ax.legend()
plt.show()
bias = {}
rmse = {}
stde = {}
for name, ds_obs in obs.items():
wspd_an = era_all.wspd.sel(platform=name)
wspd_obs_1hmean = obs_all_1hmean.wspd.sel(platform=name)
# wspd_obs_1hfreq = obs_all_1hfreq.wspd.sel(platform=name)
bias[name] = (wspd_an-wspd_obs_1hmean).groupby('height').mean()
rmse[name] = (wspd_an-wspd_obs_1hmean).groupby('height').std()
stde[name] = (rmse[name]**2+bias[name]**2)**.5
fig, ax = plt.subplots(figsize=(12,6))
fs = 16
for name, b in bias.items():
ax.scatter(b, stde[name], s=100, label=name)
ax.yaxis.tick_right()
ax.yaxis.set_label_position("right")
ax.spines['left'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.set_xlabel('BIAS', fontsize=fs)
ax.set_ylabel('STDE', fontsize=fs)
ax.set_title('Error diagram grouped by level')
ax.legend()
plt.show()
bias = (era_all.wspd - obs_all_1hmean.wspd).groupby('platform').mean()
stde = (era_all.wspd - obs_all_1hmean.wspd).groupby('platform').std()
fig, axs = plt.subplots(3, 4, figsize=(12, 12), sharex=True, sharey=True)
axs = axs.ravel()
for i, (name, ds_obs) in enumerate(obs.items()):
wspd_an = era_all.wspd.sel(platform=name).dropna(dim='height', how='all')
wspd_obs_1hmean = obs_all_1hmean.wspd.sel(platform=name).dropna(dim='height', how='all')
# Make sure to exclude masked values!
wspd_an, wspd_obs_1hmean = xr.align(wspd_an, wspd_obs_1hmean)
wspd_an = wspd_an.where(np.isfinite(wspd_obs_1hmean))
mean_an = wspd_an.mean(dim='time')
mean_obs = wspd_obs_1hmean.mean(dim='time')
std_an = wspd_an.std(dim='time')
std_obs = wspd_obs_1hmean.std(dim='time')
z = mean_an.height.values
l, = axs[i].plot(mean_an, z)
c = l.get_color()
axs[i].fill_betweenx(z, mean_an+std_an, mean_an-std_an, color=c, alpha=0.5)
l, = axs[i].plot(mean_obs, z, 'o--')
c = l.get_color()
axs[i].fill_betweenx(z, mean_obs+std_obs, mean_obs-std_obs, color=c, alpha=0.5)
axs[i].set_title(name)
axs[i].text(17, 30, 'Bias: %.1f\nSTDE: %.1f'%(bias.sel(platform=name),stde.sel(platform=name)), ha='right')
axs[-4].set_xlabel('Wind speed (m/s)')
axs[8].set_ylabel('Altitude (m)')
plt.show()
fig, axs = plt.subplots(3, 4, figsize=(12, 12), sharex=True, sharey=True)
axs = axs.ravel()
for i, (name, ds_obs) in enumerate(obs.items()):
wspd_an = era_all.wspd.sel(platform=name).dropna(dim='height', how='all')
wspd_obs_1hmean = obs_all_1hmean.wspd.sel(platform=name).dropna(dim='height', how='all')
# Make sure to exclude masked values!
wspd_an, wspd_obs_1hmean = xr.align(wspd_an, wspd_obs_1hmean)
wspd_an = wspd_an.where(np.isfinite(wspd_obs_1hmean))
# Compute hourly average
mean_an = wspd_an.groupby('time.hour').mean()
mean_obs = wspd_obs_1hmean.groupby('time.hour').mean()
l, = axs[i].plot(range(24), mean_an)
l, = axs[i].plot(range(24), mean_obs, 'o--')
axs[i].set_title(name)
axs[8].set_ylabel('Wind speed (m/s)')
axs[8].set_xlabel('Hour (UTC)')
plt.show()
fig, axs = plt.subplots(3, 4, figsize=(12, 12), sharex=True, sharey=True)
axs = axs.ravel()
for i, (name, ds_obs) in enumerate(obs.items()):
wspd_an = era_all.wspd.sel(platform=name).dropna(dim='height', how='all')
wdir_an = era_all.wdir.sel(platform=name).dropna(dim='height', how='all')
wspd_obs_1hmean = obs_all_1hmean.wspd.sel(platform=name).dropna(dim='height', how='all')
# Align all datasets
wdir_an, wspd_an, wspd_obs = xr.align(wdir_an, wspd_an, wspd_obs_1hmean)
# Make sure to exclude masked values!
wspd_an = wspd_an.where(np.isfinite(wspd_obs))
wdir_an = wdir_an.where(np.isfinite(wspd_obs))
# Compute hourly average
bins = np.linspace(0, 360, 17)
mean_an = wspd_an.groupby_bins(wdir_an.rename('wdir_bin'), bins).mean()
mean_obs = wspd_obs.groupby_bins(wdir_an.rename('wdir_bin'), bins).mean()
l, = axs[i].plot(range(16), mean_an)
l, = axs[i].plot(range(16), mean_obs, 'o--')
axs[i].set_title(name)
axs[8].set_ylabel('Wind speed (m/s)')
axs[8].set_xlabel('Wind direction (degrees)')
axs[8].set_xticks(np.arange(0, 17, 4))
axs[8].set_xticklabels(np.arange(0, 361, 90))
plt.show()
fig, ax = plt.subplots()
for name in names:
# Using instantaneous wind directions, would it be cleaner to average them????
wdir_an = era_all.wdir.sel(platform=name).dropna(dim='height', how='all')
wdir_obs = obs_all_1hfreq.wdir.sel(platform=name).dropna(dim='height', how='all')
a, b = xr.align(wdir_obs, wdir_an)
ax.scatter(a.values.ravel(),b.values.ravel(), s=0.5, label=name)
ax.plot([0, 360], [0, 360], 'k-')
ax.set_xlabel('observed wind direction')
ax.set_ylabel('analysis wind direction')
ax.legend(bbox_to_anchor=(1.05, 1))
plt.show()
There's quite some scatter in the wind direction, but the majority of the data is close to the 1:1 line. Since we don't really evaluate anything related to wind direction, we won't dig deeper into this at this point.
# Reload all data
# Load measurement data
obs_10mins = ecn.load_platform_data()
obs_1hmean = {}
for name, ds in obs_10mins.items():
obs_1hmean[name] = ds.wspd.rolling(time=5, center=True).mean().resample(time='1h').asfreq()
# Load ERA5 data
era_full = {}
era_z_nd = {}
era_coll = {}
era_intp = {}
era_fair = {}
for name, ds in obs_10mins.items(): # this version has lat/lon info
lon = ds.longitude.values
lat = ds.latitude.values
# In case the dataset is prepared with NCO, we need to squeeze the lat and lon dimensions
era_full[name]= xr.open_dataset('../../data/interim/ERA5platforms/'+name+'.nc').squeeze().sel(level=slice(None, 124, -1))
era_z_nd[name]= xr.open_dataset('../../data/interim/ERA5platforms/'+name+'_z.nc').squeeze()
era_intp[name]= xr.open_dataset('../../data/interim/ERA5platforms/'+name+'_interpolated.nc')
era_coll[name] = era_full[name].where(np.isfinite(ds.wspd.mean(dim='height')))
era_fair[name] = era_intp[name].where(np.isfinite(ds.wspd.mean(dim='height'))) #.mean(dim='height') is not necessary here
# for the 'fair' dataset it is not necessary to take the mean of ds.wspd, because the heights are aligned.
# However, this does lead to different results
plt.rcdefaults()
plt.xkcd()
names = ['MMIJ', 'LEG', 'BWF1', 'BWF2', 'EPL', 'HKZA', 'HKZB', 'HKNA', 'HKNB', 'K13']
# fig, axs = plt.subplots(1, 3, figsize=(12,4), dpi=150)
fig = plt.figure(figsize=(14,4), dpi=150)
ax0 = fig.add_axes([0.05, 0.05, 0.25, 0.9])
ax1 = fig.add_axes([0.37, 0.05, 0.25, 0.9]) # This subplot is shifted slightly to the right
ax2 = fig.add_axes([0.65, 0.05, 0.25, 0.9])
axs=[ax0, ax1, ax2]
#####################################################
for name in names:
wspd_obs = obs_1hmean[name]
wspd_full = (era_full[name].u**2+era_full[name].v**2)**.5
wspd_coll = (era_coll[name].u**2+era_coll[name].v**2)**.5
wspd_fair = (era_fair[name].u**2+era_fair[name].v**2)**.5
z_full = era_z_nd[name].z_nd.mean(dim='time')
z_coll = z_full.where(np.isfinite(wspd_obs.mean(dim='time')))
# Plot the profiles
l, = axs[0].plot(wspd_full.mean(dim='time'), z_full, '-', label=name+'-full')
col = l.get_color()
axs[0].plot(wspd_coll.mean(dim='time'), z_coll, '--', c=col, label=name+'-subs')
axs[0].plot(wspd_fair.mean(dim='time'), wspd_fair.height, 'o', c=col, label='_nolegend_')
# Plot diurnal cycle bias
bias = (wspd_fair-wspd_obs).mean(dim='height').groupby('time.hour').mean(dim='time').values
rmse = (wspd_fair-wspd_obs).mean(dim='height').groupby('time.hour').std(dim='time').values
axs[1].plot(range(24), bias, color=col, label=name+'-bias')
axs[1].plot(range(24), (rmse**2-bias**2)**.5, '--', color=col, label=name+'-stde')
axs[1].yaxis.grid(which="major", color='lightgrey', linestyle='--', linewidth=1)
axs[1].axhline(0, color='k', linestyle='--', linewidth=2)
# Plot the error diagrams
bias = (wspd_fair-wspd_obs).mean()
stde = (wspd_fair-wspd_obs).std()
axs[2].scatter(bias, stde, s=500, zorder=3, c=col)
axs[2].text(bias, stde, name, ha='center', va='top', fontsize=16)
#####################################################
# Add contours of equal RMSE
a, b = np.meshgrid(np.linspace(-0.55, 0, 5), np.linspace(1.15, 1.5, 5))
rmse = (a**2+b**2)**.5
levels = [1.2, 1.4]
cs = axs[2].contour(a, b, rmse, [1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.55], colors='lightgrey', zorder=1)
axs[2].clabel(cs, levels, fmt='RMSE=%.1f', colors='grey')
axs[2].yaxis.tick_right()
axs[2].yaxis.set_label_position("right")
axs[2].spines['left'].set_visible(False)
axs[2].spines['top'].set_visible(False)
axs[2].set_xlim(-0.55, -0.05)
axs[2].set_ylim(1.17, 1.52)
#####################################################
axs[0].spines['right'].set_visible(False)
axs[0].spines['top'].set_visible(False)
axs[1].spines['right'].set_visible(False)
axs[1].spines['top'].set_visible(False)
# axs[2].legend(bbox_to_anchor=(1.05, 1), ncol=2)
axs[2].set_xlabel('Hour of day')
axs[2].set_ylabel('Wind speed (m/s)')
axs[0].set_xlabel('mean wind speed (m/s)')
axs[0].set_ylabel('altitude (m)')
axs[1].set_xlabel('hour of day (UTC)')
axs[1].set_ylabel('wind speed bias/stde (m/s)')
axs[2].set_xlabel('Wind speed bias (m/s)')
axs[2].set_ylabel('Wind speed stde (m/s)')
axs[0].set_title('A. ERA5 profiles')
axs[1].set_title('B. Diurnal cycle')
axs[2].set_title('C. Error diagram')
fig.savefig('../../reports/figures/validation', bbox_inches='tight')
plt.show()
In (time) aligning the ERA5 data with the observations, there are two possibilities. For the data up to 500 m, we excluded all timestamps for which the observations were all equal to NaN. These are the time stamps for which the LiDARs were not operating at all. However, sometimes the LiDARs did operate, but only some elevations reported NaNs (due to postprocessing or so). Thus, for the altitude-aligned version of the ERA5 data, we can choose to include or exclude those timestamps. This makes a difference in the mean wind profiles. It doesn't really affect the statistical performance though. For the figure above, if we include those semi-complete timestamps, the dots in the left panel do not coincide with the mean profiles. If we exclude them, they do. To avoid confusion, we decided to exclude them.