Skip to content

Core

Find_era5

Source code in isca_tools/era5/get_jasmin_era5/core.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
class Find_era5:
    """
    Locate and open ERA5 NetCDF files stored in the `/badc/ecmwf-era5*` archives on JASMIN.

    Data is requested by indexing the object, e.g. `era5[var, date]` or
    `era5[var, date, level, longitude, latitude, model]`; see `__getitem__`.
    """
    # Root of the default ERA5 archive. Invariant fields are read from here when a
    # non-default archive was requested (see `find_invariant`).
    _BASE_PATH = pathlib.Path("/badc/ecmwf-era5/data/")

    def __init__(self, archive: Literal[None, 1, 't'] = None):
        """
        Initialise object to load ERA5 data from JASMIN
        Args:
            archive: There are three types of ERA5 archives:

                * `None` to use default ERA5 archive at `/badc/ecmwf-era5`

                * `1` to use ERA5.1 at `/badc/ecmwf-era51`,
                    which is suggested for model level data in years 2000-2006 inclusive.

                * `t` to use Preliminary at `/badc/ecmwf-era5t`, near real-time data
        """
        self._init_vars(archive)
        self.pl = Pressure_levels_era5(archive)
        self.gz = Geopotential_levels_era5(archive)
        self.enda = Ensemble_era5(archive)

    def _init_vars(self, archive: Literal[None, 1, 't'] = None):
        """
        Set the archive suffix, the archive data path and the variable lookup tables.

        Args:
            archive: Archive selector, as for `__init__`. `None` gives an empty suffix;
                any other value is converted to `str` and appended to `ecmwf-era5`.
        """
        self.archive = '' if archive is None else str(archive)
        self.path = pathlib.Path(f"/badc/ecmwf-era5{self.archive}/data/")
        self._INVARIANTS = [
            "anor",
            "cl",
            "cvh",
            "cvl",
            "dl",
            "isor",
            "lsm",
            "sdfor",
            "sdor",
            "slor",
            "slt",
            "tvh",
            "tvl",
            "z",
        ]
        # Date stamp used in the file names of the invariant fields
        self._INVARIANT_DATE = datetime(2000, 1, 1)

        self._ML_VARS = ['sp', 'lnsp', 'o3', 'q', 't', 'u', 'v', 'vo', 'z']       # variables on model levels
        self._SURF_VARS = ['10u', '10v', '2d', '2t', 'asn', 'cape', 'ci',   # variables on surface level
                           'msl', 'sd', 'skt', 'sst', 'tcc', 'tcwv']
        # In these years model level data suffer from stratospheric cold biases - should use ERA5.1
        self._ML_WARNING_YEARS = list(range(2000, 2007))

    def __getitem__(self, args):
        """
        Find and open the files for the requested variables and dates.

        Args:
            args: Tuple `(var, date[, level[, longitude[, latitude[, model]]]])`:

                * `var`: Variable name, or sequence of names. `'sp'` is obtained by loading
                    `'lnsp'` and converting it with `convert_lnsp_to_sp`.
                * `date`: Single date (anything `pd.to_datetime` accepts), or a slice
                    `start:stop:step`. `stop` is excluded and `step` defaults to `"1h"`.
                * `level`, `longitude`, `latitude`: Optional selections, passed to `sel_era5`
                    under those keys. `None` means no selection.
                * `model`: Sub-directory / file tag of the archive to search, default `"oper"`.

        Returns:
            Dataset containing the requested variables.

        Raises:
            ValueError: If no file matching the request was found.
        """
        var = args[0]
        date = args[1]
        sel = {}
        for position, key in ((2, "level"), (3, "longitude"), (4, "latitude")):
            if len(args) > position and args[position] is not None:
                sel[key] = args[position]

        model = args[5] if len(args) > 5 else "oper"

        if isinstance(date, slice):
            freq = "1h" if date.step is None else date.step
            dates = (
                pd.date_range(
                    pd.to_datetime(date.start),
                    pd.to_datetime(date.stop),
                    inclusive="left",
                    freq=freq,
                )
                .to_pydatetime()
                .tolist()
            )
        else:
            dates = [pd.to_datetime(date).to_pydatetime()]

        # Work on a private list: it is edited below and must not alter the caller's object
        var = [var] if isinstance(var, str) else list(var)

        # If requested surface pressure, must get log of surface pressure first and convert later
        want_sp = 'sp' in var
        delete_lnsp = False
        if want_sp:
            var.remove('sp')
            if 'lnsp' not in var:
                # Requested just sp, so lnsp is only a stepping stone and is dropped afterwards
                var.append('lnsp')
                delete_lnsp = True

        # Pass the model on, so the warning is only given for the model it applies to
        self.warn_missing_years(var, dates, model=model)

        files = [
            file
            for v in var
            if v not in self._INVARIANTS
            for file in self.find_files(v, dates, model=model)
        ]
        ds = None
        if files:
            ds = xr.open_mfdataset(files, combine="by_coords")
            ds = sel_era5(ds, sel)

        invar_files = [
            file
            for v in var
            if v in self._INVARIANTS
            for file in self.find_files(v, dates, model=model)
        ]
        if invar_files:
            # Invariant fields have no meaningful time axis, so drop length-one dimensions
            invar_ds = xr.open_mfdataset(invar_files, combine="by_coords").squeeze(
                drop=True
            )
            invar_ds = sel_era5(invar_ds, sel)
            if ds is None:
                ds = invar_ds
            else:
                for invar in invar_ds.data_vars:
                    ds[invar] = invar_ds[invar]

        if ds is None:
            # If no data found
            raise ValueError(f'No data found for ecmwf-era5{self.archive}, var={var} and date={date}.\n'
                             f'Model level variables = {self._ML_VARS}\n'
                             f'Surface variables = {self._SURF_VARS}\n'
                             f'Invariant variables = {self._INVARIANTS}')

        if want_sp:
            ds = convert_lnsp_to_sp(ds, delete_lnsp=delete_lnsp)

        return ds

    def find_files(
        self, var: str, dates: list[datetime], model: str = "oper"
    ) -> list[pathlib.Path]:
        """
        Find all files holding `var` for the given dates.

        Args:
            var: Variable name.
            dates: Dates to find files for. Ignored if `var` is an invariant.
            model: Sub-directory / file tag of the archive to search.

        Returns:
            Paths of the files found; empty if there are none.
        """
        if var in self._INVARIANTS:
            return self.find_invariant(var)
        return [
            file
            for date in dates
            for file in self.find_single_file(var, date, model=model)
        ]

    def find_invariant(self, var: str) -> list[pathlib.Path]:
        """
        Find the file holding the invariant field `var`.

        The file name always carries the `ecmwf-era5` prefix. If a non-default archive was
        requested, a warning is issued and the default archive is searched instead.

        Args:
            var: Name of the invariant variable.

        Returns:
            Sorted paths of the matching files; empty if there are none.
        """
        if self.archive != '':
            warnings.warn(f'Using base archive (ecmwf-era5), for invariant var={var} '
                          f'despite requested archive of ecmwf-era5{self.archive}.')
            # Do what the warning says: look in the base archive, not the requested one
            root = self._BASE_PATH
        else:
            root = self.path
        date = self._INVARIANT_DATE
        pattern = (
            f"invariants/ecmwf-era5_oper_an_sfc_"
            f"{date.year:04d}{date.month:02d}{date.day:02d}0000.{var}.inv.nc"
        )
        return sorted(root.glob(pattern))

    def find_single_file(
        self, var: str, date: datetime, model: str = "oper"
    ) -> list[pathlib.Path]:
        """
        Find the files holding `var` at the year, month, day and hour of `date`.

        Args:
            var: Variable name.
            date: Date and hour to find files for.
            model: Sub-directory / file tag of the archive to search. For `"enda"` only
                the `em_sfc` level type is searched, otherwise every level type.

        Returns:
            Sorted paths of the matching files; empty if there are none.
        """
        level_type = "em_sfc" if model == "enda" else "*"
        day_stamp = f"{date.year:04d}{date.month:02d}{date.day:02d}"
        pattern = (
            f"{model}/{level_type}/{date.year:04d}/{date.month:02d}/{date.day:02d}/"
            f"ecmwf-era5{self.archive}_{model}_*_{day_stamp}{date.hour:02d}*.{var}.nc"
        )
        return sorted(self.path.glob(pattern))

    def warn_missing_years(self, var: list[str], dates: list[datetime], model: str = "oper") -> None:
        """
        Warn when model level data is requested from the default archive for a year listed in
        `_ML_WARNING_YEARS`, quoting the `00README` of the earliest such year.

        Nothing happens for another archive, for a model other than `"oper"`, or if no model
        level variable is requested. A missing or unreadable README does not raise;
        the warning then says so instead of quoting it.

        Args:
            var: List of variables requested
            dates: List of dates requested
            model: Model requested
        """
        if self.archive != '' or model != "oper":
            return
        if not any(v in self._ML_VARS for v in var):
            return
        # Sorted so that the year reported does not depend on set ordering
        years_to_warn = sorted({d.year for d in dates if d.year in self._ML_WARNING_YEARS})
        if not years_to_warn:
            return
        year = years_to_warn[0]
        readme = self.path / 'oper' / 'an_ml' / str(year) / '00README'
        try:
            contents = readme.read_text()
        except OSError:
            # This is only advisory, so a missing README must not stop the data being loaded
            contents = f"(README not found at {readme})"
        warnings.warn(f"README for year {year} (Can use ERA5.1 by setting `archive=1`):\n{contents}")
__init__(archive=None)

Initialise object to load ERA5 data from JASMIN.

Args:
    archive: There are three types of ERA5 archives:

    * `None` to use default ERA5 archive at `/badc/ecmwf-era5`

    * `1` to use ERA5.1 at `/badc/ecmwf-era51`,
        which is suggested for model level data in years 2000-2006 inclusive.

    * `t` to use Preliminary at `/badc/ecmwf-era5t`, near real-time data
Source code in isca_tools/era5/get_jasmin_era5/core.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
def __init__(self, archive: Literal[None, 1, 't'] = None):
    """
    Set up a loader for ERA5 data held on JASMIN.

    Args:
        archive: Selects which ERA5 archive is read:

            * `None`: the default ERA5 archive, `/badc/ecmwf-era5`.

            * `1`: ERA5.1 at `/badc/ecmwf-era51`, suggested for model level data
                in the years 2000-2006 inclusive.

            * `t`: the preliminary, near real-time archive at `/badc/ecmwf-era5t`.
    """
    # Archive path and variable tables of this object
    self._init_vars(archive)
    # Companion loaders, each created for the same archive
    self.pl = Pressure_levels_era5(archive)
    self.gz = Geopotential_levels_era5(archive)
    self.enda = Ensemble_era5(archive)

warn_missing_years(var, dates, model='oper')

Warn about some years that might be missing model level data for ERA5 - print README in relevant directory.

Args:
    var: List of variables requested
    dates: List of dates requested
    model: Model requested

Returns:

Source code in isca_tools/era5/get_jasmin_era5/core.py
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
def warn_missing_years(self, var: list[str], dates: list[datetime], model: str = "oper") -> None:
    """
    Warn when model level data is requested from the default archive for a year listed in
    `_ML_WARNING_YEARS`, quoting the `00README` of the earliest such year.

    Nothing happens for another archive, for a model other than `"oper"`, or if no model
    level variable is requested. A missing or unreadable README does not raise;
    the warning then says so instead of quoting it.

    Args:
        var: List of variables requested
        dates: List of dates requested
        model: Model requested
    """
    if self.archive != '' or model != "oper":
        return
    if not any(v in self._ML_VARS for v in var):
        return
    # Sorted so that the year reported does not depend on set ordering
    years_to_warn = sorted({d.year for d in dates if d.year in self._ML_WARNING_YEARS})
    if not years_to_warn:
        return
    year = years_to_warn[0]
    readme = self.path / 'oper' / 'an_ml' / str(year) / '00README'
    try:
        contents = readme.read_text()
    except OSError:
        # This is only advisory, so a missing README must not stop the data being loaded
        contents = f"(README not found at {readme})"
    warnings.warn(f"README for year {year} (Can use ERA5.1 by setting `archive=1`):\n{contents}")