ehratm APIs
real.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 # *******************************************************
4 # * International Data Centre *
5 # * Comprehensive Nuclear Test Ban Treaty Organization *
6 # * Vienna *
7 # * Austria *
8 # *
9 # * Don Morton (DM) *
10 # * Boreal Scientific Computing *
11 # * Fairbanks, Alaska USA *
12 # *******************************************************
13 
14 
15 
16 #-----------------------------------------------------------------------------
17 
18 #import argparse
19 import datetime
20 import logging
21 import os
22 import uuid
23 
24 import f90nml
25 
26 import ehratm.defaults
27 
28 #import nwpservice.wps.namelistwps
29 import nwpservice.wrf.real
30 
32 
# Default verbosity for this module's logger.  RealWorkflow.__init__() may
# override it per instance through its log_level argument.
DEFAULT_LOG_LEVEL = logging.DEBUG

# Module-level logger with its own stream handler, so that messages carry
# the originating file, function and line number.
LOGGER = logging.getLogger(__name__)
handler = logging.StreamHandler()
formatter = logging.Formatter(
    '%(levelname)s - %(filename)s:%(funcName)s:%(lineno)d --> %(message)s'
)
handler.setFormatter(formatter)
LOGGER.addHandler(handler)

LOGGER.setLevel(DEFAULT_LOG_LEVEL)
44 
45 
class RealWorkflow(object):

    '''Class for managing preparation and execution of real components

    An instance validates its inputs in __init__() and then drives an
    nwpservice.wrf.real.Real object from run_real().
    '''

    def __init__(self,
                 wpswrf_distro_path=None,
                 working_rootdir=None,
                 start_time_dt=None,
                 stop_time_dt=None,
                 hours_intvl=None,
                 num_nests=None,
                 nests_defn_dict=None,
                 metgrid_data_dict=None,
                 num_mpi_tasks=None,
                 log_level=DEFAULT_LOG_LEVEL,
                 bypass_namelist_input=None,
                 ):

        '''Check and initialise the class


        Parameters
        ----------
        wpswrf_distro_path : str
            (Optional) Full path to the WPS/WRF distribution to be used.
            This is assumed to have been installed in a way that is
            compatible with the nwpservice module.  If arg is not present,
            uses a default value
        working_rootdir : str
            (Optional) Full path to a directory (assumed to have already
            been created), to be used as scratch space for setting up and
            running this real instance.  If arg is not present, uses a
            default value
        start_time_dt : datetime
            Time of first ungribbed file(s)
        stop_time_dt : datetime
            Time of last ungribbed file(s)
        hours_intvl : int
            Interval (in hours) between ungribbed files
        num_nests : int
            Number of domain nests represented in metgrid data, and in
            the real data that will be produced
        nests_defn_dict : dict
            Dict of nest definitions (of use if generating namelist.input)
        metgrid_data_dict : dict
            Necessary information on the metgrid files.  We expect dict with
            entries {'path_to_files' : <path>,
            'files_list' : [list of metgrid files]}.
            This implies that the metgrid files are all in the same src
            directory.  This argument is required.
        num_mpi_tasks : int
            (Optional) Number of MPI tasks to use for running this instance
            of real.  If arg is not present, non-MPI execution is assumed.
        log_level : int
            Python logging level (e.g. logging.INFO).  Note that this is
            applied to the module-level logger.
        bypass_namelist_input : str
            This is an assumed-correct namelist.input to be used for
            execution of real.  Its presence bypasses the normal process
            of creating a namelist.input, and puts complete trust in the
            correctness of the provided namelist.input.  If this argument
            is present, then any values for start_time_dt, stop_time_dt,
            hours_intvl, num_nests and nests_defn_dict are ignored.  This
            is meant primarily for devtest operations, and probably
            shouldn't be used for normal workflows.

        Raises
        ------
        FileNotFoundError
            If bypass_namelist_input, wpswrf_distro_path, working_rootdir,
            any listed metgrid file, or the mpirun executable is missing.
        ValueError
            If metgrid_data_dict is missing, or (when not bypassing the
            namelist) num_nests / nests_defn_dict are missing or an
            expected nest has no metgrid file, or num_mpi_tasks is out
            of range.


        Example
        -------
        This is an example of the metgrid_data_dict

        ::

            metgrid_data_dict = {
                'path_to_files' : '/path/to/dir/with/metgrid/files',
                'files_list' : ['met_em.d01.2022-05-14_12:00:00.nc',
                                'met_em.d01.2022-05-14_15:00:00.nc',
                                'met_em.d02.2022-05-14_12:00:00.nc',
                                'met_em.d02.2022-05-14_15:00:00.nc'
                               ]
            }
        '''

        self._logging_level = log_level
        LOGGER.setLevel(self._logging_level)

        # Now I can use the logger
        LOGGER.debug('started')

        # Do some checks (need to expand on these over the years)
        # Essentially, would be good to do a controlled bail here if there's
        # a problem as opposed to doing it further down

        # Check bypass_namelist_input arg.  If present, verify good
        # path.  And, if present, some of the following checks will be
        # ignored
        self._bypass_namelist_input = None
        if bypass_namelist_input:
            if not os.path.isfile(bypass_namelist_input):
                raise FileNotFoundError('bypass_namelist_input not found: %s' %
                                        bypass_namelist_input)
            self._bypass_namelist_input = bypass_namelist_input
            LOGGER.debug('bypass_namelist_input: %s',
                         self._bypass_namelist_input)

        # NOTE(review): DEFAULTS is not defined in the visible part of this
        # file; presumably a module-level ehratm.defaults.Defaults()
        # instance -- confirm.
        if not wpswrf_distro_path:
            wpswrf_distro_path = DEFAULTS.wpswrf_distro_path()
        self._wpswrf_distro_path = wpswrf_distro_path
        LOGGER.debug('wpswrf_distro_path: %s', self._wpswrf_distro_path)
        if not os.path.isdir(self._wpswrf_distro_path):
            raise FileNotFoundError('wpswrf_distro_path not found: %s' %
                                    self._wpswrf_distro_path)

        if not working_rootdir:
            working_rootdir = DEFAULTS.working_scratch_rootdir()
        self._working_rootdir = working_rootdir
        if not os.path.isdir(self._working_rootdir):
            raise FileNotFoundError('working_rootdir not found: %s' %
                                    self._working_rootdir)
        LOGGER.debug('working_rootdir: %s', self._working_rootdir)

        # metgrid_data_dict defaults to None in the signature but is always
        # needed, so fail with a clear message rather than an opaque
        # TypeError from subscripting None.
        if not metgrid_data_dict:
            raise ValueError('Missing metgrid_data_dict arg')

        # Check that expected metgrid files are available
        metgrid_dir = metgrid_data_dict['path_to_files']
        metgrid_files = metgrid_data_dict['files_list']
        for fname in metgrid_files:
            fpath = os.path.join(metgrid_dir, fname)
            if not os.path.isfile(fpath):
                raise FileNotFoundError('fpath: %s' % fpath)

        # ----- Don't deal with these possibly empty variables if we're
        # ----- using the bypass_namelist_input
        if not self._bypass_namelist_input:
            # Basic test of num_nests
            if not num_nests:
                raise ValueError("Missing num_nests arg")
            if num_nests <= 0:
                raise ValueError("num_nests: %d" % num_nests)
            self._num_nests = num_nests

            # Check that we have expected nests.  Characters 7-9 of a
            # metgrid filename hold the 'dNN' nest tag, as in
            # 'met_em.d01.2022-05-14_12:00:00.nc'.
            available_nests = {fname[7:10] for fname in metgrid_files}
            for nest_num in range(1, self._num_nests + 1):
                expected_nest = 'd%02d' % nest_num
                if expected_nest not in available_nests:
                    raise ValueError('Did not find nest: %s' % expected_nest)

            # Check that we have the nest definitions
            if not nests_defn_dict:
                raise ValueError('Did not find nests_defn_dict...')
            self._nests_defn_dict = nests_defn_dict

        # Use specified number of MPI tasks, or default to 0, which will imply
        # non-MPI execution
        self._mpirunpath = None     # Just give it a namespace for now
        if num_mpi_tasks:
            # Ensure valid number, otherwise raise exception.  If the value
            # looks good, check that the default mpirun executable is
            # available and executable
            if not 1 <= num_mpi_tasks <= DEFAULTS.max_mpi_tasks():
                raise ValueError('Bad num_mpi_tasks value: %d' % num_mpi_tasks)

            mpirunpath = DEFAULTS.mpirun_path()
            if not (os.path.isfile(mpirunpath) and
                    os.access(mpirunpath, os.X_OK)):
                raise FileNotFoundError('mpirun not executable: %s' %
                                        mpirunpath)
            self._num_mpi_tasks = num_mpi_tasks
            self._mpirunpath = mpirunpath
        else:
            # This default value of 0 will denote non-MPI execution when
            # invoking the real service
            self._num_mpi_tasks = 0
        LOGGER.debug('num_mpi_tasks: %d', self._num_mpi_tasks)

        # If we got through all this, save the time info for the run_real()
        # method
        if self._bypass_namelist_input:
            # We won't use these if bypass_namelist_input is being used, so
            # set them to None so that other code won't mistakenly try to use
            # them
            self._start_time_dt = None
            self._stop_time_dt = None
            self._hours_intvl = None
            self._num_nests = None
            self._nests_defn_dict = None
        else:
            self._start_time_dt = start_time_dt
            self._stop_time_dt = stop_time_dt
            self._hours_intvl = hours_intvl

        self._metgrid_data_dict = metgrid_data_dict

    def run_real(self, real_output_stagedir=None):

        '''Set up and run, via nwpservice module, instance of real

        Parameters
        ----------
        real_output_stagedir : str
            (Optional) Full path to dir where real files will be staged.
            If not specified, no staging will be done.  If specified, we
            assume the dir already exists.

        Returns
        -------
        return_dict : dict
            Dictionary with keys 'real_output_manifest' (value returned by
            the nwpservice run), 'staging_dir' (real_output_stagedir if
            staging reported success, otherwise None) and 'hours_intvl'

        Raises
        ------
        NotImplementedError
            If no bypass_namelist_input was provided (namelist.input
            creation is not yet supported).
        FileNotFoundError
            If the bypass namelist file no longer exists.
        '''

        LOGGER.debug('Starting run_real()...')

        # Without the bypass namelist we would need to create the namelist
        # based on a number of parameters, including the domain_defn and
        # the dates/times, etc.  This is where eventually we'll add in the
        # namelist.input creation.
        if not self._bypass_namelist_input:
            raise NotImplementedError(
                'namelist.input creation not yet supported')

        # Re-test for its presence; it may have gone since __init__()
        if not os.path.isfile(self._bypass_namelist_input):
            raise FileNotFoundError(self._bypass_namelist_input)
        namelist_input_path = self._bypass_namelist_input

        # Just in case they're needed later, get the num_nests and
        # hours_intvl values from the namelist (They were set to None
        # in __init__() for the bypass namelist scenario)
        nml = f90nml.read(namelist_input_path)
        self._hours_intvl = int(
            nml['time_control']['interval_seconds'] / 3600)
        self._num_nests = nml['domains']['max_dom']

        # Create nwpservice object, then setup and run
        domainpath = os.path.join(self._working_rootdir, 'real_rundir')
        LOGGER.debug('domainpath: %s', domainpath)

        nwpreal_obj = nwpservice.wrf.real.Real(
            wpswrf_distro_path=self._wpswrf_distro_path,
            wpswrf_rundir=domainpath,
            metgriddatadir=self._metgrid_data_dict['path_to_files'],
            namelist_input=namelist_input_path,
            output_dir=real_output_stagedir,
            numpes=self._num_mpi_tasks,
            mpirun_path=self._mpirunpath,
            log_level=self._logging_level
        )

        nwpreal_obj.setup()
        output_manifest = nwpreal_obj.run()
        LOGGER.debug('output_manifest: %s', output_manifest)

        # If this wasn't successful (even if an output staging dir was
        # never specified), it should return false
        # NOTE(review): relies on nwpservice's stage_output() contract --
        # confirm.
        stage_success = nwpreal_obj.stage_output(auxfiles=True)
        staging_dir = real_output_stagedir if stage_success else None

        return_dict = {
            'real_output_manifest': output_manifest,
            'staging_dir': staging_dir,
            'hours_intvl': self._hours_intvl
        }
        LOGGER.debug('return_dict: %s', return_dict)

        return return_dict
ehratm.wrf.real.RealWorkflow._nests_defn_dict
_nests_defn_dict
Definition: real.py:197
ehratm.defaults
Definition: defaults.py:1
ehratm.wrf.real.RealWorkflow._num_nests
_num_nests
Definition: real.py:180
ehratm.wrf.real.RealWorkflow._logging_level
_logging_level
Definition: real.py:121
ehratm.wrf.real.RealWorkflow._start_time_dt
_start_time_dt
Definition: real.py:234
ehratm.wrf.real.RealWorkflow._mpirunpath
_mpirunpath
Definition: real.py:203
ehratm.wrf.real.RealWorkflow._bypass_namelist_input
_bypass_namelist_input
Definition: real.py:135
ehratm.wrf.real.RealWorkflow._metgrid_data_dict
_metgrid_data_dict
Definition: real.py:245
ehratm.wrf.real.RealWorkflow._wpswrf_distro_path
_wpswrf_distro_path
Definition: real.py:146
ehratm.defaults.Defaults
Definition: defaults.py:9
ehratm.wrf.real.RealWorkflow.__init__
def __init__(self, wpswrf_distro_path=None, working_rootdir=None, start_time_dt=None, stop_time_dt=None, hours_intvl=None, num_nests=None, nests_defn_dict=None, metgrid_data_dict=None, num_mpi_tasks=None, log_level=DEFAULT_LOG_LEVEL, bypass_namelist_input=None)
Definition: real.py:54
ehratm.wrf.real.RealWorkflow._working_rootdir
_working_rootdir
Definition: real.py:154
ehratm.wrf.real.RealWorkflow.run_real
def run_real(self, real_output_stagedir=None)
Definition: real.py:260
ehratm.wrf.real.RealWorkflow
Definition: real.py:46
ehratm.wrf.real.RealWorkflow._num_mpi_tasks
_num_mpi_tasks
Definition: real.py:212
ehratm.wrf.real.RealWorkflow._hours_intvl
_hours_intvl
Definition: real.py:236
ehratm.wrf.real.RealWorkflow._stop_time_dt
_stop_time_dt
Definition: real.py:235