# -*- coding: UTF-8 -*-.
"""
| ---------------------------------------------------------------------------------------------------------------------
| Date : April 2020
| Copyright : © 2020 by Ann Crabbé
| Email : acrabbe.foss@gmail.com
| Acknowledgements : Based on DINEOF 4.0 (ULG Liege) by Alida Alvera-Azcárte and Alexander Barth.
|
| This file is part of the EOF-based Time Series Reconstructor QGIS plugin package.
|
| This program is free software: you can redistribute it and/or modify it under the terms of the
| GNU General Public License as published by the Free Software Foundation, either version 3
| of the License, or any later version.
|
| This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
| without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
| PURPOSE. See the GNU General Public License for more details.
|
| You should have received a copy of the GNU General Public License (COPYING.txt). If not see www.gnu.org/licenses.
| ---------------------------------------------------------------------------------------------------------------------
"""
import os
import pathlib
import platform
import subprocess
from osgeo import gdal
def _qgis_to_fortran(path):
parts = path.split('"')
return parts[1] + "#" + parts[2].split(':')[1]
[docs]class Dineof:
""" EOF-based method to fill in missing data from geophysical fields, such as clouds in sea surface temperature.
:param data_path: path to netCDF data with gaps to be filled
:param output_folder: path to output folder where all results will be stored
:param eof_max: the max number of EOF modes to compute;
only the optimal number of EOFs + 3 will be computed for robust optimum detection
must be <= temporal size of data - 5;
start high and adjust down for subsequent reconstructions;
if the max number of modes have been used, the algorithm probably needs more information
:param krylov_max: the maximum size for the Krylov subspace;
must be: >= eof_max + 5 and <= temporal size of data
:param mask_path: path to land-sea mask; netCDF format (optional)
without mask, all points are considered as ocean
:param eof_min: the min number of EOF modes to computes; default = 1 (recommended)
:param eof_max_it: set the maximum iterations for each EOF calculation; default = 300;
eof decomposition <-> truncated reconstruction and replacement of missing data;
use large number and only increase if this number is reached for each EOF
:param eof_it_stop: stop EOF iterations once: rms(successive reconstruction) / std(existing data) < eof_it_stop
:param lanczos: threshold for Lanczos convergence; default = 1.e-8 (recommended)
:param time_path: path to time file; netCDF format; used for the diffusion of the covariance matrix
specified as the relative increment in time between your individual images;
example: [1,2,3,4,...] if all data is 1 day apart; [1,2,4,...] if day 3 is missing
:param time_alpha: sets strength of the filter; 0 to deactivate
:param time_it: number of iterations for the filter
:param reconstruct_all: 1: reconstruct all points; 0: reconstruct only missing data
:param save_eof_base: 1: save left and right EOFs; 0: not
:param normalize_input: 1: normalize the input matrix for multivariate case; 0: not
:param cv_seed: initialize random number generator, for selection of cross-validation points;
repeating an experiment with the same seed will yield the exact same result while
another seed will yield a slight variation
:param cv_clouds: path to cloud file; netCDF format; mask with manual selection of cross-validation points
cv_seed will be ignored
For more information on how DINEOF works, please refer to:
Alvera-Azcárate, A., Barth, A., Rixen, M., Beckers, J.M., 2005, Reconstruction of incomplete oceanographic data sets
using empirical orthogonal functions: application to the Adriatic Sea surface temperature, Ocean Modelling, 9-4,
p. 325-346
Beckers, J.M., Rixen, M., 2003, EOF calculations and data filling from incomplete oceanographic data sets, Journal
of Atmospheric and Oceanic Technology, 20-12, p. 1839-1856
The multivariate application of DINEOF is explained in:
Alvera-Azcárate, A., Barth, A., Beckers, J.M., Weisberg, R.H., 2007, Multivariate reconstruction of missing data in
sea surface temperature, chlorophyll, and wind satellite fields, Journal of Geophysical Research. Oceans, 112-C3,
p. C03008
The error calculation using an optimal interpolation approach is explained in:
Beckers, J.M., Barth, A., Alvera-Azcárate, A., 2006, DINEOF reconstruction of clouded images including error maps –
application to the Sea-Surface Temperature around Corsican Island, Ocean Science, 2, 183–199
For more information about the Lanczos solver:
Toumazou, V., Cretaux, J.F., 2001, Using a Lanczos Eigensolver in the Computation of Empirical Orthogonal Functions,
Monthly Weather Review, 129-5, p. 1243–1250
"""
def __init__(self, data_path: str, output_folder: str, eof_max: int, krylov_max: int,
mask_path: str = None,
eof_min: int = 1, eof_max_it: int = 300, eof_it_stop: float = 1.0e-3, lanczos: float = 1.0e-8,
time_path: str = None, time_alpha: float = 0.01, time_it: int = 3, reconstruct_all: int = 1,
save_eof_base: int = 0, normalize_input: int = 0, cv_seed: int = 243435, cv_clouds: str = None):
file_error = "The file '{}' does not exist or is not in netCDF format."
if 'netCDF' in gdal.Info(data_path):
self.data_path = _qgis_to_fortran(data_path)
n_time = gdal.Open(data_path).RasterCount
else:
raise FileNotFoundError(file_error.format(data_path))
if mask_path:
if 'netCDF' in gdal.Info(mask_path):
self.mask_path = _qgis_to_fortran(mask_path)
else:
raise FileNotFoundError(file_error.format(mask_path))
else:
self.mask_path = None
if not os.path.exists(output_folder):
os.mkdir(output_folder)
if os.path.isdir(output_folder):
self.output_folder = output_folder
else:
raise NotADirectoryError("The path '{}' is not a directory.".format(output_folder))
self.reconstruct_all = reconstruct_all
self.save_eof_base = save_eof_base
self.normalize_input = normalize_input
# EOF parameters
if eof_max <= n_time - 5:
self.eof_max = eof_max
else:
raise ValueError("The number of EOF modes to compute should be smaller than or equal to 'the temporal size "
"of data minus 5'")
if (krylov_max >= eof_max + 5) and (krylov_max <= n_time):
self.krylov_max = krylov_max
else:
raise ValueError("The the maximum size of the Krylov subspace must be greater than or equal to 'the number "
"of EOF modes plus 5' and smaller than or equal to the temporal size of data")
self.eof_min = eof_min
self.eof_max_it = eof_max_it
self.eof_it_stop = eof_it_stop
self.lanczos = lanczos
# diffusion of the covariance matrix
self.time_alpha = time_alpha
self.time_it = time_it
if time_alpha != 0:
if 'netCDF' in gdal.Info(time_path):
self.time_path = _qgis_to_fortran(time_path)
else:
raise FileNotFoundError(file_error.format(time_path))
else:
self.time_path = None
# cross validation
self.cv_seed = cv_seed
if cv_clouds:
if 'netCDF' in gdal.Info(cv_clouds):
self.cv_clouds = _qgis_to_fortran(cv_clouds)
else:
raise FileNotFoundError(file_error.format(cv_clouds))
else:
self.cv_clouds = None
[docs] def write_init_file(self, path):
"""
:param path: the path of the init file
:return: Write the input variables as text string to path
"""
string = [
"data = ['{}']".format(self.data_path),
"Output = '{}/'".format(self.output_folder),
"results = ['{}']".format(os.path.join(self.output_folder, os.path.basename(self.data_path))),
"rec = {}".format(self.reconstruct_all),
"nev = {}".format(self.eof_max),
"ncv = {}".format(self.krylov_max),
"neini = {}".format(self.eof_min),
"nitemax = {}".format(self.eof_max_it),
"tol = {}".format(self.lanczos),
"toliter = {}".format(self.eof_it_stop),
"alpha = {}".format(self.time_alpha),
"numit = {}".format(self.time_it),
"norm = {}".format(self.normalize_input),
"seed = {}".format(self.cv_seed),
"eof = {}".format(self.save_eof_base),
"EOF.U = ['{}']".format(os.path.join(self.output_folder, "eof.nc#Usst")),
"EOF.V = '{}'".format(os.path.join(self.output_folder, "eof.nc#V")),
"EOF.Sigma = '{}'".format(os.path.join(self.output_folder, "eof.nc#Sigma"))
]
if self.mask_path:
string.append("mask = ['{}']".format(self.mask_path))
if self.time_alpha != 0:
string.append("time = '{}'".format(self.time_path))
if self.cv_clouds:
string.append("clouds = '{}'".format(self.cv_clouds))
full_text = '\n'.join(string)
file = open(path, 'w')
file.write(full_text)
file.close()
[docs] def execute(self):
script_path = pathlib.Path(__file__).resolve().parents[0]
executables = {
'Windows': (script_path / 'bin' / 'dineof-4.0-windows.exe').as_posix(),
'Linux': (script_path / 'bin' / 'dineof-4.0-linux.exe').as_posix(),
}
parameter_file = (pathlib.Path(self.output_folder).resolve() / 'parameter_file.txt').as_posix()
log_file = (pathlib.Path(self.output_folder).resolve() / 'log.txt').as_posix()
error_file = (pathlib.Path(self.output_folder).resolve() / 'error_log.txt').as_posix()
self.write_init_file(parameter_file)
system = platform.system()
stdout = open(log_file, "wb")
stderr = open(error_file, "wb")
args = [executables[system], parameter_file]
subprocess.run(args, stdout=stdout, stderr=stderr)
stdout.close()
stderr.close()