Source code for dineof.core.dineof

# -*- coding: UTF-8 -*-.
"""
| ---------------------------------------------------------------------------------------------------------------------
| Date                : April 2020
| Copyright           : © 2020 by Ann Crabbé
| Email               : acrabbe.foss@gmail.com
| Acknowledgements    : Based on DINEOF 4.0 (ULG Liege) by Alida Alvera-Azcárte and Alexander Barth.
|
| This file is part of the EOF-based Time Series Reconstructor QGIS plugin package.
|
| This program is free software: you can redistribute it and/or modify it under the terms of the
| GNU General Public License as published by the Free Software Foundation, either version 3
| of the License, or any later version.
|
| This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
| without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
| PURPOSE. See the GNU General Public License for more details.
|
| You should have received a copy of the GNU General Public License (COPYING.txt). If not see www.gnu.org/licenses.
| ---------------------------------------------------------------------------------------------------------------------
"""
import os
import pathlib
import platform
import subprocess

from osgeo import gdal


def _qgis_to_fortran(path):
    parts = path.split('"')
    return parts[1] + "#" + parts[2].split(':')[1]


[docs]class Dineof: """ EOF-based method to fill in missing data from geophysical fields, such as clouds in sea surface temperature. :param data_path: path to netCDF data with gaps to be filled :param output_folder: path to output folder where all results will be stored :param eof_max: the max number of EOF modes to compute; only the optimal number of EOFs + 3 will be computed for robust optimum detection must be <= temporal size of data - 5; start high and adjust down for subsequent reconstructions; if the max number of modes have been used, the algorithm probably needs more information :param krylov_max: the maximum size for the Krylov subspace; must be: >= eof_max + 5 and <= temporal size of data :param mask_path: path to land-sea mask; netCDF format (optional) without mask, all points are considered as ocean :param eof_min: the min number of EOF modes to computes; default = 1 (recommended) :param eof_max_it: set the maximum iterations for each EOF calculation; default = 300; eof decomposition <-> truncated reconstruction and replacement of missing data; use large number and only increase if this number is reached for each EOF :param eof_it_stop: stop EOF iterations once: rms(successive reconstruction) / std(existing data) < eof_it_stop :param lanczos: threshold for Lanczos convergence; default = 1.e-8 (recommended) :param time_path: path to time file; netCDF format; used for the diffusion of the covariance matrix specified as the relative increment in time between your individual images; example: [1,2,3,4,...] if all data is 1 day apart; [1,2,4,...] if day 3 is missing :param time_alpha: sets strength of the filter; 0 to deactivate :param time_it: number of iterations for the filter :param reconstruct_all: 1: reconstruct all points; 0: reconstruct only missing data :param save_eof_base: 1: save left and right EOFs; 0: not :param normalize_input: 1: normalize the input matrix for multivariate case; 0: not :param cv_seed: initialize random number generator, for selection of cross-validation points; repeating an experiment with the same seed will yield the exact same result while another seed will yield a slight variation :param cv_clouds: path to cloud file; netCDF format; mask with manual selection of cross-validation points cv_seed will be ignored For more information on how DINEOF works, please refer to: Alvera-Azcárate, A., Barth, A., Rixen, M., Beckers, J.M., 2005, Reconstruction of incomplete oceanographic data sets using empirical orthogonal functions: application to the Adriatic Sea surface temperature, Ocean Modelling, 9-4, p. 325-346 Beckers, J.M., Rixen, M., 2003, EOF calculations and data filling from incomplete oceanographic data sets, Journal of Atmospheric and Oceanic Technology, 20-12, p. 1839-1856 The multivariate application of DINEOF is explained in: Alvera-Azcárate, A., Barth, A., Beckers, J.M., Weisberg, R.H., 2007, Multivariate reconstruction of missing data in sea surface temperature, chlorophyll, and wind satellite fields, Journal of Geophysical Research. Oceans, 112-C3, p. C03008 The error calculation using an optimal interpolation approach is explained in: Beckers, J.M., Barth, A., Alvera-Azcárate, A., 2006, DINEOF reconstruction of clouded images including error maps – application to the Sea-Surface Temperature around Corsican Island, Ocean Science, 2, 183–199 For more information about the Lanczos solver: Toumazou, V., Cretaux, J.F., 2001, Using a Lanczos Eigensolver in the Computation of Empirical Orthogonal Functions, Monthly Weather Review, 129-5, p. 1243–1250 """ def __init__(self, data_path: str, output_folder: str, eof_max: int, krylov_max: int, mask_path: str = None, eof_min: int = 1, eof_max_it: int = 300, eof_it_stop: float = 1.0e-3, lanczos: float = 1.0e-8, time_path: str = None, time_alpha: float = 0.01, time_it: int = 3, reconstruct_all: int = 1, save_eof_base: int = 0, normalize_input: int = 0, cv_seed: int = 243435, cv_clouds: str = None): file_error = "The file '{}' does not exist or is not in netCDF format." if 'netCDF' in gdal.Info(data_path): self.data_path = _qgis_to_fortran(data_path) n_time = gdal.Open(data_path).RasterCount else: raise FileNotFoundError(file_error.format(data_path)) if mask_path: if 'netCDF' in gdal.Info(mask_path): self.mask_path = _qgis_to_fortran(mask_path) else: raise FileNotFoundError(file_error.format(mask_path)) else: self.mask_path = None if not os.path.exists(output_folder): os.mkdir(output_folder) if os.path.isdir(output_folder): self.output_folder = output_folder else: raise NotADirectoryError("The path '{}' is not a directory.".format(output_folder)) self.reconstruct_all = reconstruct_all self.save_eof_base = save_eof_base self.normalize_input = normalize_input # EOF parameters if eof_max <= n_time - 5: self.eof_max = eof_max else: raise ValueError("The number of EOF modes to compute should be smaller than or equal to 'the temporal size " "of data minus 5'") if (krylov_max >= eof_max + 5) and (krylov_max <= n_time): self.krylov_max = krylov_max else: raise ValueError("The the maximum size of the Krylov subspace must be greater than or equal to 'the number " "of EOF modes plus 5' and smaller than or equal to the temporal size of data") self.eof_min = eof_min self.eof_max_it = eof_max_it self.eof_it_stop = eof_it_stop self.lanczos = lanczos # diffusion of the covariance matrix self.time_alpha = time_alpha self.time_it = time_it if time_alpha != 0: if 'netCDF' in gdal.Info(time_path): self.time_path = _qgis_to_fortran(time_path) else: raise FileNotFoundError(file_error.format(time_path)) else: self.time_path = None # cross validation self.cv_seed = cv_seed if cv_clouds: if 'netCDF' in gdal.Info(cv_clouds): self.cv_clouds = _qgis_to_fortran(cv_clouds) else: raise FileNotFoundError(file_error.format(cv_clouds)) else: self.cv_clouds = None
[docs] def write_init_file(self, path): """ :param path: the path of the init file :return: Write the input variables as text string to path """ string = [ "data = ['{}']".format(self.data_path), "Output = '{}/'".format(self.output_folder), "results = ['{}']".format(os.path.join(self.output_folder, os.path.basename(self.data_path))), "rec = {}".format(self.reconstruct_all), "nev = {}".format(self.eof_max), "ncv = {}".format(self.krylov_max), "neini = {}".format(self.eof_min), "nitemax = {}".format(self.eof_max_it), "tol = {}".format(self.lanczos), "toliter = {}".format(self.eof_it_stop), "alpha = {}".format(self.time_alpha), "numit = {}".format(self.time_it), "norm = {}".format(self.normalize_input), "seed = {}".format(self.cv_seed), "eof = {}".format(self.save_eof_base), "EOF.U = ['{}']".format(os.path.join(self.output_folder, "eof.nc#Usst")), "EOF.V = '{}'".format(os.path.join(self.output_folder, "eof.nc#V")), "EOF.Sigma = '{}'".format(os.path.join(self.output_folder, "eof.nc#Sigma")) ] if self.mask_path: string.append("mask = ['{}']".format(self.mask_path)) if self.time_alpha != 0: string.append("time = '{}'".format(self.time_path)) if self.cv_clouds: string.append("clouds = '{}'".format(self.cv_clouds)) full_text = '\n'.join(string) file = open(path, 'w') file.write(full_text) file.close()
[docs] def execute(self): script_path = pathlib.Path(__file__).resolve().parents[0] executables = { 'Windows': (script_path / 'bin' / 'dineof-4.0-windows.exe').as_posix(), 'Linux': (script_path / 'bin' / 'dineof-4.0-linux.exe').as_posix(), } parameter_file = (pathlib.Path(self.output_folder).resolve() / 'parameter_file.txt').as_posix() log_file = (pathlib.Path(self.output_folder).resolve() / 'log.txt').as_posix() error_file = (pathlib.Path(self.output_folder).resolve() / 'error_log.txt').as_posix() self.write_init_file(parameter_file) system = platform.system() stdout = open(log_file, "wb") stderr = open(error_file, "wb") args = [executables[system], parameter_file] subprocess.run(args, stdout=stdout, stderr=stderr) stdout.close() stderr.close()