Source code for EMToolKit.schemas.util

import copy, os, pickle, resource, numpy as np
#from processify import processify
from scipy.io import readsav

[docs] def masked_median_filter(data,mask,radius,footprint=None,missing=0.0,footprint_ind_offset=0): if(footprint is None): if(np.isscalar(radius)): radarr = np.array([radius]*data.ndim) else: radarr = radius coordas = np.indices(2*radarr+1) for i in range(0,len(radarr)): coordas[i] = (coordas[i]-radarr[i])/radarr[i] radii = np.sum(coordas**2,axis=0)**0.5 footprint = radii <= 1 flatinds = np.ravel_multi_index(np.indices(data.shape),data.shape).flatten() footprint_inds = np.ravel_multi_index(np.indices(footprint.shape),footprint.shape) footprint_inds = np.array(np.unravel_index(footprint_inds[footprint],footprint.shape)) footprint_inds = footprint_inds.transpose(np.roll(np.arange(footprint_inds.ndim),-1)) data_filt = np.zeros(data.size)+missing footprint_pad = np.floor(0.5*np.array(footprint.shape)).astype(np.int32) footprint_pad = np.array([footprint_pad,footprint_pad]).T data_fppad = np.pad(data,footprint_pad) dat_pad_shape = data_fppad.shape data_fppad = data_fppad.flatten() mask_fppad = np.pad(mask,footprint_pad).flatten() tparg = np.roll(np.arange(footprint_inds.ndim),1) data_filt = _masked_medfilt_inner(flatinds,data,footprint_inds,footprint_ind_offset,tparg,dat_pad_shape,data_fppad,mask_fppad,data_filt) #for ind in flatinds: # ijkpad = np.unravel_index(ind,data.shape)+footprint_inds-footprint_ind_offset # ijkpad = np.ravel_multi_index(ijkpad.transpose(tparg),dat_pad_shape) # dat = data_fppad[ijkpad] # good = mask_fppad[ijkpad] # if(np.sum(good) > 0): data_filt[ind] = np.median(dat[good]) return data_filt.reshape(data.shape)
def _masked_medfilt_inner(flatinds,data,footprint_inds,footprint_ind_offset,tparg,dat_pad_shape,data_fppad,mask_fppad,data_filt): for ind in flatinds: ijkpad = np.unravel_index(ind,data.shape)+footprint_inds-footprint_ind_offset ijkpad = np.ravel_multi_index(ijkpad.transpose(tparg),dat_pad_shape) dat = data_fppad[ijkpad] good = mask_fppad[ijkpad] if(np.sum(good) > 0): data_filt[ind] = np.median(dat[good]) return data_filt # This routine multiplies a matrix with each element of a set of vectors. # The vectors must be numpy arrays dimensioned nvec by ndim, where ndim is the # dimensionality of the space, while the matrix is ndim by ndim. This applies in # general for other operations of a set of vectors with a single vector -- # i.e., the vector space index must be the last one. For # instance, to add a vector shift to a set of vectors, the array of # vectors must also be nvec by ndim. Then they can be added as c = a+b. See # https://numpy.org/doc/stable/user/basics.broadcasting.html # This is somewhat backward to how dot products otherwise work in numpy # So the order of operations has to be reversed compared to normal and the # matrix must be transposed. i.e., instead of v2 = np.dot(fwd,v1), it's neccesary # to instead do v2 = np.dot(v1,fwd.T). There's may be a built in numpy way to # do this, but I haven't found it so far. linalg.multi_dot doesn't appear to # function any differently from dot in this case. I've implemented it # here as a very small subroutine rather than spreading it all over the code # for ease of maintenance and explanation. It works for single vectors, too.
[docs] def multivec_matmul(a,b): return np.dot(b,a.T)
# Forward rolling transpose, for switching from coordinate dimension last to # coordinate dimension first in multidimensional coordinate arrays:
[docs] def ftp(a): return a.transpose(np.roll(np.arange(a.ndim),1))
# Backward rolling transpose, for switching from coordinate dimension first to # coordinate dimension last in multidimensional coordinate arrays:
[docs] def btp(a): return a.transpose(np.roll(np.arange(a.ndim),-1))
# Numpy's indices method is extremely useful, but it puts the coordinate # dimension (e.g., ijk) first, but for easy vector operations it should be last. # Transposing puts the coordinate dimension last but it also reverses all of # the other dimensions, which gets super confusing. This does a `roll' transpose # which just shifts the dimensions forward by 1. Very simple, but you can see how # it could get unggkljhly real quick
[docs] def rindices(dims,**kwargs): ia = np.indices(dims,**kwargs) return btp(ia)
[docs] def bindown(d,n): inds = np.ravel_multi_index(np.floor((np.indices(d.shape).T*n/np.array(d.shape))).T.astype(np.uint32),n) return np.bincount(inds.flatten(),weights=d.flatten(),minlength=np.prod(n)).reshape(n)
[docs] def bindown2(d,f): n = np.round(np.array(d.shape)/f).astype(np.int32) inds = np.ravel_multi_index(np.floor((np.indices(d.shape).T*n/np.array(d.shape))).T.astype(np.uint32),n) return np.bincount(inds.flatten(),weights=d.flatten(),minlength=np.prod(n)).reshape(n)
[docs] def binup(d,f): n = np.round(np.array(d.shape)*np.round(f)).astype(np.int32) inds = np.ravel_multi_index(np.floor((np.indices(n).T/np.array(f))).T.astype(np.uint32),d.shape) return np.reshape(d.flatten()[inds],n)
[docs] def as_dict(rec): """ turn a numpy recarray record into a dict. this is mostly useful just to have a human readable output of a record on the console. as_dict(my_data[234]) """ return {name:rec[name] for name in rec.dtype.names}
[docs] def get_mask_errs(dat_cube, iris_err_fac, error_cube=None, filt_thold=2.5): dat_arr = copy.deepcopy(dat_cube) dat_filt = masked_median_filter(dat_arr,(np.isnan(dat_arr)==0),np.array([1,2,1])) if(error_cube is None): error_cube = ((iris_err_fac**2+np.clip(dat_arr,0,None)*iris_err_fac)**0.5).astype('float64') dat_median = np.nanmedian(np.abs(dat_filt)) dat_mask = (np.isnan(dat_arr) + (dat_arr < 0.00*error_cube) + (np.abs(dat_arr-dat_filt) > filt_thold*(dat_median+np.abs(dat_filt)))) > 0 return dat_mask, error_cube