Source code for speclite.downsample

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Downsample spectra by combining adjacent pixels.
"""
from __future__ import print_function, division

import numpy as np
import numpy.ma as ma


def downsample(data_in, downsampling, weight=None, axis=-1,
               start_index=0, auto_trim=True, data_out=None):
    """Downsample spectral data by a constant factor.

    Downsampling consists of dividing the input data into fixed-size groups
    of consecutive bins, then calculating the downsampled values as weighted
    averages within each group. The basic usage is:

    >>> data = np.ones((6,), dtype=[('flux', float), ('ivar', float)])
    >>> out = downsample(data, downsampling=2, weight='ivar')
    >>> np.all(out ==
    ... np.array([(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)],
    ... dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True

    Any partial group at the end of the input data will be silently ignored
    unless `auto_trim=False`:

    >>> out = downsample(data, downsampling=4, weight='ivar')
    >>> np.all(out ==
    ... np.array([(1.0, 4.0)], dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True
    >>> out = downsample(data, downsampling=4, weight='ivar', auto_trim=False)
    Traceback (most recent call last):
        ...
    ValueError: Input data does not evenly divide with downsampling = 4.

    A multi-dimensional array of spectra with the same binning can be
    downsampled in a single operation, for example:

    >>> data = np.ones((2,16,3,), dtype=[('flux', float), ('ivar', float)])
    >>> results = downsample(data, 4, axis=1)
    >>> results.shape
    (2, 4, 3)

    If no axis is specified, the last axis of the input array is assumed.

    If the input data is masked, only unmasked entries will be used to
    calculate the weighted averages for each downsampled group and the
    output will also be masked:

    >>> data = ma.ones((6,), dtype=[('flux', float), ('ivar', float)])
    >>> data.mask[3:] = True
    >>> out = downsample(data, 2, weight='ivar')
    >>> type(out) == ma.core.MaskedArray
    True

    If the input fields have different masks, their logical OR will be used
    for all output fields since, otherwise, each output field would require
    its own output weight field. As a consequence, masking a single input
    field is equivalent to masking all input fields.

    Parameters
    ----------
    data_in : numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array containing input spectrum data to downsample.
    downsampling : int
        Number of consecutive bins to combine into each downsampled bin.
        Must be at least one and not larger than the input data size along
        the downsampling axis.
    weight : str or None
        The name of a field whose values provide the weights to use for
        downsampling. When None, a weight value of one will be used.
        Unless it is None, the output array will contain a field with this
        name containing the values of the downsampled weights. All weights
        must be non-negative.
    axis : int
        Index of the axis to perform downsampling in. The default is to use
        the last axis of the input data array.
    start_index : int
        Index of the first bin to use for downsampling. Any bins preceding
        the start bin will not be included in the downsampled results.
        Negative indices are not allowed.
    auto_trim : bool
        When True, any bins at the end of the input data that do not fill a
        complete downsampled bin will be automatically (and silently)
        trimmed. When False, a ValueError will be raised.
    data_out : numpy.ndarray or None
        Structured numpy array where output spectrum data should be written.
        If None is specified, an appropriately sized array will be allocated
        and returned. Use this argument to take control of the memory
        allocation and, for example, re-use the same output array for a
        sequence of downsampling operations.
    Returns
    -------
    numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array of downsampled results, containing the same
        fields as the input data and the same shape except along the
        specified downsampling axis. If the input data is masked, the
        output data will also be masked, with each output field's mask
        determined by the combination of the optional weight field mask
        and the corresponding input field mask.
    """
    if not isinstance(data_in, np.ndarray):
        raise ValueError('Invalid data_in type: {0}.'.format(type(data_in)))
    if data_out is not None and not isinstance(data_out, np.ndarray):
        raise ValueError(
            'Invalid data_out type: {0}.'.format(type(data_out)))
    shape_in = data_in.shape
    try:
        num_bins = shape_in[axis]
    except IndexError:
        raise ValueError('Invalid axis = {0}.'.format(axis))
    if downsampling < 1 or downsampling > num_bins:
        raise ValueError('Invalid downsampling = {0}.'.format(downsampling))
    if start_index < 0 or start_index >= num_bins:
        raise ValueError('Invalid start_index = {0}.'.format(start_index))
    num_downsampled = (num_bins - start_index) // downsampling
    if num_downsampled <= 0:
        raise ValueError(
            'Incompatible downsampling = {0} and start_index = {1}.'
            .format(downsampling, start_index))
    stop_index = start_index + num_downsampled * downsampling
    assert stop_index <= num_bins
    if stop_index < num_bins and not auto_trim:
        raise ValueError(
            'Input data does not evenly divide with downsampling = {0}.'
            .format(downsampling))

    if weight is not None:
        if not isinstance(weight, str):
            raise ValueError(
                'Invalid weight type: {0}.'.format(type(weight)))
        if weight in data_in.dtype.fields:
            # If data_in is a MaskedArray, weights_in will also be masked.
            weights_in = data_in[weight]
            if np.any(weights_in < 0):
                raise ValueError('Some input weights < 0.')
        else:
            raise ValueError('No such weight field: {0}.'.format(weight))
    else:
        if ma.isMA(data_in):
            weights_in = ma.ones(shape_in)
        else:
            weights_in = np.ones(shape_in)

    # Shrink the downsampling axis in the output shape, and build an
    # expanded shape that splits that axis into groups of shape
    # (num_downsampled, downsampling) so each group can be summed over.
    shape_out = list(shape_in)
    shape_out[axis] = num_downsampled
    shape_out = tuple(shape_out)
    expanded_shape = list(shape_in)
    expanded_shape[axis] = downsampling
    expanded_shape.insert(axis, num_downsampled)
    sum_axis = axis + 1 if axis >= 0 else len(shape_in) + axis + 1

    dtype_out = data_in.dtype
    if data_out is None:
        if ma.isMA(data_in):
            data_out = ma.empty(shape_out, dtype=data_in.dtype)
            data_out.mask = False
        else:
            data_out = np.empty(shape_out, dtype=data_in.dtype)
    else:
        if data_out.shape != shape_out:
            raise ValueError(
                'data_out has wrong shape: {0}. Expected: {1}.'
                .format(data_out.shape, shape_out))
        if data_out.dtype != dtype_out:
            raise ValueError(
                'data_out has wrong dtype: {0}. Expected: {1}.'
                .format(data_out.dtype, dtype_out))

    if ma.isMA(data_in):
        # Each field has an independent mask in the input, but we want to
        # use the same output weights for all fields. Use the logical OR
        # of the individual input field masks to achieve this.
        or_mask = np.zeros(shape_in, dtype=bool)
        for field in data_in.dtype.fields:
            or_mask = or_mask | data_in[field].mask
        weights_in.mask = or_mask

    # Sum the weights within each downsampled group.
    weights_out = np.sum(
        weights_in[start_index:stop_index].reshape(expanded_shape),
        axis=sum_axis)

    # Loop over fields in the input data.
    for field in data_in.dtype.fields:
        if field == weight:
            continue
        weighted = (
            weights_in[start_index:stop_index] *
            data_in[field][start_index:stop_index])
        if ma.isMA(data_in):
            weighted.mask = or_mask[start_index:stop_index]
        data_out[field] = np.sum(
            weighted.reshape(expanded_shape), axis=sum_axis) / weights_out
    if weight is not None:
        data_out[weight] = weights_out
    return data_out
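

# ---------------------------------------------------------------------------
# The remainder of this listing is added commentary and is not part of the
# speclite module itself.
#
# The implementation above hinges on one vectorization trick: reshaping the
# downsampling axis into (num_downsampled, downsampling) groups so that every
# per-group sum becomes a single np.sum call. The helper below is a minimal,
# hypothetical sketch of that trick for an unweighted 1-D signal, trimming
# any partial trailing group just as downsample() does with auto_trim=True.

def _group_means(signal, downsampling):
    """Average consecutive groups of `downsampling` bins with unit weights."""
    signal = np.asarray(signal, dtype=float)
    num_downsampled = len(signal) // downsampling
    # Split the single axis into (num_downsampled, downsampling) groups,
    # then sum within each group and divide by the group size.
    groups = signal[:num_downsampled * downsampling].reshape(
        num_downsampled, downsampling)
    return groups.sum(axis=1) / downsampling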
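

# A short usage sketch of downsample() itself; the 'flux' and 'ivar' field
# names are illustrative assumptions mirroring the docstring examples above.
if __name__ == '__main__':
    demo = np.zeros((4,), dtype=[('flux', float), ('ivar', float)])
    demo['flux'] = [1.0, 3.0, 2.0, 2.0]
    demo['ivar'] = [1.0, 3.0, 2.0, 2.0]
    out = downsample(demo, downsampling=2, weight='ivar')
    # First group: (1*1 + 3*3) / (1 + 3) = 2.5, and its output weight is 4.
    print(out['flux'])                    # -> [2.5 2. ]
    print(out['ivar'])                    # -> [4. 4.]
    # The unweighted helper above gives plain per-group means instead.
    print(_group_means(demo['flux'], 2))  # -> [2. 2.]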