Source code for dysmalpy.fitting.nested_sampling

# coding=utf8
# Copyright (c) MPE/IR-Submm Group. See LICENSE.rst for license information.
#
# Classes and functions for fitting DYSMALPY kinematic models
#   to the observed data using Nested sampling, with Dynesty:
#   Speagle 2020, https://ui.adsabs.harvard.edu/abs/2020MNRAS.493.3132S/abstract
#   dynesty.readthedocs.io

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

## Standard library
import logging
try:
    from multiprocess import cpu_count, Pool
except:
    # Old python versions:
    from multiprocessing import cpu_count, Pool

# DYSMALPY code
from dysmalpy.data_io import load_pickle, dump_pickle, pickle_module
from dysmalpy import plotting
from dysmalpy import galaxy
# from dysmalpy.utils import fit_uncertainty_ellipse
# from dysmalpy import utils_io as dpy_utils_io
from dysmalpy import utils as dpy_utils
from dysmalpy.fitting import base
from dysmalpy.fitting import utils as fit_utils

# Third party imports
import os
import numpy as np
from collections import OrderedDict
import astropy.units as u
import copy

import time, datetime


__all__ = ['NestedFitter', 'NestedResults']


# LOGGER SETTINGS
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('DysmalPy')
logger.setLevel(logging.INFO)

try:
    import dynesty
    import dynesty.utils
    dynesty.utils.pickle_module = pickle_module
    _dynesty_loaded = True
except:
    _dynesty_loaded = False
    logger.warn("dynesty installation not found!")



[docs]
class NestedFitter(base.Fitter):
    """
    Class to hold the Nested sampling fitter attributes + methods
    """
    def __init__(self, **kwargs):
        if not _dynesty_loaded:
            raise ValueError("dynesty was not loaded!")

        self._set_defaults()
        super(NestedFitter, self).__init__(fit_method='Nested', **kwargs)

    def _set_defaults(self):
        # Nested sampling specific defaults
        self.maxiter = None     # No limit to iterations

        self.bound = 'multi'
        self.sample = 'unif'

        self.nlive_init = 1000
        self.nlive_batch = 1000
        self.use_stop = True
        self.pfrac = 1.0

        self.nCPUs = 1.0
        self.cpuFrac = None

        self.oversampled_chisq = True

        self.nPostBins = 50
        self.linked_posterior_names = None


        self.print_func = None



[docs]
    def fit(self, gal, output_options):
        """
        Fit observed kinematics using nested sampling and a DYSMALPY model set.

        Parameters
        ----------
            gal : `Galaxy` instance
                observed galaxy, including kinematics.
                also contains instrument the galaxy was observed with (gal.instrument)
                and the DYSMALPY model set, with the parameters to be fit (gal.model)

            output_options : `config.OutputOptions` instance
                instance holding ouptut options for nested sampling fitting.

        Returns
        -------
            nestedResults : `NestedResults` instance
                NestedResults class instance containing the bestfit parameters, sampler_results information, etc.
        """

        # --------------------------------
        # Check option validity:

        # # Temporary: testing:
        # if self.red_chisq:
        #     raise ValueError("red_chisq=True is currently *DISABLED* to test lnlike impact vs lnprior")

        # Check the FOV is large enough to cover the data output:
        dpy_utils._check_data_inst_FOV_compatibility(gal)

        # Pre-calculate instrument kernels:
        gal = dpy_utils._set_instrument_kernels(gal)

        # --------------------------------
        # Basic setup:

        # For compatibility with Python 2.7:
        mod_in = copy.deepcopy(gal.model)
        gal.model = mod_in

        #if nCPUs is None:
        if self.cpuFrac is not None:
            self.nCPUs = int(np.floor(cpu_count()*self.cpuFrac))

        # +++++++++++++++++++++++
        # Setup for oversampled_chisq:
        if self.oversampled_chisq:
            gal = fit_utils.setup_oversampled_chisq(gal)
        # +++++++++++++++++++++++

        # Set output options: filenames / which to save, etc
        output_options.set_output_options(gal, self)


        # MUST INCLUDE NESTED-SPECIFICS NOW!
        fit_utils._check_existing_files_overwrite(output_options,
                                                  fit_type='nested',
                                                  fitter=self)


        # --------------------------------
        # Setup file redirect logging:
        if output_options.f_log is not None:
            loggerfile = logging.FileHandler(output_options.f_log)
            loggerfile.setLevel(logging.INFO)
            logger.addHandler(loggerfile)

        # ++++++++++++++++++++++++++++++++
        # Run Dynesty:

        # Keywords for log likelihood:
        logl_kwargs = {'gal': gal,
                       'fitter': self}

        # Keywords for prior transform:
        # This needs to include the gal object,
        #   so we can get the appropriate per-free-param priors
        ptform_kwargs = {'gal': gal}

        ndim = gal.model.nparams_free

        # Set blob switch for Dynesty:
        if self.blob_name is not None:
            _calc_blob = True
        else:
            _calc_blob = False

        # --------------------------------
        # Start pool
        if (self.nCPUs > 1):
            pool = Pool(self.nCPUs)
            queue_size = self.nCPUs
        else:
            pool = queue_size = None


        dsampler = dynesty.DynamicNestedSampler(log_like_dynesty,
                                                prior_transform_dynsety,
                                                ndim,
                                                bound=self.bound,
                                                sample=self.sample,
                                                logl_kwargs=logl_kwargs,
                                                ptform_kwargs=ptform_kwargs,
                                                blob=_calc_blob,
                                                pool=pool,
                                                queue_size=queue_size)


        resume = False

        # If not overwriting, check to see if there is a file to restore:
        if not output_options.overwrite:
            if output_options.f_checkpoint is not None:
                if os.path.isfile(output_options.f_checkpoint):
                    logger.info("Reloading checkpoint file: {}".format(output_options.f_checkpoint))

                    # Resume the checkpointed sampler from file:
                    resume = True
                    dsampler = dynesty.DynamicNestedSampler.restore(output_options.f_checkpoint,
                                                pool=pool)


        dsampler.run_nested(nlive_init=self.nlive_init,
                            nlive_batch=self.nlive_batch,
                            maxiter=self.maxiter,
                            use_stop=self.use_stop,
                            resume=resume,
                            print_func=self.print_func,
                            checkpoint_file=output_options.f_checkpoint,
                            wt_kwargs={'pfrac': self.pfrac})

        res = dsampler.results

        if output_options.f_sampler_results is not None:
            # Save stuff to file, for future use:
            dump_pickle(res, filename=output_options.f_sampler_results,
                        overwrite=output_options.overwrite)


        # --------------------------------
        # Bundle the results up into a results class:
        nestedResults = NestedResults(model=gal.model, sampler_results=res,
                    linked_posterior_names=self.linked_posterior_names,
                    blob_name=self.blob_name,
                    nPostBins=self.nPostBins)

        if self.oversampled_chisq:
            nestedResults.oversample_factor_chisq = OrderedDict()
            for obs_name in gal.observations:
                obs = gal.observations[obs_name]
                nestedResults.oversample_factor_chisq[obs_name] = obs.data.oversample_factor_chisq

        # Do all analysis, plotting, saving:
        nestedResults.analyze_plot_save_results(gal, output_options=output_options)

        # --------------------------------
        # Clean up logger:
        if output_options.f_log is not None:
            logger.removeHandler(loggerfile)
            loggerfile.close()

        return nestedResults








[docs]
class NestedResults(base.BayesianFitResults, base.FitResults):
    """
    Class to hold results of nested sampling fitting to DYSMALPY models.

    Notes:
    ------
        The dynesty *Results* object (containing the results of the run) is stored in `nestedResults.sampler_results`.

        The names of free parameters in the chain are accessed through `nestedResults.chain_param_names` or more generally (separate model + parameter names) through `nestedResults.free_param_names`

    Optional Attribute:
    ----------------------
        `linked_posterior_names`
            Indicates if best-fit parameters should be measured in multi-dimensional histogram space.
            It takes a list of linked parameter sets, where each set consists of len-2 tuples/lists of
            the component + parameter names.

    Structure Explanation:
    ----------------------
    1. To analyze component + param 1 and 2 together, and then 3 and 4 together: `linked_posterior_names = [joint_param_bundle1, joint_param_bundle2]` with `joint_param_bundle1 = [[cmp1, par1], [cmp2, par2]]` and `joint_param_bundle2 = [[cmp3, par3], [cmp4, par4]]`, for a full array of: `linked_posterior_names = [[[cmp1, par1], [cmp2, par2]],[[cmp3, par3], [cmp4, par4]]]`.

    2. To analyze component + param 1 and 2 together: `linked_posterior_names = [joint_param_bundle1]` with `joint_param_bundle1 = [[cmp1, par1], [cmp2, par2]]`, for a full array of `linked_posterior_names = [[[cmp1, par1], [cmp2, par2]]]`.
            Example: Look at halo: mvirial and disk+bulge: total_mass together
                `linked_posterior_names = [[['halo', 'mvirial'], ['disk+bulge', 'total_mass']]]`
    """
    def __init__(self, model=None, sampler_results=None,
                 linked_posterior_names=None,
                 blob_name=None, nPostBins=50):

        super(NestedResults, self).__init__(model=model, blob_name=blob_name,
                                            fit_method='Nested',
                                            linked_posterior_names=linked_posterior_names,
                                            sampler_results=sampler_results, nPostBins=nPostBins)

    def __setstate__(self, state):
        # Compatibility hacks
        super(NestedResults, self).__setstate__(state)

        # # ---------
        # if ('sampler' not in state.keys()) & ('sampler_results' in state.keys()):
        #     self._setup_samples_blobs()


    def _setup_samples_blobs(self):

        # Extract weighted samples, as in
        # https://dynesty.readthedocs.io/en/v1.2.3/quickstart.html?highlight=resample_equal#basic-post-processing

        samples_unweighted = self.sampler_results.samples
        blobs_unweighted = self.sampler_results.blob

        # weights = np.exp(self.sampler.logwt - self.sampler.logz[-1])

        # Updated, see https://dynesty.readthedocs.io/en/v2.0.3/quickstart.html#basic-post-processing
        weights = self.sampler_results.importance_weights()

        samples = dynesty.utils.resample_equal(samples_unweighted, weights)

        # Check if blobs_unweighted is None?
        blobs = dynesty.utils.resample_equal(blobs_unweighted, weights)

        self.sampler = base.BayesianSampler(samples=samples, blobs=blobs,
                                            weights=weights,
                                            samples_unweighted=samples_unweighted,
                                            blobs_unweighted=blobs_unweighted)


[docs]
    def plot_run(self, fileout=None, overwrite=False):
        """Plot/replot the trace for the Bayesian fitting"""
        plotting.plot_run(self, fileout=fileout, overwrite=overwrite)



[docs]
    def reload_sampler_results(self, filename=None):
        """Reload the Nested sampling results saved earlier"""
        if filename is None:
            #filename = self.f_sampler_results
            raise ValueError

        #hdf5_aliases = ['h5', 'hdf5']
        pickle_aliases = ['pickle', 'pkl', 'pcl']
        # if (filename.split('.')[-1].lower() in hdf5_aliases):
        #     self.sampler_results = _reload_sampler_results_hdf5(filename=filename)

        # elif (filename.split('.')[-1].lower() in pickle_aliases):
        if (filename.split('.')[-1].lower() in pickle_aliases):
            self.sampler_results = _reload_sampler_results_pickle(filename=filename)





def log_like_dynesty(theta, gal=None, fitter=None):

    # Update the parameters
    gal.model.update_parameters(theta)

    # Update the model data
    gal.create_model_data()

    # Evaluate likelihood prob of theta
    llike = base.log_like(gal, fitter=fitter)

    return llike


def prior_transform_dynsety(u, gal=None):
    """
    From Dynesty documentation:
    Transforms the uniform random variables `u ~ Unif[0., 1.)`
    to the parameters of interest.
    """
    # NEEDS TO BE IN ORDER OF THE VARIABLES
    # -- which means we need to construct this from the gal.model method

    v = gal.model.get_prior_transform(u)

    return v



def _reload_sampler_results_pickle(filename=None):
    return load_pickle(filename)



def _reload_all_fitting_nested(filename_galmodel=None, filename_results=None):
    gal = galaxy.load_galaxy_object(filename=filename_galmodel)
    results = NestedResults()
    results.reload_results(filename=filename_results)
    return gal, results