//! <div>
//! <img src="../potpourri.svg" width="800" />
//! </div>
//!
//! Package for models with discrete, unobservable latent variables that can be learned with the
//! Expectation-Maximization (EM) algorithm.
//! The package aims for high modularity to allow for easy experimentation in research,
//! such as adding parallelization on clusters or exploring new models.
//!
//! Conventions:
//! * Traits: Capital letters and CamelCase; adjectives used as nouns that indicate a capability.
//! * Structs: Capital letters and CamelCase; nouns describing things and concepts.
//! * Methods/functions: snake_case; imperatives or short, descriptive imperative clauses.
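//!
//! A minimal usage sketch (marked `ignore`, so it is not compiled as a doctest). It assumes the
//! crate is named `potpourri` and that `model` is some concrete type implementing [`Learning`],
//! e.g. a mixture assembled from the building blocks in [`mixture`] and [`model`]:
//!
//! ```ignore
//! use potpourri::Learning;
//!
//! // Fit the model with EM until convergence, then predict the most likely
//! // component for each sample.
//! model.fit(&data)?;
//! let assignments = model.predict(&data)?;
//! ```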
pub mod backend;
pub mod errors;
pub mod mixture;
pub mod model;
use errors::Error;
pub use mixture::{Latent, Mixable, Mixture};
pub use model::Model;
/// Average log-likelihood. Used to measure convergence.
#[derive(Debug, Clone)]
pub struct AvgLLH(f64);
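/// Core building-block interface for models estimated with Expectation-Maximization: an E-step
/// ([`expect`](Parametrizable::expect)) and an M-step split into
/// [`compute`](Parametrizable::compute) and [`maximize`](Parametrizable::maximize).
///
/// A sketch of how a driver loop might chain these methods (an illustration only; `model`, `data`
/// and the `converged` helper are placeholders, not part of this crate's API):
///
/// ```ignore
/// loop {
///     // E-step: per-component (log-)likelihoods plus the average log-likelihood
///     let (responsibilities, avg_llh) = model.expect(&data)?;
///     // M-step: summarize the data, then re-estimate the parameters
///     let stats = model.compute(&data, &responsibilities)?;
///     model.maximize(&stats)?;
///     // Stop once the average log-likelihood no longer improves noticeably
///     // compared to the previous iteration.
///     if converged(&avg_llh) {
///         break;
///     }
/// }
/// ```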
pub trait Parametrizable {
type SufficientStatistics: Send + Sync;
type Likelihood;
type DataIn<'a>: Sync;
type DataOut;
// weights: Self::DataIn<'_>,
/// The E-Step. Computes the likelihood for each component in the mixture
/// Note that for `Mixables`, this is the log-likelihood
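/// In the classical EM formulation for mixtures, the E-step evaluates, for each sample `i` and
/// component `k`, the responsibility
/// `gamma_ik = pi_k * p(x_i | theta_k) / sum_j pi_j * p(x_i | theta_j)`;
/// whether the returned likelihood is normalized or kept in log-space is up to the implementation.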
fn expect(&self, data: &Self::DataIn<'_>) -> Result<(Self::Likelihood, AvgLLH), Error>;
// Consider combining `compute` and `maximize` – no that is a bad idea
// &mut self,
// store: Option<bool>, // consider removing. The parent class should take care of that
/// Computes the sufficient statistics from the responsibility matrix. Optionally, the
/// sufficient statistics can be stored (for incremental learning and store/restore
/// functionality); storing can be disabled for performance (enabled by default).
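/// For example, for a Gaussian mixture the sufficient statistics of component `k` would typically
/// be the responsibility mass `n_k = sum_i gamma_ik`, the weighted sum `sum_i gamma_ik * x_i`, and
/// the weighted outer products `sum_i gamma_ik * x_i * x_i^T`.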
fn compute(
&self,
data: &Self::DataIn<'_>,
responsibilities: &Self::Likelihood,
) -> Result<Self::SufficientStatistics, Error>;
/// Maximizes the model parameters from the sufficient statistics.
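/// For a Gaussian component, for example, this step would recover the mean
/// `mu_k = (sum_i gamma_ik * x_i) / n_k` and the covariance from the stored weighted sums.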
fn maximize(&mut self, sufficient_statistics: &Self::SufficientStatistics)
-> Result<(), Error>;
fn predict(
&self,
// responsibilities: &Self::DataIn<'_>,
data: &Self::DataIn<'_>,
) -> Result<Self::DataOut, Error>;
/// Updates the stored sufficient statistics (for incremental learning).
/// The weight is a single float; a scalar suffices as long as the weights sum to one.
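/// A natural convention (an assumption here, not mandated by the trait) is a convex, element-wise
/// blend of the stored and incoming statistics, e.g.
/// `stored = (1.0 - weight) * stored + weight * incoming`.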
fn update(
&mut self,
sufficient_statistics: &Self::SufficientStatistics,
weight: f64,
) -> Result<(), Error>;
/// Merges multiple sufficient statistics into one.
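/// A typical implementation is the weighted sum `sum_i weights[i] * sufficient_statistics[i]`;
/// this is what allows statistics computed on separate data partitions (e.g. on a cluster or in
/// mini-batches) to be combined afterwards.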
fn merge(
sufficient_statistics: &[&Self::SufficientStatistics],
weights: &[f64],
) -> Result<Self::SufficientStatistics, Error>;
/// Generates a random expectation. Used as an initialization. It is recommended
/// to draw the expectations from a uniform Dirichlet distribution.
/// Note: This works better than a dedicated initialization method, because layers
/// such as the `Probabilistic` trait don't need to implement backend-specific
/// random samplers.
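///
/// A uniform Dirichlet (all concentration parameters equal to one) can be sampled without a
/// dedicated sampler by normalizing exponential draws. A minimal sketch using the `rand` crate
/// (an assumption; the actual backends may do this differently):
///
/// ```ignore
/// use rand::Rng;
///
/// let mut rng = rand::thread_rng();
/// // -ln(U) with U ~ Uniform(0, 1) is Exp(1); normalizing `k` such draws
/// // yields one row of Dirichlet(1, ..., 1) weights over the components.
/// let mut row: Vec<f64> = (0..k).map(|_| -rng.gen::<f64>().ln()).collect();
/// let total: f64 = row.iter().sum();
/// row.iter_mut().for_each(|w| *w /= total);
/// ```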
fn expect_rand(&self, _data: &Self::DataIn<'_>, _k: usize) -> Result<Self::Likelihood, Error> {
todo!()
}
}
/// Probabilistic mixables should implement this trait.
/// A mixture model has a discrete, unobservable (i.e., latent) variable
/// associated with each data point. It can be interpreted as a pointer to the component
/// of the mixture that generated the sample. This component computes the weights of the
/// components in the mixture, that is, the probability for each component that the next sample
/// will be drawn from it. In the case of non-probabilistic models (k-means and SOM) this is
/// irrelevant.
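/// In probabilistic terms, these weights are the mixing proportions `pi_k` of the mixture density
/// `p(x) = sum_k pi_k * p(x | theta_k)`, where `p(x | theta_k)` is the density of component `k`.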
pub trait Learning {
type DataIn<'a>;
type DataOut;
fn fit(&mut self, data: &Self::DataIn<'_>) -> Result<(), Error>;
fn predict(&self, data: &Self::DataIn<'_>) -> Result<Self::DataOut, Error>;
}