Rust is in its seventh year as the most loved language, with 87% of developers saying they want to continue using it. Rust also ties with Python as the most wanted technology, with TypeScript a close second. (Stack Overflow Developer Survey 2022)
/// A news article; example type used to demonstrate implementing a trait.
pub struct NewsArticle {
    /// Title shown first in the summary.
    pub headline: String,
    /// Where the story was reported from.
    pub location: String,
    /// Author name used in the byline.
    pub author: String,
    /// Full body text (not used by `summarize`).
    pub content: String,
}
/// Behavior shared by types that can be condensed into a short description.
pub trait Summary {
    /// Returns a one-line, human-readable summary of `self`.
    fn summarize(&self) -> String;
}
impl Summary for NewsArticle {
    /// Formats the article as a byline: "headline, by author (location)".
    fn summarize(&self) -> String {
        // Destructure the borrowed article instead of repeated field access.
        let Self { headline, author, location, .. } = self;
        format!("{}, by {} ({})", headline, author, location)
    }
}
// Demo: only one mutable reference to `s` may be live at a time, but a
// reference whose scope has ended no longer counts against that rule.
let mut s = String::from("hello");
{
    // First mutable borrow, confined to this inner scope.
    let r1 = &mut s;
} // r1 goes out of scope here, so we can make a new reference with no problems.
let r2 = &mut s;
/// Demo enum showing the different kinds of variant payloads:
/// unit, struct-like named fields, single tuple value, and a tuple of values.
enum Message {
    /// Carries no data.
    Quit,
    /// Named fields, like an anonymous struct.
    Move { x: i32, y: i32 },
    /// A single owned `String` payload.
    Write(String),
    /// Three `i32`s — presumably an RGB triple, judging by the name.
    ChangeColor(i32, i32, i32),
}
Gaussian mixture models can be trained with EM (expectation–maximization)
High-dimensional clustering (Warning: $\mathcal O(n^3)$!)
$ # Python packaging and dependency management
$ curl -sSL https://install.python-poetry.org | python3 -
$ # Get the code
$ git clone git@github.com:StefanUlbrich/numeric-rust-python-tutorial.git tutorial
$ cd tutorial && git checkout python-skeleton
tutorial$ # Create virtual environment and install dependencies
tutorial$ poetry env use python3.11 # Optional
tutorial$ poetry install
@dataclass
class GMM:
    """Parameters of a Gaussian mixture model: k components in d dimensions."""

    means: NDArray[np.float64]  # component means, shape (k, d)
    covs: NDArray[np.float64]  # component covariance matrices, shape (k, d, d)
    weights: NDArray[np.float64]  # mixture weights, shape (k,)
def expect(
    gmm: GaussianMixtureModel,
    data: NDArray[np.float64]
) -> NDArray[np.float64]:
    """E-step skeleton: given model parameters and data, return an array
    (presumably the per-sample responsibilities — body is left to the reader).

    NOTE(review): the annotation says ``GaussianMixtureModel`` but the
    dataclass defined above is named ``GMM`` — confirm which name is the
    one actually defined in the package.
    """
    ...
def maximize(
    gmm: GaussianMixtureModel,
    responsibilities: NDArray[np.float64],
    data: NDArray[np.float64]
) -> None:
    """M-step skeleton: returns ``None``, so it presumably updates ``gmm``
    in place from the responsibilities and data — body left to the reader.

    NOTE(review): annotation uses ``GaussianMixtureModel`` while the class
    above is named ``GMM`` — confirm the intended name.
    """
    ...
Intimidating, but …
knd, kn, knd -> kdd
einstein_sum_notation('knd, kn, knd -> kdd', data, responsibilities, data)
einsum('knd, kn, knd -> kdd', data, responsibilities, data)
np.einsum('knd, kn, knd -> kdd', data, responsibilities, data)
# Generate a synthetic 2-D dataset with 20 cluster centers (fixed seed),
# initialize a 20-component model, and run one E-step.
data, _ = gmm.make_blobs(n_samples=10000, centers=20, n_features=2, random_state=7)
model = gmm.initialize(data, 20)
print(",",model.means)  # NOTE(review): the leading "," looks like a slide typo — confirm
r = gmm.expect(model, data)
einsum
13 ms ± 369 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.37 ms ± 194 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
$ # Installation
$ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
$ rustup update # Optional: Update the tool chain
$ cd tutorial && git checkout rust-examples
tutorial$ # git checkout rust-implementation # spoiler alert!
tutorial$ (cd data; poetry run data) # we need data for the experiments
tutorial$ cargo run --example read_data
tutorial$ # cargo bench # run benchmarks later
read_data
example, sorry!
/// Loads the `.npy` fixtures written by the Python side (`poetry run data`)
/// and prints them, verifying ndarray <-> NumPy file interoperability.
///
/// NOTE(review): the slide shows bare `Array2`, which does not compile —
/// the element type annotation was lost in extraction. `f64` is restored
/// here to match the `NDArray[np.float64]` arrays the generator produces.
fn main() {
    let data: Array2<f64> =
        read_npy("data/data.npy").expect("failed to read data/data.npy");
    println!("{}", data);

    let responsibilities: Array2<f64> = read_npy("data/responsibilities.npy")
        .expect("failed to read data/responsibilities.npy");
    println!("{}", responsibilities);

    let means: Array2<f64> =
        read_npy("data/means.npy").expect("failed to read data/means.npy");
    println!("{}", means);
}
use ndarray::prelude::*;
pub fn foo(data: Array2) -> Array2 { Array2::::zeros((0,0)) }
use ndarray::prelude::*;
pub fn foo(data: &Array2) -> Array2 { Array2::::zeros((0,0)) }
use ndarray::prelude::*;
// NOTE(review): this listing is garbled by extraction — the generic
// parameters after `Array2` / `ArrayView2` were lost, and `temp` is never
// declared (presumably a locally allocated array that the data is copied
// into via `.assign`). The original slide code should be recovered before
// this snippet is reused; left byte-identical here.
pub fn foo(mut data: &Array2, other: ArrayView2:: ) {
temp.assign(&data);
}
// ndarray: sum `responsibilities` along Axis(0).
let sum_responsibilities = responsibilities.sum_axis(Axis(0));
# NumPy counterpart from the slide.
# NOTE(review): this sums over axis=1 while the Rust line uses Axis(0) —
# one of the two axes looks like a slide typo; confirm which is intended.
sum_responsibilities = responsibilities.sum(axis=1)
// ndarray: broadcasting via `NewAxis` in the `s![..]` slice macro.
let x = (&responsibilities.slice(s![.., .., NewAxis]) * &data.slice(s![.., NewAxis, ..]))
# NumPy counterpart using np.newaxis broadcasting (plus a sum over axis 1).
x = np.sum(data[np.newaxis, :, :] * responsibilities[:, :, np.newaxis], axis=1)
// ndarray: matrix product of the transpose, x.T · y.
let cov = &x.t().dot(&y)
# NumPy counterpart using the @ matmul operator.
covs = x.T @ y
ndarray
has an interface
that is reminiscent of NumPy
einsum
in Rust criterion.rs
tutorial$ git checkout extension-skeleton
tutorial$ # git checkout extension-final # spoiler alert!
tutorial$ maturin develop -r --strip # Builds the extensions and adds it to the venv
tutorial$ maturin build -r --strip # Creates a binary wheel
# Same driver as before: synthetic 2-D blobs (fixed seed), 20-component
# model, one E-step — rerun here against the Rust-backed extension.
data, _ = gmm.make_blobs(n_samples=10000, centers=20, n_features=2, random_state=7)
model = gmm.initialize(data, 20)
print(",",model.means)  # NOTE(review): the leading "," looks like a slide typo — confirm
r = gmm.expect(model, data)
einsum
13 ms ± 369 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.37 ms ± 194 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.49 ms ± 23.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)