MVA-2021 / computational_stats / temp_saem.ipynb
temp_saem.ipynb
Raw
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "CompStats_project.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "gbSJIO-nk4Do"
      },
      "source": [
        "import math\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import matplotlib.pyplot as plt\n",
        "from matplotlib.patches import Ellipse\n",
        "import matplotlib.transforms as transforms\n",
        "\n",
        "from scipy.stats import multivariate_normal\n",
        "\n",
        "import sklearn\n",
        "from sklearn.cluster import KMeans\n",
        "from sklearn.preprocessing import LabelEncoder\n",
        "from sklearn.preprocessing import StandardScaler"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "F-X9hWgUlxWC"
      },
      "source": [
        "def data_generator(cluster_nums, cluster_means, cluster_var):\n",
        "    data = []\n",
        "    for num, mu, var in zip(cluster_nums, cluster_means, cluster_var):\n",
        "        data += [np.random.multivariate_normal(mu, np.diag(var), num)]\n",
        "    data = np.vstack(data)\n",
        "    return data\n",
        "\n",
        "mus= [[-6.1, 3.8],[-5.9, -4.2], [6, 0.1]]\n",
        "vs = [[1, 3],[2, 2],[6, 2]]\n",
        "\n",
        "X = data_generator(cluster_nums=[50,50,50],\n",
        "                   cluster_means=mus,\n",
        "                   cluster_var=vs)\n",
        "\n",
        "def confidence_ellipse(mu, cov, ax, nstd=3.0, **kwargs):\n",
        "    def eigsorted(cov):\n",
        "        vals, vecs = np.linalg.eigh(cov)\n",
        "        order = vals.argsort()[::-1]\n",
        "        return vals[order], vecs[:,order]\n",
        "    if ax is None:\n",
        "        ax = plt.gca()\n",
        "    vals, vecs = eigsorted(cov)\n",
        "    theta = np.degrees(np.arctan2(*vecs[:,0][::-1]))\n",
        "    width, height = 2 * nstd * np.sqrt(vals)\n",
        "    ellip = Ellipse(xy=mu, width=width, height=height, angle=theta, facecolor=\"none\", **kwargs)\n",
        "    ax.add_artist(ellip)\n",
        "    return ellip\n",
        "\n",
        "ax = plt.axes()\n",
        "plt.scatter(X[:, 0], X[:, 1])\n",
        "\n",
        "for j in range(len(mus)):\n",
        "    confidence_ellipse(mus[j], np.diag(vs[j]), ax, edgecolor=\"r\")\n",
        "\n",
        "plt.show()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ED49Fq6Y4BTd"
      },
      "source": [
        "def Tk(k, a=0, b=-1, c=1, r=1):\n",
        "    kappa = (k+r*c)/r\n",
        "    return 1 + a**kappa + b*np.sin(kappa)/kappa\n",
        "\n",
        "x = np.arange(1,1000)\n",
        "plt.plot(x, Tk(x))"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qlWwrIWLXpsr"
      },
      "source": [
        "class GaussianMixture():\n",
        "\n",
        "    def __init__(self, n_components):\n",
        "        \"\"\"Inits the GaussianMixture class by setting n_components and n_iter\"\"\"\n",
        "        self.n_components = n_components\n",
        "        \n",
        "    \n",
        "    def kmeans_init(self, X):\n",
        "        n = X.shape[0]\n",
        "        d = X.shape[1]\n",
        "        m = self.n_components\n",
        "\n",
        "        kms = KMeans(n_clusters=m)\n",
        "        preds = kms.fit_predict(X)\n",
        "        \n",
        "        # sort X by clusters predicted from kmeans\n",
        "        x_comp = [[] for _ in range(m)]\n",
        "        for i,pred in enumerate(preds):\n",
        "            x_comp[pred].append(X[i])\n",
        "        x_comp = [np.array(x) for x in x_comp]\n",
        "        \n",
        "        # compute prior, mean and variance of each cluster \n",
        "        alpha = np.zeros(m)\n",
        "        mu = np.zeros((m, d))\n",
        "        sigma = np.zeros((m, d, d))\n",
        "        for i,x in enumerate(x_comp):\n",
        "            alpha[i] = x.shape[0] / n\n",
        "            mu[i] = np.mean(x)\n",
        "            sigma[i] = np.cov(x.T)\n",
        "\n",
        "        return alpha, mu, sigma\n",
        "\n",
        "    # Appendix B.1 of the paper\n",
        "    def fit_em(self, X, n_iter):\n",
        "        n = X.shape[0]\n",
        "        d = X.shape[1]\n",
        "        m = self.n_components\n",
        "        \n",
        "        # init with KMeans\n",
        "        alpha, mu, sigma = self.kmeans_init(X)\n",
        "        \n",
        "        # EM loop\n",
        "        for k in range(n_iter):\n",
        "\n",
        "            # Expectation\n",
        "            tau = np.zeros((m, n))\n",
        "            for j in range(m):\n",
        "                tau[j] = alpha[j] * multivariate_normal.pdf(X, mu[j], sigma[j])\n",
        "            tau /= tau.sum(axis=0)\n",
        "            llikelihood = np.sum(np.log(np.sum(tau, axis=0)))\n",
        "\n",
        "            # Maximization\n",
        "            a = np.sum(tau, axis=1)\n",
        "            alpha = a / np.sum(a)\n",
        "\n",
        "            mu = np.sum(tau[:,:,None]*X, axis=1) / a[:, None]\n",
        "\n",
        "            Xm = X - mu[:,None]\n",
        "            cov = Xm[:,:,:,None] * Xm[:,:,None,:]\n",
        "            sigma = np.sum(tau[:,:,None,None]*cov, axis=1) / a[:, None,None] + np.eye(d) * 1e-8 # the np.eye 1e-8 prevents some numerical issues\n",
        "            \n",
        "        self.alpha = alpha\n",
        "        self.mu = mu\n",
        "        self.sigma = sigma\n",
        "        self.tau = tau\n",
        "\n",
        "        return np.argmax(tau, axis=0)\n",
        "\n",
        "\n",
        "    # Appendix B.2 of the paper\n",
        "    def fit_saem(self, X, n_iter, params):\n",
        "        n = X.shape[0]\n",
        "        d = X.shape[1]\n",
        "        m = self.n_components\n",
        "\n",
        "        if 'burnin' in params and 'alpha_burnin' in params:\n",
        "            burnin, alpha_burnin = params['burnin'], params['alpha_burnin']\n",
        "        else:\n",
        "            raise Exception(\"Params must contain: {'burnin': int, 'alpha_burnin': float}\")\n",
        "\n",
        "        # init with KMeans\n",
        "        alpha, mu, sigma = self.kmeans_init(X)\n",
        "\n",
        "        # init the sufficient statistics\n",
        "        S1 = np.zeros(m)\n",
        "        S2 = np.zeros((m,d))\n",
        "        S3 = np.zeros((m,d,d))\n",
        "\n",
        "        # SAEM loop\n",
        "        np.random.seed(42)\n",
        "        for k in range(n_iter):\n",
        "\n",
        "            # Simulation\n",
        "            ## compute tau \n",
        "            tau = np.zeros((m, n))\n",
        "            for j in range(m):\n",
        "                tau[j] = alpha[j] * multivariate_normal.pdf(X, mu[j], sigma[j])\n",
        "            \n",
        "            ## compute distribution\n",
        "            tau /= tau.sum(axis=0)\n",
        "            llikelihood = np.sum(np.log(np.sum(tau, axis=0)))\n",
        "\n",
        "            ## simulate latent variables\n",
        "            z = np.zeros(n, dtype=int)\n",
        "            for i in range(n):\n",
        "                z[i] = np.random.choice(range(m), p = tau[:,i])\n",
        "\n",
        "            # Stochastic Approximation\n",
        "            gamma = 1 if k <= burnin else (k-burnin)**(-alpha_burnin)\n",
        "            S1_new = np.zeros(m)\n",
        "            S2_new = np.zeros((m,d))\n",
        "            S3_new = np.zeros((m,d,d))\n",
        "            for i in range(n):\n",
        "                S1_new[z[i]] += 1\n",
        "                S2_new[z[i]] += X[i]\n",
        "                S3_new[z[i]] += X[i, None] * X[i,None].T\n",
        "            S1 = S1 + gamma*(S1_new-S1)\n",
        "            S2 = S2 + gamma*(S2_new-S2)\n",
        "            S3 = S3 + gamma*(S3_new-S3)\n",
        "\n",
        "            # Maximization\n",
        "            alpha = S1/n\n",
        "            mu = S2 / S1[:,None]\n",
        "            sigma = (S3-S2[:,:,None]*mu[:,None,:]) / S1[:,None,None]\n",
        "            \n",
        "        self.alpha = alpha\n",
        "        self.mu = mu\n",
        "        self.sigma = sigma\n",
        "        self.tau = tau\n",
        "\n",
        "        return np.argmax(tau, axis=0)\n",
        "\n",
        "    # Appendix B.3 of the paper\n",
        "    def fit_temp_saem(self, X, n_iter, params):\n",
        "        n = X.shape[0]\n",
        "        d = X.shape[1]\n",
        "        m = self.n_components\n",
        "\n",
        "        if 'burnin' in params and 'alpha_burnin' in params:\n",
        "            burnin, alpha_burnin = params['burnin'], params['alpha_burnin']\n",
        "        else:\n",
        "            raise Exception(\"Params must contain: {'burnin': int, 'alpha_burnin': float}\")\n",
        "\n",
        "        if 'decreasing_rate' in params and 'amplitude_rate' in params and 'scaling' in params and 'delay' in params:\n",
        "            decreasing_rate = params['decreasing_rate']\n",
        "            amplitude_rate = params['amplitude_rate']\n",
        "            scaling = params['scaling']\n",
        "            delay = params['delay']\n",
        "        else:\n",
        "            raise Exception(\"Params must contain: {'decreasing_rate', 'amplitude_rate', 'scaling', 'delay'}\")\n",
        "        def Tk(k):\n",
        "            kappa = (k+scaling*delay)/scaling\n",
        "            return 1 + decreasing_rate**kappa + amplitude_rate*np.sin(kappa)/kappa\n",
        "\n",
        "        # init with KMeans\n",
        "        alpha, mu, sigma = self.kmeans_init(X)\n",
        "\n",
        "        # init the sufficient statistics\n",
        "        S1 = np.zeros(m)\n",
        "        S2 = np.zeros((m,d))\n",
        "        S3 = np.zeros((m,d,d))\n",
        "\n",
        "        # SAEM loop\n",
        "        np.random.seed(42)\n",
        "        for k in range(n_iter):\n",
        "\n",
        "            # Simulation\n",
        "            ## compute tau \n",
        "            tau = np.zeros((m, n))\n",
        "            for j in range(m):\n",
        "                tau[j] = alpha[j] * multivariate_normal.pdf(X, mu[j], sigma[j])\n",
        "            \n",
        "            ## compute tempered distribution\n",
        "            T = Tk(k)\n",
        "            tau = tau**(1/T)\n",
        "            tau /= tau.sum(axis=0)\n",
        "            llikelihood = np.sum(np.log(np.sum(tau, axis=0)))\n",
        "\n",
        "            ## simulate latent variables\n",
        "            z = np.zeros(n, dtype=int)\n",
        "            for i in range(n):\n",
        "                z[i] = np.random.choice(range(m), p = tau[:,i])\n",
        "\n",
        "            # Stochastic Approximation\n",
        "            gamma = 1 if k <= burnin else (k-burnin)**(-alpha_burnin)\n",
        "            S1_new = np.zeros(m)\n",
        "            S2_new = np.zeros((m,d))\n",
        "            S3_new = np.zeros((m,d,d))\n",
        "            for i in range(n):\n",
        "                S1_new[z[i]] += 1\n",
        "                S2_new[z[i]] += X[i]\n",
        "                S3_new[z[i]] += X[i, None] * X[i,None].T\n",
        "            S1 = S1 + gamma*(S1_new-S1)\n",
        "            S2 = S2 + gamma*(S2_new-S2)\n",
        "            S3 = S3 + gamma*(S3_new-S3)\n",
        "\n",
        "            # Maximization\n",
        "            alpha = S1/n\n",
        "            mu = S2 / S1[:,None]\n",
        "            sigma = (S3-S2[:,:,None]*mu[:,None,:]) / S1[:,None,None]\n",
        "            \n",
        "        self.alpha = alpha\n",
        "        self.mu = mu\n",
        "        self.sigma = sigma\n",
        "        self.tau = tau\n",
        "\n",
        "        return np.argmax(tau, axis=0)\n"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TRgt_a0glhmC"
      },
      "source": [
        "gmm = GaussianMixture(n_components=3)\n",
        "# params_saem = {'burnin': 10, 'alpha_burnin': 0.8}\n",
        "# pred_gmm = gmm.fit_saem(X, n_iter= 100, params = params_saem)\n",
        "params_temp_saem = {'burnin': 10, 'alpha_burnin': 0.8, \n",
        "                    'decreasing_rate': 0.0, 'amplitude_rate': -1, 'scaling': 1, 'delay': 1}\n",
        "pred_gmm = gmm.fit_temp_saem(X, n_iter= 100, params = params_temp_saem)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "eRbZvCfOlk5H"
      },
      "source": [
        "ax = plt.axes()\n",
        "plt.scatter(X[:, 0], X[:, 1], c=pred_gmm)\n",
        "plt.suptitle('GMM clusters')\n",
        "\n",
        "for j in range(len(gmm.mu)):\n",
        "    confidence_ellipse(gmm.mu[j], gmm.sigma[j], ax, edgecolor=\"r\")\n",
        "for j in range(len(gmm.mu)):\n",
        "    confidence_ellipse(mus[j], np.diag(vs[j]), ax, edgecolor=\"g\")\n",
        "\n",
        "plt.show()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "FbaSoeyGln1n"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}