Files
d3-spring-model/src/t-sne.js
2018-03-22 16:12:25 +00:00

375 lines
9.8 KiB
JavaScript

/* eslint-disable block-scoped-var */
import constant from './constant';
/**
* Set the node id accessor to the specified i.
* @param {node} d - node.
* @param {accessor} i - id accessor.
* @return {accessor} - node id accessor.
*/
function index (d, i) {
return i;
}
/**
* t-SNE implementation in D3 by using the code existing in tsnejs
* (https://github.com/karpathy/tsnejs) to compute the solution.
*/
export default function () {
var id = index,
distance = constant(300),
nodes,
perplexity = 30,
learningRate = 10,
iteration = 0,
dim = 2,
N, // length of the nodes.
P, // probability matrix.
Y, // solution.
gains,
ystep;
/**
* Make a step in t-SNE algorithm and set the velocities for the nodes
* to accumulate the values from solution.
*/
function force () {
// Make a step at each iteration.
step();
var solution = getSolution();
// Set the velocity for each node using the solution.
for (var i = 0; i < nodes.length; i++) {
nodes[i].vx += solution[i][0];
nodes[i].vy += solution[i][1];
}
}
/**
* Calculates the random number from Gaussian distribution.
* @return {number} random number.
*/
function gaussRandom () {
let u = 2 * Math.random() - 1;
let v = 2 * Math.random() - 1;
let r = u * u + v * v;
if (r == 0 || r > 1) {
return gaussRandom();
}
return u * Math.sqrt(-2 * Math.log(r) / r);
}
/**
* Return the normalized number.
* @return {number} normalized random number from Gaussian distribution.
*/
function randomN () {
return gaussRandom() * 1e-4;
}
function sign (x) {
return x > 0 ? 1 : x < 0 ? -1 : 0;
}
/**
* Create an array of length n filled with zeros.
* @param {number} n - length of array.
* @return {Float64Array} - array of zeros with length n.
*/
function zeros (n) {
if (typeof n === 'undefined' || isNaN(n)) {
return [];
}
return new Float64Array(n); // typed arrays are faster
}
// Returns a 2d array of random numbers
/**
* Creates a 2d array filled with random numbers.
* @param {number} n - rows.
* @param {number} d - columns.
* @return {array} - 2d array
*/
function random2d (n, d) {
var x = [];
for (var i = 0; i < n; i++) {
var y = [];
for (var j = 0; j < d; j++) {
y.push(randomN());
}
x.push(y);
}
return x;
}
/**
* Compute the probability matrix using the provided data.
* @param {array} data - nodes.
* @param {number} perplexity - used to calculate entropy of distribution.
* @param {number} tol - limit for entropy difference.
* @return {2d array} - 2d matrix containing probabilities.
*/
function d2p (data, perplexity, tol) {
N = Math.floor(data.length);
var Htarget = Math.log(perplexity); // target entropy of distribution.
var P1 = zeros(N * N); // temporary probability matrix.
var prow = zeros(N); // a temporary storage compartment.
for (var i = 0; i < N; i++) {
var betamin = -Infinity;
var betamax = Infinity;
var beta = 1; // initial value of precision.
var done = false;
var maxtries = 50;
// Perform binary search to find a suitable precision beta
// so that the entropy of the distribution is appropriate.
var num = 0;
while (!done) {
// Compute entropy and kernel row with beta precision.
var psum = 0.0;
for (var j = 0; j < N; j++) {
var pj = Math.exp(-distance(data[i], data[j]) * beta);
// Ignore the diagonals
if (i === j) {
pj = 0;
}
prow[j] = pj;
psum += pj;
}
// Normalize p and compute entropy.
var Hhere = 0.0;
for (j = 0; j < N; j++) {
if (psum == 0) {
pj = 0;
} else {
pj = prow[j] / psum;
}
prow[j] = pj;
if (pj > 1e-7) {
Hhere -= pj * Math.log(pj);
}
}
// Adjust beta based on result.
if (Hhere > Htarget) {
// Entropy was too high (distribution too diffuse)
// so we need to increase the precision for more peaky distribution.
betamin = beta; // move up the bounds.
if (betamax === Infinity) {
beta = beta * 2;
} else {
beta = (beta + betamax) / 2;
}
} else {
// Converse case. Make distrubtion less peaky.
betamax = beta;
if (betamin === -Infinity) {
beta = beta / 2;
} else {
beta = (beta + betamin) / 2;
}
}
// Stopping conditions: too many tries or got a good precision.
num++;
if (Math.abs(Hhere - Htarget) < tol || num >= maxtries) {
done = true;
}
}
// Copy over the final prow to P1 at row i
for (j = 0; j < N; j++) {
P1[i * N + j] = prow[j];
}
}
// Symmetrize P and normalize it to sum to 1 over all ij
var Pout = zeros(N * N);
var N2 = N * 2;
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
Pout[i * N + j] = Math.max((P1[i * N + j] + P1[j * N + i]) / N2, 1e-100);
}
}
return Pout;
}
/**
* Initialize a starting (random) solution.
*/
function initSolution () {
Y = random2d(N, dim);
// Step gains to accelerate progress in unchanging directions.
gains = random2d(N, dim, 1.0);
// Momentum accumulator.
ystep = random2d(N, dim, 0.0);
iteration = 0;
}
/**
* @return {2d array} the solution.
*/
function getSolution () {
return Y;
}
/**
* Do a single step (iteration) for the layout.
* @return {number} the current cost.
*/
function step () {
iteration += 1;
var cg = costGrad(Y); // Evaluate gradient.
var cost = cg.cost;
var grad = cg.grad;
// Perform gradient step.
var ymean = zeros(dim);
for (var i = 0; i < N; i++) {
for (var d = 0; d < dim; d++) {
var gid = grad[i][d];
var sid = ystep[i][d];
var gainid = gains[i][d];
// Compute gain update.
var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
if (newgain < 0.01) {
newgain = 0.01;
}
gains[i][d] = newgain;
// Compute momentum step direction.
var momval = iteration < 250 ? 0.5 : 0.8;
var newsid = momval * sid - learningRate * newgain * grad[i][d];
ystep[i][d] = newsid;
// Do the step.
Y[i][d] += newsid;
// Accumulate mean so that we can center later.
ymean[d] += Y[i][d];
}
}
// Reproject Y to have the zero mean.
for (i = 0; i < N; i++) {
for (d = 0; d < dim; d++) {
Y[i][d] -= ymean[d] / N;
}
}
return cost;
}
/**
* Calculate the cost and the gradient.
* @param {2d array} Y - the current solution to evaluate.
* @return {object} that contains a cost and a gradient.
*/
function costGrad (Y) {
var pmul = iteration < 100 ? 4 : 1;
// Compute current Q distribution, unnormalized first.
var Qu = zeros(N * N);
var qsum = 0.0;
for (var i = 0; i < N; i++) {
for (var j = i + 1; j < N; j++) {
var dsum = 0.0;
for (var d = 0; d < dim; d++) {
var dhere = Y[i][d] - Y[j][d];
dsum += dhere * dhere;
}
var qu = 1.0 / (1.0 + dsum); // Student t-distribution.
Qu[i * N + j] = qu;
Qu[j * N + i] = qu;
qsum += 2 * qu;
}
}
// Normalize Q distribution to sum to 1.
var NN = N * N;
var Q = zeros(NN);
for (var q = 0; q < NN; q++) {
Q[q] = Math.max(Qu[q] / qsum, 1e-100);
}
var cost = 0.0;
var grad = [];
for (i = 0; i < N; i++) {
var gsum = new Array(dim); // Initialize gradiet for point i.
for (d = 0; d < dim; d++) {
gsum[d] = 0.0;
}
for (j = 0; j < N; j++) {
// Accumulate the cost.
cost += -P[i * N + j] * Math.log(Q[i * N + j]);
var premult = 4 * (pmul * P[i * N + j] - Q[i * N + j]) * Qu[i * N + j];
for (d = 0; d < dim; d++) {
gsum[d] += premult * (Y[i][d] - Y[j][d]);
}
}
grad.push(gsum);
}
return {
cost: cost,
grad: grad
};
}
/**
* Calculates the stress. Basically, it computes the difference between
* high dimensional distance and real distance. The lower the stress is,
* the better layout.
* @return {number} - stress of the layout.
*/
function getStress () {
var totalDiffSq = 0,
totalHighDistSq = 0;
for (var i = 0, source, target, realDist, highDist; i < nodes.length; i++) {
for (var j = 0; j < nodes.length; j++) {
if (i !== j) {
source = nodes[i], target = nodes[j];
realDist = Math.hypot(target.x - source.x, target.y - source.y);
highDist = +distance(nodes[i], nodes[j]);
totalDiffSq += Math.pow(realDist - highDist, 2);
totalHighDistSq += highDist * highDist;
}
}
}
return Math.sqrt(totalDiffSq / totalHighDistSq);
}
// API for initializing the algorithm, setting parameters and querying
// metrics.
force.initialize = function (_) {
nodes = _;
N = nodes.length;
// Initialize the probability matrix.
P = d2p(nodes, perplexity, 1e-4);
initSolution();
};
force.id = function (_) {
return arguments.length ? (id = _, force) : id;
};
force.distance = function (_) {
return arguments.length ? (distance = typeof _ === 'function' ? _ : constant(+_), force) : distance;
};
force.stress = function () {
return getStress();
};
force.learningRate = function (_) {
return arguments.length ? (learningRate = +_, force) : learningRate;
};
force.perplexity = function (_) {
return arguments.length ? (perplexity = +_, force) : perplexity;
};
return force;
}