/* eslint-disable block-scoped-var */ import constant from './constant'; /** * Set the node id accessor to the specified i. * @param {node} d - node. * @param {accessor} i - id accessor. * @return {accessor} - node id accessor. */ function index (d, i) { return i; } /** * t-SNE implementation in D3 by using the code existing in tsnejs * (https://github.com/karpathy/tsnejs) to compute the solution. */ export default function () { var id = index, distance = constant(300), nodes, perplexity = 30, learningRate = 10, iteration = 0, dim = 2, N, // length of the nodes. P, // probability matrix. Y, // solution. gains, ystep; /** * Make a step in t-SNE algorithm and set the velocities for the nodes * to accumulate the values from solution. */ function force () { // Make a step at each iteration. step(); var solution = getSolution(); // Set the velocity for each node using the solution. for (var i = 0; i < nodes.length; i++) { nodes[i].vx += solution[i][0]; nodes[i].vy += solution[i][1]; } } /** * Calculates the random number from Gaussian distribution. * @return {number} random number. */ function gaussRandom () { let u = 2 * Math.random() - 1; let v = 2 * Math.random() - 1; let r = u * u + v * v; if (r == 0 || r > 1) { return gaussRandom(); } return u * Math.sqrt(-2 * Math.log(r) / r); } /** * Return the normalized number. * @return {number} normalized random number from Gaussian distribution. */ function randomN () { return gaussRandom() * 1e-4; } function sign (x) { return x > 0 ? 1 : x < 0 ? -1 : 0; } /** * Create an array of length n filled with zeros. * @param {number} n - length of array. * @return {Float64Array} - array of zeros with length n. */ function zeros (n) { if (typeof n === 'undefined' || isNaN(n)) { return []; } return new Float64Array(n); // typed arrays are faster } // Returns a 2d array of random numbers /** * Creates a 2d array filled with random numbers. * @param {number} n - rows. * @param {number} d - columns. * @return {array} - 2d array */ function random2d (n, d) { var x = []; for (var i = 0; i < n; i++) { var y = []; for (var j = 0; j < d; j++) { y.push(randomN()); } x.push(y); } return x; } /** * Compute the probability matrix using the provided data. * @param {array} data - nodes. * @param {number} perplexity - used to calculate entropy of distribution. * @param {number} tol - limit for entropy difference. * @return {2d array} - 2d matrix containing probabilities. */ function d2p (data, perplexity, tol) { N = Math.floor(data.length); var Htarget = Math.log(perplexity); // target entropy of distribution. var P1 = zeros(N * N); // temporary probability matrix. var prow = zeros(N); // a temporary storage compartment. for (var i = 0; i < N; i++) { var betamin = -Infinity; var betamax = Infinity; var beta = 1; // initial value of precision. var done = false; var maxtries = 50; // Perform binary search to find a suitable precision beta // so that the entropy of the distribution is appropriate. var num = 0; while (!done) { // Compute entropy and kernel row with beta precision. var psum = 0.0; for (var j = 0; j < N; j++) { var pj = Math.exp(-distance(data[i], data[j]) * beta); // Ignore the diagonals if (i === j) { pj = 0; } prow[j] = pj; psum += pj; } // Normalize p and compute entropy. var Hhere = 0.0; for (j = 0; j < N; j++) { if (psum == 0) { pj = 0; } else { pj = prow[j] / psum; } prow[j] = pj; if (pj > 1e-7) { Hhere -= pj * Math.log(pj); } } // Adjust beta based on result. if (Hhere > Htarget) { // Entropy was too high (distribution too diffuse) // so we need to increase the precision for more peaky distribution. betamin = beta; // move up the bounds. if (betamax === Infinity) { beta = beta * 2; } else { beta = (beta + betamax) / 2; } } else { // Converse case. Make distrubtion less peaky. betamax = beta; if (betamin === -Infinity) { beta = beta / 2; } else { beta = (beta + betamin) / 2; } } // Stopping conditions: too many tries or got a good precision. num++; if (Math.abs(Hhere - Htarget) < tol || num >= maxtries) { done = true; } } // Copy over the final prow to P1 at row i for (j = 0; j < N; j++) { P1[i * N + j] = prow[j]; } } // Symmetrize P and normalize it to sum to 1 over all ij var Pout = zeros(N * N); var N2 = N * 2; for (i = 0; i < N; i++) { for (j = 0; j < N; j++) { Pout[i * N + j] = Math.max((P1[i * N + j] + P1[j * N + i]) / N2, 1e-100); } } return Pout; } /** * Initialize a starting (random) solution. */ function initSolution () { Y = random2d(N, dim); // Step gains to accelerate progress in unchanging directions. gains = random2d(N, dim, 1.0); // Momentum accumulator. ystep = random2d(N, dim, 0.0); iteration = 0; } /** * @return {2d array} the solution. */ function getSolution () { return Y; } /** * Do a single step (iteration) for the layout. * @return {number} the current cost. */ function step () { iteration += 1; var cg = costGrad(Y); // Evaluate gradient. var cost = cg.cost; var grad = cg.grad; // Perform gradient step. var ymean = zeros(dim); for (var i = 0; i < N; i++) { for (var d = 0; d < dim; d++) { var gid = grad[i][d]; var sid = ystep[i][d]; var gainid = gains[i][d]; // Compute gain update. var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2; if (newgain < 0.01) { newgain = 0.01; } gains[i][d] = newgain; // Compute momentum step direction. var momval = iteration < 250 ? 0.5 : 0.8; var newsid = momval * sid - learningRate * newgain * grad[i][d]; ystep[i][d] = newsid; // Do the step. Y[i][d] += newsid; // Accumulate mean so that we can center later. ymean[d] += Y[i][d]; } } // Reproject Y to have the zero mean. for (i = 0; i < N; i++) { for (d = 0; d < dim; d++) { Y[i][d] -= ymean[d] / N; } } return cost; } /** * Calculate the cost and the gradient. * @param {2d array} Y - the current solution to evaluate. * @return {object} that contains a cost and a gradient. */ function costGrad (Y) { var pmul = iteration < 100 ? 4 : 1; // Compute current Q distribution, unnormalized first. var Qu = zeros(N * N); var qsum = 0.0; for (var i = 0; i < N; i++) { for (var j = i + 1; j < N; j++) { var dsum = 0.0; for (var d = 0; d < dim; d++) { var dhere = Y[i][d] - Y[j][d]; dsum += dhere * dhere; } var qu = 1.0 / (1.0 + dsum); // Student t-distribution. Qu[i * N + j] = qu; Qu[j * N + i] = qu; qsum += 2 * qu; } } // Normalize Q distribution to sum to 1. var NN = N * N; var Q = zeros(NN); for (var q = 0; q < NN; q++) { Q[q] = Math.max(Qu[q] / qsum, 1e-100); } var cost = 0.0; var grad = []; for (i = 0; i < N; i++) { var gsum = new Array(dim); // Initialize gradiet for point i. for (d = 0; d < dim; d++) { gsum[d] = 0.0; } for (j = 0; j < N; j++) { // Accumulate the cost. cost += -P[i * N + j] * Math.log(Q[i * N + j]); var premult = 4 * (pmul * P[i * N + j] - Q[i * N + j]) * Qu[i * N + j]; for (d = 0; d < dim; d++) { gsum[d] += premult * (Y[i][d] - Y[j][d]); } } grad.push(gsum); } return { cost: cost, grad: grad }; } /** * Calculates the stress. Basically, it computes the difference between * high dimensional distance and real distance. The lower the stress is, * the better layout. * @return {number} - stress of the layout. */ function getStress () { var totalDiffSq = 0, totalHighDistSq = 0; for (var i = 0, source, target, realDist, highDist; i < nodes.length; i++) { for (var j = 0; j < nodes.length; j++) { if (i !== j) { source = nodes[i], target = nodes[j]; realDist = Math.hypot(target.x - source.x, target.y - source.y); highDist = +distance(nodes[i], nodes[j]); totalDiffSq += Math.pow(realDist - highDist, 2); totalHighDistSq += highDist * highDist; } } } return Math.sqrt(totalDiffSq / totalHighDistSq); } // API for initializing the algorithm, setting parameters and querying // metrics. force.initialize = function (_) { nodes = _; N = nodes.length; // Initialize the probability matrix. P = d2p(nodes, perplexity, 1e-4); initSolution(); }; force.id = function (_) { return arguments.length ? (id = _, force) : id; }; force.distance = function (_) { return arguments.length ? (distance = typeof _ === 'function' ? _ : constant(+_), force) : distance; }; force.stress = function () { return getStress(); }; force.learningRate = function (_) { return arguments.length ? (learningRate = +_, force) : learningRate; }; force.perplexity = function (_) { return arguments.length ? (perplexity = +_, force) : perplexity; }; return force; }