375 lines
10 KiB
JavaScript
375 lines
10 KiB
JavaScript
import constant from "./constant";
|
|
|
|
/**
|
|
* Set the node id accessor to the specified i.
|
|
* @param {node} d - node.
|
|
* @param {accessor} i - id accessor.
|
|
* @return {accessor} - node id accessor.
|
|
*/
|
|
function index(d, i) {
|
|
return i;
|
|
}
|
|
|
|
/**
|
|
* t-SNE implementation in D3 by using the code existing in tsnejs
|
|
* (https://github.com/karpathy/tsnejs) to compute the solution.
|
|
*/
|
|
export default function() {
|
|
var id = index,
|
|
distance = constant(300),
|
|
nodes,
|
|
perplexity = 30,
|
|
learningRate = 10,
|
|
iteration = 0,
|
|
dim = 2,
|
|
N, // length of the nodes.
|
|
P, // probability matrix.
|
|
Y, // solution.
|
|
gains,
|
|
ystep;
|
|
|
|
/**
|
|
* Make a step in t-SNE algorithm and set the velocities for the nodes
|
|
* to accumulate the values from solution.
|
|
*/
|
|
function force() {
|
|
// Make a step at each iteration.
|
|
step();
|
|
var solution = getSolution();
|
|
|
|
// Set the velocity for each node using the solution.
|
|
for (var i = 0; i < nodes.length; i++) {
|
|
nodes[i].vx += solution[i][0];
|
|
nodes[i].vy += solution[i][1];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Calculates the random number from Gaussian distribution.
|
|
* @return {number} random number.
|
|
*/
|
|
function gaussRandom() {
|
|
let u = 2 * Math.random() - 1;
|
|
let v = 2 * Math.random() - 1;
|
|
let r = u * u + v * v;
|
|
if (r == 0 || r > 1) return gaussRandom();
|
|
return u * Math.sqrt(-2 * Math.log(r) / r);
|
|
}
|
|
|
|
/**
|
|
* Return the normalized number.
|
|
* @return {number} normalized random number from Gaussian distribution.
|
|
*/
|
|
function randomN() {
|
|
return gaussRandom() * 1e-4;
|
|
}
|
|
|
|
function sign(x) {
|
|
return x > 0 ? 1 : x < 0 ? -1 : 0;
|
|
}
|
|
|
|
/**
|
|
* Create an array of length n filled with zeros.
|
|
* @param {number} n - length of array.
|
|
* @return {Float64Array} - array of zeros with length n.
|
|
*/
|
|
function zeros(n) {
|
|
if (typeof(n) === 'undefined' || isNaN(n)) {
|
|
return [];
|
|
}
|
|
return new Float64Array(n); // typed arrays are faster
|
|
}
|
|
|
|
// Returns a 2d array of random numbers
|
|
/**
|
|
* Creates a 2d array filled with random numbers.
|
|
* @param {number} n - rows.
|
|
* @param {number} d - columns.
|
|
* @return {array} - 2d array
|
|
*/
|
|
function random2d(n, d) {
|
|
var x = [];
|
|
for (var i = 0; i < n; i++) {
|
|
var y = [];
|
|
for (var j = 0; j < d; j++) {
|
|
y.push(randomN());
|
|
}
|
|
x.push(y);
|
|
}
|
|
return x;
|
|
}
|
|
|
|
/**
|
|
* Compute the probability matrix using the provided data.
|
|
* @param {array} data - nodes.
|
|
* @param {number} perplexity - used to calculate entropy of distribution.
|
|
* @param {number} tol - limit for entropy difference.
|
|
* @return {2d array} - 2d matrix containing probabilities.
|
|
*/
|
|
function d2p(data, perplexity, tol) {
|
|
N = Math.floor(data.length);
|
|
var Htarget = Math.log(perplexity); // target entropy of distribution.
|
|
var P1 = zeros(N * N); // temporary probability matrix.
|
|
|
|
var prow = zeros(N); // a temporary storage compartment.
|
|
for (var i = 0; i < N; i++) {
|
|
var betamin = -Infinity;
|
|
var betamax = Infinity;
|
|
var beta = 1; // initial value of precision.
|
|
var done = false;
|
|
var maxtries = 50;
|
|
|
|
// Perform binary search to find a suitable precision beta
|
|
// so that the entropy of the distribution is appropriate.
|
|
var num = 0;
|
|
while (!done) {
|
|
// Compute entropy and kernel row with beta precision.
|
|
var psum = 0.0;
|
|
for (var j = 0; j < N; j++) {
|
|
var pj = Math.exp(-distance(data[i], data[j]) * beta);
|
|
// Ignore the diagonals
|
|
if (i === j) {
|
|
pj = 0;
|
|
}
|
|
prow[j] = pj;
|
|
psum += pj;
|
|
}
|
|
// Normalize p and compute entropy.
|
|
var Hhere = 0.0;
|
|
for (j = 0; j < N; j++) {
|
|
if (psum == 0) {
|
|
pj = 0;
|
|
} else {
|
|
pj = prow[j] / psum;
|
|
}
|
|
prow[j] = pj;
|
|
if (pj > 1e-7) {
|
|
Hhere -= pj * Math.log(pj);
|
|
}
|
|
}
|
|
|
|
// Adjust beta based on result.
|
|
if (Hhere > Htarget) {
|
|
// Entropy was too high (distribution too diffuse)
|
|
// so we need to increase the precision for more peaky distribution.
|
|
betamin = beta; // move up the bounds.
|
|
if (betamax === Infinity) {
|
|
beta = beta * 2;
|
|
} else {
|
|
beta = (beta + betamax) / 2;
|
|
}
|
|
|
|
} else {
|
|
// Converse case. Make distrubtion less peaky.
|
|
betamax = beta;
|
|
if (betamin === -Infinity) {
|
|
beta = beta / 2;
|
|
} else {
|
|
beta = (beta + betamin) / 2;
|
|
}
|
|
}
|
|
|
|
// Stopping conditions: too many tries or got a good precision.
|
|
num++;
|
|
if (Math.abs(Hhere - Htarget) < tol || num >= maxtries) {
|
|
done = true;
|
|
}
|
|
}
|
|
|
|
// Copy over the final prow to P1 at row i
|
|
for (j = 0; j < N; j++) {
|
|
P1[i * N + j] = prow[j];
|
|
}
|
|
|
|
}
|
|
|
|
// Symmetrize P and normalize it to sum to 1 over all ij
|
|
var Pout = zeros(N * N);
|
|
var N2 = N * 2;
|
|
for (i = 0; i < N; i++) {
|
|
for (j = 0; j < N; j++) {
|
|
Pout[i * N + j] = Math.max((P1[i * N + j] + P1[j * N + i]) / N2, 1e-100);
|
|
}
|
|
}
|
|
return Pout;
|
|
}
|
|
|
|
/**
|
|
* Initialize a starting (random) solution.
|
|
*/
|
|
function initSolution() {
|
|
Y = random2d(N, dim);
|
|
// Step gains to accelerate progress in unchanging directions.
|
|
gains = random2d(N, dim, 1.0);
|
|
// Momentum accumulator.
|
|
ystep = random2d(N, dim, 0.0);
|
|
iteration = 0;
|
|
}
|
|
|
|
/**
|
|
* @return {2d array} the solution.
|
|
*/
|
|
function getSolution() {
|
|
return Y;
|
|
}
|
|
|
|
/**
|
|
* Do a single step (iteration) for the layout.
|
|
* @return {number} the current cost.
|
|
*/
|
|
function step() {
|
|
iteration += 1;
|
|
|
|
var cg = costGrad(Y); // Evaluate gradient.
|
|
var cost = cg.cost;
|
|
var grad = cg.grad;
|
|
|
|
// Perform gradient step.
|
|
var ymean = zeros(dim);
|
|
for (var i = 0; i < N; i++) {
|
|
for (var d = 0; d < dim; d++) {
|
|
var gid = grad[i][d];
|
|
var sid = ystep[i][d];
|
|
var gainid = gains[i][d];
|
|
|
|
// Compute gain update.
|
|
var newgain = sign(gid) === sign(sid) ? gainid * 0.8 : gainid + 0.2;
|
|
if (newgain < 0.01) {
|
|
newgain = 0.01;
|
|
}
|
|
gains[i][d] = newgain;
|
|
|
|
// Compute momentum step direction.
|
|
var momval = iteration < 250 ? 0.5 : 0.8;
|
|
var newsid = momval * sid - learningRate * newgain * grad[i][d];
|
|
ystep[i][d] = newsid;
|
|
|
|
// Do the step.
|
|
Y[i][d] += newsid;
|
|
|
|
// Accumulate mean so that we can center later.
|
|
ymean[d] += Y[i][d];
|
|
}
|
|
}
|
|
|
|
// Reproject Y to have the zero mean.
|
|
for (i = 0; i < N; i++) {
|
|
for (d = 0; d < dim; d++) {
|
|
Y[i][d] -= ymean[d] / N;
|
|
}
|
|
}
|
|
return cost;
|
|
}
|
|
|
|
/**
|
|
* Calculate the cost and the gradient.
|
|
* @param {2d array} Y - the current solution to evaluate.
|
|
* @return {object} that contains a cost and a gradient.
|
|
*/
|
|
function costGrad(Y) {
|
|
|
|
var pmul = iteration < 100 ? 4 : 1;
|
|
|
|
// Compute current Q distribution, unnormalized first.
|
|
var Qu = zeros(N * N);
|
|
var qsum = 0.0;
|
|
for (var i = 0; i < N; i++) {
|
|
for (var j = i + 1; j < N; j++) {
|
|
var dsum = 0.0;
|
|
for (var d = 0; d < dim; d++) {
|
|
var dhere = Y[i][d] - Y[j][d];
|
|
dsum += dhere * dhere;
|
|
}
|
|
var qu = 1.0 / (1.0 + dsum); // Student t-distribution.
|
|
Qu[i * N + j] = qu;
|
|
Qu[j * N + i] = qu;
|
|
qsum += 2 * qu;
|
|
}
|
|
}
|
|
// Normalize Q distribution to sum to 1.
|
|
var NN = N * N;
|
|
var Q = zeros(NN);
|
|
for (var q = 0; q < NN; q++) {
|
|
Q[q] = Math.max(Qu[q] / qsum, 1e-100);
|
|
}
|
|
|
|
var cost = 0.0;
|
|
var grad = [];
|
|
for (i = 0; i < N; i++) {
|
|
var gsum = new Array(dim); // Initialize gradiet for point i.
|
|
for (d = 0; d < dim; d++) {
|
|
gsum[d] = 0.0;
|
|
}
|
|
for (j = 0; j < N; j++) {
|
|
// Accumulate the cost.
|
|
cost += -P[i * N + j] * Math.log(Q[i * N + j]);
|
|
var premult = 4 * (pmul * P[i * N + j] - Q[i * N + j]) * Qu[i * N + j];
|
|
for (d = 0; d < dim; d++) {
|
|
gsum[d] += premult * (Y[i][d] - Y[j][d]);
|
|
}
|
|
}
|
|
grad.push(gsum);
|
|
}
|
|
|
|
return {
|
|
cost: cost,
|
|
grad: grad
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Calculates the stress. Basically, it computes the difference between
|
|
* high dimensional distance and real distance. The lower the stress is,
|
|
* the better layout.
|
|
* @return {number} - stress of the layout.
|
|
*/
|
|
function getStress() {
|
|
var totalDiffSq = 0,
|
|
totalHighDistSq = 0;
|
|
for (var i = 0, source, target, realDist, highDist; i < nodes.length; i++) {
|
|
for (var j = 0; j < nodes.length; j++) {
|
|
if (i !== j) {
|
|
source = nodes[i], target = nodes[j];
|
|
realDist = Math.hypot(target.x - source.x, target.y - source.y);
|
|
highDist = +distance(nodes[i], nodes[j]);
|
|
totalDiffSq += Math.pow(realDist - highDist, 2);
|
|
totalHighDistSq += highDist * highDist;
|
|
}
|
|
}
|
|
}
|
|
return Math.sqrt(totalDiffSq / totalHighDistSq);
|
|
}
|
|
|
|
// API for initializing the algorithm, setting parameters and querying
|
|
// metrics.
|
|
force.initialize = function(_) {
|
|
nodes = _;
|
|
N = nodes.length;
|
|
// Initialize the probability matrix.
|
|
P = d2p(nodes, perplexity, 1e-4);
|
|
initSolution();
|
|
};
|
|
|
|
force.id = function(_) {
|
|
return arguments.length ? (id = _, force) : id;
|
|
};
|
|
|
|
force.distance = function(_) {
|
|
return arguments.length ? (distance = typeof _ === "function" ? _ : constant(+_), force) : distance;
|
|
};
|
|
|
|
force.stress = function() {
|
|
return getStress();
|
|
};
|
|
|
|
force.learningRate = function(_) {
|
|
return arguments.length ? (learningRate = +_, force) : learningRate;
|
|
};
|
|
|
|
force.perplexity = function(_) {
|
|
return arguments.length ? (perplexity = +_, force) : perplexity;
|
|
};
|
|
|
|
return force;
|
|
}
|