Init from given files

This commit is contained in:
Pitchaya Boonsarngsuk
2017-11-07 21:33:16 +00:00
commit 61f2be55fe
45 changed files with 34460 additions and 0 deletions

View File

@@ -0,0 +1,39 @@
/**
 * Compute the absolute cosine similarity between two nodes.
 *
 * Columns named "class", "app", "user" or "weekday" (case-insensitive) are
 * skipped. They are left out of BOTH the dot product and the vector norms,
 * so a non-numeric value in one of them cannot turn the result into NaN.
 *
 * @param {node} source
 * @param {node} target
 * @param {array} properties - the properties of the nodes.
 * @param {object} normArgs - forwarded to squareRooted; not otherwise used.
 * @return {number} |cos(angle)| between the two feature vectors, or 0 when
 *   either vector has zero length (the quotient would be 0/0).
 */
function calculateCosineSimilarity(source, target, properties, normArgs) {
  // Keep only the columns that take part in the feature vector.
  const features = properties.filter(function (name) {
    const key = name.toLowerCase();
    return key !== "class" && key !== "app" && key !== "user" && key !== "weekday";
  });

  let numerator = 0.0;
  for (let i = 0; i < features.length; i++) {
    numerator += source[features[i]] * target[features[i]];
  }

  const denominator = squareRooted(source, features, normArgs) * squareRooted(target, features, normArgs);
  if (denominator === 0) {
    return 0;
  }
  return Math.abs(numerator / denominator);
}

/**
 * Euclidean norm of a node over the given properties.
 *
 * @param {node} node
 * @param {array} properties - the property names to include.
 * @param {object} normArgs - accepted for signature compatibility; unused.
 * @return {number} square root of the sum of squared property values.
 */
function squareRooted(node, properties, normArgs) {
  let sum = 0.0;
  for (let i = 0; i < properties.length; i++) {
    const value = node[properties[i]];
    sum += value * value;
  }
  return Math.sqrt(sum);
}

View File

@@ -0,0 +1,28 @@
/**
 * Dice dissimilarity between two nodes.
 *
 * A column is "not shared" when the two nodes hold values that are not
 * strictly equal. Columns named "class", "app", "user" or "weekday"
 * (case-insensitive) are skipped entirely, so they count neither as shared
 * nor as not shared.
 *
 * @param {node} source
 * @param {node} target
 * @param {array} properties - the properties of the nodes.
 * @param {object} normArgs - unused; accepted for a uniform metric signature.
 * @return {number} notShared / (notShared + 2 * shared), or 0 when no column
 *   was compared.
 */
function calculateDiceDissimilarity(source, target, properties, normArgs) {
  var compared = 0,
      notShared = 0;

  for (var i = 0; i < properties.length; i++) {
    var property = properties[i];
    var key = property.toLowerCase();
    if (key === "class" || key === "app" || key === "user" || key === "weekday") {
      continue;
    }
    compared++;
    if (source[property] !== target[property]) {
      notShared++;
    }
  }

  var shared = compared - notShared;
  var denominator = notShared + 2 * shared;
  // Nothing compared: avoid 0/0 and report "no difference".
  return denominator === 0 ? 0 : notShared / denominator;
}

View File

@@ -0,0 +1,70 @@
/**
 * General-purpose distance between two nodes with mixed column types.
 *
 * For every property owned by both nodes (except one named "index",
 * case-insensitive) the first matching rule applies:
 *   1. both values numeric (per isNumeric): squared difference of the parsed
 *      floats is accumulated;
 *   2. both values contain a letter and are strictly equal: the running
 *      factor is multiplied by ORD_FACTOR (0.75), shrinking the distance;
 *   3. both values are non-numeric and parse as dates: squared difference of
 *      the timestamps is accumulated;
 *   otherwise the column is ignored.
 * Numeric and date values are standardized as
 * (value - avg[i]) / (st_d[i] * sig[i]) unless sig[i] is 0.
 *
 * The result is sqrt(sum) * factor, scaled by properties.length / cols where
 * cols is the number of columns that matched one of the rules above.
 *
 * @param {node} source
 * @param {node} target
 * @param {array} properties - the properties of the nodes.
 * @param {object} normArgs - the normalization arguments: arrays avg, sig
 *   and st_d indexed like properties.
 * @return {number} the distance between source and target nodes.
 */
function calculateDistance(source, target, properties, normArgs) {
  var ORD_FACTOR = 0.75,
      hasOwn = Object.prototype.hasOwnProperty,
      average = normArgs.avg,
      sigma = normArgs.sig,
      st_dev = normArgs.st_d,
      sumDiff = 0.0,
      ordDiff = 1.0,
      cols = 0;

  // Standardize a raw value for the given column; left as-is when sigma is 0.
  function standardize(value, column) {
    if (sigma[column] === 0) {
      return value;
    }
    return (value - average[column]) / (st_dev[column] * sigma[column]);
  }

  for (var i = 0; i < properties.length; i++) {
    var property = properties[i];
    if (!hasOwn.call(source, property) || !hasOwn.call(target, property)
        || property.toLowerCase() === "index") {
      continue;
    }

    var s = source[property],
        t = target[property],
        val1,
        val2;

    if (isNumeric(s) && isNumeric(t)) {
      // Floats and integers.
      val1 = parseFloat(s);
      val2 = parseFloat(t);
    } else if (/[a-zA-Z]/.test(s) && /[a-zA-Z]/.test(t) && s === t) {
      // Identical strings pull the nodes closer together.
      ordDiff *= ORD_FACTOR;
      cols++;
      continue;
    } else {
      // Dates: both values must be non-numeric and parseable.
      var parsedDateS = Date.parse(s);
      var parsedDateT = Date.parse(t);
      if (!(isNaN(s) && !isNaN(parsedDateS) && isNaN(t) && !isNaN(parsedDateT))) {
        continue;
      }
      val1 = parsedDateS;
      val2 = parsedDateT;
    }

    val1 = standardize(val1, i);
    val2 = standardize(val2, i);
    sumDiff += (val1 - val2) * (val1 - val2);
    cols++;
  }

  sumDiff = Math.sqrt(sumDiff) * ordDiff;
  if (cols > 0) {
    // Compensate for columns that could not be compared.
    sumDiff *= properties.length / cols;
  }
  return sumDiff;
}

View File

@@ -0,0 +1,43 @@
/**
 * The distance function that is specifically made for the Poker Hands data set.
 * The suit of the cards does not play an important role when finding
 * the differences in poker hands, so only the rank columns C1..C5 are used.
 *
 * The Euclidean distance over the card ranks is multiplied by
 * ORD_FACTOR * |classSource - classTarget| when both nodes have a CLASS and
 * the classes differ; otherwise it is returned unscaled.
 *
 * @param {node} source
 * @param {node} target
 * @return {number} the distance between source and target nodes.
 */
function calculateDistancePoker(source, target) {
  var ORD_FACTOR = 1.5,
      cards = ["C1", "C2", "C3", "C4", "C5"],
      hasOwn = Object.prototype.hasOwnProperty,
      sumDiff = 0.0,
      ordDiff = 1.0;

  // Accumulate the squared rank difference of every card both hands have.
  for (var i = 0; i < cards.length; i++) {
    var card = cards[i];
    if (hasOwn.call(source, card) && hasOwn.call(target, card)) {
      var rankDiff = parseInt(source[card], 10) - parseInt(target[card], 10);
      sumDiff += rankDiff * rankDiff;
    }
  }

  // Class of poker hands describes the similarities the best,
  // so give it more priority than the differences between cards.
  if (hasOwn.call(source, "CLASS") && hasOwn.call(target, "CLASS")) {
    var sourceClass = parseInt(source["CLASS"], 10),
        targetClass = parseInt(target["CLASS"], 10);
    if (sourceClass !== targetClass) {
      ordDiff *= ORD_FACTOR * Math.abs(sourceClass - targetClass);
    }
  }

  return Math.sqrt(sumDiff) * ordDiff;
}

View File

@@ -0,0 +1,32 @@
/**
 * Euclidean distance between two nodes over their feature columns.
 *
 * Columns named "class", "app", "user" or "weekday" (case-insensitive) are
 * skipped. Each remaining value is standardized as
 * (value - avg[i]) / (2 * sig[i]) unless sig[i] is 0, in which case the raw
 * value is used.
 *
 * @param {node} source
 * @param {node} target
 * @param {array} properties - the properties of the nodes.
 * @param {object} normArgs - normalization arrays avg and sig, indexed like
 *   properties.
 * @return {number} the distance between source and target nodes.
 */
function calculateEuclideanDistance(source, target, properties, normArgs) {
  var sumDiff = 0.0;

  for (var i = 0; i < properties.length; i++) {
    var property = properties[i];
    var key = property.toLowerCase();
    if (key === "class" || key === "app" || key === "user" || key === "weekday") {
      continue;
    }

    var s = source[property],
        t = target[property];
    if (normArgs.sig[i] !== 0) {
      var scale = 2.0 * normArgs.sig[i];
      s = (s - normArgs.avg[i]) / scale;
      t = (t - normArgs.avg[i]) / scale;
    }
    sumDiff += (s - t) * (s - t);
  }

  return Math.sqrt(sumDiff);
}

View File

@@ -0,0 +1,33 @@
/**
 * Squared Euclidean distance between two nodes (no square root is taken).
 *
 * Only properties owned by both nodes are used, and a column named "class"
 * (case-insensitive) is skipped. The sum is accumulated directly from the
 * per-column differences rather than from the expanded form
 * -2*x.y + |x|^2 + |y|^2, which loses precision when the values are large
 * relative to their difference and can even come out negative.
 *
 * @param {node} source
 * @param {node} target
 * @param {array} properties - the properties of the nodes.
 * @param {object} normArgs - unused; accepted for a uniform metric signature.
 * @return {number} the squared distance between source and target nodes.
 */
function calculateEuclideanDistanceTSNE(source, target, properties, normArgs) {
  var hasOwn = Object.prototype.hasOwnProperty,
      sumSquares = 0.0;

  for (var i = 0; i < properties.length; i++) {
    var property = properties[i];
    if (hasOwn.call(source, property) && hasOwn.call(target, property) &&
        property.toLowerCase() !== "class") {
      var diff = source[property] - target[property];
      sumSquares += diff * diff;
    }
  }

  return sumSquares;
}

View File

@@ -0,0 +1,28 @@
/**
 * Fraction of compared columns on which two nodes disagree.
 *
 * A column is "not shared" when the two nodes hold values that are not
 * strictly equal. Columns named "class", "app", "user" or "weekday"
 * (case-insensitive) are skipped and do not count towards the denominator.
 *
 * @param {node} source
 * @param {node} target
 * @param {array} properties - the properties of the nodes.
 * @param {object} normArgs - unused; accepted for a uniform metric signature.
 * @return {number} notShared / compared, or 0 when no column was compared.
 */
function calculateJaccardDissimilarity(source, target, properties, normArgs) {
  var compared = 0,
      notShared = 0;

  for (var i = 0; i < properties.length; i++) {
    var property = properties[i];
    var key = property.toLowerCase();
    if (key === "class" || key === "app" || key === "user" || key === "weekday") {
      continue;
    }
    compared++;
    if (source[property] !== target[property]) {
      notShared++;
    }
  }

  // Nothing compared: avoid 0/0 and report "no difference".
  return compared === 0 ? 0 : notShared / compared;
}

View File

@@ -0,0 +1,34 @@
/**
 * Manhattan (city-block) distance between two nodes, scaled by the share of
 * columns on which they differ.
 *
 * Columns named "class", "app", "user" or "weekday" (case-insensitive) are
 * skipped. Each remaining value is standardized as
 * (value - avg[i]) / (2 * sig[i]) unless sig[i] is 0. The summed absolute
 * differences are then multiplied by differing / properties.length, where
 * "differing" counts columns whose raw values are not strictly equal.
 *
 * @param {node} source
 * @param {node} target
 * @param {array} properties - the properties of the nodes.
 * @param {object} normArgs - normalization arrays avg and sig, indexed like
 *   properties.
 * @return {number} the distance between source and target nodes.
 */
function calculateManhattanDistance(source, target, properties, normArgs) {
  var sum = 0.0,
      cols = 0;

  for (var i = 0; i < properties.length; i++) {
    var property = properties[i];
    var key = property.toLowerCase();
    if (key === "class" || key === "app" || key === "user" || key === "weekday") {
      continue;
    }

    var s = source[property],
        t = target[property];
    // Count differing columns on the raw values, before standardizing.
    if (s !== t) {
      cols++;
    }
    if (normArgs.sig[i] !== 0) {
      var scale = 2.0 * normArgs.sig[i];
      s = (s - normArgs.avg[i]) / scale;
      t = (t - normArgs.avg[i]) / scale;
    }
    sum += Math.abs(s - t);
  }

  return sum * (cols / properties.length);
}

View File

@@ -0,0 +1,93 @@
/**
 * Calculate the values that are used for normalizing the data.
 *
 * The property list is taken from the keys of the first node. For each
 * property the mean and sigma are derived from the running sums returned by
 * calculateSums, using sigma = sqrt((sumSq - n * mean^2) / n). Rounding can
 * push that variance marginally below zero for (near-)constant columns, so
 * it is clamped at 0 to keep sigma from becoming NaN.
 *
 * @param {array} nodes
 * @return {object} that contains the normalization parameters: avg (means),
 *   sig (sigma from the sums) and st_d (result of standardDevation), each an
 *   array indexed like Object.keys(nodes[0]). All three are empty when nodes
 *   is empty or missing.
 */
function calculateNormalization(nodes) {
  if (!nodes || nodes.length === 0) {
    return { avg: [], sig: [], st_d: [] };
  }

  const properties = Object.keys(nodes[0]);
  const sums = calculateSums(nodes, properties);
  const count = nodes.length;
  const average = [];
  const sigma = [];

  // For each property, calculate mean and sigma.
  for (let i = 0; i < properties.length; i++) {
    const avg = sums.sumOfVal[i] / count;
    const variance = (sums.sumOfSq[i] - (count * Math.pow(avg, 2))) / count;
    average[i] = avg;
    sigma[i] = Math.sqrt(Math.max(0, variance));
  }

  return {
    avg: average,
    sig: sigma,
    st_d: standardDevation(nodes, properties, average)
  };
}
/**
 * Population standard deviation of every property across all nodes.
 *
 * Each raw value is converted to a number first: a non-numeric value that
 * Date.parse understands becomes its timestamp, a numeric value (per
 * isNumeric) is parsed as a float, and anything else counts as 0.
 *
 * @param {array} nodes
 * @param {array} properties - list of properties
 * @param {array} avg - mean of each property, indexed like properties.
 * @return {array} standard deviation of each property, indexed like
 *   properties.
 */
function standardDevation(nodes, properties, avg) {
  // Convert one raw cell to the number used in the deviation.
  const toNumber = (raw) => {
    const asDate = Date.parse(raw);
    if (isNaN(raw) && !isNaN(asDate)) {
      return asDate.valueOf();
    }
    if (isNumeric(raw)) {
      return parseFloat(raw);
    }
    // Ignore the strings.
    return 0;
  };

  return Array.from(properties, (name, column) => {
    const squaredTotal = nodes.reduce(
      (acc, node) => acc + Math.pow(toNumber(node[name]) - avg[column], 2),
      0
    );
    return Math.sqrt(squaredTotal / nodes.length);
  });
}
/**
 * Per-property sum of values and sum of squared values over all nodes.
 *
 * A non-numeric value that Date.parse understands contributes its
 * timestamp, a numeric value (per isNumeric) contributes its parsed float,
 * and any other value contributes nothing.
 *
 * @param {array} nodes
 * @param {array} properties - list of properties
 * @return {object} with arrays sumOfVal (sums of values) and sumOfSq
 *   (sums of squares), both indexed like properties.
 */
function calculateSums(nodes, properties) {
  const width = properties.length;
  const totals = new Array(width).fill(0);
  const squares = new Array(width).fill(0);

  // Add one numeric observation to the running sums of a column.
  const accumulate = (column, value) => {
    totals[column] += value;
    squares[column] += Math.pow(value, 2);
  };

  nodes.forEach((node) => {
    for (let column = 0; column < width; column++) {
      const raw = node[properties[column]];
      const asDate = Date.parse(raw);
      if (isNaN(raw) && !isNaN(asDate)) {
        accumulate(column, asDate.valueOf());
      } else if (isNumeric(raw)) {
        accumulate(column, parseFloat(raw));
      }
      // Any other value (plain strings) leaves both sums untouched.
    }
  });

  return {
    sumOfVal: totals,
    sumOfSq: squares
  };
}

View File

@@ -0,0 +1,8 @@
/**
 * Tell whether a value can be treated as a finite number.
 *
 * The value must start with something parseFloat can read AND must be
 * finite when coerced to a number as a whole, so "12px" is rejected even
 * though parseFloat alone would read 12 from it.
 *
 * @param {object} n - object to check.
 * @return {Boolean} true, if it is a number, false otherwise.
 */
function isNumeric(n) {
  var leading = parseFloat(n);
  if (isNaN(leading)) {
    return false;
  }
  return isFinite(n);
}