// Source: Cosine.js

/**
 * Class representing cosine similarity calculations.
 * @class
 */
class Cosine {
    /**
     * Generates a term frequency map, counting the occurrences of each word in the input string.
     *
     * Words are separated by runs of whitespace (spaces, tabs, newlines); leading,
     * trailing and repeated whitespace never produces an empty "word".
     *
     * @param {string} str - The input string to analyze.
     * @returns {object} - A null-prototype object with words as keys and their frequencies as values.
     *     An empty or whitespace-only string yields an object with no keys.
     */
    static termFreqMap(str) {
        // A null-prototype object is used so that words such as "constructor",
        // "toString" or "__proto__" are counted as ordinary keys instead of
        // resolving to members inherited from Object.prototype.
        const termFreq = Object.create(null);
        const words = str.split(/\s+/).filter((word) => word !== '');
        for (const word of words) {
            termFreq[word] = (termFreq[word] || 0) + 1;
        }
        return termFreq;
    }

    /**
     * Adds all own keys from a map to a dictionary, ensuring a consistent set of terms for vector comparison.
     *
     * The dictionary is mutated in place; each added key is set to `true`.
     * Pass a null-prototype dictionary (`Object.create(null)`) if the keys may
     * include `__proto__`, which a plain object cannot store by assignment.
     *
     * @param {object} map - The map containing keys to add to the dictionary.
     * @param {object} dict - The dictionary to add the keys to.
     */
    static addKeysToDict(map, dict) {
        // Object.keys restricts the walk to own enumerable keys, so properties
        // inherited through the prototype chain are never treated as terms.
        for (const key of Object.keys(map)) {
            dict[key] = true;
        }
    }

    /**
     * Converts a term frequency map into a term frequency vector, using a common dictionary for consistent order.
     *
     * The vector has one entry per own key of `dict`, in `Object.keys(dict)` order.
     * Terms that are missing from `map` contribute 0.
     *
     * @param {object} map - The term frequency map to convert.
     * @param {object} dict - The dictionary of terms to use for vector alignment.
     * @returns {array} - A term frequency vector representing the word frequencies of the input string.
     */
    static termFreqMapToVector(map, dict) {
        const hasOwn = Object.prototype.hasOwnProperty;
        // Only own properties of `map` count as frequencies; this keeps inherited
        // members (e.g. `constructor` on a plain object) out of the vector.
        return Object.keys(dict).map((term) => (hasOwn.call(map, term) ? map[term] || 0 : 0));
    }

    /**
     * Calculates the dot product of two vectors, representing the sum of their element-wise multiplications.
     *
     * Iteration is driven by the length of `vecA`; both vectors are expected to
     * have the same length.
     *
     * @param {array} vecA - The first vector for the dot product.
     * @param {array} vecB - The second vector for the dot product.
     * @returns {number} - The dot product of the two vectors.
     */
    static vecDotProduct(vecA, vecB) {
        let product = 0;
        for (let i = 0; i < vecA.length; i++) {
            product += vecA[i] * vecB[i];
        }
        return product;
    }

    /**
     * Calculates the magnitude (Euclidean length) of a vector.
     *
     * @param {array} vec - The vector for which to calculate the magnitude.
     * @returns {number} - The magnitude of the vector (0 for an empty vector).
     */
    static vecMagnitude(vec) {
        let sumOfSquares = 0;
        for (const component of vec) {
            sumOfSquares += component * component;
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Calculates the cosine similarity between two vectors using their dot product and magnitudes.
     *
     * @param {array} vecA - The first vector for comparison.
     * @param {array} vecB - The second vector for comparison.
     * @returns {number} - The cosine similarity between the two vectors. For non-negative vectors
     *     (such as term frequencies) it ranges from 0.0 (no similarity) to 1.0 (perfect similarity).
     *     Returns 0 when either vector has zero magnitude.
     */
    static calculateSimilarity(vecA, vecB) {
        const denominator = this.vecMagnitude(vecA) * this.vecMagnitude(vecB);
        // A zero-magnitude vector has no direction; report "no similarity"
        // instead of the NaN that 0 / 0 would produce.
        if (denominator === 0) {
            return 0;
        }
        const cosine = this.vecDotProduct(vecA, vecB) / denominator;
        // Floating-point rounding can push the ratio marginally outside [-1, 1]
        // (e.g. 3 / (Math.sqrt(3) * Math.sqrt(3)) > 1), so clamp it.
        return Math.max(-1, Math.min(1, cosine));
    }

    /**
     * Calculates the cosine similarity between two strings based on their term frequency vectors.
     *
     * @param {string} strA - The first string for comparison.
     * @param {string} strB - The second string for comparison.
     * @returns {number} - The cosine similarity between the two strings, ranging from 0.0 (no similarity)
     *     to 1.0 (perfect similarity). Returns 0 when either string contains no words.
     */
    static similarity(strA, strB) {
        const termFreqA = this.termFreqMap(strA);
        const termFreqB = this.termFreqMap(strB);

        // Null-prototype dictionary so that any word, including "__proto__",
        // can be stored as a key.
        const dict = Object.create(null);
        this.addKeysToDict(termFreqA, dict);
        this.addKeysToDict(termFreqB, dict);

        const termFreqVecA = this.termFreqMapToVector(termFreqA, dict);
        const termFreqVecB = this.termFreqMapToVector(termFreqB, dict);

        return this.calculateSimilarity(termFreqVecA, termFreqVecB);
    }
}

// Expose the Cosine class as the default export of this module.
export default Cosine;