import { filter, map, reduce, forEach } from 'lodash';
import $ from 'jquery';
import { Ibook, ImarkProcessed } from '../models';

/*
 * One space-delimited token taken from the innerHTML of a `.t` line of the page.
 */
interface IpageElement {
    // index of the `.t` element (within the jQuery selection) that this token came from
    lineIndex: number;
    // the token exactly as it was split out of the line's innerHTML
    text: string;
    // `text` trimmed, lower-cased and stripped with textToSpeechConfig.HTMLelementRegex
    normalizedText: string;
    // speech marks judged to match this token; starts empty and is appended to by the matching passes
    possibleMarks: ImarkProcessed[];
}

/*
 * Tuning values for matching speech marks to page elements.
 * The *Weight values are added to a candidate's confidence in determinElementMatchConfidence.
 */
const textToSpeechConfig = {
    // the before/after scans in determinConsecutiveElementConfidence stop once their miss counter exceeds this value
    skipMissedConsecutive: 1,
    // added when the mark's normalized value is found inside the element's normalized text
    foundInElementStringWeight: 0.5,
    // added when the element's normalized text equals the mark's normalized value
    matchesNormalizedTextWeight: 1,
    // added when the element's original text equals the mark's original value
    matchesOriginalElementWeight: 1,
    // added when the normalized texts are equal and the mark is flagged unique
    matchesElementAndUniqueWeight: 4,
    // added when the element's normalized text is found inside the mark's normalized value
    foundInMarkValueWeight: 0.25,
    // the consecutive-match score is divided by this before it is used
    divideConsecutiveMatchConfidence: 2,
    // a candidate scoring below this value plus foundInMarkValueWeight has both confidences reset to 0
    minimumThreashholdForPossibleMatch: 1,
    // the single-character pass leaves an element alone when its first possible mark already has an elementMatchConfidence above this value
    // NOTE(review): the three weights originally listed here (foundInElementStringWeight + matchesNormalizedTextWeight +
    // matchesOriginalElementWeight) sum to 2.5; 2.75 equals that sum plus foundInMarkValueWeight - confirm which was intended
    minimumThreashholdForTryingSingleCharacterMatch: 2.75, // see the review note above
    // largest gap between the neighbouring marks' times for which a missing timecode is interpolated
    // (the comment in updatePageHTML describes this as 2000ms)
    maximumTimeBetweenMatches: 2000,
    // removed from speech mark values when normalizing: ’ ' and ?
    // NOTE(review): the trailing `|` adds an empty alternative that matches the empty string; harmless while the replacement is ''
    speechMarkRegex: /([’'?])|/g,
    // removed from element text when normalizing: the listed punctuation characters, a comma/period/hyphen that is
    // not directly followed by a word character, and the literal text &nbsp;
    HTMLelementRegex: /([;:?!"()”“|`’'~*])|([,.-]\B|(&nbsp;))/g
};

/*
 * Default values for the processed-mark fields. They are spread first into every mark
 * built by addSpeechMarkTimesAndTappableClass, so the raw mark and explicit overrides win.
 */
const intialMarkProcessed = {
    // set to true only for marks whose normalized value occurs once on the page (see addSpeechMarkTimesAndTappableClass)
    unique: false,
    // lower-cased mark value with speechMarkRegex characters removed
    normalizedValue: '',
    confirmed: 0,
    // score from comparing the mark with a single element
    elementMatchConfidence: 0,
    // score from the neighbouring marks/elements also matching
    consecutiveMatchConfidence: 0
};

/*
 * Decide whether a candidate line should be treated as a line of words.
 * A candidate containing the text "<a" is rejected (returns false); anything else is accepted.
 */
const isLine = (possibleLine: string) => possibleLine.indexOf('<a') === -1;

/*
 * Wrap every space-separated word of every `.t` line in a "tappable" span, without timecodes.
 * Lines rejected by isLine are left untouched.
 * Returns the outerHTML of the first element parsed from pageData.
 */
export const addTappableClassOnly = (pageData: string) => {
    const $page = $(pageData);
    $page.find('.t').each((_position, lineDiv) => {
        if (!isLine(lineDiv.innerHTML)) {
            // returning from a jQuery each callback moves on to the next element
            return;
        }
        // each `.t` div is a single line; its words are separated by single spaces
        const wrappedWords = lineDiv.innerHTML
            .trim()
            .split(' ')
            .map((word) => `<span class="tappable">${word} </span>`);
        lineDiv.innerHTML = wrappedWords.join(' ');
    });
    return $page[0].outerHTML;
};

/*
 * determinElementMatchConfidence
 * Score every speech mark against one page element.
 *
 * elementIndex - index into allElements of the element being matched
 * allElements  - every element on the page (neighbours are used for the consecutive check)
 * marks        - the candidate speech marks
 *
 * Returns a new array, parallel to marks, where each entry is a copy of the mark with
 * elementMatchConfidence and consecutiveMatchConfidence filled in. Both are 0 when the
 * combined score is below the minimum threshold for a possible match.
 */
const determinElementMatchConfidence = (
    elementIndex: number,
    allElements: IpageElement[],
    marks: ImarkProcessed[]
) => {
    const {
        foundInElementStringWeight,
        matchesNormalizedTextWeight,
        matchesOriginalElementWeight,
        matchesElementAndUniqueWeight,
        foundInMarkValueWeight,
        divideConsecutiveMatchConfidence,
        minimumThreashholdForPossibleMatch
    } = textToSpeechConfig;
    const currentElement = allElements[elementIndex];
    const elementNormalizedText = currentElement.normalizedText;

    return map(marks, (mark, index) => {
        const markNormalizedValue = mark.normalizedValue;
        // Literal substring tests. String.prototype.search compiles a string argument into a
        // regular expression, so text such as "u.s.", "$5" or "(" would match the wrong thing
        // or throw a SyntaxError. An empty string is contained in everything and carries no
        // signal, so it never counts as found.
        const foundInElementString =
            markNormalizedValue !== '' &&
            elementNormalizedText.indexOf(markNormalizedValue) !== -1;
        const matchesNormalizedText =
            elementNormalizedText === markNormalizedValue;
        const matchesOriginalElement = currentElement.text === mark.value;
        // the mark value is unique on the page and we matched it with the element
        const matchesElementAndUnique = matchesNormalizedText && mark.unique;
        const foundInMarkValue =
            elementNormalizedText !== '' &&
            markNormalizedValue.indexOf(elementNormalizedText) !== -1;

        let confirmations = 0;
        if (foundInElementString) {
            confirmations += foundInElementStringWeight;
        }
        if (matchesNormalizedText) {
            confirmations += matchesNormalizedTextWeight;
        }
        if (matchesOriginalElement) {
            confirmations += matchesOriginalElementWeight;
        }
        if (matchesElementAndUnique) {
            confirmations += matchesElementAndUniqueWeight;
        }
        // foundInMarkValue is a weak signal: when nothing else matches, picking the closest
        // timecode from the surrounding elements is more reliable, which is why its weight is
        // excluded from the thresholds below.
        if (foundInMarkValue) {
            confirmations += foundInMarkValueWeight;
        }

        // extra credit for an exact match on text of at least 3 characters
        if (
            (matchesNormalizedText || matchesOriginalElement) &&
            elementNormalizedText.length > 2
        ) {
            confirmations += 2;
        }

        // only check the neighbours when something other than foundInMarkValue matched
        let consecutiveMatchConfidence = 0;
        if (confirmations > foundInMarkValueWeight) {
            consecutiveMatchConfidence = determinConsecutiveElementConfidence(
                elementNormalizedText,
                elementIndex,
                allElements,
                marks,
                index
            );
            if (consecutiveMatchConfidence > 0) {
                consecutiveMatchConfidence =
                    consecutiveMatchConfidence /
                    divideConsecutiveMatchConfidence;
            }
        }

        // minimum threshold for being considered a possible match; foundInMarkValueWeight is added
        // because a match must never be accepted solely on that signal
        if (
            confirmations + consecutiveMatchConfidence <
            minimumThreashholdForPossibleMatch + foundInMarkValueWeight
        ) {
            return {
                ...mark,
                elementMatchConfidence: 0,
                consecutiveMatchConfidence: 0
            };
        }
        return {
            ...mark,
            elementMatchConfidence: confirmations,
            consecutiveMatchConfidence
        };
    });
};

/*
 * determinConsecutiveElementConfidence
 * Count how well the neighbours of a mark line up with the neighbours of an element.
 * Walking outwards from the pair (first forwards, then backwards), each neighbouring
 * mark/element pair whose normalized texts are equal adds 1 when that element is on the
 * same line as the starting element and 0.5 otherwise. Each direction keeps its own miss
 * counter and stops early once that counter exceeds textToSpeechConfig.skipMissedConsecutive.
 * Returns the accumulated number. The element parameter is not used.
 */
const determinConsecutiveElementConfidence = (
    element: string,
    elementIndex: number,
    allElements: IpageElement[],
    marks: ImarkProcessed[],
    markIndex: number
) => {
    const allowedMisses = textToSpeechConfig.skipMissedConsecutive;

    // points earned by one neighbouring pair; 0 means the pair did not match
    const scorePair = (
        neighbourMark: ImarkProcessed,
        neighbourElement: IpageElement | undefined
    ): number => {
        if (
            !neighbourElement ||
            neighbourMark.normalizedValue !== neighbourElement.normalizedText
        ) {
            return 0;
        }
        const sameLine =
            allElements[elementIndex].lineIndex === neighbourElement.lineIndex;
        return sameLine ? 1 : 0.5;
    };

    let total = 0;

    // neighbours AFTER the mark
    let missesAhead = 0;
    let ahead = 1;
    while (
        markIndex + ahead < marks.length &&
        elementIndex + ahead < allElements.length
    ) {
        const gained = scorePair(
            marks[markIndex + ahead],
            allElements[elementIndex + ahead]
        );
        if (gained > 0) {
            total += gained;
        } else if (missesAhead > allowedMisses) {
            break;
        } else {
            missesAhead += 1;
        }
        ahead += 1;
    }

    // neighbours BEFORE the mark
    let missesBehind = 0;
    let behind = 1;
    while (markIndex - behind >= 0 && elementIndex - behind >= 0) {
        const gained = scorePair(
            marks[markIndex - behind],
            allElements[elementIndex - behind]
        );
        if (gained > 0) {
            total += gained;
        } else if (missesBehind > allowedMisses) {
            break;
        } else {
            missesBehind += 1;
        }
        behind += 1;
    }

    return total;
};

/*
 * Build the tappable span for a word, carrying the mark's time in data-time-start.
 */
const addTimecode = (word: string, mark: { time: string }) =>
    `<span class="tappable" data-time-start="${mark.time}" >${word} </span>`;

/*
 * updatePageHTML
 * Rewrite the innerHTML of every word line so each word is wrapped in a tappable span,
 * with a data-time-start attribute whenever a timecode could be determined.
 *
 * $arrayOfLines   - the `.t` elements of the page; modified in place
 * allPageElements - the flat list of words built from those lines, in order, with their possibleMarks
 * speechMarksData - every processed speech mark, used to look up the closest mark for an interpolated time
 *
 * Lines rejected by isLine are left untouched. The element list is built without them, so
 * consuming elements for them would shift every following word onto the wrong element.
 */
const updatePageHTML = (
    $arrayOfLines: JQuery<HTMLElement>,
    allPageElements: IpageElement[],
    speechMarksData: ImarkProcessed[]
) => {
    // markup for a word that gets no timecode
    const plainTappable = (word: string) =>
        `<span class="tappable">${word} </span>`;

    // combined score used to rank competing marks
    const totalConfidence = (mark: ImarkProcessed) =>
        mark.elementMatchConfidence + mark.consecutiveMatchConfidence;

    // time of an element's first possible mark, or undefined when there is none.
    // undefined (rather than 0) marks "missing" so a mark at time 0 still counts.
    const firstMarkTime = (
        element: IpageElement | undefined
    ): number | undefined => {
        if (
            !element ||
            !element.possibleMarks ||
            !element.possibleMarks.length
        ) {
            return undefined;
        }
        const time = parseInt(element.possibleMarks[0].time, 10);
        return isNaN(time) ? undefined : time;
    };

    // the speech mark whose time is nearest to targetTime (undefined when there are no marks)
    const closestMarkTo = (targetTime: number) =>
        reduce(speechMarksData, (prev, current) =>
            Math.abs(parseInt(current.time, 10) - targetTime) <
            Math.abs(parseInt(prev.time, 10) - targetTime)
                ? current
                : prev
        );

    let elementIndex = 0;
    $arrayOfLines.each((_lineIndex, $tDiv) => {
        if (isLine($tDiv.innerHTML) === false) {
            return;
        }
        const lineArray = $tDiv.innerHTML.trim().split(' ');
        const updatedLine = lineArray.map((originalWord) => {
            const position = elementIndex;
            elementIndex++;
            const flatWordObj = allPageElements[position];

            if (!flatWordObj) {
                // more words in the HTML than elements were collected; keep the word as it is
                console.warn(
                    'could not find missing timecode for: ' + originalWord
                );
                return plainTappable(originalWord);
            }

            if (flatWordObj.possibleMarks && flatWordObj.possibleMarks.length) {
                // there are multiple possible marks only if both reducePossibleMarks() and
                // matchSingleCharacterElements() found a match; keep the most confident one
                // (the earlier mark wins a tie)
                const chosenMark =
                    reduce(flatWordObj.possibleMarks, (prev, current) =>
                        totalConfidence(current) > totalConfidence(prev)
                            ? current
                            : prev
                    ) || flatWordObj.possibleMarks[0];
                return addTimecode(flatWordObj.text, chosenMark);
            }

            /*
             * if there are 0 possibleMarks and the elements directly before and after this one
             * both have a mark, no more than maximumTimeBetweenMatches apart, use the speech
             * mark closest to the midpoint of those two times
             */
            console.log(
                'element missing timecode, trying to find timecodes before and after: ' +
                    originalWord
            );
            const timeBeforeWord = firstMarkTime(allPageElements[position - 1]);
            const timeAfterWord = firstMarkTime(allPageElements[position + 1]);
            if (
                timeBeforeWord !== undefined &&
                timeAfterWord !== undefined &&
                Math.abs(timeAfterWord - timeBeforeWord) <
                    textToSpeechConfig.maximumTimeBetweenMatches
            ) {
                const clossestSpeechMark = closestMarkTo(
                    (timeBeforeWord + timeAfterWord) / 2
                );
                if (clossestSpeechMark) {
                    console.log(
                        'found clossest time for missing timecode: ' +
                            flatWordObj.text +
                            ' ' +
                            clossestSpeechMark.value,
                        clossestSpeechMark
                    );
                    return addTimecode(flatWordObj.text, {
                        time: clossestSpeechMark.time
                    });
                }
            }
            // Using only the time before or only the time after was tried and found to be
            // very unreliable, so without both neighbours the word gets no timecode.
            console.warn(
                'could not find missing timecode for: ' + flatWordObj.text
            );
            return plainTappable(flatWordObj.text);
        });
        $tDiv.innerHTML = updatedLine.join(' ');
    });
};

/*
 * reducePossibleMarks
 * First matching pass: for every element with non-empty normalized text, score all speech
 * marks and keep the single most confident one.
 * Returns a new array; an element that found a mark is copied with the mark appended to
 * possibleMarks and the mark's value stored as foundElement. Other elements are returned as is.
 */
const reducePossibleMarks = (
    allPageElements: IpageElement[],
    speechMarksData: ImarkProcessed[]
) => {
    // marks already assigned to an earlier element, keyed by mark time
    const marksAlreadyUsed: { [key: string]: ImarkProcessed } = {};

    const confidenceOf = (mark: ImarkProcessed) =>
        mark.elementMatchConfidence + mark.consecutiveMatchConfidence;

    // walk the candidates in order, keeping the one we have the most confidence in
    const pickMostConfident = (
        candidates: ImarkProcessed[]
    ): ImarkProcessed | undefined => {
        let leader = candidates[0];
        for (let i = 1; i < candidates.length; i++) {
            const challenger = candidates[i];
            const leaderScore = confidenceOf(leader);
            let challengerScore = confidenceOf(challenger);
            const leaderUsedBefore = marksAlreadyUsed[leader.time];
            // on a non-zero tie, when the leader's time was already taken by a mark scoring at
            // least as much, give the challenger a .5 boost so the unused mark wins
            if (
                leaderScore === challengerScore &&
                leaderScore !== 0 &&
                leaderUsedBefore &&
                confidenceOf(leaderUsedBefore) >= challengerScore
            ) {
                challengerScore += 0.5;
            }
            if (challengerScore > leaderScore) {
                leader = challenger;
            }
        }
        return leader;
    };

    return map(allPageElements, (pageElement, position) => {
        if (pageElement.normalizedText === '') {
            return pageElement;
        }
        const bestMark = pickMostConfident(
            determinElementMatchConfidence(
                position,
                allPageElements,
                speechMarksData
            )
        );
        const nothingFound =
            !bestMark ||
            (bestMark.elementMatchConfidence === 0 &&
                bestMark.consecutiveMatchConfidence === 0);
        if (!bestMark || nothingFound) {
            // no possible mark here; the single character pass may still find one later
            return pageElement;
        }
        marksAlreadyUsed[bestMark.time] = bestMark;
        return {
            ...pageElement,
            possibleMarks: [...pageElement.possibleMarks, bestMark],
            foundElement: bestMark.value
        };
    });
};

/*
 * flattenSpeechMarksData
 * Expand every speech mark into one mark per character of its normalizedValue.
 * Each produced mark is a copy of its source mark with normalizedValue set to that single
 * character and unique set to false. Order follows the marks, then the characters.
 */
const flattenSpeechMarksData = (speechMarksData: ImarkProcessed[]) => {
    const perCharacterMarks: ImarkProcessed[] = [];
    forEach(speechMarksData, (sourceMark) => {
        const characters = sourceMark.normalizedValue.split('');
        for (let i = 0; i < characters.length; i++) {
            perCharacterMarks.push({
                ...sourceMark,
                normalizedValue: characters[i],
                unique: false
            });
        }
    });
    return perCharacterMarks;
};

/*
 * matchSingleCharacterElements
 * Second matching pass, for elements whose normalized text is exactly one character.
 * The speech marks are flattened to single characters and scored against each such element,
 * unless its first possible mark already has an elementMatchConfidence above
 * minimumThreashholdForTryingSingleCharacterMatch.
 * Returns a new array; an element that found a mark is copied with the mark appended to
 * possibleMarks and the mark's value stored as foundElement. Other elements are returned as is.
 */
const matchSingleCharacterElements = (
    allPageElements: IpageElement[],
    speechMarksData: ImarkProcessed[]
) => {
    const characterMarks = flattenSpeechMarksData(speechMarksData);
    const confidentEnough =
        textToSpeechConfig.minimumThreashholdForTryingSingleCharacterMatch;

    const confidenceOf = (mark: ImarkProcessed) =>
        mark.elementMatchConfidence + mark.consecutiveMatchConfidence;

    // most confident candidate; the earlier one wins a tie, undefined for an empty list
    const pickMostConfident = (
        candidates: ImarkProcessed[]
    ): ImarkProcessed | undefined => {
        let leader = candidates[0];
        for (let i = 1; i < candidates.length; i++) {
            if (confidenceOf(candidates[i]) > confidenceOf(leader)) {
                leader = candidates[i];
            }
        }
        return leader;
    };

    return map(allPageElements, (pageElement, position) => {
        // only single character elements are handled here
        if (pageElement.normalizedText.length !== 1) {
            return pageElement;
        }

        // keep an earlier match when its elementMatchConfidence is reasonably high
        const earlierMarks = pageElement.possibleMarks;
        if (
            earlierMarks.length > 0 &&
            earlierMarks[0].elementMatchConfidence > confidentEnough
        ) {
            return pageElement;
        }

        const bestMark = pickMostConfident(
            determinElementMatchConfidence(
                position,
                allPageElements,
                characterMarks
            )
        );
        if (
            bestMark === undefined ||
            (bestMark.elementMatchConfidence === 0 &&
                bestMark.consecutiveMatchConfidence === 0)
        ) {
            // did not find any possible mark
            return pageElement;
        }

        return {
            ...pageElement,
            possibleMarks: [...pageElement.possibleMarks, bestMark],
            foundElement: bestMark.value
        };
    });
};
/*
 * The MAIN function for adding speech mark times to the HTML
 *
 * book       - the book whose cachedPages entry `page<pageNumber>` holds the speech marks
 * pageData   - the HTML of the page
 * pageNumber - used to build the cachedPages key
 *
 * Returns the outerHTML of the first element parsed from pageData, with every word of every
 * `.t` line wrapped in a tappable span and, where a speech mark could be matched, a
 * data-time-start attribute. When the page has no speech marks (or is not cached) it falls
 * back to addTappableClassOnly.
 */
export const addSpeechMarkTimesAndTappableClass = (
    book: Ibook,
    pageData: string,
    pageNumber: number
) => {
    const pageKey = `page${pageNumber}`;
    // a page that is not cached is treated like a page without speech marks
    const cachedPage = book.cachedPages[pageKey];
    const rawSpeechMarksData = cachedPage ? cachedPage.speechMarks : undefined;

    const normalizeMarkValue = (value: string) =>
        value.toLowerCase().replace(textToSpeechConfig.speechMarkRegex, '');

    // normalized once up front so the uniqueness check below does not redo it for every pair of marks
    const normalizedRawValues = map(rawSpeechMarksData, (rawMark) =>
        normalizeMarkValue(rawMark.value)
    );

    // prep speech marks
    const speechMarksData: ImarkProcessed[] = [];
    forEach(rawSpeechMarksData, (tempMark) => {
        // sometimes a single mark holds several words; split them up and give every word the
        // same timecode. These are never flagged unique.
        const splitMarkValue = tempMark.value.split(' ');
        if (splitMarkValue.length > 1) {
            splitMarkValue.forEach((word) => {
                speechMarksData.push({
                    ...intialMarkProcessed,
                    ...tempMark,
                    unique: false,
                    confirmed: 0,
                    normalizedValue: normalizeMarkValue(word)
                });
            });
            return;
        }
        const normalizedValue = normalizeMarkValue(tempMark.value);
        const occurrences = filter(
            normalizedRawValues,
            (value) => value === normalizedValue
        ).length;
        speechMarksData.push({
            ...intialMarkProcessed,
            ...tempMark,
            // unique: occurs once on the page and is longer than 3 characters
            unique: occurrences === 1 && tempMark.value.length > 3,
            confirmed: 0,
            normalizedValue
        });
    });

    if (speechMarksData.length === 0) {
        console.error(
            'missing speechMarks data, switching to only adding tappable classes'
        );
        return addTappableClassOnly(pageData);
    }

    const $pageData = $(pageData);
    const $arrayOfLines = $pageData.find('.t');

    /*
     * normalize to an array of indexed elements, e.g.
     * {lineIndex: 0, text: 'The', normalizedText: 'the', possibleMarks: []}
     * for each element, the matching passes loop over the speech marks and assign a confidence
     * rating for how confident we are that a particular mark matches this element.
     * Then this array of all the elements is sent to a function that changes the HTML of the page
     */
    let allPageElements: IpageElement[] = [];
    $arrayOfLines.each((lineIndex, $tDiv) => {
        // each div with class "t" is a single line; lines rejected by isLine contribute no elements
        if (isLine($tDiv.innerHTML) === false) {
            return;
        }
        $tDiv.innerHTML
            .trim()
            .split(' ')
            .forEach((text) => {
                allPageElements.push({
                    lineIndex,
                    text,
                    normalizedText: text
                        .trim()
                        .toLowerCase()
                        .replace(textToSpeechConfig.HTMLelementRegex, ''),
                    possibleMarks: []
                });
            });
    });

    allPageElements = reducePossibleMarks(allPageElements, speechMarksData);
    allPageElements = matchSingleCharacterElements(
        allPageElements,
        speechMarksData
    );

    updatePageHTML($arrayOfLines, allPageElements, speechMarksData);
    return $pageData[0].outerHTML;
};
