#include "wtimport.h"

using namespace Qt::StringLiterals;

/**
 * Creates an importer that is not yet bound to a file.
 *
 * Both regular expressions start with an empty pattern; the actual patterns
 * are assigned later in ProcessTOCEntry(). The epub reader's ProcessTOCEntry
 * and ProcessCoverImage notifications are wired to the members of the same
 * name on this object.
 *
 * @param parent QObject parent passed on to the base class
 */
wtimport::wtimport(QObject *parent)
    : QObject(parent)
    , rxDate("", QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::CaseInsensitiveOption)
    , rxSong("", QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::CaseInsensitiveOption)
{
    // forward the epub reader's notifications to this importer
    connect(&epb, &epub::ProcessCoverImage, this, &wtimport::ProcessCoverImage);
    connect(&epb, &epub::ProcessTOCEntry, this, &wtimport::ProcessTOCEntry);

    // shared database access object
    sql = &Singleton<sql_class>::Instance();
}

/**
 * Creates an importer and immediately prepares the given file via init().
 *
 * @param fileName path of the file handed to init()
 * @param parent   QObject parent passed on to the delegated constructor
 */
wtimport::wtimport(QString fileName, QObject *parent)
    : wtimport(parent)
{
    init(fileName);
}

/**
 * Imports the previously prepared file (see init()).
 *
 * Resets the per-import counters, then runs the epub reader's TOC and cover
 * import inside a single database transaction. The transaction is committed
 * when no error was recorded and rolled back otherwise, matching the
 * behaviour of importHtml().
 *
 * @return the summary from resultText() on success, otherwise the error text
 *         (the epub reader's error when the file could not be prepared)
 */
QString wtimport::Import()
{
    validDates = 0;
    lastError.clear();
    firstDate.clear();
    lastDate.clear();
    rxLoadState = rxLoadStates::noLoadAttempt;

    if (!prepared) {
        lastError = epb.lastErr;
        return lastError;
    }

    sql->startTransaction();
    epb.ImportTOC();
    epb.ImportCover();
    if (!lastError.isEmpty()) {
        // do not leave the transaction open (and partially written) on failure
        sql->rollbackTransaction();
        return lastError;
    }
    sql->commitTransaction();
    return resultText();
}

/**
 * Imports a publication file, choosing the reader by file extension.
 *
 * Files whose name ends with "jwpub" (case-insensitive) are only passed to
 * jwpub::Prepare() and yield an empty result text; every other file is
 * prepared with init() and imported with Import().
 *
 * @param fileName path of the file to import
 * @return empty string for jwpub files, otherwise the result of Import()
 */
QString wtimport::importFile(const QString fileName)
{
    const bool isJwpub = fileName.endsWith("jwpub", Qt::CaseInsensitive);
    if (!isJwpub) {
        init(fileName);
        return Import();
    }

    jwpub package;
    package.Prepare(fileName);
    return "";
}

/**
 * Imports a single article from already downloaded HTML.
 *
 * The document id is taken from the last URL path segment (wol.jw.org) or
 * from a "docId-<digits>" token in the markup (www.jw.org). Language codes
 * are read from the markup, the epub reader's language fields are filled in,
 * void elements (img, hr, input) are stripped, and the result is parsed by
 * ProcessTOCEntry() inside a database transaction.
 *
 * @param url  address the HTML was loaded from; only hosts wol.jw.org and
 *             www.jw.org are accepted
 * @param html markup of the page
 * @return the summary from resultText() on success, "Invalid URL!" for an
 *         unsupported host or a URL without path segments, otherwise the
 *         recorded error text
 */
QString wtimport::importHtml(const QString url, QString html)
{
    validDates = 0;
    lastError.clear();
    firstDate.clear();
    lastDate.clear();
    rxLoadState = rxLoadStates::noLoadAttempt;
    QUrl u(url);
    QStringList urlParts = u.path().split(QLatin1Char('/'), Qt::SkipEmptyParts);
    // first()/last() must not be called on an empty list
    if (urlParts.isEmpty())
        return "Invalid URL!";
    QString langCode = urlParts.first();
    QString documentId;
    if (u.host() == "wol.jw.org") {
        documentId = urlParts.last();
        year = documentId.mid(0, 4).toInt();
    } else if (u.host() == "www.jw.org") {
        QRegularExpression docIdRegex("\\sdocId-(\\d+)\\s");
        QRegularExpressionMatch docIdMatch = docIdRegex.match(html);
        if (docIdMatch.hasMatch()) {
            documentId = docIdMatch.captured(1);
            year = documentId.mid(0, 4).toInt();
        }
        // NOTE(review): without a docId match, documentId stays empty and year keeps
        // its previous value - confirm whether this should be reported as an error
    } else {
        return "Invalid URL!";
    }
    // issue number derived from the last three digits of the document id
    // (presumably ids advance by 40 per monthly issue, starting at 240 - TODO confirm)
    month = (documentId.right(3).toInt() - 200) / 40; // issue

    sql = &Singleton<sql_class>::Instance();
    // get language from html attribute
    QRegularExpression langRegex("\\slang=['\"](?<lang>[\\w\\-]+)['\"]\\s");
    QRegularExpressionMatch langMatch = langRegex.match(html);
    if (langMatch.hasMatch()) {
        langCode = langMatch.captured("lang");
    }
    // get library language code
    langRegex = QRegularExpression("\\s(data-lang=['\"]|ml-)(?<lang>\\w+)(['\"])*\\s");
    langMatch = langRegex.match(html);
    QString libraryLangCode;
    if (langMatch.hasMatch()) {
        libraryLangCode = langMatch.captured("lang");
    }
    if ((langCode == "en" && libraryLangCode != "E")
        || langCode.compare(libraryLangCode, Qt::CaseInsensitive) == 0) {
        // prefer db value when the lang value appears to be wrong
        QString dbLangCode = sql->getLanguageCode(libraryLangCode);
        langCode = !dbLangCode.isEmpty() ? dbLangCode : langCode;
    }
    QString languageCodeWtLocale = QString("%1|%2").arg(langCode).arg(libraryLangCode);

    // NOTE(review): a new QLocale is allocated on every call; verify that the epub
    // reader takes ownership of curlocal, otherwise the previous one leaks
    epb.curlocal = new QLocale(langCode);
    epb.language = langCode;
    epb.epubLangCode = libraryLangCode;
    epb.language_ex = languageCodeWtLocale;

    // strip void elements; every matched tag text is removed wherever it occurs
    QRegularExpression regex("(<img.+?>|<hr.+?>|<input.+?>)");
    QRegularExpressionMatchIterator gi = regex.globalMatch(html);
    while (gi.hasNext()) {
        const QString elem = gi.next().captured(0);
        html.replace(elem, "");
    }
    html = html.replace("&quot;", "\"").replace("&nbsp;", " ");

    htmldata = html;

    lastError.clear();
    sql->startTransaction();
    ProcessTOCEntry(documentId, "");
    if (lastError.isEmpty()) {
        sql->commitTransaction();
        return resultText();
    } else {
        sql->rollbackTransaction();
        return lastError;
    }
}

/**
 * Returns the document id stored for the week that contains the given date.
 *
 * The cached ids are keyed by the Monday of the week. When the week is not
 * cached yet, the article dates are (re)loaded from the database, starting
 * with the year of that Monday.
 *
 * @param date any day of the requested week
 * @return the document id, or an empty string when none is known
 */
QString wtimport::getDocumentId(QDate date)
{
    QDate weekStartDate = date.addDays(-(date.dayOfWeek() - Qt::Monday)); // start date of the week (Monday)
    // only hit the database when the week is missing from the cache
    if (!wtDocumentIds.contains(weekStartDate))
        loadArticleDates(weekStartDate.year());
    // value() does not insert an empty entry for unknown weeks, so a later call can retry
    return wtDocumentIds.value(weekStartDate);
}

/**
 * Prepares the epub reader for the given file and derives the issue's year
 * and month from the file name.
 *
 * The file name is expected to contain "/w_<anything>_<yyyymm>"; year and
 * month are accepted only for 2015 < year < 3000 and 1 <= month <= 12. In
 * every other case both are set to -1. When the reader cannot prepare the
 * file, its error text is copied to lastError and year/month are left
 * untouched.
 *
 * @param fileName path of the epub file
 */
void wtimport::init(QString fileName)
{
    prepared = !epb.Prepare(fileName);
    if (!prepared) {
        lastError = epb.lastErr;
        return;
    }

    // unknown issue until the file name proves otherwise
    year = -1;
    month = -1;

    QRegularExpression issuePattern("/w_.+_(\\d\\d\\d\\d\\d\\d)");
    const QRegularExpressionMatch issue = issuePattern.match(fileName);
    if (!issue.hasMatch())
        return;

    const QString digits = issue.captured(1);
    const int parsedYear = QStringView { digits }.left(4).toInt();
    const int parsedMonth = QStringView { digits }.mid(4, 2).toInt();
    const bool plausibleYear = parsedYear > 2015 && parsedYear < 3000;
    const bool plausibleMonth = parsedMonth > 0 && parsedMonth < 13;
    if (plausibleYear && plausibleMonth) {
        year = parsedYear;
        month = parsedMonth;
    }
}

/**
 * Parses one table-of-contents entry (or the cached HTML) and stores the
 * study article found in it.
 *
 * On the first call after a reset the language specific regular expressions
 * are loaded from table lmm_workbookregex, trying progressively less specific
 * language keys. The document is then read with xml_reader; the values
 * collected by xmlPartFound() (theme, songs, article number/date) are written
 * to table publicmeeting when they are complete and a week date is known.
 *
 * @param href    document reference relative to the epub's OEBPS path; its
 *                first seven characters are read as the document id
 * @param chapter unused
 */
void wtimport::ProcessTOCEntry(QString href, QString chapter)
{
    Q_UNUSED(chapter);

    // load regex definitions (if not done yet) to parse the xml reader's results
    if (epb.curlocal != nullptr && rxLoadState == rxLoadStates::noLoadAttempt) {
        rxLoadState = rxLoadStates::attempted;
        qDebug() << "Load regular expressions with the language setting:" << epb.language_ex;
        // the language codes originate from external documents, so they are bound
        // as parameters instead of being pasted into the statement
        const QString likeQuery("SELECT * FROM lmm_workbookregex WHERE lang LIKE :lang");
        sql_item exactLanguage;
        exactLanguage.insert(":lang", epb.language_ex);
        sql_items rx = sql->selectSql(likeQuery, &exactLanguage);
        if (rx.size() == 0) {
            QString dbLanguageCode = sql->getLanguageCode(epb.epubLangCode);
            // "%" acts as LIKE wildcard when the database does not know the language
            QString languageCodeWtLocale = QString("%1|%2")
                                                   .arg(dbLanguageCode.isEmpty() ? "%" : dbLanguageCode)
                                                   .arg(epb.epubLangCode);
            qDebug() << "Regular expressions not found - try language setting:" << languageCodeWtLocale;
            sql_item fallbackLanguage;
            fallbackLanguage.insert(":lang", languageCodeWtLocale);
            rx = sql->selectSql(likeQuery, &fallbackLanguage);
        }
        if (rx.size() == 0)
            rx = sql->selectSql("lmm_workbookregex", "lang", epb.epubLangCode);
        if (rx.size() == 0)
            rx = sql->selectSql("lmm_workbookregex", "lang", epb.language);
        // NOTE(review): regexes is not cleared here, so entries of an earlier
        // import remain available - confirm that this is intended
        for (const sql_item &kv : rx) {
            regexes.insert(kv.value("key").toString(), kv.value("value").toString());
        }
        if (regexes.size() > 0) {
            rxSong.setPattern(regexes["song"]);
            if (!rxSong.isValid())
                qDebug() << "Invalid regular expression to parse the songs!";
            rxDate.setPattern(regexes["date1"]);
            if (!rxDate.isValid())
                qDebug() << "Invalid regular expression to parse the week's start date!";
            rxLoadState = rxLoadStates::loaded;
        }
        if (rxLoadState == rxLoadStates::attempted) {
            lastError = tr("Regular expressions are missing for the language '%1'!", "Import schedule").arg(epb.language_ex);
        }
    }

    // reset the values collected per document
    queueTheme.clear();
    queueOpenSong = 0;
    queueCloseSong = 0;
    queueArticle_DocId = href.length() >= 7 ? href.mid(0, 7).toInt() : 0;
    queueArticle_Number = 0;

    // cached HTML (importHtml) takes precedence over the file below the OEBPS path
    xml_reader r(epb.oebpsPath + "/" + href, !htmldata.isEmpty() ? htmldata.toUtf8() : nullptr);

    // will work if in Table of Contents page
    r.register_attributesearch("div", "class", "groupTOC", xmlPartsContexts::groupTOC);
    r.register_elementsearch("h3", xmlPartsContexts::studyArticleInfo, xmlPartsContexts::groupTOC, true);
    r.register_attributesearch("p", "class", "*se", xmlPartsContexts::studyArticleDocIdPar, xmlPartsContexts::groupTOC, true);
    r.register_elementsearch("a", xmlPartsContexts::studyArticleDocId, xmlPartsContexts::studyArticleDocIdPar, false);

    // will work if in wt article
    r.register_attributesearch("header", "", "", xmlPartsContexts::header);
    r.register_attributesearch("p", "class", "contextTtl*", xmlPartsContexts::article, xmlPartsContexts::header);
    r.register_elementsearch("h1", xmlPartsContexts::title, xmlPartsContexts::header);
    r.register_attributesearch("p", "class", "pubRefs*", xmlPartsContexts::song);
    XLM_READER_CONNECT(r);
    r.read();

    // qDebug() << "queueArticle_Date:" << queueArticle_Date;
    // qDebug() << "queueArticle_Number:" << queueArticle_Number;
    // qDebug() << "queueArticle_DocId:" << queueArticle_DocId;
    // qDebug() << "queueOpenSong:" << queueOpenSong;
    // qDebug() << "queueCloseSong:" << queueCloseSong;
    // qDebug() << "queueTheme:" << queueTheme;

    // store the article only when both songs, the theme and a week reference were found
    if (queueOpenSong > 0 && queueCloseSong > 0 && !queueTheme.isEmpty() && (queueArticle_Number > 0 || queueArticle_Date.isValid())) {
        QDate dt = queueArticle_Date.isValid() ? queueArticle_Date : articleDates.value(queueArticle_Number);

        if (!dt.isValid() && year >= 2023 && year < 2026) {
            // find start date for the January issue (week with 1st Thursday in March)
            QDate dtTest(year, 3, 1);
            if (dtTest.dayOfWeek() >= 5)
                dtTest = dtTest.addDays(7);
            dtTest = dtTest.addDays((dtTest.dayOfWeek() - 1) * -1);
            // move to the week of the current article
            dtTest = dtTest.addDays(7 * (queueArticle_Number - 1));
            // add one week if the study of the current article takes place later than the Memorial (of the WT issue's year)
            sql_item item;
            item.insert(":date", dtTest.addDays(6));
            item.insert(":year", QVariant(year).toString());
            int memorial = sql->selectScalar("select count(*) from exceptions where date <= :date and strftime('%Y', date) = :year and type = 2 and publicmeetingday = 0", &item).toInt();
            if (memorial > 0)
                dtTest = dtTest.addDays(7);
            dt = dtTest;
        }

        if (dt.isValid()) {
            lastDate = QLocale().toString(dt, QLocale::ShortFormat);
            if (validDates++ == 0)
                firstDate = lastDate;

            sql_item parts;
            parts.insert("wt_theme", queueTheme);
            parts.insert("song_wt_start", queueOpenSong);
            parts.insert("song_wt_end", queueCloseSong);
            parts.insert("wt_source", QVariant(month).toString() + "/" + QString::number(queueArticle_Number));
            sql->updateSql("publicmeeting", "date", dt.toString(Qt::ISODate), &parts);

            // no row for this week yet: create it
            if (sql->lastNumRowsAffected == 0) {
                parts.insert("date", dt.toString(Qt::ISODate));
                sql->insertSql("publicmeeting", &parts, "id");
            }
        }
    }
}

/**
 * Reads the text of a <ruby> element while dropping its <rt> annotations.
 *
 * The reader must be positioned on the <ruby> start element. The text of
 * <rb> children is taken including nested elements, <rt> children are
 * skipped, and any other child contributes its plain element text.
 *
 * @param xml reader positioned on the <ruby> start element
 * @return the concatenated text without annotations
 */
QString wtimport::readRuby(QXmlStreamReader *xml)
{
    Q_ASSERT(xml->isStartElement() && xml->name() == "ruby"_L1);
    QString baseText;
    while (xml->readNextStartElement()) {
        if (xml->name() == "rt"_L1) {
            // pronunciation annotation, not part of the text
            xml->skipCurrentElement();
            continue;
        }
        const QXmlStreamReader::ReadElementTextBehaviour behaviour = xml->name() == "rb"_L1
                ? QXmlStreamReader::IncludeChildElements
                : QXmlStreamReader::ErrorOnUnexpectedElement; // same as calling without argument
        baseText += xml->readElementText(behaviour);
    }
    return baseText;
}

/**
 * Collects the text content up to the next end element.
 *
 * Character data is appended as is. <ruby> children are read with
 * readRuby(), <a> children are read recursively (they may contain ruby), and
 * all other children contribute their text including nested elements.
 * Reading stops at the first end element seen at this level, at the end of
 * the document, or on a reader error.
 *
 * @param xml reader positioned inside the element whose text is wanted
 * @return the collected text
 */
QString wtimport::readInnerText(QXmlStreamReader *xml)
{
    QString collected("");
    while (!xml->atEnd() && !xml->hasError()) {
        const QXmlStreamReader::TokenType token = xml->readNext();

        if (token == QXmlStreamReader::TokenType::EndElement)
            break; // the enclosing element is complete

        if (token == QXmlStreamReader::TokenType::Characters) {
            collected += xml->text().toString();
            continue;
        }

        if (token != QXmlStreamReader::TokenType::StartElement)
            continue;

        if (xml->name() == "ruby"_L1)
            collected += readRuby(xml);
        else if (xml->name() == "a"_L1)
            collected += readInnerText(xml); // inner text may contain ruby
        else
            collected += xml->readElementText(QXmlStreamReader::IncludeChildElements);
    }
    return collected;
}

/**
 * Handles one XML part reported for a search registered in ProcessTOCEntry().
 *
 * Depending on the context, the article number or week date, the document
 * id, the theme, or the opening/closing song is stored in the queue* members.
 * Issues before 2026 are identified by article number, later issues by
 * document id and week start date.
 *
 * @param xml           reader positioned on the reported token
 * @param tokenType     type of the reported token
 * @param context       one of the xmlPartsContexts values
 * @param relativeDepth unused
 */
void wtimport::xmlPartFound(QXmlStreamReader *xml, QXmlStreamReader::TokenType tokenType, int context, int relativeDepth)
{
    Q_UNUSED(relativeDepth)

    // compiled once instead of on every callback
    static const QRegularExpression rxArticle("Study\\s*[aA]rticle\\s*(\\d+|[\\w]+)[:]*\\s*(\\D+)\\s+(\\d+).*?,\\s*(\\d+)", QRegularExpression::UseUnicodePropertiesOption); //"Study Article (\\d+).*\\s*(\\D+)\\s*(\\d+).*,\\s*(\\d+)"
    static const QRegularExpression numberMatcher("(\\d+)", QRegularExpression::UseUnicodePropertiesOption);

    QStringView name = xml->name();
    QStringView xmlText = xml->text();
    // QStringView id = xml->attributes().value("id");
    // QMetaEnum contextsMetaEnum = QMetaEnum::fromType<xmlPartsContexts>();
    // QString contextString = contextsMetaEnum.valueToKey(context);
    // qDebug() << "----8<------------------------------------------------";
    // qDebug() << name << "tokentype: " << tokenString() << " context " << contextString;
    // qDebug() << xmlText;
    // qDebug() << "------------------------------------------------>8----";
    // if (id == "p36")
    //     qDebug() << "id found";

    switch (context) {
    case xmlPartsContexts::studyArticleInfo:
        // for file with Table of Contents
        if (tokenType == QXmlStreamReader::TokenType::Characters) {
            if (year < 2026) {
                // text is matched as: article number, month name, day, year (English)
                QRegularExpressionMatch match = rxArticle.matchView(xmlText);
                if (match.hasMatch()) {
                    QString temp = match.captured(2) + " " + match.captured(3) + " " + match.captured(4);
                    QLocale loc(QLocale::English);
                    QDate dt = loc.toDate(temp, "MMMM d yyyy");
                    if (dt.isValid()) {
                        queueArticle_Number = match.captured(1).toInt();
                        addArticleDate(queueArticle_Number, dt, queueArticle_Number);
                    }
                }
            } else if (rxLoadState == rxLoadStates::loaded) {
                // beginning with the 2026 issues, use the document id instead of the article number
                // 1) read week's start date from the 'h3' tag
                // 2) read the document id from the link's 'href' attribute
                QDate dt = getDate(xmlText);
                if (dt.isValid()) {
                    queueArticle_Date = dt;
                }
            }
        }
        break;

    case xmlPartsContexts::studyArticleDocId:
        // for file with Table of Contents
        if (year >= 2026 && rxLoadState == rxLoadStates::loaded) {
            // beginning with the 2026 issues, use the document id instead of the article number
            // 1) read week's start date from the 'h3' tag
            // 2) read the document id from the link's 'href' attribute
            if (name == "a"_L1) {
                queueArticle_DocId = xml->attributes().value("href").mid(0, 7).toInt();
                if (queueArticle_Date.isValid()) {
                    queueArticle_Number += 1; // consecutive number per issue
                    addArticleDate(queueArticle_DocId, queueArticle_Date, queueArticle_Number);
                    queueArticle_Date = QDate(); // reset date to prevent adding the next entry
                }
            }
        }
        break;

    case xmlPartsContexts::article:
        if (tokenType == QXmlStreamReader::TokenType::StartElement) {
            QString innerText = readInnerText(xml).trimmed();
            if (year < 2026) { // year of the issue
                // read issue number
                QRegularExpressionMatch match = numberMatcher.matchView(innerText);
                if (match.hasMatch()) {
                    queueArticle_Number = epb.stringToInt(match.captured(1));
                }
            } else if (rxLoadState == rxLoadStates::loaded) {
                // read week's start date
                queueArticle_Date = getDate(innerText);
                // add article when reading from the browser cache
                addArticleDate(queueArticle_DocId, queueArticle_Date, queueArticle_DocId);
            }
        }
        break;

    case xmlPartsContexts::title:
        if (tokenType == QXmlStreamReader::TokenType::StartElement) {
            queueTheme = readInnerText(xml).trimmed();
        }
        break;

    case xmlPartsContexts::song:
        if (tokenType == QXmlStreamReader::TokenType::StartElement) {
            QString innerText = readInnerText(xml);
            if (rxLoadState == rxLoadStates::loaded) {
                QRegularExpressionMatch match = rxSong.matchView(innerText);
                if (match.hasMatch()) {
                    // the first song found is the opening song, later matches set the closing song
                    if (queueOpenSong > 0)
                        queueCloseSong = epb.stringToInt(match.captured(1));
                    else
                        queueOpenSong = epb.stringToInt(match.captured(1));
                }
            }
        }
        break;
    }
}

/**
 * Samples the cover image's colour and stores it for the current issue month.
 *
 * Setting "wt_colors" has the format
 * <reference pixel x>|<reference pixel y>,<January color>,..,<December color>.
 * The colour at the reference pixel replaces the entry of the current month
 * unless it is white. When the image cannot be loaded or the reference pixel
 * lies outside of it, the stored colours are left as they are. The setting is
 * written back in every case.
 *
 * @param fileName path of the cover image
 */
void wtimport::ProcessCoverImage(QString fileName)
{
    // format of the setting: <reference pixel x position>|<reference pixel y position>,<January color>,..,<December color>
    // reference pixel position is used to read the color of the given pixel from the cover image
    QString defaultMonthlyColors(QString("600|1520,#,#,#,#,#,#,#,#,#,#,#,#").replace("#", "#656164"));
    QString epubColors(sql->getSetting("wt_colors", defaultMonthlyColors));
    QStringList colors = epubColors.split(",");
    if (colors.size() != 13)
        colors = defaultMonthlyColors.split(",");
    int x(600);
    int y(1520);
    if (colors[0].contains("|")) {
        QStringList pixel = colors[0].split("|");
        x = pixel[0].toInt();
        y = pixel[1].toInt();
    }
    const QImage img(fileName);
    // pixelColor() yields an invalid colour (named "#000000") for a null image or an
    // out-of-range position; do not overwrite the stored colour in that case
    if (!img.isNull() && img.valid(x, y) && month > 0 && month < colors.size()) {
        const QColor coverColor = img.pixelColor(x, y);
        if (coverColor != QColorConstants::White)
            colors[month] = coverColor.name();
    }
    sql->saveSetting("wt_colors", colors.join(","));
}

bool wtimport::downloadArticleDates()
{
    QNetworkAccessManager manager;
    QEventLoop q;
    QTimer timer;
    bool imported(false);

    timer.setSingleShot(true);
    QObject::connect(&timer, SIGNAL(timeout()), &q, SLOT(quit()));
    QObject::connect(&manager, SIGNAL(finished(QNetworkReply *)),
                     &q, SLOT(quit()));

    QNetworkRequest request;
    QString sUrl =
            "https://www.theocbase.net/workbook/wtassist" + QString("%1").arg(year, 4, 10, QLatin1Char('0')) + ".txt";
    request.setUrl(QUrl(sUrl));
    QNetworkReply *reply = manager.get(request);

    timer.start(10000); // 10s timeout
    q.exec();

    if (timer.isActive()) {
        // download complete
        timer.stop();
        if (reply->error() != QNetworkReply::NoError) {
            qDebug() << "downloadArticleDates error" << reply->errorString();
        } else {
            QDate dt;
            while (!reply->atEnd()) {
                QString ln = reply->readLine();
                if (ln.isEmpty())
                    continue;
                if (ln.endsWith("\n"))
                    ln = ln.left(ln.length() - 1);
                QStringList parts(ln.split('\t'));
                queueArticle_Number = QVariant::fromValue(parts[1]).toInt();
                dt = QDate::fromString(parts[2], "yyyy-MM-dd");
                if (queueArticle_Number > 0 && dt.isValid()) {
                    addArticleDate(queueArticle_Number, dt, queueArticle_Number);
                }
            }
            imported = true;
        }
    }

    if (reply != nullptr)
        delete reply;
    return !imported;
}

void wtimport::addArticleDate(int documentId, QDate dt, int articleNr)
{
    if (!(documentId > 0 && dt.isValid() && articleNr > 0))
        return;

    articleDates.insert(articleNr, dt);

    sql_item values;
    values.insert(":yr", year);
    values.insert(":article", documentId);
    values.insert(":dt", dt.toString("yyyy-MM-dd"));
    sql->execSql("replace into wt_article_dates (yr, article, dt) values (:yr, :article, :dt)", &values);
}

void wtimport::loadArticleDates(int year)
{
    sql_item param;
    param.insert(":yr", year);
    sql_items data = sql->selectSql("select * from wt_article_dates where yr >= :yr order by yr, article", &param);
    for (sql_item row : data) {
        QString articleNr = row.value("article").toString();
        QDate dt(row.value("dt").toDate());
        articleDates.insert(articleNr.toInt(), dt);
        wtDocumentIds[dt] = articleNr;
    }
}

/**
 * Determines the Monday on which the week named in an article header starts.
 *
 * The day of month is taken from group "fromday" of the language specific
 * date expression. The month is assumed to be two months after the issue
 * month; when that does not yield a Monday, the following and the previous
 * month are tried. Month arithmetic wraps around the end of the year. The
 * year is the first four-digit number in the text; without one, the issue
 * year is used and advanced when the month wraps.
 *
 * @param value header text that contains the week's date range
 * @return the week's start date, or an invalid date when none was found
 */
QDate wtimport::getDate(QStringView value)
{
    static const QRegularExpression yearMatcher("(?<year>\\d{4})");

    int weeksYear = year;
    bool yearFromText = false;
    const QRegularExpressionMatch match = yearMatcher.matchView(value);
    if (match.hasMatch()) {
        weeksYear = match.captured("year").toInt();
        yearFromText = true;
    }
    qDebug() << QString("Use regex: '%1' to get the start date from the article's header: '%2'").arg(rxDate.pattern()).arg(value);
    if (value.isEmpty())
        return QDate();

    const QRegularExpressionMatch dateMatch = rxDate.matchView(value);
    if (!dateMatch.hasMatch())
        return QDate();
    const int fromDay = epb.stringToInt(dateMatch.captured("fromday"));

    // date in the month that lies (2 + monthOffset) months after the issue month
    const auto candidate = [&](int monthOffset) {
        const int zeroBased = month + 1 + monthOffset; // (month + 2 + offset) - 1
        const int yearShift = zeroBased >= 0 ? zeroBased / 12 : -((11 - zeroBased) / 12);
        const int candidateMonth = zeroBased - yearShift * 12 + 1;
        // a year given in the text already is the year of the week
        const int candidateYear = yearFromText ? weeksYear : weeksYear + yearShift;
        return QDate(candidateYear, candidateMonth, fromDay);
    };

    // we assume that the article is in the calculated month and not included from the previous or following month
    QDate dt = candidate(0);
    if (dt.dayOfWeek() != Qt::Monday) {
        // the date is not on a Monday, so we are wrong and the month must either be the following or the previous one
        // check the surrounding months while we keep in mind that the first Monday of March is in the January issue
        if (month < 12)
            dt = candidate(1);
        if (dt.dayOfWeek() != Qt::Monday && month > 1)
            dt = candidate(-1);
    }
    if (dt.dayOfWeek() == Qt::Monday)
        return dt;
    return QDate();
}

/**
 * Builds the translated summary of the last import.
 *
 * @return a message naming the imported week, the range of imported weeks,
 *         or a notice that nothing was imported
 */
QString wtimport::resultText()
{
    const bool nothingImported = firstDate.isEmpty() || validDates < 1;
    if (nothingImported)
        return QObject::tr("Nothing imported (no dates recognized)");

    if (validDates == 1)
        return QObject::tr("Imported week of %1", "Import schedule").arg(firstDate);

    const QString weekCount = QVariant(validDates).toString();
    return QObject::tr("Imported %1 week(s) from %2 thru %3", "Import schedule", validDates).arg(weekCount, firstDate, lastDate);
}

void wtimport::exportAssistFiles()
{
    QFile *file(nullptr);
    QTextStream *stream(nullptr);
    sql_item param;
    param.insert(":yr", year - 1);
    sql_items data = sql->selectSql("select * from wt_article_dates where yr >= :yr order by yr, article", &param);
    int lastYr(0);
    for (sql_item row : data) {
        int yr = row.value("yr").toInt();
        QDate dt(row.value("dt").toDate());
        if (lastYr != yr) {
            if (file) {
                delete stream;
                file->close();
                delete file;
            }
            QString assist(sql->databasepath.mid(0, sql->databasepath.lastIndexOf('/')) + "/wtassist" + QString("%1").arg(yr, 4, 10, QLatin1Char('0')) + ".txt");
            file = new QFile(assist);
            if (!file->open(QIODevice::ReadWrite)) {
                QMessageBox::information(0, "TheocBase", "Could not create " + assist);
                delete file;
                return;
            }
            stream = new QTextStream(file);
        }
        (*stream) << yr << '\t' << row.value("article").toString() << '\t' << dt.toString("yyyy-MM-dd") << "\n";
        lastYr = yr;
    }

    if (file) {
        delete stream;
        file->close();
        delete file;
        QMessageBox::information(0, "TheocBase", "lmmassist files created created");
    }
}
