/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.eval.textstats;

import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.math3.util.FastMath;
import org.apache.tika.eval.langid.Language;
import org.apache.tika.eval.textstats.LanguageAwareTokenCountStats;
import org.apache.tika.eval.tokens.CommonTokenCountManager;
import org.apache.tika.eval.tokens.LangModel;
import org.apache.tika.eval.tokens.TokenCounts;

/**
 * Token-count statistic that measures how far the observed token distribution
 * of a text diverges from the language model of the first language in the
 * supplied list, expressed as a Kullback-Leibler style sum.
 */
public class CommonTokensKLDivergence
        implements LanguageAwareTokenCountStats<Double> {

    /** Supplies the per-language model that the observed tokens are scored against. */
    private final CommonTokenCountManager commonTokenCountManager;

    /**
     * @param mgr manager used to look up the language model for a language code
     */
    public CommonTokensKLDivergence(CommonTokenCountManager mgr) {
        this.commonTokenCountManager = mgr;
    }

    /**
     * Computes {@code -sum(p * log(q / p))} over every counted token, where
     * {@code p} is the token's count divided by the total token count and
     * {@code q} is the probability the language model reports for that token.
     *
     * @param languages   candidate languages; only the first element is consulted,
     *                    so the list must not be empty
     * @param tokenCounts observed token counts for the text
     * @return {@code 1.0} when no tokens were counted, otherwise the sum described above
     */
    @Override
    public Double calculate(List<Language> languages, TokenCounts tokenCounts) {
        // The model is resolved before the empty-token check, so a bad language
        // list fails the same way whether or not any tokens were counted.
        String primaryLanguage = languages.get(0).getLanguage();
        LangModel model = this.commonTokenCountManager.getLangTokens(primaryLanguage).getValue();

        Map<String, MutableInt> observedTokens = tokenCounts.getTokens();
        if (observedTokens.isEmpty()) {
            return 1.0;
        }

        // Denominator shared by every token's observed probability.
        double totalTokens = (double) tokenCounts.getTotalTokens();
        double sum = 0.0;
        for (Map.Entry<String, MutableInt> entry : observedTokens.entrySet()) {
            double observedProb = (double) entry.getValue().intValue() / totalTokens;
            // A token with zero observed probability contributes nothing to the sum.
            if (observedProb != 0.0) {
                double modelProb = model.getProbability(entry.getKey());
                sum += observedProb * FastMath.log(modelProb / observedProb);
            }
        }
        return -1.0 * sum;
    }
}

