/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.eval.metadata;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.eval.langid.Language;
import org.apache.tika.eval.langid.LanguageIDWrapper;
import org.apache.tika.eval.textstats.BasicTokenCountStatsCalculator;
import org.apache.tika.eval.textstats.CommonTokens;
import org.apache.tika.eval.textstats.CompositeTextStatsCalculator;
import org.apache.tika.eval.textstats.TextStatsCalculator;
import org.apache.tika.eval.tokens.CommonTokenResult;
import org.apache.tika.eval.tokens.TokenCounts;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.filter.MetadataFilter;
import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;

/**
 * {@link MetadataFilter} that derives text statistics from the extracted content stored in a
 * {@link Metadata} object and writes them back into that same object under the
 * {@code tika-eval:} prefix.
 *
 * <p>The content is read from {@link AbstractRecursiveParserWrapperHandler#TIKA_CONTENT}. When
 * that value is missing or blank, the metadata is left untouched.
 *
 * <p>NOTE(review): the public {@code Property} fields and {@link #TIKA_EVAL_NS} are declared
 * non-final; they are kept that way here to avoid changing the class's public surface, but
 * they should be treated as constants.
 */
public class TikaEvalMetadataFilter
implements MetadataFilter {
    /** Prefix shared by every metadata key written by this filter. */
    public static String TIKA_EVAL_NS = "tika-eval:";
    /** Total token count, taken from {@link TokenCounts#getTotalTokens()}. */
    public static Property NUM_TOKENS = Property.externalInteger(TIKA_EVAL_NS + "numTokens");
    /** Unique token count, taken from {@link TokenCounts#getTotalUniqueTokens()}. */
    public static Property NUM_UNIQUE_TOKENS = Property.externalInteger(TIKA_EVAL_NS + "numUniqueTokens");
    /** Alphabetic token count, taken from {@link CommonTokenResult#getAlphabeticTokens()}. */
    public static Property NUM_ALPHA_TOKENS = Property.externalInteger(TIKA_EVAL_NS + "numAlphaTokens");
    /** Unique alphabetic token count, taken from {@link CommonTokenResult#getUniqueAlphabeticTokens()}. */
    public static Property NUM_UNIQUE_ALPHA_TOKENS = Property.externalInteger(TIKA_EVAL_NS + "numUniqueAlphaTokens");
    /** Language of the first entry in the language-identification result list. */
    public static Property LANGUAGE = Property.externalText(TIKA_EVAL_NS + "lang");
    /** Confidence reported for the first entry in the language-identification result list. */
    public static Property LANGUAGE_CONFIDENCE = Property.externalReal(TIKA_EVAL_NS + "langConfidence");
    /** Value of {@link CommonTokenResult#getOOV()}, or {@code -1.0} when there are no alphabetic tokens. */
    public static Property OUT_OF_VOCABULARY = Property.externalReal(TIKA_EVAL_NS + "oov");
    /** Shared calculator, built once in the static initializer below. */
    static CompositeTextStatsCalculator TEXT_STATS_CALCULATOR;

    /** Sentinel stored in {@link #OUT_OF_VOCABULARY} when no alphabetic tokens were found. */
    private static final double OOV_NOT_AVAILABLE = -1.0;

    /**
     * Computes text statistics for the content held in {@code metadata} and stores them in
     * {@code metadata}.
     *
     * @param metadata the metadata to read the content from and to write the statistics to
     * @throws TikaException declared by {@link MetadataFilter}; not thrown directly by this method
     */
    @Override
    public void filter(Metadata metadata) throws TikaException {
        String content = metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT);
        // Nothing to measure for null, empty or whitespace-only content.
        if (StringUtils.isBlank(content)) {
            return;
        }
        this.calcStats(content, metadata);
    }

    /**
     * Runs the shared calculator over {@code content} and copies the individual results into
     * {@code metadata}.
     *
     * @param content  the non-blank text to analyse
     * @param metadata the metadata object that receives the computed values
     */
    private void calcStats(String content, Metadata metadata) {
        // Results are keyed by the class of the calculator that produced them.
        Map<Class, Object> results = TEXT_STATS_CALCULATOR.calculate(content);

        TokenCounts tokenCounts = (TokenCounts)results.get(BasicTokenCountStatsCalculator.class);
        metadata.set(NUM_TOKENS, tokenCounts.getTotalTokens());
        metadata.set(NUM_UNIQUE_TOKENS, tokenCounts.getTotalUniqueTokens());

        CommonTokenResult commonTokenResult = (CommonTokenResult)results.get(CommonTokens.class);
        metadata.set(NUM_ALPHA_TOKENS, commonTokenResult.getAlphabeticTokens());
        metadata.set(NUM_UNIQUE_ALPHA_TOKENS, commonTokenResult.getUniqueAlphabeticTokens());
        if (commonTokenResult.getAlphabeticTokens() > 0) {
            metadata.set(OUT_OF_VOCABULARY, commonTokenResult.getOOV());
        } else {
            // Without alphabetic tokens, record the sentinel instead of the computed value.
            metadata.set(OUT_OF_VOCABULARY, OOV_NOT_AVAILABLE);
        }

        // LanguageIDWrapper is not registered in the static initializer of this class, so this
        // entry is presumably supplied by CompositeTextStatsCalculator itself (TODO confirm).
        // Guard against its absence so a missing entry skips the language fields instead of
        // failing with a NullPointerException.
        @SuppressWarnings("unchecked") // the map's value type is Object; element type cannot be checked
        List<Language> probabilities = (List<Language>)results.get(LanguageIDWrapper.class);
        if (probabilities != null && !probabilities.isEmpty()) {
            // Only the first entry of the list is reported.
            Language first = probabilities.get(0);
            metadata.set(LANGUAGE, first.getLanguage());
            metadata.set(LANGUAGE_CONFIDENCE, first.getConfidence());
        }
    }

    static {
        // Register the calculators whose results calcStats reads back by class.
        List<TextStatsCalculator> calcs = new ArrayList<>();
        calcs.add(new BasicTokenCountStatsCalculator());
        calcs.add(new CommonTokens());
        TEXT_STATS_CALCULATOR = new CompositeTextStatsCalculator(calcs);
    }
}

