/*
 * Decompiled with CFR 0.152.
 */
package almaligner;

import almaligner.Main;
import almaligner.SegmentsEvaluation;
import almaligner.Tokenizer;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.commons.io.FilenameUtils;
import org.apache.poi.xssf.usermodel.XSSFRow;

/**
 * Classifies a spreadsheet row of aligned segments (one segment per language)
 * by applying a fixed sequence of rejection filters.
 *
 * <p>Row layout as read by this class: the first {@code languages.size()} cells
 * hold the segment strings, a run of {@code languages.size()} numeric cells
 * holds the probabilities (starting at column {@code languages.size()}, or
 * {@code 2 * languages.size()} when lexical weights are computed), and the last
 * cell of the row holds the frequency.
 *
 * <p>Stop-word lists are loaded once, at construction time, from the
 * {@code resources/stopwords} directory next to the application. A missing or
 * unreadable directory leaves the stop-word map empty, in which case the
 * stop-word filters never reject a row.
 */
public class SegmentsEvaluator {
    private static final Logger LOGGER = Logger.getLogger(SegmentsEvaluator.class.getName());
    /** Segments of at most this many characters are candidates for {@code TOO_SHORT}. */
    private static final int MINIMUM_LENGTH = 2;
    /** Rows whose frequency is below this value are {@code LOW_FREQUENCY}. */
    private static final int MINIMUM_FREQUENCY = 2;
    /** Rows whose mean probability is below this value are {@code LOW_PROBABILITY}. */
    private static final double MINIMUM_PROBABILITY = 0.51;
    /** Segments with more tokens than this are {@code TOO_LONG}. */
    private static final int MAXIMUM_TOKENS = 5;
    /** Matches a whole segment that contains at least one of the listed punctuation characters. */
    private static final Pattern NON_ALPHA_PATTERN = Pattern.compile(".*[" + Pattern.quote("!\"#$%&()*+,:;<=>?@[]^_`{|}~") + "].*");
    /** Matches a whole segment that contains at least one ASCII digit. */
    private static final Pattern NUMBER_PATTERN = Pattern.compile(".*[0-9].*");
    private final boolean computingLexicalWeight;
    // Initialised eagerly so that a failed load leaves an empty map, never null.
    private final HashMap<String, ArrayList<String>> stopWords = new HashMap<String, ArrayList<String>>();
    private final ArrayList<String> languages;

    /**
     * Creates an evaluator and loads the stop-word lists from disk.
     *
     * @param languages language identifiers, in the same order as the segment columns;
     *                  each identifier is used as-is as the key into the stop-word map
     * @param computingLexicalWeight whether the row carries an extra group of
     *                  {@code languages.size()} columns before the probability columns
     */
    public SegmentsEvaluator(ArrayList<String> languages, boolean computingLexicalWeight) {
        this.languages = languages;
        this.loadStopWords();
        this.computingLexicalWeight = computingLexicalWeight;
    }

    /**
     * Evaluates one row and returns the first rejection reason that applies.
     *
     * <p>Filters run in this order: low frequency, low mean probability, too short,
     * contains numbers, non-alphabetic characters, too long, only stop words,
     * ends with a stop word.
     *
     * @param row the spreadsheet row to evaluate
     * @return the first matching rejection, or {@code ACCEPTABLE} if none applies
     */
    public SegmentsEvaluation evaluate(XSSFRow row) {
        ArrayList<String> segments = new ArrayList<String>();
        for (int c = 0; c < this.languages.size(); ++c) {
            segments.add(row.getCell(c).getStringCellValue());
        }
        if (this.hasLowFrequency(row)) {
            return SegmentsEvaluation.LOW_FREQUENCY;
        }
        if (this.hasLowMeanProbability(row)) {
            return SegmentsEvaluation.LOW_PROBABILITY;
        }
        if (this.isTooShort(segments)) {
            return SegmentsEvaluation.TOO_SHORT;
        }
        if (this.containsNumbers(segments)) {
            return SegmentsEvaluation.CONTAINS_NUMBER;
        }
        if (this.containsNonAlpha(segments)) {
            return SegmentsEvaluation.NON_ALPHA;
        }
        if (this.isTooLong(segments)) {
            return SegmentsEvaluation.TOO_LONG;
        }
        if (this.containsOnlyStopWords(segments)) {
            return SegmentsEvaluation.ONLY_STOPWORDS;
        }
        if (this.endsWithStopword(segments)) {
            return SegmentsEvaluation.ENDS_WITH_STOPWORD;
        }
        return SegmentsEvaluation.ACCEPTABLE;
    }

    /**
     * Returns whether the arithmetic mean of the row's probability cells is
     * below {@link #MINIMUM_PROBABILITY}.
     */
    private boolean hasLowMeanProbability(XSSFRow row) {
        int languageCount = this.languages.size();
        // With lexical weights, one more group of languageCount columns precedes the probabilities.
        int multiplier = this.computingLexicalWeight ? 2 : 1;
        int firstProbColumn = languageCount * multiplier;
        double probSum = 0.0;
        for (int n = 0; n < languageCount; ++n) {
            probSum += row.getCell(n + firstProbColumn).getNumericCellValue();
        }
        return probSum / (double)languageCount < MINIMUM_PROBABILITY;
    }

    /**
     * Returns whether the numeric value in the row's last cell is below
     * {@link #MINIMUM_FREQUENCY}.
     */
    private boolean hasLowFrequency(XSSFRow row) {
        double frequency = row.getCell(row.getLastCellNum() - 1).getNumericCellValue();
        return frequency < MINIMUM_FREQUENCY;
    }

    /**
     * Returns whether any segment has at most {@link #MINIMUM_LENGTH} characters
     * and contains at least one character that is not upper case. Short segments
     * made only of upper-case characters are therefore not rejected, and neither
     * is an empty segment.
     */
    private boolean isTooShort(ArrayList<String> segments) {
        for (String segment : segments) {
            if (segment.length() > MINIMUM_LENGTH) {
                continue;
            }
            for (char ch : segment.toCharArray()) {
                if (!Character.isUpperCase(ch)) {
                    return true;
                }
            }
        }
        return false;
    }

    /** Returns whether any segment has more than {@link #MAXIMUM_TOKENS} tokens. */
    private boolean isTooLong(ArrayList<String> segments) {
        for (String segment : segments) {
            if (Tokenizer.count(segment) > MAXIMUM_TOKENS) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns whether any segment starts with {@code .}, starts or ends with
     * {@code -}, {@code '} or {@code /}, or contains any character matched by
     * {@link #NON_ALPHA_PATTERN}.
     */
    private boolean containsNonAlpha(ArrayList<String> segments) {
        for (String segment : segments) {
            if (segment.startsWith(".")) {
                return true;
            }
            if (segment.startsWith("-") || segment.endsWith("-")) {
                return true;
            }
            if (segment.startsWith("'") || segment.endsWith("'")) {
                return true;
            }
            if (segment.startsWith("/") || segment.endsWith("/")) {
                return true;
            }
            if (NON_ALPHA_PATTERN.matcher(segment).matches()) {
                return true;
            }
        }
        return false;
    }

    /** Returns whether any segment contains an ASCII digit. */
    private boolean containsNumbers(ArrayList<String> segments) {
        for (String segment : segments) {
            if (NUMBER_PATTERN.matcher(segment).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns whether the last token of any segment is a stop word (compared in
     * lower case) of that segment's language.
     *
     * <p>Segments that tokenize to nothing are skipped. As soon as a language
     * without a stop-word list is reached, the method returns {@code false}
     * without examining the remaining segments.
     */
    private boolean endsWithStopword(ArrayList<String> segments) {
        for (int langIndex = 0; langIndex < segments.size(); ++langIndex) {
            ArrayList<String> stopwordList = this.stopWords.get(this.languages.get(langIndex));
            if (stopwordList == null) {
                // NOTE(review): this also skips the remaining languages — confirm that is intended.
                return false;
            }
            String[] tokens = Tokenizer.tokenize(segments.get(langIndex));
            if (tokens.length == 0) {
                // No last token to inspect; avoids indexing tokens[-1].
                continue;
            }
            if (stopwordList.contains(tokens[tokens.length - 1].toLowerCase())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns whether every token of any segment is a stop word (compared in
     * lower case) of that segment's language.
     *
     * <p>A segment with zero tokens satisfies the condition vacuously. As soon as
     * a language without a stop-word list is reached, the method returns
     * {@code false} without examining the remaining segments.
     */
    private boolean containsOnlyStopWords(ArrayList<String> segments) {
        for (int langIndex = 0; langIndex < segments.size(); ++langIndex) {
            ArrayList<String> stopwordList = this.stopWords.get(this.languages.get(langIndex));
            if (stopwordList == null) {
                // NOTE(review): this also skips the remaining languages — confirm that is intended.
                return false;
            }
            String[] tokens = Tokenizer.tokenize(segments.get(langIndex));
            int stopwordsCount = 0;
            for (String token : tokens) {
                if (stopwordList.contains(token.toLowerCase())) {
                    ++stopwordsCount;
                }
            }
            if (stopwordsCount >= tokens.length) {
                return true;
            }
        }
        return false;
    }

    /**
     * Populates {@link #stopWords} from the files in {@code resources/stopwords}.
     *
     * <p>The directory is resolved relative to the parent of the location
     * {@code Main} was loaded from, going up one more level when that parent is
     * named {@code build}. Each non-hidden regular file contributes one list of
     * whitespace-separated words, keyed by the file's lower-cased base name.
     * Failures are logged and never propagated; files that cannot be opened are
     * skipped individually.
     */
    private void loadStopWords() {
        File stopwordsDir;
        try {
            File rootAppPath = new File(Main.class.getProtectionDomain().getCodeSource().getLocation().toURI());
            rootAppPath = rootAppPath.getParentFile().getName().equals("build") ? rootAppPath.getParentFile().getParentFile() : rootAppPath.getParentFile();
            stopwordsDir = new File(rootAppPath + File.separator + "resources" + File.separator + "stopwords");
        }
        catch (URISyntaxException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return;
        }
        File[] listFiles = stopwordsDir.listFiles(new FileFilter(){

            @Override
            public boolean accept(File file) {
                // Sub-directories cannot be opened by Scanner, so leave them out.
                return !file.isHidden() && file.isFile();
            }
        });
        if (listFiles == null) {
            // listFiles returns null when the path is not a directory or cannot be read.
            LOGGER.log(Level.WARNING, "Stop-word directory not found or unreadable: {0}", stopwordsDir);
            return;
        }
        for (File file : listFiles) {
            ArrayList<String> list = new ArrayList<String>();
            try (Scanner s = new Scanner(file)) {
                while (s.hasNext()) {
                    list.add(s.next());
                }
            }
            catch (FileNotFoundException ex) {
                // Skip this file only; the remaining lists are still loaded.
                LOGGER.log(Level.SEVERE, null, ex);
                continue;
            }
            String langId = FilenameUtils.getBaseName((String)file.toString());
            this.stopWords.put(langId.toLowerCase(), list);
        }
    }
}

