/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.dictionary.lookup2.dictionary;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.ctakes.core.util.collection.ArrayListMap;
import org.apache.ctakes.core.util.collection.CollectionMap;
import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
import org.apache.ctakes.dictionary.lookup2.util.CuiCodeUtil;
import org.apache.ctakes.dictionary.lookup2.util.LookupUtil;
import org.apache.log4j.Logger;

/**
 * Builds a map that indexes dictionary terms by their "rare word": for each term,
 * the token that occurs least often across all supplied terms.
 *
 * <p>Terms are tokenized when a {@link CuiTerm} is constructed, so every term seen by
 * {@link #createRareWordTermMap(Iterable)} is already a single-space-separated token string.
 */
public final class RareWordTermMapCreator {
    private static final Logger LOGGER = Logger.getLogger("RareWordTermMapCreator");

    /** Splits raw term text on runs of whitespace; compiled once instead of once per term. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Hyphenated prefixes. A hyphen that directly follows one of these is kept inside the token
     * instead of becoming its own token.
     * NOTE(review): "gasto-" may be a typo for "gastro-"; it is left unchanged here because
     * altering it would change how terms are tokenized — confirm before editing.
     */
    private static final Collection<String> PREFIXES = new HashSet<>(Arrays.asList(
            "e-", "a-", "u-", "x-", "agro-", "ante-", "anti-", "arch-", "be-", "bi-", "bio-", "co-",
            "counter-", "cross-", "cyber-", "de-", "eco-", "ex-", "extra-", "inter-", "intra-",
            "macro-", "mega-", "micro-", "mid-", "mini-", "multi-", "neo-", "non-", "over-", "pan-",
            "para-", "peri-", "post-", "pre-", "pro-", "pseudo-", "quasi-", "re-", "semi-", "sub-",
            "super-", "tri-", "ultra-", "un-", "uni-", "vice-",
            "electro-", "gasto-", "homo-", "hetero-", "ortho-", "phospho-"));

    /**
     * Hyphenated suffixes. A hyphen that directly precedes one of these is kept inside the token.
     * NOTE(review): "-o-torium" cannot match through {@link #isSuffix(String, int)}, because the
     * looked-up key is built only from the letters/digits that follow the hyphen.
     */
    private static final Collection<String> SUFFIXES = new HashSet<>(Arrays.asList(
            "-esque", "-ette", "-fest", "-fold", "-gate", "-itis", "-less", "-most", "-o-torium",
            "-rama", "-wise"));

    /** Common function words and small number words that are never counted or chosen as a rare word. */
    private static final Collection<String> BAD_POS_TERMS = new HashSet<>(Arrays.asList(
            "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
            "and", "or", "but", "for", "nor", "so", "yet",
            "this", "that", "these", "those", "the", "there",
            "can", "should", "will", "may", "might", "must", "could", "would",
            "some", "any", "all", "both", "half", "none", "twice",
            "at", "before", "after", "behind", "beneath", "beside", "between", "into", "through",
            "across", "of", "concerning", "like", "except", "with", "without", "toward", "to", "past",
            "against", "during", "until", "throughout", "below", "besides", "beyond", "from", "inside",
            "near", "outside", "since", "upon",
            "my", "our", "i", "you", "he", "she", "it", "mine", "yours", "his", "hers", "its", "ours",
            "theirs",
            "about", "off", "up", "along", "away", "back", "by", "down", "forward", "in", "on", "out",
            "over", "around", "under",
            "what", "whatever", "which", "whichever", "who", "whom", "whoever", "whomever",
            "how", "where", "when", "however", "wherever", "whenever"));

    private RareWordTermMapCreator() {
    }

    /**
     * Creates the rare-word index for the given terms.
     *
     * <p>The iterable is traversed twice: once to count token occurrences and once to build the
     * map, so it must support repeated iteration.
     *
     * @param cuiTerms terms (already tokenized by {@link CuiTerm}) paired with their cui codes
     * @return map from rare word to the {@link RareWordTerm}s indexed under that word
     */
    public static CollectionMap<String, RareWordTerm, List<RareWordTerm>> createRareWordTermMap(Iterable<CuiTerm> cuiTerms) {
        CollectionMap<String, RareWordTerm, List<RareWordTerm>> rareWordTermMap = new ArrayListMap<>();
        Map<String, Integer> tokenCountMap = createTokenCountMap(cuiTerms);
        for (CuiTerm cuiTerm : cuiTerms) {
            String term = cuiTerm.getTerm();
            // Split once and reuse the tokens for rare-word choice, index lookup and token count.
            String[] tokens = LookupUtil.fastSplit(term, ' ');
            String rareWord = getRareWord(tokens, tokenCountMap);
            int wordIndex = getWordIndex(tokens, rareWord);
            if (wordIndex < 0) {
                LOGGER.warn("Bad Rare Word Index for " + rareWord + " in " + term);
                continue;
            }
            RareWordTerm rareWordTerm = new RareWordTerm(term, cuiTerm.getCui(), rareWord, wordIndex, tokens.length);
            rareWordTermMap.placeValue(rareWord, rareWordTerm);
        }
        return rareWordTermMap;
    }

    /**
     * Counts how many times each rarable token occurs across all terms.
     *
     * @param cuiTerms terms whose space-separated tokens are counted
     * @return map from token to its number of occurrences; non-rarable tokens are absent
     */
    private static Map<String, Integer> createTokenCountMap(Iterable<CuiTerm> cuiTerms) {
        Map<String, Integer> tokenCountMap = new HashMap<>();
        for (CuiTerm cuiTerm : cuiTerms) {
            for (String token : LookupUtil.fastSplit(cuiTerm.getTerm(), ' ')) {
                if (!isRarableToken(token)) {
                    continue;
                }
                Integer count = tokenCountMap.get(token);
                tokenCountMap.put(token, count == null ? 1 : count + 1);
            }
        }
        return tokenCountMap;
    }

    /**
     * Chooses the rare word of a term.
     *
     * @param tokens        the term's tokens, in order
     * @param tokenCountMap occurrence count per rarable token
     * @return the only token for a single-token term; otherwise the rarable token with the lowest
     *         count (the earliest one on ties), or the first token when no token qualifies
     */
    private static String getRareWord(String[] tokens, Map<String, Integer> tokenCountMap) {
        if (tokens.length == 1) {
            return tokens[0];
        }
        String bestWord = tokens[0];
        int bestCount = Integer.MAX_VALUE;
        for (String token : tokens) {
            if (!isRarableToken(token)) {
                continue;
            }
            Integer count = tokenCountMap.get(token);
            // Strict comparison keeps the earliest token when counts are equal.
            if (count != null && count < bestCount) {
                bestWord = token;
                bestCount = count;
            }
        }
        return bestWord;
    }

    /**
     * @param token a single token
     * @return true if the token is longer than one character, contains at least one letter and is
     *         not listed in {@link #BAD_POS_TERMS}
     */
    private static boolean isRarableToken(String token) {
        if (token.length() <= 1) {
            return false;
        }
        boolean hasLetter = false;
        for (int i = 0; i < token.length(); ++i) {
            if (Character.isLetter(token.charAt(i))) {
                hasLetter = true;
                break;
            }
        }
        return hasLetter && !BAD_POS_TERMS.contains(token);
    }

    /**
     * @param tokens the term's tokens, in order
     * @param word   the token to locate
     * @return index of the first token equal to {@code word}, or -1 if there is none
     */
    private static int getWordIndex(String[] tokens, String word) {
        for (int index = 0; index < tokens.length; ++index) {
            if (tokens[index].equals(word)) {
                return index;
            }
        }
        return -1;
    }

    /**
     * Tokenizes raw term text into a single-space-separated token string.
     *
     * @param term raw term text
     * @return the tokens of every whitespace-delimited word joined by single spaces; an empty term
     *         is returned as is
     */
    private static String getTokenizedTerm(String term) {
        if (term.isEmpty()) {
            return term;
        }
        String[] splits = WHITESPACE.split(term);
        if (splits.length == 0) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (String split : splits) {
            for (String token : getTokens(split)) {
                sb.append(token).append(' ');
            }
        }
        // Drop the trailing separator, if anything was appended.
        sb.setLength(Math.max(0, sb.length() - 1));
        return sb.toString();
    }

    /**
     * Splits one whitespace-free word into tokens.
     *
     * <p>Letters and digits accumulate into the current token. A hyphen stays inside the token when
     * the text accumulated so far is a known prefix or the text after it is a known suffix. A final
     * {@code 's} and a final {@code .} followed by one digit each start a new token that keeps the
     * punctuation. Any other character ends the current token and becomes a token of its own.
     *
     * @param word text containing no whitespace
     * @return the tokens of the word, in order
     */
    private static List<String> getTokens(String word) {
        List<String> tokens = new ArrayList<>();
        StringBuilder sb = new StringBuilder();
        int count = word.length();
        for (int i = 0; i < count; ++i) {
            char c = word.charAt(i);
            if (Character.isLetterOrDigit(c)) {
                sb.append(c);
                continue;
            }
            if (c == '-' && (isPrefix(sb.toString()) || isSuffix(word, i + 1))) {
                // Hyphen belongs to a prefixed or suffixed word; keep the word whole.
                sb.append(c);
                continue;
            }
            boolean startsNewToken = (c == '\'' && isOwnerApostrophe(word, i + 1))
                    || (c == '.' && isNumberDecimal(word, i + 1));
            flushToken(sb, tokens);
            if (startsNewToken) {
                // The punctuation leads the next token, e.g. "'s".
                sb.append(c);
            } else {
                tokens.add(String.valueOf(c));
            }
        }
        flushToken(sb, tokens);
        return tokens;
    }

    /** Moves any pending text in {@code sb} to the end of {@code tokens} and clears {@code sb}. */
    private static void flushToken(StringBuilder sb, List<String> tokens) {
        if (sb.length() != 0) {
            tokens.add(sb.toString());
            sb.setLength(0);
        }
    }

    /**
     * @param word text preceding a hyphen
     * @return true if {@code word} followed by a hyphen is listed in {@link #PREFIXES}
     */
    private static boolean isPrefix(String word) {
        return PREFIXES.contains(word + "-");
    }

    /**
     * @param word       full word being tokenized
     * @param startIndex index of the first character after a hyphen
     * @return true if the run of letters/digits starting at {@code startIndex}, preceded by a
     *         hyphen, is listed in {@link #SUFFIXES}
     */
    private static boolean isSuffix(String word, int startIndex) {
        if (word.length() <= startIndex) {
            return false;
        }
        String nextCharTerm = getNextCharTerm(word.substring(startIndex));
        if (nextCharTerm.isEmpty()) {
            return false;
        }
        return SUFFIXES.contains("-" + nextCharTerm);
    }

    /**
     * @param word       full word being tokenized
     * @param startIndex index of the first character after an apostrophe
     * @return true if exactly one character remains at {@code startIndex} and it is {@code 's'}
     */
    private static boolean isOwnerApostrophe(CharSequence word, int startIndex) {
        return word.length() == startIndex + 1 && word.charAt(startIndex) == 's';
    }

    /**
     * @param word       full word being tokenized
     * @param startIndex index of the first character after a period
     * @return true if exactly one character remains at {@code startIndex} and it is a digit
     */
    private static boolean isNumberDecimal(CharSequence word, int startIndex) {
        return word.length() == startIndex + 1 && Character.isDigit(word.charAt(startIndex));
    }

    /**
     * @param word text to scan
     * @return the leading run of letters and digits in {@code word}; empty if the first character
     *         is neither
     */
    private static String getNextCharTerm(String word) {
        int end = 0;
        while (end < word.length() && Character.isLetterOrDigit(word.charAt(end))) {
            ++end;
        }
        return word.substring(0, end);
    }

    /**
     * A tokenized term paired with its cui code. Two instances are equal when both the tokenized
     * term and the cui code are equal.
     */
    public static class CuiTerm {
        private final String term;
        private final Long cuiCode;
        /** Hash of cui code and term, computed once at construction. */
        private final int hash;

        /**
         * @param cui  cui text, converted to a code through {@link CuiCodeUtil}
         * @param term raw term text; it is stored in tokenized, single-space-separated form
         */
        public CuiTerm(String cui, String term) {
            this.term = RareWordTermMapCreator.getTokenizedTerm(term);
            this.cuiCode = CuiCodeUtil.getInstance().getCuiCode(cui);
            this.hash = (this.cuiCode + "_" + this.term).hashCode();
        }

        /**
         * @return the cui code obtained from {@link CuiCodeUtil} at construction
         */
        public Long getCui() {
            return this.cuiCode;
        }

        /**
         * @return the tokenized term text
         */
        public String getTerm() {
            return this.term;
        }

        @Override
        public boolean equals(Object value) {
            if (!(value instanceof CuiTerm)) {
                return false;
            }
            CuiTerm other = (CuiTerm) value;
            return this.term.equals(other.term) && this.cuiCode.equals(other.cuiCode);
        }

        @Override
        public int hashCode() {
            return this.hash;
        }
    }
}

