/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.util.treebank;

import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.cleartk.util.treebank.TopTreebankNode;
import org.cleartk.util.treebank.TreebankNode;

public class TreebankFormatParser {
    public static final String LEAF_NODE_REGEX = "\\(([^( )]+) ([^( )]+)\\s*\\)";
    private static Pattern leafNodePattern = Pattern.compile("\\(([^( )]+) ([^( )]+)\\s*\\)");
    public static final String TYPE_REGEX = "^\\(([^() ]+)";
    private static Pattern typePattern = Pattern.compile("^\\(([^() ]+)");
    public static final String cleanUPRegex1 = "\\s+";
    private static final Pattern cleanUpPattern1 = Pattern.compile("\\s+", 8);
    public static final String cleanUPRegex2 = "\\( \\(";
    private static final Pattern cleanUpPattern2 = Pattern.compile("\\( \\(", 8);
    public static final String cleanUPRegex3 = "\\) \\)";
    private static final Pattern cleanUpPattern3 = Pattern.compile("\\) \\)", 8);
    public static final String cleanUPRegex4 = "\\s*\\(\\s*\\(";
    private static final Pattern cleanUpPattern4 = Pattern.compile("\\s*\\(\\s*\\(", 8);
    private static final Pattern nonwhiteSpaceCharPattern = Pattern.compile("[^\\s]");

    public static TreebankNode getLeafNode(String parseFragment) {
        Matcher leafNodeMatcher = leafNodePattern.matcher(parseFragment);
        if (leafNodeMatcher.matches()) {
            String type = leafNodeMatcher.group(1);
            String value = leafNodeMatcher.group(2);
            TreebankNode node = new TreebankNode();
            node.setType(TreebankFormatParser.getTypeFromType(type));
            node.setTags(TreebankFormatParser.getTagsFromType(type));
            node.setValue(value);
            node.setLeaf(true);
            String token = TreebankFormatParser.getToken(node.getValue(), node.getType());
            node.setText(token);
            return node;
        }
        return null;
    }

    private static String getTypeFromType(String fullType) {
        if (fullType.startsWith("-")) {
            return fullType.substring(0, fullType.indexOf(45, 1) + 1);
        }
        return fullType.split("[-=]")[0];
    }

    private static String[] getTagsFromType(String fullType) {
        if (fullType.startsWith("-")) {
            String rest = fullType.substring(fullType.indexOf(45, 1) + 1);
            if (rest.length() > 0) {
                return rest.split("[-=]");
            }
            return new String[0];
        }
        String[] parts = fullType.split("[-=]");
        String[] tags = new String[parts.length - 1];
        for (int i = 1; i < parts.length; ++i) {
            tags[i - 1] = parts[i];
        }
        return tags;
    }

    public static String getType(String parseFragment) {
        Matcher typeMatcher = typePattern.matcher(parseFragment);
        if (typeMatcher.find()) {
            return typeMatcher.group(1);
        }
        return null;
    }

    public static String prepareString(String parse) {
        parse = cleanUpPattern1.matcher(parse).replaceAll(" ");
        parse = cleanUpPattern2.matcher(parse).replaceAll("((");
        parse = cleanUpPattern3.matcher(parse).replaceAll("))");
        parse = cleanUpPattern4.matcher(parse).replaceFirst("(TOP (");
        return parse.trim();
    }

    public static String inferPlainText(String treebankText) {
        StringBuilder sb = new StringBuilder();
        for (String parse : TreebankFormatParser.splitSentences(treebankText)) {
            Matcher matcher = leafNodePattern.matcher(parse);
            while (matcher.find()) {
                TreebankNode node = TreebankFormatParser.getLeafNode(matcher.group());
                if (node.getText() == null || node.getText().length() <= 0) continue;
                int lastIndex = sb.length() - 1;
                if (lastIndex > 0 && !TreebankFormatParser.needsSpaceBefore(node.getText()) && sb.charAt(lastIndex) == ' ') {
                    sb.deleteCharAt(lastIndex);
                }
                sb.append(node.getText());
                if (!TreebankFormatParser.needsSpaceAfter(node.getText())) continue;
                sb.append(" ");
            }
            int lastIndex = sb.length() - 1;
            if (lastIndex >= 0 && sb.charAt(lastIndex) == ' ') {
                sb.deleteCharAt(lastIndex);
            }
            sb.append('\n');
        }
        return sb.toString().trim();
    }

    private static boolean needsSpaceBefore(String tokenText) {
        String[] noSpaceTokens;
        for (String noSpaceToken : noSpaceTokens = new String[]{".", ",", ":", ";", "?", "'s", "'t", "\"", "!", ")", "]"}) {
            if (!tokenText.equals(noSpaceToken)) continue;
            return false;
        }
        return true;
    }

    private static boolean needsSpaceAfter(String tokenText) {
        String[] noSpaceTokens;
        for (String noSpaceToken : noSpaceTokens = new String[]{"\"", "(", "["}) {
            if (!tokenText.equals(noSpaceToken)) continue;
            return false;
        }
        return true;
    }

    public static TopTreebankNode parse(String parse) {
        parse = TreebankFormatParser.prepareString(parse);
        String plainText = TreebankFormatParser.inferPlainText(parse).trim();
        return TreebankFormatParser.parse(parse, plainText, 0);
    }

    private static void checkText(TreebankNode node, String text) {
        int end;
        int start;
        String text2;
        String text1 = node.getText();
        if (!text1.equals(text2 = text.substring(start = node.getTextBegin(), end = node.getTextEnd()))) {
            String prefix1 = text1.substring(0, text1.length() - 1);
            String prefix2 = text2.substring(0, text2.length() - 1);
            if (text1.endsWith(".") && prefix1.equals(prefix2)) {
                node.setTextEnd(node.getTextEnd() - 1);
            } else {
                throw new IllegalArgumentException("plain text does not align with tokens in treebank parse.  node text = '" + text1 + "'  plain text = '" + text2 + "'");
            }
        }
    }

    public static TopTreebankNode parse(String parse, String text, int textOffset) {
        try {
            TopTreebankNode topNode = new TopTreebankNode();
            parse = TreebankFormatParser.prepareString(parse);
            StringBuffer consumedText = new StringBuffer();
            if (text != null) {
                textOffset = TreebankFormatParser.movePastWhiteSpaceChars(text, textOffset);
                consumedText.append(text.substring(0, textOffset));
            }
            Stack<Integer> parseOffsetStack = new Stack<Integer>();
            Stack<Integer> plainTextOffsetStack = new Stack<Integer>();
            Stack<TreebankNode> parseStack = new Stack<TreebankNode>();
            for (int ci = 0; ci < parse.length(); ++ci) {
                int realBegin;
                char c = parse.charAt(ci);
                if (c == '(') {
                    parseOffsetStack.push(ci);
                    plainTextOffsetStack.push(consumedText.length());
                    continue;
                }
                if (c != ')') continue;
                int begin = (Integer)parseOffsetStack.pop();
                int end = ci;
                String subParse = parse.substring(begin, end + 1);
                int textBegin = (Integer)plainTextOffsetStack.pop();
                TreebankNode node = TreebankFormatParser.getLeafNode(subParse);
                if (node != null) {
                    node.setTopNode(topNode);
                    node.setParseBegin(begin);
                    node.setParseEnd(end + 1);
                    String token = node.getText();
                    if (token.length() > 0) {
                        realBegin = TreebankFormatParser.movePastWhiteSpaceChars(text, textBegin);
                        consumedText.append(text.substring(textBegin, realBegin));
                        consumedText.append(token);
                        node.setTextBegin(realBegin);
                        node.setTextEnd(realBegin + token.length());
                    } else {
                        node.setTextBegin(textBegin);
                        node.setTextEnd(textBegin + token.length());
                    }
                    TreebankFormatParser.checkText(node, text);
                    parseStack.push(node);
                    continue;
                }
                node = parse.lastIndexOf(41) == ci ? topNode : new TreebankNode();
                node.setTopNode(topNode);
                node.setParseBegin(begin);
                node.setParseEnd(end + 1);
                String type = TreebankFormatParser.getType(subParse);
                node.setType(TreebankFormatParser.getTypeFromType(type));
                node.setTags(TreebankFormatParser.getTagsFromType(type));
                node.setLeaf(false);
                while (parseStack.size() > 0 && ((TreebankNode)parseStack.peek()).getParseBegin() > node.getParseBegin()) {
                    TreebankNode child = (TreebankNode)parseStack.pop();
                    node.addChild(child);
                    child.setParent(node);
                }
                realBegin = TreebankFormatParser.movePastWhiteSpaceChars(text, textBegin);
                node.setTextBegin(realBegin);
                node.setTextEnd(Math.max(realBegin, consumedText.length()));
                try {
                    node.setText(consumedText.substring(node.getTextBegin(), node.getTextEnd()));
                }
                catch (StringIndexOutOfBoundsException sioobe) {
                    node.setText("");
                }
                TreebankFormatParser.checkText(node, text);
                parseStack.push(node);
            }
            topNode.setTreebankParse(parse);
            topNode.initTerminalNodes();
            return topNode;
        }
        catch (RuntimeException e) {
            throw new IllegalArgumentException("exception thrown when parsing the following: " + parse, e);
        }
    }

    public static int movePastWhiteSpaceChars(String text, int textOffset) {
        Matcher matcher = nonwhiteSpaceCharPattern.matcher(text);
        if (matcher.find(textOffset)) {
            return matcher.start();
        }
        return textOffset;
    }

    private static String getToken(String value, String type) {
        value = value.replace("-RCB-", "}");
        value = value.replace("-LCB-", "{");
        value = value.replace("-RRB-", ")");
        value = value.replace("-LRB-", "(");
        value = value.replace("-RSB-", "]");
        value = value.replace("-LSB-", "[");
        value = value.replace("``", "\"");
        value = value.replace("''", "\"");
        if (type.equals("-NONE-")) {
            return "";
        }
        if (value.contains("\\/")) {
            return value.replace("\\/", "/");
        }
        return value;
    }

    public static String[] splitSentences(String mrgContents) {
        String[] lines;
        String[] contents = mrgContents.split("(?=\\(\\s*\\()");
        if (contents.length > 1) {
            if (contents.length > 0 && contents[0].trim().equals("")) {
                String[] returnValues = new String[contents.length - 1];
                System.arraycopy(contents, 1, returnValues, 0, returnValues.length);
                return returnValues;
            }
            String[] returnValues = new String[contents.length];
            System.arraycopy(contents, 0, returnValues, 0, returnValues.length);
            return returnValues;
        }
        for (String line : lines = mrgContents.split("\r?\n")) {
            if (TreebankFormatParser.parensMatch(line)) continue;
            throw new IllegalArgumentException("Parentheses counts do not match for treebank sentence: " + line);
        }
        return lines;
    }

    public static boolean parensMatch(String contents) {
        int leftParenCount = 0;
        int rightParenCount = 0;
        for (char c : contents.toCharArray()) {
            if (c == '(') {
                ++leftParenCount;
            }
            if (c != ')') continue;
            ++rightParenCount;
        }
        return leftParenCount == rightParenCount;
    }

    public static List<TopTreebankNode> parseDocument(String parse, int textOffset, String text) {
        String[] sentenceParses;
        ArrayList<TopTreebankNode> returnValues = new ArrayList<TopTreebankNode>();
        for (String sentenceParse : sentenceParses = TreebankFormatParser.splitSentences(parse)) {
            TopTreebankNode topNode = TreebankFormatParser.parse(sentenceParse, text, textOffset);
            textOffset = topNode.getTextEnd();
            returnValues.add(topNode);
        }
        return returnValues;
    }
}

