/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.coreference.cc;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Scanner;
import libsvm.svm_node;
import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
import org.apache.ctakes.constituency.parser.util.TreeUtils;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.coreference.type.BooleanLabeledFS;
import org.apache.ctakes.coreference.type.DemMarkable;
import org.apache.ctakes.coreference.type.Markable;
import org.apache.ctakes.coreference.type.MarkablePairSet;
import org.apache.ctakes.coreference.type.NEMarkable;
import org.apache.ctakes.coreference.util.FSIteratorToList;
import org.apache.ctakes.coreference.util.GoldStandardLabeler;
import org.apache.ctakes.coreference.util.MarkableTreeUtils;
import org.apache.ctakes.coreference.util.PairAttributeCalculator;
import org.apache.ctakes.coreference.util.SvmVectorCreator;
import org.apache.ctakes.relationextractor.eval.XMIReader;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.utils.tree.SimpleTree;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSList;
import org.apache.uima.jcas.cas.NonEmptyFSList;
import org.apache.uima.jcas.tcas.Annotation;

/**
 * UIMA annotator that writes coreference training instances to disk.
 *
 * <p>For every {@link MarkablePairSet} in the CAS, each (anaphor, candidate antecedent) pair is
 * labeled with {@link GoldStandardLabeler#isGoldPair} and, depending on configuration, written as:
 * <ul>
 *   <li>a libsvm-style line ({@code <label> <index>:<value> ...}) to
 *       {@code <outputDir>/<type>/vectors/<docId>.libsvm} when {@link #PARAM_VECTORS} is true;</li>
 *   <li>a tree-kernel line ({@code +1|-1 |BT| <tree> |ET|}) to
 *       {@code <outputDir>/<type>/trees.txt} when {@link #PARAM_TREES} is true.</li>
 * </ul>
 * {@code <type>} is one of {@code ne}, {@code pronoun} or {@code dem}, chosen from the anaphor's class.
 *
 * <p>If initialization fails the error is logged and the annotator becomes a no-op
 * (see the {@code initialized} flag) rather than aborting the pipeline.
 */
@PipeBitInfo(name="ODIE Vector File Writer", description="Write ODIE Vector File.", role=PipeBitInfo.Role.WRITER, dependencies={PipeBitInfo.TypeProduct.DOCUMENT_ID, PipeBitInfo.TypeProduct.MARKABLE})
public class ODIEVectorFileWriter
extends JCasAnnotator_ImplBase {
    /** Output sub-directory / type key for named-entity anaphors. */
    private static final String TYPE_NE = "ne";
    /** Output sub-directory / type key for pronoun anaphors. */
    private static final String TYPE_PRONOUN = "pronoun";
    /** Output sub-directory / type key for demonstrative anaphors. */
    private static final String TYPE_DEM = "dem";
    /** Document id used when the CAS does not provide one. */
    private static final String FALLBACK_DOC_ID = "141471681_1";

    private final Logger log = Logger.getLogger(this.getClass());
    private String outputDir = null;
    private String goldStandardDir = null;
    // Per-document vector writers; opened and closed inside process().
    private PrintWriter neOut = null;
    private PrintWriter pronOut = null;
    private PrintWriter demOut = null;
    // Collection-wide tree writers; opened in initialize(), closed in collectionProcessComplete().
    private PrintWriter neTreeOut = null;
    private PrintWriter pronTreeOut = null;
    private PrintWriter demTreeOut = null;
    private PrintWriter debug = null;
    private boolean initialized = false;
    // Running instance counts per anaphor type.
    private int posNeInst = 0;
    private int negNeInst = 0;
    private int posDemInst = 0;
    private int negDemInst = 0;
    private int posPronInst = 0;
    private int negPronInst = 0;
    private int posAnaphInst = 0;
    private int negAnaphInst = 0;
    private PairAttributeCalculator attr = null;
    private HashSet<String> stopwords;
    private ArrayList<String> treeFrags;
    private SvmVectorCreator vecCreator = null;
    private GoldStandardLabeler labeler = null;
    private boolean printVectors;
    private boolean printTrees;
    private boolean useFrags = true;
    public static final String PARAM_OUTPUT_DIR = "outputDir";
    public static final String PARAM_GOLD_DIR = "goldStandardDir";
    public static final String PARAM_VECTORS = "writeVectors";
    public static final String PARAM_TREES = "writeTrees";
    public static final String PARAM_FRAGS = "treeFrags";
    public static final String PARAM_STOPS = "stopWords";

    /**
     * Reads the configuration parameters, creates the output directories, opens the tree writers
     * (when tree output is enabled) and loads the stop-word and tree-fragment resources.
     *
     * <p>Any failure is logged with its cause and leaves the annotator uninitialized, in which
     * case {@link #process(JCas)} does nothing.
     *
     * @param aContext UIMA context supplying the {@code PARAM_*} configuration values
     */
    public void initialize(UimaContext aContext) {
        this.outputDir = (String) aContext.getConfigParameterValue(PARAM_OUTPUT_DIR);
        this.goldStandardDir = (String) aContext.getConfigParameterValue(PARAM_GOLD_DIR);
        // A missing boolean parameter is treated as false instead of failing on unboxing.
        Boolean writeVectors = (Boolean) aContext.getConfigParameterValue(PARAM_VECTORS);
        Boolean writeTrees = (Boolean) aContext.getConfigParameterValue(PARAM_TREES);
        this.printVectors = writeVectors != null && writeVectors.booleanValue();
        this.printTrees = writeTrees != null && writeTrees.booleanValue();
        try {
            // Creating <outputDir>/<type>/vectors/ also creates <outputDir>/<type>/ for trees.txt.
            for (String type : new String[] {TYPE_NE, TYPE_PRONOUN, TYPE_DEM}) {
                new File(vectorDir(type)).mkdirs();
            }
            if (this.printTrees) {
                this.neTreeOut = new PrintWriter(typeDir(TYPE_NE) + "/trees.txt");
                this.demTreeOut = new PrintWriter(typeDir(TYPE_DEM) + "/trees.txt");
                this.pronTreeOut = new PrintWriter(typeDir(TYPE_PRONOUN) + "/trees.txt");
                // Auto-flushing so the debug dump survives an abnormal termination.
                this.debug = new PrintWriter(new PrintWriter(typeDir(TYPE_NE) + "/fulltrees_debug.txt"), true);
            }
            File stopFile = FileLocator.locateFile((String) aContext.getConfigParameterValue(PARAM_STOPS));
            this.stopwords = readStopwords(stopFile);
            this.vecCreator = new SvmVectorCreator(this.stopwords);
            if (this.useFrags) {
                File fragFile = FileLocator.locateFile((String) aContext.getConfigParameterValue(PARAM_FRAGS));
                this.treeFrags = readTreeFrags(fragFile);
                this.vecCreator.setFrags(this.treeFrags);
            }
            this.initialized = true;
        }
        catch (Exception e) {
            // Best-effort: keep the pipeline alive, but report the cause and release open files.
            this.log.error("Error initializing file writers.", e);
            closeTreeWriters();
        }
    }

    /**
     * Labels every candidate pair of the document against the gold standard, updates the
     * instance counters and writes the configured vector / tree output.
     *
     * <p>Does nothing if {@link #initialize(UimaContext)} failed. If the per-document vector
     * files cannot be created the document is skipped and the error is logged.
     *
     * @param jcas the CAS holding {@link Markable} and {@link MarkablePairSet} annotations
     */
    public void process(JCas jcas) {
        if (!this.initialized) {
            return;
        }
        String docId = DocumentIDAnnotationUtil.getDocumentID(jcas);
        if (docId == null) {
            docId = FALLBACK_DOC_ID;
        } else {
            // Keep only the file-name part of a path-like document id.
            docId = docId.substring(docId.lastIndexOf('/') + 1);
        }
        this.log.info("creating vectors for " + docId);
        FSIterator markIter = jcas.getAnnotationIndex(Markable.type).iterator();
        LinkedList<Annotation> markables = FSIteratorToList.convert(markIter);
        this.labeler = new GoldStandardLabeler(this.goldStandardDir, docId, markables);
        if (this.printVectors && !openVectorWriters(docId)) {
            return;
        }
        try {
            FSIterator pairSets = jcas.getJFSIndexRepository().getAllIndexedFS(MarkablePairSet.type);
            while (pairSets.hasNext()) {
                MarkablePairSet pairSet = (MarkablePairSet) pairSets.next();
                Markable anaphor = pairSet.getAnaphor();
                String corefType = corefTypeOf(anaphor);
                FSList candidates = pairSet.getAntecedentList();
                while (candidates instanceof NonEmptyFSList) {
                    NonEmptyFSList node = (NonEmptyFSList) candidates;
                    BooleanLabeledFS candidate = (BooleanLabeledFS) node.getHead();
                    Markable antecedent = (Markable) candidate.getFeature();
                    // The label stored in the CAS is ignored; the gold standard decides.
                    boolean positive = this.labeler.isGoldPair(anaphor, antecedent);
                    countInstance(corefType, positive);
                    if (this.printVectors) {
                        writeVector(jcas, corefType, anaphor, antecedent, positive);
                    }
                    if (this.printTrees) {
                        writeTree(jcas, corefType, anaphor, antecedent, positive);
                    }
                    candidates = node.getTail();
                }
            }
        }
        finally {
            // Vector files are per document: always release them, even if a pair failed.
            closeVectorWriters();
        }
    }

    /** Maps the anaphor's class to its output type key ({@code ne}, {@code dem} or {@code pronoun}). */
    private static String corefTypeOf(Markable anaphor) {
        if (anaphor instanceof NEMarkable) {
            return TYPE_NE;
        }
        if (anaphor instanceof DemMarkable) {
            return TYPE_DEM;
        }
        return TYPE_PRONOUN;
    }

    /** Increments the positive or negative instance counter for the given type. */
    private void countInstance(String corefType, boolean positive) {
        if (TYPE_NE.equals(corefType)) {
            if (positive) {
                ++this.posNeInst;
            } else {
                ++this.negNeInst;
            }
        } else if (TYPE_DEM.equals(corefType)) {
            if (positive) {
                ++this.posDemInst;
            } else {
                ++this.negDemInst;
            }
        } else if (TYPE_PRONOUN.equals(corefType)) {
            if (positive) {
                ++this.posPronInst;
            } else {
                ++this.negPronInst;
            }
        }
    }

    /** Writes one {@code <label> <index>:<value> ...} line for the pair and flushes it. */
    private void writeVector(JCas jcas, String corefType, Markable anaphor, Markable antecedent, boolean positive) {
        svm_node[] nodes = this.vecCreator.getNodeFeatures(anaphor, antecedent, jcas);
        PrintWriter writer = writerFor(corefType, this.neOut, this.pronOut, this.demOut);
        writer.print(positive ? 1 : 0);
        for (svm_node inst : nodes) {
            writer.print(" ");
            writer.print(inst.index);
            writer.print(":");
            writer.print(inst.value);
        }
        writer.println();
        writer.flush();
    }

    /** Dumps both full trees to the debug file and writes the path tree line for the pair. */
    private void writeTree(JCas jcas, String corefType, Markable anaphor, Markable antecedent, boolean positive) {
        TreebankNode antecedentNode = MarkableTreeUtils.markableNode(jcas, antecedent.getBegin(), antecedent.getEnd());
        TreebankNode anaphorNode = MarkableTreeUtils.markableNode(jcas, anaphor.getBegin(), anaphor.getEnd());
        this.debug.println(TreeUtils.tree2str(antecedentNode));
        this.debug.println(TreeUtils.tree2str(anaphorNode));
        SimpleTree pathTree = TreeExtractor.extractPathTree(antecedentNode, anaphorNode);
        // NOTE(review): the path-enclosed tree is computed but never written; the call is kept
        // in case it has side effects - TODO confirm and remove.
        TreeExtractor.extractPathEnclosedTree(antecedentNode, anaphorNode, jcas);
        String treeStr = pathTree.toString();
        PrintWriter writer = writerFor(corefType, this.neTreeOut, this.pronTreeOut, this.demTreeOut);
        writer.print(positive ? "+1" : "-1");
        writer.print(" |BT| ");
        writer.print(treeStr.replaceAll("\\) \\(", ")("));
        writer.println(" |ET|");
    }

    /** Picks the writer that belongs to the given type key. */
    private static PrintWriter writerFor(String corefType, PrintWriter ne, PrintWriter pronoun, PrintWriter dem) {
        if (TYPE_NE.equals(corefType)) {
            return ne;
        }
        if (TYPE_PRONOUN.equals(corefType)) {
            return pronoun;
        }
        return dem;
    }

    /** Returns {@code <outputDir>/<type>}. */
    private String typeDir(String type) {
        return this.outputDir + "/" + type;
    }

    /** Returns {@code <outputDir>/<type>/vectors/}. */
    private String vectorDir(String type) {
        return typeDir(type) + "/vectors/";
    }

    /**
     * Reads the stop-word file: one entry per line, blank lines ignored, and everything from the
     * first {@code '|'} onward discarded (presumably a trailing comment - confirm against the
     * resource file). Lines that start with {@code '|'} contribute nothing.
     */
    private static HashSet<String> readStopwords(File stopFile) throws java.io.IOException {
        HashSet<String> words = new HashSet<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(stopFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                int bar = line.indexOf('|');
                if (bar > 0) {
                    words.add(line.substring(0, bar).trim());
                } else if (bar < 0) {
                    words.add(line);
                }
            }
        }
        return words;
    }

    /**
     * Reads the tree-fragment file, keeping the second space-separated field of every line.
     * Blank lines are skipped; a non-blank line without a second field is rejected so that the
     * fragment order is never silently shifted.
     */
    private static ArrayList<String> readTreeFrags(File fragFile) throws FileNotFoundException {
        ArrayList<String> frags = new ArrayList<>();
        try (Scanner scanner = new Scanner(fragFile)) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] fields = line.split(" ");
                if (fields.length < 2) {
                    throw new IllegalArgumentException("Malformed tree fragment line in " + fragFile + ": '" + line + "'");
                }
                frags.add(fields[1]);
            }
        }
        return frags;
    }

    /**
     * Opens the three per-document vector files.
     *
     * @return {@code true} on success; {@code false} (after logging and releasing any file that
     *         was already opened) if one of them could not be created
     */
    private boolean openVectorWriters(String docId) {
        try {
            this.neOut = new PrintWriter(vectorDir(TYPE_NE) + docId + ".libsvm");
            this.demOut = new PrintWriter(vectorDir(TYPE_DEM) + docId + ".libsvm");
            this.pronOut = new PrintWriter(vectorDir(TYPE_PRONOUN) + docId + ".libsvm");
            return true;
        }
        catch (FileNotFoundException e) {
            this.log.error("Unable to open vector files for " + docId + "; skipping document.", e);
            closeVectorWriters();
            return false;
        }
    }

    /** Closes and forgets the per-document vector writers; safe to call repeatedly. */
    private void closeVectorWriters() {
        close(this.neOut);
        close(this.demOut);
        close(this.pronOut);
        this.neOut = null;
        this.demOut = null;
        this.pronOut = null;
    }

    /** Flushes whichever tree writers are open. */
    private void flushTreeWriters() {
        for (PrintWriter writer : new PrintWriter[] {this.neTreeOut, this.demTreeOut, this.pronTreeOut, this.debug}) {
            if (writer != null) {
                writer.flush();
            }
        }
    }

    /** Closes whichever tree writers are open, including the debug dump. */
    private void closeTreeWriters() {
        close(this.neTreeOut);
        close(this.demTreeOut);
        close(this.pronTreeOut);
        close(this.debug);
    }

    /** Null-safe close; {@link PrintWriter#close()} flushes and never throws. */
    private static void close(PrintWriter writer) {
        if (writer != null) {
            writer.close();
        }
    }

    /** Parses the first character of {@code nodeStr} as an integer label. */
    private int getLabel(String nodeStr) {
        return Integer.parseInt(nodeStr.substring(0, 1));
    }

    /**
     * Called at the end of a batch: releases any vector writer still open and flushes the tree
     * output. The tree writers stay open because further documents may follow the batch.
     */
    public void batchProcessComplete() throws AnalysisEngineProcessException {
        super.batchProcessComplete();
        if (!this.initialized) {
            return;
        }
        closeVectorWriters();
        flushTreeWriters();
    }

    /**
     * Called once after the last document: closes every writer so that buffered tree output
     * reaches disk even when the pipeline never signals a batch boundary.
     */
    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        super.collectionProcessComplete();
        closeVectorWriters();
        closeTreeWriters();
    }

    /** Copies a list of integers into a {@code double[]} of the same length. */
    private double[] listToDoubleArray(ArrayList<Integer> list) {
        double[] array = new double[list.size()];
        for (int i = 0; i < list.size(); ++i) {
            array[i] = list.get(i).intValue();
        }
        return array;
    }

    /**
     * Command-line entry point: reads every file in the training directory with
     * {@link XMIReader} and runs this writer over them with vector output enabled and tree
     * output disabled.
     *
     * @param args {@code <training directory> <gold-pairs directory> <output directory>}
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.err.println("Arguments: <training directory> <gold-pairs directory> <output directory>");
            System.exit(-1);
        }
        File xmiDir = new File(args[0]);
        if (!xmiDir.isDirectory()) {
            System.err.println("Arg1 should be a directory! (full of xmi files)");
            System.exit(-1);
        }
        File[] files = xmiDir.listFiles();
        if (files == null) {
            // listFiles() returns null on an I/O error even for an existing directory.
            System.err.println("Unable to list files in " + xmiDir.getAbsolutePath());
            System.exit(-1);
        }
        String[] paths = new String[files.length];
        for (int i = 0; i < files.length; ++i) {
            paths[i] = files[i].getAbsolutePath();
        }
        try {
            CollectionReader xmiReader = CollectionReaderFactory.createReader(XMIReader.class, new Object[]{"files", paths});
            AnalysisEngine consumer = AnalysisEngineFactory.createEngine(ODIEVectorFileWriter.class, new Object[]{PARAM_VECTORS, true, PARAM_TREES, false, PARAM_STOPS, "org/apache/ctakes/coreference/models/stop.txt", PARAM_FRAGS, "org/apache/ctakes/coreference/models/frags.txt", PARAM_GOLD_DIR, args[1], PARAM_OUTPUT_DIR, args[2]});
            SimplePipeline.runPipeline(xmiReader, new AnalysisEngine[]{consumer});
        }
        catch (Exception e) {
            System.err.println("Exception thrown!");
            e.printStackTrace();
        }
    }
}

