/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.corpus.penntreebank;

import com.google.common.annotations.Beta;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.component.ViewCreatorAnnotator;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.SofaCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.cleartk.corpus.penntreebank.ListSpecification;
import org.cleartk.util.ViewUriUtil;

/**
 * Collection reader that walks a Penn Treebank (WSJ) directory and emits one CAS per
 * {@code .mrg} file.
 *
 * <p>For every file, the file's URI is recorded on the CAS via {@link ViewUriUtil#setURI} and
 * the raw file contents are placed into the {@link #TREEBANK_VIEW} view. Only the numerically
 * named section sub-directories accepted by the configured sections specifier are read; files
 * are delivered in the natural ({@link File#compareTo}) order.
 */
@SofaCapability(outputSofas={PennTreebankReader.TREEBANK_VIEW, "UriView"})
public class PennTreebankReader
extends JCasCollectionReader_ImplBase {
    /** Name of the view that receives the raw text of each treebank file. */
    public static final String TREEBANK_VIEW = "TREEBANK_VIEW";

    /** Name of the configuration parameter holding the corpus root directory. */
    public static final String PARAM_CORPUS_DIRECTORY_NAME = "corpusDirectoryName";
    private static final String CORPUS_DIRECTORY_DESCRIPTION = "Specifies the location of WSJ/PennTreebank treebank files.  The directory should contain subdirectories corresponding to the sections (e.g. '00', '01', etc.) That is, if a local copy of PennTreebank sits at C:/Data/PTB/wsj/mrg, then the the subdirectory C:/Data/PTB/wsj/mrg/00 should exist. There are 24 sections in PTB corresponding to the directories 00, 01, 02, ... 24. ";
    @ConfigurationParameter(name=PARAM_CORPUS_DIRECTORY_NAME, mandatory=true, description=CORPUS_DIRECTORY_DESCRIPTION)
    private String corpusDirectoryName;

    /** Name of the configuration parameter selecting which sections are read. */
    public static final String PARAM_SECTIONS_SPECIFIER = "sectionsSpecifier";
    private static final String SECTIONS_DESCRIPTION = "specifies which sections of PTB to read in.  The required format for values of this parameter allows for comma-separated section numbers and section ranges, for example '02,07-12,16'.";
    @ConfigurationParameter(name=PARAM_SECTIONS_SPECIFIER, defaultValue={"00-24"}, description=SECTIONS_DESCRIPTION)
    private String sectionsSpecifier;

    /** Corpus root directory, built from {@link #corpusDirectoryName} during initialization. */
    protected File directory;
    /** Files still to be read; the head of the list is the next file handed out. */
    protected LinkedList<File> files;
    /** Total number of files found at initialization time (used for progress reporting). */
    protected int numberOfFiles;
    /** Parsed form of {@link #sectionsSpecifier}. */
    protected ListSpecification sections;

    /**
     * Parses the sections specifier, scans the corpus directory for matching {@code .mrg} files
     * and sorts them so that they are delivered in a deterministic order.
     *
     * @param context the UIMA context (not used directly here)
     * @throws ResourceInitializationException declared by the overridden method
     */
    public void initialize(UimaContext context) throws ResourceInitializationException {
        this.sections = new ListSpecification(this.sectionsSpecifier);
        this.directory = new File(this.corpusDirectoryName);
        this.files = new LinkedList<File>();
        PennTreebankReader.collectSections(new File(this.directory.getPath()), this.files, this.sections);
        Collections.sort(this.files);
        this.numberOfFiles = this.files.size();
    }

    /**
     * Adds to {@code treebankFiles} every {@code .mrg} file found (recursively) below those
     * sub-directories of {@code wsjDirectory} whose name is an integer accepted by
     * {@code wsjSections}.
     *
     * <p>Nothing is added when {@code wsjDirectory} is not a directory or cannot be listed.
     * Plain files directly inside {@code wsjDirectory}, sub-directories with non-numeric names
     * and sections rejected by {@code wsjSections} are skipped.
     *
     * @param wsjDirectory directory containing the section sub-directories ('00', '01', ...)
     * @param treebankFiles output list that receives the matching files
     * @param wsjSections the set of section numbers to include
     */
    @Beta
    public static void collectSections(File wsjDirectory, List<File> treebankFiles, ListSpecification wsjSections) {
        if (!wsjDirectory.isDirectory()) {
            return;
        }
        File[] subFiles = wsjDirectory.listFiles();
        if (subFiles == null) {
            // listFiles() returns null on an I/O error or an unreadable directory.
            return;
        }
        for (File subFile : subFiles) {
            if (!subFile.isDirectory()) {
                continue;
            }
            int section;
            try {
                section = Integer.parseInt(subFile.getName());
            }
            catch (NumberFormatException e) {
                // Not a section directory (name is not a number): skip it.
                continue;
            }
            if (!wsjSections.contains(section)) {
                continue;
            }
            PennTreebankReader.collectFiles(subFile, treebankFiles);
        }
    }

    /**
     * Recursively collects {@code .mrg} files.
     *
     * <p>If {@code file} is a regular file whose name ends with {@code ".mrg"} it is appended to
     * {@code treebankFiles}; if it is a directory, its entries are visited recursively. Anything
     * else (including directories that cannot be listed) is ignored.
     *
     * @param file file or directory to inspect
     * @param treebankFiles output list that receives the matching files
     */
    static void collectFiles(File file, List<File> treebankFiles) {
        if (file.isFile() && file.getName().endsWith(".mrg")) {
            treebankFiles.add(file);
        } else if (file.isDirectory()) {
            File[] subFiles = file.listFiles();
            if (subFiles == null) {
                // listFiles() returns null on an I/O error or an unreadable directory.
                return;
            }
            for (File subFile : subFiles) {
                PennTreebankReader.collectFiles(subFile, treebankFiles);
            }
        }
    }

    /**
     * Removes the next pending file from the queue, records its URI on the CAS and stores its
     * contents as {@code text/plain} sofa data in the {@link #TREEBANK_VIEW} view.
     *
     * <p>Callers are expected to check {@link #hasNext()} first; with an empty queue
     * {@link LinkedList#removeFirst()} throws {@link java.util.NoSuchElementException}.
     * NOTE(review): the file is read without an explicit encoding, so presumably the platform
     * default charset applies - confirm against {@code FileUtils.file2String(File)}.
     *
     * @param jCas the CAS to populate
     * @throws IOException if the file cannot be read
     * @throws CollectionException if the treebank view cannot be created
     */
    public void getNext(JCas jCas) throws IOException, CollectionException {
        File treebankFile = this.files.removeFirst();
        this.getUimaContext().getLogger().log(Level.FINEST, "reading treebank file: " + treebankFile.getPath());
        ViewUriUtil.setURI(jCas, treebankFile.toURI());
        try {
            JCas treebankView = ViewCreatorAnnotator.createViewSafely(jCas, TREEBANK_VIEW);
            treebankView.setSofaDataString(FileUtils.file2String(treebankFile), "text/plain");
        }
        catch (AnalysisEngineProcessException aepe) {
            throw new CollectionException(aepe);
        }
    }

    /** Does nothing; this reader holds no open resources between calls. */
    public void close() throws IOException {
    }

    /**
     * Reports progress as the number of files already handed out versus the number of files
     * found during initialization.
     *
     * @return a single-element array describing the progress in "entities"
     */
    public Progress[] getProgress() {
        int filesRead = this.numberOfFiles - this.files.size();
        return new Progress[]{new ProgressImpl(filesRead, this.numberOfFiles, "entities")};
    }

    /**
     * @return {@code true} while at least one file remains to be read
     */
    public boolean hasNext() throws IOException, CollectionException {
        return !this.files.isEmpty();
    }

    /**
     * Sets the corpus directory programmatically; takes effect on the next call to
     * {@link #initialize(UimaContext)}.
     *
     * @param corpusDirectoryName path of the directory containing the section sub-directories
     */
    @Beta
    public void setCorpusDirectoryName(String corpusDirectoryName) {
        this.corpusDirectoryName = corpusDirectoryName;
    }

    /**
     * Sets the sections specifier programmatically; takes effect on the next call to
     * {@link #initialize(UimaContext)}.
     *
     * @param sectionsString comma-separated section numbers and ranges, e.g. '02,07-12,16'
     */
    @Beta
    public void setSectionsSpecifier(String sectionsString) {
        this.sectionsSpecifier = sectionsString;
    }
}

