/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.core.ae;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.Pair;
import org.apache.ctakes.core.util.regex.RegexSpanFinder;
import org.apache.ctakes.typesystem.type.textspan.Paragraph;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

@PipeBitInfo(name="Paragraph Annotator", description="Annotates Paragraphs by detecting them using Regular Expressions provided in an input File or by empty text lines.", dependencies={PipeBitInfo.TypeProduct.SECTION}, products={PipeBitInfo.TypeProduct.PARAGRAPH})
public final class ParagraphAnnotator
extends JCasAnnotator_ImplBase {
    private static final Logger LOGGER = Logger.getLogger((String)"ParagraphAnnotator");
    public static final String PARAGRAPH_TYPES_PATH = "PARAGRAPH_TYPES_PATH";
    public static final String PARAGRAPH_TYPES_DESC = "path to a file containing a list of regular expressions and corresponding paragraph types.";
    @ConfigurationParameter(name="PARAGRAPH_TYPES_PATH", description="path to a file containing a list of regular expressions and corresponding paragraph types.", mandatory=false)
    private String _paragraphTypesPath;
    private static final String DEFAULT_PARAGRAPH = "Default Paragraph||(?:(?:\\r?\\n){2,})";
    private final Collection<ParagraphType> _paragraphTypes = new HashSet<ParagraphType>();

    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        if (this._paragraphTypesPath == null) {
            LOGGER.info((Object)"No path to a file containing a list of regular expressions and corresponding paragraph types.");
            LOGGER.info((Object)"Using default paragraph separator: two newlines");
            this.parseBsvLine(DEFAULT_PARAGRAPH);
            return;
        }
        LOGGER.info((Object)("Parsing " + this._paragraphTypesPath));
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(FileLocator.getAsStream(this._paragraphTypesPath)));){
            String line = reader.readLine();
            while (line != null) {
                this.parseBsvLine(line);
                line = reader.readLine();
            }
        }
        catch (IOException ioE) {
            throw new ResourceInitializationException((Throwable)ioE);
        }
        LOGGER.info((Object)"Finished Parsing");
    }

    public void process(JCas jcas) throws AnalysisEngineProcessException {
        LOGGER.info((Object)"Starting processing");
        if (this._paragraphTypes.isEmpty()) {
            LOGGER.info((Object)"Finished processing, no section types defined");
            return;
        }
        this.createParagraphs(jcas);
        LOGGER.info((Object)"Finished processing");
    }

    private Collection<Pair<Integer>> findSeparators(String docText) {
        HashSet<Pair<Integer>> separators = new HashSet<Pair<Integer>>();
        for (ParagraphType paragraphType : this._paragraphTypes) {
            if (paragraphType.__separatorPattern == null) continue;
            separators.addAll(ParagraphAnnotator.findSeparators(docText, paragraphType.__separatorPattern));
        }
        return separators;
    }

    /*
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    static Collection<Pair<Integer>> findSeparators(String docText, Pattern pattern) {
        try (RegexSpanFinder finder = new RegexSpanFinder(pattern);){
            List<Pair<Integer>> list = finder.findSpans(docText);
            return list;
        }
        catch (IllegalArgumentException iaE) {
            LOGGER.error((Object)iaE.getMessage());
            return Collections.emptyList();
        }
    }

    private void createParagraphs(JCas jcas) {
        Collection sections = JCasUtil.select((JCas)jcas, Segment.class);
        for (Segment section : sections) {
            int paragraphEnd;
            int offset = section.getBegin();
            String text = section.getCoveredText();
            Collection<Pair<Integer>> separators = this.findSeparators(text);
            if (separators.isEmpty()) {
                Paragraph paragraph = new Paragraph(jcas, offset, section.getEnd());
                paragraph.addToIndexes();
                continue;
            }
            ArrayList<Pair<Integer>> boundsList = new ArrayList<Pair<Integer>>(separators);
            Collections.sort(boundsList, (p1, p2) -> (Integer)p1.getValue1() - (Integer)p2.getValue2());
            Pair leftBounds = (Pair)boundsList.get(0);
            if ((Integer)leftBounds.getValue1() > 0) {
                paragraphEnd = (Integer)leftBounds.getValue1();
                Paragraph paragraph = new Paragraph(jcas, offset, offset + paragraphEnd);
                paragraph.addToIndexes();
            }
            int length = boundsList.size();
            for (int i = 0; i < length; ++i) {
                int paragraphBegin;
                paragraphEnd = i + 1 < length ? ((Integer)((Pair)boundsList.get(i + 1)).getValue1()).intValue() : text.length();
                if (paragraphEnd - (paragraphBegin = ((Integer)(leftBounds = (Pair)boundsList.get(i)).getValue2()).intValue()) <= 1) continue;
                Paragraph paragraph = new Paragraph(jcas, offset + paragraphBegin, offset + paragraphEnd);
                paragraph.addToIndexes();
            }
        }
    }

    private void parseBsvLine(String line) {
        if (line.isEmpty() || line.startsWith("#") || line.startsWith("//")) {
            return;
        }
        String[] splits = line.split("\\|\\|");
        if (splits.length < 2) {
            LOGGER.warn((Object)("Bad Paragraph definition: " + line + " ; please use the following:\n" + "NAME||SEPARATOR_REGEX"));
            return;
        }
        String name = splits[0].trim();
        String separatorRegex = splits[1].trim();
        ParagraphType paragraphType = new ParagraphType(name, separatorRegex);
        this._paragraphTypes.add(paragraphType);
    }

    private static final class ParagraphType {
        private final String __name;
        private final Pattern __separatorPattern;

        private ParagraphType(String name, String separatorRegex) {
            this.__name = name;
            this.__separatorPattern = separatorRegex == null ? null : Pattern.compile(separatorRegex, 8);
        }
    }
}

