/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.cmdline.tokenizer;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.cmdline.tokenizer.TrainingParams;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenizerFactory;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Parameters;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.BaseModel;
import opennlp.tools.util.model.ModelUtil;

public final class TokenizerTrainerTool
extends AbstractTrainerTool<TokenSample, TrainerToolParams> {
    public TokenizerTrainerTool() {
        super(TokenSample.class, TrainerToolParams.class);
    }

    @Override
    public String getShortDescription() {
        return "Trainer for the learnable tokenizer";
    }

    static Dictionary loadDict(File f) throws IOException {
        Dictionary dict = null;
        if (f != null && f.exists()) {
            CmdLineUtil.checkInputFile("abb dict", f);
            try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(f));){
                if (((InputStream)in).available() == 0) {
                    throw new InvalidFormatException("Encountered an empty dictionary file?!");
                }
                dict = new Dictionary((InputStream)in);
            }
        }
        return dict;
    }

    @Override
    public void run(String format, String[] args) {
        TokenizerModel model;
        super.run(format, args);
        this.mlParams = null != ((TrainerToolParams)this.params).getParams() ? CmdLineUtil.loadTrainingParameters(((TrainerToolParams)this.params).getParams(), false) : TrainingParameters.setParams((String[])args);
        if (this.mlParams != null) {
            if (!TrainerFactory.isValid((Parameters)this.mlParams)) {
                throw new TerminateToolException(1, "Training parameters file '" + ((TrainerToolParams)this.params).getParams() + "' is invalid!");
            }
            if (!TrainerFactory.TrainerType.EVENT_MODEL_TRAINER.equals((Object)TrainerFactory.getTrainerType((Parameters)this.mlParams))) {
                throw new TerminateToolException(1, "Sequence training is not supported!");
            }
        }
        if (this.mlParams == null) {
            this.mlParams = ModelUtil.createDefaultTrainingParameters();
        }
        File modelOutFile = ((TrainerToolParams)this.params).getModel();
        CmdLineUtil.checkOutputFile("tokenizer model", modelOutFile);
        try {
            Dictionary dict = TokenizerTrainerTool.loadDict(((TrainerToolParams)this.params).getAbbDict());
            TokenizerFactory tokFactory = TokenizerFactory.create((String)((TrainerToolParams)this.params).getFactory(), (String)((TrainerToolParams)this.params).getLang(), (Dictionary)dict, (boolean)((TrainerToolParams)this.params).getAlphaNumOpt(), null);
            model = TokenizerME.train((ObjectStream)this.sampleStream, (TokenizerFactory)tokFactory, (TrainingParameters)this.mlParams);
        }
        catch (IOException e) {
            throw this.createTerminationIOException(e);
        }
        finally {
            try {
                this.sampleStream.close();
            }
            catch (IOException iOException) {}
        }
        CmdLineUtil.writeModel("tokenizer", modelOutFile, (BaseModel)model);
    }

    static interface TrainerToolParams
    extends TrainingParams,
    TrainingToolParams {
    }
}

