/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.ml.feature.function;

import java.util.Collections;
import java.util.List;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;
import org.cleartk.ml.feature.extractor.CleartkExtractorException;
import org.cleartk.ml.feature.extractor.NamedFeatureExtractor1;
import org.cleartk.ml.feature.function.FeatureFunction;

/**
 * Feature function that rewrites a string feature value as a pattern of
 * Unicode general-category codes (for example {@code "Lu"} for an upper-case
 * letter or {@code "Nd"} for a decimal digit).
 *
 * <p>How repeated categories are rendered depends on the {@link PatternType}.
 * For the input {@code "AAb1"}:
 * <ul>
 * <li>{@link PatternType#ONE_PER_CHAR} yields {@code "LuLuLlNd"}</li>
 * <li>{@link PatternType#REPEATS_MERGED} yields {@code "LuLlNd"}</li>
 * <li>{@link PatternType#REPEATS_AS_KLEENE_PLUS} yields {@code "Lu+LlNd"}</li>
 * </ul>
 *
 * <p>Classification is done per {@code char} (UTF-16 code unit), not per code
 * point, so each half of a surrogate pair is classified on its own.
 *
 * @param <T> annotation type handled by extractors created from this class
 */
public class CharacterCategoryPatternFunction<T extends Annotation>
implements FeatureFunction {
    /** Controls how runs of characters with the same category are rendered. */
    private final PatternType patternType;

    /** Feature-name prefix derived from {@link #patternType}. */
    private final String name;

    /**
     * Creates an extractor that emits one category code per character.
     *
     * @param <T> annotation type accepted by the returned extractor
     * @return an extractor using {@link PatternType#ONE_PER_CHAR}
     */
    public static <T extends Annotation> NamedFeatureExtractor1<T> createExtractor() {
        return CharacterCategoryPatternFunction.createExtractor(PatternType.ONE_PER_CHAR);
    }

    /**
     * Creates an extractor that applies this function to the covered text of
     * the focus annotation.
     *
     * @param <T> annotation type accepted by the returned extractor
     * @param patternType how repeated categories are rendered
     * @return an extractor whose feature name is that of the underlying function
     * @throws NullPointerException if {@code patternType} is {@code null}
     */
    public static <T extends Annotation> NamedFeatureExtractor1<T> createExtractor(PatternType patternType) {
        final CharacterCategoryPatternFunction<T> ccpf = new CharacterCategoryPatternFunction<T>(patternType);
        return new NamedFeatureExtractor1<T>(){

            @Override
            public List<Feature> extract(JCas view, Annotation focusAnnotation) throws CleartkExtractorException {
                // Wrap the covered text in an unnamed feature so apply() can be reused.
                String text = focusAnnotation.getCoveredText();
                return ccpf.apply(new Feature(null, text));
            }

            @Override
            public String getFeatureName() {
                return ccpf.getFeatureName();
            }
        };
    }

    /** Creates a function that emits one category code per character. */
    public CharacterCategoryPatternFunction() {
        this(PatternType.ONE_PER_CHAR);
    }

    /**
     * Creates a function using the given rendering of repeated categories.
     *
     * @param patternType how repeated categories are rendered
     * @throws NullPointerException if {@code patternType} is {@code null}
     */
    public CharacterCategoryPatternFunction(PatternType patternType) {
        // Same exception type a switch on a null enum would raise, but with a message.
        if (patternType == null) {
            throw new NullPointerException("patternType must not be null");
        }
        this.patternType = patternType;
        this.name = CharacterCategoryPatternFunction.featureNameFor(patternType);
    }

    /** Maps a pattern type to the feature-name prefix used for its output. */
    private static String featureNameFor(PatternType patternType) {
        switch (patternType) {
            case ONE_PER_CHAR:
                return "CharPattern";
            case REPEATS_MERGED:
                return "CharPatternRepeatsMerged";
            case REPEATS_AS_KLEENE_PLUS:
                return "CharPatternRepeatsAsKleenePlus";
            default:
                // Only reachable if a new PatternType is added without a name here.
                throw new IllegalArgumentException("Unknown pattern type: " + patternType);
        }
    }

    /**
     * Returns the feature-name prefix selected by the pattern type.
     *
     * @return {@code "CharPattern"}, {@code "CharPatternRepeatsMerged"} or
     *         {@code "CharPatternRepeatsAsKleenePlus"}
     */
    public String getFeatureName() {
        return this.name;
    }

    /**
     * Converts a string-valued feature into a single character-category
     * pattern feature.
     *
     * @param feature the input feature; only {@link String} values are processed
     * @return a one-element list holding the pattern feature, whose name is
     *         built by {@code Feature.createName} from this function's name and
     *         the input feature's name; an empty list when the value is
     *         {@code null} or not a {@link String}. An empty string value
     *         produces a feature with an empty pattern.
     */
    public List<Feature> apply(Feature feature) {
        String featureName = Feature.createName(this.getFeatureName(), feature.getName());
        Object featureValue = feature.getValue();
        // instanceof is false for null, so this covers both the null and non-String cases.
        if (!(featureValue instanceof String)) {
            return Collections.emptyList();
        }
        String pattern = this.buildPattern((String) featureValue);
        return Collections.singletonList(new Feature(featureName, pattern));
    }

    /** Builds the category pattern for {@code text} according to the pattern type. */
    private String buildPattern(String text) {
        StringBuilder builder = new StringBuilder();
        String lastType = null;
        // True once a '+' has been written for the current run of equal categories.
        boolean plusEmitted = false;
        for (int i = 0; i < text.length(); ++i) {
            String type = this.classifyChar(text.charAt(i));
            boolean repeatsPrevious = type.equals(lastType);
            switch (this.patternType) {
                case ONE_PER_CHAR:
                    builder.append(type);
                    break;
                case REPEATS_MERGED:
                    if (!repeatsPrevious) {
                        builder.append(type);
                    }
                    break;
                case REPEATS_AS_KLEENE_PLUS:
                    if (!repeatsPrevious) {
                        builder.append(type);
                        plusEmitted = false;
                    } else if (!plusEmitted) {
                        // Mark the run once; further repeats add nothing.
                        builder.append('+');
                        plusEmitted = true;
                    }
                    break;
                default:
                    break;
            }
            lastType = type;
        }
        return builder.toString();
    }

    /**
     * Maps a character to a two-letter code for its Unicode general category,
     * as reported by {@link Character#getType(char)}.
     *
     * @param c the character to classify
     * @return the category code, e.g. {@code "Lu"}, {@code "Nd"}, {@code "Zs"}
     * @throws RuntimeException if {@link Character#getType(char)} returns a
     *         value not handled here
     */
    protected String classifyChar(char c) {
        int typeInt = Character.getType(c);
        switch (typeInt) {
            // NOTE(review): the Unicode abbreviation for control characters is "Cc";
            // the upper-case "CC" is kept because changing it would change emitted
            // feature values.
            case Character.CONTROL:
                return "CC";
            case Character.FORMAT:
                return "Cf";
            case Character.UNASSIGNED:
                return "Cn";
            case Character.PRIVATE_USE:
                return "Co";
            case Character.SURROGATE:
                return "Cs";
            case Character.LOWERCASE_LETTER:
                return "Ll";
            case Character.MODIFIER_LETTER:
                return "Lm";
            case Character.OTHER_LETTER:
                return "Lo";
            case Character.TITLECASE_LETTER:
                return "Lt";
            case Character.UPPERCASE_LETTER:
                return "Lu";
            case Character.COMBINING_SPACING_MARK:
                return "Mc";
            case Character.ENCLOSING_MARK:
                return "Me";
            case Character.NON_SPACING_MARK:
                return "Mn";
            case Character.DECIMAL_DIGIT_NUMBER:
                return "Nd";
            case Character.LETTER_NUMBER:
                return "Nl";
            case Character.OTHER_NUMBER:
                return "No";
            case Character.CONNECTOR_PUNCTUATION:
                return "Pc";
            case Character.DASH_PUNCTUATION:
                return "Pd";
            case Character.END_PUNCTUATION:
                return "Pe";
            case Character.FINAL_QUOTE_PUNCTUATION:
                return "Pf";
            case Character.INITIAL_QUOTE_PUNCTUATION:
                return "Pi";
            case Character.OTHER_PUNCTUATION:
                return "Po";
            case Character.START_PUNCTUATION:
                return "Ps";
            case Character.CURRENCY_SYMBOL:
                return "Sc";
            case Character.MODIFIER_SYMBOL:
                return "Sk";
            case Character.MATH_SYMBOL:
                return "Sm";
            case Character.OTHER_SYMBOL:
                return "So";
            case Character.LINE_SEPARATOR:
                return "Zl";
            case Character.PARAGRAPH_SEPARATOR:
                return "Zp";
            case Character.SPACE_SEPARATOR:
                return "Zs";
            default:
                throw new RuntimeException("Unknown character type: " + typeInt);
        }
    }

    /** Strategies for rendering consecutive characters of the same category. */
    public enum PatternType {
        /** Emit one category code for every character. */
        ONE_PER_CHAR,
        /** Emit a category code once per run of equal categories. */
        REPEATS_MERGED,
        /** Emit a category code once per run, followed by a single {@code '+'} if the run is longer than one. */
        REPEATS_AS_KLEENE_PLUS;

    }
}

