/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hudi;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.SparkRowWriteHelper;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.keygen.BuiltinKeyGenerator;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataTypes;
import scala.collection.Iterable;
import scala.collection.JavaConverters;
import scala.collection.Seq;

/**
 * Static helpers that shape a Spark {@code Dataset<Row>} for Hudi's row-based bulk insert:
 * they add the Hudi meta columns, optionally drop partition columns and de-duplicate,
 * and put the meta columns in front of the data columns.
 */
public class HoodieDatasetBulkInsertHelper {
    private static final Logger LOG = LogManager.getLogger(HoodieDatasetBulkInsertHelper.class);
    private static final String RECORD_KEY_UDF_FN = "hudi_recordkey_gen_function_";
    private static final String PARTITION_PATH_UDF_FN = "hudi_partition_gen_function_";

    /**
     * Prepares {@code rows} for bulk insert with populated record-key and partition-path meta columns.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Load the key generator named by {@code DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME()}
     *       and register its record-key / partition-path functions as UDFs whose names are suffixed
     *       with the table name.</li>
     *   <li>Add {@code _hoodie_record_key} and {@code _hoodie_partition_path} by applying those UDFs
     *       to a struct of all input columns; add the remaining meta columns as empty strings.</li>
     *   <li>If {@code dropPartitionColumns} is set, drop every partition path field.</li>
     *   <li>If {@code config.shouldCombineBeforeInsert()} is set, de-duplicate via
     *       {@link SparkRowWriteHelper}.</li>
     *   <li>Select {@code HoodieRecord.HOODIE_META_COLUMNS} followed by the remaining input columns
     *       and hand the result to {@code bulkInsertPartitionerRows}.</li>
     * </ol>
     *
     * @param sqlContext                context used to register the key generator UDFs
     * @param config                    write config supplying properties, pre-combine field and parallelism
     * @param rows                      input dataset
     * @param structName                not used by this method
     * @param recordNamespace           not used by this method
     * @param bulkInsertPartitionerRows partitioner applied to the final dataset
     * @param isGlobalIndex             forwarded to the de-duplication step
     * @param dropPartitionColumns      whether partition path fields are removed from the output
     * @return the repartitioned dataset with meta columns first
     */
    public static Dataset<Row> prepareHoodieDatasetForBulkInsert(SQLContext sqlContext, HoodieWriteConfig config, Dataset<Row> rows, String structName, String recordNamespace, BulkInsertPartitioner<Dataset<Row>> bulkInsertPartitionerRows, boolean isGlobalIndex, boolean dropPartitionColumns) {
        // Mutable list: partition columns may be removed from it further down.
        List<Column> originalFields = Arrays.stream(rows.schema().fields())
                .map(f -> new Column(f.name()))
                .collect(Collectors.toList());

        TypedProperties properties = new TypedProperties();
        properties.putAll((Map<?, ?>)config.getProps());
        String keyGeneratorClass = properties.getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key());
        BuiltinKeyGenerator keyGenerator = (BuiltinKeyGenerator)ReflectionUtils.loadClass(keyGeneratorClass, properties);

        String tableName = properties.getString(HoodieWriteConfig.TBL_NAME.key());
        String recordKeyUdfFn = RECORD_KEY_UDF_FN + tableName;
        String partitionPathUdfFn = PARTITION_PATH_UDF_FN + tableName;
        sqlContext.udf().register(recordKeyUdfFn, keyGenerator::getRecordKey, DataTypes.StringType);
        sqlContext.udf().register(partitionPathUdfFn, keyGenerator::getPartitionPath, DataTypes.StringType);

        // Both UDFs receive the whole input row packed into a single struct column.
        Column allFieldsStruct = functions.struct(
                JavaConverters.collectionAsScalaIterableConverter(originalFields).asScala().toSeq());

        Dataset<Row> rowDatasetWithHoodieColumns = rows
                .withColumn("_hoodie_record_key", functions.callUDF(recordKeyUdfFn, allFieldsStruct))
                .withColumn("_hoodie_partition_path", functions.callUDF(partitionPathUdfFn, allFieldsStruct))
                .withColumn("_hoodie_commit_time", emptyStringColumn())
                .withColumn("_hoodie_commit_seqno", emptyStringColumn())
                .withColumn("_hoodie_file_name", emptyStringColumn());

        Dataset<Row> processedDf = rowDatasetWithHoodieColumns;
        if (dropPartitionColumns) {
            List<String> partitionFieldNames = new ArrayList<>();
            for (String partitionField : keyGenerator.getPartitionPathFields()) {
                originalFields.remove(new Column(partitionField));
                partitionFieldNames.add(partitionField);
            }
            // Dataset.drop(String) treats its argument as ONE column name, so passing a
            // comma-joined list silently drops nothing when there are several partition
            // fields. Use the varargs overload so each field is dropped individually.
            processedDf = rowDatasetWithHoodieColumns.drop(partitionFieldNames.toArray(new String[0]));
        }

        Dataset<Row> dedupedDf = processedDf;
        if (config.shouldCombineBeforeInsert()) {
            dedupedDf = SparkRowWriteHelper.newInstance().deduplicateRows(processedDf, config.getPreCombineField(), isGlobalIndex);
        }

        // Meta columns first, then whatever is left of the original columns.
        List<Column> orderedFields = Stream.concat(
                HoodieRecord.HOODIE_META_COLUMNS.stream().map(Column::new),
                originalFields.stream()).collect(Collectors.toList());
        Dataset<Row> colOrderedDataset = dedupedDf.select(
                JavaConverters.collectionAsScalaIterableConverter(orderedFields).asScala().toSeq());
        return bulkInsertPartitionerRows.repartitionRecords(colOrderedDataset, config.getBulkInsertShuffleParallelism());
    }

    /**
     * Adds the five Hudi meta columns ({@code _hoodie_commit_time}, {@code _hoodie_commit_seqno},
     * {@code _hoodie_record_key}, {@code _hoodie_partition_path}, {@code _hoodie_file_name}) to
     * {@code rows} as empty strings and reorders the result so that every column listed in
     * {@code HoodieRecord.HOODIE_META_COLUMNS_WITH_OPERATION} precedes the remaining columns.
     *
     * @param rows input dataset
     * @return dataset with empty meta columns first, followed by the data columns
     */
    public static Dataset<Row> prepareHoodieDatasetForBulkInsertWithoutMetaFields(Dataset<Row> rows) {
        Dataset<Row> rowsWithMetaCols = rows
                .withColumn("_hoodie_commit_time", emptyStringColumn())
                .withColumn("_hoodie_commit_seqno", emptyStringColumn())
                .withColumn("_hoodie_record_key", emptyStringColumn())
                .withColumn("_hoodie_partition_path", emptyStringColumn())
                .withColumn("_hoodie_file_name", emptyStringColumn());

        List<Column> originalFields = Arrays.stream(rowsWithMetaCols.schema().fields())
                .filter(field -> !HoodieRecord.HOODIE_META_COLUMNS_WITH_OPERATION.contains(field.name()))
                .map(f -> new Column(f.name()))
                .collect(Collectors.toList());
        List<Column> metaFields = Arrays.stream(rowsWithMetaCols.schema().fields())
                .filter(field -> HoodieRecord.HOODIE_META_COLUMNS_WITH_OPERATION.contains(field.name()))
                .map(f -> new Column(f.name()))
                .collect(Collectors.toList());

        List<Column> allCols = new ArrayList<>();
        allCols.addAll(metaFields);
        allCols.addAll(originalFields);
        return rowsWithMetaCols.select(
                JavaConverters.collectionAsScalaIterableConverter(allCols).asScala().toSeq());
    }

    /** Builds the empty-string literal column used as the initial value of a meta column. */
    private static Column emptyStringColumn() {
        return functions.lit((Object)"").cast(DataTypes.StringType);
    }
}

