package com.cloudera.cdx.extractor.hive;

import com.cloudera.cdx.extractor.model.Service;
import com.cloudera.cdx.extractor.model.hive.HColumn;
import com.cloudera.cdx.extractor.model.hive.HDatabase;
import com.cloudera.cdx.extractor.model.hive.HMetaStore;
import com.cloudera.cdx.extractor.model.hive.HPartition;
import com.cloudera.cdx.extractor.model.hive.HPartitionBatch;
import com.cloudera.cdx.extractor.model.hive.HTable;
import com.cloudera.cdx.extractor.model.hive.HView;
import com.cloudera.cdx.extractor.model.hive.NamedColumnSet;
import com.cloudera.cdx.extractor.util.FilterUtil;
import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.thrift.TException;
import org.joda.time.Instant;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/cloudera/cdx/extractor/hive/AbstractHiveExtractor.class */
public abstract class AbstractHiveExtractor implements Runnable {
    /** Class-wide SLF4J logger. */
    private static final Logger LOG = LoggerFactory.getLogger(AbstractHiveExtractor.class);
    // Not referenced inside this class; presumably a Hive table-parameter key used by callers — TODO confirm.
    public static final String TRANSIENT_LAST_DDL_TIME = "transient_lastDdlTime";
    // Integer tag written to the hasher before every hashed field (see the updateHash overloads).
    private static final int FIELD_MARKER = 1;
    // Integer tag written before each list element, and before each map key, when hashing collections.
    private static final int LIST_ITEM_MARKER = 2;
    // Integer tag written between a map key and its value when hashing maps.
    private static final int MAP_VALUE_MARKER = 3;
    // Table parameter holding the last-modified time; parsed as a number and multiplied by 1000 in setTableProperties.
    protected static final String LAST_MODIFIED_PARAMETER = "last_modified_time";
    // Table parameter copied to the entity's "last modified by" attribute in setTableProperties.
    protected static final String LAST_MODIFIED_BY_PARAMETER = "last_modified_by";
    // Table parameter copied to the entity's description in setTableProperties.
    protected static final String COMMENT = "comment";
    /** Extraction context supplied at construction; source of config, service, options, exporter, reporter and state store. */
    protected final HiveExtractorContext context;
    /** Helper used to set up the metastore connection and to look up table columns. */
    protected final HiveExtractorHelper helper = HiveExtractorHelperFactory.newHelper();
    // Client used for every metastore call below. It is never assigned in this class,
    // so presumably subclasses set it before calling the extract* methods — TODO confirm.
    protected HiveMetaStoreClient metastore;
    // Not referenced inside this class; presumably managed by subclasses via loadState/saveState — TODO confirm.
    protected AbstractHiveExtractorState state;

    /**
     * Creates an extractor bound to the given context.
     *
     * @param hiveExtractorContext context stored in {@link #context} and consulted by every
     *                             extraction method; it is not validated here
     */
    public AbstractHiveExtractor(HiveExtractorContext hiveExtractorContext) {
        this.context = hiveExtractorContext;
    }

    /**
     * Performs the actual extraction. Called by {@link #run()} with the client returned by
     * {@code helper.setUpConnection(...)}.
     *
     * @param hiveMetaStoreClient the freshly set-up metastore client
     */
    abstract void run(HiveMetaStoreClient hiveMetaStoreClient);

    /**
     * Sets up a metastore connection from the context configuration and hands the resulting
     * client to {@link #run(HiveMetaStoreClient)}. A {@link TException} is logged at ERROR level
     * and not propagated.
     */
    @Override // java.lang.Runnable
    public void run() {
        try {
            HiveMetaStoreClient client = this.helper.setUpConnection(this.context.getConfig());
            run(client);
        } catch (TException connectionFailure) {
            LOG.error("Error connecting to Hive Metastore. ", connectionFailure);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Exports a metastore-level record listing every database, then extracts each database
     * in the order the metastore returned it.
     *
     * @param instant extraction timestamp stamped on every exported entity
     * @throws TException if a metastore call fails
     */
    public void extractDatabases(Instant instant) throws TException {
        List<String> databaseNames = this.metastore.getAllDatabases();
        Service service = this.context.getService();

        // Describe the metastore itself before descending into its databases.
        HMetaStore metaStoreEntity = new HMetaStore();
        metaStoreEntity.setDatabases(databaseNames);
        metaStoreEntity.setExtractionTime(instant);
        metaStoreEntity.setSourceId(service.getCdxId());
        metaStoreEntity.setCdxId(this.context.getHiveIdGenerator().generateHmsId(service));
        this.context.getExporter().send(metaStoreEntity);

        for (String databaseName : databaseNames) {
            extractDatabase(databaseName, instant);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Exports a metastore-level record listing every database, then extracts only the requested
     * databases and tables. Does nothing when {@code map} is null or empty.
     *
     * @param map     database name mapped to the set of table names to extract from it
     * @param instant extraction timestamp stamped on every exported entity
     * @throws TException if a metastore call fails
     */
    public void extractDatabases(Map<String, Set<String>> map, Instant instant) throws TException {
        if (map == null || map.isEmpty()) {
            return;
        }
        Service service = this.context.getService();

        // The metastore record always lists all databases, not just the requested ones.
        HMetaStore metaStoreEntity = new HMetaStore();
        metaStoreEntity.setDatabases(this.metastore.getAllDatabases());
        metaStoreEntity.setExtractionTime(instant);
        metaStoreEntity.setSourceId(service.getCdxId());
        metaStoreEntity.setCdxId(this.context.getHiveIdGenerator().generateHmsId(service));
        this.context.getExporter().send(metaStoreEntity);

        for (Map.Entry<String, Set<String>> requested : map.entrySet()) {
            String databaseName = requested.getKey();
            Set<String> tableNames = requested.getValue();
            extractDatabase(databaseName, tableNames, instant);
        }
    }

    /**
     * Extracts one database together with all of its tables and views.
     * <p>
     * The database entity is exported first, then each table is processed. A failure on a single
     * table is logged at INFO level and does not stop the remaining tables.
     *
     * @param str     database name
     * @param instant extraction timestamp
     * @throws TException if fetching the database or its table list fails
     */
    protected void extractDatabase(String str, Instant instant) throws TException {
        LOG.trace("Extracting Database {}", str);
        Database database = this.metastore.getDatabase(str);
        Hasher hasher = Hashing.sha256().newHasher();
        HDatabase databaseEntity = createHDatabase(str, database, hasher, instant);

        List<String> tableNames = this.metastore.getAllTables(str);
        databaseEntity.setTableNames(tableNames);
        this.context.getExporter().send(databaseEntity);

        for (String tableName : tableNames) {
            try {
                extractViewOrTable(str, tableName, databaseEntity, instant);
            } catch (Exception failure) {
                String message = String.format("Failed to extract table %s from database %s with error: %s; Potentiallu being extracted after deletion.", tableName, str, failure.getMessage());
                LOG.info(message, failure);
            }
        }
        this.context.getReporter().incrementNumDatabases();
    }

    /**
     * Extracts one database restricted to the given tables.
     * <p>
     * The exported database entity lists exactly the requested table names. A failure on a
     * single table is logged at DEBUG level and does not stop the remaining tables.
     *
     * @param str     database name
     * @param set     names of the tables/views to extract
     * @param instant extraction timestamp
     * @throws TException if fetching the database fails
     */
    protected void extractDatabase(String str, Set<String> set, Instant instant) throws TException {
        LOG.trace("Extracting Database {} and tables {}", str, set);
        Database database = this.metastore.getDatabase(str);
        Hasher hasher = Hashing.sha256().newHasher();
        HDatabase databaseEntity = createHDatabase(str, database, hasher, instant);

        databaseEntity.setTableNames(new ArrayList<String>(set));
        this.context.getExporter().send(databaseEntity);

        for (String tableName : set) {
            try {
                extractViewOrTable(str, tableName, databaseEntity, instant);
            } catch (Exception failure) {
                String message = String.format("Failed to extract table %s from database %s with error: %s", tableName, str, failure.getMessage());
                LOG.debug(message, failure);
            }
        }
        this.context.getReporter().incrementNumDatabases();
    }

    /**
     * Builds the {@link HDatabase} model for a metastore database, feeding each copied attribute
     * into {@code hasher} in the order name, location, description, parameters.
     *
     * @param str      database name
     * @param database metastore database record
     * @param hasher   hasher updated with every copied attribute
     * @param instant  extraction timestamp
     * @return the populated database entity (table names are not set here)
     */
    protected HDatabase createHDatabase(String str, Database database, Hasher hasher, Instant instant) {
        Service service = this.context.getService();
        HDatabase entity = new HDatabase(this.context.getHiveIdGenerator().generateDbIdentity(service, str), service.getCdxId());

        // Hash order matters: keep name -> location -> description -> parameters.
        String name = updateHash(hasher, str);
        entity.setName(name);
        String location = updateHash(hasher, database.getLocationUri());
        entity.setFileSystemPath(location);
        String description = updateHash(hasher, database.getDescription());
        entity.setDescription(description);
        Map<String, String> parameters = updateHash(hasher, database.getParameters());
        entity.setParams(parameters);

        entity.setExtractionTime(instant);
        return entity;
    }

    /**
     * Extracts a single table or view and exports it, followed by its partition batches.
     * <p>
     * External and managed tables go through {@code extractTable} and have their partitions
     * extracted; virtual views go through {@code extractView} and optionally have their query
     * text PII-masked. Any other table type is logged at TRACE level and skipped.
     *
     * @param str       database name
     * @param str2      table or view name
     * @param hDatabase already-built entity of the owning database
     * @param instant   extraction timestamp
     * @throws TException if a metastore call fails
     */
    protected void extractViewOrTable(String str, String str2, HDatabase hDatabase, Instant instant) throws TException {
        Table table = this.metastore.getTable(str, str2);
        String tableType = table.getTableType();
        Hasher hasher = Hashing.sha256().newHasher()
                .putString(str, StandardCharsets.UTF_8)
                .putString(str2, StandardCharsets.UTF_8)
                .putString(tableType, StandardCharsets.UTF_8);

        HView entity;
        Collection<HPartitionBatch> partitionBatches = Collections.emptyList();
        if (TableType.EXTERNAL_TABLE.name().equals(tableType) || TableType.MANAGED_TABLE.name().equals(tableType)) {
            // Keep the precise HTable type: extractPartitions requires an HTable.
            // NOTE(review): assigning it to the shared HView variable relies on HTable being
            // assignable to HView, exactly as the previous code did — confirm against the model.
            HTable tableEntity = extractTable(table, str, hasher, str2);
            partitionBatches = extractPartitions(hDatabase, tableEntity, instant);
            entity = tableEntity;
        } else if (TableType.VIRTUAL_VIEW.name().equals(tableType)) {
            HView viewEntity = extractView(table, hDatabase, hasher, str2);
            if (this.context.getOptions().isPiiMaskingEnabled()) {
                viewEntity.setQueryText(FilterUtil.maskText(viewEntity.getQueryText(), this.context.getOptions().getPiiMaskingRegex()));
            }
            entity = viewEntity;
        } else {
            LOG.trace("Unexpected table type {}", tableType);
            return;
        }

        entity.setDatabaseId(hDatabase.getCdxId());
        entity.setDatabaseName(hDatabase.getName());
        entity.setExtractionTime(instant);
        entity.setColumns(extractColumns(hDatabase, table, entity, instant));

        // The table/view itself is exported before its partition batches.
        this.context.getExporter().send(entity);
        exportPartBatches(partitionBatches);
        LOG.trace("Done extracting table {}", entity.getName());
    }

    /**
     * Sends every partition batch to the context's exporter, in iteration order.
     *
     * @param collection batches to export; may be empty
     */
    private void exportPartBatches(Collection<HPartitionBatch> collection) {
        for (HPartitionBatch batch : collection) {
            this.context.getExporter().send(batch);
        }
    }

    /**
     * Builds the table entity via {@link #createHTable} and bumps the reporter's table counter.
     *
     * @param table  metastore table record
     * @param str    database name
     * @param hasher hasher updated with the table's attributes
     * @param str2   table name, used for trace logging only
     * @return the populated table entity
     */
    private HTable extractTable(Table table, String str, Hasher hasher, String str2) throws TException {
        LOG.trace("Extracting Table {}", str2);
        final HTable tableEntity = createHTable(table, str, hasher);
        // Counted only after the entity was built successfully.
        this.context.getReporter().incrementNumTables();
        return tableEntity;
    }

    /**
     * Builds the {@link HTable} model for a metastore table and folds each copied attribute into
     * {@code hasher}.
     * <p>
     * Hash order: owner, create time, table parameters, last access time, partition column
     * names, then the storage descriptor attributes (location, input/output format, compressed
     * flag, bucket columns, serde name/library/parameters, sort columns).
     *
     * @param table  metastore table record
     * @param str    name of the owning database
     * @param hasher hasher updated with every copied attribute
     * @return the populated table entity
     */
    protected HTable createHTable(Table table, String str, Hasher hasher) {
        String sourceId = this.context.getService().getCdxId();
        HTable hTable = new HTable(this.context.getHiveIdGenerator().generateTableIdentity(sourceId, str, table.getTableName()), sourceId);
        hTable.setName(table.getTableName());
        hTable.setOwner(updateHash(hasher, table.getOwner()));

        // The metastore reports times as int seconds; multiply in long (1000L) so the
        // conversion to milliseconds cannot overflow the int range.
        hTable.setCreated(new Instant(updateHash(hasher, table.getCreateTime() * 1000L)));
        setTableProperties(hasher, hTable, table.getParameters());
        hTable.setLastAccessed(new Instant(updateHash(hasher, table.getLastAccessTime() * 1000L)));

        // An unset partition key list is treated as "no partition columns".
        List<FieldSchema> partitionKeys = table.getPartitionKeys();
        List<String> partColNames = new ArrayList<String>(partitionKeys == null ? 0 : partitionKeys.size());
        if (partitionKeys != null) {
            for (FieldSchema partitionKey : partitionKeys) {
                partColNames.add(partitionKey.getName());
            }
        }
        hTable.setPartColNames(updateHash(hasher, partColNames));

        StorageDescriptor sd = table.getSd();
        hTable.setFileSystemPath(updateHash(hasher, sd.getLocation()));
        hTable.setInputFormat(updateHash(hasher, sd.getInputFormat()));
        hTable.setOutputFormat(updateHash(hasher, sd.getOutputFormat()));
        hTable.setCompressed(Boolean.valueOf(updateHash(hasher, sd.isCompressed())));
        hTable.setClusteredByColNames(updateHash(hasher, sd.getBucketCols()));
        hTable.setSerdeName(updateHash(hasher, sd.getSerdeInfo().getName()));
        hTable.setSerdeLibName(updateHash(hasher, sd.getSerdeInfo().getSerializationLib()));
        hTable.setSerdeProps(updateHash(hasher, sd.getSerdeInfo().getParameters()));

        // getSortColsSize() is used for sizing; guard the list itself against being unset.
        List<Order> sortCols = sd.getSortCols();
        List<String> sortColNames = new ArrayList<String>(sd.getSortColsSize());
        if (sortCols != null) {
            for (Order sortCol : sortCols) {
                sortColNames.add(sortCol.getCol());
            }
        }
        hTable.setSortByColNames(updateHash(hasher, sortColNames));
        return hTable;
    }

    /**
     * Builds the view entity via {@code createHView} and bumps the reporter's view counter.
     *
     * @param table     metastore record of the view
     * @param hDatabase entity of the owning database
     * @param hasher    hasher updated with the view's attributes
     * @param str       view name, used for trace logging only
     * @return the populated view entity
     */
    private HView extractView(Table table, HDatabase hDatabase, Hasher hasher, String str) {
        LOG.trace("Extracting View {}", str);
        final HView viewEntity = createHView(table, hDatabase, hasher);
        // Counted only after the entity was built successfully.
        this.context.getReporter().incrementNumViews();
        return viewEntity;
    }

    /**
     * Builds the {@link HView} model for a metastore view and folds each copied attribute into
     * {@code hasher} in the order create time, last access time, query text, table parameters.
     *
     * @param table     metastore record of the view
     * @param hDatabase entity of the owning database; supplies the database name for the identity
     * @param hasher    hasher updated with every copied attribute
     * @return the populated view entity
     */
    private HView createHView(Table table, HDatabase hDatabase, Hasher hasher) {
        String sourceId = this.context.getService().getCdxId();
        HView hView = new HView(this.context.getHiveIdGenerator().generateTableIdentity(sourceId, hDatabase.getName(), table.getTableName()), sourceId);
        hView.setName(table.getTableName());

        // The metastore reports times as int seconds; multiply in long (1000L) so the
        // conversion to milliseconds cannot overflow the int range.
        hView.setCreated(new Instant(updateHash(hasher, table.getCreateTime() * 1000L)));
        hView.setLastAccessed(new Instant(updateHash(hasher, table.getLastAccessTime() * 1000L)));
        hView.setQueryText(updateHash(hasher, table.getViewOriginalText()));
        setTableProperties(hasher, hView, table.getParameters());
        return hView;
    }

    /**
     * Creates column entities for a table or view: regular columns first, then partition
     * columns, each in the order the helper returned them.
     *
     * @param hDatabase      entity of the owning database
     * @param table          metastore table record
     * @param namedColumnSet table/view entity that owns the columns
     * @param instant        extraction timestamp
     * @return the column entities
     */
    private List<HColumn> extractColumns(HDatabase hDatabase, Table table, NamedColumnSet namedColumnSet, Instant instant) {
        LOG.trace("Getting columns for table {}", table.getTableName());
        List<FieldSchema> regularColumns = this.helper.getColumns(table, this.context.getConfig());
        List<FieldSchema> partitionColumns = this.helper.getPartitionColumns(table);
        LOG.trace("Found {} columns and {} partition columns", regularColumns.size(), partitionColumns.size());

        LinkedList<HColumn> result = new LinkedList<HColumn>();
        for (FieldSchema column : regularColumns) {
            result.add(createColumn(column, namedColumnSet, hDatabase, instant));
        }
        for (FieldSchema column : partitionColumns) {
            result.add(createColumn(column, namedColumnSet, hDatabase, instant));
        }
        return result;
    }

    /**
     * Builds the {@link HColumn} model for one field of a table or view.
     *
     * @param fieldSchema    metastore field supplying name, type and comment
     * @param namedColumnSet owning table/view entity; supplies the parent name
     * @param hDatabase      entity of the owning database; supplies the database name
     * @param instant        extraction timestamp
     * @return the populated column entity
     */
    private HColumn createColumn(FieldSchema fieldSchema, NamedColumnSet namedColumnSet, HDatabase hDatabase, Instant instant) {
        // Parameterized logging: the message is only rendered when TRACE is enabled, which
        // avoids a String.format call per column on large metastores. The rendered text is unchanged.
        LOG.trace("Creating column {}; owner: {}, db: {}", fieldSchema.getName(), namedColumnSet.getName(), hDatabase.getName());
        HColumn hColumn = new HColumn(this.context.getHiveIdGenerator().generateColumnIdentity(this.context.getService().getCdxId(), hDatabase.getName(), namedColumnSet.getName(), fieldSchema.getName()), this.context.getService().getCdxId(), hDatabase.getName(), namedColumnSet.getName());
        hColumn.setName(fieldSchema.getName());
        hColumn.setDataType(fieldSchema.getType());
        hColumn.setDescription(fieldSchema.getComment());
        hColumn.setExtractionTime(instant);
        return hColumn;
    }

    /**
     * Hashes the table parameters and distributes them onto the entity.
     * <p>
     * The last-modified time, last-modified-by and comment parameters are moved to dedicated
     * attributes; the properties configured for exclusion are dropped; whatever remains becomes
     * the entity's technical properties. A null parameter map leaves both the hasher and the
     * entity untouched.
     *
     * @param hasher         hasher updated with the complete, unfiltered parameter map
     * @param namedColumnSet table/view entity to populate
     * @param map            table parameters from the metastore; may be null
     */
    private void setTableProperties(Hasher hasher, NamedColumnSet namedColumnSet, Map<String, String> map) {
        if (map != null) {
            updateHash(hasher, map);

            // Work on a copy so the metastore object's own map is never modified.
            Map<String, String> remaining = new HashMap<String, String>(map);

            if (remaining.containsKey(LAST_MODIFIED_PARAMETER)) {
                String rawSeconds = remaining.remove(LAST_MODIFIED_PARAMETER);
                try {
                    long millis = Long.parseLong(rawSeconds) * 1000;
                    namedColumnSet.setLastModified(new Instant(millis));
                } catch (NumberFormatException e) {
                    LOG.error("Last modified date is not a number: " + rawSeconds);
                }
            }

            if (remaining.containsKey(LAST_MODIFIED_BY_PARAMETER)) {
                String modifiedBy = remaining.remove(LAST_MODIFIED_BY_PARAMETER);
                namedColumnSet.setLastModifiedBy(modifiedBy);
            }

            // The description is always set; it is null when there is no comment parameter.
            String comment = remaining.remove(COMMENT);
            namedColumnSet.setDescription(comment);

            for (Object excludedKey : this.context.getOptions().getHiveTableAndViewPropertiesToExclude()) {
                remaining.remove(excludedKey);
            }
            namedColumnSet.setTechnicalProperties(remaining);
        }
    }

    /**
     * Fetches all partitions of a table in batches and wraps each batch in an
     * {@link HPartitionBatch}.
     * <p>
     * Partition names are listed first, then resolved to partitions in chunks of the configured
     * batch size. Each batch records the total partition-name count and its running offset,
     * which advances by the batch's reported size.
     *
     * @param hDatabase entity of the owning database
     * @param hTable    entity of the table whose partitions are extracted
     * @param instant   extraction timestamp
     * @return the batches in fetch order; empty when the table has no partitions
     * @throws TException if a metastore call fails
     */
    private Collection<HPartitionBatch> extractPartitions(HDatabase hDatabase, HTable hTable, Instant instant) throws TException {
        String databaseName = hDatabase.getName();
        String tableName = hTable.getName();
        List<String> partitionNames = this.metastore.listPartitionNames(databaseName, tableName, (short) 0);
        int totalPartitions = partitionNames.size();

        List<HPartitionBatch> batches = new ArrayList<HPartitionBatch>();
        if (partitionNames.isEmpty()) {
            return batches;
        }

        int offset = 0;
        int batchSize = this.context.getOptions().getExtractorHivePartBatchSize();
        for (List<String> nameChunk : Iterables.partition(partitionNames, batchSize)) {
            List<Partition> partitions = this.metastore.getPartitionsByNames(hDatabase.getName(), hTable.getName(), nameChunk);
            LinkedList<HPartition> partitionEntities = new LinkedList<HPartition>();
            for (Partition partition : partitions) {
                partitionEntities.add(createHPartition(partition, hTable, hDatabase, instant));
            }
            HPartitionBatch batch = createPartitionBatch(hDatabase, hTable, totalPartitions, offset, partitionEntities);
            batches.add(batch);
            offset += batch.getBatchSize();
        }
        return batches;
    }

    /**
     * Wraps a list of partition entities in an {@link HPartitionBatch}.
     *
     * @param hDatabase entity of the owning database; supplies the database name
     * @param hTable    entity of the owning table; supplies id, name and source id
     * @param i         total number of partitions of the table
     * @param i2        offset of this batch within the table's partitions
     * @param list      partition entities belonging to this batch
     * @return the populated batch
     */
    private HPartitionBatch createPartitionBatch(HDatabase hDatabase, HTable hTable, int i, int i2, List<HPartition> list) {
        final HPartitionBatch batch = new HPartitionBatch();

        // Where the batch belongs.
        batch.setTableId(hTable.getCdxId());
        batch.setDatabaseName(hDatabase.getName());
        batch.setTableName(hTable.getName());

        // Position within the table's partitions, and the payload.
        batch.setOffset(i2);
        batch.setTotalPartitions(i);
        batch.setPartitions(list);

        batch.setSourceId(hTable.getSourceId());
        return batch;
    }

    /**
     * Builds the {@link HPartition} model for one metastore partition.
     * <p>
     * A fresh hasher is seeded with the database and table names and then updated with the
     * partition values, create time, last access time, location and parameters, in that order.
     * The partition name ({@code col1=val1/col2=val2/...}) is set only when the table's
     * partition column count matches the number of partition values.
     *
     * @param partition metastore partition record
     * @param hTable    entity of the owning table
     * @param hDatabase entity of the owning database
     * @param instant   extraction timestamp
     * @return the populated partition entity
     */
    private HPartition createHPartition(Partition partition, HTable hTable, HDatabase hDatabase, Instant instant) {
        Hasher hasher = Hashing.sha256().newHasher()
                .putString(hDatabase.getName(), StandardCharsets.UTF_8)
                .putString(hTable.getName(), StandardCharsets.UTF_8);
        List<String> values = partition.getValues();
        String joinedValues = Joiner.on(";").join(values);
        LOG.trace("Extracting Partition {}", joinedValues);

        HPartition hPartition = new HPartition(this.context.getHiveIdGenerator().generatePartitionIdentity(this.context.getService(), hDatabase, hTable, joinedValues), this.context.getService().getCdxId());
        hPartition.setColValues(updateHash(hasher, values));

        // The metastore reports times as int seconds; multiply in long (1000L) so the
        // conversion to milliseconds cannot overflow the int range.
        hPartition.setCreated(new Instant(updateHash(hasher, partition.getCreateTime() * 1000L)));
        hPartition.setLastAccessed(new Instant(updateHash(hasher, partition.getLastAccessTime() * 1000L)));
        hPartition.setFileSystemPath(updateHash(hasher, partition.getSd().getLocation()));
        hPartition.setParams(updateHash(hasher, partition.getParameters()));
        hPartition.setExtractionTime(instant);

        List<String> partColNames = hTable.getPartColNames();
        if (partColNames.size() == values.size()) {
            StringBuilder name = new StringBuilder();
            for (int i = 0; i < partColNames.size(); i++) {
                if (i > 0) {
                    name.append('/');
                }
                name.append(partColNames.get(i)).append('=').append(values.get(i));
            }
            hPartition.setName(name.toString());
        }
        return hPartition;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Loads previously saved state of the given class from the context's state store. The lookup
     * key consists of the service's CDX id and the class name.
     *
     * @param cls state class to load
     * @param <T> state type
     * @return whatever the state store returns for that key, cast to {@code T}
     */
    public <T> T loadState(Class<T> cls) {
        String[] key = {this.context.getService().getCdxId(), cls.getName()};
        return (T) this.context.getStateStore().load(cls, key);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Saves the given state object to the context's state store. The key consists of the
     * service's CDX id and the object's runtime class name.
     *
     * @param obj state object to persist
     */
    public void saveState(Object obj) {
        String[] key = {this.context.getService().getCdxId(), obj.getClass().getName()};
        this.context.getStateStore().save(obj, key);
    }

    /**
     * Feeds a boolean field into the hasher, preceded by the field marker.
     *
     * @return {@code z}, unchanged, so the call can be used inline
     */
    private boolean updateHash(Hasher hasher, boolean z) {
        hasher.putInt(FIELD_MARKER);
        hasher.putBoolean(z);
        return z;
    }

    /**
     * Feeds a long field into the hasher, preceded by the field marker.
     *
     * @return {@code j}, unchanged, so the call can be used inline
     */
    private long updateHash(Hasher hasher, long j) {
        hasher.putInt(FIELD_MARKER);
        hasher.putLong(j);
        return j;
    }

    /**
     * Feeds a string field into the hasher, preceded by the field marker. A null string
     * contributes only the marker.
     *
     * @return {@code str}, unchanged, so the call can be used inline
     */
    private String updateHash(Hasher hasher, String str) {
        hasher.putInt(FIELD_MARKER);
        if (str == null) {
            return null;
        }
        hasher.putString(str, StandardCharsets.UTF_8);
        return str;
    }

    /**
     * Feeds a list-of-strings field into the hasher: the field marker, then (for a non-null
     * list) its size followed by each element prefixed with the list-item marker.
     *
     * @return {@code list}, unchanged, so the call can be used inline
     */
    private List<String> updateHash(Hasher hasher, List<String> list) {
        hasher.putInt(FIELD_MARKER);
        if (list == null) {
            return null;
        }
        hasher.putInt(list.size());
        for (String element : list) {
            hasher.putInt(LIST_ITEM_MARKER);
            hasher.putString(element, StandardCharsets.UTF_8);
        }
        return list;
    }

    /**
     * Feeds a string-to-string map field into the hasher: the field marker, then (for a non-null
     * map) its size followed by each entry as list-item marker, key, map-value marker, value.
     * <p>
     * NOTE(review): entries are hashed in the map's own iteration order, so the result depends
     * on that order — confirm callers only compare hashes of maps that iterate identically.
     *
     * @return {@code map}, unchanged, so the call can be used inline
     */
    private Map<String, String> updateHash(Hasher hasher, Map<String, String> map) {
        hasher.putInt(FIELD_MARKER);
        if (map == null) {
            return null;
        }
        hasher.putInt(map.size());
        for (Map.Entry<String, String> entry : map.entrySet()) {
            hasher.putInt(LIST_ITEM_MARKER);
            hasher.putString(entry.getKey(), StandardCharsets.UTF_8);
            hasher.putInt(MAP_VALUE_MARKER);
            hasher.putString(entry.getValue(), StandardCharsets.UTF_8);
        }
        return map;
    }
}
