package com.cloudera.nav.mapreduce;

import com.cloudera.nav.core.model.Entity;
import com.cloudera.nav.core.model.EntityType;
import com.cloudera.nav.core.model.Relation;
import com.cloudera.nav.core.model.Source;
import com.cloudera.nav.core.model.SourceType;
import com.cloudera.nav.core.model.UserSubOperation;
import com.cloudera.nav.core.model.relations.ControlFlowRelation;
import com.cloudera.nav.core.model.relations.DataFlowRelation;
import com.cloudera.nav.core.model.relations.ParentChildRelation;
import com.cloudera.nav.hdfs.HdfsExtractorUtils;
import com.cloudera.nav.hdfs.datasets.DatasetIdGenerator;
import com.cloudera.nav.hdfs.extractor.HdfsIdGenerator;
import com.cloudera.nav.mapreduce.model.JobExecution;
import com.cloudera.nav.persist.ElementManager;
import com.cloudera.nav.utils.MD5IdGenerator;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/cloudera/nav/mapreduce/FileBasedLineageExtractor.class */
/**
 * Derives column-level lineage relations for a job execution from a custom
 * lineage mapping supplied as a JSON document.
 *
 * <p>The JSON document is expected to carry a {@code "mapping"} element that
 * deserializes to a list of {@link CustomLineage} entries. For every entry whose
 * source and target fields can both be resolved, a {@link UserSubOperation} is
 * persisted and linked to the job execution, and (depending on the entry's
 * {@code type}) data-flow or control-flow relations are created through it.
 */
public class FileBasedLineageExtractor {
    private static final Logger LOG = LoggerFactory.getLogger(FileBasedLineageExtractor.class);

    /** Shared mapper; it is never reconfigured after construction. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    private static final String PATH_SEPARATOR = "/";
    private static final String MAPPING_ELEMENT = "mapping";
    private static final String TYPE_PROJECTION = "projection";
    private static final String TYPE_PREDICATE = "predicate";

    /**
     * One entry of the custom lineage mapping. Fields are public so that the
     * JSON mapper can bind them directly.
     */
    @VisibleForTesting
    static class CustomLineage {
        public String source;
        public String target;
        public String sourceParentPath;
        public String targetParentPath;
        public String expression;
        public String type;

        CustomLineage() {
        }
    }

    /**
     * Builds the relations described by the custom lineage mapping.
     *
     * @param mRExtractorContext extractor context providing the source manager, cluster,
     *                           element manager and relation id sequence
     * @param jobExecution       job execution the created sub-operations are attached to
     * @param str                custom lineage mapping as JSON; {@code null} yields no relations
     * @param strArr             paths used to resolve each entry's {@code source} field
     * @param strArr2            paths used to resolve each entry's {@code target} field; the first
     *                           element is also used to locate the HDFS source
     * @param sourceType         source type recorded on the sub-operation end of each flow relation
     * @return the created relations; empty when the first target path is missing or unsupported,
     *         or when the mapping is absent or unparsable
     * @throws RuntimeException if no HDFS source is registered for the URL derived from the
     *                          first target path
     */
    public Collection<Relation> extractColumnLevelLineage(MRExtractorContext mRExtractorContext, JobExecution jobExecution, String str, String[] strArr, String[] strArr2, SourceType sourceType) {
        final String[] sourcePaths = strArr;
        final String[] targetPaths = strArr2;
        List<Relation> relations = Lists.newArrayList();

        // Guard against a missing target path instead of failing on targetPaths[0].
        if (targetPaths == null || targetPaths.length == 0) {
            LOG.warn("No target paths supplied; skipping custom column lineage extraction");
            return relations;
        }
        if (!isSupportedSourceType(targetPaths[0])) {
            return relations;
        }

        // Literal replacement; no regular expression is needed to strip '#'.
        String hdfsSourceUrl = HdfsIdGenerator.getHdfsSourceUrl(targetPaths[0].replace("#", ""));
        Optional<?> hdfsSource = mRExtractorContext.getSourceManager().getSourceWithUrl(mRExtractorContext.getCluster(), hdfsSourceUrl, SourceType.HDFS);
        if (!hdfsSource.isPresent()) {
            LOG.error("HDFS source missing for url:{}", hdfsSourceUrl);
            throw new RuntimeException("Missing HDFS source");
        }
        Source source = (Source) hdfsSource.get();

        List<CustomLineage> lineageEntries = loadLineageMap(str);
        if (lineageEntries == null) {
            return relations;
        }

        for (CustomLineage lineage : lineageEntries) {
            Optional<Long> sourceFieldId = getFieldId(mRExtractorContext.getEm(), sourcePaths, lineage.source, lineage.sourceParentPath, source);
            Optional<Long> targetFieldId = getFieldId(mRExtractorContext.getEm(), targetPaths, lineage.target, lineage.targetParentPath, source);
            if (!sourceFieldId.isPresent() || !targetFieldId.isPresent()) {
                LOG.warn("improper column lineage mapping between source column {} on dataset {} and target column {} on dataset {}. Please check the lineage mapping json", new Object[]{lineage.source, lineage.sourceParentPath, lineage.target, lineage.targetParentPath});
                continue;
            }
            // Validate before persisting anything: a missing type previously failed
            // only after the sub-operation had already been written.
            if (lineage.type == null) {
                LOG.warn("Skipping column lineage mapping between source column {} and target column {}: no type specified", lineage.source, lineage.target);
                continue;
            }

            LOG.debug("linking elements {} with {}", sourceFieldId, targetFieldId);
            UserSubOperation operation = persistSubOperation(mRExtractorContext, jobExecution, lineage);
            relations.add(buildParentChildRelation(mRExtractorContext, jobExecution, operation));

            if (TYPE_PROJECTION.equalsIgnoreCase(lineage.type)) {
                addDataFlow(relations, mRExtractorContext, jobExecution, operation, source, sourceFieldId.get(), targetFieldId.get(), sourceType);
            } else if (TYPE_PREDICATE.equalsIgnoreCase(lineage.type)) {
                addControlFlow(relations, mRExtractorContext, jobExecution, operation, source, sourceFieldId.get(), targetFieldId.get(), sourceType);
            } else {
                // The sub-operation stays linked to the job, but no flow is recorded.
                LOG.warn("Unrecognized custom lineage type '{}'; no flow relations created", lineage.type);
            }
        }
        return relations;
    }

    /** Creates the sub-operation for one mapping entry and persists it via the element manager. */
    private UserSubOperation persistSubOperation(MRExtractorContext context, JobExecution jobExecution, CustomLineage lineage) {
        UserSubOperation operation = new UserSubOperation();
        operation.setFirstClassParentId(jobExecution.getSourceId());
        // Fall back to the job id as the display name when no expression is given.
        if (lineage.expression != null) {
            operation.setOriginalName(lineage.expression);
        } else {
            operation.setOriginalName(jobExecution.getJobID());
        }
        operation.setIdentity(generateExpressionIdentity(jobExecution, lineage.expression, lineage.type));
        operation.setSourceId(jobExecution.getSourceId());
        operation.setSourceType(jobExecution.getSourceType());
        context.getEm().persist(operation, true);
        return operation;
    }

    /** Builds the parent-child relation from the job execution to the sub-operation. */
    private Relation buildParentChildRelation(MRExtractorContext context, JobExecution jobExecution, UserSubOperation operation) {
        return ParentChildRelation.builder()
                .id(context.getSequenceGenerator().getNextRelationId())
                .parentId(jobExecution.getId())
                .parentType(jobExecution.getType())
                .parentSourceId(jobExecution.getSourceId())
                .parentSourceType(jobExecution.getSourceType())
                .childrenIds(Lists.newArrayList(operation.getId()))
                .childType(UserSubOperation.TYPE)
                .isUserSpecified(true)
                .childSourceId(jobExecution.getSourceId())
                .childSourceType(jobExecution.getSourceType())
                .sourceId(jobExecution.getSourceId())
                .sourceType(jobExecution.getSourceType())
                .extractorRunId(jobExecution.getExtractorRunId())
                .build();
    }

    /** Adds the two data-flow relations: source field to sub-operation, sub-operation to target field. */
    private void addDataFlow(List<Relation> relations, MRExtractorContext context, JobExecution jobExecution, UserSubOperation operation, Source source, Long sourceFieldId, Long targetFieldId, SourceType sourceType) {
        relations.add(DataFlowRelation.builder()
                .id(context.getSequenceGenerator().getNextRelationId())
                .sourceIds(Lists.newArrayList(sourceFieldId))
                .sourceSourceId(source.getId())
                .sourceType(EntityType.FIELD)
                .sourceSourceType(SourceType.HDFS)
                .targetIds(Lists.newArrayList(operation.getId()))
                .targetSourceId(operation.getSourceId())
                .targetType(UserSubOperation.TYPE)
                .targetSourceType(sourceType)
                .isUserSpecified(true)
                .extractorRunId(jobExecution.getExtractorRunId())
                .build());
        relations.add(DataFlowRelation.builder()
                .id(context.getSequenceGenerator().getNextRelationId())
                .sourceIds(Lists.newArrayList(operation.getId()))
                .sourceSourceId(operation.getSourceId())
                .sourceType(UserSubOperation.TYPE)
                .sourceSourceType(sourceType)
                .targetIds(Lists.newArrayList(targetFieldId))
                .targetSourceId(source.getId())
                .targetType(EntityType.FIELD)
                .targetSourceType(SourceType.HDFS)
                .isUserSpecified(true)
                .extractorRunId(jobExecution.getExtractorRunId())
                .build());
    }

    /** Adds the two control-flow relations: source field to sub-operation, sub-operation to target field. */
    private void addControlFlow(List<Relation> relations, MRExtractorContext context, JobExecution jobExecution, UserSubOperation operation, Source source, Long sourceFieldId, Long targetFieldId, SourceType sourceType) {
        relations.add(ControlFlowRelation.builder()
                .id(context.getSequenceGenerator().getNextRelationId())
                .sourceIds(Lists.newArrayList(sourceFieldId))
                .sourceSourceId(source.getId())
                .sourceType(EntityType.FIELD)
                .sourceSourceType(SourceType.HDFS)
                .targetIds(Lists.newArrayList(operation.getId()))
                .targetSourceId(operation.getSourceId())
                .targetType(UserSubOperation.TYPE)
                .targetSourceType(sourceType)
                .isUserSpecified(true)
                .extractorRunId(jobExecution.getExtractorRunId())
                .build());
        relations.add(ControlFlowRelation.builder()
                .id(context.getSequenceGenerator().getNextRelationId())
                .sourceIds(Lists.newArrayList(operation.getId()))
                .sourceSourceId(operation.getSourceId())
                .sourceType(UserSubOperation.TYPE)
                .sourceSourceType(sourceType)
                .targetIds(Lists.newArrayList(targetFieldId))
                .targetSourceId(source.getId())
                .targetType(EntityType.FIELD)
                .targetSourceType(SourceType.HDFS)
                .isUserSpecified(true)
                .extractorRunId(jobExecution.getExtractorRunId())
                .build());
    }

    /**
     * Parses the custom lineage JSON and returns the entries under its
     * {@code "mapping"} element.
     *
     * @param str the JSON document, may be {@code null}
     * @return the parsed entries, or {@code null} when the input is {@code null}, cannot be
     *         parsed, or has no {@code "mapping"} element
     */
    @VisibleForTesting
    List<CustomLineage> loadLineageMap(String str) {
        if (str == null) {
            return null;
        }
        LOG.info("Parsing custom lineage mapping");
        try {
            JsonNode root = MAPPER.readTree(str);
            // get() yields null when the key is absent or the root is not a JSON object.
            JsonNode mapping = root == null ? null : root.get(MAPPING_ELEMENT);
            if (mapping == null || mapping.isNull()) {
                LOG.warn("Custom lineage config has no '{}' element", MAPPING_ELEMENT);
                return null;
            }
            return MAPPER.readValue(mapping.traverse(), new TypeReference<List<CustomLineage>>() {
            });
        } catch (IOException e) {
            LOG.warn("Could not parse custom lineage config", e);
            return null;
        }
    }

    /**
     * Looks up the id of a dataset field among the given paths.
     *
     * <p>Each supported path is turned into an HDFS entity path. When {@code datasetPath}
     * contains a separator, its leading part is appended to the entity path and its last
     * segment is taken as the dataset name; otherwise {@code datasetPath} must equal
     * (ignoring case) the last segment of the entity path for the path to be considered.
     *
     * @param elementManager used to look the field entity up by its generated id
     * @param paths          candidate paths; {@code null} resolves to absent
     * @param fieldName      name of the field within the dataset
     * @param datasetPath    dataset name or relative dataset path from the mapping;
     *                       {@code null} resolves to absent
     * @param source         HDFS source used when generating the entity identity
     * @return the id of the matching field entity, or absent if none was found
     */
    private Optional<Long> getFieldId(ElementManager elementManager, String[] paths, String fieldName, String datasetPath, Source source) {
        Optional<Long> fieldId = Optional.absent();
        if (paths != null && datasetPath != null) {
            for (String path : paths) {
                if (!isSupportedSourceType(path)) {
                    continue;
                }
                String entityPath = HdfsIdGenerator.getHdfsEntityPath(path);
                String parentPath = getParentPath(entityPath, PATH_SEPARATOR);
                String datasetName = entityPath.substring(entityPath.lastIndexOf(PATH_SEPARATOR) + 1);
                String expectedName = datasetPath;
                if (datasetPath.contains(PATH_SEPARATOR)) {
                    // Relative dataset path: resolve it beneath the entity path itself.
                    String relativeParent = getParentPath(datasetPath, PATH_SEPARATOR);
                    parentPath = StringUtils.isEmpty(relativeParent) ? entityPath : entityPath + relativeParent;
                    datasetName = datasetPath.substring(datasetPath.lastIndexOf(PATH_SEPARATOR) + 1);
                    expectedName = datasetName;
                }
                if (!expectedName.equalsIgnoreCase(datasetName)) {
                    continue;
                }
                Optional<?> entity = elementManager.findById(
                        DatasetIdGenerator.fieldIdFromPath(
                                DatasetIdGenerator.datasetId(
                                        HdfsIdGenerator.generateFSEntityIdentity(source, HdfsExtractorUtils.generateFileSystemPath(parentPath, datasetName)),
                                        datasetName),
                                fieldName));
                // NOTE(review): the last matching path wins, so a later miss replaces an
                // earlier hit. Kept as-is; confirm whether first-hit semantics are intended.
                if (entity.isPresent()) {
                    fieldId = Optional.of(((Entity) entity.get()).getId());
                } else {
                    fieldId = Optional.absent();
                }
            }
        }
        LOG.debug("Returning id {} for field {}, dataset {} and source {}", new Object[]{fieldId.isPresent() ? fieldId.get() : "absent", fieldName, datasetPath, source.getId()});
        return fieldId;
    }

    /**
     * Returns the part of {@code str} before the last occurrence of {@code str2}, or
     * {@code str} unchanged when either argument is empty or the separator does not occur.
     */
    private String getParentPath(String str, String str2) {
        if (StringUtils.isEmpty(str) || StringUtils.isEmpty(str2)) {
            return str;
        }
        int separatorIndex = str.lastIndexOf(str2);
        return separatorIndex == -1 ? str : str.substring(0, separatorIndex);
    }

    /**
     * Generates the identity of a sub-operation from the expression, the lineage type and
     * the sorted inputs and outputs of the job execution.
     */
    private String generateExpressionIdentity(JobExecution jobExecution, String expression, String type) {
        MD5IdGenerator idGenerator = new MD5IdGenerator();
        idGenerator.update(expression);
        idGenerator.update(type);
        // Sort so the identity does not depend on the order the paths were reported in.
        List<String> inputs = Lists.newArrayList(jobExecution.getInputs());
        Collections.sort(inputs);
        idGenerator.update(inputs.toArray(new String[inputs.size()]));
        List<String> outputs = Lists.newArrayList(jobExecution.getOutputs());
        Collections.sort(outputs);
        idGenerator.update(outputs.toArray(new String[outputs.size()]));
        return idGenerator.getIdentity();
    }

    /** Returns {@code true} unless the path's source type is {@link SourceType#S3}. */
    private boolean isSupportedSourceType(String str) {
        return !SourceType.S3.equals(MRUtils.getSourceTypeFromPath(str));
    }
}
