/*
 * Decompiled with CFR 0.152.
 */
package picard.vcf;

import htsjdk.samtools.Defaults;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.liftover.LiftOver;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.samtools.util.StringUtil;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFFilterHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import htsjdk.variant.vcf.VCFRecordCodec;
import java.io.File;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.argumentcollections.ReferenceArgumentCollection;
import picard.cmdline.programgroups.VcfOrBcf;

@CommandLineProgramProperties(summary="Lifts over a VCF file from one reference build to another.  This tool adjusts the coordinates of variants within a VCF file to match a new reference. The output file will be sorted and indexed using the target reference build. To be clear, REFERENCE_SEQUENCE should be the <em>target</em> reference build. The tool is based on the UCSC liftOver tool (see: http://genome.ucsc.edu/cgi-bin/hgLiftOver) and uses a UCSC chain file to guide its operation. <br /><br />Note that records may be rejected because they cannot be lifted over or because of sequence incompatibilities between the source and target reference genomes.  Rejected records will be emitted with filters to the REJECT file, using the source genome coordinates.<br /><h4>Usage example:</h4><pre>java -jar picard.jar LiftoverVcf \\<br />     I=input.vcf \\<br />     O=lifted_over.vcf \\<br />     CHAIN=b37tohg19.chain \\<br />     REJECT=rejected_variants.vcf \\<br />     R=reference_sequence.fasta</pre>For additional information, please see: http://genome.ucsc.edu/cgi-bin/hgLiftOver<hr />", oneLineSummary="Lifts over a VCF file from one reference build to another.  ", programGroup=VcfOrBcf.class)
@DocumentedFeature
public class LiftoverVcf
extends CommandLineProgram {
    /** One-line tool summary; the same text is used as {@code oneLineSummary} in the class annotation. */
    static final String USAGE_SUMMARY = "Lifts over a VCF file from one reference build to another.  ";
    /** Long-form HTML usage text; together with {@link #USAGE_SUMMARY} it makes up the annotation's {@code summary}. */
    static final String USAGE_DETAILS = "This tool adjusts the coordinates of variants within a VCF file to match a new reference. The output file will be sorted and indexed using the target reference build. To be clear, REFERENCE_SEQUENCE should be the <em>target</em> reference build. The tool is based on the UCSC liftOver tool (see: http://genome.ucsc.edu/cgi-bin/hgLiftOver) and uses a UCSC chain file to guide its operation. <br /><br />Note that records may be rejected because they cannot be lifted over or because of sequence incompatibilities between the source and target reference genomes.  Rejected records will be emitted with filters to the REJECT file, using the source genome coordinates.<br /><h4>Usage example:</h4><pre>java -jar picard.jar LiftoverVcf \\<br />     I=input.vcf \\<br />     O=lifted_over.vcf \\<br />     CHAIN=b37tohg19.chain \\<br />     REJECT=rejected_variants.vcf \\<br />     R=reference_sequence.fasta</pre>For additional information, please see: http://genome.ucsc.edu/cgi-bin/hgLiftOver<hr />";
    // Source VCF/BCF whose records are read and lifted in doWork().
    @Argument(shortName="I", doc="The input VCF/BCF file to be lifted over.")
    public File INPUT;
    // Destination for the successfully lifted, sorted records.
    @Argument(shortName="O", doc="The output location to write the lifted over VCF/BCF to.")
    public File OUTPUT;
    // Chain file handed to the LiftOver constructor in doWork().
    @Argument(shortName="C", doc="The liftover chain file. See https://genome.ucsc.edu/goldenPath/help/chain.html for a description of chain files.  See http://hgdownload.soe.ucsc.edu/downloads.html#terms for where to download chain files.")
    public File CHAIN;
    // Destination for records that could not be lifted; written with a filter naming the reason.
    @Argument(doc="File to which to write rejected records.")
    public File REJECT;
    // When false, a lifted contig missing from the target reference makes doWork() log an error and
    // return EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE; when true it only logs a warning and continues.
    @Argument(shortName="WMC", doc="Warn on missing contig.", optional=true)
    public boolean WARN_ON_MISSING_CONTIG = false;
    // When true, the OriginalContig/OriginalStart INFO attributes are added to the output header and records.
    @Argument(doc="Write the original contig/position for lifted variants to the INFO field.", optional=true)
    public boolean WRITE_ORIGINAL_POSITION = false;
    // Passed as the second argument of LiftOver.liftOver(Interval, double) in doWork().
    // NOTE(review): the doc string says "percent" but the default of 1.0 suggests a 0-1 fraction - confirm.
    @Argument(doc="The minimum percent match required for a variant to be lifted.", optional=true)
    public double LIFTOVER_MIN_MATCH = 1.0;
    // Forwarded to both writers and to the VCFRecordCodec used by the sorting collection.
    @Argument(doc="Allow INFO and FORMAT in the records that are not found in the header", optional=true)
    public boolean ALLOW_MISSING_FIELDS_IN_HEADER = false;
    /** Exit code returned by {@code doWork()} when a variant lifts to a contig that is absent from the target reference. */
    protected static int EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE = 1;

    /** Filter applied to indels/mixed sites on a reverse-complemented target that cannot be flipped. */
    public static final String FILTER_CANNOT_LIFTOVER_INDEL = "ReverseComplementedIndel";
    /** Filter applied when the chain yields no target interval, or the target contig is not in the reference. */
    public static final String FILTER_NO_TARGET = "NoTarget";
    /** Filter applied when the lifted reference allele disagrees with the target reference bases. */
    public static final String FILTER_MISMATCHING_REF_ALLELE = "MismatchedRefAllele";
    /**
     * Filter applied when the lifted interval length differs from the reference allele length.
     * The misspelling ("Intevals") is part of the emitted filter ID and is kept for compatibility
     * with existing reject files.
     */
    public static final String FILTER_INDEL_STRADDLES_TWO_INTERVALS = "IndelStraddlesMultipleIntevals";

    /** Header lines declaring every filter this tool may write to the REJECT file. */
    private static final List<VCFFilterHeaderLine> FILTERS = CollectionUtil.makeList(
            new VCFFilterHeaderLine(FILTER_CANNOT_LIFTOVER_INDEL, "Indel falls into a reverse complemented region in the target genome."),
            new VCFFilterHeaderLine(FILTER_NO_TARGET, "Variant could not be lifted between genome builds."),
            new VCFFilterHeaderLine(FILTER_MISMATCHING_REF_ALLELE, "Reference allele does not match reference genome sequence after liftover."),
            new VCFFilterHeaderLine(FILTER_INDEL_STRADDLES_TWO_INTERVALS, "Indel is straddling multiple intervals in the chain, and so the results are not well defined."));

    /** INFO key holding the source contig of a lifted variant (written only with WRITE_ORIGINAL_POSITION). */
    public static final String ORIGINAL_CONTIG = "OriginalContig";
    /** INFO key holding the source start position of a lifted variant (written only with WRITE_ORIGINAL_POSITION). */
    public static final String ORIGINAL_START = "OriginalStart";
    /** INFO key, written to rejected records only, holding the target locus that failed the reference check. */
    public static final String ATTEMPTED_LOCUS = "AttemptedLocus";

    /** Header lines for the optional original-position INFO attributes. */
    private static final List<VCFInfoHeaderLine> ATTRS = CollectionUtil.makeList(
            new VCFInfoHeaderLine(ORIGINAL_CONTIG, 1, VCFHeaderLineType.String, "The name of the source contig/chromosome prior to liftover."),
            new VCFInfoHeaderLine(ORIGINAL_START, 1, VCFHeaderLineType.String, "The position of the variant on the source contig prior to liftover."));

    /** Writer for the REJECT file; assigned in {@code doWork()}. */
    private VariantContextWriter rejects;
    private final Log log = Log.getInstance(LiftoverVcf.class);
    /** Collects lifted variants so they can be emitted in target-dictionary order; assigned in {@code doWork()}. */
    private SortingCollection<VariantContext> sorter;
    /** Number of variants rejected before the reference-allele check. */
    private long failedLiftover = 0L;
    /** Number of variants that lifted over but whose reference allele did not match the target reference. */
    private long failedAlleleCheck = 0L;

    /**
     * Supplies the reference argument for this tool with documentation that states the fasta
     * must be the TARGET build.
     *
     * @return an anonymous {@link ReferenceArgumentCollection} exposing a single
     *         {@code REFERENCE_SEQUENCE} argument (short name {@code R}) that is initialised
     *         from {@link Defaults#REFERENCE_FASTA}
     */
    @Override
    protected ReferenceArgumentCollection makeReferenceArgumentCollection() {
        return new ReferenceArgumentCollection(){
            // Declared with common=false and a tool-specific doc string.
            @Argument(shortName="R", common=false, doc="The reference sequence (fasta) for the TARGET genome build.  The fasta file must have an accompanying sequence dictionary (.dict file).")
            public File REFERENCE_SEQUENCE = Defaults.REFERENCE_FASTA;

            /** Returns the current value of the {@code REFERENCE_SEQUENCE} argument. */
            @Override
            public File getReferenceFile() {
                return this.REFERENCE_SEQUENCE;
            }
        };
    }

    /**
     * Command-line entry point: creates the tool and delegates to
     * {@code instanceMainWithExit} with the raw arguments.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        final LiftoverVcf tool = new LiftoverVcf();
        tool.instanceMainWithExit(args);
    }

    /**
     * Lifts every record of INPUT over to the target reference, writes the records that could
     * not be lifted to REJECT (in source coordinates, with a filter naming the reason) and the
     * lifted records, sorted by the target sequence dictionary, to OUTPUT.
     *
     * <p>The input reader, both writers and the sorter's temporary files are released on every
     * exit path, including the early return for a missing contig and any exception.
     *
     * @return 0 on success, or {@link #EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE} when a variant
     *         lifts to a contig absent from the target reference and WARN_ON_MISSING_CONTIG is false
     * @throws IllegalArgumentException if a reverse-strand biallelic indel cannot be flipped
     */
    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(this.INPUT);
        IOUtil.assertFileIsReadable(this.REFERENCE_SEQUENCE);
        IOUtil.assertFileIsReadable(this.CHAIN);
        IOUtil.assertFileIsWritable(this.OUTPUT);
        IOUtil.assertFileIsWritable(this.REJECT);

        final LiftOver liftOver = new LiftOver(this.CHAIN);
        // flipIndel() re-lifts the locus through the single-argument liftOver(Interval) overload,
        // which uses the instance's own threshold. Keep it in sync with the user's setting so a
        // locus accepted here is never rejected there.
        liftOver.setLiftOverMinMatch(this.LIFTOVER_MIN_MATCH);

        try (VCFFileReader in = new VCFFileReader(this.INPUT, false)) {
            this.log.info("Loading up the target reference genome.");
            final Map<String, ReferenceSequence> refSeqs = new HashMap<>();
            final VCFHeader outHeader = new VCFHeader(in.getFileHeader());
            final VariantContextWriterBuilder outBuilder;
            final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(this.REFERENCE_SEQUENCE);
            try {
                for (final SAMSequenceRecord rec : walker.getSequenceDictionary().getSequences()) {
                    refSeqs.put(rec.getSequenceName(), walker.get(rec.getSequenceIndex()));
                }
                // Everything that needs the dictionary is done while the walker is still open.
                outHeader.setSequenceDictionary(walker.getSequenceDictionary());
                outBuilder = new VariantContextWriterBuilder()
                        .setOption(Options.INDEX_ON_THE_FLY)
                        .modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, this.ALLOW_MISSING_FIELDS_IN_HEADER)
                        .setOutputFile(this.OUTPUT)
                        .setReferenceDictionary(walker.getSequenceDictionary());
            } finally {
                CloserUtil.close(walker);
            }

            if (this.WRITE_ORIGINAL_POSITION) {
                for (final VCFInfoHeaderLine line : ATTRS) {
                    outHeader.addMetaDataLine(line);
                }
            }

            final VCFHeader rejectHeader = new VCFHeader(in.getFileHeader());
            for (final VCFFilterHeaderLine line : FILTERS) {
                rejectHeader.addMetaDataLine(line);
            }
            rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_LOCUS, 1, VCFHeaderLineType.String, "The locus of the variant in the TARGET prior to failing due to mismatching alleles."));

            try (VariantContextWriter out = outBuilder.build();
                 VariantContextWriter rejectWriter = new VariantContextWriterBuilder()
                         .setOutputFile(this.REJECT)
                         .unsetOption(Options.INDEX_ON_THE_FLY)
                         .modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, this.ALLOW_MISSING_FIELDS_IN_HEADER)
                         .build()) {
                out.writeHeader(outHeader);
                this.rejects = rejectWriter;
                this.rejects.writeHeader(rejectHeader);

                this.log.info("Lifting variants over and sorting.");
                this.sorter = SortingCollection.newInstance(
                        VariantContext.class,
                        new VCFRecordCodec(outHeader, this.ALLOW_MISSING_FIELDS_IN_HEADER || this.VALIDATION_STRINGENCY != ValidationStringency.STRICT),
                        outHeader.getVCFRecordComparator(),
                        (int) this.MAX_RECORDS_IN_RAM,
                        this.TMP_DIR);
                try {
                    long total = 0L;
                    final ProgressLogger readProgress = new ProgressLogger(this.log, 1000000, "read");
                    for (final VariantContext ctx : in) {
                        ++total;
                        if (!liftOverVariant(ctx, liftOver, refSeqs)) {
                            return EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE;
                        }
                        // Every record read is counted, whether it was lifted or rejected.
                        readProgress.record(ctx.getContig(), ctx.getStart());
                    }
                    logSummary(total);

                    this.sorter.doneAdding();
                    writeSortedRecords(out);
                    return 0;
                } finally {
                    // Removes any temporary files the sorter spilled to TMP_DIR.
                    this.sorter.cleanup();
                }
            }
        }
    }

    /**
     * Lifts a single variant and routes it to either the sorter or the REJECT writer.
     *
     * @param ctx      the variant in source coordinates
     * @param liftOver the configured chain-based lifter
     * @param refSeqs  target reference sequences keyed by contig name
     * @return {@code false} if processing must stop because the target contig is missing from the
     *         reference and WARN_ON_MISSING_CONTIG is false; {@code true} otherwise
     */
    private boolean liftOverVariant(final VariantContext ctx, final LiftOver liftOver, final Map<String, ReferenceSequence> refSeqs) {
        final Interval source = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, ctx.getContig() + ":" + ctx.getStart() + "-" + ctx.getEnd());
        final Interval target = liftOver.liftOver(source, this.LIFTOVER_MIN_MATCH);

        if (target == null) {
            this.rejectVariant(ctx, FILTER_NO_TARGET);
            return true;
        }
        if (ctx.getReference().length() != target.length()) {
            this.rejectVariant(ctx, FILTER_INDEL_STRADDLES_TWO_INTERVALS);
            return true;
        }
        if (target.isNegativeStrand() && (ctx.isMixed() || (ctx.isIndel() && !ctx.isBiallelic()))) {
            this.rejectVariant(ctx, FILTER_CANNOT_LIFTOVER_INDEL);
            return true;
        }
        if (!refSeqs.containsKey(target.getContig())) {
            this.rejectVariant(ctx, FILTER_NO_TARGET);
            final String missingContigMessage = "Encountered a contig, " + target.getContig() + " that is not part of the target reference.";
            if (!this.WARN_ON_MISSING_CONTIG) {
                this.log.error(missingContigMessage);
                return false;
            }
            this.log.warn(missingContigMessage);
            return true;
        }

        final ReferenceSequence refSeq = refSeqs.get(target.getContig());
        if (target.isNegativeStrand() && ctx.isIndel() && ctx.isBiallelic()) {
            final VariantContext flippedIndel = LiftoverVcf.flipIndel(ctx, liftOver, refSeq);
            if (flippedIndel == null) {
                throw new IllegalArgumentException("Unexpectedly found null VC. This should have not happened.");
            }
            this.tryToAddVariant(flippedIndel, refSeq, ctx);
        } else {
            this.tryToAddVariant(LiftoverVcf.liftSimpleVariant(ctx, target), refSeq, ctx);
        }
        return true;
    }

    /** Logs how many variants were processed and how many failed; an empty input reports 0%, not NaN. */
    private void logSummary(final long total) {
        final long failed = this.failedLiftover + this.failedAlleleCheck;
        final double fractionFailed = total == 0L ? 0.0 : (double) failed / (double) total;
        final String pct = new DecimalFormat("0.0000%").format(fractionFailed);
        this.log.info("Processed ", total, " variants.");
        this.log.info(this.failedLiftover, " variants failed to liftover.");
        this.log.info(this.failedAlleleCheck, " variants lifted over but had mismatching reference alleles after lift over.");
        this.log.info(pct, " of variants were not successfully lifted over and written to the output.");
    }

    /** Drains the sorter into {@code out} in sorted order, closing the sorter's iterator afterwards. */
    private void writeSortedRecords(final VariantContextWriter out) {
        final ProgressLogger writeProgress = new ProgressLogger(this.log, 1000000, "written");
        this.log.info("Writing out sorted records to final VCF.");
        try (CloseableIterator<VariantContext> sorted = this.sorter.iterator()) {
            while (sorted.hasNext()) {
                final VariantContext ctx = sorted.next();
                out.add(ctx);
                writeProgress.record(ctx.getContig(), ctx.getStart());
            }
        }
    }

    /**
     * Writes a copy of {@code ctx}, filtered with {@code reason}, to the reject writer and
     * counts it as a failed liftover.
     *
     * @param ctx    the variant (in source coordinates) being rejected
     * @param reason the filter name to set on the rejected record
     */
    private void rejectVariant(VariantContext ctx, String reason) {
        final VariantContextBuilder rejected = new VariantContextBuilder(ctx);
        rejected.filter(reason);
        this.rejects.add(rejected.make());
        this.failedLiftover++;
    }

    /**
     * Checks the lifted variant's reference allele against the target reference and either
     * queues the variant for output or rejects the source record.
     *
     * <p>The comparison is case-insensitive. A lifted locus that does not lie entirely within
     * {@code refSeq} cannot match the reference, so it is rejected as a mismatch instead of
     * failing with an index exception.
     *
     * @param vc     the variant in target coordinates
     * @param refSeq the target reference sequence for {@code vc}'s contig
     * @param source the original variant; supplies filters, quality, attributes and ID, and is
     *               the record written to the REJECT file on mismatch
     */
    private void tryToAddVariant(VariantContext vc, ReferenceSequence refSeq, VariantContext source) {
        final VariantContextBuilder builder = new VariantContextBuilder(vc);
        builder.filters(source.getFilters());
        builder.log10PError(source.getLog10PError());
        builder.attributes(source.getAttributes());
        builder.id(source.getID());
        if (this.WRITE_ORIGINAL_POSITION) {
            builder.attribute(ORIGINAL_CONTIG, source.getContig());
            builder.attribute(ORIGINAL_START, source.getStart());
        }

        boolean mismatchesReference = false;
        for (final Allele allele : builder.getAlleles()) {
            if (!allele.isReference()) {
                continue;
            }
            final byte[] ref = refSeq.getBases();
            final int offset = vc.getStart() - 1; // 1-based start -> 0-based array index
            final int length = vc.getEnd() - vc.getStart() + 1;
            if (offset < 0 || length < 0 || offset > ref.length - length) {
                mismatchesReference = true;
            } else {
                final String refString = StringUtil.bytesToString(ref, offset, length);
                mismatchesReference = !refString.equalsIgnoreCase(allele.getBaseString());
            }
            // Only the first reference allele is checked.
            break;
        }

        if (mismatchesReference) {
            this.rejects.add(new VariantContextBuilder(source)
                    .filter(FILTER_MISMATCHING_REF_ALLELE)
                    .attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d", vc.getContig(), vc.getStart(), vc.getEnd()))
                    .make());
            ++this.failedAlleleCheck;
        } else {
            this.sorter.add(builder.make());
        }
    }

    /**
     * Builds the lifted-over version of a variant whose reference allele spans exactly the
     * target interval. On a negative-strand target every non-symbolic allele is
     * reverse-complemented and genotypes are remapped to the new alleles.
     *
     * @param source the variant in source coordinates
     * @param target the interval the variant lifts to
     * @return the variant in target coordinates, or {@code null} if {@code target} is null or
     *         its length differs from the reference allele's length
     */
    protected static VariantContext liftSimpleVariant(VariantContext source, Interval target) {
        if (target == null || source.getReference().length() != target.length()) {
            return null;
        }

        final boolean sameStrand = target.isPositiveStrand();
        final List<Allele> liftedAlleles = new ArrayList<>();
        final Map<Allele, Allele> flippedByOriginal = new HashMap<>(2);
        for (final Allele original : source.getAlleles()) {
            Allele lifted = original;
            if (!sameStrand && !original.isSymbolic()) {
                lifted = Allele.create(SequenceUtil.reverseComplement(original.getBaseString()), original.isReference());
                flippedByOriginal.put(original, lifted);
            }
            liftedAlleles.add(lifted);
        }

        return new VariantContextBuilder(source.getSource(), target.getContig(), (long) target.getStart(), (long) target.getEnd(), liftedAlleles)
                .id(source.getID())
                .attributes(source.getAttributes())
                .genotypes(LiftoverVcf.fixGenotypes(source.getGenotypes(), flippedByOriginal))
                .filters(source.getFilters())
                .log10PError(source.getLog10PError())
                .make();
    }

    /**
     * Lifts a biallelic indel onto a reverse-complemented (negative strand) target region.
     * Each allele has its first base dropped and the remainder reverse-complemented; a single
     * base read from {@code referenceSequence} is then attached as the new anchor, and the
     * result is passed through {@link #leftAlignVariant}.
     *
     * @param source            the indel in source coordinates
     * @param liftOver          lifter used to map the source locus to the target
     * @param referenceSequence target reference sequence from which the anchor base is read
     * @return the flipped, left-aligned variant, or {@code null} if {@code source} is not
     *         biallelic or the locus cannot be lifted
     * @throws IllegalArgumentException if the lifted target is not on the negative strand
     */
    protected static VariantContext flipIndel(VariantContext source, LiftOver liftOver, ReferenceSequence referenceSequence) {
        if (!source.isBiallelic()) {
            return null;
        }
        Interval originalLocus = new Interval(source.getContig(), source.getStart(), source.getEnd());
        // Single-argument overload: the match threshold is whatever liftOver itself is configured
        // with, not a value passed in by the caller.
        Interval target = liftOver.liftOver(originalLocus);
        if (target == null) {
            return null;
        }
        if (!target.isNegativeStrand()) {
            throw new IllegalArgumentException("Expecting a variant the is lifted over with an inversion. Got " + source + " maps to " + target.toString());
        }
        // The anchor base goes in front of the alleles unless the target starts at position 1,
        // where there is no preceding base.
        boolean addToStart = target.getStart() > 1;
        HashMap<Allele, Allele> reverseComplementAlleleMap = new HashMap<Allele, Allele>(2);
        ArrayList<Allele> alleles = new ArrayList<Allele>();
        for (Allele oldAllele : source.getAlleles()) {
            StringBuilder alleleBuilder = new StringBuilder(target.getEnd() - target.getStart() + 1);
            if (addToStart) {
                // getBases() is 0-based, so index start-2 is the base at 1-based position start-1.
                alleleBuilder.append((char)referenceSequence.getBases()[target.getStart() - 2]);
            }
            // Drop the allele's first base and reverse-complement what is left.
            alleleBuilder.append(SequenceUtil.reverseComplement((String)oldAllele.getBaseString().substring(1, oldAllele.length())));
            if (!addToStart) {
                // NOTE(review): index end-1 is the base at 1-based position target.getEnd(), i.e. the
                // last base of the target interval - confirm this is the intended trailing anchor.
                alleleBuilder.append((char)referenceSequence.getBases()[target.getEnd() - 1]);
            }
            Allele fixedAllele = Allele.create((String)alleleBuilder.toString(), (boolean)oldAllele.isReference());
            alleles.add(fixedAllele);
            reverseComplementAlleleMap.put(oldAllele, fixedAllele);
        }
        // With a leading anchor the whole variant is shifted one base to the left.
        VariantContextBuilder builder = new VariantContextBuilder(source.getSource(), target.getContig(), (long)(target.getStart() - (addToStart ? 1 : 0)), (long)(target.getEnd() - (addToStart ? 1 : 0)), alleles);
        builder.id(source.getID());
        builder.attributes(source.getAttributes());
        builder.genotypes(LiftoverVcf.fixGenotypes(source.getGenotypes(), reverseComplementAlleleMap));
        builder.filters(source.getFilters());
        builder.log10PError(source.getLog10PError());
        return LiftoverVcf.leftAlignVariant(builder.make(), referenceSequence);
    }

    /**
     * Rewrites genotype alleles according to {@code alleleMap}. Alleles without a mapping are
     * kept unchanged.
     *
     * @param originals the genotypes to remap
     * @param alleleMap mapping from original allele to replacement allele
     * @return {@code originals} itself when the map is empty, otherwise a new context holding
     *         one remapped copy of each genotype
     */
    protected static GenotypesContext fixGenotypes(GenotypesContext originals, Map<Allele, Allele> alleleMap) {
        if (alleleMap.isEmpty()) {
            return originals;
        }

        final GenotypesContext remapped = GenotypesContext.create(originals.size());
        for (final Genotype original : originals) {
            final List<Allele> replacementAlleles = original.getAlleles().stream()
                    .map(allele -> alleleMap.getOrDefault(allele, allele))
                    .collect(Collectors.toCollection(ArrayList::new));
            remapped.add(new GenotypeBuilder(original).alleles(replacementAlleles).make());
        }
        return remapped;
    }

    /**
     * Normalises a variant's alleles against the reference by repeatedly trimming a shared
     * trailing base and re-extending emptied alleles with a reference base, then trimming
     * shared leading bases. Start, stop, alleles and genotypes of the result are updated.
     *
     * @param vc                the variant to normalise
     * @param referenceSequence reference sequence for {@code vc}'s contig
     * @return a new variant built from {@code vc} with the adjusted start, stop and alleles
     * @throws IllegalArgumentException if {@code vc}'s contig differs from the reference name
     */
    protected static VariantContext leftAlignVariant(VariantContext vc, ReferenceSequence referenceSequence) {
        boolean changesInAlleles = true;
        int start = vc.getStart();
        int end = vc.getEnd();
        if (!vc.getContig().equals(referenceSequence.getName())) {
            throw new IllegalArgumentException("vc contig doesn't match that of supplied reference: " + vc.getContig() + " != " + referenceSequence.getName());
        }
        // Working copy of each allele's bases, keyed by the original allele.
        HashMap<Allele, byte[]> alleleBasesMap = new HashMap<Allele, byte[]>();
        vc.getAlleles().forEach(a -> alleleBasesMap.put((Allele)a, a.getBases()));
        while (changesInAlleles) {
            changesInAlleles = false;
            // Step 1: if every allele ends in the same base (and end > 1), drop that base from
            // all alleles and move the end one position to the left.
            if (alleleBasesMap.values().stream().collect(Collectors.groupingBy(a -> a[((byte[])a).length - 1], Collectors.toSet())).size() == 1 && end > 1) {
                for (Allele allele : alleleBasesMap.keySet()) {
                    alleleBasesMap.put(allele, LiftoverVcf.truncateBase((byte[])alleleBasesMap.get(allele), true));
                }
                changesInAlleles = true;
                --end;
            }
            // Step 2: only when some allele has become empty, prepend one reference base to every
            // allele and move the start one position to the left.
            if (!alleleBasesMap.values().stream().map(a -> ((byte[])a).length).anyMatch(l -> l == 0)) continue;
            for (Allele allele : alleleBasesMap.keySet()) {
                // NOTE(review): when start <= 1 the base is read from index `end` (to the right of the
                // variant) yet it is still prepended and start is still decremented below - this
                // looks inconsistent for variants at the start of a contig; confirm.
                byte extraBase = start > 1 ? referenceSequence.getBases()[start - 2] : referenceSequence.getBases()[end];
                alleleBasesMap.put(allele, LiftoverVcf.extendOneBase((byte[])alleleBasesMap.get(allele), extraBase));
            }
            changesInAlleles = true;
            --start;
        }
        // Step 3: while every allele keeps at least two bases and all share the same first base,
        // drop that base and move the start one position to the right.
        while (alleleBasesMap.values().stream().allMatch(a -> ((byte[])a).length >= 2) && alleleBasesMap.values().stream().collect(Collectors.groupingBy(a -> a[0], Collectors.toSet())).size() == 1) {
            for (Allele allele : alleleBasesMap.keySet()) {
                alleleBasesMap.put(allele, LiftoverVcf.truncateBase((byte[])alleleBasesMap.get(allele), false));
            }
            ++start;
        }
        VariantContextBuilder builder = new VariantContextBuilder(vc);
        builder.start((long)start);
        builder.stop((long)end);
        // Turn the adjusted bases back into alleles, preserving each one's reference flag.
        Map<Allele, Allele> fixedAlleleMap = alleleBasesMap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, me -> Allele.create((byte[])((byte[])me.getValue()), (boolean)((Allele)me.getKey()).isReference())));
        builder.alleles(fixedAlleleMap.values());
        builder.genotypes(LiftoverVcf.fixGenotypes(vc.getGenotypes(), fixedAlleleMap));
        return builder.make();
    }

    /**
     * Returns a copy of {@code allele} with one base removed from one end.
     *
     * @param allele            the bases to shorten
     * @param truncateRightmost {@code true} to drop the last base, {@code false} to drop the first
     * @return a new array one element shorter than {@code allele}
     */
    private static byte[] truncateBase(byte[] allele, boolean truncateRightmost) {
        final int from;
        final int to;
        if (truncateRightmost) {
            from = 0;
            to = allele.length - 1;
        } else {
            from = 1;
            to = allele.length;
        }
        return Arrays.copyOfRange(allele, from, to);
    }

    /**
     * Returns a copy of {@code bases} with {@code base} inserted in front.
     *
     * @param bases the existing bases
     * @param base  the base to place at index 0
     * @return a new array one element longer than {@code bases}
     */
    private static byte[] extendOneBase(byte[] bases, byte base) {
        final byte[] extended = new byte[bases.length + 1];
        extended[0] = base;
        System.arraycopy(bases, 0, extended, 1, bases.length);
        return extended;
    }
}

