/*
 * Decompiled with CFR 0.152.
 */
package fr.ens.biologie.genomique.eoulsan.modules.expression.hadoop;

import fr.ens.biologie.genomique.eoulsan.CommonHadoop;
import fr.ens.biologie.genomique.eoulsan.EoulsanException;
import fr.ens.biologie.genomique.eoulsan.EoulsanLogger;
import fr.ens.biologie.genomique.eoulsan.EoulsanRuntime;
import fr.ens.biologie.genomique.eoulsan.Globals;
import fr.ens.biologie.genomique.eoulsan.Settings;
import fr.ens.biologie.genomique.eoulsan.annotations.HadoopOnly;
import fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.ExpressionOutputFormat;
import fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.SAMInputFormat;
import fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.SAMOutputFormat;
import fr.ens.biologie.genomique.eoulsan.core.InputPorts;
import fr.ens.biologie.genomique.eoulsan.core.InputPortsBuilder;
import fr.ens.biologie.genomique.eoulsan.core.Parameter;
import fr.ens.biologie.genomique.eoulsan.core.StepConfigurationContext;
import fr.ens.biologie.genomique.eoulsan.core.TaskContext;
import fr.ens.biologie.genomique.eoulsan.core.TaskResult;
import fr.ens.biologie.genomique.eoulsan.core.TaskStatus;
import fr.ens.biologie.genomique.eoulsan.data.Data;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFormats;
import fr.ens.biologie.genomique.eoulsan.modules.expression.AbstractExpressionModule;
import fr.ens.biologie.genomique.eoulsan.modules.expression.ExpressionCounterUtils;
import fr.ens.biologie.genomique.eoulsan.modules.expression.FinalExpressionFeaturesCreator;
import fr.ens.biologie.genomique.eoulsan.modules.expression.hadoop.ExpressionMapper;
import fr.ens.biologie.genomique.eoulsan.modules.expression.hadoop.ExpressionReducer;
import fr.ens.biologie.genomique.eoulsan.modules.expression.hadoop.ExpressionSAMOutputMapper;
import fr.ens.biologie.genomique.eoulsan.modules.expression.hadoop.PreTreatmentExpressionMapper;
import fr.ens.biologie.genomique.eoulsan.modules.expression.hadoop.PreTreatmentExpressionReducer;
import fr.ens.biologie.genomique.eoulsan.util.hadoop.MapReduceUtils;
import fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils;
import fr.ens.biologie.genomique.eoulsan.util.locker.Locker;
import fr.ens.biologie.genomique.eoulsan.util.locker.ZooKeeperLocker;
import fr.ens.biologie.genomique.kenetre.KenetreException;
import fr.ens.biologie.genomique.kenetre.bio.expressioncounter.ExpressionCounter;
import fr.ens.biologie.genomique.kenetre.util.StringUtils;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.net.InetAddress;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Hadoop implementation of the expression step.
 *
 * <p>The step runs an optional pretreatment map-reduce job when the
 * alignments are paired-end, then the expression map-reduce job, and finally
 * (unless the output is SAM) builds the final expression file from the
 * map-reduce output.
 */
@HadoopOnly
public class ExpressionHadoopModule
extends AbstractExpressionModule {
    /** Extension of the output of the paired-end pretreatment job. */
    private static final String TSAM_EXTENSION = ".tsam";
    /** Extension of the serialized expression counter file. */
    private static final String SERIALIZATION_EXTENSION = ".ser";
    /**
     * Separator character for paired-end SAM records. Not used in this class;
     * presumably shared with the mappers/reducers of this package (the
     * misspelled name is kept because it is package-visible).
     */
    static final char SAM_RECORD_PAIRED_END_SERPARATOR = '\u00a3';
    /** Job configuration key under which the genome description location is stored. */
    static final String GENOME_DESC_PATH_KEY = Globals.PARAMETER_PREFIX + ".expression.genome.desc.file";
    /** Hadoop configuration created from the runtime settings in {@link #configure}. */
    private Configuration conf;

    /**
     * Returns the input ports of the parent module, built with
     * {@code InputPortsBuilder.allPortsRequiredInWorkingDirectory}.
     *
     * @return the input ports of the step
     */
    @Override
    public InputPorts getInputPorts() {
        return InputPortsBuilder.allPortsRequiredInWorkingDirectory(super.getInputPorts());
    }

    /**
     * Configures the parent module and creates the Hadoop configuration from
     * the runtime settings.
     *
     * @param context the configuration context of the step
     * @param stepParameters the parameters of the step
     * @throws EoulsanException if the parent configuration fails
     */
    @Override
    public void configure(StepConfigurationContext context, Set<Parameter> stepParameters) throws EoulsanException {
        super.configure(context, stepParameters);
        this.conf = CommonHadoop.createConfiguration(EoulsanRuntime.getSettings());
    }

    /**
     * Runs the expression computation.
     *
     * @param context the task context
     * @param status the task status, used to build the result
     * @return the task result; an error result is returned when an
     *         {@link IOException}, {@link EoulsanException} or
     *         {@link KenetreException} occurs
     */
    @Override
    public TaskResult execute(TaskContext context, TaskStatus status) {
        Data alignmentsData = context.getInputData(DataFormats.MAPPER_RESULTS_SAM);
        // Request the GTF annotation when the step is configured for GTF input,
        // consistently with the format test done in initializeCounter().
        Data featureAnnotationData = context.getInputData(this.isGTFInputFormat() ? DataFormats.ANNOTATION_GTF : DataFormats.ANNOTATION_GFF);
        Data genomeDescriptionData = context.getInputData(DataFormats.GENOME_DESC_TXT);
        Data outData = context.getOutputData(this.isSAMOutputFormat() ? DataFormats.MAPPER_RESULTS_SAM : DataFormats.EXPRESSION_RESULTS_TSV, alignmentsData);
        // Note: this local configuration (used for the jobs) intentionally
        // shadows the field, which is only used for the final file creation.
        Configuration conf = CommonHadoop.createConfiguration();
        try {
            long startTime = System.currentTimeMillis();
            EoulsanLogger.getLogger().info("Counter: " + this.getExpressionCounter());
            ExpressionHadoopModule.initializeCounter(this.getExpressionCounter(), genomeDescriptionData, featureAnnotationData);

            // Always release the stream opened for the paired-end detection
            boolean pairedEnd;
            try (InputStream samIs = alignmentsData.getDataFile().open()) {
                pairedEnd = ExpressionHadoopModule.isPairedData(samIs);
            }

            if (pairedEnd) {
                MapReduceUtils.submitAndWaitForJob(ExpressionHadoopModule.createPairedEndJob(conf, context, alignmentsData, genomeDescriptionData), alignmentsData.getName(), 5000, status, "expression");
            }
            Job job = ExpressionHadoopModule.createExpressionJob(conf, context, alignmentsData, genomeDescriptionData, featureAnnotationData, outData, this.getExpressionCounter(), pairedEnd);
            MapReduceUtils.submitAndWaitForJob(job, alignmentsData.getName(), 5000, status, "expression");
            long mapReduceEndTime = System.currentTimeMillis();
            EoulsanLogger.getLogger().info("Finish the first part of the expression computation in " + (mapReduceEndTime - startTime) / 1000L + " seconds.");
            if (!this.isSAMOutputFormat()) {
                ExpressionHadoopModule.createFinalExpressionFeaturesFile(context, this.getExpressionCounter(), outData, job, this.conf);
                EoulsanLogger.getLogger().info("Finish the create of the final expression files in " + (System.currentTimeMillis() - mapReduceEndTime) / 1000L + " seconds.");
            }
            return status.createTaskResult();
        }
        catch (IOException e) {
            return status.createTaskResult(e, "Error while running job: " + e.getMessage());
        }
        catch (EoulsanException | KenetreException e) {
            return status.createTaskResult(e, "Error while reading the annotation file: " + e.getMessage());
        }
    }

    /**
     * Creates the expression map-reduce job.
     *
     * <p>If the serialized counter file does not exist yet, it is created
     * under a ZooKeeper lock and then added to the cache files of the job.
     *
     * @param parentConf configuration copied to create the job configuration
     * @param context the task context
     * @param alignmentsData the alignments to process
     * @param genomeDescriptionData the genome description
     * @param featureAnnotationData the feature annotation
     * @param outData the output data
     * @param counter the expression counter to serialize for the job
     * @param tsamFormat {@code true} to read the {@code .tsam} file that has
     *        the same name as the alignment file instead of the alignment
     *        file itself
     * @return the configured job
     * @throws IOException if there is no input file or if an I/O error occurs
     * @throws EoulsanException if the counter serialization fails
     */
    private static Job createExpressionJob(Configuration parentConf, TaskContext context, Data alignmentsData, Data genomeDescriptionData, Data featureAnnotationData, Data outData, ExpressionCounter counter, boolean tsamFormat) throws IOException, EoulsanException {
        Configuration jobConf = new Configuration(parentConf);
        DataFile inputDataFile = alignmentsData.getDataFile();
        if (inputDataFile == null) {
            throw new IOException("No input file found.");
        }
        String dataFileSource = tsamFormat ? StringUtils.filenameWithoutExtension(inputDataFile.getSource()) + TSAM_EXTENSION : inputDataFile.getSource();
        Path inputPath = new Path(dataFileSource);
        DataFile annotationDataFile = featureAnnotationData.getDataFile();
        DataFile outFile = outData.getDataFile();
        EoulsanLogger.getLogger().fine("sample: " + alignmentsData.getName());
        EoulsanLogger.getLogger().fine("inputPath.getName(): " + inputPath.getName());
        EoulsanLogger.getLogger().fine("annotationDataFile: " + annotationDataFile.getSource());
        EoulsanLogger.getLogger().fine("outFile: " + outFile.getSource());
        jobConf.set("mapred.child.java.opts", "-Xmx1024m");
        jobConf.set(CommonHadoop.COUNTER_GROUP_KEY, "expression");
        DataFile genomeDescDataFile = genomeDescriptionData.getDataFile();
        jobConf.set(GENOME_DESC_PATH_KEY, genomeDescDataFile.getSource());

        // The serialized counter is stored next to the annotation file
        Path counterSerializationFilePath = new Path(new DataFile(annotationDataFile.getParent(), annotationDataFile.getBasename() + SERIALIZATION_EXTENSION).getSource());
        EoulsanLogger.getLogger().info("counterSerializationFilePath: " + counterSerializationFilePath);
        if (!PathUtils.isFile(counterSerializationFilePath, jobConf)) {
            Locker lock = ExpressionHadoopModule.createZookeeperLock(parentConf, context);
            lock.lock();
            try {
                // serializeCounter() checks again that the file does not exist,
                // as another task may have created it while waiting for the lock
                ExpressionHadoopModule.serializeCounter(context, counter, counterSerializationFilePath, jobConf);
            } finally {
                // Never keep the lock if the serialization fails
                lock.unlock();
            }
        }

        Job job = Job.getInstance(jobConf, "Expression computation with htseq-count (" + alignmentsData.getName() + ", " + inputPath.getName() + ", " + annotationDataFile.getSource() + ")");
        job.addCacheFile(counterSerializationFilePath.toUri());
        job.setJarByClass(ExpressionHadoopModule.class);
        FileInputFormat.setInputPaths(job, inputPath);
        job.setInputFormatClass(SAMInputFormat.class);
        if (DataFormats.MAPPER_RESULTS_SAM.equals(outData.getFormat())) {
            // SAM output: map-only job writing directly to the output file
            job.setMapperClass(ExpressionSAMOutputMapper.class);
            job.setNumReduceTasks(0);
            job.setOutputFormatClass(SAMOutputFormat.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileOutputFormat.setOutputPath(job, new Path(outFile.getSource()));
        } else {
            // Expression output: results are written to a ".tmp" location that
            // is read back by createFinalExpressionFeaturesFile()
            job.setMapperClass(ExpressionMapper.class);
            job.setCombinerClass(ExpressionReducer.class);
            job.setReducerClass(ExpressionReducer.class);
            job.setOutputFormatClass(ExpressionOutputFormat.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            DataFile tmpFile = new DataFile(outFile.getParent(), outFile.getBasename() + ".tmp");
            EoulsanLogger.getLogger().fine("tmpFile: " + tmpFile.getSource());
            FileOutputFormat.setOutputPath(job, new Path(tmpFile.getSource()));
        }
        return job;
    }

    /**
     * Creates the pretreatment map-reduce job used for paired-end data. Its
     * output is written next to the input file with the {@code .tsam}
     * extension.
     *
     * @param parentConf configuration copied to create the job configuration
     * @param context the task context (not used by this method)
     * @param alignmentsData the alignments to process
     * @param genomeDescriptionData the genome description
     * @return the configured job
     * @throws IOException if the job cannot be created
     */
    private static Job createPairedEndJob(Configuration parentConf, TaskContext context, Data alignmentsData, Data genomeDescriptionData) throws IOException {
        Configuration jobConf = new Configuration(parentConf);
        DataFile inputDataFile = alignmentsData.getDataFile();
        Path inputPath = new Path(inputDataFile.getSource());
        jobConf.set(CommonHadoop.COUNTER_GROUP_KEY, "expression");
        // NOTE(review): createExpressionJob() stores getDataFile().getSource()
        // under this key while this job stores getDataFilename() - confirm
        // that the difference is intended.
        jobConf.set(GENOME_DESC_PATH_KEY, genomeDescriptionData.getDataFilename());
        Job job = Job.getInstance(jobConf, "Pretreatment for the expression estimation step (" + alignmentsData.getName() + ", " + inputDataFile.getSource() + ")");
        job.setJarByClass(ExpressionHadoopModule.class);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(PreTreatmentExpressionMapper.class);
        job.setReducerClass(PreTreatmentExpressionReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        String outputName = StringUtils.filenameWithoutExtension(inputPath.getName()) + TSAM_EXTENSION;
        FileOutputFormat.setOutputPath(job, new Path(inputPath.getParent(), outputName));
        return job;
    }

    /**
     * Initializes the expression counter with the genome description and the
     * annotation.
     *
     * @param counter the counter to initialize
     * @param genomeDescData the genome description
     * @param annotationData the annotation; its format tells whether the
     *        annotation is in GTF format
     * @throws KenetreException if the counter initialization fails
     * @throws IOException if an I/O error occurs
     */
    private static void initializeCounter(ExpressionCounter counter, Data genomeDescData, Data annotationData) throws KenetreException, IOException {
        ExpressionCounterUtils.init(counter, genomeDescData.getDataFile(), annotationData.getDataFile(), annotationData.getFormat() == DataFormats.ANNOTATION_GTF);
    }

    /**
     * Serializes the counter to a local temporary file and copies it to the
     * given path. Does nothing if the destination file already exists.
     *
     * @param context the task context, used to create the temporary file
     * @param counter the counter to serialize
     * @param counterSerializationFilePath the destination path
     * @param conf the Hadoop configuration
     * @throws IOException if an I/O error occurs
     * @throws EoulsanException declared for callers; not thrown by the code
     *         visible in this method
     */
    private static void serializeCounter(TaskContext context, ExpressionCounter counter, Path counterSerializationFilePath, Configuration conf) throws IOException, EoulsanException {
        if (PathUtils.isFile(counterSerializationFilePath, conf)) {
            return;
        }
        File counterSerializationFile = context.getRuntime().createFileInTempDir(counterSerializationFilePath.getName() + SERIALIZATION_EXTENSION);
        ExpressionHadoopModule.serializeCounter(counter, counterSerializationFile);
        PathUtils.copyLocalFileToPath(counterSerializationFile, counterSerializationFilePath, conf);
        if (!counterSerializationFile.delete()) {
            EoulsanLogger.getLogger().warning("Can not delete the counter serialization file: " + counterSerializationFile.getAbsolutePath());
        }
    }

    /**
     * Serializes the counter to a local file using Java serialization.
     *
     * @param counter the counter to serialize
     * @param counterSerializationFile the output file
     * @throws IOException if an error occurs while writing the file
     * @throws NullPointerException if one of the arguments is null
     */
    private static void serializeCounter(ExpressionCounter counter, File counterSerializationFile) throws IOException {
        if (counter == null) {
            throw new NullPointerException("counter argument cannot be null");
        }
        if (counterSerializationFile == null) {
            throw new NullPointerException("counterSerializationFile argument cannot be null");
        }
        try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(counterSerializationFile))) {
            oos.writeObject(counter);
        }
    }

    /**
     * Creates the final expression file from the output of the map-reduce job.
     *
     * @param context the task context (not used by this method)
     * @param counter the expression counter
     * @param outData the output data, whose data file is the final file
     * @param job the finished expression job; its output directory is read
     * @param conf the Hadoop configuration used to get the file system
     * @throws IOException if an I/O error occurs
     */
    private static void createFinalExpressionFeaturesFile(TaskContext context, ExpressionCounter counter, Data outData, Job job, Configuration conf) throws IOException {
        FinalExpressionFeaturesCreator fefc = new FinalExpressionFeaturesCreator(counter);
        Path resultPath = new Path(outData.getDataFile().getSource());
        FileSystem fs = resultPath.getFileSystem(conf);
        fefc.initializeExpressionResults();

        // The streams are closed here even if the creator fails; a second
        // close() by the creator itself is expected to be harmless.
        try (InputStream preResults = new DataFile(job.getConfiguration().get("mapreduce.output.fileoutputformat.outputdir")).open()) {
            fefc.loadPreResults(preResults);
        }
        try (OutputStream finalResults = fs.create(resultPath)) {
            fefc.saveFinalResults(finalResults);
        }
    }

    /**
     * Creates the ZooKeeper lock used to protect the counter serialization.
     *
     * <p>When no connect string is defined in the settings, the host of the
     * {@code yarn.resourcemanager.hostname} property and the default
     * ZooKeeper port of the settings are used.
     *
     * @param conf the Hadoop configuration
     * @param context the task context
     * @return a new locker
     * @throws IOException if the connect string cannot be determined or if the
     *         locker cannot be created
     */
    private static Locker createZookeeperLock(Configuration conf, TaskContext context) throws IOException {
        Settings settings = context.getSettings();
        String connectString = settings.getZooKeeperConnectString();
        if (connectString == null) {
            String resourceManagerHost = conf.get("yarn.resourcemanager.hostname");
            if (resourceManagerHost == null) {
                // Fail with an explicit message instead of a NullPointerException
                throw new IOException("Cannot determine the ZooKeeper connect string: no connect string in the settings and the yarn.resourcemanager.hostname property is not set");
            }
            connectString = resourceManagerHost.split(":")[0] + ":" + settings.getZooKeeperDefaultPort();
        }
        return new ZooKeeperLocker(connectString, settings.getZooKeeperSessionTimeout(), "/eoulsan-locks-" + InetAddress.getLocalHost().getHostName(), "expression-lock-job-" + context.getJobUUID() + "-step-" + context.getCurrentStep().getNumber());
    }

    /**
     * Tests whether SAM data is paired-end by looking at the paired flag of
     * its first record.
     *
     * @param samIs the SAM data to read; the reader opened on it is closed
     *        before returning
     * @return {@code true} if the first record has the paired flag set,
     *         {@code false} if there is no record, if the flag is not set or
     *         if an {@link IOException} occurs
     * @throws NullPointerException if {@code samIs} is null
     */
    public static boolean isPairedData(InputStream samIs) {
        if (samIs == null) {
            throw new NullPointerException("samIs argument cannot be null");
        }
        // try-with-resources closes the reader even if reading a record fails
        try (SamReader input = SamReaderFactory.makeDefault().open(SamInputResource.of(samIs))) {
            SAMRecordIterator samIterator = input.iterator();
            return samIterator.hasNext() && samIterator.next().getReadPairedFlag();
        }
        catch (IOException e) {
            // Best effort: the data is considered as single-end, but the
            // error is no longer silently discarded
            EoulsanLogger.getLogger().warning("Unable to check if the SAM data is paired-end: " + e.getMessage());
            return false;
        }
    }
}

