bismark2report

#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long;
use FindBin qw($RealBin);
use lib "$RealBin/../lib";

## This program is Copyright (C) 2010-23, Felix Krueger (fkrueger@altoslabs.com)

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.

my $bismark2report_version = 'v0.24.2';
my (@alignment_reports,@dedup_reports,@splitting_reports,@mbias_reports,@nuc_reports);

my ($output_dir,$verbose,$manual_output_file) = process_commandline();

# print join (",",@alignment_reports)."\n";
# print join (",",@dedup_reports)."\n";
# print join (",",@splitting_reports)."\n";
# print join (",",@mbias_reports)."\n";
# print join (",",@nuc_reports)."\n";

while (@alignment_reports){

  my $alignment_report = shift @alignment_reports;
  my $dedup_report     = shift @dedup_reports;
  my $splitting_report = shift @splitting_reports;
  my $mbias_report     = shift @mbias_reports;
  my $nuc_report       = shift @nuc_reports;

  ### HTML OUTPUT FILE
  my $report_output = $alignment_report;
  $report_output =~ s/^.*\///; # deleting optional path information
  $report_output =~ s/\.txt$//;
  $report_output =~ s/$/.html/;

  # if -o output_file was specified we are going to use that name preferentially.
  # This may only happen if there is a single report in the folder, or if a single report has been specified manually
  if ($manual_output_file){
      warn "A specific output filename was specified: $manual_output_file. Using that one instead of deriving the filename\n"; sleep(1);
      $report_output = $manual_output_file;
  }
  
  $report_output = $output_dir.$report_output;
  warn "\nWriting Bismark HTML report to >> $report_output <<\n\n";
  
  # Get the report template into a string
  my $doc = read_report_template('plotly_template.tpl');
  
  # Get the plot.ly code into a string. This makes the template so much more managable and also allows for quick replacement of the plot.ly
  # code itself should it get updated
  my $plotly_code = read_report_template('plot.ly');
  
  # replacing the Plot.ly spaceholders with the actual plot.ly code
  if ($doc =~ s/\{\{plotly_goes_here\}\}.*\{\{plotly_goes_here\}\}/$plotly_code/s){  # treating the string as a single line
      warn "Plot.ly injection successful!\n" if $verbose;
  }
  else{
      die "Plot.ly incjection not working, won't be able to construct any meaningful HTML reports in this case....\n\n";
  }

  my $bismark_logo = read_report_template('bismark.logo');
  if ($doc =~ s/\{\{bismark_logo_goes_here\}\}.*\{\{bismark_logo_goes_here\}\}/$bismark_logo/s){  # treating the string as a single line
      warn "bismark.logo injection successful!\n" if $verbose;
  }
  
  my $bioinf_logo = read_report_template('bioinf.logo');
  if ($doc =~ s/\{\{bioinf_logo_goes_here\}\}.*\{\{bioinf_logo_goes_here\}\}/$bioinf_logo/s){  # treating the string as a single line
      warn "bioinf.logo injection successful!\n" if $verbose;
  }


  # Create timestamp
  $doc = getLoggingTime($doc);
  
  # BISMARK ALIGNMENT REPORT (mandatory)
  warn "="x110,"\n";
  warn "Using the following alignment report:\t\t> $alignment_report <\n";
  $doc = read_alignment_report($alignment_report,$doc);
  
  # DEDUPLICATION REPORT (optional)
  if ($dedup_report){
    warn "Using the following deduplication report:\t> $dedup_report <\n";
    $doc =~ s/\{\{deduplication_section\}\}//g; # deleting these tags and then go ahead and fill the table
    $doc = read_deduplication_report($dedup_report,$doc);
  }
  else{
    warn "No deduplication report file specified, skipping this step\n";
    # deleting the entire Deduplication Section from the template
    $doc =~ s/\{\{deduplication_section\}\}.*\{\{deduplication_section\}\}//s; # treating the string as a single line
  }

  # SPLITTING REPORT (optional)
  if ($splitting_report){
    warn "Using the following splitting report:\t\t> $splitting_report <\n";
    $doc =~ s/\{\{cytosine_methylation_post_deduplication_section\}\}//g; # deleting these tags and then go ahead and fill the table
    $doc = read_splitting_report($splitting_report,$doc);
  }
  else{
    warn "No splitting report file specified, skipping this step\n";
    # deleting the entire Splitting Report C context section from the template
    $doc =~ s/\{\{cytosine_methylation_post_deduplication_section\}\}.*\{\{cytosine_methylation_post_deduplication_section\}\}//s; # treating the string as a single line
  }

  # M-BIAS REPORT (optional)
  if ($mbias_report){
      warn "Using the following M-bias report:\t\t> $mbias_report <\n";
      $doc =~ s/\{\{mbias_r1_section\}\}//g; # deleting these tags and then go ahead and fill the table
      (my $state, $doc) = read_mbias_report($mbias_report,$doc);
  
      # warn "OK have read the report\n\n";
      # warn ">$state<<<\n";
      # sleep(1);
      if ($state eq 'single'){
	  # warn "Read was $state: Deleting the M-bias2 section\n";
	  $doc =~ s/(\{\{mbias_r2_section\}\}.*\{\{mbias_r2_section\}\})//s; # treating the string as a single line
	  # warn "Deleting: $1\n\n";sleep(5);
      }
      else{ # paired-end
	  $doc =~ s/\{\{mbias_r2_section\}\}//g; # deleting these tags and then go ahead and fill the table
      }
  }
  else{
      warn "No M-bias report file specified, skipping this step\n";
      $doc =~ s/\{\{mbias_r1_section\}\}.*\{\{mbias_r1_section\}\}//s; # treating the string as a single line
      $doc =~ s/\{\{mbias_r2_section\}\}.*\{\{mbias_r2_section\}\}//s; # treating the string as a single line
  }
  
  # NUCLEOTIDE COVERAGE REPORT (optional)
  if ($nuc_report){
      warn "Using the following nucleotide coverage report:\t> $nuc_report <\n";
      $doc =~ s/\{\{nucleotide_coverage_section\}\}//g; # deleting these tags and then go ahead and fill the table
      $doc = read_nucleotide_coverage_report($nuc_report,$doc);
  }
  else{
      warn "No nucleotide coverage report file specified, skipping this step\n";
      # deleting the entire Nucleotide Coverage section from the template
      # warn "Deleting the entire Nucleotide Coverage section!!!\n\n";
      # print $doc;
      $doc =~ s/\{\{nucleotide_coverage_section\}\}.*\{\{nucleotide_coverage_section\}\}//s; # treating the string as a single line
  }
  warn "="x110,"\n\n\n";
  $verbose and sleep(3);

  write_out_report($report_output,$doc);

}

sub write_out_report{
  my ($report_output,$doc) = @_;
  open (OUT,'>',$report_output) or die "Failed to write to output file $report_output: $!\n\n";
  print OUT $doc;
}

sub getLoggingTime {
  my $doc = shift;
  my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time);

  my $time = sprintf ("%02d:%02d:%02d", $hour,$min,$sec);
  my $date = sprintf ("%04d-%02d-%02d", $year+1900,$mon+1,$mday);
  warn "Using Time: $time, and date: $date\n\n" if ($verbose);

  $doc =~ s/\{\{date\}\}/$date/g;
  $doc =~ s/\{\{time\}\}/$time/g;

  return $doc;
}


sub read_alignment_report{

  my ($alignment_report,$doc) = @_;

  warn "Processing alignment report $alignment_report ...\n";
  open (ALN,$alignment_report) or die "Couldn't read from file $alignment_report: $!\n\n";

  my $unique;
  my $no_aln;
  my $multiple;
  my $no_genomic;
  my $total_seqs;
  my $bismark_version;
  my $input_filename;

  my $unique_text;
  my $no_aln_text;
  my $multiple_text;
  my $total_seq_text;

  my $total_C_count;
  my ($meth_CpG,$meth_CHG,$meth_CHH,$meth_unknown);
  my ($unmeth_CpG,$unmeth_CHG,$unmeth_CHH,$unmeth_unknown);
  my ($perc_CpG,$perc_CHG,$perc_CHH,$perc_unknown);

  my $number_OT;
  my $number_CTOT;
  my $number_CTOB;
  my $number_OB;

  while (<ALN>){
    chomp;

    ### General Alignment stats
    if ($_ =~ /^Sequence pairs analysed in total:/ ){                        ## Paired-end
      (undef,$total_seqs) = split /\t/;
      print "Total paired seqs: >> $total_seqs <<\n" if ($verbose);
      $total_seq_text = 'Sequence pairs analysed in total';
    }
    elsif ($_ =~ /^Sequences analysed in total:/ ){                          ## Single-end
      (undef,$total_seqs) = split /\t/;
      $total_seq_text = 'Sequences analysed in total';
      print "total single-end seqs >> $total_seqs <<\n" if ($verbose);
    }

    elsif($_ =~ /^Bismark report for: (.*) \(version: (.*)\)/){
      $input_filename = $1;
      $bismark_version = $2;
      print "Input filename(s) >> $input_filename <<\n" if ($verbose);
      print "Bismark version >> $bismark_version <<\n" if ($verbose);
    }

    elsif($_ =~ /^Number of paired-end alignments with a unique best hit:/){ ## Paired-end
      (undef,$unique) = split /\t/;
      print "Unique PE>> $unique <<\n" if ($verbose);;
      $unique_text = 'Paired-end alignments with a unique best hit';
    }
    elsif($_ =~ /^Number of alignments with a unique best hit from/){        ## Single-end
      (undef,$unique) = split /\t/;
      print "Unique SE>> $unique <<\n" if ($verbose);
      $unique_text = 'Single-end alignments with a unique best hit';
    }

    elsif($_ =~ /^Sequence pairs with no alignments under any condition:/){  ## Paired-end
      (undef,$no_aln) = split /\t/;
      print "No alignment PE >> $no_aln <<\n" if ($verbose);
      $no_aln_text = 'Pairs without alignments under any condition';
    }
    elsif($_ =~ /^Sequences with no alignments under any condition:/){       ## Single-end
      (undef,$no_aln) = split /\t/;
      print "No alignments SE>> $no_aln <<\n" if ($verbose);
      $no_aln_text = 'Sequences without alignments under any condition';
    }

    elsif($_ =~ /^Sequence pairs did not map uniquely:/){                    ## Paired-end
      (undef,$multiple) = split /\t/;
      print "Multiple alignments PE >> $multiple <<\n" if ($verbose);
      $multiple_text = 'Pairs that did not map uniquely';
    }
    elsif($_ =~ /^Sequences did not map uniquely:/){                         ## Single-end
      (undef,$multiple) = split /\t/;
      print "Multiple alignments SE >> $multiple <<\n" if ($verbose);
      $multiple_text = 'Sequences that did not map uniquely';
    }

    elsif($_ =~ /^Sequence pairs which were discarded because genomic sequence could not be extracted:/){ ## Paired-end
      (undef,$no_genomic) = split /\t/;
      print "No genomic sequence PE >> $no_genomic <<\n" if ($verbose);
    }
    elsif($_ =~ /^Sequences which were discarded because genomic sequence could not be extracted:/){ ## Single-end
      (undef,$no_genomic) = split /\t/;
      print "No genomic sequence SE>> $no_genomic <<\n" if ($verbose);
    }

    ### Context Methylation
    elsif($_ =~ /^Total number of C/ ){
      (undef,$total_C_count) = split /\t/;
      print "Total number C >> $total_C_count <<\n" if ($verbose);
    }

      elsif($_ =~ /^Total methylated C\'s in CpG context:/ ){
      (undef,$meth_CpG) = split /\t/;
      print "meth CpG >> $meth_CpG <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total methylated C\'s in CHG context:/ ){
      (undef,$meth_CHG) = split /\t/;
      print "meth CHG >> $meth_CHG <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total methylated C\'s in CHH context:/ ){
      (undef,$meth_CHH) = split /\t/;
      print "meth CHH >> $meth_CHH <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total methylated C\'s in Unknown context:/ ){
      (undef,$meth_unknown) = split /\t/;
      print "meth Unknown >> $meth_unknown <<\n" if ($verbose);
    }

    elsif($_ =~ /^Total unmethylated C\'s in CpG context:/ or $_ =~ /^Total C to T conversions in CpG context:/){
      (undef,$unmeth_CpG) = split /\t/;
      print "unmeth CpG >> $unmeth_CpG <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total unmethylated C\'s in CHG context:/ or $_ =~ /^Total C to T conversions in CHG context:/){
      (undef,$unmeth_CHG) = split /\t/;
      print "unmeth CHG >> $unmeth_CHG <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total unmethylated C\'s in CHH context:/ or $_ =~ /^Total C to T conversions in CHH context:/){
      (undef,$unmeth_CHH) = split /\t/;
      print "unmeth CHH >> $unmeth_CHH <<\n"if ($verbose);
    }
    elsif($_ =~ /^Total unmethylated C\'s in Unknown context:/ or $_ =~ /^Total C to T conversions in Unknown context:/){
      (undef,$unmeth_unknown) = split /\t/;
      print "unmeth Unknown >> $unmeth_unknown <<\n" if ($verbose);
    }

    elsif($_ =~ /^C methylated in CpG context:/ ){
      (undef,$perc_CpG) = split /\t/;
      $perc_CpG =~ s/%//;
      print "percentage CpG >> $perc_CpG <<\n" if ($verbose);
    }
    elsif($_ =~ /^C methylated in CHG context:/ ){
      (undef,$perc_CHG) = split /\t/;
      $perc_CHG =~ s/%//;
      print "percentage CHG >> $perc_CHG <<\n" if ($verbose);
    }
    elsif($_ =~ /^C methylated in CHH context:/ ){
      (undef,$perc_CHH) = split /\t/;
      $perc_CHH =~ s/%//;
      print "percentage CHH >> $perc_CHH <<\n" if ($verbose);
    }
    elsif($_ =~ /^C methylated in Unknown context \(CN or CHN\):/ ){
      (undef,$perc_unknown) = split /\t/;
      $perc_unknown =~ s/%//;
      print "percentage Unknown >> $perc_unknown <<\n" if ($verbose);
    }


    ### Strand Origin

    elsif($_ =~ /^CT\/GA\/CT:/ ){             ## Paired-end
      (undef,$number_OT) = split /\t/;
      print "Number OT PE>> $number_OT <<\n" if ($verbose);
    }
    elsif($_ =~ /^CT\/CT:/ ){                 ## Single-end
      (undef,$number_OT) = split /\t/;
      print "Number OT SE>> $number_OT <<\n" if ($verbose);
    }

    elsif($_ =~ /^GA\/CT\/CT:/ ){             ## Paired-end
      (undef,$number_CTOT) = split /\t/;
      print "Number CTOT PE >> $number_CTOT <<\n" if ($verbose);
    }
    elsif($_ =~ /^GA\/CT:/ ){                 ## Single-end
      (undef,$number_CTOT) = split /\t/;
      print "Number CTOT SE >> $number_CTOT <<\n" if ($verbose);
    }

    elsif($_ =~ /^GA\/CT\/GA:/ ){             ## Paired-end
      (undef,$number_CTOB) = split /\t/;
      print "Number CTOB PE >> $number_CTOB <<\n" if ($verbose);
    }
    elsif($_ =~ /^GA\/GA:/ ){                 ## Single-end
      (undef,$number_CTOB) = split /\t/;
      print "Number CTOB SE >> $number_CTOB <<\n" if ($verbose);
    }

    elsif($_ =~ /^CT\/GA\/GA:/ ){             ## Paired-end
      (undef,$number_OB) = split /\t/;
      print "Number OB PE >> $number_OB <<\n" if ($verbose);
    }
    elsif($_ =~ /^CT\/GA:/ ){                 ## Single-end
      (undef,$number_OB) = split /\t/;
      print "Number OB SE >> $number_OB <<\n" if ($verbose);
    }


  }

  if (defined $unique and defined $no_aln and defined $multiple and defined $no_genomic and defined $total_seqs){
    warn "Got all necessary information, editing HTML report\n"  if ($verbose);

    ### General Alignment Stats
    $doc =~ s/\{\{unique_seqs\}\}/$unique/g;
    $doc =~ s/\{\{unique_seqs_text\}\}/$unique_text/g;

    $doc =~ s/\{\{no_alignments\}\}/$no_aln/g;
    $doc =~ s/\{\{no_alignments_text\}\}/$no_aln_text/g;

    $doc =~ s/\{\{multiple_alignments\}\}/$multiple/g;
    $doc =~ s/\{\{multiple_alignments_text\}\}/$multiple_text/g;

    $doc =~ s/\{\{no_genomic\}\}/$no_genomic/g;

    $doc =~ s/\{\{total_sequences_alignments\}\}/$total_seqs/g;
    $doc =~ s/\{\{sequences_analysed_in_total\}\}/$total_seq_text/g;

    $doc =~ s/\{\{filename\}\}/$input_filename/g;
    $doc =~ s/\{\{bismark_version\}\}/$bismark_version/g;

    ### Alignment stats Plot
    $doc =~ s/\{\{alignment_stats_plotly\}\}/$unique,$no_aln,$multiple,$no_genomic/g;

    ### Strand Origin
    $doc =~ s/\{\{number_OT\}\}/$number_OT/g;
    $doc =~ s/\{\{number_CTOT\}\}/$number_CTOT/g;
    $doc =~ s/\{\{number_CTOB\}\}/$number_CTOB/g;
    $doc =~ s/\{\{number_OB\}\}/$number_OB/g;

    ### Context Methylation
    $doc =~ s/\{\{total_C_count\}\}/$total_C_count/g;

    ### Strand Alignment Plot
    $doc =~ s/\{\{strand_alignment_plotly\}\}/$number_OT,$number_CTOT,$number_CTOB,$number_OB/g;

    unless (defined $perc_CpG){
      $perc_CpG = 'N/A';
    }
    unless (defined $perc_CHG){
      $perc_CHG = 'N/A';
    }
    unless (defined $perc_CHH){
      $perc_CHH = 'N/A';
    }
    unless (defined $perc_unknown){
      $perc_unknown = 'N/A';
    }

    ### Unknown sequence context, just for Bowtie 2 alignments
    my $meth_unknown_inject;
    my $unmeth_unknown_inject;
    my $perc_unknown_inject;

    if (defined $meth_unknown){ # if one Unknown context file is present, so should the others
      $meth_unknown_inject = "     <tr>
                                <th>Methylated C's in Unknown context</th>
    				<td>$meth_unknown</td>
    			</tr>";
      $unmeth_unknown_inject = "     <tr>
                                <th>Unmethylated C's in Unknown context</th>
    				<td>$unmeth_unknown</td>
    			</tr>";
      $perc_unknown_inject = "     <tr>
                                <th>Methylated C's in Unknown context</th>
    				<td>$perc_unknown%</td>
    			</tr>";
    }
    else{
      $meth_unknown_inject = $unmeth_unknown_inject = $perc_unknown_inject = '';
    }

    ### injecting this into the table
    $doc =~ s/\{\{meth_unknown\}\}/$meth_unknown_inject/g;
    $doc =~ s/\{\{unmeth_unknown\}\}/$unmeth_unknown_inject/g;
    $doc =~ s/\{\{perc_unknown\}\}/$perc_unknown_inject/g;

    $doc =~ s/\{\{meth_CpG\}\}/$meth_CpG/g;
    $doc =~ s/\{\{meth_CHG\}\}/$meth_CHG/g;
    $doc =~ s/\{\{meth_CHH\}\}/$meth_CHH/g;

    $doc =~ s/\{\{unmeth_CpG\}\}/$unmeth_CpG/g;
    $doc =~ s/\{\{unmeth_CHG\}\}/$unmeth_CHG/g;
    $doc =~ s/\{\{unmeth_CHH\}\}/$unmeth_CHH/g;

    $doc =~ s/\{\{perc_CpG\}\}/$perc_CpG/g;
    $doc =~ s/\{\{perc_CHG\}\}/$perc_CHG/g;
    $doc =~ s/\{\{perc_CHH\}\}/$perc_CHH/g;

    my ($perc_CpG_graph, $perc_CHG_graph,$perc_CHH_graph,$perc_unknown_graph);

    if ($perc_CpG eq 'N/A'){
      $perc_CpG_graph = 0; # values of 0 won't show in the graph and won't produce errors
    }
    else{
      $perc_CpG_graph =  $perc_CpG;
    }

    if ($perc_CHG eq 'N/A'){
      $perc_CHG_graph = 0; # values of 0 won't show in the graph and won't produce errors
    }
    else{
      $perc_CHG_graph =  $perc_CHG;
    }

    if ($perc_CHH eq 'N/A'){
      $perc_CHH_graph = 0; # values of 0 won't show in the graph and won't produce errors
    }
    else{
      $perc_CHH_graph =  $perc_CHH;
    }

    if ($perc_unknown eq 'N/A'){
      $perc_unknown_graph = 0; # values of 0 won't show in the graph and won't produce errors
    }
    else{
      $perc_unknown_graph =  $perc_unknown;
    }

    ### Context Methylation Plot
    $doc =~ s/\{\{cytosine_methylation_plotly\}\}/$perc_CpG_graph,$perc_CHG_graph,$perc_CHH_graph/g;
  
  }
  else{
    warn "Am I missing something?\n\n";
  }

  warn "Complete\n\n";
  return $doc;
}


sub read_deduplication_report{

  my ($dedup_report,$doc) = @_;

  warn "Processing deduplication report $dedup_report ...\n";
  open (DEDUP,$dedup_report) or die "Couldn't read from file $dedup_report: $!\n\n";

  my $total_seqs;
  my $dups;
  my $diff_pos;
  my $leftover;

  while (<DEDUP>){
    chomp;
    if ($_ =~ /^Total number of alignments/){
      (undef,$total_seqs) = split /\t/;
      warn "Total number of seqs >> $total_seqs <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total number duplicated/){
      (undef,$dups) = split /\t/;
      $dups =~ s/\s.*//; # just need the number, not the percentage
      warn "Duplicated >> $dups <<\n" if ($verbose);
    }
    elsif($_ =~ /^Duplicated alignments were found at/){
      (undef,$diff_pos) = split /\t/;
      $diff_pos =~ s/\s.*//; # just need the number
      warn "Different positions >> $diff_pos <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total count of deduplicated leftover sequences: (\d+)/){
      $leftover = $1;
      warn "Leftover seqs >> $leftover <<\n" if ($verbose);
    }
  }

  unless (defined $leftover){
    if (defined $dups and defined $total_seqs){
      $leftover = $total_seqs - $dups;
    }
  }

  # Checking if we got all we need
  if (defined $dups and defined $total_seqs and defined $diff_pos and defined $leftover){
    # warn "Got all I need!\n\n";
    $doc =~ s/\{\{seqs_total_duplicates\}\}/$total_seqs/g;
    $doc =~ s/\{\{unique_alignments_duplicates\}\}/$leftover/g;
    $doc =~ s/\{\{duplicate_alignments_duplicates\}\}/$dups/g;
    $doc =~ s/\{\{different_positions_duplicates\}\}/$diff_pos/g;
    
    ### Duplication Plot Plot.ly
    $doc =~ s/\{\{duplication_stats_plotly\}\}/$leftover,$dups/g;
  }
  else{
    warn "Something went wrong... Use --verbose to get a clue...\n";
    # skipping this plot entirely if values could not be extracted
    return $doc;
  }
  warn "Complete\n\n";
  return $doc;
}


sub read_nucleotide_coverage_report{
    
    my ($nuc_report,$doc) = @_;
    
    warn "Processing nucleotide coverage report '$nuc_report' ...\n";
    open (NUC,$nuc_report) or die "Couldn't read from file $nuc_report: $!\n\n";
    
    my %nucs; # storing nucleotides and frequencies
    my $linecount = 0;
    
    while (<NUC>){
	chomp;
	$_ =~ s/\r//; # removing carriage returns
	# warn "$_\n"; sleep(1);
	my ($element,$count_obs,$observed,$count_exp,$expected,$coverage) = (split /\t/);
	# warn "$element , $count_obs , $observed , $count_exp , $expected, $coverage\n"; sleep(1);
	if ($linecount == 0){ # verifying that the data appears to be a Bismark nucleotide coverage report
	    if ($observed eq 'percent sample'){
		# warn "Fine, found '$observed'\n";
	    }
	    else{
		die "Expected to find 'percent sample' as entry in line 1, column 3 but found '$observed'. This doesn't look like a Bismark nucleotide coverage report. Please respecify!\n";
	    }
	    
	    if ($expected eq 'percent genomic'){
		# warn "Fine, found '$expected'\n";
	    }
	    else{
		die "Expected to find 'percent genomic' as entry in line 1, column 5 but found '$expected'. This doesn't look like a Bismark nucleotide coverage report. Please respecify!\n";
	    }
	}
	else{
	    $nucs{$element}->{obs}->{percent}  = $observed;
	    $nucs{$element}->{exp}->{percent}  = $expected;
	    $nucs{$element}->{obs}->{counts}   = $count_obs;
	    $nucs{$element}->{exp}->{counts}   = $count_exp;
	    $nucs{$element}->{obs}->{coverage} = $coverage; # coverage of that nucleotide in the sample
	    warn "Element '$element' observed: $observed\n" if $verbose;
	    warn "Element '$element' expected: $expected\n" if $verbose;
	}

	++$linecount;

    }

    # Checking if we got all we need
    my $looksOK = 1;
    foreach my $key (keys %nucs){
	unless ( (defined $nucs{$key}->{obs}) and (defined $nucs{$key}->{exp})){
	    $looksOK = 0;
	}
    }
    
    if ($looksOK){
	warn "Got all necessary information, editing HTML report ...\n" if $verbose;
	my $minmax = 0;
	my @y_array;
	my @x_genomic;
	my @x_sample;

#	foreach my $key (sort {$a cmp $b} keys %nucs){
	foreach my $key ('A','T','C','G','AC','CA','TC','CT','CC','CG','GC','GG','AG','GA','TG','GT','TT','TA','AT','AA'){
	    my $nuc_obs = $nucs{$key}->{obs}->{percent};
	    my $nuc_exp = $nucs{$key}->{exp}->{percent};
	    my $counts_obs = $nucs{$key}->{obs}->{counts};
	    my $counts_exp = $nucs{$key}->{exp}->{counts};
	    my $cov = $nucs{$key}->{obs}->{coverage};

	    # calculating log2 observed/expected
	    my $ratio = $nuc_obs/$nuc_exp;
	    #  my $logratio = sprintf ("%.2f",log($ratio)/log(2));
	    # if (abs($logratio) > $minmax){
	    # $minmax = abs($logratio);
	    # }
	    warn "$key\tnuc_${key}_obs\t$nuc_obs\tnuc_${key}_exp\t$nuc_exp\tratio: $ratio\n" if $verbose;
	    
	    $doc =~ s/\{\{nuc_${key}_p_obs\}\}/$nuc_obs/g;
	    $doc =~ s/\{\{nuc_${key}_p_exp\}\}/$nuc_exp/g;
	    $doc =~ s/\{\{nuc_${key}_counts_obs\}\}/$counts_obs/g;
	    $doc =~ s/\{\{nuc_${key}_counts_exp\}\}/$counts_exp/g;
	    $doc =~ s/\{\{nuc_${key}_coverage\}\}/$cov/g;

	    # for the plot.ly bargraph
	    push @y_array,   $key;
	    push @x_genomic, $nuc_exp;
	    push @x_sample,  $nuc_obs;
	}

	my $y_array   = join ("','",@y_array);
	my $x_sample  = join (" , ",@x_sample);
	my $x_genomic = join (" , ",@x_genomic);

	$y_array = "'".$y_array."'";

	if ($verbose){
	    print "Y-array: $y_array\n";
	    print "X genomic array: $x_genomic\n";
	    print "X sample array: $x_sample\n";
	}

	$doc =~ s/\{\{nucleo_sample_x\}\}/$x_sample/g;
	$doc =~ s/\{\{nucleo_genomic_x\}\}/$x_genomic/g;
	$doc =~ s/\{\{nucleo_sample_y\}\}/$y_array/g;
	$doc =~ s/\{\{nucleo_genomic_y\}\}/$y_array/g;
	# warn "Minimum/maxium ratio was: $minmax\n" if $verbose;
	# $doc =~ s/\{\{nuc_minmax\}\}/$minmax/g;
    }
    else{
	warn "Something went wrong, skipping this plot entirely... Use --verbose to get a clue...\n";
	# skipping this plot entirely if values could not be extracted
	return $doc;
    }

    warn "Complete\n\n";
    return $doc;
}


sub read_splitting_report{

  my ($splitting_report,$doc) = @_;

  warn "Processing splitting report $splitting_report ...\n";
  open (SPLIT,$splitting_report) or die "Couldn't read from file $splitting_report: $!\n\n";

  my $total_seqs;

  my $total_C_count;
  my ($meth_CpG,$meth_CHG,$meth_CHH,$meth_unknown);
  my ($unmeth_CpG,$unmeth_CHG,$unmeth_CHH,$unmeth_unknown);
  my ($perc_CpG,$perc_CHG,$perc_CHH,$perc_unknown);

  while (<SPLIT>){
    chomp;

    ### Context Methylation
    if($_ =~ /^Total number of C/ ){
      (undef,$total_C_count) = split /\t/;
      print "total calls >> $total_C_count <<\n" if ($verbose);
    }

    elsif($_ =~ /^Total methylated C\'s in CpG context:/ ){
      (undef,$meth_CpG) = split /\t/;
      print "meth CpG >> $meth_CpG <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total methylated C\'s in CHG context:/ ){
      (undef,$meth_CHG) = split /\t/;
      print "meth CHG>> $meth_CHG <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total methylated C\'s in CHH context:/ ){
      (undef,$meth_CHH) = split /\t/;
      print "meth CHH >> $meth_CHH <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total methylated C\'s in Unknown context:/ ){
      (undef,$meth_unknown) = split /\t/;
      print "meth Unknown >> $meth_unknown <<\n" if ($verbose);
    }

    elsif($_ =~ /^Total C to T conversions in CpG context:/ ){
      (undef,$unmeth_CpG) = split /\t/;
      print "unmeth CpG >> $unmeth_CpG <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total C to T conversions in CHG context:/ ){
      (undef,$unmeth_CHG) = split /\t/;
      print "unmeth CHG >> $unmeth_CHG <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total C to T conversions in CHH context:/ ){
      (undef,$unmeth_CHH) = split /\t/;
      print "unmeth CHH >> $unmeth_CHH <<\n" if ($verbose);
    }
    elsif($_ =~ /^Total C to T conversions in Unknown context:/ ){
      (undef,$unmeth_unknown) = split /\t/;
      print "unmeth Unknown >> $unmeth_unknown <<\n" if ($verbose);
    }

    elsif($_ =~ /^C methylated in CpG context:/ ){
      (undef,$perc_CpG) = split /\t/;
      $perc_CpG =~ s/%//;
      print "percentage CpG >> $perc_CpG <<\n" if ($verbose);
    }
    elsif($_ =~ /^C methylated in CHG context:/ ){
      (undef,$perc_CHG) = split /\t/;
      $perc_CHG =~ s/%//;
      print "percentage CHG >> $perc_CHG <<\n" if ($verbose);
    }
    elsif($_ =~ /^C methylated in CHH context:/ ){
      (undef,$perc_CHH) = split /\t/;
      $perc_CHH =~ s/%//;
      print "percentage CHH >> $perc_CHH <<\n" if ($verbose);
    }
    elsif($_ =~ /^C methylated in Unknown context:/ ){
      (undef,$perc_unknown) = split /\t/;
      $perc_unknown =~ s/%//;
      print "percentage unknown >> $perc_unknown <<\n" if ($verbose);
    }
  }

  if (defined $meth_CpG and defined $meth_CHG and defined $meth_CHH and defined $unmeth_CpG and defined $unmeth_CHG and defined $unmeth_CHH){
    warn "Got all necessary information, editing HTML report ...\n" if ($verbose);

    ### Context Methylation
    $doc =~ s/\{\{total_C_count_splitting\}\}/$total_C_count/g;

    $doc =~ s/\{\{meth_CpG_splitting\}\}/$meth_CpG/g;
    $doc =~ s/\{\{meth_CHG_splitting\}\}/$meth_CHG/g;
    $doc =~ s/\{\{meth_CHH_splitting\}\}/$meth_CHH/g;

    $doc =~ s/\{\{unmeth_CpG_splitting\}\}/$unmeth_CpG/g;
    $doc =~ s/\{\{unmeth_CHG_splitting\}\}/$unmeth_CHG/g;
    $doc =~ s/\{\{unmeth_CHH_splitting\}\}/$unmeth_CHH/g;

    unless (defined $perc_CpG){
      $perc_CpG = 'N/A';
    }
    unless (defined $perc_CHG){
      $perc_CHG = 'N/A';
    }
    unless (defined $perc_CHH){
      $perc_CHH = 'N/A';
    }
    unless (defined $perc_unknown){
      $perc_unknown = 'N/A';
    }

    ### Unknown sequence context, just for Bowtie 2 alignments
    my $meth_unknown_inject;
    my $unmeth_unknown_inject;
    my $perc_unknown_inject;

    if (defined $meth_unknown){ # if one Unknown context file is present, so should the others
      $meth_unknown_inject = "     <tr>
                                <th>Methylated C's in Unknown context</th>
    				<td>$meth_unknown</td>
    			</tr>";
      $unmeth_unknown_inject = "     <tr>
                                <th>Unmethylated C's in Unknown context</th>
    				<td>$unmeth_unknown</td>
    			</tr>";
      $perc_unknown_inject = "     <tr>
                                <th>Methylated C's in Unknown context</th>
    				<td>$perc_unknown%</td>
    			</tr>";
    }
    else{
      $meth_unknown_inject = $unmeth_unknown_inject = $perc_unknown_inject = '';
    }

    ### injecting this into the table
    $doc =~ s/\{\{meth_unknown_splitting\}\}/$meth_unknown_inject/g;
    $doc =~ s/\{\{unmeth_unknown_splitting\}\}/$unmeth_unknown_inject/g;
    $doc =~ s/\{\{perc_unknown_splitting\}\}/$perc_unknown_inject/g;

    # for the graph we need to take care that there are no N/A values in the percentage fields
    my ($perc_CpG_graph, $perc_CHG_graph,$perc_CHH_graph,$perc_unknown_graph);

    if ($perc_CpG eq 'N/A'){
      $perc_CpG_graph = 0; # values of 0 won't show in the graph and won't produce errors
    }
    else{
      $perc_CpG_graph =  $perc_CpG;
    }

    if ($perc_CHG eq 'N/A'){
      $perc_CHG_graph = 0; # values of 0 won't show in the graph and won't produce errors
    }
    else{
      $perc_CHG_graph =  $perc_CHG;
    }

    if ($perc_CHH eq 'N/A'){
      $perc_CHH_graph = 0; # values of 0 won't show in the graph and won't produce errors
    }
    else{
      $perc_CHH_graph =  $perc_CHH;
    }

    if ($perc_unknown eq 'N/A'){
      $perc_unknown_graph = 0; # values of 0 won't show in the graph and won't produce errors
    }
    else{
      $perc_unknown_graph =  $perc_unknown;
    }

    # Context Methylation post Duplication Graph
    $doc =~ s/\{\{cytosine_methylation_post_duplication_plotly\}\}/$perc_CpG_graph,$perc_CHG_graph,$perc_CHH_graph/g;

    # Table
    $doc =~ s/\{\{perc_CpG_splitting\}\}/$perc_CpG/g;
    $doc =~ s/\{\{perc_CHG_splitting\}\}/$perc_CHG/g;
    $doc =~ s/\{\{perc_CHH_splitting\}\}/$perc_CHH/g;
  }
  else{
    warn "Am I missing something? Try using --verbose to get a clue...\n\n";
  }
  warn "Complete\n\n";

  return $doc;

}


sub read_mbias_report{

  my ($mbias_report,$doc) = @_;

  warn "Processing M-bias report $mbias_report ...\n";
  open (MBIAS,$mbias_report) or die "Couldn't read from file $mbias_report: $!\n\n";

  my %mbias_1;
  my %mbias_2;

  my $context;
  my $read_identity;
  my $state = 'single'; # setting this to 'single' if there is no read 2

  while (<MBIAS>){
      chomp;
      if ($_ =~ /^(C.{2}) context/){
        $context = $1;

          if ($_ =~ /R2/){
	           $read_identity = 2;
	           $state = 'paired';
          }
          else{
	         $read_identity = 1;
          }

      # warn "new context is: $context\n";
      # warn "Read identity is: Read $read_identity\n";
      }
      if ($_ =~ /^\d/){
          my ($pos,$meth,$unmeth,$perc,$coverage) = (split /\t/);
          if ($read_identity == 1){ 
              push @{$mbias_1{$context}->{coverage_x}}, $pos;
              push @{$mbias_1{$context}->{coverage_y}}, $coverage;
              push @{$mbias_1{$context}->{perc_x}}, $pos; 
              push @{$mbias_1{$context}->{perc_y}}, $perc;  
	  }
	  elsif ($read_identity == 2){
	      push @{$mbias_2{$context}->{coverage_x}}, $pos;
	      push @{$mbias_2{$context}->{coverage_y}}, $coverage;
	      push @{$mbias_2{$context}->{perc_x}}, $pos;
	      push @{$mbias_2{$context}->{perc_y}}, $perc;
	  }
	  else{
	      warn "read identity was unknown : '$read_identity'\n\n";
	  }

      # print join (" ",$pos,$meth,$unmeth,$perc,$coverage)."\n";
    }
  }

  # Read 1 M-bias
  my $r1_CpG_coverage_x = join (',',@{$mbias_1{'CpG'}->{coverage_x}});
  my $r1_CpG_coverage_y = join (',',@{$mbias_1{'CpG'}->{coverage_y}}); 
  my $r1_CpG_perc_x     = join (',',@{$mbias_1{'CpG'}->{perc_x}});
  my $r1_CpG_perc_y     = join (',',@{$mbias_1{'CpG'}->{perc_y}});

  my $r1_CHG_coverage_x = join (',',@{$mbias_1{'CHG'}->{coverage_x}});
  my $r1_CHG_coverage_y = join (',',@{$mbias_1{'CHG'}->{coverage_y}}); 
  my $r1_CHG_perc_x     = join (',',@{$mbias_1{'CHG'}->{perc_x}});
  my $r1_CHG_perc_y     = join (',',@{$mbias_1{'CHG'}->{perc_y}});

  my $r1_CHH_coverage_x = join (',',@{$mbias_1{'CHH'}->{coverage_x}});
  my $r1_CHH_coverage_y = join (',',@{$mbias_1{'CHH'}->{coverage_y}}); 
  my $r1_CHH_perc_x     = join (',',@{$mbias_1{'CHH'}->{perc_x}});
  my $r1_CHH_perc_y     = join (',',@{$mbias_1{'CHH'}->{perc_y}});

  # warn "R1 CpG coverage:\n$r1_CpG_coverage_x\n$r1_CpG_coverage_y\nR1 CpG methylation:\n$r1_CpG_perc_x\n$r1_CpG_perc_y\n\n";
  # warn "R1 CHG coverage:\n$r1_CHG_coverage_x\n$r1_CHG_coverage_y\nR1 CHG methylation:\n$r1_CHG_perc_x\n$r1_CHG_perc_y\n\n";
  # warn "R1 CHH coverage:\n$r1_CHH_coverage_x\n$r1_CHH_coverage_y\nR1 CHH methylation:\n$r1_CHH_perc_x\n$r1_CHH_perc_y\n\n";
  
  # CpG
  $doc =~ s/\{\{mbias1_CpG_meth_x\}\}/$r1_CpG_perc_x/g;
  $doc =~ s/\{\{mbias1_CpG_meth_y\}\}/$r1_CpG_perc_y/g; 
  $doc =~ s/\{\{mbias1_CpG_coverage_x\}\}/$r1_CpG_coverage_x/g;
  $doc =~ s/\{\{mbias1_CpG_coverage_y\}\}/$r1_CpG_coverage_y/g; 
  # CHG 
  $doc =~ s/\{\{mbias1_CHG_meth_x\}\}/$r1_CHG_perc_x/g;
  $doc =~ s/\{\{mbias1_CHG_meth_y\}\}/$r1_CHG_perc_y/g; 
  $doc =~ s/\{\{mbias1_CHG_coverage_x\}\}/$r1_CHG_coverage_x/g;
  $doc =~ s/\{\{mbias1_CHG_coverage_y\}\}/$r1_CHG_coverage_y/g; 
  # CHH
  $doc =~ s/\{\{mbias1_CHH_meth_x\}\}/$r1_CHH_perc_x/g;
  $doc =~ s/\{\{mbias1_CHH_meth_y\}\}/$r1_CHH_perc_y/g; 
  $doc =~ s/\{\{mbias1_CHH_coverage_x\}\}/$r1_CHH_coverage_x/g;
  $doc =~ s/\{\{mbias1_CHH_coverage_y\}\}/$r1_CHH_coverage_y/g; 

  # Read 2 M-bias
  if (%mbias_2){
    my $r2_CpG_coverage_x = join (',',@{$mbias_2{'CpG'}->{coverage_x}});
    my $r2_CpG_coverage_y = join (',',@{$mbias_2{'CpG'}->{coverage_y}}); 
    my $r2_CpG_perc_x     = join (',',@{$mbias_2{'CpG'}->{perc_x}});
    my $r2_CpG_perc_y     = join (',',@{$mbias_2{'CpG'}->{perc_y}});

    my $r2_CHG_coverage_x = join (',',@{$mbias_2{'CHG'}->{coverage_x}});
    my $r2_CHG_coverage_y = join (',',@{$mbias_2{'CHG'}->{coverage_y}}); 
    my $r2_CHG_perc_x     = join (',',@{$mbias_2{'CHG'}->{perc_x}});
    my $r2_CHG_perc_y     = join (',',@{$mbias_2{'CHG'}->{perc_y}});

    my $r2_CHH_coverage_x = join (',',@{$mbias_2{'CHH'}->{coverage_x}});
    my $r2_CHH_coverage_y = join (',',@{$mbias_2{'CHH'}->{coverage_y}}); 
    my $r2_CHH_perc_x     = join (',',@{$mbias_2{'CHH'}->{perc_x}});
    my $r2_CHH_perc_y     = join (',',@{$mbias_2{'CHH'}->{perc_y}});

    # warn "r2 CpG coverage:\n$r2_CpG_coverage_x\n$r2_CpG_coverage_y\nr2 CpG methylation:\n$r2_CpG_perc_x\n$r2_CpG_perc_y\n\n";
    # warn "r2 CHG coverage:\n$r2_CHG_coverage_x\n$r2_CHG_coverage_y\nr2 CHG methylation:\n$r2_CHG_perc_x\n$r2_CHG_perc_y\n\n";
    # warn "r2 CHH coverage:\n$r2_CHH_coverage_x\n$r2_CHH_coverage_y\nr2 CHH methylation:\n$r2_CHH_perc_x\n$r2_CHH_perc_y\n\n";
    
    # CpG
    $doc =~ s/\{\{mbias2_CpG_meth_x\}\}/$r2_CpG_perc_x/g;
    $doc =~ s/\{\{mbias2_CpG_meth_y\}\}/$r2_CpG_perc_y/g; 
    $doc =~ s/\{\{mbias2_CpG_coverage_x\}\}/$r2_CpG_coverage_x/g;
    $doc =~ s/\{\{mbias2_CpG_coverage_y\}\}/$r2_CpG_coverage_y/g; 
    # CHG 
    $doc =~ s/\{\{mbias2_CHG_meth_x\}\}/$r2_CHG_perc_x/g;
    $doc =~ s/\{\{mbias2_CHG_meth_y\}\}/$r2_CHG_perc_y/g; 
    $doc =~ s/\{\{mbias2_CHG_coverage_x\}\}/$r2_CHG_coverage_x/g;
    $doc =~ s/\{\{mbias2_CHG_coverage_y\}\}/$r2_CHG_coverage_y/g; 
    # CHH
    $doc =~ s/\{\{mbias2_CHH_meth_x\}\}/$r2_CHH_perc_x/g;
    $doc =~ s/\{\{mbias2_CHH_meth_y\}\}/$r2_CHH_perc_y/g; 
    $doc =~ s/\{\{mbias2_CHH_coverage_x\}\}/$r2_CHH_coverage_x/g;
    $doc =~ s/\{\{mbias2_CHH_coverage_y\}\}/$r2_CHH_coverage_y/g; 


  }
  else {
      $doc =~ s/\{\{bm_mbias_2\}\}/false/g;
  }
  warn "Complete\n\n";

  return ($state,$doc);

}


sub read_report_template{
    my $template = shift;
    my $doc;
    warn "Attempting to open file from: $RealBin/$template\n\n" if ($verbose);
    open (DOC,"$RealBin/plotly/$template") or die "Failed to find file $template: $!";
    while(<DOC>){
	chomp;
	$_ =~ s/\r//g;
	$doc .= $_."\n";
    }
    
    close DOC or warn $!;
    return $doc;
}


sub process_commandline{
	my $help;
	my $output_dir;
	my $manual_output_file;
	my $alignment_report;
	my $dedup_report;
	my $splitting_report;
	my $mbias_report;
	my $nucleotide_coverage_report;  # stores nucleotide coverage statistics
	my $verbose;

	my $version;
	
	my $command_line = GetOptions ('help|man'              => \$help,
				 'dir=s'                 => \$output_dir,
				 'o|output=s'            => \$manual_output_file,
				 'alignment_report=s'    => \$alignment_report,
				 'dedup_report=s'        => \$dedup_report,
				 'splitting_report=s'    => \$splitting_report,
				 'mbias_report=s'        => \$mbias_report,
				 'nucleotide_report=s'   => \$nucleotide_coverage_report,
				 'version'               => \$version,
				 'verbose'               => \$verbose,
				);

	### EXIT ON ERROR if there were errors with any of the supplied options
	unless ($command_line){
		die "Please respecify command line options\n";
	}

	### HELPFILE
	if ($help){
		print_helpfile();
		exit;
	}

	if ($version){
		print << "VERSION";


                              Bismark HTML Report Module

                         bismark2report version: $bismark2report_version
                     Copyright 2010-22 Felix Krueger, Altos Bioinformatics
                            https://github.com/FelixKrueger/Bismark
	
VERSION
		exit;
	}

	### OUTPUT DIR PATH
	if (defined $output_dir){
		unless ($output_dir eq ''){ # if the output dir has been passed on by the methylation extractor and is an empty string we don't want to change it
			unless ($output_dir =~ /\/$/){
				$output_dir =~ s/$/\//;
			}
		}
	}
	else{
		$output_dir = '';
	}	


	## First we are looking for alignment reports, and then look whether there are any optional plots with the same base name

	if ($alignment_report){
		### we only process the one alignment report (and possibly the other ones as well) that was specified
		push @alignment_reports, $alignment_report;
	}
	else{ ### no alignment report specified, looking in the current directory for files ending in *E_report.txt (SE or PE)

		### looking in the current directory for report files. Less than 1 report file is not allowed
		@alignment_reports = <*E_report.txt>;

		if (scalar @alignment_reports == 0){
			warn "Found no potential alignment reports in the current directory. Please specify a single Bismark alignment report file using the option '--alignment_report FILE'\n\n";
			print_helpfile();
			exit;
		}
		else{
			# there are Bismark alignment report(s) in the directory
			warn "Found ",scalar @alignment_reports," alignment reports in current directory. Now trying to figure out whether there are corresponding optional reports\n";
		}
	}

	### Ensuring that there isn't more than 1 file in @alignment_reports if someone manually specified an output file.
	if (scalar @alignment_reports > 1){
		if (defined $manual_output_file){
			die "You cannot run bismark2report on more than 1 file while specifying a single output file. Either lose the option -o to derive the output filenames automatically, or specify a single Bismark alignment report file using the option '--alignment_report FILE'\n\n";
		}
	}

	foreach my $aln (@alignment_reports){

		# warn "Figuring things out for:\n$aln\n";
		$aln =~ /^(.+)_(P|S)E_report.txt$/;
		my $basename = $1;
		# warn "using this basename:\n$basename\n\n";

		### DEDUPLICATION REPORT (optional)
		if ($dedup_report){
			warn "User specifified dedup report: $dedup_report\n";sleep(1);
			if (lc$dedup_report eq 'none'){
				push @dedup_reports, ''; # user may wish to skip this step by specifying 'none'
			}
			else{
				push @dedup_reports, $dedup_report;
			}
		}
		else{
			### looking in the current directory for a report file with the same base name
			my @dedup_report_files = <$basename*deduplication_report.txt>;
			# warn "Number of deduplication reports found in current directory for basename $1: ", scalar @dedup_report_files , "\n";

			if (scalar @dedup_report_files > 1){
				die "Found ", scalar @dedup_report_files ," potential deduplication reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--dedup_report FILE' or otherwise provide filenames that are easier to figure out...\n\n";
			}
			elsif (scalar @dedup_report_files == 0){
				push @dedup_reports, ''; # no corresponding deduplication report found
			}
			else{
				# there is only a single deduplication report in the current directory, using this one
				$dedup_report = shift @dedup_report_files;
				push @dedup_reports, $dedup_report;
				# warn "Going to use this dedup report: $dedup_report\n\n";
			}
		}

		### NUCLEOTIDE COVERAGE REPORT (optional)
		if (defined $nucleotide_coverage_report){
			# warn "User specified nucleotide coverage report: $nucleotide_coverage_report\n";sleep(1);
			if (lc$nucleotide_coverage_report eq 'none'){
				push @nuc_reports, ''; # user may wish to skip this step by specifying 'none'
			}
			else{
				push @nuc_reports, $nucleotide_coverage_report;
			}	
		}
		else{
			### looking in the current directory for a report file with the same base name
			my @nucleotide_coverage_report_files = <$basename*nucleotide_stats.txt>;
			# warn "Number of nucleotide coverage statistic reports found in current directory for basename $1: ", scalar @nucleotide_coverage_report_files , "\n";
	
			if (scalar @nucleotide_coverage_report_files > 1){
				die "Found ", scalar @nucleotide_coverage_report_files ," potential nucleotide coverage reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--nucleotide_report FILE' or otherwise provide filenames that are easier to figure out...\n\n";
			}
			elsif (scalar @nucleotide_coverage_report_files == 0){
				# warn "Found no corresponding nucleotide coverage file\n";
				push @nuc_reports, ''; # no corresponding nucleotide coverge file report found
			}
			else{
				# there is only a single nucleotide coverage report in the current directory, using this one
				$nucleotide_coverage_report = shift @nucleotide_coverage_report_files;
				push @nuc_reports, $nucleotide_coverage_report;
				# warn "Going to use this nucleotide coverage report: $nucleotide_coverage_report\n\n"; sleep(3);
			}
		}

    
		### METHYLATION EXTRACTOR SPLITTING REPORT
		if ($splitting_report){
			if (lc$splitting_report eq 'none'){
				push @splitting_reports, ''; # user may wish to skip this step by specifying 'none'
			}
			else{
				push @splitting_reports, $splitting_report;
			}
		}
		else{
			### looking in the current directory for a report file with the same basename
			my @splitting_report_files = <$basename*splitting_report.txt>;
			# warn "Number of splitting reports found in current directory: ", scalar @splitting_report_files , "\n";

			if (scalar @splitting_report_files > 1){
				die "Found ", scalar @splitting_report_files ," potential methylation extractor splitting reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--splitting_report FILE' or otherwise provide filenames that are easier to figure out...\n\n";
			}
			elsif (scalar @splitting_report_files == 0){
				push @splitting_reports, ''; # no corresponding methylation extractor report found
			}
			else{
				# there is only a single splitting report in the current directory, using this one
				$splitting_report = shift @splitting_report_files;
				push  @splitting_reports, $splitting_report;
			}
		}

		### M-BIAS REPORT
		if ($mbias_report){
			if (lc$mbias_report eq 'none'){
				$mbias_report = ''; # user may wish to skip this step by specifying 'none'
				push @mbias_reports, $mbias_report;
			}
			else{
				push @mbias_reports, $mbias_report;
			}
		}
		else{
			### looking in the current directory for a single report file. More (or less) than 1 report file is not allowed
			my @mbias_report_files = <$basename*M-bias.txt>;
	
			# warn "Number of M-bias reports found in current directory: ", scalar @mbias_report_files , "\n";
	
			if (scalar @mbias_report_files > 1){
				die "Found ", scalar @mbias_report_files ," potential M-bias reports with the same basename ($basename) in the current directory. Please specify a single report using the option '--mbias_report FILE'or otherwise provide filenames that are easier to figure out automatically ...\n\n";
			}
			elsif (scalar @mbias_report_files == 0){
				push @mbias_reports, '';
			}
			else{
				# there is only a single M-bias report in the current directory, using this one
				$mbias_report = shift @mbias_report_files;
				push @mbias_reports, $mbias_report;
			}
		}
		$dedup_report = $splitting_report = $mbias_report = $nucleotide_coverage_report = undef;
	}

	return ($output_dir,$verbose,$manual_output_file);

}

sub print_helpfile{
  print <<EOF

  SYNOPSIS:

  This script uses a Bismark alignment report to generate a graphical HTML report page. Optionally, further reports of
  the Bismark suite such as deduplication, methylation extractor splitting or M-bias reports can be specified as well. If several
  Bismark reports are found in the same folder, a separate report will be generated for each of these, whereby the output filename
  will be derived from the Bismark alignment report file. bismark2report attempts to find optional reports automatically based
  on the file basename.


  USAGE: bismark2report [options]


-o/--output <filename>     Name of the output file (optional). If not specified explicitly, the output filename will be derived
                           from the Bismark alignment report file. Specifying an output filename only works if the HTML report is
                           to be generated for a single Bismark alignment report (and potentially additional reports).

--dir                      Output directory. Output is written to the current directory if not specified explicitly.


--alignment_report FILE    If not specified explicitly, bismark2report attempts to find Bismark report file(s) in the current
                           directory and produces a separate HTML report for each mapping report file. Based on the basename of
                           the Bismark mapping report, bismark2report will also attempt to find the other Bismark reports (see below)
                           for inclusion into the HTML report. Specifying a Bismark alignment report file is mandatory.

--dedup_report FILE        If not specified explicitly, bismark2report attempts to find a deduplication report file with the same
                           basename as the Bismark mapping report (generated by deduplicate_bismark) in the
                           current working directory. Including a deduplication report is optional, and using the FILE 'none'
                           will skip this step entirely.

--splitting_report FILE    If not specified explicitly, bismark2report attempts to find a splitting report file with the same
                           basename as the Bismark mapping report (generated by the Bismark methylation extractor) in the current
                           working directory. Including a splitting report is optional, and using the FILE 'none' will skip this
                           step entirely.

--mbias_report FILE        If not specified explicitly, bismark2report attempts to find a single M-bias report file with the same
                           basename as the Bismark mapping report (generated by the Bismark methylation extractor) in the current
                           working directory. Including an M-Bias report is optional, and using the FILE 'none' will skip this step
                           entirely.

--nucleotide_report FILE   If not specified explicitly, bismark2report attempts to find a single nucleotide coverage report file
                           with the same basename as the Bismark mapping report (generated by Bismark with the option
                           '--nucleotide_coverage') in the current working directory. Including a nucleotide coverage statistics
                           report is optional, and using the FILE 'none' will skip this report entirely.

                           Script last modified: 13 Sep 2021

EOF
    ;
  exit 1;
}