bismark

#!/usr/bin/env perl
use strict;
use warnings;
use IO::Handle;
use Cwd;
$|++;
use Getopt::Long;
use FindBin qw($RealBin);
use lib "$RealBin/../lib";

## This program is Copyright (C) 2010-23, Felix Krueger (fkrueger@altoslabs.com)

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.

### Directory Bismark was started from; the per-file loop changes back into it before handling each input file
my $parent_dir = getcwd();

my $bismark_version = 'v0.24.2';
my $copyright_dates = '2010-23';

### Start time (needed for the run time report) and the command line arguments, captured before any of them get rewritten
my $start_run    = time();
my $command_line = join(' ', @ARGV);


### The '.' in the option name --solexa1.3-quals causes Getopt::Long to fail, so this option is replaced
### with --phred64-quals before the command line is processed ($_ aliases the elements of @ARGV)
for (@ARGV){
	$_ = '--phred64-quals' if $_ eq '--solexa1.3-quals';
}

### Input file names; populated while the command line is being processed
my @filenames;

### All run settings, in exactly the order in which process_command_line() returns them
my (
	$genome_folder,       $CT_index_basename, $GA_index_basename,
	$path_to_bowtie,      $path_to_hisat2,    $path_to_minimap2,
	$sequence_file_format,$aligner_options,
	$directional,         $unmapped,          $ambiguous,
	$phred64,             $output_dir,        $bowtie2,
	$hisat2,              $sam_no_hd,         $skip,
	$upto,                $temp_dir,          $non_bs_mm,
	$insertion_open,
	$insertion_extend,    $deletion_open,     $deletion_extend,
	$gzip,                $bam,               $samtools_path,
	$pbat,                $prefix,            $old_flag,
	$basename,            $score_min_intercept,
	$score_min_slope,     $bt2_large_index,   $multicore,
	$rg_tag,              $rg_id,             $rg_sample,
	$ambig_bam,           $cram,              $cram_ref,
	$nucleotide_coverage, $dovetail,
	$aligner_version,     $slam,              $icpc,
	$local,               $strandID,          $mm2,
	$maximum_length_cutoff,
) = process_command_line();


### State shared by the subroutines further down
my @fhs;                   # alignment process names, bisulfite index location, aligner filehandles and the number of times sequences produced an alignment
my %chromosomes;           # chromosome sequences of the genome (read once and kept in memory for all input files)
my %SQ_order;              # order of the sequences in the reference, so that SAM/BAM files get a known chromosome order
my %counting;              # counters for various events
my $final_output_filename; # needed for the nucleotide coverage report
my @pids;                  # process IDs of the child processes in parallel mode


my $seqID_contains_tabs;
my $verbose = 0;

warn "Running Bismark Parallel version. Number of parallel instances to be spawned: $multicore\n\n" if $multicore > 1;


### Forks the worker processes for parallel mode and tells the calling process which share of the input it owns.
###
### Takes no arguments; reads the file-level $multicore and appends the PID of every child it spawns to @pids.
###
### Returns ($process_id,$offset):
###   - single-core mode ($multicore not greater than 1): (1,1), nothing is forked
###   - in a child process:  $process_id is 0 and $offset is the value the counter had when the child was forked
###   - in the parent:       $process_id is the PID of the child forked last, and $offset has been counted up to $multicore
### Dies if fork fails.
sub multi_process_handling{

	# Nothing to fork in single-core mode
	unless ($multicore > 1){
		warn "Single-core mode: setting pid to 1\n";
		return (1,1);
	}

	my $share = 1;     # share of the input the current process is going to work on
	my $fork_result;   # return value of the most recent fork as seen by this process

	while ($share != $multicore){

		my $child = fork;

		unless (defined $child){
			die "[FATAL ERROR]: Forking unsuccessful. This normally means that something is fundamentally not working with the fork command. Please run again without the --parallel option, or ask your system admin to look into this.\n";
		}

		$fork_result = $child;

		# A child keeps the share number it was forked with and proceeds straight to processing
		last if ($child == 0);

		# Only the parent gets here: remember the child and move on to the next share
		push @pids, $child;
		++$share if ($share < $multicore);
	}

	# Sanity check: at this point the value is either a child PID (parent) or 0 (child)
	die "Process ID was: '$fork_result'\n" unless ($fork_result or $fork_result == 0);

	return ($fork_result,$share);

}


### Writes the share of a FastQ file that belongs to one parallel process to a temporary file in $temp_dir.
###
### Arguments:
###   $filename   - FastQ input file; it is read through 'gunzip -c' if its name ends in 'gz'
###   $process_id - only used in the progress message
###   $offset     - selects the share: record number N (counting from 1) is written out if
###                 (N - $offset) is a multiple of $multicore
### Returns the name of the subset file WITHOUT path information (the file itself is written to $temp_dir).
###
### Also reads the file-level settings $multicore, $gzip (compress the subset file), $temp_dir and
### $upto (if defined, stop once this process has written out that many sequences).
### Dies if the input or the output file cannot be opened.
sub subset_input_file_FastQ{

	my ($filename,$process_id,$offset) = @_;

	# The input is opened with an explicit mode and, for gzipped files, with a list-form pipe open
	# so that the file name never passes through a shell: names containing spaces, quotes or other
	# shell/open() metacharacters are read correctly instead of breaking (or altering) the command.
	my $in;
	if ($filename =~ /gz$/){
		open ($in,'-|','gunzip','-c',$filename) or die "Couldn't read from file '$filename': $!\n";
	}
	else{
		open ($in,'<',$filename) or die "Couldn't read from file '$filename': $!\n";
	}

	# warn "offset is $offset\n";
	my $temp = $filename;
	$temp .= ".temp.$offset";
	$temp =~ s/^.*\///; # replacing everything upto and including the last /, i.e. removing file path information

	my $out;
	if ($gzip){
		$temp .= '.gz';
		# The redirection needs a shell, so the target path is single-quoted for it (embedded ' become '\'')
		(my $quoted_path = "${temp_dir}${temp}") =~ s/'/'\\''/g;
		open ($out,'|-',"gzip -c - > '$quoted_path'") or die "Can't write to file ${temp_dir}${temp}: $!\n";
	}
	else{
		open ($out,'>',"${temp_dir}${temp}") or die "Failed to write output ${temp_dir}${temp}: $!\n";
	}

	my $record_count = 0;   # number of complete 4-line records read so far
	my $seqs_processed = 0; # number of records written to the subset file

	while (1){
		my $l1 = <$in>;
		my $l2 = <$in>;
		my $l3 = <$in>;
		my $l4 = <$in>;

		# Stop at the end of the file (or at an incomplete last record). Testing for definedness rather
		# than truth keeps a final quality line consisting of a single '0' without trailing newline.
		last unless (defined $l4);
		++$record_count;

		# If user only want to process a subset of the input file
		last if (defined $upto and $seqs_processed == $upto);

		# Records belonging to the share of another process are skipped
		next unless ( ($record_count - $offset)%$multicore == 0 );

		print {$out} "$l1$l2$l3$l4";
		$seqs_processed++;
	}

	# The close status of the input is not checked, as in the original code (its 'or warn' was commented out);
	# presumably because closing a gunzip pipe early (with $upto set) reports a failure that is harmless here
	close $in;
	close $out or warn "Failed to close file handle TEMPFQ: $!\n";

	warn "Finished subdividing $filename for PID: $process_id and offset $offset (sequences written out: $seqs_processed)\n\n";

	return ($temp); # returning the subset filename

}

### Writes the share of a FastA file that belongs to one parallel process to a temporary file in $temp_dir.
### Every record is expected to consist of exactly two lines (header line and sequence line).
###
### Arguments:
###   $filename   - FastA input file; it is read through 'gunzip -c' if its name ends in 'gz'
###   $process_id - only used in the progress message
###   $offset     - selects the share: record number N (counting from 1) is written out if
###                 (N - $offset) is a multiple of $multicore
### Returns the name of the subset file WITHOUT path information (the file itself is written to $temp_dir).
###
### Also reads the file-level settings $multicore, $gzip (compress the subset file), $temp_dir and
### $upto (if defined, stop once this process has written out that many sequences).
### Dies if the input or the output file cannot be opened.
sub subset_input_file_FastA{

	my ($filename,$process_id,$offset) = @_;

	# The input is opened with an explicit mode and, for gzipped files, with a list-form pipe open
	# so that the file name never passes through a shell: names containing spaces, quotes or other
	# shell/open() metacharacters are read correctly instead of breaking (or altering) the command.
	my $in;
	if ($filename =~ /gz$/){
		open ($in,'-|','gunzip','-c',$filename) or die "Couldn't read from file '$filename': $!\n";
	}
	else{
		open ($in,'<',$filename) or die "Couldn't read from file '$filename': $!\n";
	}

	# warn "offset is $offset\n";
	my $temp = $filename;
	$temp .= ".temp.$offset";
	$temp =~ s/^.*\///; # replacing everything upto and including the last /, i.e. removing file path information

	my $out;
	if ($gzip){
		$temp .= '.gz';
		# The redirection needs a shell, so the target path is single-quoted for it (embedded ' become '\'')
		(my $quoted_path = "${temp_dir}${temp}") =~ s/'/'\\''/g;
		open ($out,'|-',"gzip -c - > '$quoted_path'") or die "Can't write to file ${temp_dir}${temp}: $!\n";
	}
	else{
		open ($out,'>',"${temp_dir}${temp}") or die "Failed to write output ${temp_dir}${temp}: $!\n";
	}

	warn "Writing temporary infile to $temp\n";

	my $record_count = 0;   # number of complete 2-line records read so far
	my $seqs_processed = 0; # number of records written to the subset file

	while (1){
		my $l1 = <$in>;
		my $l2 = <$in>;

		# Stop at the end of the file (or at an incomplete last record); tested for definedness so that
		# a defined but false last line (a lone '0' without trailing newline) does not end the loop early
		last unless (defined $l2);
		++$record_count;

		# If user only want to process a subset of the input file
		last if (defined $upto and $seqs_processed == $upto);

		# Records belonging to the share of another process are skipped
		next unless ( ($record_count - $offset)%$multicore == 0 );

		print {$out} "$l1$l2";
		$seqs_processed++;
	}

	close $in or warn $!;
	# The warning now names the FastA handle (it used to say TEMPFQ, the label of the FastQ twin of this subroutine)
	close $out or warn "Failed to close file handle TEMPFA: $!\n";

	warn "Finished subdividing $filename for PID: $process_id and offset $offset (sequences processed: $seqs_processed)\n\n";

	return ($temp); # returning the subset filename

}

####
####


foreach my $filename (@filenames){

	my $original_filename = $filename;
	my $original_filename_1;
	my $original_filename_2;

	chdir $parent_dir or die "Unable to move to initial working directory'$parent_dir' $!\n";
	### resetting the counting hash and fhs
	reset_counters_and_fhs($filename);
	@pids = ();
	$seqID_contains_tabs = 0;

	### if 2 or more files are provided we can hold the genome in memory and don't need to read it in a second time
	unless (%chromosomes){
		my $cwd = getcwd(); # storing the path of the current working directory
		warn "Current working directory is: $cwd\n\n";
		read_genome_into_memory($cwd);
	}

	### As of version 0.14.0 we support multi-threading. In a first instance we accomplish this by
	### splitting the input file(s) into several smaller subfiles and merging the results back at
	### the end.

	# get general settings (also for single-threaded use)
	my ($pid,$offset) = multi_process_handling ();
	my ($single_end,$paired_end);

	### PAIRED-END ALIGNMENTS
	if ($filename =~ ','){

		$single_end = 0;
		$paired_end = 1;

		my ($C_to_T_infile_1,$G_to_A_infile_1); # to be made from mate1 file

		$fhs[0]->{name} = 'CTread1GAread2CTgenome';
		$fhs[1]->{name} = 'GAread1CTread2GAgenome';
		$fhs[2]->{name} = 'GAread1CTread2CTgenome';
		$fhs[3]->{name} = 'CTread1GAread2GAgenome';
		warn "\nPaired-end alignments will be performed\n",'='x39,"\n\n";

		my ($filename_1,$filename_2) = (split (/,/,$filename));
		$original_filename_1 = $filename_1;
		$original_filename_2 = $filename_2;

		warn "The provided filenames for paired-end alignments are $filename_1 and $filename_2\n";

		### subsetting the input file(s)
		unless ($multicore == 1){ # not needed in single-core mode
			# warn "My PID: $pid\nMy offset: $offset\n";
			if ($sequence_file_format eq 'FASTA'){
				my $temp_filename_1 = subset_input_file_FastA($filename_1,$pid,$offset);
				warn "Using the subset file >${temp_dir}$temp_filename_1< as new in-file 1 (instead of >$filename_1<)\n";
				$filename_1 = "${temp_dir}$temp_filename_1";

				my $temp_filename_2 = subset_input_file_FastA($filename_2,$pid,$offset);
				warn "Using the subset file >${temp_dir}$temp_filename_2< as new in-file 2 (instead of >$filename_2<)\n";
				$filename_2 = "${temp_dir}$temp_filename_2";
			}	
			else{ # FastQ format, default
				my $temp_filename_1 = subset_input_file_FastQ($filename_1,$pid,$offset);
				warn "Using the subset file >${temp_dir}$temp_filename_1< as new in-file 1 (instead of >$filename_1<)\n";
				$filename_1 = "${temp_dir}$temp_filename_1";

				my $temp_filename_2 = subset_input_file_FastQ($filename_2,$pid,$offset);
				warn "Using the subset file >${temp_dir}$temp_filename_2< as new in-file 2 (instead of >$filename_2<)\n";
				$filename_2 = "${temp_dir}$temp_filename_2";
			}	
		}

		### additional variables only for paired-end alignments
		my ($C_to_T_infile_2,$G_to_A_infile_2); # to be made from mate2 file

		my $read1_count; # to see if R1 and R2 have the same length
		my $read2_count;
		
		### FastA format
		if ($sequence_file_format eq 'FASTA'){
			warn "Input files are in FastA format\n";

			if ($directional){
				($C_to_T_infile_1,$read1_count) = biTransformFastAFiles_paired_end ($filename_1,1); # also passing the read number
				($G_to_A_infile_2,$read2_count) = biTransformFastAFiles_paired_end ($filename_2,2);

				$fhs[0]->{inputfile_1} = $C_to_T_infile_1;
				$fhs[0]->{inputfile_2} = $G_to_A_infile_2;
				$fhs[1]->{inputfile_1} = undef;
				$fhs[1]->{inputfile_2} = undef;
				$fhs[2]->{inputfile_1} = undef;
				$fhs[2]->{inputfile_2} = undef;
				$fhs[3]->{inputfile_1} = $C_to_T_infile_1;
				$fhs[3]->{inputfile_2} = $G_to_A_infile_2;
			}
			elsif($pbat){ # PBAT-Seq
				($G_to_A_infile_1,$read1_count) = biTransformFastAFiles_paired_end ($filename_1,1); # also passing the read number
				($C_to_T_infile_2,$read2_count) = biTransformFastAFiles_paired_end ($filename_2,2);

				$fhs[0]->{inputfile_1} = undef;
				$fhs[0]->{inputfile_2} = undef;
				$fhs[1]->{inputfile_1} = $G_to_A_infile_1;
				$fhs[1]->{inputfile_2} = $C_to_T_infile_2;
				$fhs[2]->{inputfile_1} = $G_to_A_infile_1;
				$fhs[2]->{inputfile_2} = $C_to_T_infile_2;
				$fhs[3]->{inputfile_1} = undef;
				$fhs[3]->{inputfile_2} = undef;
			}
			else{
				($C_to_T_infile_1,$G_to_A_infile_1,$read1_count) = biTransformFastAFiles_paired_end ($filename_1,1); # also passing the read number
				($C_to_T_infile_2,$G_to_A_infile_2,$read2_count) = biTransformFastAFiles_paired_end ($filename_2,2);

				$fhs[0]->{inputfile_1} = $C_to_T_infile_1;
				$fhs[0]->{inputfile_2} = $G_to_A_infile_2;
				$fhs[1]->{inputfile_1} = $G_to_A_infile_1;
				$fhs[1]->{inputfile_2} = $C_to_T_infile_2;
				$fhs[2]->{inputfile_1} = $G_to_A_infile_1;
				$fhs[2]->{inputfile_2} = $C_to_T_infile_2;
				$fhs[3]->{inputfile_1} = $C_to_T_infile_1;
				$fhs[3]->{inputfile_2} = $G_to_A_infile_2;
			}

			unless ($read1_count eq $read2_count){
				die "[FATAL ERROR]:\tNumber of bisulfite transformed reads are not equal between Read 1 (\#$read1_count) and Read 2 (\#$read2_count).\nPossible causes: file truncation, or as a result of specifying read pairs that do not belong to each other?! Please re-specify file names! Exiting...\n\n";
			}

			if ($bowtie2){
				paired_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
			}
			else{ # HISAT2
				paired_end_align_fragments_to_bisulfite_genome_fastA_hisat2 ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
			}
		}
		### FastQ format
		else{
			warn "Input files are in FastQ format\n";
			if ($directional){
			
				if ($slam){
					($C_to_T_infile_1,$read1_count) = biTransformFastQFiles_paired_end_slam ($filename_1,1); # also passing the read number
					($G_to_A_infile_2,$read2_count) = biTransformFastQFiles_paired_end_slam ($filename_2,2);
				}
				else{
					($C_to_T_infile_1,$read1_count) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
					($G_to_A_infile_2,$read2_count) = biTransformFastQFiles_paired_end ($filename_2,2);
				}
				
				$fhs[0]->{inputfile_1} = $C_to_T_infile_1;
				$fhs[0]->{inputfile_2} = $G_to_A_infile_2;
				$fhs[1]->{inputfile_1} = undef;
				$fhs[1]->{inputfile_2} = undef;
				$fhs[2]->{inputfile_1} = undef;
				$fhs[2]->{inputfile_2} = undef;
				$fhs[3]->{inputfile_1} = $C_to_T_infile_1;
				$fhs[3]->{inputfile_2} = $G_to_A_infile_2;
			}
			elsif($pbat){ # PBAT-Seq
				### At the moment we are only performing alignments only with uncompressed FastQ files
				if ($slam){
					($G_to_A_infile_1,$read1_count) = biTransformFastQFiles_paired_end_slam ($filename_1,1); # also passing the read number
					($C_to_T_infile_2,$read2_count) = biTransformFastQFiles_paired_end_slam ($filename_2,2);
				}	
				else{
					($G_to_A_infile_1,$read1_count) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
					($C_to_T_infile_2,$read2_count) = biTransformFastQFiles_paired_end ($filename_2,2);
				}

				$fhs[0]->{inputfile_1} = undef;
				$fhs[0]->{inputfile_2} = undef;
				$fhs[1]->{inputfile_1} = $G_to_A_infile_1;
				$fhs[1]->{inputfile_2} = $C_to_T_infile_2;
				$fhs[2]->{inputfile_1} = $G_to_A_infile_1;
				$fhs[2]->{inputfile_2} = $C_to_T_infile_2;
				$fhs[3]->{inputfile_1} = undef;
				$fhs[3]->{inputfile_2} = undef;
			}
			else{ # non-directional
				if ($slam){
					($C_to_T_infile_1,$G_to_A_infile_1,$read1_count) = biTransformFastQFiles_paired_end_slam ($filename_1,1); # also passing the read number
					($C_to_T_infile_2,$G_to_A_infile_2,$read2_count) = biTransformFastQFiles_paired_end_slam ($filename_2,2);
				}	
				else{
					($C_to_T_infile_1,$G_to_A_infile_1,$read1_count) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
					($C_to_T_infile_2,$G_to_A_infile_2,$read2_count) = biTransformFastQFiles_paired_end ($filename_2,2);
				}

				$fhs[0]->{inputfile_1} = $C_to_T_infile_1;
				$fhs[0]->{inputfile_2} = $G_to_A_infile_2;
				$fhs[1]->{inputfile_1} = $G_to_A_infile_1;
				$fhs[1]->{inputfile_2} = $C_to_T_infile_2;
				$fhs[2]->{inputfile_1} = $G_to_A_infile_1;
				$fhs[2]->{inputfile_2} = $C_to_T_infile_2;
				$fhs[3]->{inputfile_1} = $C_to_T_infile_1;
				$fhs[3]->{inputfile_2} = $G_to_A_infile_2;
			}
			
			unless ($read1_count eq $read2_count){
					die "[FATAL ERROR]:\tNumber of bisulfite transformed reads are not equal between Read 1 (\#$read1_count) and Read 2 (\#$read2_count).\nPossible causes: file truncation, or as a result of specifying read pairs that do not belong to each other?! Please re-specify file names! Exiting...\n\n";
			}
						
			if ($bowtie2){
				paired_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
			}
			elsif ($mm2){
				paired_end_align_fragments_to_bisulfite_genome_fastQ_minimap2 ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
			}
			else{ #
				paired_end_align_fragments_to_bisulfite_genome_fastQ_hisat2 ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
			}
		}
		start_methylation_call_procedure_paired_ends($filename_1,$filename_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2,$pid);
	}
	### Else we are performing SINGLE-END ALIGNMENTS
	else{
		warn "\nSingle-end alignments will be performed\n",'='x39,"\n\n";

		$single_end = 1;
		$paired_end = 0;

		### subsetting the input file(s)
		unless ($multicore == 1){ # not needed in single-core mode
			# warn "My PID: $pid\nMy offset: $offset\n";
			if ($sequence_file_format eq 'FASTA'){
				my $temp_filename = subset_input_file_FastA($filename,$pid,$offset);
				warn "Using the subset file >${temp_dir}$temp_filename< as new in-file (instead of >$filename<)\n";
				$filename = "${temp_dir}$temp_filename";
			}
			else{ # FastQ format, default
				my $temp_filename = subset_input_file_FastQ($filename,$pid,$offset);
				warn "Using the subset file >${temp_dir}$temp_filename< as new in-file (instead of >$filename<)\n";
				$filename = "${temp_dir}$temp_filename";
			}
		}

		### Initialising bisulfite conversion filenames
		my ($C_to_T_infile,$G_to_A_infile);

		### FastA format
		if ($sequence_file_format eq 'FASTA'){
			warn "Input file is in FastA format\n";
			if ($directional){
				($C_to_T_infile) = biTransformFastAFiles ($filename);
				$fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
			}
			else{
				($C_to_T_infile,$G_to_A_infile) = biTransformFastAFiles ($filename);
				$fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
				$fhs[2]->{inputfile} = $fhs[3]->{inputfile} = $G_to_A_infile;
			}

			### Creating 4 different bowtie filehandles and storing the first entry
			if ($bowtie2){
				single_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 ($C_to_T_infile,$G_to_A_infile);
			}
			else{
				single_end_align_fragments_to_bisulfite_genome_fastA_hisat2 ($C_to_T_infile,$G_to_A_infile);
			}
		}
		## FastQ format
		else{
			warn "Input file is in FastQ format\n";
			if ($directional){
				if ($slam){
					($C_to_T_infile) = biTransformFastQFiles_slam ($filename);
				}
				else{
					($C_to_T_infile) = biTransformFastQFiles ($filename);
				}
				$fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
			}	
			elsif($pbat){
				if ($slam){
					($G_to_A_infile) = biTransformFastQFiles_slam ($filename);
				}
				else{
					($G_to_A_infile) = biTransformFastQFiles ($filename);
				}
				$fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $G_to_A_infile; # PBAT-Seq only uses the G to A converted files
			}
			else{
				if ($slam){
					($C_to_T_infile,$G_to_A_infile) = biTransformFastQFiles_slam ($filename);
				}
				else{
					($C_to_T_infile,$G_to_A_infile) = biTransformFastQFiles ($filename);
				}
				$fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
				$fhs[2]->{inputfile} = $fhs[3]->{inputfile} = $G_to_A_infile;
			}

			### Creating up to 4 different filehandles and storing the first entry
			if ($pbat){
				if ($bowtie2){ # as of version 0.10.2 we also support PBAT alignments for Bowtie 2
					single_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 (undef,$G_to_A_infile);
				}
				elsif($mm2){
					single_end_align_fragments_to_bisulfite_genome_fastQ_minimap2 (undef,$G_to_A_infile);
				}
				else{ # HISAT2
					single_end_align_fragments_to_bisulfite_genome_fastQ_hisat2 (undef,$G_to_A_infile);
				}
			}
			elsif ($bowtie2){
				single_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 ($C_to_T_infile,$G_to_A_infile);
			}
			elsif($mm2){
				# warn "Now kicking off the single-end Minimap2 alignments\n\n"; sleep(3);
				single_end_align_fragments_to_bisulfite_genome_fastQ_minimap2 ($C_to_T_infile,$G_to_A_infile);
			}
			else{ # HISAT2
				single_end_align_fragments_to_bisulfite_genome_fastQ_hisat2 ($C_to_T_infile,$G_to_A_infile);
			}
		}

		start_methylation_call_procedure_single_ends($filename,$C_to_T_infile,$G_to_A_infile,$pid);

	}

	### MERGING AND DELETING TEMP FILES // TIDYING UP AFTER A MULTICORE PROCESS

	if ($pid){ # only performing this for the parent process

		if ($multicore > 1){

			warn "Now waiting for all child processes to complete\n";

			### we need to ensure that we wait for all child processes to be finished before continuing
			# warn "here are the child IDs: @pids\n";
			# warn "Looping through the child process IDs:\n";

			my $all_children_succeeded = 1;
			foreach my $id (@pids){
				# print "$id\t";
				my $kid = waitpid ($id,0);
				# print "Returned: $kid\nExit status: $?\n";
				unless ($? == 0){
					$all_children_succeeded = 0;
					warn "\nChild process terminated with exit signal: '$?'\n\n";
				}
			}
			if ($all_children_succeeded) {
				print "All child process successfully finished.";
			}
			else {
				die "\nTerminating. Not all child processes successfully finished.";
			}

			# regenerating names for temporary files
			my @temp_input;
			my @temp_output;
			my @temp_reports;
			my @temp_unmapped_1;  # will store single end reads or R1 of paired-end
			my @temp_unmapped_2;
			my @temp_ambiguous_1; # will store single end reads or R1 of paired-end
			my @temp_ambiguous_2;
			my @temp_ambig_bam;

			for (1..$offset){

				# Temp Input Files
				if ($single_end){
					if ($gzip){
						push @temp_input, "${original_filename}.temp.${_}.gz";
					}
					else{
						push @temp_input, "${original_filename}.temp.${_}";
					}
				}
				elsif($paired_end){
					if ($gzip){
						push @temp_input, "${original_filename_1}.temp.${_}.gz";
						push @temp_input, "${original_filename_2}.temp.${_}.gz";
					}
					else{
						push @temp_input, "${original_filename_1}.temp.${_}";
						push @temp_input, "${original_filename_2}.temp.${_}";
					}
				}

				# if files had a prefix we need to specify it
				my $add_prefix;
				if (defined $prefix){
					$add_prefix = "${prefix}.";
				}
				else{
					$add_prefix = '';
				}

				if ($single_end){
			 
					# Temp Output Files
					my $pathless_filename = ${original_filename}; # 10 Jan 2017
					$pathless_filename =~ s/.*\///; # deleting path information    
				
					if ($bowtie2){
						if ($gzip){
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename}.temp.${_}.gz_bismark_bt2.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename}.temp.${_}.gz_bismark_bt2_SE_report.txt";
							push @temp_ambig_bam,  "${output_dir}${add_prefix}${pathless_filename}.temp.${_}.gz_bismark_bt2.ambig.bam";   # only for Bowtie 2
						}
						else{
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename}.temp.${_}_bismark_bt2.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename}.temp.${_}_bismark_bt2_SE_report.txt";
							push @temp_ambig_bam,  "${output_dir}${add_prefix}${pathless_filename}.temp.${_}_bismark_bt2.ambig.bam";      # only for Bowtie 2
						}	
					}
					elsif($mm2){
						if ($gzip){
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename}.temp.${_}.gz_bismark_mm22.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename}.temp.${_}.gz_bismark_mm2_SE_report.txt";
						}
						else{
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename}.temp.${_}_bismark_mm2.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename}.temp.${_}_bismark_mm2_SE_report.txt";
						}	
					}
					else{ # HISAT2
						if ($gzip){
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename}.temp.${_}.gz_bismark_hisat2.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename}.temp.${_}.gz_bismark_hisat2_SE_report.txt";
						}
						else{
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename}.temp.${_}_bismark_hisat2.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename}.temp.${_}_bismark_hisat2_SE_report.txt";
						}
					}

					if ($unmapped){
						if ($gzip){
							push @temp_unmapped_1,   "${output_dir}${add_prefix}${pathless_filename}.temp.${_}.gz_unmapped_reads.fq";
						}
						else{
							push @temp_unmapped_1,   "${output_dir}${add_prefix}${pathless_filename}.temp.${_}_unmapped_reads.fq";
						}
					}
				
					if ($ambiguous){
						if ($gzip){
							push @temp_ambiguous_1,  "${output_dir}${add_prefix}${pathless_filename}.temp.${_}.gz_ambiguous_reads.fq";
						}
						else{
							push @temp_ambiguous_1,  "${output_dir}${add_prefix}${pathless_filename}.temp.${_}_ambiguous_reads.fq";
						}
					}
				}
				elsif($paired_end){
				
					# Temp Output Files
					my $pathless_filename_1 = ${original_filename_1}; # 10 Jan 2017
					my $pathless_filename_2 = ${original_filename_2};
					$pathless_filename_1 =~ s/.*\///; # deleting path information 
					$pathless_filename_2 =~ s/.*\///; # deleting path information 
					
					if ($bowtie2){
						if ($gzip){
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_bismark_bt2_pe.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_bismark_bt2_PE_report.txt";
							push @temp_ambig_bam,  "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_bismark_bt2_pe.ambig.bam";      # only for Bowtie 2
						}
						else{
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_bismark_bt2_pe.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_bismark_bt2_PE_report.txt";
							push @temp_ambig_bam,  "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_bismark_bt2_pe.ambig.bam";         # only for Bowtie 2
						}
					}
					elsif($mm2){
						if ($gzip){
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_bismark_mm2_pe.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_bismark_mm2_PE_report.txt";
						}
						else{
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_bismark_mm2_pe.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_bismark_mm2_PE_report.txt";
						}
					}
					else{
						if ($gzip){
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_bismark_hisat2_pe.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_bismark_hisat2_PE_report.txt";
						}
						else{
							push @temp_output,     "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_bismark_hisat2_pe.bam";
							push @temp_reports,    "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_bismark_hisat2_PE_report.txt";
						}
					}

					if ($unmapped){
						if ($gzip){
							push @temp_unmapped_1,   "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_unmapped_reads_1.fq";
							push @temp_unmapped_2,   "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}.gz_unmapped_reads_2.fq";
						}
						else{
							push @temp_unmapped_1,   "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_unmapped_reads_1.fq";
							push @temp_unmapped_2,   "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}_unmapped_reads_2.fq";
						}
					}
			  
					if ($ambiguous){
						if ($gzip){
							push @temp_ambiguous_1,   "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_ambiguous_reads_1.fq";
							push @temp_ambiguous_2,   "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}.gz_ambiguous_reads_2.fq";
						}
						else{
							push @temp_ambiguous_1,   "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_ambiguous_reads_1.fq";
							push @temp_ambiguous_2,   "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}_ambiguous_reads_2.fq";
						}
					}
				}
			}

			warn "\n\nRight, cleaning up now...\n\n";

			# deleting temp files;
			warn "Deleting temporary sequence files...\n";
			foreach my $temp (@temp_input){
				#print "$temp\t";
				$temp =~ s/.*\///; # deleting path information
				print "${temp_dir}${temp}\t";
				unlink "${temp_dir}${temp}" or warn "Failed to delete temporary FastQ file ${temp_dir}$temp: $!\n";
			}
			print "\n\n";

			# merging temp BAM files
			if ($single_end){
				merge_individual_BAM_files(\@temp_output,$original_filename,$single_end);
			}
			else{
				merge_individual_BAM_files(\@temp_output,$original_filename_1,$single_end);
			}

			# deleting temp BAM files
			warn "Deleting temporary BAM files...\n";
			foreach my $temp (@temp_output){
				# print "$temp\t";
				$temp =~ s/.*\///; # deleting path information
				print "${output_dir}${temp}\t";
				unlink "${output_dir}${temp}" or warn "Failed to delete temporary BAM file ${output_dir}${temp}: $!\n";
			}
			print "\n\n";

			### AMBIGUOUS BAM files
			if ($ambig_bam){

				# merging temp AMBIG BAM files
				if ($single_end){
					merge_individual_ambig_BAM_files(\@temp_ambig_bam,$original_filename,$single_end);
				}
				else{
					merge_individual_ambig_BAM_files(\@temp_ambig_bam,$original_filename_1,$single_end);
				}

				# deleting temp BAM files
				warn "Deleting temporary ambiguous BAM files...\n";
				foreach my $temp (@temp_ambig_bam){
					# print "$temp\t";
					$temp =~ s/.*\///; # deleting path information
					print "${output_dir}${temp}\t";
					unlink "${output_dir}${temp}" or warn "Failed to delete temporary ambiguous BAM file ${output_dir}${temp}: $!\n";
				}
				print "\n\n";
			}

			if ($unmapped){
				if ($single_end){
					merge_individual_unmapped_files(\@temp_unmapped_1,$original_filename,$single_end);
				}
				else{
					merge_individual_unmapped_files(\@temp_unmapped_1,$original_filename_1,$single_end,'_1');
					merge_individual_unmapped_files(\@temp_unmapped_2,$original_filename_2,$single_end,'_2');
				}

				# deleting temp unmapped files
				warn "Deleting temporary unmapped files...\n";
				foreach my $temp (@temp_unmapped_1){
					print "$temp\t";
					unlink "${output_dir}${temp}" or warn "Failed to delete temporary unmapped FastQ file ${output_dir}$temp: $!\n";
				}
				if ($paired_end){
					foreach my $temp (@temp_unmapped_2){
						print "$temp\t";
						unlink "${output_dir}${temp}" or warn "Failed to delete temporary unmapped FastQ file ${output_dir}$temp: $!\n";
					}
				}
				print "\n\n";
			}

			if ($ambiguous){
				if ($single_end){
					merge_individual_ambiguous_files(\@temp_ambiguous_1,$original_filename,$single_end);
				}
				else{
					merge_individual_ambiguous_files(\@temp_ambiguous_1,$original_filename_1,$single_end,'_1');
					merge_individual_ambiguous_files(\@temp_ambiguous_2,$original_filename_2,$single_end,'_2');
				}

				# deleting temp ambiguous files
				warn "Deleting temporary ambiguous files...\n";
				foreach my $temp (@temp_ambiguous_1){
					print "$temp\t";
					unlink "${output_dir}${temp}" or warn "Failed to delete temporary ambiguous FastQ file ${output_dir}$temp: $!\n";
				}

				if ($paired_end){
					foreach my $temp (@temp_ambiguous_2){
						print "$temp\t";
						unlink "${output_dir}${temp}" or warn "Failed to delete temporary ambiguous FastQ file ${output_dir}$temp: $!\n";
					}
				}
				print "\n\n";
			}

			# resetting the counters once more so we can add all data from all temporary reports
			reset_counters_and_fhs($original_filename);

			### Merging the Bismark mapping report files
			if ($single_end){
				merge_individual_mapping_reports(\@temp_reports,$original_filename,$single_end);
				print_final_analysis_report_single_end('mock_file1','mock_file_2','mock_pid','mergeThis');
			}
			else{
				merge_individual_mapping_reports(\@temp_reports,$original_filename_1,$single_end,$original_filename_2);
				print_final_analysis_report_paired_ends('mock_file1','mock_file_2','mock_file3','mock_file_4','mock_pid','mergeThis');
			}

			# deleting temp report files
			warn "Deleting temporary report files...\n";
			foreach my $temp (@temp_reports){
				print "$temp\t";
				unlink "${output_dir}${temp}" or warn "Failed to delete temporary report file $output_dir$temp: $!\n";
			}
			print "\n\n";
		}
	}

	if ($pid){ # only for the Parent
	
		### Produce Run Time
		my $end_run = time();
		my $run_time = $end_run - $start_run;
		my $days  = int($run_time/(24*60*60));
		my $hours = ($run_time/(60*60))%24;
		my $mins  = ($run_time/60)%60;
		my $secs  = $run_time%60;

		warn "Bismark completed in ${days}d ${hours}h ${mins}m ${secs}s\n";
		print REPORT "Bismark completed in ${days}d ${hours}h ${mins}m ${secs}s\n";
	
		warn "\n====================\nBismark run complete\n====================\n\n";

		if ($nucleotide_coverage){
			warn "Now calculating observed and expected nucleotide coverage statistics... \n\n";
			if ($final_output_filename =~ /(bam|cram)|/){
				my @args;
				push @args, "--genome $genome_folder";
				push @args, "--dir '$output_dir'";
				push @args, "--samtools_path $samtools_path";
				push @args, $final_output_filename;
				print "@args","\n"; sleep(3);
	
				system ("$RealBin/bam2nuc @args");
				warn "Finished bam2nuc calculation ...\n\n";
			}
			else{
				warn "Nucleotide coverage statistics are currently only available for BAM or CRAM files\n\n";
			}
		}
		# exit 0; # will terminate after a single supplied file....
	}
	else{
		# If multiple files were supplied as the command line, like so:
		# -1 R1.fastq,simulated_1.fastq,ZZZ_R1.fastq -2 R2.fastq,simulated_2.fastq,ZZZ_R2.fastq --multicore 4
		# we need to exit from the child processes if we don't want a steady increase of new Bismark instances! Fixed 30 10 2017
		# warn "Terminating Child process\n\n"; 
		exit 0;
	}

}

sub merge_individual_mapping_reports{

	# Combines the temporary mapping reports of the individual chunks of a run into one report.
	#
	# Arguments:
	#   $temp_reports        - array reference of temporary report file names. The elements are
	#                          modified in place: any leading path is removed from every name
	#   $original_filename_1 - input file the merged report is named after (Read 1 for paired-end)
	#   $single_end          - true for single-end data, false for paired-end data
	#   $original_filename_2 - Read 2 input file, only mentioned in the paired-end header line
	#
	# The merged report is opened on the global filehandle REPORT, which is not closed here.
	# Only the leading section of the first temporary report (everything before the line starting
	# with 'Final Alignment') is copied verbatim; the numbers of all temporary reports are added up
	# by read_alignment_report().

	my ($temp_reports,$original_filename_1,$single_end,$original_filename_2) = @_;

	my $report_file = $original_filename_1;
	$report_file =~ s/.*\///; # removing path information
	$report_file =~ s/(\.fastq\.gz|\.fq\.gz|\.fastq|\.fq)$//; # attempting to remove fastq.gz etc to make filename a little shorter

	if ($prefix){
		$report_file = "${prefix}.${report_file}";
	}

	if ($basename){ # Output file basename is set using the -B argument
		$report_file = ${basename};
	}

	if ($single_end){
		if ($bowtie2){
			$report_file .= '_bismark_bt2_SE_report.txt';
		}
		elsif($mm2){
			$report_file .= '_bismark_mm2_SE_report.txt';
		}
		else{
			$report_file .= '_bismark_hisat2_SE_report.txt';
		}
	}
	else{
		if ($bowtie2){
			$report_file .= '_bismark_bt2_PE_report.txt';
		}
		elsif($mm2){
			$report_file .= '_bismark_mm2_PE_report.txt';
		}
		else{
			$report_file .= '_bismark_hisat2_PE_report.txt';
		}
	}
	warn "Writing report to ${output_dir}${report_file}\n";
	open (REPORT,'>',"$output_dir$report_file") or die "Failed to write to ${output_dir}${report_file}: $!\n";

	# The path is stripped in place, so the caller's array holds bare file names afterwards
	foreach my $temp(@$temp_reports){
		$temp =~ s/.*\///; # removing path information
	}

	warn "Now merging temporary reports @$temp_reports into >>> ${output_dir}${report_file} <<<\n";

	if ($single_end){
		print REPORT "Bismark report for: $original_filename_1 (version: $bismark_version)\n";
	}
	else{ # paired-end
		print REPORT "Bismark report for: $original_filename_1 and $original_filename_2 (version: $bismark_version)\n";
	}

	my $header_copied = 0; # set once the leading section of a report has been written to REPORT

	foreach my $temp(@$temp_reports){

		warn "Merging from file >> $temp <<\n";

		### this is printing the first couple of lines, taken from the first report only.
		### Once the header has been copied there is nothing left to take from this pass, so later reports are not re-read here
		unless ($header_copied){
			open (my $in,'<',"${output_dir}${temp}") or die "Failed to read from temporary mapping report '${output_dir}${temp}': $!\n";

			while (my $line = <$in>){
				chomp $line;
				next if ($line =~ /^Bismark report/); # the merged report already got its own title line

				if ($line =~ /^Final Alignment/){
					++$header_copied;
					last;
				}
				print REPORT "$line\n";
			}
			close $in or warn "Failed to close filehandle\n";
		}

		### Simon says: You are going to regret this in the future. Just for the record. He might be right...
		read_alignment_report($temp,$single_end);

	}
	warn "\n";

}

sub read_alignment_report{

  # Reads one temporary mapping report from $output_dir and adds its numbers to the
  # file-level %counting hash.
  #
  # Arguments:
  #   $report     - file name of the temporary report (relative to $output_dir)
  #   $single_end - true for single-end data, false for paired-end data; decides which set of
  #                 strand-origin counters is incremented
  #
  # For every recognised line the value taken is the second tab-separated field. A value that
  # does not occur in the report stays undefined and produces Perl's usual 'uninitialized'
  # warning when it is added up.

  my ($report,$single_end) = @_;

  # Each entry: [ pattern identifying a report line , name the value is remembered under ].
  # Single-end and paired-end wordings of the same statistic share one name.
  my @report_lines = (
    ### General Alignment stats
    [ qr{^Sequence pairs analysed in total:},                                                        'total_seqs'     ], # Paired-end
    [ qr{^Sequences analysed in total:},                                                             'total_seqs'     ], # Single-end
    [ qr{^Number of paired-end alignments with a unique best hit:},                                  'unique'         ], # Paired-end
    [ qr{^Number of alignments with a unique best hit from},                                         'unique'         ], # Single-end
    [ qr{^Sequence pairs with no alignments under any condition:},                                   'no_aln'         ], # Paired-end
    [ qr{^Sequences with no alignments under any condition:},                                        'no_aln'         ], # Single-end
    [ qr{^Sequence pairs did not map uniquely:},                                                     'multiple'       ], # Paired-end
    [ qr{^Sequences did not map uniquely:},                                                          'multiple'       ], # Single-end
    [ qr{^Sequence pairs which were discarded because genomic sequence could not be extracted:},     'no_genomic'     ], # Paired-end
    [ qr{^Sequences which were discarded because genomic sequence could not be extracted:},          'no_genomic'     ], # Single-end

    ### Context Methylation
    [ qr{^Total methylated C's in CpG context:},                                                     'meth_CpG'       ],
    [ qr{^Total methylated C's in CHG context:},                                                     'meth_CHG'       ],
    [ qr{^Total methylated C's in CHH context:},                                                     'meth_CHH'       ],
    [ qr{^Total methylated C's in Unknown context:},                                                 'meth_unknown'   ],
    [ qr{^Total (?:unmethylated C's|C to T conversions) in CpG context:},                            'unmeth_CpG'     ],
    [ qr{^Total (?:unmethylated C's|C to T conversions) in CHG context:},                            'unmeth_CHG'     ],
    [ qr{^Total (?:unmethylated C's|C to T conversions) in CHH context:},                            'unmeth_CHH'     ],
    [ qr{^Total (?:unmethylated C's|C to T conversions) in Unknown context:},                        'unmeth_unknown' ],

    ### Strand Origin
    [ qr{^CT/GA/CT:},                                                                                'number_OT'      ], # Paired-end
    [ qr{^CT/CT:},                                                                                   'number_OT'      ], # Single-end
    [ qr{^GA/CT/CT:},                                                                                'number_CTOT'    ], # Paired-end
    [ qr{^GA/CT:},                                                                                   'number_CTOT'    ], # Single-end
    [ qr{^GA/CT/GA:},                                                                                'number_CTOB'    ], # Paired-end
    [ qr{^GA/GA:},                                                                                   'number_CTOB'    ], # Single-end
    [ qr{^CT/GA/GA:},                                                                                'number_OB'      ], # Paired-end
    [ qr{^CT/GA:},                                                                                   'number_OB'      ], # Single-end
  );

  my %value; # statistic name => number found in this report

  open (my $aln,'<',"${output_dir}${report}") or die "Failed to read from temporary mapping report '$output_dir$report': $!\n";

  LINE: while (my $line = <$aln>){
    chomp $line;

    foreach my $entry (@report_lines){
      my ($pattern,$name) = @$entry;
      next unless ($line =~ $pattern);

      # the number is the field following the first tab
      (undef,$value{$name}) = split (/\t/,$line);
      next LINE; # the first matching pattern wins
    }
  }

  # the report handle used to be left open; close it as soon as the file has been read
  close $aln or warn "Failed to close filehandle\n";

  $counting{sequences_count}                               += $value{total_seqs};
  $counting{unique_best_alignment_count}                   += $value{unique};
  $counting{no_single_alignment_found}                     += $value{no_aln};
  $counting{unsuitable_sequence_count}                     += $value{multiple};
  $counting{genomic_sequence_could_not_be_extracted_count} += $value{no_genomic};

  $counting{total_meCHH_count}                             += $value{meth_CHH};
  $counting{total_meCHG_count}                             += $value{meth_CHG};
  $counting{total_meCpG_count}                             += $value{meth_CpG};
  if ($bowtie2){ # the Unknown context numbers are only added up for Bowtie 2 runs
    $counting{total_meC_unknown_count}                     += $value{meth_unknown};
  }

  $counting{total_unmethylated_CHH_count}                  += $value{unmeth_CHH};
  $counting{total_unmethylated_CHG_count}                  += $value{unmeth_CHG};
  $counting{total_unmethylated_CpG_count}                  += $value{unmeth_CpG};
  if ($bowtie2){
    $counting{total_unmethylated_C_unknown_count}          += $value{unmeth_unknown};
  }

  if ($single_end){
    $counting{CT_CT_count}    += $value{number_OT};
    $counting{CT_GA_count}    += $value{number_OB};
    $counting{GA_CT_count}    += $value{number_CTOT};
    $counting{GA_GA_count}    += $value{number_CTOB};
  }
  else{
    # paired-end
    $counting{GA_CT_CT_count} += $value{number_CTOT};
    $counting{CT_GA_CT_count} += $value{number_OT};
    $counting{GA_CT_GA_count} += $value{number_CTOB};
    $counting{CT_GA_GA_count} += $value{number_OB};
  }
}

sub merge_individual_ambiguous_files{

  # Concatenates the temporary files holding ambiguously mapping reads into a single
  # gzip-compressed file in $output_dir.
  #
  # Arguments:
  #   $temp_ambiguous     - array reference of temporary file names. The elements are modified in
  #                         place: any leading path is removed from every name
  #   $original_filename  - input file the merged file is named after (unless $basename is set)
  #   $single_end         - true for single-end data, false for paired-end data
  #   $paired_information - suffix distinguishing the two reads of a pair (the calls visible in
  #                         this file pass '_1' or '_2'); ignored for single-end data

  my ($temp_ambiguous,$original_filename,$single_end,$paired_information) = @_;

  my $extension = ($sequence_file_format eq 'FASTQ') ? 'fq.gz' : 'fa.gz';
  my $read_tag  = $single_end ? '' : $paired_information;

  my $ambiguous_file;
  if ($basename){ # Output file basename is set using the -B argument
    $ambiguous_file = "${basename}_ambiguous_reads${read_tag}.${extension}";
  }
  else{
    $ambiguous_file = $original_filename;
    $ambiguous_file =~ s/.*\///; # removing path information

    if ($prefix){
      $ambiguous_file = "${prefix}.${ambiguous_file}";
    }
    $ambiguous_file .= "_ambiguous_reads${read_tag}.${extension}";
  }

  # The path is stripped in place, so the caller's array holds bare file names afterwards
  foreach my $temp(@$temp_ambiguous){
    $temp =~ s/.*\///; # removing path information
  }

  # NOTE(review): the output path is interpolated unquoted into a shell command, so an output
  # directory or file name containing whitespace or shell metacharacters would break the redirect
  open (my $out,'|-',"gzip -c - > $output_dir$ambiguous_file") or die "Failed to write to $ambiguous_file: $!\n";
  warn "Now merging ambiguous sequences @$temp_ambiguous into >>> $output_dir$ambiguous_file <<<\n";

  foreach my $temp(@$temp_ambiguous){
    warn "Merging from file >> $temp <<\n";

    my $in;
    if ($temp =~ /gz$/){
      # list form: the file name reaches gunzip as a single argument
      open ($in,'-|','gunzip','-c',"${output_dir}$temp") or die "Failed to read from ambiguous temp file '${output_dir}$temp': $!\n";
    }
    else{
      open ($in,'<',"${output_dir}$temp") or die "Failed to read from ambiguous temp file '${output_dir}$temp': $!\n";
    }

    while (my $line = <$in>){
      print {$out} $line;
    }
    close $in or warn "Failed to close filehandle\n";
  }
  warn "\n";

  close $out or warn "Failed to close output filehandle AMBIGUOUS\n\n";
}

sub merge_individual_unmapped_files{

  # Concatenates the temporary files holding unmapped reads into a single gzip-compressed file
  # in $output_dir.
  #
  # Arguments:
  #   $temp_unmapped      - array reference of temporary file names. The elements are modified in
  #                         place: any leading path is removed from every name
  #   $original_filename  - input file the merged file is named after (unless $basename is set)
  #   $single_end         - true for single-end data, false for paired-end data
  #   $paired_information - suffix distinguishing the two reads of a pair (the calls visible in
  #                         this file pass '_1' or '_2'); ignored for single-end data

  my ($temp_unmapped,$original_filename,$single_end,$paired_information) = @_;

  my $extension = ($sequence_file_format eq 'FASTQ') ? 'fq.gz' : 'fa.gz';
  my $read_tag  = $single_end ? '' : $paired_information;

  my $unmapped_file;
  if ($basename){ # Output file basename is set using the -B argument
    $unmapped_file = "${basename}_unmapped_reads${read_tag}.${extension}";
  }
  else{
    $unmapped_file = $original_filename;
    $unmapped_file =~ s/.*\///; # removing path information

    if ($prefix){
      $unmapped_file = "${prefix}.${unmapped_file}";
    }
    $unmapped_file .= "_unmapped_reads${read_tag}.${extension}";
  }

  # The path is stripped in place, so the caller's array holds bare file names afterwards
  foreach my $temp(@$temp_unmapped){
    $temp =~ s/.*\///; # removing path information
  }

  # NOTE(review): the output path is interpolated unquoted into a shell command, so an output
  # directory or file name containing whitespace or shell metacharacters would break the redirect
  open (my $out,'|-',"gzip -c - > ${output_dir}${unmapped_file}") or die "Failed to write to ${output_dir}${unmapped_file}: $!\n";
  warn "Now merging unmapped sequences @$temp_unmapped into >>> ${output_dir}${unmapped_file} <<<\n";

  foreach my $temp(@$temp_unmapped){
    warn "Merging from file >> $temp <<\n";

    my $in;
    if ($temp =~ /gz$/){
      # list form: the file name reaches gunzip as a single argument
      open ($in,'-|','gunzip','-c',"${output_dir}${temp}") or die "Failed to read from unmapped temp file '${output_dir}$temp': $!\n";
    }
    else{
      open ($in,'<',"${output_dir}${temp}") or die "Failed to read from unmapped temp file '${output_dir}${temp}': $!\n";
    }

    while (my $line = <$in>){
      print {$out} $line;
    }
    close $in or warn "Failed to close filehandle\n";
  }
  warn "\n";

  close $out or warn "Failed to close output filehandle UNMAPPED\n\n";
}

sub merge_individual_BAM_files{

  # Streams all temporary BAM files through Samtools into one merged BAM (or CRAM) file.
  #
  # Arguments:
  #   $tempbam           - array reference of temporary BAM file names. The elements are modified
  #                        in place: any leading path is removed from every name
  #   $original_filename - input file the merged file is named after (unless $basename is set)
  #   $single_end        - true for single-end data, false for paired-end data
  #
  # Also stores the full path of the merged file in the file-level $final_output_filename.

  my ($tempbam,$original_filename,$single_end) = @_;

  # the temporary files are looked up in $output_dir, so only their bare names are kept
  s/.*\/// for @$tempbam;

  (my $merged_name = $original_filename) =~ s/.*\///; # deleting path information
  $merged_name =~ s/(\.fastq\.gz|\.fq\.gz|\.fastq|\.fq)$//; # attempting to remove fastq.gz etc to make filename a little shorter
  $merged_name = "$prefix.$merged_name" if ($prefix);

  my $aligner_tag = $bowtie2 ? 'bt2'
                  : $mm2     ? 'mm2'
                  :            'hisat2';
  my $pair_tag    = $single_end ? '' : '_pe';

  if ($basename){ # Output file basename is set using the -B argument, it replaces the derived name altogether
    $merged_name = "${basename}${pair_tag}.bam";
  }
  else{
    $merged_name .= "_bismark_${aligner_tag}${pair_tag}.bam";
  }

  if ($cram){
    $merged_name =~ s/bam$/cram/;
    warn "At this stage we write out a single CRAM file and delete all temporary BAM files\n";
  }
  warn "Now merging BAM files @$tempbam into >>> $merged_name <<<\n";
  $final_output_filename = "${output_dir}${merged_name}";

  if ($cram){
    open (OUT,"| $samtools_path view -h -C -T $cram_ref 2>/dev/null - > ${output_dir}${merged_name}") or die "Failed to write to CRAM file $merged_name: $!\nPlease note that this option requires Samtools version 1.2 or higher!\n\n";
  }
  else{
    open (OUT,"| $samtools_path view -bSh 2>/dev/null - > ${output_dir}${merged_name}") or die "Failed to write to $merged_name: $!\n";
  }

  my $files_merged = 0;

  foreach my $temp_bam(@$tempbam){
    warn "Merging from file >> $temp_bam <<\n";

    # the SAM header is requested (-h) for the very first file only
    my $header_option = $files_merged > 0 ? '' : '-h ';
    open (IN,"$samtools_path view ${header_option}${output_dir}${temp_bam} |") or die "Failed to read from BAM file ${output_dir}${temp_bam}\n";

    print OUT $_ while (<IN>);

    close IN or warn "Failed to close filehandle\n";
    ++$files_merged;
  }
  warn "\n";

  close OUT or warn "Failed to close output filehandle\n\n";

}


sub merge_individual_ambig_BAM_files{

    # Streams all temporary ambiguous-alignment BAM files through Samtools into one merged BAM file.
    #
    # Arguments:
    #   $tempbam           - array reference of temporary BAM file names. The elements are modified
    #                        in place: any leading path is removed from every name
    #   $original_filename - input file the merged file is named after (unless $basename is set)
    #   $single_end        - true for single-end data, false for paired-end data
    #
    # NOTE(review): unlike merge_individual_BAM_files, the FastQ extension is not removed from the
    # name here, and a suffix is only appended for Bowtie 2 - confirm whether this is intended.

    my ($tempbam,$original_filename,$single_end) = @_;

    # the temporary files are looked up in $output_dir, so only their bare names are kept
    s/.*\/// for @$tempbam;

    (my $merged_name = $original_filename) =~ s/.*\///; # deleting path information
    $merged_name = "$prefix.$merged_name" if ($prefix);

    my $pair_tag = $single_end ? '' : '_pe';

    if ($basename){ # Output file basename is set using the -B argument, it replaces the derived name altogether
	$merged_name = "${basename}${pair_tag}.ambig.bam";
    }
    elsif ($bowtie2){ # BAM format is the default for Bowtie 2
	$merged_name .= "_bismark_bt2${pair_tag}.ambig.bam";
    }

    warn "Now merging ambiguous BAM files @$tempbam into >>> $merged_name <<<\n";
    open (OUT,"| $samtools_path view -bSh 2>/dev/null - > ${output_dir}${merged_name}") or die "Failed to write to $merged_name: $!\n";

    my $files_merged = 0;

    foreach my $temp_bam(@$tempbam){
	warn "Merging from file >> $temp_bam <<\n";

	# the SAM header is requested (-h) for the very first file only
	my $header_option = $files_merged > 0 ? '' : '-h ';
	open (IN,"$samtools_path view ${header_option}${output_dir}${temp_bam} |") or die "Failed to read from BAM file ${output_dir}${temp_bam}\n";

	print OUT $_ while (<IN>);

	close IN or warn "Failed to close filehandle\n";
	++$files_merged;
    }
    warn "\n";

    close OUT or warn "Failed to close output filehandle\n\n";
}

sub start_methylation_call_procedure_single_ends {

	# Prepares all output files of a single-end run and then hands over to the methylation caller.
	#
	# Arguments:
	#   $sequence_file - the input read file (may include a path)
	#   $C_to_T_infile, $G_to_A_infile, $pid - passed on unchanged to the FastA/FastQ processing subroutine
	#
	# Opens the global filehandles OUT and REPORT, and AMBIBAM, UNMAPPED and AMBIG when the
	# corresponding options are set. Sets $bam to 0 if it was undefined, and records the result file
	# in $final_output_filename when BAM or (single-core) CRAM output is written directly.

	my ($sequence_file,$C_to_T_infile,$G_to_A_infile,$pid) = @_;

	# file name of the input without any leading directories
	my $filename = $sequence_file;
	if ($sequence_file =~ /\//){
		(undef,$filename) = $sequence_file =~ m/(.*\/)(.*)$/;
	}

	# tag used in the names of the alignment and report files
	my $aligner_tag = $bowtie2 ? 'bt2'
	                : $mm2     ? 'mm2'
	                :            'hisat2';

	### printing all alignments to a results file (SAM is the default format for all aligners)
	my $outfile = $filename;
	$outfile =~ s/(\.fastq\.gz|\.fq\.gz|\.fastq|\.fq)$//; # attempting to remove fastq.gz etc to make filename a little shorter
	$outfile = "$prefix.$outfile" if ($prefix);
	$outfile =~ s/$/_bismark_${aligner_tag}.sam/;
	$outfile = "${basename}.sam" if ($basename); # Output file basename is set using the -B argument

	$bam = 0 if (not defined $bam);

	if ($ambig_bam){
		(my $ambig_bam_out = $outfile) =~ s/sam$/ambig.bam/;
		warn "Ambiguous BAM output: $ambig_bam_out\n";
		open (AMBIBAM,"| $samtools_path view -bSh 2>/dev/null - > $output_dir$ambig_bam_out") or die "Failed to write to $ambig_bam_out: $!\n";
	}

	if ($cram and $multicore > 1){
		### for multicore processing we write out BAM files by default and merge them together as a single CRAM file in the merging step later on.
		### This avoids having to change all the file endings on the way
		$outfile =~ s/sam$/bam/;
		open (OUT,"| $samtools_path view -bSh 2>/dev/null - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
	}
	elsif ($cram){ ### single-core mode, writing out CRAM directly. This will require Samtools version 1.2 or higher!
		$outfile =~ s/sam$/cram/;
		$final_output_filename = "${output_dir}${outfile}";
		open (OUT,"| $samtools_path view -h -C -T $cram_ref 2>/dev/null - > $output_dir$outfile") or die "Failed to write to CRAM file $outfile: $!\nPlease note that this option requires Samtools version 1.2 or higher!\n\n";
	}
	elsif ($bam == 1){ ### Samtools is installed, writing out BAM directly
		$outfile =~ s/sam$/bam/;
		$final_output_filename = "${output_dir}${outfile}";
		open (OUT,"| $samtools_path view -bSh 2>/dev/null - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
	}
	elsif ($bam == 2){ ### no Samtools found on system. Using GZIP compression instead
		$outfile .= '.gz';
		open (OUT,"| gzip -c - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
	}
	else{ # uncompressed output, default
		open (OUT,'>',"$output_dir$outfile") or die "Failed to write to $outfile: $!\n";
	}
	warn "\n>>> Writing bisulfite mapping results to $output_dir$outfile <<<\n\n";

	sleep(1);

	### printing alignment and methylation call summary to a report file
	my $reportfile = $filename;
	$reportfile =~ s/(\.fastq\.gz|\.fq\.gz|\.fastq|\.fq)$//; # attempting to remove fastq.gz etc to make filename a little shorter
	$reportfile = "$prefix.$reportfile" if ($prefix);
	$reportfile =~ s/$/_bismark_${aligner_tag}_SE_report.txt/;
	$reportfile = "${basename}_SE_report.txt" if ($basename); # Output file basename is set using the -B argument

	open (REPORT,'>',"$output_dir$reportfile") or die "Failed to write to $reportfile: $!\n";
	print REPORT "Bismark report for: $sequence_file (version: $bismark_version)\n";

	if ($unmapped){
		my $extension     = ($sequence_file_format eq 'FASTQ') ? 'fq' : 'fa';
		my $unmapped_file = $filename;
		$unmapped_file    = "$prefix.$unmapped_file" if ($prefix);

		if ($basename){ # Output file basename is set using the -B argument
			$unmapped_file = "${basename}_unmapped_reads.${extension}";
		}
		else{
			$unmapped_file =~ s/$/_unmapped_reads.${extension}/;
		}

		if ($multicore > 1){ # written uncompressed here; merge_individual_unmapped_files gzips the merged output
			open (UNMAPPED,'>',"$output_dir$unmapped_file") or die "Failed to write to $unmapped_file: $!\n";
		}
		else{
			$unmapped_file .= '.gz';
			open (UNMAPPED,"| gzip -c - > $output_dir$unmapped_file") or die "Failed to write to $unmapped_file: $!\n";
		}
		warn "Unmapped sequences will be written to $output_dir$unmapped_file\n";
	}

	if ($ambiguous){
		my $extension      = ($sequence_file_format eq 'FASTQ') ? 'fq' : 'fa';
		my $ambiguous_file = $filename;
		$ambiguous_file    = "$prefix.$ambiguous_file" if ($prefix);

		if ($basename){ # Output file basename is set using the -B argument
			$ambiguous_file = "${basename}_ambiguous_reads.${extension}";
		}
		else{
			$ambiguous_file =~ s/$/_ambiguous_reads.${extension}/;
		}

		if ($multicore > 1){ # written uncompressed here; merge_individual_ambiguous_files gzips the merged output
			open (AMBIG,'>',"$output_dir$ambiguous_file") or die "Failed to write to $ambiguous_file: $!\n";
		}
		else{
			$ambiguous_file .= '.gz';
			open (AMBIG,"| gzip -c - > $output_dir$ambiguous_file") or die "Failed to write to $ambiguous_file: $!\n";
		}
		warn "Ambiguously mapping sequences will be written to $output_dir$ambiguous_file\n";
	}

	# one line describing the library type the run was started with
	my $library_note = $directional ? "Option '--directional' specified (default mode): alignments to complementary strands (CTOT, CTOB) were ignored (i.e. not performed)\n"
	                 : $pbat        ? "Option '--pbat' specified: alignments to original strands (OT and OB) strands were ignored (i.e. not performed)\n"
	                 :                "Option '--non_directional' specified: alignments to all strands were being performed (OT, OB, CTOT, CTOB)\n";
	print REPORT $library_note;

	# one line naming the aligner and the options it was run with
	my $aligner_name = $bowtie2 ? 'Bowtie 2'
	                 : $mm2     ? 'minimap2'
	                 :            'HISAT2';
	print REPORT "Bismark was run with $aligner_name against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n";

	generate_SAM_header() unless ($sam_no_hd);

	if ($sequence_file_format eq 'FASTA'){
		### Input file is in FastA format
		process_single_end_fastA_file_for_methylation_call($sequence_file,$C_to_T_infile,$G_to_A_infile,$pid);
	}
	else{
		### Input file is in FastQ format
		process_single_end_fastQ_file_for_methylation_call($sequence_file,$C_to_T_infile,$G_to_A_infile,$pid);
	}
}

sub start_methylation_call_procedure_paired_ends {
	my ($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2,$pid) = @_;
	my ($dir_1,$filename_1);

	if ($sequence_file_1 =~ /\//){
		($dir_1,$filename_1) = $sequence_file_1 =~ m/(.*\/)(.*)$/;
	}
	else{
		$filename_1 = $sequence_file_1;
	}

	my ($dir_2,$filename_2);

	if  ($sequence_file_2 =~ /\//){
		($dir_2,$filename_2) = $sequence_file_2 =~ m/(.*\/)(.*)$/;
	}
	else{
		$filename_2 = $sequence_file_2;
	}

	### printing all alignments to a results file
	my $outfile = $filename_1;
	# warn "Outfile: $outfile\n";
	$outfile =~ s/(\.fastq\.gz|\.fq\.gz|\.fastq|\.fq)$//; # attempting to remove fastq.gz etc to make filename a little shorter
	# warn "Outfile: $outfile\n";sleep(5);

	if ($prefix){
		$outfile = "$prefix.$outfile";
	}
	if ($bowtie2){ # SAM format is the default Bowtie 2 output
		$outfile =~ s/$/_bismark_bt2_pe.sam/;
	}
	else{ # SAM format is the default for HISAT2
		$outfile =~ s/$/_bismark_hisat2_pe.sam/;
	}

	if ($basename){ # Output file basename is set using the -B argument
		$outfile = "${basename}_pe.sam";
	}

	if ($ambig_bam){
		my $ambig_bam_out = $outfile;
		$ambig_bam_out =~ s/sam$/ambig.bam/;
		warn "Ambiguous BAM output: $ambig_bam_out\n";
		open (AMBIBAM,"| $samtools_path view -bSh 2>/dev/null - > $output_dir$ambig_bam_out") or die "Failed to write to $ambig_bam_out: $!\n";
	}

	$bam = 0 unless (defined $bam);

	if ($cram){ ### Samtools is installed, writing out CRAM directly. This qill require Samtools version 1.2 or higher!
		if ($multicore > 1){
			$outfile =~ s/sam$/bam/;
			open (OUT,"| $samtools_path view -bSh 2>/dev/null - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
		}
		else{ # single-core mode
			$outfile =~ s/sam$/cram/;
			$final_output_filename = "${output_dir}${outfile}";
			open (OUT,"| $samtools_path view -h -C -T $cram_ref 2>/dev/null - > $output_dir$outfile") or die "Failed to write to CRAM file $outfile: $!\nPlease note that this option requires Samtools version 1.2 or higher!\n\n";
		}
	}
	elsif ($bam == 1){ ### Samtools is installed, writing out BAM directly
		$outfile =~ s/sam$/bam/;
		$final_output_filename = "${output_dir}${outfile}";
		open (OUT,"| $samtools_path view -bSh 2>/dev/null - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
	}
	elsif($bam == 2){ ### no Samtools found on system. Using GZIP compression instead
		$outfile .= '.gz';
		open (OUT,"| gzip -c - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
	}
	else{ # uncompressed ouput, default
		open (OUT,'>',"$output_dir$outfile") or die "Failed to write to $outfile: $!\n";
	}

	warn "\n>>> Writing bisulfite mapping results to $outfile <<<\n\n";
	sleep(1);

  
	### printing alignment and methylation call summary to a report file
	my $reportfile = $filename_1;
	$reportfile =~ s/(\.fastq\.gz|\.fq\.gz|\.fastq|\.fq)$//; # attempting to remove fastq.gz etc to make filename a little shorter

	if ($prefix){
		$reportfile = "$prefix.$reportfile";
	}

	if ($bowtie2){
		$reportfile =~ s/$/_bismark_bt2_PE_report.txt/;
	}
	else{
		$reportfile =~ s/$/_bismark_hisat2_PE_report.txt/;
	}

	if ($basename){ # Output file basename is set using the -B argument
		$reportfile = "${basename}_PE_report.txt";
	}

	open (REPORT,'>',"$output_dir$reportfile") or die "Failed to write to $reportfile: $!\n";
	print REPORT "Bismark report for: $sequence_file_1 and $sequence_file_2 (version: $bismark_version)\n";

	if ($bowtie2){
		print REPORT "Bismark was run with Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n";
	}
	else{
		print REPORT "Bismark was run with HISAT2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n";
	}


	### Unmapped read output
	if ($unmapped){
		my $unmapped_1 = $filename_1;
		my $unmapped_2 = $filename_2;

		if ($prefix){
			$unmapped_1 = "$prefix.$unmapped_1";
			$unmapped_2 = "$prefix.$unmapped_2";
		}

		if ($basename){ # Output file basename is set using the -B argument
			if ($sequence_file_format eq 'FASTQ'){
				$unmapped_1 = "${basename}_unmapped_reads_1.fq";
				$unmapped_2 = "${basename}_unmapped_reads_2.fq";
			}
			else{
				$unmapped_1 = "${basename}_unmapped_reads_1.fa";
				$unmapped_2 = "${basename}_unmapped_reads_2.fa";
			}
		}
		else{
			if ($sequence_file_format eq 'FASTQ'){
				$unmapped_1 =~ s/$/_unmapped_reads_1.fq/;
				$unmapped_2 =~ s/$/_unmapped_reads_2.fq/;
			}
			else{
				$unmapped_1 =~ s/$/_unmapped_reads_1.fa/;
				$unmapped_2 =~ s/$/_unmapped_reads_2.fa/;
			}
		}

		if ($multicore > 1){ # unmapped files are merged into .gz files in multicore runs anyway
			open (UNMAPPED_1,'>',"$output_dir$unmapped_1") or die "Failed to write to $unmapped_1: $!\n";
			open (UNMAPPED_2,'>',"$output_dir$unmapped_2") or die "Failed to write to $unmapped_2: $!\n";
		}	
		else{
			$unmapped_1 .= '.gz';
			$unmapped_2 .= '.gz';
			open (UNMAPPED_1,"| gzip -c - > $output_dir$unmapped_1") or die "Failed to write to $unmapped_1: $!\n";
			open (UNMAPPED_2,"| gzip -c - > $output_dir$unmapped_2") or die "Failed to write to $unmapped_2: $!\n";
		}
		warn "Unmapped sequences will be written to $unmapped_1 and $unmapped_2\n";
	}

	if ($ambiguous){
		my $amb_1 = $filename_1;
		my $amb_2 = $filename_2;

		if ($prefix){
			$amb_1 = "$prefix.$amb_1";
			$amb_2 = "$prefix.$amb_2";
		}

		if ($basename){ # Output file basename is set using the -B argument
			if ($sequence_file_format eq 'FASTQ'){
				$amb_1 = "${basename}_ambiguous_reads_1.fq";
				$amb_2 = "${basename}_ambiguous_reads_2.fq";
			}
			else{
				$amb_1 = "${basename}_ambiguous_reads_1.fa";
				$amb_2 = "${basename}_ambiguous_reads_2.fa";
			}
		}
		else{
			if ($sequence_file_format eq 'FASTQ'){
				$amb_1 =~ s/$/_ambiguous_reads_1.fq/;
				$amb_2 =~ s/$/_ambiguous_reads_2.fq/;
			}
			else{
				$amb_1 =~ s/$/_ambiguous_reads_1.fa/;
				$amb_2 =~ s/$/_ambiguous_reads_2.fa/;
			}
		}

		if ($multicore > 1){ # ambiguous files are merged into .gz files in multicore runs anyway
			open (AMBIG_1,'>',"$output_dir$amb_1") or die "Failed to write to $amb_1: $!\n";
			open (AMBIG_2,'>',"$output_dir$amb_2") or die "Failed to write to $amb_2: $!\n";
		}
			else{
			$amb_1 .= '.gz';
			$amb_2 .= '.gz';
			open (AMBIG_1,"| gzip -c - > $output_dir$amb_1") or die "Failed to write to $amb_1: $!\n";
			open (AMBIG_2,"| gzip -c - > $output_dir$amb_2") or die "Failed to write to $amb_2: $!\n";
		}
		warn "Ambiguously mapping sequences will be written to $amb_1 and $amb_2\n";
	}

	if ($directional){
		print REPORT "Option '--directional' specified (default mode): alignments to complementary strands (CTOT, CTOB) were ignored (i.e. not performed)\n\n";
	}
	elsif ($pbat){
		print REPORT "Option '--pbat' specified: alignments to original strands (OT, OB) were ignored (i.e. not performed)\n\n";
	}
	else{
		print REPORT "Option '--non_directional' specified: alignments to all strands were being performed (OT, OB, CTOT, CTOB)\n\n";
	}

	unless ($sam_no_hd){
		generate_SAM_header();
	}

	### Input files are in FastA format
	if ($sequence_file_format eq 'FASTA'){
		process_fastA_files_for_paired_end_methylation_calls($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2,$pid);
	}
	### Input files are in FastQ format
	else{
		process_fastQ_files_for_paired_end_methylation_calls($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2,$pid);
	}
}

sub print_final_analysis_report_single_end{
	### Prints the final alignment and cytosine methylation summary of a single-end run, both to the
	### screen (STDERR via warn, STDOUT via print) and to the REPORT filehandle, using the totals
	### that have been collected in %counting.
	###
	### Arguments:
	###   $C_to_T_infile, $G_to_A_infile - names (relative to $temp_dir) of the temporary converted read files
	###   $pid                           - accepted for interface compatibility, not used in this subroutine
	###   $merge_multi                   - if true, a merged report is announced and no temporary files are deleted
	###
	### REPORT is only written to here; it is neither opened nor closed by this subroutine.
	my ($C_to_T_infile,$G_to_A_infile,$pid,$merge_multi) = @_;

	if ($merge_multi){
		warn "Printing a final merged alignment report for all individual sub-reports\n\n";
	}
	### All sequences from the original sequence file have been analysed now, deleting temporary C->T or G->A infiles
	elsif ($directional){
		### only the C->T converted infile is deleted in directional mode
		my $deletion_successful = unlink "$temp_dir$C_to_T_infile";
		if ($deletion_successful == 1){
			warn "\nSuccessfully deleted the temporary file $temp_dir$C_to_T_infile\n\n";
		}
		else{
			warn "Could not delete temporary file $C_to_T_infile properly $!\n";
		}
	}
	elsif ($pbat){
		### only the G->A converted infile is deleted in PBAT mode
		my $deletion_successful = unlink "$temp_dir$G_to_A_infile";
		if ($deletion_successful == 1){
			warn "\nSuccessfully deleted the temporary file $temp_dir$G_to_A_infile\n\n";
		}
		else{
			warn "Could not delete temporary file $G_to_A_infile properly $!\n";
		}
	}
	else{
		### otherwise both converted infiles are deleted; unlink returns the number of files it removed
		my $deletion_successful = unlink "$temp_dir$C_to_T_infile","$temp_dir$G_to_A_infile";
		if ($deletion_successful == 2){
			warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile and $temp_dir$G_to_A_infile\n\n";
		}
		else{
			warn "Could not delete temporary files properly $!\n";
		}
	}

	### printing a final report for the alignment procedure
	my @alignment_header = ("Final Alignment report\n", '=' x 22, "\n");
	print REPORT @alignment_header;
	warn @alignment_header;

	### printing a final report for the methylation call procedure
	warn "Sequences analysed in total:\t$counting{sequences_count}\n";
	print REPORT "Sequences analysed in total:\t$counting{sequences_count}\n";

	### the mapping efficiency is reported as 0 if no sequences were analysed (avoids a division by zero)
	my $percent_alignable_sequences = 0;
	unless ($counting{sequences_count} == 0){
		$percent_alignable_sequences = sprintf ("%.1f",$counting{unique_best_alignment_count}*100/$counting{sequences_count});
	}

	### the screen version ends with an additional blank line, the report version does not
	warn "Number of alignments with a unique best hit from the different alignments:\t$counting{unique_best_alignment_count}\nMapping efficiency:\t${percent_alignable_sequences}%\n\n";
	print REPORT "Number of alignments with a unique best hit from the different alignments:\t$counting{unique_best_alignment_count}\nMapping efficiency:\t${percent_alignable_sequences}%\n";

	### The percentage of overruled low complexity alignments used to be calculated here, but the value
	### was never printed (its only consumer was commented out), so the calculation has been removed.

	my @alignment_details = (
		"Sequences with no alignments under any condition:\t$counting{no_single_alignment_found}\n",
		"Sequences did not map uniquely:\t$counting{unsuitable_sequence_count}\n",
		"Sequences which were discarded because genomic sequence could not be extracted:\t$counting{genomic_sequence_could_not_be_extracted_count}\n\n",
		"Number of sequences with unique best (first) alignment came from the bowtie output:\n",
	);
	my @strand_origins = (
		"CT/CT:\t$counting{CT_CT_count}\t((converted) top strand)",
		"CT/GA:\t$counting{CT_GA_count}\t((converted) bottom strand)",
		"GA/CT:\t$counting{GA_CT_count}\t(complementary to (converted) top strand)",
		"GA/GA:\t$counting{GA_GA_count}\t(complementary to (converted) bottom strand)",
	);

	### these details go to STDOUT (not STDERR) ...
	foreach my $line (@alignment_details){
		print $line;
	}
	print join ("\n",@strand_origins),"\n\n";

	### ... and in identical form to the report file
	foreach my $line (@alignment_details){
		print REPORT $line;
	}
	print REPORT join ("\n",@strand_origins),"\n\n";

	if ($directional){
		print "Number of alignments to (merely theoretical) complementary strands being rejected in total:\t$counting{alignments_rejected_count}\n\n";
		print REPORT "Number of alignments to (merely theoretical) complementary strands being rejected in total:\t$counting{alignments_rejected_count}\n\n";
	}

	### detailed information about Cs analysed
	my @methylation_header = ("Final Cytosine Methylation Report\n", '=' x 33, "\n");

	### Cs in Unknown context are listed separately below but are not part of this total
	my $total_number_of_C = $counting{total_meCHH_count}+$counting{total_meCHG_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CpG_count};

	my @cytosine_counts = (
		"Total number of C's analysed:\t$total_number_of_C\n\n",
		"Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n",
		"Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n",
		"Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n",
		"Total methylated C's in Unknown context:\t$counting{total_meC_unknown_count}\n\n",
		"Total unmethylated C's in CpG context:\t$counting{total_unmethylated_CpG_count}\n",
		"Total unmethylated C's in CHG context:\t$counting{total_unmethylated_CHG_count}\n",
		"Total unmethylated C's in CHH context:\t$counting{total_unmethylated_CHH_count}\n",
		"Total unmethylated C's in Unknown context:\t$counting{total_unmethylated_C_unknown_count}\n\n",
	);

	warn @methylation_header;
	foreach my $line (@cytosine_counts){
		warn $line;
	}

	print REPORT @methylation_header;
	foreach my $line (@cytosine_counts){
		print REPORT $line;
	}

	### returns the methylation percentage with one decimal place, or undef if no Cs were seen in that context
	my $percent_methylated = sub {
		my ($methylated,$unmethylated) = @_;
		my $calls = $methylated + $unmethylated;
		return undef unless ($calls > 0);
		return sprintf("%.1f",100*$methylated/$calls);
	};

	my $percent_meCpG       = $percent_methylated->($counting{total_meCpG_count},$counting{total_unmethylated_CpG_count});
	my $percent_meCHG       = $percent_methylated->($counting{total_meCHG_count},$counting{total_unmethylated_CHG_count});
	my $percent_meCHH       = $percent_methylated->($counting{total_meCHH_count},$counting{total_unmethylated_CHH_count});
	my $percent_meC_unknown = $percent_methylated->($counting{total_meC_unknown_count},$counting{total_unmethylated_C_unknown_count});

	### printing the methylation percentage for each context if applicable. A percentage of '0.0' is a
	### valid result and gets printed; only contexts without any calls (undef) produce the "Can't determine" message
	foreach my $context_result (
		['CpG context',$percent_meCpG],
		['CHG context',$percent_meCHG],
		['CHH context',$percent_meCHH],
		['Unknown context (CN or CHN)',$percent_meC_unknown],
	){
		my ($context,$percentage) = @{$context_result};
		if (defined $percentage){
			warn "C methylated in ${context}:\t${percentage}%\n";
			print REPORT "C methylated in ${context}:\t${percentage}%\n";
		}
		else{
			warn "Can't determine percentage of methylated Cs in ${context} if value was 0\n";
			print REPORT "Can't determine percentage of methylated Cs in ${context} if value was 0\n";
		}
	}
	print REPORT "\n\n";
	warn "\n\n";

	if ($seqID_contains_tabs){
		warn "The sequence IDs in the provided file contain tab-stops which might prevent sequence alignments. If this happened, please replace all tab characters within the seqID field with spaces before running Bismark.\n\n";
		print REPORT "The sequence IDs in the provided file contain tab-stops which might prevent sequence alignments. If this happened, please replace all tab characters within the seqID field with spaces before running Bismark.\n\n";
	}
}

sub print_final_analysis_report_paired_ends{
	### Prints the final alignment and cytosine methylation summary of a paired-end run, both to the
	### screen (STDERR via warn, STDOUT via print) and to the REPORT filehandle, using the totals
	### that have been collected in %counting.
	###
	### Arguments:
	###   $C_to_T_infile_1, $G_to_A_infile_1, $C_to_T_infile_2, $G_to_A_infile_2
	###                 - names (relative to $temp_dir) of the temporary converted read files
	###   $pid          - accepted for interface compatibility, not used in this subroutine
	###   $merge_multi  - if true, a merged report is announced and no temporary files are deleted
	###
	### REPORT is only written to here; it is neither opened nor closed by this subroutine.
	my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2,$pid,$merge_multi) = @_;

	if ($merge_multi){
		warn "Printing a final merged alignment report for all individual sub-reports\n\n";
	}
	### All sequences from the original sequence files have been analysed now, therefore deleting temporary C->T or G->A infiles
	elsif ($directional){
		my @temp_files = ("$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_2");
		my $deleted = unlink @temp_files;   # number of files which were removed
		if ($deleted == 2){
			warn "\nSuccessfully deleted the temporary files $temp_files[0] and $temp_files[1]\n\n";
		}
		else{
			warn "Could not delete temporary files $temp_files[0] and $temp_files[1] properly: $!\n";
		}
	}
	elsif ($pbat){
		# PBAT data should only have 2 files to delete, similar to directional files
		my @temp_files = ("$temp_dir$G_to_A_infile_1","$temp_dir$C_to_T_infile_2");
		my $deleted = unlink @temp_files;
		if ($deleted == 2){
			warn "\nSuccessfully deleted the temporary files $temp_files[0] and $temp_files[1]\n\n";
		}
		else{
			warn "Could not delete temporary files $temp_files[0] and $temp_files[1] properly: $!\n";
		}
	}
	else{ # non-directional, all four converted infiles are deleted
		my @temp_files = ("$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_1","$temp_dir$C_to_T_infile_2","$temp_dir$G_to_A_infile_2");
		my $deleted = unlink @temp_files;
		if ($deleted == 4){
			warn "\nSuccessfully deleted the temporary files $temp_files[0], $temp_files[1], $temp_files[2] and $temp_files[3]\n\n";
		}
		else{
			warn "Could not delete temporary files properly: $!\n";
		}
	}

	### printing a final report for the alignment procedure
	my @alignment_header = ("Final Alignment report\n", '=' x 22, "\n");
	warn @alignment_header;
	print REPORT @alignment_header;

	### printing a final report for the methylation call procedure
	my $pairs_analysed = "Sequence pairs analysed in total:\t$counting{sequences_count}\n";
	warn $pairs_analysed;
	print REPORT $pairs_analysed;

	### the mapping efficiency is reported as 0 if no sequence pairs were analysed (avoids a division by zero)
	my $percent_alignable_sequence_pairs = ($counting{sequences_count} == 0)
		? 0
		: sprintf ("%.1f",$counting{unique_best_alignment_count}*100/$counting{sequences_count});

	### the STDOUT and REPORT versions of this line deliberately keep their different line endings
	print "Number of paired-end alignments with a unique best hit:\t$counting{unique_best_alignment_count}\nMapping efficiency:\t${percent_alignable_sequence_pairs}%\n\n";
	print REPORT "Number of paired-end alignments with a unique best hit:\t$counting{unique_best_alignment_count}\nMapping efficiency:\t${percent_alignable_sequence_pairs}% \n";

	my @alignment_details = (
		"Sequence pairs with no alignments under any condition:\t$counting{no_single_alignment_found}\n",
		"Sequence pairs did not map uniquely:\t$counting{unsuitable_sequence_count}\n",
		"Sequence pairs which were discarded because genomic sequence could not be extracted:\t$counting{genomic_sequence_could_not_be_extracted_count}\n\n",
		"Number of sequence pairs with unique best (first) alignment came from the bowtie output:\n",
	);
	my @strand_origins = (
		"CT/GA/CT:\t$counting{CT_GA_CT_count}\t((converted) top strand)",
		"GA/CT/CT:\t$counting{GA_CT_CT_count}\t(complementary to (converted) top strand)",
		"GA/CT/GA:\t$counting{GA_CT_GA_count}\t(complementary to (converted) bottom strand)",
		"CT/GA/GA:\t$counting{CT_GA_GA_count}\t((converted) bottom strand)",
	);

	### these details go to STDOUT (not STDERR) ...
	foreach my $line (@alignment_details){
		print $line;
	}
	print join ("\n",@strand_origins),"\n\n";

	### ... and in identical form to the report file
	foreach my $line (@alignment_details){
		print REPORT $line;
	}
	print REPORT join ("\n",@strand_origins),"\n\n";

	if ($directional){
		my $rejected = "Number of alignments to (merely theoretical) complementary strands being rejected in total:\t$counting{alignments_rejected_count}\n\n";
		print $rejected;
		print REPORT $rejected;
	}

	### detailed information about Cs analysed
	my @methylation_header = ("Final Cytosine Methylation Report\n", '=' x 33, "\n");
	warn @methylation_header;
	print REPORT @methylation_header;

	### Cs in Unknown context are listed separately below but are not part of this total
	my $total_number_of_C = 0;
	foreach my $counter (qw(total_meCHG_count total_meCHH_count total_meCpG_count total_unmethylated_CHG_count total_unmethylated_CHH_count total_unmethylated_CpG_count)){
		$total_number_of_C += $counting{$counter};
	}

	my @cytosine_counts = (
		"Total number of C's analysed:\t$total_number_of_C\n\n",
		"Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n",
		"Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n",
		"Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n",
		"Total methylated C's in Unknown context:\t$counting{total_meC_unknown_count}\n\n",
		"Total unmethylated C's in CpG context:\t$counting{total_unmethylated_CpG_count}\n",
		"Total unmethylated C's in CHG context:\t$counting{total_unmethylated_CHG_count}\n",
		"Total unmethylated C's in CHH context:\t$counting{total_unmethylated_CHH_count}\n",
		"Total unmethylated C's in Unknown context:\t$counting{total_unmethylated_C_unknown_count}\n\n",
	);
	foreach my $line (@cytosine_counts){
		warn $line;
	}
	foreach my $line (@cytosine_counts){
		print REPORT $line;
	}

	### returns the methylation percentage with one decimal place, or undef if no Cs were seen in that context
	my $percent_methylated = sub {
		my ($methylated,$unmethylated) = @_;
		return undef unless (($methylated+$unmethylated) > 0);
		return sprintf("%.1f",100*$methylated/($methylated+$unmethylated));
	};

	my $percent_meCpG       = $percent_methylated->($counting{total_meCpG_count},$counting{total_unmethylated_CpG_count});
	my $percent_meCHG       = $percent_methylated->($counting{total_meCHG_count},$counting{total_unmethylated_CHG_count});
	my $percent_meCHH       = $percent_methylated->($counting{total_meCHH_count},$counting{total_unmethylated_CHH_count});
	my $percent_meC_unknown = $percent_methylated->($counting{total_meC_unknown_count},$counting{total_unmethylated_C_unknown_count});

	### printing the methylation percentage for each context if applicable (CpG, CHG, CHH, then Unknown C context)
	foreach my $context_result (
		['CpG context',$percent_meCpG],
		['CHG context',$percent_meCHG],
		['CHH context',$percent_meCHH],
		['Unknown context (CN or CHN)',$percent_meC_unknown],
	){
		my ($context,$percentage) = @{$context_result};
		if ($percentage){
			warn "C methylated in ${context}:\t${percentage}%\n";
			print REPORT "C methylated in ${context}:\t${percentage}%\n";
		}
		else{
			warn "Can't determine percentage of methylated Cs in ${context} if value was 0\n";
			print REPORT "Can't determine percentage of methylated Cs in ${context} if value was 0\n";
		}
	}

	print REPORT "\n\n";
	warn "\n\n";

}

sub process_single_end_fastA_file_for_methylation_call{
	### Reads a single-end FastA sequence file record by record (one header line followed by one sequence line)
	### and passes every sequence to check_results_single_end(); we need the actual sequence to compare it
	### against the genomic sequence in order to make a methylation call.
	###
	### Arguments:
	###   $sequence_file                 - FastA input file, read through gunzip if its name ends in .gz
	###   $C_to_T_infile, $G_to_A_infile - temporary converted read files, handed on to the final report subroutine
	###   $pid                           - handed on to the final report subroutine unchanged
	###
	### Honours $skip and $upto, increments $counting{sequences_count} for every processed sequence, closes
	### OUT (and AMBIG/UNMAPPED if --ambiguous/--un were specified) and prints the final analysis report.
	### NOTE(review): OUT, AMBIG and UNMAPPED are not opened in this subroutine - presumably by the caller.
	my ($sequence_file,$C_to_T_infile,$G_to_A_infile,$pid) = @_;

	### gzipped version of the infile. Using the list form of the pipe open and a 3-argument open so that the
	### file name is never interpreted by a shell or as an open mode (e.g. file names containing spaces)
	if ($sequence_file =~ /\.gz$/){
		open (IN,'-|','gunzip','-c',$sequence_file) or die "Failed to open gunzip -c pipe to $sequence_file: $!\n";
	}
	else{
		open (IN,'<',$sequence_file) or die "Failed to read from $sequence_file: $!\n";
	}

	my $count = 0;

	warn "\nReading in the sequence file $sequence_file\n";
	while (1) {
		my $identifier = <IN>;
		my $sequence = <IN>;
		### stopping at the end of the file (or at an incomplete final record)
		last unless (defined $identifier and defined $sequence);

		chomp $sequence;
		chomp $identifier;

		$identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces

		++$count;

		### $count keeps track of all records read, so that --skip and --upto refer to positions in the input file
		if ($skip){
			next unless ($count > $skip);
		}
		if ($upto){
			last if ($count > $upto);
		}

		$counting{sequences_count}++;
		if ($counting{sequences_count}%1000000==0) {
			warn "Processed $counting{sequences_count} sequences so far\n";
		}

		$identifier =~ s/^>//; # deletes the > at the beginning of FastA headers

		### a false return value is treated as 0 (the sequence is written to neither of the optional files)
		my $return = check_results_single_end (uc$sequence,$identifier) || 0;

		# print the sequence to ambiguous.out if --ambiguous was specified
		if ($ambiguous and $return == 2){
			print AMBIG ">$identifier\n";
			print AMBIG "$sequence\n";
		}

		# print the sequence to <unmapped.out> file if --un was specified
		elsif ($unmapped and $return == 1){
			print UNMAPPED ">$identifier\n";
			print UNMAPPED "$sequence\n";
		}
	}

	### Releasing the input handle. The return value is deliberately not checked: if --upto stopped the
	### reading early, a gunzip child gets terminated and close reports a failure which is harmless here
	close IN;

	print "Processed $counting{sequences_count} sequences in total\n\n";

	close OUT or warn "Failed to close filehandle OUT: $!\n";
	if ($ambiguous){
		close AMBIG or warn "Failed to close filehandle AMBIG: $!";
	}
	if ($unmapped){
		close UNMAPPED or warn "Failed to close filehandle UNMAPPED: $!";
	}

	print_final_analysis_report_single_end($C_to_T_infile,$G_to_A_infile,$pid);

}

sub process_single_end_fastQ_file_for_methylation_call{
	### Reads a single-end FastQ (Illumina) sequence file record by record (4 lines per record) and passes
	### every sequence together with its quality string to check_results_single_end(); we need the actual
	### sequence to compare it against the genomic sequence in order to make a methylation call.
	###
	### Arguments:
	###   $sequence_file                 - FastQ input file, read through gunzip if its name ends in .gz
	###   $C_to_T_infile, $G_to_A_infile - temporary converted read files, handed on to the final report subroutine
	###   $pid                           - handed on to the final report subroutine unchanged
	###
	### Honours $skip and $upto, increments $counting{sequences_count} for every processed sequence, closes
	### OUT (and AMBIG/UNMAPPED if --ambiguous/--un were specified), prints the final analysis report and
	### finally closes AMBIBAM if $ambig_bam is set.
	### NOTE(review): OUT, AMBIG, UNMAPPED and AMBIBAM are not opened in this subroutine - presumably by the caller.

	my ($sequence_file,$C_to_T_infile,$G_to_A_infile,$pid) = @_;

	### gzipped version of the infile. Using the list form of the pipe open and a 3-argument open so that the
	### file name is never interpreted by a shell or as an open mode (e.g. file names containing spaces)
	if ($sequence_file =~ /\.gz$/){
		open (IN,'-|','gunzip','-c',$sequence_file) or die "Failed to open gunzip -c pipe to $sequence_file: $!\n";
	}
	else{
		open (IN,'<',$sequence_file) or die "Failed to read from $sequence_file: $!\n";
	}

	my $count = 0;

	warn "\nReading in the sequence file $sequence_file\n";

	while (1) {
		my $identifier = <IN>;
		my $sequence = <IN>;
		my $identifier_2 = <IN>;
		my $quality_value = <IN>;
		### Stopping at the end of the file (or at an incomplete final record). Testing for definedness so that
		### a final line consisting of just '0' without a line ending is not mistaken for the end of the file
		last unless (defined $identifier and defined $sequence and defined $identifier_2 and defined $quality_value);

		chomp $identifier;
		$identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces

		++$count;

		### $count keeps track of all records read, so that --skip and --upto refer to positions in the input file
		if ($skip){
			next unless ($count > $skip);
		}
		if ($upto){
			last if ($count > $upto);
		}

		$counting{sequences_count}++;

		if ($counting{sequences_count}%1000000==0) {
			warn "Processed $counting{sequences_count} sequences so far\n";
		}
		chomp $sequence;
		chomp $quality_value;

		$identifier =~ s/^\@//;  # deletes the @ at the beginning of Illumina FastQ headers

		### a false return value is treated as 0 (the sequence is written to neither of the optional files)
		my $return = check_results_single_end (uc$sequence,$identifier,$quality_value) || 0;

		# print the sequence to ambiguous.out if --ambiguous was specified
		if ($ambiguous and $return == 2){
			print AMBIG "\@$identifier\n";
			print AMBIG "$sequence\n";
			print AMBIG $identifier_2;       # was not chomped and still carries its line ending
			print AMBIG "$quality_value\n";
		}

		# print the sequence to <unmapped.out> file if --un was specified
		elsif ($unmapped and $return == 1){
			print UNMAPPED "\@$identifier\n";
			print UNMAPPED "$sequence\n";
			print UNMAPPED $identifier_2;    # was not chomped and still carries its line ending
			print UNMAPPED "$quality_value\n";
		}
	}

	### Releasing the input handle. The return value is deliberately not checked: if --upto stopped the
	### reading early, a gunzip child gets terminated and close reports a failure which is harmless here
	close IN;

	print "Processed $counting{sequences_count} sequences in total\n\n";

	close OUT or warn "Failed to close filehandle OUT: $!";
	if ($ambiguous){
		close AMBIG or warn "Failed to close filehandle AMBIG: $!";
	}
	if ($unmapped){
		close UNMAPPED or warn "Failed to close filehandle UNMAPPED: $!";
	}

	print_final_analysis_report_single_end($C_to_T_infile,$G_to_A_infile,$pid);
	if ($ambig_bam){
		close AMBIBAM or warn "Had trouble closing filehandle AMBIBAM: $!\n";
	}

}

sub process_fastA_files_for_paired_end_methylation_calls{
	### Processing the two FastA sequence files; we need the actual sequences of both reads to compare them against the genomic sequence in order to
	### make a methylation call. The sequence identifier per definition needs to be the same for a sequence pair used for paired-end mapping.
	### Now reading in the sequence files sequence by sequence and see if the current sequences produced an alignment to one (or both) of the
	### converted genomes (either the C->T or G->A version)
	###
	### Arguments:
	###   $sequence_file_1, $sequence_file_2 - FastA input files of read 1 and read 2, each one is read through gunzip if its name ends in .gz
	###   $C_to_T_infile_1, $G_to_A_infile_1, $C_to_T_infile_2, $G_to_A_infile_2
	###                                      - temporary converted read files, handed on to the final report subroutine
	###   $pid                               - handed on to the final report subroutine unchanged
	###
	### Honours $skip and $upto, increments $counting{sequences_count} for every processed sequence pair, closes
	### OUT (and AMBIG_1/2 and UNMAPPED_1/2 if --ambiguous/--un were specified) and prints the final analysis report.
	my ($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2,$pid) = @_;

	### Gzipped versions of the infiles. The compression is determined for each file individually, so that a pair consisting
	### of one gzipped and one uncompressed file is read correctly as well. Using the list form of the pipe open and a
	### 3-argument open so that file names are never interpreted by a shell or as an open mode
	if ($sequence_file_1 =~ /\.gz$/){
		open (IN1,'-|','gunzip','-c',$sequence_file_1) or die "Failed to open gunzip -c pipe to $sequence_file_1 $!\n";
	}
	else{
		open (IN1,'<',$sequence_file_1) or die "Failed to read from $sequence_file_1: $!\n";
	}

	if ($sequence_file_2 =~ /\.gz$/){
		open (IN2,'-|','gunzip','-c',$sequence_file_2) or die "Failed to open gunzip -c pipe to $sequence_file_2 $!\n";
	}
	else{
		open (IN2,'<',$sequence_file_2) or die "Failed to read from $sequence_file_2: $!\n";
	}

	warn "\nReading in the sequence files $sequence_file_1 and $sequence_file_2\n";
	### Both files are required to have the exact same number of sequences, therefore we can process the sequences jointly one by one

	my $count = 0;

	while (1) {
		# reading from the first input file
		my $identifier_1 = <IN1>;
		my $sequence_1 = <IN1>;
		# reading from the second input file
		my $identifier_2 = <IN2>;
		my $sequence_2 = <IN2>;
		### stopping as soon as either of the files has reached its end (or an incomplete final record)
		last unless (defined $identifier_1 and defined $sequence_1 and defined $identifier_2 and defined $sequence_2);

		chomp $sequence_1;
		chomp $identifier_1;
		chomp $sequence_2;
		chomp $identifier_2;

		$identifier_1 = fix_IDs($identifier_1); # this is to avoid problems with truncated read ID when they contain white spaces
		$identifier_2 = fix_IDs($identifier_2);

		++$count;

		### $count keeps track of all records read, so that --skip and --upto refer to positions in the input files
		if ($skip){
			next unless ($count > $skip);
		}
		if ($upto){
			last if ($count > $upto);
		}

		$counting{sequences_count}++;
		if ($counting{sequences_count}%1000000==0) {
			warn "Processed $counting{sequences_count} sequence pairs so far\n";
		}

		### the headers (still including the leading >) are kept for the optional ambiguous/unmapped output
		my $orig_identifier_1 = $identifier_1;
		my $orig_identifier_2 = $identifier_2;

		$identifier_1 =~ s/^>//; # deletes the > at the beginning of FastA headers

		### only the identifier of read 1 is passed on; a false return value is treated as 0
		my $return = check_results_paired_end (uc$sequence_1,uc$sequence_2,$identifier_1) || 0;

		# print the sequences to ambiguous_1 and _2 if --ambiguous was specified
		if ($ambiguous and $return == 2){
			print AMBIG_1 "$orig_identifier_1\n";
			print AMBIG_1 "$sequence_1\n";
			print AMBIG_2 "$orig_identifier_2\n";
			print AMBIG_2 "$sequence_2\n";
		}

		# print the sequences to unmapped_1.out and unmapped_2.out if --un was specified
		elsif ($unmapped and $return == 1){
			print UNMAPPED_1 "$orig_identifier_1\n";
			print UNMAPPED_1 "$sequence_1\n";
			print UNMAPPED_2 "$orig_identifier_2\n";
			print UNMAPPED_2 "$sequence_2\n";
		}
	}

	### Releasing the input handles. The return values are deliberately not checked: if --upto stopped the
	### reading early, a gunzip child gets terminated and close reports a failure which is harmless here
	close IN1;
	close IN2;

	warn "Processed $counting{sequences_count} sequences in total\n\n";
	close OUT or die "Failed to close filehandle OUT: $!\n";
	if ($ambiguous){
		close AMBIG_1 or warn "Failed to close filehandle AMBIG_1: $!";
		close AMBIG_2 or warn "Failed to close filehandle AMBIG_2: $!";
	}
	if ($unmapped){
		close UNMAPPED_1 or warn "Failed to close filehandle UNMAPPED_1: $!";
		close UNMAPPED_2 or warn "Failed to close filehandle UNMAPPED_2: $!";
	}
	print_final_analysis_report_paired_ends($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2,$pid);

}

### Reads two FastQ files in lockstep (one record of 4 lines from each file per iteration) and hands every
### read pair to check_results_paired_end(), which compares it against the aligner output.
###
### Arguments:
###   $sequence_file_1, $sequence_file_2   - FastQ files for read 1 and read 2; if BOTH end in .gz they are
###                                          decompressed on the fly via 'gunzip -c'
###   $C_to_T_infile_1 ... $G_to_A_infile_2, $pid - not used here, only passed on to
###                                          print_final_analysis_report_paired_ends()
###
### Uses the file-level settings $skip, $upto, $ambiguous, $unmapped and $ambig_bam, updates
### $counting{sequences_count}, and closes the output filehandles OUT, AMBIG_1/2, UNMAPPED_1/2 and AMBIBAM
### (the latter ones only if the corresponding option was set) once all pairs have been processed.
sub process_fastQ_files_for_paired_end_methylation_calls{

	my ($sequence_file_1,$sequence_file_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2,$pid) = @_;
	### Processing the two Illumina sequence files; we need the actual sequence of both reads to compare them against the genomic sequence in order to
	### make a methylation call. The sequence identifier per definition needs to be same for a sequence pair used for paired-end alignments.
	### Now reading in the sequence files sequence by sequence and see if the current sequences produced a paired-end alignment to one (or both)
	### of the converted genomes (either C->T or G->A version)

	### gzipped version of the infiles
	### The list form of the pipe open bypasses the shell, so file names containing spaces or shell metacharacters are passed to gunzip verbatim
	if ($sequence_file_1 =~ /\.gz$/ and $sequence_file_2 =~ /\.gz$/){
		open (IN1,'-|','gunzip','-c',$sequence_file_1) or die "Failed to open gunzip -c pipe to $sequence_file_1 $!\n";
		open (IN2,'-|','gunzip','-c',$sequence_file_2) or die "Failed to open gunzip -c pipe to $sequence_file_2 $!\n";
	}
	else{
		### 3-argument open so that the file name can never be interpreted as an open mode or a command
		open (IN1,'<',$sequence_file_1) or die "Failed to read from $sequence_file_1: $!\n";
		open (IN2,'<',$sequence_file_2) or die "Failed to read from $sequence_file_2: $!\n";
	}

	my $count = 0;

	warn "\nReading in the sequence files $sequence_file_1 and $sequence_file_2\n";
	### Both files are required to have the exact same number of sequences, therefore we can process the sequences jointly one by one
	while (1) {
		# reading from the first input file
		my $identifier_1 = <IN1>;
		my $sequence_1 = <IN1>;
		my $ident_1 = <IN1>;         # only needed for writing the read back out (kept un-chomped)
		my $quality_value_1 = <IN1>;
		# reading from the second input file
		my $identifier_2 = <IN2>;
		my $sequence_2 = <IN2>;
		my $ident_2 = <IN2>;         # only needed for writing the read back out (kept un-chomped)
		my $quality_value_2 = <IN2>;

		### End of input: readline returns undef at EOF. Testing for definedness (rather than truth) makes sure that a final line
		### consisting of just '0' without a trailing newline is not mistaken for the end of the file
		my @required_lines = ($identifier_1,$sequence_1,$quality_value_1,$identifier_2,$sequence_2,$quality_value_2);
		my $lines_present  = grep {defined} @required_lines;
		if ($lines_present < scalar @required_lines){
			if ($lines_present){ # some, but not all lines of a pair were found => truncated file or unequal number of reads
				warn "The input files $sequence_file_1 and $sequence_file_2 ended with an incomplete or unpaired FastQ record after $count complete read pairs; ignoring the remaining lines\n";
			}
			last;
		}

		chomp $sequence_1;
		chomp $identifier_1;
		chomp $sequence_2;
		chomp $identifier_2;
		chomp $quality_value_1;
		chomp $quality_value_2;

		$identifier_1 = fix_IDs($identifier_1); # this is to avoid problems with truncated read ID when they contain white spaces
		$identifier_2 = fix_IDs($identifier_2);

		++$count;

		### --skip: ignore the first $skip pairs; --upto: stop once $upto pairs have been seen
		if ($skip){
			next unless ($count > $skip);
		}
		if ($upto){
			last if ($count > $upto);
		}

		$counting{sequences_count}++;
		if ($counting{sequences_count}%1000000==0) {
			warn "Processed $counting{sequences_count} sequence pairs so far\n";
		}

		### the unmodified IDs (still including the leading @) are what gets written to the ambiguous/unmapped files
		my $orig_identifier_1 = $identifier_1;
		my $orig_identifier_2 = $identifier_2;

		$identifier_1 =~ s/^\@//;  # deletes the @ at the beginning of the FastQ ID

		my $return = check_results_paired_end (uc$sequence_1,uc$sequence_2,$identifier_1,$quality_value_1,$quality_value_2);

		unless ($return){
			$return = 0;
		}

		# print the sequences to ambiguous_1 and _2 if --ambiguous was specified (return value 2)
		if ($ambiguous and $return == 2){
			# seq_1
			print AMBIG_1 "$orig_identifier_1\n";
			print AMBIG_1 "$sequence_1\n";
			print AMBIG_1 $ident_1;
			print AMBIG_1 "$quality_value_1\n";
			# seq_2
			print AMBIG_2 "$orig_identifier_2\n";
			print AMBIG_2 "$sequence_2\n";
			print AMBIG_2 $ident_2;
			print AMBIG_2 "$quality_value_2\n";
		}

		# print the sequences to unmapped_1.out and unmapped_2.out if --un was specified (return value 1)
		elsif ($unmapped and $return == 1){
			# seq_1
			print UNMAPPED_1 "$orig_identifier_1\n";
			print UNMAPPED_1 "$sequence_1\n";
			print UNMAPPED_1 $ident_1;
			print UNMAPPED_1 "$quality_value_1\n";
			# seq_2
			print UNMAPPED_2 "$orig_identifier_2\n";
			print UNMAPPED_2 "$sequence_2\n";
			print UNMAPPED_2 $ident_2;
			print UNMAPPED_2 "$quality_value_2\n";
		}

	}

	### Releasing the input handles (and reaping the gunzip processes). The return value is deliberately not checked: if we
	### stopped reading early (--upto) gunzip is terminated by a broken pipe, which is expected and not an error
	close IN1;
	close IN2;

	warn "Processed $counting{sequences_count} sequences in total\n\n";

	close OUT or warn "Failed to close filehandle OUT: $!\n\n";
	if ($ambiguous){
		close AMBIG_1 or warn "Failed to close filehandle AMBIG_1: $!";
		close AMBIG_2 or warn "Failed to close filehandle AMBIG_2: $!";
	}
	if ($unmapped){
		close UNMAPPED_1 or warn "Failed to close filehandle UNMAPPED_1: $!";
		close UNMAPPED_2 or warn "Failed to close filehandle UNMAPPED_2: $!";
	}

	if ($ambig_bam){
		close AMBIBAM or warn "Had trouble closing filehandle AMBIBAM: $!\n\n";
	}

	print_final_analysis_report_paired_ends($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2,$pid);

}

####################################################
### BOWTIE 2 // HISAT2 // MINIMAP2 // SINGLE-END ###
####################################################

### Decides what happens to one single-end read: looks at the current line of every aligner output stream in @fhs,
### determines whether the read has a unique best alignment and, if so, extracts the genomic sequence, performs the
### methylation call and prints the result.
###
### Arguments:
###   $sequence       - the read sequence
###   $identifier     - the read ID (has to be identical to the first column of the aligner output)
###   $quality_value  - the quality string; optional (FastA input), defaults to 'I' (Phred 40) for every base
###
### Returns:
###   2 - read aligned ambiguously and --ambiguous was specified (caller writes it to the ambiguous file)
###   1 - read did not align (or aligned ambiguously without --ambiguous) and --unmapped was specified
###   0 - everything else (read was processed, rejected, or no extra output was requested)
###
### Dies if an alignment line lacks the AS:i: or MD:Z: field, if the chromosome name does not end in
### _CT_converted/_GA_converted, or if more than 4 candidate positions were collected.
sub check_results_single_end{
	my ($sequence,$identifier,$quality_value) = @_;

	### FastA sequences get assigned a quality value of Phred 40 throughout. Testing for definedness/length rather than
	### truth so that the (valid) quality string '0' of a 1 bp read is not replaced
	unless (defined $quality_value and length $quality_value){
		$quality_value = 'I'x(length$sequence);
	}
	# as of version Bowtie 2 2.0.0 beta7, when input reads are unpaired, Bowtie 2 no longer removes the trailing /1 or /2 from the read name.

	my $first_ambig_alignment; # storing the first ambiguous alignment so it can be written out in case '--ambig_bam' was specified
	my $best_AS_so_far;        ## we need to keep a memory of the best alignment score so far
	my $amb_same_thread = 0;   ## if a reads primary and secondary alignments have the same alignment score we set this to true.

	my %alignments = ();       # candidate alignments, keyed by 'chromosome:position'

	### reading from the aligner output filehandles
	foreach my $index (0..$#fhs){

		### skipping this index if the last alignment has been set to undefined already (i.e. end of aligner output)
		next unless ($fhs[$index]->{last_line} and defined $fhs[$index]->{last_seq_id});

		### nothing to do for this instance if its current line belongs to a different read
		next unless ($fhs[$index]->{last_seq_id} eq $identifier);

		my @fields = split (/\t/,$fhs[$index]->{last_line});
		my ($id,$flag,$mapped_chromosome,$position,$cigar,$bowtie_sequence) = @fields[0,1,2,3,5,9];

		## If a sequence has no reported alignments there will be a single output line with a bit-wise flag value of 4. We can store the next alignment and move on to the next instance
		if ($flag == 4){
			## reading in the next alignment, which must be the next sequence
			my $newline = $fhs[$index]->{fh}-> getline();
			if ($newline){
				chomp $newline;
				my ($seq_id) = split (/\t/,$newline);
				$fhs[$index]->{last_seq_id} = $seq_id;
				$fhs[$index]->{last_line} = $newline;
				if ($seq_id eq $identifier){
					die "Sequence with ID $identifier did not produce any alignment, but next seq-ID was also $fhs[$index]->{last_seq_id}!\n";
				}
			}
			else{
				# assigning undef to last_seq_id and last_line (end of aligner output)
				$fhs[$index]->{last_seq_id} = undef;
				$fhs[$index]->{last_line} = undef;
			}
			next; # next instance
		}

		# if there are one or more proper alignments we can extract the chromosome number
		unless ($mapped_chromosome =~ s/_(CT|GA)_converted$//){
			die "Chromosome number extraction failed for $mapped_chromosome\n";
		}
		my $chromosome = $mapped_chromosome;

		### We will use the optional fields to determine the best alignment. Later on we extract the number of mismatches and/or indels from the CIGAR string
		my ($alignment_score,$second_best,$MD_tag);
		foreach my $optional_field (@fields[11..$#fields]){
			if ($optional_field =~ /AS:i:(.*)/){
				$alignment_score = $1;
			}
			elsif ($optional_field =~ /ZS:i:(.*)/){ # HISAT2 uses ZS:i: instead of XS:i:
				$second_best = $1;
			}
			elsif ($optional_field =~ /MD:Z:(.*)/){
				$MD_tag = $1;
			}
			elsif ($bowtie2 and $optional_field =~ /XS:i:(.*)/){ # XS:i: is only evaluated in Bowtie 2 mode
				$second_best = $1;
			}
		}

		### validating BEFORE the alignment score takes part in any numerical comparison
		die "Failed to extract alignment score ($alignment_score) and MD tag ($MD_tag) from line $fhs[$index]->{last_line}!\n" unless (defined $alignment_score and defined $MD_tag);

		my $overwrite = 0; # If we get 2 alignments to the very same position, e.g. to OT with and AS of -156 and to CTOB with and AS of 0 we need the latter to trump the former, else
		# the read will be assigned to the wrong strand which may result in incorrect methylation calls.
		# this was brought to our attention by Sylvain Foret (ANU Canberra), 13 April 2016

		if (!defined $best_AS_so_far){
			$best_AS_so_far = $alignment_score;
			$overwrite++;
			if ($ambig_bam){ # also setting the first_ambig_alignment
				$first_ambig_alignment = $fhs[$index]->{last_line};
				$first_ambig_alignment =~ s/_(CT|GA)_converted//;
			}
		}
		elsif ($alignment_score >= $best_AS_so_far){ # AS are generally negative with a maximum of 0;
			# 19 07 2016: changed this to >= so that equally good alignments are also added. Ambiguous alignments from different threads will be identified later on
			$overwrite++;

			# 22 07 2016: resetting the ambiguous score within same thread only if the current alignment is really better than the previous one
			if ($alignment_score > $best_AS_so_far){
				$amb_same_thread = 0;

				if ($ambig_bam){ # also setting a new first_ambig_alignment
					$first_ambig_alignment = $fhs[$index]->{last_line};
					$first_ambig_alignment =~ s/_(CT|GA)_converted//;
				}
			}
			$best_AS_so_far = $alignment_score; # 26 06 2017: moved this down so that the $amb_same_thread gets a chance to reset
		}

		if (defined $second_best and $alignment_score == $second_best){
			### The alignment is ambiguous within this instance. We only keep a memory of this if it is (one of) the best alignment(s)
			### so far; if there is a better alignment later on -> fine. If not, the read will get booted altogether
			if ($alignment_score == $best_AS_so_far){
				$amb_same_thread = 1;
			}
		}
		elsif ($overwrite){
			### There is either no second best hit, or it has a lower alignment score than the current alignment, so we can safely store it.
			### An alignment to the very same location is only replaced by an alignment that scored at least as well as the best one so far
			my $alignment_location = join (":",$chromosome,$position);
			$alignments{$alignment_location} = {
				seq_id                      => $id,
				alignment_score             => $alignment_score,
				alignment_score_second_best => $second_best, # undef if there was no second best hit
				bowtie_sequence             => $bowtie_sequence,
				index                       => $index,
				chromosome                  => $chromosome,
				position                    => $position,
				CIGAR                       => $cigar,
				MD_tag                      => $MD_tag,
			};
		}

		### now reading and discarding all further alignments of this sequencing read until we hit the next sequence
		_skip_to_next_read_single_end($index,$identifier);
	}

	### If there were several equally good alignments for the best alignment score we will boot the read.
	### If --ambiguous or --unmapped was specified the read sequence will be printed out by the caller.
	if ($amb_same_thread){
		$counting{unsuitable_sequence_count}++;

		if ($ambig_bam){
			print AMBIBAM "$first_ambig_alignment\n";
		}

		return 2 if ($ambiguous); # => printed to the ambiguous reads file
		return 1 if ($unmapped);  # => printed to the unmapped reads file if '--unmapped' but not '--ambiguous' was specified
		return 0;
	}

	### if there was no alignment found for a certain sequence at all we continue with the next sequence in the sequence file
	unless (%alignments){
		$counting{no_single_alignment_found}++;
		return 1 if ($unmapped); # => printed to the unmapped reads file
		return 0;                # default
	}

	#######################################################################################################################################################

	### If the sequence was not rejected so far we are now looking if there is a unique best alignment among all alignment instances. If there is only one
	### single best position we are going to store the alignment information in $methylation_call_params. If there are multiple hits with the same (highest)
	### alignment score we are discarding the sequence altogether.
	### For end-to-end alignments the maximum alignment score can be 0, each mismatch can receive penalties up to 6, and each gap receives penalties for
	### opening (5) and extending (3 per bp) the gap.

	#######################################################################################################################################################

	### candidate positions, best alignment score first
	my @ranked_locations = sort {$alignments{$b}->{alignment_score} <=> $alignments{$a}->{alignment_score}} keys %alignments;

	if (scalar @ranked_locations > 4){
		die "There are too many potential hits for this sequence (1-4 expected, but found: ",scalar keys %alignments,")\n";
	}

	my $best      = $alignments{$ranked_locations[0]};
	my $runner_up = (scalar @ranked_locations > 1) ? $alignments{$ranked_locations[1]} : undef;

	### skipping the sequence completely if there were multiple alignments with the same best alignment score at different positions
	if (defined $runner_up and $runner_up->{alignment_score} == $best->{alignment_score}){
		$counting{unsuitable_sequence_count}++;

		return 2 if ($ambiguous); # => printed to the ambiguous reads file
		return 1 if ($unmapped);  # => printed to the unmapped reads file if '--unmapped' but not '--ambiguous' was specified
		return 0;                 # => exits to next sequence (default)
	}

	### The second best score is the one reported by the aligner for the best alignment; if there were several candidate positions
	### it is that value or the score of the runner-up position, whichever is higher
	my $overall_second_best = $best->{alignment_score_second_best};
	if (defined $runner_up){
		unless (defined $overall_second_best and $overall_second_best > $runner_up->{alignment_score}){
			$overall_second_best = $runner_up->{alignment_score};
		}
	}

	my $methylation_call_params; # hash reference which will store all information we need for the methylation call
	foreach my $key (qw(bowtie_sequence chromosome position index alignment_score MD_tag CIGAR)){
		$methylation_call_params->{$identifier}->{$key} = $best->{$key};
	}
	$methylation_call_params->{$identifier}->{alignment_score_second_best} = $overall_second_best;

	### --DIRECTIONAL
	### If the option --directional has been specified the user wants to consider only alignments to the original top strand or the original bottom strand. We will therefore
	### discard all alignments to strands complementary to the original strands, as they should not exist in reality due to the library preparation protocol
	if ($directional){
		my $strand_index = $methylation_call_params->{$identifier}->{index};
		if ($strand_index == 2 or $strand_index == 3){
			$counting{alignments_rejected_count}++;
			return 0;
		}
	}

	### If the sequence has not been rejected so far it has a unique best alignment
	$counting{unique_best_alignment_count}++;

	### Now we need to extract a genomic sequence that exactly corresponds to the reported alignment. This potentially means that we need to deal with insertions or deletions as well
	extract_corresponding_genomic_sequence_single_end ($identifier,$methylation_call_params);

	### check test to see if the genomic sequence we extracted has the same length as the observed sequence+2, and only then we perform the methylation call
	if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence}) != length($sequence)+2){
		warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{position}\n";
		$counting{genomic_sequence_could_not_be_extracted_count}++;
		return 0;
	}

	# Compute MAPQ value
	$methylation_call_params->{$identifier}->{mapq} = calc_mapq (length($sequence), undef,
							       $methylation_call_params->{$identifier}->{alignment_score},
							       $methylation_call_params->{$identifier}->{alignment_score_second_best});

	### otherwise we are set to perform the actual methylation call (SLAM-seq mode uses its own caller)
	if ($slam){
		$methylation_call_params->{$identifier}->{methylation_call} = methylation_call_slam($identifier,$sequence,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence},$methylation_call_params->{$identifier}->{read_conversion});
	}
	else{
		$methylation_call_params->{$identifier}->{methylation_call} = methylation_call($identifier,$sequence,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence},$methylation_call_params->{$identifier}->{read_conversion});
	}

	print_bisulfite_mapping_result_single_end ($identifier,$sequence,$methylation_call_params,$quality_value);

	return 0; ## if a sequence got this far we do not want to print it to unmapped or ambiguous.out

}

### Helper of check_results_single_end: reads (and discards) lines from aligner instance $index until the line buffered in
### $fhs[$index] belongs to a read other than $identifier. At the end of the aligner output both last_seq_id and last_line
### are set to undef.
sub _skip_to_next_read_single_end{
	my ($index,$identifier) = @_;

	until ($fhs[$index]->{last_seq_id} ne $identifier){
		my $newline = $fhs[$index]->{fh}-> getline();
		if ($newline){
			chomp $newline;
			my ($seq_id) = split (/\t/,$newline);
			$fhs[$index]->{last_seq_id} = $seq_id;
			$fhs[$index]->{last_line} = $newline;
		}
		else{
			# assigning undef to last_seq_id and last_line (end of aligner output)
			$fhs[$index]->{last_seq_id} = undef;
			$fhs[$index]->{last_line} = undef;
			last; # break free in case we have reached the end of the alignment output
		}
	}
}


### SLAM-seq variant of the methylation call. Compares the observed read base by base with the genomic sequence and
### returns a call string with one character per read base:
###   z - genomic T (CT reads) or A (GA reads) that is unchanged in the read; per the SLAM-seq rationale the RNA
###       was then not newly synthesized
###   Z - genomic T read as C (CT reads), or genomic A read as G (GA reads)
###   . - any other position (other bases, and all other mismatches)
###
### Arguments:
###   $identifier                  - read ID (not used for the call itself)
###   $sequence_actually_observed  - the read sequence
###   $genomic_sequence            - the genomic sequence; expected to be 2 bp longer than the read (a warning is
###                                  printed otherwise)
###   $read_conversion             - 'CT' (genomic sequence is compared from its first base) or 'GA' (genomic
###                                  sequence is compared from its third base); anything else is fatal
###
### The numbers of 'Z' and 'z' calls are added to $counting{total_meCpG_count} and
### $counting{total_unmethylated_CpG_count}; the CHG, CHH and unknown-context totals only ever receive 0 here.
sub methylation_call_slam{
	my ($identifier,$sequence_actually_observed,$genomic_sequence,$read_conversion) = @_;

	### splitting both sequences up into single bases so they can be compared one by one
	my @read_bases    = split(//,$sequence_actually_observed);
	my @genomic_bases = split(//,$genomic_sequence);

	unless (scalar @read_bases == scalar(@genomic_bases) - 2){
		warn "length of \@seq: ",scalar @read_bases,"\tlength of \@genomic: ",scalar @genomic_bases,"\n";
	}

	### strand specific settings: where the comparison starts in the genomic sequence, which genomic base is of interest,
	### and which read base indicates that it was converted
	my ($genomic_offset,$base_of_interest,$converted_read_base);
	if ($read_conversion eq 'CT'){
		($genomic_offset,$base_of_interest,$converted_read_base) = (0,'T','C');
	}
	elsif ($read_conversion eq 'GA'){
		($genomic_offset,$base_of_interest,$converted_read_base) = (2,'A','G');
	}
	else{
		die "Strand conversion info is required to perform a methylation call\n";
	}

	my $converted_total   = 0; # number of 'Z' calls
	my $unconverted_total = 0; # number of 'z' calls
	my $methylation_call  = '';

	foreach my $read_pos (0..$#read_bases){
		my $observed = $read_bases[$read_pos];
		my $genomic  = $genomic_bases[$read_pos + $genomic_offset];

		my $call = '.'; # default: position is not informative
		if ($genomic eq $base_of_interest){
			if ($observed eq $genomic){
				$call = 'z';
				++$unconverted_total;
			}
			elsif ($observed eq $converted_read_base){
				$call = 'Z';
				++$converted_total;
			}
		}
		$methylation_call .= $call;
	}

	### only the two CpG totals can change in SLAM-seq mode; the other totals still get 0 added, as before
	$counting{total_meCpG_count}                  += $converted_total;
	$counting{total_unmethylated_CpG_count}       += $unconverted_total;
	$counting{total_meCHH_count}                  += 0;
	$counting{total_meCHG_count}                  += 0;
	$counting{total_meC_unknown_count}            += 0;
	$counting{total_unmethylated_CHH_count}       += 0;
	$counting{total_unmethylated_CHG_count}       += 0;
	$counting{total_unmethylated_C_unknown_count} += 0;

	return $methylation_call;
}


########################################
### BOWTIE 2 // HISAT2 // PAIRED-END ###
########################################

sub check_results_paired_end{

    my ($sequence_1,$sequence_2,$identifier,$quality_value_1,$quality_value_2) = @_;
    
    ### quality values are not given for FastA files, so they are initialised with a Phred quality of 40
    unless ($quality_value_1){
		$quality_value_1 = 'I'x(length$sequence_1);
    }

    unless ($quality_value_2){
		$quality_value_2 = 'I'x(length$sequence_2);
    }
    # print "Read ID:$identifier\nLast ID [0]: $fhs[0]->{last_seq_id}\nLast ID [1]: $fhs[1]->{last_seq_id}\nLast ID [2]: $fhs[2]->{last_seq_id}\nLast ID [3]: $fhs[3]->{last_seq_id}\n\n"; sleep(1);
    
    my %alignments;
    my $alignment_ambiguous = 0;
    
    my $first_ambig_alignment_line1; # storing the first ambiguous alignment so it can be written out in case '--ambig_bam' was specified R1
    my $first_ambig_alignment_line2; # R2
    
    my $best_AS_so_far;   ## we need to keep a memory of the best alignment score so far
    my $amb_same_thread = 0;   ## if a read's primary and secondary alignments have the same alignment score we set this to true.

    ### reading from the Bowtie 2 output filehandles
    
    ### for paired end reads we are reporting alignments to the OT strand first (index 0), then the OB strand (index 3!!), similar to the single end way.
    ### alignments to the complementary strands are reported afterwards (CTOT got index 1, and CTOB got index 2).
    ### This is needed so that alignments which either contain no single C or G or reads which contain only protected Cs are reported to the original strands (OT and OB)
    ### Before the complementary strands. Remember that it does not make any difference for the methylation calls, but it will matter if alignments to the complementary
    ### strands are not being reported when '--directional' is specified

    foreach my $index (0,3,1,2){
		### skipping this index if the last alignment has been set to undefined already (i.e. end of bowtie output)
		next unless ($fhs[$index]->{last_line_1} and $fhs[$index]->{last_line_2} and defined $fhs[$index]->{last_seq_id});

		### if the sequence pair we are currently looking at produced an alignment we are doing various things with it
		if ($fhs[$index]->{last_seq_id} eq $identifier) {
			my ($id_1,$flag_1,$mapped_chromosome_1,$position_1,$mapping_quality_1,$cigar_1,$bowtie_sequence_1,$qual_1) = (split (/\t/,$fhs[$index]->{last_line_1}))[0,1,2,3,4,5,9,10];
			my ($id_2,$flag_2,$mapped_chromosome_2,$position_2,$mapping_quality_2,$cigar_2,$bowtie_sequence_2,$qual_2) = (split (/\t/,$fhs[$index]->{last_line_2}))[0,1,2,3,4,5,9,10];
			# print "Index: $index\t$fhs[$index]->{last_line_1}\n";
			# print "Index: $index\t$fhs[$index]->{last_line_2}\n";
			# print join ("\t",$id_1,$flag_1,$mapped_chromosome_1,$position_1,$mapping_quality_1,$cigar_1,$bowtie_sequence_1,$qual_1),"\n";
			# print join ("\t",$id_2,$flag_2,$mapped_chromosome_2,$position_2,$mapping_quality_2,$cigar_2,$bowtie_sequence_2,$qual_2),"\n";
			$id_1 =~ s/\/1$//;
			$id_2 =~ s/\/2$//;

			### If a sequence has no reported alignments there will be a single output line per sequence with a bit-wise flag value of 77 for read 1 (1+4+8+64), or 141 for read 2 (1+4+8+128).
			### We can store the next alignment and move on to the next Bowtie 2 instance
			if ($flag_1 == 77 and $flag_2 == 141){

				## reading in the next alignment, which must be the next sequence
				my $newline_1 = $fhs[$index]->{fh}-> getline();
				my $newline_2 = $fhs[$index]->{fh}-> getline();
		
				if ($newline_1 and $newline_2){
					chomp $newline_1;
					chomp $newline_2;
					my ($seq_id_1) = split (/\t/,$newline_1);
					my ($seq_id_2) = split (/\t/,$newline_2);
					$seq_id_1 =~ s/\/1$//;
					$seq_id_2 =~ s/\/2$//;
					$fhs[$index]->{last_seq_id} = $seq_id_1;
					$fhs[$index]->{last_line_1} = $newline_1;
					$fhs[$index]->{last_line_2} = $newline_2;
		    
					#  print "current sequence ($identifier) did not map, reading in next sequence\n";
					#  print "$index\t$fhs[$index]->{last_seq_id}\n";
					#  print "$index\t$fhs[$index]->{last_line_1}\n";
					#  print "$index\t$fhs[$index]->{last_line_2}\n";
					next; # next instance
				}
				else{
					# assigning undef to last_seq_id and last_line and jumping to the next index (end of Bowtie 2 output)
					$fhs[$index]->{last_seq_id} = undef;
					$fhs[$index]->{last_line_1} = undef;
					$fhs[$index]->{last_line_2} = undef;
					next;
				}
			}
	    
			### If there are one or more proper alignments we can extract the chromosome number
			my ($chromosome_1,$chromosome_2);
			if ($mapped_chromosome_1 =~ s/_(CT|GA)_converted$//){
				$chromosome_1 = $mapped_chromosome_1;
			}
			else{
				die "Chromosome number extraction failed for $mapped_chromosome_1\n";
			}
			if ($mapped_chromosome_2 =~ s/_(CT|GA)_converted$//){
				$chromosome_2 = $mapped_chromosome_2;
			}
			else{
				die "Chromosome number extraction failed for $mapped_chromosome_2\n";
			}

			die "Paired-end alignments need to be on the same chromosome\n" unless ($chromosome_1 eq $chromosome_2);

			### We will use the optional fields to determine the best alignments. Later on we extract the number of mismatches and/or indels from the CIGAR string
			my ($alignment_score_1,$alignment_score_2,$second_best_1,$second_best_2,$MD_tag_1,$MD_tag_2);

			my @fields_1 = split (/\t/,$fhs[$index]->{last_line_1});
			my @fields_2 = split (/\t/,$fhs[$index]->{last_line_2});

			foreach (11..$#fields_1){
				if ($fields_1[$_] =~ /AS:i:(.*)/){
					$alignment_score_1 = $1;
				}
				elsif ($fields_1[$_] =~ /XS:i:(.*)/){
					$second_best_1 = $1;
				}
				elsif ($fields_1[$_] =~ /MD:Z:(.*)/){
					$MD_tag_1 = $1;
				}
			}

			foreach (11..$#fields_2){
				if ($fields_2[$_] =~ /AS:i:(.*)/){
					$alignment_score_2 = $1;
				}
				elsif ($fields_2[$_] =~ /MD:Z:(.*)/){
					$MD_tag_2 = $1;
				}
				else{
					if ($bowtie2){
						if ($fields_2[$_] =~ /XS:i:(.*)/){
							$second_best_2 = $1;
						}
					}	
					else{ # HISAT2 uses the ZS tag instead
						if($fields_2[$_] =~ /ZS:i:(.*)/){ 
							$second_best_2 = $1;
						}
					}
				}
			}

			die "Failed to extract alignment score 1 ($alignment_score_1) and MD tag ($MD_tag_1)!\nlast alignment 1: $fhs[$index]->{last_line_1}\nlast alignment 2: $fhs[$index]->{last_line_2}\n" unless (defined $alignment_score_1 and defined $MD_tag_1);
			die "Failed to extract alignment score 2 ($alignment_score_2) and MD tag ($MD_tag_2)!\nlast alignment 1: $fhs[$index]->{last_line_1}\nlast alignment 2: $fhs[$index]->{last_line_2}\n" unless (defined $alignment_score_2 and defined $MD_tag_2);

			# warn "First read 1 alignment score is: '$alignment_score_1'\n";
			# warn "First read 2 alignment score is: '$alignment_score_2'\n";
			# warn "XS/ZS tag 1 is: '$second_best_1'\n";
			# warn "XS/ZS tag 2 is: '$second_best_2'\n";
			# warn "MD tag 1 is: '$MD_tag_1'\n";
			# warn "MD tag 2 is: '$MD_tag_2'\n";

			### To decide whether a sequence pair has a unique best alignment we will look at the highest sum of alignment scores from both alignments
			my $sum_of_alignment_scores_1 = $alignment_score_1 + $alignment_score_2 ;
			# warn "sum of alignment scores: $sum_of_alignment_scores_1\n\n"; sleep(1);

			my $overwrite = 0; # If there are 2 alternative alignments to the same position, e.g. OT with 50 mismatches and CTOB with 0 mismatches, the CTOB one trumps the OT one.
			# introduced 13 April 2016 as a suggestion by Sylvain Foret, ANU Canberra

			if (!defined $best_AS_so_far){
				$overwrite = 1;
				$best_AS_so_far = $sum_of_alignment_scores_1;
				# warn "First alignment score, setting \$best_AS_so_far to $best_AS_so_far\n";
				if ($ambig_bam){ # also setting the first_ambig_alignment
					# Read 1
					$first_ambig_alignment_line1 = $fhs[$index]->{last_line_1};
					$first_ambig_alignment_line1 =~ s/_(CT|GA)_converted//;
					# Read 2
					$first_ambig_alignment_line2 = $fhs[$index]->{last_line_2};
					$first_ambig_alignment_line2 =~ s/_(CT|GA)_converted//;
					# warn "$first_ambig_alignment_line1\n$first_ambig_alignment_line2\n\n"; sleep(1);
				}
			}
			else{
				if ($sum_of_alignment_scores_1 >= $best_AS_so_far){ # AS are generally negative with a maximum of 0
					# 19 07 2016 Changed to >= so that equally good alignments to different positions get added as well. Ambiguous alignments are identified and removed later.
					$overwrite = 1;

					# resetting the ambiguous within thread memory (if applicable at all) only if the current alignment is really better than the previous one.
					# 22 07 2016: ambiguous score within same thread only resets if the current alignment is really better than the previous one
					if ($sum_of_alignment_scores_1 > $best_AS_so_far){
						# warn "Resetting amb within thread value to 0\n";
						$amb_same_thread = 0;
		    
						if ($ambig_bam){ # also setting a new first_ambig_alignment
							# Read 1
							$first_ambig_alignment_line1 = $fhs[$index]->{last_line_1};
							$first_ambig_alignment_line1 =~ s/_(CT|GA)_converted//;
							# Read 2
							$first_ambig_alignment_line2 = $fhs[$index]->{last_line_2};
							$first_ambig_alignment_line2 =~ s/_(CT|GA)_converted//;
							# warn "$first_ambig_alignment_line1\n$first_ambig_alignment_line2\n\n"; sleep(1);
						}
					}
					$best_AS_so_far = $sum_of_alignment_scores_1; # moved this down so that $amb_within_thread gets a chance to be reset
					# warn "Found better or equal sum of alignment scores ($sum_of_alignment_scores_1), setting \$best_AS_so_far to $best_AS_so_far\n";
				}
				else{
					# warn "current alignment (AS $sum_of_alignment_scores) isn't better than the best so far ($best_AS_so_far). Not changing anything\n";
				}		
			}
	    
			# If either of the reads has a second best alignment but the other one doesn't we assign a value of the best alignment, i.e. the AS score
			if (defined $second_best_1 or defined $second_best_2){    
				unless (defined $second_best_1){
					$second_best_1 = $alignment_score_1;
				}
				unless (defined $second_best_2){
					$second_best_2 = $alignment_score_2;
				}
				# warn "\n\n#############################\n\nXS Read 1: $second_best_1\nXS Read 2: $second_best_2\n\n##########################\n\n";
			}
	    
			if (defined $second_best_1 and defined $second_best_2){
				my $sum_of_alignment_scores_second_best = $second_best_1 + $second_best_2;
				# warn "Second best alignment_score_1 is: '$second_best_1'\n";
				# warn "Second best alignment_score_2 is: '$second_best_2'\n";
				# warn "Second best alignment sum of alignment scores is: '$sum_of_alignment_scores_second_best'\n";
	    
				# If the first alignment score for the first read pair is the same as the alignment score of the second best hit we keep a memory of this
				if ($sum_of_alignment_scores_1 == $sum_of_alignment_scores_second_best){
					# checking to see if this read pair produced the best alignment
					if ($sum_of_alignment_scores_1 == $best_AS_so_far){  # yes this is the best read pair so far, either within the thread or between threads, however it is ambiguous
						#warn "Read pair is ambiguous within the same thread, or otherwise as good as the best one so far. Setting \$amb_same_thread to 1 for currently best AS: $best_AS_so_far\n";
						$amb_same_thread = 1;
					}
					else{
						# warn "This read pair has a worse alignment score than the best alignment so far and will be ignored even though it is ambiguous in itself\n";
					}

					### if there is a better alignment later on -> fine. If not, the read will get booted altogether one way or another

					## need to read and discard all additional ambiguous reads until we reach the next sequence
					until ($fhs[$index]->{last_seq_id} ne $identifier){
						my $newline_1 = $fhs[$index]->{fh}-> getline();
						my $newline_2 = $fhs[$index]->{fh}-> getline();
						if ($newline_1 and $newline_2){
							chomp $newline_1;
							chomp $newline_2;
							my ($seq_id_1) = split (/\t/,$newline_1);
							my ($seq_id_2) = split (/\t/,$newline_2);
							$seq_id_1 =~ s/\/1$//;
							$seq_id_2 =~ s/\/2$//;
							# print "New Seq IDs:\t$seq_id_1\t$seq_id_2\n";

							$fhs[$index]->{last_seq_id} = $seq_id_1;
							$fhs[$index]->{last_line_1} = $newline_1;
							$fhs[$index]->{last_line_2} = $newline_2;
						}
						else{
							# assigning undef to last_seq_id and last_line and jumping to the next index (end of Bowtie 2 output)
							$fhs[$index]->{last_seq_id} = undef;
							$fhs[$index]->{last_line_1} = undef;
							$fhs[$index]->{last_line_2} = undef;
							last; # break free if the end of the alignment output was reached
						}
					}
				}
				#  if ($fhs[$index]->{last_seq_id}){
				#    warn "Index: $index\tThis Seq-ID is $identifier, skipped all ambiguous sequences until the next ID which is: $fhs[$index]->{last_seq_id}\n";
				#  }
			    else{ # the next best alignment has a lower alignment score than the current read, so we can safely store the current alignment

					my $alignment_location;
					if ($position_1 <= $position_2){
						$alignment_location = join(":",$chromosome_1,$position_1,$position_2);
					}
					elsif($position_2 < $position_1){
						$alignment_location = join(":",$chromosome_1,$position_2,$position_1);
					}

					### If a sequence aligns to exactly the same location twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
					### strand) were methylated and therefore protected. Alternatively it will align better in one condition than in the other. In any case, it is not needed to overwrite
					### the same positional entry with a second entry for the same location, as the genomic sequence extraction and methylation call would not be affected by this. The only
					### thing which would change is the index number for the found alignment). We will continue to assign these alignments to the first indexes 0 and 3, i.e. OT and OB

					if ($overwrite){ # see comment above at "my $overwrite = ..."
						#unless (exists $alignments{$alignment_location}){
						$alignments{$alignment_location}->{seq_id} = $id_1;
						$alignments{$alignment_location}->{alignment_score_1} = $alignment_score_1;
						$alignments{$alignment_location}->{alignment_score_2} = $alignment_score_2;
						$alignments{$alignment_location}->{sum_of_alignment_scores} = $sum_of_alignment_scores_1;
						$alignments{$alignment_location}->{sum_of_alignment_scores_second_best} = $sum_of_alignment_scores_second_best;
						$alignments{$alignment_location}->{bowtie_sequence_1} = $bowtie_sequence_1;
						$alignments{$alignment_location}->{bowtie_sequence_2} = $bowtie_sequence_2;
						$alignments{$alignment_location}->{index} = $index;
						$alignments{$alignment_location}->{chromosome} = $chromosome_1; # either is fine
						$alignments{$alignment_location}->{position_1} = $position_1;
						$alignments{$alignment_location}->{position_2} = $position_2;
						$alignments{$alignment_location}->{mismatch_info_1} = $MD_tag_1;
						$alignments{$alignment_location}->{mismatch_info_2} = $MD_tag_2;
						$alignments{$alignment_location}->{CIGAR_1} = $cigar_1;
						$alignments{$alignment_location}->{CIGAR_2} = $cigar_2;
						$alignments{$alignment_location}->{flag_1} = $flag_1;
						$alignments{$alignment_location}->{flag_2} = $flag_2;
						# warn "added best of several alignments to \%alignments hash\n";
					}

					### now reading and discarding all (inferior) alignments of this read pair until we hit the next sequence
					until ($fhs[$index]->{last_seq_id} ne $identifier){
						my $newline_1 = $fhs[$index]->{fh}-> getline();
						my $newline_2 = $fhs[$index]->{fh}-> getline();
						if ($newline_1 and $newline_2){
							chomp $newline_1;
							chomp $newline_2;
							my ($seq_id_1) = split (/\t/,$newline_1);
							my ($seq_id_2) = split (/\t/,$newline_2);
							$seq_id_1 =~ s/\/1$//;
							$seq_id_2 =~ s/\/2$//;
							# print "New Seq IDs:\t$seq_id_1\t$seq_id_2\n";

							$fhs[$index]->{last_seq_id} = $seq_id_1;
							$fhs[$index]->{last_line_1} = $newline_1;
							$fhs[$index]->{last_line_2} = $newline_2;
						}
						else{
							# assigning undef to last_seq_id and last_line_1 and _2 and jumping to the next index (end of Bowtie 2 output)
							$fhs[$index]->{last_seq_id} = undef;
							$fhs[$index]->{last_line_1} = undef;
							$fhs[$index]->{last_line_2} = undef;
							last; # break free if the end of the alignment output was reached
						}
					}
			# if($fhs[$index]->{last_seq_id}){
			#   warn "Index: $index\tThis Seq-ID is $identifier, skipped all other alignments until the next ID was reached which is: $fhs[$index]->{last_seq_id}\n";
			# }
				}
			}	
			else{ # there is no second best hit, so we can just store this one and read in the next sequence

				my $alignment_location = join(":",$chromosome_1,$position_1,$position_2);
					# print "$alignment_location\n";
				### If a sequence aligns to exactly the same location with a perfect match twice the sequence does either not contain any C or G, or all the Cs (or Gs on the reverse
				### strand) were methylated and therefore protected. Alternatively it will align better in one condition than in the other. In any case, it is not needed to overwrite
				### the same positional entry with a second entry for the same location, as the genomic sequence extraction and methylation call would not be affected by this. The only
				### thing which would change is the index number for the found alignment). We will continue to assign these alignments to the first indexes 0 and 3, i.e. OT and OB

				#unless (exists $alignments{$alignment_location}){ # see comment above at my $overwrite = ...
				if ($overwrite){
					$alignments{$alignment_location}->{seq_id} = $id_1;
					$alignments{$alignment_location}->{alignment_score_1} = $alignment_score_1;
					$alignments{$alignment_location}->{alignment_score_2} = $alignment_score_2;
					$alignments{$alignment_location}->{sum_of_alignment_scores} = $sum_of_alignment_scores_1;
					$alignments{$alignment_location}->{sum_of_alignment_scores_second_best} = undef;
					$alignments{$alignment_location}->{bowtie_sequence_1} = $bowtie_sequence_1;
					$alignments{$alignment_location}->{bowtie_sequence_2} = $bowtie_sequence_2;
					$alignments{$alignment_location}->{index} = $index;
					$alignments{$alignment_location}->{chromosome} = $chromosome_1; # either is fine
					$alignments{$alignment_location}->{position_1} = $position_1;
					$alignments{$alignment_location}->{position_2} = $position_2;
					$alignments{$alignment_location}->{mismatch_info_1} = $MD_tag_1;
					$alignments{$alignment_location}->{mismatch_info_2} = $MD_tag_2;
					$alignments{$alignment_location}->{CIGAR_1} = $cigar_1;
					$alignments{$alignment_location}->{CIGAR_2} = $cigar_2;
					$alignments{$alignment_location}->{flag_1} = $flag_1;
					$alignments{$alignment_location}->{flag_2} = $flag_2;
					# warn "added unique alignment to \%alignments hash\n";
				}

				# Now reading and storing the next read pair
				until ($fhs[$index]->{last_seq_id} ne $identifier){
					my $newline_1 = $fhs[$index]->{fh}-> getline();
					my $newline_2 = $fhs[$index]->{fh}-> getline();
					if ($newline_1 and $newline_2){
						chomp $newline_1;
						chomp $newline_2;
						# print "$newline_1\n";
						# print "$newline_2\n";
						my ($seq_id_1) = split (/\t/,$newline_1);
						my ($seq_id_2) = split (/\t/,$newline_2);
						$seq_id_1 =~ s/\/1$//;
						$seq_id_2 =~ s/\/2$//;
						# print "New Seq IDs:\t$seq_id_1\t$seq_id_2\n";

						$fhs[$index]->{last_seq_id} = $seq_id_1;
						$fhs[$index]->{last_line_1} = $newline_1;
						$fhs[$index]->{last_line_2} = $newline_2;
					}
					else{
						# assigning undef to last_seq_id and last_line_1 and _2 and jumping to the next index (end of Bowtie 2 output)
						$fhs[$index]->{last_seq_id} = undef;
						$fhs[$index]->{last_line_1} = undef;
						$fhs[$index]->{last_line_2} = undef;
						last; # break free if the end of the alignment output was reached
					}
				}
			}
		}
	}

	### If there were several equally good alignments for the best alignment score we will boot the read
	if ($amb_same_thread){
		# warn "\$alignment_ambiguous now: $alignment_ambiguous\n";
		$alignment_ambiguous = 1;
		# warn "\$alignment_ambiguous now: $alignment_ambiguous\n";
	}
	else{
		# warn "alignment won't be considered ambiguous. This time....\n";
	}


	### if the read produced several ambiguous alignments for a single instance of Bowtie 2 we can return already now. If --ambiguous was specified the read sequence will be printed out in FastQ format
	if ($alignment_ambiguous == 1){
		$counting{unsuitable_sequence_count}++;
		### report that the sequence pair has multiple hits with bitwise flag 256. We can print the sequence to the result file straight away and skip everything else
		#  my $ambiguous_read_1 = join("\t",$identifier.'/1','256','*','0','0','*','*','0','0',$sequence_1,$quality_value_1);
		#  my $ambiguous_read_2 = join("\t",$identifier.'/2','256','*','0','0','*','*','0','0',$sequence_2,$quality_value_2);
		#  print "$ambiguous_read_1\n";
		#  print "$ambiguous_read_2\n";

		if ($ambig_bam){
			# warn "Sequence is ambiguous, printing out to ambiguous BAM file:\n";
			# replacing the first /1\t in the ID of R1
			# warn "Was\n$first_ambig_alignment_line1\n$first_ambig_alignment_line2\n";
			$first_ambig_alignment_line1 =~ s/\/1\t/\t/;
			$first_ambig_alignment_line2 =~ s/\/2\t/\t/;
			# warn "Is:\n$first_ambig_alignment_line1\n$first_ambig_alignment_line2\n\n";

			print AMBIBAM "$first_ambig_alignment_line1\n$first_ambig_alignment_line2\n";
			# print "$first_ambig_alignment_line1\n$first_ambig_alignment_line2\n";
		}

		if ($ambiguous){
			return 2; # => exits to next sequence pair, and prints it out to _ambiguous_reads_1.txt and _ambiguous_reads_2.txt if '--ambiguous' was specified
		}
		elsif ($unmapped){
			return 1; # => exits to next sequence pair, and prints it out to _unmapped_reads_1.txt and _unmapped_reads_2.txt if '--unmapped' but not '--ambiguous' was specified
		}
		else{
			return 0;
		}
	}

	### if no alignment was found for a certain sequence at all we continue with the next sequence in the sequence file
	unless (%alignments){
		$counting{no_single_alignment_found}++;

		# my $unmapped_read_1 = join("\t",$identifier.'/1','77','*','0','0','*','*','0','0',$sequence_1,$quality_value_1);
		# my $unmapped_read_2 = join("\t",$identifier.'/2','141','*','0','0','*','*','0','0',$sequence_2,$quality_value_2);
		# print "$unmapped_read_1\n";
		# print "$unmapped_read_2\n";
		if ($unmapped){
			return 1; # => exits to next sequence pair, and prints it out to _unmapped_reads_1.txt and _unmapped_read_2.txt if '--unmapped' was specified
		}
		else{
			return 0;
		}
	}

	#######################################################################################################################################################
	
	### If the sequence pair was not rejected so far we are now looking if there is a unique best alignment among all alignment instances. If there is only one
	### single best position we are going to store the alignment information in the $meth_call variable. If there are multiple hits with the same (highest)
	### alignment score we are discarding the sequence pair altogether.
	### For end-to-end alignments the maximum alignment score is 0, each mismatch receives a penalty of 6, and each gap receives penalties for opening (5)
	### and extending (3 per bp) the gap.

	#######################################################################################################################################################

	### Declaring an empty hash reference which will store all information we need for the methylation call
	my $methylation_call_params; # hash reference
	my $sequence_pair_fails = 0; # using $sequence_pair_fails as a 'memory' if a sequence could not be aligned uniquely (set to 1 then)

	  ### print contents of %alignments for debugging
	  ##  if (scalar keys %alignments >= 1){
	  #     print "\n******\n";
	  #     foreach my $alignment_location (sort {$a cmp $b} keys %alignments){
	  #       print "Loc:  $alignment_location\n";
	  #       print "ID:      $alignments{$alignment_location}->{seq_id}\n";
	  #       print "AS_1:    $alignments{$alignment_location}->{alignment_score_1}\n";
	  #       print "AS_2:    $alignments{$alignment_location}->{alignment_score_2}\n";
	  #       print "Seq_1:   $alignments{$alignment_location}->{bowtie_sequence_1}\n";
	  #       print "Seq_2:   $alignments{$alignment_location}->{bowtie_sequence_2}\n";
	  #       print "Index    $alignments{$alignment_location}->{index}\n";
	  #       print "Chr:     $alignments{$alignment_location}->{chromosome}\n";
	  #       print "Pos_1:   $alignments{$alignment_location}->{position_1}\n";
	  #       print "Pos_2:   $alignments{$alignment_location}->{position_2}\n";
	  #       print "CIGAR_1: $alignments{$alignment_location}->{CIGAR_1}\n";
	  #       print "CIGAR_2: $alignments{$alignment_location}->{CIGAR_2}\n";
	  #       print "MD_1:    $alignments{$alignment_location}->{mismatch_info_1}\n";
	  #       print "MD_2:    $alignments{$alignment_location}->{mismatch_info_2}\n";
	  #       print "Flag 1:  $alignments{$alignment_location}->{flag_1}\n";
	  #       print "Flag 2:  $alignments{$alignment_location}->{flag_2}\n";
	  #    }
	  #    print "\n******\n";
	  #  }

	## if there is only 1 entry in the %alignments hash we accept it as the best alignment
	if (scalar keys %alignments == 1){
		for my $unique_best_alignment (keys %alignments){
			$methylation_call_params->{$identifier}->{bowtie_sequence_1} = $alignments{$unique_best_alignment}->{bowtie_sequence_1};
			$methylation_call_params->{$identifier}->{bowtie_sequence_2} = $alignments{$unique_best_alignment}->{bowtie_sequence_2};
			$methylation_call_params->{$identifier}->{chromosome}        = $alignments{$unique_best_alignment}->{chromosome};
			$methylation_call_params->{$identifier}->{position_1}        = $alignments{$unique_best_alignment}->{position_1};
			$methylation_call_params->{$identifier}->{position_2}        = $alignments{$unique_best_alignment}->{position_2};
			$methylation_call_params->{$identifier}->{index}             = $alignments{$unique_best_alignment}->{index};
			$methylation_call_params->{$identifier}->{alignment_score_1} = $alignments{$unique_best_alignment}->{alignment_score_1};
			$methylation_call_params->{$identifier}->{alignment_score_2} = $alignments{$unique_best_alignment}->{alignment_score_2};
			$methylation_call_params->{$identifier}->{sum_of_alignment_scores} = $alignments{$unique_best_alignment}->{sum_of_alignment_scores};
			$methylation_call_params->{$identifier}->{sum_of_alignment_scores_second_best} = $alignments{$unique_best_alignment}->{sum_of_alignment_scores_second_best};
			$methylation_call_params->{$identifier}->{mismatch_info_1}   = $alignments{$unique_best_alignment}->{mismatch_info_1};
			$methylation_call_params->{$identifier}->{mismatch_info_2}   = $alignments{$unique_best_alignment}->{mismatch_info_2};
			$methylation_call_params->{$identifier}->{CIGAR_1}           = $alignments{$unique_best_alignment}->{CIGAR_1};
			$methylation_call_params->{$identifier}->{CIGAR_2}           = $alignments{$unique_best_alignment}->{CIGAR_2};
			$methylation_call_params->{$identifier}->{flag_1}            = $alignments{$unique_best_alignment}->{flag_1};
			$methylation_call_params->{$identifier}->{flag_2}            = $alignments{$unique_best_alignment}->{flag_2};
		}
	}

	### otherwise we are going to find out if there is a best match among the multiple alignments, or whether there are 2 or more equally good alignments (in which case
	### we boot the sequence pair altogether)
	elsif (scalar keys %alignments >= 2  and scalar keys %alignments <= 4){
		my $best_sum_of_alignment_scores;
		my $best_alignment_location;
		foreach my $alignment_location (sort {$alignments{$b}->{sum_of_alignment_scores} <=> $alignments{$a}->{sum_of_alignment_scores}} keys %alignments){

			# warn "$alignments{$alignment_location}->{sum_of_alignment_scores}\n"; sleep(1);

			unless (defined $best_sum_of_alignment_scores){
				$best_sum_of_alignment_scores = $alignments{$alignment_location}->{sum_of_alignment_scores};
				$best_alignment_location = $alignment_location;
				# print "setting best alignment score to: $best_sum_of_alignment_scores\n";
			}
			else{
				### if the second best alignment has the same sum of alignment scores as the first one, the sequence pair will get booted
				if ($alignments{$alignment_location}->{sum_of_alignment_scores} == $best_sum_of_alignment_scores){
					# warn "Same sum of alignment scores for 2 different alignments, the sequence pair will get booted!\n";
					$sequence_pair_fails = 1;
					last; # exiting since we know that the sequence has ambiguous alignments
				}
				### else we are going to store the best alignment for further processing
				else{
					$methylation_call_params->{$identifier}->{bowtie_sequence_1} = $alignments{$best_alignment_location}->{bowtie_sequence_1};
					$methylation_call_params->{$identifier}->{bowtie_sequence_2} = $alignments{$best_alignment_location}->{bowtie_sequence_2};
					$methylation_call_params->{$identifier}->{chromosome}        = $alignments{$best_alignment_location}->{chromosome};
					$methylation_call_params->{$identifier}->{position_1}        = $alignments{$best_alignment_location}->{position_1};
					$methylation_call_params->{$identifier}->{position_2}        = $alignments{$best_alignment_location}->{position_2};
					$methylation_call_params->{$identifier}->{index}             = $alignments{$best_alignment_location}->{index};
					$methylation_call_params->{$identifier}->{alignment_score_1} = $alignments{$best_alignment_location}->{alignment_score_1};
					$methylation_call_params->{$identifier}->{alignment_score_2} = $alignments{$best_alignment_location}->{alignment_score_2};
					$methylation_call_params->{$identifier}->{sum_of_alignment_scores} = $alignments{$best_alignment_location}->{sum_of_alignment_scores};
					$methylation_call_params->{$identifier}->{mismatch_info_1}   = $alignments{$best_alignment_location}->{mismatch_info_1};
					$methylation_call_params->{$identifier}->{mismatch_info_2}   = $alignments{$best_alignment_location}->{mismatch_info_2};
					$methylation_call_params->{$identifier}->{CIGAR_1}           = $alignments{$best_alignment_location}->{CIGAR_1};
					$methylation_call_params->{$identifier}->{CIGAR_2}           = $alignments{$best_alignment_location}->{CIGAR_2};
					$methylation_call_params->{$identifier}->{flag_1}            = $alignments{$best_alignment_location}->{flag_1};
					$methylation_call_params->{$identifier}->{flag_2}            = $alignments{$best_alignment_location}->{flag_2};

					if (defined $alignments{$best_alignment_location}->{sum_of_alignment_scores_second_best} and ( $alignments{$best_alignment_location}->{sum_of_alignment_scores_second_best} > $alignments{$alignment_location}->{sum_of_alignment_scores} )) {
						$methylation_call_params->{$identifier}->{sum_of_alignment_scores_second_best} = $alignments{$best_alignment_location}->{sum_of_alignment_scores_second_best};
					}
					else {
						$methylation_call_params->{$identifier}->{sum_of_alignment_scores_second_best} = $alignments{$alignment_location}->{sum_of_alignment_scores};
					}

					last; # exiting since the sequence produced a unique best alignment
				}
			}
		}
	}
	else{
		die "There are too many potential hits for this sequence pair (1-4 expected, but found: '",scalar keys %alignments,"')\n";;
	}

	### skipping the sequence completely if there were multiple alignments with the same best sum of alignment scores at different positions
	if ($sequence_pair_fails == 1){
		$counting{unsuitable_sequence_count}++;

		### report that the sequence has multiple hits with bitwise flag 256. We can print the sequence to the result file straight away and skip everything else
		# my $ambiguous_read_1 = join("\t",$identifier.'/1','256','*','0','0','*','*','0','0',$sequence_1,$quality_value_1);
		# my $ambiguous_read_2 = join("\t",$identifier.'/2','256','*','0','0','*','*','0','0',$sequence_2,$quality_value_2);
		# warn "$ambiguous_read_1\n";
		# warn "$ambiguous_read_2\n";

		if ($ambiguous){
			return 2; # => exits to next sequence pair, and prints it out (in FastQ format) to _ambiguous_reads_1.txt and _ambiguous_reads_2.txt if '--ambiguous' was specified
		}
		elsif ($unmapped){
			return 1; # => exits to next sequence pair, and prints it out (in FastQ format) to _unmapped_reads_1.txt and _unmapped_reads_2.txt if '--unmapped' but not '--ambiguous' was specified
		}
		else{
			return 0; # => exits to next sequence pair (default)
		}
	}

	### --DIRECTIONAL
	### If the option --directional has been specified the user wants to consider only alignments to the original top strand or the original bottom strand. We will therefore
	### discard all alignments to strands complementary to the original strands, as they should not exist in reality due to the library preparation protocol
	if ($directional){
		if ( ($methylation_call_params->{$identifier}->{index} == 1) or ($methylation_call_params->{$identifier}->{index} == 2) ){
			#    warn "Alignment rejected! (index was: $methylation_call_params->{$identifier}->{index})\n";
			$counting{alignments_rejected_count}++;
			return 0;
		}
	}

	### If the sequence pair has not been rejected so far it does have a unique best alignment
	$counting{unique_best_alignment_count}++;
	extract_corresponding_genomic_sequence_paired_end($identifier,$methylation_call_params);

	### check to see if the genomic sequences we extracted has the same length as the observed sequences +2, and only then we perform the methylation call
	if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1}) != length($sequence_1)+2){
		warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{position_1}\n";
		$counting{genomic_sequence_could_not_be_extracted_count}++;
		return 0;
	}
	if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2}) != length($sequence_2)+2){
		warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{position_2}\n";
		$counting{genomic_sequence_could_not_be_extracted_count}++;
		return 0;
	}

	### Compute MAPQ value
	$methylation_call_params->{$identifier}->{mapq} = calc_mapq (length($sequence_1), length($sequence_2),
                                                                           $methylation_call_params->{$identifier}->{sum_of_alignment_scores},
                                                                           $methylation_call_params->{$identifier}->{sum_of_alignment_scores_second_best});


	### now we are set to perform the actual methylation call
	if ($slam){
		$methylation_call_params->{$identifier}->{methylation_call_1} = methylation_call_slam($identifier,$sequence_1,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1},$methylation_call_params->{$identifier}->{read_conversion_1});
		$methylation_call_params->{$identifier}->{methylation_call_2} = methylation_call_slam($identifier,$sequence_2,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2},$methylation_call_params->{$identifier}->{read_conversion_2});
	}
	else{
		$methylation_call_params->{$identifier}->{methylation_call_1} = methylation_call($identifier,$sequence_1,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1},$methylation_call_params->{$identifier}->{read_conversion_1});
		$methylation_call_params->{$identifier}->{methylation_call_2} = methylation_call($identifier,$sequence_2,$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2},$methylation_call_params->{$identifier}->{read_conversion_2});
	}
	# warn "$methylation_call_params->{$identifier}->{read_conversion_2}\n";
	# warn "  $sequence_2\n";
	# warn "$methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2}\n";
	# warn "  $methylation_call_params->{$identifier}->{methylation_call_2}\n";

	print_bisulfite_mapping_results_paired_ends($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2);
	return 0; ## otherwise 1 will be returned by default, which would print the sequence pair to unmapped_1 and _2
}


sub determine_number_of_transliterations_performed{
	### Counts the bases of a read that are subject to the in-silico bisulfite conversion:
	### the Cs for a 'CT' (C->T) conversion, or the Gs for a 'GA' (G->A) conversion.
	###   $sequence        - the read sequence (only upper case C/G are counted; the caller's string is not modified)
	###   $read_conversion - conversion mode of the read, either 'CT' or 'GA'
	### Returns the number of affected bases; dies for any other (or a missing) conversion mode.
	my ($sequence,$read_conversion) = @_;

	### a missing conversion mode is reported by the die below instead of by 'uninitialized value' warnings
	my $conversion = defined $read_conversion ? $read_conversion : '';

	### tr/// with an empty replacement list only counts the matching characters
	if ($conversion eq 'CT'){
		return ($sequence =~ tr/C//);
	}
	if ($conversion eq 'GA'){
		return ($sequence =~ tr/G//);
	}

	### no system call has failed here, so $! would only add an unrelated OS error text to the message; we report the offending value instead
	die "Read conversion mode of the read was not specified (expected 'CT' or 'GA' but got: '$conversion')\n";
}


###

# Compute MAPQ value for a read or read pair as in Bowtie2-2.2.2 (specifically, V2 of the MAPQ calculator: "class BowtieMapq2")
# assuming end-to-end alignment with the default calculation of the minimum alignment score

sub calc_mapq {
	my ($read1Len, $read2Len, $AS_best, $AS_secBest) = @_;

	### Returns the MAPQ value of a read or read pair.
	###   $read1Len   - length of read 1
	###   $read2Len   - length of read 2 (undefined in single-end mode)
	###   $AS_best    - alignment score of the best alignment (for pairs the caller passes the sum of both scores)
	###   $AS_secBest - alignment score of the second best alignment (undefined if there was none)
	### The result is looked up from fixed threshold tables, one set for end-to-end and one for --local mode.

	# Minimum alignment score for a read of a given length.
	# Bismark hardcodes the expectation that end-to-end alignments use a linear score_min function (L,Intercept,Coefficient)
	# while local alignments use a logarithmic one (G,Intercept,Coefficient). This matches the default function forms of Bowtie 2:
	# http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#bowtie2-options-score-min
	# http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#setting-function-options
	# If this expectation is ever lifted, the user-requested function type needs to be taken into account here as well.
	my $min_score_for_length = sub {
		my ($read_length) = @_;
		return $score_min_intercept + $score_min_slope * ($local ? log($read_length) : $read_length);
	};

	### $read2Len is only defined for paired-end reads; in single-end mode the minimum score of read 1 is all we need
	my $scMin = $min_score_for_length->($read1Len);
	$scMin += $min_score_for_length->($read2Len) if (defined $read2Len);

	my $diff     = abs($scMin);       # scores can vary by up to this much (since max AS is 0 for end-to-end alignment)
	my $bestOver = $AS_best - $scMin; # how far the best alignment lies above the minimum score

	### Walks a list of [fraction, MAPQ] steps and returns the MAPQ of the first step for which
	### $bestOver reaches that fraction of $diff, or the fallback MAPQ if no step is reached
	my $mapq_for_best_over = sub {
		my ($fallback_mapq, @steps) = @_;
		foreach my $step (@steps){
			my ($fraction, $mapq) = @{$step};
			return $mapq if ($bestOver >= $diff * $fraction);
		}
		return $fallback_mapq;
	};

	### No second best alignment: the MAPQ depends on $bestOver alone
	if (!defined $AS_secBest){
		my @steps = $local
			? ( [0.8, 44], [0.7, 42], [0.6, 41], [0.5, 36], [0.4, 28], [0.3, 24] )
			: ( [0.8, 42], [0.7, 40], [0.6, 24], [0.5, 23], [0.4,  8], [0.3,  3] );
		return $mapq_for_best_over->( ($local ? 22 : 0), @steps );
	}

	### A second best alignment exists: the tier is selected by the gap between best and second best score
	# FK (local mode): not sure what to do about secbest = s.paired() ? s.bestUnchosenCScore().score() : s.bestUnchosenScore(mate1).score();
	# For more information on the local mode values see here: https://github.com/FelixKrueger/Bismark/issues/260
	my $bestDiff = abs(abs($AS_best) - abs($AS_secBest));

	# Each tier: [ fraction of $diff that $bestDiff has to reach,
	#              MAPQ if $bestOver == $diff (undef: this case is not treated separately),
	#              fallback MAPQ,
	#              optional [fraction, MAPQ] steps for $bestOver ]
	my @tiers = $local
		? (
			[0.9, undef, 40],
			[0.8, undef, 39],
			[0.7, undef, 38],
			[0.6, undef, 37],
			[0.5, 35, 20, [0.50, 25]],
			[0.4, 34, 19, [0.50, 21]],
			[0.3, 33, 16, [0.5,  18]],
			[0.2, 32, 12, [0.5,  17]],
			[0.1, 31,  9, [0.5,  14]],
		)
		: (
			[0.9, 39, 33],
			[0.8, 38, 27],
			[0.7, 37, 26],
			[0.6, 36, 22],
			[0.5, 35,  5, [0.84, 25], [0.68, 16]],
			[0.4, 34,  4, [0.84, 21], [0.68, 14]],
			[0.3, 32,  3, [0.88, 18], [0.67, 15]],
			[0.2, 31,  0, [0.88, 17], [0.67, 11]],
			[0.1, 30,  0, [0.88, 12], [0.67,  7]],
		);

	foreach my $tier (@tiers){
		my ($fraction, $perfect_mapq, $fallback_mapq, @steps) = @{$tier};
		next unless ($bestDiff >= $diff * $fraction);

		return $perfect_mapq if (defined $perfect_mapq and $bestOver == $diff);
		return $mapq_for_best_over->($fallback_mapq, @steps);
	}

	### $bestDiff is below 10% of $diff: only a single $bestOver threshold is left to check
	my $last_fraction = $local ? 0.5 : 0.67;
	if ($bestDiff > 0){
		return $mapq_for_best_over->( 2, [$last_fraction, ($local ? 11 : 6)] );
	}

	### best and second best alignment have the same (absolute) score
	return $mapq_for_best_over->( 0, [$last_fraction, 1] );
}


##########################################
###      PRINT SINGLE END RESULTS      ###
##########################################

sub print_bisulfite_mapping_result_single_end{
	my ($identifier,$sequence,$methylation_call_params,$quality_value)= @_;

	### qualities are always written out in Sanger encoding (Phred 33 scale), so Phred64 input gets re-encoded first
	$quality_value = convert_phred64_quals_to_phred33($quality_value) if ($phred64);

	### hands the mapped read and its methylation call over to the SAM writer (unmapped and ambiguous reads were already printed)
	single_end_SAM_output($identifier,$sequence,$methylation_call_params,$quality_value); # at the end of the script
}

##########################################
###      PRINT PAIRED END RESULTS      ###
##########################################

sub print_bisulfite_mapping_results_paired_ends{
	my ($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2)= @_;

	### qualities are always written out in Sanger encoding (Phred 33 scale), so Phred64 input gets re-encoded first (both reads)
	if ($phred64){
		($quality_value_1,$quality_value_2) = (
			convert_phred64_quals_to_phred33($quality_value_1),
			convert_phred64_quals_to_phred33($quality_value_2),
		);
	}

	### hands the aligned read pair and its methylation calls over to the SAM writer (unmapped and ambiguous reads were already printed)
	paired_end_SAM_output($identifier,$sequence_1,$sequence_2,$methylation_call_params,$quality_value_1,$quality_value_2); # at the end of the script
}


sub convert_phred64_quals_to_phred33{
	### Re-encodes a Phred64 quality string as a Phred33 quality string of the same length.
	### Every character is first turned into its Phred score and then into the Phred33 character for that score.
	my ($qual) = @_;

	my $phred33_quality = '';
	foreach my $phred64_char (split (//,$qual)){
		my $phred_score = convert_phred64_quality_string_into_phred_score ($phred64_char);
		$phred33_quality .= convert_phred_score_into_phred33_quality_string ($phred_score);
	}

	return $phred33_quality;
}

sub convert_solexa_quals_to_phred33{
	### Re-encodes a Solexa (pre 1.3) quality string as a Phred33 quality string of the same length.
	### Every character is first turned into its Phred score and then into the Phred33 character for that score.
	my ($qual) = @_;

	my $phred33_quality = '';
	foreach my $solexa_char (split (//,$qual)){
		my $phred_score = convert_solexa_pre1_3_quality_string_into_phred_score ($solexa_char);
		$phred33_quality .= convert_phred_score_into_phred33_quality_string ($phred_score);
	}

	return $phred33_quality;
}

sub convert_phred_score_into_phred33_quality_string{
	### Returns the single character encoding a Phred score on the Phred33 scale (character code = score + 33)
	my ($phred_score) = @_;
	return chr($phred_score + 33);
}

sub convert_phred64_quality_string_into_phred_score{
	### Returns the Phred score of a Phred64 encoded quality character (score = character code - 64)
	my ($quality_char) = @_;
	return ord($quality_char) - 64;
}

sub convert_solexa_pre1_3_quality_string_into_phred_score{
	### Returns the (approximate) Phred score of a Solexa pre-1.3 encoded quality character.
	### A fixed offset of 59 is used: all Phred Scores between 10 and 40 look exactly the same this way,
	### and there is only a minute difference for values between 0 and 10
	my ($quality_char) = @_;
	return ord($quality_char) - 59;
}


### EXTRACT GENOMIC SEQUENCE | SINGLE-END

sub extract_corresponding_genomic_sequence_single_end{
	my ($sequence_identifier,$methylation_call_params) = @_;

	### Extracts the genomic sequence a single-end read aligned to and stores it, together with strand and
	### conversion information, in $methylation_call_params->{$sequence_identifier}. Reads the CIGAR, chromosome,
	### position and index entries of the read; writes unmodified_genomic_sequence, genomic_seq_for_MD_tag,
	### alignment_strand, read_conversion, genome_conversion, end_position and indels.
	### If the read sits right at the edge of a chromosome so that the 2 extra bases cannot be extracted, only the
	### two sequence entries are written (with whatever was extracted so far) and the sub returns early; callers are
	### then expected to notice the unexpected sequence length.

	my $cigar      = $methylation_call_params->{$sequence_identifier}->{CIGAR};
	my $chromosome = $methylation_call_params->{$sequence_identifier}->{chromosome};

	### only reads with a deletion get a separate genomic sequence (including the deleted bases) for the MD tag
	my $contains_deletion = ($cigar =~ /D/) ? 1 : 0;

	### A bisulfite sequence for 1 location in the genome can theoretically be any of the 4 possible converted strands.
	### In --pbat mode the index is shifted by 2 (we are simply not running indexes 0 or 1)
	my $strand_index = $methylation_call_params->{$sequence_identifier}->{index} + ($pbat ? 2 : 0);

	### The genomic sequence gets 2 extra bases at the end (or start) so that we can also make a CpG methylation call and
	### differential calls for Cs in CHG or CHH context if the C happens to be at the last (or first) position of the read
	my $non_bisulfite_sequence = '';
	my $genomic_seq_for_MD_tag = ''; # also contains bases deleted in the read so that a proper MD tag can be generated for the SAM output
	my $indels = 0;                  # deleted bases, needed for the hemming distance in the SAM output

	### Positions in SAM format are 1 based, so we need to subtract 1 when getting substrings
	my $pos = $methylation_call_params->{$sequence_identifier}->{position}-1;

	# parsing CIGAR string
	my @len = split (/\D+/,$cigar); # the length per operation
	my @ops = split (/\d+/,$cigar); # the operations
	shift @ops; # remove the empty first element
	die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);

	### returns a stretch of the chromosome the read aligned to
	my $genomic_substring = sub {
		my ($offset,$length) = @_;
		return substr ($chromosomes{$chromosome},$offset,$length);
	};

	### stores the sequences extracted so far (used when we have to give up at the edge of a chromosome)
	my $store_extracted_sequences = sub {
		$methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence} = $non_bisulfite_sequence;
		$methylation_call_params->{$sequence_identifier}->{genomic_seq_for_MD_tag} = $genomic_seq_for_MD_tag;
	};

	### CT converted reads vs. GA converted genome (OB, index 1) or GA converted reads vs. GA converted genome (CTOB, index 3):
	### the 2 extra bases are taken from in front of the alignment
	if ( ($strand_index == 1) or ($strand_index == 3) ){
		## we can't extract the extra bases if we are right at the start of a chromosome
		if ( ($pos-2) < 0 ){
			$store_extracted_sequences->();
			return;
		}
		$non_bisulfite_sequence .= $genomic_substring->($pos-2,2);
	}

	foreach my $op_index (0..$#len){
		my $operation = $ops[$op_index];
		my $length    = $len[$op_index];

		if ($operation eq 'M'){
			# aligned bases: take the genomic sequence and move on
			$non_bisulfite_sequence .= $genomic_substring->($pos,$length);
			$genomic_seq_for_MD_tag .= $genomic_substring->($pos,$length) if ($contains_deletion);
			$pos += $length;
		}
		elsif ( ($operation eq 'I') or ($operation eq 'S') ){
			# insertion in the read or soft-clipped read sequence: padding Xs take the place of genomic sequence. They are not
			# used to infer methylation calls and can be ignored during the generation of the MD:Z-tag. The position doesn't need
			# adjusting, and nothing is added to the hemming distance since the Xs fail the base by base comparison in hemming_dist()
			$non_bisulfite_sequence .= 'X' x $length;
			$genomic_seq_for_MD_tag .= 'X' x $length if ($contains_deletion);
		}
		elsif ($operation eq 'D'){
			# deletion in the read: the genomic bases only go into the sequence for the MD tag, and the position moves on
			$genomic_seq_for_MD_tag .= $genomic_substring->($pos,$length) if ($contains_deletion);
			$pos    += $length;
			$indels += $length; # deletions count towards the hemming distance of the SAM output
		}
		elsif ($operation eq 'N'){
			# skipped region (spliced read): only the position moves on, skipped regions do not add to the hemming distance
			$pos += $length;
		}
		elsif ($cigar =~ tr/[HPX=]//){ # if these (for standard mapping) illegal characters exist, we die
			die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I', 'D', 'S' or 'N': $cigar";
		}
		else{
			die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I', 'D', 'S' or 'N': $cigar";
		}
	}

	### CT converted reads vs. CT converted genome (OT, index 0) or GA converted reads vs. CT converted genome (CTOT, index 2):
	### the 2 extra bases are taken from behind the alignment
	if ( ($strand_index == 0) or ($strand_index == 2) ){
		## we can't extract the extra bases if we are right at the end of a chromosome
		if ( length($chromosomes{$chromosome}) < $pos+2 ){
			$store_extracted_sequences->();
			return;
		}
		$non_bisulfite_sequence .= $genomic_substring->($pos,2);
	}

	### The sequence gets a 'memory' of the conversions we expect, which is needed later for the methylation call:
	### the alignment strand tells which genomic strand the read is compared against, the read conversion whether we are
	### looking for C->T or G->A substitutions
	my ($counter,$alignment_strand,$read_conversion_info,$genome_conversion) =
		  ($strand_index == 0) ? ('CT_CT_count','+','CT','CT')  # CT converted read vs. CT converted genome (forward strand)
		: ($strand_index == 1) ? ('CT_GA_count','-','CT','GA')  # CT converted read vs. GA converted genome (reverse strand)
		: ($strand_index == 2) ? ('GA_CT_count','-','GA','CT')  # GA converted read vs. CT converted genome (complementary to forward strand)
		: ($strand_index == 3) ? ('GA_GA_count','+','GA','GA')  # GA converted read vs. GA converted genome (complementary to reverse strand)
		: die "Too many Bowtie 2 result filehandles\n";

	$counting{$counter}++;

	### '-' strand alignments (index 1 and 2) are compared against the reverse complement of the genomic sequence
	if ($alignment_strand eq '-'){
		$non_bisulfite_sequence = reverse_complement($non_bisulfite_sequence);
		if ($contains_deletion){
			$genomic_seq_for_MD_tag = reverse_complement($genomic_seq_for_MD_tag);
		}
	}

	$methylation_call_params->{$sequence_identifier}->{alignment_strand}  = $alignment_strand;
	$methylation_call_params->{$sequence_identifier}->{read_conversion}   = $read_conversion_info;
	$methylation_call_params->{$sequence_identifier}->{genome_conversion} = $genome_conversion;
	$store_extracted_sequences->();

	### the end position of a read is stored in $pos
	$methylation_call_params->{$sequence_identifier}->{end_position} = $pos;
	$methylation_call_params->{$sequence_identifier}->{indels} = $indels;
}

### EXTRACT GENOMIC SEQUENCE | PAIRED-END

sub extract_corresponding_genomic_sequence_paired_end{
	my ($sequence_identifier,$methylation_call_params) = @_;
	### A bisulfite sequence pair for 1 location in the genome can theoretically be on any of the 4 possible converted strands. We are also giving the
	### sequence a 'memory' of the conversion we are expecting which we will need later for the methylation call
	# $verbose = 1;
	my $cigar_1 = $methylation_call_params->{$sequence_identifier}->{CIGAR_1};
	my $cigar_2 = $methylation_call_params->{$sequence_identifier}->{CIGAR_2};
	my $flag_1 =  $methylation_call_params->{$sequence_identifier}->{flag_1};
	my $flag_2 =  $methylation_call_params->{$sequence_identifier}->{flag_2};

	my $contains_deletion_1 = 0;
	my $contains_deletion_2 = 0;
	if ($cigar_1 =~ /D/){
		$contains_deletion_1 = 1;
		if ($verbose){ warn "$cigar_1\n$methylation_call_params->{$sequence_identifier}->{mismatch_info_1}\n";}
	}
	if ($cigar_2 =~ /D/){
		$contains_deletion_2 = 1;
		if ($verbose){ warn "$cigar_2\n$methylation_call_params->{$sequence_identifier}->{mismatch_info_2}\n";}
	}

	
	# warn "$cigar_1\t$cigar_2\t$flag_1\t$flag_2\n";
	### We are now extracting the corresponding genomic sequence, +2 extra bases at the end (or start) so that we can also make a CpG methylation call and
	### in addition make differential calls for Cs in CHG or CHH context if the C happens to be at the last (or first)  position of the actually observed sequence

	### the alignment_strand information is needed to determine which strand of the genomic sequence we are comparing the read against,
	### the read_conversion information is needed to know whether we are looking for C->T or G->A substitutions
	my $alignment_read_1;
	my $alignment_read_2;
	my $read_conversion_info_1;
	my $read_conversion_info_2;
	my $genome_conversion;

	### Now extracting the same sequence from the mouse genomic sequence, +2 extra bases at one of the ends so that we can also make a CpG, CHG or CHH methylation call
	### if the C happens to be at the last position of the actually observed sequence
	my $non_bisulfite_sequence_1 = '';
	my $non_bisulfite_sequence_2 = '';
	my $genomic_seq_for_MD_tag_1 = ''; # this sequence contains potential deletions in the genome as well so that we can generate a proper MD tag for the SAM output
	my $genomic_seq_for_MD_tag_2 = '';

	### Positions in SAM format are 1 based, so we need to subract 1 when getting substrings
	my $pos_1 = $methylation_call_params->{$sequence_identifier}->{position_1}-1;
	my $pos_2 = $methylation_call_params->{$sequence_identifier}->{position_2}-1;

	# parsing CIGAR 1 string
	my @len_1 = split (/\D+/,$cigar_1); # storing the length per operation
	my @ops_1 = split (/\d+/,$cigar_1); # storing the operation
	shift @ops_1; # remove the empty first element
	die "CIGAR 1 string contained a non-matching number of lengths and operations\n" unless (scalar @len_1 == scalar @ops_1);
	# parsing CIGAR 2 string
	my @len_2 = split (/\D+/,$cigar_2); # storing the length per operation
	my @ops_2 = split (/\d+/,$cigar_2); # storing the operation
	shift @ops_2; # remove the empty first element
	die "CIGAR 2 string contained a non-matching number of lengths and operations\n" unless (scalar @len_2 == scalar @ops_2);

	my $indels_1 = 0; # adding these to the hemming distance value (needed for the NM field in the final SAM output
	my $indels_2 = 0;

	### Extracting read 1 genomic sequence ###

	# extracting 2 additional bp at the 5' end (read 1)
	if ( ($methylation_call_params->{$sequence_identifier}->{index} == 1) or ($methylation_call_params->{$sequence_identifier}->{index} == 3) ){
		# checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
		unless ( ($pos_1-2) > 0){# exiting with en empty genomic sequence otherwise
			$methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
			$methylation_call_params->{$sequence_identifier}->{genomic_seq_for_MD_tag_1} = $genomic_seq_for_MD_tag_1;
			return;
		}
		$non_bisulfite_sequence_1 .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos_1-2,2);
	}

	foreach (0..$#len_1){
		if ($ops_1[$_] eq 'M'){
			# extracting genomic sequence
			$non_bisulfite_sequence_1 .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos_1,$len_1[$_]);
			if ($contains_deletion_1){
				$genomic_seq_for_MD_tag_1 .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos_1,$len_1[$_]);
			}
			#   warn "$non_bisulfite_sequence_1\n";
			# adjusting position
			$pos_1 += $len_1[$_];
		}
		elsif ($ops_1[$_] eq 'I'){ # insertion in the read sequence
			# we simply add padding Xs instead of finding genomic sequence. This will not be used to infer methylation calls, and we can later ignore it for the generation of the MD;Z: tag
			$non_bisulfite_sequence_1 .= 'X' x $len_1[$_];
			if ($contains_deletion_1){
				$genomic_seq_for_MD_tag_1 .= 'X' x $len_1[$_];
			}
			# warn "$non_bisulfite_sequence_1\n";
			# position doesn't need adjusting

			### 03 06 2014: In fact we don't need to add anything to the hemming distance for insertions since we use padding Xs which will fail a base by base comparison in hemming_dist()
			# indels_1 += $len_1[$_]; # adding to $indels_1 to determine the hemming distance (= single base mismatches, insertions or deletions) for the SAM output
		}
		elsif ($ops_1[$_] eq 'S'){ # soft-clipped read sequence
			# we simply add padding Xs instead of finding genomic sequence. These will not be used to infer methylation calls,
			# and we can later ignore them better during the generation of the MD:Z: tag
			$non_bisulfite_sequence_1 .= 'X' x $len_1[$_];
			if ($contains_deletion_1){
				$genomic_seq_for_MD_tag_1 .= 'X' x $len_1[$_];
			}
			# warn "Soft-clipped Read 1 sequence! $sequence_identifier\n";
			# warn "$non_bisulfite_sequence_1\n";
			# position doesn't need adjusting

			### We don't need to add anything to the hemming distance for soft-clipped bases 
			# since we use padding Xs which will fail a base by base comparison in hemming_dist()
		}
		elsif ($ops_1[$_] eq 'D'){ # deletion in the read sequence
			# we do not add any genomic sequence but only adjust the position
			# we do however need to add the genomic sequence to $genomic_seq_for_MD-tag so we can create a proper MD tag later
			if ($contains_deletion_1){
				$genomic_seq_for_MD_tag_1 .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos_1,$len_1[$_]);
			}
			#     warn "Just adjusting the position by: ",$len_1[$_],"bp\n";
			$pos_1 += $len_1[$_];
			$indels_1 += $len_1[$_]; # adding to $indels_1 to determine the hemming distance (= single base mismatches, insertions or deletions) for the SAM output
		}
		elsif ($ops_1[$_] eq 'N'){ # skipped region in the read; a splice junction
			# we do not add any genomic sequence but only adjust the position
			#     warn "Just adjusting the position by: ",$len_1[$_],"bp\n";
			$pos_1 += $len_1[$_];
			# not altering the variable needed for the hemming distance
			# $indels_1 += $len_1[$_]; # adding to $indels_1 to determine the hemming distance (= single base mismatches, insertions or deletions) for the SAM output
		}
		elsif($cigar_1 =~ tr/[HPX=]//){ # if these (for standard mapping) illegal characters exist we die
			die "The CIGAR 1 string contained illegal CIGAR operations in addition to 'M', 'I', 'D', 'S' and 'N': $cigar_1";
		}
		else{
			die "The CIGAR 1 string contained undefined CIGAR operations in addition to 'M', 'I', 'D', 'S' and 'N': $cigar_1";
		}	
	}

	### 3' end of read 1
	if ( ($methylation_call_params->{$sequence_identifier}->{index} == 0) or ($methylation_call_params->{$sequence_identifier}->{index} == 2) ){
		## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
		unless (length($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}}) >= $pos_1+2){# exiting with en empty genomic sequence otherwise
			$methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
			return;
		}

		$non_bisulfite_sequence_1 .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos_1,2);
	}	


	### Extracting read 2 genomic sequence ###

	### 5' end of read 2
	if ( ($methylation_call_params->{$sequence_identifier}->{index} == 1) or ($methylation_call_params->{$sequence_identifier}->{index} == 3) ){
		## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
		unless ( ($pos_2-2) >= 0){# exiting with en empty genomic sequence otherwise
			$methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
			$methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_2} = $non_bisulfite_sequence_2;
			$methylation_call_params->{$sequence_identifier}->{genomic_seq_for_MD_tag_2} = $genomic_seq_for_MD_tag_2;
			return;
		}
		$non_bisulfite_sequence_2 .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos_2-2,2);
	}

	foreach (0..$#len_2){
		if ($ops_2[$_] eq 'M'){
			# extracting genomic sequence
			$non_bisulfite_sequence_2 .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos_2,$len_2[$_]);
			if ($contains_deletion_2){
				$genomic_seq_for_MD_tag_2 .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos_2,$len_2[$_]);
			}
			# warn "$non_bisulfite_sequence_2\n";
			# adjusting position
			$pos_2 += $len_2[$_];
		}
		elsif ($ops_2[$_] eq 'I'){ # insertion in the read sequence
			# we simply add padding Xs instead of finding genomic sequence. This will not be used to infer methylation calls and we can ignore this later during the generation of the MD:Z: tag
			$non_bisulfite_sequence_2 .= 'X' x $len_2[$_];
			if ($contains_deletion_2){
				$genomic_seq_for_MD_tag_2 .= 'X' x $len_2[$_];
			}
			# warn "$non_bisulfite_sequence_2\n";
			# position doesn't need adjusting

			### 03 06 2014: In fact we don't need to add anything to the hemming distance for insertions since we use padding Xs which will fail a base by base comparison in hemming_dist()
			# $indels_2 += $len_2[$_]; # adding to $indels_1 to determine the hemming distance (= single base mismatches, insertions or deletions) for the SAM output
		}
		elsif ($ops_2[$_] eq 'S'){ # soft-clipped read sequence
			# we simply add padding Xs instead of finding genomic sequence. This will not be used to infer methylation calls
			# and we can ignore this later during the generation of the MD:Z: tag
			$non_bisulfite_sequence_2 .= 'X' x $len_2[$_];
			if ($contains_deletion_2){
				$genomic_seq_for_MD_tag_2 .= 'X' x $len_2[$_];
			}
			# warn "Soft-clipped Read 2sequence! ID: $sequence_identifier\n";
			# warn "$non_bisulfite_sequence_2\n";
			# position doesn't need adjusting

			# We don't need to add anything to the hemming distance for insertions since we use padding Xs which will fail a base by base comparison in hemming_dist()
		}
		elsif ($ops_2[$_] eq 'D'){ # deletion in the read sequence
			# we do not add any genomic sequence but only adjust the position
			# we do however need to add the genomic sequence to $genomic_seq_for_MD-tag so we can create a proper MD tag later
			if ($contains_deletion_2){
				$genomic_seq_for_MD_tag_2 .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos_2,$len_2[$_]);
			}
			# warn "Just adjusting the position by: ",$len_2[$_],"bp\n";
			$pos_2 += $len_2[$_];
			$indels_2 += $len_2[$_]; # adding to $indels_1 to determine the hemming distance (= single base mismatches, insertions or deletions) for the SAM output
		}
		elsif ($ops_2[$_] eq 'N'){ # deletion in the read sequence
			# we do not add any genomic sequence but only adjust the position
			# warn "Just adjusting the position by: ",$len_2[$_],"bp\n";
			$pos_2 += $len_2[$_];
		}
		elsif($cigar_2 =~ tr/[SHPX=]//){ # if these (for standard mapping) illegal characters exist we die
			die "The CIGAR 2 string contained illegal CIGAR operations in addition to 'M', 'I', 'D', 'S' and 'N': $cigar_2";
		}
		else{
			die "The CIGAR 2 string contained undefined CIGAR operations in addition to 'M', 'I', 'D', 'S' and 'N': $cigar_2";
		}
	}

	### 3' end of read 2
	if ( ($methylation_call_params->{$sequence_identifier}->{index} == 0) or ($methylation_call_params->{$sequence_identifier}->{index} == 2) ){
		## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
			unless (length($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}}) >= $pos_2+2){# exiting with en empty genomic sequence otherwise
			# need to set read 1 as well now to prevent warning
			#  warn "'$non_bisulfite_sequence_1'\n'$non_bisulfite_sequence_2'\n\n";
			#  sleep(5);
			$methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
			$methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_2} = $non_bisulfite_sequence_2;
			return;
		}
		$non_bisulfite_sequence_2 .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos_2,2);
	}

	### all paired-end alignments reported by Bowtie 2 have the Read 1 alignment first and the Read 2 alignment as the second one irrespective of whether read 1 or read 2 was
	### the + alignment. We also read in sequences read 1 then read 2 so they should correspond perfectly

	### results from CT converted read 1 plus GA converted read 2 vs. CT converted genome (+/- orientation alignments are reported only)
	if ($methylation_call_params->{$sequence_identifier}->{index} == 0){
		### [Index 0, sequence originated from (converted) forward strand]
		$counting{CT_GA_CT_count}++;
		$alignment_read_1 = '+';
		$alignment_read_2 = '-';
		$read_conversion_info_1 = 'CT';
		$read_conversion_info_2 = 'GA';
		$genome_conversion = 'CT';
		### Read 1 is always the forward hit
		### Read 2 is will always on the reverse strand, so it needs to be reverse complemented
		$non_bisulfite_sequence_2 = reverse_complement($non_bisulfite_sequence_2);
		if ($contains_deletion_2){
			$genomic_seq_for_MD_tag_2 = reverse_complement($genomic_seq_for_MD_tag_2);
		}
	}

	### results from GA converted read 1 plus CT converted read 2 vs. GA converted genome (+/- orientation alignments are reported only)
	elsif ($methylation_call_params->{$sequence_identifier}->{index} == 1){
		### [Index 1, sequence originated from complementary to (converted) bottom strand]
		$counting{GA_CT_GA_count}++;
		$alignment_read_1 = '+';
		$alignment_read_2 = '-';
		$read_conversion_info_1 = 'GA';
		$read_conversion_info_2 = 'CT';
		$genome_conversion = 'GA';
		### Read 1 is always the forward hit
		### Read 2 is will always on the reverse strand, so it needs to be reverse complemented
		$non_bisulfite_sequence_2 = reverse_complement($non_bisulfite_sequence_2);
		if ($contains_deletion_2){
			$genomic_seq_for_MD_tag_2 = reverse_complement($genomic_seq_for_MD_tag_2);
		}
	}

	### results from GA converted read 1 plus CT converted read 2 vs. CT converted genome (-/+ orientation alignments are reported only)
	elsif ($methylation_call_params->{$sequence_identifier}->{index} == 2){
		### [Index 2, sequence originated from the complementary to (converted) top strand]
		$counting{GA_CT_CT_count}++;
		$alignment_read_1 = '-';
		$alignment_read_2 = '+';
		$read_conversion_info_1 = 'GA';
		$read_conversion_info_2 = 'CT';
		$genome_conversion = 'CT';

		### Read 1 (the reverse strand) genomic sequence needs to be reverse complemented
		$non_bisulfite_sequence_1 = reverse_complement($non_bisulfite_sequence_1);
		if ($contains_deletion_1){
			$genomic_seq_for_MD_tag_1 = reverse_complement($genomic_seq_for_MD_tag_1);
		}
	}

	### results from CT converted read 1 plus GA converted read 2 vs. GA converted genome (-/+ orientation alignments are reported only)
	elsif ($methylation_call_params->{$sequence_identifier}->{index} == 3){
		### [Index 3, sequence originated from the (converted) reverse strand]
		$counting{CT_GA_GA_count}++;
		$alignment_read_1 = '-';
		$alignment_read_2 = '+';
		$read_conversion_info_1 = 'CT';
		$read_conversion_info_2 = 'GA';
		$genome_conversion = 'GA';
		### Read 1 (the reverse strand) genomic sequence needs to be reverse complemented
		$non_bisulfite_sequence_1 = reverse_complement($non_bisulfite_sequence_1);
		if ($contains_deletion_1){
			$genomic_seq_for_MD_tag_1 = reverse_complement($genomic_seq_for_MD_tag_1);
		}
	}
	else{
		die "Too many bowtie result filehandles\n";
	}
	### the alignment_strand information is needed to determine which strand of the genomic sequence we are comparing the read against,
	### the read_conversion information is needed to know whether we are looking for C->T or G->A substitutions

	$methylation_call_params->{$sequence_identifier}->{alignment_read_1} = $alignment_read_1;
	$methylation_call_params->{$sequence_identifier}->{alignment_read_2} = $alignment_read_2;
	$methylation_call_params->{$sequence_identifier}->{genome_conversion} = $genome_conversion;
	$methylation_call_params->{$sequence_identifier}->{read_conversion_1} = $read_conversion_info_1;
	$methylation_call_params->{$sequence_identifier}->{read_conversion_2} = $read_conversion_info_2;
	$methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
	$methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_2} = $non_bisulfite_sequence_2;
	$methylation_call_params->{$sequence_identifier}->{genomic_seq_for_MD_tag_1} = $genomic_seq_for_MD_tag_1;
	$methylation_call_params->{$sequence_identifier}->{genomic_seq_for_MD_tag_2} = $genomic_seq_for_MD_tag_2;

	## the end position of a read is stored in $pos
	$methylation_call_params->{$sequence_identifier}->{end_position_1} = $pos_1;
	$methylation_call_params->{$sequence_identifier}->{end_position_2} = $pos_2;
	$methylation_call_params->{$sequence_identifier}->{indels_1} = $indels_1;
	$methylation_call_params->{$sequence_identifier}->{indels_2} = $indels_2;
}


### METHYLATION CALL

sub methylation_call{
	### Compares a read base by base against the corresponding genomic sequence and returns
	### a methylation call string with one character per read base.
	###
	### Arguments:
	###   $identifier                  read ID (not used for the call itself)
	###   $sequence_actually_observed  the read sequence as it was sequenced
	###   $genomic_sequence            genomic sequence the read aligned to; expected to be
	###                                2 bp longer than the read (a warning is issued otherwise)
	###   $read_conversion             'CT' or 'GA'; any other value is fatal
	###
	### Side effect: the per-read counts are added to the file-level %counting hash.
	my ($identifier,$sequence_actually_observed,$genomic_sequence,$read_conversion) = @_;

	### splitting both the actually observed sequence and the genomic sequence up into single bases so we can compare them one by one
	my @seq = split(//,$sequence_actually_observed);
	my @genomic = split(//,$genomic_sequence);

	### Creating a match-string with different characters for non-cytosine bases (disregarding mismatches here), methyl-Cs or non-methyl Cs in either
	### CpG, CHH or CHG context

	#################################################################
	### . for bases not involving cytosines                       ###
	### X for methylated C in CHG context (was protected)         ###
	### x for not methylated C in CHG context (was converted)     ###
	### H for methylated C in CHH context (was protected)         ###
	### h for not methylated C in CHH context (was converted)     ###
	### Z for methylated C in CpG context (was protected)         ###
	### z for not methylated C in CpG context (was converted)     ###
	### U for methylated C in unknown context (was protected)     ###
	### u for not methylated C in unknown context (was converted) ###
	#################################################################

	warn "length of \@seq: ",scalar @seq,"\tlength of \@genomic: ",scalar @genomic,"\n" unless (scalar @seq == (scalar @genomic - 2)); ## CHH changed to -2

	### The CT and GA cases are mirror images of each other:
	###   CT: read base i is compared to genomic base i; a genomic C is either still C (protected) or T (converted)
	###       in the read, and the sequence context is read from the 1st and 2nd DOWNSTREAM genomic base (looking for G)
	###   GA: read base i is compared to genomic base i+2; a genomic G is either still G (protected) or A (converted)
	###       in the read, and the context of the C on the opposing strand is read from the 1st and 2nd UPSTREAM
	###       genomic base (looking for C)
	my ($cytosine_base,$converted_base,$context_base,$offset,$step);
	if ($read_conversion eq 'CT'){
		($cytosine_base,$converted_base,$context_base,$offset,$step) = ('C','T','G',0,1);
	}
	elsif ($read_conversion eq 'GA'){
		($cytosine_base,$converted_base,$context_base,$offset,$step) = ('G','A','C',2,-1);
	}
	else{
		die "Strand conversion info is required to perform a methylation call\n";
	}

	### number of times each call character was emitted for this read
	my %call_count = map { $_ => 0 } qw(Z z X x H h U u);
	my @match = ();

	for my $index (0..$#seq){
		my $genomic_index = $index + $offset;
		my $genomic_base  = $genomic[$genomic_index];

		### only positions with a cytosine in the genome (a G for GA reads) can produce a call
		if ($genomic_base ne $cytosine_base){
			push @match,'.';
			next;
		}

		my $protected;
		if ($seq[$index] eq $cytosine_base){
			$protected = 1;   # residue was not converted, i.e. protected by methylation
		}
		elsif ($seq[$index] eq $converted_base){
			$protected = 0;   # residue was bisulfite converted, i.e. not methylated
		}
		else{
			### all other mismatches are not of interest for a methylation call
			push @match,'.';
			next;
		}

		### determining the sequence context; the call is upper case here and gets lower-cased for converted residues below
		my $call;
		my $first_neighbour = $genomic[$genomic_index + $step];

		if ($first_neighbour eq $context_base){
			$call = 'Z'; # CpG context
		}
		elsif ($first_neighbour eq 'N' or $first_neighbour eq 'X'){
			$call = 'U'; # if the neighbouring base was an N (or padding X) we cannot be sure about the context (it might have been a CG)
		}
		else{
			### C is not in CpG context, the second neighbouring base decides between CHG and CHH
			my $second_neighbour = $genomic[$genomic_index + 2*$step];

			if ($second_neighbour eq $context_base){
				$call = 'X'; # CHG context
			}
			elsif ($second_neighbour eq 'N' or $second_neighbour eq 'X'){
				$call = 'U'; # context might have been CHH or CHG
			}
			else{
				$call = 'H'; # CHH context
			}
		}

		$call = lc $call unless ($protected);
		++$call_count{$call};
		push @match,$call;
	}

	my $methylation_call = join ("",@match);

	### every counter is updated even if it is 0 for this read, so that the keys always exist in %counting
	$counting{total_meCHH_count} += $call_count{H};
	$counting{total_meCHG_count} += $call_count{X};
	$counting{total_meCpG_count} += $call_count{Z};
	$counting{total_meC_unknown_count} += $call_count{U};
	$counting{total_unmethylated_CHH_count} += $call_count{h};
	$counting{total_unmethylated_CHG_count} += $call_count{x};
	$counting{total_unmethylated_CpG_count} += $call_count{z};
	$counting{total_unmethylated_C_unknown_count} += $call_count{u};

	return $methylation_call;
}


sub read_genome_into_memory{

    ## Reads all FastA files found in the genome folder into memory.
    ##
    ## Argument: the directory to change back to once the genome has been read.
    ##
    ## Uses the file-level variables $genome_folder, $cram, $cram_ref and $output_dir, and populates
    ##   %chromosomes  chromosome name => upper-cased sequence
    ##   %SQ_order     running number (starting at 1) => chromosome name, in the order the sequences were read
    ## Dies if no FastA file is found, if a file cannot be opened, or if a chromosome name is empty or occurs twice.

    ## working directoy
    my $cwd = shift;

    ## reading in and storing the specified genome in the %chromosomes hash
    chdir ($genome_folder) or die "Can't move to $genome_folder: $!";
    warn "Now reading in and storing sequence information of the genome specified in: $genome_folder\n\n";

    ### File extensions are tried in this order, the first one that matches any file wins:
    ### .fa, then .fa.gz, then .fasta, then .fasta.gz
    my @chromosome_filenames;
    foreach my $pattern ('*.fa','*.fa.gz','*.fasta','*.fasta.gz'){
        @chromosome_filenames = glob($pattern);
        last if (@chromosome_filenames);
    }

    unless (@chromosome_filenames){
        die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions, with or w/o .gz extension)\n";
    }

    my $SQ_count = 0;

    foreach my $chromosome_filename (@chromosome_filenames){
        # warn "Now processing: $chromosome_filename\n";

        ### Using a lexical filehandle, and the list form of the pipe open for gzipped files, so that the
        ### file name is never interpreted by a shell (file names containing spaces or shell meta-characters work)
        my $chr_in;
        if ($chromosome_filename =~ /\.gz$/){
            open ($chr_in,'-|','gunzip','-c',$chromosome_filename) or die "Failed to read from sequence file $chromosome_filename $!\n";
        }
        else{
            open ($chr_in,'<',$chromosome_filename) or die "Failed to read from sequence file $chromosome_filename $!\n";
        }

        ### first line needs to be a fastA header
        my $first_line = <$chr_in>;
        chomp $first_line;
        $first_line =~ s/\r//;
        ### Extracting chromosome name from the FastA header
        my $chromosome_name = extract_chromosome_name($first_line);
        if ($chromosome_name eq ''){ # should prevent chromosome name with spaces at the start such as > chr1, > chr2
            die "Chromosome names must not be empty! Please check that there are no spaces at the start of the FastA header(s) and try again\n\n";
        }

        ### starting with an empty string (rather than undef) so that an entry without any sequence is reported as 0 bp
        my $sequence = '';

        while (my $line = <$chr_in>){
            chomp $line;
            $line =~ s/\r//; # removing carriage returns if present
            if ($line =~ /^>/){

                ### storing the previous chromosome in the %chromosomes hash, only relevant for Multi-Fasta-Files (MFA)
                if (exists $chromosomes{$chromosome_name}){
                    print "chr $chromosome_name (",length($sequence)," bp)\n";
                    die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name!\n";
                }
                else {
                    if (length($sequence) == 0){
                        warn "Chromosome $chromosome_name in the multi-fasta file $chromosome_filename did not contain any sequence information!\n";
                    }
                    print "chr $chromosome_name (",length($sequence)," bp)\n";
                    $chromosomes{$chromosome_name} = $sequence;
                    # warn "$SQ_count\t$chromosome_name\n";
                    ++$SQ_count;
                    $SQ_order{$SQ_count} = $chromosome_name;
                }
                ### resetting the sequence variable
                $sequence = '';
                ### setting new chromosome name
                $chromosome_name = extract_chromosome_name($line);
                if ($chromosome_name eq ''){ # should prevent chromosome name with spaces at the start such as > chr1, > chr2
                    die "Chromosome names must not be empty! Please check that there are no spaces at the start of the FastA header(s) and try again.\n\n";
                }
            }
            else{
                $sequence .= uc $line;
            }
        }

        ### The file was read to its end. For gzipped input a failing close also reveals that gunzip exited with an error
        close ($chr_in) or warn "Failed to close filehandle for sequence file $chromosome_filename: $! (exit status $?)\n";

        ### Processing last chromosome of a multi Fasta File or the only entry in case of single entry FastA files

        if (exists $chromosomes{$chromosome_name}){
            print "chr $chromosome_name (",length($sequence)," bp)\t";
            die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name.\n";
        }
        else{
            if (length($sequence) == 0){
                warn "Chromosome $chromosome_name in the file $chromosome_filename did not contain any sequence information!\n";
            }

            ++$SQ_count;
            # warn "$SQ_count\t$chromosome_name\n";
            print "chr $chromosome_name (",length($sequence)," bp)\n";
            $chromosomes{$chromosome_name} = $sequence;
            $SQ_order{$SQ_count} = $chromosome_name;
        }
    }
    print "\n";
    chdir $cwd or die "Failed to move to directory $cwd\n";

    ### If no single multi-FastA genome file was specified explicitly we will generate one here and write it to the output directory
    if ($cram){
        unless (defined $cram_ref){
            warn "Reconstituting a single multi-FastA genome file as CRAM reference (you may specify such a file using --cram_ref <file> explicitely to prevent this behaviour)\n";

            $cram_ref = "${output_dir}Bismark_genome_CRAM_reference.mfa";
            warn "Writing multi-FastA file to $cram_ref\n";
            open (my $ref_out,'>',$cram_ref) or die "Failed to write to file $cram_ref\n";
            ### each sequence is written as a single line; the chromosomes are written in hash order
            foreach my $chr (keys %chromosomes){
                print {$ref_out} ">$chr\n$chromosomes{$chr}\n";
            }
            warn "Complete\n";
            close ($ref_out) or warn "Failed to close filehandle REF: $!\n";
        }

    }


}

sub extract_chromosome_name {
    ## Returns the chromosome name of a FastA header line, i.e. everything between the leading '>' and the
    ## first whitespace character. Bowtie seems to extract the first string after the initial > in the FASTA
    ## file, so we are doing this as well.
    ##
    ## The returned value is always a defined string. It is empty ('') if the '>' is directly followed by
    ## whitespace or by nothing at all, so callers can reject such headers with a plain string comparison.
    ## Dies if the line does not start with '>'.
    my $fasta_header = shift;

    unless ($fasta_header =~ /^>(\S*)/){
        die "The specified chromosome ($fasta_header) file doesn't seem to be in FASTA format as required!\n";
    }

    return $1;
}

sub reverse_complement{
	### Returns the reverse complement of a sequence. Only the upper case bases A, C, G and T are
	### complemented; every other character (e.g. N or the padding X) only changes its position.
	my ($sequence) = @_;
	my $revcomp = reverse $sequence;   # scalar context: reverses the characters of the string
	$revcomp =~ tr/ACGT/TGCA/;
	return $revcomp;
}


sub biTransformFastAFiles {
	### Writes bisulfite converted (C -> T and/or G -> A) copies of a single-end FastA file to $temp_dir.
	###
	### Argument: path of the FastA input file (may be gzipped, detected by a .gz extension).
	###
	### Which files are written depends on the file-level options:
	###   $pbat               only the G -> A version
	###   $directional        only the C -> T version
	###   neither of the two  both versions
	### $gzip, $prefix, $skip and $upto are honoured as well, and $seqID_contains_tabs is incremented for
	### every processed read whose ID contains a tab.
	###
	### Returns the name of the G -> A file for $pbat, and the list (C -> T file name, G -> A file name)
	### otherwise. The names are returned without $temp_dir.
	my $file = shift;

	### the converted files are named after the input file without its directory part
	my $filename = $file;
	if ($file =~ m/.*\/(.*)$/){
		$filename = $1;
	}

	### Using a lexical filehandle, and the list form of the pipe open for gzipped files, so that the
	### file name is never interpreted by a shell (file names containing spaces or shell meta-characters work)
	my $in;
	if ($file =~ /\.gz$/){
		open ($in,'-|','gunzip','-c',$file) or die "Couldn't read from file $file: $!\n";
	}
	else{
		open ($in,'<',$file) or die "Couldn't read from file $file: $!\n";
	}

	if ($skip){
		warn "Skipping the first $skip reads from $file\n";
		sleep (1);
	}
	if ($upto){
		warn "Processing reads up to sequence no. $upto from $file\n";
		sleep (1);
	}

	my $C_to_T_infile = my $G_to_A_infile = $filename;

	if ($gzip){
		$C_to_T_infile =~ s/$/_C_to_T.fa.gz/;
		$G_to_A_infile =~ s/$/_G_to_A.fa.gz/;
	}
	else{
		$C_to_T_infile =~ s/$/_C_to_T.fa/;
		$G_to_A_infile =~ s/$/_G_to_A.fa/;
	}

	if ($prefix){
		#  warn "Prefixing $prefix:\nold: $C_to_T_infile\nold: $G_to_A_infile\n\n";
		$C_to_T_infile = "$prefix.$C_to_T_infile";
		$G_to_A_infile = "$prefix.$G_to_A_infile";
		#  warn "Prefixing $prefix:\nnew: $C_to_T_infile\nnew: $G_to_A_infile\n\n";
	}

	### opening the output file(s); the gzipped versions are written through a gzip pipe
	if ($pbat){ # PBAT-Seq
		warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
		if ($gzip){
			open (GTOA,"| gzip -c - > ${temp_dir}${G_to_A_infile}") or die "Can't write to file: $!";
		}
		else{
			open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!";
		}
	}
	else{
		warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
		if ($gzip){
			open (CTOT,"| gzip -c - > ${temp_dir}${C_to_T_infile}") or die "Can't write to file: $!";
		}
		else{
			open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!";
		}

		unless ($directional){
			warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
			if ($gzip){
				open (GTOA,"| gzip -c - > ${temp_dir}${G_to_A_infile}") or die "Can't write to file: $!";
			}
			else{
				open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!";
			}
		}
	}

	my $count = 0;

	### the input is read as pairs of lines (header, sequence), i.e. every sequence has to be on a single line
	while (1){
		my $header = <$in>;
		my $sequence= <$in>;
		last unless ($header and $sequence);

		chomp $header;
		$header = fix_IDs($header); # this is to avoid problems with truncated read ID when they contain white spaces
		$header .= "\n";

		++$count;

		if ($skip){
			next unless ($count > $skip);
		}
		if ($upto){
			last if ($count > $upto);
		}

		$sequence = uc$sequence; # make input file case insensitive

		# detecting if the input file contains tab stops, as this is likely to result in no alignments
		if (index($header,"\t") != -1){
			$seqID_contains_tabs++;
		}

		### small check if the sequence seems to be in FastA format
		die "Input file doesn't seem to be in FastA format at sequence $count: $!\n" unless ($header =~ /^>.*/);

		if ($pbat){
			my $sequence_G_to_A = $sequence;
			$sequence_G_to_A =~ tr/G/A/;
			print GTOA "$header$sequence_G_to_A";
		}
		else{ # directional or non-directional
			my $sequence_C_to_T = $sequence;
			$sequence_C_to_T =~ tr/C/T/;
			print CTOT "$header$sequence_C_to_T";

			unless ($directional){
				my $sequence_G_to_A = $sequence;
				$sequence_G_to_A =~ tr/G/A/;
				print GTOA "$header$sequence_G_to_A";
			}
		}
	}

	### Releasing the input file (and reaping the gunzip process for gzipped input). The return value is
	### deliberately ignored: if we stopped reading early because of --upto, gunzip is terminated before it
	### has finished, which makes close() report a failure even though nothing went wrong.
	close ($in);

	if ($directional){
		close CTOT or die "Failed to close filehandle $!";
		warn "\nCreated C -> T converted versions of the FastA file $filename ($count sequences in total)\n\n";
	}
	elsif($pbat){
		warn "\nCreated G -> A converted version of the FastA file $filename ($count sequences in total)\n\n";
		close GTOA or die "Failed to close filehandle $!";
		return ($G_to_A_infile);
	}
	else{
		close CTOT or die "Failed to close filehandle $!";
		close GTOA or die "Failed to close filehandle $!";
		warn "\nCreated C -> T as well as G -> A converted versions of the FastA file $filename ($count sequences in total)\n\n";
	}
	return ($C_to_T_infile,$G_to_A_infile);
}

### Writes in silico bisulfite-converted copies of ONE paired-end FastA input file into $temp_dir.
###
### Arguments:
###   $file        - path of the FastA input file (plain text, or gzip compressed if the name ends in .gz)
###   $read_number - 1 or 2, i.e. whether $file holds the first or the second reads of the pairs
###
### Which conversion(s) get written depends on the library type:
###   directional     : Read 1 -> C-to-T, Read 2 -> G-to-A
###   PBAT            : Read 1 -> G-to-A, Read 2 -> C-to-T
###   non-directional : both conversions, for either read
###
### Read IDs are passed through fix_IDs() and get /1/1 (Read 1) or /2/2 (Read 2) appended.
### $skip and $upto restrict which records are written. The temporary FastA files are never gzip compressed.
###
### Returns (converted file name, record counter) for directional and PBAT libraries, and
### (C-to-T file name, G-to-A file name, record counter) for non-directional libraries. The file names
### do not include $temp_dir; the counter is the number of the last record that was read.
sub biTransformFastAFiles_paired_end {
	my ($file,$read_number) = @_;

	# rejecting invalid read numbers up front, before any file gets opened or created
	unless ($read_number == 1 or $read_number == 2){
		die "Read number needs to be 1 or 2, but was: $read_number\n\n";
	}

	if ($gzip){
		warn "GZIP compression of temporary files is not supported for paired-end FastA data. Continuing to write uncompressed files\n";
		sleep (2);
	}

	# only the file name (without its path) is used to name the temporary files
	my $filename = $file;
	if ($file =~ /\//){
		($filename) = $file =~ m/.*\/(.*)$/;
	}

	### gzipped version of the infile
	if ($file =~ /\.gz$/){
		# list form of the pipe open: the file name is handed to gunzip as is and never parsed by a shell
		open (IN,'-|','gunzip','-c',$file) or die "Couldn't read from file $file: $!\n";
	}
	else{
		# 3-argument open, so the file name can never be mistaken for an open mode or a command
		open (IN,'<',$file) or die "Couldn't read from file $file: $!\n";
	}

	if ($skip){
		warn "Skipping the first $skip reads from $file\n";
		sleep (1);
	}
	if ($upto){
		warn "Processing reads up to sequence no. $upto from $file\n";
		sleep (1);
	}

	my $C_to_T_infile = "${filename}_C_to_T.fa";
	my $G_to_A_infile = "${filename}_G_to_A.fa";

	if ($prefix){
		$C_to_T_infile = "$prefix.$C_to_T_infile";
		$G_to_A_infile = "$prefix.$G_to_A_infile";
	}

	# working out which of the two conversions is required for this read and library type
	my ($write_CT,$write_GA);
	if ($directional){
		$write_CT = ($read_number == 1);
		$write_GA = ($read_number == 2);
	}
	elsif($pbat){
		$write_GA = ($read_number == 1);
		$write_CT = ($read_number == 2);
	}
	else{ # non-directional all four strand output
		$write_CT = $write_GA = 1;
	}

	warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n" if ($write_CT);
	warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n" if ($write_GA);
	if ($write_CT){
		open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
	}
	if ($write_GA){
		open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
	}

	# Bowtie 2 clips off a trailing /1 or /2 itself, which is why the tag is added twice
	my $read_tag = ($read_number == 1) ? '/1/1' : '/2/2';

	my $count = 0;

	while (1){
		my $header = <IN>;
		my $sequence = <IN>;
		last unless ($header and $sequence);

		chomp $header;
		$header = fix_IDs($header); # this is to avoid problems with truncated read ID when they contain white spaces
		$header .= "\n";

		++$count;

		if ($skip){
			next unless ($count > $skip);
		}
		if ($upto){
			last if ($count > $upto);
		}

		$sequence = uc$sequence; # make input file case insensitive

		# detecting if the input file contains tab stops, as this is likely to result in no alignments
		if (index($header,"\t") != -1){
			$seqID_contains_tabs++;
		}

		## small check if the sequence seems to be in FastA format
		die "Input file doesn't seem to be in FastA format at sequence $count: $!\n" unless ($header =~ /^>/);

		$header =~ s/$/$read_tag/; # inserted in front of the line ending

		if ($write_CT){
			my $sequence_C_to_T = $sequence;
			$sequence_C_to_T =~ tr/C/T/;
			print CTOT "$header$sequence_C_to_T";
		}
		if ($write_GA){
			my $sequence_G_to_A = $sequence;
			$sequence_G_to_A =~ tr/G/A/;
			print GTOA "$header$sequence_G_to_A";
		}
	}

	# Releasing the input handle (and reaping gunzip for compressed input). The return value is ignored on
	# purpose: gunzip exits with an error if we stopped reading early because of $upto
	close IN;

	if ($write_CT and $write_GA){
		warn "\nCreated C -> T as well as G -> A converted versions of the FastA file $filename ($count sequences in total)\n\n";
	}
	elsif ($write_CT){
		warn "\nCreated C -> T converted version of the FastA file $filename ($count sequences in total)\n\n";
	}
	else{
		warn "\nCreated G -> A converted version of the FastA file $filename ($count sequences in total)\n\n";
	}

	my @converted_files;
	if ($write_CT){
		close CTOT or die "Failed to close filehandle $!\n";
		push @converted_files, $C_to_T_infile;
	}
	if ($write_GA){
		close GTOA or die "Failed to close filehandle $!\n";
		push @converted_files, $G_to_A_infile;
	}

	return (@converted_files,$count);
}

### Writes in silico bisulfite-converted copies of a single-end FastQ file into $temp_dir.
###
### Argument:
###   $file - path of the FastQ input file (plain text, or gzip compressed if the name ends in .gz)
###
### Conversions written per library type:
###   PBAT            : G-to-A only
###   directional     : C-to-T only
###   non-directional : C-to-T and G-to-A
###
### Read IDs are passed through fix_IDs(). $skip and $upto restrict which records are written, and reads longer
### than $maximum_length_cutoff (if defined) are dropped. Output is gzip compressed if $gzip is set.
###
### Returns ($G_to_A_infile) for PBAT libraries and ($C_to_T_infile,$G_to_A_infile) otherwise. File names do not
### include $temp_dir. For directional libraries the second element is just the (prefixed) input file name,
### as no G-to-A file is written.
sub biTransformFastQFiles {
	my $file = shift;

	# only the file name (without its path) is used to name the temporary files
	my $filename = $file;
	if ($file =~ /\//){
		($filename) = $file =~ m/.*\/(.*)$/;
	}

	### gzipped version of the infile
	if ($file =~ /\.gz$/){
		# list form of the pipe open: the file name is handed to gunzip as is and never parsed by a shell
		open (IN,'-|','gunzip','-c',$file) or die "Couldn't read from file $file: $!\n";
	}
	else{
		# 3-argument open, so the file name can never be mistaken for an open mode or a command
		open (IN,'<',$file) or die "Couldn't read from file $file: $!\n";
	}

	if ($skip){
		warn "Skipping the first $skip reads from $file\n";
	}
	if ($upto){
		warn "Processing reads up to sequence no. $upto from $file\n";
	}

	my $C_to_T_infile = my $G_to_A_infile = $filename;

	if ($prefix){
		$C_to_T_infile = "$prefix.$C_to_T_infile";
		$G_to_A_infile = "$prefix.$G_to_A_infile";
	}

	my $suffix = $gzip ? '.fastq.gz' : '.fastq';

	# opens one temporary output file on the handle passed in, writing through gzip if requested
	my $open_output = sub {
		my ($handle,$outfile) = @_;
		if ($gzip){
			open ($handle,"| gzip -c - > ${temp_dir}${outfile}") or die "Can't write to file: $!\n";
		}
		else{
			open ($handle,'>',"$temp_dir$outfile") or die "Couldn't write to file $!\n";
		}
	};

	if ($pbat){ # PBAT-Seq
		$G_to_A_infile .= "_G_to_A$suffix";
		warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
		$open_output->(\*GTOA,$G_to_A_infile);
	}
	else{ # directional or non-directional
		$C_to_T_infile .= "_C_to_T$suffix";
		warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
		$open_output->(\*CTOT,$C_to_T_infile);

		unless ($directional){
			$G_to_A_infile .= "_G_to_A$suffix";
			warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
			$open_output->(\*GTOA,$G_to_A_infile);
		}
	}

	my $count = 0;
	while (1){
		my $identifier = <IN>;
		my $sequence = <IN>;
		my $identifier2 = <IN>;
		my $quality_score = <IN>;
		last unless ($identifier and $sequence and $identifier2 and $quality_score);

		chomp $identifier;
		$identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
		$identifier .= "\n";

		++$count;

		if ($skip){
			next unless ($count > $skip);
		}
		if ($upto){
			last if ($count > $upto);
		}

		$sequence = uc$sequence; # make input file case insensitive

		if (defined $maximum_length_cutoff){
			# $maximum_length_cutoff  defined globally for mm2 runs
			# The line terminator must not count towards the read length, otherwise reads of exactly the
			# maximum length would get removed as well
			(my $bases = $sequence) =~ s/[\r\n]+$//;
			my $read_length = length $bases;
			if ($read_length > $maximum_length_cutoff){
				print "Removing this sequence with length ",$read_length,"\n";
				next;
			}
		}

		# detecting if the input file contains tab stops, as this is likely to result in no alignments
		if (index($identifier,"\t") != -1){
			$seqID_contains_tabs++;
		}

		## small check if the sequence file appears to be a FastQ file
		if ($count == 1){
			if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
				die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
			}
		}

		if ($pbat){
			my $sequence_G_to_A = $sequence;
			$sequence_G_to_A =~ tr/G/A/;
			print GTOA join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
		}
		else{ # directional or non-directional
			my $sequence_C_to_T = $sequence;
			$sequence_C_to_T =~ tr/C/T/;
			print CTOT join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);

			unless ($directional){
				my $sequence_G_to_A = $sequence;
				$sequence_G_to_A =~ tr/G/A/;
				print GTOA join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
			}
		}
	}

	# Releasing the input handle (and reaping gunzip for compressed input). The return value is ignored on
	# purpose: gunzip exits with an error if we stopped reading early because of $upto
	close IN;

	if ($directional){
		close CTOT or die "Failed to close filehandle $!\n";
		warn "\nCreated C -> T converted version of the FastQ file $filename ($count sequences in total)\n\n";
	}
	elsif($pbat){
		warn "\nCreated G -> A converted version of the FastQ file $filename ($count sequences in total)\n\n";
		close GTOA or die "Failed to close filehandle $!\n";
		return ($G_to_A_infile);
	}
	else{
		close CTOT or die "Failed to close filehandle $!\n";
		close GTOA or die "Failed to close filehandle $!\n";
		warn "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
	}
	return ($C_to_T_infile,$G_to_A_infile);
}

### SLAM-Seq counterpart of the single-end FastQ conversion: writes converted copies of a FastQ file into $temp_dir.
###
### Argument:
###   $file - path of the FastQ input file (plain text, or gzip compressed if the name ends in .gz)
###
### The substitutions are T -> C and A -> G. File names, variable names and log messages nevertheless keep the
### C_to_T / G_to_A labels:
###   PBAT            : "G_to_A" file only (A -> G)
###   directional     : "C_to_T" file only (T -> C)
###   non-directional : both files
###
### Read IDs are passed through fix_IDs(). $skip and $upto restrict which records are written. Output is gzip
### compressed if $gzip is set.
###
### Returns ($G_to_A_infile) for PBAT libraries and ($C_to_T_infile,$G_to_A_infile) otherwise. File names do not
### include $temp_dir. For directional libraries the second element is just the (prefixed) input file name,
### as no second file is written.
sub biTransformFastQFiles_slam{
	my $file = shift;

	# only the file name (without its path) is used to name the temporary files
	my $filename = $file;
	if ($file =~ /\//){
		($filename) = $file =~ m/.*\/(.*)$/;
	}

	### gzipped version of the infile
	if ($file =~ /\.gz$/){
		# list form of the pipe open: the file name is handed to gunzip as is and never parsed by a shell
		open (IN,'-|','gunzip','-c',$file) or die "Couldn't read from file $file: $!\n";
	}
	else{
		# 3-argument open, so the file name can never be mistaken for an open mode or a command
		open (IN,'<',$file) or die "Couldn't read from file $file: $!\n";
	}

	if ($skip){
		warn "Skipping the first $skip reads from $file\n";
	}
	if ($upto){
		warn "Processing reads up to sequence no. $upto from $file\n";
	}

	my $C_to_T_infile = my $G_to_A_infile = $filename;

	if ($prefix){
		$C_to_T_infile = "$prefix.$C_to_T_infile";
		$G_to_A_infile = "$prefix.$G_to_A_infile";
	}

	my $suffix = $gzip ? '.fastq.gz' : '.fastq';

	# opens one temporary output file on the handle passed in, writing through gzip if requested
	my $open_output = sub {
		my ($handle,$outfile) = @_;
		if ($gzip){
			open ($handle,"| gzip -c - > ${temp_dir}${outfile}") or die "Can't write to file: $!\n";
		}
		else{
			open ($handle,'>',"$temp_dir$outfile") or die "Couldn't write to file $!\n";
		}
	};

	if ($pbat){ # PBAT-Seq
		$G_to_A_infile .= "_G_to_A$suffix";
		warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
		$open_output->(\*GTOA,$G_to_A_infile);
	}
	else{ # directional or non-directional
		$C_to_T_infile .= "_C_to_T$suffix";
		warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
		$open_output->(\*CTOT,$C_to_T_infile);

		unless ($directional){
			$G_to_A_infile .= "_G_to_A$suffix";
			warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
			$open_output->(\*GTOA,$G_to_A_infile);
		}
	}

	my $count = 0;
	while (1){
		my $identifier = <IN>;
		my $sequence = <IN>;
		my $identifier2 = <IN>;
		my $quality_score = <IN>;
		last unless ($identifier and $sequence and $identifier2 and $quality_score);

		chomp $identifier;
		$identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
		$identifier .= "\n";

		++$count;

		if ($skip){
			next unless ($count > $skip);
		}
		if ($upto){
			last if ($count > $upto);
		}

		$sequence = uc$sequence; # make input file case insensitive

		# detecting if the input file contains tab stops, as this is likely to result in no alignments
		if (index($identifier,"\t") != -1){
			$seqID_contains_tabs++;
		}

		## small check if the sequence file appears to be a FastQ file
		if ($count == 1){
			if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
				die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
			}
		}

		if ($pbat){
			my $sequence_G_to_A = $sequence;
			$sequence_G_to_A =~ tr/A/G/;  # changed for SLAM-Seq
			print GTOA join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
		}
		else{ # directional or non-directional
			my $sequence_C_to_T = $sequence;
			$sequence_C_to_T =~ tr/T/C/;  # changed for SLAM-Seq
			print CTOT join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);

			unless ($directional){
				my $sequence_G_to_A = $sequence;
				$sequence_G_to_A =~ tr/A/G/; # changed for SLAM-Seq
				print GTOA join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
			}
		}
	}

	# Releasing the input handle (and reaping gunzip for compressed input). The return value is ignored on
	# purpose: gunzip exits with an error if we stopped reading early because of $upto
	close IN;

	if ($directional){
		close CTOT or die "Failed to close filehandle $!\n";
		warn "\nCreated C -> T converted version of the FastQ file $filename ($count sequences in total)\n\n";
	}
	elsif($pbat){
		warn "\nCreated G -> A converted version of the FastQ file $filename ($count sequences in total)\n\n";
		close GTOA or die "Failed to close filehandle $!\n";
		return ($G_to_A_infile);
	}
	else{
		close CTOT or die "Failed to close filehandle $!\n";
		close GTOA or die "Failed to close filehandle $!\n";
		warn "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
	}
	return ($C_to_T_infile,$G_to_A_infile);
}

### Writes in silico bisulfite-converted copies of ONE paired-end FastQ input file into $temp_dir.
###
### Arguments:
###   $file        - path of the FastQ input file (plain text, or gzip compressed if the name ends in .gz)
###   $read_number - 1 or 2, i.e. whether $file holds the first or the second reads of the pairs
###
### Which conversion(s) get written depends on the library type:
###   PBAT            : Read 1 -> G-to-A, Read 2 -> C-to-T
###   directional     : Read 1 -> C-to-T, Read 2 -> G-to-A
###   non-directional : both conversions, for either read
###
### Read IDs are passed through fix_IDs() and get /1/1 or /2/2 appended (only /1 or /2 if $mm2 is set).
### $skip and $upto restrict which records are written. Output is gzip compressed if $gzip is set.
###
### Returns (converted file name, record counter) for directional and PBAT libraries, and
### (C-to-T file name, G-to-A file name, record counter) for non-directional libraries. The file names
### do not include $temp_dir. The counter is passed back so that callers can make sure the R1 and R2
### files have the same length.
sub biTransformFastQFiles_paired_end {
	my ($file,$read_number) = @_;

	# rejecting invalid read numbers up front, before any file gets opened or created
	unless ($read_number == 1 or $read_number == 2){
		die "Read number needs to be 1 or 2, but was $read_number!\n\n";
	}

	# only the file name (without its path) is used to name the temporary files
	my $filename = $file;
	if ($file =~ /\//){
		($filename) = $file =~ m/.*\/(.*)$/;
	}

	### gzipped version of the infile
	if ($file =~ /\.gz$/){
		# list form of the pipe open: the file name is handed to gunzip as is and never parsed by a shell
		open (IN,'-|','gunzip','-c',$file) or die "Couldn't read from file $file: $!\n";
	}
	else{
		# 3-argument open, so the file name can never be mistaken for an open mode or a command
		open (IN,'<',$file) or die "Couldn't read from file $file: $!\n";
	}

	if ($skip){
		warn "Skipping the first $skip reads from $file\n";
	}
	if ($upto){
		warn "Processing reads up to sequence no. $upto from $file\n";
	}

	my $suffix = $gzip ? '.fastq.gz' : '.fastq';
	my $C_to_T_infile = "${filename}_C_to_T$suffix";
	my $G_to_A_infile = "${filename}_G_to_A$suffix";

	if ($prefix){
		$C_to_T_infile = "$prefix.$C_to_T_infile";
		$G_to_A_infile = "$prefix.$G_to_A_infile";
	}

	# working out which of the two conversions is required for this read and library type
	my ($write_CT,$write_GA);
	if ($pbat){
		$write_GA = ($read_number == 1);
		$write_CT = ($read_number == 2);
	}
	elsif ($directional){
		$write_CT = ($read_number == 1);
		$write_GA = ($read_number == 2);
	}
	else{ # non-directional
		$write_CT = $write_GA = 1;
	}

	# opens one temporary output file on the handle passed in, writing through gzip if requested
	my $open_output = sub {
		my ($handle,$outfile) = @_;
		if ($gzip){
			open ($handle,"| gzip -c - > ${temp_dir}${outfile}") or die "Can't write to file: $!\n";
		}
		else{
			open ($handle,'>',"$temp_dir$outfile") or die "Couldn't write to file $!\n";
		}
	};

	warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n" if ($write_CT);
	warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n" if ($write_GA);
	$open_output->(\*CTOT,$C_to_T_infile) if ($write_CT);
	$open_output->(\*GTOA,$G_to_A_infile) if ($write_GA);

	# Bowtie 2 clips off a trailing /1 or /2 itself, which is why the tag is normally added twice
	my $read_tag = ($read_number == 1) ? '/1' : '/2';
	unless ($mm2){ # minimap2 does not remove the last /1 /2 automatically
		$read_tag .= $read_tag;
	}

	my $count = 0;
	while (1){
		my $identifier = <IN>;
		my $sequence = <IN>;
		my $identifier2 = <IN>;
		my $quality_score = <IN>;
		last unless ($identifier and $sequence and $identifier2 and $quality_score);
		++$count;

		chomp $identifier;
		$identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
		$identifier .= "\n";

		if ($skip){
			next unless ($count > $skip);
		}
		if ($upto){
			last if ($count > $upto);
		}

		$sequence = uc$sequence; # make input file case insensitive

		## small check if the sequence file appears to be a FastQ file
		if ($count == 1){
			if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
				die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
			}
		}

		$identifier =~ s/$/$read_tag/; # inserted in front of the line ending

		if ($write_CT){
			my $sequence_C_to_T = $sequence;
			$sequence_C_to_T =~ tr/C/T/;
			print CTOT join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);
		}
		if ($write_GA){
			my $sequence_G_to_A = $sequence;
			$sequence_G_to_A =~ tr/G/A/;
			print GTOA join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
		}
	}

	# Releasing the input handle (and reaping gunzip for compressed input). The return value is ignored on
	# purpose: gunzip exits with an error if we stopped reading early because of $upto
	close IN;

	if ($write_CT and $write_GA){
		warn "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
	}
	elsif ($write_CT){
		warn "\nCreated C -> T converted version of the FastQ file $filename ($count sequences in total)\n\n";
	}
	else{
		warn "\nCreated G -> A converted version of the FastQ file $filename ($count sequences in total)\n\n";
	}

	my @converted_files;
	if ($write_CT){
		close CTOT or die "Failed to close filehandle $!\n";
		push @converted_files, $C_to_T_infile;
	}
	if ($write_GA){
		close GTOA or die "Failed to close filehandle $!\n";
		push @converted_files, $G_to_A_infile;
	}

	# passing back the number of transliterated sequences so we can make sure R1 and R2 files have the same length
	return (@converted_files,$count);
}

### SLAM-Seq counterpart of the paired-end FastQ conversion: writes converted copies of ONE paired-end FastQ
### input file into $temp_dir.
###
### Arguments:
###   $file        - path of the FastQ input file (plain text, or gzip compressed if the name ends in .gz)
###   $read_number - 1 or 2, i.e. whether $file holds the first or the second reads of the pairs
###
### The substitutions are T -> C and A -> G. File names, variable names and log messages nevertheless keep the
### C_to_T / G_to_A labels:
###   PBAT            : Read 1 -> "G_to_A" file (A -> G), Read 2 -> "C_to_T" file (T -> C)
###   directional     : Read 1 -> "C_to_T" file (T -> C), Read 2 -> "G_to_A" file (A -> G)
###   non-directional : both files, for either read
###
### Read IDs are passed through fix_IDs() and get /1/1 (Read 1) or /2/2 (Read 2) appended.
### $skip and $upto restrict which records are written. Output is gzip compressed if $gzip is set.
###
### Returns (converted file name, record counter) for directional and PBAT libraries, and
### ("C_to_T" file name, "G_to_A" file name, record counter) for non-directional libraries. The file names
### do not include $temp_dir. The counter is passed back so that callers can make sure the R1 and R2
### files have the same length.
sub biTransformFastQFiles_paired_end_slam {
	my ($file,$read_number) = @_;

	# rejecting invalid read numbers up front, before any file gets opened or created
	unless ($read_number == 1 or $read_number == 2){
		die "Read number needs to be 1 or 2, but was $read_number!\n\n";
	}

	# only the file name (without its path) is used to name the temporary files
	my $filename = $file;
	if ($file =~ /\//){
		($filename) = $file =~ m/.*\/(.*)$/;
	}

	### gzipped version of the infile
	if ($file =~ /\.gz$/){
		# list form of the pipe open: the file name is handed to gunzip as is and never parsed by a shell
		open (IN,'-|','gunzip','-c',$file) or die "Couldn't read from file $file: $!\n";
	}
	else{
		# 3-argument open, so the file name can never be mistaken for an open mode or a command
		open (IN,'<',$file) or die "Couldn't read from file $file: $!\n";
	}

	if ($skip){
		warn "Skipping the first $skip reads from $file\n";
	}
	if ($upto){
		warn "Processing reads up to sequence no. $upto from $file\n";
	}

	my $suffix = $gzip ? '.fastq.gz' : '.fastq';
	my $C_to_T_infile = "${filename}_C_to_T$suffix";
	my $G_to_A_infile = "${filename}_G_to_A$suffix";

	if ($prefix){
		$C_to_T_infile = "$prefix.$C_to_T_infile";
		$G_to_A_infile = "$prefix.$G_to_A_infile";
	}

	# working out which of the two conversions is required for this read and library type
	my ($write_CT,$write_GA);
	if ($pbat){
		$write_GA = ($read_number == 1);
		$write_CT = ($read_number == 2);
	}
	elsif ($directional){
		$write_CT = ($read_number == 1);
		$write_GA = ($read_number == 2);
	}
	else{ # non-directional
		$write_CT = $write_GA = 1;
	}

	# opens one temporary output file on the handle passed in, writing through gzip if requested
	my $open_output = sub {
		my ($handle,$outfile) = @_;
		if ($gzip){
			open ($handle,"| gzip -c - > ${temp_dir}${outfile}") or die "Can't write to file: $!\n";
		}
		else{
			open ($handle,'>',"$temp_dir$outfile") or die "Couldn't write to file $!\n";
		}
	};

	warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n" if ($write_CT);
	warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n" if ($write_GA);
	$open_output->(\*CTOT,$C_to_T_infile) if ($write_CT);
	$open_output->(\*GTOA,$G_to_A_infile) if ($write_GA);

	# Bowtie 2 clips off a trailing /1 or /2 itself, which is why the tag is added twice
	my $read_tag = ($read_number == 1) ? '/1/1' : '/2/2';

	my $count = 0;
	while (1){
		my $identifier = <IN>;
		my $sequence = <IN>;
		my $identifier2 = <IN>;
		my $quality_score = <IN>;
		last unless ($identifier and $sequence and $identifier2 and $quality_score);
		++$count;

		chomp $identifier;
		$identifier = fix_IDs($identifier); # this is to avoid problems with truncated read ID when they contain white spaces
		$identifier .= "\n";

		if ($skip){
			next unless ($count > $skip);
		}
		if ($upto){
			last if ($count > $upto);
		}

		$sequence = uc$sequence; # make input file case insensitive

		## small check if the sequence file appears to be a FastQ file
		if ($count == 1){
			if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
				die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
			}
		}

		$identifier =~ s/$/$read_tag/; # inserted in front of the line ending

		if ($write_CT){
			my $sequence_C_to_T = $sequence;
			$sequence_C_to_T =~ tr/T/C/; # changed for SLAM-Seq
			print CTOT join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);
		}
		if ($write_GA){
			my $sequence_G_to_A = $sequence;
			$sequence_G_to_A =~ tr/A/G/; # changed for SLAM-Seq
			print GTOA join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
		}
	}

	# Releasing the input handle (and reaping gunzip for compressed input). The return value is ignored on
	# purpose: gunzip exits with an error if we stopped reading early because of $upto
	close IN;

	if ($write_CT and $write_GA){
		warn "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
	}
	elsif ($write_CT){
		warn "\nCreated C -> T converted version of the FastQ file $filename ($count sequences in total)\n\n";
	}
	else{
		warn "\nCreated G -> A converted version of the FastQ file $filename ($count sequences in total)\n\n";
	}

	my @converted_files;
	if ($write_CT){
		close CTOT or die "Failed to close filehandle $!\n";
		push @converted_files, $C_to_T_infile;
	}
	if ($write_GA){
		close GTOA or die "Failed to close filehandle $!\n";
		push @converted_files, $G_to_A_infile;
	}

	# passing back the number of transliterated sequences so we can make sure R1 and R2 files have the same length
	return (@converted_files,$count);
}


### Sanitises a read ID so that it no longer contains spaces or tabs.
### With $icpc set the ID is cut off at the first space or tab (added 13 03 2019; see
### https://github.com/FelixKrueger/Bismark/issues/236), otherwise every run of spaces/tabs
### is replaced by a single underscore. Returns the modified copy of the ID.
sub fix_IDs{
	my ($read_id) = @_;

	unless ($icpc){
		$read_id =~ s/[ \t]+/_/g; # replace spaces or tabs with underscores
		return $read_id;
	}

	$read_id =~ s/[ \t].*$//g; # truncating read ID at the first space or tab
	return $read_id;
}


#####################################################################################################################################################


### Bowtie 2 | PAIRED-END | FASTA
### Starts the Bowtie 2 paired-end alignments of the converted FastA read files and primes every entry of
### @fhs with the first read pair of its alignment output.
###
### The four file names passed in are only used for the log message. The files that actually get aligned are
### taken from each entry of @fhs ({inputfile_1} and {inputfile_2}, both relative to $temp_dir), and are aligned
### against that entry's {bisulfiteIndex}.
###
### For every entry of @fhs this sets:
###   {fh}          - read handle on the output (SAM) of the Bowtie 2 process
###   {last_line_1} - first alignment line (chomped), or undef if there was no alignment
###   {last_line_2} - second alignment line (chomped), or undef if there was no alignment
###   {last_seq_id} - ID of Read 1 of that pair with its trailing /1 removed, or undef if there was no alignment
###                   (left untouched, with a warning, if neither of the two lines carries a /1 tag)
### In directional mode, entries without an {inputfile_1} are not aligned at all and only get the three undefs.
sub paired_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 {
	my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;

	## Now starting up to 4 instances of Bowtie 2 feeding in the converted sequence files and reading in the first line of the Bowtie 2 output, and storing it in
	## the @fhs data structure
	if ($directional){
		warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastA)\n";
		warn "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n";
	}
	else{
		warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastA)\n";
		warn "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n";
	}

	foreach my $fh (@fhs) {

		# directional libraries only run the instances which were given input files
		if ($directional and !$fh->{inputfile_1}){
			$fh->{last_seq_id} = undef;
			$fh->{last_line_1} = undef;
			$fh->{last_line_2} = undef;
			next;
		}

		### ensuring the alignments are only reported in a sensible manner
		my $strand_option = ($fh->{name} eq 'CTread1GAread2CTgenome' or $fh->{name} eq 'GAread1CTread2GAgenome') ? ' --norc' : ' --nofw';
		my $bt2_options = $aligner_options . $strand_option;

		warn "Now starting a Bowtie 2 paired-end alignment for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile_1} and $temp_dir$fh->{inputfile_2}, with the options: $bt2_options))\n";
		open ($fh->{fh},"$path_to_bowtie $bt2_options -x $fh->{bisulfiteIndex} -1 $temp_dir$fh->{inputfile_1} -2 $temp_dir$fh->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";

		### Bowtie 2 outputs SAM format, so we need to skip everything until the first sequence.
		### A lexical variable is used for this so that the caller's $_ is left alone
		my $line_1;
		while (1){
			$line_1 = $fh->{fh}->getline();
			# stopping at the end of the output (no alignment output) or at the first line that is not a SAM header (headers start with @)
			last unless ($line_1 and $line_1 =~ /^\@/);
		}

		my $line_2 = $fh->{fh}->getline();

		# if Bowtie produces an alignment we store the first line of the output
		if ($line_1 and $line_2) {
			chomp $line_1;
			chomp $line_2;
			my $id_1 = (split(/\t/,$line_1))[0]; # this is the first element of the first bowtie output line (= the sequence identifier)
			my $id_2 = (split(/\t/,$line_2))[0]; # this is the first element of the second bowtie output line

			### Bowtie always reports the alignment with the smaller chromosomal position first. This can be either sequence 1 or sequence 2.
			### We will thus identify which sequence was read 1 and store this ID as last_seq_id

			if ($id_1 =~ s/\/1$//){ # removing the read 1 /1 tag if present (remember that Bowtie2 clips off /1 or /2 line endings itself, so we added /1/1 or /2/2 to start with
				$fh->{last_seq_id} = $id_1;
			}
			elsif ($id_2 =~ s/\/1$//){ # otherwise read 1 was reported second: removing its /1 tag from the second ID if present
				$fh->{last_seq_id} = $id_2;
			}
			else{
				warn "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
			}

			$fh->{last_line_1} = $line_1; # this contains either read 1 or read 2
			$fh->{last_line_2} = $line_2; # this contains either read 1 or read 2
			warn "Found first alignment:\n$fh->{last_line_1}\n$fh->{last_line_2}\n";
		}
		# otherwise we just initialise last_seq_id and last_lines as undefined
		else {
			warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
			$fh->{last_seq_id} = undef;
			$fh->{last_line_1} = undef;
			$fh->{last_line_2} = undef;
		}
	}
}

### HISAT2 | PAIRED-END | FASTA
sub paired_end_align_fragments_to_bisulfite_genome_fastA_hisat2 {
	### Starts one HISAT2 paired-end alignment (FastA input) for every entry of @fhs that has input
	### files assigned, skips the SAM header of each output stream and caches the first two
	### alignment lines in the entry.
	###
	### Arguments: the converted file names of read 1 (C->T, G->A) followed by those of read 2
	### (C->T, G->A). They are only used for the log messages below; the files that are actually
	### aligned are taken from $fh->{inputfile_1} and $fh->{inputfile_2}.
	###
	### Side effects: sets $fh->{fh} (pipe to HISAT2), $fh->{last_line_1}, $fh->{last_line_2} and,
	### if read 1 could be identified, $fh->{last_seq_id}. The global $_ is overwritten.
	### Dies if the pipe to HISAT2 cannot be opened.
	my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;

	warn( $directional
		? "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastA)\n"
		: "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastA)\n" );

	## Up to 4 instances of HISAT2 are started, one per entry of @fhs
	warn( $directional
		? "Now running 2 instances of HISAT2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n"
		: "Now running 4 individual instances of HISAT2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n" );

	foreach my $fh (@fhs) {

		# in directional mode, entries without input files are not aligned at all
		if ($directional and !$fh->{inputfile_1}){
			@{$fh}{qw(last_seq_id last_line_1 last_line_2)} = (undef, undef, undef);
			next;
		}

		# the two instances named below get --norc, all other instances get --nofw
		my $wants_norc = ($fh->{name} eq 'CTread1GAread2CTgenome' or $fh->{name} eq 'GAread1CTread2GAgenome');
		my $hisat2_options = $aligner_options;
		$hisat2_options .= $wants_norc ? ' --norc' : ' --nofw';

		warn "Now starting a HISAT2 paired-end alignment for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile_1} and $temp_dir$fh->{inputfile_2}, with the options: $hisat2_options))\n";
		open ($fh->{fh},"$path_to_hisat2 $hisat2_options -x $fh->{bisulfiteIndex} -1 $temp_dir$fh->{inputfile_1} -2 $temp_dir$fh->{inputfile_2} |") or die "Can't open pipe to HISAT2: $!";

		### HISAT2 writes SAM, so we read until the first line that is not a header (or until the output ends)
		do {
			$_ = $fh->{fh}->getline();
		} while ($_ and $_ =~ /^\@/); # SAM headers start with @

		my $line_1 = $_;
		my $line_2 = $fh->{fh}->getline();

		# without a complete pair of output lines there is nothing to store
		unless ($line_1 and $line_2){
			warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
			@{$fh}{qw(last_seq_id last_line_1 last_line_2)} = (undef, undef, undef);
			next;
		}

		chomp $line_1;
		chomp $line_2;
		my ($id_1) = split(/\t/,$line_1); # first SAM column of the first line = sequence identifier
		my ($id_2) = split(/\t/,$line_2); # first SAM column of the second line

		### Either output line may belong to read 1. The line whose ID still carries a trailing /1
		### is read 1; its ID (with the tag removed) is stored as last_seq_id
		if ($id_1 =~ s/\/1$//){
			$fh->{last_seq_id} = $id_1;
		}
		elsif ($id_2 =~ s/\/1$//){
			$fh->{last_seq_id} = $id_2;
		}
		else{
			# only reported, not fatal; last_seq_id is left untouched in this case
			warn "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
		}

		@{$fh}{qw(last_line_1 last_line_2)} = ($line_1, $line_2); # each may hold read 1 or read 2
		warn "Found first alignment:\n$fh->{last_line_1}\n$fh->{last_line_2}\n";
	}
}


### Bowtie 2 | PAIRED-END | FASTQ
sub paired_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 {
	### Starts one Bowtie 2 paired-end alignment (FastQ input) for every entry of @fhs that has input
	### files assigned, skips the SAM header of each output stream and caches the first two
	### alignment lines in the entry.
	###
	### Arguments: the converted file names of read 1 (C->T, G->A) followed by those of read 2
	### (C->T, G->A). They are only used for the log messages below; the files that are actually
	### aligned are taken from $fh->{inputfile_1} and $fh->{inputfile_2}.
	###
	### Side effects: sets $fh->{fh} (pipe to Bowtie 2), $fh->{last_seq_id}, $fh->{last_line_1} and
	### $fh->{last_line_2}. The global $_ is overwritten.
	### Dies if the pipe cannot be opened or if neither of the first two output lines is read 1.
	my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;

	warn( $directional ? "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastQ)\n"
		: $pbat        ? "Input files are $G_to_A_infile_1 and $C_to_T_infile_2 (FastQ)\n"
		:                "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastQ)\n" );

	## Up to 4 instances of Bowtie 2 are started, one per entry of @fhs
	warn( ($directional or $pbat)
		? "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n"
		: "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n" );

	foreach my $fh (@fhs) {

		# in directional or PBAT mode, entries without input files are not aligned at all
		if (($directional or $pbat) and !$fh->{inputfile_1}){
			@{$fh}{qw(last_seq_id last_line_1 last_line_2)} = (undef, undef, undef);
			next;
		}

		# the two instances named below get --norc, all other instances get --nofw
		my $wants_norc = ($fh->{name} eq 'CTread1GAread2CTgenome' or $fh->{name} eq 'GAread1CTread2GAgenome');
		my $bt2_options = $aligner_options;
		$bt2_options .= $wants_norc ? ' --norc' : ' --nofw';

		warn "Now starting a Bowtie 2 paired-end alignment for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile_1} and $temp_dir$fh->{inputfile_2}, with the options: $bt2_options))\n";
		open ($fh->{fh},"$path_to_bowtie $bt2_options -x $fh->{bisulfiteIndex} -1 $temp_dir$fh->{inputfile_1} -2 $temp_dir$fh->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";

		### Bowtie 2 writes SAM, so we read until the first line that is not a header (or until the output ends)
		do {
			$_ = $fh->{fh}->getline();
		} while ($_ and $_ =~ /^\@/); # SAM headers start with @

		my $line_1 = $_;
		my $line_2 = $fh->{fh}->getline();

		# without a complete pair of output lines there is nothing to store
		unless ($line_1 and $line_2){
			warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
			@{$fh}{qw(last_seq_id last_line_1 last_line_2)} = (undef, undef, undef);
			next;
		}

		chomp $line_1;
		chomp $line_2;
		my ($id_1) = split(/\t/,$line_1); # first SAM column of the first line = sequence identifier
		my ($id_2) = split(/\t/,$line_2); # first SAM column of the second line

		### Either output line may belong to read 1. The line whose ID still carries a trailing /1
		### is read 1; its ID (with the tag removed) is stored as last_seq_id
		if ($id_1 =~ s/\/1$//){
			$fh->{last_seq_id} = $id_1;
		}
		elsif ($id_2 =~ s/\/1$//){
			$fh->{last_seq_id} = $id_2;
		}
		else{
			die "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
		}

		@{$fh}{qw(last_line_1 last_line_2)} = ($line_1, $line_2); # each may hold read 1 or read 2
		warn "Found first alignment:\n$fh->{last_line_1}\n$fh->{last_line_2}\n";
	}
}

### HISAT2   | PAIRED-END | FASTQ
sub paired_end_align_fragments_to_bisulfite_genome_fastQ_hisat2 {
	### Starts one HISAT2 paired-end alignment (FastQ input) for every entry of @fhs that has input
	### files assigned, skips the SAM header of each output stream and caches the first two
	### alignment lines in the entry.
	###
	### Arguments: the converted file names of read 1 (C->T, G->A) followed by those of read 2
	### (C->T, G->A). They are only used for the log messages below; the files that are actually
	### aligned are taken from $fh->{inputfile_1} and $fh->{inputfile_2}.
	###
	### Side effects: sets $fh->{fh} (pipe to HISAT2), $fh->{last_seq_id}, $fh->{last_line_1} and
	### $fh->{last_line_2}. The global $_ is overwritten.
	### Dies if the pipe cannot be opened or if neither of the first two output lines is read 1.
	my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;

	warn( $directional ? "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastQ)\n"
		: $pbat        ? "Input files are $G_to_A_infile_1 and $C_to_T_infile_2 (FastQ)\n"
		:                "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastQ)\n" );

	## Up to 4 instances of HISAT2 are started, one per entry of @fhs
	warn( ($directional or $pbat)
		? "Now running 2 instances of HISAT2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n"
		: "Now running 4 individual instances of HISAT2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n" );

	foreach my $fh (@fhs) {

		# in directional or PBAT mode, entries without input files are not aligned at all
		if (($directional or $pbat) and !$fh->{inputfile_1}){
			@{$fh}{qw(last_seq_id last_line_1 last_line_2)} = (undef, undef, undef);
			next;
		}

		# the two instances named below get --norc, all other instances get --nofw
		my $wants_norc = ($fh->{name} eq 'CTread1GAread2CTgenome' or $fh->{name} eq 'GAread1CTread2GAgenome');
		my $hisat2_options = $aligner_options;
		$hisat2_options .= $wants_norc ? ' --norc' : ' --nofw';

		warn "Now starting a HISAT2 paired-end alignment for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile_1} and $temp_dir$fh->{inputfile_2}, with the options: $hisat2_options))\n";
		open ($fh->{fh},"$path_to_hisat2 $hisat2_options -x $fh->{bisulfiteIndex} -1 $temp_dir$fh->{inputfile_1} -2 $temp_dir$fh->{inputfile_2} |") or die "Can't open pipe to HISAT2: $!";

		### HISAT2 writes SAM, so we read until the first line that is not a header (or until the output ends)
		do {
			$_ = $fh->{fh}->getline();
		} while ($_ and $_ =~ /^\@/); # SAM headers start with @

		my $line_1 = $_;
		my $line_2 = $fh->{fh}->getline();

		# without a complete pair of output lines there is nothing to store
		unless ($line_1 and $line_2){
			warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
			@{$fh}{qw(last_seq_id last_line_1 last_line_2)} = (undef, undef, undef);
			next;
		}

		chomp $line_1;
		chomp $line_2;
		my ($id_1) = split(/\t/,$line_1); # first SAM column of the first line = sequence identifier
		my ($id_2) = split(/\t/,$line_2); # first SAM column of the second line

		### Either output line may belong to read 1 (NOTE: the original author flagged that this
		### ordering assumption still needs confirming for HISAT2). The line whose ID still carries a
		### trailing /1 is read 1; its ID (with the tag removed) is stored as last_seq_id
		if ($id_1 =~ s/\/1$//){
			$fh->{last_seq_id} = $id_1;
		}
		elsif ($id_2 =~ s/\/1$//){
			$fh->{last_seq_id} = $id_2;
		}
		else{
			die "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
		}

		@{$fh}{qw(last_line_1 last_line_2)} = ($line_1, $line_2); # each may hold read 1 or read 2
		warn "Found first alignment:\n$fh->{last_line_1}\n$fh->{last_line_2}\n";
	}
}


### MINIMAP2   | PAIRED-END | FASTQ
sub paired_end_align_fragments_to_bisulfite_genome_fastQ_minimap2 {
	### Starts one minimap2 paired-end alignment (FastQ input) for every entry of @fhs that has input
	### files assigned, skips the SAM header of each output stream and caches the first two
	### alignment lines in the entry.
	###
	### Arguments: the converted file names of read 1 (C->T, G->A) followed by those of read 2
	### (C->T, G->A). They are only used for the log messages below; the files that are actually
	### aligned are taken from $fh->{inputfile_1} and $fh->{inputfile_2}.
	###
	### Side effects: sets $fh->{fh} (pipe to minimap2), $fh->{last_seq_id}, $fh->{last_line_1} and
	### $fh->{last_line_2}. The global $_ is overwritten.
	### Dies if the pipe cannot be opened or if neither of the first two output lines is read 1.

	my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
	if ($directional){
		warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastQ)\n";
	}
	elsif ($pbat){
		warn "Input files are $G_to_A_infile_1 and $C_to_T_infile_2 (FastQ)\n";
	}
	else{
		warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastQ)\n";
	}

	## Now starting up to 4 instances of minimap2 feeding in the converted sequence files and reading in the first lines of the output, and storing them

	if ($directional or $pbat){
		warn "Now running 2 instances of minimap2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n";
	}
	else{
		warn "Now running 4 individual instances of minimap2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n";
	}

	foreach my $fh (@fhs) {

		if ($directional or $pbat){ # skipping unwanted filehandles
			unless ($fh->{inputfile_1}){
				$fh->{last_seq_id} = undef;
				$fh->{last_line_1} = undef;
				$fh->{last_line_2} = undef;
				next;
			}
		}

		# Unlike the Bowtie 2 and HISAT2 variants, no per-instance strand option (--norc/--nofw) is
		# appended here; whether minimap2 needs an equivalent restriction is still an open question
		my $mm2_options = $aligner_options;

		# The minimap index requires the entire name of the index (including .mmi)
		my $mmi = $fh->{bisulfiteIndex}.".mmi";
		warn "Using minimap2 index: $mmi\n\n";

		warn "Now starting a minimap2 paired-end alignment for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile_1} and $temp_dir$fh->{inputfile_2}, with the options: $mm2_options))\n";
		open ($fh->{fh},"$path_to_minimap2 $mm2_options $mmi $temp_dir$fh->{inputfile_1} $temp_dir$fh->{inputfile_2} |") or die "Can't open pipe to minimap2: $!";

		### minimap2 outputs SAM format (option -a), so we need to skip everything until the first sequence
		while (1){
			$_ = $fh->{fh}->getline();
			if ($_) {
				last unless ($_ =~ /^\@/); # SAM headers start with @
			}
			else{
				last; # no alignment output
			}
		}

		my $line_1 = $_;
		my $line_2 = $fh->{fh}->getline();

		# we store the first two lines of the output (unmapped reads should also produce a SAM line)
		if ($line_1 and $line_2) {
			chomp $line_1;
			chomp $line_2;

			### Either output line may belong to read 1 (TODO: this ordering assumption was carried over
			### from Bowtie 2 and still needs to be confirmed for minimap2).
			### We thus identify which sequence was read 1 and store its ID as last_seq_id

			my $id_1 = (split(/\t/,$line_1))[0]; # this is the first element of the first minimap2 output line (= the sequence identifier)
			my $id_2 = (split(/\t/,$line_2))[0]; # this is the first element of the second minimap2 output line

			# The leftover debugging statements (printing both IDs followed by sleep(1), before and
			# after the check) were removed: they stalled every aligner instance by 2 seconds
			if ($id_1 =~ s/\/1$//){ # removing the read 1 tag if present
				$fh->{last_seq_id} = $id_1;
			}
			elsif ($id_2 =~ s/\/1$//){ # removing the read 1 tag if present
				$fh->{last_seq_id} = $id_2;
			}
			else{
				die "Either the first or the second id need to be read 1! ID1 was: $id_1; ID2 was: $id_2\n";
			}

			$fh->{last_line_1} = $line_1; # this contains read 1 or read 2
			$fh->{last_line_2} = $line_2; # this contains read 1 or read 2
			warn "Set last_seq_id as: $fh->{last_seq_id}\n";
			warn "Found first alignment:\n$fh->{last_line_1}\n$fh->{last_line_2}\n";
		}
		# otherwise we just initialise last_seq_id and last_lines as undefined
		else {
			warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
			$fh->{last_seq_id} = undef;
			$fh->{last_line_1} = undef;
			$fh->{last_line_2} = undef;
		}
	}
}


#####################################################################################################################################################

### Bowtie 2 | SINGLE-END | FASTA
sub single_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 {
	### Starts one Bowtie 2 single-end alignment (FastA input) for every entry of @fhs, skips the SAM
	### header of each output stream and caches the first alignment line in the entry.
	###
	### Arguments: the names of the C->T and the G->A converted read file. They are only used for
	### the log messages below; the file that is actually aligned is taken from $fh->{inputfile}.
	###
	### Side effects: sets $fh->{fh} (pipe to Bowtie 2), $fh->{last_seq_id} and $fh->{last_line}.
	### The global $_ is overwritten. Dies if the pipe to Bowtie 2 cannot be opened.
	my ($C_to_T_infile,$G_to_A_infile) = @_;

	warn( $directional
		? "Input file is $C_to_T_infile (FastA)\n"
		: "Input files are $C_to_T_infile and $G_to_A_infile (FastA)\n" );

	## Up to 4 instances of Bowtie 2 are started, one per entry of @fhs
	warn( $directional
		? "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n"
		: "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n" );

	foreach my $fh (@fhs) {

		# the two instances named below get --norc, all other instances get --nofw
		my $wants_norc = ($fh->{name} eq 'CTreadCTgenome' or $fh->{name} eq 'GAreadGAgenome');
		my $bt2_options = $aligner_options;
		$bt2_options .= $wants_norc ? ' --norc' : ' --nofw';

		warn "Now starting the Bowtie 2 aligner for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile} with options: $bt2_options)\n";
		open ($fh->{fh},"$path_to_bowtie $bt2_options -x $fh->{bisulfiteIndex} -U $temp_dir$fh->{inputfile} |") or die "Can't open pipe to bowtie 2: $!";

		### Bowtie 2 writes SAM, so we read until the first line that is not a header (or until the output ends)
		do {
			$_ = $fh->{fh}->getline();
		} while ($_ and $_ =~ /^\@/); # SAM headers start with @

		# no line left after the header: initialise last_seq_id and last_line as undefined
		unless ($_){
			warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
			@{$fh}{qw(last_seq_id last_line)} = (undef, undef);
			next;
		}

		# Bowtie 2 outputs a result line even for sequences without any alignments, so the first
		# line is stored together with its first column (= the sequence identifier)
		chomp;
		($fh->{last_seq_id}) = split(/\t/,$_);
		$fh->{last_line} = $_;
		warn "Found first alignment:\t$fh->{last_line}\n";
	}
}

### HISAT2   | SINGLE-END | FASTA
sub single_end_align_fragments_to_bisulfite_genome_fastA_hisat2 {
	### Starts one HISAT2 single-end alignment (FastA input) for every entry of @fhs, skips the SAM
	### header of each output stream and caches the first alignment line in the entry.
	###
	### Arguments: the names of the C->T and the G->A converted read file. They are only used for
	### the log messages below; the file that is actually aligned is taken from $fh->{inputfile}.
	###
	### Side effects: sets $fh->{fh} (pipe to HISAT2), $fh->{last_seq_id} and $fh->{last_line}.
	### The global $_ is overwritten. Dies if the pipe to HISAT2 cannot be opened.
	my ($C_to_T_infile,$G_to_A_infile) = @_;

	warn( $directional
		? "Input file is $C_to_T_infile (FastA)\n"
		: "Input files are $C_to_T_infile and $G_to_A_infile (FastA)\n" );

	## Up to 4 instances of HISAT2 are started, one per entry of @fhs
	warn( $directional
		? "Now running 2 instances of HISAT2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n"
		: "Now running 4 individual instances of HISAT2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n" );

	foreach my $fh (@fhs) {

		# the two instances named below get --norc, all other instances get --nofw
		my $wants_norc = ($fh->{name} eq 'CTreadCTgenome' or $fh->{name} eq 'GAreadGAgenome');
		my $hisat2_options = $aligner_options;
		$hisat2_options .= $wants_norc ? ' --norc' : ' --nofw';

		warn "Now starting the HISAT2 aligner for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile} with options: $hisat2_options)\n";
		open ($fh->{fh},"$path_to_hisat2 $hisat2_options -x $fh->{bisulfiteIndex} -U $temp_dir$fh->{inputfile} |") or die "Can't open pipe to HISAT2: $!";

		### HISAT2 writes SAM, so we read until the first line that is not a header (or until the output ends)
		do {
			$_ = $fh->{fh}->getline();
		} while ($_ and $_ =~ /^\@/); # SAM headers start with @

		# no line left after the header: initialise last_seq_id and last_line as undefined
		unless ($_){
			warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
			@{$fh}{qw(last_seq_id last_line)} = (undef, undef);
			next;
		}

		# HISAT2 outputs a result line even for sequences without any alignments, so the first
		# line is stored together with its first column (= the sequence identifier)
		chomp;
		($fh->{last_seq_id}) = split(/\t/,$_);
		$fh->{last_line} = $_;
		warn "Found first alignment:\t$fh->{last_line}\n";
	}
}

### Bowtie 2 | SINGLE-END | FASTQ
sub single_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 {
	### Starts one Bowtie 2 single-end alignment (FastQ input) for every entry of @fhs, skips the SAM
	### header of each output stream and caches the first alignment line in the entry.
	###
	### Arguments: the names of the C->T and the G->A converted read file. They are only used for
	### the log messages below; the file that is actually aligned is taken from $fh->{inputfile}.
	###
	### Side effects: sets $fh->{fh} (pipe to Bowtie 2), $fh->{last_seq_id} and $fh->{last_line}.
	### The global $_ is overwritten. Dies if the pipe to Bowtie 2 cannot be opened.
	my ($C_to_T_infile,$G_to_A_infile) = @_;

	warn( $directional ? "Input file is $C_to_T_infile (FastQ)\n\n"
		: $pbat        ? "Input file is $G_to_A_infile (FastQ)\n\n"
		:                "Input files are $C_to_T_infile and $G_to_A_infile (FastQ)\n\n" );

	## Up to 4 instances of Bowtie 2 are started, one per entry of @fhs
	warn( ($directional or $pbat)
		? "Now running 2 instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n"
		: "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n" );

	foreach my $fh (@fhs) {

		# the two instances named below get --norc, all other instances get --nofw
		my $wants_norc = ($fh->{name} eq 'CTreadCTgenome' or $fh->{name} eq 'GAreadGAgenome');
		my $bt2_options = $aligner_options;
		$bt2_options .= $wants_norc ? ' --norc' : ' --nofw';

		warn "Now starting the Bowtie 2 aligner for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile} with options $bt2_options)\n";
		warn "Using Bowtie 2 index: $fh->{bisulfiteIndex}\n\n";

		open ($fh->{fh},"$path_to_bowtie $bt2_options -x $fh->{bisulfiteIndex} -U $temp_dir$fh->{inputfile} |") or die "Can't open pipe to bowtie: $!";

		### Bowtie 2 writes SAM, so we read until the first line that is not a header (or until the output ends)
		do {
			$_ = $fh->{fh}->getline();
		} while ($_ and $_ =~ /^\@/); # SAM headers start with @

		# no line left after the header: initialise last_seq_id and last_line as undefined
		unless ($_){
			warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
			@{$fh}{qw(last_seq_id last_line)} = (undef, undef);
			next;
		}

		# Bowtie 2 outputs a result line even for sequences without any alignments, so the first
		# line is stored together with its first column (= the sequence identifier)
		chomp;
		($fh->{last_seq_id}) = split(/\t/,$_);
		$fh->{last_line} = $_;
		warn "Found first alignment:\t$fh->{last_line}\n";
	}
}

### HISAT2   | SINGLE-END | FASTQ
sub single_end_align_fragments_to_bisulfite_genome_fastQ_hisat2 {
	### Starts one HISAT2 single-end alignment (FastQ input) for every entry of @fhs, skips the SAM
	### header of each output stream and caches the first alignment line in the entry.
	###
	### Arguments: the names of the C->T and the G->A converted read file. They are only used for
	### the log messages below; the file that is actually aligned is taken from $fh->{inputfile}.
	###
	### Side effects: sets $fh->{fh} (pipe to HISAT2), $fh->{last_seq_id} and $fh->{last_line}.
	### The global $_ is overwritten. Dies if the pipe to HISAT2 cannot be opened.
	my ($C_to_T_infile,$G_to_A_infile) = @_;

	warn( $directional ? "Input file is $C_to_T_infile (FastQ)\n\n"
		: $pbat        ? "Input file is $G_to_A_infile (FastQ)\n\n"
		:                "Input files are $C_to_T_infile and $G_to_A_infile (FastQ)\n\n" );

	## Up to 4 instances of HISAT2 are started, one per entry of @fhs
	warn( ($directional or $pbat)
		? "Now running 2 instances of HISAT2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n"
		: "Now running 4 individual instances of HISAT2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n" );

	foreach my $fh (@fhs) {

		# the two instances named below get --norc, all other instances get --nofw
		my $wants_norc = ($fh->{name} eq 'CTreadCTgenome' or $fh->{name} eq 'GAreadGAgenome');
		my $hisat2_options = $aligner_options;
		$hisat2_options .= $wants_norc ? ' --norc' : ' --nofw';

		warn "Now starting the HISAT2 aligner for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile} with options $hisat2_options)\n";
		warn "Using HISAT2 index: $fh->{bisulfiteIndex}\n\n";

		open ($fh->{fh},"$path_to_hisat2 $hisat2_options -x $fh->{bisulfiteIndex} -U $temp_dir$fh->{inputfile} |") or die "Can't open pipe to HISAT2: $!";

		### HISAT2 writes SAM, so we read until the first line that is not a header (or until the output ends)
		do {
			$_ = $fh->{fh}->getline();
		} while ($_ and $_ =~ /^\@/); # SAM headers start with @

		# no line left after the header: initialise last_seq_id and last_line as undefined
		unless ($_){
			warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
			@{$fh}{qw(last_seq_id last_line)} = (undef, undef);
			next;
		}

		# HISAT2 outputs a result line even for sequences without any alignments, so the first
		# line is stored together with its first column (= the sequence identifier)
		chomp;
		my ($id) = split(/\t/,$_);
		$fh->{last_seq_id} = $id;
		$fh->{last_line} = $_;
		warn "Found first alignment:\t$fh->{last_line}\n";
		warn "storing $id and\n$_\n";
	}
}

# 23 04 2021
### Minimap2   | SINGLE-END | FASTQ
sub single_end_align_fragments_to_bisulfite_genome_fastQ_minimap2 {
	### Starts one minimap2 single-end alignment (FastQ input) for every entry of @fhs, skips the SAM
	### header of each output stream and caches the first alignment line in the entry.
	###
	### Arguments: the names of the C->T and the G->A converted read file. They are only used for
	### the log messages below; the file that is actually aligned is taken from $fh->{inputfile}.
	###
	### Side effects: sets $fh->{fh} (pipe to minimap2), $fh->{last_seq_id} and $fh->{last_line}.
	### The global $_ is overwritten. Dies if the pipe to minimap2 cannot be opened.
	my ($C_to_T_infile,$G_to_A_infile) = @_;

	warn( $directional ? "Input file is $C_to_T_infile (FastQ)\n\n"
		: $pbat        ? "Input file is $G_to_A_infile (FastQ)\n\n"
		:                "Input files are $C_to_T_infile and $G_to_A_infile (FastQ)\n\n" );

	## Up to 4 instances of minimap2 are started, one per entry of @fhs
	warn( ($directional or $pbat)
		? "Now running 2 instances of minimap2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n"
		: "Now running 4 individual instances of minimap2 against the bisulfite genome of $genome_folder with the specified options: $aligner_options\n\n" );

	foreach my $fh (@fhs) {

		# Unlike the Bowtie 2 and HISAT2 variants, the aligner options are passed on unchanged:
		# no per-instance strand option (--norc/--nofw) is appended for minimap2
		my $mm2_options = $aligner_options;

		warn "Now starting >> minimap2 << for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile} with options $mm2_options)\n";

		# minimap2 needs the complete file name of the index (including .mmi)
		my $mmi = $fh->{bisulfiteIndex}.".mmi";
		warn "Using minimap2 index: $mmi\n\n";

		# call pattern: minimap2 [options] index reads-se.fq
		my $minimap_commandline = "$path_to_minimap2 $mm2_options $mmi $temp_dir$fh->{inputfile}";
		warn "Minimap2 command line:\n$minimap_commandline\n";

		open ($fh->{fh},"$minimap_commandline |") or die "Can't open pipe to minimap2: $!";

		### minimap2 writes SAM, so we read until the first line that is not a header (or until the output ends)
		do {
			$_ = $fh->{fh}->getline();
		} while ($_ and $_ =~ /^\@/); # SAM headers start with @

		# no line left after the header: initialise last_seq_id and last_line as undefined
		unless ($_){
			warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
			@{$fh}{qw(last_seq_id last_line)} = (undef, undef);
			next;
		}

		# the first output line is stored together with its first column (= the sequence identifier)
		chomp;
		my ($id) = split(/\t/,$_);
		$fh->{last_seq_id} = $id;
		$fh->{last_line} = $_;
		warn "Found first alignment:\t$fh->{last_line}\n";
		warn "storing $id and\n$_\n";
	}
}

sub ensure_the_aligner_is_working{
	### Checks that the selected aligner can be executed and determines its version.
	###
	### Arguments:
	###   $path_info - command (or full path) used to invoke the aligner; it is run through the shell
	###                as '$path_info --version'
	###   $aligner   - display name of the aligner. 'Bowtie 2' and 'minimap2' are recognised
	###                explicitly, any other value is treated as HISAT2
	###
	### Returns the version: for Bowtie 2 and HISAT2 the x.y.z number parsed from the --version
	### output (undef if it cannot be parsed), for minimap2 the chomped --version output as is.
	### Dies if '$path_info --version' does not exit with status 0.
	### To change the path use --path_to_bowtie2 or --path_to_hisat2 /full/path/to/the/executable

	my ($path_info,$aligner) = @_;

	# the exit status is a number, so it is compared numerically
	my $return = system "$path_info --version >/dev/null 2>&1";
	if ($return != 0){
		die "Failed to execute $aligner properly (return code of '$path_info --version' was $return). Please install Bowtie 2 or HISAT2 first and make sure it is in the PATH, or specify the path to the Bowtie 2 with --path_to_bowtie2 /path/to/bowtie2, or --path_to_hisat2 /path/to/hisat2\n\n";
	}

	my $version_output = `$path_info --version`;
	chomp $version_output;

	# The capture is taken from the match result in list context. Reading $1 after a failed match
	# would silently return the capture of some earlier, unrelated match
	my $aligner_version;
	if ($aligner eq 'Bowtie 2'){
		($aligner_version) = $version_output =~ /bowtie.*\s+version\s+(\d+\.\d+\.\d+)/;
	}
	elsif ($aligner eq 'minimap2'){
		# minimap2 reports only the version number
		$aligner_version = $version_output;
	}
	else{
		($aligner_version) = $version_output =~ /hisat2.*\s+version\s+(\d+\.\d+\.\d+)/;
	}

	my $version_label = defined $aligner_version ? $aligner_version : 'unknown version';
	warn "$aligner seems to be working fine (tested command '$path_info --version' [$version_label])\n";

	return $aligner_version;
}

###########################################################################################################################################

sub reset_counters_and_fhs{
  my $filename = shift;
  %counting=(
	     total_meCHH_count => 0,
	     total_meCHG_count => 0,
	     total_meCpG_count => 0,
	     total_meC_unknown_count => 0,
	     total_unmethylated_CHH_count => 0,
	     total_unmethylated_CHG_count => 0,
	     total_unmethylated_CpG_count => 0,
	     total_unmethylated_C_unknown_count => 0,
	     sequences_count => 0,
	     no_single_alignment_found => 0,
	     unsuitable_sequence_count => 0,
	     genomic_sequence_could_not_be_extracted_count => 0,
	     unique_best_alignment_count => 0,
	     low_complexity_alignments_overruled_count => 0,
	     CT_CT_count => 0, #(CT read/CT genome, original top strand)
	     CT_GA_count => 0, #(CT read/GA genome, original bottom strand)
	     GA_CT_count => 0, #(GA read/CT genome, complementary to original top strand)
	     GA_GA_count => 0, #(GA read/GA genome, complementary to original bottom strand)
	     CT_GA_CT_count => 0, #(CT read1/GA read2/CT genome, original top strand)
	     GA_CT_GA_count => 0, #(GA read1/CT read2/GA genome, complementary to original bottom strand)
	     GA_CT_CT_count => 0, #(GA read1/CT read2/CT genome, complementary to original top strand)
	     CT_GA_GA_count => 0, #(CT read1/GA read2/GA genome, original bottom strand)
	     alignments_rejected_count => 0, # only relevant if --directional was specified
	    );

  if ($directional){
    if ($filename =~ ','){ # paired-end files
      @fhs=(
	    { name => 'CTreadCTgenome',
	      strand_identity => 'con ori forward',
	      bisulfiteIndex => $CT_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	    { name => 'CTreadGAgenome',
	      strand_identity => 'con ori reverse',
	      bisulfiteIndex => $GA_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	    { name => 'GAreadCTgenome',
	      strand_identity => 'compl ori con forward',
	      bisulfiteIndex => $CT_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	    { name => 'GAreadGAgenome',
	    strand_identity => 'compl ori con reverse',
	      bisulfiteIndex => $GA_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	   );
    }
    else{ # single-end files
      @fhs=(
	    { name => 'CTreadCTgenome',
	      strand_identity => 'con ori forward',
	      bisulfiteIndex => $CT_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	    { name => 'CTreadGAgenome',
	      strand_identity => 'con ori reverse',
	      bisulfiteIndex => $GA_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	   );
    }
  }
  elsif($pbat){
    if ($filename =~ ','){ # paired-end files
      @fhs=(
	    { name => 'CTreadCTgenome',
	      strand_identity => 'con ori forward',
	      bisulfiteIndex => $CT_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	    { name => 'CTreadGAgenome',
	      strand_identity => 'con ori reverse',
	      bisulfiteIndex => $GA_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	    { name => 'GAreadCTgenome',
	      strand_identity => 'compl ori con forward',
	      bisulfiteIndex => $CT_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	    { name => 'GAreadGAgenome',
	    strand_identity => 'compl ori con reverse',
	      bisulfiteIndex => $GA_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	   );
    }
    else{ # single-end files
      @fhs=(
	    { name => 'GAreadCTgenome',
	      strand_identity => 'compl ori con forward',
	      bisulfiteIndex => $CT_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	    { name => 'GAreadGAgenome',
	      strand_identity => 'compl ori con reverse',
	      bisulfiteIndex => $GA_index_basename,
	      seen => 0,
	      wrong_strand => 0,
	    },
	   );
    }
  }
  else{
    @fhs=(
	  { name => 'CTreadCTgenome',
	    strand_identity => 'con ori forward',
	    bisulfiteIndex => $CT_index_basename,
	    seen => 0,
	    wrong_strand => 0,
	  },
	  { name => 'CTreadGAgenome',
	    strand_identity => 'con ori reverse',
	    bisulfiteIndex => $GA_index_basename,
	    seen => 0,
	    wrong_strand => 0,
	  },
	  { name => 'GAreadCTgenome',
	    strand_identity => 'compl ori con forward',
	    bisulfiteIndex => $CT_index_basename,
	    seen => 0,
	    wrong_strand => 0,
	  },
	  { name => 'GAreadGAgenome',
	    strand_identity => 'compl ori con reverse',
	    bisulfiteIndex => $GA_index_basename,
	    seen => 0,
	    wrong_strand => 0,
	  },
	 );
  }
}


sub process_command_line{
	my @aligner_options;
	my $help;
	my $mates1;
	my $mates2;
	my $path_to_bowtie2;
	my $path_to_hisat2;
	my $path_to_minimap2;
	my $fastq;
	my $fasta;
	my $skip;
	my $qupto;
	my $phred64;
	my $phred33;
	my $mismatches;
	my $seed_length;
	my $sequence_format;
	my $version;
	my $quiet;
	my $non_directional;
	my $maxins;
	my $minins;
	my $unmapped;
	my $multi_map;
	my $output_dir;
	my $bowtie2;
	my $hisat2;
	my $mm2; # Minimap2
	my $sam_no_hd;
	my $seed_extension_fails;
	my $reseed_repetitive_seeds;
	my $most_valid_alignments;
	my $score_min;
	my $parallel;
	my $temp_dir;
	my $rdg;
	my $rfg;
	my $non_bs_mm;
	my $samtools_path;
	my $bam;
	my $gzip;
	my $pbat;
	my $prefix;
	my $old_flag;
	my $basename;
	my $sam;
	my $multicore;
	my $rg_tag;
	my $rg_id;
	my $rg_sample;
	my $genome_folder;
	my $singles;
	my $ambig_bam;
	my $cram;
	my $cram_ref;
	my $nucleotide_coverage;
	my $dovetail_flag;
	my $no_dovetail;
	my $slam;
	my $nosplice;
	my $known_splices;
	my $icpc;
	my $local;
	my $mm2_short_read;
	my $mm2_pacbio;
	my $mm2_nanopore;
	my $maximum_length_cutoff;
	my $strandID;


	my $command_line = GetOptions ('help'  => \$help,
				 '1=s'                     => \$mates1,
				 '2=s'                     => \$mates2,
				 'path_to_bowtie2=s'       => \$path_to_bowtie2,
				 'path_to_hisat2=s'        => \$path_to_hisat2,
				 'path_to_minimap2=s'      => \$path_to_minimap2,
				 'genome_folder=s'         => \$genome_folder,
				 'f|fasta'                 => \$fasta,
				 'q|fastq'                 => \$fastq,
				 's|skip=i'                => \$skip,
				 'u|upto=i'                => \$qupto,
				 'phred33-quals'           => \$phred33,
				 'phred64-quals'           => \$phred64,
				 'n|seedmms=i'             => \$mismatches,
				 'l|seedlen=i'             => \$seed_length,
				 'version'                 => \$version,
				 'quiet'                   => \$quiet,
				 'non_directional'         => \$non_directional,
				 'I|minins=i'              => \$minins,
				 'X|maxins=i'              => \$maxins,
				 'un|unmapped'             => \$unmapped,
				 'ambiguous'               => \$multi_map,
				 'o|output_dir=s'          => \$output_dir,
				 'bowtie2'                 => \$bowtie2,
				 'hisat2'                  => \$hisat2,
				 'minimap2|mm2'            => \$mm2,
				 'sam-no-hd'               => \$sam_no_hd,
				 'D=i'                     => \$seed_extension_fails,
				 'R=i'                     => \$reseed_repetitive_seeds,
				 'score_min=s'             => \$score_min,
				 'most_valid_alignments=i' => \$most_valid_alignments,
				 'p=i'                     => \$parallel,
				 'temp_dir=s'              => \$temp_dir,
				 'rdg=s'                   => \$rdg,
				 'rfg=s'                   => \$rfg,
				 'non_bs_mm'               => \$non_bs_mm,
				 'samtools_path=s'         => \$samtools_path,
				 'bam'                     => \$bam,
				 'gzip'                    => \$gzip,
				 'pbat'                    => \$pbat,
				 'prefix=s'                => \$prefix,
				 'old_flag'                => \$old_flag,
				 'B|basename=s'            => \$basename,
				 'sam'                     => \$sam,
				 'parallel|multicore=i'    => \$multicore,
				 'rg_tag'                  => \$rg_tag,
				 'rg_id=s'                 => \$rg_id,
				 'rg_sample=s'             => \$rg_sample,
				 'se|single_end=s'         => \$singles,
				 'ambig_bam'               => \$ambig_bam,
				 'cram'                    => \$cram,
				 'cram_ref=s'              => \$cram_ref,
				 'nucleotide_coverage'     => \$nucleotide_coverage,
				 'dovetail'                => \$dovetail_flag, # Not used. Kept here for backwards compatability.
				 'no_dovetail'             => \$no_dovetail,
				 'slam'                    => \$slam,
				 'known-splicesite-infile=s' => \$known_splices,
				 'no-spliced-alignment'    => \$nosplice,
				 'icpc'                    => \$icpc,
				 'local'                   => \$local,
				 'mm2_short_reads'          => \$mm2_short_read,
				 'mm2_maximum_length=i'    => \$maximum_length_cutoff,
				 'mm2_pacbio|pacbio'       => \$mm2_pacbio,
				 'mm2_nanopore|nanopore'   => \$mm2_nanopore,
				 'strandID'                => \$strandID,
				);


	### EXIT ON ERROR if there were errors with any of the supplied options
	unless ($command_line){
		die "Please respecify command line options\n";
	}
	### HELPFILE
	if ($help){
		print_helpfile();
		exit;
	}
	if ($version){
		print << "VERSION";


          Bismark - Bisulfite Mapper and Methylation Caller.

                       Bismark Version: $bismark_version
        Copyright $copyright_dates Felix Krueger, Altos Bioinformatics  
                https://github.com/FelixKrueger/Bismark


VERSION
		exit;
	}


	##########################
	### PROCESSING OPTIONS ###
	##########################

	if ($hisat2){
		if ($bowtie2){
			die "You may not select both --hisat2 and --bowtie2. Make your pick! [default is Bowtie 2]\n";
		}
		if ($mm2){
			die "You may not select both --hisat2 and --minimap2. Make your pick!\n";
		}
		$bowtie2 = 0;
		$hisat2  = 1;
		$mm2     = 0;
	}
	elsif($mm2){
		if ($bowtie2){
			die "You may not select both --minimap2 and --bowtie2. Make your pick! [default is Bowtie 2]\n";
		}
		$bowtie2 = 0;
		$hisat2  = 0;
		$mm2     = 1;
	}
	else{ # Bowtie 2 is the default mode
		$bowtie2 = 1;
		$hisat2  = 0;
		$mm2     = 0;
	}

	unless ($sam_no_hd){
		$sam_no_hd = 0;
	}

	
	### PATH TO BOWTIE 2
	### if a special path to Bowtie 2 was specified we will use that one, otherwise it is assumed that Bowtie 2 is in the PATH
	if ($bowtie2){
		if (defined $path_to_bowtie2){
			unless ($path_to_bowtie2 =~ /\/$/){
				$path_to_bowtie2 =~ s/$/\//;
			}
			if (-d $path_to_bowtie2){
				$path_to_bowtie2 = "${path_to_bowtie2}bowtie2";
				# warn "Setting path to Bowtie2 to: >>$path_to_bowtie2<<\n";
			}
			else{
				die "The path to Bowtie 2 provided ($path_to_bowtie2) is invalid (not a directory)!\n";
			}
		}
		else{
			$path_to_bowtie2 = 'bowtie2';
			# warn "Path to Bowtie 2 specified as: $path_to_bowtie2 (assuming it is in the path)\n";
		}
	}
	elsif($hisat2){
		if ($path_to_hisat2){
			unless ($path_to_hisat2 =~ /\/$/){
				$path_to_hisat2 =~ s/$/\//;
			}
			if (-d $path_to_hisat2){
				$path_to_hisat2 = "${path_to_hisat2}hisat2";
			}
			else{
				die "The path to HISAT2 provided ($path_to_hisat2) is invalid (not a directory)!\n";
			}
		}
		else{
			$path_to_hisat2 = 'hisat2';
			warn "Path to HISAT2 specified as: $path_to_hisat2\n";
		}
	}
	elsif($mm2){
		if ($path_to_minimap2){
			unless ($path_to_minimap2 =~ /\/$/){
				$path_to_minimap2 =~ s/$/\//;
			}
			
			if (-d $path_to_minimap2){
				$path_to_minimap2 = "${path_to_minimap2}minimap2";
			}
			else{
				die "The path to minimap2 provided ($path_to_minimap2) is invalid (it MUST be a directory)!\n";
			}
			warn "Path to minimap2 specified as: $path_to_minimap2\n";
		}
		else{
			$path_to_minimap2 = 'minimap2';
			warn "Minimap2 is assumed to be in PATH, using: '$path_to_minimap2'\n";
		}
	}
	else{
		die "We require either Bowtie 2, HISAT2  or minimap2 mode...";
	}
	
	### testing if the aligners appear to be working
	my $aligner_version;
	if ($bowtie2){
		$aligner_version = ensure_the_aligner_is_working($path_to_bowtie2,'Bowtie 2');
	}
	elsif($mm2){
		$aligner_version = ensure_the_aligner_is_working($path_to_minimap2,'minimap2');
	}	
	else{
		$aligner_version = ensure_the_aligner_is_working($path_to_hisat2,'HISAT2');
	}


	### SAM/CRAM/BAM format
	if ($sam){
		warn "Output format manually set as SAM\n";
	}
	elsif($cram){
		warn "Output format set to CRAM\n";
		if (defined $cram_ref){
			warn "CRAM reference given as: '$cram_ref'\n\n";
			unless (-e $cram_ref){
				die "There is a problem with the CRAM reference '$cram_ref': $!\n\n";
			}

			# determining full path information for the cram reference
			if ($cram_ref =~/\//){
				if (chdir $cram_ref){
					my $absolute_cram_ref_folder = getcwd; ## making the genome folder path absolute
					unless ($absolute_cram_ref_folder =~/\/$/){
						$absolute_cram_ref_folder =~ s/$/\//;
					}
				}
			}
		}
		else{
			warn "CRAM reference not specified explicitely, regenerating from FastA reference files\n\n";
		}
	}
	else{
		$bam = 1;
		warn "Output format is BAM (default)\n";
	}

	### OUTPUT REQUESTED AS BAM FILE (default)
	if ($bam or $cram){
	
		### PATH TO SAMTOOLS
		if (defined $samtools_path){
			# if Samtools was specified as full command
			if ($samtools_path =~ /samtools$/){
				if (-e $samtools_path){
					# Samtools executable found
				}
				else{
					die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
				}	
			}
			else{
				unless ($samtools_path =~ /\/$/){
					$samtools_path =~ s/$/\//;
				}
				$samtools_path .= 'samtools';
				if (-e $samtools_path){
					# Samtools executable found
				}
				else{
					die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
				}
			}

			warn "Alignments will be written out in BAM format. Samtools path provided as: '$samtools_path'\n";
			$bam = 1;
		}
		# Check whether Samtools is in the PATH if no path was supplied by the user
		else{
			if (!system "which samtools >/dev/null 2>&1"){ # STDOUT is binned, STDERR is redirected to STDOUT. Returns 0 if samtools is in the PATH
				$samtools_path = `which samtools`;
				chomp $samtools_path;
				warn "Alignments will be written out in BAM format. Samtools found here: '$samtools_path'\n";
				$bam = 1;
			}
		}

		unless (defined $samtools_path){
			$bam = 2;
			warn "Did not find Samtools on the system. Alignments will be compressed with GZIP instead (.sam.gz)\n";
		}
		sleep (1);
	}

	### OPTION AMBIGUOUS BAM
	if ($ambig_bam){
		# this option simply outputs the current alignment to a file
	}

	####################################
	### PROCESSING ARGUMENTS

	### GENOME FOLDER
	if (defined $genome_folder){ # the genome folder may be defined with the option --genome_folder
		# warn "Genome folder specified with --genome_folder $genome_folder\n";
	}
	else{
		# else the first argument is assumed to be the genome folder
		$genome_folder = shift @ARGV; # mandatory
	}

	unless ($genome_folder){
		warn "Genome folder was not specified!\n";
		print_helpfile();
		exit;
	}

	### checking that the genome folder, all subfolders and the required bowtie index files exist
	unless ($genome_folder =~/\/$/){
		$genome_folder =~ s/$/\//;
	}

	if (chdir $genome_folder){
		my $absolute_genome_folder = getcwd(); ## making the genome folder path absolute
		unless ($absolute_genome_folder =~/\/$/){
			$absolute_genome_folder =~ s/$/\//;
		}
		warn "Reference genome folder provided is $genome_folder\t(absolute path is '$absolute_genome_folder)'\n";
		$genome_folder = $absolute_genome_folder;
	}
	else{
		die "Failed to move to $genome_folder: $!\nUSAGE: bismark [options] <genome_folder> {-1 <mates1> -2 <mates2> | <singles>} [<hits>]    (--help for more details)\n";
	}
	
	my $CT_dir = "${genome_folder}Bisulfite_Genome/CT_conversion/";
	my $GA_dir = "${genome_folder}Bisulfite_Genome/GA_conversion/";

	my $bt2_small_index_present = 1;
	my $bt2_large_index_present = 1;

	my $hisat2_small_index_present = 1;
	my $hisat2_large_index_present = 1;
	 
	my $mm2_index_present = 1;

	if ($bowtie2){
		### Checking for small indexes first (ending in .bt2)

		# checking the integrity of $CT_dir
		chdir $CT_dir or die "Failed to move to directory $CT_dir: $!\n";

		my @CT_bowtie2_index = ('BS_CT.1.bt2','BS_CT.2.bt2','BS_CT.3.bt2','BS_CT.4.bt2','BS_CT.rev.1.bt2','BS_CT.rev.2.bt2');
		foreach my $file(@CT_bowtie2_index){
			unless (-f $file){
				warn "The Bowtie 2 index of the C->T converted genome seems to be faulty or non-existant ('$file'). Please run the bismark_genome_preparation before running Bismark\n";
				$bt2_small_index_present = 0;
			}
		}
    
		# checking the integrity of $GA_dir
		chdir $GA_dir or die "Failed to move to directory $GA_dir: $!\n";
		my @GA_bowtie2_index = ('BS_GA.1.bt2','BS_GA.2.bt2','BS_GA.3.bt2','BS_GA.4.bt2','BS_GA.rev.1.bt2','BS_GA.rev.2.bt2');

		foreach my $file(@GA_bowtie2_index){
			unless (-f $file){
				warn "The Bowtie 2 index of the G->A converted genome seems to be faulty or non-existant ('$file'). Please run bismark_genome_preparation before running Bismark\n";
				$bt2_small_index_present = 0;
			}
		}

		### Using the small index preferentially
		if ($bt2_small_index_present){
			$bt2_large_index_present = 0;
		}
		else{  # only checking for large indexes if the 'normal' one can't be found
			warn "\nCouldn't find a traditional small Bowtie 2 index for the genome specified (ending in .bt2). Now searching for a large index instead (64-bit index ending in .bt2l)...\n";

			### If no small indexes were found we look for large indexes (64-bit indexes, ending in .bt2l)
			
			# checking the integrity of $CT_dir
			chdir $CT_dir or die "Failed to move to directory $CT_dir: $!\n";

			@CT_bowtie2_index = ('BS_CT.1.bt2l','BS_CT.2.bt2l','BS_CT.3.bt2l','BS_CT.4.bt2l','BS_CT.rev.1.bt2l','BS_CT.rev.2.bt2l');
			foreach my $file(@CT_bowtie2_index){
				unless (-f $file){
					die "The Bowtie 2 index of the C->T converted genome seems to be faulty or non-existant ('$file'). Please run the bismark_genome_preparation before running Bismark\n";
					$bt2_large_index_present = 0;  
				}
			}

			### checking the integrity of $GA_dir
			chdir $GA_dir or die "Failed to move to directory $GA_dir: $!\n";
			@GA_bowtie2_index = ('BS_GA.1.bt2l','BS_GA.2.bt2l','BS_GA.3.bt2l','BS_GA.4.bt2l','BS_GA.rev.1.bt2l','BS_GA.rev.2.bt2l');

			foreach my $file(@GA_bowtie2_index){
				unless (-f $file){
					die "The Bowtie 2 index of the G->A converted genome seems to be faulty or non-existant ('$file'). Please run bismark_genome_preparation before running Bismark\n";
					$bt2_large_index_present = 0;
				}
			}

			if ($bt2_large_index_present){
				warn "64-bit large genome Bowtie 2 index found...\n";
			}
			else{
				die "Failed to detect either a standard (.bt2) or 64-bit (.bt2l) Bowtie 2 index for the genome specified. Please run the bismark_genome_preparation before launching Bismark\n\n";
			}
		}
	}
	elsif ($mm2){
		### minimap2 indexes end in .mmi

		# checking the integrity of $CT_dir
		chdir $CT_dir or die "Failed to move to directory $CT_dir: $!\n";

		my @CT_mm2_index = ('BS_CT.mmi');
		foreach my $file (@CT_mm2_index){
			unless (-f $file){
				warn "The minimap2 index of the C->T converted genome seems to be non-existant ('$file'). Please run the bismark_genome_preparation before running Bismark\n";
				$mm2_index_present = 0;
			}
		}
    
		# checking the integrity of $GA_dir
		chdir $GA_dir or die "Failed to move to directory $GA_dir: $!\n";
		my @GA_mm2_index = ('BS_GA.mmi');

		foreach my $file(@GA_mm2_index){
			unless (-f $file){
				warn "The minimap2 index of the G->A converted genome seems to be non-existant ('$file'). Please run bismark_genome_preparation before running Bismark\n";
				$mm2_index_present = 0;
			}
		}
	}
	else{ ### HISAT2
		### Checking for small indexes first (ending in .ht2)
		### checking the integrity of $CT_dir
		chdir $CT_dir or die "Failed to move to directory $CT_dir: $!\n";
		my @CT_hisat2_index = ('BS_CT.1.ht2','BS_CT.2.ht2','BS_CT.3.ht2','BS_CT.4.ht2','BS_CT.5.ht2','BS_CT.6.ht2','BS_CT.7.ht2','BS_CT.8.ht2');
      
		foreach my $file(@CT_hisat2_index){
			unless (-f $file){
				warn "The HISAT2 index of the C->T converted genome seems to be faulty ($file doesn't exist). Please run bismark_genome_preparation --hisat2 before running Bismark.\n";
				$hisat2_small_index_present = 0;
			}
		}

		### checking the integrity of $GA_dir
		chdir $GA_dir or die "Failed to move to directory $GA_dir: $!\n";
		my @GA_hisat2_index = ('BS_GA.1.ht2','BS_GA.2.ht2','BS_GA.3.ht2','BS_GA.4.ht2','BS_GA.5.ht2','BS_GA.6.ht2','BS_GA.7.ht2','BS_GA.8.ht2');
		foreach my $file(@GA_hisat2_index){
			unless (-f $file){
				warn "The HISAT2 index of the G->A converted genome seems to be faulty ($file doesn't exist). Please run bismark_genome_preparation --hisat2 before running Bismark.\n";
				$hisat2_small_index_present = 0;
			}
		}

		### Using the small index preferentially
		if ($hisat2_small_index_present){
			$hisat2_large_index_present = 0;
		}
		else{  # only checking for large indexes if the 'normal' one can't be found
			warn "\nCouldn't find a traditional small HISAT2 index for the genome specified (ending in .ht2). Now searching for a large index instead (64-bit index ending in .ht2l)...\n";

			### If no small indexes were found we look for large indexes (64-bit indexes, ending in .ht2l)

			### checking the integrity of $CT_dir
			chdir $CT_dir or die "Failed to move to directory $CT_dir: $!\n";
			my @CT_hisat2_index = ('BS_CT.1.ht2l','BS_CT.2.ht2l','BS_CT.3.ht2l','BS_CT.4.ht2l','BS_CT.5.ht2l','BS_CT.6.ht2l','BS_CT.7.ht2l','BS_CT.8.ht2l');
			foreach my $file(@CT_hisat2_index){
				unless (-f $file){
					warn "The HISAT2 index of the C->T converted genome seems to be faulty ($file doesn't exist). Please run bismark_genome_preparation --hisat2 before running Bismark.\n";
					$hisat2_large_index_present = 0;
				}
			}

			### checking the integrity of $GA_dir
			chdir $GA_dir or die "Failed to move to directory $GA_dir: $!\n";
			my @GA_hisat2_index = ('BS_GA.1.ht2l','BS_GA.2.ht2l','BS_GA.3.ht2l','BS_GA.4.ht2l','BS_GA.5.ht2l','BS_GA.6.ht2l','BS_GA.7.ht2l','BS_GA.8.ht2l');
			foreach my $file(@GA_hisat2_index){
				unless (-f $file){
					warn "The HISAT2 index of the G->A converted genome seems to be faulty ($file doesn't exist). Please run bismark_genome_preparation --hisat2 before running Bismark.\n";
					$hisat2_large_index_present = 0;
				}
			}

			if ($hisat2_large_index_present){
				warn "64-bit large HISAT2 genome index found...\n";
			}
			else{
				die "Failed to detect either a standard (.ht2) or 64-bit (.ht2l) HISAT2 index for the genome specified. Please run the bismark_genome_preparation --hisat2 before launching Bismark\n\n";
			}
		}
	}

	my $CT_index_basename = "${CT_dir}BS_CT";
	my $GA_index_basename = "${GA_dir}BS_GA";

	### INPUT OPTIONS

	### SEQUENCE FILE FORMAT
	### exits if both FastA and FastQ were specified
	if ($fasta and $fastq){
		die "Only one sequence filetype can be specified (fastA or fastQ)\n";
	}

	### unless fastA is specified explicitly, FastQ sequence format is expected by default
	if ($fasta){
		warn "FastA format specified\n";
		$sequence_format = 'FASTA';
		push @aligner_options, '-f';
	}
	elsif ($fastq){
		warn "FastQ format specified\n";
		$sequence_format = 'FASTQ';
		push @aligner_options, '-q';
	}
	else{
		$fastq = 1;
		warn "FastQ format assumed (by default)\n";
		$sequence_format = 'FASTQ';
		push @aligner_options, '-q';
	}	
	
	### SKIP
	if ($skip){
		warn "Skipping the first $skip reads from the input file\n";
		# push @aligner_options,"-s $skip";
	}
	
	### UPTO
	if ($qupto){
		warn "Processing sequences up to read no. $qupto from the input file\n";
	}	

	### QUALITY VALUES
	if ($phred33 and $phred64){
		die "You can only specify one type of quality value at a time! (--phred33 or --phred64)";
	}
	if ($phred33){ ## if nothing else is specified $phred33 will be used as default by both Bowtie 1 and 2.
		# Phred quality values work only when -q is specified
		unless ($fastq){
			die "Phred quality values works only when -q (FASTQ) is specified\n";
		}
		push @aligner_options,"--phred33";
	}

	if ($phred64){
		# Phred quality values work only when -q is specified
		unless ($fastq){
			die "Phred quality values work only when -q (FASTQ) is specified\n";
		}
		push @aligner_options,"--phred64";
	}
	else{
		$phred64 = 0;
	}

	### ALIGNMENT OPTIONS

	### MISMATCHES
	if (defined $mismatches){
		if ($mismatches == 0 or $mismatches == 1){
			push @aligner_options,"-N $mismatches";
		}
		else{
			die "Please set the number of multiseed mismatches with '-N <int>' (where <int> can be 0 or 1)\n";
		}
	}

	### SEED LENGTH
	if (defined $seed_length){
		push @aligner_options,"-L $seed_length";
	}

	
	### BOWTIE 2 EFFORT OPTIONS

	### CONSECUTIVE SEED EXTENSION FAILS
	if (defined $seed_extension_fails){
		die "The option '-D <int>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
		push @aligner_options,"-D $seed_extension_fails";
	}

	### RE-SEEDING REPETITIVE SEEDS
	if (defined $reseed_repetitive_seeds){
		die "The option '-R <int>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
		push @aligner_options,"-R $reseed_repetitive_seeds";
	}

	### BOWTIE 2/HISAT2 SCORING OPTIONS
	# note: calc_map relies on enforcing score_min=G,intercept,slope for local
	# and score_min=L,intercept,slope for end-to-end alignment for the scMin calculation
	my ($score_min_intercept, $score_min_slope);
	if ($score_min){
		if ($local){
			if ($bowtie2){ 
				# Bowtie2 uses a different function for local mode, see below
				unless ($score_min =~ /^G,(.+),(.+)$/){
					die "In Bowtie 2 --local mode, the option '--score_min <func>' needs to be in the format <G,value,value>. Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
				}
			
				($score_min_intercept, $score_min_slope) = ($1, $2);
				push @aligner_options,'--local'; # this option does not work with HISAT2
				push @aligner_options,"--score-min G,$score_min_intercept,$score_min_slope";
			}
			else{ # HISAT2
				unless ($score_min =~ /^L,(.+),(.+)$/){
					die "In HISAT2 --local mode, the option '--score_min <func>' needs to be in the format <L,value,value>\n\n";
				}
				
				($score_min_intercept, $score_min_slope) = ($1, $2); ### even in local mode, HISAT2 appears to use a similar scoring scheme
				push @aligner_options,"--score-min L,$score_min_intercept,$score_min_slope"; # default setting, more stringent than normal Bowtie2
			}
		}
		else{ # global, end-to-end alignments. Default
			unless ($score_min =~ /^L,(.+),(.+)$/){
				die "In end-to-end mode (default) the option '--score_min <func>' needs to be in the format <L,value,value> . Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
			}
				
			($score_min_intercept, $score_min_slope) = ($1, $2);
			push @aligner_options,"--score-min L,$score_min_intercept,$score_min_slope";
		}
	}
	else{
		if ($local){		
			### The score-min is different for Bowtie2 where it appears to range from 0 to positive, and 
			### HISAT2, where it appears to be the same as for end-to-end mode, i.e. from negative to 0

			if ($bowtie2){
				## the default for score_min in --local mode is G,20,8
			
				# Some Bowtie 2 options specify a function rather than an individual number or setting. In these cases the user specifies
				# three parameters: (a) a function type F, (b) a constant term B, and (c) a coefficient A. The available function types 
				# are constant (C), linear (L), square-root (S), and natural log (G). The parameters are specified as F,B,A - that is, 
				# the function type, the constant term, and the coefficient are separated by commas with no whitespace. The constant term
				# and coefficient may be negative and/or floating-point numbers.
		
				# If the function specification is G,1,5.4, then the function defined is:
				# f(x) = 1.0 + 5.4 * ln(x)
			
				($score_min_intercept, $score_min_slope) = (20,8); ### We need to look up sensible defaults for this
				push @aligner_options,'--local'; # this option does not work with HISAT2
				push @aligner_options,"--score-min G,$score_min_intercept,$score_min_slope"; # default setting, more stringent than normal Bowtie2
			}
			else{ # HISAT2
				($score_min_intercept, $score_min_slope) = (0,-0.2); ### even in local mode, HISAT2 appears to use a similar scoring scheme
				push @aligner_options,"--score-min L,$score_min_intercept,$score_min_slope"; # default setting, more stringent than normal Bowtie2
			}
		}
		else{ # currently the same for Bowtie2 and HISAT2
			($score_min_intercept, $score_min_slope) = (0,-0.2);
			push @aligner_options,"--score-min L,$score_min_intercept,$score_min_slope"; # default setting, more stringent than normal Bowtie2
		}
	}

	### BOWTIE 2/HISAT2 READ GAP OPTIONS
	my ($insertion_open,$insertion_extend,$deletion_open,$deletion_extend);

	if ($rdg){
		if ($rdg =~ /^(\d+),(\d+)$/){
			$deletion_open = $1;
			$deletion_extend = $2;
		}
		else{
			die "The option '--rdg <int1>,<int2>' needs to be in the format <integer,integer> . Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
		}
		push @aligner_options,"--rdg $rdg";
	}
	else{
		$deletion_open = 5;
		$deletion_extend = 3;
	}

	### BOWTIE 2/HISAT2 REFERENCE GAP OPTIONS
	if ($rfg){
		if ($rfg =~ /^(\d+),(\d+)$/){
			$insertion_open = $1;
			$insertion_extend = $2;
		}
		else{
			die "The option '--rfg <int1>,<int2>' needs to be in the format <integer,integer> . Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
		}
		push @aligner_options,"--rfg $rfg";
	}
	else{
		$insertion_open = 5;
		$insertion_extend = 3;
	}


	### BOWTIE 2/HISAT2 PARALLELIZATION OPTIONS
	if ($parallel){
		die "Please select a value for -p of 2 or more!\n" unless ($parallel > 1);
		if ($parallel > 4){
			warn "Attention: early reports suggested that high values of -p  to have diminishing returns. Please test different values using a small subset of data for your hardware setting.\n"; sleep(1);
		}
		push @aligner_options,"-p $parallel";
		push @aligner_options,'--reorder'; ## re-orders the Bowtie 2 or HISAT2 output so that it does match the input files. This is abolutely required for parallelization to work.
		if ($bowtie2){
			warn "Each Bowtie 2 instance is going to be run with $parallel threads. Please monitor performance closely and tune down if necessary!\n";
		}
		else{
			warn "Each HISAT2 instance is going to be run with $parallel threads. Please monitor performance closely and tune down if necessary!\n";
		}		
		sleep (2);
    }
  

	### REPORTING OPTIONS

	push @aligner_options,'--ignore-quals'; ## All mismatches will receive penalty for mismatches as if they were of high quality, which is 6 by default

	### Option -M is deprecated since Bowtie 2 version 2.0.0 beta7. I'll leave this option commented out for a while
	if(defined $most_valid_alignments){
		warn "\nThe option -M is now deprecated (as of Bowtie 2 version 2.0.0 beta7). What used to be called -M mode is still the default mode. Use the -D and -R options to adjust the effort expended to find valid alignments.\n\n";
	}

	### PAIRED-END MAPPING
	if ($mates1){

		if (defined $singles){ # if --single_end has been set explicitely
			die "You cannot set --single_end and supply files in paired-end format (-1 <mates1> -2 <mates2>). Please respecify!\n";
		}

		my @mates1 = (split (/,/,$mates1));
		die "Paired-end mapping requires the format: -1 <mates1> -2 <mates2>, please respecify!\n" unless ($mates2);
		my @mates2 = (split(/,/,$mates2));
		unless (scalar @mates1 == scalar @mates2){
			die "Paired-end mapping requires the same amounnt of mate1 and mate2 files, please respecify! (format: -1 <mates1> -2 <mates2>)\n";
		}
		while (1){
			my $mate1 = shift @mates1;
			my $mate2 = shift @mates2;
			last unless ($mate1 and $mate2);

			if ($mate1 eq $mate2){
				die "\n[FATAL ERROR]: Read 1 ($mate1) and Read 2 ($mate2) files were specified as the exact same file, which is almost certainly unintentional (and wrong). Please re-specify!\n\n";
			}
			push @filenames,"$mate1,$mate2";
		}

		#if ($bowtie2){
		push @aligner_options,'--no-mixed';     ## By default Bowtie 2/HISAT2 is not looking for single-end alignments if it can't find concordant or discordant alignments
		push @aligner_options,'--no-discordant';## By default Bowtie 2/HISAT2 is not looking for discordant alignments if it can't find concordant ones

		unless ($no_dovetail){
			$dovetail = 1; # setting the option $dovetail automatically so that 5' trimmed sequences align better
		}

		if ($bowtie2){
			if ($dovetail){
				if ($old_flag){
					die "The option '--dovetail' may only be specified with the current SAM FLAG values. Please respecify...\n";
				}
				push @aligner_options,'--dovetail';     ## 07 03 2016 Adding the option --dovetail, mainly for PBAT and other 5' trimmed alignments
			}
			# HISAT2 doesn't have the concept of --dovetail
		}	
		

		if ($old_flag){
			warn "\nUsing FLAG values for paired-end SAM output used up to Bismark v0.8.2. In addition, paired-end sequences will have /1 and /2 appended to their read IDs\n\n";
			sleep(3);
		}
	}
	elsif ($mates2){
		die "Paired-end mapping requires the format: -1 <mates1> -2 <mates2>, please respecify!\n";
	}

	chdir $parent_dir or die "Failed to move back to parent directory: $!\n\n";
	my $current = getcwd();

	### SINGLE-END MAPPING
	# Single-end mapping will be performed if no mate pairs for paired-end mapping have been specified

	unless ($mates1 and $mates2){
		if (defined $singles){ # if --single_end has been set explicitely
			warn "Mapping set to single-end mode (user defined). File names need to be separated by commas [,] or colons [:]! Supplied file names are: $singles\n";
			$singles =~ s/:/,/g; # replacing colons (:) with commas
		}
		else{
			$singles = join (',',@ARGV);
			unless ($singles){
				die "\nNo filename supplied! Please specify one or more files for single-end Bismark mapping!\n";
			}
			$singles =~ s/\s/,/g; # replacing spaces with commas
		}
		@filenames = (split(/,/,$singles));
	}

	warn "\nInput files to be analysed (in current folder '$current'):\n";
	# checking if files exist and bail if they don't
	foreach my $f (@filenames){
		if ($mates1 and $mates2){
			my ($f1,$f2) = (split (/,/,$f));
			# mate 1
			if (-e $f1){
				warn "$f1\n";
			}
			else{
				die "Supplied filename '$f1' does not exist, please respecify!\n\n";
			}
			# mate 2
			if (-e $f2){
				warn "$f2\n";
			}
			else{
				die "Supplied filename '$f2' does not exist, please respecify!\n\n";
			}
		}
		else{
			if (-e $f){
				warn "$f\n";
			}
			else{
				die "Supplied filename '$f' does not exist, please respecify!\n\n";
			}
		}	
	}
	
	### MINIMUM INSERT SIZE (PAIRED-END ONLY)
	if (defined $minins){
		die "-I/--minins can only be used for paired-end mapping!\n\n" if ($singles);
		push @aligner_options,"--minins $minins";
	}

	### MAXIMUM INSERT SIZE (PAIRED-END ONLY)
	if (defined $maxins){
		die "-X/--maxins can only be used for paired-end mapping!\n\n" if ($singles);
		push @aligner_options,"--maxins $maxins";
	}
	else{
		unless ($singles){
			push @aligner_options,'--maxins 500';
		}
	}

	### QUIET prints nothing  besides alignments (suppresses warnings)
	if ($quiet){
		push @aligner_options,'--quiet';
	}


	### STRAND-SPECIFIC LIBRARIES
	my $directional;
	if ($non_directional){
		die "A library can only be specified to be either non-directional or a PBAT-Seq library. Please respecify!\n\n" if ($pbat);
		warn "Library was specified to be not strand-specific (non-directional), therefore alignments to all four possible bisulfite strands (OT, CTOT, OB and CTOB) will be reported\n";
		sleep (1);
		$directional = 0;
	}
	elsif($pbat){
		die "The option --pbat is currently not compatible with --gzip. Please run alignments with uncompressed temporary files, i.e. lose the option --gzip\n" if ($gzip);
		die "The option --pbat is currently only working with FastQ files. Please respecify (i.e. lose the option -f)!\n" if ($fasta);

		warn "Library was specified as PBAT-Seq (Post-Bisulfite Adapter Tagging), only performing alignments to the complementary strands (CTOT and CTOB)\n";
		sleep (1);
		$directional = 0;
	}
	else{
		warn "Library is assumed to be strand-specific (directional), alignments to strands complementary to the original top or bottom strands will be ignored (i.e. not performed!)\n";
		sleep (1);
		$directional = 1; # default behaviour
	}

	### UNMAPPEDSEQUENCE OUTPUT
	$unmapped = 0 unless ($unmapped);

	### AMBIGUOUS ALIGNMENT SEQUENCE OUTPUT
	$multi_map = 0 unless ($multi_map);
	

	### OUTPUT DIRECTORY
	
	chdir $parent_dir or die "Failed to move back to current working directory\n";
	if ($output_dir){
		unless ($output_dir =~ /\/$/){
			$output_dir =~ s/$/\//;
		}

		if (chdir $output_dir){
			$output_dir = getcwd; #  making the path absolute
			unless ($output_dir =~ /\/$/){
				$output_dir =~ s/$/\//;
			}
		}
		else{
			mkdir $output_dir or die "Unable to create directory $output_dir $!\n";
			warn "Created output directory $output_dir!\n\n";
			chdir $output_dir or die "Failed to move to $output_dir\n";
			$output_dir = getcwd; #  making the path absolute
			unless ($output_dir =~ /\/$/){
				$output_dir =~ s/$/\//;
			}
		}
		warn "Output will be written into the directory: $output_dir\n";
	}
	else{
		$output_dir = '';
	}

	### TEMPORARY DIRECTORY for C->T and G->A transcribed files

	chdir $parent_dir or die "Failed to move back to current working directory\n";
	if ($temp_dir){
		warn "\nUsing temp directory: $temp_dir\n";
		unless ($temp_dir =~ /\/$/){
			$temp_dir =~ s/$/\//;
		}

		if (chdir $temp_dir){
			$temp_dir = getcwd; #  making the path absolute
			unless ($temp_dir =~ /\/$/){
				$temp_dir =~ s/$/\//;
			}
		}
		else{
			mkdir $temp_dir or die "Unable to create directory $temp_dir $!\n";
			warn "Created temporary directory $temp_dir!\n\n";
			chdir $temp_dir or die "Failed to move to $temp_dir\n";
			$temp_dir = getcwd; #  making the path absolute
			unless ($temp_dir =~ /\/$/){
				$temp_dir =~ s/$/\//;
			}
		}
		warn "Temporary files will be written into the directory: $temp_dir\n";
	}
	else{
		$temp_dir = '';
	}

	### PREFIX FOR OUTPUT FILES
	if ($prefix){
		# removing trailing dots

		$prefix =~ s/\.+$//;

		warn "Using the following prefix for output files: $prefix\n\n";
		sleep(1);
	}

	if (defined $multicore){
		unless ($multicore > 0){
			die "Core usage needs to be set to 1 or more (currently selected $multicore). Please respecify!\n";
		}
		if ($multicore > 20){
			warn "Core usage currently set to more than 20 threads. This might fail horribly but let's see how it goes... (set value: $multicore)\n\n";
		}
		if ($sam){
			die "The multicore function currently requires the output to be in BAM format, so please lose either option --sam or --parallel/--multi\n";
		}
	}
	else{
		$multicore = 1; # default. Single-thread mode
		warn "Setting parallelization to single-threaded (default)\n\n";
	}	

	if ($basename and $multicore > 1){
		die "Specifying --basename in conjuction with --multicore is currently not supported (but we are aiming to fix this soon). Please lose either --basename or --multicore to proceed\n\n";
	}

	# Read Group Tags for the @RG header
	if (defined $rg_sample){
		if (defined $rg_id){
			warn "--rg_id set to '$rg_id', setting --rg_tag to TRUE\n";
			$rg_tag++; # implicitely setting $rg_tag as well
		}
		else{
			die "--rg_sample cannot be specified without without setting --rg_id. Please set both or none (which would result in the default name 'SAMPLE' for both)\n";
		}
	}

	if ($rg_tag){ # either true because of --rg_tag, or because --rg_id/--rg_sample were defined as well
		unless (defined $rg_id){
			$rg_id = 'SAMPLE';
		}
		unless (defined $rg_sample){
			$rg_sample = 'SAMPLE';
		}
	}
	
	
	### ADDITIONAL ALIGNMENT OPTIONS WE NEED FOR HISAT2
	if ($hisat2){
		
		if ($nosplice){ # unspliced genomic mapping
			if ($known_splices){
				die "You cannot run Bismark in HISAT2 mode with known splice junctions but without spliced alignments! Please respecify!\n\n";
			}
		
			warn "Running HISAT2 without detecting spliced alignments\n"; 
			push @aligner_options,'--no-spliced-alignment';
		}
		
		if ($known_splices){
		
			if (-e $known_splices){
				warn "Running HISAT2 without detcting spliced alignments\n"; 
				push @aligner_options,"--known-splicesite-infile $known_splices";
			}
			else{
				die "Known splice site infile >$known_splices< did not exist. Please check file name and try again!\n\n";
			}
		}
		###	NO SOFTCLIPPING - this is now negotiable
		if ($local){
			# warn "[EXPERIMENTAL]: local alignments with HISAT2\n\n|||||||||||||||||||\n\n";
			push @aligner_options,'--omit-sec-seq'; 
		}
		else{
			push @aligner_options,'--no-softclip --omit-sec-seq'; ## endToEnd alignments - default
		}
		
	}
	else{
		if ($nosplice){
			die "The option --no-spliced-alignment can only be selected in HISAT2 mode! Please re-specificy!\n\n";
		}
		if ($known_splices){
			die "The option --known-splicesite-infile can only be selected in HISAT2 mode! Please re-specificy!\n\n";
		}
	}
	
	### MINIMAP2 OPTIONS

	unless ($mm2){
		if ($mm2_short_read){
			die "You cannot specify minimap2 options (--mm2_short_reads) unless you also use --minimap2. Please respecify!\n\n";
		}
		if ($maximum_length_cutoff){
			die "You cannot specify minimap2 options (--mm2_maximum_length) unless you also use --minimap2. Please respecify!\n\n";
		}
		if ($mm2_pacbio){
			die "You cannot specify minimap2 options (--pacbio) unless you also use --minimap2. Please respecify!\n\n";
		}
		if ($mm2_nanopore){
			die "You cannot specify minimap2 options (--nanopore) unless you also use --minimap2. Please respecify!\n\n";
		}
	}

	if ($mm2){
		if (defined $maximum_length_cutoff){
			if ($maximum_length_cutoff < 100){
				die "Please select a sensible maximum sequence length cutoff (currently 100-100,000 bp)\n";
			}
			if ($maximum_length_cutoff > 100000){
				die "Please select a sensible maximum sequence length cutoff (currently 100-100,000 bp)\n";
			}
		}
		else{
			$maximum_length_cutoff = 10000; # default
		}
		warn "Using a maximum length cutoff of $maximum_length_cutoff bp\n";

		# Minimap requires a completely different set of options, let's start with a clean slate.
		@aligner_options = ();

		# -a           output in the SAM format (PAF by default)	
		push @aligner_options,'-a'; 

		# Output an MD tag
		push @aligner_options,'--MD'; 

		# Adding --secondary=no to reduce alignment time. Or does it?
		push @aligner_options,'--secondary=no'; 

 		# Not sure yet how to handle multiple threads per alignment. Let's be cautious initially
		# -t INT       number of threads [3]
		push @aligner_options,'-t 2'; 

		if ($mm2_short_read){
			if ($mm2_nanopore){
				die "Please select minimap2 in Short Read or Nanopore mode, but not both...\n\n";
			}
			if ($mm2_pacbio){
				die "Please select minimap2 in Short Read or PacBio mode, but not both...\n\n";
			}
			# Preset:
    		# -x STR       preset (always applied before other options; see minimap2.1 for details) []
			#          - sr: genomic short-read mapping
			#          - map-pb/map-ont: PacBio/Nanopore vs reference mapping          
			#          - splice: long-read spliced alignment
			warn "Using preset for short read mapping (-x sr)\n\n";
			push @aligner_options,'-x sr'; 	
		}
		elsif($mm2_pacbio){

			if ($mm2_nanopore){
				die "Please select minimap2 in PacBio or Nanopore mode, but not both...\n\n";
			}
			
			warn "Using preset for PacBio mapping (-x map-pb)\n\n";
			push @aligner_options,'-x map-pb';
			
		}
		else{
			# otherwise we will assume that the default mode is ONT Nanopore vs reference mapping for now
			$mm2_nanopore = 1;
			warn "Using preset for ONT mapping (-x map-ont)\n\n";
			push @aligner_options,'-x map-ont';
		}

		
		# Let's keep these options in mind		
		# -L           write CIGAR with >65535 ops at the CG tag
		# --cs[=STR]   output the cs tag; STR is 'short' (if absent) or 'long' [none]
		# --eqx        write =/X CIGAR operators
		# -Y           use soft clipping for supplementary alignments
		# -K NUM       minibatch size for mapping [500M]
		push @aligner_options, "-K 250K";

	}


	### SUMMARY OF ALL ALIGNER OPTIONS
	my $aligner_options = join (' ',@aligner_options);
	warn "Summary of all aligner options:\t$aligner_options\n"; sleep(2);
	
	if ($slam){
		die "The (experimental) SLAM-seq mode currently only works with FastQ files. Please re-specify\n\n" unless ($fastq);
		if ($mm2){
			die "Options --slam and --minimap2 do not work together. Please stop wanting that!\n\n";
		}
	}
	
	if ($icpc){
		warn "In this mode (--icpc), FastQ read IDs are truncated at the first space to ignore comments\n";
	}
	if ($non_bs_mm){
		if ($local){
			die "The option --non_bs_mm is only available for end-to-end alignments\n\n";
		}
		if ($mm2){
			die "The option --non_bs_mm is only available for end-to-end alignments (and doesn't work with minimap2)\n\n";
		}

	}
	
	return ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie2,$path_to_hisat2,$path_to_minimap2,$sequence_format,$aligner_options,
	$directional,$unmapped,$multi_map,$phred64,$output_dir,$bowtie2,$hisat2,$sam_no_hd,$skip,$qupto,$temp_dir,$non_bs_mm,$insertion_open,
	$insertion_extend,$deletion_open,$deletion_extend,$gzip,$bam,$samtools_path,$pbat,$prefix,$old_flag,$basename,$score_min_intercept,
	$score_min_slope,$bt2_large_index_present,$multicore,$rg_tag,$rg_id,$rg_sample,$ambig_bam,$cram,$cram_ref,$nucleotide_coverage,$dovetail,
	$aligner_version,$slam,$icpc,$local,$strandID,$mm2,$maximum_length_cutoff);
}


sub generate_SAM_header{

    ### Prints the SAM header to the OUT filehandle: one @HD line, one @SQ line per entry
    ### in %SQ_order, an optional @RG line (if $rg_tag is set) and finally the @PG line.
    ### If $ambig_bam is set, the @HD, @SQ and @PG lines are printed to AMBIBAM as well
    ### (the @RG line is only ever written to OUT).

    # @HD = header, VN = version, SO = sort order
    my $hd_line = "\@HD\tVN:1.0\tSO:unsorted\n";
    print OUT $hd_line;
    print AMBIBAM $hd_line if ($ambig_bam);

    # The keys of %SQ_order are sorted numerically so that the @SQ lines are printed in a
    # defined order (iterating over 'keys %chromosomes' directly would give an unordered header).
    # The values of %SQ_order are the sequence names, which are also the keys of %chromosomes.
    foreach my $rank (sort {$a<=>$b} keys %SQ_order){
        my $seq_name   = $SQ_order{$rank};
        my $seq_length = length ($chromosomes{$seq_name});

        # @SQ = sequence, SN = seq name, LN = length
        my $sq_line = "\@SQ\tSN:$seq_name\tLN:$seq_length\n";
        print OUT $sq_line;
        print AMBIBAM $sq_line if ($ambig_bam);
    }

    # 18 11 2015: Added @RG as a header line if --rg_tag or --rg_id/--rg_sample were set as well
    # @RG = Read Group, PL = Platform, ID: required, SM: sample, can be a description
    if ($rg_tag){
        print OUT "\@RG\tPL:ILLUMINA\tID:$rg_id\tSM:$rg_sample\n";
    }

    # @PG = program, ID = unique identifier, VN = program version, CL = command line
    my $pg_line = "\@PG\tID:Bismark\tVN:$bismark_version\tCL:\"bismark $command_line\"\n";
    print OUT $pg_line;
    print AMBIBAM $pg_line if ($ambig_bam);
}

### I would like to thank the following individuals for their valuable contributions to the Bismark SAM output format:
### O. Tam (2010), C. Whelan (2011), E. Vidal (2011), T. McBryan (2011), P. Hickey (2011), A. Dei Rossi (2014)

sub single_end_SAM_output{

	### Prints one single-end alignment to the OUT filehandle as a tab-separated SAM record.
	###
	### Arguments:
	###   $id                      - read ID; also used as the key into $methylation_call_params
	###   $actual_seq              - read sequence (reverse-complemented here for '-' strand alignments)
	###   $methylation_call_params - hash reference holding the alignment details of read $id
	###   $qual                    - quality string of the read (reversed here for '-' strand alignments)
	###
	### Dies on an unexpected strand or read/genome conversion combination, and (with --non_bs_mm)
	### on CIGAR strings with a mismatching number of lengths and operations, or with operations
	### other than M, I, D or N.
	###
	### Side effect: for '-' strand alignments with a deletion (D) in the CIGAR string, the entry
	### {genomic_seq_for_MD_tag} stored for $id is replaced by its reverse complement.

	my ($id,$actual_seq,$methylation_call_params,$qual) = @_;
	my $strand               = $methylation_call_params->{$id}->{alignment_strand};
	my $chr                  = $methylation_call_params->{$id}->{chromosome};
	my $start                = $methylation_call_params->{$id}->{position};
	my $ref_seq              = $methylation_call_params->{$id}->{unmodified_genomic_sequence};
	my $methcall             = $methylation_call_params->{$id}->{methylation_call};
	my $read_conversion      = $methylation_call_params->{$id}->{read_conversion};
	my $genome_conversion    = $methylation_call_params->{$id}->{genome_conversion};
	my $number_of_mismatches = $methylation_call_params->{$id}->{alignment_score}; # only converted into a mismatch count if --non_bs_mm was set

	### This is a description of the bitwise FLAG field which needs to be set for the SAM file taken from: "The SAM Format Specification (v1.4-r985), September 7, 2011"
	## FLAG: bitwise FLAG. Each bit is explained in the following table:
	## Bit    Description                                                Comment                                Value
	## 0x1    template has multiple segments in sequencing               0: single-end 1: paired end            value: 2**0 (  1)
	## 0x2    each segment properly aligned according to the aligner     true only for paired-end alignments    value: 2**1 (  2)
	## 0x4    segment unmapped                                           ---                                           ---
	## 0x8    next segment in the template unmapped                      ---                                           ---
	## 0x10   SEQ being reverse complemented                                                                    value: 2**4 ( 16)
	## 0x20   SEQ of the next segment in the template being reversed                                            value: 2**5 ( 32)
	## 0x40   the first segment in the template                          read 1                                 value: 2**6 ( 64)
	## 0x80   the last segment in the template                           read 2                                 value: 2**7 (128)
	## 0x100  secondary alignment                                        ---                                           ---
	## 0x200  not passing quality controls                               ---                                           ---
	## 0x400  PCR or optical duplicate                                   ---                                           ---

	#####

	# FLAG value used for the SAM format, looked up as: alignment strand -> read conversion -> genome conversion
	my %flag_for = (
		'+' => {
			CT => { CT => 0  },     #  0 for "+" strand (OT)
			GA => { GA => 16 },     # 16 for "-" strand (CTOB, yields information for the original bottom strand)
		},
		'-' => {
			CT => { GA => 16 },     # 16 for "-" strand (OB)
			GA => { CT => 0  },     #  0 for "+" strand (CTOT, yields information for the original top strand)
		},
	);

	unless (exists $flag_for{$strand}){
		die "Unexpected strand information: $strand\n\n";
	}

	my $flag = $flag_for{$strand}->{$read_conversion}->{$genome_conversion};
	unless (defined $flag){
		die "Unexpected strand and read/genome conversion: strand: $strand, read conversion: $read_conversion, genome_conversion: $genome_conversion\n\n";
	}

	#####

	my $mapq  = $methylation_call_params->{$id}->{mapq};
	my $cigar = $methylation_call_params->{$id}->{CIGAR};               # Actual CIGAR string reported by Bowtie 2 or HISAT2

	# The following three fields only carry information for paired-end alignments
	my $rnext = "*";
	my $pnext = 0;
	my $tlen  = 0;

	#####

	# The genomic sequence carries 2 additional nucleotides which need to be removed again
	if ($read_conversion eq 'CT'){
		$ref_seq = substr($ref_seq, 0, length($ref_seq) - 2);    # Removes additional nucleotides from the 3' end. This only works for the original top or bottom strands
	}
	else{
		$ref_seq = substr($ref_seq, 2, length($ref_seq) - 2);    # Removes additional nucleotides from the 5' end. This works for the complementary strands in non-directional libraries
	}

	if ($strand eq '-'){
		$actual_seq = revcomp($actual_seq);                               # Sequence represented on the forward genomic strand
		$ref_seq = revcomp($ref_seq);                                     # Required for comparison with actual sequence
		if ($cigar =~ /[D]/){
			$methylation_call_params->{$id}->{genomic_seq_for_MD_tag} = revcomp( $methylation_call_params->{$id}->{genomic_seq_for_MD_tag} );
		}
		$qual = reverse $qual;                                            # if the sequence was reverse-complemented the quality string needs to be reversed as well
	}

	#####

	# Optional tag NM: edit distance to the reference, i.e. the value returned by hemming_dist() for
	# read and reference plus the number of inserted/deleted bases which were parsed while getting
	# the genomic sequence
	my $edit_distance = hemming_dist($actual_seq,$ref_seq);
	$edit_distance += $methylation_call_params->{$id}->{indels};
	my $NM_tag = "NM:i:$edit_distance";

	#####

	# Optional tag MD: string providing mismatched reference bases in the alignment (this does include indel information)
	my $MD_tag = make_mismatch_string($actual_seq, $ref_seq,$cigar,$methylation_call_params->{$id}->{genomic_seq_for_MD_tag});

	#####

	# Optional tag XM: Methylation Call String. If the sequence was reverse-complemented the
	# methylation call string needs to be reversed as well ($strand can only be '+' or '-' here)
	my $XM_tag = ($strand eq '+') ? "XM:Z:$methcall" : 'XM:Z:'.reverse $methcall;

	my $XR_tag = "XR:Z:$read_conversion";                               # Optional tag XR: Read Conversion
	my $XG_tag = "XG:Z:$genome_conversion";                             # Optional tag XG: Genome Conversion

	#####

	# Optionally calculating the number of non-bisulfite mismatches from the alignment score

	if ($non_bs_mm) {

		$number_of_mismatches =~ s/-//; # removing the minus sign

		### We need to analyse the CIGAR string whether the read contained any indels to determine the number of mismatches
		### skipped regions (N) do not affect the Alignment Score (AS)

		if ($cigar =~ /(D|I)/) {

			# parsing CIGAR string
			my @len = split (/\D+/,$cigar); # storing the length per operation
			my @ops = split (/\d+/,$cigar); # storing the operation
			shift @ops;		# remove the empty first element
			die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);

			foreach my $i (0..$#len) {
				my $op = $ops[$i];

				if ($op eq 'M' or $op eq 'N') {
					next;		# matches and skipped regions (splice junctions) are irrelevant here
				}
				elsif ($op eq 'I') {	# insertion in the read sequence
					$number_of_mismatches -= $insertion_open;
					$number_of_mismatches -= $len[$i] * $insertion_extend;
				}
				elsif ($op eq 'D') {	# deletion in the read sequence
					$number_of_mismatches -= $deletion_open;
					$number_of_mismatches -= $len[$i] * $deletion_extend;
				}
				elsif ($cigar =~ tr/HPXS=//) {	# if these (for standard mapping) illegal characters exist we die. tr/// takes a plain character list, no [] brackets
					die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I', 'D' or 'N': $cigar";
				}
				else {
					die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I', 'D' or 'N': $cigar";
				}
			}
		}
		### Now we have InDel corrected alignment scores

		### if the actual sequence contained Ns we need to adjust the number of mismatches. Ns receive a penalty of -1, but normal mismatches receive -6.
		### This might still break if the sequence contained more than 5 Ns, but this should occur close to never

		my $seq_N_count = $number_of_mismatches % 6; # modulo 6 will return the integer rest after the division
		$number_of_mismatches = int ($number_of_mismatches / 6) + $seq_N_count;
	}

	####

	# SAM format: QNAME, FLAG, RNAME, 1-based POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL, optional fields
	my @sam_fields = ($id,$flag,$chr,$start,$mapq,$cigar,$rnext,$pnext,$tlen,$actual_seq,$qual,$NM_tag,$MD_tag,$XM_tag,$XR_tag,$XG_tag);

	### optionally print number of non-bisulfite mismatches
	if ($non_bs_mm){
		push @sam_fields,"XA:Z:$number_of_mismatches";
	}

	### optionally print the read group (always the last field)
	if ($rg_tag){
		push @sam_fields,"RG:Z:$rg_id";
	}

	print OUT join("\t",@sam_fields),"\n";
}

sub paired_end_SAM_output{

	my ($id,$actual_seq_1,$actual_seq_2,$methylation_call_params,$qual_1,$qual_2) = @_;
	my $strand_1                = $methylation_call_params->{$id}->{alignment_read_1}; # Bowtie 1 only reports the read 1 alignment strand
	my $strand_2                = $methylation_call_params->{$id}->{alignment_read_2};
	my $chr                     = $methylation_call_params->{$id}->{chromosome};
	my $ref_seq_1               = $methylation_call_params->{$id}->{unmodified_genomic_sequence_1};
	my $ref_seq_2               = $methylation_call_params->{$id}->{unmodified_genomic_sequence_2};
	my $methcall_1              = $methylation_call_params->{$id}->{methylation_call_1};
	my $methcall_2              = $methylation_call_params->{$id}->{methylation_call_2};
	my $read_conversion_1       = $methylation_call_params->{$id}->{read_conversion_1};
	my $read_conversion_2       = $methylation_call_params->{$id}->{read_conversion_2};
	my $genome_conversion       = $methylation_call_params->{$id}->{genome_conversion};

	my $id_1;
	my $id_2;

	if ($old_flag){
		$id_1 = $id.'/1';
		$id_2 = $id.'/2';
	}
	else{
		$id_1 = $id; # appending /1 or /2 confuses some downstream programs such as Picard
		$id_2 = $id;
	}

	# Allows all degenerate nucleotide sequences in reference genome
	# die "Reference sequence ($ref_seq_1) contains invalid nucleotides!\n" if $ref_seq_1 =~ /[^ACTGNRYMKSWBDHVX]/i; # X are padded nucleotides in case of insertions in the read
	# die "Reference sequence ($ref_seq_2) contains invalid nucleotides!\n" if $ref_seq_2 =~ /[^ACTGNRYMKSWBDHVX]/i;

	my $index; # used to store the srand origin of the alignment in a less convoluted way
	my $strandID_str = "YS:Z:";

	if ($read_conversion_1 eq 'CT' and $genome_conversion eq 'CT'){
		$index = 0; ## this is OT   (original top strand)
		$strandID_str .= "OT";
	}
	elsif ($read_conversion_1 eq 'GA' and $genome_conversion eq 'GA'){
		$index = 1; ## this is CTOB (complementary to OB)
		$strandID_str .= "CTOB";
	}
	elsif ($read_conversion_1 eq 'GA' and $genome_conversion eq 'CT'){
		$index = 2; ## this is CTOT (complementary to OT)
		$strandID_str .= "CTOT";
	}
	elsif ($read_conversion_1 eq 'CT' and $genome_conversion eq 'GA'){
		$index = 3; ## this is OB   (original bottom)
		$strandID_str .= "OB";
	}	
	else {
		die "Unexpected combination of read 1 and genome conversion: $read_conversion_1 / $genome_conversion\n";
	}

	my $number_of_mismatches_1  = $methylation_call_params->{$id}->{alignment_score_1}; # only needed for custom allele-specific output, not the default!
	my $number_of_mismatches_2  = $methylation_call_params->{$id}->{alignment_score_2};
	
	### we need to remove 2 bp of the genomic sequence as we were extracting read + 2bp long fragments to make a methylation call at the
	### first or last position.

	if ($index == 0 or $index == 3){ # OT or OB
		$ref_seq_1 = substr($ref_seq_1,0,length($ref_seq_1)-2);
		$ref_seq_2 = substr($ref_seq_2,2,length($ref_seq_2)-2);
	}
	else{ # CTOT or CTOB
		$ref_seq_1 = substr($ref_seq_1,2,length($ref_seq_1)-2);
    	$ref_seq_2 = substr($ref_seq_2,0,length($ref_seq_2)-2);
	}

	
	#####

	# start positions

	my $start_read_1 = $methylation_call_params->{$id}->{position_1};
	my $start_read_2 = $methylation_call_params->{$id}->{position_2};
	
	#####

	# end positions

	my $end_read_1 = $methylation_call_params->{$id}->{end_position_1};
	my $end_read_2 = $methylation_call_params->{$id}->{end_position_2};

	#####

	### This is a description of the bitwise FLAG field which needs to be set for the SAM file taken from: "The SAM Format Specification (v1.4-r985), September 7, 2011"
	## FLAG: bitwise FLAG. Each bit is explained in the following table:
	## Bit    Description                                                Comment                                Value
	## 0x1    template having multiple segments in sequencing            0: single-end 1: paired end            value: 2^^0 (  1)
	## 0x2    each segment properly aligned according to the aligner     true only for paired-end alignments    value: 2^^1 (  2)
	## 0x4    segment unmapped                                           ---                                           ---
	## 0x8    next segment in the template unmapped                      ---                                           ---
	## 0x10   SEQ being reverse complemented                             - strand alignment                     value: 2^^4 ( 16)
	## 0x20   SEQ of the next segment in the template being reversed     + strand alignment                     value: 2^^5 ( 32)
	## 0x40   the first segment in the template                          read 1                                 value: 2^^6 ( 64)
	## 0x80   the last segment in the template                           read 2                                 value: 2^^7 (128)
	## 0x100  secondary alignment                                        ---                                           ---
	## 0x200  not passing quality controls                               ---                                           ---
	## 0x400  PCR or optical duplicate                                   ---                                           ---

	### As the FLAG value do not consider that there might be 4 different bisulfite strands of DNA, we are trying to make FLAG tags which take the strand identity into account

	# strands OT and CTOT will be treated as aligning to the top strand (both sequences are scored as aligning to the top strand)
	# strands OB and CTOB will be treated as aligning to the bottom strand (both sequences are scored as reverse complemented sequences)

	my $flag_1;                                                            # FLAG variable used for SAM format
	my $flag_2;

	### The new default FLAG values were changed on 21 07 2015, so that reads do not ignored as discordant reads by the new SeqMonk BAM import
	### In essence we are going to flip the R1 R2 flags around for CTOT and CTOB reads. We still report the first and second read in the same
	### order and only change the actual FLAG value. This should not affect the methylation extraction in any way

	if ($index == 0){       # OT
		unless ($old_flag){
			$flag_1 = 99;                                                      # Read 1 is on the + strand and Read 2 is reversed  (1+2+32+64)
			$flag_2 = 147;                                                     # Read 2 is reverse complemented but informative for the OT  (1+2+16+128)
		}
		else{
			$flag_1 = 67;                                                      # Read 1 is on the + strand  (1+2+64) (Read 2 is technically reverse-complemented, but we do not score it)
			$flag_2 = 131;                                                     # Read 2 is on - strand but informative for the OT        (1+2+128)
		}
	}
	elsif ($index == 1){    # CTOB
		unless($old_flag){
			$flag_1 = 163;                                               # Read 1 is on the forward strand (CTOB) and Read 2 is reverse complemented but we swap round the FLAG
																		 # for R1 and R2 so that we don't end up with discordant pairs
                                                                         # So Read 1 gets Paired read, mapped in proper pair, mate is reversed and second in pair  (1+2+32+128)
			$flag_2 = 83;                                                      # Read 2 gets Read paired, mapped in proper pair, first in pair and Read 2 is reversed  (1+2+16+64)
		}
		else{
			$flag_1 = 115;                                                     # Read 1 is on the + strand, we score for OB  (1+2+16+32+64)
			$flag_2 = 179;                                                     # Read 2 is on the - strand  (1+2+16+32+128)
		}
	}
	elsif ($index == 2){    # CTOT
		unless ($old_flag){
			$flag_1 = 147;                                                     # Read 1 is reverse complemented (CTOT) and Read 2 is the forward read
                                                                         # but we swap round the FLAG for R1 and R2 so that we do not end up with discordant pairs
                                                                         # So Read 1 gets Read paired, read mapped in proper pair, read reverse complemented and second in pair (1+2+32+128)
			$flag_2 = 99;                                                      # Read 2 gets Read paired, read mapped in proper pair, mate reverse strand and First in Pair (1+2+32+64)
		}
		else{
			$flag_1 = 67;                                                      # Read 1 is on the - strand (CTOT) strand, but we score it for OT (1+2+64)
			$flag_2 = 131;                                                     # Read 2 is on the + strand, score it for OT (1+2+128)
		}
	}
	elsif ($index == 3){    # OB
		unless ($old_flag){
			$flag_1 = 83;                                                      # Read 1 is on the - strand, mapped in proper pair and Read 1 is reversed  (1+2+16+64)
			$flag_2 = 163;                                                     # Read 2 is on the - strand, mapped in proper pair and Read 1 is reversed  (1+2+32+128)
		}
		else{
			$flag_1 = 115;                                                     # Read 1 is on the - strand, we score for OB  (1+2+16+32+64)
			$flag_2 = 179;                                                     # Read 2 is on the + strand  (1+2+16+32+128)
		}
	}

	#####

	my $mapq = $methylation_call_params->{$id}->{mapq};
	
	#####
	
	my $cigar_1 = $methylation_call_params->{$id}->{CIGAR_1};             # Actual CIGAR string reported by Bowtie 2
	my $cigar_2 = $methylation_call_params->{$id}->{CIGAR_2};
	
	#####

	my $rnext = '=';                                                     # Chromosome of mate; applies to both reads

	#####

	my $pnext_1 = $start_read_2;                                         # Leftmost position of mate
	my $pnext_2 = $start_read_1;

	#####

	my $tlen_1;                                                          # signed observed Template LENgth (or inferred fragment size)
	my $tlen_2;

	if ($start_read_1 <= $start_read_2){

		# Read 1 alignment is leftmost

		if ($end_read_2 >= $end_read_1){

			if ($flag_1 == 83 and $dovetail){   # R1 has a reverse orientation
				#         ----------------->     read 2   reads are dovetailing, that is one mate alignment extends past the beginning of the other
				#  <-------------------          read 1   such that the wrong mate begins upstream
				# warn "FLAG 1: $flag_1\nFLAG 2: $flag_2\n";
				# warn "Reads are dovetailing\n";
				$tlen_1 = $start_read_1 - $end_read_2 - 1;     # Read 1 still receives a - sign even though it is the leftmost one
				$tlen_2 = $end_read_2 - $start_read_1 + 1;     # Read 2 receives a + sign,
				# warn "TLEN 1: $tlen_1\nTLEN 2: $tlen_2\n";
			}
			else{
				# ------------->                 read 1   reads not overlapping
				#                 <----------    read 2
				#             or
				# 	------------------->           read 1   reads overlapping
				#        <-------------------    read 2
				#             or
				# ------------------------->     read 1
				#   <-----------------------     read 2   read 2 contained within read 1
				#             or
				# ------------------------->     read 1   reads 1 and 2 exactly overlapping
				# <-------------------------     read 2
				#

				$tlen_1 = $end_read_2 - $start_read_1 + 1;                         # Leftmost read has a + sign,
				$tlen_2 = $start_read_1 - $end_read_2 - 1;                         # Rightmost read has a - sign
				# warn "Reads are non/overlapping\nTLEN 1: $tlen_1\nTLEN 2: $tlen_2\n";
			}
		}
		elsif ($end_read_2 < $end_read_1){

			# ------------------------->     read 1
			#       <-----------             read 2   read 2 contained within read 1
			#
			# or
			#
			# ------------------------->     read 1
			# <------------------------      read 2   read 2 contained within read 1

			# start and end of read 2  are fully contained within read 1, using the length of read 1 for the TLEN variable
			$tlen_1 = $end_read_1 - $start_read_1 + 1;          # Set to length of read 1   Leftmost read has a + sign,
			$tlen_2 = ($end_read_1 - $start_read_1 + 1) * -1;   # Set to length of read 1   Rightmost read has a - sign. well this is debatable. Changed this
			### as a request by frozenlyse on SeqAnswers on 24 July 2013
		}
	}
	elsif ($start_read_2 < $start_read_1){

		# Read 2 alignment is leftmost

		if ($end_read_1 >= $end_read_2){

			# Read 2 alignment is leftmost
			if ($flag_1 == 99 and $dovetail){   # R1 has a forward orientation

				#         ----------------->     read 1   reads are dovetailing, that is one mate alignment extends past the beginning of the other
				#  <-------------------          read 2   such that the wrong mate begins upstream

				# warn "FLAG 1: $flag_1\nFLAG 2: $flag_2\n";
				# warn "Reads are dovetailing\n";
				$tlen_1 = $end_read_1 - $start_read_2 + 1;     # Read 1 still receives a + sign even though it is not leftmost
				$tlen_2 = $start_read_2 - $end_read_1 - 1;
				# warn "TLEN 1: $tlen_1\nTLEN 2: $tlen_2\n";
			}
			else{
				# ------------->                 read 2   reads not overlapping
				#                 <----------    read 1
				#             or
				# ------------------------->     read 2   reads overlapping
				#  <-------------------------    read 1
				#             or
				# ------------------------->     read 2
				#   <-----------------------     read 1   read 1 contained within read 2
				#             or
				# ------------------------->     read 2
				#   <-----------------------     read 1   read 1 contained within read 2
				# warn "FLAG 1: $flag_1\nFLAG 2: $flag_2\n";
				# warn "Read 2 has a forward orientation\n";
				$tlen_2 = $end_read_1 - $start_read_2 + 1;                         # Leftmost read has a + sign,
				$tlen_1 = $start_read_2 - $end_read_1 - 1;                         # Rightmost read has a - sign
			}
		}
		elsif ($end_read_1 < $end_read_2){

				# ------------------------->     read 2
				#       <-----------             read 1   read 1 contained within read 2
				#
				# or
				#
				# ------------------------->     read 2
				#  <------------------------      read 1   read 1 contained within read 2

				# start and end of read 1  are fully contained within read 2, using the length of read 2 for the TLEN variable
				$tlen_1 = ($end_read_2 - $start_read_2 + 1) * -1;          # Set to length of read 2   Shorter read receives a - sign,
				$tlen_2 = $end_read_2 - $start_read_2 + 1;                 # Set to length of read 2   Longer read receives a +. Well this is debatable. Changed this
				### as a request by frozenlyse on SeqAnswers on 24 July 2013
		}
	}


	#####

	# adjusting the strand of the sequence before we use them to generate mismatch strings
	if ($strand_1 eq '-'){
		$actual_seq_1 = revcomp($actual_seq_1);                            # Sequence represented on the forward genomic strand
		$ref_seq_1 = revcomp($ref_seq_1);                                  # Required for comparison with actual sequence
		if ($cigar_1 =~ /[D]/){ # deletion or spliced read
			$methylation_call_params->{$id}->{genomic_seq_for_MD_tag_1} = revcomp( $methylation_call_params->{$id}->{genomic_seq_for_MD_tag_1} );
		}
		$qual_1 = reverse $qual_1;                                         # we need to reverse the quality string as well
	}
	if ($strand_2 eq '-'){
		$actual_seq_2 = revcomp($actual_seq_2);                            # Mate sequence represented on the forward genomic strand
		$ref_seq_2 = revcomp($ref_seq_2);                                  # Required for comparison with actual sequence
		if ($cigar_2 =~ /[D]/){ # deletion or spliced read
			$methylation_call_params->{$id}->{genomic_seq_for_MD_tag_2} = revcomp( $methylation_call_params->{$id}->{genomic_seq_for_MD_tag_2} );
		}
		$qual_2 = reverse $qual_2;                                         # If the sequence gets reverse complemented we reverse the quality string as well
	}

	# print "$actual_seq_1\n$ref_seq_1\n\n";
	# print "$actual_seq_2\n$ref_seq_2\n\n";

	#####

	my $hemming_dist_1 = hemming_dist($actual_seq_1,$ref_seq_1);         # Minimal number of one-nucleotide edits needed to transform the read string into the reference sequence
	my $hemming_dist_2 = hemming_dist($actual_seq_2,$ref_seq_2);
	$hemming_dist_1 += $methylation_call_params->{$id}->{indels_1};    # Adding the number of inserted/deleted bases which we parsed while getting the genomic sequence
	$hemming_dist_2 += $methylation_call_params->{$id}->{indels_2};    # Adding the number of inserted/deleted bases which we parsed while getting the genomic sequence
	
	my $NM_tag_1 = "NM:i:$hemming_dist_1";                               # Optional tag NM: edit distance based on nucleotide differences
	my $NM_tag_2 = "NM:i:$hemming_dist_2";                               # Optional tag NM: edit distance based on nucleotide differences

	#####

	my $MD_tag_1 = make_mismatch_string($actual_seq_1,$ref_seq_1,$cigar_1,$methylation_call_params->{$id}->{genomic_seq_for_MD_tag_1}); # Optional tag MD: String providing mismatched reference bases in the alignment (including indel information)
	my $MD_tag_2 = make_mismatch_string($actual_seq_2,$ref_seq_2,$cigar_2,$methylation_call_params->{$id}->{genomic_seq_for_MD_tag_2});

	#  my $XX_tag_1 = make_mismatch_string($actual_seq_1,$ref_seq_1);       # Optional tag XX: String providing mismatched reference bases in the alignment (NO indel information!)
	#  my $XX_tag_2 = make_mismatch_string($actual_seq_2,$ref_seq_2);

	#####

	my $XM_tag_1;                                                        # Optional tag XM: Methylation call string
	my $XM_tag_2;

	if ($strand_1 eq '-'){
		$XM_tag_1 = 'XM:Z:'.reverse $methcall_1;                           # Needs to be reversed if the sequence was reverse complemented
	}
	else{
		$XM_tag_1 = "XM:Z:$methcall_1";
	}

	if ($strand_2 eq '-'){
		$XM_tag_2 = 'XM:Z:'.reverse $methcall_2;                           # Needs to be reversed if the sequence was reverse complemented
	}
	else{
		$XM_tag_2 = "XM:Z:$methcall_2";
	}

	#####

	my $XR_tag_1 = "XR:Z:$read_conversion_1";                            # Optional tag XR: Read 1 conversion state
	my $XR_tag_2 = "XR:Z:$read_conversion_2";                            # Optional tag XR: Read 2 conversion state

	#####

	my $XG_tag = "XG:Z:$genome_conversion";                              # Optional tag XG: Genome Conversion state; valid for both reads
	
	#####

	# Optionally calculating number of mismatches for Bowtie 2 alignments

	if ($non_bs_mm) {
		$number_of_mismatches_1 =~ s/-//; # removing the minus sign
		$number_of_mismatches_2 =~ s/-//;

		### We need to analyse the CIGAR strings whether the reads contained any indels to determine the number of mismatches

		### CIGAR 1
		if ($cigar_1 =~ /(D|I)/) {
			# warn "$cigar_1\n";

			# parsing CIGAR string
			my @len = split (/\D+/,$cigar_1); # storing the length per operation
			my @ops = split (/\d+/,$cigar_1); # storing the operation
			shift @ops;		# remove the empty first element
			die "CIGAR string '$cigar_1' contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);

			foreach (0..$#len) {
				if ($ops[$_] eq 'M') {
					# warn "skipping\n";
					next;		# irrelevant
				}
				elsif ($ops[$_] eq 'I') {	# insertion in the read sequence
					$number_of_mismatches_1 -= $insertion_open;
					$number_of_mismatches_1 -= $len[$_] * $insertion_extend;
					# warn "Insertion: Subtracting $ops[$_], length $len[$_], open: $insertion_open, extend: $insertion_extend\n";
				}
				elsif ($ops[$_] eq 'D') {	# deletion in the read sequence
					$number_of_mismatches_1 -= $deletion_open;
					$number_of_mismatches_1 -= $len[$_] * $deletion_extend;
					# warn "Deletion: Subtracting $ops[$_], length $len[$_], open: $deletion_open, extend: $deletion_extend\n";
				}
				elsif ($ops[$_] eq 'N') { # skipped portion, spliced read
					# warn "skipping\n";
					next;		# irrelevant
				}
				elsif ($cigar_1 =~ tr/[HPSX=]//) {	# if these (for standard mapping) illegal characters exist we die
					die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I', 'D' and 'N': $cigar_1";
				}
				else {
					die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I', 'D' and 'N': $cigar_1";
				}
			}

			# warn "Alignment score $number_of_mismatches_1\n";
			# print "Mismatches $number_of_mismatches_1\n\n";
		}

		### CIGAR 2
		if ($cigar_2 =~ /(D|I)/) {
			# warn "$cigar_2\n";

			# parsing CIGAR string
			my @len = split (/\D+/,$cigar_2); # storing the length per operation
			my @ops = split (/\d+/,$cigar_2); # storing the operation
			shift @ops;		# remove the empty first element
			die "CIGAR string '$cigar_2' contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);

			foreach (0..$#len) {
				if ($ops[$_] eq 'M') {
					# warn "skipping\n";
					next; #irrelevant
				}
				elsif ($ops[$_] eq 'I') {	# insertion in the read sequence
					$number_of_mismatches_2 -= $insertion_open;
					$number_of_mismatches_2 -= $len[$_] * $insertion_extend;
					# warn "Insertion: Subtracting $ops[$_], length $len[$_], open: $insertion_open, extend: $insertion_extend\n";
				}
				elsif ($ops[$_] eq 'D') {	# deletion in the read sequence
					$number_of_mismatches_2 -= $deletion_open;
					$number_of_mismatches_2 -= $len[$_] * $deletion_extend;
					# warn "Deletion: Subtracting $ops[$_], length $len[$_], open: $deletion_open, extend: $deletion_extend\n";
				}
				elsif ($ops[$_] eq 'N') { # skipped portion, spliced-read
					# warn "skipping\n";
					next; #irrelevant
				}
				elsif ($cigar_2 =~ tr/[SHPX=]//) {	# if these (for standard mapping) illegal characters exist we die
					die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I','D' and 'N': $cigar_2";
				}
				else {
					die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I', 'D' and 'N': $cigar_2";
				}
			}
		}


		### Now we have InDel corrected Alignment scores

		### if the actual sequence contained Ns we need to adjust the number of mismatches. Ns receive a penalty of -1, 
		### but normal mismatches receive -6. This might still break if the sequence contained more than 5 Ns, but this should occur close to never

		my $seq_1_N_count = $number_of_mismatches_1 % 6; # modulo 6 will return the integer rest after the division
		my $seq_2_N_count = $number_of_mismatches_2 % 6;
		#   warn "N count 1: $seq_1_N_count\n";
		#   warn "N count 2: $seq_2_N_count\n";

		$number_of_mismatches_1 = int ($number_of_mismatches_1 / 6) + $seq_1_N_count;
		$number_of_mismatches_2 = int ($number_of_mismatches_2 / 6) + $seq_2_N_count;

		# warn "MM1    $number_of_mismatches_1 \n";
		# warn "MM2    $number_of_mismatches_2 \n";
		
	}

	####

	my $XA_tag = "XA:Z:$number_of_mismatches_1";
	my $XB_tag = "XB:Z:$number_of_mismatches_2";

	####

	my $read_group; # optional
	if ($rg_tag){
		$read_group = "RG:Z:$rg_id";
	}

	####

	# SAM format: QNAME, FLAG, RNAME, 1-based POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL, optional fields
	### optionally print number of non-bisulfite mismatches
	if ($non_bs_mm){
		if ($rg_tag){
			print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $MD_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag,$XA_tag,$read_group)), "\n";
			print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $MD_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag,$XB_tag,$read_group)), "\n";
		}
		else{
			print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $MD_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag,$XA_tag)), "\n";
			print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $MD_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag,$XB_tag)), "\n";
		}
	}
	else{ # default
		if ($rg_tag){
			if ($strandID){
				print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $MD_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag,$read_group,$strandID_str)), "\n";
				print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $MD_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag,$read_group,$strandID_str)), "\n";
			}
			else{
				print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $MD_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag,$read_group)), "\n";
				print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $MD_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag,$read_group)), "\n";
			}
		}
		else{
			if ($strandID){
				print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $MD_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag,$strandID_str)), "\n";
				print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $MD_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag,$strandID_str)), "\n";
			}
			else{
				print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $MD_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag)), "\n";
				print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $MD_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag)), "\n";
			}
			#print join("\t", ("READ 1:",$id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $MD_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag)), "\n";
			#print join("\t", ("READ 2:",$id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $MD_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag)), "\n";
			#print "\n\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n";
		}
	}
}


### Returns the reverse complement of a nucleotide sequence.
### A/C/G/T are complemented; lower-case a/c/g/t are complemented and come back in upper case.
### Any other character (e.g. N) is left untouched. Dies if no sequence (or a false value such
### as an empty string) is supplied.
sub revcomp{
  my ($sequence) = @_;
  die "Missing seq to reverse complement\n" unless $sequence;

  # reversing a copy in scalar context and complementing it in the same statement
  (my $reverse_complement = reverse $sequence) =~ tr/ACTGactg/TGACTGAC/;
  return $reverse_complement;
}

### Returns the Hamming distance between a read and a reference sequence.
###
###   1st argument: read sequence; its length determines how many positions are compared
###   2nd argument: reference sequence, compared base by base (case-sensitive) against the read
###
### Every read position whose base differs from the reference base at the same offset adds 1.
### Read positions without a reference counterpart (reference shorter than the read) are counted
### as mismatches as well, surplus reference bases are ignored. Missing/undefined arguments are
### treated as empty sequences, so the function never emits 'uninitialized value' warnings.
sub hemming_dist{
	my ($actual_seq,$ref_seq) = @_;
	$actual_seq = '' unless (defined $actual_seq);
	$ref_seq    = '' unless (defined $ref_seq);

	my $actual_len = length $actual_seq;
	my $ref_len    = length $ref_seq;
	my $shared_len = ($actual_len < $ref_len) ? $actual_len : $ref_len;

	# read bases lying beyond the end of the reference can never match
	my $mismatches = $actual_len - $shared_len;

	# comparing in place with substr avoids splitting both sequences into per-base arrays
	foreach my $pos (0..$shared_len - 1){
		++$mismatches if (substr($actual_seq,$pos,1) ne substr($ref_seq,$pos,1));
	}
	return $mismatches;
}


### We no longer use a bitwise comparison here: even though the initial comparison is nice and quick, the regex loop that then looks for non-null byte characters isn't.
### We might as well do a substring loop to start with, which also enables us to generate proper MD:Z: tags that take proper care of InDels
### 05 June 2014

	
### make_mismatch_string($actual_seq, $ref_seq, $cigar, $md_sequence)
###
### Builds and returns the optional SAM tag 'MD:Z:...' for a single alignment.
###
###   $actual_seq   read sequence; compared position by position against $ref_seq
###   $ref_seq      reference sequence, indexed in parallel to $actual_seq. A reference base 'X' is treated as
###                 artificial padding (insertion or soft-clipping in the read) and is skipped
###   $cigar        CIGAR string of the alignment. If it contains a deletion ('D') only the operations
###                 M, I, D, N and S are accepted, anything else makes the function die
###   $md_sequence  sequence from which the deleted bases are extracted with substr(). The call sites visible
###                 nearby pass the 'genomic_seq_for_MD_tag_1/2' values - presumably the genomic sequence
###                 including the deleted bases (TODO: confirm where this string is assembled)
###
### Step 1: the read is walked base by base, writing [number of matching bases][mismatching reference base]
###         for every mismatch, followed by the final number of matching bases.
### Step 2: (only if the CIGAR string contains 'D') the tag body from Step 1 is split into single characters
###         (@md) and the tag is rebuilt. The CIGAR operations are walked in order; for every 'D' the deleted
###         bases are taken from $md_sequence, and @md is rescanned to find the run of matching bases that
###         spans the deletion. This run is split into [matches before]^[deleted bases][matches after].
###
### NOTE: this sub reads $verbose, which is not declared within the sub itself, to decide whether debugging
### warnings are written to STDERR (the very last one is followed by a sleep of 1 second).
sub make_mismatch_string{
		
	my ($actual_seq,$ref_seq,$cigar,$md_sequence) = @_;
	
	# warn "About to process the MD:Z: mismatch string\n";
	# warn "This is a soft-clipped read:\nactual_seq:\n$actual_seq\nreference seq:\n$ref_seq\nCIGAR:\n$cigar\nMD seq:\n$md_sequence\n\n~~~~~~~~~~\n";
	# warn "Length of MD sequence: ",length($md_sequence),"\n"; 
	
	# The following two conditionals (spliced reads, soft-clipped reads) only contain commented-out debugging
	# statements, so they currently have no effect
	if ($cigar =~ /N/){
		#warn "This is a splice junction containing read:\nactual_seq:\n$actual_seq\nreference seq:\n$ref_seq\nCIGAR:\n$cigar\nMD seq:\n$md_sequence\n\n~~~~~~~~~~\n";
		#warn "Length of MD sequence: ",length($md_sequence),"\n"; $verbose = 1;
	}
	
	if ($cigar =~ /S/){
		if (length($md_sequence) > 0){
			# warn "This is a soft-clipped read:\nactual_seq:\n$actual_seq\nreference seq:\n$ref_seq\nCIGAR:\n$cigar\nMD seq:\n$md_sequence\n\n~~~~~~~~~~\n";
			# warn "Length of MD sequence: ",length($md_sequence),"\n"; 
			# $verbose = 1;
		}
		else{
			# $verbose = 0;
		}
	}
	
	### Step 1: generating the MD tag from matches and mismatches only (deletions are added in Step 2)
	my $MD_tag = "MD:Z:";
	my $prev_matching = 0;   # number of matching bases since the last mismatch that was written out
	my $last_char;           # not used anywhere else within this sub

	my $ref_base;
	my $actual_base;
	
	foreach my $pos ( 0..(length$actual_seq) - 1 ){

		$actual_base = substr($actual_seq,$pos,1);
		$ref_base    = substr($ref_seq,$pos,1);
		# if ($verbose){ warn "reference: $ref_base\tseen base: $actual_base\n";}

		if ( $actual_base eq $ref_base ){
			++$prev_matching;
		}
		else{
			# If the mismatch is due to an insertion or soft-clipping we simply move on, else we print the previously matching bases
			# as well as the mismatching genomic base
			if ($ref_base eq 'X'){
				# padded positions neither count as a match nor reset the number of matching bases
				if ($verbose){ warn "The genome base was an artificially padded '$ref_base' due to an insertion or soft-clipping in the read at this position. Just ignoring it for the MD tag\n";}
			}
			else{
				if ($verbose){ warn "previous matching bases: $prev_matching\n";}

				### There is a mismatch between the sequence and the genome. First we need to write out how many bases matched until now
				### (both branches below append the same two items, they only differ in their debugging output)
				if ($prev_matching == 0){
					if ($verbose){ warn "Got a mismatch either at the very start or next to another mismatch. Need to add a padding 0 as well as the mismatch\n";}
					if ($verbose){ warn "${prev_matching}$ref_base\n";}
					$MD_tag .= $prev_matching;
					$MD_tag .= $ref_base;
				}
				else{
					if ($verbose){ warn "${prev_matching}$ref_base\n";}
					$MD_tag .= $prev_matching;
					$MD_tag .= $ref_base;
				}

				$prev_matching = 0; # resetting $prev_matching
			}
		}	
	}
	### appending the number of matches one last time
	$MD_tag .= $prev_matching;
	if ($verbose){warn "MD tag at this point in time: $MD_tag\n";}

	### Step 2:
	### If the read contains deletion(s) we need to take care of these in the MD-tag as well
	### 12 02 2019: For skipped regions (CIGAR operation: 'N') we do not need to add the sequence to the MD:Z string
	### (which would admittedly get stupidly long otherwise...)
	if ($cigar =~ /D/){
		# $verbose = 1;
		# counting the number of 'D' operations in the CIGAR string
		my $deletions_total = 0;
		while ($cigar =~ /D/g){
			++$deletions_total;
		}
		if ($verbose){ warn "Read contains $deletions_total deletions in total\n\n";} 

		if ($verbose){ warn "actual:\t$actual_seq\nref:\t$ref_seq\nMD-seq:\t$md_sequence\nMD-tag: $MD_tag\n";}

		# parsing CIGAR string
		my @len = split (/\D+/,$cigar); # storing the length per operation
		my @ops = split (/\d+/,$cigar); # storing the operation
		shift @ops; # remove the empty first element
		die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);

		my $MD_pos_so_far = 0;        # positions accounted for while scanning the MD string (I and S lengths are added up-front)
		my $deletions_processed = 0;  # number of deletions already written into the MD tag
		my $del_pos = 0;              # running offset (M, I, S and already processed D lengths); used as substr offset into $md_sequence
		my $deleted_bases = '';
		# NOTE(review): 'my ... if' is a construct perlsyn documents as having undefined behaviour when the condition is false.
		# Here the pattern always matches since $MD_tag was initialised as "MD:Z:" above and only appended to since
		my $new_MD = $1 if ($MD_tag =~ /MD:Z:(.*)/);
		my $md_index_already_processed; # index into @md up to which elements are copied to $new_MD without further processing

		# the MD string from Step 1 as single characters; this array is rebuilt from $new_MD after every deletion
		my @md = split //,$new_MD;
	
		# if ($verbose){ warn "New MD-tag:  $new_MD\n\n";}
		$MD_tag = "MD:Z:"; ### reconstituting a new MD-tag
		$new_MD = ''; # using this to build up a new string that will replace the old \@md

		if ($verbose){ warn "CIGAR string; $cigar\n";}
		### determining end position of a read
		foreach my $index(0..$#len){

			if ($ops[$index] eq 'M'){  # matching bases
				$del_pos += $len[$index];
				if ($verbose){ warn "Operation is 'M', adding $len[$index] bp\n";}
			}
			elsif($ops[$index] eq 'N'){  # skipped regions, splice junctions
				if ($verbose){ warn "Operation is 'N', simply ignoring\n";}
			}
			elsif($ops[$index] eq 'I'){ # insertion
				$del_pos += $len[$index];
				### need to add insertions in the read to MD pos so far!
				$MD_pos_so_far += $len[$index];
				if ($verbose){ warn "Operation is 'I', adding $len[$index] bp\n";}
			}
			elsif($ops[$index] eq 'S'){ # soft-clip. As we added X as reference base, it kind of resembles a genomic match (M)
				$del_pos += $len[$index];
				if ($verbose){ warn "Operation is 'S', adding $len[$index] bp\n";}
				# TODO: Does this have to be treated like an insertion?
				$MD_pos_so_far += $len[$index];
			}
			elsif($ops[$index] eq 'D'){ # deletion
				if ($verbose){ warn "Operation is 'D', extracting $len[$index] bp\n";}
				$deleted_bases = substr($md_sequence,$del_pos,$len[$index]);
				if ($verbose){ warn "Deleted base(s): $deleted_bases\n\n";}

				### Now we need to process the MD-tag so far and write out everything up until this point, including the deletion
				if ($verbose){ warn "Now processing the MD-tag\n";}
				
				# $op accumulates the MD token currently being read: either a (multi-digit) number of matching
				# bases or a single non-digit character
				my $op;

				my $this_deletion_processed;   # set once the current deletion has been written out
				my $md_processed_so_far;       # not used anywhere else within this sub
				my $current_md_index;          # index of the element of @md currently being looked at
				
				# print join (" ",@md),"\n";
				
				foreach my $el (@md){

					unless (defined $current_md_index){
						$current_md_index = 0; # first element = index 0
					}
					else{
						++$current_md_index;
					}

					# elements up to (and including) $md_index_already_processed are only copied to $new_MD;
					# nothing is appended to $MD_tag for them
					if ($md_index_already_processed and ($current_md_index <= $md_index_already_processed)){
						if ($verbose){ warn "This has to be another deletion within the same read. Currently processing index $current_md_index, but have already processed $md_index_already_processed indexes previously\n";}
						$new_MD .= $el;
						next;
					}

					if ($verbose){ warn "Current element: $el\n";}
					unless (defined $op){ # initialize
						$op = $el;
						if ($verbose){ warn "Initializing \$op as $op\n";}
						next;
					}

					# all deletions have been placed already: the remaining characters are appended unchanged to both strings
					if ($deletions_processed == $deletions_total){
						if ($verbose){ warn "Processed $deletions_processed in the read so far, out of $deletions_total total. Just appending elements until the end of the string: here $el\n";}
						$MD_tag .= $el;
						$new_MD .= $el;
						next;
					}
					# this only occurs when there are more deletions in the read but we want to regenerate a new MD tag
					if ($this_deletion_processed){
						$new_MD .= $el;
						next;
					}

					if ($op =~ /^\d+$/){
						if ($verbose){ warn "Operation so far was a digit: $op\n";}
						if ($el =~ /\d/){
							# still within a multi-digit number of matching bases
							$op .= $el;
							if ($verbose){ warn "Appending current operation $el. New operation is: $op\n";}
							next;
						}
						else{
							if ($verbose){ warn "current element is a word character: $el\n";}

							### Need to determine if the matching operation length includes the deletion position
							if ($verbose){ warn "Processing operation $op and adding it to MD pos which is so far: $MD_pos_so_far; deletion pos is $del_pos.\n";}
							$MD_pos_so_far += $op;
							if ($verbose){ warn "MD pos so far: $MD_pos_so_far\n";}
							if ($MD_pos_so_far < $del_pos){
								if ($verbose){ warn "Doesn't cover the deletion yet. Writing back out.\n";}
								$MD_tag .= $op;
								$new_MD .= $op;
								if ($verbose){ warn "Setting new operation to: $el\n\n";}
								$op = $el; # setting new $op
							}
							else{
								if ($verbose){ warn "Here we go, this operation covers the deletion position!!\n";}
									### splitting up the number of matching bases in number before and after the deletion

								my $pos_after_deletion = $MD_pos_so_far - $del_pos;
								my $pos_before_deletion = $op - $pos_after_deletion;
								if ($verbose){ warn "Splitting up previous operation '$op' into pos before deletion: ${pos_before_deletion} and pos_after_deletion: $pos_after_deletion\n";}
								# $MD_tag only receives the part up to the deletion here, $new_MD also the matches following it
								$MD_tag .= "${pos_before_deletion}^${deleted_bases}";
								$new_MD .= "${pos_before_deletion}^${deleted_bases}${pos_after_deletion}";
								if ($verbose){ warn "\$newMD after adjusting for the current deletion: $new_MD\n";}

								#adjusting the MD_position by the number of bases after the deletion
								$MD_pos_so_far -= $pos_after_deletion;
								if ($verbose){ warn "MD after adjusting for deletion: $MD_pos_so_far\n";	}
								### also appending the current element because we are writing out the rest of the MD-string unchanged to $new_MD
								$new_MD .= $el;

								$deletions_processed += 1;
								$this_deletion_processed = 1;
							

								if ($deletions_processed == $deletions_total){ # this was the last deletion of the read
									if ($verbose){ warn "This was the last deletion in the read ($deletions_processed out of $deletions_total total). Continuing to append \$pos_after_deletion (${pos_after_deletion})..\n";}
									$MD_tag .= "${pos_after_deletion}";

									### also appending the current element because we are writing out the rest of the MD-string unchanged
									if ($verbose){ warn "also appending the current element $el\n"; }
									$MD_tag .= $el;
									### Finally also adding the length of the deletion to $del_pos
									$del_pos += $len[$index];
									if ($verbose){ warn "Adding length of the deletion itself (",$len[$index],") to \$del_pos: currently at $del_pos\n";}
								}
								else{
									if ($verbose){ warn "This wasn't the last deletion in the read. Substituting the last operation with the current deletion and reconstituting \@md\n";}
									if ($verbose){ warn "Adding length of deletion string '${pos_before_deletion}^${deleted_bases}' (",length("${pos_before_deletion}^${deleted_bases}")," - length of current operation (",length$op,") to current_md_index\n";}


									# translating the current index into the coordinates of the rebuilt \@md (which contains the deletion string)
									$current_md_index = $current_md_index + length("${pos_before_deletion}^${deleted_bases}") - length$op;
									if ($verbose){  warn "Current index = $current_md_index\n";}

									if ($verbose){ warn "Setting \$md_index_already_processed to ",$current_md_index-1,"\n";}
									$md_index_already_processed = $current_md_index - 1;

									if ($verbose){ warn "Exiting now and waiting for the next deletion\n";}

									### Finally also adding the length of the deletion to $del_pos
									$del_pos += $len[$index];
									$MD_pos_so_far += $len[$index];
									if ($verbose){ warn "Adding length of the deletion itself (",$len[$index],") to \$del_pos: currently at $del_pos\n";}
									if ($verbose){ warn "MD-tag so far: $MD_tag ~~\n";}
									#setting $op to an empty string so it is not being processed as the last element
									$op = '';
									# last; # exiting the loop and processing the CIGAR string further until we hit the next deletion
								}
							}
						}
						if ($verbose){  warn "MD-tag so far: $MD_tag ~~\n";}
					}
					else{
						# $op is not a number (a mismatching base, or the empty string set after a deletion was processed)
						if ($verbose){  warn "Operation so far was a word character: $op\n";}
						if ($el =~ /\d+/){
							# processing the previous mismatch position
							$MD_tag .= $op;
							$new_MD .= $op;
							$MD_pos_so_far += length($op);
							if ($verbose){  warn "Writing out mismatching base $op and adding length ",length($op),"\n";}
						}
						else{
							# this should never occur since mismatches are followed by a 0 or another digit
							die "current element is a another word character: $el. This should never happen!\n";
						}
						if ($verbose){ warn "Setting new operation to: $el\n";}
						$op = $el; # setting new $op
						if ($verbose){  warn "MD-tag so far: $MD_tag ~~\n";}
					}
				}

				### need to consider last element if it was a digit or number and we are expecting the deletion in the last element of the MD-tag
				if ($op =~ /\d+/ and $deletions_processed < $deletions_total){
					if ($verbose){ warn "\n\nlast operation was $op\n";}
					if ($verbose){ warn "Processing operation $op; deletion pos is $del_pos. MD so far was: $MD_pos_so_far\n";}
			
					$MD_pos_so_far += $op;
					if ($verbose){ warn "Adding $op to MD pos so far: $MD_pos_so_far\n";}
					if ($verbose){ warn "Deletions already processed: $deletions_processed, del total: $deletions_total\n\n";}
					if ($MD_pos_so_far >= $del_pos){
						if ($verbose){ warn "Here we go, this operation covers the deletion position!!\n";}
						### splitting up the number of matching bases in number before and after the deletion

						my $pos_after_deletion = $MD_pos_so_far - $del_pos;
						my $pos_before_deletion = $op - $pos_after_deletion;
						if ($verbose){  warn "Splitting up previous operation '$op' into pos before deletion: ${pos_before_deletion} and pos_after_deletion: $pos_after_deletion\n";}

						$MD_tag .= "${pos_before_deletion}^${deleted_bases}";
						$new_MD .= "${pos_before_deletion}^${deleted_bases}${pos_after_deletion}";

						#adjusting the MD_position by the number of bases after the deletion
						$MD_pos_so_far -= $pos_after_deletion;
						if ($verbose){ warn "MD after adjusting for deletion: $MD_pos_so_far\n";	}

						$deletions_processed += 1;
						$this_deletion_processed = 1;

						if ($deletions_processed == $deletions_total){ # this was the last deletion of the read
							if ($verbose){ warn "This was the last deletion in the read ($deletions_processed out of $deletions_total total). Continuing to append \$pos_after_deletion (${pos_after_deletion})..\n";}
							$MD_tag .= "${pos_after_deletion}";
						}
						else{
							if ($verbose){ warn "This wasn't the last deletion in the read. Substituting the last operation with the current deletion and reconstituting \@md\n";}
							if ($verbose){ warn "Adding length of deletion string '${pos_before_deletion}^${deleted_bases}' (",length("${pos_before_deletion}^${deleted_bases}")," - length of current operation (",length$op,") to current_md_index\n";}

							$current_md_index = $current_md_index + length("${pos_before_deletion}^${deleted_bases}") - length$op;
							if ($verbose){  warn "Current index = $current_md_index\n";}

							if ($verbose){ warn "Setting \$md_index_already_processed to ",$current_md_index-1,"\n";}
							# since we are no longer in the loop we don't have to subtract 1 from $current_md_index (it hasn't been incremented in the first place...)
							# NOTE(review): the debugging message above still reports $current_md_index-1, whereas the value stored is $current_md_index
							$md_index_already_processed = $current_md_index;

							if ($verbose){ warn "Exiting now and waiting for the next deletion\n";}

							$MD_pos_so_far += $len[$index];
							if ($verbose){ warn "MD-tag so far: $MD_tag ~~\n";}
						}
						### Finally also adding the length of the deletion to $del_pos
						$del_pos += $len[$index];
						if ($verbose){ warn "Adding length of the deletion itself (",$len[$index],") to \$del_pos: currently at $del_pos\n";}
					}
					else{
						die "Something went wrong, we haven't seen a deletion so far even though we should have...\n\n";
					}
				}

				# forming a new @md
				@md = split //,$new_MD;
				$new_MD = '';
				if ($verbose){ warn "New \@md array: @md\n\n";}
				if ($verbose){ warn "MD-tag so far: $MD_tag ~~\nnew_MD so far: $new_MD\n\n";}
			}
			else{
				die "Found CIGAR operations other than M, I, D, S or N: '$ops[$index]'. Not allowed at the moment";
			}
		}

	}	
	if ($verbose){  warn "Returning MD-tag: $MD_tag\n"; sleep(1);}
	return $MD_tag;

}


### Prints the Bismark help text (description, usage, arguments and all supported options)
### to the currently selected output filehandle (STDOUT unless changed via select()).
### Takes no arguments; the return value is that of the print statement.
###
### The text is an interpolating (double-quoted) here-document, so a literal '@' that could be
### mistaken for an array has to be escaped (see the \@RG entries below) and no '$' may be
### added without escaping it.
sub print_helpfile{
	print <<"HOW_TO";

DESCRIPTION

The following is a brief description of command line options and arguments to control the Bismark
bisulfite mapper and methylation caller. Bismark takes in FastA or FastQ files and aligns the
reads to a specified bisulfite genome. Sequence reads are transformed into a bisulfite converted forward strand
version (C->T conversion) or into a bisulfite treated reverse strand (G->A conversion of the forward strand).
Each of these reads is then aligned to bisulfite treated forward strand index of a reference genome
(C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the
forward strand, by doing this alignments will produce the same positions). These 4 instances of Bowtie 2 or HISAT2
are run in parallel. The sequence file(s) are then read in again sequence by sequence to pull out the original
sequence from the genome and determine if there were any protected C's present or not.

The final output of Bismark is in BAM/SAM format by default, described in more detail below.


USAGE: bismark [options] <genome_folder> {-1 <mates1> -2 <mates2> | <singles>}


ARGUMENTS:

<genome_folder>          The path to the folder containing the unmodified reference genome
                         as well as the subfolders created by the Bismark_Genome_Preparation
                         script (/Bisulfite_Genome/CT_conversion/ and /Bisulfite_Genome/GA_conversion/).
                         Bismark expects one or more fastA files in this folder (file extension: .fa, .fa.gz
                         or .fasta or .fasta.gz). The path can be relative or absolute. The path may also be set
                         as '--genome_folder /path/to/genome/folder/'.

-1 <mates1>              Comma-separated list of files containing the #1 mates (filename usually includes
                         "_1"), e.g. flyA_1.fq,flyB_1.fq. Sequences specified with this option must
                         correspond file-for-file and read-for-read with those specified in <mates2>.
                         Reads may be a mix of different lengths. Bismark will produce one mapping result
                         and one report file per paired-end input file pair.

-2 <mates2>              Comma-separated list of files containing the #2 mates (filename usually includes
                         "_2"), e.g. flyA_2.fq,flyB_2.fq. Sequences specified with this option must
                         correspond file-for-file and read-for-read with those specified in <mates1>.
                         Reads may be a mix of different lengths.

<singles>                A comma- or space-separated list of files containing the reads to be aligned (e.g.
                         lane1.fq,lane2.fq lane3.fq). Reads may be a mix of different lengths. Bismark will
                         produce one mapping result and one report file per input file. Please note that
                         one should not supply a list of files in conjunction with --basename as the output files
                         will constantly overwrite each other...


OPTIONS:


Input:

--se/--single_end <list> Sets single-end mapping mode explicitly giving a list of file names as <list>.
                         The filenames may be provided as a comma [,] or colon [:] separated list.

-q/--fastq               The query input files (specified as <mate1>,<mate2> or <singles>) are FASTQ
                         files (usually having extension .fq or .fastq). This is the default. See also
                         --solexa-quals.

-f/--fasta               The query input files (specified as <mate1>,<mate2> or <singles>) are FASTA
                         files (usually having extensions .fa, .mfa, .fna or similar). All quality values
                         are assumed to be 40 on the Phred scale. FASTA files are expected to contain both
                         the read name and the sequence on a single line (and not spread over several lines).

-s/--skip <int>          Skip (i.e. do not align) the first <int> reads or read pairs from the input.

-u/--upto <int>          Only aligns the first <int> reads or read pairs from the input. Default: no limit.

--phred33-quals          FASTQ qualities are ASCII chars equal to the Phred quality plus 33. Default: ON.

--phred64-quals          FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off.

--path_to_bowtie2        The full path </../../> to the Bowtie 2 installation folder on your system (not the
                         bowtie2 executable itself). If not specified, it is assumed that Bowtie 2 is in the PATH.

--path_to_hisat2         The full path </../../> to the HISAT2 installation folder on your system (not the
                         hisat2 executable itself). If not specified, it is assumed that HISAT2 is in the PATH.

--path_to_minimap2       The full path </../../> to the minimap2 installation folder on your system (not the
                         minimap2 executable itself). If not specified, it is assumed that minimap2 is in the PATH.


Alignment:


-N <int>                 Sets the number of mismatches allowed in a seed alignment during multiseed alignment.
                         Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower)
                         but increases sensitivity. Default: 0. This option is only available for Bowtie 2 (for
                         Bowtie 1 see -n).

-L <int>                 Sets the length of the seed substrings to align during multiseed alignment. Smaller values
                         make alignment slower but more sensitive. Default: the --sensitive preset of Bowtie 2 is
                         used by default, which sets -L to 20. maximum of L can be set to 32. The length of the seed
                         would affect the alignment speed dramatically while the larger L, the faster the alignment.
                         This option is only available for Bowtie 2 (for Bowtie 1 see -l).

--ignore-quals           When calculating a mismatch penalty, always consider the quality value at the mismatched
                         position to be the highest possible, regardless of the actual value. I.e. input is treated
                         as though all quality values are high. This is also the default behavior when the input
                         doesn't specify quality values (e.g. in -f mode). This option is invariable and on by default.

-I/--minins <int>        The minimum insert size for valid paired-end alignments. E.g. if -I 60 is specified and
                         a paired-end alignment consists of two 20-bp alignments in the appropriate orientation
                         with a 20-bp gap between them, that alignment is considered valid (as long as -X is also
                         satisfied). A 19-bp gap would not be valid in that case. Default: 0.

-X/--maxins <int>        The maximum insert size for valid paired-end alignments. E.g. if -X 100 is specified and
                         a paired-end alignment consists of two 20-bp alignments in the proper orientation with a
                         60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied).
                         A 61-bp gap would not be valid in that case. Default: 500.

--parallel <int>         (May also be --multicore <int>) Sets the number of parallel instances of Bismark to be run concurrently.
                         This forks the Bismark alignment step very early on so that each individual Spawn of Bismark processes
                         only every n-th sequence (n being set by --parallel). Once all processes have completed,
                         the individual BAM files, mapping reports, unmapped or ambiguous FastQ files are merged
                         into single files in very much the same way as they would have been generated running Bismark
                         conventionally with only a single instance.

                         If system resources are plentiful this is a viable option to speed up the alignment process
                         (we observed a near linear speed increase for up to --parallel 8 tested). However, please note
                         that a typical Bismark run will use several cores already (Bismark itself, 2 or 4 threads of
                         Bowtie2/HISAT2, Samtools, gzip etc...) and ~10-16GB of memory depending on the choice of aligner
                         and genome. WARNING: Bismark Parallel (BP?) is resource hungry! Each value of --parallel specified
                         will effectively lead to a linear increase in compute and memory requirements, so --parallel 4 for
                         e.g. the GRCm38 mouse genome will probably use ~20 cores and eat ~40GB of RAM, but at the same time
                         reduce the alignment time to ~25-30%. You have been warned.

--local                  In this mode, it is not required that the entire read aligns from one end to the other. Rather, some
                         characters may be omitted (“soft-clipped”) from the ends in order to achieve the greatest possible
                         alignment score. For Bowtie 2, the match bonus --ma (default: 2) is used in this mode, and the best possible
                         alignment score is equal to the match bonus (--ma) times the length of the read. This is mutually exclusive with
                         end-to-end alignments. For HISAT2, it is currently not exactly known how the best alignment is calculated.
                         DEFAULT: OFF.


Output:

--non_directional        The sequencing library was constructed in a non strand-specific manner, alignments to all four
                         bisulfite strands will be reported. Default: OFF.

                         (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary
                         to the original strands are merely theoretical and should not exist in reality. Specifying directional
                         alignments (which is the default) will only run 2 alignment threads to the original top (OT)
                         or bottom (OB) strands in parallel and report these alignments. This is the recommended option
                         for strand-specific libraries).

--pbat                   This option may be used for PBAT-Seq libraries (Post-Bisulfite Adapter Tagging; Kobayashi et al.,
                         PLoS Genetics, 2012). This is essentially the exact opposite of alignments in 'directional' mode,
                         as it will only launch two alignment threads to the CTOT and CTOB strands instead of the normal OT
                         and OB ones. Use this option only if you are certain that your libraries were constructed following
                         a PBAT protocol (if you don't know what PBAT-Seq is you should not specify this option). The option
                         --pbat works only for FastQ files (in both Bowtie and Bowtie 2 mode) and using uncompressed
                         temporary files only.

--sam-no-hd              Suppress SAM header lines (starting with @). This might be useful when very large input files are
                         split up into several smaller files to run concurrently and the output files are to be merged.

--rg_tag                 Write out a Read Group tag to the resulting SAM/BAM file. This will write the following line to the
                         SAM header: \@RG PL: ILLUMINA ID:SAMPLE SM:SAMPLE ; to set ID and SM see --rg_id and --rg_sample.
                         In addition each read receives an RG:Z:RG-ID tag. Default: OFF.

--rg_id <string>         Sets the ID field in the \@RG header line. The default is 'SAMPLE'.

--rg_sample <string>     Sets the SM field in the \@RG header line; can't be set without setting --rg_id as well. The default is
                         'SAMPLE'.

--strandID               For non-directional paired-end libraries, the strands identity is encoded by the order in which R1 and R2
                         are reported, as well as the read and genome conversion state. If third party tools re-organise this order
                         it may become difficult to determine the alignment strand identity. This option adds an optional tag,
                         e.g. 'YS:Z:OT' or 'YS:Z:CTOB' to preserve this information. See also this thread for more details:
                         https://github.com/FelixKrueger/Bismark/issues/455. Default: OFF.

-un/--unmapped           Write all reads that could not be aligned to a file in the output directory. Written reads will
                         appear as they did in the input, without any translation of quality values that may have
                         taken place within Bowtie or Bismark. Paired-end reads will be written to two parallel files with _1
                         and _2 inserted in their filenames, i.e. _unmapped_reads_1.txt and _unmapped_reads_2.txt. Reads
                         with more than one valid alignment with the same number of lowest mismatches (ambiguous mapping)
                         are also written to _unmapped_reads.txt unless the option --ambiguous is specified as well.

--ambiguous              Write all reads which produce more than one valid alignment with the same number of lowest
                         mismatches or other reads that fail to align uniquely to a file in the output directory.
                         Written reads will appear as they did in the input, without any of the translation of quality
                         values that may have taken place within Bowtie or Bismark. Paired-end reads will be written to two
                         parallel files with _1 and _2 inserted in their filenames, i.e. _ambiguous_reads_1.txt and
                         _ambiguous_reads_2.txt. These reads are not written to the file specified with --un.

-o/--output_dir <dir>    Write all output files into this directory. By default the output files will be written into
                         the same folder as the input file(s). If the specified folder does not exist, Bismark will attempt
                         to create it first. The path to the output folder can be either relative or absolute.

--temp_dir <dir>         Write temporary files to this directory instead of into the same directory as the input files. If
                         the specified folder does not exist, Bismark will attempt to create it first. The path to the
                         temporary folder can be either relative or absolute.

--non_bs_mm              Optionally, outputs an extra column specifying the number of non-bisulfite mismatches a read has.
                         This option is only available in end-to-end mode. The value is just the number of actual non-bisulfite
                         mismatches and ignores potential insertions or deletions.
                         The format for single-end reads and read 1 of paired-end reads is 'XA:Z:number of mismatches'
                         and 'XB:Z:number of mismatches' for read 2 of paired-end reads.

--gzip                   Temporary bisulfite conversion files will be written out in a GZIP compressed form to save disk
                         space. This option is available for most alignment modes but is not available for paired-end FastA
                         files. This option might be somewhat slower than writing out uncompressed files, but this awaits
                         further testing.

--sam                    The output will be written out in SAM format instead of the default BAM format. Be warned that this
                         requires ~10 times more disk space. --sam is not compatible with the option --parallel.

--bam                    Bismark will attempt to use the path to Samtools that was specified with '--samtools_path', or, if it hasn't
                         been specified, attempt to find Samtools in the PATH. If no installation of Samtools can be found,
                         the SAM output will be compressed with GZIP instead (yielding a .sam.gz output file). Default: ON.

--cram                   Writes the output to a CRAM file instead of BAM. This requires the use of Samtools 1.2 or higher.

--cram_ref <ref_file>    CRAM output requires you to specify a reference genome as a single FastA file. If this single-FastA
                         reference file is not supplied explicitly it will be regenerated from the genome .fa sequence(s)
                         used for the Bismark run and written to a file called 'Bismark_genome_CRAM_reference.mfa' into the
                         output directory.

--samtools_path          The path to your Samtools installation, e.g. /home/user/samtools/. Does not need to be specified
                         explicitly if Samtools is in the PATH already.

--prefix <prefix>        Prefixes <prefix> to the output filenames. Trailing dots will be replaced by a single one. For
                         example, '--prefix test' with 'file.fq' would result in the output file 'test.file.fq_bismark.sam' etc.

-B/--basename <basename> Write all output to files starting with this base file name. For example, '--basename foo'
                         would result in the files 'foo.bam' and 'foo_SE_report.txt' (or its paired-end equivalent). Takes
                         precedence over --prefix. Be advised that you should not use this option in conjunction with supplying
                         lists of files to be processed consecutively, as all output files will constantly overwrite each other.

--old_flag               Only in paired-end SAM mode, uses the FLAG values used by Bismark v0.8.2 and before. In addition,
                         this option appends /1 and /2 to the read IDs for reads 1 and 2 relative to the input file. Since
                         both the appended read IDs and custom FLAG values may cause problems with some downstream tools
                         such as Picard, new defaults were implemented as of version 0.8.3.


                                             default                         old_flag
                                       ===================              ===================
                                       Read 1       Read 2              Read 1       Read 2

                              OT:         99          147                  67          131

                              OB:         83          163                 115          179

                              CTOT:      147           99                  67          131

                              CTOB:      163           83                 115          179

--ambig_bam              For reads that have multiple alignments a random alignment is written out to a special file ending in
                         '.ambiguous.bam'. The alignments are in Bowtie2 format and do not contain any Bismark specific
                         entries such as the methylation call etc. These ambiguous BAM files are intended to be used as
                         coverage estimators for variant callers.

--nucleotide_coverage    Calculates the mono- and di-nucleotide sequence composition of covered positions in the analysed BAM
                         file and compares it to the genomic average composition once alignments are complete by calling 'bam2nuc'.
                         Since this calculation may take a while, bam2nuc attempts to write the genomic sequence composition
                         into a file called 'genomic_nucleotide_frequencies.txt' inside the reference genome folder so it can
                         be re-used the next time round instead of calculating it once again. If a file 'nucleotide_stats.txt' is
                         found with the Bismark reports it will be automatically detected and used for the Bismark HTML report.
                         This option works only for BAM or CRAM files.

--icpc                   This option will truncate read IDs at the first space or tab it encounters, which are sometimes used to add
                         comments to a FastQ entry (instead of replacing them with underscores (_) as is the default behaviour). The
                         option is deliberately somewhat cryptic ("I couldn't possibly comment"), as it only becomes relevant when R1 and R2
                         of read pairs are mapped separately in single-end mode, and then re-paired afterwards (the SAM format dictates
                         that R1 and R2 have the same read ID). Paired-end mapping already creates BAM files with identical read IDs.
                         For more information please see here: https://github.com/FelixKrueger/Bismark/issues/236. Default: OFF.


OTHER:

-h/--help                Displays this help file.

-v/--version             Displays version information.


BOWTIE 2 SPECIFIC OPTIONS:

--bowtie2                Default: ON. Uses Bowtie 2 as default aligner. Bismark limits Bowtie 2 to only perform end-to-end
                         alignments, i.e. searches for alignments involving all read characters (also called
                         untrimmed or unclipped alignments). Bismark assumes that raw sequence data is adapter
                         and/or quality trimmed where appropriate. Both small (.bt2) and large (.bt2l) Bowtie 2
                         indexes are supported. To use HISAT2 instead of Bowtie 2 please see option --hisat2.


--no_dovetail            It is possible, though unusual, for the mates to "dovetail", with the mates seemingly extending
                         "past" each other as in this example:

                         Mate 1:                 GTCAGCTACGATATTGTTTGGGGTGACACATTACGC
                         Mate 2:            TATGAGTCAGCTACGATATTGTTTGGGGTGACACAT
                         Reference: GCAGATTATATGAGTCAGCTACGATATTGTTTGGGGTGACACATTACGCGTCTTTGAC

                         By default, dovetailing is considered inconsistent with concordant alignment, but by default
                         Bismark calls Bowtie 2 with --dovetail, causing it to consider dovetailing alignments as
                         concordant. This becomes relevant whenever reads are clipped from their 5' end prior to mapping,
                         e.g. because of quality or bias issues.

                         Specify --no_dovetail to turn off this behaviour for paired-end libraries. Default: OFF.


HISAT2 SPECIFIC OPTIONS:


--hisat2                 Uses HISAT2 instead of Bowtie 2. Bismark uses HISAT2 in end-to-end mode by default,
                         i.e. searches for alignments involving all read characters (also called untrimmed or unclipped alignments)
                         using the option '--no-softclipping'. Bismark assumes that raw sequence data is adapter and/or quality
                         trimmed where appropriate. From v0.22.0 onwards, Bismark also supports the local alignment mode of
                         HISAT2 (please see --local). Both small (.ht2) and large (.ht2l) HISAT2 indexes are supported. Default: OFF.

--no-spliced-alignment   Disable spliced alignment. Default: spliced-alignments are performed.

--known-splicesite-infile <path>   Provide a list of known splice sites.


Paired-end options:

--no-mixed               This option disables the behavior to try to find alignments for the individual mates if
                         it cannot find a concordant or discordant alignment for a pair. This option is invariably on by default.

--no-discordant          Normally, Bowtie 2 or HISAT2 look for discordant alignments if it cannot find any concordant alignments.
                         A discordant alignment is an alignment where both mates align uniquely, but that does not
                         satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior
                         and it is on by default.


Bowtie 2 effort options:

-D <int>                 Up to <int> consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using
                         the alignments found so far. A seed extension "fails" if it does not yield a new best or a
                         new second-best alignment. Default: 15.

-R <int>                 <int> is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds.
                         When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of
                         mismatches allowed) at different offsets and searches for more alignments. A read is considered
                         to have repetitive seeds if the total number of seed hits divided by the number of seeds
                         that aligned at least once is greater than 300. Default: 2.

Bowtie 2/ HISAT2 parallelization options:


-p NTHREADS              Launch NTHREADS parallel search threads (default: 1). Threads will run on separate processors/cores
                         and synchronize when parsing reads and outputting alignments. Searching for alignments is highly
                         parallel, and speedup is close to linear. Increasing -p increases Bowtie 2's memory footprint.
                         E.g. when aligning to a human genome index, increasing -p from 1 to 8 increases the memory footprint
                         by a few hundred megabytes. This option is only available if Bowtie 2 is linked with the pthreads
                         library (i.e. if BOWTIE_PTHREADS=0 is not specified at build time). In addition, this option will
                         automatically use the option '--reorder', which guarantees that output SAM records are printed in
                         an order corresponding to the order of the reads in the original input file, even when -p is set
                         greater than 1 (Bismark requires the Bowtie 2 output to be this way). Specifying --reorder and
                         setting -p greater than 1 causes Bowtie 2 to run somewhat slower and use somewhat more memory than
                         if --reorder were not specified. Has no effect if -p is set to 1, since output order will naturally
                         correspond to input order in that case.

Scoring options:

--score_min <func>       Sets a function governing the minimum alignment score needed for an alignment to be considered
                         "valid" (i.e. good enough to report). This is a function of read length.

                         In end-to-end mode (default), and --local mode for HISAT2 only, --score_min is set as a linear function
                         and is set as <L,value,value>.
                         For instance, specifying L,0,-0.2 sets the minimum-score function f to f(x) = 0 + (-0.2) * x, where x
                         is the read length. The default for end-to-end (global) alignments is: L,0,-0.2.

                         In --local mode for Bowtie 2, the function is logarithmic and is set as <G,value,value>. For instance, specifying
                         G,20,8 sets the minimum-score function f to f(x) = 20 + 8 * ln(x), where x is the read length.
                         The default for local alignments in Bowtie 2 mode is: G,20,8.

                         See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2.

--rdg <int1>,<int2>      Sets the read gap open (<int1>) and extend (<int2>) penalties. A read gap of length N gets a penalty
                         of <int1> + N * <int2>. Default: 5, 3.

--rfg <int1>,<int2>      Sets the reference gap open (<int1>) and extend (<int2>) penalties. A reference gap of length N gets
                         a penalty of <int1> + N * <int2>. Default: 5, 3.


MINIMAP2-SPECIFIC OPTIONS:


--minimap2/--mm2         Uses minimap2 as the underlying read aligner. This mode is very new and currently experimental. Expect that
                         things may change in the near future. The default mapping mode is --nanopore (preset '-x map-ont' (Nanopore reads)). Currently,
                         there are no plans to support PacBio reads. Internally, minimap2 is run with the options -a --MD. More information
                         here: https://lh3.github.io/minimap2/minimap2.html. Default: OFF.

--mm2_nanopore           Using the minimap2 preset for Oxford Nanopore (ONT) vs reference mapping (-x map-ont). Only works in conjunction with --minimap2.
                         Default mode when --minimap2 is specified without additional qualifiers.

--mm2_pacbio             Using the minimap2 preset for PacBio vs reference mapping (-x map-pb). Only works in conjunction with --minimap2.
                         Default: OFF.

--mm2_short_reads        This option invokes the minimap2 preset setting '-x sr' and is intended for genomic short-read mapping with
                         accurate reads (probably Illumina 150bp+ ?). For spliced short-reads, please use --hisat2 instead.
                         The 'sr' preset mode (short single-end reads without splicing) uses the following options:
                         -k21 -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20 -s40 -g200 -2K50m --heap-sort=yes --secondary=no
                         Default: OFF.

--mm2_maximum_length <int>   Maximum length cutoff for very long sequences (currently allowed 100-100,000 bp). Default: 10000.


Bismark BAM/SAM OUTPUT (default):

 (1) QNAME  (seq-ID)
 (2) FLAG   (this flag tries to take the strand a bisulfite read originated from into account (this is different from ordinary DNA alignment flags!))
 (3) RNAME  (chromosome)
 (4) POS    (start position)
 (5) MAPQ   (always 255 for use with Bowtie)
 (6) CIGAR
 (7) RNEXT
 (8) PNEXT
 (9) TLEN
(10) SEQ
(11) QUAL   (Phred33 scale)
(12) NM-tag (edit distance to the reference)
(13) MD-tag (base-by-base mismatches to the reference (handles indels))
(14) XM-tag (methylation call string)
(15) XR-tag (read conversion state for the alignment)
(16) XG-tag (genome conversion state for the alignment)
(17) XA/XB-tag (non-bisulfite mismatches) (optional!)

Each read of paired-end alignments is written out in a separate line in the above format.


Last modified on 23 August 2023
HOW_TO
}