-
Notifications
You must be signed in to change notification settings - Fork 2
/
HiFLEx_UserManual.tex
1840 lines (1519 loc) · 138 KB
/
HiFLEx_UserManual.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass[10pt,a4paper]{article}
\usepackage[a4paper, bindingoffset=0.5cm, left=2cm,right=2cm,top=2cm,bottom=2cm,]{geometry}
\usepackage{listings} % Only used for this template to display code, can delete
\usepackage[usenames,dvipsnames,svgnames,table]{xcolor} %Driver-independent color extensions
\usepackage[linktocpage=true]{hyperref} % Extensive support for hypertext in LaTeX
\usepackage{graphicx,subfig} % Enhanced support for graphics
\usepackage{tabularx} % Line breaks in tabular environment
\usepackage{amsmath}
\usepackage{enumitem}
\newcommand\tab[1][1cm]{\hspace*{#1}}
\hypersetup{
pdfauthor={Ronny Errmann, Neil Cook},
pdfcreator={Ronny Errmann, Neil Cook},
pdftitle={HiFLEx Data Reduction User Manual},
pdfsubject={HiFLEx Data Reduction User Manual},
pdfkeywords={HiFLEx, Data Reduction, User Manual},
colorlinks=true, % false: boxed links; true: colored links
linkcolor=blue, % color of internal links (change box color with linkbordercolor)
citecolor=Maroon, % color of links to bibliography
filecolor=blue, % color of file links
urlcolor=blue, % color of external links
plainpages=false,
}
% This is only used if you want to add code
\lstdefinestyle{base}{
basicstyle=\small\ttfamily,
backgroundcolor=\color{Tan},
language=[LaTeX]{TeX},
moredelim=**[is][\color{red}]{@}{@},
}
% To change: title; date; https://github.com/ronnyerrmann/HiFLEx/archive
\title{User Manual for Data Reduction using the HiFLEx package\\ version: v1.5.2}
\date{February, 2022}
\author{Ronny Errmann, EXOhSPEC team\\ Physics, Astronomy and Maths, University of Hertfordshire}
\begin{document}
\maketitle
\pagenumbering{Roman}
\tableofcontents
\pagenumbering{arabic}
% -------------------------------------------------------------------------------------------
\newpage
\section{Introduction}
\label{intro}
This program is designed to take a fits image file (i.e. a CCD image) and run data reduction steps, extract out orders from an Echelle spectrograph (regardless of separation and curvature, as long as orders are distinguishable from one-another), apply the wavelength correction, measure the radial velocity, and perform further calibration steps. Single-fiber or bifurcated-fiber inputs are supported. The pipeline works well under Linux and was tested under Windows. It should also work on Mac.
If you use this package, please cite Errmann, et al. 2020\footnote{\url{https://ui.adsabs.harvard.edu/abs/2020PASP..132f4504E}} and let me know.
If you use the barycentric corrected radial velocities, the barycentric velocity, or the BJDTDB, please cite Kanodia and Wright 2018\footnote{\url{https://ui.adsabs.harvard.edu/abs/2018RNAAS...2....4K}}.
If you use the cosmic ray correction using deepCR, please cite Zhang \& Bloom 2020\footnote{\url{https://ui.adsabs.harvard.edu/abs/2020ApJ...889...24Z}}.
If you use the radial velocities from TERRA, SERVAL, or CERES please cite the corresponding papers: Anglada-Escud{\'e} et al. 2012\footnote{\url{https://ui.adsabs.harvard.edu/abs/2012ApJS..200...15A}}, Zechmeister et al. 2018\footnote{\url{http://ui.adsabs.harvard.edu/abs/2018A&A...609A..12Z}}, or
Brahm et al. 2017\footnote{\url{https://ui.adsabs.harvard.edu/abs/2017PASP..129c4002B}}, respectively.
% -------------------------------------------------------------------------------------------
%\newpage
\section{For Beginners in Echelle Spectroscopy or Programming}
This Pipeline was intended to be written as a black box for the unskilled user, however a basic understanding of echelle spectroscopy and data reduction of digital astronomical images is necessary in order to use it.
\subsubsection*{Data reduction of CCD or CMOS images (photometry or spectroscopy)}
Please refer to Howell (1976,2006)\footnote{\url{https://www.cambridge.org/core/books/handbook-of-ccd-astronomy/97D3D910788D44D11394B3B57C3FA743}} or check Chapter~\ref{Section:data_reduction_CCD}. A guide for data reduction using python is provided by Craig and Chambers\footnote{\url{https://mwcraig.github.io/ccd-as-book/00-00-Preface.html}}.
\subsubsection*{Echelle spectroscopy}
Echelle spectrographs disperse the light into a 2-dimensional plane by using a high-dispersing grating (optimised for orders $>$ 50) and a low-dispersing prism (or grating). The low-dispersing element is thereby necessary to separate the individual orders produced by the high-dispersing grating. Some of the wavelengths can be covered by several orders. An example for an echelle spectrum of a black body is shown in Figure~\ref{Fig:apertures_in_master_flat} with the individual orders going up to down and order number increasing from left to right.
In order to work with the spectra, the following steps have to be done:
\begin{itemize}\setlength\itemsep{0em}
\item Trace the orders.
\item Correlate wavelengths to pixel.
\item Extract the spectra.
\item Apply instrument specific corrections.
\end{itemize}
In case of a bifurcated spectrograph, some steps have to be applied to the traces of both fibers.
\subsubsection*{Programming (Python)}
The basic things for Python (and the configuration file) are:
\begin{itemize}\setlength\itemsep{0em}
\item Lines starting with one or more \# are comments, which will be ignored by the pipeline.
\item Any text is case-sensitive (except when mentioned otherwise).
\item Empty lines and the number of spaces normally don't matter.
\end{itemize}
% ------------------------------------------------------------------------------
\newpage
\section{Changelog (recently)}
\label{Section:Changelog_new}
Please find the changes of older versions in Chapter~\ref{Section:Changelog_old}.
\subsection*{v1.5.2}
\begin{itemize}\setlength\itemsep{0em}
\item Barycentric velocity was calculated wrong when proper motion was given due to giving epoch in years instead of JD (since version v0.4.1).
\end{itemize}
\subsection*{v1.5.1}
\begin{itemize}\setlength\itemsep{0em}
\item Added parameter \verb|traces_min_separation| to speed up searching of orders.
\item Small improvements and bugfixing.
\end{itemize}
\subsection*{v1.5.0}
\begin{itemize}\setlength\itemsep{0em}
\item Tested on Windows 10. However, multiprocessing has an issue, hence it was disabled.
\item Bugfixing with exposure times when assigning files.
\item Fixed line strength for ThAr lines below 4000\AA.
\end{itemize}
% -------------------------------------------------------------------------------------------
%\noindent
\section{Quick starting Guide}
\subsection{Setting up a new instrument}
\label{Section:first_configuration}
The pipeline reads the parameters from the configuration file \verb|conf.txt|. This section explains in detail how to reduce the data for a new spectrograph.
\begin{enumerate}
\item General parameters: \vspace*{-\itemsep}
\begin{itemize}[leftmargin=*]\setlength\itemsep{0em}
\item Create a new folder and copy the \verb|conf.txt| into that folder and open the file in an editor.
\item Change parameter \verb|raw_data_paths| to point to the folder or folders with your raw or pre-reduced fits-files. The data can be stored in sub-folders.
\item Set the parameter \verb|badpx_mask_filename| if a bad-pixel-mask exists (see \ref{Section:create_badpxmask}). Otherwise leave empty or 'NA'.
\item Adjust \verb|rotate_frame| and \verb|flip_frame| so that the orders are up to down (blue wavelength on top) and red orders are on the left. The pipeline will first rotate and then flip, the flip is swapping left and right (Fig.~\ref{Fig:apertures_in_master_flat} shows the orientation).
\item Adjust parameter \verb|subframe| to a subframe, if only part of the CCD should be used. Otherwise leave empty or set to the full detector size.
\end{itemize}
\item Finding the traces of the science fiber: \vspace*{-\itemsep}
\begin{itemize}[leftmargin=*]\setlength\itemsep{0em}
\item Set parameter \verb|original_master_traces_filename = | (empty), or to a non-existing file.
\item To increase signal to noise and to speed up the search for the traces of the orders, binning as given in the parameters \verb|bin_search_apertures| (rough estimate) and \verb|bin_adjust_apertures| (fine tuning) can be adjusted. Please note that after binning, the orders should still be well distinguished from each other. % show highly binned image
\end{itemize}
\item Finding the traces of the calibration fiber: \vspace*{-\itemsep}
\begin{itemize}[leftmargin=*]\setlength\itemsep{0em}
\item The side of the calibration traces compared to the science traces is given in parameter \verb|arcshift_side| (left or right). Set it to center for a single fiber spectrograph.
\end{itemize}
\item Create a new wavelength solution (in case no wavelength solution is required, set \verb|original_master_wavelensolution_filename| to \verb|pseudo| and continue with the next numbered point): \vspace*{-\itemsep}
\begin{itemize}[leftmargin=*]\setlength\itemsep{0em}
\item Change the path to the catalogue of the reference lines (\verb|reference_catalog|), if necessary. The file must contain one entry per line, each entry consists of tab-separated wavelength, line strength, and element. Line strength can be empty.
\item Set parameter \verb|original_master_wavelensolution_filename =| \\
\verb|master_wavelength_manual.fits|.
\item To find a wavelength solution, a file which provides the wavelengths for some (extracted) pixel positions can optionally be given (called \textit{pixel\_to\_wavelength.txt}, Chapter~\ref{section:create_new_wave_solution}).
\item The number of degrees of freedom for the wavelength solution (2-dimensional polynomial fit) can be adjusted with \verb|polynom_order_traces| (polynomial orders along the dispersion direction) and \verb|polynom_order_intertraces| (polynomial orders between the traces).
%\item Decrease \verb|diff_pxs| to create a more precise solution (but strong lines might be rejected).
\end{itemize}
\item Setting up the header keywords: \vspace*{-\itemsep}
\begin{itemize}[leftmargin=*]\setlength\itemsep{0em}
\item Adjust the parameters starting with \verb|raw_data_*|. The example configuration file shows the settings for data taken with MaximDL and for data from the HARPS spectrograph. See also \ref{Section:prepare_files}.
\end{itemize}
\item Setting up the data reduction steps: \vspace*{-\itemsep}
\begin{itemize}[leftmargin=*]\setlength\itemsep{0em}
\item Adjust the parameters \verb|*_calibs_create_g| to define the reduction steps which should be applied. Please refer to \ref{Section:parameters_CCD_proc} for further information. Some of the \verb|*_calibs_create_g| parameters might not be relevant, e.g. if no \textit{dark} or \textit{rflat} (real flatfield) corrections should be applied. An example for the parameters with additional explanation is shown in Figure~\ref{Fig:example_calibs_create_g}.
\end{itemize}
\item Setting up the observatory site: \vspace*{-\itemsep}
\begin{itemize}[leftmargin=*]\setlength\itemsep{0em}
\item The parameters \verb|site|, \verb|altitude|, \verb|latitude|, and \verb|longitude| (negative for west) need to be set, if the information is not stored in the fits-header (see also \ref{Section:barycentric_correction}).
\end{itemize}
% \item Create the file \textit{object\_list.txt}: See Chapter~\ref{Section:create_object_list_file}.
\item For RV analysis: Set path \verb|terra_jar_file| to the TERRA \textit{PRV.jar} file. Set paths \verb|path_serval| and \verb|path_ceres| to the home folders of the SERVAL and CERES pipelines. See also in \ref{Sec:RV_packages}.
\item Run the script \verb|file_assignment.py| (see \ref{Section:prepare_files} below for more information): \vspace*{-\itemsep}
\begin{itemize}[leftmargin=*]\setlength\itemsep{0em}
\item Define what files should be used for what calibration.
\item Define what files to extract (and in which RVs will be measured).
\item Please note that you can use already reduced images. In this case the reduction steps as defined in \verb|conf.txt| should be empty for the file type (e.g. dark or real flats) to avoid a second application of the correction.
\end{itemize}
\item Run the script \verb|hiflex.py|: \vspace*{-\itemsep}
\begin{itemize}[leftmargin=*]\setlength\itemsep{0em}
\item Afterwards: check the output in the \verb|logfile|, the images in the logging path, or the results in the extracted files.
\end{itemize}
\end{enumerate}
\begin{figure}
\begin{center}
\includegraphics[width=0.8\textwidth]{./figures/example_calibs_create_g}
\end{center}
\caption{\small Example for setting up the calibration steps, in which darks are only available for some (long) exposure times and have been corrected with the overscan and a master bias was created elsewhere.\\
The first 10 lines show what calibration should be done for what types of data and the last 5 lines show parameters for some of the steps. As the master bias was already created, no calibration steps are applied to it.
The overscan was removed in the dark frames (line~2), hence a different subframe is used.\\
To perform flat-field correction, the frames of the evenly illuminated detector are processed by cutting away the overscan area, the bad-pixel mask, and the master dark with the same exposure time (which is created following the settings in line~2). Before combining the frames using a median, the flat-field frames are normalised using the median flux.\\
For extracting the science spectra (line~8) the subframe, the bad-pixel mask, the dark, the flat-field, and the background correction will be applied. For the flat-field (rflat) correction the image \textit{master\_rflat.fits} will be used, which itself will be loaded using \textit{standard\_calibs\_create} (line~10) or is created by using the calibration given in line~3. Before extracting the spectra, the background flux (stray light) is removed from the reduced frame. The background image is created by fitting a 4 by 3 (cross-dispersion direction) 2-dimensional polynomial to the reduced frame, excluding the traces of the science orders (1.5 times the width) and the traces of the calibration orders (0.5 times the width).\\
A spectrum of the blaze function is extracted from the file created with parameters given in line~7. In this example the exposure time of the continuum source was 9.7\,s, but no darks of 9.7\,s exposure time were taken and therefore darks with 10\,s exposure time should be used.\\
For the master files to search for the traces of the orders and the emission lines to create the wavelength solution no darks were available, hence the master bias will be used.
\label{Fig:example_calibs_create_g}}
\end{figure}
\subsection{Reducing data with the same setup for different nights}
\label{Section:further_configuration}
After all the calibration steps have been performed for a given instrument (see~\ref{Section:first_configuration}), this information can be used to extract data of different nights more easily, as long as the instrument itself stays untouched. Here it is assumed that files to find the traces of science and calibration fiber, files to adjust the wavelength solution, and files to find the blaze function are available.
\begin{itemize}
\item Copy the \textit{conf.txt} from the previous night as most modifications will be made there already.
\item Set parameter \verb|original_master_traces_filename| to the file \textit{master\_traces\_sci.fits} in the previous night (contains the properties of the traces).
\item Set parameter \verb|original_master_wavelensolution_filename| to the file \textit{master\_wavelength.fits} in the previous night (contains the wavelength solution). In case no wavelength solution is required, set \verb|original_master_wavelensolution_filename| to \verb|pseudo|.
\item When comparing the emission lines found in the current observation night with the original wavelength solution, several variations can be applied:
\begin{description}\setlength\itemsep{0em}
\item[order\_offset] It could be that not all or more echelle orders (especially on the red end) will be identified compared to the orders of the night, to which parameter \verb|original_master_wavelensolution_filename| is pointing. The parameter \verb|order_offset| gives the expected range of extra or fewer orders on the red side of the shift.
\item[px\_offset] If the setup is touched, the whole spectrum could be moved along the detector in dispersion direction, compared to the previous solution (given in parameter \verb|original_master_wavelensolution_filename|). The parameter \verb|px_offset| gives the expected range and step size of the pixel-shift.
\item[px\_offset\_order] Gives the range and step size of a shift per order between the individual orders (e.g. the spectrum is tilted compared to the CCD alignment).
\item[resolution\_offset\_pct] Gives the expected change of resolution in percent between the current setup and the previous wavelength solution. The pipeline will test 11 resolutions between 0\% and the value given in parameter \verb|resolution_offset_pct|.
\end{description}
\item Activate the hiflex environment in anaconda (if set, see \ref{Sec:Creating_conda_environment}) \verb|conda activate hiflex|~.
\item Run the script \verb|file_assignment.py| to define what files should be used for what calibration.
\item Run the script \verb|hiflex.py|.
\end{itemize}
\subsection{Possible parameters for the CCD processing}
\label{Section:parameters_CCD_proc}
\noindent The list below shows all standard CCD calibration options available in the pipeline. The corrections are done on a pixel-by-pixel basis. The steps will be executed in the same order as given here.
\begin{description}
\item[subframe] : The subframe as given in the parameter \verb|subframe| will be applied, only a section of the CCD will be used.
\item[badpx\_mask] : Apply the bad pixel mask, given in parameter \verb|badpx_mask_filename|. If the file doesn't exist, all pixels are assumed to be good. The file must contain a 2-dimensional image with 1 for good and 0 for bad pixel.
\item[bias] : Apply a master bias. This can be either a pre-reduced image, or it can be created by the pipeline. %, which will be created from the files given in the parameter \verb|bias_rawfiles| by using the settings given in the parameter \verb|bias_calibs_create|. These two parameters are created automatically by the script \verb|file_assignment.py| if bias frames exist in the raw data path.
\item[dark] : Apply a master dark. A dark with the same exposure time as read from the fits header will be applied. %The necessary parameters are created automatically by the script \verb|file_assignment.py|.
\item[rflat] : Apply a master true flat. %The necessary parameters are created automatically by the script \verb|file_assignment.py|.
\item[cosmic\_ray] : Do the cosmic ray correction using deepCR (best only on science spectra, otherwise rely on median combination). See more in Chapter~\ref{Section:train_deepCR}.
\item[background] : For Scattered light removal. Applies background correction by fitting a 2D polynomial against the current reduced image, excluding the area of the science and calibration traces. The fitted 2D-image is then subtracted from the current image.
\end{description}
\noindent The following parameters change the way, several CCD images are combined. In the pipeline the individual frames are corrected before frames are combined.
\begin{description}
\item[normalise] : Before combining the files the images are normalised by their median flux.
\item[combine\_sum] : Normally, files are combined using a pixel by pixel median. With this keyword the files will be summed up. This parameter will be overridden by \verb|combine_mean|. When summing up several images, the value of pixels might exceed \verb|max_good_value|.
\item[combine\_mean] : Normally, files are combined using a median. With this keyword a pixel by pixel average will be used.
%\item[] :
\end{description}
\noindent The calibration options can be altered, as long as they contain the unique string in the option name. For example an option \verb|subframe_1| can be used as calibration step, in this case the parameter \verb|subframe_1| needs to be defined in one of the calibration files. If a different bias, dark, or flat should be used, the following parameters need to be included in one of the configuration files (for the example of \textit{darkfixed}):
\begin{itemize}\setlength\itemsep{0em}
\item \verb|darkfixed_rawfiles|
\item \verb|darkfixed_calibs_create|
\item \verb|master_darkfixed_filename|.
\end{itemize}
\noindent An example for the parameters with additional explanation is shown in Figure~\ref{Fig:example_calibs_create_g}. The master files for bias, rflat, and darks can also be copied into the result folder from a previous night.
\subsection{Assigning the observed data and calibration data to the pipeline}
\label{Section:prepare_files}
The script \verb|file_assignment.py| reads all files in the folders (and subfolders) given in parameter \verb|raw_data_paths|. The file name and header information are used to determine the type of file and if it can be used for calibration. The header parameters are thereby defined by the following parameters in the configuration file:
\begin{description}
\item[raw\_data\_imtyp\_keyword:] Header keyword of the image type (standard: \\ \mbox{IMAGETYP}),
\item[raw\_data\_imtyp\_bias:] Value of the header keyword \\ raw\_data\_imtyp\_keyword for bias frames (standard: Bias Frame),
\item[raw\_data\_imtyp\_dark:] Value of raw\_data\_imtyp\_keyword for dark frames (standard: Dark Frame),
\item[raw\_data\_imtyp\_flat:] Value of raw\_data\_imtyp\_keyword for flat frames (standard: Flat Frame),
\item[raw\_data\_imtyp\_trace1:] Value of raw\_data\_imtyp\_keyword for frames to trace the science orders (for HARPS: LAMP,DARK,TUN),
\item[raw\_data\_imtyp\_blaze:] Value of raw\_data\_imtyp\_keyword for frames with the blaze function (for HARPS: LAMP,LAMP,TUN),
\item[raw\_data\_imtyp\_trace2:] Value of raw\_data\_imtyp\_keyword for frames to trace the calibration orders (for HARPS: WAVE,WAVE,THAR2),
\item[raw\_data\_exptim\_keyword:] Header keyword for the exposure (standard: \\ \mbox{EXPTIME}),
\item[raw\_data\_dateobs\_keyword:] Header keyword for the observing date and time (in UTC). The format needs to be \mbox{YYYY-MM-DDTHH:MM:SS} (e.g. \\ \mbox{2018-02-28T23:34:01}) (standard: \mbox{DATE-OBS}). Other formats can be defined in the procedure \verb|get_obsdate|. \\
If the time zone is not UT, then \verb|raw_data_timezone_cor| can be used for correction, e.g.\ $-7$ when local time in Thailand has been stored as UT time.\\
If information from an exposure metre is available to calculate the weighted mid-exposure time, a list of header keywords can be given in \verb|raw_data_mid_exposure_keys|.
\item[raw\_data\_object\_name\_keys:] List of header keywords to get the object name, which is then used to derive the information from Simbad.
\end{description}
\noindent The file type and definition of the fibers is done by using the filename and header information. The assignment is done in the order given in Table~\ref{Tab:fiber_definition}. The result of this assignment is stored in a text file (\textit{file\_list\_raw\_data.txt}), which is shown to the user in a GUI (see Figure~\ref{Fig:UI_prepare_filelist}). %The file can be edited (these information won't be overwritten if the script is re-executed). The following information is stored for each file in the raw data path (tab-separated):
\begin{table}
\caption{Assignment of the fibers using the header keywords as given in the parameters of the configuration and filename. The science and calibration fibers are denoted with 'fiber1' and 'fiber2', respectively. Later assignment overwrites earlier ones. The filename is case-insensitive. '--' means no change has been done.}
\label{Tab:fiber_definition}
\begin{tabular}{l l l}
\small
condition & fiber1 & fiber2 \\
\hline
filename contains \verb|bias| & bias & bias \\
filename contains \verb|dark| & dark & dark \\
filename contains \verb|flat| but not \verb|sflat| & flat & flat \\
\verb|raw_data_imtyp_keyword| equals \verb|raw_data_imtyp_flat| & & \\
\hspace{1cm} and filename doesn't contain \verb|sflat| & flat & flat \\
filename contains \verb|arc| & -- & wave \\
\verb|raw_data_imtyp_keyword| equals \verb|raw_data_imtyp_bias| & bias & bias \\
\verb|raw_data_imtyp_keyword| equals \verb|raw_data_imtyp_dark| & dark & dark \\
none of the above and the filename doesn't start with \verb|arc| & science & -- \\
\end{tabular}
\end{table}
\begin{figure}
\begin{center}
\includegraphics[width=\textwidth]{./figures/UI_prepare_filelist}
\end{center}
\caption{User interface to select which files should be used for which calibration step. In this selection the Dark-files will be used for dark correction (ignoring the Darks with 1\,s exposure time), the Flat-files (Tungsten lamp) will be used to find the science traces and to find the blaze function, and the long-exposed ThAr in the calibration fiber will be used to find the traces of the calibration fiber. ThAr of short and long exposure, taken around the Tungsten spectra, are used to create the wavelength solution in the science and calibration fiber. (Short exposure to identify lines that are saturated in the long exposure.) All long exposed ThAr-spectra are used to determine the drift both in the science and calibration wavelength solution, and the relative drift between solutions by assigning them to \textit{Wave shft sci} and \textit{Wave shft cal}. \\
Once the preparation of the above steps is done, spectra will be extracted. The spectra of the sun will be extracted individually, either with the standard setting, or, in case of some of the ``spot''-data with modified settings, called \textit{test1} and \textit{test2}, which are assigned in the next GUI. Furthermore, the linearised spectra of the ``nospot''-data will be combined using a weighted average. The long exposed ThAr-images, that are used to create the wavelength solution, will also be extracted, after combining the files into \textit{ThAr\_scilong} or \textit{ThAr\_callong}.
\label{Fig:UI_prepare_filelist}}
\end{figure}
\begin{itemize}\setlength\itemsep{0em}
\item Comment the file out (don't use it)
\item Observation time
\item Exposure time in seconds
\item Filename
%\item Type of light for the science fiber
%\item Type of light for the calibration fiber
\item What calibration should the file be used for:
\begin{description}
\item[Bias:] Use it to create a master bias.
\item[Dark:] Use it to create a master dark with this exposure time.
\item[Real flat:] For master full flat (detector evenly illuminated).
\item[Sci. trace, Cal. trace:] To find the traces of the science and calibration fiber (for bifurcated fiber input).
\item[Blaze:] Use the file to create a master file from which the blaze function is extracted.
\item[Wave Sci.:] Files from which the wavelength solution for the science fiber will be created. The files will first be combined into a master file and then extracted. In case of single fiber fed spectrographs, these settings should be used.
\item[Wave Cal.:] For bifurcated fiber fed spectrographs to create the wavelength solution for the calibration fiber.
\item[Wave shft cal, Wave shft sci:] Define the files from which a time dependent drift between the wavelength solutions is calculated. If ThAr light is used in both fibers, then both calibrations should be ticked. The script will calculate the shift using the pairs closest in time. For single fiber fed spectrographs only ``Wave shft cal'' is used to derive the shift from the wavelength solution.
\item[Extract:] Extract these files individually. The processing as given in parameter \verb|extract_calibs_create_g| will be assigned.
\item[Further entries:] Further ways to extract the science spectra are possible. Several options can be combined using commas:
\item[e\_\textless obj\textgreater :] Extract files individually. The processing as given in parameter \verb|extract<obj>_calibs_create_g| will be assigned. If this parameter doesn't exist, then parameter \verb|extract_calibs_create_g| will be used.
\item[ec\_\textless obj\textgreater :] The same as \textbf{e\_\textless obj\textgreater}, but instead of extracting each file individually, all files with \textbf{ec\_\textless obj\textgreater} will be combined into a file called \verb|master_<obj>.fits| and then this file is extracted.
\item[elw\_\textless obj\textgreater \textit{ or} elm\_\textless obj\textgreater \textit{ or} els\_\textless obj\textgreater :] The same as \textbf{e\_\textless obj\textgreater}, however, after extracting each file individually, all linearised files with \textbf{elw\_\textless obj\textgreater} \textit{ or} \textbf{elm\_\textless obj\textgreater} \textit{or} \textbf{els\_\textless obj\textgreater} will be combined using a \textbf{w}eighted average (based on the total extracted flux) \textit{or} a \textbf{m}edian \textit{or} the \textbf{s}um into a file called \verb|extract_lin_weight<obj>_lin_cont.fits| \textit{or} \verb|extract_lin_med<obj>_lin_cont.fits| \textit{or} \verb|extract_lin_sum<obj>_lin_cont.fits| (and a csv-file with the same name and content) in folder \verb|<path_extraction_single>|. Requires \verb|wavelength_scale_resolution| to be larger than 0.
\end{description}
\end{itemize}
\noindent If the automatic assignment of the calibrations is wrong, please check the parameters \verb|raw_data_*| in conf.txt. If this doesn't fix it, please check the hard coded lines in procedure \textit{add\_new\_rawfiles\_file\_list} as some of the assignments had to be hard coded.
\begin{figure}
\begin{center}
\includegraphics[width=\textwidth]{./figures/UI_prepare_fitsconf_objectlist}
\end{center}
\caption{The second step for preparing the files using the data as shown in Figure~\ref{Fig:UI_prepare_filelist}. The upper half shows all the calibration sets and what processing of the files is to be done. The exposure time of the ThAr for \textit{cal2\_s} was 2 seconds, however no darks were available. Hence the user assigned the darks with 1 second exposure time. The files for eta Peg were put under a different extraction step, as the user wanted to replace \textit{dark} correction with \textit{background} correction. \\
No header information about the objects was available, but the lookup of the object name on Simbad was successful.
\label{Fig:UI_prepare_fitsconf_objectlist}}
\end{figure}
After the assignment of which files are to be used for which calibration, the data reduction steps for each of these calibrations are shown in a GUI (see Figure~\ref{Fig:UI_prepare_fitsconf_objectlist}). The information is read from the configuration file from section \verb|*_calibs_create_g|. The description of the parameters can be found in \ref{Section:parameters_CCD_proc}.
\noindent The header information about the coordinates of the object is shown, if available. Furthermore, the information stored in the file given in parameter \verb|object_file| is read, and if the file does not exist or the file does not contain the object, the object name is looked up on Simbad. Please note that the positions of Moon, Jupiter, and Sun are calculated by the pipeline using the mid-exposure time if the object name can be derived from the header or the filename. Therefore the object is not required in the object list.
\noindent When Accepting the settings from the GUI, the data will be stored in the files given in parameters \verb|configfile_fitsfiles| and \verb|object_file|. Both files can be edited, and further modification of the object coordinates is described in \ref{Section:create_object_list_file}.
\subsection{Removing/Adding/Extending/Shortening the automatically found traces}
\label{Sec:remove_add_modify_orders}
After checking the results (see \ref{section:results_pipeline}), one might want to remove wrongly identified apertures. If a trace is located close to the borders this can lead to wrongly traced orders, which can't be handled by the code automatically due to its high flexibility. Additionally, in low light areas it can also be that the traces stop prematurely, while strong emission features are still detectable in the science images, hence the user might want to extend the orders. In case of imaging the spectra on multiple detectors, orders might fall between the detectors, hence a dummy order needs to be added later, as the wavelength solution requires consecutive orders (later these orders can be excluded again using \verb|minimum_SNR|). All this is possible in a GUI, as seen in Figure~\ref{Fig:GUI_remove_add_modify_orders}. If \verb|GUI=True|, then the GUI will be opened automatically, otherwise it can be opened by running \verb|python <path_hiflex>/remove_add_modify_orders.py|. Once finished, the old files will be moved into a sub-folder and then the user needs to run \verb|hiflex.py| again.
\begin{figure}
\begin{center}
\includegraphics[width=0.8\textwidth]{./figures/GUI_remove_add_modify_orders}
\end{center}
\caption{GUI to remove/add/modify orders. To change the scale modify the values and use \textit{Update}. To remove orders give the indexes of the orders and use \textit{Update}. To add a new order click \textit{Add a new order}. To add or remove points select the index of order, and, only if applicable, the part of the order: c for the center, l for the left extraction boundary, and r for the right extraction boundary. After \textit{Add points to an order} or \textit{Remove points from an order} click in the area of the image. You might want to zoom in beforehand. \textit{Cancel adding/removing points} will revert to the point before clicking \textit{Add points to an order} or \textit{Remove points from an order}, while \textit{Use added/removed points} will refit the trace of the order using the modified points. Once all changes have been applied, click \textit{Accept} to save the changes.
\newline
In the above example orders 26 and 27 are going to be removed and the points for the center of the trace 74, which was added as new order, are being added.
\label{Fig:GUI_remove_add_modify_orders}}
\end{figure}
\subsection{Creating a new wavelength solution}
\label{section:create_new_wave_solution}
If no previous wavelength solution exists, then wavelengths and pixels need to be correlated manually. In this case the parameter \verb|original_master_wavelensolution_filename| needs to point to a file, which does not exist (e.g. \textit{master\_wavelength\_manual.fits}). Then the python script can be run normally. It will perform most of the steps described in Section~\ref{Section:pipeline_steps_general} and then open a GUI similar to the one shown in Figure~\ref{Fig:UI_create_manual_wavelengh_solution}. The user can also prepare the correlated data between pixel and wavelength beforehand, which is described below the GUI instructions.
\begin{figure}
\begin{center}
\includegraphics[width=\textwidth]{./figures/UI_create_manual_wavelengh_solution}
\end{center}
\caption{Window to create a new wavelength solution showing short and long exposed emission line spectrum of the current selected order and the previous and next order above and below, respectively. The bottom of the central plot shows the identified lines in pixel and, if a wavelength was given, this wavelength. In the other plots only the wavelength is shown. Red shows lines for which a wavelength was provided. The list on the right shows the pixel positions of the identified lines, ordered by peak intensity. \\
In this example the user clicked on the line around pixel 4836, this entry is then selected in the list on the right to add the wavelength. Clicking away from a line will make all wavelengths editable again. \\
To update the plot or to change the order, the button \textit{Update} can be used. If at least two wavelengths are given for the current order, a linear fit will be performed. The lines from the emission line catalogue (parameter \textit{reference\_catalog}) are shown for this fit in red or blue (wavelength given or not) at the top of the central plot. The marker gives the relative line strength from the catalogue. The example shows that a linear fit can be only a starting point as it does not fit the data well. The wavelength from the linear fit is also given in the table of the pixel positions at the end of each line. If this number is completely off, then probably a line was misidentified by the user. \\
Once the wavelength for a few lines in a few orders has been given, the wavelength solution can be calculated. The offset between this order and the real order from the grating equation has to be provided (the exact number is not needed). The results from the wavelength solution are used to plot the lines of the emission line catalogue in all three orders shown in the plot in red (lines used for the fit) and green (lines available (only the brightest 30 lines)). For each pixel position the wavelength is calculated and given after the line properties. If only few orders are given, then low polynomial orders should be used. \\
If some of the identified lines should be deleted, then the user can mark the box at the front of the list entries and then update the plot. At the bottom of the list is the possibility to give pixel and wavelength for non-identified lines. After each update of the plot up to 3 entries can be added like this. The GUI is saving the data each time the plot is updated and when accepting the wavelength solution into the file given in parameter \textbf{px\_to\_wavelength\_file}.
\label{Fig:UI_create_manual_wavelengh_solution}}
\end{figure}
To create a new wavelength solution with the GUI the following steps are suggested:
\begin{enumerate}\setlength\itemsep{0em}
\item Find an order close to the central order with at least 2 prominent lines for which the wavelength is known, best several 1000 or at least several 100 pixel apart from each other by changing the order number and \textit{Update}.
\item Add the wavelength for these two lines in the table.
\item \textit{Update} the plot to see the linear fit of the catalogue wavelength in blue in the plot. (Please note that as it's only a linear fit, an offset in areas away from the two lines is expected.)
\item Add a few more wavelengths using the brightest blue lines (longest marker), which should be close to the position of the brightest emission lines. Please note that the spectrum might deviate from the linear fit. Please also note that for some lamps the gas (Ar) lines are brighter than the metal (Th) lines, however the catalogue might not represent this ratio.
\item Repeat steps 1 to 4 for a few more orders. If consecutive orders are overlapping in wavelength some of the lines could be easily identified in the neighbouring orders.
\item Set the \textit{order offset} to approximately the expected value. Set both the \textit{polynom-orders} to 2. \textit{Calculate wavelength solution}.
\item Scan through the orders and add few more wavelengths when the fitted positions of the catalogue lines (green lines in the centre of the central plot) deviate from the emission lines. Doing this every 10 orders might be enough. \textit{Calculate wavelength solution}.
\item Repeat step 7 until the lowest and highest orders have a good wavelength solution. It might be necessary to increase the \textit{polynom-orders} to create a good fit.
\item Set the polynom-orders to the final values (e.g. 4 and 4), \textit{Calculate wavelength solution} and \textit{Accept}.
\end{enumerate}
\paragraph{Prepared correlated data between pixel and wavelength:}
The user can also prepare the correlated data between pixel and wavelength. It needs to be saved into file \textit{pixel\_to\_wavelength.txt} (in the working directory) with the following tab-separated entries (exactly one separator between each column):
\begin{itemize}\setlength\itemsep{0em}
\item Aperture (starting at 0 for the reddest order)
\item Real/physical order from the grating equation (bigger than 0, usually between 40 and 120, doesn't have to be exactly the right one). %\textbf{Warn:} the difference between columns 1 (Aperture) and 2 (Real/physical) has to be the same for all lines.
\item Pixel
\item Wavelength [\AA] (optional, can be also empty if the user wishes to copy the whole file from \verb|logging_found_arc_lines| and only adds wavelengths to some lines)
\item (optional: Further entries (e.g. ThI, NeII, or comments) ).
\end{itemize}
% and stops after searching for the lines in the emission line spectra with the message:
%\begin{lstlisting}[style=base, basicstyle=\small]
%Error: Files for creating the wavelength solution do not exist ...
%\end{lstlisting}
%The order and pixel position of all identified lines, as well as the width and the height of the Gaussian fit, are stored in the file given in parameter \verb|logging_found_arc_lines| (standard: \textit{logging/arc\_lines\_found.txt}). This file can be opened (easiest in a spreadsheet application) and the corresponding wavelengths can be added to some of the lines (see Table~\ref{Tab:create_wavelength_solution}). The lines for each order are sorted by the highest absolute flux value (this is not the height of the Gaussian fit) of the line, the saturated lines are excluded. Please note that sometimes the side-wing of a saturated line is listed in the found lines, these lines should be ignored as well.
%
%The plots of the extracted spectra, which are stored in the file given in \verb|logging_arc_line_identification_spectrum| (extended by the pipeline with \textit{\_manual.pdf}) can be used for visual guiding (for an example see Figure~\ref{Figure:create_wavelength_solution}).
%Tests have shown that providing about 10 lines every 5 to 10 orders is enough for the pipeline to do the fitting in a good way. However, it is important that the lines cover as much of the CCD image as possible (e.g. covering most of the orders, inclusive the outermost lines of the orders). In case of a completely different setup an iterative approach might be necessary.
%
%\begin{figure}
% \centering
% \subfloat[Part of a Th-Ar spectrum without a wavelength solution from a medium resolution spectrograph.]
% {\includegraphics[height=4.5cm]{./figures/create_wavesol_unknow_spec_MRES_o22}
% }
% \qquad
% \subfloat[Th-Ar spectrum for which the wavelength solution is known from a high-resolution spectrograph. The lines in red are the identified lines, green lines can be ignored.]
% {\includegraphics[height=4.5cm]{./figures/create_wavesol_know_spec_UH_o26.png}
% \label{Figure:create_wavelength_solution_identified}
% }
% \caption{The emission lines for a short (orange) and a long (blue) exposure time. The left figure covers about the same wavelength range as the Th-Ar spectrum with known wavelength solution in the right figure. The right image can be used to correlate pixel-positions and wavelengths in the left spectrum (see Table~\ref{Tab:create_wavelength_solution}).
% \label{Figure:create_wavelength_solution}}
%\end{figure}
%
%\begin{table}
% \caption{Transforming the emission lines found by the pipeline into the file from which the pipeline will create the wavelength solution. A plot of the unknown and a reference spectrum can be found in Figure~\ref{Figure:create_wavelength_solution}.}
% \label{Tab:create_wavelength_solution}
% \tiny
% \subfloat[Example of the entry of the file from parameter \textbf{logging\_found\_arc\_lines} for one order.]{
% \begin{tabular}{r r r r}
%22 & 244.85 & 1.02 & 2336.9 \\
%22 & 419.53 & 1.15 & 2085.9 \\
%22 & 365.86 & 1.09 & 1832.9 \\
%22 & 440.14 & 1.19 & 1571 \\
%22 & 186.32 & 0.93 & 1556.1 \\
%22 & 387.96 & 1.08 & 974.8 \\
%22 & 492.43 & 1.22 & 1013.3 \\
%22 & 334.94 & 1.06 & 658.1 \\
%22 & 173.30 & 1.54 & 505 \\
%22 & 503.78 & 1.23 & 368.8 \\
%22 & 298.19 & 1.04 & 316.3 \\
%22 & 276.35 & 0.95 & 247.6 \\
%22 & 351.78 & 1.09 & 197.2 \\
%22 & 532.12 & 1.06 & 108.5 \\
%22 & 464.67 & 1.14 & 123.7 \\
%22 & 142.64 & 0.95 & 161.1 \\
%22 & 233.87 & 0.87 & 56.7 \\
%22 & 91.77 & 1.16 & 80.8 \\
%22 & 397.14 & 1.18 & 47 \\
%22 & 257.30 & 1.15 & 32.4 \\
%22 & 37.76 & 1 & 76.2 \\
%22 & 73.44 & 0.99 & 50.2 \\
% \end{tabular}}
% \qquad
% \subfloat[Example of \textit{pixel\_to\_wavelength.txt} for one order as seen in a spreadsheet. Lines without a wavelength can, but don't have to be deleted. The text in the columns with line type and following are optional. Columns have to be spaced by exactly one tabulator in the exported txt-file.]{
% \begin{tabular}{ | r | r | r | l | l | l |}
%\hline
%22 & 47 & 244.85 & 6182.6216 & ThI & \\
%\hline
%22 & 47 & 419.53 & & 6212.719 & blend, not used \\
%\hline
%22 & 47 & 365.86 & 6203.4924 & Th I & \\
%\hline
%22 & 47 & 440.14 & 6215.938 & Ar I & \\
%\hline
%22 & 47 & 186.32 & 6172.2775 & Ar II & \\
%\hline
%22 & 47 & 387.96 & 6207.22 & ThI & \\
%\hline
%22 & 47 & 492.43 & 6224.5271 & ThI & faint blending \\
%\hline
%22 & 47 & 334.94 & 6198.2226 & ThI & \\
%\hline
%22 & 47 & 173.30 & & & outside comparison \\
%\hline
%22 & 47 & 503.78 & & & \\
%\hline
%22 & 47 & 298.19 & 6191.9052 & & \\
%\hline
%22 & 47 & 276.35 & 6188.125 & & faint blending \\
%\hline
%22 & 47 & 351.78 & & & \\
%\hline
%22 & 47 & 532.12 & & & \\
%\hline
%22 & 47 & 464.67 & & & \\
%\hline
%22 & 47 & 142.64 & & & outside comparison \\
%\hline
%22 & 47 & 233.87 & 6180.7049 & Th I & \\
%\hline
%22 & 47 & 397.14 & 6208.6872 & & \\
%\hline
%22 & 47 & 257.30 & 6184.7897 & & \\
%\hline
% \end{tabular}}
%\end{table}
%
%
%The correlated data needs to be saved into file \textit{pixel\_to\_wavelength.txt} (in the working directory) with the following tab-separated entries (exactly one separator between each column):
%\begin{itemize}
% \item Aperture (starting at 0 for the reddest order)
% \item Real/physical order from the grating equation (bigger than 0, usually between 40 and 120, doesn't have to be exactly the right one). \textbf{Warn:} the difference between columns 1 (Aperture) and 2 (Real/physical) has to be the same for all lines.
% \item Pixel
% \item Wavelength [\AA] (optional, can be also empty if the user wishes to copy the whole file from \verb|logging_found_arc_lines| and only adds wavelengths to some lines)
% \item (optional: Type of the line (e.g. ThI, NeII) ).
% \item (optional: Comments ).
%\end{itemize}
%
%Afterwards the script can run again and will use this data to create a new wavelength solution. Afterwards the output or logfile, and the file given in parameter \verb|logging_arc_line_identification_spectrum| (standard: \textit{logging/arc\_line\_identification\_spectrum.pdf}) should be checked. The brightest lines of the reference file should be marked in red with the brightest emission lines of the spectrum (e.g. see Figure~\ref{Figure:create_wavelength_solution_identified}. If some areas of the spectrum (some orders, or the right or left side of some orders) are not fitted well, please add more lines of the problematic area to \textit{pixel\_to\_wavelength.txt} and remove the following files (if standard names are used) before running the pipeline again:
%\begin{lstlisting}[style=base]
%rm master_wavelength*.fits
%rm master_flat_spec_norm.fits
%rm extracted/*
%\end{lstlisting}
You might also want to read Chapter~\ref{Section:FAQ_wavelength_solution}.
\subsection{Finding further information - logged results}
\begin{itemize}
\item The pipeline logs the execution of procedures and necessary information in a logfile (standard: \verb|logfile|). Most of this information is also printed into the terminal window. In the logfile, each entry will start similar to
\begin{lstlisting}[style=base]
20190225160303 - 37448 -
\end{lstlisting}
and decodes the current time (YYYYMMDDHHMMSS) of the entry and the PID\footnote{process ID: \url{https://en.wikipedia.org/wiki/Process_identifier}} of the process that created it.
\item All adjustable parameters are logged at the beginning and at the end of the execution of a python script into a logfile (standard: \verb|logfile_params|) in a json\footnote{\url{https://en.wikipedia.org/wiki/JSON\#Example}} format.
\item Images with the results of some steps can be found in a logging subfolder (standard: \verb|logging|). Some of these images are shown in Figures~\ref{Fig:apertures_in_master_flat} to \ref{figure_arc_line_identification_residuals}.
\item Each parameter is explained in detail in the configuration file.
\item The input and output parameters of the individual procedures are explained in the file \verb|procedures.py|.
\end{itemize}
% -------------------------------------------------------------------------------------------
\newpage
\section{Technical details to some steps}
\subsection{Detailed steps performed by the pipeline}
\label{Section:pipeline_steps_general}
\noindent The following steps will be performed on a new set of data:
\begin{itemize}
\item[1.] Creating the following reduced and combined files:
\begin{description}
\item[trace1] File in which the science traces can be determined (standard: 5x white light flats, best without arc lamp). This file is also used to create a map of the background flux.
\item[trace2] File in which the wavelength calibration traces can be determined (standard: 5x ThAr alone (future development: white light flats)).
\item[cal2\_l]: Long emission lamp exposure to create a good wavelength solution over the whole chip for the calibration fiber (standard: 5x 100\,s ThAr or UNe).
\item[cal2\_s]: Short exposure in order to find the center of the saturated lines in cal2\_l (standard: 5x 10\,s ThAr or UNe taken between the cal2\_l images).
\item[(cal1\_l, cal1\_s)]: emission lamp spectra to create a wavelength solution for the science fiber (if applicable)
\item[blazecor]: File which contains the blaze correction in the science fiber (standard: 5x files with the white light flats in science fiber).
\end{description}
\item[2.] Determining the shift of the science traces compared to the previous solution (e.g. previous day). If the instrument was not touched, the shift should be small and constant ($\rightarrow$~Figure~\ref{Fig:apertures_in_master_flat}). If the file for the previous solution does not exist, or if a big deviation to the previous solution has been found, then another step will be executed:
\begin{itemize}
\item[2a.] Finding the traces of all science orders. First in a heavily binned image (e.g. 20,5) to find the traces, and then in a slightly binned image (only few pixel in dispersion direction) where the position is redefined.
\end{itemize}
% not useful \item[3.] Creating a map of the leaking light between the science orders (background map).
\item[3.] (for bifurcated fiber spectrographs) Finding the traces of the calibration orders by searching for the offset between each order in the trace1 and trace2 file ($\rightarrow$~Figure~\ref{figure_arcapertures_in_master_arc}).
\item[4.] Create the wavelength solution for the night on the calibration fiber spectra (and maybe the science fiber spectra). ($\rightarrow$~Figure~\ref{figure_arc_line_identification_positions} and \ref{figure_arc_line_identification_residuals}). This is based on the solution of the previous data set, which is adjusted according to the given parameters (see \ref{Section:further_configuration}).
\item[5.] Extract the flat and normalise it. This is used for the blaze correction.
\item[6a.] (for single fiber spectrographs) Measure the instrumental drift in emission line spectra to calibrate the wavelength solution.
\item[6b.] (if applicable: find the radial velocity drift between the wavelength solutions of calibration and science fiber at different times during the night.)
\item[7.] Extract the science spectra. This includes the following steps for each image:
\begin{itemize}
\item[a)] Data reduction of the CCD frame (if set up).
\item[b)] Finding the offset between this frame and the traces (e.g. which offset allows the extraction of maximum flux).
\item[c)] Extraction of the apertures for the science fiber.
\item[d)] (for bifurcated fiber spectrographs) Extraction of the apertures for the calibration fiber.
\item[e1)] Finding the offset between the emission lines in the calibration spectra and the lines used for the wavelength solution (for bifurcated fiber spectrographs).
\item[e2)] Additionally, if possible the measured drift from step 6 is interpolated to the mid exposure time and applied. Calculating the wavelength for each pixel in the extracted spectrum.
\item[f)] Creating the blaze corrected spectrum.
\item[g)] Creating the spectrum with normalised continuum.
%\item[h)] (Optional) Perform the radial velocity analysis (cross-correlation with template spectrum).
\item[h)] Perform some general measurements which are stored in the header.
\item[i)] Write the file with all extracted data into the folder given in the parameter \verb|path_extraction| (standard: \textit{extracted}).
\item[k)] Write subsets of data into different files to create compatibility with other software.
\end{itemize}
\item[8.] (Optional) Perform the radial velocity analysis.
\end{itemize}
\noindent If the result file of any of the above steps already exists in the folder in which the code is run, then the existing file is read instead of performing the step again in order to save computational time. If a step needs to run again, the corresponding result file needs to be deleted. (Further information can be found in \ref{Section:Problem_solution}.)
\subsection{Further settings for the parameters}
\label{Section:further_parameters}
The following steps give an overview about additional parameters which are not covered by \ref{Section:first_configuration} and \ref{Section:further_configuration}.
\begin{description}
\item[GUI] : If set to true, this allows manipulation of some parameters in a graphical user interface (GUI) during the runtime of the script. (not tested recently)
\item[width\_percentile] Only pixel with more flux than the value given in \verb|width_percentile| (of the maximum flux) will be extracted (see Fig.~\ref{Fig:traces_description}). The extraction width can be varied later using the parameters \verb|extraction_width_multiplier| and \verb|arcextraction_width_multiplier|. The covered area can be checked in the logged images given in the parameters \verb|logging_traces_im| and \verb|logging_arctraces_im|. %In case of a real Gaussian profile, the full width at half maximum and Gaussian width are connected with $FWHM = 2.35482\cdot w_{\mathrm{Gauss}}$.
\item[raw\_data\_exptim\_keyword, raw\_data\_dateobs\_keyword] : Use the right header keywords for the exposure time and the observation date. The format for the observation date should be \verb|%Y-%m-%dT%H:%M:%S.%f| or\footnote{\url{https://docs.python.org/2/library/datetime.html\#strftime-and-strptime-behavior}} \verb|%Y-%m-%dT%H:%M:%S|. More formats can be added in the procedure \textit{get\_obsdate}.
\item[raw\_data\_timezone\_cor] In the unlikely case that the date and time stored under the header keyword given in \verb|raw_data_dateobs_keyword| is not UTC, the time zone correction can be given here. Numbers will be positive east of UTC and negative west of UTC, e.g. +7 for Bangkok time or -10 for Hawaii.
\item[standard\_calibs\_create] : Define the standard CCD processing, if necessary (see \ref{Section:parameters_CCD_proc} for options).
% \item[] :
\end{description}
\begin{figure}
\begin{center}
\includegraphics[width=0.8\textwidth]{./figures/traces_description}
\end{center}
\caption{A cross section through one order in a frame to determine the traces of the orders (solid black). The centre of the order is determined from the highest flux in a polynomial fit (red dotted vertical lines and red dashed line). The extraction boundaries (black dotted-dashed vertical lines) are set to where the flux reaches 5\% of the flux range (\textbf{width\_percentile = 5}), using the minimum flux as zero point.
\label{Fig:traces_description}}
\end{figure}
\subsection{Extra commands}
\begin{description}
\item[norv] Skips running the RV analysis (TERRA, SERVAL, CERES)
\end{description}
\subsection{Necessary data to process}
To get the best results, the following data (or more files) should be taken. Please note that all filenames are case-insensitive. \textbf{No spaces or commas are allowed in the file names.}
\begin{itemize}
\item \textit{(optional) true Flat (at least 11 files of the evenly illuminated detector)}. This has been tested to improve the data quality when using a camera with small full well depth. The filename should contain \textbf{rflat} for automatic processing.
\item Tungsten (11x, white light source through the science fiber), with the filename containing \textbf{flat}, \textbf{tung}, or \textbf{whli}. The calibration fiber should be dark for this data.
\item Wavelength calibration data for the science fiber (5x, calibration lamp through the science fiber, alternating a short (e.g. 5\,s) and a long (e.g. 120\,s) exposure time). The filename should start with \textbf{arc}, \textbf{ThAr}, \textbf{Th\_Ar}, or \textbf{UNe}.
\item Wavelength calibration data for the calibration fiber (5x, calibration lamp through the calibration fiber, alternating a short (e.g. 5\,s) and a long (e.g. 120\,s) exposure time). The filename should start with \textbf{arc2}, \textbf{ThAr2}, \textbf{Th\_Ar2}, or \textbf{UNe2}. Can be taken with the wavelength calibration in the science fiber, in this case both names should be in the filename, e.g. \textbf{thar\_thar2}.
\item (\textit{Only necessary for unstabilised spectrographs, where the offset in dispersion direction between the fibers of a bifurcated fiber might vary over time}): wavelength calibration in science (and calibration fiber). Occasionally a long (e.g. 60\,s) exposure time between science frames. Same filename as before.
% \item Blazecor (11x, white light source and calibration lamp), the filename should be \textbf{blazecor}, \textbf{sflatThAr}, \textbf{whli-UNe}, or similar.
\item (optional) Bias (11x) and/or Darks (11x, for the exposure time of the true Flat, Tungsten, and Science images)% and Blazecor)
\item Science images: It is best to start the filename with the object name (as in reference file given by parameter \verb|object_file|, see \ref{Section:create_object_list_file}). However, in order to try to match the object name with entries in the \verb|object_file| the parts of the filename containing ``\_'' and ``-'' are stripped away one by one from the end of the filename.
\end{itemize}
The suggestions are for bifurcated fiber input to the spectrograph. If a spectrograph with a single fiber is used, then the same data should be taken using the same fiber (setting parameter \verb|arcshift_side = center| will process the data correctly).
\subsection{Measuring the drift of the wavelength solution}
The wavelength solution is only created once, assuming that the user provides a high quality set of spectra for this step. To calibrate the spectrum during the observation the drift can be measured using emission lamp spectra. For each line that was used to create the wavelength solution a Gaussian fit is applied to the expected area in the emission line spectrum and the pixel-offset measured. The distribution of the offsets is then binned with few different binning widths and a Gaussian fitted. The centre of the Gaussian and the width are used for the corrections.
Depending on the spectrograph and available data, they can be used to
\begin{itemize}
\item Measure the offset between science and calibration fiber (at different times during the night).
\item Monitor the drift during the night, so the drift can be interpolated for the science spectra (single fiber spectrograph).
\item Measure the drift for the current science frame (bifurcated spectrograph).
\end{itemize}
\subsection{Necessary information to calculate the barycentric correction}
\label{Section:barycentric_correction}
In order to calculate the barycentric correction the time of the observation (or Julian Date), the pointing on the sky, and the position of the observatory on Earth need to be known. This information can be given in different ways.
\begin{itemize}
\item The mid-exposure date and time is derived from the image header using the keys given in the parameters \verb|raw_data_dateobs_keyword| and \verb|raw_data_exptim_keyword| (half of the exposure time, or a different fraction if one of the keys in \verb|raw_data_mid_exposure_keys| exists). The pipeline can handle different standard formats (YYYY-MM-DDThh:mm:ss). If necessary, further formats can be added in the function \textit{get\_obsdate} in \textit{procedures.py}.
\item The position of the observatory will be extracted from the following sources (using the first available source in the following list):
\begin{enumerate}\setlength\itemsep{0em}
\item Reading latitude, longitude, and elevation from the header, using the header keys defined in the beginning of procedure \textit{get\_barycent\_cor}: site\_keys, altitude\_keys, latitude\_keys, longitude\_keys (the first available entry of each list will be used, if necessary these lists can be extended).
\item Using the site coordinates as given in the configuration file in parameters \verb|altitude|, \verb|latitude|, and \verb|longitude|.
\end{enumerate}
\item The pointing of the telescope (RA and DEC) will be derived from the following sources (using the first available source in the following list):
\begin{enumerate}\setlength\itemsep{0em}
\item Reading the object coordinates from a list of objects (see Chapter~\ref{Section:create_object_list_file}).
\item Reading the object coordinates and epoch from the image header, using the header keys defined in the beginning of procedure \textit{get\_barycent\_cor}: ra\_keys, dec\_keys, epoch\_keys (the first available entry of each list will be used, if necessary these lists can be extended).
\item If the object name contains sun, moon, or jupiter then the coordinates of the solar system object are calculated for the middle of the observing time.
\end{enumerate}
\end{itemize}
%\subsubsection{get\_ut1\_offset: requested mjd is beyond end of IERS file}
%\label{Section:update_ssephem}
%If the message \verb|get_ut1_offset: requested mjd is beyond end of IERS file| appears, the data from Jet Propulsion Laboratory Development Ephemeris is out of date. It can be updated by running \verb|ssephem_update.py|.
%\textit{At the moment DE403 is used (the CERES pipeline uses DE403). This can be changed by updating \verb|ssephem_update.py|, \verb|SSEphem/update_ssephem.py| (updating the procedure} SSEphemDownload \textit{, and at the beginning of \verb|procedures.py|.} However, this doesn't seem to have an impact on the resulting barycentric corrections.
\subsection{Create a bad pixel mask}
\label{Section:create_badpxmask}
\noindent The bad pixel mask used by the pipeline is a fits file which consists of a 2 dimensional image containing '1' for good data and '0' for bad pixels. It can be created by the script \verb|create_badpx_mask.py|, but this script is still work in progress at the moment.
\subsection{Creating a list of objects}
\label{Section:create_object_list_file}
This is done in the procedure \verb|file_assignment.py|. However, the user can also provide their own list using the file specified in parameter \verb|object_file| (standard: \verb|object_list.txt|). The file can be located in the \verb|result_path| or \verb|raw_data_paths| (the first one has higher priority, in case different files exist in the two folders only the first file will be read). The file must contain one line per object. For each object the following comma-separated values need to be given:
\begin{enumerate}\setlength\itemsep{0em}
\item Object name (the filename should start with the object name in order to be used)
\item RA (hh:mm:ss.ss or hh mm ss.ss or as one number in degrees)
\item DEC ($\pm$dd:mm:ss.ss or $\pm$dd mm ss.ss or as one number in degrees)
\item PM RA (mas/year, can be 0 in most cases)
\item PM DEC (mas/year, can be 0 in most cases)
\item Enable/Disable flag (1 for enabled)
\item Mask to use for the CERES radial velocities. At the moment G2, K5, or M2 are possible options. If empty G2 is used.
\item Velocity width to broaden the lines of the binary mask in CERES. Can be empty.
\item Epoch of the coordinates (only the number, e.g. 2000 or 2015.5)
\end{enumerate}
Please note that the position of Moon, Jupiter, and Sun are calculated by the pipeline using the mid-exposure time. This requires that the filename contains the (case-insensitive) object name. Therefore the object is not required in the object list.
\subsection{Results from the pipeline}
\subsubsection{Preparing the data for one night}
\label{section:results_pipeline}
For each of the steps given in Chapter~\ref{Section:pipeline_steps_general} the following data will be created.
\noindent \textbf{Step 1:} The reduced and combined CCD images will be stored in the folder where the pipeline was run. The file name is \verb|master_<type>.fits|, where \verb|<type>| defines the different image types, e.g. bias, sflat, trace, or arc\_l.
\vspace{0.5em}\noindent \textbf{Step 2:} The trace of each scientific order is stored as a table in the file given in parameter \verb|master_trace_sci_filename| (standard: \textit{master\_traces\_sci.fits}). This is a table fits-file and contains one line for each order, normally starting with the reddest order, which is located on the left side of the CCD image. Each line contains the following information:
\begin{itemize}\setlength\itemsep{0em}
\item Number of the aperture (starting at 0).
\item Central pixel (in dispersion direction) for the polynomial fit along the center (first searched by Gaussian fit and then more precisely by the maximum of a third order polynomial around the maximum) of the trace.
\item Parameters of a polynomial fit to the center of the trace (given in parameter \verb|polynom_traces_apertures|), e.g. 5 values if the trace is fitted with a polynomial of order 5.
\item Central pixel (in dispersion direction) for the polynomial fit along the left limit of the trace. The limit is determined from the value given in parameter \verb|width_percentile|
\item Parameters of a polynomial fit to the left limit of the trace (given in parameter \verb|polynom_traces_apertures|).
\item Central pixel (in dispersion direction) for the polynomial fit along the right limit of the trace. The limit is determined from the value given in parameter \verb|width_percentile|
\item Parameters of a polynomial fit to the right limit of the trace (given in parameter \verb|polynom_traces_apertures|).
\item The lowest and highest pixel in dispersion axis, for which the trace can be identified.
\item The last three entries of each line define the width of the trace: left border (where the flux rises above the value given in parameter \verb|width_percentile|), the right border (where the flux falls below \verb|width_percentile|), and the Gaussian width.
\end{itemize}
The identification of the traces can be checked easily in the file given in parameter \verb|logging_traces_im| (standard: \textit{traces\_in\_master\_trace1.png}). This file is located in the folder given in the parameter \verb|logging_path| (standard: \textit{logging}). An example is shown in Figure~\ref{Fig:apertures_in_master_flat}.
\begin{figure}
\begin{center}
\includegraphics[width=\textwidth]{./figures/orders_in_master_flat_marker}
\end{center}
\caption{Reduced CCD image of a white light flat (log10 gray scale) with the marked traces of the identified scientific orders/apertures (red). The extraction width (determined from the width given in parameter \textbf{width\_percentile} of the order multiplied with the value in parameter \textbf{extraction\_width\_multiplier}) is given in the dashed lines.
\label{Fig:apertures_in_master_flat}}
\end{figure}
%\vspace{0.5em}\noindent \textbf{Step 3:} The background map, 2d image is stored in the file given in parameter \verb|background_filename| (standard: \textit{background.fits}). Additionally a mask which provides the pixel which are used to create the background map is stored in the file given in parameter \verb|background_px_filename| (standard: \textit{background\_px.fits}).
\vspace{0.5em}\noindent \textbf{Step 3:} The traces of the apertures of the calibration fiber are stored in the file given in parameter \verb|master_trace_cal_filename| (standard: \textit{master\_traces\_cal.fits}). This is a table fits-file and contains the same information as the result of \textbf{Step 2}, only that the lowest order of the parameters of the polynomial fits are shifted (the curvature of the traces is kept the same). The result can be checked easily in the file given in parameter \verb|logging_arctraces_im| (standard: \textit{arctraces\_in\_master\_traces\_cal.png}). An example is shown in Figure~\ref{figure_arcapertures_in_master_arc}.
\begin{figure}
\begin{center}
\includegraphics[width=\textwidth]{./figures/arcorders_in_master_arc}
\end{center}
\caption{Reduced CCD image of a ThAr exposure (log10 gray scale) with the marked traces of the identified calibration orders/apertures (red). The extraction width (determined from the width given in parameter \textbf{width\_percentile} of the order multiplied with the value in parameter \textbf{extraction\_width\_multiplier}) is given in the dashed lines.
\label{figure_arcapertures_in_master_arc}}
\end{figure}
\vspace{0.5em}\noindent \textbf{Step 4:} The wavelength solution is stored in a table fits-file given in parameter \verb|master_wavelensolution_filename| (standard: \textit{master\_wavelength.fits}). The parameters for each order are stored in one line. For each order the following values are stored:
\begin{itemize}
\item Real order, as derived from the grating equation.
\item Central pixel of the order (in dispersion direction).
\item Parameters of a polynomial fit to the trace, e.g. 4 values if the dispersion axis is fitted with a polynomial of order 4.
\item List of the wavelengths of all the identified reference lines in this order.
\end{itemize}
During the step to find the wavelength solution, the pipeline logs data similar to the following output:
\begin{lstlisting}[style=base, basicstyle=\tiny]
Info: To match the most lines in the emission line spectrum with the old wavelength solution, a shift of 8 orders,
a multiplier to the resolution of 0.994, a shift of -300 px, and a shift of 0.0 px per order needs to be applied.
7628 lines were identified. The deviation is 0.1074 Angstroms.
Info: used 3413 lines. The standard deviation (using 8 degrees of freedom) of the residuals between the lines and
the fit is 0.00334 Angstroms. The FWHM of the emission lines results in an R = 156231 +- 45280. The 2-pixel
resolution (around the identified lines) is R = 312437 +- 12940. The deviation to the line fit converts into a
resolution R = 1635506. The average of the abs of the residuals is 0.00252 Angstroms.
Info: A 2D polynom fit with 4 orders in dispersion direction (along the traces) and 4 orders in cross-dispersion
direction was used. With this solution, the offset between aperture and real orders is 84. To fulfil the grating
equation the central pixel of the individual orders needs to be 3067.0 + -0.0*order + 0.0*order**2.With this values
the standard deviation of the residuals between the central wavelengths and the grating equation is 0.159 Angstroms.
Using the original solution gives an offset of 84.
ap cenwav minwav maxwav range Ang/ name numb gausswi gausswi min_ max_ range_reflin
px dth_avg dth_std reflin reflin _whole_order
0 7978.6 7894.7 8054.2 159.5 0.038 Ar I 2 1.78 0.58 7916.4 7948.2 98.1
0 7978.6 7894.7 8054.2 159.5 0.038 Th I 2 2.04 1.15 7937.7 8014.5 98.1
...
27 5800.1 5769.1 5854.3 85.2 0.027 Ar I 2 3.06 0.15 5802.1 5834.3 44.6
27 5800.1 5769.1 5854.3 85.2 0.027 Th I 6 2.31 0.41 5789.6 5832.4 44.6
-1 -1.0 -1.0 -1.0 -1.0 1.000 Ar I 148 2.07 0.61 5802.1 7948.2 2146.1
-1 -1.0 -1.0 -1.0 -1.0 1.000 Th I 376 2.04 0.66 5789.6 8014.5 2224.9
\end{lstlisting}
Thereby the Resolution of the FWHM is the best value to use. The distribution of the resolution along the detector is shown in Figure~\ref{figure_wavelength_solution_resolution}. The table contains the following information:
\begin{description}
\item[apert] Aperture of the trace, starting with 0 for the reddest aperture ($\tilde{m}$). To get the real order the offset $m_0$ is given in the text before (here: 84). The offset is determined in two different ways. First the data is compared to the grating equation $\lambda \propto 1/(m_0 + \tilde{m})$. In practice this is done by searching for the smallest slope in the formula $y = (m_0 + \tilde{m})\lambda_c$, where $\lambda_c$ is the central wavelength of each order. The second value for the real order offset is determined from the shift towards the previous wavelength solution. Both values for the offset should be the same. Only the first value is saved in the file for the wavelength solution.
\item[cenwave] Central wavelength of the order, determines the zero point of the wavelength solution.
\item[minwave, maxwave, ranwave] Minimum and maximum wavelength, and wavelength range which is covered by the trace of this order.
\item[Ang/px] Resolution in $\frac{\AA}{\mathrm{px}}$ at the central wavelength.
\item[name] Type of the reference line. If different types of reference lines were found in the order then an output for each available type is created.
\item[number] Number of reference lines for this type and order.
\item[gausswidth\_avg, gausswidth\_std] Average and standard deviation of the Gaussian width of the emission lines
\item[min\_refline, max\_refline] Minimum and maximum wavelength of the reference lines of this type and order
\item[range\_reflines\_whole\_order] Wavelength range which was covered by reference lines for this order (independent of type of the line)
\end{description}
The last lines give the information for all apertures. Columns for which no useful information can be derived show the value \verb|-1|.
\begin{figure}
\begin{center}
\includegraphics[width=\textwidth]{./figures/arc_line_identification_positions}
\end{center}
\caption{Reduced CCD image of a ThAr exposure (log10 gray scale) with the identified lines from the reference catalogue (red). Only this set of data was used to create the wavelength solution. The remaining lines of the reference catalogue, which weren't used for fitting the solution are shown in green. The data on an order by order basis is shown in Figure~\ref{figure_arc_line_identification_spectrum}.
\label{figure_arc_line_identification_positions}}
\end{figure}
\begin{figure}
\begin{center}
\includegraphics[width=\textwidth]{./figures/screenshot_spectral-atlas}
\end{center}
\caption{One page (aperture 13, real order 88) of the spectral atlas (file created from parameter \textbf{logging\_arc\_line\_identification\_spectrum}) created from the wavelength solution. Blue shows the spectrum of the emission lines in the long and orange the emission lines in the short exposure. The identified lines are marked in red, with the length of the marker being proportional to the intensity of the line as given in the \textbf{reference\_catalog} (length is reset for each order). Green shows a subset of the non-identified lines from the \textbf{reference\_catalog}, this means the green markers should normally be shorter than the red ones. Problems with an overcrowded \textbf{reference\_catalog} are visible at a few places (e.g. 6466\,\AA), for better wavelength solutions the line catalogue should be cleared of bad lines.
\label{figure_arc_line_identification_spectrum}}
\end{figure}
\begin{figure}
\begin{center}
\includegraphics[width=\textwidth]{./figures/arc_line_identification_residuals}
\end{center}
\caption{Residuals between the identified catalogue lines and the wavelength solution. Colour-coded are the different apertures.
\label{figure_arc_line_identification_residuals}}
\end{figure}
\begin{figure}
\begin{center}
\includegraphics[width=\textwidth]{./figures/wavelength_solution_resolution_on_detector_cal}
\end{center}
\caption{Resolution along the detector. Measurements were only possible where catalogue lines were matched with emission lines. Local areas of low resolution are most likely caused by blended lines. The wavelength solution might be improved by removing these lines from the catalogue.
\label{figure_wavelength_solution_resolution}}
\end{figure}
\vspace{0.5em}\noindent \textbf{Step 5:} The normalised white light flat is stored in the file given in parameter \verb|master_flat_spec_norm_filename| (standard: \textit{master\_flat\_spec\_norm.fits}). The extraction and data format is described in Chapter~\ref{section:extraction_data_format}.
\subsubsection{Format of the extracted files}
Different types of extracted files are created. They are described in the following subsections.
\paragraph{raw-data filename in path\_extraction}
\label{section:extraction_data_format}
The final fits file contains the original header and some additional information, for example the calibration steps which were applied and some information about the flux collected in the different apertures. The data in the file is stored in a 3D array in the form: data type, aperture, and pixel. The data types are similar to the ones created by the CERES pipeline and are the following:
\begin{enumerate}
\setcounter{enumi}{-1}
\item 2D array with the wavelength for each aperture and pixel in barycentric system.
\item Extracted spectrum without any modification.
\item Measurement of the error of the extracted spectrum (using the scatter of the residuals after fitting a 2D polynomial to the background areas (noise in the dark/bias)).
\item Blaze corrected spectrum, calculated by dividing the extracted spectrum by the normalised flat spectrum.
\item Error of the flat corrected spectrum (residuals of a polynomial fitted to the blaze corrected spectrum).
\item Continuum normalised spectrum. The continuum is derived by fitting a polynomial to the flat corrected spectrum, using only areas of the spectrum where no lines are located.
\item Signal to noise ratio in the continuum, calculated from the residuals between continuum fit and measured continuum and the flux in the continuum.
\item Mask with good areas of the spectrum. The following values are used:
\begin{itemize}
\item[1] good
\item[0] no data available
\item[0.1] saturated pixel in the extracted spectrum
\item[0.2] bad pixel in the extracted spectrum
\end{itemize}
\item Spectrum of the calibration fiber, e.g. of the emission line lamp.
\item Wavelength for each order and pixel without barycentric correction.
\end{enumerate}
These files can be plotted with the pipeline. Please find instructions in Chapter~\ref{Section:Plotting_extracted_files}.
\paragraph{path\_harpsformat + Object name}
This file contains the extracted spectrum without any modification in the same form as the \textit{e2ds} files created by the HARPS pipeline. The wavelength solution is stored in the header.
\paragraph{raw-data filename + \_lin}
\label{Section:linearised_spectrum}
The extracted spectrum is linearised using a wavelength step as given in parameter \verb|wavelength_scale_resolution|. The wavelength spectrum is interpolated by using the weighted mean of the neighbouring data points with the wavelength difference as weights.
\paragraph{raw-data filename + \_lin\_cont}
This file contains data in the same form as described in Chapter~\ref{Section:linearised_spectrum}, only that the continuum corrected data was linearised.
\paragraph{path\_extraction\_single}
All the files are 2D arrays with the order/aperture as first and number of pixel in second dimension.
\begin{description}
\item[raw-data filename + \_extr] Extracted spectrum with wavelength solution in IRAF format (wavelength solution without barycentric correction).
\item[raw-data filename + \_extr\_bluefirst] Same as the entry before, but starting with the blue orders instead of the red orders.
\item[raw-data filename + \_blaze] Blaze corrected spectrum with wavelength solution in IRAF format (wavelength solution without barycentric correction).
\item[raw-data filename + \_blaze\_bluefirst] Same as the entry before, but starting with the blue orders instead of the red orders.
\item[raw-data filename + \_wave] The wavelength for each aperture and pixel.
\item[raw-data filename + \_weight] Mask with good areas of the spectrum.
\end{description}
\paragraph{path\_rv\_terra + $<$object name$>$ + /data/YYYY-MM-DDHHMMSS.csv files}
The data in here is used in order to measure RV with Terra (\url{https://drive.google.com/file/d/1xK-lYghFwpwtdXG9b4IbryYRd102q7So/view}). The barycentric corrected wavelength solution and the continuum corrected spectrum (multiplied by the bad-pixel mask) are used. See Chapter~\ref{Section:files_folders_rv_for_terra} for more information.
\subsection{Radial velocity analysis}
If the packages are installed, the radial velocity analysis will be run after the extraction of the science files. The necessary files will be created each time \verb|hiflex.py| is run. It is possible to add files from different nights together.
\subsubsection{TERRA}
\label{Section:files_folders_rv_for_terra}
All information is relative to the folder given in parameter \verb|path_rv_terra| (standard: terra\_rv).
The data of the individual objects are stored in the subfolder \textit{object name + /data/}. If TERRA is installed and \verb|terra_jar_file| is set correctly to the TERRA \textit{PRV.jar}-file, then TERRA will run on each \textit{object name} within the folder. The results of the radial velocity analysis will be stored in \textit{object name + /results/synthetic.rv}.
It is possible to include the information from previous nights of the same object in either one of the ways listed below. Please note that in order to do this, the number of orders and the number of extracted pixels always have to be the same.
\begin{itemize}
\item Link or copy the \textit{.csv}-files into the corresponding \textit{data/}-folders for the object and (1) rerun \verb|python <path to scripts>/hiflex.py| or (2) alternatively run the commands as stored in the logfile.
\item Link or copy the files from folder given in parameter \verb|path_extraction| (standard: extracted) into the current one. Afterwards run \verb|python <path to scripts>/hiflex.py| again.
\end{itemize}
It is also possible to rerun TERRA with fewer images (e.g. exclude bad signal to noise files). To do this change into the folder given in parameter \verb|path_rv_terra|. Remove the files you don't need from the subfolder \textit{object name + /data/}. Run TERRA using the command given in the logfile. Afterwards move back to the folder in which HiFLEx was run and run \verb|python <path to scripts>/measurements_to_output.py|.
\subsubsection{SERVAL}
\label{Section:files_folders_rv_for_serval}
All information is relative to the folder given in parameter \verb|path_rv_serval| (standard: serval\_rv).
The data to be used for each Object is stored in the files \textit{filelist\_ + object name + .txt}. If SERVAL is installed and \verb|path_serval| is set correctly, then SERVAL will run on each \textit{object name} within the folder. The results of the radial velocity analysis will be stored in \textit{object name + / + object name + rvc.dat}.
It is possible to include the information from previous nights of the same object in either one of the ways listed below. Please note that in order to do this, the number of orders and the number of extracted pixels always have to be the same.
\begin{itemize}
\item Link or copy the \textit{.csv}-files into the corresponding \textit{data/}-folders and (1) rerun \verb|python <path to scripts>/hiflex.py| or (2) alternatively run the commands as stored in the logfile.
\item Link or copy the files from folder given in parameter \verb|path_extraction| (standard: extracted) into the current one. Afterwards run \verb|python <path to scripts>/hiflex.py| again.
\end{itemize}
It is also possible to rerun SERVAL with fewer images (e.g. exclude bad signal to noise files). To do this change into the folder given in parameter \verb|path_rv_serval|. Remove the files you don't need from the file \textit{filelist\_ + object name + .txt}. Run SERVAL using the command given in the logfile. Afterwards move back to the folder in which HiFLEx was run and run \verb|python <path to scripts>/measurements_to_output.py|.
Before running SERVAL manually, it might be necessary to add the path to PYTHONPATH. The example below shows the commands for \verb|bash| and \verb|csh| to do this:
\begin{lstlisting}[style=base]
export PYTHONPATH=<path/to/mzechmeister>/python:$PYTHONPATH
setenv PYTHONPATH <path/to/mzechmeister>/python:$PYTHONPATH
\end{lstlisting}
\subsubsection{CERES}
The results are stored in the folder given in parameter path\_rv\_ceres (standard: ceres\_rv).
This step is only done, if the CERES pipeline is installed and \verb|path_ceres| is set correctly to the base folder of CERES and the sub-folders \textit{utils/Correlation}, \textit{utils/GLOBALutils}, \textit{utils/OptExtract}, and \textit{utils/CCF} exist within.
\begin{description}
\item[*\_$<$mask$>$.pdf:] Cross-correlation function between object spectrum and the template.
\item[pkl-files:] Data used to plot the cross-correlation function in the python-pickle format.
\item[stellar\_pars.txt-files:] Stellar parameters of the object: $T_{eff}$, $\log g$, $Z$, $v\sin i$, $vel0$
\end{description}
\subsection{Handling Parameters}
\begin{enumerate}
\item The script has some hard coded values in the *.py files (normally at the beginning of a procedure). These values can be changed for testing, but usually do not need any changes.
\item The pipeline reads the parameters from a configuration file (standard: \verb|conf.txt|), given at the beginning of the python scripts. Some of the parameters in the configuration file need adjustment on a regular basis (see \ref{Section:first_configuration} and \ref{Section:further_configuration}).
\item Furthermore, the pipeline reads the configuration file given in parameter \verb|configfile_fitsfiles| (standard: \verb|fits_conf.txt|), which is automatically created by the pipeline when running \verb|file_assignment.py|.
\item The parameters which are set in the configuration file(s) can be overwritten with a command line input when a python script is started (e.g. \verb|python bla.py argument1=valueX argument2=valueY| (no spaces around the =)).
\item Some parameters can be overwritten during the run time of the script by user input if the GUI is enabled.
\end{enumerate}
\subsection{Cosmic ray removal}
\subsubsection{Training the cosmic ray removal deepCR for a new camera}
\label{Section:train_deepCR}
So far deepCR comes only with the training set for HST WFC\footnote{\url{https://github.com/profjsb/deepCR\#quick-start}}. Testing this approach has identified (science-) traces as cosmic rays (settings: \verb|cosmic_ray_settings = [deepCR, ACS-WFC-F606W-2-32, 0.999]| and \verb|path_reduced| set to a valid path to check cosmic ray removal results).
It is possible to train the model on the user's camera. The steps are described in
\url{https://github.com/profjsb/deepCR/blob/master/docs/tutorial_train.rst}~. A good dataset is probably to use as many science frames as possible and search for cosmic rays (including along the traces).
% started cooding train_cosmic_ray_deep_learning.py
% -------------------------------------------------------------------------------------------
\newpage
\section{Post-extraction analysis}
% -------------------------------------------------------------------------------------------
\noindent Once the science spectra have been extracted, these files can be analysed in more detail.
\subsection{Plotting the data}
\label{Section:Plotting_extracted_files}
To plot the results the python program \verb|plot_img_spec.py| can be used. This script reads the files as described in \ref{section:extraction_data_format} and which are listed in the file \verb|plot_files.lst|. At the first start it will plot the extracted spectrum for all orders of all files in the list. The spectra will be plotted in the pixel-reference.
This plot uses the matplotlib libraries and a toolbar for interactive navigation is available\footnote{\label{Footnote:matplotlib_toolbar} \url{https://matplotlib.org/3.1.1/users/navigation_toolbar.html}} in the lower left corner. Please refer to Footnote~\ref{Footnote:matplotlib_toolbar} for instruction on the use. While the changes done with the toolbar are instant, the changes of the parameters on the right have to be updated using the button provided. To reset the plot to automatic scaling enable \textit{reset plot}. Please be aware that once the window size has been changed and the plot is updated again the margins will be adapted to use the plot area in an optimal way.
Further limitations of the plotted data can be done using the GUI. For a plot in different x-axis the first text box can be used. See the list in Table~\ref{Tab:plotting_options} for options. For plotting only data from a subsection of files, the first text boxes \textit{Which file} and \textit{Exclude} below this field can be used. For plotting only a subsection of data types the second text boxes with \textit{Which data} and \textit{Exclude} are used to define this. For plotting only some apertures, the last text box is used (see the examples in Figures~\ref{figure_plotting_results_example1} and~\ref{figure_plotting_results_example2}).
\begin{figure}
\begin{center}
\includegraphics[width=\textwidth]{./figures/screenshot_plot_img_spec}
\end{center}
\caption{An example of how to plot the extracted data. Shown are the solar spectra of entry 20 and 280 in the plot\_files.lst file (\textit{Which file}). The graph shows the blaze corrected flux (\textit{Which data}: 3), plotted over the wavelength (w). Only apertures 18, 19, and 20 are shown. Each of the selections can be inverted by ticking the \textit{Exclude}-boxes below their entries.
\label{figure_plotting_results_example1}}
\end{figure}
\begin{figure}
\begin{center}
\includegraphics[width=\textwidth]{./figures/screenshot_plot_img_spec_lomb_scargle_fft}
\end{center}
\caption{Another example of how to plot the extracted data. Shown is the Lomb-Scargle Periodogram (wl) for the same spectra as shown in Figure~\ref{figure_plotting_results_example1} of files 20 and 280 in the plot\_files.lst file. The blaze corrected spectra (data type 3) of all apertures except 0 to 17 and 21 to 280 (this means only apertures 18 to 20) and their corresponding wavelength solution was used as basis for the calculations.
\label{figure_plotting_results_example2}}
\end{figure}
\begin{table}[htb]
\caption{Options for the data on the x-axis for plotting. The tick for wavelength can be set, then it will use the wavelength instead of pixel as basis\vspace{-0.9\baselineskip} }
\label{Tab:plotting_options}
\begin{tabularx}{\textwidth}{c X}
(empty) & The flux is plotted against the pixel along the CCD in dispersion direction. \\
%\textbf{w} & The flux is plotted against the wavelength (stored in data type 0). \\
\textbf{f} & The Fourier transformation is plotted against the period (in pixel). \\
\textbf{l} & The Lomb-Scargle-Periodogram is plotted against the period (in pixel). \\
%\textbf{wl} & The Lomb-Scargle-Periodogram is plotted against the period (in wavelength). If you look for periodic signals in the spectrum, this is what you very probably should use. \\
\textbf{w9} & (w followed by number): The plot is done against the wavelength (stored in data type 9 (or any other number)). \\
\end{tabularx}