SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
format_sam.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <seqan3/std/algorithm>
16 #include <seqan3/std/concepts>
17 #include <iterator>
18 #include <seqan3/std/ranges>
19 #include <string>
20 #include <vector>
21 
51 
52 namespace seqan3
53 {
54 
// Body of the format_sam class. NOTE: this listing drops some lines (visible as gaps in the embedded line
// numbers), including the class-head itself, so a few declarations below appear without their first line.
128 {
129 public:
// All special member functions are explicitly defaulted.
133  // construction cannot be noexcept because this class has a std::string variable as a quality string buffer.
134  format_sam() = default;
135  format_sam(format_sam const &) = default;
136  format_sam & operator=(format_sam const &) = default;
137  format_sam(format_sam &&) = default;
138  format_sam & operator=(format_sam &&) = default;
139  ~format_sam() = default;
140 
142 
// Initialiser holding the single entry "sam". The declaration it belongs to is not visible in this listing;
// presumably this is the list of accepted file extensions -- TODO confirm against the full header.
145  {
146  { "sam" },
147  };
148 
149 protected:
// Reads one sequence record. The definition further below forwards to read_alignment_record().
// (One parameter line between `stream` and `sequence` is missing from this listing.)
150  template <typename stream_type, // constraints checked by file
151  typename seq_legal_alph_type, bool seq_qual_combined,
152  typename seq_type, // other constraints checked inside function
153  typename id_type,
154  typename qual_type>
155  void read_sequence_record(stream_type & stream,
157  seq_type & sequence,
158  id_type & id,
159  qual_type & qualities);
160 
// Writes one sequence record. The definition further below forwards to write_alignment_record() and fills
// all alignment-specific fields with placeholder values.
161  template <typename stream_type, // constraints checked by file
162  typename seq_type, // other constraints checked inside function
163  typename id_type,
164  typename qual_type>
165  void write_sequence_record(stream_type & stream,
166  sequence_file_output_options const & SEQAN3_DOXYGEN_ONLY(options),
167  seq_type && sequence,
168  id_type && id,
169  qual_type && qualities);
170 
// Reads one alignment record (one SAM line) and, if present, the header that precedes it.
// Every output argument is taken by non-const reference; the definition checks with
// detail::decays_to_ignore_v which of them are actually requested.
// (One parameter line between `ref_seqs` and `seq` is missing from this listing.)
171  template <typename stream_type, // constraints checked by file
172  typename seq_legal_alph_type,
173  typename ref_seqs_type,
174  typename ref_ids_type,
175  typename seq_type,
176  typename id_type,
177  typename offset_type,
178  typename ref_seq_type,
179  typename ref_id_type,
180  typename ref_offset_type,
181  typename align_type,
182  typename cigar_type,
183  typename flag_type,
184  typename mapq_type,
185  typename qual_type,
186  typename mate_type,
187  typename tag_dict_type,
188  typename e_value_type,
189  typename bit_score_type>
190  void read_alignment_record(stream_type & stream,
191  sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
192  ref_seqs_type & ref_seqs,
194  seq_type & seq,
195  qual_type & qual,
196  id_type & id,
197  offset_type & offset,
198  ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
199  ref_id_type & ref_id,
200  ref_offset_type & ref_offset,
201  align_type & align,
202  cigar_type & cigar_vector,
203  flag_type & flag,
204  mapq_type & mapq,
205  mate_type & mate,
206  tag_dict_type & tag_dict,
207  e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
208  bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
209 
// Writes one alignment record as a tab-separated SAM line; the definition also writes the header on the
// first call when options.sam_require_header is set and a header object was passed.
210  template <typename stream_type,
211  typename header_type,
212  typename seq_type,
213  typename id_type,
214  typename ref_seq_type,
215  typename ref_id_type,
216  typename align_type,
217  typename qual_type,
218  typename mate_type,
219  typename tag_dict_type,
220  typename e_value_type,
221  typename bit_score_type>
222  void write_alignment_record(stream_type & stream,
223  sam_file_output_options const & options,
224  header_type && header,
225  seq_type && seq,
226  qual_type && qual,
227  id_type && id,
228  int32_t const offset,
229  ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
230  ref_id_type && ref_id,
231  std::optional<int32_t> ref_offset,
232  align_type && align,
233  std::vector<cigar> const & cigar_vector,
234  sam_flag const flag,
235  uint8_t const mapq,
236  mate_type && mate,
237  tag_dict_type && tag_dict,
238  e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
239  bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score));
240 
241 private:
244 
// Empty string view; returned by the helper whose body follows.
246  static constexpr std::string_view dummy{};
247 
250 
253 
// Body of a helper whose signature line is missing from this listing; it returns the empty `dummy` view.
// Presumably this is the default_or() overload selected for ignored fields -- TODO confirm.
256  {
257  return dummy;
258  }
259 
// Generic default_or(): perfectly forwards its argument unchanged.
261  template <typename t>
262  decltype(auto) default_or(t && v) const noexcept
263  {
264  return std::forward<t>(v);
265  }
266 
267  using format_sam_base::read_field; // inherit read_field functions from format_base explicitly
268 
// Tail of a declaration whose first line is missing from this listing. The parameters match the
// (variant, stream_view, value) calls to read_sam_dict_vector() made by the tag-dictionary read_field() below.
269  template <typename stream_view_type, typename value_type>
271  stream_view_type && stream_view,
272  value_type value);
273 
// Tail of a declaration whose first line is missing from this listing. The parameters match the
// (variant, stream_view) call to read_sam_byte_vector() made by the tag-dictionary read_field() below.
274  template <typename stream_view_type>
276  stream_view_type && stream_view);
277 
// read_field() overload that parses one "TAG:TYPE:VALUE" optional field into a sam_tag_dictionary.
278  template <typename stream_view_type>
279  void read_field(stream_view_type && stream_view, sam_tag_dictionary & target);
280 
// Output helpers; their definitions are outside this part of the listing. Judging by the name, they write
// '*' for an empty field -- TODO confirm.
281  template <typename stream_it_t, std::ranges::forward_range field_type>
282  void write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value);
283 
284  template <typename stream_it_t>
285  void write_range_or_asterisk(stream_it_t & stream_it, char const * const field_value);
286 
287  template <typename stream_it_t>
288  void write_tag_fields(stream_it_t & stream, sam_tag_dictionary const & tag_dict, char const separator);
289 };
290 
// Reads a single sequence record (id, sequence, qualities) by delegating to read_alignment_record() and
// ignoring every alignment-specific field via std::ignore.
// Throws parse_error if the sequence or the id that was read is empty.
// NOTE: the parameter line declaring `options` and the line(s) declaring `align_options` (and presumably
// `default_header` / `tmp_qual`) are missing from this listing; those names are used below.
292 template <typename stream_type, // constraints checked by file
293  typename seq_legal_alph_type, bool seq_qual_combined,
294  typename seq_type, // other constraints checked inside function
295  typename id_type,
296  typename qual_type>
297 inline void format_sam::read_sequence_record(stream_type & stream,
299  seq_type & sequence,
300  id_type & id,
301  qual_type & qualities)
302 {
304 
305  if constexpr (seq_qual_combined)
306  {
// Qualities are read into the temporary buffer first and then merged into the combined sequence elements.
307  tmp_qual.clear();
308  read_alignment_record(stream, align_options, std::ignore, default_header, sequence, tmp_qual, id,
309  std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
310  std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
311 
// Assign each quality character to element 1 of the corresponding sequence entry.
// NOTE(review): the loop is bounded by tmp_qual only; it assumes `sequence` has at least as many elements.
312  for (auto sit = tmp_qual.begin(), dit = std::ranges::begin(sequence); sit != tmp_qual.end(); ++sit, ++dit)
313  get<1>(*dit).assign_char(*sit);
314  }
315  else
316  {
317  read_alignment_record(stream, align_options, std::ignore, default_header, sequence, qualities, id,
318  std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
319  std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
320  }
321 
// Emptiness checks are only compiled for fields that are not std::ignore.
322  if constexpr (!detail::decays_to_ignore_v<seq_type>)
323  if (std::ranges::distance(sequence) == 0)
324  throw parse_error{"The sequence information must not be empty."};
325  if constexpr (!detail::decays_to_ignore_v<id_type>)
326  if (std::ranges::distance(id) == 0)
327  throw parse_error{"The id information must not be empty."};
328 
// Keep only the part of the id before the first whitespace character.
// NOTE(review): unlike the checks above, this statement is not guarded by decays_to_ignore_v<id_type>;
// confirm that it compiles/behaves as intended when `id` is std::ignore.
329  if (options.truncate_ids)
330  id = id | views::take_until_and_consume(is_space) | views::to<id_type>;
331 }
332 
334 template <typename stream_type, // constraints checked by file
335  typename seq_type, // other constraints checked inside function
336  typename id_type,
337  typename qual_type>
338 inline void format_sam::write_sequence_record(stream_type & stream,
339  sequence_file_output_options const & SEQAN3_DOXYGEN_ONLY(options),
340  seq_type && sequence,
341  id_type && id,
342  qual_type && qualities)
343 {
344  using default_align_t = std::pair<std::span<gapped<char>>, std::span<gapped<char>>>;
345  using default_mate_t = std::tuple<std::string_view, std::optional<int32_t>, int32_t>;
346 
347  sam_file_output_options output_options;
348 
349  write_alignment_record(stream,
350  output_options,
351  /*header*/ std::ignore,
352  /*seq*/ default_or(sequence),
353  /*qual*/ default_or(qualities),
354  /*id*/ default_or(id),
355  /*offset*/ 0,
356  /*ref_seq*/ std::string_view{},
357  /*ref_id*/ std::string_view{},
358  /*ref_offset*/ -1,
359  /*align*/ default_align_t{},
360  /*cigar_vector*/ std::vector<cigar>{},
361  /*flag*/ sam_flag::none,
362  /*mapq*/ 0,
363  /*mate*/ default_mate_t{},
364  /*tag_dict*/ sam_tag_dictionary{},
365  /*e_value*/ 0,
366  /*bit_score*/ 0);
367 }
368 
// Reads one alignment record (one SAM line) from `stream`, preceded by the header if the stream is currently
// positioned at an '@' character.
//
// The fields are consumed strictly in file order: ID, FLAG, REF_ID, REF_OFFSET, MAPQ, CIGAR, the three mate
// fields, SEQ, QUAL and finally any optional tags. Fields whose output argument is std::ignore are still
// consumed from the stream but not stored.
//
// Throws format_error for negative positions and for mismatching sequence/quality lengths; the sequence
// transform throws parse_error for characters that are not valid for seq_legal_alph_type.
//
// NOTE: the parameter line declaring `header` (between `ref_seqs` and `seq`) is missing from this listing;
// `header` is used throughout the body.
370 template <typename stream_type, // constraints checked by file
371  typename seq_legal_alph_type,
372  typename ref_seqs_type,
373  typename ref_ids_type,
374  typename seq_type,
375  typename id_type,
376  typename offset_type,
377  typename ref_seq_type,
378  typename ref_id_type,
379  typename ref_offset_type,
380  typename align_type,
381  typename cigar_type,
382  typename flag_type,
383  typename mapq_type,
384  typename qual_type,
385  typename mate_type,
386  typename tag_dict_type,
387  typename e_value_type,
388  typename bit_score_type>
389 inline void format_sam::read_alignment_record(stream_type & stream,
390  sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
391  ref_seqs_type & ref_seqs,
393  seq_type & seq,
394  qual_type & qual,
395  id_type & id,
396  offset_type & offset,
397  ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
398  ref_id_type & ref_id,
399  ref_offset_type & ref_offset,
400  align_type & align,
401  cigar_type & cigar_vector,
402  flag_type & flag,
403  mapq_type & mapq,
404  mate_type & mate,
405  tag_dict_type & tag_dict,
406  e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
407  bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
408 {
409  static_assert(detail::decays_to_ignore_v<ref_offset_type> ||
410  detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
411  "The ref_offset must be a specialisation of std::optional.");
412 
// stream_view walks the raw stream; field_view yields one tab-terminated field at a time.
413  auto stream_view = views::istreambuf(stream);
414  auto field_view = stream_view | views::take_until_or_throw_and_consume(is_char<'\t'>);
415 
416  // these variables need to be stored to compute the ALIGNMENT
417  int32_t ref_offset_tmp{};
418  std::ranges::range_value_t<decltype(header.ref_ids())> ref_id_tmp{};
419  [[maybe_unused]] int32_t offset_tmp{};
420  [[maybe_unused]] int32_t soft_clipping_end{};
421  [[maybe_unused]] std::vector<cigar> tmp_cigar_vector{};
422  [[maybe_unused]] int32_t ref_length{0}, seq_length{0}; // length of aligned part for ref and query
423 
424  // Header
425  // -------------------------------------------------------------------------------------------------------------
426  if (is_char<'@'>(*std::ranges::begin(stream_view))) // we always read the header if present
427  {
428  read_header(stream_view, header, ref_seqs);
429 
430  if (std::ranges::begin(stream_view) == std::ranges::end(stream_view)) // file has no records
431  return;
432  }
433 
434  // Fields 1-5: ID FLAG REF_ID REF_OFFSET MAPQ
435  // -------------------------------------------------------------------------------------------------------------
436  read_field(field_view, id);
437 
// FLAG is parsed as a plain integer first and then converted to sam_flag.
438  uint16_t flag_integral{};
439  read_field(field_view, flag_integral);
440  flag = sam_flag{flag_integral};
441 
442  read_field(field_view, ref_id_tmp);
443  check_and_assign_ref_id(ref_id, ref_id_tmp, header, ref_seqs);
444 
445  read_field(field_view, ref_offset_tmp);
446  --ref_offset_tmp; // SAM format is 1-based but SeqAn operates 0-based
447 
// After the decrement: -1 means the file contained 0, anything below -1 means a negative file value.
// (The final `else if` condition is always true when it is reached.)
448  if (ref_offset_tmp == -1)
449  ref_offset = std::nullopt; // indicates an unmapped read -> ref_offset is not set
450  else if (ref_offset_tmp > -1)
451  ref_offset = ref_offset_tmp;
452  else if (ref_offset_tmp < -1)
453  throw format_error{"No negative values are allowed for field::ref_offset."};
454 
455  read_field(field_view, mapq);
456 
457  // Field 6: CIGAR
458  // -------------------------------------------------------------------------------------------------------------
// The CIGAR string is only parsed when the alignment or the cigar vector was requested.
459  if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
460  {
461  if (!is_char<'*'>(*std::ranges::begin(stream_view))) // no cigar information given
462  {
463  std::tie(tmp_cigar_vector, ref_length, seq_length) = detail::parse_cigar(field_view);
464  transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
465  // the actual cigar_vector is swapped with tmp_cigar_vector at the end to avoid copying
466  }
467  else
468  {
// The iterator returned by next() is discarded.
// NOTE(review): this relies on advancing a field_view iterator also advancing the underlying stream -- confirm.
469  std::ranges::next(std::ranges::begin(field_view)); // skip '*'
470  }
471  }
472  else
473  {
474  detail::consume(field_view);
475  }
476 
// offset_tmp stays 0 unless transfer_soft_clipping_to() was called above.
477  offset = offset_tmp;
478 
479  // Field 7-9: (RNEXT PNEXT TLEN) = MATE
480  // -------------------------------------------------------------------------------------------------------------
481  if constexpr (!detail::decays_to_ignore_v<mate_type>)
482  {
483  std::ranges::range_value_t<decltype(header.ref_ids())> tmp_mate_ref_id{};
484  read_field(field_view, tmp_mate_ref_id); // RNEXT
485 
486  if (tmp_mate_ref_id == "=") // indicates "same as ref id"
487  {
// Reuse the already resolved ref_id if it was requested, otherwise resolve the raw id read above.
488  if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
489  get<0>(mate) = ref_id;
490  else
491  check_and_assign_ref_id(get<0>(mate), ref_id_tmp, header, ref_seqs);
492  }
493  else
494  {
495  check_and_assign_ref_id(get<0>(mate), tmp_mate_ref_id, header, ref_seqs);
496  }
497 
498  int32_t tmp_pnext{};
499  read_field(field_view, tmp_pnext); // PNEXT
500 
501  if (tmp_pnext > 0)
502  get<1>(mate) = --tmp_pnext; // SAM format is 1-based but SeqAn operates 0-based.
503  else if (tmp_pnext < 0)
504  throw format_error{"No negative values are allowed at the mate mapping position."};
505  // tmp_pnext == 0 indicates an unmapped mate -> do not fill std::optional get<1>(mate)
506 
507  read_field(field_view, get<2>(mate)); // TLEN
508  }
509  else
510  {
// Mate not requested: skip RNEXT, PNEXT and TLEN.
511  for (size_t i = 0; i < 3u; ++i)
512  {
513  detail::consume(field_view);
514  }
515  }
516 
517  // Field 10: Sequence
518  // -------------------------------------------------------------------------------------------------------------
519  if (!is_char<'*'>(*std::ranges::begin(stream_view))) // sequence information is given
520  {
521  auto constexpr is_legal_alph = char_is_valid_for<seq_legal_alph_type>;
522  auto seq_stream = field_view | std::views::transform([is_legal_alph] (char const c) // enforce legal alphabet
523  {
524  if (!is_legal_alph(c))
525  throw parse_error{std::string{"Encountered an unexpected letter: "} +
526  "char_is_valid_for<" +
527  detail::type_name_as_string<seq_legal_alph_type> +
528  "> evaluated to false on " +
// (The line completing this throw expression is missing from the listing.)
530  return c;
531  });
532 
533  if constexpr (detail::decays_to_ignore_v<seq_type>)
534  {
// SEQ itself is not requested, but the alignment may still need the aligned part of the query.
535  if constexpr (!detail::decays_to_ignore_v<align_type>)
536  {
537  static_assert(sequence_container<std::remove_reference_t<decltype(get<1>(align))>>,
538  "If you want to read ALIGNMENT but not SEQ, the alignment"
539  " object must store a sequence container at the second (query) position.");
540 
541  if (!tmp_cigar_vector.empty()) // only parse alignment if cigar information was given
542  {
543 
// Skip offset_tmp leading characters, copy seq_length characters into the query part of the alignment,
// then skip soft_clipping_end trailing characters.
544  auto tmp_iter = std::ranges::begin(seq_stream);
545  std::ranges::advance(tmp_iter, offset_tmp);
546 
547  for (; seq_length > 0; --seq_length) // seq_length is not needed anymore
548  {
549  get<1>(align).push_back(std::ranges::range_value_t<decltype(get<1>(align))>{}.assign_char(*tmp_iter));
550  ++tmp_iter;
551  }
552 
553  std::ranges::advance(tmp_iter, soft_clipping_end);
554  }
555  else
556  {
557  get<1>(align) = std::remove_reference_t<decltype(get<1>(align))>{}; // empty container
558  }
559  }
560  else
561  {
562  detail::consume(seq_stream);
563  }
564  }
565  else
566  {
567  read_field(seq_stream, seq);
568 
569  if constexpr (!detail::decays_to_ignore_v<align_type>)
570  {
571  if (!tmp_cigar_vector.empty()) // if no alignment info is given, the field::alignment should remain empty
572  {
// The query part of the alignment is `seq` without the first offset_tmp and last soft_clipping_end elements.
573  assign_unaligned(get<1>(align),
574  seq | views::slice(static_cast<decltype(std::ranges::size(seq))>(offset_tmp),
575  std::ranges::size(seq) - soft_clipping_end));
576  }
577  }
578  }
579  }
580  else
581  {
582  std::ranges::next(std::ranges::begin(field_view)); // skip '*'
583  }
584 
585  // Field 11: Quality
586  // -------------------------------------------------------------------------------------------------------------
// QUAL may be the last field of the line, so it ends at a tab or at a line break.
587  auto const tab_or_end = is_char<'\t'> || is_char<'\r'> || is_char<'\n'>;
588  read_field(stream_view | views::take_until_or_throw(tab_or_end), qual);
589 
590  if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
591  {
// The lengths are only compared when both fields are non-empty.
592  if (std::ranges::distance(seq) != 0 && std::ranges::distance(qual) != 0 &&
593  std::ranges::distance(seq) != std::ranges::distance(qual))
594  {
595  throw format_error{detail::to_string("Sequence length (", std::ranges::distance(seq),
596  ") and quality length (", std::ranges::distance(qual),
597  ") must be the same.")};
598  }
599  }
600 
601  // All remaining optional fields if any: SAM tags dictionary
602  // -------------------------------------------------------------------------------------------------------------
603  while (is_char<'\t'>(*std::ranges::begin(stream_view))) // read all tags if present
604  {
605  std::ranges::next(std::ranges::begin(stream_view)); // skip tab
606  read_field(stream_view | views::take_until_or_throw(tab_or_end), tag_dict);
607  }
608 
609  detail::consume(stream_view | views::take_until(!(is_char<'\r'> || is_char<'\n'>))); // consume new line
610 
611  // DONE READING - wrap up
612  // -------------------------------------------------------------------------------------------------------------
613  // Alignment object construction
614  // Note that the query sequence in get<1>(align) has already been filled while reading Field 10.
615  if constexpr (!detail::decays_to_ignore_v<align_type>)
616  {
// ref_idx is -1 for an empty reference id and 0 otherwise; it is replaced by the index from
// header.ref_dict only when reference sequences were passed.
617  int32_t ref_idx{(ref_id_tmp.empty()/*unmapped read?*/) ? -1 : 0};
618 
619  if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
620  {
621  if (!ref_id_tmp.empty())
622  {
623  assert(header.ref_dict.count(ref_id_tmp) != 0); // taken care of in check_and_assign_ref_id()
624  ref_idx = header.ref_dict[ref_id_tmp]; // get index for reference sequence
625  }
626  }
627 
628  construct_alignment(align, tmp_cigar_vector, ref_idx, ref_seqs, ref_offset_tmp, ref_length);
629  }
630 
// Hand over the parsed cigar last, because tmp_cigar_vector is still needed by construct_alignment() above.
631  if constexpr (!detail::decays_to_ignore_v<cigar_type>)
632  std::swap(cigar_vector, tmp_cigar_vector);
633 }
634 
// Writes one alignment record as a single tab-separated SAM line to `stream`.
//
// Steps, in order: compile-time checks on the argument types, run-time check that a range-typed ref_id is
// known to the header (only if options.sam_require_header is set), writing the header on the first call,
// and finally writing the eleven mandatory fields followed by the tag dictionary and the line ending.
//
// Throws format_error if ref_id is not contained in header.ref_dict or if ref_offset is below -1.
//
// NOTE: the opening lines of three static_asserts are missing from this listing; only their message
// strings remain visible.
636 template <typename stream_type,
637  typename header_type,
638  typename seq_type,
639  typename id_type,
640  typename ref_seq_type,
641  typename ref_id_type,
642  typename align_type,
643  typename qual_type,
644  typename mate_type,
645  typename tag_dict_type,
646  typename e_value_type,
647  typename bit_score_type>
648 inline void format_sam::write_alignment_record(stream_type & stream,
649  sam_file_output_options const & options,
650  header_type && header,
651  seq_type && seq,
652  qual_type && qual,
653  id_type && id,
654  int32_t const offset,
655  ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
656  ref_id_type && ref_id,
657  std::optional<int32_t> ref_offset,
658  align_type && align,
659  std::vector<cigar> const & cigar_vector,
660  sam_flag const flag,
661  uint8_t const mapq,
662  mate_type && mate,
663  tag_dict_type && tag_dict,
664  e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
665  bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score))
666 {
667  /* Note the following general things:
668  *
669  * - Given the SAM specifications, all fields may be empty
670  *
671  * - arithmetic values default to 0 while all others default to '*'
672  *
673  * - Because of the former, arithmetic values can be directly streamed
674  * into 'stream' as operator<< is defined for all arithmetic types
675  * and the default value (0) is also the SAM default.
676  *
677  * - All other non-arithmetic values need to be checked for emptiness
678  */
679 
680  // ---------------------------------------------------------------------
681  // Type Requirements (as static asserts for user friendliness)
682  // ---------------------------------------------------------------------
683  static_assert((std::ranges::forward_range<seq_type> &&
684  alphabet<std::ranges::range_reference_t<seq_type>>),
685  "The seq object must be a std::ranges::forward_range over "
686  "letters that model seqan3::alphabet.");
687 
688  static_assert((std::ranges::forward_range<id_type> &&
689  alphabet<std::ranges::range_reference_t<id_type>>),
690  "The id object must be a std::ranges::forward_range over "
691  "letters that model seqan3::alphabet.");
692 
693  if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
694  {
// ref_id may be a range, an integral index or an optional.
695  static_assert((std::ranges::forward_range<ref_id_type> ||
696  std::integral<std::remove_reference_t<ref_id_type>> ||
697  detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>, std::optional>),
698  "The ref_id object must be a std::ranges::forward_range "
699  "over letters that model seqan3::alphabet.");
700 
// Index-like reference ids are looked up in header.ref_ids() when written, so a header is mandatory.
701  if constexpr (std::integral<std::remove_cvref_t<ref_id_type>> ||
702  detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>, std::optional>)
703  static_assert(!detail::decays_to_ignore_v<header_type>,
704  "If you give indices as reference id information the header must also be present.");
705  }
706 
// (The opening line of this static_assert is missing from the listing.)
708  "The align object must be a std::pair of two ranges whose "
709  "value_type is comparable to seqan3::gap");
710 
711  static_assert((std::tuple_size_v<std::remove_cvref_t<align_type>> == 2 &&
712  std::equality_comparable_with<gap, std::ranges::range_reference_t<decltype(std::get<0>(align))>> &&
713  std::equality_comparable_with<gap, std::ranges::range_reference_t<decltype(std::get<1>(align))>>),
714  "The align object must be a std::pair of two ranges whose "
715  "value_type is comparable to seqan3::gap");
716 
717  static_assert((std::ranges::forward_range<qual_type> &&
718  alphabet<std::ranges::range_reference_t<qual_type>>),
719  "The qual object must be a std::ranges::forward_range "
720  "over letters that model seqan3::alphabet.");
721 
// (The opening line of this static_assert is missing from the listing.)
723  "The mate object must be a std::tuple of size 3 with "
724  "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
725  "2) a std::integral or std::optional<std::integral>, and "
726  "3) a std::integral.");
727 
728  static_assert(((std::ranges::forward_range<decltype(std::get<0>(mate))> ||
729  std::integral<std::remove_cvref_t<decltype(std::get<0>(mate))>> ||
730  detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(std::get<0>(mate))>, std::optional>) &&
731  (std::integral<std::remove_cvref_t<decltype(std::get<1>(mate))>> ||
732  detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(std::get<1>(mate))>, std::optional>) &&
733  std::integral<std::remove_cvref_t<decltype(std::get<2>(mate))>>),
734  "The mate object must be a std::tuple of size 3 with "
735  "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
736  "2) a std::integral or std::optional<std::integral>, and "
737  "3) a std::integral.");
738 
739  if constexpr (std::integral<std::remove_cvref_t<decltype(std::get<0>(mate))>> ||
740  detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(std::get<0>(mate))>, std::optional>)
741  static_assert(!detail::decays_to_ignore_v<header_type>,
742  "If you give indices as mate reference id information the header must also be present.");
743 
744  static_assert(std::same_as<std::remove_cvref_t<tag_dict_type>, sam_tag_dictionary>,
745  "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
746 
747  // ---------------------------------------------------------------------
748  // logical Requirements
749  // ---------------------------------------------------------------------
// Only a range-typed ref_id (not an index or optional) is validated against the header's dictionary.
750  if constexpr (!detail::decays_to_ignore_v<header_type> &&
751  !detail::decays_to_ignore_v<ref_id_type> &&
752  !std::integral<std::remove_reference_t<ref_id_type>> &&
753  !detail::is_type_specialisation_of_v<std::remove_reference_t<ref_id_type>, std::optional>)
754  {
755 
756  if (options.sam_require_header && !std::ranges::empty(ref_id))
757  {
758  auto id_it = header.ref_dict.end();
759 
// Contiguous, sized, borrowed ranges are looked up through a std::span over their data;
// every other range type is passed to find() directly.
760  if constexpr (std::ranges::contiguous_range<decltype(ref_id)> &&
761  std::ranges::sized_range<decltype(ref_id)> &&
762  std::ranges::borrowed_range<decltype(ref_id)>)
763  {
764  id_it = header.ref_dict.find(std::span{std::ranges::data(ref_id), std::ranges::size(ref_id)});
765  }
766  else
767  {
768  using header_ref_id_type = std::remove_reference_t<decltype(header.ref_ids()[0])>;
769 
// (The opening line of this static_assert is missing from the listing.)
771  "The ref_id type is not convertible to the reference id information stored in the "
772  "reference dictionary of the header object.");
773 
774  id_it = header.ref_dict.find(ref_id);
775  }
776 
777  if (id_it == header.ref_dict.end()) // no reference id matched
778  throw format_error{detail::to_string("The ref_id '", ref_id, "' was not in the list of references:",
779  header.ref_ids())};
780  }
781  }
782 
// A value of -1 is accepted here (it is written as position 0 below); anything smaller is rejected.
783  if (ref_offset.has_value() && (ref_offset.value() + 1) < 0)
784  throw format_error{"The ref_offset object must be an std::integral >= 0."};
785 
786  // ---------------------------------------------------------------------
787  // Writing the Header on first call
788  // ---------------------------------------------------------------------
789  if constexpr (!detail::decays_to_ignore_v<header_type>)
790  {
// header_was_written is not declared in this class body; presumably a member inherited from the base class.
791  if (options.sam_require_header && !header_was_written)
792  {
793  write_header(stream, options, header);
794  header_was_written = true;
795  }
796  }
797 
798  // ---------------------------------------------------------------------
799  // Writing the Record
800  // ---------------------------------------------------------------------
801 
802  detail::fast_ostreambuf_iterator stream_it{*stream.rdbuf()};
803  constexpr char separator{'\t'};
804 
// Field 1: ID
805  write_range_or_asterisk(stream_it, id);
806  *stream_it = separator;
807 
// Field 2: FLAG
808  stream_it.write_number(static_cast<uint16_t>(flag));
809  *stream_it = separator;
810 
// Field 3: REF_ID -- index-like ids are translated to names via header.ref_ids().
811  if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
812  {
813  if constexpr (std::integral<std::remove_reference_t<ref_id_type>>)
814  {
815  write_range_or_asterisk(stream_it, (header.ref_ids())[ref_id]);
816  }
817  else if constexpr (detail::is_type_specialisation_of_v<std::remove_reference_t<ref_id_type>, std::optional>)
818  {
819  if (ref_id.has_value())
820  write_range_or_asterisk(stream_it, (header.ref_ids())[ref_id.value()]);
821  else
822  *stream_it = '*';
823  }
824  else
825  {
826  write_range_or_asterisk(stream_it, ref_id);
827  }
828  }
829  else
830  {
831  *stream_it = '*';
832  }
833 
834  *stream_it = separator;
835 
// Field 4: REF_OFFSET
836  // SAM is 1 based, 0 indicates unmapped read if optional is not set
837  stream_it.write_number(ref_offset.value_or(-1) + 1);
838  *stream_it = separator;
839 
// Field 5: MAPQ -- cast to unsigned so that it is written as a number.
840  stream_it.write_number(static_cast<unsigned>(mapq));
841  *stream_it = separator;
842 
// Field 6: CIGAR -- an explicit cigar vector takes precedence over one derived from the alignment.
843  if (!std::ranges::empty(cigar_vector))
844  {
845  for (auto & c : cigar_vector) //TODO THIS IS PROBABLY TERRIBLE PERFORMANCE_WISE
846  stream_it.write_range(c.to_string());
847  }
848  else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
849  {
850  // compute possible distance from alignment end to sequence end
851  // which indicates soft clipping at the end.
852  // This should be replaced by a free count_gaps function for
853  // aligned sequences which is more efficient if possible.
// off_end = size(seq) - offset + (number of gaps in the query) - (length of the gapped query)
854  size_t off_end{std::ranges::size(seq) - offset};
855  for (auto chr : get<1>(align))
856  if (chr == gap{})
857  ++off_end;
858 
859  // Might happen if get<1>(align) doesn't correspond to the reference.
860  assert(off_end >= std::ranges::size(get<1>(align)));
861  off_end -= std::ranges::size(get<1>(align));
862 
863  write_range_or_asterisk(stream_it, detail::get_cigar_string(align, offset, off_end));
864  }
865  else
866  {
867  *stream_it = '*';
868  }
869 
870  *stream_it = separator;
871 
// Field 7: mate reference id, handled like REF_ID above.
872  if constexpr (std::integral<std::remove_reference_t<decltype(get<0>(mate))>>)
873  {
874  write_range_or_asterisk(stream_it, (header.ref_ids())[get<0>(mate)]);
875  }
876  else if constexpr (detail::is_type_specialisation_of_v<std::remove_reference_t<decltype(get<0>(mate))>, std::optional>)
877  {
878  if (get<0>(mate).has_value())
879  // value_or(0) instead of value() (which is equivalent here) as a
880  // workaround for a ubsan false-positive in GCC8: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90058
881  write_range_or_asterisk(stream_it, header.ref_ids()[get<0>(mate).value_or(0)]);
882  else
883  *stream_it = '*';
884  }
885  else
886  {
887  write_range_or_asterisk(stream_it, get<0>(mate));
888  }
889 
890  *stream_it = separator;
891 
// Field 8: mate position.
// NOTE(review): the optional branch adds 1 to the value, the plain integral branch writes it unchanged -- confirm intended.
892  if constexpr (detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(get<1>(mate))>, std::optional>)
893  {
894  // SAM is 1 based, 0 indicates unmapped read if optional is not set
895  stream_it.write_number(get<1>(mate).value_or(-1) + 1);
896  *stream_it = separator;
897  }
898  else
899  {
900  stream_it.write_number(get<1>(mate));
901  *stream_it = separator;
902  }
903 
// Field 9: third element of the mate tuple.
904  stream_it.write_number(get<2>(mate));
905  *stream_it = separator;
906 
// Fields 10 and 11: SEQ and QUAL.
907  write_range_or_asterisk(stream_it, seq);
908  *stream_it = separator;
909 
910  write_range_or_asterisk(stream_it, qual);
911 
// No separator is written here; write_tag_fields() receives the separator itself.
912  write_tag_fields(stream_it, tag_dict, separator);
913 
914  stream_it.write_end_of_line(options.add_carriage_return);
915 }
916 
917 
// Reads a comma separated list of values from stream_view into a std::vector<value_type> and move-assigns
// that vector to `variant`.
// NOTE: the line carrying the function name and the `variant` parameter is missing from this listing. The
// tag-dictionary read_field() calls read_sam_dict_vector() with matching arguments, so this is presumably
// that function's definition -- TODO confirm.
935 template <typename stream_view_type, typename value_type>
937  stream_view_type && stream_view,
938  value_type value)
939 {
940  std::vector<value_type> tmp_vector;
// NOTE(review): `ranges::end` is spelled without `std::` here, unlike `std::ranges::begin` on the same line.
941  while (std::ranges::begin(stream_view) != ranges::end(stream_view)) // not fully consumed yet
942  {
// Parse one element (everything up to the next ',') into `value`, which only serves as a typed buffer.
943  read_field(stream_view | views::take_until(is_char<','>), value);
944  tmp_vector.push_back(value);
945 
946  if (is_char<','>(*std::ranges::begin(stream_view)))
947  std::ranges::next(std::ranges::begin(stream_view)); // skip ','
948  }
949  variant = std::move(tmp_vector);
950 }
951 
// Reads the view two characters at a time into a std::vector<std::byte> and move-assigns that vector to
// `variant`. Throws format_error if reading a two character group fails.
// NOTE: the line carrying the function name and the `variant` parameter is missing from this listing. The
// tag-dictionary read_field() calls read_sam_byte_vector() for type 'H' with matching arguments, so this is
// presumably that function's definition -- TODO confirm.
965 template <typename stream_view_type>
967  stream_view_type && stream_view)
968 {
969  std::vector<std::byte> tmp_vector;
970  std::byte value;
971 
// NOTE(review): `ranges::end` is spelled without `std::` here, unlike `std::ranges::begin` on the same line.
972  while (std::ranges::begin(stream_view) != ranges::end(stream_view)) // not fully consumed yet
973  {
974  try
975  {
976  read_field(stream_view | views::take_exactly_or_throw(2), value);
977  }
// Any std::exception raised while reading the pair is reported as an uneven digit count; `e` is unused.
// NOTE(review): this also relabels failures that are not caused by a missing second digit.
978  catch (std::exception const & e)
979  {
980  throw format_error{"Hexadecimal tag has an uneven number of digits!"};
981  }
982 
983  tmp_vector.push_back(value);
984  }
985 
986  variant = std::move(tmp_vector);
987 }
988 
1006 template <typename stream_view_type>
1007 inline void format_sam::read_field(stream_view_type && stream_view, sam_tag_dictionary & target)
1008 {
1009  /* Every SAM tag has the format "[TAG]:[TYPE_ID]:[VALUE]", where TAG is a two letter
1010  name tag which is converted to a unique integer identifier and TYPE_ID is one character in [A,i,Z,H,B,f]
1011  describing the type for the upcoming VALUES. If TYPE_ID=='B' it signals an array of comma separated
1012  VALUE's and the inner value type is identified by the character following ':', one of [cCsSiIf].
1013  */
1014  uint16_t tag = static_cast<uint16_t>(*std::ranges::begin(stream_view)) << 8;
1015  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
1016  tag += static_cast<uint16_t>(*std::ranges::begin(stream_view));
1017  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
1018  std::ranges::next(std::ranges::begin(stream_view)); // skip ':'
1019  char type_id = *std::ranges::begin(stream_view);
1020  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
1021  std::ranges::next(std::ranges::begin(stream_view)); // skip ':'
1022 
1023  switch (type_id)
1024  {
1025  case 'A' : // char
1026  {
1027  target[tag] = static_cast<char>(*std::ranges::begin(stream_view));
1028  std::ranges::next(std::ranges::begin(stream_view)); // skip char that has been read
1029  break;
1030  }
1031  case 'i' : // int32_t
1032  {
1033  int32_t tmp;
1034  read_field(stream_view, tmp);
1035  target[tag] = tmp;
1036  break;
1037  }
1038  case 'f' : // float
1039  {
1040  float tmp;
1041  read_field(stream_view, tmp);
1042  target[tag] = tmp;
1043  break;
1044  }
1045  case 'Z' : // string
1046  {
1047  target[tag] = stream_view | views::to<std::string>;
1048  break;
1049  }
1050  case 'H' :
1051  {
1052  read_sam_byte_vector(target[tag], stream_view);
1053  break;
1054  }
1055  case 'B' : // Array. Value type depends on second char [cCsSiIf]
1056  {
1057  char array_value_type_id = *std::ranges::begin(stream_view);
1058  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
1059  std::ranges::next(std::ranges::begin(stream_view)); // skip first ','
1060 
1061  switch (array_value_type_id)
1062  {
1063  case 'c' : // int8_t
1064  read_sam_dict_vector(target[tag], stream_view, int8_t{});
1065  break;
1066  case 'C' : // uint8_t
1067  read_sam_dict_vector(target[tag], stream_view, uint8_t{});
1068  break;
1069  case 's' : // int16_t
1070  read_sam_dict_vector(target[tag], stream_view, int16_t{});
1071  break;
1072  case 'S' : // uint16_t
1073  read_sam_dict_vector(target[tag], stream_view, uint16_t{});
1074  break;
1075  case 'i' : // int32_t
1076  read_sam_dict_vector(target[tag], stream_view, int32_t{});
1077  break;
1078  case 'I' : // uint32_t
1079  read_sam_dict_vector(target[tag], stream_view, uint32_t{});
1080  break;
1081  case 'f' : // float
1082  read_sam_dict_vector(target[tag], stream_view, float{});
1083  break;
1084  default:
1085  throw format_error{std::string("The first character in the numerical ") +
1086  "id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id +
1087  "' was given."};
1088  }
1089  break;
1090  }
1091  default:
1092  throw format_error{std::string("The second character in the numerical id of a "
1093  "SAM tag must be one of [A,i,Z,H,B,f] but '") + type_id + "' was given."};
1094  }
1095 }
1096 
1104 template <typename stream_it_t, std::ranges::forward_range field_type>
1105 inline void format_sam::write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value)
1106 {
1107  if (std::ranges::empty(field_value))
1108  {
1109  *stream_it = '*';
1110  }
1111  else
1112  {
1113  if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<field_type>>, char>)
1114  stream_it.write_range(field_value);
1115  else // convert from alphabets to their character representation
1116  stream_it.write_range(field_value | views::to_char);
1117  }
1118 }
1119 
1126 template <typename stream_it_t>
1127 inline void format_sam::write_range_or_asterisk(stream_it_t & stream_it, char const * const field_value)
1128 {
1129  write_range_or_asterisk(stream_it, std::string_view{field_value});
1130 }
1131 
1139 template <typename stream_it_t>
1140 inline void format_sam::write_tag_fields(stream_it_t & stream_it, sam_tag_dictionary const & tag_dict, char const separator)
1141 {
1142  auto const stream_variant_fn = [&stream_it] (auto && arg) // helper to print an std::variant
1143  {
1144  using T = std::remove_cvref_t<decltype(arg)>;
1145 
1146  if constexpr (std::ranges::input_range<T>)
1147  {
1148  if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<T>>, char>)
1149  {
1150  stream_it.write_range(arg);
1151  }
1152  else if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<T>>, std::byte>)
1153  {
1154  if (!std::ranges::empty(arg))
1155  {
1156  stream_it.write_number(std::to_integer<uint8_t>(*std::ranges::begin(arg)));
1157 
1158  for (auto && elem : arg | views::drop(1))
1159  {
1160  *stream_it = ',';
1161  stream_it.write_number(std::to_integer<uint8_t>(elem));
1162  }
1163  }
1164  }
1165  else
1166  {
1167  if (!std::ranges::empty(arg))
1168  {
1169  stream_it.write_number(*std::ranges::begin(arg));
1170 
1171  for (auto && elem : arg | views::drop(1))
1172  {
1173  *stream_it = ',';
1174  stream_it.write_number(elem);
1175  }
1176  }
1177  }
1178  }
1179  else if constexpr (std::same_as<std::remove_cvref_t<T>, char>)
1180  {
1181  *stream_it = arg;
1182  }
1183  else // number
1184  {
1185  stream_it.write_number(arg);
1186  }
1187  };
1188 
1189  for (auto & [tag, variant] : tag_dict)
1190  {
1191  *stream_it = separator;
1192 
1193  char const char0 = tag / 256;
1194  char const char1 = tag % 256;
1195 
1196  *stream_it = char0;
1197  *stream_it = char1;
1198  *stream_it = ':';
1199  *stream_it = detail::sam_tag_type_char[variant.index()];
1200  *stream_it = ':';
1201 
1202  if (detail::sam_tag_type_char_extra[variant.index()] != '\0')
1203  {
1204  *stream_it = detail::sam_tag_type_char_extra[variant.index()];
1205  *stream_it = ',';
1206  }
1207 
1208  std::visit(stream_variant_fn, variant);
1209  }
1210 }
1211 
1212 } // namespace seqan3
Adaptations of algorithms from the Ranges TS.
Core alphabet concept and free function/type trait wrappers.
T begin(T... args)
Provides seqan3::views::char_to.
Functionally the same as std::ostreambuf_iterator, but offers writing a range more efficiently.
Definition: fast_ostreambuf_iterator.hpp:39
The alignment base format.
Definition: format_sam_base.hpp:62
void check_and_assign_ref_id(ref_id_type &ref_id, ref_id_tmp_type &ref_id_tmp, header_type &header, ref_seqs_type &)
Checks for known reference ids or adds a new reference id and assigns a reference id to ref_id.
Definition: format_sam_base.hpp:147
void write_header(stream_t &stream, sam_file_output_options const &options, sam_file_header< ref_ids_type > &header)
Writes the SAM header.
Definition: format_sam_base.hpp:637
void transfer_soft_clipping_to(std::vector< cigar > const &cigar_vector, int32_t &sc_begin, int32_t &sc_end) const
Transfer soft clipping information from the cigar_vector to sc_begin and sc_end.
Definition: format_sam_base.hpp:189
bool header_was_written
A variable that tracks whether the content of header has been written or not.
Definition: format_sam_base.hpp:83
void read_header(stream_view_type &&stream_view, sam_file_header< ref_ids_type > &hdr, ref_seqs_type &)
Reads the SAM header.
Definition: format_sam_base.hpp:419
void construct_alignment(align_type &align, std::vector< cigar > &cigar_vector, [[maybe_unused]] int32_t rid, [[maybe_unused]] ref_seqs_type &ref_seqs, [[maybe_unused]] int32_t ref_start, size_t ref_length)
Construct the field::alignment depending on the given information.
Definition: format_sam_base.hpp:231
The SAM format (tag).
Definition: format_sam.hpp:128
sam_file_header default_header
The default header for the alignment format.
Definition: format_sam.hpp:249
format_sam & operator=(format_sam const &)=default
Defaulted.
void read_sequence_record(stream_type &stream, sequence_file_input_options< seq_legal_alph_type, seq_qual_combined > const &options, seq_type &sequence, id_type &id, qual_type &qualities)
Read from the specified stream and back-insert into the given field buffers.
Definition: format_sam.hpp:297
~format_sam()=default
Defaulted.
format_sam & operator=(format_sam &&)=default
Defaulted.
void read_sam_byte_vector(seqan3::detail::sam_tag_variant &variant, stream_view_type &&stream_view)
Reads a list of byte pairs as it is the case for SAM tag byte arrays.
Definition: format_sam.hpp:966
void read_alignment_record(stream_type &stream, sam_file_input_options< seq_legal_alph_type > const &options, ref_seqs_type &ref_seqs, sam_file_header< ref_ids_type > &header, seq_type &seq, qual_type &qual, id_type &id, offset_type &offset, ref_seq_type &ref_seq, ref_id_type &ref_id, ref_offset_type &ref_offset, align_type &align, cigar_type &cigar_vector, flag_type &flag, mapq_type &mapq, mate_type &mate, tag_dict_type &tag_dict, e_value_type &e_value, bit_score_type &bit_score)
Read from the specified stream and back-insert into the given field buffers.
Definition: format_sam.hpp:389
static std::vector< std::string > file_extensions
The valid file extensions for this format; note that you can modify this value.
Definition: format_sam.hpp:145
std::string_view const & default_or(detail::ignore_t) const noexcept
Returns a reference to dummy if passed a std::ignore.
Definition: format_sam.hpp:255
void write_sequence_record(stream_type &stream, sequence_file_output_options const &options, seq_type &&sequence, id_type &&id, qual_type &&qualities)
Write the given fields to the specified stream.
Definition: format_sam.hpp:338
void read_field(stream_view_type &&stream_view, sam_tag_dictionary &target)
Reads the optional tag fields into the seqan3::sam_tag_dictionary.
Definition: format_sam.hpp:1007
format_sam(format_sam &&)=default
Defaulted.
void write_tag_fields(stream_it_t &stream, sam_tag_dictionary const &tag_dict, char const separator)
Writes the optional fields of the seqan3::sam_tag_dictionary.
Definition: format_sam.hpp:1140
void read_sam_dict_vector(seqan3::detail::sam_tag_variant &variant, stream_view_type &&stream_view, value_type value)
Reads a list of values separated by comma as it is the case for SAM tag arrays.
Definition: format_sam.hpp:936
static constexpr std::string_view dummy
An empty dummy container to pass to align_format.write() such that an empty field is written.
Definition: format_sam.hpp:246
void write_range_or_asterisk(stream_it_t &stream_it, field_type &&field_value)
Writes a field value to the stream.
Definition: format_sam.hpp:1105
bool ref_info_present_in_header
Tracks whether reference information (@SQ tag) was found in the SAM header.
Definition: format_sam.hpp:252
std::string tmp_qual
Stores quality values temporarily if seq and qual information are combined (not supported by SAM yet)...
Definition: format_sam.hpp:243
void write_alignment_record(stream_type &stream, sam_file_output_options const &options, header_type &&header, seq_type &&seq, qual_type &&qual, id_type &&id, int32_t const offset, ref_seq_type &&ref_seq, ref_id_type &&ref_id, std::optional< int32_t > ref_offset, align_type &&align, std::vector< cigar > const &cigar_vector, sam_flag const flag, uint8_t const mapq, mate_type &&mate, tag_dict_type &&tag_dict, e_value_type &&e_value, bit_score_type &&bit_score)
Write the given fields to the specified stream.
Definition: format_sam.hpp:648
format_sam()=default
Defaulted.
format_sam(format_sam const &)=default
Defaulted.
The alphabet of a gap character '-'.
Definition: gap.hpp:39
Stores the header information of alignment files.
Definition: header.hpp:33
std::unordered_map< key_type, int32_t, std::hash< key_type >, detail::view_equality_fn > ref_dict
The mapping of reference id to position in the ref_ids() range and the ref_id_info range.
Definition: header.hpp:158
ref_ids_type & ref_ids()
The range of reference ids.
Definition: header.hpp:119
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:326
T clear(T... args)
The Concepts library.
Provides various transformation traits used by the range module.
Auxiliary for pretty printing of exception messages.
Provides type traits for working with templates.
T end(T... args)
Provides concepts for core language types and relations that don't have concepts in C++20 (yet).
Provides seqan3::detail::fast_ostreambuf_iterator.
std::string make_printable(char const c)
Returns a printable value for the given character c.
Definition: pretty_print.hpp:48
constexpr auto is_space
Checks whether c is a space character.
Definition: predicate.hpp:144
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:73
std::string get_cigar_string(std::vector< cigar > const &cigar_vector)
Transforms a vector of cigar elements into a string representation.
Definition: cigar.hpp:263
@ none
None of the flags below are set.
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr void consume(rng_t &&rng)
Iterate over a range (consumes single-pass input ranges).
Definition: misc.hpp:28
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:434
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:150
constexpr auto drop
A view adaptor that returns all elements after n from the underlying range (or an empty range if the ...
Definition: drop.hpp:172
auto const to_char
A view that calls seqan3::to_char() on each element in the input range.
Definition: to_char.hpp:65
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:145
constexpr auto take_until_or_throw
A view adaptor that returns elements from the underlying range until the functor evaluates to true (t...
Definition: take_until.hpp:614
constexpr auto istreambuf
A view factory that returns a view over the stream buffer of an input stream.
Definition: istreambuf.hpp:114
constexpr auto take_until
A view adaptor that returns elements from the underlying range until the functor evaluates to true (o...
Definition: take_until.hpp:600
constexpr auto take_exactly_or_throw
A view adaptor that returns the first size elements from the underlying range and also exposes size i...
Definition: take_exactly.hpp:91
constexpr auto take_until_or_throw_and_consume
A view adaptor that returns elements from the underlying range until the functor evaluates to true (t...
Definition: take_until.hpp:642
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:70
constexpr auto take_until_and_consume
A view adaptor that returns elements from the underlying range until the functor evaluates to true (o...
Definition: take_until.hpp:628
Provides seqan3::detail::ignore_output_iterator for writing to null stream.
The generic alphabet concept that covers most data types used in ranges.
Resolves to std::ranges::implicitly_convertible_to<type1, type2>(). <dl class="no-api">This entity i...
A more refined container concept than seqan3::container.
The generic concept for a (biological) sequence.
Whether a type behaves like a tuple.
Provides various utility functions.
Auxiliary functions for the alignment IO.
Provides seqan3::views::istreambuf.
std::tuple< std::vector< cigar >, int32_t, int32_t > parse_cigar(cigar_input_type &&cigar_input)
Parses a cigar string into a vector of operation-count pairs (e.g. (M, 3)).
Definition: cigar.hpp:134
constexpr char sam_tag_type_char_extra[12]
Each type's SAM tag type extra char id. Index corresponds to the seqan3::detail::sam_tag_variant types...
Definition: sam_tag_dictionary.hpp:38
constexpr char sam_tag_type_char[12]
Each SAM tag type char identifier. Index corresponds to the seqan3::detail::sam_tag_variant types.
Definition: sam_tag_dictionary.hpp:36
std::string to_string(value_type &&...values)
Streams all parameters via the seqan3::debug_stream and returns a concatenated string.
Definition: to_string.hpp:29
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
T push_back(T... args)
Provides various utility functions.
Adaptations of concepts from the Ranges TS.
Provides the seqan3::format_sam_base that can be inherited from.
Provides the seqan3::sam_file_header class.
Provides seqan3::sam_file_input_format and auxiliary classes.
Provides seqan3::sam_file_input_options.
Provides seqan3::sam_file_output_format and auxiliary classes.
Provides seqan3::sam_file_output_options.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
Provides helper data structures for the seqan3::sam_file_output.
Provides seqan3::sequence_file_input_format and auxiliary classes.
Provides seqan3::sequence_file_output_options.
Provides seqan3::views::slice.
Thrown if information given to output format didn't match expectations.
Definition: exception.hpp:88
Thrown if there is a parse error, such as reading an unexpected character from an input stream.
Definition: exception.hpp:48
The options type defines various option members that influence the behaviour of all or some formats.
Definition: input_options.hpp:24
The options type defines various option members that influence the behavior of all or some formats.
Definition: output_options.hpp:23
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommend...
Definition: output_options.hpp:27
bool sam_require_header
Whether to require a header for SAM files.
Definition: output_options.hpp:41
The options type defines various option members that influence the behaviour of all or some formats.
Definition: input_options.hpp:30
bool truncate_ids
Read the ID string only up until the first whitespace character.
Definition: input_options.hpp:32
The options type defines various option members that influence the behaviour of all or some formats.
Definition: output_options.hpp:22
Exposes the value_type of another type.
Definition: pre.hpp:58
T swap(T... args)
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
T tie(T... args)
Provides seqan3::views::to.
Provides seqan3::views::to_char.
T tuple_size_v
Provides traits to inspect some information of a type, for example its name.
Provides character predicates for tokenisation.
Provides seqan3::tuple_like.
T visit(T... args)