<codeBook xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xsi:schemaLocation="ddi:codebook:2_5 http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd" xmlns="ddi:codebook:2_5">
  <!-- Document description (DDI docDscr): citation details for this metadata record. -->
  <docDscr>
    <citation>
      <titlStmt>
        <!-- NOTE(review): the Swedish title is empty; the only title text in this
             statement is the English parallel title below. Confirm whether a
             Swedish title exists upstream. -->
        <titl xml:lang="sv"></titl>
        <parTitl xml:lang="en">Metagenomic NrdJm5 sequences placed in full phylogeny</parTitl>
        <!-- Two identifiers for the same record: an SND-issued ID and the DOI URL. -->
        <IDNo agency="SND">doi-10-17045-sthlmuni-7642343-0</IDNo>
        <IDNo agency="DOI">https://doi.org/10.17045/STHLMUNI.7642343</IDNo>
      </titlStmt>
      <prodStmt>
        <!-- Same producer (abbr="SND") given once per language: English, then Swedish. -->
        <producer xml:lang="en" abbr="SND">Swedish National Data Service</producer>
        <producer xml:lang="sv" abbr="SND">Svensk nationell datatjänst</producer>
      </prodStmt>
      <!-- The holdings URI is the same DOI URL as the IDNo with agency="DOI" above. -->
      <holdings URI="https://doi.org/10.17045/STHLMUNI.7642343">Landing page</holdings>
    </citation>
  </docDscr>
  <stdyDscr>
    <!-- Study-level citation: title, identifiers, distributor and version of the dataset. -->
    <citation>
      <titlStmt>
        <!-- NOTE(review): the Swedish title is empty here as well; only the English
             parallel title carries text. -->
        <titl xml:lang="sv"></titl>
        <parTitl xml:lang="en">Metagenomic NrdJm5 sequences placed in full phylogeny</parTitl>
        <!-- Identifier values are identical to those in the docDscr citation. -->
        <IDNo agency="SND">doi-10-17045-sthlmuni-7642343-0</IDNo>
        <IDNo agency="DOI">https://doi.org/10.17045/STHLMUNI.7642343</IDNo>
      </titlStmt>
      <!-- Responsibility and production statements are present but empty:
           no authors or producers are recorded at study level in this record. -->
      <rspStmt />
      <prodStmt />
      <distStmt>
        <!-- Same distributor (abbr="SND") given once per language: English, then Swedish. -->
        <distrbtr xml:lang="en" abbr="SND" URI="https://snd.se">Swedish National Data Service</distrbtr>
        <distrbtr xml:lang="sv" abbr="SND" URI="https://snd.se">Svensk nationell datatjänst</distrbtr>
        <!-- The date is carried only in the attribute; the element has no text content. -->
        <distDate xml:lang="en" date="2019-01-29" />
      </distStmt>
      <verStmt>
        <!-- Version "0", dated the same day as the distribution date above. -->
        <version elementVersion="0" elementVersionDate="2019-01-29" />
      </verStmt>
      <!-- The holdings URI is the same DOI URL as the IDNo with agency="DOI" above. -->
      <holdings URI="https://doi.org/10.17045/STHLMUNI.7642343">Landing page</holdings>
    </citation>
    <stdyInfo>
      <subject />
      <abstract xml:lang="en" contentType="abstract">To search for sequences from metagenomics projects, we downloaded all TARA Ocean ORFs (Eren 2017, https://doi.org/10.6084/m9.figshare.4902917.v1 ; Delmont 2018, https://doi.org/10.1038/s41564-018-0176-9), all ORFs from the Human Microbiome Project (2019-01-09; HMP 2012a, https://doi.org/10.1038/nature11234 ; HMP 2012b, https://doi.org/10.1038/nature11209 ), the majority of bacterial MAGs and SAGs from IMG/MER (4910 MAGs, 2230 SAGs) plus 53 aquatic and soil metagenomes, in particular those with project names containing “virus”, “phage”, “therm” or “hot” (see img_sags.tsv, img_metag_samples.tsv and img_mags.tsv) (Markowitz 2008, https://doi.org/10.1093/nar/gkm869). Together, we downloaded a total of 250,881,638 ORFs. We used HMM profiles designed for each clan in the phylogeny to search the sequences. We found 181 sequences with a best match to the profile designed from the TV clan. These were aligned to the original alignment using Clustal Omega in profile mode (all.NrdJm5.co.profile.wa.masked.alnfaa ; Sievers 2014, https://doi.org/10.1038/msb.2011.75) and phylogenetically placed in the phylogeny from https://doi.org/10.17045/sthlmuni.7117430.v2 with RAxML (Stamatakis 2014, https://doi.org/10.1093/bioinformatics/btu033). The resulting tree can be viewed with Dendroscope (Huson et al. 2007, https://doi.org/10.1186/1471-2105-8-460); placed sequences have "QUERY" prepended to their names.</abstract>
      <sumDscr />
    </stdyInfo>
    <!-- Methodology section: only an empty dataColl placeholder is present. -->
    <method>
      <dataColl />
    </method>
    <!-- Data access: a single use restriction, stated once in English and once in Swedish. -->
    <dataAccs>
      <useStmt>
        <restrctn xml:lang="en">Access to data through an external actor. </restrctn>
        <restrctn xml:lang="sv">Åtkomst till data via extern aktör. </restrctn>
      </useStmt>
    </dataAccs>
    <!-- Other study materials: empty, nothing is listed in this record. -->
    <othrStdyMat />
  </stdyDscr>
</codeBook>