<ddi:DDIInstance xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="ddi:instance:3_3 http://ddialliance.org/Specification/DDI-Lifecycle/3.3/XMLSchema/instance.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ddi="ddi:instance:3_3" xmlns:r="ddi:reusable:3_3" xmlns:s="ddi:studyunit:3_3" xmlns:d="ddi:datacollection:3_3" xmlns:a="ddi:archive:3_3" xmlns:c="ddi:conceptualcomponent:3_3" xmlns:cm="ddi:comparative:3_3" xmlns:g="ddi:group:3_3" xmlns:l="ddi:logicalproduct:3_3" xmlns:p="ddi:physicaldataproduct:3_3" xmlns:pi="ddi:physicalinstance:3_3" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xml="http://www.w3.org/XML/1998/namespace" isMaintainable="true" scopeOfUniqueness="Agency">
  <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273:0</r:URN>
  <r:Agency>SND</r:Agency>
  <r:ID>doi-10-17044-scilifelab-28673273</r:ID>
  <r:Version>0</r:Version>
  <g:ResourcePackage>
    <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.ResourcePackage:2.0</r:URN>
    <r:OtherMaterialScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.OtherMaterialScheme:2.0</r:URN>
    </r:OtherMaterialScheme>
    <a:OrganizationScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.OrganizationScheme-0:2.0</r:URN>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.Individual-0:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Science for Life Laboratory</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Krzysztof</a:FirstGiven>
            <a:LastFamily>Jurdzinski</a:LastFamily>
            <a:FullName>
              <r:String>Krzysztof Jurdzinski</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0001-9544-5755</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.Individual-0:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Science for Life Laboratory</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Meike</a:FirstGiven>
            <a:LastFamily>Latz</a:LastFamily>
            <a:FullName>
              <r:String>Meike Latz</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-6583-9291</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.Individual-0:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Science for Life Laboratory</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Bengt</a:FirstGiven>
            <a:LastFamily>Karlson</a:LastFamily>
            <a:FullName>
              <r:String>Bengt Karlson</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-7524-3504</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.Individual-0:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Science for Life Laboratory</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Anders</a:FirstGiven>
            <a:LastFamily>Andersson</a:LastFamily>
            <a:FullName>
              <r:String>Anders Andersson</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-3627-6899</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
    </a:OrganizationScheme>
  </g:ResourcePackage>
  <s:StudyUnit>
    <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.StudyUnit:2.0</r:URN>
    <r:UserID typeOfUserID="datasetIdentifier">doi-10-17044-scilifelab-28673273</r:UserID>
    <r:Citation>
      <r:Title>
        <r:String xml:lang="en">DNA-based monitoring of bacterial and protist diversity in the Baltic Sea</r:String>
      </r:Title>
      <r:Creator>
        <r:CreatorReference>
          <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.Individual-0:2.0</r:URN>
          <r:TypeOfObject>Individual</r:TypeOfObject>
        </r:CreatorReference>
      </r:Creator>
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Kungliga Tekniska högskolan</r:String>
          <r:String xml:lang="en">Royal Institute of Technology</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Kungliga Tekniska högskolan</r:String>
          <r:String xml:lang="en">Royal Institute of Technology</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:PublicationDate>
        <r:SimpleDate>2025-04-11</r:SimpleDate>
      </r:PublicationDate>
      <r:InternationalIdentifier>
        <r:IdentifierContent>10.17044/SCILIFELAB.28673273</r:IdentifierContent>
        <r:ManagingAgency controlledVocabularyAgencyName="DOI">DOI</r:ManagingAgency>
      </r:InternationalIdentifier>
    </r:Citation>
    <r:Abstract>
      <r:Content xml:lang="en">Here we share the code, the sequencing processing output, and the intermediate data files for the work on bacterial and protist diversity patterns in the Baltic Sea area based on 16S and 18S metabarcoding as implemented two times for a year alongside the Swedish coastline monitoring programme. This work is available as a preprint:

Distinct bacterial and protist plankton diversity dynamics uncovered through DNA-based monitoring in the Baltic Sea area, Krzysztof T Jurdzinski, Meike AC Latz, Anders Torstensson, Sonia Brugel, Mikael Hedblom, Yue O O Hu, Markus Lindh, Agneta Andersson, Bengt Karlson, Anders F Andersson, bioRxiv 2024.08.14.607742; doi: https://doi.org/10.1101/2024.08.14.607742 

Documentation files:

README.md - description of the files, including all the files within the zipped folders.
environment.yml - conda environment with software/packages needed to run all the included scripts.
workflow.sh - a bash script defining the workflow.

Zipped folders with data processing documentation and intermediate files

ampliseq_16S.zip - this directory includes the scripts used to run the nf-core/ampliseq (https://nf-co.re/ampliseq/2.7.0/) pipeline on the V3-V4 16S metabarcoding samples, as well as output files needed for downstream analysis.

ampliseq_18S.zip - same as ampliseq_16S.zip, but for the V4 18S metabarcoding.

taxa_reannotation.zip - each subdirectory contains results of taxonomic re-annotation of the metabarcoding results and the scripts to obtain them. Both 2015-2017 and 2019-2020 datasets were re-annotated with the GTDB corrected for mislabeled sequences using SATIVA and with PR2 version 5.0.0 for 16S and 18S respectively. Both 16S datasets were re-annotated using the SILVA database (version 138.1). 

data_2015_2017.zip - these files correspond to the data for the samples from 2015 to 2017 (+ storage test for some 2019 samples). This is new data, later down the pipeline merged with the 2019-2020 dataset.

merged_data.zip - this folder contains data merged across the 2015-2017 and the 2019-2020 datasets, based on the files from folders data_2015_2017 and data_2019_2020.

GSHHG.zip - Global Self-consistent, Hierarchical, High-resolution Geography Database (GSHHG) version 2.3.7 file needed to plot maps, as downloaded from the NOAA website (https://www.ngdc.noaa.gov/mgg/shorelines/gshhs.html) .

Herlemann_et_al_2016.zip - data from the transect-based study by Herlemann et al., 2016 (https://doi.org/10.3389/fmicb.2016.01883) .

read_downsampling.zip - This folder includes the scripts used to rarefy raw reads and the key output files. It is all based on 16S data.

freshwater_marine_matching.zip - this folder includes files and code used for matching the ASVs from this study to database freshwater and marine sequences.

Zipped folders with key R scripts

processing_code.zip - R scripts used for multiple steps of intermediate data table processing.

analysis_figures_code.zip - R scripts used to analyze the data and generate the figures.</r:Content>
    </r:Abstract>
    <r:Coverage>
      <r:TopicalCoverage>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.TopicalCoverage:2.0</r:URN>
        <r:Subject xml:lang="en" controlledVocabularyID="3" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medical and Health Sciences</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="3" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medicin och hälsovetenskap</r:Subject>
        <r:Subject xml:lang="en" controlledVocabularyID="106" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Biological Sciences</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="106" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Biologi</r:Subject>
        <r:Subject xml:lang="en" controlledVocabularyID="10611" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Ecology</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="10611" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Ekologi</r:Subject>
      </r:TopicalCoverage>
      <r:SpatialCoverage />
    </r:Coverage>
    <a:Archive>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.Archive:2.0</r:URN>
      <a:ArchiveSpecific>
        <a:Item>
          <a:Access>
            <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-28673273.Archive-ArchiveSpecificType-AccessType:2.0</r:URN>
            <a:TypeOfAccess controlledVocabularyName="info:eu-repo-Access-Terms vocabulary"></a:TypeOfAccess>
          </a:Access>
          <a:DataFileQuantity>0</a:DataFileQuantity>
        </a:Item>
      </a:ArchiveSpecific>
    </a:Archive>
  </s:StudyUnit>
</ddi:DDIInstance>