<ddi:DDIInstance xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="ddi:instance:3_3 http://ddialliance.org/Specification/DDI-Lifecycle/3.3/XMLSchema/instance.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ddi="ddi:instance:3_3" xmlns:r="ddi:reusable:3_3" xmlns:s="ddi:studyunit:3_3" xmlns:d="ddi:datacollection:3_3" xmlns:a="ddi:archive:3_3" xmlns:c="ddi:conceptualcomponent:3_3" xmlns:cm="ddi:comparative:3_3" xmlns:g="ddi:group:3_3" xmlns:l="ddi:logicalproduct:3_3" xmlns:p="ddi:physicaldataproduct:3_3" xmlns:pi="ddi:physicalinstance:3_3" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xml="http://www.w3.org/XML/1998/namespace" isMaintainable="true" scopeOfUniqueness="Agency">
  <!-- Identification of this DDIInstance: URN plus the separate agency, ID and version parts. -->
  <!-- NOTE(review): the agency segment inside the URN is "se.researchdata" while r:Agency is "SND"; confirm which value is intended. -->
  <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22817410:0</r:URN>
  <r:Agency>SND</r:Agency>
  <r:ID>doi-10-17044-scilifelab-22817410</r:ID>
  <r:Version>0</r:Version>
  <g:ResourcePackage>
    <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22817410.ResourcePackage:2.0</r:URN>
    <!-- Scheme for other materials; in this record it carries only its own URN and no material entries. -->
    <r:OtherMaterialScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22817410.OtherMaterialScheme:2.0</r:URN>
    </r:OtherMaterialScheme>
    <a:OrganizationScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22817410.OrganizationScheme-0:2.0</r:URN>
      <!-- Person entry for Andreas Wallberg: affiliation is held as a user attribute pair, identity as name parts plus an ORCID researcher ID. -->
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22817410.Individual-0:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Science for Life Laboratory</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Andreas</a:FirstGiven>
            <a:LastFamily>Wallberg</a:LastFamily>
            <a:FullName>
              <r:String>Andreas Wallberg</r:String>
            </a:FullName>
          </a:IndividualName>
          <!-- The identifier scheme is named in TypeOfID; the value is the bare ORCID without a URL prefix. -->
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-9081-9663</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <!-- Person entry for Per Unneberg: affiliation as a user attribute pair; no researcher ID is recorded. -->
      <!-- The URN uses index 1 so that it does not collide with the preceding Individual-0 entry. -->
      <!-- NOTE(review): r:Citation references only Individual-0; add a CreatorReference for this URN if this person is also a creator. -->
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22817410.Individual-1:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Science for Life Laboratory</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Per</a:FirstGiven>
            <a:LastFamily>Unneberg</a:LastFamily>
            <a:FullName>
              <r:String>Per Unneberg</r:String>
            </a:FullName>
          </a:IndividualName>
        </a:IndividualIdentification>
      </a:Individual>
    </a:OrganizationScheme>
  </g:ResourcePackage>
  <s:StudyUnit>
    <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22817410.StudyUnit:2.0</r:URN>
    <r:UserID typeOfUserID="datasetIdentifier">doi-10-17044-scilifelab-22817410</r:UserID>
    <!-- Bibliographic citation for the study unit: title, creator reference, publisher, publication date and DOI. -->
    <r:Citation>
      <r:Title>
        <r:String xml:lang="en">7. Ecological genomics of the Northern krill: Genome-scale comparisons of adaptive divergence</r:String>
      </r:Title>
      <!-- NOTE(review): only one CreatorReference is given although the OrganizationScheme lists two a:Individual entries; confirm whether a second creator should be referenced. -->
      <r:Creator>
        <r:CreatorReference>
          <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22817410.Individual-0:2.0</r:URN>
          <r:TypeOfObject>Individual</r:TypeOfObject>
        </r:CreatorReference>
      </r:Creator>
      <!-- Publisher is stated once, with the name in Swedish and English. -->
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Uppsala universitet</r:String>
          <r:String xml:lang="en">Uppsala University</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:PublicationDate>
        <r:SimpleDate>2024-03-27</r:SimpleDate>
      </r:PublicationDate>
      <!-- DOI of the dataset; the managing agency is given both as attribute and as element text. -->
      <r:InternationalIdentifier>
        <r:IdentifierContent>10.17044/SCILIFELAB.22817410</r:IdentifierContent>
        <r:ManagingAgency controlledVocabularyAgencyName="DOI">DOI</r:ManagingAgency>
      </r:InternationalIdentifier>
    </r:Citation>
    <r:Abstract>
      <r:Content xml:lang="en">This item holds multiple tar archives with genome-scale comparisons of divergence between Northern krill populations, including estimated allele frequencies and divergence (e.g. FST), and extended haplotype signatures (XP-nSL estimates). Many analyses were performed in "chunks" (160 in total across both gene-rich and gene-poor sequences), which are described in a previous item.

Population definitions

Population definitions are the same as described in a different item:


  - "at vs. me" = Atlantic Ocean samples (n=67) vs. the Mediterranean (i.e. Barcelona) samples (n=7).

  - "we vs. ea" = South-West North Atlantic Ocean (n=20) vs. North-East North Atlantic Ocean (n=47). In files using this contrast, sometimes the label "wa" is used instead of "we" for the South-West North Atlantic Ocean samples.



Contents:


  - allele_freqs_fst.gene_rich_sequences.at_vs_me.tar, contains per-SNP estimates of allele frequencies and FST between "at" and "me" groups along gene-rich sequences.

  - allele_freqs_fst.gene_rich_sequences.we_vs_ea.tar, as above but between "we" and "ea" groups.

  - allele_freqs_fst.gene_poor_sequences.at_vs_me.tar, contains per-SNP estimates of allele frequencies and FST between "at" and "me" groups along gene-poor sequences.

  - allele_freqs_fst.gene_poor_sequences.we_vs_ea.tar, as above but for "we" and "ea" groups.

  - allele_freqs_fst.merged_sequences.at_vs_me.csv.gz, contains per-SNP estimates of allele frequencies and FST between "at" and "me" merged into a single TSV file.

  - allele_freqs_fst.merged_sequences.we_vs_ea.csv.gz, as above but for "we" and "ea".

  - allele_freqs_fst.gene_rich_sequences_windows.at_vs_me.tar.gz, contains per-window estimates of FST between "at" and "me" groups along gene-rich sequences.

  - allele_freqs_fst.gene_rich_sequences_windows.we_vs_ea.tar.gz, as above but for "we" and "ea" groups.

  - allele_freqs_fst.gene_poor_sequences_windows.at_vs_me.tar.gz, contains per-window estimates of FST between "at" and "me" groups along gene-poor sequences.

  - allele_freqs_fst.gene_poor_sequences_windows.we_vs_ea.tar.gz, as above but for "we" and "ea" groups.

  - selscan_xpnsl.gene_rich_sequences.tar.gz, contains per-SNP cross-population XP-nSL statistics for gene-rich sequences.

  - selscan_xpnsl.gene_poor_sequences.tar.gz, contains per-SNP cross-population XP-nSL statistics for gene-poor sequences.

  - selscan_xpnsl.gene_rich_sequences_windows.tar.gz, contains per-window cross-population XP-nSL statistics for gene-rich sequences.

  - selscan_xpnsl.gene_poor_sequences_windows.tar.gz, as above but for gene-poor sequences.

  - fst_vs_xpnsl.per_snp.at_vs_me.csv.gz, contains per-SNP FST, genomic region and XP-nSL values in a single file for the "at vs. me" contrast.

  - fst_vs_xpnsl.per_snp.we_vs_ea.csv.gz, contains per-SNP FST, genomic region and XP-nSL values in a single file for the "we vs. ea" contrast.

  - fst_vs_xpnsl_vs_diversity_vs_regions.merged_sequences.at_vs_me.tsv.tar.gz, integrates window-based statistics into a single file for the "at vs. me" contrast.

  - fst_vs_xpnsl_vs_diversity_vs_regions.merged_sequences.we_vs_ea.tsv.tar.gz, as above but for the "we vs. ea" contrast.



allele_freqs_fst.gene_(rich|poor)_sequences.(at_vs_me|we_vs_ea).tar

The TSV files in these archives contain per-SNP estimates of allele frequencies and FST, along with SNP annotations. There are nine main fields/columns with overlapping/redundant information to accommodate flexible parsing. Large fields have nested subfields that are separated by "|" (first level) or ":" (second level).


  - name of sequence (e.g. "seq_s_1")

  - position of SNP (e.g. "448878")

  - reference allele (e.g. "A")

  - alternate allele (e.g. "G")

  - major column with FST value and allele frequency and other data for each population. It is described below.

  - type of SNP (e.g. intron, synonymous, missense, intergenic, ...) and label of associated gene (e.g. missense|REF_STRG_1_4_XLOC_012878)

  - FST tag and value (e.g. fst|0.0653)

  - region, type of SNP and gene label (e.g. region|missense|REF_STRG_1_4_XLOC_012878)

  - gene annotation derived from EnTAP annotations and Drosophila homologs, which are described below. Uses comma-separated sub-fields.



Subfields in field 5:

Example:

at/me:0.0653:148:1.0000:1.0000:1.0000|at,134,133.0000,1.0000,0.9925,0.0075|me,14,13.0000,1.0000,0.9286,0.0714

This field splits into three major subfields on "|": one about the pairwise comparison and two with metadata about each population.

1st subfield (at/me:0.0653:148:1.0000:1.0000:1.0000)


  - name of contrast (at/me)

  - FST of SNP (0.0653)

  - Sample size (148)

  - Proportion of observed data given overall sample size (1.0000),</r:Content>
    </r:Abstract>
    <!-- Coverage of the study: subject classification terms, plus a spatial coverage element that is empty in this record. -->
    <r:Coverage>
      <r:TopicalCoverage>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22817410.TopicalCoverage:2.0</r:URN>
        <!-- Each subject appears twice, once in English and once in Swedish, sharing the same controlledVocabularyID. -->
        <r:Subject xml:lang="en" controlledVocabularyID="3" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medical and Health Sciences</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="3" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medicin och hälsovetenskap</r:Subject>
        <r:Subject xml:lang="en" controlledVocabularyID="10609" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Genetics and Genomics</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="10609" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Genetik och genomik</r:Subject>
        <r:Subject xml:lang="en" controlledVocabularyID="106" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Biological Sciences</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="106" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Biologi</r:Subject>
        <r:Subject xml:lang="en" controlledVocabularyID="10611" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Ecology</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="10611" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Ekologi</r:Subject>
      </r:TopicalCoverage>
      <!-- NOTE(review): r:SpatialCoverage has no content or identification; confirm whether it should be populated or omitted. -->
      <r:SpatialCoverage />
    </r:Coverage>
    <!-- Archive information for the study unit: one item with access terms and a data file count. -->
    <a:Archive>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22817410.Archive:2.0</r:URN>
      <a:ArchiveSpecific>
        <a:Item>
          <a:Access>
            <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22817410.Archive-ArchiveSpecificType-AccessType:2.0</r:URN>
            <!-- NOTE(review): a controlled vocabulary is named but the element has no value; confirm the intended access term. -->
            <a:TypeOfAccess controlledVocabularyName="info:eu-repo-Access-Terms vocabulary"></a:TypeOfAccess>
          </a:Access>
          <!-- NOTE(review): the count is 0 although the abstract enumerates several archive files; confirm against the source record. -->
          <a:DataFileQuantity>0</a:DataFileQuantity>
        </a:Item>
      </a:ArchiveSpecific>
    </a:Archive>
  </s:StudyUnit>
</ddi:DDIInstance>