<ddi:DDIInstance
    xmlns:ddi="ddi:instance:3_3"
    xmlns:r="ddi:reusable:3_3"
    xmlns:s="ddi:studyunit:3_3"
    xmlns:d="ddi:datacollection:3_3"
    xmlns:a="ddi:archive:3_3"
    xmlns:c="ddi:conceptualcomponent:3_3"
    xmlns:cm="ddi:comparative:3_3"
    xmlns:g="ddi:group:3_3"
    xmlns:l="ddi:logicalproduct:3_3"
    xmlns:p="ddi:physicaldataproduct:3_3"
    xmlns:pi="ddi:physicalinstance:3_3"
    xmlns:dc="http://purl.org/dc/elements/1.1/"
    xmlns:xhtml="http://www.w3.org/1999/xhtml"
    xmlns:xml="http://www.w3.org/XML/1998/namespace"
    xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="ddi:instance:3_3 http://ddialliance.org/Specification/DDI-Lifecycle/3.3/XMLSchema/instance.xsd"
    isMaintainable="true"
    scopeOfUniqueness="Agency">
  <!-- Root of the DDI instance. Every namespace prefix used in this document
       is bound once on the start tag above; attribute order carries no meaning. -->
  <!-- Identification of the instance itself: full URN, then agency, id and version. -->
  <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22825277:0</r:URN>
  <r:Agency>SND</r:Agency>
  <r:ID>doi-10-17044-scilifelab-22825277</r:ID>
  <r:Version>0</r:Version>
  <!-- Resource package: holds the people that other parts of the record point to by URN. -->
  <g:ResourcePackage>
    <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22825277.ResourcePackage:2.0</r:URN>
    <r:OtherMaterialScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22825277.OtherMaterialScheme:2.0</r:URN>
    </r:OtherMaterialScheme>
    <a:OrganizationScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22825277.OrganizationScheme-0:2.0</r:URN>
      <!-- Individual-0: this URN is the target of the CreatorReference in the study citation. -->
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22825277.Individual-0:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Science for Life Laboratory</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Andreas</a:FirstGiven>
            <a:LastFamily>Wallberg</a:LastFamily>
            <a:FullName>
              <r:String>Andreas Wallberg</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-9081-9663</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <!-- Individual-1: needs its own URN index. It previously reused Individual-0,
           which made any reference to that URN resolve to two different people. -->
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22825277.Individual-1:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Science for Life Laboratory</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Per</a:FirstGiven>
            <a:LastFamily>Unneberg</a:LastFamily>
            <a:FullName>
              <r:String>Per Unneberg</r:String>
            </a:FullName>
          </a:IndividualName>
        </a:IndividualIdentification>
      </a:Individual>
    </a:OrganizationScheme>
  </g:ResourcePackage>
  <s:StudyUnit>
    <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22825277.StudyUnit:2.0</r:URN>
    <r:UserID typeOfUserID="datasetIdentifier">doi-10-17044-scilifelab-22825277</r:UserID>
    <!-- Bibliographic citation of the dataset: title, creator, publisher, date and DOI. -->
    <r:Citation>
      <r:Title>
        <r:String xml:lang="en">8. Ecological genomics of the Northern krill: Recombination rates and demographic history</r:String>
      </r:Title>
      <!-- The creator is given by reference; the URN points at an a:Individual
           in the resource package's organization scheme. -->
      <r:Creator>
        <r:CreatorReference>
          <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22825277.Individual-0:2.0</r:URN>
          <r:TypeOfObject>Individual</r:TypeOfObject>
        </r:CreatorReference>
      </r:Creator>
      <!-- One publisher, named in Swedish and English. An identical second
           r:Publisher element used to follow this one and has been dropped. -->
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Uppsala universitet</r:String>
          <r:String xml:lang="en">Uppsala University</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:PublicationDate>
        <r:SimpleDate>2024-03-27</r:SimpleDate>
      </r:PublicationDate>
      <r:InternationalIdentifier>
        <r:IdentifierContent>10.17044/SCILIFELAB.22825277</r:IdentifierContent>
        <r:ManagingAgency controlledVocabularyAgencyName="DOI">DOI</r:ManagingAgency>
      </r:InternationalIdentifier>
    </r:Citation>
    <r:Abstract>
      <r:Content xml:lang="en">This item contains archives of data and results used to assess recombination rates (iSMC), demographic history (PSMC, MSMC) and haplotype ages (GEVA) using coalescent methods.

Population definitions

Population definitions are the same as described in a different item:


  - "at vs. me" = Atlantic Ocean samples (n=67) vs. the Mediterranean (i.e. Barcelona) samples (n=7).

  - "we vs. ea" = South-West North Atlantic Ocean (n=20) vs. North-East North Atlantic Ocean (n=47). In files using this contrast, sometimes the label "wa" is used instead of "we" for the South-West North Atlantic Ocean samples.



Contents:


  - psmc_dataset.psmcfa.gz, datasets for PSMC-analyses containing signatures of heterozygosity in the reference specimen that were converted from VCF into the fasta-like PSMCFA format.

  - msmc_datasets.tar.gz, datasets for MSMC-analyses containing signatures of heterozygosity in the reference specimen that were converted from VCF into TSV.

  - ismc_dataset.tar.gz, the VCF dataset and accessory files for iSMC-analyses used to infer recombination rates.

  - geva_datasets.candidates.at_vs_me.tar.gz, the re-coded VCF and binary format datasets as well as analysis output for the 660 candidate gene loci analyzed for "at" and "me" populations in the "at vs. me" contrast.

  - geva_datasets.candidates.we_vs_ea.tar.gz, the re-coded VCF and binary format datasets as well as analysis output for the 34 candidate gene loci analyzed for "we" and "ea" populations in the "we vs. ea" contrast.

  - geva_results.candidates.at_vs_me.tar.gz, the resulting age estimates of minor alleles in the "at vs. me" contrast.

  - geva_results.candidates.we_vs_ea.tar.gz, the resulting age estimates of minor alleles in the "we vs. ea" contrast.



psmc_dataset.psmcfa.gz

A FASTA-like file that encodes the distribution of heterozygous genotypes across 4,911 sequences in the diploid reference specimen at the 10 bp window resolution. Character states are:


  - N=a window with only inaccessible sites (i.e. missing data)

  - T=a window with accessible data

  - K=a window with accessible data and at least one heterozygous genotype



This format is further documented on the site of the original tool: https://github.com/lh3/psmc

msmc_datasets.tar.gz

This archive contains one TSV file per sequence (n=5,176) that specifies the distribution of heterozygous genotypes. Each file contains four fields. Example: seq_s_1	2039	171	TC


  - name of sequence

  - position of the heterozygous genotype

  - number of accessible sites since the last heterozygous genotype

  - the heterozygous genotype (a string with only two alleles in this case, when analysing a single individual)



This format is further documented on the site of the original tool: https://github.com/stschiff/msmc-tools/blob/master/msmc-tutorial/guide.md

ismc_dataset.tar.gz

This archive contains several files:


  - 1.merged_contigs.vcf = specifies the distribution of heterozygous genotypes in VCF format

  - 1.merged_contigs.tab = specifies the lengths of sequences (TSV format)

  - 1.merged_contigs.bpp = the program control file with run-time parameters (TXT)

  - 1.merged_contigs.fasta = specifies accessible and inaccessible sites ("N") in FASTA format

  - 1.merged_contigs.out_estimates.txt = the summary results of the analysis (TXT)



geva_datasets.candidates.at_vs_me.tar.gz and geva_datasets.candidates.we_vs_ea.tar.gz

These archives hold data and results from analysing variant ages at each of the 660 or 34 candidate gene loci with divergent haplotypes in each of the two contrasts. For each locus, the files span:


  - Two recoded VCF files. In the first file, the minor allele in one of the two populations (e.g. "at") was taken to represent the derived allele and coded as the ALT allele. In the second file, the minor allele in the other group (e.g. "me") was taken to represent the derived allele and coded as the ALT allele.

  - Intermediate data files generated by GEVA by processing the VCF files (*.bin, *.marker.txt, *.sample.txt), including a log and err file.

  - Results files (*.pairs.txt.gz and *.sites.txt). The "*.sites.txt" files contain allele age estimates under mutation clock (M), recombination clock (R), and joint clock models (J). The format of these files is described on the site of the original tool: https://github.com/pkalbers/geva



geva_results.candidates.at_vs_me.tar.gz and geva_results.candidates.we_vs_ea.tar.gz

These archives contain four TSV files each. For each population (e.g. "at") there are two files. One of them collects all minor allele age estimates under all three models and the other only those under the joint model.</r:Content>
    </r:Abstract>
    <!-- Coverage of the study. Only topical coverage is recorded; the empty,
         unidentified r:SpatialCoverage placeholder that used to follow it
         carried no information and has been dropped. -->
    <r:Coverage>
      <r:TopicalCoverage>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22825277.TopicalCoverage:2.0</r:URN>
        <!-- Each subject code appears twice: once with an English label, once with a Swedish one. -->
        <r:Subject xml:lang="en" controlledVocabularyID="3" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medical and Health Sciences</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="3" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medicin och hälsovetenskap</r:Subject>
        <r:Subject xml:lang="en" controlledVocabularyID="10609" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Genetics and Genomics</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="10609" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Genetik och genomik</r:Subject>
        <r:Subject xml:lang="en" controlledVocabularyID="106" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Biological Sciences</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="106" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Biologi</r:Subject>
        <r:Subject xml:lang="en" controlledVocabularyID="10611" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Ecology</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="10611" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Ekologi</r:Subject>
      </r:TopicalCoverage>
    </r:Coverage>
    <!-- Archive-specific information for the study: access terms and data file count. -->
    <a:Archive>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22825277.Archive:2.0</r:URN>
      <a:ArchiveSpecific>
        <a:Item>
          <a:Access>
            <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-22825277.Archive-ArchiveSpecificType-AccessType:2.0</r:URN>
            <!-- The vocabulary is named but no access term is given: the element is empty.
                 NOTE(review): confirm the intended access term with the data owner. -->
            <a:TypeOfAccess controlledVocabularyName="info:eu-repo-Access-Terms vocabulary" />
          </a:Access>
          <!-- NOTE(review): the abstract lists seven archive files, yet the quantity
               here is 0. Confirm whether 0 is intentional. -->
          <a:DataFileQuantity>0</a:DataFileQuantity>
        </a:Item>
      </a:ArchiveSpecific>
    </a:Archive>
  </s:StudyUnit>
</ddi:DDIInstance>