<ddi:DDIInstance xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="ddi:instance:3_3 http://ddialliance.org/Specification/DDI-Lifecycle/3.3/XMLSchema/instance.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ddi="ddi:instance:3_3" xmlns:r="ddi:reusable:3_3" xmlns:s="ddi:studyunit:3_3" xmlns:d="ddi:datacollection:3_3" xmlns:a="ddi:archive:3_3" xmlns:c="ddi:conceptualcomponent:3_3" xmlns:cm="ddi:comparative:3_3" xmlns:g="ddi:group:3_3" xmlns:l="ddi:logicalproduct:3_3" xmlns:p="ddi:physicaldataproduct:3_3" xmlns:pi="ddi:physicalinstance:3_3" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xml="http://www.w3.org/XML/1998/namespace" isMaintainable="true" scopeOfUniqueness="Agency">
  <!-- Identification of this DDI instance: a URN followed by separate Agency, ID and Version elements. -->
  <!-- NOTE(review): the agency segment inside the URN is "se.researchdata" while r:Agency below is "SND"; confirm which identifier is intended. -->
  <r:URN>urn:ddi:se.researchdata:2023-148:1</r:URN>
  <r:Agency>SND</r:Agency>
  <r:ID>2023-148</r:ID>
  <!-- NOTE(review): the version here is 1, whereas the URNs of the nested elements in this file end in 2.0; confirm this is deliberate. -->
  <r:Version>1</r:Version>
  <!-- Resource package: holds the people (a:Individual) that other parts of this file point to by URN. -->
  <g:ResourcePackage>
    <r:URN>urn:ddi:se.researchdata:2023-148.ResourcePackage:2.0</r:URN>
    <!-- This scheme is identified but contains no items. -->
    <r:OtherMaterialScheme>
      <r:URN>urn:ddi:se.researchdata:2023-148.OtherMaterialScheme:2.0</r:URN>
    </r:OtherMaterialScheme>
    <a:OrganizationScheme>
      <r:URN>urn:ddi:se.researchdata:2023-148.OrganizationScheme-0:2.0</r:URN>
      <!-- Each individual must carry its own URN so that a reference by URN resolves to exactly one person. -->
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:2023-148.Individual-0:2.0</r:URN>
        <!-- Affiliation is stored as a free-text key/value pair. -->
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Medicine, Huddinge / Center for Hematology and Regenerative Medicine (HERM), Karolinska Institutet</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Gabriele</a:FirstGiven>
            <a:LastFamily>Todisco</a:LastFamily>
            <a:FullName>
              <r:String>Gabriele Todisco</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0001-6583-3829</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <!-- Was a second "Individual-0", colliding with the individual above; numbered 1 to keep URNs unique. -->
        <r:URN>urn:ddi:se.researchdata:2023-148.Individual-1:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Medicine, Huddinge / Center for Hematology and Regenerative Medicine (HERM), Karolinska Institutet</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Eva</a:FirstGiven>
            <a:LastFamily>Hellström-Lindberg</a:LastFamily>
            <a:FullName>
              <r:String>Eva Hellström-Lindberg</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-7839-3743</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
    </a:OrganizationScheme>
  </g:ResourcePackage>
  <s:StudyUnit>
    <r:URN>urn:ddi:se.researchdata:2023-148.StudyUnit:2.0</r:URN>
    <r:UserID typeOfUserID="datasetIdentifier">2023-148</r:UserID>
    <!-- Bibliographic citation of the dataset: title, creator, publisher, publication date and DOI. -->
    <r:Citation>
      <r:Title>
        <!-- The same title text is supplied for both languages. -->
        <r:String xml:lang="sv">Integrated genomic and transcriptomic analysis improves disease classification and risk stratification of MDS with ring sideroblasts</r:String>
        <r:String xml:lang="en">Integrated genomic and transcriptomic analysis improves disease classification and risk stratification of MDS with ring sideroblasts</r:String>
      </r:Title>
      <!-- The creator is given by reference to an a:Individual, matched on its URN. -->
      <!-- NOTE(review): this file defines two individuals but only one creator reference is present; confirm whether a second creator entry is missing. -->
      <r:Creator>
        <r:CreatorReference>
          <r:URN>urn:ddi:se.researchdata:2023-148.Individual-0:2.0</r:URN>
          <r:TypeOfObject>Individual</r:TypeOfObject>
        </r:CreatorReference>
      </r:Creator>
      <!-- Listed once: an identical second r:Publisher entry was removed as a duplicate. -->
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Karolinska Institutet</r:String>
          <r:String xml:lang="en">Karolinska Institutet</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:PublicationDate>
        <r:SimpleDate>2023-07-25</r:SimpleDate>
      </r:PublicationDate>
      <!-- Persistent identifier; the managing agency names the identifier system (DOI). -->
      <r:InternationalIdentifier>
        <r:IdentifierContent>10.48723/zt59-8x04</r:IdentifierContent>
        <r:ManagingAgency controlledVocabularyAgencyName="DOI">DOI</r:ManagingAgency>
      </r:InternationalIdentifier>
    </r:Citation>
    <r:Abstract>
      <r:Content xml:lang="sv">Heltranskriptom-sekvensering (RNA-seq) från CD34-uttryckande mononukleära benmärgsceller från patienter med myelodysplastiskt syndrom med ringsideroblaster (MDS-RS). CD34-uttryckande celler isolerades från mononukleära benmärgsceller via instrumentet AUTO-MACS med dubbelseparation (Miltenyi Biotec, Germany). RNA extraherades från CD34-uttryckande celler via RNeasy Microkit (Qiagen, Hilden, Germany) och behandlades därefter med DNase i enlighet med tillverkarens instruktion. RNA-integritetsnumret uppskattades sedan via Agilent RNA 6000 Pico (Agilent Technologies, Palo Alto, CA) och var högre än 6.5 i alla prover (median 8.2). RNA-sekvenseringsbiblioteken sattes upp från allt RNA via SMARTer Stranded Total RNA-Seq Kit v2 Pico Input Mammalian med enzymatisk degradering av ribosomalt RNA (Takara Bio, Japan). RNA-biblioteken sekvenserades sedan på Novaseq 6000 med ”paired-end 150bp”-inställning. Slutligen kombinerade vi molekylära och kliniska data i syfte att hitta nya prognostiska markörer och förbättra karaktärisering av sjukdomen hos patienter med MDS.

Datasetet består av två filer:
- FASTQ_RS.tar.gz: komprimerad mapp innehållande 258 fastq-filer
- metadata_RS.xlsx

Datasetets totala storlek är ca 1 TB.</r:Content>
      <r:Content xml:lang="en">Full transcriptome (RNA-sequencing) from bulk CD34+ bone marrow mononuclear cells from MDS patients with ring sideroblasts. CD34+ cells were isolated from the mononuclear cells (MNC) using AUTO-MACS with the double-separation option (Miltenyi Biotec, Germany) and submitted for RNA extraction. RNA was extracted with the RNeasy Microkit (Qiagen, Hilden, Germany) and treated with DNase according to the manufacturer's instructions. The RNA integrity number was estimated using the Agilent RNA 6000 Pico (Agilent Technologies, Palo Alto, CA) and was greater than 6.5 for all samples (median 8.2). The RNA-sequencing (RNA-seq) libraries were prepared from total RNA using the SMARTer Stranded Total RNA-Seq Kit v2 Pico Input Mammalian with enzymatic ribosomal depletion (Takara Bio, Japan). Libraries were sequenced on the Novaseq 6000 with a paired-end 150bp configuration. The molecular data were integrated with clinical information with the aim of improving prognosis prediction in this hematologic malignancy.

The dataset consists of 2 files:
- FASTQ_RS.tar.gz: compressed folder that includes 258 fastq files
- metadata_RS.xlsx

The total size of the dataset is approximately 1 TB.</r:Content>
    </r:Abstract>
    <!-- Coverage of the dataset along three axes: topic, geography and time. -->
    <r:Coverage>
      <r:TopicalCoverage>
        <r:URN>urn:ddi:se.researchdata:2023-148.TopicalCoverage:2.0</r:URN>

        <!-- Subjects: each vocabulary ID appears once per language (en, sv). -->
        <r:Subject xml:lang="en"
                   controlledVocabularyID="30107"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medical Genetics and Genomics</r:Subject>
        <r:Subject xml:lang="sv"
                   controlledVocabularyID="30107"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medicinsk genetik och genomik</r:Subject>
        <r:Subject xml:lang="en"
                   controlledVocabularyID="30202"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Hematology</r:Subject>
        <r:Subject xml:lang="sv"
                   controlledVocabularyID="30202"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Hematologi</r:Subject>
        <r:Subject xml:lang="en"
                   controlledVocabularyID="30203"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Cancer and Oncology</r:Subject>
        <r:Subject xml:lang="sv"
                   controlledVocabularyID="30203"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Cancer och onkologi</r:Subject>
        <r:Subject xml:lang="en"
                   controlledVocabularyID="30112"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Basic Cancer Research</r:Subject>
        <r:Subject xml:lang="sv"
                   controlledVocabularyID="30112"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Basal cancerforskning</r:Subject>

        <!-- Keywords from MeSH, one entry per language. -->
        <r:Keyword xml:lang="en"
                   controlledVocabularyID="D006402"
                   controlledVocabularyName="MeSH">Hematologic Diseases</r:Keyword>
        <r:Keyword xml:lang="sv"
                   controlledVocabularyID="D006402"
                   controlledVocabularyName="MeSH">Blodsjukdomar</r:Keyword>
        <r:Keyword xml:lang="en"
                   controlledVocabularyID="D009190"
                   controlledVocabularyName="MeSH">Myelodysplastic Syndromes</r:Keyword>
        <r:Keyword xml:lang="sv"
                   controlledVocabularyID="D009190"
                   controlledVocabularyName="MeSH">Myelodysplastiskt syndrom</r:Keyword>
        <r:Keyword xml:lang="en"
                   controlledVocabularyID="D054437"
                   controlledVocabularyName="MeSH">Myelodysplastic-Myeloproliferative Diseases</r:Keyword>
        <r:Keyword xml:lang="sv"
                   controlledVocabularyID="D054437"
                   controlledVocabularyName="MeSH">Myelodysplastiska-myeloproliferativa syndrom</r:Keyword>

        <!-- Keyword from INSPIRE Spatial Data Themes, one entry per language. -->
        <r:Keyword xml:lang="en"
                   controlledVocabularyID="hh"
                   controlledVocabularyName="INSPIRE Spatial Data Themes">Human health and safety</r:Keyword>
        <r:Keyword xml:lang="sv"
                   controlledVocabularyID="hh"
                   controlledVocabularyName="INSPIRE Spatial Data Themes">Människors hälsa och säkerhet</r:Keyword>

        <!-- Keywords from the NASA Thesaurus; only English entries are present. -->
        <r:Keyword xml:lang="en"
                   controlledVocabularyID="39900"
                   controlledVocabularyName="NASA Thesaurus">cancer</r:Keyword>
        <r:Keyword xml:lang="en"
                   controlledVocabularyID="47989"
                   controlledVocabularyName="NASA Thesaurus">neoplasms</r:Keyword>
        <r:Keyword xml:lang="en"
                   controlledVocabularyID="54292"
                   controlledVocabularyName="NASA Thesaurus">tumors</r:Keyword>
      </r:TopicalCoverage>

      <!-- Geographic coverage, given as a country code. -->
      <r:SpatialCoverage>
        <r:URN>urn:ddi:se.researchdata:2023-148.SpatialCoverage:2.0</r:URN>
        <r:CountryCode>SE</r:CountryCode>
      </r:SpatialCoverage>

      <!-- Time span covered, given with year precision. -->
      <r:TemporalCoverage>
        <r:URN>urn:ddi:se.researchdata:2023-148.TemporalCoverage:2.0</r:URN>
        <r:ReferenceDate>
          <r:StartDate>2002</r:StartDate>
          <r:EndDate>2022</r:EndDate>
        </r:ReferenceDate>
      </r:TemporalCoverage>
    </r:Coverage>
    <!-- Archive-specific information for this dataset: access conditions and number of data files. -->
    <a:Archive>
      <r:URN>urn:ddi:se.researchdata:2023-148.Archive:2.0</r:URN>
      <a:ArchiveSpecific>
        <a:Item>
          <a:Access>
            <r:URN>urn:ddi:se.researchdata:2023-148.Archive-ArchiveSpecificType-AccessType:2.0</r:URN>
            <!-- Access level, expressed as a term from the vocabulary named in the attribute. -->
            <a:TypeOfAccess controlledVocabularyName="info:eu-repo-Access-Terms vocabulary">restrictedAccess</a:TypeOfAccess>
          </a:Access>
          <!-- NOTE(review): the abstract in this file states that the dataset consists of two files, but 4 is recorded here; confirm which is correct. -->
          <a:DataFileQuantity>4</a:DataFileQuantity>
        </a:Item>
      </a:ArchiveSpecific>
    </a:Archive>
  </s:StudyUnit>
</ddi:DDIInstance>