<ddi:DDIInstance xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="ddi:instance:3_3 http://ddialliance.org/Specification/DDI-Lifecycle/3.3/XMLSchema/instance.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ddi="ddi:instance:3_3" xmlns:r="ddi:reusable:3_3" xmlns:s="ddi:studyunit:3_3" xmlns:d="ddi:datacollection:3_3" xmlns:a="ddi:archive:3_3" xmlns:c="ddi:conceptualcomponent:3_3" xmlns:cm="ddi:comparative:3_3" xmlns:g="ddi:group:3_3" xmlns:l="ddi:logicalproduct:3_3" xmlns:p="ddi:physicaldataproduct:3_3" xmlns:pi="ddi:physicalinstance:3_3" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xml="http://www.w3.org/XML/1998/namespace" isMaintainable="true" scopeOfUniqueness="Agency">
  <r:URN>urn:ddi:se.researchdata:2024-145:1</r:URN>
  <r:Agency>SND</r:Agency>
  <r:ID>2024-145</r:ID>
  <r:Version>1</r:Version>
  <!-- Resource package for dataset 2024-145: an OtherMaterialScheme (URN only) and an OrganizationScheme describing one individual. -->
  <g:ResourcePackage>
    <r:URN>urn:ddi:se.researchdata:2024-145.ResourcePackage:2.0</r:URN>
    <r:OtherMaterialScheme>
      <r:URN>urn:ddi:se.researchdata:2024-145.OtherMaterialScheme:2.0</r:URN>
    </r:OtherMaterialScheme>
    <a:OrganizationScheme>
      <r:URN>urn:ddi:se.researchdata:2024-145.OrganizationScheme-0:2.0</r:URN>
      <!-- The URN of this individual is the one used by the CreatorReference in the StudyUnit citation. -->
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:2024-145.Individual-0:2.0</r:URN>
        <!-- Affiliation is carried as a free-text key/value attribute pair. -->
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Languages and Literatures, University of Gothenburg</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Andreas</a:FirstGiven>
            <a:LastFamily>Hallberg</a:LastFamily>
            <a:FullName>
              <r:String>Andreas Hallberg</r:String>
            </a:FullName>
          </a:IndividualName>
          <!-- Researcher identifier; TypeOfID names the scheme (ORCID) and the value is the bare identifier. -->
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0001-9442-1495</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
    </a:OrganizationScheme>
  </g:ResourcePackage>
  <s:StudyUnit>
    <r:URN>urn:ddi:se.researchdata:2024-145.StudyUnit:2.0</r:URN>
    <r:UserID typeOfUserID="datasetIdentifier">2024-145</r:UserID>
    <!-- Bibliographic citation for the dataset: bilingual title, creator, publisher, publication date and DOI. -->
    <r:Citation>
      <r:Title>
        <r:String xml:lang="sv">Arabisk e-bokskorpus</r:String>
        <r:String xml:lang="en">The Arabic E-Book Corpus</r:String>
      </r:Title>
      <!-- The creator is given by reference; the URN points at the Individual defined in the ResourcePackage's OrganizationScheme. -->
      <r:Creator>
        <r:CreatorReference>
          <r:URN>urn:ddi:se.researchdata:2024-145.Individual-0:2.0</r:URN>
          <r:TypeOfObject>Individual</r:TypeOfObject>
        </r:CreatorReference>
      </r:Creator>
      <!-- Single publisher entry; an identical second r:Publisher element was a redundant duplicate and has been dropped. -->
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Göteborgs universitet</r:String>
          <r:String xml:lang="en">University of Gothenburg</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:PublicationDate>
        <r:SimpleDate>2024-12-11</r:SimpleDate>
      </r:PublicationDate>
      <!-- Persistent identifier; ManagingAgency marks the identifier content as a DOI. -->
      <r:InternationalIdentifier>
        <r:IdentifierContent>10.5878/7rbh-gy93</r:IdentifierContent>
        <r:ManagingAgency controlledVocabularyAgencyName="DOI">DOI</r:ManagingAgency>
      </r:InternationalIdentifier>
    </r:Citation>
    <r:Abstract>
      <r:Content xml:lang="sv">Arabisk e-bokskorpus är en fritt tillgänglig samling av 1 745 böcker på arabiska, publicerade av Hindawi Foundation mellan 2008 och 2024. Böckerna är av olika genrer, bland annat facktext, romaner, barnlitteratur, poesi och pjäser. Korpusen är tillgänglig i två versioner: html och icke-formaterad ren text. Den senare är bäst lämpad för de flesta syften.

För ytterligare detaljer, se Hallberg, A. (2025). An 81-million-word multi-genre corpus of Arabic books. Data in Brief, 60, 111456. https://doi.org/10.1016/j.dib.2025.111456</r:Content>
      <r:Content xml:lang="en">The Arabic E-Book Corpus is a freely available collection of 1,745 books (81.5 million words) published by the Hindawi Foundation between 2008 and 2024. The books are of various genres, including non-fiction, novels, children's literature, poetry, and plays. The corpus is provided in two versions: html and unformatted plain text. The latter version will be appropriate for most purposes.

For additional detail, see Hallberg, A. (2025). An 81-million-word multi-genre corpus of Arabic books. Data in Brief, 60, 111456. https://doi.org/10.1016/j.dib.2025.111456</r:Content>
    </r:Abstract>
    <!-- Topical and temporal coverage of the study; the spatial coverage element is present but empty. -->
    <r:Coverage>
      <r:TopicalCoverage>
        <r:URN>urn:ddi:se.researchdata:2024-145.TopicalCoverage:2.0</r:URN>
        <!-- Subjects and keywords are listed as en/sv pairs; both entries of a pair share the same controlledVocabularyID. -->
        <r:Subject xml:lang="en" controlledVocabularyID="10208" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Natural Language Processing</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="10208" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Språkbehandling och datorlingvistik</r:Subject>
        <r:Subject xml:lang="en" controlledVocabularyID="60202" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Studies of Specific Languages</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="60202" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Studier av enskilda språk</r:Subject>
        <r:Keyword xml:lang="en" controlledVocabularyID="p21436" controlledVocabularyName="YSO">corpus linguistics</r:Keyword>
        <r:Keyword xml:lang="sv" controlledVocabularyID="p21436" controlledVocabularyName="YSO">korpuslingvistik</r:Keyword>
        <r:Keyword xml:lang="en" controlledVocabularyID="p25912" controlledVocabularyName="YSO">corpus-based research</r:Keyword>
        <r:Keyword xml:lang="sv" controlledVocabularyID="p25912" controlledVocabularyName="YSO">korpusundersökning</r:Keyword>
        <r:Keyword xml:lang="en" controlledVocabularyID="p14417" controlledVocabularyName="YSO">Arabic language</r:Keyword>
        <r:Keyword xml:lang="sv" controlledVocabularyID="p14417" controlledVocabularyName="YSO">arabiska</r:Keyword>
        <r:Keyword xml:lang="en" controlledVocabularyID="p26867" controlledVocabularyName="YSO">Arabic alphabet</r:Keyword>
        <r:Keyword xml:lang="sv" controlledVocabularyID="p26867" controlledVocabularyName="YSO">arabiska alfabetet</r:Keyword>
      </r:TopicalCoverage>
      <!-- NOTE(review): empty element with no identification or content; confirm whether consumers expect it or whether it should be omitted. -->
      <r:SpatialCoverage />
      <r:TemporalCoverage>
        <r:URN>urn:ddi:se.researchdata:2024-145.TemporalCoverage:2.0</r:URN>
        <!-- Reference period at year precision; the same 2008 to 2024 span is stated in the abstract. -->
        <r:ReferenceDate>
          <r:StartDate>2008</r:StartDate>
          <r:EndDate>2024</r:EndDate>
        </r:ReferenceDate>
      </r:TemporalCoverage>
    </r:Coverage>
    <!-- Archive-specific information for the study: the access type and the number of data files. -->
    <a:Archive>
      <r:URN>urn:ddi:se.researchdata:2024-145.Archive:2.0</r:URN>
      <a:ArchiveSpecific>
        <a:Item>
          <a:Access>
            <r:URN>urn:ddi:se.researchdata:2024-145.Archive-ArchiveSpecificType-AccessType:2.0</r:URN>
            <!-- Access term taken from the vocabulary named in controlledVocabularyName. -->
            <a:TypeOfAccess controlledVocabularyName="info:eu-repo-Access-Terms vocabulary">openAccess</a:TypeOfAccess>
          </a:Access>
          <!-- Number of data files stated for this item. -->
          <a:DataFileQuantity>5</a:DataFileQuantity>
        </a:Item>
      </a:ArchiveSpecific>
    </a:Archive>
  </s:StudyUnit>
</ddi:DDIInstance>