<codeBook xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xsi:schemaLocation="ddi:codebook:2_5 http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd" xmlns="ddi:codebook:2_5">
  <docDscr>
    <citation>
      <titlStmt>
        <titl xml:lang="sv">Arabisk e-bokskorpus</titl>
        <altTitl>مدونة لغوية للكتب العربية الإلكترونية</altTitl>
        <parTitl xml:lang="en">The Arabic E-Book Corpus</parTitl>
        <IDNo agency="SND">2024-145-1</IDNo>
        <IDNo agency="DOI">https://doi.org/10.5878/7rbh-gy93</IDNo>
      </titlStmt>
      <prodStmt>
        <producer xml:lang="en" abbr="SND">Swedish National Data Service</producer>
        <producer xml:lang="sv" abbr="SND">Svensk nationell datatjänst</producer>
      </prodStmt>
      <holdings URI="https://doi.org/10.5878/7rbh-gy93">Landing page</holdings>
    </citation>
  </docDscr>
  <stdyDscr>
    <citation>
      <titlStmt>
        <titl xml:lang="sv">Arabisk e-bokskorpus</titl>
        <altTitl>مدونة لغوية للكتب العربية الإلكترونية</altTitl>
        <parTitl xml:lang="en">The Arabic E-Book Corpus</parTitl>
        <IDNo agency="SND">2024-145-1</IDNo>
        <IDNo agency="DOI">https://doi.org/10.5878/7rbh-gy93</IDNo>
        <IDNo agency="DOI">10.1016/j.dib.2025.111456</IDNo>
      </titlStmt>
      <rspStmt>
        <AuthEnty xml:lang="en" affiliation="Department of Languages and Literatures, University of Gothenburg">Hallberg, Andreas</AuthEnty>
        <AuthEnty xml:lang="sv" affiliation="Institutionen för språk och litteraturer, Göteborgs universitet">Hallberg, Andreas</AuthEnty>
      </rspStmt>
      <prodStmt />
      <distStmt>
        <distrbtr xml:lang="en" abbr="SND" URI="https://snd.se">Swedish National Data Service</distrbtr>
        <distrbtr xml:lang="sv" abbr="SND" URI="https://snd.se">Svensk nationell datatjänst</distrbtr>
        <distDate xml:lang="en" date="2024-12-11" />
      </distStmt>
      <verStmt>
        <version elementVersion="1" elementVersionDate="2024-12-11" />
      </verStmt>
      <holdings URI="https://doi.org/10.5878/7rbh-gy93">Landing page</holdings>
    </citation>
    <stdyInfo>
      <subject>
        <keyword xml:lang="en" vocab="YSO" vocabURI="http://www.yso.fi/onto/yso/p21436">corpus linguistics</keyword>
        <keyword xml:lang="sv" vocab="YSO" vocabURI="http://www.yso.fi/onto/yso/p21436">korpuslingvistik</keyword>
        <keyword xml:lang="en" vocab="YSO" vocabURI="http://www.yso.fi/onto/yso/p25912">corpus-based research</keyword>
        <keyword xml:lang="sv" vocab="YSO" vocabURI="http://www.yso.fi/onto/yso/p25912">korpusundersökning</keyword>
        <keyword xml:lang="en" vocab="YSO" vocabURI="http://www.yso.fi/onto/yso/p14417">Arabic language</keyword>
        <keyword xml:lang="sv" vocab="YSO" vocabURI="http://www.yso.fi/onto/yso/p14417">arabiska</keyword>
        <keyword xml:lang="en" vocab="YSO" vocabURI="http://www.yso.fi/onto/yso/p26867">Arabic alphabet</keyword>
        <keyword xml:lang="sv" vocab="YSO" vocabURI="http://www.yso.fi/onto/yso/p26867">arabiska alfabetet</keyword>
      </subject>
      <abstract xml:lang="en" contentType="abstract">The Arabic E-Book Corpus is a freely available collection of 1,745 books (81.5 million words) published in Arabic by the Hindawi Foundation between 2008 and 2024. The books are of various genres, including non-fiction, novels, children's literature, poetry, and plays. The corpus is provided in two versions: html and unformatted plain text. The latter version will be appropriate for most purposes.

For additional detail, see Hallberg, A. (2025). An 81-million-word multi-genre corpus of Arabic books. Data in Brief, 60, 111456. https://doi.org/10.1016/j.dib.2025.111456</abstract>
      <abstract xml:lang="sv" contentType="abstract">Arabisk e-bokskorpus är en fritt tillgänglig samling av 1 745 böcker på arabiska, publicerade av Hindawi Foundation mellan 2008 och 2024. Böckerna är av olika genrer, bland annat facktext, romaner, barnlitteratur, poesi och pjäser. Korpusen är tillgänglig i två versioner: html och icke-formaterad ren text. Den senare är bäst lämpad för de flesta syften.

För ytterligare detaljer, se Hallberg, A. (2025). An 81-million-word multi-genre corpus of Arabic books. Data in Brief, 60, 111456. https://doi.org/10.1016/j.dib.2025.111456</abstract>
      <sumDscr>
        <dataKind xml:lang="en">Text</dataKind>
      </sumDscr>
    </stdyInfo>
    <method>
      <dataColl />
    </method>
    <dataAccs>
      <useStmt>
        <restrctn xml:lang="en">Access to data through SND. Data are freely accessible.</restrctn>
        <restrctn xml:lang="sv">Åtkomst till data via SND. Data är fritt tillgängliga.</restrctn>
        <conditions elementVersion="info:eu-repo-Access-Terms vocabulary">openAccess</conditions>
      </useStmt>
    </dataAccs>
    <othrStdyMat>
      <relPubl>
        <citation>
          <titlStmt>
            <titl xml:lang="sv">Hallberg, A. (2025). An 81-million-word multi-genre corpus of Arabic books. Data in Brief, 60, 111456. https://doi.org/10.1016/j.dib.2025.111456</titl>
            <parTitl xml:lang="en">Hallberg, A. (2025). An 81-million-word multi-genre corpus of Arabic books. Data in Brief, 60, 111456. https://doi.org/10.1016/j.dib.2025.111456</parTitl>
            <IDNo agency="DOI">10.1016/j.dib.2025.111456</IDNo>
          </titlStmt>
          <distStmt>
            <distDate date="2025">2025</distDate>
          </distStmt>
        </citation>
      </relPubl>
    </othrStdyMat>
  </stdyDscr>
</codeBook>