<codeBook xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xsi:schemaLocation="ddi:codebook:2_5 http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd" xmlns="ddi:codebook:2_5">
  <docDscr>
    <citation>
      <titlStmt>
        <titl xml:lang="sv">WordReference</titl>
        <parTitl xml:lang="en">WordReference</parTitl>
        <IDNo agency="SND">doi-10-23695-a5k7-az71-0</IDNo>
        <IDNo agency="DOI">https://doi.org/10.23695/A5K7-AZ71</IDNo>
      </titlStmt>
      <prodStmt>
        <producer xml:lang="en" abbr="SND">Swedish National Data Service</producer>
        <producer xml:lang="sv" abbr="SND">Svensk nationell datatjänst</producer>
      </prodStmt>
      <holdings URI="https://doi.org/10.23695/A5K7-AZ71">Landing page</holdings>
    </citation>
  </docDscr>
  <stdyDscr>
    <citation>
      <titlStmt>
        <titl xml:lang="sv">WordReference</titl>
        <parTitl xml:lang="en">WordReference</parTitl>
        <IDNo agency="SND">doi-10-23695-a5k7-az71-0</IDNo>
        <IDNo agency="DOI">https://doi.org/10.23695/A5K7-AZ71</IDNo>
      </titlStmt>
      <rspStmt />
      <prodStmt />
      <distStmt>
        <distrbtr xml:lang="en" abbr="SND" URI="https://snd.se">Swedish National Data Service</distrbtr>
        <distrbtr xml:lang="sv" abbr="SND" URI="https://snd.se">Svensk nationell datatjänst</distrbtr>
        <distDate xml:lang="en" date="2024-01-01" />
      </distStmt>
      <verStmt>
        <version elementVersion="0" elementVersionDate="2024-01-01" />
      </verStmt>
      <holdings URI="https://doi.org/10.23695/A5K7-AZ71">Landing page</holdings>
    </citation>
    <stdyInfo>
      <subject />
      <abstract xml:lang="en" contentType="abstract">The WordReference corpus is a very large corpus (170M+ words) of native and non-native natural written production in four languages: English, Spanish, French and Italian. It has been scraped from WordReference forums.
The corpus consists of four tab-separated files (one per language), each containing seven tab-separated columns: message id, poster's nickname, poster's native language(s), the text of the message (post) itself, the id of the topic in response to which the message has been posted (0 if this message is the topic, i.e. the first in the thread), topicstarter's nickname ("topicstarter" if the poster is the topicstarter), native language of the topicstarter. These are the "raw" data with as little processing as possible (but http links and explicit quotes of other users' posts are removed during the download).
 Several useful scripts for working with the corpus (including the one which labels all participants as L1 or L2-speakers in the respective subforum) can be found in this repository.
 For a more detailed description of the corpus, its creation and examples of how it can be used, see the following paper: {publication 297766}. Please cite the paper if you are using the corpus.</abstract>
      <abstract xml:lang="sv" contentType="abstract">The WordReference corpus is a very large corpus (170M+ words) of native and non-native natural written production in four languages: English, Spanish, French and Italian. It has been scraped from WordReference forums.
The corpus consists of four tab-separated files (one per language), each containing seven tab-separated columns: message id, poster's nickname, poster's native language(s), the text of the message (post) itself, the id of the topic in response to which the message has been posted (0 if this message is the topic, i.e. the first in the thread), topicstarter's nickname ("topicstarter" if the poster is the topicstarter), native language of the topicstarter. These are the "raw" data with as little processing as possible (but http links and explicit quotes of other users' posts are removed during the download).
 Several useful scripts for working with the corpus (including the one which labels all participants as L1 or L2-speakers in the respective subforum) can be found in this repository.
 For a more detailed description of the corpus, its creation and examples of how it can be used, see the following paper: {publication 297766}. Please cite the paper if you are using the corpus.</abstract>
      <sumDscr />
    </stdyInfo>
    <method>
      <dataColl />
    </method>
    <dataAccs>
      <useStmt>
        <restrctn xml:lang="en">Access to data through an external actor. </restrctn>
        <restrctn xml:lang="sv">Åtkomst till data via extern aktör. </restrctn>
      </useStmt>
    </dataAccs>
    <othrStdyMat />
  </stdyDscr>
</codeBook>