<?xml version="1.0" encoding="utf-8" standalone="yes"?>

<cmd:CMD xmlns="http://www.clarin.eu/cmd/" xmlns:cmd="http://www.clarin.eu/cmd/1" xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1407745711925" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" CMDVersion="1.2" xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1407745711925 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.2/profiles/clarin.eu:cr1:p_1407745711925/xsd">
<cmd:Header>
<!-- CMDI envelope header: administrative metadata about this record (creator, creation date, persistent self link, profile id). -->
<cmd:MdCreator>Gunn Inger Lyse Samdal</cmd:MdCreator>
<cmd:MdCreationDate>2025-08-25</cmd:MdCreationDate>
<cmd:MdSelfLink>hdl:11495/EC56-B41C-7774-1</cmd:MdSelfLink>
<cmd:MdProfile>clarin.eu:cr1:p_1407745711925</cmd:MdProfile>
<!-- Display name matches the English resourceName in identificationInfo; this record describes the nynorsk list. -->
<cmd:MdCollectionDisplayName>Frequency lists (nynorsk) from NNC - Norwegian Newspaper Corpus</cmd:MdCollectionDisplayName>
</cmd:Header>
<cmd:Resources>
<!-- No resource proxies, journal files or resource relations are declared in this record. -->
<cmd:ResourceProxyList/>
<cmd:JournalFileProxyList/>
<cmd:ResourceRelationList/>
</cmd:Resources>
<!-- No parent collections are declared in this record. -->
<cmd:IsPartOfList/>
<cmd:Components>
<cmdp:corpusProfile>
    <cmdp:resourceCommonInfo>
        <cmdp:resourceType>corpus</cmdp:resourceType>
        <cmdp:identificationInfo>
            <cmdp:resourceName xml:lang="en">Frequency lists (nynorsk) from NNC - Norwegian Newspaper Corpus</cmdp:resourceName>
            <cmdp:resourceName xml:lang="nn">Frekvensordliste (nynorsk) frå Norsk aviskorpus (1998–2020)</cmdp:resourceName>
            <cmdp:description xml:lang="en">Frequency word list from the Norwegian Newspaper Corpus (nynorsk). The frequency list contains the 10,000 most frequent words in the Norwegian newspaper corpus (the subpart covering the written variety Norwegian Nynorsk), sorted by frequency from highest to lowest. The nynorsk part of the Norwegian newspaper corpus contains some 21 million words from eleven major newspapers, with material from 1998 to 2020.

If you need other frequency lists from other sources, please feel free to contact clarino@uib.no.

This frequency list can, for example, be used to filter out “stop words” (common words with assumed low informational value) in language technology processing of text, or to quantify “common” words in the vocabulary. The list includes both linguistic words and punctuation (i.e. all tokens), and distinguishes between uppercase and lowercase letters. At the top of the list you will find, among others: ".", "ikkje", "det", and "Det". Note that the most frequent words from newspaper language, as here, may differ from the most frequent words from other genres such as fiction, youth language, or child language. Among the 100 most frequent words you will find, for example, the word "Foto" (because "Foto" often appears in newspaper texts with photo credits).

The frequency word list was generated on 25.08.2025, and you can always obtain a newer list directly from the Norwegian newspaper corpus via clarino.uib.no/korpuskel. You can also then download more words than the 10,000 which are available here.</cmdp:description>
            <cmdp:description xml:lang="nn">Frekvensordliste frå Norsk aviskorpus (nynorsk). Frekvensordlista er ei liste over dei 10 000 mest frekvente orda i Norsk aviskorpus, sortert på frekvens frå høgast til lågast. Norsk aviskorpus (nynorskdelen) inneheld ca. 21 millionar ord frå elleve store aviser, og har per i dag materiale frå 1998 til 2020. Frekvensordlista kan til dømes brukast til å luke ut «stoppord» (vanlege ord med låg informasjonsverdi) i språkteknologisk prosessering av tekst, eller for å talfeste kva som er «vanlege» ord i ordforrådet. Treng du frekvenslister frå andre typar språkmateriale, ta kontakt med clarino@uib.no.

Denne lista inneheld både språklege ord og teiknsetjing, og skil mellom store og små bokstavar. På toppen av lista finn du mellom anna: ".", "ikkje", "det" og "Det". Merk at dei mest frekvente orda frå avisspråk, som her, kan vere annleis enn dei mest frekvente orda frå andre sjangerar som skjønnlitteratur, ungdomsspråk eller barnespråk. Blant dei 100 mest frekvente orda finn du mellom anna ordet "Foto" (fordi ordet "Foto" ofte går att i avistekstar med bildekreditering).
Frekvensordlista er generert 25.08.2025, og du kan alltid hente ei nyare liste direkte frå Norsk aviskorpus via clarino.uib.no/korpuskel. Du kan då òg hente fleire ord enn dei 10 000 som er nedlastbare her, t.d. alle orda i heile korpuset.</cmdp:description>
            <cmdp:url>https://clarino.uib.no/</cmdp:url>
        </cmdp:identificationInfo>
        <cmdp:distributionInfo>
            <!-- Distribution terms: open to the public, offered both as a download and through a web interface. -->
            <cmdp:licenceInfo>
                <cmdp:userCategory>Public</cmdp:userCategory>
                <cmdp:distributionAccessMedium>downloadable</cmdp:distributionAccessMedium>
                <cmdp:distributionAccessMedium>webExecutable</cmdp:distributionAccessMedium>
                <cmdp:downloadLocation cmd:description="CLARINO Bergen repository">https://repo.clarino.uib.no/</cmdp:downloadLocation>
                <cmdp:executionLocation cmd:description="Corpuscle web interface">https://clarino.uib.no/korpuskel/</cmdp:executionLocation>
                <!-- Licence: Creative Commons Attribution; the URL points at version 4.0. -->
                <cmdp:licence>
                    <cmdp:licenceFamily>Creative Commons (CC)</cmdp:licenceFamily>
                    <cmdp:licenceName>Creative_Commons-BY (CC-BY)</cmdp:licenceName>
                    <cmdp:licenceURL>http://creativecommons.org/licenses/by/4.0/</cmdp:licenceURL>
                    <cmdp:conditionsOfUse>BY</cmdp:conditionsOfUse>
                </cmdp:licence>
            </cmdp:licenceInfo>
            <!-- NOTE(review): the rights holder is typed as an organization, but no organizationInfo follows,
                 so the holder is not actually named. Confirm which organization holds the IPR and add it. -->
            <cmdp:iprHolder>
                <cmdp:actorInfo>
                    <cmdp:actorType>organization</cmdp:actorType>
                </cmdp:actorInfo>
            </cmdp:iprHolder>
        </cmdp:distributionInfo>
        <cmdp:contact>
            <!-- Contact for this resource. The actor is described through personInfo (a named individual
                 with an institutional affiliation), so it is typed as a person, in line with the other
                 actorInfo entries in this record that carry personInfo. -->
            <cmdp:actorInfo>
                <cmdp:actorType>person</cmdp:actorType>
                <cmdp:personInfo>
                    <cmdp:surname xml:lang="en">Meurer</cmdp:surname>
                    <cmdp:givenName xml:lang="en">Paul</cmdp:givenName>
                    <cmdp:sex>male</cmdp:sex>
                    <cmdp:position>Senior consultant</cmdp:position>
                    <cmdp:affiliation>
                        <cmdp:organizationInfo>
                            <cmdp:organizationName xml:lang="en">University of Bergen</cmdp:organizationName>
                            <cmdp:organizationName xml:lang="no">Universitetet i Bergen</cmdp:organizationName>
                            <cmdp:organizationShortName xml:lang="no">UiB</cmdp:organizationShortName>
                            <cmdp:organizationShortName xml:lang="en">UoB</cmdp:organizationShortName>
                            <cmdp:departmentName xml:lang="en">University Library</cmdp:departmentName>
                            <cmdp:departmentName xml:lang="no">Universitetsbiblioteket</cmdp:departmentName>
                        </cmdp:organizationInfo>
                    </cmdp:affiliation>
                </cmdp:personInfo>
            </cmdp:actorInfo>
        </cmdp:contact>
        <cmdp:metadataInfo>
            <!-- Provenance of this metadata record: creation and last-update dates, and the person who wrote it. -->
            <cmdp:metadataCreationDate>2025-08-25</cmdp:metadataCreationDate>
            <cmdp:metadataLastDateUpdated>2025-08-25</cmdp:metadataLastDateUpdated>
            <cmdp:metadataCreator>
                <cmdp:actorInfo>
                    <cmdp:actorType>person</cmdp:actorType>
                    <cmdp:personInfo>
                        <!-- Surname and affiliation are kept identical to the resourceCreator entry and to
                             MdCreator in the header, which all refer to the same person. -->
                        <cmdp:surname xml:lang="no">Lyse Samdal</cmdp:surname>
                        <cmdp:givenName xml:lang="no">Gunn Inger</cmdp:givenName>
                        <cmdp:sex>female</cmdp:sex>
                        <cmdp:position>Researcher (Ph.D)</cmdp:position>
                        <cmdp:affiliation>
                            <cmdp:organizationInfo>
                                <cmdp:organizationName xml:lang="en">University of Bergen</cmdp:organizationName>
                                <cmdp:organizationName xml:lang="no">Universitetet i Bergen</cmdp:organizationName>
                                <cmdp:organizationShortName xml:lang="no">UiB</cmdp:organizationShortName>
                                <cmdp:organizationShortName xml:lang="en">UoB</cmdp:organizationShortName>
                                <cmdp:departmentName xml:lang="en">Department of Linguistic, Literary and Aesthetic Studies</cmdp:departmentName>
                                <cmdp:departmentName xml:lang="no">Institutt for Lingvistiske, Litterære og Estetiske fag</cmdp:departmentName>
                            </cmdp:organizationInfo>
                        </cmdp:affiliation>
                    </cmdp:personInfo>
                    <cmdp:communicationInfo>
                        <!-- NOTE(review): the resource descriptions in this record give clarino@uib.no as the
                             contact address; confirm that clarin@uib.no is intended here. -->
                        <cmdp:email>clarin@uib.no</cmdp:email>
                        <cmdp:email>gunn.lyse@uib.no</cmdp:email>
                    </cmdp:communicationInfo>
                </cmdp:actorInfo>
            </cmdp:metadataCreator>
        </cmdp:metadataInfo>
        <cmdp:resourceCreationInfo>
            <!-- Creator of the resource itself (the frequency list), given as a person with a university affiliation. -->
            <cmdp:resourceCreator>
                <cmdp:actorInfo>
                    <cmdp:actorType>person</cmdp:actorType>
                    <cmdp:personInfo>
                        <cmdp:surname xml:lang="no">Lyse Samdal</cmdp:surname>
                        <cmdp:givenName xml:lang="no">Gunn Inger</cmdp:givenName>
                        <cmdp:sex>female</cmdp:sex>
                        <cmdp:position>Researcher (Ph.D)</cmdp:position>
                        <cmdp:affiliation>
                            <cmdp:organizationInfo>
                                <cmdp:organizationName xml:lang="en">University of Bergen</cmdp:organizationName>
                                <cmdp:organizationName xml:lang="no">Universitetet i Bergen</cmdp:organizationName>
                                <cmdp:organizationShortName xml:lang="en">UoB</cmdp:organizationShortName>
                                <cmdp:organizationShortName xml:lang="no">UiB</cmdp:organizationShortName>
                                <cmdp:departmentName xml:lang="en">Department of Linguistic, Literary and Aesthetic Studies</cmdp:departmentName>
                                <cmdp:departmentName xml:lang="no">Institutt for Lingvistiske, Litterære og Estetiske fag</cmdp:departmentName>
                            </cmdp:organizationInfo>
                        </cmdp:affiliation>
                    </cmdp:personInfo>
                </cmdp:actorInfo>
            </cmdp:resourceCreator>
        </cmdp:resourceCreationInfo>
        <!-- NOTE(review): relationInfo is empty, so no related resource (e.g. the source corpus) is declared.
             Confirm the profile accepts an empty element here, or fill it in / drop it. -->
        <cmdp:relationInfo></cmdp:relationInfo>
    </cmdp:resourceCommonInfo>
    <cmdp:corpusInfo>
        <!-- Technical description of the resource: a word-based n-gram list of order 1 (i.e. a unigram list). -->
        <cmdp:corpusType>Ngram Corpus</cmdp:corpusType>
        <cmdp:corpusPartInfo>
            <cmdp:mediaType>textNgram</cmdp:mediaType>
            <cmdp:corpusTextNgramInfo>
                <cmdp:ngramInfo>
                    <cmdp:baseItem>word</cmdp:baseItem>
                    <cmdp:order>1</cmdp:order>
                </cmdp:ngramInfo>
                <cmdp:textFormatInfo>
                    <!-- A registered media type rather than a file extension. -->
                    <cmdp:mimeType>text/plain</cmdp:mimeType>
                </cmdp:textFormatInfo>
                <cmdp:characterEncodingInfo>
                    <cmdp:characterEncoding>utf-8</cmdp:characterEncoding>
                </cmdp:characterEncodingInfo>
            </cmdp:corpusTextNgramInfo>
        </cmdp:corpusPartInfo>
        <cmdp:corpusPartGeneralInfo>
            <!-- The work the list was derived from. -->
            <cmdp:sourceWorkInfo>
                <cmdp:title>Norwegian Newspaper Corpus (Norwegian nynorsk part): 1998–2020</cmdp:title>
                <cmdp:workDescription>Newspaper text compiled in Norwegian nynorsk from large Norwegian newspapers, from 1998 and last updated in 2020.</cmdp:workDescription>
            </cmdp:sourceWorkInfo>
            <!-- Single language, so the per-language size and the overall size are the same figure:
                 the 10,000 list entries mentioned in the resource description. -->
            <cmdp:languageInfo>
                <cmdp:languageId>nn</cmdp:languageId>
                <cmdp:languageName>Norwegian Nynorsk</cmdp:languageName>
                <cmdp:sizePerLanguage>
                    <cmdp:sizeInfo>
                        <cmdp:size>10000</cmdp:size>
                        <cmdp:sizeUnit>tokens</cmdp:sizeUnit>
                    </cmdp:sizeInfo>
                </cmdp:sizePerLanguage>
            </cmdp:languageInfo>
            <cmdp:sizeInfo>
                <cmdp:size>10000</cmdp:size>
                <cmdp:sizeUnit>tokens</cmdp:sizeUnit>
            </cmdp:sizeInfo>
        </cmdp:corpusPartGeneralInfo>
    </cmdp:corpusInfo>
</cmdp:corpusProfile></cmd:Components></cmd:CMD>