MMS Dataset Citations

Citations for the MMS datasets

Citations

Dataset id: ar_arsentdl

  • Domain: social_media
  • Language: ar
  • Language family: Afro-Asiatic
  • Genus: Semitic
  • Definite articles: definite affix
  • Indefinite articles: no article
  • Number of cases: 3
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative intonation only
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: mixed morphological plural
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_arsentdl,
    author    = {Baly, Ramy and
                 Khaddaj, Alaa and
                 Hajj, Hazem M. and
                 El{-}Hajj, Wassim and
                 Shaban, Khaled Bashir},
    title     = {{ArSentD-LEV}: A Multi-Topic Corpus for Target-based Sentiment Analysis in {Arabic} {Levantine} Tweets},
    booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
    year      = {2018},
    month     = may,
    eventdate = {2018-05-07/2018-05-12},
    location  = {Miyazaki, Japan},
    editor    = {Al-Khalifa, Hend and Magdy, Walid and Darwish, Kareem and Elsayed, Tamer},
    publisher = {European Language Resources Association (ELRA)},
    address   = {Paris, France},
    isbn      = {979-10-95546-25-2},
    language  = {english}
}

Dataset id: ar_astd

  • Domain: social_media
  • Language: ar
  • Language family: Afro-Asiatic
  • Genus: Semitic
  • Definite articles: definite affix
  • Indefinite articles: no article
  • Number of cases: 3
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative intonation only
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: mixed morphological plural
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_astd,
  author    = {Nabil, Mahmoud and Aly, Mohamed and Atiya, Amir},
  title     = {{ASTD}: {A}rabic Sentiment Tweets Dataset},
  booktitle = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  address   = {Lisbon, Portugal},
  year      = {2015},
  month     = sep,
  pages     = {2515--2519},
  doi       = {10.18653/v1/D15-1299},
  url       = {https://aclanthology.org/D15-1299},
}

Dataset id: ar_bbn

  • Domain: social_media
  • Language: ar
  • Language family: Afro-Asiatic
  • Genus: Semitic
  • Definite articles: definite affix
  • Indefinite articles: no article
  • Number of cases: 3
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative intonation only
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: mixed morphological plural
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_bbn,
  author    = {Salameh, Mohammad and Mohammad, Saif and Kiritchenko, Svetlana},
  title     = {Sentiment after Translation: A Case-Study on {A}rabic Social Media Posts},
  booktitle = {Proceedings of the 2015 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies},
  publisher = {Association for Computational Linguistics},
  address   = {Denver, Colorado},
  year      = {2015},
  month     = may # "{--}" # jun,
  pages     = {767--777},
  doi       = {10.3115/v1/N15-1078},
  url       = {https://aclanthology.org/N15-1078},
}

Dataset id: ar_brad

  • Domain: reviews
  • Language: ar
  • Language family: Afro-Asiatic
  • Genus: Semitic
  • Definite articles: definite affix
  • Indefinite articles: no article
  • Number of cases: 3
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative intonation only
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: mixed morphological plural
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_brad,
    author    = {Elnagar, Ashraf and Einea, Omar},
    title     = {{BRAD} 1.0: Book reviews in {Arabic} dataset},
    booktitle = {2016 IEEE/ACS 13th International Conference of Computer Systems and Applications (AICCSA)},
    year      = {2016},
    pages     = {1--8},
    doi       = {10.1109/AICCSA.2016.7945800}
}

Dataset id: ar_hard

  • Domain: reviews
  • Language: ar
  • Language family: Afro-Asiatic
  • Genus: Semitic
  • Definite articles: definite affix
  • Indefinite articles: no article
  • Number of cases: 3
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative intonation only
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: mixed morphological plural
  • Grammatical genders: masculine, feminine
@incollection{dataset_ar_hard,
    author    = {Elnagar, Ashraf and Khalifa, Yasmin S. and Einea, Anas},
    title     = {Hotel {Arabic}-Reviews Dataset Construction for Sentiment Analysis Applications},
    booktitle = {Intelligent Natural Language Processing: Trends and Applications},
    year      = {2018},
    publisher = {Springer International Publishing},
    address   = {Cham},
    pages     = {35--52},
    isbn      = {978-3-319-67056-0},
    doi       = {10.1007/978-3-319-67056-0_3},
    url       = {https://doi.org/10.1007/978-3-319-67056-0_3}
}

Dataset id: ar_labr

  • Domain: reviews
  • Language: ar
  • Language family: Afro-Asiatic
  • Genus: Semitic
  • Definite articles: definite affix
  • Indefinite articles: no article
  • Number of cases: 3
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative intonation only
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: mixed morphological plural
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_labr,
  author    = {Aly, Mohamed and Atiya, Amir},
  title     = {{LABR}: A Large Scale {A}rabic Book Reviews Dataset},
  booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  publisher = {Association for Computational Linguistics},
  address   = {Sofia, Bulgaria},
  year      = {2013},
  month     = aug,
  pages     = {494--498},
  url       = {https://aclanthology.org/P13-2088},
}

Dataset id: ar_oclar

  • Domain: reviews
  • Language: ar
  • Language family: Afro-Asiatic
  • Genus: Semitic
  • Definite articles: definite affix
  • Indefinite articles: no article
  • Number of cases: 3
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative intonation only
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: mixed morphological plural
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_oclar,
    author    = {Al Omari, Marwan and Al-Hajj, Moustafa and Hammami, Nacereddine and Sabra, Amani},
    title     = {Sentiment Classifier: Logistic Regression for {Arabic} Services' Reviews in {Lebanon}},
    booktitle = {2019 International Conference on Computer and Information Sciences (ICCIS)},
    year      = {2019},
    pages     = {1--5},
    doi       = {10.1109/ICCISci.2019.8716394}
}

Dataset id: ar_semeval_2017

  • Domain: mixed
  • Language: ar
  • Language family: Afro-Asiatic
  • Genus: Semitic
  • Definite articles: definite affix
  • Indefinite articles: no article
  • Number of cases: 3
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative intonation only
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: mixed morphological plural
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_semeval_2017,
  author    = {Rosenthal, Sara and Farra, Noura and Nakov, Preslav},
  title     = {{S}em{E}val-2017 Task 4: Sentiment Analysis in {T}witter},
  booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation ({S}em{E}val-2017)},
  publisher = {Association for Computational Linguistics},
  address   = {Vancouver, Canada},
  year      = {2017},
  month     = aug,
  pages     = {502--518},
  doi       = {10.18653/v1/S17-2088},
  url       = {https://aclanthology.org/S17-2088},
  abstract  = {This paper describes the fifth year of the Sentiment Analysis in Twitter task. SemEval-2017 Task 4 continues with a rerun of the subtasks of SemEval-2016 Task 4, which include identifying the overall sentiment of the tweet, sentiment towards a topic with classification on a two-point and on a five-point ordinal scale, and quantification of the distribution of sentiment towards a topic across a number of tweets: again on a two-point and on a five-point ordinal scale. Compared to 2016, we made two changes: (i) we introduced a new language, Arabic, for all subtasks, and (ii) we made available information from the profiles of the Twitter users who posted the target tweets. The task continues to be very popular, with a total of 48 teams participating this year.},
}

Dataset id: ar_syria_corpus

  • Domain: social_media
  • Language: ar
  • Language family: Afro-Asiatic
  • Genus: Semitic
  • Definite articles: definite affix
  • Indefinite articles: no article
  • Number of cases: 3
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative intonation only
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: mixed morphological plural
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_bbn,
  author    = {Salameh, Mohammad and Mohammad, Saif and Kiritchenko, Svetlana},
  title     = {Sentiment after Translation: A Case-Study on {A}rabic Social Media Posts},
  booktitle = {Proceedings of the 2015 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies},
  publisher = {Association for Computational Linguistics},
  address   = {Denver, Colorado},
  year      = {2015},
  month     = may # "{--}" # jun,
  pages     = {767--777},
  doi       = {10.3115/v1/N15-1078},
  url       = {https://aclanthology.org/N15-1078},
}

Dataset id: bg_twitter_sentiment

  • Domain: social_media
  • Language: bg
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: no article
  • Number of cases: no morphological case-marking
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
    author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: bs_twitter_sentiment

  • Domain: social_media
  • Language: bs
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 5
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: other
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
    author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: cs_facebook

  • Domain: social_media
  • Language: cs
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative affix
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: MorphNeg
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_cs_social_media,
  author    = {Habernal, Ivan and Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and Steinberger, Josef},
  title     = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning},
  booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  publisher = {Association for Computational Linguistics},
  address   = {Atlanta, Georgia},
  year      = {2013},
  month     = jun,
  pages     = {65--74},
  url       = {https://aclanthology.org/W13-1609},
}

Dataset id: cs_mall_product_reviews

  • Domain: reviews
  • Language: cs
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative affix
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: MorphNeg
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_cs_social_media,
  author    = {Habernal, Ivan and Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and Steinberger, Josef},
  title     = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning},
  booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  publisher = {Association for Computational Linguistics},
  address   = {Atlanta, Georgia},
  year      = {2013},
  month     = jun,
  pages     = {65--74},
  url       = {https://aclanthology.org/W13-1609},
}

Dataset id: cs_movie_reviews

  • Domain: reviews
  • Language: cs
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative affix
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: MorphNeg
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_cs_social_media,
  author    = {Habernal, Ivan and Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and Steinberger, Josef},
  title     = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning},
  booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  publisher = {Association for Computational Linguistics},
  address   = {Atlanta, Georgia},
  year      = {2013},
  month     = jun,
  pages     = {65--74},
  url       = {https://aclanthology.org/W13-1609},
}

Dataset id: cs_news_stance

  • Domain: social_media
  • Language: cs
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative affix
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: MorphNeg
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_cs_social_media,
  author    = {Habernal, Ivan and Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and Steinberger, Josef},
  title     = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning},
  booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  publisher = {Association for Computational Linguistics},
  address   = {Atlanta, Georgia},
  year      = {2013},
  month     = jun,
  pages     = {65--74},
  url       = {https://aclanthology.org/W13-1609},
}

Dataset id: de_dai_labor

  • Domain: social_media
  • Language: de
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: 4
  • Order of subject, object, verb: no dominant order
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: more than one position
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_dai_labor,
    author    = {Narr, Sascha and H{\"u}lfenhaus, Michael and Albayrak, Sahin},
    title     = {Language-Independent {Twitter} Sentiment Analysis},
    booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning (KDML-2012)},
    year      = {2012},
    location  = {Dortmund, Germany},
}

Dataset id: de_ifeel

  • Domain: social_media
  • Language: de
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: 4
  • Order of subject, object, verb: no dominant order
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: more than one position
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_dai_labor,
    author    = {Narr, Sascha and H{\"u}lfenhaus, Michael and Albayrak, Sahin},
    title     = {Language-Independent {Twitter} Sentiment Analysis},
    booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning (KDML-2012)},
    year      = {2012},
    location  = {Dortmund, Germany},
}

Dataset id: de_multilan_amazon

  • Domain: reviews
  • Language: de
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: 4
  • Order of subject, object, verb: no dominant order
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: more than one position
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_multilan_amazon,
  author    = {Keung, Phillip and Lu, Yichao and Szarvas, Gy{\"o}rgy and Smith, Noah A.},
  title     = {The Multilingual {A}mazon Reviews Corpus},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  year      = {2020},
  month     = nov,
  pages     = {4563--4568},
  doi       = {10.18653/v1/2020.emnlp-main.369},
  url       = {https://aclanthology.org/2020.emnlp-main.369},
}

Dataset id: de_omp

  • Domain: social_media
  • Language: de
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: 4
  • Order of subject, object, verb: no dominant order
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: more than one position
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_de_omp,
  author    = {Schabus, Dietmar and Skowron, Marcin},
  title     = {Academic-Industrial Perspective on the Development and Deployment of a Moderation System for a Newspaper Website},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Miyazaki, Japan},
  year      = {2018},
  month     = may,
  url       = {https://aclanthology.org/L18-1253},
}

Dataset id: de_sb10k

  • Domain: social_media
  • Language: de
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: 4
  • Order of subject, object, verb: no dominant order
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: more than one position
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_de_sb10k,
  author    = {Cieliebak, Mark and Deriu, Jan Milan and Egger, Dominic and Uzdilli, Fatih},
  title     = {A {T}witter Corpus and Benchmark Resources for {G}erman Sentiment Analysis},
  booktitle = {Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media},
  publisher = {Association for Computational Linguistics},
  address   = {Valencia, Spain},
  year      = {2017},
  month     = apr,
  pages     = {45--51},
  doi       = {10.18653/v1/W17-1106},
  url       = {https://aclanthology.org/W17-1106},
  abstract  = {In this paper we present SB10k, a new corpus for sentiment analysis with approx. 10,000 German tweets. We use this new corpus and two existing corpora to provide state-of-the-art benchmarks for sentiment analysis in German: we implemented a CNN (based on the winning system of SemEval-2016) and a feature-based SVM and compare their performance on all three corpora. For the CNN, we also created German word embeddings trained on 300M tweets. These word embeddings were then optimized for sentiment analysis using distant-supervised learning. The new corpus, the German word embeddings (plain and optimized), and source code to re-run the benchmarks are publicly available.},
}

Dataset id: de_twitter_sentiment

  • Domain: social_media
  • Language: de
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: 4
  • Order of subject, object, verb: no dominant order
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: more than one position
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
    author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: en_amazon

  • Domain: reviews
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_en_amazon,
  author    = {Ni, Jianmo and Li, Jiacheng and McAuley, Julian},
  title     = {Justifying Recommendations using Distantly-Labeled Reviews and Fine-Grained Aspects},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
  publisher = {Association for Computational Linguistics},
  address   = {Hong Kong, China},
  year      = {2019},
  month     = nov,
  pages     = {188--197},
  doi       = {10.18653/v1/D19-1018},
  url       = {https://aclanthology.org/D19-1018},
}

Dataset id: en_dai_labor

  • Domain: social_media
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_dai_labor,
    author    = {Narr, Sascha and H{\"u}lfenhaus, Michael and Albayrak, Sahin},
    title     = {Language-Independent {Twitter} Sentiment Analysis},
    booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning (KDML-2012)},
    year      = {2012},
    location  = {Dortmund, Germany},
}

Dataset id: en_financial_phrasebank_sentences_75agree

  • Domain: news
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@article{dataset_en_financial_phrasebank_sentences_75agree,
    author     = {Malo, Pekka and Sinha, Ankur and Korhonen, Pekka and Wallenius, Jyrki and Takala, Pyry},
    title      = {Good Debt or Bad Debt: Detecting Semantic Orientations in Economic Texts},
    journal    = {Journal of the Association for Information Science and Technology},
    year       = {2014},
    month      = apr,
    issue_date = {April 2014},
    volume     = {65},
    number     = {4},
    pages      = {782--796},
    numpages   = {15},
    publisher  = {John Wiley \& Sons, Inc.},
    address    = {USA},
    issn       = {2330-1635},
    doi        = {10.1002/asi.23062},
    url        = {https://doi.org/10.1002/asi.23062},
    keywords   = {economics, automatic classification, linguistic analysis}
}

Dataset id: en_multilan_amazon

  • Domain: reviews
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_multilan_amazon,
  author    = {Keung, Phillip and Lu, Yichao and Szarvas, Gy{\"o}rgy and Smith, Noah A.},
  title     = {The Multilingual {A}mazon Reviews Corpus},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  year      = {2020},
  month     = nov,
  pages     = {4563--4568},
  doi       = {10.18653/v1/2020.emnlp-main.369},
  url       = {https://aclanthology.org/2020.emnlp-main.369},
}

Dataset id: en_per_sent

  • Domain: news
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_en_per_sent,
  author    = {Bastan, Mohaddeseh and Koupaee, Mahnaz and Son, Youngseo and Sicoli, Richard and Balasubramanian, Niranjan},
  title     = {Author{'}s Sentiment Prediction},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  publisher = {International Committee on Computational Linguistics},
  address   = {Barcelona, Spain (Online)},
  year      = {2020},
  month     = dec,
  pages     = {604--615},
  doi       = {10.18653/v1/2020.coling-main.52},
  url       = {https://aclanthology.org/2020.coling-main.52},
}

Dataset id: en_poem_sentiment

  • Domain: poems
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_en_poem_sentiment,
  author    = {Sheng, Emily and Uthus, David},
  title     = {Investigating Societal Biases in a Poetry Composition System},
  booktitle = {Proceedings of the Second Workshop on Gender Bias in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  address   = {Barcelona, Spain (Online)},
  year      = {2020},
  month     = dec,
  pages     = {93--106},
  url       = {https://aclanthology.org/2020.gebnlp-1.9},
}

Dataset id: en_semeval_2017

  • Domain: mixed
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_semeval_2017,
    author    = {Rosenthal, Sara and
                 Farra, Noura and
                 Nakov, Preslav},
    title     = {{S}em{E}val-2017 Task 4: Sentiment Analysis in {T}witter},
    booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation ({S}em{E}val-2017)},
    publisher = {Association for Computational Linguistics},
    address   = {Vancouver, Canada},
    year      = {2017},
    month     = aug,
    pages     = {502--518},
    doi       = {10.18653/v1/S17-2088},
    url       = {https://aclanthology.org/S17-2088},
    abstract  = {This paper describes the fifth year of the Sentiment Analysis in Twitter task. SemEval-2017 Task 4 continues with a rerun of the subtasks of SemEval-2016 Task 4, which include identifying the overall sentiment of the tweet, sentiment towards a topic with classification on a two-point and on a five-point ordinal scale, and quantification of the distribution of sentiment towards a topic across a number of tweets: again on a two-point and on a five-point ordinal scale. Compared to 2016, we made two changes: (i) we introduced a new language, Arabic, for all subtasks, and (ii) we made available information from the profiles of the Twitter users who posted the target tweets. The task continues to be very popular, with a total of 48 teams participating this year.}
}

Dataset id: en_sentistrength

  • Domain: social_media
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@article{dataset_en_sentistrength,
    author = {Thelwall, Mike and Buckley, Kevan and Paltoglou, Georgios},
    title = {Sentiment Strength Detection for the Social Web},
    journal = {Journal of the American Society for Information Science and Technology},
    year = {2012},
    month = jan,
    volume = {63},
    number = {1},
    pages = {163--173},
    numpages = {11},
    issue_date = {January 2012},
    publisher = {John Wiley \& Sons, Inc.},
    address = {USA},
    issn = {1532-2882},
    doi = {10.1002/asi.21662},
    url = {https://doi.org/10.1002/asi.21662},
    abstract = {Sentiment analysis is concerned with the automatic extraction of sentiment-related
    information from text. Although most sentiment analysis addresses commercial tasks,
    such as extracting opinions from product reviews, there is increasing interest in
    the affective dimension of the social web, and Twitter in particular. Most sentiment
    analysis algorithms are not ideally suited to this task because they exploit indirect
    indicators of sentiment that can reflect genre or topic instead. Hence, such algorithms
    used to process social web texts can identify spurious sentiment patterns caused by
    topics rather than affective phenomena. This article assesses an improved version
    of the algorithm SentiStrength for sentiment strength detection across the social
    web that primarily uses direct indications of sentiment. The results from six diverse
    social web data sets (MySpace, Twitter, YouTube, Digg, RunnersWorld, BBCForums) indicate
    that SentiStrength 2 is successful in the sense of performing better than a baseline
    approach for all data sets in both supervised and unsupervised cases. SentiStrength
    is not always better than machine-learning approaches that exploit indirect indicators
    of sentiment, however, and is particularly weaker for positive sentiment in news-related
    discussions. Overall, the results suggest that, even unsupervised, SentiStrength is
    robust enough to be applied to a wide variety of different social web contexts.}
}

Dataset id: en_silicone_meld_s

  • Domain: chats
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_en_silicone,
    author    = {Chapuis, Emile and
                 Colombo, Pierre and
                 Manica, Matteo and
                 Labeau, Matthieu and
                 Clavel, Chlo{\'e}},
    title     = {Hierarchical Pre-training for Sequence Labelling in Spoken Dialog},
    booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
    publisher = {Association for Computational Linguistics},
    address   = {Online},
    year      = {2020},
    month     = nov,
    pages     = {2636--2648},
    doi       = {10.18653/v1/2020.findings-emnlp.239},
    url       = {https://aclanthology.org/2020.findings-emnlp.239}
}

Dataset id: en_silicone_sem

  • Domain: chats
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_en_silicone,
    author    = {Chapuis, Emile and
                 Colombo, Pierre and
                 Manica, Matteo and
                 Labeau, Matthieu and
                 Clavel, Chlo{\'e}},
    title     = {Hierarchical Pre-training for Sequence Labelling in Spoken Dialog},
    booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
    publisher = {Association for Computational Linguistics},
    address   = {Online},
    year      = {2020},
    month     = nov,
    pages     = {2636--2648},
    doi       = {10.18653/v1/2020.findings-emnlp.239},
    url       = {https://aclanthology.org/2020.findings-emnlp.239}
}

Dataset id: en_tweet_airlines

  • Domain: social_media
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@misc{dataset_en_tweet_airlines,
    author = {{Crowdflower Inc.}},
    title  = {{Twitter} {US} Airline Sentiment},
    year   = {2015},
    url    = {https://www.kaggle.com/crowdflower/twitter-airline-sentiment}
}

Dataset id: en_tweets_sanders

  • Domain: social_media
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@misc{dataset_en_tweets_sanders,
    author       = {Sanders, Niek J.},
    title        = {{Sanders-Twitter Sentiment Corpus}},
    howpublished = {Sanders Analytics LLC},
    year         = {2011}
}

Dataset id: en_twitter_sentiment

  • Domain: social_media
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@article{dataset_twitter_sentiment,
    author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: en_vader_amazon

  • Domain: reviews
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_en_vader,
    author    = {Hutto, Clayton J. and Gilbert, Eric},
    title     = {{VADER}: A Parsimonious Rule-Based Model for Sentiment Analysis of Social Media Text},
    booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media},
    year      = {2014},
    month     = may,
    volume    = {8},
    pages     = {216--225},
    url       = {https://ojs.aaai.org/index.php/ICWSM/article/view/14550},
}

Dataset id: en_vader_movie_reviews

  • Domain: reviews
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_en_vader,
    author    = {Hutto, Clayton J. and Gilbert, Eric},
    title     = {{VADER}: A Parsimonious Rule-Based Model for Sentiment Analysis of Social Media Text},
    booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media},
    year      = {2014},
    month     = may,
    volume    = {8},
    pages     = {216--225},
    url       = {https://ojs.aaai.org/index.php/ICWSM/article/view/14550},
}

Dataset id: en_vader_nyt

  • Domain: news
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_en_vader,
    author    = {Hutto, Clayton J. and Gilbert, Eric},
    title     = {{VADER}: A Parsimonious Rule-Based Model for Sentiment Analysis of Social Media Text},
    booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media},
    year      = {2014},
    month     = may,
    volume    = {8},
    pages     = {216--225},
    url       = {https://ojs.aaai.org/index.php/ICWSM/article/view/14550},
}

Dataset id: en_vader_twitter

  • Domain: social_media
  • Language: en
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_en_vader,
    author    = {Hutto, Clayton J. and Gilbert, Eric},
    title     = {{VADER}: A Parsimonious Rule-Based Model for Sentiment Analysis of Social Media Text},
    booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media},
    year      = {2014},
    month     = may,
    volume    = {8},
    pages     = {216--225},
    url       = {https://ojs.aaai.org/index.php/ICWSM/article/view/14550},
}

Dataset id: es_muchocine

  • Domain: reviews
  • Language: es
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-marking
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@article{dataset_es_muchocine,
    author    = {Cruz, Fermin L. and Troyano, Jose A. and Enriquez, Fernando and Ortega, Javier},
    title     = {Experiments in sentiment classification of movie reviews in {Spanish}},
    journal   = {Procesamiento del Lenguaje Natural},
    volume    = {41},
    pages     = {73--80},
    year      = {2008},
    publisher = {Sociedad Espa{\~n}ola para el Procesamiento del Lenguaje Natural (SEPLN)}
}

Dataset id: es_multilan_amazon

  • Domain: reviews
  • Language: es
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-marking
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_multilan_amazon,
    author    = {Keung, Phillip and
                 Lu, Yichao and
                 Szarvas, Gy{\"o}rgy and
                 Smith, Noah A.},
    title     = {The Multilingual {A}mazon Reviews Corpus},
    booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
    publisher = {Association for Computational Linguistics},
    address   = {Online},
    year      = {2020},
    month     = nov,
    pages     = {4563--4568},
    doi       = {10.18653/v1/2020.emnlp-main.369},
    url       = {https://aclanthology.org/2020.emnlp-main.369}
}

Dataset id: es_paper_reviews

  • Domain: reviews
  • Language: es
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-marking
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@article{dataset_es_paper_reviews,
    author  = {Keith Norambuena, Brian and Lettura, Exequiel and Villegas, Claudio},
    title   = {Sentiment analysis and opinion mining applied to scientific paper reviews},
    journal = {Intelligent Data Analysis},
    year    = {2019},
    month   = feb,
    volume  = {23},
    pages   = {191--214},
    doi     = {10.3233/IDA-173807}
}

Dataset id: es_semeval2020

  • Domain: social_media
  • Language: es
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-marking
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_semeval_2020,
    author    = {Patwa, Parth and
                 Aguilar, Gustavo and
                 Kar, Sudipta and
                 Pandey, Suraj and
                 PYKL, Srinivas and
                 Gamb{\"a}ck, Bj{\"o}rn and
                 Chakraborty, Tanmoy and
                 Solorio, Thamar and
                 Das, Amitava},
    title     = {{S}em{E}val-2020 Task 9: Overview of Sentiment Analysis of Code-Mixed Tweets},
    booktitle = {Proceedings of the Fourteenth Workshop on Semantic Evaluation},
    publisher = {International Committee for Computational Linguistics},
    address   = {Barcelona (online)},
    year      = {2020},
    month     = dec,
    pages     = {774--790},
    doi       = {10.18653/v1/2020.semeval-1.100},
    url       = {https://aclanthology.org/2020.semeval-1.100}
}

Dataset id: es_twitter_sentiment

  • Domain: social_media
  • Language: es
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-marking
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@article{dataset_twitter_sentiment,
    author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: fa_sentipers

  • Domain: reviews
  • Language: fa
  • Language family: Indo-European
  • Genus: Iranian
  • Definite articles: no article
  • Indefinite articles: indefinite word same as one
  • Number of cases: 2
  • Order of subject, object, verb: SOV
  • Negative morphemes: negative affix
  • Polar questions: question particle
  • Position of negative word wrt SOV: MorphNeg
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@article{dataset_fa_sentipers,
    title      = {{SentiPers}: {A} Sentiment Analysis Corpus for Persian},
    author     = {Pedram Hosseini and Ali Ahmadian Ramaki and Hassan Maleki and Mansoureh Anvari and Seyed Abolghasem Mirroshandel},
    journal    = {Computing Research Repository},
    volume     = {arXiv:1801.07737},
    year       = {2018},
    note       = {Version 2},
    eprinttype = {arXiv},
    eprint     = {1801.07737},
    url        = {http://arxiv.org/abs/1801.07737},
    timestamp  = {Mon, 13 Aug 2018 16:47:47 +0200},
    biburl     = {https://dblp.org/rec/journals/corr/abs-1801-07737.bib},
    bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Dataset id: fr_dai_labor

  • Domain: social_media
  • Language: fr
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-marking
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: OptDoubleNeg
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_dai_labor,
    author    = {Narr, Sascha and H{\"u}lfenhaus, Michael and Albayrak, Sahin},
    title     = {Language-Independent {Twitter} Sentiment Analysis},
    booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning (KDML-2012)},
    year      = {2012},
    address   = {Dortmund, Germany},
}

Dataset id: fr_ifeel

  • Domain: social_media
  • Language: fr
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-marking
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: OptDoubleNeg
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_dai_labor,
    author    = {Narr, Sascha and H{\"u}lfenhaus, Michael and Albayrak, Sahin},
    title     = {Language-Independent {Twitter} Sentiment Analysis},
    booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning (KDML-2012)},
    year      = {2012},
    address   = {Dortmund, Germany},
}

Dataset id: fr_multilan_amazon

  • Domain: reviews
  • Language: fr
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-marking
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: OptDoubleNeg
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_multilan_amazon,
    author    = {Keung, Phillip and
                 Lu, Yichao and
                 Szarvas, Gy{\"o}rgy and
                 Smith, Noah A.},
    title     = {The Multilingual {A}mazon Reviews Corpus},
    booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
    publisher = {Association for Computational Linguistics},
    address   = {Online},
    year      = {2020},
    month     = nov,
    pages     = {4563--4568},
    doi       = {10.18653/v1/2020.emnlp-main.369},
    url       = {https://aclanthology.org/2020.emnlp-main.369}
}

Dataset id: he_hebrew_sentiment

  • Domain: social_media
  • Language: he
  • Language family: Afro-Asiatic
  • Genus: Semitic
  • Definite articles: definite affix
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-marking
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_he_hebrew_sentiment,
    author    = {Amram, Adam and
                 Ben David, Anat and
                 Tsarfaty, Reut},
    title     = {Representations and Architectures in Neural Sentiment Analysis for Morphologically Rich Languages: A Case Study from {M}odern {H}ebrew},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    publisher = {Association for Computational Linguistics},
    address   = {Santa Fe, New Mexico, USA},
    year      = {2018},
    month     = aug,
    pages     = {2242--2252},
    url       = {https://aclanthology.org/C18-1190},
    abstract  = {This paper empirically studies the effects of representation choices on neural sentiment analysis for Modern Hebrew, a morphologically rich language (MRL) for which no sentiment analyzer currently exists. We study two dimensions of representational choices: (i) the granularity of the input signal (token-based vs. morpheme-based), and (ii) the level of encoding of vocabulary items (string-based vs. character-based). We hypothesise that for MRLs, languages where multiple meaning-bearing elements may be carried by a single space-delimited token, these choices will have measurable effects on task performance, and that these effects may vary for different architectural designs {---} fully-connected, convolutional or recurrent. Specifically, we hypothesize that morpheme-based representations will have advantages in terms of their generalization capacity and task accuracy, due to their better OOV coverage. To empirically study these effects, we develop a new sentiment analysis benchmark for Hebrew, based on 12K social media comments, and provide two instances of these data: in token-based and morpheme-based settings. Our experiments show that representation choices empirical effects vary with architecture type. While fully-connected and convolutional networks slightly prefer token-based settings, RNNs benefit from a morpheme-based representation, in accord with the hypothesis that explicit morphological information may help generalize. Our endeavour also delivers the first state-of-the-art broad-coverage sentiment analyzer for Hebrew, with over 89{\%} accuracy, alongside an established benchmark to further study the effects of linguistic representation choices on neural networks{'} task performance.},
}

Dataset id: hi_semeval2020

  • Domain: social_media
  • Language: hi
  • Language family: Indo-European
  • Genus: Indic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 3
  • Order of subject, object, verb: SOV
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SONegV
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_semeval_2020,
    author    = {Patwa, Parth and
                 Aguilar, Gustavo and
                 Kar, Sudipta and
                 Pandey, Suraj and
                 PYKL, Srinivas and
                 Gamb{\"a}ck, Bj{\"o}rn and
                 Chakraborty, Tanmoy and
                 Solorio, Thamar and
                 Das, Amitava},
    title     = {{S}em{E}val-2020 Task 9: Overview of Sentiment Analysis of Code-Mixed Tweets},
    booktitle = {Proceedings of the Fourteenth Workshop on Semantic Evaluation},
    publisher = {International Committee for Computational Linguistics},
    address   = {Barcelona (online)},
    year      = {2020},
    month     = dec,
    pages     = {774--790},
    doi       = {10.18653/v1/2020.semeval-1.100},
    url       = {https://aclanthology.org/2020.semeval-1.100}
}

Dataset id: hr_sentiment_news_document

  • Domain: news
  • Language: hr
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 5
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: other
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_hr_sentiment_news_document,
    author = {Pelicon, Andra{\v{z}} and Pranji{\'c}, Marko and Miljkovi{\'c}, Dragana and {\v{S}}krlj, Bla{\v{z}} and Pollak, Senja},
    title = {Zero-Shot Learning for Cross-Lingual News Sentiment Classification},
    journal = {Applied Sciences},
    volume = {10},
    number = {17},
    year = {2020},
    pages = {5993},
    article-number = {5993},
    issn = {2076-3417},
    doi = {10.3390/app10175993},
    url = {https://www.mdpi.com/2076-3417/10/17/5993},
    abstract = {In this paper, we address the task of zero-shot cross-lingual news sentiment classification. Given the annotated dataset of positive, neutral, and negative news in Slovene, the aim is to develop a news classification system that assigns the sentiment category not only to Slovene news, but to news in another language without any training data required. Our system is based on the multilingual BERT model, while we test different approaches for handling long documents and propose a novel technique for sentiment enrichment of the BERT model as an intermediate training step. With the proposed approach, we achieve state-of-the-art performance on the sentiment analysis task on Slovenian news. We evaluate the zero-shot cross-lingual capabilities of our system on a novel news sentiment test set in Croatian. The results show that the cross-lingual approach also largely outperforms the majority classifier, as well as all settings without sentiment enrichment in pre-training.}
}

Dataset id: hr_twitter_sentiment

  • Domain: social_media
  • Language: hr
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 5
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: other
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
    author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: hu_twitter_sentiment

  • Domain: social_media
  • Language: hu
  • Language family: Uralic
  • Genus: Ugric
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 10 or more
  • Order of subject, object, verb: no dominant order
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@article{dataset_twitter_sentiment,
    author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: it_evalita2016

  • Domain: social_media
  • Language: it
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-marking
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative intonation only
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_it_evalita2016,
    author      = {Barbieri, Francesco and Basile, Valerio and Croce, Danilo and Nissim, Malvina and Novielli, Nicole and Patti, Viviana},
    title       = {Overview of the {Evalita} 2016 {SENTIment} {POLarity} Classification Task},
    booktitle   = {Proceedings of Third {Italian} Conference on Computational Linguistics ({CLiC-it} 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for {Italian}. Final Workshop ({EVALITA} 2016)},
    address     = {Naples, Italy},
    year        = {2016},
    month       = dec,
    keywords    = {Natural language processing and web ; Social media analysis ; Sentiment analysis},
    url         = {https://hal.inria.fr/hal-01414731},
    pdf         = {https://hal.inria.fr/hal-01414731/file/paper_026.pdf},
    hal_id      = {hal-01414731},
    hal_version = {v1},
}

Dataset id: it_multiemotions

  • Domain: social_media
  • Language: it
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-making
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative intonation only
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_it_multiemotions,
    author    = {Sprugnoli, Rachele},
    title     = {{MultiEmotions-It}: a New Dataset for Opinion Polarity and Emotion Analysis for {Italian}},
    booktitle = {Proceedings of the Seventh Italian Conference on Computational Linguistics},
    year      = {2020},
    month     = dec,
}

Dataset id: ja_multilan_amazon

  • Domain: reviews
  • Language: ja
  • Language family: Japanese
  • Genus: Japanese
  • Definite articles: no article
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 8-9
  • Order of subject, object, verb: SOV
  • Negative morphemes: negative affix
  • Polar questions: question particle
  • Position of negative word wrt SOV: MorphNeg
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: no grammatical gender
@inproceedings{dataset_multilan_amazon,
    author    = {Keung, Phillip and Lu, Yichao and Szarvas, Gy{\"o}rgy and Smith, Noah A.},
    title     = {The Multilingual {A}mazon Reviews Corpus},
    booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
    publisher = {Association for Computational Linguistics},
    address   = {Online},
    year      = {2020},
    month     = nov,
    pages     = {4563--4568},
    doi       = {10.18653/v1/2020.emnlp-main.369},
    url       = {https://aclanthology.org/2020.emnlp-main.369},
}

Dataset id: lv_ltec_sentiment

  • Domain: social_media
  • Language: lv
  • Language family: Indo-European
  • Genus: Baltic
  • Definite articles: demonstrative word used as definite article
  • Indefinite articles: indefinite word same as one
  • Number of cases: 5
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative affix
  • Polar questions: question particle
  • Position of negative word wrt SOV: MorphNeg
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@article{dataset_lv_ltec_sentiment,
    author     = {Sprogis, Uga and Rikters, Matiss},
    title      = {What Can We Learn From Almost a Decade of Food Tweets},
    journal    = {Computing Research Repository},
    volume     = {arXiv:2007.05194},
    year       = {2020},
    note       = {Version 2},
    eprinttype = {arXiv},
    eprint     = {2007.05194},
    url        = {https://arxiv.org/abs/2007.05194},
    timestamp  = {Mon, 20 Jul 2020 14:20:39 +0200},
    biburl     = {https://dblp.org/rec/journals/corr/abs-2007-05194.bib},
    bibsource  = {dblp computer science bibliography, https://dblp.org}
}

Dataset id: pl_klej_allegro_reviews

  • Domain: reviews
  • Language: pl
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_pl_klej_allegro_reviews,
    author    = {Rybak, Piotr and Mroczkowski, Robert and Tracz, Janusz and Gawlik, Ireneusz},
    title     = {{KLEJ}: Comprehensive Benchmark for {P}olish Language Understanding},
    booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
    publisher = {Association for Computational Linguistics},
    address   = {Online},
    year      = {2020},
    month     = jul,
    pages     = {1191--1201},
    doi       = {10.18653/v1/2020.acl-main.111},
    url       = {https://aclanthology.org/2020.acl-main.111},
}

Dataset id: pl_opi_lil_2012

  • Domain: social_media
  • Language: pl
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_pl_opi_lil_2012,
    author  = {Sobkowicz, Pawel and Sobkowicz, Antoni},
    title   = {Two-Year Study of Emotion and Communication Patterns in a Highly Polarized Political Discussion Forum},
    journal = {Social Science Computer Review},
    year    = {2012},
    volume  = {30},
    number  = {4},
    pages   = {448--469},
    doi     = {10.1177/0894439312436512}
}

Dataset id: pl_polemo

  • Domain: reviews
  • Language: pl
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_pl_polemo,
    author    = {Koco{\'n}, Jan and Mi{\l}kowski, Piotr and Za{\'s}ko-Zieli{\'n}ska, Monika},
    title     = {Multi-Level Sentiment Analysis of {P}ol{E}mo 2.0: Extended Corpus of Multi-Domain Consumer Reviews},
    booktitle = {Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)},
    publisher = {Association for Computational Linguistics},
    address   = {Hong Kong, China},
    year      = {2019},
    month     = nov,
    pages     = {980--991},
    doi       = {10.18653/v1/K19-1092},
    url       = {https://aclanthology.org/K19-1092}
}

Dataset id: pl_twitter_sentiment

  • Domain: social_media
  • Language: pl
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
    author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: pt_dai_labor

  • Domain: social_media
  • Language: pt
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-making
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_dai_labor,
    author    = {Narr, Sascha and Hülfenhaus, Michael and Albayrak, Sahin},
    title     = {Language-Independent Twitter Sentiment Analysis},
    booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning (KDML-2012)},
    location  = {Dortmund, Germany},
    year      = {2012},
}

Dataset id: pt_ifeel

  • Domain: social_media
  • Language: pt
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-making
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_dai_labor,
    author    = {Narr, Sascha and Hülfenhaus, Michael and Albayrak, Sahin},
    title     = {Language-Independent Twitter Sentiment Analysis},
    booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning (KDML-2012)},
    location  = {Dortmund, Germany},
    year      = {2012},
}

Dataset id: pt_tweet_sent_br

  • Domain: social_media
  • Language: pt
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-making
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@inproceedings{dataset_pt_tweet_sent_br,
    author    = {Brum, Henrico and Volpe Nunes, Maria das Gra{\c{c}}as},
    title     = {Building a Sentiment Corpus of Tweets in {B}razilian {P}ortuguese},
    booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
    publisher = {European Language Resources Association (ELRA)},
    address   = {Miyazaki, Japan},
    year      = {2018},
    month     = may,
    url       = {https://aclanthology.org/L18-1658},
}

Dataset id: pt_twitter_sentiment

  • Domain: social_media
  • Language: pt
  • Language family: Indo-European
  • Genus: Romance
  • Definite articles: definite word distinct from demonstrative
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-making
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@article{dataset_twitter_sentiment,
    author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: ru_sentiment

  • Domain: social_media
  • Language: ru
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_ru_sentiment,
    author    = {Rogers, Anna and Romanov, Alexey and Rumshisky, Anna and Volkova, Svitlana and Gronas, Mikhail and Gribov, Alex},
    title     = {{R}u{S}entiment: An Enriched Sentiment Analysis Dataset for Social Media in {R}ussian},
    booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
    publisher = {Association for Computational Linguistics},
    address   = {Santa Fe, New Mexico, USA},
    year      = {2018},
    month     = aug,
    pages     = {755--763},
    url       = {https://aclanthology.org/C18-1064},
    abstract  = {This paper presents RuSentiment, a new dataset for sentiment analysis of social media posts in Russian, and a new set of comprehensive annotation guidelines that are extensible to other languages. RuSentiment is currently the largest in its class for Russian, with 31,185 posts annotated with Fleiss{'} kappa of 0.58 (3 annotations per post). To diversify the dataset, 6,950 posts were pre-selected with an active learning-style strategy. We report baseline classification results, and we also release the best-performing embeddings trained on 3.2B tokens of Russian VKontakte posts.},
}

Dataset id: ru_twitter_sentiment

  • Domain: social_media
  • Language: ru
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
    author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: sk_twitter_sentiment

  • Domain: social_media
  • Language: sk
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative affix
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: MorphNeg
  • Prefixing vs suffixing: weakly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
    author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: sl_sentinews

  • Domain: news
  • Language: sl
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{Bučar2018,
    author   = {Bu{\v{c}}ar, Jo{\v{z}}e
                and {\v{Z}}nidar{\v{s}}i{\v{c}}, Martin
                and Povh, Janez},
    title    = {Annotated news corpora and a lexicon for sentiment analysis in Slovene},
    journal  = {Language Resources and Evaluation},
    year     = {2018},
    month    = sep,
    day      = {01},
    volume   = {52},
    number   = {3},
    pages    = {895--919},
    abstract = {In this study, we introduce Slovene web-crawled news corpora with sentiment annotation on three levels of granularity: sentence, paragraph and document levels. We describe the methodology and tools that were required for their construction. The corpora contain more than 250,000 documents with political, business, economic and financial content from five Slovene media resources on the web. More than 10,000 of them were manually annotated as negative, neutral or positive. All corpora are publicly available under a Creative Commons copyright license. We used the annotated documents to construct a Slovene sentiment lexicon, which is the first of its kind for Slovene, and to assess the sentiment classification approaches used. The constructed corpora were also utilised to monitor within-the-document sentiment dynamics, its changes over time and relations with news topics. We show that sentiment is, on average, more explicit at the beginning of documents, and it loses sharpness towards the end of documents.},
    issn     = {1574-0218},
    doi      = {10.1007/s10579-018-9413-3},
    url      = {https://doi.org/10.1007/s10579-018-9413-3}
}

Dataset id: sl_twitter_sentiment

  • Domain: social_media
  • Language: sl
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 6-7
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
    author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: sq_twitter_sentiment

  • Domain: social_media
  • Language: sq
  • Language family: Indo-European
  • Genus: Albanian
  • Definite articles: definite affix
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: 4
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@article{dataset_twitter_sentiment,
    author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: sr_movie_reviews

  • Domain: reviews
  • Language: sr
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 5
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: other
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_sr_serb_movie_reviews,
    author    = {Batanovi{\'c}, Vuk and Nikoli{\'c}, Bo{\v{s}}ko and Milosavljevi{\'c}, Milan},
    title     = {Reliable Baselines for Sentiment Analysis in Resource-Limited Languages: The {S}erbian Movie Review Dataset},
    booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
    publisher = {European Language Resources Association (ELRA)},
    address   = {Portoro{\v{z}}, Slovenia},
    year      = {2016},
    month     = may,
    pages     = {2688--2696},
    url       = {https://aclanthology.org/L16-1427},
    abstract  = {Collecting data for sentiment analysis in resource-limited languages carries a significant risk of sample selection bias, since the small quantities of available data are most likely not representative of the whole population. Ignoring this bias leads to less robust machine learning classifiers and less reliable evaluation results. In this paper we present a dataset balancing algorithm that minimizes the sample selection bias by eliminating irrelevant systematic differences between the sentiment classes. We prove its superiority over the random sampling method and we use it to create the Serbian movie review dataset ― SerbMR ― the first balanced and topically uniform sentiment analysis dataset in Serbian. In addition, we propose an incremental way of finding the optimal combination of simple text processing options and machine learning features for sentiment classification. Several popular classifiers are used in conjunction with this evaluation approach in order to establish strong but reliable baselines for sentiment analysis in Serbian.},
}

Dataset id: sr_senticomments

  • Domain: reviews
  • Language: sr
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 5
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: other
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_sr_senticomments,
    author    = {Batanović, Vuk and Cvetanović, Miloš and Nikolić, Boško},
    title     = {A versatile framework for resource-limited sentiment articulation, annotation, and analysis of short texts},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2020},
    month     = nov,
    volume    = {15},
    number    = {11},
    pages     = {1--30},
    doi       = {10.1371/journal.pone.0242050},
    url       = {https://doi.org/10.1371/journal.pone.0242050},
    abstract  = {Choosing a comprehensive and cost-effective way of articulating and annotating the sentiment of a text is not a trivial task, particularly when dealing with short texts, in which sentiment can be expressed through a wide variety of linguistic and rhetorical phenomena. This problem is especially conspicuous in resource-limited settings and languages, where design options are restricted either in terms of manpower and financial means required to produce appropriate sentiment analysis resources, or in terms of available language tools, or both. In this paper, we present a versatile approach to addressing this issue, based on multiple interpretations of sentiment labels that encode information regarding the polarity, subjectivity, and ambiguity of a text, as well as the presence of sarcasm or a mixture of sentiments. We demonstrate its use on Serbian, a resource-limited language, via the creation of a main sentiment analysis dataset focused on movie comments, and two smaller datasets belonging to the movie and book domains. In addition to measuring the quality of the annotation process, we propose a novel metric to validate its cost-effectiveness. Finally, the practicality of our approach is further validated by training, evaluating, and determining the optimal configurations of several different kinds of machine-learning models on a range of sentiment classification tasks using the produced dataset.},
}

Dataset id: sr_twitter_sentiment

  • Domain: social_media
  • Language: sr
  • Language family: Indo-European
  • Genus: Slavic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 5
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: other
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
    author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: sv_twitter_sentiment

  • Domain: social_media
  • Language: sv
  • Language family: Indo-European
  • Genus: Germanic
  • Definite articles: definite affix
  • Indefinite articles: indefinite word same as one
  • Number of cases: 2
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: interrogative word order
  • Position of negative word wrt SOV: more than one position
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: common, neuter
@article{dataset_twitter_sentiment,
    author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
    title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
    journal   = {PLOS ONE},
    publisher = {Public Library of Science},
    year      = {2016},
    month     = may,
    volume    = {11},
    number    = {5},
    pages     = {1--26},
    doi       = {10.1371/journal.pone.0155036},
    url       = {https://doi.org/10.1371/journal.pone.0155036},
}

Dataset id: th_wisesight_sentiment

  • Domain: social_media
  • Language: th
  • Language family: Tai-Kadai
  • Genus: Kam-Tai
  • Definite articles: no article
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: no morphological case-making
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative auxiliary verb
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: little affixation
  • Coding of nominal plurality: mixed morphological plural
  • Grammatical genders: noun classifiers
@misc{dataset_th_wisesight_sentiment,
    author    = {Suriyawongkul, Arthit and Chuangsuwanich, Ekapol and Chormai, Pattarawat and Polpanumas, Charin},
    title     = {PyThaiNLP/wisesight-sentiment: First release (v1.0)},
    publisher = {Zenodo},
    version   = {v1.0},
    year      = {2019},
    month     = sep,
    note      = {Zenodo},
    doi       = {10.5281/zenodo.3457447},
    url       = {https://doi.org/10.5281/zenodo.3457447}
}

Dataset id: th_wongnai_reviews

  • Domain: reviews
  • Language: th
  • Language family: Tai-Kadai
  • Genus: Kam-Tai
  • Definite articles: no article
  • Indefinite articles: indefinite word distinct from one
  • Number of cases: no morphological case-making
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative auxiliary verb
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: little affixation
  • Coding of nominal plurality: mixed morphological plural
  • Grammatical genders: noun classifiers
@misc{dataset_th_wongnai_reviews,
    author       = {Thongthanomkul, Ekkalak and Nearunchorn, Tanapol and Chuesathuchon, Yuwat},
    title        = {wongnai-corpus},
    howpublished = {\url{https://github.com/wongnai/wongnai-corpus}},
    journal      = {GitHub repository},
    publisher    = {GitHub},
    year         = {2019}
}

Dataset id: ur_roman_urdu

  • Domain: mixed
  • Language: ur
  • Language family: Indo-European
  • Genus: Indic
  • Definite articles: no article
  • Indefinite articles: no article
  • Number of cases: 2
  • Order of subject, object, verb: SOV
  • Negative morphemes: negative affix
  • Polar questions: question particle
  • Position of negative word wrt SOV: SONegV
  • Prefixing vs suffixing: strongly suffixing
  • Coding of nominal plurality: plural suffix
  • Grammatical genders: masculine, feminine
@article{dataset_ur_roman_urdu,
    author  = {Sharf, Zareen and Rahman, Saif Ur},
    title   = {Performing Natural Language Processing on {Roman Urdu} Datasets},
    journal = {International Journal of Computer Science and Network Security},
    year    = {2018},
    volume  = {18},
    pages   = {141--148},
    url     = {http://paper.ijcsns.org/07_book/201801/20180117.pdf}
}

Dataset id: zh_hotel_reviews

  • Domain: reviews
  • Language: zh
  • Language family: Sino-Tibetan
  • Genus: Chinese
  • Definite articles: no article
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-making
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: little affixation
  • Coding of nominal plurality: no plural
  • Grammatical genders: noun classifiers
@inproceedings{dataset_zh_hotel_reviews,
    author    = {Lin, Yiou and Lei, Hang and Wu, Jia and Li, Xiaoyu},
    title     = {An Empirical Study on Sentiment Classification of {C}hinese Review using Word Embedding},
    booktitle = {Proceedings of the 29th Pacific Asia Conference on Language, Information and Computation: Posters},
    address   = {Shanghai, China},
    year      = {2015},
    month     = oct,
    pages     = {258--266},
    url       = {https://aclanthology.org/Y15-2030},
}

Dataset id: zh_multilan_amazon

  • Domain: reviews
  • Language: zh
  • Language family: Sino-Tibetan
  • Genus: Chinese
  • Definite articles: no article
  • Indefinite articles: indefinite word same as one
  • Number of cases: no morphological case-making
  • Order of subject, object, verb: SVO
  • Negative morphemes: negative particle
  • Polar questions: question particle
  • Position of negative word wrt SOV: SNegVO
  • Prefixing vs suffixing: little affixation
  • Coding of nominal plurality: no plural
  • Grammatical genders: noun classifiers
@inproceedings{dataset_multilan_amazon,
    author    = {Keung, Phillip and Lu, Yichao and Szarvas, Gy{\"o}rgy and Smith, Noah A.},
    title     = {The Multilingual {A}mazon Reviews Corpus},
    booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
    publisher = {Association for Computational Linguistics},
    address   = {Online},
    year      = {2020},
    month     = nov,
    pages     = {4563--4568},
    doi       = {10.18653/v1/2020.emnlp-main.369},
    url       = {https://aclanthology.org/2020.emnlp-main.369},
}