Dataset id: ar_arsentdl
- Domain: social_media
- Language: ar
- Language family: Afro-Asiatic
- Genus: Semitic
- Definite articles: definite affix
- Indefinite articles: no article
- Number of cases: 3
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative intonation only
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: mixed morphological plural
- Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_arsentdl,
  author    = {Baly, Ramy and
               Khaddaj, Alaa and
               Hajj, Hazem M. and
               El{-}Hajj, Wassim and
               Shaban, Khaled Bashir},
  title     = {{ArSentD-LEV}: A Multi-Topic Corpus for Target-based Sentiment Analysis in {Arabic} {Levantine} Tweets},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  editor    = {Al-Khalifa, Hend and
               Magdy, Walid and
               Darwish, Kareem and
               Elsayed, Tamer},
  year      = {2018},
  month     = may,
  eventdate = {2018-05-07/2018-05-12},
  venue     = {Miyazaki, Japan},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Paris, France},
  isbn      = {979-10-95546-25-2},
  language  = {english},
}
Dataset id: ar_astd
- Domain: social_media
- Language: ar
- Language family: Afro-Asiatic
- Genus: Semitic
- Definite articles: definite affix
- Indefinite articles: no article
- Number of cases: 3
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative intonation only
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: mixed morphological plural
- Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_astd,
  author    = {Nabil, Mahmoud and Aly, Mohamed and Atiya, Amir},
  title     = {{ASTD}: {A}rabic Sentiment Tweets Dataset},
  booktitle = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing},
  year      = {2015},
  month     = sep,
  address   = {Lisbon, Portugal},
  publisher = {Association for Computational Linguistics},
  pages     = {2515--2519},
  doi       = {10.18653/v1/D15-1299},
  url       = {https://aclanthology.org/D15-1299},
}
Dataset id: ar_bbn
- Domain: social_media
- Language: ar
- Language family: Afro-Asiatic
- Genus: Semitic
- Definite articles: definite affix
- Indefinite articles: no article
- Number of cases: 3
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative intonation only
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: mixed morphological plural
- Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_bbn,
  author    = {Salameh, Mohammad and Mohammad, Saif and Kiritchenko, Svetlana},
  title     = {Sentiment after Translation: A Case-Study on {A}rabic Social Media Posts},
  booktitle = {Proceedings of the 2015 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2015},
  month     = may # "{--}" # jun,
  address   = {Denver, Colorado},
  publisher = {Association for Computational Linguistics},
  pages     = {767--777},
  doi       = {10.3115/v1/N15-1078},
  url       = {https://aclanthology.org/N15-1078},
}
Dataset id: ar_brad
- Domain: reviews
- Language: ar
- Language family: Afro-Asiatic
- Genus: Semitic
- Definite articles: definite affix
- Indefinite articles: no article
- Number of cases: 3
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative intonation only
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: mixed morphological plural
- Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_brad,
  author    = {Elnagar, Ashraf and Einea, Omar},
  title     = {{BRAD} 1.0: Book reviews in {Arabic} dataset},
  booktitle = {2016 IEEE/ACS 13th International Conference of Computer Systems and Applications (AICCSA)},
  year      = {2016},
  pages     = {1--8},
  doi       = {10.1109/AICCSA.2016.7945800},
}
Dataset id: ar_hard
- Domain: reviews
- Language: ar
- Language family: Afro-Asiatic
- Genus: Semitic
- Definite articles: definite affix
- Indefinite articles: no article
- Number of cases: 3
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative intonation only
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: mixed morphological plural
- Grammatical genders: masculine, feminine
@incollection{dataset_ar_hard,
  author    = {Elnagar, Ashraf and
               Khalifa, Yasmin S. and
               Einea, Anas},
  title     = {Hotel {Arabic-Reviews} Dataset Construction for Sentiment Analysis Applications},
  booktitle = {Intelligent Natural Language Processing: Trends and Applications},
  year      = {2018},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {35--52},
  isbn      = {978-3-319-67056-0},
  doi       = {10.1007/978-3-319-67056-0_3},
  url       = {https://doi.org/10.1007/978-3-319-67056-0_3},
}
Dataset id: ar_labr
- Domain: reviews
- Language: ar
- Language family: Afro-Asiatic
- Genus: Semitic
- Definite articles: definite affix
- Indefinite articles: no article
- Number of cases: 3
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative intonation only
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: mixed morphological plural
- Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_labr,
  author    = {Aly, Mohamed and Atiya, Amir},
  title     = {{LABR}: A Large Scale {A}rabic Book Reviews Dataset},
  booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  year      = {2013},
  month     = aug,
  address   = {Sofia, Bulgaria},
  publisher = {Association for Computational Linguistics},
  pages     = {494--498},
  url       = {https://aclanthology.org/P13-2088},
}
Dataset id: ar_oclar
- Domain: reviews
- Language: ar
- Language family: Afro-Asiatic
- Genus: Semitic
- Definite articles: definite affix
- Indefinite articles: no article
- Number of cases: 3
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative intonation only
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: mixed morphological plural
- Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_oclar,
  author    = {Al Omari, Marwan and
               Al-Hajj, Moustafa and
               Hammami, Nacereddine and
               Sabra, Amani},
  title     = {Sentiment Classifier: Logistic Regression for {Arabic} Services' Reviews in {Lebanon}},
  booktitle = {2019 International Conference on Computer and Information Sciences (ICCIS)},
  year      = {2019},
  pages     = {1--5},
  doi       = {10.1109/ICCISci.2019.8716394},
}
Dataset id: ar_semeval_2017
- Domain: mixed
- Language: ar
- Language family: Afro-Asiatic
- Genus: Semitic
- Definite articles: definite affix
- Indefinite articles: no article
- Number of cases: 3
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative intonation only
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: mixed morphological plural
- Grammatical genders: masculine, feminine
@inproceedings{dataset_semeval_2017,
  author    = {Rosenthal, Sara and Farra, Noura and Nakov, Preslav},
  title     = {{S}em{E}val-2017 Task 4: Sentiment Analysis in {T}witter},
  booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation ({S}em{E}val-2017)},
  year      = {2017},
  month     = aug,
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {502--518},
  doi       = {10.18653/v1/S17-2088},
  url       = {https://aclanthology.org/S17-2088},
  abstract  = {This paper describes the fifth year of the Sentiment Analysis in Twitter task. SemEval-2017 Task 4 continues with a rerun of the subtasks of SemEval-2016 Task 4, which include identifying the overall sentiment of the tweet, sentiment towards a topic with classification on a two-point and on a five-point ordinal scale, and quantification of the distribution of sentiment towards a topic across a number of tweets: again on a two-point and on a five-point ordinal scale. Compared to 2016, we made two changes: (i) we introduced a new language, Arabic, for all subtasks, and (ii) we made available information from the profiles of the Twitter users who posted the target tweets. The task continues to be very popular, with a total of 48 teams participating this year.},
}
Dataset id: ar_syria_corpus
- Domain: social_media
- Language: ar
- Language family: Afro-Asiatic
- Genus: Semitic
- Definite articles: definite affix
- Indefinite articles: no article
- Number of cases: 3
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative intonation only
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: mixed morphological plural
- Grammatical genders: masculine, feminine
@inproceedings{dataset_ar_bbn,
  author    = {Salameh, Mohammad and Mohammad, Saif and Kiritchenko, Svetlana},
  title     = {Sentiment after Translation: A Case-Study on {A}rabic Social Media Posts},
  booktitle = {Proceedings of the 2015 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2015},
  month     = may # "{--}" # jun,
  address   = {Denver, Colorado},
  publisher = {Association for Computational Linguistics},
  pages     = {767--777},
  doi       = {10.3115/v1/N15-1078},
  url       = {https://aclanthology.org/N15-1078},
}
Dataset id: bg_twitter_sentiment
- Domain: social_media
- Language: bg
- Language family: Indo-European
- Genus: Slavic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: no article
- Number of cases: no morphological case-marking
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
  author    = {Mozeti{\v{c}}, Igor and
               Gr{\v{c}}ar, Miha and
               Smailovi{\'c}, Jasmina},
  title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2016},
  month     = may,
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: bs_twitter_sentiment
- Domain: social_media
- Language: bs
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 5
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: other
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
  author    = {Mozeti{\v{c}}, Igor and
               Gr{\v{c}}ar, Miha and
               Smailovi{\'c}, Jasmina},
  title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2016},
  month     = may,
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: cs_facebook
- Domain: social_media
- Language: cs
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative affix
- Polar questions: interrogative word order
- Position of negative word wrt SOV: MorphNeg
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_cs_social_media,
  author    = {Habernal, Ivan and Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and Steinberger, Josef},
  title     = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning},
  booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  year      = {2013},
  month     = jun,
  address   = {Atlanta, Georgia},
  publisher = {Association for Computational Linguistics},
  pages     = {65--74},
  url       = {https://aclanthology.org/W13-1609},
}
Dataset id: cs_mall_product_reviews
- Domain: reviews
- Language: cs
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative affix
- Polar questions: interrogative word order
- Position of negative word wrt SOV: MorphNeg
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_cs_social_media,
  author    = {Habernal, Ivan and Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and Steinberger, Josef},
  title     = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning},
  booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  year      = {2013},
  month     = jun,
  address   = {Atlanta, Georgia},
  publisher = {Association for Computational Linguistics},
  pages     = {65--74},
  url       = {https://aclanthology.org/W13-1609},
}
Dataset id: cs_movie_reviews
- Domain: reviews
- Language: cs
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative affix
- Polar questions: interrogative word order
- Position of negative word wrt SOV: MorphNeg
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_cs_social_media,
  author    = {Habernal, Ivan and Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and Steinberger, Josef},
  title     = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning},
  booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  year      = {2013},
  month     = jun,
  address   = {Atlanta, Georgia},
  publisher = {Association for Computational Linguistics},
  pages     = {65--74},
  url       = {https://aclanthology.org/W13-1609},
}
Dataset id: cs_news_stance
- Domain: social_media
- Language: cs
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative affix
- Polar questions: interrogative word order
- Position of negative word wrt SOV: MorphNeg
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_cs_social_media,
  author    = {Habernal, Ivan and Pt{\'a}{\v{c}}ek, Tom{\'a}{\v{s}} and Steinberger, Josef},
  title     = {Sentiment Analysis in {C}zech Social Media Using Supervised Machine Learning},
  booktitle = {Proceedings of the 4th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  year      = {2013},
  month     = jun,
  address   = {Atlanta, Georgia},
  publisher = {Association for Computational Linguistics},
  pages     = {65--74},
  url       = {https://aclanthology.org/W13-1609},
}
Dataset id: de_dai_labor
- Domain: social_media
- Language: de
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: 4
- Order of subject, object, verb: no dominant order
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: more than one position
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_dai_labor,
  author    = {Narr, Sascha and
               H{\"u}lfenhaus, Michael and
               Albayrak, Sahin},
  title     = {Language-Independent {Twitter} Sentiment Analysis},
  booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning ({KDML}-2012)},
  year      = {2012},
  location  = {Dortmund, Germany},
}
Dataset id: de_ifeel
- Domain: social_media
- Language: de
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: 4
- Order of subject, object, verb: no dominant order
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: more than one position
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_dai_labor,
  author    = {Narr, Sascha and
               H{\"u}lfenhaus, Michael and
               Albayrak, Sahin},
  title     = {Language-Independent {Twitter} Sentiment Analysis},
  booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning ({KDML}-2012)},
  year      = {2012},
  location  = {Dortmund, Germany},
}
Dataset id: de_multilan_amazon
- Domain: reviews
- Language: de
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: 4
- Order of subject, object, verb: no dominant order
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: more than one position
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_multilan_amazon,
  author    = {Keung, Phillip and
               Lu, Yichao and
               Szarvas, Gy{\"o}rgy and
               Smith, Noah A.},
  title     = {The Multilingual {A}mazon Reviews Corpus},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2020},
  month     = nov,
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {4563--4568},
  doi       = {10.18653/v1/2020.emnlp-main.369},
  url       = {https://aclanthology.org/2020.emnlp-main.369},
}
Dataset id: de_omp
- Domain: social_media
- Language: de
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: 4
- Order of subject, object, verb: no dominant order
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: more than one position
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_de_omp,
  author    = {Schabus, Dietmar and Skowron, Marcin},
  title     = {Academic-Industrial Perspective on the Development and Deployment of a Moderation System for a Newspaper Website},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  year      = {2018},
  month     = may,
  address   = {Miyazaki, Japan},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://aclanthology.org/L18-1253},
}
Dataset id: de_sb10k
- Domain: social_media
- Language: de
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: 4
- Order of subject, object, verb: no dominant order
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: more than one position
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_de_sb10k,
  author    = {Cieliebak, Mark and Deriu, Jan Milan and Egger, Dominic and Uzdilli, Fatih},
  title     = {A {T}witter Corpus and Benchmark Resources for {G}erman Sentiment Analysis},
  booktitle = {Proceedings of the Fifth International Workshop on Natural Language Processing for Social Media},
  year      = {2017},
  month     = apr,
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {45--51},
  doi       = {10.18653/v1/W17-1106},
  url       = {https://aclanthology.org/W17-1106},
  abstract  = {In this paper we present SB10k, a new corpus for sentiment analysis with approx. 10,000 German tweets. We use this new corpus and two existing corpora to provide state-of-the-art benchmarks for sentiment analysis in German: we implemented a CNN (based on the winning system of SemEval-2016) and a feature-based SVM and compare their performance on all three corpora. For the CNN, we also created German word embeddings trained on 300M tweets. These word embeddings were then optimized for sentiment analysis using distant-supervised learning. The new corpus, the German word embeddings (plain and optimized), and source code to re-run the benchmarks are publicly available.},
}
Dataset id: de_twitter_sentiment
- Domain: social_media
- Language: de
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: 4
- Order of subject, object, verb: no dominant order
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: more than one position
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
  author    = {Mozeti{\v{c}}, Igor and
               Gr{\v{c}}ar, Miha and
               Smailovi{\'c}, Jasmina},
  title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2016},
  month     = may,
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: en_amazon
- Domain: reviews
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_en_amazon,
  author    = {Ni, Jianmo and Li, Jiacheng and McAuley, Julian},
  title     = {Justifying Recommendations using Distantly-Labeled Reviews and Fine-Grained Aspects},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
  year      = {2019},
  month     = nov,
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  pages     = {188--197},
  doi       = {10.18653/v1/D19-1018},
  url       = {https://aclanthology.org/D19-1018},
}
Dataset id: en_dai_labor
- Domain: social_media
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_dai_labor,
  author    = {Narr, Sascha and
               H{\"u}lfenhaus, Michael and
               Albayrak, Sahin},
  title     = {Language-Independent {Twitter} Sentiment Analysis},
  booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning ({KDML}-2012)},
  year      = {2012},
  location  = {Dortmund, Germany},
}
Dataset id: en_financial_phrasebank_sentences_75agree
- Domain: news
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@article{dataset_en_financial_phrasebank_sentences_75agree,
  author     = {Malo, Pekka and
                Sinha, Ankur and
                Korhonen, Pekka and
                Wallenius, Jyrki and
                Takala, Pyry},
  title      = {Good Debt or Bad Debt: Detecting Semantic Orientations in Economic Texts},
  journal    = {Journal of the Association for Information Science and Technology},
  year       = {2014},
  month      = apr,
  issue_date = {April 2014},
  volume     = {65},
  number     = {4},
  pages      = {782--796},
  numpages   = {15},
  publisher  = {John Wiley \& Sons, Inc.},
  address    = {USA},
  issn       = {2330-1635},
  doi        = {10.1002/asi.23062},
  url        = {https://doi.org/10.1002/asi.23062},
  keywords   = {economics, automatic classification, linguistic analysis},
}
Dataset id: en_multilan_amazon
- Domain: reviews
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_multilan_amazon,
  author    = {Keung, Phillip and
               Lu, Yichao and
               Szarvas, Gy{\"o}rgy and
               Smith, Noah A.},
  title     = {The Multilingual {A}mazon Reviews Corpus},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2020},
  month     = nov,
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {4563--4568},
  doi       = {10.18653/v1/2020.emnlp-main.369},
  url       = {https://aclanthology.org/2020.emnlp-main.369},
}
Dataset id: en_per_sent
- Domain: news
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_en_per_sent,
  author    = {Bastan, Mohaddeseh and Koupaee, Mahnaz and Son, Youngseo and Sicoli, Richard and Balasubramanian, Niranjan},
  title     = {Author{'}s Sentiment Prediction},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  year      = {2020},
  month     = dec,
  address   = {Barcelona, Spain (Online)},
  publisher = {International Committee on Computational Linguistics},
  pages     = {604--615},
  doi       = {10.18653/v1/2020.coling-main.52},
  url       = {https://aclanthology.org/2020.coling-main.52},
}
Dataset id: en_poem_sentiment
- Domain: poems
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_en_poem_sentiment,
  author    = {Sheng, Emily and Uthus, David},
  title     = {Investigating Societal Biases in a Poetry Composition System},
  booktitle = {Proceedings of the Second Workshop on Gender Bias in Natural Language Processing},
  year      = {2020},
  month     = dec,
  address   = {Barcelona, Spain (Online)},
  publisher = {Association for Computational Linguistics},
  pages     = {93--106},
  url       = {https://aclanthology.org/2020.gebnlp-1.9},
}
Dataset id: en_semeval_2017
- Domain: mixed
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_semeval_2017,
  author    = {Rosenthal, Sara and Farra, Noura and Nakov, Preslav},
  title     = {{S}em{E}val-2017 Task 4: Sentiment Analysis in {T}witter},
  booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation ({S}em{E}val-2017)},
  year      = {2017},
  month     = aug,
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {502--518},
  doi       = {10.18653/v1/S17-2088},
  url       = {https://aclanthology.org/S17-2088},
  abstract  = {This paper describes the fifth year of the Sentiment Analysis in Twitter task. SemEval-2017 Task 4 continues with a rerun of the subtasks of SemEval-2016 Task 4, which include identifying the overall sentiment of the tweet, sentiment towards a topic with classification on a two-point and on a five-point ordinal scale, and quantification of the distribution of sentiment towards a topic across a number of tweets: again on a two-point and on a five-point ordinal scale. Compared to 2016, we made two changes: (i) we introduced a new language, Arabic, for all subtasks, and (ii) we made available information from the profiles of the Twitter users who posted the target tweets. The task continues to be very popular, with a total of 48 teams participating this year.},
}
Dataset id: en_sentistrength
- Domain: social_media
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@article{dataset_en_sentistrength,
  author     = {Thelwall, Mike and Buckley, Kevan and Paltoglou, Georgios},
  title      = {Sentiment Strength Detection for the Social Web},
  journal    = {J. Am. Soc. Inf. Sci. Technol.},
  volume     = {63},
  number     = {1},
  pages      = {163--173},
  numpages   = {11},
  month      = jan,
  year       = {2012},
  issue_date = {January 2012},
  publisher  = {John Wiley \& Sons, Inc.},
  address    = {USA},
  issn       = {1532-2882},
  doi        = {10.1002/asi.21662},
  url        = {https://doi.org/10.1002/asi.21662},
  abstract   = {Sentiment analysis is concerned with the automatic extraction of sentiment-related
information from text. Although most sentiment analysis addresses commercial tasks,
such as extracting opinions from product reviews, there is increasing interest in
the affective dimension of the social web, and Twitter in particular. Most sentiment
analysis algorithms are not ideally suited to this task because they exploit indirect
indicators of sentiment that can reflect genre or topic instead. Hence, such algorithms
used to process social web texts can identify spurious sentiment patterns caused by
topics rather than affective phenomena. This article assesses an improved version
of the algorithm SentiStrength for sentiment strength detection across the social
web that primarily uses direct indications of sentiment. The results from six diverse
social web data sets (MySpace, Twitter, YouTube, Digg, RunnersWorld, BBCForums) indicate
that SentiStrength 2 is successful in the sense of performing better than a baseline
approach for all data sets in both supervised and unsupervised cases. SentiStrength
is not always better than machine-learning approaches that exploit indirect indicators
of sentiment, however, and is particularly weaker for positive sentiment in news-related
discussions. Overall, the results suggest that, even unsupervised, SentiStrength is
robust enough to be applied to a wide variety of different social web contexts.},
}
Dataset id: en_silicone_meld_s
- Domain: chats
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_en_silicone,
  author    = {Chapuis, Emile and
               Colombo, Pierre and
               Manica, Matteo and
               Labeau, Matthieu and
               Clavel, Chlo{\'e}},
  title     = {Hierarchical Pre-training for Sequence Labelling in Spoken Dialog},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = nov,
  year      = {2020},
  pages     = {2636--2648},
  doi       = {10.18653/v1/2020.findings-emnlp.239},
  url       = {https://aclanthology.org/2020.findings-emnlp.239},
}
Dataset id: en_silicone_sem
- Domain: chats
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_en_silicone,
  author    = {Chapuis, Emile and
               Colombo, Pierre and
               Manica, Matteo and
               Labeau, Matthieu and
               Clavel, Chlo{\'e}},
  title     = {Hierarchical Pre-training for Sequence Labelling in Spoken Dialog},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = nov,
  year      = {2020},
  pages     = {2636--2648},
  doi       = {10.18653/v1/2020.findings-emnlp.239},
  url       = {https://aclanthology.org/2020.findings-emnlp.239},
}
Dataset id: en_tweet_airlines
- Domain: social_media
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@misc{dataset_en_tweet_airlines,
  author = {{Crowdflower Inc.}},
  title  = {{Twitter} {US} Airline Sentiment},
  year   = {2015},
  url    = {https://www.kaggle.com/crowdflower/twitter-airline-sentiment},
}
Dataset id: en_tweets_sanders
- Domain: social_media
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@article{dataset_en_tweets_sanders,
  author  = {Sanders, Niek J},
  title   = {{Sanders-Twitter Sentiment Corpus}},
  journal = {Sanders Analytics LLC},
  year    = {2011},
}
Dataset id: en_twitter_sentiment
- Domain: social_media
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@article{dataset_twitter_sentiment,
  author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
  title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  month     = may,
  year      = {2016},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: en_vader_amazon
- Domain: reviews
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_en_vader,
  author    = {Hutto, Clayton J. and Gilbert, Eric},
  title     = {{VADER}: A Parsimonious Rule-Based Model for Sentiment Analysis of Social Media Text},
  booktitle = {Proceedings of the International AAAI Conference on Web and Social Media},
  volume    = {8},
  pages     = {216--225},
  month     = may,
  year      = {2014},
  url       = {https://ojs.aaai.org/index.php/ICWSM/article/view/14550},
}
Dataset id: en_vader_movie_reviews
- Domain: reviews
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_en_vader,
  author    = {Hutto, Clayton J. and Gilbert, Eric},
  title     = {{VADER}: A Parsimonious Rule-Based Model for Sentiment Analysis of Social Media Text},
  booktitle = {Proceedings of the International AAAI Conference on Web and Social Media},
  volume    = {8},
  pages     = {216--225},
  month     = may,
  year      = {2014},
  url       = {https://ojs.aaai.org/index.php/ICWSM/article/view/14550},
}
Dataset id: en_vader_nyt
- Domain: news
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_en_vader,
  author    = {Hutto, Clayton J. and Gilbert, Eric},
  title     = {{VADER}: A Parsimonious Rule-Based Model for Sentiment Analysis of Social Media Text},
  booktitle = {Proceedings of the International AAAI Conference on Web and Social Media},
  volume    = {8},
  pages     = {216--225},
  month     = may,
  year      = {2014},
  url       = {https://ojs.aaai.org/index.php/ICWSM/article/view/14550},
}
Dataset id: en_vader_twitter
- Domain: social_media
- Language: en
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_en_vader,
  author    = {Hutto, Clayton J. and Gilbert, Eric},
  title     = {{VADER}: A Parsimonious Rule-Based Model for Sentiment Analysis of Social Media Text},
  booktitle = {Proceedings of the International AAAI Conference on Web and Social Media},
  volume    = {8},
  pages     = {216--225},
  month     = may,
  year      = {2014},
  url       = {https://ojs.aaai.org/index.php/ICWSM/article/view/14550},
}
Dataset id: es_muchocine
- Domain: reviews
- Language: es
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-making
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@article{dataset_es_muchocine,
  author    = {Cruz, Fermin L and Troyano, Jose A and Enriquez, Fernando and Ortega, Javier},
  title     = {Experiments in sentiment classification of movie reviews in {Spanish}},
  journal   = {Procesamiento del Lenguaje Natural},
  volume    = {41},
  pages     = {73--80},
  year      = {2008},
  publisher = {Sociedad Espa{\~n}ola para el Procesamiento del Lenguaje Natural (SEPLN)},
}
Dataset id: es_multilan_amazon
- Domain: reviews
- Language: es
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-making
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_multilan_amazon,
  author    = {Keung, Phillip and
               Lu, Yichao and
               Szarvas, Gy{\"o}rgy and
               Smith, Noah A.},
  title     = {The Multilingual {A}mazon Reviews Corpus},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = nov,
  year      = {2020},
  pages     = {4563--4568},
  doi       = {10.18653/v1/2020.emnlp-main.369},
  url       = {https://aclanthology.org/2020.emnlp-main.369},
}
Dataset id: es_paper_reviews
- Domain: reviews
- Language: es
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-making
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@article{dataset_es_paper_reviews,
  author  = {Keith Norambuena, Brian and Lettura, Exequiel and Villegas, Claudio},
  title   = {Sentiment analysis and opinion mining applied to scientific paper reviews},
  journal = {Intelligent Data Analysis},
  volume  = {23},
  pages   = {191--214},
  month   = feb,
  year    = {2019},
  doi     = {10.3233/IDA-173807},
}
Dataset id: es_semeval2020
- Domain: social_media
- Language: es
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-making
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_semeval_2020,
  author    = {Patwa, Parth and
               Aguilar, Gustavo and
               Kar, Sudipta and
               Pandey, Suraj and
               PYKL, Srinivas and
               Gamb{\"a}ck, Bj{\"o}rn and
               Chakraborty, Tanmoy and
               Solorio, Thamar and
               Das, Amitava},
  title     = {{S}em{E}val-2020 Task 9: Overview of Sentiment Analysis of Code-Mixed Tweets},
  booktitle = {Proceedings of the Fourteenth Workshop on Semantic Evaluation},
  publisher = {International Committee for Computational Linguistics},
  address   = {Barcelona (online)},
  month     = dec,
  year      = {2020},
  pages     = {774--790},
  doi       = {10.18653/v1/2020.semeval-1.100},
  url       = {https://aclanthology.org/2020.semeval-1.100},
}
Dataset id: es_twitter_sentiment
- Domain: social_media
- Language: es
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-making
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@article{dataset_twitter_sentiment,
  author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
  title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  month     = may,
  year      = {2016},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: fa_sentipers
- Domain: reviews
- Language: fa
- Language family: Indo-European
- Genus: Iranian
- Definite articles: no article
- Indefinite articles: indefinite word same as one
- Number of cases: 2
- Order of subject, object, verb: SOV
- Negative morphemes: negative affix
- Polar questions: question particle
- Position of negative word wrt SOV: MorphNeg
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@article{dataset_fa_sentipers,
  author     = {Pedram Hosseini and
                Ali Ahmadian Ramaki and
                Hassan Maleki and
                Mansoureh Anvari and
                Seyed Abolghasem Mirroshandel},
  title      = {{SentiPers}: {A} Sentiment Analysis Corpus for Persian},
  journal    = {Computing Research Repository},
  volume     = {arXiv:1801.07737},
  year       = {2018},
  note       = {Version 2},
  eprinttype = {arXiv},
  eprint     = {1801.07737},
  url        = {http://arxiv.org/abs/1801.07737},
  timestamp  = {Mon, 13 Aug 2018 16:47:47 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1801-07737.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
Dataset id: fr_dai_labor
- Domain: social_media
- Language: fr
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-making
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: OptDoubleNeg
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_dai_labor,
  author    = {Narr, Sascha and Hülfenhaus, Michael and Albayrak, Sahin},
  title     = {Language-Independent {Twitter} Sentiment Analysis},
  booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning (KDML-2012)},
  year      = {2012},
  location  = {Dortmund, Germany},
}
Dataset id: fr_ifeel
- Domain: social_media
- Language: fr
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-making
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: OptDoubleNeg
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_dai_labor,
  author    = {Narr, Sascha and Hülfenhaus, Michael and Albayrak, Sahin},
  title     = {Language-Independent {Twitter} Sentiment Analysis},
  booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning (KDML-2012)},
  year      = {2012},
  location  = {Dortmund, Germany},
}
Dataset id: fr_multilan_amazon
- Domain: reviews
- Language: fr
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-making
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: OptDoubleNeg
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_multilan_amazon,
  author    = {Keung, Phillip and
               Lu, Yichao and
               Szarvas, Gy{\"o}rgy and
               Smith, Noah A.},
  title     = {The Multilingual {A}mazon Reviews Corpus},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = nov,
  year      = {2020},
  pages     = {4563--4568},
  doi       = {10.18653/v1/2020.emnlp-main.369},
  url       = {https://aclanthology.org/2020.emnlp-main.369},
}
Dataset id: he_hebrew_sentiment
- Domain: social_media
- Language: he
- Language family: Afro-Asiatic
- Genus: Semitic
- Definite articles: definite affix
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-making
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_he_hebrew_sentiment,
  author    = {Amram, Adam and
               Ben David, Anat and
               Tsarfaty, Reut},
  title     = {Representations and Architectures in Neural Sentiment Analysis for Morphologically Rich Languages: A Case Study from {M}odern {H}ebrew},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  month     = aug,
  year      = {2018},
  pages     = {2242--2252},
  url       = {https://aclanthology.org/C18-1190},
  abstract  = {This paper empirically studies the effects of representation choices on neural sentiment analysis for Modern Hebrew, a morphologically rich language (MRL) for which no sentiment analyzer currently exists. We study two dimensions of representational choices: (i) the granularity of the input signal (token-based vs. morpheme-based), and (ii) the level of encoding of vocabulary items (string-based vs. character-based). We hypothesise that for MRLs, languages where multiple meaning-bearing elements may be carried by a single space-delimited token, these choices will have measurable effects on task performance, and that these effects may vary for different architectural designs {---} fully-connected, convolutional or recurrent. Specifically, we hypothesize that morpheme-based representations will have advantages in terms of their generalization capacity and task accuracy, due to their better OOV coverage. To empirically study these effects, we develop a new sentiment analysis benchmark for Hebrew, based on 12K social media comments, and provide two instances of these data: in token-based and morpheme-based settings. Our experiments show that representation choices empirical effects vary with architecture type. While fully-connected and convolutional networks slightly prefer token-based settings, RNNs benefit from a morpheme-based representation, in accord with the hypothesis that explicit morphological information may help generalize. Our endeavour also delivers the first state-of-the-art broad-coverage sentiment analyzer for Hebrew, with over 89{\%} accuracy, alongside an established benchmark to further study the effects of linguistic representation choices on neural networks{'} task performance.},
}
Dataset id: hi_semeval2020
- Domain: social_media
- Language: hi
- Language family: Indo-European
- Genus: Indic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 3
- Order of subject, object, verb: SOV
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SONegV
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_semeval_2020,
  author    = {Patwa, Parth and
               Aguilar, Gustavo and
               Kar, Sudipta and
               Pandey, Suraj and
               PYKL, Srinivas and
               Gamb{\"a}ck, Bj{\"o}rn and
               Chakraborty, Tanmoy and
               Solorio, Thamar and
               Das, Amitava},
  title     = {{S}em{E}val-2020 Task 9: Overview of Sentiment Analysis of Code-Mixed Tweets},
  booktitle = {Proceedings of the Fourteenth Workshop on Semantic Evaluation},
  publisher = {International Committee for Computational Linguistics},
  address   = {Barcelona (online)},
  month     = dec,
  year      = {2020},
  pages     = {774--790},
  doi       = {10.18653/v1/2020.semeval-1.100},
  url       = {https://aclanthology.org/2020.semeval-1.100},
}
Dataset id: hr_sentiment_news_document
- Domain: news
- Language: hr
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 5
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: other
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_hr_sentiment_news_document,
  author         = {Pelicon, Andraž and Pranjić, Marko and Miljković, Dragana and Škrlj, Blaž and Pollak, Senja},
  title          = {Zero-Shot Learning for Cross-Lingual News Sentiment Classification},
  journal        = {Applied Sciences},
  volume         = {10},
  number         = {17},
  article-number = {5993},
  year           = {2020},
  issn           = {2076-3417},
  doi            = {10.3390/app10175993},
  url            = {https://www.mdpi.com/2076-3417/10/17/5993},
  abstract       = {In this paper, we address the task of zero-shot cross-lingual news sentiment classification. Given the annotated dataset of positive, neutral, and negative news in Slovene, the aim is to develop a news classification system that assigns the sentiment category not only to Slovene news, but to news in another language without any training data required. Our system is based on the multilingual BERT model, while we test different approaches for handling long documents and propose a novel technique for sentiment enrichment of the BERT model as an intermediate training step. With the proposed approach, we achieve state-of-the-art performance on the sentiment analysis task on Slovenian news. We evaluate the zero-shot cross-lingual capabilities of our system on a novel news sentiment test set in Croatian. The results show that the cross-lingual approach also largely outperforms the majority classifier, as well as all settings without sentiment enrichment in pre-training.},
}
Dataset id: hr_twitter_sentiment
- Domain: social_media
- Language: hr
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 5
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: other
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
  author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
  title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  month     = may,
  year      = {2016},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: hu_twitter_sentiment
- Domain: social_media
- Language: hu
- Language family: Uralic
- Genus: Ugric
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word distinct from one
- Number of cases: 10 or more
- Order of subject, object, verb: no dominant order
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@article{dataset_twitter_sentiment,
  author    = {Mozetič, Igor and Grčar, Miha and Smailović, Jasmina},
  title     = {Multilingual {Twitter} Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  month     = may,
  year      = {2016},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: it_evalita2016
- Domain: social_media
- Language: it
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-making
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative intonation only
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_it_evalita2016,
  author      = {Barbieri, Francesco and Basile, Valerio and Croce, Danilo and Nissim, Malvina and Novielli, Nicole and Patti, Viviana},
  title       = {Overview of the {Evalita} 2016 {SENTIment} {POLarity} Classification Task},
  booktitle   = {{Proceedings of Third Italian Conference on Computational Linguistics (CLiC-it 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2016)}},
  address     = {Naples, Italy},
  month       = dec,
  year        = {2016},
  keywords    = {Natural language processing and web ; Social media analysis ; Sentiment analysis},
  url         = {https://hal.inria.fr/hal-01414731},
  pdf         = {https://hal.inria.fr/hal-01414731/file/paper_026.pdf},
  hal_id      = {hal-01414731},
  hal_version = {v1},
}
Dataset id: it_multiemotions
- Domain: social_media
- Language: it
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-making
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative intonation only
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_it_multiemotions,
  author    = {Sprugnoli, Rachele},
  title     = {{MultiEmotions-It}: a New Dataset for Opinion Polarity and Emotion Analysis for {Italian}},
  booktitle = {Proceedings of the Seventh Italian Conference on Computational Linguistics},
  month     = dec,
  year      = {2020},
}
Dataset id: ja_multilan_amazon
- Domain: reviews
- Language: ja
- Language family: Japanese
- Genus: Japanese
- Definite articles: no article
- Indefinite articles: indefinite word distinct from one
- Number of cases: 8-9
- Order of subject, object, verb: SOV
- Negative morphemes: negative affix
- Polar questions: question particle
- Position of negative word wrt SOV: MorphNeg
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: no grammatical gender
@inproceedings{dataset_multilan_amazon,
  author    = {Keung, Phillip and
               Lu, Yichao and
               Szarvas, Gy{\"o}rgy and
               Smith, Noah A.},
  title     = {The Multilingual {A}mazon Reviews Corpus},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = nov,
  year      = {2020},
  pages     = {4563--4568},
  doi       = {10.18653/v1/2020.emnlp-main.369},
  url       = {https://aclanthology.org/2020.emnlp-main.369},
}
Dataset id: lv_ltec_sentiment
- Domain: social_media
- Language: lv
- Language family: Indo-European
- Genus: Baltic
- Definite articles: demonstrative word used as definite article
- Indefinite articles: indefinite word same as one
- Number of cases: 5
- Order of subject, object, verb: SVO
- Negative morphemes: negative affix
- Polar questions: question particle
- Position of negative word wrt SOV: MorphNeg
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@article{dataset_lv_ltec_sentiment,
  author     = {Uga Sprogis and
                Matiss Rikters},
  title      = {What Can We Learn From Almost a Decade of Food Tweets},
  journal    = {Computing Research Repository},
  volume     = {arXiv:2007.05194},
  year       = {2020},
  note       = {Version 2},
  eprinttype = {arXiv},
  eprint     = {2007.05194},
  url        = {https://arxiv.org/abs/2007.05194},
  timestamp  = {Mon, 20 Jul 2020 14:20:39 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-05194.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
Dataset id: pl_klej_allegro_reviews
- Domain: reviews
- Language: pl
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_pl_klej_allegro_reviews,
  author    = {Rybak, Piotr and Mroczkowski, Robert and Tracz, Janusz and Gawlik, Ireneusz},
  title     = {{KLEJ}: Comprehensive Benchmark for {P}olish Language Understanding},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  year      = {2020},
  month     = jul,
  pages     = {1191--1201},
  doi       = {10.18653/v1/2020.acl-main.111},
  url       = {https://aclanthology.org/2020.acl-main.111},
}
Dataset id: pl_opi_lil_2012
- Domain: social_media
- Language: pl
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_pl_opi_lil_2012,
  author  = {Sobkowicz, Pawel and Sobkowicz, Antoni},
  title   = {Two-Year Study of Emotion and Communication Patterns in a Highly Polarized Political Discussion Forum},
  journal = {Social Science Computer Review},
  volume  = {30},
  number  = {4},
  pages   = {448--469},
  year    = {2012},
  doi     = {10.1177/0894439312436512}
}
Dataset id: pl_polemo
- Domain: reviews
- Language: pl
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_pl_polemo,
  author    = {Koco{\'n}, Jan and Mi{\l}kowski, Piotr and Za{\'s}ko-Zieli{\'n}ska, Monika},
  title     = {Multi-Level Sentiment Analysis of {P}ol{E}mo 2.0: Extended Corpus of Multi-Domain Consumer Reviews},
  booktitle = {Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)},
  publisher = {Association for Computational Linguistics},
  address   = {Hong Kong, China},
  year      = {2019},
  month     = nov,
  pages     = {980--991},
  doi       = {10.18653/v1/K19-1092},
  url       = {https://aclanthology.org/K19-1092}
}
Dataset id: pl_twitter_sentiment
- Domain: social_media
- Language: pl
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
  author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
  title     = {Multilingual Twitter Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2016},
  month     = may,
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: pt_dai_labor
- Domain: social_media
- Language: pt
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-marking
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_dai_labor,
  author    = {Narr, Sascha and H{\"u}lfenhaus, Michael and Albayrak, Sahin},
  title     = {Language-Independent Twitter Sentiment Analysis},
  booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning (KDML-2012)},
  year      = {2012},
  location  = {Dortmund, Germany},
}
Dataset id: pt_ifeel
- Domain: social_media
- Language: pt
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-marking
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_dai_labor,
  author    = {Narr, Sascha and H{\"u}lfenhaus, Michael and Albayrak, Sahin},
  title     = {Language-Independent Twitter Sentiment Analysis},
  booktitle = {Workshop on Knowledge Discovery, Data Mining and Machine Learning (KDML-2012)},
  year      = {2012},
  location  = {Dortmund, Germany},
}
Dataset id: pt_tweet_sent_br
- Domain: social_media
- Language: pt
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-marking
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@inproceedings{dataset_pt_tweet_sent_br,
  author    = {Brum, Henrico and Volpe Nunes, Maria das Gra{\c{c}}as},
  title     = {Building a Sentiment Corpus of Tweets in {B}razilian {P}ortuguese},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Miyazaki, Japan},
  year      = {2018},
  month     = may,
  url       = {https://aclanthology.org/L18-1658},
}
Dataset id: pt_twitter_sentiment
- Domain: social_media
- Language: pt
- Language family: Indo-European
- Genus: Romance
- Definite articles: definite word distinct from demonstrative
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-marking
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@article{dataset_twitter_sentiment,
  author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
  title     = {Multilingual Twitter Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2016},
  month     = may,
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: ru_sentiment
- Domain: social_media
- Language: ru
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_ru_sentiment,
  author    = {Rogers, Anna and Romanov, Alexey and Rumshisky, Anna and Volkova, Svitlana and Gronas, Mikhail and Gribov, Alex},
  title     = {{R}u{S}entiment: An Enriched Sentiment Analysis Dataset for Social Media in {R}ussian},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address   = {Santa Fe, New Mexico, USA},
  year      = {2018},
  month     = aug,
  pages     = {755--763},
  url       = {https://aclanthology.org/C18-1064},
  abstract  = {This paper presents RuSentiment, a new dataset for sentiment analysis of social media posts in Russian, and a new set of comprehensive annotation guidelines that are extensible to other languages. RuSentiment is currently the largest in its class for Russian, with 31,185 posts annotated with Fleiss{'} kappa of 0.58 (3 annotations per post). To diversify the dataset, 6,950 posts were pre-selected with an active learning-style strategy. We report baseline classification results, and we also release the best-performing embeddings trained on 3.2B tokens of Russian VKontakte posts.},
}
Dataset id: ru_twitter_sentiment
- Domain: social_media
- Language: ru
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
  author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
  title     = {Multilingual Twitter Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2016},
  month     = may,
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: sk_twitter_sentiment
- Domain: social_media
- Language: sk
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative affix
- Polar questions: interrogative word order
- Position of negative word wrt SOV: MorphNeg
- Prefixing vs suffixing: weakly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
  author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
  title     = {Multilingual Twitter Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2016},
  month     = may,
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: sl_sentinews
- Domain: news
- Language: sl
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@Article{Bučar2018,
  author   = {Bu{\v{c}}ar, Jo{\v{z}}e
              and {\v{Z}}nidar{\v{s}}i{\v{c}}, Martin
              and Povh, Janez},
  title    = {Annotated news corpora and a lexicon for sentiment analysis in Slovene},
  journal  = {Language Resources and Evaluation},
  year     = {2018},
  month    = sep,
  day      = {01},
  volume   = {52},
  number   = {3},
  pages    = {895--919},
  abstract = {In this study, we introduce Slovene web-crawled news corpora with sentiment annotation on three levels of granularity: sentence, paragraph and document levels. We describe the methodology and tools that were required for their construction. The corpora contain more than 250,000 documents with political, business, economic and financial content from five Slovene media resources on the web. More than 10,000 of them were manually annotated as negative, neutral or positive. All corpora are publicly available under a Creative Commons copyright license. We used the annotated documents to construct a Slovene sentiment lexicon, which is the first of its kind for Slovene, and to assess the sentiment classification approaches used. The constructed corpora were also utilised to monitor within-the-document sentiment dynamics, its changes over time and relations with news topics. We show that sentiment is, on average, more explicit at the beginning of documents, and it loses sharpness towards the end of documents.},
  issn     = {1574-0218},
  doi      = {10.1007/s10579-018-9413-3},
  url      = {https://doi.org/10.1007/s10579-018-9413-3}
}
Dataset id: sl_twitter_sentiment
- Domain: social_media
- Language: sl
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 6-7
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
  author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
  title     = {Multilingual Twitter Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2016},
  month     = may,
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: sq_twitter_sentiment
- Domain: social_media
- Language: sq
- Language family: Indo-European
- Genus: Albanian
- Definite articles: definite affix
- Indefinite articles: indefinite word distinct from one
- Number of cases: 4
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@article{dataset_twitter_sentiment,
  author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
  title     = {Multilingual Twitter Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2016},
  month     = may,
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: sr_movie_reviews
- Domain: reviews
- Language: sr
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 5
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: other
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@inproceedings{dataset_sr_serb_movie_reviews,
  author    = {Batanovi{\'c}, Vuk and Nikoli{\'c}, Bo{\v{s}}ko and Milosavljevi{\'c}, Milan},
  title     = {Reliable Baselines for Sentiment Analysis in Resource-Limited Languages: The {S}erbian Movie Review Dataset},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Portoro{\v{z}}, Slovenia},
  year      = {2016},
  month     = may,
  pages     = {2688--2696},
  url       = {https://aclanthology.org/L16-1427},
  abstract  = {Collecting data for sentiment analysis in resource-limited languages carries a significant risk of sample selection bias, since the small quantities of available data are most likely not representative of the whole population. Ignoring this bias leads to less robust machine learning classifiers and less reliable evaluation results. In this paper we present a dataset balancing algorithm that minimizes the sample selection bias by eliminating irrelevant systematic differences between the sentiment classes. We prove its superiority over the random sampling method and we use it to create the Serbian movie review dataset ― SerbMR ― the first balanced and topically uniform sentiment analysis dataset in Serbian. In addition, we propose an incremental way of finding the optimal combination of simple text processing options and machine learning features for sentiment classification. Several popular classifiers are used in conjunction with this evaluation approach in order to establish strong but reliable baselines for sentiment analysis in Serbian.},
}
Dataset id: sr_senticomments
- Domain: reviews
- Language: sr
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 5
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: other
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_sr_senticomments,
  author    = {Batanovi{\'c}, Vuk and Cvetanovi{\'c}, Milo{\v{s}} and Nikoli{\'c}, Bo{\v{s}}ko},
  title     = {A versatile framework for resource-limited sentiment articulation, annotation, and analysis of short texts},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2020},
  month     = nov,
  volume    = {15},
  number    = {11},
  pages     = {1--30},
  doi       = {10.1371/journal.pone.0242050},
  url       = {https://doi.org/10.1371/journal.pone.0242050},
  abstract  = {Choosing a comprehensive and cost-effective way of articulating and annotating the sentiment of a text is not a trivial task, particularly when dealing with short texts, in which sentiment can be expressed through a wide variety of linguistic and rhetorical phenomena. This problem is especially conspicuous in resource-limited settings and languages, where design options are restricted either in terms of manpower and financial means required to produce appropriate sentiment analysis resources, or in terms of available language tools, or both. In this paper, we present a versatile approach to addressing this issue, based on multiple interpretations of sentiment labels that encode information regarding the polarity, subjectivity, and ambiguity of a text, as well as the presence of sarcasm or a mixture of sentiments. We demonstrate its use on Serbian, a resource-limited language, via the creation of a main sentiment analysis dataset focused on movie comments, and two smaller datasets belonging to the movie and book domains. In addition to measuring the quality of the annotation process, we propose a novel metric to validate its cost-effectiveness. Finally, the practicality of our approach is further validated by training, evaluating, and determining the optimal configurations of several different kinds of machine-learning models on a range of sentiment classification tasks using the produced dataset.},
}
Dataset id: sr_twitter_sentiment
- Domain: social_media
- Language: sr
- Language family: Indo-European
- Genus: Slavic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 5
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: other
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine, neuter
@article{dataset_twitter_sentiment,
  author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
  title     = {Multilingual Twitter Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2016},
  month     = may,
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: sv_twitter_sentiment
- Domain: social_media
- Language: sv
- Language family: Indo-European
- Genus: Germanic
- Definite articles: definite affix
- Indefinite articles: indefinite word same as one
- Number of cases: 2
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: interrogative word order
- Position of negative word wrt SOV: more than one position
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: common, neuter
@article{dataset_twitter_sentiment,
  author    = {Mozeti{\v{c}}, Igor and Gr{\v{c}}ar, Miha and Smailovi{\'c}, Jasmina},
  title     = {Multilingual Twitter Sentiment Classification: The Role of Human Annotators},
  journal   = {PLOS ONE},
  publisher = {Public Library of Science},
  year      = {2016},
  month     = may,
  volume    = {11},
  number    = {5},
  pages     = {1--26},
  doi       = {10.1371/journal.pone.0155036},
  url       = {https://doi.org/10.1371/journal.pone.0155036},
}
Dataset id: th_wisesight_sentiment
- Domain: social_media
- Language: th
- Language family: Tai-Kadai
- Genus: Kam-Tai
- Definite articles: no article
- Indefinite articles: indefinite word distinct from one
- Number of cases: no morphological case-marking
- Order of subject, object, verb: SVO
- Negative morphemes: negative auxiliary verb
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: little affixation
- Coding of nominal plurality: mixed morphological plural
- Grammatical genders: noun classifiers
@misc{dataset_th_wisesight_sentiment,
  author    = {Suriyawongkul, Arthit and Chuangsuwanich, Ekapol and Chormai, Pattarawat and Polpanumas, Charin},
  title     = {PyThaiNLP/wisesight-sentiment: First release (v1.0)},
  publisher = {Zenodo},
  version   = {v1.0},
  year      = 2019,
  month     = sep,
  note      = {Zenodo},
  doi       = {10.5281/zenodo.3457447},
  url       = {https://doi.org/10.5281/zenodo.3457447}
}
Dataset id: th_wongnai_reviews
- Domain: reviews
- Language: th
- Language family: Tai-Kadai
- Genus: Kam-Tai
- Definite articles: no article
- Indefinite articles: indefinite word distinct from one
- Number of cases: no morphological case-marking
- Order of subject, object, verb: SVO
- Negative morphemes: negative auxiliary verb
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: little affixation
- Coding of nominal plurality: mixed morphological plural
- Grammatical genders: noun classifiers
@misc{dataset_th_wongnai_reviews,
  title        = {wongnai-corpus},
  author       = {Ekkalak Thongthanomkul and Tanapol Nearunchorn and Yuwat Chuesathuchon},
  howpublished = {\url{https://github.com/wongnai/wongnai-corpus}},
  journal      = {GitHub repository},
  publisher    = {GitHub},
  year         = {2019}
}
Dataset id: ur_roman_urdu
- Domain: mixed
- Language: ur
- Language family: Indo-European
- Genus: Indic
- Definite articles: no article
- Indefinite articles: no article
- Number of cases: 2
- Order of subject, object, verb: SOV
- Negative morphemes: negative affix
- Polar questions: question particle
- Position of negative word wrt SOV: SONegV
- Prefixing vs suffixing: strongly suffixing
- Coding of nominal plurality: plural suffix
- Grammatical genders: masculine, feminine
@article{dataset_ur_roman_urdu,
  author  = {Sharf, Zareen and Rahman, Saif Ur},
  title   = {Performing Natural Language Processing on Roman Urdu Datasets},
  journal = {International Journal of Computer Science and Network Security},
  volume  = {18},
  pages   = {141--148},
  year    = {2018},
  url     = {http://paper.ijcsns.org/07_book/201801/20180117.pdf}
}
Dataset id: zh_hotel_reviews
- Domain: reviews
- Language: zh
- Language family: Sino-Tibetan
- Genus: Chinese
- Definite articles: no article
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-marking
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: little affixation
- Coding of nominal plurality: no plural
- Grammatical genders: noun classifiers
@inproceedings{dataset_zh_hotel_reviews,
  author    = {Lin, Yiou and Lei, Hang and Wu, Jia and Li, Xiaoyu},
  title     = {An Empirical Study on Sentiment Classification of {C}hinese Review using Word Embedding},
  booktitle = {Proceedings of the 29th Pacific Asia Conference on Language, Information and Computation: Posters},
  address   = {Shanghai, China},
  year      = {2015},
  month     = oct,
  pages     = {258--266},
  url       = {https://aclanthology.org/Y15-2030},
}
Dataset id: zh_multilan_amazon
- Domain: reviews
- Language: zh
- Language family: Sino-Tibetan
- Genus: Chinese
- Definite articles: no article
- Indefinite articles: indefinite word same as one
- Number of cases: no morphological case-marking
- Order of subject, object, verb: SVO
- Negative morphemes: negative particle
- Polar questions: question particle
- Position of negative word wrt SOV: SNegVO
- Prefixing vs suffixing: little affixation
- Coding of nominal plurality: no plural
- Grammatical genders: noun classifiers
@inproceedings{dataset_multilan_amazon,
  title     = {The Multilingual {A}mazon Reviews Corpus},
  author    = {Keung, Phillip and
               Lu, Yichao and
               Szarvas, Gy{\"o}rgy and
               Smith, Noah A.},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.emnlp-main.369},
  doi       = {10.18653/v1/2020.emnlp-main.369},
  pages     = {4563--4568},
}